From 4e84f4a009c7324e6de0576ba6687278ff0d09d8 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Wed, 24 Aug 2022 17:26:32 -0600 Subject: [PATCH 01/17] add runtime artifact instance creation logic --- sdk/python/kfp/components/executor.py | 35 +++++- sdk/python/kfp/components/executor_test.py | 113 ++++++++++++++++++ .../kfp/components/types/artifact_types.py | 18 --- .../components/types/artifact_types_test.py | 99 --------------- 4 files changed, 143 insertions(+), 122 deletions(-) diff --git a/sdk/python/kfp/components/executor.py b/sdk/python/kfp/components/executor.py index ffac9008f85..59b18ab2eb1 100644 --- a/sdk/python/kfp/components/executor.py +++ b/sdk/python/kfp/components/executor.py @@ -38,7 +38,7 @@ def __init__(self, executor_input: Dict, function_to_execute: Callable): artifacts_list = artifacts.get('artifacts') if artifacts_list: self._input_artifacts[name] = self._make_input_artifact( - artifacts_list[0]) + artifacts_list[0], name, self._func) for name, artifacts in self._input.get('outputs', {}).get('artifacts', {}).items(): @@ -51,14 +51,21 @@ def __init__(self, executor_input: Dict, function_to_execute: Callable): self._func).return_annotation self._executor_output = {} - @classmethod - def _make_input_artifact(cls, runtime_artifact: Dict): - return artifact_types.create_runtime_artifact(runtime_artifact) + def _make_input_artifact( + self, + runtime_artifact: Dict, + name: str, + func: Callable, + ): + annotations = inspect.getfullargspec(func).annotations + artifact_class = type_annotations.get_io_artifact_class( + annotations.get(name)) + return create_artifact_instance(runtime_artifact, artifact_class) @classmethod def _make_output_artifact(cls, runtime_artifact: Dict): import os - artifact = artifact_types.create_runtime_artifact(runtime_artifact) + artifact = create_artifact_instance(runtime_artifact) os.makedirs(os.path.dirname(artifact.path), exist_ok=True) return artifact @@ -297,3 +304,21 @@ def execute(self): result = self._func(**func_kwargs) self._write_executor_output(result) + + +def create_artifact_instance( + runtime_artifact: Dict, + artifact_cls=None, +) -> artifact_types.Artifact: + """Creates an artifact class instances from a runtime artifact + dictionary.""" + schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '') + + artifact_type = artifact_types._SCHEMA_TITLE_TO_TYPE.get(schema_title) + if not artifact_type: + artifact_type = artifact_cls or artifact_types.Artifact + return artifact_type( + uri=runtime_artifact.get('uri', ''), + name=runtime_artifact.get('name', ''), + metadata=runtime_artifact.get('metadata', {}), + ) diff --git a/sdk/python/kfp/components/executor_test.py b/sdk/python/kfp/components/executor_test.py index cc54c005e07..23ffa5b9e31 100644 --- a/sdk/python/kfp/components/executor_test.py +++ b/sdk/python/kfp/components/executor_test.py @@ -119,6 +119,21 @@ def test_func(input_parameter: str): self._get_executor(test_func).execute() + def test_input_artifact_custom_type(self): + + class VertexDataset(Dataset): + pass + + def test_func(input_artifact_one_path: Input[VertexDataset]): + self.assertEqual(input_artifact_one_path.uri, + 'gs://some-bucket/input_artifact_one') + self.assertEqual( + input_artifact_one_path.path, + os.path.join(self._test_dir, 'some-bucket/input_artifact_one')) + self.assertEqual(input_artifact_one_path.name, 'input_artifact_one') + + self._get_executor(test_func).execute() + def test_input_artifact(self): def test_func(input_artifact_one_path: Input[Dataset]): @@ -689,6 +704,104 @@ def test_func(status: PipelineTaskFinalStatus) -> str: }, }) + @parameterized.parameters( + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.Artifact' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'expected_type': artifact_types.Artifact, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.Model' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'expected_type': artifact_types.Model, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.Dataset' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'expected_type': artifact_types.Dataset, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.Metrics' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'expected_type': artifact_types.Metrics, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.ClassificationMetrics' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'expected_type': artifact_types.ClassificationMetrics, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.SlicedClassificationMetrics' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'expected_type': artifact_types.SlicedClassificationMetrics, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.HTML' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'expected_type': artifact_types.HTML, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.Markdown' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'expected_type': artifact_types.Markdown, + }, + ) + def test_create_artifact_instance( + self, + runtime_artifact, + expected_type, + ): + self.assertIsInstance( + executor.create_artifact_instance(runtime_artifact), expected_type) + if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/components/types/artifact_types.py b/sdk/python/kfp/components/types/artifact_types.py index 7b783aa6598..4ab2a82afe7 100644 --- a/sdk/python/kfp/components/types/artifact_types.py +++ b/sdk/python/kfp/components/types/artifact_types.py @@ -510,21 +510,3 @@ def __init__(self, Markdown, ] } - - -def create_runtime_artifact(runtime_artifact: Dict) -> Artifact: - """Creates an Artifact instance from the specified RuntimeArtifact. - - Args: - runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact. - """ - schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '') - - artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title) - if not artifact_type: - artifact_type = Artifact - return artifact_type( - uri=runtime_artifact.get('uri', ''), - name=runtime_artifact.get('name', ''), - metadata=runtime_artifact.get('metadata', {}), - ) diff --git a/sdk/python/kfp/components/types/artifact_types_test.py b/sdk/python/kfp/components/types/artifact_types_test.py index 8f906f66505..517d5f9b4ed 100644 --- a/sdk/python/kfp/components/types/artifact_types_test.py +++ b/sdk/python/kfp/components/types/artifact_types_test.py @@ -56,105 +56,6 @@ def test_complex_metrics_bulk_loading(self): expected_json = json.load(json_file) self.assertEqual(expected_json, metrics.metadata) - @parameterized.parameters( - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.Artifact' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'expected_type': artifact_types.Artifact, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.Model' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'expected_type': artifact_types.Model, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.Dataset' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'expected_type': artifact_types.Dataset, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.Metrics' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'expected_type': artifact_types.Metrics, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.ClassificationMetrics' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'expected_type': artifact_types.ClassificationMetrics, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.SlicedClassificationMetrics' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'expected_type': artifact_types.SlicedClassificationMetrics, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.HTML' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'expected_type': artifact_types.HTML, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.Markdown' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'expected_type': artifact_types.Markdown, - }, - ) - def test_create_runtime_artifact( - self, - runtime_artifact, - expected_type, - ): - self.assertIsInstance( - artifact_types.create_runtime_artifact(runtime_artifact), - expected_type) - if __name__ == '__main__': unittest.main() From 2d05ad9c39b8afabf816c3ed1eebded10cd1728d Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Wed, 24 Aug 2022 20:18:01 -0600 Subject: [PATCH 02/17] refactor executor --- sdk/python/kfp/components/executor.py | 35 +++++++++++++++------------ 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/sdk/python/kfp/components/executor.py b/sdk/python/kfp/components/executor.py index 59b18ab2eb1..714387410d7 100644 --- a/sdk/python/kfp/components/executor.py +++ b/sdk/python/kfp/components/executor.py @@ -13,6 +13,7 @@ # limitations under the License. import inspect import json +import os from typing import Any, Callable, Dict, List, Optional, Union from kfp.components import task_final_status @@ -37,37 +38,39 @@ def __init__(self, executor_input: Dict, function_to_execute: Callable): {}).get('artifacts', {}).items(): artifacts_list = artifacts.get('artifacts') if artifacts_list: - self._input_artifacts[name] = self._make_input_artifact( - artifacts_list[0], name, self._func) + self._input_artifacts[name] = self.make_artifact( + artifacts_list[0], + name, + self._func, + ) for name, artifacts in self._input.get('outputs', {}).get('artifacts', {}).items(): artifacts_list = artifacts.get('artifacts') if artifacts_list: - self._output_artifacts[name] = self._make_output_artifact( - artifacts_list[0]) + output_artifact = self.make_artifact( + artifacts_list[0], + name, + self._func, + ) + self._output_artifacts[name] = output_artifact + self.makedirs_recursively(output_artifact.path) self._return_annotation = inspect.signature( self._func).return_annotation self._executor_output = {} - def _make_input_artifact( + def make_artifact( self, runtime_artifact: Dict, name: str, func: Callable, - ): - annotations = inspect.getfullargspec(func).annotations - artifact_class = type_annotations.get_io_artifact_class( - annotations.get(name)) - return create_artifact_instance(runtime_artifact, artifact_class) + ) -> Any: + artifact_cls = func.__annotations__.get(name) + return create_artifact_instance(runtime_artifact, artifact_cls) - @classmethod - def _make_output_artifact(cls, runtime_artifact: Dict): - import os - artifact = create_artifact_instance(runtime_artifact) - os.makedirs(os.path.dirname(artifact.path), exist_ok=True) - return artifact + def makedirs_recursively(self, path: str) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) def _get_input_artifact(self, name: str): return self._input_artifacts.get(name) From 754116834836d6bccb6783278a4200519e472546 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Wed, 24 Aug 2022 20:18:11 -0600 Subject: [PATCH 03/17] add executor tests --- sdk/python/kfp/components/executor_test.py | 561 +++++++++++++-------- 1 file changed, 364 insertions(+), 197 deletions(-) diff --git a/sdk/python/kfp/components/executor_test.py b/sdk/python/kfp/components/executor_test.py index 23ffa5b9e31..7a61e23c2e9 100644 --- a/sdk/python/kfp/components/executor_test.py +++ b/sdk/python/kfp/components/executor_test.py @@ -19,6 +19,7 @@ from typing import Callable, Dict, List, NamedTuple, Optional import unittest +from absl.testing import parameterized from kfp.components import executor from kfp.components.task_final_status import PipelineTaskFinalStatus from kfp.components.types import artifact_types @@ -31,64 +32,6 @@ from kfp.components.types.type_annotations import Output from kfp.components.types.type_annotations import OutputPath -_EXECUTOR_INPUT = """\ -{ - "inputs": { - "parameterValues": { - "input_parameter": "Hello, KFP" - }, - "artifacts": { - "input_artifact_one_path": { - "artifacts": [ - { - "metadata": {}, - "name": "input_artifact_one", - "type": { - "schemaTitle": "system.Dataset" - }, - "uri": "gs://some-bucket/input_artifact_one" - } - ] - } - } - }, - "outputs": { - "artifacts": { - "output_artifact_one_path": { - "artifacts": [ - { - "metadata": {}, - "name": "output_artifact_one", - "type": { - "schemaTitle": "system.Model" - }, - "uri": "gs://some-bucket/output_artifact_one" - } - ] - }, - "output_artifact_two": { - "artifacts": [ - { - "metadata": {}, - "name": "output_artifact_two", - "type": { - "schemaTitle": "system.Metrics" - }, - "uri": "gs://some-bucket/output_artifact_two" - } - ] - } - }, - "parameters": { - "output_parameter_path": { - "outputFile": "%(test_dir)s/gcs/some-bucket/some_task/nested/output_parameter" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } -} -""" - class ExecutorTest(unittest.TestCase): @@ -100,67 +43,188 @@ def setUp(cls): artifact_types._MINIO_LOCAL_MOUNT_PREFIX = cls._test_dir + '/minio/' artifact_types._S3_LOCAL_MOUNT_PREFIX = cls._test_dir + '/s3/' - def _get_executor( - self, - func: Callable, - executor_input: Optional[str] = None) -> executor.Executor: - if executor_input is None: - executor_input = _EXECUTOR_INPUT + def execute_and_load_output_metadata( + self, func: Callable, executor_input: str) -> executor.Executor: executor_input_dict = json.loads(executor_input % {'test_dir': self._test_dir}) - return executor.Executor( - executor_input=executor_input_dict, function_to_execute=func) + executor.Executor( + executor_input=executor_input_dict, + function_to_execute=func).execute() + with open(os.path.join(self._test_dir, 'output_metadata.json'), + 'r') as f: + return json.loads(f.read()) def test_input_parameter(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "input_parameter": "Hello, KFP" + } + }, + "outputs": { + "parameters": { + "output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ - def test_func(input_parameter: str): + def test_func(input_parameter: str) -> str: self.assertEqual(input_parameter, 'Hello, KFP') + return input_parameter - self._get_executor(test_func).execute() + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertEqual({'parameterValues': { + 'Output': 'Hello, KFP' + }}, output_metadata) def test_input_artifact_custom_type(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + }, + "artifacts": { + "input_artifact_one": { + "artifacts": [ + { + "metadata": {}, + "name": "input_artifact_one", + "type": { + "schemaTitle": "google.VertexDataset" + }, + "uri": "gs://some-bucket/input_artifact_one" + } + ] + } + } + }, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + class VertexDataset: + TYPE_NAME = 'google.VertexDataset' + + def __init__(self, name: str, uri: str, metadata: dict) -> None: + self.name = name + self.uri = uri + self.metadata = metadata - class VertexDataset(Dataset): - pass + @property + def path(self) -> str: + return self.uri.replace('gs://', '/gcs/') - def test_func(input_artifact_one_path: Input[VertexDataset]): - self.assertEqual(input_artifact_one_path.uri, + def test_func(input_artifact_one: Input[VertexDataset]): + self.assertEqual(input_artifact_one.uri, 'gs://some-bucket/input_artifact_one') self.assertEqual( - input_artifact_one_path.path, - os.path.join(self._test_dir, 'some-bucket/input_artifact_one')) - self.assertEqual(input_artifact_one_path.name, 'input_artifact_one') + input_artifact_one.path, + os.path.join(self._test_dir, + '/gcs/some-bucket/input_artifact_one')) + self.assertEqual(input_artifact_one.name, 'input_artifact_one') + self.assertIsInstance(input_artifact_one, VertexDataset) - self._get_executor(test_func).execute() + self.execute_and_load_output_metadata(test_func, executor_input) def test_input_artifact(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + }, + "artifacts": { + "input_artifact_one": { + "artifacts": [ + { + "metadata": {}, + "name": "input_artifact_one", + "type": { + "schemaTitle": "google.VertexDataset" + }, + "uri": "gs://some-bucket/input_artifact_one" + } + ] + } + } + }, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ - def test_func(input_artifact_one_path: Input[Dataset]): - self.assertEqual(input_artifact_one_path.uri, + def test_func(input_artifact_one: Input[Dataset]): + self.assertEqual(input_artifact_one.uri, 'gs://some-bucket/input_artifact_one') self.assertEqual( - input_artifact_one_path.path, + input_artifact_one.path, os.path.join(self._test_dir, 'some-bucket/input_artifact_one')) - self.assertEqual(input_artifact_one_path.name, 'input_artifact_one') + self.assertEqual(input_artifact_one.name, 'input_artifact_one') + self.assertIsInstance(input_artifact_one, Dataset) - self._get_executor(test_func).execute() + self.execute_and_load_output_metadata(test_func, executor_input) def test_output_artifact(self): + executor_input = """\ + { + "inputs": { + }, + "outputs": { + "artifacts": { + "output_artifact_one": { + "artifacts": [ + { + "metadata": {}, + "name": "output_artifact_one", + "type": { + "schemaTitle": "system.Model" + }, + "uri": "gs://some-bucket/output_artifact_one" + } + ] + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ - def test_func(output_artifact_one_path: Output[Model]): - self.assertEqual(output_artifact_one_path.uri, + def test_func(output_artifact_one: Output[Model]): + self.assertEqual(output_artifact_one.uri, 'gs://some-bucket/output_artifact_one') self.assertEqual( - output_artifact_one_path.path, + output_artifact_one.path, os.path.join(self._test_dir, 'some-bucket/output_artifact_one')) - self.assertEqual(output_artifact_one_path.name, - 'output_artifact_one') + self.assertEqual(output_artifact_one.name, 'output_artifact_one') + self.assertIsInstance(output_artifact_one, Model) - self._get_executor(test_func).execute() + self.execute_and_load_output_metadata(test_func, executor_input) def test_output_parameter(self): + executor_input = """\ + { + "inputs": { + }, + "outputs": { + "parameters": { + "output_parameter_path": { + "outputFile": "%(test_dir)s/gcs/some-bucket/some_task/nested/output_parameter" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ def test_func(output_parameter_path: OutputPath(str)): # Test that output parameters just use the passed in filename. @@ -170,27 +234,96 @@ def test_func(output_parameter_path: OutputPath(str)): with open(output_parameter_path, 'w') as f: f.write('Hello, World!') - self._get_executor(test_func).execute() + self.execute_and_load_output_metadata(test_func, executor_input) def test_input_path_artifact(self): + executor_input = """\ + { + "inputs": { + "artifacts": { + "input_artifact_one_path": { + "artifacts": [ + { + "metadata": {}, + "name": "input_artifact_one", + "type": { + "schemaTitle": "system.Dataset" + }, + "uri": "gs://some-bucket/input_artifact_one" + } + ] + } + } + }, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ def test_func(input_artifact_one_path: InputPath('Dataset')): self.assertEqual( input_artifact_one_path, os.path.join(self._test_dir, 'some-bucket/input_artifact_one')) - self._get_executor(test_func).execute() + self.execute_and_load_output_metadata(test_func, executor_input) def test_output_path_artifact(self): + executor_input = """\ + { + "inputs": { + }, + "outputs": { + "artifacts": { + "output_artifact_one_path": { + "artifacts": [ + { + "metadata": {}, + "name": "output_artifact_one", + "type": { + "schemaTitle": "system.Model" + }, + "uri": "gs://some-bucket/output_artifact_one" + } + ] + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ def test_func(output_artifact_one_path: OutputPath('Model')): self.assertEqual( output_artifact_one_path, os.path.join(self._test_dir, 'some-bucket/output_artifact_one')) - self._get_executor(test_func).execute() + self.execute_and_load_output_metadata(test_func, executor_input) def test_output_metadata(self): + executor_input = """\ + { + "inputs": { + }, + "outputs": { + "artifacts": { + "output_artifact_two": { + "artifacts": [ + { + "metadata": {}, + "name": "output_artifact_two", + "type": { + "schemaTitle": "system.Metrics" + }, + "uri": "gs://some-bucket/output_artifact_two" + } + ] + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ def test_func(output_artifact_two: Output[Metrics]): output_artifact_two.metadata['key_1'] = 'value_1' @@ -200,20 +333,12 @@ def test_func(output_artifact_two: Output[Metrics]): # log_metric works here since the schema is specified as Metrics. output_artifact_two.log_metric('metric', 0.9) - self._get_executor(test_func).execute() - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - output_metadata = json.loads(f.read()) + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertDictEqual( output_metadata, { 'artifacts': { - 'output_artifact_one_path': { - 'artifacts': [{ - 'name': 'output_artifact_one', - 'uri': 'gs://some-bucket/output_artifact_one', - 'metadata': {} - }] - }, 'output_artifact_two': { 'artifacts': [{ 'name': 'output_artifact_two', @@ -234,7 +359,7 @@ def test_function_string_output(self): "inputs": { "parameterValues": { "first_message": "Hello", - "second_message": "", + "second_message": ", ", "third_message": "World" } }, @@ -254,15 +379,13 @@ def test_func( second_message: str, third_message: str, ) -> str: - return first_message + ', ' + second_message + ', ' + third_message + return first_message + second_message + third_message - self._get_executor(test_func, executor_input).execute() - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - output_metadata = json.loads(f.read()) + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) self.assertDictEqual(output_metadata, { 'parameterValues': { - 'Output': 'Hello, , World' + 'Output': 'Hello, World' }, }) @@ -289,10 +412,8 @@ def test_function_with_int_output(self): def test_func(first: int, second: int) -> int: return first + second - self._get_executor(test_func, executor_input).execute() - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - output_metadata = json.loads(f.read()) + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) self.assertDictEqual(output_metadata, { 'parameterValues': { 'Output': 42 @@ -322,10 +443,9 @@ def test_function_with_float_output(self): def test_func(first: float, second: float) -> float: return first + second - self._get_executor(test_func, executor_input).execute() - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - output_metadata = json.loads(f.read()) + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertDictEqual(output_metadata, { 'parameterValues': { 'Output': 1.2 @@ -355,10 +475,9 @@ def test_function_with_list_output(self): def test_func(first: int, second: int) -> List: return [first, second] - self._get_executor(test_func, executor_input).execute() - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - output_metadata = json.loads(f.read()) + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertDictEqual(output_metadata, { 'parameterValues': { 'Output': [40, 2] @@ -388,10 +507,9 @@ def test_function_with_dict_output(self): def test_func(first: int, second: int) -> Dict: return {'first': first, 'second': second} - self._get_executor(test_func, executor_input).execute() - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - output_metadata = json.loads(f.read()) + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertDictEqual(output_metadata, { 'parameterValues': { 'Output': { @@ -424,10 +542,9 @@ def test_function_with_typed_list_output(self): def test_func(first: int, second: int) -> List[int]: return [first, second] - self._get_executor(test_func, executor_input).execute() - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - output_metadata = json.loads(f.read()) + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertDictEqual(output_metadata, { 'parameterValues': { 'Output': [40, 2] @@ -457,10 +574,9 @@ def test_function_with_typed_dict_output(self): def test_func(first: int, second: int) -> Dict[str, int]: return {'first': first, 'second': second} - self._get_executor(test_func, executor_input).execute() - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - output_metadata = json.loads(f.read()) + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertDictEqual(output_metadata, { 'parameterValues': { 'Output': { @@ -481,7 +597,7 @@ def test_artifact_output(self): }, "outputs": { "artifacts": { - "Output": { + "output": { "artifacts": [ { "name": "output", @@ -498,59 +614,63 @@ def test_artifact_output(self): } """ - def test_func(first: str, second: str) -> Artifact: + def test_func(first: str, second: str, output: Output[Artifact]) -> str: + with open(output.path, 'w') as f: + f.write('artifact output') return first + ', ' + second - self._get_executor(test_func, executor_input).execute() - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - output_metadata = json.loads(f.read()) + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertDictEqual( output_metadata, { 'artifacts': { - 'Output': { + 'output': { 'artifacts': [{ 'metadata': {}, 'name': 'output', 'uri': 'gs://some-bucket/output' }] } + }, + 'parameterValues': { + 'Output': 'Hello, World' } }) with open(os.path.join(self._test_dir, 'some-bucket/output'), 'r') as f: artifact_payload = f.read() - self.assertEqual(artifact_payload, 'Hello, World') + self.assertEqual(artifact_payload, 'artifact output') def test_named_tuple_output(self): executor_input = """\ - { - "outputs": { - "artifacts": { - "output_dataset": { - "artifacts": [ - { - "name": "output_dataset", - "type": { - "schemaTitle": "system.Dataset" - }, - "uri": "gs://some-bucket/output_dataset" + { + "outputs": { + "artifacts": { + "output_dataset": { + "artifacts": [ + { + "name": "output_dataset", + "type": { + "schemaTitle": "system.Dataset" + }, + "uri": "gs://some-bucket/output_dataset" + } + ] } - ] - } - }, - "parameters": { - "output_int": { - "outputFile": "gs://some-bucket/output_int" - }, - "output_string": { - "outputFile": "gs://some-bucket/output_string" + }, + "parameters": { + "output_int": { + "outputFile": "gs://some-bucket/output_int" + }, + "output_string": { + "outputFile": "gs://some-bucket/output_string" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ + } + """ # Functions returning named tuples should work. def func_returning_named_tuple() -> NamedTuple('Outputs', [ @@ -574,11 +694,9 @@ def func_returning_plain_tuple() -> NamedTuple('Outputs', [ for test_func in [ func_returning_named_tuple, func_returning_plain_tuple ]: - self._get_executor(test_func, executor_input).execute() - with open( - os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - output_metadata = json.loads(f.read()) + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertDictEqual( output_metadata, { 'artifacts': { @@ -604,29 +722,29 @@ def func_returning_plain_tuple() -> NamedTuple('Outputs', [ def test_function_with_optional_inputs(self): executor_input = """\ - { - "inputs": { - "parameterValues": { - "first_message": "Hello", - "second_message": "World" - } - }, - "outputs": { - "parameters": { - "output": { - "outputFile": "gs://some-bucket/output" + { + "inputs": { + "parameterValues": { + "first_message": "Hello", + "second_message": "World" + } + }, + "outputs": { + "parameters": { + "output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ + } + """ def test_func( first_message: str = 'default value', second_message: Optional[str] = None, third_message: Optional[str] = None, - forth_argument: str = 'abc', + fourth_argument: str = 'abc', fifth_argument: int = 100, sixth_argument: float = 1.23, seventh_argument: bool = True, @@ -636,17 +754,16 @@ def test_func( return (f'{first_message} ({type(first_message)}), ' f'{second_message} ({type(second_message)}), ' f'{third_message} ({type(third_message)}), ' - f'{forth_argument} ({type(forth_argument)}), ' + f'{fourth_argument} ({type(fourth_argument)}), ' f'{fifth_argument} ({type(fifth_argument)}), ' f'{sixth_argument} ({type(sixth_argument)}), ' f'{seventh_argument} ({type(seventh_argument)}), ' f'{eighth_argument} ({type(eighth_argument)}), ' f'{ninth_argument} ({type(ninth_argument)}).') - self._get_executor(test_func, executor_input).execute() - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - output_metadata = json.loads(f.read()) + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertDictEqual( output_metadata, { 'parameterValues': { @@ -688,10 +805,9 @@ def test_func(status: PipelineTaskFinalStatus) -> str: f'Error code: {status.error_code}\n' f'Error message: {status.error_message}') - self._get_executor(test_func, executor_input).execute() - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - output_metadata = json.loads(f.read()) + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertDictEqual( output_metadata, { 'parameterValues': { @@ -704,6 +820,23 @@ def test_func(status: PipelineTaskFinalStatus) -> str: }, }) + +class VertexDataset: + schema_title = 'google.VertexDataset' + schema_version = '0.0.0' + + def __init__(self, name: str, uri: str, metadata: dict) -> None: + self.name = name + self.uri = uri + self.metadata = metadata + + @property + def path(self) -> str: + return self.uri.replace('gs://', '/gcs/') + + +class TestDictToArtifact(parameterized.TestCase): + @parameterized.parameters( { 'runtime_artifact': { @@ -714,6 +847,7 @@ def test_func(status: PipelineTaskFinalStatus) -> str: }, 'uri': 'gs://some-bucket/input_artifact_one' }, + 'artifact_cls': artifact_types.Artifact, 'expected_type': artifact_types.Artifact, }, { @@ -725,6 +859,7 @@ def test_func(status: PipelineTaskFinalStatus) -> str: }, 'uri': 'gs://some-bucket/input_artifact_one' }, + 'artifact_cls': artifact_types.Model, 'expected_type': artifact_types.Model, }, { @@ -736,6 +871,7 @@ def test_func(status: PipelineTaskFinalStatus) -> str: }, 'uri': 'gs://some-bucket/input_artifact_one' }, + 'artifact_cls': artifact_types.Dataset, 'expected_type': artifact_types.Dataset, }, { @@ -747,6 +883,7 @@ def test_func(status: PipelineTaskFinalStatus) -> str: }, 'uri': 'gs://some-bucket/input_artifact_one' }, + 'artifact_cls': artifact_types.Metrics, 'expected_type': artifact_types.Metrics, }, { @@ -758,6 +895,7 @@ def test_func(status: PipelineTaskFinalStatus) -> str: }, 'uri': 'gs://some-bucket/input_artifact_one' }, + 'artifact_cls': artifact_types.ClassificationMetrics, 'expected_type': artifact_types.ClassificationMetrics, }, { @@ -769,6 +907,7 @@ def test_func(status: PipelineTaskFinalStatus) -> str: }, 'uri': 'gs://some-bucket/input_artifact_one' }, + 'artifact_cls': artifact_types.SlicedClassificationMetrics, 'expected_type': artifact_types.SlicedClassificationMetrics, }, { @@ -780,6 +919,7 @@ def test_func(status: PipelineTaskFinalStatus) -> str: }, 'uri': 'gs://some-bucket/input_artifact_one' }, + 'artifact_cls': None, 'expected_type': artifact_types.HTML, }, { @@ -791,17 +931,44 @@ def test_func(status: PipelineTaskFinalStatus) -> str: }, 'uri': 'gs://some-bucket/input_artifact_one' }, + 'artifact_cls': None, 'expected_type': artifact_types.Markdown, }, ) - def test_create_artifact_instance( + def test_dict_to_artifact_kfp_artifact( self, runtime_artifact, + artifact_cls, expected_type, ): + # with artifact_cls + self.assertIsInstance( + executor.create_artifact_instance( + runtime_artifact, artifact_cls=artifact_cls), expected_type) + + # without artifact_cls self.assertIsInstance( executor.create_artifact_instance(runtime_artifact), expected_type) + def test_dict_to_artifact_nonkfp_artifact(self): + runtime_artifact = { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'google.VertexDataset' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + } + # with artifact_cls + self.assertIsInstance( + executor.create_artifact_instance( + runtime_artifact, artifact_cls=VertexDataset), VertexDataset) + + # without artifact_cls + self.assertIsInstance( + executor.create_artifact_instance(runtime_artifact), + artifact_types.Artifact) + if __name__ == '__main__': unittest.main() From e4666761a7ee2ecb5404a6fdb76022b1c7865813 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Tue, 30 Aug 2022 10:50:28 -0600 Subject: [PATCH 04/17] add custom artifact type import handling and tests --- .../kfp/components/component_factory.py | 3 +- .../kfp/components/component_factory_test.py | 4 + .../components/types/custom_artifact_types.py | 201 ++++++ .../types/custom_artifact_types_test.py | 600 ++++++++++++++++++ 4 files changed, 807 insertions(+), 1 deletion(-) create mode 100644 sdk/python/kfp/components/types/custom_artifact_types.py create mode 100644 sdk/python/kfp/components/types/custom_artifact_types_test.py diff --git a/sdk/python/kfp/components/component_factory.py b/sdk/python/kfp/components/component_factory.py index c93f0d5a313..0e91acda7d6 100644 --- a/sdk/python/kfp/components/component_factory.py +++ b/sdk/python/kfp/components/component_factory.py @@ -28,6 +28,7 @@ from kfp.components import structures from kfp.components.container_component_artifact_channel import \ ContainerComponentArtifactChannel +from kfp.components.types import custom_artifact_types from kfp.components.types import type_annotations from kfp.components.types import type_utils @@ -322,7 +323,7 @@ def _get_command_and_args_for_lightweight_component( 'from kfp import dsl', 'from kfp.dsl import *', 'from typing import *', - ] + ] + custom_artifact_types.get_custom_artifact_type_import_statements(func) func_source = _get_function_source_definition(func) source = textwrap.dedent(''' diff --git a/sdk/python/kfp/components/component_factory_test.py b/sdk/python/kfp/components/component_factory_test.py index 6b984b6962b..c20da8fc6ff 100644 --- a/sdk/python/kfp/components/component_factory_test.py +++ b/sdk/python/kfp/components/component_factory_test.py @@ -44,3 +44,7 @@ def test_with_packages_to_install_with_pip_index_url(self): concat_command = ' '.join(command) for package in packages_to_install + pip_index_urls: self.assertTrue(package in concat_command) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/kfp/components/types/custom_artifact_types.py b/sdk/python/kfp/components/types/custom_artifact_types.py new file mode 100644 index 00000000000..c9cc434a317 --- /dev/null +++ b/sdk/python/kfp/components/types/custom_artifact_types.py @@ -0,0 +1,201 @@ +# Copyright 2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ast +import inspect +import sys +from typing import Any, Callable, Dict, List + +from kfp.components import component_factory +from kfp.components.types import type_annotations + +PYTHON_VERSION = str(sys.version_info.major) + '.' + str(sys.version_info.minor) + + +def get_custom_artifact_type_import_statements(func: Callable) -> List[str]: + """Gets a list of custom artifact type import statements from a lightweight + Python component function.""" + artifact_imports = get_custom_artifact_import_items_from_function(func) + imports_source = [] + for obj_str in artifact_imports: + if '.' in obj_str: + path, name = obj_str.rsplit('.', 1) + imports_source.append(f'from {path} import {name}') + else: + imports_source.append(f'import {obj_str}') + return imports_source + + +def get_param_to_annotation_object(func: Callable) -> Dict[str, Any]: + """Gets a dictionary of parameter name to type annotation object from a + function. + + Args: + func: The function. + + Returns: + Dictionary of parameter name to type annotation object. + """ + signature = inspect.signature(func) + return { + name: parameter.annotation + for name, parameter in signature.parameters.items() + } + + +def get_full_qualname_for_artifact(obj: type) -> str: + """Gets the fully qualified name for an object. For example, for class Foo + in module bar.baz, this function returns bar.baz.Foo. + + Note: typing.get_type_hints purports to do the same thing, but it behaves differently when executed within the scope of a test, so preferring this approach instead. + + Args: + obj: The class or module for which to get the fully qualified name. + + Returns: + The fully qualified name for the class. + """ + module = obj.__module__ + name = obj.__qualname__ + if module is not None: + name = module + '.' + name + return name + + +def get_import_item_from_annotation_string_and_ann_obj(artifact_str: str, + qualname: str) -> str: + """Gets the fully qualified names of the module or type to import from the + annotation string and the annotation object. + + Args: + artifact_str: The type annotation string. + qualname: The fully qualified type annotation name as a string. + + Returns: + The fully qualified names of the module or type to import. + """ + split_qualname = qualname.split('.') + if artifact_str in split_qualname: + name_to_import = '.'.join( + split_qualname[:split_qualname.index(artifact_str) + 1]) + else: + raise TypeError( + f"Module or type name aliases are not supported. You appear to be using an alias in your type annotation: '{qualname}'. This may be due to use of an 'as' statement in an import statement or a reassignment of a module or type to a new name. Reference the module and/or type using the name as defined in the source from which the module or type is imported." + ) + return name_to_import + + +def func_to_annotation_object(func: Callable) -> Dict[str, str]: + """Gets a dict of parameter name to annotation object for a function.""" + signature = inspect.signature(func) + return { + name: parameter.annotation + for name, parameter in signature.parameters.items() + } + + +def func_to_root_annotation_symbols_for_artifacts( + func: Callable) -> Dict[str, str]: + """Gets a map of parameter name to the root symbol used to reference an + annotation for a function. + + For example, the annotation `type_module.Artifact` has the root + symbol `type_module` and the annotation `Artifact` has the root + symbol `Artifact`. + """ + module_node = ast.parse( + component_factory._get_function_source_definition(func)) + args = module_node.body[0].args.args + + def convert_ast_arg_node_to_param_annotation_dict_for_artifacts( + args: List[ast.arg]) -> Dict[str, str]: + """Converts a list of ast.arg nodes to a dictionary of the parameter + name to type annotation as a string (for artifact annotations only). + + Args: + args: AST argument nodes for a function. + + Returns: + A dictionary of parameter name to type annotation as a string. + """ + arg_to_ann = {} + for arg in args: + annotation = arg.annotation + # Handle InputPath() and OutputPath() + if isinstance(annotation, ast.Call): + if isinstance(annotation.func, ast.Name): + arg_to_ann[arg.arg] = annotation.func.id + elif isinstance(annotation.func, ast.Attribute): + arg_to_ann[arg.arg] = annotation.func.value.id + else: + raise TypeError( + f'Unexpected type annotation for {annotation.func}.') + # annotations with a subtype like Input[Artifact] + elif isinstance(annotation, ast.Subscript): + # get inner type + if PYTHON_VERSION < '3.9': + # see "Changed in version 3.9" https://docs.python.org/3/library/ast.html#node-classes + if isinstance(annotation.slice.value, ast.Name): + arg_to_ann[arg.arg] = annotation.slice.value.id + if isinstance(annotation.slice.value, ast.Attribute): + arg_to_ann[arg.arg] = annotation.slice.value.value.id + else: + if isinstance(annotation.slice, ast.Name): + arg_to_ann[arg.arg] = annotation.slice.id + if isinstance(annotation.slice, ast.Attribute): + arg_to_ann[arg.arg] = annotation.slice.value.id + # annotations like type_annotations.Input[Artifact] + elif isinstance(annotation, ast.Attribute): + arg_to_ann[arg.arg] = annotation.value.id + return arg_to_ann + + return convert_ast_arg_node_to_param_annotation_dict_for_artifacts(args) + + +def get_custom_artifact_import_items_from_function(func: Callable) -> List[str]: + """Gets a list of fully qualified names of the modules or types to import. + + Args: + func: The component function. + + Returns: + A list containing the fully qualified names of the module or type to import. + """ + + param_to_ann_obj = func_to_annotation_object(func) + param_to_ann_string = func_to_root_annotation_symbols_for_artifacts(func) + + import_items = [] + seen = set() + for param_name, ann_string in param_to_ann_string.items(): + # don't process the same annotation string multiple times + # use the string, not the object in the seen set, since the same object can be referenced different ways in the same function signature + if ann_string in seen: + continue + else: + seen.add(ann_string) + + actual_ann = param_to_ann_obj.get(param_name) + + if actual_ann is not None and type_annotations.is_artifact_annotation( + actual_ann): + artifact_class = type_annotations.get_io_artifact_class(actual_ann) + artifact_qualname = get_full_qualname_for_artifact(artifact_class) + if not artifact_qualname.startswith('kfp.'): + # kfp artifacts are already imported by default + import_items.append( + get_import_item_from_annotation_string_and_ann_obj( + ann_string, artifact_qualname)) + + return import_items diff --git a/sdk/python/kfp/components/types/custom_artifact_types_test.py b/sdk/python/kfp/components/types/custom_artifact_types_test.py new file mode 100644 index 00000000000..45e983f0081 --- /dev/null +++ b/sdk/python/kfp/components/types/custom_artifact_types_test.py @@ -0,0 +1,600 @@ +# Copyright 2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +import os +import sys +import tempfile +import textwrap +import typing +from typing import Any +import unittest + +from absl.testing import parameterized +import kfp +from kfp import compiler +from kfp import dsl +from kfp.components.types import artifact_types +from kfp.components.types import custom_artifact_types +from kfp.components.types import type_annotations +from kfp.components.types.artifact_types import Artifact +from kfp.components.types.artifact_types import Dataset +from kfp.components.types.type_annotations import Input +from kfp.components.types.type_annotations import InputPath +from kfp.components.types.type_annotations import Output +from kfp.components.types.type_annotations import OutputPath +import typing_extensions +import yaml + +Alias = Artifact +artifact_types_alias = artifact_types + + +class TestFuncToRootAnnotationSymbol(unittest.TestCase): + + def test_no_annotations(self): + + def func(a, b): + pass + + actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( + func) + expected = {} + self.assertEqual(actual, expected) + + def test_no_return(self): + + def func(a: int, b: Input[Artifact]): + pass + + actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( + func) + expected = {'b': 'Artifact'} + self.assertEqual(actual, expected) + + def test_with_return(self): + + def func(a: int, b: Input[Artifact]) -> int: + return 1 + + actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( + func) + expected = {'b': 'Artifact'} + self.assertEqual(actual, expected) + + def test_multiline(self): + + def func( + a: int, + b: Input[Artifact], + ) -> int: + return 1 + + actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( + func) + expected = {'b': 'Artifact'} + self.assertEqual(actual, expected) + + def test_alias(self): + + def func(a: int, b: Input[Alias]): + pass + + actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( + func) + expected = {'b': 'Alias'} + self.assertEqual(actual, expected) + + def test_long_form_annotation(self): + + def func(a: int, b: Input[artifact_types.Artifact]): + pass + + actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( + func) + expected = {'b': 'artifact_types'} + self.assertEqual(actual, expected) + + def test_named_tuple(self): + + def func( + a: int, + b: Input[Artifact], + ) -> typing.NamedTuple('MyNamedTuple', [('a', int), ( + 'b', Artifact), ('c', artifact_types.Artifact)]): + InnerNamedTuple = typing.NamedTuple( + 'MyNamedTuple', [('a', int), ('b', Artifact), + ('c', artifact_types.Artifact)]) + return InnerNamedTuple(a=a, b=b, c=b) # type: ignore + + actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( + func) + expected = {'b': 'Artifact'} + self.assertEqual(actual, expected) + + def test_input_output_path(self): + + def func( + a: int, + b: InputPath('Dataset'), + ) -> OutputPath('Dataset'): + return 'dataset' + + actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( + func) + expected = {'b': 'InputPath'} + self.assertEqual(actual, expected) + + +class MyCustomArtifact: + schema_title = 'my_custom_artifact' + schema_version = '0.0.0' + + +class _TestCaseWithThirdPartyPackage(parameterized.TestCase): + + @classmethod + def setUpClass(cls): + + class ThirdPartyArtifact: + schema_title = 'custom.my_third_party_artifact' + schema_version = '0.0.0' + + class_source = textwrap.dedent(inspect.getsource(ThirdPartyArtifact)) + + tmp_dir = tempfile.TemporaryDirectory() + with open(os.path.join(tmp_dir.name, 'my_package.py'), 'w') as f: + f.write(class_source) + sys.path.append(tmp_dir.name) + cls.tmp_dir = tmp_dir + + @classmethod + def teardownClass(cls): + sys.path.pop() + cls.tmp_dir.cleanup() + + +class TestGetParamToAnnObj(unittest.TestCase): + + def test_no_named_tuple(self): + + def func( + a: int, + b: Input[Artifact], + ) -> int: + return 1 + + actual = custom_artifact_types.get_param_to_annotation_object(func) + expected = { + 'a': + int, + 'b': + typing_extensions.Annotated[Artifact, + type_annotations.InputAnnotation] + } + self.assertEqual(actual, expected) + + def test_named_tuple(self): + + MyNamedTuple = typing.NamedTuple('MyNamedTuple', [('a', int), + ('b', str)]) + + def func( + a: int, + b: Input[Artifact], + ) -> MyNamedTuple: + InnerNamedTuple = typing.NamedTuple('MyNamedTuple', [('a', int), + ('b', str)]) + return InnerNamedTuple(a=a, b='string') # type: ignore + + actual = custom_artifact_types.get_param_to_annotation_object(func) + expected = { + 'a': + int, + 'b': + typing_extensions.Annotated[Artifact, + type_annotations.InputAnnotation] + } + self.assertEqual(actual, expected) + + def test_input_output_path(self): + + def func( + a: int, + b: InputPath('Dataset'), + ) -> OutputPath('Dataset'): + return 'dataset' + + actual = custom_artifact_types.get_param_to_annotation_object(func) + self.assertEqual(actual['a'], int) + self.assertIsInstance(actual['b'], InputPath) + + +class TestGetFullQualnameForArtifact(_TestCaseWithThirdPartyPackage): + # only gets called on artifacts, so don't need to test on all types + @parameterized.parameters([ + (Alias, 'kfp.components.types.artifact_types.Artifact'), + (Artifact, 'kfp.components.types.artifact_types.Artifact'), + (Dataset, 'kfp.components.types.artifact_types.Dataset'), + ]) + def test(self, obj: Any, expected_qualname: str): + self.assertEqual( + custom_artifact_types.get_full_qualname_for_artifact(obj), + expected_qualname) + + def test_my_package_artifact(self): + import my_package + self.assertEqual( + custom_artifact_types.get_full_qualname_for_artifact( + my_package.ThirdPartyArtifact), 'my_package.ThirdPartyArtifact') + + +class TestGetArtifactImportItemsFromFunction(_TestCaseWithThirdPartyPackage): + + def test_no_annotations(self): + + def func(a, b): + pass + + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + expected = [] + self.assertEqual(actual, expected) + + def test_no_return(self): + from my_package import ThirdPartyArtifact + + def func(a: int, b: Input[ThirdPartyArtifact]): + pass + + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + expected = ['my_package.ThirdPartyArtifact'] + self.assertEqual(actual, expected) + + def test_with_return(self): + from my_package import ThirdPartyArtifact + + def func(a: int, b: Input[ThirdPartyArtifact]) -> int: + return 1 + + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + expected = ['my_package.ThirdPartyArtifact'] + self.assertEqual(actual, expected) + + def test_multiline(self): + from my_package import ThirdPartyArtifact + + def func( + a: int, + b: Input[ThirdPartyArtifact], + ) -> int: + return 1 + + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + expected = ['my_package.ThirdPartyArtifact'] + self.assertEqual(actual, expected) + + def test_alias(self): + from my_package import ThirdPartyArtifact + Alias = ThirdPartyArtifact + + def func(a: int, b: Input[Alias]): + pass + + with self.assertRaisesRegex( + TypeError, r'Module or type name aliases are not supported'): + custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + + def test_long_form_annotation(self): + import my_package + + def func(a: int, b: Output[my_package.ThirdPartyArtifact]): + pass + + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + expected = ['my_package'] + self.assertEqual(actual, expected) + + def test_aliased_module_throws_error(self): + import my_package as my_package_alias + + def func(a: int, b: Output[my_package_alias.ThirdPartyArtifact]): + pass + + with self.assertRaisesRegex( + TypeError, r'Module or type name aliases are not supported'): + custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + + def test_input_output_path(self): + from my_package import ThirdPartyArtifact + + def func( + a: int, + b: InputPath('Dataset'), + c: Output[ThirdPartyArtifact], + ) -> OutputPath('Dataset'): + return 'dataset' + + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + self.assertEqual(actual, ['my_package.ThirdPartyArtifact']) + + +class TestFuncToAnnotationObject(_TestCaseWithThirdPartyPackage): + + def test_no_annotations(self): + + def func(a, b): + pass + + actual = custom_artifact_types.func_to_annotation_object(func) + expected = {'a': inspect._empty, 'b': inspect._empty} + self.assertEqual(actual, expected) + + def test_no_return(self): + from my_package import ThirdPartyArtifact + + def func(a: int, b: Input[ThirdPartyArtifact]): + pass + + actual = custom_artifact_types.func_to_annotation_object(func) + + import my_package + expected = { + 'a': + int, + 'b': + typing_extensions.Annotated[ + my_package.ThirdPartyArtifact, + kfp.components.types.type_annotations.InputAnnotation] + } + self.assertEqual(actual, expected) + + def test_with_return(self): + from my_package import ThirdPartyArtifact + + def func(a: int, b: Input[ThirdPartyArtifact]) -> int: + return 1 + + import my_package + actual = custom_artifact_types.func_to_annotation_object(func) + expected = { + 'a': + int, + 'b': + typing_extensions.Annotated[ + my_package.ThirdPartyArtifact, + kfp.components.types.type_annotations.InputAnnotation] + } + self.assertEqual(actual, expected) + + def test_alias(self): + from my_package import ThirdPartyArtifact + Alias = ThirdPartyArtifact + + def func(a: int, b: Input[Alias]): + pass + + actual = custom_artifact_types.func_to_annotation_object(func) + + import my_package + expected = { + 'a': + int, + 'b': + typing_extensions.Annotated[ + my_package.ThirdPartyArtifact, + kfp.components.types.type_annotations.InputAnnotation] + } + self.assertEqual(actual, expected) + + def test_long_form_annotation(self): + import my_package + + def func(a: int, b: Output[my_package.ThirdPartyArtifact]): + pass + + actual = custom_artifact_types.func_to_annotation_object(func) + + expected = { + 'a': + int, + 'b': + typing_extensions.Annotated[ + my_package.ThirdPartyArtifact, + kfp.components.types.type_annotations.OutputAnnotation] + } + self.assertEqual(actual, expected) + + def test_aliased_module(self): + import my_package as my_package_alias + + def func(a: int, b: Output[my_package_alias.ThirdPartyArtifact]): + pass + + actual = custom_artifact_types.func_to_annotation_object(func) + + import my_package + expected = { + 'a': + int, + 'b': + typing_extensions.Annotated[ + my_package.ThirdPartyArtifact, + kfp.components.types.type_annotations.OutputAnnotation] + } + self.assertEqual(actual, expected) + + def test_input_output_path(self): + from my_package import ThirdPartyArtifact + + def func( + a: int, + b: InputPath('Dataset'), + c: Output[ThirdPartyArtifact], + ) -> OutputPath('Dataset'): + return 'dataset' + + actual = custom_artifact_types.func_to_annotation_object(func) + + import my_package + expected = { + 'a': + int, + 'b': + kfp.components.types.type_annotations.InputPath('Dataset'), + 'c': + typing_extensions.Annotated[ + my_package.ThirdPartyArtifact, + kfp.components.types.type_annotations.OutputAnnotation] + } + self.assertEqual(actual, expected) + + +class TestGetCustomArtifactTypeImportStatements(_TestCaseWithThirdPartyPackage): + + def test_no_annotations(self): + + def func(a, b): + pass + + actual = custom_artifact_types.get_custom_artifact_type_import_statements( + func) + expected = [] + self.assertEqual(actual, expected) + + def test_no_return(self): + from my_package import ThirdPartyArtifact + + def func(a: int, b: Input[ThirdPartyArtifact]): + pass + + actual = custom_artifact_types.get_custom_artifact_type_import_statements( + func) + + expected = ['from my_package import ThirdPartyArtifact'] + self.assertEqual(actual, expected) + + def test_with_return(self): + from my_package import ThirdPartyArtifact + + def func(a: int, b: Input[ThirdPartyArtifact]) -> int: + return 1 + + import my_package + actual = custom_artifact_types.get_custom_artifact_type_import_statements( + func) + expected = ['from my_package import ThirdPartyArtifact'] + self.assertEqual(actual, expected) + + def test_alias(self): + from my_package import ThirdPartyArtifact + Alias = ThirdPartyArtifact + + def func(a: int, b: Input[Alias]): + pass + + with self.assertRaisesRegex( + TypeError, r'Module or type name aliases are not supported'): + custom_artifact_types.get_custom_artifact_type_import_statements( + func) + + def test_long_form_annotation(self): + import my_package + + def func(a: int, b: Output[my_package.ThirdPartyArtifact]): + pass + + actual = custom_artifact_types.get_custom_artifact_type_import_statements( + func) + + expected = ['import my_package'] + self.assertEqual(actual, expected) + + def test_aliased_module(self): + import my_package as my_package_alias + + def func(a: int, b: Output[my_package_alias.ThirdPartyArtifact]): + pass + + with self.assertRaisesRegex( + TypeError, r'Module or type name aliases are not supported'): + custom_artifact_types.get_custom_artifact_type_import_statements( + func) + + def test_input_output_path(self): + from my_package import ThirdPartyArtifact + + def func( + a: int, + b: InputPath('Dataset'), + c: Output[ThirdPartyArtifact], + ) -> OutputPath('Dataset'): + return 'dataset' + + actual = custom_artifact_types.get_custom_artifact_type_import_statements( + func) + + expected = ['from my_package import ThirdPartyArtifact'] + self.assertEqual(actual, expected) + + +class TestImportStatementAdded(_TestCaseWithThirdPartyPackage): + + def test(self): + import my_package + from my_package import ThirdPartyArtifact + + @dsl.component + def one( + a: int, + b: Output[ThirdPartyArtifact], + ): + pass + + @dsl.component + def two(a: Input[my_package.ThirdPartyArtifact],): + pass + + @dsl.pipeline() + def my_pipeline(): + one_task = one(a=1) + two_task = two(a=one_task.outputs['b']) + + with tempfile.TemporaryDirectory() as tmpdir: + output_path = os.path.join(tmpdir, 'pipeline.yaml') + + compiler.Compiler().compile( + pipeline_func=my_pipeline, package_path=output_path) + with open(output_path) as f: + pipeline_spec = yaml.safe_load(f) + + self.assertIn( + 'from my_package import ThirdPartyArtifact', + ' '.join(pipeline_spec['deploymentSpec']['executors']['exec-one'] + ['container']['command'])) + self.assertIn( + 'import my_package', + ' '.join(pipeline_spec['deploymentSpec']['executors']['exec-two'] + ['container']['command'])) + + +if __name__ == '__main__': + unittest.main() From 2912e24aa5ebe7aa55078d666a245edd349c4175 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Wed, 31 Aug 2022 11:24:40 -0600 Subject: [PATCH 05/17] fix artifact class construction --- sdk/python/kfp/components/executor.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/sdk/python/kfp/components/executor.py b/sdk/python/kfp/components/executor.py index 714387410d7..332c7155485 100644 --- a/sdk/python/kfp/components/executor.py +++ b/sdk/python/kfp/components/executor.py @@ -67,7 +67,14 @@ def make_artifact( func: Callable, ) -> Any: artifact_cls = func.__annotations__.get(name) - return create_artifact_instance(runtime_artifact, artifact_cls) + if type_annotations.is_artifact(artifact_cls): + # handles artifacts + return create_artifact_instance( + runtime_artifact, artifact_cls=artifact_cls) + else: + # handles InputPath and OutputPath + return create_artifact_instance( + runtime_artifact, artifact_cls=artifact_types.Artifact) def makedirs_recursively(self, path: str) -> None: os.makedirs(os.path.dirname(path), exist_ok=True) @@ -317,10 +324,9 @@ def create_artifact_instance( dictionary.""" schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '') - artifact_type = artifact_types._SCHEMA_TITLE_TO_TYPE.get(schema_title) - if not artifact_type: - artifact_type = artifact_cls or artifact_types.Artifact - return artifact_type( + artifact_cls = artifact_types._SCHEMA_TITLE_TO_TYPE.get( + schema_title) or artifact_cls or artifact_types.Artifact + return artifact_cls( uri=runtime_artifact.get('uri', ''), name=runtime_artifact.get('name', ''), metadata=runtime_artifact.get('metadata', {}), From 8cd018b5945d4283eb012efd10f8b263f858acd9 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Wed, 31 Aug 2022 11:52:55 -0600 Subject: [PATCH 06/17] fix custom artifact type in tests --- sdk/python/kfp/components/executor_test.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sdk/python/kfp/components/executor_test.py b/sdk/python/kfp/components/executor_test.py index 7a61e23c2e9..6670dd757d5 100644 --- a/sdk/python/kfp/components/executor_test.py +++ b/sdk/python/kfp/components/executor_test.py @@ -112,7 +112,8 @@ def test_input_artifact_custom_type(self): """ class VertexDataset: - TYPE_NAME = 'google.VertexDataset' + schema_title = 'google.VertexDataset' + schema_version = '0.0.0' def __init__(self, name: str, uri: str, metadata: dict) -> None: self.name = name @@ -121,15 +122,15 @@ def __init__(self, name: str, uri: str, metadata: dict) -> None: @property def path(self) -> str: - return self.uri.replace('gs://', '/gcs/') + return self.uri.replace('gs://', + artifact_types._GCS_LOCAL_MOUNT_PREFIX) def test_func(input_artifact_one: Input[VertexDataset]): self.assertEqual(input_artifact_one.uri, 'gs://some-bucket/input_artifact_one') self.assertEqual( input_artifact_one.path, - os.path.join(self._test_dir, - '/gcs/some-bucket/input_artifact_one')) + os.path.join(self._test_dir, 'some-bucket/input_artifact_one')) self.assertEqual(input_artifact_one.name, 'input_artifact_one') self.assertIsInstance(input_artifact_one, VertexDataset) @@ -832,7 +833,7 @@ def __init__(self, name: str, uri: str, metadata: dict) -> None: @property def path(self) -> str: - return self.uri.replace('gs://', '/gcs/') + return self.uri.replace('gs://', artifact_types._GCS_LOCAL_MOUNT_PREFIX) class TestDictToArtifact(parameterized.TestCase): From a71c26c66678908b05d5a22d80e88b3e2d0ab7a1 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Tue, 30 Aug 2022 11:57:02 -0600 Subject: [PATCH 07/17] add typing extensions dependency for all python versions --- sdk/python/requirements.in | 4 +--- sdk/python/requirements.txt | 24 +++++++++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/sdk/python/requirements.in b/sdk/python/requirements.in index 77b764f4dc4..4707c66d6f3 100644 --- a/sdk/python/requirements.in +++ b/sdk/python/requirements.in @@ -28,6 +28,7 @@ protobuf>=3.13.0,<4 PyYAML>=5.3,<6 requests-toolbelt>=0.8.0,<1 tabulate>=0.8.6,<1 +typing-extensions>=3.7.4,<5 ## kfp.deprecated ## cloudpickle>=2.0.0,<3 @@ -39,6 +40,3 @@ uritemplate>=3.0.1,<4 ## kfp.deprecated.cli ## typer>=0.3.2,<1.0 - -## standard library backports ## -typing-extensions>=3.7.4,<5; python_version<"3.9" diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt index 7478c5f6703..8b73b75729e 100644 --- a/sdk/python/requirements.txt +++ b/sdk/python/requirements.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with python 3.9 +# This file is autogenerated by pip-compile with python 3.7 # To update, run: # -# pip-compile +# pip-compile --no-emit-index-url requirements.in # absl-py==1.2.0 # via -r requirements.in @@ -34,7 +34,7 @@ google-api-core==2.8.2 # -r requirements.in # google-cloud-core # google-cloud-storage -google-auth==2.10.0 +google-auth==2.11.0 # via # -r requirements.in # google-api-core @@ -45,7 +45,7 @@ google-cloud-core==2.3.2 # via google-cloud-storage google-cloud-storage==2.5.0 # via -r requirements.in -google-crc32c==1.3.0 +google-crc32c==1.5.0 # via google-resumable-media google-resumable-media==2.3.3 # via google-cloud-storage @@ -53,11 +53,15 @@ googleapis-common-protos==1.56.4 # via google-api-core idna==3.3 # via requests +importlib-metadata==4.12.0 + # via + # click + # jsonschema jsonschema==3.2.0 # via -r requirements.in kfp-pipeline-spec==0.1.16 # via -r requirements.in -kfp-server-api==2.0.0a3 +kfp-server-api==2.0.0a4 # via -r requirements.in kubernetes==23.6.0 # via -r requirements.in @@ -114,19 +118,25 @@ termcolor==1.1.0 # via fire typer==0.6.1 # via -r requirements.in +typing-extensions==4.3.0 + # via + # -r requirements.in + # importlib-metadata uritemplate==3.0.1 # via -r requirements.in -urllib3==1.26.11 +urllib3==1.26.12 # via # kfp-server-api # kubernetes # requests -websocket-client==1.3.3 +websocket-client==1.4.0 # via kubernetes wheel==0.37.1 # via strip-hints wrapt==1.14.1 # via deprecated +zipp==3.8.1 + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools From 2dd1d94e93867c27fe39caf01a09eec0e837a102 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Fri, 2 Sep 2022 17:19:54 -0600 Subject: [PATCH 08/17] use mock google namespace artifact for tests --- .../types/custom_artifact_types_test.py | 149 +++++++++--------- 1 file changed, 72 insertions(+), 77 deletions(-) diff --git a/sdk/python/kfp/components/types/custom_artifact_types_test.py b/sdk/python/kfp/components/types/custom_artifact_types_test.py index 45e983f0081..d913a05bb04 100644 --- a/sdk/python/kfp/components/types/custom_artifact_types_test.py +++ b/sdk/python/kfp/components/types/custom_artifact_types_test.py @@ -137,24 +137,19 @@ def func( self.assertEqual(actual, expected) -class MyCustomArtifact: - schema_title = 'my_custom_artifact' - schema_version = '0.0.0' - - class _TestCaseWithThirdPartyPackage(parameterized.TestCase): @classmethod def setUpClass(cls): - class ThirdPartyArtifact: - schema_title = 'custom.my_third_party_artifact' + class VertexDataset: + schema_title = 'google.VertexDataset' schema_version = '0.0.0' - class_source = textwrap.dedent(inspect.getsource(ThirdPartyArtifact)) + class_source = textwrap.dedent(inspect.getsource(VertexDataset)) tmp_dir = tempfile.TemporaryDirectory() - with open(os.path.join(tmp_dir.name, 'my_package.py'), 'w') as f: + with open(os.path.join(tmp_dir.name, 'aiplatform.py'), 'w') as f: f.write(class_source) sys.path.append(tmp_dir.name) cls.tmp_dir = tmp_dir @@ -233,11 +228,11 @@ def test(self, obj: Any, expected_qualname: str): custom_artifact_types.get_full_qualname_for_artifact(obj), expected_qualname) - def test_my_package_artifact(self): - import my_package + def test_aiplatform_artifact(self): + import aiplatform self.assertEqual( custom_artifact_types.get_full_qualname_for_artifact( - my_package.ThirdPartyArtifact), 'my_package.ThirdPartyArtifact') + aiplatform.VertexDataset), 'aiplatform.VertexDataset') class TestGetArtifactImportItemsFromFunction(_TestCaseWithThirdPartyPackage): @@ -253,44 +248,44 @@ def func(a, b): self.assertEqual(actual, expected) def test_no_return(self): - from my_package import ThirdPartyArtifact + from aiplatform import VertexDataset - def func(a: int, b: Input[ThirdPartyArtifact]): + def func(a: int, b: Input[VertexDataset]): pass actual = custom_artifact_types.get_custom_artifact_import_items_from_function( func) - expected = ['my_package.ThirdPartyArtifact'] + expected = ['aiplatform.VertexDataset'] self.assertEqual(actual, expected) def test_with_return(self): - from my_package import ThirdPartyArtifact + from aiplatform import VertexDataset - def func(a: int, b: Input[ThirdPartyArtifact]) -> int: + def func(a: int, b: Input[VertexDataset]) -> int: return 1 actual = custom_artifact_types.get_custom_artifact_import_items_from_function( func) - expected = ['my_package.ThirdPartyArtifact'] + expected = ['aiplatform.VertexDataset'] self.assertEqual(actual, expected) def test_multiline(self): - from my_package import ThirdPartyArtifact + from aiplatform import VertexDataset def func( a: int, - b: Input[ThirdPartyArtifact], + b: Input[VertexDataset], ) -> int: return 1 actual = custom_artifact_types.get_custom_artifact_import_items_from_function( func) - expected = ['my_package.ThirdPartyArtifact'] + expected = ['aiplatform.VertexDataset'] self.assertEqual(actual, expected) def test_alias(self): - from my_package import ThirdPartyArtifact - Alias = ThirdPartyArtifact + from aiplatform import VertexDataset + Alias = VertexDataset def func(a: int, b: Input[Alias]): pass @@ -301,20 +296,20 @@ def func(a: int, b: Input[Alias]): func) def test_long_form_annotation(self): - import my_package + import aiplatform - def func(a: int, b: Output[my_package.ThirdPartyArtifact]): + def func(a: int, b: Output[aiplatform.VertexDataset]): pass actual = custom_artifact_types.get_custom_artifact_import_items_from_function( func) - expected = ['my_package'] + expected = ['aiplatform'] self.assertEqual(actual, expected) def test_aliased_module_throws_error(self): - import my_package as my_package_alias + import aiplatform as aiplatform_alias - def func(a: int, b: Output[my_package_alias.ThirdPartyArtifact]): + def func(a: int, b: Output[aiplatform_alias.VertexDataset]): pass with self.assertRaisesRegex( @@ -323,18 +318,18 @@ def func(a: int, b: Output[my_package_alias.ThirdPartyArtifact]): func) def test_input_output_path(self): - from my_package import ThirdPartyArtifact + from aiplatform import VertexDataset def func( a: int, b: InputPath('Dataset'), - c: Output[ThirdPartyArtifact], + c: Output[VertexDataset], ) -> OutputPath('Dataset'): return 'dataset' actual = custom_artifact_types.get_custom_artifact_import_items_from_function( func) - self.assertEqual(actual, ['my_package.ThirdPartyArtifact']) + self.assertEqual(actual, ['aiplatform.VertexDataset']) class TestFuncToAnnotationObject(_TestCaseWithThirdPartyPackage): @@ -349,66 +344,66 @@ def func(a, b): self.assertEqual(actual, expected) def test_no_return(self): - from my_package import ThirdPartyArtifact + from aiplatform import VertexDataset - def func(a: int, b: Input[ThirdPartyArtifact]): + def func(a: int, b: Input[VertexDataset]): pass actual = custom_artifact_types.func_to_annotation_object(func) - import my_package + import aiplatform expected = { 'a': int, 'b': typing_extensions.Annotated[ - my_package.ThirdPartyArtifact, + aiplatform.VertexDataset, kfp.components.types.type_annotations.InputAnnotation] } self.assertEqual(actual, expected) def test_with_return(self): - from my_package import ThirdPartyArtifact + from aiplatform import VertexDataset - def func(a: int, b: Input[ThirdPartyArtifact]) -> int: + def func(a: int, b: Input[VertexDataset]) -> int: return 1 - import my_package + import aiplatform actual = custom_artifact_types.func_to_annotation_object(func) expected = { 'a': int, 'b': typing_extensions.Annotated[ - my_package.ThirdPartyArtifact, + aiplatform.VertexDataset, kfp.components.types.type_annotations.InputAnnotation] } self.assertEqual(actual, expected) def test_alias(self): - from my_package import ThirdPartyArtifact - Alias = ThirdPartyArtifact + from aiplatform import VertexDataset + Alias = VertexDataset def func(a: int, b: Input[Alias]): pass actual = custom_artifact_types.func_to_annotation_object(func) - import my_package + import aiplatform expected = { 'a': int, 'b': typing_extensions.Annotated[ - my_package.ThirdPartyArtifact, + aiplatform.VertexDataset, kfp.components.types.type_annotations.InputAnnotation] } self.assertEqual(actual, expected) def test_long_form_annotation(self): - import my_package + import aiplatform - def func(a: int, b: Output[my_package.ThirdPartyArtifact]): + def func(a: int, b: Output[aiplatform.VertexDataset]): pass actual = custom_artifact_types.func_to_annotation_object(func) @@ -418,43 +413,43 @@ def func(a: int, b: Output[my_package.ThirdPartyArtifact]): int, 'b': typing_extensions.Annotated[ - my_package.ThirdPartyArtifact, + aiplatform.VertexDataset, kfp.components.types.type_annotations.OutputAnnotation] } self.assertEqual(actual, expected) def test_aliased_module(self): - import my_package as my_package_alias + import aiplatform as aiplatform_alias - def func(a: int, b: Output[my_package_alias.ThirdPartyArtifact]): + def func(a: int, b: Output[aiplatform_alias.VertexDataset]): pass actual = custom_artifact_types.func_to_annotation_object(func) - import my_package + import aiplatform expected = { 'a': int, 'b': typing_extensions.Annotated[ - my_package.ThirdPartyArtifact, + aiplatform.VertexDataset, kfp.components.types.type_annotations.OutputAnnotation] } self.assertEqual(actual, expected) def test_input_output_path(self): - from my_package import ThirdPartyArtifact + from aiplatform import VertexDataset def func( a: int, b: InputPath('Dataset'), - c: Output[ThirdPartyArtifact], + c: Output[VertexDataset], ) -> OutputPath('Dataset'): return 'dataset' actual = custom_artifact_types.func_to_annotation_object(func) - import my_package + import aiplatform expected = { 'a': int, @@ -462,7 +457,7 @@ def func( kfp.components.types.type_annotations.InputPath('Dataset'), 'c': typing_extensions.Annotated[ - my_package.ThirdPartyArtifact, + aiplatform.VertexDataset, kfp.components.types.type_annotations.OutputAnnotation] } self.assertEqual(actual, expected) @@ -481,32 +476,32 @@ def func(a, b): self.assertEqual(actual, expected) def test_no_return(self): - from my_package import ThirdPartyArtifact + from aiplatform import VertexDataset - def func(a: int, b: Input[ThirdPartyArtifact]): + def func(a: int, b: Input[VertexDataset]): pass actual = custom_artifact_types.get_custom_artifact_type_import_statements( func) - expected = ['from my_package import ThirdPartyArtifact'] + expected = ['from aiplatform import VertexDataset'] self.assertEqual(actual, expected) def test_with_return(self): - from my_package import ThirdPartyArtifact + from aiplatform import VertexDataset - def func(a: int, b: Input[ThirdPartyArtifact]) -> int: + def func(a: int, b: Input[VertexDataset]) -> int: return 1 - import my_package + import aiplatform actual = custom_artifact_types.get_custom_artifact_type_import_statements( func) - expected = ['from my_package import ThirdPartyArtifact'] + expected = ['from aiplatform import VertexDataset'] self.assertEqual(actual, expected) def test_alias(self): - from my_package import ThirdPartyArtifact - Alias = ThirdPartyArtifact + from aiplatform import VertexDataset + Alias = VertexDataset def func(a: int, b: Input[Alias]): pass @@ -517,21 +512,21 @@ def func(a: int, b: Input[Alias]): func) def test_long_form_annotation(self): - import my_package + import aiplatform - def func(a: int, b: Output[my_package.ThirdPartyArtifact]): + def func(a: int, b: Output[aiplatform.VertexDataset]): pass actual = custom_artifact_types.get_custom_artifact_type_import_statements( func) - expected = ['import my_package'] + expected = ['import aiplatform'] self.assertEqual(actual, expected) def test_aliased_module(self): - import my_package as my_package_alias + import aiplatform as aiplatform_alias - def func(a: int, b: Output[my_package_alias.ThirdPartyArtifact]): + def func(a: int, b: Output[aiplatform_alias.VertexDataset]): pass with self.assertRaisesRegex( @@ -540,37 +535,37 @@ def func(a: int, b: Output[my_package_alias.ThirdPartyArtifact]): func) def test_input_output_path(self): - from my_package import ThirdPartyArtifact + from aiplatform import VertexDataset def func( a: int, b: InputPath('Dataset'), - c: Output[ThirdPartyArtifact], + c: Output[VertexDataset], ) -> OutputPath('Dataset'): return 'dataset' actual = custom_artifact_types.get_custom_artifact_type_import_statements( func) - expected = ['from my_package import ThirdPartyArtifact'] + expected = ['from aiplatform import VertexDataset'] self.assertEqual(actual, expected) class TestImportStatementAdded(_TestCaseWithThirdPartyPackage): def test(self): - import my_package - from my_package import ThirdPartyArtifact + import aiplatform + from aiplatform import VertexDataset @dsl.component def one( a: int, - b: Output[ThirdPartyArtifact], + b: Output[VertexDataset], ): pass @dsl.component - def two(a: Input[my_package.ThirdPartyArtifact],): + def two(a: Input[aiplatform.VertexDataset],): pass @dsl.pipeline() @@ -587,11 +582,11 @@ def my_pipeline(): pipeline_spec = yaml.safe_load(f) self.assertIn( - 'from my_package import ThirdPartyArtifact', + 'from aiplatform import VertexDataset', ' '.join(pipeline_spec['deploymentSpec']['executors']['exec-one'] ['container']['command'])) self.assertIn( - 'import my_package', + 'import aiplatform', ' '.join(pipeline_spec['deploymentSpec']['executors']['exec-two'] ['container']['command'])) From 29cf5de2c94ec432538a0ad74abcbd97e4cbb4f9 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Mon, 12 Sep 2022 14:15:48 -0600 Subject: [PATCH 09/17] remove print statement --- sdk/python/kfp/components/structures.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdk/python/kfp/components/structures.py b/sdk/python/kfp/components/structures.py index 7900df48cfa..6c5a81a5b79 100644 --- a/sdk/python/kfp/components/structures.py +++ b/sdk/python/kfp/components/structures.py @@ -629,7 +629,6 @@ def from_v1_component_spec( inputs = {} for spec in component_dict.get('inputs', []): type_ = spec.get('type') - print('TYPE', type_) if isinstance(type_, str) and type_ == 'PipelineTaskFinalStatus': inputs[utils.sanitize_input_name( From 779e0c291d67c31f1d0d37fb4763cc509f7c0a0a Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Mon, 12 Sep 2022 14:18:37 -0600 Subject: [PATCH 10/17] update google artifact golden snapshot --- .../pipelines/pipeline_with_google_artifact_type.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml index 25f40272820..a45bb465a21 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml @@ -66,8 +66,9 @@ deploymentSpec: ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef model_consumer(model: Input[VertexModel],\n \ - \ dataset: Input[VertexDataset]):\n print('Model')\n print('artifact.type:\ + \ *\nfrom aiplatform import VertexModel\nfrom aiplatform import VertexDataset\n\ + \ndef model_consumer(model: Input[VertexModel],\n dataset:\ + \ Input[VertexDataset]):\n print('Model')\n print('artifact.type:\ \ ', type(model))\n print('artifact.name: ', model.name)\n print('artifact.uri:\ \ ', model.uri)\n print('artifact.metadata: ', model.metadata)\n\n \ \ print('Dataset')\n print('artifact.type: ', type(dataset))\n print('artifact.name:\ @@ -98,9 +99,9 @@ deploymentSpec: ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef model_producer(model: Output[aiplatform.VertexModel]):\n\n \ - \ assert isinstance(model, aiplatform.VertexModel), type(model)\n with\ - \ open(model.path, 'w') as f:\n f.write('my model')\n\n" + \ *\nimport aiplatform\n\ndef model_producer(model: Output[aiplatform.VertexModel]):\n\ + \n assert isinstance(model, aiplatform.VertexModel), type(model)\n \ + \ with open(model.path, 'w') as f:\n f.write('my model')\n\n" image: python:3.7 pipelineInfo: name: pipeline-with-google-types From 9d48368104a1fad59abe76fa3c57a4437d1ecd2c Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Tue, 13 Sep 2022 10:42:01 -0600 Subject: [PATCH 11/17] resolve some review feedback --- sdk/python/kfp/components/component_factory.py | 8 ++++---- sdk/python/kfp/components/executor.py | 10 +++++----- sdk/python/kfp/components/executor_test.py | 17 +++++++---------- .../components/types/custom_artifact_types.py | 4 +++- .../kfp/components/types/type_annotations.py | 4 ++-- .../components/types/type_annotations_test.py | 10 +++++----- sdk/python/kfp/components/types/type_utils.py | 2 +- sdk/python/requirements.in | 4 +++- sdk/python/requirements.txt | 2 +- 9 files changed, 31 insertions(+), 30 deletions(-) diff --git a/sdk/python/kfp/components/component_factory.py b/sdk/python/kfp/components/component_factory.py index 0e91acda7d6..01bf5f2be49 100644 --- a/sdk/python/kfp/components/component_factory.py +++ b/sdk/python/kfp/components/component_factory.py @@ -172,7 +172,7 @@ def extract_component_interface( # parameter_type is type_annotations.Artifact or one of its subclasses. parameter_type = type_annotations.get_io_artifact_class( parameter_type) - if not type_annotations.is_artifact(parameter_type): + if not type_annotations.is_artifact_class(parameter_type): raise ValueError( 'Input[T] and Output[T] are only supported when T is a ' 'subclass of Artifact. Found `{} with type {}`'.format( @@ -204,7 +204,7 @@ def extract_component_interface( ]: io_name = _maybe_make_unique(io_name, output_names) output_names.add(io_name) - if type_annotations.is_artifact(parameter_type): + if type_annotations.is_artifact_class(parameter_type): schema_version = parameter_type.schema_version output_spec = structures.OutputSpec( type=type_utils.create_bundled_artifact_type( @@ -215,7 +215,7 @@ def extract_component_interface( else: io_name = _maybe_make_unique(io_name, input_names) input_names.add(io_name) - if type_annotations.is_artifact(parameter_type): + if type_annotations.is_artifact_class(parameter_type): schema_version = parameter_type.schema_version input_spec = structures.InputSpec( type=type_utils.create_bundled_artifact_type( @@ -278,7 +278,7 @@ def extract_component_interface( # `def func(output_path: OutputPath()) -> str: ...` output_names.add(output_name) return_ann = signature.return_annotation - if type_annotations.is_artifact(signature.return_annotation): + if type_annotations.is_artifact_class(signature.return_annotation): output_spec = structures.OutputSpec( type=type_utils.create_bundled_artifact_type( return_ann.schema_title, return_ann.schema_version)) diff --git a/sdk/python/kfp/components/executor.py b/sdk/python/kfp/components/executor.py index 332c7155485..7ec053c9f07 100644 --- a/sdk/python/kfp/components/executor.py +++ b/sdk/python/kfp/components/executor.py @@ -67,7 +67,7 @@ def make_artifact( func: Callable, ) -> Any: artifact_cls = func.__annotations__.get(name) - if type_annotations.is_artifact(artifact_cls): + if type_annotations.is_artifact_class(artifact_cls): # handles artifacts return create_artifact_instance( runtime_artifact, artifact_cls=artifact_cls) @@ -187,7 +187,7 @@ def _is_parameter(cls, annotation: Any) -> bool: @classmethod def _is_artifact(cls, annotation: Any) -> bool: if type(annotation) == type: - return type_annotations.is_artifact(annotation) + return type_annotations.is_artifact_class(annotation) return False @classmethod @@ -318,14 +318,14 @@ def execute(self): def create_artifact_instance( runtime_artifact: Dict, - artifact_cls=None, -) -> artifact_types.Artifact: + artifact_cls=artifact_types.Artifact, +) -> type: """Creates an artifact class instances from a runtime artifact dictionary.""" schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '') artifact_cls = artifact_types._SCHEMA_TITLE_TO_TYPE.get( - schema_title) or artifact_cls or artifact_types.Artifact + schema_title, artifact_cls) return artifact_cls( uri=runtime_artifact.get('uri', ''), name=runtime_artifact.get('name', ''), diff --git a/sdk/python/kfp/components/executor_test.py b/sdk/python/kfp/components/executor_test.py index 6670dd757d5..d59556d7ba7 100644 --- a/sdk/python/kfp/components/executor_test.py +++ b/sdk/python/kfp/components/executor_test.py @@ -43,8 +43,8 @@ def setUp(cls): artifact_types._MINIO_LOCAL_MOUNT_PREFIX = cls._test_dir + '/minio/' artifact_types._S3_LOCAL_MOUNT_PREFIX = cls._test_dir + '/s3/' - def execute_and_load_output_metadata( - self, func: Callable, executor_input: str) -> executor.Executor: + def execute_and_load_output_metadata(self, func: Callable, + executor_input: str) -> dict: executor_input_dict = json.loads(executor_input % {'test_dir': self._test_dir}) @@ -55,7 +55,7 @@ def execute_and_load_output_metadata( 'r') as f: return json.loads(f.read()) - def test_input_parameter(self): + def test_input_and_output_parameters(self): executor_input = """\ { "inputs": { @@ -95,7 +95,7 @@ def test_input_artifact_custom_type(self): "artifacts": [ { "metadata": {}, - "name": "input_artifact_one", + "name": "input_artifact_one_name", "type": { "schemaTitle": "google.VertexDataset" }, @@ -130,8 +130,9 @@ def test_func(input_artifact_one: Input[VertexDataset]): 'gs://some-bucket/input_artifact_one') self.assertEqual( input_artifact_one.path, - os.path.join(self._test_dir, 'some-bucket/input_artifact_one')) - self.assertEqual(input_artifact_one.name, 'input_artifact_one') + os.path.join(artifact_types._GCS_LOCAL_MOUNT_PREFIX, + 'some-bucket/input_artifact_one')) + self.assertEqual(input_artifact_one.name, 'input_artifact_one_name') self.assertIsInstance(input_artifact_one, VertexDataset) self.execute_and_load_output_metadata(test_func, executor_input) @@ -140,8 +141,6 @@ def test_input_artifact(self): executor_input = """\ { "inputs": { - "parameterValues": { - }, "artifacts": { "input_artifact_one": { "artifacts": [ @@ -177,8 +176,6 @@ def test_func(input_artifact_one: Input[Dataset]): def test_output_artifact(self): executor_input = """\ { - "inputs": { - }, "outputs": { "artifacts": { "output_artifact_one": { diff --git a/sdk/python/kfp/components/types/custom_artifact_types.py b/sdk/python/kfp/components/types/custom_artifact_types.py index c9cc434a317..811d97790fd 100644 --- a/sdk/python/kfp/components/types/custom_artifact_types.py +++ b/sdk/python/kfp/components/types/custom_artifact_types.py @@ -58,7 +58,9 @@ def get_full_qualname_for_artifact(obj: type) -> str: """Gets the fully qualified name for an object. For example, for class Foo in module bar.baz, this function returns bar.baz.Foo. - Note: typing.get_type_hints purports to do the same thing, but it behaves differently when executed within the scope of a test, so preferring this approach instead. + Note: typing.get_type_hints purports to do the same thing, but it behaves + differently when executed within the scope of a test, so preferring this + approach instead. Args: obj: The class or module for which to get the fully qualified name. diff --git a/sdk/python/kfp/components/types/type_annotations.py b/sdk/python/kfp/components/types/type_annotations.py index 95e54a6a150..74ed3170786 100644 --- a/sdk/python/kfp/components/types/type_annotations.py +++ b/sdk/python/kfp/components/types/type_annotations.py @@ -106,7 +106,7 @@ def __eq__(self, other): def construct_type_for_inputpath_or_outputpath( type_: Union[str, Type, None]) -> Union[str, None]: - if type_annotations.is_artifact(type_): + if type_annotations.is_artifact_class(type_): return type_utils.create_bundled_artifact_type(type_.schema_title, type_.schema_version) elif isinstance( @@ -274,7 +274,7 @@ def get_short_type_name(type_name: str) -> str: return type_name -def is_artifact(artifact_class_or_instance: Type) -> bool: +def is_artifact_class(artifact_class_or_instance: Type) -> bool: # we do not yet support non-pre-registered custom artifact types with instance_schema attribute return hasattr(artifact_class_or_instance, 'schema_title') and hasattr( artifact_class_or_instance, 'schema_version') diff --git a/sdk/python/kfp/components/types/type_annotations_test.py b/sdk/python/kfp/components/types/type_annotations_test.py index 645ed78274e..abd5b1680e3 100644 --- a/sdk/python/kfp/components/types/type_annotations_test.py +++ b/sdk/python/kfp/components/types/type_annotations_test.py @@ -161,31 +161,31 @@ class TestIsArtifact(parameterized.TestCase): 'obj': obj } for obj in artifact_types._SCHEMA_TITLE_TO_TYPE.values()]) def test_true_class(self, obj): - self.assertTrue(type_annotations.is_artifact(obj)) + self.assertTrue(type_annotations.is_artifact_class(obj)) @parameterized.parameters([{ 'obj': obj(name='name', uri='uri', metadata={}) } for obj in artifact_types._SCHEMA_TITLE_TO_TYPE.values()]) def test_true_instance(self, obj): - self.assertTrue(type_annotations.is_artifact(obj)) + self.assertTrue(type_annotations.is_artifact_class(obj)) @parameterized.parameters([{'obj': 'string'}, {'obj': 1}, {'obj': int}]) def test_false(self, obj): - self.assertFalse(type_annotations.is_artifact(obj)) + self.assertFalse(type_annotations.is_artifact_class(obj)) def test_false_no_schema_title(self): class NotArtifact: schema_version = '' - self.assertFalse(type_annotations.is_artifact(NotArtifact)) + self.assertFalse(type_annotations.is_artifact_class(NotArtifact)) def test_false_no_schema_version(self): class NotArtifact: schema_title = '' - self.assertFalse(type_annotations.is_artifact(NotArtifact)) + self.assertFalse(type_annotations.is_artifact_class(NotArtifact)) if __name__ == '__main__': diff --git a/sdk/python/kfp/components/types/type_utils.py b/sdk/python/kfp/components/types/type_utils.py index 1fac3359d1a..0fe48fcf07f 100644 --- a/sdk/python/kfp/components/types/type_utils.py +++ b/sdk/python/kfp/components/types/type_utils.py @@ -413,7 +413,7 @@ def _annotation_to_type_struct(annotation): type_struct = get_canonical_type_name_for_type(annotation) if type_struct: return type_struct - elif type_annotations.is_artifact(annotation): + elif type_annotations.is_artifact_class(annotation): schema_title = annotation.schema_title else: schema_title = str(annotation.__name__) diff --git a/sdk/python/requirements.in b/sdk/python/requirements.in index 4707c66d6f3..77b764f4dc4 100644 --- a/sdk/python/requirements.in +++ b/sdk/python/requirements.in @@ -28,7 +28,6 @@ protobuf>=3.13.0,<4 PyYAML>=5.3,<6 requests-toolbelt>=0.8.0,<1 tabulate>=0.8.6,<1 -typing-extensions>=3.7.4,<5 ## kfp.deprecated ## cloudpickle>=2.0.0,<3 @@ -40,3 +39,6 @@ uritemplate>=3.0.1,<4 ## kfp.deprecated.cli ## typer>=0.3.2,<1.0 + +## standard library backports ## +typing-extensions>=3.7.4,<5; python_version<"3.9" diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt index 8b73b75729e..3820e811a9c 100644 --- a/sdk/python/requirements.txt +++ b/sdk/python/requirements.txt @@ -118,7 +118,7 @@ termcolor==1.1.0 # via fire typer==0.6.1 # via -r requirements.in -typing-extensions==4.3.0 +typing-extensions==4.3.0 ; python_version < "3.9" # via # -r requirements.in # importlib-metadata From 53fbe1b7f0f0e23f5cff295e8cf0fa4611ada7c5 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Tue, 13 Sep 2022 11:56:45 -0600 Subject: [PATCH 12/17] remove handling for OutputPath and InputPath custom artifact types; update function names and tests --- sdk/python/kfp/components/executor.py | 10 +- .../components/types/custom_artifact_types.py | 36 ++-- .../types/custom_artifact_types_test.py | 181 +++++++++++------- 3 files changed, 123 insertions(+), 104 deletions(-) diff --git a/sdk/python/kfp/components/executor.py b/sdk/python/kfp/components/executor.py index 7ec053c9f07..2c367131d2d 100644 --- a/sdk/python/kfp/components/executor.py +++ b/sdk/python/kfp/components/executor.py @@ -67,14 +67,8 @@ def make_artifact( func: Callable, ) -> Any: artifact_cls = func.__annotations__.get(name) - if type_annotations.is_artifact_class(artifact_cls): - # handles artifacts - return create_artifact_instance( - runtime_artifact, artifact_cls=artifact_cls) - else: - # handles InputPath and OutputPath - return create_artifact_instance( - runtime_artifact, artifact_cls=artifact_types.Artifact) + return create_artifact_instance( + runtime_artifact, artifact_cls=artifact_cls) def makedirs_recursively(self, path: str) -> None: os.makedirs(os.path.dirname(path), exist_ok=True) diff --git a/sdk/python/kfp/components/types/custom_artifact_types.py b/sdk/python/kfp/components/types/custom_artifact_types.py index 811d97790fd..020cecc9c68 100644 --- a/sdk/python/kfp/components/types/custom_artifact_types.py +++ b/sdk/python/kfp/components/types/custom_artifact_types.py @@ -75,22 +75,22 @@ def get_full_qualname_for_artifact(obj: type) -> str: return name -def get_import_item_from_annotation_string_and_ann_obj(artifact_str: str, - qualname: str) -> str: - """Gets the fully qualified names of the module or type to import from the - annotation string and the annotation object. +def get_symbol_import_path(artifact_class_base_symbol: str, + qualname: str) -> str: + """Gets the fully qualified names of the module or type to import. Args: - artifact_str: The type annotation string. + artifact_class_base_symbol: The base symbol from which the artifact class is referenced (e.g., aiplatform for aiplatform.VertexDataset). qualname: The fully qualified type annotation name as a string. Returns: The fully qualified names of the module or type to import. """ split_qualname = qualname.split('.') - if artifact_str in split_qualname: + if artifact_class_base_symbol in split_qualname: name_to_import = '.'.join( - split_qualname[:split_qualname.index(artifact_str) + 1]) + split_qualname[:split_qualname.index(artifact_class_base_symbol) + + 1]) else: raise TypeError( f"Module or type name aliases are not supported. You appear to be using an alias in your type annotation: '{qualname}'. This may be due to use of an 'as' statement in an import statement or a reassignment of a module or type to a new name. Reference the module and/or type using the name as defined in the source from which the module or type is imported." @@ -107,8 +107,7 @@ def func_to_annotation_object(func: Callable) -> Dict[str, str]: } -def func_to_root_annotation_symbols_for_artifacts( - func: Callable) -> Dict[str, str]: +def func_to_artifact_class_base_symbol(func: Callable) -> Dict[str, str]: """Gets a map of parameter name to the root symbol used to reference an annotation for a function. @@ -134,17 +133,8 @@ def convert_ast_arg_node_to_param_annotation_dict_for_artifacts( arg_to_ann = {} for arg in args: annotation = arg.annotation - # Handle InputPath() and OutputPath() - if isinstance(annotation, ast.Call): - if isinstance(annotation.func, ast.Name): - arg_to_ann[arg.arg] = annotation.func.id - elif isinstance(annotation.func, ast.Attribute): - arg_to_ann[arg.arg] = annotation.func.value.id - else: - raise TypeError( - f'Unexpected type annotation for {annotation.func}.') # annotations with a subtype like Input[Artifact] - elif isinstance(annotation, ast.Subscript): + if isinstance(annotation, ast.Subscript): # get inner type if PYTHON_VERSION < '3.9': # see "Changed in version 3.9" https://docs.python.org/3/library/ast.html#node-classes @@ -157,9 +147,6 @@ def convert_ast_arg_node_to_param_annotation_dict_for_artifacts( arg_to_ann[arg.arg] = annotation.slice.id if isinstance(annotation.slice, ast.Attribute): arg_to_ann[arg.arg] = annotation.slice.value.id - # annotations like type_annotations.Input[Artifact] - elif isinstance(annotation, ast.Attribute): - arg_to_ann[arg.arg] = annotation.value.id return arg_to_ann return convert_ast_arg_node_to_param_annotation_dict_for_artifacts(args) @@ -176,7 +163,7 @@ def get_custom_artifact_import_items_from_function(func: Callable) -> List[str]: """ param_to_ann_obj = func_to_annotation_object(func) - param_to_ann_string = func_to_root_annotation_symbols_for_artifacts(func) + param_to_ann_string = func_to_artifact_class_base_symbol(func) import_items = [] seen = set() @@ -197,7 +184,6 @@ def get_custom_artifact_import_items_from_function(func: Callable) -> List[str]: if not artifact_qualname.startswith('kfp.'): # kfp artifacts are already imported by default import_items.append( - get_import_item_from_annotation_string_and_ann_obj( - ann_string, artifact_qualname)) + get_symbol_import_path(ann_string, artifact_qualname)) return import_items diff --git a/sdk/python/kfp/components/types/custom_artifact_types_test.py b/sdk/python/kfp/components/types/custom_artifact_types_test.py index d913a05bb04..dbd1ac6666c 100644 --- a/sdk/python/kfp/components/types/custom_artifact_types_test.py +++ b/sdk/python/kfp/components/types/custom_artifact_types_test.py @@ -41,15 +41,50 @@ artifact_types_alias = artifact_types -class TestFuncToRootAnnotationSymbol(unittest.TestCase): +class _TestCaseWithThirdPartyPackage(parameterized.TestCase): + + @classmethod + def setUpClass(cls): + + class VertexDataset: + schema_title = 'google.VertexDataset' + schema_version = '0.0.0' + + class_source = textwrap.dedent(inspect.getsource(VertexDataset)) + + tmp_dir = tempfile.TemporaryDirectory() + with open(os.path.join(tmp_dir.name, 'aiplatform.py'), 'w') as f: + f.write(class_source) + sys.path.append(tmp_dir.name) + cls.tmp_dir = tmp_dir + + @classmethod + def teardownClass(cls): + sys.path.pop() + cls.tmp_dir.cleanup() + + +class TestFuncToRootAnnotationSymbol(_TestCaseWithThirdPartyPackage): def test_no_annotations(self): def func(a, b): pass - actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( - func) + actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) + expected = {} + self.assertEqual(actual, expected) + + def test_input_output_path(self): + + def func( + a: int, + b: InputPath('Dataset'), + c: OutputPath('Dataset'), + ) -> str: + return 'dataset' + + actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) expected = {} self.assertEqual(actual, expected) @@ -58,8 +93,7 @@ def test_no_return(self): def func(a: int, b: Input[Artifact]): pass - actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( - func) + actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) expected = {'b': 'Artifact'} self.assertEqual(actual, expected) @@ -68,22 +102,17 @@ def test_with_return(self): def func(a: int, b: Input[Artifact]) -> int: return 1 - actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( - func) + actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) expected = {'b': 'Artifact'} self.assertEqual(actual, expected) - def test_multiline(self): + def test_module_base_symbol(self): - def func( - a: int, - b: Input[Artifact], - ) -> int: - return 1 + def func(a: int, b: Input[artifact_types.Artifact]): + pass - actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( - func) - expected = {'b': 'Artifact'} + actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) + expected = {'b': 'artifact_types'} self.assertEqual(actual, expected) def test_alias(self): @@ -91,21 +120,10 @@ def test_alias(self): def func(a: int, b: Input[Alias]): pass - actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( - func) + actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) expected = {'b': 'Alias'} self.assertEqual(actual, expected) - def test_long_form_annotation(self): - - def func(a: int, b: Input[artifact_types.Artifact]): - pass - - actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( - func) - expected = {'b': 'artifact_types'} - self.assertEqual(actual, expected) - def test_named_tuple(self): def func( @@ -118,46 +136,61 @@ def func( ('c', artifact_types.Artifact)]) return InnerNamedTuple(a=a, b=b, c=b) # type: ignore - actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( - func) + actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) expected = {'b': 'Artifact'} self.assertEqual(actual, expected) - def test_input_output_path(self): + def test_google_type_class_symbol(self): + from aiplatform import VertexDataset - def func( - a: int, - b: InputPath('Dataset'), - ) -> OutputPath('Dataset'): - return 'dataset' + def func(a: int, b: Input[VertexDataset]) -> int: + return 1 - actual = custom_artifact_types.func_to_root_annotation_symbols_for_artifacts( - func) - expected = {'b': 'InputPath'} + actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) + expected = {'b': 'VertexDataset'} self.assertEqual(actual, expected) + def test_google_type_module_base_symbol(self): + import aiplatform -class _TestCaseWithThirdPartyPackage(parameterized.TestCase): + def func(a: int, b: Input[aiplatform.VertexDataset]): + pass - @classmethod - def setUpClass(cls): + actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) + expected = {'b': 'aiplatform'} + self.assertEqual(actual, expected) - class VertexDataset: - schema_title = 'google.VertexDataset' - schema_version = '0.0.0' + def test_google_type_alias(self): + from aiplatform import VertexDataset - class_source = textwrap.dedent(inspect.getsource(VertexDataset)) + Alias = VertexDataset - tmp_dir = tempfile.TemporaryDirectory() - with open(os.path.join(tmp_dir.name, 'aiplatform.py'), 'w') as f: - f.write(class_source) - sys.path.append(tmp_dir.name) - cls.tmp_dir = tmp_dir + def func(a: int, b: Input[Alias]) -> int: + return 1 - @classmethod - def teardownClass(cls): - sys.path.pop() - cls.tmp_dir.cleanup() + actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) + expected = {'b': 'Alias'} + self.assertEqual(actual, expected) + + def test_using_dsl_symbol_for_generic(self): + + def func(a: int, b: dsl.Input[Artifact], + c: dsl.Output[dsl.Dataset]) -> int: + return 1 + + actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) + expected = {'b': 'Artifact', 'c': 'dsl'} + self.assertEqual(actual, expected) + + def test_using_kfp_symbol_for_generic(self): + + def func(a: int, b: kfp.dsl.Input[Artifact], + c: kfp.dsl.Output[dsl.Dataset]) -> int: + return 1 + + actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) + expected = {'b': 'Artifact', 'c': 'dsl'} + self.assertEqual(actual, expected) class TestGetParamToAnnObj(unittest.TestCase): @@ -208,7 +241,8 @@ def test_input_output_path(self): def func( a: int, b: InputPath('Dataset'), - ) -> OutputPath('Dataset'): + c: OutputPath('Dataset'), + ) -> str: return 'dataset' actual = custom_artifact_types.get_param_to_annotation_object(func) @@ -321,10 +355,11 @@ def test_input_output_path(self): from aiplatform import VertexDataset def func( - a: int, - b: InputPath('Dataset'), - c: Output[VertexDataset], - ) -> OutputPath('Dataset'): + a: int, + b: InputPath('Dataset'), + c: Output[VertexDataset], + d: OutputPath('Dataset'), + ) -> str: return 'dataset' actual = custom_artifact_types.get_custom_artifact_import_items_from_function( @@ -441,10 +476,11 @@ def test_input_output_path(self): from aiplatform import VertexDataset def func( - a: int, - b: InputPath('Dataset'), - c: Output[VertexDataset], - ) -> OutputPath('Dataset'): + a: int, + b: InputPath('Dataset'), + c: Output[VertexDataset], + d: OutputPath('Dataset'), + ) -> str: return 'dataset' actual = custom_artifact_types.func_to_annotation_object(func) @@ -458,7 +494,9 @@ def func( 'c': typing_extensions.Annotated[ aiplatform.VertexDataset, - kfp.components.types.type_annotations.OutputAnnotation] + kfp.components.types.type_annotations.OutputAnnotation], + 'd': + kfp.components.types.type_annotations.OutputPath('Dataset'), } self.assertEqual(actual, expected) @@ -490,7 +528,7 @@ def func(a: int, b: Input[VertexDataset]): def test_with_return(self): from aiplatform import VertexDataset - def func(a: int, b: Input[VertexDataset]) -> int: + def func(a: int, b: kfp.dsl.Input[VertexDataset]) -> int: return 1 import aiplatform @@ -514,7 +552,7 @@ def func(a: int, b: Input[Alias]): def test_long_form_annotation(self): import aiplatform - def func(a: int, b: Output[aiplatform.VertexDataset]): + def func(a: int, b: dsl.Output[aiplatform.VertexDataset]): pass actual = custom_artifact_types.get_custom_artifact_type_import_statements( @@ -538,10 +576,11 @@ def test_input_output_path(self): from aiplatform import VertexDataset def func( - a: int, - b: InputPath('Dataset'), - c: Output[VertexDataset], - ) -> OutputPath('Dataset'): + a: int, + b: InputPath('Dataset'), + c: Output[VertexDataset], + d: OutputPath('Dataset'), + ) -> str: return 'dataset' actual = custom_artifact_types.get_custom_artifact_type_import_statements( From c24a90b08b673f0e0f35de58bf510dde0fc29cc5 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Tue, 13 Sep 2022 12:14:06 -0600 Subject: [PATCH 13/17] clarify named tuple tests --- .../types/custom_artifact_types_test.py | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/sdk/python/kfp/components/types/custom_artifact_types_test.py b/sdk/python/kfp/components/types/custom_artifact_types_test.py index dbd1ac6666c..0e7d40d9e0f 100644 --- a/sdk/python/kfp/components/types/custom_artifact_types_test.py +++ b/sdk/python/kfp/components/types/custom_artifact_types_test.py @@ -129,12 +129,21 @@ def test_named_tuple(self): def func( a: int, b: Input[Artifact], - ) -> typing.NamedTuple('MyNamedTuple', [('a', int), ( - 'b', Artifact), ('c', artifact_types.Artifact)]): - InnerNamedTuple = typing.NamedTuple( - 'MyNamedTuple', [('a', int), ('b', Artifact), - ('c', artifact_types.Artifact)]) - return InnerNamedTuple(a=a, b=b, c=b) # type: ignore + ) -> typing.NamedTuple('MyNamedTuple', [ + ('c', int), + ('d', Artifact), + ('e', artifact_types.Artifact), + ]): + InnerNamedTuple = typing.NamedTuple('MyNamedTuple', [ + ('c', int), + ('d', Artifact), + ('e', artifact_types.Artifact), + ]) + return InnerNamedTuple( + c=a, + d=b, + e=b, + ) actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) expected = {'b': 'Artifact'} @@ -215,15 +224,19 @@ def func( def test_named_tuple(self): - MyNamedTuple = typing.NamedTuple('MyNamedTuple', [('a', int), - ('b', str)]) + MyNamedTuple = typing.NamedTuple('MyNamedTuple', [ + ('c', int), + ('d', str), + ]) def func( a: int, b: Input[Artifact], ) -> MyNamedTuple: - InnerNamedTuple = typing.NamedTuple('MyNamedTuple', [('a', int), - ('b', str)]) + InnerNamedTuple = typing.NamedTuple('MyNamedTuple', [ + ('c', int), + ('d', str), + ]) return InnerNamedTuple(a=a, b='string') # type: ignore actual = custom_artifact_types.get_param_to_annotation_object(func) From ed51ad0a66f89611ec07e6ec81c2fb439cbb66a7 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Tue, 13 Sep 2022 12:43:37 -0600 Subject: [PATCH 14/17] update executor tests --- sdk/python/kfp/components/executor_test.py | 32 ++++++++++++---------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/sdk/python/kfp/components/executor_test.py b/sdk/python/kfp/components/executor_test.py index d59556d7ba7..f41763e8f7a 100644 --- a/sdk/python/kfp/components/executor_test.py +++ b/sdk/python/kfp/components/executor_test.py @@ -65,7 +65,7 @@ def test_input_and_output_parameters(self): }, "outputs": { "parameters": { - "output": { + "Output": { "outputFile": "gs://some-bucket/output" } }, @@ -182,7 +182,7 @@ def test_output_artifact(self): "artifacts": [ { "metadata": {}, - "name": "output_artifact_one", + "name": "output_artifact_one_name", "type": { "schemaTitle": "system.Model" }, @@ -203,7 +203,8 @@ def test_func(output_artifact_one: Output[Model]): self.assertEqual( output_artifact_one.path, os.path.join(self._test_dir, 'some-bucket/output_artifact_one')) - self.assertEqual(output_artifact_one.name, 'output_artifact_one') + self.assertEqual(output_artifact_one.name, + 'output_artifact_one_name') self.assertIsInstance(output_artifact_one, Model) self.execute_and_load_output_metadata(test_func, executor_input) @@ -269,8 +270,6 @@ def test_func(input_artifact_one_path: InputPath('Dataset')): def test_output_path_artifact(self): executor_input = """\ { - "inputs": { - }, "outputs": { "artifacts": { "output_artifact_one_path": { @@ -363,7 +362,7 @@ def test_function_string_output(self): }, "outputs": { "parameters": { - "output": { + "Output": { "outputFile": "gs://some-bucket/output" } }, @@ -398,7 +397,7 @@ def test_function_with_int_output(self): }, "outputs": { "parameters": { - "output": { + "Output": { "outputFile": "gs://some-bucket/output" } }, @@ -429,7 +428,7 @@ def test_function_with_float_output(self): }, "outputs": { "parameters": { - "output": { + "Output": { "outputFile": "gs://some-bucket/output" } }, @@ -461,7 +460,7 @@ def test_function_with_list_output(self): }, "outputs": { "parameters": { - "output": { + "Output": { "outputFile": "gs://some-bucket/output" } }, @@ -493,7 +492,7 @@ def test_function_with_dict_output(self): }, "outputs": { "parameters": { - "output": { + "Output": { "outputFile": "gs://some-bucket/output" } }, @@ -528,7 +527,7 @@ def test_function_with_typed_list_output(self): }, "outputs": { "parameters": { - "output": { + "Output": { "outputFile": "gs://some-bucket/output" } }, @@ -560,7 +559,7 @@ def test_function_with_typed_dict_output(self): }, "outputs": { "parameters": { - "output": { + "Output": { "outputFile": "gs://some-bucket/output" } }, @@ -607,6 +606,11 @@ def test_artifact_output(self): ] } }, + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, "outputFile": "%(test_dir)s/output_metadata.json" } } @@ -729,7 +733,7 @@ def test_function_with_optional_inputs(self): }, "outputs": { "parameters": { - "output": { + "Output": { "outputFile": "gs://some-bucket/output" } }, @@ -787,7 +791,7 @@ def test_function_with_pipeline_task_final_status(self): }, "outputs": { "parameters": { - "output": { + "Output": { "outputFile": "gs://some-bucket/output" } }, From fcd56b385d7c7ecb9206f15e477fc30061a6c0ba Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Tue, 13 Sep 2022 15:29:57 -0600 Subject: [PATCH 15/17] add artifact return and named tuple support; refactor; clean tests --- .../components/types/custom_artifact_types.py | 182 ++--- .../types/custom_artifact_types_test.py | 714 ++++++++---------- sdk/python/kfp/components/types/type_utils.py | 6 + 3 files changed, 395 insertions(+), 507 deletions(-) diff --git a/sdk/python/kfp/components/types/custom_artifact_types.py b/sdk/python/kfp/components/types/custom_artifact_types.py index 020cecc9c68..2fba20b0c6d 100644 --- a/sdk/python/kfp/components/types/custom_artifact_types.py +++ b/sdk/python/kfp/components/types/custom_artifact_types.py @@ -14,13 +14,13 @@ import ast import inspect -import sys -from typing import Any, Callable, Dict, List +from typing import Callable, Dict, List, Union from kfp.components import component_factory from kfp.components.types import type_annotations +from kfp.components.types import type_utils -PYTHON_VERSION = str(sys.version_info.major) + '.' + str(sys.version_info.minor) +RETURN_PREFIX = 'return-' def get_custom_artifact_type_import_statements(func: Callable) -> List[str]: @@ -37,21 +37,44 @@ def get_custom_artifact_type_import_statements(func: Callable) -> List[str]: return imports_source -def get_param_to_annotation_object(func: Callable) -> Dict[str, Any]: - """Gets a dictionary of parameter name to type annotation object from a - function. +def get_param_to_custom_artifact_class(func: Callable) -> Dict[str, type]: + """Gets a map of parameter names to custom artifact classes. - Args: - func: The function. - - Returns: - Dictionary of parameter name to type annotation object. + Return key is 'return-' for normal returns and 'return-' for + typing.NamedTuple returns. """ + param_to_artifact_cls: Dict[str, type] = {} + kfp_artifact_classes = set(type_utils._ARTIFACT_CLASSES_MAPPING.values()) + signature = inspect.signature(func) - return { - name: parameter.annotation - for name, parameter in signature.parameters.items() - } + for name, param in signature.parameters.items(): + annotation = param.annotation + if type_annotations.is_artifact_annotation(annotation): + artifact_class = type_annotations.get_io_artifact_class(annotation) + if artifact_class not in kfp_artifact_classes: + param_to_artifact_cls[name] = artifact_class + elif type_annotations.is_artifact_class(annotation): + param_to_artifact_cls[name] = annotation + if artifact_class not in kfp_artifact_classes: + param_to_artifact_cls[name] = artifact_class + + return_annotation = signature.return_annotation + + if return_annotation is inspect.Signature.empty: + pass + + elif type_utils.is_typed_named_tuple_annotation(return_annotation): + for name, annotation in return_annotation.__annotations__.items(): + if type_annotations.is_artifact_class( + annotation) and annotation not in kfp_artifact_classes: + param_to_artifact_cls[f'{RETURN_PREFIX}{name}'] = annotation + + elif type_annotations.is_artifact_class( + return_annotation + ) and return_annotation not in kfp_artifact_classes: + param_to_artifact_cls[RETURN_PREFIX] = return_annotation + + return param_to_artifact_cls def get_full_qualname_for_artifact(obj: type) -> str: @@ -77,7 +100,8 @@ def get_full_qualname_for_artifact(obj: type) -> str: def get_symbol_import_path(artifact_class_base_symbol: str, qualname: str) -> str: - """Gets the fully qualified names of the module or type to import. + """Gets the fully qualified name of the symbol that must be imported for + the custom artifact type annotation to be referenced successfully. Args: artifact_class_base_symbol: The base symbol from which the artifact class is referenced (e.g., aiplatform for aiplatform.VertexDataset). @@ -98,92 +122,70 @@ def get_symbol_import_path(artifact_class_base_symbol: str, return name_to_import -def func_to_annotation_object(func: Callable) -> Dict[str, str]: - """Gets a dict of parameter name to annotation object for a function.""" - signature = inspect.signature(func) - return { - name: parameter.annotation - for name, parameter in signature.parameters.items() - } - +def traverse_ast_node_values_to_get_id(obj: Union[ast.Slice, None]) -> str: + while not hasattr(obj, 'id'): + obj = getattr(obj, 'value') + return obj.id -def func_to_artifact_class_base_symbol(func: Callable) -> Dict[str, str]: - """Gets a map of parameter name to the root symbol used to reference an - annotation for a function. - For example, the annotation `type_module.Artifact` has the root - symbol `type_module` and the annotation `Artifact` has the root - symbol `Artifact`. - """ +def get_custom_artifact_base_symbol_for_parameter(func: Callable, + arg_name: str) -> str: + """Gets the symbol required for the custom artifact type annotation to be + referenced correctly.""" module_node = ast.parse( component_factory._get_function_source_definition(func)) args = module_node.body[0].args.args + args = {arg.arg: arg for arg in args} + annotation = args[arg_name].annotation + return traverse_ast_node_values_to_get_id(annotation.slice) - def convert_ast_arg_node_to_param_annotation_dict_for_artifacts( - args: List[ast.arg]) -> Dict[str, str]: - """Converts a list of ast.arg nodes to a dictionary of the parameter - name to type annotation as a string (for artifact annotations only). - - Args: - args: AST argument nodes for a function. - - Returns: - A dictionary of parameter name to type annotation as a string. - """ - arg_to_ann = {} - for arg in args: - annotation = arg.annotation - # annotations with a subtype like Input[Artifact] - if isinstance(annotation, ast.Subscript): - # get inner type - if PYTHON_VERSION < '3.9': - # see "Changed in version 3.9" https://docs.python.org/3/library/ast.html#node-classes - if isinstance(annotation.slice.value, ast.Name): - arg_to_ann[arg.arg] = annotation.slice.value.id - if isinstance(annotation.slice.value, ast.Attribute): - arg_to_ann[arg.arg] = annotation.slice.value.value.id - else: - if isinstance(annotation.slice, ast.Name): - arg_to_ann[arg.arg] = annotation.slice.id - if isinstance(annotation.slice, ast.Attribute): - arg_to_ann[arg.arg] = annotation.slice.value.id - return arg_to_ann - - return convert_ast_arg_node_to_param_annotation_dict_for_artifacts(args) +def get_custom_artifact_base_symbol_for_return(func: Callable, + return_name: str) -> str: + """Gets the symbol required for the custom artifact type return annotation + to be referenced correctly.""" + module_node = ast.parse( + component_factory._get_function_source_definition(func)) + return_ann = module_node.body[0].returns -def get_custom_artifact_import_items_from_function(func: Callable) -> List[str]: - """Gets a list of fully qualified names of the modules or types to import. + if return_name == RETURN_PREFIX: + if isinstance(return_ann, (ast.Name, ast.Attribute)): + return traverse_ast_node_values_to_get_id(return_ann) + elif isinstance(return_ann, ast.Call): + func = return_ann.func + # handles NamedTuple and typing.NamedTuple + if (isinstance(func, ast.Attribute) and func.value.id == 'typing' and + func.attr == 'NamedTuple') or (isinstance(func, ast.Name) and + func.id == 'NamedTuple'): + nt_field_list = return_ann.args[1].elts + for el in nt_field_list: + if f'{RETURN_PREFIX}{el.elts[0].s}' == return_name: + return traverse_ast_node_values_to_get_id(el.elts[1]) - Args: - func: The component function. + raise TypeError(f"Unexpected type annotation '{return_ann}' for {func}.") - Returns: - A list containing the fully qualified names of the module or type to import. - """ - param_to_ann_obj = func_to_annotation_object(func) - param_to_ann_string = func_to_artifact_class_base_symbol(func) +def get_custom_artifact_import_items_from_function(func: Callable) -> List[str]: + """Gets the fully qualified name of the symbol that must be imported for + the custom artifact type annotation to be referenced successfully from a + component function.""" + param_to_ann_obj = get_param_to_custom_artifact_class(func) import_items = [] - seen = set() - for param_name, ann_string in param_to_ann_string.items(): - # don't process the same annotation string multiple times - # use the string, not the object in the seen set, since the same object can be referenced different ways in the same function signature - if ann_string in seen: - continue - else: - seen.add(ann_string) - - actual_ann = param_to_ann_obj.get(param_name) - - if actual_ann is not None and type_annotations.is_artifact_annotation( - actual_ann): - artifact_class = type_annotations.get_io_artifact_class(actual_ann) - artifact_qualname = get_full_qualname_for_artifact(artifact_class) - if not artifact_qualname.startswith('kfp.'): - # kfp artifacts are already imported by default - import_items.append( - get_symbol_import_path(ann_string, artifact_qualname)) + for param_name, artifact_class in param_to_ann_obj.items(): + + base_symbol = get_custom_artifact_base_symbol_for_return( + func, param_name + ) if param_name.startswith( + RETURN_PREFIX) else get_custom_artifact_base_symbol_for_parameter( + func, param_name) + artifact_qualname = get_full_qualname_for_artifact(artifact_class) + symbol_import_path = get_symbol_import_path(base_symbol, + artifact_qualname) + + # could use set here, but want to be have deterministic import ordering + # in compilation + if symbol_import_path not in import_items: + import_items.append(symbol_import_path) return import_items diff --git a/sdk/python/kfp/components/types/custom_artifact_types_test.py b/sdk/python/kfp/components/types/custom_artifact_types_test.py index 0e7d40d9e0f..c4de629df45 100644 --- a/sdk/python/kfp/components/types/custom_artifact_types_test.py +++ b/sdk/python/kfp/components/types/custom_artifact_types_test.py @@ -23,19 +23,15 @@ from absl.testing import parameterized import kfp -from kfp import compiler from kfp import dsl from kfp.components.types import artifact_types from kfp.components.types import custom_artifact_types -from kfp.components.types import type_annotations from kfp.components.types.artifact_types import Artifact from kfp.components.types.artifact_types import Dataset from kfp.components.types.type_annotations import Input from kfp.components.types.type_annotations import InputPath from kfp.components.types.type_annotations import Output from kfp.components.types.type_annotations import OutputPath -import typing_extensions -import yaml Alias = Artifact artifact_types_alias = artifact_types @@ -64,203 +60,160 @@ def teardownClass(cls): cls.tmp_dir.cleanup() -class TestFuncToRootAnnotationSymbol(_TestCaseWithThirdPartyPackage): +class TestGetParamToCustomArtifactClass(_TestCaseWithThirdPartyPackage): - def test_no_annotations(self): + def test_no_ann(self): - def func(a, b): + def func(): pass - actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) - expected = {} - self.assertEqual(actual, expected) - - def test_input_output_path(self): - - def func( - a: int, - b: InputPath('Dataset'), - c: OutputPath('Dataset'), - ) -> str: - return 'dataset' - - actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) - expected = {} - self.assertEqual(actual, expected) + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual(actual, {}) - def test_no_return(self): + def test_primitives(self): - def func(a: int, b: Input[Artifact]): + def func(a: str, b: int) -> str: pass - actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) - expected = {'b': 'Artifact'} - self.assertEqual(actual, expected) + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual(actual, {}) - def test_with_return(self): + def test_input_path(self): - def func(a: int, b: Input[Artifact]) -> int: - return 1 + def func(a: InputPath(str), b: InputPath('Dataset')) -> str: + pass - actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) - expected = {'b': 'Artifact'} - self.assertEqual(actual, expected) + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual(actual, {}) - def test_module_base_symbol(self): + def test_output_path(self): - def func(a: int, b: Input[artifact_types.Artifact]): + def func(a: OutputPath(str), b: OutputPath('Dataset')) -> str: pass - actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) - expected = {'b': 'artifact_types'} - self.assertEqual(actual, expected) + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual(actual, {}) - def test_alias(self): + def test_input_kfp_artifact(self): - def func(a: int, b: Input[Alias]): + def func(a: Input[Artifact]): pass - actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) - expected = {'b': 'Alias'} - self.assertEqual(actual, expected) - - def test_named_tuple(self): - - def func( - a: int, - b: Input[Artifact], - ) -> typing.NamedTuple('MyNamedTuple', [ - ('c', int), - ('d', Artifact), - ('e', artifact_types.Artifact), - ]): - InnerNamedTuple = typing.NamedTuple('MyNamedTuple', [ - ('c', int), - ('d', Artifact), - ('e', artifact_types.Artifact), - ]) - return InnerNamedTuple( - c=a, - d=b, - e=b, - ) - - actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) - expected = {'b': 'Artifact'} - self.assertEqual(actual, expected) + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual(actual, {}) - def test_google_type_class_symbol(self): - from aiplatform import VertexDataset + def test_output_kfp_artifact(self): - def func(a: int, b: Input[VertexDataset]) -> int: - return 1 + def func(a: Output[Artifact]): + pass - actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) - expected = {'b': 'VertexDataset'} - self.assertEqual(actual, expected) + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual(actual, {}) - def test_google_type_module_base_symbol(self): - import aiplatform + def test_return_kfp_artifact1(self): - def func(a: int, b: Input[aiplatform.VertexDataset]): + def func() -> Artifact: pass - actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) - expected = {'b': 'aiplatform'} - self.assertEqual(actual, expected) - - def test_google_type_alias(self): - from aiplatform import VertexDataset + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual(actual, {}) - Alias = VertexDataset + def test_return_kfp_artifact2(self): - def func(a: int, b: Input[Alias]) -> int: - return 1 + def func() -> dsl.Artifact: + pass - actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) - expected = {'b': 'Alias'} - self.assertEqual(actual, expected) + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual(actual, {}) - def test_using_dsl_symbol_for_generic(self): + def test_named_tuple_primitives(self): - def func(a: int, b: dsl.Input[Artifact], - c: dsl.Output[dsl.Dataset]) -> int: - return 1 + def func() -> typing.NamedTuple('Outputs', [ + ('a', str), + ('b', int), + ]): + pass - actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) - expected = {'b': 'Artifact', 'c': 'dsl'} - self.assertEqual(actual, expected) + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual(actual, {}) - def test_using_kfp_symbol_for_generic(self): + def test_input_google_artifact(self): + import aiplatform + from aiplatform import VertexDataset - def func(a: int, b: kfp.dsl.Input[Artifact], - c: kfp.dsl.Output[dsl.Dataset]) -> int: - return 1 + def func( + a: Input[aiplatform.VertexDataset], + b: Input[VertexDataset], + c: dsl.Input[aiplatform.VertexDataset], + d: kfp.dsl.Input[VertexDataset], + ): + pass - actual = custom_artifact_types.func_to_artifact_class_base_symbol(func) - expected = {'b': 'Artifact', 'c': 'dsl'} - self.assertEqual(actual, expected) + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual( + actual, { + 'a': aiplatform.VertexDataset, + 'b': aiplatform.VertexDataset, + 'c': aiplatform.VertexDataset, + 'd': aiplatform.VertexDataset, + }) + + def test_output_google_artifact(self): + import aiplatform + from aiplatform import VertexDataset + def func( + a: Output[aiplatform.VertexDataset], + b: Output[VertexDataset], + c: dsl.Output[aiplatform.VertexDataset], + d: kfp.dsl.Output[VertexDataset], + ): + pass -class TestGetParamToAnnObj(unittest.TestCase): + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual( + actual, { + 'a': aiplatform.VertexDataset, + 'b': aiplatform.VertexDataset, + 'c': aiplatform.VertexDataset, + 'd': aiplatform.VertexDataset, + }) + + def test_return_google_artifact1(self): + import aiplatform + from aiplatform import VertexDataset - def test_no_named_tuple(self): + def func() -> VertexDataset: + pass - def func( - a: int, - b: Input[Artifact], - ) -> int: - return 1 - - actual = custom_artifact_types.get_param_to_annotation_object(func) - expected = { - 'a': - int, - 'b': - typing_extensions.Annotated[Artifact, - type_annotations.InputAnnotation] - } - self.assertEqual(actual, expected) + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual(actual, {'return-': aiplatform.VertexDataset}) - def test_named_tuple(self): + def test_return_google_artifact2(self): + import aiplatform - MyNamedTuple = typing.NamedTuple('MyNamedTuple', [ - ('c', int), - ('d', str), - ]) + def func() -> aiplatform.VertexDataset: + pass - def func( - a: int, - b: Input[Artifact], - ) -> MyNamedTuple: - InnerNamedTuple = typing.NamedTuple('MyNamedTuple', [ - ('c', int), - ('d', str), - ]) - return InnerNamedTuple(a=a, b='string') # type: ignore - - actual = custom_artifact_types.get_param_to_annotation_object(func) - expected = { - 'a': - int, - 'b': - typing_extensions.Annotated[Artifact, - type_annotations.InputAnnotation] - } - self.assertEqual(actual, expected) + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual(actual, {'return-': aiplatform.VertexDataset}) - def test_input_output_path(self): + def test_named_tuple_google_artifact(self): + import aiplatform + from aiplatform import VertexDataset - def func( - a: int, - b: InputPath('Dataset'), - c: OutputPath('Dataset'), - ) -> str: - return 'dataset' + def func() -> typing.NamedTuple('Outputs', [ + ('a', aiplatform.VertexDataset), + ('b', VertexDataset), + ]): + pass - actual = custom_artifact_types.get_param_to_annotation_object(func) - self.assertEqual(actual['a'], int) - self.assertIsInstance(actual['b'], InputPath) + actual = custom_artifact_types.get_param_to_custom_artifact_class(func) + self.assertEqual( + actual, { + 'return-a': aiplatform.VertexDataset, + 'return-b': aiplatform.VertexDataset, + }) class TestGetFullQualnameForArtifact(_TestCaseWithThirdPartyPackage): @@ -282,365 +235,292 @@ def test_aiplatform_artifact(self): aiplatform.VertexDataset), 'aiplatform.VertexDataset') -class TestGetArtifactImportItemsFromFunction(_TestCaseWithThirdPartyPackage): - - def test_no_annotations(self): +class TestGetSymbolImportPath(parameterized.TestCase): - def func(a, b): - pass - - actual = custom_artifact_types.get_custom_artifact_import_items_from_function( - func) - expected = [] - self.assertEqual(actual, expected) - - def test_no_return(self): - from aiplatform import VertexDataset - - def func(a: int, b: Input[VertexDataset]): - pass - - actual = custom_artifact_types.get_custom_artifact_import_items_from_function( - func) - expected = ['aiplatform.VertexDataset'] + @parameterized.parameters([ + { + 'artifact_class_base_symbol': 'aiplatform', + 'qualname': 'aiplatform.VertexDataset', + 'expected': 'aiplatform' + }, + { + 'artifact_class_base_symbol': 'VertexDataset', + 'qualname': 'aiplatform.VertexDataset', + 'expected': 'aiplatform.VertexDataset' + }, + { + 'artifact_class_base_symbol': 'e', + 'qualname': 'a.b.c.d.e', + 'expected': 'a.b.c.d.e' + }, + { + 'artifact_class_base_symbol': 'c', + 'qualname': 'a.b.c.d.e', + 'expected': 'a.b.c' + }, + ]) + def test(self, artifact_class_base_symbol: str, qualname: str, + expected: str): + actual = custom_artifact_types.get_symbol_import_path( + artifact_class_base_symbol, qualname) self.assertEqual(actual, expected) - def test_with_return(self): - from aiplatform import VertexDataset - - def func(a: int, b: Input[VertexDataset]) -> int: - return 1 - actual = custom_artifact_types.get_custom_artifact_import_items_from_function( - func) - expected = ['aiplatform.VertexDataset'] - self.assertEqual(actual, expected) +class TestGetCustomArtifactBaseSymbolForParameter(_TestCaseWithThirdPartyPackage + ): - def test_multiline(self): + def test_input_google_artifact(self): + import aiplatform from aiplatform import VertexDataset def func( - a: int, + a: Input[aiplatform.VertexDataset], b: Input[VertexDataset], - ) -> int: - return 1 - - actual = custom_artifact_types.get_custom_artifact_import_items_from_function( - func) - expected = ['aiplatform.VertexDataset'] - self.assertEqual(actual, expected) - - def test_alias(self): - from aiplatform import VertexDataset - Alias = VertexDataset - - def func(a: int, b: Input[Alias]): + c: dsl.Input[aiplatform.VertexDataset], + d: kfp.dsl.Input[VertexDataset], + ): pass - with self.assertRaisesRegex( - TypeError, r'Module or type name aliases are not supported'): - custom_artifact_types.get_custom_artifact_import_items_from_function( - func) + actual = custom_artifact_types.get_custom_artifact_base_symbol_for_parameter( + func, 'a') + self.assertEqual(actual, 'aiplatform') - def test_long_form_annotation(self): - import aiplatform + actual = custom_artifact_types.get_custom_artifact_base_symbol_for_parameter( + func, 'b') + self.assertEqual(actual, 'VertexDataset') - def func(a: int, b: Output[aiplatform.VertexDataset]): - pass + actual = custom_artifact_types.get_custom_artifact_base_symbol_for_parameter( + func, 'c') + self.assertEqual(actual, 'aiplatform') - actual = custom_artifact_types.get_custom_artifact_import_items_from_function( - func) - expected = ['aiplatform'] - self.assertEqual(actual, expected) + actual = custom_artifact_types.get_custom_artifact_base_symbol_for_parameter( + func, 'd') + self.assertEqual(actual, 'VertexDataset') - def test_aliased_module_throws_error(self): - import aiplatform as aiplatform_alias - - def func(a: int, b: Output[aiplatform_alias.VertexDataset]): - pass - - with self.assertRaisesRegex( - TypeError, r'Module or type name aliases are not supported'): - custom_artifact_types.get_custom_artifact_import_items_from_function( - func) - - def test_input_output_path(self): + def test_output_google_artifact(self): + import aiplatform from aiplatform import VertexDataset def func( - a: int, - b: InputPath('Dataset'), - c: Output[VertexDataset], - d: OutputPath('Dataset'), - ) -> str: - return 'dataset' + a: Output[aiplatform.VertexDataset], + b: Output[VertexDataset], + c: dsl.Output[aiplatform.VertexDataset], + d: kfp.dsl.Output[VertexDataset], + ): + pass - actual = custom_artifact_types.get_custom_artifact_import_items_from_function( - func) - self.assertEqual(actual, ['aiplatform.VertexDataset']) + actual = custom_artifact_types.get_custom_artifact_base_symbol_for_parameter( + func, 'a') + self.assertEqual(actual, 'aiplatform') + actual = custom_artifact_types.get_custom_artifact_base_symbol_for_parameter( + func, 'b') + self.assertEqual(actual, 'VertexDataset') -class TestFuncToAnnotationObject(_TestCaseWithThirdPartyPackage): + actual = custom_artifact_types.get_custom_artifact_base_symbol_for_parameter( + func, 'c') + self.assertEqual(actual, 'aiplatform') - def test_no_annotations(self): + actual = custom_artifact_types.get_custom_artifact_base_symbol_for_parameter( + func, 'd') + self.assertEqual(actual, 'VertexDataset') - def func(a, b): - pass - actual = custom_artifact_types.func_to_annotation_object(func) - expected = {'a': inspect._empty, 'b': inspect._empty} - self.assertEqual(actual, expected) +class TestGetCustomArtifactBaseSymbolForReturn(_TestCaseWithThirdPartyPackage): - def test_no_return(self): + def test_return_google_artifact1(self): from aiplatform import VertexDataset - def func(a: int, b: Input[VertexDataset]): + def func() -> VertexDataset: pass - actual = custom_artifact_types.func_to_annotation_object(func) + actual = custom_artifact_types.get_custom_artifact_base_symbol_for_return( + func, 'return-') + self.assertEqual(actual, 'VertexDataset') + def test_return_google_artifact2(self): import aiplatform - expected = { - 'a': - int, - 'b': - typing_extensions.Annotated[ - aiplatform.VertexDataset, - kfp.components.types.type_annotations.InputAnnotation] - } - self.assertEqual(actual, expected) - def test_with_return(self): - from aiplatform import VertexDataset + def func() -> aiplatform.VertexDataset: + pass - def func(a: int, b: Input[VertexDataset]) -> int: - return 1 + actual = custom_artifact_types.get_custom_artifact_base_symbol_for_return( + func, 'return-') + self.assertEqual(actual, 'aiplatform') + def test_named_tuple_google_artifact(self): import aiplatform - actual = custom_artifact_types.func_to_annotation_object(func) - expected = { - 'a': - int, - 'b': - typing_extensions.Annotated[ - aiplatform.VertexDataset, - kfp.components.types.type_annotations.InputAnnotation] - } - self.assertEqual(actual, expected) - - def test_alias(self): from aiplatform import VertexDataset - Alias = VertexDataset - def func(a: int, b: Input[Alias]): + def func() -> typing.NamedTuple('Outputs', [ + ('a', aiplatform.VertexDataset), + ('b', VertexDataset), + ]): pass - actual = custom_artifact_types.func_to_annotation_object(func) + actual = custom_artifact_types.get_custom_artifact_base_symbol_for_return( + func, 'return-a') + self.assertEqual(actual, 'aiplatform') - import aiplatform - expected = { - 'a': - int, - 'b': - typing_extensions.Annotated[ - aiplatform.VertexDataset, - kfp.components.types.type_annotations.InputAnnotation] - } - self.assertEqual(actual, expected) - - def test_long_form_annotation(self): - import aiplatform - - def func(a: int, b: Output[aiplatform.VertexDataset]): - pass + actual = custom_artifact_types.get_custom_artifact_base_symbol_for_return( + func, 'return-b') + self.assertEqual(actual, 'VertexDataset') - actual = custom_artifact_types.func_to_annotation_object(func) - expected = { - 'a': - int, - 'b': - typing_extensions.Annotated[ - aiplatform.VertexDataset, - kfp.components.types.type_annotations.OutputAnnotation] - } - self.assertEqual(actual, expected) +class TestGetCustomArtifactImportItemsFromFunction( + _TestCaseWithThirdPartyPackage): - def test_aliased_module(self): - import aiplatform as aiplatform_alias + def test_no_ann(self): - def func(a: int, b: Output[aiplatform_alias.VertexDataset]): + def func(): pass - actual = custom_artifact_types.func_to_annotation_object(func) + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + self.assertEqual(actual, []) - import aiplatform - expected = { - 'a': - int, - 'b': - typing_extensions.Annotated[ - aiplatform.VertexDataset, - kfp.components.types.type_annotations.OutputAnnotation] - } - self.assertEqual(actual, expected) + def test_primitives(self): - def test_input_output_path(self): - from aiplatform import VertexDataset + def func(a: str, b: int) -> str: + pass - def func( - a: int, - b: InputPath('Dataset'), - c: Output[VertexDataset], - d: OutputPath('Dataset'), - ) -> str: - return 'dataset' + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + self.assertEqual(actual, []) - actual = custom_artifact_types.func_to_annotation_object(func) - - import aiplatform - expected = { - 'a': - int, - 'b': - kfp.components.types.type_annotations.InputPath('Dataset'), - 'c': - typing_extensions.Annotated[ - aiplatform.VertexDataset, - kfp.components.types.type_annotations.OutputAnnotation], - 'd': - kfp.components.types.type_annotations.OutputPath('Dataset'), - } - self.assertEqual(actual, expected) + def test_input_path(self): + def func(a: InputPath(str), b: InputPath('Dataset')) -> str: + pass -class TestGetCustomArtifactTypeImportStatements(_TestCaseWithThirdPartyPackage): + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + self.assertEqual(actual, []) - def test_no_annotations(self): + def test_output_path(self): - def func(a, b): + def func(a: OutputPath(str), b: OutputPath('Dataset')) -> str: pass - actual = custom_artifact_types.get_custom_artifact_type_import_statements( + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( func) - expected = [] - self.assertEqual(actual, expected) + self.assertEqual(actual, []) - def test_no_return(self): - from aiplatform import VertexDataset + def test_input_kfp_artifact(self): - def func(a: int, b: Input[VertexDataset]): + def func(a: Input[Artifact]): pass - actual = custom_artifact_types.get_custom_artifact_type_import_statements( + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( func) + self.assertEqual(actual, []) - expected = ['from aiplatform import VertexDataset'] - self.assertEqual(actual, expected) - - def test_with_return(self): - from aiplatform import VertexDataset + def test_output_kfp_artifact(self): - def func(a: int, b: kfp.dsl.Input[VertexDataset]) -> int: - return 1 + def func(a: Output[Artifact]): + pass - import aiplatform - actual = custom_artifact_types.get_custom_artifact_type_import_statements( + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( func) - expected = ['from aiplatform import VertexDataset'] - self.assertEqual(actual, expected) + self.assertEqual(actual, []) - def test_alias(self): - from aiplatform import VertexDataset - Alias = VertexDataset + def test_return_kfp_artifact1(self): - def func(a: int, b: Input[Alias]): + def func() -> Artifact: pass - with self.assertRaisesRegex( - TypeError, r'Module or type name aliases are not supported'): - custom_artifact_types.get_custom_artifact_type_import_statements( - func) + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + self.assertEqual(actual, []) - def test_long_form_annotation(self): - import aiplatform + def test_return_kfp_artifact2(self): - def func(a: int, b: dsl.Output[aiplatform.VertexDataset]): + def func() -> dsl.Artifact: pass - actual = custom_artifact_types.get_custom_artifact_type_import_statements( + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( func) + self.assertEqual(actual, []) - expected = ['import aiplatform'] - self.assertEqual(actual, expected) - - def test_aliased_module(self): - import aiplatform as aiplatform_alias + def test_named_tuple_primitives(self): - def func(a: int, b: Output[aiplatform_alias.VertexDataset]): + def func() -> typing.NamedTuple('Outputs', [ + ('a', str), + ('b', int), + ]): pass - with self.assertRaisesRegex( - TypeError, r'Module or type name aliases are not supported'): - custom_artifact_types.get_custom_artifact_type_import_statements( - func) + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + self.assertEqual(actual, []) - def test_input_output_path(self): + def test_input_google_artifact(self): + import aiplatform from aiplatform import VertexDataset def func( - a: int, - b: InputPath('Dataset'), - c: Output[VertexDataset], - d: OutputPath('Dataset'), - ) -> str: - return 'dataset' - - actual = custom_artifact_types.get_custom_artifact_type_import_statements( + a: Input[aiplatform.VertexDataset], + b: Input[VertexDataset], + c: dsl.Input[aiplatform.VertexDataset], + d: kfp.dsl.Input[VertexDataset], + ): + pass + + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( func) + self.assertEqual(actual, ['aiplatform', 'aiplatform.VertexDataset']) - expected = ['from aiplatform import VertexDataset'] - self.assertEqual(actual, expected) + def test_output_google_artifact(self): + import aiplatform + from aiplatform import VertexDataset + def func( + a: Output[aiplatform.VertexDataset], + b: Output[VertexDataset], + c: dsl.Output[aiplatform.VertexDataset], + d: kfp.dsl.Output[VertexDataset], + ): + pass + + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) -class TestImportStatementAdded(_TestCaseWithThirdPartyPackage): + self.assertEqual(actual, ['aiplatform', 'aiplatform.VertexDataset']) - def test(self): + def test_return_google_artifact1(self): import aiplatform from aiplatform import VertexDataset - @dsl.component - def one( - a: int, - b: Output[VertexDataset], - ): + def func() -> VertexDataset: pass - @dsl.component - def two(a: Input[aiplatform.VertexDataset],): + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + self.assertEqual(actual, ['aiplatform.VertexDataset']) + + def test_return_google_artifact2(self): + import aiplatform + + def func() -> aiplatform.VertexDataset: pass - @dsl.pipeline() - def my_pipeline(): - one_task = one(a=1) - two_task = two(a=one_task.outputs['b']) + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + self.assertEqual(actual, ['aiplatform']) - with tempfile.TemporaryDirectory() as tmpdir: - output_path = os.path.join(tmpdir, 'pipeline.yaml') + def test_named_tuple_google_artifact(self): + import aiplatform + from aiplatform import VertexDataset - compiler.Compiler().compile( - pipeline_func=my_pipeline, package_path=output_path) - with open(output_path) as f: - pipeline_spec = yaml.safe_load(f) + def func() -> typing.NamedTuple('Outputs', [ + ('a', aiplatform.VertexDataset), + ('b', VertexDataset), + ]): + pass - self.assertIn( - 'from aiplatform import VertexDataset', - ' '.join(pipeline_spec['deploymentSpec']['executors']['exec-one'] - ['container']['command'])) - self.assertIn( - 'import aiplatform', - ' '.join(pipeline_spec['deploymentSpec']['executors']['exec-two'] - ['container']['command'])) + actual = custom_artifact_types.get_custom_artifact_import_items_from_function( + func) + self.assertEqual(actual, ['aiplatform', 'aiplatform.VertexDataset']) if __name__ == '__main__': diff --git a/sdk/python/kfp/components/types/type_utils.py b/sdk/python/kfp/components/types/type_utils.py index 0fe48fcf07f..d37246bee87 100644 --- a/sdk/python/kfp/components/types/type_utils.py +++ b/sdk/python/kfp/components/types/type_utils.py @@ -28,6 +28,7 @@ # ComponentSpec I/O types to DSL ontology artifact classes mapping. _ARTIFACT_CLASSES_MAPPING = { + 'artifact': artifact_types.Artifact, 'model': artifact_types.Model, 'dataset': artifact_types.Dataset, 'metrics': artifact_types.Metrics, @@ -423,3 +424,8 @@ def _annotation_to_type_struct(annotation): schema_title = str(annotation) type_struct = get_canonical_type_name_for_type(schema_title) return type_struct or schema_title + + +def is_typed_named_tuple_annotation(annotation: Any) -> bool: + return hasattr(annotation, '_fields') and hasattr(annotation, + '__annotations__') From f125b065e86a4799882957341a1e8fa569c2e856 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Wed, 14 Sep 2022 08:02:10 -0600 Subject: [PATCH 16/17] implement review feedback; clean up artifact names --- sdk/python/kfp/components/executor_test.py | 114 ++++++++++++++++----- 1 file changed, 89 insertions(+), 25 deletions(-) diff --git a/sdk/python/kfp/components/executor_test.py b/sdk/python/kfp/components/executor_test.py index f41763e8f7a..d190c44adff 100644 --- a/sdk/python/kfp/components/executor_test.py +++ b/sdk/python/kfp/components/executor_test.py @@ -88,14 +88,12 @@ def test_input_artifact_custom_type(self): executor_input = """\ { "inputs": { - "parameterValues": { - }, "artifacts": { "input_artifact_one": { "artifacts": [ { "metadata": {}, - "name": "input_artifact_one_name", + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", "type": { "schemaTitle": "google.VertexDataset" }, @@ -132,7 +130,10 @@ def test_func(input_artifact_one: Input[VertexDataset]): input_artifact_one.path, os.path.join(artifact_types._GCS_LOCAL_MOUNT_PREFIX, 'some-bucket/input_artifact_one')) - self.assertEqual(input_artifact_one.name, 'input_artifact_one_name') + self.assertEqual( + input_artifact_one.name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123' + ) self.assertIsInstance(input_artifact_one, VertexDataset) self.execute_and_load_output_metadata(test_func, executor_input) @@ -146,7 +147,7 @@ def test_input_artifact(self): "artifacts": [ { "metadata": {}, - "name": "input_artifact_one", + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", "type": { "schemaTitle": "google.VertexDataset" }, @@ -168,7 +169,10 @@ def test_func(input_artifact_one: Input[Dataset]): self.assertEqual( input_artifact_one.path, os.path.join(self._test_dir, 'some-bucket/input_artifact_one')) - self.assertEqual(input_artifact_one.name, 'input_artifact_one') + self.assertEqual( + input_artifact_one.name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123' + ) self.assertIsInstance(input_artifact_one, Dataset) self.execute_and_load_output_metadata(test_func, executor_input) @@ -182,7 +186,7 @@ def test_output_artifact(self): "artifacts": [ { "metadata": {}, - "name": "output_artifact_one_name", + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", "type": { "schemaTitle": "system.Model" }, @@ -203,8 +207,10 @@ def test_func(output_artifact_one: Output[Model]): self.assertEqual( output_artifact_one.path, os.path.join(self._test_dir, 'some-bucket/output_artifact_one')) - self.assertEqual(output_artifact_one.name, - 'output_artifact_one_name') + self.assertEqual( + output_artifact_one.name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123' + ) self.assertIsInstance(output_artifact_one, Model) self.execute_and_load_output_metadata(test_func, executor_input) @@ -212,8 +218,6 @@ def test_func(output_artifact_one: Output[Model]): def test_output_parameter(self): executor_input = """\ { - "inputs": { - }, "outputs": { "parameters": { "output_parameter_path": { @@ -244,7 +248,7 @@ def test_input_path_artifact(self): "artifacts": [ { "metadata": {}, - "name": "input_artifact_one", + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", "type": { "schemaTitle": "system.Dataset" }, @@ -276,7 +280,7 @@ def test_output_path_artifact(self): "artifacts": [ { "metadata": {}, - "name": "output_artifact_one", + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", "type": { "schemaTitle": "system.Model" }, @@ -300,15 +304,13 @@ def test_func(output_artifact_one_path: OutputPath('Model')): def test_output_metadata(self): executor_input = """\ { - "inputs": { - }, "outputs": { "artifacts": { "output_artifact_two": { "artifacts": [ { "metadata": {}, - "name": "output_artifact_two", + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", "type": { "schemaTitle": "system.Metrics" }, @@ -338,8 +340,10 @@ def test_func(output_artifact_two: Output[Metrics]): 'artifacts': { 'output_artifact_two': { 'artifacts': [{ - 'name': 'output_artifact_two', - 'uri': 'new-uri', + 'name': + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', + 'uri': + 'new-uri', 'metadata': { 'key_1': 'value_1', 'key_2': 2, @@ -583,7 +587,7 @@ def test_func(first: int, second: int) -> Dict[str, int]: }, }) - def test_artifact_output(self): + def test_artifact_output1(self): executor_input = """\ { "inputs": { @@ -597,7 +601,8 @@ def test_artifact_output(self): "output": { "artifacts": [ { - "name": "output", + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", "type": { "schemaTitle": "system.Artifact" }, @@ -630,8 +635,10 @@ def test_func(first: str, second: str, output: Output[Artifact]) -> str: 'output': { 'artifacts': [{ 'metadata': {}, - 'name': 'output', - 'uri': 'gs://some-bucket/output' + 'name': + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', + 'uri': + 'gs://some-bucket/output' }] } }, @@ -644,6 +651,60 @@ def test_func(first: str, second: str, output: Output[Artifact]) -> str: artifact_payload = f.read() self.assertEqual(artifact_payload, 'artifact output') + def test_artifact_output2(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "first": "Hello", + "second": "World" + } + }, + "outputs": { + "artifacts": { + "Output": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output" + } + ] + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(first: str, second: str) -> Artifact: + return first + ', ' + second + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual( + output_metadata, { + 'artifacts': { + 'Output': { + 'artifacts': [{ + 'metadata': {}, + 'name': + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', + 'uri': + 'gs://some-bucket/output' + }] + } + }, + }) + + with open(os.path.join(self._test_dir, 'some-bucket/output'), 'r') as f: + artifact_payload = f.read() + self.assertEqual(artifact_payload, 'Hello, World') + def test_named_tuple_output(self): executor_input = """\ { @@ -652,7 +713,8 @@ def test_named_tuple_output(self): "output_dataset": { "artifacts": [ { - "name": "output_dataset", + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", "type": { "schemaTitle": "system.Dataset" }, @@ -705,8 +767,10 @@ def func_returning_plain_tuple() -> NamedTuple('Outputs', [ 'output_dataset': { 'artifacts': [{ 'metadata': {}, - 'name': 'output_dataset', - 'uri': 'gs://some-bucket/output_dataset' + 'name': + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', + 'uri': + 'gs://some-bucket/output_dataset' }] } }, From 6ae7fa30deeeed6ae436aef1f4d6c7bf7bd99571 Mon Sep 17 00:00:00 2001 From: connor-mccarthy Date: Wed, 14 Sep 2022 08:04:02 -0600 Subject: [PATCH 17/17] move test method --- sdk/python/kfp/components/executor_test.py | 76 +++++++++++----------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/sdk/python/kfp/components/executor_test.py b/sdk/python/kfp/components/executor_test.py index d190c44adff..6846a0f92ca 100644 --- a/sdk/python/kfp/components/executor_test.py +++ b/sdk/python/kfp/components/executor_test.py @@ -177,44 +177,6 @@ def test_func(input_artifact_one: Input[Dataset]): self.execute_and_load_output_metadata(test_func, executor_input) - def test_output_artifact(self): - executor_input = """\ - { - "outputs": { - "artifacts": { - "output_artifact_one": { - "artifacts": [ - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", - "type": { - "schemaTitle": "system.Model" - }, - "uri": "gs://some-bucket/output_artifact_one" - } - ] - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(output_artifact_one: Output[Model]): - self.assertEqual(output_artifact_one.uri, - 'gs://some-bucket/output_artifact_one') - - self.assertEqual( - output_artifact_one.path, - os.path.join(self._test_dir, 'some-bucket/output_artifact_one')) - self.assertEqual( - output_artifact_one.name, - 'projects/123/locations/us-central1/metadataStores/default/artifacts/123' - ) - self.assertIsInstance(output_artifact_one, Model) - - self.execute_and_load_output_metadata(test_func, executor_input) - def test_output_parameter(self): executor_input = """\ { @@ -705,6 +667,44 @@ def test_func(first: str, second: str) -> Artifact: artifact_payload = f.read() self.assertEqual(artifact_payload, 'Hello, World') + def test_output_artifact3(self): + executor_input = """\ + { + "outputs": { + "artifacts": { + "output_artifact_one": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Model" + }, + "uri": "gs://some-bucket/output_artifact_one" + } + ] + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(output_artifact_one: Output[Model]): + self.assertEqual(output_artifact_one.uri, + 'gs://some-bucket/output_artifact_one') + + self.assertEqual( + output_artifact_one.path, + os.path.join(self._test_dir, 'some-bucket/output_artifact_one')) + self.assertEqual( + output_artifact_one.name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123' + ) + self.assertIsInstance(output_artifact_one, Model) + + self.execute_and_load_output_metadata(test_func, executor_input) + def test_named_tuple_output(self): executor_input = """\ {