Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sdk): Add comments to IR YAML file #8467

Merged
merged 20 commits into from
Dec 5, 2022
13 changes: 10 additions & 3 deletions sdk/python/kfp/compiler/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@

from kfp.compiler import pipeline_spec_builder as builder
from kfp.components import base_component
from kfp.components import graph_component
from kfp.components import yaml_component
from kfp.components.types import type_utils


Expand Down Expand Up @@ -79,5 +77,14 @@ def compile(
pipeline_name=pipeline_name,
pipeline_parameters=pipeline_parameters,
)

if hasattr(pipeline_func, 'description'):
description = pipeline_func.description or None

else:
description = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's possible I'm missing something, but it looks like this is to handle the fact that GraphComponents have a .description, but other BaseComponents (YamlComponent, PythonComponent) don't. Do you think we could put .description on the BaseComponent abstract base class and implement for all concrete classes, that way all component/pipeline types can be treated the same way by the compiler?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would also allow us to avoid expanding the interface of write_pipeline_spec_to_file to support comments.


builder.write_pipeline_spec_to_file(
pipeline_spec=pipeline_spec, package_path=package_path)
pipeline_spec=pipeline_spec,
pipeline_description=description,
package_path=package_path)
156 changes: 156 additions & 0 deletions sdk/python/kfp/compiler/compiler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1490,5 +1490,161 @@ def pipeline_with_input(boolean: bool = False):
.default_value.bool_value, True)


class TestYamlComments(unittest.TestCase):
    """Tests for the comment header prepended to compiled IR YAML files."""

    @staticmethod
    def _compile_and_read(pipeline_func) -> str:
        # Compile the pipeline to a temporary YAML file and return the file's
        # full text; one read replaces the repeated open/read in each test.
        with tempfile.TemporaryDirectory() as tmpdir:
            pipeline_spec_path = os.path.join(tmpdir, 'output.yaml')
            compiler.Compiler().compile(
                pipeline_func=pipeline_func, package_path=pipeline_spec_path)
            with open(pipeline_spec_path) as f:
                return f.read()

    def test_comments_include_inputs_and_outputs_and_pipeline_name(self):

        @dsl.component
        def identity(string: str, model: bool) -> str:
            return string

        @dsl.pipeline()
        def my_pipeline(sample_input1: bool = True,
                        sample_input2: str = 'string') -> str:
            op1 = identity(string=sample_input2, model=sample_input1)
            result = op1.output
            return result

        yaml_content = self._compile_and_read(my_pipeline)

        inputs_string = '# Inputs: \n# sample_input1: bool [Default: True]\n# sample_input2: str [Default: string]'
        outputs_string = '# Outputs: \n# Output: str'
        name_string = '# Name: my-pipeline'

        # Name, inputs, and outputs must all appear in the comment header.
        self.assertIn(name_string, yaml_content)
        self.assertIn(inputs_string, yaml_content)
        self.assertIn(outputs_string, yaml_content)

    def test_comments_include_definition(self):

        @dsl.component
        def identity(string: str, model: bool) -> str:
            return string

        @dsl.pipeline()
        def pipeline_with_no_definition(sample_input1: bool = True,
                                        sample_input2: str = 'string') -> str:
            op1 = identity(string=sample_input2, model=sample_input1)
            result = op1.output
            return result

        definition_string = '# Description: This is a definition of this pipeline'

        # No pipeline docstring -> no Description line in the comments.
        self.assertNotIn(definition_string,
                         self._compile_and_read(pipeline_with_no_definition))

        @dsl.pipeline()
        def pipeline_with_definition(sample_input1: bool = True,
                                     sample_input2: str = 'string') -> str:
            """This is a definition of this pipeline."""
            op1 = identity(string=sample_input2, model=sample_input1)
            result = op1.output
            return result

        # With a docstring the Description line must be present.
        self.assertIn(definition_string,
                      self._compile_and_read(pipeline_with_definition))

    def test_comments_on_pipeline_with_no_inputs_or_outputs(self):

        @dsl.component
        def identity(string: str, model: bool) -> str:
            return string

        @dsl.pipeline()
        def pipeline_with_no_inputs() -> str:
            op1 = identity(string='string', model=True)
            result = op1.output
            return result

        # A pipeline without inputs omits the "# Inputs:" header entirely.
        self.assertNotIn('# Inputs: \n',
                         self._compile_and_read(pipeline_with_no_inputs))

        @dsl.pipeline()
        def pipeline_with_no_outputs(sample_input1: bool = True,
                                     sample_input2: str = 'string'):
            identity(string=sample_input2, model=sample_input1)

        # A pipeline without outputs omits the "# Outputs:" header entirely.
        self.assertNotIn('# Outputs: \n',
                         self._compile_and_read(pipeline_with_no_outputs))

    def test_comments_follow_pattern(self):

        @dsl.component
        def identity(string: str, model: bool) -> str:
            return string

        @dsl.pipeline()
        def my_pipeline(sample_input1: bool = True,
                        sample_input2: str = 'string') -> str:
            """This is a definition of this pipeline."""
            op1 = identity(string=sample_input2, model=sample_input1)
            result = op1.output
            return result

        yaml_content = self._compile_and_read(my_pipeline)

        # The full header must appear in this exact order:
        # title, name, description, inputs, outputs.
        pattern_sample = '# PIPELINE DEFINITION\n# Name: my-pipeline\n# Description: This is a definition of this pipeline.\n# Inputs: \n# sample_input1: bool [Default: True]\n# sample_input2: str [Default: string]\n# Outputs: \n# Output: str'
        self.assertIn(pattern_sample, yaml_content)


# Allow running this test module directly (e.g. `python compiler_test.py`).
if __name__ == '__main__':
    unittest.main()
91 changes: 91 additions & 0 deletions sdk/python/kfp/compiler/pipeline_spec_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1604,6 +1604,7 @@ def create_pipeline_spec(


def write_pipeline_spec_to_file(pipeline_spec: pipeline_spec_pb2.PipelineSpec,
pipeline_description: str,
package_path: str) -> None:
"""Writes PipelineSpec into a YAML or JSON (deprecated) file.

Expand All @@ -1612,6 +1613,8 @@ def write_pipeline_spec_to_file(pipeline_spec: pipeline_spec_pb2.PipelineSpec,
package_path (str): The path to which to write the PipelineSpec.
"""
json_dict = json_format.MessageToDict(pipeline_spec)
yaml_comments = extract_comments_from_pipeline_spec(json_dict,
pipeline_description)

if package_path.endswith('.json'):
warnings.warn(
Expand All @@ -1628,6 +1631,94 @@ def write_pipeline_spec_to_file(pipeline_spec: pipeline_spec_pb2.PipelineSpec,
with open(package_path, 'w') as yaml_file:
yaml.dump(json_dict, yaml_file, sort_keys=True)

with open(package_path, 'r+') as f:
old = f.read()
f.seek(0)
f.write(yaml_comments + old)

JOCSTAA marked this conversation as resolved.
Show resolved Hide resolved
else:
raise ValueError(
f'The output path {package_path} should end with ".yaml".')


def extract_comments_from_pipeline_spec(pipeline_spec: dict,
                                        pipeline_description: str) -> str:
    """Builds the comment header describing a pipeline for the IR YAML file.

    Args:
        pipeline_spec (dict): PipelineSpec as a JSON-compatible dict
            (output of json_format.MessageToDict).
        pipeline_description (str): The pipeline's description, or None.

    Returns:
        A block of '#'-prefixed comment lines covering the pipeline's name,
        description, inputs, and outputs, or '' if the spec has no 'root'
        (e.g. an empty PipelineSpec written by low-level tests).
    """
    # Maps IR ParameterType names to their Python type names for display.
    map_parameter_types = {
        'NUMBER_INTEGER': 'int',
        'NUMBER_DOUBLE': 'float',
        'STRING': 'str',
        'BOOLEAN': 'bool',
        'LIST': 'list',
        'STRUCT': 'dict'
    }

    def describe_definitions(section_key: str, header: str) -> str:
        # Shared renderer for 'inputDefinitions' and 'outputDefinitions' --
        # both sections have the same parameters/artifacts structure, so one
        # implementation replaces the previous copy-pasted add_inputs/add_outputs.
        if section_key not in pipeline_spec['root']:
            return ''

        definitions = pipeline_spec['root'][section_key]
        string = header

        for name, parameter in definitions.get('parameters', {}).items():
            string += '# ' + name + ': ' + map_parameter_types[
                parameter['parameterType']]
            if 'defaultValue' in parameter:
                string += ' [Default: ' + str(parameter['defaultValue']) + ']'
            string += '\n'

        for name, artifact in definitions.get('artifacts', {}).items():
            # TODO: raise an exception if schemaTitle does not exist.
            if 'schemaTitle' in artifact['artifactType']:
                string += '# ' + name + ': ' + artifact['artifactType'][
                    'schemaTitle'] + '\n'

        return string

    # An empty PipelineSpec (no 'root') gets no comment header at all.
    if 'root' not in pipeline_spec:
        return ''

    comment = '# PIPELINE DEFINITION\n'
    comment += '# Name: ' + pipeline_spec['pipelineInfo']['name'] + '\n'
    if pipeline_description:
        comment += '# Description: ' + pipeline_description + '\n'
    comment += describe_definitions('inputDefinitions', '# Inputs: \n')
    comment += describe_definitions('outputDefinitions', '# Outputs: \n')

    return comment
8 changes: 4 additions & 4 deletions sdk/python/kfp/compiler/pipeline_spec_builder_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,15 +271,15 @@ def test_yaml(self):
with tempfile.TemporaryDirectory() as tempdir:
temp_filepath = os.path.join(tempdir, 'output.yaml')
pipeline_spec_builder.write_pipeline_spec_to_file(
self.pipeline_spec, temp_filepath)
self.pipeline_spec, None, temp_filepath)
actual = pipeline_spec_from_file(temp_filepath)
self.assertEqual(actual, self.pipeline_spec)

def test_yml(self):
with tempfile.TemporaryDirectory() as tempdir:
temp_filepath = os.path.join(tempdir, 'output.yml')
pipeline_spec_builder.write_pipeline_spec_to_file(
self.pipeline_spec, temp_filepath)
self.pipeline_spec, None, temp_filepath)
actual = pipeline_spec_from_file(temp_filepath)
self.assertEqual(actual, self.pipeline_spec)

Expand All @@ -288,7 +288,7 @@ def test_json(self):
DeprecationWarning, r'Compiling to JSON is deprecated'):
temp_filepath = os.path.join(tempdir, 'output.json')
pipeline_spec_builder.write_pipeline_spec_to_file(
self.pipeline_spec, temp_filepath)
self.pipeline_spec, None, temp_filepath)
actual = pipeline_spec_from_file(temp_filepath)
self.assertEqual(actual, self.pipeline_spec)

Expand All @@ -297,7 +297,7 @@ def test_incorrect_extension(self):
ValueError, r'should end with "\.yaml"\.'):
temp_filepath = os.path.join(tempdir, 'output.txt')
pipeline_spec_builder.write_pipeline_spec_to_file(
self.pipeline_spec, temp_filepath)
self.pipeline_spec, None, temp_filepath)


if __name__ == '__main__':
Expand Down
2 changes: 2 additions & 0 deletions sdk/python/kfp/components/graph_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def __init__(
self.pipeline_func = pipeline_func
self.name = name

self.description = component_spec.description
JOCSTAA marked this conversation as resolved.
Show resolved Hide resolved

args_list = []
signature = inspect.signature(pipeline_func)

Expand Down
2 changes: 1 addition & 1 deletion sdk/python/kfp/components/structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,7 @@ def save_to_component_yaml(self, output_file: str) -> None:
from kfp.compiler import pipeline_spec_builder as builder

pipeline_spec = self.to_pipeline_spec()
builder.write_pipeline_spec_to_file(pipeline_spec, output_file)
builder.write_pipeline_spec_to_file(pipeline_spec, None, output_file)

def to_pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec:
"""Creates a pipeline instance and constructs the pipeline spec for a
Expand Down