feat(backend): add V2 lightweight Python pipeline sample. Fixes #5689 (#5998)
kubeflow · Jul 11, 2021 · 27415fd · 27415fd
1 parent a4caff3
commit 27415fd
Show file tree

Hide file tree

Showing 3 changed files with 137 additions and 1 deletion.
diff --git a/backend/Dockerfile b/backend/Dockerfile
@@ -53,9 +53,10 @@ COPY backend/src/apiserver/config/sample_config.json /samples/
 RUN set -e; \
     < /samples/sample_config.json jq .[].file --raw-output | while read pipeline_yaml; do \
         pipeline_py="${pipeline_yaml%.yaml}"; \
+        mode=`< /samples/sample_config.json jq ".[] | select(.file == \"${pipeline_yaml}\") | (if .mode == null then \"V1\" else .mode end)" --raw-output`; \
         mv "$pipeline_py" "${pipeline_py}.tmp"; \
         echo 'import kfp; kfp.components.default_base_image_or_builder="gcr.io/google-appengine/python:2020-03-31-141326"' | cat - "${pipeline_py}.tmp" > "$pipeline_py"; \
-        dsl-compile --py "$pipeline_py" --output "$pipeline_yaml" || python3 "$pipeline_py"; \
+        dsl-compile --py "$pipeline_py" --output "$pipeline_yaml" --mode "$mode" || python3 "$pipeline_py"; \
     done
 
 # 3. Start api web server

diff --git a/backend/src/apiserver/config/sample_config.json b/backend/src/apiserver/config/sample_config.json
@@ -18,5 +18,11 @@
     "name": "[Tutorial] DSL - Control structures",
     "description": "[source code](https://github.com/kubeflow/pipelines/tree/master/samples/tutorials/DSL%20-%20Control%20structures) Shows how to use conditional execution and exit handlers. This pipeline will randomly fail to demonstrate that the exit handler gets executed even in case of failure.",
     "file": "/samples/tutorials/DSL - Control structures/DSL - Control structures.py.yaml"
+  },
+  {
+    "name": "[Tutorial] V2 lightweight Python components",
+    "description": "[source code](https://github.com/kubeflow/pipelines/tree/master/samples/v2/lightweight_python_functions_v2_pipeline/lightweight_python_functions_v2_pipeline.py) Shows different component input and output options for KFP v2 components.",
+    "file": "/samples/v2/lightweight_python_functions_v2_pipeline/lightweight_python_functions_v2_pipeline.py.yaml",
+    "mode": "V2_COMPATIBLE"
   }
 ]
diff --git a/...s/v2/lightweight_python_functions_v2_pipeline/lightweight_python_functions_v2_pipeline.py b/...s/v2/lightweight_python_functions_v2_pipeline/lightweight_python_functions_v2_pipeline.py
@@ -0,0 +1,129 @@
+# Copyright 2021 The Kubeflow Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Sample pipeline for passing data in KFP v2."""
+from typing import Dict, List
+
+from kfp import dsl
+from kfp import components
+from kfp.components import InputPath, OutputPath
+from kfp.v2.dsl import Input, Output, Dataset, Model, component
+import kfp.compiler as compiler
+
+
+@component
+def preprocess(
+    # An input parameter of type string.
+    message: str,
+    # Use Output[T] to get a metadata-rich handle to the output artifact
+    # of type `Dataset`.
+    output_dataset_one: Output[Dataset],
+    # A locally accessible filepath for another output artifact of type
+    # `Dataset`.
+    output_dataset_two_path: OutputPath('Dataset'),
+    # A locally accessible filepath for an output parameter of type string.
+    output_parameter_path: OutputPath(str),
+    # A locally accessible filepath for an output parameter of type bool.
+    output_bool_parameter_path: OutputPath(bool),
+    # A locally accessible filepath for an output parameter of type dict.
+    output_dict_parameter_path: OutputPath(Dict[str, int]),
+    # A locally accessible filepath for an output parameter of type list.
+    output_list_parameter_path: OutputPath(List[str]),
+):
+  """Dummy preprocessing step"""
+
+  # Use Dataset.path to get a local file path that can be opened for writing.
+  # Dataset.uri is also available when the artifact's actual URI is needed.
+  with open(output_dataset_one.path, 'w') as f:
+    f.write(message)
+
+  # With OutputPath, the function receives only the local file path of the
+  # output artifact (a plain string), not an artifact object.
+  with open(output_dataset_two_path, 'w') as f:
+    f.write(message)
+
+  with open(output_parameter_path, 'w') as f:  # String parameter: raw text.
+    f.write(message)
+
+  with open(output_bool_parameter_path, 'w') as f:
+    f.write(str(True))  # Use either `str()` or `json.dumps()` for bool values.
+
+  import json  # Local import; dict and list parameters are written as JSON.
+  with open(output_dict_parameter_path, 'w') as f:
+    f.write(json.dumps({'A': 1, 'B': 2}))
+
+  with open(output_list_parameter_path, 'w') as f:
+    f.write(json.dumps(['a', 'b', 'c']))
+
+
+@component
+def train(
+    # Use InputPath to get a locally accessible path for the input artifact
+    # of type `Dataset`.
+    dataset_one_path: InputPath('Dataset'),
+    # Use Input[T] to get a metadata-rich handle to the input artifact
+    # of type `Dataset`.
+    dataset_two: Input[Dataset],
+    # An input parameter of type string.
+    message: str,
+    # Use Output[T] to get a metadata-rich handle to the output artifact
+    # of type `Model`.
+    model: Output[Model],
+    # An input parameter of type bool.
+    input_bool: bool,
+    # An input parameter of type dict.
+    input_dict: Dict[str, int],
+    # An input parameter of type List[str].
+    input_list: List[str],
+    # An input parameter of type int with a default value.
+    num_steps: int = 100,
+):
+  """Dummy Training step"""
+  with open(dataset_one_path, 'r') as input_file:
+    dataset_one_contents = input_file.read()
+
+  with open(dataset_two.path, 'r') as input_file:
+    dataset_two_contents = input_file.read()
+
+  line = (f'dataset_one_contents: {dataset_one_contents} || '
+          f'dataset_two_contents: {dataset_two_contents} || '
+          f'message: {message} || '
+          f'input_bool: {input_bool}, type {type(input_bool)} || '
+          f'input_dict: {input_dict}, type {type(input_dict)} || '
+          f'input_list: {input_list}, type {type(input_list)} \n')
+
+  with open(model.path, 'w') as output_file:
+    for i in range(num_steps):
+      output_file.write('Step {}\n{}\n=====\n'.format(i, line))
+
+  # `model` is already a Model artifact; its .metadata dictionary can store
+  # arbitrary metadata for the output artifact.
+  model.metadata['accuracy'] = 0.9
+
+
+@dsl.pipeline(pipeline_root='', name='my-test-pipeline-beta')
+def pipeline(message: str = 'message'):  # Chains `preprocess` into `train`.
+  preprocess_task = preprocess(message=message)
+  train_task = train(  # Names below omit the `_path` suffix; KFP strips it.
+      dataset_one=preprocess_task.outputs['output_dataset_one'],  # -> `dataset_one_path`
+      dataset_two=preprocess_task.outputs['output_dataset_two'],  # <- `output_dataset_two_path`
+      message=preprocess_task.outputs['output_parameter'],
+      input_bool=preprocess_task.outputs['output_bool_parameter'],
+      input_dict=preprocess_task.outputs['output_dict_parameter'],
+      input_list=preprocess_task.outputs['output_list_parameter'],
+  )  # `num_steps` is not passed, so `train` uses its default.
+
+
+if __name__ == '__main__':
+  compiler.Compiler(mode=dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
+      pipeline_func=pipeline, package_path=__file__.replace('.py', '.yaml'))