From 9e9e1081c6f95caedab7fc30efe0eb57c71f428b Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 11 Nov 2022 15:42:33 -0800 Subject: [PATCH] fix(sdk): fix boolean default value serialization bug (#8444) * add test cases * fix bug * add v1 component yaml back compat changes * update golden snapshot --- sdk/python/kfp/compiler/compiler_test.py | 134 ++++++++++++++++++ .../kfp/compiler/pipeline_spec_builder.py | 29 ++-- .../pipelines/xgboost_sample_pipeline.yaml | 22 +-- 3 files changed, 163 insertions(+), 22 deletions(-) diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py index 2094efb1425..3756221fca8 100644 --- a/sdk/python/kfp/compiler/compiler_test.py +++ b/sdk/python/kfp/compiler/compiler_test.py @@ -1356,5 +1356,139 @@ def my_pipeline(): print_op(message='Inside second exit handler.') +class TestBoolInputParameterWithDefaultSerializesCorrectly(unittest.TestCase): + # test with default = True, may have false test successes due to protocol buffer boolean default of False + def test_python_component(self): + + @dsl.component + def comp(boolean: bool = True) -> bool: + return boolean + + # test inner component interface + self.assertEqual( + comp.pipeline_spec.components['comp-comp'].input_definitions + .parameters['boolean'].default_value.bool_value, True) + + # test outer pipeline "wrapper" interface + self.assertEqual( + comp.pipeline_spec.root.input_definitions.parameters['boolean'] + .default_value.bool_value, True) + + def test_python_component_with_overrides(self): + + @dsl.component + def comp(boolean: bool = False) -> bool: + return boolean + + with tempfile.TemporaryDirectory() as tmpdir: + pipeline_spec_path = os.path.join(tmpdir, 'output.yaml') + compiler.Compiler().compile( + comp, pipeline_spec_path, pipeline_parameters={'boolean': True}) + pipeline_spec = pipeline_spec_from_file(pipeline_spec_path) + + # test outer pipeline "wrapper" interface + self.assertEqual( + pipeline_spec.root.input_definitions.parameters['boolean'] + .default_value.bool_value, True) + + def test_container_component(self): + + @dsl.container_component + def comp(boolean: bool = True): + return dsl.ContainerSpec(image='alpine', command=['echo', boolean]) + + # test inner component interface + self.assertEqual( + comp.pipeline_spec.components['comp-comp'].input_definitions + .parameters['boolean'].default_value.bool_value, True) + + # test pipeline "wrapper" interface + self.assertEqual( + comp.pipeline_spec.root.input_definitions.parameters['boolean'] + .default_value.bool_value, True) + + def test_container_component_with_overrides(self): + + @dsl.container_component + def comp(boolean: bool = True): + return dsl.ContainerSpec(image='alpine', command=['echo', boolean]) + + with tempfile.TemporaryDirectory() as tmpdir: + pipeline_spec_path = os.path.join(tmpdir, 'output.yaml') + compiler.Compiler().compile( + comp, pipeline_spec_path, pipeline_parameters={'boolean': True}) + pipeline_spec = pipeline_spec_from_file(pipeline_spec_path) + + # test outer pipeline "wrapper" interface + self.assertEqual( + pipeline_spec.root.input_definitions.parameters['boolean'] + .default_value.bool_value, True) + + def test_pipeline_no_input(self): + + @dsl.component + def comp(boolean: bool = True) -> bool: + return boolean + + @dsl.pipeline + def pipeline_no_input(): + comp() + + # test inner component interface + self.assertEqual( + pipeline_no_input.pipeline_spec.components['comp-comp'] + .input_definitions.parameters['boolean'].default_value.bool_value, + True) + + def test_pipeline_with_input(self): + + @dsl.component + def comp(boolean: bool = True) -> bool: + return boolean + + @dsl.pipeline + def pipeline_with_input(boolean: bool = True): + comp(boolean=boolean) + + # test inner component interface + self.assertEqual( + pipeline_with_input.pipeline_spec.components['comp-comp'] + .input_definitions.parameters['boolean'].default_value.bool_value, + True) + + # test pipeline interface + self.assertEqual( + pipeline_with_input.pipeline_spec.root.input_definitions + .parameters['boolean'].default_value.bool_value, True) + + def test_pipeline_with_with_overrides(self): + + @dsl.component + def comp(boolean: bool = True) -> bool: + return boolean + + @dsl.pipeline + def pipeline_with_input(boolean: bool = False): + comp(boolean=boolean) + + with tempfile.TemporaryDirectory() as tmpdir: + pipeline_spec_path = os.path.join(tmpdir, 'output.yaml') + compiler.Compiler().compile( + pipeline_with_input, + pipeline_spec_path, + pipeline_parameters={'boolean': True}) + pipeline_spec = pipeline_spec_from_file(pipeline_spec_path) + + # test inner component interface + self.assertEqual( + pipeline_spec.components['comp-comp'].input_definitions + .parameters['boolean'].default_value.bool_value, True) + + # test pipeline interface + self.assertEqual( + pipeline_spec.root.input_definitions.parameters['boolean'] + .default_value.bool_value, True) + + if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py index e10287ef468..a330c963ddc 100644 --- a/sdk/python/kfp/compiler/pipeline_spec_builder.py +++ b/sdk/python/kfp/compiler/pipeline_spec_builder.py @@ -360,9 +360,11 @@ def build_component_spec_for_task( input_name].parameter_type = type_utils.get_parameter_type( input_spec.type) if input_spec.default is not None: - component_spec.input_definitions.parameters[ - input_name].default_value.CopyFrom( - to_protobuf_value(input_spec.default)) + _fill_in_component_input_default_value( + component_spec=component_spec, + input_name=input_name, + default_value=input_spec.default, + ) else: component_spec.input_definitions.artifacts[ @@ -404,9 +406,11 @@ def _build_component_spec_from_component_spec_structure( input_name].parameter_type = type_utils.get_parameter_type( input_spec.type) if input_spec.default is not None: - component_spec.input_definitions.parameters[ - input_name].default_value.CopyFrom( - to_protobuf_value(input_spec.default)) + _fill_in_component_input_default_value( + component_spec=component_spec, + input_name=input_name, + default_value=input_spec.default, + ) else: component_spec.input_definitions.artifacts[ @@ -573,11 +577,15 @@ def _fill_in_component_input_default_value( parameter_type = component_spec.input_definitions.parameters[ input_name].parameter_type if pipeline_spec_pb2.ParameterType.NUMBER_INTEGER == parameter_type: + # cast to int to support v1 component YAML where NUMBER_INTEGER defaults are included as strings + # for example, input Limit: https://raw.githubusercontent.com/kubeflow/pipelines/60a2612541ec08c6a85c237d2ec7525b12543a43/components/datasets/Chicago_Taxi_Trips/component.yaml component_spec.input_definitions.parameters[ - input_name].default_value.number_value = default_value + input_name].default_value.number_value = int(default_value) + # cast to int to support v1 component YAML where NUMBER_DOUBLE defaults are included as strings + # for example, input learning_rate: https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml elif pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE == parameter_type: component_spec.input_definitions.parameters[ - input_name].default_value.number_value = default_value + input_name].default_value.number_value = float(default_value) elif pipeline_spec_pb2.ParameterType.STRING == parameter_type: component_spec.input_definitions.parameters[ input_name].default_value.string_value = default_value @@ -1061,9 +1069,8 @@ def modify_pipeline_spec_with_override( # that match the pipeline inputs definition. for input_name, input_value in (pipeline_parameters or {}).items(): if input_name in pipeline_spec.root.input_definitions.parameters: - pipeline_spec.root.input_definitions.parameters[ - input_name].default_value.CopyFrom( - to_protobuf_value(input_value)) + _fill_in_component_input_default_value(pipeline_spec.root, + input_name, input_value) elif input_name in pipeline_spec.root.input_definitions.artifacts: raise NotImplementedError( 'Default value for artifact input is not supported.') diff --git a/sdk/python/test_data/pipelines/xgboost_sample_pipeline.yaml b/sdk/python/test_data/pipelines/xgboost_sample_pipeline.yaml index f319bb321e9..f36bf203594 100644 --- a/sdk/python/test_data/pipelines/xgboost_sample_pipeline.yaml +++ b/sdk/python/test_data/pipelines/xgboost_sample_pipeline.yaml @@ -7,7 +7,7 @@ components: defaultValue: csv parameterType: STRING limit: - defaultValue: '1000' + defaultValue: 1000.0 parameterType: NUMBER_INTEGER select: defaultValue: trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,fare,tips,tolls,extras,trip_total,payment_type,company,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid_location @@ -132,19 +132,19 @@ components: defaultValue: gbtree parameterType: STRING label_column: - defaultValue: '0' + defaultValue: 0.0 parameterType: NUMBER_INTEGER learning_rate: - defaultValue: '0.3' + defaultValue: 0.3 parameterType: NUMBER_DOUBLE max_depth: - defaultValue: '6' + defaultValue: 6.0 parameterType: NUMBER_INTEGER min_split_loss: - defaultValue: '0' + defaultValue: 0.0 parameterType: NUMBER_DOUBLE num_iterations: - defaultValue: '10' + defaultValue: 10.0 parameterType: NUMBER_INTEGER objective: defaultValue: reg:squarederror @@ -174,16 +174,16 @@ components: label_column_name: parameterType: STRING learning_rate: - defaultValue: '0.3' + defaultValue: 0.3 parameterType: NUMBER_DOUBLE max_depth: - defaultValue: '6' + defaultValue: 6.0 parameterType: NUMBER_INTEGER min_split_loss: - defaultValue: '0' + defaultValue: 0.0 parameterType: NUMBER_DOUBLE num_iterations: - defaultValue: '10' + defaultValue: 10.0 parameterType: NUMBER_INTEGER objective: defaultValue: reg:squarederror @@ -885,4 +885,4 @@ root: taskInfo: name: xgboost-train-2 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.4 +sdkVersion: kfp-2.0.0-beta.6