Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(sdk): fix boolean default value serialization bug #8444

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions sdk/python/kfp/compiler/compiler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1356,5 +1356,139 @@ def my_pipeline():
print_op(message='Inside second exit handler.')


class TestBoolInputParameterWithDefaultSerializesCorrectly(unittest.TestCase):
# test with default = True, may have false test successes due to protocol buffer boolean default of False
def test_python_component(self):

@dsl.component
def comp(boolean: bool = True) -> bool:
return boolean

# test inner component interface
self.assertEqual(
comp.pipeline_spec.components['comp-comp'].input_definitions
.parameters['boolean'].default_value.bool_value, True)

# test outer pipeline "wrapper" interface
self.assertEqual(
comp.pipeline_spec.root.input_definitions.parameters['boolean']
.default_value.bool_value, True)

def test_python_component_with_overrides(self):

@dsl.component
def comp(boolean: bool = False) -> bool:
return boolean

with tempfile.TemporaryDirectory() as tmpdir:
pipeline_spec_path = os.path.join(tmpdir, 'output.yaml')
compiler.Compiler().compile(
comp, pipeline_spec_path, pipeline_parameters={'boolean': True})
pipeline_spec = pipeline_spec_from_file(pipeline_spec_path)

# test outer pipeline "wrapper" interface
self.assertEqual(
pipeline_spec.root.input_definitions.parameters['boolean']
.default_value.bool_value, True)

def test_container_component(self):

@dsl.container_component
def comp(boolean: bool = True):
return dsl.ContainerSpec(image='alpine', command=['echo', boolean])

# test inner component interface
self.assertEqual(
comp.pipeline_spec.components['comp-comp'].input_definitions
.parameters['boolean'].default_value.bool_value, True)

# test pipeline "wrapper" interface
self.assertEqual(
comp.pipeline_spec.root.input_definitions.parameters['boolean']
.default_value.bool_value, True)

def test_container_component_with_overrides(self):

@dsl.container_component
def comp(boolean: bool = True):
return dsl.ContainerSpec(image='alpine', command=['echo', boolean])

with tempfile.TemporaryDirectory() as tmpdir:
pipeline_spec_path = os.path.join(tmpdir, 'output.yaml')
compiler.Compiler().compile(
comp, pipeline_spec_path, pipeline_parameters={'boolean': True})
pipeline_spec = pipeline_spec_from_file(pipeline_spec_path)

# test outer pipeline "wrapper" interface
self.assertEqual(
pipeline_spec.root.input_definitions.parameters['boolean']
.default_value.bool_value, True)

def test_pipeline_no_input(self):

@dsl.component
def comp(boolean: bool = True) -> bool:
return boolean

@dsl.pipeline
def pipeline_no_input():
comp()

# test inner component interface
self.assertEqual(
pipeline_no_input.pipeline_spec.components['comp-comp']
.input_definitions.parameters['boolean'].default_value.bool_value,
True)

def test_pipeline_with_input(self):

@dsl.component
def comp(boolean: bool = True) -> bool:
return boolean

@dsl.pipeline
def pipeline_with_input(boolean: bool = True):
comp(boolean=boolean)

# test inner component interface
self.assertEqual(
pipeline_with_input.pipeline_spec.components['comp-comp']
.input_definitions.parameters['boolean'].default_value.bool_value,
True)

# test pipeline interface
self.assertEqual(
pipeline_with_input.pipeline_spec.root.input_definitions
.parameters['boolean'].default_value.bool_value, True)

def test_pipeline_with_with_overrides(self):

@dsl.component
def comp(boolean: bool = True) -> bool:
return boolean

@dsl.pipeline
def pipeline_with_input(boolean: bool = False):
comp(boolean=boolean)

with tempfile.TemporaryDirectory() as tmpdir:
pipeline_spec_path = os.path.join(tmpdir, 'output.yaml')
compiler.Compiler().compile(
pipeline_with_input,
pipeline_spec_path,
pipeline_parameters={'boolean': True})
pipeline_spec = pipeline_spec_from_file(pipeline_spec_path)

# test inner component interface
self.assertEqual(
pipeline_spec.components['comp-comp'].input_definitions
.parameters['boolean'].default_value.bool_value, True)

# test pipeline interface
self.assertEqual(
pipeline_spec.root.input_definitions.parameters['boolean']
.default_value.bool_value, True)


if __name__ == '__main__':
unittest.main()
29 changes: 18 additions & 11 deletions sdk/python/kfp/compiler/pipeline_spec_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,9 +360,11 @@ def build_component_spec_for_task(
input_name].parameter_type = type_utils.get_parameter_type(
input_spec.type)
if input_spec.default is not None:
component_spec.input_definitions.parameters[
input_name].default_value.CopyFrom(
to_protobuf_value(input_spec.default))
_fill_in_component_input_default_value(
component_spec=component_spec,
input_name=input_name,
default_value=input_spec.default,
)

else:
component_spec.input_definitions.artifacts[
Expand Down Expand Up @@ -404,9 +406,11 @@ def _build_component_spec_from_component_spec_structure(
input_name].parameter_type = type_utils.get_parameter_type(
input_spec.type)
if input_spec.default is not None:
component_spec.input_definitions.parameters[
input_name].default_value.CopyFrom(
to_protobuf_value(input_spec.default))
_fill_in_component_input_default_value(
component_spec=component_spec,
input_name=input_name,
default_value=input_spec.default,
)

else:
component_spec.input_definitions.artifacts[
Expand Down Expand Up @@ -573,11 +577,15 @@ def _fill_in_component_input_default_value(
parameter_type = component_spec.input_definitions.parameters[
input_name].parameter_type
if pipeline_spec_pb2.ParameterType.NUMBER_INTEGER == parameter_type:
# cast to int to support v1 component YAML where NUMBER_INTEGER defaults are included as strings
# for example, input Limit: https://raw.githubusercontent.com/kubeflow/pipelines/60a2612541ec08c6a85c237d2ec7525b12543a43/components/datasets/Chicago_Taxi_Trips/component.yaml
component_spec.input_definitions.parameters[
input_name].default_value.number_value = default_value
input_name].default_value.number_value = int(default_value)
# cast to int to support v1 component YAML where NUMBER_DOUBLE defaults are included as strings
# for example, input learning_rate: https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml
elif pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE == parameter_type:
component_spec.input_definitions.parameters[
input_name].default_value.number_value = default_value
input_name].default_value.number_value = float(default_value)
elif pipeline_spec_pb2.ParameterType.STRING == parameter_type:
component_spec.input_definitions.parameters[
input_name].default_value.string_value = default_value
Expand Down Expand Up @@ -1061,9 +1069,8 @@ def modify_pipeline_spec_with_override(
# that match the pipeline inputs definition.
for input_name, input_value in (pipeline_parameters or {}).items():
if input_name in pipeline_spec.root.input_definitions.parameters:
pipeline_spec.root.input_definitions.parameters[
input_name].default_value.CopyFrom(
to_protobuf_value(input_value))
_fill_in_component_input_default_value(pipeline_spec.root,
input_name, input_value)
elif input_name in pipeline_spec.root.input_definitions.artifacts:
raise NotImplementedError(
'Default value for artifact input is not supported.')
Expand Down
22 changes: 11 additions & 11 deletions sdk/python/test_data/pipelines/xgboost_sample_pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ components:
defaultValue: csv
parameterType: STRING
limit:
defaultValue: '1000'
defaultValue: 1000.0
parameterType: NUMBER_INTEGER
select:
defaultValue: trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,fare,tips,tolls,extras,trip_total,payment_type,company,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid_location
Expand Down Expand Up @@ -132,19 +132,19 @@ components:
defaultValue: gbtree
parameterType: STRING
label_column:
defaultValue: '0'
defaultValue: 0.0
Copy link
Member Author

@connor-mccarthy connor-mccarthy Nov 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that these changes were not the intent of this fix, but the improved default_value setting logic caught this type inconsistency that was previously bypassed.

parameterType: NUMBER_INTEGER
learning_rate:
defaultValue: '0.3'
defaultValue: 0.3
parameterType: NUMBER_DOUBLE
max_depth:
defaultValue: '6'
defaultValue: 6.0
parameterType: NUMBER_INTEGER
min_split_loss:
defaultValue: '0'
defaultValue: 0.0
parameterType: NUMBER_DOUBLE
num_iterations:
defaultValue: '10'
defaultValue: 10.0
parameterType: NUMBER_INTEGER
objective:
defaultValue: reg:squarederror
Expand Down Expand Up @@ -174,16 +174,16 @@ components:
label_column_name:
parameterType: STRING
learning_rate:
defaultValue: '0.3'
defaultValue: 0.3
parameterType: NUMBER_DOUBLE
max_depth:
defaultValue: '6'
defaultValue: 6.0
parameterType: NUMBER_INTEGER
min_split_loss:
defaultValue: '0'
defaultValue: 0.0
parameterType: NUMBER_DOUBLE
num_iterations:
defaultValue: '10'
defaultValue: 10.0
parameterType: NUMBER_INTEGER
objective:
defaultValue: reg:squarederror
Expand Down Expand Up @@ -885,4 +885,4 @@ root:
taskInfo:
name: xgboost-train-2
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-beta.4
sdkVersion: kfp-2.0.0-beta.6