From f7e5dcb9d3360f3d1d64e00e40ad543892e82854 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Fri, 9 Jun 2023 14:27:28 +0200 Subject: [PATCH 01/16] add writer component --- fondant/component.py | 58 +++++++++++++++++++++++++++++++++++++++-- tests/test_component.py | 43 +++++++++++++++++++++++------- 2 files changed, 90 insertions(+), 11 deletions(-) diff --git a/fondant/component.py b/fondant/component.py index 7594bcfaf..adb11ebcb 100644 --- a/fondant/component.py +++ b/fondant/component.py @@ -112,7 +112,7 @@ def _load_or_create_manifest(self) -> Manifest: """Abstract method that returns the dataset manifest.""" @abstractmethod - def _process_dataset(self, manifest: Manifest) -> dd.DataFrame: + def _process_dataset(self, manifest: Manifest) -> t.Union[None, dd.DataFrame]: """Abstract method that processes the manifest and returns another dataframe. """ @@ -223,7 +223,7 @@ def transform(self, *args, **kwargs) -> dd.DataFrame: kwargs: Arguments provided to the component are passed as keyword arguments """ - def _process_dataset(self, manifest: Manifest) -> dd.DataFrame: + def _process_dataset(self, manifest: Manifest) -> t.Union[None, dd.DataFrame]: """ Creates a DataLoader using the provided manifest and loads the input dataframe using the `load_dataframe` instance, and applies data transformations to it using the `transform` @@ -237,3 +237,57 @@ def _process_dataset(self, manifest: Manifest) -> dd.DataFrame: df = self.transform(dataframe=df, **self.user_arguments) return df + + +class WriteComponent(Component): + """Base class for a Fondant write component.""" + + @classmethod + def _add_and_parse_args(cls, spec: ComponentSpec): + parser = argparse.ArgumentParser() + component_arguments = cls._get_component_arguments(spec) + + for arg in component_arguments.values(): + parser.add_argument( + f"--{arg.name}", + type=kubeflow2python_type[arg.type], # type: ignore + required=True, + help=arg.description, + ) + + return parser.parse_args() + + def _load_or_create_manifest(self) -> Manifest: + return Manifest.from_file(self.input_manifest_path) + + @abstractmethod + def write(self, *args, **kwargs): + """ + Abstract method to write a dataframe to a final custom location. + + Args: + args: The dataframe will be passed in as a positional argument + kwargs: Arguments provided to the component are passed as keyword arguments + """ + + def _process_dataset(self, manifest: Manifest) -> t.Union[None, dd.DataFrame]: + """ + Creates a DataLoader using the provided manifest and loads the input dataframe using the + `load_dataframe` instance, and applies data transformations to it using the `transform` + method implemented by the derived class. Returns a single dataframe. + + Returns: + A `dd.DataFrame` instance with updated data based on the applied data transformations. + """ + data_loader = DaskDataLoader(manifest=manifest, component_spec=self.spec) + df = data_loader.load_dataframe() + self.write(dataframe=df, **self.user_arguments) + + return None + + def _write_data(self, dataframe: dd.DataFrame, *, manifest: Manifest): + """Create a data writer given a manifest and writes out the index and subsets.""" + pass + + def upload_manifest(self, manifest: Manifest, save_path: str): + pass diff --git a/tests/test_component.py b/tests/test_component.py index 4c8f267c0..d35ff8277 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -10,7 +10,7 @@ import pytest import yaml -from fondant.component import LoadComponent, TransformComponent +from fondant.component import LoadComponent, TransformComponent, WriteComponent from fondant.data_io import DaskDataLoader components_path = Path(__file__).parent / "example_specs/components" @@ -75,8 +75,10 @@ def test_component(mock_args): } -def test_valid_transform_kwargs(monkeypatch): - """Test that arguments are passed correctly to `Component.transform` method.""" +def test_transform_component(monkeypatch): + """Test that arguments are passed correctly to `Component.transform` method and that valid + errors are returned when required arguments are missing. + """ class EarlyStopException(Exception): """Used to stop execution early instead of mocking all later functionality.""" @@ -120,12 +122,23 @@ def transform(self, dataframe, *, flag, value): # Instantiate and run component component = MyComponent.from_args() + with pytest.raises(EarlyStopException): component.run() + # Remove component specs from arguments + component_spec_index = sys.argv.index("--component_spec") + del sys.argv[component_spec_index : component_spec_index + 2] + + # Instantiate and run component + with pytest.raises(ValueError): + MyComponent.from_args() + -def test_invalid_transform_kwargs(monkeypatch): - """Test that arguments are passed correctly to `Component.transform` method.""" +def test_write_component(tmp_path_factory, monkeypatch): + """Test that arguments are passed correctly to `Component.write` method and that valid + errors are returned when required arguments are missing. + """ class EarlyStopException(Exception): """Used to stop execution early instead of mocking all later functionality.""" @@ -141,14 +154,16 @@ def mocked_load_dataframe(self): component_spec = arguments_dir / "component.yaml" input_manifest = arguments_dir / "input_manifest.json" - yaml_file_to_json_string(component_spec) + component_spec_string = yaml_file_to_json_string(component_spec) # Implemented Component class - class MyComponent(TransformComponent): - def transform(self, dataframe, *, flag, value): + class MyComponent(WriteComponent): + def write(self, dataframe, *, flag, value): assert flag == "success" assert value == 1 - raise EarlyStopException() + # Mock write function that sinks final data to a local directory + with tmp_path_factory.mktemp("temp") as fn: + dataframe.to_parquet(fn) # Mock CLI arguments sys.argv = [ @@ -163,8 +178,18 @@ def transform(self, dataframe, *, flag, value): "1", "--output_manifest_path", "", + "--component_spec", + f"{component_spec_string}", ] + # # Instantiate and run component + component = MyComponent.from_args() + component.run() + + # Remove component specs from arguments + component_spec_index = sys.argv.index("--component_spec") + del sys.argv[component_spec_index : component_spec_index + 2] + # Instantiate and run component with pytest.raises(ValueError): MyComponent.from_args() From 17c5c7e176649f687d88aa66c41088292577195d Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Mon, 12 Jun 2023 14:40:24 +0200 Subject: [PATCH 02/16] modify component spec schema to accept default arguments --- fondant/schemas/component_spec.json | 43 +++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/fondant/schemas/component_spec.json b/fondant/schemas/component_spec.json index d196c3db3..2183a3b3d 100644 --- a/fondant/schemas/component_spec.json +++ b/fondant/schemas/component_spec.json @@ -80,6 +80,49 @@ }, "description": { "type": "string" + }, + "default_value": { + "oneOf": [ + { + "type": "array" + }, + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number", + "format": "float" + }, + { + "type": "boolean" + }, + { + "type": "object" + }, + { + "type": "array", + "items": { + "type": [ + "array", + "string", + "integer", + { + "type": "number", + "format": "float" + }, + "boolean", + "object" + ] + }, + "uniqueItems": true + }, + { + "type": "null" + } + ] } }, "required": [ From a4e44d2e3bf5f77854859dc330f4e4c89aae7d20 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Mon, 12 Jun 2023 14:41:41 +0200 Subject: [PATCH 03/16] enable adding default arguments --- fondant/component.py | 95 +++++++++++++++++---------------------- fondant/component_spec.py | 6 +++ 2 files changed, 47 insertions(+), 54 deletions(-) diff --git a/fondant/component.py b/fondant/component.py index adb11ebcb..b938d606c 100644 --- a/fondant/component.py +++ b/fondant/component.py @@ -77,7 +77,6 @@ def from_spec(cls, component_spec: ComponentSpec) -> "Component": metadata = args_dict.pop("metadata") metadata = json.loads(metadata) if metadata else {} - return cls( component_spec, input_manifest_path=input_manifest_path, @@ -86,6 +85,38 @@ def from_spec(cls, component_spec: ComponentSpec) -> "Component": user_arguments=args_dict, ) + @classmethod + def _add_and_parse_args(cls, spec: ComponentSpec): + parser = argparse.ArgumentParser() + component_arguments = cls._get_component_arguments(spec) + + for arg in component_arguments.values(): + # Input manifest is not required for loading component + if arg.name in cls.optional_fondant_arguments(): + input_required = False + default = None + elif arg.default: + input_required = False + default = arg.default + else: + input_required = True + default = None + + parser.add_argument( + f"--{arg.name}", + type=kubeflow2python_type[arg.type], # type: ignore + required=input_required, + default=default, + help=arg.description, + ) + + return parser.parse_args() + + @staticmethod + @abstractmethod + def optional_fondant_arguments() -> t.List[str]: + pass + @staticmethod def _get_component_arguments(spec: ComponentSpec) -> t.Dict[str, Argument]: """ @@ -102,11 +133,6 @@ def _get_component_arguments(spec: ComponentSpec) -> t.Dict[str, Argument]: component_arguments.update(kubeflow_component_spec.output_arguments) return component_arguments - @classmethod - @abstractmethod - def _add_and_parse_args(cls, spec: ComponentSpec) -> argparse.Namespace: - """Abstract method to add and parse the component arguments.""" - @abstractmethod def _load_or_create_manifest(self) -> Manifest: """Abstract method that returns the dataset manifest.""" @@ -142,26 +168,9 @@ def upload_manifest(self, manifest: Manifest, save_path: str): class LoadComponent(Component): """Base class for a Fondant load component.""" - @classmethod - def _add_and_parse_args(cls, spec: ComponentSpec): - parser = argparse.ArgumentParser() - component_arguments = cls._get_component_arguments(spec) - - for arg in component_arguments.values(): - # Input manifest is not required for loading component - if arg.name == "input_manifest_path": - input_required = False - else: - input_required = True - - parser.add_argument( - f"--{arg.name}", - type=kubeflow2python_type[arg.type], # type: ignore - required=input_required, - help=arg.description, - ) - - return parser.parse_args() + @staticmethod + def optional_fondant_arguments() -> t.List[str]: + return ["input_manifest_path"] def _load_or_create_manifest(self) -> Manifest: # create initial manifest @@ -195,20 +204,9 @@ def _process_dataset(self, manifest: Manifest) -> dd.DataFrame: class TransformComponent(Component): """Base class for a Fondant transform component.""" - @classmethod - def _add_and_parse_args(cls, spec: ComponentSpec): - parser = argparse.ArgumentParser() - component_arguments = cls._get_component_arguments(spec) - - for arg in component_arguments.values(): - parser.add_argument( - f"--{arg.name}", - type=kubeflow2python_type[arg.type], # type: ignore - required=True, - help=arg.description, - ) - - return parser.parse_args() + @staticmethod + def optional_fondant_arguments() -> t.List[str]: + return [] def _load_or_create_manifest(self) -> Manifest: return Manifest.from_file(self.input_manifest_path) @@ -242,20 +240,9 @@ def _process_dataset(self, manifest: Manifest) -> t.Union[None, dd.DataFrame]: class WriteComponent(Component): """Base class for a Fondant write component.""" - @classmethod - def _add_and_parse_args(cls, spec: ComponentSpec): - parser = argparse.ArgumentParser() - component_arguments = cls._get_component_arguments(spec) - - for arg in component_arguments.values(): - parser.add_argument( - f"--{arg.name}", - type=kubeflow2python_type[arg.type], # type: ignore - required=True, - help=arg.description, - ) - - return parser.parse_args() + @staticmethod + def optional_fondant_arguments() -> t.List[str]: + return [] def _load_or_create_manifest(self) -> Manifest: return Manifest.from_file(self.input_manifest_path) diff --git a/fondant/component_spec.py b/fondant/component_spec.py index 40f88e4dc..b9dbf6261 100644 --- a/fondant/component_spec.py +++ b/fondant/component_spec.py @@ -46,11 +46,14 @@ class Argument: name: name of the argument description: argument description type: the python argument type (str, int, ...) + default: default value of the argument (defaults to None) + """ name: str description: str type: str + default: t.Optional[str] = None class ComponentSubset: @@ -180,6 +183,7 @@ def args(self) -> t.Dict[str, Argument]: name=name, description=arg_info["description"], type=arg_info["type"], + default=arg_info["default"] if "default" in arg_info else None, ) for name, arg_info in self._specification.get("args", {}).items() } @@ -236,6 +240,7 @@ def from_fondant_component_spec( "name": arg.name, "description": arg.description, "type": python2kubeflow_type[arg.type], + **({"default": arg.default} if arg.default is not None else {}), } for arg in fondant_component.args.values() ), @@ -305,6 +310,7 @@ def input_arguments(self) -> t.Mapping[str, Argument]: name=info["name"], description=info["description"], type=info["type"], + default=info["default"] if "default" in info else None, ) for info in self._specification["inputs"] } From 6d2d1ab9425da2321b8af1705a287d82bb532669 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Mon, 12 Jun 2023 14:42:06 +0200 Subject: [PATCH 04/16] test adding default arguments --- .../arguments/component_default_args.yaml | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tests/example_specs/components/arguments/component_default_args.yaml diff --git a/tests/example_specs/components/arguments/component_default_args.yaml b/tests/example_specs/components/arguments/component_default_args.yaml new file mode 100644 index 000000000..124c1d362 --- /dev/null +++ b/tests/example_specs/components/arguments/component_default_args.yaml @@ -0,0 +1,29 @@ +name: Example component +description: This is an example component +image: example_component:latest + +args: + string_default_arg: + description: default string argument + type: str + default: foo + integer_default_arg: + description: default integer argument + type: int + default: 1 + float_default_arg: + description: default float argument + type: float + default: 3.14 + bool_default_arg: + description: default bool argument + type: bool + default: 'False' + list_default_arg: + description: default list argument + type: list + default: '["foo", "bar"]' + dict_default_arg: + description: default dict argument + type: dict + default: '{"foo":1, "bar":2}' From af597f82255cae270f495299b0ca122907941586 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Mon, 12 Jun 2023 14:44:53 +0200 Subject: [PATCH 05/16] fixmypy issue --- tests/test_component.py | 61 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/tests/test_component.py b/tests/test_component.py index d35ff8277..42c2214f8 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -140,9 +140,6 @@ def test_write_component(tmp_path_factory, monkeypatch): errors are returned when required arguments are missing. """ - class EarlyStopException(Exception): - """Used to stop execution early instead of mocking all later functionality.""" - # Mock `Dataset.load_dataframe` so no actual data is loaded def mocked_load_dataframe(self): return dd.from_dict({"a": [1, 2, 3]}, npartitions=1) @@ -193,3 +190,61 @@ def write(self, dataframe, *, flag, value): # Instantiate and run component with pytest.raises(ValueError): MyComponent.from_args() + + +def test_default_args_component(tmp_path_factory, monkeypatch): + """Test that arguments are passed correctly to `Component.write` method and that valid + errors are returned when required arguments are missing. + """ + + # Mock `Dataset.load_dataframe` so no actual data is loaded + def mocked_load_dataframe(self): + return dd.from_dict({"a": [1, 2, 3]}, npartitions=1) + + monkeypatch.setattr(DaskDataLoader, "load_dataframe", mocked_load_dataframe) + + # Define paths to specs to instantiate component + arguments_dir = components_path / "arguments" + component_spec = arguments_dir / "component_default_args.yaml" + input_manifest = arguments_dir / "input_manifest.json" + + component_spec_string = yaml_file_to_json_string(component_spec) + + # Implemented Component class + class MyComponent(WriteComponent): + def write( + self, + dataframe, + *, + string_default_arg, + integer_default_arg, + float_default_arg, + bool_default_arg, + list_default_arg, + dict_default_arg, + ): + float_const = 3.14 + # Mock write function that sinks final data to a local directory + assert string_default_arg == "foo" + assert integer_default_arg == 1 + assert float_default_arg == float_const + assert bool_default_arg is False + assert list_default_arg == ["foo", "bar"] + assert dict_default_arg == {"foo": 1, "bar": 2} + + # Mock CLI arguments + sys.argv = [ + "", + "--input_manifest_path", + str(input_manifest), + "--metadata", + "", + "--output_manifest_path", + "", + "--component_spec", + f"{component_spec_string}", + ] + + # # Instantiate and run component + component = MyComponent.from_args() + component.run() From c90b53bb31726f0acb1809a9fda806ae335f28b6 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Mon, 12 Jun 2023 14:48:11 +0200 Subject: [PATCH 06/16] correct docs --- tests/test_component.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_component.py b/tests/test_component.py index 42c2214f8..6a98052be 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -193,8 +193,8 @@ def write(self, dataframe, *, flag, value): def test_default_args_component(tmp_path_factory, monkeypatch): - """Test that arguments are passed correctly to `Component.write` method and that valid - errors are returned when required arguments are missing. + """Test that default arguments defined in the fondant spec are passed correctly and have the + proper data type. """ # Mock `Dataset.load_dataframe` so no actual data is loaded From a8e55d81f90625854dfc4526a99ee08e8453afb6 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Mon, 12 Jun 2023 14:59:52 +0200 Subject: [PATCH 07/16] update component spec --- fondant/schemas/component_spec.json | 37 +------------------ .../arguments/component_default_args.yaml | 4 ++ tests/test_component.py | 4 ++ 3 files changed, 10 insertions(+), 35 deletions(-) diff --git a/fondant/schemas/component_spec.json b/fondant/schemas/component_spec.json index 2183a3b3d..aee436c00 100644 --- a/fondant/schemas/component_spec.json +++ b/fondant/schemas/component_spec.json @@ -81,46 +81,13 @@ "description": { "type": "string" }, - "default_value": { + "default": { "oneOf": [ - { - "type": "array" - }, { "type": "string" }, { - "type": "integer" - }, - { - "type": "number", - "format": "float" - }, - { - "type": "boolean" - }, - { - "type": "object" - }, - { - "type": "array", - "items": { - "type": [ - "array", - "string", - "integer", - { - "type": "number", - "format": "float" - }, - "boolean", - "object" - ] - }, - "uniqueItems": true - }, - { - "type": "null" + "type": "number" } ] } diff --git a/tests/example_specs/components/arguments/component_default_args.yaml b/tests/example_specs/components/arguments/component_default_args.yaml index 124c1d362..7a1aabb01 100644 --- a/tests/example_specs/components/arguments/component_default_args.yaml +++ b/tests/example_specs/components/arguments/component_default_args.yaml @@ -27,3 +27,7 @@ args: description: default dict argument type: dict default: '{"foo":1, "bar":2}' + override_default_string_arg: + description: default argument that can be overriden + type: str + default: foo diff --git a/tests/test_component.py b/tests/test_component.py index 6a98052be..537899032 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -222,6 +222,7 @@ def write( bool_default_arg, list_default_arg, dict_default_arg, + override_default_string_arg, ): float_const = 3.14 # Mock write function that sinks final data to a local directory @@ -231,6 +232,7 @@ def write( assert bool_default_arg is False assert list_default_arg == ["foo", "bar"] assert dict_default_arg == {"foo": 1, "bar": 2} + assert override_default_string_arg == "bar" # Mock CLI arguments sys.argv = [ @@ -243,6 +245,8 @@ def write( "", "--component_spec", f"{component_spec_string}", + "--override_default_string_arg", + "bar", ] # # Instantiate and run component From 00dfa5531a89f1423286ee7095fe685454b03f0f Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Mon, 12 Jun 2023 15:17:48 +0200 Subject: [PATCH 08/16] update docs --- docs/component_spec.md | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/docs/component_spec.md b/docs/component_spec.md index 6d11e6184..fda0f1922 100644 --- a/docs/component_spec.md +++ b/docs/component_spec.md @@ -121,41 +121,52 @@ Please check the [examples](#examples) below to build a better understanding. ### Args The `args` section describes which arguments the component takes. Each argument is defined by a -`description` and a `type`, which should be one of the builtin Python types. +`description` and a `type`, which should be one of the builtin Python types. Additionally, you can +pass an optional argument in the optional `default` field of the arguments. This can be useful in case +your function has many arguments that could be set to default and that don't need to be explicitly defined when +initializing your component. +_Note:_ default iterable arguments such as `dict` and `list` have to be passed as a string +(e.g. `'{"foo":1, "bar":2}`, `'["foo","bar]'`) ```yaml args: custom_argument: description: A custom argument type: str + default_argument: + description: A default argument + type: str + default: bar ``` -These arguments are passed in when the component is instantiated: - +These arguments are passed in when the component is instantiated. Notice that we are not passing the +default argument specified above. You could override the default value in the component spec by passing +it as an argument to the component. ```python from fondant.pipeline import ComponentOp custom_op = ComponentOp( component_spec_path="components/custom_component/fondant_component.yaml", arguments={ - "custom_argument": "foobar" + "custom_argument": "foo" }, ) ``` -And passed as a keyword argument to the `transform()` method of the component. +Afterwards, we pass all keyword arguments to the `transform()` method of the component. ```python from fondant.component import TransformComponent class ExampleComponent(TransformComponent): - def transform(self, dataframe, *, custom_argument): + def transform(self, dataframe, *, custom_argument, default_argument): """Implement your custom logic in this single method Args: dataframe: A Dask dataframe containing the data custom_argument: An argument passed to the component + default_argument: A default argument passed to the components """ ``` From 0fa694f289be59eca832ade2c5f03b02ae998f59 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Tue, 13 Jun 2023 11:21:36 +0200 Subject: [PATCH 09/16] add optional field to schema --- fondant/schemas/component_spec.json | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fondant/schemas/component_spec.json b/fondant/schemas/component_spec.json index aee436c00..f4c58c3a5 100644 --- a/fondant/schemas/component_spec.json +++ b/fondant/schemas/component_spec.json @@ -90,12 +90,29 @@ "type": "number" } ] + }, + "optional": { + "type": "boolean" } }, "required": [ "type" - ] + ], + "if": { + "properties": { + "default": { + "not": {} + } + } + }, + "then": { + "properties": { + "accepts_optional": { + "const": true + } + } + } } } } -} \ No newline at end of file +} From c9f4429f2ad65395fc2d7af8f2448a7e15c0afaf Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Tue, 13 Jun 2023 16:00:52 +0200 Subject: [PATCH 10/16] enable defining default arguments --- fondant/component.py | 11 +++++++++-- fondant/component_spec.py | 10 +++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/fondant/component.py b/fondant/component.py index b938d606c..3c7dafeca 100644 --- a/fondant/component.py +++ b/fondant/component.py @@ -92,10 +92,17 @@ def _add_and_parse_args(cls, spec: ComponentSpec): for arg in component_arguments.values(): # Input manifest is not required for loading component - if arg.name in cls.optional_fondant_arguments(): + if arg.name in cls.optional_fondant_arguments() or ( + not arg.default and arg.optional is True + ): + # Input is optional and no default argument is provided, set input_required to + # False. This bypasses providing the argument as a command-line argument. Value will + # be None input_required = False default = None - elif arg.default: + elif arg.default and arg.optional is False: + # Input is required but a default argument is provided, set input_required to False. + # Value of the argument will be the default value input_required = False default = arg.default else: diff --git a/fondant/component_spec.py b/fondant/component_spec.py index b9dbf6261..72efb5c79 100644 --- a/fondant/component_spec.py +++ b/fondant/component_spec.py @@ -47,13 +47,14 @@ class Argument: description: argument description type: the python argument type (str, int, ...) default: default value of the argument (defaults to None) - + optional: whether an argument is optional or not (defaults to False) """ name: str description: str type: str default: t.Optional[str] = None + optional: t.Optional[bool] = False class ComponentSubset: @@ -184,6 +185,7 @@ def args(self) -> t.Dict[str, Argument]: description=arg_info["description"], type=arg_info["type"], default=arg_info["default"] if "default" in arg_info else None, + optional=True if "optional" in arg_info else False, ) for name, arg_info in self._specification.get("args", {}).items() } @@ -241,6 +243,11 @@ def from_fondant_component_spec( "description": arg.description, "type": python2kubeflow_type[arg.type], **({"default": arg.default} if arg.default is not None else {}), + **( + {"optional": str(arg.optional)} + if arg.optional is True + else {} + ), } for arg in fondant_component.args.values() ), @@ -311,6 +318,7 @@ def input_arguments(self) -> t.Mapping[str, Argument]: description=info["description"], type=info["type"], default=info["default"] if "default" in info else None, + optional=True if "optional" in info else False, ) for info in self._specification["inputs"] } From 5ee3b31fe74c2b37328874cfeac959fc503f0bd5 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Tue, 13 Jun 2023 16:01:33 +0200 Subject: [PATCH 11/16] add relevant tests --- .../arguments/component_default_args.yaml | 11 +++++++++++ tests/test_component.py | 14 +++++++++++--- tests/test_component_specs.py | 14 ++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/tests/example_specs/components/arguments/component_default_args.yaml b/tests/example_specs/components/arguments/component_default_args.yaml index 7a1aabb01..694cfacc3 100644 --- a/tests/example_specs/components/arguments/component_default_args.yaml +++ b/tests/example_specs/components/arguments/component_default_args.yaml @@ -31,3 +31,14 @@ args: description: default argument that can be overriden type: str default: foo + non_optional_with_default: + description: this should return the default + type: str + default: foo + non_optional_without_default: + description: this input is required and should be specified otherwise an error would occur + type: str + optional_without_default: + description: this input is optional and without default, if not specified it should return None + type: str + optional: true diff --git a/tests/test_component.py b/tests/test_component.py index 537899032..8cefbaebf 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -192,9 +192,9 @@ def write(self, dataframe, *, flag, value): MyComponent.from_args() -def test_default_args_component(tmp_path_factory, monkeypatch): - """Test that default arguments defined in the fondant spec are passed correctly and have the - proper data type. +def test_default_and_optional_args_component(tmp_path_factory, monkeypatch): + """Test that default arguments and optional defined in the fondant spec are passed correctly + and have the proper data type and assigned value. """ # Mock `Dataset.load_dataframe` so no actual data is loaded @@ -223,6 +223,9 @@ def write( list_default_arg, dict_default_arg, override_default_string_arg, + non_optional_with_default, + non_optional_without_default, + optional_without_default, ): float_const = 3.14 # Mock write function that sinks final data to a local directory @@ -233,6 +236,9 @@ def write( assert list_default_arg == ["foo", "bar"] assert dict_default_arg == {"foo": 1, "bar": 2} assert override_default_string_arg == "bar" + assert non_optional_with_default == "foo" + assert non_optional_without_default == "bar" + assert optional_without_default is None # Mock CLI arguments sys.argv = [ @@ -247,6 +253,8 @@ def write( f"{component_spec_string}", "--override_default_string_arg", "bar", + "--non_optional_without_default", + "bar", ] # # Instantiate and run component diff --git a/tests/test_component_specs.py b/tests/test_component_specs.py index afd2b0133..39efc910c 100644 --- a/tests/test_component_specs.py +++ b/tests/test_component_specs.py @@ -35,6 +35,12 @@ def invalid_fondant_schema() -> dict: return yaml.safe_load(f) +@pytest.fixture +def invalid_fondant_schema_arg_definition() -> dict: + with open(component_specs_path / "invalid_component_args.yaml") as f: + return yaml.safe_load(f) + + def test_component_spec_validation(valid_fondant_schema, invalid_fondant_schema): """Test that the manifest is validated correctly on instantiation.""" ComponentSpec(valid_fondant_schema) @@ -72,3 +78,11 @@ def test_component_spec_no_args(valid_fondant_schema_no_args): assert fondant_component.name == "Example component" assert fondant_component.description == "This is an example component" assert fondant_component.args == {} + + +def test_component_spec_invalid_args(invalid_fondant_schema_arg_definition): + """Test that a component spec with both default and optional arguments are + validated correctly. + """ + with pytest.raises(InvalidComponentSpec): + ComponentSpec(invalid_fondant_schema_arg_definition) From 66fdb4c914e5b0d4155975c99b93462c5a0a45b6 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Tue, 13 Jun 2023 16:47:22 +0200 Subject: [PATCH 12/16] add test file --- .../invalid_component_args.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/example_specs/component_specs/invalid_component_args.yaml diff --git a/tests/example_specs/component_specs/invalid_component_args.yaml b/tests/example_specs/component_specs/invalid_component_args.yaml new file mode 100644 index 000000000..d10c456c1 --- /dev/null +++ b/tests/example_specs/component_specs/invalid_component_args.yaml @@ -0,0 +1,18 @@ +name: Example component +description: This is an example component +image: example_component:latest + +consumes: + images: + data: binary + +produces: + captions: + data: string + +Arguments: + storage_args: + description: invalid argument + type: str + optional: True + default: foo \ No newline at end of file From 61adc40c601f6a0f6002702245262e5feef00cd5 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Wed, 14 Jun 2023 10:06:21 +0200 Subject: [PATCH 13/16] bugfix string bool --- fondant/component_spec.py | 6 ++++-- .../components/arguments/component_default_args.yaml | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fondant/component_spec.py b/fondant/component_spec.py index 72efb5c79..af2d0acf2 100644 --- a/fondant/component_spec.py +++ b/fondant/component_spec.py @@ -185,7 +185,7 @@ def args(self) -> t.Dict[str, Argument]: description=arg_info["description"], type=arg_info["type"], default=arg_info["default"] if "default" in arg_info else None, - optional=True if "optional" in arg_info else False, + optional=arg_info["optional"] if "optional" in arg_info else False, ) for name, arg_info in self._specification.get("args", {}).items() } @@ -318,7 +318,9 @@ def input_arguments(self) -> t.Mapping[str, Argument]: description=info["description"], type=info["type"], default=info["default"] if "default" in info else None, - optional=True if "optional" in info else False, + optional=ast.literal_eval(info["optional"]) + if "optional" in info + else False, ) for info in self._specification["inputs"] } diff --git a/tests/example_specs/components/arguments/component_default_args.yaml b/tests/example_specs/components/arguments/component_default_args.yaml index 694cfacc3..1d6ef89c0 100644 --- a/tests/example_specs/components/arguments/component_default_args.yaml +++ b/tests/example_specs/components/arguments/component_default_args.yaml @@ -35,10 +35,11 @@ args: description: this should return the default type: str default: foo + optional: False non_optional_without_default: description: this input is required and should be specified otherwise an error would occur type: str optional_without_default: description: this input is optional and without default, if not specified it should return None type: str - optional: true + optional: True From 88b4729acf83d84f47b2ec8d226e36fe0bb25711 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Wed, 14 Jun 2023 13:31:00 +0200 Subject: [PATCH 14/16] change method of defining optionals --- fondant/component.py | 12 +---- fondant/component_spec.py | 22 +++----- fondant/schemas/component_spec.json | 19 +------ .../invalid_component_args.yaml | 18 ------- .../arguments/component_default_args.yaml | 50 ++++++++++++++----- tests/test_component.py | 40 ++++++++++----- tests/test_component_specs.py | 14 ------ 7 files changed, 73 insertions(+), 102 deletions(-) delete mode 100644 tests/example_specs/component_specs/invalid_component_args.yaml diff --git a/fondant/component.py b/fondant/component.py index 2df1cf432..4bb62efd8 100644 --- a/fondant/component.py +++ b/fondant/component.py @@ -91,18 +91,10 @@ def _add_and_parse_args(cls, spec: ComponentSpec): component_arguments = cls._get_component_arguments(spec) for arg in component_arguments.values(): - # Input manifest is not required for loading component - if arg.name in cls.optional_fondant_arguments() or ( - not arg.default and arg.optional is True - ): - # Input is optional and no default argument is provided, set input_required to - # False. This bypasses providing the argument as a command-line argument. Value will - # be None + if arg.name in cls.optional_fondant_arguments(): input_required = False default = None - elif arg.default and arg.optional is False: - # Input is required but a default argument is provided, set input_required to False. - # Value of the argument will be the default value + elif arg.default: input_required = False default = arg.default else: diff --git a/fondant/component_spec.py b/fondant/component_spec.py index af2d0acf2..ddc76dfb0 100644 --- a/fondant/component_spec.py +++ b/fondant/component_spec.py @@ -18,12 +18,12 @@ # TODO: remove after upgrading to kfpv2 kubeflow2python_type = { - "String": str, - "Integer": int, - "Float": float, - "Boolean": ast.literal_eval, - "JsonObject": json.loads, - "JsonArray": json.loads, + "String": lambda value: str(value) if value != "None" else None, + "Integer": lambda value: int(value) if value != "None" else None, + "Float": lambda value: float(value) if value != "None" else None, + "Boolean": lambda value: ast.literal_eval(value) if value != "None" else None, + "JsonObject": lambda value: json.loads(value) if value != "None" else None, + "JsonArray": lambda value: json.loads(value) if value != "None" else None, } # TODO: Change after upgrading to kfp v2 # :https://www.kubeflow.org/docs/components/pipelines/v2/data-types/parameters/ @@ -54,7 +54,6 @@ class Argument: description: str type: str default: t.Optional[str] = None - optional: t.Optional[bool] = False class ComponentSubset: @@ -185,7 +184,6 @@ def args(self) -> t.Dict[str, Argument]: description=arg_info["description"], type=arg_info["type"], default=arg_info["default"] if "default" in arg_info else None, - optional=arg_info["optional"] if "optional" in arg_info else False, ) for name, arg_info in self._specification.get("args", {}).items() } @@ -243,11 +241,6 @@ def from_fondant_component_spec( "description": arg.description, "type": python2kubeflow_type[arg.type], **({"default": arg.default} if arg.default is not None else {}), - **( - {"optional": str(arg.optional)} - if arg.optional is True - else {} - ), } for arg in fondant_component.args.values() ), @@ -318,9 +311,6 @@ def input_arguments(self) -> t.Mapping[str, Argument]: description=info["description"], type=info["type"], default=info["default"] if "default" in info else None, - optional=ast.literal_eval(info["optional"]) - if "optional" in info - else False, ) for info in self._specification["inputs"] } diff --git a/fondant/schemas/component_spec.json b/fondant/schemas/component_spec.json index f4c58c3a5..9badfc01b 100644 --- a/fondant/schemas/component_spec.json +++ b/fondant/schemas/component_spec.json @@ -90,28 +90,11 @@ "type": "number" } ] - }, - "optional": { - "type": "boolean" } }, "required": [ "type" - ], - "if": { - "properties": { - "default": { - "not": {} - } - } - }, - "then": { - "properties": { - "accepts_optional": { - "const": true - } - } - } + ] } } } diff --git a/tests/example_specs/component_specs/invalid_component_args.yaml b/tests/example_specs/component_specs/invalid_component_args.yaml deleted file mode 100644 index d10c456c1..000000000 --- a/tests/example_specs/component_specs/invalid_component_args.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: Example component -description: This is an example component -image: example_component:latest - -consumes: - images: - data: binary - -produces: - captions: - data: string - -Arguments: - storage_args: - description: invalid argument - type: str - optional: True - default: foo \ No newline at end of file diff --git a/tests/example_specs/components/arguments/component_default_args.yaml b/tests/example_specs/components/arguments/component_default_args.yaml index 1d6ef89c0..2d582fbfe 100644 --- a/tests/example_specs/components/arguments/component_default_args.yaml +++ b/tests/example_specs/components/arguments/component_default_args.yaml @@ -15,10 +15,14 @@ args: description: default float argument type: float default: 3.14 - bool_default_arg: + bool_false_default_arg: description: default bool argument type: bool default: 'False' + bool_true_default_arg: + description: default bool argument + type: bool + default: 'True' list_default_arg: description: default list argument type: list @@ -27,19 +31,39 @@ args: description: default dict argument type: dict default: '{"foo":1, "bar":2}' - override_default_string_arg: - description: default argument that can be overriden + string_default_arg_none: + description: default string argument type: str - default: foo - non_optional_with_default: - description: this should return the default + default: None + integer_default_arg_none: + description: default integer argument + type: int + default: None + float_default_arg_none: + description: default float argument + type: float + default: None + bool_default_arg_none: + description: default bool argument + type: bool + default: None + list_default_arg_none: + description: default list argument + type: list + default: None + dict_default_arg_none: + description: default dict argument + type: dict + default: None + override_default_arg: + description: argument with default python value type that can be overriden type: str default: foo - optional: False - non_optional_without_default: - description: this input is required and should be specified otherwise an error would occur - type: str - optional_without_default: - description: this input is optional and without default, if not specified it should return None + override_default_none_arg: + description: argument with default None value type that can be overriden with a valid python type + type: float + default: None + override_default_arg_with_none: + description: argument with default python type that can be overriden with None type: str - optional: True + diff --git a/tests/test_component.py b/tests/test_component.py index 8cefbaebf..100356d9c 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -219,26 +219,38 @@ def write( string_default_arg, integer_default_arg, float_default_arg, - bool_default_arg, + bool_true_default_arg, + bool_false_default_arg, list_default_arg, dict_default_arg, - override_default_string_arg, - non_optional_with_default, - non_optional_without_default, - optional_without_default, + string_default_arg_none, + integer_default_arg_none, + float_default_arg_none, + bool_default_arg_none, + list_default_arg_none, + dict_default_arg_none, + override_default_arg, + override_default_none_arg, + override_default_arg_with_none, ): float_const = 3.14 # Mock write function that sinks final data to a local directory assert string_default_arg == "foo" assert integer_default_arg == 1 assert float_default_arg == float_const - assert bool_default_arg is False + assert bool_false_default_arg is False + assert bool_true_default_arg is True assert list_default_arg == ["foo", "bar"] assert dict_default_arg == {"foo": 1, "bar": 2} - assert override_default_string_arg == "bar" - assert non_optional_with_default == "foo" - assert non_optional_without_default == "bar" - assert optional_without_default is None + assert string_default_arg_none is None + assert integer_default_arg_none is None + assert float_default_arg_none is None + assert bool_default_arg_none is None + assert list_default_arg_none is None + assert dict_default_arg_none is None + assert override_default_arg == "bar" + assert override_default_none_arg == float_const + assert override_default_arg_with_none is None # Mock CLI arguments sys.argv = [ @@ -251,10 +263,12 @@ def write( "", "--component_spec", f"{component_spec_string}", - "--override_default_string_arg", - "bar", - "--non_optional_without_default", + "--override_default_arg", "bar", + "--override_default_none_arg", + "3.14", + "--override_default_arg_with_none", + "None", ] # # Instantiate and run component diff --git a/tests/test_component_specs.py b/tests/test_component_specs.py index 39efc910c..afd2b0133 100644 --- a/tests/test_component_specs.py +++ b/tests/test_component_specs.py @@ -35,12 +35,6 @@ def invalid_fondant_schema() -> dict: return yaml.safe_load(f) -@pytest.fixture -def invalid_fondant_schema_arg_definition() -> dict: - with open(component_specs_path / "invalid_component_args.yaml") as f: - return yaml.safe_load(f) - - def test_component_spec_validation(valid_fondant_schema, invalid_fondant_schema): """Test that the manifest is validated correctly on instantiation.""" ComponentSpec(valid_fondant_schema) @@ -78,11 +72,3 @@ def test_component_spec_no_args(valid_fondant_schema_no_args): assert fondant_component.name == "Example component" assert fondant_component.description == "This is an example component" assert fondant_component.args == {} - - -def test_component_spec_invalid_args(invalid_fondant_schema_arg_definition): - """Test that a component spec with both default and optional arguments are - validated correctly. - """ - with pytest.raises(InvalidComponentSpec): - ComponentSpec(invalid_fondant_schema_arg_definition) From 64614f75fecd524ddea24b06c663aebe85026768 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Wed, 14 Jun 2023 13:37:59 +0200 Subject: [PATCH 15/16] make component spec optional --- fondant/component_spec.py | 2 +- tests/example_specs/component_specs/kubeflow_component.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fondant/component_spec.py b/fondant/component_spec.py index ddc76dfb0..03f3bed16 100644 --- a/fondant/component_spec.py +++ b/fondant/component_spec.py @@ -47,7 +47,6 @@ class Argument: description: argument description type: the python argument type (str, int, ...) default: default value of the argument (defaults to None) - optional: whether an argument is optional or not (defaults to False) """ name: str @@ -234,6 +233,7 @@ def from_fondant_component_spec( "name": "component_spec", "description": "The component specification as a dictionary", "type": "JsonObject", + "default": "None", }, *( { diff --git a/tests/example_specs/component_specs/kubeflow_component.yaml b/tests/example_specs/component_specs/kubeflow_component.yaml index 199d2bde0..5f2e33690 100644 --- a/tests/example_specs/component_specs/kubeflow_component.yaml +++ b/tests/example_specs/component_specs/kubeflow_component.yaml @@ -10,6 +10,7 @@ inputs: - name: component_spec description: The component specification as a dictionary type: JsonObject + default: None - name: storage_args description: Storage arguments type: String From 7c7ace737cbb1d4e8777a7493bee6d0b992e0980 Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Thu, 15 Jun 2023 14:56:44 +0200 Subject: [PATCH 16/16] implement PR feedback --- fondant/component.py | 2 +- fondant/component_spec.py | 21 ++++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/fondant/component.py b/fondant/component.py index 27805e4ff..615878f27 100644 --- a/fondant/component.py +++ b/fondant/component.py @@ -105,7 +105,7 @@ def _add_and_parse_args(cls, spec: ComponentSpec): parser.add_argument( f"--{arg.name}", - type=kubeflow2python_type[arg.type], # type: ignore + type=kubeflow2python_type(arg.type), # type: ignore required=input_required, default=default, help=arg.description, diff --git a/fondant/component_spec.py b/fondant/component_spec.py index 03f3bed16..26629a803 100644 --- a/fondant/component_spec.py +++ b/fondant/component_spec.py @@ -17,14 +17,21 @@ from fondant.schema import Field, KubeflowCommandArguments, Type # TODO: remove after upgrading to kfpv2 -kubeflow2python_type = { - "String": lambda value: str(value) if value != "None" else None, - "Integer": lambda value: int(value) if value != "None" else None, - "Float": lambda value: float(value) if value != "None" else None, - "Boolean": lambda value: ast.literal_eval(value) if value != "None" else None, - "JsonObject": lambda value: json.loads(value) if value != "None" else None, - "JsonArray": lambda value: json.loads(value) if value != "None" else None, +kubeflow_to_python_type_dict = { + "String": str, + "Integer": int, + "Float": float, + "Boolean": ast.literal_eval, + "JsonObject": json.loads, + "JsonArray": json.loads, } + + +def kubeflow2python_type(type_: str) -> t.Any: + map_fn = kubeflow_to_python_type_dict[type_] + return lambda value: map_fn(value) if value != "None" else None # type: ignore + + # TODO: Change after upgrading to kfp v2 # :https://www.kubeflow.org/docs/components/pipelines/v2/data-types/parameters/ python2kubeflow_type = {