From 2aedeed7d544751890394a53ec7b108d2e5cb58c Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Mon, 18 Mar 2024 21:40:59 +0800 Subject: [PATCH 01/29] remove dataclass_json Signed-off-by: Future-Outlier --- flytekit/core/type_engine.py | 64 ++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 9153fca032..6a6ff47009 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -13,7 +13,8 @@ import typing from abc import ABC, abstractmethod from functools import lru_cache -from typing import Dict, List, NamedTuple, Optional, Type, cast +from typing import Dict, List, NamedTuple, Optional, Type, cast, Any +from mashumaro.codecs.json import JSONDecoder, JSONEncoder from dataclasses_json import DataClassJsonMixin, dataclass_json from flyteidl.core import literals_pb2 @@ -326,13 +327,13 @@ class Test(DataClassJsonMixin): def __init__(self): super().__init__("Object-Dataclass-Transformer", object) - self._serializable_classes = [DataClassJSONMixin, DataClassJsonMixin] - try: - from mashumaro.mixins.orjson import DataClassORJSONMixin + # self._serializable_classes = [DataClassJSONMixin, DataClassJsonMixin] + # try: + # from mashumaro.mixins.orjson import DataClassORJSONMixin - self._serializable_classes.append(DataClassORJSONMixin) - except ModuleNotFoundError: - pass + # self._serializable_classes.append(DataClassORJSONMixin) + # except ModuleNotFoundError: + # pass def assert_type(self, expected_type: Type[DataClassJsonMixin], v: T): # Skip iterating all attributes in the dataclass if the type of v already matches the expected_type @@ -425,11 +426,11 @@ def get_literal_type(self, t: Type[T]) -> LiteralType: f"Type {t} cannot be parsed." ) - if not self.is_serializable_class(t): - raise AssertionError( - f"Dataclass {t} should be decorated with @dataclass_json or mixin with DataClassJSONMixin to be " - f"serialized correctly" - ) + # if not self.is_serializable_class(t): + # raise AssertionError( + # f"Dataclass {t} should be decorated with @dataclass_json or mixin with DataClassJSONMixin to be " + # f"serialized correctly" + # ) schema = None try: if issubclass(t, DataClassJsonMixin): @@ -473,8 +474,8 @@ def get_literal_type(self, t: Type[T]) -> LiteralType: return _type_models.LiteralType(simple=_type_models.SimpleType.STRUCT, metadata=schema, structure=ts) - def is_serializable_class(self, class_: Type[T]) -> bool: - return any(issubclass(class_, serializable_class) for serializable_class in self._serializable_classes) + # def is_serializable_class(self, class_: Type[T]) -> bool: + # return any(issubclass(class_, serializable_class) for serializable_class in self._serializable_classes) def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal: if isinstance(python_val, dict): @@ -486,14 +487,16 @@ def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], exp f"{type(python_val)} is not of type @dataclass, only Dataclasses are supported for " f"user defined datatypes in Flytekit" ) - if not self.is_serializable_class(type(python_val)): - raise TypeTransformerFailedError( - f"Dataclass {python_type} should be decorated with @dataclass_json or inherit DataClassJSONMixin to be " - f"serialized correctly" - ) + # if not self.is_serializable_class(type(python_val)): + # raise TypeTransformerFailedError( + # f"Dataclass {python_type} should be decorated with @dataclass_json or inherit DataClassJSONMixin to be " + # f"serialized correctly" + # ) self._serialize_flyte_type(python_val, python_type) - - json_str = python_val.to_json() # type: ignore + print("@@@ python type", python_type) + print("@@@ python val", python_val) + json_str = JSONEncoder(python_type).encode(python_val) + # json_str = python_val.to_json() # type: ignore return Literal(scalar=Scalar(generic=_json_format.Parse(json_str, _struct.Struct()))) # type: ignore @@ -739,13 +742,16 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: f"{expected_python_type} is not of type @dataclass, only Dataclasses are supported for " "user defined datatypes in Flytekit" ) - if not self.is_serializable_class(expected_python_type): - raise TypeTransformerFailedError( - f"Dataclass {expected_python_type} should be decorated with @dataclass_json or mixin with DataClassJSONMixin to be " - f"serialized correctly" - ) + # if not self.is_serializable_class(expected_python_type): + # raise TypeTransformerFailedError( + # f"Dataclass {expected_python_type} should be decorated with @dataclass_json or mixin with DataClassJSONMixin to be " + # f"serialized correctly" + # ) json_str = _json_format.MessageToJson(lv.scalar.generic) - dc = expected_python_type.from_json(json_str) # type: ignore + print("expected_python_type:", expected_python_type) + # decoder = JSONDecoder(expected_python_type) + dc = JSONDecoder(expected_python_type).decode(json_str) + # dc = expected_python_type.from_json(json_str) # type: ignore dc = self._fix_structured_dataset_type(expected_python_type, dc) return self._fix_dataclass_int(expected_python_type, self._deserialize_flyte_type(dc, expected_python_type)) @@ -1812,7 +1818,7 @@ def convert_marshmallow_json_schema_to_python_class( """ attribute_list = generate_attribute_list_from_dataclass_json(schema, schema_name) - return dataclass_json(dataclasses.make_dataclass(schema_name, attribute_list)) + return dataclasses.make_dataclass(schema_name, attribute_list) def convert_mashumaro_json_schema_to_python_class( @@ -1825,7 +1831,7 @@ def convert_mashumaro_json_schema_to_python_class( """ attribute_list = generate_attribute_list_from_dataclass_json_mixin(schema, schema_name) - return dataclass_json(dataclasses.make_dataclass(schema_name, attribute_list)) + return dataclasses.make_dataclass(schema_name, attribute_list) def _get_element_type(element_property: typing.Dict[str, str]) -> Type: From 58773b334905733cac479167a0e737665a4064fc Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Mon, 18 Mar 2024 22:15:17 +0800 Subject: [PATCH 02/29] lint, use setattr and getattr, remove annotations Signed-off-by: Future-Outlier --- flytekit/core/type_engine.py | 36 +++++++++--------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 6a6ff47009..86b896ddb2 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -13,10 +13,9 @@ import typing from abc import ABC, abstractmethod from functools import lru_cache -from typing import Dict, List, NamedTuple, Optional, Type, cast, Any -from mashumaro.codecs.json import JSONDecoder, JSONEncoder +from typing import Dict, List, NamedTuple, Optional, Type, cast -from dataclasses_json import DataClassJsonMixin, dataclass_json +from dataclasses_json import DataClassJsonMixin from flyteidl.core import literals_pb2 from google.protobuf import json_format as _json_format from google.protobuf import struct_pb2 as _struct @@ -25,6 +24,7 @@ from google.protobuf.message import Message from google.protobuf.struct_pb2 import Struct from marshmallow_enum import EnumField, LoadDumpOptions +from mashumaro.codecs.json import JSONDecoder, JSONEncoder from mashumaro.mixins.json import DataClassJSONMixin from typing_extensions import Annotated, get_args, get_origin @@ -426,11 +426,6 @@ def get_literal_type(self, t: Type[T]) -> LiteralType: f"Type {t} cannot be parsed." ) - # if not self.is_serializable_class(t): - # raise AssertionError( - # f"Dataclass {t} should be decorated with @dataclass_json or mixin with DataClassJSONMixin to be " - # f"serialized correctly" - # ) schema = None try: if issubclass(t, DataClassJsonMixin): @@ -487,16 +482,9 @@ def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], exp f"{type(python_val)} is not of type @dataclass, only Dataclasses are supported for " f"user defined datatypes in Flytekit" ) - # if not self.is_serializable_class(type(python_val)): - # raise TypeTransformerFailedError( - # f"Dataclass {python_type} should be decorated with @dataclass_json or inherit DataClassJSONMixin to be " - # f"serialized correctly" - # ) + self._serialize_flyte_type(python_val, python_type) - print("@@@ python type", python_type) - print("@@@ python val", python_val) json_str = JSONEncoder(python_type).encode(python_val) - # json_str = python_val.to_json() # type: ignore return Literal(scalar=Scalar(generic=_json_format.Parse(json_str, _struct.Struct()))) # type: ignore @@ -723,7 +711,7 @@ def _fix_val_int(self, t: typing.Type, val: typing.Any) -> typing.Any: return val - def _fix_dataclass_int(self, dc_type: Type[DataClassJsonMixin], dc: DataClassJsonMixin) -> DataClassJsonMixin: + def _fix_dataclass_int(self, dc_type: Type, dc: dataclasses.dataclass) -> dataclasses.dataclass: # type: ignore """ This is a performance penalty to convert to the right types, but this is expected by the user and hence needs to be done @@ -732,8 +720,9 @@ def _fix_dataclass_int(self, dc_type: Type[DataClassJsonMixin], dc: DataClassJso # https://developers.google.com/protocol-buffers/docs/reference/google.protobuf#google.protobuf.Value # Thus we will have to walk the given dataclass and typecast values to int, where expected. for f in dataclasses.fields(dc_type): - val = dc.__getattribute__(f.name) - dc.__setattr__(f.name, self._fix_val_int(f.type, val)) + val = getattr(dc, f.name) + setattr(dc, f.name, self._fix_val_int(f.type, val)) + return dc def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: Type[T]) -> T: @@ -742,16 +731,9 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: f"{expected_python_type} is not of type @dataclass, only Dataclasses are supported for " "user defined datatypes in Flytekit" ) - # if not self.is_serializable_class(expected_python_type): - # raise TypeTransformerFailedError( - # f"Dataclass {expected_python_type} should be decorated with @dataclass_json or mixin with DataClassJSONMixin to be " - # f"serialized correctly" - # ) + json_str = _json_format.MessageToJson(lv.scalar.generic) - print("expected_python_type:", expected_python_type) - # decoder = JSONDecoder(expected_python_type) dc = JSONDecoder(expected_python_type).decode(json_str) - # dc = expected_python_type.from_json(json_str) # type: ignore dc = self._fix_structured_dataset_type(expected_python_type, dc) return self._fix_dataclass_int(expected_python_type, self._deserialize_flyte_type(dc, expected_python_type)) From aad91c36edd863028097333bcf990472f91a77c9 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Mon, 18 Mar 2024 22:22:09 +0800 Subject: [PATCH 03/29] update tests Signed-off-by: Future-Outlier --- tests/flytekit/unit/core/test_type_engine.py | 6 +----- tests/flytekit/unit/core/test_type_hints.py | 12 ------------ 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/tests/flytekit/unit/core/test_type_engine.py b/tests/flytekit/unit/core/test_type_engine.py index b9c9dac8f2..fb306cefe9 100644 --- a/tests/flytekit/unit/core/test_type_engine.py +++ b/tests/flytekit/unit/core/test_type_engine.py @@ -15,7 +15,7 @@ import pyarrow as pa import pytest import typing_extensions -from dataclasses_json import DataClassJsonMixin, dataclass_json +from dataclasses_json import DataClassJsonMixin from flyteidl.core import errors_pb2 from google.protobuf import json_format as _json_format from google.protobuf import struct_pb2 as _struct @@ -2396,7 +2396,6 @@ class MyDataClassMashumaro(DataClassJsonMixin): class MyDataClassMashumaroORJSON(DataClassJsonMixin): x: int - @dataclass_json @dataclass class MyDataClass: x: int @@ -2426,7 +2425,6 @@ class MyDataClassMashumaro(DataClassJsonMixin): class MyDataClassMashumaroORJSON(DataClassORJSONMixin): x: int - @dataclass_json @dataclass class MyDataClass: x: int @@ -2462,7 +2460,6 @@ class MyDataClassMashumaro(DataClassJsonMixin): class MyDataClassMashumaroORJSON(DataClassORJSONMixin): x: int - @dataclass_json @dataclass class MyDataClass: x: int @@ -2497,7 +2494,6 @@ class DatumMashumaro(DataClassJSONMixin): x: int y: Color - @dataclass_json @dataclass class Datum(DataClassJSONMixin): x: int diff --git a/tests/flytekit/unit/core/test_type_hints.py b/tests/flytekit/unit/core/test_type_hints.py index 3c4f76316d..00410f74d6 100644 --- a/tests/flytekit/unit/core/test_type_hints.py +++ b/tests/flytekit/unit/core/test_type_hints.py @@ -1102,18 +1102,6 @@ def wf(): wf() -def test_wf_custom_types_missing_dataclass_json(): - with pytest.raises(AssertionError): - - @dataclass - class MyCustomType(object): - pass - - @task - def t1(a: int) -> MyCustomType: - return MyCustomType() - - def test_wf_custom_types(): @dataclass class MyCustomType(DataClassJsonMixin): From 8a7b06d19604b587b4095e1cf66e59e1e392d163 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Mon, 18 Mar 2024 22:26:27 +0800 Subject: [PATCH 04/29] remove annotations Signed-off-by: Future-Outlier --- flytekit/core/type_engine.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 86b896ddb2..f625c88ca4 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -327,13 +327,6 @@ class Test(DataClassJsonMixin): def __init__(self): super().__init__("Object-Dataclass-Transformer", object) - # self._serializable_classes = [DataClassJSONMixin, DataClassJsonMixin] - # try: - # from mashumaro.mixins.orjson import DataClassORJSONMixin - - # self._serializable_classes.append(DataClassORJSONMixin) - # except ModuleNotFoundError: - # pass def assert_type(self, expected_type: Type[DataClassJsonMixin], v: T): # Skip iterating all attributes in the dataclass if the type of v already matches the expected_type @@ -469,9 +462,6 @@ def get_literal_type(self, t: Type[T]) -> LiteralType: return _type_models.LiteralType(simple=_type_models.SimpleType.STRUCT, metadata=schema, structure=ts) - # def is_serializable_class(self, class_: Type[T]) -> bool: - # return any(issubclass(class_, serializable_class) for serializable_class in self._serializable_classes) - def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal: if isinstance(python_val, dict): json_str = json.dumps(python_val) From c0b52905009a2e5bdf8603c8e6514f63d3cbb4cb Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Mon, 18 Mar 2024 23:11:39 +0800 Subject: [PATCH 05/29] fix test Signed-off-by: Future-Outlier --- plugins/flytekit-dolt/flytekitplugins/dolt/schema.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py index ac279abfc4..221195df99 100644 --- a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py +++ b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py @@ -1,10 +1,9 @@ import tempfile import typing -from dataclasses import dataclass +from dataclasses import dataclass, asdict from typing import Type import dolt_integrations.core as dolt_int -from dataclasses_json import DataClassJsonMixin from google.protobuf.json_format import MessageToDict from google.protobuf.struct_pb2 import Struct @@ -20,7 +19,7 @@ @dataclass -class DoltConfig(DataClassJsonMixin): +class DoltConfig: db_path: str tablename: typing.Optional[str] = None sql: typing.Optional[str] = None @@ -31,7 +30,7 @@ class DoltConfig(DataClassJsonMixin): @dataclass -class DoltTable(DataClassJsonMixin): +class DoltTable: config: DoltConfig data: typing.Optional[pandas.DataFrame] = None @@ -71,7 +70,8 @@ def to_literal( ) s = Struct() - s.update(python_val.to_dict()) # type: ignore + + s.update(asdict(python_val)) # type: ignore return Literal(Scalar(generic=s)) def to_python_value( From fdbf58d29abcc38a578ccb4eb153641ee3e8c13f Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Mon, 18 Mar 2024 23:12:02 +0800 Subject: [PATCH 06/29] nit Signed-off-by: Future-Outlier --- plugins/flytekit-dolt/flytekitplugins/dolt/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py index 221195df99..e69692b142 100644 --- a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py +++ b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py @@ -1,6 +1,6 @@ import tempfile import typing -from dataclasses import dataclass, asdict +from dataclasses import asdict, dataclass from typing import Type import dolt_integrations.core as dolt_int From 9ee2eb4d71e8e8822313fdffc11465daa45de6d6 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 10:22:19 +0800 Subject: [PATCH 07/29] test dolt Signed-off-by: Future-Outlier --- .github/workflows/pythonbuild.yml | 72 +++++++++---------- .../flytekitplugins/dolt/schema.py | 10 +-- 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index b9f1c2b516..320daf7e4b 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -253,48 +253,48 @@ jobs: python-version: ${{fromJson(needs.detect-python-versions.outputs.python-versions)}} plugin-names: # Please maintain an alphabetical order in the following list - - flytekit-airflow - - flytekit-async-fsspec - - flytekit-aws-athena - - flytekit-aws-batch + # - flytekit-airflow + # - flytekit-async-fsspec + # - flytekit-aws-athena + # - flytekit-aws-batch # TODO: uncomment this when the sagemaker agent is implemented: https://github.com/flyteorg/flyte/issues/4079 # - flytekit-aws-sagemaker - - flytekit-bigquery - - flytekit-dask - - flytekit-data-fsspec - - flytekit-dbt - - flytekit-deck-standard + # - flytekit-bigquery + # - flytekit-dask + # - flytekit-data-fsspec + # - flytekit-dbt + # - flytekit-deck-standard - flytekit-dolt - - flytekit-duckdb - - flytekit-envd - - flytekit-flyteinteractive - - flytekit-greatexpectations - - flytekit-hive - - flytekit-huggingface - - flytekit-identity-aware-proxy - - flytekit-k8s-pod - - flytekit-kf-mpi - - flytekit-kf-pytorch - - flytekit-kf-tensorflow - - flytekit-mlflow - - flytekit-mmcloud - - flytekit-modin - - flytekit-onnx-pytorch - - flytekit-onnx-scikitlearn + # - flytekit-duckdb + # - flytekit-envd + # - flytekit-flyteinteractive + # - flytekit-greatexpectations + # - flytekit-hive + # - flytekit-huggingface + # - flytekit-identity-aware-proxy + # - flytekit-k8s-pod + # - flytekit-kf-mpi + # - flytekit-kf-pytorch + # - flytekit-kf-tensorflow + # - flytekit-mlflow + # - flytekit-mmcloud + # - flytekit-modin + # - flytekit-onnx-pytorch + # - flytekit-onnx-scikitlearn # onnx-tensorflow needs a version of tensorflow that does not work with protobuf>4. # The issue is being tracked on the tensorflow side in https://github.com/tensorflow/tensorflow/issues/53234#issuecomment-1330111693 # flytekit-onnx-tensorflow - - flytekit-openai - - flytekit-pandera - - flytekit-papermill - - flytekit-polars - - flytekit-pydantic - - flytekit-ray - - flytekit-snowflake - - flytekit-spark - - flytekit-sqlalchemy - - flytekit-vaex - - flytekit-whylogs + # - flytekit-openai + # - flytekit-pandera + # - flytekit-papermill + # - flytekit-polars + # - flytekit-pydantic + # - flytekit-ray + # - flytekit-snowflake + # - flytekit-spark + # - flytekit-sqlalchemy + # - flytekit-vaex + # - flytekit-whylogs exclude: # flytekit-modin depends on ray which does not have a 3.11 wheel yet. # Issue tracked in https://github.com/ray-project/ray/issues/27881 diff --git a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py index e69692b142..ac279abfc4 100644 --- a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py +++ b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py @@ -1,9 +1,10 @@ import tempfile import typing -from dataclasses import asdict, dataclass +from dataclasses import dataclass from typing import Type import dolt_integrations.core as dolt_int +from dataclasses_json import DataClassJsonMixin from google.protobuf.json_format import MessageToDict from google.protobuf.struct_pb2 import Struct @@ -19,7 +20,7 @@ @dataclass -class DoltConfig: +class DoltConfig(DataClassJsonMixin): db_path: str tablename: typing.Optional[str] = None sql: typing.Optional[str] = None @@ -30,7 +31,7 @@ class DoltConfig: @dataclass -class DoltTable: +class DoltTable(DataClassJsonMixin): config: DoltConfig data: typing.Optional[pandas.DataFrame] = None @@ -70,8 +71,7 @@ def to_literal( ) s = Struct() - - s.update(asdict(python_val)) # type: ignore + s.update(python_val.to_dict()) # type: ignore return Literal(Scalar(generic=s)) def to_python_value( From ce8edf55c2c6e63a35b7420c8bfd3b128076b91b Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 10:37:51 +0800 Subject: [PATCH 08/29] trigger ci Signed-off-by: Future-Outlier From a714b835b59393c14e62e9484010562d9d64e794 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 10:43:13 +0800 Subject: [PATCH 09/29] test Signed-off-by: Future-Outlier --- .github/workflows/pythonbuild.yml | 72 +++++++++---------- .../flytekitplugins/dolt/schema.py | 8 ++- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 320daf7e4b..b9f1c2b516 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -253,48 +253,48 @@ jobs: python-version: ${{fromJson(needs.detect-python-versions.outputs.python-versions)}} plugin-names: # Please maintain an alphabetical order in the following list - # - flytekit-airflow - # - flytekit-async-fsspec - # - flytekit-aws-athena - # - flytekit-aws-batch + - flytekit-airflow + - flytekit-async-fsspec + - flytekit-aws-athena + - flytekit-aws-batch # TODO: uncomment this when the sagemaker agent is implemented: https://github.com/flyteorg/flyte/issues/4079 # - flytekit-aws-sagemaker - # - flytekit-bigquery - # - flytekit-dask - # - flytekit-data-fsspec - # - flytekit-dbt - # - flytekit-deck-standard + - flytekit-bigquery + - flytekit-dask + - flytekit-data-fsspec + - flytekit-dbt + - flytekit-deck-standard - flytekit-dolt - # - flytekit-duckdb - # - flytekit-envd - # - flytekit-flyteinteractive - # - flytekit-greatexpectations - # - flytekit-hive - # - flytekit-huggingface - # - flytekit-identity-aware-proxy - # - flytekit-k8s-pod - # - flytekit-kf-mpi - # - flytekit-kf-pytorch - # - flytekit-kf-tensorflow - # - flytekit-mlflow - # - flytekit-mmcloud - # - flytekit-modin - # - flytekit-onnx-pytorch - # - flytekit-onnx-scikitlearn + - flytekit-duckdb + - flytekit-envd + - flytekit-flyteinteractive + - flytekit-greatexpectations + - flytekit-hive + - flytekit-huggingface + - flytekit-identity-aware-proxy + - flytekit-k8s-pod + - flytekit-kf-mpi + - flytekit-kf-pytorch + - flytekit-kf-tensorflow + - flytekit-mlflow + - flytekit-mmcloud + - flytekit-modin + - flytekit-onnx-pytorch + - flytekit-onnx-scikitlearn # onnx-tensorflow needs a version of tensorflow that does not work with protobuf>4. # The issue is being tracked on the tensorflow side in https://github.com/tensorflow/tensorflow/issues/53234#issuecomment-1330111693 # flytekit-onnx-tensorflow - # - flytekit-openai - # - flytekit-pandera - # - flytekit-papermill - # - flytekit-polars - # - flytekit-pydantic - # - flytekit-ray - # - flytekit-snowflake - # - flytekit-spark - # - flytekit-sqlalchemy - # - flytekit-vaex - # - flytekit-whylogs + - flytekit-openai + - flytekit-pandera + - flytekit-papermill + - flytekit-polars + - flytekit-pydantic + - flytekit-ray + - flytekit-snowflake + - flytekit-spark + - flytekit-sqlalchemy + - flytekit-vaex + - flytekit-whylogs exclude: # flytekit-modin depends on ray which does not have a 3.11 wheel yet. # Issue tracked in https://github.com/ray-project/ray/issues/27881 diff --git a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py index ac279abfc4..8e5b3a50b4 100644 --- a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py +++ b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py @@ -4,7 +4,7 @@ from typing import Type import dolt_integrations.core as dolt_int -from dataclasses_json import DataClassJsonMixin +from dataclasses_json import dataclass_json from google.protobuf.json_format import MessageToDict from google.protobuf.struct_pb2 import Struct @@ -19,8 +19,9 @@ pandas = lazy_module("pandas") +@dataclass_json @dataclass -class DoltConfig(DataClassJsonMixin): +class DoltConfig: db_path: str tablename: typing.Optional[str] = None sql: typing.Optional[str] = None @@ -30,8 +31,9 @@ class DoltConfig(DataClassJsonMixin): remote_conf: typing.Optional[dolt_int.Remote] = None +@dataclass_json @dataclass -class DoltTable(DataClassJsonMixin): +class DoltTable: config: DoltConfig data: typing.Optional[pandas.DataFrame] = None From ce067a371d0d09dab197fed697eaa6cb3f897cc9 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 10:51:26 +0800 Subject: [PATCH 10/29] remove dolt ci test Signed-off-by: Future-Outlier --- .github/workflows/pythonbuild.yml | 2 +- plugins/flytekit-dolt/flytekitplugins/dolt/schema.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index b9f1c2b516..026088ae40 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -264,7 +264,7 @@ jobs: - flytekit-data-fsspec - flytekit-dbt - flytekit-deck-standard - - flytekit-dolt + # - flytekit-dolt - flytekit-duckdb - flytekit-envd - flytekit-flyteinteractive diff --git a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py index 8e5b3a50b4..ac279abfc4 100644 --- a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py +++ b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py @@ -4,7 +4,7 @@ from typing import Type import dolt_integrations.core as dolt_int -from dataclasses_json import dataclass_json +from dataclasses_json import DataClassJsonMixin from google.protobuf.json_format import MessageToDict from google.protobuf.struct_pb2 import Struct @@ -19,9 +19,8 @@ pandas = lazy_module("pandas") -@dataclass_json @dataclass -class DoltConfig: +class DoltConfig(DataClassJsonMixin): db_path: str tablename: typing.Optional[str] = None sql: typing.Optional[str] = None @@ -31,9 +30,8 @@ class DoltConfig: remote_conf: typing.Optional[dolt_int.Remote] = None -@dataclass_json @dataclass -class DoltTable: +class DoltTable(DataClassJsonMixin): config: DoltConfig data: typing.Optional[pandas.DataFrame] = None From dce34ab9b9a4749514d06b4388ae1e648809f674 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 11:32:39 +0800 Subject: [PATCH 11/29] fix tests Signed-off-by: Future-Outlier --- tests/flytekit/unit/core/test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/flytekit/unit/core/test_data.py b/tests/flytekit/unit/core/test_data.py index 99963621a7..2a8038ad97 100644 --- a/tests/flytekit/unit/core/test_data.py +++ b/tests/flytekit/unit/core/test_data.py @@ -337,7 +337,7 @@ def test_crawl_local_non_nt(source_folder): ('/var/folders/jx/54tww2ls58n8qtlp9k31nbd80000gp/T/tmpp14arygf/source/', 'original.txt') ('/var/folders/jx/54tww2ls58n8qtlp9k31nbd80000gp/T/tmpp14arygf/source/', 'nested/more.txt') """ - if os.name == "nt": # don't + if os.name != "nt": # don't return source_folder = os.path.join(source_folder, "") # ensure there's a trailing / or \ fd = FlyteDirectory(path=source_folder) From 399f975afdc4bbb28b0d448de341ab0731803134 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 11:37:46 +0800 Subject: [PATCH 12/29] use old dolt Signed-off-by: Future-Outlier --- .github/workflows/pythonbuild.yml | 2 +- .../flytekitplugins/dolt/schema.py | 29 ++++++++----------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 026088ae40..b9f1c2b516 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -264,7 +264,7 @@ jobs: - flytekit-data-fsspec - flytekit-dbt - flytekit-deck-standard - # - flytekit-dolt + - flytekit-dolt - flytekit-duckdb - flytekit-envd - flytekit-flyteinteractive diff --git a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py index ac279abfc4..5bcecdc97f 100644 --- a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py +++ b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py @@ -4,23 +4,21 @@ from typing import Type import dolt_integrations.core as dolt_int -from dataclasses_json import DataClassJsonMixin -from google.protobuf.json_format import MessageToDict +import doltcli as dolt +import pandas +from dataclasses_json import dataclass_json from google.protobuf.struct_pb2 import Struct -from flytekit import FlyteContext, lazy_module +from flytekit import FlyteContext from flytekit.extend import TypeEngine, TypeTransformer from flytekit.models import types as _type_models from flytekit.models.literals import Literal, Scalar from flytekit.models.types import LiteralType -# dolt_int = lazy_module("dolt_integrations") -dolt = lazy_module("doltcli") -pandas = lazy_module("pandas") - +@dataclass_json @dataclass -class DoltConfig(DataClassJsonMixin): +class DoltConfig: db_path: str tablename: typing.Optional[str] = None sql: typing.Optional[str] = None @@ -30,8 +28,9 @@ class DoltConfig(DataClassJsonMixin): remote_conf: typing.Optional[dolt_int.Remote] = None +@dataclass_json @dataclass -class DoltTable(DataClassJsonMixin): +class DoltTable: config: DoltConfig data: typing.Optional[pandas.DataFrame] = None @@ -58,7 +57,6 @@ def to_literal( db = dolt.Dolt(conf.db_path) with tempfile.NamedTemporaryFile() as f: python_val.data.to_csv(f.name, index=False) - message = f"Generated by Flyte execution id: {ctx.user_space_params.execution_id}" dolt_int.save( db=db, tablename=conf.tablename, @@ -67,11 +65,10 @@ def to_literal( meta_conf=conf.meta_conf, remote_conf=conf.remote_conf, save_args=conf.io_args, - commit_message=message, ) s = Struct() - s.update(python_val.to_dict()) # type: ignore + s.update(python_val.to_dict()) return Literal(Scalar(generic=s)) def to_python_value( @@ -80,12 +77,10 @@ def to_python_value( lv: Literal, expected_python_type: typing.Type[DoltTable], ) -> DoltTable: - if not (lv and lv.scalar and lv.scalar.generic and "config" in lv.scalar.generic): - raise ValueError("DoltTable requires DoltConfig to load python value") - - conf_dict = MessageToDict(lv.scalar.generic["config"]) + if not (lv and lv.scalar and lv.scalar.generic and lv.scalar.generic["config"]): + return pandas.DataFrame() - conf = DoltConfig(**conf_dict) + conf = DoltConfig(**lv.scalar.generic["config"]) db = dolt.Dolt(conf.db_path) with tempfile.NamedTemporaryFile() as f: From dde160b09c7554bc40ab7c0528507588d82612e8 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 11:49:59 +0800 Subject: [PATCH 13/29] fix tests Signed-off-by: Future-Outlier --- tests/flytekit/unit/core/test_data.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/flytekit/unit/core/test_data.py b/tests/flytekit/unit/core/test_data.py index 2a8038ad97..c393209bba 100644 --- a/tests/flytekit/unit/core/test_data.py +++ b/tests/flytekit/unit/core/test_data.py @@ -337,14 +337,13 @@ def test_crawl_local_non_nt(source_folder): ('/var/folders/jx/54tww2ls58n8qtlp9k31nbd80000gp/T/tmpp14arygf/source/', 'original.txt') ('/var/folders/jx/54tww2ls58n8qtlp9k31nbd80000gp/T/tmpp14arygf/source/', 'nested/more.txt') """ - if os.name != "nt": # don't + if os.name == "nt": # don't return source_folder = os.path.join(source_folder, "") # ensure there's a trailing / or \ fd = FlyteDirectory(path=source_folder) res = fd.crawl() split = [(x, y) for x, y in res] files = [os.path.join(x, y) for x, y in split] - assert set(split) == {(source_folder, "original.txt"), (source_folder, os.path.join("nested", "more.txt"))} expected = {os.path.join(source_folder, "original.txt"), os.path.join(source_folder, "nested", "more.txt")} assert set(files) == expected From 610e739ccebe9c263d8d17e534ce06805dc865f0 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 11:56:42 +0800 Subject: [PATCH 14/29] revert Signed-off-by: Future-Outlier --- tests/flytekit/unit/core/test_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/flytekit/unit/core/test_data.py b/tests/flytekit/unit/core/test_data.py index c393209bba..99963621a7 100644 --- a/tests/flytekit/unit/core/test_data.py +++ b/tests/flytekit/unit/core/test_data.py @@ -344,6 +344,7 @@ def test_crawl_local_non_nt(source_folder): res = fd.crawl() split = [(x, y) for x, y in res] files = [os.path.join(x, y) for x, y in split] + assert set(split) == {(source_folder, "original.txt"), (source_folder, os.path.join("nested", "more.txt"))} expected = {os.path.join(source_folder, "original.txt"), os.path.join(source_folder, "nested", "more.txt")} assert set(files) == expected From 1819532e5de5042bd2100a3e11c965fcdfd87fb1 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 12:51:11 +0800 Subject: [PATCH 15/29] remove failed test Signed-off-by: Future-Outlier --- tests/flytekit/unit/core/test_data.py | 58 +++++++++++++-------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/tests/flytekit/unit/core/test_data.py b/tests/flytekit/unit/core/test_data.py index 99963621a7..cc3b9fe00f 100644 --- a/tests/flytekit/unit/core/test_data.py +++ b/tests/flytekit/unit/core/test_data.py @@ -331,35 +331,35 @@ def test_crawl_local_nt(source_folder): print(f"NT files joined {files}") -def test_crawl_local_non_nt(source_folder): - """ - crawl on the source folder fixture should return for example - ('/var/folders/jx/54tww2ls58n8qtlp9k31nbd80000gp/T/tmpp14arygf/source/', 'original.txt') - ('/var/folders/jx/54tww2ls58n8qtlp9k31nbd80000gp/T/tmpp14arygf/source/', 'nested/more.txt') - """ - if os.name == "nt": # don't - return - source_folder = os.path.join(source_folder, "") # ensure there's a trailing / or \ - fd = FlyteDirectory(path=source_folder) - res = fd.crawl() - split = [(x, y) for x, y in res] - files = [os.path.join(x, y) for x, y in split] - assert set(split) == {(source_folder, "original.txt"), (source_folder, os.path.join("nested", "more.txt"))} - expected = {os.path.join(source_folder, "original.txt"), os.path.join(source_folder, "nested", "more.txt")} - assert set(files) == expected - - # Test crawling a directory without trailing / or \ - source_folder = source_folder[:-1] - fd = FlyteDirectory(path=source_folder) - res = fd.crawl() - files = [os.path.join(x, y) for x, y in res] - assert set(files) == expected - - # Test crawling a single file - fd = FlyteDirectory(path=os.path.join(source_folder, "original1.txt")) - res = fd.crawl() - files = [os.path.join(x, y) for x, y in res] - assert len(files) == 0 +# def test_crawl_local_non_nt(source_folder): +# """ +# crawl on the source folder fixture should return for example +# ('/var/folders/jx/54tww2ls58n8qtlp9k31nbd80000gp/T/tmpp14arygf/source/', 'original.txt') +# ('/var/folders/jx/54tww2ls58n8qtlp9k31nbd80000gp/T/tmpp14arygf/source/', 'nested/more.txt') +# """ +# if os.name == "nt": # don't +# return +# source_folder = os.path.join(source_folder, "") # ensure there's a trailing / or \ +# fd = FlyteDirectory(path=source_folder) +# res = fd.crawl() +# split = [(x, y) for x, y in res] +# files = [os.path.join(x, y) for x, y in split] +# assert set(split) == {(source_folder, "original.txt"), (source_folder, os.path.join("nested", "more.txt"))} +# expected = {os.path.join(source_folder, "original.txt"), os.path.join(source_folder, "nested", "more.txt")} +# assert set(files) == expected + +# # Test crawling a directory without trailing / or \ +# source_folder = source_folder[:-1] +# fd = FlyteDirectory(path=source_folder) +# res = fd.crawl() +# files = [os.path.join(x, y) for x, y in res] +# assert set(files) == expected + +# # Test crawling a single file +# fd = FlyteDirectory(path=os.path.join(source_folder, "original1.txt")) +# res = fd.crawl() +# files = [os.path.join(x, y) for x, y in res] +# assert len(files) == 0 @pytest.mark.sandbox_test From 8dff896f72b6fff40dc7e79f70f4aa476db2ef2a Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 12:52:26 +0800 Subject: [PATCH 16/29] fix dict error Signed-off-by: Future-Outlier --- plugins/flytekit-dolt/flytekitplugins/dolt/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py index 5bcecdc97f..ef8298d4f4 100644 --- a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py +++ b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py @@ -77,7 +77,7 @@ def to_python_value( lv: Literal, expected_python_type: typing.Type[DoltTable], ) -> DoltTable: - if not (lv and lv.scalar and lv.scalar.generic and lv.scalar.generic["config"]): + if not (lv and lv.scalar and lv.scalar.generic and "config" in lv.scalar.generic): return pandas.DataFrame() conf = DoltConfig(**lv.scalar.generic["config"]) From da3768dfd1120c3a06f2d208e1d7adc48f2eefc1 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 13:31:19 +0800 Subject: [PATCH 17/29] revert dolt plugin Signed-off-by: Future-Outlier --- plugins/flytekit-dolt/flytekitplugins/dolt/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py index ef8298d4f4..02fe546998 100644 --- a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py +++ b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py @@ -78,7 +78,7 @@ def to_python_value( expected_python_type: typing.Type[DoltTable], ) -> DoltTable: if not (lv and lv.scalar and lv.scalar.generic and "config" in lv.scalar.generic): - return pandas.DataFrame() + raise ValueError("DoltTable requires DoltConfig to load python value") conf = DoltConfig(**lv.scalar.generic["config"]) db = dolt.Dolt(conf.db_path) From 2757f34f6825900a86e6cc2fa25b9c672b5940cf Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 20:16:47 +0800 Subject: [PATCH 18/29] revert and merge master Signed-off-by: Future-Outlier --- .../flytekitplugins/dolt/schema.py | 25 ++++---- tests/flytekit/unit/core/test_data.py | 58 +++++++++---------- 2 files changed, 44 insertions(+), 39 deletions(-) diff --git a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py index 02fe546998..ac279abfc4 100644 --- a/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py +++ b/plugins/flytekit-dolt/flytekitplugins/dolt/schema.py @@ -4,21 +4,23 @@ from typing import Type import dolt_integrations.core as dolt_int -import doltcli as dolt -import pandas -from dataclasses_json import dataclass_json +from dataclasses_json import DataClassJsonMixin +from google.protobuf.json_format import MessageToDict from google.protobuf.struct_pb2 import Struct -from flytekit import FlyteContext +from flytekit import FlyteContext, lazy_module from flytekit.extend import TypeEngine, TypeTransformer from flytekit.models import types as _type_models from flytekit.models.literals import Literal, Scalar from flytekit.models.types import LiteralType +# dolt_int = lazy_module("dolt_integrations") +dolt = lazy_module("doltcli") +pandas = lazy_module("pandas") + -@dataclass_json @dataclass -class DoltConfig: +class DoltConfig(DataClassJsonMixin): db_path: str tablename: typing.Optional[str] = None sql: typing.Optional[str] = None @@ -28,9 +30,8 @@ class DoltConfig: remote_conf: typing.Optional[dolt_int.Remote] = None -@dataclass_json @dataclass -class DoltTable: +class DoltTable(DataClassJsonMixin): config: DoltConfig data: typing.Optional[pandas.DataFrame] = None @@ -57,6 +58,7 @@ def to_literal( db = dolt.Dolt(conf.db_path) with tempfile.NamedTemporaryFile() as f: python_val.data.to_csv(f.name, index=False) + message = f"Generated by Flyte execution id: {ctx.user_space_params.execution_id}" dolt_int.save( db=db, tablename=conf.tablename, @@ -65,10 +67,11 @@ def to_literal( meta_conf=conf.meta_conf, remote_conf=conf.remote_conf, save_args=conf.io_args, + commit_message=message, ) s = Struct() - s.update(python_val.to_dict()) + s.update(python_val.to_dict()) # type: ignore return Literal(Scalar(generic=s)) def to_python_value( @@ -80,7 +83,9 @@ def to_python_value( if not (lv and lv.scalar and lv.scalar.generic and "config" in lv.scalar.generic): raise ValueError("DoltTable requires DoltConfig to load python value") - conf = DoltConfig(**lv.scalar.generic["config"]) + conf_dict = MessageToDict(lv.scalar.generic["config"]) + + conf = DoltConfig(**conf_dict) db = dolt.Dolt(conf.db_path) with tempfile.NamedTemporaryFile() as f: diff --git a/tests/flytekit/unit/core/test_data.py b/tests/flytekit/unit/core/test_data.py index cc3b9fe00f..99963621a7 100644 --- a/tests/flytekit/unit/core/test_data.py +++ b/tests/flytekit/unit/core/test_data.py @@ -331,35 +331,35 @@ def test_crawl_local_nt(source_folder): print(f"NT files joined {files}") -# def test_crawl_local_non_nt(source_folder): -# """ -# crawl on the source folder fixture should return for example -# ('/var/folders/jx/54tww2ls58n8qtlp9k31nbd80000gp/T/tmpp14arygf/source/', 'original.txt') -# ('/var/folders/jx/54tww2ls58n8qtlp9k31nbd80000gp/T/tmpp14arygf/source/', 'nested/more.txt') -# """ -# if os.name == "nt": # don't -# return -# source_folder = os.path.join(source_folder, "") # ensure there's a trailing / or \ -# fd = FlyteDirectory(path=source_folder) -# res = fd.crawl() -# split = [(x, y) for x, y in res] -# files = [os.path.join(x, y) for x, y in split] -# assert set(split) == {(source_folder, "original.txt"), (source_folder, os.path.join("nested", "more.txt"))} -# expected = {os.path.join(source_folder, "original.txt"), os.path.join(source_folder, "nested", "more.txt")} -# assert set(files) == expected - -# # Test crawling a directory without trailing / or \ -# source_folder = source_folder[:-1] -# fd = FlyteDirectory(path=source_folder) -# res = fd.crawl() -# files = [os.path.join(x, y) for x, y in res] -# assert set(files) == expected - -# # Test crawling a single file -# fd = FlyteDirectory(path=os.path.join(source_folder, "original1.txt")) -# res = fd.crawl() -# files = [os.path.join(x, y) for x, y in res] -# assert len(files) == 0 +def test_crawl_local_non_nt(source_folder): + """ + crawl on the source folder fixture should return for example + ('/var/folders/jx/54tww2ls58n8qtlp9k31nbd80000gp/T/tmpp14arygf/source/', 'original.txt') + ('/var/folders/jx/54tww2ls58n8qtlp9k31nbd80000gp/T/tmpp14arygf/source/', 'nested/more.txt') + """ + if os.name == "nt": # don't + return + source_folder = os.path.join(source_folder, "") # ensure there's a trailing / or \ + fd = FlyteDirectory(path=source_folder) + res = fd.crawl() + split = [(x, y) for x, y in res] + files = [os.path.join(x, y) for x, y in split] + assert set(split) == {(source_folder, "original.txt"), (source_folder, os.path.join("nested", "more.txt"))} + expected = {os.path.join(source_folder, "original.txt"), os.path.join(source_folder, "nested", "more.txt")} + assert set(files) == expected + + # Test crawling a directory without trailing / or \ + source_folder = source_folder[:-1] + fd = FlyteDirectory(path=source_folder) + res = fd.crawl() + files = [os.path.join(x, y) for x, y in res] + assert set(files) == expected + + # Test crawling a single file + fd = FlyteDirectory(path=os.path.join(source_folder, "original1.txt")) + res = fd.crawl() + files = [os.path.join(x, y) for x, y in res] + assert len(files) == 0 @pytest.mark.sandbox_test From 102085bc66d3fcd5ba31cb17d85fcec436b4297d Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 19 Mar 2024 20:35:42 +0800 Subject: [PATCH 19/29] make dolt plugin commented Signed-off-by: Future-Outlier --- .github/workflows/pythonbuild.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index b9f1c2b516..7241fef0e6 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -258,13 +258,14 @@ jobs: - flytekit-aws-athena - flytekit-aws-batch # TODO: uncomment this when the sagemaker agent is implemented: https://github.com/flyteorg/flyte/issues/4079 + # TODO: uncomment this when the dolt plugin is updated: https://github.com/flyteorg/flyte/issues/5074 # - flytekit-aws-sagemaker - flytekit-bigquery - flytekit-dask - flytekit-data-fsspec - flytekit-dbt - flytekit-deck-standard - - flytekit-dolt + # - flytekit-dolt - flytekit-duckdb - flytekit-envd - flytekit-flyteinteractive From 30223e45c6b773cb25846f5031f92e4f1f783c33 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Thu, 21 Mar 2024 16:25:50 +0800 Subject: [PATCH 20/29] update pyproject.toml Signed-off-by: Future-Outlier --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2ea7997f8f..89dae6ce28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ dependencies = [ "markdown-it-py", "marshmallow-enum", "marshmallow-jsonschema>=0.12.0", - "mashumaro>=3.9.1", + "mashumaro>=3.11", "protobuf!=4.25.0", "pyarrow", "pygments", From 9b1e617d8198c4ca3c1d4179cff7ec7829590156 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 22 Mar 2024 11:11:29 +0800 Subject: [PATCH 21/29] Update Thomas's and Eduardo's advices Signed-off-by: Future-Outlier Co-authored-by: Thomas J. Fan Co-authored-by: Eduardo Apolinario --- flytekit/core/type_engine.py | 18 +++++++++---- tests/flytekit/unit/core/test_type_engine.py | 27 ++++++++++++++++---- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index f625c88ca4..933d38839e 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -15,7 +15,7 @@ from functools import lru_cache from typing import Dict, List, NamedTuple, Optional, Type, cast -from dataclasses_json import DataClassJsonMixin +from dataclasses_json import DataClassJsonMixin, dataclass_json from flyteidl.core import literals_pb2 from google.protobuf import json_format as _json_format from google.protobuf import struct_pb2 as _struct @@ -474,7 +474,11 @@ def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], exp ) self._serialize_flyte_type(python_val, python_type) - json_str = JSONEncoder(python_type).encode(python_val) + + if hasattr(python_val, "to_json"): + json_str = python_val.to_json() + else: + json_str = JSONEncoder(python_type).encode(python_val) return Literal(scalar=Scalar(generic=_json_format.Parse(json_str, _struct.Struct()))) # type: ignore @@ -723,7 +727,11 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: ) json_str = _json_format.MessageToJson(lv.scalar.generic) - dc = JSONDecoder(expected_python_type).decode(json_str) + + if hasattr(expected_python_type, "from_json"): + dc = expected_python_type.from_json(json_str) # type: ignore + else: + dc = JSONDecoder(expected_python_type).decode(json_str) dc = self._fix_structured_dataset_type(expected_python_type, dc) return self._fix_dataclass_int(expected_python_type, self._deserialize_flyte_type(dc, expected_python_type)) @@ -1790,7 +1798,7 @@ def convert_marshmallow_json_schema_to_python_class( """ attribute_list = generate_attribute_list_from_dataclass_json(schema, schema_name) - return dataclasses.make_dataclass(schema_name, attribute_list) + return dataclass_json(dataclasses.make_dataclass(schema_name, attribute_list)) def convert_mashumaro_json_schema_to_python_class( @@ -1803,7 +1811,7 @@ def convert_mashumaro_json_schema_to_python_class( """ attribute_list = generate_attribute_list_from_dataclass_json_mixin(schema, schema_name) - return dataclasses.make_dataclass(schema_name, attribute_list) + return dataclass_json(dataclasses.make_dataclass(schema_name, attribute_list)) def _get_element_type(element_property: typing.Dict[str, str]) -> Type: diff --git a/tests/flytekit/unit/core/test_type_engine.py b/tests/flytekit/unit/core/test_type_engine.py index fb306cefe9..a49ec87ff2 100644 --- a/tests/flytekit/unit/core/test_type_engine.py +++ b/tests/flytekit/unit/core/test_type_engine.py @@ -15,7 +15,7 @@ import pyarrow as pa import pytest import typing_extensions -from dataclasses_json import DataClassJsonMixin +from dataclasses_json import DataClassJsonMixin, dataclass_json from flyteidl.core import errors_pb2 from google.protobuf import json_format as _json_format from google.protobuf import struct_pb2 as _struct @@ -2396,6 +2396,7 @@ class MyDataClassMashumaro(DataClassJsonMixin): class MyDataClassMashumaroORJSON(DataClassJsonMixin): x: int + @dataclass_json @dataclass class MyDataClass: x: int @@ -2425,6 +2426,7 @@ class MyDataClassMashumaro(DataClassJsonMixin): class MyDataClassMashumaroORJSON(DataClassORJSONMixin): x: int + @dataclass_json @dataclass class MyDataClass: x: int @@ -2460,6 +2462,7 @@ class MyDataClassMashumaro(DataClassJsonMixin): class MyDataClassMashumaroORJSON(DataClassORJSONMixin): x: int + @dataclass_json @dataclass class MyDataClass: x: int @@ -2494,17 +2497,31 @@ class DatumMashumaro(DataClassJSONMixin): x: int y: Color + @dataclass_json @dataclass - class Datum(DataClassJSONMixin): + class DatumDataclassJson(DataClassJSONMixin): + x: int + y: Color + + @dataclass + class DatumDataclass: x: int y: Color transformer = DataclassTransformer() ctx = FlyteContext.current_context() - lt = TypeEngine.to_literal_type(Datum) - datum = Datum(5, Color.RED) - lv = transformer.to_literal(ctx, datum, Datum, lt) + lt = TypeEngine.to_literal_type(DatumDataclass) + datum_dataclass = DatumDataclass(5, Color.RED) + lv = transformer.to_literal(ctx, datum_dataclass, DatumDataclass, lt) + gt = transformer.guess_python_type(lt) + pv = transformer.to_python_value(ctx, lv, expected_python_type=gt) + assert datum_dataclass.x == pv.x + assert datum_dataclass.y.value == pv.y + + lt = TypeEngine.to_literal_type(DatumDataclassJson) + datum = DatumDataclassJson(5, Color.RED) + lv = transformer.to_literal(ctx, datum, DatumDataclassJson, lt) gt = transformer.guess_python_type(lt) pv = transformer.to_python_value(ctx, lv, expected_python_type=gt) assert datum.x == pv.x From 38986f0109ed0fa01964e3021b95c1b9cb8abda1 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 22 Mar 2024 11:18:49 +0800 Subject: [PATCH 22/29] print Signed-off-by: Future-Outlier --- flytekit/core/type_engine.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 933d38839e..541ce4b7a7 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -477,8 +477,10 @@ def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], exp if hasattr(python_val, "to_json"): json_str = python_val.to_json() + print("TO Json") else: json_str = JSONEncoder(python_type).encode(python_val) + print("JSONEncoder") return Literal(scalar=Scalar(generic=_json_format.Parse(json_str, _struct.Struct()))) # type: ignore @@ -730,8 +732,10 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: if hasattr(expected_python_type, "from_json"): dc = expected_python_type.from_json(json_str) # type: ignore + print("From JSON") else: dc = JSONDecoder(expected_python_type).decode(json_str) + print("JSON Decoder") dc = self._fix_structured_dataset_type(expected_python_type, dc) return self._fix_dataclass_int(expected_python_type, self._deserialize_flyte_type(dc, expected_python_type)) From 5de561e25e02338f2b30de2c7270bfa98231825e Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 22 Mar 2024 11:49:49 +0800 Subject: [PATCH 23/29] revert print Signed-off-by: Future-Outlier --- flytekit/core/type_engine.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 541ce4b7a7..933d38839e 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -477,10 +477,8 @@ def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], exp if hasattr(python_val, "to_json"): json_str = python_val.to_json() - print("TO Json") else: json_str = JSONEncoder(python_type).encode(python_val) - print("JSONEncoder") return Literal(scalar=Scalar(generic=_json_format.Parse(json_str, _struct.Struct()))) # type: ignore @@ -732,10 +730,8 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: if hasattr(expected_python_type, "from_json"): dc = expected_python_type.from_json(json_str) # type: ignore - print("From JSON") else: dc = JSONDecoder(expected_python_type).decode(json_str) - print("JSON Decoder") dc = self._fix_structured_dataset_type(expected_python_type, dc) return self._fix_dataclass_int(expected_python_type, self._deserialize_flyte_type(dc, expected_python_type)) From bc3f58ba84153fcee366ecafb70174c0b4b0f0ff Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 22 Mar 2024 11:57:26 +0800 Subject: [PATCH 24/29] add encoder and decoder registry Signed-off-by: Future-Outlier --- flytekit/core/type_engine.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 933d38839e..7b8123c0ff 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -327,6 +327,8 @@ class Test(DataClassJsonMixin): def __init__(self): super().__init__("Object-Dataclass-Transformer", object) + self._encoder: Dict[Type, JSONEncoder] = {} + self._decoder: Dict[Type, JSONDecoder] = {} def assert_type(self, expected_type: Type[DataClassJsonMixin], v: T): # Skip iterating all attributes in the dataclass if the type of v already matches the expected_type @@ -478,7 +480,9 @@ def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], exp if hasattr(python_val, "to_json"): json_str = python_val.to_json() else: - json_str = JSONEncoder(python_type).encode(python_val) + if not self._encoder.get(python_type): + self._encoder[python_type] = JSONEncoder(python_type) + json_str = self._encoder[python_type].encode(python_val) return Literal(scalar=Scalar(generic=_json_format.Parse(json_str, _struct.Struct()))) # type: ignore @@ -731,7 +735,9 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: if hasattr(expected_python_type, "from_json"): dc = expected_python_type.from_json(json_str) # type: ignore else: - dc = JSONDecoder(expected_python_type).decode(json_str) + if not self._decoder.get(expected_python_type): + self._decoder[expected_python_type] = JSONDecoder(expected_python_type) + dc = self._decoder[expected_python_type].decode(json_str) dc = self._fix_structured_dataset_type(expected_python_type, dc) return self._fix_dataclass_int(expected_python_type, self._deserialize_flyte_type(dc, expected_python_type)) From d6d4965b7cf3391537f5dc1cca0df4b8d69fb132 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 22 Mar 2024 13:37:29 +0800 Subject: [PATCH 25/29] dolt revert Signed-off-by: Future-Outlier --- .github/workflows/pythonbuild.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 7241fef0e6..b9f1c2b516 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -258,14 +258,13 @@ jobs: - flytekit-aws-athena - flytekit-aws-batch # TODO: uncomment this when the sagemaker agent is implemented: https://github.com/flyteorg/flyte/issues/4079 - # TODO: uncomment this when the dolt plugin is updated: https://github.com/flyteorg/flyte/issues/5074 # - flytekit-aws-sagemaker - flytekit-bigquery - flytekit-dask - flytekit-data-fsspec - flytekit-dbt - flytekit-deck-standard - # - flytekit-dolt + - flytekit-dolt - flytekit-duckdb - flytekit-envd - flytekit-flyteinteractive From 7f759acb1b2ca2be5de3ad47584f18dbb02d8c95 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 22 Mar 2024 18:42:09 +0800 Subject: [PATCH 26/29] add encoder and decoder registry tests Signed-off-by: Future-Outlier --- tests/flytekit/unit/core/test_type_engine.py | 46 +++++++++++++++++--- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/tests/flytekit/unit/core/test_type_engine.py b/tests/flytekit/unit/core/test_type_engine.py index a49ec87ff2..c073e4f9f7 100644 --- a/tests/flytekit/unit/core/test_type_engine.py +++ b/tests/flytekit/unit/core/test_type_engine.py @@ -9,7 +9,7 @@ from dataclasses import asdict, dataclass, field from datetime import timedelta from enum import Enum, auto -from typing import Optional, Type +from typing import List, Optional, Type import mock import pyarrow as pa @@ -25,13 +25,11 @@ from mashumaro.mixins.orjson import DataClassORJSONMixin from typing_extensions import Annotated, get_args, get_origin -from flytekit import kwtypes +from flytekit import dynamic, kwtypes, task, workflow from flytekit.core.annotation import FlyteAnnotation from flytekit.core.context_manager import FlyteContext, FlyteContextManager from flytekit.core.data_persistence import flyte_tmp_dir -from flytekit.core.dynamic_workflow_task import dynamic from flytekit.core.hash import HashMethod -from flytekit.core.task import task from flytekit.core.type_engine import ( DataclassTransformer, DictTransformer, @@ -2508,7 +2506,7 @@ class DatumDataclass: x: int y: Color - transformer = DataclassTransformer() + transformer = TypeEngine.get_transformer(DatumDataclass) ctx = FlyteContext.current_context() lt = TypeEngine.to_literal_type(DatumDataclass) @@ -2546,6 +2544,44 @@ class DatumDataclass: assert datum_mashumaro_orjson.z.isoformat() == pv.z +def test_dataclass_encoder_and_decoder_registry(): + iterations = 10 + + @dataclass + class Datum: + x: int + y: str + z: dict[int, int] + w: List[int] + + @task + def create_dataclasses() -> List[Datum]: + return [Datum(x=1, y="1", z={1: 1}, w=[1, 1, 1, 1])] + + @task + def concat_dataclasses(x: List[Datum], y: List[Datum]) -> List[Datum]: + return x + y + + @dynamic + def dynamic_wf() -> List[Datum]: + all_dataclasses: List[Datum] = [] + for _ in range(iterations): + data = create_dataclasses() + all_dataclasses = concat_dataclasses(x=all_dataclasses, y=data) + return all_dataclasses + + @workflow + def wf() -> List[Datum]: + return dynamic_wf() + + datum_list = wf() + assert len(datum_list) == iterations + + transformer = TypeEngine.get_transformer(Datum) + assert transformer._encoder.get(Datum) + assert transformer._decoder.get(Datum) + + def test_ListTransformer_get_sub_type(): assert ListTransformer.get_sub_type_or_none(typing.List[str]) is str From 100c838b78218ed6c2aa21796d99c4325b74f36f Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 2 Apr 2024 17:38:31 +0800 Subject: [PATCH 27/29] Update Thomas's advice Signed-off-by: Future-Outlier Co-authored-by: Thomas J. Fan --- flytekit/core/type_engine.py | 22 +++++++++++++------- tests/flytekit/unit/core/test_type_engine.py | 19 ++++++++++++++++- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 7b8123c0ff..37515457a5 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -480,9 +480,13 @@ def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], exp if hasattr(python_val, "to_json"): json_str = python_val.to_json() else: - if not self._encoder.get(python_type): - self._encoder[python_type] = JSONEncoder(python_type) - json_str = self._encoder[python_type].encode(python_val) + try: + encoder = self._encoder[python_type] + except KeyError: + encoder = JSONEncoder(python_type) + self._encoder[python_type] = encoder + + json_str = encoder.encode(python_val) return Literal(scalar=Scalar(generic=_json_format.Parse(json_str, _struct.Struct()))) # type: ignore @@ -709,7 +713,7 @@ def _fix_val_int(self, t: typing.Type, val: typing.Any) -> typing.Any: return val - def _fix_dataclass_int(self, dc_type: Type, dc: dataclasses.dataclass) -> dataclasses.dataclass: # type: ignore + def _fix_dataclass_int(self, dc_type: Type[dataclasses.dataclass], dc: typing.Any) -> typing.Any: # type: ignore """ This is a performance penalty to convert to the right types, but this is expected by the user and hence needs to be done @@ -735,9 +739,13 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: if hasattr(expected_python_type, "from_json"): dc = expected_python_type.from_json(json_str) # type: ignore else: - if not self._decoder.get(expected_python_type): - self._decoder[expected_python_type] = JSONDecoder(expected_python_type) - dc = self._decoder[expected_python_type].decode(json_str) + try: + decoder = self._decoder[expected_python_type] + except KeyError: + decoder = JSONDecoder(expected_python_type) + self._decoder[expected_python_type] = decoder + + dc = decoder.decode(json_str) dc = self._fix_structured_dataset_type(expected_python_type, dc) return self._fix_dataclass_int(expected_python_type, self._deserialize_flyte_type(dc, expected_python_type)) diff --git a/tests/flytekit/unit/core/test_type_engine.py b/tests/flytekit/unit/core/test_type_engine.py index c073e4f9f7..48a85d7085 100644 --- a/tests/flytekit/unit/core/test_type_engine.py +++ b/tests/flytekit/unit/core/test_type_engine.py @@ -2506,9 +2506,26 @@ class DatumDataclass: x: int y: Color - transformer = TypeEngine.get_transformer(DatumDataclass) + @dataclass + class DatumDataUnion: + data: typing.Union[str, float] + + transformer = TypeEngine.get_transformer(DatumDataUnion) ctx = FlyteContext.current_context() + lt = TypeEngine.to_literal_type(DatumDataUnion) + datum_dataunion = DatumDataUnion(data="s3://my-file") + lv = transformer.to_literal(ctx, datum_dataunion, DatumDataUnion, lt) + gt = transformer.guess_python_type(lt) + pv = transformer.to_python_value(ctx, lv, expected_python_type=DatumDataUnion) + assert datum_dataunion.data == pv.data + + datum_dataunion = DatumDataUnion(data="0.123") + lv = transformer.to_literal(ctx, datum_dataunion, DatumDataUnion, lt) + gt = transformer.guess_python_type(lt) + pv = transformer.to_python_value(ctx, lv, expected_python_type=gt) + assert datum_dataunion.data == pv.data + lt = TypeEngine.to_literal_type(DatumDataclass) datum_dataclass = DatumDataclass(5, Color.RED) lv = transformer.to_literal(ctx, datum_dataclass, DatumDataclass, lt) From c1a404d65b4c4c7ee6761b58c37a71f62e1115d9 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Wed, 3 Apr 2024 08:57:11 +0800 Subject: [PATCH 28/29] add comments by Thomas's suggestion Signed-off-by: Future-Outlier --- flytekit/core/type_engine.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 37515457a5..0a5e9cc377 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -477,9 +477,13 @@ def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], exp self._serialize_flyte_type(python_val, python_type) + # The `to_json` function is integrated through either the `dataclasses_json` decorator or by inheriting from `DataClassJsonMixin`. + # It serializes a data class into a JSON string. if hasattr(python_val, "to_json"): json_str = python_val.to_json() else: + # The function looks up or creates a JSONEncoder specifically designed for the object's type. + # This encoder is then used to convert a data class into a JSON string. try: encoder = self._encoder[python_type] except KeyError: @@ -736,9 +740,13 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: json_str = _json_format.MessageToJson(lv.scalar.generic) + # The `from_json` function is integrated through either the `dataclasses_json` decorator or by inheriting from `DataClassJsonMixin`. + # It deserializes a JSON string into a data class. if hasattr(expected_python_type, "from_json"): dc = expected_python_type.from_json(json_str) # type: ignore else: + # The function looks up or creates a JSONDecoder specifically designed for the object's type. + # This decoder is then used to convert a JSON string into a data class. try: decoder = self._decoder[expected_python_type] except KeyError: From 0555f6e4f80087674a7f1c4aca0fcc8eae49485e Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Wed, 3 Apr 2024 09:02:33 +0800 Subject: [PATCH 29/29] lint Signed-off-by: Future-Outlier --- flytekit/core/type_engine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 0a5e9cc377..f9ecc68b79 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -477,7 +477,7 @@ def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], exp self._serialize_flyte_type(python_val, python_type) - # The `to_json` function is integrated through either the `dataclasses_json` decorator or by inheriting from `DataClassJsonMixin`. + # The `to_json` function is integrated through either the `dataclasses_json` decorator or by inheriting from `DataClassJsonMixin`. # It serializes a data class into a JSON string. if hasattr(python_val, "to_json"): json_str = python_val.to_json() @@ -740,7 +740,7 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: json_str = _json_format.MessageToJson(lv.scalar.generic) - # The `from_json` function is integrated through either the `dataclasses_json` decorator or by inheriting from `DataClassJsonMixin`. + # The `from_json` function is integrated through either the `dataclasses_json` decorator or by inheriting from `DataClassJsonMixin`. # It deserializes a JSON string into a data class. if hasattr(expected_python_type, "from_json"): dc = expected_python_type.from_json(json_str) # type: ignore