diff --git a/dlt/common/libs/pydantic.py b/dlt/common/libs/pydantic.py index bef73d9306..df3554ff21 100644 --- a/dlt/common/libs/pydantic.py +++ b/dlt/common/libs/pydantic.py @@ -32,6 +32,7 @@ is_subclass, is_union_type, ) +from dlt.common.warnings import Dlt100DeprecationWarning try: from pydantic import BaseModel, ValidationError, Json, create_model @@ -69,11 +70,12 @@ class DltConfig(TypedDict, total=False): >>> class ItemModel(BaseModel): >>> b: bool >>> nested: Dict[str, Any] - >>> dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + >>> dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True} """ - skip_complex_types: bool + skip_nested_types: bool """If True, columns of complex types (`dict`, `list`, `BaseModel`) will be excluded from dlt schema generated from the model""" + skip_complex_types: bool # deprecated def pydantic_to_table_schema_columns( @@ -90,9 +92,17 @@ def pydantic_to_table_schema_columns( Returns: TTableSchemaColumns: table schema columns dict """ - skip_complex_types = False + skip_nested_types = False if hasattr(model, "dlt_config"): - skip_complex_types = model.dlt_config.get("skip_complex_types", False) + if "skip_complex_types" in model.dlt_config: + warnings.warn( + "`skip_complex_types` is deprecated, use `skip_nested_types` instead.", + Dlt100DeprecationWarning, + stacklevel=2, + ) + skip_nested_types = model.dlt_config["skip_complex_types"] + else: + skip_nested_types = model.dlt_config.get("skip_nested_types", False) result: TTableSchemaColumns = {} @@ -136,7 +146,7 @@ def pydantic_to_table_schema_columns( # try to coerce unknown type to text data_type = "text" - if is_inner_type_pydantic_model and not skip_complex_types: + if is_inner_type_pydantic_model and not skip_nested_types: result[name] = { "name": name, "data_type": "json", @@ -154,7 +164,7 @@ def pydantic_to_table_schema_columns( **hints, "name": snake_case_naming_convention.make_path(name, hints["name"]), } - elif data_type == "json" and skip_complex_types: + elif data_type == "json" and skip_nested_types: continue else: result[name] = { diff --git a/docs/website/docs/general-usage/resource.md b/docs/website/docs/general-usage/resource.md index 62030565a0..614dec4f30 100644 --- a/docs/website/docs/general-usage/resource.md +++ b/docs/website/docs/general-usage/resource.md @@ -125,14 +125,14 @@ from typing import ClassVar from dlt.common.libs.pydantic import DltConfig class UserWithNesting(User): - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True} @dlt.resource(name="user", columns=UserWithNesting) def get_users(): ... ``` -`"skip_complex_types"` omits any `dict`/`list`/`BaseModel` type fields from the schema, so dlt will fall back on the default +`"skip_nested_types"` omits any `dict`/`list`/`BaseModel` type fields from the schema, so dlt will fall back on the default behavior of creating child tables for these fields. We do not support `RootModel` that validate simple types. You can add such a validator yourself, see [data filtering section](#filter-transform-and-pivot-data). diff --git a/tests/common/normalizers/test_json_relational.py b/tests/common/normalizers/test_json_relational.py index f252beaf8a..748259cba1 100644 --- a/tests/common/normalizers/test_json_relational.py +++ b/tests/common/normalizers/test_json_relational.py @@ -88,7 +88,7 @@ def test_preserve_json_value_with_hint(norm: RelationalNormalizer) -> None: assert "value__json" not in flattened_row -def test_child_table_linking(norm: RelationalNormalizer) -> None: +def test_nested_table_linking(norm: RelationalNormalizer) -> None: row = {"f": [{"l": ["a", "b", "c"], "v": 120, "o": [{"a": 1}, {"a": 2}]}]} # request _dlt_root_id propagation add_dlt_root_id_propagation(norm) diff --git a/tests/libs/test_deltalake.py b/tests/libs/test_deltalake.py index dc5586eb32..e18fb1abd7 100644 --- a/tests/libs/test_deltalake.py +++ b/tests/libs/test_deltalake.py @@ -143,7 +143,7 @@ def arrow_data( # type: ignore[return] assert dt.to_pyarrow_table().shape == (arrow_table.num_rows, arrow_table.num_columns) # the previous table version should still exist - dt.load_version(1) + dt.load_as_version(1) assert dt.to_pyarrow_table().shape == (arrow_table.num_rows * 2, arrow_table.num_columns) # `merge` should resolve to `append` bevavior diff --git a/tests/libs/test_pydantic.py b/tests/libs/test_pydantic.py index 8ba3f85512..70846dcd72 100644 --- a/tests/libs/test_pydantic.py +++ b/tests/libs/test_pydantic.py @@ -168,7 +168,7 @@ class User(BaseModel): final_location: Final[Annotated[Union[str, int], None]] # type: ignore[misc] final_optional: Final[Annotated[Optional[str], None]] # type: ignore[misc] - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True} USER_INSTANCE_DATA = dict( @@ -260,9 +260,9 @@ def test_pydantic_model_to_columns_annotated() -> None: assert schema_from_user_class["final_optional"]["nullable"] is True -def test_pydantic_model_skip_complex_types() -> None: +def test_pydantic_model_skip_nested_types() -> None: class SkipNestedModel(Model): - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True} result = pydantic_to_table_schema_columns(SkipNestedModel) @@ -393,7 +393,7 @@ class UserPipe(BaseModel): final_location: Final[Annotated[Union[str, int], None]] # type: ignore[misc, syntax, unused-ignore] final_optional: Final[Annotated[str | None, None]] # type: ignore[misc, syntax, unused-ignore] - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True} # TODO: move to separate test model_freeze = apply_schema_contract_to_model(UserPipe, "evolve", "freeze") @@ -426,7 +426,7 @@ def test_item_list_validation() -> None: class ItemModel(BaseModel): b: bool opt: Optional[int] = None - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False} # non validating items removed from the list (both extra and declared) discard_model = apply_schema_contract_to_model(ItemModel, "discard_row", "discard_row") @@ -563,7 +563,7 @@ class ItemModel(BaseModel): def test_item_validation() -> None: class ItemModel(BaseModel): b: bool - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False} # non validating items removed from the list (both extra and declared) discard_model = apply_schema_contract_to_model(ItemModel, "discard_row", "discard_row") @@ -648,9 +648,10 @@ class Parent(BaseModel): optional_parent_attribute: Optional[str] = None -def test_pydantic_model_flattened_when_skip_complex_types_is_true(): +@pytest.mark.parametrize("config_attr", ("skip_nested_types", "skip_complex_types")) +def test_pydantic_model_flattened_when_skip_nested_types_is_true(config_attr: str): class MyParent(Parent): - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + dlt_config: ClassVar[DltConfig] = {config_attr: True} # type: ignore schema = pydantic_to_table_schema_columns(MyParent) @@ -673,10 +674,11 @@ class MyParent(Parent): } -def test_considers_model_as_complex_when_skip_complex_types_is_false(): +@pytest.mark.parametrize("config_attr", ("skip_nested_types", "skip_complex_types")) +def test_considers_model_as_complex_when_skip_nested_types_is_false(config_attr: str): class MyParent(Parent): data_dictionary: Dict[str, Any] = None - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False} + dlt_config: ClassVar[DltConfig] = {config_attr: False} # type: ignore schema = pydantic_to_table_schema_columns(MyParent) @@ -691,11 +693,11 @@ class MyParent(Parent): } -def test_considers_dictionary_as_complex_when_skip_complex_types_is_false(): +def test_considers_dictionary_as_complex_when_skip_nested_types_is_false(): class MyParent(Parent): data_list: List[str] = [] data_dictionary: Dict[str, Any] = None - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False} schema = pydantic_to_table_schema_columns(MyParent) @@ -712,11 +714,11 @@ class MyParent(Parent): } -def test_skip_json_types_when_skip_complex_types_is_true_and_field_is_not_pydantic_model(): +def test_skip_json_types_when_skip_nested_types_is_true_and_field_is_not_pydantic_model(): class MyParent(Parent): data_list: List[str] = [] data_dictionary: Dict[str, Any] = None - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True} schema = pydantic_to_table_schema_columns(MyParent) diff --git a/tests/load/pipeline/test_duckdb.py b/tests/load/pipeline/test_duckdb.py index bc823a1857..b028edc1bb 100644 --- a/tests/load/pipeline/test_duckdb.py +++ b/tests/load/pipeline/test_duckdb.py @@ -149,7 +149,7 @@ class EventDetail(BaseModel): is_complete: bool class EventV1(BaseModel): - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True} ver: int id: str # noqa @@ -184,7 +184,7 @@ class EventDetailV2(BaseModel): time: Optional[datetime] class EventV2(BaseModel): - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True} ver: int id: str # noqa diff --git a/tests/pipeline/cases/contracts/trace.schema.yaml b/tests/pipeline/cases/contracts/trace.schema.yaml index c1c1fde4c7..1d6c31bdd7 100644 --- a/tests/pipeline/cases/contracts/trace.schema.yaml +++ b/tests/pipeline/cases/contracts/trace.schema.yaml @@ -636,9 +636,21 @@ tables: data_type: data_type: text nullable: true + precision: + data_type: bigint + nullable: true + scale: + data_type: bigint + nullable: true + timezone: + data_type: bool + nullable: true nullable: data_type: bool nullable: true + variant: + data_type: bool + nullable: true primary_key: data_type: bool nullable: true @@ -666,7 +678,25 @@ tables: unique: data_type: bool nullable: true - foreign_key: + row_key: + data_type: bool + nullable: true + parent_key: + data_type: bool + nullable: true + root_key: + data_type: bool + nullable: true + merge_key: + data_type: bool + nullable: true + partition: + data_type: bool + nullable: true + cluster: + data_type: bool + nullable: true + sort: data_type: bool nullable: true parent: trace__steps__step_info__load_packages__tables diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index 81a52abade..49ebaa57de 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -18,6 +18,7 @@ from dlt.common.schema.typing import ( LOADS_TABLE_NAME, PIPELINE_STATE_TABLE_NAME, + SCHEMA_ENGINE_VERSION, VERSION_TABLE_NAME, TStoredSchema, ) @@ -161,7 +162,7 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/schemas/github.schema.json" ) ) - assert github_schema["engine_version"] == 9 + assert github_schema["engine_version"] == SCHEMA_ENGINE_VERSION assert "schema_version_hash" in github_schema["tables"][LOADS_TABLE_NAME]["columns"] # print(github_schema["tables"][PIPELINE_STATE_TABLE_NAME]) # load state @@ -274,7 +275,7 @@ def assert_github_pipeline_end_state( pipeline.sync_destination() # print(pipeline.working_dir) # we have updated schema - assert pipeline.default_schema.ENGINE_VERSION == 9 + assert pipeline.default_schema.ENGINE_VERSION == SCHEMA_ENGINE_VERSION # make sure that schema hash retrieved from the destination is exactly the same as the schema hash that was in storage before the schema was wiped assert pipeline.default_schema.stored_version_hash == orig_schema["version_hash"] @@ -333,7 +334,7 @@ def test_load_package_with_dlt_update(test_storage: FileStorage) -> None: ) pipeline = pipeline.drop() pipeline.sync_destination() - assert pipeline.default_schema.ENGINE_VERSION == 9 + assert pipeline.default_schema.ENGINE_VERSION == SCHEMA_ENGINE_VERSION # schema version does not match `dlt.attach` does not update to the right schema by itself assert pipeline.default_schema.stored_version_hash != github_schema["version_hash"] # state has hash diff --git a/tests/pipeline/test_pipeline_extra.py b/tests/pipeline/test_pipeline_extra.py index 98f748ce12..1fe6231279 100644 --- a/tests/pipeline/test_pipeline_extra.py +++ b/tests/pipeline/test_pipeline_extra.py @@ -147,7 +147,7 @@ class User(BaseModel): user_label: UserLabel user_labels: List[UserLabel] - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True} user = User( user_id=1, @@ -289,11 +289,11 @@ class Child(BaseModel): optional_child_attribute: Optional[str] = None -def test_flattens_model_when_skip_complex_types_is_set() -> None: +def test_flattens_model_when_skip_nested_types_is_set() -> None: class Parent(BaseModel): child: Child optional_parent_attribute: Optional[str] = None - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True} example_data = { "optional_parent_attribute": None, @@ -351,12 +351,12 @@ class Parent(BaseModel): } -def test_considers_model_as_complex_when_skip_complex_types_is_not_set(): +def test_considers_model_as_complex_when_skip_nested_types_is_not_set(): class Parent(BaseModel): child: Child optional_parent_attribute: Optional[str] = None data_dictionary: Dict[str, Any] = None - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False} example_data = { "optional_parent_attribute": None, @@ -412,11 +412,11 @@ class Parent(BaseModel): } -def test_skips_complex_fields_when_skip_complex_types_is_true_and_field_is_not_a_pydantic_model(): +def test_skips_complex_fields_when_skip_nested_types_is_true_and_field_is_not_a_pydantic_model(): class Parent(BaseModel): data_list: List[int] = [] data_dictionary: Dict[str, Any] = None - dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True} example_data = { "optional_parent_attribute": None,