From 937015198df144a91e6ff73b1429d2fd1e23a588 Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Wed, 3 Jul 2024 04:06:50 +0000 Subject: [PATCH] fix: set large_dtypes=False as the default behavior on merge Automatic upcasting of the data type is surprising and unexpected for users. IMHO we should not be casting data types unless required. --- python/deltalake/table.py | 2 +- python/tests/test_merge.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/deltalake/table.py b/python/deltalake/table.py index ee4e45c171..14ffd3f6f9 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -825,7 +825,7 @@ def merge( target_alias: Optional[str] = None, error_on_type_mismatch: bool = True, writer_properties: Optional[WriterProperties] = None, - large_dtypes: bool = True, + large_dtypes: bool = False, custom_metadata: Optional[Dict[str, str]] = None, ) -> "TableMerger": """Pass the source data which you want to merge on the target delta table, providing a diff --git a/python/tests/test_merge.py b/python/tests/test_merge.py index ae18ecc893..0b047580f1 100644 --- a/python/tests/test_merge.py +++ b/python/tests/test_merge.py @@ -970,17 +970,18 @@ def test_struct_casting(tmp_path: pathlib.Path): assert not df.empty schema = pa.Table.from_pandas(df=df).schema - dt = DeltaTable.create(tmp_path, schema, name="test") metadata = dt.metadata() assert metadata.name == "test" result = ( dt.merge( - source=df_merge, predicate="t.id = s.id", source_alias="s", target_alias="t" + source=df_merge, predicate="t.id = s.id", source_alias="s", target_alias="t", ) .when_matched_update_all() - .when_not_matched_insert_all() .execute() ) assert result is not None