From 75a8285a86969f98bfe226ace16ef5de784b803a Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Wed, 29 May 2024 16:17:09 +0000 Subject: [PATCH 1/2] fix: remove deprecated overwrite_schema configuration which has incorrect behavior Uses of mode='append' and overwrite_schema=True lead to inconsistent behavior between Rust and PyArrow engines for write_deltalake. In the PyArrow case the parameter is quietly omitted so users may experience unexpected behavior since schemas will not actually be overridden. Users of this parameter set most likely want schema_mode='merge' which would allow for schema evolution on appends to a Delta Table Fixes #2553 --- crates/core/Cargo.toml | 2 +- python/Cargo.toml | 2 +- python/deltalake/writer.py | 15 --------------- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index a2b168df6e..8056c85f29 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deltalake-core" -version = "0.17.3" +version = "0.18.0" authors.workspace = true keywords.workspace = true readme.workspace = true diff --git a/python/Cargo.toml b/python/Cargo.toml index e1ee2fb738..f5b4cf5b5b 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deltalake-python" -version = "0.17.5" +version = "0.18.0" authors = ["Qingping Hou ", "Will Jones "] homepage = "https://github.com/delta-io/delta-rs" license = "Apache-2.0" diff --git a/python/deltalake/writer.py b/python/deltalake/writer.py index 5cd2128a17..1a367c05a0 100644 --- a/python/deltalake/writer.py +++ b/python/deltalake/writer.py @@ -31,8 +31,6 @@ else: from typing_extensions import Literal -import warnings - import pyarrow as pa import pyarrow.dataset as ds import pyarrow.fs as pa_fs @@ -106,7 +104,6 @@ def write_deltalake( name: Optional[str] = ..., description: Optional[str] = ..., configuration: Optional[Mapping[str, Optional[str]]] = ..., - overwrite_schema: bool = ..., 
schema_mode: Optional[Literal["overwrite"]] = ..., storage_options: Optional[Dict[str, str]] = ..., partition_filters: Optional[List[Tuple[str, str, Any]]] = ..., @@ -134,7 +131,6 @@ def write_deltalake( name: Optional[str] = ..., description: Optional[str] = ..., configuration: Optional[Mapping[str, Optional[str]]] = ..., - overwrite_schema: bool = ..., schema_mode: Optional[Literal["merge", "overwrite"]] = ..., storage_options: Optional[Dict[str, str]] = ..., large_dtypes: bool = ..., @@ -162,7 +158,6 @@ def write_deltalake( name: Optional[str] = ..., description: Optional[str] = ..., configuration: Optional[Mapping[str, Optional[str]]] = ..., - overwrite_schema: bool = ..., schema_mode: Optional[Literal["merge", "overwrite"]] = ..., storage_options: Optional[Dict[str, str]] = ..., predicate: Optional[str] = ..., @@ -196,7 +191,6 @@ def write_deltalake( name: Optional[str] = None, description: Optional[str] = None, configuration: Optional[Mapping[str, Optional[str]]] = None, - overwrite_schema: bool = False, schema_mode: Optional[Literal["merge", "overwrite"]] = None, storage_options: Optional[Dict[str, str]] = None, partition_filters: Optional[List[Tuple[str, str, Any]]] = None, @@ -251,7 +245,6 @@ def write_deltalake( name: User-provided identifier for this table. description: User-provided description for this table. configuration: A map containing configuration options for the metadata action. - overwrite_schema: Deprecated, use schema_mode instead. schema_mode: If set to "overwrite", allows replacing the schema of the table. Set to "merge" to merge with existing schema. storage_options: options passed to the native delta filesystem. predicate: When using `Overwrite` mode, replace data that matches a predicate. Only used in rust engine. 
@@ -269,14 +262,6 @@ def write_deltalake( table.update_incremental() __enforce_append_only(table=table, configuration=configuration, mode=mode) - if overwrite_schema: - schema_mode = "overwrite" - - warnings.warn( - "overwrite_schema is deprecated, use schema_mode instead. ", - category=DeprecationWarning, - stacklevel=2, - ) if isinstance(partition_by, str): partition_by = [partition_by] From 00a62ae12ec88f44ddc93118403714669374c676 Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Wed, 29 May 2024 16:32:47 +0000 Subject: [PATCH 2/2] chore: update all the version ranges to handle the new 0.18.0 point release Many of the subcrates are unaffected by any changes here, so I'm just expanding their compatibility range. --- crates/aws/Cargo.toml | 2 +- crates/azure/Cargo.toml | 2 +- crates/catalog-glue/Cargo.toml | 2 +- crates/deltalake/Cargo.toml | 4 ++-- crates/gcp/Cargo.toml | 2 +- crates/mount/Cargo.toml | 2 +- crates/test/Cargo.toml | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/aws/Cargo.toml b/crates/aws/Cargo.toml index 9ed7b8b6f4..e6913a2162 100644 --- a/crates/aws/Cargo.toml +++ b/crates/aws/Cargo.toml @@ -12,7 +12,7 @@ repository.workspace = true rust-version.workspace = true [dependencies] -deltalake-core = { version = "0.17.0", path = "../core" } +deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core" } aws-smithy-runtime-api = { version="1.1.7" } aws-smithy-runtime = { version="1.1.7", optional = true} aws-credential-types = { version="1.1.7", features = ["hardcoded-credentials"]} diff --git a/crates/azure/Cargo.toml b/crates/azure/Cargo.toml index bd786a404b..cbe55a1b83 100644 --- a/crates/azure/Cargo.toml +++ b/crates/azure/Cargo.toml @@ -12,7 +12,7 @@ repository.workspace = true rust-version.workspace = true [dependencies] -deltalake-core = { version = "0.17.0", path = "../core" } +deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core" } lazy_static = "1" # workspace depenndecies diff --git 
a/crates/catalog-glue/Cargo.toml b/crates/catalog-glue/Cargo.toml index 427323c3e0..c757563c1b 100644 --- a/crates/catalog-glue/Cargo.toml +++ b/crates/catalog-glue/Cargo.toml @@ -15,7 +15,7 @@ rust-version.workspace = true async-trait = { workspace = true } aws-config = "1" aws-sdk-glue = "1" -deltalake-core = { version = "0.17.0", path = "../core" } +deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core" } # This can depend on a lowest common denominator of core once that's released # deltalake_core = { version = "0.17.0" } thiserror = { workspace = true } diff --git a/crates/deltalake/Cargo.toml b/crates/deltalake/Cargo.toml index 6dd92ae2ee..4602e158ac 100644 --- a/crates/deltalake/Cargo.toml +++ b/crates/deltalake/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deltalake" -version = "0.17.3" +version = "0.18.0" authors.workspace = true keywords.workspace = true readme.workspace = true @@ -17,7 +17,7 @@ rust-version.workspace = true features = ["azure", "datafusion", "gcs", "hdfs", "json", "python", "s3", "unity-experimental"] [dependencies] -deltalake-core = { version = "0.17.3", path = "../core" } +deltalake-core = { version = "~0.18.0", path = "../core" } deltalake-aws = { version = "0.1.1", path = "../aws", default-features = false, optional = true } deltalake-azure = { version = "0.1.1", path = "../azure", optional = true } deltalake-gcp = { version = "0.2.1", path = "../gcp", optional = true } diff --git a/crates/gcp/Cargo.toml b/crates/gcp/Cargo.toml index ddd574335f..2f171a2fba 100644 --- a/crates/gcp/Cargo.toml +++ b/crates/gcp/Cargo.toml @@ -12,7 +12,7 @@ repository.workspace = true rust-version.workspace = true [dependencies] -deltalake-core = { version = "0.17.0", path = "../core" } +deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core" } lazy_static = "1" # workspace depenndecies diff --git a/crates/mount/Cargo.toml b/crates/mount/Cargo.toml index 979a19592a..a111e8b16e 100644 --- a/crates/mount/Cargo.toml +++ 
b/crates/mount/Cargo.toml @@ -12,7 +12,7 @@ repository.workspace = true rust-version.workspace = true [dependencies] -deltalake-core = { version = "0.17.0", path = "../core", features = [ +deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core", features = [ "datafusion", ] } lazy_static = "1" diff --git a/crates/test/Cargo.toml b/crates/test/Cargo.toml index bca9094150..b4fa816176 100644 --- a/crates/test/Cargo.toml +++ b/crates/test/Cargo.toml @@ -7,7 +7,7 @@ publish = false [dependencies] bytes = { workspace = true } chrono = { workspace = true, default-features = false, features = ["clock"] } -deltalake-core = { version = "0.17.0", path = "../core" } +deltalake-core = { version = ">=0.17.0, <0.19.0", path = "../core" } dotenvy = "0" fs_extra = "1.3.0" futures = { version = "0.3" }