diff --git a/daft/dataframe/dataframe.py b/daft/dataframe/dataframe.py index 4ca9fa3f40..555a756736 100644 --- a/daft/dataframe/dataframe.py +++ b/daft/dataframe/dataframe.py @@ -773,7 +773,10 @@ def write_deltalake( import deltalake import pyarrow as pa - from deltalake.schema import _convert_pa_schema_to_delta + from deltalake.schema import ( + _convert_pa_schema_to_delta, + ArrowSchemaConversionMode, + ) from deltalake.writer import ( try_get_deltatable, write_deltalake_pyarrow, @@ -821,7 +824,7 @@ def write_deltalake( warnings.warn("No DynamoDB table specified for Delta Lake locking. Defaulting to unsafe writes.") pyarrow_schema = pa.schema((f.name, f.dtype.to_arrow_dtype()) for f in self.schema()) - delta_schema = _convert_pa_schema_to_delta(pyarrow_schema, large_dtypes=True) + delta_schema = _convert_pa_schema_to_delta(pyarrow_schema, schema_conversion_mode = ArrowSchemaConversionMode.LARGE) if table: table.update_incremental() diff --git a/pyproject.toml b/pyproject.toml index ef1fa3a637..2d54fa0803 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ requires-python = ">=3.8" all = ["getdaft[aws, azure, gcp, ray, pandas, numpy, iceberg, deltalake, sql, unity]"] aws = ["boto3"] azure = [] -deltalake = ["deltalake"] +deltalake = ["deltalake (>=0.19.0,<0.20.0)"] gcp = [] hudi = ["pyarrow >= 8.0.0"] iceberg = ["pyiceberg >= 0.4.0", "packaging"] diff --git a/requirements-dev.txt b/requirements-dev.txt index 2f6ae91c64..9dbf35858d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -35,13 +35,13 @@ opencv-python==4.8.1.78 tiktoken==0.7.0 # Pyarrow -pyarrow==15.0.0 +pyarrow==17.0.0 # Ray ray[data, client]==2.10.0; python_version == '3.8' ray[data, client]==2.34.0; python_version >= '3.9' # Lance -lancedb>=0.6.10; python_version >= '3.8' +lancedb>=0.12.0; python_version >= '3.8' #Iceberg pyiceberg==0.7.0; python_version >= '3.8' @@ -49,7 +49,7 @@ tenacity==8.2.3; python_version >= '3.8' # Delta Lake deltalake==0.5.8; platform_system == "Windows" -deltalake==0.18.2; platform_system != "Windows" and python_version >= '3.8' +deltalake==0.19.1; platform_system != "Windows" and python_version >= '3.8' # Databricks databricks-sdk==0.12.0