diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index 0adbe10469c4..78bb6fd48e13 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -166,7 +166,6 @@ def _get_schema_using_query(self, query: str) -> sch.Schema: this=table, kind="VIEW", expression=sg.parse_one(query, read=self.dialect), - properties=sge.Properties(expressions=[sge.TemporaryProperty()]), ) with self._safe_raw_sql(src): @@ -689,6 +688,9 @@ def truncate_table( with self._safe_raw_sql(sge.delete(ident)): pass + def _create_cached_table(self, name: str, expr: ir.Table) -> ir.Table: + return self.create_table(name, expr, schema=expr.schema()) + @contextlib.contextmanager def _create_and_drop_memtable(_conn, table_name, tmp_name, overwrite): diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index cfa27b4ab94b..52258127bced 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -1678,6 +1678,7 @@ def test_value_counts_on_expr(backend, alltypes, df): backend.assert_frame_equal(result, expected) +@pytest.mark.xfail_version(datafusion=["datafusion==42"]) def test_group_by_expr(backend, con): expr = ( ibis.memtable( diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index e2f9f3ed522e..e8311fa176de 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -294,11 +294,16 @@ def test_create_table_from_schema(con, new_schema, temp_table): reason="temporary tables not implemented", raises=NotImplementedError, ) -@pytest.mark.never( +@pytest.mark.notyet( ["risingwave"], raises=com.UnsupportedOperationError, reason="Feature is not yet implemented: CREATE TEMPORARY TABLE", ) +@pytest.mark.notyet( + ["datafusion"], + raises=Exception, + reason="temp tables are not supported upstream in datafusion", +) @pytest.mark.notimpl( ["flink"], raises=com.IbisError, diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 2c522c8dcd7d..b045b586342a 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1068,7 +1068,7 @@ def test_int_scalar(alltypes): assert isinstance(expr.execute(), int) -@pytest.mark.notimpl(["datafusion", "polars", "druid"]) +@pytest.mark.notimpl(["polars", "druid"]) @pytest.mark.notyet( ["clickhouse"], reason="https://github.com/ClickHouse/ClickHouse/issues/6697" ) diff --git a/ibis/backends/tests/test_map.py b/ibis/backends/tests/test_map.py index e8a80757cff9..fce9aa7169f9 100644 --- a/ibis/backends/tests/test_map.py +++ b/ibis/backends/tests/test_map.py @@ -555,7 +555,6 @@ def test_map_construct_dict(con, keys, values): ) @mark_notimpl_risingwave_hstore @mark_notyet_postgres -@mark_notyet_datafusion def test_map_construct_array_column(con, alltypes, df): expr = ibis.map(ibis.array([alltypes.string_col]), ibis.array([alltypes.int_col])) result = con.execute(expr) diff --git a/ibis/backends/tests/tpc/ds/test_queries.py b/ibis/backends/tests/tpc/ds/test_queries.py index 2f980734c257..8fc55d18c52f 100644 --- a/ibis/backends/tests/tpc/ds/test_queries.py +++ b/ibis/backends/tests/tpc/ds/test_queries.py @@ -1994,6 +1994,9 @@ def test_38(store_sales, catalog_sales, web_sales, date_dim, customer): @tpc_test("ds") +@pytest.mark.notyet( + ["datafusion"], reason="incorrect results", raises=AssertionError, strict=False +) def test_39(inventory, item, warehouse, date_dim): inv = ( inventory.join(item, [("inv_item_sk", "i_item_sk")]) @@ -3980,10 +3983,6 @@ def test_74(customer, store_sales, date_dim, web_sales): return expr -@pytest.mark.notyet( - ["datafusion"], - reason="Invalid argument error: RowConverter column schema mismatch, expected Int32 got Int64", -) @tpc_test("ds") def test_75( catalog_sales, diff --git a/poetry.lock b/poetry.lock index bc719196697e..9aa4a3df9a21 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1679,17 +1679,17 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"] [[package]] name = "datafusion" -version = "41.0.0" +version = "42.0.0" description = "Build and run queries against data" optional = true python-versions = ">=3.6" files = [ - {file = "datafusion-41.0.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:4b484035765a4f239737d6313af3cc3822448dfa86738ec44db02dfc4e08057f"}, - {file = "datafusion-41.0.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:fe324048a63bf462d49cca3b046821fcb546cdab3a13b1fe860aab038c4e4ad4"}, - {file = "datafusion-41.0.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7c6987ad20b238a555fac09851f2329cd8b7e829de98446159ea27a172a5f1f"}, - {file = "datafusion-41.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:a10179610c8d211d215ff3533bbd8f5faf3b47b00c0e8371ca9656e98c420380"}, - {file = "datafusion-41.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:a862f6667979a367c30ae58d8770bba044bab09d1da9012ee37cb3bb86fccdf2"}, - {file = "datafusion-41.0.0.tar.gz", hash = "sha256:b2124bcd976520a8dbcb456c200f2cb8b1343285e9329fe757aa628bbd0b08f7"}, + {file = "datafusion-42.0.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e9506356ad5d5f7b0f2d636cea24c5fc72518305aacf81207ecb4d59d0ea6866"}, + {file = "datafusion-42.0.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:63a07e3779d3bdca0ecafd2eedf6948feff3ecd496a1fcf93baac122a48c6f4f"}, + {file = "datafusion-42.0.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4f04adbef61e70a3f56c6fcf6b615f75bbc42610947092870acff03171a456b"}, + {file = "datafusion-42.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1fb4a7f34225948d7a5db2ee3b3a578dd6b95c2bae39fd940dfd2de90a34088d"}, + {file = "datafusion-42.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:71d9af5a9a6852f8121e4811a913cfec596f4c36240149edea57d27af7318749"}, + {file = "datafusion-42.0.0.tar.gz", hash = "sha256:c88955a9ac59504d9d302be03158e692b9f99c3bf53dca0e84252d81f8c5ca91"}, ] [package.dependencies] @@ -8390,4 +8390,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "7cf9aec157e3ef124e9e577b32081da071eb6487db43c477123882941aca7d6c" +content-hash = "2d83799f3242923213e742688c1e9be0fdd352b03dc9073873fb8eebb3084265" diff --git a/pyproject.toml b/pyproject.toml index c7a5c22cd9ea..ab124f275147 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ clickhouse-connect = { version = ">=0.5.23,<1", optional = true, extras = [ "numpy", "pandas", ] } -datafusion = { version = ">=0.6,<42", optional = true } +datafusion = { version = ">=0.6,<43", optional = true } db-dtypes = { version = ">=0.3,<2", optional = true } deltalake = { version = ">=0.9.0,<1", optional = true } duckdb = { version = ">=0.8.1,<1.2", optional = true } diff --git a/requirements-dev.txt b/requirements-dev.txt index e5f1f8aadf5d..31e3353ee375 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -45,7 +45,7 @@ coverage[toml]==7.6.3 ; python_version >= "3.10" and python_version < "4.0" crashtest==0.4.1 ; python_version >= "3.10" and python_version < "4.0" cryptography==43.0.1 ; python_version >= "3.10" and python_version < "4.0" cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.13" -datafusion==41.0.0 ; python_version >= "3.10" and python_version < "4.0" +datafusion==42.0.0 ; python_version >= "3.10" and python_version < "4.0" db-dtypes==1.3.0 ; python_version >= "3.10" and python_version < "4.0" debugpy==1.8.7 ; python_version >= "3.10" and python_version < "3.13" decorator==5.1.1 ; python_version >= "3.10" and python_version < "4.0"