From 945ff80cf8f6ef81787125a0842d4972ebd36dac Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 6 Sep 2024 04:53:10 -0400 Subject: [PATCH 1/4] chore(deps): bump duckdb to 1.1.0 --- ibis/backends/duckdb/__init__.py | 6 ++ ibis/backends/duckdb/tests/test_udf.py | 3 +- ibis/backends/tests/test_export.py | 8 ++- ibis/backends/tests/test_numeric.py | 5 +- ibis/backends/tests/test_temporal.py | 7 +- ibis/expr/types/generic.py | 4 +- ibis/expr/types/relations.py | 10 +-- poetry.lock | 98 +++++++++++++------------- pyproject.toml | 12 +++- requirements-dev.txt | 2 +- 10 files changed, 88 insertions(+), 67 deletions(-) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index fceb2680a3da..bf97609ff192 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -16,6 +16,7 @@ import pyarrow_hotfix # noqa: F401 import sqlglot as sg import sqlglot.expressions as sge +from packaging.version import parse as vparse import ibis import ibis.backends.sql.compilers as sc @@ -461,6 +462,11 @@ def _post_connect(self, extensions: Sequence[str] | None = None) -> None: # Default timezone, can't be set with `config` self.settings["timezone"] = "UTC" + # setting this to false disables magic variables-as-tables discovery, + # hopefully eliminating large classes of bugs + if vparse(self.version) > vparse("1"): + self.settings["python_enable_replacements"] = False + self._record_batch_readers_consumed = {} def _load_extensions( diff --git a/ibis/backends/duckdb/tests/test_udf.py b/ibis/backends/duckdb/tests/test_udf.py index b01067e44c7f..a3abc1cdf2a3 100644 --- a/ibis/backends/duckdb/tests/test_udf.py +++ b/ibis/backends/duckdb/tests/test_udf.py @@ -73,7 +73,8 @@ def favg(x: float, where: bool = True) -> float: ... def test_builtin_agg(con, func): import ibis - raw_data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] + start, stop = 1, 11 + raw_data = list(map(float, range(start, stop))) data = ibis.memtable({"a": raw_data}) expr = func(data.a) diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index 235a0657ffd7..48b7be6ec5c6 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -296,15 +296,17 @@ def test_roundtrip_partitioned_parquet(tmp_path, con, backend, awards_players): # Reingest and compare schema reingest = con.read_parquet(outparquet / "*" / "*") - reingest = reingest.cast({"yearID": "int64"}) # avoid type comparison to appease duckdb: as of 0.8.0 it returns large_string - assert reingest.schema().names == awards_players.schema().names + assert reingest.schema().keys() == awards_players.schema().keys() reingest = reingest.order_by(["yearID", "playerID", "awardID", "lgID"]) awards_players = awards_players.order_by(["yearID", "playerID", "awardID", "lgID"]) - backend.assert_frame_equal(reingest.to_pandas(), awards_players.to_pandas()) + # reorder columns to match the partitioning + backend.assert_frame_equal( + reingest.to_pandas(), awards_players[reingest.columns].to_pandas() + ) @pytest.mark.parametrize("ftype", ["csv", "parquet"]) diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 7790dfcca111..d77d7269f669 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -1290,14 +1290,13 @@ def test_floating_mod(backend, alltypes, df): ) @pytest.mark.notyet(["mysql", "pyspark"], raises=AssertionError) @pytest.mark.notyet( - ["duckdb", "sqlite"], - raises=AssertionError, - reason="returns NULL when dividing by zero", + ["sqlite"], raises=AssertionError, reason="returns NULL when dividing by zero" ) @pytest.mark.notyet(["mssql"], raises=PyODBCDataError) @pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) @pytest.mark.notyet(["postgres"], raises=PsycoPg2DivisionByZero) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) +@pytest.mark.xfail_version(duckdb=["duckdb<1.1"]) def test_divide_by_zero(backend, alltypes, df, column, denominator): expr = alltypes[column] / denominator result = expr.name("tmp").execute() diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 8b49d5330e23..b1a907a50e4d 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -1900,10 +1900,15 @@ def test_large_timestamp(con): id="ns", marks=[ pytest.mark.notyet( - ["duckdb", "impala", "pyspark", "trino"], + ["impala", "pyspark", "trino"], reason="drivers appear to truncate nanos", raises=AssertionError, ), + pytest.mark.xfail_version( + duckdb=["duckdb<1.1"], + reason="not implemented until 1.1", + raises=AssertionError, + ), pytest.mark.notimpl( ["druid"], reason="ibis normalization truncates nanos", diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py index d695ad4954fa..0e77f6040e54 100644 --- a/ibis/expr/types/generic.py +++ b/ibis/expr/types/generic.py @@ -1630,11 +1630,11 @@ def approx_nunique(self, where: ir.BooleanValue | None = None) -> ir.IntegerScal >>> t = ibis.examples.penguins.fetch() >>> t.body_mass_g.approx_nunique() ┌────┐ - │ 94 │ + │ 92 │ └────┘ >>> t.body_mass_g.approx_nunique(where=t.species == "Adelie") ┌────┐ - │ 55 │ + │ 61 │ └────┘ """ return ops.ApproxCountDistinct( diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 7ca9bed4fb1b..d0fe76d10aa1 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -3277,11 +3277,11 @@ def join( ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩ │ int64 │ string │ int64 │ string │ ├─────────┼───────────────────┼───────────────┼───────────────────┤ - │ 1732 │ funny │ 60756 │ funny │ - │ 1732 │ Highly quotable │ 60756 │ Highly quotable │ - │ 1732 │ drugs │ 106782 │ drugs │ - │ 5989 │ Leonardo DiCaprio │ 106782 │ Leonardo DiCaprio │ - │ 139385 │ tom hardy │ 89774 │ Tom Hardy │ + │ 60756 │ funny │ 1732 │ funny │ + │ 60756 │ Highly quotable │ 1732 │ Highly quotable │ + │ 89774 │ Tom Hardy │ 139385 │ tom hardy │ + │ 106782 │ drugs │ 1732 │ drugs │ + │ 106782 │ Leonardo DiCaprio │ 5989 │ Leonardo DiCaprio │ └─────────┴───────────────────┴───────────────┴───────────────────┘ """ from ibis.expr.types.joins import Join diff --git a/poetry.lock b/poetry.lock index eec39b039ca6..6a5713884077 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1512,57 +1512,57 @@ toml = ["tomli"] [[package]] name = "duckdb" -version = "1.0.0" +version = "1.1.0" description = "DuckDB in-process database" optional = true python-versions = ">=3.7.0" files = [ - {file = "duckdb-1.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4a8ce2d1f9e1c23b9bab3ae4ca7997e9822e21563ff8f646992663f66d050211"}, - {file = "duckdb-1.0.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:19797670f20f430196e48d25d082a264b66150c264c1e8eae8e22c64c2c5f3f5"}, - {file = "duckdb-1.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:b71c342090fe117b35d866a91ad6bffce61cd6ff3e0cff4003f93fc1506da0d8"}, - {file = "duckdb-1.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25dd69f44ad212c35ae2ea736b0e643ea2b70f204b8dff483af1491b0e2a4cec"}, - {file = "duckdb-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8da5f293ecb4f99daa9a9352c5fd1312a6ab02b464653a0c3a25ab7065c45d4d"}, - {file = "duckdb-1.0.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3207936da9967ddbb60644ec291eb934d5819b08169bc35d08b2dedbe7068c60"}, - {file = "duckdb-1.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1128d6c9c33e883b1f5df6b57c1eb46b7ab1baf2650912d77ee769aaa05111f9"}, - {file = "duckdb-1.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:02310d263474d0ac238646677feff47190ffb82544c018b2ff732a4cb462c6ef"}, - {file = "duckdb-1.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:75586791ab2702719c284157b65ecefe12d0cca9041da474391896ddd9aa71a4"}, - {file = "duckdb-1.0.0-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:83bb415fc7994e641344f3489e40430ce083b78963cb1057bf714ac3a58da3ba"}, - {file = "duckdb-1.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:bee2e0b415074e84c5a2cefd91f6b5ebeb4283e7196ba4ef65175a7cef298b57"}, - {file = "duckdb-1.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa5a4110d2a499312609544ad0be61e85a5cdad90e5b6d75ad16b300bf075b90"}, - {file = "duckdb-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fa389e6a382d4707b5f3d1bc2087895925ebb92b77e9fe3bfb23c9b98372fdc"}, - {file = "duckdb-1.0.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7ede6f5277dd851f1a4586b0c78dc93f6c26da45e12b23ee0e88c76519cbdbe0"}, - {file = "duckdb-1.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0b88cdbc0d5c3e3d7545a341784dc6cafd90fc035f17b2f04bf1e870c68456e5"}, - {file = "duckdb-1.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd1693cdd15375156f7fff4745debc14e5c54928589f67b87fb8eace9880c370"}, - {file = "duckdb-1.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:c65a7fe8a8ce21b985356ee3ec0c3d3b3b2234e288e64b4cfb03356dbe6e5583"}, - {file = "duckdb-1.0.0-cp312-cp312-macosx_12_0_universal2.whl", hash = "sha256:e5a8eda554379b3a43b07bad00968acc14dd3e518c9fbe8f128b484cf95e3d16"}, - {file = "duckdb-1.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:a1b6acdd54c4a7b43bd7cb584975a1b2ff88ea1a31607a2b734b17960e7d3088"}, - {file = "duckdb-1.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a677bb1b6a8e7cab4a19874249d8144296e6e39dae38fce66a80f26d15e670df"}, - {file = "duckdb-1.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:752e9d412b0a2871bf615a2ede54be494c6dc289d076974eefbf3af28129c759"}, - {file = "duckdb-1.0.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3aadb99d098c5e32d00dc09421bc63a47134a6a0de9d7cd6abf21780b678663c"}, - {file = "duckdb-1.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83b7091d4da3e9301c4f9378833f5ffe934fb1ad2b387b439ee067b2c10c8bb0"}, - {file = "duckdb-1.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:6a8058d0148b544694cb5ea331db44f6c2a00a7b03776cc4dd1470735c3d5ff7"}, - {file = "duckdb-1.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e40cb20e5ee19d44bc66ec99969af791702a049079dc5f248c33b1c56af055f4"}, - {file = "duckdb-1.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7bce1bc0de9af9f47328e24e6e7e39da30093179b1c031897c042dd94a59c8e"}, - {file = "duckdb-1.0.0-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8355507f7a04bc0a3666958f4414a58e06141d603e91c0fa5a7c50e49867fb6d"}, - {file = "duckdb-1.0.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:39f1a46f5a45ad2886dc9b02ce5b484f437f90de66c327f86606d9ba4479d475"}, - {file = "duckdb-1.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:a6d29ba477b27ae41676b62c8fae8d04ee7cbe458127a44f6049888231ca58fa"}, - {file = "duckdb-1.0.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:1bea713c1925918714328da76e79a1f7651b2b503511498ccf5e007a7e67d49e"}, - {file = "duckdb-1.0.0-cp38-cp38-macosx_12_0_universal2.whl", hash = "sha256:bfe67f3bcf181edbf6f918b8c963eb060e6aa26697d86590da4edc5707205450"}, - {file = "duckdb-1.0.0-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:dbc6093a75242f002be1d96a6ace3fdf1d002c813e67baff52112e899de9292f"}, - {file = "duckdb-1.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba1881a2b11c507cee18f8fd9ef10100be066fddaa2c20fba1f9a664245cd6d8"}, - {file = "duckdb-1.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:445d0bb35087c522705c724a75f9f1c13f1eb017305b694d2686218d653c8142"}, - {file = "duckdb-1.0.0-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:224553432e84432ffb9684f33206572477049b371ce68cc313a01e214f2fbdda"}, - {file = "duckdb-1.0.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d3914032e47c4e76636ad986d466b63fdea65e37be8a6dfc484ed3f462c4fde4"}, - {file = "duckdb-1.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:af9128a2eb7e1bb50cd2c2020d825fb2946fdad0a2558920cd5411d998999334"}, - {file = "duckdb-1.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:dd2659a5dbc0df0de68f617a605bf12fe4da85ba24f67c08730984a0892087e8"}, - {file = "duckdb-1.0.0-cp39-cp39-macosx_12_0_universal2.whl", hash = "sha256:ac5a4afb0bc20725e734e0b2c17e99a274de4801aff0d4e765d276b99dad6d90"}, - {file = "duckdb-1.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:2c5a53bee3668d6e84c0536164589d5127b23d298e4c443d83f55e4150fafe61"}, - {file = "duckdb-1.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b980713244d7708b25ee0a73de0c65f0e5521c47a0e907f5e1b933d79d972ef6"}, - {file = "duckdb-1.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21cbd4f9fe7b7a56eff96c3f4d6778770dd370469ca2212eddbae5dd63749db5"}, - {file = "duckdb-1.0.0-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed228167c5d49888c5ef36f6f9cbf65011c2daf9dcb53ea8aa7a041ce567b3e4"}, - {file = "duckdb-1.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:46d8395fbcea7231fd5032a250b673cc99352fef349b718a23dea2c0dd2b8dec"}, - {file = "duckdb-1.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:6ad1fc1a4d57e7616944166a5f9417bdbca1ea65c490797e3786e3a42e162d8a"}, - {file = "duckdb-1.0.0.tar.gz", hash = "sha256:a2a059b77bc7d5b76ae9d88e267372deff19c291048d59450c431e166233d453"}, + {file = "duckdb-1.1.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:5e4cbc408e6e41146dea89b9044dae7356e353db0c96b183e5583ee02bc6ae5d"}, + {file = "duckdb-1.1.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:6370ae27ec8167ccfbefb94f58ad9fdc7bac142399960549d6d367f233189868"}, + {file = "duckdb-1.1.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:4e1c3414f7fd01f4810dc8b335deffc91933a159282d65fef11c1286bc0ded04"}, + {file = "duckdb-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6bc2a58689adf5520303c5f68b065b9f980bd31f1366c541b8c7490abaf55cd"}, + {file = "duckdb-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d02be208d2885ca085d4c852b911493b8cdac9d6eae893259da32bd72a437c25"}, + {file = "duckdb-1.1.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:655df442ceebfc6f3fd6c8766e04b60d44dddedfa90275d794f9fab2d3180879"}, + {file = "duckdb-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6e183729bb64be7798ccbfda6283ebf423c869268c25af2b56929e48f763be2f"}, + {file = "duckdb-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:61fb838da51e07ceb0222c4406b059b90e10efcc453c19a3650b73c0112138c4"}, + {file = "duckdb-1.1.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:7807e2f0d3344668e433f0dc1f54bfaddd410589611393e9a7ed56f8dec9514f"}, + {file = "duckdb-1.1.0-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:3da30b7b466f710d52caa1fdc3ef0bf4176ad7f115953cd9f8b0fbf0f723778f"}, + {file = "duckdb-1.1.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:b9b6a77ef0183f561b1fc2945fcc762a71570ffd33fea4e3a855d413ed596fe4"}, + {file = "duckdb-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16243e66a9fd0e64ee265f2634d137adc6593f54ddf3ef55cb8a29e1decf6e54"}, + {file = "duckdb-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42b910a149e00f40a1766dc74fa309d4255b912a5d2fdcc387287658048650f6"}, + {file = "duckdb-1.1.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47849d546dc4238c0f20e95fe53b621aa5b08684e68fff91fd84a7092be91a17"}, + {file = "duckdb-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:11ec967b67159361ceade34095796a8d19368ea5c30cad988f44896b082b0816"}, + {file = "duckdb-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:510b5885ed6c267b9c0e1e7c6138fdffc2dd6f934a5a95b76da85da127213338"}, + {file = "duckdb-1.1.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:657bc7ac64d5faf069a782ae73afac51ef30ae2e5d0e09ce6a09d03db84ab35e"}, + {file = "duckdb-1.1.0-cp312-cp312-macosx_12_0_universal2.whl", hash = "sha256:89f3de8cba57d19b41cd3c47dd06d979bd2a2ffead115480e37afbe72b02896d"}, + {file = "duckdb-1.1.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:f6486323ab20656d22ffa8f3c6e109dde30d0b327b7c831f22ebcfe747f97fb0"}, + {file = "duckdb-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78a4510f82431ee3f14db689fe8727a4a9062c8f2fbb3bcfe3bfad3c1a198004"}, + {file = "duckdb-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64bf2a6e23840d662bd2ac09206a9bd4fa657418884d69e5c352d4456dc70b3c"}, + {file = "duckdb-1.1.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23fc9aa0af74e3803ed90c8d98280fd5bcac8c940592bf6288e8fd60fb051d00"}, + {file = "duckdb-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1f3aea31341ce400640dd522e4399b941f66df17e39884f446638fe958d6117c"}, + {file = "duckdb-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:3db4ab31c20de4edaef152930836b38e7662cd71370748fdf2c38ba9cf854dc4"}, + {file = "duckdb-1.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3b6b4fe1edfe35f64f403a9f0ab75258cee35abd964356893ee37424174b7e4"}, + {file = "duckdb-1.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad02f50d5a2020822d1638fc1a9bcf082056f11d2e15ccfc1c1ed4d0f85a3be"}, + {file = "duckdb-1.1.0-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb66e9e7391801928ea134dcab12d2e4c97f2ce0391c603a3e480bbb15830bc8"}, + {file = "duckdb-1.1.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:069fb7bca459e31edb32a61f0eea95d7a8a766bef7b8318072563abf8e939593"}, + {file = "duckdb-1.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e39f9b7b62e64e10d421ff04480290a70129c38067d1a4f600e9212b10542c5a"}, + {file = "duckdb-1.1.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:55ef98bcc7ba745752607f1b926e8d9b7ce32c42c423bbad10c44820aefe23a7"}, + {file = "duckdb-1.1.0-cp38-cp38-macosx_12_0_universal2.whl", hash = "sha256:e2a08175e43b865c1e9611efd18cacd29ddd69093de442b1ebdf312071df7719"}, + {file = "duckdb-1.1.0-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:0e3644b1f034012d82b9baa12a7ea306fe71dc6623731b28c753c4a617ff9499"}, + {file = "duckdb-1.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:211a33c1ddb5cc609f75eb43772b0b03b45d2fa89bec107e4715267ca907806a"}, + {file = "duckdb-1.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e74b6f8a5145abbf7e6c1a2a61f0adbcd493c19b358f524ec9a3cebdf362abb"}, + {file = "duckdb-1.1.0-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:58f1633dd2c5af5088ae2d119418e200855d0699d84f2fae9d46d30f404bcead"}, + {file = "duckdb-1.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d18caea926b1e301c29b140418fca697aad728129e269b4f82c2795a184549e1"}, + {file = "duckdb-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:cd9fb1408942411ad360f8414bc3fbf0091c396ca903d947a10f2e31324d5cbd"}, + {file = "duckdb-1.1.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bd11bc899cebf5ff936d1276a2dfb7b7db08aba3bcc42924afeafc2163bddb43"}, + {file = "duckdb-1.1.0-cp39-cp39-macosx_12_0_universal2.whl", hash = "sha256:53825a63193c582a78c152ea53de8d145744ddbeea18f452625a82ebc33eb14a"}, + {file = "duckdb-1.1.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:29dc18087de47563b3859a6b98bbed96e1c96ce5db829646dc3b16a916997e7d"}, + {file = "duckdb-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecb19319883564237a7a03a104dbe7f445e73519bb67108fcab3d19b6b91fe30"}, + {file = "duckdb-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aac2fcabe2d5072c252d0b3087365f431de812d8199705089fb073e4d039d19c"}, + {file = "duckdb-1.1.0-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d89eaaa5df8a57e7d2bc1f4c46493bb1fee319a00155f2015810ad2ace6570ae"}, + {file = "duckdb-1.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d86a6926313913cd2cc7e08816d3e7f72ba340adf2959279b1a80058be6526d9"}, + {file = "duckdb-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:d8333f3e85fa2a0f1c222b752c2bd42ea875235ff88492f7bcbb6867d0f644eb"}, + {file = "duckdb-1.1.0.tar.gz", hash = "sha256:b4d4c12b1f98732151bd31377753e0da1a20f6423016d2d097d2e31953ec7c23"}, ] [[package]] @@ -7890,7 +7890,7 @@ datafusion = ["datafusion", "numpy", "pandas", "pyarrow", "pyarrow-hotfix", "ric decompiler = ["black"] deltalake = ["deltalake"] druid = ["numpy", "pandas", "pyarrow", "pyarrow-hotfix", "pydruid", "rich"] -duckdb = ["duckdb", "numpy", "pandas", "pyarrow", "pyarrow-hotfix", "rich"] +duckdb = ["duckdb", "numpy", "packaging", "pandas", "pyarrow", "pyarrow-hotfix", "rich"] examples = ["pins"] exasol = ["numpy", "pandas", "pyarrow", "pyarrow-hotfix", "pyexasol", "rich"] flink = ["numpy", "pandas", "pyarrow", "pyarrow-hotfix", "rich"] @@ -7912,4 +7912,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "eef90ccc5c638e17fff405ae65db499f53dda72ff68ecfd08c17a63621c7e268" +content-hash = "0576b4d813c6d84051784638b1e4fc3548cfc92fcfacf3e44f0719c046a44c36" diff --git a/pyproject.toml b/pyproject.toml index cc94bc90d06e..0374ff56a628 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,7 @@ dask = { version = ">=2022.9.1,<2024.3.0", optional = true, extras = [ datafusion = { version = ">=0.6,<41", optional = true } db-dtypes = { version = ">=0.3,<2", optional = true } deltalake = { version = ">=0.9.0,<1", optional = true } -duckdb = { version = ">=0.8.1,<2", optional = true } +duckdb = { version = ">=0.8.1,<1.2", optional = true } geopandas = { version = ">=0.6,<2", optional = true } geoarrow-types = { version = ">=0.2,<1", optional = true } pyproj = { version = ">=3.3.0,<4", optional = true } @@ -184,7 +184,15 @@ datafusion = [ "rich", ] druid = ["pydruid", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] -duckdb = ["duckdb", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] +duckdb = [ + "duckdb", + "pyarrow", + "pyarrow-hotfix", + "numpy", + "pandas", + "rich", + "packaging", +] exasol = ["pyexasol", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] flink = ["pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] impala = ["impyla", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] diff --git a/requirements-dev.txt b/requirements-dev.txt index 378d02f739d2..837627c87c11 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -51,7 +51,7 @@ defusedxml==0.7.1 ; python_version >= "3.10" and python_version < "3.13" deltalake==0.19.2 ; python_version >= "3.10" and python_version < "4.0" distlib==0.3.8 ; python_version >= "3.10" and python_version < "4.0" doit==0.36.0 ; python_version >= "3.10" and python_version < "3.13" -duckdb==1.0.0 ; python_version >= "3.10" and python_version < "4.0" +duckdb==1.1.0 ; python_version >= "3.10" and python_version < "4.0" dulwich==0.21.7 ; python_version >= "3.10" and python_version < "4.0" dunamai==1.22.0 ; python_version >= "3.10" and python_version < "4.0" exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11" From 50746ea3bdb2baf73c73e9cb2772d9367585b884 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 6 Sep 2024 04:52:07 -0400 Subject: [PATCH 2/4] fix(duckdb): make array position and find_in_set backwards compatible --- ibis/backends/sql/compilers/duckdb.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/ibis/backends/sql/compilers/duckdb.py b/ibis/backends/sql/compilers/duckdb.py index 9d97eac0a3c8..c1011ea9f932 100644 --- a/ibis/backends/sql/compilers/duckdb.py +++ b/ibis/backends/sql/compilers/duckdb.py @@ -51,7 +51,6 @@ class DuckDBCompiler(SQLGlotCompiler): SIMPLE_OPS = { ops.Arbitrary: "any_value", - ops.ArrayPosition: "list_indexof", ops.ArrayMin: "list_min", ops.ArrayMax: "list_max", ops.ArrayAny: "list_bool_or", @@ -150,6 +149,13 @@ def visit_ArrayDistinct(self, op, *, arg): ), ) + def visit_ArrayPosition(self, op, *, arg, other): + return self.if_( + arg.is_(NULL) | other.is_(NULL), + NULL, + self.f.coalesce(self.f.list_indexof(arg, other), 0), + ) + def visit_ArrayCollect(self, op, *, arg, where, order_by, include_null): if not include_null: cond = arg.is_(sg.not_(NULL, copy=False)) @@ -352,7 +358,11 @@ def visit_IntervalFromInteger(self, op, *, arg, unit): return self.f[f"to_{unit.plural}"](arg) def visit_FindInSet(self, op, *, needle, values): - return self.f.list_indexof(self.f.array(*values), needle) + return self.if_( + needle.is_(NULL), + NULL, + self.f.coalesce(self.f.list_indexof(self.f.array(*values), needle), 0), + ) def visit_CountDistinctStar(self, op, *, where, arg): # use a tuple because duckdb doesn't accept COUNT(DISTINCT a, b, c, ...) From d12c2c834c0eeb842b7ef3aba81ae935318738dd Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 6 Sep 2024 04:52:39 -0400 Subject: [PATCH 3/4] fix(duckdb): handle arrow UUID values coming back as bytes --- ibis/formats/pandas.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py index ea67c9ad7537..a1ea9ea82489 100644 --- a/ibis/formats/pandas.py +++ b/ibis/formats/pandas.py @@ -383,6 +383,8 @@ def convert(value): return value elif isinstance(value, UUID): return value + elif isinstance(value, bytes): + return UUID(bytes=value) return UUID(value) return convert From 1cb04138f349fd782d3cc8a2ec70015d658660d7 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 9 Sep 2024 09:01:12 -0400 Subject: [PATCH 4/4] test(duckdb): relax approx_nunique assertion --- ibis/backends/tests/test_aggregation.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 4028980cab1f..a44d95cb08ac 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -466,6 +466,12 @@ def mean_and_std(v): lambda t, where: t.string_col.approx_nunique(where=where), lambda t, where: t.string_col[where].nunique(), id="approx_nunique", + marks=pytest.mark.xfail_version( + duckdb=["duckdb>=1.1"], + raises=AssertionError, + reason="not exact, even at this tiny scale", + strict=False, + ), ), param( lambda t, where: t.bigint_col.bit_and(where=where),