test(duckdb): get things passing with duckdb 0.10.0 (#8373)

Get tests passing with duckdb 0.10.0
ibis-project · Mar 6, 2024 · 8919478
1 parent 79ce978
commit 8919478
Show file tree

Hide file tree

Showing 11 changed files with 211 additions and 186 deletions.
diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml
@@ -57,6 +57,7 @@ jobs:
         backend:
           - name: duckdb
             title: DuckDB
+            serial: true
             extras:
               - duckdb
               - deltalake

diff --git a/ibis/backends/duckdb/tests/test_register.py b/ibis/backends/duckdb/tests/test_register.py
@@ -11,6 +11,7 @@
 import pandas.testing as tm
 import pyarrow as pa
 import pytest
+from pytest import param
 
 import ibis
 import ibis.expr.datatypes as dt
@@ -106,7 +107,9 @@ def test_read_geo_to_geopandas(con, data_dir, gpd):
     assert isinstance(gdf, gpd.GeoDataFrame)
 
 
-def test_read_geo_from_url(con, monkeypatch):
+def test_read_geo_from_url(monkeypatch):
+    con = ibis.duckdb.connect()
+
     loaded_exts = []
     monkeypatch.setattr(con, "_load_extensions", lambda x, **_: loaded_exts.extend(x))
 
@@ -422,13 +425,33 @@ def test_csv_with_slash_n_null(con, tmp_path):
 
 
 @pytest.mark.xfail(
-    LINUX and SANDBOXED,
-    reason=("nix can't hit GCS because it is sandboxed."),
+    LINUX and SANDBOXED, reason="nix can't hit GCS because it is sandboxed."
+)
+@pytest.mark.parametrize(
+    "extensions",
+    [
+        [],
+        param(
+            ["httpfs"],
+            marks=[
+                pytest.mark.xfail(
+                    duckdb.__version__ == "0.10.0",
+                    reason="https://github.com/duckdb/duckdb/issues/10698",
+                    raises=duckdb.HTTPException,
+                )
+            ],
+        ),
+    ],
 )
-def test_register_filesystem_gcs(con):
+def test_register_filesystem_gcs(extensions):
     fsspec = pytest.importorskip("fsspec")
     pytest.importorskip("gcsfs")
 
+    con = ibis.duckdb.connect()
+
+    for ext in extensions:
+        con.load_extension(ext)
+
     gcs = fsspec.filesystem("gcs")
 
     con.register_filesystem(gcs)

diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py
@@ -1,14 +1,16 @@
 from __future__ import annotations
 
 try:
+    from duckdb import BinderException as DuckDBBinderException
     from duckdb import ConversionException as DuckDBConversionException
     from duckdb import InvalidInputException as DuckDBInvalidInputException
     from duckdb import NotImplementedException as DuckDBNotImplementedException
+    from duckdb import OutOfRangeException as DuckDBOutOfRangeException
     from duckdb import ParserException as DuckDBParserException
 except ImportError:
     DuckDBConversionException = DuckDBInvalidInputException = DuckDBParserException = (
         DuckDBNotImplementedException
-    ) = None
+    ) = DuckDBBinderException = DuckDBOutOfRangeException = None
 
 try:
     from clickhouse_connect.driver.exceptions import (

diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py
@@ -17,6 +17,7 @@
 from ibis import literal as L
 from ibis.backends.tests.errors import (
     ArrowNotImplementedError,
+    DuckDBOutOfRangeException,
     DuckDBParserException,
     ExaQueryError,
     GoogleBadRequest,
@@ -825,18 +826,46 @@ def test_trig_functions_literals(con, expr, expected):
         param(_.dc.acos(), np.arccos, id="acos"),
         param(_.dc.asin(), np.arcsin, id="asin"),
         param(_.dc.atan(), np.arctan, id="atan"),
-        param(_.dc.atan2(_.dc), lambda c: np.arctan2(c, c), id="atan2"),
+        param(
+            _.dc.atan2(_.dc),
+            lambda c: np.arctan2(c, c),
+            id="atan2",
+            marks=[
+                pytest.mark.notyet(
+                    ["mssql", "exasol"], raises=(PyODBCProgrammingError, ExaQueryError)
+                )
+            ],
+        ),
         param(_.dc.cos(), np.cos, id="cos"),
-        param(_.dc.cot(), lambda c: 1.0 / np.tan(c), id="cot"),
         param(_.dc.sin(), np.sin, id="sin"),
         param(_.dc.tan(), np.tan, id="tan"),
     ],
 )
 def test_trig_functions_columns(backend, expr, alltypes, df, expected_fn):
     dc_max = df.double_col.max()
-    expr = alltypes.mutate(dc=(_.double_col / dc_max).nullif(0)).select(tmp=expr)
+    expr = alltypes.mutate(dc=_.double_col / dc_max).select(tmp=expr)
+    result = expr.tmp.to_pandas()
+    expected = expected_fn(df.double_col / dc_max).rename("tmp")
+    backend.assert_series_equal(result, expected)
+
+
+@pytest.mark.notyet(
+    ["mssql", "mysql", "duckdb", "exasol"],
+    raises=(
+        PyODBCProgrammingError,
+        MySQLOperationalError,
+        DuckDBOutOfRangeException,
+        ExaQueryError,
+    ),
+)
+@pytest.mark.broken(
+    ["sqlite", "impala"], raises=AssertionError, reason="behavior doesn't match numpy"
+)
+def test_cotangent(backend, alltypes, df):
+    dc_max = df.double_col.max()
+    expr = alltypes.select(tmp=(_.double_col / dc_max).cot())
     result = expr.tmp.to_pandas()
-    expected = expected_fn((df.double_col / dc_max).replace(0.0, np.nan)).rename("tmp")
+    expected = 1.0 / np.tan(df.double_col / dc_max).rename("tmp")
     backend.assert_series_equal(result, expected)
 
 

diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py
@@ -20,6 +20,7 @@
 from ibis.backends.tests.errors import (
     ArrowInvalid,
     ClickHouseDatabaseError,
+    DuckDBBinderException,
     DuckDBInvalidInputException,
     ExaQueryError,
     GoogleBadRequest,
@@ -1131,69 +1132,33 @@ def test_timestamp_comparison_filter(backend, con, alltypes, df, func_name):
     backend.assert_frame_equal(result, expected)
 
 
+no_mixed_timestamp_comparisons = [
+    pytest.mark.notimpl(
+        ["dask"],
+        raises=ValueError,
+        reason="Metadata inference failed in `gt`.",
+    ),
+    pytest.mark.notimpl(
+        ["pandas"],
+        raises=TypeError,
+        reason="Invalid comparison between dtype=datetime64[ns, UTC] and datetime",
+    ),
+    pytest.mark.never(
+        ["duckdb"],
+        raises=DuckDBBinderException,
+        # perhaps we should consider disallowing this in ibis as well
+        reason="DuckDB doesn't allow comparing timestamp with and without timezones",
+    ),
+]
+
+
 @pytest.mark.parametrize(
     "func_name",
     [
-        param(
-            "gt",
-            marks=[
-                pytest.mark.notimpl(
-                    ["dask"],
-                    raises=ValueError,
-                    reason="Metadata inference failed in `gt`.",
-                ),
-                pytest.mark.notimpl(
-                    ["pandas"],
-                    raises=TypeError,
-                    reason="Invalid comparison between dtype=datetime64[ns, UTC] and datetime",
-                ),
-            ],
-        ),
-        param(
-            "ge",
-            marks=[
-                pytest.mark.notimpl(
-                    ["dask"],
-                    raises=ValueError,
-                    reason="Metadata inference failed in `ge`.",
-                ),
-                pytest.mark.notimpl(
-                    ["pandas"],
-                    raises=TypeError,
-                    reason="Invalid comparison between dtype=datetime64[ns, UTC] and datetime",
-                ),
-            ],
-        ),
-        param(
-            "lt",
-            marks=[
-                pytest.mark.notimpl(
-                    ["dask"],
-                    raises=ValueError,
-                    reason="Metadata inference failed in `lt`.",
-                ),
-                pytest.mark.notimpl(
-                    ["pandas"],
-                    raises=TypeError,
-                    reason="Invalid comparison between dtype=datetime64[ns, UTC] and datetime",
-                ),
-            ],
-        ),
-        param(
-            "le",
-            marks=[
-                pytest.mark.notimpl(
-                    ["dask"],
-                    raises=ValueError,
-                    reason="Metadata inference failed in `le`.",
-                ),
-                pytest.mark.notimpl(
-                    ["pandas"],
-                    raises=TypeError,
-                    reason="Invalid comparison between dtype=datetime64[ns, UTC] and datetime",
-                ),
-            ],
-        ),
+        param("gt", marks=no_mixed_timestamp_comparisons),
+        param("ge", marks=no_mixed_timestamp_comparisons),
+        param("lt", marks=no_mixed_timestamp_comparisons),
+        param("le", marks=no_mixed_timestamp_comparisons),
         "eq",
         "ne",
     ],

diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py
@@ -1181,7 +1181,6 @@ def test_range_expression_bounds(backend):
     raises=PsycoPg2InternalError,
     reason="Feature is not yet implemented: Unrecognized window function: percent_rank",
 )
-@pytest.mark.broken(["dask"], reason="different result ordering", raises=AssertionError)
 def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df):
     # GH #7631
     t = alltypes
@@ -1197,7 +1196,9 @@ def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df):
         .rename(expr.get_name())
     )
 
-    backend.assert_series_equal(result, expected)
+    backend.assert_series_equal(
+        result.value_counts().sort_index(), expected.value_counts().sort_index()
+    )
 
 
 @pytest.mark.notyet(

diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py
@@ -1223,21 +1223,19 @@ def as_scalar(self):
         Examples
         --------
         >>> import ibis
-        >>>
         >>> ibis.options.interactive = True
-        >>>
         >>> t = ibis.examples.penguins.fetch()
         >>> max_gentoo_weight = t.filter(t.species == "Gentoo").body_mass_g.max()
         >>> light_penguins = t.filter(t.body_mass_g < max_gentoo_weight / 2)
-        >>> light_penguins.group_by("species").count()
-        ┏━━━━━━━━━━━┳━━━━━━━━━━━━━┓
-        ┃ species   ┃ CountStar() ┃
-        ┡━━━━━━━━━━━╇━━━━━━━━━━━━━┩
-        │ string    │ int64       │
-        ├───────────┼─────────────┤
-        │ Adelie    │          15 │
-        │ Chinstrap │           2 │
-        └───────────┴─────────────┘
+        >>> light_penguins.species.value_counts().order_by("species")
+        ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+        ┃ species   ┃ species_count ┃
+        ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+        │ string    │ int64         │
+        ├───────────┼───────────────┤
+        │ Adelie    │            15 │
+        │ Chinstrap │             2 │
+        └───────────┴───────────────┘
         """
         parents = self.op().relations
         if parents:
@@ -1361,21 +1359,19 @@ def as_scalar(self) -> Scalar:
         Examples
         --------
         >>> import ibis
-        >>>
         >>> ibis.options.interactive = True
-        >>>
         >>> t = ibis.examples.penguins.fetch()
         >>> heavy_gentoo = t.filter(t.species == "Gentoo", t.body_mass_g > 6200)
         >>> from_that_island = t.filter(t.island == heavy_gentoo.island.as_scalar())
-        >>> from_that_island.group_by("species").count()
-        ┏━━━━━━━━━┳━━━━━━━━━━━━━┓
-        ┃ species ┃ CountStar() ┃
-        ┡━━━━━━━━━╇━━━━━━━━━━━━━┩
-        │ string  │ int64       │
-        ├─────────┼─────────────┤
-        │ Adelie  │          44 │
-        │ Gentoo  │         124 │
-        └─────────┴─────────────┘
+        >>> from_that_island.species.value_counts().order_by("species")
+        ┏━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+        ┃ species ┃ species_count ┃
+        ┡━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+        │ string  │ int64         │
+        ├─────────┼───────────────┤
+        │ Adelie  │            44 │
+        │ Gentoo  │           124 │
+        └─────────┴───────────────┘
         """
         return self.as_table().as_scalar()
 

diff --git a/ibis/expr/types/numeric.py b/ibis/expr/types/numeric.py
@@ -594,16 +594,16 @@ def cot(self) -> NumericValue:
         --------
         >>> import ibis
         >>> ibis.options.interactive = True
-        >>> t = ibis.memtable({"values": [-1, 0, 1]})
+        >>> t = ibis.memtable({"values": [-1, -2, 3]})
         >>> t.values.cot()
         ┏━━━━━━━━━━━━━┓
         ┃ Cot(values) ┃
         ┡━━━━━━━━━━━━━┩
         │ float64     │
         ├─────────────┤
         │   -0.642093 │
-        │         inf │
-        │    0.642093 │
+        │    0.457658 │
+        │   -7.015253 │
         └─────────────┘
         """
         return ops.Cot(self).to_expr()