test(duckdb): work with 0.10.0

ibis-project · Feb 17, 2024 · 5ff8f67
1 parent 61525e4
commit 5ff8f67
Show file tree

Hide file tree

Showing 10 changed files with 182 additions and 154 deletions.
diff --git a/ibis/backends/duckdb/tests/test_register.py b/ibis/backends/duckdb/tests/test_register.py
@@ -11,6 +11,7 @@
 import pandas.testing as tm
 import pyarrow as pa
 import pytest
+from pytest import param
 
 import ibis
 import ibis.expr.datatypes as dt
@@ -110,7 +111,9 @@ def test_read_geo_from_url(con, monkeypatch):
     loaded_exts = []
     monkeypatch.setattr(con, "_load_extensions", lambda x, **_: loaded_exts.extend(x))
 
-    with pytest.raises((duckdb.IOException, duckdb.CatalogException)):
+    with pytest.raises(
+        (duckdb.IOException, duckdb.CatalogException, duckdb.NotImplementedException)
+    ):
         # The read will fail, either because the URL is bogus (which it is) or
         # because the current connection doesn't have the spatial extension
         # installed and so the call to `st_read` will raise a catalog error.
@@ -422,13 +425,33 @@ def test_csv_with_slash_n_null(con, tmp_path):
 
 
 @pytest.mark.xfail(
-    LINUX and SANDBOXED,
-    reason=("nix can't hit GCS because it is sandboxed."),
+    LINUX and SANDBOXED, reason="nix can't hit GCS because it is sandboxed."
+)
+@pytest.mark.parametrize(
+    "extensions",
+    [
+        [],
+        param(
+            ["httpfs"],
+            marks=[
+                pytest.mark.xfail(
+                    duckdb.__version__ == "0.10.0",
+                    reason="https://github.com/duckdb/duckdb/issues/10698",
+                    raises=duckdb.HTTPException,
+                )
+            ],
+        ),
+    ],
 )
-def test_register_filesystem_gcs(con):
+def test_register_filesystem_gcs(extensions):
     fsspec = pytest.importorskip("fsspec")
     pytest.importorskip("gcsfs")
 
+    con = ibis.duckdb.connect()
+
+    for ext in extensions:
+        con.load_extension(ext)
+
     gcs = fsspec.filesystem("gcs")
 
     con.register_filesystem(gcs)

diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py
@@ -1,14 +1,20 @@
 from __future__ import annotations
 
 try:
+    from duckdb import BinderException as DuckDBBinderException
     from duckdb import ConversionException as DuckDBConversionException
     from duckdb import InvalidInputException as DuckDBInvalidInputException
     from duckdb import NotImplementedException as DuckDBNotImplementedException
+    from duckdb import OutOfRangeException as DuckDBOutOfRangeException
     from duckdb import ParserException as DuckDBParserException
 except ImportError:
     DuckDBConversionException = (
         DuckDBInvalidInputException
-    ) = DuckDBParserException = DuckDBNotImplementedException = None
+    ) = (
+        DuckDBParserException
+    ) = (
+        DuckDBNotImplementedException
+    ) = DuckDBBinderException = DuckDBOutOfRangeException = None
 
 try:
     from clickhouse_connect.driver.exceptions import (

diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py
@@ -17,6 +17,7 @@
 from ibis import literal as L
 from ibis.backends.tests.errors import (
     ArrowNotImplementedError,
+    DuckDBOutOfRangeException,
     DuckDBParserException,
     ExaQueryError,
     GoogleBadRequest,
@@ -825,18 +826,46 @@ def test_trig_functions_literals(con, expr, expected):
         param(_.dc.acos(), np.arccos, id="acos"),
         param(_.dc.asin(), np.arcsin, id="asin"),
         param(_.dc.atan(), np.arctan, id="atan"),
-        param(_.dc.atan2(_.dc), lambda c: np.arctan2(c, c), id="atan2"),
+        param(
+            _.dc.atan2(_.dc),
+            lambda c: np.arctan2(c, c),
+            id="atan2",
+            marks=[
+                pytest.mark.notyet(
+                    ["mssql", "exasol"], raises=(PyODBCProgrammingError, ExaQueryError)
+                )
+            ],
+        ),
         param(_.dc.cos(), np.cos, id="cos"),
-        param(_.dc.cot(), lambda c: 1.0 / np.tan(c), id="cot"),
         param(_.dc.sin(), np.sin, id="sin"),
         param(_.dc.tan(), np.tan, id="tan"),
     ],
 )
 def test_trig_functions_columns(backend, expr, alltypes, df, expected_fn):
     dc_max = df.double_col.max()
-    expr = alltypes.mutate(dc=(_.double_col / dc_max).nullif(0)).select(tmp=expr)
+    expr = alltypes.mutate(dc=_.double_col / dc_max).select(tmp=expr)
+    result = expr.tmp.to_pandas()
+    expected = expected_fn(df.double_col / dc_max).rename("tmp")
+    backend.assert_series_equal(result, expected)
+
+
+@pytest.mark.notyet(
+    ["mssql", "mysql", "duckdb", "exasol"],
+    raises=(
+        PyODBCProgrammingError,
+        MySQLOperationalError,
+        DuckDBOutOfRangeException,
+        ExaQueryError,
+    ),
+)
+@pytest.mark.broken(
+    ["sqlite", "impala"], raises=AssertionError, reason="behavior doesn't match numpy"
+)
+def test_cotangent(backend, alltypes, df):
+    dc_max = df.double_col.max()
+    expr = alltypes.select(tmp=(_.double_col / dc_max).cot())
     result = expr.tmp.to_pandas()
-    expected = expected_fn((df.double_col / dc_max).replace(0.0, np.nan)).rename("tmp")
+    expected = 1.0 / np.tan(df.double_col / dc_max).rename("tmp")
     backend.assert_series_equal(result, expected)
 
 

diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py
@@ -20,6 +20,7 @@
 from ibis.backends.tests.errors import (
     ArrowInvalid,
     ClickHouseDatabaseError,
+    DuckDBBinderException,
     DuckDBInvalidInputException,
     ExaQueryError,
     GoogleBadRequest,
@@ -1131,69 +1132,33 @@ def test_timestamp_comparison_filter(backend, con, alltypes, df, func_name):
     backend.assert_frame_equal(result, expected)
 
 
+no_mixed_timestamp_comparisons = [  # marks shared by the gt/ge/lt/le params below
+    pytest.mark.notimpl(
+        ["dask"],
+        raises=ValueError,
+        reason="Metadata inference failed in `gt`/`ge`/`lt`/`le`.",
+    ),
+    pytest.mark.notimpl(
+        ["pandas"],
+        raises=TypeError,
+        reason="Invalid comparison between dtype=datetime64[ns, UTC] and datetime",
+    ),
+    pytest.mark.never(
+        ["duckdb"],
+        raises=DuckDBBinderException,
+        # NOTE: ibis itself could arguably reject such mixed comparisons as well
+        reason="DuckDB doesn't allow comparing timestamp with and without timezones",
+    ),
+]
+
+
 @pytest.mark.parametrize(
     "func_name",
     [
-        param(
-            "gt",
-            marks=[
-                pytest.mark.notimpl(
-                    ["dask"],
-                    raises=ValueError,
-                    reason="Metadata inference failed in `gt`.",
-                ),
-                pytest.mark.notimpl(
-                    ["pandas"],
-                    raises=TypeError,
-                    reason="Invalid comparison between dtype=datetime64[ns, UTC] and datetime",
-                ),
-            ],
-        ),
-        param(
-            "ge",
-            marks=[
-                pytest.mark.notimpl(
-                    ["dask"],
-                    raises=ValueError,
-                    reason="Metadata inference failed in `ge`.",
-                ),
-                pytest.mark.notimpl(
-                    ["pandas"],
-                    raises=TypeError,
-                    reason="Invalid comparison between dtype=datetime64[ns, UTC] and datetime",
-                ),
-            ],
-        ),
-        param(
-            "lt",
-            marks=[
-                pytest.mark.notimpl(
-                    ["dask"],
-                    raises=ValueError,
-                    reason="Metadata inference failed in `lt`.",
-                ),
-                pytest.mark.notimpl(
-                    ["pandas"],
-                    raises=TypeError,
-                    reason="Invalid comparison between dtype=datetime64[ns, UTC] and datetime",
-                ),
-            ],
-        ),
-        param(
-            "le",
-            marks=[
-                pytest.mark.notimpl(
-                    ["dask"],
-                    raises=ValueError,
-                    reason="Metadata inference failed in `le`.",
-                ),
-                pytest.mark.notimpl(
-                    ["pandas"],
-                    raises=TypeError,
-                    reason="Invalid comparison between dtype=datetime64[ns, UTC] and datetime",
-                ),
-            ],
-        ),
+        param("gt", marks=no_mixed_timestamp_comparisons),
+        param("ge", marks=no_mixed_timestamp_comparisons),
+        param("lt", marks=no_mixed_timestamp_comparisons),
+        param("le", marks=no_mixed_timestamp_comparisons),
         "eq",
         "ne",
     ],

diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py
@@ -1172,7 +1172,6 @@ def test_range_expression_bounds(backend):
     raises=PsycoPg2InternalError,
     reason="Feature is not yet implemented: Unrecognized window function: percent_rank",
 )
-@pytest.mark.broken(["dask"], reason="different result ordering", raises=AssertionError)
 def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df):
     # GH #7631
     t = alltypes
@@ -1188,7 +1187,9 @@ def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df):
         .rename(expr.get_name())
     )
 
-    backend.assert_series_equal(result, expected)
+    backend.assert_series_equal(
+        result.value_counts().sort_index(), expected.value_counts().sort_index()
+    )
 
 
 @pytest.mark.notyet(

diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py
@@ -1217,21 +1217,19 @@ def as_scalar(self):
         Examples
         --------
         >>> import ibis
-        >>>
         >>> ibis.options.interactive = True
-        >>>
         >>> t = ibis.examples.penguins.fetch()
         >>> max_gentoo_weight = t.filter(t.species == "Gentoo").body_mass_g.max()
         >>> light_penguins = t.filter(t.body_mass_g < max_gentoo_weight / 2)
-        >>> light_penguins.group_by("species").count()
-        ┏━━━━━━━━━━━┳━━━━━━━━━━━━━┓
-        ┃ species   ┃ CountStar() ┃
-        ┡━━━━━━━━━━━╇━━━━━━━━━━━━━┩
-        │ string    │ int64       │
-        ├───────────┼─────────────┤
-        │ Adelie    │          15 │
-        │ Chinstrap │           2 │
-        └───────────┴─────────────┘
+        >>> light_penguins.species.value_counts().order_by(ibis.desc("species_count"))
+        ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+        ┃ species   ┃ species_count ┃
+        ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+        │ string    │ int64         │
+        ├───────────┼───────────────┤
+        │ Adelie    │            15 │
+        │ Chinstrap │             2 │
+        └───────────┴───────────────┘
         """
         parents = self.op().relations
         if parents:
@@ -1350,21 +1348,19 @@ def as_scalar(self) -> Scalar:
         Examples
         --------
         >>> import ibis
-        >>>
         >>> ibis.options.interactive = True
-        >>>
         >>> t = ibis.examples.penguins.fetch()
         >>> heavy_gentoo = t.filter(t.species == "Gentoo", t.body_mass_g > 6200)
         >>> from_that_island = t.filter(t.island == heavy_gentoo.island.as_scalar())
-        >>> from_that_island.group_by("species").count()
-        ┏━━━━━━━━━┳━━━━━━━━━━━━━┓
-        ┃ species ┃ CountStar() ┃
-        ┡━━━━━━━━━╇━━━━━━━━━━━━━┩
-        │ string  │ int64       │
-        ├─────────┼─────────────┤
-        │ Adelie  │          44 │
-        │ Gentoo  │         124 │
-        └─────────┴─────────────┘
+        >>> from_that_island.species.value_counts().order_by(ibis.desc("species_count"))
+        ┏━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+        ┃ species ┃ species_count ┃
+        ┡━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+        │ string  │ int64         │
+        ├─────────┼───────────────┤
+        │ Gentoo  │           124 │
+        │ Adelie  │            44 │
+        └─────────┴───────────────┘
         """
         return self.as_table().as_scalar()
 

diff --git a/ibis/expr/types/numeric.py b/ibis/expr/types/numeric.py
@@ -594,16 +594,16 @@ def cot(self) -> NumericValue:
         --------
         >>> import ibis
         >>> ibis.options.interactive = True
-        >>> t = ibis.memtable({"values": [-1, 0, 1]})
+        >>> t = ibis.memtable({"values": [-1, -2, 3]})
         >>> t.values.cot()
         ┏━━━━━━━━━━━━━┓
         ┃ Cot(values) ┃
         ┡━━━━━━━━━━━━━┩
         │ float64     │
         ├─────────────┤
         │   -0.642093 │
-        │         inf │
-        │    0.642093 │
+        │    0.457658 │
+        │   -7.015253 │
         └─────────────┘
         """
         return ops.Cot(self).to_expr()

diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py
@@ -2982,17 +2982,17 @@ def join(
         rated a movie:
 
         >>> tags.join(ratings, ["userId", "movieId"]).head(5)
-        ┏━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┓
-        ┃ userId ┃ movieId ┃ tag             ┃ timestamp  ┃ rating  ┃
-        ┡━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━┩
-        │ int64  │ int64   │ string          │ int64      │ float64 │
-        ├────────┼─────────┼─────────────────┼────────────┼─────────┤
-        │      2 │   60756 │ will ferrell    │ 1445714992 │     5.0 │
-        │      2 │   89774 │ Tom Hardy       │ 1445715205 │     5.0 │
-        │      2 │  106782 │ Martin Scorsese │ 1445715056 │     5.0 │
-        │      7 │   48516 │ way too long    │ 1169687325 │     1.0 │
-        │     18 │     431 │ mafia           │ 1462138755 │     4.0 │
-        └────────┴─────────┴─────────────────┴────────────┴─────────┘
+        ┏━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┓
+        ┃ userId ┃ movieId ┃ tag            ┃ timestamp  ┃ rating  ┃
+        ┡━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━┩
+        │ int64  │ int64   │ string         │ int64      │ float64 │
+        ├────────┼─────────┼────────────────┼────────────┼─────────┤
+        │     62 │       2 │ Robin Williams │ 1528843907 │     4.0 │
+        │     62 │     110 │ sword fight    │ 1528152535 │     4.5 │
+        │     62 │     410 │ gothic         │ 1525636609 │     4.5 │
+        │     62 │    2023 │ mafia          │ 1525636733 │     5.0 │
+        │     62 │    2124 │ quirky         │ 1525636846 │     5.0 │
+        └────────┴─────────┴────────────────┴────────────┴─────────┘
 
         To self-join a table with itself, you need to call
         `.view()` on one of the arguments so the two tables
@@ -3023,11 +3023,11 @@ def join(
         ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
         │ int64   │ string            │ int64         │ string            │
         ├─────────┼───────────────────┼───────────────┼───────────────────┤
-        │   60756 │ funny             │          1732 │ funny             │
-        │   60756 │ Highly quotable   │          1732 │ Highly quotable   │
-        │   89774 │ Tom Hardy         │        139385 │ tom hardy         │
-        │  106782 │ drugs             │          1732 │ drugs             │
-        │  106782 │ Leonardo DiCaprio │          5989 │ Leonardo DiCaprio │
+        │    1732 │ funny             │         60756 │ funny             │
+        │    1732 │ Highly quotable   │         60756 │ Highly quotable   │
+        │  139385 │ tom hardy         │         89774 │ Tom Hardy         │
+        │    1732 │ drugs             │        106782 │ drugs             │
+        │    5989 │ Leonardo DiCaprio │        106782 │ Leonardo DiCaprio │
         └─────────┴───────────────────┴───────────────┴───────────────────┘
         """
         from ibis.expr.types.joins import Join