diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index de914d778993..b587a2e5abd9 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -993,7 +993,7 @@ def test_memtable_construct_from_polars(backend, con, lazy): ([("a", "1.0")], ["d", "e"], ["d", "e"]), ], ) -def test_memtable_column_naming(backend, con, monkeypatch, df, columns, expected): +def test_memtable_column_naming(con, monkeypatch, df, columns, expected): monkeypatch.setattr(ibis.options, "default_backend", con) t = ibis.memtable(df, columns=columns) @@ -1009,7 +1009,7 @@ def test_memtable_column_naming(backend, con, monkeypatch, df, columns, expected ([("a", "1.0")], ["d"]), ], ) -def test_memtable_column_naming_mismatch(backend, con, monkeypatch, df, columns): +def test_memtable_column_naming_mismatch(con, monkeypatch, df, columns): monkeypatch.setattr(ibis.options, "default_backend", con) with pytest.raises(ValueError): @@ -1874,9 +1874,7 @@ def test_isnull_equality(con, backend, monkeypatch): @pytest.mark.broken( ["druid"], raises=PyDruidProgrammingError, - reason=( - "Query could not be planned. SQL query requires ordering a table by time column" - ), + reason="Query could not be planned. SQL query requires ordering a table by time column", ) def test_subsequent_overlapping_order_by(con, backend, alltypes, df): ts = alltypes.order_by(ibis.desc("id")).order_by("id") @@ -1990,3 +1988,24 @@ def test_topk_counts_null(con): tkf = tk.filter(_.x.isnull())[1] result = con.to_pyarrow(tkf) assert result[0].as_py() == 1 + + +@pytest.mark.notyet( + "clickhouse", + raises=AssertionError, + reason="ClickHouse returns False for x.isin([None])", +) +@pytest.mark.notimpl( + ["pandas", "dask"], + raises=AssertionError, + reason="null isin semantics are not implemented for pandas or dask", +) +@pytest.mark.never( + "mssql", + raises=AssertionError, + reason="mssql doesn't support null isin semantics in a projection because there is no bool type", +) +def test_null_isin_null_is_null(con): + t = ibis.memtable({"x": [1]}) + expr = t.x.isin([None]) + assert pd.isna(con.to_pandas(expr).iat[0]) diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py index 571c43a92426..b9e26756a86f 100644 --- a/ibis/expr/types/generic.py +++ b/ibis/expr/types/generic.py @@ -500,6 +500,8 @@ def between( def isin(self, values: Value | Sequence[Value]) -> ir.BooleanValue: """Check whether this expression's values are in `values`. + `NULL` values are propagated in the output. See examples for details. + Parameters ---------- values @@ -569,6 +571,40 @@ def isin(self, values: Value | Sequence[Value]) -> ir.BooleanValue: │ True │ │ False │ └───────────────┘ + + `NULL` behavior + + >>> t = ibis.memtable({"x": [1, 2]}) + >>> t.x.isin([1, None]) + ┏━━━━━━━━━━━━━┓ + ┃ InValues(x) ┃ + ┡━━━━━━━━━━━━━┩ + │ boolean │ + ├─────────────┤ + │ True │ + │ NULL │ + └─────────────┘ + >>> t = ibis.memtable({"x": [1, None, 2]}) + >>> t.x.isin([1]) + ┏━━━━━━━━━━━━━┓ + ┃ InValues(x) ┃ + ┡━━━━━━━━━━━━━┩ + │ boolean │ + ├─────────────┤ + │ True │ + │ NULL │ + │ False │ + └─────────────┘ + >>> t.x.isin([3]) + ┏━━━━━━━━━━━━━┓ + ┃ InValues(x) ┃ + ┡━━━━━━━━━━━━━┩ + │ boolean │ + ├─────────────┤ + │ False │ + │ NULL │ + │ False │ + └─────────────┘ """ from ibis.expr.types import ArrayValue