Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: remove ibisNA #9344

Merged
merged 5 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

4 changes: 0 additions & 4 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,6 @@ quartodoc:
- name: param
dynamic: true
signature_name: full
- name: NA
# Ideally exposed under `ibis` but that doesn't seem to work??
package: ibis.expr.api
signature_name: full
- name: "null"
dynamic: true
signature_name: full
Expand Down
2 changes: 1 addition & 1 deletion docs/posts/campaign-finance/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def get_election_type(pgi: StringValue) -> StringValue:
"E": "recount",
}
first_letter = pgi[0]
return first_letter.substitute(election_types, else_=ibis.NA)
return first_letter.substitute(election_types, else_=ibis.null())


cleaned = cleaned.mutate(election_type=get_election_type(_.TRANSACTION_PGI)).drop(
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/ibis-for-pandas-users.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ represented by `NaN`. This can be confusing when working with numeric data,
since `NaN` is also a valid floating point value (along with `+/-inf`).

In Ibis, we try to be more precise: All data types are nullable, and we use
`ibis.NA` to represent `NULL` values, and all datatypes have a `.isnull()` method.
`ibis.null()` to represent `NULL` values, and all datatypes have a `.isnull()` method.
For floating point values, we use different values for `NaN` and `+/-inf`, and there
are the additional methods `.isnan()` and `.isinf()`.

Expand Down
4 changes: 2 additions & 2 deletions docs/tutorials/ibis-for-sql-users.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -522,10 +522,10 @@ ibis.to_sql(expr)

### Using `NULL` in expressions

To use `NULL` in an expression, either use the special `ibis.NA` value:
To use `NULL` in an expression, use `ibis.null()`:

```{python}
pos_two = (t.two > 0).ifelse(t.two, ibis.NA)
pos_two = (t.two > 0).ifelse(t.two, ibis.null())
expr = t.mutate(two_positive=pos_two)
ibis.to_sql(expr)
```
Expand Down
20 changes: 19 additions & 1 deletion ibis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

__version__ = "9.0.0"

import warnings
from typing import Any

from ibis import examples, util
from ibis.backends import BaseBackend
from ibis.common.exceptions import IbisError
Expand Down Expand Up @@ -36,7 +39,7 @@ def __dir__() -> list[str]:
return sorted(out)


def __getattr__(name: str) -> BaseBackend:
def load_backend(name: str) -> BaseBackend:
"""Load backends in a lazy way with `ibis.<backend-name>`.

This also registers the backend options.
Expand All @@ -52,6 +55,7 @@ def __getattr__(name: str) -> BaseBackend:
attribute is "cached", so this function is only called the first time.

"""

entry_points = {ep for ep in util.backend_entry_points() if ep.name == name}

if not entry_points:
Expand Down Expand Up @@ -125,3 +129,17 @@ def connect(*args, **kwargs):
setattr(proxy, name, getattr(backend, name))

return proxy


def __getattr__(name: str) -> Any:
    """Resolve module attributes that are not defined eagerly.

    The name ``NA`` is handled as a deprecated alias: a
    ``DeprecationWarning`` is emitted and the result of ``null()`` is
    returned. Every other name is delegated to ``load_backend``.
    """
    # Anything other than the deprecated alias goes to the backend loader.
    if name != "NA":
        return load_backend(name)

    message = (
        "The 'ibis.NA' constant is deprecated as of v9.1 and will be removed in a future "
        "version. Use 'ibis.null()' instead."
    )
    # stacklevel=2 attributes the warning to the frame that triggered this
    # lookup rather than to this function itself.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return null()  # noqa: F405
10 changes: 5 additions & 5 deletions ibis/backends/clickhouse/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ def test_isnull_notnull(con, expr, expected):
("expr", "expected"),
[
(ibis.coalesce(5, None, 4), 5),
(ibis.coalesce(ibis.NA, 4, ibis.NA), 4),
(ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14),
(ibis.coalesce(ibis.null(), 4, ibis.null()), 4),
(ibis.coalesce(ibis.null(), ibis.null(), 3.14), 3.14),
],
)
def test_coalesce(con, expr, expected):
Expand All @@ -127,7 +127,7 @@ def test_coalesce(con, expr, expected):
@pytest.mark.parametrize(
("expr", "expected"),
[
(ibis.NA.fill_null(5), 5),
(ibis.null().fill_null(5), 5),
(L(5).fill_null(10), 5),
(L(5).nullif(5), None),
(L(10).nullif(5), 10),
Expand All @@ -150,7 +150,7 @@ def test_fill_null_nullif(con, expr, expected):
(L(datetime(2015, 9, 1, hour=14, minute=48, second=5)), "DateTime"),
(L(date(2015, 9, 1)), "Date"),
param(
ibis.NA,
ibis.null(),
"Null",
marks=pytest.mark.xfail(
raises=AssertionError,
Expand Down Expand Up @@ -418,7 +418,7 @@ def test_numeric_builtins_work(alltypes, df):
def test_null_column(alltypes):
t = alltypes
nrows = t.count().execute()
expr = t.mutate(na_column=ibis.NA).na_column
expr = t.mutate(na_column=ibis.null()).na_column
result = expr.execute()
expected = pd.Series([None] * nrows, name="na_column")
tm.assert_series_equal(result, expected)
Expand Down
10 changes: 5 additions & 5 deletions ibis/backends/dask/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def sort_kind():
return "mergesort"


default = pytest.mark.parametrize("default", [ibis.NA, ibis.literal("a")])
default = pytest.mark.parametrize("default", [ibis.null(), ibis.literal("a")])
row_offset = pytest.mark.parametrize("row_offset", list(map(ibis.literal, [-1, 1, 0])))
range_offset = pytest.mark.parametrize(
"range_offset",
Expand Down Expand Up @@ -48,7 +48,7 @@ def test_lead(con, t, df, row_offset, default, row_window):
expr = t.dup_strings.lead(row_offset, default=default).over(row_window)
result = expr.execute()
expected = df.dup_strings.shift(con.execute(-row_offset)).compute()
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected, check_names=False)

Expand All @@ -59,7 +59,7 @@ def test_lag(con, t, df, row_offset, default, row_window):
expr = t.dup_strings.lag(row_offset, default=default).over(row_window)
result = expr.execute()
expected = df.dup_strings.shift(con.execute(row_offset)).compute()
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected, check_names=False)

Expand All @@ -78,7 +78,7 @@ def test_lead_delta(con, t, pandas_df, range_offset, default, range_window):
.reindex(pandas_df.plain_datetimes_naive)
.reset_index(drop=True)
)
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected, check_names=False)

Expand All @@ -98,7 +98,7 @@ def test_lag_delta(t, con, pandas_df, range_offset, default, range_window):
.reindex(pandas_df.plain_datetimes_naive)
.reset_index(drop=True)
)
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected, check_names=False)

Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/impala/tests/test_case_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,6 @@ def test_identical_to(mockcon, snapshot):


def test_identical_to_special_case(snapshot):
expr = ibis.NA.cast("int64").identical_to(ibis.NA.cast("int64")).name("tmp")
expr = ibis.null().cast("int64").identical_to(ibis.null().cast("int64")).name("tmp")
result = ibis.to_sql(expr, dialect="impala")
snapshot.assert_match(result, "out.sql")
8 changes: 4 additions & 4 deletions ibis/backends/impala/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ def test_decimal_timestamp_builtins(con):
dc * 2,
dc**2,
dc.cast("double"),
api.ifelse(table.l_discount > 0, dc * table.l_discount, api.NA),
api.ifelse(table.l_discount > 0, dc * table.l_discount, api.null()),
dc.fill_null(0),
ts < (ibis.now() + ibis.interval(months=3)),
ts < (ibis.timestamp("2005-01-01") + ibis.interval(months=3)),
Expand Down Expand Up @@ -632,10 +632,10 @@ def test_unions_with_ctes(con, alltypes):
@pytest.mark.parametrize(
("left", "right", "expected"),
[
(ibis.NA.cast("int64"), ibis.NA.cast("int64"), True),
(ibis.null().cast("int64"), ibis.null().cast("int64"), True),
(L(1), L(1), True),
(ibis.NA.cast("int64"), L(1), False),
(L(1), ibis.NA.cast("int64"), False),
(ibis.null().cast("int64"), L(1), False),
(L(1), ibis.null().cast("int64"), False),
(L(0), L(1), False),
(L(1), L(0), False),
],
Expand Down
10 changes: 5 additions & 5 deletions ibis/backends/pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def sort_kind():
return "mergesort"


default = pytest.mark.parametrize("default", [ibis.NA, ibis.literal("a")])
default = pytest.mark.parametrize("default", [ibis.null(), ibis.literal("a")])
row_offset = pytest.mark.parametrize("row_offset", list(map(ibis.literal, [-1, 1, 0])))
range_offset = pytest.mark.parametrize(
"range_offset",
Expand Down Expand Up @@ -49,7 +49,7 @@ def test_lead(t, df, row_offset, default, row_window):
expr = t.dup_strings.lead(row_offset, default=default).over(row_window)
result = expr.execute()
expected = df.dup_strings.shift(con.execute(-row_offset))
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected.rename("tmp"))

Expand All @@ -61,7 +61,7 @@ def test_lag(t, df, row_offset, default, row_window):
expr = t.dup_strings.lag(row_offset, default=default).over(row_window)
result = expr.execute()
expected = df.dup_strings.shift(con.execute(row_offset))
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected.rename("tmp"))

Expand All @@ -80,7 +80,7 @@ def test_lead_delta(t, df, range_offset, default, range_window):
.reindex(df.plain_datetimes_naive)
.reset_index(drop=True)
)
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected.rename("tmp"))

Expand All @@ -100,7 +100,7 @@ def test_lag_delta(t, df, range_offset, default, range_window):
.reindex(df.plain_datetimes_naive)
.reset_index(drop=True)
)
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected.rename("tmp"))

Expand Down
22 changes: 11 additions & 11 deletions ibis/backends/postgres/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def test_strftime(con, pattern):
[
param(L("foo_bar"), "text", id="text"),
param(L(5), "integer", id="integer"),
param(ibis.NA, "null", id="null"),
param(ibis.null(), "null", id="null"),
# TODO(phillipc): should this really be double?
param(L(1.2345), "numeric", id="numeric"),
param(
Expand Down Expand Up @@ -335,7 +335,7 @@ def test_regexp_extract(con, expr, expected):
@pytest.mark.parametrize(
("expr", "expected"),
[
param(ibis.NA.fill_null(5), 5, id="filled"),
param(ibis.null().fill_null(5), 5, id="filled"),
param(L(5).fill_null(10), 5, id="not_filled"),
param(L(5).nullif(5), None, id="nullif_null"),
param(L(10).nullif(5), 10, id="nullif_not_null"),
Expand All @@ -349,8 +349,8 @@ def test_fill_null_nullif(con, expr, expected):
("expr", "expected"),
[
param(ibis.coalesce(5, None, 4), 5, id="first"),
param(ibis.coalesce(ibis.NA, 4, ibis.NA), 4, id="second"),
param(ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14, id="third"),
param(ibis.coalesce(ibis.null(), 4, ibis.null()), 4, id="second"),
param(ibis.coalesce(ibis.null(), ibis.null(), 3.14), 3.14, id="third"),
],
)
def test_coalesce(con, expr, expected):
Expand All @@ -360,12 +360,12 @@ def test_coalesce(con, expr, expected):
@pytest.mark.parametrize(
("expr", "expected"),
[
param(ibis.coalesce(ibis.NA, ibis.NA), None, id="all_null"),
param(ibis.coalesce(ibis.null(), ibis.null()), None, id="all_null"),
param(
ibis.coalesce(
ibis.NA.cast("int8"),
ibis.NA.cast("int8"),
ibis.NA.cast("int8"),
ibis.null().cast("int8"),
ibis.null().cast("int8"),
ibis.null().cast("int8"),
),
None,
id="all_nulls_with_all_cast",
Expand All @@ -377,7 +377,7 @@ def test_coalesce_all_na(con, expr, expected):


def test_coalesce_all_na_double(con):
expr = ibis.coalesce(ibis.NA, ibis.NA, ibis.NA.cast("double"))
expr = ibis.coalesce(ibis.null(), ibis.null(), ibis.null().cast("double"))
assert np.isnan(con.execute(expr))


Expand Down Expand Up @@ -815,14 +815,14 @@ def test_first_last_value(alltypes, df, func, expected_index):
def test_null_column(alltypes):
t = alltypes
nrows = t.count().execute()
expr = t.mutate(na_column=ibis.NA).na_column
expr = t.mutate(na_column=ibis.null()).na_column
result = expr.execute()
tm.assert_series_equal(result, pd.Series([None] * nrows, name="na_column"))


def test_null_column_union(alltypes, df):
t = alltypes
s = alltypes[["double_col"]].mutate(string_col=ibis.NA.cast("string"))
s = alltypes[["double_col"]].mutate(string_col=ibis.null().cast("string"))
expr = t[["double_col", "string_col"]].union(s)
result = expr.execute()
nrows = t.count().execute()
Expand Down
18 changes: 9 additions & 9 deletions ibis/backends/risingwave/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def test_regexp(con, expr, expected):
@pytest.mark.parametrize(
("expr", "expected"),
[
param(ibis.NA.fill_null(5), 5, id="filled"),
param(ibis.null().fill_null(5), 5, id="filled"),
param(L(5).fill_null(10), 5, id="not_filled"),
param(L(5).nullif(5), None, id="nullif_null"),
param(L(10).nullif(5), 10, id="nullif_not_null"),
Expand All @@ -180,8 +180,8 @@ def test_fill_null_nullif(con, expr, expected):
("expr", "expected"),
[
param(ibis.coalesce(5, None, 4), 5, id="first"),
param(ibis.coalesce(ibis.NA, 4, ibis.NA), 4, id="second"),
param(ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14, id="third"),
param(ibis.coalesce(ibis.null(), 4, ibis.null()), 4, id="second"),
param(ibis.coalesce(ibis.null(), ibis.null(), 3.14), 3.14, id="third"),
],
)
def test_coalesce(con, expr, expected):
Expand All @@ -191,12 +191,12 @@ def test_coalesce(con, expr, expected):
@pytest.mark.parametrize(
("expr", "expected"),
[
param(ibis.coalesce(ibis.NA, ibis.NA), None, id="all_null"),
param(ibis.coalesce(ibis.null(), ibis.null()), None, id="all_null"),
param(
ibis.coalesce(
ibis.NA.cast("int8"),
ibis.NA.cast("int8"),
ibis.NA.cast("int8"),
ibis.null().cast("int8"),
ibis.null().cast("int8"),
ibis.null().cast("int8"),
),
None,
id="all_nulls_with_all_cast",
Expand All @@ -208,7 +208,7 @@ def test_coalesce_all_na(con, expr, expected):


def test_coalesce_all_na_double(con):
expr = ibis.coalesce(ibis.NA, ibis.NA, ibis.NA.cast("double"))
expr = ibis.coalesce(ibis.null(), ibis.null(), ibis.null().cast("double"))
assert np.isnan(con.execute(expr))


Expand Down Expand Up @@ -595,7 +595,7 @@ def test_first_last_value(alltypes, df, func, expected_index):
def test_null_column(alltypes):
t = alltypes
nrows = t.count().execute()
expr = t.mutate(na_column=ibis.NA).na_column
expr = t.mutate(na_column=ibis.null()).na_column
result = expr.execute()
tm.assert_series_equal(result, pd.Series([None] * nrows, name="na_column"))

Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/sqlite/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_builtin_agg_udf(con):
def total(x) -> float:
"""Totally total."""

expr = total(con.tables.functional_alltypes.limit(2).select(n=ibis.NA).n)
expr = total(con.tables.functional_alltypes.limit(2).select(n=ibis.null()).n)
result = con.execute(expr)
assert result == 0.0

Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/sql/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def test_coalesce(functional_alltypes, snapshot):
d = functional_alltypes.double_col
f = functional_alltypes.float_col

expr = ibis.coalesce((d > 30).ifelse(d, ibis.NA), ibis.NA, f).name("tmp")
expr = ibis.coalesce((d > 30).ifelse(d, ibis.null()), ibis.null(), f).name("tmp")
snapshot.assert_match(to_sql(expr.name("tmp")), "out.sql")


Expand Down
Loading