Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: remove ibis NA #9323

Closed
wants to merge 53 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
609e60b
refactor: deprecate fillna and dropna api
ncclementi Jun 3, 2024
4e39c60
chore: update snapshot tests to use fillnull
ncclementi Jun 3, 2024
4961302
chore: fix test_grouped_case to use dropnull
ncclementi Jun 3, 2024
ce14460
chore: fix parent function call
ncclementi Jun 4, 2024
bb138bb
chore: make sure test has def backend
ncclementi Jun 4, 2024
cad47e9
chore: fix docts examples to use dropnull
ncclementi Jun 4, 2024
33b3f18
chore: remove execution test
ncclementi Jun 4, 2024
3404252
test: move deprecationcheck out of backends
ncclementi Jun 4, 2024
e641be6
chore: consistent name
ncclementi Jun 4, 2024
a4c5cd3
test: add checkof deprec warn when usingfillna
ncclementi Jun 4, 2024
c8eac3c
chore: fix typo
ncclementi Jun 4, 2024
5a0495e
chore: deprecate Value.fillna add fillnull
ncclementi Jun 4, 2024
a417ade
chore: update test_function to use fillnull
ncclementi Jun 4, 2024
9f227a9
chore: update test_select to use fillnull
ncclementi Jun 4, 2024
2d3144e
chore: update some impala tests to use fillnull
ncclementi Jun 4, 2024
4b3634e
chore: update test_join to use fillnull
ncclementi Jun 4, 2024
22b5617
chore: use fillnull in postgres tests when applies
ncclementi Jun 5, 2024
ee32a34
chore: update risingwave test_function to use fillnull
ncclementi Jun 5, 2024
5a8047c
chore: fix typo
ncclementi Jun 5, 2024
9a2bf8b
chore: update test_generic to use fillnull
ncclementi Jun 5, 2024
ffff32b
chore: fix Value.coalesce docs
ncclementi Jun 5, 2024
88edd57
chore: replace filna for fillnull
ncclementi Jun 5, 2024
cd4db5b
chore: fixz docstrings and err msg
ncclementi Jun 5, 2024
e6d5c34
chore: update benchfuncs to use fillnull
ncclementi Jun 5, 2024
acdee1d
chore: update test_decimal to use fillnull
ncclementi Jun 5, 2024
56df3ce
chore: update tes_sql_builtins to use fillnull
ncclementi Jun 5, 2024
05ab774
chore: update test_table to use fillnull
ncclementi Jun 5, 2024
4c86473
chore: update test_value_expr to use fillnull
ncclementi Jun 5, 2024
fc79809
docs: update docs where fillna was used and replaced for fillnull
ncclementi Jun 5, 2024
e388f15
chore: update impala test_cas_exprs tests to use fillnull
ncclementi Jun 5, 2024
d89b589
chore: update impala test_unary_builtins to use fillnull
ncclementi Jun 5, 2024
d1c2ced
chore: fix docstring
ncclementi Jun 5, 2024
89935b9
chore: self.dropnull and self.fillnull in depr methods
ncclementi Jun 5, 2024
7768dbe
chore: use drop_null instead of dropnull
ncclementi Jun 7, 2024
21d1c73
chore: replace fillnull for fill_null
ncclementi Jun 7, 2024
a86d06f
chore: change version in deprcation warns
ncclementi Jun 7, 2024
83b7457
refactor: remove ibis.NA
ncclementi Jun 6, 2024
5093c95
chore: replace ibis.NA for ibis.null() in docs
ncclementi Jun 6, 2024
06a2ff1
chore: fix merge conflict
ncclementi Jun 7, 2024
8ef4637
chore: update dask tests to use ibis.null()
ncclementi Jun 6, 2024
ee3c232
chore: update impala tests to use ibis.null()
ncclementi Jun 6, 2024
e295d6b
chore: update pandas tests to use ibis.null()
ncclementi Jun 6, 2024
da4c9bf
chore: fixe merge conflict
ncclementi Jun 7, 2024
04c4c8b
chore: fix conflict
ncclementi Jun 7, 2024
0bc3285
chore: update sqlite tests to use ibis.null()
ncclementi Jun 6, 2024
7e2a839
chore: fix conflict
ncclementi Jun 7, 2024
b726b7a
chore: fix conflict
ncclementi Jun 7, 2024
5d3fd20
chore: removes ibis.NA from api docs
ncclementi Jun 6, 2024
4270b32
chore: fix conflict
ncclementi Jun 7, 2024
ee40d9d
docs: update freeze of post
ncclementi Jun 7, 2024
9db3bf5
chore: add NA and deprecation warn to getattr
ncclementi Jun 7, 2024
64b98e5
test: add test for ibis.NA depr warn
ncclementi Jun 7, 2024
224e4ff
chore: avoid execution in test_ibis_na_deprecation_warning
ncclementi Jun 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

4 changes: 0 additions & 4 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,6 @@ quartodoc:
- name: param
dynamic: true
signature_name: full
- name: NA
# Ideally exposed under `ibis` but that doesn't seem to work??
package: ibis.expr.api
signature_name: full
- name: "null"
dynamic: true
signature_name: full
Expand Down
2 changes: 1 addition & 1 deletion docs/how-to/timeseries/sessionize.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ sessionized = (
data
# Fill NULL values of `is_new_session` with `True` and store the result
# as the `new_session` column.
.mutate(new_session=is_new_session.fillna(True))
.mutate(new_session=is_new_session.fill_null(True))
# Create a session id for each character by using a cumulative sum
# over the `new_session` column.
.mutate(session_id=c.new_session.sum().over(entity_window))
Expand Down
2 changes: 1 addition & 1 deletion docs/posts/campaign-finance/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def get_election_type(pgi: StringValue) -> StringValue:
"E": "recount",
}
first_letter = pgi[0]
return first_letter.substitute(election_types, else_=ibis.NA)
return first_letter.substitute(election_types, else_=ibis.null())


cleaned = cleaned.mutate(election_type=get_election_type(_.TRANSACTION_PGI)).drop(
Expand Down
2 changes: 1 addition & 1 deletion docs/posts/ibis-analytics/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -1220,7 +1220,7 @@ def transform_downloads(extract_downloads):
)
.order_by(ibis._.timestamp.desc())
)
downloads = downloads.mutate(ibis._["python"].fillna("").name("python_full"))
downloads = downloads.mutate(ibis._["python"].fill_null("").name("python_full"))
downloads = downloads.mutate(
f.clean_version(downloads["python_full"], patch=False).name("python")
)
Expand Down
8 changes: 4 additions & 4 deletions docs/tutorials/ibis-for-pandas-users.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ represented by `NaN`. This can be confusing when working with numeric data,
since `NaN` is also a valid floating point value (along with `+/-inf`).

In Ibis, we try to be more precise: All data types are nullable, and we use
`ibis.NA` to represent `NULL` values, and all datatypes have a `.isnull()` method.
`ibis.null()` to represent `NULL` values, and all datatypes have a `.isnull()` method.
For floating point values, we use different values for `NaN` and `+/-inf`, and there
are the additional methods `.isnan()` and `.isinf()`.

Expand All @@ -532,17 +532,17 @@ the column name for the value to apply to.


```{python}
no_null_peng = penguins.fillna(dict(bill_depth_mm=0, bill_length_mm=0))
no_null_peng = penguins.fill_null(dict(bill_depth_mm=0, bill_length_mm=0))
```

### Replacing `NULL`s

Both pandas and Ibis have `fillna` methods which allow you to specify a replacement value
The Ibis equivalent of pandas `fillna` is `fill_null`; this method allows you to specify a replacement value
for `NULL` values.


```{python}
bill_length_no_nulls = penguins.bill_length_mm.fillna(0)
bill_length_no_nulls = penguins.bill_length_mm.fill_null(0)
```

## Type casts
Expand Down
4 changes: 2 additions & 2 deletions docs/tutorials/ibis-for-sql-users.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -522,10 +522,10 @@ ibis.to_sql(expr)

### Using `NULL` in expressions

To use `NULL` in an expression, either use the special `ibis.NA` value:
To use `NULL` in an expression, use `ibis.null()`:

```{python}
pos_two = (t.two > 0).ifelse(t.two, ibis.NA)
pos_two = (t.two > 0).ifelse(t.two, ibis.null())
expr = t.mutate(two_positive=pos_two)
ibis.to_sql(expr)
```
Expand Down
20 changes: 19 additions & 1 deletion ibis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

__version__ = "9.0.0"

import warnings
from typing import Any

from ibis import examples, util
from ibis.backends import BaseBackend
from ibis.common.exceptions import IbisError
Expand Down Expand Up @@ -36,7 +39,7 @@ def __dir__() -> list[str]:
return sorted(out)


def __getattr__(name: str) -> BaseBackend:
def load_backend(name: str) -> BaseBackend:
"""Load backends in a lazy way with `ibis.<backend-name>`.

This also registers the backend options.
Expand Down Expand Up @@ -125,3 +128,18 @@ def connect(*args, **kwargs):
setattr(proxy, name, getattr(backend, name))

return proxy


def __getattr__(name: str) -> Any:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we already had a __getattr__ I went this route to keep the typing for the load_backend portion. Once we deprecate NA completely we can remove the workaround

if name == "NA":
warnings.warn(
"Accessing 'ibis.NA' is deprecated as of v9.1 and will be removed in a future version. "
"Use 'ibis.null()' instead.",
Comment on lines +136 to +137
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"Accessing 'ibis.NA' is deprecated as of v9.1 and will be removed in a future version. "
"Use 'ibis.null()' instead.",
"The 'ibis.NA' constant is deprecated as of v9.1 and will be removed in a future "
"version. Use 'ibis.null()' instead.",

DeprecationWarning,
stacklevel=2,
)
import ibis

return ibis.null()
Comment on lines +141 to +143
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should work instead, since null is already imported above:

Suggested change
import ibis
return ibis.null()
return null()

else:
return load_backend(name)
14 changes: 7 additions & 7 deletions ibis/backends/clickhouse/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ def test_isnull_notnull(con, expr, expected):
("expr", "expected"),
[
(ibis.coalesce(5, None, 4), 5),
(ibis.coalesce(ibis.NA, 4, ibis.NA), 4),
(ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14),
(ibis.coalesce(ibis.null(), 4, ibis.null()), 4),
(ibis.coalesce(ibis.null(), ibis.null(), 3.14), 3.14),
],
)
def test_coalesce(con, expr, expected):
Expand All @@ -127,13 +127,13 @@ def test_coalesce(con, expr, expected):
@pytest.mark.parametrize(
("expr", "expected"),
[
(ibis.NA.fillna(5), 5),
(L(5).fillna(10), 5),
(ibis.null().fill_null(5), 5),
(L(5).fill_null(10), 5),
(L(5).nullif(5), None),
(L(10).nullif(5), 10),
],
)
def test_fillna_nullif(con, expr, expected):
def test_fill_null_nullif(con, expr, expected):
result = con.execute(expr)
if expected is None:
assert pd.isnull(result)
Expand All @@ -150,7 +150,7 @@ def test_fillna_nullif(con, expr, expected):
(L(datetime(2015, 9, 1, hour=14, minute=48, second=5)), "DateTime"),
(L(date(2015, 9, 1)), "Date"),
param(
ibis.NA,
ibis.null(),
"Null",
marks=pytest.mark.xfail(
raises=AssertionError,
Expand Down Expand Up @@ -418,7 +418,7 @@ def test_numeric_builtins_work(alltypes, df):
def test_null_column(alltypes):
t = alltypes
nrows = t.count().execute()
expr = t.mutate(na_column=ibis.NA).na_column
expr = t.mutate(na_column=ibis.null()).na_column
result = expr.execute()
expected = pd.Series([None] * nrows, name="na_column")
tm.assert_series_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/clickhouse/tests/test_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def test_count_name(assert_sql):
t = ibis.table(dict(a="string", b="bool"), name="t")

expr = t.group_by(t.a).agg(
A=t.count(where=~t.b).fillna(0), B=t.count(where=t.b).fillna(0)
A=t.count(where=~t.b).fill_null(0), B=t.count(where=t.b).fill_null(0)
)
assert_sql(expr)

Expand Down
10 changes: 5 additions & 5 deletions ibis/backends/dask/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def sort_kind():
return "mergesort"


default = pytest.mark.parametrize("default", [ibis.NA, ibis.literal("a")])
default = pytest.mark.parametrize("default", [ibis.null(), ibis.literal("a")])
row_offset = pytest.mark.parametrize("row_offset", list(map(ibis.literal, [-1, 1, 0])))
range_offset = pytest.mark.parametrize(
"range_offset",
Expand Down Expand Up @@ -48,7 +48,7 @@ def test_lead(con, t, df, row_offset, default, row_window):
expr = t.dup_strings.lead(row_offset, default=default).over(row_window)
result = expr.execute()
expected = df.dup_strings.shift(con.execute(-row_offset)).compute()
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected, check_names=False)

Expand All @@ -59,7 +59,7 @@ def test_lag(con, t, df, row_offset, default, row_window):
expr = t.dup_strings.lag(row_offset, default=default).over(row_window)
result = expr.execute()
expected = df.dup_strings.shift(con.execute(row_offset)).compute()
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected, check_names=False)

Expand All @@ -78,7 +78,7 @@ def test_lead_delta(con, t, pandas_df, range_offset, default, range_window):
.reindex(pandas_df.plain_datetimes_naive)
.reset_index(drop=True)
)
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected, check_names=False)

Expand All @@ -98,7 +98,7 @@ def test_lag_delta(t, con, pandas_df, range_offset, default, range_window):
.reindex(pandas_df.plain_datetimes_naive)
.reset_index(drop=True)
)
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected, check_names=False)

Expand Down
11 changes: 6 additions & 5 deletions ibis/backends/impala/tests/test_case_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,17 @@ def test_nullif_ifnull(tpch_lineitem, expr_fn, snapshot):
@pytest.mark.parametrize(
"expr_fn",
[
pytest.param(lambda t: t.l_quantity.fillna(0), id="fillna_l_quantity"),
pytest.param(lambda t: t.l_quantity.fill_null(0), id="fill_null_l_quantity"),
pytest.param(
lambda t: t.l_extendedprice.fillna(0), id="fillna_l_extendedprice"
lambda t: t.l_extendedprice.fill_null(0), id="fill_null_l_extendedprice"
),
pytest.param(
lambda t: t.l_extendedprice.fillna(0.0), id="fillna_l_extendedprice_double"
lambda t: t.l_extendedprice.fill_null(0.0),
id="fill_null_l_extendedprice_double",
),
],
)
def test_decimal_fillna_cast_arg(tpch_lineitem, expr_fn, snapshot):
def test_decimal_fill_null_cast_arg(tpch_lineitem, expr_fn, snapshot):
expr = expr_fn(tpch_lineitem)
result = translate(expr)
snapshot.assert_match(result, "out.sql")
Expand All @@ -99,6 +100,6 @@ def test_identical_to(mockcon, snapshot):


def test_identical_to_special_case(snapshot):
expr = ibis.NA.cast("int64").identical_to(ibis.NA.cast("int64")).name("tmp")
expr = ibis.null().cast("int64").identical_to(ibis.null().cast("int64")).name("tmp")
result = ibis.to_sql(expr, dialect="impala")
snapshot.assert_match(result, "out.sql")
24 changes: 12 additions & 12 deletions ibis/backends/impala/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ def test_builtins(con, alltypes):
i4 % 10,
20 % i1,
d % 5,
i1.fillna(0),
i4.fillna(0),
i8.fillna(0),
i1.fill_null(0),
i4.fill_null(0),
i8.fill_null(0),
i4.to_timestamp("s"),
i4.to_timestamp("ms"),
i4.to_timestamp("us"),
Expand All @@ -65,7 +65,7 @@ def test_builtins(con, alltypes):
d.ceil(),
d.exp(),
d.isnull(),
d.fillna(0),
d.fill_null(0),
d.floor(),
d.log(),
d.ln(),
Expand Down Expand Up @@ -164,7 +164,7 @@ def _check_impala_output_types_match(con, table):
(5 / L(50).nullif(0), 0.1),
(5 / L(50).nullif(L(50000)), 0.1),
(5 / L(50000).nullif(0), 0.0001),
(L(50000).fillna(0), 50000),
(L(50000).fill_null(0), 50000),
],
)
def test_int_builtins(con, expr, expected):
Expand Down Expand Up @@ -257,13 +257,13 @@ def approx_equal(a, b, eps):
[
pytest.param(lambda dc: dc, "5.245", id="id"),
pytest.param(lambda dc: dc % 5, "0.245", id="mod"),
pytest.param(lambda dc: dc.fillna(0), "5.245", id="fillna"),
pytest.param(lambda dc: dc.fill_null(0), "5.245", id="fill_null"),
pytest.param(lambda dc: dc.exp(), "189.6158", id="exp"),
pytest.param(lambda dc: dc.log(), "1.65728", id="log"),
pytest.param(lambda dc: dc.log2(), "2.39094", id="log2"),
pytest.param(lambda dc: dc.log10(), "0.71975", id="log10"),
pytest.param(lambda dc: dc.sqrt(), "2.29019", id="sqrt"),
pytest.param(lambda dc: dc.fillna(0), "5.245", id="zero_ifnull"),
pytest.param(lambda dc: dc.fill_null(0), "5.245", id="zero_ifnull"),
pytest.param(lambda dc: -dc, "-5.245", id="neg"),
],
)
Expand Down Expand Up @@ -384,8 +384,8 @@ def test_decimal_timestamp_builtins(con):
dc * 2,
dc**2,
dc.cast("double"),
api.ifelse(table.l_discount > 0, dc * table.l_discount, api.NA),
dc.fillna(0),
api.ifelse(table.l_discount > 0, dc * table.l_discount, api.null()),
dc.fill_null(0),
ts < (ibis.now() + ibis.interval(months=3)),
ts < (ibis.timestamp("2005-01-01") + ibis.interval(months=3)),
# hashing
Expand Down Expand Up @@ -632,10 +632,10 @@ def test_unions_with_ctes(con, alltypes):
@pytest.mark.parametrize(
("left", "right", "expected"),
[
(ibis.NA.cast("int64"), ibis.NA.cast("int64"), True),
(ibis.null().cast("int64"), ibis.null().cast("int64"), True),
(L(1), L(1), True),
(ibis.NA.cast("int64"), L(1), False),
(L(1), ibis.NA.cast("int64"), False),
(ibis.null().cast("int64"), L(1), False),
(L(1), ibis.null().cast("int64"), False),
(L(0), L(1), False),
(L(1), L(0), False),
],
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/impala/tests/test_unary_builtins.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def table(mockcon):
param(lambda x: x.log2(), id="log2"),
param(lambda x: x.log10(), id="log10"),
param(lambda x: x.nullif(0), id="nullif_zero"),
param(lambda x: x.fillna(0), id="zero_ifnull"),
param(lambda x: x.fill_null(0), id="zero_ifnull"),
],
)
@pytest.mark.parametrize("cname", ["double_col", "int_col"])
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/pandas/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,15 +740,15 @@ def visit(cls, op: ops.Distinct, parent):
return parent.drop_duplicates()

@classmethod
def visit(cls, op: ops.DropNa, parent, how, subset):
def visit(cls, op: ops.DropNull, parent, how, subset):
if op.subset is not None:
subset = [col.name for col in op.subset]
else:
subset = None
return parent.dropna(how=how, subset=subset)

@classmethod
def visit(cls, op: ops.FillNa, parent, replacements):
def visit(cls, op: ops.FillNull, parent, replacements):
return parent.fillna(replacements)

@classmethod
Expand Down
8 changes: 4 additions & 4 deletions ibis/backends/pandas/tests/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,10 +502,10 @@ def test_mutate_after_join():
.isnull()
.ifelse(joined["q_Order_Priority"], joined["p_Order_Priority"])
),
p_count=joined["p_count"].fillna(0),
q_count=joined["q_count"].fillna(0),
p_density=joined.p_density.fillna(1e-10),
q_density=joined.q_density.fillna(1e-10),
p_count=joined["p_count"].fill_null(0),
q_count=joined["q_count"].fill_null(0),
p_density=joined.p_density.fill_null(1e-10),
q_density=joined.q_density.fill_null(1e-10),
features=ibis.literal("Order_Priority"),
)

Expand Down
Loading