ibis-project · ncclementi · Jun 3, 2024 · Jun 3, 2024 · Jun 3, 2024 · Jun 4, 2024
diff --git a/docs/_freeze/posts/campaign-finance/index/execute-results/html.json b/docs/_freeze/posts/campaign-finance/index/execute-results/html.json
diff --git a/docs/_quarto.yml b/docs/_quarto.yml
@@ -298,10 +298,6 @@ quartodoc:
             - name: param
               dynamic: true
               signature_name: full
-            - name: NA
-              # Ideally exposed under `ibis` but that doesn't seem to work??
-              package: ibis.expr.api
-              signature_name: full
             - name: "null"
               dynamic: true
               signature_name: full

diff --git a/docs/how-to/timeseries/sessionize.qmd b/docs/how-to/timeseries/sessionize.qmd
@@ -59,7 +59,7 @@ sessionized = (
     data
     # Rows where `is_new_session` is NULL are treated as the start of a new
     # session.
-    .mutate(new_session=is_new_session.fillna(True))
+    .mutate(new_session=is_new_session.fill_null(True))
     # Create a session id for each character by using a cumulative sum
     # over the `new_session` column.
     .mutate(session_id=c.new_session.sum().over(entity_window))

diff --git a/docs/posts/campaign-finance/index.qmd b/docs/posts/campaign-finance/index.qmd
@@ -245,7 +245,7 @@ def get_election_type(pgi: StringValue) -> StringValue:
         "E": "recount",
     }
     first_letter = pgi[0]
-    return first_letter.substitute(election_types, else_=ibis.NA)
+    return first_letter.substitute(election_types, else_=ibis.null())
 
 
 cleaned = cleaned.mutate(election_type=get_election_type(_.TRANSACTION_PGI)).drop(

diff --git a/docs/posts/ibis-analytics/index.qmd b/docs/posts/ibis-analytics/index.qmd
@@ -1220,7 +1220,7 @@ def transform_downloads(extract_downloads):
         )
         .order_by(ibis._.timestamp.desc())
     )
-    downloads = downloads.mutate(ibis._["python"].fillna("").name("python_full"))
+    downloads = downloads.mutate(ibis._["python"].fill_null("").name("python_full"))
     downloads = downloads.mutate(
         f.clean_version(downloads["python_full"], patch=False).name("python")
     )

diff --git a/docs/tutorials/ibis-for-pandas-users.qmd b/docs/tutorials/ibis-for-pandas-users.qmd
@@ -507,7 +507,7 @@ represented by `NaN`. This can be confusing when working with numeric data,
 since `NaN` is also a valid floating point value (along with `+/-inf`).
 
 In Ibis, we try to be more precise: All data types are nullable, and we use
-`ibis.NA` to represent `NULL` values, and all datatypes have a `.isnull()` method.
+`ibis.null()` to represent `NULL` values, and all datatypes have a `.isnull()` method.
 For floating point values, we use different values for `NaN` and `+/-inf`, and there
 are the additional methods `.isnan()` and `.isinf()`.
 
@@ -532,17 +532,17 @@ the column name for the value to apply to.
 
 
 ```{python}
-no_null_peng = penguins.fillna(dict(bill_depth_mm=0, bill_length_mm=0))
+no_null_peng = penguins.fill_null(dict(bill_depth_mm=0, bill_length_mm=0))
 ```
 
 ### Replacing `NULL`s
 
-Both pandas and Ibis have `fillna` methods which allow you to specify a replacement value
+The Ibis equivalent of pandas `fillna` is `fill_null`; this method allows you to specify a replacement value
 for `NULL` values.
 
 
 ```{python}
-bill_length_no_nulls = penguins.bill_length_mm.fillna(0)
+bill_length_no_nulls = penguins.bill_length_mm.fill_null(0)
 ```
 
 ## Type casts

diff --git a/docs/tutorials/ibis-for-sql-users.qmd b/docs/tutorials/ibis-for-sql-users.qmd
@@ -522,10 +522,10 @@ ibis.to_sql(expr)
 
 ### Using `NULL` in expressions
 
-To use `NULL` in an expression, either use the special `ibis.NA` value:
+To use `NULL` in an expression, use `ibis.null()`:
 
 ```{python}
-pos_two = (t.two > 0).ifelse(t.two, ibis.NA)
+pos_two = (t.two > 0).ifelse(t.two, ibis.null())
 expr = t.mutate(two_positive=pos_two)
 ibis.to_sql(expr)
 ```

diff --git a/ibis/__init__.py b/ibis/__init__.py
@@ -4,6 +4,9 @@
 
 __version__ = "9.0.0"
 
+import warnings
+from typing import Any
+
 from ibis import examples, util
 from ibis.backends import BaseBackend
 from ibis.common.exceptions import IbisError
@@ -36,7 +39,7 @@ def __dir__() -> list[str]:
     return sorted(out)
 
 
-def __getattr__(name: str) -> BaseBackend:
+def load_backend(name: str) -> BaseBackend:
     """Load backends in a lazy way with `ibis.<backend-name>`.
 
     This also registers the backend options.
@@ -125,3 +128,18 @@ def connect(*args, **kwargs):
         setattr(proxy, name, getattr(backend, name))
 
     return proxy
+
+
+def __getattr__(name: str) -> Any:
+    if name == "NA":
+        warnings.warn(
+            "Accessing 'ibis.NA' is deprecated as of v9.1 and will be removed in a future version. "
+            "Use 'ibis.null()' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        import ibis
+
+        return ibis.null()
+    else:
+        return load_backend(name)
diff --git a/ibis/backends/clickhouse/tests/test_functions.py b/ibis/backends/clickhouse/tests/test_functions.py
@@ -116,8 +116,8 @@ def test_isnull_notnull(con, expr, expected):
     ("expr", "expected"),
     [
         (ibis.coalesce(5, None, 4), 5),
-        (ibis.coalesce(ibis.NA, 4, ibis.NA), 4),
-        (ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14),
+        (ibis.coalesce(ibis.null(), 4, ibis.null()), 4),
+        (ibis.coalesce(ibis.null(), ibis.null(), 3.14), 3.14),
     ],
 )
 def test_coalesce(con, expr, expected):
@@ -127,13 +127,13 @@ def test_coalesce(con, expr, expected):
 @pytest.mark.parametrize(
     ("expr", "expected"),
     [
-        (ibis.NA.fillna(5), 5),
-        (L(5).fillna(10), 5),
+        (ibis.null().fill_null(5), 5),
+        (L(5).fill_null(10), 5),
         (L(5).nullif(5), None),
         (L(10).nullif(5), 10),
     ],
 )
-def test_fillna_nullif(con, expr, expected):
+def test_fill_null_nullif(con, expr, expected):
     result = con.execute(expr)
     if expected is None:
         assert pd.isnull(result)
@@ -150,7 +150,7 @@ def test_fillna_nullif(con, expr, expected):
         (L(datetime(2015, 9, 1, hour=14, minute=48, second=5)), "DateTime"),
         (L(date(2015, 9, 1)), "Date"),
         param(
-            ibis.NA,
+            ibis.null(),
             "Null",
             marks=pytest.mark.xfail(
                 raises=AssertionError,
@@ -418,7 +418,7 @@ def test_numeric_builtins_work(alltypes, df):
 def test_null_column(alltypes):
     t = alltypes
     nrows = t.count().execute()
-    expr = t.mutate(na_column=ibis.NA).na_column
+    expr = t.mutate(na_column=ibis.null()).na_column
     result = expr.execute()
     expected = pd.Series([None] * nrows, name="na_column")
     tm.assert_series_equal(result, expected)

diff --git a/ibis/backends/clickhouse/tests/test_select.py b/ibis/backends/clickhouse/tests/test_select.py
@@ -362,7 +362,7 @@ def test_count_name(assert_sql):
     t = ibis.table(dict(a="string", b="bool"), name="t")
 
     expr = t.group_by(t.a).agg(
-        A=t.count(where=~t.b).fillna(0), B=t.count(where=t.b).fillna(0)
+        A=t.count(where=~t.b).fill_null(0), B=t.count(where=t.b).fill_null(0)
     )
     assert_sql(expr)
 

diff --git a/ibis/backends/dask/tests/test_window.py b/ibis/backends/dask/tests/test_window.py
@@ -20,7 +20,7 @@ def sort_kind():
     return "mergesort"
 
 
-default = pytest.mark.parametrize("default", [ibis.NA, ibis.literal("a")])
+default = pytest.mark.parametrize("default", [ibis.null(), ibis.literal("a")])
 row_offset = pytest.mark.parametrize("row_offset", list(map(ibis.literal, [-1, 1, 0])))
 range_offset = pytest.mark.parametrize(
     "range_offset",
@@ -48,7 +48,7 @@ def test_lead(con, t, df, row_offset, default, row_window):
     expr = t.dup_strings.lead(row_offset, default=default).over(row_window)
     result = expr.execute()
     expected = df.dup_strings.shift(con.execute(-row_offset)).compute()
-    if default is not ibis.NA:
+    if default is not ibis.null():
         expected = expected.fillna(con.execute(default))
     tm.assert_series_equal(result, expected, check_names=False)
 
@@ -59,7 +59,7 @@ def test_lag(con, t, df, row_offset, default, row_window):
     expr = t.dup_strings.lag(row_offset, default=default).over(row_window)
     result = expr.execute()
     expected = df.dup_strings.shift(con.execute(row_offset)).compute()
-    if default is not ibis.NA:
+    if default is not ibis.null():
         expected = expected.fillna(con.execute(default))
     tm.assert_series_equal(result, expected, check_names=False)
 
@@ -78,7 +78,7 @@ def test_lead_delta(con, t, pandas_df, range_offset, default, range_window):
         .reindex(pandas_df.plain_datetimes_naive)
         .reset_index(drop=True)
     )
-    if default is not ibis.NA:
+    if default is not ibis.null():
         expected = expected.fillna(con.execute(default))
     tm.assert_series_equal(result, expected, check_names=False)
 
@@ -98,7 +98,7 @@ def test_lag_delta(t, con, pandas_df, range_offset, default, range_window):
         .reindex(pandas_df.plain_datetimes_naive)
         .reset_index(drop=True)
     )
-    if default is not ibis.NA:
+    if default is not ibis.null():
         expected = expected.fillna(con.execute(default))
     tm.assert_series_equal(result, expected, check_names=False)
 

diff --git a/...a_cast_arg/fillna_l_extendedprice/out.sql → ...ast_arg/fill_null_l_extendedprice/out.sql b/...a_cast_arg/fillna_l_extendedprice/out.sql → ...ast_arg/fill_null_l_extendedprice/out.sql
diff --git a/...arg/fillna_l_extendedprice_double/out.sql → .../fill_null_l_extendedprice_double/out.sql b/...arg/fillna_l_extendedprice_double/out.sql → .../fill_null_l_extendedprice_double/out.sql
diff --git a/...fillna_cast_arg/fillna_l_quantity/out.sql → ...ull_cast_arg/fill_null_l_quantity/out.sql b/...fillna_cast_arg/fillna_l_quantity/out.sql → ...ull_cast_arg/fill_null_l_quantity/out.sql
diff --git a/ibis/backends/impala/tests/test_case_exprs.py b/ibis/backends/impala/tests/test_case_exprs.py
@@ -76,16 +76,17 @@ def test_nullif_ifnull(tpch_lineitem, expr_fn, snapshot):
 @pytest.mark.parametrize(
     "expr_fn",
     [
-        pytest.param(lambda t: t.l_quantity.fillna(0), id="fillna_l_quantity"),
+        pytest.param(lambda t: t.l_quantity.fill_null(0), id="fill_null_l_quantity"),
         pytest.param(
-            lambda t: t.l_extendedprice.fillna(0), id="fillna_l_extendedprice"
+            lambda t: t.l_extendedprice.fill_null(0), id="fill_null_l_extendedprice"
         ),
         pytest.param(
-            lambda t: t.l_extendedprice.fillna(0.0), id="fillna_l_extendedprice_double"
+            lambda t: t.l_extendedprice.fill_null(0.0),
+            id="fill_null_l_extendedprice_double",
         ),
     ],
 )
-def test_decimal_fillna_cast_arg(tpch_lineitem, expr_fn, snapshot):
+def test_decimal_fill_null_cast_arg(tpch_lineitem, expr_fn, snapshot):
     expr = expr_fn(tpch_lineitem)
     result = translate(expr)
     snapshot.assert_match(result, "out.sql")
@@ -99,6 +100,6 @@ def test_identical_to(mockcon, snapshot):
 
 
 def test_identical_to_special_case(snapshot):
-    expr = ibis.NA.cast("int64").identical_to(ibis.NA.cast("int64")).name("tmp")
+    expr = ibis.null().cast("int64").identical_to(ibis.null().cast("int64")).name("tmp")
     result = ibis.to_sql(expr, dialect="impala")
     snapshot.assert_match(result, "out.sql")
diff --git a/ibis/backends/impala/tests/test_exprs.py b/ibis/backends/impala/tests/test_exprs.py
@@ -52,9 +52,9 @@ def test_builtins(con, alltypes):
         i4 % 10,
         20 % i1,
         d % 5,
-        i1.fillna(0),
-        i4.fillna(0),
-        i8.fillna(0),
+        i1.fill_null(0),
+        i4.fill_null(0),
+        i8.fill_null(0),
         i4.to_timestamp("s"),
         i4.to_timestamp("ms"),
         i4.to_timestamp("us"),
@@ -65,7 +65,7 @@ def test_builtins(con, alltypes):
         d.ceil(),
         d.exp(),
         d.isnull(),
-        d.fillna(0),
+        d.fill_null(0),
         d.floor(),
         d.log(),
         d.ln(),
@@ -164,7 +164,7 @@ def _check_impala_output_types_match(con, table):
         (5 / L(50).nullif(0), 0.1),
         (5 / L(50).nullif(L(50000)), 0.1),
         (5 / L(50000).nullif(0), 0.0001),
-        (L(50000).fillna(0), 50000),
+        (L(50000).fill_null(0), 50000),
     ],
 )
 def test_int_builtins(con, expr, expected):
@@ -257,13 +257,13 @@ def approx_equal(a, b, eps):
     [
         pytest.param(lambda dc: dc, "5.245", id="id"),
         pytest.param(lambda dc: dc % 5, "0.245", id="mod"),
-        pytest.param(lambda dc: dc.fillna(0), "5.245", id="fillna"),
+        pytest.param(lambda dc: dc.fill_null(0), "5.245", id="fill_null"),
         pytest.param(lambda dc: dc.exp(), "189.6158", id="exp"),
         pytest.param(lambda dc: dc.log(), "1.65728", id="log"),
         pytest.param(lambda dc: dc.log2(), "2.39094", id="log2"),
         pytest.param(lambda dc: dc.log10(), "0.71975", id="log10"),
         pytest.param(lambda dc: dc.sqrt(), "2.29019", id="sqrt"),
-        pytest.param(lambda dc: dc.fillna(0), "5.245", id="zero_ifnull"),
+        pytest.param(lambda dc: dc.fill_null(0), "5.245", id="zero_ifnull"),
         pytest.param(lambda dc: -dc, "-5.245", id="neg"),
     ],
 )
@@ -384,8 +384,8 @@ def test_decimal_timestamp_builtins(con):
         dc * 2,
         dc**2,
         dc.cast("double"),
-        api.ifelse(table.l_discount > 0, dc * table.l_discount, api.NA),
-        dc.fillna(0),
+        api.ifelse(table.l_discount > 0, dc * table.l_discount, api.null()),
+        dc.fill_null(0),
         ts < (ibis.now() + ibis.interval(months=3)),
         ts < (ibis.timestamp("2005-01-01") + ibis.interval(months=3)),
         # hashing
@@ -632,10 +632,10 @@ def test_unions_with_ctes(con, alltypes):
 @pytest.mark.parametrize(
     ("left", "right", "expected"),
     [
-        (ibis.NA.cast("int64"), ibis.NA.cast("int64"), True),
+        (ibis.null().cast("int64"), ibis.null().cast("int64"), True),
         (L(1), L(1), True),
-        (ibis.NA.cast("int64"), L(1), False),
-        (L(1), ibis.NA.cast("int64"), False),
+        (ibis.null().cast("int64"), L(1), False),
+        (L(1), ibis.null().cast("int64"), False),
         (L(0), L(1), False),
         (L(1), L(0), False),
     ],

diff --git a/ibis/backends/impala/tests/test_unary_builtins.py b/ibis/backends/impala/tests/test_unary_builtins.py
@@ -29,7 +29,7 @@ def table(mockcon):
         param(lambda x: x.log2(), id="log2"),
         param(lambda x: x.log10(), id="log10"),
         param(lambda x: x.nullif(0), id="nullif_zero"),
-        param(lambda x: x.fillna(0), id="zero_ifnull"),
+        param(lambda x: x.fill_null(0), id="zero_ifnull"),
     ],
 )
 @pytest.mark.parametrize("cname", ["double_col", "int_col"])

diff --git a/ibis/backends/pandas/executor.py b/ibis/backends/pandas/executor.py
@@ -740,15 +740,15 @@ def visit(cls, op: ops.Distinct, parent):
         return parent.drop_duplicates()
 
     @classmethod
-    def visit(cls, op: ops.DropNa, parent, how, subset):
+    def visit(cls, op: ops.DropNull, parent, how, subset):
         if op.subset is not None:
             subset = [col.name for col in op.subset]
         else:
             subset = None
         return parent.dropna(how=how, subset=subset)
 
     @classmethod
-    def visit(cls, op: ops.FillNa, parent, replacements):
+    def visit(cls, op: ops.FillNull, parent, replacements):
         return parent.fillna(replacements)
 
     @classmethod

diff --git a/ibis/backends/pandas/tests/test_join.py b/ibis/backends/pandas/tests/test_join.py
@@ -502,10 +502,10 @@ def test_mutate_after_join():
             .isnull()
             .ifelse(joined["q_Order_Priority"], joined["p_Order_Priority"])
         ),
-        p_count=joined["p_count"].fillna(0),
-        q_count=joined["q_count"].fillna(0),
-        p_density=joined.p_density.fillna(1e-10),
-        q_density=joined.q_density.fillna(1e-10),
+        p_count=joined["p_count"].fill_null(0),
+        q_count=joined["q_count"].fill_null(0),
+        p_density=joined.p_density.fill_null(1e-10),
+        q_density=joined.q_density.fill_null(1e-10),
         features=ibis.literal("Order_Priority"),
     )