Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(api): move analytic window functions to top-level #7327

Merged
merged 5 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,21 @@ quartodoc:
- name: row_number
dynamic: true
signature_name: full
- name: rank
dynamic: true
signature_name: full
- name: dense_rank
dynamic: true
signature_name: full
- name: percent_rank
dynamic: true
signature_name: full
- name: cume_dist
dynamic: true
signature_name: full
- name: ntile
dynamic: true
signature_name: full
- name: window
dynamic: true
signature_name: full
Expand Down
8 changes: 4 additions & 4 deletions ibis/backends/base/sql/alchemy/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,10 +675,10 @@ class array_filter(FunctionElement):
ops.FirstValue: unary(sa.func.first_value),
ops.LastValue: unary(sa.func.last_value),
ops.RowNumber: fixed_arity(sa.func.row_number, 0),
ops.DenseRank: unary(lambda _: sa.func.dense_rank()),
ops.MinRank: unary(lambda _: sa.func.rank()),
ops.PercentRank: unary(lambda _: sa.func.percent_rank()),
ops.CumeDist: unary(lambda _: sa.func.cume_dist()),
ops.DenseRank: fixed_arity(sa.func.dense_rank, 0),
ops.MinRank: fixed_arity(sa.func.rank, 0),
ops.PercentRank: fixed_arity(sa.func.percent_rank, 0),
ops.CumeDist: fixed_arity(sa.func.cume_dist, 0),
ops.NthValue: _nth_value,
ops.WindowFunction: _window_function,
}
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/dask/tests/execution/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,8 @@ def test_batting_avg_change_in_games_per_year(players, players_df):


@pytest.mark.xfail(
raises=NotImplementedError,
reason="Grouped and order windows not supported yet",
raises=AssertionError,
reason="Dask doesn't support the `rank` method on SeriesGroupBy",
)
def test_batting_most_hits(players, players_df):
expr = players.mutate(
Expand Down
2 changes: 0 additions & 2 deletions ibis/backends/duckdb/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,8 +489,6 @@ def _to_json_collection(t, op):


# Operations this backend does not implement; presumably compilation is
# rejected for anything listed here — confirm against the registry's callers.
_invalid_operations = {
    # ibis.expr.operations.strings
    ops.Translate,
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ntile(3)
ntile(3) OVER (ORDER BY `double_col` ASC)
Original file line number Diff line number Diff line change
@@ -1 +1 @@
percent_rank()
percent_rank() OVER (ORDER BY `double_col` ASC)
51 changes: 30 additions & 21 deletions ibis/backends/pandas/execution/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,10 +254,10 @@ def trim_window_result(data: pd.Series | pd.DataFrame, timecontext: TimeContext
return indexed_subset[name]


@execute_node.register(ops.WindowFunction, pd.Series)
@execute_node.register(ops.WindowFunction, [pd.Series])
def execute_window_op(
op,
data,
*data,
scope: Scope | None = None,
timecontext: TimeContext | None = None,
aggcontext=None,
Expand Down Expand Up @@ -485,33 +485,42 @@ def execute_series_group_by_last_value(op, data, aggcontext=None, **kwargs):
return aggcontext.agg(data, lambda x: _getter(x, -1))


@execute_node.register(ops.MinRank, (pd.Series, SeriesGroupBy))
def execute_series_min_rank(op, data, **kwargs):
# TODO(phillipc): Handle ORDER BY
@execute_node.register(ops.MinRank)
def execute_series_min_rank(op, aggcontext=None, **kwargs):
    """Execute a MinRank over the window's ORDER BY column.

    Ranks ascending with ``method="min"`` (ties share the lowest rank)
    and shifts pandas' 1-based float ranks to 0-based int64 ranks.
    """
    # The single ordering key comes from the window's ORDER BY clause.
    (order_key,) = aggcontext.order_by
    ranked = aggcontext.parent[order_key].rank(method="min", ascending=True)
    return ranked.astype("int64") - 1


@execute_node.register(ops.DenseRank, (pd.Series, SeriesGroupBy))
def execute_series_dense_rank(op, data, **kwargs):
# TODO(phillipc): Handle ORDER BY
@execute_node.register(ops.DenseRank)
def execute_series_dense_rank(op, aggcontext=None, **kwargs):
    """Execute a DenseRank over the window's ORDER BY column.

    ``method="dense"`` makes tied values share a rank with no gaps; the
    result is shifted from pandas' 1-based ranks to 0-based int64 ranks.
    """
    (order_key,) = aggcontext.order_by
    column = aggcontext.parent[order_key]
    return column.rank(method="dense", ascending=True).astype("int64") - 1


@execute_node.register(ops.PercentRank, SeriesGroupBy)
def execute_series_group_by_percent_rank(op, data, **kwargs):
return (
data.rank(method="min", ascending=True)
.sub(1)
.div(data.transform("count").sub(1))
)
@execute_node.register(ops.PercentRank)
def execute_series_group_by_percent_rank(op, aggcontext=None, **kwargs):
    """Execute a PercentRank over the window's ORDER BY column.

    Computes ``(min_rank - 1) / (nrows - 1)``: the 0-based minimum rank
    divided by one less than the number of rows in the window.
    """
    (key,) = aggcontext.order_by
    df = aggcontext.parent
    data = df[key]

    # 0-based minimum rank of each row within its window.
    result = data.rank(method="min", ascending=True) - 1

    # For a grouped window each group contributes its own row count;
    # otherwise the whole column's length is the window size.
    if isinstance(data, SeriesGroupBy):
        nrows = data.transform("count")
    else:
        nrows = len(data)

    result /= nrows - 1
    return result


@execute_node.register(ops.CumeDist, (pd.Series, SeriesGroupBy))
def execute_series_group_by_cume_dist(op, data, **kwargs):
@execute_node.register(ops.CumeDist)
def execute_series_group_by_cume_dist(op, aggcontext=None, **kwargs):
    """Execute a CumeDist over the window's ORDER BY column.

    ``pct=True`` divides each min-rank by the window's row count,
    yielding a value in (0, 1].
    """
    (order_key,) = aggcontext.order_by
    column = aggcontext.parent[order_key]
    return column.rank(method="min", ascending=True, pct=True)
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
SELECT
RANK() OVER (ORDER BY t0.double_col ASC) - 1 AS rank,
DENSE_RANK() OVER (ORDER BY t0.double_col ASC) - 1 AS dense_rank,
CUME_DIST() OVER (ORDER BY t0.double_col ASC) AS cume_dist,
NTILE(7) OVER (ORDER BY t0.double_col ASC) - 1 AS ntile,
PERCENT_RANK() OVER (ORDER BY t0.double_col ASC) AS percent_rank
FROM functional_alltypes AS t0
41 changes: 8 additions & 33 deletions ibis/backends/postgres/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1110,40 +1110,15 @@ def test_identical_to(con, df):
tm.assert_series_equal(result, expected)


def test_rank(con):
t = con.table("functional_alltypes")
expr = t.double_col.rank().name("rank")
sqla_expr = expr.compile()
result = str(sqla_expr.compile(compile_kwargs={"literal_binds": True}))
expected = (
"SELECT rank() OVER (ORDER BY t0.double_col) - 1 AS rank \n"
"FROM functional_alltypes AS t0"
)
assert result == expected


def test_percent_rank(con):
t = con.table("functional_alltypes")
expr = t.double_col.percent_rank().name("percent_rank")
sqla_expr = expr.compile()
result = str(sqla_expr.compile(compile_kwargs={"literal_binds": True}))
expected = (
"SELECT percent_rank() OVER (ORDER BY t0.double_col) AS "
"percent_rank \nFROM functional_alltypes AS t0"
def test_analytic_functions(alltypes, snapshot):
    """Snapshot-test the SQL emitted for the top-level analytic functions.

    Covers rank, dense_rank, cume_dist, ntile and percent_rank on a single
    ordering column; the expected SQL lives in the ``out.sql`` snapshot.
    """
    expr = alltypes.select(
        rank=alltypes.double_col.rank(),
        dense_rank=alltypes.double_col.dense_rank(),
        cume_dist=alltypes.double_col.cume_dist(),
        ntile=alltypes.double_col.ntile(7),
        percent_rank=alltypes.double_col.percent_rank(),
    )
    snapshot.assert_match(str(ibis.to_sql(expr)), "out.sql")


@pytest.mark.parametrize("opname", ["invert", "neg"])
Expand Down
9 changes: 4 additions & 5 deletions ibis/backends/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,8 @@ def calc_zscore(s):
lambda t: t.cumcount(),
id="row_number",
marks=[
pytest.mark.notimpl(
["dask", "pandas"], raises=com.OperationNotDefinedError
)
pytest.mark.notimpl(["dask"], raises=NotImplementedError),
pytest.mark.notimpl(["pandas"], raises=com.OperationNotDefinedError),
],
),
param(
Expand Down Expand Up @@ -891,9 +890,9 @@ def gb_fn(df):


@pytest.mark.notimpl(
["clickhouse", "dask", "datafusion", "polars"],
raises=com.OperationNotDefinedError,
["clickhouse", "datafusion", "polars"], raises=com.OperationNotDefinedError
)
@pytest.mark.notimpl(["dask"], raises=AttributeError)
@pytest.mark.notimpl(["pyspark"], raises=AnalysisException)
@pytest.mark.notyet(
["clickhouse"],
Expand Down
Loading