diff --git a/ibis/backends/dask/executor.py b/ibis/backends/dask/executor.py index 58481ed654c9..921985a833c7 100644 --- a/ibis/backends/dask/executor.py +++ b/ibis/backends/dask/executor.py @@ -364,10 +364,25 @@ def visit(cls, op: ops.Sort, parent, keys): # 2. sort the dataframe using those columns # 3. drop the sort key columns ascending = [key.ascending for key in op.keys] + nulls_first = [key.nulls_first for key in op.keys] + + if all(nulls_first): + na_position = "first" + elif not any(nulls_first): + na_position = "last" + else: + raise ValueError( + "dask does not support specifying null ordering for individual columns" + ) + newcols = {gen_name("sort_key"): col for col in keys} names = list(newcols.keys()) df = parent.assign(**newcols) - df = df.sort_values(by=names, ascending=ascending) + df = df.sort_values( + by=names, + ascending=ascending, + na_position=na_position, + ) return df.drop(names, axis=1) @classmethod diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql index c6b1bb3235f9..2b5291f39629 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql @@ -1,3 +1,3 @@ SELECT - FIRST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC NULLS LAST) AS `First(double_col)` + FIRST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC) AS `First(double_col)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_arg/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_arg/out.sql index 864b3a2efb6c..e2b5fda12bd2 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_arg/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_arg/out.sql @@ -1,3 +1,3 @@ SELECT - LAG(`t0`.`string_col`, 2) OVER (ORDER BY NULL ASC NULLS LAST) AS `Lag(string_col, 2)` + LAG(`t0`.`string_col`, 2) OVER (ORDER BY NULL ASC) AS `Lag(string_col, 2)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_default/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_default/out.sql index 9326b3b54e4c..73d13363b03a 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_default/out.sql @@ -1,3 +1,3 @@ SELECT - LAG(`t0`.`string_col`) OVER (ORDER BY NULL ASC NULLS LAST) AS `Lag(string_col)` + LAG(`t0`.`string_col`) OVER (ORDER BY NULL ASC) AS `Lag(string_col)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_explicit_default/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_explicit_default/out.sql index 1a917ebf03ae..672b17a3c97a 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_explicit_default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lag_explicit_default/out.sql @@ -1,3 +1,3 @@ SELECT - LAG(`t0`.`string_col`, 1, 0) OVER (ORDER BY NULL ASC NULLS LAST) AS `Lag(string_col, 0)` + LAG(`t0`.`string_col`, 1, 0) OVER (ORDER BY NULL ASC) AS `Lag(string_col, 0)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql index 4409732d6cf7..3807d77b7e8e 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql @@ -1,3 +1,3 @@ SELECT - LAST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC NULLS LAST) AS `Last(double_col)` + LAST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC) AS `Last(double_col)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_arg/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_arg/out.sql index 0ff670b2a562..47b7bd023daf 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_arg/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_arg/out.sql @@ -1,3 +1,3 @@ SELECT - LEAD(`t0`.`string_col`, 2) OVER (ORDER BY NULL ASC NULLS LAST) AS `Lead(string_col, 2)` + LEAD(`t0`.`string_col`, 2) OVER (ORDER BY NULL ASC) AS `Lead(string_col, 2)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_default/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_default/out.sql index f2f42db96f81..466563842ba0 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_default/out.sql @@ -1,3 +1,3 @@ SELECT - LEAD(`t0`.`string_col`) OVER (ORDER BY NULL ASC NULLS LAST) AS `Lead(string_col)` + LEAD(`t0`.`string_col`) OVER (ORDER BY NULL ASC) AS `Lead(string_col)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_explicit_default/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_explicit_default/out.sql index 4051eb507852..fb4967eb993b 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_explicit_default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/lead_explicit_default/out.sql @@ -1,3 +1,3 @@ SELECT - LEAD(`t0`.`string_col`, 1, 0) OVER (ORDER BY NULL ASC NULLS LAST) AS `Lead(string_col, 0)` + LEAD(`t0`.`string_col`, 1, 0) OVER (ORDER BY NULL ASC) AS `Lead(string_col, 0)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/ntile/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/ntile/out.sql index b05add74ff76..9d1f7b6439b5 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/ntile/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/ntile/out.sql @@ -1,3 +1,3 @@ SELECT - NTILE(3) OVER (ORDER BY `t0`.`double_col` ASC NULLS LAST) - 1 AS `NTile(3)` + NTILE(3) OVER (ORDER BY `t0`.`double_col` ASC) - 1 AS `NTile(3)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/percent_rank/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/percent_rank/out.sql index adfc11bd37a6..f4587b9da429 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/percent_rank/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/percent_rank/out.sql @@ -1,3 +1,3 @@ SELECT - PERCENT_RANK() OVER (ORDER BY `t0`.`double_col` ASC NULLS LAST) AS `PercentRank()` + PERCENT_RANK() OVER (ORDER BY `t0`.`double_col` ASC) AS `PercentRank()` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql b/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql index 5a4f2dc98f14..4705e8524638 100644 --- a/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql @@ -1 +1 @@ -SELECT * FROM (SELECT `t1`.`col`, COUNT(*) OVER (ORDER BY NULL ASC NULLS LAST) AS `analytic` FROM (SELECT `t0`.`col`, NULL AS `filter` FROM `x` AS `t0` WHERE NULL IS NULL) AS `t1`) AS `t2` \ No newline at end of file +SELECT * FROM (SELECT `t1`.`col`, COUNT(*) OVER (ORDER BY NULL ASC) AS `analytic` FROM (SELECT `t0`.`col`, NULL AS `filter` FROM `x` AS `t0` WHERE NULL IS NULL) AS `t1`) AS `t2` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql index b76e784fe68e..79ea467e969b 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql @@ -1 +1 @@ -SELECT `t0`.`one`, `t0`.`two`, `t0`.`three`, SUM(`t0`.`two`) OVER (PARTITION BY `t0`.`three` ORDER BY `t0`.`one` ASC NULLS LAST) AS `four` FROM `my_data` AS `t0` \ No newline at end of file +SELECT `t0`.`one`, `t0`.`two`, `t0`.`three`, SUM(`t0`.`two`) OVER (PARTITION BY `t0`.`three` ORDER BY `t0`.`one` ASC) AS `four` FROM `my_data` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql index 8d8f9b622b5e..9a50bddd2c69 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql @@ -10,9 +10,9 @@ SELECT `t0`.`i`, `t0`.`j`, `t0`.`k`, - LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC NULLS LAST) AS `lag`, - LEAD(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC NULLS LAST) - `t0`.`f` AS `fwd_diff`, - FIRST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC NULLS LAST) AS `first`, - LAST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC NULLS LAST) AS `last`, - LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`d` ASC NULLS LAST) AS `lag2` + LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) AS `lag`, + LEAD(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) - `t0`.`f` AS `fwd_diff`, + FIRST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) AS `first`, + LAST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) AS `last`, + LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`d` ASC) AS `lag2` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql index f1f482ce6b36..3004bcd8535f 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql @@ -10,5 +10,5 @@ SELECT `t0`.`i`, `t0`.`j`, `t0`.`k`, - `t0`.`f` / SUM(`t0`.`f`) OVER (ORDER BY NULL ASC NULLS LAST) AS `normed_f` + `t0`.`f` / SUM(`t0`.`f`) OVER (ORDER BY NULL ASC) AS `normed_f` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql index 94b0b10ea0c5..1c00fd99c792 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql @@ -1,3 +1,3 @@ SELECT - MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` + MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql index 94b0b10ea0c5..1c00fd99c792 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql @@ -1,3 +1,3 @@ SELECT - MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` + MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql index 183b589665ac..8bfdfda64125 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql @@ -1,3 +1,3 @@ SELECT - AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` + AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql index 183b589665ac..8bfdfda64125 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql @@ -1,3 +1,3 @@ SELECT - AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` + AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql index 8fe0142ae10e..fa763e794811 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql @@ -1,3 +1,3 @@ SELECT - MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` + MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql index 8fe0142ae10e..fa763e794811 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql @@ -1,3 +1,3 @@ SELECT - MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` + MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql index ab0a805400b4..4a0222bfac7c 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` + SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql index ab0a805400b4..4a0222bfac7c 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC NULLS LAST) AS `foo` + SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql index ba464a7151de..7f161f658529 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql @@ -1,4 +1,4 @@ SELECT `t0`.`g`, - SUM(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC NULLS LAST) - SUM(`t0`.`f`) OVER (ORDER BY NULL ASC NULLS LAST) AS `result` + SUM(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) - SUM(`t0`.`f`) OVER (ORDER BY NULL ASC) AS `result` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_nested_analytic_function/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_nested_analytic_function/out.sql index 706a705bf519..4b3c7fd06ecc 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_nested_analytic_function/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_nested_analytic_function/out.sql @@ -1,3 +1,3 @@ SELECT - LAG(`t0`.`f` - LAG(`t0`.`f`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST)) OVER (ORDER BY `t0`.`f` ASC NULLS LAST) AS `foo` + LAG(`t0`.`f` - LAG(`t0`.`f`) OVER (ORDER BY `t0`.`f` ASC)) OVER (ORDER BY `t0`.`f` ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out1.sql index 5692444e6ef9..a4bfd7709527 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out1.sql @@ -1,4 +1,4 @@ SELECT `t0`.`f`, - ROW_NUMBER() OVER (ORDER BY `t0`.`f` DESC) - 1 AS `revrank` + ROW_NUMBER() OVER (ORDER BY `t0`.`f` DESC NULLS LAST) - 1 AS `revrank` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql index fd79e6972531..148babbf968d 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql @@ -1,4 +1,4 @@ SELECT - LAG(`t0`.`d`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` DESC) AS `foo`, - MAX(`t0`.`a`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` DESC) AS `Max(a)` + LAG(`t0`.`d`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` DESC NULLS LAST) AS `foo`, + MAX(`t0`.`a`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` DESC NULLS LAST) AS `Max(a)` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_propagate_nested_windows/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_propagate_nested_windows/out.sql index 96f8ae2f0e6c..cf60c97feff0 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_propagate_nested_windows/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_propagate_nested_windows/out.sql @@ -1,5 +1,3 @@ SELECT - LAG( - `t0`.`f` - LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` ASC NULLS LAST) - ) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` ASC NULLS LAST) AS `foo` + LAG(`t0`.`f` - LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` ASC)) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_rank_functions/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_rank_functions/out.sql index 92c9d14f20b5..bdf930d27387 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_rank_functions/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_rank_functions/out.sql @@ -1,5 +1,5 @@ SELECT `t0`.`g`, - RANK() OVER (ORDER BY `t0`.`f` ASC NULLS LAST) - 1 AS `minr`, - DENSE_RANK() OVER (ORDER BY `t0`.`f` ASC NULLS LAST) - 1 AS `denser` + RANK() OVER (ORDER BY `t0`.`f` ASC) - 1 AS `minr`, + DENSE_RANK() OVER (ORDER BY `t0`.`f` ASC) - 1 AS `denser` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out1.sql index d4075f13c006..6d0a3ca38566 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out1.sql @@ -10,5 +10,5 @@ SELECT `t0`.`i`, `t0`.`j`, `t0`.`k`, - ROW_NUMBER() OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC NULLS LAST) AS `foo` + ROW_NUMBER() OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out2.sql index c423c5e14049..848eadfd34cf 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_row_number_does_not_require_order_by/out2.sql @@ -10,5 +10,5 @@ SELECT `t0`.`i`, `t0`.`j`, `t0`.`k`, - ROW_NUMBER() OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` ASC NULLS LAST) - 1 AS `foo` + ROW_NUMBER() OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` ASC) - 1 AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_row_number_properly_composes_with_arithmetic/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_row_number_properly_composes_with_arithmetic/out.sql index 47864c94d3db..3c427bd8d8cd 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_row_number_properly_composes_with_arithmetic/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_row_number_properly_composes_with_arithmetic/out.sql @@ -11,6 +11,6 @@ SELECT `t0`.`j`, `t0`.`k`, ( - ROW_NUMBER() OVER (ORDER BY `t0`.`f` ASC NULLS LAST) - 1 + ROW_NUMBER() OVER (ORDER BY `t0`.`f` ASC) - 1 ) / 2 AS `new` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql index fb0afc364573..59c90d433f04 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql index fb0afc364573..59c90d433f04 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_10_5/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_10_5/out.sql index 0c5d75598474..04a67494f61c 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_10_5/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_10_5/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 10 preceding AND 5 preceding) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN 10 preceding AND 5 preceding) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2/out.sql index b2c51c37cdd4..fea0bef745be 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 2 following) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 following) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2_prec_0/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2_prec_0/out.sql index 133948c9fac3..934d181ace5c 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2_prec_0/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_2_prec_0/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN CURRENT ROW AND 2 following) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN CURRENT ROW AND 2 following) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_5_10/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_5_10/out.sql index 66299747e54f..5f403774950e 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_5_10/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_5_10/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 5 following AND 10 following) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN 5 following AND 10 following) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_0/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_0/out.sql index 77b80b90de80..e99891553eab 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_0/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_0/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5/out.sql index 72fbc24d4f8b..c37a4c6f60f0 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 5 preceding AND UNBOUNDED FOLLOWING) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN 5 preceding AND UNBOUNDED FOLLOWING) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_0/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_0/out.sql index a7b4b896db68..4ff17326b133 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_0/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_0/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 5 preceding AND CURRENT ROW) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN 5 preceding AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_2/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_2/out.sql index 15f6313e751c..4a922a7dcda8 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_2/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/prec_5_foll_2/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 5 preceding AND 2 following) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN 5 preceding AND 2 following) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/trailing_10/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/trailing_10/out.sql index e7baeb238d4d..a031f7238b10 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/trailing_10/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/trailing_10/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 10 preceding AND CURRENT ROW) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN 10 preceding AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/pandas/executor.py b/ibis/backends/pandas/executor.py index fca17cc2cf1e..a3153d17b8b4 100644 --- a/ibis/backends/pandas/executor.py +++ b/ibis/backends/pandas/executor.py @@ -69,7 +69,7 @@ def visit(cls, op: ops.Alias, arg, name): return arg @classmethod - def visit(cls, op: ops.SortKey, expr, ascending): + def visit(cls, op: ops.SortKey, expr, ascending, nulls_first): return expr @classmethod @@ -593,11 +593,26 @@ def visit(cls, op: ops.Sort, parent, keys): # 2. sort the dataframe using those columns # 3. drop the sort key columns ascending = [key.ascending for key in op.keys] + nulls_first = [key.nulls_first for key in op.keys] + + if all(nulls_first): + na_position = "first" + elif not any(nulls_first): + na_position = "last" + else: + raise ValueError( + "pandas does not support specifying null ordering for individual columns" + ) + newcols = {gen_name("sort_key"): col for col in keys} names = list(newcols.keys()) df = parent.assign(**newcols) df = df.sort_values( - by=names, ascending=ascending, ignore_index=True, kind="mergesort" + by=names, + ascending=ascending, + na_position=na_position, + ignore_index=True, + kind="mergesort", ) return df.drop(columns=names) diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index 9309581d05d2..4d9a497191b4 100644 --- a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -227,10 +227,9 @@ def sort(op, **kw): by = list(newcols.keys()) descending = [key.descending for key in op.keys] - try: - lf = lf.sort(by, descending=descending, nulls_last=True) - except TypeError: # pragma: no cover - lf = lf.sort(by, reverse=descending, nulls_last=True) # pragma: no cover + nulls_last = [not key.nulls_first for key in op.keys] + + lf = lf.sort(by, descending=descending, nulls_last=nulls_last) return lf.drop(*by) diff --git a/ibis/backends/sql/compiler.py b/ibis/backends/sql/compiler.py index de0a0bf6eeea..09f26b80d62f 100644 --- a/ibis/backends/sql/compiler.py +++ b/ibis/backends/sql/compiler.py @@ -1044,8 +1044,8 @@ def visit_Coalesce(self, op, *, arg): ### Ordering and window functions - def visit_SortKey(self, op, *, expr, ascending: bool): - return sge.Ordered(this=expr, desc=not ascending) + def visit_SortKey(self, op, *, expr, ascending: bool, nulls_first: bool): + return sge.Ordered(this=expr, desc=not ascending, nulls_first=nulls_first) def visit_ApproxMedian(self, op, *, arg, where): return self.agg.approx_quantile(arg, 0.5, where=where) diff --git a/ibis/backends/sql/dialects.py b/ibis/backends/sql/dialects.py index f51e38d3bd75..daa52eecedf9 100644 --- a/ibis/backends/sql/dialects.py +++ b/ibis/backends/sql/dialects.py @@ -273,6 +273,8 @@ class Tokenizer(Hive.Tokenizer): class Impala(Hive): + NULL_ORDERING = "nulls_are_large" + class Generator(Hive.Generator): TRANSFORMS = Hive.Generator.TRANSFORMS.copy() | { sge.ApproxDistinct: rename_func("ndv"), @@ -336,6 +338,8 @@ def _create_sql(self, expression: sge.Create) -> str: return self.create_sql(expression) +# hack around https://github.com/tobymao/sqlglot/issues/3684 +Oracle.NULL_ORDERING = "nulls_are_large" Oracle.Generator.TRANSFORMS |= { sge.LogicalOr: rename_func("max"), sge.LogicalAnd: rename_func("min"), diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/impala/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/impala/out.sql index 135af2cbb817..ec87896c4f64 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/impala/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/impala/out.sql @@ -1,12 +1,12 @@ WITH `t1` AS ( SELECT `t0`.`street`, - ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC) - 1 AS `key` FROM `data` AS `t0` ), `t7` AS ( SELECT `t6`.`street`, - ROW_NUMBER() OVER (ORDER BY `t6`.`street` ASC NULLS LAST) - 1 AS `key` + ROW_NUMBER() OVER (ORDER BY `t6`.`street` ASC) - 1 AS `key` FROM ( SELECT `t3`.`street`, diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 44e8156e7e34..3ed4a9db8cc5 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd +import pandas.testing as tm import pytest import toolz from pytest import param @@ -599,6 +600,142 @@ def test_order_by_random(alltypes): assert not r1.equals(r2) +@pytest.mark.notimpl(["druid"]) +@pytest.mark.parametrize( + "op, expected", + [ + param("desc", {"a": [1, 2, 3], "b": ["foo", "baz", None]}), + param("asc", {"a": [2, 1, 3], "b": ["baz", "foo", None]}), + ], + ids=["desc", "asc"], +) +def test_order_by_nulls_default(con, op, expected): + # default nulls_first is False + t = ibis.memtable([{"a": 1, "b": "foo"}, {"a": 2, "b": "baz"}, {"a": 3, "b": None}]) + expr = t.order_by(getattr(t["b"], op)()) + result = con.execute(expr).reset_index(drop=True) + expected = pd.DataFrame(expected) + + tm.assert_frame_equal( + result.replace({np.nan: None}), expected.replace({np.nan: None}) + ) + + +@pytest.mark.notimpl(["druid"]) +@pytest.mark.parametrize( + "op, nulls_first, expected", + [ + param("desc", True, {"a": [3, 1, 2], "b": [None, "foo", "baz"]}), + param("asc", True, {"a": [3, 2, 1], "b": [None, "baz", "foo"]}), + ], + ids=["desc", "asc"], +) +def test_order_by_nulls(con, op, nulls_first, expected): + t = ibis.memtable([{"a": 1, "b": "foo"}, {"a": 2, "b": "baz"}, {"a": 3, "b": None}]) + expr = t.order_by(getattr(t["b"], op)(nulls_first=nulls_first)) + result = con.execute(expr).reset_index(drop=True) + expected = pd.DataFrame(expected) + + tm.assert_frame_equal( + result.replace({np.nan: None}), expected.replace({np.nan: None}) + ) + + +@pytest.mark.notimpl(["druid"]) +@pytest.mark.broken( + ["exasol", "mssql", "mysql"], + raises=AssertionError, + reason="someone decided a long time ago that 'A' = 'a' is true in these systems", +) +@pytest.mark.parametrize( + "op1, nf1, op2, nf2, expected", + [ + param( + "asc", + False, + "desc", + False, + { + "col1": [1, 1, 1, 2, 3, 3, None], + "col2": ["c", "a", None, "B", "a", "D", "a"], + }, + id="asc-desc-ff", + ), + param( + "asc", + True, + "desc", + True, + { + "col1": [None, 1, 1, 1, 2, 3, 3], + "col2": ["a", None, "c", "a", "B", "a", "D"], + }, + id="asc-desc-tt", + ), + param( + "asc", + True, + "desc", + False, + { + "col1": [None, 1, 1, 1, 2, 3, 3], + "col2": ["a", "c", "a", None, "B", "a", "D"], + }, + id="asc-desc-tf", + ), + param( + "asc", + True, + "asc", + True, + { + "col1": [None, 1, 1, 1, 2, 3, 3], + "col2": ["a", None, "a", "c", "B", "D", "a"], + }, + id="asc-asc-tt", + ), + param( + "asc", + True, + "asc", + False, + { + "col1": [None, 1, 1, 1, 2, 3, 3], + "col2": ["a", "a", "c", None, "B", "D", "a"], + }, + id="asc-asc-tf", + ), + ], +) +def test_order_by_two_cols_nulls(con, op1, nf1, nf2, op2, expected): + t = ibis.memtable( + { + # this is here because pandas converts None to nan, but of course + # only for numeric types, because that's totally reasonable + "col1": pd.Series([1, 3, 2, 1, 3, 1, None], dtype="object"), + "col2": ["a", "a", "B", "c", "D", None, "a"], + } + ) + expr = t.order_by( + getattr(t["col1"], op1)(nulls_first=nf1), + getattr(t["col2"], op2)(nulls_first=nf2), + ) + + if (con.name in ("pandas", "dask")) and (nf1 != nf2): + with pytest.raises( + ValueError, + match=f"{con.name} does not support specifying null ordering for individual column", + ): + con.execute(expr) + else: + result = con.execute(expr).reset_index(drop=True) + expected = pd.DataFrame(expected) + + tm.assert_frame_equal( + result.replace({np.nan: None}), expected.replace({np.nan: None}) + ) + + @pytest.mark.notyet( ["druid"], raises=PyDruidProgrammingError, diff --git a/ibis/expr/api.py b/ibis/expr/api.py index 7cf20dc264cc..92e14066cac8 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -559,8 +559,8 @@ def _memtable_from_polars_dataframe( ).to_expr() -def _deferred_method_call(expr, method_name): - method = operator.methodcaller(method_name) +def _deferred_method_call(expr, method_name, **kwargs): + method = operator.methodcaller(method_name, **kwargs) if isinstance(expr, str): value = _[expr] elif isinstance(expr, Deferred): @@ -572,13 +572,15 @@ def _deferred_method_call(expr, method_name): return method(value) -def desc(expr: ir.Column | str) -> ir.Value: +def desc(expr: ir.Column | str, nulls_first: bool = False) -> ir.Value: """Create a descending sort key from `expr` or column name. Parameters ---------- expr The expression or column name to use for sorting + nulls_first + Bool to indicate weather to put NULL values first or not. See Also -------- @@ -608,16 +610,18 @@ def desc(expr: ir.Column | str) -> ir.Value: An expression """ - return _deferred_method_call(expr, "desc") + return _deferred_method_call(expr, "desc", nulls_first=nulls_first) -def asc(expr: ir.Column | str) -> ir.Value: +def asc(expr: ir.Column | str, nulls_first: bool = False) -> ir.Value: """Create a ascending sort key from `asc` or column name. Parameters ---------- expr The expression or column name to use for sorting + nulls_first + Bool to indicate weather to put NULL values first or not. See Also -------- @@ -647,7 +651,7 @@ def asc(expr: ir.Column | str) -> ir.Value: An expression """ - return _deferred_method_call(expr, "asc") + return _deferred_method_call(expr, "asc", nulls_first=nulls_first) def preceding(value) -> ir.Value: diff --git a/ibis/expr/decompile.py b/ibis/expr/decompile.py index c3aad74faab3..7d87550a9bcf 100644 --- a/ibis/expr/decompile.py +++ b/ibis/expr/decompile.py @@ -243,11 +243,12 @@ def table_column(op, rel, name): @translate.register(ops.SortKey) -def sort_key(op, expr, ascending): - if ascending: - return f"{expr}.asc()" - else: - return f"{expr}.desc()" +def sort_key(op, expr, ascending, nulls_first): + method = "asc" if ascending else "desc" + call = f"{expr}.{method}" + if nulls_first: + return f"{call}(nulls_first={nulls_first})" + return f"{call}()" @translate.register(ops.Reduction) diff --git a/ibis/expr/operations/sortkeys.py b/ibis/expr/operations/sortkeys.py index fe5940a19ee5..28edaa72aec5 100644 --- a/ibis/expr/operations/sortkeys.py +++ b/ibis/expr/operations/sortkeys.py @@ -19,6 +19,7 @@ class SortKey(Value): # TODO(kszucs): rename expr to arg or something else except expr expr: Value ascending: bool = True + nulls_first: bool = False dtype = rlz.dtype_like("expr") shape = rlz.shape_like("expr") diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py index 894f94b6dc2d..cf6e40eb5fac 100644 --- a/ibis/expr/rewrites.py +++ b/ibis/expr/rewrites.py @@ -309,9 +309,12 @@ def window_merge_frames(_, window): order_keys = {} for sort_key in window.orderings + _.order_by: - order_keys[sort_key.expr] = sort_key.ascending - order_by = (ops.SortKey(k, v) for k, v in order_keys.items()) + order_keys[sort_key.expr] = sort_key.ascending, sort_key.nulls_first + order_by = ( + ops.SortKey(expr, ascending=ascending, nulls_first=nulls_first) + for expr, (ascending, nulls_first) in order_keys.items() + ) return _.copy(start=start, end=end, group_by=group_by, order_by=order_by) diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py index f00fa471bf82..71a95a51a128 100644 --- a/ibis/expr/types/generic.py +++ b/ibis/expr/types/generic.py @@ -1199,13 +1199,13 @@ def __le__(self, other: Value) -> ir.BooleanValue: def __lt__(self, other: Value) -> ir.BooleanValue: return _binop(ops.Less, self, other) - def asc(self) -> ir.Value: + def asc(self, nulls_first: bool = False) -> ir.Value: """Sort an expression ascending.""" - return ops.SortKey(self, ascending=True).to_expr() + return ops.SortKey(self, ascending=True, nulls_first=nulls_first).to_expr() - def desc(self) -> ir.Value: + def desc(self, nulls_first: bool = False) -> ir.Value: """Sort an expression descending.""" - return ops.SortKey(self, ascending=False).to_expr() + return ops.SortKey(self, ascending=False, nulls_first=nulls_first).to_expr() def to_pandas(self, **kwargs) -> pd.Series: """Convert a column expression to a pandas Series or scalar object. diff --git a/ibis/tests/test_api.py b/ibis/tests/test_api.py index b21f566f64bc..7dce704a8279 100644 --- a/ibis/tests/test_api.py +++ b/ibis/tests/test_api.py @@ -6,6 +6,7 @@ from typing import NamedTuple import pytest +from pytest import param import ibis @@ -76,3 +77,37 @@ def test_ibis_na_deprecation_warning(): DeprecationWarning, match="The 'ibis.NA' constant is deprecated as of v9.1" ): assert ibis.NA is ibis.null() + + +@pytest.mark.parametrize( + "op, api_func", + [ + param("desc", ibis.desc), + param("asc", ibis.asc), + ], +) +def test_ibis_desc_asc_default(op, api_func): + t = ibis.memtable([{"a": 1, "b": "foo"}, {"a": 2, "b": "baz"}, {"a": 3, "b": None}]) + + expr = t.order_by(getattr(t["b"], op)()) + expr_api = t.order_by(api_func("b")) + + assert expr.op() == expr_api.op() + + +@pytest.mark.parametrize( + "op, api_func, nulls_first", + [ + param("desc", ibis.desc, True), + param("asc", ibis.asc, True), + param("desc", ibis.desc, False), + param("asc", ibis.asc, False), + ], +) +def test_ibis_desc_asc(op, api_func, nulls_first): + t = ibis.memtable([{"a": 1, "b": "foo"}, {"a": 2, "b": "baz"}, {"a": 3, "b": None}]) + + expr = t.order_by(getattr(t["b"], op)(nulls_first=nulls_first)) + expr_api = t.order_by(api_func("b", nulls_first=nulls_first)) + + assert expr.op() == expr_api.op()