Skip to content

Commit

Permalink
Merge #93567
Browse files Browse the repository at this point in the history
93567: opt: add support for NULLS LAST with DISTINCT ON r=rytaft a=rytaft

This commit adds an exception for the requirement that the `ORDER BY` clause of a `DISTINCT ON` query must contain only columns in the `ON` clause. Specifically, it allows expressions of the form `col IS NULL`, where `col` is one of the `ON` columns. This is needed to support `NULLS LAST`, since we implement `NULLS LAST` by synthesizing a `col IS NULL` ordering column.

The approach feels a bit hacky, but it seems like the smallest, lowest risk option available.

Fixes #90763

Release note (bug fix): Fixed an issue where `DISTINCT ON` queries would fail with the error "SELECT DISTINCT ON expressions must match initial ORDER BY expressions" when the query included an `ORDER BY` clause containing `ASC NULLS LAST` or `DESC NULLS FIRST`.

Co-authored-by: Rebecca Taft <[email protected]>
  • Loading branch information
craig[bot] and rytaft committed Dec 14, 2022
2 parents dc9cfae + 3366389 commit 57667ed
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 2 deletions.
29 changes: 29 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distinct_on
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,32 @@ query I
SELECT count(*) FROM (SELECT DISTINCT ON (x) x, y FROM xyz WHERE x = 1 ORDER BY x, y)
----
1

# Regression test for #90763. Ensure NULLS LAST works with DISTINCT ON.
statement ok
CREATE TABLE author (
id INT PRIMARY KEY,
name TEXT,
genre TEXT
);
INSERT INTO author VALUES
(1, 'Alice', 'Action'),
(2, 'Bob', 'Biography'),
(3, 'Carol', 'Crime'),
(4, 'Dave', 'Action'),
(5, 'Eve', 'Crime'),
(6, 'Bart', null);

query ITT
SELECT
DISTINCT ON ("genre") *
FROM
"public"."author"
ORDER BY
"genre" ASC NULLS LAST,
"id" ASC NULLS LAST
----
1 Alice Action
2 Bob Biography
3 Carol Crime
6 Bart NULL
23 changes: 21 additions & 2 deletions pkg/sql/opt/optbuilder/distinct.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ func (b *Builder) buildDistinctOn(
) (outScope *scope) {
// When there is a DISTINCT ON clause, the ORDER BY clause is restricted to either:
// 1. Contain a subset of columns from the ON list, or
// 2. Start with a permutation of all columns from the ON list.
// 2. Start with a permutation of all columns from the ON list, or
// 3. Contain col IS NULL expressions, where col is from the ON list.
//
// In case 1, the ORDER BY simply specifies an output ordering as usual.
// Example:
Expand All @@ -76,13 +77,31 @@ func (b *Builder) buildDistinctOn(
// This means: for each value of a, choose the (b, c) from the row with the
// smallest e value, and order these results by a.
//
// Note: this behavior is consistent with PostgreSQL.
// Case 3 is needed to support using NULLS LAST with DISTINCT ON. Since we
// synthesize an ORDER BY column such as (col IS NULL) to support NULLS LAST,
// we need to make sure that we allow this column to exist.
//
// Note: Cases 1 and 2 are consistent with PostgreSQL. Case 3 is more
// permissive but does not affect correctness.

// Check that the DISTINCT ON expressions match the initial ORDER BY
// expressions.
var seen opt.ColSet
for _, col := range inScope.ordering {
if !distinctOnCols.Contains(col.ID()) {
scopeCol := inScope.getColumn(col.ID())
if scopeCol != nil {
if isExpr, ok := scopeCol.scalar.(*memo.IsExpr); ok {
if _, ok := isExpr.Right.(*memo.NullExpr); ok {
if v, ok := isExpr.Left.(*memo.VariableExpr); ok {
if distinctOnCols.Contains(v.Col) {
// We have a col IS NULL expression (case 3 above).
continue
}
}
}
}
}
panic(pgerror.Newf(
pgcode.InvalidColumnReference,
"SELECT DISTINCT ON expressions must match initial ORDER BY expressions",
Expand Down
46 changes: 46 additions & 0 deletions pkg/sql/opt/optbuilder/testdata/distinct_on
Original file line number Diff line number Diff line change
Expand Up @@ -488,3 +488,49 @@ distinct-on
└── aggregations
└── first-agg [as=z:3]
└── z:3

# Regression test for #90763. Ensure NULLS LAST works with DISTINCT ON.
exec-ddl
CREATE TABLE author (
id INT PRIMARY KEY,
name TEXT,
genre TEXT
)
----

build
SELECT
DISTINCT ON ("genre") *
FROM
"public"."author"
ORDER BY
"genre" ASC NULLS LAST,
"id" ASC NULLS LAST
----
sort
├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:8!null nulls_ordering_id:9!null]
├── ordering: +8,+3,+9,+1
└── project
├── columns: nulls_ordering_genre:8!null nulls_ordering_id:9!null id:1!null name:2 genre:3
├── distinct-on
│ ├── columns: id:1!null name:2 genre:3
│ ├── grouping columns: genre:3
│ ├── internal-ordering: +6,+7,+1 opt(3)
│ ├── sort
│ │ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null
│ │ ├── ordering: +7,+1 opt(3,6)
│ │ └── project
│ │ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3
│ │ ├── scan author
│ │ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5
│ │ └── projections
│ │ ├── genre:3 IS NULL [as=nulls_ordering_genre:6]
│ │ └── id:1 IS NULL [as=nulls_ordering_id:7]
│ └── aggregations
│ ├── first-agg [as=id:1]
│ │ └── id:1
│ └── first-agg [as=name:2]
│ └── name:2
└── projections
├── genre:3 IS NULL [as=nulls_ordering_genre:8]
└── id:1 IS NULL [as=nulls_ordering_id:9]

0 comments on commit 57667ed

Please sign in to comment.