Skip to content

Commit

Permalink
opt: add support for NULLS LAST with DISTINCT ON
Browse files Browse the repository at this point in the history
This commit adds an exception for the requirement that the ORDER
BY clause of a DISTINCT ON query must contain only columns in the
ON clause. Specifically, it allows expressions of the form
`col IS NULL`, where col is one of the ON columns. This is needed
to support NULLS LAST, since we implement NULLS LAST by synthesizing
a `col IS NULL` ordering column.

The approach feels a bit hacky, but it seems like the smallest,
lowest risk option available.

Fixes #90763

Release note (bug fix): Fixed an issue where DISTINCT ON queries
would fail with the error "SELECT DISTINCT ON expressions must
match initial ORDER BY expressions" when the query included an
ORDER BY clause containing ASC NULLS LAST or DESC NULLS FIRST.

sql/logictest: fix flake in TestLogic_distinct_on

Make sure the query results are deterministic.

Fixes #93719

Release note: None
  • Loading branch information
rytaft committed Dec 15, 2022
1 parent 9b65541 commit 0a143d1
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 2 deletions.
28 changes: 28 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distinct_on
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,31 @@ query I
SELECT count(*) FROM (SELECT DISTINCT ON (x) x, y FROM xyz WHERE x = 1 ORDER BY x, y)
----
1

# Regression test for #90763. Ensure NULLS LAST works with DISTINCT ON.
statement ok
CREATE TABLE author (
id INT PRIMARY KEY,
name TEXT,
genre TEXT
);
INSERT INTO author VALUES
(1, 'Alice', 'Action'),
(2, 'Bob', 'Biography'),
(3, 'Carol', 'Crime'),
(4, 'Dave', 'Action'),
(5, 'Eve', 'Crime'),
(6, 'Bart', null);

query T
SELECT
DISTINCT ON ("genre") genre
FROM
"public"."author"
ORDER BY
"genre" ASC NULLS LAST
----
Action
Biography
Crime
NULL
23 changes: 21 additions & 2 deletions pkg/sql/opt/optbuilder/distinct.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ func (b *Builder) buildDistinctOn(
) (outScope *scope) {
// When there is a DISTINCT ON clause, the ORDER BY clause is restricted to either:
// 1. Contain a subset of columns from the ON list, or
// 2. Start with a permutation of all columns from the ON list.
// 2. Start with a permutation of all columns from the ON list, or
// 3. Contain col IS NULL expressions, where col is from the ON list.
//
// In case 1, the ORDER BY simply specifies an output ordering as usual.
// Example:
Expand All @@ -76,13 +77,31 @@ func (b *Builder) buildDistinctOn(
// This means: for each value of a, choose the (b, c) from the row with the
// smallest e value, and order these results by a.
//
// Note: this behavior is consistent with PostgreSQL.
// Case 3 is needed to support using NULLS LAST with DISTINCT ON. Since we
// synthesize an ORDER BY column such as (col IS NULL) to support NULLS LAST,
// we need to make sure that we allow this column to exist.
//
// Note: Cases 1 and 2 are consistent with PostgreSQL. Case 3 is more
// permissive but does not affect correctness.

// Check that the DISTINCT ON expressions match the initial ORDER BY
// expressions.
var seen opt.ColSet
for _, col := range inScope.ordering {
if !distinctOnCols.Contains(col.ID()) {
scopeCol := inScope.getColumn(col.ID())
if scopeCol != nil {
if isExpr, ok := scopeCol.scalar.(*memo.IsExpr); ok {
if _, ok := isExpr.Right.(*memo.NullExpr); ok {
if v, ok := isExpr.Left.(*memo.VariableExpr); ok {
if distinctOnCols.Contains(v.Col) {
// We have a col IS NULL expression (case 3 above).
continue
}
}
}
}
}
panic(pgerror.Newf(
pgcode.InvalidColumnReference,
"SELECT DISTINCT ON expressions must match initial ORDER BY expressions",
Expand Down
46 changes: 46 additions & 0 deletions pkg/sql/opt/optbuilder/testdata/distinct_on
Original file line number Diff line number Diff line change
Expand Up @@ -488,3 +488,49 @@ distinct-on
└── aggregations
└── first-agg [as=z:3]
└── z:3

# Regression test for #90763. Ensure NULLS LAST works with DISTINCT ON.
exec-ddl
CREATE TABLE author (
id INT PRIMARY KEY,
name TEXT,
genre TEXT
)
----

build
SELECT
DISTINCT ON ("genre") *
FROM
"public"."author"
ORDER BY
"genre" ASC NULLS LAST,
"id" ASC NULLS LAST
----
sort
├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:8!null nulls_ordering_id:9!null]
├── ordering: +8,+3,+9,+1
└── project
├── columns: nulls_ordering_genre:8!null nulls_ordering_id:9!null id:1!null name:2 genre:3
├── distinct-on
│ ├── columns: id:1!null name:2 genre:3
│ ├── grouping columns: genre:3
│ ├── internal-ordering: +6,+7,+1 opt(3)
│ ├── sort
│ │ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null
│ │ ├── ordering: +7,+1 opt(3,6)
│ │ └── project
│ │ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3
│ │ ├── scan author
│ │ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5
│ │ └── projections
│ │ ├── genre:3 IS NULL [as=nulls_ordering_genre:6]
│ │ └── id:1 IS NULL [as=nulls_ordering_id:7]
│ └── aggregations
│ ├── first-agg [as=id:1]
│ │ └── id:1
│ └── first-agg [as=name:2]
│ └── name:2
└── projections
├── genre:3 IS NULL [as=nulls_ordering_genre:8]
└── id:1 IS NULL [as=nulls_ordering_id:9]

0 comments on commit 0a143d1

Please sign in to comment.