From 0a143d151a0b376bb45a0f0f196218a733a9d78d Mon Sep 17 00:00:00 2001 From: Rebecca Taft Date: Wed, 14 Dec 2022 02:47:26 +0000 Subject: [PATCH] opt: add support for NULLS LAST with DISTINCT ON This commit adds an exception for the requirement that the ORDER BY clause of a DISTINCT ON query must contain only columns in the ON clause. Specifically, it allows expressions of the form `col IS NULL`, where col is one of the ON columns. This is needed to support NULLS LAST, since we implement NULLS LAST by synthesizing a `col IS NULL` ordering column. The approach feels a bit hacky, but it seems like the smallest, lowest risk option available. Fixes #90763 Release note (bug fix): Fixed an issue where DISTINCT ON queries would fail with the error "SELECT DISTINCT ON expressions must match initial ORDER BY expressions" when the query included an ORDER BY clause containing ASC NULLS LAST or DESC NULLS FIRST. sql/logictest: fix flake in TestLogic_distinct_on Make sure the query results are deterministic. Fixes #93719 Release note: None --- .../logictest/testdata/logic_test/distinct_on | 28 +++++++++++ pkg/sql/opt/optbuilder/distinct.go | 23 +++++++++- pkg/sql/opt/optbuilder/testdata/distinct_on | 46 +++++++++++++++++++ 3 files changed, 95 insertions(+), 2 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/distinct_on b/pkg/sql/logictest/testdata/logic_test/distinct_on index ccfa63f5a01b..ec06164e5a64 100644 --- a/pkg/sql/logictest/testdata/logic_test/distinct_on +++ b/pkg/sql/logictest/testdata/logic_test/distinct_on @@ -377,3 +377,31 @@ query I SELECT count(*) FROM (SELECT DISTINCT ON (x) x, y FROM xyz WHERE x = 1 ORDER BY x, y) ---- 1 + +# Regression test for #90763. Ensure NULLS LAST works with DISTINCT ON. +statement ok +CREATE TABLE author ( + id INT PRIMARY KEY, + name TEXT, + genre TEXT +); +INSERT INTO author VALUES + (1, 'Alice', 'Action'), + (2, 'Bob', 'Biography'), + (3, 'Carol', 'Crime'), + (4, 'Dave', 'Action'), + (5, 'Eve', 'Crime'), + (6, 'Bart', null); + +query T +SELECT + DISTINCT ON ("genre") genre +FROM + "public"."author" +ORDER BY + "genre" ASC NULLS LAST +---- +Action +Biography +Crime +NULL diff --git a/pkg/sql/opt/optbuilder/distinct.go b/pkg/sql/opt/optbuilder/distinct.go index 9167731f0175..1ece150176df 100644 --- a/pkg/sql/opt/optbuilder/distinct.go +++ b/pkg/sql/opt/optbuilder/distinct.go @@ -61,7 +61,8 @@ func (b *Builder) buildDistinctOn( ) (outScope *scope) { // When there is a DISTINCT ON clause, the ORDER BY clause is restricted to either: // 1. Contain a subset of columns from the ON list, or - // 2. Start with a permutation of all columns from the ON list. + // 2. Start with a permutation of all columns from the ON list, or + // 3. Contain col IS NULL expressions, where col is from the ON list. // // In case 1, the ORDER BY simply specifies an output ordering as usual. // Example: @@ -76,13 +77,31 @@ func (b *Builder) buildDistinctOn( // This means: for each value of a, choose the (b, c) from the row with the // smallest e value, and order these results by a. // - // Note: this behavior is consistent with PostgreSQL. + // Case 3 is needed to support using NULLS LAST with DISTINCT ON. Since we + // synthesize an ORDER BY column such as (col IS NULL) to support NULLS LAST, + // we need to make sure that we allow this column to exist. + // + // Note: Cases 1 and 2 are consistent with PostgreSQL. Case 3 is more + // permissive but does not affect correctness. // Check that the DISTINCT ON expressions match the initial ORDER BY // expressions. var seen opt.ColSet for _, col := range inScope.ordering { if !distinctOnCols.Contains(col.ID()) { + scopeCol := inScope.getColumn(col.ID()) + if scopeCol != nil { + if isExpr, ok := scopeCol.scalar.(*memo.IsExpr); ok { + if _, ok := isExpr.Right.(*memo.NullExpr); ok { + if v, ok := isExpr.Left.(*memo.VariableExpr); ok { + if distinctOnCols.Contains(v.Col) { + // We have a col IS NULL expression (case 3 above). + continue + } + } + } + } + } panic(pgerror.Newf( pgcode.InvalidColumnReference, "SELECT DISTINCT ON expressions must match initial ORDER BY expressions", diff --git a/pkg/sql/opt/optbuilder/testdata/distinct_on b/pkg/sql/opt/optbuilder/testdata/distinct_on index cd0f339c26bf..85fecbf26cec 100644 --- a/pkg/sql/opt/optbuilder/testdata/distinct_on +++ b/pkg/sql/opt/optbuilder/testdata/distinct_on @@ -488,3 +488,49 @@ distinct-on └── aggregations └── first-agg [as=z:3] └── z:3 + +# Regression test for #90763. Ensure NULLS LAST works with DISTINCT ON. +exec-ddl +CREATE TABLE author ( + id INT PRIMARY KEY, + name TEXT, + genre TEXT +) +---- + +build +SELECT + DISTINCT ON ("genre") * +FROM + "public"."author" +ORDER BY + "genre" ASC NULLS LAST, + "id" ASC NULLS LAST +---- +sort + ├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:8!null nulls_ordering_id:9!null] + ├── ordering: +8,+3,+9,+1 + └── project + ├── columns: nulls_ordering_genre:8!null nulls_ordering_id:9!null id:1!null name:2 genre:3 + ├── distinct-on + │ ├── columns: id:1!null name:2 genre:3 + │ ├── grouping columns: genre:3 + │ ├── internal-ordering: +6,+7,+1 opt(3) + │ ├── sort + │ │ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null + │ │ ├── ordering: +7,+1 opt(3,6) + │ │ └── project + │ │ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3 + │ │ ├── scan author + │ │ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5 + │ │ └── projections + │ │ ├── genre:3 IS NULL [as=nulls_ordering_genre:6] + │ │ └── id:1 IS NULL [as=nulls_ordering_id:7] + │ └── aggregations + │ ├── first-agg [as=id:1] + │ │ └── id:1 + │ └── first-agg [as=name:2] + │ └── name:2 + └── projections + ├── genre:3 IS NULL [as=nulls_ordering_genre:8] + └── id:1 IS NULL [as=nulls_ordering_id:9]