From 7346f0aec38f74463dc81afb3763292779049de4 Mon Sep 17 00:00:00 2001 From: Rebecca Taft Date: Fri, 28 Jul 2023 18:40:12 -0500 Subject: [PATCH] opt: fix error with DISTINCT ON and ORDER BY ASC NULLS LAST This commit fixes an error that could occur in the optbuilder when planning a query with DISTINCT ON and a non-standard nulls ordering. The optimizer supports queries with a non-standard nulls ordering by projecting a column with the expression (col IS NULL) and adding it to the ordering. Since we require that DISTINCT ON columns must be the prefix of any ordering columns, we must account for the new ordering column when building DISTINCT ON. A previous bug fix for #90763 caused the new column to be simply ignored when building DISTINCT ON, but this was insufficient. We need to actually include the new column among the DISTINCT ON columns. This commit makes that change. Fixes #107839 Release note (bug fix): Fixed a spurious error "no data source matches prefix" that could occur during planning for a query with DISTINCT ON and ORDER BY ASC NULLS LAST or ORDER BY DESC NULLS FIRST. --- .../logictest/testdata/logic_test/distinct_on | 29 +++++++ pkg/sql/opt/optbuilder/distinct.go | 16 ++-- pkg/sql/opt/optbuilder/testdata/distinct_on | 87 +++++++++++++------ 3 files changed, 101 insertions(+), 31 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/distinct_on b/pkg/sql/logictest/testdata/logic_test/distinct_on index ec06164e5a64..c145b9a0cf9d 100644 --- a/pkg/sql/logictest/testdata/logic_test/distinct_on +++ b/pkg/sql/logictest/testdata/logic_test/distinct_on @@ -405,3 +405,32 @@ Action Biography Crime NULL + +# Regression test for #107839. More fixes for NULLS FIRST/LAST with DISTINCT ON. +statement ok +CREATE TABLE t1 (id int, str text); +CREATE TABLE t2 (id int, num int); +INSERT INTO t1 VALUES (1, 'hello'), (2, NULL); +INSERT INTO t2 VALUES (1, 1), (2, 2), (NULL, NULL) + +query II +SELECT +DISTINCT ON (t2.id) +t2.* +FROM t1, t2 +ORDER BY t2.id DESC NULLS FIRST +---- +NULL NULL +2 2 +1 1 + +query II +SELECT +DISTINCT ON (t2.id) +t2.* +FROM t1, t2 +ORDER BY t2.id ASC NULLS LAST +---- +1 1 +2 2 +NULL NULL diff --git a/pkg/sql/opt/optbuilder/distinct.go b/pkg/sql/opt/optbuilder/distinct.go index afe9dfa14013..ad495dccd202 100644 --- a/pkg/sql/opt/optbuilder/distinct.go +++ b/pkg/sql/opt/optbuilder/distinct.go @@ -89,6 +89,7 @@ func (b *Builder) buildDistinctOn( var seen opt.ColSet for _, col := range inScope.ordering { if !distinctOnCols.Contains(col.ID()) { + colIsValid := false scopeCol := inScope.getColumn(col.ID()) if scopeCol != nil { if isExpr, ok := scopeCol.scalar.(*memo.IsExpr); ok { @@ -96,16 +97,21 @@ func (b *Builder) buildDistinctOn( if v, ok := isExpr.Left.(*memo.VariableExpr); ok { if distinctOnCols.Contains(v.Col) { // We have a col IS NULL expression (case 3 above). - continue + // Add the new column to distinctOnCols, since it doesn't change + // the semantics of the DISTINCT ON. + distinctOnCols.Add(col.ID()) + colIsValid = true } } } } } - panic(pgerror.Newf( - pgcode.InvalidColumnReference, - "SELECT DISTINCT ON expressions must match initial ORDER BY expressions", - )) + if !colIsValid { + panic(pgerror.Newf( + pgcode.InvalidColumnReference, + "SELECT DISTINCT ON expressions must match initial ORDER BY expressions", + )) + } } seen.Add(col.ID()) if seen.Equals(distinctOnCols) { diff --git a/pkg/sql/opt/optbuilder/testdata/distinct_on b/pkg/sql/opt/optbuilder/testdata/distinct_on index 85fecbf26cec..9b7b07b408f2 100644 --- a/pkg/sql/opt/optbuilder/testdata/distinct_on +++ b/pkg/sql/opt/optbuilder/testdata/distinct_on @@ -507,30 +507,65 @@ ORDER BY "genre" ASC NULLS LAST, "id" ASC NULLS LAST ---- +distinct-on + ├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:6!null] + ├── grouping columns: genre:3 nulls_ordering_genre:6!null + ├── internal-ordering: +7,+1 opt(3,6) + ├── ordering: +6,+3 + ├── sort + │ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null + │ ├── ordering: +6,+3,+7,+1 + │ └── project + │ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3 + │ ├── scan author + │ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5 + │ └── projections + │ ├── genre:3 IS NULL [as=nulls_ordering_genre:6] + │ └── id:1 IS NULL [as=nulls_ordering_id:7] + └── aggregations + ├── first-agg [as=id:1] + │ └── id:1 + └── first-agg [as=name:2] + └── name:2 + +# Regression test for #107839. More fixes for NULLS FIRST/LAST with DISTINCT ON. + +exec-ddl +CREATE TABLE t1 (id int, str text) +---- + +exec-ddl +CREATE TABLE t2 (id int, num int) +---- + +build +SELECT +DISTINCT ON (t2.id) +t1.id, +t1.str +FROM t1 JOIN t2 ON t1.id = t2.id +ORDER BY t2.id DESC NULLS FIRST +---- sort - ├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:8!null nulls_ordering_id:9!null] - ├── ordering: +8,+3,+9,+1 - └── project - ├── columns: nulls_ordering_genre:8!null nulls_ordering_id:9!null id:1!null name:2 genre:3 - ├── distinct-on - │ ├── columns: id:1!null name:2 genre:3 - │ ├── grouping columns: genre:3 - │ ├── internal-ordering: +6,+7,+1 opt(3) - │ ├── sort - │ │ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null - │ │ ├── ordering: +7,+1 opt(3,6) - │ │ └── project - │ │ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3 - │ │ ├── scan author - │ │ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5 - │ │ └── projections - │ │ ├── genre:3 IS NULL [as=nulls_ordering_genre:6] - │ │ └── id:1 IS NULL [as=nulls_ordering_id:7] - │ └── aggregations - │ ├── first-agg [as=id:1] - │ │ └── id:1 - │ └── first-agg [as=name:2] - │ └── name:2 - └── projections - ├── genre:3 IS NULL [as=nulls_ordering_genre:8] - └── id:1 IS NULL [as=nulls_ordering_id:9] + ├── columns: id:1!null str:2 [hidden: t2.id:6!null nulls_ordering_id:11!null] + ├── ordering: -11,-6 + └── distinct-on + ├── columns: t1.id:1!null str:2 t2.id:6!null nulls_ordering_id:11!null + ├── grouping columns: t2.id:6!null nulls_ordering_id:11!null + ├── project + │ ├── columns: nulls_ordering_id:11!null t1.id:1!null str:2 t2.id:6!null + │ ├── inner-join (hash) + │ │ ├── columns: t1.id:1!null str:2 t1.rowid:3!null t1.crdb_internal_mvcc_timestamp:4 t1.tableoid:5 t2.id:6!null num:7 t2.rowid:8!null t2.crdb_internal_mvcc_timestamp:9 t2.tableoid:10 + │ │ ├── scan t1 + │ │ │ └── columns: t1.id:1 str:2 t1.rowid:3!null t1.crdb_internal_mvcc_timestamp:4 t1.tableoid:5 + │ │ ├── scan t2 + │ │ │ └── columns: t2.id:6 num:7 t2.rowid:8!null t2.crdb_internal_mvcc_timestamp:9 t2.tableoid:10 + │ │ └── filters + │ │ └── t1.id:1 = t2.id:6 + │ └── projections + │ └── t2.id:6 IS NULL [as=nulls_ordering_id:11] + └── aggregations + ├── first-agg [as=t1.id:1] + │ └── t1.id:1 + └── first-agg [as=str:2] + └── str:2