diff --git a/pkg/sql/logictest/testdata/logic_test/distinct_on b/pkg/sql/logictest/testdata/logic_test/distinct_on index ec06164e5a64..c145b9a0cf9d 100644 --- a/pkg/sql/logictest/testdata/logic_test/distinct_on +++ b/pkg/sql/logictest/testdata/logic_test/distinct_on @@ -405,3 +405,32 @@ Action Biography Crime NULL + +# Regression test for #107839. More fixes for NULLS FIRST/LAST with DISTINCT ON. +statement ok +CREATE TABLE t1 (id int, str text); +CREATE TABLE t2 (id int, num int); +INSERT INTO t1 VALUES (1, 'hello'), (2, NULL); +INSERT INTO t2 VALUES (1, 1), (2, 2), (NULL, NULL) + +query II +SELECT +DISTINCT ON (t2.id) +t2.* +FROM t1, t2 +ORDER BY t2.id DESC NULLS FIRST +---- +NULL NULL +2 2 +1 1 + +query II +SELECT +DISTINCT ON (t2.id) +t2.* +FROM t1, t2 +ORDER BY t2.id ASC NULLS LAST +---- +1 1 +2 2 +NULL NULL diff --git a/pkg/sql/opt/optbuilder/distinct.go b/pkg/sql/opt/optbuilder/distinct.go index 1ece150176df..da95bbc8d7fb 100644 --- a/pkg/sql/opt/optbuilder/distinct.go +++ b/pkg/sql/opt/optbuilder/distinct.go @@ -89,6 +89,7 @@ func (b *Builder) buildDistinctOn( var seen opt.ColSet for _, col := range inScope.ordering { if !distinctOnCols.Contains(col.ID()) { + colIsValid := false scopeCol := inScope.getColumn(col.ID()) if scopeCol != nil { if isExpr, ok := scopeCol.scalar.(*memo.IsExpr); ok { @@ -96,16 +97,21 @@ func (b *Builder) buildDistinctOn( if v, ok := isExpr.Left.(*memo.VariableExpr); ok { if distinctOnCols.Contains(v.Col) { // We have a col IS NULL expression (case 3 above). - continue + // Add the new column to distinctOnCols, since it doesn't change + // the semantics of the DISTINCT ON. + distinctOnCols.Add(col.ID()) + colIsValid = true } } } } } - panic(pgerror.Newf( - pgcode.InvalidColumnReference, - "SELECT DISTINCT ON expressions must match initial ORDER BY expressions", - )) + if !colIsValid { + panic(pgerror.Newf( + pgcode.InvalidColumnReference, + "SELECT DISTINCT ON expressions must match initial ORDER BY expressions", + )) + } } seen.Add(col.ID()) if seen.Equals(distinctOnCols) { diff --git a/pkg/sql/opt/optbuilder/testdata/distinct_on b/pkg/sql/opt/optbuilder/testdata/distinct_on index 85fecbf26cec..1f6f35b112e3 100644 --- a/pkg/sql/opt/optbuilder/testdata/distinct_on +++ b/pkg/sql/opt/optbuilder/testdata/distinct_on @@ -507,30 +507,87 @@ ORDER BY "genre" ASC NULLS LAST, "id" ASC NULLS LAST ---- +distinct-on + ├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:6!null] + ├── grouping columns: genre:3 nulls_ordering_genre:6!null + ├── internal-ordering: +7,+1 opt(3,6) + ├── ordering: +6,+3 + ├── sort + │ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null + │ ├── ordering: +6,+3,+7,+1 + │ └── project + │ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3 + │ ├── scan author + │ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5 + │ └── projections + │ ├── genre:3 IS NULL [as=nulls_ordering_genre:6] + │ └── id:1 IS NULL [as=nulls_ordering_id:7] + └── aggregations + ├── first-agg [as=id:1] + │ └── id:1 + └── first-agg [as=name:2] + └── name:2 + +# Regression test for #107839. More fixes for NULLS FIRST/LAST with DISTINCT ON. + +exec-ddl +CREATE TABLE t1 (id int, str text) +---- + +exec-ddl +CREATE TABLE t2 (id int, num int) +---- + +build +SELECT +DISTINCT ON (t2.id) +t1.id, +t1.str +FROM t1 JOIN t2 ON t1.id = t2.id +ORDER BY t2.id DESC NULLS FIRST +---- sort - ├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:8!null nulls_ordering_id:9!null] - ├── ordering: +8,+3,+9,+1 - └── project - ├── columns: nulls_ordering_genre:8!null nulls_ordering_id:9!null id:1!null name:2 genre:3 - ├── distinct-on - │ ├── columns: id:1!null name:2 genre:3 - │ ├── grouping columns: genre:3 - │ ├── internal-ordering: +6,+7,+1 opt(3) - │ ├── sort - │ │ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null - │ │ ├── ordering: +7,+1 opt(3,6) - │ │ └── project - │ │ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3 - │ │ ├── scan author - │ │ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5 - │ │ └── projections - │ │ ├── genre:3 IS NULL [as=nulls_ordering_genre:6] - │ │ └── id:1 IS NULL [as=nulls_ordering_id:7] - │ └── aggregations - │ ├── first-agg [as=id:1] - │ │ └── id:1 - │ └── first-agg [as=name:2] - │ └── name:2 - └── projections - ├── genre:3 IS NULL [as=nulls_ordering_genre:8] - └── id:1 IS NULL [as=nulls_ordering_id:9] + ├── columns: id:1!null str:2 [hidden: t2.id:6!null nulls_ordering_id:11!null] + ├── ordering: -11,-6 + └── distinct-on + ├── columns: t1.id:1!null str:2 t2.id:6!null nulls_ordering_id:11!null + ├── grouping columns: t2.id:6!null nulls_ordering_id:11!null + ├── project + │ ├── columns: nulls_ordering_id:11!null t1.id:1!null str:2 t2.id:6!null + │ ├── inner-join (hash) + │ │ ├── columns: t1.id:1!null str:2 t1.rowid:3!null t1.crdb_internal_mvcc_timestamp:4 t1.tableoid:5 t2.id:6!null num:7 t2.rowid:8!null t2.crdb_internal_mvcc_timestamp:9 t2.tableoid:10 + │ │ ├── scan t1 + │ │ │ └── columns: t1.id:1 str:2 t1.rowid:3!null t1.crdb_internal_mvcc_timestamp:4 t1.tableoid:5 + │ │ ├── scan t2 + │ │ │ └── columns: t2.id:6 num:7 t2.rowid:8!null t2.crdb_internal_mvcc_timestamp:9 t2.tableoid:10 + │ │ └── filters + │ │ └── t1.id:1 = t2.id:6 + │ └── projections + │ └── t2.id:6 IS NULL [as=nulls_ordering_id:11] + └── aggregations + ├── first-agg [as=t1.id:1] + │ └── t1.id:1 + └── first-agg [as=str:2] + └── str:2 + +# Because of the hack we use to allow ORDER BY NULLS FIRST / LAST with DISTINCT +# ON, we also support this query. Postgres doesn't support it, but it doesn't +# seem like a problem that we do. +build +SELECT DISTINCT ON (id) * FROM t1 ORDER BY id IS NULL, id +---- +sort + ├── columns: id:1 str:2 [hidden: column6:6!null] + ├── ordering: +6,+1 + └── distinct-on + ├── columns: id:1 str:2 column6:6!null + ├── grouping columns: id:1 column6:6!null + ├── project + │ ├── columns: column6:6!null id:1 str:2 + │ ├── scan t1 + │ │ └── columns: id:1 str:2 rowid:3!null crdb_internal_mvcc_timestamp:4 tableoid:5 + │ └── projections + │ └── id:1 IS NULL [as=column6:6] + └── aggregations + └── first-agg [as=str:2] + └── str:2