Skip to content

Commit

Permalink
opt: fix error with DISTINCT ON and ORDER BY ASC NULLS LAST
Browse files Browse the repository at this point in the history
This commit fixes an error that could occur in the optbuilder when planning a
query with DISTINCT ON and a non-standard nulls ordering.

The optimizer supports queries with a non-standard nulls ordering by projecting
a column with the expression (col IS NULL) and adding it to the ordering. Since
we require that DISTINCT ON columns must be the prefix of any ordering columns,
we must account for the new ordering column when building DISTINCT ON. A
previous bug fix for cockroachdb#90763 caused the new column to be simply ignored when
building DISTINCT ON, but this was insufficient. We need to actually include
the new column among the DISTINCT ON columns. This commit makes that change.

Fixes cockroachdb#107839

Release note (bug fix): Fixed a spurious error "no data source matches prefix"
that could occur during planning for a query with DISTINCT ON and ORDER BY ASC
NULLS LAST or ORDER BY DESC NULLS FIRST.
  • Loading branch information
rytaft committed Aug 7, 2023
1 parent 898c592 commit 9dcdbcd
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 31 deletions.
29 changes: 29 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distinct_on
Original file line number Diff line number Diff line change
Expand Up @@ -405,3 +405,32 @@ Action
Biography
Crime
NULL

# Regression test for #107839. More fixes for NULLS FIRST/LAST with DISTINCT ON.
statement ok
CREATE TABLE t1 (id int, str text);
CREATE TABLE t2 (id int, num int);
INSERT INTO t1 VALUES (1, 'hello'), (2, NULL);
INSERT INTO t2 VALUES (1, 1), (2, 2), (NULL, NULL)

query II
SELECT
DISTINCT ON (t2.id)
t2.*
FROM t1, t2
ORDER BY t2.id DESC NULLS FIRST
----
NULL NULL
2 2
1 1

query II
SELECT
DISTINCT ON (t2.id)
t2.*
FROM t1, t2
ORDER BY t2.id ASC NULLS LAST
----
1 1
2 2
NULL NULL
16 changes: 11 additions & 5 deletions pkg/sql/opt/optbuilder/distinct.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,23 +89,29 @@ func (b *Builder) buildDistinctOn(
var seen opt.ColSet
for _, col := range inScope.ordering {
if !distinctOnCols.Contains(col.ID()) {
colIsValid := false
scopeCol := inScope.getColumn(col.ID())
if scopeCol != nil {
if isExpr, ok := scopeCol.scalar.(*memo.IsExpr); ok {
if _, ok := isExpr.Right.(*memo.NullExpr); ok {
if v, ok := isExpr.Left.(*memo.VariableExpr); ok {
if distinctOnCols.Contains(v.Col) {
// We have a col IS NULL expression (case 3 above).
continue
// Add the new column to distinctOnCols, since it doesn't change
// the semantics of the DISTINCT ON.
distinctOnCols.Add(col.ID())
colIsValid = true
}
}
}
}
}
panic(pgerror.Newf(
pgcode.InvalidColumnReference,
"SELECT DISTINCT ON expressions must match initial ORDER BY expressions",
))
if !colIsValid {
panic(pgerror.Newf(
pgcode.InvalidColumnReference,
"SELECT DISTINCT ON expressions must match initial ORDER BY expressions",
))
}
}
seen.Add(col.ID())
if seen.Equals(distinctOnCols) {
Expand Down
109 changes: 83 additions & 26 deletions pkg/sql/opt/optbuilder/testdata/distinct_on
Original file line number Diff line number Diff line change
Expand Up @@ -507,30 +507,87 @@ ORDER BY
"genre" ASC NULLS LAST,
"id" ASC NULLS LAST
----
distinct-on
├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:6!null]
├── grouping columns: genre:3 nulls_ordering_genre:6!null
├── internal-ordering: +7,+1 opt(3,6)
├── ordering: +6,+3
├── sort
│ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null
│ ├── ordering: +6,+3,+7,+1
│ └── project
│ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3
│ ├── scan author
│ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5
│ └── projections
│ ├── genre:3 IS NULL [as=nulls_ordering_genre:6]
│ └── id:1 IS NULL [as=nulls_ordering_id:7]
└── aggregations
├── first-agg [as=id:1]
│ └── id:1
└── first-agg [as=name:2]
└── name:2

# Regression test for #107839. More fixes for NULLS FIRST/LAST with DISTINCT ON.

exec-ddl
CREATE TABLE t1 (id int, str text)
----

exec-ddl
CREATE TABLE t2 (id int, num int)
----

build
SELECT
DISTINCT ON (t2.id)
t1.id,
t1.str
FROM t1 JOIN t2 ON t1.id = t2.id
ORDER BY t2.id DESC NULLS FIRST
----
sort
├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:8!null nulls_ordering_id:9!null]
├── ordering: +8,+3,+9,+1
└── project
├── columns: nulls_ordering_genre:8!null nulls_ordering_id:9!null id:1!null name:2 genre:3
├── distinct-on
│ ├── columns: id:1!null name:2 genre:3
│ ├── grouping columns: genre:3
│ ├── internal-ordering: +6,+7,+1 opt(3)
│ ├── sort
│ │ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null
│ │ ├── ordering: +7,+1 opt(3,6)
│ │ └── project
│ │ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3
│ │ ├── scan author
│ │ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5
│ │ └── projections
│ │ ├── genre:3 IS NULL [as=nulls_ordering_genre:6]
│ │ └── id:1 IS NULL [as=nulls_ordering_id:7]
│ └── aggregations
│ ├── first-agg [as=id:1]
│ │ └── id:1
│ └── first-agg [as=name:2]
│ └── name:2
└── projections
├── genre:3 IS NULL [as=nulls_ordering_genre:8]
└── id:1 IS NULL [as=nulls_ordering_id:9]
├── columns: id:1!null str:2 [hidden: t2.id:6!null nulls_ordering_id:11!null]
├── ordering: -11,-6
└── distinct-on
├── columns: t1.id:1!null str:2 t2.id:6!null nulls_ordering_id:11!null
├── grouping columns: t2.id:6!null nulls_ordering_id:11!null
├── project
│ ├── columns: nulls_ordering_id:11!null t1.id:1!null str:2 t2.id:6!null
│ ├── inner-join (hash)
│ │ ├── columns: t1.id:1!null str:2 t1.rowid:3!null t1.crdb_internal_mvcc_timestamp:4 t1.tableoid:5 t2.id:6!null num:7 t2.rowid:8!null t2.crdb_internal_mvcc_timestamp:9 t2.tableoid:10
│ │ ├── scan t1
│ │ │ └── columns: t1.id:1 str:2 t1.rowid:3!null t1.crdb_internal_mvcc_timestamp:4 t1.tableoid:5
│ │ ├── scan t2
│ │ │ └── columns: t2.id:6 num:7 t2.rowid:8!null t2.crdb_internal_mvcc_timestamp:9 t2.tableoid:10
│ │ └── filters
│ │ └── t1.id:1 = t2.id:6
│ └── projections
│ └── t2.id:6 IS NULL [as=nulls_ordering_id:11]
└── aggregations
├── first-agg [as=t1.id:1]
│ └── t1.id:1
└── first-agg [as=str:2]
└── str:2

# Because of the hack we use to allow ORDER BY NULLS FIRST / LAST with DISTINCT
# ON, we also support this query. Postgres doesn't support it, but it doesn't
# seem like a problem that we do.
build
SELECT DISTINCT ON (id) * FROM t1 ORDER BY id IS NULL, id
----
sort
├── columns: id:1 str:2 [hidden: column6:6!null]
├── ordering: +6,+1
└── distinct-on
├── columns: id:1 str:2 column6:6!null
├── grouping columns: id:1 column6:6!null
├── project
│ ├── columns: column6:6!null id:1 str:2
│ ├── scan t1
│ │ └── columns: id:1 str:2 rowid:3!null crdb_internal_mvcc_timestamp:4 tableoid:5
│ └── projections
│ └── id:1 IS NULL [as=column6:6]
└── aggregations
└── first-agg [as=str:2]
└── str:2

0 comments on commit 9dcdbcd

Please sign in to comment.