Skip to content

Commit

Permalink
opt: fix error with DISTINCT ON and ORDER BY ASC NULLS LAST
Browse files Browse the repository at this point in the history
This commit fixes an error that could occur in the optbuilder when planning a
query with DISTINCT ON and a non-standard nulls ordering.

The optimizer supports queries with a non-standard nulls ordering by projecting
a column with the expression (col IS NULL) and adding it to the ordering. Since
we require that DISTINCT ON columns must be the prefix of any ordering columns,
we must account for the new ordering column when building DISTINCT ON. A
previous bug fix for #90763 caused the new column to be simply ignored when
building DISTINCT ON, but this was insufficient. We need to actually include
the new column among the DISTINCT ON columns. This commit makes that change.

Fixes #107839

Release note (bug fix): Fixed a spurious error "no data source matches prefix"
that could occur during planning for a query with DISTINCT ON and ORDER BY ASC
NULLS LAST or ORDER BY DESC NULLS FIRST.
  • Loading branch information
rytaft committed Jul 29, 2023
1 parent 7ba8059 commit 7346f0a
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 31 deletions.
29 changes: 29 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distinct_on
Original file line number Diff line number Diff line change
Expand Up @@ -405,3 +405,32 @@ Action
Biography
Crime
NULL

# Regression test for #107839. More fixes for NULLS FIRST/LAST with DISTINCT ON.
statement ok
CREATE TABLE t1 (id int, str text);
CREATE TABLE t2 (id int, num int);
INSERT INTO t1 VALUES (1, 'hello'), (2, NULL);
INSERT INTO t2 VALUES (1, 1), (2, 2), (NULL, NULL)

query II
SELECT
DISTINCT ON (t2.id)
t2.*
FROM t1, t2
ORDER BY t2.id DESC NULLS FIRST
----
NULL NULL
2 2
1 1

query II
SELECT
DISTINCT ON (t2.id)
t2.*
FROM t1, t2
ORDER BY t2.id ASC NULLS LAST
----
1 1
2 2
NULL NULL
16 changes: 11 additions & 5 deletions pkg/sql/opt/optbuilder/distinct.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,23 +89,29 @@ func (b *Builder) buildDistinctOn(
var seen opt.ColSet
for _, col := range inScope.ordering {
if !distinctOnCols.Contains(col.ID()) {
colIsValid := false
scopeCol := inScope.getColumn(col.ID())
if scopeCol != nil {
if isExpr, ok := scopeCol.scalar.(*memo.IsExpr); ok {
if _, ok := isExpr.Right.(*memo.NullExpr); ok {
if v, ok := isExpr.Left.(*memo.VariableExpr); ok {
if distinctOnCols.Contains(v.Col) {
// We have a col IS NULL expression (case 3 above).
continue
// Add the new column to distinctOnCols, since it doesn't change
// the semantics of the DISTINCT ON.
distinctOnCols.Add(col.ID())
colIsValid = true
}
}
}
}
}
panic(pgerror.Newf(
pgcode.InvalidColumnReference,
"SELECT DISTINCT ON expressions must match initial ORDER BY expressions",
))
if !colIsValid {
panic(pgerror.Newf(
pgcode.InvalidColumnReference,
"SELECT DISTINCT ON expressions must match initial ORDER BY expressions",
))
}
}
seen.Add(col.ID())
if seen.Equals(distinctOnCols) {
Expand Down
87 changes: 61 additions & 26 deletions pkg/sql/opt/optbuilder/testdata/distinct_on
Original file line number Diff line number Diff line change
Expand Up @@ -507,30 +507,65 @@ ORDER BY
"genre" ASC NULLS LAST,
"id" ASC NULLS LAST
----
distinct-on
├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:6!null]
├── grouping columns: genre:3 nulls_ordering_genre:6!null
├── internal-ordering: +7,+1 opt(3,6)
├── ordering: +6,+3
├── sort
│ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null
│ ├── ordering: +6,+3,+7,+1
│ └── project
│ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3
│ ├── scan author
│ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5
│ └── projections
│ ├── genre:3 IS NULL [as=nulls_ordering_genre:6]
│ └── id:1 IS NULL [as=nulls_ordering_id:7]
└── aggregations
├── first-agg [as=id:1]
│ └── id:1
└── first-agg [as=name:2]
└── name:2

# Regression test for #107839. More fixes for NULLS FIRST/LAST with DISTINCT ON.

exec-ddl
CREATE TABLE t1 (id int, str text)
----

exec-ddl
CREATE TABLE t2 (id int, num int)
----

build
SELECT
DISTINCT ON (t2.id)
t1.id,
t1.str
FROM t1 JOIN t2 ON t1.id = t2.id
ORDER BY t2.id DESC NULLS FIRST
----
sort
├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:8!null nulls_ordering_id:9!null]
├── ordering: +8,+3,+9,+1
└── project
├── columns: nulls_ordering_genre:8!null nulls_ordering_id:9!null id:1!null name:2 genre:3
├── distinct-on
│ ├── columns: id:1!null name:2 genre:3
│ ├── grouping columns: genre:3
│ ├── internal-ordering: +6,+7,+1 opt(3)
│ ├── sort
│ │ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null
│ │ ├── ordering: +7,+1 opt(3,6)
│ │ └── project
│ │ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3
│ │ ├── scan author
│ │ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5
│ │ └── projections
│ │ ├── genre:3 IS NULL [as=nulls_ordering_genre:6]
│ │ └── id:1 IS NULL [as=nulls_ordering_id:7]
│ └── aggregations
│ ├── first-agg [as=id:1]
│ │ └── id:1
│ └── first-agg [as=name:2]
│ └── name:2
└── projections
├── genre:3 IS NULL [as=nulls_ordering_genre:8]
└── id:1 IS NULL [as=nulls_ordering_id:9]
├── columns: id:1!null str:2 [hidden: t2.id:6!null nulls_ordering_id:11!null]
├── ordering: -11,-6
└── distinct-on
├── columns: t1.id:1!null str:2 t2.id:6!null nulls_ordering_id:11!null
├── grouping columns: t2.id:6!null nulls_ordering_id:11!null
├── project
│ ├── columns: nulls_ordering_id:11!null t1.id:1!null str:2 t2.id:6!null
│ ├── inner-join (hash)
│ │ ├── columns: t1.id:1!null str:2 t1.rowid:3!null t1.crdb_internal_mvcc_timestamp:4 t1.tableoid:5 t2.id:6!null num:7 t2.rowid:8!null t2.crdb_internal_mvcc_timestamp:9 t2.tableoid:10
│ │ ├── scan t1
│ │ │ └── columns: t1.id:1 str:2 t1.rowid:3!null t1.crdb_internal_mvcc_timestamp:4 t1.tableoid:5
│ │ ├── scan t2
│ │ │ └── columns: t2.id:6 num:7 t2.rowid:8!null t2.crdb_internal_mvcc_timestamp:9 t2.tableoid:10
│ │ └── filters
│ │ └── t1.id:1 = t2.id:6
│ └── projections
│ └── t2.id:6 IS NULL [as=nulls_ordering_id:11]
└── aggregations
├── first-agg [as=t1.id:1]
│ └── t1.id:1
└── first-agg [as=str:2]
└── str:2

0 comments on commit 7346f0a

Please sign in to comment.