Skip to content

Commit

Permalink
optbuilder: allow non-aggregate expressions when GROUP BY cols are un…
Browse files Browse the repository at this point in the history
…ique

This extends support for queries such as this pseudo-SQL:
```
SELECT * FROM t1 GROUP BY pk_cols;
```
To also support:
```
SELECT * FROM t1 GROUP BY unique_index_non_nullable_key_cols;
SELECT * FROM t1 GROUP BY unique_without_index_non_nullable_key_cols;
```
All unique index or unique constraint columns must be not nullable for
the query to not error out.

Fixes: cockroachdb#99444

Release note (sql change): This patch adds support for non-aggregate
expressions involving columns outside of the grouping columns when the
grouping columns include all key columns of a unique index and those key
columns are not nullable.
  • Loading branch information
Mark Sirek committed Apr 18, 2023
1 parent 410c378 commit 64044c4
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3445,3 +3445,42 @@ distribute
│ └── filters
│ └── t1.b = regional_by_row_table_virt_partial.pk
└── 1

subtest groupByUnique

# GROUP BY unique column v doesn't allow non-aggregate expressions in the
# SELECT list because v is not marked as NOT NULL.
statement error pq: column "pk" must appear in the GROUP BY clause or be used in an aggregate function
SELECT pk, a, b FROM regional_by_row_table_virt GROUP BY v;

statement ok
ALTER TABLE regional_by_row_table_virt ALTER COLUMN v SET NOT NULL

# GROUP BY unique column v allows non-aggregate expressions in the
# SELECT list because v is marked as NOT NULL.
query III
SELECT pk, a, b FROM regional_by_row_table_virt GROUP BY v;
----
1 1 1

# GROUP BY unique index column a doesn't allow non-aggregate expressions in the
# SELECT list because v is not marked as NOT NULL.
statement error pq: column "pk" must appear in the GROUP BY clause or be used in an aggregate function
SELECT pk FROM regional_by_row_table_virt GROUP BY a;

statement ok
ALTER TABLE regional_by_row_table_virt ALTER COLUMN v SET NOT NULL

# GROUP BY unique index column a allows non-aggregate expressions in the
# SELECT list because v is marked as NOT NULL.
query I
SELECT pk FROM regional_by_row_table_virt GROUP BY v;
----
1

# GROUP BY unique expression index (a+10) doesn't currently allow non-aggregate
# expressions in the SELECT list. This could potentially be supported in the
# future. The index need not be an expression index if the GROUP BY expression
# can be proven to be monotonically increasing or decreasing.
statement error pq: column "pk" must appear in the GROUP BY clause or be used in an aggregate function
SELECT pk FROM regional_by_row_table_virt GROUP BY (a+10);
76 changes: 76 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/unique
Original file line number Diff line number Diff line change
Expand Up @@ -872,3 +872,79 @@ SELECT * FROM uniq_fk_child
a b c d e
1 1 1 1 1
2 NULL 2 NULL NULL

subtest GroupByUnique

# Group by UNIQUE WITHOUT INDEX columns which are nullable must not allow
# non-aggregate expressions outside of the grouping columns.
statement error pq: column "b" must appear in the GROUP BY clause or be used in an aggregate function
SELECT b FROM uniq_hidden_pk GROUP BY a

# Group by UNIQUE INDEX columns which are nullable must not allow non-aggregate
# expressions outside of the grouping columns.
statement error pq: column "x" must appear in the GROUP BY clause or be used in an aggregate function
SELECT x FROM uniq GROUP BY v

# Group by UNIQUE WITHOUT INDEX constraint can allow non-aggregate expressions
# outside of the grouping columns if the constraint is not null.
query TI colnames,rowsort
SELECT r, i FROM uniq_enum GROUP BY i
----
r i
us-west 1
eu-west 2

statement ok
ALTER TABLE uniq_hidden_pk ALTER COLUMN b SET NOT NULL

# Group by UNIQUE WITHOUT INDEX constraint cannot allow non-aggregate
# expressions if any of index columns is nullable.
statement error pq: column "a" must appear in the GROUP BY clause or be used in an aggregate function
SELECT a FROM uniq_hidden_pk GROUP BY b, c

statement ok
ALTER TABLE uniq_hidden_pk ALTER COLUMN c SET NOT NULL

# Group by UNIQUE WITHOUT INDEX constraint can allow non-aggregate expressions
# outside of the grouping columns if the constraint is not nullable.
query II colnames,rowsort
SELECT a,d FROM uniq_hidden_pk GROUP BY b, c
----
a d
1 1
2 2

# Group by a subset of UNIQUE WITHOUT INDEX columns must not allow non-aggregate
# expressions outside of the grouping columns.
statement error pq: column "a" must appear in the GROUP BY clause or be used in an aggregate function
SELECT a FROM uniq_hidden_pk GROUP BY b

# Group by UNIQUE INDEX constraint cannot allow non-aggregate expressions
# outside of the grouping columns if index columns are nullable.
statement error pq: column "x" must appear in the GROUP BY clause or be used in an aggregate function
SELECT x FROM uniq GROUP BY v

statement ok
ALTER TABLE uniq ALTER COLUMN v SET NOT NULL

# Group by UNIQUE INDEX constraint can allow non-aggregate expressions
# outside of the grouping columns if index columns are not nullable.
query I colnames,rowsort
SELECT x FROM uniq GROUP BY v ORDER BY v
----
x
1
2
3
NULL
5
NULL
2
NULL
1
NULL
NULL
20
NULL
NULL
NULL
58 changes: 54 additions & 4 deletions pkg/sql/opt/optbuilder/groupby.go
Original file line number Diff line number Diff line change
Expand Up @@ -924,9 +924,12 @@ func newGroupingError(name tree.Name) error {
}

// allowImplicitGroupingColumn returns true if col is part of a table and the
// the groupby metadata indicates that we are grouping on the entire PK of that
// table. In that case, we can allow col as an "implicit" grouping column, even
// if it is not specified in the query.
// groupby metadata indicates that we are grouping on the entire PK, an entire
// unique index key, or an entire unique without index key of that table. In
// that case, we can allow col as an "implicit" grouping column, even if it is
// not specified in the query.
// In the unique index or unique without index cases, all key columns must be
// marked as NOT NULL to allow the implicit grouping.
func (b *Builder) allowImplicitGroupingColumn(colID opt.ColumnID, g *groupby) bool {
md := b.factory.Metadata()
colMeta := md.ColumnMeta(colID)
Expand All @@ -935,6 +938,7 @@ func (b *Builder) allowImplicitGroupingColumn(colID opt.ColumnID, g *groupby) bo
}
// Get all the PK columns.
tab := md.Table(colMeta.Table)
tabMeta := md.TableMeta(colMeta.Table)
var pkCols opt.ColSet
if tab.IndexCount() == 0 {
// Virtual tables have no indexes.
Expand All @@ -949,5 +953,51 @@ func (b *Builder) allowImplicitGroupingColumn(colID opt.ColumnID, g *groupby) bo
for i := range groupingCols {
pkCols.Remove(groupingCols[i].id)
}
return pkCols.Empty()
if pkCols.Empty() {
return true
}
// Check UNIQUE WITHOUT INDEX constraints.
for i := 0; i < tab.UniqueCount(); i++ {
uniqueConstraint := tab.Unique(i)
var uniqueCols opt.ColSet
nullable := false
for j := 0; j < uniqueConstraint.ColumnCount(); j++ {
column := tab.Column(uniqueConstraint.ColumnOrdinal(tab, j))
if column.IsNullable() {
nullable = true
}
columnID := tabMeta.MetaID.ColumnID(uniqueConstraint.ColumnOrdinal(tab, j))
uniqueCols.Add(columnID)
}
if nullable {
// There may be duplicate rows with nulls in unique constraint columns, so
// we cannot treat the constraint as truly unique if any of its columns is
// nullable.
continue
}
for k := range groupingCols {
uniqueCols.Remove(groupingCols[k].id)
}
if uniqueCols.Empty() {
return true
}
}
// Check UNIQUE INDEX constraints.
for i := 1; i < tab.IndexCount(); i++ {
index := tab.Index(i)
if !index.IsUnique() || index.IsInverted() {
continue
}
// If any of the key columns is nullable, uniqueCols is suffixed with the
// primary key columns, so we don't have to explicitly check for nullable
// columns here.
uniqueCols := tabMeta.IndexKeyColumns(i)
for j := range groupingCols {
uniqueCols.Remove(groupingCols[j].id)
}
if uniqueCols.Empty() {
return true
}
}
return false
}

0 comments on commit 64044c4

Please sign in to comment.