From 64044c41286c1a131b6c9a0dcdcdc5e4eeabefaf Mon Sep 17 00:00:00 2001 From: Mark Sirek Date: Mon, 17 Apr 2023 11:51:30 -0700 Subject: [PATCH] optbuilder: allow non-aggregate expressions when GROUP BY cols are unique This extends support for queries such as this pseudo-SQL: ``` SELECT * FROM t1 GROUP BY pk_cols; ``` To also support: ``` SELECT * FROM t1 GROUP BY unique_index_non_nullable_key_cols; SELECT * FROM t1 GROUP BY unique_without_index_non_nullable_key_cols; ``` All unique index or unique constraint columns must be not nullable for the query to not error out. Fixes: #99444 Release note (sql change): This patch adds support for non-aggregate expressions involving columns outside of the grouping columns when the grouping columns include all key columns of a unique index and those key columns are not nullable. --- .../logic_test/regional_by_row_query_behavior | 39 ++++++++++ pkg/sql/logictest/testdata/logic_test/unique | 76 +++++++++++++++++++ pkg/sql/opt/optbuilder/groupby.go | 58 +++++++++++++- 3 files changed, 169 insertions(+), 4 deletions(-) diff --git a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_query_behavior b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_query_behavior index 97a30b72b2f6..35514140ba28 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_query_behavior +++ b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_query_behavior @@ -3445,3 +3445,42 @@ distribute │ └── filters │ └── t1.b = regional_by_row_table_virt_partial.pk └── 1 + +subtest groupByUnique + +# GROUP BY unique column v doesn't allow non-aggregate expressions in the +# SELECT list because v is not marked as NOT NULL. +statement error pq: column "pk" must appear in the GROUP BY clause or be used in an aggregate function +SELECT pk, a, b FROM regional_by_row_table_virt GROUP BY v; + +statement ok +ALTER TABLE regional_by_row_table_virt ALTER COLUMN v SET NOT NULL + +# GROUP BY unique column v allows non-aggregate expressions in the +# SELECT list because v is marked as NOT NULL. +query III +SELECT pk, a, b FROM regional_by_row_table_virt GROUP BY v; +---- +1 1 1 + +# GROUP BY unique index column a doesn't allow non-aggregate expressions in the +# SELECT list because v is not marked as NOT NULL. +statement error pq: column "pk" must appear in the GROUP BY clause or be used in an aggregate function +SELECT pk FROM regional_by_row_table_virt GROUP BY a; + +statement ok +ALTER TABLE regional_by_row_table_virt ALTER COLUMN v SET NOT NULL + +# GROUP BY unique index column a allows non-aggregate expressions in the +# SELECT list because v is marked as NOT NULL. +query I +SELECT pk FROM regional_by_row_table_virt GROUP BY v; +---- +1 + +# GROUP BY unique expression index (a+10) doesn't currently allow non-aggregate +# expressions in the SELECT list. This could potentially be supported in the +# future. The index need not be an expression index if the GROUP BY expression +# can be proven to be monotonically increasing or decreasing. +statement error pq: column "pk" must appear in the GROUP BY clause or be used in an aggregate function +SELECT pk FROM regional_by_row_table_virt GROUP BY (a+10); diff --git a/pkg/sql/logictest/testdata/logic_test/unique b/pkg/sql/logictest/testdata/logic_test/unique index a48d3fc2f41b..22552391a072 100644 --- a/pkg/sql/logictest/testdata/logic_test/unique +++ b/pkg/sql/logictest/testdata/logic_test/unique @@ -872,3 +872,79 @@ SELECT * FROM uniq_fk_child a b c d e 1 1 1 1 1 2 NULL 2 NULL NULL + +subtest GroupByUnique + +# Group by UNIQUE WITHOUT INDEX columns which are nullable must not allow +# non-aggregate expressions outside of the grouping columns. +statement error pq: column "b" must appear in the GROUP BY clause or be used in an aggregate function +SELECT b FROM uniq_hidden_pk GROUP BY a + +# Group by UNIQUE INDEX columns which are nullable must not allow non-aggregate +# expressions outside of the grouping columns. +statement error pq: column "x" must appear in the GROUP BY clause or be used in an aggregate function +SELECT x FROM uniq GROUP BY v + +# Group by UNIQUE WITHOUT INDEX constraint can allow non-aggregate expressions +# outside of the grouping columns if the constraint is not null. +query TI colnames,rowsort +SELECT r, i FROM uniq_enum GROUP BY i +---- +r i +us-west 1 +eu-west 2 + +statement ok +ALTER TABLE uniq_hidden_pk ALTER COLUMN b SET NOT NULL + +# Group by UNIQUE WITHOUT INDEX constraint cannot allow non-aggregate +# expressions if any of index columns is nullable. +statement error pq: column "a" must appear in the GROUP BY clause or be used in an aggregate function +SELECT a FROM uniq_hidden_pk GROUP BY b, c + +statement ok +ALTER TABLE uniq_hidden_pk ALTER COLUMN c SET NOT NULL + +# Group by UNIQUE WITHOUT INDEX constraint can allow non-aggregate expressions +# outside of the grouping columns if the constraint is not nullable. +query II colnames,rowsort +SELECT a,d FROM uniq_hidden_pk GROUP BY b, c +---- +a d +1 1 +2 2 + +# Group by a subset of UNIQUE WITHOUT INDEX columns must not allow non-aggregate +# expressions outside of the grouping columns. +statement error pq: column "a" must appear in the GROUP BY clause or be used in an aggregate function +SELECT a FROM uniq_hidden_pk GROUP BY b + +# Group by UNIQUE INDEX constraint cannot allow non-aggregate expressions +# outside of the grouping columns if index columns are nullable. +statement error pq: column "x" must appear in the GROUP BY clause or be used in an aggregate function +SELECT x FROM uniq GROUP BY v + +statement ok +ALTER TABLE uniq ALTER COLUMN v SET NOT NULL + +# Group by UNIQUE INDEX constraint can allow non-aggregate expressions +# outside of the grouping columns if index columns are not nullable. +query I colnames,rowsort +SELECT x FROM uniq GROUP BY v ORDER BY v +---- +x +1 +2 +3 +NULL +5 +NULL +2 +NULL +1 +NULL +NULL +20 +NULL +NULL +NULL diff --git a/pkg/sql/opt/optbuilder/groupby.go b/pkg/sql/opt/optbuilder/groupby.go index b3c6fe61de8c..473ba42775fc 100644 --- a/pkg/sql/opt/optbuilder/groupby.go +++ b/pkg/sql/opt/optbuilder/groupby.go @@ -924,9 +924,12 @@ func newGroupingError(name tree.Name) error { } // allowImplicitGroupingColumn returns true if col is part of a table and the -// the groupby metadata indicates that we are grouping on the entire PK of that -// table. In that case, we can allow col as an "implicit" grouping column, even -// if it is not specified in the query. +// groupby metadata indicates that we are grouping on the entire PK, an entire +// unique index key, or an entire unique without index key of that table. In +// that case, we can allow col as an "implicit" grouping column, even if it is +// not specified in the query. +// In the unique index or unique without index cases, all key columns must be +// marked as NOT NULL to allow the implicit grouping. func (b *Builder) allowImplicitGroupingColumn(colID opt.ColumnID, g *groupby) bool { md := b.factory.Metadata() colMeta := md.ColumnMeta(colID) @@ -935,6 +938,7 @@ func (b *Builder) allowImplicitGroupingColumn(colID opt.ColumnID, g *groupby) bo } // Get all the PK columns. tab := md.Table(colMeta.Table) + tabMeta := md.TableMeta(colMeta.Table) var pkCols opt.ColSet if tab.IndexCount() == 0 { // Virtual tables have no indexes. @@ -949,5 +953,51 @@ func (b *Builder) allowImplicitGroupingColumn(colID opt.ColumnID, g *groupby) bo for i := range groupingCols { pkCols.Remove(groupingCols[i].id) } - return pkCols.Empty() + if pkCols.Empty() { + return true + } + // Check UNIQUE WITHOUT INDEX constraints. + for i := 0; i < tab.UniqueCount(); i++ { + uniqueConstraint := tab.Unique(i) + var uniqueCols opt.ColSet + nullable := false + for j := 0; j < uniqueConstraint.ColumnCount(); j++ { + column := tab.Column(uniqueConstraint.ColumnOrdinal(tab, j)) + if column.IsNullable() { + nullable = true + } + columnID := tabMeta.MetaID.ColumnID(uniqueConstraint.ColumnOrdinal(tab, j)) + uniqueCols.Add(columnID) + } + if nullable { + // There may be duplicate rows with nulls in unique constraint columns, so + // we cannot treat the constraint as truly unique if any of its columns is + // nullable. + continue + } + for k := range groupingCols { + uniqueCols.Remove(groupingCols[k].id) + } + if uniqueCols.Empty() { + return true + } + } + // Check UNIQUE INDEX constraints. + for i := 1; i < tab.IndexCount(); i++ { + index := tab.Index(i) + if !index.IsUnique() || index.IsInverted() { + continue + } + // If any of the key columns is nullable, uniqueCols is suffixed with the + // primary key columns, so we don't have to explicitly check for nullable + // columns here. + uniqueCols := tabMeta.IndexKeyColumns(i) + for j := range groupingCols { + uniqueCols.Remove(groupingCols[j].id) + } + if uniqueCols.Empty() { + return true + } + } + return false }