From e920dd93aaf10280234d42a9bf88f2d240604b4a Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Mon, 4 Oct 2021 16:54:41 -0400 Subject: [PATCH] sql: do not collect statistics on virtual columns PR #68312 intended to update the behavior of `CREATE STATISTICS` to prevent statistics collection on virtual computed columns. However, it failed to account for multi-column statistics and for `CREATE STATISTICS` statements that explicitly reference virtual columns. This commit accounts for these two cases. This prevents internal errors from occuring when the system tries to collect statistics on `NOT NULL` virtual columns. Virtual column values are not included in the primary index. So when the statistics job reads the primary index to sample the virtual column, it assumes the value is null, which violates the column's `NOT NULL` constraint. This violation causes an error. Fixes #71080 Release note (bug fix): A bug has been fixed which caused internal errors when collecting statistics on tables with virtual computed columns. --- pkg/sql/create_stats.go | 18 ++++++++- .../testdata/logic_test/distsql_stats | 37 ++++++++++++++++++- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/pkg/sql/create_stats.go b/pkg/sql/create_stats.go index dd0478ccdcb6..aae704326690 100644 --- a/pkg/sql/create_stats.go +++ b/pkg/sql/create_stats.go @@ -227,6 +227,13 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro columnIDs := make([]descpb.ColumnID, len(columns)) for i := range columns { + if columns[i].IsVirtual() { + return nil, pgerror.Newf( + pgcode.InvalidColumnReference, + "cannot create statistics on virtual column %q", + columns[i].ColName(), + ) + } columnIDs[i] = columns[i].GetID() } col, err := tableDesc.FindColumnWithID(columnIDs[0]) @@ -422,9 +429,16 @@ func createStatsDefaultColumns( continue } - colIDs := make([]descpb.ColumnID, j+1) + colIDs := make([]descpb.ColumnID, 0, j+1) for k := 0; k <= j; k++ { - colIDs[k] = idx.GetColumnID(k) + col, err := desc.FindColumnWithID(idx.GetColumnID(k)) + if err != nil { + return nil, err + } + if col.IsVirtual() { + continue + } + colIDs = append(colIDs, col.GetID()) } // Check for existing stats and remember the requested stats. diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_stats b/pkg/sql/logictest/testdata/logic_test/distsql_stats index e19d7f02f29b..44aa546c448d 100644 --- a/pkg/sql/logictest/testdata/logic_test/distsql_stats +++ b/pkg/sql/logictest/testdata/logic_test/distsql_stats @@ -824,11 +824,18 @@ upper_bound range_rows distinct_range_rows equal_rows 3 0 0 1 # Test that stats are not collected for virtual columns. +statement ok +SET CLUSTER SETTING sql.stats.multi_column_collection.enabled = true + statement ok CREATE TABLE virt ( a INT, + b INT, v INT AS (a + 10) VIRTUAL, - INDEX (v) + INDEX (v), + INDEX (a, v), + INDEX (a, v, b), + INDEX (a) WHERE v > 0 ) statement ok @@ -850,7 +857,9 @@ ORDER BY column_names::STRING, created ---- statistics_name column_names row_count null_count has_histogram +s {a,b} 3 0 false s {a} 3 0 true +s {b} 3 3 true s {rowid} 3 0 true # Test that non-index columns have histograms collected for them, with @@ -1004,6 +1013,7 @@ ORDER BY column_names has_histogram {id} true {j} true +{s,j} false {s} true # Regression test for #56356. Histograms on all-null columns should not cause @@ -1121,3 +1131,28 @@ CREATE TABLE t63387 ( ); INSERT INTO t63387 VALUES (1, '{}'); CREATE STATISTICS s FROM t63387; + +# Regression test for #71080. Stats collection should succeed on tables with NOT +# NULL virtual columns. +statement ok +SET CLUSTER SETTING sql.stats.multi_column_collection.enabled = true + +statement ok +CREATE TABLE t71080 ( + k INT PRIMARY KEY, + a INT, + b INT NOT NULL AS (a + 10) VIRTUAL, + INDEX (a, b) +); + +statement ok +INSERT INTO t71080 VALUES (1, 2); + +statement ok +CREATE STATISTICS s FROM t71080; + +statement error cannot create statistics on virtual column \"b\" +CREATE STATISTICS s ON b FROM t71080; + +statement error cannot create statistics on virtual column \"b\" +CREATE STATISTICS s ON a, b FROM t71080;