Skip to content

Commit

Permalink
sql: do not collect statistics on virtual columns
Browse files Browse the repository at this point in the history
PR cockroachdb#68312 intended to update the behavior of  `CREATE STATISTICS` to
prevent statistics collection on virtual computed columns. However, it
failed to account for multi-column statistics and for
`CREATE STATISTICS` statements that explicitly reference virtual
columns. This commit accounts for these two cases.

This prevents internal errors from occuring when the system tries to
collect statistics on `NOT NULL` virtual columns. Virtual column values
are not included in the primary index. So when the statistics
job reads the primary index to sample the virtual column, it assumes the
value is null, which violates the column's `NOT NULL` constraint. This
violation causes an error.

Fixes cockroachdb#71080

Release note (bug fix): A bug has been fixed which caused internal
errors when collecting statistics on tables with virtual computed
columns.
  • Loading branch information
mgartner committed Oct 7, 2021
1 parent 4ac6bf3 commit e920dd9
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 3 deletions.
18 changes: 16 additions & 2 deletions pkg/sql/create_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,13 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro

columnIDs := make([]descpb.ColumnID, len(columns))
for i := range columns {
if columns[i].IsVirtual() {
return nil, pgerror.Newf(
pgcode.InvalidColumnReference,
"cannot create statistics on virtual column %q",
columns[i].ColName(),
)
}
columnIDs[i] = columns[i].GetID()
}
col, err := tableDesc.FindColumnWithID(columnIDs[0])
Expand Down Expand Up @@ -422,9 +429,16 @@ func createStatsDefaultColumns(
continue
}

colIDs := make([]descpb.ColumnID, j+1)
colIDs := make([]descpb.ColumnID, 0, j+1)
for k := 0; k <= j; k++ {
colIDs[k] = idx.GetColumnID(k)
col, err := desc.FindColumnWithID(idx.GetColumnID(k))
if err != nil {
return nil, err
}
if col.IsVirtual() {
continue
}
colIDs = append(colIDs, col.GetID())
}

// Check for existing stats and remember the requested stats.
Expand Down
37 changes: 36 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -824,11 +824,18 @@ upper_bound range_rows distinct_range_rows equal_rows
3 0 0 1

# Test that stats are not collected for virtual columns.
statement ok
SET CLUSTER SETTING sql.stats.multi_column_collection.enabled = true

statement ok
CREATE TABLE virt (
a INT,
b INT,
v INT AS (a + 10) VIRTUAL,
INDEX (v)
INDEX (v),
INDEX (a, v),
INDEX (a, v, b),
INDEX (a) WHERE v > 0
)

statement ok
Expand All @@ -850,7 +857,9 @@ ORDER BY
column_names::STRING, created
----
statistics_name column_names row_count null_count has_histogram
s {a,b} 3 0 false
s {a} 3 0 true
s {b} 3 3 true
s {rowid} 3 0 true

# Test that non-index columns have histograms collected for them, with
Expand Down Expand Up @@ -1004,6 +1013,7 @@ ORDER BY
column_names has_histogram
{id} true
{j} true
{s,j} false
{s} true

# Regression test for #56356. Histograms on all-null columns should not cause
Expand Down Expand Up @@ -1121,3 +1131,28 @@ CREATE TABLE t63387 (
);
INSERT INTO t63387 VALUES (1, '{}');
CREATE STATISTICS s FROM t63387;

# Regression test for #71080. Stats collection should succeed on tables with NOT
# NULL virtual columns.
statement ok
SET CLUSTER SETTING sql.stats.multi_column_collection.enabled = true

statement ok
CREATE TABLE t71080 (
k INT PRIMARY KEY,
a INT,
b INT NOT NULL AS (a + 10) VIRTUAL,
INDEX (a, b)
);

statement ok
INSERT INTO t71080 VALUES (1, 2);

statement ok
CREATE STATISTICS s FROM t71080;

statement error cannot create statistics on virtual column \"b\"
CREATE STATISTICS s ON b FROM t71080;

statement error cannot create statistics on virtual column \"b\"
CREATE STATISTICS s ON a, b FROM t71080;

0 comments on commit e920dd9

Please sign in to comment.