Skip to content

Commit

Permalink
sql: do not collect statistics on virtual columns
Browse files Browse the repository at this point in the history
PR cockroachdb#68312 intended to update the behavior of `CREATE STATISTICS` to
prevent statistics collection on virtual computed columns. However, it
failed to account for multi-column statistics and for
`CREATE STATISTICS` statements that explicitly reference virtual
columns. This commit accounts for these two cases.

This prevents internal errors from occurring when the system tries to
collect statistics on `NOT NULL` virtual columns. Virtual column values
are not included in the primary index. So when the statistics
job reads the primary index to sample the virtual column, it assumes the
value is null, which violates the column's `NOT NULL` constraint. This
violation causes an error.

Fixes cockroachdb#71080

Release note (bug fix): A bug has been fixed which caused internal
errors when collecting statistics on tables with virtual computed
columns.
  • Loading branch information
mgartner authored and ericharmeling committed Oct 20, 2021
1 parent a14ab7a commit e907fa9
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 4 deletions.
18 changes: 16 additions & 2 deletions pkg/sql/create_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,13 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro

columnIDs := make([]descpb.ColumnID, len(columns))
for i := range columns {
if columns[i].IsVirtual() {
return nil, pgerror.Newf(
pgcode.InvalidColumnReference,
"cannot create statistics on virtual column %q",
columns[i].ColName(),
)
}
columnIDs[i] = columns[i].GetID()
}
col, err := tableDesc.FindColumnWithID(columnIDs[0])
Expand Down Expand Up @@ -441,9 +448,16 @@ func createStatsDefaultColumns(
continue
}

colIDs := make([]descpb.ColumnID, j+1)
colIDs := make([]descpb.ColumnID, 0, j+1)
for k := 0; k <= j; k++ {
colIDs[k] = idx.GetKeyColumnID(k)
col, err := desc.FindColumnWithID(idx.GetKeyColumnID(k))
if err != nil {
return nil, err
}
if col.IsVirtual() {
continue
}
colIDs = append(colIDs, col.GetID())
}

// Check for existing stats and remember the requested stats.
Expand Down
41 changes: 39 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -824,11 +824,18 @@ upper_bound range_rows distinct_range_rows equal_rows
3 0 0 1

# Test that stats are not collected for virtual columns.
statement ok
SET CLUSTER SETTING sql.stats.multi_column_collection.enabled = true

statement ok
CREATE TABLE virt (
a INT,
b INT,
v INT AS (a + 10) VIRTUAL,
INDEX (v)
INDEX (v),
INDEX (a, v),
INDEX (a, v, b),
INDEX (a) WHERE v > 0
)

statement ok
Expand All @@ -850,7 +857,9 @@ ORDER BY
column_names::STRING, created
----
statistics_name column_names row_count null_count has_histogram
s {a,b} 3 0 false
s {a} 3 0 true
s {b} 3 3 true
s {rowid} 3 0 true

# Test that stats are not collected for inaccessible virtual columns that
Expand All @@ -861,7 +870,9 @@ CREATE TABLE expression (
b INT,
j JSON,
INDEX a_plus_b ((a + b)),
INVERTED INDEX j_a ((j->'a'))
INDEX a_a_plus_b (a, (a + b)),
INVERTED INDEX j_a ((j->'a')),
INVERTED INDEX a_j_a (a, (j->'a'))
);
INSERT INTO expression VALUES (1, 1, '{"a": "b"}'), (2, 10, '{"c": "d"}'), (3, 1, '{"e": "f"}');

Expand Down Expand Up @@ -1037,6 +1048,7 @@ ORDER BY
column_names has_histogram
{id} true
{j} true
{s,j} false
{s} true

statement ok
Expand Down Expand Up @@ -1181,3 +1193,28 @@ CREATE TABLE t63387 (
);
INSERT INTO t63387 VALUES (1, '{}');
CREATE STATISTICS s FROM t63387;

# Regression test for #71080. Stats collection should succeed on tables with NOT
# NULL virtual columns.
statement ok
SET CLUSTER SETTING sql.stats.multi_column_collection.enabled = true

statement ok
CREATE TABLE t71080 (
k INT PRIMARY KEY,
a INT,
b INT NOT NULL AS (a + 10) VIRTUAL,
INDEX (a, b)
);

statement ok
INSERT INTO t71080 VALUES (1, 2);

statement ok
CREATE STATISTICS s FROM t71080;

statement error cannot create statistics on virtual column \"b\"
CREATE STATISTICS s ON b FROM t71080;

statement error cannot create statistics on virtual column \"b\"
CREATE STATISTICS s ON a, b FROM t71080;

0 comments on commit e907fa9

Please sign in to comment.