Skip to content

Commit

Permalink
sql: do not collect stats for virtual columns
Browse files Browse the repository at this point in the history
Previously, incorrect stats were collected for virtual computed columns.
This was because DistSQLPlanner plans table readers on the table's
primary index which does not include virtual computed columns.

These stats were likely never used by the optimizer where all stats
originate from Scans. Scans only produce stats for the columns they
produce and Scans cannot produce virtual columns. So even though stats
for virtual columns were collected, they never propagated through
statistics estimation.

This commit disables stats collection for virtual columns to avoid
confusion and future bugs resulting from incorrect stats.

Fixes cockroachdb#68186

Release note: None
  • Loading branch information
mgartner committed Oct 7, 2021
1 parent 9bf0c8e commit 4ac6bf3
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 6 deletions.
41 changes: 35 additions & 6 deletions pkg/sql/create_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,12 +335,25 @@ func createStatsDefaultColumns(
// addIndexColumnStatsIfNotExists appends column stats for the given column
// ID if they have not already been added. Histogram stats are collected for
// every indexed column.
addIndexColumnStatsIfNotExists := func(colID descpb.ColumnID, isInverted bool) {
addIndexColumnStatsIfNotExists := func(colID descpb.ColumnID, isInverted bool) error {
col, err := desc.FindColumnWithID(colID)
if err != nil {
return err
}

// Do not collect stats for virtual computed columns. DistSQLPlanner
// cannot currently collect stats for these columns because it plans
// table readers on the table's primary index which does not include
// virtual computed columns.
if col.IsVirtual() {
return nil
}

colList := []descpb.ColumnID{colID}

// Check for existing stats and remember the requested stats.
if !trackStatsIfNotExists(colList) {
return
return nil
}

colStat := jobspb.CreateStatsDetails_ColStat{
Expand All @@ -360,12 +373,18 @@ func createStatsDefaultColumns(
colStat.HasHistogram = true
colStats = append(colStats, colStat)
}

return nil
}

// Add column stats for the primary key.
for i := 0; i < desc.GetPrimaryIndex().NumColumns(); i++ {
primaryIdx := desc.GetPrimaryIndex()
for i := 0; i < primaryIdx.NumColumns(); i++ {
// Generate stats for each column in the primary key.
addIndexColumnStatsIfNotExists(desc.GetPrimaryIndex().GetColumnID(i), false /* isInverted */)
err := addIndexColumnStatsIfNotExists(primaryIdx.GetColumnID(i), false /* isInverted */)
if err != nil {
return nil, err
}

// Only collect multi-column stats if enabled.
if i == 0 || !multiColEnabled {
Expand Down Expand Up @@ -394,7 +413,9 @@ func createStatsDefaultColumns(
isInverted := idx.GetType() == descpb.IndexDescriptor_INVERTED && colID == idx.InvertedColumnID()

// Generate stats for each indexed column.
addIndexColumnStatsIfNotExists(colID, isInverted)
if err := addIndexColumnStatsIfNotExists(colID, isInverted); err != nil {
return nil, err
}

// Only collect multi-column stats if enabled.
if j == 0 || !multiColEnabled {
Expand Down Expand Up @@ -438,7 +459,9 @@ func createStatsDefaultColumns(
return nil, err
}
isInverted := colinfo.ColumnTypeIsInvertedIndexable(col.GetType())
addIndexColumnStatsIfNotExists(colID, isInverted)
if err := addIndexColumnStatsIfNotExists(colID, isInverted); err != nil {
return nil, err
}
}
}
}
Expand All @@ -447,6 +470,12 @@ func createStatsDefaultColumns(
nonIdxCols := 0
for i := 0; i < len(desc.PublicColumns()) && nonIdxCols < maxNonIndexCols; i++ {
col := desc.PublicColumns()[i]

// Do not collect stats for virtual computed columns.
if col.IsVirtual() {
continue
}

colList := []descpb.ColumnID{col.GetID()}

if !trackStatsIfNotExists(colList) {
Expand Down
30 changes: 30 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,36 @@ upper_bound range_rows distinct_range_rows equal_rows
2 0 0 1
3 0 0 1

# Test that stats are not collected for virtual columns.
statement ok
CREATE TABLE virt (
a INT,
v INT AS (a + 10) VIRTUAL,
INDEX (v)
)

statement ok
INSERT INTO virt VALUES (1), (2), (3)

statement ok
CREATE STATISTICS s FROM virt

query TTIIB colnames,rowsort
SELECT
statistics_name,
column_names,
row_count,
null_count,
histogram_id IS NOT NULL AS has_histogram
FROM
[SHOW STATISTICS FOR TABLE virt]
ORDER BY
column_names::STRING, created
----
statistics_name column_names row_count null_count has_histogram
s {a} 3 0 true
s {rowid} 3 0 true

# Test that non-index columns have histograms collected for them, with
# up to 2 buckets.
statement ok
Expand Down

0 comments on commit 4ac6bf3

Please sign in to comment.