From 4ac6bf32391e6bc59098b0c9c9d06441d4664342 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Sun, 1 Aug 2021 16:53:42 -0400 Subject: [PATCH] sql: do not collect stats for virtual columns Previously, incorrect stats were collected for virtual computed columns. This was because DistSQLPlanner plans table readers on the table's primary index which does not include virtual computed columns. These stats were likely never used by the optimizer where all stats originate from Scans. Scans only produce stats for the columns they produce and Scans cannot produce virtual columns. So even though stats for virtual columns were collected, they never propagated through statistics estimation. This commit disables stats collection for virtual columns to avoid confusion and future bugs resulting from incorrect stats. Fixes #68186 Release note: None --- pkg/sql/create_stats.go | 41 ++++++++++++++++--- .../testdata/logic_test/distsql_stats | 30 ++++++++++++++ 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/pkg/sql/create_stats.go b/pkg/sql/create_stats.go index 30786b93497e..dd0478ccdcb6 100644 --- a/pkg/sql/create_stats.go +++ b/pkg/sql/create_stats.go @@ -335,12 +335,25 @@ func createStatsDefaultColumns( // addIndexColumnStatsIfNotExists appends column stats for the given column // ID if they have not already been added. Histogram stats are collected for // every indexed column. - addIndexColumnStatsIfNotExists := func(colID descpb.ColumnID, isInverted bool) { + addIndexColumnStatsIfNotExists := func(colID descpb.ColumnID, isInverted bool) error { + col, err := desc.FindColumnWithID(colID) + if err != nil { + return err + } + + // Do not collect stats for virtual computed columns. DistSQLPlanner + // cannot currently collect stats for these columns because it plans + // table readers on the table's primary index which does not include + // virtual computed columns. + if col.IsVirtual() { + return nil + } + colList := []descpb.ColumnID{colID} // Check for existing stats and remember the requested stats. if !trackStatsIfNotExists(colList) { - return + return nil } colStat := jobspb.CreateStatsDetails_ColStat{ @@ -360,12 +373,18 @@ func createStatsDefaultColumns( colStat.HasHistogram = true colStats = append(colStats, colStat) } + + return nil } // Add column stats for the primary key. - for i := 0; i < desc.GetPrimaryIndex().NumColumns(); i++ { + primaryIdx := desc.GetPrimaryIndex() + for i := 0; i < primaryIdx.NumColumns(); i++ { // Generate stats for each column in the primary key. - addIndexColumnStatsIfNotExists(desc.GetPrimaryIndex().GetColumnID(i), false /* isInverted */) + err := addIndexColumnStatsIfNotExists(primaryIdx.GetColumnID(i), false /* isInverted */) + if err != nil { + return nil, err + } // Only collect multi-column stats if enabled. if i == 0 || !multiColEnabled { @@ -394,7 +413,9 @@ func createStatsDefaultColumns( isInverted := idx.GetType() == descpb.IndexDescriptor_INVERTED && colID == idx.InvertedColumnID() // Generate stats for each indexed column. - addIndexColumnStatsIfNotExists(colID, isInverted) + if err := addIndexColumnStatsIfNotExists(colID, isInverted); err != nil { + return nil, err + } // Only collect multi-column stats if enabled. if j == 0 || !multiColEnabled { @@ -438,7 +459,9 @@ func createStatsDefaultColumns( return nil, err } isInverted := colinfo.ColumnTypeIsInvertedIndexable(col.GetType()) - addIndexColumnStatsIfNotExists(colID, isInverted) + if err := addIndexColumnStatsIfNotExists(colID, isInverted); err != nil { + return nil, err + } } } } @@ -447,6 +470,12 @@ func createStatsDefaultColumns( nonIdxCols := 0 for i := 0; i < len(desc.PublicColumns()) && nonIdxCols < maxNonIndexCols; i++ { col := desc.PublicColumns()[i] + + // Do not collect stats for virtual computed columns. + if col.IsVirtual() { + continue + } + colList := []descpb.ColumnID{col.GetID()} if !trackStatsIfNotExists(colList) { diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_stats b/pkg/sql/logictest/testdata/logic_test/distsql_stats index 44e7d037eeb4..e19d7f02f29b 100644 --- a/pkg/sql/logictest/testdata/logic_test/distsql_stats +++ b/pkg/sql/logictest/testdata/logic_test/distsql_stats @@ -823,6 +823,36 @@ upper_bound range_rows distinct_range_rows equal_rows 2 0 0 1 3 0 0 1 +# Test that stats are not collected for virtual columns. +statement ok +CREATE TABLE virt ( + a INT, + v INT AS (a + 10) VIRTUAL, + INDEX (v) +) + +statement ok +INSERT INTO virt VALUES (1), (2), (3) + +statement ok +CREATE STATISTICS s FROM virt + +query TTIIB colnames,rowsort +SELECT + statistics_name, + column_names, + row_count, + null_count, + histogram_id IS NOT NULL AS has_histogram +FROM + [SHOW STATISTICS FOR TABLE virt] +ORDER BY + column_names::STRING, created +---- +statistics_name column_names row_count null_count has_histogram +s {a} 3 0 true +s {rowid} 3 0 true + # Test that non-index columns have histograms collected for them, with # up to 2 buckets. statement ok