Skip to content

Commit

Permalink
Merge #68260 #68312
Browse files Browse the repository at this point in the history
68260: multitenant: move tenantcostmodel r=RaduBerinde a=RaduBerinde

Move `tenantcostmodel` from `pkg/util` to the new `pkg/multitenant`.

Release note: None

68312: sql: do not collect stats for virtual columns r=mgartner a=mgartner

Previously, incorrect stats were collected for virtual computed columns.
This was because DistSQLPlanner plans table readers on the table's
primary index which does not include virtual computed columns.

These stats were likely never used by the optimizer where all stats
originate from Scans. Scans only produce stats for the columns they
produce and Scans cannot produce virtual columns. So even though stats
for virtual columns were collected, they never propagated through
statistics estimation.

This commit disables stats collection for virtual columns to avoid
confusion and future bugs resulting from incorrect stats.

Fixes #68186

Release note: None

Co-authored-by: Radu Berinde <[email protected]>
Co-authored-by: Marcus Gartner <[email protected]>
  • Loading branch information
3 people committed Aug 2, 2021
3 parents 2317cc8 + 675af34 + c20e1e5 commit 6eafde9
Show file tree
Hide file tree
Showing 10 changed files with 79 additions and 22 deletions.
4 changes: 2 additions & 2 deletions pkg/kv/kvserver/tenantrate/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/multitenant",
"//pkg/multitenant/tenantcostmodel",
"//pkg/roachpb:with-mocks",
"//pkg/settings",
"//pkg/util/metric",
"//pkg/util/metric/aggmetric",
"//pkg/util/quotapool",
"//pkg/util/syncutil",
"//pkg/util/tenantcostmodel",
"//pkg/util/timeutil",
"@com_github_cockroachdb_errors//:errors",
],
Expand All @@ -33,13 +33,13 @@ go_test(
data = glob(["testdata/**"]),
deps = [
":tenantrate",
"//pkg/multitenant/tenantcostmodel",
"//pkg/roachpb:with-mocks",
"//pkg/settings/cluster",
"//pkg/testutils",
"//pkg/testutils/metrictestutils",
"//pkg/util/leaktest",
"//pkg/util/metric",
"//pkg/util/tenantcostmodel",
"//pkg/util/timeutil",
"@com_github_cockroachdb_datadriven//:datadriven",
"@com_github_cockroachdb_errors//:errors",
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/tenantrate/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ package tenantrate
import (
"context"

"github.com/cockroachdb/cockroach/pkg/multitenant/tenantcostmodel"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/util/quotapool"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/tenantcostmodel"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
)
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/tenantrate/limiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ import (
"sync"
"time"

"github.com/cockroachdb/cockroach/pkg/multitenant/tenantcostmodel"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/quotapool"
"github.com/cockroachdb/cockroach/pkg/util/tenantcostmodel"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
)

Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/tenantrate/limiter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ import (
"time"

"github.com/cockroachdb/cockroach/pkg/kv/kvserver/tenantrate"
"github.com/cockroachdb/cockroach/pkg/multitenant/tenantcostmodel"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/metrictestutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/tenantcostmodel"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/datadriven"
"github.com/cockroachdb/errors"
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/tenantrate/settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
package tenantrate

import (
"github.com/cockroachdb/cockroach/pkg/multitenant/tenantcostmodel"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/util/tenantcostmodel"
)

// Config contains the configuration of the rate limiter.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ go_library(
"model.go",
"settings.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/util/tenantcostmodel",
importpath = "github.com/cockroachdb/cockroach/pkg/multitenant/tenantcostmodel",
visibility = ["//visibility:public"],
deps = ["//pkg/settings"],
)
File renamed without changes.
File renamed without changes.
41 changes: 35 additions & 6 deletions pkg/sql/create_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,12 +335,25 @@ func createStatsDefaultColumns(
// addIndexColumnStatsIfNotExists appends column stats for the given column
// ID if they have not already been added. Histogram stats are collected for
// every indexed column.
addIndexColumnStatsIfNotExists := func(colID descpb.ColumnID, isInverted bool) {
addIndexColumnStatsIfNotExists := func(colID descpb.ColumnID, isInverted bool) error {
col, err := desc.FindColumnWithID(colID)
if err != nil {
return err
}

// Do not collect stats for virtual computed columns. DistSQLPlanner
// cannot currently collect stats for these columns because it plans
// table readers on the table's primary index which does not include
// virtual computed columns.
if col.IsVirtual() {
return nil
}

colList := []descpb.ColumnID{colID}

// Check for existing stats and remember the requested stats.
if !trackStatsIfNotExists(colList) {
return
return nil
}

colStat := jobspb.CreateStatsDetails_ColStat{
Expand All @@ -359,12 +372,18 @@ func createStatsDefaultColumns(
colStat.HasHistogram = true
colStats = append(colStats, colStat)
}

return nil
}

// Add column stats for the primary key.
for i := 0; i < desc.GetPrimaryIndex().NumKeyColumns(); i++ {
primaryIdx := desc.GetPrimaryIndex()
for i := 0; i < primaryIdx.NumKeyColumns(); i++ {
// Generate stats for each column in the primary key.
addIndexColumnStatsIfNotExists(desc.GetPrimaryIndex().GetKeyColumnID(i), false /* isInverted */)
err := addIndexColumnStatsIfNotExists(primaryIdx.GetKeyColumnID(i), false /* isInverted */)
if err != nil {
return nil, err
}

// Only collect multi-column stats if enabled.
if i == 0 || !multiColEnabled {
Expand Down Expand Up @@ -393,7 +412,9 @@ func createStatsDefaultColumns(
isInverted := idx.GetType() == descpb.IndexDescriptor_INVERTED && colID == idx.InvertedColumnID()

// Generate stats for each indexed column.
addIndexColumnStatsIfNotExists(colID, isInverted)
if err := addIndexColumnStatsIfNotExists(colID, isInverted); err != nil {
return nil, err
}

// Only collect multi-column stats if enabled.
if j == 0 || !multiColEnabled {
Expand Down Expand Up @@ -437,7 +458,9 @@ func createStatsDefaultColumns(
return nil, err
}
isInverted := colinfo.ColumnTypeIsInvertedIndexable(col.GetType())
addIndexColumnStatsIfNotExists(colID, isInverted)
if err := addIndexColumnStatsIfNotExists(colID, isInverted); err != nil {
return nil, err
}
}
}
}
Expand All @@ -446,6 +469,12 @@ func createStatsDefaultColumns(
nonIdxCols := 0
for i := 0; i < len(desc.PublicColumns()) && nonIdxCols < maxNonIndexCols; i++ {
col := desc.PublicColumns()[i]

// Do not collect stats for virtual computed columns.
if col.IsVirtual() {
continue
}

colList := []descpb.ColumnID{col.GetID()}

if !trackStatsIfNotExists(colList) {
Expand Down
46 changes: 37 additions & 9 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -826,8 +826,38 @@ upper_bound range_rows distinct_range_rows equal_rows
statement ok
SET experimental_enable_expression_indexes=true

# Test that inaccessible columns that represent expression indexes have
# histograms collected for them.
# Test that stats are not collected for virtual columns.
statement ok
CREATE TABLE virt (
a INT,
v INT AS (a + 10) VIRTUAL,
INDEX (v)
)

statement ok
INSERT INTO virt VALUES (1), (2), (3)

statement ok
CREATE STATISTICS s FROM virt

query TTIIB colnames,rowsort
SELECT
statistics_name,
column_names,
row_count,
null_count,
histogram_id IS NOT NULL AS has_histogram
FROM
[SHOW STATISTICS FOR TABLE virt]
ORDER BY
column_names::STRING, created
----
statistics_name column_names row_count null_count has_histogram
s {a} 3 0 true
s {rowid} 3 0 true

# Test that stats are not collect for inaccessible virtual columns that
# represent expression indexes.
statement ok
CREATE TABLE expression (
a INT,
Expand All @@ -853,13 +883,11 @@ FROM
ORDER BY
column_names::STRING, created
----
statistics_name column_names row_count null_count has_histogram
s {a} 3 0 true
s {b} 3 0 true
s {crdb_internal_idx_expr_1} 3 3 false
s {crdb_internal_idx_expr} 3 3 true
s {j} 3 0 false
s {rowid} 3 0 true
statistics_name column_names row_count null_count has_histogram
s {a} 3 0 true
s {b} 3 0 true
s {j} 3 0 false
s {rowid} 3 0 true

# Test that non-index columns have histograms collected for them, with
# up to 2 buckets.
Expand Down

0 comments on commit 6eafde9

Please sign in to comment.