From dfc99843deb09975a6ca8fb45bac4aa0b28d5383 Mon Sep 17 00:00:00 2001 From: Mark Sirek Date: Wed, 16 Mar 2022 14:57:49 -0700 Subject: [PATCH] stats: table-level setting to turn auto stats collection on/off Fixes #40989 Previously, there was no way to enable or disable automatic statistics collection at the table level. It could only be turned on or off via the `sql.stats.automatic_collection.enabled` cluster setting. This was inadequate because statistics collection can be expensive for large tables, and it would be desirable to defer collection until after data is finished loading, or in off hours. Also, small tables which are frequently updated may trigger statistics collection leading to unnecessary overhead and/or unpredictable query plan changes. To address this, this patch adds support for setting of the following cluster settings at the table level: ``` sql.stats.automatic_collection.enabled sql.stats.automatic_collection.fraction_stale_rows sql.stats.automatic_collection.min_stale_rows ``` for example: ``` ALTER TABLE t1 SET ("sql.stats.automatic_collection.enabled" = true); ALTER TABLE t1 SET ("sql.stats.automatic_collection.fraction_stale_rows" = 0.1, "sql.stats.automatic_collection.min_stale_rows" = 2000); ``` The table-level setting takes precedence over the cluster setting. Release justification: Low risk fix for missing fine-grained control over automatic statistics collection. Release note (sql change): Automatic statistics collection can now be enabled or disabled for individual tables, taking precedence over the cluster setting, for example: ``` ALTER TABLE t1 SET ("sql.stats.automatic_collection.enabled" = true); ALTER TABLE t1 SET ("sql.stats.automatic_collection.enabled" = false); ALTER TABLE t1 RESET ("sql.stats.automatic_collection.enabled"); ``` RESET removes the setting value entirely, in which case the cluster setting of the same name is in effect for the table. Cluster settings `sql.stats.automatic_collection.fraction_stale_rows` and `sql.stats.automatic_collection.min_stale_rows` can now also be set at the table level, either at table creation time, or later, independent of whether auto stats is enabled: ``` ALTER TABLE t1 SET ("sql.stats.automatic_collection.fraction_stale_rows" = 0.1, "sql.stats.automatic_collection.min_stale_rows" = 2000); CREATE TABLE t1 (a INT, b INT) WITH ("sql.stats.automatic_collection.enabled" = true, "sql.stats.automatic_collection.min_stale_rows" = 1000000, "sql.stats.automatic_collection.fraction_stale_rows" = 0.05 ); ``` Tables that have auto stats collection explicitly enabled or disabled may be discovered by querying system tables, for example, find all tables with auto stats enabled at the table level: ``` SELECT tbl.database_name || '.' || tbl.schema_name || '.' || tbl.name FROM crdb_internal.tables AS tbl INNER JOIN system.descriptor AS d ON d.id = tbl.table_id WHERE tbl.database_name IS NOT NULL AND tbl.database_name <> '%s' AND tbl.drop_time IS NULL AND crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', d.descriptor, false)->'table'->'clusterSettingsForTable' ->> 'sqlStatsAutomaticCollectionEnabled' = 'true'; ?column? -------------------- defaultdb.mws.t1 ``` --- pkg/settings/cluster/cluster_settings.go | 14 ++ pkg/sql/catalog/catpb/catalog.proto | 12 + pkg/sql/catalog/descpb/structured.go | 49 ++++ pkg/sql/catalog/descpb/structured.proto | 5 +- pkg/sql/catalog/descriptor.go | 19 ++ pkg/sql/catalog/tabledesc/structured.go | 39 +++ pkg/sql/catalog/tabledesc/validate.go | 64 +++++ pkg/sql/catalog/tabledesc/validate_test.go | 156 +++++++++++- pkg/sql/distsql_plan_stats.go | 3 + .../logictest/testdata/logic_test/alter_table | 111 ++++++++ .../testdata/logic_test/create_table | 64 +++++ pkg/sql/paramparse/BUILD.bazel | 2 + pkg/sql/paramparse/paramobserver.go | 220 ++++++++++++++++ pkg/sql/set_cluster_setting.go | 3 +- pkg/sql/stats/automatic_stats.go | 236 +++++++++++++++--- pkg/sql/stats/automatic_stats_test.go | 37 +++ 16 files changed, 1000 insertions(+), 34 deletions(-) diff --git a/pkg/settings/cluster/cluster_settings.go b/pkg/settings/cluster/cluster_settings.go index b8f863e1fd3a..e4c976d26a49 100644 --- a/pkg/settings/cluster/cluster_settings.go +++ b/pkg/settings/cluster/cluster_settings.go @@ -78,6 +78,20 @@ func TelemetryOptOut() bool { // (for example, a CLI subcommand that does not connect to a cluster). var NoSettings *Settings // = nil +const ( + // AutoStatsClusterSettingName is the name of the automatic stats collection + // enabled cluster setting. + AutoStatsClusterSettingName = "sql.stats.automatic_collection.enabled" + + // AutoStatsMinStaleSettingName is the name of the automatic stats collection + // min stale rows cluster setting. + AutoStatsMinStaleSettingName = "sql.stats.automatic_collection.min_stale_rows" + + // AutoStatsFractionStaleSettingName is the name of the automatic stats + // collection fraction stale rows cluster setting. + AutoStatsFractionStaleSettingName = "sql.stats.automatic_collection.fraction_stale_rows" +) + // CPUProfileType tracks whether a CPU profile is in progress. type CPUProfileType int32 diff --git a/pkg/sql/catalog/catpb/catalog.proto b/pkg/sql/catalog/catpb/catalog.proto index c38b9ecfb88f..a52ad2699ad1 100644 --- a/pkg/sql/catalog/catpb/catalog.proto +++ b/pkg/sql/catalog/catpb/catalog.proto @@ -217,3 +217,15 @@ message RowLevelTTL { // the relation name. optional bool label_metrics = 10 [(gogoproto.nullable) = false]; } + +// ClusterSettingsForTable represents cluster settings specified at the table +// level. Each setting is nullable so queries of the descriptor in JSON form +// only list values which have been set. +message ClusterSettingsForTable { + option (gogoproto.equal) = true; + optional bool sql_stats_automatic_collection_enabled = 1; + optional int64 sql_stats_automatic_collection_min_stale_rows = 2; + optional double sql_stats_automatic_collection_fraction_stale_rows = 3; +} + + diff --git a/pkg/sql/catalog/descpb/structured.go b/pkg/sql/catalog/descpb/structured.go index 7d8cfac634ca..5d81f2a99463 100644 --- a/pkg/sql/catalog/descpb/structured.go +++ b/pkg/sql/catalog/descpb/structured.go @@ -273,6 +273,55 @@ func (desc *TableDescriptor) Persistence() tree.Persistence { return tree.PersistencePermanent } +// AutoStatsCollectionEnabled indicates if automatic statistics collection is +// explicitly enabled or disabled for this table. If ok is true, then +// enabled==false means auto stats collection is off for this table, and if +// true, auto stats are on for this table. If ok is false, there is no setting +// for this table. +func (desc *TableDescriptor) AutoStatsCollectionEnabled() (enabled bool, ok bool) { + if desc.NoClusterSettingsForTable() { + return false, false + } + if desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionEnabled == nil { + return false, false + } + return *desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionEnabled, true +} + +// AutoStatsMinStaleRows indicates the setting of +// sql.stats.automatic_collection.min_stale_rows for this table. +// If ok is true, then the minStaleRows value is valid, otherwise this has not +// been set at the table level. +func (desc *TableDescriptor) AutoStatsMinStaleRows() (minStaleRows int64, ok bool) { + if desc.NoClusterSettingsForTable() { + return 0, false + } + if desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows == nil { + return 0, false + } + return *desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows, true +} + +// AutoStatsFractionStaleRows indicates the setting of +// sql.stats.automatic_collection.fraction_stale_rows for this table. +// If ok is true, then the fractionStaleRows value is valid, otherwise this has +// not been set at the table level. +func (desc *TableDescriptor) AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) { + if desc.NoClusterSettingsForTable() { + return 0, false + } + if desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows == nil { + return 0, false + } + return *desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows, true +} + +// NoClusterSettingsForTable is true if no cluster settings are set at the +// table level for the given table. +func (desc *TableDescriptor) NoClusterSettingsForTable() bool { + return desc.ClusterSettingsForTable == nil +} + // IsVirtualTable returns true if the TableDescriptor describes a // virtual Table (like the information_schema tables) and thus doesn't // need to be physically stored. diff --git a/pkg/sql/catalog/descpb/structured.proto b/pkg/sql/catalog/descpb/structured.proto index b64ba880e9a6..649220f169cd 100644 --- a/pkg/sql/catalog/descpb/structured.proto +++ b/pkg/sql/catalog/descpb/structured.proto @@ -1198,7 +1198,10 @@ message TableDescriptor { optional uint32 next_constraint_id = 49 [(gogoproto.nullable) = false, (gogoproto.customname) = "NextConstraintID", (gogoproto.casttype) = "ConstraintID"]; - // Next ID: 51 + // ClusterSettingsForTable are cluster settings specified at the table level. + optional cockroach.sql.catalog.catpb.ClusterSettingsForTable cluster_settings_for_table = 51 [(gogoproto.customname)="ClusterSettingsForTable"]; + + // Next ID: 52 } // SurvivalGoal is the survival goal for a database. diff --git a/pkg/sql/catalog/descriptor.go b/pkg/sql/catalog/descriptor.go index e56e2b07d974..5d2965c9731a 100644 --- a/pkg/sql/catalog/descriptor.go +++ b/pkg/sql/catalog/descriptor.go @@ -678,6 +678,25 @@ type TableDescriptor interface { GetExcludeDataFromBackup() bool // GetStorageParams returns a list of storage parameters for the table. GetStorageParams(spaceBetweenEqual bool) []string + // NoClusterSettingsForTable is true if no cluster settings are set at the + // table level for the given table. + NoClusterSettingsForTable() bool + // AutoStatsCollectionEnabled indicates if automatic statistics collection is + // explicitly enabled or disabled for this table. If ok is true, then + // enabled==false means auto stats collection is off for this table, and if + // true, auto stats are on for this table. If ok is false, there is no setting + // for this table. + AutoStatsCollectionEnabled() (enabled bool, ok bool) + // AutoStatsMinStaleRows indicates the setting of + // sql.stats.automatic_collection.min_stale_rows for this table. + // If ok is true, then the minStaleRows value is valid, otherwise this has not + // been set at the table level. + AutoStatsMinStaleRows() (minStaleRows int64, ok bool) + // AutoStatsFractionStaleRows indicates the setting of + // sql.stats.automatic_collection.fraction_stale_rows for this table. + // If ok is true, then the fractionStaleRows value is valid, otherwise this has + // not been set at the table level. + AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) } // TypeDescriptor will eventually be called typedesc.Descriptor. diff --git a/pkg/sql/catalog/tabledesc/structured.go b/pkg/sql/catalog/tabledesc/structured.go index dc21740b7076..89289332cd46 100644 --- a/pkg/sql/catalog/tabledesc/structured.go +++ b/pkg/sql/catalog/tabledesc/structured.go @@ -14,6 +14,7 @@ import ( "context" "fmt" "sort" + "strconv" "strings" "github.com/cockroachdb/cockroach/pkg/clusterversion" @@ -2565,6 +2566,24 @@ func (desc *wrapper) GetStorageParams(spaceBetweenEqual bool) []string { if exclude := desc.GetExcludeDataFromBackup(); exclude { appendStorageParam(`exclude_data_from_backup`, `true`) } + if settings := desc.ClusterSettingsForTable; settings != nil { + // These need to be wrapped in double-quotes because they contain '.' chars. + if settings.SqlStatsAutomaticCollectionEnabled != nil { + value := *settings.SqlStatsAutomaticCollectionEnabled + appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsClusterSettingName), + fmt.Sprintf("%v", value)) + } + if settings.SqlStatsAutomaticCollectionMinStaleRows != nil { + value := *settings.SqlStatsAutomaticCollectionMinStaleRows + appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsMinStaleSettingName), + strconv.FormatInt(value, 10)) + } + if settings.SqlStatsAutomaticCollectionFractionStaleRows != nil { + value := *settings.SqlStatsAutomaticCollectionFractionStaleRows + appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsFractionStaleSettingName), + fmt.Sprintf("%g", value)) + } + } return storageParams } @@ -2583,6 +2602,26 @@ func (desc *wrapper) GetMultiRegionEnumDependencyIfExists() bool { return false } +// NoClusterSettingsForTable implements the TableDescriptor interface. +func (desc *wrapper) NoClusterSettingsForTable() bool { + return desc.TableDesc().NoClusterSettingsForTable() +} + +// AutoStatsCollectionEnabled implements the TableDescriptor interface. +func (desc *wrapper) AutoStatsCollectionEnabled() (enabled bool, ok bool) { + return desc.TableDesc().AutoStatsCollectionEnabled() +} + +// AutoStatsMinStaleRows implements the TableDescriptor interface. +func (desc *wrapper) AutoStatsMinStaleRows() (minStaleRows int64, ok bool) { + return desc.TableDesc().AutoStatsMinStaleRows() +} + +// AutoStatsFractionStaleRows implements the TableDescriptor interface. +func (desc *wrapper) AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) { + return desc.TableDesc().AutoStatsFractionStaleRows() +} + // SetTableLocalityRegionalByTable sets the descriptor's locality config to // regional at the table level in the supplied region. An empty region name // (or its alias PrimaryRegionNotSpecifiedName) denotes that the table is homed in diff --git a/pkg/sql/catalog/tabledesc/validate.go b/pkg/sql/catalog/tabledesc/validate.go index 599d96482bcb..7aad00956e1f 100644 --- a/pkg/sql/catalog/tabledesc/validate.go +++ b/pkg/sql/catalog/tabledesc/validate.go @@ -15,6 +15,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catprivilege" @@ -520,6 +522,8 @@ func (desc *wrapper) ValidateSelf(vea catalog.ValidationErrorAccumulator) { } } + desc.validateClusterSettingsForTable(vea.Report) + if desc.IsSequence() { return } @@ -1501,3 +1505,63 @@ func (desc *wrapper) validatePartitioning() error { ) }) } + +// validateClusterSettingsForTable validates that any new cluster settings at +// the table level hold a valid value. +func (desc *wrapper) validateClusterSettingsForTable(errReportFn func(err error)) { + if desc.ClusterSettingsForTable == nil { + return + } + desc.validateBoolSetting(errReportFn, desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionEnabled, + cluster.AutoStatsClusterSettingName) + desc.validateIntSetting(errReportFn, desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows, + cluster.AutoStatsMinStaleSettingName, settings.NonNegativeInt) + desc.validateFloatSetting(errReportFn, desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows, + cluster.AutoStatsFractionStaleSettingName, settings.NonNegativeFloat) +} + +func (desc *wrapper) verifyProperTableForStatsSetting( + errReportFn func(err error), settingName string, +) { + if desc.IsVirtualTable() { + errReportFn(errors.Newf("Setting %s may not be set on virtual table", settingName)) + } + if !desc.IsTable() { + errReportFn(errors.Newf("Setting %s may not be set on a view or sequence", settingName)) + } +} + +func (desc *wrapper) validateBoolSetting( + errReportFn func(err error), value *bool, settingName string, +) { + if value != nil { + desc.verifyProperTableForStatsSetting(errReportFn, settingName) + } +} + +func (desc *wrapper) validateIntSetting( + errReportFn func(err error), value *int64, settingName string, validateFunc func(v int64) error, +) { + if value != nil { + desc.verifyProperTableForStatsSetting(errReportFn, settingName) + if err := + validateFunc(*value); err != nil { + errReportFn(errors.Wrapf(err, "invalid value for %s", settingName)) + } + } +} + +func (desc *wrapper) validateFloatSetting( + errReportFn func(err error), + value *float64, + settingName string, + validateFunc func(v float64) error, +) { + if value != nil { + desc.verifyProperTableForStatsSetting(errReportFn, settingName) + if err := + validateFunc(*value); err != nil { + errReportFn(errors.Wrapf(err, "invalid value for %s", settingName)) + } + } +} diff --git a/pkg/sql/catalog/tabledesc/validate_test.go b/pkg/sql/catalog/tabledesc/validate_test.go index ab8c710bb31d..5c09da0a8404 100644 --- a/pkg/sql/catalog/tabledesc/validate_test.go +++ b/pkg/sql/catalog/tabledesc/validate_test.go @@ -131,6 +131,7 @@ var validationMap = []struct { "ExcludeDataFromBackup": {status: thisFieldReferencesNoObjects}, "NextConstraintID": {status: iSolemnlySwearThisFieldIsValidated}, "DeclarativeSchemaChangerState": {status: iSolemnlySwearThisFieldIsValidated}, + "ClusterSettingsForTable": {status: iSolemnlySwearThisFieldIsValidated}, }, }, { @@ -289,6 +290,14 @@ var validationMap = []struct { "DeclarativeSchemaChangerState": {status: thisFieldReferencesNoObjects}, }, }, + { + obj: catpb.ClusterSettingsForTable{}, + fieldMap: map[string]validationStatusInfo{ + "SqlStatsAutomaticCollectionEnabled": {status: iSolemnlySwearThisFieldIsValidated}, + "SqlStatsAutomaticCollectionMinStaleRows": {status: iSolemnlySwearThisFieldIsValidated}, + "SqlStatsAutomaticCollectionFractionStaleRows": {status: iSolemnlySwearThisFieldIsValidated}, + }, + }, } type validationStatusInfo struct { @@ -332,6 +341,9 @@ func TestValidateTableDesc(t *testing.T) { computedExpr := "1 + 1" generatedAsIdentitySequenceOptionExpr := " START 2 INCREMENT 3 CACHE 10" + boolTrue := true + negativeOne := int64(-1) + negativeOneFloat := float64(-1) testData := []struct { err string @@ -1869,6 +1881,79 @@ func TestValidateTableDesc(t *testing.T) { }, }, }, + {`Setting sql.stats.automatic_collection.enabled may not be set on virtual table`, + descpb.TableDescriptor{ + ID: catconstants.MinVirtualID, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "bar"}, + }, + NextColumnID: 2, + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionEnabled: &boolTrue}, + }}, + {`Setting sql.stats.automatic_collection.enabled may not be set on a view or sequence`, + descpb.TableDescriptor{ + ID: 51, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "a", Type: types.Int}, + }, + SequenceOpts: &descpb.TableDescriptor_SequenceOpts{ + Increment: 1, + }, + PrimaryIndex: descpb.IndexDescriptor{ + ID: 1, + Name: "primary", + Unique: true, + KeyColumnIDs: []descpb.ColumnID{1}, + KeyColumnNames: []string{"a"}, + KeyColumnDirections: []descpb.IndexDescriptor_Direction{descpb.IndexDescriptor_ASC}, + Version: descpb.PrimaryIndexWithStoredColumnsVersion, + EncodingType: descpb.PrimaryIndexEncoding, + ConstraintID: 1, + }, + Families: []descpb.ColumnFamilyDescriptor{ + {ID: 0, Name: "primary", + ColumnIDs: []descpb.ColumnID{1}, + ColumnNames: []string{"a"}, + }, + }, + NextColumnID: 2, + NextFamilyID: 1, + NextIndexID: 5, + NextConstraintID: 2, + Privileges: catpb.NewBasePrivilegeDescriptor(security.AdminRoleName()), + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionEnabled: &boolTrue}, + }, + }, + {`invalid value for sql.stats.automatic_collection.min_stale_rows: cannot be set to a negative value: -1`, + descpb.TableDescriptor{ + ID: 2, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "bar"}, + }, + NextColumnID: 2, + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionMinStaleRows: &negativeOne}, + }}, + {`invalid value for sql.stats.automatic_collection.fraction_stale_rows: cannot set to a negative value: -1.000000`, + descpb.TableDescriptor{ + ID: 2, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "bar"}, + }, + NextColumnID: 2, + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionFractionStaleRows: &negativeOneFloat}, + }}, } for i, d := range testData { t.Run(d.err, func(t *testing.T) { @@ -1890,6 +1975,7 @@ func TestValidateTableDesc(t *testing.T) { func TestValidateCrossTableReferences(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() + boolTrue := true pointer := func(s string) *string { return &s @@ -1897,6 +1983,8 @@ func TestValidateCrossTableReferences(t *testing.T) { tests := []struct { err string + otherErr string + testName string desc descpb.TableDescriptor otherDescs []descpb.TableDescriptor }{ @@ -2116,6 +2204,7 @@ func TestValidateCrossTableReferences(t *testing.T) { }, // Views. { // 10 + err: ``, desc: descpb.TableDescriptor{ Name: "foo", ID: 51, @@ -2207,8 +2296,59 @@ func TestValidateCrossTableReferences(t *testing.T) { DependedOnBy: []descpb.TableDescriptor_Reference{{ID: 123}}, }}, }, - // Sequences. { // 13 + err: ``, + testName: `Stats setting in view`, + otherErr: `relation "bar" (52): Setting sql.stats.automatic_collection.enabled may not be set on a view or sequence`, + desc: descpb.TableDescriptor{ + ID: 51, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "a", Type: types.Int}, + }, + PrimaryIndex: descpb.IndexDescriptor{ + ID: 1, + Name: "primary", + Unique: true, + KeyColumnIDs: []descpb.ColumnID{1}, + KeyColumnNames: []string{"a"}, + KeyColumnDirections: []descpb.IndexDescriptor_Direction{descpb.IndexDescriptor_ASC}, + Version: descpb.PrimaryIndexWithStoredColumnsVersion, + EncodingType: descpb.PrimaryIndexEncoding, + ConstraintID: 1, + }, + Families: []descpb.ColumnFamilyDescriptor{ + {ID: 0, Name: "primary", + ColumnIDs: []descpb.ColumnID{1}, + ColumnNames: []string{"a"}, + }, + }, + NextColumnID: 2, + NextFamilyID: 1, + NextIndexID: 5, + NextConstraintID: 2, + Privileges: catpb.NewBasePrivilegeDescriptor(security.AdminRoleName()), + }, + otherDescs: []descpb.TableDescriptor{{ + Name: "bar", + ID: 52, + ParentID: 1, + FormatVersion: descpb.InterleavedFormatVersion, + UnexposedParentSchemaID: keys.PublicSchemaID, + ViewQuery: "SELECT * FROM foo", + DependsOn: []descpb.ID{51}, + NextColumnID: 2, + Columns: []descpb.ColumnDescriptor{ + {Name: "a", ID: 1, Type: types.Int}, + }, + Privileges: catpb.NewBasePrivilegeDescriptor(security.AdminRoleName()), + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionEnabled: &boolTrue}, + }}, + }, + // Sequences. + { // 14 err: `depended-on-by relation "bar" (52) does not have a column with ID 123`, desc: descpb.TableDescriptor{ Name: "foo", @@ -2259,6 +2399,20 @@ func TestValidateCrossTableReferences(t *testing.T) { } else if expectedErr != err.Error() { t.Errorf("%d: expected \"%s\", but found \"%s\"", i, expectedErr, err.Error()) } + // Validate other descriptors for specific test cases. + if test.testName == "Stats setting in view" { + expectedErr = test.otherErr + nonTableDesc := NewBuilder(&test.otherDescs[0]).BuildImmutable() + err := validate.Self(clusterversion.TestingClusterVersion, nonTableDesc) + if expectedErr == "" && err != nil { + t.Errorf("%d: expected success, but found error: \"%+v\"", i, err) + } else if expectedErr != "" && err == nil { + t.Errorf("%d: expected \"%s\", but found success: %+v", i, expectedErr, + test.otherDescs[0]) + } else if expectedErr != "" && expectedErr != err.Error() { + t.Errorf("%d: expected \"%s\", but found \"%+v\"", i, expectedErr, err) + } + } }) } } diff --git a/pkg/sql/distsql_plan_stats.go b/pkg/sql/distsql_plan_stats.go index fb097b34c726..6fcc49d9daba 100644 --- a/pkg/sql/distsql_plan_stats.go +++ b/pkg/sql/distsql_plan_stats.go @@ -212,6 +212,9 @@ func (dsp *DistSQLPlanner) createStatsPlan( var rowsExpected uint64 if len(tableStats) > 0 { overhead := stats.AutomaticStatisticsFractionStaleRows.Get(&dsp.st.SV) + if autoStatsFractionStaleRowsForTable, ok := desc.TableDesc().AutoStatsFractionStaleRows(); ok { + overhead = autoStatsFractionStaleRowsForTable + } // Convert to a signed integer first to make the linter happy. rowsExpected = uint64(int64( // The total expected number of rows is the same number that was measured diff --git a/pkg/sql/logictest/testdata/logic_test/alter_table b/pkg/sql/logictest/testdata/logic_test/alter_table index ab682db27ef5..b1c08a6fa75a 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_table +++ b/pkg/sql/logictest/testdata/logic_test/alter_table @@ -2364,3 +2364,114 @@ COMMIT; statement ok ROLLBACK; + +subtest table_settings + +statement ok +CREATE TABLE t5 (a int) + +# Turn on automatic stats collection +statement ok +ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = true) + +# Verify automatic collection is enabled. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL + AND crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' + ->> 'sqlStatsAutomaticCollectionEnabled' = 'true' +---- +{"sqlStatsAutomaticCollectionEnabled": true} + +# Strings in settings should be converted to the proper data type. +statement ok +ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = 'false') + +# Verify automatic collection is disabled. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL + AND crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' + ->> 'sqlStatsAutomaticCollectionEnabled' = 'false' +---- +{"sqlStatsAutomaticCollectionEnabled": false} + +# SHOW CREATE TABLE displays the value properly. +query T +SELECT create_statement FROM [SHOW CREATE TABLE t5] +---- +CREATE TABLE public.t5 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t5_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.enabled" = false) + +statement error pq: parameter "sql.stats.automatic_collection.enabled" requires a Boolean value +ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = 123) + +statement ok +ALTER TABLE t5 RESET ("sql.stats.automatic_collection.enabled") + +# Verify the automatic collection setting is removed. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL +---- +{} + +statement error pq: invalid value for sql.stats.automatic_collection.fraction_stale_rows: could not parse "hello" as type float: strconv.ParseFloat: parsing "hello": invalid syntax +ALTER TABLE t5 SET ("sql.stats.automatic_collection.fraction_stale_rows" = 'hello') + +statement error pq: invalid value for sql.stats.automatic_collection.min_stale_rows: could not parse "world" as type int: strconv.ParseInt: parsing "world": invalid syntax +ALTER TABLE t5 SET ("sql.stats.automatic_collection.min_stale_rows" = 'world') + +# Verify strings can be converted to proper setting values. +statement ok +ALTER TABLE t5 SET ("sql.stats.automatic_collection.fraction_stale_rows" = '0.15', + "sql.stats.automatic_collection.min_stale_rows" = '1234') + +# Verify settings +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL +---- +{"sqlStatsAutomaticCollectionFractionStaleRows": 0.15, "sqlStatsAutomaticCollectionMinStaleRows": "1234"} + +query T +SELECT create_statement FROM [SHOW CREATE TABLE t5] +---- +CREATE TABLE public.t5 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t5_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.min_stale_rows" = 1234, "sql.stats.automatic_collection.fraction_stale_rows" = 0.15) diff --git a/pkg/sql/logictest/testdata/logic_test/create_table b/pkg/sql/logictest/testdata/logic_test/create_table index 039042219dd2..489cbe24c224 100644 --- a/pkg/sql/logictest/testdata/logic_test/create_table +++ b/pkg/sql/logictest/testdata/logic_test/create_table @@ -872,3 +872,67 @@ CREATE TABLE public.t_good_hash_indexes_2 ( crdb_internal_a_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 5:::INT8)) VIRTUAL, CONSTRAINT t_good_hash_indexes_2_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=5) ) + +subtest table_settings + +statement ok +CREATE TABLE t1 (a int) WITH ("sql.stats.automatic_collection.enabled" = true) + +# Verify automatic collection is enabled. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't1' + AND tbl.drop_time IS NULL +---- +{"sqlStatsAutomaticCollectionEnabled": true} + +statement ok +DROP TABLE t1 + +statement ok +CREATE TABLE t1 (a int) WITH ("sql.stats.automatic_collection.fraction_stale_rows" = 0.5, + "sql.stats.automatic_collection.min_stale_rows" = 4000) + +# Verify settings +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't1' + AND tbl.drop_time IS NULL +---- +{"sqlStatsAutomaticCollectionFractionStaleRows": 0.5, "sqlStatsAutomaticCollectionMinStaleRows": "4000"} + +query T +SELECT create_statement FROM [SHOW CREATE TABLE t1] +---- +CREATE TABLE public.t1 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t1_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.min_stale_rows" = 4000, "sql.stats.automatic_collection.fraction_stale_rows" = 0.5) + +statement ok +CREATE TABLE t11 (a int) WITH ("sql.stats.automatic_collection.enabled" = true, + "sql.stats.automatic_collection.fraction_stale_rows" = 1.797693134862315708145274237317043567981e+308, + "sql.stats.automatic_collection.min_stale_rows" = 9223372036854775807) + +# Using max values for auto stats table settings +query T +SELECT create_statement FROM [SHOW CREATE TABLE t11] +---- +CREATE TABLE public.t11 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t11_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.enabled" = true, "sql.stats.automatic_collection.min_stale_rows" = 9223372036854775807, "sql.stats.automatic_collection.fraction_stale_rows" = 1.7976931348623157e+308) diff --git a/pkg/sql/paramparse/BUILD.bazel b/pkg/sql/paramparse/BUILD.bazel index 7faf6d4c7020..23a703e3020d 100644 --- a/pkg/sql/paramparse/BUILD.bazel +++ b/pkg/sql/paramparse/BUILD.bazel @@ -12,6 +12,8 @@ go_library( deps = [ "//pkg/geo/geoindex", "//pkg/server/telemetry", + "//pkg/settings", + "//pkg/settings/cluster", "//pkg/sql/catalog/catpb", "//pkg/sql/catalog/descpb", "//pkg/sql/catalog/tabledesc", diff --git a/pkg/sql/paramparse/paramobserver.go b/pkg/sql/paramparse/paramobserver.go index 584d33be5955..92256da58c5d 100644 --- a/pkg/sql/paramparse/paramobserver.go +++ b/pkg/sql/paramparse/paramobserver.go @@ -16,6 +16,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/geo/geoindex" "github.com/cockroachdb/cockroach/pkg/server/telemetry" + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc" @@ -139,6 +141,34 @@ func boolFromDatum(evalCtx *tree.EvalContext, key string, datum tree.Datum) (boo return bool(*s), nil } +func intFromDatum(evalCtx *tree.EvalContext, key string, datum tree.Datum) (int64, error) { + intDatum := datum + if stringVal, err := DatumAsString(evalCtx, key, datum); err == nil { + if intDatum, err = tree.ParseDInt(stringVal); err != nil { + return 0, errors.Wrapf(err, "invalid value for %s", key) + } + } + s, err := DatumAsInt(evalCtx, key, intDatum) + if err != nil { + return 0, err + } + return s, nil +} + +func floatFromDatum(evalCtx *tree.EvalContext, key string, datum tree.Datum) (float64, error) { + floatDatum := datum + if stringVal, err := DatumAsString(evalCtx, key, datum); err == nil { + if floatDatum, err = tree.ParseDFloat(stringVal); err != nil { + return 0, errors.Wrapf(err, "invalid value for %s", key) + } + } + s, err := DatumAsFloat(evalCtx, key, floatDatum) + if err != nil { + return 0, err + } + return s, nil +} + type tableParam struct { onSet func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, evalCtx *tree.EvalContext, key string, datum tree.Datum) error onReset func(po *TableStorageParamObserver, evalCtx *tree.EvalContext, key string) error @@ -465,6 +495,18 @@ var tableParams = map[string]tableParam{ return nil }, }, + cluster.AutoStatsClusterSettingName: { + onSet: boolTableSettingFunc, + onReset: tableSettingResetFunc(boolSetting), + }, + cluster.AutoStatsMinStaleSettingName: { + onSet: intTableSettingFunc(settings.NonNegativeInt), + onReset: tableSettingResetFunc(intSetting), + }, + cluster.AutoStatsFractionStaleSettingName: { + onSet: floatTableSettingFunc(settings.NonNegativeFloat), + onReset: tableSettingResetFunc(floatSetting), + }, } func init() { @@ -508,6 +550,184 @@ func init() { } } +type settingDataType int + +const ( + boolSetting = iota + intSetting + floatSetting +) + +func boolTableSettingFunc( + ctx context.Context, + po *TableStorageParamObserver, + semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, + key string, + datum tree.Datum, +) error { + boolVal, err := boolFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.ClusterSettingsForTable == nil { + po.tableDesc.ClusterSettingsForTable = &catpb.ClusterSettingsForTable{} + } + var settingPtr **bool + var ok bool + if settingPtr, ok = settingValuePointer(key, po.tableDesc.ClusterSettingsForTable).(**bool); !ok { + return errors.Newf("table setting %s has unexpected type", key) + } + if settingPtr == nil { + return errors.Newf("unable to set table setting %s", key) + } + setting := *settingPtr + if setting == nil { + *settingPtr = &boolVal + return nil + } else if *setting == boolVal { + return nil + } + *setting = boolVal + return nil +} + +func intTableSettingFunc( + validateFunc func(v int64) error, +) func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + return func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + intVal, err := intFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.ClusterSettingsForTable == nil { + po.tableDesc.ClusterSettingsForTable = &catpb.ClusterSettingsForTable{} + } + var settingPtr **int64 + var ok bool + if settingPtr, ok = settingValuePointer(key, po.tableDesc.ClusterSettingsForTable).(**int64); !ok { + return errors.Newf("table setting %s has unexpected type", key) + } + if settingPtr == nil { + return errors.Newf("unable to set table setting %s", key) + } + if err = validateFunc(intVal); err != nil { + return errors.Wrapf(err, "invalid value for %s", key) + } + setting := *settingPtr + if setting == nil { + *settingPtr = &intVal + return nil + } else if *setting == intVal { + return nil + } + *setting = intVal + return nil + } +} + +func floatTableSettingFunc( + validateFunc func(v float64) error, +) func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + return func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + floatVal, err := floatFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.ClusterSettingsForTable == nil { + po.tableDesc.ClusterSettingsForTable = &catpb.ClusterSettingsForTable{} + } + var settingPtr **float64 + var ok bool + if settingPtr, ok = settingValuePointer(key, po.tableDesc.ClusterSettingsForTable).(**float64); !ok { + return errors.Newf("table setting %s has unexpected type", key) + } + if settingPtr == nil { + return errors.Newf("unable to set table setting %s", key) + } + if err = validateFunc(floatVal); err != nil { + return errors.Wrapf(err, "invalid value for %s", key) + } + setting := *settingPtr + if setting == nil { + *settingPtr = &floatVal + return nil + } else if *setting == floatVal { + return nil + } + *setting = floatVal + return nil + } +} + +func tableSettingResetFunc( + settingDataType settingDataType, +) func(po *TableStorageParamObserver, evalCtx *tree.EvalContext, key string) error { + return func(po *TableStorageParamObserver, evalCtx *tree.EvalContext, key string) error { + if po.tableDesc.ClusterSettingsForTable == nil { + return nil + } + settingPtr := settingValuePointer(key, po.tableDesc.ClusterSettingsForTable) + if settingPtr == nil { + return errors.Newf("unable to reset table setting %s", key) + } + var ok bool + switch settingDataType { + case boolSetting: + var setting **bool + if setting, ok = settingPtr.(**bool); !ok { + return errors.Newf("unable to reset table setting %s", key) + } + if *setting == nil { + // This setting is unset or has already been reset. + return nil + } + *setting = nil + case intSetting: + var setting **int64 + if setting, ok = settingPtr.(**int64); !ok { + return errors.Newf("unable to reset table setting %s", key) + } + if *setting == nil { + // This setting is unset or has already been reset. + return nil + } + *setting = nil + case floatSetting: + var setting **float64 + if setting, ok = settingPtr.(**float64); !ok { + return errors.Newf("unable to reset table setting %s", key) + } + if *setting == nil { + // This setting is unset or has already been reset. + return nil + } + *setting = nil + default: + return errors.Newf("unable to reset table setting %s", key) + } + return nil + } +} + +func settingValuePointer( + settingName string, clusterSettingsForTable *catpb.ClusterSettingsForTable, +) interface{} { + switch settingName { + case cluster.AutoStatsClusterSettingName: + return &clusterSettingsForTable.SqlStatsAutomaticCollectionEnabled + case cluster.AutoStatsMinStaleSettingName: + return &clusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows + case cluster.AutoStatsFractionStaleSettingName: + return &clusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows + } + return nil +} + // onSet implements the StorageParamObserver interface. func (po *TableStorageParamObserver) onSet( ctx context.Context, diff --git a/pkg/sql/set_cluster_setting.go b/pkg/sql/set_cluster_setting.go index 7e2df6c23ef5..095ea5a09cb8 100644 --- a/pkg/sql/set_cluster_setting.go +++ b/pkg/sql/set_cluster_setting.go @@ -36,7 +36,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb" "github.com/cockroachdb/cockroach/pkg/sql/sessioninit" "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" - "github.com/cockroachdb/cockroach/pkg/sql/stats" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -226,7 +225,7 @@ func (n *setClusterSettingNode) startExec(params runParams) error { // Report tracked cluster settings via telemetry. // TODO(justin): implement a more general mechanism for tracking these. switch n.name { - case stats.AutoStatsClusterSettingName: + case cluster.AutoStatsClusterSettingName: switch expectedEncodedValue { case "true": telemetry.Inc(sqltelemetry.TurnAutoStatsOnUseCounter) diff --git a/pkg/sql/stats/automatic_stats.go b/pkg/sql/stats/automatic_stats.go index 36c9093d2210..d374f05a6c2f 100644 --- a/pkg/sql/stats/automatic_stats.go +++ b/pkg/sql/stats/automatic_stats.go @@ -34,15 +34,11 @@ import ( "github.com/cockroachdb/errors" ) -// AutoStatsClusterSettingName is the name of the automatic stats collection -// cluster setting. -const AutoStatsClusterSettingName = "sql.stats.automatic_collection.enabled" - // AutomaticStatisticsClusterMode controls the cluster setting for enabling // automatic table statistics collection. var AutomaticStatisticsClusterMode = settings.RegisterBoolSetting( settings.TenantWritable, - AutoStatsClusterSettingName, + cluster.AutoStatsClusterSettingName, "automatic statistics collection mode", true, ).WithPublic() @@ -81,7 +77,7 @@ var AutomaticStatisticsMaxIdleTime = settings.RegisterFloatSetting( var AutomaticStatisticsFractionStaleRows = func() *settings.FloatSetting { s := settings.RegisterFloatSetting( settings.TenantWritable, - "sql.stats.automatic_collection.fraction_stale_rows", + cluster.AutoStatsFractionStaleSettingName, "target fraction of stale rows per table that will trigger a statistics refresh", 0.2, settings.NonNegativeFloat, @@ -96,7 +92,7 @@ var AutomaticStatisticsFractionStaleRows = func() *settings.FloatSetting { var AutomaticStatisticsMinStaleRows = func() *settings.IntSetting { s := settings.RegisterIntSetting( settings.TenantWritable, - "sql.stats.automatic_collection.min_stale_rows", + cluster.AutoStatsMinStaleSettingName, "target minimum number of stale rows per table that will trigger a statistics refresh", 500, settings.NonNegativeInt, @@ -212,6 +208,18 @@ type Refresher struct { // mutationCounts contains aggregated mutation counts for each table that // have yet to be processed by the refresher. mutationCounts map[descpb.ID]int64 + + // autoStatsEnabled is true if auto stats collection is explicitly enabled for + // the given table, which may override the cluster setting. + autoStatsEnabled map[descpb.ID]bool + + // autoStatsFractionStaleRows maps TableID to the table-level setting of + // sql.stats.automatic_collection.fraction_stale_rows, if it's been set. + autoStatsFractionStaleRows map[descpb.ID]float64 + + // autoStatsMinStaleRows maps TableID to the table-level setting of + // sql.stats.automatic_collection.min_stale_rows, if it's been set. + autoStatsMinStaleRows map[descpb.ID]int64 } // mutation contains metadata about a SQL mutation and is the message passed to @@ -219,6 +227,10 @@ type Refresher struct { type mutation struct { tableID descpb.ID rowsAffected int + // Table-level settings which may override cluster settings + autoStatsEnabled *bool + autoStatsFractionStaleRows *float64 + autoStatsMinStaleRows *int64 } // MakeRefresher creates a new Refresher. @@ -232,15 +244,18 @@ func MakeRefresher( randSource := rand.NewSource(rand.Int63()) return &Refresher{ - AmbientContext: ambientCtx, - st: st, - ex: ex, - cache: cache, - randGen: makeAutoStatsRand(randSource), - mutations: make(chan mutation, refreshChanBufferLen), - asOfTime: asOfTime, - extraTime: time.Duration(rand.Int63n(int64(time.Hour))), - mutationCounts: make(map[descpb.ID]int64, 16), + AmbientContext: ambientCtx, + st: st, + ex: ex, + cache: cache, + randGen: makeAutoStatsRand(randSource), + mutations: make(chan mutation, refreshChanBufferLen), + asOfTime: asOfTime, + extraTime: time.Duration(rand.Int63n(int64(time.Hour))), + mutationCounts: make(map[descpb.ID]int64, 16), + autoStatsEnabled: make(map[descpb.ID]bool), + autoStatsFractionStaleRows: make(map[descpb.ID]float64), + autoStatsMinStaleRows: make(map[descpb.ID]int64), } } @@ -274,6 +289,9 @@ func (r *Refresher) Start( case <-timer.C: mutationCounts := r.mutationCounts + autoStatsEnabledForTable := r.autoStatsEnabled + autoStatsFractionStaleRows := r.autoStatsFractionStaleRows + autoStatsMinStaleRows := r.autoStatsMinStaleRows if err := stopper.RunAsyncTask( ctx, "stats.Refresher: maybeRefreshStats", func(ctx context.Context) { // Wait so that the latest changes will be reflected according to the @@ -288,13 +306,35 @@ func (r *Refresher) Start( } for tableID, rowsAffected := range mutationCounts { - // Check the cluster setting before each refresh in case it was - // disabled recently. - if !AutomaticStatisticsClusterMode.Get(&r.st.SV) { - break + // Check the cluster setting and table setting before each refresh + // in case it was disabled recently. + var enabledForTable bool + if enabled, ok := autoStatsEnabledForTable[tableID]; ok { + if !enabled { + continue + } + enabledForTable = true + } else if !AutomaticStatisticsClusterMode.Get(&r.st.SV) { + // Only break if there's no possibility of finding tables of the + // remaining mutations in the autoStatsEnabledForTable map. + if len(autoStatsEnabledForTable) == 0 { + break + } else { + continue + } } + var fractionStaleRowsForTable *float64 + var minStaleRowsForTable *int64 - r.maybeRefreshStats(ctx, stopper, tableID, rowsAffected, r.asOfTime) + if fractionStaleRows, ok := autoStatsFractionStaleRows[tableID]; ok { + fractionStaleRowsForTable = &fractionStaleRows + } + if minStaleRows, ok := autoStatsMinStaleRows[tableID]; ok { + minStaleRowsForTable = &minStaleRows + } + + r.maybeRefreshStats(ctx, stopper, tableID, rowsAffected, r.asOfTime, + enabledForTable, fractionStaleRowsForTable, minStaleRowsForTable) select { case <-stopper.ShouldQuiesce(): @@ -308,10 +348,25 @@ func (r *Refresher) Start( }); err != nil { log.Errorf(ctx, "failed to refresh stats: %v", err) } + // This clears out any tables that may have been added to the + // mutationCounts map by ensureAllTables. This is by design. We don't + // want to constantly refresh tables that are read-only. r.mutationCounts = make(map[descpb.ID]int64, len(r.mutationCounts)) + r.autoStatsEnabled = make(map[descpb.ID]bool) + r.autoStatsFractionStaleRows = make(map[descpb.ID]float64) + r.autoStatsMinStaleRows = make(map[descpb.ID]int64) case mut := <-r.mutations: r.mutationCounts[mut.tableID] += int64(mut.rowsAffected) + if mut.autoStatsEnabled != nil { + r.autoStatsEnabled[mut.tableID] = *mut.autoStatsEnabled + } + if mut.autoStatsFractionStaleRows != nil { + r.autoStatsFractionStaleRows[mut.tableID] = *mut.autoStatsFractionStaleRows + } + if mut.autoStatsMinStaleRows != nil { + r.autoStatsMinStaleRows[mut.tableID] = *mut.autoStatsMinStaleRows + } case <-stopper.ShouldQuiesce(): return @@ -326,12 +381,67 @@ func (r *Refresher) Start( func (r *Refresher) ensureAllTables( ctx context.Context, settings *settings.Values, initialTableCollectionDelay time.Duration, ) { + if !AutomaticStatisticsClusterMode.Get(settings) { - // Automatic stats are disabled. + // Use a historical read so as to disable txn contention resolution. + // A table-level setting of sql.stats.automatic_collection.enabled=true is + // checked and only those tables are included in this scan. + getTablesWithAutoStatsExplicitlyEnabledQuery := fmt.Sprintf( + ` +SELECT + tbl.table_id +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id + AS OF SYSTEM TIME '-%s' +WHERE + tbl.database_name IS NOT NULL + AND tbl.database_name <> '%s' + AND tbl.drop_time IS NULL + AND ( + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', d.descriptor, false)->'table'->>'viewQuery' + ) IS NULL + AND + (crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' ->> 'sqlStatsAutomaticCollectionEnabled' = 'true' + );`, + initialTableCollectionDelay, + systemschema.SystemDatabaseName, + ) + + it, err := r.ex.QueryIterator( + ctx, + "get-tables-with-autostats-explicitly-enabled", + nil, /* txn */ + getTablesWithAutoStatsExplicitlyEnabledQuery, + ) + if err == nil { + var ok bool + for ok, err = it.Next(ctx); ok; ok, err = it.Next(ctx) { + row := it.Cur() + tableID := descpb.ID(*row[0].(*tree.DInt)) + // Don't create statistics for virtual tables. + // The query already excludes views and system tables. + if !descpb.IsVirtualTable(tableID) { + r.mutationCounts[tableID] += 0 + r.autoStatsEnabled[tableID] = true + } + } + } + if err != nil { + // Note that it is ok if the iterator returned partial results before + // encountering an error - in that case we added entries to + // r.mutationCounts for some of the tables and operation of adding an + // entry is idempotent (i.e. we didn't mess up anything for the next + // call to this method). + log.Errorf(ctx, "failed to get tables for automatic stats: %v", err) + } return } // Use a historical read so as to disable txn contention resolution. + // A table-level setting of sql.stats.automatic_collection.enabled of null, + // meaning not set, or true qualifies rows we're interested in. getAllTablesQuery := fmt.Sprintf( ` SELECT @@ -346,7 +456,13 @@ WHERE AND tbl.drop_time IS NULL AND ( crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', d.descriptor, false)->'table'->>'viewQuery' - ) IS NULL;`, + ) IS NULL + AND + (crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable'->'sqlStatsAutomaticCollectionEnabled' IS NULL + OR crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' ->> 'sqlStatsAutomaticCollectionEnabled' = 'true' + );`, initialTableCollectionDelay, systemschema.SystemDatabaseName, ) @@ -385,7 +501,16 @@ WHERE // successful insert, update, upsert or delete. rowsAffected refers to the // number of rows written as part of the mutation operation. func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected int) { - if !AutomaticStatisticsClusterMode.Get(&r.st.SV) { + // The table-level setting of sql.stats.automatic_collection.enabled takes + // precedence over the cluster setting. + var enabledAtTableLevel *bool + + if autoStatsCollectionEnabled, ok := table.TableDesc().AutoStatsCollectionEnabled(); ok { + if !autoStatsCollectionEnabled { + return + } + enabledAtTableLevel = &autoStatsCollectionEnabled + } else if !AutomaticStatisticsClusterMode.Get(&r.st.SV) { // Automatic stats are disabled. return } @@ -393,11 +518,26 @@ func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected i // Don't collect stats for this kind of table: system, virtual, view, etc. return } + var fractionStaleRowsForTable *float64 + var minStaleRowsForTable *int64 + + if minStaleRows, ok := table.TableDesc().AutoStatsMinStaleRows(); ok { + minStaleRowsForTable = &minStaleRows + } + if fractionStaleRows, ok := table.TableDesc().AutoStatsFractionStaleRows(); ok { + fractionStaleRowsForTable = &fractionStaleRows + } // Send mutation info to the refresher thread to avoid adding latency to // the calling transaction. select { - case r.mutations <- mutation{tableID: table.GetID(), rowsAffected: rowsAffected}: + case r.mutations <- mutation{ + tableID: table.GetID(), + rowsAffected: rowsAffected, + autoStatsEnabled: enabledAtTableLevel, + autoStatsFractionStaleRows: fractionStaleRowsForTable, + autoStatsMinStaleRows: minStaleRowsForTable, + }: default: // Don't block if there is no room in the buffered channel. if bufferedChanFullLogLimiter.ShouldLog() { @@ -410,12 +550,24 @@ func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected i // maybeRefreshStats implements the core logic described in the comment for // Refresher. It is called by the background Refresher thread. +// +// enabledForTable, if true, indicates that auto stats is explicitly enabled at +// the table level and any mutation struct built by maybeRefreshStats should +// include that information. +// +// fractionStaleRowsForTable and minStaleRowsForTable, if non-nil, indicate +// table-level settings for sql.stats.automatic_collection.fraction_stale_rows +// and sql.stats.automatic_collection.min_stale_rows to be used in place of the +// cluster settings of the same name. func (r *Refresher) maybeRefreshStats( ctx context.Context, stopper *stop.Stopper, tableID descpb.ID, rowsAffected int64, asOf time.Duration, + enabledForTable bool, + fractionStaleRowsForTable *float64, + minStaleRowsForTable *int64, ) { tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID) if err != nil { @@ -453,15 +605,31 @@ func (r *Refresher) maybeRefreshStats( mustRefresh = true } - targetRows := int64(rowCount*AutomaticStatisticsFractionStaleRows.Get(&r.st.SV)) + - AutomaticStatisticsMinStaleRows.Get(&r.st.SV) - if !mustRefresh && rowsAffected < math.MaxInt32 && r.randGen.randInt(targetRows) >= rowsAffected { + statsFractionStaleRows := AutomaticStatisticsFractionStaleRows.Get(&r.st.SV) + if fractionStaleRowsForTable != nil { + statsFractionStaleRows = *fractionStaleRowsForTable + } + statsMinStaleRows := AutomaticStatisticsMinStaleRows.Get(&r.st.SV) + if minStaleRowsForTable != nil { + statsMinStaleRows = *minStaleRowsForTable + } + targetRows := int64(rowCount*statsFractionStaleRows) + statsMinStaleRows + // randInt will panic if we pass it a value of 0. + randomTargetRows := int64(0) + if targetRows > 0 { + randomTargetRows = r.randGen.randInt(targetRows) + } + if !mustRefresh && rowsAffected < math.MaxInt32 && randomTargetRows >= rowsAffected { // No refresh is happening this time. return } if err := r.refreshStats(ctx, tableID, asOf); err != nil { if errors.Is(err, ConcurrentCreateStatsError) { + var enabledAtTableLevel *bool + if enabledForTable { + enabledAtTableLevel = &enabledForTable + } // Another stats job was already running. Attempt to reschedule this // refresh. if mustRefresh { @@ -471,14 +639,22 @@ func (r *Refresher) maybeRefreshStats( // cycle so that we have another chance to trigger a refresh. We pass // rowsAffected=0 so that we don't force a refresh if another node has // already done it. - r.mutations <- mutation{tableID: tableID, rowsAffected: 0} + r.mutations <- mutation{tableID: tableID, rowsAffected: 0, + autoStatsEnabled: enabledAtTableLevel, + autoStatsFractionStaleRows: fractionStaleRowsForTable, + autoStatsMinStaleRows: minStaleRowsForTable, + } } else { // If this refresh was caused by a "dice roll", we want to make sure // that the refresh is rescheduled so that we adhere to the // AutomaticStatisticsFractionStaleRows statistical ideal. We // ensure that the refresh is triggered during the next cycle by // passing a very large number for rowsAffected. - r.mutations <- mutation{tableID: tableID, rowsAffected: math.MaxInt32} + r.mutations <- mutation{tableID: tableID, rowsAffected: math.MaxInt32, + autoStatsEnabled: enabledAtTableLevel, + autoStatsFractionStaleRows: fractionStaleRowsForTable, + autoStatsMinStaleRows: minStaleRowsForTable, + } } return } diff --git a/pkg/sql/stats/automatic_stats_test.go b/pkg/sql/stats/automatic_stats_test.go index 592c6d7c5cc5..fc8b04a10fd9 100644 --- a/pkg/sql/stats/automatic_stats_test.go +++ b/pkg/sql/stats/automatic_stats_test.go @@ -84,6 +84,8 @@ func TestMaybeRefreshStats(t *testing.T) { // even though rowsAffected=0. refresher.maybeRefreshStats( ctx, s.Stopper(), descA.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { t.Fatal(err) @@ -93,6 +95,31 @@ func TestMaybeRefreshStats(t *testing.T) { // is 0, so refreshing will not succeed. refresher.maybeRefreshStats( ctx, s.Stopper(), descA.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ + ) + if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { + t.Fatal(err) + } + + // Setting minStaleRows for the table prevents refreshing from occurring. + minStaleRowsForTable := int64(100000000) + refresher.maybeRefreshStats( + ctx, s.Stopper(), descA.GetID(), 10 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, &minStaleRowsForTable, /* minStaleRowsForTable */ + ) + if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { + t.Fatal(err) + } + + // Setting fractionStaleRows for the table can also prevent refreshing from + // occurring, though this is a not a typical value for this setting. + fractionStaleRowsForTable := float64(100000000) + refresher.maybeRefreshStats( + ctx, s.Stopper(), descA.GetID(), 10 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + &fractionStaleRowsForTable /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { t.Fatal(err) @@ -102,6 +129,8 @@ func TestMaybeRefreshStats(t *testing.T) { // updated than exist in the table, the probability of a refresh is 100%. refresher.maybeRefreshStats( ctx, s.Stopper(), descA.GetID(), 10 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) if err := checkStatsCount(ctx, cache, descA, 2 /* expected */); err != nil { t.Fatal(err) @@ -113,6 +142,8 @@ func TestMaybeRefreshStats(t *testing.T) { descVW := desctestutils.TestingGetPublicTableDescriptor(s.DB(), keys.SystemSQLCodec, "t", "vw") refresher.maybeRefreshStats( ctx, s.Stopper(), descVW.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) select { case <-refresher.mutations: @@ -312,6 +343,8 @@ func TestAverageRefreshTime(t *testing.T) { // is 0. refresher.maybeRefreshStats( ctx, s.Stopper(), table.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) if err := checkStatsCount(ctx, cache, table, 20 /* expected */); err != nil { t.Fatal(err) @@ -362,6 +395,8 @@ func TestAverageRefreshTime(t *testing.T) { // were deleted. refresher.maybeRefreshStats( ctx, s.Stopper(), table.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) if err := checkStatsCount(ctx, cache, table, 15 /* expected */); err != nil { t.Fatal(err) @@ -455,6 +490,8 @@ func TestNoRetryOnFailure(t *testing.T) { // Try to refresh stats on a table that doesn't exist. r.maybeRefreshStats( ctx, s.Stopper(), 100 /* tableID */, math.MaxInt32, time.Microsecond, /* asOfTime */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) // Ensure that we will not try to refresh tableID 100 again.