diff --git a/pkg/settings/cluster/cluster_settings.go b/pkg/settings/cluster/cluster_settings.go index b8f863e1fd3a..e4c976d26a49 100644 --- a/pkg/settings/cluster/cluster_settings.go +++ b/pkg/settings/cluster/cluster_settings.go @@ -78,6 +78,20 @@ func TelemetryOptOut() bool { // (for example, a CLI subcommand that does not connect to a cluster). var NoSettings *Settings // = nil +const ( + // AutoStatsClusterSettingName is the name of the automatic stats collection + // enabled cluster setting. + AutoStatsClusterSettingName = "sql.stats.automatic_collection.enabled" + + // AutoStatsMinStaleSettingName is the name of the automatic stats collection + // min stale rows cluster setting. + AutoStatsMinStaleSettingName = "sql.stats.automatic_collection.min_stale_rows" + + // AutoStatsFractionStaleSettingName is the name of the automatic stats + // collection fraction stale rows cluster setting. + AutoStatsFractionStaleSettingName = "sql.stats.automatic_collection.fraction_stale_rows" +) + // CPUProfileType tracks whether a CPU profile is in progress. type CPUProfileType int32 diff --git a/pkg/sql/catalog/catpb/catalog.proto b/pkg/sql/catalog/catpb/catalog.proto index c38b9ecfb88f..a52ad2699ad1 100644 --- a/pkg/sql/catalog/catpb/catalog.proto +++ b/pkg/sql/catalog/catpb/catalog.proto @@ -217,3 +217,15 @@ message RowLevelTTL { // the relation name. optional bool label_metrics = 10 [(gogoproto.nullable) = false]; } + +// ClusterSettingsForTable represents cluster settings specified at the table +// level. Each setting is nullable so queries of the descriptor in JSON form +// only list values which have been set. +message ClusterSettingsForTable { + option (gogoproto.equal) = true; + optional bool sql_stats_automatic_collection_enabled = 1; + optional int64 sql_stats_automatic_collection_min_stale_rows = 2; + optional double sql_stats_automatic_collection_fraction_stale_rows = 3; +} + + diff --git a/pkg/sql/catalog/descpb/structured.go b/pkg/sql/catalog/descpb/structured.go index 7d8cfac634ca..5d81f2a99463 100644 --- a/pkg/sql/catalog/descpb/structured.go +++ b/pkg/sql/catalog/descpb/structured.go @@ -273,6 +273,55 @@ func (desc *TableDescriptor) Persistence() tree.Persistence { return tree.PersistencePermanent } +// AutoStatsCollectionEnabled indicates if automatic statistics collection is +// explicitly enabled or disabled for this table. If ok is true, then +// enabled==false means auto stats collection is off for this table, and if +// true, auto stats are on for this table. If ok is false, there is no setting +// for this table. +func (desc *TableDescriptor) AutoStatsCollectionEnabled() (enabled bool, ok bool) { + if desc.NoClusterSettingsForTable() { + return false, false + } + if desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionEnabled == nil { + return false, false + } + return *desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionEnabled, true +} + +// AutoStatsMinStaleRows indicates the setting of +// sql.stats.automatic_collection.min_stale_rows for this table. +// If ok is true, then the minStaleRows value is valid, otherwise this has not +// been set at the table level. +func (desc *TableDescriptor) AutoStatsMinStaleRows() (minStaleRows int64, ok bool) { + if desc.NoClusterSettingsForTable() { + return 0, false + } + if desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows == nil { + return 0, false + } + return *desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows, true +} + +// AutoStatsFractionStaleRows indicates the setting of +// sql.stats.automatic_collection.fraction_stale_rows for this table. +// If ok is true, then the fractionStaleRows value is valid, otherwise this has +// not been set at the table level. +func (desc *TableDescriptor) AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) { + if desc.NoClusterSettingsForTable() { + return 0, false + } + if desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows == nil { + return 0, false + } + return *desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows, true +} + +// NoClusterSettingsForTable is true if no cluster settings are set at the +// table level for the given table. +func (desc *TableDescriptor) NoClusterSettingsForTable() bool { + return desc.ClusterSettingsForTable == nil +} + // IsVirtualTable returns true if the TableDescriptor describes a // virtual Table (like the information_schema tables) and thus doesn't // need to be physically stored. diff --git a/pkg/sql/catalog/descpb/structured.proto b/pkg/sql/catalog/descpb/structured.proto index b64ba880e9a6..649220f169cd 100644 --- a/pkg/sql/catalog/descpb/structured.proto +++ b/pkg/sql/catalog/descpb/structured.proto @@ -1198,7 +1198,10 @@ message TableDescriptor { optional uint32 next_constraint_id = 49 [(gogoproto.nullable) = false, (gogoproto.customname) = "NextConstraintID", (gogoproto.casttype) = "ConstraintID"]; - // Next ID: 51 + // ClusterSettingsForTable are cluster settings specified at the table level. + optional cockroach.sql.catalog.catpb.ClusterSettingsForTable cluster_settings_for_table = 51 [(gogoproto.customname)="ClusterSettingsForTable"]; + + // Next ID: 52 } // SurvivalGoal is the survival goal for a database. diff --git a/pkg/sql/catalog/descriptor.go b/pkg/sql/catalog/descriptor.go index e56e2b07d974..5d2965c9731a 100644 --- a/pkg/sql/catalog/descriptor.go +++ b/pkg/sql/catalog/descriptor.go @@ -678,6 +678,25 @@ type TableDescriptor interface { GetExcludeDataFromBackup() bool // GetStorageParams returns a list of storage parameters for the table. GetStorageParams(spaceBetweenEqual bool) []string + // NoClusterSettingsForTable is true if no cluster settings are set at the + // table level for the given table. + NoClusterSettingsForTable() bool + // AutoStatsCollectionEnabled indicates if automatic statistics collection is + // explicitly enabled or disabled for this table. If ok is true, then + // enabled==false means auto stats collection is off for this table, and if + // true, auto stats are on for this table. If ok is false, there is no setting + // for this table. + AutoStatsCollectionEnabled() (enabled bool, ok bool) + // AutoStatsMinStaleRows indicates the setting of + // sql.stats.automatic_collection.min_stale_rows for this table. + // If ok is true, then the minStaleRows value is valid, otherwise this has not + // been set at the table level. + AutoStatsMinStaleRows() (minStaleRows int64, ok bool) + // AutoStatsFractionStaleRows indicates the setting of + // sql.stats.automatic_collection.fraction_stale_rows for this table. + // If ok is true, then the fractionStaleRows value is valid, otherwise this has + // not been set at the table level. + AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) } // TypeDescriptor will eventually be called typedesc.Descriptor. diff --git a/pkg/sql/catalog/tabledesc/structured.go b/pkg/sql/catalog/tabledesc/structured.go index dc21740b7076..89289332cd46 100644 --- a/pkg/sql/catalog/tabledesc/structured.go +++ b/pkg/sql/catalog/tabledesc/structured.go @@ -14,6 +14,7 @@ import ( "context" "fmt" "sort" + "strconv" "strings" "github.com/cockroachdb/cockroach/pkg/clusterversion" @@ -2565,6 +2566,24 @@ func (desc *wrapper) GetStorageParams(spaceBetweenEqual bool) []string { if exclude := desc.GetExcludeDataFromBackup(); exclude { appendStorageParam(`exclude_data_from_backup`, `true`) } + if settings := desc.ClusterSettingsForTable; settings != nil { + // These need to be wrapped in double-quotes because they contain '.' chars. + if settings.SqlStatsAutomaticCollectionEnabled != nil { + value := *settings.SqlStatsAutomaticCollectionEnabled + appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsClusterSettingName), + fmt.Sprintf("%v", value)) + } + if settings.SqlStatsAutomaticCollectionMinStaleRows != nil { + value := *settings.SqlStatsAutomaticCollectionMinStaleRows + appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsMinStaleSettingName), + strconv.FormatInt(value, 10)) + } + if settings.SqlStatsAutomaticCollectionFractionStaleRows != nil { + value := *settings.SqlStatsAutomaticCollectionFractionStaleRows + appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsFractionStaleSettingName), + fmt.Sprintf("%g", value)) + } + } return storageParams } @@ -2583,6 +2602,26 @@ func (desc *wrapper) GetMultiRegionEnumDependencyIfExists() bool { return false } +// NoClusterSettingsForTable implements the TableDescriptor interface. +func (desc *wrapper) NoClusterSettingsForTable() bool { + return desc.TableDesc().NoClusterSettingsForTable() +} + +// AutoStatsCollectionEnabled implements the TableDescriptor interface. +func (desc *wrapper) AutoStatsCollectionEnabled() (enabled bool, ok bool) { + return desc.TableDesc().AutoStatsCollectionEnabled() +} + +// AutoStatsMinStaleRows implements the TableDescriptor interface. +func (desc *wrapper) AutoStatsMinStaleRows() (minStaleRows int64, ok bool) { + return desc.TableDesc().AutoStatsMinStaleRows() +} + +// AutoStatsFractionStaleRows implements the TableDescriptor interface. +func (desc *wrapper) AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) { + return desc.TableDesc().AutoStatsFractionStaleRows() +} + // SetTableLocalityRegionalByTable sets the descriptor's locality config to // regional at the table level in the supplied region. An empty region name // (or its alias PrimaryRegionNotSpecifiedName) denotes that the table is homed in diff --git a/pkg/sql/catalog/tabledesc/validate.go b/pkg/sql/catalog/tabledesc/validate.go index 599d96482bcb..7aad00956e1f 100644 --- a/pkg/sql/catalog/tabledesc/validate.go +++ b/pkg/sql/catalog/tabledesc/validate.go @@ -15,6 +15,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catprivilege" @@ -520,6 +522,8 @@ func (desc *wrapper) ValidateSelf(vea catalog.ValidationErrorAccumulator) { } } + desc.validateClusterSettingsForTable(vea.Report) + if desc.IsSequence() { return } @@ -1501,3 +1505,63 @@ func (desc *wrapper) validatePartitioning() error { ) }) } + +// validateClusterSettingsForTable validates that any new cluster settings at +// the table level hold a valid value. +func (desc *wrapper) validateClusterSettingsForTable(errReportFn func(err error)) { + if desc.ClusterSettingsForTable == nil { + return + } + desc.validateBoolSetting(errReportFn, desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionEnabled, + cluster.AutoStatsClusterSettingName) + desc.validateIntSetting(errReportFn, desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows, + cluster.AutoStatsMinStaleSettingName, settings.NonNegativeInt) + desc.validateFloatSetting(errReportFn, desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows, + cluster.AutoStatsFractionStaleSettingName, settings.NonNegativeFloat) +} + +func (desc *wrapper) verifyProperTableForStatsSetting( + errReportFn func(err error), settingName string, +) { + if desc.IsVirtualTable() { + errReportFn(errors.Newf("Setting %s may not be set on virtual table", settingName)) + } + if !desc.IsTable() { + errReportFn(errors.Newf("Setting %s may not be set on a view or sequence", settingName)) + } +} + +func (desc *wrapper) validateBoolSetting( + errReportFn func(err error), value *bool, settingName string, +) { + if value != nil { + desc.verifyProperTableForStatsSetting(errReportFn, settingName) + } +} + +func (desc *wrapper) validateIntSetting( + errReportFn func(err error), value *int64, settingName string, validateFunc func(v int64) error, +) { + if value != nil { + desc.verifyProperTableForStatsSetting(errReportFn, settingName) + if err := + validateFunc(*value); err != nil { + errReportFn(errors.Wrapf(err, "invalid value for %s", settingName)) + } + } +} + +func (desc *wrapper) validateFloatSetting( + errReportFn func(err error), + value *float64, + settingName string, + validateFunc func(v float64) error, +) { + if value != nil { + desc.verifyProperTableForStatsSetting(errReportFn, settingName) + if err := + validateFunc(*value); err != nil { + errReportFn(errors.Wrapf(err, "invalid value for %s", settingName)) + } + } +} diff --git a/pkg/sql/catalog/tabledesc/validate_test.go b/pkg/sql/catalog/tabledesc/validate_test.go index ab8c710bb31d..5c09da0a8404 100644 --- a/pkg/sql/catalog/tabledesc/validate_test.go +++ b/pkg/sql/catalog/tabledesc/validate_test.go @@ -131,6 +131,7 @@ var validationMap = []struct { "ExcludeDataFromBackup": {status: thisFieldReferencesNoObjects}, "NextConstraintID": {status: iSolemnlySwearThisFieldIsValidated}, "DeclarativeSchemaChangerState": {status: iSolemnlySwearThisFieldIsValidated}, + "ClusterSettingsForTable": {status: iSolemnlySwearThisFieldIsValidated}, }, }, { @@ -289,6 +290,14 @@ var validationMap = []struct { "DeclarativeSchemaChangerState": {status: thisFieldReferencesNoObjects}, }, }, + { + obj: catpb.ClusterSettingsForTable{}, + fieldMap: map[string]validationStatusInfo{ + "SqlStatsAutomaticCollectionEnabled": {status: iSolemnlySwearThisFieldIsValidated}, + "SqlStatsAutomaticCollectionMinStaleRows": {status: iSolemnlySwearThisFieldIsValidated}, + "SqlStatsAutomaticCollectionFractionStaleRows": {status: iSolemnlySwearThisFieldIsValidated}, + }, + }, } type validationStatusInfo struct { @@ -332,6 +341,9 @@ func TestValidateTableDesc(t *testing.T) { computedExpr := "1 + 1" generatedAsIdentitySequenceOptionExpr := " START 2 INCREMENT 3 CACHE 10" + boolTrue := true + negativeOne := int64(-1) + negativeOneFloat := float64(-1) testData := []struct { err string @@ -1869,6 +1881,79 @@ func TestValidateTableDesc(t *testing.T) { }, }, }, + {`Setting sql.stats.automatic_collection.enabled may not be set on virtual table`, + descpb.TableDescriptor{ + ID: catconstants.MinVirtualID, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "bar"}, + }, + NextColumnID: 2, + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionEnabled: &boolTrue}, + }}, + {`Setting sql.stats.automatic_collection.enabled may not be set on a view or sequence`, + descpb.TableDescriptor{ + ID: 51, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "a", Type: types.Int}, + }, + SequenceOpts: &descpb.TableDescriptor_SequenceOpts{ + Increment: 1, + }, + PrimaryIndex: descpb.IndexDescriptor{ + ID: 1, + Name: "primary", + Unique: true, + KeyColumnIDs: []descpb.ColumnID{1}, + KeyColumnNames: []string{"a"}, + KeyColumnDirections: []descpb.IndexDescriptor_Direction{descpb.IndexDescriptor_ASC}, + Version: descpb.PrimaryIndexWithStoredColumnsVersion, + EncodingType: descpb.PrimaryIndexEncoding, + ConstraintID: 1, + }, + Families: []descpb.ColumnFamilyDescriptor{ + {ID: 0, Name: "primary", + ColumnIDs: []descpb.ColumnID{1}, + ColumnNames: []string{"a"}, + }, + }, + NextColumnID: 2, + NextFamilyID: 1, + NextIndexID: 5, + NextConstraintID: 2, + Privileges: catpb.NewBasePrivilegeDescriptor(security.AdminRoleName()), + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionEnabled: &boolTrue}, + }, + }, + {`invalid value for sql.stats.automatic_collection.min_stale_rows: cannot be set to a negative value: -1`, + descpb.TableDescriptor{ + ID: 2, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "bar"}, + }, + NextColumnID: 2, + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionMinStaleRows: &negativeOne}, + }}, + {`invalid value for sql.stats.automatic_collection.fraction_stale_rows: cannot set to a negative value: -1.000000`, + descpb.TableDescriptor{ + ID: 2, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "bar"}, + }, + NextColumnID: 2, + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionFractionStaleRows: &negativeOneFloat}, + }}, } for i, d := range testData { t.Run(d.err, func(t *testing.T) { @@ -1890,6 +1975,7 @@ func TestValidateTableDesc(t *testing.T) { func TestValidateCrossTableReferences(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() + boolTrue := true pointer := func(s string) *string { return &s @@ -1897,6 +1983,8 @@ func TestValidateCrossTableReferences(t *testing.T) { tests := []struct { err string + otherErr string + testName string desc descpb.TableDescriptor otherDescs []descpb.TableDescriptor }{ @@ -2116,6 +2204,7 @@ func TestValidateCrossTableReferences(t *testing.T) { }, // Views. { // 10 + err: ``, desc: descpb.TableDescriptor{ Name: "foo", ID: 51, @@ -2207,8 +2296,59 @@ func TestValidateCrossTableReferences(t *testing.T) { DependedOnBy: []descpb.TableDescriptor_Reference{{ID: 123}}, }}, }, - // Sequences. { // 13 + err: ``, + testName: `Stats setting in view`, + otherErr: `relation "bar" (52): Setting sql.stats.automatic_collection.enabled may not be set on a view or sequence`, + desc: descpb.TableDescriptor{ + ID: 51, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "a", Type: types.Int}, + }, + PrimaryIndex: descpb.IndexDescriptor{ + ID: 1, + Name: "primary", + Unique: true, + KeyColumnIDs: []descpb.ColumnID{1}, + KeyColumnNames: []string{"a"}, + KeyColumnDirections: []descpb.IndexDescriptor_Direction{descpb.IndexDescriptor_ASC}, + Version: descpb.PrimaryIndexWithStoredColumnsVersion, + EncodingType: descpb.PrimaryIndexEncoding, + ConstraintID: 1, + }, + Families: []descpb.ColumnFamilyDescriptor{ + {ID: 0, Name: "primary", + ColumnIDs: []descpb.ColumnID{1}, + ColumnNames: []string{"a"}, + }, + }, + NextColumnID: 2, + NextFamilyID: 1, + NextIndexID: 5, + NextConstraintID: 2, + Privileges: catpb.NewBasePrivilegeDescriptor(security.AdminRoleName()), + }, + otherDescs: []descpb.TableDescriptor{{ + Name: "bar", + ID: 52, + ParentID: 1, + FormatVersion: descpb.InterleavedFormatVersion, + UnexposedParentSchemaID: keys.PublicSchemaID, + ViewQuery: "SELECT * FROM foo", + DependsOn: []descpb.ID{51}, + NextColumnID: 2, + Columns: []descpb.ColumnDescriptor{ + {Name: "a", ID: 1, Type: types.Int}, + }, + Privileges: catpb.NewBasePrivilegeDescriptor(security.AdminRoleName()), + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionEnabled: &boolTrue}, + }}, + }, + // Sequences. + { // 14 err: `depended-on-by relation "bar" (52) does not have a column with ID 123`, desc: descpb.TableDescriptor{ Name: "foo", @@ -2259,6 +2399,20 @@ func TestValidateCrossTableReferences(t *testing.T) { } else if expectedErr != err.Error() { t.Errorf("%d: expected \"%s\", but found \"%s\"", i, expectedErr, err.Error()) } + // Validate other descriptors for specific test cases. + if test.testName == "Stats setting in view" { + expectedErr = test.otherErr + nonTableDesc := NewBuilder(&test.otherDescs[0]).BuildImmutable() + err := validate.Self(clusterversion.TestingClusterVersion, nonTableDesc) + if expectedErr == "" && err != nil { + t.Errorf("%d: expected success, but found error: \"%+v\"", i, err) + } else if expectedErr != "" && err == nil { + t.Errorf("%d: expected \"%s\", but found success: %+v", i, expectedErr, + test.otherDescs[0]) + } else if expectedErr != "" && expectedErr != err.Error() { + t.Errorf("%d: expected \"%s\", but found \"%+v\"", i, expectedErr, err) + } + } }) } } diff --git a/pkg/sql/distsql_plan_stats.go b/pkg/sql/distsql_plan_stats.go index fb097b34c726..6fcc49d9daba 100644 --- a/pkg/sql/distsql_plan_stats.go +++ b/pkg/sql/distsql_plan_stats.go @@ -212,6 +212,9 @@ func (dsp *DistSQLPlanner) createStatsPlan( var rowsExpected uint64 if len(tableStats) > 0 { overhead := stats.AutomaticStatisticsFractionStaleRows.Get(&dsp.st.SV) + if autoStatsFractionStaleRowsForTable, ok := desc.TableDesc().AutoStatsFractionStaleRows(); ok { + overhead = autoStatsFractionStaleRowsForTable + } // Convert to a signed integer first to make the linter happy. rowsExpected = uint64(int64( // The total expected number of rows is the same number that was measured diff --git a/pkg/sql/logictest/testdata/logic_test/alter_table b/pkg/sql/logictest/testdata/logic_test/alter_table index ab682db27ef5..b1c08a6fa75a 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_table +++ b/pkg/sql/logictest/testdata/logic_test/alter_table @@ -2364,3 +2364,114 @@ COMMIT; statement ok ROLLBACK; + +subtest table_settings + +statement ok +CREATE TABLE t5 (a int) + +# Turn on automatic stats collection +statement ok +ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = true) + +# Verify automatic collection is enabled. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL + AND crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' + ->> 'sqlStatsAutomaticCollectionEnabled' = 'true' +---- +{"sqlStatsAutomaticCollectionEnabled": true} + +# Strings in settings should be converted to the proper data type. +statement ok +ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = 'false') + +# Verify automatic collection is disabled. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL + AND crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' + ->> 'sqlStatsAutomaticCollectionEnabled' = 'false' +---- +{"sqlStatsAutomaticCollectionEnabled": false} + +# SHOW CREATE TABLE displays the value properly. +query T +SELECT create_statement FROM [SHOW CREATE TABLE t5] +---- +CREATE TABLE public.t5 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t5_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.enabled" = false) + +statement error pq: parameter "sql.stats.automatic_collection.enabled" requires a Boolean value +ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = 123) + +statement ok +ALTER TABLE t5 RESET ("sql.stats.automatic_collection.enabled") + +# Verify the automatic collection setting is removed. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL +---- +{} + +statement error pq: invalid value for sql.stats.automatic_collection.fraction_stale_rows: could not parse "hello" as type float: strconv.ParseFloat: parsing "hello": invalid syntax +ALTER TABLE t5 SET ("sql.stats.automatic_collection.fraction_stale_rows" = 'hello') + +statement error pq: invalid value for sql.stats.automatic_collection.min_stale_rows: could not parse "world" as type int: strconv.ParseInt: parsing "world": invalid syntax +ALTER TABLE t5 SET ("sql.stats.automatic_collection.min_stale_rows" = 'world') + +# Verify strings can be converted to proper setting values. +statement ok +ALTER TABLE t5 SET ("sql.stats.automatic_collection.fraction_stale_rows" = '0.15', + "sql.stats.automatic_collection.min_stale_rows" = '1234') + +# Verify settings +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL +---- +{"sqlStatsAutomaticCollectionFractionStaleRows": 0.15, "sqlStatsAutomaticCollectionMinStaleRows": "1234"} + +query T +SELECT create_statement FROM [SHOW CREATE TABLE t5] +---- +CREATE TABLE public.t5 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t5_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.min_stale_rows" = 1234, "sql.stats.automatic_collection.fraction_stale_rows" = 0.15) diff --git a/pkg/sql/logictest/testdata/logic_test/create_table b/pkg/sql/logictest/testdata/logic_test/create_table index 039042219dd2..489cbe24c224 100644 --- a/pkg/sql/logictest/testdata/logic_test/create_table +++ b/pkg/sql/logictest/testdata/logic_test/create_table @@ -872,3 +872,67 @@ CREATE TABLE public.t_good_hash_indexes_2 ( crdb_internal_a_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 5:::INT8)) VIRTUAL, CONSTRAINT t_good_hash_indexes_2_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=5) ) + +subtest table_settings + +statement ok +CREATE TABLE t1 (a int) WITH ("sql.stats.automatic_collection.enabled" = true) + +# Verify automatic collection is enabled. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't1' + AND tbl.drop_time IS NULL +---- +{"sqlStatsAutomaticCollectionEnabled": true} + +statement ok +DROP TABLE t1 + +statement ok +CREATE TABLE t1 (a int) WITH ("sql.stats.automatic_collection.fraction_stale_rows" = 0.5, + "sql.stats.automatic_collection.min_stale_rows" = 4000) + +# Verify settings +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't1' + AND tbl.drop_time IS NULL +---- +{"sqlStatsAutomaticCollectionFractionStaleRows": 0.5, "sqlStatsAutomaticCollectionMinStaleRows": "4000"} + +query T +SELECT create_statement FROM [SHOW CREATE TABLE t1] +---- +CREATE TABLE public.t1 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t1_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.min_stale_rows" = 4000, "sql.stats.automatic_collection.fraction_stale_rows" = 0.5) + +statement ok +CREATE TABLE t11 (a int) WITH ("sql.stats.automatic_collection.enabled" = true, + "sql.stats.automatic_collection.fraction_stale_rows" = 1.797693134862315708145274237317043567981e+308, + "sql.stats.automatic_collection.min_stale_rows" = 9223372036854775807) + +# Using max values for auto stats table settings +query T +SELECT create_statement FROM [SHOW CREATE TABLE t11] +---- +CREATE TABLE public.t11 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t11_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.enabled" = true, "sql.stats.automatic_collection.min_stale_rows" = 9223372036854775807, "sql.stats.automatic_collection.fraction_stale_rows" = 1.7976931348623157e+308) diff --git a/pkg/sql/paramparse/BUILD.bazel b/pkg/sql/paramparse/BUILD.bazel index 7faf6d4c7020..23a703e3020d 100644 --- a/pkg/sql/paramparse/BUILD.bazel +++ b/pkg/sql/paramparse/BUILD.bazel @@ -12,6 +12,8 @@ go_library( deps = [ "//pkg/geo/geoindex", "//pkg/server/telemetry", + "//pkg/settings", + "//pkg/settings/cluster", "//pkg/sql/catalog/catpb", "//pkg/sql/catalog/descpb", "//pkg/sql/catalog/tabledesc", diff --git a/pkg/sql/paramparse/paramobserver.go b/pkg/sql/paramparse/paramobserver.go index 584d33be5955..92256da58c5d 100644 --- a/pkg/sql/paramparse/paramobserver.go +++ b/pkg/sql/paramparse/paramobserver.go @@ -16,6 +16,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/geo/geoindex" "github.com/cockroachdb/cockroach/pkg/server/telemetry" + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc" @@ -139,6 +141,34 @@ func boolFromDatum(evalCtx *tree.EvalContext, key string, datum tree.Datum) (boo return bool(*s), nil } +func intFromDatum(evalCtx *tree.EvalContext, key string, datum tree.Datum) (int64, error) { + intDatum := datum + if stringVal, err := DatumAsString(evalCtx, key, datum); err == nil { + if intDatum, err = tree.ParseDInt(stringVal); err != nil { + return 0, errors.Wrapf(err, "invalid value for %s", key) + } + } + s, err := DatumAsInt(evalCtx, key, intDatum) + if err != nil { + return 0, err + } + return s, nil +} + +func floatFromDatum(evalCtx *tree.EvalContext, key string, datum tree.Datum) (float64, error) { + floatDatum := datum + if stringVal, err := DatumAsString(evalCtx, key, datum); err == nil { + if floatDatum, err = tree.ParseDFloat(stringVal); err != nil { + return 0, errors.Wrapf(err, "invalid value for %s", key) + } + } + s, err := DatumAsFloat(evalCtx, key, floatDatum) + if err != nil { + return 0, err + } + return s, nil +} + type tableParam struct { onSet func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, evalCtx *tree.EvalContext, key string, datum tree.Datum) error onReset func(po *TableStorageParamObserver, evalCtx *tree.EvalContext, key string) error @@ -465,6 +495,18 @@ var tableParams = map[string]tableParam{ return nil }, }, + cluster.AutoStatsClusterSettingName: { + onSet: boolTableSettingFunc, + onReset: tableSettingResetFunc(boolSetting), + }, + cluster.AutoStatsMinStaleSettingName: { + onSet: intTableSettingFunc(settings.NonNegativeInt), + onReset: tableSettingResetFunc(intSetting), + }, + cluster.AutoStatsFractionStaleSettingName: { + onSet: floatTableSettingFunc(settings.NonNegativeFloat), + onReset: tableSettingResetFunc(floatSetting), + }, } func init() { @@ -508,6 +550,184 @@ func init() { } } +type settingDataType int + +const ( + boolSetting = iota + intSetting + floatSetting +) + +func boolTableSettingFunc( + ctx context.Context, + po *TableStorageParamObserver, + semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, + key string, + datum tree.Datum, +) error { + boolVal, err := boolFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.ClusterSettingsForTable == nil { + po.tableDesc.ClusterSettingsForTable = &catpb.ClusterSettingsForTable{} + } + var settingPtr **bool + var ok bool + if settingPtr, ok = settingValuePointer(key, po.tableDesc.ClusterSettingsForTable).(**bool); !ok { + return errors.Newf("table setting %s has unexpected type", key) + } + if settingPtr == nil { + return errors.Newf("unable to set table setting %s", key) + } + setting := *settingPtr + if setting == nil { + *settingPtr = &boolVal + return nil + } else if *setting == boolVal { + return nil + } + *setting = boolVal + return nil +} + +func intTableSettingFunc( + validateFunc func(v int64) error, +) func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + return func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + intVal, err := intFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.ClusterSettingsForTable == nil { + po.tableDesc.ClusterSettingsForTable = &catpb.ClusterSettingsForTable{} + } + var settingPtr **int64 + var ok bool + if settingPtr, ok = settingValuePointer(key, po.tableDesc.ClusterSettingsForTable).(**int64); !ok { + return errors.Newf("table setting %s has unexpected type", key) + } + if settingPtr == nil { + return errors.Newf("unable to set table setting %s", key) + } + if err = validateFunc(intVal); err != nil { + return errors.Wrapf(err, "invalid value for %s", key) + } + setting := *settingPtr + if setting == nil { + *settingPtr = &intVal + return nil + } else if *setting == intVal { + return nil + } + *setting = intVal + return nil + } +} + +func floatTableSettingFunc( + validateFunc func(v float64) error, +) func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + return func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + floatVal, err := floatFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.ClusterSettingsForTable == nil { + po.tableDesc.ClusterSettingsForTable = &catpb.ClusterSettingsForTable{} + } + var settingPtr **float64 + var ok bool + if settingPtr, ok = settingValuePointer(key, po.tableDesc.ClusterSettingsForTable).(**float64); !ok { + return errors.Newf("table setting %s has unexpected type", key) + } + if settingPtr == nil { + return errors.Newf("unable to set table setting %s", key) + } + if err = validateFunc(floatVal); err != nil { + return errors.Wrapf(err, "invalid value for %s", key) + } + setting := *settingPtr + if setting == nil { + *settingPtr = &floatVal + return nil + } else if *setting == floatVal { + return nil + } + *setting = floatVal + return nil + } +} + +func tableSettingResetFunc( + settingDataType settingDataType, +) func(po *TableStorageParamObserver, evalCtx *tree.EvalContext, key string) error { + return func(po *TableStorageParamObserver, evalCtx *tree.EvalContext, key string) error { + if po.tableDesc.ClusterSettingsForTable == nil { + return nil + } + settingPtr := settingValuePointer(key, po.tableDesc.ClusterSettingsForTable) + if settingPtr == nil { + return errors.Newf("unable to reset table setting %s", key) + } + var ok bool + switch settingDataType { + case boolSetting: + var setting **bool + if setting, ok = settingPtr.(**bool); !ok { + return errors.Newf("unable to reset table setting %s", key) + } + if *setting == nil { + // This setting is unset or has already been reset. + return nil + } + *setting = nil + case intSetting: + var setting **int64 + if setting, ok = settingPtr.(**int64); !ok { + return errors.Newf("unable to reset table setting %s", key) + } + if *setting == nil { + // This setting is unset or has already been reset. + return nil + } + *setting = nil + case floatSetting: + var setting **float64 + if setting, ok = settingPtr.(**float64); !ok { + return errors.Newf("unable to reset table setting %s", key) + } + if *setting == nil { + // This setting is unset or has already been reset. + return nil + } + *setting = nil + default: + return errors.Newf("unable to reset table setting %s", key) + } + return nil + } +} + +func settingValuePointer( + settingName string, clusterSettingsForTable *catpb.ClusterSettingsForTable, +) interface{} { + switch settingName { + case cluster.AutoStatsClusterSettingName: + return &clusterSettingsForTable.SqlStatsAutomaticCollectionEnabled + case cluster.AutoStatsMinStaleSettingName: + return &clusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows + case cluster.AutoStatsFractionStaleSettingName: + return &clusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows + } + return nil +} + // onSet implements the StorageParamObserver interface. func (po *TableStorageParamObserver) onSet( ctx context.Context, diff --git a/pkg/sql/set_cluster_setting.go b/pkg/sql/set_cluster_setting.go index 7e2df6c23ef5..095ea5a09cb8 100644 --- a/pkg/sql/set_cluster_setting.go +++ b/pkg/sql/set_cluster_setting.go @@ -36,7 +36,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb" "github.com/cockroachdb/cockroach/pkg/sql/sessioninit" "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" - "github.com/cockroachdb/cockroach/pkg/sql/stats" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -226,7 +225,7 @@ func (n *setClusterSettingNode) startExec(params runParams) error { // Report tracked cluster settings via telemetry. // TODO(justin): implement a more general mechanism for tracking these. switch n.name { - case stats.AutoStatsClusterSettingName: + case cluster.AutoStatsClusterSettingName: switch expectedEncodedValue { case "true": telemetry.Inc(sqltelemetry.TurnAutoStatsOnUseCounter) diff --git a/pkg/sql/stats/automatic_stats.go b/pkg/sql/stats/automatic_stats.go index 36c9093d2210..d374f05a6c2f 100644 --- a/pkg/sql/stats/automatic_stats.go +++ b/pkg/sql/stats/automatic_stats.go @@ -34,15 +34,11 @@ import ( "github.com/cockroachdb/errors" ) -// AutoStatsClusterSettingName is the name of the automatic stats collection -// cluster setting. -const AutoStatsClusterSettingName = "sql.stats.automatic_collection.enabled" - // AutomaticStatisticsClusterMode controls the cluster setting for enabling // automatic table statistics collection. var AutomaticStatisticsClusterMode = settings.RegisterBoolSetting( settings.TenantWritable, - AutoStatsClusterSettingName, + cluster.AutoStatsClusterSettingName, "automatic statistics collection mode", true, ).WithPublic() @@ -81,7 +77,7 @@ var AutomaticStatisticsMaxIdleTime = settings.RegisterFloatSetting( var AutomaticStatisticsFractionStaleRows = func() *settings.FloatSetting { s := settings.RegisterFloatSetting( settings.TenantWritable, - "sql.stats.automatic_collection.fraction_stale_rows", + cluster.AutoStatsFractionStaleSettingName, "target fraction of stale rows per table that will trigger a statistics refresh", 0.2, settings.NonNegativeFloat, @@ -96,7 +92,7 @@ var AutomaticStatisticsFractionStaleRows = func() *settings.FloatSetting { var AutomaticStatisticsMinStaleRows = func() *settings.IntSetting { s := settings.RegisterIntSetting( settings.TenantWritable, - "sql.stats.automatic_collection.min_stale_rows", + cluster.AutoStatsMinStaleSettingName, "target minimum number of stale rows per table that will trigger a statistics refresh", 500, settings.NonNegativeInt, @@ -212,6 +208,18 @@ type Refresher struct { // mutationCounts contains aggregated mutation counts for each table that // have yet to be processed by the refresher. mutationCounts map[descpb.ID]int64 + + // autoStatsEnabled is true if auto stats collection is explicitly enabled for + // the given table, which may override the cluster setting. + autoStatsEnabled map[descpb.ID]bool + + // autoStatsFractionStaleRows maps TableID to the table-level setting of + // sql.stats.automatic_collection.fraction_stale_rows, if it's been set. + autoStatsFractionStaleRows map[descpb.ID]float64 + + // autoStatsMinStaleRows maps TableID to the table-level setting of + // sql.stats.automatic_collection.min_stale_rows, if it's been set. + autoStatsMinStaleRows map[descpb.ID]int64 } // mutation contains metadata about a SQL mutation and is the message passed to @@ -219,6 +227,10 @@ type Refresher struct { type mutation struct { tableID descpb.ID rowsAffected int + // Table-level settings which may override cluster settings + autoStatsEnabled *bool + autoStatsFractionStaleRows *float64 + autoStatsMinStaleRows *int64 } // MakeRefresher creates a new Refresher. @@ -232,15 +244,18 @@ func MakeRefresher( randSource := rand.NewSource(rand.Int63()) return &Refresher{ - AmbientContext: ambientCtx, - st: st, - ex: ex, - cache: cache, - randGen: makeAutoStatsRand(randSource), - mutations: make(chan mutation, refreshChanBufferLen), - asOfTime: asOfTime, - extraTime: time.Duration(rand.Int63n(int64(time.Hour))), - mutationCounts: make(map[descpb.ID]int64, 16), + AmbientContext: ambientCtx, + st: st, + ex: ex, + cache: cache, + randGen: makeAutoStatsRand(randSource), + mutations: make(chan mutation, refreshChanBufferLen), + asOfTime: asOfTime, + extraTime: time.Duration(rand.Int63n(int64(time.Hour))), + mutationCounts: make(map[descpb.ID]int64, 16), + autoStatsEnabled: make(map[descpb.ID]bool), + autoStatsFractionStaleRows: make(map[descpb.ID]float64), + autoStatsMinStaleRows: make(map[descpb.ID]int64), } } @@ -274,6 +289,9 @@ func (r *Refresher) Start( case <-timer.C: mutationCounts := r.mutationCounts + autoStatsEnabledForTable := r.autoStatsEnabled + autoStatsFractionStaleRows := r.autoStatsFractionStaleRows + autoStatsMinStaleRows := r.autoStatsMinStaleRows if err := stopper.RunAsyncTask( ctx, "stats.Refresher: maybeRefreshStats", func(ctx context.Context) { // Wait so that the latest changes will be reflected according to the @@ -288,13 +306,35 @@ func (r *Refresher) Start( } for tableID, rowsAffected := range mutationCounts { - // Check the cluster setting before each refresh in case it was - // disabled recently. - if !AutomaticStatisticsClusterMode.Get(&r.st.SV) { - break + // Check the cluster setting and table setting before each refresh + // in case it was disabled recently. + var enabledForTable bool + if enabled, ok := autoStatsEnabledForTable[tableID]; ok { + if !enabled { + continue + } + enabledForTable = true + } else if !AutomaticStatisticsClusterMode.Get(&r.st.SV) { + // Only break if there's no possibility of finding tables of the + // remaining mutations in the autoStatsEnabledForTable map. + if len(autoStatsEnabledForTable) == 0 { + break + } else { + continue + } } + var fractionStaleRowsForTable *float64 + var minStaleRowsForTable *int64 - r.maybeRefreshStats(ctx, stopper, tableID, rowsAffected, r.asOfTime) + if fractionStaleRows, ok := autoStatsFractionStaleRows[tableID]; ok { + fractionStaleRowsForTable = &fractionStaleRows + } + if minStaleRows, ok := autoStatsMinStaleRows[tableID]; ok { + minStaleRowsForTable = &minStaleRows + } + + r.maybeRefreshStats(ctx, stopper, tableID, rowsAffected, r.asOfTime, + enabledForTable, fractionStaleRowsForTable, minStaleRowsForTable) select { case <-stopper.ShouldQuiesce(): @@ -308,10 +348,25 @@ func (r *Refresher) Start( }); err != nil { log.Errorf(ctx, "failed to refresh stats: %v", err) } + // This clears out any tables that may have been added to the + // mutationCounts map by ensureAllTables. This is by design. We don't + // want to constantly refresh tables that are read-only. r.mutationCounts = make(map[descpb.ID]int64, len(r.mutationCounts)) + r.autoStatsEnabled = make(map[descpb.ID]bool) + r.autoStatsFractionStaleRows = make(map[descpb.ID]float64) + r.autoStatsMinStaleRows = make(map[descpb.ID]int64) case mut := <-r.mutations: r.mutationCounts[mut.tableID] += int64(mut.rowsAffected) + if mut.autoStatsEnabled != nil { + r.autoStatsEnabled[mut.tableID] = *mut.autoStatsEnabled + } + if mut.autoStatsFractionStaleRows != nil { + r.autoStatsFractionStaleRows[mut.tableID] = *mut.autoStatsFractionStaleRows + } + if mut.autoStatsMinStaleRows != nil { + r.autoStatsMinStaleRows[mut.tableID] = *mut.autoStatsMinStaleRows + } case <-stopper.ShouldQuiesce(): return @@ -326,12 +381,67 @@ func (r *Refresher) Start( func (r *Refresher) ensureAllTables( ctx context.Context, settings *settings.Values, initialTableCollectionDelay time.Duration, ) { + if !AutomaticStatisticsClusterMode.Get(settings) { - // Automatic stats are disabled. + // Use a historical read so as to disable txn contention resolution. + // A table-level setting of sql.stats.automatic_collection.enabled=true is + // checked and only those tables are included in this scan. + getTablesWithAutoStatsExplicitlyEnabledQuery := fmt.Sprintf( + ` +SELECT + tbl.table_id +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id + AS OF SYSTEM TIME '-%s' +WHERE + tbl.database_name IS NOT NULL + AND tbl.database_name <> '%s' + AND tbl.drop_time IS NULL + AND ( + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', d.descriptor, false)->'table'->>'viewQuery' + ) IS NULL + AND + (crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' ->> 'sqlStatsAutomaticCollectionEnabled' = 'true' + );`, + initialTableCollectionDelay, + systemschema.SystemDatabaseName, + ) + + it, err := r.ex.QueryIterator( + ctx, + "get-tables-with-autostats-explicitly-enabled", + nil, /* txn */ + getTablesWithAutoStatsExplicitlyEnabledQuery, + ) + if err == nil { + var ok bool + for ok, err = it.Next(ctx); ok; ok, err = it.Next(ctx) { + row := it.Cur() + tableID := descpb.ID(*row[0].(*tree.DInt)) + // Don't create statistics for virtual tables. + // The query already excludes views and system tables. + if !descpb.IsVirtualTable(tableID) { + r.mutationCounts[tableID] += 0 + r.autoStatsEnabled[tableID] = true + } + } + } + if err != nil { + // Note that it is ok if the iterator returned partial results before + // encountering an error - in that case we added entries to + // r.mutationCounts for some of the tables and operation of adding an + // entry is idempotent (i.e. we didn't mess up anything for the next + // call to this method). + log.Errorf(ctx, "failed to get tables for automatic stats: %v", err) + } return } // Use a historical read so as to disable txn contention resolution. + // A table-level setting of sql.stats.automatic_collection.enabled of null, + // meaning not set, or true qualifies rows we're interested in. getAllTablesQuery := fmt.Sprintf( ` SELECT @@ -346,7 +456,13 @@ WHERE AND tbl.drop_time IS NULL AND ( crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', d.descriptor, false)->'table'->>'viewQuery' - ) IS NULL;`, + ) IS NULL + AND + (crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable'->'sqlStatsAutomaticCollectionEnabled' IS NULL + OR crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' ->> 'sqlStatsAutomaticCollectionEnabled' = 'true' + );`, initialTableCollectionDelay, systemschema.SystemDatabaseName, ) @@ -385,7 +501,16 @@ WHERE // successful insert, update, upsert or delete. rowsAffected refers to the // number of rows written as part of the mutation operation. func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected int) { - if !AutomaticStatisticsClusterMode.Get(&r.st.SV) { + // The table-level setting of sql.stats.automatic_collection.enabled takes + // precedence over the cluster setting. + var enabledAtTableLevel *bool + + if autoStatsCollectionEnabled, ok := table.TableDesc().AutoStatsCollectionEnabled(); ok { + if !autoStatsCollectionEnabled { + return + } + enabledAtTableLevel = &autoStatsCollectionEnabled + } else if !AutomaticStatisticsClusterMode.Get(&r.st.SV) { // Automatic stats are disabled. return } @@ -393,11 +518,26 @@ func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected i // Don't collect stats for this kind of table: system, virtual, view, etc. return } + var fractionStaleRowsForTable *float64 + var minStaleRowsForTable *int64 + + if minStaleRows, ok := table.TableDesc().AutoStatsMinStaleRows(); ok { + minStaleRowsForTable = &minStaleRows + } + if fractionStaleRows, ok := table.TableDesc().AutoStatsFractionStaleRows(); ok { + fractionStaleRowsForTable = &fractionStaleRows + } // Send mutation info to the refresher thread to avoid adding latency to // the calling transaction. select { - case r.mutations <- mutation{tableID: table.GetID(), rowsAffected: rowsAffected}: + case r.mutations <- mutation{ + tableID: table.GetID(), + rowsAffected: rowsAffected, + autoStatsEnabled: enabledAtTableLevel, + autoStatsFractionStaleRows: fractionStaleRowsForTable, + autoStatsMinStaleRows: minStaleRowsForTable, + }: default: // Don't block if there is no room in the buffered channel. if bufferedChanFullLogLimiter.ShouldLog() { @@ -410,12 +550,24 @@ func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected i // maybeRefreshStats implements the core logic described in the comment for // Refresher. It is called by the background Refresher thread. +// +// enabledForTable, if true, indicates that auto stats is explicitly enabled at +// the table level and any mutation struct built by maybeRefreshStats should +// include that information. +// +// fractionStaleRowsForTable and minStaleRowsForTable, if non-nil, indicate +// table-level settings for sql.stats.automatic_collection.fraction_stale_rows +// and sql.stats.automatic_collection.min_stale_rows to be used in place of the +// cluster settings of the same name. func (r *Refresher) maybeRefreshStats( ctx context.Context, stopper *stop.Stopper, tableID descpb.ID, rowsAffected int64, asOf time.Duration, + enabledForTable bool, + fractionStaleRowsForTable *float64, + minStaleRowsForTable *int64, ) { tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID) if err != nil { @@ -453,15 +605,31 @@ func (r *Refresher) maybeRefreshStats( mustRefresh = true } - targetRows := int64(rowCount*AutomaticStatisticsFractionStaleRows.Get(&r.st.SV)) + - AutomaticStatisticsMinStaleRows.Get(&r.st.SV) - if !mustRefresh && rowsAffected < math.MaxInt32 && r.randGen.randInt(targetRows) >= rowsAffected { + statsFractionStaleRows := AutomaticStatisticsFractionStaleRows.Get(&r.st.SV) + if fractionStaleRowsForTable != nil { + statsFractionStaleRows = *fractionStaleRowsForTable + } + statsMinStaleRows := AutomaticStatisticsMinStaleRows.Get(&r.st.SV) + if minStaleRowsForTable != nil { + statsMinStaleRows = *minStaleRowsForTable + } + targetRows := int64(rowCount*statsFractionStaleRows) + statsMinStaleRows + // randInt will panic if we pass it a value of 0. + randomTargetRows := int64(0) + if targetRows > 0 { + randomTargetRows = r.randGen.randInt(targetRows) + } + if !mustRefresh && rowsAffected < math.MaxInt32 && randomTargetRows >= rowsAffected { // No refresh is happening this time. return } if err := r.refreshStats(ctx, tableID, asOf); err != nil { if errors.Is(err, ConcurrentCreateStatsError) { + var enabledAtTableLevel *bool + if enabledForTable { + enabledAtTableLevel = &enabledForTable + } // Another stats job was already running. Attempt to reschedule this // refresh. if mustRefresh { @@ -471,14 +639,22 @@ func (r *Refresher) maybeRefreshStats( // cycle so that we have another chance to trigger a refresh. We pass // rowsAffected=0 so that we don't force a refresh if another node has // already done it. - r.mutations <- mutation{tableID: tableID, rowsAffected: 0} + r.mutations <- mutation{tableID: tableID, rowsAffected: 0, + autoStatsEnabled: enabledAtTableLevel, + autoStatsFractionStaleRows: fractionStaleRowsForTable, + autoStatsMinStaleRows: minStaleRowsForTable, + } } else { // If this refresh was caused by a "dice roll", we want to make sure // that the refresh is rescheduled so that we adhere to the // AutomaticStatisticsFractionStaleRows statistical ideal. We // ensure that the refresh is triggered during the next cycle by // passing a very large number for rowsAffected. - r.mutations <- mutation{tableID: tableID, rowsAffected: math.MaxInt32} + r.mutations <- mutation{tableID: tableID, rowsAffected: math.MaxInt32, + autoStatsEnabled: enabledAtTableLevel, + autoStatsFractionStaleRows: fractionStaleRowsForTable, + autoStatsMinStaleRows: minStaleRowsForTable, + } } return } diff --git a/pkg/sql/stats/automatic_stats_test.go b/pkg/sql/stats/automatic_stats_test.go index 592c6d7c5cc5..fc8b04a10fd9 100644 --- a/pkg/sql/stats/automatic_stats_test.go +++ b/pkg/sql/stats/automatic_stats_test.go @@ -84,6 +84,8 @@ func TestMaybeRefreshStats(t *testing.T) { // even though rowsAffected=0. refresher.maybeRefreshStats( ctx, s.Stopper(), descA.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { t.Fatal(err) @@ -93,6 +95,31 @@ func TestMaybeRefreshStats(t *testing.T) { // is 0, so refreshing will not succeed. refresher.maybeRefreshStats( ctx, s.Stopper(), descA.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ + ) + if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { + t.Fatal(err) + } + + // Setting minStaleRows for the table prevents refreshing from occurring. + minStaleRowsForTable := int64(100000000) + refresher.maybeRefreshStats( + ctx, s.Stopper(), descA.GetID(), 10 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, &minStaleRowsForTable, /* minStaleRowsForTable */ + ) + if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { + t.Fatal(err) + } + + // Setting fractionStaleRows for the table can also prevent refreshing from + // occurring, though this is a not a typical value for this setting. + fractionStaleRowsForTable := float64(100000000) + refresher.maybeRefreshStats( + ctx, s.Stopper(), descA.GetID(), 10 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + &fractionStaleRowsForTable /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { t.Fatal(err) @@ -102,6 +129,8 @@ func TestMaybeRefreshStats(t *testing.T) { // updated than exist in the table, the probability of a refresh is 100%. refresher.maybeRefreshStats( ctx, s.Stopper(), descA.GetID(), 10 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) if err := checkStatsCount(ctx, cache, descA, 2 /* expected */); err != nil { t.Fatal(err) @@ -113,6 +142,8 @@ func TestMaybeRefreshStats(t *testing.T) { descVW := desctestutils.TestingGetPublicTableDescriptor(s.DB(), keys.SystemSQLCodec, "t", "vw") refresher.maybeRefreshStats( ctx, s.Stopper(), descVW.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) select { case <-refresher.mutations: @@ -312,6 +343,8 @@ func TestAverageRefreshTime(t *testing.T) { // is 0. refresher.maybeRefreshStats( ctx, s.Stopper(), table.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) if err := checkStatsCount(ctx, cache, table, 20 /* expected */); err != nil { t.Fatal(err) @@ -362,6 +395,8 @@ func TestAverageRefreshTime(t *testing.T) { // were deleted. refresher.maybeRefreshStats( ctx, s.Stopper(), table.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) if err := checkStatsCount(ctx, cache, table, 15 /* expected */); err != nil { t.Fatal(err) @@ -455,6 +490,8 @@ func TestNoRetryOnFailure(t *testing.T) { // Try to refresh stats on a table that doesn't exist. r.maybeRefreshStats( ctx, s.Stopper(), 100 /* tableID */, math.MaxInt32, time.Microsecond, /* asOfTime */ + false, /* enabledForTable */ + nil /* fractionStaleRowsForTable */, nil, /* minStaleRowsForTable */ ) // Ensure that we will not try to refresh tableID 100 again.