diff --git a/pkg/settings/cluster/cluster_settings.go b/pkg/settings/cluster/cluster_settings.go index b8f863e1fd3a..fb706b83806f 100644 --- a/pkg/settings/cluster/cluster_settings.go +++ b/pkg/settings/cluster/cluster_settings.go @@ -78,6 +78,30 @@ func TelemetryOptOut() bool { // (for example, a CLI subcommand that does not connect to a cluster). var NoSettings *Settings // = nil +// BoolSetting represents a boolean setting value. +type BoolSetting int + +// The values for BoolSetting. +const ( + NotSet = iota // NotSet is like a null. The value has not been set. + True + False +) + +const ( + // AutoStatsEnabledSettingName is the name of the automatic stats collection + // enabled cluster setting. + AutoStatsEnabledSettingName = "sql.stats.automatic_collection.enabled" + + // AutoStatsMinStaleSettingName is the name of the automatic stats collection + // min stale rows cluster setting. + AutoStatsMinStaleSettingName = "sql.stats.automatic_collection.min_stale_rows" + + // AutoStatsFractionStaleSettingName is the name of the automatic stats + // collection fraction stale rows cluster setting. + AutoStatsFractionStaleSettingName = "sql.stats.automatic_collection.fraction_stale_rows" +) + // CPUProfileType tracks whether a CPU profile is in progress. type CPUProfileType int32 diff --git a/pkg/sql/catalog/BUILD.bazel b/pkg/sql/catalog/BUILD.bazel index 39f48e97a976..e093d9a847b2 100644 --- a/pkg/sql/catalog/BUILD.bazel +++ b/pkg/sql/catalog/BUILD.bazel @@ -25,6 +25,7 @@ go_library( "//pkg/kv", "//pkg/roachpb", "//pkg/server/telemetry", + "//pkg/settings/cluster", "//pkg/sql/catalog/catpb", "//pkg/sql/catalog/descpb", "//pkg/sql/pgwire/pgcode", diff --git a/pkg/sql/catalog/catpb/catalog.proto b/pkg/sql/catalog/catpb/catalog.proto index c38b9ecfb88f..37010fa624ce 100644 --- a/pkg/sql/catalog/catpb/catalog.proto +++ b/pkg/sql/catalog/catpb/catalog.proto @@ -217,3 +217,22 @@ message RowLevelTTL { // the relation name. optional bool label_metrics = 10 [(gogoproto.nullable) = false]; } + +// ClusterSettingsForTable represents cluster settings specified at the table +// level. Each setting is nullable so queries of the descriptor in JSON form +// only list values which have been set. +message ClusterSettingsForTable { + option (gogoproto.equal) = true; + optional bool sql_stats_automatic_collection_enabled = 1; + optional int64 sql_stats_automatic_collection_min_stale_rows = 2; + optional double sql_stats_automatic_collection_fraction_stale_rows = 3; +} + +// SessionSettingsForTable represents session settings specified at the table +// level. Each setting is nullable so queries of the descriptor in JSON form +// only list values which have been set. +// This is currently unused, but defined for backwards compatibility in +// anticipation of expected use going forward. +message SessionSettingsForTable { + option (gogoproto.equal) = true; +} diff --git a/pkg/sql/catalog/descpb/BUILD.bazel b/pkg/sql/catalog/descpb/BUILD.bazel index 3b051ca0232b..37d13fe272d5 100644 --- a/pkg/sql/catalog/descpb/BUILD.bazel +++ b/pkg/sql/catalog/descpb/BUILD.bazel @@ -21,6 +21,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/keys", + "//pkg/settings/cluster", "//pkg/sql/catalog/catconstants", "//pkg/sql/catalog/catpb", "//pkg/sql/parser", diff --git a/pkg/sql/catalog/descpb/structured.go b/pkg/sql/catalog/descpb/structured.go index 7d8cfac634ca..37dfae50b938 100644 --- a/pkg/sql/catalog/descpb/structured.go +++ b/pkg/sql/catalog/descpb/structured.go @@ -12,6 +12,7 @@ package descpb import ( "github.com/cockroachdb/cockroach/pkg/keys" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catconstants" "github.com/cockroachdb/cockroach/pkg/sql/protoreflect" "github.com/cockroachdb/cockroach/pkg/sql/sem/catid" @@ -273,6 +274,55 @@ func (desc *TableDescriptor) Persistence() tree.Persistence { return tree.PersistencePermanent } +// AutoStatsCollectionEnabled indicates if automatic statistics collection is +// explicitly enabled or disabled for this table. +func (desc *TableDescriptor) AutoStatsCollectionEnabled() cluster.BoolSetting { + if desc.NoClusterSettingOverrides() { + return cluster.NotSet + } + if desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionEnabled == nil { + return cluster.NotSet + } + if *desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionEnabled { + return cluster.True + } + return cluster.False +} + +// AutoStatsMinStaleRows indicates the setting of +// sql.stats.automatic_collection.min_stale_rows for this table. +// If ok is true, then the minStaleRows value is valid, otherwise this has not +// been set at the table level. +func (desc *TableDescriptor) AutoStatsMinStaleRows() (minStaleRows int64, ok bool) { + if desc.NoClusterSettingOverrides() { + return 0, false + } + if desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows == nil { + return 0, false + } + return *desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows, true +} + +// AutoStatsFractionStaleRows indicates the setting of +// sql.stats.automatic_collection.fraction_stale_rows for this table. +// If ok is true, then the fractionStaleRows value is valid, otherwise this has +// not been set at the table level. +func (desc *TableDescriptor) AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) { + if desc.NoClusterSettingOverrides() { + return 0, false + } + if desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows == nil { + return 0, false + } + return *desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows, true +} + +// NoClusterSettingOverrides is true if no cluster settings are set at the +// table level for the given table. +func (desc *TableDescriptor) NoClusterSettingOverrides() bool { + return desc.ClusterSettingsForTable == nil +} + // IsVirtualTable returns true if the TableDescriptor describes a // virtual Table (like the information_schema tables) and thus doesn't // need to be physically stored. diff --git a/pkg/sql/catalog/descpb/structured.proto b/pkg/sql/catalog/descpb/structured.proto index b64ba880e9a6..0b675311352b 100644 --- a/pkg/sql/catalog/descpb/structured.proto +++ b/pkg/sql/catalog/descpb/structured.proto @@ -1198,7 +1198,13 @@ message TableDescriptor { optional uint32 next_constraint_id = 49 [(gogoproto.nullable) = false, (gogoproto.customname) = "NextConstraintID", (gogoproto.casttype) = "ConstraintID"]; - // Next ID: 51 + // ClusterSettingsForTable are cluster settings specified at the table level. + optional cockroach.sql.catalog.catpb.ClusterSettingsForTable cluster_settings_for_table = 51 [(gogoproto.customname)="ClusterSettingsForTable"]; + + // SessionSettingsForTable are session settings specified at the table level. + optional cockroach.sql.catalog.catpb.SessionSettingsForTable session_settings_for_table = 52 [(gogoproto.customname)="SessionSettingsForTable"]; + + // Next ID: 53 } // SurvivalGoal is the survival goal for a database. diff --git a/pkg/sql/catalog/descriptor.go b/pkg/sql/catalog/descriptor.go index e56e2b07d974..c7077e775bc7 100644 --- a/pkg/sql/catalog/descriptor.go +++ b/pkg/sql/catalog/descriptor.go @@ -15,6 +15,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" @@ -678,6 +679,23 @@ type TableDescriptor interface { GetExcludeDataFromBackup() bool // GetStorageParams returns a list of storage parameters for the table. GetStorageParams(spaceBetweenEqual bool) []string + // NoClusterSettingOverrides is true if no cluster settings are set at the + // table level for the given table, meaning no cluster settings are overridden + // for this table. + NoClusterSettingOverrides() bool + // AutoStatsCollectionEnabled indicates if automatic statistics collection is + // explicitly enabled or disabled for this table. + AutoStatsCollectionEnabled() cluster.BoolSetting + // AutoStatsMinStaleRows indicates the setting of + // sql.stats.automatic_collection.min_stale_rows for this table. + // If ok is true, then the minStaleRows value is valid, otherwise this has not + // been set at the table level. + AutoStatsMinStaleRows() (minStaleRows int64, ok bool) + // AutoStatsFractionStaleRows indicates the setting of + // sql.stats.automatic_collection.fraction_stale_rows for this table. + // If ok is true, then the fractionStaleRows value is valid, otherwise this has + // not been set at the table level. + AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) } // TypeDescriptor will eventually be called typedesc.Descriptor. diff --git a/pkg/sql/catalog/tabledesc/structured.go b/pkg/sql/catalog/tabledesc/structured.go index dc21740b7076..19302a6eab3e 100644 --- a/pkg/sql/catalog/tabledesc/structured.go +++ b/pkg/sql/catalog/tabledesc/structured.go @@ -14,6 +14,7 @@ import ( "context" "fmt" "sort" + "strconv" "strings" "github.com/cockroachdb/cockroach/pkg/clusterversion" @@ -2565,6 +2566,24 @@ func (desc *wrapper) GetStorageParams(spaceBetweenEqual bool) []string { if exclude := desc.GetExcludeDataFromBackup(); exclude { appendStorageParam(`exclude_data_from_backup`, `true`) } + if settings := desc.ClusterSettingsForTable; settings != nil { + // These need to be wrapped in double-quotes because they contain '.' chars. + if settings.SqlStatsAutomaticCollectionEnabled != nil { + value := *settings.SqlStatsAutomaticCollectionEnabled + appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsEnabledSettingName), + fmt.Sprintf("%v", value)) + } + if settings.SqlStatsAutomaticCollectionMinStaleRows != nil { + value := *settings.SqlStatsAutomaticCollectionMinStaleRows + appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsMinStaleSettingName), + strconv.FormatInt(value, 10)) + } + if settings.SqlStatsAutomaticCollectionFractionStaleRows != nil { + value := *settings.SqlStatsAutomaticCollectionFractionStaleRows + appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsFractionStaleSettingName), + fmt.Sprintf("%g", value)) + } + } return storageParams } @@ -2583,6 +2602,26 @@ func (desc *wrapper) GetMultiRegionEnumDependencyIfExists() bool { return false } +// NoClusterSettingOverrides implements the TableDescriptor interface. +func (desc *wrapper) NoClusterSettingOverrides() bool { + return desc.TableDesc().NoClusterSettingOverrides() +} + +// AutoStatsCollectionEnabled implements the TableDescriptor interface. +func (desc *wrapper) AutoStatsCollectionEnabled() cluster.BoolSetting { + return desc.TableDesc().AutoStatsCollectionEnabled() +} + +// AutoStatsMinStaleRows implements the TableDescriptor interface. +func (desc *wrapper) AutoStatsMinStaleRows() (minStaleRows int64, ok bool) { + return desc.TableDesc().AutoStatsMinStaleRows() +} + +// AutoStatsFractionStaleRows implements the TableDescriptor interface. +func (desc *wrapper) AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) { + return desc.TableDesc().AutoStatsFractionStaleRows() +} + // SetTableLocalityRegionalByTable sets the descriptor's locality config to // regional at the table level in the supplied region. An empty region name // (or its alias PrimaryRegionNotSpecifiedName) denotes that the table is homed in diff --git a/pkg/sql/catalog/tabledesc/validate.go b/pkg/sql/catalog/tabledesc/validate.go index 599d96482bcb..2afda67e3261 100644 --- a/pkg/sql/catalog/tabledesc/validate.go +++ b/pkg/sql/catalog/tabledesc/validate.go @@ -15,6 +15,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catprivilege" @@ -520,6 +522,8 @@ func (desc *wrapper) ValidateSelf(vea catalog.ValidationErrorAccumulator) { } } + desc.validateClusterSettingsForTable(vea.Report) + if desc.IsSequence() { return } @@ -1501,3 +1505,61 @@ func (desc *wrapper) validatePartitioning() error { ) }) } + +// validateClusterSettingsForTable validates that any new cluster settings at +// the table level hold a valid value. +func (desc *wrapper) validateClusterSettingsForTable(errReportFn func(err error)) { + if desc.NoClusterSettingOverrides() { + return + } + desc.validateBoolSetting(errReportFn, desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionEnabled, + cluster.AutoStatsEnabledSettingName) + desc.validateIntSetting(errReportFn, desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows, + cluster.AutoStatsMinStaleSettingName, settings.NonNegativeInt) + desc.validateFloatSetting(errReportFn, desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows, + cluster.AutoStatsFractionStaleSettingName, settings.NonNegativeFloat) +} + +func (desc *wrapper) verifyProperTableForStatsSetting( + errReportFn func(err error), settingName string, +) { + if desc.IsVirtualTable() { + errReportFn(errors.Newf("Setting %s may not be set on virtual table", settingName)) + } + if !desc.IsTable() { + errReportFn(errors.Newf("Setting %s may not be set on a view or sequence", settingName)) + } +} + +func (desc *wrapper) validateBoolSetting( + errReportFn func(err error), value *bool, settingName string, +) { + if value != nil { + desc.verifyProperTableForStatsSetting(errReportFn, settingName) + } +} + +func (desc *wrapper) validateIntSetting( + errReportFn func(err error), value *int64, settingName string, validateFunc func(v int64) error, +) { + if value != nil { + desc.verifyProperTableForStatsSetting(errReportFn, settingName) + if err := validateFunc(*value); err != nil { + errReportFn(errors.Wrapf(err, "invalid value for %s", settingName)) + } + } +} + +func (desc *wrapper) validateFloatSetting( + errReportFn func(err error), + value *float64, + settingName string, + validateFunc func(v float64) error, +) { + if value != nil { + desc.verifyProperTableForStatsSetting(errReportFn, settingName) + if err := validateFunc(*value); err != nil { + errReportFn(errors.Wrapf(err, "invalid value for %s", settingName)) + } + } +} diff --git a/pkg/sql/catalog/tabledesc/validate_test.go b/pkg/sql/catalog/tabledesc/validate_test.go index ab8c710bb31d..9cf8244413e1 100644 --- a/pkg/sql/catalog/tabledesc/validate_test.go +++ b/pkg/sql/catalog/tabledesc/validate_test.go @@ -131,6 +131,8 @@ var validationMap = []struct { "ExcludeDataFromBackup": {status: thisFieldReferencesNoObjects}, "NextConstraintID": {status: iSolemnlySwearThisFieldIsValidated}, "DeclarativeSchemaChangerState": {status: iSolemnlySwearThisFieldIsValidated}, + "ClusterSettingsForTable": {status: iSolemnlySwearThisFieldIsValidated}, + "SessionSettingsForTable": {status: thisFieldReferencesNoObjects}, }, }, { @@ -289,6 +291,19 @@ var validationMap = []struct { "DeclarativeSchemaChangerState": {status: thisFieldReferencesNoObjects}, }, }, + { + obj: catpb.ClusterSettingsForTable{}, + fieldMap: map[string]validationStatusInfo{ + "SqlStatsAutomaticCollectionEnabled": {status: iSolemnlySwearThisFieldIsValidated}, + "SqlStatsAutomaticCollectionMinStaleRows": {status: iSolemnlySwearThisFieldIsValidated}, + "SqlStatsAutomaticCollectionFractionStaleRows": {status: iSolemnlySwearThisFieldIsValidated}, + }, + }, + { + // To be filled in with fields in the future... + obj: catpb.SessionSettingsForTable{}, + fieldMap: map[string]validationStatusInfo{}, + }, } type validationStatusInfo struct { @@ -332,6 +347,9 @@ func TestValidateTableDesc(t *testing.T) { computedExpr := "1 + 1" generatedAsIdentitySequenceOptionExpr := " START 2 INCREMENT 3 CACHE 10" + boolTrue := true + negativeOne := int64(-1) + negativeOneFloat := float64(-1) testData := []struct { err string @@ -1869,6 +1887,95 @@ func TestValidateTableDesc(t *testing.T) { }, }, }, + {`Setting sql.stats.automatic_collection.enabled may not be set on virtual table`, + descpb.TableDescriptor{ + ID: catconstants.MinVirtualID, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "bar"}, + }, + NextColumnID: 2, + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionEnabled: &boolTrue}, + }}, + {`Setting sql.stats.automatic_collection.enabled may not be set on a view or sequence`, + descpb.TableDescriptor{ + Name: "bar", + ID: 52, + ParentID: 1, + FormatVersion: descpb.InterleavedFormatVersion, + UnexposedParentSchemaID: keys.PublicSchemaID, + ViewQuery: "SELECT * FROM foo", + DependsOn: []descpb.ID{51}, + NextColumnID: 2, + Columns: []descpb.ColumnDescriptor{ + {Name: "a", ID: 1, Type: types.Int}, + }, + Privileges: catpb.NewBasePrivilegeDescriptor(security.AdminRoleName()), + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionEnabled: &boolTrue}, + }}, + {`Setting sql.stats.automatic_collection.enabled may not be set on a view or sequence`, + descpb.TableDescriptor{ + ID: 51, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "a", Type: types.Int}, + }, + SequenceOpts: &descpb.TableDescriptor_SequenceOpts{ + Increment: 1, + }, + PrimaryIndex: descpb.IndexDescriptor{ + ID: 1, + Name: "primary", + Unique: true, + KeyColumnIDs: []descpb.ColumnID{1}, + KeyColumnNames: []string{"a"}, + KeyColumnDirections: []descpb.IndexDescriptor_Direction{descpb.IndexDescriptor_ASC}, + Version: descpb.PrimaryIndexWithStoredColumnsVersion, + EncodingType: descpb.PrimaryIndexEncoding, + ConstraintID: 1, + }, + Families: []descpb.ColumnFamilyDescriptor{ + {ID: 0, Name: "primary", + ColumnIDs: []descpb.ColumnID{1}, + ColumnNames: []string{"a"}, + }, + }, + NextColumnID: 2, + NextFamilyID: 1, + NextIndexID: 5, + NextConstraintID: 2, + Privileges: catpb.NewBasePrivilegeDescriptor(security.AdminRoleName()), + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionEnabled: &boolTrue}, + }, + }, + {`invalid value for sql.stats.automatic_collection.min_stale_rows: cannot be set to a negative value: -1`, + descpb.TableDescriptor{ + ID: 2, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "bar"}, + }, + NextColumnID: 2, + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionMinStaleRows: &negativeOne}, + }}, + {`invalid value for sql.stats.automatic_collection.fraction_stale_rows: cannot set to a negative value: -1.000000`, + descpb.TableDescriptor{ + ID: 2, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "bar"}, + }, + NextColumnID: 2, + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionFractionStaleRows: &negativeOneFloat}, + }}, } for i, d := range testData { t.Run(d.err, func(t *testing.T) { @@ -2116,6 +2223,7 @@ func TestValidateCrossTableReferences(t *testing.T) { }, // Views. { // 10 + err: ``, desc: descpb.TableDescriptor{ Name: "foo", ID: 51, diff --git a/pkg/sql/distsql_plan_stats.go b/pkg/sql/distsql_plan_stats.go index fb097b34c726..2542e5137ea6 100644 --- a/pkg/sql/distsql_plan_stats.go +++ b/pkg/sql/distsql_plan_stats.go @@ -212,6 +212,9 @@ func (dsp *DistSQLPlanner) createStatsPlan( var rowsExpected uint64 if len(tableStats) > 0 { overhead := stats.AutomaticStatisticsFractionStaleRows.Get(&dsp.st.SV) + if autoStatsFractionStaleRowsForTable, ok := desc.AutoStatsFractionStaleRows(); ok { + overhead = autoStatsFractionStaleRowsForTable + } // Convert to a signed integer first to make the linter happy. rowsExpected = uint64(int64( // The total expected number of rows is the same number that was measured diff --git a/pkg/sql/logictest/testdata/logic_test/alter_table b/pkg/sql/logictest/testdata/logic_test/alter_table index ab682db27ef5..b1c08a6fa75a 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_table +++ b/pkg/sql/logictest/testdata/logic_test/alter_table @@ -2364,3 +2364,114 @@ COMMIT; statement ok ROLLBACK; + +subtest table_settings + +statement ok +CREATE TABLE t5 (a int) + +# Turn on automatic stats collection +statement ok +ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = true) + +# Verify automatic collection is enabled. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL + AND crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' + ->> 'sqlStatsAutomaticCollectionEnabled' = 'true' +---- +{"sqlStatsAutomaticCollectionEnabled": true} + +# Strings in settings should be converted to the proper data type. +statement ok +ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = 'false') + +# Verify automatic collection is disabled. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL + AND crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' + ->> 'sqlStatsAutomaticCollectionEnabled' = 'false' +---- +{"sqlStatsAutomaticCollectionEnabled": false} + +# SHOW CREATE TABLE displays the value properly. +query T +SELECT create_statement FROM [SHOW CREATE TABLE t5] +---- +CREATE TABLE public.t5 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t5_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.enabled" = false) + +statement error pq: parameter "sql.stats.automatic_collection.enabled" requires a Boolean value +ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = 123) + +statement ok +ALTER TABLE t5 RESET ("sql.stats.automatic_collection.enabled") + +# Verify the automatic collection setting is removed. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL +---- +{} + +statement error pq: invalid value for sql.stats.automatic_collection.fraction_stale_rows: could not parse "hello" as type float: strconv.ParseFloat: parsing "hello": invalid syntax +ALTER TABLE t5 SET ("sql.stats.automatic_collection.fraction_stale_rows" = 'hello') + +statement error pq: invalid value for sql.stats.automatic_collection.min_stale_rows: could not parse "world" as type int: strconv.ParseInt: parsing "world": invalid syntax +ALTER TABLE t5 SET ("sql.stats.automatic_collection.min_stale_rows" = 'world') + +# Verify strings can be converted to proper setting values. +statement ok +ALTER TABLE t5 SET ("sql.stats.automatic_collection.fraction_stale_rows" = '0.15', + "sql.stats.automatic_collection.min_stale_rows" = '1234') + +# Verify settings +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL +---- +{"sqlStatsAutomaticCollectionFractionStaleRows": 0.15, "sqlStatsAutomaticCollectionMinStaleRows": "1234"} + +query T +SELECT create_statement FROM [SHOW CREATE TABLE t5] +---- +CREATE TABLE public.t5 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t5_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.min_stale_rows" = 1234, "sql.stats.automatic_collection.fraction_stale_rows" = 0.15) diff --git a/pkg/sql/logictest/testdata/logic_test/create_table b/pkg/sql/logictest/testdata/logic_test/create_table index 039042219dd2..489cbe24c224 100644 --- a/pkg/sql/logictest/testdata/logic_test/create_table +++ b/pkg/sql/logictest/testdata/logic_test/create_table @@ -872,3 +872,67 @@ CREATE TABLE public.t_good_hash_indexes_2 ( crdb_internal_a_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 5:::INT8)) VIRTUAL, CONSTRAINT t_good_hash_indexes_2_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=5) ) + +subtest table_settings + +statement ok +CREATE TABLE t1 (a int) WITH ("sql.stats.automatic_collection.enabled" = true) + +# Verify automatic collection is enabled. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't1' + AND tbl.drop_time IS NULL +---- +{"sqlStatsAutomaticCollectionEnabled": true} + +statement ok +DROP TABLE t1 + +statement ok +CREATE TABLE t1 (a int) WITH ("sql.stats.automatic_collection.fraction_stale_rows" = 0.5, + "sql.stats.automatic_collection.min_stale_rows" = 4000) + +# Verify settings +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't1' + AND tbl.drop_time IS NULL +---- +{"sqlStatsAutomaticCollectionFractionStaleRows": 0.5, "sqlStatsAutomaticCollectionMinStaleRows": "4000"} + +query T +SELECT create_statement FROM [SHOW CREATE TABLE t1] +---- +CREATE TABLE public.t1 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t1_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.min_stale_rows" = 4000, "sql.stats.automatic_collection.fraction_stale_rows" = 0.5) + +statement ok +CREATE TABLE t11 (a int) WITH ("sql.stats.automatic_collection.enabled" = true, + "sql.stats.automatic_collection.fraction_stale_rows" = 1.797693134862315708145274237317043567981e+308, + "sql.stats.automatic_collection.min_stale_rows" = 9223372036854775807) + +# Using max values for auto stats table settings +query T +SELECT create_statement FROM [SHOW CREATE TABLE t11] +---- +CREATE TABLE public.t11 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t11_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.enabled" = true, "sql.stats.automatic_collection.min_stale_rows" = 9223372036854775807, "sql.stats.automatic_collection.fraction_stale_rows" = 1.7976931348623157e+308) diff --git a/pkg/sql/paramparse/BUILD.bazel b/pkg/sql/paramparse/BUILD.bazel index 7faf6d4c7020..23a703e3020d 100644 --- a/pkg/sql/paramparse/BUILD.bazel +++ b/pkg/sql/paramparse/BUILD.bazel @@ -12,6 +12,8 @@ go_library( deps = [ "//pkg/geo/geoindex", "//pkg/server/telemetry", + "//pkg/settings", + "//pkg/settings/cluster", "//pkg/sql/catalog/catpb", "//pkg/sql/catalog/descpb", "//pkg/sql/catalog/tabledesc", diff --git a/pkg/sql/paramparse/paramobserver.go b/pkg/sql/paramparse/paramobserver.go index 584d33be5955..028480f9a88a 100644 --- a/pkg/sql/paramparse/paramobserver.go +++ b/pkg/sql/paramparse/paramobserver.go @@ -16,6 +16,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/geo/geoindex" "github.com/cockroachdb/cockroach/pkg/server/telemetry" + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc" @@ -139,6 +141,34 @@ func boolFromDatum(evalCtx *tree.EvalContext, key string, datum tree.Datum) (boo return bool(*s), nil } +func intFromDatum(evalCtx *tree.EvalContext, key string, datum tree.Datum) (int64, error) { + intDatum := datum + if stringVal, err := DatumAsString(evalCtx, key, datum); err == nil { + if intDatum, err = tree.ParseDInt(stringVal); err != nil { + return 0, errors.Wrapf(err, "invalid value for %s", key) + } + } + s, err := DatumAsInt(evalCtx, key, intDatum) + if err != nil { + return 0, err + } + return s, nil +} + +func floatFromDatum(evalCtx *tree.EvalContext, key string, datum tree.Datum) (float64, error) { + floatDatum := datum + if stringVal, err := DatumAsString(evalCtx, key, datum); err == nil { + if floatDatum, err = tree.ParseDFloat(stringVal); err != nil { + return 0, errors.Wrapf(err, "invalid value for %s", key) + } + } + s, err := DatumAsFloat(evalCtx, key, floatDatum) + if err != nil { + return 0, err + } + return s, nil +} + type tableParam struct { onSet func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, evalCtx *tree.EvalContext, key string, datum tree.Datum) error onReset func(po *TableStorageParamObserver, evalCtx *tree.EvalContext, key string) error @@ -465,6 +495,18 @@ var tableParams = map[string]tableParam{ return nil }, }, + cluster.AutoStatsEnabledSettingName: { + onSet: boolTableSettingFunc, + onReset: tableSettingResetFunc, + }, + cluster.AutoStatsMinStaleSettingName: { + onSet: intTableSettingFunc(settings.NonNegativeInt), + onReset: tableSettingResetFunc, + }, + cluster.AutoStatsFractionStaleSettingName: { + onSet: floatTableSettingFunc(settings.NonNegativeFloat), + onReset: tableSettingResetFunc, + }, } func init() { @@ -508,6 +550,226 @@ func init() { } } +func boolTableSettingFunc( + ctx context.Context, + po *TableStorageParamObserver, + semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, + key string, + datum tree.Datum, +) error { + boolVal, err := boolFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.ClusterSettingsForTable == nil { + po.tableDesc.ClusterSettingsForTable = &catpb.ClusterSettingsForTable{} + } + return setBoolValue(key, boolVal, po.tableDesc.ClusterSettingsForTable) +} + +func intTableSettingFunc( + validateFunc func(v int64) error, +) func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + return func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + intVal, err := intFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.ClusterSettingsForTable == nil { + po.tableDesc.ClusterSettingsForTable = &catpb.ClusterSettingsForTable{} + } + return setIntValue(key, intVal, po.tableDesc.ClusterSettingsForTable, validateFunc) + } +} + +func floatTableSettingFunc( + validateFunc func(v float64) error, +) func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + return func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + floatVal, err := floatFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.ClusterSettingsForTable == nil { + po.tableDesc.ClusterSettingsForTable = &catpb.ClusterSettingsForTable{} + } + return setFloatValue(key, floatVal, po.tableDesc.ClusterSettingsForTable, validateFunc) + } +} + +func tableSettingResetFunc( + po *TableStorageParamObserver, evalCtx *tree.EvalContext, key string, +) error { + if po.tableDesc.ClusterSettingsForTable == nil { + return nil + } + return resetValue(key, po.tableDesc.ClusterSettingsForTable) +} + +func resetValue(settingName string, clusterSettingsForTable *catpb.ClusterSettingsForTable) error { + if clusterSettingsForTable == nil { + return errors.Newf("unable to reset table setting %s", settingName) + } + switch settingName { + case cluster.AutoStatsEnabledSettingName: + return resetBoolValue(&clusterSettingsForTable.SqlStatsAutomaticCollectionEnabled) + case cluster.AutoStatsMinStaleSettingName: + return resetIntValue(&clusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows) + case cluster.AutoStatsFractionStaleSettingName: + return resetFloatValue(&clusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows) + } + return errors.Newf("unable to reset table setting %s", settingName) +} + +func resetBoolValue(settingHolder **bool) error { + if *settingHolder == nil { + // This setting is unset or has already been reset. + return nil + } + *settingHolder = nil + return nil +} + +func resetIntValue(settingHolder **int64) error { + if *settingHolder == nil { + // This setting is unset or has already been reset. + return nil + } + *settingHolder = nil + return nil +} + +func resetFloatValue(settingHolder **float64) error { + if *settingHolder == nil { + // This setting is unset or has already been reset. + return nil + } + *settingHolder = nil + return nil +} + +func setBoolValue( + settingName string, boolVal bool, clusterSettingsForTable *catpb.ClusterSettingsForTable, +) error { + if clusterSettingsForTable == nil { + return errors.Newf("unable to set table setting %s", settingName) + } + // settingHolder is the memory address holding the *bool setting. + var settingHolder **bool + switch settingName { + case cluster.AutoStatsEnabledSettingName: + settingHolder = &clusterSettingsForTable.SqlStatsAutomaticCollectionEnabled + default: + return errors.Newf("unable to set table setting %s", settingName) + } + // Get the actual setting value. Setting is nullable and optional, + // so may be nil. + setting := *settingHolder + + if setting == nil { + // If this was never set before, make a pointer to the new value and assign + // it to the proper field in clusterSettingsForTable. + *settingHolder = &boolVal + return nil + } else if *setting == boolVal { + // If the current setting matches the value we're trying to set, there's + // nothing to do. Just return. + return nil + } + // Update the current setting with the new value. We could have updated + // settingHolder directly, as in the `setting == nil` case which would + // have allocated new memory. Instead we're just overwriting the old value. + *setting = boolVal + return nil +} + +func setIntValue( + settingName string, + intVal int64, + clusterSettingsForTable *catpb.ClusterSettingsForTable, + validateFunc func(v int64) error, +) error { + if clusterSettingsForTable == nil { + return errors.Newf("unable to set table setting %s", settingName) + } + if err := validateFunc(intVal); err != nil { + return errors.Wrapf(err, "invalid value for %s", settingName) + } + // settingHolder is the memory address holding the *int64 setting. + var settingHolder **int64 + switch settingName { + case cluster.AutoStatsMinStaleSettingName: + settingHolder = &clusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows + default: + return errors.Newf("unable to set table setting %s", settingName) + } + // Get the actual setting value. Setting is nullable and optional, + // so may be nil. + setting := *settingHolder + + if setting == nil { + // If this was never set before, make a pointer to the new value and assign + // it to the proper field in clusterSettingsForTable. + *settingHolder = &intVal + return nil + } else if *setting == intVal { + // If the current setting matches the value we're trying to set, there's + // nothing to do. Just return. + return nil + } + // Update the current setting with the new value. We could have updated + // settingHolder directly, as in the `setting == nil` case which would + // have allocated new memory. Instead we're just overwriting the old value. + *setting = intVal + return nil +} + +func setFloatValue( + settingName string, + floatVal float64, + clusterSettingsForTable *catpb.ClusterSettingsForTable, + validateFunc func(v float64) error, +) error { + if clusterSettingsForTable == nil { + return errors.Newf("unable to set table setting %s", settingName) + } + if err := validateFunc(floatVal); err != nil { + return errors.Wrapf(err, "invalid value for %s", settingName) + } + // settingHolder is the memory address holding the *float64 setting. + var settingHolder **float64 + switch settingName { + case cluster.AutoStatsFractionStaleSettingName: + settingHolder = &clusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows + default: + return errors.Newf("unable to set table setting %s", settingName) + } + // Get the actual setting value. Setting is nullable and optional, + // so may be nil. + setting := *settingHolder + + if setting == nil { + // If this was never set before, make a pointer to the new value and assign + // it to the proper field in clusterSettingsForTable. + *settingHolder = &floatVal + return nil + } else if *setting == floatVal { + // If the current setting matches the value we're trying to set, there's + // nothing to do. Just return. + return nil + } + // Update the current setting with the new value. We could have updated + // settingHolder directly, as in the `setting == nil` case which would + // have allocated new memory. Instead we're just overwriting the old value. + *setting = floatVal + return nil +} + // onSet implements the StorageParamObserver interface. func (po *TableStorageParamObserver) onSet( ctx context.Context, diff --git a/pkg/sql/set_cluster_setting.go b/pkg/sql/set_cluster_setting.go index 7e2df6c23ef5..56cee568754a 100644 --- a/pkg/sql/set_cluster_setting.go +++ b/pkg/sql/set_cluster_setting.go @@ -36,7 +36,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb" "github.com/cockroachdb/cockroach/pkg/sql/sessioninit" "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" - "github.com/cockroachdb/cockroach/pkg/sql/stats" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -226,7 +225,7 @@ func (n *setClusterSettingNode) startExec(params runParams) error { // Report tracked cluster settings via telemetry. // TODO(justin): implement a more general mechanism for tracking these. switch n.name { - case stats.AutoStatsClusterSettingName: + case cluster.AutoStatsEnabledSettingName: switch expectedEncodedValue { case "true": telemetry.Inc(sqltelemetry.TurnAutoStatsOnUseCounter) diff --git a/pkg/sql/stats/BUILD.bazel b/pkg/sql/stats/BUILD.bazel index 45d47a829f0b..95c0c4828aea 100644 --- a/pkg/sql/stats/BUILD.bazel +++ b/pkg/sql/stats/BUILD.bazel @@ -88,6 +88,7 @@ go_test( "//pkg/sql/catalog/descpb", "//pkg/sql/catalog/descs", "//pkg/sql/catalog/desctestutils", + "//pkg/sql/catalog/systemschema", "//pkg/sql/catalog/tabledesc", "//pkg/sql/catalog/typedesc", "//pkg/sql/execinfra", diff --git a/pkg/sql/stats/automatic_stats.go b/pkg/sql/stats/automatic_stats.go index 36c9093d2210..dfb53f7c9b88 100644 --- a/pkg/sql/stats/automatic_stats.go +++ b/pkg/sql/stats/automatic_stats.go @@ -18,10 +18,12 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" + "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/descs" "github.com/cockroachdb/cockroach/pkg/sql/catalog/systemschema" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" @@ -34,15 +36,11 @@ import ( "github.com/cockroachdb/errors" ) -// AutoStatsClusterSettingName is the name of the automatic stats collection -// cluster setting. -const AutoStatsClusterSettingName = "sql.stats.automatic_collection.enabled" - // AutomaticStatisticsClusterMode controls the cluster setting for enabling // automatic table statistics collection. var AutomaticStatisticsClusterMode = settings.RegisterBoolSetting( settings.TenantWritable, - AutoStatsClusterSettingName, + cluster.AutoStatsEnabledSettingName, "automatic statistics collection mode", true, ).WithPublic() @@ -81,7 +79,7 @@ var AutomaticStatisticsMaxIdleTime = settings.RegisterFloatSetting( var AutomaticStatisticsFractionStaleRows = func() *settings.FloatSetting { s := settings.RegisterFloatSetting( settings.TenantWritable, - "sql.stats.automatic_collection.fraction_stale_rows", + cluster.AutoStatsFractionStaleSettingName, "target fraction of stale rows per table that will trigger a statistics refresh", 0.2, settings.NonNegativeFloat, @@ -96,7 +94,7 @@ var AutomaticStatisticsFractionStaleRows = func() *settings.FloatSetting { var AutomaticStatisticsMinStaleRows = func() *settings.IntSetting { s := settings.RegisterIntSetting( settings.TenantWritable, - "sql.stats.automatic_collection.min_stale_rows", + cluster.AutoStatsMinStaleSettingName, "target minimum number of stale rows per table that will trigger a statistics refresh", 500, settings.NonNegativeInt, @@ -212,6 +210,9 @@ type Refresher struct { // mutationCounts contains aggregated mutation counts for each table that // have yet to be processed by the refresher. mutationCounts map[descpb.ID]int64 + + // numTablesEnsured is an internal counter for testing ensureAllTables. + numTablesEnsured int } // mutation contains metadata about a SQL mutation and is the message passed to @@ -244,6 +245,70 @@ func MakeRefresher( } } +func (r *Refresher) getNumTablesEnsured() int { + return r.numTablesEnsured +} + +func (r *Refresher) getTableDescriptor( + ctx context.Context, tableID descpb.ID, +) (catalog.TableDescriptor, error) { + var desc catalog.TableDescriptor + if err := r.cache.collectionFactory.Txn(ctx, r.cache.SQLExecutor, r.cache.ClientDB, func( + ctx context.Context, txn *kv.Txn, descriptors *descs.Collection, + ) (err error) { + desc, err = descriptors.GetImmutableTableByID(ctx, txn, tableID, tree.ObjectLookupFlagsWithRequired()) + return err + }); err != nil { + log.Errorf(ctx, "%v", + errors.Wrapf(err, "failed to get table descriptor for automatic stats on table id: %d", tableID)) + return nil, err + } + return desc, nil +} + +func (r *Refresher) automaticStatisticsEnabled(desc catalog.TableDescriptor) bool { + enabledForTable := desc.AutoStatsCollectionEnabled() + // The table-level setting of sql.stats.automatic_collection.enabled takes + // precedence over the cluster setting. + if enabledForTable == cluster.NotSet { + return AutomaticStatisticsClusterMode.Get(&r.st.SV) + } + return enabledForTable == cluster.True +} + +func (r *Refresher) autoStatsEnabled(ctx context.Context, tableID descpb.ID) bool { + var desc catalog.TableDescriptor + var err error + if desc, err = r.getTableDescriptor(ctx, tableID); err != nil { + return AutomaticStatisticsClusterMode.Get(&r.st.SV) + } + return r.automaticStatisticsEnabled(desc) +} + +func (r *Refresher) autoStatsMinStaleRows(ctx context.Context, tableID descpb.ID) int64 { + var desc catalog.TableDescriptor + var err error + if desc, err = r.getTableDescriptor(ctx, tableID); err != nil { + return AutomaticStatisticsMinStaleRows.Get(&r.st.SV) + } + if minStaleRows, ok := desc.AutoStatsMinStaleRows(); ok { + return minStaleRows + } + return AutomaticStatisticsMinStaleRows.Get(&r.st.SV) +} + +func (r *Refresher) autoStatsFractionStaleRows(ctx context.Context, tableID descpb.ID) float64 { + var desc catalog.TableDescriptor + var err error + if desc, err = r.getTableDescriptor(ctx, tableID); err != nil { + return AutomaticStatisticsFractionStaleRows.Get(&r.st.SV) + } + if fractionStaleRows, ok := desc.AutoStatsFractionStaleRows(); ok { + return fractionStaleRows + } + return AutomaticStatisticsFractionStaleRows.Get(&r.st.SV) +} + // Start starts the stats refresher thread, which polls for messages about // new SQL mutations and refreshes the table statistics with probability // proportional to the percentage of rows affected. @@ -288,10 +353,13 @@ func (r *Refresher) Start( } for tableID, rowsAffected := range mutationCounts { - // Check the cluster setting before each refresh in case it was - // disabled recently. - if !AutomaticStatisticsClusterMode.Get(&r.st.SV) { - break + // Check the cluster setting and table setting before each refresh + // in case they were disabled recently. + + if !r.autoStatsEnabled(ctx, tableID) { + // Table-level or cluster setting disables auto stats, + // so skip this table. + continue } r.maybeRefreshStats(ctx, stopper, tableID, rowsAffected, r.asOfTime) @@ -308,6 +376,9 @@ func (r *Refresher) Start( }); err != nil { log.Errorf(ctx, "failed to refresh stats: %v", err) } + // This clears out any tables that may have been added to the + // mutationCounts map by ensureAllTables. This is by design. We don't + // want to constantly refresh tables that are read-only. r.mutationCounts = make(map[descpb.ID]int64, len(r.mutationCounts)) case mut := <-r.mutations: @@ -321,19 +392,8 @@ func (r *Refresher) Start( return nil } -// ensureAllTables ensures that an entry exists in r.mutationCounts for each -// table in the database. -func (r *Refresher) ensureAllTables( - ctx context.Context, settings *settings.Values, initialTableCollectionDelay time.Duration, -) { - if !AutomaticStatisticsClusterMode.Get(settings) { - // Automatic stats are disabled. - return - } - - // Use a historical read so as to disable txn contention resolution. - getAllTablesQuery := fmt.Sprintf( - ` +const ( + getAllTablesTemplateSQL = ` SELECT tbl.table_id FROM @@ -346,16 +406,33 @@ WHERE AND tbl.drop_time IS NULL AND ( crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', d.descriptor, false)->'table'->>'viewQuery' - ) IS NULL;`, - initialTableCollectionDelay, - systemschema.SystemDatabaseName, - ) + ) IS NULL + %s` + + explicitlyEnabledTablesPredicate = `AND + (crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' ->> 'sqlStatsAutomaticCollectionEnabled' = 'true' + )` + + autoStatsEnableOrNotSpecifiedPredicate = `AND + (crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable'->'sqlStatsAutomaticCollectionEnabled' IS NULL + OR crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' ->> 'sqlStatsAutomaticCollectionEnabled' = 'true' + )` +) +func (r *Refresher) getApplicableTables( + ctx context.Context, stmt string, opname string, forTesting bool, +) { + if forTesting { + r.numTablesEnsured = 0 + } it, err := r.ex.QueryIterator( ctx, - "get-tables", + opname, nil, /* txn */ - getAllTablesQuery, + stmt, ) if err == nil { var ok bool @@ -366,6 +443,9 @@ WHERE // The query already excludes views and system tables. if !descpb.IsVirtualTable(tableID) { r.mutationCounts[tableID] += 0 + if forTesting { + r.numTablesEnsured++ + } } } } @@ -376,8 +456,41 @@ WHERE // entry is idempotent (i.e. we didn't mess up anything for the next // call to this method). log.Errorf(ctx, "failed to get tables for automatic stats: %v", err) + } +} + +// ensureAllTables ensures that an entry exists in r.mutationCounts for each +// table in the database which has auto stats enabled, either explicitly via +// a table-level setting, or implicitly via the cluster setting. +func (r *Refresher) ensureAllTables( + ctx context.Context, settings *settings.Values, initialTableCollectionDelay time.Duration, +) { + if !AutomaticStatisticsClusterMode.Get(settings) { + // Use a historical read so as to disable txn contention resolution. + // A table-level setting of sql.stats.automatic_collection.enabled=true is + // checked and only those tables are included in this scan. + getTablesWithAutoStatsExplicitlyEnabledQuery := fmt.Sprintf( + getAllTablesTemplateSQL, + initialTableCollectionDelay, + systemschema.SystemDatabaseName, + explicitlyEnabledTablesPredicate, + ) + r.getApplicableTables(ctx, getTablesWithAutoStatsExplicitlyEnabledQuery, + "get-tables-with-autostats-explicitly-enabled", false) return } + + // Use a historical read so as to disable txn contention resolution. + // A table-level setting of sql.stats.automatic_collection.enabled of null, + // meaning not set, or true qualifies rows we're interested in. + getAllTablesQuery := fmt.Sprintf( + getAllTablesTemplateSQL, + initialTableCollectionDelay, + systemschema.SystemDatabaseName, + autoStatsEnableOrNotSpecifiedPredicate, + ) + r.getApplicableTables(ctx, getAllTablesQuery, + "get-tables", false) } // NotifyMutation is called by SQL mutation operations to signal to the @@ -385,10 +498,10 @@ WHERE // successful insert, update, upsert or delete. rowsAffected refers to the // number of rows written as part of the mutation operation. func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected int) { - if !AutomaticStatisticsClusterMode.Get(&r.st.SV) { - // Automatic stats are disabled. + if !r.automaticStatisticsEnabled(table) { return } + if !hasStatistics(table) { // Don't collect stats for this kind of table: system, virtual, view, etc. return @@ -397,7 +510,10 @@ func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected i // Send mutation info to the refresher thread to avoid adding latency to // the calling transaction. select { - case r.mutations <- mutation{tableID: table.GetID(), rowsAffected: rowsAffected}: + case r.mutations <- mutation{ + tableID: table.GetID(), + rowsAffected: rowsAffected, + }: default: // Don't block if there is no room in the buffered channel. if bufferedChanFullLogLimiter.ShouldLog() { @@ -453,9 +569,15 @@ func (r *Refresher) maybeRefreshStats( mustRefresh = true } - targetRows := int64(rowCount*AutomaticStatisticsFractionStaleRows.Get(&r.st.SV)) + - AutomaticStatisticsMinStaleRows.Get(&r.st.SV) - if !mustRefresh && rowsAffected < math.MaxInt32 && r.randGen.randInt(targetRows) >= rowsAffected { + statsFractionStaleRows := r.autoStatsFractionStaleRows(ctx, tableID) + statsMinStaleRows := r.autoStatsMinStaleRows(ctx, tableID) + targetRows := int64(rowCount*statsFractionStaleRows) + statsMinStaleRows + // randInt will panic if we pass it a value of 0. + randomTargetRows := int64(0) + if targetRows > 0 { + randomTargetRows = r.randGen.randInt(targetRows) + } + if !mustRefresh && rowsAffected < math.MaxInt32 && randomTargetRows >= rowsAffected { // No refresh is happening this time. return } diff --git a/pkg/sql/stats/automatic_stats_test.go b/pkg/sql/stats/automatic_stats_test.go index 592c6d7c5cc5..c79f2aebf741 100644 --- a/pkg/sql/stats/automatic_stats_test.go +++ b/pkg/sql/stats/automatic_stats_test.go @@ -27,6 +27,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descs" "github.com/cockroachdb/cockroach/pkg/sql/catalog/desctestutils" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/systemschema" "github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sqlutil" @@ -98,6 +99,31 @@ func TestMaybeRefreshStats(t *testing.T) { t.Fatal(err) } + // Setting minStaleRows for the table prevents refreshing from occurring. + sqlRun.Exec(t, + `ALTER TABLE t.a SET ("sql.stats.automatic_collection.min_stale_rows" = 100000000)`) + refresher.maybeRefreshStats( + ctx, s.Stopper(), descA.GetID(), 10 /* rowsAffected */, time.Microsecond, /* asOf */ + ) + if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { + t.Fatal(err) + } + sqlRun.Exec(t, + `ALTER TABLE t.a RESET ("sql.stats.automatic_collection.min_stale_rows")`) + + // Setting fractionStaleRows for the table can also prevent refreshing from + // occurring, though this is a not a typical value for this setting. + sqlRun.Exec(t, + `ALTER TABLE t.a SET ("sql.stats.automatic_collection.fraction_stale_rows" = 100000000)`) + refresher.maybeRefreshStats( + ctx, s.Stopper(), descA.GetID(), 10 /* rowsAffected */, time.Microsecond, /* asOf */ + ) + if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { + t.Fatal(err) + } + sqlRun.Exec(t, + `ALTER TABLE t.a RESET ("sql.stats.automatic_collection.fraction_stale_rows")`) + // With rowsAffected=10, refreshing should work. Since there are more rows // updated than exist in the table, the probability of a refresh is 100%. refresher.maybeRefreshStats( @@ -121,6 +147,103 @@ func TestMaybeRefreshStats(t *testing.T) { } } +func TestEnsureAllTablesQueries(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + + s, sqlDB, kvDB := serverutils.StartServer(t, base.TestServerArgs{}) + defer s.Stopper().Stop(ctx) + + st := cluster.MakeTestingClusterSettings() + evalCtx := tree.NewTestingEvalContext(st) + defer evalCtx.Stop(ctx) + + sqlRun := sqlutils.MakeSQLRunner(sqlDB) + sqlRun.Exec(t, + `CREATE DATABASE t; + CREATE TABLE t.a (k INT PRIMARY KEY); + CREATE TABLE t.b (k INT PRIMARY KEY);`) + + executor := s.InternalExecutor().(sqlutil.InternalExecutor) + cache := NewTableStatisticsCache( + ctx, + 10, /* cacheSize */ + kvDB, + executor, + keys.SystemSQLCodec, + s.ClusterSettings(), + s.RangeFeedFactory().(*rangefeed.Factory), + s.CollectionFactory().(*descs.CollectionFactory), + ) + r := MakeRefresher(s.AmbientCtx(), st, executor, cache, time.Microsecond /* asOfTime */) + + if err := checkAllTablesCount(ctx, 2, r); err != nil { + t.Fatal(err) + } + if err := checkExplicitlyEnabledTablesCount(ctx, 0, r); err != nil { + t.Fatal(err) + } + sqlRun.Exec(t, + `ALTER TABLE t.a SET ("sql.stats.automatic_collection.enabled" = true)`) + if err := checkAllTablesCount(ctx, 2, r); err != nil { + t.Fatal(err) + } + if err := checkExplicitlyEnabledTablesCount(ctx, 1, r); err != nil { + t.Fatal(err) + } + sqlRun.Exec(t, + `ALTER TABLE t.b SET ("sql.stats.automatic_collection.enabled" = false)`) + if err := checkAllTablesCount(ctx, 1, r); err != nil { + t.Fatal(err) + } + if err := checkExplicitlyEnabledTablesCount(ctx, 1, r); err != nil { + t.Fatal(err) + } + sqlRun.Exec(t, + `ALTER TABLE t.a SET ("sql.stats.automatic_collection.enabled" = false)`) + if err := checkAllTablesCount(ctx, 0, r); err != nil { + t.Fatal(err) + } + if err := checkExplicitlyEnabledTablesCount(ctx, 0, r); err != nil { + t.Fatal(err) + } +} + +func checkAllTablesCount(ctx context.Context, expected int, r *Refresher) error { + const collectionDelay = time.Microsecond + getAllTablesQuery := fmt.Sprintf( + getAllTablesTemplateSQL, + collectionDelay, + systemschema.SystemDatabaseName, + autoStatsEnableOrNotSpecifiedPredicate, + ) + r.getApplicableTables(ctx, getAllTablesQuery, + "get-tables", true) + actual := r.getNumTablesEnsured() + if expected != actual { + return fmt.Errorf("expected %d table(s) but found %d", expected, actual) + } + return nil +} + +func checkExplicitlyEnabledTablesCount(ctx context.Context, expected int, r *Refresher) error { + const collectionDelay = time.Microsecond + getTablesWithAutoStatsExplicitlyEnabledQuery := fmt.Sprintf( + getAllTablesTemplateSQL, + collectionDelay, + systemschema.SystemDatabaseName, + explicitlyEnabledTablesPredicate, + ) + r.getApplicableTables(ctx, getTablesWithAutoStatsExplicitlyEnabledQuery, + "get-tables-with-autostats-explicitly-enabled", true) + actual := r.getNumTablesEnsured() + if expected != actual { + return fmt.Errorf("expected %d table(s) but found %d", expected, actual) + } + return nil +} + func TestAverageRefreshTime(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t)