From e6f508f6b5e7938ce4d1c62234bffd519727ad21 Mon Sep 17 00:00:00 2001 From: Mark Sirek Date: Wed, 16 Mar 2022 14:57:49 -0700 Subject: [PATCH] stats: table-level setting to turn auto stats collection on/off Fixes #40989 Previously, there was no way to enable or disable automatic statistics collection at the table level. It could only be turned on or off via the `sql.stats.automatic_collection.enabled` cluster setting. This was inadequate because statistics collection can be expensive for large tables, and it would be desirable to defer collection until after data is finished loading, or in off hours. Also, small tables which are frequently updated may trigger statistics collection leading to unnecessary overhead and/or unpredictable query plan changes. To address this, this patch adds support for setting of the following cluster settings at the table level: ``` sql.stats.automatic_collection.enabled sql.stats.automatic_collection.fraction_stale_rows sql.stats.automatic_collection.min_stale_rows ``` for example: ``` ALTER TABLE t1 SET ("sql.stats.automatic_collection.enabled" = true); ALTER TABLE t1 SET ("sql.stats.automatic_collection.fraction_stale_rows" = 0.1, "sql.stats.automatic_collection.min_stale_rows" = 2000); ``` The table-level setting takes precedence over the cluster setting. Release justification: Low risk fix for missing fine-grained control over automatic statistics collection. Release note (sql change): Automatic statistics collection can now be enabled or disabled for individual tables, taking precedence over the cluster setting, for example: ``` ALTER TABLE t1 SET ("sql.stats.automatic_collection.enabled" = true); ALTER TABLE t1 SET ("sql.stats.automatic_collection.enabled" = false); ALTER TABLE t1 RESET ("sql.stats.automatic_collection.enabled"); ``` RESET removes the setting value entirely, in which case the cluster setting of the same name is in effect for the table. Cluster settings `sql.stats.automatic_collection.fraction_stale_rows` and `sql.stats.automatic_collection.min_stale_rows` can now also be set at the table level, either at table creation time, or later, independent of whether auto stats is enabled: ``` ALTER TABLE t1 SET ("sql.stats.automatic_collection.fraction_stale_rows" = 0.1, "sql.stats.automatic_collection.min_stale_rows" = 2000); CREATE TABLE t1 (a INT, b INT) WITH ("sql.stats.automatic_collection.enabled" = true, "sql.stats.automatic_collection.min_stale_rows" = 1000000, "sql.stats.automatic_collection.fraction_stale_rows" = 0.05 ); ``` The current table-level cluster settings, along with storage parameters, is shown in the `WITH` clause output of `SHOW CREATE TABLE`. Note, any row mutations which have occurred within a couple minutes of disabling auto stats collection via `ALTER TABLE ... SET` may trigger stats collection, though DML submitted after the setting change will not. --- pkg/settings/cluster/cluster_settings.go | 24 ++ pkg/sql/catalog/BUILD.bazel | 1 + pkg/sql/catalog/catpb/BUILD.bazel | 2 + pkg/sql/catalog/catpb/catalog.go | 112 ++++++ pkg/sql/catalog/catpb/catalog.proto | 19 + pkg/sql/catalog/descpb/structured.proto | 8 +- pkg/sql/catalog/descriptor.go | 29 ++ pkg/sql/catalog/tabledesc/structured.go | 78 ++++ pkg/sql/catalog/tabledesc/validate.go | 65 ++++ pkg/sql/catalog/tabledesc/validate_test.go | 108 ++++++ pkg/sql/distsql_plan_stats.go | 3 + .../logictest/testdata/logic_test/alter_table | 111 ++++++ .../testdata/logic_test/create_table | 70 ++++ pkg/sql/paramparse/BUILD.bazel | 2 + pkg/sql/paramparse/paramobserver.go | 262 ++++++++++++++ pkg/sql/set_cluster_setting.go | 3 +- pkg/sql/stats/BUILD.bazel | 3 + pkg/sql/stats/automatic_stats.go | 333 +++++++++++++++--- pkg/sql/stats/automatic_stats_test.go | 226 +++++++++++- 19 files changed, 1396 insertions(+), 63 deletions(-) create mode 100644 pkg/sql/catalog/catpb/catalog.go diff --git a/pkg/settings/cluster/cluster_settings.go b/pkg/settings/cluster/cluster_settings.go index b8f863e1fd3a..fb706b83806f 100644 --- a/pkg/settings/cluster/cluster_settings.go +++ b/pkg/settings/cluster/cluster_settings.go @@ -78,6 +78,30 @@ func TelemetryOptOut() bool { // (for example, a CLI subcommand that does not connect to a cluster). var NoSettings *Settings // = nil +// BoolSetting represents a boolean setting value. +type BoolSetting int + +// The values for BoolSetting. +const ( + NotSet = iota // NotSet is like a null. The value has not been set. + True + False +) + +const ( + // AutoStatsEnabledSettingName is the name of the automatic stats collection + // enabled cluster setting. + AutoStatsEnabledSettingName = "sql.stats.automatic_collection.enabled" + + // AutoStatsMinStaleSettingName is the name of the automatic stats collection + // min stale rows cluster setting. + AutoStatsMinStaleSettingName = "sql.stats.automatic_collection.min_stale_rows" + + // AutoStatsFractionStaleSettingName is the name of the automatic stats + // collection fraction stale rows cluster setting. + AutoStatsFractionStaleSettingName = "sql.stats.automatic_collection.fraction_stale_rows" +) + // CPUProfileType tracks whether a CPU profile is in progress. type CPUProfileType int32 diff --git a/pkg/sql/catalog/BUILD.bazel b/pkg/sql/catalog/BUILD.bazel index 39f48e97a976..e093d9a847b2 100644 --- a/pkg/sql/catalog/BUILD.bazel +++ b/pkg/sql/catalog/BUILD.bazel @@ -25,6 +25,7 @@ go_library( "//pkg/kv", "//pkg/roachpb", "//pkg/server/telemetry", + "//pkg/settings/cluster", "//pkg/sql/catalog/catpb", "//pkg/sql/catalog/descpb", "//pkg/sql/pgwire/pgcode", diff --git a/pkg/sql/catalog/catpb/BUILD.bazel b/pkg/sql/catalog/catpb/BUILD.bazel index 246ffc5a14a9..643dd91ec3f4 100644 --- a/pkg/sql/catalog/catpb/BUILD.bazel +++ b/pkg/sql/catalog/catpb/BUILD.bazel @@ -29,6 +29,7 @@ go_proto_library( go_library( name = "catpb", srcs = [ + "catalog.go", "constraint.go", "default_privilege.go", "doc.go", @@ -44,6 +45,7 @@ go_library( deps = [ "//pkg/keys", "//pkg/security", + "//pkg/settings/cluster", "//pkg/sql/catalog/catconstants", "//pkg/sql/privilege", "//pkg/sql/sem/catid", diff --git a/pkg/sql/catalog/catpb/catalog.go b/pkg/sql/catalog/catpb/catalog.go new file mode 100644 index 000000000000..3c1871b38344 --- /dev/null +++ b/pkg/sql/catalog/catpb/catalog.go @@ -0,0 +1,112 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package catpb + +import "github.com/cockroachdb/cockroach/pkg/settings/cluster" + +// AutoStatsCollectionEnabled indicates if automatic statistics collection is +// explicitly enabled or disabled. +func (cs *ClusterSettingsForTable) AutoStatsCollectionEnabled() cluster.BoolSetting { + if cs.SqlStatsAutomaticCollectionEnabled == nil { + return cluster.NotSet + } + if *cs.SqlStatsAutomaticCollectionEnabled { + return cluster.True + } + return cluster.False +} + +// AutoStatsMinStaleRows indicates the setting of +// sql.stats.automatic_collection.min_stale_rows in ClusterSettingsForTable. +// If ok is true, then the minStaleRows value is valid, otherwise this has not +// been set. +func (cs *ClusterSettingsForTable) AutoStatsMinStaleRows() (minStaleRows int64, ok bool) { + if cs.SqlStatsAutomaticCollectionMinStaleRows == nil { + return 0, false + } + return *cs.SqlStatsAutomaticCollectionMinStaleRows, true +} + +// AutoStatsFractionStaleRows indicates the setting of +// sql.stats.automatic_collection.fraction_stale_rows in +// ClusterSettingsForTable. If ok is true, then the fractionStaleRows value is +// valid, otherwise this has not been set. +func (cs *ClusterSettingsForTable) AutoStatsFractionStaleRows() ( + fractionStaleRows float64, + ok bool, +) { + if cs.SqlStatsAutomaticCollectionFractionStaleRows == nil { + return 0, false + } + return *cs.SqlStatsAutomaticCollectionFractionStaleRows, true +} + +// NoAutoStatsSettingsOverrides is true if no auto stats related cluster +// settings are present in these ClusterSettingsForTable. +func (cs *ClusterSettingsForTable) NoAutoStatsSettingsOverrides() bool { + if cs == nil { + return true + } + if cs.SqlStatsAutomaticCollectionEnabled != nil || + cs.SqlStatsAutomaticCollectionMinStaleRows != nil || + cs.SqlStatsAutomaticCollectionFractionStaleRows != nil { + return false + } + return true +} + +// AutoStatsSettingsEqual returns true if all auto stats related +// ClusterSettingsForTable in this structure match those in `other`. An unset +// value must also be unset in the other's settings to be equal. +func (cs *ClusterSettingsForTable) AutoStatsSettingsEqual(other *ClusterSettingsForTable) bool { + otherHasNoAutoStatsSettings := other.NoAutoStatsSettingsOverrides() + if cs.NoAutoStatsSettingsOverrides() { + return otherHasNoAutoStatsSettings + } + if otherHasNoAutoStatsSettings { + return false + } + if cs.AutoStatsCollectionEnabled() != other.AutoStatsCollectionEnabled() { + return false + } + minStaleRows := cs.SqlStatsAutomaticCollectionMinStaleRows + otherMinStaleRows := other.SqlStatsAutomaticCollectionMinStaleRows + if minStaleRows == nil { + if otherMinStaleRows != nil { + return false + } + } + if otherMinStaleRows == nil { + if minStaleRows != nil { + return false + } + } + if *minStaleRows != *otherMinStaleRows { + return false + } + + fractionStaleRows := cs.SqlStatsAutomaticCollectionFractionStaleRows + otherFractionStaleRows := other.SqlStatsAutomaticCollectionFractionStaleRows + if fractionStaleRows == nil { + if otherFractionStaleRows != nil { + return false + } + } + if otherFractionStaleRows == nil { + if fractionStaleRows != nil { + return false + } + } + if *fractionStaleRows != *otherFractionStaleRows { + return false + } + return true +} diff --git a/pkg/sql/catalog/catpb/catalog.proto b/pkg/sql/catalog/catpb/catalog.proto index c38b9ecfb88f..37010fa624ce 100644 --- a/pkg/sql/catalog/catpb/catalog.proto +++ b/pkg/sql/catalog/catpb/catalog.proto @@ -217,3 +217,22 @@ message RowLevelTTL { // the relation name. optional bool label_metrics = 10 [(gogoproto.nullable) = false]; } + +// ClusterSettingsForTable represents cluster settings specified at the table +// level. Each setting is nullable so queries of the descriptor in JSON form +// only list values which have been set. +message ClusterSettingsForTable { + option (gogoproto.equal) = true; + optional bool sql_stats_automatic_collection_enabled = 1; + optional int64 sql_stats_automatic_collection_min_stale_rows = 2; + optional double sql_stats_automatic_collection_fraction_stale_rows = 3; +} + +// SessionSettingsForTable represents session settings specified at the table +// level. Each setting is nullable so queries of the descriptor in JSON form +// only list values which have been set. +// This is currently unused, but defined for backwards compatibility in +// anticipation of expected use going forward. +message SessionSettingsForTable { + option (gogoproto.equal) = true; +} diff --git a/pkg/sql/catalog/descpb/structured.proto b/pkg/sql/catalog/descpb/structured.proto index b64ba880e9a6..0b675311352b 100644 --- a/pkg/sql/catalog/descpb/structured.proto +++ b/pkg/sql/catalog/descpb/structured.proto @@ -1198,7 +1198,13 @@ message TableDescriptor { optional uint32 next_constraint_id = 49 [(gogoproto.nullable) = false, (gogoproto.customname) = "NextConstraintID", (gogoproto.casttype) = "ConstraintID"]; - // Next ID: 51 + // ClusterSettingsForTable are cluster settings specified at the table level. + optional cockroach.sql.catalog.catpb.ClusterSettingsForTable cluster_settings_for_table = 51 [(gogoproto.customname)="ClusterSettingsForTable"]; + + // SessionSettingsForTable are session settings specified at the table level. + optional cockroach.sql.catalog.catpb.SessionSettingsForTable session_settings_for_table = 52 [(gogoproto.customname)="SessionSettingsForTable"]; + + // Next ID: 53 } // SurvivalGoal is the survival goal for a database. diff --git a/pkg/sql/catalog/descriptor.go b/pkg/sql/catalog/descriptor.go index e56e2b07d974..45807ff18f62 100644 --- a/pkg/sql/catalog/descriptor.go +++ b/pkg/sql/catalog/descriptor.go @@ -15,6 +15,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" @@ -678,6 +679,34 @@ type TableDescriptor interface { GetExcludeDataFromBackup() bool // GetStorageParams returns a list of storage parameters for the table. GetStorageParams(spaceBetweenEqual bool) []string + // NoClusterSettingOverrides is true if no cluster settings are set at the + // table level for the given table, meaning no cluster settings are overridden + // for this table. + NoClusterSettingOverrides() bool + // NoAutoStatsSettingsOverrides is true if no auto stats related cluster + // settings are set at the table level for the given table. + NoAutoStatsSettingsOverrides() bool + // AutoStatsCollectionEnabled indicates if automatic statistics collection is + // explicitly enabled or disabled for this table. + AutoStatsCollectionEnabled() cluster.BoolSetting + // AutoStatsMinStaleRows indicates the setting of + // sql.stats.automatic_collection.min_stale_rows for this table. + // If ok is true, then the minStaleRows value is valid, otherwise this has not + // been set at the table level. + AutoStatsMinStaleRows() (minStaleRows int64, ok bool) + // AutoStatsFractionStaleRows indicates the setting of + // sql.stats.automatic_collection.fraction_stale_rows for this table. If ok is + // true, then the fractionStaleRows value is valid, otherwise this has not + // been set at the table level. + AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) + // GetClusterSettingsForTable returns the cluster settings specified at + // the table level for this table. May return nil if none are set. + GetClusterSettingsForTable() *catpb.ClusterSettingsForTable + // AutoStatsClusterSettingOverridesEqual returns true if all auto stats + // related cluster setting overrides in this descriptor match those in the + // `otherClusterSettings`. An unset value must also be unset in the other's + // settings to be equal. + AutoStatsClusterSettingOverridesEqual(otherClusterSettings *catpb.ClusterSettingsForTable) bool } // TypeDescriptor will eventually be called typedesc.Descriptor. diff --git a/pkg/sql/catalog/tabledesc/structured.go b/pkg/sql/catalog/tabledesc/structured.go index dc21740b7076..67ebcd659ecc 100644 --- a/pkg/sql/catalog/tabledesc/structured.go +++ b/pkg/sql/catalog/tabledesc/structured.go @@ -14,6 +14,7 @@ import ( "context" "fmt" "sort" + "strconv" "strings" "github.com/cockroachdb/cockroach/pkg/clusterversion" @@ -2565,6 +2566,24 @@ func (desc *wrapper) GetStorageParams(spaceBetweenEqual bool) []string { if exclude := desc.GetExcludeDataFromBackup(); exclude { appendStorageParam(`exclude_data_from_backup`, `true`) } + if settings := desc.ClusterSettingsForTable; settings != nil { + // These need to be wrapped in double-quotes because they contain '.' chars. + if settings.SqlStatsAutomaticCollectionEnabled != nil { + value := *settings.SqlStatsAutomaticCollectionEnabled + appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsEnabledSettingName), + fmt.Sprintf("%v", value)) + } + if settings.SqlStatsAutomaticCollectionMinStaleRows != nil { + value := *settings.SqlStatsAutomaticCollectionMinStaleRows + appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsMinStaleSettingName), + strconv.FormatInt(value, 10)) + } + if settings.SqlStatsAutomaticCollectionFractionStaleRows != nil { + value := *settings.SqlStatsAutomaticCollectionFractionStaleRows + appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsFractionStaleSettingName), + fmt.Sprintf("%g", value)) + } + } return storageParams } @@ -2583,6 +2602,65 @@ func (desc *wrapper) GetMultiRegionEnumDependencyIfExists() bool { return false } +// NoClusterSettingOverrides implements the TableDescriptor interface. +func (desc *wrapper) NoClusterSettingOverrides() bool { + return desc.ClusterSettingsForTable == nil +} + +// NoAutoStatsSettingsOverrides implements the TableDescriptor interface. +func (desc *wrapper) NoAutoStatsSettingsOverrides() bool { + if desc.ClusterSettingsForTable == nil { + return true + } + return desc.ClusterSettingsForTable.NoAutoStatsSettingsOverrides() +} + +// AutoStatsCollectionEnabled implements the TableDescriptor interface. +func (desc *wrapper) AutoStatsCollectionEnabled() cluster.BoolSetting { + if desc.NoClusterSettingOverrides() { + return cluster.NotSet + } + return desc.ClusterSettingsForTable.AutoStatsCollectionEnabled() +} + +// AutoStatsMinStaleRows implements the TableDescriptor interface. +func (desc *wrapper) AutoStatsMinStaleRows() (minStaleRows int64, ok bool) { + if desc.NoClusterSettingOverrides() { + return 0, false + } + return desc.ClusterSettingsForTable.AutoStatsMinStaleRows() +} + +// AutoStatsFractionStaleRows implements the TableDescriptor interface. +func (desc *wrapper) AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) { + if desc.NoClusterSettingOverrides() { + return 0, false + } + return desc.ClusterSettingsForTable.AutoStatsFractionStaleRows() +} + +// GetClusterSettingsForTable implements the TableDescriptor interface. +func (desc *wrapper) GetClusterSettingsForTable() *catpb.ClusterSettingsForTable { + return desc.ClusterSettingsForTable +} + +// AutoStatsClusterSettingOverridesEqual implements the TableDescriptor +// interface. +func (desc *wrapper) AutoStatsClusterSettingOverridesEqual( + otherClusterSettings *catpb.ClusterSettingsForTable, +) bool { + otherHasNoAutoStatsSettings := otherClusterSettings == nil || + otherClusterSettings.NoAutoStatsSettingsOverrides() + + if desc.NoAutoStatsSettingsOverrides() { + return otherHasNoAutoStatsSettings + } + if otherHasNoAutoStatsSettings { + return false + } + return desc.ClusterSettingsForTable.AutoStatsSettingsEqual(otherClusterSettings) +} + // SetTableLocalityRegionalByTable sets the descriptor's locality config to // regional at the table level in the supplied region. An empty region name // (or its alias PrimaryRegionNotSpecifiedName) denotes that the table is homed in diff --git a/pkg/sql/catalog/tabledesc/validate.go b/pkg/sql/catalog/tabledesc/validate.go index 599d96482bcb..3a373191fcba 100644 --- a/pkg/sql/catalog/tabledesc/validate.go +++ b/pkg/sql/catalog/tabledesc/validate.go @@ -15,6 +15,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catprivilege" @@ -520,6 +522,8 @@ func (desc *wrapper) ValidateSelf(vea catalog.ValidationErrorAccumulator) { } } + desc.validateClusterSettingsForTable(vea) + if desc.IsSequence() { return } @@ -1501,3 +1505,64 @@ func (desc *wrapper) validatePartitioning() error { ) }) } + +// validateClusterSettingsForTable validates that any new cluster settings at +// the table level hold a valid value. +func (desc *wrapper) validateClusterSettingsForTable(vea catalog.ValidationErrorAccumulator) { + if desc.NoClusterSettingOverrides() { + return + } + desc.validateBoolSetting(vea, desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionEnabled, + cluster.AutoStatsEnabledSettingName) + desc.validateIntSetting(vea, desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows, + cluster.AutoStatsMinStaleSettingName, settings.NonNegativeInt) + desc.validateFloatSetting(vea, desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows, + cluster.AutoStatsFractionStaleSettingName, settings.NonNegativeFloat) +} + +func (desc *wrapper) verifyProperTableForStatsSetting( + vea catalog.ValidationErrorAccumulator, settingName string, +) { + if desc.IsVirtualTable() { + vea.Report(errors.Newf("Setting %s may not be set on virtual table", settingName)) + } + if !desc.IsTable() { + vea.Report(errors.Newf("Setting %s may not be set on a view or sequence", settingName)) + } +} + +func (desc *wrapper) validateBoolSetting( + vea catalog.ValidationErrorAccumulator, value *bool, settingName string, +) { + if value != nil { + desc.verifyProperTableForStatsSetting(vea, settingName) + } +} + +func (desc *wrapper) validateIntSetting( + vea catalog.ValidationErrorAccumulator, + value *int64, + settingName string, + validateFunc func(v int64) error, +) { + if value != nil { + desc.verifyProperTableForStatsSetting(vea, settingName) + if err := validateFunc(*value); err != nil { + vea.Report(errors.Wrapf(err, "invalid integer value for %s", settingName)) + } + } +} + +func (desc *wrapper) validateFloatSetting( + vea catalog.ValidationErrorAccumulator, + value *float64, + settingName string, + validateFunc func(v float64) error, +) { + if value != nil { + desc.verifyProperTableForStatsSetting(vea, settingName) + if err := validateFunc(*value); err != nil { + vea.Report(errors.Wrapf(err, "invalid float value for %s", settingName)) + } + } +} diff --git a/pkg/sql/catalog/tabledesc/validate_test.go b/pkg/sql/catalog/tabledesc/validate_test.go index ab8c710bb31d..77cac79cebd1 100644 --- a/pkg/sql/catalog/tabledesc/validate_test.go +++ b/pkg/sql/catalog/tabledesc/validate_test.go @@ -131,6 +131,8 @@ var validationMap = []struct { "ExcludeDataFromBackup": {status: thisFieldReferencesNoObjects}, "NextConstraintID": {status: iSolemnlySwearThisFieldIsValidated}, "DeclarativeSchemaChangerState": {status: iSolemnlySwearThisFieldIsValidated}, + "ClusterSettingsForTable": {status: iSolemnlySwearThisFieldIsValidated}, + "SessionSettingsForTable": {status: thisFieldReferencesNoObjects}, }, }, { @@ -289,6 +291,19 @@ var validationMap = []struct { "DeclarativeSchemaChangerState": {status: thisFieldReferencesNoObjects}, }, }, + { + obj: catpb.ClusterSettingsForTable{}, + fieldMap: map[string]validationStatusInfo{ + "SqlStatsAutomaticCollectionEnabled": {status: iSolemnlySwearThisFieldIsValidated}, + "SqlStatsAutomaticCollectionMinStaleRows": {status: iSolemnlySwearThisFieldIsValidated}, + "SqlStatsAutomaticCollectionFractionStaleRows": {status: iSolemnlySwearThisFieldIsValidated}, + }, + }, + { + // To be filled in with fields in the future... + obj: catpb.SessionSettingsForTable{}, + fieldMap: map[string]validationStatusInfo{}, + }, } type validationStatusInfo struct { @@ -332,6 +347,9 @@ func TestValidateTableDesc(t *testing.T) { computedExpr := "1 + 1" generatedAsIdentitySequenceOptionExpr := " START 2 INCREMENT 3 CACHE 10" + boolTrue := true + negativeOne := int64(-1) + negativeOneFloat := float64(-1) testData := []struct { err string @@ -1869,6 +1887,95 @@ func TestValidateTableDesc(t *testing.T) { }, }, }, + {`Setting sql.stats.automatic_collection.enabled may not be set on virtual table`, + descpb.TableDescriptor{ + ID: catconstants.MinVirtualID, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "bar"}, + }, + NextColumnID: 2, + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionEnabled: &boolTrue}, + }}, + {`Setting sql.stats.automatic_collection.enabled may not be set on a view or sequence`, + descpb.TableDescriptor{ + Name: "bar", + ID: 52, + ParentID: 1, + FormatVersion: descpb.InterleavedFormatVersion, + UnexposedParentSchemaID: keys.PublicSchemaID, + ViewQuery: "SELECT * FROM foo", + DependsOn: []descpb.ID{51}, + NextColumnID: 2, + Columns: []descpb.ColumnDescriptor{ + {Name: "a", ID: 1, Type: types.Int}, + }, + Privileges: catpb.NewBasePrivilegeDescriptor(security.AdminRoleName()), + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionEnabled: &boolTrue}, + }}, + {`Setting sql.stats.automatic_collection.enabled may not be set on a view or sequence`, + descpb.TableDescriptor{ + ID: 51, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "a", Type: types.Int}, + }, + SequenceOpts: &descpb.TableDescriptor_SequenceOpts{ + Increment: 1, + }, + PrimaryIndex: descpb.IndexDescriptor{ + ID: 1, + Name: "primary", + Unique: true, + KeyColumnIDs: []descpb.ColumnID{1}, + KeyColumnNames: []string{"a"}, + KeyColumnDirections: []descpb.IndexDescriptor_Direction{descpb.IndexDescriptor_ASC}, + Version: descpb.PrimaryIndexWithStoredColumnsVersion, + EncodingType: descpb.PrimaryIndexEncoding, + ConstraintID: 1, + }, + Families: []descpb.ColumnFamilyDescriptor{ + {ID: 0, Name: "primary", + ColumnIDs: []descpb.ColumnID{1}, + ColumnNames: []string{"a"}, + }, + }, + NextColumnID: 2, + NextFamilyID: 1, + NextIndexID: 5, + NextConstraintID: 2, + Privileges: catpb.NewBasePrivilegeDescriptor(security.AdminRoleName()), + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionEnabled: &boolTrue}, + }, + }, + {`invalid integer value for sql.stats.automatic_collection.min_stale_rows: cannot be set to a negative value: -1`, + descpb.TableDescriptor{ + ID: 2, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "bar"}, + }, + NextColumnID: 2, + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionMinStaleRows: &negativeOne}, + }}, + {`invalid float value for sql.stats.automatic_collection.fraction_stale_rows: cannot set to a negative value: -1.000000`, + descpb.TableDescriptor{ + ID: 2, + ParentID: 1, + Name: "foo", + FormatVersion: descpb.InterleavedFormatVersion, + Columns: []descpb.ColumnDescriptor{ + {ID: 1, Name: "bar"}, + }, + NextColumnID: 2, + ClusterSettingsForTable: &catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionFractionStaleRows: &negativeOneFloat}, + }}, } for i, d := range testData { t.Run(d.err, func(t *testing.T) { @@ -2116,6 +2223,7 @@ func TestValidateCrossTableReferences(t *testing.T) { }, // Views. { // 10 + err: ``, desc: descpb.TableDescriptor{ Name: "foo", ID: 51, diff --git a/pkg/sql/distsql_plan_stats.go b/pkg/sql/distsql_plan_stats.go index fb097b34c726..2542e5137ea6 100644 --- a/pkg/sql/distsql_plan_stats.go +++ b/pkg/sql/distsql_plan_stats.go @@ -212,6 +212,9 @@ func (dsp *DistSQLPlanner) createStatsPlan( var rowsExpected uint64 if len(tableStats) > 0 { overhead := stats.AutomaticStatisticsFractionStaleRows.Get(&dsp.st.SV) + if autoStatsFractionStaleRowsForTable, ok := desc.AutoStatsFractionStaleRows(); ok { + overhead = autoStatsFractionStaleRowsForTable + } // Convert to a signed integer first to make the linter happy. rowsExpected = uint64(int64( // The total expected number of rows is the same number that was measured diff --git a/pkg/sql/logictest/testdata/logic_test/alter_table b/pkg/sql/logictest/testdata/logic_test/alter_table index ab682db27ef5..cfb69a1e3daf 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_table +++ b/pkg/sql/logictest/testdata/logic_test/alter_table @@ -2364,3 +2364,114 @@ COMMIT; statement ok ROLLBACK; + +subtest table_settings + +statement ok +CREATE TABLE t5 (a int) + +# Turn on automatic stats collection +statement ok +ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = true) + +# Verify automatic collection is enabled. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL + AND crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' + ->> 'sqlStatsAutomaticCollectionEnabled' = 'true' +---- +{"sqlStatsAutomaticCollectionEnabled": true} + +# Strings in settings should be converted to the proper data type. +statement ok +ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = 'false') + +# Verify automatic collection is disabled. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL + AND crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' + ->> 'sqlStatsAutomaticCollectionEnabled' = 'false' +---- +{"sqlStatsAutomaticCollectionEnabled": false} + +# SHOW CREATE TABLE displays the value properly. +query T +SELECT create_statement FROM [SHOW CREATE TABLE t5] +---- +CREATE TABLE public.t5 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t5_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.enabled" = false) + +statement error pq: parameter "sql.stats.automatic_collection.enabled" requires a Boolean value +ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = 123) + +statement ok +ALTER TABLE t5 RESET ("sql.stats.automatic_collection.enabled") + +# Verify the automatic collection setting is removed. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL +---- +{} + +statement error pq: invalid float value for sql.stats.automatic_collection.fraction_stale_rows: could not parse "hello" as type float: strconv.ParseFloat: parsing "hello": invalid syntax +ALTER TABLE t5 SET ("sql.stats.automatic_collection.fraction_stale_rows" = 'hello') + +statement error pq: invalid integer value for sql.stats.automatic_collection.min_stale_rows: could not parse "world" as type int: strconv.ParseInt: parsing "world": invalid syntax +ALTER TABLE t5 SET ("sql.stats.automatic_collection.min_stale_rows" = 'world') + +# Verify strings can be converted to proper setting values. +statement ok +ALTER TABLE t5 SET ("sql.stats.automatic_collection.fraction_stale_rows" = '0.15', + "sql.stats.automatic_collection.min_stale_rows" = '1234') + +# Verify settings +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't5' + AND tbl.drop_time IS NULL +---- +{"sqlStatsAutomaticCollectionFractionStaleRows": 0.15, "sqlStatsAutomaticCollectionMinStaleRows": "1234"} + +query T +SELECT create_statement FROM [SHOW CREATE TABLE t5] +---- +CREATE TABLE public.t5 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t5_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.min_stale_rows" = 1234, "sql.stats.automatic_collection.fraction_stale_rows" = 0.15) diff --git a/pkg/sql/logictest/testdata/logic_test/create_table b/pkg/sql/logictest/testdata/logic_test/create_table index 039042219dd2..8b70776e9c77 100644 --- a/pkg/sql/logictest/testdata/logic_test/create_table +++ b/pkg/sql/logictest/testdata/logic_test/create_table @@ -872,3 +872,73 @@ CREATE TABLE public.t_good_hash_indexes_2 ( crdb_internal_a_shard_5 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 5:::INT8)) VIRTUAL, CONSTRAINT t_good_hash_indexes_2_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=5) ) + +subtest table_settings + +statement ok +CREATE TABLE t1 (a int) WITH ("sql.stats.automatic_collection.enabled" = true) + +# Verify automatic collection is enabled. +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't1' + AND tbl.drop_time IS NULL +---- +{"sqlStatsAutomaticCollectionEnabled": true} + +statement ok +DROP TABLE t1 + +statement ok +CREATE TABLE t1 (a int) WITH ("sql.stats.automatic_collection.fraction_stale_rows" = 0.5, + "sql.stats.automatic_collection.min_stale_rows" = 4000) + +# Verify settings +query T +SELECT + crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->>'clusterSettingsForTable' +FROM + crdb_internal.tables AS tbl + INNER JOIN system.descriptor AS d ON d.id = tbl.table_id +WHERE + tbl.name = 't1' + AND tbl.drop_time IS NULL +---- +{"sqlStatsAutomaticCollectionFractionStaleRows": 0.5, "sqlStatsAutomaticCollectionMinStaleRows": "4000"} + +query T +SELECT create_statement FROM [SHOW CREATE TABLE t1] +---- +CREATE TABLE public.t1 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t1_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.min_stale_rows" = 4000, "sql.stats.automatic_collection.fraction_stale_rows" = 0.5) + +statement ok +CREATE TABLE t11 (a int) WITH ("sql.stats.automatic_collection.enabled" = true, + "sql.stats.automatic_collection.fraction_stale_rows" = 1.797693134862315708145274237317043567981e+308, + "sql.stats.automatic_collection.min_stale_rows" = 9223372036854775807) + +# Using max values for auto stats table settings +query T +SELECT create_statement FROM [SHOW CREATE TABLE t11] +---- +CREATE TABLE public.t11 ( + a INT8 NULL, + rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), + CONSTRAINT t11_pkey PRIMARY KEY (rowid ASC) +) WITH ("sql.stats.automatic_collection.enabled" = true, "sql.stats.automatic_collection.min_stale_rows" = 9223372036854775807, "sql.stats.automatic_collection.fraction_stale_rows" = 1.7976931348623157e+308) + +statement error pq: invalid float value for sql.stats.automatic_collection.fraction_stale_rows: cannot set to a negative value: -1.000000 +CREATE TABLE t22 (a int) WITH ("sql.stats.automatic_collection.fraction_stale_rows" = -1.0) + +statement error pq: invalid integer value for sql.stats.automatic_collection.min_stale_rows: cannot be set to a negative value: -1 +CREATE TABLE t22 (a int) WITH ("sql.stats.automatic_collection.min_stale_rows" = -1) diff --git a/pkg/sql/paramparse/BUILD.bazel b/pkg/sql/paramparse/BUILD.bazel index 7faf6d4c7020..23a703e3020d 100644 --- a/pkg/sql/paramparse/BUILD.bazel +++ b/pkg/sql/paramparse/BUILD.bazel @@ -12,6 +12,8 @@ go_library( deps = [ "//pkg/geo/geoindex", "//pkg/server/telemetry", + "//pkg/settings", + "//pkg/settings/cluster", "//pkg/sql/catalog/catpb", "//pkg/sql/catalog/descpb", "//pkg/sql/catalog/tabledesc", diff --git a/pkg/sql/paramparse/paramobserver.go b/pkg/sql/paramparse/paramobserver.go index 584d33be5955..e8b8a7209b42 100644 --- a/pkg/sql/paramparse/paramobserver.go +++ b/pkg/sql/paramparse/paramobserver.go @@ -16,6 +16,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/geo/geoindex" "github.com/cockroachdb/cockroach/pkg/server/telemetry" + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc" @@ -139,6 +141,34 @@ func boolFromDatum(evalCtx *tree.EvalContext, key string, datum tree.Datum) (boo return bool(*s), nil } +func intFromDatum(evalCtx *tree.EvalContext, key string, datum tree.Datum) (int64, error) { + intDatum := datum + if stringVal, err := DatumAsString(evalCtx, key, datum); err == nil { + if intDatum, err = tree.ParseDInt(stringVal); err != nil { + return 0, errors.Wrapf(err, "invalid integer value for %s", key) + } + } + s, err := DatumAsInt(evalCtx, key, intDatum) + if err != nil { + return 0, err + } + return s, nil +} + +func floatFromDatum(evalCtx *tree.EvalContext, key string, datum tree.Datum) (float64, error) { + floatDatum := datum + if stringVal, err := DatumAsString(evalCtx, key, datum); err == nil { + if floatDatum, err = tree.ParseDFloat(stringVal); err != nil { + return 0, errors.Wrapf(err, "invalid float value for %s", key) + } + } + s, err := DatumAsFloat(evalCtx, key, floatDatum) + if err != nil { + return 0, err + } + return s, nil +} + type tableParam struct { onSet func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, evalCtx *tree.EvalContext, key string, datum tree.Datum) error onReset func(po *TableStorageParamObserver, evalCtx *tree.EvalContext, key string) error @@ -465,6 +495,18 @@ var tableParams = map[string]tableParam{ return nil }, }, + cluster.AutoStatsEnabledSettingName: { + onSet: boolTableSettingFunc, + onReset: tableSettingResetFunc, + }, + cluster.AutoStatsMinStaleSettingName: { + onSet: intTableSettingFunc(settings.NonNegativeInt), + onReset: tableSettingResetFunc, + }, + cluster.AutoStatsFractionStaleSettingName: { + onSet: floatTableSettingFunc(settings.NonNegativeFloat), + onReset: tableSettingResetFunc, + }, } func init() { @@ -508,6 +550,226 @@ func init() { } } +func boolTableSettingFunc( + ctx context.Context, + po *TableStorageParamObserver, + semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, + key string, + datum tree.Datum, +) error { + boolVal, err := boolFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.ClusterSettingsForTable == nil { + po.tableDesc.ClusterSettingsForTable = &catpb.ClusterSettingsForTable{} + } + return setBoolValue(key, boolVal, po.tableDesc.ClusterSettingsForTable) +} + +func intTableSettingFunc( + validateFunc func(v int64) error, +) func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + return func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + intVal, err := intFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.ClusterSettingsForTable == nil { + po.tableDesc.ClusterSettingsForTable = &catpb.ClusterSettingsForTable{} + } + return setIntValue(key, intVal, po.tableDesc.ClusterSettingsForTable, validateFunc) + } +} + +func floatTableSettingFunc( + validateFunc func(v float64) error, +) func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + return func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, + evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + floatVal, err := floatFromDatum(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.ClusterSettingsForTable == nil { + po.tableDesc.ClusterSettingsForTable = &catpb.ClusterSettingsForTable{} + } + return setFloatValue(key, floatVal, po.tableDesc.ClusterSettingsForTable, validateFunc) + } +} + +func tableSettingResetFunc( + po *TableStorageParamObserver, evalCtx *tree.EvalContext, key string, +) error { + if po.tableDesc.ClusterSettingsForTable == nil { + return nil + } + return resetValue(key, po.tableDesc.ClusterSettingsForTable) +} + +func resetValue(settingName string, clusterSettingsForTable *catpb.ClusterSettingsForTable) error { + if clusterSettingsForTable == nil { + return errors.Newf("unable to reset table setting %s", settingName) + } + switch settingName { + case cluster.AutoStatsEnabledSettingName: + return resetBoolValue(&clusterSettingsForTable.SqlStatsAutomaticCollectionEnabled) + case cluster.AutoStatsMinStaleSettingName: + return resetIntValue(&clusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows) + case cluster.AutoStatsFractionStaleSettingName: + return resetFloatValue(&clusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows) + } + return errors.Newf("unable to reset table setting %s", settingName) +} + +func resetBoolValue(settingHolder **bool) error { + if *settingHolder == nil { + // This setting is unset or has already been reset. + return nil + } + *settingHolder = nil + return nil +} + +func resetIntValue(settingHolder **int64) error { + if *settingHolder == nil { + // This setting is unset or has already been reset. + return nil + } + *settingHolder = nil + return nil +} + +func resetFloatValue(settingHolder **float64) error { + if *settingHolder == nil { + // This setting is unset or has already been reset. + return nil + } + *settingHolder = nil + return nil +} + +func setBoolValue( + settingName string, boolVal bool, clusterSettingsForTable *catpb.ClusterSettingsForTable, +) error { + if clusterSettingsForTable == nil { + return errors.Newf("unable to set table setting %s", settingName) + } + // settingHolder is the memory address holding the *bool setting. + var settingHolder **bool + switch settingName { + case cluster.AutoStatsEnabledSettingName: + settingHolder = &clusterSettingsForTable.SqlStatsAutomaticCollectionEnabled + default: + return errors.Newf("unable to set table setting %s", settingName) + } + // Get the actual setting value. Setting is nullable and optional, + // so may be nil. + setting := *settingHolder + + if setting == nil { + // If this was never set before, make a pointer to the new value and assign + // it to the proper field in clusterSettingsForTable. + *settingHolder = &boolVal + return nil + } else if *setting == boolVal { + // If the current setting matches the value we're trying to set, there's + // nothing to do. Just return. + return nil + } + // Update the current setting with the new value. We could have updated + // settingHolder directly, as in the `setting == nil` case which would + // have allocated new memory. Instead we're just overwriting the old value. + *setting = boolVal + return nil +} + +func setIntValue( + settingName string, + intVal int64, + clusterSettingsForTable *catpb.ClusterSettingsForTable, + validateFunc func(v int64) error, +) error { + if clusterSettingsForTable == nil { + return errors.Newf("unable to set table setting %s", settingName) + } + if err := validateFunc(intVal); err != nil { + return errors.Wrapf(err, "invalid integer value for %s", settingName) + } + // settingHolder is the memory address holding the *int64 setting. + var settingHolder **int64 + switch settingName { + case cluster.AutoStatsMinStaleSettingName: + settingHolder = &clusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows + default: + return errors.Newf("unable to set table setting %s", settingName) + } + // Get the actual setting value. Setting is nullable and optional, + // so may be nil. + setting := *settingHolder + + if setting == nil { + // If this was never set before, make a pointer to the new value and assign + // it to the proper field in clusterSettingsForTable. + *settingHolder = &intVal + return nil + } else if *setting == intVal { + // If the current setting matches the value we're trying to set, there's + // nothing to do. Just return. + return nil + } + // Update the current setting with the new value. We could have updated + // settingHolder directly, as in the `setting == nil` case which would + // have allocated new memory. Instead we're just overwriting the old value. + *setting = intVal + return nil +} + +func setFloatValue( + settingName string, + floatVal float64, + clusterSettingsForTable *catpb.ClusterSettingsForTable, + validateFunc func(v float64) error, +) error { + if clusterSettingsForTable == nil { + return errors.Newf("unable to set table setting %s", settingName) + } + if err := validateFunc(floatVal); err != nil { + return errors.Wrapf(err, "invalid float value for %s", settingName) + } + // settingHolder is the memory address holding the *float64 setting. + var settingHolder **float64 + switch settingName { + case cluster.AutoStatsFractionStaleSettingName: + settingHolder = &clusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows + default: + return errors.Newf("unable to set table setting %s", settingName) + } + // Get the actual setting value. Setting is nullable and optional, + // so may be nil. + setting := *settingHolder + + if setting == nil { + // If this was never set before, make a pointer to the new value and assign + // it to the proper field in clusterSettingsForTable. + *settingHolder = &floatVal + return nil + } else if *setting == floatVal { + // If the current setting matches the value we're trying to set, there's + // nothing to do. Just return. + return nil + } + // Update the current setting with the new value. We could have updated + // settingHolder directly, as in the `setting == nil` case which would + // have allocated new memory. Instead we're just overwriting the old value. + *setting = floatVal + return nil +} + // onSet implements the StorageParamObserver interface. func (po *TableStorageParamObserver) onSet( ctx context.Context, diff --git a/pkg/sql/set_cluster_setting.go b/pkg/sql/set_cluster_setting.go index 7e2df6c23ef5..56cee568754a 100644 --- a/pkg/sql/set_cluster_setting.go +++ b/pkg/sql/set_cluster_setting.go @@ -36,7 +36,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb" "github.com/cockroachdb/cockroach/pkg/sql/sessioninit" "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" - "github.com/cockroachdb/cockroach/pkg/sql/stats" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -226,7 +225,7 @@ func (n *setClusterSettingNode) startExec(params runParams) error { // Report tracked cluster settings via telemetry. // TODO(justin): implement a more general mechanism for tracking these. switch n.name { - case stats.AutoStatsClusterSettingName: + case cluster.AutoStatsEnabledSettingName: switch expectedEncodedValue { case "true": telemetry.Inc(sqltelemetry.TurnAutoStatsOnUseCounter) diff --git a/pkg/sql/stats/BUILD.bazel b/pkg/sql/stats/BUILD.bazel index 45d47a829f0b..1f830e0f426a 100644 --- a/pkg/sql/stats/BUILD.bazel +++ b/pkg/sql/stats/BUILD.bazel @@ -26,6 +26,7 @@ go_library( "//pkg/settings", "//pkg/settings/cluster", "//pkg/sql/catalog", + "//pkg/sql/catalog/catpb", "//pkg/sql/catalog/descpb", "//pkg/sql/catalog/descs", "//pkg/sql/catalog/systemschema", @@ -85,9 +86,11 @@ go_test( "//pkg/settings/cluster", "//pkg/sql", "//pkg/sql/catalog", + "//pkg/sql/catalog/catpb", "//pkg/sql/catalog/descpb", "//pkg/sql/catalog/descs", "//pkg/sql/catalog/desctestutils", + "//pkg/sql/catalog/systemschema", "//pkg/sql/catalog/tabledesc", "//pkg/sql/catalog/typedesc", "//pkg/sql/execinfra", diff --git a/pkg/sql/stats/automatic_stats.go b/pkg/sql/stats/automatic_stats.go index 36c9093d2210..72e6bcb9866f 100644 --- a/pkg/sql/stats/automatic_stats.go +++ b/pkg/sql/stats/automatic_stats.go @@ -21,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/systemschema" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" @@ -34,15 +35,11 @@ import ( "github.com/cockroachdb/errors" ) -// AutoStatsClusterSettingName is the name of the automatic stats collection -// cluster setting. -const AutoStatsClusterSettingName = "sql.stats.automatic_collection.enabled" - // AutomaticStatisticsClusterMode controls the cluster setting for enabling // automatic table statistics collection. var AutomaticStatisticsClusterMode = settings.RegisterBoolSetting( settings.TenantWritable, - AutoStatsClusterSettingName, + cluster.AutoStatsEnabledSettingName, "automatic statistics collection mode", true, ).WithPublic() @@ -81,7 +78,7 @@ var AutomaticStatisticsMaxIdleTime = settings.RegisterFloatSetting( var AutomaticStatisticsFractionStaleRows = func() *settings.FloatSetting { s := settings.RegisterFloatSetting( settings.TenantWritable, - "sql.stats.automatic_collection.fraction_stale_rows", + cluster.AutoStatsFractionStaleSettingName, "target fraction of stale rows per table that will trigger a statistics refresh", 0.2, settings.NonNegativeFloat, @@ -96,7 +93,7 @@ var AutomaticStatisticsFractionStaleRows = func() *settings.FloatSetting { var AutomaticStatisticsMinStaleRows = func() *settings.IntSetting { s := settings.RegisterIntSetting( settings.TenantWritable, - "sql.stats.automatic_collection.min_stale_rows", + cluster.AutoStatsMinStaleSettingName, "target minimum number of stale rows per table that will trigger a statistics refresh", 500, settings.NonNegativeInt, @@ -111,6 +108,10 @@ var AutomaticStatisticsMinStaleRows = func() *settings.IntSetting { // have any effect. var DefaultRefreshInterval = time.Minute +// MinRefreshInterval is the minimum auto stats refresh interval allowed. +// If a shorter interval is requested, MinRefreshInterval is used instead. +const MinRefreshInterval = time.Microsecond + // DefaultAsOfTime is a duration which is used to define the AS OF time for // automatic runs of CREATE STATISTICS. It is mutable for testing. // NB: Updates to this value after MakeRefresher has been called will not have @@ -199,6 +200,10 @@ type Refresher struct { // metadata about SQL mutations to the background Refresher thread. mutations chan mutation + // settings is the buffered channel used to pass messages containing + // autostats setting override information to the background Refresher thread. + settings chan settingOverride + // asOfTime is a duration which is used to define the AS OF time for // runs of CREATE STATISTICS by the Refresher. asOfTime time.Duration @@ -212,13 +217,28 @@ type Refresher struct { // mutationCounts contains aggregated mutation counts for each table that // have yet to be processed by the refresher. mutationCounts map[descpb.ID]int64 + + // settingOverrides holds any autostats cluster setting overrides for each + // table. + settingOverrides map[descpb.ID]catpb.ClusterSettingsForTable + + // numTablesEnsured is an internal counter for testing ensureAllTables. + numTablesEnsured int } // mutation contains metadata about a SQL mutation and is the message passed to // the background refresher thread to (possibly) trigger a statistics refresh. type mutation struct { - tableID descpb.ID - rowsAffected int + tableID descpb.ID + rowsAffected int + hasSettingOverrides bool +} + +// settingOverride specifies the autostats setting override values to use in +// place of the cluster settings. +type settingOverride struct { + tableID descpb.ID + settings catpb.ClusterSettingsForTable } // MakeRefresher creates a new Refresher. @@ -232,16 +252,77 @@ func MakeRefresher( randSource := rand.NewSource(rand.Int63()) return &Refresher{ - AmbientContext: ambientCtx, - st: st, - ex: ex, - cache: cache, - randGen: makeAutoStatsRand(randSource), - mutations: make(chan mutation, refreshChanBufferLen), - asOfTime: asOfTime, - extraTime: time.Duration(rand.Int63n(int64(time.Hour))), - mutationCounts: make(map[descpb.ID]int64, 16), + AmbientContext: ambientCtx, + st: st, + ex: ex, + cache: cache, + randGen: makeAutoStatsRand(randSource), + mutations: make(chan mutation, refreshChanBufferLen), + settings: make(chan settingOverride, refreshChanBufferLen), + asOfTime: asOfTime, + extraTime: time.Duration(rand.Int63n(int64(time.Hour))), + mutationCounts: make(map[descpb.ID]int64, 16), + settingOverrides: make(map[descpb.ID]catpb.ClusterSettingsForTable), + } +} + +func (r *Refresher) getNumTablesEnsured() int { + return r.numTablesEnsured +} + +func (r *Refresher) autoStatsEnabled(desc catalog.TableDescriptor) bool { + if desc == nil { + // If the descriptor could not be accessed, defer to the cluster setting. + return AutomaticStatisticsClusterMode.Get(&r.st.SV) + } + enabledForTable := desc.AutoStatsCollectionEnabled() + // The table-level setting of sql.stats.automatic_collection.enabled takes + // precedence over the cluster setting. + if enabledForTable == cluster.NotSet { + return AutomaticStatisticsClusterMode.Get(&r.st.SV) + } + return enabledForTable == cluster.True +} + +func (r *Refresher) autoStatsEnabledForTableID( + tableID descpb.ID, settingOverrides map[descpb.ID]catpb.ClusterSettingsForTable, +) bool { + var setting catpb.ClusterSettingsForTable + var ok bool + + if setting, ok = settingOverrides[tableID]; !ok { + // If there are no setting overrides, defer to the cluster setting. + return AutomaticStatisticsClusterMode.Get(&r.st.SV) + } + autoStatsSettingValue := setting.AutoStatsCollectionEnabled() + if autoStatsSettingValue == cluster.NotSet { + return AutomaticStatisticsClusterMode.Get(&r.st.SV) + } + // The table-level setting of sql.stats.automatic_collection.enabled takes + // precedence over the cluster setting. + return autoStatsSettingValue == cluster.True +} + +func (r *Refresher) autoStatsMinStaleRows(explicitSettings *catpb.ClusterSettingsForTable) int64 { + if explicitSettings == nil { + return AutomaticStatisticsMinStaleRows.Get(&r.st.SV) + } + if minStaleRows, ok := explicitSettings.AutoStatsMinStaleRows(); ok { + return minStaleRows + } + return AutomaticStatisticsMinStaleRows.Get(&r.st.SV) +} + +func (r *Refresher) autoStatsFractionStaleRows( + explicitSettings *catpb.ClusterSettingsForTable, +) float64 { + if explicitSettings == nil { + return AutomaticStatisticsFractionStaleRows.Get(&r.st.SV) + } + if fractionStaleRows, ok := explicitSettings.AutoStatsFractionStaleRows(); ok { + return fractionStaleRows } + return AutomaticStatisticsFractionStaleRows.Get(&r.st.SV) } // Start starts the stats refresher thread, which polls for messages about @@ -252,6 +333,11 @@ func (r *Refresher) Start( ) error { bgCtx := r.AnnotateCtx(context.Background()) _ = stopper.RunAsyncTask(bgCtx, "refresher", func(ctx context.Context) { + // Make sure we never sleep for zero time. This ends up spinning the CPU. + if refreshInterval < MinRefreshInterval { + refreshInterval = MinRefreshInterval + } + origRefreshInterval := refreshInterval // We always sleep for r.asOfTime at the beginning of each refresh, so // subtract it from the refreshInterval. refreshInterval -= r.asOfTime @@ -273,7 +359,21 @@ func (r *Refresher) Start( r.ensureAllTables(ctx, &r.st.SV, initialTableCollectionDelay) case <-timer.C: + if len(r.mutationCounts) == 0 { + timer.Reset(origRefreshInterval) + continue + } mutationCounts := r.mutationCounts + var settingOverrides map[descpb.ID]catpb.ClusterSettingsForTable + settingOverrides = make(map[descpb.ID]catpb.ClusterSettingsForTable) + if len(r.settingOverrides) > 0 { + for tableID := range mutationCounts { + if setting, ok := r.settingOverrides[tableID]; ok { + settingOverrides[tableID] = setting + } + } + } + if err := stopper.RunAsyncTask( ctx, "stats.Refresher: maybeRefreshStats", func(ctx context.Context) { // Wait so that the latest changes will be reflected according to the @@ -288,13 +388,16 @@ func (r *Refresher) Start( } for tableID, rowsAffected := range mutationCounts { - // Check the cluster setting before each refresh in case it was - // disabled recently. - if !AutomaticStatisticsClusterMode.Get(&r.st.SV) { - break + // Check the cluster setting and table setting before each refresh + // in case they were disabled recently. + if !r.autoStatsEnabledForTableID(tableID, settingOverrides) { + continue } - - r.maybeRefreshStats(ctx, stopper, tableID, rowsAffected, r.asOfTime) + var explicitSettings *catpb.ClusterSettingsForTable + if settings, ok := settingOverrides[tableID]; ok { + explicitSettings = &settings + } + r.maybeRefreshStats(ctx, tableID, explicitSettings, rowsAffected, r.asOfTime) select { case <-stopper.ShouldQuiesce(): @@ -308,12 +411,35 @@ func (r *Refresher) Start( }); err != nil { log.Errorf(ctx, "failed to refresh stats: %v", err) } + // This clears out any tables that may have been added to the + // mutationCounts map by ensureAllTables. This is by design. We don't + // want to constantly refresh tables that are read-only. r.mutationCounts = make(map[descpb.ID]int64, len(r.mutationCounts)) case mut := <-r.mutations: r.mutationCounts[mut.tableID] += int64(mut.rowsAffected) + // The mutations channel also handles resetting of cluster setting + // overrides when none exist (so that we don't have to pass two messages + // when nothing is overridden). + if !mut.hasSettingOverrides { + delete(r.settingOverrides, mut.tableID) + } + + case clusterSettingOverride := <-r.settings: + if oldSettings, ok := r.settingOverrides[clusterSettingOverride.tableID]; ok { + currentSettings := clusterSettingOverride.settings + // If the old cluster setting overrides don't match the current ones, update + // the hash map with the current settings. + if !currentSettings.AutoStatsSettingsEqual(&oldSettings) { + r.settingOverrides[clusterSettingOverride.tableID] = currentSettings + } + } else { + // There were no previous settings, to use the new ones. + r.settingOverrides[clusterSettingOverride.tableID] = clusterSettingOverride.settings + } case <-stopper.ShouldQuiesce(): + log.Info(ctx, "quiescing auto stats refresher") return } } @@ -321,19 +447,8 @@ func (r *Refresher) Start( return nil } -// ensureAllTables ensures that an entry exists in r.mutationCounts for each -// table in the database. -func (r *Refresher) ensureAllTables( - ctx context.Context, settings *settings.Values, initialTableCollectionDelay time.Duration, -) { - if !AutomaticStatisticsClusterMode.Get(settings) { - // Automatic stats are disabled. - return - } - - // Use a historical read so as to disable txn contention resolution. - getAllTablesQuery := fmt.Sprintf( - ` +const ( + getAllTablesTemplateSQL = ` SELECT tbl.table_id FROM @@ -346,16 +461,33 @@ WHERE AND tbl.drop_time IS NULL AND ( crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', d.descriptor, false)->'table'->>'viewQuery' - ) IS NULL;`, - initialTableCollectionDelay, - systemschema.SystemDatabaseName, - ) + ) IS NULL + %s` + + explicitlyEnabledTablesPredicate = `AND + (crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' ->> 'sqlStatsAutomaticCollectionEnabled' = 'true' + )` + + autoStatsEnabledOrNotSpecifiedPredicate = `AND + (crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable'->'sqlStatsAutomaticCollectionEnabled' IS NULL + OR crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', + d.descriptor, false)->'table'->'clusterSettingsForTable' ->> 'sqlStatsAutomaticCollectionEnabled' = 'true' + )` +) +func (r *Refresher) getApplicableTables( + ctx context.Context, stmt string, opname string, forTesting bool, +) { + if forTesting { + r.numTablesEnsured = 0 + } it, err := r.ex.QueryIterator( ctx, - "get-tables", + opname, nil, /* txn */ - getAllTablesQuery, + stmt, ) if err == nil { var ok bool @@ -366,6 +498,9 @@ WHERE // The query already excludes views and system tables. if !descpb.IsVirtualTable(tableID) { r.mutationCounts[tableID] += 0 + if forTesting { + r.numTablesEnsured++ + } } } } @@ -376,8 +511,60 @@ WHERE // entry is idempotent (i.e. we didn't mess up anything for the next // call to this method). log.Errorf(ctx, "failed to get tables for automatic stats: %v", err) + } +} + +// ensureAllTables ensures that an entry exists in r.mutationCounts for each +// table in the database which has auto stats enabled, either explicitly via +// a table-level setting, or implicitly via the cluster setting. +func (r *Refresher) ensureAllTables( + ctx context.Context, settings *settings.Values, initialTableCollectionDelay time.Duration, +) { + if !AutomaticStatisticsClusterMode.Get(settings) { + // Use a historical read so as to disable txn contention resolution. + // A table-level setting of sql.stats.automatic_collection.enabled=true is + // checked and only those tables are included in this scan. + getTablesWithAutoStatsExplicitlyEnabledQuery := fmt.Sprintf( + getAllTablesTemplateSQL, + initialTableCollectionDelay, + systemschema.SystemDatabaseName, + explicitlyEnabledTablesPredicate, + ) + r.getApplicableTables(ctx, getTablesWithAutoStatsExplicitlyEnabledQuery, + "get-tables-with-autostats-explicitly-enabled", false) return } + + // Use a historical read so as to disable txn contention resolution. + // A table-level setting of sql.stats.automatic_collection.enabled of null, + // meaning not set, or true qualifies rows we're interested in. + getAllTablesQuery := fmt.Sprintf( + getAllTablesTemplateSQL, + initialTableCollectionDelay, + systemschema.SystemDatabaseName, + autoStatsEnabledOrNotSpecifiedPredicate, + ) + r.getApplicableTables(ctx, getAllTablesQuery, + "get-tables", false) +} + +func copyAutoStatsSettings( + dest *catpb.ClusterSettingsForTable, src *catpb.ClusterSettingsForTable, +) { + if src == nil || dest == nil { + return + } + autoStatsEnabledSetting := src.AutoStatsCollectionEnabled() + if autoStatsEnabledSetting != cluster.NotSet { + autoStatsEnabled := autoStatsEnabledSetting == cluster.True + dest.SqlStatsAutomaticCollectionEnabled = &autoStatsEnabled + } + if minStaleRows, ok := src.AutoStatsMinStaleRows(); ok { + dest.SqlStatsAutomaticCollectionMinStaleRows = &minStaleRows + } + if fractionStaleRows, ok := src.AutoStatsFractionStaleRows(); ok { + dest.SqlStatsAutomaticCollectionFractionStaleRows = &fractionStaleRows + } } // NotifyMutation is called by SQL mutation operations to signal to the @@ -385,19 +572,46 @@ WHERE // successful insert, update, upsert or delete. rowsAffected refers to the // number of rows written as part of the mutation operation. func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected int) { - if !AutomaticStatisticsClusterMode.Get(&r.st.SV) { - // Automatic stats are disabled. + if !r.autoStatsEnabled(table) { return } + if !hasStatistics(table) { // Don't collect stats for this kind of table: system, virtual, view, etc. return } + hasSettingOverrides := !table.NoAutoStatsSettingsOverrides() + + // Send setting override information over first, so it could take effect + // before the mutation is processed. + if hasSettingOverrides { + clusterSettingsForTable := table.GetClusterSettingsForTable() + autoStatsOverrides := catpb.ClusterSettingsForTable{} + copyAutoStatsSettings(&autoStatsOverrides, clusterSettingsForTable) + select { + case r.settings <- settingOverride{ + tableID: table.GetID(), + settings: autoStatsOverrides, + }: + default: + // Don't block if there is no room in the buffered channel. + if bufferedChanFullLogLimiter.ShouldLog() { + log.Warningf(context.TODO(), + "buffered channel is full. Unable to update settings for table %q (%d) during auto stats refreshing", + table.GetName(), table.GetID()) + } + } + } + // Send mutation info to the refresher thread to avoid adding latency to // the calling transaction. select { - case r.mutations <- mutation{tableID: table.GetID(), rowsAffected: rowsAffected}: + case r.mutations <- mutation{ + tableID: table.GetID(), + rowsAffected: rowsAffected, + hasSettingOverrides: hasSettingOverrides, + }: default: // Don't block if there is no room in the buffered channel. if bufferedChanFullLogLimiter.ShouldLog() { @@ -410,10 +624,12 @@ func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected i // maybeRefreshStats implements the core logic described in the comment for // Refresher. It is called by the background Refresher thread. +// explicitSettings, if non-nil, holds any autostats cluster setting overrides +// for this table. func (r *Refresher) maybeRefreshStats( ctx context.Context, - stopper *stop.Stopper, tableID descpb.ID, + explicitSettings *catpb.ClusterSettingsForTable, rowsAffected int64, asOf time.Duration, ) { @@ -453,15 +669,26 @@ func (r *Refresher) maybeRefreshStats( mustRefresh = true } - targetRows := int64(rowCount*AutomaticStatisticsFractionStaleRows.Get(&r.st.SV)) + - AutomaticStatisticsMinStaleRows.Get(&r.st.SV) - if !mustRefresh && rowsAffected < math.MaxInt32 && r.randGen.randInt(targetRows) >= rowsAffected { + statsFractionStaleRows := r.autoStatsFractionStaleRows(explicitSettings) + statsMinStaleRows := r.autoStatsMinStaleRows(explicitSettings) + targetRows := int64(rowCount*statsFractionStaleRows) + statsMinStaleRows + // randInt will panic if we pass it a value of 0. + randomTargetRows := int64(0) + if targetRows > 0 { + randomTargetRows = r.randGen.randInt(targetRows) + } + if !mustRefresh && rowsAffected < math.MaxInt32 && randomTargetRows >= rowsAffected { // No refresh is happening this time. return } if err := r.refreshStats(ctx, tableID, asOf); err != nil { if errors.Is(err, ConcurrentCreateStatsError) { + // Make sure we don't remove any explicitSettings from the + // settingOverrides map. An ALTER TABLE may have just occurred, creating + // new valid auto stats settings and a false value of hasSettingOverrides + // would remove those. + const dontRemoveSettings = true // Another stats job was already running. Attempt to reschedule this // refresh. if mustRefresh { @@ -471,14 +698,16 @@ func (r *Refresher) maybeRefreshStats( // cycle so that we have another chance to trigger a refresh. We pass // rowsAffected=0 so that we don't force a refresh if another node has // already done it. - r.mutations <- mutation{tableID: tableID, rowsAffected: 0} + r.mutations <- mutation{tableID: tableID, rowsAffected: 0, + hasSettingOverrides: dontRemoveSettings} } else { // If this refresh was caused by a "dice roll", we want to make sure // that the refresh is rescheduled so that we adhere to the // AutomaticStatisticsFractionStaleRows statistical ideal. We // ensure that the refresh is triggered during the next cycle by // passing a very large number for rowsAffected. - r.mutations <- mutation{tableID: tableID, rowsAffected: math.MaxInt32} + r.mutations <- mutation{tableID: tableID, rowsAffected: math.MaxInt32, + hasSettingOverrides: dontRemoveSettings} } return } diff --git a/pkg/sql/stats/automatic_stats_test.go b/pkg/sql/stats/automatic_stats_test.go index 592c6d7c5cc5..f43674968815 100644 --- a/pkg/sql/stats/automatic_stats_test.go +++ b/pkg/sql/stats/automatic_stats_test.go @@ -24,9 +24,11 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvclient/rangefeed" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descs" "github.com/cockroachdb/cockroach/pkg/sql/catalog/desctestutils" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/systemschema" "github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sqlutil" @@ -83,7 +85,7 @@ func TestMaybeRefreshStats(t *testing.T) { // There are no stats yet, so this must refresh the statistics on table t // even though rowsAffected=0. refresher.maybeRefreshStats( - ctx, s.Stopper(), descA.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + ctx, descA.GetID(), nil /* explicitSettings */, 0 /* rowsAffected */, time.Microsecond, /* asOf */ ) if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { t.Fatal(err) @@ -92,7 +94,28 @@ func TestMaybeRefreshStats(t *testing.T) { // Try to refresh again. With rowsAffected=0, the probability of a refresh // is 0, so refreshing will not succeed. refresher.maybeRefreshStats( - ctx, s.Stopper(), descA.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + ctx, descA.GetID(), nil /* explicitSettings */, 0 /* rowsAffected */, time.Microsecond, /* asOf */ + ) + if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { + t.Fatal(err) + } + + // Setting minStaleRows for the table prevents refreshing from occurring. + minStaleRows := int64(100000000) + explicitSettings := catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionMinStaleRows: &minStaleRows} + refresher.maybeRefreshStats( + ctx, descA.GetID(), &explicitSettings, 10 /* rowsAffected */, time.Microsecond, /* asOf */ + ) + if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { + t.Fatal(err) + } + + // Setting fractionStaleRows for the table can also prevent refreshing from + // occurring, though this is a not a typical value for this setting. + fractionStaleRows := float64(100000000) + explicitSettings = catpb.ClusterSettingsForTable{SqlStatsAutomaticCollectionFractionStaleRows: &fractionStaleRows} + refresher.maybeRefreshStats( + ctx, descA.GetID(), &explicitSettings, 10 /* rowsAffected */, time.Microsecond, /* asOf */ ) if err := checkStatsCount(ctx, cache, descA, 1 /* expected */); err != nil { t.Fatal(err) @@ -101,7 +124,7 @@ func TestMaybeRefreshStats(t *testing.T) { // With rowsAffected=10, refreshing should work. Since there are more rows // updated than exist in the table, the probability of a refresh is 100%. refresher.maybeRefreshStats( - ctx, s.Stopper(), descA.GetID(), 10 /* rowsAffected */, time.Microsecond, /* asOf */ + ctx, descA.GetID(), nil /* explicitSettings */, 10 /* rowsAffected */, time.Microsecond, /* asOf */ ) if err := checkStatsCount(ctx, cache, descA, 2 /* expected */); err != nil { t.Fatal(err) @@ -112,7 +135,7 @@ func TestMaybeRefreshStats(t *testing.T) { // TODO(rytaft): Should not enqueue views to begin with. descVW := desctestutils.TestingGetPublicTableDescriptor(s.DB(), keys.SystemSQLCodec, "t", "vw") refresher.maybeRefreshStats( - ctx, s.Stopper(), descVW.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + ctx, descVW.GetID(), nil /* explicitSettings */, 0 /* rowsAffected */, time.Microsecond, /* asOf */ ) select { case <-refresher.mutations: @@ -121,6 +144,175 @@ func TestMaybeRefreshStats(t *testing.T) { } } +func BenchmarkMaybeRefreshStats(b *testing.B) { + defer leaktest.AfterTest(b)() + defer log.Scope(b).Close(b) + ctx := context.Background() + + s, sqlDB, kvDB := serverutils.StartServer(b, base.TestServerArgs{}) + defer s.Stopper().Stop(ctx) + + st := cluster.MakeTestingClusterSettings() + evalCtx := tree.NewTestingEvalContext(st) + defer evalCtx.Stop(ctx) + + AutomaticStatisticsClusterMode.Override(ctx, &st.SV, false) + AutomaticStatisticsMinStaleRows.Override(ctx, &st.SV, 5) + + sqlRun := sqlutils.MakeSQLRunner(sqlDB) + sqlRun.Exec(b, + `CREATE DATABASE t; + CREATE TABLE t.a1 (k INT PRIMARY KEY); + CREATE TABLE t.a2 (k INT PRIMARY KEY); + CREATE TABLE t.a3 (k INT PRIMARY KEY); + CREATE TABLE t.a4 (k INT PRIMARY KEY); + CREATE TABLE t.a5 (k INT PRIMARY KEY); + CREATE TABLE t.a6 (k INT PRIMARY KEY); + CREATE TABLE t.a7 (k INT PRIMARY KEY); + CREATE TABLE t.a8 (k INT PRIMARY KEY); + CREATE TABLE t.a9 (k INT PRIMARY KEY); + CREATE TABLE t.a10 (k INT PRIMARY KEY); + +`) + + executor := s.InternalExecutor().(sqlutil.InternalExecutor) + tabDescrs := make([]catalog.TableDescriptor, 0, 10) + for i := 1; i <= 10; i++ { + tabDescrs = append(tabDescrs, + desctestutils.TestingGetPublicTableDescriptor(s.DB(), keys.SystemSQLCodec, + "t", fmt.Sprintf("a%d", i))) + } + + cache := NewTableStatisticsCache( + ctx, + 10, /* cacheSize */ + kvDB, + executor, + keys.SystemSQLCodec, + s.ClusterSettings(), + s.RangeFeedFactory().(*rangefeed.Factory), + s.CollectionFactory().(*descs.CollectionFactory), + ) + refresher := MakeRefresher(s.AmbientCtx(), st, executor, cache, time.Microsecond /* asOfTime */) + + // Run the operation 1000 times. + const numOps = 1000 + runBench1 := func(b *testing.B) { + b.ResetTimer() + b.StartTimer() + for i := 0; i < b.N; i++ { + for j := 0; j < numOps; j++ { + for _, desc := range tabDescrs { + refresher.maybeRefreshStats( + ctx, desc.GetID(), nil /* explicitSettings */, 0 /* rowsAffected */, time.Microsecond, /* asOf */ + ) + } + } + } + b.StopTimer() + } + b.Run(`BenchmarkMaybeRefreshStats`, func(b *testing.B) { + runBench1(b) + }) +} + +func TestEnsureAllTablesQueries(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + + s, sqlDB, kvDB := serverutils.StartServer(t, base.TestServerArgs{}) + defer s.Stopper().Stop(ctx) + + st := cluster.MakeTestingClusterSettings() + evalCtx := tree.NewTestingEvalContext(st) + defer evalCtx.Stop(ctx) + + sqlRun := sqlutils.MakeSQLRunner(sqlDB) + sqlRun.Exec(t, + `CREATE DATABASE t; + CREATE TABLE t.a (k INT PRIMARY KEY); + CREATE TABLE t.b (k INT PRIMARY KEY);`) + + executor := s.InternalExecutor().(sqlutil.InternalExecutor) + cache := NewTableStatisticsCache( + ctx, + 10, /* cacheSize */ + kvDB, + executor, + keys.SystemSQLCodec, + s.ClusterSettings(), + s.RangeFeedFactory().(*rangefeed.Factory), + s.CollectionFactory().(*descs.CollectionFactory), + ) + r := MakeRefresher(s.AmbientCtx(), st, executor, cache, time.Microsecond /* asOfTime */) + + if err := checkAllTablesCount(ctx, 2, r); err != nil { + t.Fatal(err) + } + if err := checkExplicitlyEnabledTablesCount(ctx, 0, r); err != nil { + t.Fatal(err) + } + sqlRun.Exec(t, + `ALTER TABLE t.a SET ("sql.stats.automatic_collection.enabled" = true)`) + if err := checkAllTablesCount(ctx, 2, r); err != nil { + t.Fatal(err) + } + if err := checkExplicitlyEnabledTablesCount(ctx, 1, r); err != nil { + t.Fatal(err) + } + sqlRun.Exec(t, + `ALTER TABLE t.b SET ("sql.stats.automatic_collection.enabled" = false)`) + if err := checkAllTablesCount(ctx, 1, r); err != nil { + t.Fatal(err) + } + if err := checkExplicitlyEnabledTablesCount(ctx, 1, r); err != nil { + t.Fatal(err) + } + sqlRun.Exec(t, + `ALTER TABLE t.a SET ("sql.stats.automatic_collection.enabled" = false)`) + if err := checkAllTablesCount(ctx, 0, r); err != nil { + t.Fatal(err) + } + if err := checkExplicitlyEnabledTablesCount(ctx, 0, r); err != nil { + t.Fatal(err) + } +} + +func checkAllTablesCount(ctx context.Context, expected int, r *Refresher) error { + const collectionDelay = time.Microsecond + getAllTablesQuery := fmt.Sprintf( + getAllTablesTemplateSQL, + collectionDelay, + systemschema.SystemDatabaseName, + autoStatsEnabledOrNotSpecifiedPredicate, + ) + r.getApplicableTables(ctx, getAllTablesQuery, + "get-tables", true) + actual := r.getNumTablesEnsured() + if expected != actual { + return fmt.Errorf("expected %d table(s) but found %d", expected, actual) + } + return nil +} + +func checkExplicitlyEnabledTablesCount(ctx context.Context, expected int, r *Refresher) error { + const collectionDelay = time.Microsecond + getTablesWithAutoStatsExplicitlyEnabledQuery := fmt.Sprintf( + getAllTablesTemplateSQL, + collectionDelay, + systemschema.SystemDatabaseName, + explicitlyEnabledTablesPredicate, + ) + r.getApplicableTables(ctx, getTablesWithAutoStatsExplicitlyEnabledQuery, + "get-tables-with-autostats-explicitly-enabled", true) + actual := r.getNumTablesEnsured() + if expected != actual { + return fmt.Errorf("expected %d table(s) but found %d", expected, actual) + } + return nil +} + func TestAverageRefreshTime(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -311,7 +503,7 @@ func TestAverageRefreshTime(t *testing.T) { // the statistics on table t. With rowsAffected=0, the probability of refresh // is 0. refresher.maybeRefreshStats( - ctx, s.Stopper(), table.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + ctx, table.GetID(), nil /* explicitSettings */, 0 /* rowsAffected */, time.Microsecond, /* asOf */ ) if err := checkStatsCount(ctx, cache, table, 20 /* expected */); err != nil { t.Fatal(err) @@ -361,7 +553,7 @@ func TestAverageRefreshTime(t *testing.T) { // remain (5 from column k and 10 from column v), since the old stats on k // were deleted. refresher.maybeRefreshStats( - ctx, s.Stopper(), table.GetID(), 0 /* rowsAffected */, time.Microsecond, /* asOf */ + ctx, table.GetID(), nil /* explicitSettings */, 0 /* rowsAffected */, time.Microsecond, /* asOf */ ) if err := checkStatsCount(ctx, cache, table, 15 /* expected */); err != nil { t.Fatal(err) @@ -454,7 +646,8 @@ func TestNoRetryOnFailure(t *testing.T) { // Try to refresh stats on a table that doesn't exist. r.maybeRefreshStats( - ctx, s.Stopper(), 100 /* tableID */, math.MaxInt32, time.Microsecond, /* asOfTime */ + ctx, 100 /* tableID */, nil /* explicitSettings */, math.MaxInt32, + time.Microsecond, /* asOfTime */ ) // Ensure that we will not try to refresh tableID 100 again. @@ -463,7 +656,7 @@ func TestNoRetryOnFailure(t *testing.T) { } } -func TestMutationsChannel(t *testing.T) { +func TestMutationsAndSettingOverrideChannels(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) ctx := context.Background() @@ -475,6 +668,7 @@ func TestMutationsChannel(t *testing.T) { r := Refresher{ st: st, mutations: make(chan mutation, refreshChanBufferLen), + settings: make(chan settingOverride, refreshChanBufferLen), } tbl := descpb.TableDescriptor{ID: 53, ParentID: 52, Name: "foo"} @@ -489,6 +683,22 @@ func TestMutationsChannel(t *testing.T) { if expected, actual := refreshChanBufferLen, len(r.mutations); expected != actual { t.Fatalf("expected channel size %d but found %d", expected, actual) } + + // Test that the settings channel doesn't block even when we add 10 more + // items than can fit in the buffer. + clusterSettings := &catpb.ClusterSettingsForTable{} + tableDesc.TableDesc().ClusterSettingsForTable = clusterSettings + minStaleRows := int64(1) + clusterSettings.SqlStatsAutomaticCollectionMinStaleRows = &minStaleRows + for i := 0; i < refreshChanBufferLen+10; i++ { + int64CurrIteration := int64(i) + clusterSettings.SqlStatsAutomaticCollectionMinStaleRows = &int64CurrIteration + r.NotifyMutation(tableDesc, 5 /* rowsAffected */) + } + + if expected, actual := refreshChanBufferLen, len(r.settings); expected != actual { + t.Fatalf("expected channel size %d but found %d", expected, actual) + } } func TestDefaultColumns(t *testing.T) {