Skip to content

Commit

Permalink
stats: table-level setting to turn auto stats collection on/off
Browse files Browse the repository at this point in the history
Fixes #40989

Previously, there was no way to enable or disable automatic statistics
collection at the table level. It could only be turned on or off via the
`sql.stats.automatic_collection.enabled` cluster setting.

This was inadequate because statistics collection can be expensive for
large tables, and it would be desirable to defer collection until after
data is finished loading, or in off hours. Also, small tables which are
frequently updated may trigger statistics collection leading to
unnecessary overhead and/or unpredictable query plan changes.

To address this, this patch adds support for setting the following
cluster settings at the table level:
```
sql.stats.automatic_collection.enabled
sql.stats.automatic_collection.fraction_stale_rows
sql.stats.automatic_collection.min_stale_rows
```
for example:
```
ALTER TABLE t1 SET ("sql.stats.automatic_collection.enabled" = true);
ALTER TABLE t1
      SET ("sql.stats.automatic_collection.fraction_stale_rows" = 0.1,
           "sql.stats.automatic_collection.min_stale_rows" = 2000);
```
The table-level setting takes precedence over the cluster setting.

Release justification: Low risk fix for missing fine-grained control
over automatic statistics collection.

Release note (sql change): Automatic statistics collection can now be
enabled or disabled for individual tables, taking precedence over the
cluster setting, for example:
```
ALTER TABLE t1 SET ("sql.stats.automatic_collection.enabled" = true);
ALTER TABLE t1 SET ("sql.stats.automatic_collection.enabled" = false);
ALTER TABLE t1 RESET ("sql.stats.automatic_collection.enabled");
```
RESET removes the setting value entirely, in which case the cluster
setting of the same name is in effect for the table.

Cluster settings `sql.stats.automatic_collection.fraction_stale_rows`
and `sql.stats.automatic_collection.min_stale_rows` can now also be
set at the table level, either at table creation time, or later,
independent of whether auto stats is enabled:
```
ALTER TABLE t1
      SET ("sql.stats.automatic_collection.fraction_stale_rows" = 0.1,
           "sql.stats.automatic_collection.min_stale_rows" = 2000);
CREATE TABLE t1 (a INT, b INT)
       WITH ("sql.stats.automatic_collection.enabled" = true,
             "sql.stats.automatic_collection.min_stale_rows" = 1000000,
	     "sql.stats.automatic_collection.fraction_stale_rows" = 0.05
	    );
```
Tables that have auto stats collection explicitly enabled or disabled
can be discovered by querying system tables. For example, to find all
tables with auto stats enabled at the table level:
```
SELECT
        tbl.database_name || '.' || tbl.schema_name || '.' || tbl.name
FROM
        crdb_internal.tables AS tbl
        INNER JOIN system.descriptor AS d ON d.id = tbl.table_id
WHERE
        tbl.database_name IS NOT NULL
        AND tbl.database_name <> '%s'
        AND tbl.drop_time IS NULL
        AND
         crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor',
                d.descriptor, false)->'table'->'clusterSettingsForTable'
		->> 'sqlStatsAutomaticCollectionEnabled' = 'true';

      ?column?
--------------------
  defaultdb.mws.t1

```
  • Loading branch information
Mark Sirek committed Mar 22, 2022
1 parent 9cdc505 commit 4db4555
Show file tree
Hide file tree
Showing 16 changed files with 1,003 additions and 34 deletions.
14 changes: 14 additions & 0 deletions pkg/settings/cluster/cluster_settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,20 @@ func TelemetryOptOut() bool {
// (for example, a CLI subcommand that does not connect to a cluster).
var NoSettings *Settings // = nil

const (
// AutoStatsClusterSettingName is the name of the automatic stats collection
// enabled cluster setting.
AutoStatsClusterSettingName = "sql.stats.automatic_collection.enabled"

// AutoStatsMinStaleSettingName is the name of the automatic stats collection
// min stale rows cluster setting.
AutoStatsMinStaleSettingName = "sql.stats.automatic_collection.min_stale_rows"

// AutoStatsFractionStaleSettingName is the name of the automatic stats
// collection fraction stale rows cluster setting.
AutoStatsFractionStaleSettingName = "sql.stats.automatic_collection.fraction_stale_rows"
)

// CPUProfileType tracks whether a CPU profile is in progress.
type CPUProfileType int32

Expand Down
12 changes: 12 additions & 0 deletions pkg/sql/catalog/catpb/catalog.proto
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,15 @@ message RowLevelTTL {
// rows on table) during row level TTL. If zero, no statistics are reported.
optional int64 row_stats_poll_interval = 9 [(gogoproto.nullable)=false, (gogoproto.casttype)="time.Duration"];
}

// ClusterSettingsForTable represents cluster settings specified at the table
// level. Each setting is nullable so queries of the descriptor in JSON form
// only list values which have been set.
message ClusterSettingsForTable {
option (gogoproto.equal) = true;
// Table-level value for sql.stats.automatic_collection.enabled; unset means
// no value was specified for the table.
optional bool sql_stats_automatic_collection_enabled = 1;
// Table-level value for sql.stats.automatic_collection.min_stale_rows; unset
// means no value was specified for the table.
optional int64 sql_stats_automatic_collection_min_stale_rows = 2;
// Table-level value for sql.stats.automatic_collection.fraction_stale_rows;
// unset means no value was specified for the table.
optional double sql_stats_automatic_collection_fraction_stale_rows = 3;
}


49 changes: 49 additions & 0 deletions pkg/sql/catalog/descpb/structured.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,55 @@ func (desc *TableDescriptor) Persistence() tree.Persistence {
return tree.PersistencePermanent
}

// AutoStatsCollectionEnabled reports the table-level automatic statistics
// collection switch, if one has been set. When ok is false the table carries
// no such setting and enabled must be ignored. When ok is true, enabled says
// whether auto stats collection is explicitly on (true) or off (false) for
// this table.
func (desc *TableDescriptor) AutoStatsCollectionEnabled() (enabled bool, ok bool) {
	if desc.NoClusterSettingsForTable() {
		return false, false
	}
	if setting := desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionEnabled; setting != nil {
		return *setting, true
	}
	return false, false
}

// AutoStatsMinStaleRows reports the table-level value of
// sql.stats.automatic_collection.min_stale_rows. The returned minStaleRows is
// only meaningful when ok is true; ok is false when the value has not been set
// at the table level.
func (desc *TableDescriptor) AutoStatsMinStaleRows() (minStaleRows int64, ok bool) {
	if desc.NoClusterSettingsForTable() {
		return 0, false
	}
	if setting := desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionMinStaleRows; setting != nil {
		return *setting, true
	}
	return 0, false
}

// AutoStatsFractionStaleRows reports the table-level value of
// sql.stats.automatic_collection.fraction_stale_rows. The returned
// fractionStaleRows is only meaningful when ok is true; ok is false when the
// value has not been set at the table level.
func (desc *TableDescriptor) AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) {
	if desc.NoClusterSettingsForTable() {
		return 0, false
	}
	if setting := desc.ClusterSettingsForTable.SqlStatsAutomaticCollectionFractionStaleRows; setting != nil {
		return *setting, true
	}
	return 0, false
}

// NoClusterSettingsForTable is true if no cluster settings are set at the
// table level for the given table, i.e. the descriptor's
// ClusterSettingsForTable field is nil.
func (desc *TableDescriptor) NoClusterSettingsForTable() bool {
return desc.ClusterSettingsForTable == nil
}

// IsVirtualTable returns true if the TableDescriptor describes a
// virtual Table (like the information_schema tables) and thus doesn't
// need to be physically stored.
Expand Down
5 changes: 4 additions & 1 deletion pkg/sql/catalog/descpb/structured.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1198,7 +1198,10 @@ message TableDescriptor {
optional uint32 next_constraint_id = 49 [(gogoproto.nullable) = false,
(gogoproto.customname) = "NextConstraintID", (gogoproto.casttype) = "ConstraintID"];

// Next ID: 51
// ClusterSettingsForTable are cluster settings specified at the table level.
optional cockroach.sql.catalog.catpb.ClusterSettingsForTable cluster_settings_for_table = 51 [(gogoproto.customname)="ClusterSettingsForTable"];

// Next ID: 52
}

// SurvivalGoal is the survival goal for a database.
Expand Down
19 changes: 19 additions & 0 deletions pkg/sql/catalog/descriptor.go
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,25 @@ type TableDescriptor interface {
GetExcludeDataFromBackup() bool
// GetStorageParams returns a list of storage parameters for the table.
GetStorageParams(spaceBetweenEqual bool) []string
// NoClusterSettingsForTable is true if no cluster settings are set at the
// table level for the given table.
NoClusterSettingsForTable() bool
// AutoStatsCollectionEnabled indicates if automatic statistics collection is
// explicitly enabled or disabled for this table. If ok is true, then
// enabled==false means auto stats collection is off for this table, and if
// true, auto stats are on for this table. If ok is false, there is no setting
// for this table.
AutoStatsCollectionEnabled() (enabled bool, ok bool)
// AutoStatsMinStaleRows indicates the setting of
// sql.stats.automatic_collection.min_stale_rows for this table.
// If ok is true, then the minStaleRows value is valid, otherwise this has not
// been set at the table level.
AutoStatsMinStaleRows() (minStaleRows int64, ok bool)
// AutoStatsFractionStaleRows indicates the setting of
// sql.stats.automatic_collection.fraction_stale_rows for this table.
// If ok is true, then the fractionStaleRows value is valid, otherwise this has
// not been set at the table level.
AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool)
}

// TypeDescriptor will eventually be called typedesc.Descriptor.
Expand Down
39 changes: 39 additions & 0 deletions pkg/sql/catalog/tabledesc/structured.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"context"
"fmt"
"sort"
"strconv"
"strings"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
Expand Down Expand Up @@ -2562,6 +2563,24 @@ func (desc *wrapper) GetStorageParams(spaceBetweenEqual bool) []string {
if exclude := desc.GetExcludeDataFromBackup(); exclude {
appendStorageParam(`exclude_data_from_backup`, `true`)
}
if settings := desc.ClusterSettingsForTable; settings != nil {
// These need to be wrapped in double-quotes because they contain '.' chars.
if settings.SqlStatsAutomaticCollectionEnabled != nil {
value := *settings.SqlStatsAutomaticCollectionEnabled
appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsClusterSettingName),
fmt.Sprintf("%v", value))
}
if settings.SqlStatsAutomaticCollectionMinStaleRows != nil {
value := *settings.SqlStatsAutomaticCollectionMinStaleRows
appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsMinStaleSettingName),
strconv.FormatInt(value, 10))
}
if settings.SqlStatsAutomaticCollectionFractionStaleRows != nil {
value := *settings.SqlStatsAutomaticCollectionFractionStaleRows
appendStorageParam(fmt.Sprintf(`"%s"`, cluster.AutoStatsFractionStaleSettingName),
fmt.Sprintf("%g", value))
}
}
return storageParams
}

Expand All @@ -2580,6 +2599,26 @@ func (desc *wrapper) GetMultiRegionEnumDependencyIfExists() bool {
return false
}

// NoClusterSettingsForTable implements the TableDescriptor interface. It
// forwards to the method of the same name on the descriptor returned by
// TableDesc().
func (desc *wrapper) NoClusterSettingsForTable() bool {
return desc.TableDesc().NoClusterSettingsForTable()
}

// AutoStatsCollectionEnabled implements the TableDescriptor interface. It
// forwards to the method of the same name on the descriptor returned by
// TableDesc().
func (desc *wrapper) AutoStatsCollectionEnabled() (enabled bool, ok bool) {
return desc.TableDesc().AutoStatsCollectionEnabled()
}

// AutoStatsMinStaleRows implements the TableDescriptor interface. It forwards
// to the method of the same name on the descriptor returned by TableDesc().
func (desc *wrapper) AutoStatsMinStaleRows() (minStaleRows int64, ok bool) {
return desc.TableDesc().AutoStatsMinStaleRows()
}

// AutoStatsFractionStaleRows implements the TableDescriptor interface. It
// forwards to the method of the same name on the descriptor returned by
// TableDesc().
func (desc *wrapper) AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) {
return desc.TableDesc().AutoStatsFractionStaleRows()
}

// SetTableLocalityRegionalByTable sets the descriptor's locality config to
// regional at the table level in the supplied region. An empty region name
// (or its alias PrimaryRegionNotSpecifiedName) denotes that the table is homed in
Expand Down
66 changes: 66 additions & 0 deletions pkg/sql/catalog/tabledesc/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import (

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catprivilege"
Expand Down Expand Up @@ -520,6 +522,8 @@ func (desc *wrapper) ValidateSelf(vea catalog.ValidationErrorAccumulator) {
}
}

desc.validateClusterSettingsForTable(vea.Report)

if desc.IsSequence() {
return
}
Expand Down Expand Up @@ -674,6 +678,8 @@ func (desc *wrapper) ValidateSelf(vea catalog.ValidationErrorAccumulator) {
))
}
return

desc.validateClusterSettingsForTable(vea.Report)
}
if m.GetPrimaryKeySwap() != nil {
foundAlterPK = true
Expand Down Expand Up @@ -1501,3 +1507,63 @@ func (desc *wrapper) validatePartitioning() error {
)
})
}

// validateClusterSettingsForTable checks every table-level cluster setting
// stored on the descriptor and reports each problem found through errReportFn.
// It is a no-op when the descriptor carries no table-level settings.
func (desc *wrapper) validateClusterSettingsForTable(errReportFn func(err error)) {
	tableSettings := desc.ClusterSettingsForTable
	if tableSettings == nil {
		return
	}
	desc.validateBoolSetting(
		errReportFn,
		tableSettings.SqlStatsAutomaticCollectionEnabled,
		cluster.AutoStatsClusterSettingName,
	)
	desc.validateIntSetting(
		errReportFn,
		tableSettings.SqlStatsAutomaticCollectionMinStaleRows,
		cluster.AutoStatsMinStaleSettingName,
		settings.NonNegativeInt,
	)
	desc.validateFloatSetting(
		errReportFn,
		tableSettings.SqlStatsAutomaticCollectionFractionStaleRows,
		cluster.AutoStatsFractionStaleSettingName,
		settings.NonNegativeFloat,
	)
}

// verifyProperTableForStatsSetting reports an error through errReportFn when
// the named setting is present on a descriptor that is a virtual table, and
// another when the descriptor is not a table. The two checks are independent,
// so both errors may be reported for the same descriptor.
func (desc *wrapper) verifyProperTableForStatsSetting(
	errReportFn func(err error), settingName string,
) {
	if isVirtual := desc.IsVirtualTable(); isVirtual {
		errReportFn(errors.Newf("Setting %s may not be set on virtual table", settingName))
	}
	if isTable := desc.IsTable(); !isTable {
		errReportFn(errors.Newf("Setting %s may not be set on a view or sequence", settingName))
	}
}

// validateBoolSetting validates a table-level boolean setting. An unset (nil)
// value is accepted as-is; a set value only requires that the descriptor be a
// kind of table the setting may be applied to.
func (desc *wrapper) validateBoolSetting(
	errReportFn func(err error), value *bool, settingName string,
) {
	if value == nil {
		return
	}
	desc.verifyProperTableForStatsSetting(errReportFn, settingName)
}

// validateIntSetting validates a table-level integer setting. An unset (nil)
// value is accepted as-is. A set value must live on a descriptor the setting
// may be applied to and must pass validateFunc; failures are reported through
// errReportFn, wrapped with the setting name.
func (desc *wrapper) validateIntSetting(
	errReportFn func(err error), value *int64, settingName string, validateFunc func(v int64) error,
) {
	if value == nil {
		return
	}
	desc.verifyProperTableForStatsSetting(errReportFn, settingName)
	err := validateFunc(*value)
	if err == nil {
		return
	}
	errReportFn(errors.Wrapf(err, "invalid value for %s", settingName))
}

// validateFloatSetting validates a table-level floating-point setting. An
// unset (nil) value is accepted as-is. A set value must live on a descriptor
// the setting may be applied to and must pass validateFunc; failures are
// reported through errReportFn, wrapped with the setting name.
func (desc *wrapper) validateFloatSetting(
	errReportFn func(err error),
	value *float64,
	settingName string,
	validateFunc func(v float64) error,
) {
	if value == nil {
		return
	}
	desc.verifyProperTableForStatsSetting(errReportFn, settingName)
	err := validateFunc(*value)
	if err == nil {
		return
	}
	errReportFn(errors.Wrapf(err, "invalid value for %s", settingName))
}
Loading

0 comments on commit 4db4555

Please sign in to comment.