Skip to content

Commit

Permalink
stats: table-level setting to turn auto stats collection on/off
Browse files Browse the repository at this point in the history
Fixes cockroachdb#40989

Previously, there was no way to enable or disable automatic statistics
collection at the table level. It could only be turned on or off via the
`sql.stats.automatic_collection.enabled` cluster setting.

This was inadequate because statistics collection can be expensive for
large tables, and it would be desirable to defer collection until after
data is finished loading, or in off hours. Also, small tables which are
frequently updated may trigger statistics collection leading to
unnecessary overhead and/or unpredictable query plan changes.

To address this, this patch adds support for setting of the following
cluster settings at the table level:
```
sql.stats.automatic_collection.enabled
sql.stats.automatic_collection.fraction_stale_rows
sql.stats.automatic_collection.min_stale_rows
```
for example:
```
ALTER TABLE t1 SET ("sql.stats.automatic_collection.enabled" = true);
ALTER TABLE t1
      SET ("sql.stats.automatic_collection.fraction_stale_rows" = 0.1,
           "sql.stats.automatic_collection.min_stale_rows" = 2000);
```
The table-level setting takes precedence over the cluster setting.

Release justification: Low risk fix for missing fine-grained control
over automatic statistics collection.

Release note (sql change): Automatic statistics collection can now be
enabled or disabled for individual tables, for example:
```
ALTER TABLE t1 SET ("sql.stats.automatic_collection.enabled" = true);
ALTER TABLE t1 SET ("sql.stats.automatic_collection.enabled" = false);
ALTER TABLE t1 RESET ("sql.stats.automatic_collection.enabled");
```
RESET removes the setting value entirely, in which case the cluster
setting of the same name is in effect for the table.

Cluster settings `sql.stats.automatic_collection.fraction_stale_rows`
and `sql.stats.automatic_collection.min_stale_rows` can now also be
set at the table level, either at table creation time, or later:
```
ALTER TABLE t1
      SET ("sql.stats.automatic_collection.fraction_stale_rows" = 0.1,
           "sql.stats.automatic_collection.min_stale_rows" = 2000);
CREATE TABLE t1 (a INT, b INT)
       WITH ("sql.stats.automatic_collection.enabled" = true,
             "sql.stats.automatic_collection.min_stale_rows" = 1000000,
	     "sql.stats.automatic_collection.fraction_stale_rows" = 0.05
	    );
```
Tables that have auto stats collection explicitly enabled or disabled
may be discovered by querying system tables, for example,
find all tables with auto stats enabled:
```
SELECT
        tbl.database_name || '.' || tbl.schema_name || '.' || tbl.name
FROM
        crdb_internal.tables AS tbl
        INNER JOIN system.descriptor AS d ON d.id = tbl.table_id
WHERE
        tbl.database_name IS NOT NULL
        AND tbl.database_name <> '%s'
        AND tbl.drop_time IS NULL
        AND
         crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor',
                d.descriptor, false)->'table'->'tableLevelSetting'
		-> 'sqlStatsAutomaticCollectionEnabled' ? 'value';

      ?column?
--------------------
  defaultdb.mws.t1

```
  • Loading branch information
Mark Sirek committed Mar 20, 2022
1 parent 9cdc505 commit 72a4d33
Show file tree
Hide file tree
Showing 13 changed files with 789 additions and 23 deletions.
30 changes: 30 additions & 0 deletions pkg/sql/catalog/catpb/catalog.proto
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,33 @@ message RowLevelTTL {
// rows on table) during row level TTL. If zero, no statistics are reported.
optional int64 row_stats_poll_interval = 9 [(gogoproto.nullable)=false, (gogoproto.casttype)="time.Duration"];
}

// Create separate messages for setting values so we may access the
// values directly via SQL.
message Bool {
option (gogoproto.equal) = true;
optional bool value = 1 [(gogoproto.nullable)=false];
}
message Int {
option (gogoproto.equal) = true;
optional int64 value = 1 [(gogoproto.nullable)=false];
}
// Protobuf's double is float64 in Go.
message Float {
option (gogoproto.equal) = true;
optional double value = 1 [(gogoproto.nullable)=false];
}
message String {
option (gogoproto.equal) = true;
optional string value = 1 [(gogoproto.nullable)=false];
}

// TableLevelSettings represents cluster or session settings specified at the
// table level. Each SettingValue is nullable so queries of the descriptor in
// JSON form only list values which have been set.
message TableLevelSettings {
option (gogoproto.equal) = true;
optional Bool sql_stats_automatic_collection_enabled = 1;
optional Int sql_stats_automatic_collection_min_stale_rows = 2;
optional Float sql_stats_automatic_collection_fraction_stale_rows = 3;
}
43 changes: 43 additions & 0 deletions pkg/sql/catalog/descpb/structured.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,49 @@ func (desc *TableDescriptor) Persistence() tree.Persistence {
return tree.PersistencePermanent
}

// AutoStatsCollectionEnabled indicates if automatic statistics collection is
// explicitly enabled or disabled for this table. If ok is true, then
// enabled==false means auto stats collection is off for this table, and if
// true, auto stats are on for this table. If ok is false, there is no setting
// for this table.
func (desc *TableDescriptor) AutoStatsCollectionEnabled() (enabled bool, ok bool) {
if desc.TableLevelSettings == nil {
return false, false
}
if desc.TableLevelSettings.SqlStatsAutomaticCollectionEnabled == nil {
return false, false
}
return desc.TableLevelSettings.SqlStatsAutomaticCollectionEnabled.Value, true
}

// AutoStatsMinStaleRows indicates the setting of
// sql.stats.automatic_collection.min_stale_rows for this table.
// If ok is true, then the minStaleRows value is valid, otherwise this has not
// been set at the table level.
func (desc *TableDescriptor) AutoStatsMinStaleRows() (minStaleRows int64, ok bool) {
if desc.TableLevelSettings == nil {
return 0, false
}
if desc.TableLevelSettings.SqlStatsAutomaticCollectionMinStaleRows == nil {
return 0, false
}
return desc.TableLevelSettings.SqlStatsAutomaticCollectionMinStaleRows.Value, true
}

// AutoStatsFractionStaleRows indicates the setting of
// sql.stats.automatic_collection.fraction_stale_rows for this table.
// If ok is true, then the fractionStaleRows value is valid, otherwise this has
// not been set at the table level.
func (desc *TableDescriptor) AutoStatsFractionStaleRows() (fractionStaleRows float64, ok bool) {
if desc.TableLevelSettings == nil {
return 0, false
}
if desc.TableLevelSettings.SqlStatsAutomaticCollectionFractionStaleRows == nil {
return 0, false
}
return desc.TableLevelSettings.SqlStatsAutomaticCollectionFractionStaleRows.Value, true
}

// IsVirtualTable returns true if the TableDescriptor describes a
// virtual Table (like the information_schema tables) and thus doesn't
// need to be physically stored.
Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/catalog/descpb/structured.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1198,7 +1198,9 @@ message TableDescriptor {
optional uint32 next_constraint_id = 49 [(gogoproto.nullable) = false,
(gogoproto.customname) = "NextConstraintID", (gogoproto.casttype) = "ConstraintID"];

// Next ID: 51
// TableLevelSettings are cluster or session settings specified at the table level.
optional cockroach.sql.catalog.catpb.TableLevelSettings table_level_setting = 51 [(gogoproto.customname)="TableLevelSettings"];

}

// SurvivalGoal is the survival goal for a database.
Expand Down
26 changes: 26 additions & 0 deletions pkg/sql/catalog/tabledesc/structured.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"context"
"fmt"
"sort"
"strconv"
"strings"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
Expand Down Expand Up @@ -2533,6 +2534,12 @@ func (desc *wrapper) GetStorageParams(spaceBetweenEqual bool) []string {
appendStorageParam := func(key, value string) {
storageParams = append(storageParams, key+spacing+`=`+spacing+value)
}
boolAsString := func(boolVal bool) string {
if boolVal {
return "true"
}
return "false"
}
if ttl := desc.GetRowLevelTTL(); ttl != nil {
appendStorageParam(`ttl`, `'on'`)
appendStorageParam(`ttl_automatic_column`, `'on'`)
Expand Down Expand Up @@ -2562,6 +2569,25 @@ func (desc *wrapper) GetStorageParams(spaceBetweenEqual bool) []string {
if exclude := desc.GetExcludeDataFromBackup(); exclude {
appendStorageParam(`exclude_data_from_backup`, `true`)
}
if desc.TableLevelSettings != nil {
settings := desc.TableLevelSettings
// These need to be wrapped in double-quotes because they contain '.' chars.
if settings.SqlStatsAutomaticCollectionEnabled != nil {
value := settings.SqlStatsAutomaticCollectionEnabled.Value
appendStorageParam(`"sql.stats.automatic_collection.enabled"`,
boolAsString(value))
}
if settings.SqlStatsAutomaticCollectionMinStaleRows != nil {
value := settings.SqlStatsAutomaticCollectionMinStaleRows.Value
appendStorageParam(`"sql.stats.automatic_collection.min_stale_rows"`,
strconv.FormatInt(value, 10))
}
if settings.SqlStatsAutomaticCollectionFractionStaleRows != nil {
value := settings.SqlStatsAutomaticCollectionFractionStaleRows.Value
appendStorageParam(`"sql.stats.automatic_collection.fraction_stale_rows"`,
fmt.Sprintf("%g", value)) //strconv.FormatFloat(value, 10))
}
}
return storageParams
}

Expand Down
46 changes: 46 additions & 0 deletions pkg/sql/catalog/tabledesc/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catprivilege"
Expand Down Expand Up @@ -713,6 +714,8 @@ func (desc *wrapper) ValidateSelf(vea catalog.ValidationErrorAccumulator) {
// ON UPDATE expression. This check is made to ensure that we know which ON
// UPDATE action to perform when a FK UPDATE happens.
ValidateOnUpdate(desc, vea.Report)

vea.Report(desc.validateTableLevelSettings())
}

// ValidateOnUpdate returns an error if there is a column with both a foreign
Expand Down Expand Up @@ -1501,3 +1504,46 @@ func (desc *wrapper) validatePartitioning() error {
)
})
}

// validateTableLevelSettings validates that any new table-level settings hold
// a valid value.
func (desc *wrapper) validateTableLevelSettings() error {
if desc.TableLevelSettings != nil {
if desc.TableLevelSettings.SqlStatsAutomaticCollectionEnabled != nil {
setting := "sql.stats.automatic_collection.enabled"
if desc.IsVirtualTable() {
return errors.Newf("Setting %s may not be set on virtual table", setting)
}
if !desc.IsTable() {
return errors.Newf("Setting %s may not be set on a view or sequence", setting)
}
}
if desc.TableLevelSettings.SqlStatsAutomaticCollectionMinStaleRows != nil {
setting := "sql.stats.automatic_collection.min_stale_rows"
if desc.IsVirtualTable() {
return errors.Newf("Setting %s may not be set on virtual table", setting)
}
if !desc.IsTable() {
return errors.Newf("Setting %s may not be set on a view or sequence", setting)
}
if err := settings.
NonNegativeInt(desc.TableLevelSettings.SqlStatsAutomaticCollectionMinStaleRows.Value); err != nil {
return errors.Wrapf(err, "invalid value for %s", setting)
}
}
if desc.TableLevelSettings.SqlStatsAutomaticCollectionFractionStaleRows != nil {
setting := "sql.stats.automatic_collection.fraction_stale_rows"
if desc.IsVirtualTable() {
return errors.Newf("Setting %s may not be set on virtual table", setting)
}
if !desc.IsTable() {
return errors.Newf("Setting %s may not be set on a view or sequence", setting)
}
if err := settings.
NonNegativeFloat(desc.TableLevelSettings.SqlStatsAutomaticCollectionFractionStaleRows.Value); err != nil {
return errors.Wrapf(err, "invalid value for %s", setting)
}
}
}
return nil
}
9 changes: 9 additions & 0 deletions pkg/sql/catalog/tabledesc/validate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ var validationMap = []struct {
"ExcludeDataFromBackup": {status: thisFieldReferencesNoObjects},
"NextConstraintID": {status: iSolemnlySwearThisFieldIsValidated},
"DeclarativeSchemaChangerState": {status: iSolemnlySwearThisFieldIsValidated},
"TableLevelSettings": {status: iSolemnlySwearThisFieldIsValidated},
},
},
{
Expand Down Expand Up @@ -289,6 +290,14 @@ var validationMap = []struct {
"DeclarativeSchemaChangerState": {status: thisFieldReferencesNoObjects},
},
},
{
obj: catpb.TableLevelSettings{},
fieldMap: map[string]validationStatusInfo{
"SqlStatsAutomaticCollectionEnabled": {status: iSolemnlySwearThisFieldIsValidated},
"SqlStatsAutomaticCollectionMinStaleRows": {status: iSolemnlySwearThisFieldIsValidated},
"SqlStatsAutomaticCollectionFractionStaleRows": {status: iSolemnlySwearThisFieldIsValidated},
},
},
}

type validationStatusInfo struct {
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/distsql_plan_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,9 @@ func (dsp *DistSQLPlanner) createStatsPlan(
var rowsExpected uint64
if len(tableStats) > 0 {
overhead := stats.AutomaticStatisticsFractionStaleRows.Get(&dsp.st.SV)
if autoStatsFractionStaleRowsForTable, ok := desc.TableDesc().AutoStatsFractionStaleRows(); ok {
overhead = autoStatsFractionStaleRowsForTable
}
// Convert to a signed integer first to make the linter happy.
rowsExpected = uint64(int64(
// The total expected number of rows is the same number that was measured
Expand Down
100 changes: 100 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/alter_table
Original file line number Diff line number Diff line change
Expand Up @@ -2330,3 +2330,103 @@ COMMIT;

statement ok
ROLLBACK;

subtest table_settings

statement ok
CREATE TABLE t5 (a int)

# Turn on automatic stats collection
statement ok
ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = true)

# Verify automatic collection is enabled.
query T
SELECT
crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor',
d.descriptor, false)->'table'->'tableLevelSetting'
FROM
crdb_internal.tables AS tbl
INNER JOIN system.descriptor AS d ON d.id = tbl.table_id
WHERE
tbl.name = 't5'
AND tbl.drop_time IS NULL
----
{"sqlStatsAutomaticCollectionEnabled": {"value": true}}

# Strings in settings should be converted to the proper data type.
statement ok
ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = 'false')

# Verify automatic collection is disabled.
# TODO(msirek): Fix pb_to_json so it displays the false boolean value.
query T
SELECT
crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor',
d.descriptor, false)->'table'->'tableLevelSetting'
FROM
crdb_internal.tables AS tbl
INNER JOIN system.descriptor AS d ON d.id = tbl.table_id
WHERE
tbl.name = 't5'
AND tbl.drop_time IS NULL
AND NOT crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor',
d.descriptor, false)->'table'->'tableLevelSetting'
-> 'sqlStatsAutomaticCollectionEnabled' ? 'value'
----
{"sqlStatsAutomaticCollectionEnabled": {}}

# SHOW CREATE TABLE displays the value properly.
query T
SELECT create_statement FROM [SHOW CREATE TABLE t5]
----
CREATE TABLE public.t5 (
a INT8 NULL,
rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(),
CONSTRAINT t5_pkey PRIMARY KEY (rowid ASC)
) WITH ("sql.stats.automatic_collection.enabled" = false)

statement error pq: parameter "sql.stats.automatic_collection.enabled" requires a Boolean value
ALTER TABLE t5 SET ("sql.stats.automatic_collection.enabled" = 123)

statement ok
ALTER TABLE t5 RESET ("sql.stats.automatic_collection.enabled")

# Verify the automatic collection setting is removed.
query T
SELECT
crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor',
d.descriptor, false)->'table'->'tableLevelSetting'
FROM
crdb_internal.tables AS tbl
INNER JOIN system.descriptor AS d ON d.id = tbl.table_id
WHERE
tbl.name = 't5'
AND tbl.drop_time IS NULL
----
{}

statement error pq: invalid value for sql.stats.automatic_collection.fraction_stale_rows: could not parse "hello" as type float: strconv.ParseFloat: parsing "hello": invalid syntax
ALTER TABLE t5 SET ("sql.stats.automatic_collection.fraction_stale_rows" = 'hello')

statement error pq: invalid value for sql.stats.automatic_collection.min_stale_rows: could not parse "world" as type int: strconv.ParseInt: parsing "world": invalid syntax
ALTER TABLE t5 SET ("sql.stats.automatic_collection.min_stale_rows" = 'world')

# Verify strings can be converted to proper setting values.
statement ok
ALTER TABLE t5 SET ("sql.stats.automatic_collection.fraction_stale_rows" = '0.15',
"sql.stats.automatic_collection.min_stale_rows" = '1234')

# Verify settings
query T
SELECT
crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor',
d.descriptor, false)->'table'->'tableLevelSetting'
FROM
crdb_internal.tables AS tbl
INNER JOIN system.descriptor AS d ON d.id = tbl.table_id
WHERE
tbl.name = 't5'
AND tbl.drop_time IS NULL
----
{"sqlStatsAutomaticCollectionFractionStaleRows": {"value": 0.15}, "sqlStatsAutomaticCollectionMinStaleRows": {"value": "1234"}}
Loading

0 comments on commit 72a4d33

Please sign in to comment.