Skip to content

Commit

Permalink
sql/stats: add table setting to disable generation of stats forecasts
Browse files Browse the repository at this point in the history
Add a new storage parameter (a.k.a. table setting) which can be used to
override the cluster setting `sql.stats.forecasts.enabled`. This builds on
the work done earlier in #79025 and #86932, so we don't have to do much, just
wire it all together.

Fixes: #86353

Release justification: Low-risk update to new functionality.

Release note (sql change): Add a new table setting
`sql_stats_forecasts_enabled`, which controls whether statistics
forecasts are generated for a specific table. When set, this overrides
the cluster setting `sql.stats.forecasts.enabled`.
  • Loading branch information
michae2 committed Aug 28, 2022
1 parent 896cf3c commit f5936b0
Show file tree
Hide file tree
Showing 5 changed files with 251 additions and 0 deletions.
4 changes: 4 additions & 0 deletions pkg/sql/catalog/descriptor.go
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,10 @@ type TableDescriptor interface {
// GetAutoStatsSettings returns the table settings related to automatic
// statistics collection. May return nil if none are set.
GetAutoStatsSettings() *catpb.AutoStatsSettings
// ForecastStatsEnabled indicates whether statistics forecasting is explicitly
// enabled or disabled for this table. If ok is true, then the enabled value
// is valid, otherwise this has not been set at the table level.
ForecastStatsEnabled() (enabled bool, ok bool)
// GetIndexNameByID returns the name of an index based on an ID, taking into
// account any ongoing declarative schema changes. Declarative schema changes
// do not propagate the index name into the mutations until changes are fully
Expand Down
12 changes: 12 additions & 0 deletions pkg/sql/catalog/tabledesc/structured.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"context"
"fmt"
"sort"
"strconv"
"strings"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
Expand Down Expand Up @@ -2661,6 +2662,9 @@ func (desc *wrapper) GetStorageParams(spaceBetweenEqual bool) []string {
fmt.Sprintf("%g", value))
}
}
if enabled, ok := desc.ForecastStatsEnabled(); ok {
appendStorageParam(`sql_stats_forecasts_enabled`, strconv.FormatBool(enabled))
}
return storageParams
}

Expand Down Expand Up @@ -2716,6 +2720,14 @@ func (desc *wrapper) GetAutoStatsSettings() *catpb.AutoStatsSettings {
return desc.AutoStatsSettings
}

// ForecastStatsEnabled implements the TableDescriptor interface.
//
// It returns (value, true) when desc.ForecastStats holds a value, and
// (false, false) when desc.ForecastStats is nil.
func (desc *wrapper) ForecastStatsEnabled() (enabled bool, ok bool) {
	if setting := desc.ForecastStats; setting != nil {
		return *setting, true
	}
	// A nil pointer means nothing was stored for this table.
	return false, false
}

// SetTableLocalityRegionalByTable sets the descriptor's locality config to
// regional at the table level in the supplied region. An empty region name
// (or its alias PrimaryRegionNotSpecifiedName) denotes that the table is homed in
Expand Down
216 changes: 216 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/forecast
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,154 @@ WHERE stat->>'name' = '__forecast__';
}
]

# Test that we can disable forecasts for individual tables.

statement ok
CREATE TABLE d (d DATE PRIMARY KEY)
WITH (sql_stats_automatic_collection_enabled = false, sql_stats_forecasts_enabled = false)

statement ok
ALTER TABLE d INJECT STATISTICS '[
{
"avg_size": 3,
"columns": [
"d"
],
"created_at": "1999-12-21 00:00:00.000000",
"distinct_count": 1,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "1999-12-16"
},
{
"distinct_range": 1,
"num_eq": 0,
"num_range": 1,
"upper_bound": "1999-12-21"
}
],
"histo_col_type": "DATE",
"histo_version": 2,
"name": "__auto__",
"null_count": 0,
"row_count": 1
},
{
"avg_size": 3,
"columns": [
"d"
],
"created_at": "1999-12-26 00:00:00.000000",
"distinct_count": 2,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "1999-12-16"
},
{
"distinct_range": 2,
"num_eq": 0,
"num_range": 2,
"upper_bound": "1999-12-26"
}
],
"histo_col_type": "DATE",
"histo_version": 2,
"name": "__auto__",
"null_count": 0,
"row_count": 2
},
{
"avg_size": 3,
"columns": [
"d"
],
"created_at": "1999-12-31 00:00:00.000000",
"distinct_count": 3,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "1999-12-16"
},
{
"distinct_range": 3,
"num_eq": 0,
"num_range": 3,
"upper_bound": "1999-12-31"
}
],
"histo_col_type": "DATE",
"histo_version": 2,
"name": "__auto__",
"null_count": 0,
"row_count": 3
}
]'

query TTTIIII
SELECT statistics_name, column_names, created, row_count, distinct_count, null_count, avg_size
FROM [SHOW STATISTICS FOR TABLE d WITH FORECAST]
ORDER BY created
----
__auto__ {d} 1999-12-21 00:00:00 +0000 +0000 1 1 0 3
__auto__ {d} 1999-12-26 00:00:00 +0000 +0000 2 2 0 3
__auto__ {d} 1999-12-31 00:00:00 +0000 +0000 3 3 0 3
__forecast__ {d} 2000-01-05 00:00:00 +0000 +0000 4 4 0 3

query T
SELECT jsonb_pretty(stat->'histo_buckets')
FROM (
SELECT jsonb_array_elements(statistics) AS stat
FROM [SHOW STATISTICS USING JSON FOR TABLE d WITH FORECAST]
)
WHERE stat->>'name' = '__forecast__';
----
[
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "1999-12-16"
},
{
"distinct_range": 4,
"num_eq": 0,
"num_range": 4,
"upper_bound": "2000-01-05"
}
]

query T
EXPLAIN SELECT * FROM d WHERE d >= '1999-12-16'
----
distribution: local
vectorized: true
·
• scan
estimated row count: 3 (100% of the table; stats collected <hidden> ago)
table: d@d_pkey
spans: [/'1999-12-16' - ]

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM d WHERE d >= '1999-12-16'
----
scan d
├── columns: d:1
├── constraint: /1: [/'1999-12-16' - ]
├── stats: [rows=3, distinct(1)=3, null(1)=0]
│ histogram(1)= 0 0 3 0
│ <--- '1999-12-16' --- '1999-12-31'
├── cost: 17.11
├── key: (1)
└── distribution: test

# Test that optimizer_use_forecasts can be used to enable and disable forecasts.

statement ok
Expand Down Expand Up @@ -789,6 +937,19 @@ scan x
├── key: (1)
└── distribution: test

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM d WHERE d >= '1999-12-16'
----
scan d
├── columns: d:1
├── constraint: /1: [/'1999-12-16' - ]
├── stats: [rows=3, distinct(1)=3, null(1)=0]
│ histogram(1)= 0 0 3 0
│ <--- '1999-12-16' --- '1999-12-31'
├── cost: 17.11
├── key: (1)
└── distribution: test

statement ok
RESET optimizer_use_forecasts

Expand Down Expand Up @@ -843,6 +1004,19 @@ scan x
├── key: (1)
└── distribution: test

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM d WHERE d >= '1999-12-16'
----
scan d
├── columns: d:1
├── constraint: /1: [/'1999-12-16' - ]
├── stats: [rows=3, distinct(1)=3, null(1)=0]
│ histogram(1)= 0 0 3 0
│ <--- '1999-12-16' --- '1999-12-31'
├── cost: 17.11
├── key: (1)
└── distribution: test

# Test that sql.stats.forecasts.enabled can be used to enable and disable
# generation of forecasts in the stats cache.

Expand Down Expand Up @@ -893,6 +1067,37 @@ vectorized: true
table: x@x_pkey
spans: [/17 - ]

query T
EXPLAIN SELECT * FROM d WHERE d >= '1999-12-16'
----
distribution: local
vectorized: true
·
• scan
estimated row count: 3 (100% of the table; stats collected <hidden> ago)
table: d@d_pkey
spans: [/'1999-12-16' - ]

# Test that we can enable forecasts for individual tables even if
# sql.stats.forecasts.enabled is false.

statement ok
ALTER TABLE c SET (sql_stats_forecasts_enabled = true)

query T
EXPLAIN SELECT * FROM c WHERE h > '1988-08-07'
----
distribution: local
vectorized: true
·
• scan
estimated row count: 23 (96% of the table; stats collected <hidden> ago; using stats forecast)
table: c@c_pkey
spans: [/'1988-08-07 00:00:00.000001+00:00' - ]

# Now check that we resume using forecasts when sql.stats.forecasts.enabled is
# reset.

statement ok
RESET CLUSTER SETTING sql.stats.forecasts.enabled

Expand Down Expand Up @@ -939,3 +1144,14 @@ vectorized: true
estimated row count: 2 (50% of the table; stats collected <hidden> ago; using stats forecast)
table: x@x_pkey
spans: [/17 - ]

query T
EXPLAIN SELECT * FROM d WHERE d >= '1999-12-16'
----
distribution: local
vectorized: true
·
• scan
estimated row count: 3 (100% of the table; stats collected <hidden> ago)
table: d@d_pkey
spans: [/'1999-12-16' - ]
3 changes: 3 additions & 0 deletions pkg/sql/stats/stats_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,9 @@ func tableTypeCanHaveStats(table catalog.TableDescriptor) bool {
// forecastAllowed reports whether statistics forecasting is allowed for the
// given table. A value reported by the table's ForecastStatsEnabled takes
// precedence; when the table reports nothing, the result is the value of
// UseStatisticsForecasts read from clusterSettings.
func forecastAllowed(table catalog.TableDescriptor, clusterSettings *cluster.Settings) bool {
	tableValue, tableIsSet := table.ForecastStatsEnabled()
	if !tableIsSet {
		// No table-level value: defer to the cluster-wide setting.
		return UseStatisticsForecasts.Get(&clusterSettings.SV)
	}
	return tableValue
}

Expand Down
16 changes: 16 additions & 0 deletions pkg/sql/storageparam/tablestorageparam/table_storage_param.go
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,22 @@ var tableParams = map[string]tableParam{
onSet: autoStatsFractionStaleRowsSettingFunc(settings.NonNegativeFloat),
onReset: autoStatsTableSettingResetFunc,
},
`sql_stats_forecasts_enabled`: {
	// onSet converts the datum to a bool with boolFromDatum and stores a
	// pointer to it in the table descriptor's ForecastStats field.
	onSet: func(
		po *Setter, semaCtx *tree.SemaContext, evalCtx *eval.Context, key string, datum tree.Datum,
	) error {
		forecastsOn, err := boolFromDatum(evalCtx, key, datum)
		if err == nil {
			po.tableDesc.ForecastStats = &forecastsOn
			return nil
		}
		// Conversion failed: leave the descriptor untouched.
		return err
	},
	// onReset clears the field back to nil.
	onReset: func(po *Setter, evalCtx *eval.Context, key string) error {
		po.tableDesc.ForecastStats = nil
		return nil
	},
},
}

func init() {
Expand Down

0 comments on commit f5936b0

Please sign in to comment.