Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql/stats: add table setting to disable generation of stats forecasts #86986

Merged
merged 2 commits into from
Aug 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ sql.stats.automatic_collection.min_stale_rows integer 500 target minimum number
sql.stats.cleanup.recurrence string @hourly cron-tab recurrence for SQL Stats cleanup job
sql.stats.flush.enabled boolean true if set, SQL execution statistics are periodically flushed to disk
sql.stats.flush.interval duration 10m0s the interval at which SQL execution statistics are flushed to disk, this value must be less than or equal to sql.stats.aggregation.interval
sql.stats.forecasts.enabled boolean true when true, enables generation of statistics forecasts by default for all tables
sql.stats.histogram_collection.enabled boolean true histogram collection mode
sql.stats.multi_column_collection.enabled boolean true multi-column statistics collection mode
sql.stats.non_default_columns.min_retention_period duration 24h0m0s minimum retention period for table statistics collected on non-default columns
Expand Down
1 change: 1 addition & 0 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@
<tr><td><code>sql.stats.cleanup.recurrence</code></td><td>string</td><td><code>@hourly</code></td><td>cron-tab recurrence for SQL Stats cleanup job</td></tr>
<tr><td><code>sql.stats.flush.enabled</code></td><td>boolean</td><td><code>true</code></td><td>if set, SQL execution statistics are periodically flushed to disk</td></tr>
<tr><td><code>sql.stats.flush.interval</code></td><td>duration</td><td><code>10m0s</code></td><td>the interval at which SQL execution statistics are flushed to disk, this value must be less than or equal to sql.stats.aggregation.interval</td></tr>
<tr><td><code>sql.stats.forecasts.enabled</code></td><td>boolean</td><td><code>true</code></td><td>when true, enables generation of statistics forecasts by default for all tables</td></tr>
<tr><td><code>sql.stats.histogram_collection.enabled</code></td><td>boolean</td><td><code>true</code></td><td>histogram collection mode</td></tr>
<tr><td><code>sql.stats.multi_column_collection.enabled</code></td><td>boolean</td><td><code>true</code></td><td>multi-column statistics collection mode</td></tr>
<tr><td><code>sql.stats.non_default_columns.min_retention_period</code></td><td>duration</td><td><code>24h0m0s</code></td><td>minimum retention period for table statistics collected on non-default columns</td></tr>
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/catalog/descriptor.go
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,10 @@ type TableDescriptor interface {
// GetAutoStatsSettings returns the table settings related to automatic
// statistics collection. May return nil if none are set.
GetAutoStatsSettings() *catpb.AutoStatsSettings
// ForecastStatsEnabled indicates whether statistics forecasting is explicitly
// enabled or disabled for this table. If ok is true, the enabled value is
// valid; otherwise, forecasting has not been configured at the table level.
ForecastStatsEnabled() (enabled bool, ok bool)
// GetIndexNameByID returns the name of an index based on an ID, taking into
// account any ongoing declarative schema changes. Declarative schema changes
// do not propagate the index name into the mutations until changes are fully
Expand Down
12 changes: 12 additions & 0 deletions pkg/sql/catalog/tabledesc/structured.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"context"
"fmt"
"sort"
"strconv"
"strings"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
Expand Down Expand Up @@ -2661,6 +2662,9 @@ func (desc *wrapper) GetStorageParams(spaceBetweenEqual bool) []string {
fmt.Sprintf("%g", value))
}
}
if enabled, ok := desc.ForecastStatsEnabled(); ok {
appendStorageParam(`sql_stats_forecasts_enabled`, strconv.FormatBool(enabled))
}
return storageParams
}

Expand Down Expand Up @@ -2716,6 +2720,14 @@ func (desc *wrapper) GetAutoStatsSettings() *catpb.AutoStatsSettings {
return desc.AutoStatsSettings
}

// ForecastStatsEnabled implements the TableDescriptor interface.
//
// It reports the table-level forecast setting stored in desc.ForecastStats.
// When that field is nil the result is (false, false); otherwise the pointed-to
// value is returned as enabled, with ok set to true.
func (desc *wrapper) ForecastStatsEnabled() (enabled bool, ok bool) {
	if setting := desc.ForecastStats; setting != nil {
		return *setting, true
	}
	// No table-level value is present.
	return false, false
}

// SetTableLocalityRegionalByTable sets the descriptor's locality config to
// regional at the table level in the supplied region. An empty region name
// (or its alias PrimaryRegionNotSpecifiedName) denotes that the table is homed in
Expand Down
313 changes: 313 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/forecast
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,154 @@ WHERE stat->>'name' = '__forecast__';
}
]

# Test that we can disable forecasts for individual tables.

statement ok
CREATE TABLE d (d DATE PRIMARY KEY)
WITH (sql_stats_automatic_collection_enabled = false, sql_stats_forecasts_enabled = false)

statement ok
ALTER TABLE d INJECT STATISTICS '[
{
"avg_size": 3,
"columns": [
"d"
],
"created_at": "1999-12-21 00:00:00.000000",
"distinct_count": 1,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "1999-12-16"
},
{
"distinct_range": 1,
"num_eq": 0,
"num_range": 1,
"upper_bound": "1999-12-21"
}
],
"histo_col_type": "DATE",
"histo_version": 2,
"name": "__auto__",
"null_count": 0,
"row_count": 1
},
{
"avg_size": 3,
"columns": [
"d"
],
"created_at": "1999-12-26 00:00:00.000000",
"distinct_count": 2,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "1999-12-16"
},
{
"distinct_range": 2,
"num_eq": 0,
"num_range": 2,
"upper_bound": "1999-12-26"
}
],
"histo_col_type": "DATE",
"histo_version": 2,
"name": "__auto__",
"null_count": 0,
"row_count": 2
},
{
"avg_size": 3,
"columns": [
"d"
],
"created_at": "1999-12-31 00:00:00.000000",
"distinct_count": 3,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "1999-12-16"
},
{
"distinct_range": 3,
"num_eq": 0,
"num_range": 3,
"upper_bound": "1999-12-31"
}
],
"histo_col_type": "DATE",
"histo_version": 2,
"name": "__auto__",
"null_count": 0,
"row_count": 3
}
]'

query TTTIIII
SELECT statistics_name, column_names, created, row_count, distinct_count, null_count, avg_size
FROM [SHOW STATISTICS FOR TABLE d WITH FORECAST]
ORDER BY created
----
__auto__ {d} 1999-12-21 00:00:00 +0000 +0000 1 1 0 3
__auto__ {d} 1999-12-26 00:00:00 +0000 +0000 2 2 0 3
__auto__ {d} 1999-12-31 00:00:00 +0000 +0000 3 3 0 3
__forecast__ {d} 2000-01-05 00:00:00 +0000 +0000 4 4 0 3

query T
SELECT jsonb_pretty(stat->'histo_buckets')
FROM (
SELECT jsonb_array_elements(statistics) AS stat
FROM [SHOW STATISTICS USING JSON FOR TABLE d WITH FORECAST]
)
WHERE stat->>'name' = '__forecast__';
----
[
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "1999-12-16"
},
{
"distinct_range": 4,
"num_eq": 0,
"num_range": 4,
"upper_bound": "2000-01-05"
}
]

query T
EXPLAIN SELECT * FROM d WHERE d >= '1999-12-16'
----
distribution: local
vectorized: true
·
• scan
estimated row count: 3 (100% of the table; stats collected <hidden> ago)
table: d@d_pkey
spans: [/'1999-12-16' - ]

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM d WHERE d >= '1999-12-16'
----
scan d
├── columns: d:1
├── constraint: /1: [/'1999-12-16' - ]
├── stats: [rows=3, distinct(1)=3, null(1)=0]
│ histogram(1)= 0 0 3 0
│ <--- '1999-12-16' --- '1999-12-31'
├── cost: 17.11
├── key: (1)
└── distribution: test

# Test that optimizer_use_forecasts can be used to enable and disable forecasts.

statement ok
Expand Down Expand Up @@ -789,6 +937,19 @@ scan x
├── key: (1)
└── distribution: test

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM d WHERE d >= '1999-12-16'
----
scan d
├── columns: d:1
├── constraint: /1: [/'1999-12-16' - ]
├── stats: [rows=3, distinct(1)=3, null(1)=0]
│ histogram(1)= 0 0 3 0
│ <--- '1999-12-16' --- '1999-12-31'
├── cost: 17.11
├── key: (1)
└── distribution: test

statement ok
RESET optimizer_use_forecasts

Expand Down Expand Up @@ -842,3 +1003,155 @@ scan x
├── cost: 16.04
├── key: (1)
└── distribution: test

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM d WHERE d >= '1999-12-16'
----
scan d
├── columns: d:1
├── constraint: /1: [/'1999-12-16' - ]
├── stats: [rows=3, distinct(1)=3, null(1)=0]
│ histogram(1)= 0 0 3 0
│ <--- '1999-12-16' --- '1999-12-31'
├── cost: 17.11
├── key: (1)
└── distribution: test

# Test that sql.stats.forecasts.enabled can be used to enable and disable
# generation of forecasts in the stats cache.

statement ok
SET CLUSTER SETTING sql.stats.forecasts.enabled = false

query T
EXPLAIN SELECT * FROM g WHERE a > 8
----
distribution: local
vectorized: true
·
• scan
estimated row count: 0 (<0.01% of the table; stats collected <hidden> ago)
table: g@g_pkey
spans: [/9 - ]

query T
EXPLAIN SELECT * FROM s WHERE b < 3
----
distribution: local
vectorized: true
·
• scan
estimated row count: 3 (100% of the table; stats collected <hidden> ago)
table: s@s_pkey
spans: [ - /2]

query T
EXPLAIN SELECT * FROM c WHERE h > '1988-08-07'
----
distribution: local
vectorized: true
·
• scan
estimated row count: 0 (<0.01% of the table; stats collected <hidden> ago)
table: c@c_pkey
spans: [/'1988-08-07 00:00:00.000001+00:00' - ]

query T
EXPLAIN SELECT * FROM x WHERE a > 16
----
distribution: local
vectorized: true
·
• scan
estimated row count: 0 (<0.01% of the table; stats collected <hidden> ago)
table: x@x_pkey
spans: [/17 - ]

query T
EXPLAIN SELECT * FROM d WHERE d >= '1999-12-16'
----
distribution: local
vectorized: true
·
• scan
estimated row count: 3 (100% of the table; stats collected <hidden> ago)
table: d@d_pkey
spans: [/'1999-12-16' - ]

# Test that we can enable forecasts for individual tables even if
# sql.stats.forecasts.enabled is false.

statement ok
ALTER TABLE c SET (sql_stats_forecasts_enabled = true)

query T
EXPLAIN SELECT * FROM c WHERE h > '1988-08-07'
----
distribution: local
vectorized: true
·
• scan
estimated row count: 23 (96% of the table; stats collected <hidden> ago; using stats forecast)
table: c@c_pkey
spans: [/'1988-08-07 00:00:00.000001+00:00' - ]

# Now check that we resume using forecasts when sql.stats.forecasts.enabled is
# reset.

statement ok
RESET CLUSTER SETTING sql.stats.forecasts.enabled

query T
EXPLAIN SELECT * FROM g WHERE a > 8
----
distribution: local
vectorized: true
·
• scan
estimated row count: 3 (22% of the table; stats collected <hidden> ago; using stats forecast)
table: g@g_pkey
spans: [/9 - ]

query T
EXPLAIN SELECT * FROM s WHERE b < 3
----
distribution: local
vectorized: true
·
• scan
estimated row count: 1 (100% of the table; stats collected <hidden> ago; using stats forecast)
table: s@s_pkey
spans: [ - /2]

query T
EXPLAIN SELECT * FROM c WHERE h > '1988-08-07'
----
distribution: local
vectorized: true
·
• scan
estimated row count: 23 (96% of the table; stats collected <hidden> ago; using stats forecast)
table: c@c_pkey
spans: [/'1988-08-07 00:00:00.000001+00:00' - ]

query T
EXPLAIN SELECT * FROM x WHERE a > 16
----
distribution: local
vectorized: true
·
• scan
estimated row count: 2 (50% of the table; stats collected <hidden> ago; using stats forecast)
table: x@x_pkey
spans: [/17 - ]

query T
EXPLAIN SELECT * FROM d WHERE d >= '1999-12-16'
----
distribution: local
vectorized: true
·
• scan
estimated row count: 3 (100% of the table; stats collected <hidden> ago)
table: d@d_pkey
spans: [/'1999-12-16' - ]
2 changes: 1 addition & 1 deletion pkg/sql/stats/automatic_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ func (r *Refresher) maybeRefreshStats(
rowsAffected int64,
asOf time.Duration,
) {
tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID)
tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID, nil /* forecast */)
if err != nil {
log.Errorf(ctx, "failed to get table statistics: %v", err)
return
Expand Down
Loading