Skip to content

Commit

Permalink
stats: enable optimizer stats use on system tables for query planning
Browse files Browse the repository at this point in the history
Informs cockroachdb#80123

Previously, statistics could be collected on system tables, but their
use in the optimizer for planning queries was disabled.

This was inadequate because without stats, full table scan might be
chosen by the optimizer for queries involving system tables in cases
where index access is actually cheaper. Also, it is confusing that one
could ANALYZE a system table, but the collected statistics would not be
used when querying the table, as shown in the EXPLAIN output.

To address this, this patch enables stats usage on all system tables
except system.table_statistics and system.lease, as doing so may cause
hangs. A new tenant-writable cluster setting is added,
`sql.stats.system_tables.enabled`, which when true allows statistics on
system tables to be used by the optimizer for costing and planning
queries. The default value of this setting is `true`. Collection of
statistics on `system.table_statistics` and `system.lease` has been
disabled because it would be confusing to allow stats to be collected
that would never be used by the optimizer.

Release note: none
  • Loading branch information
Mark Sirek committed Apr 29, 2022
1 parent a3c0709 commit fe3cc5f
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 4 deletions.
1 change: 1 addition & 0 deletions docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ sql.stats.multi_column_collection.enabled boolean true multi-column statistics c
sql.stats.persisted_rows.max integer 1000000 maximum number of rows of statement and transaction statistics that will be persisted in the system tables
sql.stats.post_events.enabled boolean false if set, an event is logged for every CREATE STATISTICS job
sql.stats.response.max integer 20000 the maximum number of statements and transaction stats returned in a CombinedStatements request
sql.stats.system_tables.enabled boolean true when true, enables use of statistics on system tables by the query optimizer
sql.telemetry.query_sampling.enabled boolean false when set to true, executed queries will emit an event on the telemetry logging channel
sql.temp_object_cleaner.cleanup_interval duration 30m0s how often to clean up orphaned temporary objects
sql.temp_object_cleaner.wait_interval duration 30m0s how long after creation a temporary object will be cleaned up
Expand Down
1 change: 1 addition & 0 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@
<tr><td><code>sql.stats.persisted_rows.max</code></td><td>integer</td><td><code>1000000</code></td><td>maximum number of rows of statement and transaction statistics that will be persisted in the system tables</td></tr>
<tr><td><code>sql.stats.post_events.enabled</code></td><td>boolean</td><td><code>false</code></td><td>if set, an event is logged for every CREATE STATISTICS job</td></tr>
<tr><td><code>sql.stats.response.max</code></td><td>integer</td><td><code>20000</code></td><td>the maximum number of statements and transaction stats returned in a CombinedStatements request</td></tr>
<tr><td><code>sql.stats.system_tables.enabled</code></td><td>boolean</td><td><code>true</code></td><td>when true, enables use of statistics on system tables by the query optimizer</td></tr>
<tr><td><code>sql.telemetry.query_sampling.enabled</code></td><td>boolean</td><td><code>false</code></td><td>when set to true, executed queries will emit an event on the telemetry logging channel</td></tr>
<tr><td><code>sql.temp_object_cleaner.cleanup_interval</code></td><td>duration</td><td><code>30m0s</code></td><td>how often to clean up orphaned temporary objects</td></tr>
<tr><td><code>sql.temp_object_cleaner.wait_interval</code></td><td>duration</td><td><code>30m0s</code></td><td>how long after creation a temporary object will be cleaned up</td></tr>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ SELECT node_id, name FROM crdb_internal.leases ORDER BY name
0 scheduled_jobs
0 settings
0 statement_diagnostics_requests
0 table_statistics
0 test
0 users

Expand Down
13 changes: 13 additions & 0 deletions pkg/sql/create_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/featureflag"
"github.com/cockroachdb/cockroach/pkg/jobs"
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
"github.com/cockroachdb/cockroach/pkg/settings"
Expand Down Expand Up @@ -229,6 +230,18 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro
)
}

if tableDesc.GetID() == keys.TableStatisticsTableID {
return nil, pgerror.New(
pgcode.WrongObjectType, "cannot create statistics on system.table_statistics",
)
}

if tableDesc.GetID() == keys.LeaseTableID {
return nil, pgerror.New(
pgcode.WrongObjectType, "cannot create statistics on system.lease",
)
}

if err := n.p.CheckPrivilege(ctx, tableDesc, privilege.SELECT); err != nil {
return nil, err
}
Expand Down
22 changes: 22 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -1345,3 +1345,25 @@ CREATE TABLE t76867 (

statement ok
ANALYZE t76867

# Regression tests for #80123. Collecting stats on system tables is allowed.
statement ok
ANALYZE system.locations

# EXPLAIN output should indicate stats collected on system.locations.
query T
SELECT * FROM [EXPLAIN SELECT * FROM system.locations] OFFSET 2
----
·
• scan
estimated row count: 5 (100% of the table; stats collected <hidden> ago)
table: locations@primary
spans: FULL SCAN

# Collecting stats on system.lease is disallowed.
statement error pq: cannot create statistics on system.lease
ANALYZE system.lease

# Collecting stats on system.table_statistics is disallowed.
statement error pq: cannot create statistics on system.table_statistics
ANALYZE system.table_statistics
16 changes: 15 additions & 1 deletion pkg/sql/stats/automatic_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ import (
// cluster setting.
const AutoStatsClusterSettingName = "sql.stats.automatic_collection.enabled"

// UseStatsOnSystemTables is the name of the use statistics on system tables
// cluster setting.
const UseStatsOnSystemTables = "sql.stats.system_tables.enabled"

// AutomaticStatisticsClusterMode controls the cluster setting for enabling
// automatic table statistics collection.
var AutomaticStatisticsClusterMode = settings.RegisterBoolSetting(
Expand All @@ -47,6 +51,16 @@ var AutomaticStatisticsClusterMode = settings.RegisterBoolSetting(
true,
).WithPublic()

// UseStatisticsOnSystemTables controls the cluster setting for enabling
// statistics usage by the optimizer for planning queries involving system
// tables.
var UseStatisticsOnSystemTables = settings.RegisterBoolSetting(
settings.TenantWritable,
UseStatsOnSystemTables,
"when true, enables use of statistics on system tables by the query optimizer",
true,
).WithPublic()

// MultiColumnStatisticsClusterMode controls the cluster setting for enabling
// automatic collection of multi-column statistics.
var MultiColumnStatisticsClusterMode = settings.RegisterBoolSetting(
Expand Down Expand Up @@ -389,7 +403,7 @@ func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected i
// Automatic stats are disabled.
return
}
if !hasStatistics(table) {
if !autostatsCollectionAllowed(table) {
// Don't collect stats for this kind of table: system, virtual, view, etc.
return
}
Expand Down
33 changes: 30 additions & 3 deletions pkg/sql/stats/stats_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,19 +214,46 @@ func decodeTableStatisticsKV(
func (sc *TableStatisticsCache) GetTableStats(
ctx context.Context, table catalog.TableDescriptor,
) ([]*TableStatistic, error) {
if !hasStatistics(table) {
if !statsUsageAllowed(table, sc.Settings) {
return nil, nil
}
return sc.getTableStatsFromCache(ctx, table.GetID())
}

// hasStatistics returns true if the table can have statistics collected for it.
func hasStatistics(table catalog.TableDescriptor) bool {
// statsUsageAllowed returns true if statistics on `table` are allowed to be
// used by the query optimizer.
func statsUsageAllowed(table catalog.TableDescriptor, clusterSettings *cluster.Settings) bool {
if catalog.IsSystemDescriptor(table) {
// Disable stats usage on system.table_statistics and system.lease. Looking
// up stats on system.lease is known to cause hangs, and the same could
// happen with system.table_statistics.
if table.GetID() == keys.TableStatisticsTableID ||
table.GetID() == keys.LeaseTableID {
return false
}
// Return whether the optimizer is allowed to use stats on system tables.
return UseStatisticsOnSystemTables.Get(&clusterSettings.SV)
}
return tableTypeCanHaveStats(table)
}

// autostatsCollectionAllowed returns true if statistics are allowed to be
// automatically collected on the table.
func autostatsCollectionAllowed(table catalog.TableDescriptor) bool {
if catalog.IsSystemDescriptor(table) {
// Don't try to get statistics for system tables (most importantly,
// for table_statistics itself).
return false
}
return tableTypeCanHaveStats(table)
}

// tableTypeCanHaveStats returns true if manual collection of statistics on the
// table type via CREATE STATISTICS or ANALYZE is allowed. Note that specific
// system tables may have stats collection disabled in create_stats.go. This
// function just indicates if the type of table may have stats manually
// collected.
func tableTypeCanHaveStats(table catalog.TableDescriptor) bool {
if table.IsVirtualTable() {
// Don't try to get statistics for virtual tables.
return false
Expand Down

0 comments on commit fe3cc5f

Please sign in to comment.