Skip to content

Commit

Permalink
Merge #80887
Browse files Browse the repository at this point in the history
80887: stats: enable auto stats collection on system tables r=msirek a=msirek

Fixes #80123

Previously, mutations to system tables did not trigger automatic
collection of statistics to influence optimizer costs and plan
selection.

This was inadequate because system tables are being used in
increasingly sophisticated ways in queries, most notably around driving
subsystems in CRDB, requiring avoidance of full table scans. Manual
collection of stats on system tables is not sufficient to meet
requirements as system tables are driven by automatic processes/jobs.

To address this, this patch enables auto stats collection on system
tables by default. It can be disabled by setting the new cluster setting
`sql.stats.system_tables_autostats.enabled` to false. Auto stats are
always disabled on `system.lease`, `system.table_statistics`,
`system.jobs` and `system.scheduled_jobs`, regardless of the value of the
cluster setting. Auto stats on the first two tables could potentially
cause hangs, and auto stats on the last two tables could potentially
impact system performance.

Release note: none


Co-authored-by: Mark Sirek <[email protected]>
  • Loading branch information
craig[bot] and Mark Sirek committed May 5, 2022
2 parents a4d141a + e811eb6 commit 8204b7d
Show file tree
Hide file tree
Showing 13 changed files with 382 additions and 186 deletions.
1 change: 1 addition & 0 deletions docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ sql.stats.persisted_rows.max integer 1000000 maximum number of rows of statement
sql.stats.post_events.enabled boolean false if set, an event is logged for every CREATE STATISTICS job
sql.stats.response.max integer 20000 the maximum number of statements and transaction stats returned in a CombinedStatements request
sql.stats.system_tables.enabled boolean true when true, enables use of statistics on system tables by the query optimizer
sql.stats.system_tables_autostats.enabled boolean true when true, enables automatic collection of statistics on system tables
sql.telemetry.query_sampling.enabled boolean false when set to true, executed queries will emit an event on the telemetry logging channel
sql.temp_object_cleaner.cleanup_interval duration 30m0s how often to clean up orphaned temporary objects
sql.temp_object_cleaner.wait_interval duration 30m0s how long after creation a temporary object will be cleaned up
Expand Down
1 change: 1 addition & 0 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@
<tr><td><code>sql.stats.post_events.enabled</code></td><td>boolean</td><td><code>false</code></td><td>if set, an event is logged for every CREATE STATISTICS job</td></tr>
<tr><td><code>sql.stats.response.max</code></td><td>integer</td><td><code>20000</code></td><td>the maximum number of statements and transaction stats returned in a CombinedStatements request</td></tr>
<tr><td><code>sql.stats.system_tables.enabled</code></td><td>boolean</td><td><code>true</code></td><td>when true, enables use of statistics on system tables by the query optimizer</td></tr>
<tr><td><code>sql.stats.system_tables_autostats.enabled</code></td><td>boolean</td><td><code>true</code></td><td>when true, enables automatic collection of statistics on system tables</td></tr>
<tr><td><code>sql.telemetry.query_sampling.enabled</code></td><td>boolean</td><td><code>false</code></td><td>when set to true, executed queries will emit an event on the telemetry logging channel</td></tr>
<tr><td><code>sql.temp_object_cleaner.cleanup_interval</code></td><td>duration</td><td><code>30m0s</code></td><td>how often to clean up orphaned temporary objects</td></tr>
<tr><td><code>sql.temp_object_cleaner.wait_interval</code></td><td>duration</td><td><code>30m0s</code></td><td>how long after creation a temporary object will be cleaned up</td></tr>
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/catalog/catpb/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ const (
// cluster setting.
UseStatsOnSystemTables = "sql.stats.system_tables.enabled"

// AutoStatsOnSystemTables is the name of the cluster setting that enables
// automatic collection of statistics on system tables.
AutoStatsOnSystemTables = "sql.stats.system_tables_autostats.enabled"

// AutoStatsMinStaleTableSettingName is the name of the automatic stats collection
// min stale rows table setting.
AutoStatsMinStaleTableSettingName = "sql_stats_automatic_collection_min_stale_rows"
Expand Down
12 changes: 12 additions & 0 deletions pkg/sql/create_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,18 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro
)
}

if tableDesc.GetID() == keys.JobsTableID {
return nil, pgerror.New(
pgcode.WrongObjectType, "cannot create statistics on system.jobs",
)
}

if tableDesc.GetID() == keys.ScheduledJobsTableID {
return nil, pgerror.New(
pgcode.WrongObjectType, "cannot create statistics on system.scheduled_jobs",
)
}

if err := n.p.CheckPrivilege(ctx, tableDesc, privilege.SELECT); err != nil {
return nil, err
}
Expand Down
6 changes: 5 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/distsql_event_log
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
# CREATE STATISTICS
###################

# Keep auto stats jobs on system tables from disrupting tests in this file.
statement ok
SET CLUSTER SETTING sql.stats.system_tables_autostats.enabled = FALSE

# This test verifies that events are posted for table statistics creation.
statement ok
SET CLUSTER SETTING sql.stats.post_events.enabled = TRUE
Expand All @@ -18,7 +22,7 @@ CREATE STATISTICS __auto__ FROM a
query IIT
SELECT "targetID", "reportingID", "info"::JSONB - 'Timestamp' - 'DescriptorID'
FROM system.eventlog
WHERE "eventType" = 'create_statistics'
WHERE "eventType" = 'create_statistics' AND "targetID" <> 12
ORDER BY "timestamp", info
----
106 1 {"EventType": "create_statistics", "Statement": "CREATE STATISTICS s1 ON id FROM test.public.a", "TableName": "test.public.a", "Tag": "CREATE STATISTICS", "User": "root"}
Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -1367,3 +1367,11 @@ ANALYZE system.lease
# Collecting stats on system.table_statistics is disallowed.
statement error pq: cannot create statistics on system.table_statistics
ANALYZE system.table_statistics

# Collecting stats on system.jobs is disallowed.
statement error pq: cannot create statistics on system.jobs
ANALYZE system.jobs

# Collecting stats on system.scheduled_jobs is disallowed.
statement error pq: cannot create statistics on system.scheduled_jobs
ANALYZE system.scheduled_jobs
5 changes: 5 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/jobs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ CREATE INDEX ON t(x)

query TTT
SELECT job_type, description, user_name FROM [SHOW JOBS] WHERE user_name = 'root'
AND job_type LIKE 'SCHEMA CHANGE%'
----
SCHEMA CHANGE updating version for users table root
SCHEMA CHANGE updating version for role options table root
Expand All @@ -33,6 +34,7 @@ SCHEMA CHANGE GC GC for temporary index used during index backfill root

query TTT
SELECT job_type, description, user_name FROM crdb_internal.jobs WHERE user_name = 'root'
AND job_type LIKE 'SCHEMA CHANGE%'
----
SCHEMA CHANGE updating version for users table root
SCHEMA CHANGE updating version for role options table root
Expand All @@ -42,6 +44,7 @@ SCHEMA CHANGE GC GC for temporary index used during index backfi

query TTT
SELECT job_type, description, user_name FROM crdb_internal.jobs WHERE user_name = 'node'
AND job_type LIKE 'AUTO SPAN%'
----
AUTO SPAN CONFIG RECONCILIATION reconciling span configurations node

Expand Down Expand Up @@ -84,6 +87,7 @@ user root

query TTT
SELECT job_type, description, user_name FROM [SHOW JOBS] WHERE user_name IN ('root', 'testuser', 'node')
AND job_type LIKE 'SCHEMA CHANGE%'
----
SCHEMA CHANGE updating version for users table root
SCHEMA CHANGE updating version for role options table root
Expand All @@ -95,6 +99,7 @@ SCHEMA CHANGE GC GC for temporary index used during index backfill testuser

query TTT
SELECT job_type, description, user_name FROM crdb_internal.jobs WHERE user_name IN ('root', 'testuser', 'node')
AND (job_type LIKE 'AUTO SPAN%' OR job_type LIKE 'SCHEMA CHANGE%')
----
AUTO SPAN CONFIG RECONCILIATION reconciling span configurations node
SCHEMA CHANGE updating version for users table root
Expand Down
166 changes: 83 additions & 83 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -237,89 +237,89 @@ SELECT * FROM [SHOW SEQUENCES FROM system]
----
sequence_schema sequence_name

query TTTTIT colnames,rowsort
SELECT * FROM [SHOW TABLES FROM system]
----
schema_name table_name type owner estimated_row_count locality
public descriptor table NULL 0 NULL
public tenant_settings table NULL 0 NULL
public span_configurations table NULL 0 NULL
public sql_instances table NULL 0 NULL
public tenant_usage table NULL 0 NULL
public database_role_settings table NULL 0 NULL
public transaction_statistics table NULL 0 NULL
public statement_statistics table NULL 0 NULL
public join_tokens table NULL 0 NULL
public migrations table NULL 0 NULL
public sqlliveness table NULL 0 NULL
public scheduled_jobs table NULL 0 NULL
public statement_diagnostics table NULL 0 NULL
public statement_diagnostics_requests table NULL 0 NULL
public statement_bundle_chunks table NULL 0 NULL
public role_options table NULL 0 NULL
public protected_ts_records table NULL 0 NULL
public protected_ts_meta table NULL 0 NULL
public namespace table NULL 0 NULL
public reports_meta table NULL 0 NULL
public replication_stats table NULL 0 NULL
public replication_critical_localities table NULL 0 NULL
public replication_constraint_stats table NULL 0 NULL
public comments table NULL 0 NULL
public role_members table NULL 0 NULL
public locations table NULL 0 NULL
public table_statistics table NULL 0 NULL
public web_sessions table NULL 0 NULL
public jobs table NULL 0 NULL
public ui table NULL 0 NULL
public rangelog table NULL 0 NULL
public eventlog table NULL 0 NULL
public lease table NULL 0 NULL
public tenants table NULL 0 NULL
public settings table NULL 0 NULL
public zones table NULL 0 NULL
public users table NULL 0 NULL

query TTTTITT colnames,rowsort
SELECT * FROM [SHOW TABLES FROM system WITH COMMENT]
----
schema_name table_name type owner estimated_row_count locality comment
public descriptor table NULL 0 NULL ·
public tenant_settings table NULL 0 NULL ·
public span_configurations table NULL 0 NULL ·
public sql_instances table NULL 0 NULL ·
public tenant_usage table NULL 0 NULL ·
public database_role_settings table NULL 0 NULL ·
public transaction_statistics table NULL 0 NULL ·
public statement_statistics table NULL 0 NULL ·
public join_tokens table NULL 0 NULL ·
public migrations table NULL 0 NULL ·
public sqlliveness table NULL 0 NULL ·
public scheduled_jobs table NULL 0 NULL ·
public statement_diagnostics table NULL 0 NULL ·
public statement_diagnostics_requests table NULL 0 NULL ·
public statement_bundle_chunks table NULL 0 NULL ·
public role_options table NULL 0 NULL ·
public protected_ts_records table NULL 0 NULL ·
public protected_ts_meta table NULL 0 NULL ·
public namespace table NULL 0 NULL ·
public reports_meta table NULL 0 NULL ·
public replication_stats table NULL 0 NULL ·
public replication_critical_localities table NULL 0 NULL ·
public replication_constraint_stats table NULL 0 NULL ·
public comments table NULL 0 NULL ·
public role_members table NULL 0 NULL ·
public locations table NULL 0 NULL ·
public table_statistics table NULL 0 NULL ·
public web_sessions table NULL 0 NULL ·
public jobs table NULL 0 NULL ·
public ui table NULL 0 NULL ·
public rangelog table NULL 0 NULL ·
public eventlog table NULL 0 NULL ·
public lease table NULL 0 NULL ·
public tenants table NULL 0 NULL ·
public settings table NULL 0 NULL ·
public zones table NULL 0 NULL ·
public users table NULL 0 NULL ·
query TTTTT colnames,rowsort
SELECT schema_name, table_name, type, owner, locality FROM [SHOW TABLES FROM system]
----
schema_name table_name type owner locality
public comments table NULL NULL
public database_role_settings table NULL NULL
public descriptor table NULL NULL
public eventlog table NULL NULL
public jobs table NULL NULL
public join_tokens table NULL NULL
public lease table NULL NULL
public locations table NULL NULL
public migrations table NULL NULL
public namespace table NULL NULL
public protected_ts_meta table NULL NULL
public protected_ts_records table NULL NULL
public rangelog table NULL NULL
public replication_constraint_stats table NULL NULL
public replication_critical_localities table NULL NULL
public replication_stats table NULL NULL
public reports_meta table NULL NULL
public role_members table NULL NULL
public role_options table NULL NULL
public scheduled_jobs table NULL NULL
public settings table NULL NULL
public span_configurations table NULL NULL
public sql_instances table NULL NULL
public sqlliveness table NULL NULL
public statement_bundle_chunks table NULL NULL
public statement_diagnostics table NULL NULL
public statement_diagnostics_requests table NULL NULL
public statement_statistics table NULL NULL
public table_statistics table NULL NULL
public tenant_settings table NULL NULL
public tenant_usage table NULL NULL
public tenants table NULL NULL
public transaction_statistics table NULL NULL
public ui table NULL NULL
public users table NULL NULL
public web_sessions table NULL NULL
public zones table NULL NULL

query TTTTTT colnames,rowsort
SELECT schema_name, table_name, type, owner, locality, comment FROM [SHOW TABLES FROM system WITH COMMENT]
----
schema_name table_name type owner locality comment
public descriptor table NULL NULL ·
public tenant_settings table NULL NULL ·
public span_configurations table NULL NULL ·
public sql_instances table NULL NULL ·
public tenant_usage table NULL NULL ·
public database_role_settings table NULL NULL ·
public transaction_statistics table NULL NULL ·
public statement_statistics table NULL NULL ·
public join_tokens table NULL NULL ·
public migrations table NULL NULL ·
public sqlliveness table NULL NULL ·
public scheduled_jobs table NULL NULL ·
public statement_diagnostics table NULL NULL ·
public statement_diagnostics_requests table NULL NULL ·
public statement_bundle_chunks table NULL NULL ·
public role_options table NULL NULL ·
public protected_ts_records table NULL NULL ·
public protected_ts_meta table NULL NULL ·
public namespace table NULL NULL ·
public reports_meta table NULL NULL ·
public replication_stats table NULL NULL ·
public replication_critical_localities table NULL NULL ·
public replication_constraint_stats table NULL NULL ·
public comments table NULL NULL ·
public role_members table NULL NULL ·
public locations table NULL NULL ·
public table_statistics table NULL NULL ·
public web_sessions table NULL NULL ·
public jobs table NULL NULL ·
public ui table NULL NULL ·
public rangelog table NULL NULL ·
public eventlog table NULL NULL ·
public lease table NULL NULL ·
public tenants table NULL NULL ·
public settings table NULL NULL ·
public zones table NULL NULL ·
public users table NULL NULL ·

query ITTT colnames
SELECT node_id, user_name, application_name, active_queries
Expand Down
Loading

0 comments on commit 8204b7d

Please sign in to comment.