diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index d8c31b0bd76c..9dfab9e88b29 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -173,6 +173,7 @@ sql.stats.multi_column_collection.enabled boolean true multi-column statistics c sql.stats.persisted_rows.max integer 1000000 maximum number of rows of statement and transaction statistics that will be persisted in the system tables sql.stats.post_events.enabled boolean false if set, an event is logged for every CREATE STATISTICS job sql.stats.response.max integer 20000 the maximum number of statements and transaction stats returned in a CombinedStatements request +sql.stats.system_tables.enabled boolean true when true, enables use of statistics on system tables by the query optimizer sql.telemetry.query_sampling.enabled boolean false when set to true, executed queries will emit an event on the telemetry logging channel sql.temp_object_cleaner.cleanup_interval duration 30m0s how often to clean up orphaned temporary objects sql.temp_object_cleaner.wait_interval duration 30m0s how long after creation a temporary object will be cleaned up diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 430931b3c553..e6df792eb25a 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -190,6 +190,7 @@ sql.stats.persisted_rows.maxinteger1000000maximum number of rows of statement and transaction statistics that will be persisted in the system tables sql.stats.post_events.enabledbooleanfalseif set, an event is logged for every CREATE STATISTICS job sql.stats.response.maxinteger20000the maximum number of statements and transaction stats returned in a CombinedStatements request +sql.stats.system_tables.enabledbooleantruewhen true, enables use of statistics on system tables by the query optimizer sql.telemetry.query_sampling.enabledbooleanfalsewhen set to true, executed queries will emit an event on the telemetry logging channel sql.temp_object_cleaner.cleanup_intervalduration30m0show often to clean up orphaned temporary objects sql.temp_object_cleaner.wait_intervalduration30m0show long after creation a temporary object will be cleaned up diff --git a/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant b/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant index be7ec1c5d0bb..dfe0737257c0 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant +++ b/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant @@ -18,6 +18,7 @@ SELECT node_id, name FROM crdb_internal.leases ORDER BY name 0 scheduled_jobs 0 settings 0 statement_diagnostics_requests +0 table_statistics 0 test 0 users diff --git a/pkg/sql/create_stats.go b/pkg/sql/create_stats.go index 64ff8003de43..5965a813890e 100644 --- a/pkg/sql/create_stats.go +++ b/pkg/sql/create_stats.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/featureflag" "github.com/cockroachdb/cockroach/pkg/jobs" "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" + "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/server/telemetry" "github.com/cockroachdb/cockroach/pkg/settings" @@ -230,6 +231,18 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro ) } + if tableDesc.GetID() == keys.TableStatisticsTableID { + return nil, pgerror.New( + pgcode.WrongObjectType, "cannot create statistics on system.table_statistics", + ) + } + + if tableDesc.GetID() == keys.LeaseTableID { + return nil, pgerror.New( + pgcode.WrongObjectType, "cannot create statistics on system.lease", + ) + } + if err := n.p.CheckPrivilege(ctx, tableDesc, privilege.SELECT); err != nil { return nil, err } diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_stats b/pkg/sql/logictest/testdata/logic_test/distsql_stats index 6f06afd9b229..869a41935836 100644 --- a/pkg/sql/logictest/testdata/logic_test/distsql_stats +++ b/pkg/sql/logictest/testdata/logic_test/distsql_stats @@ -1345,3 +1345,25 @@ CREATE TABLE t76867 ( statement ok ANALYZE t76867 + +# Regression tests for #80123. Collecting stats on system tables is allowed. +statement ok +ANALYZE system.locations + +# EXPLAIN output should indicate stats collected on system.locations. +query T +SELECT * FROM [EXPLAIN SELECT * FROM system.locations] OFFSET 2 +---- +· +• scan + estimated row count: 5 (100% of the table; stats collected ago) + table: locations@primary + spans: FULL SCAN + +# Collecting stats on system.lease is disallowed. +statement error pq: cannot create statistics on system.lease +ANALYZE system.lease + +# Collecting stats on system.table_statistics is disallowed. +statement error pq: cannot create statistics on system.table_statistics +ANALYZE system.table_statistics diff --git a/pkg/sql/stats/automatic_stats.go b/pkg/sql/stats/automatic_stats.go index 36c9093d2210..2986a2f11906 100644 --- a/pkg/sql/stats/automatic_stats.go +++ b/pkg/sql/stats/automatic_stats.go @@ -38,6 +38,10 @@ import ( // cluster setting. const AutoStatsClusterSettingName = "sql.stats.automatic_collection.enabled" +// UseStatsOnSystemTables is the name of the use statistics on system tables +// cluster setting. +const UseStatsOnSystemTables = "sql.stats.system_tables.enabled" + // AutomaticStatisticsClusterMode controls the cluster setting for enabling // automatic table statistics collection. var AutomaticStatisticsClusterMode = settings.RegisterBoolSetting( @@ -47,6 +51,16 @@ var AutomaticStatisticsClusterMode = settings.RegisterBoolSetting( true, ).WithPublic() +// UseStatisticsOnSystemTables controls the cluster setting for enabling +// statistics usage by the optimizer for planning queries involving system +// tables. +var UseStatisticsOnSystemTables = settings.RegisterBoolSetting( + settings.TenantWritable, + UseStatsOnSystemTables, + "when true, enables use of statistics on system tables by the query optimizer", + true, +).WithPublic() + // MultiColumnStatisticsClusterMode controls the cluster setting for enabling // automatic collection of multi-column statistics. var MultiColumnStatisticsClusterMode = settings.RegisterBoolSetting( @@ -389,7 +403,7 @@ func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected i // Automatic stats are disabled. return } - if !hasStatistics(table) { + if !autostatsCollectionAllowed(table) { // Don't collect stats for this kind of table: system, virtual, view, etc. return } diff --git a/pkg/sql/stats/stats_cache.go b/pkg/sql/stats/stats_cache.go index 1d072e9ea997..6fcb55777d43 100644 --- a/pkg/sql/stats/stats_cache.go +++ b/pkg/sql/stats/stats_cache.go @@ -214,19 +214,46 @@ func decodeTableStatisticsKV( func (sc *TableStatisticsCache) GetTableStats( ctx context.Context, table catalog.TableDescriptor, ) ([]*TableStatistic, error) { - if !hasStatistics(table) { + if !statsUsageAllowed(table, sc.Settings) { return nil, nil } return sc.getTableStatsFromCache(ctx, table.GetID()) } -// hasStatistics returns true if the table can have statistics collected for it. -func hasStatistics(table catalog.TableDescriptor) bool { +// statsUsageAllowed returns true if statistics on `table` are allowed to be +// used by the query optimizer. +func statsUsageAllowed(table catalog.TableDescriptor, clusterSettings *cluster.Settings) bool { + if catalog.IsSystemDescriptor(table) { + // Disable stats usage on system.table_statistics and system.lease. Looking + // up stats on system.lease is known to cause hangs, and the same could + // happen with system.table_statistics. + if table.GetID() == keys.TableStatisticsTableID || + table.GetID() == keys.LeaseTableID { + return false + } + // Return whether the optimizer is allowed to use stats on system tables. + return UseStatisticsOnSystemTables.Get(&clusterSettings.SV) + } + return tableTypeCanHaveStats(table) +} + +// autostatsCollectionAllowed returns true if statistics are allowed to be +// automatically collected on the table. +func autostatsCollectionAllowed(table catalog.TableDescriptor) bool { if catalog.IsSystemDescriptor(table) { // Don't try to get statistics for system tables (most importantly, // for table_statistics itself). return false } + return tableTypeCanHaveStats(table) +} + +// tableTypeCanHaveStats returns true if manual collection of statistics on the +// table type via CREATE STATISTICS or ANALYZE is allowed. Note that specific +// system tables may have stats collection disabled in create_stats.go. This +// function just indicates if the type of table may have stats manually +// collected. +func tableTypeCanHaveStats(table catalog.TableDescriptor) bool { if table.IsVirtualTable() { // Don't try to get statistics for virtual tables. return false