diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index cc5072a805b3..f67f8f89b23c 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -11,10 +11,9 @@ cloudstorage.http.custom_ca string custom root CA (appended to system's default cloudstorage.timeout duration 10m0s the timeout for import/export storage operations cluster.organization string organization name cluster.preserve_downgrade_option string disable (automatic or manual) cluster version upgrade from the specified version until reset -diagnostics.forced_sql_stat_reset.interval duration 2h0m0s interval after which SQL statement statistics are refreshed even if not collected (should be more than diagnostics.sql_stat_reset.interval). It has a max value of 24H. +diagnostics.forced_sql_stat_reset.interval duration 2h0m0s interval after which the reported SQL Stats are reset even if not collected by telemetry reporter. It has a max value of 24H. diagnostics.reporting.enabled boolean true enable reporting diagnostic metrics to cockroach labs diagnostics.reporting.interval duration 1h0m0s interval at which diagnostics data should be reported -diagnostics.sql_stat_reset.interval duration 1h0m0s interval controlling how often SQL statement statistics should be reset (should be less than diagnostics.forced_sql_stat_reset.interval). It has a max value of 24H. enterprise.license string the encoded cluster license external.graphite.endpoint string if nonempty, push server metrics to the Graphite or Carbon server at the specified host:port external.graphite.interval duration 10s the interval at which metrics are pushed to Graphite (if enabled) diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 7b4d00058fc9..b55645d5e701 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -14,10 +14,9 @@ cluster.organizationstringorganization name cluster.preserve_downgrade_optionstringdisable (automatic or manual) cluster version upgrade from the specified version until reset diagnostics.active_query_dumps.enabledbooleantrueexperimental: enable dumping of anonymized active queries to disk when node is under memory pressure -diagnostics.forced_sql_stat_reset.intervalduration2h0m0sinterval after which SQL statement statistics are refreshed even if not collected (should be more than diagnostics.sql_stat_reset.interval). It has a max value of 24H. +diagnostics.forced_sql_stat_reset.intervalduration2h0m0sinterval after which the reported SQL Stats are reset even if not collected by telemetry reporter. It has a max value of 24H. diagnostics.reporting.enabledbooleantrueenable reporting diagnostic metrics to cockroach labs diagnostics.reporting.intervalduration1h0m0sinterval at which diagnostics data should be reported -diagnostics.sql_stat_reset.intervalduration1h0m0sinterval controlling how often SQL statement statistics should be reset (should be less than diagnostics.forced_sql_stat_reset.interval). It has a max value of 24H. enterprise.licensestringthe encoded cluster license external.graphite.endpointstringif nonempty, push server metrics to the Graphite or Carbon server at the specified host:port external.graphite.intervalduration10sthe interval at which metrics are pushed to Graphite (if enabled) diff --git a/pkg/server/stats_test.go b/pkg/server/stats_test.go index 81f4cd991152..cd165579ef79 100644 --- a/pkg/server/stats_test.go +++ b/pkg/server/stats_test.go @@ -140,7 +140,7 @@ func TestEnsureSQLStatsAreFlushedForTelemetry(t *testing.T) { statusServer := s.(*TestServer).status sqlServer := s.(*TestServer).Server.sqlServer.pgServer.SQLServer - sqlServer.GetSQLStatsController().ResetLocalSQLStats(ctx) + sqlServer.GetSQLStatsProvider().(*persistedsqlstats.PersistedSQLStats).Flush(ctx) testutils.SucceedsSoon(t, func() error { // Get the diagnostic info. res, err := statusServer.Diagnostics(ctx, &serverpb.DiagnosticsRequest{NodeId: "local"}) diff --git a/pkg/settings/registry.go b/pkg/settings/registry.go index 6221d7c61736..5ece20241cf6 100644 --- a/pkg/settings/registry.go +++ b/pkg/settings/registry.go @@ -103,6 +103,7 @@ var retiredSettings = map[string]struct{}{ "kv.closed_timestamp.close_fraction": {}, "sql.telemetry.query_sampling.qps_threshold": {}, "sql.telemetry.query_sampling.sample_rate": {}, + "diagnostics.sql_stat_reset.interval": {}, // removed as of 22.1. "sql.defaults.drop_enum_value.enabled": {}, diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index f632c492da4c..8008106237f5 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -326,7 +326,6 @@ func NewServer(cfg *ExecutorConfig, pool *mon.BytesMonitor) *Server { metrics.StatsMetrics.ReportedSQLStatsMemoryCurBytesCount, metrics.StatsMetrics.ReportedSQLStatsMemoryMaxBytesHist, pool, - nil, /* resetInterval */ nil, /* reportedProvider */ cfg.SQLStatsTestingKnobs, ) @@ -339,7 +338,6 @@ func NewServer(cfg *ExecutorConfig, pool *mon.BytesMonitor) *Server { metrics.StatsMetrics.SQLStatsMemoryCurBytesCount, metrics.StatsMetrics.SQLStatsMemoryMaxBytesHist, pool, - sqlstats.SQLStatReset, reportedSQLStats, cfg.SQLStatsTestingKnobs, ) @@ -446,12 +444,11 @@ func makeMetrics(cfg *ExecutorConfig, internal bool) Metrics { // Start starts the Server's background processing. func (s *Server) Start(ctx context.Context, stopper *stop.Stopper) { s.indexUsageStats.Start(ctx, stopper) - // Start a loop to clear SQL stats at the max reset interval. This is - // to ensure that we always have some worker clearing SQL stats to avoid - // continually allocating space for the SQL stats. Additionally, spawn - // a loop to clear the reported stats at the same large interval just - // in case the telemetry worker fails. s.sqlStats.Start(ctx, stopper) + + // reportedStats is periodically cleared to prevent too many SQL Stats + // accumulated in the reporter when the telemetry server fails. + // Usually it is telemetry's reporter's job to clear the reporting SQL Stats. s.reportedStats.Start(ctx, stopper) } diff --git a/pkg/sql/sqlstats/cluster_settings.go b/pkg/sql/sqlstats/cluster_settings.go index d966b6d8819c..8655dd0e68b6 100644 --- a/pkg/sql/sqlstats/cluster_settings.go +++ b/pkg/sql/sqlstats/cluster_settings.go @@ -137,22 +137,12 @@ var MaxSQLStatsStmtFingerprintsPerExplicitTxn = settings.RegisterIntSetting( 2000, ) -// SQLStatReset is the cluster setting that controls at what interval SQL -// statement statistics should be reset. -var SQLStatReset = settings.RegisterDurationSetting( - "diagnostics.sql_stat_reset.interval", - "interval controlling how often SQL statement statistics should "+ - "be reset (should be less than diagnostics.forced_sql_stat_reset.interval). It has a max value of 24H.", - time.Hour, - settings.NonNegativeDurationWithMaximum(time.Hour*24), -).WithPublic() - // MaxSQLStatReset is the cluster setting that controls at what interval SQL // statement statistics must be flushed within. var MaxSQLStatReset = settings.RegisterDurationSetting( "diagnostics.forced_sql_stat_reset.interval", - "interval after which SQL statement statistics are refreshed even "+ - "if not collected (should be more than diagnostics.sql_stat_reset.interval). It has a max value of 24H.", - time.Hour*2, // 2 x diagnostics.sql_stat_reset.interval + "interval after which the reported SQL Stats are reset even "+ + "if not collected by telemetry reporter. It has a max value of 24H.", + time.Hour*2, settings.NonNegativeDurationWithMaximum(time.Hour*24), ).WithPublic() diff --git a/pkg/sql/sqlstats/persistedsqlstats/provider.go b/pkg/sql/sqlstats/persistedsqlstats/provider.go index 9e9f8ad7285e..911d00b6c8ed 100644 --- a/pkg/sql/sqlstats/persistedsqlstats/provider.go +++ b/pkg/sql/sqlstats/persistedsqlstats/provider.go @@ -110,7 +110,6 @@ func New(cfg *Config, memSQLStats *sslocal.SQLStats) *PersistedSQLStats { // Start implements sqlstats.Provider interface. func (s *PersistedSQLStats) Start(ctx context.Context, stopper *stop.Stopper) { - s.SQLStats.Start(ctx, stopper) s.startSQLStatsFlushLoop(ctx, stopper) s.jobMonitor.start(ctx, stopper) } diff --git a/pkg/sql/sqlstats/sslocal/sql_stats.go b/pkg/sql/sqlstats/sslocal/sql_stats.go index f609c8a4e559..be91205b8ce2 100644 --- a/pkg/sql/sqlstats/sslocal/sql_stats.go +++ b/pkg/sql/sqlstats/sslocal/sql_stats.go @@ -38,10 +38,6 @@ type SQLStats struct { // fingerprints we can store in memory. uniqueTxnFingerprintLimit *settings.IntSetting - // resetInterval is the interval of how long before the in-memory stats are - // being reset. - resetInterval *settings.DurationSetting - mu struct { syncutil.Mutex @@ -78,7 +74,6 @@ func newSQLStats( curMemBytesCount *metric.Gauge, maxMemBytesHist *metric.Histogram, parentMon *mon.BytesMonitor, - resetInterval *settings.DurationSetting, flushTarget Sink, knobs *sqlstats.TestingKnobs, ) *SQLStats { @@ -95,7 +90,6 @@ func newSQLStats( st: st, uniqueStmtFingerprintLimit: uniqueStmtFingerprintLimit, uniqueTxnFingerprintLimit: uniqueTxnFingerprintLimit, - resetInterval: resetInterval, flushTarget: flushTarget, knobs: knobs, } diff --git a/pkg/sql/sqlstats/sslocal/sql_stats_test.go b/pkg/sql/sqlstats/sslocal/sql_stats_test.go index 20525dc7dc2d..3ed7a0d390a6 100644 --- a/pkg/sql/sqlstats/sslocal/sql_stats_test.go +++ b/pkg/sql/sqlstats/sslocal/sql_stats_test.go @@ -437,7 +437,6 @@ func TestExplicitTxnFingerprintAccounting(t *testing.T) { nil, /* curMemoryBytesCount */ nil, /* maxMemoryBytesHist */ monitor, - nil, /* resetInterval */ nil, /* reportingSink */ nil, /* knobs */ ) diff --git a/pkg/sql/sqlstats/sslocal/sslocal_provider.go b/pkg/sql/sqlstats/sslocal/sslocal_provider.go index 691ed72982fc..c79d7314b88d 100644 --- a/pkg/sql/sqlstats/sslocal/sslocal_provider.go +++ b/pkg/sql/sqlstats/sslocal/sslocal_provider.go @@ -38,12 +38,11 @@ func New( curMemoryBytesCount *metric.Gauge, maxMemoryBytesHist *metric.Histogram, pool *mon.BytesMonitor, - resetInterval *settings.DurationSetting, reportingSink Sink, knobs *sqlstats.TestingKnobs, ) *SQLStats { return newSQLStats(settings, maxStmtFingerprints, maxTxnFingerprints, - curMemoryBytesCount, maxMemoryBytesHist, pool, resetInterval, + curMemoryBytesCount, maxMemoryBytesHist, pool, reportingSink, knobs) } @@ -59,18 +58,6 @@ func (s *SQLStats) GetController( // Start implements sqlstats.Provider interface. func (s *SQLStats) Start(ctx context.Context, stopper *stop.Stopper) { - if s.resetInterval != nil { - s.periodicallyClearSQLStats(ctx, stopper, s.resetInterval) - } - // Start a loop to clear SQL stats at the max reset interval. This is - // to ensure that we always have some worker clearing SQL stats to avoid - // continually allocating space for the SQL stats. - s.periodicallyClearSQLStats(ctx, stopper, sqlstats.MaxSQLStatReset) -} - -func (s *SQLStats) periodicallyClearSQLStats( - ctx context.Context, stopper *stop.Stopper, resetInterval *settings.DurationSetting, -) { // We run a periodic async job to clean up the in-memory stats. _ = stopper.RunAsyncTask(ctx, "sql-stats-clearer", func(ctx context.Context) { var timer timeutil.Timer @@ -79,13 +66,13 @@ func (s *SQLStats) periodicallyClearSQLStats( last := s.mu.lastReset s.mu.Unlock() - next := last.Add(resetInterval.Get(&s.st.SV)) + next := last.Add(sqlstats.MaxSQLStatReset.Get(&s.st.SV)) wait := next.Sub(timeutil.Now()) if wait < 0 { err := s.Reset(ctx) if err != nil { if log.V(1) { - log.Warningf(ctx, "reported SQL stats memory limit has been exceeded, some fingerprints stats are discarded: %s", err) + log.Warningf(ctx, "unexpected error: %s", err) } } } else {