stats: automatically delete stats for dropped tables
This commit adds another task to the stats refresher that periodically
runs a query to delete stats for dropped tables from the system table.
By default, this query runs once an hour, but the interval can be
configured via a cluster setting (setting it to 0 disables this new
"stats garbage collector"). The query also limits the number of dropped
tables processed at once, 1000 by default (controlled via another
cluster setting). The rationale for introducing the limit is to prevent
a huge DELETE when a cluster that has been running for a long time with
many dropped tables upgrades to a binary with this fix.

Release note (bug fix): CockroachDB now automatically deletes statistics
for dropped tables from the `system.table_statistics` table.
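
As a usage sketch of the new settings (the setting names and defaults come
from the diff below; the specific values here are arbitrary examples, not
recommendations):

-- Run the cleanup query every 30 minutes instead of the default hour.
SET CLUSTER SETTING sql.stats.garbage_collection_interval = '30m';
-- Delete stats for at most 500 dropped tables per sweep.
SET CLUSTER SETTING sql.stats.garbage_collection_limit = 500;
-- Setting the interval to 0 disables the stats garbage collector.
SET CLUSTER SETTING sql.stats.garbage_collection_interval = '0s';
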
yuzefovich committed Jun 23, 2023
1 parent 320e861 commit 00388a1
Showing 4 changed files with 160 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pkg/sql/stats/BUILD.bazel
@@ -72,6 +72,7 @@ go_test(
"automatic_stats_manual_test.go",
"automatic_stats_test.go",
"create_stats_job_test.go",
"delete_stats_dropped_table_test.go",
"delete_stats_test.go",
"forecast_test.go",
"histogram_test.go",
@@ -114,6 +115,7 @@ go_test(
"//pkg/sql/sem/catid",
"//pkg/sql/sem/eval",
"//pkg/sql/sem/tree",
"//pkg/sql/tests",
"//pkg/sql/types",
"//pkg/testutils",
"//pkg/testutils/jobutils",
72 changes: 72 additions & 0 deletions pkg/sql/stats/automatic_stats.go
@@ -126,6 +126,28 @@ var AutomaticStatisticsMinStaleRows = func() *settings.IntSetting {
return s
}()

// statsGarbageCollectionInterval controls the interval between running an
// internal query to delete stats for dropped tables.
var statsGarbageCollectionInterval = settings.RegisterDurationSetting(
settings.TenantWritable,
"sql.stats.garbage_collection_interval",
"interval between deleting stats for dropped tables, set to 0 to disable",
time.Hour,
settings.NonNegativeDuration,
)

// statsGarbageCollectionLimit controls the limit on the number of dropped
// tables that we delete stats for as part of the single "garbage sweep" (those
// beyond the limit will need to wait out statsGarbageCollectionInterval until
// the next "sweep").
var statsGarbageCollectionLimit = settings.RegisterIntSetting(
settings.TenantWritable,
"sql.stats.garbage_collection_limit",
"limit on the number of dropped tables that stats are deleted for as part of a single statement",
1000,
settings.PositiveInt,
)

// DefaultRefreshInterval is the frequency at which the Refresher will check if
// the stats for each table should be refreshed. It is mutable for testing.
// NB: Updates to this value after Refresher.Start has been called will not
@@ -555,6 +577,56 @@ func (r *Refresher) Start(
r.startedTasksWG.Done()
log.Warningf(ctx, "refresher task failed to start: %v", err)
}
// Start another task that will periodically run an internal query to delete
// stats for dropped tables.
r.startedTasksWG.Add(1)
if err := stopper.RunAsyncTask(bgCtx, "stats-garbage-collector", func(ctx context.Context) {
defer r.startedTasksWG.Done()
intervalChangedCh := make(chan struct{}, 1)
// The stats-garbage-collector task is started only once globally, so
// we'll only add a single OnChange callback.
statsGarbageCollectionInterval.SetOnChange(&r.st.SV, func(ctx context.Context) {
select {
case intervalChangedCh <- struct{}{}:
default:
}
})
for {
interval := statsGarbageCollectionInterval.Get(&r.st.SV)
if interval == 0 {
// Zero interval disables the stats garbage collector, so we
// block until either it is enabled again or the node is
// quiescing.
select {
case <-intervalChangedCh:
continue
case <-r.drainAutoStats:
log.Infof(ctx, "draining stats garbage collector")
return
case <-stopper.ShouldQuiesce():
log.Infof(ctx, "quiescing stats garbage collector")
return
}
}
select {
case <-time.After(interval):
case <-intervalChangedCh:
continue
case <-r.drainAutoStats:
log.Infof(ctx, "draining stats garbage collector")
return
case <-stopper.ShouldQuiesce():
log.Infof(ctx, "quiescing stats garbage collector")
return
}
if err := deleteStatsForDroppedTables(ctx, r.ex, statsGarbageCollectionLimit.Get(&r.st.SV)); err != nil {
log.Warningf(ctx, "stats-garbage-collector encountered an error when deleting stats: %v", err)
}
}
}); err != nil {
r.startedTasksWG.Done()
log.Warningf(ctx, "stats-garbage-collector task failed to start: %v", err)
}
return nil
}

12 changes: 12 additions & 0 deletions pkg/sql/stats/delete_stats.go
@@ -124,3 +124,15 @@ func DeleteOldStatsForOtherColumns(
)
return err
}

// deleteStatsForDroppedTables deletes all statistics for at most 'limit' number
// of dropped tables.
func deleteStatsForDroppedTables(ctx context.Context, ex isql.Executor, limit int64) error {
_, err := ex.Exec(
ctx, "delete-statistics-for-dropped-tables", nil, /* txn */
fmt.Sprintf(`DELETE FROM system.table_statistics
WHERE "tableID" NOT IN (SELECT table_id FROM crdb_internal.tables)
LIMIT %d`, limit),
)
return err
}
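
For reference, a sketch of the statement this helper issues with the default
limit of 1000 (rendered from the fmt.Sprintf format string above, not captured
from a running cluster):

DELETE FROM system.table_statistics
WHERE "tableID" NOT IN (SELECT table_id FROM crdb_internal.tables)
LIMIT 1000;
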
74 changes: 74 additions & 0 deletions pkg/sql/stats/delete_stats_dropped_table_test.go
@@ -0,0 +1,74 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package stats_test

import (
"context"
"testing"
"time"

"github.com/cockroachdb/cockroach/pkg/sql/tests"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
)

// TestStatsAreDeletedForDroppedTables ensures that statistics for dropped
// tables are automatically deleted.
func TestStatsAreDeletedForDroppedTables(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

params, _ := tests.CreateTestServerParams()
params.ScanMaxIdleTime = time.Millisecond // speed up MVCC GC queue scans
s, sqlDB, _ := serverutils.StartServer(t, params)
defer s.Stopper().Stop(context.Background())
runner := sqlutils.MakeSQLRunner(sqlDB)

// Disable auto stats so that it doesn't interfere.
runner.Exec(t, "SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false;")
// Lower the garbage collection interval to speed up the test.
runner.Exec(t, "SET CLUSTER SETTING sql.stats.garbage_collection_interval = '1s';")
// Poll for MVCC GC more frequently.
runner.Exec(t, "SET CLUSTER SETTING sql.gc_job.wait_for_gc.interval = '1s';")
// Cached protected timestamp state delays MVCC GC, update it every second.
runner.Exec(t, "SET CLUSTER SETTING kv.protectedts.poll_interval = '1s';")

// Create a table with short TTL and collect stats on it.
runner.Exec(t, "CREATE TABLE t (k PRIMARY KEY) AS SELECT 1;")
runner.Exec(t, "ALTER TABLE t CONFIGURE ZONE USING gc.ttlseconds = 1;")
runner.Exec(t, "ANALYZE t;")

r := runner.QueryRow(t, "SELECT 't'::regclass::oid")
var tableID int
r.Scan(&tableID)

// Ensure that we see a single statistic for the table.
var count int
runner.QueryRow(t, `SELECT count(*) FROM system.table_statistics WHERE "tableID" = $1;`, tableID).Scan(&count)
if count != 1 {
t.Fatalf("expected a single statistic for table 't', found %d", count)
}

// Now drop the table and make sure that the table statistic is deleted
// promptly.
runner.Exec(t, "DROP TABLE t;")
testutils.SucceedsSoon(t, func() error {
runner.QueryRow(t, `SELECT count(*) FROM system.table_statistics WHERE "tableID" = $1;`, tableID).Scan(&count)
if count != 0 {
return errors.Newf("expected no stats for the dropped table, found %d statistics", count)
}
return nil
})
}
