From d84da0c028b2be6ae0d745ac8c2d40b42324f27e Mon Sep 17 00:00:00 2001 From: Chengxiong Ruan Date: Tue, 28 Mar 2023 21:28:55 -0400 Subject: [PATCH] sql: mark index as GCed if table has been GCed in legacy gc path Previously, if a table is GCed before an index is GCed by a TRUNCATE TABLE gc job, the TRUNCATE TABLE gc job can be stuck in running status because the table descriptor is missing. This is problematic because these jobs will never succeed and doing nothing. This commit marks the indexes as GCed if the descriptor cannot be found assuming that the table has been GCed. Also, table GC should have GCed all the indexes. Note that this only affect the legacy GC path. Epic: None Release note (bug fix): This commit fixes a bug where TRUNCATE TABLE gc job can be stuck in running status if the table descriptor has been GCed. It was because TRUNCATE TABLE actually creates new empty indexes, then replaces and drops the old indexes. The dropped indexes data are deleted and GCed within the TRUNCATE TABLE gc job which needed to see the table descriptor to make progress. But, if the table data has been GCed, the TRUNCATE TABLE gc job couldn't make progress. This patch makes it able to handle the missing descriptor edge case and let the TRUNCATE TABLE gc job succeed. --- pkg/sql/gcjob/refresh_statuses.go | 3 ++ pkg/sql/gcjob_test/BUILD.bazel | 1 + pkg/sql/gcjob_test/gc_job_test.go | 68 +++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+) diff --git a/pkg/sql/gcjob/refresh_statuses.go b/pkg/sql/gcjob/refresh_statuses.go index fdd028663070..4505dbc4f292 100644 --- a/pkg/sql/gcjob/refresh_statuses.go +++ b/pkg/sql/gcjob/refresh_statuses.go @@ -142,6 +142,9 @@ func updateStatusForGCElements( if errors.Is(err, catalog.ErrDescriptorNotFound) { log.Warningf(ctx, "table %d not found, marking as GC'd", tableID) markTableGCed(ctx, tableID, progress) + for indexID := range indexDropTimes { + markIndexGCed(ctx, indexID, progress) + } return false, true, maxDeadline } log.Warningf(ctx, "error while calculating GC time for table %d, err: %+v", tableID, err) diff --git a/pkg/sql/gcjob_test/BUILD.bazel b/pkg/sql/gcjob_test/BUILD.bazel index 94330318f6eb..62cea74e203c 100644 --- a/pkg/sql/gcjob_test/BUILD.bazel +++ b/pkg/sql/gcjob_test/BUILD.bazel @@ -29,6 +29,7 @@ go_test( "//pkg/sql/catalog/tabledesc", "//pkg/sql/gcjob", "//pkg/sql/gcjob/gcjobnotifier", + "//pkg/sql/tests", "//pkg/testutils", "//pkg/testutils/jobutils", "//pkg/testutils/serverutils", diff --git a/pkg/sql/gcjob_test/gc_job_test.go b/pkg/sql/gcjob_test/gc_job_test.go index 4f38151f4377..90fa847db366 100644 --- a/pkg/sql/gcjob_test/gc_job_test.go +++ b/pkg/sql/gcjob_test/gc_job_test.go @@ -37,6 +37,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc" "github.com/cockroachdb/cockroach/pkg/sql/gcjob" "github.com/cockroachdb/cockroach/pkg/sql/gcjob/gcjobnotifier" + "github.com/cockroachdb/cockroach/pkg/sql/tests" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/jobutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" @@ -720,3 +721,70 @@ func (f *fakeSystemConfigProvider) numCalls() int { } var _ config.SystemConfigProvider = (*fakeSystemConfigProvider)(nil) + +func TestLegacyIndexGCSucceedsWithMissingDescriptor(t *testing.T) { + defer leaktest.AfterTest(t)() + params, _ := tests.CreateTestServerParams() + params.Knobs.JobsTestingKnobs = jobs.NewTestingKnobsWithShortIntervals() + + s, sqlDB, _ := serverutils.StartServer(t, params) + defer s.Stopper().Stop(context.Background()) + tDB := sqlutils.MakeSQLRunner(sqlDB) + + tDB.Exec(t, `CREATE TABLE t(a INT)`) + tDB.Exec(t, `INSERT INTO t VALUES (1), (2)`) + tDB.Exec(t, `TRUNCATE TABLE t`) + + var truncateJobID string + testutils.SucceedsSoon(t, func() error { + rslt := tDB.QueryStr(t, `SELECT job_id, status, running_status FROM [SHOW JOBS] WHERE description = 'GC for TRUNCATE TABLE defaultdb.public.t'`) + if len(rslt) != 1 { + t.Fatalf("expect only 1 truncate job, found %d", len(rslt)) + } + if rslt[0][1] != "running" { + return errors.New("job not running yet") + } + if rslt[0][2] != "waiting for GC TTL" { + return errors.New("not waiting for gc yet") + } + truncateJobID = rslt[0][0] + return nil + }) + + tDB.Exec(t, `PAUSE JOB `+truncateJobID) + testutils.SucceedsSoon(t, func() error { + rslt := tDB.QueryStr(t, `SELECT status FROM [SHOW JOBS] WHERE job_id = `+truncateJobID) + if len(rslt) != 1 { + t.Fatalf("expect only 1 truncate job, found %d", len(rslt)) + } + if rslt[0][0] != "paused" { + return errors.New("job not paused yet") + } + return nil + }) + + tDB.Exec(t, `ALTER TABLE t CONFIGURE ZONE USING gc.ttlseconds = 1;`) + tDB.Exec(t, `DROP TABLE t`) + testutils.SucceedsSoon(t, func() error { + rslt := tDB.QueryStr(t, `SELECT status FROM [SHOW JOBS] WHERE description = 'GC for DROP TABLE defaultdb.public.t'`) + if len(rslt) != 1 { + t.Fatalf("expect only 1 truncate job, found %d", len(rslt)) + } + if rslt[0][0] != "succeeded" { + return errors.New("job not running yet") + } + return nil + }) + + tDB.Exec(t, `RESUME JOB `+truncateJobID) + testutils.SucceedsSoon(t, func() error { + rslt := tDB.QueryStr(t, `SELECT status FROM [SHOW JOBS] WHERE job_id = `+truncateJobID) + if len(rslt) != 1 { + t.Fatalf("expect only 1 truncate job, found %d", len(rslt)) + } + if rslt[0][0] != "succeeded" { + return errors.New("job not running") + } + return nil + }) +}