From 483722d44b0c2f7b8e2f9d2865070d104a79d356 Mon Sep 17 00:00:00 2001 From: Annie Pompa Date: Thu, 2 May 2024 15:10:46 -0400 Subject: [PATCH] doctor: skip validation for dropped descriptors In some cases, dropped descriptors appear in our `system.descriptors` table with dangling job mutations without an associated job. This patch teaches debug doctor examine to skip validation on such dropped descriptors. Epic: none Fixes: #123477 Fixes: #122956 Release note: none --- pkg/cli/doctor_test.go | 55 ++++++++++++++++--- .../doctor/test_examine_cluster_dropped | 6 ++ pkg/sql/doctor/doctor.go | 4 ++ 3 files changed, 57 insertions(+), 8 deletions(-) create mode 100644 pkg/cli/testdata/doctor/test_examine_cluster_dropped diff --git a/pkg/cli/doctor_test.go b/pkg/cli/doctor_test.go index 5cbd02f5dce9..08abac4f391a 100644 --- a/pkg/cli/doctor_test.go +++ b/pkg/cli/doctor_test.go @@ -20,7 +20,14 @@ import ( "github.com/cockroachdb/datadriven" ) -var descriptorWithFKMutation = `'{ +func descriptorWithFKMutation(isDropped bool) string { + dropState := `` + dropTime := `` + if isDropped { + dropState = `"state": "DROP",` + dropTime = `"dropTime": "1713940113794672911",` + } + return `'{ "table": { "columns": [ { @@ -32,8 +39,9 @@ var descriptorWithFKMutation = `'{ "width": 64 } } - ], - "createAsOfTime": { + ],` + + dropTime + + `"createAsOfTime": { "logical": 1, "wallTime": "1713940112376217646" }, @@ -133,11 +141,13 @@ var descriptorWithFKMutation = `'{ }, "replacementOf": { "time": {} - }, - "unexposedParentSchemaId": 381, + },` + + dropState + + `"unexposedParentSchemaId": 381, "version": "2" } }'` +} // This test doctoring a secure cluster. func TestDoctorCluster(t *testing.T) { @@ -177,13 +187,13 @@ func TestDoctorClusterBroken(t *testing.T) { c := NewCLITest(TestCLIParams{T: t}) defer c.Cleanup() - jobDesc := fmt.Sprintf("SELECT crdb_internal.unsafe_upsert_descriptor('foo'::regclass::oid::int,"+ - "crdb_internal.json_to_pb('cockroach.sql.sqlbase.Descriptor', %s::jsonb), true)", descriptorWithFKMutation) + desc := fmt.Sprintf("SELECT crdb_internal.unsafe_upsert_descriptor('foo'::regclass::oid::int,"+ + "crdb_internal.json_to_pb('cockroach.sql.sqlbase.Descriptor', %s::jsonb), true)", descriptorWithFKMutation(false /* isDropped */)) // Introduce a descriptor with an attached job mutation (along with other issues). c.RunWithArgs([]string{"sql", "-e", strings.Join([]string{ "CREATE TABLE foo (i INT)", - jobDesc, + desc, }, ";\n"), }) @@ -200,6 +210,35 @@ func TestDoctorClusterBroken(t *testing.T) { }) } +// TestDoctorClusterDropped tests that debug doctor examine will avoid validating dropped descriptors. +func TestDoctorClusterDropped(t *testing.T) { + defer leaktest.AfterTest(t)() + c := NewCLITest(TestCLIParams{T: t}) + defer c.Cleanup() + + desc := fmt.Sprintf("SELECT crdb_internal.unsafe_upsert_descriptor('foo'::regclass::oid::int,"+ + "crdb_internal.json_to_pb('cockroach.sql.sqlbase.Descriptor', %s::jsonb), true)", descriptorWithFKMutation(true /* isDropped */)) + + // Introduce a dropped descriptor with an attached job mutation (along with other issues). + c.RunWithArgs([]string{"sql", "-e", strings.Join([]string{ + "CREATE TABLE foo (i INT)", + desc, + }, ";\n"), + }) + + t.Run("examine", func(t *testing.T) { + out, err := c.RunWithCapture("debug doctor examine cluster") + if err != nil { + t.Fatal(err) + } + + // Using datadriven allows TESTFLAGS=-rewrite. + datadriven.RunTest(t, datapathutils.TestDataPath(t, "doctor", "test_examine_cluster_dropped"), func(t *testing.T, td *datadriven.TestData) string { + return out + }) + }) +} + // TestDoctorZipDir tests the operation of zip over secure clusters. func TestDoctorZipDir(t *testing.T) { defer leaktest.AfterTest(t)() diff --git a/pkg/cli/testdata/doctor/test_examine_cluster_dropped b/pkg/cli/testdata/doctor/test_examine_cluster_dropped new file mode 100644 index 000000000000..d6924a745c3e --- /dev/null +++ b/pkg/cli/testdata/doctor/test_examine_cluster_dropped @@ -0,0 +1,6 @@ +debug doctor examine cluster +---- +debug doctor examine cluster +Examining 57 descriptors and 58 namespace entries... +Examining 2 jobs... +No problems found! diff --git a/pkg/sql/doctor/doctor.go b/pkg/sql/doctor/doctor.go index 6ab289ab0c9c..c31293c43c0c 100644 --- a/pkg/sql/doctor/doctor.go +++ b/pkg/sql/doctor/doctor.go @@ -189,6 +189,10 @@ func ExamineDescriptors( for _, row := range descTable { id := descpb.ID(row.ID) desc := descLookupFn(id) + // No need to validate dropped descriptors + if desc.Dropped() { + continue + } ve := cb.ValidateWithRecover(ctx, version, desc) for _, err := range ve { problemsFound = true