Skip to content

Commit

Permalink
doctor: skip validation for dropped descriptors
Browse files Browse the repository at this point in the history
In some cases, dropped descriptors appear in our `system.descriptors`
table with dangling job mutations without an associated job.
This patch teaches debug doctor examine to skip validation
on such dropped descriptors.

Epic: none

Fixes: cockroachdb#123477
Fixes: cockroachdb#122956

Release note: none
  • Loading branch information
annrpom committed May 7, 2024
1 parent 4416ff0 commit 26388b0
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 12 deletions.
7 changes: 3 additions & 4 deletions pkg/cli/doctor.go
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ func fromZipDir(
return errors.Wrap(err, "failed unmarshalling job payload")
}

// when can progress be null? if it exists but we have never started it?
// Skip NULL job payloads.
if fields[5] == "NULL" {
return nil
}
Expand All @@ -554,9 +554,8 @@ func fromZipDir(
Payload string `json:"hex_payload"`
Progress string `json:"hex_progress"`
}, 0)
// TODO(before merge): when does this happen?
if err := parseJSONFile(zipDirPath, "system.jobs.json", &jobsTableJSON); err != nil {
return nil, nil, nil, errors.Wrapf(err, "failed to parse system.jobs.json")
if err := parseJSONFile(zipDirPath, "crdb_internal.system_jobs.json", &jobsTableJSON); err != nil {
return nil, nil, nil, errors.Wrapf(err, "failed to parse crdb_internal.system_jobs.json")
}
for _, job := range jobsTableJSON {
row := jobs.JobMetadata{
Expand Down
55 changes: 47 additions & 8 deletions pkg/cli/doctor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@ import (
"github.com/cockroachdb/datadriven"
)

var descriptorWithFKMutation = `'{
func descriptorWithFKMutation(isDropped bool) string {
dropState := ``
dropTime := ``
if isDropped {
dropState = `"state": "DROP",`
dropTime = `"dropTime": "1713940113794672911",`
}
return `'{
"table": {
"columns": [
{
Expand All @@ -32,8 +39,9 @@ var descriptorWithFKMutation = `'{
"width": 64
}
}
],
"createAsOfTime": {
],` +
dropTime +
`"createAsOfTime": {
"logical": 1,
"wallTime": "1713940112376217646"
},
Expand Down Expand Up @@ -133,11 +141,13 @@ var descriptorWithFKMutation = `'{
},
"replacementOf": {
"time": {}
},
"unexposedParentSchemaId": 381,
},` +
dropState +
`"unexposedParentSchemaId": 381,
"version": "2"
}
}'`
}

// This test doctoring a secure cluster.
func TestDoctorCluster(t *testing.T) {
Expand Down Expand Up @@ -177,13 +187,13 @@ func TestDoctorClusterBroken(t *testing.T) {
c := NewCLITest(TestCLIParams{T: t})
defer c.Cleanup()

jobDesc := fmt.Sprintf("SELECT crdb_internal.unsafe_upsert_descriptor('foo'::regclass::oid::int,"+
"crdb_internal.json_to_pb('cockroach.sql.sqlbase.Descriptor', %s::jsonb), true)", descriptorWithFKMutation)
desc := fmt.Sprintf("SELECT crdb_internal.unsafe_upsert_descriptor('foo'::regclass::oid::int,"+
"crdb_internal.json_to_pb('cockroach.sql.sqlbase.Descriptor', %s::jsonb), true)", descriptorWithFKMutation(false /* isDropped */))

// Introduce a descriptor with an attached job mutation (along with other issues).
c.RunWithArgs([]string{"sql", "-e", strings.Join([]string{
"CREATE TABLE foo (i INT)",
jobDesc,
desc,
}, ";\n"),
})

Expand All @@ -200,6 +210,35 @@ func TestDoctorClusterBroken(t *testing.T) {
})
}

// TestDoctorClusterDropped tests that debug doctor examine will avoid validating dropped descriptors.
func TestDoctorClusterDropped(t *testing.T) {
defer leaktest.AfterTest(t)()
c := NewCLITest(TestCLIParams{T: t})
defer c.Cleanup()

desc := fmt.Sprintf("SELECT crdb_internal.unsafe_upsert_descriptor('foo'::regclass::oid::int,"+
"crdb_internal.json_to_pb('cockroach.sql.sqlbase.Descriptor', %s::jsonb), true)", descriptorWithFKMutation(true /* isDropped */))

// Introduce a dropped descriptor with an attached job mutation (along with other issues).
c.RunWithArgs([]string{"sql", "-e", strings.Join([]string{
"CREATE TABLE foo (i INT)",
desc,
}, ";\n"),
})

t.Run("examine", func(t *testing.T) {
out, err := c.RunWithCapture("debug doctor examine cluster")
if err != nil {
t.Fatal(err)
}

// Using datadriven allows TESTFLAGS=-rewrite.
datadriven.RunTest(t, datapathutils.TestDataPath(t, "doctor", "test_examine_cluster_dropped"), func(t *testing.T, td *datadriven.TestData) string {
return out
})
})
}

// TestDoctorZipDir tests the operation of zip over secure clusters.
func TestDoctorZipDir(t *testing.T) {
defer leaktest.AfterTest(t)()
Expand Down
6 changes: 6 additions & 0 deletions pkg/cli/testdata/doctor/test_examine_cluster_dropped
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
debug doctor examine cluster
----
debug doctor examine cluster
Examining 57 descriptors and 58 namespace entries...
Examining 2 jobs...
No problems found!
4 changes: 4 additions & 0 deletions pkg/sql/doctor/doctor.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,10 @@ func ExamineDescriptors(
for _, row := range descTable {
id := descpb.ID(row.ID)
desc := descLookupFn(id)
// No need to validate dropped descriptors
if desc.Dropped() {
continue
}
ve := cb.ValidateWithRecover(ctx, version, desc)
for _, err := range ve {
problemsFound = true
Expand Down

0 comments on commit 26388b0

Please sign in to comment.