From ebac81d9c110495c919d6b383f715f8689fad06b Mon Sep 17 00:00:00 2001 From: Lucy Zhang Date: Thu, 25 Feb 2021 19:36:16 -0500 Subject: [PATCH] sql: fix reverting schema changes for databases and schemas Previously, we were failing to handle the case where a schema change job to drop or rename a database or schema entered the reverting state. In `OnFailOrCancel`, we were always trying to look up the descriptor associated with the job assuming it was a table. This led to a panic in 20.2. On master we get return an internal error `relation [id] not found`. We also had the preexisting behavior that when dropping a database, we would return nil from `OnFailOrCancel` without indicating anything was wrong. This is undesirable, since it's abnormal for these jobs to enter the reverting state, and we can't actually either revert the schema change or clean up. This PR adds checks at the top of `OnFailOrCancel` so that we now exit early with an error if the descriptor is not a table, and makes the behavior for all database and schema jobs consistent. Release justification: Fixes for high-priority or high-severity bugs in existing functionality Release note (bug fix): Fixed a bug where schema changes on databases and schemas could return a `relation [] does not exist` if they failed or were canceled and entered the reverting state. These jobs are not actually possible to revert. With this change, the correct error causing the job to fail will be returned, and the job will enter the failed state with an error indicating that the job could not be reverted. --- pkg/sql/schema_changer.go | 18 ++++--- pkg/sql/schema_changer_test.go | 96 ++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 6 deletions(-) diff --git a/pkg/sql/schema_changer.go b/pkg/sql/schema_changer.go index cb000bfca918..e1b2562030fb 100644 --- a/pkg/sql/schema_changer.go +++ b/pkg/sql/schema_changer.go @@ -719,13 +719,20 @@ func (sc *SchemaChanger) handlePermanentSchemaChangeError( ctx context.Context, err error, evalCtx *extendedEvalContext, ) error { - // Ensure that this mutation is first in line prior to reverting. + // Ensure that this is a table descriptor and that the mutation is first in + // line prior to reverting. { // Pull out the requested descriptor. desc, descErr := sc.getTargetDescriptor(ctx) if descErr != nil { return descErr } + // Currently we don't attempt to roll back schema changes for anything other + // than tables. For jobs intended to drop other types of descriptors, we do + // nothing. + if _, ok := desc.(catalog.TableDescriptor); !ok { + return errors.Newf("schema change jobs on databases and schemas cannot be reverted") + } // Check that we aren't queued behind another schema changer. if err := sc.notFirstInLine(ctx, desc); err != nil { @@ -2311,12 +2318,11 @@ func (r schemaChangeResumer) OnFailOrCancel(ctx context.Context, execCtx interfa p := execCtx.(JobExecContext) details := r.job.Details().(jobspb.SchemaChangeDetails) - if details.DroppedDatabaseID != descpb.InvalidID { - // TODO (lucy): Do we need to do anything here? - return nil - } + // If this is a schema change to drop a database or schema, DescID will be + // unset. We cannot revert such schema changes, so just exit early with an + // error. if details.DescID == descpb.InvalidID { - return errors.AssertionFailedf("job has no database ID or table ID") + return errors.Newf("schema change jobs on databases and schemas cannot be reverted") } sc := SchemaChanger{ descID: details.DescID, diff --git a/pkg/sql/schema_changer_test.go b/pkg/sql/schema_changer_test.go index 0ad1fbda4af5..069c5ba0f708 100644 --- a/pkg/sql/schema_changer_test.go +++ b/pkg/sql/schema_changer_test.go @@ -6850,3 +6850,99 @@ AND descriptor_ids[1] = 'db.t2'::regclass::int`, require.Equal(t, status, jobs.StatusCanceled) require.Equal(t, error, "job canceled by user") } + +// TestRevertingJobsOnDatabasesAndSchemas tests that schema change jobs on +// databases and schemas return an error from the OnFailOrCancel hook. +// Regression test for #59415. +func TestRevertingJobsOnDatabasesAndSchemas(t *testing.T) { + defer leaktest.AfterTest(t)() + defer sqltestutils.SetTestJobsAdoptInterval()() + ctx := context.Background() + + var s serverutils.TestServerInterface + params, _ := tests.CreateTestServerParams() + params.Knobs = base.TestingKnobs{ + SQLSchemaChanger: &sql.SchemaChangerTestingKnobs{ + RunBeforeResume: func(jobID jobspb.JobID) error { + scJob, err := s.JobRegistry().(*jobs.Registry).LoadJob(ctx, jobID) + if err != nil { + return err + } + pl := scJob.Payload() + // This is a hacky way to only inject errors in the rename/drop/grant jobs. + if strings.Contains(pl.Description, "updating parent database") { + return nil + } + for _, s := range []string{"DROP", "RENAME", "updating privileges"} { + if strings.Contains(pl.Description, s) { + return errors.New("injected permanent error") + } + } + return nil + }, + }, + } + var db *gosql.DB + s, db, _ = serverutils.StartServer(t, params) + defer s.Stopper().Stop(ctx) + sqlDB := sqlutils.MakeSQLRunner(db) + + for _, tc := range []struct { + name string + setupStmts string + scStmt string + jobRegex string + }{ + { + name: "drop schema", + setupStmts: `CREATE DATABASE db_drop_schema; CREATE SCHEMA db_drop_schema.sc;`, + scStmt: `DROP SCHEMA db_drop_schema.sc`, + jobRegex: `^DROP SCHEMA db_drop_schema.sc$`, + }, + { + name: "rename schema", + setupStmts: `CREATE DATABASE db_rename_schema; CREATE SCHEMA db_rename_schema.sc;`, + scStmt: `ALTER SCHEMA db_rename_schema.sc RENAME TO new_name`, + jobRegex: `^ALTER SCHEMA db_rename_schema.sc RENAME TO new_name$`, + }, + { + name: "grant on schema", + setupStmts: `CREATE DATABASE db_grant_on_schema; CREATE SCHEMA db_grant_on_schema.sc;`, + scStmt: `GRANT ALL ON SCHEMA db_grant_on_schema.sc TO PUBLIC`, + jobRegex: `updating privileges for schema`, + }, + { + name: "drop database cascade", + setupStmts: `CREATE DATABASE db_drop; CREATE SCHEMA db_drop.sc; CREATE TABLE db_drop.sc.tbl();`, + scStmt: `DROP DATABASE db_drop CASCADE`, + jobRegex: `^DROP DATABASE db_drop CASCADE$`, + }, + { + name: "rename database", + setupStmts: `CREATE DATABASE db_rename;`, + scStmt: `ALTER DATABASE db_rename RENAME TO db_new_name`, + jobRegex: `^ALTER DATABASE db_rename RENAME TO db_new_name$`, + }, + { + name: "grant on database", + setupStmts: `CREATE DATABASE db_grant`, + scStmt: `GRANT ALL ON DATABASE db_grant TO PUBLIC`, + jobRegex: `updating privileges for database`, + }, + } { + t.Run(tc.name, func(t *testing.T) { + sqlDB.Exec(t, tc.setupStmts) + sqlDB.ExpectErr(t, "injected permanent error", tc.scStmt) + result := sqlDB.QueryStr(t, + `SELECT status, error FROM crdb_internal.jobs WHERE description ~ $1`, + tc.jobRegex) + require.Len(t, result, 1) + status, jobError := result[0][0], result[0][1] + require.Equal(t, string(jobs.StatusFailed), status) + require.Regexp(t, + "cannot be reverted, manual cleanup may be required: "+ + "schema change jobs on databases and schemas cannot be reverted", + jobError) + }) + } +}