From 8ca4c7219d7a709a5e29562666cb8153ac7d27ef Mon Sep 17 00:00:00 2001 From: arulajmani Date: Fri, 19 Mar 2021 17:23:57 -0400 Subject: [PATCH] backupccl: mark type descs as dropped if there is a failure in restore Previously, when cleaning up type descriptors from a failed restore, we would directly delete the system.descriptor entry. This is bad because writing directly to the system.descriptor table means we bypass the descriptor collection, which doesn't know these descriptors have been dropped. The descriptor collection validates all uncommitted descriptors before writing them. As the descriptor collection doesn't know type descriptors have been dropped, validating cross-references was always bound to fail. Put another way, if a restore from a backup that contained user-defined types failed for any reason, we were bound to require manual cleanup. This patch fixes this problem by going through the descriptor collection and writing the descriptor in DROPPED state in addition to deleting the system.descriptor entry. Release note (bug fix): Fixed a bug where a failed restore from a backup including user-defined types would require manual cleanup. 
--- pkg/ccl/backupccl/backup_test.go | 33 ++++++++++++++++++++++++++++++++ pkg/ccl/backupccl/restore_job.go | 15 +++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go index 9b937ec36c40..7d56c49663a9 100644 --- a/pkg/ccl/backupccl/backup_test.go +++ b/pkg/ccl/backupccl/backup_test.go @@ -6817,6 +6817,39 @@ func TestBackupDoesNotHangOnIntent(t *testing.T) { require.Error(t, tx.Commit()) } +func TestRestoreTypeDescriptorsRollBack(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + _, tc, sqlDB, _, cleanupFn := BackupRestoreTestSetup(t, singleNode, 0, InitManualReplication) + defer cleanupFn() + + for _, server := range tc.Servers { + registry := server.JobRegistry().(*jobs.Registry) + registry.TestingResumerCreationKnobs = map[jobspb.Type]func(raw jobs.Resumer) jobs.Resumer{ + jobspb.TypeRestore: func(raw jobs.Resumer) jobs.Resumer { + r := raw.(*restoreResumer) + r.testingKnobs.beforePublishingDescriptors = func() error { + return errors.New("boom") + } + return r + }, + } + } + + sqlDB.Exec(t, ` +CREATE DATABASE db; +CREATE TYPE db.typ AS ENUM(); +CREATE TABLE db.table (k INT PRIMARY KEY, v db.typ); +`) + + // Back up the database, drop it, and attempt to restore it; the knob installed above fails the restore with "boom", after which system.namespace must hold no entry named typ. + sqlDB.Exec(t, `BACKUP DATABASE db TO 'nodelocal://0/test/'`) + sqlDB.Exec(t, `DROP DATABASE db`) + sqlDB.ExpectErr(t, "boom", `RESTORE DATABASE db FROM 'nodelocal://0/test/'`) + sqlDB.CheckQueryResults(t, `SELECT count(*) FROM system.namespace WHERE name = 'typ'`, [][]string{{"0"}}) +} + // TestRestoreResetsDescriptorVersions tests that new descriptors created while // restoring have their versions reset. 
Descriptors end up at version 2 after // the job is finished, since they are updated once at the end of the job to diff --git a/pkg/ccl/backupccl/restore_job.go b/pkg/ccl/backupccl/restore_job.go index 42c3a780f3d9..3bdbd1d06d3b 100644 --- a/pkg/ccl/backupccl/restore_job.go +++ b/pkg/ccl/backupccl/restore_job.go @@ -1896,6 +1896,16 @@ func (r *restoreResumer) dropDescriptors( // TypeDescriptors don't have a GC job process, so we can just write them // as dropped here. typDesc := details.TypeDescs[i] + mutType, err := descsCol.GetMutableTypeByID(ctx, txn, typDesc.ID, tree.ObjectLookupFlags{ + CommonLookupFlags: tree.CommonLookupFlags{ + AvoidCached: true, + IncludeOffline: true, + }, + }) + if err != nil { + return err + } + catalogkv.WriteObjectNamespaceEntryRemovalToBatch( ctx, b, @@ -1905,6 +1915,11 @@ func (r *restoreResumer) dropDescriptors( typDesc.Name, false, /* kvTrace */ ) + mutType.State = descpb.DescriptorState_DROP + if err := descsCol.WriteDescToBatch(ctx, false /* kvTrace */, mutType, b); err != nil { + return errors.Wrap(err, "writing dropping type to batch") + } + // Remove the system.descriptor entry. b.Del(catalogkeys.MakeDescMetadataKey(codec, typDesc.ID)) }