Skip to content

Commit

Permalink
backupccl: add all new restored descriptors in offline state
Browse files Browse the repository at this point in the history
This change updates the RESTORE job to add all new descriptors prior to
restoring data in the OFFLINE state, and update their state to PUBLIC
after data has been restored. This makes all descriptors behave like
tables.

Release justification: Low risk, high benefit changes to existing
functionality (for database descriptors; other changes apply only to new
functionality)

Release note (sql change): Databases being restored will now be in the
offline state, invisible to users, until the data has been restored.
This is the same as the existing behavior for restored tables. (This
change is also applied to enums and user-defined schemas being restored,
which is a change relative to only the 20.2 alpha releases.)
  • Loading branch information
thoszhang committed Sep 9, 2020
1 parent 2b2dfb9 commit d8c6d8a
Show file tree
Hide file tree
Showing 4 changed files with 707 additions and 353 deletions.
271 changes: 271 additions & 0 deletions pkg/ccl/backupccl/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
Expand Down Expand Up @@ -6037,6 +6038,276 @@ func TestBackupDoesNotHangOnIntent(t *testing.T) {
require.Error(t, tx.Commit())
}

// TestRestoreResetsDescriptorVersions tests that new descriptors created while
// restoring have their versions reset. Descriptors end up at version 2 after
// the job is finished, since they are updated once at the end of the job to
// make them public.
func TestRestoreResetsDescriptorVersions(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

_, tc, sqlDB, _, cleanupFn := BackupRestoreTestSetup(t, singleNode, 0, InitNone)
defer cleanupFn()
kvDB := tc.Server(0).DB()

// Create some descriptors and do some schema changes to bump their versions.
sqlDB.Exec(t, `
CREATE DATABASE d_old;
ALTER DATABASE d_old RENAME TO d;
USE d;
CREATE SCHEMA sc_old;
ALTER SCHEMA sc_old RENAME TO sc;
CREATE TABLE sc.tb (x INT);
ALTER TABLE sc.tb ADD COLUMN a INT;
CREATE TYPE sc.typ AS ENUM ('hello');
ALTER TYPE sc.typ ADD VALUE 'hi';
`)
// Back up the database.
sqlDB.Exec(t, `BACKUP DATABASE d TO 'nodelocal://0/test/'`)

// Drop the database and restore into it.
sqlDB.Exec(t, `DROP DATABASE d`)
sqlDB.Exec(t, `RESTORE DATABASE d FROM 'nodelocal://0/test/'`)

dbDesc := catalogkv.TestingGetDatabaseDescriptor(kvDB, keys.SystemSQLCodec, "d")
require.EqualValues(t, 2, dbDesc.Version)

schemaDesc := catalogkv.TestingGetSchemaDescriptor(kvDB, keys.SystemSQLCodec, dbDesc.GetID(), "sc")
require.EqualValues(t, 2, schemaDesc.Version)

tableDesc := catalogkv.TestingGetTableDescriptorFromSchema(kvDB, keys.SystemSQLCodec, "d", "sc", "tb")
require.EqualValues(t, 2, tableDesc.Version)

typeDesc := catalogkv.TestingGetTypeDescriptorFromSchema(kvDB, keys.SystemSQLCodec, "d", "sc", "typ")
require.EqualValues(t, 2, typeDesc.Version)
}

func TestOfflineDescriptorsDuringRestore(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

// Protects beforePublishingNotification, continueNotification.
var mu syncutil.Mutex
// Closed when the restore job has reached the point right before publishing
// descriptors.
beforePublishingNotification := make(chan struct{})
// Closed when we're ready to resume with the restore.
continueNotification := make(chan struct{})
initBackfillNotification := func() (chan struct{}, chan struct{}) {
mu.Lock()
defer mu.Unlock()
beforePublishingNotification = make(chan struct{})
continueNotification = make(chan struct{})
return beforePublishingNotification, continueNotification
}
notifyBackfill := func(ctx context.Context) {
mu.Lock()
defer mu.Unlock()
if beforePublishingNotification != nil {
close(beforePublishingNotification)
beforePublishingNotification = nil
}
select {
case <-continueNotification:
return
case <-ctx.Done():
return
}
}

t.Run("restore-database", func(t *testing.T) {
ctx, tc, sqlDB, _, cleanupFn := BackupRestoreTestSetup(t, singleNode, 0, InitNone)
defer cleanupFn()
ctx, cancel := context.WithCancel(ctx)
defer cancel()
kvDB := tc.Server(0).DB()

for _, server := range tc.Servers {
registry := server.JobRegistry().(*jobs.Registry)
registry.TestingResumerCreationKnobs = map[jobspb.Type]func(raw jobs.Resumer) jobs.Resumer{
jobspb.TypeRestore: func(raw jobs.Resumer) jobs.Resumer {
r := raw.(*restoreResumer)
r.testingKnobs.beforePublishingDescriptors = func() error {
notifyBackfill(ctx)
return nil
}
return r
},
}
}

sqlDB.Exec(t, `
CREATE DATABASE d;
USE d;
CREATE SCHEMA sc;
CREATE TABLE sc.tb (x INT);
CREATE TYPE sc.typ AS ENUM ('hello');
`)

// Back up the database.
sqlDB.Exec(t, `BACKUP DATABASE d TO 'nodelocal://0/test/'`)

// Drop the database and restore into it.
sqlDB.Exec(t, `DROP DATABASE d`)

beforePublishingNotif, continueNotif := initBackfillNotification()
g := ctxgroup.WithContext(ctx)
g.GoCtx(func(ctx context.Context) error {
if _, err := sqlDB.DB.ExecContext(ctx, `RESTORE DATABASE d FROM 'nodelocal://0/test/'`); err != nil {
t.Fatal(err)
}
return nil
})

<-beforePublishingNotif

// Verify that the descriptors are offline.

dbDesc := catalogkv.TestingGetDatabaseDescriptor(kvDB, keys.SystemSQLCodec, "d")
require.Equal(t, descpb.DescriptorState_OFFLINE, dbDesc.State)

schemaDesc := catalogkv.TestingGetSchemaDescriptor(kvDB, keys.SystemSQLCodec, dbDesc.GetID(), "sc")
require.Equal(t, descpb.DescriptorState_OFFLINE, schemaDesc.State)

tableDesc := catalogkv.TestingGetTableDescriptorFromSchema(kvDB, keys.SystemSQLCodec, "d", "sc", "tb")
require.Equal(t, descpb.DescriptorState_OFFLINE, tableDesc.State)

typeDesc := catalogkv.TestingGetTypeDescriptorFromSchema(kvDB, keys.SystemSQLCodec, "d", "sc", "typ")
require.Equal(t, descpb.DescriptorState_OFFLINE, typeDesc.State)

// Verify that the descriptors are not visible.
// TODO (lucy): Arguably there should be a SQL test where we manually create
// the offline descriptors. This part doesn't have much to do with RESTORE
// per se.

// Sometimes name resolution doesn't result in an "offline" error because
// the lookups are performed in planner.LookupObject(), which sets the
// Required flag to false so that callers can decide what to do with a
// negative result, but also means that we never generate the error in the
// first place. Right now we just settle for having some error reported, even
// if it's not the ideal error.

sqlDB.CheckQueryResults(t, `SHOW DATABASES`, [][]string{{"data"}, {"defaultdb"}, {"postgres"}, {"system"}})

sqlDB.ExpectErr(t, `database "d" is offline: restoring`, `USE d`)

sqlDB.ExpectErr(t, `target database or schema does not exist`, `SHOW TABLES FROM d`)
sqlDB.ExpectErr(t, `target database or schema does not exist`, `SHOW TABLES FROM d.sc`)

sqlDB.ExpectErr(t, `relation "d.sc.tb" does not exist`, `SELECT * FROM d.sc.tb`)
sqlDB.ExpectErr(t, `relation "d.sc.tb" does not exist`, `ALTER TABLE d.sc.tb ADD COLUMN b INT`)

sqlDB.ExpectErr(t, `type "d.sc.typ" does not exist`, `ALTER TYPE d.sc.typ RENAME TO typ2`)

sqlDB.ExpectErr(t, `cannot create "d.sc.other" because the target database or schema does not exist`, `CREATE TABLE d.sc.other()`)

close(continueNotif)
require.NoError(t, g.Wait())
})

t.Run("restore-into-existing-database", func(t *testing.T) {
ctx, tc, sqlDB, _, cleanupFn := BackupRestoreTestSetup(t, singleNode, 0, InitNone)
defer cleanupFn()
ctx, cancel := context.WithCancel(ctx)
defer cancel()
kvDB := tc.Server(0).DB()

for _, server := range tc.Servers {
registry := server.JobRegistry().(*jobs.Registry)
registry.TestingResumerCreationKnobs = map[jobspb.Type]func(raw jobs.Resumer) jobs.Resumer{
jobspb.TypeRestore: func(raw jobs.Resumer) jobs.Resumer {
r := raw.(*restoreResumer)
r.testingKnobs.beforePublishingDescriptors = func() error {
notifyBackfill(ctx)
return nil
}
return r
},
}
}

sqlDB.Exec(t, `
CREATE DATABASE d;
USE d;
CREATE SCHEMA sc;
CREATE TABLE sc.tb (x INT);
CREATE TYPE sc.typ AS ENUM ('hello');
`)

// Back up the database.
sqlDB.Exec(t, `BACKUP DATABASE d TO 'nodelocal://0/test/'`)

// Drop the database.
sqlDB.Exec(t, `DROP DATABASE d`)

// Create a new database and restore into it.
sqlDB.Exec(t, `CREATE DATABASE newdb`)

beforePublishingNotif, continueNotif := initBackfillNotification()
g := ctxgroup.WithContext(ctx)
g.GoCtx(func(ctx context.Context) error {
if _, err := sqlDB.DB.ExecContext(ctx, `RESTORE d.* FROM 'nodelocal://0/test/' WITH into_db='newdb'`); err != nil {
t.Fatal(err)
}
return nil
})

<-beforePublishingNotif

// Verify that the descriptors are offline.

dbDesc := catalogkv.TestingGetDatabaseDescriptor(kvDB, keys.SystemSQLCodec, "newdb")
schemaDesc := catalogkv.TestingGetSchemaDescriptor(kvDB, keys.SystemSQLCodec, dbDesc.GetID(), "sc")
require.Equal(t, descpb.DescriptorState_OFFLINE, schemaDesc.State)

tableDesc := catalogkv.TestingGetTableDescriptorFromSchema(kvDB, keys.SystemSQLCodec, "newdb", "sc", "tb")
require.Equal(t, descpb.DescriptorState_OFFLINE, tableDesc.State)

typeDesc := catalogkv.TestingGetTypeDescriptorFromSchema(kvDB, keys.SystemSQLCodec, "newdb", "sc", "typ")
require.Equal(t, descpb.DescriptorState_OFFLINE, typeDesc.State)

// Verify that the descriptors are not visible.
// TODO (lucy): Arguably there should be a SQL test where we manually create
// the offline descriptors. This part doesn't have much to do with RESTORE
// per se.

// Sometimes name resolution doesn't result in an "offline" error because
// the lookups are performed in planner.LookupObject(), which sets the
// Required flag to false so that callers can decide what to do with a
// negative result, but also means that we never generate the error in the
// first place. Right now we just settle for having some error reported, even
// if it's not the ideal error.

sqlDB.Exec(t, `USE newdb`)

sqlDB.CheckQueryResults(t, `SHOW TABLES`, [][]string{})
sqlDB.CheckQueryResults(t, `SHOW TYPES`, [][]string{})
sqlDB.CheckQueryResults(t, `SHOW SCHEMAS`, [][]string{
{"crdb_internal"}, {"information_schema"}, {"pg_catalog"}, {"pg_extension"}, {"public"},
})

sqlDB.ExpectErr(t, `target database or schema does not exist`, `SHOW TABLES FROM newdb.sc`)

sqlDB.ExpectErr(t, `schema "sc" is offline: restoring`, `SELECT * FROM newdb.sc.tb`)
sqlDB.ExpectErr(t, `schema "sc" is offline: restoring`, `SELECT * FROM sc.tb`)
sqlDB.ExpectErr(t, `schema "sc" is offline: restoring`, `ALTER TABLE newdb.sc.tb ADD COLUMN b INT`)
sqlDB.ExpectErr(t, `schema "sc" is offline: restoring`, `ALTER TABLE sc.tb ADD COLUMN b INT`)

sqlDB.ExpectErr(t, `schema "sc" is offline: restoring`, `ALTER TYPE newdb.sc.typ RENAME TO typ2`)
sqlDB.ExpectErr(t, `schema "sc" is offline: restoring`, `ALTER TYPE sc.typ RENAME TO typ2`)

sqlDB.ExpectErr(t, `schema "sc" is offline: restoring`, `ALTER SCHEMA sc RENAME TO sc2`)

close(continueNotif)
require.NoError(t, g.Wait())

})
}

// TestManifestBitFlip tests that we can detect a corrupt manifest when a bit
// was flipped on disk for both an unencrypted and an encrypted manifest.
func TestManifestBitFlip(t *testing.T) {
Expand Down
Loading

0 comments on commit d8c6d8a

Please sign in to comment.