Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

backupccl: ignore all dropped descriptors during backup #76635

Merged
merged 1 commit into from
Feb 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions pkg/ccl/backupccl/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,14 +335,21 @@ func TestBackupRestoreDataDriven(t *testing.T) {
_, err := ds.getSQLDB(t, server, user).Exec(d.Input)
ret := ds.noticeBuffer
if err != nil {
ret = append(ds.noticeBuffer, err.Error())
if pqErr := (*pq.Error)(nil); errors.As(err, &pqErr) {
// pausepoint errors have the job ID in them, and datadriven tests
// don't seem to support regex matching. Clean the error up to not
// include the job ID.
if i := strings.Index(err.Error(), "paused before it completed with reason"); i != -1 {
ret = append(ds.noticeBuffer, err.Error()[i:])
} else if pqErr := (*pq.Error)(nil); errors.As(err, &pqErr) {
ret = append(ds.noticeBuffer, err.Error())
if pqErr.Detail != "" {
ret = append(ret, "DETAIL: "+pqErr.Detail)
}
if pqErr.Hint != "" {
ret = append(ret, "HINT: "+pqErr.Hint)
}
} else {
t.Errorf("failed to execute stmt %s due to %s", d.Input, err.Error())
}
}
return strings.Join(ret, "\n")
Expand Down
14 changes: 14 additions & 0 deletions pkg/ccl/backupccl/backupresolver/targets.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ func NewDescriptorResolver(descs []catalog.Descriptor) (*DescriptorResolver, err
// check the ParentID for tables, and all the valid parents must be
// known before we start to check that.
for _, desc := range descs {
if desc.Dropped() {
continue
}
if _, isDB := desc.(catalog.DatabaseDescriptor); isDB {
if _, ok := r.DbsByName[desc.GetName()]; ok {
return nil, errors.Errorf("duplicate database name: %q used for ID %d and %d",
Expand All @@ -207,6 +210,9 @@ func NewDescriptorResolver(descs []catalog.Descriptor) (*DescriptorResolver, err

// Add all schemas to the resolver.
for _, desc := range descs {
if desc.Dropped() {
continue
}
if sc, ok := desc.(catalog.SchemaDescriptor); ok {
schemaMap := r.ObjsByName[sc.GetParentID()]
if schemaMap == nil {
Expand Down Expand Up @@ -332,6 +338,14 @@ func DescriptorsMatchingTargets(
}
if _, ok := alreadyRequestedDBs[dbID]; !ok {
desc := r.DescByID[dbID]
// Verify that the database is in the correct state.
doesNotExistErr := errors.Errorf(`database %q does not exist`, d)
if err := catalog.FilterDescriptorState(
desc, tree.CommonLookupFlags{},
); err != nil {
// Return a does not exist error if explicitly asking for this database.
return ret, doesNotExistErr
}
ret.Descs = append(ret.Descs, desc)
ret.RequestedDBs = append(ret.RequestedDBs,
desc.(catalog.DatabaseDescriptor))
Expand Down
15 changes: 9 additions & 6 deletions pkg/ccl/backupccl/targets.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,9 @@ func getAllDescChanges(
return res, nil
}

// fullClusterTargets returns all of the tableDescriptors to be included in a
// full cluster backup, and all the user databases.
// fullClusterTargets returns all of the descriptors to be included in a full
// cluster backup, along with all the "complete databases" that we are backing
// up.
func fullClusterTargets(
allDescs []catalog.Descriptor,
) ([]catalog.Descriptor, []catalog.DatabaseDescriptor, error) {
Expand All @@ -240,6 +241,11 @@ func fullClusterTargets(
systemTablesToBackup := GetSystemTablesToIncludeInClusterBackup()

for _, desc := range allDescs {
// If a descriptor is in the DROP state at `EndTime` we do not want to
// include it in the backup.
if desc.Dropped() {
continue
}
switch desc := desc.(type) {
case catalog.DatabaseDescriptor:
dbDesc := dbdesc.NewBuilder(desc.DatabaseDesc()).BuildImmutableDatabase()
Expand All @@ -256,10 +262,7 @@ func fullClusterTargets(
fullClusterDescs = append(fullClusterDescs, desc)
}
} else {
// Add all user tables that are not in a DROP state.
if !desc.Dropped() {
fullClusterDescs = append(fullClusterDescs, desc)
}
fullClusterDescs = append(fullClusterDescs, desc)
}
case catalog.SchemaDescriptor:
fullClusterDescs = append(fullClusterDescs, desc)
Expand Down
200 changes: 200 additions & 0 deletions pkg/ccl/backupccl/testdata/backup-restore/backup-dropped-descriptors
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# backup-dropped-descriptors tests backup and restore interaction with database, schema
# and type descriptors in the DROP state.
subtest dropped-database-descriptors

new-server name=s1
----

exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = 'schemachanger.before.exec';
CREATE DATABASE d;
CREATE TABLE d.foo (id INT);
DROP DATABASE d CASCADE;
----
paused before it completed with reason: pause point "schemachanger.before.exec" hit

# At this point, we have a descriptor entry for `d` in a DROP state.
query-sql
WITH tbls AS (
SELECT id, crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', descriptor) AS orig FROM system.descriptor
)
SELECT orig->'database'->'name', orig->'database'->'state' FROM tbls WHERE id = 107;
----
"d" "DROP"

# A database backup should fail since we are explicitly targeting a dropped
# object.
exec-sql
BACKUP DATABASE d INTO 'nodelocal://0/dropped-database';
----
pq: failed to resolve targets specified in the BACKUP stmt: database "d" does not exist, or invalid RESTORE timestamp: supplied backups do not cover requested time

# A cluster backup should succeed but should ignore the dropped database
# and table descriptors.
exec-sql
BACKUP INTO 'nodelocal://0/cluster/dropped-database';
----

query-sql
SELECT count(*) FROM [SHOW BACKUP LATEST IN 'nodelocal://0/cluster/dropped-database'] WHERE object_name = 'd' OR object_name = 'foo';
----
0

# Now create another descriptor entry with the same name in a PUBLIC state.
exec-sql
CREATE DATABASE d;
CREATE TABLE d.bar (id INT);
----

# A database backup should succeed since we have a public database descriptor that matches the
# target.
exec-sql
BACKUP DATABASE d INTO 'nodelocal://0/dropped-database';
----

# A cluster backup should succeed and include the public database descriptor and
# its table.
exec-sql
BACKUP INTO 'nodelocal://0/cluster/dropped-database';
----

# Restore from the database backup to ensure it is valid.
# Sanity check that we did not backup the table 'foo' that belonged to the
# dropped database 'd'.
exec-sql
RESTORE DATABASE d FROM LATEST IN 'nodelocal://0/dropped-database' WITH new_db_name = 'd1';
USE d1;
----

query-sql
SELECT schema_name,table_name FROM [SHOW TABLES];
----
public bar

# Restore from the cluster backup to ensure it is valid.
# Sanity check that we did not backup the table 'foo' that belonged to the
# dropped database 'd'.
exec-sql
RESTORE DATABASE d FROM LATEST IN 'nodelocal://0/cluster/dropped-database' WITH new_db_name = 'd2';
USE d2;
----

query-sql
SELECT schema_name,table_name FROM [SHOW TABLES];
----
public bar

subtest end

# Test backup/restore interaction with dropped schema and type in a database.
subtest dropped-schema-descriptors

new-server name=s2
----

exec-sql
CREATE DATABASE d2;
CREATE TABLE d2.t2 (id INT);
----

exec-sql
CREATE TYPE d2.typ AS ENUM ('hello');
CREATE SCHEMA d2.s;
CREATE TABLE d2.s.t (id INT);
SET CLUSTER SETTING jobs.debug.pausepoints = 'schemachanger.before.exec';
DROP SCHEMA d2.s CASCADE;
----
paused before it completed with reason: pause point "schemachanger.before.exec" hit

exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = 'typeschemachanger.before.exec';
DROP TYPE d2.typ;
----
paused before it completed with reason: pause point "typeschemachanger.before.exec" hit

query-sql
WITH tbls AS (
SELECT id, crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', descriptor) AS orig FROM system.descriptor
)
SELECT orig->'schema'->'name', orig->'schema'->'state' FROM tbls WHERE id = 112;
----
"s" "DROP"


query-sql
WITH tbls AS (
SELECT id, crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', descriptor) AS orig FROM system.descriptor
)
SELECT orig->'type'->'name', orig->'type'->'state' FROM tbls WHERE id = 110 OR id = 111;
----
"typ" "DROP"
"_typ" "DROP"

# A database backup should succeed but should not include the dropped schema,
# type, and table.
exec-sql
BACKUP DATABASE d2 INTO 'nodelocal://0/dropped-schema-in-database';
----

query-sql
SELECT count(*) FROM [SHOW BACKUP LATEST IN 'nodelocal://0/dropped-schema-in-database'] WHERE
object_name = 's' OR object_name = 'typ';
----
0


# A cluster backup should succeed but should not include the dropped schema,
# type, and table.
exec-sql
BACKUP INTO 'nodelocal://0/cluster/dropped-schema-in-database';
----

query-sql
SELECT count(*) FROM [SHOW BACKUP LATEST IN 'nodelocal://0/cluster/dropped-schema-in-database']
WHERE object_name = 's' OR object_name = 'typ';
----
0

# Restore the backups to check they are valid.
exec-sql
RESTORE DATABASE d2 FROM LATEST IN 'nodelocal://0/dropped-schema-in-database' WITH new_db_name = 'd3';
USE d3;
----

# We don't expect to see the dropped schema 's'.
query-sql
SELECT schema_name FROM [SHOW SCHEMAS];
----
public
crdb_internal
information_schema
pg_catalog
pg_extension


query-sql
SELECT schema_name, table_name FROM [SHOW TABLES];
----
public t2


exec-sql
RESTORE DATABASE d2 FROM LATEST IN 'nodelocal://0/cluster/dropped-schema-in-database' WITH new_db_name ='d4';
USE d4;
----

query-sql
SELECT schema_name FROM [SHOW SCHEMAS];
----
public
crdb_internal
information_schema
pg_catalog
pg_extension

query-sql
SELECT schema_name, table_name FROM [SHOW TABLES];
----
public t2

subtest end
3 changes: 3 additions & 0 deletions pkg/sql/schema_changer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2248,6 +2248,9 @@ func (r schemaChangeResumer) Resume(ctx context.Context, execCtx interface{}) er
for r := retry.StartWithCtx(ctx, opts); r.Next(); {
// Note that r.Next always returns true on first run so exec will be
// called at least once before there is a chance for this loop to exit.
if err := p.ExecCfg().JobRegistry.CheckPausepoint("schemachanger.before.exec"); err != nil {
return err
}
scErr = sc.exec(ctx)
switch {
case scErr == nil:
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/type_change.go
Original file line number Diff line number Diff line change
Expand Up @@ -1232,6 +1232,9 @@ func (t *typeSchemaChanger) execWithRetry(ctx context.Context) error {
Multiplier: 1.5,
}
for r := retry.StartWithCtx(ctx, opts); r.Next(); {
if err := t.execCfg.JobRegistry.CheckPausepoint("typeschemachanger.before.exec"); err != nil {
return err
}
tcErr := t.exec(ctx)
switch {
case tcErr == nil:
Expand Down