Skip to content

Commit

Permalink
Merge #71890 #72403
Browse files Browse the repository at this point in the history
71890: backup/restore: Remove dependencies on defaultdb and postgres for full cluster restore r=adityamaru a=RichardJCai

Previously we hardcoded some cases where defaultdb and postgres were
assumed to be present during the full cluster restore.

With this change, we make it so we treat defaultdb/postgres to be
regular databases for restore - they are dropped before the restore
happens.

Additionally pg_temp_schemas are now restored to their original
parent db and not defaultdb.

Release note (bulkio): temporary tables are now restored to their
original database instead of defaultdb during a full cluster restore.

Resolve #71479

72403: sqlproxy: add a hint to the error returned by the connection throttle r=JeffSwenson a=JeffSwenson

The most common cause of connection throttling is a misconfigured
user or password. Providing a hint should make it clear to a user
what they need to do to fix it.

There was a bug in the toPgError handler that prevented it from handling
hints properly. It was taking hints from the internal code error instead
of the wrapping error.

Release note: None

Co-authored-by: richardjcai <[email protected]>
Co-authored-by: Jeff <[email protected]>
  • Loading branch information
3 people committed Nov 10, 2021
3 parents fcb5221 + 2276b0a + b8de5ff commit 2de17e7
Show file tree
Hide file tree
Showing 14 changed files with 245 additions and 158 deletions.
62 changes: 38 additions & 24 deletions pkg/ccl/backupccl/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2238,8 +2238,17 @@ INSERT INTO d.tb VALUES ('hello'), ('hello');
defer cleanupRestore()
// We should get an error when restoring the table.
sqlDBRestore.ExpectErr(t, "sst: no such file", `RESTORE FROM $1`, LocalFoo)

// Make sure the temp system db is not present.
row := sqlDBRestore.QueryStr(t, fmt.Sprintf(`SELECT * FROM [SHOW DATABASES] WHERE database_name = '%s'`, restoreTempSystemDB))
require.Equal(t, 0, len(row))

// Make sure defaultdb and postgres are recreated.
sqlDBRestore.CheckQueryResults(t,
`SELECT * FROM system.namespace WHERE name = 'defaultdb' OR name ='postgres'`, [][]string{
{"0", "0", "defaultdb", "70"},
{"0", "0", "postgres", "71"},
})
}

func TestBackupRestoreUserDefinedSchemas(t *testing.T) {
Expand Down Expand Up @@ -6065,7 +6074,6 @@ func TestBackupRestoreCorruptedStatsIgnored(t *testing.T) {

var tableID int
sqlDB.QueryRow(t, `SELECT id FROM system.namespace WHERE name = 'bank'`).Scan(&tableID)
fmt.Println(tableID)
sqlDB.Exec(t, `BACKUP data.bank TO $1`, dest)

// Overwrite the stats file with some invalid data.
Expand Down Expand Up @@ -8199,23 +8207,19 @@ func TestFullClusterTemporaryBackupAndRestore(t *testing.T) {
sqlDBRestore.Exec(t, `RESTORE FROM 'nodelocal://0/full_cluster_backup'`)

// Before the reconciliation job runs we should be able to see the following:
// - 4 synthesized pg_temp sessions in defaultdb.
// We synthesize a new temp schema for each unique backed-up <dbID, schemaID>
// tuple of a temporary table descriptor.
// - 2 synthesized pg_temp sessions in defaultdb and 1 each in db1 and db2.
// We synthesize a new temp schema for each unique backed-up schemaID
// of a temporary table descriptor.
// - All temp tables remapped to belong to the associated synthesized temp
// schema, and in the defaultdb.
checkSchemasQuery := `SELECT schema_name FROM [SHOW SCHEMAS] WHERE schema_name LIKE 'pg_temp_%' ORDER BY
schema_name`
sqlDBRestore.CheckQueryResults(t, checkSchemasQuery,
[][]string{{"pg_temp_0_0"}, {"pg_temp_0_1"}, {"pg_temp_0_2"}, {"pg_temp_0_3"}})
// schema in the original db.
checkSchemasQuery := `SELECT count(*) FROM [SHOW SCHEMAS] WHERE schema_name LIKE 'pg_temp_%'`
sqlDBRestore.CheckQueryResults(t, checkSchemasQuery, [][]string{{"2"}})

checkTempTablesQuery := `SELECT schema_name,
table_name FROM [SHOW TABLES] ORDER BY schema_name, table_name`
sqlDBRestore.CheckQueryResults(t, checkTempTablesQuery, [][]string{{"pg_temp_0_0", "t"},
{"pg_temp_0_1", "t"}, {"pg_temp_0_2", "t"}, {"pg_temp_0_3", "t"}})
checkTempTablesQuery := `SELECT table_name FROM [SHOW TABLES] ORDER BY table_name`
sqlDBRestore.CheckQueryResults(t, checkTempTablesQuery, [][]string{{"t"}, {"t"}})

// Sanity check that the databases the temporary tables originally belonged to
// are restored and empty because of the remapping.
// are restored.
sqlDBRestore.CheckQueryResults(t,
`SELECT database_name FROM [SHOW DATABASES] ORDER BY database_name`,
[][]string{{"d1"}, {"d2"}, {"defaultdb"}, {"postgres"}, {"system"}})
Expand All @@ -8228,26 +8232,36 @@ table_name FROM [SHOW TABLES] ORDER BY schema_name, table_name`
sqlDBRestore.QueryRow(t, checkCommentQuery).Scan(&commentCount)
require.Equal(t, commentCount, 2)

// Check that show tables in one of the restored DBs returns an empty result.
// Check that show tables in one of the restored DBs returns the temporary
// table.
sqlDBRestore.Exec(t, "USE d1")
sqlDBRestore.CheckQueryResults(t, "SHOW TABLES", [][]string{})
sqlDBRestore.CheckQueryResults(t, checkTempTablesQuery, [][]string{
{"t"},
})
sqlDBRestore.CheckQueryResults(t, checkSchemasQuery, [][]string{{"1"}})

sqlDBRestore.Exec(t, "USE d2")
sqlDBRestore.CheckQueryResults(t, "SHOW TABLES", [][]string{})
sqlDBRestore.CheckQueryResults(t, checkTempTablesQuery, [][]string{
{"t"},
})
sqlDBRestore.CheckQueryResults(t, checkSchemasQuery, [][]string{{"1"}})

testutils.SucceedsSoon(t, func() error {
ch <- timeutil.Now()
<-finishedCh

// Check that all the synthesized temp schemas have been wiped.
sqlDBRestore.CheckQueryResults(t, checkSchemasQuery, [][]string{})
for _, database := range []string{"defaultdb", "d1", "d2"} {
sqlDBRestore.Exec(t, fmt.Sprintf("USE %s", database))
// Check that all the synthesized temp schemas have been wiped.
sqlDBRestore.CheckQueryResults(t, checkSchemasQuery, [][]string{{"0"}})

// Check that all the temp tables have been wiped.
sqlDBRestore.CheckQueryResults(t, checkTempTablesQuery, [][]string{})
// Check that all the temp tables have been wiped.
sqlDBRestore.CheckQueryResults(t, checkTempTablesQuery, [][]string{})

// Check that all the temp table comments have been wiped.
sqlDBRestore.QueryRow(t, checkCommentQuery).Scan(&commentCount)
require.Equal(t, commentCount, 0)
// Check that all the temp table comments have been wiped.
sqlDBRestore.QueryRow(t, checkCommentQuery).Scan(&commentCount)
require.Equal(t, commentCount, 0)
}
return nil
})
}
Expand Down
83 changes: 79 additions & 4 deletions pkg/ccl/backupccl/full_cluster_backup_restore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,14 @@ CREATE TABLE data2.foo (a int);
// Check that zones are restored during pre-restore.
t.Run("ensure zones are restored during pre-restore", func(t *testing.T) {
<-restoredZones
checkZones := "SELECT * FROM system.zones"
// Not specifying the schema makes the query search using defaultdb first,
// which ends up returning the error
// pq: database "defaultdb" is offline: restoring
checkZones := "SELECT * FROM system.public.zones"
sqlDBRestore.CheckQueryResults(t, checkZones, sqlDB.QueryStr(t, checkZones))

// Check that the user tables are still offline.
sqlDBRestore.ExpectErr(t, "database \"data\" is offline: restoring", "SELECT * FROM data.bank")
sqlDBRestore.ExpectErr(t, "database \"data\" is offline: restoring", "SELECT * FROM data.public.bank")

// Check there is no data in the span that we expect user data to be imported.
store := tcRestore.GetFirstStoreFromServer(t, 0)
Expand Down Expand Up @@ -610,7 +613,7 @@ func TestClusterRestoreFailCleanup(t *testing.T) {
// Note that the system tables here correspond to the temporary tables
// imported, not the system tables themselves.
sqlDBRestore.CheckQueryResults(t,
`SELECT name FROM crdb_internal.tables WHERE state = 'DROP' ORDER BY name`,
`SELECT name FROM system.crdb_internal.tables WHERE state = 'DROP' ORDER BY name`,
[][]string{
{"bank"},
{"comments"},
Expand Down Expand Up @@ -700,7 +703,7 @@ func TestClusterRestoreFailCleanup(t *testing.T) {
// Note that the system tables here correspond to the temporary tables
// imported, not the system tables themselves.
sqlDBRestore.CheckQueryResults(t,
`SELECT name FROM crdb_internal.tables WHERE state = 'DROP' ORDER BY name`,
`SELECT name FROM system.crdb_internal.tables WHERE state = 'DROP' ORDER BY name`,
[][]string{
{"bank"},
{"comments"},
Expand Down Expand Up @@ -1029,3 +1032,75 @@ CREATE TABLE bar (id INT);
BACKUP TO 'nodelocal://1/foo' WITH revision_history;
`)
}

// TestRestoreWithRecreatedDefaultDB checks that a full cluster restore
// round-trips a defaultdb that was dropped and recreated before the backup,
// i.e. the restored defaultdb keeps its non-default descriptor ID rather
// than being replaced by a freshly synthesized database.
func TestRestoreWithRecreatedDefaultDB(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	srcDB, dir, cleanupSrc := createEmptyCluster(t, singleNode)
	defer cleanupSrc()
	_, _, destDB, cleanupDest := backupRestoreTestSetupEmpty(t, singleNode, dir, InitManualReplication, base.TestClusterArgs{})
	defer cleanupDest()

	// Drop and recreate defaultdb so it is assigned a fresh descriptor ID
	// before the backup is taken.
	srcDB.Exec(t, `
DROP DATABASE defaultdb;
CREATE DATABASE defaultdb;
`)
	srcDB.Exec(t, `BACKUP TO $1`, LocalFoo)

	destDB.Exec(t, `RESTORE FROM $1`, LocalFoo)

	// Since we dropped and recreated defaultdb, defaultdb has the next available
	// id which is 52.
	// NOTE(review): this pins a concrete descriptor-ID allocation; it will
	// break if ID assignment in an empty cluster ever changes — confirm.
	const expectedDefaultDBID = "52"

	destDB.CheckQueryResults(t,
		`SELECT * FROM system.namespace WHERE name = 'defaultdb'`,
		[][]string{{"0", "0", "defaultdb", expectedDefaultDBID}})
}

// TestRestoreWithDroppedDefaultDB checks that a full cluster restore of a
// backup taken after defaultdb was dropped does NOT resurrect defaultdb on
// the destination cluster: no namespace entry for it should exist afterwards.
func TestRestoreWithDroppedDefaultDB(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	srcDB, dir, cleanupSrc := createEmptyCluster(t, singleNode)
	defer cleanupSrc()
	_, _, destDB, cleanupDest := backupRestoreTestSetupEmpty(t, singleNode, dir, InitManualReplication, base.TestClusterArgs{})
	defer cleanupDest()

	// Remove defaultdb from the source cluster before backing it up.
	srcDB.Exec(t, `
DROP DATABASE defaultdb;
`)
	srcDB.Exec(t, `BACKUP TO $1`, LocalFoo)

	destDB.Exec(t, `RESTORE FROM $1`, LocalFoo)

	// The restored cluster must not contain a defaultdb namespace entry.
	destDB.CheckQueryResults(t,
		`SELECT count(*) FROM system.namespace WHERE name = 'defaultdb'`,
		[][]string{{"0"}})
}

// TestRestoreToClusterWithDroppedDefaultDB checks that restoring a backup
// that contains defaultdb into a destination cluster whose defaultdb has been
// dropped recreates defaultdb with exactly the namespace row it had in the
// source cluster.
func TestRestoreToClusterWithDroppedDefaultDB(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	srcDB, dir, cleanupSrc := createEmptyCluster(t, singleNode)
	defer cleanupSrc()
	_, _, destDB, cleanupDest := backupRestoreTestSetupEmpty(t, singleNode, dir, InitManualReplication, base.TestClusterArgs{})
	defer cleanupDest()

	// Capture defaultdb's namespace row on the source cluster; the restore on
	// the destination should reproduce it verbatim.
	var (
		parentID       int
		parentSchemaID int
		dbID           int
		dbName         string
	)
	srcDB.QueryRow(t, `SELECT * FROM system.namespace WHERE name = 'defaultdb'`).
		Scan(&parentID, &parentSchemaID, &dbName, &dbID)

	srcDB.Exec(t, `BACKUP TO $1`, LocalFoo)

	// Drop defaultdb on the destination before restoring.
	destDB.Exec(t, `
DROP DATABASE defaultdb;
`)
	destDB.Exec(t, `RESTORE FROM $1`, LocalFoo)
	destDB.CheckQueryResults(t,
		`SELECT * FROM system.namespace WHERE name = 'defaultdb'`,
		[][]string{{fmt.Sprint(parentID), fmt.Sprint(parentSchemaID), dbName, fmt.Sprint(dbID)}})
}
20 changes: 19 additions & 1 deletion pkg/ccl/backupccl/restore_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -2109,7 +2109,25 @@ func (r *restoreResumer) OnFailOrCancel(ctx context.Context, execCtx interface{}
}
}

return r.dropDescriptors(ctx, execCfg.JobRegistry, execCfg.Codec, txn, descsCol)
if err := r.dropDescriptors(ctx, execCfg.JobRegistry, execCfg.Codec, txn, descsCol); err != nil {
return err
}

if details.DescriptorCoverage == tree.AllDescriptors {
// We've dropped defaultdb and postgres in the planning phase, so we must
// recreate them now if the full cluster restore failed.
ie := p.ExecCfg().InternalExecutor
_, err := ie.Exec(ctx, "recreate-defaultdb", txn, "CREATE DATABASE IF NOT EXISTS defaultdb")
if err != nil {
return err
}

_, err = ie.Exec(ctx, "recreate-postgres", txn, "CREATE DATABASE IF NOT EXISTS postgres")
if err != nil {
return err
}
}
return nil
}); err != nil {
return err
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/ccl/backupccl/restore_mid_schema_change_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,12 @@ func expectedSCJobCount(scName string, isClusterRestore, after bool) int {
numBackgroundSCJobs = 1
}

// We drop defaultdb and postgres for full cluster restores
numBackgroundDropDatabaseSCJobs := 2
// Since we're doing a cluster restore, we need to account for all of
// the schema change jobs that existed in the backup.
if isClusterRestore {
expNumSCJobs += numBackgroundSCJobs
expNumSCJobs += numBackgroundSCJobs + numBackgroundDropDatabaseSCJobs

// If we're performing a cluster restore, we also need to include the drop
// crdb_temp_system job.
Expand Down
Loading

0 comments on commit 2de17e7

Please sign in to comment.