Skip to content

Commit

Permalink
Merge #119342
Browse files Browse the repository at this point in the history
119342: backupccl: deflake TestDataDriven_restore_on_fail_or_cancel_retry r=msbutler a=stevendanna

Full cluster restore drops the default DB. The test driver caches connections that may have originally connected to a database that is now dropped. This causes problems for queries issued after the full cluster restore.

Here, (1) I change the query we use to get job IDs to one that doesn't depend on doing any search path lookups and (2) reset all of our connections after the first restore we do.

See also #88913

Fixes #119079

Release note: None

Co-authored-by: Steven Danna <[email protected]>
  • Loading branch information
craig[bot] and stevendanna committed Mar 11, 2024
2 parents eb53b06 + af15a37 commit 1741d97
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 5 deletions.
18 changes: 13 additions & 5 deletions pkg/ccl/backupccl/datadriven_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,13 @@ func (d *datadrivenTestState) getIODir(t *testing.T, name string) string {
return dir
}

// clearConnCache closes every connection currently held in d.sqlDBs and then
// swaps in a fresh, empty map, so none of the previously cached handles remain
// reachable through the test state.
func (d *datadrivenTestState) clearConnCache() {
	for key := range d.sqlDBs {
		// The result of Close is intentionally discarded: the handle is being
		// thrown away regardless of whether it shut down cleanly.
		_ = d.sqlDBs[key].Close()
	}
	d.sqlDBs = map[sqlDBKey]*gosql.DB{}
}

// getSQLDB is a convenience wrapper around getSQLDBForVC: it forwards t, name
// and user unchanged and supplies the fixed string "default" as the argument
// between name and user.
func (d *datadrivenTestState) getSQLDB(t *testing.T, name string, user string) *gosql.DB {
	const defaultVC = "default"
	return d.getSQLDBForVC(t, name, defaultVC, user)
}
Expand Down Expand Up @@ -463,14 +470,13 @@ func runTestDataDriven(t *testing.T, testFilePathFromWorkspace string) {
ds.noticeBuffer = nil
const user = "root"
sqlDB := ds.getSQLDB(t, lastCreatedCluster, user)
// First, run the schema change.

_, err := sqlDB.Exec(d.Input)

var jobID jobspb.JobID
{
const qFmt = `SELECT job_id FROM [SHOW JOBS] WHERE job_type = '%s' ORDER BY created DESC LIMIT 1`
errJob := sqlDB.QueryRow(fmt.Sprintf(qFmt, jobType)).Scan(&jobID)
const query = `SELECT id FROM system.jobs WHERE job_type = $1 ORDER BY created DESC LIMIT 1`
errJob := sqlDB.QueryRow(query, jobType.String()).Scan(&jobID)
if !errors.Is(errJob, gosql.ErrNoRows) {
require.NoError(t, errJob)
}
Expand Down Expand Up @@ -525,7 +531,9 @@ func runTestDataDriven(t *testing.T, testFilePathFromWorkspace string) {
ds.cleanupFns = append(ds.cleanupFns, nodelocalCleanup)
}
return ""

case "clear-conn-cache":
ds.clearConnCache()
return ""
case "new-cluster":
var name, shareDirWith, iodir, localities, beforeVersion, testingKnobCfg string
var splits int
Expand Down Expand Up @@ -646,7 +654,7 @@ func runTestDataDriven(t *testing.T, testFilePathFromWorkspace string) {
var jobID jobspb.JobID
require.NoError(t,
ds.getSQLDB(t, cluster, user).QueryRow(
`SELECT job_id FROM [SHOW JOBS] ORDER BY created DESC LIMIT 1`).Scan(&jobID))
`SELECT id FROM system.jobs ORDER BY created DESC LIMIT 1`).Scan(&jobID))
fmt.Printf("expecting pausepoint, found job ID %d\n\n\n", jobID)

runner := sqlutils.MakeSQLRunner(ds.getSQLDB(t, cluster, user))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ RESTORE FROM LATEST IN 'nodelocal://1/cluster_backup';
----
job paused at pausepoint

clear-conn-cache
----

exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = '';
----
Expand Down

0 comments on commit 1741d97

Please sign in to comment.