Skip to content

Commit

Permalink
importer: fix flaky TestImportIntoCSV
Browse files Browse the repository at this point in the history
This commit fixes an oversight of 117f712
that made TestImportIntoCSV flaky. In particular, that commit made it so
that if the import fails but is actually paused, the corresponding
import job is resumed. This is needed in order to bring the table back
online before it can be safely dropped. However, resuming of the job
might fail if the job still has the "pause-requested" status, thus, this
commit adds a waiting mechanism before the job transitions out of this
status. Additionally, that resume can hang (or take a very long time),
so this commit switches to canceling the job instead.

Release note: None
  • Loading branch information
yuzefovich committed Apr 6, 2023
1 parent 42abc0c commit edfe741
Showing 1 changed file with 20 additions and 10 deletions.
30 changes: 20 additions & 10 deletions pkg/sql/importer/import_stmt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3024,7 +3024,6 @@ func TestImportIntoCSV(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

skip.WithIssue(t, 100477, "programming error in dropTableAfterJobComplete below")
skip.UnderShort(t)
skip.UnderRace(t, "takes >1min under race")

Expand Down Expand Up @@ -3102,15 +3101,14 @@ func TestImportIntoCSV(t *testing.T) {
testNum := -1
insertedRows := numFiles * rowsPerFile

// Some of the tests result in a failing import. in this case,
// the table won't be able to drop until IMPORT's
// OnFailOrCancel brings the table back online.
// Some of the tests result in a failing import. In this case, the table
// won't be able to drop until IMPORT's OnFailOrCancel brings the table back
// online.
//
// Depening on which node that the import started on,
// we may have paused rather than failed. This is
// because _all_ errors that "rpc error" are
// retriable. If we end up with a cross-node nodelocal
// request, we get a pause.
// Depending on which node that the import started on, we may have paused
// rather than failed. This is because _all_ errors that "rpc error" are
// retriable. If we end up with a cross-node nodelocal request, we get a
// pause.
dropTableAfterJobComplete := func(t *testing.T, tableName string) {
var jobID int
row := conn.QueryRow("SELECT job_id FROM [SHOW JOBS] WHERE job_type = 'IMPORT' AND status IN ('paused', 'pause-requested', 'reverting')")
Expand All @@ -3119,7 +3117,19 @@ func TestImportIntoCSV(t *testing.T) {
t.Fatal(err)
}
if jobID != 0 {
sqlDB.Exec(t, "RESUME JOB $1", jobID)
// If the job has the "pause-requested" status, we must block until
// it transitions to the "paused" status (because we cannot cancel
// the job otherwise).
testutils.SucceedsSoon(t, func() error {
r := sqlDB.QueryRow(t, "SELECT status FROM [SHOW JOBS] WHERE job_id = $1", jobID)
var status string
r.Scan(&status)
if status == string(jobs.StatusPauseRequested) {
return errors.New("still has pause-requested status")
}
return nil
})
sqlDB.Exec(t, "CANCEL JOB $1", jobID)
sqlDB.Exec(t, "SHOW JOB WHEN COMPLETE $1", jobID)
}
sqlDB.Exec(t, fmt.Sprintf("DROP TABLE %s", tableName))
Expand Down

0 comments on commit edfe741

Please sign in to comment.