From d201ba6d5707d8f9eb5ea1cf3f85247e7ca3dbfc Mon Sep 17 00:00:00 2001 From: Michael Erickson <michae2@cockroachlabs.com> Date: Fri, 11 Aug 2023 12:07:49 -0700 Subject: [PATCH] importer: only check import *atomicity* in TestImportWorkerFailure Five years ago, in #26881, we changed import to retry on worker failures, which made imports much more resilient to transient failures like nodes going down. As part of this work we created `TestImportWorkerFailure` which shuts down one node during an import, and checks that the import succeeded. Unfortunately, this test was checked-in skipped, because though imports were much more resilient to node failures, they were not completely resilient in every possible scenario, making the test flakey. Two months ago, in #105712, we unskipped this test and discovered that in some cases the import statement succeeded but only imported a partial dataset. This non-atomicity seems like a bigger issue than whether the import is able to succeed in every possible transient failure scenario, and is tracked separately in #108547. This PR changes `TestImportWorkerFailure` to remove successful import as a necessary condition for test success. Instead, the test now only checks whether the import was atomic; that is, whether a successful import imported all data or a failed import imported none. This is more in line with what we can guarantee about imports today. Fixes: #102839 Release note: None --- pkg/sql/importer/import_stmt_test.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pkg/sql/importer/import_stmt_test.go b/pkg/sql/importer/import_stmt_test.go index 149bbd0a244b..10bfebe0c3df 100644 --- a/pkg/sql/importer/import_stmt_test.go +++ b/pkg/sql/importer/import_stmt_test.go @@ -5253,10 +5253,15 @@ func TestImportWorkerFailure(t *testing.T) { tc.StopServer(1) close(allowResponse) - // We expect the statement to retry since it should have encountered a - // retryable error. + // We expect the IMPORT statement to usually retry since it should have + // encountered a retryable error. We don't currently catch all such retryable + // errors, however, so in some cases the IMPORT statement will return the + // error to the client. In this case we verify that the import was completely + // rolled back. if err := <-errCh; err != nil { - t.Fatal(err) + t.Logf("%s failed, checking that imported data was completely removed: %q", query, err) + sqlDB.CheckQueryResults(t, `SELECT * FROM t ORDER BY i`, [][]string{}) + return } // But the job should be restarted and succeed eventually.