From d201ba6d5707d8f9eb5ea1cf3f85247e7ca3dbfc Mon Sep 17 00:00:00 2001
From: Michael Erickson <michae2@cockroachlabs.com>
Date: Fri, 11 Aug 2023 12:07:49 -0700
Subject: [PATCH] importer: only check import *atomicity* in
 TestImportWorkerFailure

Five years ago, in #26881, we changed import to retry on worker
failures, which made imports much more resilient to transient failures
like nodes going down. As part of this work we created
`TestImportWorkerFailure` which shuts down one node during an import,
and checks that the import succeeded. Unfortunately, this test was
checked-in skipped, because though imports were much more resilient to
node failures, they were not completely resilient in every possible
scenario, making the test flakey.

Two months ago, in #105712, we unskipped this test and discovered that
in some cases the import statement succeeded but only imported a partial
dataset. This non-atomicity seems like a bigger issue than whether the
import is able to succeed in every possible transient failure scenario,
and is tracked separately in #108547.

This PR changes `TestImportWorkerFailure` to remove successful import as
a necessary condition for test success. Instead, the test now only
checks whether the import was atomic; that is, whether a successful
import imported all data or a failed import imported none. This is more
in line with what we can guarantee about imports today.

Fixes: #102839

Release note: None
---
 pkg/sql/importer/import_stmt_test.go | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pkg/sql/importer/import_stmt_test.go b/pkg/sql/importer/import_stmt_test.go
index 149bbd0a244b..10bfebe0c3df 100644
--- a/pkg/sql/importer/import_stmt_test.go
+++ b/pkg/sql/importer/import_stmt_test.go
@@ -5253,10 +5253,15 @@ func TestImportWorkerFailure(t *testing.T) {
 	tc.StopServer(1)
 
 	close(allowResponse)
-	// We expect the statement to retry since it should have encountered a
-	// retryable error.
+	// We expect the IMPORT statement to usually retry since it should have
+	// encountered a retryable error. We don't currently catch all such retryable
+	// errors, however, so in some cases the IMPORT statement will return the
+	// error to the client. In this case we verify that the import was completely
+	// rolled back.
 	if err := <-errCh; err != nil {
-		t.Fatal(err)
+		t.Logf("%s failed, checking that imported data was completely removed: %q", query, err)
+		sqlDB.CheckQueryResults(t, `SELECT * FROM t ORDER BY i`, [][]string{})
+		return
 	}
 
 	// But the job should be restarted and succeed eventually.