From 16aecb208d35cfb13dba0c00cd80e5c20e7a32e0 Mon Sep 17 00:00:00 2001 From: Ben Bardin Date: Fri, 14 Jan 2022 10:16:11 -0500 Subject: [PATCH] import: check readability earlier Release note (sql change): Import now checks readability earlier for multiple files, to fail sooner if e.g. permissions are invalid. --- pkg/ccl/importccl/import_stmt_test.go | 9 ++-- pkg/ccl/importccl/read_import_base.go | 65 ++++++++++++++++++++------- 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/pkg/ccl/importccl/import_stmt_test.go b/pkg/ccl/importccl/import_stmt_test.go index 6e95bfeca680..7da17e71351b 100644 --- a/pkg/ccl/importccl/import_stmt_test.go +++ b/pkg/ccl/importccl/import_stmt_test.go @@ -7041,15 +7041,12 @@ func TestImportRowErrorLargeRows(t *testing.T) { if r.Method != "GET" { return } - _, err := w.Write([]byte("firstrowvalue\nsecondrow,is,notok,")) - require.NoError(t, err) + _, _ = w.Write([]byte("firstrowvalue\nsecondrow,is,notok,")) // Write 8MB field as the last field of the second // row. bigData := randutil.RandBytes(rng, 8<<20) - _, err = w.Write(bigData) - require.NoError(t, err) - _, err = w.Write([]byte("\n")) - require.NoError(t, err) + _, _ = w.Write(bigData) + _, _ = w.Write([]byte("\n")) })) defer srv.Close() tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{}) diff --git a/pkg/ccl/importccl/read_import_base.go b/pkg/ccl/importccl/read_import_base.go index 6bd59369b25b..385ae8b5a16c 100644 --- a/pkg/ccl/importccl/read_import_base.go +++ b/pkg/ccl/importccl/read_import_base.go @@ -153,24 +153,59 @@ func readInputFiles( fileSizes := make(map[int32]int64, len(dataFiles)) - // Attempt to fetch total number of bytes for all files. + // "Pre-import" work. + // Validate readability early, and attempt to fetch total number of bytes for + // all files to track progress. for id, dataFile := range dataFiles { - conf, err := cloud.ExternalStorageConfFromURI(dataFile, user) - if err != nil { - return err - } - es, err := makeExternalStorage(ctx, conf) - if err != nil { + if err := func() error { + // Run within an anonymous function to release each connection after each + // iteration, rather than all at once after the `for` loop. + conf, err := cloud.ExternalStorageConfFromURI(dataFile, user) + if err != nil { + return err + } + es, err := makeExternalStorage(ctx, conf) + if err != nil { + return err + } + defer es.Close() + + sz, err := es.Size(ctx, "") + + if sz <= 0 { + // Don't log dataFile here because it could leak auth information. + log.Infof(ctx, "could not fetch file size; falling back to per-file progress: %v", err) + } else { + fileSizes[id] = sz + } + + if len(dataFiles) > 1 { + // If there's more than one file, try to read a byte from each to verify + // readability (e.g. permissions). + // If there's only one file, skip that check because it provides no + // advantage. + raw, err := es.ReadFile(ctx, "") + if err != nil { + return err + } + defer raw.Close() + + p := make([]byte, 1) + if _, err := raw.Read(p); err != nil && err != io.EOF { + // Check that we can read the file. We don't care about content yet, + // so we read a single byte and we don't process it in any way. + // If the file is empty -- and we can tell that -- that also counts + // as readable, so don't error. + return err + } + + } + + return nil + }(); err != nil { return err } - sz, err := es.Size(ctx, "") - es.Close() - if sz <= 0 { - // Don't log dataFile here because it could leak auth information. - log.Infof(ctx, "could not fetch file size; falling back to per-file progress: %v", err) - break - } - fileSizes[id] = sz + } for dataFileIndex, dataFile := range dataFiles {