Skip to content

Commit

Permalink
backupccl: display up to 10 missing files in `SHOW BACKUP .. with che…
Browse files Browse the repository at this point in the history
…ck_files`

Previously, `SHOW BACKUP WITH check_files` displayed the first missing SST.
This patch will display up to 10 missing SSTs. Further, this renames the
misleading `approximateTablePhysicalSize` to `approximateSpanPhysicalSize`.
Below I write out how physical table size is calculated:

1. Each range we backup maps to 1 to many spans (currently in the
backup_manifest.files object).

2. 1 to many spans get written to an SST. No span will get written to multiple
SSTs.

3. When backup created these spans, it tried really hard to split spans at
table boundaries, so only one table’s data could be in a span, but a last
minute table creation makes this near impossible, due to slow range splits.
A big table will have many spans.

4. To compute the approximate logical size (called size_bytes in SHOW BACKUP)
of each table, we sum the logical bytes over all its spans. We identify a
table's span by checking the table prefix of the first key in the span (see
the getTableSizes method).

5. To compute the physical size (file_bytes in SHOW BACKUP) of a span, compute
the logical size of each SST by summing the logical bytes in the SST over its
spans (see the getLogicalSSTSize method), and attribute a portion of the
physical SST size (returned from cloud storage) to a span using the formula:
(sstPhysicalSize) * (logicalSpanSize) / (logicalSSTSize) = physicalSpanSize
(the approximateSpanPhysicalSize method implements this).

6. To compute the physical size of a table, sum over the physical sizes of the
table's spans.

Release note (sql change): SHOW BACKUP WITH check_files will display up to 10
missing SSTs.
  • Loading branch information
msbutler committed Jun 7, 2022
1 parent f0372fd commit 5b5d819
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 44 deletions.
47 changes: 39 additions & 8 deletions pkg/ccl/backupccl/show.go
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,9 @@ func checkBackupFiles(
storeFactory cloud.ExternalStorageFromURIFactory,
user username.SQLUsername,
) ([][]int64, error) {
maxMissingFiles := 10
missingFiles := make(map[string]bool, maxMissingFiles)
numMissingFiles := 0

checkLayer := func(layer int) ([]int64, error) {
// TODO (msbutler): Right now, checkLayer opens stores for each backup layer. In 22.2,
Expand Down Expand Up @@ -546,10 +549,20 @@ func checkBackupFiles(
}
sz, err := store.Size(ctx, f.Path)
if err != nil {
return nil, errors.Wrapf(err, "Error checking file %s in %s", f.Path, uri)
uriNoLocality := strings.Split(uri, "?")[0]
missingFile := path.Join(uriNoLocality, f.Path)
if _, ok := missingFiles[missingFile]; !ok {
missingFiles[missingFile] = false
numMissingFiles++
if maxMissingFiles == numMissingFiles {
break
}
}
continue
}
fileSizes[i] = sz
}

return fileSizes, nil
}

Expand All @@ -559,8 +572,24 @@ func checkBackupFiles(
if err != nil {
return nil, err
}
if numMissingFiles == maxMissingFiles {
break
}
manifestFileSizes[layer] = layerFileSizes
}
if numMissingFiles > 0 {
filesForMsg := make([]string, numMissingFiles)
i := 0
for file := range missingFiles {
filesForMsg[i] = file
i++
}
errorMsgPrefix := "The following files are missing from the backup:"
if numMissingFiles == maxMissingFiles {
errorMsgPrefix = "Multiple files cannot be read from the backup including:"
}
return nil, errors.Newf("%s\n\t%s", errorMsgPrefix, strings.Join(filesForMsg, "\n\t"))
}
return manifestFileSizes, nil
}

Expand Down Expand Up @@ -828,11 +857,11 @@ func getLogicalSSTSize(files []backuppb.BackupManifest_File) map[string]int64 {
return sstDataSize
}

// approximateSpanPhysicalSize approximates the number of bytes written to
// disk for the span by attributing a share of the SST's physical size
// proportional to the span's share of the SST's logical bytes:
//
//	physicalSpanSize = physicalSSTSize * logicalSpanSize / logicalSSTSize
func approximateSpanPhysicalSize(
	logicalSpanSize int64, logicalSSTSize int64, physicalSSTSize int64,
) int64 {
	// Guard against a zero logical SST size: dividing by zero produces a
	// NaN/Inf float, and converting that to int64 is implementation-defined.
	if logicalSSTSize == 0 {
		return 0
	}
	return int64(float64(physicalSSTSize) * (float64(logicalSpanSize) / float64(logicalSSTSize)))
}

// getTableSizes gathers row and size count for each table in the manifest
Expand Down Expand Up @@ -868,7 +897,8 @@ func getTableSizes(
s := tableSizes[descpb.ID(tableID)]
s.rowCount.Add(file.EntryCounts)
if len(fileSizes) > 0 {
s.fileSize = approximateTablePhysicalSize(s.rowCount.DataSize, logicalSSTSize[file.Path], fileSizes[i])
s.fileSize += approximateSpanPhysicalSize(file.EntryCounts.DataSize, logicalSSTSize[file.Path],
fileSizes[i])
}
tableSizes[descpb.ID(tableID)] = s
}
Expand Down Expand Up @@ -993,7 +1023,7 @@ func backupShowerFileSetup(inCol tree.StringOrPlaceholderOptList) backupShower {
backupType = "incremental"
}

sstDataSize := getLogicalSSTSize(manifest.Files)
logicalSSTSize := getLogicalSSTSize(manifest.Files)
for j, file := range manifest.Files {
filePath := file.Path
if inCol != nil {
Expand All @@ -1008,7 +1038,8 @@ func backupShowerFileSetup(inCol tree.StringOrPlaceholderOptList) backupShower {
}
sz := int64(-1)
if len(info.fileSizes) > 0 {
sz = approximateTablePhysicalSize(info.fileSizes[i][j], file.EntryCounts.DataSize, sstDataSize[file.Path])
sz = approximateSpanPhysicalSize(file.EntryCounts.DataSize,
logicalSSTSize[file.Path], info.fileSizes[i][j])
}
rows = append(rows, tree.Datums{
tree.NewDString(filePath),
Expand Down
78 changes: 42 additions & 36 deletions pkg/ccl/backupccl/show_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -839,10 +839,13 @@ func TestShowBackupPathIsCollectionRoot(t *testing.T) {
"SHOW BACKUP $1", localFoo)
}

// TestShowBackupCheckFiles verifies the check_files option catches a corrupt backup file
// in 3 scenarios: 1. SST from a full backup; 2. SST from a default incremental backup; 3.
// SST from an incremental backup created with the incremental_location parameter.
// The first two scenarios also get checked with locality aware backups.
// TestShowBackupCheckFiles verifies the check_files option catches a corrupt
// backup file in 3 scenarios: 1. SST from a full backup; 2. SST from a default
// incremental backup; 3. SST from an incremental backup created with the
// incremental_location parameter. The first two scenarios also get checked with
// locality aware backups. The test also sanity checks the new file_bytes column
// in SHOW BACKUP with check_files, which displays the physical size of each
// table in the backup.
func TestShowBackupCheckFiles(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand All @@ -855,8 +858,8 @@ func TestShowBackupCheckFiles(t *testing.T) {

collectionRoot := "full"
incLocRoot := "inc"
const c1, c2, c3 = `nodelocal://0/full`, `nodelocal://1/full`, `nodelocal://2/full`
const i1, i2, i3 = `nodelocal://0/inc`, `nodelocal://1/inc`, `nodelocal://2/inc`
const c1, c2, c3 = `nodelocal://1/full`, `nodelocal://2/full`, `nodelocal://3/full`
const i1, i2, i3 = `nodelocal://1/inc`, `nodelocal://2/inc`, `nodelocal://3/inc`
localities := []string{"default", "dc=dc1", "dc=dc2"}

collections := []string{
Expand All @@ -879,7 +882,6 @@ func TestShowBackupCheckFiles(t *testing.T) {
{dest: collections, inc: incrementals, localities: localities},
}

// create db
sqlDB.Exec(t, `CREATE DATABASE fkdb`)
sqlDB.Exec(t, `CREATE TABLE fkdb.fk (ind INT)`)

Expand All @@ -905,11 +907,23 @@ func TestShowBackupCheckFiles(t *testing.T) {

sqlDB.Exec(t, fmt.Sprintf("BACKUP DATABASE fkdb INTO LATEST IN %s", dest))

// breakCheckFiles validates that moving an SST will cause SHOW BACKUP with check_files to
// error.
breakCheckFiles := func(
// rootDir identifies the root of the collection or incremental backup dir
// of the target backup file.
rootDir string,

// backupDest contains the collection or incremental URIs.
backupDest []string,

// file contains the path to the target file starting from the rootDir.
file string,

// fileLocality contains the expected locality of the target file.
fileLocality string,

// checkQuery contains the 'SHOW BACKUP with check_files' query.
checkQuery string) {

// Ensure no errors first
Expand All @@ -921,65 +935,57 @@ func TestShowBackupCheckFiles(t *testing.T) {
require.NoError(t, err, "failed to corrupt SST")
}

// To validate the checkFiles error message, resolve the full path of the missing file.
// For locality aware backups, it may not live at the default URI (e.g. backupDest[0]).
var fileDest string
// If the file is from a locality aware backup, check its locality info. Note
// that locality aware URIs have the following structure:
// `someURI?COCKROACH_LOCALITY ='locality'`.
if fileLocality == "NULL" {
fileDest = backupDest[0]
} else {
var locality string
fileLocality = url.QueryEscape(fileLocality)
for _, destURI := range backupDest {
// Locality aware URIs have the following structure:
// `someURI?COCKROACH_LOCALITY='locality'`. Given the fileLocality, we
// match for the proper URI.
// Using the locality, match for the proper URI.
destLocality := strings.Split(destURI, "?")
if strings.Contains(destLocality[1], fileLocality) {
fileDest = destURI
locality = destLocality[1]
break
}
}
require.NotEmpty(t, locality, "could not find file locality")
}
require.NotEmpty(t, fileDest, "could not find file locality")

// The full error message looks like "Error checking file
// data/756930828574818306.sst in nodelocal://0/full/2022/04/27-134916.90".
//
// Note that the expected error message excludes the path to the data file
// to avoid a test flake for locality aware backups where two different
// nodelocal URI's read to the same place. In this scenario, the test
// expects the backup to be in nodelocal://1/foo and the actual error
// message resolves the uri to nodelocal://0/foo. While both are correct,
// the test fails.

// Get Path after /data dir
toFile := "data" + strings.Split(file, "/data")[1]
errorMsg := fmt.Sprintf("Error checking file %s", toFile)

// Note that the expected error message excludes the nodelocal portion of
// the file path (nodelocal://1/) to avoid a test flake for locality aware
// backups where two different nodelocal URI's read to the same place. In
// this scenario, the test expects the backup to be in nodelocal://2/foo
// and the actual error message resolves the uri to nodelocal://1/foo.
// While both are correct, the test fails.
errorMsg := fmt.Sprintf("The following files are missing from the backup:\n\t.*%s",
filepath.Join(rootDir, file))
sqlDB.ExpectErr(t, errorMsg, checkQuery)

if err := os.Rename(badPath, fullPath); err != nil {
require.NoError(t, err, "failed to de-corrupt SST")
}
}

fileInfo := sqlDB.QueryStr(t,
fmt.Sprintf(`SELECT path, locality, file_bytes FROM [SHOW BACKUP FILES FROM LATEST IN %s with check_files]`, dest))

checkQuery := fmt.Sprintf(`SHOW BACKUP FROM LATEST IN %s WITH check_files`, dest)

// break on full backup
// Break on full backup.
breakCheckFiles(collectionRoot, test.dest, fileInfo[0][0], fileInfo[0][1], checkQuery)

// break on default inc backup
// Break on default inc backup.
breakCheckFiles(collectionRoot, test.dest, fileInfo[len(fileInfo)-1][0], fileInfo[len(fileInfo)-1][1], checkQuery)

// check that each file size is positive
// Check that each file size is positive.
for _, file := range fileInfo {
sz, err := strconv.Atoi(file[2])
require.NoError(t, err, "could not get file size")
require.Greater(t, sz, 0, "file size is not positive")
}

// check that returned file size is consistent across flavors of SHOW BACKUP
// Check that the returned file size is consistent across flavors of SHOW BACKUP.
fileSum := sqlDB.QueryStr(t,
fmt.Sprintf(`SELECT sum(file_bytes) FROM [SHOW BACKUP FILES FROM LATEST IN %s with check_files]`, dest))

Expand All @@ -988,7 +994,7 @@ func TestShowBackupCheckFiles(t *testing.T) {
fileSum)

if len(test.dest) == 1 {
// break on an incremental backup stored at incremental_location
// Break on an incremental backup stored at incremental_location.
fileInfo := sqlDB.QueryStr(t,
fmt.Sprintf(`SELECT path, locality FROM [SHOW BACKUP FILES FROM LATEST IN %s WITH incremental_location = %s]`,
dest, inc))
Expand Down

0 comments on commit 5b5d819

Please sign in to comment.