From eb94c8b73c9c1cf53de38a3669a66431cd07e7e1 Mon Sep 17 00:00:00 2001 From: Paul Bardea Date: Tue, 2 Feb 2021 17:53:31 -0500 Subject: [PATCH] backupccl: fix rare failure in reading backup file BackupManifests are compresed when written to ExternalStorage. When reading backup manifests, we check if the content type indicates that the backup manifest is compressed (and thus needs to be decompressed). We do this because some previous backups were not compressed, so backup needs to be able to detect if the backup manfiest has been compressed. However, very rarely (1 in 60000 attempts in my case), the compressed data might be detected as "application/vnd.ms-fontobject", rather than a gzipped file. This causes backup to not decompress the file, and thus try to unmarshall the compressed data. The file is misdetected because the defined sniffing algorithm in http.DetectContentType first looks at the 35th byte to see if it matches a "magic pattern", before checking if the data is in gzip format. And, sometimes, the 35th byte happens to match up with that magic pattern. This commit updates the check so that we only check if the GZip magic bytes header is present or not, rather than checking all possible content types. The gzip header is not expected to conflict with the first 6 bytes of normally generated protobuf messages that are compressed. Release note (bug fix): Fixes a bug where backups would fail with an error when trying to read a backup that was written. --- pkg/ccl/backupccl/backup_test.go | 13 ++++--------- pkg/ccl/backupccl/manifest_handling.go | 23 ++++++++++++++++------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go index 9561d4aba35b..c1e9d3ade54c 100644 --- a/pkg/ccl/backupccl/backup_test.go +++ b/pkg/ccl/backupccl/backup_test.go @@ -17,7 +17,6 @@ import ( "io" "io/ioutil" "math/rand" - "net/http" "net/url" "os" "path" @@ -329,8 +328,7 @@ func TestBackupRestoreStatementResult(t *testing.T) { if err != nil { t.Fatal(err) } - fileType := http.DetectContentType(backupManifestBytes) - require.Equal(t, ZipType, fileType) + require.True(t, isGZipped(backupManifestBytes)) }) sqlDB.Exec(t, "CREATE DATABASE data2") @@ -462,8 +460,7 @@ func TestBackupRestorePartitioned(t *testing.T) { if err != nil { t.Fatal(err) } - fileType := http.DetectContentType(backupPartitionBytes) - require.Equal(t, ZipType, fileType) + require.True(t, isGZipped(backupPartitionBytes)) } } } @@ -1577,8 +1574,7 @@ func TestBackupRestoreResume(t *testing.T) { if err != nil { t.Fatal(err) } - fileType := http.DetectContentType(backupManifestBytes) - if fileType == ZipType { + if isGZipped(backupManifestBytes) { backupManifestBytes, err = decompressData(backupManifestBytes) require.NoError(t, err) } @@ -3935,8 +3931,7 @@ func TestBackupRestoreChecksum(t *testing.T) { if err != nil { t.Fatalf("%+v", err) } - fileType := http.DetectContentType(backupManifestBytes) - if fileType == ZipType { + if isGZipped(backupManifestBytes) { backupManifestBytes, err = decompressData(backupManifestBytes) require.NoError(t, err) } diff --git a/pkg/ccl/backupccl/manifest_handling.go b/pkg/ccl/backupccl/manifest_handling.go index 352494c433e2..bf101d9684cc 100644 --- a/pkg/ccl/backupccl/manifest_handling.go +++ b/pkg/ccl/backupccl/manifest_handling.go @@ -16,7 +16,6 @@ import ( "encoding/hex" "fmt" "io/ioutil" - "net/http" "net/url" "path" "sort" @@ -71,14 +70,26 @@ const ( const ( // BackupFormatDescriptorTrackingVersion added tracking of complete DBs. BackupFormatDescriptorTrackingVersion uint32 = 1 - // ZipType is the format of a GZipped compressed file. - ZipType = "application/x-gzip" dateBasedIncFolderName = "/20060102/150405.00" dateBasedIntoFolderName = "/2006/01/02-150405.00" latestFileName = "LATEST" ) +// isGZipped detects whether the given bytes represent GZipped data. This check +// is used rather than a standard implementation such as http.DetectContentType +// since some zipped data may be mis-identified by that method. We've seen +// gzipped data incorrectly identified as "application/vnd.ms-fontobject". The +// magic bytes are from the MIME sniffing algorithm http.DetectContentType is +// based which can be found at https://mimesniff.spec.whatwg.org/. +// +// This method is only used to detect if protobufs are GZipped, and there are no +// conflicts between the starting bytes of a protobuf and these magic bytes. +func isGZipped(dat []byte) bool { + gzipPrefix := []byte("\x1F\x8B\x08") + return bytes.HasPrefix(dat, gzipPrefix) +} + // BackupFileDescriptors is an alias on which to implement sort's interface. type BackupFileDescriptors []BackupManifest_File @@ -222,8 +233,7 @@ func readBackupManifest( } } - fileType := http.DetectContentType(descBytes) - if fileType == ZipType { + if isGZipped(descBytes) { descBytes, err = decompressData(descBytes) if err != nil { return BackupManifest{}, errors.Wrap( @@ -289,8 +299,7 @@ func readBackupPartitionDescriptor( } } - fileType := http.DetectContentType(descBytes) - if fileType == ZipType { + if isGZipped(descBytes) { descBytes, err = decompressData(descBytes) if err != nil { return BackupPartitionDescriptor{}, errors.Wrap(