diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go index 9561d4aba35b..c1e9d3ade54c 100644 --- a/pkg/ccl/backupccl/backup_test.go +++ b/pkg/ccl/backupccl/backup_test.go @@ -17,7 +17,6 @@ import ( "io" "io/ioutil" "math/rand" - "net/http" "net/url" "os" "path" @@ -329,8 +328,7 @@ func TestBackupRestoreStatementResult(t *testing.T) { if err != nil { t.Fatal(err) } - fileType := http.DetectContentType(backupManifestBytes) - require.Equal(t, ZipType, fileType) + require.True(t, isGZipped(backupManifestBytes)) }) sqlDB.Exec(t, "CREATE DATABASE data2") @@ -462,8 +460,7 @@ func TestBackupRestorePartitioned(t *testing.T) { if err != nil { t.Fatal(err) } - fileType := http.DetectContentType(backupPartitionBytes) - require.Equal(t, ZipType, fileType) + require.True(t, isGZipped(backupPartitionBytes)) } } } @@ -1577,8 +1574,7 @@ func TestBackupRestoreResume(t *testing.T) { if err != nil { t.Fatal(err) } - fileType := http.DetectContentType(backupManifestBytes) - if fileType == ZipType { + if isGZipped(backupManifestBytes) { backupManifestBytes, err = decompressData(backupManifestBytes) require.NoError(t, err) } @@ -3935,8 +3931,7 @@ func TestBackupRestoreChecksum(t *testing.T) { if err != nil { t.Fatalf("%+v", err) } - fileType := http.DetectContentType(backupManifestBytes) - if fileType == ZipType { + if isGZipped(backupManifestBytes) { backupManifestBytes, err = decompressData(backupManifestBytes) require.NoError(t, err) } diff --git a/pkg/ccl/backupccl/manifest_handling.go b/pkg/ccl/backupccl/manifest_handling.go index 352494c433e2..bf101d9684cc 100644 --- a/pkg/ccl/backupccl/manifest_handling.go +++ b/pkg/ccl/backupccl/manifest_handling.go @@ -16,7 +16,6 @@ import ( "encoding/hex" "fmt" "io/ioutil" - "net/http" "net/url" "path" "sort" @@ -71,14 +70,26 @@ const ( const ( // BackupFormatDescriptorTrackingVersion added tracking of complete DBs. 
// isGZipped reports whether dat begins with the GZip magic bytes.
//
// A hand-rolled prefix check is used instead of a general-purpose sniffer
// such as http.DetectContentType because that function has been observed to
// mis-identify gzipped data (for example as "application/vnd.ms-fontobject").
// The byte sequence checked here is the gzip pattern from the MIME sniffing
// algorithm that http.DetectContentType is based on; the algorithm is
// specified at https://mimesniff.spec.whatwg.org/.
//
// This helper is only used to decide whether a serialized protobuf has been
// GZipped, and the leading bytes of a protobuf do not conflict with these
// magic bytes.
func isGZipped(dat []byte) bool {
	magic := [...]byte{0x1F, 0x8B, 0x08}
	// Anything shorter than the magic sequence cannot carry the prefix.
	if len(dat) < len(magic) {
		return false
	}
	return bytes.Equal(dat[:len(magic)], magic[:])
}