From 30587ed09de8fbf4edc8d243db309686e87124af Mon Sep 17 00:00:00 2001
From: Michael Butler
Date: Tue, 22 Mar 2022 09:19:01 -0400
Subject: [PATCH] backupccl: SHOW BACKUP FILES IN (on collection) returns the
 full SST path

Previously, SHOW BACKUP FILES on a backup collection would return each
SST file path relative to its manifest's directory. Because incremental
backup manifests and full backup manifests are stored in different
directories, the file paths that SHOW BACKUP FILES returns should
reflect that. This patch changes the path that `SHOW BACKUP FILES IN`
returns to the file path relative to the collection root.

As an example, the command `SHOW BACKUP FILES LATEST IN s3://mybackups`
previously would return:

  data/001.SST   // from a full backup
  data/002.SST   // from an incremental backup

Now, the command returns (assuming the full and incremental backups
live in the same subdirectory):

  /2020/12/25-060000.00/data/001.SST
  /2020/12/25-060000.00/20201225/070000.00/data/002.SST

Note: when a user passes the incremental_location parameter, the output
can be slightly misleading because the incrementals have a different
collection root. To reduce this confusion, I added a backup_type column
that is either 'incremental' or 'full'.

I plan to test this change in the PR for #77694.

Release note: None
---
 pkg/ccl/backupccl/show.go | 107 +++++++++++++++++++++++++++++---------
 1 file changed, 82 insertions(+), 25 deletions(-)

diff --git a/pkg/ccl/backupccl/show.go b/pkg/ccl/backupccl/show.go
index 7634f7a4688c..d57924441268 100644
--- a/pkg/ccl/backupccl/show.go
+++ b/pkg/ccl/backupccl/show.go
@@ -11,6 +11,7 @@ package backupccl
 import (
 	"context"
 	"net/url"
+	"path"
 	"strings"
 	"time"

@@ -73,6 +74,7 @@ type backupInfoReader interface {
 		cloud.ExternalStorage,
 		*jobspb.BackupEncryptionOptions,
 		[]string,
+		[]string,
 		chan<- tree.Datums,
 	) error
 	header() colinfo.ResultColumns
@@ -91,6 +93,9 @@ func (m manifestInfoReader) header() colinfo.ResultColumns {
 // showBackup reads backup info from the manifest, populates the manifestInfoReader,
 // calls the backupShower to process the manifest info into datums,
 // and pipes the information to the user's sql console via the results channel.
+
+// TODO(msbutler): during the old backup syntax purge, remove store, incStore, incPaths,
+// and pass only `stores []cloud.ExternalStorage` object in signature
 func (m manifestInfoReader) showBackup(
 	ctx context.Context,
 	mem *mon.BoundAccount,
@@ -98,6 +103,7 @@ func (m manifestInfoReader) showBackup(
 	incStore cloud.ExternalStorage,
 	enc *jobspb.BackupEncryptionOptions,
 	incPaths []string,
+	manifestDirs []string,
 	resultsCh chan<- tree.Datums,
 ) error {
 	var memSize int64
@@ -148,7 +154,7 @@ func (m manifestInfoReader) showBackup(
 		return err
 	}

-	datums, err := m.shower.fn(manifests)
+	datums, err := m.shower.fn(manifests, manifestDirs)
 	if err != nil {
 		return err
 	}
@@ -182,6 +188,7 @@ func (m metadataSSTInfoReader) showBackup(
 	incStore cloud.ExternalStorage,
 	enc *jobspb.BackupEncryptionOptions,
 	incPaths []string,
+	manifestDirs []string,
 	resultsCh chan<- tree.Datums,
 ) error {
 	filename := metadataSSTName
@@ -269,13 +276,13 @@ func showBackupPlanHook(
 		case tree.BackupRangeDetails:
 			shower = backupShowerRanges
 		case tree.BackupFileDetails:
-			shower = backupShowerFiles
+			shower = backupShowerFileSetup(backup.InCollection)
 		case tree.BackupManifestAsJSON:
 			shower = jsonShower
 		default:
 			shower = backupShowerDefault(ctx, p, backup.ShouldIncludeSchemas, opts)
 		}
-		infoReader = manifestInfoReader{shower}
+		infoReader = manifestInfoReader{shower: shower}
 	}

 	fn := func(ctx context.Context, _ []sql.PlanNode, resultsCh chan<- tree.Datums) error {
@@ -395,6 +402,7 @@ you must pass the 'encryption_info_dir' parameter that points to the directory o
 			computedSubdir,
 		)
 		var incPaths []string
+		var manifestDirs []string
 		var incStore cloud.ExternalStorage
 		if err != nil {
 			if errors.Is(err, cloud.ErrListingUnsupported) {
@@ -419,23 +427,57 @@ you must pass the 'encryption_info_dir' parameter that points to the directory o
 			if err != nil {
 				return errors.Wrapf(err, "make incremental storage")
 			}
+			manifestDirs = getManifestDirs(computedSubdir, incLocations[0], incPaths, explicitIncPaths)
 		}

 		mem := p.ExecCfg().RootMemoryMonitor.MakeBoundAccount()
 		defer mem.Close(ctx)
-		return infoReader.showBackup(ctx, &mem, store, incStore, encryption, incPaths, resultsCh)
+		return infoReader.showBackup(ctx, &mem, store, incStore, encryption, incPaths,
+			manifestDirs, resultsCh)
 	}

 	return fn, infoReader.header(), nil, false, nil
 }

+// getManifestDirs gathers the path to the directory for each backup manifest,
+// relative to the collection root. In other words, path.Join(dest,
+// manifestDirs[i]) is the resolved manifest path. If the user passed
+// incremental_location, the path.Join(explicitIncPath, manifestDirs[i]) is the
+// resolved incremental manifest path.
+func getManifestDirs(
+	fullSubdir string, incLocation string, incPaths []string, explicitIncPaths []string,
+) []string {
+	manifestDirs := make([]string, len(incPaths)+1)
+
+	// The full backup manifest path is always in the fullSubdir.
+	manifestDirs[0] = fullSubdir
+
+	// The incLocation reveals if incremental backups were stored in the full
+	// backup's subdirectory, the default incremental directory, or in a different
+	// incremental_location. To figure this out, remove the fullSubdir from the
+	// incremental location, then check if the defaultIncrementalSubdir is in the
+	// path AND that the user did not pass explicit incremental paths:
+	defaultIncSubdir := ""
+	incLocNoSubdir := strings.Replace(incLocation, fullSubdir, "", 1)
+	splitIncLocation := strings.Split(incLocNoSubdir, "/")
+	if splitIncLocation[len(splitIncLocation)-1] == DefaultIncrementalsSubdir && len(
+		explicitIncPaths) == 0 {
+		defaultIncSubdir = "/" + DefaultIncrementalsSubdir
+	}
+	for i, incPath := range incPaths {
+		incPathNoManifest := strings.Replace(incPath, backupManifestName, "", 1)
+		manifestDirs[i+1] = path.Join(defaultIncSubdir, fullSubdir, incPathNoManifest)
+	}
+	return manifestDirs
+}
+
 type backupShower struct {
 	// header defines the columns of the table printed as output of the show command.
 	header colinfo.ResultColumns

 	// fn is the specific implementation of the shower that can either be a default, ranges, files,
 	// or JSON shower.
-	fn func([]BackupManifest) ([]tree.Datums, error)
+	fn func(manifests []BackupManifest, manifestDirs []string) ([]tree.Datums, error)
 }

 // backupShowerHeaders defines the schema for the table presented to the user.
@@ -481,7 +523,7 @@ func backupShowerDefault(
 ) backupShower {
 	return backupShower{
 		header: backupShowerHeaders(showSchemas, opts),
-		fn: func(manifests []BackupManifest) ([]tree.Datums, error) {
+		fn: func(manifests []BackupManifest, manifestDirs []string) ([]tree.Datums, error) {
 			var rows []tree.Datums
 			for _, manifest := range manifests {
 				// Map database ID to descriptor name.
@@ -733,7 +775,7 @@ var backupShowerRanges = backupShower{
 		{Name: "end_key", Typ: types.Bytes},
 	},

-	fn: func(manifests []BackupManifest) (rows []tree.Datums, err error) {
+	fn: func(manifests []BackupManifest, manifestDirs []string) (rows []tree.Datums, err error) {
 		for _, manifest := range manifests {
 			for _, span := range manifest.Spans {
 				rows = append(rows, tree.Datums{
@@ -748,9 +790,10 @@ var backupShowerRanges = backupShower{
 	},
 }

-var backupShowerFiles = backupShower{
-	header: colinfo.ResultColumns{
+func backupShowerFileSetup(inCol tree.Expr) backupShower {
+	return backupShower{header: colinfo.ResultColumns{
 		{Name: "path", Typ: types.String},
+		{Name: "backup_type", Typ: types.String},
 		{Name: "start_pretty", Typ: types.String},
 		{Name: "end_pretty", Typ: types.String},
 		{Name: "start_key", Typ: types.Bytes},
@@ -759,22 +802,36 @@ var backupShowerFiles = backupShower{
 		{Name: "rows", Typ: types.Int},
 	},

-	fn: func(manifests []BackupManifest) (rows []tree.Datums, err error) {
-		for _, manifest := range manifests {
-			for _, file := range manifest.Files {
-				rows = append(rows, tree.Datums{
-					tree.NewDString(file.Path),
-					tree.NewDString(file.Span.Key.String()),
-					tree.NewDString(file.Span.EndKey.String()),
-					tree.NewDBytes(tree.DBytes(file.Span.Key)),
-					tree.NewDBytes(tree.DBytes(file.Span.EndKey)),
-					tree.NewDInt(tree.DInt(file.EntryCounts.DataSize)),
-					tree.NewDInt(tree.DInt(file.EntryCounts.Rows)),
-				})
+	fn: func(manifests []BackupManifest, manifestDirs []string) (rows []tree.Datums, err error) {
+		if (inCol != nil) && len(manifestDirs) == 0 {
+			return nil, errors.AssertionFailedf(
+				"manifestDirs empty even though backup is in collection")
 			}
-		}
-		return rows, nil
-	},
+		for i, manifest := range manifests {
+			backupType := "full"
+			if manifest.isIncremental() {
+				backupType = "incremental"
+			}
+			for _, file := range manifest.Files {
+				filePath := file.Path
+				if inCol != nil {
+					filePath = path.Join(manifestDirs[i], filePath)
+				}
+				rows = append(rows, tree.Datums{
+					tree.NewDString(filePath),
+					tree.NewDString(backupType),
+					tree.NewDString(file.Span.Key.String()),
+					tree.NewDString(file.Span.EndKey.String()),
+					tree.NewDBytes(tree.DBytes(file.Span.Key)),
+					tree.NewDBytes(tree.DBytes(file.Span.EndKey)),
+					tree.NewDInt(tree.DInt(file.EntryCounts.DataSize)),
+					tree.NewDInt(tree.DInt(file.EntryCounts.Rows)),
+				})
+			}
+		}
+		return rows, nil
+	},
+	}
 }

 var jsonShower = backupShower{
@@ -782,7 +839,7 @@ var jsonShower = backupShower{
 		{Name: "manifest", Typ: types.Jsonb},
 	},

-	fn: func(manifests []BackupManifest) ([]tree.Datums, error) {
+	fn: func(manifests []BackupManifest, manifestDirs []string) ([]tree.Datums, error) {
 		rows := make([]tree.Datums, len(manifests))
 		for i, manifest := range manifests {
 			j, err := protoreflect.MessageToJSON(
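
For reference, below is a minimal standalone Go sketch (not part of the patch) of
the path composition described above: joining a manifest's directory, taken
relative to the collection root, with a file path recorded in that manifest. The
subdirectory and file names are hypothetical and simply mirror the example in the
commit message.

  package main

  import (
  	"fmt"
  	"path"
  )

  func main() {
  	// Hypothetical manifest directories relative to the collection root:
  	// index 0 is the full backup's subdir, index 1 the incremental's
  	// directory underneath it (default layout assumed).
  	manifestDirs := []string{
  		"/2020/12/25-060000.00",
  		"/2020/12/25-060000.00/20201225/070000.00",
  	}
  	// File paths as recorded in each backup manifest.
  	files := []string{"data/001.SST", "data/002.SST"}

  	for i, f := range files {
  		// SHOW BACKUP FILES now prints the join of the manifest's directory
  		// and the file path, i.e. a path relative to the collection root.
  		fmt.Println(path.Join(manifestDirs[i], f))
  	}
  	// Prints:
  	// /2020/12/25-060000.00/data/001.SST
  	// /2020/12/25-060000.00/20201225/070000.00/data/002.SST
  }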