diff --git a/checkpoint.go b/checkpoint.go index ec7d02832d..9a565e81d5 100644 --- a/checkpoint.go +++ b/checkpoint.go @@ -268,6 +268,7 @@ func (d *DB) Checkpoint( } var excludedFiles map[deletedFileEntry]*fileMetadata + var remoteFiles []base.DiskFileNum // Set of FileBacking.DiskFileNum which will be required by virtual sstables // in the checkpoint. requiredVirtualBackingFiles := make(map[base.DiskFileNum]struct{}) @@ -293,6 +294,21 @@ func (d *DB) Checkpoint( } requiredVirtualBackingFiles[fileBacking.DiskFileNum] = struct{}{} } + meta, err := d.objProvider.Lookup(fileTypeTable, fileBacking.DiskFileNum) + if err != nil { + ckErr = err + return ckErr + } + if meta.IsRemote() { + // We don't copy remote files. This is desirable as checkpointing is + // supposed to be a fast operation, and references to remote files can + // always be resolved by any checkpoint readers by reading the object + // catalog. We don't add this file to excludedFiles either, as that'd + // cause it to be deleted in the second manifest entry which is also + // inaccurate. + remoteFiles = append(remoteFiles, meta.DiskFileNum) + continue + } srcPath := base.MakeFilepath(fs, d.dirname, fileTypeTable, fileBacking.DiskFileNum) destPath := fs.PathJoin(destDir, fs.PathBase(srcPath)) @@ -319,6 +335,12 @@ func (d *DB) Checkpoint( if ckErr != nil { return ckErr } + if len(remoteFiles) > 0 { + ckErr = d.objProvider.CheckpointState(fs, destDir, fileTypeTable, remoteFiles) + if ckErr != nil { + return ckErr + } + } // Copy the WAL files. We copy rather than link because WAL file recycling // will cause the WAL files to be reused which would invalidate the diff --git a/checkpoint_test.go b/checkpoint_test.go index 88bc5e6798..4e8ab1fef0 100644 --- a/checkpoint_test.go +++ b/checkpoint_test.go @@ -16,11 +16,12 @@ import ( "github.com/cockroachdb/datadriven" "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/objstorage/remote" "github.com/cockroachdb/pebble/vfs" "github.com/stretchr/testify/require" ) -func TestCheckpoint(t *testing.T) { +func testCheckpointImpl(t *testing.T, ddFile string, createOnShared bool) { dbs := make(map[string]*DB) defer func() { for _, db := range dbs { @@ -32,6 +33,7 @@ func TestCheckpoint(t *testing.T) { mem := vfs.NewMem() var memLog base.InMemLogger + remoteMem := remote.NewInMem() opts := &Options{ FS: vfs.WithLogging(mem, memLog.Infof), FormatMajorVersion: internalFormatNewest, @@ -39,10 +41,16 @@ func TestCheckpoint(t *testing.T) { DisableAutomaticCompactions: true, Logger: testLogger{t}, } + opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ + "": remoteMem, + }) + if createOnShared { + opts.Experimental.CreateOnShared = remote.CreateOnSharedAll + } opts.DisableTableStats = true opts.private.testingAlwaysWaitForCleanup = true - datadriven.RunTest(t, "testdata/checkpoint", func(t *testing.T, td *datadriven.TestData) string { + datadriven.RunTest(t, ddFile, func(t *testing.T, td *datadriven.TestData) string { switch td.Cmd { case "batch": if len(td.CmdArgs) != 1 { @@ -192,6 +200,12 @@ func TestCheckpoint(t *testing.T) { return err.Error() } dbs[dir] = d + if len(dbs) == 1 && createOnShared { + // This is the first db. Set a creator ID. + if err := d.SetCreatorID(1); err != nil { + return err.Error() + } + } return memLog.String() case "scan": @@ -216,6 +230,15 @@ func TestCheckpoint(t *testing.T) { }) } +func TestCheckpoint(t *testing.T) { + t.Run("shared=false", func(t *testing.T) { + testCheckpointImpl(t, "testdata/checkpoint", false /* createOnShared */) + }) + t.Run("shared=true", func(t *testing.T) { + testCheckpointImpl(t, "testdata/checkpoint_shared", true /* createOnShared */) + }) +} + func TestCheckpointCompaction(t *testing.T) { fs := vfs.NewMem() d, err := Open("", &Options{FS: fs, Logger: testLogger{t: t}}) diff --git a/objstorage/objstorage.go b/objstorage/objstorage.go index aad809cd6b..08019ca2d1 100644 --- a/objstorage/objstorage.go +++ b/objstorage/objstorage.go @@ -298,6 +298,11 @@ type Provider interface { // directory does not exist. IsNotExistError(err error) bool + // CheckpointState saves any saved state on local disk to the specified + // directory on the specified VFS. A new Pebble instance instantiated at that + // path should be able to resolve references to the specified files. + CheckpointState(fs vfs.FS, dir string, fileType base.FileType, fileNums []base.DiskFileNum) error + // Metrics returns metrics about objstorage. Currently, it only returns metrics // about the shared cache. Metrics() sharedcache.Metrics diff --git a/objstorage/objstorageprovider/provider.go b/objstorage/objstorageprovider/provider.go index bc42d85351..cee346df81 100644 --- a/objstorage/objstorageprovider/provider.go +++ b/objstorage/objstorageprovider/provider.go @@ -455,6 +455,30 @@ func (p *provider) Metrics() sharedcache.Metrics { return sharedcache.Metrics{} } +// CheckpointState is part of the objstorage.Provider interface. +func (p *provider) CheckpointState( + fs vfs.FS, dir string, fileType base.FileType, fileNums []base.DiskFileNum, +) error { + p.mu.Lock() + defer p.mu.Unlock() + for i := range fileNums { + if _, ok := p.mu.knownObjects[fileNums[i]]; !ok { + return errors.Wrapf( + os.ErrNotExist, + "file %s (type %d) unknown to the objstorage provider", + fileNums[i], errors.Safe(fileType), + ) + } + // Prevent this object from deletion, at least for the life of this instance. + p.mu.protectedObjects[fileNums[i]] = p.mu.protectedObjects[fileNums[i]] + 1 + } + + if p.remote.catalog != nil { + return p.remote.catalog.Checkpoint(fs, dir) + } + return nil +} + func (p *provider) addMetadata(meta objstorage.ObjectMetadata) { p.mu.Lock() defer p.mu.Unlock() diff --git a/objstorage/objstorageprovider/remoteobjcat/catalog.go b/objstorage/objstorageprovider/remoteobjcat/catalog.go index 4b0b676891..566fd39f88 100644 --- a/objstorage/objstorageprovider/remoteobjcat/catalog.go +++ b/objstorage/objstorageprovider/remoteobjcat/catalog.go @@ -8,6 +8,7 @@ import ( "cmp" "fmt" "io" + "path/filepath" "slices" "sync" @@ -373,6 +374,28 @@ func (c *Catalog) createNewCatalogFileLocked() (outErr error) { return nil } +// Checkpoint copies catalog state to a file in the specified directory +func (c *Catalog) Checkpoint(fs vfs.FS, dir string) error { + c.mu.Lock() + defer c.mu.Unlock() + + // NB: Every write to recWriter is flushed. We don't need to worry about + // this new file descriptor not getting all the saved catalog entries. + existingCatalogFilepath := filepath.Join(c.dirname, c.mu.catalogFilename) + destPath := filepath.Join(dir, c.mu.catalogFilename) + if err := vfs.CopyAcrossFS(c.fs, existingCatalogFilepath, fs, destPath); err != nil { + return err + } + catalogMarker, _, err := atomicfs.LocateMarker(fs, dir, catalogMarkerName) + if err != nil { + return err + } + if err := catalogMarker.Move(c.mu.catalogFilename); err != nil { + return err + } + return catalogMarker.Close() +} + func writeRecord(ve *VersionEdit, file vfs.File, recWriter *record.Writer) error { w, err := recWriter.Next() if err != nil { diff --git a/testdata/checkpoint b/testdata/checkpoint index 34ce03f7a9..15382e8288 100644 --- a/testdata/checkpoint +++ b/testdata/checkpoint @@ -17,6 +17,7 @@ close: db/marker.manifest.000001.MANIFEST-000001 sync: db open-dir: db open-dir: db +open-dir: db sync: db/MANIFEST-000001 create: db/000002.log sync: db @@ -281,6 +282,7 @@ open: checkpoints/checkpoint1/MANIFEST-000001 close: checkpoints/checkpoint1/MANIFEST-000001 open-dir: checkpoints/checkpoint1 open-dir: checkpoints/checkpoint1 +open-dir: checkpoints/checkpoint1 open: checkpoints/checkpoint1/OPTIONS-000003 close: checkpoints/checkpoint1/OPTIONS-000003 open: checkpoints/checkpoint1/000006.log @@ -347,6 +349,7 @@ open: checkpoints/checkpoint2/MANIFEST-000001 close: checkpoints/checkpoint2/MANIFEST-000001 open-dir: checkpoints/checkpoint2 open-dir: checkpoints/checkpoint2 +open-dir: checkpoints/checkpoint2 open: checkpoints/checkpoint2/OPTIONS-000003 close: checkpoints/checkpoint2/OPTIONS-000003 open: checkpoints/checkpoint2/000006.log @@ -388,6 +391,7 @@ open: checkpoints/checkpoint3/MANIFEST-000001 close: checkpoints/checkpoint3/MANIFEST-000001 open-dir: checkpoints/checkpoint3 open-dir: checkpoints/checkpoint3 +open-dir: checkpoints/checkpoint3 open: checkpoints/checkpoint3/OPTIONS-000003 close: checkpoints/checkpoint3/OPTIONS-000003 open: checkpoints/checkpoint3/000006.log @@ -525,6 +529,7 @@ open: checkpoints/checkpoint4/MANIFEST-000001 close: checkpoints/checkpoint4/MANIFEST-000001 open-dir: checkpoints/checkpoint4 open-dir: checkpoints/checkpoint4 +open-dir: checkpoints/checkpoint4 open: checkpoints/checkpoint4/OPTIONS-000003 close: checkpoints/checkpoint4/OPTIONS-000003 open: checkpoints/checkpoint4/000008.log @@ -635,6 +640,7 @@ open: checkpoints/checkpoint5/MANIFEST-000001 close: checkpoints/checkpoint5/MANIFEST-000001 open-dir: checkpoints/checkpoint5 open-dir: checkpoints/checkpoint5 +open-dir: checkpoints/checkpoint5 open: checkpoints/checkpoint5/OPTIONS-000003 close: checkpoints/checkpoint5/OPTIONS-000003 open: checkpoints/checkpoint5/000008.log @@ -731,6 +737,7 @@ open: checkpoints/checkpoint6/MANIFEST-000001 close: checkpoints/checkpoint6/MANIFEST-000001 open-dir: checkpoints/checkpoint6 open-dir: checkpoints/checkpoint6 +open-dir: checkpoints/checkpoint6 open: checkpoints/checkpoint6/OPTIONS-000003 close: checkpoints/checkpoint6/OPTIONS-000003 open: checkpoints/checkpoint6/000008.log diff --git a/testdata/checkpoint_shared b/testdata/checkpoint_shared new file mode 100644 index 0000000000..605fe2bd35 --- /dev/null +++ b/testdata/checkpoint_shared @@ -0,0 +1,333 @@ +open db +---- +mkdir-all: db 0755 +open-dir: +sync: +close: +open-dir: db +close: db +open-dir: db +lock: db/LOCK +open-dir: db +open-dir: db +create: db/MANIFEST-000001 +sync: db/MANIFEST-000001 +create: db/marker.manifest.000001.MANIFEST-000001 +close: db/marker.manifest.000001.MANIFEST-000001 +sync: db +open-dir: db +open-dir: db +open-dir: db +sync: db/MANIFEST-000001 +create: db/000002.log +sync: db +create: db/marker.format-version.000001.017 +close: db/marker.format-version.000001.017 +sync: db +create: db/temporary.000003.dbtmp +sync: db/temporary.000003.dbtmp +close: db/temporary.000003.dbtmp +rename: db/temporary.000003.dbtmp -> db/OPTIONS-000003 +sync: db +create: db/REMOTE-OBJ-CATALOG-000001 +sync: db/REMOTE-OBJ-CATALOG-000001 +create: db/marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 +close: db/marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 +sync: db +sync: db/REMOTE-OBJ-CATALOG-000001 + +batch db +set a 1 +set b 2 +set c 3 +---- +sync-data: db/000002.log + +flush db +---- +sync-data: db/000002.log +close: db/000002.log +create: db/000004.log +sync: db +sync: db/REMOTE-OBJ-CATALOG-000001 +sync: db/MANIFEST-000001 + +batch db +set b 5 +set d 7 +set e 8 +---- +sync-data: db/000004.log + +flush db +---- +sync-data: db/000004.log +close: db/000004.log +reuseForWrite: db/000002.log -> db/000006.log +sync: db +sync: db/REMOTE-OBJ-CATALOG-000001 +sync: db/MANIFEST-000001 + +batch db +set f 9 +set g 10 +---- +sync-data: db/000006.log + +checkpoint db checkpoints/checkpoint1 +---- +mkdir-all: checkpoints/checkpoint1 0755 +open-dir: checkpoints +sync: checkpoints +close: checkpoints +open-dir: +sync: +close: +open-dir: checkpoints/checkpoint1 +link: db/OPTIONS-000003 -> checkpoints/checkpoint1/OPTIONS-000003 +open-dir: checkpoints/checkpoint1 +create: checkpoints/checkpoint1/marker.format-version.000001.017 +sync-data: checkpoints/checkpoint1/marker.format-version.000001.017 +close: checkpoints/checkpoint1/marker.format-version.000001.017 +sync: checkpoints/checkpoint1 +close: checkpoints/checkpoint1 +open: db/MANIFEST-000001 +create: checkpoints/checkpoint1/MANIFEST-000001 +sync-data: checkpoints/checkpoint1/MANIFEST-000001 +close: checkpoints/checkpoint1/MANIFEST-000001 +close: db/MANIFEST-000001 +open-dir: checkpoints/checkpoint1 +create: checkpoints/checkpoint1/marker.manifest.000001.MANIFEST-000001 +sync-data: checkpoints/checkpoint1/marker.manifest.000001.MANIFEST-000001 +close: checkpoints/checkpoint1/marker.manifest.000001.MANIFEST-000001 +sync: checkpoints/checkpoint1 +close: checkpoints/checkpoint1 +open: db/REMOTE-OBJ-CATALOG-000001 +create: checkpoints/checkpoint1/REMOTE-OBJ-CATALOG-000001 +sync-data: checkpoints/checkpoint1/REMOTE-OBJ-CATALOG-000001 +close: checkpoints/checkpoint1/REMOTE-OBJ-CATALOG-000001 +close: db/REMOTE-OBJ-CATALOG-000001 +open-dir: checkpoints/checkpoint1 +create: checkpoints/checkpoint1/marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 +sync-data: checkpoints/checkpoint1/marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 +close: checkpoints/checkpoint1/marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 +sync: checkpoints/checkpoint1 +close: checkpoints/checkpoint1 +open: db/000006.log +create: checkpoints/checkpoint1/000006.log +sync-data: checkpoints/checkpoint1/000006.log +close: checkpoints/checkpoint1/000006.log +close: db/000006.log +sync: checkpoints/checkpoint1 +close: checkpoints/checkpoint1 + +checkpoint db checkpoints/checkpoint1 +---- +checkpoint checkpoints/checkpoint1: file already exists + +# Create a checkpoint that omits SSTs that don't overlap with the [d - f) range. +checkpoint db checkpoints/checkpoint2 restrict=(d-f) +---- +mkdir-all: checkpoints/checkpoint2 0755 +open-dir: checkpoints +sync: checkpoints +close: checkpoints +open-dir: checkpoints/checkpoint2 +link: db/OPTIONS-000003 -> checkpoints/checkpoint2/OPTIONS-000003 +open-dir: checkpoints/checkpoint2 +create: checkpoints/checkpoint2/marker.format-version.000001.017 +sync-data: checkpoints/checkpoint2/marker.format-version.000001.017 +close: checkpoints/checkpoint2/marker.format-version.000001.017 +sync: checkpoints/checkpoint2 +close: checkpoints/checkpoint2 +open: db/MANIFEST-000001 +create: checkpoints/checkpoint2/MANIFEST-000001 +sync-data: checkpoints/checkpoint2/MANIFEST-000001 +close: checkpoints/checkpoint2/MANIFEST-000001 +close: db/MANIFEST-000001 +open-dir: checkpoints/checkpoint2 +create: checkpoints/checkpoint2/marker.manifest.000001.MANIFEST-000001 +sync-data: checkpoints/checkpoint2/marker.manifest.000001.MANIFEST-000001 +close: checkpoints/checkpoint2/marker.manifest.000001.MANIFEST-000001 +sync: checkpoints/checkpoint2 +close: checkpoints/checkpoint2 +open: db/REMOTE-OBJ-CATALOG-000001 +create: checkpoints/checkpoint2/REMOTE-OBJ-CATALOG-000001 +sync-data: checkpoints/checkpoint2/REMOTE-OBJ-CATALOG-000001 +close: checkpoints/checkpoint2/REMOTE-OBJ-CATALOG-000001 +close: db/REMOTE-OBJ-CATALOG-000001 +open-dir: checkpoints/checkpoint2 +create: checkpoints/checkpoint2/marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 +sync-data: checkpoints/checkpoint2/marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 +close: checkpoints/checkpoint2/marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 +sync: checkpoints/checkpoint2 +close: checkpoints/checkpoint2 +open: db/000006.log +create: checkpoints/checkpoint2/000006.log +sync-data: checkpoints/checkpoint2/000006.log +close: checkpoints/checkpoint2/000006.log +close: db/000006.log +sync: checkpoints/checkpoint2 +close: checkpoints/checkpoint2 + +# Create a checkpoint that omits SSTs that don't overlap with [a - e) and [d - f). +checkpoint db checkpoints/checkpoint3 restrict=(a-e, d-f) +---- +mkdir-all: checkpoints/checkpoint3 0755 +open-dir: checkpoints +sync: checkpoints +close: checkpoints +open-dir: checkpoints/checkpoint3 +link: db/OPTIONS-000003 -> checkpoints/checkpoint3/OPTIONS-000003 +open-dir: checkpoints/checkpoint3 +create: checkpoints/checkpoint3/marker.format-version.000001.017 +sync-data: checkpoints/checkpoint3/marker.format-version.000001.017 +close: checkpoints/checkpoint3/marker.format-version.000001.017 +sync: checkpoints/checkpoint3 +close: checkpoints/checkpoint3 +open: db/MANIFEST-000001 +create: checkpoints/checkpoint3/MANIFEST-000001 +sync-data: checkpoints/checkpoint3/MANIFEST-000001 +close: checkpoints/checkpoint3/MANIFEST-000001 +close: db/MANIFEST-000001 +open-dir: checkpoints/checkpoint3 +create: checkpoints/checkpoint3/marker.manifest.000001.MANIFEST-000001 +sync-data: checkpoints/checkpoint3/marker.manifest.000001.MANIFEST-000001 +close: checkpoints/checkpoint3/marker.manifest.000001.MANIFEST-000001 +sync: checkpoints/checkpoint3 +close: checkpoints/checkpoint3 +open: db/REMOTE-OBJ-CATALOG-000001 +create: checkpoints/checkpoint3/REMOTE-OBJ-CATALOG-000001 +sync-data: checkpoints/checkpoint3/REMOTE-OBJ-CATALOG-000001 +close: checkpoints/checkpoint3/REMOTE-OBJ-CATALOG-000001 +close: db/REMOTE-OBJ-CATALOG-000001 +open-dir: checkpoints/checkpoint3 +create: checkpoints/checkpoint3/marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 +sync-data: checkpoints/checkpoint3/marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 +close: checkpoints/checkpoint3/marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 +sync: checkpoints/checkpoint3 +close: checkpoints/checkpoint3 +open: db/000006.log +create: checkpoints/checkpoint3/000006.log +sync-data: checkpoints/checkpoint3/000006.log +close: checkpoints/checkpoint3/000006.log +close: db/000006.log +sync: checkpoints/checkpoint3 +close: checkpoints/checkpoint3 + +compact db +---- +sync-data: db/000006.log +close: db/000006.log +reuseForWrite: db/000004.log -> db/000008.log +sync: db +sync: db/REMOTE-OBJ-CATALOG-000001 +sync: db/MANIFEST-000001 +sync: db/REMOTE-OBJ-CATALOG-000001 +sync: db/MANIFEST-000001 + +batch db +set h 11 +---- +sync-data: db/000008.log + +list db +---- +000006.log +000008.log +LOCK +MANIFEST-000001 +OPTIONS-000003 +REMOTE-OBJ-CATALOG-000001 +marker.format-version.000001.017 +marker.manifest.000001.MANIFEST-000001 +marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 + +list checkpoints/checkpoint1 +---- +000006.log +MANIFEST-000001 +OPTIONS-000003 +REMOTE-OBJ-CATALOG-000001 +marker.format-version.000001.017 +marker.manifest.000001.MANIFEST-000001 +marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 + +open checkpoints/checkpoint1 readonly +---- +open-dir: checkpoints/checkpoint1 +lock: checkpoints/checkpoint1/LOCK +open-dir: checkpoints/checkpoint1 +open-dir: checkpoints/checkpoint1 +open: checkpoints/checkpoint1/MANIFEST-000001 +close: checkpoints/checkpoint1/MANIFEST-000001 +open-dir: checkpoints/checkpoint1 +open-dir: checkpoints/checkpoint1 +open-dir: checkpoints/checkpoint1 +open: checkpoints/checkpoint1/REMOTE-OBJ-CATALOG-000001 +close: checkpoints/checkpoint1/REMOTE-OBJ-CATALOG-000001 +open: checkpoints/checkpoint1/OPTIONS-000003 +close: checkpoints/checkpoint1/OPTIONS-000003 +open: checkpoints/checkpoint1/000006.log +close: checkpoints/checkpoint1/000006.log + +scan checkpoints/checkpoint1 +---- +a 1 +b 5 +c 3 +d 7 +e 8 +f 9 +g 10 +. + +scan db +---- +a 1 +b 5 +c 3 +d 7 +e 8 +f 9 +g 10 +h 11 +. + +# This checkpoint should contain no SSTs. +list checkpoints/checkpoint2 +---- +000006.log +MANIFEST-000001 +OPTIONS-000003 +REMOTE-OBJ-CATALOG-000001 +marker.format-version.000001.017 +marker.manifest.000001.MANIFEST-000001 +marker.remote-obj-catalog.000001.REMOTE-OBJ-CATALOG-000001 + +open checkpoints/checkpoint2 readonly +---- +open-dir: checkpoints/checkpoint2 +lock: checkpoints/checkpoint2/LOCK +open-dir: checkpoints/checkpoint2 +open-dir: checkpoints/checkpoint2 +open: checkpoints/checkpoint2/MANIFEST-000001 +close: checkpoints/checkpoint2/MANIFEST-000001 +open-dir: checkpoints/checkpoint2 +open-dir: checkpoints/checkpoint2 +open-dir: checkpoints/checkpoint2 +open: checkpoints/checkpoint2/REMOTE-OBJ-CATALOG-000001 +close: checkpoints/checkpoint2/REMOTE-OBJ-CATALOG-000001 +open: checkpoints/checkpoint2/OPTIONS-000003 +close: checkpoints/checkpoint2/OPTIONS-000003 +open: checkpoints/checkpoint2/000006.log +close: checkpoints/checkpoint2/000006.log + +scan checkpoints/checkpoint2 +---- +b 5 +d 7 +e 8 +f 9 +g 10 +.