From 3a7021c5ad358d3b04a8701c2192abb745c8483d Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Mon, 18 Mar 2024 12:24:20 -0700 Subject: [PATCH] db: reuse backings for external ingestions When we ingest the same external file multiple times (either in separate ingestions or within the same ingestion), we now use the same backing. We use the recently added virtual backing protection mechanism to make sure that the backings don't disappear between the point where we decide to reuse and applying the version change. The metamorphic test provides good coverage for this. --- flushable_test.go | 2 +- ingest.go | 194 +++++++++++++----- ingest_test.go | 6 +- objstorage/objstorage.go | 4 + objstorage/objstorageprovider/provider.go | 14 +- .../objstorageprovider/provider_test.go | 4 +- objstorage/objstorageprovider/remote.go | 48 +++++ objstorage/remote/storage.go | 15 ++ testdata/ingest_external | 126 ++++++++++++ version_set.go | 1 + 10 files changed, 350 insertions(+), 64 deletions(-) diff --git a/flushable_test.go b/flushable_test.go index 8b9e68a1aa..91ed10f135 100644 --- a/flushable_test.go +++ b/flushable_test.go @@ -86,7 +86,7 @@ func TestIngestedSSTFlushableAPI(t *testing.T) { // (e.g. because the files reside on a different filesystem), ingestLink will // fall back to copying, and if that fails we undo our work and return an // error. - if err := ingestLink(jobID, d.opts, d.objProvider, lr); err != nil { + if err := ingestLinkLocal(jobID, d.opts, d.objProvider, lr.local); err != nil { panic("couldn't hard link sstables") } diff --git a/ingest.go b/ingest.go index 5e47550957..2f3af8c058 100644 --- a/ingest.go +++ b/ingest.go @@ -208,13 +208,6 @@ func ingestLoad1External( Virtual: true, Size: e.Size, } - // For simplicity, we use the same number for both the FileNum and the - // DiskFileNum (even though this is a virtual sstable). Pass the underlying - // FileBacking's size to the same size as the virtualized view of the sstable. - // This ensures that we don't over-prioritize this sstable for compaction just - // yet, as we do not have a clear sense of what parts of this sstable are - // referenced by other nodes. - meta.InitProviderBacking(base.DiskFileNum(fileNum), e.Size) // In the name of keeping this ingestion as fast as possible, we avoid // *all* existence checks and synthesize a file metadata with smallest/largest @@ -248,9 +241,6 @@ func ingestLoad1External( meta.SyntheticPrefix = e.SyntheticPrefix meta.SyntheticSuffix = e.SyntheticSuffix - if err := meta.Validate(opts.Comparer.Compare, opts.Comparer.FormatKey); err != nil { - return nil, err - } return meta, nil } @@ -420,6 +410,11 @@ type ingestSharedMeta struct { type ingestExternalMeta struct { *fileMetadata external ExternalFile + // usedExistingBacking is true if the external file is reusing a backing + // that existed before this ingestion. In this case, we called + // VirtualBackings.Protect() on that backing; we will need to call + // Unprotect() after the ingestion. + usedExistingBacking bool } func (r *ingestLoadResult) fileCount() int { @@ -591,18 +586,18 @@ func ingestCleanup(objProvider objstorage.Provider, meta []ingestLocalMeta) erro return firstErr } -// ingestLink creates new objects which are backed by either hardlinks to or -// copies of the ingested files. It also attaches shared objects to the provider. 
-func ingestLink( - jobID int, opts *Options, objProvider objstorage.Provider, lr ingestLoadResult, +// ingestLinkLocal creates new objects which are backed by either hardlinks to or +// copies of the ingested files. +func ingestLinkLocal( + jobID int, opts *Options, objProvider objstorage.Provider, localMetas []ingestLocalMeta, ) error { - for i := range lr.local { + for i := range localMetas { objMeta, err := objProvider.LinkOrCopyFromLocal( - context.TODO(), opts.FS, lr.local[i].path, fileTypeTable, lr.local[i].FileBacking.DiskFileNum, + context.TODO(), opts.FS, localMetas[i].path, fileTypeTable, localMetas[i].FileBacking.DiskFileNum, objstorage.CreateOptions{PreferSharedStorage: true}, ) if err != nil { - if err2 := ingestCleanup(objProvider, lr.local[:i]); err2 != nil { + if err2 := ingestCleanup(objProvider, localMetas[:i]); err2 != nil { opts.Logger.Errorf("ingest cleanup failed: %v", err2) } return err @@ -612,10 +607,21 @@ func ingestLink( JobID: jobID, Reason: "ingesting", Path: objProvider.Path(objMeta), - FileNum: base.PhysicalTableDiskFileNum(lr.local[i].FileNum), + FileNum: base.PhysicalTableDiskFileNum(localMetas[i].FileNum), }) } } + return nil +} + +// ingestAttachRemote attaches remote objects to the storage provider. +// +// For external objects, we reuse existing FileBackings from the current version +// when possible. +// +// ingestUnprotectExternalBackings() must be called after this function (even in +// error cases). +func (d *DB) ingestAttachRemote(jobID int, lr ingestLoadResult) error { remoteObjs := make([]objstorage.RemoteObjectToAttach, 0, len(lr.shared)+len(lr.external)) for i := range lr.shared { backing, err := lr.shared[i].shared.Backing.Get() @@ -628,20 +634,52 @@ func ingestLink( Backing: backing, }) } + + d.findExistingBackingsForExternalObjects(lr.external) + + newFileBackings := make(map[remote.ObjectKey]*fileBacking, len(lr.external)) for i := range lr.external { - // Try to resolve a reference to the external file. - backing, err := objProvider.CreateExternalObjectBacking(lr.external[i].external.Locator, lr.external[i].external.ObjName) + meta := lr.external[i].fileMetadata + if meta.FileBacking != nil { + // The backing was filled in by findExistingBackingsForExternalObjects(). + continue + } + key := remote.MakeObjectKey(lr.external[i].external.Locator, lr.external[i].external.ObjName) + if backing, ok := newFileBackings[key]; ok { + // We already created the same backing in this loop. + meta.FileBacking = backing + continue + } + providerBacking, err := d.objProvider.CreateExternalObjectBacking(key.Locator, key.ObjectName) if err != nil { return err } + // We have to attach the remote object (and assign it a DiskFileNum). For + // simplicity, we use the same number for both the FileNum and the + // DiskFileNum (even though this is a virtual sstable). + meta.InitProviderBacking(base.DiskFileNum(meta.FileNum), lr.external[i].external.Size) + + // Set the underlying FileBacking's size to the same size as the virtualized + // view of the sstable. This ensures that we don't over-prioritize this + // sstable for compaction just yet, as we do not have a clear sense of + // what parts of this sstable are referenced by other nodes. 
+		meta.FileBacking.Size = lr.external[i].external.Size
+		newFileBackings[key] = meta.FileBacking
+
 		remoteObjs = append(remoteObjs, objstorage.RemoteObjectToAttach{
-			FileNum:  lr.external[i].FileBacking.DiskFileNum,
+			FileNum:  meta.FileBacking.DiskFileNum,
 			FileType: fileTypeTable,
-			Backing:  backing,
+			Backing:  providerBacking,
 		})
 	}
 
-	remoteObjMetas, err := objProvider.AttachRemoteObjects(remoteObjs)
+	for i := range lr.external {
+		if err := lr.external[i].Validate(d.opts.Comparer.Compare, d.opts.Comparer.FormatKey); err != nil {
+			return err
+		}
+	}
+
+	remoteObjMetas, err := d.objProvider.AttachRemoteObjects(remoteObjs)
 	if err != nil {
 		return err
 	}
@@ -654,8 +692,8 @@
 		// open the db again after a crash/restart (see checkConsistency in open.go),
 		// plus it more accurately allows us to prioritize compactions of files
 		// that were originally created by us.
-		if remoteObjMetas[i].IsShared() && !objProvider.IsSharedForeign(remoteObjMetas[i]) {
-			size, err := objProvider.Size(remoteObjMetas[i])
+		if remoteObjMetas[i].IsShared() && !d.objProvider.IsSharedForeign(remoteObjMetas[i]) {
+			size, err := d.objProvider.Size(remoteObjMetas[i])
 			if err != nil {
 				return err
 			}
@@ -663,12 +701,12 @@
 		}
 	}
 
-	if opts.EventListener.TableCreated != nil {
+	if d.opts.EventListener.TableCreated != nil {
 		for i := range remoteObjMetas {
-			opts.EventListener.TableCreated(TableCreateInfo{
+			d.opts.EventListener.TableCreated(TableCreateInfo{
 				JobID:   jobID,
 				Reason:  "ingesting",
-				Path:    objProvider.Path(remoteObjMetas[i]),
+				Path:    d.objProvider.Path(remoteObjMetas[i]),
 				FileNum: remoteObjMetas[i].DiskFileNum,
 			})
 		}
@@ -677,6 +715,49 @@
 	return nil
 }
 
+// findExistingBackingsForExternalObjects populates the FileBacking for external
+// files which are already in use by the current version.
+//
+// We call VirtualBackings.Protect on each backing that we populate here.
+func (d *DB) findExistingBackingsForExternalObjects(metas []ingestExternalMeta) {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	for i := range metas {
+		diskFileNums := d.objProvider.GetExternalObjects(metas[i].external.Locator, metas[i].external.ObjName)
+		// We cross-check against fileBackings in the current version because it is
+		// possible that the external object is referenced by an sstable which only
+		// exists in a previous version. In that case, that object could be removed
+		// at any time so we cannot reuse it.
+		for _, n := range diskFileNums {
+			if backing, ok := d.mu.versions.virtualBackings.Get(n); ok {
+				// Protect this backing from being removed from the latest version. We
+				// will unprotect in ingestUnprotectExternalBackings.
+				d.mu.versions.virtualBackings.Protect(n)
+				metas[i].usedExistingBacking = true
+				metas[i].FileBacking = backing
+				break
+			}
+		}
+	}
+}
+
+// ingestUnprotectExternalBackings unprotects the file backings that were reused
+// for external objects, once the ingestion is applied or fails.
+func (d *DB) ingestUnprotectExternalBackings(lr ingestLoadResult) {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	for _, meta := range lr.external {
+		if meta.usedExistingBacking {
+			// If the backing is not used anywhere else and the ingest failed (or the
+			// ingested tables were already compacted away), this call will cause the
+			// next version update to remove the backing.
+ d.mu.versions.virtualBackings.Unprotect(meta.FileBacking.DiskFileNum) + } + } +} + func setSeqNumInMetadata(m *fileMetadata, seqNum uint64, cmp Compare, format base.FormatKey) error { setSeqFn := func(k base.InternalKey) base.InternalKey { return base.MakeInternalKey(k.UserKey, seqNum, k.Kind()) @@ -1182,7 +1263,7 @@ type ExternalFile struct { // - the backing sst must not contain multiple keys with the same prefix. SyntheticSuffix []byte - // Level denotes the level at which this file was presetnt at read time + // Level denotes the level at which this file was present at read time // if the external file was returned by a scan of an existing Pebble // instance. If Level is 0, this field is ignored. Level uint8 @@ -1420,10 +1501,16 @@ func (d *DB) ingest( // Hard link the sstables into the DB directory. Since the sstables aren't // referenced by a version, they won't be used. If the hard linking fails - // (e.g. because the files reside on a different filesystem), ingestLink will - // fall back to copying, and if that fails we undo our work and return an + // (e.g. because the files reside on a different filesystem), ingestLinkLocal + // will fall back to copying, and if that fails we undo our work and return an // error. - if err := ingestLink(jobID, d.opts, d.objProvider, loadResult); err != nil { + if err := ingestLinkLocal(jobID, d.opts, d.objProvider, loadResult.local); err != nil { + return IngestOperationStats{}, err + } + + err = d.ingestAttachRemote(jobID, loadResult) + defer d.ingestUnprotectExternalBackings(loadResult) + if err != nil { return IngestOperationStats{}, err } @@ -2208,41 +2295,41 @@ func (d *DB) ingestApply( // Determine the lowest level in the LSM for which the sstable doesn't // overlap any existing files in the level. var m *fileMetadata - sharedIdx := -1 - externalIdx := -1 specifiedLevel := -1 - externalFile := false + isShared := false + isExternal := false if i < len(lr.local) { // local file. m = lr.local[i].fileMetadata } else if (i - len(lr.local)) < len(lr.shared) { // shared file. - sharedIdx = i - len(lr.local) + isShared = true + sharedIdx := i - len(lr.local) m = lr.shared[sharedIdx].fileMetadata specifiedLevel = int(lr.shared[sharedIdx].shared.Level) } else { // external file. - externalFile = true - externalIdx = i - (len(lr.local) + len(lr.shared)) + isExternal = true + externalIdx := i - (len(lr.local) + len(lr.shared)) m = lr.external[externalIdx].fileMetadata - specifiedLevel = int(lr.external[externalIdx].external.Level) + if lr.externalFilesHaveLevel { + specifiedLevel = int(lr.external[externalIdx].external.Level) + } + } + + // Add to CreatedBackingTables if this is a new backing. + // + // Shared files always have a new backing. External files have new backings + // iff the backing disk file num and the file num match (see ingestAttachRemote). 
+	if isShared || (isExternal && m.FileBacking.DiskFileNum == base.DiskFileNum(m.FileNum)) {
+		ve.CreatedBackingTables = append(ve.CreatedBackingTables, m.FileBacking)
 	}
+
 	f := &ve.NewFiles[i]
 	var err error
-	if sharedIdx >= 0 {
-		f.Level = specifiedLevel
-		if f.Level < sharedLevelsStart {
-			panic(fmt.Sprintf("cannot slot a shared file higher than the highest shared level: %d < %d",
-				f.Level, sharedLevelsStart))
-		}
-		ve.CreatedBackingTables = append(ve.CreatedBackingTables, m.FileBacking)
-	} else if externalFile && lr.externalFilesHaveLevel {
+	if specifiedLevel != -1 {
 		f.Level = specifiedLevel
-		ve.CreatedBackingTables = append(ve.CreatedBackingTables, m.FileBacking)
 	} else {
-		if externalFile {
-			ve.CreatedBackingTables = append(ve.CreatedBackingTables, m.FileBacking)
-		}
 		var splitFile *fileMetadata
 		if exciseSpan.Valid() && exciseSpan.Contains(d.cmp, m.Smallest) && exciseSpan.Contains(d.cmp, m.Largest) {
 			// This file fits perfectly within the excise span. We can slot it at
@@ -2287,6 +2374,10 @@
 			d.mu.versions.logUnlock()
 			return nil, err
 		}
+		if isShared && f.Level < sharedLevelsStart {
+			panic(fmt.Sprintf("cannot slot a shared file higher than the highest shared level: %d < %d",
+				f.Level, sharedLevelsStart))
+		}
 		f.Meta = m
 		levelMetrics := metrics[f.Level]
 		if levelMetrics == nil {
diff --git a/ingest_test.go b/ingest_test.go
index b10adde7da..4aa654d8af 100644
--- a/ingest_test.go
+++ b/ingest_test.go
@@ -334,8 +334,7 @@ func TestIngestLink(t *testing.T) {
 			opts.FS.Remove(meta[i].path)
 		}
-		lr := ingestLoadResult{local: meta}
-		err = ingestLink(0 /* jobID */, opts, objProvider, lr)
+		err = ingestLinkLocal(0 /* jobID */, opts, objProvider, meta)
 		if i < count {
 			if err == nil {
 				t.Fatalf("expected error, but found success")
 			}
@@ -402,8 +401,7 @@ func TestIngestLinkFallback(t *testing.T) {
 	meta := &fileMetadata{FileNum: 1}
 	meta.InitPhysicalBacking()
 
-	lr := ingestLoadResult{local: []ingestLocalMeta{{fileMetadata: meta, path: "source"}}}
-	err = ingestLink(0, opts, objProvider, lr)
+	err = ingestLinkLocal(0, opts, objProvider, []ingestLocalMeta{{fileMetadata: meta, path: "source"}})
 	require.NoError(t, err)
 
 	dest, err := mem.Open("000001.sst")
diff --git a/objstorage/objstorage.go b/objstorage/objstorage.go
index 9734e069f4..9a6312249b 100644
--- a/objstorage/objstorage.go
+++ b/objstorage/objstorage.go
@@ -286,6 +286,10 @@ type Provider interface {
 	// Pebble and will never be removed by Pebble.
 	CreateExternalObjectBacking(locator remote.Locator, objName string) (RemoteObjectBacking, error)
 
+	// GetExternalObjects returns a list of DiskFileNums corresponding to all
+	// objects that are backed by the given external object.
+	GetExternalObjects(locator remote.Locator, objName string) []base.DiskFileNum
+
// // The objects are not guaranteed to be durable (accessible in case of diff --git a/objstorage/objstorageprovider/provider.go b/objstorage/objstorageprovider/provider.go index cee346df81..0344e82f9d 100644 --- a/objstorage/objstorageprovider/provider.go +++ b/objstorage/objstorageprovider/provider.go @@ -37,13 +37,7 @@ type provider struct { mu struct { sync.RWMutex - remote struct { - // catalogBatch accumulates remote object creations and deletions until - // Sync is called. - catalogBatch remoteobjcat.Batch - - storageObjects map[remote.Locator]remote.Storage - } + remote remoteLockedState // localObjectsChanged is set if non-remote objects were created or deleted // but Sync was not yet called. @@ -500,6 +494,9 @@ func (p *provider) addMetadataLocked(meta objstorage.ObjectMetadata) { CleanupMethod: meta.Remote.CleanupMethod, CustomObjectName: meta.Remote.CustomObjectName, }) + if meta.IsExternal() { + p.mu.remote.addExternalObject(meta) + } } else { p.mu.localObjectsChanged = true } @@ -514,6 +511,9 @@ func (p *provider) removeMetadata(fileNum base.DiskFileNum) { return } delete(p.mu.knownObjects, fileNum) + if meta.IsExternal() { + p.mu.remote.removeExternalObject(meta) + } if meta.IsRemote() { p.mu.remote.catalogBatch.DeleteObject(fileNum) } else { diff --git a/objstorage/objstorageprovider/provider_test.go b/objstorage/objstorageprovider/provider_test.go index f269d8c748..4ccf96af89 100644 --- a/objstorage/objstorageprovider/provider_test.go +++ b/objstorage/objstorageprovider/provider_test.go @@ -384,7 +384,7 @@ func TestSharedMultipleLocators(t *testing.T) { require.NoError(t, p3.Close()) } -func TestAttachCustomObject(t *testing.T) { +func TestAttachExternalObject(t *testing.T) { ctx := context.Background() storage := remote.NewInMem() sharedFactory := remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ @@ -425,6 +425,8 @@ func TestAttachCustomObject(t *testing.T) { require.Equal(t, byte(123), checkData(t, 0, buf)) require.NoError(t, r.Close()) + require.Equal(t, []base.DiskFileNum{1}, p1.GetExternalObjects("foo", "some-obj-name")) + // Verify that we can extract a correct backing from this provider and attach // the object to another provider. meta, err := p1.Lookup(base.FileTypeTable, base.DiskFileNum(1)) diff --git a/objstorage/objstorageprovider/remote.go b/objstorage/objstorageprovider/remote.go index dd1c99c675..79dba5c212 100644 --- a/objstorage/objstorageprovider/remote.go +++ b/objstorage/objstorageprovider/remote.go @@ -8,6 +8,7 @@ import ( "context" "fmt" "runtime" + "slices" "sync" "sync/atomic" @@ -46,6 +47,43 @@ type remoteSubsystem struct { } } +type remoteLockedState struct { + // catalogBatch accumulates remote object creations and deletions until + // Sync is called. 
+ catalogBatch remoteobjcat.Batch + + storageObjects map[remote.Locator]remote.Storage + + externalObjects map[remote.ObjectKey][]base.DiskFileNum +} + +func (rs *remoteLockedState) addExternalObject(meta objstorage.ObjectMetadata) { + if rs.externalObjects == nil { + rs.externalObjects = make(map[remote.ObjectKey][]base.DiskFileNum) + } + key := remote.MakeObjectKey(meta.Remote.Locator, meta.Remote.CustomObjectName) + rs.externalObjects[key] = append(rs.externalObjects[key], meta.DiskFileNum) +} + +func (rs *remoteLockedState) removeExternalObject(meta objstorage.ObjectMetadata) { + key := remote.MakeObjectKey(meta.Remote.Locator, meta.Remote.CustomObjectName) + newSlice := slices.DeleteFunc(rs.externalObjects[key], func(n base.DiskFileNum) bool { + return n == meta.DiskFileNum + }) + if len(newSlice) == 0 { + delete(rs.externalObjects, key) + } else { + rs.externalObjects[key] = newSlice + } +} + +func (rs *remoteLockedState) getExternalObjects( + locator remote.Locator, objName string, +) []base.DiskFileNum { + key := remote.MakeObjectKey(locator, objName) + return rs.externalObjects[key] +} + // remoteInit initializes the remote object subsystem (if configured) and finds // any remote objects. func (p *provider) remoteInit() error { @@ -110,6 +148,9 @@ func (p *provider) remoteInit() error { o.AssertValid() } p.mu.knownObjects[o.DiskFileNum] = o + if o.IsExternal() { + p.mu.remote.addExternalObject(o) + } } return nil } @@ -375,3 +416,10 @@ func (p *provider) ensureStorage(locator remote.Locator) (remote.Storage, error) defer p.mu.Unlock() return p.ensureStorageLocked(locator) } + +// GetExternalObjects is part of the Provider interface. +func (p *provider) GetExternalObjects(locator remote.Locator, objName string) []base.DiskFileNum { + p.mu.Lock() + defer p.mu.Unlock() + return p.mu.remote.getExternalObjects(locator, objName) +} diff --git a/objstorage/remote/storage.go b/objstorage/remote/storage.go index 8918764413..5a2ee54cd4 100644 --- a/objstorage/remote/storage.go +++ b/objstorage/remote/storage.go @@ -131,3 +131,18 @@ type ObjectReader interface { Close() error } + +// ObjectKey is a (locator, object name) pair which uniquely identifies a remote +// object and can be used as a map key. +type ObjectKey struct { + Locator Locator + ObjectName string +} + +// MakeObjectKey is a convenience constructor for ObjectKey. +func MakeObjectKey(locator Locator, objectName string) ObjectKey { + return ObjectKey{ + Locator: locator, + ObjectName: objectName, + } +} diff --git a/testdata/ingest_external b/testdata/ingest_external index 93059467b8..1fffbb7eb9 100644 --- a/testdata/ingest_external +++ b/testdata/ingest_external @@ -654,3 +654,129 @@ next a: (foo, .) b: (bar, .) c: (baz, .) + +# Test reuse of external backings. +reset +---- + +build-remote reuse1 +set ax ax +set ay ay +set da da +set db db +set dc dc +set uv uv +---- + +build-remote reuse2 +set f f +set h h +set j j +---- + +# Test reuse of backings within a single ingestion. We should see only two +# backings. +ingest-external +reuse1 bounds=(a,b) +reuse2 bounds=(f,g) +reuse2 bounds=(h,i) +reuse1 bounds=(d,e) +reuse1 bounds=(u,v) +reuse2 bounds=(j,k) +---- + +lsm +---- +L6: + 000004(000004):[a#10,DELSIZED-b#inf,RANGEDEL] + 000007(000004):[d#11,DELSIZED-e#inf,RANGEDEL] + 000005(000005):[f#12,DELSIZED-g#inf,RANGEDEL] + 000006(000005):[h#13,DELSIZED-i#inf,RANGEDEL] + 000009(000005):[j#14,DELSIZED-k#inf,RANGEDEL] + 000008(000004):[u#15,DELSIZED-v#inf,RANGEDEL] + +# Test reuse of backings across separate requests. 
+ingest-external +reuse1 bounds=(xu,xv) synthetic-prefix=x +reuse2 bounds=(yj,yk) synthetic-prefix=y +---- + +lsm +---- +L6: + 000004(000004):[a#10,DELSIZED-b#inf,RANGEDEL] + 000007(000004):[d#11,DELSIZED-e#inf,RANGEDEL] + 000005(000005):[f#12,DELSIZED-g#inf,RANGEDEL] + 000006(000005):[h#13,DELSIZED-i#inf,RANGEDEL] + 000009(000005):[j#14,DELSIZED-k#inf,RANGEDEL] + 000008(000004):[u#15,DELSIZED-v#inf,RANGEDEL] + 000010(000004):[xu#16,DELSIZED-xv#inf,RANGEDEL] + 000011(000005):[yj#17,DELSIZED-yk#inf,RANGEDEL] + +batch +del-range a z +---- + +compact a z +---- + +ingest-external +reuse1 bounds=(a,b) +reuse2 bounds=(f,g) +---- + +lsm +---- +L6: + 000014(000014):[a#19,DELSIZED-b#inf,RANGEDEL] + 000015(000015):[f#20,DELSIZED-g#inf,RANGEDEL] + +# Multiple reuse of existing backings in one request. +ingest-external +reuse2 bounds=(h,i) +reuse1 bounds=(d,e) +reuse1 bounds=(u,v) +reuse2 bounds=(j,k) +reuse1 bounds=(xu,xv) synthetic-prefix=x +reuse2 bounds=(yj,yk) synthetic-prefix=y +---- + +lsm +---- +L6: + 000014(000014):[a#19,DELSIZED-b#inf,RANGEDEL] + 000017(000014):[d#21,DELSIZED-e#inf,RANGEDEL] + 000015(000015):[f#20,DELSIZED-g#inf,RANGEDEL] + 000016(000015):[h#22,DELSIZED-i#inf,RANGEDEL] + 000019(000015):[j#23,DELSIZED-k#inf,RANGEDEL] + 000018(000014):[u#24,DELSIZED-v#inf,RANGEDEL] + 000020(000014):[xu#25,DELSIZED-xv#inf,RANGEDEL] + 000021(000015):[yj#26,DELSIZED-yk#inf,RANGEDEL] + +# Test that reusing the same backing region with different prefix and suffix +# works as expected. In particular, make sure the synthetic suffix doesn't +# modify cached block data, leading to the next iterator seeing the suffix. + +reset +---- + +build-remote reuse +set a@10 a +set b@11 b +---- + +ingest-external +reuse bounds=(a,c) synthetic-suffix=@20 +reuse bounds=(za,zc) synthetic-prefix=z +---- + +iter +first +next +next +next +---- +a@20: (a, .) +b@20: (b, .) +za@10: (a, .) +zb@11: (b, .) \ No newline at end of file diff --git a/version_set.go b/version_set.go index d6a371f23e..9aa1121ffd 100644 --- a/version_set.go +++ b/version_set.go @@ -385,6 +385,7 @@ func (vs *versionSet) logLock() { // Wait for any existing writing to the manifest to complete, then mark the // manifest as busy. for vs.writing { + // Note: writerCond.L is DB.mu, so we unlock it while we wait. vs.writerCond.Wait() } vs.writing = true
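
The pattern the patch implements, in miniature: ingestAttachRemote asks the provider (via GetExternalObjects) for existing objects backed by the same (locator, object name) key, protects any backing it reuses so it cannot disappear before the version edit is applied, and unprotects it afterwards. Below is a minimal, self-contained Go sketch of this reuse-and-protect flow; the names (objectKey, backing, registry, acquire, unprotect) are hypothetical stand-ins for Pebble's externalObjects map and VirtualBackings.Protect/Unprotect, not actual Pebble API.

package main

import "fmt"

// objectKey mirrors remote.ObjectKey: a (locator, object name) pair that
// uniquely identifies an external object.
type objectKey struct {
	locator, objName string
}

// backing stands in for a fileBacking. protections counts outstanding
// Protect calls, analogous to VirtualBackings.Protect/Unprotect.
type backing struct {
	diskFileNum int
	protections int
}

// registry stands in for the provider's external-object index.
type registry struct {
	byKey   map[objectKey]*backing
	nextNum int
}

// acquire reuses an existing backing for key, protecting it so that it cannot
// be removed before the version edit is applied; otherwise it creates a new
// backing, as ingestAttachRemote does for never-before-seen external objects.
func (r *registry) acquire(key objectKey) (b *backing, reused bool) {
	if b, ok := r.byKey[key]; ok {
		b.protections++ // undone later, cf. ingestUnprotectExternalBackings
		return b, true
	}
	r.nextNum++
	b = &backing{diskFileNum: r.nextNum}
	r.byKey[key] = b
	return b, false
}

// unprotect drops the protection once the ingestion is applied or fails; the
// backing then lives or dies with the tables that reference it.
func (r *registry) unprotect(b *backing) { b.protections-- }

func main() {
	r := &registry{byKey: make(map[objectKey]*backing)}
	key := objectKey{locator: "foo", objName: "reuse1.sst"}

	b1, reused1 := r.acquire(key) // first ingestion: creates a backing
	b2, reused2 := r.acquire(key) // same external object again: reuses it
	fmt.Println(b1.diskFileNum, reused1) // 1 false
	fmt.Println(b2.diskFileNum, reused2) // 1 true

	r.unprotect(b2) // after the second ingestion is applied (or abandoned)
	fmt.Println(b1 == b2, b2.protections) // true 0
}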