From ca6709562e16e5f70bbbba0f8ccefdef8d3e48dd Mon Sep 17 00:00:00 2001 From: Steven Danna Date: Thu, 7 Mar 2024 03:05:36 +0000 Subject: [PATCH] scan_internal: don't visit empty external files When truncating external files to our scan bounds, we may find that the file has Start == End. Other validation in ingest asserts these are invalid bounds. --- ingest.go | 17 ------------- scan_internal.go | 52 +++++++++++++++++++++++++++++----------- testdata/ingest_external | 16 +++++++++++++ 3 files changed, 54 insertions(+), 31 deletions(-) diff --git a/ingest.go b/ingest.go index 2347d89e24..f4fe59f166 100644 --- a/ingest.go +++ b/ingest.go @@ -1179,23 +1179,6 @@ type ExternalFile struct { Level uint8 } -func (e *ExternalFile) cloneFromFileMeta(f *fileMetadata) { - *e = ExternalFile{ - Bounds: KeyRange{ - Start: append([]byte(nil), f.Smallest.UserKey...), - End: append([]byte(nil), f.Largest.UserKey...), - }, - HasPointKey: f.HasPointKeys, - HasRangeKey: f.HasRangeKeys, - Size: f.Size, - } - e.SyntheticSuffix = append([]byte(nil), f.SyntheticSuffix...) - if pr := f.PrefixReplacement; pr != nil { - e.ContentPrefix = append([]byte(nil), pr.ContentPrefix...) - e.SyntheticPrefix = append([]byte(nil), pr.SyntheticPrefix...) - } -} - // IngestWithStats does the same as Ingest, and additionally returns // IngestOperationStats. func (d *DB) IngestWithStats(paths []string) (IngestOperationStats, error) { diff --git a/scan_internal.go b/scan_internal.go index e646dbe478..06f2d47780 100644 --- a/scan_internal.go +++ b/scan_internal.go @@ -7,6 +7,7 @@ package pebble import ( "context" "fmt" + "slices" "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble/internal/base" @@ -460,27 +461,43 @@ func (d *DB) truncateExternalFile( level int, file *fileMetadata, objMeta objstorage.ObjectMetadata, -) *ExternalFile { +) (*ExternalFile, error) { cmp := d.cmp - sst := &ExternalFile{} - sst.cloneFromFileMeta(file) - sst.Level = uint8(level) - sst.ObjName = objMeta.Remote.CustomObjectName - sst.Locator = objMeta.Remote.Locator - needsLowerTruncate := cmp(lower, file.Smallest.UserKey) > 0 - needsUpperTruncate := cmp(upper, file.Largest.UserKey) < 0 || (cmp(upper, file.Largest.UserKey) == 0 && !file.Largest.IsExclusiveSentinel()) + sst := &ExternalFile{ + Level: uint8(level), + ObjName: objMeta.Remote.CustomObjectName, + Locator: objMeta.Remote.Locator, + Bounds: KeyRange{}, + HasPointKey: file.HasPointKeys, + HasRangeKey: file.HasRangeKeys, + Size: file.Size, + SyntheticSuffix: slices.Clone(file.SyntheticSuffix), + } + if pr := file.PrefixReplacement; pr != nil { + sst.ContentPrefix = slices.Clone(pr.ContentPrefix) + sst.SyntheticPrefix = slices.Clone(pr.SyntheticPrefix) + } + needsLowerTruncate := cmp(lower, file.Smallest.UserKey) > 0 if needsLowerTruncate { - sst.Bounds.Start = sst.Bounds.Start[:0] - sst.Bounds.Start = append(sst.Bounds.Start, lower...) + sst.Bounds.Start = slices.Clone(lower) + } else { + sst.Bounds.Start = slices.Clone(file.Smallest.UserKey) } + cmpUpper := cmp(upper, file.Largest.UserKey) + needsUpperTruncate := cmpUpper < 0 || (cmpUpper == 0 && !file.Largest.IsExclusiveSentinel()) if needsUpperTruncate { - sst.Bounds.End = sst.Bounds.End[:0] - sst.Bounds.End = append(sst.Bounds.End, upper...) + sst.Bounds.End = slices.Clone(upper) + } else { + sst.Bounds.End = slices.Clone(file.Largest.UserKey) } - return sst + if cmp(sst.Bounds.Start, sst.Bounds.End) >= 0 { + return nil, errors.AssertionFailedf("pebble: invalid external file bounds after truncation [%q, %q)", sst.Bounds.Start, sst.Bounds.End) + } + + return sst, nil } // truncateSharedFile truncates a shared file's [Smallest, Largest] fields to @@ -693,6 +710,10 @@ func scanInternalImpl( for level := firstLevelWithRemote; level < numLevels; level++ { files := current.Levels[level].Iter() for f := files.SeekGE(cmp, lower); f != nil && cmp(f.Smallest.UserKey, upper) < 0; f = files.Next() { + if cmp(lower, f.Largest.UserKey) == 0 && f.Largest.IsExclusiveSentinel() { + continue + } + var objMeta objstorage.ObjectMetadata var err error objMeta, err = provider.Lookup(fileTypeTable, f.FileBacking.DiskFileNum) @@ -737,7 +758,10 @@ func scanInternalImpl( return err } } else if objMeta.IsExternal() { - sst := iter.db.truncateExternalFile(ctx, lower, upper, level, f, objMeta) + sst, err := iter.db.truncateExternalFile(ctx, lower, upper, level, f, objMeta) + if err != nil { + return err + } if err := opts.visitExternalFile(sst); err != nil { return err } diff --git a/testdata/ingest_external b/testdata/ingest_external index 59e7fae0cd..119f4caab5 100644 --- a/testdata/ingest_external +++ b/testdata/ingest_external @@ -695,3 +695,19 @@ a: (d2-v1, .) b: (d1-v1, .) d: (d1-v1, .) e: (d2-v1, .) + +reset +---- + +build-remote trunctest +set a foo +set b bar +---- + +ingest-external +trunctest bounds=(a,c) +---- + +replicate 1 2 c z +---- +replicated 0 external SSTs