From c109764a96e7983522fdad2298414c8839c9454c Mon Sep 17 00:00:00 2001 From: Steven Danna Date: Mon, 18 Mar 2024 08:57:06 +0000 Subject: [PATCH] ingest: use StartKey/EndKey pair rather than bounds in ExternalFile Bounds is defined as exclusive, so it is semantically confusing to use it here. --- data_test.go | 8 ++++---- ingest.go | 42 +++++++++++++++++++++++------------------- metamorphic/ops.go | 10 ++++++---- scan_internal.go | 21 ++++++++++----------- scan_internal_test.go | 20 +++++++++----------- 5 files changed, 52 insertions(+), 49 deletions(-) diff --git a/data_test.go b/data_test.go index 4a4161ab18..256d80f9cc 100644 --- a/data_test.go +++ b/data_test.go @@ -1321,8 +1321,8 @@ func runIngestExternalCmd( switch arg.Key { case "bounds": nArgs(2) - ef.Bounds.Start = []byte(arg.Vals[0]) - ef.Bounds.End = []byte(arg.Vals[1]) + ef.StartKey = []byte(arg.Vals[0]) + ef.EndKey = []byte(arg.Vals[1]) case "bounds-are-inclusive": nArgs(1) b, err := strconv.ParseBool(arg.Vals[0]) @@ -1330,7 +1330,7 @@ func runIngestExternalCmd( usageErr(fmt.Sprintf("%s should have boolean argument: %v", arg.Key, err)) } - ef.BoundsHasInclusiveEndKey = b + ef.EndKeyIsInclusive = b case "size": nArgs(1) arg.Scan(t, 0, &ef.Size) @@ -1347,7 +1347,7 @@ func runIngestExternalCmd( usageErr(fmt.Sprintf("unknown argument %v", arg.Key)) } } - if ef.Bounds.Start == nil { + if ef.StartKey == nil { usageErr("no bounds specified") } diff --git a/ingest.go b/ingest.go index 891565cf86..5e47550957 100644 --- a/ingest.go +++ b/ingest.go @@ -182,17 +182,17 @@ func ingestLoad1External( return nil, errors.New("pebble: cannot ingest external file with no point or range keys") } - if opts.Comparer.Compare(e.Bounds.Start, e.Bounds.End) > 0 { - return nil, errors.Newf("pebble: external file bounds [%q, %q) are invalid", e.Bounds.Start, e.Bounds.End) + if opts.Comparer.Compare(e.StartKey, e.EndKey) > 0 { + return nil, errors.Newf("pebble: external file bounds [%q, %q) are invalid", e.StartKey, e.EndKey) } - if opts.Comparer.Compare(e.Bounds.Start, e.Bounds.End) == 0 && !e.BoundsHasInclusiveEndKey { - return nil, errors.Newf("pebble: external file bounds [%q, %q) are invalid", e.Bounds.Start, e.Bounds.End) + if opts.Comparer.Compare(e.StartKey, e.EndKey) == 0 && !e.EndKeyIsInclusive { + return nil, errors.Newf("pebble: external file bounds [%q, %q) are invalid", e.StartKey, e.EndKey) } - if n := opts.Comparer.Split(e.Bounds.Start); n != len(e.Bounds.Start) { - return nil, errors.Newf("pebble: external file bounds start key %q has suffix", e.Bounds.Start) + if n := opts.Comparer.Split(e.StartKey); n != len(e.StartKey) { + return nil, errors.Newf("pebble: external file bounds start key %q has suffix", e.StartKey) } - if n := opts.Comparer.Split(e.Bounds.End); n != len(e.Bounds.End) { - return nil, errors.Newf("pebble: external file bounds end key %q has suffix", e.Bounds.End) + if n := opts.Comparer.Split(e.EndKey); n != len(e.EndKey) { + return nil, errors.Newf("pebble: external file bounds end key %q has suffix", e.EndKey) } // #3287: range keys don't yet work correctly when the range key bounds are not tight. @@ -219,10 +219,13 @@ func ingestLoad1External( // In the name of keeping this ingestion as fast as possible, we avoid // *all* existence checks and synthesize a file metadata with smallest/largest // keys that overlap whatever the passed-in span was. - smallestCopy := slices.Clone(e.Bounds.Start) - largestCopy := slices.Clone(e.Bounds.End) + smallestCopy := slices.Clone(e.StartKey) + largestCopy := slices.Clone(e.EndKey) if e.HasPointKey { - if e.BoundsHasInclusiveEndKey { + // Sequence numbers are updated later by + // ingestUpdateSeqNum, applying a squence number that + // is applied to all keys in the sstable. + if e.EndKeyIsInclusive { meta.ExtendPointKeyBounds( opts.Comparer.Compare, base.MakeInternalKey(smallestCopy, 0, InternalKeyKindMax), @@ -1143,19 +1146,20 @@ type ExternalFile struct { // is acceptable in lieu of the backing file size. Size uint64 - // Bounds of the sstable; the ingestion of this file will only result in keys - // within [Bounds.Start, Bounds.End). These bounds are loose i.e. it's - // possible for keys to not span the entirety of this range. + // StartKey and EndKey define the bounds of the sstable; the ingestion + // of this file will only result in keys within [StartKey, EndKey) if + // EndKeyIsInclusive is false or [StartKey, EndKey] if it is true. + // These bounds are loose i.e. it's possible for keys to not span the + // entirety of this range. // - // The Bounds.Start/End user keys must not have suffixes. + // StartKey and EndKey user keys must not have suffixes. // // Multiple ExternalFiles in one ingestion must all have non-overlapping // bounds. - Bounds KeyRange + StartKey, EndKey []byte - // BoundsHasInclusiveEndKey is true if Bounds.End should be - // treated as inclusive. - BoundsHasInclusiveEndKey bool + // EndKeyIsInclusive is true if EndKey should be treated as inclusive. + EndKeyIsInclusive bool // HasPointKey and HasRangeKey denote whether this file contains point keys // or range keys. If both structs are false, an error is returned during diff --git a/metamorphic/ops.go b/metamorphic/ops.go index e1136dc648..542d6731ab 100644 --- a/metamorphic/ops.go +++ b/metamorphic/ops.go @@ -980,10 +980,12 @@ func (o *ingestExternalFilesOp) run(t *Test, h historyRecorder) { for i, obj := range objs { meta := t.getExternalObj(obj.externalObjID) external[i] = pebble.ExternalFile{ - Locator: "external", - ObjName: externalObjName(obj.externalObjID), - Size: meta.sstMeta.Size, - Bounds: obj.bounds, + Locator: "external", + ObjName: externalObjName(obj.externalObjID), + Size: meta.sstMeta.Size, + StartKey: obj.bounds.Start, + EndKey: obj.bounds.End, + EndKeyIsInclusive: false, // Note: if the table has point/range keys, we don't know for sure whether // this particular range has any, but that's acceptable. HasPointKey: meta.sstMeta.HasPointKeys || meta.sstMeta.HasRangeDelKeys, diff --git a/scan_internal.go b/scan_internal.go index 23de989bc6..ce47e08a93 100644 --- a/scan_internal.go +++ b/scan_internal.go @@ -467,7 +467,6 @@ func (d *DB) truncateExternalFile( Level: uint8(level), ObjName: objMeta.Remote.CustomObjectName, Locator: objMeta.Remote.Locator, - Bounds: KeyRange{}, HasPointKey: file.HasPointKeys, HasRangeKey: file.HasRangeKeys, Size: file.Size, @@ -479,27 +478,27 @@ func (d *DB) truncateExternalFile( needsLowerTruncate := cmp(lower, file.Smallest.UserKey) > 0 if needsLowerTruncate { - sst.Bounds.Start = slices.Clone(lower) + sst.StartKey = slices.Clone(lower) } else { - sst.Bounds.Start = slices.Clone(file.Smallest.UserKey) + sst.StartKey = slices.Clone(file.Smallest.UserKey) } cmpUpper := cmp(upper, file.Largest.UserKey) needsUpperTruncate := cmpUpper < 0 if needsUpperTruncate { - sst.Bounds.End = slices.Clone(upper) - sst.BoundsHasInclusiveEndKey = false + sst.EndKey = slices.Clone(upper) + sst.EndKeyIsInclusive = false } else { - sst.Bounds.End = slices.Clone(file.Largest.UserKey) - sst.BoundsHasInclusiveEndKey = !file.Largest.IsExclusiveSentinel() + sst.EndKey = slices.Clone(file.Largest.UserKey) + sst.EndKeyIsInclusive = !file.Largest.IsExclusiveSentinel() } - if cmp(sst.Bounds.Start, sst.Bounds.End) > 0 { - return nil, errors.AssertionFailedf("pebble: invalid external file bounds after truncation [%q, %q)", sst.Bounds.Start, sst.Bounds.End) + if cmp(sst.StartKey, sst.EndKey) > 0 { + return nil, errors.AssertionFailedf("pebble: invalid external file bounds after truncation [%q, %q)", sst.StartKey, sst.EndKey) } - if cmp(sst.Bounds.Start, sst.Bounds.End) == 0 && !sst.BoundsHasInclusiveEndKey { - return nil, errors.AssertionFailedf("pebble: invalid external file bounds after truncation [%q, %q)", sst.Bounds.Start, sst.Bounds.End) + if cmp(sst.StartKey, sst.EndKey) == 0 && !sst.EndKeyIsInclusive { + return nil, errors.AssertionFailedf("pebble: invalid external file bounds after truncation [%q, %q)", sst.StartKey, sst.EndKey) } return sst, nil diff --git a/scan_internal_test.go b/scan_internal_test.go index 2a6ab8fe5c..6ef4c56d39 100644 --- a/scan_internal_test.go +++ b/scan_internal_test.go @@ -475,15 +475,13 @@ func TestScanInternal(t *testing.T) { writeSST(points, rangeDels, rangeKeys, objstorageprovider.NewRemoteWritable(file)) ef := ExternalFile{ - ObjName: objName, - Locator: remote.Locator("external-storage"), - Size: 10, - Bounds: KeyRange{ - Start: smallest.UserKey, - End: largest.UserKey, - }, - BoundsHasInclusiveEndKey: true, - HasPointKey: true, + ObjName: objName, + Locator: remote.Locator("external-storage"), + Size: 10, + StartKey: smallest.UserKey, + EndKey: largest.UserKey, + EndKeyIsInclusive: true, + HasPointKey: true, } _, err = d.IngestExternalFiles([]ExternalFile{ef}) require.NoError(t, err) @@ -554,8 +552,8 @@ func TestScanInternal(t *testing.T) { externalFileVisitor = func(sst *ExternalFile) error { fmt.Fprintf(&b, "external file: %s %s [0x%s-0x%s] (hasPoint: %v, hasRange: %v)\n", sst.Locator, sst.ObjName, - hex.EncodeToString(sst.Bounds.Start), - hex.EncodeToString(sst.Bounds.End), + hex.EncodeToString(sst.StartKey), + hex.EncodeToString(sst.EndKey), sst.HasPointKey, sst.HasRangeKey) return nil }