diff --git a/internal/testkeys/testkeys.go b/internal/testkeys/testkeys.go index ab2f5d1dee7..3e6264703cc 100644 --- a/internal/testkeys/testkeys.go +++ b/internal/testkeys/testkeys.go @@ -102,6 +102,10 @@ var Comparer = &base.Comparer{ Name: "pebble.internal.testkeys", } +// The comparator is similar to the one in Cockroach; when the prefixes are +// equal: +// - if one key has a suffix and one doesn't the one without one is smaller; +// - otherwise, the key with the larger (decoded) suffix value is smaller. func compare(a, b []byte) int { ai, bi := split(a), split(b) if v := bytes.Compare(a[:ai], b[:bi]); v != 0 { diff --git a/metamorphic/build.go b/metamorphic/build.go index a33c02e63fa..d57b57b6ea3 100644 --- a/metamorphic/build.go +++ b/metamorphic/build.go @@ -32,6 +32,7 @@ func writeSSTForIngestion( pointIter base.InternalIterator, rangeDelIter keyspan.FragmentIterator, rangeKeyIter keyspan.FragmentIterator, + syntheticSuffix sstable.SyntheticSuffix, writable objstorage.Writable, targetFMV pebble.FormatMajorVersion, ) (*sstable.WriterMetadata, error) { @@ -47,6 +48,17 @@ func writeSSTForIngestion( rangeKeyIterCloser := base.CloseHelper(rangeKeyIter) defer rangeKeyIterCloser.Close() + outputKey := func(key []byte) []byte { + return slices.Clone(key) + } + + if len(syntheticSuffix) > 0 { + outputKey = func(key []byte) []byte { + n := t.opts.Comparer.Split(key) + return append(key[:n:n], syntheticSuffix...) + } + } + var lastUserKey []byte for key, value := pointIter.First(); key != nil; key, value = pointIter.Next() { // Ignore duplicate keys. @@ -67,7 +79,9 @@ func writeSSTForIngestion( if err != nil { return nil, err } - if err := w.Add(*key, valBytes); err != nil { + k := *key + k.UserKey = outputKey(k.UserKey) + if err := w.Add(k, valBytes); err != nil { return nil, err } } @@ -78,7 +92,7 @@ func writeSSTForIngestion( if rangeDelIter != nil { span, err := rangeDelIter.First() for ; span != nil; span, err = rangeDelIter.Next() { - if err := w.DeleteRange(slices.Clone(span.Start), slices.Clone(span.End)); err != nil { + if err := w.DeleteRange(outputKey(span.Start), outputKey(span.End)); err != nil { return nil, err } } @@ -103,8 +117,8 @@ func writeSSTForIngestion( // same sequence number is nonsensical, so we "coalesce" or collapse // the keys. collapsed := keyspan.Span{ - Start: slices.Clone(span.Start), - End: slices.Clone(span.End), + Start: outputKey(span.Start), + End: outputKey(span.End), Keys: make([]keyspan.Key, 0, len(span.Keys)), } rangekey.Coalesce( @@ -150,6 +164,7 @@ func buildForIngest( meta, err := writeSSTForIngestion( t, iter, rangeDelIter, rangeKeyIter, + nil, /* syntheticSuffix */ writable, db.FormatMajorVersion(), ) @@ -160,7 +175,12 @@ func buildForIngest( // external object (truncated to the given bounds) and returns its path and // metadata. func buildForIngestExternalEmulation( - t *Test, dbID objID, externalObjID objID, bounds pebble.KeyRange, i int, + t *Test, + dbID objID, + externalObjID objID, + bounds pebble.KeyRange, + syntheticSuffix sstable.SyntheticSuffix, + i int, ) (path string, _ *sstable.WriterMetadata) { path = t.opts.FS.PathJoin(t.tmpDir, fmt.Sprintf("ext%d-%d", dbID.slot(), i)) f, err := t.opts.FS.Create(path) @@ -173,6 +193,7 @@ func buildForIngestExternalEmulation( meta, err := writeSSTForIngestion( t, pointIter, rangeDelIter, rangeKeyIter, + syntheticSuffix, writable, t.minFMV(), ) @@ -225,28 +246,41 @@ func openExternalObj( return reader, pointIter, rangeDelIter, rangeKeyIter } -// externalObjIsEmpty returns true if the given external object has no point or -// range keys withing the given bounds. -func externalObjIsEmpty(t *Test, externalObjID objID, bounds pebble.KeyRange) bool { +// analyzeExternalObj looks at all keys and spans for the given external object +// (within the given bounds) and returns whether there are any keys and the max +// suffix. +func analyzeExternalObj( + t *Test, externalObjID objID, bounds pebble.KeyRange, +) (isEmpty bool, maxSuffix []byte) { reader, pointIter, rangeDelIter, rangeKeyIter := openExternalObj(t, externalObjID, bounds) defer reader.Close() defer closeIters(pointIter, rangeDelIter, rangeKeyIter) - key, _ := pointIter.First() - panicIfErr(pointIter.Error()) - if key != nil { - return false + isEmpty = true + analyzeKey := func(key []byte) { + isEmpty = false + n := t.opts.Comparer.Split(key) + if suffix := key[n:]; len(suffix) > 0 && t.opts.Comparer.Compare(maxSuffix, suffix) < 0 { + maxSuffix = slices.Clone(suffix) + } + } + + for key, _ := pointIter.First(); key != nil; key, _ = pointIter.Next() { + analyzeKey(key.UserKey) } + panicIfErr(pointIter.Error()) for _, it := range []keyspan.FragmentIterator{rangeDelIter, rangeKeyIter} { - if it != nil { - span, err := it.First() - panicIfErr(err) - if span != nil { - return false - } + if it == nil { + continue + } + span, err := it.First() + for ; span != nil; span, err = it.Next() { + analyzeKey(span.Start) + analyzeKey(span.End) } + panicIfErr(err) } - return true + return isEmpty, maxSuffix } func panicIfErr(err error) { diff --git a/metamorphic/generator.go b/metamorphic/generator.go index b9f00aecd38..82233c063b0 100644 --- a/metamorphic/generator.go +++ b/metamorphic/generator.go @@ -1338,6 +1338,11 @@ func (g *generator) writerIngestExternalFiles() { } o.bounds.End = keys[2*i+1] } + for i := range objs { + if g.rng.Intn(2) == 0 { + objs[i].syntheticSuffix = g.keyGenerator.UniformSuffix() + } + } // The batches we're ingesting may contain single delete tombstones that // when applied to the writer result in nondeterminism in the deleted key. // If that's the case, we can restore determinism by first deleting the keys diff --git a/metamorphic/ops.go b/metamorphic/ops.go index 00d25851617..bf58f0e4abb 100644 --- a/metamorphic/ops.go +++ b/metamorphic/ops.go @@ -926,21 +926,48 @@ type ingestExternalFilesOp struct { type externalObjWithBounds struct { externalObjID objID bounds pebble.KeyRange + // We will only apply the synthetic suffix if all the suffixes within the + // bounds are smaller. + syntheticSuffix sstable.SyntheticSuffix } func (o *ingestExternalFilesOp) run(t *Test, h historyRecorder) { db := t.getDB(o.dbID) + + // We modify objs to eliminate empty objects and clear invalid synthetic suffixes. + var objs []externalObjWithBounds + for _, obj := range o.objs { + // Make sure the object exists and is not empty. + objMeta := t.getExternalObj(obj.externalObjID) + if !objMeta.HasPointKeys && !objMeta.HasRangeKeys && !objMeta.HasRangeDelKeys { + continue + } + isEmpty, maxSuffix := analyzeExternalObj(t, obj.externalObjID, obj.bounds) + if isEmpty { + // Currently we don't support ingesting external objects that have no point + // or range keys in the given range. Filter out any such objects. + // TODO(radu): even though we don't expect this case in practice, eventually + // we want to make sure that it doesn't cause failures. + continue + } + if obj.syntheticSuffix != nil && t.opts.Comparer.Compare(maxSuffix, obj.syntheticSuffix) > 0 { + // The synthetic suffix cannot be applied, as there are larger suffixes in the range. + obj.syntheticSuffix = nil + } + objs = append(objs, obj) + } + var err error if !t.testOpts.externalStorageEnabled { // Emulate the operation by crating local, truncated SST files and ingesting // them. var paths []string - for i, obj := range o.objs { + for i, obj := range objs { // Make sure the object exists and is not empty. if objMeta := t.getExternalObj(obj.externalObjID); !objMeta.HasPointKeys && !objMeta.HasRangeKeys && !objMeta.HasRangeDelKeys { continue } - path, meta := buildForIngestExternalEmulation(t, o.dbID, obj.externalObjID, obj.bounds, i) + path, meta := buildForIngestExternalEmulation(t, o.dbID, obj.externalObjID, obj.bounds, obj.syntheticSuffix, i) if meta.HasPointKeys || meta.HasRangeKeys || meta.HasRangeDelKeys { paths = append(paths, path) } @@ -949,20 +976,6 @@ func (o *ingestExternalFilesOp) run(t *Test, h historyRecorder) { err = db.Ingest(paths) } } else { - // Currently we don't support ingesting external objects that have no point - // or range keys in the given range. Filter out any such objects. - // TODO(radu): even though we don't expect this case in practice, eventually - // we want to make sure that it doesn't cause failures. - var objs []externalObjWithBounds - for _, obj := range o.objs { - objMeta := t.getExternalObj(obj.externalObjID) - if objMeta.HasPointKeys || objMeta.HasRangeKeys || objMeta.HasRangeDelKeys { - if !externalObjIsEmpty(t, obj.externalObjID, obj.bounds) { - objs = append(objs, obj) - } - } - } - external := make([]pebble.ExternalFile, len(objs)) for i, obj := range objs { meta := t.getExternalObj(obj.externalObjID) @@ -974,8 +987,9 @@ func (o *ingestExternalFilesOp) run(t *Test, h historyRecorder) { LargestUserKey: obj.bounds.End, // Note: if the table has point/range keys, we don't know for sure whether // this particular range has any, but that's acceptable. - HasPointKey: meta.HasPointKeys || meta.HasRangeDelKeys, - HasRangeKey: meta.HasRangeKeys, + HasPointKey: meta.HasPointKeys || meta.HasRangeDelKeys, + HasRangeKey: meta.HasRangeKeys, + SyntheticSuffix: obj.syntheticSuffix, // TODO(radu): test prefix replacement. } } @@ -1761,6 +1775,7 @@ func (o *newExternalObjOp) run(t *Test, h historyRecorder) { meta, err := writeSSTForIngestion( t, iter, rangeDelIter, rangeKeyIter, + nil, /* syntheticSuffix */ writable, t.minFMV(), )