Skip to content

Commit

Permalink
metamorphic: add testing for synthetic suffix
Browse files Browse the repository at this point in the history
Add a random synthetic suffix for external ingestion.

One difficulty is that the synthetic suffix has to be larger than all
existing suffixes. We can't easily determine this at generation time,
so we generate a random suffix and at runtime determine if it's ok to
use or not.
  • Loading branch information
RaduBerinde committed Feb 15, 2024
1 parent d062cb8 commit de72781
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 37 deletions.
4 changes: 4 additions & 0 deletions internal/testkeys/testkeys.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ var Comparer = &base.Comparer{
Name: "pebble.internal.testkeys",
}

// The comparator is similar to the one in Cockroach; when the prefixes are
// equal:
// - if one key has a suffix and one doesn't the one without one is smaller;
// - otherwise, the key with the larger (decoded) suffix value is smaller.
func compare(a, b []byte) int {
ai, bi := split(a), split(b)
if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
Expand Down
72 changes: 53 additions & 19 deletions metamorphic/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func writeSSTForIngestion(
pointIter base.InternalIterator,
rangeDelIter keyspan.FragmentIterator,
rangeKeyIter keyspan.FragmentIterator,
syntheticSuffix sstable.SyntheticSuffix,
writable objstorage.Writable,
targetFMV pebble.FormatMajorVersion,
) (*sstable.WriterMetadata, error) {
Expand All @@ -47,6 +48,17 @@ func writeSSTForIngestion(
rangeKeyIterCloser := base.CloseHelper(rangeKeyIter)
defer rangeKeyIterCloser.Close()

outputKey := func(key []byte) []byte {
return slices.Clone(key)
}

if len(syntheticSuffix) > 0 {
outputKey = func(key []byte) []byte {
n := t.opts.Comparer.Split(key)
return append(key[:n:n], syntheticSuffix...)
}
}

var lastUserKey []byte
for key, value := pointIter.First(); key != nil; key, value = pointIter.Next() {
// Ignore duplicate keys.
Expand All @@ -67,7 +79,9 @@ func writeSSTForIngestion(
if err != nil {
return nil, err
}
if err := w.Add(*key, valBytes); err != nil {
k := *key
k.UserKey = outputKey(k.UserKey)
if err := w.Add(k, valBytes); err != nil {
return nil, err
}
}
Expand All @@ -78,7 +92,7 @@ func writeSSTForIngestion(
if rangeDelIter != nil {
span, err := rangeDelIter.First()
for ; span != nil; span, err = rangeDelIter.Next() {
if err := w.DeleteRange(slices.Clone(span.Start), slices.Clone(span.End)); err != nil {
if err := w.DeleteRange(outputKey(span.Start), outputKey(span.End)); err != nil {
return nil, err
}
}
Expand All @@ -103,8 +117,8 @@ func writeSSTForIngestion(
// same sequence number is nonsensical, so we "coalesce" or collapse
// the keys.
collapsed := keyspan.Span{
Start: slices.Clone(span.Start),
End: slices.Clone(span.End),
Start: outputKey(span.Start),
End: outputKey(span.End),
Keys: make([]keyspan.Key, 0, len(span.Keys)),
}
rangekey.Coalesce(
Expand Down Expand Up @@ -150,6 +164,7 @@ func buildForIngest(
meta, err := writeSSTForIngestion(
t,
iter, rangeDelIter, rangeKeyIter,
nil, /* syntheticSuffix */
writable,
db.FormatMajorVersion(),
)
Expand All @@ -160,7 +175,12 @@ func buildForIngest(
// external object (truncated to the given bounds) and returns its path and
// metadata.
func buildForIngestExternalEmulation(
t *Test, dbID objID, externalObjID objID, bounds pebble.KeyRange, i int,
t *Test,
dbID objID,
externalObjID objID,
bounds pebble.KeyRange,
syntheticSuffix sstable.SyntheticSuffix,
i int,
) (path string, _ *sstable.WriterMetadata) {
path = t.opts.FS.PathJoin(t.tmpDir, fmt.Sprintf("ext%d-%d", dbID.slot(), i))
f, err := t.opts.FS.Create(path)
Expand All @@ -173,6 +193,7 @@ func buildForIngestExternalEmulation(
meta, err := writeSSTForIngestion(
t,
pointIter, rangeDelIter, rangeKeyIter,
syntheticSuffix,
writable,
t.minFMV(),
)
Expand Down Expand Up @@ -225,28 +246,41 @@ func openExternalObj(
return reader, pointIter, rangeDelIter, rangeKeyIter
}

// externalObjIsEmpty returns true if the given external object has no point or
// range keys withing the given bounds.
func externalObjIsEmpty(t *Test, externalObjID objID, bounds pebble.KeyRange) bool {
// analyzeExternalObj looks at all keys and spans for the given external object
// (within the given bounds) and returns whether there are any keys and the max
// suffix.
func analyzeExternalObj(
t *Test, externalObjID objID, bounds pebble.KeyRange,
) (isEmpty bool, maxSuffix []byte) {
reader, pointIter, rangeDelIter, rangeKeyIter := openExternalObj(t, externalObjID, bounds)
defer reader.Close()
defer closeIters(pointIter, rangeDelIter, rangeKeyIter)

key, _ := pointIter.First()
panicIfErr(pointIter.Error())
if key != nil {
return false
isEmpty = true
analyzeKey := func(key []byte) {
isEmpty = false
n := t.opts.Comparer.Split(key)
if suffix := key[n:]; len(suffix) > 0 && t.opts.Comparer.Compare(maxSuffix, suffix) < 0 {
maxSuffix = slices.Clone(suffix)
}
}

for key, _ := pointIter.First(); key != nil; key, _ = pointIter.Next() {
analyzeKey(key.UserKey)
}
panicIfErr(pointIter.Error())
for _, it := range []keyspan.FragmentIterator{rangeDelIter, rangeKeyIter} {
if it != nil {
span, err := it.First()
panicIfErr(err)
if span != nil {
return false
}
if it == nil {
continue
}
span, err := it.First()
for ; span != nil; span, err = it.Next() {
analyzeKey(span.Start)
analyzeKey(span.End)
}
panicIfErr(err)
}
return true
return isEmpty, maxSuffix
}

func panicIfErr(err error) {
Expand Down
5 changes: 5 additions & 0 deletions metamorphic/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1338,6 +1338,11 @@ func (g *generator) writerIngestExternalFiles() {
}
o.bounds.End = keys[2*i+1]
}
for i := range objs {
if g.rng.Intn(2) == 0 {
objs[i].syntheticSuffix = g.keyGenerator.UniformSuffix()
}
}
// The batches we're ingesting may contain single delete tombstones that
// when applied to the writer result in nondeterminism in the deleted key.
// If that's the case, we can restore determinism by first deleting the keys
Expand Down
51 changes: 33 additions & 18 deletions metamorphic/ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -926,21 +926,48 @@ type ingestExternalFilesOp struct {
type externalObjWithBounds struct {
externalObjID objID
bounds pebble.KeyRange
// We will only apply the synthetic suffix if all the suffixes within the
// bounds are smaller.
syntheticSuffix sstable.SyntheticSuffix
}

func (o *ingestExternalFilesOp) run(t *Test, h historyRecorder) {
db := t.getDB(o.dbID)

// We modify objs to eliminate empty objects and clear invalid synthetic suffixes.
var objs []externalObjWithBounds
for _, obj := range o.objs {
// Make sure the object exists and is not empty.
objMeta := t.getExternalObj(obj.externalObjID)
if !objMeta.HasPointKeys && !objMeta.HasRangeKeys && !objMeta.HasRangeDelKeys {
continue
}
isEmpty, maxSuffix := analyzeExternalObj(t, obj.externalObjID, obj.bounds)
if isEmpty {
// Currently we don't support ingesting external objects that have no point
// or range keys in the given range. Filter out any such objects.
// TODO(radu): even though we don't expect this case in practice, eventually
// we want to make sure that it doesn't cause failures.
continue
}
if obj.syntheticSuffix != nil && t.opts.Comparer.Compare(maxSuffix, obj.syntheticSuffix) > 0 {
// The synthetic suffix cannot be applied, as there are larger suffixes in the range.
obj.syntheticSuffix = nil
}
objs = append(objs, obj)
}

var err error
if !t.testOpts.externalStorageEnabled {
// Emulate the operation by crating local, truncated SST files and ingesting
// them.
var paths []string
for i, obj := range o.objs {
for i, obj := range objs {
// Make sure the object exists and is not empty.
if objMeta := t.getExternalObj(obj.externalObjID); !objMeta.HasPointKeys && !objMeta.HasRangeKeys && !objMeta.HasRangeDelKeys {
continue
}
path, meta := buildForIngestExternalEmulation(t, o.dbID, obj.externalObjID, obj.bounds, i)
path, meta := buildForIngestExternalEmulation(t, o.dbID, obj.externalObjID, obj.bounds, obj.syntheticSuffix, i)
if meta.HasPointKeys || meta.HasRangeKeys || meta.HasRangeDelKeys {
paths = append(paths, path)
}
Expand All @@ -949,20 +976,6 @@ func (o *ingestExternalFilesOp) run(t *Test, h historyRecorder) {
err = db.Ingest(paths)
}
} else {
// Currently we don't support ingesting external objects that have no point
// or range keys in the given range. Filter out any such objects.
// TODO(radu): even though we don't expect this case in practice, eventually
// we want to make sure that it doesn't cause failures.
var objs []externalObjWithBounds
for _, obj := range o.objs {
objMeta := t.getExternalObj(obj.externalObjID)
if objMeta.HasPointKeys || objMeta.HasRangeKeys || objMeta.HasRangeDelKeys {
if !externalObjIsEmpty(t, obj.externalObjID, obj.bounds) {
objs = append(objs, obj)
}
}
}

external := make([]pebble.ExternalFile, len(objs))
for i, obj := range objs {
meta := t.getExternalObj(obj.externalObjID)
Expand All @@ -974,8 +987,9 @@ func (o *ingestExternalFilesOp) run(t *Test, h historyRecorder) {
LargestUserKey: obj.bounds.End,
// Note: if the table has point/range keys, we don't know for sure whether
// this particular range has any, but that's acceptable.
HasPointKey: meta.HasPointKeys || meta.HasRangeDelKeys,
HasRangeKey: meta.HasRangeKeys,
HasPointKey: meta.HasPointKeys || meta.HasRangeDelKeys,
HasRangeKey: meta.HasRangeKeys,
SyntheticSuffix: obj.syntheticSuffix,
// TODO(radu): test prefix replacement.
}
}
Expand Down Expand Up @@ -1761,6 +1775,7 @@ func (o *newExternalObjOp) run(t *Test, h historyRecorder) {
meta, err := writeSSTForIngestion(
t,
iter, rangeDelIter, rangeKeyIter,
nil, /* syntheticSuffix */
writable,
t.minFMV(),
)
Expand Down

0 comments on commit de72781

Please sign in to comment.