metamorphic: track object key bounds
This commit adjusts operation generation to track the smallest and largest keys
written to every object. These bounds are used to determine at generation time
whether an ingestion should succeed or fail. Only if the ingestion will succeed
is key manager state updated to reflect the operation.

This increases the probability of multi-batch ingestions and of ingestions
involving deletion operations, by removing the old canTolerateApplyFailure
mechanism that excluded batches containing any delete operations from
multi-batch ingestions.
jbowens committed Dec 14, 2023
1 parent 288bf0f commit ab4952c
Showing 4 changed files with 181 additions and 69 deletions.
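
For intuition, here is a standalone sketch (not part of the commit) of the overlap test that now decides at generation time whether a generated ingestion succeeds. It uses bytes.Compare where the metamorphic test uses testkeys.Comparer, and the example bounds mirror the range deletions exercised in the new testdata below.

package main

import (
	"bytes"
	"fmt"
)

// bounds mirrors the struct added in metamorphic/key_manager.go: an inclusive
// smallest key, and a largest key that may be exclusive (as for range
// deletions).
type bounds struct {
	smallest    []byte
	largest     []byte
	largestExcl bool
}

// overlaps reports whether two bounds intersect, honoring exclusive ends.
func (b *bounds) overlaps(cmp func(a, b []byte) int, other *bounds) bool {
	// b lies strictly before other.
	if v := cmp(b.largest, other.smallest); v < 0 || (v == 0 && b.largestExcl) {
		return false
	}
	// b lies strictly after other.
	if v := cmp(b.smallest, other.largest); v > 0 || (v == 0 && other.largestExcl) {
		return false
	}
	return true
}

func main() {
	// batch1 wrote DeleteRange("a", "z"); batch2 wrote DeleteRange("y", "z").
	b1 := &bounds{smallest: []byte("a"), largest: []byte("z"), largestExcl: true}
	b2 := &bounds{smallest: []byte("y"), largest: []byte("z"), largestExcl: true}
	// Overlapping bounds mean db1.Ingest(batch1, batch2) must fail.
	fmt.Println(b1.overlaps(bytes.Compare, b2)) // true

	// Bounds that touch only at an exclusive end key are disjoint, so an
	// ingestion of these two batches would succeed.
	b3 := &bounds{smallest: []byte("a"), largest: []byte("m"), largestExcl: true}
	b4 := &bounds{smallest: []byte("m"), largest: []byte("z")}
	fmt.Println(b3.overlaps(bytes.Compare, b4)) // false
}

The second pair touching only at an exclusive end is the reason bounds track largestExcl at all: without it, adjacent range deletions would spuriously count as overlapping.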
74 changes: 29 additions & 45 deletions metamorphic/generator.go
@@ -1427,58 +1427,42 @@ func (g *generator) writerIngest() {
 		return
 	}
 
-	// TODO(nicktrav): this is resulting in too many single batch ingests.
-	// Consider alternatives. One possibility would be to pass through whether
-	// we can tolerate failure or not, and if the ingestOp encounters a
-	// failure, it would retry after splitting into single batch ingests.
-
-	dbID := g.dbs.rand(g.rng)
-	// Ingest between 1 and 3 batches.
-	batchIDs := make([]objID, 0, 1+g.rng.Intn(3))
-	canFail := cap(batchIDs) > 1
-	for i := 0; i < cap(batchIDs); i++ {
-		batchID := g.liveBatches.rand(g.rng)
-		if canFail && !g.keyManager.canTolerateApplyFailure(batchID) {
-			continue
-		}
-		// After the ingest runs, it either succeeds and the keys are in the
-		// DB, or it fails and these keys never make it to the DB.
-		g.removeBatchFromGenerator(batchID)
-		batchIDs = append(batchIDs, batchID)
-		if len(g.liveBatches) == 0 {
-			break
-		}
-	}
-	if len(batchIDs) == 0 && len(g.liveBatches) > 0 {
-		// Unable to find multiple batches because of the
-		// canTolerateApplyFailure call above, so just pick one batch.
+	dbID := g.dbs.rand(g.rng)
+	n := min(1+g.rng.Intn(3), len(g.liveBatches))
+	batchIDs := make([]objID, n)
+	derivedDBIDs := make([]objID, n)
+	for i := 0; i < n; i++ {
 		batchID := g.liveBatches.rand(g.rng)
+		batchIDs[i] = batchID
+		derivedDBIDs[i] = g.objDB[batchIDs[i]]
 		g.removeBatchFromGenerator(batchID)
-		batchIDs = append(batchIDs, batchID)
 	}
 
-	// The batches we're ingesting may contain single delete tombstones that
-	// when applied to the writer result in nondeterminism in the deleted key.
-	// If that's the case, we can restore determinism by first deleting the keys
-	// from the writer.
-	//
-	// Generating additional operations here is not ideal, but it simplifies
-	// single delete invariants significantly.
-	for _, batchID := range batchIDs {
-		singleDeleteConflicts := g.keyManager.checkForSingleDelConflicts(batchID, dbID, true /* collapsed */)
-		for _, conflict := range singleDeleteConflicts {
-			g.add(&deleteOp{
-				writerID:    dbID,
-				key:         conflict,
-				derivedDBID: dbID,
-			})
+	// Ingestions may fail if the ingested sstables overlap one another.
+	// Either it succeeds and its keys are committed to the DB, or it fails and
+	// the keys are not committed.
+	if !g.keyManager.doObjectBoundsOverlap(batchIDs) {
+		// This ingestion will succeed.
+		//
+		// The batches we're ingesting may contain single delete tombstones that
+		// when applied to the writer result in nondeterminism in the deleted key.
+		// If that's the case, we can restore determinism by first deleting the keys
+		// from the writer.
+		//
+		// Generating additional operations here is not ideal, but it simplifies
+		// single delete invariants significantly.
+		for _, batchID := range batchIDs {
+			singleDeleteConflicts := g.keyManager.checkForSingleDelConflicts(batchID, dbID, true /* collapsed */)
+			for _, conflict := range singleDeleteConflicts {
+				g.add(&deleteOp{
+					writerID:    dbID,
+					key:         conflict,
+					derivedDBID: dbID,
+				})
+			}
 		}
 	}
 
-	derivedDBIDs := make([]objID, len(batchIDs))
-	for i := range batchIDs {
-		derivedDBIDs[i] = g.objDB[batchIDs[i]]
-	}
 	g.add(&ingestOp{
 		dbID:         dbID,
 		batchIDs:     batchIDs,
131 changes: 107 additions & 24 deletions metamorphic/key_manager.go
@@ -78,6 +78,43 @@ func (m *keyMeta) mergeInto(dst *keyMeta, ts int) {
 	}
 }
 
+type bounds struct {
+	smallest    []byte
+	largest     []byte
+	largestExcl bool // is largest exclusive?
+}
+
+func (b *bounds) String() string {
+	if b.largestExcl {
+		return fmt.Sprintf("[%q,%q)", b.smallest, b.largest)
+	}
+	return fmt.Sprintf("[%q,%q]", b.smallest, b.largest)
+}
+
+// overlaps returns true iff the bounds intersect.
+func (b *bounds) overlaps(cmp base.Compare, other *bounds) bool {
+	// Is b strictly before other?
+	if v := cmp(b.largest, other.smallest); v < 0 || (v == 0 && b.largestExcl) {
+		return false
+	}
+	// Is b strictly after other?
+	if v := cmp(b.smallest, other.largest); v > 0 || (v == 0 && other.largestExcl) {
+		return false
+	}
+	return true
+}
+
+// mergeInto merges the receiver bounds into other, mutating other.
+func (b bounds) mergeInto(cmp base.Compare, other *bounds) {
+	if cmp(other.smallest, b.smallest) > 0 {
+		other.smallest = b.smallest
+	}
+	if v := cmp(other.largest, b.largest); v < 0 || (v == 0 && other.largestExcl) {
+		other.largest = b.largest
+		other.largestExcl = b.largestExcl
+	}
+}
+
 // keyManager tracks the write operations performed on keys in the generation
 // phase of the metamorphic test. It maintains histories of operations performed
 // against every unique user key on every writer object. These histories inform
@@ -154,6 +191,10 @@ type keyManager struct {
 	// List of keys per writer, and what has happened to it in that writer.
 	// Will be transferred when needed.
 	byObj map[objID][]*keyMeta
+	// boundsByObj holds user key bounds encompassing all the keys set within an
+	// object. It's updated within `update` when a new op is generated. It's
+	// used when determining whether an ingestion should succeed or not.
+	boundsByObj map[objID]*bounds
 
 	// globalKeys represents all the keys that have been generated so far. Not
 	// all these keys have been written to. globalKeys is sorted.
@@ -176,13 +217,13 @@ func (k *keyManager) nextMetaTimestamp() int {
 }
 
 // newKeyManager returns a pointer to a new keyManager. Callers should
-// interact with this using addNewKey, knownKeys, update,
-// canTolerateApplyFailure methods only.
+// interact with this using addNewKey, knownKeys, update methods only.
 func newKeyManager(numInstances int) *keyManager {
 	m := &keyManager{
 		comparer:             testkeys.Comparer,
 		byObjKey:             make(map[string]*keyMeta),
 		byObj:                make(map[objID][]*keyMeta),
+		boundsByObj:          make(map[objID]*bounds),
 		globalKeysMap:        make(map[string]bool),
 		globalKeyPrefixesMap: make(map[string]struct{}),
 	}
@@ -222,6 +263,12 @@ func (k *keyManager) getOrInit(id objID, key []byte) *keyMeta {
 	k.byObjKey[o.String()] = m
 	// Add to the id-to-metas slice.
 	k.byObj[o.id] = append(k.byObj[o.id], m)
+
+	// Expand the object's bounds to contain this key if they don't already.
+	k.expandBounds(id, bounds{
+		smallest: key,
+		largest:  key,
+	})
 	return m
 }
 
@@ -278,8 +325,48 @@ func (k *keyManager) mergeKeysInto(from, to objID, mergeFunc func(src, dst *keyM
 		iTo++
 	}
 
-	k.byObj[to] = msNew   // Update "to".
-	delete(k.byObj, from) // Unlink "from".
+	// All the keys in `from` have been merged into `to`. Expand `to`'s bounds
+	// to be at least as wide as `from`'s.
+	if fromBounds := k.boundsByObj[from]; fromBounds != nil {
+		k.expandBounds(to, *fromBounds)
+	}
+	k.byObj[to] = msNew         // Update "to" obj.
+	delete(k.byObj, from)       // Unlink "from" obj.
+	delete(k.boundsByObj, from) // Unlink "from" bounds.
 }
 
+// expandBounds expands the incrementally maintained bounds of o to be at least
+// as wide as `b`.
+func (k *keyManager) expandBounds(o objID, b bounds) {
+	existing, ok := k.boundsByObj[o]
+	if !ok {
+		existing = new(bounds)
+		*existing = b
+		k.boundsByObj[o] = existing
+		return
+	}
+	b.mergeInto(k.comparer.Compare, existing)
+}
+
+// doObjectBoundsOverlap returns true iff any of the named objects have key
+// bounds that overlap any other named object.
+func (k *keyManager) doObjectBoundsOverlap(objIDs []objID) bool {
+	for i := range objIDs {
+		ib, iok := k.boundsByObj[objIDs[i]]
+		if !iok {
+			continue
+		}
+		for j := i + 1; j < len(objIDs); j++ {
+			jb, jok := k.boundsByObj[objIDs[j]]
+			if !jok {
+				continue
+			}
+			if ib.overlaps(k.comparer.Compare, jb) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
 // checkForSingleDelConflicts examines all the keys written to srcObj, and
@@ -440,13 +527,29 @@ func (k *keyManager) update(o op) {
 				})
 			}
 		}
+		k.expandBounds(s.writerID, bounds{
+			smallest:    s.start,
+			largest:     s.end,
+			largestExcl: true,
+		})
 	case *singleDeleteOp:
 		meta := k.getOrInit(s.writerID, s.key)
 		meta.history = append(meta.history, keyHistoryItem{
 			opType:        writerSingleDelete,
 			metaTimestamp: k.nextMetaTimestamp(),
 		})
+
 	case *ingestOp:
+		// Some ingestion operations may attempt to ingest overlapping sstables
+		// which is prohibited. We know at generation time whether these
+		// ingestions will be successful. If they won't be successful, we should
+		// not update the key state because both the batch(es) and target DB
+		// will be left unmodified.
+		if k.doObjectBoundsOverlap(s.batchIDs) {
+			// This ingestion will fail.
+			return
+		}
+
 		// For each batch, merge the keys into the DB. We can't call
 		// keyMeta.mergeInto directly to merge, because ingest operations first
 		// "flatten" the batch (because you can't set the same key twice at a
@@ -520,26 +623,6 @@ func (k *keyManager) eligibleSingleDeleteKeys(o objID) (keys [][]byte) {
 	return keys
 }
 
-// canTolerateApplyFailure is called with a batch ID and returns true iff a
-// failure to apply this batch to the DB can be tolerated.
-func (k *keyManager) canTolerateApplyFailure(id objID) bool {
-	if id.tag() != batchTag {
-		panic("called with an objID that is not a batch")
-	}
-	ms, ok := k.byObj[id]
-	if !ok {
-		return true
-	}
-	for _, m := range ms {
-		for i := len(m.history) - 1; i >= 0; i-- {
-			if m.history[i].opType.isDelete() {
-				return false
-			}
-		}
-	}
-	return true
-}
-
 // a keyHistoryItem describes an individual operation performed on a key.
 type keyHistoryItem struct {
 	// opType may be writerSet, writerDelete, writerSingleDelete,
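To see how an object's incrementally maintained bounds widen as operations are generated, here is a similar standalone sketch (again not part of the commit, with bytes.Compare standing in for the test's comparer). It reproduces the batch1 progression from the testdata below: a point set at "foo" followed by DeleteRange("a", "z").

package main

import (
	"bytes"
	"fmt"
)

type bounds struct {
	smallest    []byte
	largest     []byte
	largestExcl bool
}

func (b bounds) String() string {
	if b.largestExcl {
		return fmt.Sprintf("[%q,%q)", b.smallest, b.largest)
	}
	return fmt.Sprintf("[%q,%q]", b.smallest, b.largest)
}

// mergeInto widens other to cover b, as bounds.mergeInto does in the diff
// above; keyManager.expandBounds applies it to an object's running bounds.
func (b bounds) mergeInto(cmp func(x, y []byte) int, other *bounds) {
	if cmp(other.smallest, b.smallest) > 0 {
		other.smallest = b.smallest
	}
	if v := cmp(other.largest, b.largest); v < 0 || (v == 0 && other.largestExcl) {
		other.largest = b.largest
		other.largestExcl = b.largestExcl
	}
}

func main() {
	// batch1.Set("foo", ...): a point key contributes the inclusive bounds
	// ["foo","foo"], as getOrInit's call to expandBounds records.
	running := &bounds{smallest: []byte("foo"), largest: []byte("foo")}
	fmt.Println(running) // ["foo","foo"]

	// batch1.DeleteRange("a", "z"): range deletions have an exclusive end key.
	rangeDel := bounds{smallest: []byte("a"), largest: []byte("z"), largestExcl: true}
	rangeDel.mergeInto(bytes.Compare, running)
	fmt.Println(running) // ["a","z"), matching the testdata output below
}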
5 changes: 5 additions & 0 deletions metamorphic/key_manager_test.go
@@ -134,6 +134,11 @@ func TestKeyManager(t *testing.T) {
 			} else {
 				fmt.Fprintf(&buf, "%q already tracked\n", fields[1])
 			}
+		case "bounds":
+			for i := 1; i < len(fields); i++ {
+				objID := mustParseObjID(fields[i])
+				fmt.Fprintf(&buf, "%s: %s\n", objID, km.boundsByObj[objID])
+			}
 		case "keys":
 			fmt.Fprintf(&buf, "keys: ")
 			printKeys(&buf, km.globalKeys)
40 changes: 40 additions & 0 deletions metamorphic/testdata/key_manager
@@ -22,6 +22,7 @@ op db1.Set("foo", "foo")
 keys
 singledel-keys db1
 singledel-keys batch1
+bounds db1
 op db1.SingleDelete("foo", false)
 keys
 singledel-keys db1
@@ -33,6 +34,7 @@ can singledel on batch1: "foo"
 keys: "foo"
 can singledel on db1: "foo"
 can singledel on batch1: "foo"
+db1: ["foo","foo"]
 [db1.SingleDelete("foo", false /* maybeReplaceDelete */)]
 keys: "foo"
 can singledel on db1: "foo"
@@ -302,6 +304,7 @@ op db1.Ingest(batch1)
 singledel-keys db1
 op db1.Set("foo", "foo")
 singledel-keys db1
+bounds db1
 ----
 "foo" is new
 "bar" is new
@@ -314,6 +317,43 @@ conflicts merging batch1 (collapsed) into db1: (none)
 can singledel on db1: "bar", "foo"
 [db1.Set("foo", "foo")]
 can singledel on db1: "bar"
+db1: ["a","z")
+
+# Repeat the above test, but this time with an ingestion that should fail due to
+# overlapping key ranges.
+
+run
+add-new-key foo
+add-new-key bar
+op db1.Set("foo", "foo")
+singledel-keys db1
+op batch1.Set("foo", "foo")
+bounds batch1
+op batch1.DeleteRange("a", "z")
+bounds batch1
+op batch2.DeleteRange("y", "z")
+conflicts collapsed batch1 db1
+op db1.Ingest(batch1, batch2)
+singledel-keys db1
+op db1.Set("foo", "foo")
+singledel-keys db1
+bounds db1
+----
+"foo" already tracked
+"bar" already tracked
+[db1.Set("foo", "foo")]
+can singledel on db1: "bar"
+[batch1.Set("foo", "foo")]
+batch1: ["foo","foo"]
+[batch1.DeleteRange("a", "z")]
+batch1: ["a","z")
+[batch2.DeleteRange("y", "z")]
+conflicts merging batch1 (collapsed) into db1: (none)
+[db1.Ingest(batch1, batch2)]
+can singledel on db1: "bar"
+[db1.Set("foo", "foo")]
+can singledel on db1: "bar"
+db1: ["a","z")
 
 # Since ingestion flattens keys, foo should be single-deletable on the db after
 # ingest, even though it couldn't be single deleted from the batch before
