Skip to content

Commit

Permalink
storage: CheckSSTConflict fix for nexting over overlapping points
Browse files Browse the repository at this point in the history
Previously, the nexting logic around both iterators
being at a range key and not a point key was flawed
in that we'd miss ext points that were in between
the current and next sst keys, when we'd next both
of them. This change addresses that.

It also addresses other miscellaneous corner cases around
stats calculations with overlapping sst/engine range keys
and point keys. All these bugs were found with the upcoming
CheckSSTConflicts randomized test in cockroachdb#98408.

Epic: none

Release note: None
  • Loading branch information
itsbilal committed Mar 21, 2023
1 parent 679d8e7 commit 05cd4bc
Show file tree
Hide file tree
Showing 2 changed files with 155 additions and 72 deletions.
47 changes: 46 additions & 1 deletion pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func TestEvalAddSSTable(t *testing.T) {
noShadowBelow int64 // DisallowShadowingBelow
requireReqTS bool // AddSSTableRequireAtRequestTimestamp
expect kvs
expectAny bool
expectErr interface{} // error type, substring, substring slice, or true (any)
expectErrRace interface{}
expectStatsEst bool // expect MVCCStats.ContainsEstimates, don't check stats
Expand Down Expand Up @@ -1040,6 +1041,48 @@ func TestEvalAddSSTable(t *testing.T) {
sst: kvs{rangeKV("a", "b", 7, "")},
expectErr: &kvpb.WriteTooOldError{},
},
"DisallowConflict allows overlapping sst range tombstones": {
noConflict: true,
data: kvs{pointKV("ib", 6, "foo"), pointKV("if", 6, "foo"), pointKV("it", 6, "foo"), rangeKV("i", "j", 5, "")},
sst: kvs{rangeKV("ia", "irc", 8, ""), rangeKV("ie", "iu", 9, ""), pointKV("ic", 7, "foo"), pointKV("iq", 8, "foo")},
expectAny: true,
},
"DisallowConflict does not miss deleted ext keys": {
noConflict: true,
data: kvs{pointKV("c", 6, "foo"), pointKV("d", 6, "foo"), pointKV("e", 6, "foo"), rangeKV("bb", "j", 5, "")},
sst: kvs{rangeKV("b", "k", 8, ""), pointKV("cc", 9, "foo"), pointKV("dd", 7, "foo"), pointKV("ee", 7, "foo")},
expectAny: true,
},
"DisallowConflict does not miss deleted ext keys 2": {
noConflict: true,
data: kvs{pointKV("kr", 7, "foo"), pointKV("krj", 7, "foo"), pointKV("ksq", 7, "foo"), pointKV("ku", 6, "foo")},
sst: kvs{rangeKV("ke", "l", 11, ""), pointKV("kr", 8, "bar"), pointKV("ksxk", 9, "bar")},
expectAny: true,
},
"DisallowConflict does not miss deleted ext keys 3": {
noConflict: true,
data: kvs{pointKV("xe", 5, "foo"), pointKV("xg", 6, "foo"), pointKV("xh", 7, "foo"), rangeKV("xf", "xk", 5, "")},
sst: kvs{pointKV("xeqn", 10, "foo"), pointKV("xh", 12, "foo"), rangeKV("x", "xp", 11, "")},
expectAny: true,
},
"DisallowConflict does not miss deleted ext keys 4": {
noConflict: true,
data: kvs{pointKV("xh", 7, "foo")},
sst: kvs{pointKV("xh", 12, "foo"), rangeKV("x", "xp", 11, "")},
expectAny: true,
},
"DisallowConflict does not repeatedly count ext value deleted by ext range": {
noConflict: true,
data: kvs{rangeKV("bf", "bjs", 7, ""), pointKV("bbeg", 6, "foo"), pointKV("bf", 6, "foo"), pointKV("bl", 6, "foo")},
sst: kvs{pointKV("bbtq", 11, "foo"), pointKV("bbw", 11, "foo"), pointKV("bc", 11, "foo"), pointKV("bl", 12, "foo")},
expectAny: true,
},
"DisallowConflict does not miss sst range keys after overlapping point": {
noConflict: true,
data: kvs{pointKV("oe", 8, "foo"), pointKV("oi", 8, "foo"), rangeKV("o", "omk", 7, ""), pointKV("od", 6, "foo")},
sst: kvs{pointKV("oe", 11, "foo"), pointKV("oih", 12, "foo"), rangeKV("ods", "ogvh", 10, ""), rangeKV("ogvh", "ohl", 10, ""), rangeKV("ogvh", "ohl", 9, "")},
expectAny: true,
},
}
testutils.RunTrueAndFalse(t, "IngestAsWrites", func(t *testing.T, ingestAsWrites bool) {
testutils.RunValues(t, "RewriteConcurrency", []interface{}{0, 8}, func(t *testing.T, c interface{}) {
Expand Down Expand Up @@ -1198,7 +1241,9 @@ func TestEvalAddSSTable(t *testing.T) {
}

// Scan resulting data from engine.
require.Equal(t, expect, storageutils.ScanEngine(t, engine))
if !tc.expectAny {
require.Equal(t, expect, storageutils.ScanEngine(t, engine))
}

// Check that stats were updated correctly.
if tc.expectStatsEst {
Expand Down
180 changes: 109 additions & 71 deletions pkg/storage/sst.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ func CheckSSTConflicts(
}
defer sstIter.Close()

compareForCollision := func(sstKey, extKey MVCCKey, sstValueRaw, extValueRaw []byte) error {
compareForCollision := func(sstKey, extKey MVCCKey, sstValueRaw, extValueRaw []byte, sstRangeKeyVersion MVCCRangeKeyVersion) error {
// Make sure both keys are proper committed MVCC keys. Note that this is
// only checked when the key exists both in the SST and existing data, it is
// not an exhaustive check of the SST.
Expand Down Expand Up @@ -346,6 +346,8 @@ func CheckSSTConflicts(
// to take into account the existing KV pair.
if extValueIsTombstone {
statsDiff.AgeTo(extKey.Timestamp.WallTime)
} else if sstRangeKeyVersion.Timestamp.Compare(extKey.Timestamp) > 0 && sstRangeKeyVersion.Timestamp.Compare(sstKey.Timestamp) < 0 {
statsDiff.AgeTo(sstRangeKeyVersion.Timestamp.WallTime)
} else {
statsDiff.AgeTo(sstKey.Timestamp.WallTime)
}
Expand Down Expand Up @@ -418,7 +420,7 @@ func CheckSSTConflicts(
return enginepb.MVCCStats{}, errors.Errorf("prefix iterator returned mismatching prefix: %s != %s", extKey.Key, sstKey.Key)
}

if err := compareForCollision(sstKey, extKey, sstValueRaw, extValueRaw); err != nil {
if err := compareForCollision(sstKey, extKey, sstValueRaw, extValueRaw, MVCCRangeKeyVersion{}); err != nil {
return enginepb.MVCCStats{}, err
}

Expand Down Expand Up @@ -734,22 +736,15 @@ func CheckSSTConflicts(
// Check if this ext range key is going to fragment the SST range key.
if sstRangeKeys.Bounds.Key.Compare(extRangeKeys.Bounds.Key) < 0 && !extRangeKeys.Versions.Equal(sstRangeKeys.Versions) &&
(extPrevRangeKeys.IsEmpty() || !extPrevRangeKeys.Bounds.EndKey.Equal(extRangeKeys.Bounds.Key)) {
statsDiff.Add(UpdateStatsOnRangeKeySplit(extRangeKeys.Bounds.Key, sstRangeKeys.Versions))
// Add a fragment end key at extRangeKeys.Bounds.Key, to finish off
// the sst fragment at that point. Note that we've already "lifted up"
// the GCBytesAge of the overlapping parts of extRangeKeys and
// sstRangeKeys when we did the call to updateStatsOnRangeKeyPutVersion
// in the for loop above.
statsDiff.AgeTo(sstRangeKeys.Versions.Newest().WallTime)
statsDiff.RangeKeyBytes += int64(EncodedMVCCKeyPrefixLength(extRangeKeys.Bounds.Key))
updatedStack := extRangeKeys
updatedStack.Versions = extRangeKeys.Versions.Clone()
// Remove the contribution for versions, as that's already been added.
for i, v := range sstRangeKeys.Versions {
if i == 0 {
// We do this dance to make updatedStack.Versions.Newest() == v. This
// is necessary to keep GCBytesAge calculations correct.
oldVersions := updatedStack.Versions
updatedStack.Versions = append(MVCCRangeKeyVersions{v}, oldVersions...)
}
statsDiff.Subtract(updateStatsOnRangeKeyPutVersion(updatedStack, v))
}
statsDiff.AgeTo(sstRangeKeys.Versions.Newest().WallTime)
statsDiff.RangeKeyBytes -= int64(EncodedMVCCKeyPrefixLength(extRangeKeys.Bounds.Key))
statsDiff.RangeKeyCount--
} else if !extPrevRangeKeys.IsEmpty() && extPrevRangeKeys.Bounds.EndKey.Equal(extRangeKeys.Bounds.Key) {
updatedStack := extRangeKeys
updatedStack.Versions = extRangeKeys.Versions.Clone()
Expand Down Expand Up @@ -810,25 +805,10 @@ func CheckSSTConflicts(
// sstIter is further ahead. This should never happen; we always seek
// extIter after seeking/nexting sstIter.
return enginepb.MVCCStats{}, errors.AssertionFailedf("expected engine iter to be ahead of sst iter")
} else if cmp > 0 && sstHasPoint {
} else if cmp > 0 && sstHasPoint && !extHasRange {
// We exclude !sstHasPoint above in case we were at a range key pause
// point that matches extKey. In that case, the below SeekGE would make
// no forward progress.
//
// However, seeking is not safe if we're at an ext range key; we could
// miss conflicts and overlaps with sst range keys in between
// [current SST Key, extKey.Key). Do a next and go back to the start of
// the loop. If we had a dedicated sst range key iterator, we could have
// optimized away this unconditional-next.
if extHasRange {
sstIter.NextKey()
sstOK, sstErr = sstIter.Valid()
if sstOK {
extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
extOK, extErr = extIter.Valid()
}
continue
}
sstIter.SeekGE(MVCCKey{Key: extKey.Key})
sstOK, sstErr = sstIter.Valid()
if sstOK {
Expand Down Expand Up @@ -864,31 +844,37 @@ func CheckSSTConflicts(
}

extValueDeletedByRange := extHasRange && extHasPoint && extRangeKeys.Covers(extKey)
if sstHasPoint && extHasPoint && !extValueDeletedByRange {
// TODO(sumeer): extValueRaw is not always needed below. In many cases
// MVCCValueLenAndIsTombstone() suffices. This will require some
// rearrangement of the logic in compareForCollision.
extValueRaw, err := extIter.UnsafeValue()
if err != nil {
return enginepb.MVCCStats{}, err
}
if err := compareForCollision(sstKey, extKey, sstValueRaw, extValueRaw); err != nil {
return enginepb.MVCCStats{}, err
}
} else if sstHasPoint && extValueDeletedByRange {
// Don't double-count the current key.
var deletedAt hlc.Timestamp
if _, isTombstone, err := extIter.MVCCValueLenAndIsTombstone(); err != nil {
return enginepb.MVCCStats{}, err
} else if isTombstone {
deletedAt = extKey.Timestamp
} else {
version, _ := extRangeKeys.Versions.FirstAtOrAbove(extKey.Timestamp)
deletedAt = version.Timestamp
if extKey.Key.Equal(sstKey.Key) {
if sstHasPoint && extHasPoint && !extValueDeletedByRange {
// TODO(sumeer): extValueRaw is not always needed below. In many cases
// MVCCValueLenAndIsTombstone() suffices. This will require some
// rearrangement of the logic in compareForCollision.
extValueRaw, err := extIter.UnsafeValue()
if err != nil {
return enginepb.MVCCStats{}, err
}
var sstRangeKeyVersion MVCCRangeKeyVersion
if sstHasRange && sstRangeKeys.Covers(extKey) {
sstRangeKeyVersion, _ = sstRangeKeys.FirstAtOrAbove(extKey.Timestamp)
}
if err := compareForCollision(sstKey, extKey, sstValueRaw, extValueRaw, sstRangeKeyVersion); err != nil {
return enginepb.MVCCStats{}, err
}
} else if sstHasPoint && extValueDeletedByRange {
// Don't double-count the current key.
var deletedAt hlc.Timestamp
if _, isTombstone, err := extIter.MVCCValueLenAndIsTombstone(); err != nil {
return enginepb.MVCCStats{}, err
} else if isTombstone {
deletedAt = extKey.Timestamp
} else {
version, _ := extRangeKeys.Versions.FirstAtOrAbove(extKey.Timestamp)
deletedAt = version.Timestamp
}
statsDiff.AgeTo(deletedAt.WallTime)
statsDiff.KeyCount--
statsDiff.KeyBytes -= int64(len(extKey.Key) + 1)
}
statsDiff.AgeTo(deletedAt.WallTime)
statsDiff.KeyCount--
statsDiff.KeyBytes -= int64(len(extKey.Key) + 1)
}

// Fast path with sstTimestamp set and a common case of import cancellation.
Expand Down Expand Up @@ -1014,14 +1000,49 @@ func CheckSSTConflicts(
// seek the other, and on the next iteration, step the second iterator
// and seek the former iterator back to the same point.
if sstHasPoint && extHasPoint && !steppedExtIter {
sstIter.NextKey()
sstOK, sstErr = sstIter.Valid()
if sstOK {
extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
if sstHasRange && extHasRange {
// Step both iterators. Seek whichever one lands further ahead.
extIter.NextKey()
extOK, extErr = extIter.Valid()
sstIter.NextKey()
sstOK, sstErr = sstIter.Valid()
if sstOK && (!extOK || extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) > 0) {
extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
extOK, extErr = extIter.Valid()
} else if extOK && (!sstOK || sstIter.UnsafeKey().Key.Compare(extIter.UnsafeKey().Key) > 0) {
sstIter.SeekGE(MVCCKey{Key: extIter.UnsafeKey().Key})
sstOK, sstErr = sstIter.Valid()
if sstOK && (!extOK || extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0) {
extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
extOK, extErr = extIter.Valid()
}
}
} else if sstHasRange {
// Step the ext iter instead of the sst iter. This prevents us from
// missing any ext keys that could overlap with this sst range key.
// The downside of doing this is that we have to reseek both iterators
// right after, to preserve the sst iterator < ext iterator invariant.
extIter.NextKey()
extOK, extErr = extIter.Valid()
if extOK {
sstIter.SeekGE(MVCCKey{Key: extIter.UnsafeKey().Key})
sstOK, sstErr = sstIter.Valid()
if sstOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 {
extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
extOK, extErr = extIter.Valid()
}
}
} else {
sstIter.NextKey()
sstOK, sstErr = sstIter.Valid()
if sstOK {
extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
}
extOK, extErr = extIter.Valid()
}
extOK, extErr = extIter.Valid()
} else if !steppedExtIter {
oldKey := sstIter.UnsafeKey().Clone()
oldExtKey := extIter.UnsafeKey().Clone()
if sstHasPoint { // !extHasPoint
// Check if ext has a point at this key. If not, NextKey() on sstIter
// and seek extIter.
Expand All @@ -1031,15 +1052,24 @@ func CheckSSTConflicts(
if extErr != nil {
return enginepb.MVCCStats{}, extErr
}
if !extOK || !extIter.UnsafeKey().Key.Equal(oldKey.Key) {
// extIter either went out of bounds or stepped one key ahead. Re-seek
// it at the next SST key.
if !extOK || !extIter.UnsafeKey().Key.Equal(oldExtKey.Key) {
// extIter either went out of bounds or stepped one key ahead. If the
// ext iter is at a new key that's less than the next sst key, re-seek
// the sst iter. If not, re-seek the ext iter at the next sst key.
sstIter.NextKey()
sstOK, sstErr = sstIter.Valid()
if sstOK {

if sstOK && extOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 {
sstIter.SeekGE(MVCCKey{Key: extIter.UnsafeKey().Key})
sstOK, sstErr = sstIter.Valid()
if sstOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 {
extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
extOK, extErr = extIter.Valid()
}
} else if sstOK {
extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
extOK, extErr = extIter.Valid()
}
extOK, extErr = extIter.Valid()
}
// If extIter found a point key at the same MVCC Key, we still need
// to check for conflicts against it.
Expand Down Expand Up @@ -1076,14 +1106,22 @@ func CheckSSTConflicts(
sstIter.Next()
sstOK, sstErr = sstIter.Valid()
sstChangedKeys := !sstOK || !sstIter.UnsafeKey().Key.Equal(oldKey.Key)
if sstOK && sstChangedKeys {
extIter.Next()
steppedExtIter = true
extOK, extErr = extIter.Valid()
extChangedKeys := !extOK || !extIter.UnsafeKey().Key.Equal(oldExtKey.Key)
if sstOK && extOK && sstChangedKeys && extChangedKeys &&
extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 {
sstIter.SeekGE(MVCCKey{Key: extIter.UnsafeKey().Key})
sstOK, sstErr = sstIter.Valid()
if sstOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 {
extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
extOK, extErr = extIter.Valid()
}
} else if sstOK && sstChangedKeys && !extOK {
extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
extOK, extErr = extIter.Valid()
} else {
extIter.Next()
steppedExtIter = true
extOK, extErr = extIter.Valid()
extChangedKeys := !extOK || !extIter.UnsafeKey().Key.Equal(oldKey.Key)
if sstChangedKeys && !extChangedKeys {
sstIter.SeekGE(MVCCKey{Key: extIter.UnsafeKey().Key})
sstOK, sstErr = sstIter.Valid()
Expand Down

0 comments on commit 05cd4bc

Please sign in to comment.