diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go index 00fc114701d5..9819b82d2dae 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go @@ -64,6 +64,7 @@ func TestEvalAddSSTable(t *testing.T) { noShadowBelow int64 // DisallowShadowingBelow requireReqTS bool // AddSSTableRequireAtRequestTimestamp expect kvs + expectAny bool expectErr interface{} // error type, substring, substring slice, or true (any) expectErrRace interface{} expectStatsEst bool // expect MVCCStats.ContainsEstimates, don't check stats @@ -1040,6 +1041,48 @@ func TestEvalAddSSTable(t *testing.T) { sst: kvs{rangeKV("a", "b", 7, "")}, expectErr: &kvpb.WriteTooOldError{}, }, + "DisallowConflict allows overlapping sst range tombstones": { + noConflict: true, + data: kvs{pointKV("ib", 6, "foo"), pointKV("if", 6, "foo"), pointKV("it", 6, "foo"), rangeKV("i", "j", 5, "")}, + sst: kvs{rangeKV("ia", "irc", 8, ""), rangeKV("ie", "iu", 9, ""), pointKV("ic", 7, "foo"), pointKV("iq", 8, "foo")}, + expectAny: true, + }, + "DisallowConflict does not miss deleted ext keys": { + noConflict: true, + data: kvs{pointKV("c", 6, "foo"), pointKV("d", 6, "foo"), pointKV("e", 6, "foo"), rangeKV("bb", "j", 5, "")}, + sst: kvs{rangeKV("b", "k", 8, ""), pointKV("cc", 9, "foo"), pointKV("dd", 7, "foo"), pointKV("ee", 7, "foo")}, + expectAny: true, + }, + "DisallowConflict does not miss deleted ext keys 2": { + noConflict: true, + data: kvs{pointKV("kr", 7, "foo"), pointKV("krj", 7, "foo"), pointKV("ksq", 7, "foo"), pointKV("ku", 6, "foo")}, + sst: kvs{rangeKV("ke", "l", 11, ""), pointKV("kr", 8, "bar"), pointKV("ksxk", 9, "bar")}, + expectAny: true, + }, + "DisallowConflict does not miss deleted ext keys 3": { + noConflict: true, + data: kvs{pointKV("xe", 5, "foo"), pointKV("xg", 6, "foo"), pointKV("xh", 7, "foo"), rangeKV("xf", "xk", 5, "")}, + sst: kvs{pointKV("xeqn", 10, "foo"), pointKV("xh", 12, "foo"), rangeKV("x", "xp", 11, "")}, + expectAny: true, + }, + "DisallowConflict does not miss deleted ext keys 4": { + noConflict: true, + data: kvs{pointKV("xh", 7, "foo")}, + sst: kvs{pointKV("xh", 12, "foo"), rangeKV("x", "xp", 11, "")}, + expectAny: true, + }, + "DisallowConflict does not repeatedly count ext value deleted by ext range": { + noConflict: true, + data: kvs{rangeKV("bf", "bjs", 7, ""), pointKV("bbeg", 6, "foo"), pointKV("bf", 6, "foo"), pointKV("bl", 6, "foo")}, + sst: kvs{pointKV("bbtq", 11, "foo"), pointKV("bbw", 11, "foo"), pointKV("bc", 11, "foo"), pointKV("bl", 12, "foo")}, + expectAny: true, + }, + "DisallowConflict does not miss sst range keys after overlapping point": { + noConflict: true, + data: kvs{pointKV("oe", 8, "foo"), pointKV("oi", 8, "foo"), rangeKV("o", "omk", 7, ""), pointKV("od", 6, "foo")}, + sst: kvs{pointKV("oe", 11, "foo"), pointKV("oih", 12, "foo"), rangeKV("ods", "ogvh", 10, ""), rangeKV("ogvh", "ohl", 10, ""), rangeKV("ogvh", "ohl", 9, "")}, + expectAny: true, + }, } testutils.RunTrueAndFalse(t, "IngestAsWrites", func(t *testing.T, ingestAsWrites bool) { testutils.RunValues(t, "RewriteConcurrency", []interface{}{0, 8}, func(t *testing.T, c interface{}) { @@ -1198,7 +1241,9 @@ func TestEvalAddSSTable(t *testing.T) { } // Scan resulting data from engine. - require.Equal(t, expect, storageutils.ScanEngine(t, engine)) + if !tc.expectAny { + require.Equal(t, expect, storageutils.ScanEngine(t, engine)) + } // Check that stats were updated correctly. if tc.expectStatsEst { diff --git a/pkg/storage/sst.go b/pkg/storage/sst.go index 6831ec71c93c..faa71fef5655 100644 --- a/pkg/storage/sst.go +++ b/pkg/storage/sst.go @@ -239,7 +239,7 @@ func CheckSSTConflicts( } defer sstIter.Close() - compareForCollision := func(sstKey, extKey MVCCKey, sstValueRaw, extValueRaw []byte) error { + compareForCollision := func(sstKey, extKey MVCCKey, sstValueRaw, extValueRaw []byte, sstRangeKeyVersion MVCCRangeKeyVersion) error { // Make sure both keys are proper committed MVCC keys. Note that this is // only checked when the key exists both in the SST and existing data, it is // not an exhaustive check of the SST. @@ -346,6 +346,8 @@ func CheckSSTConflicts( // to take into account the existing KV pair. if extValueIsTombstone { statsDiff.AgeTo(extKey.Timestamp.WallTime) + } else if sstRangeKeyVersion.Timestamp.Compare(extKey.Timestamp) > 0 && sstRangeKeyVersion.Timestamp.Compare(sstKey.Timestamp) < 0 { + statsDiff.AgeTo(sstRangeKeyVersion.Timestamp.WallTime) } else { statsDiff.AgeTo(sstKey.Timestamp.WallTime) } @@ -418,7 +420,7 @@ func CheckSSTConflicts( return enginepb.MVCCStats{}, errors.Errorf("prefix iterator returned mismatching prefix: %s != %s", extKey.Key, sstKey.Key) } - if err := compareForCollision(sstKey, extKey, sstValueRaw, extValueRaw); err != nil { + if err := compareForCollision(sstKey, extKey, sstValueRaw, extValueRaw, MVCCRangeKeyVersion{}); err != nil { return enginepb.MVCCStats{}, err } @@ -734,22 +736,15 @@ func CheckSSTConflicts( // Check if this ext range key is going to fragment the SST range key. if sstRangeKeys.Bounds.Key.Compare(extRangeKeys.Bounds.Key) < 0 && !extRangeKeys.Versions.Equal(sstRangeKeys.Versions) && (extPrevRangeKeys.IsEmpty() || !extPrevRangeKeys.Bounds.EndKey.Equal(extRangeKeys.Bounds.Key)) { - statsDiff.Add(UpdateStatsOnRangeKeySplit(extRangeKeys.Bounds.Key, sstRangeKeys.Versions)) + // Add a fragment end key at extRangeKeys.Bounds.Key, to finish off + // the sst fragment at that point. Note that we've already "lifted up" + // the GCBytesAge of the overlapping parts of extRangeKeys and + // sstRangeKeys when we did the call to updateStatsOnRangeKeyPutVersion + // in the for loop above. + statsDiff.AgeTo(sstRangeKeys.Versions.Newest().WallTime) + statsDiff.RangeKeyBytes += int64(EncodedMVCCKeyPrefixLength(extRangeKeys.Bounds.Key)) updatedStack := extRangeKeys updatedStack.Versions = extRangeKeys.Versions.Clone() - // Remove the contribution for versions, as that's already been added. - for i, v := range sstRangeKeys.Versions { - if i == 0 { - // We do this dance to make updatedStack.Versions.Newest() == v. This - // is necessary to keep GCBytesAge calculations correct. - oldVersions := updatedStack.Versions - updatedStack.Versions = append(MVCCRangeKeyVersions{v}, oldVersions...) - } - statsDiff.Subtract(updateStatsOnRangeKeyPutVersion(updatedStack, v)) - } - statsDiff.AgeTo(sstRangeKeys.Versions.Newest().WallTime) - statsDiff.RangeKeyBytes -= int64(EncodedMVCCKeyPrefixLength(extRangeKeys.Bounds.Key)) - statsDiff.RangeKeyCount-- } else if !extPrevRangeKeys.IsEmpty() && extPrevRangeKeys.Bounds.EndKey.Equal(extRangeKeys.Bounds.Key) { updatedStack := extRangeKeys updatedStack.Versions = extRangeKeys.Versions.Clone() @@ -810,25 +805,10 @@ func CheckSSTConflicts( // sstIter is further ahead. This should never happen; we always seek // extIter after seeking/nexting sstIter. return enginepb.MVCCStats{}, errors.AssertionFailedf("expected engine iter to be ahead of sst iter") - } else if cmp > 0 && sstHasPoint { + } else if cmp > 0 && sstHasPoint && !extHasRange { // We exclude !sstHasPoint above in case we were at a range key pause // point that matches extKey. In that case, the below SeekGE would make // no forward progress. - // - // However, seeking is not safe if we're at an ext range key; we could - // miss conflicts and overlaps with sst range keys in between - // [current SST Key, extKey.Key). Do a next and go back to the start of - // the loop. If we had a dedicated sst range key iterator, we could have - // optimized away this unconditional-next. - if extHasRange { - sstIter.NextKey() - sstOK, sstErr = sstIter.Valid() - if sstOK { - extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) - extOK, extErr = extIter.Valid() - } - continue - } sstIter.SeekGE(MVCCKey{Key: extKey.Key}) sstOK, sstErr = sstIter.Valid() if sstOK { @@ -864,31 +844,37 @@ func CheckSSTConflicts( } extValueDeletedByRange := extHasRange && extHasPoint && extRangeKeys.Covers(extKey) - if sstHasPoint && extHasPoint && !extValueDeletedByRange { - // TODO(sumeer): extValueRaw is not always needed below. In many cases - // MVCCValueLenAndIsTombstone() suffices. This will require some - // rearrangement of the logic in compareForCollision. - extValueRaw, err := extIter.UnsafeValue() - if err != nil { - return enginepb.MVCCStats{}, err - } - if err := compareForCollision(sstKey, extKey, sstValueRaw, extValueRaw); err != nil { - return enginepb.MVCCStats{}, err - } - } else if sstHasPoint && extValueDeletedByRange { - // Don't double-count the current key. - var deletedAt hlc.Timestamp - if _, isTombstone, err := extIter.MVCCValueLenAndIsTombstone(); err != nil { - return enginepb.MVCCStats{}, err - } else if isTombstone { - deletedAt = extKey.Timestamp - } else { - version, _ := extRangeKeys.Versions.FirstAtOrAbove(extKey.Timestamp) - deletedAt = version.Timestamp + if extKey.Key.Equal(sstKey.Key) { + if sstHasPoint && extHasPoint && !extValueDeletedByRange { + // TODO(sumeer): extValueRaw is not always needed below. In many cases + // MVCCValueLenAndIsTombstone() suffices. This will require some + // rearrangement of the logic in compareForCollision. + extValueRaw, err := extIter.UnsafeValue() + if err != nil { + return enginepb.MVCCStats{}, err + } + var sstRangeKeyVersion MVCCRangeKeyVersion + if sstHasRange && sstRangeKeys.Covers(extKey) { + sstRangeKeyVersion, _ = sstRangeKeys.FirstAtOrAbove(extKey.Timestamp) + } + if err := compareForCollision(sstKey, extKey, sstValueRaw, extValueRaw, sstRangeKeyVersion); err != nil { + return enginepb.MVCCStats{}, err + } + } else if sstHasPoint && extValueDeletedByRange { + // Don't double-count the current key. + var deletedAt hlc.Timestamp + if _, isTombstone, err := extIter.MVCCValueLenAndIsTombstone(); err != nil { + return enginepb.MVCCStats{}, err + } else if isTombstone { + deletedAt = extKey.Timestamp + } else { + version, _ := extRangeKeys.Versions.FirstAtOrAbove(extKey.Timestamp) + deletedAt = version.Timestamp + } + statsDiff.AgeTo(deletedAt.WallTime) + statsDiff.KeyCount-- + statsDiff.KeyBytes -= int64(len(extKey.Key) + 1) } - statsDiff.AgeTo(deletedAt.WallTime) - statsDiff.KeyCount-- - statsDiff.KeyBytes -= int64(len(extKey.Key) + 1) } // Fast path with sstTimestamp set and a common case of import cancellation. @@ -1014,14 +1000,49 @@ func CheckSSTConflicts( // seek the other, and on the next iteration, step the second iterator // and seek the former iterator back to the same point. if sstHasPoint && extHasPoint && !steppedExtIter { - sstIter.NextKey() - sstOK, sstErr = sstIter.Valid() - if sstOK { - extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) + if sstHasRange && extHasRange { + // Step both iterators. Seek whichever one lands further ahead. + extIter.NextKey() + extOK, extErr = extIter.Valid() + sstIter.NextKey() + sstOK, sstErr = sstIter.Valid() + if sstOK && (!extOK || extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) > 0) { + extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) + extOK, extErr = extIter.Valid() + } else if extOK && (!sstOK || sstIter.UnsafeKey().Key.Compare(extIter.UnsafeKey().Key) > 0) { + sstIter.SeekGE(MVCCKey{Key: extIter.UnsafeKey().Key}) + sstOK, sstErr = sstIter.Valid() + if sstOK && (!extOK || extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0) { + extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) + extOK, extErr = extIter.Valid() + } + } + } else if sstHasRange { + // Step the ext iter instead of the sst iter. This prevents us from + // missing any ext keys that could overlap with this sst range key. + // The downside of doing this is that we have to reseek both iterators + // right after, to preserve the sst iterator < ext iterator invariant. + extIter.NextKey() + extOK, extErr = extIter.Valid() + if extOK { + sstIter.SeekGE(MVCCKey{Key: extIter.UnsafeKey().Key}) + sstOK, sstErr = sstIter.Valid() + if sstOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 { + extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) + extOK, extErr = extIter.Valid() + } + } + } else { + sstIter.NextKey() + sstOK, sstErr = sstIter.Valid() + if sstOK { + extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) + } + extOK, extErr = extIter.Valid() } - extOK, extErr = extIter.Valid() } else if !steppedExtIter { oldKey := sstIter.UnsafeKey().Clone() + oldExtKey := extIter.UnsafeKey().Clone() if sstHasPoint { // !extHasPoint // Check if ext has a point at this key. If not, NextKey() on sstIter // and seek extIter. @@ -1031,15 +1052,24 @@ func CheckSSTConflicts( if extErr != nil { return enginepb.MVCCStats{}, extErr } - if !extOK || !extIter.UnsafeKey().Key.Equal(oldKey.Key) { - // extIter either went out of bounds or stepped one key ahead. Re-seek - // it at the next SST key. + if !extOK || !extIter.UnsafeKey().Key.Equal(oldExtKey.Key) { + // extIter either went out of bounds or stepped one key ahead. If the + // ext iter is at a new key that's less than the next sst key, re-seek + // the sst iter. If not, re-seek the ext iter at the next sst key. sstIter.NextKey() sstOK, sstErr = sstIter.Valid() - if sstOK { + + if sstOK && extOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 { + sstIter.SeekGE(MVCCKey{Key: extIter.UnsafeKey().Key}) + sstOK, sstErr = sstIter.Valid() + if sstOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 { + extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) + extOK, extErr = extIter.Valid() + } + } else if sstOK { extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) + extOK, extErr = extIter.Valid() } - extOK, extErr = extIter.Valid() } // If extIter found a point key at the same MVCC Key, we still need // to check for conflicts against it. @@ -1076,14 +1106,22 @@ func CheckSSTConflicts( sstIter.Next() sstOK, sstErr = sstIter.Valid() sstChangedKeys := !sstOK || !sstIter.UnsafeKey().Key.Equal(oldKey.Key) - if sstOK && sstChangedKeys { + extIter.Next() + steppedExtIter = true + extOK, extErr = extIter.Valid() + extChangedKeys := !extOK || !extIter.UnsafeKey().Key.Equal(oldExtKey.Key) + if sstOK && extOK && sstChangedKeys && extChangedKeys && + extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 { + sstIter.SeekGE(MVCCKey{Key: extIter.UnsafeKey().Key}) + sstOK, sstErr = sstIter.Valid() + if sstOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 { + extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) + extOK, extErr = extIter.Valid() + } + } else if sstOK && sstChangedKeys && !extOK { extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) extOK, extErr = extIter.Valid() } else { - extIter.Next() - steppedExtIter = true - extOK, extErr = extIter.Valid() - extChangedKeys := !extOK || !extIter.UnsafeKey().Key.Equal(oldKey.Key) if sstChangedKeys && !extChangedKeys { sstIter.SeekGE(MVCCKey{Key: extIter.UnsafeKey().Key}) sstOK, sstErr = sstIter.Valid()