diff --git a/pkg/storage/sst.go b/pkg/storage/sst.go index f49a26d2955f..b67209c2d79e 100644 --- a/pkg/storage/sst.go +++ b/pkg/storage/sst.go @@ -37,6 +37,10 @@ import ( // (key/value/timestamp), for backwards compatibility. If disallowShadowingBelow // is non-empty, disallowShadowing is ignored. // +// sstTimestamp, if non-zero, represents the timestamp that all keys in the SST +// are expected to be at. This method can make performance optimizations with +// the expectation that no SST keys will be at any other timestamp. +// // The given SST and reader cannot contain intents or inline values (i.e. zero // timestamps), but this is only checked for keys that exist in both sides, for // performance. @@ -52,7 +56,7 @@ func CheckSSTConflicts( leftPeekBound, rightPeekBound roachpb.Key, disallowShadowing bool, disallowShadowingBelow hlc.Timestamp, - sstToReqTimestamp hlc.Timestamp, + sstTimestamp hlc.Timestamp, maxIntents int64, usePrefixSeek bool, ) (enginepb.MVCCStats, error) { @@ -72,6 +76,11 @@ func CheckSSTConflicts( rightPeekBound = keys.MaxKey } + // In some iterations below, we try to call Next() instead of SeekGE() for a + // few iterations, as nexts are more performant. If `numNextsBeforeSeek` nexts + // are not sufficient to land at or after a desired SeekGE key, we fall back to + // a seek. + const numNextsBeforeSeek = 5 var statsDiff enginepb.MVCCStats var intents []roachpb.Intent if usePrefixSeek { @@ -132,11 +141,26 @@ func CheckSSTConflicts( } rkIter.Close() + if usePrefixSeek { + // Prefix iteration and range key masking don't work together. See the + // comment on the panic inside pebbleIterator.setOptions. + sstTimestamp = hlc.Timestamp{} + } extIter := reader.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ - KeyTypes: IterKeyTypePointsAndRanges, - LowerBound: leftPeekBound, - UpperBound: rightPeekBound, - RangeKeyMaskingBelow: sstToReqTimestamp, + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: leftPeekBound, + UpperBound: rightPeekBound, + // NB: Range key masking is performant, but it skips instances where we need + // to adjust GCBytesAge in the returned stats diff. Consider an example + // where a point key is masked by a range tombstone, and we added a new + // revision of that key above the range tombstone in the SST. The GCBytesAge + // contribution of that range tombstone on the point key's key (as opposed + // to the version contribution) needs to be un-done as that key is now being + // used by the live key. + // + // TODO(bilal): Add tests that test for this case, and close this gap in + // GCBytesAge calculation. + RangeKeyMaskingBelow: sstTimestamp, Prefix: usePrefixSeek, useL6Filters: true, }) @@ -716,7 +740,20 @@ func CheckSSTConflicts( sstIter.SeekGE(MVCCKey{Key: extKey.Key}) sstOK, sstErr = sstIter.Valid() if sstOK { - extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) + // Seeks on the engine are expensive. Try Next()ing if we're very close + // to the sst key (which we might be). + nextsUntilSeek := numNextsBeforeSeek + for extOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 { + extIter.NextKey() + extOK, _ = extIter.Valid() + nextsUntilSeek-- + if nextsUntilSeek <= 0 { + break + } + } + if extOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 { + extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) + } } extOK, extErr = extIter.Valid() continue @@ -735,6 +772,42 @@ func CheckSSTConflicts( statsDiff.KeyBytes -= int64(len(extKey.Key) + 1) } + // Fast path with sstTimestamp set and a common case of import cancellation. + // Since we use range key masking, we can just Next() the ext iterator + // past its range key. + if sstTimestamp.IsSet() && extHasRange && !extHasPoint && !sstHasRange { + if vers, ok := extRangeKeys.FirstAtOrAbove(sstTimestamp); !ok || vers.Timestamp.Equal(sstTimestamp) { + // All range key versions are below the request timestamp. We can seek + // past the range key, as all SST points/ranges are going to be above + // this range key. + extIter.Next() + extOK, extErr = extIter.Valid() + if !extOK { + break + } + + sstIter.SeekGE(MVCCKey{Key: extIter.UnsafeKey().Key}) + sstOK, sstErr = sstIter.Valid() + if sstOK { + // Seeks on the engine are expensive. Try Next()ing if we're very close + // to the sst key (which we might be). + nextsUntilSeek := numNextsBeforeSeek + for extOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 { + extIter.NextKey() + extOK, _ = extIter.Valid() + nextsUntilSeek-- + if nextsUntilSeek <= 0 { + break + } + } + if extOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 { + extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) + } + } + extOK, extErr = extIter.Valid() + continue + } + } steppedExtIter := false // Before Next-ing the SST iter, if it contains any range keys, check if both: // 1) the next SST key takes us outside the current SST range key