From 949bd9049869c8cc796ed9043e5f64c41beef319 Mon Sep 17 00:00:00 2001
From: Bilal Akhtar <bilal@cockroachlabs.com>
Date: Thu, 29 Sep 2022 17:20:49 -0400
Subject: [PATCH] storage: Try seeks using nexts, other fast-paths in
 CheckSSTConflicts

Import cancellations and retries tend to set sstToReqTS
and have incoming SSTs where we can expect all keys
to have the same timestamp. This allows us to more
efficiently skip keys in the engine beyond what range
key masking does for us (eg. we don't need to check each
individual SST key for conflict with an engine range key).

This change takes advantage of that to speed up import
retries. It also replaces engine seeks with Next()s
as much as possible; the TrySeekUsingNext optimization
inside the iterator isn't kicking in as some seeks/nexts
are interspersed.

Release note: None
---
 pkg/storage/sst.go | 85 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 79 insertions(+), 6 deletions(-)

diff --git a/pkg/storage/sst.go b/pkg/storage/sst.go
index f49a26d2955f..b67209c2d79e 100644
--- a/pkg/storage/sst.go
+++ b/pkg/storage/sst.go
@@ -37,6 +37,10 @@ import (
 // (key/value/timestamp), for backwards compatibility. If disallowShadowingBelow
 // is non-empty, disallowShadowing is ignored.
 //
+// sstTimestamp, if non-zero, represents the timestamp that all keys in the SST
+// are expected to be at. This method can make performance optimizations with
+// the expectation that no SST keys will be at any other timestamp.
+//
 // The given SST and reader cannot contain intents or inline values (i.e. zero
 // timestamps), but this is only checked for keys that exist in both sides, for
 // performance.
@@ -52,7 +56,7 @@ func CheckSSTConflicts(
 	leftPeekBound, rightPeekBound roachpb.Key,
 	disallowShadowing bool,
 	disallowShadowingBelow hlc.Timestamp,
-	sstToReqTimestamp hlc.Timestamp,
+	sstTimestamp hlc.Timestamp,
 	maxIntents int64,
 	usePrefixSeek bool,
 ) (enginepb.MVCCStats, error) {
@@ -72,6 +76,11 @@ func CheckSSTConflicts(
 		rightPeekBound = keys.MaxKey
 	}
 
+	// In some iterations below, we try to call Next() instead of SeekGE() for a
+	// few iterations, as nexts are more performant. If `numNextsBeforeSeek` nexts
+	// are not sufficient to land at or after a desired SeekGE key, we fall back to
+	// a seek.
+	const numNextsBeforeSeek = 5
 	var statsDiff enginepb.MVCCStats
 	var intents []roachpb.Intent
 	if usePrefixSeek {
@@ -132,11 +141,26 @@ func CheckSSTConflicts(
 	}
 	rkIter.Close()
 
+	if usePrefixSeek {
+		// Prefix iteration and range key masking don't work together. See the
+		// comment on the panic inside pebbleIterator.setOptions.
+		sstTimestamp = hlc.Timestamp{}
+	}
 	extIter := reader.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{
-		KeyTypes:             IterKeyTypePointsAndRanges,
-		LowerBound:           leftPeekBound,
-		UpperBound:           rightPeekBound,
-		RangeKeyMaskingBelow: sstToReqTimestamp,
+		KeyTypes:   IterKeyTypePointsAndRanges,
+		LowerBound: leftPeekBound,
+		UpperBound: rightPeekBound,
+		// NB: Range key masking is performant, but it skips instances where we need
+		// to adjust GCBytesAge in the returned stats diff. Consider an example
+		// where a point key is masked by a range tombstone, and we added a new
+		// revision of that key above the range tombstone in the SST. The GCBytesAge
+		// contribution of that range tombstone on the point key's key (as opposed
+		// to the version contribution) needs to be un-done as that key is now being
+		// used by the live key.
+		//
+		// TODO(bilal): Add tests that test for this case, and close this gap in
+		// GCBytesAge calculation.
+		RangeKeyMaskingBelow: sstTimestamp,
 		Prefix:               usePrefixSeek,
 		useL6Filters:         true,
 	})
@@ -716,7 +740,20 @@ func CheckSSTConflicts(
 			sstIter.SeekGE(MVCCKey{Key: extKey.Key})
 			sstOK, sstErr = sstIter.Valid()
 			if sstOK {
-				extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
+				// Seeks on the engine are expensive. Try Next()ing if we're very close
+				// to the sst key (which we might be).
+				nextsUntilSeek := numNextsBeforeSeek
+				for extOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 {
+					extIter.NextKey()
+					extOK, _ = extIter.Valid()
+					nextsUntilSeek--
+					if nextsUntilSeek <= 0 {
+						break
+					}
+				}
+				if extOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 {
+					extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
+				}
 			}
 			extOK, extErr = extIter.Valid()
 			continue
@@ -735,6 +772,42 @@ func CheckSSTConflicts(
 			statsDiff.KeyBytes -= int64(len(extKey.Key) + 1)
 		}
 
+		// Fast path with sstTimestamp set and a common case of import cancellation.
+		// Since we use range key masking, we can just Next() the ext iterator
+		// past its range key.
+		if sstTimestamp.IsSet() && extHasRange && !extHasPoint && !sstHasRange {
+			if vers, ok := extRangeKeys.FirstAtOrAbove(sstTimestamp); !ok || vers.Timestamp.Equal(sstTimestamp) {
+				// All range key versions are below the request timestamp. We can seek
+				// past the range key, as all SST points/ranges are going to be above
+				// this range key.
+				extIter.Next()
+				extOK, extErr = extIter.Valid()
+				if !extOK {
+					break
+				}
+
+				sstIter.SeekGE(MVCCKey{Key: extIter.UnsafeKey().Key})
+				sstOK, sstErr = sstIter.Valid()
+				if sstOK {
+					// Seeks on the engine are expensive. Try Next()ing if we're very close
+					// to the sst key (which we might be).
+					nextsUntilSeek := numNextsBeforeSeek
+					for extOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 {
+						extIter.NextKey()
+						extOK, _ = extIter.Valid()
+						nextsUntilSeek--
+						if nextsUntilSeek <= 0 {
+							break
+						}
+					}
+					if extOK && extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0 {
+						extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key})
+					}
+				}
+				extOK, extErr = extIter.Valid()
+				continue
+			}
+		}
 		steppedExtIter := false
 		// Before Next-ing the SST iter, if it contains any range keys, check if both:
 		// 1) the next SST key takes us outside the current SST range key