From 4b1b8699c92c60b53ddc49d3bc71306eabbf231f Mon Sep 17 00:00:00 2001
From: Nathan VanBenschoten <nvanbenschoten@gmail.com>
Date: Thu, 26 Oct 2023 00:57:50 -0400
Subject: [PATCH] storage: check for replicated locks in CheckSSTConflicts

Informs #111984.
Informs #111893.
Informs #111530.

This commit changes storage.CheckSSTConflicts to also check for
replicated locks, in addition to checking for intents. This is necessary
to prevent AddSSTable from ingesting key-values pairs below replicated
locks, which could violate the isolation expected from these locks if an
ingested key-value is below the commit timestamp of the lock holder.
This was caught in kvnemesis by the validation logic recently introduced
in 7ff1c793.

The change alters the semantics of CheckSSTConflicts slightly for
intents. It now detects conflicts with any intents in the AddSSTable's
key span, instead of just those on overlapping keys. I think this is ok,
as the `checkConflicts = false` path in `EvalAddSSTable` already had
this behavior. This is reflected in the changes to cmd_add_sstable_test.go,
where three cases are no longer inconsistent with blind writes. I also
don't think this is a performance concern (like the comment that's being
deleted was suggesting) because we now have the separated lock table. In
fact, this change allows us to avoid using an `intentInterleavingIter`,
so if anything, it may provide a small performance win.

Release note: None
---
 .../batcheval/cmd_add_sstable_test.go         | 18 ++++----
 pkg/storage/sst.go                            | 46 ++++++-------------
 pkg/storage/sst_test.go                       | 29 ++++++++----
 3 files changed, 44 insertions(+), 49 deletions(-)

diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go
index 530891adaccf..0211ad427bc9 100644
--- a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go
+++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go
@@ -375,11 +375,11 @@ func TestEvalAddSSTable(t *testing.T) {
 			sst:        kvs{pointKV("b", 3, "sst")},
 			expectErr:  &kvpb.LockConflictError{},
 		},
-		"DisallowConflicts ignores intents in span": { // inconsistent with blind writes
+		"DisallowConflicts returns LockConflictError in span": {
 			noConflict: true,
 			data:       kvs{pointKV("b", intentTS, "intent")},
 			sst:        kvs{pointKV("a", 3, "sst"), pointKV("c", 3, "sst")},
-			expect:     kvs{pointKV("a", 3, "sst"), pointKV("b", intentTS, "intent"), pointKV("c", 3, "sst")},
+			expectErr:  &kvpb.LockConflictError{},
 		},
 		"DisallowConflicts is not idempotent": {
 			noConflict: true,
@@ -479,11 +479,11 @@ func TestEvalAddSSTable(t *testing.T) {
 			sst:       kvs{pointKV("b", 3, "sst")},
 			expectErr: &kvpb.LockConflictError{},
 		},
-		"DisallowShadowing ignores intents in span": { // inconsistent with blind writes
-			noShadow: true,
-			data:     kvs{pointKV("b", intentTS, "intent")},
-			sst:      kvs{pointKV("a", 3, "sst"), pointKV("c", 3, "sst")},
-			expect:   kvs{pointKV("a", 3, "sst"), pointKV("b", intentTS, "intent"), pointKV("c", 3, "sst")},
+		"DisallowShadowing returns LockConflictError in span": {
+			noShadow:  true,
+			data:      kvs{pointKV("b", intentTS, "intent")},
+			sst:       kvs{pointKV("a", 3, "sst"), pointKV("c", 3, "sst")},
+			expectErr: &kvpb.LockConflictError{},
 		},
 		"DisallowShadowing is idempotent": {
 			noShadow: true,
@@ -607,11 +607,11 @@ func TestEvalAddSSTable(t *testing.T) {
 			sst:           kvs{pointKV("b", 3, "sst")},
 			expectErr:     &kvpb.LockConflictError{},
 		},
-		"DisallowShadowingBelow ignores intents in span": { // inconsistent with blind writes
+		"DisallowShadowingBelow returns LockConflictError in span": {
 			noShadowBelow: 5,
 			data:          kvs{pointKV("b", intentTS, "intent")},
 			sst:           kvs{pointKV("a", 3, "sst"), pointKV("c", 3, "sst")},
-			expect:        kvs{pointKV("a", 3, "sst"), pointKV("b", intentTS, "intent"), pointKV("c", 3, "sst")},
+			expectErr:     &kvpb.LockConflictError{},
 		},
 		"DisallowShadowingBelow is not generally idempotent": {
 			noShadowBelow: 5,
diff --git a/pkg/storage/sst.go b/pkg/storage/sst.go
index affc2376a853..be7b5a8942fe 100644
--- a/pkg/storage/sst.go
+++ b/pkg/storage/sst.go
@@ -103,9 +103,9 @@ func NewMultiMemSSTIterator(ssts [][]byte, verify bool, opts IterOptions) (MVCCI
 // engine contains MVCC range keys in the ingested span then this will cause
 // MVCC stats to be estimates since we can't adjust stats for masked points.
 //
-// The given SST and reader cannot contain intents or inline values (i.e. zero
-// timestamps), but this is only checked for keys that exist in both sides, for
-// performance.
+// The given SST and reader cannot contain intents, replicated locks, or inline
+// values (i.e. zero timestamps). This is checked across the entire key span,
+// from start to end.
 //
 // The returned MVCC statistics is a delta between the SST-only statistics and
 // their effect when applied, which when added to the SST statistics will adjust
@@ -147,7 +147,6 @@ func CheckSSTConflicts(
 	// a seek.
 	const numNextsBeforeSeek = 5
 	var statsDiff enginepb.MVCCStats
-	var intents []roachpb.Intent
 	if usePrefixSeek {
 		// If we're going to be using a prefix iterator, check for the fast path
 		// first, where there are no keys in the reader between the sstable's start
@@ -168,6 +167,13 @@ func CheckSSTConflicts(
 		}
 	}
 
+	// Check for any overlapping locks, and return them to be resolved.
+	if locks, err := ScanLocks(ctx, reader, start.Key, end.Key, maxLockConflicts, 0); err != nil {
+		return enginepb.MVCCStats{}, err
+	} else if len(locks) > 0 {
+		return enginepb.MVCCStats{}, &kvpb.LockConflictError{Locks: locks}
+	}
+
 	// Check for any range keys.
 	//
 	// TODO(bilal): Expose reader.Properties.NumRangeKeys() here, so we don't
@@ -233,7 +239,7 @@ func CheckSSTConflicts(
 		// https://github.com/cockroachdb/cockroach/issues/92254
 		statsDiff.ContainsEstimates += 2
 	}
-	extIter, err := reader.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{
+	extIter, err := reader.NewMVCCIterator(MVCCKeyIterKind, IterOptions{
 		KeyTypes:             IterKeyTypePointsAndRanges,
 		LowerBound:           leftPeekBound,
 		UpperBound:           rightPeekBound,
@@ -278,20 +284,11 @@ func CheckSSTConflicts(
 				return err
 			}
 			if len(mvccMeta.RawBytes) > 0 {
-				return errors.New("inline values are unsupported")
+				return errors.AssertionFailedf("inline values are unsupported")
 			} else if mvccMeta.Txn == nil {
-				return errors.New("found intent without transaction")
+				return errors.AssertionFailedf("found intent without transaction")
 			} else {
-				// If we encounter a write intent, keep looking for additional intents
-				// in order to return a large batch for intent resolution. The caller
-				// will likely resolve the returned intents and retry the call, which
-				// would be quadratic, so this significantly reduces the overall number
-				// of scans.
-				intents = append(intents, roachpb.MakeIntent(mvccMeta.Txn, extIter.UnsafeKey().Key.Clone()))
-				if int64(len(intents)) >= maxLockConflicts {
-					return &kvpb.LockConflictError{Locks: roachpb.AsLocks(intents)}
-				}
-				return nil
+				return errors.AssertionFailedf("found intent after ScanLocks call")
 			}
 		}
 		extValueIsTombstone, err := EncodedMVCCValueIsTombstone(extValueRaw)
@@ -532,17 +529,7 @@ func CheckSSTConflicts(
 					return enginepb.MVCCStats{}, err
 				}
 			} else {
-				// extIter is at an intent. Save it to the intents list and Next().
-				var mvccMeta enginepb.MVCCMetadata
-				if err = extIter.ValueProto(&mvccMeta); err != nil {
-					return enginepb.MVCCStats{}, err
-				}
-				intents = append(intents, roachpb.MakeIntent(mvccMeta.Txn, extIter.UnsafeKey().Key.Clone()))
-				if int64(len(intents)) >= maxLockConflicts {
-					return statsDiff, &kvpb.LockConflictError{Locks: roachpb.AsLocks(intents)}
-				}
-				extIter.Next()
-				continue
+				return enginepb.MVCCStats{}, errors.AssertionFailedf("found intent after ScanLocks call")
 			}
 
 			if sstBottomTombstone.Timestamp.LessEq(extKey.Timestamp) {
@@ -1229,9 +1216,6 @@ func CheckSSTConflicts(
 	if sstErr != nil {
 		return enginepb.MVCCStats{}, sstErr
 	}
-	if len(intents) > 0 {
-		return enginepb.MVCCStats{}, &kvpb.LockConflictError{Locks: roachpb.AsLocks(intents)}
-	}
 
 	return statsDiff, nil
 }
diff --git a/pkg/storage/sst_test.go b/pkg/storage/sst_test.go
index 67e8003f87f8..14d4c65d878c 100644
--- a/pkg/storage/sst_test.go
+++ b/pkg/storage/sst_test.go
@@ -19,6 +19,7 @@ import (
 	"testing"
 
 	"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
+	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
 	"github.com/cockroachdb/cockroach/pkg/roachpb"
 	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
 	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
@@ -35,18 +36,20 @@ func TestCheckSSTConflictsMaxLockConflicts(t *testing.T) {
 
 	keys := []string{"aa", "bb", "cc", "dd"}
 	intents := []string{"a", "b", "c"}
+	locks := []string{"d", "e"}
 	start, end := "a", "z"
 
 	testcases := []struct {
-		maxLockConflicts int64
-		expectIntents    []string
+		maxLockConflicts    int64
+		expectLockConflicts []string
 	}{
-		{maxLockConflicts: -1, expectIntents: []string{"a"}},
-		{maxLockConflicts: 0, expectIntents: []string{"a"}},
-		{maxLockConflicts: 1, expectIntents: []string{"a"}},
-		{maxLockConflicts: 2, expectIntents: []string{"a", "b"}},
-		{maxLockConflicts: 3, expectIntents: []string{"a", "b", "c"}},
-		{maxLockConflicts: 4, expectIntents: []string{"a", "b", "c"}},
+		{maxLockConflicts: 0, expectLockConflicts: []string{"a", "b", "c", "d", "e"}}, // 0 means no limit
+		{maxLockConflicts: 1, expectLockConflicts: []string{"a"}},
+		{maxLockConflicts: 2, expectLockConflicts: []string{"a", "b"}},
+		{maxLockConflicts: 3, expectLockConflicts: []string{"a", "b", "c"}},
+		{maxLockConflicts: 4, expectLockConflicts: []string{"a", "b", "c", "d"}},
+		{maxLockConflicts: 5, expectLockConflicts: []string{"a", "b", "c", "d", "e"}},
+		{maxLockConflicts: 6, expectLockConflicts: []string{"a", "b", "c", "d", "e"}},
 	}
 
 	// Create SST with keys equal to intents at txn2TS.
@@ -81,6 +84,14 @@ func TestCheckSSTConflictsMaxLockConflicts(t *testing.T) {
 	for _, key := range intents {
 		require.NoError(t, MVCCPut(ctx, batch, roachpb.Key(key), txn1TS, roachpb.MakeValueFromString("intent"), MVCCWriteOptions{Txn: txn1}))
 	}
+	// Also write some replicated locks held by txn1.
+	for i, key := range locks {
+		str := lock.Shared
+		if i%2 != 0 {
+			str = lock.Exclusive
+		}
+		require.NoError(t, MVCCAcquireLock(ctx, batch, txn1, str, roachpb.Key(key), nil, 0))
+	}
 	require.NoError(t, batch.Commit(true))
 	batch.Close()
 	require.NoError(t, engine.Flush())
@@ -101,7 +112,7 @@ func TestCheckSSTConflictsMaxLockConflicts(t *testing.T) {
 					for _, i := range lcErr.Locks {
 						actual = append(actual, string(i.Key))
 					}
-					require.Equal(t, tc.expectIntents, actual)
+					require.Equal(t, tc.expectLockConflicts, actual)
 				})
 			}
 		})