From 6bbcc3a45fca4675ec88d7aec47f735ef448fd71 Mon Sep 17 00:00:00 2001 From: Arul Ajmani Date: Tue, 6 Dec 2022 17:42:25 -0500 Subject: [PATCH] kv: use correct sequence number when scanning for conflicting intents A read only request scans the lock table before it can proceed with dropping latches. It can only evaluate if no conflicting intents are found. While doing so, it also determines if the MVCC scan evaluation needs to consult intent history (by using the interleaved iterator). The MVCC scan evaluation needs to consult intent history if we discover an intent by the transaction performing the read operation at a higher sequence number or a higher timestamp. The correct sequence numbers to compare here are those on the `BatchRequest`, and not on the transaction. Before this patch, we were using the sequence number on the transaction, which could lead us to wrongly conclude that the use of an intent interleaving iterator wasn't required. Specifically, if the batch of the following construction was retried: ``` b.Scan(a, e) b.Put(b, "value") ``` The scan would end up (erroneously) reading "value" at key b. As part of this patch, I've also renamed `ScanConflictingIntents` to `ScanConflictingIntentsForDroppingLatchesEarly` -- the function isn't as generalized as the old name would suggest. Closes #92217 Closes #92189 Release note: None --- pkg/kv/kvclient/kvcoord/txn_test.go | 52 +++++++++++++++++++++++++++++ pkg/kv/kvserver/replica_evaluate.go | 2 ++ pkg/kv/kvserver/replica_read.go | 12 +++++-- pkg/storage/engine.go | 40 ++++++++++++++-------- 4 files changed, 89 insertions(+), 17 deletions(-) diff --git a/pkg/kv/kvclient/kvcoord/txn_test.go b/pkg/kv/kvclient/kvcoord/txn_test.go index 35dfe4338afe..01734497384e 100644 --- a/pkg/kv/kvclient/kvcoord/txn_test.go +++ b/pkg/kv/kvclient/kvcoord/txn_test.go @@ -979,3 +979,55 @@ func TestRetrySerializableBumpsToNow(t *testing.T) { })) require.Greater(t, attempt, 1, "Transaction is expected to retry once") } + +// TestTxnRetryWithLatchesDroppedEarly serves as a regression test for +// https://github.com/cockroachdb/cockroach/issues/92189. It constructs a batch +// like: +// b.Scan(a, e) +// b.Put(b, "value2") +// which is forced to retry at a higher timestamp. It ensures that the scan +// request does not see the intent at key b, even when the retry happens. +func TestTxnRetryWithLatchesDroppedEarly(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + s := createTestDB(t) + defer s.Stop() + + keyA := "a" + keyB := "b" + keyE := "e" + keyF := "f" + + err := s.DB.Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error { + s.Manual.Advance(1 * time.Second) + + { + // Attempt to write to keyF in another txn. + conflictTxn := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */) + conflictTxn.TestingSetPriority(enginepb.MaxTxnPriority) + if err := conflictTxn.Put(ctx, keyF, "valueF"); err != nil { + return err + } + if err := conflictTxn.Commit(ctx); err != nil { + return err + } + } + + b := txn.NewBatch() + b.Scan(keyA, keyE) + b.Put(keyB, "value2") + b.Put(keyF, "value3") // bumps the transaction and causes a server side retry. + + err := txn.Run(ctx, b) + if err != nil { + return err + } + + // Ensure no rows were returned as part of the scan. + require.Equal(t, 0, len(b.RawResponse().Responses[0].GetInner().(*roachpb.ScanResponse).Rows)) + return nil + }) + if err != nil { + t.Fatal(err) + } +} diff --git a/pkg/kv/kvserver/replica_evaluate.go b/pkg/kv/kvserver/replica_evaluate.go index 9b838c7efa23..86605beea432 100644 --- a/pkg/kv/kvserver/replica_evaluate.go +++ b/pkg/kv/kvserver/replica_evaluate.go @@ -454,6 +454,8 @@ func evaluateBatch( if baHeader.Txn != nil { // If transactional, send out the final transaction entry with the reply. br.Txn = baHeader.Txn + // Reset response transaction's sequence number. + br.Txn.Sequence = 0 // Note that br.Txn.ReadTimestamp might be higher than baHeader.Timestamp if // we had an EndTxn that decided that it can refresh to something higher // than baHeader.Timestamp because there were no refresh spans. diff --git a/pkg/kv/kvserver/replica_read.go b/pkg/kv/kvserver/replica_read.go index de7e521ba75f..6c5a242a7cf4 100644 --- a/pkg/kv/kvserver/replica_read.go +++ b/pkg/kv/kvserver/replica_read.go @@ -31,6 +31,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/mon" "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" "github.com/kr/pretty" ) @@ -300,9 +301,14 @@ func (r *Replica) canDropLatchesBeforeEval( // Check if any of the requests within the batch need to resolve any intents // or if any of them need to use an intent interleaving iterator. for _, req := range ba.Requests { - start, end := req.GetInner().Header().Key, req.GetInner().Header().EndKey - needsIntentInterleavingForThisRequest, err := storage.ScanConflictingIntents( - ctx, rw, ba.Txn, ba.Header.Timestamp, start, end, &intents, maxIntents, + reqHeader := req.GetInner().Header() + start, end, seq := reqHeader.Key, reqHeader.EndKey, reqHeader.Sequence + var txnID uuid.UUID + if ba.Txn != nil { + txnID = ba.Txn.ID + } + needsIntentInterleavingForThisRequest, err := storage.ScanConflictingIntentsForDroppingLatchesEarly( + ctx, rw, txnID, ba.Header.Timestamp, start, end, seq, &intents, maxIntents, ) if err != nil { return false /* ok */, true /* stillNeedsIntentInterleaving */, roachpb.NewError( diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index e0644148100d..df9c69c1e4f9 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -1677,21 +1677,31 @@ func assertMVCCIteratorInvariants(iter MVCCIterator) error { return nil } -// ScanConflictingIntents scans intents using only the separated intents lock -// table. The result set is added to the given `intents` slice. It ignores -// intents that do not conflict with `txn`. If it encounters intents that were -// written by `txn` that are either at a higher sequence number than txn's or at -// a lower sequence number but at a higher timestamp, `needIntentHistory` is set -// to true. This flag is used to signal to the caller that a subsequent scan -// over the MVCC key space (for the batch in question) will need to be performed -// using an intent interleaving iterator in order to be able to read the correct -// provisional value. -func ScanConflictingIntents( +// ScanConflictingIntentsForDroppingLatchesEarly scans intents using only the +// separated intents lock table on behalf of a batch request trying to drop its +// latches early. If found, conflicting intents are added to the supplied +// `intents` slice, which indicates to the caller that evaluation should not +// proceed until the intents are resolved. Intents that don't conflict with the +// transaction referenced by txnID[1] at the supplied `ts` are ignored. +// +// The caller must supply the sequence number of the request on behalf of which +// the intents are being scanned. This is used to determine if the caller needs +// to consult intent history when performing a scan over the MVCC keyspace +// (indicated by the `needIntentHistory` return parameter). Intent history is +// required to read the correct provisional value when scanning if we encounter +// an intent written by the `txn` at a higher sequence number than the one +// supplied or at a higher timestamp than the `ts` supplied (regardless of the +// sequence number of the intent). +// +// [1] The supplied txnID may be empty (uuid.Nil) if the request on behalf of +// which the scan is being performed is non-transactional. +func ScanConflictingIntentsForDroppingLatchesEarly( ctx context.Context, reader Reader, - txn *roachpb.Transaction, + txnID uuid.UUID, ts hlc.Timestamp, start, end roachpb.Key, + seq enginepb.TxnSeq, intents *[]roachpb.Intent, maxIntents int64, ) (needIntentHistory bool, err error) { @@ -1718,7 +1728,9 @@ func ScanConflictingIntents( var ok bool for ok, err = iter.SeekEngineKeyGE(EngineKey{Key: ltStart}); ok; ok, err = iter.NextEngineKey() { if maxIntents != 0 && int64(len(*intents)) >= maxIntents { - break + // Return early if we're done accumulating intents; make no claims about + // not needing intent history. + return true /* needsIntentHistory */, nil } if err = protoutil.Unmarshal(iter.UnsafeValue(), &meta); err != nil { return false, err @@ -1726,14 +1738,14 @@ func ScanConflictingIntents( if meta.Txn == nil { return false, errors.Errorf("intent without transaction") } - ownIntent := txn != nil && txn.ID == meta.Txn.ID + ownIntent := txnID != uuid.Nil && txnID == meta.Txn.ID if ownIntent { // If we ran into one of our own intents, check whether the intent has a // higher (or equal) sequence number or a higher (or equal) timestamp. If // either of these conditions is true, a corresponding scan over the MVCC // key space will need access to the key's intent history in order to read // the correct provisional value. So we set `needIntentHistory` to true. - if txn.Sequence <= meta.Txn.Sequence || ts.LessEq(meta.Timestamp.ToTimestamp()) { + if seq <= meta.Txn.Sequence || ts.LessEq(meta.Timestamp.ToTimestamp()) { needIntentHistory = true } continue