From 15187bb670fdf04a14323e2285b9a1ccac95f6a4 Mon Sep 17 00:00:00 2001
From: Nathan VanBenschoten
Date: Fri, 16 Apr 2021 15:28:54 -0400
Subject: [PATCH] kvnemesis: add backoff to retry loop on txn aborts

This commit adds an exponential backoff to the transaction retry loop
when it detects that a transaction has been aborted. This was observed
to prevent thrashing under heavy read-write contention on
`global_reads` ranges, which are added to kvnemesis in #63747. These
ranges have an added propensity to cause thrashing because every write
to them gets bumped to a higher timestamp, so it is currently
imperative that a transaction be able to refresh its reads after
writing to a `global_reads` range. If other transactions continue to
invalidate a read-write transaction's reads, it may never complete and
will repeatedly abort conflicting txns after detecting deadlocks. This
commit prevents such thrashing from stalling kvnemesis indefinitely.

I see two ways that we can improve this situation in the future.

1. The first option is to introduce some form of pessimistic
   read-locking for long-running read-write transactions, so that they
   can eventually prevent other transactions from invalidating their
   reads as they proceed to write to a `global_reads` range and get
   their write timestamp bumped. This would ensure that when the
   long-running transaction returns to refresh its reads (if it even
   needs to, depending on the durability of the read locks), the
   refresh would have a high likelihood of succeeding. This is
   discussed in #52768.

2. The second option is to allow a transaction to re-write its existing
   intents in new epochs without being bumped by the closed timestamp.
   If a transaction only got bumped by the closed timestamp when
   writing new intents, then after being forced to retry, it would have
   a high likelihood of succeeding on its second epoch as long as it
   didn't write to a new set of keys. This is discussed in #63796.
---
 pkg/kv/kvnemesis/applier.go | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/pkg/kv/kvnemesis/applier.go b/pkg/kv/kvnemesis/applier.go
index bde84f442580..9d43b92b433f 100644
--- a/pkg/kv/kvnemesis/applier.go
+++ b/pkg/kv/kvnemesis/applier.go
@@ -12,6 +12,7 @@ package kvnemesis
 
 import (
 	"context"
+	"time"
 
 	"github.com/cockroachdb/cockroach/pkg/kv"
 	"github.com/cockroachdb/cockroach/pkg/roachpb"
@@ -86,8 +87,17 @@ func applyOp(ctx context.Context, db *kv.DB, op *Operation) {
 		err := db.AdminTransferLease(ctx, o.Key, o.Target)
 		o.Result = resultError(ctx, err)
 	case *ClosureTxnOperation:
+		// Use a backoff loop to avoid thrashing on txn aborts. Don't wait between
+		// epochs of the same transaction to avoid waiting while holding locks.
+		retryOnAbort := retry.StartWithCtx(ctx, retry.Options{
+			InitialBackoff: 1 * time.Millisecond,
+			MaxBackoff:     250 * time.Millisecond,
+		})
 		var savedTxn *kv.Txn
 		txnErr := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
+			if savedTxn != nil && savedTxn.TestingCloneTxn().Epoch == 0 {
+				retryOnAbort.Next()
+			}
 			savedTxn = txn
 			for i := range o.Ops {
 				op := &o.Ops[i]
@@ -116,7 +126,7 @@ func applyOp(ctx context.Context, db *kv.DB, op *Operation) {
 		})
 		o.Result = resultError(ctx, txnErr)
 		if txnErr == nil {
-			o.Txn = savedTxn.Sender().TestingCloneTxn()
+			o.Txn = savedTxn.TestingCloneTxn()
 		}
 	default:
 		panic(errors.AssertionFailedf(`unknown operation type: %T %v`, o, o))
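
For readers unfamiliar with the pkg/util/retry package used in the
diff, below is a minimal, self-contained sketch of the same
backoff-on-abort pattern. Only retry.StartWithCtx, retry.Options, and
(*Retry).Next come from the real package; runWithAbortBackoff and its
run callback are hypothetical stand-ins for db.Txn and the epoch check.

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/cockroachdb/cockroach/pkg/util/retry"
)

// runWithAbortBackoff mirrors the applier's pattern: back off before
// starting a replacement txn after an abort, but never between retry
// epochs of the same txn, where locks may still be held. The aborted
// result is a stand-in for the Epoch == 0 check in the patch.
func runWithAbortBackoff(ctx context.Context, run func() (aborted bool, err error)) error {
	retryOnAbort := retry.StartWithCtx(ctx, retry.Options{
		InitialBackoff: 1 * time.Millisecond,
		MaxBackoff:     250 * time.Millisecond,
	})
	for {
		aborted, err := run()
		if err != nil || !aborted {
			return err
		}
		// The aborted txn has been discarded and holds no locks, so it
		// is safe to wait here before starting its replacement.
		retryOnAbort.Next()
	}
}

func main() {
	attempts := 0
	_ = runWithAbortBackoff(context.Background(), func() (bool, error) {
		attempts++
		fmt.Println("attempt", attempts)
		return attempts < 3, nil // simulate two aborts, then success
	})
}

The epoch check in the patch serves as the abort signal: db.Txn reuses
the same kv.Txn across retries and bumps its epoch on each restart, so
re-entering the closure with the epoch back at 0 indicates the previous
txn was aborted and replaced rather than restarted at a higher epoch.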
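
On option 1, a rough illustration of the idea (not the design discussed
in #52768): kv.Txn already exposes GetForUpdate, which acquires an
unreplicated exclusive lock at read time. The key "k" and the function
name here are made up for the example.

package example

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/kv"
)

// lockingReadWrite sketches the shape of pessimistic read locking: lock
// the read so conflicting writers queue behind this txn, making the
// eventual read refresh more likely to succeed. Unreplicated locks are
// best-effort, which is the durability caveat noted in #52768.
func lockingReadWrite(ctx context.Context, db *kv.DB) error {
	return db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		res, err := txn.GetForUpdate(ctx, "k") // locking read, not txn.Get
		if err != nil {
			return err
		}
		// A write to a global_reads range still bumps the txn's write
		// timestamp, but the lock protects the read from invalidation
		// by other writers in the meantime.
		return txn.Put(ctx, "k", res.ValueBytes())
	})
}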