Skip to content

Commit

Permalink
kv: gate batched resolution of pushed locks behind a cluster setting
Browse files Browse the repository at this point in the history
This commit adds a new `kv.lock_table.batch_pushed_lock_resolution.enabled` cluster
setting which controls whether the lock table should allow non-locking readers
to defer and batch the resolution of conflicting locks whose holder is known to
be pending and to have been pushed above the reader's timestamp.

This is a safeguard against bugs or behavior changes as we quickly backport a
fix for cockroachdb#103126.

Epic: None
Release note: None
  • Loading branch information
nvanbenschoten committed Jul 13, 2023
1 parent a7366c4 commit 3dcc9a0
Show file tree
Hide file tree
Showing 5 changed files with 390 additions and 2 deletions.
13 changes: 13 additions & 0 deletions pkg/kv/kvserver/concurrency/concurrency_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,19 @@ var DiscoveredLocksThresholdToConsultTxnStatusCache = settings.RegisterIntSettin
settings.NonNegativeInt,
)

// BatchPushedLockResolution controls whether the lock table should allow
// non-locking readers to defer and batch the resolution of conflicting locks
// whose holder is known to be pending and to have been pushed above the
// reader's timestamp.
//
// Defaults to true. Per the commit message, the setting exists as a safeguard
// (escape hatch) against bugs or behavior changes in the batched-resolution
// path, so the previous behavior can be restored without a binary change.
var BatchPushedLockResolution = settings.RegisterBoolSetting(
settings.SystemOnly,
"kv.lock_table.batch_pushed_lock_resolution.enabled",
"whether the lock table should allow non-locking readers to defer and batch the resolution of "+
"conflicting locks whose holder is known to be pending and have been pushed above the reader's "+
"timestamp",
true,
)

// managerImpl implements the Manager interface.
type managerImpl struct {
st *cluster.Settings
Expand Down
11 changes: 11 additions & 0 deletions pkg/kv/kvserver/concurrency/concurrency_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ import (
// debug-set-clock ts=<secs>
// debug-advance-clock ts=<secs>
// debug-set-discovered-locks-threshold-to-consult-txn-status-cache n=<count>
// debug-set-batch-pushed-lock-resolution-enabled ok=<enabled>
// debug-set-max-locks n=<count>
// reset
func TestConcurrencyManagerBasic(t *testing.T) {
Expand Down Expand Up @@ -570,6 +571,12 @@ func TestConcurrencyManagerBasic(t *testing.T) {
c.setDiscoveredLocksThresholdToConsultTxnStatusCache(n)
return ""

case "debug-set-batch-pushed-lock-resolution-enabled":
var ok bool
d.ScanArgs(t, "ok", &ok)
c.setBatchPushedLockResolutionEnabled(ok)
return ""

case "debug-set-max-locks":
var n int
d.ScanArgs(t, "n", &n)
Expand Down Expand Up @@ -953,6 +960,10 @@ func (c *cluster) setDiscoveredLocksThresholdToConsultTxnStatusCache(n int) {
concurrency.DiscoveredLocksThresholdToConsultTxnStatusCache.Override(context.Background(), &c.st.SV, int64(n))
}

// setBatchPushedLockResolutionEnabled overrides the
// kv.lock_table.batch_pushed_lock_resolution.enabled cluster setting for this
// test cluster's settings, mirroring the other debug-set-* helpers above.
func (c *cluster) setBatchPushedLockResolutionEnabled(ok bool) {
	ctx := context.Background()
	concurrency.BatchPushedLockResolution.Override(ctx, &c.st.SV, ok)
}

// reset clears all request state in the cluster. This avoids portions of tests
// leaking into one another and also serves as an assertion that a sequence of
// commands has completed without leaking any requests.
Expand Down
11 changes: 9 additions & 2 deletions pkg/kv/kvserver/concurrency/lock_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -1662,7 +1662,7 @@ func (l *lockState) tryActiveWait(
// lockTableWaiter but difficult to coordinate through the txnStatusCache.
// This limitation is acceptable because the most important case here is
// optimizing the Export requests issued by backup.
if !g.hasUncertaintyInterval() {
if !g.hasUncertaintyInterval() && g.lt.batchPushedLockResolution() {
pushedTxn, ok := g.lt.txnStatusCache.pendingTxns.get(lockHolderTxn.ID)
if ok && g.ts.Less(pushedTxn.WriteTimestamp) {
up := roachpb.MakeLockUpdate(pushedTxn, roachpb.Span{Key: l.key})
Expand Down Expand Up @@ -2730,7 +2730,7 @@ func (t *lockTableImpl) AddDiscoveredLock(
// holder is known to have been pushed above the reader's timestamp. See the
// comment in tryActiveWait for more details, including why we include the
// hasUncertaintyInterval condition.
if sa == spanset.SpanReadOnly && !g.hasUncertaintyInterval() {
if sa == spanset.SpanReadOnly && !g.hasUncertaintyInterval() && t.batchPushedLockResolution() {
pushedTxn, ok := g.lt.txnStatusCache.pendingTxns.get(intent.Txn.ID)
if ok && g.ts.Less(pushedTxn.WriteTimestamp) {
g.toResolve = append(
Expand Down Expand Up @@ -3018,6 +3018,13 @@ func stepToNextSpan(g *lockTableGuardImpl) *spanset.Span {
return nil
}

// batchPushedLockResolution reports whether non-locking readers may defer and
// batch the resolution of conflicting locks whose holder is known to be
// pending and to have been pushed above the reader's timestamp. The value is
// read from the BatchPushedLockResolution cluster setting.
func (t *lockTableImpl) batchPushedLockResolution() bool {
	enabled := BatchPushedLockResolution.Get(&t.settings.SV)
	return enabled
}

// PushedTransactionUpdated implements the lockTable interface.
func (t *lockTableImpl) PushedTransactionUpdated(txn *roachpb.Transaction) {
// TODO(sumeer): We don't take any action for requests that are already
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -325,3 +325,183 @@ local: num=0

reset namespace
----

# -------------------------------------------------------------------------
# The kv.lock_table.batch_pushed_lock_resolution.enabled cluster setting can
# be used to disable deferred lock resolution of pushed intents.
# -------------------------------------------------------------------------

debug-set-batch-pushed-lock-resolution-enabled ok=false
----

new-txn name=txn1 ts=10,1 epoch=0 priority=high
----

new-txn name=txn2 ts=10,1 epoch=0
----

new-txn name=txn3 ts=10,1 epoch=0
----

new-request name=req1 txn=txn1 ts=10,1
scan key=a endkey=z
----

sequence req=req1
----
[1] sequence req1: sequencing request
[1] sequence req1: acquiring latches
[1] sequence req1: scanning lock table for conflicting locks
[1] sequence req1: sequencing complete, returned guard

handle-write-intent-error req=req1 lease-seq=1
intent txn=txn2 key=a
intent txn=txn2 key=b
intent txn=txn2 key=c
intent txn=txn2 key=d
intent txn=txn2 key=e
intent txn=txn2 key=f
intent txn=txn2 key=g
intent txn=txn2 key=h
intent txn=txn2 key=i
intent txn=txn2 key=j
----
[2] handle write intent error req1: handled conflicting intents on "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", released latches

debug-lock-table
----
global: num=10
lock: "a"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "b"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "c"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "d"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "e"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "f"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "g"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "h"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "i"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "j"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
local: num=0

# Before re-scanning and pushing, add a waiter on a single key to demonstrate
# that uncontended, replicated keys are released when pushed, while contended,
# replicated keys are not.
new-request name=req2 txn=txn3 ts=10,1
put key=c value=val
----

sequence req=req2
----
[3] sequence req2: sequencing request
[3] sequence req2: acquiring latches
[3] sequence req2: scanning lock table for conflicting locks
[3] sequence req2: waiting in lock wait-queues
[3] sequence req2: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key "c" (queuedWriters: 1, queuedReaders: 0)
[3] sequence req2: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = false
[3] sequence req2: pushing txn 00000002 to abort
[3] sequence req2: blocked on select in concurrency_test.(*cluster).PushTransaction

# Now re-scan with the high-priority reader.
sequence req=req1
----
[4] sequence req1: re-sequencing request
[4] sequence req1: acquiring latches
[4] sequence req1: scanning lock table for conflicting locks
[4] sequence req1: waiting in lock wait-queues
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key "a" (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: pusher pushed pushee to 10.000000000,2
[4] sequence req1: resolving intent "a" for txn 00000002 with PENDING status and clock observation {1 123.000000000,13}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key "b" (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with 00000002-0000-0000-0000-000000000000 on "a" for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent "b" for txn 00000002 with PENDING status and clock observation {1 123.000000000,15}
[4] sequence req1: lock wait-queue event: wait for txn 00000002 holding lock @ key "c" (queuedWriters: 1, queuedReaders: 1)
[4] sequence req1: conflicted with 00000002-0000-0000-0000-000000000000 on "b" for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = false, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent "c" for txn 00000002 with PENDING status and clock observation {1 123.000000000,17}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key "d" (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with 00000002-0000-0000-0000-000000000000 on "c" for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent "d" for txn 00000002 with PENDING status and clock observation {1 123.000000000,19}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key "e" (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with 00000002-0000-0000-0000-000000000000 on "d" for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent "e" for txn 00000002 with PENDING status and clock observation {1 123.000000000,21}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key "f" (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with 00000002-0000-0000-0000-000000000000 on "e" for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent "f" for txn 00000002 with PENDING status and clock observation {1 123.000000000,23}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key "g" (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with 00000002-0000-0000-0000-000000000000 on "f" for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent "g" for txn 00000002 with PENDING status and clock observation {1 123.000000000,25}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key "h" (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with 00000002-0000-0000-0000-000000000000 on "g" for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent "h" for txn 00000002 with PENDING status and clock observation {1 123.000000000,27}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key "i" (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with 00000002-0000-0000-0000-000000000000 on "h" for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent "i" for txn 00000002 with PENDING status and clock observation {1 123.000000000,29}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key "j" (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with 00000002-0000-0000-0000-000000000000 on "i" for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent "j" for txn 00000002 with PENDING status and clock observation {1 123.000000000,31}
[4] sequence req1: lock wait-queue event: done waiting
[4] sequence req1: conflicted with 00000002-0000-0000-0000-000000000000 on "j" for 0.000s
[4] sequence req1: acquiring latches
[4] sequence req1: scanning lock table for conflicting locks
[4] sequence req1: sequencing complete, returned guard

finish req=req1
----
[-] finish req1: finishing request

# Only the contended lock remains.
debug-lock-table
----
global: num=1
lock: "c"
res: req: 7, txn: 00000003-0000-0000-0000-000000000000, ts: 10.000000000,1, seq: 0
local: num=0

on-txn-updated txn=txn2 status=aborted
----
[-] update txn: aborting txn2
[3] sequence req2: resolving intent "c" for txn 00000002 with ABORTED status
[3] sequence req2: lock wait-queue event: done waiting
[3] sequence req2: conflicted with 00000002-0000-0000-0000-000000000000 on "c" for 0.000s
[3] sequence req2: acquiring latches
[3] sequence req2: scanning lock table for conflicting locks
[3] sequence req2: sequencing complete, returned guard

finish req=req2
----
[-] finish req2: finishing request

reset namespace
----

debug-set-batch-pushed-lock-resolution-enabled ok=true
----
Loading

0 comments on commit 3dcc9a0

Please sign in to comment.