kv: gate batched resolution of pushed locks behind a cluster setting
This commit adds a new `kv.lock_table.batch_pushed_lock_resolution.enabled` cluster
setting which controls whether the lock table should allow non-locking readers
to defer and batch the resolution of conflicting locks whose holder is known to
be pending and have been pushed above the reader's timestamp.

This is a safeguard against bugs or behavior changes as we quickly backport a
fix for cockroachdb#103126.

Epic: None
Release note: None
nvanbenschoten committed Jun 19, 2023
1 parent 23ee0a9 commit 71d6e46
Showing 5 changed files with 386 additions and 2 deletions.
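As a usage sketch (not part of this commit), the new setting can be toggled at runtime over a SQL connection; the connection URL and the lib/pq driver choice below are illustrative assumptions:

package main

import (
	"database/sql"
	"log"

	_ "github.com/lib/pq" // any pgwire-compatible driver works with CockroachDB
)

func main() {
	// Placeholder connection string; adjust for the target cluster.
	db, err := sql.Open("postgres",
		"postgresql://root@localhost:26257/defaultdb?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Disable deferred, batched resolution of pushed locks, reverting
	// non-locking readers to resolving one conflicting lock at a time.
	if _, err := db.Exec(
		"SET CLUSTER SETTING kv.lock_table.batch_pushed_lock_resolution.enabled = false",
	); err != nil {
		log.Fatal(err)
	}
}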
13 changes: 13 additions & 0 deletions pkg/kv/kvserver/concurrency/concurrency_manager.go
@@ -104,6 +104,19 @@ var DiscoveredLocksThresholdToConsultTxnStatusCache = settings.RegisterIntSetting(
settings.NonNegativeInt,
)

// BatchPushedLockResolution controls whether the lock table should allow
// non-locking readers to defer and batch the resolution of conflicting locks
// whose holder is known to be pending and have been pushed above the reader's
// timestamp.
var BatchPushedLockResolution = settings.RegisterBoolSetting(
settings.SystemOnly,
"kv.lock_table.batch_pushed_lock_resolution.enabled",
"whether the lock table should allow non-locking readers to defer and batch the resolution of "+
"conflicting locks whose holder is known to be pending and have been pushed above the reader's "+
"timestamp",
true,
)

// managerImpl implements the Manager interface.
type managerImpl struct {
st *cluster.Settings
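For orientation, a minimal sketch (assuming the cockroach module is importable; not part of the diff) of how a registered BoolSetting like the one above is read and, in tests, overridden:

package main

import (
	"context"
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
)

func main() {
	ctx := context.Background()
	st := cluster.MakeTestingClusterSettings()

	// The setting defaults to true (see the registration above).
	fmt.Println(concurrency.BatchPushedLockResolution.Get(&st.SV)) // true

	// Tests pin a value with Override, which is exactly what the new
	// debug directive in the test file below does.
	concurrency.BatchPushedLockResolution.Override(ctx, &st.SV, false)
	fmt.Println(concurrency.BatchPushedLockResolution.Get(&st.SV)) // false
}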
11 changes: 11 additions & 0 deletions pkg/kv/kvserver/concurrency/concurrency_manager_test.go
@@ -90,6 +90,7 @@ import (
// debug-set-clock ts=<secs>
// debug-advance-clock ts=<secs>
// debug-set-discovered-locks-threshold-to-consult-txn-status-cache n=<count>
// debug-set-batch-pushed-lock-resolution-enabled ok=<enabled>
// debug-set-max-locks n=<count>
// reset
func TestConcurrencyManagerBasic(t *testing.T) {
@@ -577,6 +578,12 @@
c.setDiscoveredLocksThresholdToConsultTxnStatusCache(n)
return ""

case "debug-set-batch-pushed-lock-resolution-enabled":
var ok bool
d.ScanArgs(t, "ok", &ok)
c.setBatchPushedLockResolutionEnabled(ok)
return ""

case "debug-set-max-locks":
var n int
d.ScanArgs(t, "n", &n)
@@ -958,6 +965,10 @@ func (c *cluster) setDiscoveredLocksThresholdToConsultTxnStatusCache(n int) {
concurrency.DiscoveredLocksThresholdToConsultTxnStatusCache.Override(context.Background(), &c.st.SV, int64(n))
}

func (c *cluster) setBatchPushedLockResolutionEnabled(ok bool) {
concurrency.BatchPushedLockResolution.Override(context.Background(), &c.st.SV, ok)
}

// reset clears all request state in the cluster. This avoids portions of tests
// leaking into one another and also serves as an assertion that a sequence of
// commands has completed without leaking any requests.
11 changes: 9 additions & 2 deletions pkg/kv/kvserver/concurrency/lock_table.go
@@ -1734,7 +1734,7 @@ func (l *lockState) tryActiveWait(
// lockTableWaiter but difficult to coordinate through the txnStatusCache.
// This limitation is acceptable because the most important case here is
// optimizing the Export requests issued by backup.
-	if !g.hasUncertaintyInterval() {
+	if !g.hasUncertaintyInterval() && g.lt.batchPushedLockResolution() {
pushedTxn, ok := g.lt.txnStatusCache.pendingTxns.get(lockHolderTxn.ID)
if ok && g.ts.Less(pushedTxn.WriteTimestamp) {
up := roachpb.MakeLockUpdate(pushedTxn, roachpb.Span{Key: l.key})
@@ -2883,7 +2883,7 @@ func (t *lockTableImpl) AddDiscoveredLock(
// holder is known to have been pushed above the reader's timestamp. See the
// comment in tryActiveWait for more details, including why we include the
// hasUncertaintyInterval condition.
-	if str == lock.None && !g.hasUncertaintyInterval() {
+	if str == lock.None && !g.hasUncertaintyInterval() && t.batchPushedLockResolution() {
pushedTxn, ok := g.lt.txnStatusCache.pendingTxns.get(intent.Txn.ID)
if ok && g.ts.Less(pushedTxn.WriteTimestamp) {
g.toResolve = append(
@@ -3143,6 +3143,13 @@ func stepToNextSpan(g *lockTableGuardImpl) *roachpb.Span {
return nil
}

// batchPushedLockResolution returns whether non-locking readers can defer and
// batch resolution of conflicting locks whose holder is known to be pending and
// have been pushed above the reader's timestamp.
func (t *lockTableImpl) batchPushedLockResolution() bool {
return BatchPushedLockResolution.Get(&t.settings.SV)
}

// PushedTransactionUpdated implements the lockTable interface.
func (t *lockTableImpl) PushedTransactionUpdated(txn *roachpb.Transaction) {
// TODO(sumeer): We don't take any action for requests that are already
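To make the gated condition concrete, here is a self-contained sketch with hypothetical, simplified types (the real lockState/lockTableGuardImpl logic is more involved): a non-locking reader defers a conflicting lock into a resolution batch only when the setting is on, the reader has no uncertainty interval, and the holder is a pending transaction already pushed above the reader's timestamp; otherwise it waits and resolves eagerly.

package main

import "fmt"

// Hypothetical stand-ins; not CockroachDB's actual types.
type conflict struct {
	key           string
	holderPending bool // holder txn known to be PENDING
	pushedAbove   bool // holder pushed above the reader's timestamp
}

type reader struct {
	nonLocking     bool
	hasUncertainty bool
	batchingOn     bool // kv.lock_table.batch_pushed_lock_resolution.enabled
}

// shouldDefer mirrors the combined check at both call sites above:
// str == lock.None && !g.hasUncertaintyInterval() && batchPushedLockResolution().
func (r reader) shouldDefer(c conflict) bool {
	return r.nonLocking && !r.hasUncertainty && r.batchingOn &&
		c.holderPending && c.pushedAbove
}

func main() {
	r := reader{nonLocking: true, batchingOn: true}
	var toResolve []string
	for _, c := range []conflict{
		{key: "a", holderPending: true, pushedAbove: true},
		{key: "c", holderPending: true, pushedAbove: false},
	} {
		if r.shouldDefer(c) {
			toResolve = append(toResolve, c.key) // batch for later resolution
			continue
		}
		fmt.Printf("wait in %q's wait-queue, resolve eagerly\n", c.key)
	}
	fmt.Printf("deferred batch: %v\n", toResolve)
}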
@@ -319,3 +319,182 @@ num=0

reset namespace
----

# -------------------------------------------------------------------------
# The kv.lock_table.batch_pushed_lock_resolution.enabled cluster setting can
# be used to disable deferred lock resolution of pushed intents.
# -------------------------------------------------------------------------

debug-set-batch-pushed-lock-resolution-enabled ok=false
----

new-txn name=txn1 ts=10,1 epoch=0 priority=high
----

new-txn name=txn2 ts=10,1 epoch=0
----

new-txn name=txn3 ts=10,1 epoch=0
----

new-request name=req1 txn=txn1 ts=10,1
scan key=a endkey=z
----

sequence req=req1
----
[1] sequence req1: sequencing request
[1] sequence req1: acquiring latches
[1] sequence req1: scanning lock table for conflicting locks
[1] sequence req1: sequencing complete, returned guard

handle-write-intent-error req=req1 lease-seq=1
intent txn=txn2 key=a
intent txn=txn2 key=b
intent txn=txn2 key=c
intent txn=txn2 key=d
intent txn=txn2 key=e
intent txn=txn2 key=f
intent txn=txn2 key=g
intent txn=txn2 key=h
intent txn=txn2 key=i
intent txn=txn2 key=j
----
[2] handle write intent error req1: handled conflicting intents on ‹"a"›, ‹"b"›, ‹"c"›, ‹"d"›, ‹"e"›, ‹"f"›, ‹"g"›, ‹"h"›, ‹"i"›, ‹"j"›, released latches

debug-lock-table
----
num=10
lock: "a"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "b"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "c"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "d"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "e"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "f"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "g"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "h"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "i"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]
lock: "j"
holder: txn: 00000002-0000-0000-0000-000000000000, ts: 10.000000000,1, info: repl epoch: 0, seqs: [0]

# Before re-scanning and pushing, add a waiter on a single key to demonstrate
# that uncontended, replicated keys are released when pushed, while contended,
# replicated keys are not.
new-request name=req2 txn=txn3 ts=10,1
put key=c value=val
----

sequence req=req2
----
[3] sequence req2: sequencing request
[3] sequence req2: acquiring latches
[3] sequence req2: scanning lock table for conflicting locks
[3] sequence req2: waiting in lock wait-queues
[3] sequence req2: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key ‹"c"› (queuedWriters: 1, queuedReaders: 0)
[3] sequence req2: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = false
[3] sequence req2: pushing txn 00000002 to abort
[3] sequence req2: blocked on select in concurrency_test.(*cluster).PushTransaction

# Now re-scan with the high-priority reader.
sequence req=req1
----
[4] sequence req1: re-sequencing request
[4] sequence req1: acquiring latches
[4] sequence req1: scanning lock table for conflicting locks
[4] sequence req1: waiting in lock wait-queues
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key ‹"a"› (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: pusher pushed pushee to 10.000000000,2
[4] sequence req1: resolving intent ‹"a"› for txn 00000002 with PENDING status and clock observation {1 123.000000000,13}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key ‹"b"› (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with ‹00000002-0000-0000-0000-000000000000› on ‹"a"› for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent ‹"b"› for txn 00000002 with PENDING status and clock observation {1 123.000000000,15}
[4] sequence req1: lock wait-queue event: wait for txn 00000002 holding lock @ key ‹"c"› (queuedWriters: 1, queuedReaders: 1)
[4] sequence req1: conflicted with ‹00000002-0000-0000-0000-000000000000› on ‹"b"› for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = false, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent ‹"c"› for txn 00000002 with PENDING status and clock observation {1 123.000000000,17}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key ‹"d"› (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with ‹00000002-0000-0000-0000-000000000000› on ‹"c"› for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent ‹"d"› for txn 00000002 with PENDING status and clock observation {1 123.000000000,19}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key ‹"e"› (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with ‹00000002-0000-0000-0000-000000000000› on ‹"d"› for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent ‹"e"› for txn 00000002 with PENDING status and clock observation {1 123.000000000,21}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key ‹"f"› (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with ‹00000002-0000-0000-0000-000000000000› on ‹"e"› for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent ‹"f"› for txn 00000002 with PENDING status and clock observation {1 123.000000000,23}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key ‹"g"› (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with ‹00000002-0000-0000-0000-000000000000› on ‹"f"› for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent ‹"g"› for txn 00000002 with PENDING status and clock observation {1 123.000000000,25}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key ‹"h"› (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with ‹00000002-0000-0000-0000-000000000000› on ‹"g"› for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent ‹"h"› for txn 00000002 with PENDING status and clock observation {1 123.000000000,27}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key ‹"i"› (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with ‹00000002-0000-0000-0000-000000000000› on ‹"h"› for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent ‹"i"› for txn 00000002 with PENDING status and clock observation {1 123.000000000,29}
[4] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000002 holding lock @ key ‹"j"› (queuedWriters: 0, queuedReaders: 1)
[4] sequence req1: conflicted with ‹00000002-0000-0000-0000-000000000000› on ‹"i"› for 0.000s
[4] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = true
[4] sequence req1: pushing timestamp of txn 00000002 above 10.000000000,1
[4] sequence req1: resolving intent ‹"j"› for txn 00000002 with PENDING status and clock observation {1 123.000000000,31}
[4] sequence req1: lock wait-queue event: done waiting
[4] sequence req1: conflicted with ‹00000002-0000-0000-0000-000000000000› on ‹"j"› for 0.000s
[4] sequence req1: acquiring latches
[4] sequence req1: scanning lock table for conflicting locks
[4] sequence req1: sequencing complete, returned guard

finish req=req1
----
[-] finish req1: finishing request

# Only the contended lock remains.
debug-lock-table
----
num=1
lock: "c"
queued writers:
active: false req: 7, txn: 00000003-0000-0000-0000-000000000000

on-txn-updated txn=txn2 status=aborted
----
[-] update txn: aborting txn2
[3] sequence req2: resolving intent ‹"c"› for txn 00000002 with ABORTED status
[3] sequence req2: lock wait-queue event: done waiting
[3] sequence req2: conflicted with ‹00000002-0000-0000-0000-000000000000› on ‹"c"› for 0.000s
[3] sequence req2: acquiring latches
[3] sequence req2: scanning lock table for conflicting locks
[3] sequence req2: sequencing complete, returned guard

finish req=req2
----
[-] finish req2: finishing request

reset namespace
----

debug-set-batch-pushed-lock-resolution-enabled ok=true
----