diff --git a/pkg/kv/kvserver/concurrency/lock_table.go b/pkg/kv/kvserver/concurrency/lock_table.go index 8444f5ad9b5d..55770181b1e3 100644 --- a/pkg/kv/kvserver/concurrency/lock_table.go +++ b/pkg/kv/kvserver/concurrency/lock_table.go @@ -746,6 +746,9 @@ func (l *lockState) Format(buf *strings.Builder) { } else { writeHolderInfo(buf, txn, ts) } + // TODO(sumeer): Add an optional `description string` field to Request and + // lockTableGuardImpl that tests can set to avoid relying on the seqNum to + // identify requests. if l.waitingReaders.Len() > 0 { fmt.Fprintln(buf, " waiting readers:") for e := l.waitingReaders.Front(); e != nil; e = e.Next() { @@ -1625,13 +1628,52 @@ func (l *lockState) requestDone(g *lockTableGuardImpl) (gc bool) { return false } +// tryFreeLockOnReplicatedAcquire attempts to free a write-uncontended lock +// during the state transition from the Unreplicated durability to the +// Replicated durability. This is possible because a Replicated lock is also +// stored as an MVCC intent, so it does not need to also be stored in the +// lockTable if writers are not queuing on it. This is beneficial because it +// serves as a mitigation for #49973. Since we aren't currently great at +// avoiding excessive contention on limited scans when locks are in the +// lockTable, it's better the keep locks out of the lockTable when possible. +// +// If any of the readers do truly contend with this lock even after their limit +// has been applied, they will notice during their MVCC scan and re-enter the +// queue (possibly recreating the lock through AddDiscoveredLock). Still, in +// practice this seems to work well in avoiding most of the artificial +// concurrency discussed in #49973. +// +// Acquires l.mu. +func (l *lockState) tryFreeLockOnReplicatedAcquire() bool { + l.mu.Lock() + defer l.mu.Unlock() + + // Bail if not locked with only the Unreplicated durability. + if !l.holder.locked || l.holder.holder[lock.Replicated].txn != nil { + return false + } + + // Bail if the lock has waiting writers. It is not uncontended. + if l.queuedWriters.Len() != 0 { + return false + } + + // The lock is uncontended by other writers, so we're safe to drop it. + // This may release readers who were waiting on the lock. + if gc := l.lockIsFree(); !gc { + panic("expected lockIsFree to return true") + } + return true +} + // The lock has transitioned from locked/reserved to unlocked. There could be // waiters, but there cannot be a reservation. // REQUIRES: l.mu is locked. func (l *lockState) lockIsFree() (gc bool) { if l.reservation != nil { - panic("lockTable bug") + panic("called lockIsFree on lock with reservation") } + // All waiting readers don't need to wait here anymore. for e := l.waitingReaders.Front(); e != nil; { g := e.Value.(*lockTableGuardImpl) @@ -1831,8 +1873,8 @@ func (t *lockTableImpl) AcquireLock( iter.FirstOverlap(&lockState{key: key}) if !iter.Valid() { if durability == lock.Replicated { - tree.mu.Unlock() // Don't remember uncontended replicated locks. + tree.mu.Unlock() return nil } l = &lockState{id: tree.nextLockSeqNum(), key: key, ss: ss} @@ -1843,6 +1885,16 @@ func (t *lockTableImpl) AcquireLock( atomic.AddInt64(&tree.numLocks, 1) } else { l = iter.Cur() + if durability == lock.Replicated && l.tryFreeLockOnReplicatedAcquire() { + // Don't remember uncontended replicated locks. Just like in the + // case where the lock is initially added as replicated, we drop + // replicated locks from the lockTable when being upgraded from + // Unreplicated to Replicated, whenever possible. + tree.Delete(l) + tree.mu.Unlock() + atomic.AddInt64(&tree.numLocks, -1) + return nil + } } err := l.acquireLock(strength, durability, txn, txn.WriteTimestamp) tree.mu.Unlock() diff --git a/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/update b/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/update index 75b7cef634fa..2ee445ae9ee3 100644 --- a/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/update +++ b/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/update @@ -333,6 +333,24 @@ local: num=0 # Issue another write to the same key for txn1 at its initial timestamp, # this time with a replicated durability. The timestamp in the lock # table should regress back down to reflect the replicated lock state. +# +# NOTE: we currently drop locks from the lockTable when they are +# upgraded from unreplicated to replicated if they have no active +# writers waiting on them. So to test what we want to test here, first +# enqueue a writer on the lock. + +new-request name=req4 txn=none ts=10,1 + put key=k value=v4 +---- + +sequence req=req4 +---- +[3] sequence req4: sequencing request +[3] sequence req4: acquiring latches +[3] sequence req4: scanning lock table for conflicting locks +[3] sequence req4: waiting in lock wait-queues +[3] sequence req4: pushing txn 00000001 to abort +[3] sequence req4: blocked on select in concurrency_test.(*cluster).PushTransaction new-request name=req3 txn=txn1 ts=10,1 put key=k value=v2 seq=1 @@ -340,10 +358,10 @@ new-request name=req3 txn=txn1 ts=10,1 sequence req=req3 ---- -[3] sequence req3: sequencing request -[3] sequence req3: acquiring latches -[3] sequence req3: scanning lock table for conflicting locks -[3] sequence req3: sequencing complete, returned guard +[4] sequence req3: sequencing request +[4] sequence req3: acquiring latches +[4] sequence req3: scanning lock table for conflicting locks +[4] sequence req3: sequencing complete, returned guard on-lock-acquired req=req3 key=k seq=1 dur=r ---- @@ -358,7 +376,23 @@ debug-lock-table global: num=1 lock: "k" holder: txn: 00000001-0000-0000-0000-000000000000, ts: 0.000000010,1, info: repl epoch: 0, seqs: [1], unrepl epoch: 0, seqs: [0] + queued writers: + active: true req: 9, txn: none + distinguished req: 9 local: num=0 +# Finish off txn1. Not needed once we can get rid of req4. +on-txn-updated txn=txn1 status=committed +---- +[-] update txn: committing txn1 +[3] sequence req4: resolving intent "k" for txn 00000001 with COMMITTED status +[3] sequence req4: acquiring latches +[3] sequence req4: scanning lock table for conflicting locks +[3] sequence req4: sequencing complete, returned guard + +finish req=req4 +---- +[-] finish req4: finishing request + reset namespace ---- diff --git a/pkg/kv/kvserver/concurrency/testdata/lock_table/lock_changes b/pkg/kv/kvserver/concurrency/testdata/lock_table/lock_changes index ee90e3fc38c6..ca9ae5f3409a 100644 --- a/pkg/kv/kvserver/concurrency/testdata/lock_table/lock_changes +++ b/pkg/kv/kvserver/concurrency/testdata/lock_table/lock_changes @@ -53,10 +53,30 @@ local: num=0 # Lock is reacquired at same epoch with lower timestamp and different durability. # The lock's timestamp is allowed to regress in this case, because it must never # diverge from the replicated state machine. +# +# We first add a queued writer because the lockTable currently does not keep track +# of uncontended replicated locks. # --------------------------------------------------------------------------------- +new-request r=reqContend txn=none ts=10 spans=w@a +---- + +scan r=reqContend +---- +start-waiting: true + acquire r=req2 k=a durability=r ---- +global: num=1 + lock: "a" + holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000008,0, info: repl epoch: 0, seqs: [3], unrepl epoch: 0, seqs: [2, 3] + queued writers: + active: true req: 1, txn: none + distinguished req: 1 +local: num=0 + +dequeue r=reqContend +---- global: num=1 lock: "a" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000008,0, info: repl epoch: 0, seqs: [3], unrepl epoch: 0, seqs: [2, 3] @@ -119,8 +139,8 @@ global: num=1 lock: "a" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000008,0, info: repl epoch: 0, seqs: [3], unrepl epoch: 2, seqs: [0] waiting readers: - req: 1, txn: 00000000-0000-0000-0000-000000000002 - distinguished req: 1 + req: 2, txn: 00000000-0000-0000-0000-000000000002 + distinguished req: 2 local: num=0 new-txn txn=txn1 ts=14 epoch=1 seq=1 @@ -135,8 +155,8 @@ global: num=1 lock: "a" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: repl epoch: 1, seqs: [1], unrepl epoch: 2, seqs: [0] waiting readers: - req: 1, txn: 00000000-0000-0000-0000-000000000002 - distinguished req: 1 + req: 2, txn: 00000000-0000-0000-0000-000000000002 + distinguished req: 2 local: num=0 guard-state r=req5 @@ -178,8 +198,8 @@ global: num=1 lock: "a" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000014,0, info: repl epoch: 1, seqs: [1], unrepl epoch: 1, seqs: [0, 1] waiting readers: - req: 2, txn: 00000000-0000-0000-0000-000000000002 - distinguished req: 2 + req: 3, txn: 00000000-0000-0000-0000-000000000002 + distinguished req: 3 local: num=0 guard-state r=req7 diff --git a/pkg/kv/kvserver/concurrency/testdata/lock_table/lock_dropped b/pkg/kv/kvserver/concurrency/testdata/lock_table/lock_dropped new file mode 100644 index 000000000000..415c5f784c46 --- /dev/null +++ b/pkg/kv/kvserver/concurrency/testdata/lock_table/lock_dropped @@ -0,0 +1,129 @@ +# Misc tests where locks are ignored or dropped during lock acquisitions. + +new-lock-table maxlocks=10000 +---- + +# --------------------------------------------------------------------------------- +# New replicated locks are ignored. +# --------------------------------------------------------------------------------- + +new-txn txn=txn1 ts=10 epoch=0 seq=2 +---- + +new-request r=req1 txn=txn1 ts=10 spans=w@a +---- + +acquire r=req1 k=a durability=r +---- +global: num=0 +local: num=0 + +# --------------------------------------------------------------------------------- +# Upgrading from unreplicated to replicated for an uncontended lock causes that +# lock to be dropped. +# --------------------------------------------------------------------------------- + +acquire r=req1 k=a durability=u +---- +global: num=1 + lock: "a" + holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [2] +local: num=0 + +acquire r=req1 k=a durability=r +---- +global: num=0 +local: num=0 + +# --------------------------------------------------------------------------------- +# Upgrading from unreplicated to replicated for a lock with only waiting readers +# causes that lock to be dropped and the readers to be released. +# --------------------------------------------------------------------------------- + +new-request r=reqContendReader txn=none ts=10 spans=r@a +---- + +acquire r=req1 k=a durability=u +---- +global: num=1 + lock: "a" + holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [2] +local: num=0 + +scan r=reqContendReader +---- +start-waiting: true + +print +---- +global: num=1 + lock: "a" + holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [2] + waiting readers: + req: 1, txn: none + distinguished req: 1 +local: num=0 + +acquire r=req1 k=a durability=r +---- +global: num=0 +local: num=0 + +guard-state r=reqContendReader +---- +new: state=doneWaiting + +# --------------------------------------------------------------------------------- +# Upgrading from unreplicated to replicated for a lock with waiting reader and +# writers causes the lock to be retained. +# --------------------------------------------------------------------------------- + +new-request r=reqContendWriter txn=none ts=10 spans=w@a +---- + +acquire r=req1 k=a durability=u +---- +global: num=1 + lock: "a" + holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [2] +local: num=0 + +scan r=reqContendReader +---- +start-waiting: true + +scan r=reqContendWriter +---- +start-waiting: true + +print +---- +global: num=1 + lock: "a" + holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [2] + waiting readers: + req: 1, txn: none + queued writers: + active: true req: 2, txn: none + distinguished req: 1 +local: num=0 + +acquire r=req1 k=a durability=r +---- +global: num=1 + lock: "a" + holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: repl epoch: 0, seqs: [2], unrepl epoch: 0, seqs: [2] + waiting readers: + req: 1, txn: none + queued writers: + active: true req: 2, txn: none + distinguished req: 1 +local: num=0 + +guard-state r=reqContendReader +---- +new: state=waitForDistinguished txn=txn1 ts=10 key="a" held=true guard-access=read + +guard-state r=reqContendWriter +---- +new: state=waitFor txn=txn1 ts=10 key="a" held=true guard-access=write diff --git a/pkg/kv/kvserver/concurrency/testdata/lock_table/size_limit_exceeded b/pkg/kv/kvserver/concurrency/testdata/lock_table/size_limit_exceeded index 659947960767..d07fef562aac 100644 --- a/pkg/kv/kvserver/concurrency/testdata/lock_table/size_limit_exceeded +++ b/pkg/kv/kvserver/concurrency/testdata/lock_table/size_limit_exceeded @@ -38,11 +38,13 @@ global: num=2 holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [0] local: num=0 -# c is locked both replicated and unreplicated, though we really only need it -# replicated locked for the case we want to exercise. This is because the -# lockTable currently does not keep track of uncontended replicated locks. -# When that behavior changes with the segregated lock table, we can remove -# this unreplicated lock acquisition. +# c is first locked as unreplicated and establishes a writer queue +# before being locked as replicated. We really only need it replicated +# locked for the case we want to exercise, but we jump through these +# extra hoops because the lockTable currently does not keep track of +# uncontended replicated locks. When that behavior changes with the +# segregated lock table, we can remove this unreplicated lock +# acquisition and queued writer. acquire r=req1 k=c durability=u ---- global: num=3 @@ -54,8 +56,29 @@ global: num=3 holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [0] local: num=0 +new-request r=reqContend txn=none ts=10 spans=w@c +---- + +scan r=reqContend +---- +start-waiting: true + acquire r=req1 k=c durability=r ---- +global: num=3 + lock: "a" + holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [0] + lock: "b" + holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [0] + lock: "c" + holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: repl epoch: 0, seqs: [0], unrepl epoch: 0, seqs: [0] + queued writers: + active: true req: 2, txn: none + distinguished req: 2 +local: num=0 + +dequeue r=reqContend +---- global: num=3 lock: "a" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [0] @@ -96,9 +119,9 @@ global: num=3 lock: "a" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [0] queued writers: - active: true req: 2, txn: 00000000-0000-0000-0000-000000000002 active: true req: 3, txn: 00000000-0000-0000-0000-000000000002 - distinguished req: 2 + active: true req: 4, txn: 00000000-0000-0000-0000-000000000002 + distinguished req: 3 lock: "b" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [0] lock: "c" @@ -109,9 +132,9 @@ release txn=txn1 span=a ---- global: num=3 lock: "a" - res: req: 2, txn: 00000000-0000-0000-0000-000000000002, ts: 0.000000010,0, seq: 0 + res: req: 3, txn: 00000000-0000-0000-0000-000000000002, ts: 0.000000010,0, seq: 0 queued writers: - active: true req: 3, txn: 00000000-0000-0000-0000-000000000002 + active: true req: 4, txn: 00000000-0000-0000-0000-000000000002 lock: "b" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [0] lock: "c" @@ -130,14 +153,14 @@ print ---- global: num=3 lock: "a" - res: req: 2, txn: 00000000-0000-0000-0000-000000000002, ts: 0.000000010,0, seq: 0 + res: req: 3, txn: 00000000-0000-0000-0000-000000000002, ts: 0.000000010,0, seq: 0 queued writers: - active: true req: 3, txn: 00000000-0000-0000-0000-000000000002 + active: true req: 4, txn: 00000000-0000-0000-0000-000000000002 lock: "b" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [0] queued writers: - active: true req: 2, txn: 00000000-0000-0000-0000-000000000002 - distinguished req: 2 + active: true req: 3, txn: 00000000-0000-0000-0000-000000000002 + distinguished req: 3 lock: "c" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: repl epoch: 0, seqs: [0], unrepl epoch: 0, seqs: [0] local: num=0 @@ -178,26 +201,26 @@ add-discovered r=req7 k=d txn=txn1 ---- global: num=4 lock: "a" - res: req: 2, txn: 00000000-0000-0000-0000-000000000002, ts: 0.000000010,0, seq: 0 + res: req: 3, txn: 00000000-0000-0000-0000-000000000002, ts: 0.000000010,0, seq: 0 queued writers: - active: true req: 3, txn: 00000000-0000-0000-0000-000000000002 + active: true req: 4, txn: 00000000-0000-0000-0000-000000000002 lock: "b" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [0] waiting readers: - req: 4, txn: 00000000-0000-0000-0000-000000000002 + req: 5, txn: 00000000-0000-0000-0000-000000000002 queued writers: - active: true req: 2, txn: 00000000-0000-0000-0000-000000000002 - active: true req: 5, txn: 00000000-0000-0000-0000-000000000002 - distinguished req: 2 + active: true req: 3, txn: 00000000-0000-0000-0000-000000000002 + active: true req: 6, txn: 00000000-0000-0000-0000-000000000002 + distinguished req: 3 lock: "c" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: repl epoch: 0, seqs: [0], unrepl epoch: 0, seqs: [0] queued writers: - active: true req: 6, txn: 00000000-0000-0000-0000-000000000002 - distinguished req: 6 + active: true req: 7, txn: 00000000-0000-0000-0000-000000000002 + distinguished req: 7 lock: "d" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: repl epoch: 0, seqs: [0] queued writers: - active: false req: 7, txn: 00000000-0000-0000-0000-000000000002 + active: false req: 8, txn: 00000000-0000-0000-0000-000000000002 local: num=0 new-request r=req8 txn=txn2 ts=10 spans=w@e @@ -215,7 +238,7 @@ global: num=1 lock: "d" holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: repl epoch: 0, seqs: [0] queued writers: - active: false req: 7, txn: 00000000-0000-0000-0000-000000000002 + active: false req: 8, txn: 00000000-0000-0000-0000-000000000002 local: num=0 guard-state r=req2