release-20.1: kv/concurrency: drop uncontended replicated lock on unreplicated upgrade #50119

Merged
56 changes: 54 additions & 2 deletions pkg/kv/kvserver/concurrency/lock_table.go
@@ -746,6 +746,9 @@ func (l *lockState) Format(buf *strings.Builder) {
} else {
writeHolderInfo(buf, txn, ts)
}
// TODO(sumeer): Add an optional `description string` field to Request and
// lockTableGuardImpl that tests can set to avoid relying on the seqNum to
// identify requests.
if l.waitingReaders.Len() > 0 {
fmt.Fprintln(buf, " waiting readers:")
for e := l.waitingReaders.Front(); e != nil; e = e.Next() {
@@ -1625,13 +1628,52 @@ func (l *lockState) requestDone(g *lockTableGuardImpl) (gc bool) {
return false
}

// tryFreeLockOnReplicatedAcquire attempts to free a write-uncontended lock
// during the state transition from the Unreplicated durability to the
// Replicated durability. This is possible because a Replicated lock is also
// stored as an MVCC intent, so it does not need to also be stored in the
// lockTable if writers are not queuing on it. This is beneficial because it
// serves as a mitigation for #49973. Since we aren't currently great at
// avoiding excessive contention on limited scans when locks are in the
// lockTable, it's better to keep locks out of the lockTable when possible.
//
// If any of the readers do truly contend with this lock even after their limit
// has been applied, they will notice during their MVCC scan and re-enter the
// queue (possibly recreating the lock through AddDiscoveredLock). Still, in
// practice this seems to work well in avoiding most of the artificial
// concurrency discussed in #49973.
//
// Acquires l.mu.
func (l *lockState) tryFreeLockOnReplicatedAcquire() bool {
l.mu.Lock()
defer l.mu.Unlock()

// Bail if not locked with only the Unreplicated durability.
if !l.holder.locked || l.holder.holder[lock.Replicated].txn != nil {
return false
}

// Bail if the lock has waiting writers. It is not uncontended.
if l.queuedWriters.Len() != 0 {
return false
}

// The lock is uncontended by other writers, so we're safe to drop it.
// This may release readers who were waiting on the lock.
if gc := l.lockIsFree(); !gc {
panic("expected lockIsFree to return true")
}
return true
}

// The lock has transitioned from locked/reserved to unlocked. There could be
// waiters, but there cannot be a reservation.
// REQUIRES: l.mu is locked.
func (l *lockState) lockIsFree() (gc bool) {
if l.reservation != nil {
panic("lockTable bug")
panic("called lockIsFree on lock with reservation")
}

// All waiting readers don't need to wait here anymore.
for e := l.waitingReaders.Front(); e != nil; {
g := e.Value.(*lockTableGuardImpl)
@@ -1831,8 +1873,8 @@ func (t *lockTableImpl) AcquireLock(
iter.FirstOverlap(&lockState{key: key})
if !iter.Valid() {
if durability == lock.Replicated {
tree.mu.Unlock()
// Don't remember uncontended replicated locks.
tree.mu.Unlock()
return nil
}
l = &lockState{id: tree.nextLockSeqNum(), key: key, ss: ss}
@@ -1843,6 +1885,16 @@
atomic.AddInt64(&tree.numLocks, 1)
} else {
l = iter.Cur()
if durability == lock.Replicated && l.tryFreeLockOnReplicatedAcquire() {
// Don't remember uncontended replicated locks. Just like in the
// case where the lock is initially added as replicated, we drop
// replicated locks from the lockTable when being upgraded from
// Unreplicated to Replicated, whenever possible.
tree.Delete(l)
tree.mu.Unlock()
atomic.AddInt64(&tree.numLocks, -1)
return nil
}
}
err := l.acquireLock(strength, durability, txn, txn.WriteTimestamp)
tree.mu.Unlock()
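
To make the new fast path concrete, here is a minimal, self-contained Go sketch of the drop decision distilled from tryFreeLockOnReplicatedAcquire above. The types are simplified stand-ins for illustration, not the real lockTable structures.

package main

import "fmt"

// lockState is a toy stand-in for the real lock state, keeping only the
// fields the drop decision looks at.
type lockState struct {
	locked         bool // is the lock held at all?
	replicatedHeld bool // is it already held with Replicated durability?
	queuedWriters  int  // writers waiting in the lock's queue
}

// tryFreeLockOnReplicatedAcquire reports whether an acquisition at
// Replicated durability may drop the lock from the table entirely.
func (l *lockState) tryFreeLockOnReplicatedAcquire() bool {
	// Bail if not locked with only the Unreplicated durability.
	if !l.locked || l.replicatedHeld {
		return false
	}
	// Bail if the lock has waiting writers. It is not uncontended.
	if l.queuedWriters != 0 {
		return false
	}
	// Uncontended by writers: safe to drop. The Replicated lock is also
	// stored as an MVCC intent, so contending requests can rediscover it.
	return true
}

func main() {
	uncontended := &lockState{locked: true}
	contended := &lockState{locked: true, queuedWriters: 1}
	fmt.Println(uncontended.tryFreeLockOnReplicatedAcquire()) // true: drop from the lockTable
	fmt.Println(contended.tryFreeLockOnReplicatedAcquire())   // false: keep the entry
}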
42 changes: 38 additions & 4 deletions pkg/kv/kvserver/concurrency/testdata/concurrency_manager/update
@@ -333,17 +333,35 @@ local: num=0
# Issue another write to the same key for txn1 at its initial timestamp,
# this time with a replicated durability. The timestamp in the lock
# table should regress back down to reflect the replicated lock state.
#
# NOTE: we currently drop locks from the lockTable when they are
# upgraded from unreplicated to replicated if they have no active
# writers waiting on them. So to test what we want to test here, first
# enqueue a writer on the lock.

new-request name=req4 txn=none ts=10,1
put key=k value=v4
----

sequence req=req4
----
[3] sequence req4: sequencing request
[3] sequence req4: acquiring latches
[3] sequence req4: scanning lock table for conflicting locks
[3] sequence req4: waiting in lock wait-queues
[3] sequence req4: pushing txn 00000001 to abort
[3] sequence req4: blocked on select in concurrency_test.(*cluster).PushTransaction

new-request name=req3 txn=txn1 ts=10,1
put key=k value=v2 seq=1
----

sequence req=req3
----
[3] sequence req3: sequencing request
[3] sequence req3: acquiring latches
[3] sequence req3: scanning lock table for conflicting locks
[3] sequence req3: sequencing complete, returned guard
[4] sequence req3: sequencing request
[4] sequence req3: acquiring latches
[4] sequence req3: scanning lock table for conflicting locks
[4] sequence req3: sequencing complete, returned guard

on-lock-acquired req=req3 key=k seq=1 dur=r
----
Expand All @@ -358,7 +376,23 @@ debug-lock-table
global: num=1
lock: "k"
holder: txn: 00000001-0000-0000-0000-000000000000, ts: 0.000000010,1, info: repl epoch: 0, seqs: [1], unrepl epoch: 0, seqs: [0]
queued writers:
active: true req: 9, txn: none
distinguished req: 9
local: num=0

# Finish off txn1. Not needed once we can get rid of req4.
on-txn-updated txn=txn1 status=committed
----
[-] update txn: committing txn1
[3] sequence req4: resolving intent "k" for txn 00000001 with COMMITTED status
[3] sequence req4: acquiring latches
[3] sequence req4: scanning lock table for conflicting locks
[3] sequence req4: sequencing complete, returned guard

finish req=req4
----
[-] finish req4: finishing request

reset namespace
----
32 changes: 26 additions & 6 deletions pkg/kv/kvserver/concurrency/testdata/lock_table/lock_changes
@@ -53,10 +53,30 @@ local: num=0
# Lock is reacquired at same epoch with lower timestamp and different durability.
# The lock's timestamp is allowed to regress in this case, because it must never
# diverge from the replicated state machine.
#
# We first add a queued writer because the lockTable currently does not keep track
# of uncontended replicated locks.
# ---------------------------------------------------------------------------------

new-request r=reqContend txn=none ts=10 spans=w@a
----

scan r=reqContend
----
start-waiting: true

acquire r=req2 k=a durability=r
----
global: num=1
lock: "a"
holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000008,0, info: repl epoch: 0, seqs: [3], unrepl epoch: 0, seqs: [2, 3]
queued writers:
active: true req: 1, txn: none
distinguished req: 1
local: num=0

dequeue r=reqContend
----
global: num=1
lock: "a"
holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000008,0, info: repl epoch: 0, seqs: [3], unrepl epoch: 0, seqs: [2, 3]
@@ -119,8 +139,8 @@ global: num=1
lock: "a"
holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000008,0, info: repl epoch: 0, seqs: [3], unrepl epoch: 2, seqs: [0]
waiting readers:
req: 1, txn: 00000000-0000-0000-0000-000000000002
distinguished req: 1
req: 2, txn: 00000000-0000-0000-0000-000000000002
distinguished req: 2
local: num=0

new-txn txn=txn1 ts=14 epoch=1 seq=1
@@ -135,8 +155,8 @@ global: num=1
lock: "a"
holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: repl epoch: 1, seqs: [1], unrepl epoch: 2, seqs: [0]
waiting readers:
req: 1, txn: 00000000-0000-0000-0000-000000000002
distinguished req: 1
req: 2, txn: 00000000-0000-0000-0000-000000000002
distinguished req: 2
local: num=0

guard-state r=req5
@@ -178,8 +198,8 @@ global: num=1
lock: "a"
holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000014,0, info: repl epoch: 1, seqs: [1], unrepl epoch: 1, seqs: [0, 1]
waiting readers:
req: 2, txn: 00000000-0000-0000-0000-000000000002
distinguished req: 2
req: 3, txn: 00000000-0000-0000-0000-000000000002
distinguished req: 3
local: num=0

guard-state r=req7
129 changes: 129 additions & 0 deletions pkg/kv/kvserver/concurrency/testdata/lock_table/lock_dropped
@@ -0,0 +1,129 @@
# Misc tests where locks are ignored or dropped during lock acquisitions.

new-lock-table maxlocks=10000
----

# ---------------------------------------------------------------------------------
# New replicated locks are ignored.
# ---------------------------------------------------------------------------------

new-txn txn=txn1 ts=10 epoch=0 seq=2
----

new-request r=req1 txn=txn1 ts=10 spans=w@a
----

acquire r=req1 k=a durability=r
----
global: num=0
local: num=0

# ---------------------------------------------------------------------------------
# Upgrading from unreplicated to replicated for an uncontended lock causes that
# lock to be dropped.
# ---------------------------------------------------------------------------------

acquire r=req1 k=a durability=u
----
global: num=1
lock: "a"
holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [2]
local: num=0

acquire r=req1 k=a durability=r
----
global: num=0
local: num=0

# ---------------------------------------------------------------------------------
# Upgrading from unreplicated to replicated for a lock with only waiting readers
# causes that lock to be dropped and the readers to be released.
# ---------------------------------------------------------------------------------

new-request r=reqContendReader txn=none ts=10 spans=r@a
----

acquire r=req1 k=a durability=u
----
global: num=1
lock: "a"
holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [2]
local: num=0

scan r=reqContendReader
----
start-waiting: true

print
----
global: num=1
lock: "a"
holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [2]
waiting readers:
req: 1, txn: none
distinguished req: 1
local: num=0

acquire r=req1 k=a durability=r
----
global: num=0
local: num=0

guard-state r=reqContendReader
----
new: state=doneWaiting

# ---------------------------------------------------------------------------------
# Upgrading from unreplicated to replicated for a lock with both waiting
# readers and queued writers causes that lock to be retained.
# ---------------------------------------------------------------------------------

new-request r=reqContendWriter txn=none ts=10 spans=w@a
----

acquire r=req1 k=a durability=u
----
global: num=1
lock: "a"
holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [2]
local: num=0

scan r=reqContendReader
----
start-waiting: true

scan r=reqContendWriter
----
start-waiting: true

print
----
global: num=1
lock: "a"
holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: unrepl epoch: 0, seqs: [2]
waiting readers:
req: 1, txn: none
queued writers:
active: true req: 2, txn: none
distinguished req: 1
local: num=0

acquire r=req1 k=a durability=r
----
global: num=1
lock: "a"
holder: txn: 00000000-0000-0000-0000-000000000001, ts: 0.000000010,0, info: repl epoch: 0, seqs: [2], unrepl epoch: 0, seqs: [2]
waiting readers:
req: 1, txn: none
queued writers:
active: true req: 2, txn: none
distinguished req: 1
local: num=0

guard-state r=reqContendReader
----
new: state=waitForDistinguished txn=txn1 ts=10 key="a" held=true guard-access=read

guard-state r=reqContendWriter
----
new: state=waitFor txn=txn1 ts=10 key="a" held=true guard-access=write
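
As a compact restatement of the scenarios above, the following Go sketch models what happens to waiters on an unreplicated-to-replicated upgrade (hypothetical toy types, not the real lockTable API): waiting readers alone do not keep the lock in the table and are released, while a queued writer causes the entry to be retained.

package main

import "fmt"

// lockState is a toy model keeping only the waiters that the upgrade
// decision above cares about.
type lockState struct {
	waitingReaders []string // names of requests blocked as readers
	queuedWriters  []string // names of requests queued as writers
}

// onReplicatedUpgrade returns the readers released by the upgrade and
// whether the lock entry survives in the table.
func (l *lockState) onReplicatedUpgrade() (released []string, kept bool) {
	if len(l.queuedWriters) > 0 {
		// Contended by a writer: keep the lock; nobody is released.
		return nil, true
	}
	// Drop the lock and release the readers, who will rediscover the
	// conflict during their MVCC scan if it is real.
	released, l.waitingReaders = l.waitingReaders, nil
	return released, false
}

func main() {
	readersOnly := &lockState{waitingReaders: []string{"reqContendReader"}}
	fmt.Println(readersOnly.onReplicatedUpgrade()) // [reqContendReader] false

	mixed := &lockState{
		waitingReaders: []string{"reqContendReader"},
		queuedWriters:  []string{"reqContendWriter"},
	}
	fmt.Println(mixed.onReplicatedUpgrade()) // [] true
}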