From 5eb44e3f60d6e6052981501f24bcedda8bc7f823 Mon Sep 17 00:00:00 2001
From: Nathan VanBenschoten
Date: Fri, 6 Oct 2023 12:52:05 -0400
Subject: [PATCH] kv: dequeue request from lock table wait-queues on scan error

Informs #111352.
Informs #111530.
Informs #111564.
Informs #111893.

In 8205b437, we added a case to the lock table where a request's initial
scan could throw an error. This was not being handled properly if the
request had already entered any other lock wait-queues. In these cases,
the request's entries in those wait-queues would be abandoned and the
locks would get stuck.

This commit fixes that issue by dequeuing the request from the lock table
when throwing an error from ScanAndEnqueue.

This was one of the causes of the recent kvnemesis instability, but we
believe that there is at least one other issue that is still causing
timeouts.

Release note: None
---
 pkg/kv/kvserver/concurrency/lock_table.go | 3 +++
 pkg/kv/kvserver/concurrency/testdata/lock_table/shared_locks | 5 +----
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pkg/kv/kvserver/concurrency/lock_table.go b/pkg/kv/kvserver/concurrency/lock_table.go
index cb5121d2849f..3b1295371604 100644
--- a/pkg/kv/kvserver/concurrency/lock_table.go
+++ b/pkg/kv/kvserver/concurrency/lock_table.go
@@ -3785,6 +3785,9 @@ func (t *lockTableImpl) ScanAndEnqueue(req Request, guard lockTableGuard) (lockT
 
 	err := g.resumeScan(true /* notify */)
 	if err != nil {
+		// We're not returning the guard on this error path, so we need to
+		// release the guard in case it has already entered any wait-queues.
+		t.Dequeue(g)
 		return nil, kvpb.NewError(err)
 	}
 	if g.notRemovableLock != nil {
diff --git a/pkg/kv/kvserver/concurrency/testdata/lock_table/shared_locks b/pkg/kv/kvserver/concurrency/testdata/lock_table/shared_locks
index 4aca46ad8fb3..e97a37cd9083 100644
--- a/pkg/kv/kvserver/concurrency/testdata/lock_table/shared_locks
+++ b/pkg/kv/kvserver/concurrency/testdata/lock_table/shared_locks
@@ -1137,10 +1137,7 @@ lock promotion from Shared to Exclusive is not allowed
 
 print
 ----
-num=2
- lock: "a"
-   queued locking requests:
-    active: false req: 58, strength: Exclusive, txn: 00000000-0000-0000-0000-000000000002
+num=1
  lock: "b"
   holder: txn: 00000000-0000-0000-0000-000000000002 epoch: 0, iso: Serializable, info: unrepl [(str: Shared seq: 0)]