From 574f667ed643c0bb9f380d4203d43f687bb315a0 Mon Sep 17 00:00:00 2001 From: Austen McClernon Date: Mon, 13 May 2024 18:00:28 -0400 Subject: [PATCH] kvserver: read lease under mutex when switching lease type A race could occur when a replica queue and post lease application both attempted to switch the lease type. This race would cause the queue to not process the replica because the lease type had already changed. As a result, lease preference violations might not have been quickly resolved by the lease queue. Read the lease under the same mutex used for requesting the lease, when possibly switching the lease type. Resolves: #123998 Release note: None --- pkg/kv/kvserver/queue.go | 4 ++-- pkg/kv/kvserver/replica_range_lease.go | 27 ++++++++++++++++---------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/pkg/kv/kvserver/queue.go b/pkg/kv/kvserver/queue.go index bc0290c51bcc..001c4713ac52 100644 --- a/pkg/kv/kvserver/queue.go +++ b/pkg/kv/kvserver/queue.go @@ -1058,7 +1058,7 @@ func (bq *baseQueue) replicaCanBeProcessed( // and renew or acquire if necessary. if bq.needsLease { if acquireLeaseIfNeeded { - leaseStatus, pErr := repl.redirectOnOrAcquireLease(ctx) + _, pErr := repl.redirectOnOrAcquireLease(ctx) if pErr != nil { switch v := pErr.GetDetail().(type) { case *kvpb.NotLeaseHolderError, *kvpb.RangeNotFoundError: @@ -1071,7 +1071,7 @@ func (bq *baseQueue) replicaCanBeProcessed( // TODO(baptist): Should this be added to replicaInQueue? realRepl, _ := repl.(*Replica) - pErr = realRepl.maybeSwitchLeaseType(ctx, leaseStatus) + pErr = realRepl.maybeSwitchLeaseType(ctx) if pErr != nil { return nil, pErr.GoError() } diff --git a/pkg/kv/kvserver/replica_range_lease.go b/pkg/kv/kvserver/replica_range_lease.go index 3825119ff4e1..644f60a85b95 100644 --- a/pkg/kv/kvserver/replica_range_lease.go +++ b/pkg/kv/kvserver/replica_range_lease.go @@ -1610,17 +1610,24 @@ func (r *Replica) shouldRequestLeaseRLocked( // maybeSwitchLeaseType will synchronously renew a lease using the appropriate // type if it is (or was) owned by this replica and has an incorrect type. This // typically happens when changing kv.expiration_leases_only.enabled. -func (r *Replica) maybeSwitchLeaseType(ctx context.Context, st kvserverpb.LeaseStatus) *kvpb.Error { - if !st.OwnedBy(r.store.StoreID()) { - return nil - } +func (r *Replica) maybeSwitchLeaseType(ctx context.Context) *kvpb.Error { + llHandle := func() *leaseRequestHandle { + now := r.store.Clock().NowAsClockTimestamp() + // The lease status needs to be checked and requested under the same lock, + // to avoid an interleaving lease request changing the lease between the + // two. + r.mu.Lock() + defer r.mu.Unlock() - var llHandle *leaseRequestHandle - r.mu.Lock() - if !r.hasCorrectLeaseTypeRLocked(st.Lease) { - llHandle = r.requestLeaseLocked(ctx, st, nil /* limiter */) - } - r.mu.Unlock() + st := r.leaseStatusAtRLocked(ctx, now) + if !st.OwnedBy(r.store.StoreID()) { + return nil + } + if r.hasCorrectLeaseTypeRLocked(st.Lease) { + return nil + } + return r.requestLeaseLocked(ctx, st, nil /* limiter */) + }() if llHandle != nil { select {