From a9081374615abb6a06511bc35d29121fd14259af Mon Sep 17 00:00:00 2001 From: Pavel Kalinnikov Date: Tue, 21 Feb 2023 14:23:51 +0000 Subject: [PATCH] kvserver: prevent recursive Replica.mu.RLock Previously, we were passing an RLocked Replica to addToReplicasByKeyLocked. That method internally visits the descriptors in replicasByKey, which calls Desc() that may cause a recursive call to RLock if the added replica intersects itself. This is only possible in tests which try to reinsert the Replica to the map; in prod code each Replica is inserted to replicasByKey only once. Release note: none Epic: none --- pkg/kv/kvserver/helpers_test.go | 4 +--- pkg/kv/kvserver/replica_consistency_test.go | 6 +----- pkg/kv/kvserver/store.go | 6 +----- pkg/kv/kvserver/store_create_replica.go | 21 ++++++++++++++------- pkg/kv/kvserver/store_split.go | 4 ++-- 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/pkg/kv/kvserver/helpers_test.go b/pkg/kv/kvserver/helpers_test.go index 243c6d9d773c..316f4c61cd83 100644 --- a/pkg/kv/kvserver/helpers_test.go +++ b/pkg/kv/kvserver/helpers_test.go @@ -68,11 +68,9 @@ func (s *Store) FindTargetAndTransferLease( func (s *Store) AddReplica(repl *Replica) error { s.mu.Lock() defer s.mu.Unlock() - repl.mu.RLock() - defer repl.mu.RUnlock() if err := s.addToReplicasByRangeIDLocked(repl); err != nil { return err - } else if err := s.addToReplicasByKeyLockedReplicaRLocked(repl); err != nil { + } else if err := s.addToReplicasByKeyLocked(repl, repl.Desc()); err != nil { return err } s.metrics.ReplicaCount.Inc(1) diff --git a/pkg/kv/kvserver/replica_consistency_test.go b/pkg/kv/kvserver/replica_consistency_test.go index b813c959ae07..d5be211ce6f7 100644 --- a/pkg/kv/kvserver/replica_consistency_test.go +++ b/pkg/kv/kvserver/replica_consistency_test.go @@ -99,15 +99,11 @@ func TestStoreCheckpointSpans(t *testing.T) { addReplica := func(rangeID roachpb.RangeID, start, end string) { desc := makeDesc(rangeID, start, end) r := &Replica{RangeID: rangeID, startKey: desc.StartKey} - // NB: locking is unnecessary during Replica creation, but this is to work - // around the mutex hold asserts in "Locked" methods below. - r.mu.Lock() - defer r.mu.Unlock() r.mu.state.Desc = &desc r.isInitialized.Set(desc.IsInitialized()) require.NoError(t, s.addToReplicasByRangeIDLocked(r)) if r.IsInitialized() { - require.NoError(t, s.addToReplicasByKeyLockedReplicaRLocked(r)) + require.NoError(t, s.addToReplicasByKeyLocked(r, r.Desc())) descs = append(descs, desc) } } diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go index aab168d91406..9f6069076b8b 100644 --- a/pkg/kv/kvserver/store.go +++ b/pkg/kv/kvserver/store.go @@ -1931,11 +1931,7 @@ func (s *Store) Start(ctx context.Context, stopper *stop.Stopper) error { // TODO(pavelkalinnikov): hide these in Store's replica create functions. err = s.addToReplicasByRangeIDLocked(rep) if err == nil { - // NB: no locking of the Replica is needed since it's being created, but - // it is asserted on in "Locked" methods. - rep.mu.RLock() - err = s.addToReplicasByKeyLockedReplicaRLocked(rep) - rep.mu.RUnlock() + err = s.addToReplicasByKeyLocked(rep, rep.Desc()) } s.mu.Unlock() if err != nil { diff --git a/pkg/kv/kvserver/store_create_replica.go b/pkg/kv/kvserver/store_create_replica.go index 3af5b72ba4f3..43334aa80c63 100644 --- a/pkg/kv/kvserver/store_create_replica.go +++ b/pkg/kv/kvserver/store_create_replica.go @@ -252,12 +252,14 @@ func fromReplicaIsTooOldRLocked(toReplica *Replica, fromReplica *roachpb.Replica return !found && fromReplica.ReplicaID < desc.NextReplicaID } -// addToReplicasByKeyLockedReplicaRLocked adds the replica to the replicasByKey -// btree. The replica must already be in replicasByRangeID. Returns an error if -// a different replica with the same range ID, or an overlapping replica or -// placeholder exists in this Store. Replica.mu must be at least read-locked. -func (s *Store) addToReplicasByKeyLockedReplicaRLocked(repl *Replica) error { - desc := repl.descRLocked() +// addToReplicasByKeyLocked adds the replica to the replicasByKey btree. The +// replica must already be in replicasByRangeID. Returns an error if a different +// replica with the same range ID, or an overlapping replica or placeholder +// exists in this Store. +// +// The descriptor must match repl.Desc() and repl.descRLocked(). It is passed in +// separately to allow callers both holding and not holding Replica.mu. +func (s *Store) addToReplicasByKeyLocked(repl *Replica, desc *roachpb.RangeDescriptor) error { if !desc.IsInitialized() { return errors.Errorf("%s: attempted to add uninitialized replica %s", s, repl) } @@ -314,7 +316,12 @@ func (s *Store) markReplicaInitializedLockedReplLocked(ctx context.Context, r *R } delete(s.mu.uninitReplicas, r.RangeID) - if err := s.addToReplicasByKeyLockedReplicaRLocked(r); err != nil { + // NB: there is a risk that this func tries to lock an already locked r.mu + // while calling r.Desc() in getOverlappingKeyRangeLocked. This can only + // happen if r is already in replicasByKey, which must not be the case by the + // time we get here. + // TODO(pavelkalinnikov): make locking in replicasByKey less subtle. + if err := s.addToReplicasByKeyLocked(r, r.descRLocked()); err != nil { return err } diff --git a/pkg/kv/kvserver/store_split.go b/pkg/kv/kvserver/store_split.go index 73baeefe2869..cf26013173a1 100644 --- a/pkg/kv/kvserver/store_split.go +++ b/pkg/kv/kvserver/store_split.go @@ -341,7 +341,7 @@ func (s *Store) SplitRange( return err } - rightRepl.mu.RLock() - defer rightRepl.mu.RUnlock() + rightRepl.mu.Lock() + defer rightRepl.mu.Unlock() return s.markReplicaInitializedLockedReplLocked(ctx, rightRepl) }