Skip to content

Commit

Permalink
Merge #97381
Browse files Browse the repository at this point in the history
97381: kvserver: prevent recursive Replica.mu.RLock r=tbg a=pavelkalinnikov

Previously, we were passing an RLocked Replica to addToReplicasByKeyLocked.
That method internally visits the descriptors in replicasByKey, which calls
Desc() that may cause a recursive call to RLock if the added replica intersects
itself. This is only possible in tests which try to reinsert the Replica to the
map; in prod code each Replica is inserted to replicasByKey only once.

This PR avoids the possibility of this deadlock by shifting the responsibility
of locking to the caller.

Fixes #96931

Release note: none
Epic: none

Co-authored-by: Pavel Kalinnikov <[email protected]>
  • Loading branch information
craig[bot] and pav-kv committed Feb 22, 2023
2 parents 286b3e2 + a908137 commit e9e513d
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 22 deletions.
4 changes: 1 addition & 3 deletions pkg/kv/kvserver/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,9 @@ func (s *Store) FindTargetAndTransferLease(
func (s *Store) AddReplica(repl *Replica) error {
s.mu.Lock()
defer s.mu.Unlock()
repl.mu.RLock()
defer repl.mu.RUnlock()
if err := s.addToReplicasByRangeIDLocked(repl); err != nil {
return err
} else if err := s.addToReplicasByKeyLockedReplicaRLocked(repl); err != nil {
} else if err := s.addToReplicasByKeyLocked(repl, repl.Desc()); err != nil {
return err
}
s.metrics.ReplicaCount.Inc(1)
Expand Down
6 changes: 1 addition & 5 deletions pkg/kv/kvserver/replica_consistency_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,11 @@ func TestStoreCheckpointSpans(t *testing.T) {
addReplica := func(rangeID roachpb.RangeID, start, end string) {
desc := makeDesc(rangeID, start, end)
r := &Replica{RangeID: rangeID, startKey: desc.StartKey}
// NB: locking is unnecessary during Replica creation, but this is to work
// around the mutex hold asserts in "Locked" methods below.
r.mu.Lock()
defer r.mu.Unlock()
r.mu.state.Desc = &desc
r.isInitialized.Set(desc.IsInitialized())
require.NoError(t, s.addToReplicasByRangeIDLocked(r))
if r.IsInitialized() {
require.NoError(t, s.addToReplicasByKeyLockedReplicaRLocked(r))
require.NoError(t, s.addToReplicasByKeyLocked(r, r.Desc()))
descs = append(descs, desc)
}
}
Expand Down
6 changes: 1 addition & 5 deletions pkg/kv/kvserver/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -1931,11 +1931,7 @@ func (s *Store) Start(ctx context.Context, stopper *stop.Stopper) error {
// TODO(pavelkalinnikov): hide these in Store's replica create functions.
err = s.addToReplicasByRangeIDLocked(rep)
if err == nil {
// NB: no locking of the Replica is needed since it's being created, but
// it is asserted on in "Locked" methods.
rep.mu.RLock()
err = s.addToReplicasByKeyLockedReplicaRLocked(rep)
rep.mu.RUnlock()
err = s.addToReplicasByKeyLocked(rep, rep.Desc())
}
s.mu.Unlock()
if err != nil {
Expand Down
21 changes: 14 additions & 7 deletions pkg/kv/kvserver/store_create_replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,14 @@ func fromReplicaIsTooOldRLocked(toReplica *Replica, fromReplica *roachpb.Replica
return !found && fromReplica.ReplicaID < desc.NextReplicaID
}

// addToReplicasByKeyLockedReplicaRLocked adds the replica to the replicasByKey
// btree. The replica must already be in replicasByRangeID. Returns an error if
// a different replica with the same range ID, or an overlapping replica or
// placeholder exists in this Store. Replica.mu must be at least read-locked.
func (s *Store) addToReplicasByKeyLockedReplicaRLocked(repl *Replica) error {
desc := repl.descRLocked()
// addToReplicasByKeyLocked adds the replica to the replicasByKey btree. The
// replica must already be in replicasByRangeID. Returns an error if a different
// replica with the same range ID, or an overlapping replica or placeholder
// exists in this Store.
//
// The descriptor must match repl.Desc() and repl.descRLocked(). It is passed in
// separately to allow callers both holding and not holding Replica.mu.
func (s *Store) addToReplicasByKeyLocked(repl *Replica, desc *roachpb.RangeDescriptor) error {
if !desc.IsInitialized() {
return errors.Errorf("%s: attempted to add uninitialized replica %s", s, repl)
}
Expand Down Expand Up @@ -314,7 +316,12 @@ func (s *Store) markReplicaInitializedLockedReplLocked(ctx context.Context, r *R
}
delete(s.mu.uninitReplicas, r.RangeID)

if err := s.addToReplicasByKeyLockedReplicaRLocked(r); err != nil {
// NB: there is a risk that this func tries to lock an already locked r.mu
// while calling r.Desc() in getOverlappingKeyRangeLocked. This can only
// happen if r is already in replicasByKey, which must not be the case by the
// time we get here.
// TODO(pavelkalinnikov): make locking in replicasByKey less subtle.
if err := s.addToReplicasByKeyLocked(r, r.descRLocked()); err != nil {
return err
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/kv/kvserver/store_split.go
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ func (s *Store) SplitRange(
return err
}

rightRepl.mu.RLock()
defer rightRepl.mu.RUnlock()
rightRepl.mu.Lock()
defer rightRepl.mu.Unlock()
return s.markReplicaInitializedLockedReplLocked(ctx, rightRepl)
}

0 comments on commit e9e513d

Please sign in to comment.