Skip to content

Commit

Permalink
kvserver: stop pretending to deal with Raft leadership when draining
Browse files Browse the repository at this point in the history
The draining code was pretending to deal with transferring Raft
leadership, besides leases, but it was all an illusion. This patch stops
the pretending, simplifying the code and preventing mis-interpretation
(which I've suffered from).
There are a few cases to discuss:
1) There is no lease. The code really looked like it will try to do
   something to the leadership, but ended up not doing anything because
   maybeTransferRaftLeadership() is a no-op when there's no lease.
2) The leases needs to be moved. In this case, the code was first trying
   to move the lease and then, if that *failed, was trying to move the
   leadership. This was a no-op since maybeTransferRaftLeadership() only
   moves to the leadership to the leaseholder; if the leaseholder hasn't
   moved, it's a no-op.
3) The lease doesn't need to be moved, but the leadership does. In this
   case the code could theoretically do the leadership transfer, but
   this case is very rare - the leadership moves as lease transfers
   apply, and if that fails, we'll continue attempting to move the
   leadership every Raft tick (every 200ms).

Release note: None
  • Loading branch information
andreimatei committed Oct 16, 2020
1 parent 984bcf1 commit c1b11f2
Showing 1 changed file with 12 additions and 25 deletions.
37 changes: 12 additions & 25 deletions pkg/kv/kvserver/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -1069,11 +1069,10 @@ func (s *Store) SetDraining(drain bool, reporter func(int, redact.SafeString)) {
const leaseTransferConcurrency = 100
sem := quotapool.NewIntPool("Store.SetDraining", leaseTransferConcurrency)

// Incremented for every lease or Raft leadership transfer
// attempted. We try to send both the lease and the Raft leaders
// away, but this may not reliably work. Instead, we run the
// surrounding retry loop until there are no leaders/leases left
// (ignoring single-replica or uninitialized Raft groups).
// Incremented for every lease transfer attempted. We try to send the lease
// away, but this may not reliably work. Instead, we run the surrounding
// retry loop until there are no leases left (ignoring single-replica
// ranges).
var numTransfersAttempted int32
newStoreReplicaVisitor(s).Visit(func(r *Replica) bool {
//
Expand Down Expand Up @@ -1110,12 +1109,6 @@ func (s *Store) SetDraining(drain bool, reporter func(int, redact.SafeString)) {

r.mu.Lock()
r.mu.draining = true
status := r.raftStatusRLocked()
// needsRaftTransfer is true when we can reasonably hope to transfer
// this replica's lease and/or Raft leadership away.
needsRaftTransfer := status != nil &&
len(status.Progress) > 1 &&
!(status.RaftState == raft.StateFollower && status.Lead != 0)
r.mu.Unlock()

var drainingLease roachpb.Lease
Expand All @@ -1142,7 +1135,13 @@ func (s *Store) SetDraining(drain bool, reporter func(int, redact.SafeString)) {
drainingLease.OwnedBy(s.StoreID()) &&
r.IsLeaseValid(ctx, drainingLease, s.Clock().Now())

if !needsLeaseTransfer && !needsRaftTransfer {
// Note that this code doesn't deal with transferring the Raft
// leadership. Leadership tries to follow the lease, so when leases
// are transferred, leadership will be transferred too. For ranges
// without leases we probably should try to move the leadership
// manually to a non-draining replica.

if !needsLeaseTransfer {
if log.V(1) {
// This logging is useful to troubleshoot incomplete drains.
log.Info(ctx, "not moving out")
Expand All @@ -1152,7 +1151,7 @@ func (s *Store) SetDraining(drain bool, reporter func(int, redact.SafeString)) {
}
if log.V(1) {
// This logging is useful to troubleshoot incomplete drains.
log.Infof(ctx, "trying to move replica out: lease transfer = %v, raft transfer = %v", needsLeaseTransfer, needsRaftTransfer)
log.Infof(ctx, "trying to move replica out")
}

if needsLeaseTransfer {
Expand All @@ -1175,18 +1174,6 @@ func (s *Store) SetDraining(drain bool, reporter func(int, redact.SafeString)) {
err,
)
}
if err == nil && leaseTransferred {
// If we just transferred the lease away, Raft leadership will
// usually transfer with it. Invoking a separate Raft leadership
// transfer would only obstruct this.
needsRaftTransfer = false
}
}

if needsRaftTransfer {
r.raftMu.Lock()
r.maybeTransferRaftLeadership(ctx)
r.raftMu.Unlock()
}
}); err != nil {
if log.V(1) {
Expand Down

0 comments on commit c1b11f2

Please sign in to comment.