From c1b11f24bb68e20704f40eb40a1519d21774c54d Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Thu, 15 Oct 2020 17:51:40 -0400 Subject: [PATCH] kvserver: stop pretending to deal with Raft leadership when draining The draining code was pretending to deal with transferring Raft leadership, besides leases, but it was all an illusion. This patch stops the pretending, simplifying the code and preventing mis-interpretation (which I've suffered from). There are a few cases to discuss: 1) There is no lease. The code really looked like it will try to do something to the leadership, but ended up not doing anything because maybeTransferRaftLeadership() is a no-op when there's no lease. 2) The leases needs to be moved. In this case, the code was first trying to move the lease and then, if that *failed, was trying to move the leadership. This was a no-op since maybeTransferRaftLeadership() only moves to the leadership to the leaseholder; if the leaseholder hasn't moved, it's a no-op. 3) The lease doesn't need to be moved, but the leadership does. In this case the code could theoretically do the leadership transfer, but this case is very rare - the leadership moves as lease transfers apply, and if that fails, we'll continue attempting to move the leadership every Raft tick (every 200ms). Release note: None --- pkg/kv/kvserver/store.go | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go index 3095c2f75d86..dd3611106097 100644 --- a/pkg/kv/kvserver/store.go +++ b/pkg/kv/kvserver/store.go @@ -1069,11 +1069,10 @@ func (s *Store) SetDraining(drain bool, reporter func(int, redact.SafeString)) { const leaseTransferConcurrency = 100 sem := quotapool.NewIntPool("Store.SetDraining", leaseTransferConcurrency) - // Incremented for every lease or Raft leadership transfer - // attempted. We try to send both the lease and the Raft leaders - // away, but this may not reliably work. Instead, we run the - // surrounding retry loop until there are no leaders/leases left - // (ignoring single-replica or uninitialized Raft groups). + // Incremented for every lease transfer attempted. We try to send the lease + // away, but this may not reliably work. Instead, we run the surrounding + // retry loop until there are no leases left (ignoring single-replica + // ranges). var numTransfersAttempted int32 newStoreReplicaVisitor(s).Visit(func(r *Replica) bool { // @@ -1110,12 +1109,6 @@ func (s *Store) SetDraining(drain bool, reporter func(int, redact.SafeString)) { r.mu.Lock() r.mu.draining = true - status := r.raftStatusRLocked() - // needsRaftTransfer is true when we can reasonably hope to transfer - // this replica's lease and/or Raft leadership away. - needsRaftTransfer := status != nil && - len(status.Progress) > 1 && - !(status.RaftState == raft.StateFollower && status.Lead != 0) r.mu.Unlock() var drainingLease roachpb.Lease @@ -1142,7 +1135,13 @@ func (s *Store) SetDraining(drain bool, reporter func(int, redact.SafeString)) { drainingLease.OwnedBy(s.StoreID()) && r.IsLeaseValid(ctx, drainingLease, s.Clock().Now()) - if !needsLeaseTransfer && !needsRaftTransfer { + // Note that this code doesn't deal with transferring the Raft + // leadership. Leadership tries to follow the lease, so when leases + // are transferred, leadership will be transferred too. For ranges + // without leases we probably should try to move the leadership + // manually to a non-draining replica. + + if !needsLeaseTransfer { if log.V(1) { // This logging is useful to troubleshoot incomplete drains. log.Info(ctx, "not moving out") @@ -1152,7 +1151,7 @@ func (s *Store) SetDraining(drain bool, reporter func(int, redact.SafeString)) { } if log.V(1) { // This logging is useful to troubleshoot incomplete drains. - log.Infof(ctx, "trying to move replica out: lease transfer = %v, raft transfer = %v", needsLeaseTransfer, needsRaftTransfer) + log.Infof(ctx, "trying to move replica out") } if needsLeaseTransfer { @@ -1175,18 +1174,6 @@ func (s *Store) SetDraining(drain bool, reporter func(int, redact.SafeString)) { err, ) } - if err == nil && leaseTransferred { - // If we just transferred the lease away, Raft leadership will - // usually transfer with it. Invoking a separate Raft leadership - // transfer would only obstruct this. - needsRaftTransfer = false - } - } - - if needsRaftTransfer { - r.raftMu.Lock() - r.maybeTransferRaftLeadership(ctx) - r.raftMu.Unlock() } }); err != nil { if log.V(1) {