From b96a51897e2206f05d5dbee3ce92a3406e7edbdc Mon Sep 17 00:00:00 2001 From: Arul Ajmani Date: Tue, 1 Oct 2024 21:35:19 -0400 Subject: [PATCH] raft: use candidate's log term to determine inFortifyLease condition Previously, if a node received a Msg{,Pre}Vote request from a peer but it still supported a leader in StoreLiveness, the node would reject the request with the justification that it was fortified. However, if the campaigning peer is doing so with a higher log term than we're aware of, then this conclusively proves that the leadership term we're fortifying has actually ended. This then allows us to de-fortify, which ends our fortification lease, granting us the freedom to call elections and vote in the current (and future) campaigns. This patch recognizes this case and switches to this behavior. Note that this is more than just an optimization -- one can craft scenarios where de-fortifying and voting in an election is the only way we'll ever elect a new leader. One such scenario is shown in the test case attached to this commit. Epic: none Release note: None --- pkg/raft/raft.go | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/pkg/raft/raft.go b/pkg/raft/raft.go index 7c5245a6d5ce..75544b991222 100644 --- a/pkg/raft/raft.go +++ b/pkg/raft/raft.go @@ -1418,12 +1418,20 @@ func (r *raft) Step(m pb.Message) error { if m.Type == pb.MsgVote || m.Type == pb.MsgPreVote { force := bytes.Equal(m.Context, []byte(campaignTransfer)) inHeartbeatLease := r.checkQuorum && r.lead != None && r.electionElapsed < r.electionTimeout - // NB: A fortified leader is allowed to bump its term. It'll need to - // re-fortify once if it gets elected at the higher term though, so the - // leader must take care to not regress its supported expiration. However, - // at the follower, we grant the fortified leader our vote at the higher - // term. - inFortifyLease := r.supportingFortifiedLeader() && r.lead != m.From + inFortifyLease := r.supportingFortifiedLeader() && + // NB: A fortified leader is allowed to bump its term. It'll need to + // re-fortify once if it gets elected at the higher term though, so the + // leader must take care to not regress its supported expiration. + // However, at the follower, we grant the fortified leader our vote at + // the higher term. + r.lead != m.From && + // NB: If the peer that's campaigning has an entry in its log with a + // higher term than what we're aware of, then this conclusively proves + // that a new leader was elected at a higher term. We never heard from + // this new leader (otherwise we'd have bumped r.Term in response). + // However, any fortification we're providing to a leader that has been + // since dethroned is pointless. + m.LogTerm <= r.Term if !force && (inHeartbeatLease || inFortifyLease) { // If a server receives a Request{,Pre}Vote message but is still // supporting a fortified leader, it does not update its term or grant @@ -1551,11 +1559,12 @@ func (r *raft) Step(m pb.Message) error { case pb.MsgVote, pb.MsgPreVote: // We can vote if this is a repeat of a vote we've already cast... canVote := r.Vote == m.From || - // ...we haven't voted and we don't think there's a leader yet in this term... + // ...OR we haven't voted and we don't think there's a leader yet in this + // term... (r.Vote == None && r.lead == None) || - // ...or this is a PreVote for a future term... + // ...OR this is a PreVote for a future term... (m.Type == pb.MsgPreVote && m.Term > r.Term) - // ...and we believe the candidate is up to date. + // ...AND we believe the candidate is up to date. lastID := r.raftLog.lastEntryID() candLastID := entryID{term: m.LogTerm, index: m.Index} if canVote && r.raftLog.isUpToDate(candLastID) {