Skip to content

Commit

Permalink
Fix candidate stepdown logic
Browse files Browse the repository at this point in the history
Candidates should not be stepping down based on their pterm but rather their actual term.

Signed-off-by: reubenninan <[email protected]>
  • Loading branch information
ReubenMathew committed Jul 30, 2024
1 parent 12e1889 commit 401059d
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 4 deletions.
7 changes: 3 additions & 4 deletions server/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -3187,10 +3187,9 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) {
// another node has taken on the leader role already, so we should convert
// to a follower of that node instead.
if n.State() == Candidate {
// Ignore old terms, otherwise we might end up stepping down incorrectly.
// Needs to be ahead of our pterm (last log index), as an isolated node
// could have bumped its vote term up considerably past this point.
if ae.term >= n.pterm {
// If we have a leader in the current term or higher, we should stepdown,
// write the term and vote if the term of the request is higher.
if ae.term >= n.term {
// If the append entry term is newer than the current term, erase our
// vote.
if ae.term > n.term {
Expand Down
54 changes: 54 additions & 0 deletions server/raft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -844,3 +844,57 @@ func TestNRGNoResetOnAppendEntryResponse(t *testing.T) {
require_NotEqual(t, leader.pterm, 0)
require_NotEqual(t, leader.pindex, 0)
}

func TestNRGCandidateDontStepdownDueToLeaderOfPreviousTerm(t *testing.T) {
c := createJetStreamClusterExplicit(t, "R3S", 3)
defer c.shutdown()
c.waitOnLeader()

nc, _ := jsClientConnect(t, c.leader(), nats.UserInfo("admin", "s3cr3t!"))
defer nc.Close()

rg := c.createRaftGroup("TEST", 3, newStateAdder)
rg.waitOnLeader()

// create a candidate that has received entries while they were a follower in a previous term
candidate := rg.nonLeader().node().(*raft)
candidate.Lock()
candidate.switchState(Candidate)
candidate.pterm = 50
candidate.pindex = 70
candidate.term = 100
candidate.Unlock()

// leader term is behind candidate
leader := rg.leader().node().(*raft)
leader.Lock()
leader.term = candidate.pterm
leader.pterm = candidate.pterm
leader.pindex = candidate.pindex
leader.Unlock()

// Subscribe to the append entry subject.
sub, err := nc.SubscribeSync(leader.asubj)
require_NoError(t, err)

// Get the first append entry that we receive, should be heartbeat from leader of prev term
msg, err := sub.NextMsg(5 * time.Second)
require_NoError(t, err)

// Decode the append entry
ae, err := leader.decodeAppendEntry(msg.Data, nil, msg.Reply)
require_NoError(t, err)

// check that the append entry is from the leader of the previous term
leader.RLock()
require_Equal(t, ae.term, leader.term)
require_Equal(t, ae.leader, leader.id)
leader.RUnlock()

// check that we haven't stepped down
require_Equal(t, candidate.State(), Candidate)
// check that our term is still ahead of the leader's term
if candidate.term <= ae.term {
t.Fatalf("Expected candidate term to be ahead of leader term")
}
}

0 comments on commit 401059d

Please sign in to comment.