Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
129000: raft: respect leader fortification in MsgForgetLeader r=nvanbenschoten a=nvanbenschoten

Fixes cockroachdb#124496.

This commit makes peers consult leader fortification state when processing a `MsgForgetLeader` message. If the leader is fortified and currently supported, the peer will ignore the message.

Epic: None
Release note: None

Co-authored-by: Nathan VanBenschoten <[email protected]>
  • Loading branch information
craig[bot] and nvanbenschoten committed Aug 14, 2024
2 parents 739cc7d + 74cc8e2 commit 9421149
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 8 deletions.
49 changes: 44 additions & 5 deletions pkg/raft/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -995,7 +995,12 @@ func (r *raft) hup(t CampaignType) {
r.logger.Debugf("%x ignoring MsgHup because already leader", r.id)
return
}

// TODO(arul): we will eventually want some kind of logic like this.
//
//if r.supportingFortifiedLeader() && t != campaignTransfer {
// r.logger.Debugf("%x ignoring MsgHup due to leader fortification", r.id)
// return
//}
if !r.promotable() {
r.logger.Warningf("%x is unpromotable and can not campaign", r.id)
return
Expand All @@ -1009,6 +1014,21 @@ func (r *raft) hup(t CampaignType) {
r.campaign(t)
}

// supportingFortifiedLeader returns whether the peer is providing fortification
// support to a leader. When a peer is providing support to a leader, it must
// not campaign or vote to disrupt that leader's term, unless specifically asked
// to do so by the leader.
// TODO(arul): this is a placeholder implementation. Move it around as you see
// fit.
func (r *raft) supportingFortifiedLeader() bool {
if r.leadEpoch == 0 {
return false // not supporting any leader
}
assertTrue(r.lead != None, "leader epoch is set but leader is not")
epoch, ok := r.storeLiveness.SupportFor(r.lead)
return ok && epoch == r.leadEpoch
}

// errBreak is a sentinel error used to break a callback-based loop.
var errBreak = errors.New("break")

Expand Down Expand Up @@ -1727,12 +1747,31 @@ func stepFollower(r *raft, m pb.Message) error {
m.To = r.lead
r.send(m)
case pb.MsgForgetLeader:
if r.lead != None {
r.logger.Infof("%x forgetting leader %x at term %d", r.id, r.lead, r.Term)
r.lead = None
// TODO(nvanbenschoten): remove MsgForgetLeader once the sole caller in
// Replica.maybeForgetLeaderOnVoteRequestLocked is removed. This will
// accompany the removal of epoch-based leases.
if r.lead == None {
r.logger.Infof("%x no leader at term %d; dropping forget leader msg", r.id, r.Term)
return nil
}
if r.supportingFortifiedLeader() && r.lead != m.From {
r.logger.Infof("%x [term %d] ignored MsgForgetLeader from %x due to leader fortification", r.id, r.Term, m.From)
return nil
}
r.logger.Infof("%x forgetting leader %x at term %d", r.id, r.lead, r.Term)
r.lead = None
case pb.MsgTimeoutNow:
r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership.", r.id, r.Term, m.From)
// TODO(nvanbenschoten): we will eventually want some kind of logic like
// this. However, even this may not be enough, because we're calling a
// campaignTransfer election, which bypasses leader fortification checks. It
// may never be safe for MsgTimeoutNow to come from anyone but the leader.
// We need to think about this more.
//
//if r.supportingFortifiedLeader() && r.lead != m.From {
// r.logger.Infof("%x [term %d] ignored MsgTimeoutNow from %x due to leader fortification", r.id, r.Term, m.From)
// return nil
//}
r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership", r.id, r.Term, m.From)
// Leadership transfers never use pre-vote even if r.preVote is true; we
// know we are not recovering from a partition so there is no need for the
// extra round trip.
Expand Down
2 changes: 1 addition & 1 deletion pkg/raft/testdata/confchange_v2_replace_leader.txt
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ stabilize
1->4 MsgTimeoutNow Term:1 Log:0/0
> 4 receiving messages
1->4 MsgTimeoutNow Term:1 Log:0/0
INFO 4 [term 1] received MsgTimeoutNow from 1 and starts an election to get leadership.
INFO 4 [term 1] received MsgTimeoutNow from 1 and starts an election to get leadership
INFO 4 is starting a new election at term 1
INFO 4 became candidate at term 2
INFO 4 [logterm: 1, index: 4] sent MsgVote request to 1 at term 2
Expand Down
39 changes: 37 additions & 2 deletions pkg/raft/testdata/forget_leader.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,20 @@ raft-state
3: StateFollower (Voter) Term:1 Lead:1
4: StateFollower (Non-Voter) Term:1 Lead:1

# ForgetLeader is ignored if the follower is supporting the leader's store
# liveness epoch.
forget-leader 2
----
INFO 2 [term 1] ignored MsgForgetLeader from 0 due to leader fortification

withdraw-support 2 1
----
1 2 3 4
1 1 1 1 1
2 x 1 1 1
3 1 1 1 1
4 1 1 1 1

# ForgetLeader causes a follower to forget its leader, but remain in the current
# term. It's a noop if it's called again.
forget-leader 2
Expand All @@ -45,7 +59,7 @@ raft-state

forget-leader 2
----
ok
INFO 2 no leader at term 1; dropping forget leader msg

raft-state
----
Expand All @@ -54,7 +68,20 @@ raft-state
3: StateFollower (Voter) Term:1 Lead:1
4: StateFollower (Non-Voter) Term:1 Lead:1

# ForgetLeader also works on learners.
# ForgetLeader also works on learners, but only if they are not supporting the
# leader's store liveness epoch.
forget-leader 4
----
INFO 4 [term 1] ignored MsgForgetLeader from 0 due to leader fortification

withdraw-support 4 1
----
1 2 3 4
1 1 1 1 1
2 x 1 1 1
3 1 1 1 1
4 x 1 1 1

forget-leader 4
----
INFO 4 forgetting leader 1 at term 1
Expand Down Expand Up @@ -169,6 +196,14 @@ tick-heartbeat 2
----
ok

withdraw-support 2 3
----
1 2 3 4
1 1 1 1 1
2 x 1 x 1
3 1 1 1 1
4 x 1 1 1

forget-leader 2
----
INFO 2 forgetting leader 3 at term 2
Expand Down
20 changes: 20 additions & 0 deletions pkg/raft/testdata/forget_leader_prevote_checkquorum.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,19 @@ raft-state
2: StateFollower (Voter) Term:1 Lead:1
3: StateFollower (Voter) Term:1 Lead:1

# ForgetLeader is ignored if the follower is supporting the leader's store
# liveness epoch.
forget-leader 2
----
INFO 2 [term 1] ignored MsgForgetLeader from 0 due to leader fortification

withdraw-support 2 1
----
1 2 3
1 1 1 1
2 x 1 1
3 1 1 1

# If 2 forgets the leader, then 3 can obtain prevotes and hold an election
# despite 2 having heard from the leader recently.
forget-leader 2
Expand Down Expand Up @@ -187,6 +200,13 @@ stabilize 2
Messages:
2->3 MsgAppResp Term:2 Log:0/13

withdraw-support 2 3
----
1 2 3
1 1 1 1
2 x 1 x
3 1 1 1

forget-leader 2
----
INFO 2 forgetting leader 3 at term 2
Expand Down

0 comments on commit 9421149

Please sign in to comment.