Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

raft: respect leader fortification in MsgForgetLeader #129000

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 44 additions & 5 deletions pkg/raft/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -995,7 +995,12 @@ func (r *raft) hup(t CampaignType) {
r.logger.Debugf("%x ignoring MsgHup because already leader", r.id)
return
}

// TODO(arul): we will eventually want some kind of logic like this.
//
//if r.supportingFortifiedLeader() && t != campaignTransfer {
// r.logger.Debugf("%x ignoring MsgHup due to leader fortification", r.id)
// return
//}
if !r.promotable() {
r.logger.Warningf("%x is unpromotable and can not campaign", r.id)
return
Expand All @@ -1009,6 +1014,21 @@ func (r *raft) hup(t CampaignType) {
r.campaign(t)
}

// supportingFortifiedLeader returns whether the peer is providing fortification
// support to a leader. When a peer is providing support to a leader, it must
// not campaign or vote to disrupt that leader's term, unless specifically asked
// to do so by the leader.
// TODO(arul): this is a placeholder implementation. Move it around as you see
// fit.
func (r *raft) supportingFortifiedLeader() bool {
if r.leadEpoch == 0 {
return false // not supporting any leader
}
assertTrue(r.lead != None, "leader epoch is set but leader is not")
epoch, ok := r.storeLiveness.SupportFor(r.lead)
return ok && epoch == r.leadEpoch
}

// errBreak is a sentinel error used to break a callback-based loop.
var errBreak = errors.New("break")

Expand Down Expand Up @@ -1727,12 +1747,31 @@ func stepFollower(r *raft, m pb.Message) error {
m.To = r.lead
r.send(m)
case pb.MsgForgetLeader:
if r.lead != None {
r.logger.Infof("%x forgetting leader %x at term %d", r.id, r.lead, r.Term)
r.lead = None
// TODO(nvanbenschoten): remove MsgForgetLeader once the sole caller in
// Replica.maybeForgetLeaderOnVoteRequestLocked is removed. This will
// accompany the removal of epoch-based leases.
if r.lead == None {
r.logger.Infof("%x no leader at term %d; dropping forget leader msg", r.id, r.Term)
return nil
}
if r.supportingFortifiedLeader() && r.lead != m.From {
r.logger.Infof("%x [term %d] ignored MsgForgetLeader from %x due to leader fortification", r.id, r.Term, m.From)
return nil
}
r.logger.Infof("%x forgetting leader %x at term %d", r.id, r.lead, r.Term)
r.lead = None
case pb.MsgTimeoutNow:
r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership.", r.id, r.Term, m.From)
// TODO(nvanbenschoten): we will eventually want some kind of logic like
// this. However, even this may not be enough, because we're calling a
// campaignTransfer election, which bypasses leader fortification checks. It
// may never be safe for MsgTimeoutNow to come from anyone but the leader.
// We need to think about this more.
//
//if r.supportingFortifiedLeader() && r.lead != m.From {
// r.logger.Infof("%x [term %d] ignored MsgTimeoutNow from %x due to leader fortification", r.id, r.Term, m.From)
// return nil
//}
r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership", r.id, r.Term, m.From)
// Leadership transfers never use pre-vote even if r.preVote is true; we
// know we are not recovering from a partition so there is no need for the
// extra round trip.
Expand Down
2 changes: 1 addition & 1 deletion pkg/raft/testdata/confchange_v2_replace_leader.txt
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ stabilize
1->4 MsgTimeoutNow Term:1 Log:0/0
> 4 receiving messages
1->4 MsgTimeoutNow Term:1 Log:0/0
INFO 4 [term 1] received MsgTimeoutNow from 1 and starts an election to get leadership.
INFO 4 [term 1] received MsgTimeoutNow from 1 and starts an election to get leadership
INFO 4 is starting a new election at term 1
INFO 4 became candidate at term 2
INFO 4 [logterm: 1, index: 4] sent MsgVote request to 1 at term 2
Expand Down
39 changes: 37 additions & 2 deletions pkg/raft/testdata/forget_leader.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,20 @@ raft-state
3: StateFollower (Voter) Term:1 Lead:1
4: StateFollower (Non-Voter) Term:1 Lead:1

# ForgetLeader is ignored if the follower is supporting the leader's store
# liveness epoch.
forget-leader 2
----
INFO 2 [term 1] ignored MsgForgetLeader from 0 due to leader fortification

withdraw-support 2 1
----
1 2 3 4
1 1 1 1 1
2 x 1 1 1
3 1 1 1 1
4 1 1 1 1

# ForgetLeader causes a follower to forget its leader, but remain in the current
# term. It's a noop if it's called again.
forget-leader 2
Expand All @@ -45,7 +59,7 @@ raft-state

forget-leader 2
----
ok
INFO 2 no leader at term 1; dropping forget leader msg

raft-state
----
Expand All @@ -54,7 +68,20 @@ raft-state
3: StateFollower (Voter) Term:1 Lead:1
4: StateFollower (Non-Voter) Term:1 Lead:1

# ForgetLeader also works on learners.
# ForgetLeader also works on learners, but only if they are not supporting the
# leader's store liveness epoch.
forget-leader 4
----
INFO 4 [term 1] ignored MsgForgetLeader from 0 due to leader fortification

withdraw-support 4 1
----
1 2 3 4
1 1 1 1 1
2 x 1 1 1
3 1 1 1 1
4 x 1 1 1

forget-leader 4
----
INFO 4 forgetting leader 1 at term 1
Expand Down Expand Up @@ -169,6 +196,14 @@ tick-heartbeat 2
----
ok

withdraw-support 2 3
----
1 2 3 4
1 1 1 1 1
2 x 1 x 1
3 1 1 1 1
4 x 1 1 1

forget-leader 2
----
INFO 2 forgetting leader 3 at term 2
Expand Down
20 changes: 20 additions & 0 deletions pkg/raft/testdata/forget_leader_prevote_checkquorum.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,19 @@ raft-state
2: StateFollower (Voter) Term:1 Lead:1
3: StateFollower (Voter) Term:1 Lead:1

# ForgetLeader is ignored if the follower is supporting the leader's store
# liveness epoch.
forget-leader 2
----
INFO 2 [term 1] ignored MsgForgetLeader from 0 due to leader fortification

withdraw-support 2 1
----
1 2 3
1 1 1 1
2 x 1 1
3 1 1 1

# If 2 forgets the leader, then 3 can obtain prevotes and hold an election
# despite 2 having heard from the leader recently.
forget-leader 2
Expand Down Expand Up @@ -187,6 +200,13 @@ stabilize 2
Messages:
2->3 MsgAppResp Term:2 Log:0/13

withdraw-support 2 3
----
1 2 3
1 1 1 1
2 x 1 x
3 1 1 1

forget-leader 2
----
INFO 2 forgetting leader 3 at term 2
Expand Down
Loading