diff --git a/pkg/raft/raft.go b/pkg/raft/raft.go index 500befea0fef..435572576977 100644 --- a/pkg/raft/raft.go +++ b/pkg/raft/raft.go @@ -995,7 +995,12 @@ func (r *raft) hup(t CampaignType) { r.logger.Debugf("%x ignoring MsgHup because already leader", r.id) return } - + // TODO(arul): we will eventually want some kind of logic like this. + // + //if r.supportingFortifiedLeader() && t != campaignTransfer { + // r.logger.Debugf("%x ignoring MsgHup due to leader fortification", r.id) + // return + //} if !r.promotable() { r.logger.Warningf("%x is unpromotable and can not campaign", r.id) return @@ -1009,6 +1014,21 @@ func (r *raft) hup(t CampaignType) { r.campaign(t) } +// supportingFortifiedLeader returns whether the peer is providing fortification +// support to a leader. When a peer is providing support to a leader, it must +// not campaign or vote to disrupt that leader's term, unless specifically asked +// to do so by the leader. +// TODO(arul): this is a placeholder implementation. Move it around as you see +// fit. +func (r *raft) supportingFortifiedLeader() bool { + if r.leadEpoch == 0 { + return false // not supporting any leader + } + assertTrue(r.lead != None, "leader epoch is set but leader is not") + epoch, ok := r.storeLiveness.SupportFor(r.lead) + return ok && epoch == r.leadEpoch +} + // errBreak is a sentinel error used to break a callback-based loop. var errBreak = errors.New("break") @@ -1727,12 +1747,31 @@ func stepFollower(r *raft, m pb.Message) error { m.To = r.lead r.send(m) case pb.MsgForgetLeader: - if r.lead != None { - r.logger.Infof("%x forgetting leader %x at term %d", r.id, r.lead, r.Term) - r.lead = None + // TODO(nvanbenschoten): remove MsgForgetLeader once the sole caller in + // Replica.maybeForgetLeaderOnVoteRequestLocked is removed. This will + // accompany the removal of epoch-based leases. + if r.lead == None { + r.logger.Infof("%x no leader at term %d; dropping forget leader msg", r.id, r.Term) + return nil + } + if r.supportingFortifiedLeader() && r.lead != m.From { + r.logger.Infof("%x [term %d] ignored MsgForgetLeader from %x due to leader fortification", r.id, r.Term, m.From) + return nil } + r.logger.Infof("%x forgetting leader %x at term %d", r.id, r.lead, r.Term) + r.lead = None case pb.MsgTimeoutNow: - r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership.", r.id, r.Term, m.From) + // TODO(nvanbenschoten): we will eventually want some kind of logic like + // this. However, even this may not be enough, because we're calling a + // campaignTransfer election, which bypasses leader fortification checks. It + // may never be safe for MsgTimeoutNow to come from anyone but the leader. + // We need to think about this more. + // + //if r.supportingFortifiedLeader() && r.lead != m.From { + // r.logger.Infof("%x [term %d] ignored MsgTimeoutNow from %x due to leader fortification", r.id, r.Term, m.From) + // return nil + //} + r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership", r.id, r.Term, m.From) // Leadership transfers never use pre-vote even if r.preVote is true; we // know we are not recovering from a partition so there is no need for the // extra round trip. diff --git a/pkg/raft/testdata/confchange_v2_replace_leader.txt b/pkg/raft/testdata/confchange_v2_replace_leader.txt index 66fb09452538..f5e2a2c70254 100644 --- a/pkg/raft/testdata/confchange_v2_replace_leader.txt +++ b/pkg/raft/testdata/confchange_v2_replace_leader.txt @@ -167,7 +167,7 @@ stabilize 1->4 MsgTimeoutNow Term:1 Log:0/0 > 4 receiving messages 1->4 MsgTimeoutNow Term:1 Log:0/0 - INFO 4 [term 1] received MsgTimeoutNow from 1 and starts an election to get leadership. + INFO 4 [term 1] received MsgTimeoutNow from 1 and starts an election to get leadership INFO 4 is starting a new election at term 1 INFO 4 became candidate at term 2 INFO 4 [logterm: 1, index: 4] sent MsgVote request to 1 at term 2 diff --git a/pkg/raft/testdata/forget_leader.txt b/pkg/raft/testdata/forget_leader.txt index 65875ca3a87a..6c44015408bc 100644 --- a/pkg/raft/testdata/forget_leader.txt +++ b/pkg/raft/testdata/forget_leader.txt @@ -30,6 +30,20 @@ raft-state 3: StateFollower (Voter) Term:1 Lead:1 4: StateFollower (Non-Voter) Term:1 Lead:1 +# ForgetLeader is ignored if the follower is supporting the leader's store +# liveness epoch. +forget-leader 2 +---- +INFO 2 [term 1] ignored MsgForgetLeader from 0 due to leader fortification + +withdraw-support 2 1 +---- + 1 2 3 4 +1 1 1 1 1 +2 x 1 1 1 +3 1 1 1 1 +4 1 1 1 1 + # ForgetLeader causes a follower to forget its leader, but remain in the current # term. It's a noop if it's called again. forget-leader 2 @@ -45,7 +59,7 @@ raft-state forget-leader 2 ---- -ok +INFO 2 no leader at term 1; dropping forget leader msg raft-state ---- @@ -54,7 +68,20 @@ raft-state 3: StateFollower (Voter) Term:1 Lead:1 4: StateFollower (Non-Voter) Term:1 Lead:1 -# ForgetLeader also works on learners. +# ForgetLeader also works on learners, but only if they are not supporting the +# leader's store liveness epoch. +forget-leader 4 +---- +INFO 4 [term 1] ignored MsgForgetLeader from 0 due to leader fortification + +withdraw-support 4 1 +---- + 1 2 3 4 +1 1 1 1 1 +2 x 1 1 1 +3 1 1 1 1 +4 x 1 1 1 + forget-leader 4 ---- INFO 4 forgetting leader 1 at term 1 @@ -169,6 +196,14 @@ tick-heartbeat 2 ---- ok +withdraw-support 2 3 +---- + 1 2 3 4 +1 1 1 1 1 +2 x 1 x 1 +3 1 1 1 1 +4 x 1 1 1 + forget-leader 2 ---- INFO 2 forgetting leader 3 at term 2 diff --git a/pkg/raft/testdata/forget_leader_prevote_checkquorum.txt b/pkg/raft/testdata/forget_leader_prevote_checkquorum.txt index a929b9f1ca21..421d5120ea91 100644 --- a/pkg/raft/testdata/forget_leader_prevote_checkquorum.txt +++ b/pkg/raft/testdata/forget_leader_prevote_checkquorum.txt @@ -87,6 +87,19 @@ raft-state 2: StateFollower (Voter) Term:1 Lead:1 3: StateFollower (Voter) Term:1 Lead:1 +# ForgetLeader is ignored if the follower is supporting the leader's store +# liveness epoch. +forget-leader 2 +---- +INFO 2 [term 1] ignored MsgForgetLeader from 0 due to leader fortification + +withdraw-support 2 1 +---- + 1 2 3 +1 1 1 1 +2 x 1 1 +3 1 1 1 + # If 2 forgets the leader, then 3 can obtain prevotes and hold an election # despite 2 having heard from the leader recently. forget-leader 2 @@ -187,6 +200,13 @@ stabilize 2 Messages: 2->3 MsgAppResp Term:2 Log:0/13 +withdraw-support 2 3 +---- + 1 2 3 +1 1 1 1 +2 x 1 x +3 1 1 1 + forget-leader 2 ---- INFO 2 forgetting leader 3 at term 2