From 596605b6f1d4decb5131f3c739fe09681cff8fa5 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 14 Aug 2024 12:20:12 -0400 Subject: [PATCH 1/3] raft: remove punctuation from log line Logs typically don't have punctuation. Epic: None Release note: None --- pkg/raft/raft.go | 2 +- pkg/raft/testdata/confchange_v2_replace_leader.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/raft/raft.go b/pkg/raft/raft.go index 0c54c1ffa583..ceec34807bca 100644 --- a/pkg/raft/raft.go +++ b/pkg/raft/raft.go @@ -1732,7 +1732,7 @@ func stepFollower(r *raft, m pb.Message) error { r.lead = None } case pb.MsgTimeoutNow: - r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership.", r.id, r.Term, m.From) + r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership", r.id, r.Term, m.From) // Leadership transfers never use pre-vote even if r.preVote is true; we // know we are not recovering from a partition so there is no need for the // extra round trip. diff --git a/pkg/raft/testdata/confchange_v2_replace_leader.txt b/pkg/raft/testdata/confchange_v2_replace_leader.txt index 0b07247a83fc..f1e93a54a3ba 100644 --- a/pkg/raft/testdata/confchange_v2_replace_leader.txt +++ b/pkg/raft/testdata/confchange_v2_replace_leader.txt @@ -167,7 +167,7 @@ stabilize 1->4 MsgTimeoutNow Term:1 Log:0/0 > 4 receiving messages 1->4 MsgTimeoutNow Term:1 Log:0/0 - INFO 4 [term 1] received MsgTimeoutNow from 1 and starts an election to get leadership. + INFO 4 [term 1] received MsgTimeoutNow from 1 and starts an election to get leadership INFO 4 is starting a new election at term 1 INFO 4 became candidate at term 2 INFO 4 [logterm: 1, index: 4] sent MsgVote request to 1 at term 2 From e35dc3178a02d9cbca2e4640ff8088ce61c8e5e2 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 14 Aug 2024 12:27:38 -0400 Subject: [PATCH 2/3] raft: log on ignored MsgForgetLeader Makes the no-op case more observable. Epic: None Release note: None --- pkg/raft/raft.go | 8 +++++--- pkg/raft/testdata/forget_leader.txt | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pkg/raft/raft.go b/pkg/raft/raft.go index ceec34807bca..4eb1eba9884e 100644 --- a/pkg/raft/raft.go +++ b/pkg/raft/raft.go @@ -1727,10 +1727,12 @@ func stepFollower(r *raft, m pb.Message) error { m.To = r.lead r.send(m) case pb.MsgForgetLeader: - if r.lead != None { - r.logger.Infof("%x forgetting leader %x at term %d", r.id, r.lead, r.Term) - r.lead = None + if r.lead == None { + r.logger.Infof("%x no leader at term %d; dropping forget leader msg", r.id, r.Term) + return nil } + r.logger.Infof("%x forgetting leader %x at term %d", r.id, r.lead, r.Term) + r.lead = None case pb.MsgTimeoutNow: r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership", r.id, r.Term, m.From) // Leadership transfers never use pre-vote even if r.preVote is true; we diff --git a/pkg/raft/testdata/forget_leader.txt b/pkg/raft/testdata/forget_leader.txt index 65875ca3a87a..b06bfa659d40 100644 --- a/pkg/raft/testdata/forget_leader.txt +++ b/pkg/raft/testdata/forget_leader.txt @@ -45,7 +45,7 @@ raft-state forget-leader 2 ---- -ok +INFO 2 no leader at term 1; dropping forget leader msg raft-state ---- From 74cc8e2a4501a63d566a77a31e58845928a5f66c Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 14 Aug 2024 12:50:37 -0400 Subject: [PATCH 3/3] raft: respect leader fortification in MsgForgetLeader Fixes #124496. This commit makes peers consult leader fortification state when processing a MsgForgetLeader message. If the leader is fortified and currently supported, the peer will ignore the message. Epic: None Release note: None --- pkg/raft/raft.go | 39 ++++++++++++++++++- pkg/raft/testdata/forget_leader.txt | 37 +++++++++++++++++- .../forget_leader_prevote_checkquorum.txt | 20 ++++++++++ 3 files changed, 94 insertions(+), 2 deletions(-) diff --git a/pkg/raft/raft.go b/pkg/raft/raft.go index 4eb1eba9884e..10b5fb82f17a 100644 --- a/pkg/raft/raft.go +++ b/pkg/raft/raft.go @@ -995,7 +995,12 @@ func (r *raft) hup(t CampaignType) { r.logger.Debugf("%x ignoring MsgHup because already leader", r.id) return } - + // TODO(arul): we will eventually want some kind of logic like this. + // + //if r.supportingFortifiedLeader() && t != campaignTransfer { + // r.logger.Debugf("%x ignoring MsgHup due to leader fortification", r.id) + // return + //} if !r.promotable() { r.logger.Warningf("%x is unpromotable and can not campaign", r.id) return @@ -1009,6 +1014,21 @@ func (r *raft) hup(t CampaignType) { r.campaign(t) } +// supportingFortifiedLeader returns whether the peer is providing fortification +// support to a leader. When a peer is providing support to a leader, it must +// not campaign or vote to disrupt that leader's term, unless specifically asked +// to do so by the leader. +// TODO(arul): this is a placeholder implementation. Move it around as you see +// fit. +func (r *raft) supportingFortifiedLeader() bool { + if r.leadEpoch == 0 { + return false // not supporting any leader + } + assertTrue(r.lead != None, "leader epoch is set but leader is not") + epoch, ok := r.storeLiveness.SupportFor(r.lead) + return ok && epoch == r.leadEpoch +} + // errBreak is a sentinel error used to break a callback-based loop. var errBreak = errors.New("break") @@ -1727,13 +1747,30 @@ func stepFollower(r *raft, m pb.Message) error { m.To = r.lead r.send(m) case pb.MsgForgetLeader: + // TODO(nvanbenschoten): remove MsgForgetLeader once the sole caller in + // Replica.maybeForgetLeaderOnVoteRequestLocked is removed. This will + // accompany the removal of epoch-based leases. if r.lead == None { r.logger.Infof("%x no leader at term %d; dropping forget leader msg", r.id, r.Term) return nil } + if r.supportingFortifiedLeader() && r.lead != m.From { + r.logger.Infof("%x [term %d] ignored MsgForgetLeader from %x due to leader fortification", r.id, r.Term, m.From) + return nil + } r.logger.Infof("%x forgetting leader %x at term %d", r.id, r.lead, r.Term) r.lead = None case pb.MsgTimeoutNow: + // TODO(nvanbenschoten): we will eventually want some kind of logic like + // this. However, even this may not be enough, because we're calling a + // campaignTransfer election, which bypasses leader fortification checks. It + // may never be safe for MsgTimeoutNow to come from anyone but the leader. + // We need to think about this more. + // + //if r.supportingFortifiedLeader() && r.lead != m.From { + // r.logger.Infof("%x [term %d] ignored MsgTimeoutNow from %x due to leader fortification", r.id, r.Term, m.From) + // return nil + //} r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership", r.id, r.Term, m.From) // Leadership transfers never use pre-vote even if r.preVote is true; we // know we are not recovering from a partition so there is no need for the diff --git a/pkg/raft/testdata/forget_leader.txt b/pkg/raft/testdata/forget_leader.txt index b06bfa659d40..6c44015408bc 100644 --- a/pkg/raft/testdata/forget_leader.txt +++ b/pkg/raft/testdata/forget_leader.txt @@ -30,6 +30,20 @@ raft-state 3: StateFollower (Voter) Term:1 Lead:1 4: StateFollower (Non-Voter) Term:1 Lead:1 +# ForgetLeader is ignored if the follower is supporting the leader's store +# liveness epoch. +forget-leader 2 +---- +INFO 2 [term 1] ignored MsgForgetLeader from 0 due to leader fortification + +withdraw-support 2 1 +---- + 1 2 3 4 +1 1 1 1 1 +2 x 1 1 1 +3 1 1 1 1 +4 1 1 1 1 + # ForgetLeader causes a follower to forget its leader, but remain in the current # term. It's a noop if it's called again. forget-leader 2 @@ -54,7 +68,20 @@ raft-state 3: StateFollower (Voter) Term:1 Lead:1 4: StateFollower (Non-Voter) Term:1 Lead:1 -# ForgetLeader also works on learners. +# ForgetLeader also works on learners, but only if they are not supporting the +# leader's store liveness epoch. +forget-leader 4 +---- +INFO 4 [term 1] ignored MsgForgetLeader from 0 due to leader fortification + +withdraw-support 4 1 +---- + 1 2 3 4 +1 1 1 1 1 +2 x 1 1 1 +3 1 1 1 1 +4 x 1 1 1 + forget-leader 4 ---- INFO 4 forgetting leader 1 at term 1 @@ -169,6 +196,14 @@ tick-heartbeat 2 ---- ok +withdraw-support 2 3 +---- + 1 2 3 4 +1 1 1 1 1 +2 x 1 x 1 +3 1 1 1 1 +4 x 1 1 1 + forget-leader 2 ---- INFO 2 forgetting leader 3 at term 2 diff --git a/pkg/raft/testdata/forget_leader_prevote_checkquorum.txt b/pkg/raft/testdata/forget_leader_prevote_checkquorum.txt index a929b9f1ca21..421d5120ea91 100644 --- a/pkg/raft/testdata/forget_leader_prevote_checkquorum.txt +++ b/pkg/raft/testdata/forget_leader_prevote_checkquorum.txt @@ -87,6 +87,19 @@ raft-state 2: StateFollower (Voter) Term:1 Lead:1 3: StateFollower (Voter) Term:1 Lead:1 +# ForgetLeader is ignored if the follower is supporting the leader's store +# liveness epoch. +forget-leader 2 +---- +INFO 2 [term 1] ignored MsgForgetLeader from 0 due to leader fortification + +withdraw-support 2 1 +---- + 1 2 3 +1 1 1 1 +2 x 1 1 +3 1 1 1 + # If 2 forgets the leader, then 3 can obtain prevotes and hold an election # despite 2 having heard from the leader recently. forget-leader 2 @@ -187,6 +200,13 @@ stabilize 2 Messages: 2->3 MsgAppResp Term:2 Log:0/13 +withdraw-support 2 3 +---- + 1 2 3 +1 1 1 1 +2 x 1 x +3 1 1 1 + forget-leader 2 ---- INFO 2 forgetting leader 3 at term 2