Skip to content

Commit

Permalink
add ForgetLeader
Browse files Browse the repository at this point in the history
This patch adds `ForgetLeader()`, which causes a follower to forget its
current leader, changing it to None. It remains a leaderless follower in
the current term, without campaigning.

This is particularly useful with PreVote+CheckQuorum, if the caller has
strong reason to believe the leader is dead: a leaderless follower will
grant pre-votes, but will also revert to a normal follower if it hears
from the leader again. A quorum of such leaderless followers can thus
allow a pre-candidate to hold an election if they believe the leader to
be dead.

Signed-off-by: Erik Grinaker <[email protected]>
  • Loading branch information
erikgrinaker committed Jun 23, 2023
1 parent 09ea4c5 commit 1159466
Show file tree
Hide file tree
Showing 12 changed files with 628 additions and 69 deletions.
30 changes: 30 additions & 0 deletions node.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,32 @@ type Node interface {
// TransferLeadership attempts to transfer leadership to the given transferee.
TransferLeadership(ctx context.Context, lead, transferee uint64)

// ForgetLeader forgets a follower's current leader, changing it to None. It
// remains a leaderless follower in the current term, without campaigning.
//
// This is useful with PreVote+CheckQuorum, where followers will normally not
// grant pre-votes if they've heard from the leader in the past election
// timeout interval. Leaderless followers can grant pre-votes immediately, so
// if a quorum of followers have strong reason to believe the leader is dead
// (for example via a side-channel or external failure detector) and forget it
// then they can elect a new leader immediately, without waiting out the
// election timeout. They will also revert to normal followers if they hear
// from the leader again, or transition to candidates on an election timeout.
//
// For example, consider a three-node cluster where 1 is the leader and 2+3
// have just received a heartbeat from it. If 2 and 3 believe the leader has
// now died (maybe they know that an orchestration system shut down 1's VM),
// we can instruct 2 to forget the leader and 3 to campaign. 2 will then be
// able to grant 3's pre-vote and elect 3 as leader immediately (normally 2
// would reject the vote until an election timeout passes because it has heard
// from the leader recently). However, 3 cannot campaign unilaterally: a
// quorum has to agree that the leader is dead, which avoids disrupting the
// leader if individual nodes are wrong about it being dead.
//
// This does nothing with ReadOnlyLeaseBased, since it would allow a new
// leader to be elected without the old leader knowing.
ForgetLeader(ctx context.Context) error

// ReadIndex requests a read state. The read state will be set in the ready.
// Read state has a read index. Once the application advances further than the read
// index, any linearizable read requests issued before the read request can be
Expand Down Expand Up @@ -575,6 +601,10 @@ func (n *node) TransferLeadership(ctx context.Context, lead, transferee uint64)
}
}

// ForgetLeader builds a message of type MsgForgetLeader and submits it
// through n.step, returning whatever error step reports.
func (n *node) ForgetLeader(ctx context.Context) error {
	msg := pb.Message{Type: pb.MsgForgetLeader}
	return n.step(ctx, msg)
}

// ReadIndex wraps rctx as the Data of a single entry, attaches that entry to
// a message of type MsgReadIndex, and submits the message through n.step. The
// error returned by step is passed back to the caller unchanged.
func (n *node) ReadIndex(ctx context.Context, rctx []byte) error {
	entry := pb.Entry{Data: rctx}
	msg := pb.Message{
		Type:    pb.MsgReadIndex,
		Entries: []pb.Entry{entry},
	}
	return n.step(ctx, msg)
}
11 changes: 11 additions & 0 deletions raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -1284,6 +1284,8 @@ func stepLeader(r *raft, m pb.Message) error {
sendMsgReadIndexResponse(r, m)

return nil
case pb.MsgForgetLeader:
return nil // noop on leader
}

// All other message types require a progress for m.From (pr).
Expand Down Expand Up @@ -1661,6 +1663,15 @@ func stepFollower(r *raft, m pb.Message) error {
}
m.To = r.lead
r.send(m)
case pb.MsgForgetLeader:
if r.readOnly.option == ReadOnlyLeaseBased {
r.logger.Error("ignoring MsgForgetLeader due to ReadOnlyLeaseBased")
return nil
}
if r.lead != None {
r.logger.Infof("%x forgetting leader %x at term %d", r.id, r.lead, r.Term)
r.lead = None
}
case pb.MsgTimeoutNow:
r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership.", r.id, r.Term, m.From)
// Leadership transfers never use pre-vote even if r.preVote is true; we
Expand Down
141 changes: 72 additions & 69 deletions raftpb/raft.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions raftpb/raft.proto
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ enum MessageType {
MsgStorageAppendResp = 20;
MsgStorageApply = 21;
MsgStorageApplyResp = 22;
MsgForgetLeader = 23;
// NOTE: when adding new message types, remember to update the isLocalMsg and
// isResponseMsg arrays in raft/util.go and update the corresponding tests in
// raft/util_test.go.
Expand Down
7 changes: 7 additions & 0 deletions rafttest/interaction_env_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,13 @@ func (env *InteractionEnv) Handle(t *testing.T, d datadriven.TestData) string {
//
// transfer-leadership from=1 to=4
err = env.handleTransferLeadership(t, d)
case "forget-leader":
// Forgets the current leader of the given node.
//
// Example:
//
// forget-leader 1
err = env.handleForgetLeader(t, d)
case "propose":
// Propose an entry.
//
Expand Down
9 changes: 9 additions & 0 deletions rafttest/interaction_env_handler_add_nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ func (env *InteractionEnv) handleAddNodes(t *testing.T, d datadriven.TestData) e
arg.Scan(t, i, &cfg.PreVote)
case "checkquorum":
arg.Scan(t, i, &cfg.CheckQuorum)
case "read-only":
switch arg.Vals[i] {
case "safe":
cfg.ReadOnlyOption = raft.ReadOnlySafe
case "lease-based":
cfg.ReadOnlyOption = raft.ReadOnlyLeaseBased
default:
return fmt.Errorf("invalid read-only option %q", arg.Vals[i])
}
}
}
}
Expand Down
32 changes: 32 additions & 0 deletions rafttest/interaction_env_handler_forget_leader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright 2023 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package rafttest

import (
"testing"

"github.com/cockroachdb/datadriven"
)

// handleForgetLeader serves the "forget-leader" directive: it resolves the
// target node index from the directive via firstAsNodeIdx and asks that node
// to forget its leader. It always returns nil.
func (env *InteractionEnv) handleForgetLeader(t *testing.T, d datadriven.TestData) error {
	env.ForgetLeader(t, firstAsNodeIdx(t, d))
	return nil
}

// ForgetLeader makes the follower at the given index forget its leader by
// invoking ForgetLeader on env.Nodes[idx].
//
// NOTE(review): anything returned by the node's ForgetLeader call is
// discarded here and t is unused; if that call returns an error, confirm
// whether it should be checked.
func (env *InteractionEnv) ForgetLeader(t *testing.T, idx int) {
	target := &env.Nodes[idx]
	target.ForgetLeader()
}
Loading

0 comments on commit 1159466

Please sign in to comment.