Skip to content

Commit

Permalink
Merge pull request #78 from erikgrinaker/forget-leader
Browse files Browse the repository at this point in the history
add `ForgetLeader`
  • Loading branch information
ahrtr authored Jun 26, 2023
2 parents 30e2fa4 + 1159466 commit a10cd45
Show file tree
Hide file tree
Showing 27 changed files with 764 additions and 158 deletions.
30 changes: 30 additions & 0 deletions node.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,32 @@ type Node interface {
// TransferLeadership attempts to transfer leadership to the given transferee.
TransferLeadership(ctx context.Context, lead, transferee uint64)

// ForgetLeader forgets a follower's current leader, changing it to None. It
// remains a leaderless follower in the current term, without campaigning.
//
// This is useful with PreVote+CheckQuorum, where followers will normally not
// grant pre-votes if they've heard from the leader in the past election
// timeout interval. Leaderless followers can grant pre-votes immediately, so
// if a quorum of followers have strong reason to believe the leader is dead
// (for example via a side-channel or external failure detector) and forget it
// then they can elect a new leader immediately, without waiting out the
// election timeout. They will also revert to normal followers if they hear
// from the leader again, or transition to candidates on an election timeout.
//
// For example, consider a three-node cluster where 1 is the leader and 2+3
// have just received a heartbeat from it. If 2 and 3 believe the leader has
// now died (maybe they know that an orchestration system shut down 1's VM),
// we can instruct 2 to forget the leader and 3 to campaign. 2 will then be
// able to grant 3's pre-vote and elect 3 as leader immediately (normally 2
// would reject the vote until an election timeout passes because it has heard
// from the leader recently). However, 3 cannot campaign unilaterally: a
// quorum has to agree that the leader is dead, which avoids disrupting the
// leader if individual nodes are wrong about it being dead.
//
// This does nothing with ReadOnlyLeaseBased, since it would allow a new
// leader to be elected without the old leader knowing.
ForgetLeader(ctx context.Context) error

// ReadIndex requests a read state. The read state will be set in the ready.
// Read state has a read index. Once the application advances further than the read
// index, any linearizable read requests issued before the read request can be
Expand Down Expand Up @@ -575,6 +601,10 @@ func (n *node) TransferLeadership(ctx context.Context, lead, transferee uint64)
}
}

// ForgetLeader builds a message of type pb.MsgForgetLeader and hands it to
// n.step together with ctx. The error reported by n.step is returned as is.
func (n *node) ForgetLeader(ctx context.Context) error {
	msg := pb.Message{Type: pb.MsgForgetLeader}
	return n.step(ctx, msg)
}

// ReadIndex builds a message of type pb.MsgReadIndex whose only entry carries
// rctx as its Data, and hands it to n.step together with ctx. The error
// reported by n.step is returned as is.
func (n *node) ReadIndex(ctx context.Context, rctx []byte) error {
	entries := []pb.Entry{{Data: rctx}}
	msg := pb.Message{Type: pb.MsgReadIndex, Entries: entries}
	return n.step(ctx, msg)
}
11 changes: 11 additions & 0 deletions raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -1284,6 +1284,8 @@ func stepLeader(r *raft, m pb.Message) error {
sendMsgReadIndexResponse(r, m)

return nil
case pb.MsgForgetLeader:
return nil // noop on leader
}

// All other message types require a progress for m.From (pr).
Expand Down Expand Up @@ -1661,6 +1663,15 @@ func stepFollower(r *raft, m pb.Message) error {
}
m.To = r.lead
r.send(m)
case pb.MsgForgetLeader:
if r.readOnly.option == ReadOnlyLeaseBased {
r.logger.Error("ignoring MsgForgetLeader due to ReadOnlyLeaseBased")
return nil
}
if r.lead != None {
r.logger.Infof("%x forgetting leader %x at term %d", r.id, r.lead, r.Term)
r.lead = None
}
case pb.MsgTimeoutNow:
r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership.", r.id, r.Term, m.From)
// Leadership transfers never use pre-vote even if r.preVote is true; we
Expand Down
141 changes: 72 additions & 69 deletions raftpb/raft.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions raftpb/raft.proto
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ enum MessageType {
MsgStorageAppendResp = 20;
MsgStorageApply = 21;
MsgStorageApplyResp = 22;
MsgForgetLeader = 23;
// NOTE: when adding new message types, remember to update the isLocalMsg and
// isResponseMsg arrays in raft/util.go and update the corresponding tests in
// raft/util_test.go.
Expand Down
22 changes: 13 additions & 9 deletions rafttest/interaction_env_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,13 @@ func (env *InteractionEnv) Handle(t *testing.T, d datadriven.TestData) string {
//
// transfer-leadership from=1 to=4
err = env.handleTransferLeadership(t, d)
case "forget-leader":
// Forgets the current leader of the given node.
//
// Example:
//
// forget-leader 1
err = env.handleForgetLeader(t, d)
case "propose":
// Propose an entry.
//
Expand Down Expand Up @@ -182,20 +189,17 @@ func (env *InteractionEnv) Handle(t *testing.T, d datadriven.TestData) string {
default:
err = fmt.Errorf("unknown command")
}
if err != nil {
env.Output.WriteString(err.Error())
}
// NB: the highest log level suppresses all output, including that of the
// handlers. This comes in useful during setup which can be chatty.
// However, errors are always logged.
if env.Output.Len() == 0 {
return "ok"
}
if env.Output.Lvl == len(lvlNames)-1 {
if err != nil {
if err != nil {
if env.Output.Quiet() {
return err.Error()
}
return "ok (quiet)"
env.Output.WriteString(err.Error())
}
if env.Output.Len() == 0 {
return "ok"
}
return env.Output.String()
}
Expand Down
9 changes: 9 additions & 0 deletions rafttest/interaction_env_handler_add_nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ func (env *InteractionEnv) handleAddNodes(t *testing.T, d datadriven.TestData) e
arg.Scan(t, i, &cfg.PreVote)
case "checkquorum":
arg.Scan(t, i, &cfg.CheckQuorum)
case "read-only":
switch arg.Vals[i] {
case "safe":
cfg.ReadOnlyOption = raft.ReadOnlySafe
case "lease-based":
cfg.ReadOnlyOption = raft.ReadOnlyLeaseBased
default:
return fmt.Errorf("invalid read-only option %q", arg.Vals[i])
}
}
}
}
Expand Down
Loading

0 comments on commit a10cd45

Please sign in to comment.