forked from etcd-io/etcd
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
raft: internally support joint consensus
This commit introduces machinery to safely apply joint consensus configuration changes to Raft. The main contribution is the new package, `confchange`, which offers the primitives `Simple`, `EnterJoint`, and `LeaveJoint`. The first two take a list of configuration changes. `Simple` only declares success if these configuration changes (applied atomically) change the set of voters by at most one (i.e. it's fine to add or remove any number of learners, but change only one voter). `EnterJoint` makes the configuration joint and then applies the changes to it, in preparation for the caller returning later and transitioning out of the joint config into the final desired configuration via `LeaveJoint()`. This commit streamlines the conversion between voters and learners, which is now generally allowed whenever the above conditions are upheld (i.e. it's not possible to demote a voter and add a new voter in the context of a Simple configuration change, but it is possible via EnterJoint). Previously, we had the artificial restriction that a voter could not be demoted to a learner, but had to be removed first. Even though demoting a voter is generally less useful than promoting a learner (the latter is used to catch up future voters), demotions could see use in improved handling of temporary node unavailability, where it is desired to remove voting power from a down node, but to preserve its data should it return. An additional change that was made in this commit is to prevent the use of empty commit quorums, which was previously possible but for no good reason; this closes etcd-io#10884. The work left to do in a future PR is to actually expose joint configurations to the applications using Raft. This will entail mostly API design and the addition of suitable testing, which, to be carried out ergonomically, is likely to motivate a larger refactor. Touches etcd-io#7625.
- Loading branch information
Showing
15 changed files
with
1,162 additions
and
132 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
// Copyright 2019 The etcd Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package confchange | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"strconv" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/cockroachdb/datadriven" | ||
pb "go.etcd.io/etcd/raft/raftpb" | ||
"go.etcd.io/etcd/raft/tracker" | ||
) | ||
|
||
func TestConfChangeDataDriven(t *testing.T) { | ||
datadriven.Walk(t, "testdata", func(t *testing.T, path string) { | ||
tr := tracker.MakeProgressTracker(10) | ||
c := Changer{ | ||
Tracker: tr, | ||
LastIndex: 0, // incremented in this test with each cmd | ||
} | ||
|
||
// The test files use the commands | ||
// - simple: run a simple conf change (i.e. no joint consensus), | ||
// - enter-joint: enter a joint config, and | ||
// - leave-joint: leave a joint config. | ||
// The first two take a list of config changes, which have the following | ||
// syntax: | ||
// - vn: make n a voter, | ||
// - ln: make n a learner, | ||
// - rn: remove n, and | ||
// - un: update n. | ||
datadriven.RunTest(t, path, func(d *datadriven.TestData) string { | ||
defer func() { | ||
c.LastIndex++ | ||
}() | ||
var ccs []pb.ConfChange | ||
toks := strings.Split(strings.TrimSpace(d.Input), " ") | ||
if toks[0] == "" { | ||
toks = nil | ||
} | ||
for _, tok := range toks { | ||
if len(tok) < 2 { | ||
return fmt.Sprintf("unknown token %s", tok) | ||
} | ||
var cc pb.ConfChange | ||
switch tok[0] { | ||
case 'v': | ||
cc.Type = pb.ConfChangeAddNode | ||
case 'l': | ||
cc.Type = pb.ConfChangeAddLearnerNode | ||
case 'r': | ||
cc.Type = pb.ConfChangeRemoveNode | ||
case 'u': | ||
cc.Type = pb.ConfChangeUpdateNode | ||
default: | ||
return fmt.Sprintf("unknown input: %s", tok) | ||
} | ||
id, err := strconv.ParseUint(tok[1:], 10, 64) | ||
if err != nil { | ||
return err.Error() | ||
} | ||
cc.NodeID = id | ||
ccs = append(ccs, cc) | ||
} | ||
|
||
var cfg tracker.Config | ||
var prs tracker.ProgressMap | ||
var err error | ||
switch d.Cmd { | ||
case "simple": | ||
cfg, prs, err = c.Simple(ccs...) | ||
case "enter-joint": | ||
cfg, prs, err = c.EnterJoint(ccs...) | ||
case "leave-joint": | ||
if len(ccs) > 0 { | ||
err = errors.New("this command takes no input") | ||
} else { | ||
cfg, prs, err = c.LeaveJoint() | ||
} | ||
default: | ||
return "unknown command" | ||
} | ||
if err != nil { | ||
return err.Error() + "\n" | ||
} | ||
c.Tracker.Config, c.Tracker.Progress = cfg, prs | ||
return fmt.Sprintf("%s\n%s", c.Tracker.Config, c.Tracker.Progress) | ||
}) | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
// Copyright 2019 The etcd Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package confchange | ||
|
||
import ( | ||
"math/rand" | ||
"reflect" | ||
"testing" | ||
"testing/quick" | ||
|
||
pb "go.etcd.io/etcd/raft/raftpb" | ||
"go.etcd.io/etcd/raft/tracker" | ||
) | ||
|
||
// TestConfChangeQuick uses quickcheck to verify that simple and joint config | ||
// changes arrive at the same result. | ||
func TestConfChangeQuick(t *testing.T) { | ||
cfg := &quick.Config{ | ||
MaxCount: 1000, | ||
} | ||
|
||
// Log the first couple of runs to give some indication of things working | ||
// as intended. | ||
const infoCount = 5 | ||
|
||
runWithJoint := func(c *Changer, ccs []pb.ConfChange) error { | ||
cfg, prs, err := c.EnterJoint(ccs...) | ||
if err != nil { | ||
return err | ||
} | ||
c.Tracker.Config = cfg | ||
c.Tracker.Progress = prs | ||
cfg, prs, err = c.LeaveJoint() | ||
if err != nil { | ||
return err | ||
} | ||
c.Tracker.Config = cfg | ||
c.Tracker.Progress = prs | ||
return nil | ||
} | ||
|
||
runWithSimple := func(c *Changer, ccs []pb.ConfChange) error { | ||
for _, cc := range ccs { | ||
cfg, prs, err := c.Simple(cc) | ||
if err != nil { | ||
return err | ||
} | ||
c.Tracker.Config, c.Tracker.Progress = cfg, prs | ||
} | ||
return nil | ||
} | ||
|
||
type testFunc func(*Changer, []pb.ConfChange) error | ||
|
||
wrapper := func(invoke testFunc) func(setup initialChanges, ccs confChanges) (*Changer, error) { | ||
return func(setup initialChanges, ccs confChanges) (*Changer, error) { | ||
tr := tracker.MakeProgressTracker(10) | ||
c := &Changer{ | ||
Tracker: tr, | ||
LastIndex: 10, | ||
} | ||
|
||
if err := runWithSimple(c, setup); err != nil { | ||
return nil, err | ||
} | ||
|
||
err := invoke(c, ccs) | ||
return c, err | ||
} | ||
} | ||
|
||
var n int | ||
f1 := func(setup initialChanges, ccs confChanges) *Changer { | ||
c, err := wrapper(runWithSimple)(setup, ccs) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
if n < infoCount { | ||
t.Log("initial setup:", Describe(setup...)) | ||
t.Log("changes:", Describe(ccs...)) | ||
t.Log(c.Tracker.Config) | ||
t.Log(c.Tracker.Progress) | ||
} | ||
n++ | ||
return c | ||
} | ||
f2 := func(setup initialChanges, ccs confChanges) *Changer { | ||
c, err := wrapper(runWithJoint)(setup, ccs) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
return c | ||
} | ||
err := quick.CheckEqual(f1, f2, cfg) | ||
if err == nil { | ||
return | ||
} | ||
cErr, ok := err.(*quick.CheckEqualError) | ||
if !ok { | ||
t.Fatal(err) | ||
} | ||
|
||
t.Error("setup:", Describe(cErr.In[0].([]pb.ConfChange)...)) | ||
t.Error("ccs:", Describe(cErr.In[1].([]pb.ConfChange)...)) | ||
t.Errorf("out1: %+v\nout2: %+v", cErr.Out1, cErr.Out2) | ||
} | ||
|
||
type confChangeTyp pb.ConfChangeType | ||
|
||
func (confChangeTyp) Generate(rand *rand.Rand, _ int) reflect.Value { | ||
return reflect.ValueOf(confChangeTyp(rand.Intn(4))) | ||
} | ||
|
||
type confChanges []pb.ConfChange | ||
|
||
func genCC(num func() int, id func() uint64, typ func() pb.ConfChangeType) []pb.ConfChange { | ||
var ccs []pb.ConfChange | ||
n := num() | ||
for i := 0; i < n; i++ { | ||
ccs = append(ccs, pb.ConfChange{Type: typ(), NodeID: id()}) | ||
} | ||
return ccs | ||
} | ||
|
||
func (confChanges) Generate(rand *rand.Rand, _ int) reflect.Value { | ||
num := func() int { | ||
return 1 + rand.Intn(9) | ||
} | ||
id := func() uint64 { | ||
// Note that num() >= 1, so we're never returning 1 from this method, | ||
// meaning that we'll never touch NodeID one, which is special to avoid | ||
// voterless configs altogether in this test. | ||
return 1 + uint64(num()) | ||
} | ||
typ := func() pb.ConfChangeType { | ||
return pb.ConfChangeType(rand.Intn(len(pb.ConfChangeType_name))) | ||
} | ||
return reflect.ValueOf(genCC(num, id, typ)) | ||
} | ||
|
||
type initialChanges []pb.ConfChange | ||
|
||
func (initialChanges) Generate(rand *rand.Rand, _ int) reflect.Value { | ||
num := func() int { | ||
return 1 + rand.Intn(5) | ||
} | ||
id := func() uint64 { return uint64(num()) } | ||
typ := func() pb.ConfChangeType { | ||
return pb.ConfChangeAddNode | ||
} | ||
// NodeID one is special - it's in the initial config and will be a voter | ||
// always (this is to avoid uninteresting edge cases where the simple conf | ||
// changes can't easily make progress). | ||
ccs := append([]pb.ConfChange{{Type: pb.ConfChangeAddNode, NodeID: 1}}, genCC(num, id, typ)...) | ||
return reflect.ValueOf(ccs) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Verify that operations upon entering the joint state are idempotent, i.e. | ||
# removing an absent node is fine, etc. | ||
|
||
simple | ||
v1 | ||
---- | ||
voters=(1) | ||
1: StateProbe match=0 next=1 | ||
|
||
enter-joint | ||
r1 r2 r9 v2 v3 v4 v2 v3 v4 l2 l2 r4 r4 l1 l1 | ||
---- | ||
voters=(3)&&(1) learners=(2) learners_next=(1) | ||
1: StateProbe match=0 next=1 | ||
2: StateProbe match=0 next=2 learner | ||
3: StateProbe match=0 next=2 | ||
|
||
leave-joint | ||
---- | ||
voters=(3) learners=(1 2) | ||
1: StateProbe match=0 next=1 learner | ||
2: StateProbe match=0 next=2 learner | ||
3: StateProbe match=0 next=2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Verify that when a voter is demoted in a joint config, it will show up in | ||
# learners_next until the joint config is left, and only then will the progress | ||
# turn into that of a learner, without resetting the progress. Note that this | ||
# last fact is verified by `next`, which can tell us which "round" the progress | ||
# was originally created in. | ||
|
||
simple | ||
v1 | ||
---- | ||
voters=(1) | ||
1: StateProbe match=0 next=1 | ||
|
||
enter-joint | ||
v2 l1 | ||
---- | ||
voters=(2)&&(1) learners_next=(1) | ||
1: StateProbe match=0 next=1 | ||
2: StateProbe match=0 next=2 | ||
|
||
leave-joint | ||
---- | ||
voters=(2) learners=(1) | ||
1: StateProbe match=0 next=1 learner | ||
2: StateProbe match=0 next=2 |
Oops, something went wrong.