Skip to content

Commit

Permalink
raft: internally support joint consensus
Browse files Browse the repository at this point in the history
This commit introduces machinery to safely apply joint consensus
configuration changes to Raft.

The main contribution is the new package, `confchange`, which offers
the primitives `Simple`, `EnterJoint`, and `LeaveJoint`.

The first two take a list of configuration changes. `Simple` only
declares success if these configuration changes (applied atomically)
change the set of voters by at most one (i.e. it's fine to add or
remove any number of learners, but change only one voter). `EnterJoint`
makes the configuration joint and then applies the changes to it, in
preparation for the caller returning later and transitioning out of the
joint config into the final desired configuration via `LeaveJoint()`.

This commit streamlines the conversion between voters and learners, which
is now generally allowed whenever the above conditions are upheld (i.e.
it's not possible to demote a voter and add a new voter in the context
of a Simple configuration change, but it is possible via EnterJoint).
Previously, we had the artificial restriction that a voter could not be
demoted to a learner, but had to be removed first.
Even though demoting a voter is generally less useful than promoting
a learner (the latter is used to catch up future voters), demotions
could see use in improved handling of temporary node unavailability,
where it is desired to remove voting power from a down node, but to
preserve its data should it return.

An additional change that was made in this commit is to prevent the use
of empty commit quorums, which was previously possible but for no good
reason; this:

Closes etcd-io#10884.

The work left to do in a future PR is to actually expose joint
configurations to the applications using Raft. This will entail mostly
API design and the addition of suitable testing, which, to be carried
out ergonomically, is likely to motivate a larger refactor.

Touches etcd-io#7625.
  • Loading branch information
tbg committed Jul 16, 2019
1 parent 855da1d commit c560e1c
Show file tree
Hide file tree
Showing 15 changed files with 1,162 additions and 132 deletions.
411 changes: 411 additions & 0 deletions raft/confchange/confchange.go

Large diffs are not rendered by default.

105 changes: 105 additions & 0 deletions raft/confchange/datadriven_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright 2019 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package confchange

import (
"errors"
"fmt"
"strconv"
"strings"
"testing"

"github.com/cockroachdb/datadriven"
pb "go.etcd.io/etcd/raft/raftpb"
"go.etcd.io/etcd/raft/tracker"
)

// TestConfChangeDataDriven feeds the files under testdata to a Changer via
// the datadriven package. A fresh Changer is created per file; within a file,
// the config and progress produced by each successful command are installed
// into the Changer's tracker so that subsequent commands build on them.
func TestConfChangeDataDriven(t *testing.T) {
	datadriven.Walk(t, "testdata", func(t *testing.T, path string) {
		tr := tracker.MakeProgressTracker(10)
		c := Changer{
			Tracker:   tr,
			LastIndex: 0, // incremented in this test with each cmd
		}

		// The test files use the commands
		// - simple: run a simple conf change (i.e. no joint consensus),
		// - enter-joint: enter a joint config, and
		// - leave-joint: leave a joint config.
		// The first two take a list of config changes, which have the following
		// syntax:
		// - vn: make n a voter,
		// - ln: make n a learner,
		// - rn: remove n, and
		// - un: update n.
		datadriven.RunTest(t, path, func(d *datadriven.TestData) string {
			// Bump LastIndex after every command, whether or not it
			// succeeded.
			defer func() {
				c.LastIndex++
			}()

			// strings.Fields splits on any run of whitespace and returns no
			// tokens for blank input, so neither doubled spaces nor an empty
			// command body need special handling.
			var ccs []pb.ConfChange
			for _, tok := range strings.Fields(d.Input) {
				// A token is a one-letter change type followed by a node ID.
				if len(tok) < 2 {
					return fmt.Sprintf("unknown token %s", tok)
				}
				var cc pb.ConfChange
				switch tok[0] {
				case 'v':
					cc.Type = pb.ConfChangeAddNode
				case 'l':
					cc.Type = pb.ConfChangeAddLearnerNode
				case 'r':
					cc.Type = pb.ConfChangeRemoveNode
				case 'u':
					cc.Type = pb.ConfChangeUpdateNode
				default:
					return fmt.Sprintf("unknown input: %s", tok)
				}
				id, err := strconv.ParseUint(tok[1:], 10, 64)
				if err != nil {
					return err.Error()
				}
				cc.NodeID = id
				ccs = append(ccs, cc)
			}

			var cfg tracker.Config
			var prs tracker.ProgressMap
			var err error
			switch d.Cmd {
			case "simple":
				cfg, prs, err = c.Simple(ccs...)
			case "enter-joint":
				cfg, prs, err = c.EnterJoint(ccs...)
			case "leave-joint":
				if len(ccs) > 0 {
					err = errors.New("this command takes no input")
				} else {
					cfg, prs, err = c.LeaveJoint()
				}
			default:
				return "unknown command"
			}
			if err != nil {
				// On failure the tracker is left untouched.
				return err.Error() + "\n"
			}
			c.Tracker.Config, c.Tracker.Progress = cfg, prs
			return fmt.Sprintf("%s\n%s", c.Tracker.Config, c.Tracker.Progress)
		})
	})
}
168 changes: 168 additions & 0 deletions raft/confchange/quick_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
// Copyright 2019 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package confchange

import (
"math/rand"
"reflect"
"testing"
"testing/quick"

pb "go.etcd.io/etcd/raft/raftpb"
"go.etcd.io/etcd/raft/tracker"
)

// TestConfChangeQuick is a property test built on testing/quick: starting
// from the same randomly generated initial setup, applying a batch of random
// conf changes one at a time via Simple must leave the Changer in the same
// state as applying the whole batch via EnterJoint followed by LeaveJoint.
func TestConfChangeQuick(t *testing.T) {
	// Log the first couple of runs to give some indication of things working
	// as intended.
	const infoCount = 5

	type testFunc func(*Changer, []pb.ConfChange) error

	// oneByOne applies each change as its own simple conf change, installing
	// the result into the tracker before moving on to the next one.
	oneByOne := func(c *Changer, ccs []pb.ConfChange) error {
		for i := range ccs {
			cfg, prs, err := c.Simple(ccs[i])
			if err != nil {
				return err
			}
			c.Tracker.Config = cfg
			c.Tracker.Progress = prs
		}
		return nil
	}

	// viaJoint applies all changes at once by entering a joint config and
	// then immediately leaving it again.
	viaJoint := func(c *Changer, ccs []pb.ConfChange) error {
		cfg, prs, err := c.EnterJoint(ccs...)
		if err == nil {
			c.Tracker.Config, c.Tracker.Progress = cfg, prs
			cfg, prs, err = c.LeaveJoint()
		}
		if err == nil {
			c.Tracker.Config, c.Tracker.Progress = cfg, prs
		}
		return err
	}

	// run builds a fresh Changer, brings it to the initial configuration
	// using simple changes, and then hands it to invoke together with the
	// changes under test.
	run := func(invoke testFunc, setup initialChanges, ccs confChanges) (*Changer, error) {
		c := &Changer{
			Tracker:   tracker.MakeProgressTracker(10),
			LastIndex: 10,
		}
		if err := oneByOne(c, setup); err != nil {
			return nil, err
		}
		return c, invoke(c, ccs)
	}

	logged := 0
	simple := func(setup initialChanges, ccs confChanges) *Changer {
		c, err := run(oneByOne, setup, ccs)
		if err != nil {
			t.Fatal(err)
		}
		if logged < infoCount {
			t.Log("initial setup:", Describe(setup...))
			t.Log("changes:", Describe(ccs...))
			t.Log(c.Tracker.Config)
			t.Log(c.Tracker.Progress)
		}
		logged++
		return c
	}
	joint := func(setup initialChanges, ccs confChanges) *Changer {
		c, err := run(viaJoint, setup, ccs)
		if err != nil {
			t.Fatal(err)
		}
		return c
	}

	err := quick.CheckEqual(simple, joint, &quick.Config{MaxCount: 1000})
	switch cErr := err.(type) {
	case nil:
		return
	case *quick.CheckEqualError:
		// The two strategies disagreed; report the inputs and both outputs.
		t.Error("setup:", Describe(cErr.In[0].([]pb.ConfChange)...))
		t.Error("ccs:", Describe(cErr.In[1].([]pb.ConfChange)...))
		t.Errorf("out1: %+v\nout2: %+v", cErr.Out1, cErr.Out2)
	default:
		t.Fatal(err)
	}
}

// confChangeTyp is a pb.ConfChangeType equipped with a Generate method so
// that random values of it can be produced.
type confChangeTyp pb.ConfChangeType

// Generate returns a random confChangeTyp whose numeric value lies in [0, 4).
// The size hint is ignored.
func (confChangeTyp) Generate(rng *rand.Rand, _ int) reflect.Value {
	typ := confChangeTyp(rng.Intn(4))
	return reflect.ValueOf(typ)
}

// confChanges is a list of conf changes. It is a distinct type so that it can
// carry its own Generate method for producing random values.
type confChanges []pb.ConfChange

// genCC assembles a list of conf changes from three generator callbacks.
// num is called once to pick the length of the list; for every entry, typ is
// called first and id second to pick the change type and the node ID. The
// result is nil when num returns a non-positive value.
func genCC(num func() int, id func() uint64, typ func() pb.ConfChangeType) []pb.ConfChange {
	var out []pb.ConfChange
	for remaining := num(); remaining > 0; remaining-- {
		// Keep the typ-before-id call order: both callbacks may draw from
		// the same random source.
		kind := typ()
		nodeID := id()
		out = append(out, pb.ConfChange{Type: kind, NodeID: nodeID})
	}
	return out
}

// Generate returns a random list of between one and nine conf changes. Each
// change has a random type drawn from pb.ConfChangeType_name and targets a
// node ID in [2, 10]. The size hint is ignored.
func (confChanges) Generate(rng *rand.Rand, _ int) reflect.Value {
	// count yields a value in [1, 9].
	count := func() int { return rng.Intn(9) + 1 }
	// Since count() >= 1, nodeID never yields 1. NodeID one is special: it is
	// never touched here, which avoids voterless configs altogether in this
	// test.
	nodeID := func() uint64 { return uint64(count()) + 1 }
	kind := func() pb.ConfChangeType {
		numTypes := len(pb.ConfChangeType_name)
		return pb.ConfChangeType(rng.Intn(numTypes))
	}
	return reflect.ValueOf(genCC(count, nodeID, kind))
}

// initialChanges is the list of conf changes used to build the starting
// configuration of a test run. It is a distinct type so that it can carry its
// own Generate method.
type initialChanges []pb.ConfChange

// Generate returns a random initial setup: an AddNode for NodeID 1 followed
// by between one and five further AddNode changes for node IDs in [1, 5].
// The size hint is ignored.
func (initialChanges) Generate(rng *rand.Rand, _ int) reflect.Value {
	// count yields a value in [1, 5]; it doubles as the node ID source.
	count := func() int { return rng.Intn(5) + 1 }
	nodeID := func() uint64 { return uint64(count()) }
	addVoter := func() pb.ConfChangeType { return pb.ConfChangeAddNode }

	// NodeID one is special - it's in the initial config and will be a voter
	// always (this is to avoid uninteresting edge cases where the simple conf
	// changes can't easily make progress).
	setup := []pb.ConfChange{{Type: pb.ConfChangeAddNode, NodeID: 1}}
	setup = append(setup, genCC(count, nodeID, addVoter)...)
	return reflect.ValueOf(setup)
}
23 changes: 23 additions & 0 deletions raft/confchange/testdata/joint_idempotency.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Verify that operations upon entering the joint state are idempotent, i.e.
# removing an absent node is fine, etc.

simple
v1
----
voters=(1)
1: StateProbe match=0 next=1

enter-joint
r1 r2 r9 v2 v3 v4 v2 v3 v4 l2 l2 r4 r4 l1 l1
----
voters=(3)&&(1) learners=(2) learners_next=(1)
1: StateProbe match=0 next=1
2: StateProbe match=0 next=2 learner
3: StateProbe match=0 next=2

leave-joint
----
voters=(3) learners=(1 2)
1: StateProbe match=0 next=1 learner
2: StateProbe match=0 next=2 learner
3: StateProbe match=0 next=2
24 changes: 24 additions & 0 deletions raft/confchange/testdata/joint_learners_next.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Verify that when a voter is demoted in a joint config, it will show up in
# learners_next until the joint config is left, and only then will the progress
# turn into that of a learner, without resetting the progress. Note that this
# last fact is verified by `next`, which can tell us which "round" the progress
# was originally created in.

simple
v1
----
voters=(1)
1: StateProbe match=0 next=1

enter-joint
v2 l1
----
voters=(2)&&(1) learners_next=(1)
1: StateProbe match=0 next=1
2: StateProbe match=0 next=2

leave-joint
----
voters=(2) learners=(1)
1: StateProbe match=0 next=1 learner
2: StateProbe match=0 next=2
Loading

0 comments on commit c560e1c

Please sign in to comment.