Skip to content

Commit

Permalink
etcdserver: add mayPromote check
Browse files Browse the repository at this point in the history
  • Loading branch information
WIZARD-CXY committed Apr 28, 2019
1 parent 636bf1c commit 43d85f8
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 23 deletions.
27 changes: 11 additions & 16 deletions clientv3/integration/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,14 +215,14 @@ func TestMemberAddForLearner(t *testing.T) {
}
}

func TestMemberPromoteForLearner(t *testing.T) {
func TestMemberPromoteForNotReadyLearner(t *testing.T) {
// TODO test not ready learner promotion.
defer testutil.AfterTest(t)

clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
defer clus.Terminate(t)
// TODO change the random client to client that talk to leader directly.
capi := clus.RandClient()
// the first client talks to the leader directly because the cluster size is 1
capi := clus.Client(0)

urls := []string{"http://127.0.0.1:1234"}
isLearner := true
Expand All @@ -246,18 +246,13 @@ func TestMemberPromoteForLearner(t *testing.T) {
t.Fatalf("Added 1 learner node to cluster, got %d", numberOfLearners)
}

memberPromoteResp, err := capi.MemberPromote(context.Background(), learnerID)
if err != nil {
t.Fatalf("failed to promote member: %v", err)
}

numberOfLearners = 0
for _, m := range memberPromoteResp.Members {
if m.IsLearner {
numberOfLearners++
}
// since the learner is never started, it cannot be ready for promotion.
_, err = capi.MemberPromote(context.Background(), learnerID)
expectedErrKeywords := "can only promote a learner member which catches up with leader"
if err == nil {
t.Fatalf("expecting promote not ready learner to fail, got no error")
}
if numberOfLearners != 0 {
t.Errorf("learner promoted, expect 0 learner, got %d", numberOfLearners)
if !strings.Contains(err.Error(), expectedErrKeywords) {
t.Errorf("expecting error to contain %s, got %s", expectedErrKeywords, err.Error())
}
}
30 changes: 30 additions & 0 deletions etcdserver/api/membership/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,36 @@ func (c *RaftCluster) IsReadyToRemoveVotingMember(id uint64) bool {
return true
}

// IsReadyToPromoteMember reports whether promoting a learner to a voting
// member would still leave a quorum of started voting members.
//
// The learner being promoted is counted both towards the future voting
// membership and towards the started members. Counting it only as a member
// (and not as started) makes promotion impossible in a cluster with a single
// voting member: 1 started out of 2 future voters is always below the quorum
// of 2.
//
// NOTE(review): counting the learner as started assumes the caller has
// already verified that the learner is running and caught up with the leader;
// confirm this holds for every caller.
//
// The id parameter is currently unused; it is kept so the signature stays
// parallel to the other IsReadyTo* checks.
func (c *RaftCluster) IsReadyToPromoteMember(id uint64) bool {
	nmembers := 1 // the learner joins the future voting membership
	nstarted := 1 // and is counted as a started member

	for _, member := range c.VotingMembers() {
		if member.IsStarted() {
			nstarted++
		}
		nmembers++
	}

	nquorum := nmembers/2 + 1
	if nstarted >= nquorum {
		return true
	}

	// Not enough started members to form a quorum after the promotion.
	if c.lg != nil {
		c.lg.Warn(
			"rejecting member promote; started member will be less than quorum",
			zap.Int("number-of-started-member", nstarted),
			zap.Int("quorum", nquorum),
			zap.String("cluster-id", c.cid.String()),
			zap.String("local-member-id", c.localID.String()),
		)
	} else {
		plog.Warningf("Reject promote member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
	}
	return false
}

func membersFromStore(lg *zap.Logger, st v2store.Store) (map[types.ID]*Member, map[types.ID]bool) {
members := make(map[types.ID]*Member)
removed := make(map[types.ID]bool)
Expand Down
1 change: 0 additions & 1 deletion etcdserver/api/membership/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ var (
ErrIDNotFound = errors.New("membership: ID not found")
ErrPeerURLexists = errors.New("membership: peerURL exists")
ErrMemberNotLearner = errors.New("membership: can only promote a learner member")
ErrLearnerNotReady = errors.New("membership: can only promote a learner member which catches up with leader")
)

func isKeyNotFound(err error) bool {
Expand Down
2 changes: 1 addition & 1 deletion etcdserver/api/v3rpc/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ var toGRPCErrorMap = map[error]error{
membership.ErrIDExists: rpctypes.ErrGRPCMemberExist,
membership.ErrPeerURLexists: rpctypes.ErrGRPCPeerURLExist,
membership.ErrMemberNotLearner: rpctypes.ErrGRPCMemberNotLearner,
membership.ErrLearnerNotReady: rpctypes.ErrGRPCLearnerNotReady,
etcdserver.ErrNotEnoughStartedMembers: rpctypes.ErrMemberNotEnoughStarted,
etcdserver.ErrLearnerNotReady: rpctypes.ErrGRPCLearnerNotReady,

mvcc.ErrCompacted: rpctypes.ErrGRPCCompacted,
mvcc.ErrFutureRev: rpctypes.ErrGRPCFutureRev,
Expand Down
1 change: 1 addition & 0 deletions etcdserver/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ var (
ErrTimeoutLeaderTransfer = errors.New("etcdserver: request timed out, leader transfer took too long")
ErrLeaderChanged = errors.New("etcdserver: leader changed")
ErrNotEnoughStartedMembers = errors.New("etcdserver: re-configuration failed due to not enough started members")
ErrLearnerNotReady = errors.New("etcdserver: can only promote a learner member which catches up with leader")
ErrNoLeader = errors.New("etcdserver: no leader")
ErrNotLeader = errors.New("etcdserver: not leader")
ErrRequestTooLarge = errors.New("etcdserver: request is too large")
Expand Down
62 changes: 57 additions & 5 deletions etcdserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ const (
maxPendingRevokes = 16

recommendedMaxRequestBytes = 10 * 1024 * 1024

readyPercent = 0.9
)

var (
Expand Down Expand Up @@ -1633,7 +1635,7 @@ func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membershi
return nil, err
}

// check if we can promote this learner
// check if we can promote this learner.
if err := s.mayPromoteMember(types.ID(id)); err != nil {
return nil, err
}
Expand Down Expand Up @@ -1661,13 +1663,63 @@ func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membershi
}

// mayPromoteMember checks whether the learner with the given id may be
// promoted to a voting member.
//
// It first asks isLearnerReady whether the learner has caught up with the
// leader and returns that error unchanged if not. When StrictReconfigCheck is
// disabled no further checks are made. Otherwise the promotion is rejected
// with ErrNotEnoughStartedMembers if the cluster reports that it is not ready
// to promote the member.
func (s *EtcdServer) mayPromoteMember(id types.ID) error {
	if err := isLearnerReady(uint64(id)); err != nil {
		return err
	}

	if !s.Cfg.StrictReconfigCheck {
		return nil
	}

	// No debug dump of the raft status here: it wrote to stdout on every
	// promotion and called raftStatus without a nil guard.
	if !s.cluster.IsReadyToPromoteMember(uint64(id)) {
		if lg := s.getLogger(); lg != nil {
			lg.Warn(
				"rejecting member promote request; not enough healthy members",
				zap.String("local-member-id", s.ID().String()),
				zap.String("requested-member-promote-id", id.String()),
				zap.Error(ErrNotEnoughStartedMembers),
			)
		} else {
			plog.Warningf("not enough started members, rejecting promote member %s", id)
		}
		return ErrNotEnoughStartedMembers
	}

	return nil
}

// isLearnerReady checks whether the learner with the given id has caught up
// with the leader closely enough to be promoted.
//
// It returns:
//   - nil when the raftStatus hook is unset, when the member has no entry in
//     the progress map, or when the learner's Match index is at least
//     readyPercent of the leader's Match index;
//   - ErrNotLeader when the raft status carries no progress information;
//   - ErrLearnerNotReady when the learner's Match index is below
//     readyPercent of the leader's Match index.
func isLearnerReady(id uint64) error {
	// this can happen in the unit test when we do not start node.
	if raftStatus == nil {
		return nil
	}
	// Use a distinct local name so the package-level hook is not shadowed.
	status := raftStatus()

	// leader's raftStatus.Progress is not nil
	if status.Progress == nil {
		return ErrNotLeader
	}

	// Index the map directly instead of scanning every entry.
	learner, found := status.Progress[id]
	if !found {
		return nil
	}

	leaderMatch := status.Progress[status.ID].Match
	// the learner's Match not caught up with leader yet
	if float64(learner.Match) < float64(leaderMatch)*readyPercent {
		return ErrLearnerNotReady
	}

	return nil
}
Expand Down

0 comments on commit 43d85f8

Please sign in to comment.