Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] add mayPromote check #19

Merged
merged 1 commit into from
May 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 11 additions & 17 deletions clientv3/integration/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,14 +215,13 @@ func TestMemberAddForLearner(t *testing.T) {
}
}

func TestMemberPromoteForLearner(t *testing.T) {
// TODO test not ready learner promotion.
func TestMemberPromoteForNotReadyLearner(t *testing.T) {
defer testutil.AfterTest(t)

clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
defer clus.Terminate(t)
// TODO change the random client to client that talk to leader directly.
capi := clus.RandClient()
// first client is talked to leader because cluster size is 1
capi := clus.Client(0)

urls := []string{"http://127.0.0.1:1234"}
isLearner := true
Expand All @@ -246,19 +245,14 @@ func TestMemberPromoteForLearner(t *testing.T) {
t.Fatalf("Added 1 learner node to cluster, got %d", numberOfLearners)
}

memberPromoteResp, err := capi.MemberPromote(context.Background(), learnerID)
if err != nil {
t.Fatalf("failed to promote member: %v", err)
}

numberOfLearners = 0
for _, m := range memberPromoteResp.Members {
if m.IsLearner {
numberOfLearners++
}
// since we do not start learner, learner must be not ready.
_, err = capi.MemberPromote(context.Background(), learnerID)
expectedErrKeywords := "can only promote a learner member which catches up with leader"
if err == nil {
t.Fatalf("expecting promote not ready learner to fail, got no error")
}
if numberOfLearners != 0 {
t.Errorf("learner promoted, expect 0 learner, got %d", numberOfLearners)
if !strings.Contains(err.Error(), expectedErrKeywords) {
t.Errorf("expecting error to contain %s, got %s", expectedErrKeywords, err.Error())
}
}

Expand Down
30 changes: 30 additions & 0 deletions etcdserver/api/membership/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,36 @@ func (c *RaftCluster) IsReadyToRemoveVotingMember(id uint64) bool {
return true
}

func (c *RaftCluster) IsReadyToPromoteMember(id uint64) bool {
nmembers := 1
nstarted := 0

for _, member := range c.VotingMembers() {
if member.IsStarted() {
nstarted++
}
nmembers++
}

nquorum := nmembers/2 + 1
if nstarted < nquorum {
if c.lg != nil {
c.lg.Warn(
"rejecting member promote; started member will be less than quorum",
zap.Int("number-of-started-member", nstarted),
zap.Int("quorum", nquorum),
zap.String("cluster-id", c.cid.String()),
zap.String("local-member-id", c.localID.String()),
)
} else {
plog.Warningf("Reject promote member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
}
return false
}

return true
}

func membersFromStore(lg *zap.Logger, st v2store.Store) (map[types.ID]*Member, map[types.ID]bool) {
members := make(map[types.ID]*Member)
removed := make(map[types.ID]bool)
Expand Down
1 change: 1 addition & 0 deletions etcdserver/api/v3rpc/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ var toGRPCErrorMap = map[error]error{
membership.ErrLearnerNotReady: rpctypes.ErrGRPCLearnerNotReady,
membership.ErrTooManyLearners: rpctypes.ErrGRPCTooManyLearners,
etcdserver.ErrNotEnoughStartedMembers: rpctypes.ErrMemberNotEnoughStarted,
etcdserver.ErrLearnerNotReady: rpctypes.ErrGRPCLearnerNotReady,

mvcc.ErrCompacted: rpctypes.ErrGRPCCompacted,
mvcc.ErrFutureRev: rpctypes.ErrGRPCFutureRev,
Expand Down
1 change: 1 addition & 0 deletions etcdserver/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ var (
ErrTimeoutLeaderTransfer = errors.New("etcdserver: request timed out, leader transfer took too long")
ErrLeaderChanged = errors.New("etcdserver: leader changed")
ErrNotEnoughStartedMembers = errors.New("etcdserver: re-configuration failed due to not enough started members")
ErrLearnerNotReady = errors.New("etcdserver: can only promote a learner member which catches up with leader")
ErrNoLeader = errors.New("etcdserver: no leader")
ErrNotLeader = errors.New("etcdserver: not leader")
ErrRequestTooLarge = errors.New("etcdserver: request is too large")
Expand Down
64 changes: 59 additions & 5 deletions etcdserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ const (
maxPendingRevokes = 16

recommendedMaxRequestBytes = 10 * 1024 * 1024

readyPercent = 0.9
)

var (
Expand Down Expand Up @@ -1633,7 +1635,7 @@ func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membershi
return nil, err
}

// check if we can promote this learner
// check if we can promote this learner.
if err := s.mayPromoteMember(types.ID(id)); err != nil {
return nil, err
}
Expand Down Expand Up @@ -1661,13 +1663,65 @@ func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membershi
}

func (s *EtcdServer) mayPromoteMember(id types.ID) error {
err := isLearnerReady(uint64(id))
if err != nil {
return err
}

if !s.Cfg.StrictReconfigCheck {
return nil
}
// TODO add more checks whether the member can be promoted.
// like learner progress check or if cluster is ready to promote a learner
// this is an example to get progress
fmt.Printf("raftStatus, %#v\n", raftStatus())
if !s.cluster.IsReadyToPromoteMember(uint64(id)) {
if lg := s.getLogger(); lg != nil {
lg.Warn(
"rejecting member promote request; not enough healthy members",
zap.String("local-member-id", s.ID().String()),
zap.String("requested-member-remove-id", id.String()),
zap.Error(ErrNotEnoughStartedMembers),
)
} else {
plog.Warningf("not enough started members, rejecting promote member %s", id)
}
return ErrNotEnoughStartedMembers
}

return nil
}

// check whether the learner catches up with leader or not.
WIZARD-CXY marked this conversation as resolved.
Show resolved Hide resolved
// Note: it will return nil if member is not found in cluster or if member is not learner.
// These two conditions will be checked before apply phase later.
func isLearnerReady(id uint64) error {
// sanity check, this can happen in the unit test when we do not start node.
WIZARD-CXY marked this conversation as resolved.
Show resolved Hide resolved
if raftStatus == nil {
WIZARD-CXY marked this conversation as resolved.
Show resolved Hide resolved
return nil
}
rs := raftStatus()

WIZARD-CXY marked this conversation as resolved.
Show resolved Hide resolved
// leader's raftStatus.Progress is not nil
if rs.Progress == nil {
return ErrNotLeader
}

var learnerMatch uint64
isFound := false
leaderID := rs.ID
for memberID, progress := range rs.Progress {
if id == memberID {
// check its status
learnerMatch = progress.Match
isFound = true
break
}
}

if isFound {
leaderMatch := rs.Progress[leaderID].Match
// the learner's Match not caught up with leader yet
if float64(learnerMatch) < float64(leaderMatch)*readyPercent {
return ErrLearnerNotReady
}
}

return nil
}
Expand Down