Skip to content

Commit

Permalink
Merge pull request #5720 from fabriziopandini/ensure-etcclientgenerat…
Browse files Browse the repository at this point in the history
…ors-never-return-nil

🐛 ensure etc client generators never return nil without an error
  • Loading branch information
k8s-ci-robot authored Nov 23, 2021
2 parents 3f56d7b + 94b5194 commit b031ecc
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 36 deletions.
47 changes: 43 additions & 4 deletions controlplane/kubeadm/internal/etcd_client_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/pkg/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/rest"
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd"
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/proxy"
Expand Down Expand Up @@ -57,6 +58,12 @@ func NewEtcdClientGenerator(restConfig *rest.Config, tlsConfig *tls.Config) *Etc

// forFirstAvailableNode takes a list of nodes and returns a client for the first one that connects.
func (c *EtcdClientGenerator) forFirstAvailableNode(ctx context.Context, nodeNames []string) (*etcd.Client, error) {
// This is an additional safeguard for avoiding this func to return nil, nil.
if len(nodeNames) == 0 {
return nil, errors.New("invalid argument: forLeader can't be called with an empty list of nodes")
}

// Loop through the existing control plane nodes.
var errs []error
for _, name := range nodeNames {
endpoints := []string{staticPodName("etcd", name)}
Expand All @@ -72,26 +79,58 @@ func (c *EtcdClientGenerator) forFirstAvailableNode(ctx context.Context, nodeNam

// forLeader takes a list of nodes and returns a client to the leader node.
func (c *EtcdClientGenerator) forLeader(ctx context.Context, nodeNames []string) (*etcd.Client, error) {
var errs []error
// This is an additional safeguard for avoiding this func to return nil, nil.
if len(nodeNames) == 0 {
return nil, errors.New("invalid argument: forLeader can't be called with an empty list of nodes")
}

nodes := sets.NewString()
for _, n := range nodeNames {
nodes.Insert(n)
}

// Loop through the existing control plane nodes.
var errs []error
for _, nodeName := range nodeNames {
// Get a temporary client to the etcd instance hosted on the node.
client, err := c.forFirstAvailableNode(ctx, []string{nodeName})
if err != nil {
errs = append(errs, err)
continue
}
defer client.Close()

// Get the list of members.
members, err := client.Members(ctx)
if err != nil {
errs = append(errs, err)
continue
}

// Get the leader member.
var leaderMember *etcd.Member
for _, member := range members {
if member.Name == nodeName && member.ID == client.LeaderID {
return c.forFirstAvailableNode(ctx, []string{nodeName})
if member.ID == client.LeaderID {
leaderMember = member
break
}
}
}

// If we found the leader, and it is one of the nodes,
// get a connection to the etcd leader via the node hosting it.
if leaderMember != nil {
if !nodes.Has(leaderMember.Name) {
return nil, errors.Errorf("etcd leader is reported as %x with name %q, but we couldn't find a corresponding Node in the cluster", leaderMember.ID, leaderMember.Name)
}
return c.forFirstAvailableNode(ctx, []string{leaderMember.Name})
}

// If it is not possible to get a connection to the leader via existing nodes,
// it means that the control plane is an invalid state, with an etcd member - the current leader -
// without a corresponding node.
// TODO: In future we can eventually try to automatically remediate this condition by moving the leader
// to another member with a corresponding node.
return nil, errors.Errorf("etcd leader is reported as %x, but we couldn't find any matching member", client.LeaderID)
}
return nil, errors.Wrap(kerrors.NewAggregate(errs), "could not establish a connection to the etcd leader")
}
94 changes: 62 additions & 32 deletions controlplane/kubeadm/internal/etcd_client_generator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,14 @@ package internal
import (
"context"
"crypto/tls"
"errors"
"strings"
"testing"

. "github.com/onsi/gomega"

"github.com/pkg/errors"
"go.etcd.io/etcd/api/v3/etcdserverpb"
clientv3 "go.etcd.io/etcd/client/v3"

"k8s.io/client-go/rest"

"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd"
etcdfake "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd/fake"
)
Expand All @@ -44,9 +41,7 @@ func TestNewEtcdClientGenerator(t *testing.T) {
g.Expect(subject.createClient).To(Not(BeNil()))
}

func TestForNodes(t *testing.T) {
g := NewWithT(t)

func TestFirstAvailableNode(t *testing.T) {
tests := []struct {
name string
nodes []string
Expand All @@ -64,15 +59,21 @@ func TestForNodes(t *testing.T) {
expectedClient: etcd.Client{Endpoint: "etcd-node-1"},
},
{
name: "Returns error",
name: "Fails when called with an empty node list",
nodes: nil,
cc: nil,
expectedErr: "invalid argument: forLeader can't be called with an empty list of nodes",
},
{
name: "Returns error from client",
nodes: []string{"node-1", "node-2"},
cc: func(ctx context.Context, endpoints []string) (*etcd.Client, error) {
return nil, errors.New("something went wrong")
},
expectedErr: "could not establish a connection to any etcd node: something went wrong",
},
{
name: "Returns client when nodes are down but atleast one node is up",
name: "Returns client when some of the nodes are down but at least one node is up",
nodes: []string{"node-down-1", "node-down-2", "node-up"},
cc: func(ctx context.Context, endpoints []string) (*etcd.Client, error) {
if strings.Contains(endpoints[0], "node-down") {
Expand All @@ -86,23 +87,24 @@ func TestForNodes(t *testing.T) {
}

for _, tt := range tests {
subject = NewEtcdClientGenerator(&rest.Config{}, &tls.Config{MinVersion: tls.VersionTLS12})
subject.createClient = tt.cc

client, err := subject.forFirstAvailableNode(ctx, tt.nodes)

if tt.expectedErr != "" {
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).Should(Equal(tt.expectedErr))
} else {
g.Expect(*client).Should(Equal(tt.expectedClient))
}
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)
subject = NewEtcdClientGenerator(&rest.Config{}, &tls.Config{MinVersion: tls.VersionTLS12})
subject.createClient = tt.cc

client, err := subject.forFirstAvailableNode(ctx, tt.nodes)

if tt.expectedErr != "" {
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).Should(Equal(tt.expectedErr))
} else {
g.Expect(*client).Should(Equal(tt.expectedClient))
}
})
}
}

func TestForLeader(t *testing.T) {
g := NewWithT(t)

tests := []struct {
name string
nodes []string
Expand Down Expand Up @@ -140,7 +142,6 @@ func TestForLeader(t *testing.T) {
AlarmResponse: &clientv3.AlarmResponse{},
}},
},

{
name: "Returns client for leader even when one or more nodes are down",
nodes: []string{"node-down-1", "node-down-2", "node-leader"},
Expand Down Expand Up @@ -171,6 +172,31 @@ func TestForLeader(t *testing.T) {
AlarmResponse: &clientv3.AlarmResponse{},
}},
},
{
name: "Fails when called with an empty node list",
nodes: nil,
cc: nil,
expectedErr: "invalid argument: forLeader can't be called with an empty list of nodes",
},
{
name: "Returns error when the leader does not have a corresponding node",
nodes: []string{"node-1"},
cc: func(ctx context.Context, endpoints []string) (*etcd.Client, error) {
return &etcd.Client{
Endpoint: endpoints[0],
LeaderID: 1729,
EtcdClient: &etcdfake.FakeEtcdClient{
MemberListResponse: &clientv3.MemberListResponse{
Members: []*etcdserverpb.Member{
{ID: 1234, Name: "node-1"},
{ID: 1729, Name: "node-leader"},
},
},
AlarmResponse: &clientv3.AlarmResponse{},
}}, nil
},
expectedErr: "etcd leader is reported as 6c1 with name \"node-leader\", but we couldn't find a corresponding Node in the cluster",
},
{
name: "Returns error when all nodes are down",
nodes: []string{"node-down-1", "node-down-2", "node-down-3"},
Expand All @@ -182,16 +208,20 @@ func TestForLeader(t *testing.T) {
}

for _, tt := range tests {
subject = NewEtcdClientGenerator(&rest.Config{}, &tls.Config{MinVersion: tls.VersionTLS12})
subject.createClient = tt.cc
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

subject = NewEtcdClientGenerator(&rest.Config{}, &tls.Config{MinVersion: tls.VersionTLS12})
subject.createClient = tt.cc

client, err := subject.forLeader(ctx, tt.nodes)
client, err := subject.forLeader(ctx, tt.nodes)

if tt.expectedErr != "" {
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).Should(Equal(tt.expectedErr))
} else {
g.Expect(*client).Should(Equal(tt.expectedClient))
}
if tt.expectedErr != "" {
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).Should(Equal(tt.expectedErr))
} else {
g.Expect(*client).Should(Equal(tt.expectedClient))
}
})
}
}

0 comments on commit b031ecc

Please sign in to comment.