From dd2b0133e1d445505f78fb988ba99a2549328bfe Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Fri, 13 Nov 2020 22:39:38 +0100 Subject: [PATCH] Prevent reconcileEtcdMembers from removing etcd members when etcd starts slowly --- controlplane/kubeadm/controllers/controller.go | 7 +++++++ controlplane/kubeadm/internal/workload_cluster_etcd.go | 8 +++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/controlplane/kubeadm/controllers/controller.go b/controlplane/kubeadm/controllers/controller.go index 4f01773e3cc2..d8bfca2aa9ff 100644 --- a/controlplane/kubeadm/controllers/controller.go +++ b/controlplane/kubeadm/controllers/controller.go @@ -514,6 +514,13 @@ func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context return ctrl.Result{}, nil } + // If there are provisioning machines (machines without a node yet), return. + for _, machine := range controlPlane.Machines { + if machine.Status.NodeRef == nil { + return ctrl.Result{}, nil + } + } + // Potential inconsistencies between the list of members and the list of machines/nodes are // surfaced using the EtcdClusterHealthyCondition; if this condition is true, meaning no inconsistencies exists, return early. 
if conditions.IsTrue(controlPlane.KCP, controlplanev1.EtcdClusterHealthyCondition) { diff --git a/controlplane/kubeadm/internal/workload_cluster_etcd.go b/controlplane/kubeadm/internal/workload_cluster_etcd.go index be71b360087b..525d8850a19d 100644 --- a/controlplane/kubeadm/internal/workload_cluster_etcd.go +++ b/controlplane/kubeadm/internal/workload_cluster_etcd.go @@ -18,6 +18,7 @@ package internal import ( "context" + "fmt" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -59,6 +60,11 @@ func (w *Workload) ReconcileEtcdMembers(ctx context.Context) ([]string, error) { // Check if any member's node is missing from workload cluster // If any, delete it with best effort for _, member := range members { + // If this member was just added, it has an empty name until the etcd pod starts. Ignore it. + if member.Name == "" { + continue + } + isFound := false for _, node := range controlPlaneNodes.Items { if member.Name == node.Name { @@ -70,7 +76,7 @@ func (w *Workload) ReconcileEtcdMembers(ctx context.Context) ([]string, error) { if isFound { continue } - removedMembers = append(removedMembers, member.Name) + removedMembers = append(removedMembers, fmt.Sprintf("%d (Name: %s)", member.ID, member.Name)) if err := w.removeMemberForNode(ctx, member.Name); err != nil { errs = append(errs, err) }