From b0e67d69fc4e77c8d21730f07f1f4683da12d6ec Mon Sep 17 00:00:00 2001
From: Sedef
Date: Thu, 2 Apr 2020 12:11:31 -0700
Subject: [PATCH] [kcp] Combined health checks into a single function

---
 .../kubeadm/controllers/controller.go      | 27 ++++++++++++
 controlplane/kubeadm/controllers/scale.go  | 41 +++----------------
 2 files changed, 33 insertions(+), 35 deletions(-)

diff --git a/controlplane/kubeadm/controllers/controller.go b/controlplane/kubeadm/controllers/controller.go
index 494212ef99ee..a5234a879bc6 100644
--- a/controlplane/kubeadm/controllers/controller.go
+++ b/controlplane/kubeadm/controllers/controller.go
@@ -320,3 +320,30 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o handler.M
 
 	return nil
 }
+
+func (r *KubeadmControlPlaneReconciler) generalHealthCheck(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) error {
+	logger := controlPlane.Logger()
+
+	// Do a health check of the Control Plane components
+	if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil {
+		logger.V(2).Info("Waiting for control plane to pass control plane health check to continue reconciliation", "cause", err)
+		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
+			"Waiting for control plane to pass control plane health check to continue reconciliation: %v", err)
+		return &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
+	}
+
+	// Ensure etcd is healthy
+	if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil {
+		// If any etcd members reference nodes that no longer exist, remove them from etcd and from the kubeadm ConfigMap.
+		// This addresses issues caused by manual control-plane machine deletion.
+		if err := r.managementCluster.TargetClusterRemoveMissingNodes(ctx, util.ObjectKey(cluster)); err != nil {
+			logger.V(2).Info("Failed attempt to remove potential hanging etcd members to pass etcd health check to continue reconciliation", "cause", err)
+		}
+		logger.V(2).Info("Waiting for control plane to pass etcd health check to continue reconciliation", "cause", err)
+		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
+			"Waiting for control plane to pass etcd health check to continue reconciliation: %v", err)
+		return &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
+	}
+
+	return nil
+}

diff --git a/controlplane/kubeadm/controllers/scale.go b/controlplane/kubeadm/controllers/scale.go
index 38b61babcb25..bf33fe6d75a9 100644
--- a/controlplane/kubeadm/controllers/scale.go
+++ b/controlplane/kubeadm/controllers/scale.go
@@ -49,20 +49,7 @@ func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Conte
 func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, _ internal.FilterableMachineCollection, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
 	logger := controlPlane.Logger()
 
-	if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil {
-		logger.V(2).Info("Waiting for control plane to pass control plane health check before adding an additional control plane machine", "cause", err)
-		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass control plane health check before adding additional control plane machine: %v", err)
-		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
-	}
-
-	if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil {
-		// If there are any nodes in ETCD members that do not exist, remove them from ETCD and from kubeadm configmap.
-		// This will solve issues related to manual control-plane machine deletion.
-		if err := r.managementCluster.TargetClusterRemoveMissingNodes(ctx, util.ObjectKey(cluster)); err != nil {
-			logger.V(2).Info("Failed attempt to remove potential hanging etcd members to pass etcd health check before adding an additional control plane machine", "cause", err)
-		}
-		logger.V(2).Info("Waiting for control plane to pass etcd health check before adding an additional control plane machine", "cause", err)
-		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass etcd health check before adding additional control plane machine: %v", err)
+	if err := r.generalHealthCheck(ctx, cluster, kcp, controlPlane); err != nil {
 		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
 	}
 
@@ -95,13 +82,16 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 		return ctrl.Result{}, errors.Wrapf(err, "failed to create client to workload cluster")
 	}
 
-	// We don't want to health check at the beginning of this method to avoid blocking re-entrancy
-	// Wait for any delete in progress to complete before deleting another Machine
 	if controlPlane.HasDeletingMachine() {
 		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: deleteRequeueAfter}
 	}
 
+	// We don't want to health check at the beginning of this method to avoid blocking re-entrancy
+	if err := r.generalHealthCheck(ctx, cluster, kcp, controlPlane); err != nil {
+		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
+	}
+
 	markedForDeletion := selectedMachines.Filter(machinefilters.HasAnnotationKey(controlplanev1.DeleteForScaleDownAnnotation))
 	if len(markedForDeletion) == 0 {
 		fd := controlPlane.FailureDomainWithMostMachines(selectedMachines)
@@ -122,18 +112,6 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 		return ctrl.Result{}, errors.New("failed to pick control plane Machine to delete")
 	}
 
-	// Ensure etcd is healthy prior to attempting to remove the member
-	if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil {
-		// If there are any nodes in ETCD members that do not exist, remove them from ETCD and from kubeadm configmap.
-		// This will solve issues related to manual control-plane machine deletion.
-		if err := r.managementCluster.TargetClusterRemoveMissingNodes(ctx, util.ObjectKey(cluster)); err != nil {
-			logger.V(2).Info("Failed attempt to remove potential hanging etcd members to pass etcd health check before adding an additional control plane machine", "cause", err)
-		}
-		logger.V(2).Info("Waiting for control plane to pass etcd health check before removing a control plane machine", "cause", err)
-		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
-			"Waiting for control plane to pass etcd health check before removing a control plane machine: %v", err)
-		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
-	}
 	// If etcd leadership is on machine that is about to be deleted, move it to the newest member available.
 	etcdLeaderCandidate := ownedMachines.Newest()
 	if err := workloadCluster.ForwardEtcdLeadership(ctx, machineToDelete, etcdLeaderCandidate); err != nil {
@@ -162,13 +140,6 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 		}
 	}
 
-	// Do a final health check of the Control Plane components prior to actually deleting the machine
-	if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil {
-		logger.V(2).Info("Waiting for control plane to pass control plane health check before removing a control plane machine", "cause", err)
-		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
-			"Waiting for control plane to pass control plane health check before removing a control plane machine: %v", err)
-		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
-	}
 	logger = logger.WithValues("machine", machineToDelete)
 	if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) {
 		logger.Error(err, "Failed to delete control plane machine")
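
Distilled, the new helper folds two previously duplicated steps into one: check the control plane components, then check etcd (attempting removal of stale members before giving up), and surface any failure as a single requeue-able error. The following self-contained Go sketch illustrates that pattern; healthChecker, requeueAfterError, unhealthyEtcd, and the 20-second retry interval are invented stand-ins for illustration, not the real ManagementCluster, capierrors.RequeueAfterError, or healthCheckFailedRequeueAfter from the patch.

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// healthChecker is an invented stand-in for the managementCluster
// dependency used by generalHealthCheck in the patch.
type healthChecker interface {
	ControlPlaneIsHealthy(ctx context.Context) error
	EtcdIsHealthy(ctx context.Context) error
	RemoveMissingNodes(ctx context.Context) error
}

// requeueAfterError mimics the shape of capierrors.RequeueAfterError:
// an error that also tells the reconciler when to retry.
type requeueAfterError struct{ RequeueAfter time.Duration }

func (e *requeueAfterError) Error() string {
	return fmt.Sprintf("requeue after %s", e.RequeueAfter)
}

// generalHealthCheck mirrors the structure of the helper added to
// controller.go: run the control plane check, then the etcd check with a
// best-effort cleanup of stale members, and wrap any failure in a single
// requeue-able error.
func generalHealthCheck(ctx context.Context, c healthChecker) error {
	if err := c.ControlPlaneIsHealthy(ctx); err != nil {
		return &requeueAfterError{RequeueAfter: 20 * time.Second}
	}
	if err := c.EtcdIsHealthy(ctx); err != nil {
		// Cleanup failure is only logged in the real code; the original
		// health check error still wins.
		_ = c.RemoveMissingNodes(ctx)
		return &requeueAfterError{RequeueAfter: 20 * time.Second}
	}
	return nil
}

// unhealthyEtcd simulates a workload cluster whose etcd check fails.
type unhealthyEtcd struct{}

func (unhealthyEtcd) ControlPlaneIsHealthy(context.Context) error { return nil }
func (unhealthyEtcd) EtcdIsHealthy(context.Context) error        { return errors.New("member down") }
func (unhealthyEtcd) RemoveMissingNodes(context.Context) error   { return nil }

func main() {
	fmt.Println(generalHealthCheck(context.Background(), unhealthyEtcd{}))
	// Output: requeue after 20s
}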
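
One reviewer-level observation: generalHealthCheck already returns a *capierrors.RequeueAfterError, yet both call sites in scale.go discard it and allocate an identical one. A possible follow-up, not part of this patch, would be to propagate the helper's error directly. As a hypothetical fragment in the patch's own context:

	// Hypothetical simplification of the scale.go call sites: reuse the
	// RequeueAfterError constructed inside generalHealthCheck instead of
	// re-wrapping it.
	if err := r.generalHealthCheck(ctx, cluster, kcp, controlPlane); err != nil {
		return ctrl.Result{}, err
	}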
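
The behavioral shift in scaleDownControlPlane is about ordering: instead of separate etcd and control-plane checks sprinkled before member removal and before the final delete, one combined check now runs up front, right after the in-progress-delete guard. The sketch below shows that ordering, reusing the toy types from the first sketch; hasDeletingMachine, pickMachineToDelete, forwardEtcdLeadership, and deleteMachine are likewise invented stand-ins, not the real cluster-api calls.

// scaleDownSketch shows the post-patch ordering of the scale-down path.
func scaleDownSketch(ctx context.Context, c healthChecker) error {
	// Wait for any delete in progress to complete before deleting another machine.
	if hasDeletingMachine() {
		return &requeueAfterError{RequeueAfter: 30 * time.Second}
	}
	// A single combined check replaces the two ad-hoc checks the patch removes.
	if err := generalHealthCheck(ctx, c); err != nil {
		return err
	}
	machine := pickMachineToDelete()
	// Keep the etcd leader off the machine that is about to go away.
	forwardEtcdLeadership(machine)
	return deleteMachine(ctx, machine)
}

func hasDeletingMachine() bool       { return false }
func pickMachineToDelete() string    { return "control-plane-2" }
func forwardEtcdLeadership(m string) {}
func deleteMachine(ctx context.Context, m string) error {
	fmt.Printf("deleting %s\n", m)
	return nil
}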