From b0e67d69fc4e77c8d21730f07f1f4683da12d6ec Mon Sep 17 00:00:00 2001
From: Sedef
Date: Thu, 2 Apr 2020 12:11:31 -0700
Subject: [PATCH] [kcp] Combined health checks into a single function

---
 .../kubeadm/controllers/controller.go      | 27 ++++++++++++
 controlplane/kubeadm/controllers/scale.go  | 41 +++----------------
 2 files changed, 33 insertions(+), 35 deletions(-)

diff --git a/controlplane/kubeadm/controllers/controller.go b/controlplane/kubeadm/controllers/controller.go
index 494212ef99ee..a5234a879bc6 100644
--- a/controlplane/kubeadm/controllers/controller.go
+++ b/controlplane/kubeadm/controllers/controller.go
@@ -320,3 +320,30 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o handler.M
 
 	return nil
 }
+
+func (r *KubeadmControlPlaneReconciler) generalHealthCheck(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) error {
+	logger := controlPlane.Logger()
+
+	// Do a health check of the Control Plane components
+	if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil {
+		logger.V(2).Info("Waiting for control plane to pass control plane health check to continue reconciliation", "cause", err)
+		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
+			"Waiting for control plane to pass control plane health check to continue reconciliation: %v", err)
+		return &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
+	}
+
+	// Ensure etcd is healthy
+	if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil {
+		// If any etcd members reference nodes that no longer exist, remove them from etcd and from the kubeadm ConfigMap.
+		// This addresses issues caused by manual control-plane machine deletion.
+		if err := r.managementCluster.TargetClusterRemoveMissingNodes(ctx, util.ObjectKey(cluster)); err != nil {
+			logger.V(2).Info("Failed attempt to remove potential hanging etcd members to pass etcd health check to continue reconciliation", "cause", err)
+		}
+		logger.V(2).Info("Waiting for control plane to pass etcd health check to continue reconciliation", "cause", err)
+		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
+			"Waiting for control plane to pass etcd health check to continue reconciliation: %v", err)
+		return &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
+	}
+
+	return nil
+}

diff --git a/controlplane/kubeadm/controllers/scale.go b/controlplane/kubeadm/controllers/scale.go
index 38b61babcb25..bf33fe6d75a9 100644
--- a/controlplane/kubeadm/controllers/scale.go
+++ b/controlplane/kubeadm/controllers/scale.go
@@ -49,20 +49,7 @@ func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Conte
 func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, _ internal.FilterableMachineCollection, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
 	logger := controlPlane.Logger()
 
-	if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil {
-		logger.V(2).Info("Waiting for control plane to pass control plane health check before adding an additional control plane machine", "cause", err)
-		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass control plane health check before adding additional control plane machine: %v", err)
-		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
-	}
-
-	if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil {
-		// If there are any nodes in ETCD members that do not exist, remove them from ETCD and from kubeadm configmap.
-		// This will solve issues related to manual control-plane machine deletion.
-		if err := r.managementCluster.TargetClusterRemoveMissingNodes(ctx, util.ObjectKey(cluster)); err != nil {
-			logger.V(2).Info("Failed attempt to remove potential hanging etcd members to pass etcd health check before adding an additional control plane machine", "cause", err)
-		}
-		logger.V(2).Info("Waiting for control plane to pass etcd health check before adding an additional control plane machine", "cause", err)
-		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass etcd health check before adding additional control plane machine: %v", err)
+	if err := r.generalHealthCheck(ctx, cluster, kcp, controlPlane); err != nil {
 		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
 	}
 
@@ -95,13 +82,16 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 		return ctrl.Result{}, errors.Wrapf(err, "failed to create client to workload cluster")
 	}
 
-	// We don't want to health check at the beginning of this method to avoid blocking re-entrancy
-	// Wait for any delete in progress to complete before deleting another Machine
 	if controlPlane.HasDeletingMachine() {
 		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: deleteRequeueAfter}
 	}
 
+	// We don't want to health check at the beginning of this method to avoid blocking re-entrancy
+	if err := r.generalHealthCheck(ctx, cluster, kcp, controlPlane); err != nil {
+		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
+	}
+
 	markedForDeletion := selectedMachines.Filter(machinefilters.HasAnnotationKey(controlplanev1.DeleteForScaleDownAnnotation))
 	if len(markedForDeletion) == 0 {
 		fd := controlPlane.FailureDomainWithMostMachines(selectedMachines)
@@ -122,18 +112,6 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 		return ctrl.Result{}, errors.New("failed to pick control plane Machine to delete")
 	}
 
-	// Ensure etcd is healthy prior to attempting to remove the member
-	if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil {
-		// If there are any nodes in ETCD members that do not exist, remove them from ETCD and from kubeadm configmap.
-		// This will solve issues related to manual control-plane machine deletion.
-		if err := r.managementCluster.TargetClusterRemoveMissingNodes(ctx, util.ObjectKey(cluster)); err != nil {
-			logger.V(2).Info("Failed attempt to remove potential hanging etcd members to pass etcd health check before adding an additional control plane machine", "cause", err)
-		}
-		logger.V(2).Info("Waiting for control plane to pass etcd health check before removing a control plane machine", "cause", err)
-		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
-			"Waiting for control plane to pass etcd health check before removing a control plane machine: %v", err)
-		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
-	}
 	// If etcd leadership is on machine that is about to be deleted, move it to the newest member available.
 	etcdLeaderCandidate := ownedMachines.Newest()
 	if err := workloadCluster.ForwardEtcdLeadership(ctx, machineToDelete, etcdLeaderCandidate); err != nil {
@@ -162,13 +140,6 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 		}
 	}
 
-	// Do a final health check of the Control Plane components prior to actually deleting the machine
-	if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil {
-		logger.V(2).Info("Waiting for control plane to pass control plane health check before removing a control plane machine", "cause", err)
-		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
-			"Waiting for control plane to pass control plane health check before removing a control plane machine: %v", err)
-		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
-	}
 	logger = logger.WithValues("machine", machineToDelete)
 	if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) {
 		logger.Error(err, "Failed to delete control plane machine")
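
Distilled, the new helper folds two previously duplicated steps into one: check the control plane components, then check etcd (attempting removal of stale members before giving up), and surface any failure as a single requeue-able error. The following self-contained Go sketch illustrates that pattern; healthChecker, requeueAfterError, unhealthyEtcd, and the 20-second retry interval are invented stand-ins for illustration, not the real ManagementCluster, capierrors.RequeueAfterError, or healthCheckFailedRequeueAfter from the patch.

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// healthChecker is an invented stand-in for the managementCluster
// dependency used by generalHealthCheck in the patch.
type healthChecker interface {
	ControlPlaneIsHealthy(ctx context.Context) error
	EtcdIsHealthy(ctx context.Context) error
	RemoveMissingNodes(ctx context.Context) error
}

// requeueAfterError mimics the shape of capierrors.RequeueAfterError:
// an error that also tells the reconciler when to retry.
type requeueAfterError struct{ RequeueAfter time.Duration }

func (e *requeueAfterError) Error() string {
	return fmt.Sprintf("requeue after %s", e.RequeueAfter)
}

// generalHealthCheck mirrors the structure of the helper added to
// controller.go: run the control plane check, then the etcd check with a
// best-effort cleanup of stale members, and wrap any failure in a single
// requeue-able error.
func generalHealthCheck(ctx context.Context, c healthChecker) error {
	if err := c.ControlPlaneIsHealthy(ctx); err != nil {
		return &requeueAfterError{RequeueAfter: 20 * time.Second}
	}
	if err := c.EtcdIsHealthy(ctx); err != nil {
		// Cleanup failure is only logged in the real code; the original
		// health check error still wins.
		_ = c.RemoveMissingNodes(ctx)
		return &requeueAfterError{RequeueAfter: 20 * time.Second}
	}
	return nil
}

// unhealthyEtcd simulates a workload cluster whose etcd check fails.
type unhealthyEtcd struct{}

func (unhealthyEtcd) ControlPlaneIsHealthy(context.Context) error { return nil }
func (unhealthyEtcd) EtcdIsHealthy(context.Context) error        { return errors.New("member down") }
func (unhealthyEtcd) RemoveMissingNodes(context.Context) error   { return nil }

func main() {
	fmt.Println(generalHealthCheck(context.Background(), unhealthyEtcd{}))
	// Output: requeue after 20s
}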
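
One reviewer-level observation: generalHealthCheck already returns a *capierrors.RequeueAfterError, yet both call sites in scale.go discard it and allocate an identical one. A possible follow-up, not part of this patch, would be to propagate the helper's error directly. As a hypothetical fragment in the patch's own context:

	// Hypothetical simplification of the scale.go call sites: reuse the
	// RequeueAfterError constructed inside generalHealthCheck instead of
	// re-wrapping it.
	if err := r.generalHealthCheck(ctx, cluster, kcp, controlPlane); err != nil {
		return ctrl.Result{}, err
	}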
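
The behavioral shift in scaleDownControlPlane is about ordering: instead of separate etcd and control-plane checks sprinkled before member removal and before the final delete, one combined check now runs up front, right after the in-progress-delete guard. The sketch below shows that ordering, reusing the toy types from the first sketch; hasDeletingMachine, pickMachineToDelete, forwardEtcdLeadership, and deleteMachine are likewise invented stand-ins, not the real cluster-api calls.

// scaleDownSketch shows the post-patch ordering of the scale-down path.
func scaleDownSketch(ctx context.Context, c healthChecker) error {
	// Wait for any delete in progress to complete before deleting another machine.
	if hasDeletingMachine() {
		return &requeueAfterError{RequeueAfter: 30 * time.Second}
	}
	// A single combined check replaces the two ad-hoc checks the patch removes.
	if err := generalHealthCheck(ctx, c); err != nil {
		return err
	}
	machine := pickMachineToDelete()
	// Keep the etcd leader off the machine that is about to go away.
	forwardEtcdLeadership(machine)
	return deleteMachine(ctx, machine)
}

func hasDeletingMachine() bool       { return false }
func pickMachineToDelete() string    { return "control-plane-2" }
func forwardEtcdLeadership(m string) {}
func deleteMachine(ctx context.Context, m string) error {
	fmt.Printf("deleting %s\n", m)
	return nil
}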