Add KCP conditions, split reconcileHealth into preflight and reconcileEtcdMembers, make both use conditions
fabriziopandini committed Nov 10, 2020
1 parent db6cc2a commit e400d47
Showing 19 changed files with 2,240 additions and 1,000 deletions.
66 changes: 66 additions & 0 deletions controlplane/kubeadm/api/v1alpha3/condition_consts.go
@@ -66,3 +66,69 @@ const (
// ScalingDownReason (Severity=Info) documents a KubeadmControlPlane that is decreasing the number of replicas.
ScalingDownReason = "ScalingDown"
)

const (
// ControlPlaneComponentsHealthyCondition reports the overall status of control plane components
// implemented as static pods generated by kubeadm, including kube-apiserver, kube-controller-manager,
// kube-scheduler and etcd if managed.
ControlPlaneComponentsHealthyCondition clusterv1.ConditionType = "ControlPlaneComponentsHealthy"

// ControlPlaneComponentsUnhealthyReason (Severity=Error) documents a control plane component that is not healthy.
ControlPlaneComponentsUnhealthyReason = "ControlPlaneComponentsUnhealthy"

// ControlPlaneComponentsUnknownReason reports a control plane component in unknown status.
ControlPlaneComponentsUnknownReason = "ControlPlaneComponentsUnknown"

// ControlPlaneComponentsInspectionFailedReason documents a failure in inspecting the control plane component status.
ControlPlaneComponentsInspectionFailedReason = "ControlPlaneComponentsInspectionFailed"

// MachineAPIServerPodHealthyCondition reports a machine's kube-apiserver's operational status.
MachineAPIServerPodHealthyCondition clusterv1.ConditionType = "APIServerPodHealthy"

// MachineControllerManagerPodHealthyCondition reports a machine's kube-controller-manager's health status.
MachineControllerManagerPodHealthyCondition clusterv1.ConditionType = "ControllerManagerPodHealthy"

// MachineSchedulerPodHealthyCondition reports a machine's kube-scheduler's operational status.
MachineSchedulerPodHealthyCondition clusterv1.ConditionType = "SchedulerPodHealthy"

// MachineEtcdPodHealthyCondition reports a machine's etcd pod's operational status.
// NOTE: This condition exists only if a stacked etcd cluster is used.
MachineEtcdPodHealthyCondition clusterv1.ConditionType = "EtcdPodHealthy"

// PodProvisioningReason (Severity=Info) documents a pod waiting to be provisioned, i.e., the Pod is in the "Pending" phase.
PodProvisioningReason = "PodProvisioning"

// PodMissingReason (Severity=Error) documents that a pod does not exist.
PodMissingReason = "PodMissing"

// PodFailedReason (Severity=Error) documents a pod that failed during provisioning, e.g., CrashLoopBackOff or ImagePullBackOff,
// or a pod whose containers have all terminated.
PodFailedReason = "PodFailed"

// PodInspectionFailedReason documents a failure in inspecting the pod status.
PodInspectionFailedReason = "PodInspectionFailed"
)

const (
// EtcdClusterHealthyCondition documents the overall etcd cluster's health.
EtcdClusterHealthyCondition clusterv1.ConditionType = "EtcdClusterHealthyCondition"

// EtcdClusterInspectionFailedReason documents a failure in inspecting the etcd cluster status.
EtcdClusterInspectionFailedReason = "EtcdClusterInspectionFailed"

// EtcdClusterUnknownReason reports an etcd cluster in unknown status.
EtcdClusterUnknownReason = "EtcdClusterUnknown"

// EtcdClusterUnhealthyReason (Severity=Error) is set when the etcd cluster is unhealthy.
EtcdClusterUnhealthyReason = "EtcdClusterUnhealthy"

// MachineEtcdMemberHealthyCondition reports the machine's etcd member's health status.
// NOTE: This condition exists only if a stacked etcd cluster is used.
MachineEtcdMemberHealthyCondition clusterv1.ConditionType = "EtcdMemberHealthy"

// EtcdMemberInspectionFailedReason documents a failure in inspecting the etcd member status.
EtcdMemberInspectionFailedReason = "MemberInspectionFailed"

// EtcdMemberUnhealthyReason (Severity=Error) documents that a Machine's etcd member is unhealthy.
EtcdMemberUnhealthyReason = "EtcdMemberUnhealthy"
)
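For context, the condition types and reasons above are meant to be set through the Cluster API conditions utilities that this commit already uses elsewhere (conditions.MarkTrue, conditions.MarkFalse, conditions.IsTrue). The snippet below is a minimal sketch, not part of the commit, showing how a reconciler could translate an observed kube-apiserver pod state into the new machine-level condition; the helper name and its boolean parameters are hypothetical, and the same imports as the file above (clusterv1, controlplanev1, conditions) are assumed.

// Hypothetical helper, for illustration only: maps an observed kube-apiserver pod state
// onto MachineAPIServerPodHealthyCondition using the new reasons defined above.
func markAPIServerPodHealth(machine *clusterv1.Machine, podExists, podReady bool) {
	switch {
	case !podExists:
		conditions.MarkFalse(machine, controlplanev1.MachineAPIServerPodHealthyCondition,
			controlplanev1.PodMissingReason, clusterv1.ConditionSeverityError, "Pod kube-apiserver is missing")
	case !podReady:
		conditions.MarkFalse(machine, controlplanev1.MachineAPIServerPodHealthyCondition,
			controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, "Waiting for the pod to become ready")
	default:
		conditions.MarkTrue(machine, controlplanev1.MachineAPIServerPodHealthyCondition)
	}
}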
6 changes: 3 additions & 3 deletions controlplane/kubeadm/controllers/consts.go
@@ -23,9 +23,9 @@ const (
// all control plane machines have been deleted.
deleteRequeueAfter = 30 * time.Second

// healthCheckFailedRequeueAfter is how long to wait before trying to scale
// up/down if some target cluster health check has failed
healthCheckFailedRequeueAfter = 20 * time.Second
// preflightFailedRequeueAfter is how long to wait before trying to scale
// up/down if some preflight check for those operations has failed
preflightFailedRequeueAfter = 15 * time.Second

// dependentCertRequeueAfter is how long to wait before checking again to see if
// dependent certificates have been created.
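The renamed constant reflects the new flow: scale up/down now runs preflight checks and requeues after a short delay when one of them fails. A hedged sketch of what such a caller could look like; the preflightChecks helper is an assumption here, since the file that implements the preflight logic is not among the hunks loaded on this page.

// Illustrative fragment only; preflightChecks is assumed, not shown in the diffs on this page.
if err := r.preflightChecks(controlPlane); err != nil {
	// A preflight check failed (e.g. a control plane component or an etcd member is not
	// healthy yet): give the workload cluster some time to settle before retrying the
	// scale up/down operation.
	return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil
}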
140 changes: 71 additions & 69 deletions controlplane/kubeadm/controllers/controller.go
@@ -304,9 +304,15 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, controlPlane.Machines.ConditionGetters(), conditions.AddSourceRef())

// reconcileControlPlaneHealth returns err if there is a machine being deleted or if the control plane is unhealthy.
// If the control plane is not yet initialized, this call shouldn't fail.
if result, err := r.reconcileControlPlaneHealth(ctx, cluster, kcp, controlPlane); err != nil || !result.IsZero() {
// Updates conditions reporting the status of static pods and the status of the etcd cluster.
// NOTE: Conditions reporting KCP operation progress, e.g. Resized or SpecUpToDate, are inlined with the rest of the execution.
if result, err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil || !result.IsZero() {
return result, err
}

// Ensures the number of etcd members is in sync with the number of machines/nodes.
// NOTE: This is usually required after a machine deletion.
if result, err := r.reconcileEtcdMembers(ctx, controlPlane); err != nil || !result.IsZero() {
return result, err
}

@@ -315,9 +321,7 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
switch {
case len(needRollout) > 0:
logger.Info("Rolling out Control Plane machines", "needRollout", needRollout.Names())
// NOTE: we are using Status.UpdatedReplicas from the previous reconciliation only to provide a meaningful message
// and this does not influence any reconciliation logic.
conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), kcp.Status.UpdatedReplicas)
conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), len(controlPlane.Machines)-len(needRollout))
return r.upgradeControlPlane(ctx, cluster, kcp, controlPlane, needRollout)
default:
// make sure last upgrade operation is marked as completed.
@@ -384,27 +388,6 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, clu
logger := r.Log.WithValues("namespace", kcp.Namespace, "kubeadmControlPlane", kcp.Name, "cluster", cluster.Name)
logger.Info("Reconcile KubeadmControlPlane deletion")

// Ignore the health check results here as well as the errors, health check functions are to set health related conditions on Machines.
// Errors may be due to not being able to get workload cluster nodes.
workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
if err != nil {
r.Log.V(2).Info("Cannot get remote client to workload cluster during delete reconciliation", "err", err.Error())
} else {
// Do a health check of the Control Plane components
_, err = workloadCluster.ControlPlaneIsHealthy(ctx)
if err != nil {
// Do nothing
r.Log.V(2).Info("Control plane did not pass control plane health check during delete reconciliation", "err", err.Error())
}

// Do a health check of the etcd
_, err = workloadCluster.EtcdIsHealthy(ctx)
if err != nil {
// Do nothing
r.Log.V(2).Info("Control plane did not pass etcd health check during delete reconciliation", "err", err.Error())
}
}

// Gets all machines, not just control plane machines.
allMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster))
if err != nil {
@@ -418,6 +401,18 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, clu
return ctrl.Result{}, nil
}

controlPlane, err := internal.NewControlPlane(ctx, r.Client, cluster, kcp, ownedMachines)
if err != nil {
logger.Error(err, "failed to initialize control plane")
return ctrl.Result{}, err
}

// Updates conditions reporting the status of static pods and the status of the etcd cluster.
// NOTE: Ignoring failures given that we are deleting
if _, err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil {
logger.Info("failed to reconcile conditions", "error", err.Error())
}

// Aggregate the operational state of all the machines; while aggregating we are adding the
// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
// However, during delete we are hiding the counter (1 of x) because it does not make sense given that
@@ -469,62 +464,69 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o handler.M
return nil
}

// reconcileControlPlaneHealth performs health checks for control plane components and etcd
// It removes any etcd members that do not have a corresponding node.
// Also, as a final step, checks if there is any machines that is being deleted.
func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneHealth(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
// If there is no KCP-owned control-plane machines, then control-plane has not been initialized yet.
if controlPlane.Machines.Len() == 0 {
// reconcileControlPlaneConditions is responsible for reconciling conditions reporting the status of static pods and
// the status of the etcd cluster.
func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneConditions(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
// If the cluster is not yet initialized, there is no way to connect to the workload cluster and fetch information
// for updating conditions. Return early.
if !controlPlane.KCP.Status.Initialized {
return ctrl.Result{}, nil
}

workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster))
if err != nil {
// Failing to connect to the workload cluster can mean the workload cluster is unhealthy for a variety of reasons, such as etcd quorum loss.
return ctrl.Result{}, errors.Wrap(err, "cannot get remote client to workload cluster")
}

errList := []error{}
// Update conditions status
workloadCluster.UpdateStaticPodConditions(ctx, controlPlane)
workloadCluster.UpdateEtcdConditions(ctx, controlPlane)

// Do a health check of the Control Plane components
checkResult, err := workloadCluster.ControlPlaneIsHealthy(ctx)
if err != nil {
errList = append(errList, errors.Wrap(err, "failed to pass control-plane health check"))
} else if err := checkResult.Aggregate(controlPlane); err != nil {
r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
"Waiting for control plane to pass control plane health check to continue reconciliation: %v", err)
errList = append(errList, errors.Wrap(err, "failed to pass control-plane health check"))
// Patch machines with the updated conditions.
if err := controlPlane.PatchMachines(ctx); err != nil {
return ctrl.Result{}, err
}

// If KCP should manage etcd, ensure etcd is healthy.
if controlPlane.IsEtcdManaged() {
checkResult, err := workloadCluster.EtcdIsHealthy(ctx)
if err != nil {
errList = append(errList, errors.Wrap(err, "failed to pass etcd health check"))
} else if err := checkResult.Aggregate(controlPlane); err != nil {
errList = append(errList, errors.Wrap(err, "failed to pass etcd health check"))
r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
"Waiting for control plane to pass etcd health check to continue reconciliation: %v", err)
// If there are any etcd members that do not have corresponding nodes, remove them from etcd and from the kubeadm configmap.
// This will solve issues related to manual control-plane machine deletion.
workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
if err != nil {
errList = append(errList, errors.Wrap(err, "cannot get remote client to workload cluster"))
} else if err := workloadCluster.ReconcileEtcdMembers(ctx); err != nil {
errList = append(errList, errors.Wrap(err, "failed attempt to remove potential hanging etcd members to pass etcd health check to continue reconciliation"))
}
}
// KCP will be patched at the end of Reconcile to reflect updated conditions, so we can return now.
return ctrl.Result{}, nil
}

// reconcileEtcdMembers ensures the number of etcd members is in sync with the number of machines/nodes.
// This is usually required after a machine deletion.
//
// NOTE: this func uses KCP conditions, so it is required to call reconcileControlPlaneConditions before this.
func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
logger := r.Log.WithValues("namespace", controlPlane.KCP.Namespace, "kubeadmControlPlane", controlPlane.KCP.Name, "cluster", controlPlane.Cluster.Name)

// If etcd is not managed by KCP this is a no-op.
if !controlPlane.IsEtcdManaged() {
return ctrl.Result{}, nil
}

// If there is no KCP-owned control-plane machines, then control-plane has not been initialized yet.
if controlPlane.Machines.Len() == 0 {
return ctrl.Result{}, nil
}

if len(errList) > 0 {
return ctrl.Result{}, kerrors.NewAggregate(errList)
// Potential inconsistencies between the list of members and the list of machines/nodes are
// surfaced using the EtcdClusterHealthyCondition; if this condition is true, meaning no inconsistencies exist, return early.
if conditions.IsTrue(controlPlane.KCP, controlplanev1.EtcdClusterHealthyCondition) {
return ctrl.Result{}, nil
}

// We need this check for scale up as well as down to avoid scaling up when there is a machine being deleted.
// This should be at the end of this method as no need to wait for machine to be completely deleted to reconcile etcd.
// TODO: Revisit during machine remediation implementation which may need to cover other machine phases.
if controlPlane.HasDeletingMachine() {
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster))
if err != nil {
// Failing to connect to the workload cluster can mean the workload cluster is unhealthy for a variety of reasons, such as etcd quorum loss.
return ctrl.Result{}, errors.Wrap(err, "cannot get remote client to workload cluster")
}

removedMembers, err := workloadCluster.ReconcileEtcdMembers(ctx)
if err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed attempt to reconcile etcd members")
}

if len(removedMembers) > 0 {
logger.Info("Etcd members without nodes removed from the cluster", "members", removedMembers)
}

return ctrl.Result{}, nil
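reconcileEtcdMembers above only triggers member removal when EtcdClusterHealthyCondition reports a problem, and the removal itself is delegated to workloadCluster.ReconcileEtcdMembers, whose diff is not loaded on this page. As a purely conceptual sketch (not the code added by this commit), reconciling members boils down to dropping every etcd member whose name has no matching Node and reporting which members were removed:

// Conceptual sketch only: remove every etcd member that has no backing node and
// return the names of the removed members.
func removeOrphanedMembers(memberNames []string, nodeNames map[string]bool, remove func(name string) error) ([]string, error) {
	removedMembers := []string{}
	for _, member := range memberNames {
		if nodeNames[member] {
			// The member still has a corresponding node, keep it.
			continue
		}
		if err := remove(member); err != nil {
			return removedMembers, err
		}
		removedMembers = append(removedMembers, member)
	}
	return removedMembers, nil
}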
48 changes: 20 additions & 28 deletions controlplane/kubeadm/controllers/controller_test.go
@@ -397,10 +397,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) {

fmc := &fakeManagementCluster{
Machines: internal.FilterableMachineCollection{},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
}
objs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()}
for i := 0; i < 3; i++ {
@@ -468,10 +465,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) {

fmc := &fakeManagementCluster{
Machines: internal.FilterableMachineCollection{},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
}
objs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()}
for i := 0; i < 3; i++ {
@@ -585,10 +579,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) {

fmc := &fakeManagementCluster{
Machines: internal.FilterableMachineCollection{},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
}
objs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()}
for i := 0; i < 3; i++ {
@@ -671,10 +662,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) {
},
},
},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
}

fakeClient := newFakeClient(g, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy(), fmc.Machines["test0"].DeepCopy())
@@ -1187,10 +1175,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
Client: fakeClient,
managementCluster: &fakeManagementCluster{
Management: &internal.Management{Client: fakeClient},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
},
Log: log.Log,
recorder: record.NewFakeRecorder(32),
@@ -1240,10 +1225,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
Client: fakeClient,
managementCluster: &fakeManagementCluster{
Management: &internal.Management{Client: fakeClient},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
},
Log: log.Log,
recorder: record.NewFakeRecorder(32),
@@ -1275,10 +1257,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
Client: fakeClient,
managementCluster: &fakeManagementCluster{
Management: &internal.Management{Client: fakeClient},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
},
recorder: record.NewFakeRecorder(32),
Log: log.Log,
@@ -1394,6 +1373,11 @@ func createClusterWithControlPlane() (*clusterv1.Cluster, *controlplanev1.Kubead
return cluster, kcp, genericMachineTemplate
}

func setKCPHealthy(kcp *controlplanev1.KubeadmControlPlane) {
conditions.MarkTrue(kcp, controlplanev1.ControlPlaneComponentsHealthyCondition)
conditions.MarkTrue(kcp, controlplanev1.EtcdClusterHealthyCondition)
}

func createMachineNodePair(name string, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, ready bool) (*clusterv1.Machine, *corev1.Node) {
machine := &clusterv1.Machine{
TypeMeta: metav1.TypeMeta{
@@ -1446,6 +1430,14 @@ func createMachineNodePair(name string, cluster *clusterv1.Cluster, kcp *control
return machine, node
}

func setMachineHealthy(m *clusterv1.Machine) {
conditions.MarkTrue(m, controlplanev1.MachineAPIServerPodHealthyCondition)
conditions.MarkTrue(m, controlplanev1.MachineControllerManagerPodHealthyCondition)
conditions.MarkTrue(m, controlplanev1.MachineSchedulerPodHealthyCondition)
conditions.MarkTrue(m, controlplanev1.MachineEtcdPodHealthyCondition)
conditions.MarkTrue(m, controlplanev1.MachineEtcdMemberHealthyCondition)
}

// newCluster return a CAPI cluster object
func newCluster(namespacedName *types.NamespacedName) *clusterv1.Cluster {
return &clusterv1.Cluster{
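The two helpers added above mark every condition that the new condition/preflight logic reads, so tests can build a "healthy" fixture in a couple of lines. Below is a hypothetical test fragment, for illustration only; it reuses createClusterWithControlPlane and createMachineNodePair, which are defined in this file, together with the new helpers.

// Hypothetical test, not part of this diff.
func TestHealthyControlPlaneFixture(t *testing.T) {
	cluster, kcp, tmpl := createClusterWithControlPlane()
	setKCPHealthy(kcp)

	machine, node := createMachineNodePair("machine-0", cluster, kcp, true)
	setMachineHealthy(machine)

	// cluster, kcp, tmpl, machine and node can now be loaded into a fake client and a
	// fakeManagementCluster, as the tests above do, without tripping the health gates.
	_ = tmpl
	_ = node
}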