Add KCP conditions, split reconcileHealth into preflight and reconcileEtcdMembers, make both use conditions
fabriziopandini committed Nov 10, 2020
1 parent db6cc2a commit e400d47
Showing 19 changed files with 2,240 additions and 1,000 deletions.
66 changes: 66 additions & 0 deletions controlplane/kubeadm/api/v1alpha3/condition_consts.go
@@ -66,3 +66,69 @@ const (
// ScalingDownReason (Severity=Info) documents a KubeadmControlPlane that is decreasing the number of replicas.
ScalingDownReason = "ScalingDown"
)

const (
// ControlPlaneComponentsHealthyCondition reports the overall status of control plane components
// implemented as static pods generated by kubeadm, including kube-apiserver, kube-controller-manager,
// kube-scheduler and etcd if managed.
ControlPlaneComponentsHealthyCondition clusterv1.ConditionType = "ControlPlaneComponentsHealthy"

// ControlPlaneComponentsUnhealthyReason (Severity=Error) documents a control plane component that is not healthy.
ControlPlaneComponentsUnhealthyReason = "ControlPlaneComponentsUnhealthy"

// ControlPlaneComponentsUnknownReason reports a control plane component in unknown status.
ControlPlaneComponentsUnknownReason = "ControlPlaneComponentsUnknown"

// ControlPlaneComponentsInspectionFailedReason documents a failure in inspecting the control plane component status.
ControlPlaneComponentsInspectionFailedReason = "ControlPlaneComponentsInspectionFailed"

// MachineAPIServerPodHealthyCondition reports a machine's kube-apiserver's operational status.
MachineAPIServerPodHealthyCondition clusterv1.ConditionType = "APIServerPodHealthy"

// MachineControllerManagerPodHealthyCondition reports a machine's kube-controller-manager's health status.
MachineControllerManagerPodHealthyCondition clusterv1.ConditionType = "ControllerManagerPodHealthy"

// MachineSchedulerPodHealthyCondition reports a machine's kube-scheduler's operational status.
MachineSchedulerPodHealthyCondition clusterv1.ConditionType = "SchedulerPodHealthy"

// MachineEtcdPodHealthyCondition reports a machine's etcd pod's operational status.
// NOTE: This condition exists only if a stacked etcd cluster is used.
MachineEtcdPodHealthyCondition clusterv1.ConditionType = "EtcdPodHealthy"

// PodProvisioningReason (Severity=Info) documents a pod waiting to be provisioned, i.e., the Pod is in the "Pending" phase.
PodProvisioningReason = "PodProvisioning"

// PodMissingReason (Severity=Error) documents that a pod does not exist.
PodMissingReason = "PodMissing"

// PodFailedReason (Severity=Error) documents a pod that failed during provisioning, e.g., CrashLoopBackOff or ImagePullBackOff,
// or a pod whose containers have all terminated.
PodFailedReason = "PodFailed"

// PodInspectionFailedReason documents a failure in inspecting the pod status.
PodInspectionFailedReason = "PodInspectionFailed"
)

const (
// EtcdClusterHealthyCondition documents the overall etcd cluster's health.
EtcdClusterHealthyCondition clusterv1.ConditionType = "EtcdClusterHealthyCondition"

// EtcdClusterInspectionFailedReason documents a failure in inspecting the etcd cluster status.
EtcdClusterInspectionFailedReason = "EtcdClusterInspectionFailed"

// EtcdClusterUnknownReason reports an etcd cluster in unknown status.
EtcdClusterUnknownReason = "EtcdClusterUnknown"

// EtcdClusterUnhealthyReason (Severity=Error) is set when the etcd cluster is unhealthy.
EtcdClusterUnhealthyReason = "EtcdClusterUnhealthy"

// MachineEtcdMemberHealthyCondition reports the machine's etcd member's health status.
// NOTE: This condition exists only if a stacked etcd cluster is used.
MachineEtcdMemberHealthyCondition clusterv1.ConditionType = "EtcdMemberHealthy"

// EtcdMemberInspectionFailedReason documents a failure in inspecting the etcd member status.
EtcdMemberInspectionFailedReason = "MemberInspectionFailed"

// EtcdMemberUnhealthyReason (Severity=Error) documents that a Machine's etcd member is unhealthy.
EtcdMemberUnhealthyReason = "EtcdMemberUnhealthy"
)
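For context, the condition types and reasons above are meant to be set through the Cluster API conditions utilities that this commit already uses elsewhere (conditions.MarkTrue, conditions.MarkFalse, conditions.IsTrue). The snippet below is a minimal sketch, not part of the commit, showing how a reconciler could translate an observed kube-apiserver pod state into the new machine-level condition; the helper name and its boolean parameters are hypothetical, and the same imports as the file above (clusterv1, controlplanev1, conditions) are assumed.

// Hypothetical helper, for illustration only: maps an observed kube-apiserver pod state
// onto MachineAPIServerPodHealthyCondition using the new reasons defined above.
func markAPIServerPodHealth(machine *clusterv1.Machine, podExists, podReady bool) {
	switch {
	case !podExists:
		conditions.MarkFalse(machine, controlplanev1.MachineAPIServerPodHealthyCondition,
			controlplanev1.PodMissingReason, clusterv1.ConditionSeverityError, "Pod kube-apiserver is missing")
	case !podReady:
		conditions.MarkFalse(machine, controlplanev1.MachineAPIServerPodHealthyCondition,
			controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, "Waiting for the pod to become ready")
	default:
		conditions.MarkTrue(machine, controlplanev1.MachineAPIServerPodHealthyCondition)
	}
}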
6 changes: 3 additions & 3 deletions controlplane/kubeadm/controllers/consts.go
@@ -23,9 +23,9 @@ const (
// all control plane machines have been deleted.
deleteRequeueAfter = 30 * time.Second

// healthCheckFailedRequeueAfter is how long to wait before trying to scale
// up/down if some target cluster health check has failed
healthCheckFailedRequeueAfter = 20 * time.Second
// preflightFailedRequeueAfter is how long to wait before trying to scale
// up/down if some preflight check for those operations has failed
preflightFailedRequeueAfter = 15 * time.Second

// dependentCertRequeueAfter is how long to wait before checking again to see if
// dependent certificates have been created.
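The renamed constant reflects the new flow: scale up/down now runs preflight checks and requeues after a short delay when one of them fails. A hedged sketch of what such a caller could look like; the preflightChecks helper is an assumption here, since the file that implements the preflight logic is not among the hunks loaded on this page.

// Illustrative fragment only; preflightChecks is assumed, not shown in the diffs on this page.
if err := r.preflightChecks(controlPlane); err != nil {
	// A preflight check failed (e.g. a control plane component or an etcd member is not
	// healthy yet): give the workload cluster some time to settle before retrying the
	// scale up/down operation.
	return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil
}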
140 changes: 71 additions & 69 deletions controlplane/kubeadm/controllers/controller.go
@@ -304,9 +304,15 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, controlPlane.Machines.ConditionGetters(), conditions.AddSourceRef())

// reconcileControlPlaneHealth returns err if there is a machine being deleted or if the control plane is unhealthy.
// If the control plane is not yet initialized, this call shouldn't fail.
if result, err := r.reconcileControlPlaneHealth(ctx, cluster, kcp, controlPlane); err != nil || !result.IsZero() {
// Updates conditions reporting the status of static pods and the status of the etcd cluster.
// NOTE: Conditions reporting KCP operation progress, e.g. Resized or SpecUpToDate, are inlined with the rest of the execution.
if result, err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil || !result.IsZero() {
return result, err
}

// Ensures the number of etcd members is in sync with the number of machines/nodes.
// NOTE: This is usually required after a machine deletion.
if result, err := r.reconcileEtcdMembers(ctx, controlPlane); err != nil || !result.IsZero() {
return result, err
}

@@ -315,9 +321,7 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
switch {
case len(needRollout) > 0:
logger.Info("Rolling out Control Plane machines", "needRollout", needRollout.Names())
// NOTE: we are using Status.UpdatedReplicas from the previous reconciliation only to provide a meaningful message
// and this does not influence any reconciliation logic.
conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), kcp.Status.UpdatedReplicas)
conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), len(controlPlane.Machines)-len(needRollout))
return r.upgradeControlPlane(ctx, cluster, kcp, controlPlane, needRollout)
default:
// make sure last upgrade operation is marked as completed.
@@ -384,27 +388,6 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, clu
logger := r.Log.WithValues("namespace", kcp.Namespace, "kubeadmControlPlane", kcp.Name, "cluster", cluster.Name)
logger.Info("Reconcile KubeadmControlPlane deletion")

// Ignore the health check results here as well as the errors, health check functions are to set health related conditions on Machines.
// Errors may be due to not being able to get workload cluster nodes.
workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
if err != nil {
r.Log.V(2).Info("Cannot get remote client to workload cluster during delete reconciliation", "err", err.Error())
} else {
// Do a health check of the Control Plane components
_, err = workloadCluster.ControlPlaneIsHealthy(ctx)
if err != nil {
// Do nothing
r.Log.V(2).Info("Control plane did not pass control plane health check during delete reconciliation", "err", err.Error())
}

// Do a health check of the etcd
_, err = workloadCluster.EtcdIsHealthy(ctx)
if err != nil {
// Do nothing
r.Log.V(2).Info("Control plane did not pass etcd health check during delete reconciliation", "err", err.Error())
}
}

// Gets all machines, not just control plane machines.
allMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster))
if err != nil {
@@ -418,6 +401,18 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, clu
return ctrl.Result{}, nil
}

controlPlane, err := internal.NewControlPlane(ctx, r.Client, cluster, kcp, ownedMachines)
if err != nil {
logger.Error(err, "failed to initialize control plane")
return ctrl.Result{}, err
}

// Updates conditions reporting the status of static pods and the status of the etcd cluster.
// NOTE: Ignoring failures given that we are deleting
if _, err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil {
logger.Info("failed to reconcile conditions", "error", err.Error())
}

// Aggregate the operational state of all the machines; while aggregating we are adding the
// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
// However, during delete we are hiding the counter (1 of x) because it does not make sense given that
@@ -469,62 +464,69 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o handler.M
return nil
}

// reconcileControlPlaneHealth performs health checks for control plane components and etcd
// It removes any etcd members that do not have a corresponding node.
// Also, as a final step, checks if there is any machines that is being deleted.
func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneHealth(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
// If there is no KCP-owned control-plane machines, then control-plane has not been initialized yet.
if controlPlane.Machines.Len() == 0 {
// reconcileControlPlaneConditions is responsible for reconciling conditions reporting the status of static pods and
// the status of the etcd cluster.
func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneConditions(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
// If the cluster is not yet initialized, there is no way to connect to the workload cluster and fetch information
// for updating conditions. Return early.
if !controlPlane.KCP.Status.Initialized {
return ctrl.Result{}, nil
}

workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster))
if err != nil {
// Failing to connect to the workload cluster can mean the workload cluster is unhealthy for a variety of reasons, such as etcd quorum loss.
return ctrl.Result{}, errors.Wrap(err, "cannot get remote client to workload cluster")
}

errList := []error{}
// Update conditions status
workloadCluster.UpdateStaticPodConditions(ctx, controlPlane)
workloadCluster.UpdateEtcdConditions(ctx, controlPlane)

// Do a health check of the Control Plane components
checkResult, err := workloadCluster.ControlPlaneIsHealthy(ctx)
if err != nil {
errList = append(errList, errors.Wrap(err, "failed to pass control-plane health check"))
} else if err := checkResult.Aggregate(controlPlane); err != nil {
r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
"Waiting for control plane to pass control plane health check to continue reconciliation: %v", err)
errList = append(errList, errors.Wrap(err, "failed to pass control-plane health check"))
// Patch machines with the updated conditions.
if err := controlPlane.PatchMachines(ctx); err != nil {
return ctrl.Result{}, err
}

// If KCP should manage etcd, ensure etcd is healthy.
if controlPlane.IsEtcdManaged() {
checkResult, err := workloadCluster.EtcdIsHealthy(ctx)
if err != nil {
errList = append(errList, errors.Wrap(err, "failed to pass etcd health check"))
} else if err := checkResult.Aggregate(controlPlane); err != nil {
errList = append(errList, errors.Wrap(err, "failed to pass etcd health check"))
r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
"Waiting for control plane to pass etcd health check to continue reconciliation: %v", err)
// If there are any etcd members that do not have corresponding nodes, remove them from etcd and from the kubeadm configmap.
// This will solve issues related to manual control-plane machine deletion.
workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
if err != nil {
errList = append(errList, errors.Wrap(err, "cannot get remote client to workload cluster"))
} else if err := workloadCluster.ReconcileEtcdMembers(ctx); err != nil {
errList = append(errList, errors.Wrap(err, "failed attempt to remove potential hanging etcd members to pass etcd health check to continue reconciliation"))
}
}
// KCP will be patched at the end of Reconcile to reflect updated conditions, so we can return now.
return ctrl.Result{}, nil
}

// reconcileEtcdMembers ensures the number of etcd members is in sync with the number of machines/nodes.
// This is usually required after a machine deletion.
//
// NOTE: this func uses KCP conditions, so it is required to call reconcileControlPlaneConditions before this.
func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
logger := r.Log.WithValues("namespace", controlPlane.KCP.Namespace, "kubeadmControlPlane", controlPlane.KCP.Name, "cluster", controlPlane.Cluster.Name)

// If etcd is not managed by KCP this is a no-op.
if !controlPlane.IsEtcdManaged() {
return ctrl.Result{}, nil
}

// If there is no KCP-owned control-plane machines, then control-plane has not been initialized yet.
if controlPlane.Machines.Len() == 0 {
return ctrl.Result{}, nil
}

if len(errList) > 0 {
return ctrl.Result{}, kerrors.NewAggregate(errList)
// Potential inconsistencies between the list of members and the list of machines/nodes are
// surfaced using the EtcdClusterHealthyCondition; if this condition is true, meaning no inconsistencies exist, return early.
if conditions.IsTrue(controlPlane.KCP, controlplanev1.EtcdClusterHealthyCondition) {
return ctrl.Result{}, nil
}

// We need this check for scale up as well as down to avoid scaling up when there is a machine being deleted.
// This should be at the end of this method as no need to wait for machine to be completely deleted to reconcile etcd.
// TODO: Revisit during machine remediation implementation which may need to cover other machine phases.
if controlPlane.HasDeletingMachine() {
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster))
if err != nil {
// Failing to connect to the workload cluster can mean the workload cluster is unhealthy for a variety of reasons, such as etcd quorum loss.
return ctrl.Result{}, errors.Wrap(err, "cannot get remote client to workload cluster")
}

removedMembers, err := workloadCluster.ReconcileEtcdMembers(ctx)
if err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed attempt to reconcile etcd members")
}

if len(removedMembers) > 0 {
logger.Info("Etcd members without nodes removed from the cluster", "members", removedMembers)
}

return ctrl.Result{}, nil
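reconcileEtcdMembers above only triggers member removal when EtcdClusterHealthyCondition reports a problem, and the removal itself is delegated to workloadCluster.ReconcileEtcdMembers, whose diff is not loaded on this page. As a purely conceptual sketch (not the code added by this commit), reconciling members boils down to dropping every etcd member whose name has no matching Node and reporting which members were removed:

// Conceptual sketch only: remove every etcd member that has no backing node and
// return the names of the removed members.
func removeOrphanedMembers(memberNames []string, nodeNames map[string]bool, remove func(name string) error) ([]string, error) {
	removedMembers := []string{}
	for _, member := range memberNames {
		if nodeNames[member] {
			// The member still has a corresponding node, keep it.
			continue
		}
		if err := remove(member); err != nil {
			return removedMembers, err
		}
		removedMembers = append(removedMembers, member)
	}
	return removedMembers, nil
}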
48 changes: 20 additions & 28 deletions controlplane/kubeadm/controllers/controller_test.go
@@ -397,10 +397,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) {

fmc := &fakeManagementCluster{
Machines: internal.FilterableMachineCollection{},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
}
objs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()}
for i := 0; i < 3; i++ {
@@ -468,10 +465,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) {

fmc := &fakeManagementCluster{
Machines: internal.FilterableMachineCollection{},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
}
objs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()}
for i := 0; i < 3; i++ {
@@ -585,10 +579,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) {

fmc := &fakeManagementCluster{
Machines: internal.FilterableMachineCollection{},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
}
objs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()}
for i := 0; i < 3; i++ {
@@ -671,10 +662,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) {
},
},
},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
}

fakeClient := newFakeClient(g, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy(), fmc.Machines["test0"].DeepCopy())
@@ -1187,10 +1175,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
Client: fakeClient,
managementCluster: &fakeManagementCluster{
Management: &internal.Management{Client: fakeClient},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
},
Log: log.Log,
recorder: record.NewFakeRecorder(32),
@@ -1240,10 +1225,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
Client: fakeClient,
managementCluster: &fakeManagementCluster{
Management: &internal.Management{Client: fakeClient},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
},
Log: log.Log,
recorder: record.NewFakeRecorder(32),
@@ -1275,10 +1257,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
Client: fakeClient,
managementCluster: &fakeManagementCluster{
Management: &internal.Management{Client: fakeClient},
Workload: fakeWorkloadCluster{
ControlPlaneHealthy: true,
EtcdHealthy: true,
},
Workload: fakeWorkloadCluster{},
},
recorder: record.NewFakeRecorder(32),
Log: log.Log,
@@ -1394,6 +1373,11 @@ func createClusterWithControlPlane() (*clusterv1.Cluster, *controlplanev1.Kubead
return cluster, kcp, genericMachineTemplate
}

func setKCPHealthy(kcp *controlplanev1.KubeadmControlPlane) {
conditions.MarkTrue(kcp, controlplanev1.ControlPlaneComponentsHealthyCondition)
conditions.MarkTrue(kcp, controlplanev1.EtcdClusterHealthyCondition)
}

func createMachineNodePair(name string, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, ready bool) (*clusterv1.Machine, *corev1.Node) {
machine := &clusterv1.Machine{
TypeMeta: metav1.TypeMeta{
@@ -1446,6 +1430,14 @@ func createMachineNodePair(name string, cluster *clusterv1.Cluster, kcp *control
return machine, node
}

func setMachineHealthy(m *clusterv1.Machine) {
conditions.MarkTrue(m, controlplanev1.MachineAPIServerPodHealthyCondition)
conditions.MarkTrue(m, controlplanev1.MachineControllerManagerPodHealthyCondition)
conditions.MarkTrue(m, controlplanev1.MachineSchedulerPodHealthyCondition)
conditions.MarkTrue(m, controlplanev1.MachineEtcdPodHealthyCondition)
conditions.MarkTrue(m, controlplanev1.MachineEtcdMemberHealthyCondition)
}

// newCluster return a CAPI cluster object
func newCluster(namespacedName *types.NamespacedName) *clusterv1.Cluster {
return &clusterv1.Cluster{
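The two helpers added above mark every condition that the new condition/preflight logic reads, so tests can build a "healthy" fixture in a couple of lines. Below is a hypothetical test fragment, for illustration only; it reuses createClusterWithControlPlane and createMachineNodePair, which are defined in this file, together with the new helpers.

// Hypothetical test, not part of this diff.
func TestHealthyControlPlaneFixture(t *testing.T) {
	cluster, kcp, tmpl := createClusterWithControlPlane()
	setKCPHealthy(kcp)

	machine, node := createMachineNodePair("machine-0", cluster, kcp, true)
	setMachineHealthy(machine)

	// cluster, kcp, tmpl, machine and node can now be loaded into a fake client and a
	// fakeManagementCluster, as the tests above do, without tripping the health gates.
	_ = tmpl
	_ = node
}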