🐛 KCP should update conditions when reconcileHealth is failing #3879

Closed
8 changes: 1 addition & 7 deletions controlplane/kubeadm/controllers/controller.go
@@ -304,20 +304,14 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
 	// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
 	conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, controlPlane.Machines.ConditionGetters(), conditions.AddSourceRef())
 
-	// reconcileControlPlaneHealth returns err if there is a machine being deleted or if the control plane is unhealthy.
-	// If the control plane is not yet initialized, this call shouldn't fail.
-	if result, err := r.reconcileControlPlaneHealth(ctx, cluster, kcp, controlPlane); err != nil || !result.IsZero() {
-		return result, err
-	}
-
 	// Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations.
 	needRollout := controlPlane.MachinesNeedingRollout()
 	switch {
 	case len(needRollout) > 0:
 		logger.Info("Rolling out Control Plane machines", "needRollout", needRollout.Names())
 		// NOTE: we are using Status.UpdatedReplicas from the previous reconciliation only to provide a meaningful message
 		// and this does not influence any reconciliation logic.
-		conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), kcp.Status.UpdatedReplicas)
+		conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), len(controlPlane.Machines)-len(needRollout))
Member Author

Switching to len(controlPlane.Machines)-len(needRollout) because it relies on the same information used in this func (instead of relying on a value computed somewhere else).
But this is a nit; the real fix to the issue is moving reconcileHealth back into scaleUpControlPlane and scaleDownControlPlane.

 		return r.upgradeControlPlane(ctx, cluster, kcp, controlPlane, needRollout)
 	default:
 		// make sure last upgrade operation is marked as completed.
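As an aside to the review comment above, here is a minimal, self-contained sketch of the arithmetic behind the new condition message; the machine names and counts are hypothetical and the cluster-api types are deliberately not used:

```go
package main

import "fmt"

func main() {
	// Hypothetical control plane with 3 machines, 2 of which still need a rollout.
	machines := []string{"cp-1", "cp-2", "cp-3"}
	needRollout := []string{"cp-1", "cp-2"}

	// After the change, the "up to date" count is derived from the same collections
	// the function already computed (machines and needRollout), rather than from
	// Status.UpdatedReplicas, which reflects the previous reconciliation.
	upToDate := len(machines) - len(needRollout)

	fmt.Printf("Rolling %d replicas with outdated spec (%d replicas up to date)\n",
		len(needRollout), upToDate)
	// Output: Rolling 2 replicas with outdated spec (1 replicas up to date)
}
```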
12 changes: 12 additions & 0 deletions controlplane/kubeadm/controllers/scale.go
@@ -62,6 +62,12 @@ func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Conte
 func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
 	logger := controlPlane.Logger()
 
+	// reconcileControlPlaneHealth returns err if there is a machine being deleted or if the control plane is unhealthy.
+	// If the control plane is not yet initialized, this call shouldn't fail.
+	if result, err := r.reconcileControlPlaneHealth(ctx, cluster, kcp, controlPlane); err != nil || !result.IsZero() {
+		return result, err
+	}
+
 	// Create the bootstrap configuration
 	bootstrapSpec := controlPlane.JoinControlPlaneConfig()
 	fd := controlPlane.NextFailureDomainForScaleUp()
@@ -84,6 +90,12 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 ) (ctrl.Result, error) {
 	logger := controlPlane.Logger()
 
+	// reconcileControlPlaneHealth returns err if there is a machine being deleted or if the control plane is unhealthy.
+	// If the control plane is not yet initialized, this call shouldn't fail.
+	if result, err := r.reconcileControlPlaneHealth(ctx, cluster, kcp, controlPlane); err != nil || !result.IsZero() {
+		return result, err
+	}
+
 	workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
 	if err != nil {
 		logger.Error(err, "Failed to create client to workload cluster")
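To make the resulting control flow concrete, here is a minimal, runnable sketch of the early-return guard both scale functions now begin with; the result type, healthCheck stub, and scaleUp function are hypothetical stand-ins for ctrl.Result, reconcileControlPlaneHealth, and scaleUpControlPlane, used only to illustrate the pattern:

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// result mimics the relevant shape of ctrl.Result for this sketch.
type result struct{ RequeueAfter time.Duration }

func (r result) IsZero() bool { return r == (result{}) }

// healthCheck stands in for reconcileControlPlaneHealth: it can fail outright,
// request a requeue via a non-zero result, or let the caller proceed.
func healthCheck(ctx context.Context) (result, error) {
	return result{}, errors.New("control plane machine is being deleted")
}

// scaleUp stands in for scaleUpControlPlane: the health guard runs before any
// new machine would be created, so an unhealthy control plane short-circuits
// only the scale operation rather than the whole reconcile pass.
func scaleUp(ctx context.Context) (result, error) {
	if res, err := healthCheck(ctx); err != nil || !res.IsZero() {
		return res, err
	}
	// ...create the bootstrap config and the new control plane machine here...
	return result{}, nil
}

func main() {
	if _, err := scaleUp(context.Background()); err != nil {
		fmt.Println("scale up blocked:", err)
	}
}
```

With the guard moved inside the scale functions, the main reconcile pass can still update KCP conditions even when the health check fails, which is the gap described in #3879.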