From e13abeaed2bd3d7b04abd9c9823e2f4185fa6599 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BCringer?= <4662360+sbueringer@users.noreply.github.com> Date: Fri, 8 Nov 2024 15:54:44 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=8C=B1=20Extend=20MS=20ScalingUp=20and=20?= =?UTF-8?q?Remediationg=20conditions=20to=20include=20preflight=20check=20?= =?UTF-8?q?errors=20(#11390)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Extend MS ScalingUp and Remediationg conditions to include preflight check errors Signed-off-by: Stefan Büringer buringerst@vmware.com * Always set message on OwnerRemediated condition Signed-off-by: Stefan Büringer buringerst@vmware.com --------- Signed-off-by: Stefan Büringer buringerst@vmware.com --- .../internal/controllers/remediation.go | 7 +- .../internal/controllers/remediation_test.go | 52 +++++------ .../internal/controllers/status_test.go | 4 +- .../cluster/cluster_controller_status_test.go | 4 +- .../machinedeployment_status_test.go | 4 +- .../machinehealthcheck_controller.go | 7 +- .../machineset/machineset_controller.go | 11 ++- .../machineset_controller_status.go | 29 ++++--- .../machineset_controller_status_test.go | 52 ++++++++++- .../machineset/machineset_controller_test.go | 86 +++++++++++-------- .../machineset/machineset_preflight.go | 15 ++-- .../machineset/machineset_preflight_test.go | 42 +++++---- 12 files changed, 196 insertions(+), 117 deletions(-) diff --git a/controlplane/kubeadm/internal/controllers/remediation.go b/controlplane/kubeadm/internal/controllers/remediation.go index 0aa0063fea8d..48253222768d 100644 --- a/controlplane/kubeadm/internal/controllers/remediation.go +++ b/controlplane/kubeadm/internal/controllers/remediation.go @@ -315,9 +315,10 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") v1beta2conditions.Set(machineToBeRemediated, metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, + Message: "Machine deletionTimestamp set", }) // Prepare the info for tracking the remediation progress into the RemediationInProgressAnnotation. diff --git a/controlplane/kubeadm/internal/controllers/remediation_test.go b/controlplane/kubeadm/internal/controllers/remediation_test.go index 086f43611638..89203037b759 100644 --- a/controlplane/kubeadm/internal/controllers/remediation_test.go +++ b/controlplane/kubeadm/internal/controllers/remediation_test.go @@ -198,7 +198,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m.Namespace, Name: m.Name}, m) g.Expect(err).ToNot(HaveOccurred()) @@ -322,7 +322,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -381,7 +381,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -707,7 +707,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -758,7 +758,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -794,7 +794,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(i - 1)) assertMachineCondition(ctx, g, mi, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, mi, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, mi, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: mi.Namespace, Name: mi.Name}, mi) g.Expect(err).ToNot(HaveOccurred()) @@ -850,7 +850,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -903,7 +903,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -956,7 +956,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -1010,7 +1010,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -1064,7 +1064,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -1163,7 +1163,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -1199,7 +1199,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(i - 4)) assertMachineCondition(ctx, g, mi, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, mi, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, mi, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: mi.Namespace, Name: mi.Name}, mi) g.Expect(err).ToNot(HaveOccurred()) @@ -1270,7 +1270,7 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -1306,7 +1306,7 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(1)) assertMachineCondition(ctx, g, m2, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m2, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m2, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m2.Namespace, Name: m2.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -1382,7 +1382,7 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m2, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m2, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m2, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m2.Namespace, Name: m2.Name}, m2) g.Expect(err).ToNot(HaveOccurred()) @@ -1419,7 +1419,7 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(1)) assertMachineCondition(ctx, g, m3, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m3, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m3, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m3.Namespace, Name: m3.Name}, m3) g.Expect(err).ToNot(HaveOccurred()) @@ -1498,8 +1498,8 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { assertMachineCondition(ctx, g, m2, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") assertMachineCondition(ctx, g, m3, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m2, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") - assertMachineV1beta2Condition(ctx, g, m3, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m2, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") + assertMachineV1beta2Condition(ctx, g, m3, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, "Waiting for remediation") err = env.Get(ctx, client.ObjectKey{Namespace: m2.Namespace, Name: m2.Name}, m2) g.Expect(err).ToNot(HaveOccurred()) @@ -1934,9 +1934,10 @@ func withMachineHealthCheckFailed() machineOption { Reason: clusterv1.MachineHealthCheckNodeDeletedV1Beta2Reason, }) v1beta2conditions.Set(machine, metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }) } } @@ -1952,9 +1953,10 @@ func withStuckRemediation() machineOption { Reason: clusterv1.MachineHealthCheckSucceededV1Beta2Reason, }) v1beta2conditions.Set(machine, metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }) } } diff --git a/controlplane/kubeadm/internal/controllers/status_test.go b/controlplane/kubeadm/internal/controllers/status_test.go index 8aa919e9e49a..4c671b38e14a 100644 --- a/controlplane/kubeadm/internal/controllers/status_test.go +++ b/controlplane/kubeadm/internal/controllers/status_test.go @@ -514,7 +514,7 @@ func Test_setRemediatingCondition(t *testing.T) { healthCheckSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionTrue} healthCheckNotSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionFalse} ownerRemediated := clusterv1.Condition{Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse} - ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Message: "Remediation in progress"} + ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, Message: "Machine deletionTimestamp set"} tests := []struct { name string @@ -550,7 +550,7 @@ func Test_setRemediatingCondition(t *testing.T) { Type: controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Reason, - Message: "Remediation in progress from Machine m3", + Message: "Machine deletionTimestamp set from Machine m3", }, }, { diff --git a/internal/controllers/cluster/cluster_controller_status_test.go b/internal/controllers/cluster/cluster_controller_status_test.go index e062665c6c49..3bf88c546869 100644 --- a/internal/controllers/cluster/cluster_controller_status_test.go +++ b/internal/controllers/cluster/cluster_controller_status_test.go @@ -1502,7 +1502,7 @@ func TestSetRemediatingCondition(t *testing.T) { healthCheckSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionTrue} healthCheckNotSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionFalse} ownerRemediated := clusterv1.Condition{Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse} - ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Message: "Remediation in progress"} + ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, Message: "Machine deletionTimestamp set"} tests := []struct { name string @@ -1550,7 +1550,7 @@ func TestSetRemediatingCondition(t *testing.T) { Type: clusterv1.ClusterRemediatingV1Beta2Condition, Status: metav1.ConditionTrue, Reason: clusterv1.ClusterRemediatingV1Beta2Reason, - Message: "Remediation in progress from Machine m3", + Message: "Machine deletionTimestamp set from Machine m3", }, }, { diff --git a/internal/controllers/machinedeployment/machinedeployment_status_test.go b/internal/controllers/machinedeployment/machinedeployment_status_test.go index ce10e30cff06..f62da1063c7c 100644 --- a/internal/controllers/machinedeployment/machinedeployment_status_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_status_test.go @@ -884,7 +884,7 @@ func Test_setRemediatingCondition(t *testing.T) { healthCheckSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionTrue} healthCheckNotSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionFalse} ownerRemediated := clusterv1.Condition{Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse} - ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Message: "Remediation in progress"} + ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, Message: "Machine deletionTimestamp set"} tests := []struct { name string @@ -932,7 +932,7 @@ func Test_setRemediatingCondition(t *testing.T) { Type: clusterv1.MachineDeploymentRemediatingV1Beta2Condition, Status: metav1.ConditionTrue, Reason: clusterv1.MachineDeploymentRemediatingV1Beta2Reason, - Message: "Remediation in progress from Machine m3", + Message: "Machine deletionTimestamp set from Machine m3", }, }, { diff --git a/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go b/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go index 96b8560a8963..28cd2c230a0f 100644 --- a/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go +++ b/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go @@ -477,9 +477,10 @@ func (r *Reconciler) patchUnhealthyTargets(ctx context.Context, logger logr.Logg if ownerRemediatedCondition := v1beta2conditions.Get(t.Machine, clusterv1.MachineOwnerRemediatedV1Beta2Condition); ownerRemediatedCondition == nil || ownerRemediatedCondition.Status == metav1.ConditionTrue { v1beta2conditions.Set(t.Machine, metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }) } } diff --git a/internal/controllers/machineset/machineset_controller.go b/internal/controllers/machineset/machineset_controller.go index 0c20682441f3..133a99ada951 100644 --- a/internal/controllers/machineset/machineset_controller.go +++ b/internal/controllers/machineset/machineset_controller.go @@ -270,6 +270,8 @@ type scope struct { infrastructureObjectNotFound bool getAndAdoptMachinesForMachineSetSucceeded bool owningMachineDeployment *clusterv1.MachineDeployment + remediationPreflightCheckErrMessage string + scaleUpPreflightCheckErrMessage string } type machineSetReconcileFunc func(ctx context.Context, s *scope) (ctrl.Result, error) @@ -602,6 +604,7 @@ func (r *Reconciler) syncReplicas(ctx context.Context, s *scope) (ctrl.Result, e // If the error is not nil use that as the message for the condition. preflightCheckErrMessage = err.Error() } + s.scaleUpPreflightCheckErrMessage = preflightCheckErrMessage conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason, clusterv1.ConditionSeverityError, preflightCheckErrMessage) return result, err } @@ -1350,6 +1353,7 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) ( if preflightChecksFailed { // PreflightChecks did not pass. Update the MachineOwnerRemediated condition on the unhealthy Machines with // WaitingForRemediationReason reason. + s.remediationPreflightCheckErrMessage = preflightCheckErrMessage if err := patchMachineConditions(ctx, r.Client, machinesToRemediate, metav1.Condition{ Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, @@ -1375,9 +1379,10 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) ( // Instead if we set the condition but the deletion does not go through on next reconcile either the // condition will be fixed/updated or the Machine deletion will be retried. if err := patchMachineConditions(ctx, r.Client, machinesToRemediate, metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, + Message: "Machine deletionTimestamp set", }, &clusterv1.Condition{ Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionTrue, diff --git a/internal/controllers/machineset/machineset_controller_status.go b/internal/controllers/machineset/machineset_controller_status.go index ac59d79b70ed..a7e8baca8d57 100644 --- a/internal/controllers/machineset/machineset_controller_status.go +++ b/internal/controllers/machineset/machineset_controller_status.go @@ -19,6 +19,7 @@ package machineset import ( "context" "fmt" + "slices" "sort" "strings" "time" @@ -47,7 +48,7 @@ func (r *Reconciler) updateStatus(ctx context.Context, s *scope) { // Conditions // Update the ScalingUp and ScalingDown condition. - setScalingUpCondition(ctx, s.machineSet, s.machines, s.bootstrapObjectNotFound, s.infrastructureObjectNotFound, s.getAndAdoptMachinesForMachineSetSucceeded) + setScalingUpCondition(ctx, s.machineSet, s.machines, s.bootstrapObjectNotFound, s.infrastructureObjectNotFound, s.getAndAdoptMachinesForMachineSetSucceeded, s.scaleUpPreflightCheckErrMessage) setScalingDownCondition(ctx, s.machineSet, s.machines, s.getAndAdoptMachinesForMachineSetSucceeded) // MachinesReady condition: aggregate the Machine's Ready condition. @@ -59,7 +60,7 @@ func (r *Reconciler) updateStatus(ctx context.Context, s *scope) { machines := collections.FromMachines(s.machines...) machinesToBeRemediated := machines.Filter(collections.IsUnhealthyAndOwnerRemediated) unhealthyMachines := machines.Filter(collections.IsUnhealthy) - setRemediatingCondition(ctx, s.machineSet, machinesToBeRemediated, unhealthyMachines, s.getAndAdoptMachinesForMachineSetSucceeded) + setRemediatingCondition(ctx, s.machineSet, machinesToBeRemediated, unhealthyMachines, s.getAndAdoptMachinesForMachineSetSucceeded, s.remediationPreflightCheckErrMessage) setDeletingCondition(ctx, s.machineSet, s.machines, s.getAndAdoptMachinesForMachineSetSucceeded) } @@ -92,7 +93,7 @@ func setReplicas(_ context.Context, ms *clusterv1.MachineSet, machines []*cluste ms.Status.V1Beta2.UpToDateReplicas = ptr.To(upToDateReplicas) } -func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines []*clusterv1.Machine, bootstrapObjectNotFound, infrastructureObjectNotFound, getAndAdoptMachinesForMachineSetSucceeded bool) { +func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines []*clusterv1.Machine, bootstrapObjectNotFound, infrastructureObjectNotFound, getAndAdoptMachinesForMachineSetSucceeded bool, scaleUpPreflightCheckErrMessage string) { // If we got unexpected errors in listing the machines (this should never happen), surface them. if !getAndAdoptMachinesForMachineSetSucceeded { v1beta2conditions.Set(ms, metav1.Condition{ @@ -125,7 +126,7 @@ func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines if currentReplicas >= desiredReplicas { var message string if missingReferencesMessage != "" { - message = fmt.Sprintf("Scaling up would be blocked %s", missingReferencesMessage) + message = fmt.Sprintf("Scaling up would be blocked because %s", missingReferencesMessage) } v1beta2conditions.Set(ms, metav1.Condition{ Type: clusterv1.MachineSetScalingUpV1Beta2Condition, @@ -138,8 +139,11 @@ func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines // Scaling up. message := fmt.Sprintf("Scaling up from %d to %d replicas", currentReplicas, desiredReplicas) - if missingReferencesMessage != "" { - message += fmt.Sprintf(" is blocked %s", missingReferencesMessage) + if missingReferencesMessage != "" || scaleUpPreflightCheckErrMessage != "" { + blockMessages := slices.DeleteFunc([]string{missingReferencesMessage, scaleUpPreflightCheckErrMessage}, func(s string) bool { + return s == "" + }) + message += fmt.Sprintf(" is blocked because %s", strings.Join(blockMessages, " and ")) } v1beta2conditions.Set(ms, metav1.Condition{ Type: clusterv1.MachineSetScalingUpV1Beta2Condition, @@ -281,7 +285,7 @@ func setMachinesUpToDateCondition(ctx context.Context, machineSet *clusterv1.Mac v1beta2conditions.Set(machineSet, *upToDateCondition) } -func setRemediatingCondition(ctx context.Context, machineSet *clusterv1.MachineSet, machinesToBeRemediated, unhealthyMachines collections.Machines, getAndAdoptMachinesForMachineSetSucceeded bool) { +func setRemediatingCondition(ctx context.Context, machineSet *clusterv1.MachineSet, machinesToBeRemediated, unhealthyMachines collections.Machines, getAndAdoptMachinesForMachineSetSucceeded bool, remediationPreflightCheckErrMessage string) { if !getAndAdoptMachinesForMachineSetSucceeded { v1beta2conditions.Set(machineSet, metav1.Condition{ Type: clusterv1.MachineSetRemediatingV1Beta2Condition, @@ -320,11 +324,16 @@ func setRemediatingCondition(ctx context.Context, machineSet *clusterv1.MachineS return } + msg := remediatingCondition.Message + if remediationPreflightCheckErrMessage != "" { + msg = fmt.Sprintf("Triggering further remediations is blocked because %s; %s", remediationPreflightCheckErrMessage, remediatingCondition.Message) + } + v1beta2conditions.Set(machineSet, metav1.Condition{ Type: remediatingCondition.Type, Status: metav1.ConditionTrue, Reason: clusterv1.MachineSetRemediatingV1Beta2Reason, - Message: remediatingCondition.Message, + Message: msg, }) } @@ -383,10 +392,10 @@ func calculateMissingReferencesMessage(ms *clusterv1.MachineSet, bootstrapTempla } if len(missingObjects) == 1 { - return fmt.Sprintf("because %s does not exist", missingObjects[0]) + return fmt.Sprintf("%s does not exist", missingObjects[0]) } - return fmt.Sprintf("because %s do not exist", strings.Join(missingObjects, " and ")) + return fmt.Sprintf("%s do not exist", strings.Join(missingObjects, " and ")) } func aggregateStaleMachines(machines []*clusterv1.Machine) string { diff --git a/internal/controllers/machineset/machineset_controller_status_test.go b/internal/controllers/machineset/machineset_controller_status_test.go index 5ae3277fe2c6..42b2d29d1df4 100644 --- a/internal/controllers/machineset/machineset_controller_status_test.go +++ b/internal/controllers/machineset/machineset_controller_status_test.go @@ -194,6 +194,7 @@ func Test_setScalingUpCondition(t *testing.T) { bootstrapObjectNotFound bool infrastructureObjectNotFound bool getAndAdoptMachinesForMachineSetSucceeded bool + scaleUpPreflightCheckErrMessage string expectCondition metav1.Condition }{ { @@ -299,6 +300,27 @@ func Test_setScalingUpCondition(t *testing.T) { Message: "Scaling up from 0 to 3 replicas is blocked because DockerMachineTemplate does not exist", }, }, + { + name: "scaling up and blocked by bootstrap and infrastructure object and preflight checks", + ms: scalingUpMachineSetWith3Replicas, + bootstrapObjectNotFound: true, + infrastructureObjectNotFound: true, + getAndAdoptMachinesForMachineSetSucceeded: true, + // This preflight check error can happen when a MachineSet is scaling up while the control plane + // already has a newer Kubernetes version. + scaleUpPreflightCheckErrMessage: "MachineSet version (1.25.5) and ControlPlane version (1.26.2) " + + "do not conform to kubeadm version skew policy as kubeadm only supports joining with the same " + + "major+minor version as the control plane (\"KubeadmVersionSkew\" preflight check failed)", + expectCondition: metav1.Condition{ + Type: clusterv1.MachineSetScalingUpV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineSetScalingUpV1Beta2Reason, + Message: "Scaling up from 0 to 3 replicas is blocked because KubeadmBootstrapTemplate and DockerMachineTemplate " + + "do not exist and MachineSet version (1.25.5) and ControlPlane version (1.26.2) " + + "do not conform to kubeadm version skew policy as kubeadm only supports joining with the same " + + "major+minor version as the control plane (\"KubeadmVersionSkew\" preflight check failed)", + }, + }, { name: "deleting", ms: deletingMachineSetWith3Replicas, @@ -317,7 +339,7 @@ func Test_setScalingUpCondition(t *testing.T) { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) - setScalingUpCondition(ctx, tt.ms, tt.machines, tt.bootstrapObjectNotFound, tt.infrastructureObjectNotFound, tt.getAndAdoptMachinesForMachineSetSucceeded) + setScalingUpCondition(ctx, tt.ms, tt.machines, tt.bootstrapObjectNotFound, tt.infrastructureObjectNotFound, tt.getAndAdoptMachinesForMachineSetSucceeded, tt.scaleUpPreflightCheckErrMessage) condition := v1beta2conditions.Get(tt.ms, clusterv1.MachineSetScalingUpV1Beta2Condition) g.Expect(condition).ToNot(BeNil()) @@ -758,13 +780,15 @@ func Test_setRemediatingCondition(t *testing.T) { healthCheckSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionTrue} healthCheckNotSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionFalse} ownerRemediated := clusterv1.Condition{Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse} - ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Message: "Remediation in progress"} + ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, Message: "Machine deletionTimestamp set"} + ownerRemediatedWaitingForRemediationV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, Message: "Waiting for remediation"} tests := []struct { name string machineSet *clusterv1.MachineSet machines []*clusterv1.Machine getAndAdoptMachinesForMachineSetSucceeded bool + remediationPreflightCheckErrMessage string expectCondition metav1.Condition }{ { @@ -806,7 +830,27 @@ func Test_setRemediatingCondition(t *testing.T) { Type: clusterv1.MachineSetRemediatingV1Beta2Condition, Status: metav1.ConditionTrue, Reason: clusterv1.MachineSetRemediatingV1Beta2Reason, - Message: "Remediation in progress from Machine m3", + Message: "Machine deletionTimestamp set from Machine m3", + }, + }, + { + name: "With machines to be remediated by MS and preflight check error", + machineSet: &clusterv1.MachineSet{}, + machines: []*clusterv1.Machine{ + fakeMachine("m1", withConditions(healthCheckSucceeded)), // Healthy machine + fakeMachine("m2", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation + fakeMachine("m3", withConditions(healthCheckNotSucceeded, ownerRemediated), withV1Beta2Condition(ownerRemediatedV1Beta2)), + fakeMachine("m4", withConditions(healthCheckNotSucceeded, ownerRemediated), withV1Beta2Condition(ownerRemediatedWaitingForRemediationV1Beta2)), + }, + getAndAdoptMachinesForMachineSetSucceeded: true, + // This preflight check error can happen when a Machine becomes unhealthy while the control plane is upgrading. + remediationPreflightCheckErrMessage: "KubeadmControlPlane ns1/cp1 is upgrading (\"ControlPlaneIsStable\" preflight check failed)", + expectCondition: metav1.Condition{ + Type: clusterv1.MachineSetRemediatingV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineSetRemediatingV1Beta2Reason, + Message: "Triggering further remediations is blocked because KubeadmControlPlane ns1/cp1 is upgrading (\"ControlPlaneIsStable\" preflight check failed); " + + "Machine deletionTimestamp set from Machine m3; Waiting for remediation from Machine m4", }, }, { @@ -852,7 +896,7 @@ func Test_setRemediatingCondition(t *testing.T) { machinesToBeRemediated = machines.Filter(collections.IsUnhealthyAndOwnerRemediated) unHealthyMachines = machines.Filter(collections.IsUnhealthy) } - setRemediatingCondition(ctx, tt.machineSet, machinesToBeRemediated, unHealthyMachines, tt.getAndAdoptMachinesForMachineSetSucceeded) + setRemediatingCondition(ctx, tt.machineSet, machinesToBeRemediated, unHealthyMachines, tt.getAndAdoptMachinesForMachineSetSucceeded, tt.remediationPreflightCheckErrMessage) condition := v1beta2conditions.Get(tt.machineSet, clusterv1.MachineSetRemediatingV1Beta2Condition) g.Expect(condition).ToNot(BeNil()) diff --git a/internal/controllers/machineset/machineset_controller_test.go b/internal/controllers/machineset/machineset_controller_test.go index 0ccd20d9025c..1c9f529cb8f0 100644 --- a/internal/controllers/machineset/machineset_controller_test.go +++ b/internal/controllers/machineset/machineset_controller_test.go @@ -1518,9 +1518,10 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { V1Beta2: &clusterv1.MachineV1Beta2Status{ Conditions: []metav1.Condition{ { - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }, { Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, @@ -1553,9 +1554,10 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { Conditions: []metav1.Condition{ { // This condition should be cleaned up because HealthCheckSucceeded is true. - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }, { Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, @@ -1592,9 +1594,10 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { c := v1beta2conditions.Get(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) g.Expect(c).ToNot(BeNil()) g.Expect(*c).To(v1beta2conditions.MatchCondition(metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, + Message: "Machine deletionTimestamp set", }, v1beta2conditions.IgnoreLastTransitionTime(true))) // Verify the healthy machine is not deleted and does not have the OwnerRemediated condition. @@ -1645,9 +1648,10 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { V1Beta2: &clusterv1.MachineV1Beta2Status{ Conditions: []metav1.Condition{ { - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }, { Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, @@ -1680,9 +1684,10 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { Conditions: []metav1.Condition{ { // This condition should be cleaned up because HealthCheckSucceeded is true. - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }, { Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, @@ -1723,11 +1728,10 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { c := v1beta2conditions.Get(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) g.Expect(c).ToNot(BeNil()) g.Expect(*c).To(v1beta2conditions.MatchCondition(metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineSetMachineRemediationDeferredV1Beta2Reason, - Message: "Machine remediation on hold because GenericControlPlane default/cp1 is upgrading (\"ControlPlaneIsStable\" preflight failed). " + - "The operation will continue after the preflight check(s) pass", + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationDeferredV1Beta2Reason, + Message: "GenericControlPlane default/cp1 is upgrading (\"ControlPlaneIsStable\" preflight check failed)", }, v1beta2conditions.IgnoreLastTransitionTime(true))) // Verify the healthy machine is not deleted and does not have the OwnerRemediated condition. @@ -1811,9 +1815,10 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { V1Beta2: &clusterv1.MachineV1Beta2Status{ Conditions: []metav1.Condition{ { - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }, { Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, @@ -1846,9 +1851,10 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { Conditions: []metav1.Condition{ { // This condition should be cleaned up because HealthCheckSucceeded is true. - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }, { Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, @@ -1931,9 +1937,10 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { c = v1beta2conditions.Get(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) g.Expect(c).ToNot(BeNil()) g.Expect(*c).To(v1beta2conditions.MatchCondition(metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, + Message: "Machine deletionTimestamp set", }, v1beta2conditions.IgnoreLastTransitionTime(true))) // Verify (again) the healthy machine is not deleted and does not have the OwnerRemediated condition. @@ -2021,9 +2028,10 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { V1Beta2: &clusterv1.MachineV1Beta2Status{ Conditions: []metav1.Condition{ { - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }, { Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, @@ -2058,9 +2066,10 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { Conditions: []metav1.Condition{ { // This condition should be cleaned up because HealthCheckSucceeded is true. - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }, { Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, @@ -2197,9 +2206,10 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { c := v1beta2conditions.Get(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) g.Expect(c).ToNot(BeNil()) g.Expect(*c).To(v1beta2conditions.MatchCondition(metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, + Message: "Machine deletionTimestamp set", }, v1beta2conditions.IgnoreLastTransitionTime(true))) g.Expect(m.DeletionTimestamp).ToNot(BeZero()) diff --git a/internal/controllers/machineset/machineset_preflight.go b/internal/controllers/machineset/machineset_preflight.go index 0b3bb07bf63f..1f446eb96c81 100644 --- a/internal/controllers/machineset/machineset_preflight.go +++ b/internal/controllers/machineset/machineset_preflight.go @@ -136,9 +136,8 @@ func (r *Reconciler) runPreflightChecks(ctx context.Context, cluster *clusterv1. for _, v := range preflightCheckErrs { preflightCheckErrStrings = append(preflightCheckErrStrings, *v) } - msg := fmt.Sprintf("%s on hold because %s. The operation will continue after the preflight check(s) pass", action, strings.Join(preflightCheckErrStrings, "; ")) - log.Info(msg) - return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, msg, nil + log.Info(fmt.Sprintf("%s on hold because %s. The operation will continue after the preflight check(s) pass", action, strings.Join(preflightCheckErrStrings, "; "))) + return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, strings.Join(preflightCheckErrStrings, "; "), nil } return ctrl.Result{}, "", nil } @@ -152,7 +151,7 @@ func (r *Reconciler) controlPlaneStablePreflightCheck(controlPlane *unstructured return nil, errors.Wrapf(err, "failed to perform %q preflight check: failed to check if %s %s is provisioning", clusterv1.MachineSetPreflightCheckControlPlaneIsStable, controlPlane.GetKind(), cpKlogRef) } if isProvisioning { - return ptr.To(fmt.Sprintf("%s %s is provisioning (%q preflight failed)", controlPlane.GetKind(), cpKlogRef, clusterv1.MachineSetPreflightCheckControlPlaneIsStable)), nil + return ptr.To(fmt.Sprintf("%s %s is provisioning (%q preflight check failed)", controlPlane.GetKind(), cpKlogRef, clusterv1.MachineSetPreflightCheckControlPlaneIsStable)), nil } // Check that the control plane is not upgrading. @@ -161,7 +160,7 @@ func (r *Reconciler) controlPlaneStablePreflightCheck(controlPlane *unstructured return nil, errors.Wrapf(err, "failed to perform %q preflight check: failed to check if the %s %s is upgrading", clusterv1.MachineSetPreflightCheckControlPlaneIsStable, controlPlane.GetKind(), cpKlogRef) } if isUpgrading { - return ptr.To(fmt.Sprintf("%s %s is upgrading (%q preflight failed)", controlPlane.GetKind(), cpKlogRef, clusterv1.MachineSetPreflightCheckControlPlaneIsStable)), nil + return ptr.To(fmt.Sprintf("%s %s is upgrading (%q preflight check failed)", controlPlane.GetKind(), cpKlogRef, clusterv1.MachineSetPreflightCheckControlPlaneIsStable)), nil } return nil, nil @@ -173,7 +172,7 @@ func (r *Reconciler) kubernetesVersionPreflightCheck(cpSemver, msSemver semver.V // => MS minor version cannot be outside of the supported skew. // Kubernetes skew policy: https://kubernetes.io/releases/version-skew-policy/#kubelet if msSemver.Minor > cpSemver.Minor { - return ptr.To(fmt.Sprintf("MachineSet version (%s) and ControlPlane version (%s) do not conform to the kubernetes version skew policy as MachineSet version is higher than ControlPlane version (%q preflight failed)", msSemver.String(), cpSemver.String(), clusterv1.MachineSetPreflightCheckKubernetesVersionSkew)) + return ptr.To(fmt.Sprintf("MachineSet version (%s) and ControlPlane version (%s) do not conform to the kubernetes version skew policy as MachineSet version is higher than ControlPlane version (%q preflight check failed)", msSemver.String(), cpSemver.String(), clusterv1.MachineSetPreflightCheckKubernetesVersionSkew)) } minorSkew := uint64(3) // For Control Planes running Kubernetes < v1.28, the version skew policy for kubelets is two. @@ -181,7 +180,7 @@ func (r *Reconciler) kubernetesVersionPreflightCheck(cpSemver, msSemver semver.V minorSkew = 2 } if msSemver.Minor < cpSemver.Minor-minorSkew { - return ptr.To(fmt.Sprintf("MachineSet version (%s) and ControlPlane version (%s) do not conform to the kubernetes version skew policy as MachineSet version is more than %d minor versions older than the ControlPlane version (%q preflight failed)", msSemver.String(), cpSemver.String(), minorSkew, clusterv1.MachineSetPreflightCheckKubernetesVersionSkew)) + return ptr.To(fmt.Sprintf("MachineSet version (%s) and ControlPlane version (%s) do not conform to the kubernetes version skew policy as MachineSet version is more than %d minor versions older than the ControlPlane version (%q preflight check failed)", msSemver.String(), cpSemver.String(), minorSkew, clusterv1.MachineSetPreflightCheckKubernetesVersionSkew)) } return nil @@ -205,7 +204,7 @@ func (r *Reconciler) kubeadmVersionPreflightCheck(cpSemver, msSemver semver.Vers groupVersion.Group == bootstrapv1.GroupVersion.Group if kubeadmBootstrapProviderUsed { if cpSemver.Minor != msSemver.Minor { - return ptr.To(fmt.Sprintf("MachineSet version (%s) and ControlPlane version (%s) do not conform to kubeadm version skew policy as kubeadm only supports joining with the same major+minor version as the control plane (%q preflight failed)", msSemver.String(), cpSemver.String(), clusterv1.MachineSetPreflightCheckKubeadmVersionSkew)), nil + return ptr.To(fmt.Sprintf("MachineSet version (%s) and ControlPlane version (%s) do not conform to kubeadm version skew policy as kubeadm only supports joining with the same major+minor version as the control plane (%q preflight check failed)", msSemver.String(), cpSemver.String(), clusterv1.MachineSetPreflightCheckKubeadmVersionSkew)), nil } } return nil, nil diff --git a/internal/controllers/machineset/machineset_preflight_test.go b/internal/controllers/machineset/machineset_preflight_test.go index 811ddbe49aaa..58aca4af4fb9 100644 --- a/internal/controllers/machineset/machineset_preflight_test.go +++ b/internal/controllers/machineset/machineset_preflight_test.go @@ -69,12 +69,13 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { t.Run("should run preflight checks if the feature gate is enabled", func(t *testing.T) { tests := []struct { - name string - cluster *clusterv1.Cluster - controlPlane *unstructured.Unstructured - machineSet *clusterv1.MachineSet - wantPass bool - wantErr bool + name string + cluster *clusterv1.Cluster + controlPlane *unstructured.Unstructured + machineSet *clusterv1.MachineSet + wantPass bool + wantPreflightCheckErrMessage string + wantErr bool }{ { name: "should pass if cluster has no control plane", @@ -141,9 +142,10 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { ControlPlaneRef: contract.ObjToRef(controlPlaneProvisioning), }, }, - controlPlane: controlPlaneProvisioning, - machineSet: &clusterv1.MachineSet{}, - wantPass: false, + controlPlane: controlPlaneProvisioning, + machineSet: &clusterv1.MachineSet{}, + wantPass: false, + wantPreflightCheckErrMessage: "GenericControlPlane ns1/cp1 is provisioning (\"ControlPlaneIsStable\" preflight check failed)", }, { name: "control plane preflight check: should fail if the control plane is upgrading", @@ -155,9 +157,10 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { ControlPlaneRef: contract.ObjToRef(controlPlaneUpgrading), }, }, - controlPlane: controlPlaneUpgrading, - machineSet: &clusterv1.MachineSet{}, - wantPass: false, + controlPlane: controlPlaneUpgrading, + machineSet: &clusterv1.MachineSet{}, + wantPass: false, + wantPreflightCheckErrMessage: "GenericControlPlane ns1/cp1 is upgrading (\"ControlPlaneIsStable\" preflight check failed)", }, { name: "control plane preflight check: should pass if the control plane is upgrading but the preflight check is skipped", @@ -269,7 +272,8 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { }, }, }, - wantPass: false, + wantPass: false, + wantPreflightCheckErrMessage: "MachineSet version (1.27.0) and ControlPlane version (1.26.2) do not conform to the kubernetes version skew policy as MachineSet version is higher than ControlPlane version (\"KubernetesVersionSkew\" preflight check failed)", }, { name: "kubernetes version preflight check: should fail if the machine set minor version is 4 older than control plane minor version for >= v1.28", @@ -294,7 +298,8 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { }, }, }, - wantPass: false, + wantPass: false, + wantPreflightCheckErrMessage: "MachineSet version (1.24.0) and ControlPlane version (1.28.0) do not conform to the kubernetes version skew policy as MachineSet version is more than 3 minor versions older than the ControlPlane version (\"KubernetesVersionSkew\" preflight check failed)", }, { name: "kubernetes version preflight check: should fail if the machine set minor version is 3 older than control plane minor version for < v1.28", @@ -319,7 +324,8 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { }, }, }, - wantPass: false, + wantPass: false, + wantPreflightCheckErrMessage: "MachineSet version (1.23.0) and ControlPlane version (1.26.2) do not conform to the kubernetes version skew policy as MachineSet version is more than 2 minor versions older than the ControlPlane version (\"KubernetesVersionSkew\" preflight check failed)", }, { name: "kubernetes version preflight check: should pass if the machine set minor version is greater than control plane minor version but the preflight check is skipped", @@ -426,7 +432,8 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { }, }, }, - wantPass: false, + wantPass: false, + wantPreflightCheckErrMessage: "MachineSet version (1.25.5) and ControlPlane version (1.26.2) do not conform to kubeadm version skew policy as kubeadm only supports joining with the same major+minor version as the control plane (\"KubeadmVersionSkew\" preflight check failed)", }, { name: "kubeadm version preflight check: should pass if the machine set is not using kubeadm bootstrap provider", @@ -556,13 +563,14 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { r := &Reconciler{ Client: fakeClient, } - result, _, err := r.runPreflightChecks(ctx, tt.cluster, tt.machineSet, "") + result, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, tt.cluster, tt.machineSet, "") if tt.wantErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).ToNot(HaveOccurred()) g.Expect(result.IsZero()).To(Equal(tt.wantPass)) } + g.Expect(preflightCheckErrMessage).To(BeComparableTo(tt.wantPreflightCheckErrMessage)) }) } })