Skip to content

Commit

Permalink
address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Yuvaraj Kakaraparthi committed Jun 2, 2023
1 parent e1a1105 commit 8b4a0ea
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 68 deletions.
58 changes: 27 additions & 31 deletions internal/controllers/machineset/machineset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ func (r *Reconciler) syncReplicas(ctx context.Context, cluster *clusterv1.Cluste
// If the error is not nil use that as the message for the condition.
preflightCheckErrMessage = err.Error()
}
conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason, clusterv1.ConditionSeverityWarning, preflightCheckErrMessage)
conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason, clusterv1.ConditionSeverityError, preflightCheckErrMessage)
return result, err
}

Expand Down Expand Up @@ -978,36 +978,14 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, cluster *cl
return ctrl.Result{}, nil
}

preflightChecksPassed := false
preflightChecksResult, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Machine Remediation")
preflightChecksPassed = preflightChecksResult.IsZero() && err == nil
if err != nil {
// If err is not nil use that as the preflightCheckErrMessage
preflightCheckErrMessage = err.Error()
}

var aggregateErr error
if preflightChecksPassed {
// PreflightChecks passed, so it is safe to remediate unhealthy machines.
// Remediate unhealthy machines by deleting them.
var errs []error
for _, m := range machinesToRemediate {
log.Info(fmt.Sprintf("Deleting Machine %s because marked as unhealthy by the MachineHealthCheck controller", klog.KObj(m)))
patch := client.MergeFrom(m.DeepCopy())
if err := r.Client.Delete(ctx, m); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to delete Machine %s", klog.KObj(m)))
continue
}
conditions.MarkTrue(m, clusterv1.MachineOwnerRemediatedCondition)
if err := r.Client.Status().Patch(ctx, m, patch); err != nil && !apierrors.IsNotFound(err) {
errs = append(errs, errors.Wrapf(err, "failed to update status of Machine %s", klog.KObj(m)))
}
}
aggregateErr = kerrors.NewAggregate(errs)
if aggregateErr != nil {
log.Info("Failed while deleting unhealthy machines", "err", aggregateErr)
}
} else {
preflightChecksFailed := err != nil || !preflightChecksResult.IsZero()
if preflightChecksFailed {
// PreflightChecks did not pass. Update the MachineOwnerRemediated condition on the unhealthy Machines with
// WaitingForRemediationReason reason.
var errs []error
Expand All @@ -1022,16 +1000,34 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, cluster *cl
errs = append(errs, errors.Wrapf(err, "failed to patch Machine %s", klog.KObj(m)))
}
}
aggregateErr = kerrors.NewAggregate(errs)
if aggregateErr != nil {
log.Info("Failed while patching unhealthy machines", "err", aggregateErr)

if len(errs) > 0 {
return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate(errs), "failed to patch unhealthy Machines")
}
return preflightChecksResult, nil
}

// PreflightChecks passed, so it is safe to remediate unhealthy machines.
// Remediate unhealthy machines by deleting them.
var errs []error
for _, m := range machinesToRemediate {
log.Info(fmt.Sprintf("Deleting Machine %s because it was marked as unhealthy by the MachineHealthCheck controller", klog.KObj(m)))
patch := client.MergeFrom(m.DeepCopy())
if err := r.Client.Delete(ctx, m); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to delete Machine %s", klog.KObj(m)))
continue
}
conditions.MarkTrue(m, clusterv1.MachineOwnerRemediatedCondition)
if err := r.Client.Status().Patch(ctx, m, patch); err != nil && !apierrors.IsNotFound(err) {
errs = append(errs, errors.Wrapf(err, "failed to update status of Machine %s", klog.KObj(m)))
}
}

if aggregateErr != nil {
return ctrl.Result{}, aggregateErr
if len(errs) > 0 {
return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate(errs), "failed to delete unhealthy Machines")
}
return preflightChecksResult, nil

return ctrl.Result{}, nil
}

func reconcileExternalTemplateReference(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, ref *corev1.ObjectReference) error {
Expand Down
72 changes: 35 additions & 37 deletions internal/controllers/machineset/machineset_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1379,16 +1379,12 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) {
_, err := r.reconcileUnhealthyMachines(ctx, cluster, machineSet, machines)
g.Expect(err).To(BeNil())
// Verify the unhealthy machine is deleted.
g.Eventually(func(g Gomega) {
m := &clusterv1.Machine{}
err := r.Client.Get(ctx, client.ObjectKeyFromObject(unhealthyMachine), m)
g.Expect(apierrors.IsNotFound(err)).To(BeTrue())
}).Should(Succeed())
m := &clusterv1.Machine{}
err = r.Client.Get(ctx, client.ObjectKeyFromObject(unhealthyMachine), m)
g.Expect(apierrors.IsNotFound(err)).To(BeTrue())
// Verify the healthy machine is not deleted.
g.Consistently(func(g Gomega) {
m := &clusterv1.Machine{}
g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(healthyMachine), m)).Should(Succeed())
}).Should(Succeed())
m = &clusterv1.Machine{}
g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(healthyMachine), m)).Should(Succeed())
})

t.Run("should update the unhealthy machine MachineOwnerRemediated condition if preflight checks did not pass", func(t *testing.T) {
Expand Down Expand Up @@ -1440,21 +1436,24 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) {
r := &Reconciler{Client: fakeClient}
_, err := r.reconcileUnhealthyMachines(ctx, cluster, machineSet, machines)
g.Expect(err).To(BeNil())

// Verify the unhealthy machine has the updated condition.
g.Eventually(func(g Gomega) {
m := &clusterv1.Machine{}
g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(unhealthyMachine), m)).To(Succeed())
g.Expect(conditions.Has(m, clusterv1.MachineOwnerRemediatedCondition)).
To(BeTrue(), "Machine should have the %s condition set",
clusterv1.MachineOwnerRemediatedCondition)
machineOwnerRemediatedCondition := conditions.Get(m, clusterv1.MachineOwnerRemediatedCondition)
g.Expect(machineOwnerRemediatedCondition.Status).
To(Equal(corev1.ConditionFalse), "%s condition status should be false",
clusterv1.MachineOwnerRemediatedCondition)
g.Expect(machineOwnerRemediatedCondition.Reason).
To(Equal(clusterv1.WaitingForRemediationReason), "%s condition should have reason %s",
clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason)
}).Should(Succeed())
condition := clusterv1.MachineOwnerRemediatedCondition
m := &clusterv1.Machine{}
g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(unhealthyMachine), m)).To(Succeed())
g.Expect(conditions.Has(m, condition)).
To(BeTrue(), "Machine should have the %s condition set", condition)
machineOwnerRemediatedCondition := conditions.Get(m, condition)
g.Expect(machineOwnerRemediatedCondition.Status).
To(Equal(corev1.ConditionFalse), "%s condition status should be false", condition)
g.Expect(machineOwnerRemediatedCondition.Reason).
To(Equal(clusterv1.WaitingForRemediationReason), "%s condition should have reason %s", condition, clusterv1.WaitingForRemediationReason)

// Verify the healthy machine continues to not have the MachineOwnerRemediated condition.
m = &clusterv1.Machine{}
g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(healthyMachine), m)).To(Succeed())
g.Expect(conditions.Has(m, condition)).
To(BeFalse(), "Machine should not have the %s condition set", condition)
})
}

Expand Down Expand Up @@ -1494,24 +1493,23 @@ func TestMachineSetReconciler_syncReplicas(t *testing.T) {
r := &Reconciler{Client: fakeClient}
result, err := r.syncReplicas(ctx, cluster, machineSet, nil)
g.Expect(err).To(BeNil())
g.Expect(result.IsZero()).To(BeFalse(), "syncReplicas should return a 'zero' result")
g.Expect(result.IsZero()).To(BeFalse(), "syncReplicas should not return a 'zero' result")

// Verify the proper condition is set on the MachineSet.
g.Expect(conditions.Has(machineSet, clusterv1.MachinesCreatedCondition)).
To(BeTrue(), "MachineSet should have the %s condition set",
clusterv1.MachinesCreatedCondition)
machinesCreatedCondition := conditions.Get(machineSet, clusterv1.MachinesCreatedCondition)
condition := clusterv1.MachinesCreatedCondition
g.Expect(conditions.Has(machineSet, condition)).
To(BeTrue(), "MachineSet should have the %s condition set", condition)
machinesCreatedCondition := conditions.Get(machineSet, condition)
g.Expect(machinesCreatedCondition.Status).
To(Equal(corev1.ConditionFalse), "%s condition status should be %s",
clusterv1.MachinesCreatedCondition, corev1.ConditionFalse)
To(Equal(corev1.ConditionFalse), "%s condition status should be %s", condition, corev1.ConditionFalse)
g.Expect(machinesCreatedCondition.Reason).
To(Equal(clusterv1.PreflightCheckFailedReason), "%s condition reason should be %s",
clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason)
To(Equal(clusterv1.PreflightCheckFailedReason), "%s condition reason should be %s", condition, clusterv1.PreflightCheckFailedReason)

// Verify no new Machines are created.
g.Consistently(func(g Gomega) {
machineList := &clusterv1.MachineList{}
g.Expect(r.Client.List(ctx, machineList)).To(Succeed())
g.Expect(machineList.Items).To(BeEmpty(), "There should not be any machines")
})
machineList := &clusterv1.MachineList{}
g.Expect(r.Client.List(ctx, machineList)).To(Succeed())
g.Expect(machineList.Items).To(BeEmpty(), "There should not be any machines")

})
}

Expand Down

0 comments on commit 8b4a0ea

Please sign in to comment.