Skip to content

Commit

Permalink
Merge pull request #3495 from fabriziopandini/fix-mhc-condition-const
Browse files Browse the repository at this point in the history
🌱 Fix MHC conditions const
  • Loading branch information
k8s-ci-robot authored Aug 18, 2020
2 parents 3881021 + 70eafa2 commit 3ae0afb
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 16 deletions.
20 changes: 10 additions & 10 deletions api/v1alpha3/condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,24 +74,24 @@ const (
// In the event that the health check fails it will be set to False.
MachineHealthCheckSuccededCondition ConditionType = "HealthCheckSucceeded"

// MachineHasFailure is the reason used when a machine has either a FailureReason or a FailureMessage set on its status.
MachineHasFailure = "MachineHasFailure"
// MachineHasFailureReason is the reason used when a machine has either a FailureReason or a FailureMessage set on its status.
MachineHasFailureReason = "MachineHasFailure"

// NodeNotFound is the reason used when a machine's node has previously been observed but is now gone.
NodeNotFound = "NodeNotFound"
// NodeNotFoundReason is the reason used when a machine's node has previously been observed but is now gone.
NodeNotFoundReason = "NodeNotFound"

// NodeStartupTimeout is the reason used when a machine's node does not appear within the specified timeout.
NodeStartupTimeout = "NodeStartupTimeout"
// NodeStartupTimeoutReason is the reason used when a machine's node does not appear within the specified timeout.
NodeStartupTimeoutReason = "NodeStartupTimeout"

// UnhealthyNodeCondition is the reason used when a machine's node has one of the MachineHealthCheck's unhealthy conditions.
UnhealthyNodeCondition = "UnhealthyNode"
// UnhealthyNodeConditionReason is the reason used when a machine's node has one of the MachineHealthCheck's unhealthy conditions.
UnhealthyNodeConditionReason = "UnhealthyNode"
)

const (
// MachineOwnerRemediatedCondition is set by the MachineHealthCheck controller on machines that have failed a health check.
// MachineOwnerRemediatedCondition is set to False after a health check fails, but should be changed to True by the owning controller after remediation succeeds.
MachineOwnerRemediatedCondition ConditionType = "OwnerRemediated"

// WaitingForRemediation is the reason used when a machine fails a health check and remediation is needed.
WaitingForRemediation = "WaitingForRemediation"
// WaitingForRemediationReason is the reason used when a machine fails a health check and remediation is needed.
WaitingForRemediationReason = "WaitingForRemediation"
)
2 changes: 1 addition & 1 deletion controllers/machinehealthcheck_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ func (r *MachineHealthCheckReconciler) reconcile(ctx context.Context, logger log
logger.Info("Machine has failed health check, but machine is paused so skipping remediation", "target", t.string(), "reason", condition.Reason, "message", condition.Message)
} else {
logger.Info("Target has failed health check, marking for remediation", "target", t.string(), "reason", condition.Reason, "message", condition.Message)
conditions.MarkFalse(t.Machine, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediation, clusterv1.ConditionSeverityWarning, "MachineHealthCheck failed")
conditions.MarkFalse(t.Machine, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "MachineHealthCheck failed")
}
if err := t.patchHelper.Patch(ctx, t.Machine); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "Failed to patch unhealthy machine status for machine %q", t.Machine.Name)
Expand Down
10 changes: 5 additions & 5 deletions controllers/machinehealthcheck_targets.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,21 +96,21 @@ func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachi
now := time.Now()

if t.Machine.Status.FailureReason != nil {
conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.MachineHasFailure, clusterv1.ConditionSeverityWarning, "FailureReason: %v", t.Machine.Status.FailureReason)
conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.MachineHasFailureReason, clusterv1.ConditionSeverityWarning, "FailureReason: %v", t.Machine.Status.FailureReason)
logger.V(3).Info("Target is unhealthy", "failureReason", t.Machine.Status.FailureReason)
return true, time.Duration(0)
}

if t.Machine.Status.FailureMessage != nil {
conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.MachineHasFailure, clusterv1.ConditionSeverityWarning, "FailureMessage: %v", t.Machine.Status.FailureMessage)
conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.MachineHasFailureReason, clusterv1.ConditionSeverityWarning, "FailureMessage: %v", t.Machine.Status.FailureMessage)
logger.V(3).Info("Target is unhealthy", "failureMessage", t.Machine.Status.FailureMessage)
return true, time.Duration(0)
}

// the node does not exist
if t.nodeMissing {
logger.V(3).Info("Target is unhealthy: node is missing")
conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.NodeNotFound, clusterv1.ConditionSeverityWarning, "")
conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.NodeNotFoundReason, clusterv1.ConditionSeverityWarning, "")
return true, time.Duration(0)
}

Expand All @@ -121,7 +121,7 @@ func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachi
return false, timeoutForMachineToHaveNode
}
if t.Machine.Status.LastUpdated.Add(timeoutForMachineToHaveNode).Before(now) {
conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.NodeStartupTimeout, clusterv1.ConditionSeverityWarning, "Node failed to report startup in %s", timeoutForMachineToHaveNode.String())
conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.NodeStartupTimeoutReason, clusterv1.ConditionSeverityWarning, "Node failed to report startup in %s", timeoutForMachineToHaveNode.String())
logger.V(3).Info("Target is unhealthy: machine has no node", "duration", timeoutForMachineToHaveNode.String())
return true, time.Duration(0)
}
Expand All @@ -143,7 +143,7 @@ func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachi
// If the condition has been in the unhealthy state for longer than the
// timeout, return true with no requeue time.
if nodeCondition.LastTransitionTime.Add(c.Timeout.Duration).Before(now) {
conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.UnhealthyNodeCondition, clusterv1.ConditionSeverityWarning, "Condition %s on node is reporting status %s for more than %s", c.Type, c.Status, c.Timeout.Duration.String())
conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.UnhealthyNodeConditionReason, clusterv1.ConditionSeverityWarning, "Condition %s on node is reporting status %s for more than %s", c.Type, c.Status, c.Timeout.Duration.String())
logger.V(3).Info("Target is unhealthy: condition is in state longer than allowed timeout", "condition", c.Type, "state", c.Status, "timeout", c.Timeout.Duration.String())
return true, time.Duration(0)
}
Expand Down

0 comments on commit 3ae0afb

Please sign in to comment.