From 3757cba041406ad80cecb82793d1510f7fbf8394 Mon Sep 17 00:00:00 2001 From: Danny Thomson Date: Thu, 7 Nov 2019 23:18:01 -0800 Subject: [PATCH] Reset ProgressDeadline on retry --- rollout/canary_test.go | 41 ++++++++++++++++++++++++++++++++++ rollout/controller_test.go | 4 ++++ rollout/sync.go | 5 +++++ utils/conditions/conditions.go | 9 +++++++- 4 files changed, 58 insertions(+), 1 deletion(-) diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 4f2366c816..9a188f3296 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -191,6 +191,47 @@ func TestCanaryRolloutUpdatePauseConditionWhilePaused(t *testing.T) { assert.Equal(t, calculatePatch(r2, expectedPatch), patch) } +func TestCanaryRolloutResetProgressDeadlineOnRetry(t *testing.T) { + f := newFixture(t) + defer f.Close() + + steps := []v1alpha1.CanaryStep{ + { + Pause: &v1alpha1.RolloutPause{}, + }, + } + r1 := newCanaryRollout("foo", 10, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(1), intstr.FromInt(0)) + r2 := bumpVersion(r1) + + progressingCondition, _ := newProgressingCondition(conditions.RolloutAbortedReason, r2) + conditions.SetRolloutCondition(&r2.Status, progressingCondition) + + rs1 := newReplicaSetWithStatus(r1, 10, 10) + rs2 := newReplicaSetWithStatus(r2, 0, 0) + + f.kubeobjects = append(f.kubeobjects, rs1, rs2) + rs1PodHash := rs1.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] + f.replicaSetLister = append(f.replicaSetLister, rs1, rs2) + + r2 = updateCanaryRolloutStatus(r2, rs1PodHash, 10, 0, 10, false) + r2.Status.Abort = false + f.rolloutLister = append(f.rolloutLister, r2) + f.objects = append(f.objects, r2) + + addPausedConditionPatch := f.expectPatchRolloutAction(r2) + f.expectPatchRolloutAction(r2) + f.run(getKey(r2, t)) + + patch := f.getPatchedRollout(addPausedConditionPatch) + _, retryCondition := newProgressingCondition(conditions.RolloutRetryReason, r2) + expectedPatch := fmt.Sprintf(`{ + "status": { + "conditions": [%s] + } + }`, retryCondition) + assert.Equal(t, calculatePatch(r2, expectedPatch), patch) +} + func TestCanaryRolloutIncrementStepAfterUnPaused(t *testing.T) { f := newFixture(t) defer f.Close() diff --git a/rollout/controller_test.go b/rollout/controller_test.go index 1406be1a5e..eb5e48cb50 100644 --- a/rollout/controller_test.go +++ b/rollout/controller_test.go @@ -176,6 +176,10 @@ func newProgressingCondition(reason string, resourceObj runtime.Object) (v1alpha msg = fmt.Sprintf(conditions.RolloutAnalysisRunFailedMessage, arName, resource.Name) status = corev1.ConditionFalse } + if reason == conditions.RolloutRetryReason { + msg = conditions.RolloutRetryMessage + status = corev1.ConditionUnknown + } case *corev1.Service: if reason == conditions.ServiceNotFoundReason { msg = fmt.Sprintf(conditions.ServiceNotFoundMessage, resource.Name) diff --git a/rollout/sync.go b/rollout/sync.go index 638d584e11..3310162672 100644 --- a/rollout/sync.go +++ b/rollout/sync.go @@ -441,6 +441,11 @@ func (c *RolloutController) checkPausedConditions(r *v1alpha1.Rollout) error { updatedConditon = conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionUnknown, conditions.ResumedRolloutReason, conditions.ResumeRolloutMessage) } + abortCondExists := cond != nil && cond.Reason == conditions.RolloutAbortedReason + if !r.Status.Abort && abortCondExists { + updatedConditon = conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionUnknown, conditions.RolloutRetryReason, conditions.RolloutRetryMessage) + } + if updatedConditon == nil { return nil } diff --git a/utils/conditions/conditions.go b/utils/conditions/conditions.go index 8fdde984cf..e2a6e3a709 100644 --- a/utils/conditions/conditions.go +++ b/utils/conditions/conditions.go @@ -85,6 +85,10 @@ const ( RolloutAbortedReason = "RolloutAborted" // RolloutAbortedMessage indicates that the rollout was aborted RolloutAbortedMessage = "Rollout is aborted" + // RolloutRetryReason indicates that the rollout is retrying after being aborted + RolloutRetryReason = "RolloutRetry" + // RolloutRetryMessage indicates that the rollout is retrying after being aborted + RolloutRetryMessage = "Retrying Rollout after abort" // NewRSAvailableReason is added in a rollout when its newest replica set is made available // ie. the number of new pods that have passed readiness checks and run for at least minReadySeconds @@ -402,7 +406,10 @@ func RolloutTimedOut(rollout *v1alpha1.Rollout, newStatus *v1alpha1.RolloutStatu // If it's already set with a TimedOutReason reason, we have already timed out, no need to check // again. condition := GetRolloutCondition(*newStatus, v1alpha1.RolloutProgressing) - if condition == nil { + // When a rollout is retried, the controller should not evaluate for a timeout based on the + // aborted condition because the abort could have happened a while back and the rollout should + // not enter degraded as a result of that + if condition == nil || condition.Reason == RolloutAbortedReason { return false }