From 8b0215d6009ea700901978293592d397e12890e1 Mon Sep 17 00:00:00 2001 From: Andy Chen Date: Thu, 31 Aug 2023 14:16:35 -0700 Subject: [PATCH] fix(controller): rollback should skip all steps to active rs within RollbackWindow (#2953) * fix(canary): skip steps when in rollback window and rs is still active Resolve #2939 Signed-off-by: Andy Chen * test(canary): add case where rollback when in window and rs is still active Signed-off-by: Andy Chen * test(replicaset): add test case for IsActive function Signed-off-by: Andy Chen --------- Signed-off-by: Andy Chen Co-authored-by: Yohan Belval Co-authored-by: zachaller --- rollout/canary.go | 11 +++++-- rollout/canary_test.go | 49 +++++++++++++++++++++++++++++ utils/replicaset/replicaset.go | 9 ++++++ utils/replicaset/replicaset_test.go | 12 +++++++ utils/time/now.go | 5 +++ 5 files changed, 83 insertions(+), 3 deletions(-) diff --git a/rollout/canary.go b/rollout/canary.go index 7c31506a82..dff4b52d50 100644 --- a/rollout/canary.go +++ b/rollout/canary.go @@ -359,13 +359,18 @@ func (c *rolloutContext) syncRolloutStatusCanary() error { if replicasetutil.PodTemplateOrStepsChanged(c.rollout, c.newRS) { c.resetRolloutStatus(&newStatus) - if c.newRS != nil && c.rollout.Status.StableRS == replicasetutil.GetPodTemplateHash(c.newRS) { - if stepCount > 0 { + if c.newRS != nil && stepCount > 0 { + if c.rollout.Status.StableRS == replicasetutil.GetPodTemplateHash(c.newRS) { // If we get here, we detected that we've moved back to the stable ReplicaSet - c.recorder.Eventf(c.rollout, record.EventOptions{EventReason: "SkipSteps"}, "Rollback to stable") + c.recorder.Eventf(c.rollout, record.EventOptions{EventReason: "SkipSteps"}, "Rollback to stable ReplicaSets") + newStatus.CurrentStepIndex = &stepCount + } else if c.isRollbackWithinWindow() && replicasetutil.IsActive(c.newRS) { + // Else if we get here we detected that we are within the rollback window we can skip steps and move back to the active ReplicaSet + c.recorder.Eventf(c.rollout, record.EventOptions{EventReason: "SkipSteps"}, "Rollback to active ReplicaSets within RollbackWindow") newStatus.CurrentStepIndex = &stepCount } } + newStatus = c.calculateRolloutConditions(newStatus) return c.persistRolloutStatus(&newStatus) } diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 1811f6a4c4..9f66cf1078 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -846,6 +846,55 @@ func TestRollBackToStable(t *testing.T) { assert.Equal(t, calculatePatch(r2, expectedPatch), patch) } +func TestRollBackToActiveReplicaSetWithinWindow(t *testing.T) { + f := newFixture(t) + defer f.Close() + + steps := []v1alpha1.CanaryStep{{ + SetWeight: int32Ptr(10), + }} + + r1 := newCanaryRollout("foo", 1, nil, steps, int32Ptr(0), intstr.FromInt(1), intstr.FromInt(0)) + r2 := bumpVersion(r1) + + // For the fast rollback to work, we need to: + // 1. Have the previous revision be active (i.e. not scaled down) + // 2. Be in rollback window (within window revisions and previous creation timestamp) + rs1 := newReplicaSetWithStatus(r1, 1, 1) + rs2 := newReplicaSetWithStatus(r2, 1, 1) + r2.Spec.RollbackWindow = &v1alpha1.RollbackWindowSpec{Revisions: 1} + rs1.CreationTimestamp = timeutil.MetaTime(time.Now().Add(-1 * time.Hour)) + rs2.CreationTimestamp = timeutil.MetaNow() + + f.kubeobjects = append(f.kubeobjects, rs1, rs2) + f.replicaSetLister = append(f.replicaSetLister, rs1, rs2) + f.serviceLister = append(f.serviceLister) + + // Switch back to version 1 + r2.Spec.Template = r1.Spec.Template + + rs1PodHash := rs1.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] + rs2PodHash := rs2.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] + + // Since old replicaset is still active, expect twice the number of replicas + r2 = updateCanaryRolloutStatus(r2, rs2PodHash, 2, 2, 2, false) + + f.rolloutLister = append(f.rolloutLister, r2) + f.objects = append(f.objects, r2) + + f.expectUpdateReplicaSetAction(rs1) + f.expectUpdateReplicaSetAction(rs1) + rolloutPatchIndex := f.expectPatchRolloutAction(r2) + f.run(getKey(r2, t)) + + expectedStepIndex := len(steps) + patch := f.getPatchedRolloutWithoutConditions(rolloutPatchIndex) + + // Assert current pod hash is the old replicaset and steps were skipped + assert.Regexp(t, fmt.Sprintf(`"currentPodHash":"%s"`, rs1PodHash), patch) + assert.Regexp(t, fmt.Sprintf(`"currentStepIndex":%d`, expectedStepIndex), patch) +} + func TestGradualShiftToNewStable(t *testing.T) { f := newFixture(t) defer f.Close() diff --git a/utils/replicaset/replicaset.go b/utils/replicaset/replicaset.go index 8554cdf94b..9aec161b66 100644 --- a/utils/replicaset/replicaset.go +++ b/utils/replicaset/replicaset.go @@ -341,6 +341,15 @@ func FindActiveOrLatest(newRS *appsv1.ReplicaSet, oldRSs []*appsv1.ReplicaSet) * } } +// IsActive returns if replica set is active (has, or at least ought to have pods). +func IsActive(rs *appsv1.ReplicaSet) bool { + if rs == nil { + return false + } + + return len(controller.FilterActiveReplicaSets([]*appsv1.ReplicaSet{rs})) > 0 +} + // GetReplicaCountForReplicaSets returns the sum of Replicas of the given replica sets. func GetReplicaCountForReplicaSets(replicaSets []*appsv1.ReplicaSet) int32 { totalReplicas := int32(0) diff --git a/utils/replicaset/replicaset_test.go b/utils/replicaset/replicaset_test.go index 40c7d1ae2c..23bf320955 100644 --- a/utils/replicaset/replicaset_test.go +++ b/utils/replicaset/replicaset_test.go @@ -153,6 +153,18 @@ func TestFindOldReplicaSets(t *testing.T) { } } +func TestIsActive(t *testing.T) { + rs1 := generateRS(generateRollout("foo")) + *(rs1.Spec.Replicas) = 1 + + rs2 := generateRS(generateRollout("foo")) + *(rs2.Spec.Replicas) = 0 + + assert.False(t, IsActive(nil)) + assert.True(t, IsActive(&rs1)) + assert.False(t, IsActive(&rs2)) +} + func TestGetReplicaCountForReplicaSets(t *testing.T) { rs1 := generateRS(generateRollout("foo")) *(rs1.Spec.Replicas) = 1 diff --git a/utils/time/now.go b/utils/time/now.go index 4103857811..1b51cb3cc0 100644 --- a/utils/time/now.go +++ b/utils/time/now.go @@ -13,3 +13,8 @@ var Now = time.Now var MetaNow = func() metav1.Time { return metav1.Time{Time: Now()} } + +// MetaTime is a wrapper around metav1.Time and used to override behavior in tests. +var MetaTime = func(time time.Time) metav1.Time { + return metav1.Time{Time: time} +}