From 81b015d15c942458308c8c7a266e47403ed1d9f5 Mon Sep 17 00:00:00 2001 From: Zach Aller Date: Mon, 27 Feb 2023 19:24:14 -0600 Subject: [PATCH] fix: switch service selector back to stable on canary service when aborted (#2540) * wip Signed-off-by: zachaller * work with both dynamic stable scale on and off Signed-off-by: zachaller * add e2e tests Signed-off-by: zachaller * cleanup comments Signed-off-by: zachaller * cleanup if logic Signed-off-by: zachaller --------- Signed-off-by: zachaller --- rollout/trafficrouting.go | 10 +++++ rollout/trafficrouting_test.go | 74 ++++++++++++++++++++++++++++++++++ test/e2e/canary_test.go | 11 +++++ 3 files changed, 95 insertions(+) diff --git a/rollout/trafficrouting.go b/rollout/trafficrouting.go index 41e2db48be..6064b60dad 100644 --- a/rollout/trafficrouting.go +++ b/rollout/trafficrouting.go @@ -166,6 +166,16 @@ func (c *rolloutContext) reconcileTrafficRouting() error { desiredWeight = minInt(desiredWeight, c.rollout.Status.Canary.Weights.Canary.Weight) } } + + if (c.rollout.Spec.Strategy.Canary.DynamicStableScale && desiredWeight == 0) || !c.rollout.Spec.Strategy.Canary.DynamicStableScale { + // If we are using dynamic stable scale we need to also make sure that desiredWeight=0 aka we are completely + // done with aborting before resetting the canary service selectors back to stable + err = c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.stableRS, true) + if err != nil { + return err + } + } + err := reconciler.RemoveManagedRoutes() if err != nil { return err diff --git a/rollout/trafficrouting_test.go b/rollout/trafficrouting_test.go index 8ac4a08e67..da82f812a4 100644 --- a/rollout/trafficrouting_test.go +++ b/rollout/trafficrouting_test.go @@ -893,6 +893,7 @@ func TestDynamicScalingDontIncreaseWeightWhenAborted(t *testing.T) { f.objects = append(f.objects, r2) f.expectPatchRolloutAction(r2) + f.expectPatchServiceAction(canarySvc, rs1PodHash) f.fakeTrafficRouting = newUnmockedFakeTrafficRoutingReconciler() f.fakeTrafficRouting.On("UpdateHash", mock.Anything, mock.Anything, mock.Anything).Return(nil) @@ -977,3 +978,76 @@ func TestDynamicScalingDecreaseWeightAccordingToStableAvailabilityWhenAborted(t f.fakeTrafficRouting.On("VerifyWeight", mock.Anything).Return(pointer.BoolPtr(true), nil) f.run(getKey(r1, t)) } + +// TestDynamicScalingDecreaseWeightAccordingToStableAvailabilityWhenAbortedAndResetService verifies we decrease the weight +// to the canary depending on the availability of the stable ReplicaSet when aborting and also that at the end of the abort +// we reset the canary service selectors back to the stable service +func TestDynamicScalingDecreaseWeightAccordingToStableAvailabilityWhenAbortedAndResetService(t *testing.T) { + f := newFixture(t) + defer f.Close() + + steps := []v1alpha1.CanaryStep{ + { + SetWeight: pointer.Int32Ptr(50), + }, + { + Pause: &v1alpha1.RolloutPause{}, + }, + } + r1 := newCanaryRollout("foo", 5, nil, steps, pointer.Int32Ptr(1), intstr.FromInt(1), intstr.FromInt(1)) + r1.Spec.Strategy.Canary.DynamicStableScale = true + r1.Spec.Strategy.Canary.TrafficRouting = &v1alpha1.RolloutTrafficRouting{ + SMI: &v1alpha1.SMITrafficRouting{}, + } + r1.Spec.Strategy.Canary.CanaryService = "canary" + r1.Spec.Strategy.Canary.StableService = "stable" + r1.Status.ReadyReplicas = 5 + r1.Status.AvailableReplicas = 5 + r1.Status.Abort = true + r1.Status.AbortedAt = &metav1.Time{Time: time.Now().Add(-1 * time.Minute)} + r2 := bumpVersion(r1) + + rs1 := newReplicaSetWithStatus(r1, 5, 5) + rs2 := newReplicaSetWithStatus(r2, 0, 0) + + rs1PodHash := rs1.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] + rs2PodHash := rs2.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] + canarySelector := map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: rs2PodHash} + stableSelector := map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: rs1PodHash} + canarySvc := newService("canary", 80, canarySelector, r1) + stableSvc := newService("stable", 80, stableSelector, r1) + r2.Status.StableRS = rs1PodHash + r2.Status.Canary.Weights = &v1alpha1.TrafficWeights{ + Canary: v1alpha1.WeightDestination{ + Weight: 20, + ServiceName: "canary", + PodTemplateHash: rs2PodHash, + }, + Stable: v1alpha1.WeightDestination{ + Weight: 80, + ServiceName: "stable", + PodTemplateHash: rs1PodHash, + }, + } + + f.kubeobjects = append(f.kubeobjects, rs1, rs2, canarySvc, stableSvc) + f.replicaSetLister = append(f.replicaSetLister, rs1, rs2) + + f.rolloutLister = append(f.rolloutLister, r2) + f.objects = append(f.objects, r2) + + f.expectPatchRolloutAction(r2) + f.expectPatchServiceAction(canarySvc, rs1PodHash) + + f.fakeTrafficRouting = newUnmockedFakeTrafficRoutingReconciler() + f.fakeTrafficRouting.On("UpdateHash", mock.Anything, mock.Anything, mock.Anything).Return(nil) + f.fakeTrafficRouting.On("SetWeight", mock.Anything, mock.Anything).Return(func(desiredWeight int32, additionalDestinations ...v1alpha1.WeightDestination) error { + // make sure SetWeight was called with correct value + assert.Equal(t, int32(0), desiredWeight) + return nil + }) + f.fakeTrafficRouting.On("SetHeaderRoute", mock.Anything, mock.Anything).Return(nil) + f.fakeTrafficRouting.On("RemoveManagedRoutes", mock.Anything, mock.Anything).Return(nil) + f.fakeTrafficRouting.On("VerifyWeight", mock.Anything).Return(pointer.BoolPtr(true), nil) + f.run(getKey(r1, t)) +} diff --git a/test/e2e/canary_test.go b/test/e2e/canary_test.go index 8732db50da..00b588c598 100644 --- a/test/e2e/canary_test.go +++ b/test/e2e/canary_test.go @@ -538,6 +538,10 @@ func (s *CanarySuite) TestCanaryScaleDownOnAbort() { WaitForRolloutStatus("Paused"). AbortRollout(). WaitForRolloutStatus("Degraded"). + Then(). + //Expect that the canary service selector has been moved back to stable + ExpectServiceSelector("canary-scaledowndelay-canary", map[string]string{"app": "canary-scaledowndelay", "rollouts-pod-template-hash": "66597877b7"}, false). + When(). Sleep(3*time.Second). Then(). ExpectRevisionPodCount("2", 0) @@ -586,9 +590,16 @@ func (s *CanarySuite) TestCanaryDynamicStableScale() { WaitForRevisionPodCount("2", 1). Then(). ExpectRevisionPodCount("1", 4). + //Assert that the canary service selector is still not set to stable rs because of dynamic stable scale still in progress + Assert(func(t *fixtures.Then) { + canarySvc, stableSvc := t.GetServices() + assert.NotEqual(s.T(), canarySvc.Spec.Selector["rollouts-pod-template-hash"], stableSvc.Spec.Selector["rollouts-pod-template-hash"]) + }). When(). MarkPodsReady("1", 1). // mark last remaining stable pod as ready (4/4 stable are ready) WaitForRevisionPodCount("2", 0). Then(). + //Expect that the canary service selector is now set to stable because of dynamic stable scale is over and we have all pods up on stable rs + ExpectServiceSelector("dynamic-stable-scale-canary", map[string]string{"app": "dynamic-stable-scale", "rollouts-pod-template-hash": "868d98995b"}, false). ExpectRevisionPodCount("1", 4) }