From 408bc79f8b1aa12688ccd5f5e2cf8cec68a2960f Mon Sep 17 00:00:00 2001 From: Jack Andersen Date: Wed, 10 Aug 2022 15:10:09 -0400 Subject: [PATCH] fix: return an error when we cannot swap the replicaset hashes fixes #2050 Signed-off-by: Jack Andersen --- rollout/canary.go | 6 ++++++ rollout/canary_test.go | 8 +++++++- rollout/service.go | 10 +++++++++- rollout/service_test.go | 3 ++- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/rollout/canary.go b/rollout/canary.go index 87228499ab..acc35e6dce 100644 --- a/rollout/canary.go +++ b/rollout/canary.go @@ -1,6 +1,7 @@ package rollout import ( + "errors" "sort" appsv1 "k8s.io/api/apps/v1" @@ -50,6 +51,11 @@ func (c *rolloutContext) rolloutCanary() error { } if err := c.reconcileStableAndCanaryService(); err != nil { + // if we cannot reconcile the stable and canary services then + // we should not continue to adjust traffic routing + if errors.Is(err, DelayServiceSelectorSwapError) { + return nil + } return err } diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 8cc1c3b0df..73370bb593 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -1262,7 +1262,13 @@ func TestCanarySVCSelectors(t *testing.T) { informers.Start(stopchan) informers.WaitForCacheSync(stopchan) err := rc.reconcileStableAndCanaryService() - assert.NoError(t, err, "unable to reconcileStableAndCanaryService") + // There is an error returned here because we could not reconcile + // unhealthy services. + if tc.shouldTargetNewRS { + assert.NoError(t, err, "unable to reconcileStableAndCanaryService") + } else { + assert.Error(t, err, "able to reconcileStableAndCanaryService for unhealthy replicas") + } updatedCanarySVC, err := servicesLister.Services(rc.rollout.Namespace).Get(canaryService.Name) assert.NoError(t, err, "unable to get updated canary service") if tc.shouldTargetNewRS { diff --git a/rollout/service.go b/rollout/service.go index a2861be0c2..8a657efb8d 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -45,6 +45,14 @@ const ( }` ) +type delayServiceSelectorSwapError struct{} + +func (e delayServiceSelectorSwapError) Error() string { + return "Selectors cannot be swapped yet because not all pods are ready" +} + +var DelayServiceSelectorSwapError = delayServiceSelectorSwapError{} + func generatePatch(service *corev1.Service, newRolloutUniqueLabelValue string, r *v1alpha1.Rollout) string { if _, ok := service.Annotations[v1alpha1.ManagedByRolloutsKey]; !ok { return fmt.Sprintf(switchSelectorAndAddManagedByPatch, r.Name, newRolloutUniqueLabelValue) @@ -282,7 +290,7 @@ func (c *rolloutContext) ensureSVCTargets(svcName string, rs *appsv1.ReplicaSet, if checkRsAvailability && !replicasetutil.IsReplicaSetAvailable(rs) { logCtx := c.log.WithField(logutil.ServiceKey, svc.Name) logCtx.Infof("delaying service switch from %s to %s: ReplicaSet not fully available", currSelector, desiredSelector) - return nil + return DelayServiceSelectorSwapError } err = c.switchServiceSelector(svc, desiredSelector, c.rollout) if err != nil { diff --git a/rollout/service_test.go b/rollout/service_test.go index aa4f1d020b..c0e6ad7d5a 100644 --- a/rollout/service_test.go +++ b/rollout/service_test.go @@ -771,7 +771,8 @@ func TestDelayCanaryStableServiceLabelInjection(t *testing.T) { roCtx.stableRS = newReplicaSetWithStatus(ro2, 3, 0) err = roCtx.reconcileStableAndCanaryService() - assert.NoError(t, err) + // an error is returned because we are delaying + assert.Equal(t, err, DelayServiceSelectorSwapError) _, canaryInjected := canarySvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey] assert.False(t, canaryInjected) _, stableInjected := stableSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey]