From 31519bdae328275a37b0f9822fb7abc7b719759d Mon Sep 17 00:00:00 2001 From: Jesse Suen Date: Wed, 24 Feb 2021 04:41:57 -0800 Subject: [PATCH] fix: blue-green rollouts could pause prematurely during prePromotionAnalysis Signed-off-by: Jesse Suen --- Makefile | 2 +- hack/update-manifests.sh | 4 +- .../rollouts/v1alpha1/openapi_generated.go | 8 +- pkg/apis/rollouts/v1alpha1/types.go | 31 ++++---- .../v1alpha1/zz_generated.deepcopy.go | 5 -- .../rollouts/validation/validation_test.go | 2 +- rollout/analysis_test.go | 6 +- rollout/bluegreen.go | 75 +++++++++++++------ rollout/bluegreen_test.go | 9 +-- rollout/canary.go | 2 +- rollout/pause.go | 60 ++++++++------- rollout/service.go | 8 +- test/e2e/analysis_test.go | 14 +++- 13 files changed, 130 insertions(+), 96 deletions(-) diff --git a/Makefile b/Makefile index 9774d19fd2..f5769a2e0d 100644 --- a/Makefile +++ b/Makefile @@ -106,7 +106,7 @@ test-kustomize: .PHONY: start-e2e start-e2e: - go run ./cmd/rollouts-controller/main.go --instance-id ${E2E_INSTANCE_ID} + go run ./cmd/rollouts-controller/main.go --instance-id ${E2E_INSTANCE_ID} --loglevel debug .PHONY: test-e2e test-e2e: diff --git a/hack/update-manifests.sh b/hack/update-manifests.sh index 3fc7d6eb21..95ba51ac6e 100755 --- a/hack/update-manifests.sh +++ b/hack/update-manifests.sh @@ -19,9 +19,9 @@ fi kustomize version echo "${AUTOGENMSG}" > "${SRCROOT}/manifests/install.yaml" -kustomize build --load_restrictor none "${SRCROOT}/manifests/cluster-install" >> "${SRCROOT}/manifests/install.yaml" +kustomize build --load_restrictor none "${SRCROOT}/manifests/cluster-install" || kustomize build --load-restrictor none "${SRCROOT}/manifests/cluster-install">> "${SRCROOT}/manifests/install.yaml" update_image "${SRCROOT}/manifests/install.yaml" echo "${AUTOGENMSG}" > "${SRCROOT}/manifests/namespace-install.yaml" -kustomize build --load_restrictor none "${SRCROOT}/manifests/namespace-install" >> "${SRCROOT}/manifests/namespace-install.yaml" +kustomize build --load_restrictor none "${SRCROOT}/manifests/namespace-install" || kustomize build --load-restrictor none "${SRCROOT}/manifests/namespace-install" >> "${SRCROOT}/manifests/namespace-install.yaml" update_image "${SRCROOT}/manifests/namespace-install.yaml" diff --git a/pkg/apis/rollouts/v1alpha1/openapi_generated.go b/pkg/apis/rollouts/v1alpha1/openapi_generated.go index 98376cab1a..3bc349b15d 100644 --- a/pkg/apis/rollouts/v1alpha1/openapi_generated.go +++ b/pkg/apis/rollouts/v1alpha1/openapi_generated.go @@ -731,7 +731,7 @@ func schema_pkg_apis_rollouts_v1alpha1_BlueGreenStrategy(ref common.ReferenceCal }, "previewReplicaCount": { SchemaProps: spec.SchemaProps{ - Description: "PreviewReplica the number of replicas to run under the preview service before the switchover. Once the rollout is resumed the new replicaset will be full scaled up before the switch occurs", + Description: "PreviewReplicaCount is the number of replicas to run for the preview stack before the switchover. Once the rollout is resumed the desired replicaset will be full scaled up before the switch occurs", Type: []string{"integer"}, Format: "int32", }, @@ -745,14 +745,14 @@ func schema_pkg_apis_rollouts_v1alpha1_BlueGreenStrategy(ref common.ReferenceCal }, "autoPromotionSeconds": { SchemaProps: spec.SchemaProps{ - Description: "AutoPromotionSeconds automatically promotes the current ReplicaSet to active after the specified pause delay in seconds after the ReplicaSet becomes ready. If omitted, the Rollout enters and remains in a paused state until manually resumed by removing the pause condition.", + Description: "AutoPromotionSeconds is a duration in seconds in which to delay auto-promotion (default: 0). The countdown begins after the preview ReplicaSet have reached full availability. This option is ignored if autoPromotionEnabled is set to false.", Type: []string{"integer"}, Format: "int32", }, }, "maxUnavailable": { SchemaProps: spec.SchemaProps{ - Description: "MaxUnavailable The maximum number of pods that can be unavailable during the update. Value can be an absolute number (ex: 5) or a percentage of total pods at the start of update (ex: 10%). Absolute number is calculated from percentage by rounding down. This can not be 0 if MaxSurge is 0. By default, a fixed value of 1 is used. Example: when this is set to 30%, the old RC can be scaled down by 30% immediately when the rolling update starts. Once new pods are ready, old RC can be scaled down further, followed by scaling up the new RC, ensuring that at least 70% of original number of pods are available at all times during the update.", + Description: "MaxUnavailable The maximum number of pods that can be unavailable during a restart operation. Defaults to 25% of total replicas.", Ref: ref("k8s.io/apimachinery/pkg/util/intstr.IntOrString"), }, }, @@ -2991,7 +2991,7 @@ func schema_pkg_apis_rollouts_v1alpha1_RolloutStatus(ref common.ReferenceCallbac }, "controllerPause": { SchemaProps: spec.SchemaProps{ - Description: "ControllerPause indicates the controller has paused the rollout", + Description: "ControllerPause indicates the controller has paused the rollout. It is set to true when the controller adds a pause condition. This field helps to discern the scenario where a rollout was resumed after being paused by the controller (e.g. via the plugin). In that situation, the pauseConditions would have been cleared , but controllerPause would still be set to true.", Type: []string{"boolean"}, Format: "", }, diff --git a/pkg/apis/rollouts/v1alpha1/types.go b/pkg/apis/rollouts/v1alpha1/types.go index 686abcb345..443d2d5de1 100644 --- a/pkg/apis/rollouts/v1alpha1/types.go +++ b/pkg/apis/rollouts/v1alpha1/types.go @@ -96,30 +96,21 @@ type BlueGreenStrategy struct { // Name of the service that the rollout modifies as the preview service. // +optional PreviewService string `json:"previewService,omitempty"` - // PreviewReplica the number of replicas to run under the preview service before the switchover. Once the rollout is - // resumed the new replicaset will be full scaled up before the switch occurs + // PreviewReplicaCount is the number of replicas to run for the preview stack before the + // switchover. Once the rollout is resumed the desired replicaset will be full scaled up before the switch occurs // +optional PreviewReplicaCount *int32 `json:"previewReplicaCount,omitempty"` // AutoPromotionEnabled indicates if the rollout should automatically promote the new ReplicaSet // to the active service or enter a paused state. If not specified, the default value is true. // +optional AutoPromotionEnabled *bool `json:"autoPromotionEnabled,omitempty"` - // AutoPromotionSeconds automatically promotes the current ReplicaSet to active after the - // specified pause delay in seconds after the ReplicaSet becomes ready. - // If omitted, the Rollout enters and remains in a paused state until manually resumed by - // removing the pause condition. + // AutoPromotionSeconds is a duration in seconds in which to delay auto-promotion (default: 0). + // The countdown begins after the preview ReplicaSet have reached full availability. + // This option is ignored if autoPromotionEnabled is set to false. // +optional - AutoPromotionSeconds *int32 `json:"autoPromotionSeconds,omitempty"` - // MaxUnavailable The maximum number of pods that can be unavailable during the update. - // Value can be an absolute number (ex: 5) or a percentage of total pods at the start of update (ex: 10%). - // Absolute number is calculated from percentage by rounding down. - // This can not be 0 if MaxSurge is 0. - // By default, a fixed value of 1 is used. - // Example: when this is set to 30%, the old RC can be scaled down by 30% - // immediately when the rolling update starts. Once new pods are ready, old RC - // can be scaled down further, followed by scaling up the new RC, ensuring - // that at least 70% of original number of pods are available at all times - // during the update. + AutoPromotionSeconds int32 `json:"autoPromotionSeconds,omitempty"` + // MaxUnavailable The maximum number of pods that can be unavailable during a restart operation. + // Defaults to 25% of total replicas. // +optional MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"` // ScaleDownDelaySeconds adds a delay before scaling down the previous replicaset. @@ -537,7 +528,11 @@ type RolloutStatus struct { Abort bool `json:"abort,omitempty"` // PauseConditions indicates why the rollout is currently paused PauseConditions []PauseCondition `json:"pauseConditions,omitempty"` - //ControllerPause indicates the controller has paused the rollout + // ControllerPause indicates the controller has paused the rollout. It is set to true when + // the controller adds a pause condition. This field helps to discern the scenario where a + // rollout was resumed after being paused by the controller (e.g. via the plugin). In that + // situation, the pauseConditions would have been cleared , but controllerPause would still be + // set to true. ControllerPause bool `json:"controllerPause,omitempty"` // AbortedAt indicates the controller reconciled an aborted rollout. The controller uses this to understand if // the controller needs to do some specific work when a Rollout is aborted. For example, the reconcileAbort is used diff --git a/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go index 3c97992407..16145ab06c 100644 --- a/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go @@ -392,11 +392,6 @@ func (in *BlueGreenStrategy) DeepCopyInto(out *BlueGreenStrategy) { *out = new(bool) **out = **in } - if in.AutoPromotionSeconds != nil { - in, out := &in.AutoPromotionSeconds, &out.AutoPromotionSeconds - *out = new(int32) - **out = **in - } if in.MaxUnavailable != nil { in, out := &in.MaxUnavailable, &out.MaxUnavailable *out = new(intstr.IntOrString) diff --git a/pkg/apis/rollouts/validation/validation_test.go b/pkg/apis/rollouts/validation/validation_test.go index 5d1f2e6089..4fd24d0ec2 100644 --- a/pkg/apis/rollouts/validation/validation_test.go +++ b/pkg/apis/rollouts/validation/validation_test.go @@ -105,7 +105,7 @@ func TestValidateRolloutStrategyBlueGreen(t *testing.T) { PreviewService: "service-name", ActiveService: "service-name", ScaleDownDelayRevisionLimit: &scaleDownDelayRevisionLimit, - AutoPromotionSeconds: &autoPromotionSeconds, + AutoPromotionSeconds: autoPromotionSeconds, }, }, }, diff --git a/rollout/analysis_test.go b/rollout/analysis_test.go index 3b24ae5e23..893314a1b3 100644 --- a/rollout/analysis_test.go +++ b/rollout/analysis_test.go @@ -1743,18 +1743,18 @@ func TestRolloutPrePromotionAnalysisHonorAutoPromotionSeconds(t *testing.T) { r1 := newBlueGreenRollout("foo", 1, nil, "active", "") r1.Spec.Strategy.BlueGreen.AutoPromotionEnabled = pointer.BoolPtr(true) r2 := bumpVersion(r1) - r2.Spec.Strategy.BlueGreen.AutoPromotionSeconds = pointer.Int32Ptr(10) + r2.Spec.Strategy.BlueGreen.AutoPromotionSeconds = 10 r2.Spec.Strategy.BlueGreen.PrePromotionAnalysis = &v1alpha1.RolloutAnalysis{ Templates: []v1alpha1.RolloutAnalysisTemplate{{ TemplateName: at.Name, }}, } ar := analysisRun(at, v1alpha1.RolloutTypePrePromotionLabel, r2) - ar.Status.Phase = v1alpha1.AnalysisPhaseRunning + ar.Status.Phase = v1alpha1.AnalysisPhaseSuccessful r2.Status.BlueGreen.PrePromotionAnalysisRun = ar.Name r2.Status.BlueGreen.PrePromotionAnalysisRunStatus = &v1alpha1.RolloutAnalysisRunStatus{ Name: ar.Name, - Status: v1alpha1.AnalysisPhaseRunning, + Status: v1alpha1.AnalysisPhaseSuccessful, } rs1 := newReplicaSetWithStatus(r1, 1, 1) diff --git a/rollout/bluegreen.go b/rollout/bluegreen.go index 7cdb04cbd7..2b831f95d6 100644 --- a/rollout/bluegreen.go +++ b/rollout/bluegreen.go @@ -51,7 +51,7 @@ func (c *rolloutContext) rolloutBlueGreen() error { c.reconcileBlueGreenPause(activeSvc, previewSvc) - err = c.reconcileActiveService(previewSvc, activeSvc) + err = c.reconcileActiveService(activeSvc) if err != nil { return err } @@ -121,7 +121,8 @@ func (c *rolloutContext) reconcileBlueGreenTemplateChange() bool { return c.rollout.Status.CurrentPodHash != c.newRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] } -func (c *rolloutContext) skipPause(activeSvc *corev1.Service) bool { +// isBlueGreenFastTracked returns true if we should skip the pause step because update has been fast tracked +func (c *rolloutContext) isBlueGreenFastTracked(activeSvc *corev1.Service) bool { if replicasetutil.HasScaleDownDeadline(c.newRS) { c.log.Infof("Detected scale down annotation for ReplicaSet '%s' and will skip pause", c.newRS.Name) return true @@ -129,12 +130,6 @@ func (c *rolloutContext) skipPause(activeSvc *corev1.Service) bool { if c.rollout.Status.PromoteFull { return true } - - // If a rollout has a PrePromotionAnalysis, the controller only skips the pause after the analysis passes - if defaults.GetAutoPromotionEnabledOrDefault(c.rollout) && c.completedPrePromotionAnalysis() { - return true - } - if _, ok := activeSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey]; !ok { return true } @@ -145,7 +140,6 @@ func (c *rolloutContext) skipPause(activeSvc *corev1.Service) bool { } func (c *rolloutContext) reconcileBlueGreenPause(activeSvc, previewSvc *corev1.Service) { - c.log.Info("Reconciling pause") if c.rollout.Status.Abort { return } @@ -155,31 +149,64 @@ func (c *rolloutContext) reconcileBlueGreenPause(activeSvc, previewSvc *corev1.S return } if c.rollout.Spec.Paused { + c.log.Info("rollout has been paused by user") return } - - if c.skipPause(activeSvc) { + if c.isBlueGreenFastTracked(activeSvc) { + c.log.Debug("skipping pause: fast-tracked update") c.pauseContext.RemovePauseCondition(v1alpha1.PauseReasonBlueGreenPause) return } - newRSPodHash := c.newRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] - cond := getPauseCondition(c.rollout, v1alpha1.PauseReasonBlueGreenPause) - // If the rollout is not paused and the active service is not point at the newRS, we should pause the rollout. - if cond == nil && !c.rollout.Status.ControllerPause && !c.rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint && activeSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey] != newRSPodHash { + if activeSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey] == newRSPodHash { + c.log.Debug("skipping pause: desired ReplicaSet already active") + c.pauseContext.RemovePauseCondition(v1alpha1.PauseReasonBlueGreenPause) + return + } + if c.rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint { + c.log.Debug("skipping pause: scaleUpPreviewCheckPoint passed") + c.pauseContext.RemovePauseCondition(v1alpha1.PauseReasonBlueGreenPause) + return + } + if !needsBlueGreenControllerPause(c.rollout) { + c.pauseContext.RemovePauseCondition(v1alpha1.PauseReasonBlueGreenPause) + return + } + + // if we get here, the controller should manage the pause/resume + c.log.Infof("reconciling pause (autoPromotionSeconds: %d)", c.rollout.Spec.Strategy.BlueGreen.AutoPromotionSeconds) + if !c.completedPrePromotionAnalysis() { + c.log.Infof("not ready for pause: prePromotionAnalysis incomplete") + return + } + pauseCond := getPauseCondition(c.rollout, v1alpha1.PauseReasonBlueGreenPause) + if pauseCond == nil && !c.rollout.Status.ControllerPause { + if pauseCond == nil { + c.log.Info("pausing") + } c.pauseContext.AddPauseCondition(v1alpha1.PauseReasonBlueGreenPause) return } - autoPromoteActiveServiceDelaySeconds := c.rollout.Spec.Strategy.BlueGreen.AutoPromotionSeconds - if autoPromoteActiveServiceDelaySeconds != nil && cond != nil { - c.checkEnqueueRolloutDuringWait(cond.StartTime, *autoPromoteActiveServiceDelaySeconds) - switchDeadline := cond.StartTime.Add(time.Duration(*autoPromoteActiveServiceDelaySeconds) * time.Second) - now := metav1.Now() - if now.After(switchDeadline) { - c.pauseContext.RemovePauseCondition(v1alpha1.PauseReasonBlueGreenPause) + if !c.pauseContext.CompletedBlueGreenPause() { + c.log.Info("pause incomplete") + if c.rollout.Spec.Strategy.BlueGreen.AutoPromotionSeconds > 0 { + c.checkEnqueueRolloutDuringWait(pauseCond.StartTime, c.rollout.Spec.Strategy.BlueGreen.AutoPromotionSeconds) + } + } else { + c.log.Infof("pause completed") + c.pauseContext.RemovePauseCondition(v1alpha1.PauseReasonBlueGreenPause) + } +} + +// needsBlueGreenControllerPause indicates if the controller should manage the pause status of the blue-green rollout +func needsBlueGreenControllerPause(ro *v1alpha1.Rollout) bool { + if ro.Spec.Strategy.BlueGreen.AutoPromotionEnabled != nil { + if !*ro.Spec.Strategy.BlueGreen.AutoPromotionEnabled { + return true } } + return ro.Spec.Strategy.BlueGreen.AutoPromotionSeconds > 0 } // scaleDownOldReplicaSetsForBlueGreen scales down old replica sets when rollout strategy is "Blue Green". @@ -374,8 +401,8 @@ func (c *rolloutContext) calculateScaleUpPreviewCheckPoint(stableRSHash string) if c.rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint { return c.rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint } - if !c.completedPrePromotionAnalysis() { - // do not set the checkpoint unless prePromotion was successful + if !c.completedPrePromotionAnalysis() || !c.pauseContext.CompletedBlueGreenPause() { + // do not set the checkpoint unless prePromotionAnalysis was successful and we completed our pause return false } previewCountAvailable := *c.rollout.Spec.Strategy.BlueGreen.PreviewReplicaCount == replicasetutil.GetAvailableReplicaCountForReplicaSets([]*appsv1.ReplicaSet{c.newRS}) diff --git a/rollout/bluegreen_test.go b/rollout/bluegreen_test.go index 7d76ce2847..c9bf45141b 100644 --- a/rollout/bluegreen_test.go +++ b/rollout/bluegreen_test.go @@ -338,9 +338,8 @@ func TestBlueGreenHandlePause(t *testing.T) { defer f.Close() r1 := newBlueGreenRollout("foo", 1, nil, "active", "preview") - r1.Spec.Strategy.BlueGreen.AutoPromotionEnabled = pointer.BoolPtr(false) r2 := bumpVersion(r1) - r2.Spec.Strategy.BlueGreen.AutoPromotionSeconds = pointer.Int32Ptr(10) + r2.Spec.Strategy.BlueGreen.AutoPromotionSeconds = 10 rs1 := newReplicaSetWithStatus(r1, 1, 1) rs2 := newReplicaSetWithStatus(r2, 1, 1) @@ -373,9 +372,8 @@ func TestBlueGreenHandlePause(t *testing.T) { defer f.Close() r1 := newBlueGreenRollout("foo", 1, nil, "active", "preview") - r1.Spec.Strategy.BlueGreen.AutoPromotionEnabled = pointer.BoolPtr(false) r2 := bumpVersion(r1) - r2.Spec.Strategy.BlueGreen.AutoPromotionSeconds = pointer.Int32Ptr(10) + r2.Spec.Strategy.BlueGreen.AutoPromotionSeconds = 10 rs1 := newReplicaSetWithStatus(r1, 1, 1) rs2 := newReplicaSetWithStatus(r2, 1, 1) @@ -427,9 +425,8 @@ func TestBlueGreenHandlePause(t *testing.T) { defer f.Close() r1 := newBlueGreenRollout("foo", 1, nil, "active", "preview") - r1.Spec.Strategy.BlueGreen.AutoPromotionEnabled = pointer.BoolPtr(false) r2 := bumpVersion(r1) - r2.Spec.Strategy.BlueGreen.AutoPromotionSeconds = pointer.Int32Ptr(10) + r2.Spec.Strategy.BlueGreen.AutoPromotionSeconds = 10 rs1 := newReplicaSetWithStatus(r1, 1, 1) rs2 := newReplicaSetWithStatus(r2, 1, 1) diff --git a/rollout/canary.go b/rollout/canary.go index 861a3b8e5c..ece3bf1b47 100644 --- a/rollout/canary.go +++ b/rollout/canary.go @@ -210,7 +210,7 @@ func (c *rolloutContext) completedCurrentCanaryStep() bool { } switch { case currentStep.Pause != nil: - return c.pauseContext.CompletedPauseStep(*currentStep.Pause) + return c.pauseContext.CompletedCanaryPauseStep(*currentStep.Pause) case currentStep.SetCanaryScale != nil: return replicasetutil.AtDesiredReplicaCountsForCanary(c.rollout, c.newRS, c.stableRS, c.otherRSs) case currentStep.SetWeight != nil: diff --git a/rollout/pause.go b/rollout/pause.go index 1e32d81f62..6335678c88 100644 --- a/rollout/pause.go +++ b/rollout/pause.go @@ -125,53 +125,63 @@ func getPauseCondition(rollout *v1alpha1.Rollout, reason v1alpha1.PauseReason) * return nil } -// completedPrePromotionAnalysis checks if the Pre Promotion Analysis has completed successfully or the rollout passed -// the auto promote seconds. +// completedPrePromotionAnalysis checks if the Pre Promotion Analysis has completed successfully func (c *rolloutContext) completedPrePromotionAnalysis() bool { if c.rollout.Spec.Strategy.BlueGreen == nil || c.rollout.Spec.Strategy.BlueGreen.PrePromotionAnalysis == nil { return true } - - cond := getPauseCondition(c.rollout, v1alpha1.PauseReasonBlueGreenPause) - autoPromoteActiveServiceDelaySeconds := c.rollout.Spec.Strategy.BlueGreen.AutoPromotionSeconds - if autoPromoteActiveServiceDelaySeconds != nil && cond != nil { - switchDeadline := cond.StartTime.Add(time.Duration(*autoPromoteActiveServiceDelaySeconds) * time.Second) - now := metav1.Now() - if now.After(switchDeadline) { - return true - } - return false - } - currentAr := c.currentArs.BlueGreenPrePromotion if currentAr != nil && currentAr.Status.Phase == v1alpha1.AnalysisPhaseSuccessful { return true } - return false } +// CompletedBlueGreenPause returns true if we have already completed our automated pause, either +// because a human has resumed the rollout, or we surpassed autoPromotionSeconds. func (pCtx *pauseContext) CompletedBlueGreenPause() bool { rollout := pCtx.rollout if pCtx.HasAddPause() { + // return false if we just added a pause condition as part of this reconciliation return false } - cond := getPauseCondition(rollout, v1alpha1.PauseReasonBlueGreenPause) - - autoPromoteActiveServiceDelaySeconds := rollout.Spec.Strategy.BlueGreen.AutoPromotionSeconds - if autoPromoteActiveServiceDelaySeconds != nil && cond != nil { - switchDeadline := cond.StartTime.Add(time.Duration(*autoPromoteActiveServiceDelaySeconds) * time.Second) - now := metav1.Now() - if now.After(switchDeadline) { - pCtx.log.Info("Rollout has waited the duration of the autoPromoteActiveServiceDelaySeconds") + if rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint { + return true + } + if !needsBlueGreenControllerPause(rollout) { + return true + } + pauseCond := getPauseCondition(rollout, v1alpha1.PauseReasonBlueGreenPause) + if rollout.Spec.Strategy.BlueGreen.AutoPromotionEnabled == nil || *rollout.Spec.Strategy.BlueGreen.AutoPromotionEnabled { + // autoPromotion is enabled. check if we surpassed the delay + autoPromotionSeconds := rollout.Spec.Strategy.BlueGreen.AutoPromotionSeconds + if autoPromotionSeconds == 0 { + return true + } + if rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint { return true } + if pauseCond != nil { + switchDeadline := pauseCond.StartTime.Add(time.Duration(autoPromotionSeconds) * time.Second) + now := metav1.Now() + if now.After(switchDeadline) { + return true + } + return false + } + // we never paused the rollout + return false + } else { + // autoPromotion is disabled. the presence of a pause condition means human has not resumed it + if rollout.Status.ControllerPause { + return pauseCond == nil + } + // status.controllerPause has not yet been set return false } - return cond == nil && (rollout.Status.ControllerPause || rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint) } -func (pCtx *pauseContext) CompletedPauseStep(pause v1alpha1.RolloutPause) bool { +func (pCtx *pauseContext) CompletedCanaryPauseStep(pause v1alpha1.RolloutPause) bool { rollout := pCtx.rollout pauseCondition := getPauseCondition(rollout, v1alpha1.PauseReasonCanaryPauseStep) diff --git a/rollout/service.go b/rollout/service.go index b02a630e5f..0d73ba3528 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -70,8 +70,6 @@ func (c *rolloutContext) reconcilePreviewService(previewSvc *corev1.Service) err if previewSvc == nil { return nil } - c.log.Infof("Reconciling preview service '%s'", previewSvc.Name) - newPodHash := c.newRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] err := c.switchServiceSelector(previewSvc, newPodHash, c.rollout) if err != nil { @@ -81,14 +79,14 @@ func (c *rolloutContext) reconcilePreviewService(previewSvc *corev1.Service) err return nil } -func (c *rolloutContext) reconcileActiveService(previewSvc, activeSvc *corev1.Service) error { +func (c *rolloutContext) reconcileActiveService(activeSvc *corev1.Service) error { if !replicasetutil.ReadyForPause(c.rollout, c.newRS, c.allRSs) || !annotations.IsSaturated(c.rollout, c.newRS) { - c.log.Infof("New RS '%s' is not fully saturated", c.newRS.Name) + c.log.Infof("skipping active service switch: New RS '%s' is not fully saturated", c.newRS.Name) return nil } newPodHash := activeSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey] - if c.skipPause(activeSvc) { + if c.isBlueGreenFastTracked(activeSvc) { newPodHash = c.newRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] } if c.pauseContext.CompletedBlueGreenPause() && c.completedPrePromotionAnalysis() { diff --git a/test/e2e/analysis_test.go b/test/e2e/analysis_test.go index 450f3673d4..73d1ec776e 100644 --- a/test/e2e/analysis_test.go +++ b/test/e2e/analysis_test.go @@ -99,6 +99,9 @@ spec: prePromotionAnalysis: templates: - templateName: sleep-job + args: + - name: duration + value: "5" postPromotionAnalysis: templates: - templateName: sleep-job @@ -149,6 +152,7 @@ spec: WaitForRolloutStatus("Progressing"). WaitForRolloutStatus("Paused"). Then(). + ExpectAnalysisRunCount(1). ExpectActiveRevision("1"). ExpectPreviewRevision("2"). When(). @@ -431,7 +435,7 @@ spec: activeService: bluegreen-kitchensink-active previewService: bluegreen-kitchensink-preview previewReplicaCount: 1 - autoPromotionSeconds: 5 + autoPromotionSeconds: 10 scaleDownDelaySeconds: 5 prePromotionAnalysis: templates: @@ -477,6 +481,14 @@ spec: ExpectReplicaCounts(2, 3, 1, 2, 2). // desired, current, updated, ready, available ExpectAnalysisRunCount(1). When(). + Sleep(5*time.Second). // sleep 5 seconds, verify we did not autopromote too early + Then(). + ExpectActiveRevision("1"). + ExpectStableRevision("1"). + ExpectRevisionPodCount("1", 2). + ExpectRevisionPodCount("2", 1). + ExpectReplicaCounts(2, 3, 1, 2, 2). // desired, current, updated, ready, available + When(). WaitForActiveRevision("2"). // no need to manually promote since autoPromotionSeconds will do it Then(). ExpectRevisionPodCount("2", 2).