diff --git a/manifests/crds/rollout-crd.yaml b/manifests/crds/rollout-crd.yaml index 68411afed9..d3985f8fda 100644 --- a/manifests/crds/rollout-crd.yaml +++ b/manifests/crds/rollout-crd.yaml @@ -2680,6 +2680,8 @@ spec: - type type: object type: array + controllerPause: + type: boolean currentPodHash: type: string currentStepHash: @@ -2689,6 +2691,19 @@ spec: type: integer observedGeneration: type: string + pauseConditions: + items: + properties: + reason: + type: string + startTime: + format: date-time + type: string + required: + - reason + - startTime + type: object + type: array pauseStartTime: format: date-time type: string diff --git a/manifests/install.yaml b/manifests/install.yaml index a165369a2a..c6269b85dc 100644 --- a/manifests/install.yaml +++ b/manifests/install.yaml @@ -10354,6 +10354,8 @@ spec: - type type: object type: array + controllerPause: + type: boolean currentPodHash: type: string currentStepHash: @@ -10363,6 +10365,19 @@ spec: type: integer observedGeneration: type: string + pauseConditions: + items: + properties: + reason: + type: string + startTime: + format: date-time + type: string + required: + - reason + - startTime + type: object + type: array pauseStartTime: format: date-time type: string diff --git a/manifests/namespace-install.yaml b/manifests/namespace-install.yaml index 4bc229b0ff..79d35f04b5 100644 --- a/manifests/namespace-install.yaml +++ b/manifests/namespace-install.yaml @@ -10354,6 +10354,8 @@ spec: - type type: object type: array + controllerPause: + type: boolean currentPodHash: type: string currentStepHash: @@ -10363,6 +10365,19 @@ spec: type: integer observedGeneration: type: string + pauseConditions: + items: + properties: + reason: + type: string + startTime: + format: date-time + type: string + required: + - reason + - startTime + type: object + type: array pauseStartTime: format: date-time type: string diff --git a/pkg/apis/rollouts/v1alpha1/openapi_generated.go b/pkg/apis/rollouts/v1alpha1/openapi_generated.go index c91b8a13cc..cb90b402d6 100644 --- a/pkg/apis/rollouts/v1alpha1/openapi_generated.go +++ b/pkg/apis/rollouts/v1alpha1/openapi_generated.go @@ -54,6 +54,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.Metric": schema_pkg_apis_rollouts_v1alpha1_Metric(ref), "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.MetricProvider": schema_pkg_apis_rollouts_v1alpha1_MetricProvider(ref), "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.MetricResult": schema_pkg_apis_rollouts_v1alpha1_MetricResult(ref), + "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.PauseCondition": schema_pkg_apis_rollouts_v1alpha1_PauseCondition(ref), "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.PodTemplateMetadata": schema_pkg_apis_rollouts_v1alpha1_PodTemplateMetadata(ref), "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.PrometheusMetric": schema_pkg_apis_rollouts_v1alpha1_PrometheusMetric(ref), "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.Rollout": schema_pkg_apis_rollouts_v1alpha1_Rollout(ref), @@ -1238,6 +1239,32 @@ func schema_pkg_apis_rollouts_v1alpha1_MetricResult(ref common.ReferenceCallback } } +func schema_pkg_apis_rollouts_v1alpha1_PauseCondition(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "PauseCondition the reason for a pause and when it started", + Properties: map[string]spec.Schema{ + "reason": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "startTime": { + SchemaProps: spec.SchemaProps{ + Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Time"), + }, + }, + }, + Required: []string{"reason", "startTime"}, + }, + }, + Dependencies: []string{ + "k8s.io/apimachinery/pkg/apis/meta/v1.Time"}, + } +} + func schema_pkg_apis_rollouts_v1alpha1_PodTemplateMetadata(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ @@ -1657,6 +1684,26 @@ func schema_pkg_apis_rollouts_v1alpha1_RolloutStatus(ref common.ReferenceCallbac SchemaProps: spec.SchemaProps{ Description: "RolloutStatus is the status for a Rollout resource", Properties: map[string]spec.Schema{ + "pauseConditions": { + SchemaProps: spec.SchemaProps{ + Description: "PauseConditions indicates why the rollout is currently paused", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Ref: ref("github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.PauseCondition"), + }, + }, + }, + }, + }, + "controllerPause": { + SchemaProps: spec.SchemaProps{ + Description: "ControllerPause indicates the controller has paused the rollout", + Type: []string{"boolean"}, + Format: "", + }, + }, "currentPodHash": { SchemaProps: spec.SchemaProps{ Description: "CurrentPodHash the hash of the current pod template", @@ -1769,7 +1816,7 @@ func schema_pkg_apis_rollouts_v1alpha1_RolloutStatus(ref common.ReferenceCallbac }, }, Dependencies: []string{ - "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.BlueGreenStatus", "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.CanaryStatus", "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.RolloutCondition", "k8s.io/apimachinery/pkg/apis/meta/v1.Time"}, + "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.BlueGreenStatus", "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.CanaryStatus", "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.PauseCondition", "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1.RolloutCondition", "k8s.io/apimachinery/pkg/apis/meta/v1.Time"}, } } diff --git a/pkg/apis/rollouts/v1alpha1/types.go b/pkg/apis/rollouts/v1alpha1/types.go index 24a853ff60..2ac26f9253 100644 --- a/pkg/apis/rollouts/v1alpha1/types.go +++ b/pkg/apis/rollouts/v1alpha1/types.go @@ -254,8 +254,30 @@ type RolloutPause struct { Duration *int32 `json:"duration,omitempty"` } +// PauseReason reasons that the rollout can pause +type PauseReason string + +const ( + // PauseReasonInconclusiveAnalysis pauses rollout when rollout has an inconclusive analysis run + PauseReasonInconclusiveAnalysis PauseReason = "InconclusiveAnalysisRun" + // PauseReasonCanaryPauseStep pause rollout for canary pause step + PauseReasonCanaryPauseStep PauseReason = "CanaryPauseStep" + // PauseReasonBlueGreenPause pause rollout before promoting rollout + PauseReasonBlueGreenPause PauseReason = "BlueGreenPause" +) + +// PauseCondition the reason for a pause and when it started +type PauseCondition struct { + Reason PauseReason `json:"reason"` + StartTime metav1.Time `json:"startTime"` +} + // RolloutStatus is the status for a Rollout resource type RolloutStatus struct { + // PauseConditions indicates why the rollout is currently paused + PauseConditions []PauseCondition `json:"pauseConditions,omitempty"` + //ControllerPause indicates the controller has paused the rollout + ControllerPause bool `json:"controllerPause,omitempty"` // CurrentPodHash the hash of the current pod template // +optional CurrentPodHash string `json:"currentPodHash,omitempty"` diff --git a/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go index 828f12d8d6..81a6865fca 100644 --- a/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go @@ -709,6 +709,23 @@ func (in *MetricResult) DeepCopy() *MetricResult { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PauseCondition) DeepCopyInto(out *PauseCondition) { + *out = *in + in.StartTime.DeepCopyInto(&out.StartTime) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PauseCondition. +func (in *PauseCondition) DeepCopy() *PauseCondition { + if in == nil { + return nil + } + out := new(PauseCondition) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PodTemplateMetadata) DeepCopyInto(out *PodTemplateMetadata) { *out = *in @@ -959,6 +976,13 @@ func (in *RolloutSpec) DeepCopy() *RolloutSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RolloutStatus) DeepCopyInto(out *RolloutStatus) { *out = *in + if in.PauseConditions != nil { + in, out := &in.PauseConditions, &out.PauseConditions + *out = make([]PauseCondition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.CurrentStepIndex != nil { in, out := &in.CurrentStepIndex, &out.CurrentStepIndex *out = new(int32) diff --git a/pkg/kubectl-argo-rollouts/cmd/resume/resume.go b/pkg/kubectl-argo-rollouts/cmd/resume/resume.go index 632c3e7eaf..a73493da31 100644 --- a/pkg/kubectl-argo-rollouts/cmd/resume/resume.go +++ b/pkg/kubectl-argo-rollouts/cmd/resume/resume.go @@ -14,6 +14,14 @@ const ( # Resume a rollout %[1]s resume guestbook ` + unpausePatch = `{ + "spec": { + "paused": false + }, + "status: { + "pauseConditions": null + } +}` ) // NewCmdResume returns a new instance of an `rollouts resume` command @@ -29,7 +37,7 @@ func NewCmdResume(o *options.ArgoRolloutsOptions) *cobra.Command { } rolloutIf := o.RolloutsClientset().ArgoprojV1alpha1().Rollouts(o.Namespace()) for _, name := range args { - ro, err := rolloutIf.Patch(name, types.MergePatchType, []byte(`{"spec":{"paused":false}}`)) + ro, err := rolloutIf.Patch(name, types.MergePatchType, []byte(unpausePatch)) if err != nil { return err } diff --git a/pkg/kubectl-argo-rollouts/cmd/resume/resume_test.go b/pkg/kubectl-argo-rollouts/cmd/resume/resume_test.go index d670a745e3..ce6f370ef4 100644 --- a/pkg/kubectl-argo-rollouts/cmd/resume/resume_test.go +++ b/pkg/kubectl-argo-rollouts/cmd/resume/resume_test.go @@ -38,6 +38,12 @@ func TestResumeCmdSuccess(t *testing.T) { Spec: v1alpha1.RolloutSpec{ Paused: true, }, + Status: v1alpha1.RolloutStatus{ + PauseConditions: []v1alpha1.PauseCondition{{ + Reason: v1alpha1.PauseReasonCanaryPauseStep, + }}, + ControllerPause: true, + }, } tf, o := options.NewFakeArgoRolloutsOptions(&ro) @@ -46,7 +52,8 @@ func TestResumeCmdSuccess(t *testing.T) { fakeClient.ReactionChain = nil fakeClient.AddReactor("patch", "*", func(action kubetesting.Action) (handled bool, ret runtime.Object, err error) { if patchAction, ok := action.(kubetesting.PatchAction); ok { - if string(patchAction.GetPatch()) == `{"spec":{"paused":false}}` { + if string(patchAction.GetPatch()) == unpausePatch { + ro.Status.PauseConditions = nil ro.Spec.Paused = false } } @@ -59,7 +66,9 @@ func TestResumeCmdSuccess(t *testing.T) { err := cmd.Execute() assert.Nil(t, err) + assert.Nil(t, ro.Status.PauseConditions) assert.False(t, ro.Spec.Paused) + assert.True(t, ro.Status.ControllerPause) stdout := o.Out.(*bytes.Buffer).String() stderr := o.ErrOut.(*bytes.Buffer).String() assert.Equal(t, stdout, "rollout 'guestbook' resumed\n") diff --git a/rollout/analysis.go b/rollout/analysis.go index 7151f4249c..164068eec3 100644 --- a/rollout/analysis.go +++ b/rollout/analysis.go @@ -44,7 +44,7 @@ func (c *RolloutController) getAnalysisRunsForRollout(rollout *v1alpha1.Rollout) func (c *RolloutController) reconcileAnalysisRuns(roCtx *canaryContext) error { rollout := roCtx.Rollout() otherArs := roCtx.OtherAnalysisRuns() - if rollout.Spec.Paused { + if len(rollout.Status.PauseConditions) > 0 { return nil } newCurrentAnalysisRuns := []*v1alpha1.AnalysisRun{} @@ -147,6 +147,10 @@ func (c *RolloutController) reconcileStepBasedAnalysisRun(roCtx *canaryContext) return currentAr, err } + if currentAr.Status != nil && currentAr.Status.Status == v1alpha1.AnalysisStatusInconclusive { + roCtx.PauseContext().AddPauseCondition(v1alpha1.PauseReasonInconclusiveAnalysis) + } + return currentAr, nil } diff --git a/rollout/analysis_test.go b/rollout/analysis_test.go index d757a2f250..96a6a63740 100644 --- a/rollout/analysis_test.go +++ b/rollout/analysis_test.go @@ -542,20 +542,21 @@ func TestPausedStepAfterInconclusiveAnalysisRun(t *testing.T) { patch := f.getPatchedRollout(patchIndex) now := metav1.Now().UTC().Format(time.RFC3339) expectedPatch := `{ - "spec":{ - "paused": true - }, "status": { "conditions": %s, "canary": { "currentStepAnalysisRun": null }, - "pauseStartTime": "%s" + "pauseConditions": [{ + "reason": "%s", + "startTime": "%s" + }], + "controllerPause": true } }` condition := generateConditionsPatch(true, conditions.ReplicaSetUpdatedReason, r2, false) - assert.Equal(t, calculatePatch(r2, fmt.Sprintf(expectedPatch, condition, now)), patch) + assert.Equal(t, calculatePatch(r2, fmt.Sprintf(expectedPatch, condition, v1alpha1.PauseReasonInconclusiveAnalysis, now)), patch) } func TestErrorConditionAfterErrorAnalysisRun(t *testing.T) { diff --git a/rollout/bluegreen.go b/rollout/bluegreen.go index 37b142212d..d902016a29 100644 --- a/rollout/bluegreen.go +++ b/rollout/bluegreen.go @@ -32,8 +32,9 @@ func (c *RolloutController) rolloutBlueGreen(r *v1alpha1.Rollout, rsList []*apps logCtx := roCtx.Log() allRSs := roCtx.AllRSs() if reconcileBlueGreenTemplateChange(roCtx) { + roCtx.PauseContext().ClearPauseConditions() logCtx.Infof("New pod template or template change detected") - return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx, false) + return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx) } // Scale up, if we can. @@ -61,44 +62,44 @@ func (c *RolloutController) rolloutBlueGreen(r *v1alpha1.Rollout, rsList []*apps if scaledUp { logCtx.Infof("Not finished reconciling new ReplicaSet '%s'", newRS.Name) - return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx, false) + return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx) } if scaledDown { logCtx.Info("Not finished reconciling old replica sets") - return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx, false) + return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx) } if switchPreviewSvc { logCtx.Infof("Not finished reconciling preview service' %s'", previewSvc.Name) - return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx, false) + return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx) } if !replicasetutil.ReadyForPause(r, newRS, allRSs) { logutil.WithRollout(r).Infof("New RS '%s' is not fully saturated", newRS.Name) - return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx, false) + return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx) } noFastRollback := true if newRS != nil { - _, ok := newRS.Annotations[v1alpha1.DefaultReplicaSetScaleDownDeadlineAnnotationKey] - noFastRollback = !ok - if !noFastRollback { - logCtx.Infof("Detected '%s' annotation and will skip pause", newRS.Name) + _, hasScaleDownDeadlineAnnotationKey := newRS.Annotations[v1alpha1.DefaultReplicaSetScaleDownDeadlineAnnotationKey] + if hasScaleDownDeadlineAnnotationKey { + logCtx.Infof("Detected scale down annotation for ReplicaSet '%s' and will skip pause", newRS.Name) + noFastRollback = false } } - if !defaults.GetAutoPromotionEnabledOrDefault(r) && noFastRollback { + if noFastRollback { logCtx.Info("Reconciling pause") pauseBeforeSwitchActive := c.reconcileBlueGreenPause(activeSvc, previewSvc, roCtx) if pauseBeforeSwitchActive { logCtx.Info("Not finished reconciling pause") - return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx, true) + return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx) } } logCtx.Infof("Reconciling active service '%s'", activeSvc.Name) if !annotations.IsSaturated(r, newRS) { logCtx.Infof("New RS '%s' is not fully saturated", newRS.Name) - return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx, false) + return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx) } switchActiveSvc, err := c.reconcileActiveService(roCtx, previewSvc, activeSvc) if err != nil { @@ -106,7 +107,7 @@ func (c *RolloutController) rolloutBlueGreen(r *v1alpha1.Rollout, rsList []*apps } if switchActiveSvc { logCtx.Infof("Not finished reconciling active service '%s'", activeSvc.Name) - return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx, false) + return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx) } if _, ok := newRS.Annotations[v1alpha1.DefaultReplicaSetScaleDownDeadlineAnnotationKey]; ok { @@ -118,7 +119,7 @@ func (c *RolloutController) rolloutBlueGreen(r *v1alpha1.Rollout, rsList []*apps } } - return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx, false) + return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx) } // reconcileBlueGreenTemplateChange returns true if we detect there was a change in the pod template @@ -134,24 +135,37 @@ func reconcileBlueGreenTemplateChange(roCtx *blueGreenContext) bool { func (c *RolloutController) reconcileBlueGreenPause(activeSvc, previewSvc *corev1.Service, roCtx *blueGreenContext) bool { rollout := roCtx.Rollout() + + if defaults.GetAutoPromotionEnabledOrDefault(rollout) { + return false + } + newRS := roCtx.NewRS() newRSPodHash := newRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] if _, ok := activeSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey]; !ok { return false } + + cond := roCtx.PauseContext().GetPauseCondition(v1alpha1.PauseReasonBlueGreenPause) // If the rollout is not paused and the active service is not point at the newRS, we should pause the rollout. - if !rollout.Spec.Paused && rollout.Status.PauseStartTime == nil && !rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint && activeSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey] != newRSPodHash { + if cond == nil && !rollout.Status.ControllerPause && !rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint && activeSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey] != newRSPodHash { + roCtx.PauseContext().AddPauseCondition(v1alpha1.PauseReasonBlueGreenPause) return true } - pauseStartTime := rollout.Status.PauseStartTime autoPromoteActiveServiceDelaySeconds := rollout.Spec.Strategy.BlueGreen.AutoPromotionSeconds - if autoPromoteActiveServiceDelaySeconds != nil && pauseStartTime != nil { - c.checkEnqueueRolloutDuringWait(rollout, *pauseStartTime, *autoPromoteActiveServiceDelaySeconds) + if autoPromoteActiveServiceDelaySeconds != nil && cond != nil { + c.checkEnqueueRolloutDuringWait(rollout, cond.StartTime, *autoPromoteActiveServiceDelaySeconds) + switchDeadline := cond.StartTime.Add(time.Duration(*autoPromoteActiveServiceDelaySeconds) * time.Second) + now := metav1.Now() + if now.After(switchDeadline) { + roCtx.PauseContext().RemovePauseCondition(v1alpha1.PauseReasonBlueGreenPause) + } + } - return rollout.Spec.Paused && pauseStartTime != nil + return cond != nil && rollout.Status.ControllerPause } // scaleDownOldReplicaSetsForBlueGreen scales down old replica sets when rollout strategy is "Blue Green". @@ -200,7 +214,7 @@ func (c *RolloutController) scaleDownOldReplicaSetsForBlueGreen(oldRSs []*appsv1 return totalScaledDown, nil } -func (c *RolloutController) syncRolloutStatusBlueGreen(previewSvc *corev1.Service, activeSvc *corev1.Service, roCtx *blueGreenContext, addPause bool) error { +func (c *RolloutController) syncRolloutStatusBlueGreen(previewSvc *corev1.Service, activeSvc *corev1.Service, roCtx *blueGreenContext) error { r := roCtx.Rollout() newRS := roCtx.NewRS() oldRSs := roCtx.OlderRSs() @@ -236,11 +250,10 @@ func (c *RolloutController) syncRolloutStatusBlueGreen(previewSvc *corev1.Servic newStatus.Selector = metav1.FormatLabelSelector(r.Spec.Selector) } - pauseStartTime, paused := calculatePauseStatus(roCtx, addPause) - newStatus.PauseStartTime = pauseStartTime newStatus.BlueGreen.ScaleUpPreviewCheckPoint = calculateScaleUpPreviewCheckPoint(roCtx, activeRS) + newStatus = c.calculateRolloutConditions(roCtx, newStatus) - return c.persistRolloutStatus(r, &newStatus, &paused) + return c.persistRolloutStatus(roCtx, &newStatus) } func calculateScaleUpPreviewCheckPoint(roCtx *blueGreenContext, activeRS *appsv1.ReplicaSet) bool { diff --git a/rollout/bluegreen_test.go b/rollout/bluegreen_test.go index 5a81011651..42435bea6a 100644 --- a/rollout/bluegreen_test.go +++ b/rollout/bluegreen_test.go @@ -157,14 +157,16 @@ func TestBlueGreenHandlePause(t *testing.T) { f.run(getKey(r2, t)) expectedPatch := `{ - "spec": { - "paused": true - }, "status": { - "pauseStartTime": "%s" + "pauseConditions": [{ + "reason": "%s", + "startTime": "%s" + }], + "controllerPause": true } }` - assert.Equal(t, calculatePatch(r2, fmt.Sprintf(expectedPatch, metav1.Now().UTC().Format(time.RFC3339))), patch) + now := metav1.Now().UTC().Format(time.RFC3339) + assert.Equal(t, calculatePatch(r2, fmt.Sprintf(expectedPatch, v1alpha1.PauseReasonBlueGreenPause, now)), patch) }) @@ -295,7 +297,8 @@ func TestBlueGreenHandlePause(t *testing.T) { r2 = updateBlueGreenRolloutStatus(r2, rs2PodHash, rs1PodHash, 1, 1, 2, 1, true, true) now := metav1.Now() before := metav1.NewTime(now.Add(-1 * time.Minute)) - r2.Status.PauseStartTime = &before + r2.Status.PauseConditions[0].StartTime = before + r2.Status.ControllerPause = true pausedCondition, _ := newProgressingCondition(conditions.PausedRolloutReason, rs2) conditions.SetRolloutCondition(&r2.Status, pausedCondition) @@ -311,11 +314,9 @@ func TestBlueGreenHandlePause(t *testing.T) { f.serviceLister = append(f.serviceLister, activeSvc, previewSvc) expectedPatchWithoutSubs := `{ - "spec": { - "paused": null - }, "status": { - "pauseStartTime": null + "pauseConditions": null, + "controllerPause": null } }` expectedPatch := calculatePatch(r2, expectedPatchWithoutSubs) @@ -403,14 +404,15 @@ func TestBlueGreenHandlePause(t *testing.T) { now := metav1.Now().UTC().Format(time.RFC3339) expectedPatchWithoutSubs := `{ - "spec": { - "paused": true - }, "status": { - "pauseStartTime": "%s" + "pauseConditions": [{ + "reason":"%s", + "startTime": "%s" + }], + "controllerPause": true } }` - expectedPatch := calculatePatch(r2, fmt.Sprintf(expectedPatchWithoutSubs, now)) + expectedPatch := calculatePatch(r2, fmt.Sprintf(expectedPatchWithoutSubs, v1alpha1.PauseReasonBlueGreenPause, now)) patchIndex := f.expectPatchRolloutActionWithPatch(r2, expectedPatch) f.run(getKey(r2, t)) @@ -477,8 +479,7 @@ func TestBlueGreenHandlePause(t *testing.T) { r2.Spec.Strategy.BlueGreen.ScaleDownDelaySeconds = pointer.Int32Ptr(10) r2 = updateBlueGreenRolloutStatus(r2, rs2PodHash, rs1PodHash, 1, 1, 2, 1, false, true) - now := metav1.Now() - r2.Status.PauseStartTime = &now + r2.Status.ControllerPause = true pausedCondition, _ := newProgressingCondition(conditions.PausedRolloutReason, rs2) conditions.SetRolloutCondition(&r2.Status, pausedCondition) @@ -516,7 +517,7 @@ func TestBlueGreenHandlePause(t *testing.T) { "blueGreen": { "activeSelector": "%s" }, - "pauseStartTime": null, + "controllerPause":null, "conditions": %s, "selector": "%s" } @@ -661,7 +662,7 @@ func TestBlueGreenRolloutStatusHPAStatusFieldsNoActiveSelector(t *testing.T) { c, _, _ := f.newController(noResyncPeriodFunc) - err := c.syncRolloutStatusBlueGreen(nil, activeSvc, roCtx, false) + err := c.syncRolloutStatusBlueGreen(nil, activeSvc, roCtx) assert.Nil(t, err) assert.Len(t, f.client.Actions(), 1) result := f.client.Actions()[0].(core.PatchAction).GetPatch() diff --git a/rollout/canary.go b/rollout/canary.go index 79b5d9231f..c83512840d 100644 --- a/rollout/canary.go +++ b/rollout/canary.go @@ -119,14 +119,22 @@ func (c *RolloutController) reconcileCanaryPause(roCtx *canaryContext) bool { if currentStep.Pause == nil { return false } - - if currentStep.Pause.Duration == nil { + cond := roCtx.PauseContext().GetPauseCondition(v1alpha1.PauseReasonCanaryPauseStep) + if cond == nil { + // When the pause condition is null, that means the rollout is in an not paused state. + // As a result,, the controller needs to detect whether a rollout was unpaused or the + // rollout needs to be paused for the first time. If the ControllerPause is false, + // the controller has not paused the rollout yet and needs to do so before it + // can proceed. + if !rollout.Status.ControllerPause { + roCtx.PauseContext().AddPauseCondition(v1alpha1.PauseReasonCanaryPauseStep) + } return true } - if rollout.Status.PauseStartTime == nil { + if currentStep.Pause.Duration == nil { return true } - c.checkEnqueueRolloutDuringWait(rollout, *rollout.Status.PauseStartTime, *currentStep.Pause.Duration) + c.checkEnqueueRolloutDuringWait(rollout, cond.StartTime, *currentStep.Pause.Duration) return true } @@ -206,7 +214,7 @@ func completedCurrentCanaryStep(roCtx *canaryContext) bool { return false } if currentStep.Pause != nil { - return completedPauseStep(r, *currentStep.Pause) + return roCtx.PauseContext().CompletedPauseStep(*currentStep.Pause) } if currentStep.SetWeight != nil && replicasetutil.AtDesiredReplicaCountsForCanary(r, roCtx.NewRS(), roCtx.StableRS(), roCtx.OlderRSs()) { logCtx.Info("Rollout has reached the desired state for the correct weight") @@ -239,7 +247,7 @@ func (c *RolloutController) syncRolloutStatusCanary(roCtx *canaryContext) error newStatus.HPAReplicas = replicasetutil.GetActualReplicaCountForReplicaSets(allRSs) newStatus.Selector = metav1.FormatLabelSelector(r.Spec.Selector) - currentStep, currentStepIndex := replicasetutil.GetCurrentCanaryStep(r) + _, currentStepIndex := replicasetutil.GetCurrentCanaryStep(r) newStatus.Canary.StableRS = r.Status.Canary.StableRS newStatus.CurrentStepHash = conditions.ComputeStepHash(r) stepCount := int32(len(r.Spec.Strategy.Canary.Steps)) @@ -254,8 +262,9 @@ func (c *RolloutController) syncRolloutStatusCanary(roCtx *canaryContext) error c.recorder.Event(r, corev1.EventTypeNormal, "SkipSteps", msg) } } + roCtx.PauseContext().ClearPauseConditions() newStatus = c.calculateRolloutConditions(roCtx, newStatus) - return c.persistRolloutStatus(r, &newStatus, pointer.BoolPtr(false)) + return c.persistRolloutStatus(roCtx, &newStatus) } if r.Status.Canary.StableRS == "" { @@ -269,17 +278,11 @@ func (c *RolloutController) syncRolloutStatusCanary(roCtx *canaryContext) error newStatus.CurrentStepIndex = &stepCount } + roCtx.PauseContext().ClearPauseConditions() newStatus = c.calculateRolloutConditions(roCtx, newStatus) - return c.persistRolloutStatus(r, &newStatus, pointer.BoolPtr(false)) + return c.persistRolloutStatus(roCtx, &newStatus) } - currStepAr := analysisutil.GetCurrentStepAnalysisRun(currArs) - if currStepAr != nil { - if currStepAr.Status == nil || !currStepAr.Status.Status.Completed() || analysisutil.IsTerminating(currStepAr) { - newStatus.Canary.CurrentStepAnalysisRun = currStepAr.Name - } - - } currBackgroundAr := analysisutil.GetCurrentBackgroundAnalysisRun(currArs) if currBackgroundAr != nil { if currBackgroundAr.Status == nil || !currBackgroundAr.Status.Status.Completed() || analysisutil.IsTerminating(currBackgroundAr) { @@ -287,31 +290,29 @@ func (c *RolloutController) syncRolloutStatusCanary(roCtx *canaryContext) error } } - if stepCount == 0 { - logCtx.Info("Rollout has no steps") + if currentStepIndex != nil && *currentStepIndex == stepCount { + logCtx.Info("Rollout has executed every step") + newStatus.CurrentStepIndex = &stepCount if newRS != nil && newRS.Status.AvailableReplicas == defaults.GetRolloutReplicasOrDefault(r) { + //TODO(dthomson) cancel background analysis here not when we reach currentStepIndex == stepCount logCtx.Info("New RS has successfully progressed") newStatus.Canary.StableRS = newStatus.CurrentPodHash } + roCtx.PauseContext().ClearPauseConditions() newStatus = c.calculateRolloutConditions(roCtx, newStatus) - return c.persistRolloutStatus(r, &newStatus, pointer.BoolPtr(false)) + return c.persistRolloutStatus(roCtx, &newStatus) } - if *currentStepIndex == stepCount { - logCtx.Info("Rollout has executed every step") - newStatus.CurrentStepIndex = &stepCount + if stepCount == 0 { + logCtx.Info("Rollout has no steps") if newRS != nil && newRS.Status.AvailableReplicas == defaults.GetRolloutReplicasOrDefault(r) { logCtx.Info("New RS has successfully progressed") newStatus.Canary.StableRS = newStatus.CurrentPodHash } newStatus = c.calculateRolloutConditions(roCtx, newStatus) - return c.persistRolloutStatus(r, &newStatus, pointer.BoolPtr(false)) + return c.persistRolloutStatus(roCtx, &newStatus) } - // reconcileCanaryPause will ensure we will requeue this rollout at the appropriate time - // if we are at a pause step with a duration. - c.reconcileCanaryPause(roCtx) - if completedCurrentCanaryStep(roCtx) { *currentStepIndex++ newStatus.CurrentStepIndex = currentStepIndex @@ -320,8 +321,17 @@ func (c *RolloutController) syncRolloutStatusCanary(roCtx *canaryContext) error } logCtx.Infof("Incrementing the Current Step Index to %d", *currentStepIndex) c.recorder.Eventf(r, corev1.EventTypeNormal, "SetStepIndex", "Set Step Index to %d", int(*currentStepIndex)) + roCtx.PauseContext().RemovePauseCondition(v1alpha1.PauseReasonCanaryPauseStep) newStatus = c.calculateRolloutConditions(roCtx, newStatus) - return c.persistRolloutStatus(r, &newStatus, pointer.BoolPtr(false)) + return c.persistRolloutStatus(roCtx, &newStatus) + } + + currStepAr := analysisutil.GetCurrentStepAnalysisRun(currArs) + if currStepAr != nil { + if currStepAr.Status == nil || !currStepAr.Status.Status.Completed() || analysisutil.IsTerminating(currStepAr) { + newStatus.Canary.CurrentStepAnalysisRun = currStepAr.Name + } + } if currExp != nil { @@ -331,12 +341,9 @@ func (c *RolloutController) syncRolloutStatusCanary(roCtx *canaryContext) error } } - addPause := !r.Spec.Paused && currentStep != nil && currentStep.Pause != nil - var paused bool - newStatus.PauseStartTime, paused = calculatePauseStatus(roCtx, addPause) newStatus.CurrentStepIndex = currentStepIndex newStatus = c.calculateRolloutConditions(roCtx, newStatus) - return c.persistRolloutStatus(r, &newStatus, &paused) + return c.persistRolloutStatus(roCtx, &newStatus) } func (c *RolloutController) reconcileCanaryReplicaSets(roCtx *canaryContext) (bool, error) { diff --git a/rollout/canary_test.go b/rollout/canary_test.go index df009a6abc..150fc9f794 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -104,17 +104,19 @@ func TestCanaryRolloutEnterPauseState(t *testing.T) { patch := f.getPatchedRollout(patchIndex) expectedPatchTemplate := `{ - "spec":{ - "paused": true - }, "status":{ - "pauseStartTime":"%s", - "conditions": %s + "pauseConditions":[{ + "reason": "%s", + "startTime": "%s" + }], + "conditions": %s, + "controllerPause": true } }` conditions := generateConditionsPatch(true, conditions.ReplicaSetUpdatedReason, r2, false) - expectedPatchWithoutObservedGen := fmt.Sprintf(expectedPatchTemplate, metav1.Now().UTC().Format(time.RFC3339), conditions) + now := metav1.Now().UTC().Format(time.RFC3339) + expectedPatchWithoutObservedGen := fmt.Sprintf(expectedPatchTemplate, v1alpha1.PauseReasonCanaryPauseStep, now, conditions) expectedPatch := calculatePatch(r2, expectedPatchWithoutObservedGen) assert.Equal(t, expectedPatch, patch) } @@ -209,8 +211,7 @@ func TestCanaryRolloutIncrementStepAfterUnPaused(t *testing.T) { r2 = updateCanaryRolloutStatus(r2, rs1PodHash, 10, 0, 10, false) r2.Status.AvailableReplicas = 10 - now := metav1.Now() - r2.Status.PauseStartTime = &now + r2.Status.ControllerPause = true f.rolloutLister = append(f.rolloutLister, r2) f.objects = append(f.objects, r2) @@ -222,7 +223,7 @@ func TestCanaryRolloutIncrementStepAfterUnPaused(t *testing.T) { patch := f.getPatchedRollout(patchIndex) expectedPatchTemplate := `{ "status":{ - "pauseStartTime": null, + "controllerPause": null, "conditions" : %s, "currentStepIndex": 1 } @@ -747,52 +748,6 @@ func TestCanaryRolloutIncrementStepIfSetWeightsAreCorrect(t *testing.T) { assert.Equal(t, calculatePatch(r3, fmt.Sprintf(expectedPatch, newConditions)), patch) } -func TestSyncRolloutsSetPauseStartTime(t *testing.T) { - f := newFixture(t) - defer f.Close() - - steps := []v1alpha1.CanaryStep{ - { - SetWeight: int32Ptr(10), - }, { - Pause: &v1alpha1.RolloutPause{ - Duration: int32Ptr(10), - }, - }, - } - r1 := newCanaryRollout("foo", 10, nil, steps, int32Ptr(1), intstr.FromInt(1), intstr.FromInt(0)) - r2 := bumpVersion(r1) - - rs1 := newReplicaSetWithStatus(r1, 9, 9) - rs1PodHash := rs1.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] - rs2 := newReplicaSetWithStatus(r2, 1, 1) - - f.kubeobjects = append(f.kubeobjects, rs1, rs2) - f.replicaSetLister = append(f.replicaSetLister, rs1, rs2) - - r2 = updateCanaryRolloutStatus(r2, rs1PodHash, 10, 1, 10, false) - f.rolloutLister = append(f.rolloutLister, r2) - f.objects = append(f.objects, r2) - - expectedPatchWithoutTime := `{ - "spec" :{ - "paused": true - }, - "status":{ - "pauseStartTime": "%s", - "conditions": %s - } - }` - condtions := generateConditionsPatch(true, conditions.ReplicaSetUpdatedReason, r2, false) - expectedPatch := fmt.Sprintf(expectedPatchWithoutTime, metav1.Now().UTC().Format(time.RFC3339), condtions) - - index := f.expectPatchRolloutActionWithPatch(r2, expectedPatch) - f.run(getKey(r2, t)) - - patch := f.getPatchedRollout(index) - assert.Equal(t, calculatePatch(r2, expectedPatch), patch) -} - func TestSyncRolloutWaitAddToQueue(t *testing.T) { f := newFixture(t) defer f.Close() @@ -898,10 +853,16 @@ func TestSyncRolloutWaitIncrementStepIndex(t *testing.T) { f.replicaSetLister = append(f.replicaSetLister, rs1, rs2) r2 = updateCanaryRolloutStatus(r2, rs1PodHash, 10, 1, 10, false) + pausedCondition, _ := newProgressingCondition(conditions.PausedRolloutReason, rs2) + conditions.SetRolloutCondition(&r2.Status, pausedCondition) + earlier := metav1.Now() earlier.Time = earlier.Add(-10 * time.Second) - r2.Status.PauseStartTime = &earlier - + r2.Status.ControllerPause = true + r2.Status.PauseConditions = []v1alpha1.PauseCondition{{ + Reason: v1alpha1.PauseReasonCanaryPauseStep, + StartTime: earlier, + }} f.rolloutLister = append(f.rolloutLister, r2) f.objects = append(f.objects, r2) @@ -911,13 +872,12 @@ func TestSyncRolloutWaitIncrementStepIndex(t *testing.T) { patch := f.getPatchedRollout(patchIndex) expectedPatch := `{ "status":{ - "pauseStartTime": null, - "currentStepIndex":2, - "conditions": %s + "controllerPause": null, + "pauseConditions": null, + "currentStepIndex":2 } }` - newCondtions := generateConditionsPatch(true, conditions.ReplicaSetUpdatedReason, rs2, false) - assert.Equal(t, calculatePatch(r2, fmt.Sprintf(expectedPatch, newCondtions)), patch) + assert.Equal(t, calculatePatch(r2, expectedPatch), patch) } func TestCanaryRolloutStatusHPAStatusFields(t *testing.T) { @@ -1070,11 +1030,12 @@ func TestResumeRolloutAfterPauseDuration(t *testing.T) { rs1 := newReplicaSetWithStatus(r1, 1, 1) rs1PodHash := rs1.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] r1 = updateCanaryRolloutStatus(r1, rs1PodHash, 1, 1, 1, true) - r1.Spec.Paused = true overAMinuteAgo := metav1.Time{Time: time.Now().Add(-61 * time.Second)} r1.Status.ObservedGeneration = conditions.ComputeGenerationHash(r1.Spec) - r1.Status.PauseStartTime = &overAMinuteAgo - + r1.Status.PauseConditions = []v1alpha1.PauseCondition{{ + Reason: v1alpha1.PauseReasonCanaryPauseStep, + StartTime: overAMinuteAgo, + }} f.kubeobjects = append(f.kubeobjects, rs1) f.replicaSetLister = append(f.replicaSetLister, rs1) f.rolloutLister = append(f.rolloutLister, r1) @@ -1089,14 +1050,9 @@ func TestResumeRolloutAfterPauseDuration(t *testing.T) { err := json.Unmarshal([]byte(patch), &patchObj) assert.NoError(t, err) - spec := patchObj["spec"].(map[string]interface{}) - paused, ok := spec["paused"] - assert.True(t, ok) - assert.Nil(t, paused) - status := patchObj["status"].(map[string]interface{}) assert.Equal(t, float64(2), status["currentStepIndex"]) - pauseStartTime, ok := status["pauseStartTime"] + controllerPause, ok := status["controllerPause"] assert.True(t, ok) - assert.Equal(t, nil, pauseStartTime) + assert.Nil(t, controllerPause) } diff --git a/rollout/context.go b/rollout/context.go index ccc39779c9..c38aacf552 100644 --- a/rollout/context.go +++ b/rollout/context.go @@ -21,14 +21,19 @@ type rolloutContext interface { OtherAnalysisRuns() []*v1alpha1.AnalysisRun CurrentExperiment() *v1alpha1.Experiment OtherExperiments() []*v1alpha1.Experiment + + PauseContext() *pauseContext } type blueGreenContext struct { - rollout *v1alpha1.Rollout - log *log.Entry + rollout *v1alpha1.Rollout + log *log.Entry + newRS *appsv1.ReplicaSet olderRSs []*appsv1.ReplicaSet allRSs []*appsv1.ReplicaSet + + pauseContext *pauseContext } type canaryContext struct { @@ -45,16 +50,25 @@ type canaryContext struct { currentEx *v1alpha1.Experiment otherExs []*v1alpha1.Experiment + + pauseContext *pauseContext } func newBlueGreenCtx(r *v1alpha1.Rollout, newRS *appsv1.ReplicaSet, olderRSs []*appsv1.ReplicaSet) *blueGreenContext { allRSs := append(olderRSs, newRS) + logCtx := logutil.WithRollout(r) return &blueGreenContext{ - rollout: r, - log: logutil.WithRollout(r), + rollout: r, + log: logCtx, + newRS: newRS, olderRSs: olderRSs, allRSs: allRSs, + + pauseContext: &pauseContext{ + rollout: r, + log: logCtx, + }, } } @@ -93,6 +107,10 @@ func (bgCtx *blueGreenContext) OtherExperiments() []*v1alpha1.Experiment { return nil } +func (bgCtx *blueGreenContext) PauseContext() *pauseContext { + return bgCtx.pauseContext +} + func newCanaryCtx(r *v1alpha1.Rollout, newRS *appsv1.ReplicaSet, stableRS *appsv1.ReplicaSet, olderRSs []*appsv1.ReplicaSet, exList []*v1alpha1.Experiment, arList []*v1alpha1.AnalysisRun) *canaryContext { allRSs := append(olderRSs, newRS) if stableRS != nil { @@ -102,9 +120,10 @@ func newCanaryCtx(r *v1alpha1.Rollout, newRS *appsv1.ReplicaSet, stableRS *appsv currentArs, otherArs := analysisutil.FilterCurrentRolloutAnalysisRuns(arList, r) currentEx := experimentutil.GetCurrentExperiment(r, exList) otherExs := experimentutil.GetOldExperiments(r, exList) + logCtx := logutil.WithRollout(r) return &canaryContext{ rollout: r, - log: logutil.WithRollout(r), + log: logCtx, newRS: newRS, stableRS: stableRS, olderRSs: olderRSs, @@ -115,6 +134,11 @@ func newCanaryCtx(r *v1alpha1.Rollout, newRS *appsv1.ReplicaSet, stableRS *appsv currentEx: currentEx, otherExs: otherExs, + + pauseContext: &pauseContext{ + rollout: r, + log: logCtx, + }, } } @@ -164,3 +188,7 @@ func (cCtx *canaryContext) CurrentExperiment() *v1alpha1.Experiment { func (cCtx *canaryContext) OtherExperiments() []*v1alpha1.Experiment { return cCtx.otherExs } + +func (cCtx *canaryContext) PauseContext() *pauseContext { + return cCtx.pauseContext +} diff --git a/rollout/controller.go b/rollout/controller.go index 054219f48f..a3cde1be10 100644 --- a/rollout/controller.go +++ b/rollout/controller.go @@ -243,14 +243,12 @@ func (c *RolloutController) syncHandler(key string) error { generation := conditions.ComputeGenerationHash(r.Spec) if r.Status.ObservedGeneration != generation || !reflect.DeepEqual(invalidSpecCond, prevCond) { newStatus := r.Status.DeepCopy() - newStatus.ObservedGeneration = generation // SetRolloutCondition only updates the condition when the status and/or reason changes, but // the controller should update the invalidSpec if there is a change in why the spec is invalid if prevCond != nil && prevCond.Message != invalidSpecCond.Message { conditions.RemoveRolloutCondition(newStatus, v1alpha1.InvalidSpec) } - conditions.SetRolloutCondition(newStatus, *invalidSpecCond) - err := c.persistRolloutStatus(r, newStatus, nil) + err := c.patchCondition(r, newStatus, invalidSpecCond) if err != nil { return err } @@ -275,7 +273,7 @@ func (c *RolloutController) syncHandler(key string) error { return err } - if rollout.Spec.Paused || isScalingEvent { + if len(rollout.Status.PauseConditions) > 0 || isScalingEvent { return c.syncReplicasOnly(r, rsList, isScalingEvent) } diff --git a/rollout/controller_test.go b/rollout/controller_test.go index 71bdbed676..7263e54034 100644 --- a/rollout/controller_test.go +++ b/rollout/controller_test.go @@ -238,7 +238,7 @@ func generateConditionsPatch(available bool, progressingReason string, progressi // func updateBlueGreenRolloutStatus(r *v1alpha1.Rollout, preview, active string, availableReplicas, updatedReplicas, hpaReplicas int32, pause bool, available bool, progressingStatus string) *v1alpha1.Rollout { func updateBlueGreenRolloutStatus(r *v1alpha1.Rollout, preview, active string, availableReplicas, updatedReplicas, totalReplicas, hpaReplicas int32, pause bool, available bool) *v1alpha1.Rollout { - newRollout := updateBaseRolloutStatus(r, availableReplicas, updatedReplicas, totalReplicas, hpaReplicas, pause) + newRollout := updateBaseRolloutStatus(r, availableReplicas, updatedReplicas, totalReplicas, hpaReplicas) selector := newRollout.Spec.Selector.DeepCopy() if active != "" { selector.MatchLabels[v1alpha1.DefaultRolloutUniqueLabelKey] = active @@ -248,25 +248,38 @@ func updateBlueGreenRolloutStatus(r *v1alpha1.Rollout, preview, active string, a newRollout.Status.BlueGreen.PreviewSelector = preview cond, _ := newAvailableCondition(available) newRollout.Status.Conditions = append(newRollout.Status.Conditions, cond) + if pause { + now := metav1.Now() + cond := v1alpha1.PauseCondition{ + Reason: v1alpha1.PauseReasonBlueGreenPause, + StartTime: now, + } + newRollout.Status.ControllerPause = true + newRollout.Status.PauseConditions = append(newRollout.Status.PauseConditions, cond) + } return newRollout } func updateCanaryRolloutStatus(r *v1alpha1.Rollout, stableRS string, availableReplicas, updatedReplicas, hpaReplicas int32, pause bool) *v1alpha1.Rollout { - newRollout := updateBaseRolloutStatus(r, availableReplicas, updatedReplicas, availableReplicas, hpaReplicas, pause) + newRollout := updateBaseRolloutStatus(r, availableReplicas, updatedReplicas, availableReplicas, hpaReplicas) newRollout.Status.Canary.StableRS = stableRS + if pause { + now := metav1.Now() + cond := v1alpha1.PauseCondition{ + Reason: v1alpha1.PauseReasonCanaryPauseStep, + StartTime: now, + } + newRollout.Status.ControllerPause = true + newRollout.Status.PauseConditions = append(newRollout.Status.PauseConditions, cond) + } return newRollout } -func updateBaseRolloutStatus(r *v1alpha1.Rollout, availableReplicas, updatedReplicas, totalReplicas, hpaReplicas int32, pause bool) *v1alpha1.Rollout { +func updateBaseRolloutStatus(r *v1alpha1.Rollout, availableReplicas, updatedReplicas, totalReplicas, hpaReplicas int32) *v1alpha1.Rollout { newRollout := r.DeepCopy() newRollout.Status.Replicas = totalReplicas newRollout.Status.AvailableReplicas = availableReplicas newRollout.Status.UpdatedReplicas = updatedReplicas newRollout.Status.HPAReplicas = hpaReplicas - if pause { - newRollout.Spec.Paused = pause - now := metav1.Now() - newRollout.Status.PauseStartTime = &now - } return newRollout } @@ -876,10 +889,14 @@ func TestRequeueStuckRollout(t *testing.T) { r := &v1alpha1.Rollout{ Spec: v1alpha1.RolloutSpec{ Replicas: pointer.Int32Ptr(0), - Paused: rolloutPaused, ProgressDeadlineSeconds: progessDeadlineSeconds, }, } + if rolloutPaused { + r.Status.PauseConditions = []v1alpha1.PauseCondition{{ + Reason: v1alpha1.PauseReasonBlueGreenPause, + }} + } if rolloutCompleted { r.Status.ObservedGeneration = conditions.ComputeGenerationHash(r.Spec) } diff --git a/rollout/pause.go b/rollout/pause.go index 14b1bd0261..f53751c7d4 100644 --- a/rollout/pause.go +++ b/rollout/pause.go @@ -3,26 +3,99 @@ package rollout import ( "time" + log "github.com/sirupsen/logrus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" - "github.com/argoproj/argo-rollouts/utils/defaults" logutil "github.com/argoproj/argo-rollouts/utils/log" ) -func completedPauseStep(rollout *v1alpha1.Rollout, pause v1alpha1.RolloutPause) bool { - logCtx := logutil.WithRollout(rollout) +type pauseContext struct { + rollout *v1alpha1.Rollout + log *log.Entry + + addPauseReasons []v1alpha1.PauseReason + removePauseReasons []v1alpha1.PauseReason + clearPauseConditions bool +} + +func (pCtx *pauseContext) AddPauseCondition(reason v1alpha1.PauseReason) { + pCtx.addPauseReasons = append(pCtx.addPauseReasons, reason) +} + +func (pCtx *pauseContext) RemovePauseCondition(reason v1alpha1.PauseReason) { + pCtx.removePauseReasons = append(pCtx.removePauseReasons, reason) +} +func (pCtx *pauseContext) ClearPauseConditions() { + pCtx.clearPauseConditions = true +} + +func (pCtx *pauseContext) CalculatePauseStatus(newStatus *v1alpha1.RolloutStatus) { + if pCtx.clearPauseConditions { + return + } + + controllerPause := pCtx.rollout.Status.ControllerPause + statusToRemove := map[v1alpha1.PauseReason]bool{} + for i := range pCtx.removePauseReasons { + statusToRemove[pCtx.removePauseReasons[i]] = true + } + + newPauseConditions := []v1alpha1.PauseCondition{} + pauseAlreadyExists := map[v1alpha1.PauseReason]bool{} + for _, cond := range pCtx.rollout.Status.PauseConditions { + if remove := statusToRemove[cond.Reason]; !remove { + newPauseConditions = append(newPauseConditions, cond) + } + pauseAlreadyExists[cond.Reason] = true + } + + now := metav1.Now() + for i := range pCtx.addPauseReasons { + reason := pCtx.addPauseReasons[i] + if exists := pauseAlreadyExists[reason]; !exists { + pCtx.log.Infof("Adding pause reason %s with start time %s", reason, now.UTC().Format(time.RFC3339)) + cond := v1alpha1.PauseCondition{ + Reason: reason, + StartTime: now, + } + newPauseConditions = append(newPauseConditions, cond) + controllerPause = true + } + } + + if len(newPauseConditions) == 0 { + return + } + newStatus.ControllerPause = controllerPause + newStatus.PauseConditions = newPauseConditions +} + +func (pCtx *pauseContext) GetPauseCondition(reason v1alpha1.PauseReason) *v1alpha1.PauseCondition { + for i := range pCtx.rollout.Status.PauseConditions { + cond := pCtx.rollout.Status.PauseConditions[i] + if cond.Reason == reason { + return &cond + } + } + return nil +} + +func (pCtx *pauseContext) CompletedPauseStep(pause v1alpha1.RolloutPause) bool { + rollout := pCtx.rollout + pauseCondition := pCtx.GetPauseCondition(v1alpha1.PauseReasonCanaryPauseStep) + if pause.Duration != nil { now := metav1.Now() - if rollout.Status.PauseStartTime != nil { - expiredTime := rollout.Status.PauseStartTime.Add(time.Duration(*pause.Duration) * time.Second) + if pauseCondition != nil { + expiredTime := pauseCondition.StartTime.Add(time.Duration(*pause.Duration) * time.Second) if now.After(expiredTime) { - logCtx.Info("Rollout has waited the duration of the pause step") + pCtx.log.Info("Rollout has waited the duration of the pause step") return true } } - } else if rollout.Status.PauseStartTime != nil && !rollout.Spec.Paused { - logCtx.Info("Rollout has been unpaused") + } else if rollout.Status.ControllerPause && pauseCondition == nil { + pCtx.log.Info("Rollout has been unpaused") return true } return false @@ -39,53 +112,3 @@ func (c *RolloutController) checkEnqueueRolloutDuringWait(rollout *v1alpha1.Roll c.enqueueRolloutAfter(rollout, timeRemaining) } } - -// calculatePauseStatus finds the fields related to a pause step for a rollout. If the pause is nil, -// the rollout will use the previous values -func calculatePauseStatus(roCtx rolloutContext, addPause bool) (*metav1.Time, bool) { - rollout := roCtx.Rollout() - logCtx := roCtx.Log() - pauseStartTime := rollout.Status.PauseStartTime - paused := rollout.Spec.Paused - if !paused { - pauseStartTime = nil - } - if rollout.Spec.Strategy.BlueGreen != nil && defaults.GetAutoPromotionEnabledOrDefault(rollout) { - return nil, false - } - - pauseForInconclusiveAnalysisRun := false - currArs := roCtx.CurrentAnalysisRuns() - for i := range currArs { - ar := currArs[i] - if ar != nil && ar.Status != nil && ar.Status.Status == v1alpha1.AnalysisStatusInconclusive { - pauseForInconclusiveAnalysisRun = true - } - } - - if addPause || pauseForInconclusiveAnalysisRun { - if pauseStartTime == nil { - now := metav1.Now() - logCtx.Infof("Setting PauseStartTime to %s", now.UTC().Format(time.RFC3339)) - pauseStartTime = &now - paused = true - } - } - - if rollout.Spec.Strategy.BlueGreen != nil { - bgCtx := roCtx.(*blueGreenContext) - if reconcileBlueGreenTemplateChange(bgCtx) { - return nil, false - } - if paused && pauseStartTime != nil && rollout.Spec.Strategy.BlueGreen.AutoPromotionSeconds != nil { - now := metav1.Now() - autoPromoteActiveServiceDelaySeconds := *rollout.Spec.Strategy.BlueGreen.AutoPromotionSeconds - switchDeadline := pauseStartTime.Add(time.Duration(autoPromoteActiveServiceDelaySeconds) * time.Second) - if now.After(switchDeadline) { - return nil, false - } - return pauseStartTime, true - } - } - return pauseStartTime, paused -} diff --git a/rollout/service.go b/rollout/service.go index 1d4550e044..7ad3db428a 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -127,8 +127,7 @@ func (c *RolloutController) getReferencedService(r *v1alpha1.Rollout, serviceNam c.recorder.Event(r, corev1.EventTypeWarning, conditions.ServiceNotFoundReason, msg) newStatus := r.Status.DeepCopy() cond := conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionFalse, conditions.ServiceNotFoundReason, msg) - conditions.SetRolloutCondition(newStatus, *cond) - c.persistRolloutStatus(r, newStatus, &r.Spec.Paused) + c.patchCondition(r, newStatus, cond) } return nil, err } @@ -139,6 +138,7 @@ func (c *RolloutController) getPreviewAndActiveServices(r *v1alpha1.Rollout) (*c var previewSvc *corev1.Service var activeSvc *corev1.Service var err error + if r.Spec.Strategy.BlueGreen.PreviewService != "" { previewSvc, err = c.getReferencedService(r, r.Spec.Strategy.BlueGreen.PreviewService) if err != nil { diff --git a/rollout/sync.go b/rollout/sync.go index c939163a36..ebd3345c7a 100644 --- a/rollout/sync.go +++ b/rollout/sync.go @@ -182,8 +182,7 @@ func (c *RolloutController) getNewReplicaSet(rollout *v1alpha1.Rollout, rsList, c.recorder.Event(rollout, corev1.EventTypeWarning, conditions.FailedRSCreateReason, msg) newStatus := rollout.Status.DeepCopy() cond := conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionFalse, conditions.FailedRSCreateReason, msg) - conditions.SetRolloutCondition(newStatus, *cond) - c.persistRolloutStatus(rollout, newStatus, &rollout.Spec.Paused) + err := c.patchCondition(rollout, newStatus, cond) return nil, err } @@ -208,7 +207,8 @@ func (c *RolloutController) getNewReplicaSet(rollout *v1alpha1.Rollout, rsList, // syncReplicasOnly is responsible for reconciling rollouts on scaling events. func (c *RolloutController) syncReplicasOnly(r *v1alpha1.Rollout, rsList []*appsv1.ReplicaSet, isScaling bool) error { logCtx := logutil.WithRollout(r) - logCtx.Infof("Syncing replicas only (paused: %v, isScaling: %v)", r.Spec.Paused, isScaling) + isPaused := len(r.Status.PauseConditions) > 0 + logCtx.Infof("Syncing replicas only (paused: %v, isScaling: %v)", isPaused, isScaling) newRS, oldRSs, err := c.getAllReplicaSetsAndSyncRevision(r, rsList, false) if err != nil { return err @@ -225,7 +225,8 @@ func (c *RolloutController) syncReplicasOnly(r *v1alpha1.Rollout, rsList []*apps // so we can abort this resync return err } - return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx, r.Spec.Paused) + c.reconcileBlueGreenPause(previewSvc, activeSvc, roCtx) + return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc, roCtx) } // The controller wants to use the rolloutCanary method to reconcile the rolllout if the rollout is not paused. // If there are no scaling events, the rollout should only sync its status @@ -250,6 +251,10 @@ func (c *RolloutController) syncReplicasOnly(r *v1alpha1.Rollout, rsList []*apps return err } } + // reconcileCanaryPause will ensure we will requeue this rollout at the appropriate time + // if we are at a pause step with a duration. + c.reconcileCanaryPause(roCtx) + return c.syncRolloutStatusCanary(roCtx) } return fmt.Errorf("no rollout strategy provided") @@ -419,31 +424,57 @@ func (c *RolloutController) checkPausedConditions(r *v1alpha1.Rollout) error { } pausedCondExists := cond != nil && cond.Reason == conditions.PausedRolloutReason - newStatus := r.Status.DeepCopy() - needsUpdate := false - if r.Spec.Paused && !pausedCondExists { - condition := conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionUnknown, conditions.PausedRolloutReason, conditions.PausedRolloutMessage) - conditions.SetRolloutCondition(newStatus, *condition) - needsUpdate = true - } else if !r.Spec.Paused && pausedCondExists { - condition := conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionUnknown, conditions.ResumedRolloutReason, conditions.ResumeRolloutMessage) - conditions.SetRolloutCondition(newStatus, *condition) - needsUpdate = true + var updatedConditon *v1alpha1.RolloutCondition + if len(r.Status.PauseConditions) > 0 && !pausedCondExists { + updatedConditon = conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionUnknown, conditions.PausedRolloutReason, conditions.PausedRolloutMessage) + } else if len(r.Status.PauseConditions) == 0 && pausedCondExists { + updatedConditon = conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionUnknown, conditions.ResumedRolloutReason, conditions.ResumeRolloutMessage) } - if !needsUpdate { + if updatedConditon == nil { return nil } - err := c.persistRolloutStatus(r, newStatus, &r.Spec.Paused) + newStatus := r.Status.DeepCopy() + err := c.patchCondition(r, newStatus, updatedConditon) return err } +func (c *RolloutController) patchCondition(r *v1alpha1.Rollout, newStatus *v1alpha1.RolloutStatus, condition *v1alpha1.RolloutCondition) error { + conditions.SetRolloutCondition(newStatus, *condition) + newStatus.ObservedGeneration = conditions.ComputeGenerationHash(r.Spec) + + logCtx := logutil.WithRollout(r) + patch, modified, err := diff.CreateTwoWayMergePatch( + &v1alpha1.Rollout{ + Status: r.Status, + }, + &v1alpha1.Rollout{ + Status: *newStatus, + }, v1alpha1.Rollout{}) + if err != nil { + logCtx.Errorf("Error constructing app status patch: %v", err) + return err + } + if !modified { + logCtx.Info("No status changes. Skipping patch") + return nil + } + logCtx.Debugf("Rollout Condition Patch: %s", patch) + _, err = c.argoprojclientset.ArgoprojV1alpha1().Rollouts(r.Namespace).Patch(r.Name, patchtypes.MergePatchType, patch) + if err != nil { + logCtx.Warningf("Error patching rollout: %v", err) + return err + } + logCtx.Info("Condition Patch status successfully") + return nil +} + func (c *RolloutController) calculateRolloutConditions(roCtx rolloutContext, newStatus v1alpha1.RolloutStatus) v1alpha1.RolloutStatus { r := roCtx.Rollout() allRSs := roCtx.AllRSs() newRS := roCtx.NewRS() - if r.Spec.Paused { + if len(r.Status.PauseConditions) > 0 { return newStatus } @@ -541,27 +572,16 @@ func (c *RolloutController) calculateRolloutConditions(roCtx rolloutContext, new } // persistRolloutStatus persists updates to rollout status. If no changes were made, it is a no-op -func (c *RolloutController) persistRolloutStatus(orig *v1alpha1.Rollout, newStatus *v1alpha1.RolloutStatus, newPause *bool) error { - specCopy := orig.Spec.DeepCopy() - paused := specCopy.Paused - if newPause != nil { - paused = *newPause - specCopy.Paused = *newPause - } - newStatus.ObservedGeneration = conditions.ComputeGenerationHash(*specCopy) - +func (c *RolloutController) persistRolloutStatus(roCtx rolloutContext, newStatus *v1alpha1.RolloutStatus) error { + orig := roCtx.Rollout() + roCtx.PauseContext().CalculatePauseStatus(newStatus) + newStatus.ObservedGeneration = conditions.ComputeGenerationHash(orig.Spec) logCtx := logutil.WithRollout(orig) patch, modified, err := diff.CreateTwoWayMergePatch( &v1alpha1.Rollout{ - Spec: v1alpha1.RolloutSpec{ - Paused: orig.Spec.Paused, - }, Status: orig.Status, }, &v1alpha1.Rollout{ - Spec: v1alpha1.RolloutSpec{ - Paused: paused, - }, Status: *newStatus, }, v1alpha1.Rollout{}) if err != nil { @@ -597,7 +617,7 @@ func (c *RolloutController) requeueStuckRollout(r *v1alpha1.Rollout, newStatus v return time.Duration(-1) } // No need to estimate progress if the rollout is complete or already timed out. - if conditions.RolloutComplete(r, &newStatus) || currentCond.Reason == conditions.TimedOutReason || r.Spec.Paused { + if conditions.RolloutComplete(r, &newStatus) || currentCond.Reason == conditions.TimedOutReason || len(r.Status.PauseConditions) > 0 { return time.Duration(-1) } // If there is no sign of progress at this point then there is a high chance that the diff --git a/utils/replicaset/replicaset.go b/utils/replicaset/replicaset.go index 6378611aa1..e6a9ce44ab 100644 --- a/utils/replicaset/replicaset.go +++ b/utils/replicaset/replicaset.go @@ -85,7 +85,7 @@ func NewRSNewReplicas(rollout *v1alpha1.Rollout, allRSs []*appsv1.ReplicaSet, ne if newRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] != rollout.Status.CurrentPodHash { return *rollout.Spec.Strategy.BlueGreen.PreviewReplicaCount, nil } - if !rollout.Spec.Paused && rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint { + if len(rollout.Status.PauseConditions) == 0 && rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint { return defaults.GetRolloutReplicasOrDefault(rollout), nil } return *rollout.Spec.Strategy.BlueGreen.PreviewReplicaCount, nil