From 85aa2466d079c52f094cbe453dbe970cb75c3573 Mon Sep 17 00:00:00 2001 From: Jesse Suen Date: Sat, 26 Sep 2020 04:52:42 -0700 Subject: [PATCH] fix: unavailable stable RS was not scaled down to make room for canary --- test/e2e/analysis_test.go | 4 +- test/e2e/canary_test.go | 96 ++++++++- test/e2e/experiment_test.go | 2 +- test/e2e/functional/alb-template.yaml | 6 +- test/e2e/functional/analysis-run-job.yaml | 2 +- .../functional/analysistemplate-echo-job.yaml | 2 +- .../rollout-background-analysis.yaml | 9 +- test/e2e/functional/rollout-basic.yaml | 10 +- .../rollout-container-resource-formats.yaml | 36 ---- test/e2e/functional/rollout-experiment.yaml | 9 +- .../functional/rollout-inline-analysis.yaml | 9 +- test/e2e/functional_test.go | 189 +++++++++++++++++- test/fixtures/common.go | 3 - test/fixtures/e2e_suite.go | 17 +- test/fixtures/given.go | 24 +-- test/fixtures/then.go | 42 ++-- test/fixtures/when.go | 63 +++++- utils/replicaset/canary.go | 32 +-- utils/replicaset/canary_test.go | 38 +++- 19 files changed, 450 insertions(+), 143 deletions(-) delete mode 100644 test/e2e/functional/rollout-container-resource-formats.yaml diff --git a/test/e2e/analysis_test.go b/test/e2e/analysis_test.go index 60e1d4d121..8a049eb7b2 100644 --- a/test/e2e/analysis_test.go +++ b/test/e2e/analysis_test.go @@ -24,7 +24,7 @@ func (s *AnalysisSuite) TestRolloutWithBackgroundAnalysis() { Then(). ExpectAnalysisRunCount(0). When(). - UpdateImage("argoproj/rollouts-demo:yellow"). + UpdateSpec(). WaitForRolloutStatus("Paused"). Then(). ExpectAnalysisRunCount(1). @@ -45,7 +45,7 @@ func (s *AnalysisSuite) TestRolloutWithInlineAnalysis() { Then(). ExpectAnalysisRunCount(0). When(). - UpdateImage("argoproj/rollouts-demo:yellow"). + UpdateSpec(). WaitForRolloutStatus("Paused"). Then(). ExpectAnalysisRunCount(1). diff --git a/test/e2e/canary_test.go b/test/e2e/canary_test.go index 19596803c4..3c4dc54012 100644 --- a/test/e2e/canary_test.go +++ b/test/e2e/canary_test.go @@ -14,6 +14,10 @@ type CanarySuite struct { fixtures.E2ESuite } +func TestCanarySuite(t *testing.T) { + suite.Run(t, new(CanarySuite)) +} + func (s *CanarySuite) TestCanarySetCanaryScale() { canarySteps := ` - pause: {} @@ -35,42 +39,116 @@ func (s *CanarySuite) TestCanarySetCanaryScale() { When(). ApplyManifests(). WaitForRolloutStatus("Healthy"). - UpdateImage("argoproj/rollouts-demo:yellow"). + UpdateSpec(). // at step 0 WaitForRolloutStatus("Paused"). Then(). - ExpectCanaryPodCount(0). + ExpectCanaryStablePodCount(0, 4). When(). PromoteRollout(). WaitForRolloutCanaryStepIndex(2). Then(). // at step 2 - ExpectCanaryPodCount(1). + ExpectCanaryStablePodCount(1, 4). When(). PromoteRollout(). WaitForRolloutCanaryStepIndex(4). Then(). // at step 4 - ExpectCanaryPodCount(1). + ExpectCanaryStablePodCount(1, 4). When(). PromoteRollout(). WaitForRolloutCanaryStepIndex(6). Then(). // at step 6 - ExpectCanaryPodCount(3). + ExpectCanaryStablePodCount(3, 4). When(). PromoteRollout(). WaitForRolloutCanaryStepIndex(8). Then(). // at step 8 - ExpectCanaryPodCount(2). + ExpectCanaryStablePodCount(2, 4). When(). PromoteRollout(). WaitForRolloutStatus("Healthy"). Then(). - ExpectCanaryPodCount(4) + ExpectCanaryStablePodCount(4, 4) } -func TestCanarySuite(t *testing.T) { - suite.Run(t, new(CanarySuite)) +// TestRolloutScalingWhenPaused verifies behavior when scaling a rollout up/down when paused +func (s *FunctionalSuite) TestRolloutScalingWhenPaused() { + s.Given(). + RolloutObjects(`@functional/rollout-basic.yaml`). + SetSteps(` +- setWeight: 25 +- pause: {}`). + When(). + ApplyManifests(). + WaitForRolloutStatus("Healthy"). + UpdateSpec(). + WaitForRolloutStatus("Paused"). + Then(). + ExpectCanaryStablePodCount(1, 1). + When(). + ScaleRollout(8). + WaitForRolloutAvailableReplicas(8). + Then(). + ExpectCanaryStablePodCount(2, 6). + When(). + ScaleRollout(4). + WaitForRolloutAvailableReplicas(4). + Then(). + ExpectCanaryStablePodCount(1, 3) +} + +// TestRolloutScalingDuringUpdate verifies behavior when scaling a rollout up/down in middle of update +func (s *FunctionalSuite) TestRolloutScalingDuringUpdate() { + s.Given(). + HealthyRollout(` +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: updatescaling +spec: + replicas: 4 + strategy: + canary: + maxSurge: 2 + maxUnavailable: 0 + selector: + matchLabels: + app: updatescaling + template: + metadata: + labels: + app: updatescaling + spec: + containers: + - name: updatescaling + image: nginx:1.19-alpine + resources: + requests: + memory: 16Mi + cpu: 1m`). + When(). + PatchSpec(` +spec: + template: + spec: + containers: + - name: updatescaling + command: [/bad-command]`). + WaitForRolloutReplicas(6). + Then(). + ExpectCanaryStablePodCount(2, 4). + When(). + ScaleRollout(8). + WaitForRolloutReplicas(10). + Then(). + // NOTE: the numbers below may change in the future. + // See: https://github.com/argoproj/argo-rollouts/issues/738 + ExpectCanaryStablePodCount(6, 4). + When(). + ScaleRollout(4) + // WaitForRolloutReplicas(4) // this doesn't work yet (bug) } diff --git a/test/e2e/experiment_test.go b/test/e2e/experiment_test.go index 00af9e5df0..42d2a2b5ef 100644 --- a/test/e2e/experiment_test.go +++ b/test/e2e/experiment_test.go @@ -23,7 +23,7 @@ func (s *ExperimentSuite) TestRolloutWithExperiment() { // TODO: verify there are no experiments Then(). When(). - UpdateImage("argoproj/rollouts-demo:yellow"). + UpdateSpec(). // TODO: wait for experiment to start and complete successful // TODO: verify pods WaitForRolloutStatus("Paused"). diff --git a/test/e2e/functional/alb-template.yaml b/test/e2e/functional/alb-template.yaml index 5526066b50..033ac507b3 100644 --- a/test/e2e/functional/alb-template.yaml +++ b/test/e2e/functional/alb-template.yaml @@ -14,14 +14,14 @@ spec: spec: containers: - name: REPLACEME - image: argoproj/rollouts-demo:blue + image: nginx:1.19-alpine ports: - name: http - containerPort: 8080 + containerPort: 80 protocol: TCP resources: requests: - memory: 32Mi + memory: 16Mi cpu: 5m strategy: canary: diff --git a/test/e2e/functional/analysis-run-job.yaml b/test/e2e/functional/analysis-run-job.yaml index 879f5ab106..bc84e01084 100644 --- a/test/e2e/functional/analysis-run-job.yaml +++ b/test/e2e/functional/analysis-run-job.yaml @@ -12,7 +12,7 @@ spec: spec: containers: - name: sleep - image: alpine:3.8 + image: nginx:1.19-alpine command: [sleep, "30"] restartPolicy: Never backoffLimit: 0 diff --git a/test/e2e/functional/analysistemplate-echo-job.yaml b/test/e2e/functional/analysistemplate-echo-job.yaml index 587bbc7dba..2e2b798673 100644 --- a/test/e2e/functional/analysistemplate-echo-job.yaml +++ b/test/e2e/functional/analysistemplate-echo-job.yaml @@ -12,7 +12,7 @@ spec: spec: containers: - name: sleep - image: alpine:3.12 + image: nginx:1.19-alpine command: [echo, done] restartPolicy: Never backoffLimit: 0 diff --git a/test/e2e/functional/rollout-background-analysis.yaml b/test/e2e/functional/rollout-background-analysis.yaml index 54a0d4fe28..f95252d37f 100644 --- a/test/e2e/functional/rollout-background-analysis.yaml +++ b/test/e2e/functional/rollout-background-analysis.yaml @@ -21,13 +21,8 @@ spec: spec: containers: - name: rollouts-demo - image: argoproj/rollouts-demo:blue - args: [--termination-delay, "0"] - ports: - - name: http - containerPort: 8080 - protocol: TCP + image: nginx:1.19-alpine resources: requests: - memory: 32Mi + memory: 16Mi cpu: 5m diff --git a/test/e2e/functional/rollout-basic.yaml b/test/e2e/functional/rollout-basic.yaml index fe891d82d4..b0ba2c3fbd 100644 --- a/test/e2e/functional/rollout-basic.yaml +++ b/test/e2e/functional/rollout-basic.yaml @@ -17,14 +17,8 @@ spec: app: basic spec: containers: - - name: rollouts-demo - image: argoproj/rollouts-demo:blue - imagePullPolicy: IfNotPresent - # reduce termination delay (default 10s) for e2e tests - args: [--termination-delay, "0"] - ports: - - name: http - containerPort: 8080 + - name: basic + image: nginx:1.19-alpine resources: requests: memory: 16Mi diff --git a/test/e2e/functional/rollout-container-resource-formats.yaml b/test/e2e/functional/rollout-container-resource-formats.yaml deleted file mode 100644 index 2156c8b212..0000000000 --- a/test/e2e/functional/rollout-container-resource-formats.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# Test object to verify resource requests are accepted in multiple formats and not rejected by validation -apiVersion: argoproj.io/v1alpha1 -kind: Rollout -metadata: - name: container-resource-formats -spec: - replicas: 0 - selector: - matchLabels: - app: container-resource-formats - template: - metadata: - labels: - app: container-resource-formats - spec: - containers: - - name: a - image: argoproj/rollouts-demo:blue - args: [--termination-delay, "0"] - resources: - requests: - cpu: 0.001 - - name: b - image: argoproj/rollouts-demo:blue - args: ["--termination-delay", "0", "-listen-addr", ":8081"] - resources: - requests: - cpu: '0.001' - - name: c - image: argoproj/rollouts-demo:blue - args: ["--termination-delay", "0", "-listen-addr", ":8082"] - resources: - requests: - cpu: 1m - strategy: - canary: {} diff --git a/test/e2e/functional/rollout-experiment.yaml b/test/e2e/functional/rollout-experiment.yaml index a45a141131..aaf3a9cc91 100644 --- a/test/e2e/functional/rollout-experiment.yaml +++ b/test/e2e/functional/rollout-experiment.yaml @@ -25,13 +25,8 @@ spec: spec: containers: - name: rollouts-demo - image: argoproj/rollouts-demo:blue - args: [--termination-delay, "0"] - ports: - - name: http - containerPort: 8080 - protocol: TCP + image: nginx:1.19-alpine resources: requests: - memory: 32Mi + memory: 16Mi cpu: 5m diff --git a/test/e2e/functional/rollout-inline-analysis.yaml b/test/e2e/functional/rollout-inline-analysis.yaml index b33eede71e..8af666f265 100644 --- a/test/e2e/functional/rollout-inline-analysis.yaml +++ b/test/e2e/functional/rollout-inline-analysis.yaml @@ -27,13 +27,8 @@ spec: spec: containers: - name: rollouts-demo - image: argoproj/rollouts-demo:blue - args: [--termination-delay, "0"] - ports: - - name: http - containerPort: 8080 - protocol: TCP + image: nginx:1.19-alpine resources: requests: - memory: 32Mi + memory: 16Mi cpu: 5m diff --git a/test/e2e/functional_test.go b/test/e2e/functional_test.go index 745005aa63..ae479d133d 100644 --- a/test/e2e/functional_test.go +++ b/test/e2e/functional_test.go @@ -15,6 +15,10 @@ type FunctionalSuite struct { fixtures.E2ESuite } +func TestFunctionalSuite(t *testing.T) { + suite.Run(t, new(FunctionalSuite)) +} + func countReplicaSets(count int) fixtures.ReplicaSetExpectation { return func(rsets *appsv1.ReplicaSetList) bool { return len(rsets.Items) == count @@ -25,7 +29,7 @@ func (s *FunctionalSuite) TestRolloutAbortRetryPromote() { s.Given(). HealthyRollout(`@functional/rollout-basic.yaml`). When(). - UpdateImage("argoproj/rollouts-demo:yellow"). + UpdateSpec(). WaitForRolloutStatus("Paused"). Then(). ExpectReplicaSets("two replicasets", countReplicaSets(2)). @@ -47,11 +51,188 @@ func (s *FunctionalSuite) TestRolloutRestart() { WaitForRolloutStatus("Healthy") } +// TestContainerResourceFormats verifies resource requests are accepted in multiple formats and not +// rejected by validation func (s *FunctionalSuite) TestContainerResourceFormats() { s.Given(). - HealthyRollout(`@functional/rollout-container-resource-formats.yaml`) + HealthyRollout(` +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: container-resource-formats +spec: + replicas: 0 + selector: + matchLabels: + app: container-resource-formats + template: + metadata: + labels: + app: container-resource-formats + spec: + containers: + - name: a + image: nginx:1.19-alpine + command: [sleep, infinity] + resources: + requests: + cpu: 0.001 + - name: b + image: nginx:1.19-alpine + command: [sleep, infinity] + resources: + requests: + cpu: '0.001' + - name: c + image: nginx:1.19-alpine + command: [sleep, infinity] + resources: + requests: + cpu: 1m + strategy: + canary: {} +`) } -func TestFunctionalSuite(t *testing.T) { - suite.Run(t, new(FunctionalSuite)) +// TestRolloutUpdateToBadImage updates a healthy rollout to a bad image and verifies it honors maxSurge, maxUnavailable +func (s *FunctionalSuite) TestRolloutUpdateToBadImage() { + s.Given(). + HealthyRollout(` +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: badupdate +spec: + replicas: 4 + strategy: + canary: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app: badupdate + template: + metadata: + labels: + app: badupdate + spec: + containers: + - name: badupdate + image: nginx:1.19-alpine + resources: + requests: + memory: 16Mi + cpu: 1m`). + When(). + PatchSpec(` +spec: + progressDeadlineSeconds: 10 + template: + spec: + containers: + - name: badupdate + command: [/bad-command]`). + WaitForRolloutStatus("Degraded"). + Then(). + ExpectCanaryStablePodCount(1, 4) +} + +// TestRolloutCrashloopUpdate deploys a rollout which crashes, then updates it to a working image +func (s *FunctionalSuite) TestRolloutCrashloopUpdate() { + s.Given(). + RolloutObjects(` +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: crashloop +spec: + replicas: 4 + progressDeadlineSeconds: 10 + strategy: + canary: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app: crashloop + template: + metadata: + labels: + app: crashloop + spec: + containers: + - name: crashloop + image: nginx:1.19-alpine + command: [/bad-command] + resources: + requests: + memory: 16Mi + cpu: 1m`). + When(). + ApplyManifests(). + WaitForRolloutStatus("Degraded"). + PatchSpec(` +spec: + progressDeadlineSeconds: 60 + template: + spec: + containers: + - name: crashloop + command: null`). + WaitForRolloutStatus("Healthy"). + Then(). + ExpectRevisionPodCount("2", 4). + ExpectRevisionPodCount("1", 0) + +} + +// TestRolloutCrashloopSetWeightUpdate deploys a rollout which crashes, then updates it to a working +// image with a canary weight of 50% +func (s *FunctionalSuite) TestRolloutCrashloopSetWeightUpdate() { + s.Given(). + RolloutObjects(` +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: crashloop-setweight +spec: + replicas: 4 + progressDeadlineSeconds: 10 + strategy: + canary: + maxSurge: 1 + maxUnavailable: 0 + steps: + - setWeight: 50 + - pause: {} + selector: + matchLabels: + app: crashloop-setweight + template: + metadata: + labels: + app: crashloop-setweight + spec: + containers: + - name: crashloop-setweight + image: nginx:1.19-alpine + command: [/bad-command] + resources: + requests: + memory: 16Mi + cpu: 1m`). + When(). + ApplyManifests(). + WaitForRolloutStatus("Degraded"). + PatchSpec(` +spec: + template: + spec: + containers: + - name: crashloop-setweight + command: null`). + WaitForRolloutStatus("Progressing"). + WaitForRolloutStatus("Degraded"). + Then(). + ExpectCanaryStablePodCount(2, 2) } diff --git a/test/fixtures/common.go b/test/fixtures/common.go index 0fb50cc66c..19032f112c 100644 --- a/test/fixtures/common.go +++ b/test/fixtures/common.go @@ -27,9 +27,6 @@ type Common struct { rollout *rov1.Rollout objects []*unstructured.Unstructured - // podDelay slows down pod startup and shutdown by the value in seconds - // Used humans slow down rollout activity during a test - podDelay int } func (c *Common) CheckError(err error) { diff --git a/test/fixtures/e2e_suite.go b/test/fixtures/e2e_suite.go index e9eccb55d2..a479f41fac 100644 --- a/test/fixtures/e2e_suite.go +++ b/test/fixtures/e2e_suite.go @@ -42,6 +42,7 @@ const ( var ( E2EWaitTimeout time.Duration = time.Second * 60 + E2EPodDelay = 0 // All e2e tests will be labeled with this instance-id (unless E2E_INSTANCE_ID="") E2ELabelValueInstanceID = "argo-rollouts-e2e" @@ -70,6 +71,13 @@ func init() { } E2EWaitTimeout = time.Duration(timeout) * time.Second } + if e2ePodDelay, ok := os.LookupEnv(EnvVarE2EPodDelay); ok { + delay, err := strconv.Atoi(e2ePodDelay) + if err != nil { + panic(fmt.Sprintf("Invalid pod delay value: %s", e2ePodDelay)) + } + E2EPodDelay = delay + } } type E2ESuite struct { @@ -103,12 +111,6 @@ func (s *E2ESuite) SetupSuite() { _ = flag.Set("v", strconv.Itoa(7)) flag.Parse() } - - if delayStr := os.Getenv(EnvVarE2EPodDelay); delayStr != "" { - delay, err := strconv.Atoi(delayStr) - s.CheckError(err) - s.podDelay = delay - } } func (s *E2ESuite) TearDownSuite() { @@ -124,6 +126,9 @@ func (s *E2ESuite) BeforeTest(suiteName, testName string) { } func (s *E2ESuite) AfterTest(_, _ string) { + if s.Common.t.Failed() && s.rollout != nil { + s.PrintRollout(s.rollout) + } } func (s *E2ESuite) DeleteResources() { diff --git a/test/fixtures/given.go b/test/fixtures/given.go index 90808693dc..6515439de0 100644 --- a/test/fixtures/given.go +++ b/test/fixtures/given.go @@ -5,12 +5,12 @@ import ( "strconv" "strings" - rov1 "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" - unstructuredutil "github.com/argoproj/argo-rollouts/utils/unstructured" "github.com/ghodss/yaml" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/intstr" + + rov1 "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" + unstructuredutil "github.com/argoproj/argo-rollouts/utils/unstructured" ) type Given struct { @@ -51,17 +51,17 @@ func (g *Given) RolloutObjects(text string) *Given { g.CheckError(err) g.log = g.log.WithField("rollout", g.rollout.Name) - // Modify pod termination delay if set - if g.podDelay > 0 { - g.rollout.Spec.Template.Spec.Containers[0].Args = []string{"--termination-delay", strconv.Itoa(g.podDelay)} - g.rollout.Spec.Template.Spec.Containers[0].ReadinessProbe = &corev1.Probe{ - InitialDelaySeconds: int32(g.podDelay), - Handler: corev1.Handler{ - TCPSocket: &corev1.TCPSocketAction{ - Port: intstr.FromInt(8080), - }, + if E2EPodDelay > 0 { + // Add postStart/preStop handlers to slow down readiness/termination + sleepHandler := corev1.Handler{ + Exec: &corev1.ExecAction{ + Command: []string{"sleep", strconv.Itoa(E2EPodDelay)}, }, } + g.rollout.Spec.Template.Spec.Containers[0].Lifecycle = &corev1.Lifecycle{ + PostStart: &sleepHandler, + PreStop: &sleepHandler, + } } } else { // other non-rollout objects diff --git a/test/fixtures/then.go b/test/fixtures/then.go index 981718c010..119a90339a 100644 --- a/test/fixtures/then.go +++ b/test/fixtures/then.go @@ -8,6 +8,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" rov1 "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" + "github.com/argoproj/argo-rollouts/utils/annotations" ) type Then struct { @@ -42,27 +43,32 @@ func (t *Then) ExpectPods(expectation string, expectFunc PodExpectation) *Then { return t } -func (t *Then) ExpectStablePodCount(expectedCount int) *Then { - return t.expectPodCountByHash("stable", expectedCount) +func (t *Then) ExpectCanaryStablePodCount(canaryCount, stableCount int) *Then { + ro, err := t.rolloutClient.ArgoprojV1alpha1().Rollouts(t.namespace).Get(t.rollout.Name, metav1.GetOptions{}) + t.CheckError(err) + return t.expectPodCountByHash("canary", ro.Status.CurrentPodHash, canaryCount). + expectPodCountByHash("stable", ro.Status.Canary.StableRS, stableCount) } -func (t *Then) ExpectCanaryPodCount(expectedCount int) *Then { - return t.expectPodCountByHash("canary", expectedCount) +func (t *Then) ExpectRevisionPodCount(revision string, expectedCount int) *Then { + selector, err := metav1.LabelSelectorAsSelector(t.rollout.Spec.Selector) + t.CheckError(err) + replicasets, err := t.kubeClient.AppsV1().ReplicaSets(t.namespace).List(metav1.ListOptions{LabelSelector: selector.String()}) + t.CheckError(err) + for _, rs := range replicasets.Items { + if rs.Annotations[annotations.RevisionAnnotation] == revision { + description := fmt.Sprintf("revision:%s", revision) + hash := rs.Labels[rov1.DefaultRolloutUniqueLabelKey] + return t.expectPodCountByHash(description, hash, expectedCount) + } + } + t.t.Fatalf("Could not find ReplicaSet with revision: %s", revision) + return t } -func (t *Then) expectPodCountByHash(which string, expectedCount int) *Then { - return t.ExpectPods(fmt.Sprintf("%s pod count == %d", which, expectedCount), func(pods *corev1.PodList) bool { - ro, err := t.rolloutClient.ArgoprojV1alpha1().Rollouts(t.namespace).Get(t.rollout.Name, metav1.GetOptions{}) - t.CheckError(err) +func (t *Then) expectPodCountByHash(description, hash string, expectedCount int) *Then { + return t.ExpectPods(fmt.Sprintf("%s pod count == %d", description, expectedCount), func(pods *corev1.PodList) bool { count := 0 - var hash string - if which == "stable" { - hash = ro.Status.Canary.StableRS - } else if which == "canary" { - hash = ro.Status.CurrentPodHash - } else { - t.t.Fatalf("unknown which: %s", which) - } for _, pod := range pods.Items { if pod.DeletionTimestamp != nil { // ignore deleting pods from the count, since it messes with the counts and will @@ -70,13 +76,13 @@ func (t *Then) expectPodCountByHash(which string, expectedCount int) *Then { t.log.Debugf("ignoring deleting pod %s from expected pod count", pod.Name) continue } - if pod.Labels["rollouts-pod-template-hash"] == hash { + if pod.Labels[rov1.DefaultRolloutUniqueLabelKey] == hash { count += 1 } } metExpectation := expectedCount == count if !metExpectation { - t.log.Warnf("unexpected %s pod count: expected %d, saw: %d", which, expectedCount, count) + t.log.Warnf("unexpected %s (hash %s) pod count: expected %d, saw: %d", description, hash, expectedCount, count) } return metExpectation }) diff --git a/test/fixtures/when.go b/test/fixtures/when.go index 435b9c2758..51bad3c98c 100644 --- a/test/fixtures/when.go +++ b/test/fixtures/when.go @@ -8,17 +8,19 @@ import ( "os/exec" "time" + "github.com/ghodss/yaml" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/strategicpatch" rov1 "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" "github.com/argoproj/argo-rollouts/pkg/kubectl-argo-rollouts/cmd/abort" "github.com/argoproj/argo-rollouts/pkg/kubectl-argo-rollouts/cmd/promote" "github.com/argoproj/argo-rollouts/pkg/kubectl-argo-rollouts/cmd/restart" "github.com/argoproj/argo-rollouts/pkg/kubectl-argo-rollouts/cmd/retry" - "github.com/argoproj/argo-rollouts/pkg/kubectl-argo-rollouts/cmd/set" "github.com/argoproj/argo-rollouts/pkg/kubectl-argo-rollouts/info" ) @@ -55,13 +57,13 @@ func (w *When) ApplyManifests() *When { return w } -func (w *When) UpdateImage(image string) *When { +func (w *When) UpdateSpec() *When { if w.rollout == nil { w.t.Fatal("Rollout not set") } - err := set.SetImage(w.dynamicClient, w.namespace, w.rollout.Name, "*", image) + patchStr := fmt.Sprintf(`{"spec":{"template":{"metadata":{"annotations":{"update":"%s"}}}}}`, time.Now()) + _, err := w.rolloutClient.ArgoprojV1alpha1().Rollouts(w.namespace).Patch(w.rollout.Name, types.MergePatchType, []byte(patchStr)) w.CheckError(err) - w.log.Infof("Updated image to %s", image) return w } @@ -105,6 +107,45 @@ func (w *When) RestartRollout() *When { return w } +func (w *When) ScaleRollout(scale int) *When { + if w.rollout == nil { + w.t.Fatal("Rollout not set") + } + patchStr := fmt.Sprintf(`{"spec":{"replicas":%d}}`, scale) + _, err := w.rolloutClient.ArgoprojV1alpha1().Rollouts(w.namespace).Patch(w.rollout.Name, types.MergePatchType, []byte(patchStr)) + w.CheckError(err) + w.log.Infof("Scaled rollout to %d", scale) + return w +} + +// PatchSpec patches the rollout +func (w *When) PatchSpec(patch string) *When { + if w.rollout == nil { + w.t.Fatal("Rollout not set") + } + // convert YAML patch to JSON patch + var patchObj map[string]interface{} + err := yaml.Unmarshal([]byte(patch), &patchObj) + w.CheckError(err) + jsonPatch, err := json.Marshal(patchObj) + w.CheckError(err) + + // Apply patch + ro, err := w.rolloutClient.ArgoprojV1alpha1().Rollouts(w.namespace).Get(w.rollout.Name, metav1.GetOptions{}) + w.CheckError(err) + originalBytes, err := json.Marshal(ro) + w.CheckError(err) + newRolloutBytes, err := strategicpatch.StrategicMergePatch(originalBytes, jsonPatch, rov1.Rollout{}) + w.CheckError(err) + var newRollout rov1.Rollout + err = json.Unmarshal(newRolloutBytes, &newRollout) + w.CheckError(err) + _, err = w.rolloutClient.ArgoprojV1alpha1().Rollouts(w.namespace).Update(&newRollout) + w.CheckError(err) + w.log.Infof("Patched rollout: %s", string(jsonPatch)) + return w +} + func (w *When) WaitForRolloutStatus(status string) *When { checkStatus := func(ro *rov1.Rollout) bool { if info.RolloutStatusString(ro) == status { @@ -136,6 +177,20 @@ func (w *When) WaitForRolloutCanaryStepIndex(index int32) *When { return w.WaitForRolloutCondition(checkStatus, fmt.Sprintf("status.currentStepIndex=%d", index), E2EWaitTimeout) } +func (w *When) WaitForRolloutAvailableReplicas(count int32) *When { + checkStatus := func(ro *rov1.Rollout) bool { + return ro.Status.AvailableReplicas == count + } + return w.WaitForRolloutCondition(checkStatus, fmt.Sprintf("status.availableReplicas=%d", count), E2EWaitTimeout) +} + +func (w *When) WaitForRolloutReplicas(count int32) *When { + checkStatus := func(ro *rov1.Rollout) bool { + return ro.Status.Replicas == count + } + return w.WaitForRolloutCondition(checkStatus, fmt.Sprintf("status.replicas=%d", count), E2EWaitTimeout) +} + func (w *When) WaitForRolloutCondition(test func(ro *rov1.Rollout) bool, condition string, timeout time.Duration) *When { start := time.Now() w.log.Infof("Waiting for condition: %s", condition) diff --git a/utils/replicaset/canary.go b/utils/replicaset/canary.go index 1b4a78f311..4268eab57d 100644 --- a/utils/replicaset/canary.go +++ b/utils/replicaset/canary.go @@ -160,23 +160,23 @@ func CalculateReplicaCountsForCanary(rollout *v1alpha1.Rollout, newRS *appsv1.Re } } - minAvailableReplicaCount := rolloutSpecReplica - MaxUnavailable(rollout) + if GetReplicaCountForReplicaSets(oldRSs) > 0 { + // If any older ReplicaSets exist, we should scale those down first, before even considering + // scaling down the newRS or stableRS + return newRSReplicaCount, stableRSReplicaCount + } - totalAvailableOlderReplicaCount := GetAvailableReplicaCountForReplicaSets(oldRSs) - scaleDownCount := GetReplicasForScaleDown(newRS) + GetReplicasForScaleDown(stableRS) + totalAvailableOlderReplicaCount - minAvailableReplicaCount + minAvailableReplicaCount := rolloutSpecReplica - MaxUnavailable(rollout) + replicasToScaleDown := GetReplicasForScaleDown(newRS, false) + GetReplicasForScaleDown(stableRS, true) - if scaleDownCount <= 0 { + if replicasToScaleDown <= minAvailableReplicaCount { // Cannot scale down stableRS or newRS without going below min available replica count return newRSReplicaCount, stableRSReplicaCount } - if scaleDownCount <= totalAvailableOlderReplicaCount { - //Need to scale down older replicas before scaling down the newRS or stableRS. - return newRSReplicaCount, stableRSReplicaCount - } - scaleDownCount = scaleDownCount - totalAvailableOlderReplicaCount + scaleDownCount := replicasToScaleDown - minAvailableReplicaCount - if newRS != nil && *newRS.Spec.Replicas > desiredNewRSReplicaCount && scaleDownCount > 0 { + if newRS != nil && *newRS.Spec.Replicas > desiredNewRSReplicaCount { // if the controller doesn't have to use every replica to achieve the desired count, it only scales down to the // desired count. if *newRS.Spec.Replicas-scaleDownCount < desiredNewRSReplicaCount { @@ -190,7 +190,7 @@ func CalculateReplicaCountsForCanary(rollout *v1alpha1.Rollout, newRS *appsv1.Re } } - if scaleStableRS && *stableRS.Spec.Replicas > desiredStableRSReplicaCount && scaleDownCount > 0 { + if scaleStableRS && *stableRS.Spec.Replicas > desiredStableRSReplicaCount { // This follows the same logic as scaling down the newRS except with the stableRS and it does not need to // set the scaleDownCount again since it's not used again if *stableRS.Spec.Replicas-scaleDownCount < desiredStableRSReplicaCount { @@ -230,7 +230,8 @@ func CheckStableRSExists(newRS, stableRS *appsv1.ReplicaSet) bool { } // GetReplicasForScaleDown returns the number of replicas to consider for scaling down. -func GetReplicasForScaleDown(rs *appsv1.ReplicaSet) int32 { +// isStableRS indicates if the supplied ReplicaSet is the stableRS +func GetReplicasForScaleDown(rs *appsv1.ReplicaSet, isStableRS bool) int32 { if rs == nil { return int32(0) } @@ -242,6 +243,13 @@ func GetReplicasForScaleDown(rs *appsv1.ReplicaSet) int32 { // violate the min available. return *rs.Spec.Replicas } + if isStableRS && rs.Status.AvailableReplicas < *rs.Spec.Replicas { + // The stable ReplicaSet might be scaled up, but its pods may be unavailable. + // In this case we need to return the spec.Replicas so that the controller will still + // consider scaling down this ReplicaSet. Without this, a rollout update could become stuck + // not scaling down the stable, in order to make room for more canaries. + return *rs.Spec.Replicas + } return rs.Status.AvailableReplicas } diff --git a/utils/replicaset/canary_test.go b/utils/replicaset/canary_test.go index f2e2bd372c..0ab1664fb3 100644 --- a/utils/replicaset/canary_test.go +++ b/utils/replicaset/canary_test.go @@ -450,6 +450,40 @@ func TestCalculateReplicaCountsForCanary(t *testing.T) { expectedStableReplicaCount: 10, expectedCanaryReplicaCount: 2, }, + { + // verify we surge canary up correctly when stable RS is not available + name: "honor maxSurge during scale up when stableRS unavailable", + rolloutSpecReplicas: 4, + setWeight: 100, + maxSurge: intstr.FromInt(1), + maxUnavailable: intstr.FromInt(0), + + stableSpecReplica: 4, + stableAvailableReplica: 1, + + canarySpecReplica: 0, + canaryAvailableReplica: 0, + + expectedStableReplicaCount: 4, + expectedCanaryReplicaCount: 1, // should only surge by 1 to honor maxSurge: 1 + }, + { + // verify we scale down stableRS while honoring maxUnavailable even when stableRS unavailable + name: "honor maxUnavailable during scale down stableRS unavailable", + rolloutSpecReplicas: 4, + setWeight: 100, + maxSurge: intstr.FromInt(1), + maxUnavailable: intstr.FromInt(0), + + stableSpecReplica: 4, + stableAvailableReplica: 1, + + canarySpecReplica: 1, + canaryAvailableReplica: 1, + + expectedStableReplicaCount: 3, // should only scale down by 1 to honor maxUnavailable: 0 + expectedCanaryReplicaCount: 1, + }, } for i := range tests { test := tests[i] @@ -458,8 +492,8 @@ func TestCalculateReplicaCountsForCanary(t *testing.T) { stableRS := newRS("stable", test.stableSpecReplica, test.stableAvailableReplica) canaryRS := newRS("canary", test.canarySpecReplica, test.canaryAvailableReplica) newRSReplicaCount, stableRSReplicaCount := CalculateReplicaCountsForCanary(rollout, canaryRS, stableRS, []*appsv1.ReplicaSet{test.olderRS}) - assert.Equal(t, test.expectedCanaryReplicaCount, newRSReplicaCount) - assert.Equal(t, test.expectedStableReplicaCount, stableRSReplicaCount) + assert.Equal(t, test.expectedCanaryReplicaCount, newRSReplicaCount, "check canary replica count") + assert.Equal(t, test.expectedStableReplicaCount, stableRSReplicaCount, "check stable replica count") }) } }