From 7eeb5d217b637fb07ce54b30ff78bd202d57f593 Mon Sep 17 00:00:00 2001 From: "mingzhou.swx" Date: Wed, 23 Nov 2022 14:08:53 +0800 Subject: [PATCH] rewrite batchRelease controller Signed-off-by: mingzhou.swx --- api/v1alpha1/batchrelease_plan_types.go | 53 +- api/v1alpha1/batchrelease_types.go | 6 - .../rollouts.kruise.io_batchreleases.yaml | 10 +- config/default/manager_config_patch.yaml | 1 + go.mod | 2 +- go.sum | 18 +- .../batchrelease/batchrelease_controller.go | 15 +- .../batchrelease_controller_test.go | 190 +++-- .../batchrelease_event_handler.go | 27 + .../batchrelease_event_handler_test.go | 250 ++++++- .../batchrelease/batchrelease_executor.go | 229 ++++++ .../batchrelease_plan_executor.go | 279 -------- .../batchrelease/batchrelease_status.go | 172 +++-- .../batchrelease/batchrelease_util.go | 143 ---- .../batchrelease/context/context.go | 114 +++ .../batchrelease/context/context_test.go | 182 +++++ .../control/canarystyle/control_plane.go | 213 ++++++ .../control/canarystyle/deployment/canary.go | 191 +++++ .../control/canarystyle/deployment/control.go | 107 +++ .../canarystyle/deployment/control_test.go | 334 +++++++++ .../control/canarystyle/deployment/stable.go | 106 +++ .../control/canarystyle/interface.go | 61 ++ .../batchrelease/control/interface.go | 69 ++ .../partitionstyle/cloneset/control.go | 204 ++++++ .../partitionstyle/cloneset/control_test.go | 342 +++++++++ .../control/partitionstyle/control_plane.go | 287 ++++++++ .../control/partitionstyle/interface.go | 49 ++ .../partitionstyle/statefulset/control.go | 225 ++++++ .../statefulset/control_test.go | 663 ++++++++++++++++++ pkg/controller/batchrelease/control/util.go | 106 +++ .../batchrelease/control/util_test.go | 169 +++++ .../batchrelease/labelpatch/filter.go | 149 ++++ .../batchrelease/labelpatch/filter_test.go | 315 +++++++++ .../batchrelease/labelpatch/patcher.go | 112 +++ .../batchrelease/labelpatch/patcher_test.go | 183 +++++ .../workloads/cloneset_control_plane.go | 405 
----------- .../workloads/cloneset_controller.go | 221 ------ .../workloads/cloneset_controller_test.go | 227 ------ .../batchrelease/workloads/commons.go | 285 -------- .../batchrelease/workloads/commons_test.go | 438 ------------ .../workloads/controller_types.go | 93 --- .../deployment_canary_control_plane.go | 351 ---------- .../workloads/deployment_canary_controller.go | 316 --------- .../deployment_canary_controller_test.go | 198 ------ .../workloads/statefulset_like_controller.go | 188 ----- .../workload_rollout_control_plane.go | 407 ----------- .../batchrelease/inner_batchrelease.go | 135 +--- pkg/util/controller_finder.go | 4 +- pkg/util/expectation/resource_expectations.go | 159 +++++ .../expectation/resource_expectations_test.go | 59 ++ pkg/util/parse_utils.go | 61 +- pkg/util/parse_utils_test.go | 32 +- pkg/util/pod_utils.go | 1 + pkg/util/workloads_utils.go | 128 +++- pkg/util/workloads_utils_test.go | 16 + .../rollout_create_update_handler_test.go | 16 + .../mutating/workload_update_handler.go | 6 +- .../mutating/workload_update_handler_test.go | 4 +- test/e2e/batchrelease_test.go | 6 +- test/e2e/rollout_test.go | 6 +- .../rollout/advanced_statefulset.yaml | 2 +- .../test_data/rollout/native_statefulset.yaml | 2 +- 62 files changed, 5385 insertions(+), 3957 deletions(-) create mode 100644 pkg/controller/batchrelease/batchrelease_executor.go delete mode 100644 pkg/controller/batchrelease/batchrelease_plan_executor.go delete mode 100644 pkg/controller/batchrelease/batchrelease_util.go create mode 100644 pkg/controller/batchrelease/context/context.go create mode 100644 pkg/controller/batchrelease/context/context_test.go create mode 100644 pkg/controller/batchrelease/control/canarystyle/control_plane.go create mode 100644 pkg/controller/batchrelease/control/canarystyle/deployment/canary.go create mode 100644 pkg/controller/batchrelease/control/canarystyle/deployment/control.go create mode 100644 
pkg/controller/batchrelease/control/canarystyle/deployment/control_test.go create mode 100644 pkg/controller/batchrelease/control/canarystyle/deployment/stable.go create mode 100644 pkg/controller/batchrelease/control/canarystyle/interface.go create mode 100644 pkg/controller/batchrelease/control/interface.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/cloneset/control.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/cloneset/control_test.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/control_plane.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/interface.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/statefulset/control.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/statefulset/control_test.go create mode 100644 pkg/controller/batchrelease/control/util.go create mode 100644 pkg/controller/batchrelease/control/util_test.go create mode 100644 pkg/controller/batchrelease/labelpatch/filter.go create mode 100644 pkg/controller/batchrelease/labelpatch/filter_test.go create mode 100644 pkg/controller/batchrelease/labelpatch/patcher.go create mode 100644 pkg/controller/batchrelease/labelpatch/patcher_test.go delete mode 100644 pkg/controller/batchrelease/workloads/cloneset_control_plane.go delete mode 100644 pkg/controller/batchrelease/workloads/cloneset_controller.go delete mode 100644 pkg/controller/batchrelease/workloads/cloneset_controller_test.go delete mode 100644 pkg/controller/batchrelease/workloads/commons.go delete mode 100644 pkg/controller/batchrelease/workloads/commons_test.go delete mode 100644 pkg/controller/batchrelease/workloads/controller_types.go delete mode 100644 pkg/controller/batchrelease/workloads/deployment_canary_control_plane.go delete mode 100644 pkg/controller/batchrelease/workloads/deployment_canary_controller.go delete mode 100644 
pkg/controller/batchrelease/workloads/deployment_canary_controller_test.go delete mode 100644 pkg/controller/batchrelease/workloads/statefulset_like_controller.go delete mode 100644 pkg/controller/batchrelease/workloads/workload_rollout_control_plane.go create mode 100644 pkg/util/expectation/resource_expectations.go create mode 100644 pkg/util/expectation/resource_expectations_test.go diff --git a/api/v1alpha1/batchrelease_plan_types.go b/api/v1alpha1/batchrelease_plan_types.go index f2066947..fd33c903 100644 --- a/api/v1alpha1/batchrelease_plan_types.go +++ b/api/v1alpha1/batchrelease_plan_types.go @@ -21,9 +21,6 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" ) -// ReleaseStrategyType defines strategies for pods rollout -type ReleaseStrategyType string - // ReleasePlan fines the details of the release plan type ReleasePlan struct { // Batches is the details on each batch of the ReleasePlan. @@ -37,7 +34,7 @@ type ReleasePlan struct { Batches []ReleaseBatch `json:"batches"` // All pods in the batches up to the batchPartition (included) will have // the target resource specification while the rest still is the stable revision. - // This is designed for the operators to manually rollout + // This is designed for the operators to manually rollout. // Default is nil, which means no partition and will release all batches. // BatchPartition start from 0. // +optional @@ -50,8 +47,22 @@ type ReleasePlan struct { // FailureThreshold. // Defaults to nil. FailureThreshold *intstr.IntOrString `json:"failureThreshold,omitempty"` + // FinalizingPolicy define the behavior of controller when phase enter Finalizing + // Defaults to "Immediate" + FinalizingPolicy FinalizingPolicyType `json:"finalizingPolicy,omitempty"` } +type FinalizingPolicyType string + +const ( + // WaitResumeFinalizingPolicyType will wait workload to be resumed, which means + // controller will be held at Finalizing phase until all pods of workload are upgraded. 
+ // WaitResumeFinalizingPolicyType only works in canary-style BatchRelease controller. + WaitResumeFinalizingPolicyType FinalizingPolicyType = "WaitResume" + // ImmediateFinalizingPolicyType will not wait for workload to be resumed. + ImmediateFinalizingPolicyType FinalizingPolicyType = "Immediate" +) + // ReleaseBatch is used to describe how each batch release should be type ReleaseBatch struct { // CanaryReplicas is the number of upgraded pods that should have in this batch. @@ -123,38 +134,10 @@ const ( ) const ( - // RolloutPhaseCancelled indicates a rollout is cancelled - RolloutPhaseCancelled RolloutPhase = "Cancelled" + // RolloutPhasePreparing indicates a rollout is preparing for next progress. + RolloutPhasePreparing RolloutPhase = "Preparing" // RolloutPhaseFinalizing indicates a rollout is finalizing RolloutPhaseFinalizing RolloutPhase = "Finalizing" - // RolloutPhaseCompleted indicates a rollout is completed + // RolloutPhaseCompleted indicates a rollout is completed/cancelled/terminated RolloutPhaseCompleted RolloutPhase = "Completed" - // RolloutPhasePreparing indicates a rollout is preparing for next progress. - RolloutPhasePreparing RolloutPhase = "Preparing" -) - -const ( - // VerifyingBatchReleaseCondition indicates the controller is verifying whether workload - // is ready to do rollout. - VerifyingBatchReleaseCondition RolloutConditionType = "Verifying" - // PreparingBatchReleaseCondition indicates the controller is preparing something before executing - // release plan, such as create canary deployment and record stable & canary revisions. - PreparingBatchReleaseCondition RolloutConditionType = "Preparing" - // ProgressingBatchReleaseCondition indicates the controller is executing release plan. - ProgressingBatchReleaseCondition RolloutConditionType = "Progressing" - // FinalizingBatchReleaseCondition indicates the canary state is completed, - // and the controller is doing something, such as cleaning up canary deployment. 
- FinalizingBatchReleaseCondition RolloutConditionType = "Finalizing" - // TerminatingBatchReleaseCondition indicates the rollout is terminating when the - // BatchRelease cr is being deleted or cancelled. - TerminatingBatchReleaseCondition RolloutConditionType = "Terminating" - // TerminatedBatchReleaseCondition indicates the BatchRelease cr can be deleted. - TerminatedBatchReleaseCondition RolloutConditionType = "Terminated" - // CancelledBatchReleaseCondition indicates the release plan is cancelled during rollout. - CancelledBatchReleaseCondition RolloutConditionType = "Cancelled" - // CompletedBatchReleaseCondition indicates the release plan is completed successfully. - CompletedBatchReleaseCondition RolloutConditionType = "Completed" - - SucceededBatchReleaseConditionReason = "Succeeded" - FailedBatchReleaseConditionReason = "Failed" ) diff --git a/api/v1alpha1/batchrelease_types.go b/api/v1alpha1/batchrelease_types.go index 9981b8d9..bcbcd86e 100644 --- a/api/v1alpha1/batchrelease_types.go +++ b/api/v1alpha1/batchrelease_types.go @@ -44,14 +44,8 @@ type BatchReleaseSpec struct { TargetRef ObjectRef `json:"targetReference"` // ReleasePlan is the details on how to rollout the resources ReleasePlan ReleasePlan `json:"releasePlan"` - // Paused the rollout, the release progress will be paused util paused is false. 
- // default is false - // +optional - Paused bool `json:"paused,omitempty"` } -type DeploymentReleaseStrategyType string - // BatchReleaseList contains a list of BatchRelease // +kubebuilder:object:root=true type BatchReleaseList struct { diff --git a/config/crd/bases/rollouts.kruise.io_batchreleases.yaml b/config/crd/bases/rollouts.kruise.io_batchreleases.yaml index 610a241a..61752ad3 100644 --- a/config/crd/bases/rollouts.kruise.io_batchreleases.yaml +++ b/config/crd/bases/rollouts.kruise.io_batchreleases.yaml @@ -52,10 +52,6 @@ spec: description: BatchReleaseSpec defines how to describe an update between different compRevision properties: - paused: - description: Paused the rollout, the release progress will be paused - util paused is false. default is false - type: boolean releasePlan: description: ReleasePlan is the details on how to rollout the resources properties: @@ -63,7 +59,7 @@ spec: description: All pods in the batches up to the batchPartition (included) will have the target resource specification while the rest still is the stable revision. This is designed for - the operators to manually rollout Default is nil, which means + the operators to manually rollout. Default is nil, which means no partition and will release all batches. BatchPartition start from 0. format: int32 @@ -102,6 +98,10 @@ spec: is nil, Rollout will use the MaxUnavailable of workload as its FailureThreshold. Defaults to nil. 
x-kubernetes-int-or-string: true + finalizingPolicy: + description: FinalizingPolicy define the behavior of controller + when phase enter Finalizing Defaults to "Immediate" + type: string rolloutID: description: RolloutID indicates an id for each rollout progress type: string diff --git a/config/default/manager_config_patch.yaml b/config/default/manager_config_patch.yaml index 6c400155..f1262b37 100644 --- a/config/default/manager_config_patch.yaml +++ b/config/default/manager_config_patch.yaml @@ -8,6 +8,7 @@ spec: spec: containers: - name: manager + imagePullPolicy: Always args: - "--config=controller_manager_config.yaml" volumeMounts: diff --git a/go.mod b/go.mod index b7850131..7464ee60 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/evanphx/json-patch v4.11.0+incompatible github.com/onsi/ginkgo v1.16.5 github.com/onsi/gomega v1.17.0 - github.com/openkruise/kruise-api v1.0.0 + github.com/openkruise/kruise-api v1.3.0 github.com/spf13/pflag v1.0.5 github.com/yuin/gopher-lua v0.0.0-20220504180219-658193537a64 gopkg.in/yaml.v2 v2.4.0 diff --git a/go.sum b/go.sum index 039758a4..eace13ab 100644 --- a/go.sum +++ b/go.sum @@ -43,17 +43,14 @@ github.com/Azure/go-ansiterm v0.0.0-20210608223527-2377c96fe795/go.mod h1:LmzpDX github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= -github.com/Azure/go-autorest/autorest v0.11.1/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw= github.com/Azure/go-autorest/autorest v0.11.12/go.mod h1:eipySxLmqSyC5s5k1CLupqet0PSENBEDP93LQ9a8QYw= github.com/Azure/go-autorest/autorest v0.11.18 h1:90Y4srNYrwOtAgVo3ndrQkTYn6kf1Eg/AjTFJ8Is2aM= github.com/Azure/go-autorest/autorest v0.11.18/go.mod h1:dSiJPy22c3u0OtOKDNttNgqpNFY/GeWa7GH/Pz56QRA= 
-github.com/Azure/go-autorest/autorest/adal v0.9.0/go.mod h1:/c022QCutn2P7uY+/oQWWNcK9YU+MH96NgK+jErpbcg= github.com/Azure/go-autorest/autorest/adal v0.9.5/go.mod h1:B7KF7jKIeC9Mct5spmyCB/A8CG/sEz1vwIRGv/bbw7A= github.com/Azure/go-autorest/autorest/adal v0.9.13 h1:Mp5hbtOePIzM8pJVRa3YLrWWmZtoxRXqUEzCfJt3+/Q= github.com/Azure/go-autorest/autorest/adal v0.9.13/go.mod h1:W/MM4U6nLxnIskrw4UwWzlHfGjwUS50aOsc/I3yuU8M= github.com/Azure/go-autorest/autorest/date v0.3.0 h1:7gUk1U5M/CQbp9WoqinNzJar+8KY+LPI6wiWrP/myHw= github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74= -github.com/Azure/go-autorest/autorest/mocks v0.4.0/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= github.com/Azure/go-autorest/autorest/mocks v0.4.1 h1:K0laFcLE6VLTOwNgSxaGbUcLPuGXlNkbVvq4cW4nIHk= github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= @@ -127,7 +124,6 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= -github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= @@ -155,7 +151,6 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo 
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/getsentry/raven-go v0.2.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= -github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -402,8 +397,8 @@ github.com/onsi/gomega v1.14.0/go.mod h1:cIuvLEne0aoVhAgh/O6ac0Op8WWw9H6eYCriF+t github.com/onsi/gomega v1.15.0/go.mod h1:cIuvLEne0aoVhAgh/O6ac0Op8WWw9H6eYCriF+tEHG0= github.com/onsi/gomega v1.17.0 h1:9Luw4uT5HTjHTN8+aNcSThgH1vdXnmdJ8xIfZ4wyTRE= github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= -github.com/openkruise/kruise-api v1.0.0 h1:ScA0LxRRNBsgbcyLhTzR9B+KpGNWsIMptzzmjTqfYQo= -github.com/openkruise/kruise-api v1.0.0/go.mod h1:kxV/UA/vrf/hz3z+kL21c0NOawC6K1ZjaKcJFgiOwsE= +github.com/openkruise/kruise-api v1.3.0 h1:yfEy64uXgSuX/5RwePLbwUK/uX8RRM8fHJkccel5ZIQ= +github.com/openkruise/kruise-api v1.3.0/go.mod h1:9ZX+ycdHKNzcA5ezAf35xOa2Mwfa2BYagWr0lKgi5dU= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= @@ -718,7 +713,6 @@ golang.org/x/sys v0.0.0-20200831180312-196b9ba8737a/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201112073958-5cba982894dd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -756,7 +750,6 @@ golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac h1:7zkz7BUtwNFFqcowJ+RIgu2MaV/MapERkDIy+mwPyjs= golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -976,7 +969,6 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -k8s.io/api v0.20.10/go.mod h1:0kei3F6biGjtRQBo5dUeujq6Ji3UCh9aOSfp/THYd7I= k8s.io/api v0.21.3/go.mod h1:hUgeYHUbBp23Ue4qdX9tR8/ANi/g3ehylAqDn9NWVOg= k8s.io/api v0.22.1/go.mod 
h1:bh13rkTp3F1XEaLGykbyRD2QaTTzPm0e/BMd8ptFONY= k8s.io/api v0.22.2/go.mod h1:y3ydYpLJAaDI+BbSe2xmGcqxiWHmWjkEeIbiwHvnPR8= @@ -986,7 +978,6 @@ k8s.io/apiextensions-apiserver v0.21.3/go.mod h1:kl6dap3Gd45+21Jnh6utCx8Z2xxLm8L k8s.io/apiextensions-apiserver v0.22.2/go.mod h1:2E0Ve/isxNl7tWLSUDgi6+cmwHi5fQRdwGVCxbC+KFA= k8s.io/apiextensions-apiserver v0.22.6 h1:TH+9+EGtoVzzbrlfSDnObzFTnyXKqw1NBfT5XFATeJI= k8s.io/apiextensions-apiserver v0.22.6/go.mod h1:wNsLwy8mfIkGThiv4Qq/Hy4qRazViKXqmH5pfYiRKyY= -k8s.io/apimachinery v0.20.10/go.mod h1:kQa//VOAwyVwJ2+L9kOREbsnryfsGSkSM1przND4+mw= k8s.io/apimachinery v0.21.3/go.mod h1:H/IM+5vH9kZRNJ4l3x/fXP/5bOPJaVP/guptnZPeCFI= k8s.io/apimachinery v0.22.1/go.mod h1:O3oNtNadZdeOMxHFVxOreoznohCpy0z6mocxbZr7oJ0= k8s.io/apimachinery v0.22.2/go.mod h1:O3oNtNadZdeOMxHFVxOreoznohCpy0z6mocxbZr7oJ0= @@ -995,13 +986,11 @@ k8s.io/apimachinery v0.22.6/go.mod h1:ZvVLP5iLhwVFg2Yx9Gh5W0um0DUauExbRhe+2Z8I1E k8s.io/apiserver v0.21.3/go.mod h1:eDPWlZG6/cCCMj/JBcEpDoK+I+6i3r9GsChYBHSbAzU= k8s.io/apiserver v0.22.2/go.mod h1:vrpMmbyjWrgdyOvZTSpsusQq5iigKNWv9o9KlDAbBHI= k8s.io/apiserver v0.22.6/go.mod h1:OlL1rGa2kKWGj2JEXnwBcul/BwC9Twe95gm4ohtiIIs= -k8s.io/client-go v0.20.10/go.mod h1:fFg+aLoasv/R+xiVaWjxeqGFYltzgQcOQzkFaSRfnJ0= k8s.io/client-go v0.21.3/go.mod h1:+VPhCgTsaFmGILxR/7E1N0S+ryO010QBeNCv5JwRGYU= k8s.io/client-go v0.22.1/go.mod h1:BquC5A4UOo4qVDUtoc04/+Nxp1MeHcVc1HJm1KmG8kk= k8s.io/client-go v0.22.2/go.mod h1:sAlhrkVDf50ZHx6z4K0S40wISNTarf1r800F+RlCF6U= k8s.io/client-go v0.22.6 h1:ugAXeC312xeGXsn7zTRz+btgtLBnW3qYhtUUpVQL7YE= k8s.io/client-go v0.22.6/go.mod h1:TffU4AV2idZGeP+g3kdFZP+oHVHWPL1JYFySOALriw0= -k8s.io/code-generator v0.20.10/go.mod h1:i6FmG+QxaLxvJsezvZp0q/gAEzzOz3U53KFibghWToU= k8s.io/code-generator v0.21.3/go.mod h1:K3y0Bv9Cz2cOW2vXUrNZlFbflhuPvuadW6JdnN6gGKo= k8s.io/code-generator v0.22.0/go.mod h1:eV77Y09IopzeXOJzndrDyCI88UBok2h6WxAlBwpxa+o= k8s.io/code-generator v0.22.2/go.mod h1:eV77Y09IopzeXOJzndrDyCI88UBok2h6WxAlBwpxa+o= @@ 
-1011,19 +1000,16 @@ k8s.io/component-base v0.22.2/go.mod h1:5Br2QhI9OTe79p+TzPe9JKNQYvEKbq9rTJDWllun k8s.io/component-base v0.22.6 h1:YgGMDVnr97rhn0eljuYIU/9XFyz8JVDM30slMYrDgPc= k8s.io/component-base v0.22.6/go.mod h1:ngHLefY4J5fq2fApNdbWyj4yh0lvw36do4aAjNN8rc8= k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= -k8s.io/gengo v0.0.0-20201113003025-83324d819ded/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= k8s.io/gengo v0.0.0-20201203183100-97869a43a9d9/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= k8s.io/gengo v0.0.0-20201214224949-b6c5ce23f027/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= k8s.io/klog v0.2.0 h1:0ElL0OHzF3N+OhoJTL0uca20SxtYt4X4+bzHeqrB83c= k8s.io/klog v0.2.0/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk= k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= -k8s.io/klog/v2 v2.4.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= k8s.io/klog/v2 v2.8.0/go.mod h1:hy9LJ/NvuK+iVyP4Ehqva4HxZG/oXyIS3n3Jmire4Ec= k8s.io/klog/v2 v2.9.0/go.mod h1:hy9LJ/NvuK+iVyP4Ehqva4HxZG/oXyIS3n3Jmire4Ec= k8s.io/klog/v2 v2.10.0 h1:R2HDMDJsHVTHA2n4RjwbeYXdOcBymXdX/JRb1v0VGhE= k8s.io/klog/v2 v2.10.0/go.mod h1:hy9LJ/NvuK+iVyP4Ehqva4HxZG/oXyIS3n3Jmire4Ec= -k8s.io/kube-openapi v0.0.0-20201113171705-d219536bb9fd/go.mod h1:WOJ3KddDSol4tAGcJo0Tvi+dK12EcqSLqcWsryKMpfM= k8s.io/kube-openapi v0.0.0-20210305001622-591a79e4bda7/go.mod h1:wXW5VT87nVfh/iLV8FpR2uDvrFyomxbtb1KivDbvPTE= k8s.io/kube-openapi v0.0.0-20210421082810-95288971da7e/go.mod h1:vHXdDvt9+2spS2Rx9ql3I8tycm3H9FDfdUoIuKCefvw= k8s.io/kube-openapi v0.0.0-20211109043538-20434351676c h1:jvamsI1tn9V0S8jicyX82qaFC0H/NKxv2e5mbqsgR80= diff --git a/pkg/controller/batchrelease/batchrelease_controller.go b/pkg/controller/batchrelease/batchrelease_controller.go index 9da2012b..35bde005 100644 --- 
a/pkg/controller/batchrelease/batchrelease_controller.go +++ b/pkg/controller/batchrelease/batchrelease_controller.go @@ -29,6 +29,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/retry" "k8s.io/klog/v2" @@ -186,11 +187,13 @@ func (r *BatchReleaseReconciler) Reconcile(ctx context.Context, req ctrl.Request return reconcile.Result{}, err } + errList := field.ErrorList{} + // executor start to execute the batch release plan. startTimestamp := time.Now() result, currentStatus, err := r.executor.Do(release) if err != nil { - return reconcile.Result{}, err + errList = append(errList, field.InternalError(field.NewPath("do-release"), err)) } defer func() { @@ -202,9 +205,14 @@ func (r *BatchReleaseReconciler) Reconcile(ctx context.Context, req ctrl.Request "reconcile-result ", result, "time-cost", time.Since(startTimestamp)) }() - return result, r.updateStatus(release, currentStatus) + err = r.updateStatus(release, currentStatus) + if err != nil { + errList = append(errList, field.InternalError(field.NewPath("update-status"), err)) + } + return result, errList.ToAggregate() } +// updateStatus update BatchRelease status to newStatus func (r *BatchReleaseReconciler) updateStatus(release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus) error { var err error defer func() { @@ -235,6 +243,7 @@ func (r *BatchReleaseReconciler) updateStatus(release *v1alpha1.BatchRelease, ne return err } +// handleFinalizer will remove finalizer in finalized phase and add finalizer in the other phases. 
func (r *BatchReleaseReconciler) handleFinalizer(release *v1alpha1.BatchRelease) (bool, error) { var err error defer func() { @@ -245,7 +254,7 @@ func (r *BatchReleaseReconciler) handleFinalizer(release *v1alpha1.BatchRelease) // remove the release finalizer if it needs if !release.DeletionTimestamp.IsZero() && - HasTerminatingCondition(release.Status) && + release.Status.Phase == v1alpha1.RolloutPhaseCompleted && controllerutil.ContainsFinalizer(release, ReleaseFinalizer) { err = util.UpdateFinalizer(r.Client, release, util.RemoveFinalizerOpType, ReleaseFinalizer) if client.IgnoreNotFound(err) != nil { diff --git a/pkg/controller/batchrelease/batchrelease_controller_test.go b/pkg/controller/batchrelease/batchrelease_controller_test.go index 7b292257..e6045562 100644 --- a/pkg/controller/batchrelease/batchrelease_controller_test.go +++ b/pkg/controller/batchrelease/batchrelease_controller_test.go @@ -68,6 +68,7 @@ var ( }, }, ReleasePlan: v1alpha1.ReleasePlan{ + BatchPartition: pointer.Int32(0), Batches: []v1alpha1.ReleaseBatch{ { CanaryReplicas: intstr.FromString("10%"), @@ -147,6 +148,7 @@ var ( }, }, ReleasePlan: v1alpha1.ReleasePlan{ + BatchPartition: pointer.Int32Ptr(0), Batches: []v1alpha1.ReleaseBatch{ { CanaryReplicas: intstr.FromString("10%"), @@ -233,47 +235,6 @@ func TestReconcile_CloneSet(t *testing.T) { ExpectedPhase v1alpha1.RolloutPhase ExpectedState v1alpha1.BatchReleaseBatchStateType }{ - // Following cases of Linear Transaction on State Machine - { - Name: "IfNeedProgress=false, Input-Phase=Initial, Output-Phase=Healthy", - GetRelease: func() client.Object { - return setPhase(releaseClone, v1alpha1.RolloutPhaseInitial) - }, - GetCloneSet: func() []client.Object { - clone := stableClone.DeepCopy() - clone.Annotations = nil - return []client.Object{ - clone, - } - }, - ExpectedPhase: v1alpha1.RolloutPhaseHealthy, - }, - { - Name: "IfNeedProgress=false, Input-Phase=Healthy, Output-Phase=Healthy", - GetRelease: func() client.Object { - return 
setPhase(releaseClone, v1alpha1.RolloutPhaseHealthy) - }, - GetCloneSet: func() []client.Object { - return []client.Object{ - stableClone.DeepCopy(), - } - }, - ExpectedPhase: v1alpha1.RolloutPhaseHealthy, - }, - { - Name: "IfNeedProgress=true, Input-Phase=Healthy, Output-Phase=Preparing", - GetRelease: func() client.Object { - return setPhase(releaseClone, v1alpha1.RolloutPhaseHealthy) - }, - GetCloneSet: func() []client.Object { - stable := getStableWithReady(stableClone, "v2") - canary := getCanaryWithStage(stable, "v2", -1, true) - return []client.Object{ - canary, - } - }, - ExpectedPhase: v1alpha1.RolloutPhasePreparing, - }, { Name: "Preparing, Input-Phase=Preparing, Output-Phase=Progressing", GetRelease: func() client.Object { @@ -349,6 +310,8 @@ func TestReconcile_CloneSet(t *testing.T) { canaryTemplate.Spec.Containers = containers("v2") release.Status.StableRevision = util.ComputeHash(stableTemplate, nil) release.Status.UpdateRevision = util.ComputeHash(canaryTemplate, nil) + release.Status.CanaryStatus.UpdatedReplicas = 10 + release.Status.CanaryStatus.UpdatedReadyReplicas = 10 return release }, GetCloneSet: func() []client.Object { @@ -372,6 +335,9 @@ func TestReconcile_CloneSet(t *testing.T) { canaryTemplate.Spec.Containers = containers("v2") release.Status.StableRevision = util.ComputeHash(stableTemplate, nil) release.Status.UpdateRevision = util.ComputeHash(canaryTemplate, nil) + release.Status.CanaryStatus.UpdatedReplicas = 10 + release.Status.CanaryStatus.UpdatedReadyReplicas = 10 + release.Spec.ReleasePlan.BatchPartition = pointer.Int32(1) return release }, GetCloneSet: func() []client.Object { @@ -397,6 +363,8 @@ func TestReconcile_CloneSet(t *testing.T) { canaryTemplate.Spec.Containers = containers("v2") release.Status.StableRevision = util.ComputeHash(stableTemplate, nil) release.Status.UpdateRevision = util.ComputeHash(canaryTemplate, nil) + release.Status.CanaryStatus.UpdatedReplicas = 10 + release.Status.CanaryStatus.UpdatedReadyReplicas = 10 
return release }, GetCloneSet: func() []client.Object { @@ -421,6 +389,8 @@ func TestReconcile_CloneSet(t *testing.T) { canaryTemplate.Spec.Containers = containers("v2") release.Status.StableRevision = util.ComputeHash(stableTemplate, nil) release.Status.UpdateRevision = util.ComputeHash(canaryTemplate, nil) + release.Status.CanaryStatus.UpdatedReplicas = 10 + release.Status.CanaryStatus.UpdatedReadyReplicas = 10 return release }, GetCloneSet: func() []client.Object { @@ -435,7 +405,7 @@ func TestReconcile_CloneSet(t *testing.T) { ExpectedState: v1alpha1.UpgradingBatchState, }, { - Name: `Special Case: RollBack, Input-Phase=Progressing, Output-Phase=Abort`, + Name: `Special Case: RollBack, Input-Phase=Progressing, Output-Phase=Progressing`, GetRelease: func() client.Object { release := setState(releaseClone, v1alpha1.ReadyBatchState) now := metav1.Now() @@ -446,6 +416,8 @@ func TestReconcile_CloneSet(t *testing.T) { canaryTemplate.Spec.Containers = containers("v2") release.Status.StableRevision = util.ComputeHash(stableTemplate, nil) release.Status.UpdateRevision = util.ComputeHash(canaryTemplate, nil) + release.Status.CanaryStatus.UpdatedReplicas = 10 + release.Status.CanaryStatus.UpdatedReadyReplicas = 10 return release }, GetCloneSet: func() []client.Object { @@ -459,11 +431,11 @@ func TestReconcile_CloneSet(t *testing.T) { canary, } }, - ExpectedPhase: v1alpha1.RolloutPhaseFinalizing, + ExpectedPhase: v1alpha1.RolloutPhaseProgressing, ExpectedState: v1alpha1.ReadyBatchState, }, { - Name: `Special Case: Deletion, Input-Phase=Progressing, Output-Phase=Terminating`, + Name: `Special Case: Deletion, Input-Phase=Progressing, Output-Phase=Finalizing`, GetRelease: func() client.Object { release := setState(releaseClone, v1alpha1.ReadyBatchState) now := metav1.Now() @@ -476,6 +448,8 @@ func TestReconcile_CloneSet(t *testing.T) { release.Status.UpdateRevision = util.ComputeHash(canaryTemplate, nil) release.DeletionTimestamp = &metav1.Time{Time: time.Now()} 
release.Finalizers = append(release.Finalizers, ReleaseFinalizer) + release.Status.CanaryStatus.UpdatedReplicas = 10 + release.Status.CanaryStatus.UpdatedReadyReplicas = 10 return release }, GetCloneSet: func() []client.Object { @@ -485,11 +459,11 @@ func TestReconcile_CloneSet(t *testing.T) { canary, } }, - ExpectedPhase: v1alpha1.RolloutPhaseTerminating, + ExpectedPhase: v1alpha1.RolloutPhaseFinalizing, ExpectedState: v1alpha1.ReadyBatchState, }, { - Name: `Special Case: Continuous Release, Input-Phase=Progressing, Output-Phase=Initial`, + Name: `Special Case: Continuous Release, Input-Phase=Progressing, Output-Phase=Progressing`, GetRelease: func() client.Object { release := setState(releaseClone, v1alpha1.ReadyBatchState) now := metav1.Now() @@ -500,6 +474,10 @@ func TestReconcile_CloneSet(t *testing.T) { canaryTemplate.Spec.Containers = containers("v2") release.Status.StableRevision = util.ComputeHash(stableTemplate, nil) release.Status.UpdateRevision = util.ComputeHash(canaryTemplate, nil) + release.Status.CanaryStatus.UpdatedReplicas = 10 + release.Status.CanaryStatus.UpdatedReadyReplicas = 10 + release.Spec.ReleasePlan.BatchPartition = pointer.Int32Ptr(1) + release.Status.ObservedReleasePlanHash = util.HashReleasePlanBatches(&release.Spec.ReleasePlan) return release }, GetCloneSet: func() []client.Object { @@ -515,13 +493,41 @@ func TestReconcile_CloneSet(t *testing.T) { canary, } }, - ExpectedPhase: v1alpha1.RolloutPhaseInitial, + ExpectedPhase: v1alpha1.RolloutPhaseProgressing, + ExpectedState: v1alpha1.ReadyBatchState, + }, + { + Name: `Special Case: BatchPartition=nil, Input-Phase=Progressing, Output-Phase=Finalizing`, + GetRelease: func() client.Object { + release := setState(releaseClone, v1alpha1.ReadyBatchState) + now := metav1.Now() + release.Status.CanaryStatus.BatchReadyTime = &now + stableTemplate := stableClone.Spec.Template.DeepCopy() + canaryTemplate := stableClone.Spec.Template.DeepCopy() + stableTemplate.Spec.Containers = containers("v1") + 
canaryTemplate.Spec.Containers = containers("v2") + release.Status.StableRevision = util.ComputeHash(stableTemplate, nil) + release.Status.UpdateRevision = util.ComputeHash(canaryTemplate, nil) + release.Finalizers = append(release.Finalizers, ReleaseFinalizer) + release.Status.CanaryStatus.UpdatedReplicas = 10 + release.Status.CanaryStatus.UpdatedReadyReplicas = 10 + release.Spec.ReleasePlan.BatchPartition = nil + return release + }, + GetCloneSet: func() []client.Object { + stable := getStableWithReady(stableClone, "v2") + canary := getCanaryWithStage(stable, "v2", 0, true) + return []client.Object{ + canary, + } + }, + ExpectedPhase: v1alpha1.RolloutPhaseFinalizing, + ExpectedState: v1alpha1.ReadyBatchState, }, } for _, cs := range cases { t.Run(cs.Name, func(t *testing.T) { - defer GinkgoRecover() release := cs.GetRelease() clonesets := cs.GetCloneSet() rec := record.NewFakeRecorder(100) @@ -530,6 +536,7 @@ func TestReconcile_CloneSet(t *testing.T) { Client: cli, recorder: rec, Scheme: scheme, + executor: NewReleasePlanExecutor(cli, rec), } key := client.ObjectKeyFromObject(release) @@ -561,31 +568,7 @@ func TestReconcile_Deployment(t *testing.T) { }{ // Following cases of Linear Transaction on State Machine { - Name: "IfNeedProgress=false, Input-Phase=Initial, Output-Phase=Healthy", - GetRelease: func() client.Object { - return setPhase(releaseDeploy, v1alpha1.RolloutPhaseInitial) - }, - GetDeployments: func() []client.Object { - return []client.Object{ - stableDeploy.DeepCopy(), - } - }, - ExpectedPhase: v1alpha1.RolloutPhaseHealthy, - }, - { - Name: "IfNeedProgress=false, Input-Phase=Healthy, Output-Phase=Healthy", - GetRelease: func() client.Object { - return setPhase(releaseDeploy, v1alpha1.RolloutPhaseHealthy) - }, - GetDeployments: func() []client.Object { - return []client.Object{ - stableDeploy.DeepCopy(), - } - }, - ExpectedPhase: v1alpha1.RolloutPhaseHealthy, - }, - { - Name: "IfNeedProgress=true, Input-Phase=Healthy, Output-Phase=Preparing", + Name: 
"IfNeedProgress=true, Input-Phase=Healthy, Output-Phase=Progressing", GetRelease: func() client.Object { return setPhase(releaseDeploy, v1alpha1.RolloutPhaseHealthy) }, @@ -596,7 +579,7 @@ func TestReconcile_Deployment(t *testing.T) { stable, canary, } }, - ExpectedPhase: v1alpha1.RolloutPhasePreparing, + ExpectedPhase: v1alpha1.RolloutPhaseProgressing, }, { Name: "Preparing, Input-Phase=Preparing, Output-Phase=Progressing", @@ -628,24 +611,30 @@ func TestReconcile_Deployment(t *testing.T) { ExpectedState: v1alpha1.VerifyingBatchState, }, { - Name: "Progressing, stage=0, Input-State=Upgrade, Output-State=Verify", + Name: "Progressing, stage=0, Input-State=Verify, Output-State=Upgrade", GetRelease: func() client.Object { - return setState(releaseDeploy, v1alpha1.UpgradingBatchState) + release := releaseDeploy.DeepCopy() + release.Status.CanaryStatus.UpdatedReplicas = 5 + release.Status.CanaryStatus.UpdatedReadyReplicas = 5 + return setState(release, v1alpha1.VerifyingBatchState) }, GetDeployments: func() []client.Object { stable := getStableWithReady(stableDeploy, "v2") - canary := getCanaryWithStage(stable, "v2", -1, true) + canary := getCanaryWithStage(stable, "v2", 0, false) return []client.Object{ stable, canary, } }, ExpectedPhase: v1alpha1.RolloutPhaseProgressing, - ExpectedState: v1alpha1.VerifyingBatchState, + ExpectedState: v1alpha1.UpgradingBatchState, }, { Name: "Progressing, stage=0, Input-State=Verify, Output-State=BatchReady", GetRelease: func() client.Object { - return setState(releaseDeploy, v1alpha1.VerifyingBatchState) + release := releaseDeploy.DeepCopy() + release.Status.CanaryStatus.UpdatedReplicas = 10 + release.Status.CanaryStatus.UpdatedReadyReplicas = 10 + return setState(release, v1alpha1.VerifyingBatchState) }, GetDeployments: func() []client.Object { stable := getStableWithReady(stableDeploy, "v2") @@ -660,9 +649,11 @@ func TestReconcile_Deployment(t *testing.T) { { Name: "Progressing, stage=0->1, Input-State=BatchReady, 
Output-State=Upgrade", GetRelease: func() client.Object { - release := setState(releaseDeploy, v1alpha1.ReadyBatchState) - release.Status.CanaryStatus.BatchReadyTime = getOldTime() - return release + release := releaseDeploy.DeepCopy() + release.Status.CanaryStatus.UpdatedReplicas = 10 + release.Status.CanaryStatus.UpdatedReadyReplicas = 10 + release.Spec.ReleasePlan.BatchPartition = pointer.Int32Ptr(1) + return setState(release, v1alpha1.ReadyBatchState) }, GetDeployments: func() []client.Object { stable := getStableWithReady(stableDeploy, "v2") @@ -678,9 +669,10 @@ func TestReconcile_Deployment(t *testing.T) { { Name: "Progressing, stage=0->1, Input-State=BatchReady, Output-State=BatchReady", GetRelease: func() client.Object { - release := setState(releaseDeploy, v1alpha1.ReadyBatchState) - now := metav1.Now() - release.Status.CanaryStatus.BatchReadyTime = &now + release := releaseDeploy.DeepCopy() + release.Status.CanaryStatus.UpdatedReplicas = 10 + release.Status.CanaryStatus.UpdatedReadyReplicas = 10 + release = setState(release, v1alpha1.ReadyBatchState) return release }, GetDeployments: func() []client.Object { @@ -713,7 +705,7 @@ func TestReconcile_Deployment(t *testing.T) { ExpectedState: v1alpha1.UpgradingBatchState, }, { - Name: `Special Case: RollBack, Input-Phase=Progressing, Output-Phase=Abort`, + Name: `Special Case: RollBack, Input-Phase=Progressing, Output-Phase=Progressing`, GetRelease: func() client.Object { release := setState(releaseDeploy, v1alpha1.ReadyBatchState) now := metav1.Now() @@ -733,11 +725,11 @@ func TestReconcile_Deployment(t *testing.T) { stable, canary, } }, - ExpectedPhase: v1alpha1.RolloutPhaseFinalizing, + ExpectedPhase: v1alpha1.RolloutPhaseProgressing, ExpectedState: v1alpha1.ReadyBatchState, }, { - Name: `Special Case: Deletion, Input-Phase=Progressing, Output-Phase=Terminating`, + Name: `Special Case: Deletion, Input-Phase=Progressing, Output-Phase=Finalizing`, GetRelease: func() client.Object { release := 
setState(releaseDeploy, v1alpha1.ReadyBatchState) now := metav1.Now() @@ -759,11 +751,11 @@ func TestReconcile_Deployment(t *testing.T) { stable, canary, } }, - ExpectedPhase: v1alpha1.RolloutPhaseTerminating, + ExpectedPhase: v1alpha1.RolloutPhaseFinalizing, ExpectedState: v1alpha1.ReadyBatchState, }, { - Name: `Special Case: Continuous Release, Input-Phase=Progressing, Output-Phase=Initial`, + Name: `Special Case: Continuous Release, Input-Phase=Progressing, Output-Phase=Progressing`, GetRelease: func() client.Object { release := setState(releaseDeploy, v1alpha1.ReadyBatchState) now := metav1.Now() @@ -783,13 +775,13 @@ func TestReconcile_Deployment(t *testing.T) { stable, canary, } }, - ExpectedPhase: v1alpha1.RolloutPhaseInitial, + ExpectedState: v1alpha1.ReadyBatchState, + ExpectedPhase: v1alpha1.RolloutPhaseProgressing, }, } for _, cs := range cases { t.Run(cs.Name, func(t *testing.T) { - defer GinkgoRecover() release := cs.GetRelease() deployments := cs.GetDeployments() rec := record.NewFakeRecorder(100) @@ -808,16 +800,16 @@ func TestReconcile_Deployment(t *testing.T) { Client: cli, recorder: rec, Scheme: scheme, + executor: NewReleasePlanExecutor(cli, rec), } key := client.ObjectKeyFromObject(release) request := reconcile.Request{NamespacedName: key} - result, err := reconciler.Reconcile(context.TODO(), request) - Expect(err).NotTo(HaveOccurred()) + result, _ := reconciler.Reconcile(context.TODO(), request) Expect(result.RequeueAfter).Should(BeNumerically(">=", int64(0))) newRelease := v1alpha1.BatchRelease{} - err = cli.Get(context.TODO(), key, &newRelease) + err := cli.Get(context.TODO(), key, &newRelease) Expect(err).NotTo(HaveOccurred()) Expect(newRelease.Status.Phase).Should(Equal(cs.ExpectedPhase)) Expect(newRelease.Status.CanaryStatus.CurrentBatch).Should(Equal(cs.ExpectedBatch)) @@ -838,12 +830,8 @@ func containers(version string) []corev1.Container { func setPhase(release *v1alpha1.BatchRelease, phase v1alpha1.RolloutPhase) *v1alpha1.BatchRelease 
{ r := release.DeepCopy() r.Status.Phase = phase - switch phase { - case v1alpha1.RolloutPhaseInitial, v1alpha1.RolloutPhaseHealthy: - default: - r.Status.ObservedWorkloadReplicas = 100 - r.Status.ObservedReleasePlanHash = util.HashReleasePlanBatches(&release.Spec.ReleasePlan) - } + r.Status.ObservedWorkloadReplicas = 100 + r.Status.ObservedReleasePlanHash = util.HashReleasePlanBatches(&release.Spec.ReleasePlan) return r } diff --git a/pkg/controller/batchrelease/batchrelease_event_handler.go b/pkg/controller/batchrelease/batchrelease_event_handler.go index 73124371..fd09fd38 100644 --- a/pkg/controller/batchrelease/batchrelease_event_handler.go +++ b/pkg/controller/batchrelease/batchrelease_event_handler.go @@ -26,6 +26,7 @@ import ( "github.com/openkruise/rollouts/api/v1alpha1" "github.com/openkruise/rollouts/pkg/util" utilclient "github.com/openkruise/rollouts/pkg/util/client" + expectations "github.com/openkruise/rollouts/pkg/util/expectation" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -61,10 +62,13 @@ func (p podEventHandler) Create(evt event.CreateEvent, q workqueue.RateLimitingI } p.enqueue(pod, q) } + func (p podEventHandler) Generic(evt event.GenericEvent, q workqueue.RateLimitingInterface) { } + func (p podEventHandler) Delete(evt event.DeleteEvent, q workqueue.RateLimitingInterface) { } + func (p podEventHandler) Update(evt event.UpdateEvent, q workqueue.RateLimitingInterface) { oldPod, oldOK := evt.ObjectOld.(*corev1.Pod) newPod, newOK := evt.ObjectNew.(*corev1.Pod) @@ -118,6 +122,7 @@ type workloadEventHandler struct { } func (w workloadEventHandler) Create(evt event.CreateEvent, q workqueue.RateLimitingInterface) { + expectationObserved(evt.Object) w.handleWorkload(q, evt.Object, CreateEventAction) } @@ -140,6 +145,7 @@ func (w workloadEventHandler) Update(evt event.UpdateEvent, q workqueue.RateLimi oldObject := evt.ObjectNew newObject := evt.ObjectOld + expectationObserved(newObject) if 
newObject.GetResourceVersion() == oldObject.GetResourceVersion() {
 		return
 	}
@@ -244,3 +250,27 @@ func getBatchRelease(c client.Reader, workloadNamespaceName types.NamespacedName
 
 	return
 }
+
+// expectationObserved marks the Create expectation for object's UID as observed
+// under the key of its controlling owner; objects without such a key are ignored.
+func expectationObserved(object client.Object) {
+	controllerKey := getControllerKey(object)
+	if controllerKey == nil {
+		return
+	}
+	klog.V(3).Infof("observed %v, remove from expectation %s: %s",
+		klog.KObj(object), *controllerKey, string(object.GetUID()))
+	expectations.ResourceExpectations.Observe(*controllerKey, expectations.Create, string(object.GetUID()))
+}
+
+// getControllerKey returns the NamespacedName string (object's namespace plus
+// the owner's name) of object's controller reference when that reference uses
+// the v1alpha1 group version of this project, and nil in every other case.
+func getControllerKey(object client.Object) *string {
+	owner := metav1.GetControllerOfNoCopy(object)
+	if owner == nil || owner.APIVersion != v1alpha1.GroupVersion.String() {
+		return nil
+	}
+	key := types.NamespacedName{Namespace: object.GetNamespace(), Name: owner.Name}.String()
+	return &key
+}
diff --git a/pkg/controller/batchrelease/batchrelease_event_handler_test.go b/pkg/controller/batchrelease/batchrelease_event_handler_test.go
index d654c53a..3e81e86e 100644
--- a/pkg/controller/batchrelease/batchrelease_event_handler_test.go
+++ b/pkg/controller/batchrelease/batchrelease_event_handler_test.go
@@ -26,7 +26,9 @@ import (
 	"github.com/openkruise/rollouts/api/v1alpha1"
 	"github.com/openkruise/rollouts/pkg/util"
 	apps "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/client-go/util/workqueue"
 	"k8s.io/utils/pointer"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -34,7 +36,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/event"
 )
 
-func TestEventHandler_Update(t *testing.T) {
+func TestWorkloadEventHandler_Update(t *testing.T) {
 	RegisterFailHandler(Fail)
 
 	cases := []struct {
@@ -141,7 +143,7 @@ func TestEventHandler_Update(t *testing.T) {
 	}
 }
 
-func TestEventHandler_Create(t *testing.T) {
+func TestWorkloadEventHandler_Create(t *testing.T) {
 	RegisterFailHandler(Fail)
 
 	cases := []struct {
@@ -197,7 +199,7 @@
func TestEventHandler_Create(t *testing.T) {
 	}
 }
 
-func TestEventHandler_Delete(t *testing.T) {
+func TestWorkloadEventHandler_Delete(t *testing.T) {
 	RegisterFailHandler(Fail)
 
 	cases := []struct {
@@ -252,3 +254,250 @@ func TestEventHandler_Delete(t *testing.T) {
 		})
 	}
 }
+
+// TestPodEventHandler_Update feeds pod update events to podEventHandler and
+// checks how many requests are queued for each old/new pod and workload pairing.
+func TestPodEventHandler_Update(t *testing.T) {
+	RegisterFailHandler(Fail)
+
+	cases := []struct {
+		Name             string
+		GetOldPod        func() client.Object
+		GetNewPod        func() client.Object
+		GetWorkload      func() client.Object
+		ExpectedQueueLen int
+	}{
+		{
+			Name: "CloneSet Pod NotReady -> Ready",
+			GetOldPod: func() client.Object {
+				return generatePod(false, true, "version-1")
+			},
+			GetNewPod: func() client.Object {
+				return generatePod(true, true, "version-1")
+			},
+			GetWorkload: func() client.Object {
+				clone := stableClone.DeepCopy()
+				owner, _ := json.Marshal(metav1.NewControllerRef(releaseClone, releaseClone.GetObjectKind().GroupVersionKind()))
+				clone.Annotations = map[string]string{
+					util.BatchReleaseControlAnnotation: string(owner),
+				}
+				return clone
+			},
+			ExpectedQueueLen: 1,
+		},
+		{
+			Name: "CloneSet Pod Ready -> Ready",
+			GetOldPod: func() client.Object {
+				return generatePod(true, true, "version-1")
+			},
+			GetNewPod: func() client.Object {
+				return generatePod(true, true, "version-1")
+			},
+			GetWorkload: func() client.Object {
+				clone := stableClone.DeepCopy()
+				owner, _ := json.Marshal(metav1.NewControllerRef(releaseClone, releaseClone.GetObjectKind().GroupVersionKind()))
+				clone.Annotations = map[string]string{
+					util.BatchReleaseControlAnnotation: string(owner),
+				}
+				return clone
+			},
+			ExpectedQueueLen: 0,
+		},
+		{
+			Name: "Orphan Pod NotReady -> Ready",
+			GetOldPod: func() client.Object {
+				return generatePod(false, false, "version-1")
+			},
+			GetNewPod: func() client.Object {
+				return generatePod(true, false, "version-1")
+			},
+			GetWorkload: func() client.Object {
+				clone := stableClone.DeepCopy()
+				owner, _ := json.Marshal(metav1.NewControllerRef(releaseClone, releaseClone.GetObjectKind().GroupVersionKind()))
+				clone.Annotations = map[string]string{
+					util.BatchReleaseControlAnnotation: string(owner),
+				}
+				return clone
+			},
+			ExpectedQueueLen: 0,
+		},
+		{
+			Name: "Orphan Pod Ready -> Ready",
+			GetOldPod: func() client.Object {
+				return generatePod(true, false, "version-1")
+			},
+			GetNewPod: func() client.Object {
+				return generatePod(true, false, "version-1")
+			},
+			GetWorkload: func() client.Object {
+				clone := stableClone.DeepCopy()
+				owner, _ := json.Marshal(metav1.NewControllerRef(releaseClone, releaseClone.GetObjectKind().GroupVersionKind()))
+				clone.Annotations = map[string]string{
+					util.BatchReleaseControlAnnotation: string(owner),
+				}
+				return clone
+			},
+			ExpectedQueueLen: 0,
+		},
+		{
+			Name: "Free CloneSet Pod NotReady -> Ready",
+			GetOldPod: func() client.Object {
+				return generatePod(false, false, "version-1")
+			},
+			GetNewPod: func() client.Object {
+				return generatePod(true, false, "version-1")
+			},
+			GetWorkload: func() client.Object {
+				clone := stableClone.DeepCopy()
+				return clone
+			},
+			ExpectedQueueLen: 0,
+		},
+		{
+			Name: "Free CloneSet Pod Ready -> Ready",
+			GetOldPod: func() client.Object {
+				return generatePod(true, false, "version-1")
+			},
+			GetNewPod: func() client.Object {
+				return generatePod(true, false, "version-1")
+			},
+			GetWorkload: func() client.Object {
+				clone := stableClone.DeepCopy()
+				return clone
+			},
+			ExpectedQueueLen: 0,
+		},
+		{
+			Name: "CloneSet Pod V1 -> V2",
+			GetOldPod: func() client.Object {
+				return generatePod(true, true, "version-1")
+			},
+			GetNewPod: func() client.Object {
+				return generatePod(true, true, "version-2")
+			},
+			GetWorkload: func() client.Object {
+				clone := stableClone.DeepCopy()
+				owner, _ := json.Marshal(metav1.NewControllerRef(releaseClone, releaseClone.GetObjectKind().GroupVersionKind()))
+				clone.Annotations = map[string]string{
+					util.BatchReleaseControlAnnotation: string(owner),
+				}
+				return clone
+			},
+			ExpectedQueueLen: 1,
+		},
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.Name, func(t *testing.T) {
+			oldObject := cs.GetOldPod()
+			newObject := cs.GetNewPod()
+			workload := cs.GetWorkload()
+			cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(releaseDeploy.DeepCopy(), workload).Build()
+			handler := podEventHandler{Reader: cli}
+			updateQ := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
+			updateEvt := event.UpdateEvent{
+				ObjectOld: oldObject,
+				ObjectNew: newObject,
+			}
+			handler.Update(updateEvt, updateQ)
+			Expect(updateQ.Len()).Should(Equal(cs.ExpectedQueueLen))
+		})
+	}
+}
+
+// TestPodEventHandler_Create feeds pod create events to podEventHandler and
+// checks how many requests are queued for each pod and workload pairing.
+func TestPodEventHandler_Create(t *testing.T) {
+	RegisterFailHandler(Fail)
+
+	cases := []struct {
+		Name             string
+		GetNewPod        func() client.Object
+		GetWorkload      func() client.Object
+		ExpectedQueueLen int
+	}{
+		{
+			Name: "CloneSet Pod",
+			GetNewPod: func() client.Object {
+				return generatePod(false, true, "version-1")
+			},
+			GetWorkload: func() client.Object {
+				clone := stableClone.DeepCopy()
+				owner, _ := json.Marshal(metav1.NewControllerRef(releaseClone, releaseClone.GetObjectKind().GroupVersionKind()))
+				clone.Annotations = map[string]string{
+					util.BatchReleaseControlAnnotation: string(owner),
+				}
+				return clone
+			},
+			ExpectedQueueLen: 1,
+		},
+		{
+			Name: "Orphan Pod",
+			GetNewPod: func() client.Object {
+				return generatePod(false, false, "version-1")
+			},
+			GetWorkload: func() client.Object {
+				clone := stableClone.DeepCopy()
+				owner, _ := json.Marshal(metav1.NewControllerRef(releaseClone, releaseClone.GetObjectKind().GroupVersionKind()))
+				clone.Annotations = map[string]string{
+					util.BatchReleaseControlAnnotation: string(owner),
+				}
+				return clone
+			},
+			ExpectedQueueLen: 0,
+		},
+		{
+			Name: "Free CloneSet Pod",
+			GetNewPod: func() client.Object {
+				return generatePod(false, true, "version-1")
+			},
+			GetWorkload: func() client.Object {
+				return stableClone.DeepCopy()
+			},
+			ExpectedQueueLen: 0,
+		},
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.Name, func(t *testing.T) {
+			newObject := cs.GetNewPod()
+			workload := cs.GetWorkload()
+			cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(releaseDeploy.DeepCopy(), workload).Build()
+			handler := podEventHandler{Reader: cli}
+			createQ := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
+			createEvt := event.CreateEvent{
+				Object: newObject,
+			}
+			handler.Create(createEvt, createQ)
+			Expect(createQ.Len()).Should(Equal(cs.ExpectedQueueLen))
+		})
+	}
+}
+
+// generatePod builds a pod named "pod" in stableClone's namespace whose revision
+// hash label is version. ready adds a Running phase and a true PodReady
+// condition; owned adds a controller reference pointing at stableClone.
+func generatePod(ready, owned bool, version string) *corev1.Pod {
+	pod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:            "pod",
+			Namespace:       stableClone.Namespace,
+			ResourceVersion: string(uuid.NewUUID()),
+			Labels: map[string]string{
+				apps.ControllerRevisionHashLabelKey: version,
+			},
+		},
+	}
+	if ready {
+		pod.Status.Phase = corev1.PodRunning
+		pod.Status.Conditions = append(pod.Status.Conditions, corev1.PodCondition{
+			Type:   corev1.PodReady,
+			Status: corev1.ConditionTrue,
+		})
+	}
+	if owned {
+		pod.OwnerReferences = append(pod.OwnerReferences,
+			*metav1.NewControllerRef(stableClone, stableClone.GroupVersionKind()))
+	}
+	return pod
+}
diff --git a/pkg/controller/batchrelease/batchrelease_executor.go b/pkg/controller/batchrelease/batchrelease_executor.go
new file mode 100644
index 00000000..c66d73a6
--- /dev/null
+++ b/pkg/controller/batchrelease/batchrelease_executor.go
@@ -0,0 +1,229 @@
+/*
+Copyright 2022 The Kruise Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package batchrelease
+
+import (
+	"fmt"
+	"reflect"
+	"time"
+
+	appsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1"
+	"github.com/openkruise/rollouts/api/v1alpha1"
+	"github.com/openkruise/rollouts/pkg/controller/batchrelease/control"
+	"github.com/openkruise/rollouts/pkg/controller/batchrelease/control/canarystyle"
+	"github.com/openkruise/rollouts/pkg/controller/batchrelease/control/canarystyle/deployment"
+	"github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle"
+	"github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/cloneset"
+	"github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/statefulset"
+	"github.com/openkruise/rollouts/pkg/util"
+	apps "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/klog/v2"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+)
+
+const (
+	DefaultDuration = 2 * time.Second
+)
+
+// Executor drives a BatchRelease's release plan through its phase and batch state machines.
+type Executor struct {
+	client   client.Client
+	recorder record.EventRecorder
+}
+
+// NewReleasePlanExecutor creates an Executor that uses cli and recorder.
+func NewReleasePlanExecutor(cli client.Client, recorder record.EventRecorder) *Executor {
+	return &Executor{
+		client:   cli,
+		recorder: recorder,
+	}
+}
+
+// Do runs one reconcile round for release and returns the requeue result, the status to persist, and any error.
+func (r *Executor) Do(release *v1alpha1.BatchRelease) (reconcile.Result, *v1alpha1.BatchReleaseStatus, error) {
+	klog.InfoS("Starting one round of reconciling release plan",
+		"BatchRelease", client.ObjectKeyFromObject(release),
+		"phase", release.Status.Phase,
+		"current-batch", release.Status.CanaryStatus.CurrentBatch,
+		"current-batch-state", release.Status.CanaryStatus.CurrentBatchState)
+
+	newStatus := getInitializedStatus(&release.Status)
+	workloadController, err := r.getReleaseController(release, newStatus)
+	if err != nil || workloadController == nil {
+		return reconcile.Result{}, nil, nil // NOTE(review): err is dropped and a nil status is returned here — confirm callers handle both
+	}
+
+	stop, result, err := r.syncStatusBeforeExecuting(release, newStatus, workloadController)
+	if stop || err != nil {
+		return result, newStatus, err
+	}
+
+	return r.executeBatchReleasePlan(release, newStatus, workloadController)
+}
+
+// executeBatchReleasePlan runs the handler for newStatus.Phase; an unknown phase restarts from Preparing.
+func (r *Executor) executeBatchReleasePlan(release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, workloadController control.Interface) (reconcile.Result, *v1alpha1.BatchReleaseStatus, error) {
+	var err error
+	result := reconcile.Result{}
+
+	klog.V(3).Infof("BatchRelease(%v) State Machine into '%s' state", klog.KObj(release), newStatus.Phase)
+
+	switch newStatus.Phase {
+	default:
+		// for compatibility. if it is an unknown phase, should start from beginning.
+		newStatus.Phase = v1alpha1.RolloutPhasePreparing
+		fallthrough
+
+	case v1alpha1.RolloutPhasePreparing:
+		// prepare and initialize something before progressing in this state.
+		err = workloadController.Initialize()
+		if err == nil {
+			newStatus.Phase = v1alpha1.RolloutPhaseProgressing
+			result = reconcile.Result{RequeueAfter: DefaultDuration}
+		}
+
+	case v1alpha1.RolloutPhaseProgressing:
+		// progress the release plan in this state.
+		result, err = r.progressBatches(release, newStatus, workloadController)
+
+	case v1alpha1.RolloutPhaseFinalizing:
+		err = workloadController.Finalize()
+		if err == nil {
+			newStatus.Phase = v1alpha1.RolloutPhaseCompleted
+		}
+
+	case v1alpha1.RolloutPhaseCompleted:
+		// this state indicates that the plan is executed/cancelled successfully, should do nothing in these states.
+	}
+
+	return result, newStatus, err
+}
+
+// progressBatches runs the handler for the current batch state during the Progressing phase; an unknown state restarts from Upgrading.
+func (r *Executor) progressBatches(release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, workloadController control.Interface) (reconcile.Result, error) {
+	var err error
+	result := reconcile.Result{}
+
+	klog.V(3).Infof("BatchRelease(%v) Canary Batch State Machine into '%s' state", klog.KObj(release), newStatus.CanaryStatus.CurrentBatchState)
+
+	switch newStatus.CanaryStatus.CurrentBatchState {
+	default:
+		// for compatibility. if it is an unknown state, should start from beginning.
+		newStatus.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState
+		fallthrough
+
+	case v1alpha1.UpgradingBatchState:
+		// modify workload replicas/partition based on release plan in this state.
+		err = workloadController.UpgradeBatch()
+		if err == nil {
+			result = reconcile.Result{RequeueAfter: DefaultDuration}
+			newStatus.CanaryStatus.CurrentBatchState = v1alpha1.VerifyingBatchState
+		}
+
+	case v1alpha1.VerifyingBatchState:
+		// replicas/partition has been modified, should wait pod ready in this state.
+		err = workloadController.CheckBatchReady()
+		switch {
+		case err != nil:
+			// should go to upgrade state to do again to avoid dead wait.
+			newStatus.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState
+		default:
+			now := metav1.Now()
+			newStatus.CanaryStatus.BatchReadyTime = &now
+			result = reconcile.Result{RequeueAfter: DefaultDuration}
+			newStatus.CanaryStatus.CurrentBatchState = v1alpha1.ReadyBatchState
+		}
+
+	case v1alpha1.ReadyBatchState:
+		// replicas/partition may be modified even though ready, should recheck in this state.
+		err = workloadController.CheckBatchReady()
+		switch {
+		case err != nil:
+			// if the batch ready condition changed due to some reasons, just recalculate the current batch.
+			newStatus.CanaryStatus.BatchReadyTime = nil
+			newStatus.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState
+		case !isPartitioned(release):
+			r.moveToNextBatch(release, newStatus)
+			result = reconcile.Result{RequeueAfter: DefaultDuration}
+		}
+	}
+
+	return result, err
+}
+
+// getReleaseController picks the control plane for the workload referenced by release; it returns (nil, nil) when no WorkloadRef is set.
+func (r *Executor) getReleaseController(release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus) (control.Interface, error) {
+	targetRef := release.Spec.TargetRef.WorkloadRef
+	if targetRef == nil {
+		return nil, nil
+	}
+
+	gvk := schema.FromAPIVersionAndKind(targetRef.APIVersion, targetRef.Kind)
+	if !util.IsSupportedWorkload(gvk) {
+		message := fmt.Sprintf("the workload type '%v' is not supported", gvk)
+		r.recorder.Event(release, v1.EventTypeWarning, "UnsupportedWorkload", message)
+		return nil, fmt.Errorf("%s", message) // message is data, never a format string
+	}
+
+	targetKey := types.NamespacedName{
+		Namespace: release.Namespace,
+		Name:      targetRef.Name,
+	}
+
+	switch targetRef.APIVersion {
+	case appsv1alpha1.GroupVersion.String():
+		if targetRef.Kind == reflect.TypeOf(appsv1alpha1.CloneSet{}).Name() {
+			klog.InfoS("Using CloneSet batch release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace)
+			return partitionstyle.NewControlPlane(cloneset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil
+		}
+
+	case apps.SchemeGroupVersion.String():
+		if targetRef.Kind == reflect.TypeOf(apps.Deployment{}).Name() {
+			klog.InfoS("Using Deployment batch release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace)
+			return canarystyle.NewControlPlane(deployment.NewController, r.client, r.recorder, release, newStatus, targetKey), nil
+		}
+	}
+
+	// try to use StatefulSet-like rollout controller by default
+	klog.InfoS("Using StatefulSet-like batch release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace)
+	return partitionstyle.NewControlPlane(statefulset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil
+}
+
+// moveToNextBatch resets the batch state to Upgrading and advances CurrentBatch unless BatchPartition holds it at the current batch.
+func (r *Executor) moveToNextBatch(release *v1alpha1.BatchRelease, status *v1alpha1.BatchReleaseStatus) {
+	currentBatch := int(status.CanaryStatus.CurrentBatch)
+	if currentBatch >= len(release.Spec.ReleasePlan.Batches)-1 {
+		klog.V(3).Infof("BatchRelease(%v) finished all batch, release current batch: %v", klog.KObj(release), status.CanaryStatus.CurrentBatch)
+	}
+	if release.Spec.ReleasePlan.BatchPartition == nil || *release.Spec.ReleasePlan.BatchPartition > status.CanaryStatus.CurrentBatch {
+		status.CanaryStatus.CurrentBatch++
+	}
+	status.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState
+	klog.V(3).Infof("BatchRelease(%v) finished one batch, release current batch: %v", klog.KObj(release), status.CanaryStatus.CurrentBatch)
+}
+
+// isPartitioned reports whether BatchPartition is set and not greater than the release's current batch.
+func isPartitioned(release *v1alpha1.BatchRelease) bool {
+	return release.Spec.ReleasePlan.BatchPartition != nil &&
+		*release.Spec.ReleasePlan.BatchPartition <= release.Status.CanaryStatus.CurrentBatch
+}
diff --git a/pkg/controller/batchrelease/batchrelease_plan_executor.go b/pkg/controller/batchrelease/batchrelease_plan_executor.go
deleted file mode 100644
index 490be99e..00000000
--- a/pkg/controller/batchrelease/batchrelease_plan_executor.go
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
-Copyright 2022 The Kruise Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/ - -package batchrelease - -import ( - "fmt" - "reflect" - "time" - - appsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" - "github.com/openkruise/rollouts/api/v1alpha1" - "github.com/openkruise/rollouts/pkg/controller/batchrelease/workloads" - "github.com/openkruise/rollouts/pkg/util" - apps "k8s.io/api/apps/v1" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/tools/record" - "k8s.io/klog/v2" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/reconcile" -) - -const ( - DefaultDuration = 2 * time.Second -) - -// Executor is the controller that controls the release plan resource -type Executor struct { - client client.Client - recorder record.EventRecorder -} - -// NewReleasePlanExecutor creates a RolloutPlanController -func NewReleasePlanExecutor(cli client.Client, recorder record.EventRecorder) *Executor { - return &Executor{ - client: cli, - recorder: recorder, - } -} - -// Do execute the release plan -func (r *Executor) Do(release *v1alpha1.BatchRelease) (reconcile.Result, *v1alpha1.BatchReleaseStatus, error) { - klog.InfoS("Starting one round of reconciling release plan", - "BatchRelease", client.ObjectKeyFromObject(release), - "phase", release.Status.Phase, - "current-batch", release.Status.CanaryStatus.CurrentBatch, - "current-batch-state", release.Status.CanaryStatus.CurrentBatchState) - - newStatus := getInitializedStatus(&release.Status) - workloadController, err := r.getWorkloadController(release, newStatus) - if err != nil || workloadController == nil { - return reconcile.Result{}, nil, nil - } - - stop, result, err := r.syncStatusBeforeExecuting(release, newStatus, workloadController) - if stop || err != nil { - return result, newStatus, err - } - - return r.executeBatchReleasePlan(release, newStatus, workloadController) -} - -func (r *Executor) executeBatchReleasePlan(release 
*v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, workloadController workloads.WorkloadController) (reconcile.Result, *v1alpha1.BatchReleaseStatus, error) { - var err error - result := reconcile.Result{} - - klog.V(3).Infof("BatchRelease(%v) State Machine into '%s' state", klog.KObj(release), newStatus.Phase) - - switch newStatus.Phase { - case v1alpha1.RolloutPhaseInitial: - // if this batchRelease was created but workload doest not exist, - // should keep this phase and do nothing util workload is created. - - case v1alpha1.RolloutPhaseHealthy: - // verify whether the workload is ready to execute the release plan in this state. - var verifiedDone bool - verifiedDone, err = workloadController.VerifyWorkload() - switch { - case err != nil: - setCondition(newStatus, v1alpha1.VerifyingBatchReleaseCondition, v1.ConditionFalse, v1alpha1.FailedBatchReleaseConditionReason, err.Error()) - case verifiedDone: - newStatus.Phase = v1alpha1.RolloutPhasePreparing - result = reconcile.Result{RequeueAfter: DefaultDuration} - setCondition(newStatus, v1alpha1.PreparingBatchReleaseCondition, v1.ConditionTrue, "", "BatchRelease is preparing for progress") - } - - case v1alpha1.RolloutPhasePreparing: - // prepare and initialize something before progressing in this state. 
- var preparedDone bool - var replicasNoNeedToRollback *int32 - preparedDone, replicasNoNeedToRollback, err = workloadController.PrepareBeforeProgress() - switch { - case err != nil: - setCondition(newStatus, v1alpha1.PreparingBatchReleaseCondition, v1.ConditionFalse, v1alpha1.FailedBatchReleaseConditionReason, err.Error()) - case preparedDone: - newStatus.Phase = v1alpha1.RolloutPhaseProgressing - result = reconcile.Result{RequeueAfter: DefaultDuration} - newStatus.CanaryStatus.NoNeedUpdateReplicas = replicasNoNeedToRollback - setCondition(newStatus, v1alpha1.ProgressingBatchReleaseCondition, v1.ConditionTrue, "", "BatchRelease is progressing") - default: - result = reconcile.Result{RequeueAfter: DefaultDuration} - } - - case v1alpha1.RolloutPhaseProgressing: - // progress the release plan in this state. - var progressDone bool - progressDone, result, err = r.progressBatches(release, newStatus, workloadController) - switch { - case progressDone: - newStatus.Phase = v1alpha1.RolloutPhaseFinalizing - setCondition(newStatus, v1alpha1.FinalizingBatchReleaseCondition, v1.ConditionTrue, "", "BatchRelease is finalizing") - } - - case v1alpha1.RolloutPhaseFinalizing: - // finalize canary the resources when progressing done. - // Do not clean the canary resources, because rollout - // controller should set the traffic routing first. 
- var finalizedDone bool - finalizedDone, err = workloadController.FinalizeProgress(false) - switch { - case err != nil: - setCondition(newStatus, v1alpha1.CompletedBatchReleaseCondition, v1.ConditionFalse, v1alpha1.FailedBatchReleaseConditionReason, err.Error()) - case finalizedDone: - if IsAllBatchReady(release) { - newStatus.Phase = v1alpha1.RolloutPhaseCompleted - setCondition(newStatus, v1alpha1.CompletedBatchReleaseCondition, v1.ConditionTrue, v1alpha1.SucceededBatchReleaseConditionReason, "BatchRelease is completed") - } else { - newStatus.Phase = v1alpha1.RolloutPhaseCancelled - setCondition(newStatus, v1alpha1.CancelledBatchReleaseCondition, v1.ConditionTrue, v1alpha1.SucceededBatchReleaseConditionReason, "BatchRelease is cancelled") - } - default: - result = reconcile.Result{RequeueAfter: DefaultDuration} - } - - case v1alpha1.RolloutPhaseTerminating: - var finalizedDone bool - finalizedDone, err = workloadController.FinalizeProgress(true) - switch { - case err != nil: - setCondition(newStatus, v1alpha1.CompletedBatchReleaseCondition, v1.ConditionFalse, v1alpha1.FailedBatchReleaseConditionReason, err.Error()) - case finalizedDone: - setCondition(newStatus, v1alpha1.TerminatedBatchReleaseCondition, v1.ConditionTrue, v1alpha1.SucceededBatchReleaseConditionReason, "BatchRelease is terminated") - default: - result = reconcile.Result{RequeueAfter: DefaultDuration} - } - - case v1alpha1.RolloutPhaseCompleted, v1alpha1.RolloutPhaseCancelled: - // this state indicates that the plan is executed/cancelled successfully, should do nothing in these states. 
- - default: - klog.V(3).Infof("BatchRelease(%v) State Machine into %s state", klog.KObj(release), "Unknown") - panic(fmt.Sprintf("illegal release status %+v", newStatus)) - } - - return result, newStatus, err -} - -// reconcile logic when we are in the middle of release, we have to go through finalizing state before succeed or fail -func (r *Executor) progressBatches(release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, workloadController workloads.WorkloadController) (bool, reconcile.Result, error) { - var err error - progressDone := false - result := reconcile.Result{} - - klog.V(3).Infof("BatchRelease(%v) Canary Batch State Machine into '%s' state", klog.KObj(release), newStatus.CanaryStatus.CurrentBatchState) - - switch newStatus.CanaryStatus.CurrentBatchState { - case "", v1alpha1.UpgradingBatchState: - // modify workload replicas/partition based on release plan in this state. - upgradeDone, upgradeErr := workloadController.UpgradeOneBatch() - switch { - case upgradeErr != nil: - err = upgradeErr - setCondition(newStatus, "Progressing", v1.ConditionFalse, "UpgradeBatchFailed", err.Error()) - case upgradeDone: - result = reconcile.Result{RequeueAfter: DefaultDuration} - newStatus.CanaryStatus.CurrentBatchState = v1alpha1.VerifyingBatchState - } - - case v1alpha1.VerifyingBatchState: - // replicas/partition has been modified, should wait pod ready in this state. 
- verified, verifiedErr := workloadController.CheckOneBatchReady() - switch { - case verifiedErr != nil: - err = verifiedErr - setCondition(newStatus, "Progressing", v1.ConditionFalse, "VerifyBatchFailed", err.Error()) - case verified: - now := metav1.Now() - newStatus.CanaryStatus.BatchReadyTime = &now - result = reconcile.Result{RequeueAfter: DefaultDuration} - newStatus.CanaryStatus.CurrentBatchState = v1alpha1.ReadyBatchState - default: - newStatus.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState - } - - case v1alpha1.ReadyBatchState: - if !IsPartitioned(release) { - // expected pods in the batch are upgraded and the state is ready, then try to move to the next batch - progressDone = r.moveToNextBatch(release, newStatus) - result = reconcile.Result{RequeueAfter: DefaultDuration} - setCondition(newStatus, v1alpha1.ProgressingBatchReleaseCondition, v1.ConditionTrue, "", "BatchRelease is progressing") - } else { - setCondition(newStatus, "Progressing", v1.ConditionFalse, "Paused", fmt.Sprintf("BatchRelease is partitioned in %v-th batch", newStatus.CanaryStatus.CurrentBatch)) - } - - default: - klog.V(3).Infof("ReleasePlan(%v) Batch State Machine into %s state", "Unknown") - panic(fmt.Sprintf("illegal status %+v", newStatus)) - } - - return progressDone, result, err -} - -// GetWorkloadController pick the right workload controller to work on the workload -func (r *Executor) getWorkloadController(release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus) (workloads.WorkloadController, error) { - targetRef := release.Spec.TargetRef.WorkloadRef - if targetRef == nil { - return nil, nil - } - - gvk := schema.FromAPIVersionAndKind(targetRef.APIVersion, targetRef.Kind) - if !util.IsSupportedWorkload(gvk) { - message := fmt.Sprintf("the workload type '%v' is not supported", gvk) - r.recorder.Event(release, v1.EventTypeWarning, "UnsupportedWorkload", message) - return nil, fmt.Errorf(message) - } - - targetKey := types.NamespacedName{ - Namespace: 
release.Namespace, - Name: targetRef.Name, - } - - switch targetRef.APIVersion { - case appsv1alpha1.GroupVersion.String(): - if targetRef.Kind == reflect.TypeOf(appsv1alpha1.CloneSet{}).Name() { - klog.InfoS("using cloneset batch release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return workloads.NewCloneSetRolloutController(r.client, r.recorder, release, newStatus, targetKey), nil - } - - case apps.SchemeGroupVersion.String(): - if targetRef.Kind == reflect.TypeOf(apps.Deployment{}).Name() { - klog.InfoS("using deployment batch release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return workloads.NewDeploymentRolloutController(r.client, r.recorder, release, newStatus, targetKey), nil - } - } - - klog.InfoS("using statefulset-like batch release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return workloads.NewUnifiedWorkloadRolloutControlPlane(workloads.NewStatefulSetLikeController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil -} - -func (r *Executor) moveToNextBatch(release *v1alpha1.BatchRelease, status *v1alpha1.BatchReleaseStatus) bool { - currentBatch := int(status.CanaryStatus.CurrentBatch) - if currentBatch >= len(release.Spec.ReleasePlan.Batches)-1 { - klog.V(3).Infof("BatchRelease(%v) finished all batch, release current batch: %v", klog.KObj(release), status.CanaryStatus.CurrentBatch) - return true - } else { - if release.Spec.ReleasePlan.BatchPartition == nil || *release.Spec.ReleasePlan.BatchPartition > status.CanaryStatus.CurrentBatch { - status.CanaryStatus.CurrentBatch++ - } - status.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState - klog.V(3).Infof("BatchRelease(%v) finished one batch, release current batch: %v", klog.KObj(release), status.CanaryStatus.CurrentBatch) - return false - } -} diff --git 
a/pkg/controller/batchrelease/batchrelease_status.go b/pkg/controller/batchrelease/batchrelease_status.go index 7c8ad451..cc7a5281 100644 --- a/pkg/controller/batchrelease/batchrelease_status.go +++ b/pkg/controller/batchrelease/batchrelease_status.go @@ -20,24 +20,23 @@ import ( "reflect" "github.com/openkruise/rollouts/api/v1alpha1" - "github.com/openkruise/rollouts/pkg/controller/batchrelease/workloads" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control" "github.com/openkruise/rollouts/pkg/util" - v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/klog/v2" + "k8s.io/utils/integer" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) -func (r *Executor) syncStatusBeforeExecuting(release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, controller workloads.WorkloadController) (bool, reconcile.Result, error) { +func (r *Executor) syncStatusBeforeExecuting(release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, controller control.Interface) (bool, reconcile.Result, error) { var err error - var reason string var message string var needRetry bool needStopThisRound := false result := reconcile.Result{} // sync the workload info and watch the workload change event - workloadEvent, workloadInfo, err := controller.SyncWorkloadInfo() + workloadEvent, workloadInfo, err := controller.SyncWorkloadInformation() // Note: must keep the order of the following cases: switch { @@ -45,40 +44,35 @@ func (r *Executor) syncStatusBeforeExecuting(release *v1alpha1.BatchRelease, new SPECIAL CASES ABOUT THE BATCH RELEASE PLAN *************************************************************************/ //The following special cases are about the **batch release plan**, include: - // (1). Plan is deleted or cancelled - // (2). Plan is paused during rollout + // (1). Plan has been terminated + // (2). Plan is deleted or cancelled // (3). Plan is changed during rollout // (4). 
Plan status is unexpected/unhealthy - case isPlanTerminating(release): - // handle the case that the plan is deleted or is terminating - reason = "PlanTerminating" - message = "Release plan is deleted or cancelled, then terminate" - signalTerminating(newStatus) - - case isPlanPaused(workloadEvent, release): - // handle the case that releasePlan.paused = true - reason = "PlanPaused" - message = "release plan is paused, then stop reconcile" + case isPlanCompleted(release): + message = "release plan has been terminated, will do nothing" needStopThisRound = true + case isPlanFinalizing(release): + // handle the case that the plan is deleted or is terminating + message = "release plan is deleted or cancelled, then finalize" + signalFinalizing(newStatus) + case isPlanChanged(release): // handle the case that release plan is changed during progressing - reason = "PlanChanged" message = "release plan is changed, then recalculate status" signalRecalculate(release, newStatus) case isPlanUnhealthy(release): // handle the case that release status is chaos which may lead to panic - reason = "PlanStatusUnhealthy" message = "release plan is unhealthy, then restart" - needStopThisRound = true + signalRestartAll(newStatus) /************************************************************************** SPECIAL CASES ABOUT THE WORKLOAD *************************************************************************/ // The following special cases are about the **workload**, include: // (1). Get workload info err - // (2). Workload was deleted + // (2). Workload is deleted // (3). Workload is created // (4). Workload scale when rollout // (5). Workload rollback when rollout @@ -87,57 +81,39 @@ func (r *Executor) syncStatusBeforeExecuting(release *v1alpha1.BatchRelease, new // (8). 
Workload is at unstable state, its workload info is untrustworthy case isGetWorkloadInfoError(err): // handle the case of IgnoreNotFound(err) != nil - reason = "GetWorkloadError" message = err.Error() case isWorkloadGone(workloadEvent, release): // handle the case that the workload is deleted - reason = "WorkloadGone" - message = "target workload has gone, then terminate" - signalTerminating(newStatus) - - case isWorkloadLocated(err, release): - // handle the case that workload is newly created - reason = "WorkloadLocated" - message = "workload is located, then start" - signalLocated(newStatus) + message = "target workload has gone, then finalize" + signalFinalizing(newStatus) case isWorkloadScaling(workloadEvent, release): // handle the case that workload is scaling during progressing - reason = "ReplicasChanged" message = "workload is scaling, then reinitialize batch status" - signalReinitializeBatch(newStatus) + signalRestartBatch(newStatus) // we must ensure that this field is updated only when we have observed // the workload scaling event, otherwise this event may be lost. 
- newStatus.ObservedWorkloadReplicas = *workloadInfo.Replicas + newStatus.ObservedWorkloadReplicas = workloadInfo.Replicas case isWorkloadRevisionChanged(workloadEvent, release): // handle the case of continuous release - reason = "TargetRevisionChanged" message = "workload revision was changed, then abort" - signalFinalize(newStatus) - - case isWorkloadUnhealthy(workloadEvent, release): - // handle the case that workload is unhealthy, and rollout plan cannot go on - reason = "WorkloadUnHealthy" - message = "workload is UnHealthy, then stop" + newStatus.UpdateRevision = workloadInfo.Status.UpdateRevision needStopThisRound = true case isWorkloadUnstable(workloadEvent, release): // handle the case that workload.Generation != workload.Status.ObservedGeneration - reason = "WorkloadNotStable" message = "workload status is not stable, then wait" needStopThisRound = true case isWorkloadRollbackInBatch(workloadEvent, release): // handle the case of rollback in batches if isRollbackInBatchSatisfied(workloadInfo, release) { - reason = "RollbackInBatch" message = "workload is rollback in batch" signalRePrepareRollback(newStatus) newStatus.UpdateRevision = workloadInfo.Status.UpdateRevision } else { - reason = "Rollback" message = "workload is preparing rollback, wait condition to be satisfied" needStopThisRound = true } @@ -145,7 +121,6 @@ func (r *Executor) syncStatusBeforeExecuting(release *v1alpha1.BatchRelease, new // log the special event info if len(message) > 0 { - r.recorder.Eventf(release, v1.EventTypeWarning, reason, message) klog.Warningf("Special case occurred in BatchRelease(%v), message: %v", klog.KObj(release), message) } @@ -173,18 +148,23 @@ func (r *Executor) syncStatusBeforeExecuting(release *v1alpha1.BatchRelease, new func refreshStatus(release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, workloadInfo *util.WorkloadInfo) { // refresh workload info for status if workloadInfo != nil { - if workloadInfo.Status != nil { - 
newStatus.CanaryStatus.UpdatedReplicas = workloadInfo.Status.UpdatedReplicas - newStatus.CanaryStatus.UpdatedReadyReplicas = workloadInfo.Status.UpdatedReadyReplicas - } + newStatus.CanaryStatus.UpdatedReplicas = workloadInfo.Status.UpdatedReplicas + newStatus.CanaryStatus.UpdatedReadyReplicas = workloadInfo.Status.UpdatedReadyReplicas } if len(newStatus.ObservedReleasePlanHash) == 0 { newStatus.ObservedReleasePlanHash = util.HashReleasePlanBatches(&release.Spec.ReleasePlan) } } -func isPlanTerminating(release *v1alpha1.BatchRelease) bool { - return release.DeletionTimestamp != nil || release.Status.Phase == v1alpha1.RolloutPhaseTerminating +func isPlanFinalizing(release *v1alpha1.BatchRelease) bool { + if release.DeletionTimestamp != nil || release.Status.Phase == v1alpha1.RolloutPhaseFinalizing { + return true + } + return release.Spec.ReleasePlan.BatchPartition == nil +} + +func isPlanCompleted(release *v1alpha1.BatchRelease) bool { + return release.Status.Phase == v1alpha1.RolloutPhaseCompleted } func isPlanChanged(release *v1alpha1.BatchRelease) bool { @@ -195,46 +175,90 @@ func isPlanUnhealthy(release *v1alpha1.BatchRelease) bool { return int(release.Status.CanaryStatus.CurrentBatch) >= len(release.Spec.ReleasePlan.Batches) && release.Status.Phase == v1alpha1.RolloutPhaseProgressing } -func isPlanPaused(event workloads.WorkloadEventType, release *v1alpha1.BatchRelease) bool { - return release.Spec.Paused && release.Status.Phase == v1alpha1.RolloutPhaseProgressing && !isWorkloadGone(event, release) -} - func isGetWorkloadInfoError(err error) bool { return err != nil && !errors.IsNotFound(err) } -func isWorkloadLocated(err error, release *v1alpha1.BatchRelease) bool { - return err == nil && (release.Status.Phase == v1alpha1.RolloutPhaseInitial || release.Status.Phase == "") +func isWorkloadGone(event control.WorkloadEventType, release *v1alpha1.BatchRelease) bool { + return event == control.WorkloadHasGone && release.Status.Phase != v1alpha1.RolloutPhaseInitial 
&& release.Status.Phase != "" } -func isWorkloadGone(event workloads.WorkloadEventType, release *v1alpha1.BatchRelease) bool { - return event == workloads.WorkloadHasGone && release.Status.Phase != v1alpha1.RolloutPhaseInitial && release.Status.Phase != "" +func isWorkloadScaling(event control.WorkloadEventType, release *v1alpha1.BatchRelease) bool { + return event == control.WorkloadReplicasChanged && release.Status.Phase == v1alpha1.RolloutPhaseProgressing } -func isWorkloadScaling(event workloads.WorkloadEventType, release *v1alpha1.BatchRelease) bool { - return event == workloads.WorkloadReplicasChanged && release.Status.Phase == v1alpha1.RolloutPhaseProgressing +func isWorkloadRevisionChanged(event control.WorkloadEventType, release *v1alpha1.BatchRelease) bool { + return event == control.WorkloadPodTemplateChanged && release.Status.Phase == v1alpha1.RolloutPhaseProgressing } -func isWorkloadRevisionChanged(event workloads.WorkloadEventType, release *v1alpha1.BatchRelease) bool { - return event == workloads.WorkloadPodTemplateChanged && release.Status.Phase == v1alpha1.RolloutPhaseProgressing +func isWorkloadRollbackInBatch(event control.WorkloadEventType, release *v1alpha1.BatchRelease) bool { + return (event == control.WorkloadRollbackInBatch || release.Annotations[util.RollbackInBatchAnnotation] != "") && + release.Status.CanaryStatus.NoNeedUpdateReplicas == nil && release.Status.Phase == v1alpha1.RolloutPhaseProgressing } -func isWorkloadRollbackInBatch(event workloads.WorkloadEventType, release *v1alpha1.BatchRelease) bool { - return (event == workloads.WorkloadRollbackInBatch || release.Annotations[util.RollbackInBatchAnnotation] != "") && - release.Status.CanaryStatus.NoNeedUpdateReplicas == nil && release.Status.Phase == v1alpha1.RolloutPhaseProgressing +func isWorkloadUnstable(event control.WorkloadEventType, _ *v1alpha1.BatchRelease) bool { + return event == control.WorkloadStillReconciling +} + +func isRollbackInBatchSatisfied(workloadInfo 
*util.WorkloadInfo, release *v1alpha1.BatchRelease) bool { + return workloadInfo.Status.StableRevision == workloadInfo.Status.UpdateRevision && release.Annotations[util.RollbackInBatchAnnotation] != "" } -func isWorkloadUnhealthy(event workloads.WorkloadEventType, release *v1alpha1.BatchRelease) bool { - return event == workloads.WorkloadUnHealthy && release.Status.Phase == v1alpha1.RolloutPhaseProgressing +func signalRePrepareRollback(newStatus *v1alpha1.BatchReleaseStatus) { + newStatus.Phase = v1alpha1.RolloutPhasePreparing + newStatus.CanaryStatus.BatchReadyTime = nil + newStatus.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState } -func isWorkloadUnstable(event workloads.WorkloadEventType, _ *v1alpha1.BatchRelease) bool { - return event == workloads.WorkloadStillReconciling +func signalRestartBatch(status *v1alpha1.BatchReleaseStatus) { + status.CanaryStatus.BatchReadyTime = nil + status.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState } -func isRollbackInBatchSatisfied(workloadInfo *util.WorkloadInfo, release *v1alpha1.BatchRelease) bool { - if workloadInfo.Status == nil { - return false +func signalRestartAll(status *v1alpha1.BatchReleaseStatus) { + emptyStatus := v1alpha1.BatchReleaseStatus{} + resetStatus(&emptyStatus) + *status = emptyStatus +} + +func signalFinalizing(status *v1alpha1.BatchReleaseStatus) { + status.Phase = v1alpha1.RolloutPhaseFinalizing +} + +func signalRecalculate(release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus) { + // When BatchRelease plan was changed, rollout controller will update this batchRelease cr, + // and rollout controller will set BatchPartition as its expected current batch index. + currentBatch := int32(0) + // if rollout-id is not changed, just use batchPartition; + // if rollout-id is changed, we should patch pod batch id from batch 0. 
+ observedRolloutID := release.Status.ObservedRolloutID + if release.Spec.ReleasePlan.BatchPartition != nil && release.Spec.ReleasePlan.RolloutID == observedRolloutID { + // ensure current batch upper bound + currentBatch = integer.Int32Min(*release.Spec.ReleasePlan.BatchPartition, int32(len(release.Spec.ReleasePlan.Batches)-1)) } - return workloadInfo.Status.StableRevision == workloadInfo.Status.UpdateRevision && release.Annotations[util.RollbackInBatchAnnotation] != "" + + klog.Infof("BatchRelease(%v) canary batch changed from %v to %v when the release plan changed, observed-rollout-id: %s, current-rollout-id: %s", + client.ObjectKeyFromObject(release), newStatus.CanaryStatus.CurrentBatch, currentBatch, observedRolloutID, release.Spec.ReleasePlan.RolloutID) + newStatus.CanaryStatus.BatchReadyTime = nil + newStatus.CanaryStatus.CurrentBatch = currentBatch + newStatus.ObservedRolloutID = release.Spec.ReleasePlan.RolloutID + newStatus.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState + newStatus.ObservedReleasePlanHash = util.HashReleasePlanBatches(&release.Spec.ReleasePlan) +} + +func getInitializedStatus(status *v1alpha1.BatchReleaseStatus) *v1alpha1.BatchReleaseStatus { + newStatus := status.DeepCopy() + if len(status.Phase) == 0 { + resetStatus(newStatus) + } + return newStatus +} + +func resetStatus(status *v1alpha1.BatchReleaseStatus) { + status.Phase = v1alpha1.RolloutPhasePreparing + status.StableRevision = "" + status.UpdateRevision = "" + status.ObservedReleasePlanHash = "" + status.ObservedWorkloadReplicas = -1 + status.CanaryStatus = v1alpha1.BatchReleaseCanaryStatus{} } diff --git a/pkg/controller/batchrelease/batchrelease_util.go b/pkg/controller/batchrelease/batchrelease_util.go deleted file mode 100644 index 5759cc99..00000000 --- a/pkg/controller/batchrelease/batchrelease_util.go +++ /dev/null @@ -1,143 +0,0 @@ -/* -Copyright 2022 The Kruise Authors. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package batchrelease - -import ( - "github.com/openkruise/rollouts/api/v1alpha1" - "github.com/openkruise/rollouts/pkg/util" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/klog/v2" - "k8s.io/utils/integer" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -func HasTerminatingCondition(status v1alpha1.BatchReleaseStatus) bool { - for i := range status.Conditions { - c := status.Conditions[i] - if c.Type == v1alpha1.TerminatedBatchReleaseCondition && c.Status == v1.ConditionTrue { - return true - } - } - return false -} - -func getInitializedStatus(status *v1alpha1.BatchReleaseStatus) *v1alpha1.BatchReleaseStatus { - newStatus := status.DeepCopy() - if len(status.Phase) == 0 { - resetStatus(newStatus) - } - return newStatus -} - -func signalRePrepareRollback(newStatus *v1alpha1.BatchReleaseStatus) { - newStatus.Phase = v1alpha1.RolloutPhasePreparing - newStatus.CanaryStatus.BatchReadyTime = nil - newStatus.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState -} - -func signalReinitializeBatch(status *v1alpha1.BatchReleaseStatus) { - status.CanaryStatus.BatchReadyTime = nil - status.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState -} - -func signalLocated(status *v1alpha1.BatchReleaseStatus) { - status.Phase = v1alpha1.RolloutPhaseHealthy - setCondition(status, v1alpha1.VerifyingBatchReleaseCondition, v1.ConditionTrue, "", "BatchRelease is verifying the workload") -} - -func 
signalTerminating(status *v1alpha1.BatchReleaseStatus) { - status.Phase = v1alpha1.RolloutPhaseTerminating - setCondition(status, v1alpha1.TerminatingBatchReleaseCondition, v1.ConditionTrue, "", "BatchRelease is terminating") -} - -func signalFinalize(status *v1alpha1.BatchReleaseStatus) { - status.Phase = v1alpha1.RolloutPhaseFinalizing - setCondition(status, v1alpha1.FinalizingBatchReleaseCondition, v1.ConditionTrue, "", "BatchRelease is finalizing") -} - -func signalRecalculate(release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus) { - // When BatchRelease plan was changed, rollout controller will update this batchRelease cr, - // and rollout controller will set BatchPartition as its expected current batch index. - currentBatch := int32(0) - // if rollout-id is not changed, just use batchPartition; - // if rollout-id is changed, we should patch pod batch id from batch 0. - observedRolloutID := release.Status.ObservedRolloutID - if release.Spec.ReleasePlan.BatchPartition != nil && release.Spec.ReleasePlan.RolloutID == observedRolloutID { - // ensure current batch upper bound - currentBatch = integer.Int32Min(*release.Spec.ReleasePlan.BatchPartition, int32(len(release.Spec.ReleasePlan.Batches)-1)) - } - - klog.Infof("BatchRelease(%v) canary batch changed from %v to %v when the release plan changed, observed-rollout-id: %s, current-rollout-id: %s", - client.ObjectKeyFromObject(release), newStatus.CanaryStatus.CurrentBatch, currentBatch, observedRolloutID, release.Spec.ReleasePlan.RolloutID) - newStatus.CanaryStatus.BatchReadyTime = nil - newStatus.CanaryStatus.CurrentBatch = currentBatch - newStatus.ObservedRolloutID = release.Spec.ReleasePlan.RolloutID - newStatus.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState - newStatus.ObservedReleasePlanHash = util.HashReleasePlanBatches(&release.Spec.ReleasePlan) -} - -func resetStatus(status *v1alpha1.BatchReleaseStatus) { - status.Phase = v1alpha1.RolloutPhaseInitial - status.StableRevision = 
"" - status.UpdateRevision = "" - status.ObservedReleasePlanHash = "" - status.ObservedWorkloadReplicas = -1 - status.CanaryStatus = v1alpha1.BatchReleaseCanaryStatus{} -} - -func setCondition(status *v1alpha1.BatchReleaseStatus, condType v1alpha1.RolloutConditionType, condStatus v1.ConditionStatus, reason, message string) { - if status == nil { - return - } - - if len(status.Conditions) == 0 { - status.Conditions = append(status.Conditions, v1alpha1.RolloutCondition{ - Type: condType, - Status: condStatus, - Reason: reason, - Message: message, - LastUpdateTime: metav1.Now(), - LastTransitionTime: metav1.Now(), - }) - return - } - - condition := &status.Conditions[0] - isConditionChanged := func() bool { - return condition.Type != condType || condition.Status != condStatus || condition.Reason != reason || condition.Message != message - } - - if isConditionChanged() { - condition.Type = condType - condition.Reason = reason - condition.Message = message - condition.LastUpdateTime = metav1.Now() - if condition.Status != condStatus { - condition.LastTransitionTime = metav1.Now() - } - condition.Status = condStatus - } -} - -func IsPartitioned(release *v1alpha1.BatchRelease) bool { - return release.Spec.ReleasePlan.BatchPartition != nil && *release.Spec.ReleasePlan.BatchPartition <= release.Status.CanaryStatus.CurrentBatch -} - -func IsAllBatchReady(release *v1alpha1.BatchRelease) bool { - return len(release.Spec.ReleasePlan.Batches)-1 == int(release.Status.CanaryStatus.CurrentBatch) && release.Status.CanaryStatus.CurrentBatchState == v1alpha1.ReadyBatchState -} diff --git a/pkg/controller/batchrelease/context/context.go b/pkg/controller/batchrelease/context/context.go new file mode 100644 index 00000000..cf032a5f --- /dev/null +++ b/pkg/controller/batchrelease/context/context.go @@ -0,0 +1,114 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package context + +import ( + "encoding/json" + "fmt" + + "github.com/openkruise/rollouts/pkg/util" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +type BatchContext struct { + RolloutID string `json:"rolloutID,omitempty"` + // current batch index, start from 0 + CurrentBatch int32 `json:"currentBatchIndex"` + // workload update revision + UpdateRevision string `json:"updateRevision,omitempty"` + + // workload replicas + Replicas int32 `json:"replicas"` + // Updated replicas + UpdatedReplicas int32 `json:"updatedReplicas"` + // Updated ready replicas + UpdatedReadyReplicas int32 `json:"updatedReadyReplicas"` + // no need update replicas that marked before rollout + NoNeedUpdatedReplicas *int32 `json:"noNeedUpdatedReplicas,omitempty"` + // the planned number of Pods should be upgrade in current batch + // this field corresponds to releasePlan.Batches[currentBatch] + PlannedUpdatedReplicas int32 `json:"plannedUpdatedReplicas,omitempty"` + // the total number of the really updated pods you desired in current batch. + // In most normal cases, this field will equal to PlannedUpdatedReplicas, + // but in some scene, e.g., rolling back in batches, the really desired updated + // replicas will not equal to planned update replicas, because we just roll the + // pods that really need update back in batches. 
+ DesiredUpdatedReplicas int32 `json:"desiredUpdatedReplicas,omitempty"` + // workload current partition + CurrentPartition intstr.IntOrString `json:"currentPartition,omitempty"` + // desired partition replicas in current batch + DesiredPartition intstr.IntOrString `json:"desiredPartition,omitempty"` + // failureThreshold to tolerate unready updated replicas; + FailureThreshold *intstr.IntOrString `json:"failureThreshold,omitempty"` + + // the pods owned by workload + Pods []*corev1.Pod `json:"-"` + // filter or sort pods before patch label + FilterFunc FilterFuncType `json:"-"` +} + +type FilterFuncType func(pods []*corev1.Pod, ctx *BatchContext) []*corev1.Pod + +func (bc *BatchContext) Log() string { + marshal, _ := json.Marshal(bc) + return fmt.Sprintf("%s with %d pods", string(marshal), len(bc.Pods)) +} + +// IsBatchReady return nil if the batch is ready +func (bc *BatchContext) IsBatchReady() error { + if bc.UpdatedReplicas < bc.DesiredUpdatedReplicas { + return fmt.Errorf("current batch not ready: updated replicas not satified") + } + + unavailableToleration := allowedUnavailable(bc.FailureThreshold, bc.UpdatedReplicas) + if unavailableToleration+bc.UpdatedReadyReplicas < bc.DesiredUpdatedReplicas { + return fmt.Errorf("current batch not ready: updated ready replicas not satified") + } + + if bc.DesiredUpdatedReplicas > 0 && bc.UpdatedReadyReplicas == 0 { + return fmt.Errorf("current batch not ready: no updated ready replicas") + } + + if !batchLabelSatisfied(bc.Pods, bc.RolloutID, bc.PlannedUpdatedReplicas) { + return fmt.Errorf("current batch not ready: pods with batch label not satified") + } + return nil +} + +// batchLabelSatisfied return true if the expected batch label has been patched +func batchLabelSatisfied(pods []*corev1.Pod, rolloutID string, targetCount int32) bool { + if rolloutID == "" || len(pods) == 0 { + return true + } + patchedCount := util.WrappedPodCount(pods, func(pod *corev1.Pod) bool { + if !pod.DeletionTimestamp.IsZero() { + return 
false + } + return pod.Labels[util.RolloutIDLabel] == rolloutID + }) + return patchedCount >= int(targetCount) +} + +// allowedUnavailable return absolute number of failure threshold +func allowedUnavailable(threshold *intstr.IntOrString, replicas int32) int32 { + failureThreshold := 0 + if threshold != nil { + failureThreshold, _ = intstr.GetScaledValueFromIntOrPercent(threshold, int(replicas), true) + } + return int32(failureThreshold) +} diff --git a/pkg/controller/batchrelease/context/context_test.go b/pkg/controller/batchrelease/context/context_test.go new file mode 100644 index 00000000..fc7bcde4 --- /dev/null +++ b/pkg/controller/batchrelease/context/context_test.go @@ -0,0 +1,182 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package context + +import ( + "fmt" + "testing" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + "github.com/openkruise/rollouts/api/v1alpha1" + "github.com/openkruise/rollouts/pkg/util" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +func TestIsBatchReady(t *testing.T) { + RegisterFailHandler(Fail) + + p := func(f intstr.IntOrString) *intstr.IntOrString { + return &f + } + r := func(f *intstr.IntOrString, id, revision string) *v1alpha1.BatchRelease { + return &v1alpha1.BatchRelease{ + Spec: v1alpha1.BatchReleaseSpec{ReleasePlan: v1alpha1.ReleasePlan{RolloutID: id, FailureThreshold: f}}, + Status: v1alpha1.BatchReleaseStatus{UpdateRevision: revision}, + } + } + cases := map[string]struct { + release *v1alpha1.BatchRelease + pods []*corev1.Pod + maxUnavailable *intstr.IntOrString + labelDesired int32 + desired int32 + updated int32 + updatedReady int32 + isReady bool + }{ + "ready: no-rollout-id, all pod ready": { + release: r(p(intstr.FromInt(1)), "", "v2"), + pods: nil, + labelDesired: 5, + desired: 5, + updated: 5, + updatedReady: 5, + isReady: true, + }, + "ready: no-rollout-id, tolerated failed pods": { + release: r(p(intstr.FromInt(1)), "", "v2"), + pods: nil, + labelDesired: 5, + desired: 5, + updated: 5, + updatedReady: 4, + isReady: true, + }, + "false: no-rollout-id, un-tolerated failed pods": { + release: r(p(intstr.FromInt(1)), "", "v2"), + pods: nil, + labelDesired: 5, + desired: 5, + updated: 5, + updatedReady: 3, + isReady: false, + }, + "false: no-rollout-id, tolerated failed pods, but 1 pod isn't updated": { + release: r(p(intstr.FromString("60%")), "", "v2"), + pods: nil, + labelDesired: 5, + desired: 5, + updated: 4, + updatedReady: 4, + isReady: false, + }, + "false: no-rollout-id, tolerated, but no-pod-ready": { + release: r(p(intstr.FromInt(100)), "", "v2"), + pods: nil, + labelDesired: 5, + desired: 5, + updated: 5, + updatedReady: 0, + isReady: false, + }, + "false: no-rollout-id, un-tolerated failed pods, 
failureThreshold=nil": { + release: r(nil, "", "v2"), + pods: nil, + labelDesired: 5, + desired: 5, + updated: 5, + updatedReady: 3, + isReady: false, + }, + "true: rollout-id, labeled pods satisfied": { + release: r(p(intstr.FromInt(1)), "1", "version-1"), + pods: generatePods(5, 0), + labelDesired: 5, + desired: 5, + updated: 5, + updatedReady: 5, + isReady: true, + }, + "false: rollout-id, labeled pods not satisfied": { + release: r(p(intstr.FromInt(1)), "1", "version-1"), + pods: generatePods(3, 0), + labelDesired: 5, + desired: 5, + updated: 5, + updatedReady: 5, + isReady: false, + }, + } + + for name, cs := range cases { + t.Run(name, func(t *testing.T) { + ctx := BatchContext{ + Pods: cs.pods, + PlannedUpdatedReplicas: cs.labelDesired, + DesiredUpdatedReplicas: cs.desired, + UpdatedReplicas: cs.updated, + UpdatedReadyReplicas: cs.updatedReady, + UpdateRevision: cs.release.Status.UpdateRevision, + RolloutID: cs.release.Spec.ReleasePlan.RolloutID, + FailureThreshold: cs.release.Spec.ReleasePlan.FailureThreshold, + } + err := ctx.IsBatchReady() + if cs.isReady { + Expect(err).NotTo(HaveOccurred()) + } else { + Expect(err).To(HaveOccurred()) + } + }) + } +} + +func generatePods(updatedReplicas, noNeedRollbackReplicas int) []*corev1.Pod { + podsNoNeed := generatePodsWith(map[string]string{ + util.NoNeedUpdatePodLabel: "0x1", + util.RolloutIDLabel: "1", + apps.ControllerRevisionHashLabelKey: "version-1", + }, noNeedRollbackReplicas, 0) + return append(generatePodsWith(map[string]string{ + util.RolloutIDLabel: "1", + apps.ControllerRevisionHashLabelKey: "version-1", + }, updatedReplicas-noNeedRollbackReplicas, noNeedRollbackReplicas), podsNoNeed...) 
+} + +func generatePodsWith(labels map[string]string, replicas int, beginOrder int) []*corev1.Pod { + pods := make([]*corev1.Pod, replicas) + for i := 0; i < replicas; i++ { + pods[i] = &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("pod-name-%d", beginOrder+i), + Labels: labels, + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, + }, + } + } + return pods +} diff --git a/pkg/controller/batchrelease/control/canarystyle/control_plane.go b/pkg/controller/batchrelease/control/canarystyle/control_plane.go new file mode 100644 index 00000000..bda594a3 --- /dev/null +++ b/pkg/controller/batchrelease/control/canarystyle/control_plane.go @@ -0,0 +1,213 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package canarystyle + +import ( + "fmt" + + "github.com/openkruise/rollouts/api/v1alpha1" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/labelpatch" + "github.com/openkruise/rollouts/pkg/util" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// CloneSetRolloutController is responsible for handling rollout CloneSet type of workloads +type realCanaryController struct { + Interface + client.Client + record.EventRecorder + patcher labelpatch.LabelPatcher + release *v1alpha1.BatchRelease + newStatus *v1alpha1.BatchReleaseStatus +} + +type NewInterfaceFunc func(cli client.Client, key types.NamespacedName) Interface + +// NewControlPlane creates a new release controller to drive batch release state machine +func NewControlPlane(f NewInterfaceFunc, cli client.Client, recorder record.EventRecorder, release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, key types.NamespacedName) *realCanaryController { + return &realCanaryController{ + Client: cli, + EventRecorder: recorder, + newStatus: newStatus, + Interface: f(cli, key), + release: release.DeepCopy(), + patcher: labelpatch.NewLabelPatcher(cli, klog.KObj(release)), + } +} + +func (rc *realCanaryController) Initialize() error { + stable, err := rc.BuildStableController() + if err != nil { + return err + } + + err = stable.Initialize(rc.release) + if err != nil { + return err + } + + canary, err := rc.BuildCanaryController(rc.release) + if client.IgnoreNotFound(err) != nil { + return err + } + + err = canary.Create(rc.release) + if err != nil { + return err + } + + // record revision and replicas + stableInfo := stable.GetStableInfo() + canaryInfo := canary.GetCanaryInfo() + rc.newStatus.ObservedWorkloadReplicas = stableInfo.Replicas + rc.newStatus.StableRevision = 
stableInfo.Status.StableRevision + rc.newStatus.UpdateRevision = canaryInfo.Status.UpdateRevision + return nil +} + +func (rc *realCanaryController) UpgradeBatch() error { + stable, err := rc.BuildStableController() + if err != nil { + return err + } + + if stable.GetStableInfo().Replicas == 0 { + return nil + } + + canary, err := rc.BuildCanaryController(rc.release) + if err != nil { + return err + } + + if !canary.GetCanaryInfo().IsStable() { + return fmt.Errorf("wait canary workload %v reconcile", canary.GetCanaryInfo().LogKey) + } + + batchContext := rc.CalculateBatchContext(rc.release) + klog.Infof("BatchRelease %v upgrade batch: %s", klog.KObj(rc.release), batchContext.Log()) + + return canary.UpgradeBatch(batchContext) +} + +func (rc *realCanaryController) CheckBatchReady() error { + stable, err := rc.BuildStableController() + if err != nil { + return err + } + + if stable.GetStableInfo().Replicas == 0 { + return nil + } + + canary, err := rc.BuildCanaryController(rc.release) + if err != nil { + return err + } + + if !canary.GetCanaryInfo().IsStable() { + return fmt.Errorf("wait canary workload %v reconcile", canary.GetCanaryInfo().LogKey) + } + + batchContext := rc.CalculateBatchContext(rc.release) + klog.Infof("BatchRelease %v check batch: %s", klog.KObj(rc.release), batchContext.Log()) + + return batchContext.IsBatchReady() +} + +func (rc *realCanaryController) Finalize() error { + stable, err := rc.BuildStableController() + if client.IgnoreNotFound(err) != nil { + klog.Errorf("BatchRelease %v build stable controller err: %v", klog.KObj(rc.release), err) + return err + } + + err = stable.Finalize(rc.release) + if err != nil { + klog.Errorf("BatchRelease %v finalize stable err: %v", klog.KObj(rc.release), err) + return err + } + + canary, err := rc.BuildCanaryController(rc.release) + if client.IgnoreNotFound(err) != nil { + klog.Errorf("BatchRelease %v build canary controller err: %v", klog.KObj(rc.release), err) + return err + } + err = 
canary.Delete(rc.release) + if err != nil { + klog.Errorf("BatchRelease %v delete canary workload err: %v", klog.KObj(rc.release), err) + } + return err +} + +func (rc *realCanaryController) SyncWorkloadInformation() (control.WorkloadEventType, *util.WorkloadInfo, error) { + // ignore the sync if the release plan is deleted + if rc.release.DeletionTimestamp != nil { + return control.WorkloadNormalState, nil, nil + } + + stable, err := rc.BuildStableController() + if err != nil { + if errors.IsNotFound(err) { + return control.WorkloadHasGone, nil, err + } + return control.WorkloadUnknownState, nil, err + } + + canary, err := rc.BuildCanaryController(rc.release) + if client.IgnoreNotFound(err) != nil { + return control.WorkloadUnknownState, nil, err + } + + syncInfo := &util.WorkloadInfo{} + stableInfo, canaryInfo := stable.GetStableInfo(), canary.GetCanaryInfo() + if canaryInfo != nil { + syncInfo.Status.UpdatedReplicas = canaryInfo.Status.Replicas + syncInfo.Status.UpdatedReadyReplicas = canaryInfo.Status.AvailableReplicas + } + + if !stableInfo.IsStable() { + klog.Warningf("Workload(%v) is still reconciling, generation: %v, observed: %v", + stableInfo.LogKey, stableInfo.Generation, stableInfo.Status.ObservedGeneration) + return control.WorkloadStillReconciling, syncInfo, nil + } + + // in case of that the workload has been promoted + if stableInfo.IsPromoted() { + return control.WorkloadNormalState, syncInfo, nil + } + + if stableInfo.IsScaling(rc.newStatus.ObservedWorkloadReplicas) { + syncInfo.Replicas = stableInfo.Replicas + klog.Warningf("Workload(%v) replicas is modified, replicas from: %v to -> %v", + stableInfo.LogKey, rc.newStatus.ObservedWorkloadReplicas, stableInfo.Replicas) + return control.WorkloadReplicasChanged, syncInfo, nil + } + + if stableInfo.IsRevisionNotEqual(rc.newStatus.UpdateRevision) { + syncInfo.Status.UpdateRevision = stableInfo.Status.UpdateRevision + klog.Warningf("Workload(%v) updateRevision is modified, updateRevision from: %v to -> 
%v", + stableInfo.LogKey, rc.newStatus.UpdateRevision, stableInfo.Status.UpdateRevision) + return control.WorkloadPodTemplateChanged, syncInfo, nil + } + return control.WorkloadUnknownState, syncInfo, nil +} diff --git a/pkg/controller/batchrelease/control/canarystyle/deployment/canary.go b/pkg/controller/batchrelease/control/canarystyle/deployment/canary.go new file mode 100644 index 00000000..80d4b457 --- /dev/null +++ b/pkg/controller/batchrelease/control/canarystyle/deployment/canary.go @@ -0,0 +1,191 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package deployment + +import ( + "context" + "encoding/json" + "fmt" + "sort" + + "github.com/openkruise/rollouts/api/v1alpha1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/util" + utilclient "github.com/openkruise/rollouts/pkg/util/client" + expectations "github.com/openkruise/rollouts/pkg/util/expectation" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/klog/v2" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +type realCanaryController struct { + canaryInfo *util.WorkloadInfo + canaryObject *apps.Deployment + canaryClient client.Client + objectKey types.NamespacedName +} + +func newCanary(cli client.Client, key types.NamespacedName) realCanaryController { + return realCanaryController{canaryClient: cli, objectKey: key} +} + +func (r *realCanaryController) GetCanaryInfo() *util.WorkloadInfo { + return r.canaryInfo +} + +// Delete do not delete canary deployments actually, it only removes the finalizers of +// Deployments. These deployments will be cascaded deleted when BatchRelease is deleted. 
+func (r *realCanaryController) Delete(release *v1alpha1.BatchRelease) error { + deployments, err := r.listDeployment(release, client.InNamespace(r.objectKey.Namespace), utilclient.DisableDeepCopy) + if err != nil { + return err + } + + for _, d := range deployments { + if !controllerutil.ContainsFinalizer(d, util.CanaryDeploymentFinalizer) { + continue + } + err = util.UpdateFinalizer(r.canaryClient, d, util.RemoveFinalizerOpType, util.CanaryDeploymentFinalizer) + if err != nil && !errors.IsNotFound(err) { + return err + } + klog.Infof("Successfully remove finalizers for Deplot %v", klog.KObj(d)) + } + return nil +} + +func (r *realCanaryController) UpgradeBatch(ctx *batchcontext.BatchContext) error { + // desired replicas for canary deployment + desired := ctx.DesiredUpdatedReplicas + deployment := util.GetEmptyObjectWithKey(r.canaryObject) + + if r.canaryInfo.Replicas >= desired { + return nil + } + + body := fmt.Sprintf(`{"spec":{"replicas":%d}}`, desired) + if err := r.canaryClient.Patch(context.TODO(), deployment, client.RawPatch(types.MergePatchType, []byte(body))); err != nil { + return err + } + klog.Infof("Successfully submit rolling replicas %d to Deployment %v", desired, klog.KObj(deployment)) + return nil +} + +func (r *realCanaryController) Create(release *v1alpha1.BatchRelease) error { + if r.canaryObject != nil { + return nil // Don't re-create if exists + } + + // check expectation before creating canary deployment to avoid + // repeatedly create multiple canary deployment incorrectly. 
+ controllerKey := client.ObjectKeyFromObject(release).String() + satisfied, timeoutDuration, rest := expectations.ResourceExpectations.SatisfiedExpectations(controllerKey) + if !satisfied { + if timeoutDuration >= expectations.ExpectationTimeout { + klog.Warningf("Unsatisfied time of expectation exceeds %v, delete key and continue, key: %v, rest: %v", + expectations.ExpectationTimeout, klog.KObj(release), rest) + expectations.ResourceExpectations.DeleteExpectations(controllerKey) + } else { + return fmt.Errorf("expectation is not satisfied, key: %v, rest: %v", klog.KObj(release), rest) + } + } + + // fetch the stable deployment as template to create canary deployment. + stable := &apps.Deployment{} + if err := r.canaryClient.Get(context.TODO(), r.objectKey, stable); err != nil { + return err + } + return r.create(release, stable) +} +func (r *realCanaryController) create(release *v1alpha1.BatchRelease, template *apps.Deployment) error { + canary := &apps.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: fmt.Sprintf("%v-", r.objectKey.Name), + Namespace: r.objectKey.Namespace, + Labels: map[string]string{}, + Annotations: map[string]string{}, + }, + } + + // metadata + canary.Finalizers = append(canary.Finalizers, util.CanaryDeploymentFinalizer) + canary.OwnerReferences = append(canary.OwnerReferences, *metav1.NewControllerRef(release, release.GroupVersionKind())) + canary.Labels[util.CanaryDeploymentLabel] = template.Name + ownerInfo, _ := json.Marshal(metav1.NewControllerRef(release, release.GroupVersionKind())) + canary.Annotations[util.BatchReleaseControlAnnotation] = string(ownerInfo) + + // spec + canary.Spec = *template.Spec.DeepCopy() + canary.Spec.Replicas = pointer.Int32Ptr(0) + canary.Spec.Paused = false + + if err := r.canaryClient.Create(context.TODO(), canary); err != nil { + klog.Errorf("Failed to create canary Deployment(%v), error: %v", klog.KObj(canary), err) + return err + } + + // add expect to avoid to create repeatedly + 
controllerKey := client.ObjectKeyFromObject(release).String() + expectations.ResourceExpectations.Expect(controllerKey, expectations.Create, string(canary.UID)) + + canaryInfo, _ := json.Marshal(canary) + klog.Infof("Create canary Deployment(%v) successfully, details: %s", klog.KObj(canary), string(canaryInfo)) + return fmt.Errorf("created canary deployment %v succeeded, but waiting informer synced", klog.KObj(canary)) +} + +func (r *realCanaryController) listDeployment(release *v1alpha1.BatchRelease, options ...client.ListOption) ([]*apps.Deployment, error) { + dList := &apps.DeploymentList{} + if err := r.canaryClient.List(context.TODO(), dList, options...); err != nil { + return nil, err + } + + var ds []*apps.Deployment + for i := range dList.Items { + d := &dList.Items[i] + o := metav1.GetControllerOf(d) + if o == nil || o.UID != release.UID { + continue + } + ds = append(ds, d) + } + return ds, nil +} + +// return the latest deployment with the newer creation time +func filterCanaryDeployment(ds []*apps.Deployment, template *corev1.PodTemplateSpec) *apps.Deployment { + if len(ds) == 0 { + return nil + } + sort.Slice(ds, func(i, j int) bool { + return ds[i].CreationTimestamp.After(ds[j].CreationTimestamp.Time) + }) + if template == nil { + return ds[0] + } + for _, d := range ds { + if util.EqualIgnoreHash(template, &d.Spec.Template) { + return d + } + } + return nil +} diff --git a/pkg/controller/batchrelease/control/canarystyle/deployment/control.go b/pkg/controller/batchrelease/control/canarystyle/deployment/control.go new file mode 100644 index 00000000..cf5a968c --- /dev/null +++ b/pkg/controller/batchrelease/control/canarystyle/deployment/control.go @@ -0,0 +1,107 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployment + +import ( + "context" + "fmt" + + "github.com/openkruise/rollouts/api/v1alpha1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/canarystyle" + "github.com/openkruise/rollouts/pkg/util" + utilclient "github.com/openkruise/rollouts/pkg/util/client" + apps "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type realController struct { + realStableController + realCanaryController +} + +func NewController(cli client.Client, key types.NamespacedName) canarystyle.Interface { + return &realController{ + realStableController: newStable(cli, key), + realCanaryController: newCanary(cli, key), + } +} + +func (rc *realController) BuildStableController() (canarystyle.StableInterface, error) { + if rc.stableObject != nil { + return rc, nil + } + + object := &apps.Deployment{} + err := rc.stableClient.Get(context.TODO(), rc.stableKey, object) + if err != nil { + return rc, err + } + rc.stableObject = object + rc.stableInfo = util.ParseWorkload(object) + return rc, nil +} + +func (rc *realController) BuildCanaryController(release *v1alpha1.BatchRelease) (canarystyle.CanaryInterface, error) { + if rc.canaryObject != nil { + return rc, nil + } + + ds, err := rc.listDeployment(release, client.InNamespace(rc.stableKey.Namespace), utilclient.DisableDeepCopy) + if err != nil { + return rc, err + } + + template, 
err := rc.getLatestTemplate() + if client.IgnoreNotFound(err) != nil { + return rc, err + } + + rc.canaryObject = filterCanaryDeployment(util.FilterActiveDeployment(ds), template) + if rc.canaryObject == nil { + return rc, control.GenerateNotFoundError(fmt.Sprintf("%v-canary", rc.stableKey), "Deployment") + } + + rc.canaryInfo = util.ParseWorkload(rc.canaryObject) + return rc, nil +} + +func (rc *realController) CalculateBatchContext(release *v1alpha1.BatchRelease) *batchcontext.BatchContext { + replicas := *rc.stableObject.Spec.Replicas + currentBatch := release.Status.CanaryStatus.CurrentBatch + desiredUpdate := int32(control.CalculateBatchReplicas(release, int(replicas), int(currentBatch))) + + return &batchcontext.BatchContext{ + Replicas: replicas, + CurrentBatch: currentBatch, + DesiredUpdatedReplicas: desiredUpdate, + FailureThreshold: release.Spec.ReleasePlan.FailureThreshold, + UpdatedReplicas: rc.canaryObject.Status.Replicas, + UpdatedReadyReplicas: rc.canaryObject.Status.AvailableReplicas, + } +} + +func (rc *realController) getLatestTemplate() (*v1.PodTemplateSpec, error) { + _, err := rc.BuildStableController() + if err != nil { + return nil, err + } + return &rc.stableObject.Spec.Template, nil +} diff --git a/pkg/controller/batchrelease/control/canarystyle/deployment/control_test.go b/pkg/controller/batchrelease/control/canarystyle/deployment/control_test.go new file mode 100644 index 00000000..6846f829 --- /dev/null +++ b/pkg/controller/batchrelease/control/canarystyle/deployment/control_test.go @@ -0,0 +1,334 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployment + +import ( + "context" + "encoding/json" + "reflect" + "testing" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + "github.com/openkruise/rollouts/api/v1alpha1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/util" + expectations "github.com/openkruise/rollouts/pkg/util/expectation" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/uuid" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +var ( + scheme = runtime.NewScheme() + + deploymentKey = types.NamespacedName{ + Name: "deployment", + Namespace: "default", + } + + deploymentDemo = &apps.Deployment{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "apps/v1", + Kind: "Deployment", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: deploymentKey.Name, + Namespace: deploymentKey.Namespace, + Generation: 1, + Labels: map[string]string{ + "app": "busybox", + }, + Annotations: map[string]string{ + "type": "unit-test", + }, + }, + Spec: apps.DeploymentSpec{ + Paused: true, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "busybox", + }, + }, + Replicas: pointer.Int32(10), + Strategy: apps.DeploymentStrategy{ + Type: apps.RollingUpdateDeploymentStrategyType, + 
RollingUpdate: &apps.RollingUpdateDeployment{ + MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 1}, + }, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": "busybox", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "busybox", + Image: "busybox:latest", + }, + }, + }, + }, + }, + Status: apps.DeploymentStatus{ + Replicas: 10, + UpdatedReplicas: 10, + ReadyReplicas: 10, + AvailableReplicas: 10, + CollisionCount: pointer.Int32Ptr(1), + ObservedGeneration: 1, + }, + } + + releaseDemo = &v1alpha1.BatchRelease{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "rollouts.kruise.io/v1alpha1", + Kind: "BatchRelease", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "release", + Namespace: deploymentKey.Namespace, + UID: uuid.NewUUID(), + }, + Spec: v1alpha1.BatchReleaseSpec{ + ReleasePlan: v1alpha1.ReleasePlan{ + FinalizingPolicy: v1alpha1.WaitResumeFinalizingPolicyType, + Batches: []v1alpha1.ReleaseBatch{ + { + CanaryReplicas: intstr.FromString("10%"), + }, + { + CanaryReplicas: intstr.FromString("50%"), + }, + { + CanaryReplicas: intstr.FromString("100%"), + }, + }, + }, + TargetRef: v1alpha1.ObjectRef{ + WorkloadRef: &v1alpha1.WorkloadRef{ + APIVersion: deploymentDemo.APIVersion, + Kind: deploymentDemo.Kind, + Name: deploymentDemo.Name, + }, + }, + }, + Status: v1alpha1.BatchReleaseStatus{ + CanaryStatus: v1alpha1.BatchReleaseCanaryStatus{ + CurrentBatch: 1, + }, + }, + } +) + +func init() { + apps.AddToScheme(scheme) + v1alpha1.AddToScheme(scheme) +} + +func TestCalculateBatchContext(t *testing.T) { + RegisterFailHandler(Fail) + + percent := intstr.FromString("20%") + cases := map[string]struct { + workload func() (*apps.Deployment, *apps.Deployment) + release func() *v1alpha1.BatchRelease + result *batchcontext.BatchContext + }{ + "normal case": { + workload: func() (*apps.Deployment, *apps.Deployment) { + stable := &apps.Deployment{ + Spec: apps.DeploymentSpec{ + Replicas: 
pointer.Int32Ptr(10), + }, + Status: apps.DeploymentStatus{ + Replicas: 10, + UpdatedReplicas: 0, + AvailableReplicas: 10, + }, + } + canary := &apps.Deployment{ + Spec: apps.DeploymentSpec{ + Replicas: pointer.Int32Ptr(5), + }, + Status: apps.DeploymentStatus{ + Replicas: 5, + UpdatedReplicas: 5, + AvailableReplicas: 5, + }, + } + return stable, canary + }, + release: func() *v1alpha1.BatchRelease { + r := &v1alpha1.BatchRelease{ + Spec: v1alpha1.BatchReleaseSpec{ + ReleasePlan: v1alpha1.ReleasePlan{ + FailureThreshold: &percent, + FinalizingPolicy: v1alpha1.WaitResumeFinalizingPolicyType, + Batches: []v1alpha1.ReleaseBatch{ + { + CanaryReplicas: percent, + }, + }, + }, + }, + Status: v1alpha1.BatchReleaseStatus{ + CanaryStatus: v1alpha1.BatchReleaseCanaryStatus{ + CurrentBatch: 0, + }, + }, + } + return r + }, + result: &batchcontext.BatchContext{ + FailureThreshold: &percent, + CurrentBatch: 0, + Replicas: 10, + UpdatedReplicas: 5, + UpdatedReadyReplicas: 5, + DesiredUpdatedReplicas: 2, + }, + }, + } + + for name, cs := range cases { + t.Run(name, func(t *testing.T) { + stable, canary := cs.workload() + control := realController{ + realStableController: realStableController{ + stableObject: stable, + }, + realCanaryController: realCanaryController{ + canaryObject: canary, + }, + } + got := control.CalculateBatchContext(cs.release()) + Expect(reflect.DeepEqual(got, cs.result)).Should(BeTrue()) + }) + } +} + +func TestRealStableController(t *testing.T) { + RegisterFailHandler(Fail) + + release := releaseDemo.DeepCopy() + deployment := deploymentDemo.DeepCopy() + cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(release, deployment).Build() + c := NewController(cli, deploymentKey).(*realController) + controller, err := c.BuildStableController() + Expect(err).NotTo(HaveOccurred()) + + err = controller.Initialize(release) + Expect(err).NotTo(HaveOccurred()) + fetch := &apps.Deployment{} + Expect(cli.Get(context.TODO(), deploymentKey, 
fetch)).NotTo(HaveOccurred()) + Expect(fetch.Annotations[util.BatchReleaseControlAnnotation]).Should(Equal(getControlInfo(release))) + c.stableObject = fetch // mock + + err = controller.Finalize(release) + Expect(err).NotTo(HaveOccurred()) + fetch = &apps.Deployment{} + Expect(cli.Get(context.TODO(), deploymentKey, fetch)).NotTo(HaveOccurred()) + Expect(fetch.Annotations[util.BatchReleaseControlAnnotation]).Should(Equal("")) + + stableInfo := controller.GetStableInfo() + Expect(stableInfo).ShouldNot(BeNil()) + checkWorkloadInfo(stableInfo, deployment) +} + +func TestRealCanaryController(t *testing.T) { + RegisterFailHandler(Fail) + release := releaseDemo.DeepCopy() + deployment := deploymentDemo.DeepCopy() + cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(release, deployment).Build() + c := NewController(cli, deploymentKey).(*realController) + controller, err := c.BuildCanaryController(release) + Expect(client.IgnoreNotFound(err)).NotTo(HaveOccurred()) + + // check creation + for { + err = controller.Create(release) + if err == nil { + break + } + // mock create event handler + controller, err = c.BuildCanaryController(release) + Expect(err).NotTo(HaveOccurred()) + if c.canaryObject != nil { + controllerKey := client.ObjectKeyFromObject(release).String() + resourceKey := client.ObjectKeyFromObject(c.canaryObject).String() + expectations.ResourceExpectations.Observe(controllerKey, expectations.Create, resourceKey) + } + } + Expect(metav1.IsControlledBy(c.canaryObject, release)).Should(BeTrue()) + Expect(controllerutil.ContainsFinalizer(c.canaryObject, util.CanaryDeploymentFinalizer)).Should(BeTrue()) + Expect(*c.canaryObject.Spec.Replicas).Should(BeNumerically("==", 0)) + Expect(util.EqualIgnoreHash(&c.canaryObject.Spec.Template, &deployment.Spec.Template)).Should(BeTrue()) + + // check rolling + batchContext := c.CalculateBatchContext(release) + err = controller.UpgradeBatch(batchContext) + Expect(err).NotTo(HaveOccurred()) + canary := 
getCanaryDeployment(release, deployment, c) + Expect(canary).ShouldNot(BeNil()) + Expect(*canary.Spec.Replicas).Should(BeNumerically("==", batchContext.DesiredUpdatedReplicas)) + + // check deletion + for { + err = controller.Delete(release) + if err == nil { + break + } + } + d := getCanaryDeployment(release, deployment, c) + Expect(d).NotTo(BeNil()) + Expect(len(d.Finalizers)).Should(Equal(0)) +} + +func getCanaryDeployment(release *v1alpha1.BatchRelease, stable *apps.Deployment, c *realController) *apps.Deployment { + ds, err := c.listDeployment(release) + Expect(err).NotTo(HaveOccurred()) + if len(ds) == 0 { + return nil + } + return filterCanaryDeployment(ds, &stable.Spec.Template) +} + +func checkWorkloadInfo(stableInfo *util.WorkloadInfo, deployment *apps.Deployment) { + Expect(stableInfo.Replicas).Should(Equal(*deployment.Spec.Replicas)) + Expect(stableInfo.Status.Replicas).Should(Equal(deployment.Status.Replicas)) + Expect(stableInfo.Status.ReadyReplicas).Should(Equal(deployment.Status.ReadyReplicas)) + Expect(stableInfo.Status.UpdatedReplicas).Should(Equal(deployment.Status.UpdatedReplicas)) + Expect(stableInfo.Status.AvailableReplicas).Should(Equal(deployment.Status.AvailableReplicas)) + Expect(stableInfo.Status.ObservedGeneration).Should(Equal(deployment.Status.ObservedGeneration)) +} + +func getControlInfo(release *v1alpha1.BatchRelease) string { + owner, _ := json.Marshal(metav1.NewControllerRef(release, release.GetObjectKind().GroupVersionKind())) + return string(owner) +} diff --git a/pkg/controller/batchrelease/control/canarystyle/deployment/stable.go b/pkg/controller/batchrelease/control/canarystyle/deployment/stable.go new file mode 100644 index 00000000..d0252008 --- /dev/null +++ b/pkg/controller/batchrelease/control/canarystyle/deployment/stable.go @@ -0,0 +1,106 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployment + +import ( + "context" + "fmt" + + "github.com/openkruise/rollouts/api/v1alpha1" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control" + "github.com/openkruise/rollouts/pkg/util" + apps "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type realStableController struct { + stableInfo *util.WorkloadInfo + stableObject *apps.Deployment + stableClient client.Client + stableKey types.NamespacedName +} + +func newStable(cli client.Client, key types.NamespacedName) realStableController { + return realStableController{stableClient: cli, stableKey: key} +} + +func (rc *realStableController) GetStableInfo() *util.WorkloadInfo { + return rc.stableInfo +} + +func (rc *realStableController) Initialize(release *v1alpha1.BatchRelease) error { + if control.IsControlledByBatchRelease(release, rc.stableObject) { + return nil + } + + d := util.GetEmptyObjectWithKey(rc.stableObject) + owner := control.BuildReleaseControlInfo(release) + + body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":"%s"}}}`, util.BatchReleaseControlAnnotation, owner) + if err := rc.stableClient.Patch(context.TODO(), d, client.RawPatch(types.MergePatchType, []byte(body))); err != nil { + return err + } + klog.Infof("Successfully claim Deployment %v", klog.KObj(rc.stableObject)) + return nil +} + +func (rc *realStableController) Finalize(release *v1alpha1.BatchRelease) (err error) { + if rc.stableObject == nil { + return nil // no need to process deleted object + } + + defer func() { + if err == nil 
{ + klog.Infof("Successfully finalize Deployment %v", klog.KObj(rc.stableObject)) + } + }() + + // if batchPartition == nil, workload should be promoted; + pause := release.Spec.ReleasePlan.BatchPartition != nil + body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}},"spec":{"paused":%v}}`, + util.BatchReleaseControlAnnotation, pause) + + d := util.GetEmptyObjectWithKey(rc.stableObject) + if err = rc.stableClient.Patch(context.TODO(), d, client.RawPatch(types.MergePatchType, []byte(body))); err != nil { + return + } + if control.ShouldWaitResume(release) { + err = waitAllUpdatedAndReady(d.(*apps.Deployment)) + } + return +} + +func waitAllUpdatedAndReady(deployment *apps.Deployment) error { + if deployment.Spec.Paused { + return fmt.Errorf("promote error: deployment should not be paused") + } + + createdReplicas := deployment.Status.Replicas + updatedReplicas := deployment.Status.UpdatedReplicas + if createdReplicas != updatedReplicas { + return fmt.Errorf("promote error: all replicas should be upgraded") + } + + availableReplicas := deployment.Status.AvailableReplicas + allowedUnavailable := util.DeploymentMaxUnavailable(deployment) + if allowedUnavailable+availableReplicas < createdReplicas { + return fmt.Errorf("promote error: ready replicas should satisfy maxUnavailable") + } + return nil +} diff --git a/pkg/controller/batchrelease/control/canarystyle/interface.go b/pkg/controller/batchrelease/control/canarystyle/interface.go new file mode 100644 index 00000000..d50ec4ae --- /dev/null +++ b/pkg/controller/batchrelease/control/canarystyle/interface.go @@ -0,0 +1,61 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package canarystyle + +import ( + "github.com/openkruise/rollouts/api/v1alpha1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/util" +) + +type Interface interface { + CanaryInterface + StableInterface + // BuildStableController will get stable workload object and parse + // stable workload info, and return a controller for stable workload. + BuildStableController() (StableInterface, error) + // BuildCanaryController will get canary workload object and parse + // canary workload info, and return a controller for canary workload. + BuildCanaryController(release *v1alpha1.BatchRelease) (CanaryInterface, error) + // CalculateBatchContext calculates the current batch context according to + // our release plan and the statuses of stable workload and canary workload.
+ CalculateBatchContext(release *v1alpha1.BatchRelease) *batchcontext.BatchContext +} + +// CanaryInterface contains the methods about canary workload +type CanaryInterface interface { + // GetCanaryInfo returns the information about canary workload + GetCanaryInfo() *util.WorkloadInfo + // UpgradeBatch upgrades canary workload according to current batch context + UpgradeBatch(*batchcontext.BatchContext) error + // Create creates canary workload before rolling out + Create(controller *v1alpha1.BatchRelease) error + // Delete deletes canary workload after rolling out + Delete(controller *v1alpha1.BatchRelease) error +} + +// StableInterface contains the methods about stable workload +type StableInterface interface { + // GetStableInfo returns the information about stable workload + GetStableInfo() *util.WorkloadInfo + // Initialize claims that the stable workload is under rollout control + Initialize(controller *v1alpha1.BatchRelease) error + // Finalize does something after rolling out, for example: + // - free the stable workload from rollout control; + // - resume stable workload and wait for all pods to be updated if needed. + Finalize(controller *v1alpha1.BatchRelease) error +} diff --git a/pkg/controller/batchrelease/control/interface.go b/pkg/controller/batchrelease/control/interface.go new file mode 100644 index 00000000..680de8b8 --- /dev/null +++ b/pkg/controller/batchrelease/control/interface.go @@ -0,0 +1,69 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ + +package control + +import ( + "github.com/openkruise/rollouts/pkg/util" +) + +type WorkloadEventType string + +const ( + // WorkloadNormalState means workload is normal and event should be ignored. + WorkloadNormalState WorkloadEventType = "workload-is-at-normal-state" + // WorkloadUnknownState means workload state is unknown and should retry. + WorkloadUnknownState WorkloadEventType = "workload-is-at-unknown-state" + // WorkloadPodTemplateChanged means workload revision changed, execution of the batch release plan should be stopped. + WorkloadPodTemplateChanged WorkloadEventType = "workload-pod-template-changed" + // WorkloadReplicasChanged means workload is scaling during rollout, should recalculate upgraded pods in current batch. + WorkloadReplicasChanged WorkloadEventType = "workload-replicas-changed" + // WorkloadStillReconciling means workload status is untrustworthy, we should wait for the workload controller to reconcile. + WorkloadStillReconciling WorkloadEventType = "workload-is-reconciling" + // WorkloadHasGone means workload is deleted during rollout, we should do some finalizing work if this event occurs. + WorkloadHasGone WorkloadEventType = "workload-has-gone" + // WorkloadRollbackInBatch means workload is rolled back according to BatchRelease batch plan. + WorkloadRollbackInBatch WorkloadEventType = "workload-rollback-in-batch" +) + +// Interface is the interface that all types of control planes implement for rollout. +type Interface interface { + // Initialize makes sure that the resource is ready to be progressed. + // this function is tasked to do any initialization work on the resources. + // it returns nil if the preparation succeeded, else the preparation should be retried. + Initialize() error + + // UpgradeBatch tries to upgrade old replicas according to the release plan. + // it will upgrade the old replicas as the release plan allows in the current batch. + // this function is tasked to do the upgrade work for the current batch.
+ // it returns nil if the upgrade succeeded, else the upgrade should be retried. + UpgradeBatch() error + + // CheckBatchReady checks how many replicas are ready to serve requests in the current batch. + // this function is tasked to do the readiness check for the current batch. + // it returns nil if the check succeeded, else the check should be retried. + CheckBatchReady() error + + // Finalize makes sure the resources are in a good final state. + // this function is tasked to do any finalization work on the resources. + // it returns nil if the finalization succeeded, else the finalization should be retried. + Finalize() error + + // SyncWorkloadInformation will watch and compare the status recorded in Status of BatchRelease + // and the real-time workload information. If workload status is inconsistent with that recorded + // in BatchRelease, it will return the corresponding WorkloadEventType and info. + SyncWorkloadInformation() (WorkloadEventType, *util.WorkloadInfo, error) +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/cloneset/control.go b/pkg/controller/batchrelease/control/partitionstyle/cloneset/control.go new file mode 100644 index 00000000..87db6ed9 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/cloneset/control.go @@ -0,0 +1,204 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ + +package cloneset + +import ( + "context" + "fmt" + + kruiseappsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" + "github.com/openkruise/rollouts/api/v1alpha1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/labelpatch" + "github.com/openkruise/rollouts/pkg/util" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type realController struct { + *util.WorkloadInfo + client client.Client + pods []*corev1.Pod + key types.NamespacedName + object *kruiseappsv1alpha1.CloneSet +} + +func NewController(cli client.Client, key types.NamespacedName, _ schema.GroupVersionKind) partitionstyle.Interface { + return &realController{ + key: key, + client: cli, + } +} + +func (rc *realController) GetInfo() *util.WorkloadInfo { + return rc.WorkloadInfo +} + +func (rc *realController) BuildController() (partitionstyle.Interface, error) { + if rc.object != nil { + return rc, nil + } + object := &kruiseappsv1alpha1.CloneSet{} + if err := rc.client.Get(context.TODO(), rc.key, object); err != nil { + return rc, err + } + rc.object = object + rc.WorkloadInfo = util.ParseWorkload(object) + return rc, nil +} + +func (rc *realController) ListOwnedPods() ([]*corev1.Pod, error) { + if rc.pods != nil { + return rc.pods, nil + } + var err error + rc.pods, err = util.ListOwnedPods(rc.client, rc.object) + return rc.pods, err +} + +func (rc *realController) Initialize(release *v1alpha1.BatchRelease) error { + if control.IsControlledByBatchRelease(release, rc.object) { + return nil + } + + clone := util.GetEmptyObjectWithKey(rc.object) + owner := 
control.BuildReleaseControlInfo(release) + body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":"%s"}},"spec":{"updateStrategy":{"paused":%v,"partition":"%s"}}}`, + util.BatchReleaseControlAnnotation, owner, false, "100%") + if err := rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))); err != nil { + return err + } + klog.Infof("Successfully initialized CloneSet %v", klog.KObj(clone)) + return nil +} + +func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { + var body string + var desired int + switch partition := ctx.DesiredPartition; partition.Type { + case intstr.Int: + desired = int(partition.IntVal) + body = fmt.Sprintf(`{"spec":{"updateStrategy":{"partition": %d }}}`, partition.IntValue()) + case intstr.String: + desired, _ = intstr.GetScaledValueFromIntOrPercent(&partition, int(ctx.Replicas), true) + body = fmt.Sprintf(`{"spec":{"updateStrategy":{"partition":"%s"}}}`, partition.String()) + } + current, _ := intstr.GetScaledValueFromIntOrPercent(&ctx.CurrentPartition, int(ctx.Replicas), true) + + // current is not greater than desired, which means current revision replicas will not be more than desired, + // in other words, update revision replicas will not be less than desired, so there is no need to update again. + if current <= desired { + return nil + } + + clone := util.GetEmptyObjectWithKey(rc.object) + if err := rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))); err != nil { + return err + } + klog.Infof("Successfully submit partition %v for CloneSet %v", ctx.DesiredPartition, klog.KObj(clone)) + return nil +} + +func (rc *realController) Finalize(release *v1alpha1.BatchRelease) error { + if rc.object == nil { + return nil + } + + var specBody string + // if batchPartition == nil, workload should be promoted.
+ if release.Spec.ReleasePlan.BatchPartition == nil { + specBody = `,"spec":{"updateStrategy":{"partition":null,"paused":false}}` + } + + body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}%s}`, util.BatchReleaseControlAnnotation, specBody) + + clone := util.GetEmptyObjectWithKey(rc.object) + if err := rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))); err != nil { + return err + } + klog.Infof("Successfully finalize CloneSet %v", klog.KObj(rc.object)) + return nil +} + +func (rc *realController) CalculateBatchContext(release *v1alpha1.BatchRelease) (*batchcontext.BatchContext, error) { + rolloutID := release.Spec.ReleasePlan.RolloutID + if rolloutID != "" { + // if rollout-id is set, the pods will be patched with a batch label, + // so we have to list pods here. + if _, err := rc.ListOwnedPods(); err != nil { + return nil, err + } + } + + // current batch index + currentBatch := release.Status.CanaryStatus.CurrentBatch + // the number of no-need-update pods that were marked before rollout + noNeedUpdate := release.Status.CanaryStatus.NoNeedUpdateReplicas + // the number of upgraded pods according to release plan in current batch.
+ plannedUpdate := int32(control.CalculateBatchReplicas(release, int(rc.Replicas), int(currentBatch))) + // the number of pods that should actually be upgraded + desiredUpdate := plannedUpdate + // the number of pods that should actually not be upgraded + desiredStable := rc.Replicas - desiredUpdate + // if we should consider the no-need-update pods that were marked before progressing + if noNeedUpdate != nil && *noNeedUpdate > 0 { + // specially, we should ignore the pods that were marked as no-need-update, this logic is for the rollback scenario + desiredUpdateNew := int32(control.CalculateBatchReplicas(release, int(rc.Replicas-*noNeedUpdate), int(currentBatch))) + desiredStable = rc.Replicas - *noNeedUpdate - desiredUpdateNew + desiredUpdate = rc.Replicas - desiredStable + } + + // make sure at least one pod is upgraded if canaryReplicas is not "0%" + desiredPartition := intstr.FromInt(int(desiredStable)) + batchPlan := release.Spec.ReleasePlan.Batches[currentBatch].CanaryReplicas + if batchPlan.Type == intstr.String { + desiredPartition = control.ParseIntegerAsPercentageIfPossible(desiredStable, rc.Replicas, &batchPlan) + } + + currentPartition := intstr.FromInt(0) + if rc.object.Spec.UpdateStrategy.Partition != nil { + currentPartition = *rc.object.Spec.UpdateStrategy.Partition + } + + batchContext := &batchcontext.BatchContext{ + Pods: rc.pods, + RolloutID: rolloutID, + CurrentBatch: currentBatch, + UpdateRevision: release.Status.UpdateRevision, + DesiredPartition: desiredPartition, + CurrentPartition: currentPartition, + FailureThreshold: release.Spec.ReleasePlan.FailureThreshold, + + Replicas: rc.Replicas, + UpdatedReplicas: rc.Status.UpdatedReplicas, + UpdatedReadyReplicas: rc.Status.UpdatedReadyReplicas, + NoNeedUpdatedReplicas: noNeedUpdate, + PlannedUpdatedReplicas: plannedUpdate, + DesiredUpdatedReplicas: desiredUpdate, + } + + if noNeedUpdate != nil { + batchContext.FilterFunc = labelpatch.FilterPodsForUnorderedUpdate + } + return batchContext, nil +} diff
--git a/pkg/controller/batchrelease/control/partitionstyle/cloneset/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/cloneset/control_test.go new file mode 100644 index 00000000..3be07836 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/cloneset/control_test.go @@ -0,0 +1,342 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cloneset + +import ( + "context" + "encoding/json" + "fmt" + "testing" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + kruiseappsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" + "github.com/openkruise/rollouts/api/v1alpha1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/labelpatch" + "github.com/openkruise/rollouts/pkg/util" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/uuid" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +var ( + scheme = runtime.NewScheme() + + cloneKey = types.NamespacedName{ + Namespace: "default", + Name: "cloneset", + } + cloneDemo = &kruiseappsv1alpha1.CloneSet{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "apps.kruise.io/v1alpha1", + Kind: "CloneSet", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: cloneKey.Name, + Namespace: cloneKey.Namespace, + Generation: 1, + Labels: map[string]string{ + "app": "busybox", + }, + Annotations: map[string]string{ + "type": "unit-test", + }, + }, + Spec: kruiseappsv1alpha1.CloneSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "busybox", + }, + }, + Replicas: pointer.Int32(10), + UpdateStrategy: kruiseappsv1alpha1.CloneSetUpdateStrategy{ + Paused: true, + Partition: &intstr.IntOrString{Type: intstr.String, StrVal: "100%"}, + MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 1}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": "busybox", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "busybox", + Image: "busybox:latest", + }, + }, + }, + }, + }, + Status: kruiseappsv1alpha1.CloneSetStatus{ + Replicas: 10, + UpdatedReplicas: 0, + ReadyReplicas: 10, + AvailableReplicas: 10, + UpdatedReadyReplicas: 0, + UpdateRevision: 
"version-2", + CurrentRevision: "version-1", + ObservedGeneration: 1, + CollisionCount: pointer.Int32Ptr(1), + }, + } + + releaseDemo = &v1alpha1.BatchRelease{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "rollouts.kruise.io/v1alpha1", + Kind: "BatchRelease", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "release", + Namespace: cloneKey.Namespace, + UID: uuid.NewUUID(), + }, + Spec: v1alpha1.BatchReleaseSpec{ + ReleasePlan: v1alpha1.ReleasePlan{ + Batches: []v1alpha1.ReleaseBatch{ + { + CanaryReplicas: intstr.FromString("10%"), + }, + { + CanaryReplicas: intstr.FromString("50%"), + }, + { + CanaryReplicas: intstr.FromString("100%"), + }, + }, + }, + TargetRef: v1alpha1.ObjectRef{ + WorkloadRef: &v1alpha1.WorkloadRef{ + APIVersion: cloneDemo.APIVersion, + Kind: cloneDemo.Kind, + Name: cloneDemo.Name, + }, + }, + }, + Status: v1alpha1.BatchReleaseStatus{ + CanaryStatus: v1alpha1.BatchReleaseCanaryStatus{ + CurrentBatch: 0, + }, + }, + } +) + +func init() { + apps.AddToScheme(scheme) + v1alpha1.AddToScheme(scheme) + kruiseappsv1alpha1.AddToScheme(scheme) +} + +func TestCalculateBatchContext(t *testing.T) { + RegisterFailHandler(Fail) + + percent := intstr.FromString("20%") + cases := map[string]struct { + workload func() *kruiseappsv1alpha1.CloneSet + release func() *v1alpha1.BatchRelease + result *batchcontext.BatchContext + }{ + "without NoNeedUpdate": { + workload: func() *kruiseappsv1alpha1.CloneSet { + return &kruiseappsv1alpha1.CloneSet{ + Spec: kruiseappsv1alpha1.CloneSetSpec{ + Replicas: pointer.Int32Ptr(10), + UpdateStrategy: kruiseappsv1alpha1.CloneSetUpdateStrategy{ + Partition: func() *intstr.IntOrString { p := intstr.FromString("100%"); return &p }(), + }, + }, + Status: kruiseappsv1alpha1.CloneSetStatus{ + Replicas: 10, + UpdatedReplicas: 5, + UpdatedReadyReplicas: 5, + AvailableReplicas: 10, + }, + } + }, + release: func() *v1alpha1.BatchRelease { + r := &v1alpha1.BatchRelease{ + Spec: v1alpha1.BatchReleaseSpec{ + ReleasePlan: v1alpha1.ReleasePlan{ + 
FailureThreshold: &percent, + Batches: []v1alpha1.ReleaseBatch{ + { + CanaryReplicas: percent, + }, + }, + }, + }, + Status: v1alpha1.BatchReleaseStatus{ + CanaryStatus: v1alpha1.BatchReleaseCanaryStatus{ + CurrentBatch: 0, + }, + }, + } + return r + }, + result: &batchcontext.BatchContext{ + FailureThreshold: &percent, + CurrentBatch: 0, + Replicas: 10, + UpdatedReplicas: 5, + UpdatedReadyReplicas: 5, + PlannedUpdatedReplicas: 2, + DesiredUpdatedReplicas: 2, + CurrentPartition: intstr.FromString("100%"), + DesiredPartition: intstr.FromString("80%"), + }, + }, + "with NoNeedUpdate": { + workload: func() *kruiseappsv1alpha1.CloneSet { + return &kruiseappsv1alpha1.CloneSet{ + Spec: kruiseappsv1alpha1.CloneSetSpec{ + Replicas: pointer.Int32Ptr(20), + UpdateStrategy: kruiseappsv1alpha1.CloneSetUpdateStrategy{ + Partition: func() *intstr.IntOrString { p := intstr.FromString("100%"); return &p }(), + }, + }, + Status: kruiseappsv1alpha1.CloneSetStatus{ + Replicas: 20, + UpdatedReplicas: 10, + UpdatedReadyReplicas: 10, + AvailableReplicas: 20, + ReadyReplicas: 20, + }, + } + }, + release: func() *v1alpha1.BatchRelease { + r := &v1alpha1.BatchRelease{ + Spec: v1alpha1.BatchReleaseSpec{ + ReleasePlan: v1alpha1.ReleasePlan{ + FailureThreshold: &percent, + Batches: []v1alpha1.ReleaseBatch{ + { + CanaryReplicas: percent, + }, + }, + }, + }, + Status: v1alpha1.BatchReleaseStatus{ + CanaryStatus: v1alpha1.BatchReleaseCanaryStatus{ + CurrentBatch: 0, + NoNeedUpdateReplicas: pointer.Int32(10), + }, + UpdateRevision: "update-version", + }, + } + return r + }, + result: &batchcontext.BatchContext{ + CurrentBatch: 0, + UpdateRevision: "update-version", + Replicas: 20, + UpdatedReplicas: 10, + UpdatedReadyReplicas: 10, + NoNeedUpdatedReplicas: pointer.Int32Ptr(10), + PlannedUpdatedReplicas: 4, + DesiredUpdatedReplicas: 12, + CurrentPartition: intstr.FromString("100%"), + DesiredPartition: intstr.FromString("40%"), + FailureThreshold: &percent, + FilterFunc: 
// TestRealController drives the CloneSet controller through a full
// Initialize -> UpgradeBatch -> Finalize cycle against a fake client and checks
// the workload after each step.
func TestRealController(t *testing.T) {
	RegisterFailHandler(Fail)

	// Work on copies so the package-level fixtures stay pristine.
	release := releaseDemo.DeepCopy()
	clone := cloneDemo.DeepCopy()
	cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(release, clone).Build()
	c := NewController(cli, cloneKey, clone.GroupVersionKind()).(*realController)
	controller, err := c.BuildController()
	Expect(err).NotTo(HaveOccurred())

	// Initialize must un-pause the workload and stamp the control annotation.
	err = controller.Initialize(release)
	Expect(err).NotTo(HaveOccurred())
	fetch := &kruiseappsv1alpha1.CloneSet{}
	Expect(cli.Get(context.TODO(), cloneKey, fetch)).NotTo(HaveOccurred())
	Expect(fetch.Spec.UpdateStrategy.Paused).Should(BeFalse())
	Expect(fetch.Annotations[util.BatchReleaseControlAnnotation]).Should(Equal(getControlInfo(release)))
	c.object = fetch // mock

	// Retry UpgradeBatch until it reports success, refreshing the cached
	// object from the fake client after every attempt.
	// NOTE(review): the loop has no iteration bound; it relies on UpgradeBatch
	// eventually returning nil — confirm against the CloneSet implementation.
	for {
		batchContext, err := controller.CalculateBatchContext(release)
		Expect(err).NotTo(HaveOccurred())
		err = controller.UpgradeBatch(batchContext)
		fetch = &kruiseappsv1alpha1.CloneSet{}
		// mock
		Expect(cli.Get(context.TODO(), cloneKey, fetch)).NotTo(HaveOccurred())
		c.object = fetch
		if err == nil {
			break
		}
	}
	// The first batch of releaseDemo is "10%", and the test expects the
	// partition to end up at "90%".
	fetch = &kruiseappsv1alpha1.CloneSet{}
	Expect(cli.Get(context.TODO(), cloneKey, fetch)).NotTo(HaveOccurred())
	Expect(fetch.Spec.UpdateStrategy.Partition.StrVal).Should(Equal("90%"))

	// Finalize must remove the control annotation again.
	err = controller.Finalize(release)
	Expect(err).NotTo(HaveOccurred())
	fetch = &kruiseappsv1alpha1.CloneSet{}
	Expect(cli.Get(context.TODO(), cloneKey, fetch)).NotTo(HaveOccurred())
	Expect(fetch.Annotations[util.BatchReleaseControlAnnotation]).Should(Equal(""))

	// The parsed workload info must mirror the original fixture.
	stableInfo := controller.GetInfo()
	Expect(stableInfo).ShouldNot(BeNil())
	checkWorkloadInfo(stableInfo, clone)
}
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package partitionstyle + +import ( + "context" + "fmt" + + "github.com/openkruise/rollouts/api/v1alpha1" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/labelpatch" + "github.com/openkruise/rollouts/pkg/util" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type realBatchControlPlane struct { + Interface + client.Client + record.EventRecorder + patcher labelpatch.LabelPatcher + release *v1alpha1.BatchRelease + newStatus *v1alpha1.BatchReleaseStatus +} + +type NewInterfaceFunc func(cli client.Client, key types.NamespacedName, gvk schema.GroupVersionKind) Interface + +// NewControlPlane creates a new release controller with partitioned-style to drive batch release state machine +func NewControlPlane(f NewInterfaceFunc, cli client.Client, recorder record.EventRecorder, release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, key types.NamespacedName, gvk schema.GroupVersionKind) *realBatchControlPlane { + return &realBatchControlPlane{ + Client: cli, + EventRecorder: recorder, + newStatus: newStatus, + Interface: f(cli, key, gvk), + release: release.DeepCopy(), + patcher: labelpatch.NewLabelPatcher(cli, klog.KObj(release)), + } +} + +func (rc *realBatchControlPlane) Initialize() error { + controller, err := rc.BuildController() + if err != nil { + return err + } + + // claim workload under our control + err = controller.Initialize(rc.release) + if err != nil { + return err + } + + // record revision and replicas + workloadInfo := controller.GetInfo() + rc.newStatus.StableRevision = workloadInfo.Status.StableRevision + rc.newStatus.UpdateRevision 
= workloadInfo.Status.UpdateRevision + rc.newStatus.ObservedWorkloadReplicas = workloadInfo.Replicas + + // mark the pods that no need to update if it needs + noNeedUpdateReplicas, err := rc.markNoNeedUpdatePodsIfNeeds() + if noNeedUpdateReplicas != nil && err == nil { + rc.newStatus.CanaryStatus.NoNeedUpdateReplicas = noNeedUpdateReplicas + } + return err +} + +func (rc *realBatchControlPlane) UpgradeBatch() error { + controller, err := rc.BuildController() + if err != nil { + return err + } + + if controller.GetInfo().Replicas == 0 { + return nil + } + + err = rc.countAndUpdateNoNeedUpdateReplicas() + if err != nil { + return err + } + + batchContext, err := controller.CalculateBatchContext(rc.release) + if err != nil { + return err + } + klog.Infof("BatchRelease %v upgrade batch: %s", klog.KObj(rc.release), batchContext.Log()) + + err = controller.UpgradeBatch(batchContext) + if err != nil { + return err + } + + return rc.patcher.PatchPodBatchLabel(batchContext) +} + +func (rc *realBatchControlPlane) CheckBatchReady() error { + controller, err := rc.BuildController() + if err != nil { + return err + } + + if controller.GetInfo().Replicas == 0 { + return nil + } + + // do not countAndUpdateNoNeedUpdateReplicas when checking, + // the target calculated should be consistent with UpgradeBatch. 
+ batchContext, err := controller.CalculateBatchContext(rc.release) + if err != nil { + return err + } + + klog.Infof("BatchRelease %v check batch: %s", klog.KObj(rc.release), batchContext.Log()) + + return batchContext.IsBatchReady() +} + +func (rc *realBatchControlPlane) Finalize() error { + controller, err := rc.BuildController() + if err != nil { + return client.IgnoreNotFound(err) + } + + // release workload control info and clean up resources if it needs + return controller.Finalize(rc.release) +} + +func (rc *realBatchControlPlane) SyncWorkloadInformation() (control.WorkloadEventType, *util.WorkloadInfo, error) { + // ignore the sync if the release plan is deleted + if rc.release.DeletionTimestamp != nil { + return control.WorkloadNormalState, nil, nil + } + + controller, err := rc.BuildController() + if err != nil { + if errors.IsNotFound(err) { + return control.WorkloadHasGone, nil, err + } + return control.WorkloadUnknownState, nil, err + } + + workloadInfo := controller.GetInfo() + if !workloadInfo.IsStable() { + klog.Info("Workload(%v) still reconciling, waiting for it to complete, generation: %v, observed: %v", + workloadInfo.LogKey, workloadInfo.Generation, workloadInfo.Status.ObservedGeneration) + return control.WorkloadStillReconciling, workloadInfo, nil + } + + if workloadInfo.IsPromoted() { + klog.Info("Workload(%v) has been promoted, no need to rollout again actually, replicas: %v, updated: %v", + workloadInfo.LogKey, workloadInfo.Replicas, workloadInfo.Status.UpdatedReadyReplicas) + return control.WorkloadNormalState, workloadInfo, nil + } + + if workloadInfo.IsScaling(rc.newStatus.ObservedWorkloadReplicas) { + klog.Warningf("Workload(%v) replicas is modified, replicas from: %v to -> %v", + workloadInfo.LogKey, rc.newStatus.ObservedWorkloadReplicas, workloadInfo.Replicas) + return control.WorkloadReplicasChanged, workloadInfo, nil + } + + if workloadInfo.IsRollback(rc.newStatus.StableRevision, rc.newStatus.UpdateRevision) { + 
klog.Warningf("Workload(%v) is rolling back", workloadInfo.LogKey) + return control.WorkloadRollbackInBatch, workloadInfo, nil + } + + if workloadInfo.IsRevisionNotEqual(rc.newStatus.UpdateRevision) { + klog.Warningf("Workload(%v) updateRevision is modified, updateRevision from: %v to -> %v", + workloadInfo.LogKey, rc.newStatus.UpdateRevision, workloadInfo.Status.UpdateRevision) + return control.WorkloadPodTemplateChanged, workloadInfo, nil + } + + return control.WorkloadNormalState, workloadInfo, nil +} + +/* -------------------------------------------- + The functions below are helper functions +----------------------------------------------- */ + +// MarkNoNeedUpdatePods makes sure that the updated pods have been patched no-need-update label. +// return values: +// - *int32: how many pods have been patched; +// - err: whether error occurs. +func (rc *realBatchControlPlane) markNoNeedUpdatePodsIfNeeds() (*int32, error) { + // currently, we only support rollback scene, in the future, we may support more scenes. + if rc.release.Annotations[util.RollbackInBatchAnnotation] == "" { + return nil, nil + } + // currently, if rollout-id is not set, it is no scene which require patch this label + // we only return the current updated replicas. 
+ if rc.release.Spec.ReleasePlan.RolloutID == "" { + return pointer.Int32(rc.newStatus.CanaryStatus.UpdatedReplicas), nil + } + + var err error + var pods []*v1.Pod + var filterPods []*v1.Pod + noNeedUpdateReplicas := int32(0) + rolloutID := rc.release.Spec.ReleasePlan.RolloutID + if pods, err = rc.ListOwnedPods(); err != nil { + return nil, err + } + + for i := range pods { + if !pods[i].DeletionTimestamp.IsZero() { + continue + } + if !util.IsConsistentWithRevision(pods[i], rc.newStatus.UpdateRevision) { + continue + } + if pods[i].Labels[util.NoNeedUpdatePodLabel] == rolloutID { + noNeedUpdateReplicas++ + continue + } + filterPods = append(filterPods, pods[i]) + } + + if len(filterPods) == 0 { + return &noNeedUpdateReplicas, nil + } + + for _, pod := range filterPods { + clone := util.GetEmptyObjectWithKey(pod) + body := fmt.Sprintf(`{"metadata":{"labels":{"%s":"%s"}}}`, util.NoNeedUpdatePodLabel, rolloutID) + err = rc.Patch(context.TODO(), clone, client.RawPatch(types.StrategicMergePatchType, []byte(body))) + if err != nil { + klog.Errorf("Failed to patch no-need-update label(%v) to pod %v, err: %v", rolloutID, klog.KObj(pod), err) + return &noNeedUpdateReplicas, err + } else { + klog.Info("Succeeded to patch no-need-update label(%v) to pod %v", rolloutID, klog.KObj(pod)) + } + noNeedUpdateReplicas++ + } + + return &noNeedUpdateReplicas, fmt.Errorf("initilization not yet: patch and find %d pods with no-need-update-label", noNeedUpdateReplicas) +} + +// countAndUpdateNoNeedUpdateReplicas will count the pods with no-need-update +// label and update corresponding field for BatchRelease +func (rc *realBatchControlPlane) countAndUpdateNoNeedUpdateReplicas() error { + if rc.release.Spec.ReleasePlan.RolloutID == "" || rc.release.Status.CanaryStatus.NoNeedUpdateReplicas == nil { + return nil + } + + pods, err := rc.ListOwnedPods() + if err != nil { + return err + } + + noNeedUpdateReplicas := int32(0) + for _, pod := range pods { + if !pod.DeletionTimestamp.IsZero() { 
// Interface is the contract a partition-style workload controller has to
// fulfil so that the control plane in this package can drive a batch release
// on it.
type Interface interface {
	// BuildController fetches the workload object, parses its workload info
	// and returns a controller that is ready to be used for that workload.
	BuildController() (Interface, error)
	// GetInfo returns the parsed workload information.
	GetInfo() *util.WorkloadInfo
	// ListOwnedPods fetches the pods owned by the workload.
	// Note that pods should be listed only when they are really needed.
	ListOwnedPods() ([]*corev1.Pod, error)
	// CalculateBatchContext calculates the context of the current batch
	// according to the release plan and the current status of the workload.
	CalculateBatchContext(release *v1alpha1.BatchRelease) (*batchcontext.BatchContext, error)

	// Initialize does the work required before rolling out, for example:
	// - claim that the workload is under our control;
	// - workload-specific preparation, such as setting a 100% partition.
	Initialize(release *v1alpha1.BatchRelease) error
	// UpgradeBatch upgrades the workload according to the given batch context.
	UpgradeBatch(ctx *batchcontext.BatchContext) error
	// Finalize does the work required after rolling out, for example:
	// - free the stable workload from rollout control;
	// - resume the workload if needed.
	Finalize(release *v1alpha1.BatchRelease) error
}
+*/ + +package statefulset + +import ( + "context" + "fmt" + "math" + + "github.com/openkruise/rollouts/api/v1alpha1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/labelpatch" + "github.com/openkruise/rollouts/pkg/util" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type realController struct { + *util.WorkloadInfo + client client.Client + pods []*corev1.Pod + key types.NamespacedName + gvk schema.GroupVersionKind + object client.Object +} + +func NewController(cli client.Client, key types.NamespacedName, gvk schema.GroupVersionKind) partitionstyle.Interface { + return &realController{ + key: key, + gvk: gvk, + client: cli, + } +} + +func (rc *realController) GetInfo() *util.WorkloadInfo { + return rc.WorkloadInfo +} + +func (rc *realController) BuildController() (partitionstyle.Interface, error) { + if rc.object != nil { + return rc, nil + } + object := util.GetEmptyWorkloadObject(rc.gvk) + if err := rc.client.Get(context.TODO(), rc.key, object); err != nil { + return rc, err + } + rc.object = object + rc.WorkloadInfo = util.ParseWorkload(object) + + // for native StatefulSet which has no updatedReadyReplicas field, we should + // list and count its owned Pods one by one. 
+ if rc.WorkloadInfo != nil && rc.WorkloadInfo.Status.UpdatedReadyReplicas <= 0 { + pods, err := rc.ListOwnedPods() + if err != nil { + return nil, err + } + updatedReadyReplicas := util.WrappedPodCount(pods, func(pod *corev1.Pod) bool { + if !pod.DeletionTimestamp.IsZero() { + return false + } + if !util.IsConsistentWithRevision(pod, rc.WorkloadInfo.Status.UpdateRevision) { + return false + } + return util.IsPodReady(pod) + }) + rc.WorkloadInfo.Status.UpdatedReadyReplicas = int32(updatedReadyReplicas) + } + + return rc, nil +} + +func (rc *realController) ListOwnedPods() ([]*corev1.Pod, error) { + if rc.pods != nil { + return rc.pods, nil + } + var err error + rc.pods, err = util.ListOwnedPods(rc.client, rc.object) + return rc.pods, err +} + +func (rc *realController) Initialize(release *v1alpha1.BatchRelease) error { + if control.IsControlledByBatchRelease(release, rc.object) { + return nil + } + + owner := control.BuildReleaseControlInfo(release) + metaBody := fmt.Sprintf(`"metadata":{"annotations":{"%s":"%s"}}`, util.BatchReleaseControlAnnotation, owner) + specBody := fmt.Sprintf(`"spec":{"updateStrategy":{"rollingUpdate":{"partition":%d,"paused":false}}}`, math.MaxInt16) + body := fmt.Sprintf(`{%s,%s}`, metaBody, specBody) + + clone := util.GetEmptyObjectWithKey(rc.object) + if err := rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))); err != nil { + return err + } + + klog.Infof("Successfully initialize StatefulSet %v", klog.KObj(clone)) + return nil +} + +func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { + desired := ctx.DesiredPartition.IntVal + current := ctx.CurrentPartition.IntVal + // current less than desired, which means current revision replicas will be less than desired, + // in other word, update revision replicas will be more than desired, no need to update again. 
// CalculateBatchContext derives the BatchContext of the release's current
// batch from the release plan/status and the workload state cached on rc.
//
// The owned pods are listed only when a rollout-id is set; that listing is the
// only source of an error here. The returned DesiredPartition is the number of
// replicas that must stay on the stable revision, and DesiredUpdatedReplicas
// is the number that should be on the update revision for this batch.
func (rc *realController) CalculateBatchContext(release *v1alpha1.BatchRelease) (*batchcontext.BatchContext, error) {
	rolloutID := release.Spec.ReleasePlan.RolloutID
	if rolloutID != "" {
		// if rollout-id is set, the pod will be patched batch label,
		// so we have to list pod here.
		if _, err := rc.ListOwnedPods(); err != nil {
			return nil, err
		}
	}

	// current batch index
	currentBatch := release.Status.CanaryStatus.CurrentBatch
	// the number of no need update pods that marked before rollout
	noNeedUpdate := release.Status.CanaryStatus.NoNeedUpdateReplicas
	// the number of upgraded pods according to release plan in current batch.
	plannedUpdate := int32(control.CalculateBatchReplicas(release, int(rc.Replicas), int(currentBatch)))
	// the number of pods that should be upgraded in real
	desiredUpdate := plannedUpdate
	// the number of pods that should not be upgraded in real
	desiredStable := rc.Replicas - desiredUpdate
	// if we should consider the no-need-update pods that were marked before rolling, the desired will change
	if noNeedUpdate != nil && *noNeedUpdate > 0 {
		// specially, we should ignore the pods that were marked as no-need-update, this logic is for Rollback scene:
		// the batch size is recomputed over the pods that still need an update,
		// and the no-need-update pods count as already updated.
		desiredUpdateNew := int32(control.CalculateBatchReplicas(release, int(rc.Replicas-*noNeedUpdate), int(currentBatch)))
		desiredStable = rc.Replicas - *noNeedUpdate - desiredUpdateNew
		desiredUpdate = rc.Replicas - desiredStable
	}

	// Note that:
	// * if ordered update, partition is related with pod ordinals;
	// * if unordered update, partition just like cloneSet partition.
	unorderedUpdate := util.IsStatefulSetUnorderedUpdate(rc.object)
	if !unorderedUpdate && noNeedUpdate != nil {
		// Ordered update: raise the partition by the no-need-update count.
		// Algebraically desiredUpdate keeps the value it had before this
		// branch; only desiredStable (the partition) grows.
		desiredStable += *noNeedUpdate
		desiredUpdate = rc.Replicas - desiredStable + *noNeedUpdate
	}

	// Assemble the context; both partitions are expressed as integers.
	batchContext := &batchcontext.BatchContext{
		Pods:             rc.pods,
		RolloutID:        rolloutID,
		CurrentBatch:     currentBatch,
		UpdateRevision:   release.Status.UpdateRevision,
		DesiredPartition: intstr.FromInt(int(desiredStable)),
		CurrentPartition: intstr.FromInt(int(util.GetStatefulSetPartition(rc.object))),
		FailureThreshold: release.Spec.ReleasePlan.FailureThreshold,

		Replicas:               rc.Replicas,
		UpdatedReplicas:        rc.Status.UpdatedReplicas,
		UpdatedReadyReplicas:   rc.Status.UpdatedReadyReplicas,
		NoNeedUpdatedReplicas:  noNeedUpdate,
		PlannedUpdatedReplicas: plannedUpdate,
		DesiredUpdatedReplicas: desiredUpdate,
	}

	// A pod filter is only installed when no-need-update pods are tracked;
	// which filter depends on the update ordering of the StatefulSet.
	if noNeedUpdate != nil {
		if unorderedUpdate {
			batchContext.FilterFunc = labelpatch.FilterPodsForUnorderedUpdate
		} else {
			batchContext.FilterFunc = labelpatch.FilterPodsForOrderedUpdate
		}
	}
	return batchContext, nil
}
"github.com/onsi/gomega" + appsv1pub "github.com/openkruise/kruise-api/apps/pub" + kruiseappsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" + kruiseappsv1beta1 "github.com/openkruise/kruise-api/apps/v1beta1" + "github.com/openkruise/rollouts/api/v1alpha1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/labelpatch" + "github.com/openkruise/rollouts/pkg/util" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/apimachinery/pkg/util/uuid" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +var ( + scheme = runtime.NewScheme() + + stsKey = types.NamespacedName{ + Namespace: "default", + Name: "statefulset", + } + stsDemo = &kruiseappsv1beta1.StatefulSet{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "apps.kruise.io/v1alpha1", + Kind: "StatefulSet", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: stsKey.Name, + Namespace: stsKey.Namespace, + Generation: 1, + Labels: map[string]string{ + "app": "busybox", + }, + Annotations: map[string]string{ + "type": "unit-test", + }, + }, + Spec: kruiseappsv1beta1.StatefulSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "busybox", + }, + }, + Replicas: pointer.Int32(10), + UpdateStrategy: kruiseappsv1beta1.StatefulSetUpdateStrategy{ + Type: apps.RollingUpdateStatefulSetStrategyType, + RollingUpdate: &kruiseappsv1beta1.RollingUpdateStatefulSetStrategy{ + Paused: true, + Partition: pointer.Int32(10), + MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 1}, + }, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": "busybox", + }, + }, + Spec: 
corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "busybox", + Image: "busybox:latest", + }, + }, + }, + }, + }, + Status: kruiseappsv1beta1.StatefulSetStatus{ + Replicas: 10, + UpdatedReplicas: 0, + ReadyReplicas: 10, + AvailableReplicas: 10, + UpdateRevision: "version-2", + CurrentRevision: "version-1", + ObservedGeneration: 1, + UpdatedReadyReplicas: 0, + CollisionCount: pointer.Int32Ptr(1), + }, + } + + releaseDemo = &v1alpha1.BatchRelease{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "rollouts.kruise.io/v1alpha1", + Kind: "BatchRelease", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "release", + Namespace: stsKey.Namespace, + UID: uuid.NewUUID(), + }, + Spec: v1alpha1.BatchReleaseSpec{ + ReleasePlan: v1alpha1.ReleasePlan{ + Batches: []v1alpha1.ReleaseBatch{ + { + CanaryReplicas: intstr.FromString("10%"), + }, + { + CanaryReplicas: intstr.FromString("50%"), + }, + { + CanaryReplicas: intstr.FromString("100%"), + }, + }, + }, + TargetRef: v1alpha1.ObjectRef{ + WorkloadRef: &v1alpha1.WorkloadRef{ + APIVersion: stsDemo.APIVersion, + Kind: stsDemo.Kind, + Name: stsDemo.Name, + }, + }, + }, + Status: v1alpha1.BatchReleaseStatus{ + CanaryStatus: v1alpha1.BatchReleaseCanaryStatus{ + CurrentBatch: 0, + }, + }, + } +) + +func init() { + rand.Seed(87076677) + apps.AddToScheme(scheme) + corev1.AddToScheme(scheme) + v1alpha1.AddToScheme(scheme) + kruiseappsv1alpha1.AddToScheme(scheme) + kruiseappsv1beta1.AddToScheme(scheme) +} + +func TestCalculateBatchContextForNativeStatefulSet(t *testing.T) { + RegisterFailHandler(Fail) + + percent := intstr.FromString("20%") + cases := map[string]struct { + workload func() *apps.StatefulSet + release func() *v1alpha1.BatchRelease + pods func() []*corev1.Pod + result *batchcontext.BatchContext + }{ + "without NoNeedUpdate": { + workload: func() *apps.StatefulSet { + return &apps.StatefulSet{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "apps/v1", + Kind: "StatefulSet", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sts", + 
Namespace: "test", + UID: "test", + }, + Spec: apps.StatefulSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "foo"}}, + Replicas: pointer.Int32Ptr(10), + UpdateStrategy: apps.StatefulSetUpdateStrategy{ + Type: apps.RollingUpdateStatefulSetStrategyType, + RollingUpdate: &apps.RollingUpdateStatefulSetStrategy{ + Partition: pointer.Int32Ptr(100), + }, + }, + }, + Status: apps.StatefulSetStatus{ + Replicas: 10, + UpdatedReplicas: 5, + AvailableReplicas: 10, + CurrentRevision: "stable-version", + UpdateRevision: "update-version", + }, + } + }, + pods: func() []*corev1.Pod { + stablePods := generatePods(5, "stable-version", "True") + updatedReadyPods := generatePods(5, "update-version", "True") + return append(stablePods, updatedReadyPods...) + }, + release: func() *v1alpha1.BatchRelease { + r := &v1alpha1.BatchRelease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-br", + Namespace: "test", + }, + Spec: v1alpha1.BatchReleaseSpec{ + ReleasePlan: v1alpha1.ReleasePlan{ + FailureThreshold: &percent, + Batches: []v1alpha1.ReleaseBatch{ + { + CanaryReplicas: percent, + }, + }, + }, + }, + Status: v1alpha1.BatchReleaseStatus{ + CanaryStatus: v1alpha1.BatchReleaseCanaryStatus{ + CurrentBatch: 0, + }, + UpdateRevision: "update-version", + }, + } + return r + }, + result: &batchcontext.BatchContext{ + FailureThreshold: &percent, + CurrentBatch: 0, + Replicas: 10, + UpdatedReplicas: 5, + UpdatedReadyReplicas: 5, + PlannedUpdatedReplicas: 2, + DesiredUpdatedReplicas: 2, + UpdateRevision: "update-version", + CurrentPartition: intstr.FromInt(100), + DesiredPartition: intstr.FromInt(8), + Pods: generatePods(10, "", ""), + }, + }, + "with NoNeedUpdate": { + workload: func() *apps.StatefulSet { + return &apps.StatefulSet{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "apps/v1", + Kind: "StatefulSet", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sts", + Namespace: "test", + UID: "test", + }, + Spec: apps.StatefulSetSpec{ + Selector: 
&metav1.LabelSelector{MatchLabels: map[string]string{"app": "foo"}}, + Replicas: pointer.Int32Ptr(20), + UpdateStrategy: apps.StatefulSetUpdateStrategy{ + Type: apps.RollingUpdateStatefulSetStrategyType, + RollingUpdate: &apps.RollingUpdateStatefulSetStrategy{ + Partition: pointer.Int32Ptr(100), + }, + }, + }, + Status: apps.StatefulSetStatus{ + Replicas: 20, + UpdatedReplicas: 10, + AvailableReplicas: 20, + CurrentRevision: "stable-version", + UpdateRevision: "update-version", + }, + } + }, + pods: func() []*corev1.Pod { + stablePods := generatePods(10, "stable-version", "True") + updatedReadyPods := generatePods(10, "update-version", "True") + return append(stablePods, updatedReadyPods...) + }, + release: func() *v1alpha1.BatchRelease { + r := &v1alpha1.BatchRelease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-br", + Namespace: "test", + }, + Spec: v1alpha1.BatchReleaseSpec{ + ReleasePlan: v1alpha1.ReleasePlan{ + FailureThreshold: &percent, + Batches: []v1alpha1.ReleaseBatch{ + { + CanaryReplicas: percent, + }, + }, + }, + }, + Status: v1alpha1.BatchReleaseStatus{ + CanaryStatus: v1alpha1.BatchReleaseCanaryStatus{ + CurrentBatch: 0, + NoNeedUpdateReplicas: pointer.Int32(10), + }, + UpdateRevision: "update-version", + }, + } + return r + }, + result: &batchcontext.BatchContext{ + CurrentBatch: 0, + UpdateRevision: "update-version", + Replicas: 20, + UpdatedReplicas: 10, + UpdatedReadyReplicas: 10, + NoNeedUpdatedReplicas: pointer.Int32Ptr(10), + PlannedUpdatedReplicas: 4, + DesiredUpdatedReplicas: 12, + CurrentPartition: intstr.FromInt(100), + DesiredPartition: intstr.FromInt(18), + FailureThreshold: &percent, + FilterFunc: labelpatch.FilterPodsForUnorderedUpdate, + Pods: generatePods(20, "", ""), + }, + }, + } + + for name, cs := range cases { + t.Run(name, func(t *testing.T) { + pods := func() []client.Object { + var objects []client.Object + pods := cs.pods() + for _, pod := range pods { + objects = append(objects, pod) + } + return objects + }() + br := 
cs.release() + sts := cs.workload() + cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(br, sts).WithObjects(pods...).Build() + control := realController{ + client: cli, + gvk: sts.GetObjectKind().GroupVersionKind(), + key: types.NamespacedName{Namespace: "test", Name: "test-sts"}, + } + c, err := control.BuildController() + Expect(err).NotTo(HaveOccurred()) + got, err := c.CalculateBatchContext(cs.release()) + fmt.Println(got.Log()) + fmt.Println(cs.result.Log()) + Expect(err).NotTo(HaveOccurred()) + Expect(got.Log()).Should(Equal(cs.result.Log())) + }) + } +} + +func TestCalculateBatchContextForAdvancedStatefulSet(t *testing.T) { + RegisterFailHandler(Fail) + + percent := intstr.FromString("20%") + cases := map[string]struct { + workload func() *kruiseappsv1beta1.StatefulSet + release func() *v1alpha1.BatchRelease + pods func() []*corev1.Pod + result *batchcontext.BatchContext + }{ + "without NoNeedUpdate": { + workload: func() *kruiseappsv1beta1.StatefulSet { + return &kruiseappsv1beta1.StatefulSet{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "apps.kruise.io/v1beta1", + Kind: "StatefulSet", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sts", + Namespace: "test", + UID: "test", + }, + Spec: kruiseappsv1beta1.StatefulSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "foo"}}, + Replicas: pointer.Int32Ptr(10), + UpdateStrategy: kruiseappsv1beta1.StatefulSetUpdateStrategy{ + Type: apps.RollingUpdateStatefulSetStrategyType, + RollingUpdate: &kruiseappsv1beta1.RollingUpdateStatefulSetStrategy{ + Partition: pointer.Int32Ptr(100), + UnorderedUpdate: &kruiseappsv1beta1.UnorderedUpdateStrategy{ + PriorityStrategy: &appsv1pub.UpdatePriorityStrategy{ + OrderPriority: []appsv1pub.UpdatePriorityOrderTerm{ + { + OrderedKey: "test", + }, + }, + }, + }, + }, + }, + }, + Status: kruiseappsv1beta1.StatefulSetStatus{ + Replicas: 10, + UpdatedReplicas: 5, + AvailableReplicas: 10, + CurrentRevision: "stable-version", + UpdateRevision: 
"update-version", + }, + } + }, + pods: func() []*corev1.Pod { + stablePods := generatePods(5, "stable-version", "True") + updatedReadyPods := generatePods(5, "update-version", "True") + return append(stablePods, updatedReadyPods...) + }, + release: func() *v1alpha1.BatchRelease { + r := &v1alpha1.BatchRelease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-br", + Namespace: "test", + }, + Spec: v1alpha1.BatchReleaseSpec{ + ReleasePlan: v1alpha1.ReleasePlan{ + FailureThreshold: &percent, + Batches: []v1alpha1.ReleaseBatch{ + { + CanaryReplicas: percent, + }, + }, + }, + }, + Status: v1alpha1.BatchReleaseStatus{ + CanaryStatus: v1alpha1.BatchReleaseCanaryStatus{ + CurrentBatch: 0, + }, + UpdateRevision: "update-version", + }, + } + return r + }, + result: &batchcontext.BatchContext{ + FailureThreshold: &percent, + CurrentBatch: 0, + Replicas: 10, + UpdatedReplicas: 5, + UpdatedReadyReplicas: 5, + PlannedUpdatedReplicas: 2, + DesiredUpdatedReplicas: 2, + UpdateRevision: "update-version", + CurrentPartition: intstr.FromInt(100), + DesiredPartition: intstr.FromInt(8), + Pods: generatePods(10, "", ""), + }, + }, + "with NoNeedUpdate": { + workload: func() *kruiseappsv1beta1.StatefulSet { + return &kruiseappsv1beta1.StatefulSet{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "apps.kruise.io/v1beta1", + Kind: "StatefulSet", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sts", + Namespace: "test", + UID: "test", + }, + Spec: kruiseappsv1beta1.StatefulSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "foo"}}, + Replicas: pointer.Int32Ptr(20), + UpdateStrategy: kruiseappsv1beta1.StatefulSetUpdateStrategy{ + Type: apps.RollingUpdateStatefulSetStrategyType, + RollingUpdate: &kruiseappsv1beta1.RollingUpdateStatefulSetStrategy{ + Partition: pointer.Int32Ptr(100), + UnorderedUpdate: &kruiseappsv1beta1.UnorderedUpdateStrategy{ + PriorityStrategy: &appsv1pub.UpdatePriorityStrategy{ + OrderPriority: []appsv1pub.UpdatePriorityOrderTerm{ + { + 
OrderedKey: "test", + }, + }, + }, + }, + }, + }, + }, + Status: kruiseappsv1beta1.StatefulSetStatus{ + Replicas: 20, + UpdatedReplicas: 10, + AvailableReplicas: 20, + CurrentRevision: "stable-version", + UpdateRevision: "update-version", + }, + } + }, + pods: func() []*corev1.Pod { + stablePods := generatePods(10, "stable-version", "True") + updatedReadyPods := generatePods(10, "update-version", "True") + return append(stablePods, updatedReadyPods...) + }, + release: func() *v1alpha1.BatchRelease { + r := &v1alpha1.BatchRelease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-br", + Namespace: "test", + }, + Spec: v1alpha1.BatchReleaseSpec{ + ReleasePlan: v1alpha1.ReleasePlan{ + FailureThreshold: &percent, + Batches: []v1alpha1.ReleaseBatch{ + { + CanaryReplicas: percent, + }, + }, + }, + }, + Status: v1alpha1.BatchReleaseStatus{ + CanaryStatus: v1alpha1.BatchReleaseCanaryStatus{ + CurrentBatch: 0, + NoNeedUpdateReplicas: pointer.Int32(10), + }, + UpdateRevision: "update-version", + }, + } + return r + }, + result: &batchcontext.BatchContext{ + CurrentBatch: 0, + UpdateRevision: "update-version", + Replicas: 20, + UpdatedReplicas: 10, + UpdatedReadyReplicas: 10, + NoNeedUpdatedReplicas: pointer.Int32Ptr(10), + PlannedUpdatedReplicas: 4, + DesiredUpdatedReplicas: 12, + CurrentPartition: intstr.FromInt(100), + DesiredPartition: intstr.FromInt(8), + FailureThreshold: &percent, + FilterFunc: labelpatch.FilterPodsForUnorderedUpdate, + Pods: generatePods(20, "", ""), + }, + }, + } + + for name, cs := range cases { + t.Run(name, func(t *testing.T) { + pods := func() []client.Object { + var objects []client.Object + pods := cs.pods() + for _, pod := range pods { + objects = append(objects, pod) + } + return objects + }() + br := cs.release() + sts := cs.workload() + cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(br, sts).WithObjects(pods...).Build() + control := realController{ + client: cli, + gvk: sts.GetObjectKind().GroupVersionKind(), + key: 
types.NamespacedName{Namespace: "test", Name: "test-sts"}, + } + c, err := control.BuildController() + Expect(err).NotTo(HaveOccurred()) + got, err := c.CalculateBatchContext(cs.release()) + fmt.Println(got.Log()) + fmt.Println(cs.result.Log()) + Expect(err).NotTo(HaveOccurred()) + Expect(got.Log()).Should(Equal(cs.result.Log())) + }) + } +} + +func TestRealController(t *testing.T) { + RegisterFailHandler(Fail) + + release := releaseDemo.DeepCopy() + release.Spec.ReleasePlan.RolloutID = "1" + sts := stsDemo.DeepCopy() + cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(release, sts).Build() + c := NewController(cli, stsKey, sts.GroupVersionKind()).(*realController) + controller, err := c.BuildController() + Expect(err).NotTo(HaveOccurred()) + + err = controller.Initialize(release) + Expect(err).NotTo(HaveOccurred()) + fetch := &kruiseappsv1beta1.StatefulSet{} + Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) + Expect(*fetch.Spec.UpdateStrategy.RollingUpdate.Partition).Should(BeNumerically(">=", 1000)) + Expect(fetch.Annotations[util.BatchReleaseControlAnnotation]).Should(Equal(getControlInfo(release))) + c.object = fetch // mock + + for { + batchContext, err := controller.CalculateBatchContext(release) + Expect(err).NotTo(HaveOccurred()) + err = controller.UpgradeBatch(batchContext) + // mock + fetch = &kruiseappsv1beta1.StatefulSet{} + Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) + c.object = fetch + if err == nil { + break + } + } + fetch = &kruiseappsv1beta1.StatefulSet{} + Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) + Expect(*fetch.Spec.UpdateStrategy.RollingUpdate.Partition).Should(BeNumerically("==", 9)) + + // mock + _ = controller.Finalize(release) + fetch = &kruiseappsv1beta1.StatefulSet{} + Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) + c.object = fetch + err = controller.Finalize(release) + Expect(err).NotTo(HaveOccurred()) + fetch = 
&kruiseappsv1beta1.StatefulSet{} + Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) + Expect(fetch.Annotations[util.BatchReleaseControlAnnotation]).Should(Equal("")) + + stableInfo := controller.GetInfo() + Expect(stableInfo).ShouldNot(BeNil()) + checkWorkloadInfo(stableInfo, sts) +} + +func checkWorkloadInfo(stableInfo *util.WorkloadInfo, sts *kruiseappsv1beta1.StatefulSet) { + Expect(stableInfo.Replicas).Should(Equal(*sts.Spec.Replicas)) + Expect(stableInfo.Status.Replicas).Should(Equal(sts.Status.Replicas)) + Expect(stableInfo.Status.ReadyReplicas).Should(Equal(sts.Status.ReadyReplicas)) + Expect(stableInfo.Status.UpdatedReplicas).Should(Equal(sts.Status.UpdatedReplicas)) + Expect(stableInfo.Status.UpdateRevision).Should(Equal(sts.Status.UpdateRevision)) + Expect(stableInfo.Status.UpdatedReadyReplicas).Should(Equal(sts.Status.UpdatedReadyReplicas)) + Expect(stableInfo.Status.StableRevision).Should(Equal(sts.Status.CurrentRevision)) + Expect(stableInfo.Status.AvailableReplicas).Should(Equal(sts.Status.AvailableReplicas)) + Expect(stableInfo.Status.ObservedGeneration).Should(Equal(sts.Status.ObservedGeneration)) +} + +func getControlInfo(release *v1alpha1.BatchRelease) string { + owner, _ := json.Marshal(metav1.NewControllerRef(release, release.GetObjectKind().GroupVersionKind())) + return string(owner) +} + +func generatePods(replicas int, version, readyStatus string) []*corev1.Pod { + var pods []*corev1.Pod + for replicas > 0 { + pods = append(pods, &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: fmt.Sprintf("pod-%s", rand.String(10)), + Labels: map[string]string{ + "app": "foo", + apps.ControllerRevisionHashLabelKey: version, + }, + OwnerReferences: []metav1.OwnerReference{{ + UID: "test", + Controller: pointer.Bool(true), + }}, + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{{ + Type: corev1.PodReady, + Status: corev1.ConditionStatus(readyStatus), + }}, + }, + }) + replicas-- + } + return pods 
+} diff --git a/pkg/controller/batchrelease/control/util.go b/pkg/controller/batchrelease/control/util.go new file mode 100644 index 00000000..c1a4a698 --- /dev/null +++ b/pkg/controller/batchrelease/control/util.go @@ -0,0 +1,106 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package control + +import ( + "encoding/json" + "fmt" + "strings" + + "github.com/openkruise/rollouts/api/v1alpha1" + "github.com/openkruise/rollouts/pkg/util" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// CalculateBatchReplicas return the planned updated replicas of current batch. 
+func CalculateBatchReplicas(release *v1alpha1.BatchRelease, workloadReplicas, currentBatch int) int { + batchSize, _ := intstr.GetScaledValueFromIntOrPercent(&release.Spec.ReleasePlan.Batches[currentBatch].CanaryReplicas, workloadReplicas, true) + if batchSize > workloadReplicas { + klog.Warningf("releasePlan has wrong batch replicas, batches[%d].replicas %v is more than workload.replicas %v", currentBatch, batchSize, workloadReplicas) + batchSize = workloadReplicas + } else if batchSize < 0 { + klog.Warningf("releasePlan has wrong batch replicas, batches[%d].replicas %v is less than 0 %v", currentBatch, batchSize) + batchSize = 0 + } + + klog.V(3).InfoS("calculated the number of new pod size", "current batch", currentBatch, "new pod target", batchSize) + return batchSize +} + +// IsControlledByBatchRelease return true if +// * object ownerReference has referred release; +// * object has batchRelease control info annotation about release. +func IsControlledByBatchRelease(release *v1alpha1.BatchRelease, object client.Object) bool { + if owner := metav1.GetControllerOfNoCopy(object); owner != nil && owner.UID == release.UID { + return true + } + if controlInfo, ok := object.GetAnnotations()[util.BatchReleaseControlAnnotation]; ok && controlInfo != "" { + ref := &metav1.OwnerReference{} + err := json.Unmarshal([]byte(controlInfo), ref) + if err == nil && ref.UID == release.UID { + return true + } + } + return false +} + +// BuildReleaseControlInfo return a NewControllerRef of release with escaped `"`. +func BuildReleaseControlInfo(release *v1alpha1.BatchRelease) string { + owner, _ := json.Marshal(metav1.NewControllerRef(release, release.GetObjectKind().GroupVersionKind())) + return strings.Replace(string(owner), `"`, `\"`, -1) +} + +// ParseIntegerAsPercentageIfPossible will return a percentage type IntOrString, such as "20%", "33%", but "33.3%" is illegal. 
+// Given A, B, return P that should try best to satisfy ⌈P * B⌉ == A, and we ensure that the error is less than 1%. +// For examples: +// * Given stableReplicas 1, allReplicas 3, return "33%"; +// * Given stableReplicas 98, allReplicas 99, return "97%"; +// * Given stableReplicas 1, allReplicas 101, return "1"; +func ParseIntegerAsPercentageIfPossible(stableReplicas, allReplicas int32, canaryReplicas *intstr.IntOrString) intstr.IntOrString { + if stableReplicas >= allReplicas { + return intstr.FromString("100%") + } + + if stableReplicas <= 0 { + return intstr.FromString("0%") + } + + pValue := stableReplicas * 100 / allReplicas + percent := intstr.FromString(fmt.Sprintf("%v%%", pValue)) + restoredStableReplicas, _ := intstr.GetScaledValueFromIntOrPercent(&percent, int(allReplicas), true) + // restoredStableReplicas == 0 is un-tolerated if user-defined canaryReplicas is not 100%. + // we must make sure that at least one canary pod is created. + if restoredStableReplicas <= 0 && canaryReplicas.StrVal != "100%" { + return intstr.FromString("1%") + } + + return percent +} + +// GenerateNotFoundError return a not found error +func GenerateNotFoundError(name, resource string) error { + return errors.NewNotFound(schema.GroupResource{Group: "apps", Resource: resource}, name) +} + +func ShouldWaitResume(release *v1alpha1.BatchRelease) bool { + return release.Spec.ReleasePlan.FinalizingPolicy == v1alpha1.WaitResumeFinalizingPolicyType +} diff --git a/pkg/controller/batchrelease/control/util_test.go b/pkg/controller/batchrelease/control/util_test.go new file mode 100644 index 00000000..fa11b737 --- /dev/null +++ b/pkg/controller/batchrelease/control/util_test.go @@ -0,0 +1,169 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package control + +import ( + "encoding/json" + "fmt" + "math" + "testing" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + "github.com/openkruise/rollouts/api/v1alpha1" + "github.com/openkruise/rollouts/pkg/util" + apps "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +func TestParseIntegerAsPercentage(t *testing.T) { + RegisterFailHandler(Fail) + + supposeUpper := 10000 + for allReplicas := 1; allReplicas <= supposeUpper; allReplicas++ { + for percent := 0; percent <= 100; percent++ { + canaryPercent := intstr.FromString(fmt.Sprintf("%v%%", percent)) + canaryReplicas, _ := intstr.GetScaledValueFromIntOrPercent(&canaryPercent, allReplicas, true) + partition := ParseIntegerAsPercentageIfPossible(int32(allReplicas-canaryReplicas), int32(allReplicas), &canaryPercent) + stableReplicas, _ := intstr.GetScaledValueFromIntOrPercent(&partition, allReplicas, true) + if percent == 0 { + Expect(stableReplicas).Should(BeNumerically("==", allReplicas)) + } else if percent == 100 { + Expect(stableReplicas).Should(BeNumerically("==", 0)) + } else if percent > 0 { + Expect(allReplicas - stableReplicas).To(BeNumerically(">", 0)) + } + Expect(stableReplicas).Should(BeNumerically("<=", allReplicas)) + Expect(math.Abs(float64((allReplicas - canaryReplicas) - stableReplicas))).Should(BeNumerically("<", float64(allReplicas)*0.01)) + } + } +} + +func TestCalculateBatchReplicas(t *testing.T) { + RegisterFailHandler(Fail) + + cases := map[string]struct { + batchReplicas intstr.IntOrString + workloadReplicas int32 
+ expectedReplicas int32 + }{ + "batch: 5, replicas: 10": { + batchReplicas: intstr.FromInt(5), + workloadReplicas: 10, + expectedReplicas: 5, + }, + "batch: 20%, replicas: 10": { + batchReplicas: intstr.FromString("20%"), + workloadReplicas: 10, + expectedReplicas: 2, + }, + "batch: 100%, replicas: 10": { + batchReplicas: intstr.FromString("100%"), + workloadReplicas: 10, + expectedReplicas: 10, + }, + "batch: 200%, replicas: 10": { + batchReplicas: intstr.FromString("200%"), + workloadReplicas: 10, + expectedReplicas: 10, + }, + "batch: 200, replicas: 10": { + batchReplicas: intstr.FromInt(200), + workloadReplicas: 10, + expectedReplicas: 10, + }, + "batch: 0, replicas: 10": { + batchReplicas: intstr.FromInt(0), + workloadReplicas: 10, + expectedReplicas: 0, + }, + } + + for name, cs := range cases { + t.Run(name, func(t *testing.T) { + release := &v1alpha1.BatchRelease{ + Spec: v1alpha1.BatchReleaseSpec{ + ReleasePlan: v1alpha1.ReleasePlan{ + Batches: []v1alpha1.ReleaseBatch{ + { + CanaryReplicas: cs.batchReplicas, + }, + }, + }, + }, + } + got := CalculateBatchReplicas(release, int(cs.workloadReplicas), 0) + Expect(got).Should(BeNumerically("==", cs.expectedReplicas)) + }) + } +} + +func TestIsControlledByBatchRelease(t *testing.T) { + RegisterFailHandler(Fail) + + release := &v1alpha1.BatchRelease{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "rollouts.kruise.io/v1alpha1", + Kind: "BatchRelease", + }, + ObjectMeta: metav1.ObjectMeta{ + UID: "test", + Name: "test", + Namespace: "test", + }, + } + + controlInfo, _ := json.Marshal(metav1.NewControllerRef(release, release.GroupVersionKind())) + + cases := map[string]struct { + object *apps.Deployment + result bool + }{ + "ownerRef": { + object: &apps.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{ + *metav1.NewControllerRef(release, release.GroupVersionKind()), + }, + }, + }, + result: true, + }, + "annoRef": { + object: &apps.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + 
Annotations: map[string]string{ + util.BatchReleaseControlAnnotation: string(controlInfo), + }, + }, + }, + result: true, + }, + "notRef": { + object: &apps.Deployment{}, + result: false, + }, + } + + for name, cs := range cases { + t.Run(name, func(t *testing.T) { + got := IsControlledByBatchRelease(release, cs.object) + Expect(got == cs.result).To(BeTrue()) + }) + } +} diff --git a/pkg/controller/batchrelease/labelpatch/filter.go b/pkg/controller/batchrelease/labelpatch/filter.go new file mode 100644 index 00000000..ff6a0ae2 --- /dev/null +++ b/pkg/controller/batchrelease/labelpatch/filter.go @@ -0,0 +1,149 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package labelpatch + +import ( + "sort" + "strconv" + "strings" + + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/util" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/integer" +) + +// FilterPodsForUnorderedUpdate can filter pods before patch pod batch label when rolling back in batches. 
+// for example: +// * There are 20 replicas: 10 updated replicas (version 2), 10 replicas (version 1), and the release plan is +// - batch 0: 20% +// - batch 1: 50% +// - batch 2: 100% +// If we now decide to roll back to version 1, this function lets us roll back only +// the pods that really need to be rolled back according to the release plan, while the batch label is still patched according to +// the original release plan, giving priority to the pods that are really rolled back. +// - in batch 0: really roll back (20 - 10) * 20% = 2 pods, but 20 * 20% = 4 pods will be patched with the batch label; +// - in batch 1: really roll back (20 - 10) * 50% = 5 pods, but 20 * 50% = 10 pods will be patched with the batch label; +// - in batch 2: really roll back (20 - 10) * 100% = 10 pods, but 20 * 100% = 20 pods will be patched with the batch label; +// +// Mainly so that a PaaS platform can display the pod list conveniently. +// +// This function only works for unordered update strategies, such as CloneSet, Deployment, or Advanced +// StatefulSet with unordered update strategy.
+func FilterPodsForUnorderedUpdate(pods []*corev1.Pod, ctx *batchcontext.BatchContext) []*corev1.Pod { + var terminatingPods []*corev1.Pod + var lowPriorityPods []*corev1.Pod + var highPriorityPods []*corev1.Pod + + noNeedUpdate := int32(0) + for _, pod := range pods { + if !pod.DeletionTimestamp.IsZero() { + terminatingPods = append(terminatingPods, pod) + continue + } + if !util.IsConsistentWithRevision(pod, ctx.UpdateRevision) { + continue + } + if pod.Labels[util.NoNeedUpdatePodLabel] == ctx.RolloutID && pod.Labels[util.RolloutIDLabel] != ctx.RolloutID { + noNeedUpdate++ + lowPriorityPods = append(lowPriorityPods, pod) + } else { + highPriorityPods = append(highPriorityPods, pod) + } + } + + needUpdate := ctx.DesiredUpdatedReplicas - noNeedUpdate + if needUpdate <= 0 { // may never occur + return pods + } + + diff := ctx.PlannedUpdatedReplicas - needUpdate + if diff <= 0 { + return append(highPriorityPods, terminatingPods...) + } + + lastIndex := integer.Int32Min(diff, int32(len(lowPriorityPods))) + return append(append(highPriorityPods, lowPriorityPods[:lastIndex]...), terminatingPods...) +} + +// FilterPodsForOrderedUpdate can filter pods before patch pod batch label when rolling back in batches. +// for example: +// * There are 20 replicas: 10 updated replicas (version 2), 10 replicas (version 1), and the release plan is +// - batch 0: 20% +// - batch 1: 50% +// - batch 2: 100% +// Currently, if we decide to roll back to version 1, if you use this function, can help you just rollback +// the pods that are really need to be rolled back according to release plan, but patch batch label according +// original release plan, and will patch the pods that are really rolled back in priority. 
+// - in batch 0: really roll back (20 - 10) * 20% = 2 pods, but 20 * 20% = 4 pod will be patched batch label; +// - in batch 0: really roll back (20 - 10) * 50% = 5 pods, but 20 * 50% = 10 pod will be patched batch label; +// - in batch 0: really roll back (20 - 10) * 100% = 10 pods, but 20 * 100% = 20 pod will be patched batch label; +// +// Mainly for PaaS platform display pod list in conveniently. +// +// This function only works for such unordered update strategy, such as Native StatefulSet, and Advanced StatefulSet +// with ordered update strategy. +// TODO: support advanced statefulSet reserveOrdinal feature +func FilterPodsForOrderedUpdate(pods []*corev1.Pod, ctx *batchcontext.BatchContext) []*corev1.Pod { + var terminatingPods []*corev1.Pod + var lowPriorityPods []*corev1.Pod + var highPriorityPods []*corev1.Pod + + sortPodsByOrdinal(pods) + partition, _ := intstr.GetScaledValueFromIntOrPercent( + &ctx.DesiredPartition, int(ctx.Replicas), true) + for _, pod := range pods { + if !pod.DeletionTimestamp.IsZero() { + terminatingPods = append(terminatingPods, pod) + continue + } + if !util.IsConsistentWithRevision(pod, ctx.UpdateRevision) { + continue + } + if getPodOrdinal(pod) >= partition { + highPriorityPods = append(highPriorityPods, pod) + } else { + lowPriorityPods = append(lowPriorityPods, pod) + } + } + needUpdate := ctx.Replicas - int32(partition) + if needUpdate <= 0 { // may never occur + return pods + } + + diff := ctx.PlannedUpdatedReplicas - needUpdate + if diff <= 0 { + return append(highPriorityPods, terminatingPods...) + } + + lastIndex := integer.Int32Min(diff, int32(len(lowPriorityPods))) + return append(append(highPriorityPods, lowPriorityPods[:lastIndex]...), terminatingPods...) 
+} + +func sortPodsByOrdinal(pods []*corev1.Pod) { + sort.Slice(pods, func(i, j int) bool { + ordI, _ := strconv.Atoi(pods[i].Name[strings.LastIndex(pods[i].Name, "-"):]) + ordJ, _ := strconv.Atoi(pods[j].Name[strings.LastIndex(pods[j].Name, "-"):]) + return ordJ > ordI + }) +} + +func getPodOrdinal(pod *corev1.Pod) int { + ord, _ := strconv.Atoi(pod.Name[strings.LastIndex(pod.Name, "-")+1:]) + return ord +} diff --git a/pkg/controller/batchrelease/labelpatch/filter_test.go b/pkg/controller/batchrelease/labelpatch/filter_test.go new file mode 100644 index 00000000..63c02364 --- /dev/null +++ b/pkg/controller/batchrelease/labelpatch/filter_test.go @@ -0,0 +1,315 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package labelpatch + +import ( + "fmt" + "math/rand" + "testing" + + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/util" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +func TestFilterPodsForUnorderedRollback(t *testing.T) { + gomega.RegisterFailHandler(ginkgo.Fail) + + cases := []struct { + Name string + GetPods func() []*corev1.Pod + ExpectWithLabels int + ExpectWithoutLabels int + Replicas int32 + NoNeedUpdatedReplicas int32 + PlannedUpdatedReplicas int32 + DesiredUpdatedReplicas int32 + }{ + { + Name: "replicas=10, updatedReplicas=10, noNeedRollback=5, stepCanary=20%, realCanary=6", + GetPods: func() []*corev1.Pod { + return generatePodsWith(10, 5) + }, + Replicas: 10, + NoNeedUpdatedReplicas: 5, + PlannedUpdatedReplicas: 2, + DesiredUpdatedReplicas: 6, + ExpectWithoutLabels: 5, + ExpectWithLabels: 1, + }, + { + Name: "replicas=10, updatedReplicas=10, noNeedRollback=5, stepCanary=60%, realCanary=8", + GetPods: func() []*corev1.Pod { + return generatePodsWith(10, 5) + }, + Replicas: 10, + NoNeedUpdatedReplicas: 5, + PlannedUpdatedReplicas: 6, + DesiredUpdatedReplicas: 8, + ExpectWithoutLabels: 5, + ExpectWithLabels: 3, + }, + { + Name: "replicas=10, updatedReplicas=10, noNeedRollback=5, stepCanary=100%, realCanary=10", + GetPods: func() []*corev1.Pod { + return generatePodsWith(10, 5) + }, + Replicas: 10, + NoNeedUpdatedReplicas: 5, + PlannedUpdatedReplicas: 10, + DesiredUpdatedReplicas: 10, + ExpectWithoutLabels: 5, + ExpectWithLabels: 5, + }, + { + Name: "replicas=10, updatedReplicas=9, noNeedRollback=7, stepCanary=20%, realCanary=6", + GetPods: func() []*corev1.Pod { + return generatePodsWith(9, 7) + }, + Replicas: 10, + NoNeedUpdatedReplicas: 7, + PlannedUpdatedReplicas: 2, + DesiredUpdatedReplicas: 6, + 
ExpectWithoutLabels: 2, + ExpectWithLabels: 7, + }, + { + Name: "replicas=10, updatedReplicas=8, noNeedRollback=7, stepCanary=60%, realCanary=8", + GetPods: func() []*corev1.Pod { + return generatePodsWith(8, 7) + }, + Replicas: 10, + NoNeedUpdatedReplicas: 7, + PlannedUpdatedReplicas: 6, + DesiredUpdatedReplicas: 8, + ExpectWithoutLabels: 1, + ExpectWithLabels: 5, + }, + { + Name: "replicas=10, updatedReplicas=9, noNeedRollback=7, stepCanary=100%, realCanary=10", + GetPods: func() []*corev1.Pod { + return generatePodsWith(9, 7) + }, + Replicas: 10, + NoNeedUpdatedReplicas: 7, + PlannedUpdatedReplicas: 10, + DesiredUpdatedReplicas: 10, + ExpectWithoutLabels: 2, + ExpectWithLabels: 7, + }, + { + Name: "replicas=10, updatedReplicas=6, noNeedRollback=5, stepCanary=20%, realCanary=6", + GetPods: func() []*corev1.Pod { + return generatePodsWith(6, 5) + }, + Replicas: 10, + NoNeedUpdatedReplicas: 5, + PlannedUpdatedReplicas: 2, + DesiredUpdatedReplicas: 6, + ExpectWithoutLabels: 1, + ExpectWithLabels: 1, + }, + { + Name: "replicas=10, updatedReplicas=6, noNeedRollback=5, stepCanary=60%, realCanary=8", + GetPods: func() []*corev1.Pod { + return generatePodsWith(6, 5) + }, + Replicas: 10, + NoNeedUpdatedReplicas: 5, + PlannedUpdatedReplicas: 6, + DesiredUpdatedReplicas: 8, + ExpectWithoutLabels: 1, + ExpectWithLabels: 3, + }, + } + + check := func(pods []*corev1.Pod, expectWith, expectWithout int) bool { + var with, without int + for _, pod := range pods { + if pod.Labels[util.NoNeedUpdatePodLabel] == "0x1" { + with++ + } else { + without++ + } + } + return with == expectWith && without == expectWithout + } + + for _, cs := range cases { + t.Run(cs.Name, func(t *testing.T) { + pods := cs.GetPods() + for i := 0; i < 10; i++ { + rand.Shuffle(len(pods), func(i, j int) { + pods[i], pods[j] = pods[j], pods[i] + }) + batchCtx := &batchcontext.BatchContext{ + Replicas: cs.Replicas, + RolloutID: "0x1", + UpdateRevision: "version-1", + PlannedUpdatedReplicas: 
cs.PlannedUpdatedReplicas,
+					DesiredUpdatedReplicas: cs.DesiredUpdatedReplicas,
+				}
+				filteredPods := FilterPodsForUnorderedUpdate(pods, batchCtx)
+				var podName []string
+				for i := range filteredPods {
+					podName = append(podName, filteredPods[i].Name)
+				}
+				fmt.Println(podName)
+				gomega.Expect(check(filteredPods, cs.ExpectWithLabels, cs.ExpectWithoutLabels)).To(gomega.BeTrue())
+			}
+		})
+	}
+}
+
+// TestFilterPodsForOrderedRollback is a table-driven check of
+// FilterPodsForOrderedUpdate. For every case the generated pods are shuffled
+// ten times; after each shuffle the filtered result must contain exactly
+// ExpectWithLabels pods whose util.NoNeedUpdatePodLabel equals "0x1" and
+// ExpectWithoutLabels pods that do not.
+func TestFilterPodsForOrderedRollback(t *testing.T) {
+	gomega.RegisterFailHandler(ginkgo.Fail)
+
+	// Note how the case fields are wired into the BatchContext further down:
+	// PlannedUpdatedReplicas feeds both BatchContext.PlannedUpdatedReplicas and
+	// BatchContext.DesiredUpdatedReplicas, while DesiredUpdatedReplicas is used
+	// as BatchContext.DesiredPartition.
+	// NOTE(review): confirm this mapping is intended.
+	cases := []struct {
+		Name                   string
+		GetPods                func() []*corev1.Pod
+		ExpectWithLabels       int
+		ExpectWithoutLabels    int
+		Replicas               int32
+		PlannedUpdatedReplicas int32
+		DesiredUpdatedReplicas int32
+	}{
+		{
+			Name: "replicas=10, updatedReplicas=10, stepCanary=40%, realCanary=2",
+			GetPods: func() []*corev1.Pod {
+				return generatePodsWith(10, 8)
+			},
+			Replicas:               10,
+			PlannedUpdatedReplicas: 4,
+			DesiredUpdatedReplicas: 8,
+			ExpectWithoutLabels:    2,
+			ExpectWithLabels:       2,
+		},
+		{
+			Name: "replicas=10, updatedReplicas=10, stepCanary=60%, realCanary=2",
+			GetPods: func() []*corev1.Pod {
+				return generatePodsWith(10, 8)
+			},
+			Replicas:               10,
+			PlannedUpdatedReplicas: 6,
+			DesiredUpdatedReplicas: 8,
+			ExpectWithoutLabels:    2,
+			ExpectWithLabels:       4,
+		},
+		{
+			Name: "replicas=10, updatedReplicas=10, stepCanary=100%, realCanary=10",
+			GetPods: func() []*corev1.Pod {
+				return generatePodsWith(10, 0)
+			},
+			Replicas:               10,
+			PlannedUpdatedReplicas: 10,
+			DesiredUpdatedReplicas: 0,
+			ExpectWithoutLabels:    10,
+			ExpectWithLabels:       0,
+		},
+		{
+			Name: "replicas=10, updatedReplicas=9, stepCanary=20%, realCanary=2",
+			GetPods: func() []*corev1.Pod {
+				return generatePodsWith(9, 8)
+			},
+			Replicas:               10,
+			PlannedUpdatedReplicas: 2,
+			DesiredUpdatedReplicas: 8,
+			ExpectWithoutLabels:    1,
+			ExpectWithLabels:       0,
+		},
+	}
+
+	// check counts the pods carrying util.NoNeedUpdatePodLabel == "0x1" and the
+	// remaining ones, and reports whether both counts match the expectation.
+	check := func(pods []*corev1.Pod, expectWith, expectWithout int) bool {
+		var with, without int
+		for _, pod := range pods {
+			if pod.Labels[util.NoNeedUpdatePodLabel] == "0x1" {
+				with++
+			} else {
+				without++
+			}
+		}
+		return with == expectWith && without == expectWithout
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.Name, func(t *testing.T) {
+			pods := cs.GetPods()
+			// Repeat with a fresh random order each time so the assertion does
+			// not depend on the order in which the pods are handed in.
+			for i := 0; i < 10; i++ {
+				rand.Shuffle(len(pods), func(i, j int) {
+					pods[i], pods[j] = pods[j], pods[i]
+				})
+				batchCtx := &batchcontext.BatchContext{
+					DesiredUpdatedReplicas: cs.PlannedUpdatedReplicas,
+					DesiredPartition:       intstr.FromInt(int(cs.DesiredUpdatedReplicas)),
+					Replicas:               cs.Replicas,
+					RolloutID:              "0x1",
+					UpdateRevision:         "version-1",
+					PlannedUpdatedReplicas: cs.PlannedUpdatedReplicas,
+				}
+				filteredPods := FilterPodsForOrderedUpdate(pods, batchCtx)
+				var podName []string
+				for i := range filteredPods {
+					podName = append(podName, filteredPods[i].Name)
+				}
+				// Printed only as a debugging aid; the assertion is the check below.
+				fmt.Println(podName)
+				gomega.Expect(check(filteredPods, cs.ExpectWithLabels, cs.ExpectWithoutLabels)).To(gomega.BeTrue())
+			}
+		})
+	}
+}
+
+// TestSortPodsByOrdinal shuffles 100 generated pods (named pod-name-0 through
+// pod-name-99) and expects sortPodsByOrdinal to leave them in descending
+// ordinal order, i.e. pod-name-99 at index 0 and pod-name-0 at index 99.
+func TestSortPodsByOrdinal(t *testing.T) {
+	gomega.RegisterFailHandler(ginkgo.Fail)
+
+	pods := generatePodsWith(100, 10)
+	rand.Shuffle(len(pods), func(i, j int) {
+		pods[i], pods[j] = pods[j], pods[i]
+	})
+	sortPodsByOrdinal(pods)
+	for i, pod := range pods {
+		expectedName := fmt.Sprintf("pod-name-%d", 99-i)
+		gomega.Expect(pod.Name == expectedName).Should(gomega.BeTrue())
+	}
+}
+
+// generatePodsWith returns updatedReplicas pods that all carry the revision
+// hash label (apps.ControllerRevisionHashLabelKey) set to "version-1".
+// noNeedRollbackReplicas of them, with ordinals 0..noNeedRollbackReplicas-1,
+// additionally carry util.NoNeedUpdatePodLabel = "0x1"; those pods are placed
+// at the end of the returned slice, after the pods with ordinals
+// noNeedRollbackReplicas..updatedReplicas-1.
+func generatePodsWith(updatedReplicas, noNeedRollbackReplicas int) []*corev1.Pod {
+	podsNoNeed := generateLabeledPods(map[string]string{
+		util.NoNeedUpdatePodLabel:           "0x1",
+		apps.ControllerRevisionHashLabelKey: "version-1",
+	}, noNeedRollbackReplicas, 0)
+	return append(generateLabeledPods(map[string]string{
+		apps.ControllerRevisionHashLabelKey: "version-1",
+	}, updatedReplicas-noNeedRollbackReplicas, noNeedRollbackReplicas), podsNoNeed...)
+}
+
+// generateLabeledPods builds replicas pods named "pod-name-<ordinal>", with
+// ordinals running from beginOrder to beginOrder+replicas-1.
+//
+// Each pod receives its own copy of labels. Handing the same map to every pod
+// would make them alias one another, so relabeling a single pod in memory
+// would silently relabel all of its siblings. A nil labels argument yields
+// pods with nil Labels.
+func generateLabeledPods(labels map[string]string, replicas int, beginOrder int) []*corev1.Pod {
+	pods := make([]*corev1.Pod, replicas)
+	for i := range pods {
+		var podLabels map[string]string
+		if labels != nil {
+			podLabels = make(map[string]string, len(labels))
+			for key, value := range labels {
+				podLabels[key] = value
+			}
+		}
+		pods[i] = &corev1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:   fmt.Sprintf("pod-name-%d", beginOrder+i),
+				Labels: podLabels,
+			},
+		}
+	}
+	return pods
+}
diff --git a/pkg/controller/batchrelease/labelpatch/patcher.go b/pkg/controller/batchrelease/labelpatch/patcher.go
new file mode 100644
index 00000000..e4867a59
--- /dev/null
+++ b/pkg/controller/batchrelease/labelpatch/patcher.go
@@ -0,0 +1,112 @@
+/*
+Copyright 2022 The Kruise Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package labelpatch
+
+import (
+	"context"
+	"fmt"
+
+	batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context"
+	"github.com/openkruise/rollouts/pkg/util"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/klog/v2"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// LabelPatcher writes the rollout-id and batch-id labels onto the pods of a
+// batch context.
+type LabelPatcher interface {
+	// PatchPodBatchLabel labels the pods referenced by ctx and returns an
+	// error when a patch fails or the planned number of pods is not reached.
+	PatchPodBatchLabel(ctx *batchcontext.BatchContext) error
+}
+
+// NewLabelPatcher returns a patcher that issues its patches through cli.
+// logKey is only used to identify the owner in the returned error message.
+func NewLabelPatcher(cli client.Client, logKey klog.ObjectRef) *realPatcher {
+	patcher := new(realPatcher)
+	patcher.Client = cli
+	patcher.logKey = logKey
+	return patcher
+}
+
+// realPatcher is the client-backed implementation of LabelPatcher.
+type realPatcher struct {
+	client.Client
+	logKey klog.ObjectRef
+}
+
+// PatchPodBatchLabel is a no-op when ctx has no rollout id or no pods.
+// Otherwise it runs ctx.Pods through ctx.FilterFunc (when one is set) and
+// labels the resulting pods.
+func (r *realPatcher) PatchPodBatchLabel(ctx *batchcontext.BatchContext) error {
+	if len(ctx.Pods) == 0 || ctx.RolloutID == "" {
+		return nil
+	}
+	if filter := ctx.FilterFunc; filter != nil {
+		return r.patchPodBatchLabel(filter(ctx.Pods, ctx), ctx)
+	}
+	return r.patchPodBatchLabel(ctx.Pods, ctx)
+}
+
+// patchPodBatchLabel patches util.RolloutIDLabel (ctx.RolloutID) and
+// util.RolloutBatchIDLabel (ctx.CurrentBatch+1) onto pods until
+// ctx.PlannedUpdatedReplicas live pods of the update revision carry the
+// current rollout id.
+//
+// Pods with a non-zero DeletionTimestamp are patched regardless of their
+// revision whenever they lack the current rollout id, but they never count
+// towards the goal. The first failing patch aborts the loop and is returned
+// as is; if the goal is still unmet after all pods were visited an error
+// describing the shortfall is returned.
+func (r *realPatcher) patchPodBatchLabel(pods []*corev1.Pod, ctx *batchcontext.BatchContext) error {
+	// goal is how many live pods are expected to carry the batch label.
+	goal := ctx.PlannedUpdatedReplicas
+
+	// labeled counts the live update-revision pods that already carry the
+	// current rollout id.
+	var labeled int32
+	for _, p := range pods {
+		if util.IsConsistentWithRevision(p, ctx.UpdateRevision) &&
+			p.DeletionTimestamp.IsZero() &&
+			p.Labels[util.RolloutIDLabel] == ctx.RolloutID {
+			labeled++
+		}
+	}
+
+	// fast path: nothing left to patch
+	if labeled >= goal {
+		return nil
+	}
+
+	for _, p := range pods {
+		alive := p.DeletionTimestamp.IsZero()
+		switch {
+		case alive && !util.IsConsistentWithRevision(p, ctx.UpdateRevision):
+			// live pods of an old revision are never labeled
+			continue
+		case alive && labeled >= goal:
+			// goal reached: only terminating pods may still be patched
+			continue
+		case p.Labels[util.RolloutIDLabel] == ctx.RolloutID:
+			// already carries the current rollout id
+			continue
+		}
+
+		batchID := ctx.CurrentBatch + 1
+		target := util.GetEmptyObjectWithKey(p)
+		body := fmt.Sprintf(`{"metadata":{"labels":{"%s":"%s","%s":"%d"}}}`,
+			util.RolloutIDLabel, ctx.RolloutID, util.RolloutBatchIDLabel, batchID)
+		err := r.Patch(context.TODO(), target, client.RawPatch(types.StrategicMergePatchType, []byte(body)))
+		if err != nil {
+			return err
+		}
+
+		if alive {
+			labeled++
+		}
+		klog.Infof("Successfully patch Pod(%v) batchID %d label", klog.KObj(p), batchID)
+	}
+
+	if labeled < goal {
+		return fmt.Errorf("patched %v pods for %v, however the goal is %d", labeled, r.logKey, goal)
+	}
+	return nil
+}
diff --git a/pkg/controller/batchrelease/labelpatch/patcher_test.go b/pkg/controller/batchrelease/labelpatch/patcher_test.go
new file mode 100644
index 00000000..d32c96c3
--- /dev/null
+++ b/pkg/controller/batchrelease/labelpatch/patcher_test.go
@@ -0,0 +1,183 @@
+/*
+Copyright 2022 The Kruise Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package labelpatch
+
+import (
+	"context"
+	"strconv"
+	"testing"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+	batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context"
+	"github.com/openkruise/rollouts/pkg/util"
+	apps "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/klog/v2"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+)
+
+var (
+	// scheme is the runtime scheme handed to the fake client in the tests below.
+	scheme = runtime.NewScheme()
+)
+
+// init registers the core/v1 types with scheme. A registration failure would
+// make every test in this package meaningless, so it is surfaced immediately
+// instead of being dropped.
+func init() {
+	if err := corev1.AddToScheme(scheme); err != nil {
+		panic(err)
+	}
+}
+
+// TestLabelPatcher runs patchPodBatchLabel against a fake client seeded with
+// the pods of each case and then counts, via a List on that client, how many
+// pods carry the case's rollout id. The count must equal expectedPatched; an
+// error is expected exactly when that count stays below
+// PlannedUpdatedReplicas.
+func TestLabelPatcher(t *testing.T) {
+	RegisterFailHandler(Fail)
+
+	cases := map[string]struct {
+		batchContext    func() *batchcontext.BatchContext
+		expectedPatched int
+	}{
+		"10 pods, 0 patched, 5 new patched": {
+			batchContext: func() *batchcontext.BatchContext {
+				ctx := &batchcontext.BatchContext{
+					RolloutID:              "rollout-1",
+					UpdateRevision:         "version-1",
+					PlannedUpdatedReplicas: 5,
+					CurrentBatch:           0,
+					Replicas:               10,
+				}
+				pods := generatePods(1, ctx.Replicas, 0, "", "", ctx.UpdateRevision)
+				ctx.Pods = pods
+				return ctx
+			},
+			expectedPatched: 5,
+		},
+		"10 pods, 2 patched, 3 new patched": {
+			batchContext: func() *batchcontext.BatchContext {
+				ctx := &batchcontext.BatchContext{
+					RolloutID:              "rollout-1",
+					UpdateRevision:         "version-1",
+					PlannedUpdatedReplicas: 5,
+					Replicas:               10,
+				}
+				pods := generatePods(1, ctx.Replicas, 2,
+					ctx.RolloutID, strconv.Itoa(int(ctx.CurrentBatch)), ctx.UpdateRevision)
+				ctx.Pods = pods
+				return ctx
+			},
+			expectedPatched: 5,
+		},
+		"10 pods, 5 patched, 0 new patched": {
+			batchContext: func() *batchcontext.BatchContext {
+				ctx := &batchcontext.BatchContext{
+					RolloutID:              "rollout-1",
+					UpdateRevision:         "version-1",
+					PlannedUpdatedReplicas: 5,
+					Replicas:               10,
+				}
+				pods := generatePods(1, ctx.Replicas, 5,
+					ctx.RolloutID, strconv.Itoa(int(ctx.CurrentBatch)), ctx.UpdateRevision)
+				ctx.Pods = pods
+				return ctx
+			},
+			expectedPatched: 5,
+		},
+		"10 pods, 7 patched, 0 new patched": {
+			batchContext: func() *batchcontext.BatchContext {
+				ctx := &batchcontext.BatchContext{
+					RolloutID:              "rollout-1",
+					UpdateRevision:         "version-1",
+					PlannedUpdatedReplicas: 5,
+					Replicas:               10,
+				}
+				pods := generatePods(1, ctx.Replicas, 7,
+					ctx.RolloutID, strconv.Itoa(int(ctx.CurrentBatch)), ctx.UpdateRevision)
+				ctx.Pods = pods
+				return ctx
+			},
+			expectedPatched: 7,
+		},
+		"2 pods, 0 patched, 2 new patched": {
+			batchContext: func() *batchcontext.BatchContext {
+				ctx := &batchcontext.BatchContext{
+					RolloutID:              "rollout-1",
+					UpdateRevision:         "version-1",
+					PlannedUpdatedReplicas: 5,
+					Replicas:               10,
+				}
+				pods := generatePods(1, 2, 0,
+					ctx.RolloutID, strconv.Itoa(int(ctx.CurrentBatch)), ctx.UpdateRevision)
+				ctx.Pods = pods
+				return ctx
+			},
+			expectedPatched: 2,
+		},
+		"10 pods, 3 patched with old rollout-id, 5 new patched": {
+			batchContext: func() *batchcontext.BatchContext {
+				ctx := &batchcontext.BatchContext{
+					RolloutID:              "rollout-1",
+					UpdateRevision:         "version-1",
+					PlannedUpdatedReplicas: 5,
+					Replicas:               10,
+				}
+				pods := generatePods(1, ctx.Replicas, 3,
+					"previous-rollout-id", strconv.Itoa(int(ctx.CurrentBatch)), ctx.UpdateRevision)
+				ctx.Pods = pods
+				return ctx
+			},
+			expectedPatched: 5,
+		},
+	}
+
+	for name, cs := range cases {
+		t.Run(name, func(t *testing.T) {
+			ctx := cs.batchContext()
+			var objects []client.Object
+			for _, pod := range ctx.Pods {
+				objects = append(objects, pod)
+			}
+			cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build()
+			patcher := NewLabelPatcher(cli, klog.ObjectRef{Name: "test"})
+			patchErr := patcher.patchPodBatchLabel(ctx.Pods, ctx)
+
+			// Count through the client rather than ctx.Pods: the patcher
+			// patches copies, so only the client's view shows the new labels.
+			podList := &corev1.PodList{}
+			err := cli.List(context.TODO(), podList)
+			Expect(err).NotTo(HaveOccurred())
+			patched := 0
+			for _, pod := range podList.Items {
+				if pod.Labels[util.RolloutIDLabel] == ctx.RolloutID {
+					patched++
+				}
+			}
+			Expect(patched).Should(BeNumerically("==", cs.expectedPatched))
+			if patched < int(ctx.PlannedUpdatedReplicas) {
+				// the goal could not be reached, which must be reported
+				Expect(patchErr).To(HaveOccurred())
+			} else {
+				// the goal was reached, so no error may leak out
+				Expect(patchErr).NotTo(HaveOccurred())
+			}
+		})
+	}
+}
+
+// generatePods returns one pod per ordinal in [ordinalBegin, ordinalEnd], all
+// carrying the revision hash label set to version. The first labeled ordinals
+// additionally carry util.RolloutIDLabel = rolloutID and
+// util.RolloutBatchIDLabel = batchID. The pods without those two labels come
+// first in the returned slice, followed by the labeled ones.
+func generatePods(ordinalBegin, ordinalEnd, labeled int32, rolloutID, batchID, version string) []*corev1.Pod {
+	podsWithLabel := generateLabeledPods(map[string]string{
+		util.RolloutIDLabel:                 rolloutID,
+		util.RolloutBatchIDLabel:            batchID,
+		apps.ControllerRevisionHashLabelKey: version,
+	}, int(labeled), int(ordinalBegin))
+
+	total := ordinalEnd - ordinalBegin + 1
+	podsWithoutLabel := generateLabeledPods(map[string]string{
+		apps.ControllerRevisionHashLabelKey: version,
+	}, int(total-labeled), int(labeled+ordinalBegin))
+	return append(podsWithoutLabel, podsWithLabel...)
+}
diff --git a/pkg/controller/batchrelease/workloads/cloneset_control_plane.go b/pkg/controller/batchrelease/workloads/cloneset_control_plane.go
deleted file mode 100644
index 77399d6d..00000000
--- a/pkg/controller/batchrelease/workloads/cloneset_control_plane.go
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
-Copyright 2022 The Kruise Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -package workloads - -import ( - "context" - "fmt" - - kruiseappsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" - "github.com/openkruise/rollouts/api/v1alpha1" - "github.com/openkruise/rollouts/pkg/util" - v1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/client-go/tools/record" - "k8s.io/klog/v2" - "k8s.io/utils/pointer" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// CloneSetRolloutController is responsible for handling rollout CloneSet type of workloads -type CloneSetRolloutController struct { - cloneSetController - clone *kruiseappsv1alpha1.CloneSet -} - -// NewCloneSetRolloutController creates a new CloneSet rollout controller -func NewCloneSetRolloutController(cli client.Client, recorder record.EventRecorder, release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, targetNamespacedName types.NamespacedName) *CloneSetRolloutController { - return &CloneSetRolloutController{ - cloneSetController: cloneSetController{ - workloadController: workloadController{ - client: cli, - recorder: recorder, - release: release, - newStatus: newStatus, - }, - releasePlanKey: client.ObjectKeyFromObject(release), - targetNamespacedName: targetNamespacedName, - }, - } -} - -// VerifyWorkload verifies that the workload is ready to execute release plan -func (c *CloneSetRolloutController) VerifyWorkload() (bool, error) { - return true, nil -} - -// prepareBeforeRollback makes sure that the updated pods have been patched no-need-update label. -// return values: -// - bool: whether all updated pods have been patched no-need-update label; -// - *int32: how many pods have been patched; -// - err: whether error occurs. 
-func (c *CloneSetRolloutController) prepareBeforeRollback() (bool, *int32, error) { - if c.release.Annotations[util.RollbackInBatchAnnotation] != "true" { - return true, nil, nil - } - - noNeedRollbackReplicas := int32(0) - rolloutID := c.release.Spec.ReleasePlan.RolloutID - if rolloutID == "" { - return true, &noNeedRollbackReplicas, nil - } - - pods, err := util.ListOwnedPods(c.client, c.clone) - if err != nil { - klog.Errorf("Failed to list pods for CloneSet %v", c.targetNamespacedName) - return false, nil, err - } - - updateRevision := c.clone.Status.UpdateRevision - var filterPods []*v1.Pod - for i := range pods { - if !pods[i].DeletionTimestamp.IsZero() { - continue - } - if !util.IsConsistentWithRevision(pods[i], updateRevision) { - continue - } - if id, ok := pods[i].Labels[util.NoNeedUpdatePodLabel]; ok && id == rolloutID { - noNeedRollbackReplicas++ - continue - } - filterPods = append(filterPods, pods[i]) - } - - if len(filterPods) == 0 { - return true, &noNeedRollbackReplicas, nil - } - - for _, pod := range filterPods { - podClone := pod.DeepCopy() - body := fmt.Sprintf(`{"metadata":{"labels":{"%s":"%s"}}}`, util.NoNeedUpdatePodLabel, rolloutID) - err = c.client.Patch(context.TODO(), podClone, client.RawPatch(types.StrategicMergePatchType, []byte(body))) - if err != nil { - klog.Errorf("Failed to patch rollback labels[%s]=%s to pod %v", util.NoNeedUpdatePodLabel, rolloutID, client.ObjectKeyFromObject(pod)) - return false, &noNeedRollbackReplicas, err - } else { - klog.Info("Succeeded to patch rollback labels[%s]=%s to pod %v", util.NoNeedUpdatePodLabel, rolloutID, client.ObjectKeyFromObject(pod)) - } - noNeedRollbackReplicas++ - } - klog.Infof("BatchRelease(%v) find %v replicas no need to rollback", c.releasePlanKey, noNeedRollbackReplicas) - return false, &noNeedRollbackReplicas, nil -} - -// PrepareBeforeProgress makes sure that the source and target CloneSet is under our control -func (c *CloneSetRolloutController) PrepareBeforeProgress() (bool, 
*int32, error) { - if err := c.fetchCloneSet(); err != nil { - return false, nil, err - } - - done, noNeedRollbackReplicas, err := c.prepareBeforeRollback() - if err != nil || !done { - return false, noNeedRollbackReplicas, err - } - - // claim the cloneSet is under our control - if _, err := c.claimCloneSet(c.clone); err != nil { - return false, noNeedRollbackReplicas, err - } - - // record revisions and replicas info to BatchRelease.Status - c.recordCloneSetRevisionAndReplicas() - - c.recorder.Event(c.release, v1.EventTypeNormal, "InitializedSuccessfully", "Rollout resource are initialized") - return true, noNeedRollbackReplicas, nil -} - -// UpgradeOneBatch calculates the number of pods we can upgrade once according to the rollout spec -// and then set the partition accordingly -func (c *CloneSetRolloutController) UpgradeOneBatch() (bool, error) { - if err := c.fetchCloneSet(); err != nil { - return false, err - } - - if c.newStatus.ObservedWorkloadReplicas == 0 { - klog.Infof("BatchRelease(%v) observed workload replicas is 0, no need to upgrade", c.releasePlanKey) - return true, nil - } - - // if the workload status is untrustworthy - if c.clone.Status.ObservedGeneration != c.clone.Generation { - return false, nil - } - - var err error - var pods []*v1.Pod - if c.release.Spec.ReleasePlan.RolloutID != "" { - pods, err = util.ListOwnedPods(c.client, c.clone) - if err != nil { - klog.Errorf("Failed to list pods for CloneSet %v", c.targetNamespacedName) - return false, err - } - } - - var noNeedRollbackReplicas int32 - if c.newStatus.CanaryStatus.NoNeedUpdateReplicas != nil { - noNeedRollbackReplicas = countNoNeedRollbackReplicas(pods, c.newStatus.UpdateRevision, c.release.Spec.ReleasePlan.RolloutID) - c.newStatus.CanaryStatus.NoNeedUpdateReplicas = pointer.Int32(noNeedRollbackReplicas) - } - - updatedReplicas := c.clone.Status.UpdatedReplicas - replicas := c.newStatus.ObservedWorkloadReplicas - currentBatch := c.newStatus.CanaryStatus.CurrentBatch - 
partitionedStableReplicas, _ := intstr.GetValueFromIntOrPercent(c.clone.Spec.UpdateStrategy.Partition, int(replicas), true) - - // the number of canary pods should have in current batch in plan - plannedBatchCanaryReplicas := c.calculateCurrentCanary(c.newStatus.ObservedWorkloadReplicas) - // the number of canary pods that consider rollback context and other real-world situations - expectedBatchCanaryReplicas := c.calculateCurrentCanary(replicas - noNeedRollbackReplicas) - // the number of canary pods that consider rollback context and other real-world situations - expectedBatchStableReplicas := replicas - noNeedRollbackReplicas - expectedBatchCanaryReplicas - - // if canaryReplicas is int, then we use int; - // if canaryReplicas is percentage, then we use percentage. - var expectedPartition intstr.IntOrString - canaryIntOrStr := c.release.Spec.ReleasePlan.Batches[currentBatch].CanaryReplicas - if canaryIntOrStr.Type == intstr.Int { - expectedPartition = intstr.FromInt(int(expectedBatchStableReplicas)) - } else if c.newStatus.ObservedWorkloadReplicas > 0 { - expectedPartition = ParseIntegerAsPercentageIfPossible(expectedBatchStableReplicas, c.newStatus.ObservedWorkloadReplicas, &canaryIntOrStr) - } - - klog.V(3).InfoS("upgraded one batch, current info:", - "BatchRelease", c.releasePlanKey, - "currentBatch", currentBatch, - "replicas", replicas, - "updatedReplicas", updatedReplicas, - "noNeedRollbackReplicas", noNeedRollbackReplicas, - "partitionedStableReplicas", partitionedStableReplicas, - "plannedBatchCanaryReplicas", plannedBatchCanaryReplicas, - "expectedBatchCanaryReplicas", expectedBatchCanaryReplicas, - "expectedBatchStableReplicas", expectedBatchStableReplicas, - "expectedPartition", expectedPartition) - - if err := c.patchCloneSetPartition(c.clone, &expectedPartition); err != nil { - return false, err - } - - patchDone, err := c.patchPodBatchLabel(pods, plannedBatchCanaryReplicas, expectedBatchStableReplicas) - if !patchDone || err != nil { - return 
false, err - } - - c.recorder.Eventf(c.release, v1.EventTypeNormal, "SetBatchDone", "Finished submitting all upgrade quests for batch %d", c.newStatus.CanaryStatus.CurrentBatch) - return true, nil -} - -// CheckOneBatchReady checks to see if the pods are all available according to the rollout plan -func (c *CloneSetRolloutController) CheckOneBatchReady() (bool, error) { - if err := c.fetchCloneSet(); err != nil { - return false, err - } - - // if the workload status is untrustworthy - if c.clone.Status.ObservedGeneration != c.clone.Generation { - return false, nil - } - - rolloutID := c.release.Spec.ReleasePlan.RolloutID - - var err error - var pods []*v1.Pod - // if rolloutID is not set, no need to list pods, - // because we cannot patch correct batch label to pod. - if rolloutID != "" { - pods, err = util.ListOwnedPods(c.client, c.clone) - if err != nil { - return false, err - } - } - - var noNeedRollbackReplicas int32 - if c.newStatus.CanaryStatus.NoNeedUpdateReplicas != nil { - noNeedRollbackReplicas = countNoNeedRollbackReplicas(pods, c.newStatus.UpdateRevision, c.release.Spec.ReleasePlan.RolloutID) - c.newStatus.CanaryStatus.NoNeedUpdateReplicas = pointer.Int32(noNeedRollbackReplicas) - } - - replicas := *c.clone.Spec.Replicas - // the number of updated pods - updatedReplicas := c.clone.Status.UpdatedReplicas - // the number of updated ready pods - updatedReadyReplicas := c.clone.Status.UpdatedReadyReplicas - - // current batch id - currentBatch := c.newStatus.CanaryStatus.CurrentBatch - // the number of canary pods should have in current batch in plan - plannedUpdatedReplicas := c.calculateCurrentCanary(c.newStatus.ObservedWorkloadReplicas) - // the number of pods will be partitioned by cloneSet - partitionedStableReplicas, _ := intstr.GetValueFromIntOrPercent(c.clone.Spec.UpdateStrategy.Partition, int(replicas), true) - // the number of canary pods that consider rollback context and other real-world situations - expectedUpdatedReplicas := 
c.calculateCurrentCanary(replicas - noNeedRollbackReplicas) - // the number of stable pods that consider rollback context and other real-world situations - expectedStableReplicas := replicas - noNeedRollbackReplicas - expectedUpdatedReplicas - // the number of canary pods that cloneSet will be upgraded - realDesiredUpdatedReplicas := CalculateRealCanaryReplicasGoal(expectedStableReplicas, replicas, &c.release.Spec.ReleasePlan.Batches[currentBatch].CanaryReplicas) - - klog.V(3).InfoS("check one batch, current info:", - "BatchRelease", c.releasePlanKey, - "currentBatch", currentBatch, - "replicas", replicas, - "updatedReplicas", updatedReplicas, - "noNeedRollbackReplicas", noNeedRollbackReplicas, - "partitionedStableReplicas", partitionedStableReplicas, - "expectedUpdatedReplicas", expectedUpdatedReplicas, - "realDesiredUpdatedReplicas", realDesiredUpdatedReplicas, - "expectedStableReplicas", expectedStableReplicas) - - if !isBatchReady(c.release, pods, c.clone.Spec.UpdateStrategy.MaxUnavailable, - plannedUpdatedReplicas, realDesiredUpdatedReplicas, updatedReplicas, updatedReadyReplicas) { - klog.Infof("BatchRelease(%v) batch is not ready yet, current batch=%d", klog.KObj(c.release), currentBatch) - return false, nil - } - - klog.Infof("BatchRelease(%v) batch is ready, current batch=%d", klog.KObj(c.release), currentBatch) - return true, nil -} - -// FinalizeProgress makes sure the CloneSet is all upgraded -func (c *CloneSetRolloutController) FinalizeProgress(cleanup bool) (bool, error) { - if err := c.fetchCloneSet(); client.IgnoreNotFound(err) != nil { - return false, err - } - - if _, err := c.releaseCloneSet(c.clone, cleanup); err != nil { - return false, err - } - - c.recorder.Eventf(c.release, v1.EventTypeNormal, "FinalizedSuccessfully", "Rollout resource are finalized: cleanup=%v", cleanup) - return true, nil -} - -// SyncWorkloadInfo return change type if workload was changed during release -func (c *CloneSetRolloutController) SyncWorkloadInfo() 
(WorkloadEventType, *util.WorkloadInfo, error) { - // ignore the sync if the release plan is deleted - if c.release.DeletionTimestamp != nil { - return IgnoreWorkloadEvent, nil, nil - } - - if err := c.fetchCloneSet(); err != nil { - if apierrors.IsNotFound(err) { - return WorkloadHasGone, nil, err - } - return "", nil, err - } - - // in case that the cloneSet status is untrustworthy - if c.clone.Status.ObservedGeneration != c.clone.Generation { - klog.Warningf("CloneSet(%v) is still reconciling, waiting for it to complete, generation: %v, observed: %v", - c.targetNamespacedName, c.clone.Generation, c.clone.Status.ObservedGeneration) - return WorkloadStillReconciling, nil, nil - } - - workloadInfo := &util.WorkloadInfo{ - Status: &util.WorkloadStatus{ - UpdatedReplicas: c.clone.Status.UpdatedReplicas, - UpdatedReadyReplicas: c.clone.Status.UpdatedReadyReplicas, - UpdateRevision: c.clone.Status.UpdateRevision, - StableRevision: c.clone.Status.CurrentRevision, - }, - } - - // in case of that the updated revision of the workload is promoted - if c.clone.Status.UpdatedReplicas == c.clone.Status.Replicas { - return IgnoreWorkloadEvent, workloadInfo, nil - } - - // in case of that the workload is scaling - if *c.clone.Spec.Replicas != c.newStatus.ObservedWorkloadReplicas && c.newStatus.ObservedWorkloadReplicas != -1 { - workloadInfo.Replicas = c.clone.Spec.Replicas - klog.Warningf("CloneSet(%v) replicas changed during releasing, should pause and wait for it to complete, "+ - "replicas from: %v -> %v", c.targetNamespacedName, c.newStatus.ObservedWorkloadReplicas, *c.clone.Spec.Replicas) - return WorkloadReplicasChanged, workloadInfo, nil - } - - // updateRevision == CurrentRevision means CloneSet is rolling back or newly-created. - if c.clone.Status.UpdateRevision == c.clone.Status.CurrentRevision && - // stableRevision == UpdateRevision means CloneSet is rolling back instead of newly-created. 
- c.newStatus.StableRevision == c.clone.Status.UpdateRevision && - // StableRevision != observed UpdateRevision means the rollback event have not been observed. - c.newStatus.StableRevision != c.newStatus.UpdateRevision { - klog.Warningf("CloneSet(%v) is rolling back in batches", c.targetNamespacedName) - return WorkloadRollbackInBatch, workloadInfo, nil - } - - // in case of that the workload was changed - if c.clone.Status.UpdateRevision != c.newStatus.UpdateRevision { - klog.Warningf("CloneSet(%v) updateRevision changed during releasing, should try to restart the release plan, "+ - "updateRevision from: %v -> %v", c.targetNamespacedName, c.newStatus.UpdateRevision, c.clone.Status.UpdateRevision) - return WorkloadPodTemplateChanged, workloadInfo, nil - } - - return IgnoreWorkloadEvent, workloadInfo, nil -} - -/* ---------------------------------- -The functions below are helper functions -------------------------------------- */ -// fetchCloneSet fetch cloneSet to c.clone -func (c *CloneSetRolloutController) fetchCloneSet() error { - clone := &kruiseappsv1alpha1.CloneSet{} - if err := c.client.Get(context.TODO(), c.targetNamespacedName, clone); err != nil { - if !apierrors.IsNotFound(err) { - c.recorder.Event(c.release, v1.EventTypeWarning, "GetCloneSetFailed", err.Error()) - } - return err - } - c.clone = clone - return nil -} - -func (c *CloneSetRolloutController) recordCloneSetRevisionAndReplicas() { - c.newStatus.ObservedWorkloadReplicas = *c.clone.Spec.Replicas - c.newStatus.StableRevision = c.clone.Status.CurrentRevision - c.newStatus.UpdateRevision = c.clone.Status.UpdateRevision -} - -func (c *CloneSetRolloutController) patchPodBatchLabel(pods []*v1.Pod, plannedBatchCanaryReplicas, expectedBatchStableReplicas int32) (bool, error) { - rolloutID := c.release.Spec.ReleasePlan.RolloutID - if rolloutID == "" || len(pods) == 0 { - return true, nil - } - - updateRevision := c.release.Status.UpdateRevision - batchID := c.release.Status.CanaryStatus.CurrentBatch + 
1 - if c.newStatus.CanaryStatus.NoNeedUpdateReplicas != nil { - pods = filterPodsForUnorderedRollback(pods, plannedBatchCanaryReplicas, expectedBatchStableReplicas, c.release.Status.ObservedWorkloadReplicas, rolloutID, updateRevision) - } - return patchPodBatchLabel(c.client, pods, rolloutID, batchID, updateRevision, plannedBatchCanaryReplicas, c.releasePlanKey) -} diff --git a/pkg/controller/batchrelease/workloads/cloneset_controller.go b/pkg/controller/batchrelease/workloads/cloneset_controller.go deleted file mode 100644 index fe4da3ce..00000000 --- a/pkg/controller/batchrelease/workloads/cloneset_controller.go +++ /dev/null @@ -1,221 +0,0 @@ -/* -Copyright 2022 The Kruise Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package workloads - -import ( - "context" - "encoding/json" - "fmt" - "reflect" - - kruiseappsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" - "github.com/openkruise/rollouts/pkg/util" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/klog/v2" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// cloneSetController is the place to hold fields needed for handle CloneSet type of workloads -type cloneSetController struct { - workloadController - releasePlanKey types.NamespacedName - targetNamespacedName types.NamespacedName -} - -// add the parent controller to the owner of the deployment, unpause it and initialize the size -// before kicking start the update and start from every pod in the old version -func (c *cloneSetController) claimCloneSet(clone *kruiseappsv1alpha1.CloneSet) (bool, error) { - var controlled bool - if controlInfo, ok := clone.Annotations[util.BatchReleaseControlAnnotation]; ok && controlInfo != "" { - ref := &metav1.OwnerReference{} - err := json.Unmarshal([]byte(controlInfo), ref) - if err == nil && ref.UID == c.release.UID { - controlled = true - klog.V(3).Infof("CloneSet(%v) has been controlled by this BatchRelease(%v), no need to claim again", - c.targetNamespacedName, c.releasePlanKey) - } else { - klog.Errorf("Failed to parse controller info from CloneSet(%v) annotation, error: %v, controller info: %+v", - c.targetNamespacedName, err, *ref) - } - } - - patch := map[string]interface{}{} - switch { - // if the cloneSet has been claimed by this release - case controlled: - // make sure paused=false - if clone.Spec.UpdateStrategy.Paused { - patch = map[string]interface{}{ - "spec": map[string]interface{}{ - "updateStrategy": map[string]interface{}{ - "paused": false, - }, - }, - } - } - - default: - patch = map[string]interface{}{ - "spec": map[string]interface{}{ - "updateStrategy": map[string]interface{}{ - 
"partition": &intstr.IntOrString{Type: intstr.String, StrVal: "100%"}, - "paused": false, - }, - }, - } - - controlInfo := metav1.NewControllerRef(c.release, c.release.GetObjectKind().GroupVersionKind()) - controlByte, _ := json.Marshal(controlInfo) - patch["metadata"] = map[string]interface{}{ - "annotations": map[string]string{ - util.BatchReleaseControlAnnotation: string(controlByte), - }, - } - } - - if len(patch) > 0 { - cloneObj := clone.DeepCopy() - patchByte, _ := json.Marshal(patch) - if err := c.client.Patch(context.TODO(), cloneObj, client.RawPatch(types.MergePatchType, patchByte)); err != nil { - c.recorder.Eventf(c.release, v1.EventTypeWarning, "ClaimCloneSetFailed", err.Error()) - return false, err - } - } - - klog.V(3).Infof("Claim CloneSet(%v) Successfully", c.targetNamespacedName) - return true, nil -} - -// remove the parent controller from the deployment's owner list -func (c *cloneSetController) releaseCloneSet(clone *kruiseappsv1alpha1.CloneSet, cleanup bool) (bool, error) { - if clone == nil { - return true, nil - } - - var found bool - var refByte string - if refByte, found = clone.Annotations[util.BatchReleaseControlAnnotation]; found && refByte != "" { - ref := &metav1.OwnerReference{} - if err := json.Unmarshal([]byte(refByte), ref); err != nil { - found = false - klog.Errorf("failed to decode controller annotations of BatchRelease") - } else if ref.UID != c.release.UID { - found = false - } - } - - if !found { - klog.V(3).Infof("the CloneSet(%v) is already released", c.targetNamespacedName) - return true, nil - } - - cloneObj := clone.DeepCopy() - patchByte := []byte(fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}}`, util.BatchReleaseControlAnnotation)) - if err := c.client.Patch(context.TODO(), cloneObj, client.RawPatch(types.MergePatchType, patchByte)); err != nil { - c.recorder.Eventf(c.release, v1.EventTypeWarning, "ReleaseCloneSetFailed", err.Error()) - return false, err - } - - klog.V(3).Infof("Release CloneSet(%v) 
Successfully", c.targetNamespacedName) - return true, nil -} - -// scale the deployment -func (c *cloneSetController) patchCloneSetPartition(clone *kruiseappsv1alpha1.CloneSet, partition *intstr.IntOrString) error { - if reflect.DeepEqual(clone.Spec.UpdateStrategy.Partition, partition) { - return nil - } - - patch := map[string]interface{}{ - "spec": map[string]interface{}{ - "updateStrategy": map[string]interface{}{ - "partition": partition, - }, - }, - } - - cloneObj := clone.DeepCopy() - patchByte, _ := json.Marshal(patch) - if err := c.client.Patch(context.TODO(), cloneObj, client.RawPatch(types.MergePatchType, patchByte)); err != nil { - c.recorder.Eventf(c.release, v1.EventTypeWarning, "PatchPartitionFailed", - "Failed to update the CloneSet(%v) to the correct target partition %d, error: %v", - c.targetNamespacedName, partition, err) - return err - } - - klog.InfoS("Submitted modified partition quest for CloneSet", "CloneSet", c.targetNamespacedName, - "target partition size", partition, "batch", c.newStatus.CanaryStatus.CurrentBatch) - - return nil -} - -// the canary workload size for the current batch -func (c *cloneSetController) calculateCurrentCanary(totalSize int32) int32 { - targetSize := int32(calculateNewBatchTarget(&c.release.Spec.ReleasePlan, int(totalSize), int(c.newStatus.CanaryStatus.CurrentBatch))) - klog.InfoS("Calculated the number of pods in the target CloneSet after current batch", - "CloneSet", c.targetNamespacedName, "BatchRelease", c.releasePlanKey, - "current batch", c.newStatus.CanaryStatus.CurrentBatch, "workload updateRevision size", targetSize) - return targetSize -} - -// the source workload size for the current batch -func (c *cloneSetController) calculateCurrentStable(totalSize int32) int32 { - sourceSize := totalSize - c.calculateCurrentCanary(totalSize) - klog.InfoS("Calculated the number of pods in the source CloneSet after current batch", - "CloneSet", c.targetNamespacedName, "BatchRelease", c.releasePlanKey, - "current 
batch", c.newStatus.CanaryStatus.CurrentBatch, "workload stableRevision size", sourceSize) - return sourceSize -} - -// ParseIntegerAsPercentageIfPossible will return a percentage type IntOrString, such as "20%", "33%", but "33.3%" is illegal. -// Given A, B, return P that should try best to satisfy ⌈P * B⌉ == A, and we ensure that the error is less than 1%. -// For examples: -// * Given stableReplicas 1, allReplicas 3, return "33%"; -// * Given stableReplicas 98, allReplicas 99, return "97%"; -// * Given stableReplicas 1, allReplicas 101, return "1"; -func ParseIntegerAsPercentageIfPossible(stableReplicas, allReplicas int32, canaryReplicas *intstr.IntOrString) intstr.IntOrString { - if stableReplicas >= allReplicas { - return intstr.FromString("100%") - } - - if stableReplicas <= 0 { - return intstr.FromString("0%") - } - - pValue := stableReplicas * 100 / allReplicas - percent := intstr.FromString(fmt.Sprintf("%v%%", pValue)) - restoredStableReplicas, _ := intstr.GetScaledValueFromIntOrPercent(&percent, int(allReplicas), true) - // restoredStableReplicas == 0 is un-tolerated if user-defined canaryReplicas is not 100%. - // we must make sure that at least one canary pod is created. 
- if restoredStableReplicas <= 0 && canaryReplicas.StrVal != "100%" { - return intstr.FromString("1%") - } - - return percent -} - -func CalculateRealCanaryReplicasGoal(expectedStableReplicas, allReplicas int32, canaryReplicas *intstr.IntOrString) int32 { - if canaryReplicas.Type == intstr.Int { - return allReplicas - expectedStableReplicas - } - partition := ParseIntegerAsPercentageIfPossible(expectedStableReplicas, allReplicas, canaryReplicas) - realStableReplicas, _ := intstr.GetScaledValueFromIntOrPercent(&partition, int(allReplicas), true) - return allReplicas - int32(realStableReplicas) -} diff --git a/pkg/controller/batchrelease/workloads/cloneset_controller_test.go b/pkg/controller/batchrelease/workloads/cloneset_controller_test.go deleted file mode 100644 index 1fc0194d..00000000 --- a/pkg/controller/batchrelease/workloads/cloneset_controller_test.go +++ /dev/null @@ -1,227 +0,0 @@ -/* -Copyright 2022 The Kruise Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package workloads - -import ( - "context" - "fmt" - "math" - "reflect" - "testing" - - . "github.com/onsi/ginkgo" - . 
"github.com/onsi/gomega" - kruiseappsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" - "github.com/openkruise/rollouts/api/v1alpha1" - "github.com/openkruise/rollouts/pkg/util" - apps "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" - apimachineryruntime "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/apimachinery/pkg/util/uuid" - "k8s.io/client-go/tools/record" - "k8s.io/utils/pointer" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -var ( - scheme *runtime.Scheme - releaseClone = &v1alpha1.BatchRelease{ - TypeMeta: metav1.TypeMeta{ - APIVersion: v1alpha1.GroupVersion.String(), - Kind: "BatchRelease", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "release", - Namespace: "application", - UID: uuid.NewUUID(), - }, - Spec: v1alpha1.BatchReleaseSpec{ - TargetRef: v1alpha1.ObjectRef{ - WorkloadRef: &v1alpha1.WorkloadRef{ - APIVersion: "apps.kruise.io/v1alpha1", - Kind: "CloneSet", - Name: "sample", - }, - }, - ReleasePlan: v1alpha1.ReleasePlan{ - Batches: []v1alpha1.ReleaseBatch{ - { - CanaryReplicas: intstr.FromString("10%"), - }, - { - CanaryReplicas: intstr.FromString("50%"), - }, - { - CanaryReplicas: intstr.FromString("80%"), - }, - }, - }, - }, - } - - stableClone = &kruiseappsv1alpha1.CloneSet{ - TypeMeta: metav1.TypeMeta{ - APIVersion: kruiseappsv1alpha1.SchemeGroupVersion.String(), - Kind: "CloneSet", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "sample", - Namespace: "application", - UID: types.UID("87076677"), - Generation: 1, - Labels: map[string]string{ - "app": "busybox", - }, - Annotations: map[string]string{ - "something": "whatever", - }, - }, - Spec: kruiseappsv1alpha1.CloneSetSpec{ - Replicas: pointer.Int32Ptr(100), - UpdateStrategy: kruiseappsv1alpha1.CloneSetUpdateStrategy{ - Partition: &intstr.IntOrString{Type: intstr.Int, 
IntVal: int32(1)}, - MaxSurge: &intstr.IntOrString{Type: intstr.Int, IntVal: int32(2)}, - MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: int32(2)}, - }, - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "busybox", - }, - }, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: containers("v2"), - }, - }, - }, - Status: kruiseappsv1alpha1.CloneSetStatus{ - Replicas: 100, - ReadyReplicas: 100, - UpdatedReplicas: 0, - UpdatedReadyReplicas: 0, - ObservedGeneration: 1, - }, - } -) - -func init() { - scheme = runtime.NewScheme() - apimachineryruntime.Must(apps.AddToScheme(scheme)) - apimachineryruntime.Must(v1alpha1.AddToScheme(scheme)) - apimachineryruntime.Must(kruiseappsv1alpha1.AddToScheme(scheme)) - - canaryTemplate := stableClone.Spec.Template.DeepCopy() - stableTemplate := canaryTemplate.DeepCopy() - stableTemplate.Spec.Containers = containers("v1") - stableClone.Status.CurrentRevision = util.ComputeHash(stableTemplate, nil) - stableClone.Status.UpdateRevision = util.ComputeHash(canaryTemplate, nil) -} - -func TestCloneSetController(t *testing.T) { - RegisterFailHandler(Fail) - - cases := []struct { - Name string - Paused bool - Cleanup bool - }{ - { - Name: "paused=true, cleanup=true", - Paused: true, - Cleanup: true, - }, - { - Name: "paused=true, cleanup=false", - Paused: true, - Cleanup: false, - }, - { - Name: "paused=false cleanup=true", - Paused: false, - Cleanup: true, - }, - { - Name: "paused=false , cleanup=false", - Paused: false, - Cleanup: false, - }, - } - - for _, cs := range cases { - t.Run(cs.Name, func(t *testing.T) { - cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(releaseClone.DeepCopy(), stableClone.DeepCopy()).Build() - rec := record.NewFakeRecorder(100) - c := cloneSetController{ - workloadController: workloadController{ - client: cli, - recorder: rec, - release: releaseClone, - newStatus: &releaseClone.Status, - }, - targetNamespacedName: 
client.ObjectKeyFromObject(stableClone), - } - oldObject := &kruiseappsv1alpha1.CloneSet{} - Expect(cli.Get(context.TODO(), c.targetNamespacedName, oldObject)).NotTo(HaveOccurred()) - succeed, err := c.claimCloneSet(oldObject.DeepCopy()) - Expect(succeed).Should(BeTrue()) - Expect(err).NotTo(HaveOccurred()) - - newObject := &kruiseappsv1alpha1.CloneSet{} - Expect(cli.Get(context.TODO(), c.targetNamespacedName, newObject)).NotTo(HaveOccurred()) - succeed, err = c.releaseCloneSet(newObject.DeepCopy(), cs.Cleanup) - Expect(succeed).Should(BeTrue()) - Expect(err).NotTo(HaveOccurred()) - - newObject = &kruiseappsv1alpha1.CloneSet{} - Expect(cli.Get(context.TODO(), c.targetNamespacedName, newObject)).NotTo(HaveOccurred()) - newObject.Spec.UpdateStrategy.Paused = oldObject.Spec.UpdateStrategy.Paused - newObject.Spec.UpdateStrategy.Partition = oldObject.Spec.UpdateStrategy.Partition - Expect(reflect.DeepEqual(oldObject.Spec, newObject.Spec)).Should(BeTrue()) - Expect(reflect.DeepEqual(oldObject.Labels, newObject.Labels)).Should(BeTrue()) - Expect(reflect.DeepEqual(oldObject.Finalizers, newObject.Finalizers)).Should(BeTrue()) - Expect(reflect.DeepEqual(oldObject.Annotations, newObject.Annotations)).Should(BeTrue()) - }) - } -} - -func TestParseIntegerAsPercentage(t *testing.T) { - RegisterFailHandler(Fail) - - supposeUpper := 10000 - for allReplicas := 1; allReplicas <= supposeUpper; allReplicas++ { - for percent := 0; percent <= 100; percent++ { - canaryPercent := intstr.FromString(fmt.Sprintf("%v%%", percent)) - canaryReplicas, _ := intstr.GetScaledValueFromIntOrPercent(&canaryPercent, allReplicas, true) - partition := ParseIntegerAsPercentageIfPossible(int32(allReplicas-canaryReplicas), int32(allReplicas), &canaryPercent) - stableReplicas, _ := intstr.GetScaledValueFromIntOrPercent(&partition, allReplicas, true) - if percent == 0 { - Expect(stableReplicas).Should(BeNumerically("==", allReplicas)) - } else if percent == 100 { - 
Expect(stableReplicas).Should(BeNumerically("==", 0)) - } else if percent > 0 { - Expect(allReplicas - stableReplicas).To(BeNumerically(">", 0)) - } - Expect(stableReplicas).Should(BeNumerically("<=", allReplicas)) - Expect(math.Abs(float64((allReplicas - canaryReplicas) - stableReplicas))).Should(BeNumerically("<", float64(allReplicas)*0.01)) - } - } -} diff --git a/pkg/controller/batchrelease/workloads/commons.go b/pkg/controller/batchrelease/workloads/commons.go deleted file mode 100644 index 4356cdfb..00000000 --- a/pkg/controller/batchrelease/workloads/commons.go +++ /dev/null @@ -1,285 +0,0 @@ -package workloads - -import ( - "context" - "encoding/json" - "fmt" - "sort" - "strconv" - "strings" - - "github.com/openkruise/rollouts/api/v1alpha1" - "github.com/openkruise/rollouts/pkg/util" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/klog/v2" - "k8s.io/utils/integer" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -func filterPodsForUnorderedRollback(pods []*corev1.Pod, plannedBatchCanaryReplicas, expectedBatchStableReplicas, replicas int32, rolloutID, updateRevision string) []*corev1.Pod { - var noNeedRollbackReplicas int32 - var realNeedRollbackReplicas int32 - var expectedRollbackReplicas int32 // total need rollback - - var terminatingPods []*corev1.Pod - var needRollbackPods []*corev1.Pod - var noNeedRollbackPods []*corev1.Pod - - for _, pod := range pods { - if !pod.DeletionTimestamp.IsZero() { - terminatingPods = append(terminatingPods, pod) - continue - } - if !util.IsConsistentWithRevision(pod, updateRevision) { - continue - } - podRolloutID := pod.Labels[util.RolloutIDLabel] - podRollbackID := pod.Labels[util.NoNeedUpdatePodLabel] - if podRollbackID == rolloutID && podRolloutID != rolloutID { - noNeedRollbackReplicas++ - noNeedRollbackPods = append(noNeedRollbackPods, pod) - } else { - needRollbackPods = append(needRollbackPods, pod) - 
} - } - - expectedRollbackReplicas = replicas - expectedBatchStableReplicas - realNeedRollbackReplicas = expectedRollbackReplicas - noNeedRollbackReplicas - if realNeedRollbackReplicas <= 0 { // may never occur - return pods - } - - diff := plannedBatchCanaryReplicas - realNeedRollbackReplicas - if diff <= 0 { - return append(needRollbackPods, terminatingPods...) - } - - lastIndex := integer.Int32Min(diff, int32(len(noNeedRollbackPods))) - return append(append(needRollbackPods, noNeedRollbackPods[:lastIndex]...), terminatingPods...) -} - -// TODO: support advanced statefulSet reserveOrdinal feature -func filterPodsForOrderedRollback(pods []*corev1.Pod, plannedBatchCanaryReplicas, expectedBatchStableReplicas, replicas int32, rolloutID, updateRevision string) []*corev1.Pod { - var terminatingPods []*corev1.Pod - var needRollbackPods []*corev1.Pod - var noNeedRollbackPods []*corev1.Pod - - sortPodsByOrdinal(pods) - for _, pod := range pods { - if !pod.DeletionTimestamp.IsZero() { - terminatingPods = append(terminatingPods, pod) - continue - } - if !util.IsConsistentWithRevision(pod, updateRevision) { - continue - } - if getPodOrdinal(pod) >= int(expectedBatchStableReplicas) { - needRollbackPods = append(needRollbackPods, pod) - } else { - noNeedRollbackPods = append(noNeedRollbackPods, pod) - } - } - realNeedRollbackReplicas := replicas - expectedBatchStableReplicas - if realNeedRollbackReplicas <= 0 { // may never occur - return pods - } - - diff := plannedBatchCanaryReplicas - realNeedRollbackReplicas - if diff <= 0 { - return append(needRollbackPods, terminatingPods...) - } - - lastIndex := integer.Int32Min(diff, int32(len(noNeedRollbackPods))) - return append(append(needRollbackPods, noNeedRollbackPods[:lastIndex]...), terminatingPods...) 
-} - -func countNoNeedRollbackReplicas(pods []*corev1.Pod, updateRevision, rolloutID string) int32 { - noNeedRollbackReplicas := int32(0) - for _, pod := range pods { - if !pod.DeletionTimestamp.IsZero() { - continue - } - if !util.IsConsistentWithRevision(pod, updateRevision) { - continue - } - id, ok := pod.Labels[util.NoNeedUpdatePodLabel] - if ok && id == rolloutID { - noNeedRollbackReplicas++ - } - } - return noNeedRollbackReplicas -} - -// patchPodBatchLabel will patch rollout-id && batch-id to pods -func patchPodBatchLabel(c client.Client, pods []*corev1.Pod, rolloutID string, batchID int32, updateRevision string, replicas int32, logKey types.NamespacedName) (bool, error) { - // the number of active pods that has been patched successfully. - patchedUpdatedReplicas := int32(0) - for _, pod := range pods { - if !util.IsConsistentWithRevision(pod, updateRevision) { - continue - } - - podRolloutID := pod.Labels[util.RolloutIDLabel] - if pod.DeletionTimestamp.IsZero() && podRolloutID == rolloutID { - patchedUpdatedReplicas++ - } - } - - for _, pod := range pods { - podRolloutID := pod.Labels[util.RolloutIDLabel] - if pod.DeletionTimestamp.IsZero() { - // we don't patch label for the active old revision pod - if !util.IsConsistentWithRevision(pod, updateRevision) { - continue - } - // we don't continue to patch if the goal is met - if patchedUpdatedReplicas >= replicas { - continue - } - } - - // if it has been patched, just ignore - if podRolloutID == rolloutID { - continue - } - - podClone := pod.DeepCopy() - by := fmt.Sprintf(`{"metadata":{"labels":{"%s":"%s","%s":"%d"}}}`, util.RolloutIDLabel, rolloutID, util.RolloutBatchIDLabel, batchID) - err := c.Patch(context.TODO(), podClone, client.RawPatch(types.StrategicMergePatchType, []byte(by))) - if err != nil { - klog.Errorf("Failed to patch Pod(%v) batchID, err: %v", client.ObjectKeyFromObject(pod), err) - return false, err - } else { - klog.Infof("Succeed to patch Pod(%v) batchID, err: %v", 
client.ObjectKeyFromObject(pod), err) - } - - if pod.DeletionTimestamp.IsZero() { - patchedUpdatedReplicas++ - } - } - - klog.V(3).Infof("Patch %v pods with batchID for batchRelease %v, goal is %d pods", patchedUpdatedReplicas, logKey, replicas) - return patchedUpdatedReplicas >= replicas, nil -} - -func releaseWorkload(c client.Client, object client.Object) error { - _, found := object.GetAnnotations()[util.BatchReleaseControlAnnotation] - if !found { - klog.V(3).Infof("Workload(%v) is already released", client.ObjectKeyFromObject(object)) - return nil - } - - clone := object.DeepCopyObject().(client.Object) - patchByte := []byte(fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}}`, util.BatchReleaseControlAnnotation)) - return c.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, patchByte)) -} - -func claimWorkload(c client.Client, planController *v1alpha1.BatchRelease, object client.Object, patchUpdateStrategy map[string]interface{}) error { - if controlInfo, ok := object.GetAnnotations()[util.BatchReleaseControlAnnotation]; ok && controlInfo != "" { - ref := &metav1.OwnerReference{} - err := json.Unmarshal([]byte(controlInfo), ref) - if err == nil && ref.UID == planController.UID { - klog.V(3).Infof("Workload(%v) has been controlled by this BatchRelease(%v), no need to claim again", - client.ObjectKeyFromObject(object), client.ObjectKeyFromObject(planController)) - return nil - } else { - klog.Errorf("Failed to parse controller info from Workload(%v) annotation, error: %v, controller info: %+v", - client.ObjectKeyFromObject(object), err, *ref) - } - } - - controlInfo, _ := json.Marshal(metav1.NewControllerRef(planController, planController.GetObjectKind().GroupVersionKind())) - patch := map[string]interface{}{ - "metadata": map[string]interface{}{ - "annotations": map[string]string{ - util.BatchReleaseControlAnnotation: string(controlInfo), - }, - }, - "spec": map[string]interface{}{ - "updateStrategy": patchUpdateStrategy, - }, - } - - 
patchByte, _ := json.Marshal(patch) - clone := object.DeepCopyObject().(client.Object) - return c.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, patchByte)) -} - -func patchSpec(c client.Client, object client.Object, spec map[string]interface{}) error { - patchByte, err := json.Marshal(map[string]interface{}{"spec": spec}) - if err != nil { - return err - } - clone := object.DeepCopyObject().(client.Object) - return c.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, patchByte)) -} - -func calculateNewBatchTarget(rolloutSpec *v1alpha1.ReleasePlan, workloadReplicas, currentBatch int) int { - batchSize, _ := intstr.GetValueFromIntOrPercent(&rolloutSpec.Batches[currentBatch].CanaryReplicas, workloadReplicas, true) - if batchSize > workloadReplicas { - klog.Warningf("releasePlan has wrong batch replicas, batches[%d].replicas %v is more than workload.replicas %v", currentBatch, batchSize, workloadReplicas) - batchSize = workloadReplicas - } else if batchSize < 0 { - klog.Warningf("releasePlan has wrong batch replicas, batches[%d].replicas %v is less than 0 %v", currentBatch, batchSize) - batchSize = 0 - } - - klog.V(3).InfoS("calculated the number of new pod size", "current batch", currentBatch, "new pod target", batchSize) - return batchSize -} - -func sortPodsByOrdinal(pods []*corev1.Pod) { - sort.Slice(pods, func(i, j int) bool { - ordI, _ := strconv.Atoi(pods[i].Name[strings.LastIndex(pods[i].Name, "-"):]) - ordJ, _ := strconv.Atoi(pods[j].Name[strings.LastIndex(pods[j].Name, "-"):]) - return ordJ > ordI - }) -} - -func getPodOrdinal(pod *corev1.Pod) int { - ord, _ := strconv.Atoi(pod.Name[strings.LastIndex(pod.Name, "-")+1:]) - return ord -} - -func failureThreshold(threshold, maxUnavailable *intstr.IntOrString, replicas int32) int32 { - globalThreshold := 0 - if threshold != nil { - globalThreshold, _ = intstr.GetScaledValueFromIntOrPercent(threshold, int(replicas), true) - } else if maxUnavailable != nil { - globalThreshold, _ 
= intstr.GetScaledValueFromIntOrPercent(maxUnavailable, int(replicas), true) - } - return int32(integer.IntMax(0, globalThreshold)) -} - -func isBatchReady(release *v1alpha1.BatchRelease, pods []*corev1.Pod, maxUnavailable *intstr.IntOrString, labelDesired, desired, updated, updatedReady int32) bool { - updateRevision := release.Status.UpdateRevision - if updatedReady <= 0 { // Some workloads, such as StatefulSet, may not have such field - updatedReady = int32(util.WrappedPodCount(pods, func(pod *corev1.Pod) bool { - return pod.DeletionTimestamp.IsZero() && util.IsConsistentWithRevision(pod, updateRevision) && util.IsPodReady(pod) - })) - } - - rolloutID := release.Spec.ReleasePlan.RolloutID - threshold := failureThreshold(release.Spec.ReleasePlan.FailureThreshold, maxUnavailable, updated) - podReady := updated >= desired && updatedReady+threshold >= desired && (desired == 0 || updatedReady > 0) - return podReady && isPodBatchLabelSatisfied(pods, rolloutID, labelDesired) -} - -func isPodBatchLabelSatisfied(pods []*corev1.Pod, rolloutID string, targetCount int32) bool { - if len(rolloutID) == 0 || len(pods) == 0 { - return true - } - labeledCount := int32(0) - for _, pod := range pods { - if !pod.DeletionTimestamp.IsZero() { - continue - } - if pod.Labels[util.RolloutIDLabel] == rolloutID { - labeledCount++ - } - } - return labeledCount >= targetCount -} diff --git a/pkg/controller/batchrelease/workloads/commons_test.go b/pkg/controller/batchrelease/workloads/commons_test.go deleted file mode 100644 index 95882e8b..00000000 --- a/pkg/controller/batchrelease/workloads/commons_test.go +++ /dev/null @@ -1,438 +0,0 @@ -package workloads - -import ( - "fmt" - "math/rand" - "testing" - - . "github.com/onsi/ginkgo" - . 
"github.com/onsi/gomega" - "github.com/openkruise/rollouts/api/v1alpha1" - "github.com/openkruise/rollouts/pkg/util" - apps "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/intstr" -) - -func TestFilterPodsForUnorderedRollback(t *testing.T) { - RegisterFailHandler(Fail) - - cases := []struct { - Name string - GetPods func() []*corev1.Pod - ExpectWithLabels int - ExpectWithoutLabels int - Replicas int32 - NoNeedRollbackReplicas int32 - PlannedBatchCanaryReplicas int32 - ExpectedBatchStableReplicas int32 - }{ - { - Name: "replicas=10, updatedReplicas=10, noNeedRollback=5, stepCanary=20%, realCanary=6", - GetPods: func() []*corev1.Pod { - return generatePods(10, 5) - }, - Replicas: 10, - NoNeedRollbackReplicas: 5, - PlannedBatchCanaryReplicas: 2, - ExpectedBatchStableReplicas: 4, - ExpectWithoutLabels: 5, - ExpectWithLabels: 1, - }, - { - Name: "replicas=10, updatedReplicas=10, noNeedRollback=5, stepCanary=60%, realCanary=8", - GetPods: func() []*corev1.Pod { - return generatePods(10, 5) - }, - Replicas: 10, - NoNeedRollbackReplicas: 5, - PlannedBatchCanaryReplicas: 6, - ExpectedBatchStableReplicas: 2, - ExpectWithoutLabels: 5, - ExpectWithLabels: 3, - }, - { - Name: "replicas=10, updatedReplicas=10, noNeedRollback=5, stepCanary=100%, realCanary=10", - GetPods: func() []*corev1.Pod { - return generatePods(10, 5) - }, - Replicas: 10, - NoNeedRollbackReplicas: 5, - PlannedBatchCanaryReplicas: 10, - ExpectedBatchStableReplicas: 0, - ExpectWithoutLabels: 5, - ExpectWithLabels: 5, - }, - { - Name: "replicas=10, updatedReplicas=9, noNeedRollback=7, stepCanary=20%, realCanary=6", - GetPods: func() []*corev1.Pod { - return generatePods(9, 7) - }, - Replicas: 10, - NoNeedRollbackReplicas: 7, - PlannedBatchCanaryReplicas: 2, - ExpectedBatchStableReplicas: 2, - ExpectWithoutLabels: 2, - ExpectWithLabels: 1, - }, - { - Name: "replicas=10, updatedReplicas=9, noNeedRollback=7, stepCanary=60%, 
realCanary=8", - GetPods: func() []*corev1.Pod { - return generatePods(9, 7) - }, - Replicas: 10, - NoNeedRollbackReplicas: 7, - PlannedBatchCanaryReplicas: 6, - ExpectedBatchStableReplicas: 1, - ExpectWithoutLabels: 2, - ExpectWithLabels: 4, - }, - { - Name: "replicas=10, updatedReplicas=9, noNeedRollback=7, stepCanary=100%, realCanary=10", - GetPods: func() []*corev1.Pod { - return generatePods(9, 7) - }, - Replicas: 10, - NoNeedRollbackReplicas: 7, - PlannedBatchCanaryReplicas: 10, - ExpectedBatchStableReplicas: 0, - ExpectWithoutLabels: 2, - ExpectWithLabels: 7, - }, - { - Name: "replicas=10, updatedReplicas=6, noNeedRollback=5, stepCanary=20%, realCanary=6", - GetPods: func() []*corev1.Pod { - return generatePods(6, 5) - }, - Replicas: 10, - NoNeedRollbackReplicas: 5, - PlannedBatchCanaryReplicas: 2, - ExpectedBatchStableReplicas: 4, - ExpectWithoutLabels: 1, - ExpectWithLabels: 1, - }, - { - Name: "replicas=10, updatedReplicas=6, noNeedRollback=5, stepCanary=60%, realCanary=8", - GetPods: func() []*corev1.Pod { - return generatePods(6, 5) - }, - Replicas: 10, - NoNeedRollbackReplicas: 5, - PlannedBatchCanaryReplicas: 6, - ExpectedBatchStableReplicas: 2, - ExpectWithoutLabels: 1, - ExpectWithLabels: 3, - }, - } - - check := func(pods []*corev1.Pod, expectWith, expectWithout int) bool { - var with, without int - for _, pod := range pods { - if pod.Labels[util.NoNeedUpdatePodLabel] == "0x1" { - with++ - } else { - without++ - } - } - return with == expectWith && without == expectWithout - } - - for _, cs := range cases { - t.Run(cs.Name, func(t *testing.T) { - pods := cs.GetPods() - for i := 0; i < 10; i++ { - rand.Shuffle(len(pods), func(i, j int) { - pods[i], pods[j] = pods[j], pods[i] - }) - filteredPods := filterPodsForUnorderedRollback(pods, cs.PlannedBatchCanaryReplicas, cs.ExpectedBatchStableReplicas, cs.Replicas, "0x1", "version-1") - var podName []string - for i := range filteredPods { - podName = append(podName, filteredPods[i].Name) - } - 
fmt.Println(podName) - Expect(check(filteredPods, cs.ExpectWithLabels, cs.ExpectWithoutLabels)).To(BeTrue()) - } - }) - } -} - -func TestFilterPodsForOrderedRollback(t *testing.T) { - RegisterFailHandler(Fail) - - cases := []struct { - Name string - GetPods func() []*corev1.Pod - ExpectWithLabels int - ExpectWithoutLabels int - Replicas int32 - PlannedBatchCanaryReplicas int32 - ExpectedBatchStableReplicas int32 - }{ - { - Name: "replicas=10, updatedReplicas=10, stepCanary=40%, realCanary=2", - GetPods: func() []*corev1.Pod { - return generatePods(10, 8) - }, - Replicas: 10, - PlannedBatchCanaryReplicas: 4, - ExpectedBatchStableReplicas: 8, - ExpectWithoutLabels: 2, - ExpectWithLabels: 2, - }, - { - Name: "replicas=10, updatedReplicas=10, stepCanary=60%, realCanary=2", - GetPods: func() []*corev1.Pod { - return generatePods(10, 8) - }, - Replicas: 10, - PlannedBatchCanaryReplicas: 6, - ExpectedBatchStableReplicas: 8, - ExpectWithoutLabels: 2, - ExpectWithLabels: 4, - }, - { - Name: "replicas=10, updatedReplicas=10, stepCanary=100%, realCanary=10", - GetPods: func() []*corev1.Pod { - return generatePods(10, 0) - }, - Replicas: 10, - PlannedBatchCanaryReplicas: 10, - ExpectedBatchStableReplicas: 0, - ExpectWithoutLabels: 10, - ExpectWithLabels: 0, - }, - { - Name: "replicas=10, updatedReplicas=9, stepCanary=20%, realCanary=2", - GetPods: func() []*corev1.Pod { - return generatePods(9, 8) - }, - Replicas: 10, - PlannedBatchCanaryReplicas: 2, - ExpectedBatchStableReplicas: 8, - ExpectWithoutLabels: 1, - ExpectWithLabels: 0, - }, - } - - check := func(pods []*corev1.Pod, expectWith, expectWithout int) bool { - var with, without int - for _, pod := range pods { - if pod.Labels[util.NoNeedUpdatePodLabel] == "0x1" { - with++ - } else { - without++ - } - } - return with == expectWith && without == expectWithout - } - - for _, cs := range cases { - t.Run(cs.Name, func(t *testing.T) { - pods := cs.GetPods() - for i := 0; i < 10; i++ { - rand.Shuffle(len(pods), func(i, j int) 
{ - pods[i], pods[j] = pods[j], pods[i] - }) - filteredPods := filterPodsForOrderedRollback(pods, cs.PlannedBatchCanaryReplicas, cs.ExpectedBatchStableReplicas, cs.Replicas, "0x1", "version-1") - var podName []string - for i := range filteredPods { - podName = append(podName, filteredPods[i].Name) - } - fmt.Println(podName) - Expect(check(filteredPods, cs.ExpectWithLabels, cs.ExpectWithoutLabels)).To(BeTrue()) - } - }) - } -} - -func TestIsBatchReady(t *testing.T) { - RegisterFailHandler(Fail) - - p := func(f intstr.IntOrString) *intstr.IntOrString { - return &f - } - r := func(f *intstr.IntOrString, id, revision string) *v1alpha1.BatchRelease { - return &v1alpha1.BatchRelease{ - Spec: v1alpha1.BatchReleaseSpec{ReleasePlan: v1alpha1.ReleasePlan{RolloutID: id, FailureThreshold: f}}, - Status: v1alpha1.BatchReleaseStatus{UpdateRevision: revision}, - } - } - cases := map[string]struct { - release *v1alpha1.BatchRelease - pods []*corev1.Pod - maxUnavailable *intstr.IntOrString - labelDesired int32 - desired int32 - updated int32 - updatedReady int32 - result bool - }{ - "ready: no-rollout-id, all pod ready": { - release: r(p(intstr.FromInt(1)), "", "v2"), - pods: nil, - maxUnavailable: p(intstr.FromInt(1)), - labelDesired: 5, - desired: 5, - updated: 5, - updatedReady: 5, - result: true, - }, - "ready: no-rollout-id, tolerated failed pods": { - release: r(p(intstr.FromInt(1)), "", "v2"), - pods: nil, - maxUnavailable: p(intstr.FromInt(1)), - labelDesired: 5, - desired: 5, - updated: 5, - updatedReady: 4, - result: true, - }, - "false: no-rollout-id, un-tolerated failed pods": { - release: r(p(intstr.FromInt(1)), "", "v2"), - pods: nil, - maxUnavailable: p(intstr.FromInt(5)), - labelDesired: 5, - desired: 5, - updated: 5, - updatedReady: 3, - result: false, - }, - "false: no-rollout-id, tolerated failed pods, but 1 pod isn't updated": { - release: r(p(intstr.FromString("60%")), "", "v2"), - pods: nil, - maxUnavailable: p(intstr.FromInt(5)), - labelDesired: 5, - desired: 
5, - updated: 4, - updatedReady: 4, - result: false, - }, - "false: no-rollout-id, tolerated, but no-pod-ready": { - release: r(p(intstr.FromInt(100)), "", "v2"), - pods: nil, - maxUnavailable: p(intstr.FromInt(5)), - labelDesired: 5, - desired: 5, - updated: 5, - updatedReady: 0, - result: false, - }, - "true: no-rollout-id, tolerated failed pods, failureThreshold=nil": { - release: r(nil, "", "v2"), - pods: nil, - maxUnavailable: p(intstr.FromInt(3)), - labelDesired: 5, - desired: 5, - updated: 5, - updatedReady: 3, - result: true, - }, - "false: no-rollout-id, un-tolerated failed pods, failureThreshold=nil": { - release: r(nil, "", "v2"), - pods: nil, - maxUnavailable: p(intstr.FromInt(1)), - labelDesired: 5, - desired: 5, - updated: 5, - updatedReady: 3, - result: false, - }, - "true: rollout-id, labeled pods satisfied": { - release: r(p(intstr.FromInt(1)), "1", "version-1"), - pods: generatePods(5, 0), - maxUnavailable: p(intstr.FromInt(5)), - labelDesired: 5, - desired: 5, - updated: 5, - updatedReady: 5, - result: true, - }, - "false: rollout-id, labeled pods not satisfied": { - release: r(p(intstr.FromInt(1)), "1", "version-1"), - pods: generatePods(3, 0), - maxUnavailable: p(intstr.FromInt(5)), - labelDesired: 5, - desired: 5, - updated: 5, - updatedReady: 5, - result: false, - }, - "true: rollout-id, no updated-ready field": { - release: r(p(intstr.FromInt(1)), "1", "version-1"), - pods: generatePods(5, 0), - maxUnavailable: p(intstr.FromInt(5)), - labelDesired: 5, - desired: 5, - updated: 5, - updatedReady: 0, - result: true, - }, - } - - for name, cs := range cases { - t.Run(name, func(t *testing.T) { - got := isBatchReady(cs.release, cs.pods, cs.maxUnavailable, cs.labelDesired, cs.desired, cs.updated, cs.updatedReady) - fmt.Printf("%v %v", got, cs.result) - Expect(got).To(Equal(cs.result)) - fmt.Printf("%v %v", got, cs.result) - - }) - } -} - -func TestSortPodsByOrdinal(t *testing.T) { - RegisterFailHandler(Fail) - - pods := generatePods(100, 10) - 
rand.Shuffle(len(pods), func(i, j int) { - pods[i], pods[j] = pods[j], pods[i] - }) - sortPodsByOrdinal(pods) - for i, pod := range pods { - expectedName := fmt.Sprintf("pod-name-%d", 99-i) - Expect(pod.Name == expectedName).Should(BeTrue()) - } -} - -func generatePods(updatedReplicas, noNeedRollbackReplicas int) []*corev1.Pod { - podsNoNeed := generatePodsWith(map[string]string{ - util.NoNeedUpdatePodLabel: "0x1", - util.RolloutIDLabel: "1", - apps.ControllerRevisionHashLabelKey: "version-1", - }, noNeedRollbackReplicas, 0) - return append(generatePodsWith(map[string]string{ - util.RolloutIDLabel: "1", - apps.ControllerRevisionHashLabelKey: "version-1", - }, updatedReplicas-noNeedRollbackReplicas, noNeedRollbackReplicas), podsNoNeed...) -} - -func generatePodsWith(labels map[string]string, replicas int, beginOrder int) []*corev1.Pod { - pods := make([]*corev1.Pod, replicas) - for i := 0; i < replicas; i++ { - pods[i] = &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("pod-name-%d", beginOrder+i), - Labels: labels, - }, - Status: corev1.PodStatus{ - Conditions: []corev1.PodCondition{ - { - Type: corev1.PodReady, - Status: corev1.ConditionTrue, - }, - }, - }, - } - } - return pods -} - -func containers(version string) []corev1.Container { - return []corev1.Container{ - { - Name: "busybox", - Image: fmt.Sprintf("busybox:%v", version), - }, - } -} diff --git a/pkg/controller/batchrelease/workloads/controller_types.go b/pkg/controller/batchrelease/workloads/controller_types.go deleted file mode 100644 index 27e41903..00000000 --- a/pkg/controller/batchrelease/workloads/controller_types.go +++ /dev/null @@ -1,93 +0,0 @@ -/* -Copyright 2022 The Kruise Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package workloads - -import ( - "github.com/openkruise/rollouts/api/v1alpha1" - "github.com/openkruise/rollouts/pkg/util" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type WorkloadEventType string - -const ( - // IgnoreWorkloadEvent means workload event should be ignored. - IgnoreWorkloadEvent WorkloadEventType = "workload-event-ignore" - // WorkloadPodTemplateChanged means workload revision changed, should be stopped to execute batch release plan. - WorkloadPodTemplateChanged WorkloadEventType = "workload-pod-template-changed" - // WorkloadReplicasChanged means workload is scaling during rollout, should recalculate upgraded pods in current batch. - WorkloadReplicasChanged WorkloadEventType = "workload-replicas-changed" - // WorkloadStillReconciling means workload status is untrusted Untrustworthy, we should wait workload controller to reconcile. - WorkloadStillReconciling WorkloadEventType = "workload-is-reconciling" - // WorkloadHasGone means workload is deleted during rollout, we should do something finalizing works if this event occurs. - WorkloadHasGone WorkloadEventType = "workload-has-gone" - // WorkloadUnHealthy means workload is at some unexpected state that our controller cannot handle, we should stop reconcile. - WorkloadUnHealthy WorkloadEventType = "workload-is-unhealthy" - // WorkloadRollbackInBatch means workload is rollback according to BatchRelease batch plan. 
- WorkloadRollbackInBatch WorkloadEventType = "workload-rollback-in-batch" -) - -type workloadController struct { - client client.Client - recorder record.EventRecorder - newStatus *v1alpha1.BatchReleaseStatus - release *v1alpha1.BatchRelease -} - -// WorkloadController is the interface that all type of cloneSet controller implements -type WorkloadController interface { - // VerifyWorkload makes sure that the workload can be upgraded according to the release plan. - // it returns 'true', if this verification is successful. - // it returns 'false' or err != nil, if this verification is failed. - // it returns not-empty error if the verification has something wrong, and should not retry. - VerifyWorkload() (bool, error) - - // PrepareBeforeProgress make sure that the resource is ready to be progressed. - // this function is tasked to do any initialization work on the resources. - // it returns 'true' if the preparation is succeeded. - // it returns 'false' if the preparation should retry. - // it returns not-empty error if the preparation has something wrong, and should not retry. - PrepareBeforeProgress() (bool, *int32, error) - - // UpgradeOneBatch tries to upgrade old replicas following the release plan. - // it will upgrade the old replicas as the release plan allows in the current batch. - // it returns 'true' if the progress is succeeded. - // it returns 'false' if the progress should retry. - // it returns not-empty error if the progress has something wrong, and should not retry. - UpgradeOneBatch() (bool, error) - - // CheckOneBatchReady checks how many replicas are ready to serve requests in the current batch. - // it returns 'true' if the batch has been ready. - // it returns 'false' if the batch should be reset and recheck. - // it returns not-empty error if the check operation has something wrong, and should not retry. - CheckOneBatchReady() (bool, error) - - // FinalizeProgress makes sure the resources are in a good final state. 
- // It might depend on if the rollout succeeded or not. - // For example, we may remove the objects which created by batchRelease. - // this function will always retry util it returns 'true'. - // parameters: - // - pause: 'nil' means keep current state, 'true' means pause workload, 'false' means do not pause workload - // - cleanup: 'true' means clean up canary settings, 'false' means do not clean up. - FinalizeProgress(cleanup bool) (bool, error) - - // SyncWorkloadInfo will watch and compare the status recorded in BatchRelease.Status - // and the real-time workload info. If workload status is inconsistent with that recorded - // in release.status, will return the corresponding WorkloadEventType and info. - SyncWorkloadInfo() (WorkloadEventType, *util.WorkloadInfo, error) -} diff --git a/pkg/controller/batchrelease/workloads/deployment_canary_control_plane.go b/pkg/controller/batchrelease/workloads/deployment_canary_control_plane.go deleted file mode 100644 index 5e1fd99e..00000000 --- a/pkg/controller/batchrelease/workloads/deployment_canary_control_plane.go +++ /dev/null @@ -1,351 +0,0 @@ -/* -Copyright 2022 The Kruise Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package workloads - -import ( - "context" - "fmt" - "sort" - - "github.com/openkruise/rollouts/api/v1alpha1" - "github.com/openkruise/rollouts/pkg/util" - utilclient "github.com/openkruise/rollouts/pkg/util/client" - apps "k8s.io/api/apps/v1" - v1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/client-go/tools/record" - "k8s.io/klog/v2" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" -) - -// DeploymentsRolloutController is responsible for handling Deployment type of workloads -type DeploymentsRolloutController struct { - deploymentController - stable *apps.Deployment - canary *apps.Deployment -} - -// NewDeploymentRolloutController creates a new Deployment rollout controller -func NewDeploymentRolloutController(cli client.Client, recorder record.EventRecorder, release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, stableNamespacedName types.NamespacedName) *DeploymentsRolloutController { - return &DeploymentsRolloutController{ - deploymentController: deploymentController{ - workloadController: workloadController{ - client: cli, - recorder: recorder, - release: release, - newStatus: newStatus, - }, - stableNamespacedName: stableNamespacedName, - canaryNamespacedName: stableNamespacedName, - releaseKey: client.ObjectKeyFromObject(release), - }, - } -} - -// VerifyWorkload verifies that the workload is ready to execute release plan -func (c *DeploymentsRolloutController) VerifyWorkload() (bool, error) { - // claim the deployment is under our control, and create canary deployment if it needs. - // Do not move this function to Preparing phase, otherwise multi canary deployments - // will be repeatedly created due to informer cache latency. 
- if _, err := c.claimDeployment(c.stable, c.canary); err != nil { - return false, err - } - - c.recorder.Event(c.release, v1.EventTypeNormal, "Verified", "ReleasePlan and the Deployment resource are verified") - return true, nil -} - -// PrepareBeforeProgress makes sure that the Deployment is under our control -func (c *DeploymentsRolloutController) PrepareBeforeProgress() (bool, *int32, error) { - // the workload is verified, and we should record revision and replicas info before progressing - if err := c.recordDeploymentRevisionAndReplicas(); err != nil { - klog.Errorf("Failed to record deployment(%v) revision and replicas info, error: %v", c.stableNamespacedName, err) - return false, nil, err - } - - c.recorder.Event(c.release, v1.EventTypeNormal, "Initialized", "Rollout resource are initialized") - return true, nil, nil -} - -// UpgradeOneBatch calculates the number of pods we can upgrade once -// according to the release plan and then set the canary deployment replicas -func (c *DeploymentsRolloutController) UpgradeOneBatch() (bool, error) { - if err := c.fetchStableDeployment(); err != nil { - return false, err - } - if err := c.fetchCanaryDeployment(); err != nil { - return false, err - } - - // canary replicas now we have at current state - currentCanaryReplicas := *c.canary.Spec.Replicas - - // canary goal we should achieve - canaryGoal := c.calculateCurrentCanary(c.newStatus.ObservedWorkloadReplicas) - - klog.V(3).InfoS("upgraded one batch, but no need to update replicas of canary Deployment", - "Deployment", client.ObjectKeyFromObject(c.canary), - "BatchRelease", c.releaseKey, - "current-batch", c.newStatus.CanaryStatus.CurrentBatch, - "canary-goal", canaryGoal, - "current-canary-replicas", currentCanaryReplicas, - "current-canary-status-replicas", c.canary.Status.UpdatedReplicas) - - if err := c.patchDeploymentReplicas(c.canary, canaryGoal); err != nil { - return false, err - } - - // patch current batch label to pods - patchDone, err := 
c.patchPodBatchLabel(canaryGoal) - if !patchDone || err != nil { - return false, err - } - - c.recorder.Eventf(c.release, v1.EventTypeNormal, "Batch Rollout", "Finished submitting all upgrade quests for batch %d", c.newStatus.CanaryStatus.CurrentBatch) - return true, nil -} - -// CheckOneBatchReady checks to see if the pods are all available according to the rollout plan -func (c *DeploymentsRolloutController) CheckOneBatchReady() (bool, error) { - if err := c.fetchStableDeployment(); err != nil { - return false, err - } - if err := c.fetchCanaryDeployment(); err != nil { - return false, err - } - - // in case of workload status is Untrustworthy - if c.canary.Status.ObservedGeneration != c.canary.Generation { - return false, nil - } - - // canary pods that have been created - canaryPodCount := c.canary.Status.Replicas - // canary pods that have been available - availableCanaryPodCount := c.canary.Status.AvailableReplicas - // canary goal that should have in current batch - canaryGoal := c.calculateCurrentCanary(c.newStatus.ObservedWorkloadReplicas) - // max unavailable of deployment - var maxUnavailable *intstr.IntOrString - if c.canary.Spec.Strategy.RollingUpdate != nil { - maxUnavailable = c.canary.Spec.Strategy.RollingUpdate.MaxUnavailable - } - - var err error - var pods []*v1.Pod - // if rolloutID is not set, no need to list pods, - // because we cannot patch correct batch label to pod. 
- if c.release.Spec.ReleasePlan.RolloutID != "" { - pods, err = util.ListOwnedPods(c.client, c.canary) - if err != nil { - return false, err - } - } - - klog.InfoS("checking the batch releasing progress", - "BatchRelease", c.releaseKey, - "len(pods)", len(pods), - "canary-goal", canaryGoal, - "current-batch", c.newStatus.CanaryStatus.CurrentBatch, - "canary-available-pod-count", availableCanaryPodCount, - "stable-pod-status-replicas", c.stable.Status.Replicas) - - if !isBatchReady(c.release, pods, maxUnavailable, canaryGoal, canaryGoal, canaryPodCount, availableCanaryPodCount) { - klog.Infof("BatchRelease(%v) batch is not ready yet, current batch=%d", c.releaseKey, c.newStatus.CanaryStatus.CurrentBatch) - return false, nil - } - - klog.Infof("BatchRelease(%v) batch is ready, current batch=%d", c.releaseKey, c.newStatus.CanaryStatus.CurrentBatch) - return true, nil -} - -// FinalizeProgress makes sure restore deployments and clean up some canary settings -func (c *DeploymentsRolloutController) FinalizeProgress(cleanup bool) (bool, error) { - if err := c.fetchStableDeployment(); client.IgnoreNotFound(err) != nil { - return false, err - } - - // make the deployment ride out of our control, and clean up canary resources - succeed, err := c.releaseDeployment(c.stable, cleanup) - if !succeed || err != nil { - klog.Errorf("Failed to finalize deployment(%v), error: %v", c.stableNamespacedName, err) - return false, err - } - - c.recorder.Eventf(c.release, v1.EventTypeNormal, "Finalized", "Finalized: cleanup=%v", cleanup) - return true, nil -} - -// SyncWorkloadInfo return workloadInfo if workload info is changed during rollout -// TODO: abstract a WorkloadEventTypeJudge interface for these following `if` clauses -func (c *DeploymentsRolloutController) SyncWorkloadInfo() (WorkloadEventType, *util.WorkloadInfo, error) { - // ignore the sync if the release plan is deleted - if c.release.DeletionTimestamp != nil { - return IgnoreWorkloadEvent, nil, nil - } - - var err error - 
err = c.fetchStableDeployment() - if err != nil { - if apierrors.IsNotFound(err) { - return WorkloadHasGone, nil, err - } - return "", nil, err - } - - err = c.fetchCanaryDeployment() - if client.IgnoreNotFound(err) != nil { - return "", nil, err - } - - workloadInfo := util.NewWorkloadInfo() - if c.canary != nil { - workloadInfo.Status = &util.WorkloadStatus{ - UpdatedReplicas: c.canary.Status.Replicas, - UpdatedReadyReplicas: c.canary.Status.AvailableReplicas, - } - } - - // in case of that the canary deployment is being deleted but still have the finalizer, it is out of our expectation - if c.canary != nil && c.canary.DeletionTimestamp != nil && controllerutil.ContainsFinalizer(c.canary, util.CanaryDeploymentFinalizer) { - return WorkloadUnHealthy, workloadInfo, nil - } - - // in case of that the workload status is trustworthy - if c.stable.Status.ObservedGeneration != c.stable.Generation { - klog.Warningf("Deployment(%v) is still reconciling, waiting for it to complete, generation: %v, observed: %v", - c.stableNamespacedName, c.stable.Generation, c.stable.Status.ObservedGeneration) - return WorkloadStillReconciling, workloadInfo, nil - } - - // in case of that the workload has been promoted - if !c.stable.Spec.Paused && c.stable.Status.UpdatedReplicas == c.stable.Status.Replicas { - return IgnoreWorkloadEvent, workloadInfo, nil - } - - // in case of that the workload is scaling up/down - if *c.stable.Spec.Replicas != c.newStatus.ObservedWorkloadReplicas && c.newStatus.ObservedWorkloadReplicas != -1 { - workloadInfo.Replicas = c.stable.Spec.Replicas - klog.Warningf("Deployment(%v) replicas changed during releasing, should pause and wait for it to complete, replicas from: %v -> %v", - c.stableNamespacedName, c.newStatus.ObservedWorkloadReplicas, *c.stable.Spec.Replicas) - return WorkloadReplicasChanged, workloadInfo, nil - } - - // in case of that the workload revision was changed - if hashRevision := util.ComputeHash(&c.stable.Spec.Template, nil); hashRevision 
!= c.newStatus.UpdateRevision { - workloadInfo.Status.UpdateRevision = hashRevision - klog.Warningf("Deployment(%v) updateRevision changed during releasing", c.stableNamespacedName) - return WorkloadPodTemplateChanged, workloadInfo, nil - } - - return IgnoreWorkloadEvent, workloadInfo, nil -} - -/* ---------------------------------- -The functions below are helper functions -------------------------------------- */ -// fetchStableDeployment fetch stable deployment to c.stable -func (c *DeploymentsRolloutController) fetchStableDeployment() error { - if c.stable != nil { - return nil - } - - stable := &apps.Deployment{} - if err := c.client.Get(context.TODO(), c.stableNamespacedName, stable); err != nil { - klog.Errorf("BatchRelease(%v) get stable deployment error: %v", c.releaseKey, err) - return err - } - c.stable = stable - return nil -} - -// fetchCanaryDeployment fetch canary deployment to c.canary -func (c *DeploymentsRolloutController) fetchCanaryDeployment() error { - var err error - defer func() { - if err != nil { - klog.Errorf("BatchRelease(%v) get canary deployment error: %v", c.releaseKey, err) - } - }() - - err = c.fetchStableDeployment() - if err != nil { - return err - } - - ds, err := c.listCanaryDeployment(client.InNamespace(c.stable.Namespace), utilclient.DisableDeepCopy) - if err != nil { - return err - } - - ds = util.FilterActiveDeployment(ds) - sort.Slice(ds, func(i, j int) bool { - return ds[i].CreationTimestamp.After(ds[j].CreationTimestamp.Time) - }) - - if len(ds) == 0 || !util.EqualIgnoreHash(&ds[0].Spec.Template, &c.stable.Spec.Template) { - err = apierrors.NewNotFound(schema.GroupResource{ - Group: apps.SchemeGroupVersion.Group, - Resource: c.stable.Kind, - }, fmt.Sprintf("%v-canary", c.canaryNamespacedName.Name)) - return err - } - - c.canary = ds[0].DeepCopy() - return nil -} - -// recordDeploymentRevisionAndReplicas records stableRevision, canaryRevision, workloadReplicas to BatchRelease.Status -func (c *DeploymentsRolloutController) 
recordDeploymentRevisionAndReplicas() error { - err := c.fetchStableDeployment() - if err != nil { - return err - } - - updateRevision := util.ComputeHash(&c.stable.Spec.Template, nil) - stableRevision, err := c.GetStablePodTemplateHash(c.stable) - if err != nil { - return err - } - c.newStatus.StableRevision = stableRevision - c.newStatus.UpdateRevision = updateRevision - c.newStatus.ObservedWorkloadReplicas = *c.stable.Spec.Replicas - return nil -} - -func (c *DeploymentsRolloutController) patchPodBatchLabel(canaryGoal int32) (bool, error) { - rolloutID := c.release.Spec.ReleasePlan.RolloutID - // if rolloutID is not set, no need to list pods, - // because we cannot patch correct batch label to pod. - if rolloutID == "" || c.canary == nil { - return true, nil - } - - pods, err := util.ListOwnedPods(c.client, c.canary) - if err != nil { - klog.Errorf("Failed to list pods for Deployment %v", c.stableNamespacedName) - return false, err - } - - batchID := c.release.Status.CanaryStatus.CurrentBatch + 1 - updateRevision := c.release.Status.UpdateRevision - return patchPodBatchLabel(c.client, pods, rolloutID, batchID, updateRevision, canaryGoal, c.releaseKey) -} diff --git a/pkg/controller/batchrelease/workloads/deployment_canary_controller.go b/pkg/controller/batchrelease/workloads/deployment_canary_controller.go deleted file mode 100644 index 6a99d323..00000000 --- a/pkg/controller/batchrelease/workloads/deployment_canary_controller.go +++ /dev/null @@ -1,316 +0,0 @@ -/* -Copyright 2022 The Kruise Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -package workloads - -import ( - "context" - "encoding/json" - "fmt" - "sort" - - "github.com/openkruise/rollouts/pkg/util" - utilclient "github.com/openkruise/rollouts/pkg/util/client" - apps "k8s.io/api/apps/v1" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/klog/v2" - "k8s.io/utils/pointer" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" -) - -// deploymentController is the place to hold fields needed for handle Deployment type of workloads -type deploymentController struct { - workloadController - releaseKey types.NamespacedName - stableNamespacedName types.NamespacedName - canaryNamespacedName types.NamespacedName -} - -// add the parent controller to the owner of the deployment, unpause it and initialize the size -// before kicking start the update and start from every pod in the old version -func (c *deploymentController) claimDeployment(stableDeploy, canaryDeploy *apps.Deployment) (*apps.Deployment, error) { - var controlled bool - if controlInfo, ok := stableDeploy.Annotations[util.BatchReleaseControlAnnotation]; ok && controlInfo != "" { - ref := &metav1.OwnerReference{} - err := json.Unmarshal([]byte(controlInfo), ref) - if err == nil && ref.UID == c.release.UID { - klog.V(3).Infof("Deployment(%v) has been controlled by this BatchRelease(%v), no need to claim again", - c.stableNamespacedName, c.releaseKey) - controlled = true - } else { - klog.Errorf("Failed to parse controller info from Deployment(%v) annotation, error: %v, controller info: %+v", - c.stableNamespacedName, err, *ref) - } - } - - // patch control info to stable deployments if it needs - if !controlled { - controlInfo, _ := json.Marshal(metav1.NewControllerRef(c.release, 
c.release.GetObjectKind().GroupVersionKind())) - patchedInfo := map[string]interface{}{ - "metadata": map[string]interface{}{ - "annotations": map[string]string{ - util.BatchReleaseControlAnnotation: string(controlInfo), - }, - }, - } - cloneObj := stableDeploy.DeepCopy() - patchedBody, _ := json.Marshal(patchedInfo) - if err := c.client.Patch(context.TODO(), cloneObj, client.RawPatch(types.StrategicMergePatchType, patchedBody)); err != nil { - klog.Errorf("Failed to patch controller info annotations to stable deployment(%v), error: %v", client.ObjectKeyFromObject(stableDeploy), err) - return canaryDeploy, err - } - } - - // create canary deployment if it needs - if canaryDeploy == nil || !util.EqualIgnoreHash(&stableDeploy.Spec.Template, &canaryDeploy.Spec.Template) { - var err error - for { - canaryDeploy, err = c.createCanaryDeployment(stableDeploy) - if err != nil { - if errors.IsAlreadyExists(err) { - continue - } - return nil, err - } - break - } - } - - return canaryDeploy, nil -} - -func (c *deploymentController) createCanaryDeployment(stableDeploy *apps.Deployment) (*apps.Deployment, error) { - // TODO: find a better way to generate canary deployment name - suffix := util.GenRandomStr(3) - canaryDeploy := &apps.Deployment{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%v-%v", c.canaryNamespacedName.Name, suffix), - Namespace: c.stableNamespacedName.Namespace, - Labels: map[string]string{}, - Annotations: map[string]string{}, - }, - } - for k, v := range stableDeploy.Labels { - canaryDeploy.Labels[k] = v - } - for k, v := range stableDeploy.Annotations { - canaryDeploy.Annotations[k] = v - } - for _, f := range stableDeploy.Finalizers { - canaryDeploy.Finalizers = append(canaryDeploy.Finalizers, f) - } - for _, o := range stableDeploy.OwnerReferences { - canaryDeploy.OwnerReferences = append(canaryDeploy.OwnerReferences, *o.DeepCopy()) - } - - canaryDeploy.Finalizers = append(canaryDeploy.Finalizers, util.CanaryDeploymentFinalizer) - 
canaryDeploy.OwnerReferences = append(canaryDeploy.OwnerReferences, *metav1.NewControllerRef(c.release, c.release.GroupVersionKind())) - - // set extra labels & annotations - canaryDeploy.Labels[util.CanaryDeploymentLabel] = c.stableNamespacedName.Name - owner := metav1.NewControllerRef(c.release, c.release.GroupVersionKind()) - if owner != nil { - ownerInfo, _ := json.Marshal(owner) - canaryDeploy.Annotations[util.BatchReleaseControlAnnotation] = string(ownerInfo) - } - - // copy spec - canaryDeploy.Spec = *stableDeploy.Spec.DeepCopy() - canaryDeploy.Spec.Replicas = pointer.Int32Ptr(0) - canaryDeploy.Spec.Paused = false - - // create canary Deployment - canaryKey := client.ObjectKeyFromObject(canaryDeploy) - err := c.client.Create(context.TODO(), canaryDeploy) - if err != nil { - klog.Errorf("Failed to create canary Deployment(%v), error: %v", canaryKey, err) - return nil, err - } - - canaryDeployInfo, _ := json.Marshal(canaryDeploy) - klog.V(3).Infof("Create canary Deployment(%v) successfully, details: %v", canaryKey, string(canaryDeployInfo)) - return canaryDeploy, err -} - -func (c *deploymentController) releaseDeployment(stableDeploy *apps.Deployment, cleanup bool) (bool, error) { - var patchErr, deleteErr error - - // clean up control info for stable deployment if it needs - if stableDeploy != nil && len(stableDeploy.Annotations[util.BatchReleaseControlAnnotation]) > 0 { - var patchByte []byte - cloneObj := stableDeploy.DeepCopy() - patchByte = []byte(fmt.Sprintf(`{"metadata":{"annotations":{"%v":null}}}`, util.BatchReleaseControlAnnotation)) - patchErr = c.client.Patch(context.TODO(), cloneObj, client.RawPatch(types.StrategicMergePatchType, patchByte)) - if patchErr != nil { - klog.Errorf("Error occurred when patching Deployment(%v), error: %v", c.stableNamespacedName, patchErr) - return false, patchErr - } - } - - // clean up canary deployment if it needs - if cleanup { - ds, err := 
c.listCanaryDeployment(client.InNamespace(c.stableNamespacedName.Namespace)) - if err != nil { - return false, err - } - - // must make sure the older is deleted firstly - sort.Slice(ds, func(i, j int) bool { - return ds[i].CreationTimestamp.Before(&ds[j].CreationTimestamp) - }) - - // delete all the canary deployments - for _, d := range ds { - // clean up finalizers first - if controllerutil.ContainsFinalizer(d, util.CanaryDeploymentFinalizer) { - updateErr := util.UpdateFinalizer(c.client, d, util.RemoveFinalizerOpType, util.CanaryDeploymentFinalizer) - if updateErr != nil && !errors.IsNotFound(updateErr) { - klog.Error("Error occurred when updating Deployment(%v), error: %v", client.ObjectKeyFromObject(d), updateErr) - return false, updateErr - } - return false, nil - } - - // delete the deployment - deleteErr = c.client.Delete(context.TODO(), d) - if deleteErr != nil && !errors.IsNotFound(deleteErr) { - klog.Errorf("Error occurred when deleting Deployment(%v), error: %v", client.ObjectKeyFromObject(d), deleteErr) - return false, deleteErr - } - } - } - - klog.V(3).Infof("Release Deployment(%v) Successfully", c.stableNamespacedName) - return true, nil -} - -// scale the deployment -func (c *deploymentController) patchDeploymentReplicas(deploy *apps.Deployment, replicas int32) error { - if *deploy.Spec.Replicas >= replicas { - return nil - } - - patch := map[string]interface{}{ - "spec": map[string]interface{}{ - "replicas": pointer.Int32Ptr(replicas), - }, - } - - cloneObj := deploy.DeepCopy() - patchByte, _ := json.Marshal(patch) - if err := c.client.Patch(context.TODO(), cloneObj, client.RawPatch(types.MergePatchType, patchByte)); err != nil { - c.recorder.Eventf(c.release, v1.EventTypeWarning, "PatchPartitionFailed", - "Failed to update the canary Deployment to the correct canary replicas %d, error: %v", replicas, err) - return err - } - - klog.InfoS("Submitted modified partition quest for canary Deployment", "Deployment", - 
client.ObjectKeyFromObject(deploy), "target canary replicas size", replicas, "batch", c.newStatus.CanaryStatus.CurrentBatch) - return nil -} - -// GetStablePodTemplateHash returns latest/stable revision hash of deployment -func (c *deploymentController) GetStablePodTemplateHash(deploy *apps.Deployment) (string, error) { - if deploy == nil { - return "", fmt.Errorf("workload cannot be found, may be deleted or not be created yet") - } - - rss, err := c.listReplicaSetsFor(deploy) - if err != nil { - return "", err - } - - sort.Slice(rss, func(i, j int) bool { - return rss[i].CreationTimestamp.Before(&rss[j].CreationTimestamp) - }) - - for _, rs := range rss { - if rs.Spec.Replicas != nil && *rs.Spec.Replicas > 0 { - return rs.Labels[apps.DefaultDeploymentUniqueLabelKey], nil - } - } - - return "", fmt.Errorf("cannot get stable pod-template-hash for deployment(%v)", client.ObjectKeyFromObject(deploy)) -} - -// listReplicaSetsFor list all owned replicaSets of deployment, including those have deletionTimestamp -func (c *deploymentController) listReplicaSetsFor(deploy *apps.Deployment) ([]*apps.ReplicaSet, error) { - deploySelector, err := metav1.LabelSelectorAsSelector(deploy.Spec.Selector) - if err != nil { - return nil, err - } - - rsList := &apps.ReplicaSetList{} - err = c.client.List(context.TODO(), rsList, utilclient.DisableDeepCopy, - &client.ListOptions{Namespace: deploy.Namespace, LabelSelector: deploySelector}) - if err != nil { - return nil, err - } - - var rss []*apps.ReplicaSet - for i := range rsList.Items { - rs := &rsList.Items[i] - if rs.DeletionTimestamp != nil { - continue - } - if owner := metav1.GetControllerOf(rs); owner == nil || owner.UID != deploy.UID { - continue - } - rss = append(rss, rs) - } - return rss, nil -} - -func (c *deploymentController) listCanaryDeployment(options ...client.ListOption) ([]*apps.Deployment, error) { - dList := &apps.DeploymentList{} - if err := c.client.List(context.TODO(), dList, options...); err != nil { - return 
nil, err - } - - var ds []*apps.Deployment - for i := range dList.Items { - d := &dList.Items[i] - o := metav1.GetControllerOf(d) - if o == nil || o.UID != c.release.UID { - continue - } - ds = append(ds, d) - } - - return ds, nil -} - -// the target workload size for the current batch -func (c *deploymentController) calculateCurrentCanary(totalSize int32) int32 { - targetSize := int32(calculateNewBatchTarget(&c.release.Spec.ReleasePlan, int(totalSize), int(c.newStatus.CanaryStatus.CurrentBatch))) - klog.InfoS("Calculated the number of pods in the canary Deployment after current batch", - "Deployment", c.stableNamespacedName, "BatchRelease", c.releaseKey, - "current batch", c.newStatus.CanaryStatus.CurrentBatch, "workload updateRevision size", targetSize) - return targetSize -} - -// the source workload size for the current batch -func (c *deploymentController) calculateCurrentStable(totalSize int32) int32 { - sourceSize := totalSize - c.calculateCurrentCanary(totalSize) - klog.InfoS("Calculated the number of pods in the stable Deployment after current batch", - "Deployment", c.stableNamespacedName, "BatchRelease", c.releaseKey, - "current batch", c.newStatus.CanaryStatus.CurrentBatch, "workload stableRevision size", sourceSize) - return sourceSize -} diff --git a/pkg/controller/batchrelease/workloads/deployment_canary_controller_test.go b/pkg/controller/batchrelease/workloads/deployment_canary_controller_test.go deleted file mode 100644 index b286bdf6..00000000 --- a/pkg/controller/batchrelease/workloads/deployment_canary_controller_test.go +++ /dev/null @@ -1,198 +0,0 @@ -/* -Copyright 2022 The Kruise Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package workloads - -import ( - "context" - "reflect" - "testing" - - . "github.com/onsi/ginkgo" - . "github.com/onsi/gomega" - "github.com/openkruise/rollouts/api/v1alpha1" - apps "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/apimachinery/pkg/util/uuid" - "k8s.io/client-go/tools/record" - "k8s.io/utils/pointer" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -var ( - releaseDeploy = &v1alpha1.BatchRelease{ - TypeMeta: metav1.TypeMeta{ - APIVersion: v1alpha1.GroupVersion.String(), - Kind: "BatchRelease", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "release", - Namespace: "application", - UID: uuid.NewUUID(), - }, - Spec: v1alpha1.BatchReleaseSpec{ - TargetRef: v1alpha1.ObjectRef{ - WorkloadRef: &v1alpha1.WorkloadRef{ - APIVersion: "apps/v1", - Kind: "Deployment", - Name: "sample", - }, - }, - ReleasePlan: v1alpha1.ReleasePlan{ - Batches: []v1alpha1.ReleaseBatch{ - { - CanaryReplicas: intstr.FromString("10%"), - }, - { - CanaryReplicas: intstr.FromString("50%"), - }, - { - CanaryReplicas: intstr.FromString("80%"), - }, - }, - }, - }, - } - - stableDeploy = &apps.Deployment{ - TypeMeta: metav1.TypeMeta{ - APIVersion: apps.SchemeGroupVersion.String(), - Kind: "Deployment", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "sample", - Namespace: "application", - UID: types.UID("87076677"), - Generation: 2, - Labels: map[string]string{ - 
"app": "busybox", - apps.DefaultDeploymentUniqueLabelKey: "update-pod-hash", - }, - }, - Spec: apps.DeploymentSpec{ - Replicas: pointer.Int32Ptr(100), - Strategy: apps.DeploymentStrategy{ - Type: apps.RollingUpdateDeploymentStrategyType, - RollingUpdate: &apps.RollingUpdateDeployment{ - MaxSurge: &intstr.IntOrString{Type: intstr.Int, IntVal: int32(1)}, - MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: int32(2)}, - }, - }, - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "busybox", - }, - }, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: containers("v2"), - }, - }, - }, - Status: apps.DeploymentStatus{ - Replicas: 100, - ReadyReplicas: 100, - UpdatedReplicas: 0, - AvailableReplicas: 100, - }, - } -) - -func TestDeploymentController(t *testing.T) { - RegisterFailHandler(Fail) - - cases := []struct { - Name string - Paused bool - Cleanup bool - }{ - { - Name: "paused=true, cleanup=true", - Paused: true, - Cleanup: true, - }, - { - Name: "paused=true, cleanup=false", - Paused: true, - Cleanup: false, - }, - { - Name: "paused=false cleanup=true", - Paused: false, - Cleanup: true, - }, - { - Name: "paused=false , cleanup=false", - Paused: false, - Cleanup: false, - }, - } - - for _, cs := range cases { - t.Run(cs.Name, func(t *testing.T) { - release := releaseDeploy.DeepCopy() - deploy := stableDeploy.DeepCopy() - cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(release, deploy).Build() - rec := record.NewFakeRecorder(100) - c := deploymentController{ - workloadController: workloadController{ - client: cli, - recorder: rec, - release: releaseDeploy, - newStatus: &releaseDeploy.Status, - }, - stableNamespacedName: client.ObjectKeyFromObject(stableDeploy), - canaryNamespacedName: client.ObjectKeyFromObject(stableDeploy), - } - oldObject := &apps.Deployment{} - Expect(cli.Get(context.TODO(), c.stableNamespacedName, oldObject)).NotTo(HaveOccurred()) - canary, err := 
c.claimDeployment(oldObject.DeepCopy(), nil) - Expect(canary).ShouldNot(BeNil()) - Expect(err).NotTo(HaveOccurred()) - - // The following logic should have been done in controller-runtime - { - dList := &apps.DeploymentList{} - Expect(cli.List(context.TODO(), dList)).NotTo(HaveOccurred()) - for i := range dList.Items { - d := &dList.Items[i] - d.SetGroupVersionKind(schema.GroupVersionKind{ - Group: "apps", Version: "v1", Kind: "Deployment", - }) - Expect(cli.Update(context.TODO(), d)).NotTo(HaveOccurred()) - } - } - - newObject := &apps.Deployment{} - Expect(cli.Get(context.TODO(), c.stableNamespacedName, newObject)).NotTo(HaveOccurred()) - _, err = c.releaseDeployment(newObject.DeepCopy(), cs.Cleanup) - Expect(err).NotTo(HaveOccurred()) - - newObject = &apps.Deployment{} - Expect(cli.Get(context.TODO(), c.stableNamespacedName, newObject)).NotTo(HaveOccurred()) - newObject.Spec.Paused = oldObject.Spec.Paused - Expect(reflect.DeepEqual(oldObject.Spec, newObject.Spec)).Should(BeTrue()) - Expect(reflect.DeepEqual(oldObject.Labels, newObject.Labels)).Should(BeTrue()) - Expect(reflect.DeepEqual(oldObject.Finalizers, newObject.Finalizers)).Should(BeTrue()) - Expect(reflect.DeepEqual(oldObject.Annotations, newObject.Annotations)).Should(BeTrue()) - }) - } -} diff --git a/pkg/controller/batchrelease/workloads/statefulset_like_controller.go b/pkg/controller/batchrelease/workloads/statefulset_like_controller.go deleted file mode 100644 index 9f9e12f9..00000000 --- a/pkg/controller/batchrelease/workloads/statefulset_like_controller.go +++ /dev/null @@ -1,188 +0,0 @@ -/* -Copyright 2019 The Kruise Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package workloads - -import ( - "context" - - appsv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1" - "github.com/openkruise/rollouts/pkg/util" - apps "k8s.io/api/apps/v1" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/tools/record" - "k8s.io/klog/v2" - "k8s.io/utils/pointer" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type StatefulSetLikeController struct { - client.Client - recorder record.EventRecorder - release *appsv1alpha1.BatchRelease - namespacedName types.NamespacedName - workloadObj client.Object - gvk schema.GroupVersionKind - pods []*v1.Pod -} - -func NewStatefulSetLikeController(c client.Client, r record.EventRecorder, b *appsv1alpha1.BatchRelease, n types.NamespacedName, gvk schema.GroupVersionKind) UnifiedWorkloadController { - return &StatefulSetLikeController{ - Client: c, - recorder: r, - release: b, - namespacedName: n, - gvk: gvk, - } -} - -func (c *StatefulSetLikeController) GetWorkloadObject() (client.Object, error) { - if c.workloadObj == nil { - workloadObj := util.GetEmptyWorkloadObject(c.gvk) - if workloadObj == nil { - return nil, errors.NewNotFound(schema.GroupResource{Group: c.gvk.Group, Resource: c.gvk.Kind}, c.namespacedName.Name) - } - if err := c.Get(context.TODO(), c.namespacedName, workloadObj); err != nil { - return nil, err - } - c.workloadObj = workloadObj - } - return c.workloadObj, nil -} - -func (c *StatefulSetLikeController) GetWorkloadInfo() (*util.WorkloadInfo, error) { - set, err := 
c.GetWorkloadObject() - if err != nil { - return nil, err - } - - workloadInfo := util.ParseStatefulSetInfo(set, c.namespacedName) - workloadInfo.Paused = true - if workloadInfo.Status.UpdatedReadyReplicas <= 0 { - pods, err := c.ListOwnedPods() - if err != nil { - return nil, err - } - updatedReadyReplicas, err := c.countUpdatedReadyPods(pods, workloadInfo.Status.UpdateRevision) - if err != nil { - return nil, err - } - workloadInfo.Status.UpdatedReadyReplicas = updatedReadyReplicas - } - - return workloadInfo, nil -} - -func (c *StatefulSetLikeController) ClaimWorkload() (bool, error) { - set, err := c.GetWorkloadObject() - if err != nil { - return false, err - } - - err = claimWorkload(c.Client, c.release, set, map[string]interface{}{ - "type": apps.RollingUpdateStatefulSetStrategyType, - "rollingUpdate": map[string]interface{}{ - "partition": pointer.Int32(util.GetReplicas(set)), - }, - }) - if err != nil { - return false, err - } - - klog.V(3).Infof("Claim StatefulSet(%v) Successfully", c.namespacedName) - return true, nil -} - -func (c *StatefulSetLikeController) ReleaseWorkload(cleanup bool) (bool, error) { - set, err := c.GetWorkloadObject() - if err != nil { - if errors.IsNotFound(err) { - return true, nil - } - return false, err - } - - err = releaseWorkload(c.Client, set) - if err != nil { - c.recorder.Eventf(c.release, v1.EventTypeWarning, "ReleaseFailed", err.Error()) - return false, err - } - - klog.V(3).Infof("Release StatefulSet(%v) Successfully", c.namespacedName) - return true, nil -} - -func (c *StatefulSetLikeController) UpgradeBatch(canaryReplicasGoal, stableReplicasGoal int32) (bool, error) { - set, err := c.GetWorkloadObject() - if err != nil { - return false, err - } - - // if no needs to patch partition - partition := util.GetStatefulSetPartition(set) - if partition <= stableReplicasGoal { - return true, nil - } - - err = patchSpec(c.Client, set, map[string]interface{}{ - "updateStrategy": map[string]interface{}{ - "rollingUpdate": 
map[string]interface{}{ - "partition": pointer.Int32(stableReplicasGoal), - }, - }, - }) - if err != nil { - return false, err - } - - klog.V(3).Infof("Upgrade StatefulSet(%v) Partition to %v Successfully", c.namespacedName, stableReplicasGoal) - return true, nil -} - -func (c *StatefulSetLikeController) IsOrderedUpdate() (bool, error) { - set, err := c.GetWorkloadObject() - if err != nil { - return false, err - } - - return !util.IsStatefulSetUnorderedUpdate(set), nil -} - -func (c *StatefulSetLikeController) ListOwnedPods() ([]*v1.Pod, error) { - if c.pods != nil { - return c.pods, nil - } - set, err := c.GetWorkloadObject() - if err != nil { - return nil, err - } - c.pods, err = util.ListOwnedPods(c.Client, set) - return c.pods, err -} - -func (c *StatefulSetLikeController) countUpdatedReadyPods(pods []*v1.Pod, updateRevision string) (int32, error) { - activePods := util.FilterActivePods(pods) - updatedReadyReplicas := int32(0) - for _, pod := range activePods { - if util.IsConsistentWithRevision(pod, updateRevision) && util.IsPodReady(pod) { - updatedReadyReplicas++ - } - } - return updatedReadyReplicas, nil -} diff --git a/pkg/controller/batchrelease/workloads/workload_rollout_control_plane.go b/pkg/controller/batchrelease/workloads/workload_rollout_control_plane.go deleted file mode 100644 index 55d53e5b..00000000 --- a/pkg/controller/batchrelease/workloads/workload_rollout_control_plane.go +++ /dev/null @@ -1,407 +0,0 @@ -/* -Copyright 2022 The Kruise Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -package workloads - -import ( - "context" - "fmt" - - "github.com/openkruise/rollouts/api/v1alpha1" - "github.com/openkruise/rollouts/pkg/util" - v1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/tools/record" - "k8s.io/klog/v2" - "k8s.io/utils/pointer" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type UnifiedWorkloadController interface { - GetWorkloadInfo() (*util.WorkloadInfo, error) - ClaimWorkload() (bool, error) - ReleaseWorkload(cleanup bool) (bool, error) - UpgradeBatch(canaryReplicasGoal, stableReplicasGoal int32) (bool, error) - ListOwnedPods() ([]*v1.Pod, error) - IsOrderedUpdate() (bool, error) -} - -// UnifiedWorkloadRolloutControlPlane is responsible for handling rollout StatefulSet type of workloads -type UnifiedWorkloadRolloutControlPlane struct { - UnifiedWorkloadController - client client.Client - recorder record.EventRecorder - release *v1alpha1.BatchRelease - newStatus *v1alpha1.BatchReleaseStatus -} - -type NewUnifiedControllerFunc = func(c client.Client, r record.EventRecorder, p *v1alpha1.BatchRelease, n types.NamespacedName, gvk schema.GroupVersionKind) UnifiedWorkloadController - -// NewUnifiedWorkloadRolloutControlPlane creates a new workload rollout controller -func NewUnifiedWorkloadRolloutControlPlane(f NewUnifiedControllerFunc, c client.Client, r record.EventRecorder, p *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, n types.NamespacedName, gvk schema.GroupVersionKind) *UnifiedWorkloadRolloutControlPlane { - return &UnifiedWorkloadRolloutControlPlane{ - client: c, - recorder: r, - release: p, - newStatus: newStatus, - UnifiedWorkloadController: f(c, r, p, n, gvk), - } -} - -// VerifyWorkload verifies that the workload is ready to execute release plan -func (c 
*UnifiedWorkloadRolloutControlPlane) VerifyWorkload() (bool, error) { - return true, nil -} - -// prepareBeforeRollback makes sure that the updated pods have been patched no-need-update label. -// return values: -// - bool: whether all updated pods have been patched no-need-update label; -// - *int32: how many pods have been patched; -// - err: whether error occurs. -func (c *UnifiedWorkloadRolloutControlPlane) prepareBeforeRollback() (bool, *int32, error) { - if c.release.Annotations[util.RollbackInBatchAnnotation] == "" { - return true, nil, nil - } - - noNeedRollbackReplicas := int32(0) - rolloutID := c.release.Spec.ReleasePlan.RolloutID - if rolloutID == "" { - return true, &noNeedRollbackReplicas, nil - } - - workloadInfo, err := c.GetWorkloadInfo() - if err != nil { - return false, &noNeedRollbackReplicas, nil - } - - pods, err := c.ListOwnedPods() - if err != nil { - klog.Errorf("Failed to list pods for %v", workloadInfo.GVKWithName) - return false, &noNeedRollbackReplicas, err - } - - updateRevision := workloadInfo.Status.UpdateRevision - var filterPods []*v1.Pod - for i := range pods { - if !pods[i].DeletionTimestamp.IsZero() { - continue - } - if !util.IsConsistentWithRevision(pods[i], updateRevision) { - continue - } - if id, ok := pods[i].Labels[util.NoNeedUpdatePodLabel]; ok && id == rolloutID { - noNeedRollbackReplicas++ - continue - } - filterPods = append(filterPods, pods[i]) - } - - if len(filterPods) == 0 { - return true, &noNeedRollbackReplicas, nil - } - - for _, pod := range filterPods { - podClone := pod.DeepCopy() - body := fmt.Sprintf(`{"metadata":{"labels":{"%s":"%s"}}}`, util.NoNeedUpdatePodLabel, rolloutID) - err = c.client.Patch(context.TODO(), podClone, client.RawPatch(types.StrategicMergePatchType, []byte(body))) - if err != nil { - klog.Errorf("Failed to patch rollback labels[%s]=%s to pod %v", util.NoNeedUpdatePodLabel, rolloutID, client.ObjectKeyFromObject(pod)) - return false, &noNeedRollbackReplicas, err - } else { - 
klog.Info("Succeeded to patch rollback labels[%s]=%s to pod %v", util.NoNeedUpdatePodLabel, rolloutID, client.ObjectKeyFromObject(pod)) - } - noNeedRollbackReplicas++ - } - klog.Infof("BatchRelease(%v) find %v replicas no need to rollback", client.ObjectKeyFromObject(c.release), noNeedRollbackReplicas) - return false, &noNeedRollbackReplicas, nil -} - -// PrepareBeforeProgress makes sure that the source and target workload is under our control -func (c *UnifiedWorkloadRolloutControlPlane) PrepareBeforeProgress() (bool, *int32, error) { - done, noNeedRollbackReplicas, err := c.prepareBeforeRollback() - if err != nil || !done { - return false, nil, err - } - - // claim the workload is under our control - done, err = c.ClaimWorkload() - if !done || err != nil { - return false, noNeedRollbackReplicas, err - } - - // record revisions and replicas info to BatchRelease.Status - err = c.RecordWorkloadRevisionAndReplicas() - if err != nil { - return false, noNeedRollbackReplicas, err - } - - c.recorder.Event(c.release, v1.EventTypeNormal, "InitializedSuccessfully", "Rollout resource are initialized") - return true, noNeedRollbackReplicas, nil -} - -// UpgradeOneBatch calculates the number of pods we can upgrade once according to the rollout spec -// and then set the partition accordingly -// TODO: support advanced statefulSet reserveOrdinal feature0 -func (c *UnifiedWorkloadRolloutControlPlane) UpgradeOneBatch() (bool, error) { - workloadInfo, err := c.GetWorkloadInfo() - if err != nil { - return false, err - } - - if c.release.Status.ObservedWorkloadReplicas == 0 { - klog.Infof("BatchRelease(%v) observed workload replicas is 0, no need to upgrade", client.ObjectKeyFromObject(c.release)) - return true, nil - } - - // if the workload status is untrustworthy - if workloadInfo.Status.ObservedGeneration != workloadInfo.Generation { - return false, nil - } - - pods, err := c.ListOwnedPods() - if err != nil { - return false, err - } - - var noNeedRollbackReplicas int32 - if 
c.newStatus.CanaryStatus.NoNeedUpdateReplicas != nil { - rolloutID := c.release.Spec.ReleasePlan.RolloutID - noNeedRollbackReplicas = countNoNeedRollbackReplicas(pods, c.newStatus.UpdateRevision, rolloutID) - c.newStatus.CanaryStatus.NoNeedUpdateReplicas = pointer.Int32(noNeedRollbackReplicas) - } - replicas := c.newStatus.ObservedWorkloadReplicas - currentBatch := c.newStatus.CanaryStatus.CurrentBatch - - // the number of canary pods should have in current batch in plan - plannedBatchCanaryReplicas := c.calculateCurrentCanary(c.newStatus.ObservedWorkloadReplicas) - // the number of canary pods that consider rollback context and other real-world situations - expectedBatchCanaryReplicas := c.calculateCurrentCanary(replicas - noNeedRollbackReplicas) - // the number of canary pods that consider rollback context and other real-world situations - expectedBatchStableReplicas := replicas - expectedBatchCanaryReplicas - - // if ordered update, partition is related with pod ordinals - // if unordered update, partition just like cloneSet partition - orderedUpdate, _ := c.IsOrderedUpdate() - if !orderedUpdate { - expectedBatchStableReplicas -= noNeedRollbackReplicas - } - - klog.V(3).InfoS("upgrade one batch, current info:", - "BatchRelease", client.ObjectKeyFromObject(c.release), - "currentBatch", currentBatch, - "replicas", replicas, - "noNeedRollbackReplicas", noNeedRollbackReplicas, - "plannedBatchCanaryReplicas", plannedBatchCanaryReplicas, - "expectedBatchCanaryReplicas", expectedBatchCanaryReplicas, - "expectedBatchStableReplicas", expectedBatchStableReplicas) - - isUpgradedDone, err := c.UpgradeBatch(expectedBatchCanaryReplicas, expectedBatchStableReplicas) - if err != nil || !isUpgradedDone { - return false, nil - } - - isPatchedDone, err := c.patchPodBatchLabel(pods, plannedBatchCanaryReplicas, expectedBatchStableReplicas) - if err != nil || !isPatchedDone { - return false, err - } - - c.recorder.Eventf(c.release, v1.EventTypeNormal, "SetBatchDone", - "Finished 
submitting all upgrade quests for batch %d", c.release.Status.CanaryStatus.CurrentBatch) - return true, nil -} - -// CheckOneBatchReady checks to see if the pods are all available according to the rollout plan -func (c *UnifiedWorkloadRolloutControlPlane) CheckOneBatchReady() (bool, error) { - workloadInfo, err := c.GetWorkloadInfo() - if err != nil { - return false, err - } - - if c.release.Status.ObservedWorkloadReplicas == 0 { - klog.Infof("BatchRelease(%v) observed workload replicas is 0, no need to check", client.ObjectKeyFromObject(c.release)) - return true, nil - } - - // if the workload status is untrustworthy - if workloadInfo.Status.ObservedGeneration != workloadInfo.Generation { - return false, nil - } - - var noNeedRollbackReplicas int32 - if c.newStatus.CanaryStatus.NoNeedUpdateReplicas != nil { - noNeedRollbackReplicas = *c.newStatus.CanaryStatus.NoNeedUpdateReplicas - } - - replicas := c.newStatus.ObservedWorkloadReplicas - updatedReplicas := workloadInfo.Status.UpdatedReplicas - updatedReadyReplicas := workloadInfo.Status.UpdatedReadyReplicas - - currentBatch := c.newStatus.CanaryStatus.CurrentBatch - // the number of canary pods should have in current batch in plan - plannedUpdatedReplicas := c.calculateCurrentCanary(c.newStatus.ObservedWorkloadReplicas) - // the number of canary pods that consider rollback context and other real-world situations - expectedUpdatedReplicas := c.calculateCurrentCanary(replicas - noNeedRollbackReplicas) - // the number of canary pods that consider rollback context and other real-world situations - expectedStableReplicas := replicas - expectedUpdatedReplicas - // the number of pods that should be upgraded in this batch - updatedReplicasInBatch := plannedUpdatedReplicas - if currentBatch > 0 { - updatedReplicasInBatch -= int32(calculateNewBatchTarget(&c.release.Spec.ReleasePlan, int(replicas), int(currentBatch-1))) - } - - // if ordered update, partition is related with pod ordinals - // if unordered update, partition 
just like cloneSet partition - orderedUpdate, _ := c.IsOrderedUpdate() - if !orderedUpdate { - expectedStableReplicas -= noNeedRollbackReplicas - } - - klog.V(3).InfoS("check one batch, current info:", - "BatchRelease", client.ObjectKeyFromObject(c.release), - "currentBatch", currentBatch, - "replicas", replicas, - "noNeedRollbackReplicas", noNeedRollbackReplicas, - "updatedReplicasInBatch", updatedReplicasInBatch, - "plannedUpdatedReplicas", plannedUpdatedReplicas, - "expectedUpdatedReplicas", expectedUpdatedReplicas, - "expectedStableReplicas", expectedStableReplicas) - - pods, err := c.ListOwnedPods() - if err != nil { - return false, err - } - - if !isBatchReady(c.release, pods, workloadInfo.MaxUnavailable, - plannedUpdatedReplicas, expectedUpdatedReplicas, updatedReplicas, updatedReadyReplicas) { - klog.Infof("BatchRelease(%v) batch is not ready yet, current batch=%d", klog.KObj(c.release), currentBatch) - return false, nil - } - - klog.Infof("BatchRelease(%v) %d batch is ready", klog.KObj(c.release), currentBatch) - return true, nil -} - -// FinalizeProgress makes sure the workload is all upgraded -func (c *UnifiedWorkloadRolloutControlPlane) FinalizeProgress(cleanup bool) (bool, error) { - if _, err := c.ReleaseWorkload(cleanup); err != nil { - return false, err - } - c.recorder.Eventf(c.release, v1.EventTypeNormal, "FinalizedSuccessfully", "Rollout resource are finalized: cleanup=%v", cleanup) - return true, nil -} - -// SyncWorkloadInfo return change type if workload was changed during release -func (c *UnifiedWorkloadRolloutControlPlane) SyncWorkloadInfo() (WorkloadEventType, *util.WorkloadInfo, error) { - // ignore the sync if the release plan is deleted - if c.release.DeletionTimestamp != nil { - return IgnoreWorkloadEvent, nil, nil - } - - workloadInfo, err := c.GetWorkloadInfo() - if err != nil { - if apierrors.IsNotFound(err) { - return WorkloadHasGone, nil, err - } - return "", nil, err - } - - // in case that the workload status is untrustworthy - 
if workloadInfo.Status.ObservedGeneration != workloadInfo.Generation { - klog.Warningf("%v is still reconciling, waiting for it to complete, generation: %v, observed: %v", - workloadInfo.GVKWithName, workloadInfo.Generation, workloadInfo.Status.ObservedGeneration) - return WorkloadStillReconciling, nil, nil - } - - // in case of that the updated revision of the workload is promoted - if workloadInfo.Status.UpdatedReplicas == workloadInfo.Status.Replicas { - return IgnoreWorkloadEvent, workloadInfo, nil - } - - // in case of that the workload is scaling - if *workloadInfo.Replicas != c.release.Status.ObservedWorkloadReplicas { - klog.Warningf("%v replicas changed during releasing, should pause and wait for it to complete, "+ - "replicas from: %v -> %v", workloadInfo.GVKWithName, c.release.Status.ObservedWorkloadReplicas, *workloadInfo.Replicas) - return WorkloadReplicasChanged, workloadInfo, nil - } - - // updateRevision == CurrentRevision means CloneSet is rolling back or newly-created. - if workloadInfo.Status.UpdateRevision == workloadInfo.Status.StableRevision && - // stableRevision == UpdateRevision means CloneSet is rolling back instead of newly-created. - c.newStatus.StableRevision == workloadInfo.Status.UpdateRevision && - // StableRevision != observed UpdateRevision means the rollback event have not been observed. 
- c.newStatus.StableRevision != c.newStatus.UpdateRevision { - klog.Warningf("Workload(%v) is rolling back in batches", workloadInfo.GVKWithName) - return WorkloadRollbackInBatch, workloadInfo, nil - } - - // in case of that the workload was changed - if workloadInfo.Status.UpdateRevision != c.release.Status.UpdateRevision { - klog.Warningf("%v updateRevision changed during releasing, should try to restart the release plan, "+ - "updateRevision from: %v -> %v", workloadInfo.GVKWithName, c.release.Status.UpdateRevision, workloadInfo.Status.UpdateRevision) - return WorkloadPodTemplateChanged, workloadInfo, nil - } - - return IgnoreWorkloadEvent, workloadInfo, nil -} - -// the canary workload size for the current batch -func (c *UnifiedWorkloadRolloutControlPlane) calculateCurrentCanary(totalSize int32) int32 { - canaryGoal := int32(calculateNewBatchTarget(&c.release.Spec.ReleasePlan, int(totalSize), int(c.release.Status.CanaryStatus.CurrentBatch))) - klog.InfoS("Calculated the number of pods in the target workload after current batch", "BatchRelease", client.ObjectKeyFromObject(c.release), - "current batch", c.release.Status.CanaryStatus.CurrentBatch, "workload canary goal replicas goal", canaryGoal) - return canaryGoal -} - -// the source workload size for the current batch -func (c *UnifiedWorkloadRolloutControlPlane) calculateCurrentStable(totalSize int32) int32 { - stableGoal := totalSize - c.calculateCurrentCanary(totalSize) - klog.InfoS("Calculated the number of pods in the target workload after current batch", "BatchRelease", client.ObjectKeyFromObject(c.release), - "current batch", c.release.Status.CanaryStatus.CurrentBatch, "workload stable goal replicas goal", stableGoal) - return stableGoal -} - -func (c *UnifiedWorkloadRolloutControlPlane) RecordWorkloadRevisionAndReplicas() error { - workloadInfo, err := c.GetWorkloadInfo() - if err != nil { - return err - } - - c.newStatus.ObservedWorkloadReplicas = *workloadInfo.Replicas - c.newStatus.StableRevision = 
workloadInfo.Status.StableRevision - c.newStatus.UpdateRevision = workloadInfo.Status.UpdateRevision - return nil -} - -func (c *UnifiedWorkloadRolloutControlPlane) patchPodBatchLabel(pods []*v1.Pod, plannedBatchCanaryReplicas, expectedBatchStableReplicas int32) (bool, error) { - rolloutID := c.release.Spec.ReleasePlan.RolloutID - if rolloutID == "" { - return true, nil - } - - updateRevision := c.release.Status.UpdateRevision - batchID := c.release.Status.CanaryStatus.CurrentBatch + 1 - if c.newStatus.CanaryStatus.NoNeedUpdateReplicas != nil { - orderedUpdate, _ := c.IsOrderedUpdate() - if orderedUpdate { - pods = filterPodsForOrderedRollback(pods, plannedBatchCanaryReplicas, expectedBatchStableReplicas, c.release.Status.ObservedWorkloadReplicas, rolloutID, updateRevision) - } else { - pods = filterPodsForUnorderedRollback(pods, plannedBatchCanaryReplicas, expectedBatchStableReplicas, c.release.Status.ObservedWorkloadReplicas, rolloutID, updateRevision) - } - } - return patchPodBatchLabel(c.client, pods, rolloutID, batchID, updateRevision, plannedBatchCanaryReplicas, client.ObjectKeyFromObject(c.release)) -} diff --git a/pkg/controller/rollout/batchrelease/inner_batchrelease.go b/pkg/controller/rollout/batchrelease/inner_batchrelease.go index 0d4fcca8..3ca17216 100644 --- a/pkg/controller/rollout/batchrelease/inner_batchrelease.go +++ b/pkg/controller/rollout/batchrelease/inner_batchrelease.go @@ -22,13 +22,10 @@ import ( "reflect" "strconv" - appsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" rolloutv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1" "github.com/openkruise/rollouts/pkg/util" - apps "k8s.io/api/apps/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" @@ -171,7 +168,6 @@ func (r *innerBatchRelease) Promote(index int32, isRollback, 
checkReady bool) (b batch.Annotations[util.RollbackInBatchAnnotation] = r.rollout.Annotations[util.RollbackInBatchAnnotation] } - batch.Spec.Paused = false if batch.Labels == nil { batch.Labels = map[string]string{} } @@ -188,113 +184,52 @@ func (r *innerBatchRelease) Promote(index int32, isRollback, checkReady bool) (b return false, nil } -func (r *innerBatchRelease) resumeStableWorkload(checkReady bool) (bool, error) { - // cloneSet - switch r.rollout.Spec.ObjectRef.WorkloadRef.Kind { - case util.ControllerKruiseKindCS.Kind: - dName := r.rollout.Spec.ObjectRef.WorkloadRef.Name - obj := &appsv1alpha1.CloneSet{} - err := r.Get(context.TODO(), types.NamespacedName{Namespace: r.rollout.Namespace, Name: dName}, obj) - if err != nil { - if errors.IsNotFound(err) { - klog.Warningf("rollout(%s/%s) cloneSet(%s) not found, and return true", r.rollout.Namespace, r.rollout.Name, dName) - return true, nil - } - return false, err - } - // default partition.IntVal=0 - if !obj.Spec.UpdateStrategy.Paused && obj.Spec.UpdateStrategy.Partition.IntVal == 0 && obj.Spec.UpdateStrategy.Partition.Type == intstr.Int { +func (r *innerBatchRelease) resumeStableWorkload(waitReady bool) (bool, error) { + batch, err := r.FetchBatchRelease() + if err != nil { + if errors.IsNotFound(err) { return true, nil } + return false, err + } - err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { - if err = r.Get(context.TODO(), types.NamespacedName{Namespace: r.rollout.Namespace, Name: dName}, obj); err != nil { - return err - } - obj.Spec.UpdateStrategy.Paused = false - obj.Spec.UpdateStrategy.Partition = nil - return r.Update(context.TODO(), obj) - }) - if err != nil { - klog.Errorf("update rollout(%s/%s) cloneSet failed: %s", r.rollout.Namespace, r.rollout.Name, err.Error()) - return false, err - } - klog.Infof("resume rollout(%s/%s) cloneSet(paused=false,partition=nil) success", r.rollout.Namespace, r.rollout.Name) + // The Completed phase means batchRelease controller has processed all it 
+ // should process. If BatchRelease phase is completed, we can do nothing. + if batch.Status.Phase == rolloutv1alpha1.RolloutPhaseCompleted { return true, nil + } - case util.ControllerKindDep.Kind: - // deployment - dName := r.rollout.Spec.ObjectRef.WorkloadRef.Name - obj := &apps.Deployment{} - err := r.Get(context.TODO(), types.NamespacedName{Namespace: r.rollout.Namespace, Name: dName}, obj) - if err != nil { - if errors.IsNotFound(err) { - klog.Warningf("rollout(%s/%s) stable deployment(%s) not found, and return true", r.rollout.Namespace, r.rollout.Name, dName) - return true, nil + // If BatchPartition is nil, BatchRelease will directly resume workload via: + // - * set workload Paused = false if it needs; + // - * set workload Partition = null if it needs. + if batch.Spec.ReleasePlan.BatchPartition == nil { + // - If waitReady is true, finalizing policy must be "WaitResume"; + // - If waitReady is false, finalizing policy must NOT be "WaitResume"; + // Otherwise, we should correct it.
+ switch batch.Spec.ReleasePlan.FinalizingPolicy { + case rolloutv1alpha1.WaitResumeFinalizingPolicyType: + if waitReady { // no need to patch again + return false, nil } - return false, err - } - // set deployment paused=false - if obj.Spec.Paused { - err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { - if err = r.Get(context.TODO(), types.NamespacedName{Namespace: r.rollout.Namespace, Name: dName}, obj); err != nil { - return err - } - obj.Spec.Paused = false - return r.Update(context.TODO(), obj) - }) - if err != nil { - klog.Errorf("update rollout(%s/%s) stable deployment failed: %s", r.rollout.Namespace, r.rollout.Name, err.Error()) - return false, err + default: + if !waitReady { // no need to patch again + return false, nil } - klog.Infof("resume rollout(%s/%s) stable deployment(paused=false) success", r.rollout.Namespace, r.rollout.Name) - } - - // Whether to wait for pods are ready - if !checkReady { - return true, nil - } - data := util.DumpJSON(obj.Status) - // wait for all pods are ready - maxUnavailable, _ := intstr.GetScaledValueFromIntOrPercent(obj.Spec.Strategy.RollingUpdate.MaxUnavailable, int(*obj.Spec.Replicas), true) - if obj.Status.ObservedGeneration != obj.Generation || obj.Status.UpdatedReplicas != *obj.Spec.Replicas || - obj.Status.Replicas != *obj.Spec.Replicas || *obj.Spec.Replicas-obj.Status.AvailableReplicas > int32(maxUnavailable) { - klog.Infof("rollout(%s/%s) stable deployment status(%s), and wait a moment", r.rollout.Namespace, r.rollout.Name, data) - return false, nil - } - klog.Infof("resume rollout(%s/%s) stable deployment(paused=false) status(%s) success", r.rollout.Namespace, r.rollout.Name, data) - return true, nil - - default: - // statefulset-like workloads - workloadRef := r.rollout.Spec.ObjectRef.WorkloadRef - workloadNsn := types.NamespacedName{Namespace: r.rollout.Namespace, Name: workloadRef.Name} - workloadGVK := schema.FromAPIVersionAndKind(workloadRef.APIVersion, workloadRef.Kind) - obj := 
&unstructured.Unstructured{} - obj.SetGroupVersionKind(workloadGVK) - err := r.Get(context.TODO(), workloadNsn, obj) - if err != nil { - if errors.IsNotFound(err) { - klog.Warningf("rollout(%s/%s) statefulset(%s) not found, and return true", r.rollout.Namespace, r.rollout.Name, workloadNsn.Name) - return true, nil - } - return false, err } + } - if util.GetStatefulSetPartition(obj) == 0 { - return true, nil - } + // Correct finalizing policy. + policy := rolloutv1alpha1.ImmediateFinalizingPolicyType + if waitReady { + policy = rolloutv1alpha1.WaitResumeFinalizingPolicyType + } - cloneObj := obj.DeepCopy() - body := fmt.Sprintf(`{"spec":{"updateStrategy":{"rollingUpdate":{"partition":0}}}}`) - err = r.Patch(context.TODO(), cloneObj, client.RawPatch(types.MergePatchType, []byte(body))) - if err != nil { - klog.Errorf("patch rollout(%s/%s) statefulset failed: %s", r.rollout.Namespace, r.rollout.Name, err.Error()) - return false, err - } - klog.Infof("resume rollout(%s/%s) statefulset(partition=0) success", r.rollout.Namespace, r.rollout.Name) - return true, nil + // Patch BatchPartition and FinalizingPolicy, BatchPartition always patch null here. 
+ body := fmt.Sprintf(`{"spec":{"releasePlan":{"batchPartition":null,"finalizingPolicy":"%s"}}}`, policy) + if err = r.Patch(context.TODO(), batch, client.RawPatch(types.MergePatchType, []byte(body))); err != nil { + return false, err } + return false, nil } func (r *innerBatchRelease) Finalize() (bool, error) { diff --git a/pkg/util/controller_finder.go b/pkg/util/controller_finder.go index e3075490..bc812372 100644 --- a/pkg/util/controller_finder.go +++ b/pkg/util/controller_finder.go @@ -238,7 +238,7 @@ func (r *ControllerFinder) getStatefulSetLikeWorkload(namespace string, ref *rol return nil, err } - workloadInfo := ParseStatefulSetInfo(set, key) + workloadInfo := ParseWorkload(set) if workloadInfo.Generation != workloadInfo.Status.ObservedGeneration { return &Workload{IsStatusConsistent: false}, nil } @@ -249,7 +249,7 @@ func (r *ControllerFinder) getStatefulSetLikeWorkload(namespace string, ref *rol CanaryReplicas: workloadInfo.Status.UpdatedReplicas, CanaryReadyReplicas: workloadInfo.Status.UpdatedReadyReplicas, ObjectMeta: workloadInfo.ObjectMeta, - Replicas: *workloadInfo.Replicas, + Replicas: workloadInfo.Replicas, PodTemplateHash: workloadInfo.Status.UpdateRevision, IsStatusConsistent: true, } diff --git a/pkg/util/expectation/resource_expectations.go b/pkg/util/expectation/resource_expectations.go new file mode 100644 index 00000000..fcc70d41 --- /dev/null +++ b/pkg/util/expectation/resource_expectations.go @@ -0,0 +1,159 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package expectations + +import ( + "flag" + "sync" + "time" + + "k8s.io/apimachinery/pkg/util/sets" +) + +// Action is the action, like create and delete. +type Action string + +const ( + // Create action + Create Action = "create" + // Delete action + Delete Action = "delete" +) + +var ( + ExpectationTimeout time.Duration + ResourceExpectations = NewResourceExpectations() +) + +func init() { + flag.DurationVar(&ExpectationTimeout, "expectation-timeout", time.Minute*5, "The expectation timeout. Defaults 5min") +} + +// Expectations is an interface that allows users to set and wait on expectations of resource creation and deletion. +type Expectations interface { + Expect(controllerKey string, action Action, name string) + Observe(controllerKey string, action Action, name string) + SatisfiedExpectations(controllerKey string) (bool, time.Duration, map[Action][]string) + DeleteExpectations(controllerKey string) + GetExpectations(controllerKey string) map[Action]sets.String +} + +// NewResourceExpectations returns a common Expectations. 
+func NewResourceExpectations() Expectations { + return &realResourceExpectations{ + controllerCache: make(map[string]*realControllerResourceExpectations), + } +} + +type realResourceExpectations struct { + sync.Mutex + // key: parent key, workload namespace/name + controllerCache map[string]*realControllerResourceExpectations +} + +type realControllerResourceExpectations struct { + // item: name for this object + objsCache map[Action]sets.String + firstUnsatisfiedTimestamp time.Time +} + +func (r *realResourceExpectations) GetExpectations(controllerKey string) map[Action]sets.String { + r.Lock() + defer r.Unlock() + + expectations := r.controllerCache[controllerKey] + if expectations == nil { + return nil + } + + res := make(map[Action]sets.String, len(expectations.objsCache)) + for k, v := range expectations.objsCache { + res[k] = sets.NewString(v.List()...) + } + + return res +} + +func (r *realResourceExpectations) Expect(controllerKey string, action Action, name string) { + r.Lock() + defer r.Unlock() + + expectations := r.controllerCache[controllerKey] + if expectations == nil { + expectations = &realControllerResourceExpectations{ + objsCache: make(map[Action]sets.String), + } + r.controllerCache[controllerKey] = expectations + } + + if s := expectations.objsCache[action]; s != nil { + s.Insert(name) + } else { + expectations.objsCache[action] = sets.NewString(name) + } +} + +func (r *realResourceExpectations) Observe(controllerKey string, action Action, name string) { + r.Lock() + defer r.Unlock() + + expectations := r.controllerCache[controllerKey] + if expectations == nil { + return + } + + s := expectations.objsCache[action] + if s == nil { + return + } + s.Delete(name) + + for _, s := range expectations.objsCache { + if s.Len() > 0 { + return + } + } + delete(r.controllerCache, controllerKey) +} + +func (r *realResourceExpectations) SatisfiedExpectations(controllerKey string) (bool, time.Duration, map[Action][]string) { + r.Lock() + defer r.Unlock() + + 
expectations := r.controllerCache[controllerKey] + if expectations == nil { + return true, 0, nil + } + + for a, s := range expectations.objsCache { + if s.Len() > 0 { + if expectations.firstUnsatisfiedTimestamp.IsZero() { + expectations.firstUnsatisfiedTimestamp = time.Now() + } + return false, time.Since(expectations.firstUnsatisfiedTimestamp), map[Action][]string{a: s.List()} + } + } + + delete(r.controllerCache, controllerKey) + return true, 0, nil +} + +func (r *realResourceExpectations) DeleteExpectations(controllerKey string) { + r.Lock() + defer r.Unlock() + delete(r.controllerCache, controllerKey) +} diff --git a/pkg/util/expectation/resource_expectations_test.go b/pkg/util/expectation/resource_expectations_test.go new file mode 100644 index 00000000..82ef6d07 --- /dev/null +++ b/pkg/util/expectation/resource_expectations_test.go @@ -0,0 +1,59 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package expectations + +import ( + "testing" +) + +func TestResourceExpectations(t *testing.T) { + e := NewResourceExpectations() + controllerKey01 := "default/cs01" + controllerKey02 := "default/cs02" + pod01 := "pod01" + pod02 := "pod02" + + e.Expect(controllerKey01, Create, pod01) + e.Expect(controllerKey01, Create, pod02) + e.Expect(controllerKey01, Delete, pod01) + if ok, _, _ := e.SatisfiedExpectations(controllerKey01); ok { + t.Fatalf("expected not satisfied") + } + + e.Observe(controllerKey01, Create, pod02) + e.Observe(controllerKey01, Create, pod01) + if ok, _, _ := e.SatisfiedExpectations(controllerKey01); ok { + t.Fatalf("expected not satisfied") + } + + e.Observe(controllerKey02, Delete, pod01) + if ok, _, _ := e.SatisfiedExpectations(controllerKey01); ok { + t.Fatalf("expected not satisfied") + } + + e.Observe(controllerKey01, Delete, pod01) + if ok, _, _ := e.SatisfiedExpectations(controllerKey01); !ok { + t.Fatalf("expected satisfied") + } + + e.Observe(controllerKey01, Create, pod01) + e.Observe(controllerKey01, Create, pod02) + e.DeleteExpectations(controllerKey01) + if ok, _, _ := e.SatisfiedExpectations(controllerKey01); !ok { + t.Fatalf("expected satisfied") + } +} diff --git a/pkg/util/parse_utils.go b/pkg/util/parse_utils.go index 6e1fccab..92caa62e 100644 --- a/pkg/util/parse_utils.go +++ b/pkg/util/parse_utils.go @@ -1,8 +1,25 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package util import ( "encoding/json" "fmt" + "reflect" appsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" appsv1beta1 "github.com/openkruise/kruise-api/apps/v1beta1" @@ -11,29 +28,26 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/klog/v2" - "k8s.io/utils/pointer" "sigs.k8s.io/controller-runtime/pkg/client" ) -func ParseStatefulSetInfo(object client.Object, namespacedName types.NamespacedName) *WorkloadInfo { - workloadGVKWithName := fmt.Sprintf("%v(%v)", object.GetObjectKind().GroupVersionKind(), namespacedName) - selector, err := getSelector(object) - if err != nil { - klog.Errorf("Failed to parse selector for workload(%v)", workloadGVKWithName) +// ParseWorkload parse workload as WorkloadInfo +func ParseWorkload(object client.Object) *WorkloadInfo { + if object == nil || reflect.ValueOf(object).IsNil() { + return nil } + key := client.ObjectKeyFromObject(object) + gvk := object.GetObjectKind().GroupVersionKind() return &WorkloadInfo{ - ObjectMeta: *getMetadata(object), - MaxUnavailable: getStatefulSetMaxUnavailable(object), - Replicas: pointer.Int32(GetReplicas(object)), - Status: ParseWorkloadStatus(object), - Selector: selector, - GVKWithName: workloadGVKWithName, + LogKey: fmt.Sprintf("%s (%s)", key, gvk), + ObjectMeta: *getMetadata(object), + Replicas: GetReplicas(object), + Status: *ParseWorkloadStatus(object), } } +// IsStatefulSetRollingUpdate return true if updateStrategy of object is rollingUpdate type. 
func IsStatefulSetRollingUpdate(object client.Object) bool { switch o := object.(type) { case *apps.StatefulSet: @@ -51,6 +65,7 @@ func IsStatefulSetRollingUpdate(object client.Object) bool { } } +// SetStatefulSetPartition set partition to object func SetStatefulSetPartition(object client.Object, partition int32) { switch o := object.(type) { case *apps.StatefulSet: @@ -97,6 +112,7 @@ func SetStatefulSetPartition(object client.Object, partition int32) { } } +// GetStatefulSetPartition get partition of object func GetStatefulSetPartition(object client.Object) int32 { partition := int32(0) switch o := object.(type) { @@ -119,6 +135,7 @@ func GetStatefulSetPartition(object client.Object) int32 { return partition } +// IsStatefulSetUnorderedUpdate return true if the updateStrategy of object is unordered update func IsStatefulSetUnorderedUpdate(object client.Object) bool { switch o := object.(type) { case *apps.StatefulSet: @@ -136,6 +153,7 @@ func IsStatefulSetUnorderedUpdate(object client.Object) bool { } } +// getStatefulSetMaxUnavailable return maxUnavailable field of object func getStatefulSetMaxUnavailable(object client.Object) *intstr.IntOrString { switch o := object.(type) { case *apps.StatefulSet: @@ -156,6 +174,7 @@ func getStatefulSetMaxUnavailable(object client.Object) *intstr.IntOrString { } } +// ParseWorkloadStatus parse status of object as WorkloadStatus func ParseWorkloadStatus(object client.Object) *WorkloadStatus { switch o := object.(type) { case *apps.Deployment: @@ -165,6 +184,7 @@ func ParseWorkloadStatus(object client.Object) *WorkloadStatus { AvailableReplicas: o.Status.AvailableReplicas, UpdatedReplicas: o.Status.UpdatedReplicas, ObservedGeneration: o.Status.ObservedGeneration, + UpdateRevision: ComputeHash(&o.Spec.Template, nil), } case *appsv1alpha1.CloneSet: @@ -220,20 +240,22 @@ func ParseWorkloadStatus(object client.Object) *WorkloadStatus { // GetReplicas return replicas from client workload object func GetReplicas(object client.Object) 
int32 { + replicas := int32(1) switch o := object.(type) { case *apps.Deployment: - return *o.Spec.Replicas + replicas = *o.Spec.Replicas case *appsv1alpha1.CloneSet: - return *o.Spec.Replicas + replicas = *o.Spec.Replicas case *apps.StatefulSet: - return *o.Spec.Replicas + replicas = *o.Spec.Replicas case *appsv1beta1.StatefulSet: - return *o.Spec.Replicas + replicas = *o.Spec.Replicas case *unstructured.Unstructured: - return parseReplicasFromUnstructured(o) + replicas = parseReplicasFromUnstructured(o) default: panic("unsupported workload type to ParseReplicasFrom function") } + return replicas } // GetTemplate return pod template spec for client workload object @@ -354,6 +376,7 @@ func parseMetadataFromUnstructured(object *unstructured.Unstructured) *metav1.Ob return meta } +// unmarshalIntStr return *intstr.IntOrString func unmarshalIntStr(m interface{}) *intstr.IntOrString { field := &intstr.IntOrString{} data, _ := json.Marshal(m) diff --git a/pkg/util/parse_utils_test.go b/pkg/util/parse_utils_test.go index 493a90f1..cd5aaebf 100644 --- a/pkg/util/parse_utils_test.go +++ b/pkg/util/parse_utils_test.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package util import ( @@ -329,12 +345,11 @@ func TestWorkloadParse(t *testing.T) { Expect(err).NotTo(HaveOccurred()) uobject := &unstructured.Unstructured{Object: uo} Expect(reflect.DeepEqual(GetTemplate(uobject), &o.Spec.Template)).Should(BeTrue()) - statefulsetInfo := ParseStatefulSetInfo(uobject, client.ObjectKeyFromObject(uobject)) + statefulsetInfo := ParseWorkload(uobject) { - Expect(statefulsetInfo.MaxUnavailable).Should(BeNil()) Expect(reflect.DeepEqual(statefulsetInfo.ObjectMeta, o.ObjectMeta)).Should(BeTrue()) Expect(reflect.DeepEqual(statefulsetInfo.Generation, o.Generation)).Should(BeTrue()) - Expect(reflect.DeepEqual(statefulsetInfo.Replicas, o.Spec.Replicas)).Should(BeTrue()) + Expect(reflect.DeepEqual(statefulsetInfo.Replicas, *o.Spec.Replicas)).Should(BeTrue()) Expect(reflect.DeepEqual(statefulsetInfo.Status.Replicas, o.Status.Replicas)).Should(BeTrue()) Expect(reflect.DeepEqual(statefulsetInfo.Status.ReadyReplicas, o.Status.ReadyReplicas)).Should(BeTrue()) Expect(reflect.DeepEqual(statefulsetInfo.Status.AvailableReplicas, o.Status.AvailableReplicas)).Should(BeTrue()) @@ -343,21 +358,17 @@ func TestWorkloadParse(t *testing.T) { Expect(reflect.DeepEqual(statefulsetInfo.Status.StableRevision, o.Status.CurrentRevision)).Should(BeTrue()) Expect(reflect.DeepEqual(statefulsetInfo.Status.UpdateRevision, o.Status.UpdateRevision)).Should(BeTrue()) Expect(statefulsetInfo.Status.UpdatedReadyReplicas).Should(BeNumerically("==", 0)) - selector, err := metav1.LabelSelectorAsSelector(o.Spec.Selector) - Expect(err).NotTo(HaveOccurred()) - Expect(reflect.DeepEqual(statefulsetInfo.Selector, selector)).Should(BeTrue()) } case *appsv1beta1.StatefulSet: uo, err := runtime.DefaultUnstructuredConverter.ToUnstructured(o) Expect(err).NotTo(HaveOccurred()) uobject := &unstructured.Unstructured{Object: uo} Expect(reflect.DeepEqual(GetTemplate(uobject), &o.Spec.Template)).Should(BeTrue()) - statefulsetInfo := ParseStatefulSetInfo(uobject, 
client.ObjectKeyFromObject(uobject)) + statefulsetInfo := ParseWorkload(uobject) { Expect(reflect.DeepEqual(statefulsetInfo.ObjectMeta, o.ObjectMeta)).Should(BeTrue()) Expect(reflect.DeepEqual(statefulsetInfo.Generation, o.Generation)).Should(BeTrue()) - Expect(reflect.DeepEqual(statefulsetInfo.Replicas, o.Spec.Replicas)).Should(BeTrue()) - Expect(reflect.DeepEqual(statefulsetInfo.MaxUnavailable, o.Spec.UpdateStrategy.RollingUpdate.MaxUnavailable)).Should(BeTrue()) + Expect(reflect.DeepEqual(statefulsetInfo.Replicas, *o.Spec.Replicas)).Should(BeTrue()) Expect(reflect.DeepEqual(statefulsetInfo.Status.Replicas, o.Status.Replicas)).Should(BeTrue()) Expect(reflect.DeepEqual(statefulsetInfo.Status.ReadyReplicas, o.Status.ReadyReplicas)).Should(BeTrue()) Expect(reflect.DeepEqual(statefulsetInfo.Status.AvailableReplicas, o.Status.AvailableReplicas)).Should(BeTrue()) @@ -366,9 +377,6 @@ func TestWorkloadParse(t *testing.T) { Expect(reflect.DeepEqual(statefulsetInfo.Status.StableRevision, o.Status.CurrentRevision)).Should(BeTrue()) Expect(reflect.DeepEqual(statefulsetInfo.Status.UpdateRevision, o.Status.UpdateRevision)).Should(BeTrue()) Expect(statefulsetInfo.Status.UpdatedReadyReplicas).Should(BeNumerically("==", 0)) - selector, err := metav1.LabelSelectorAsSelector(o.Spec.Selector) - Expect(err).NotTo(HaveOccurred()) - Expect(reflect.DeepEqual(statefulsetInfo.Selector, selector)).Should(BeTrue()) } } }) diff --git a/pkg/util/pod_utils.go b/pkg/util/pod_utils.go index 89e380a5..842e374e 100644 --- a/pkg/util/pod_utils.go +++ b/pkg/util/pod_utils.go @@ -82,6 +82,7 @@ func IsConsistentWithRevision(pod *v1.Pod, revision string) bool { return false } +// IsEqualRevision return true if a and b have equal revision label func IsEqualRevision(a, b *v1.Pod) bool { if a.Labels[appsv1.DefaultDeploymentUniqueLabelKey] != "" && a.Labels[appsv1.DefaultDeploymentUniqueLabelKey] == b.Labels[appsv1.DefaultDeploymentUniqueLabelKey] { diff --git a/pkg/util/workloads_utils.go 
b/pkg/util/workloads_utils.go index 5fd20102..21bd8346 100644 --- a/pkg/util/workloads_utils.go +++ b/pkg/util/workloads_utils.go @@ -30,14 +30,13 @@ import ( "github.com/openkruise/rollouts/pkg/feature" apps "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + netv1 "k8s.io/api/networking/v1" apiequality "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/apimachinery/pkg/util/rand" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" @@ -68,18 +67,49 @@ type WorkloadStatus struct { type WorkloadInfo struct { metav1.ObjectMeta - Paused bool - Replicas *int32 - GVKWithName string - Selector labels.Selector - MaxUnavailable *intstr.IntOrString - Status *WorkloadStatus + LogKey string + Replicas int32 + Status WorkloadStatus } -func NewWorkloadInfo() *WorkloadInfo { - return &WorkloadInfo{ - Status: &WorkloadStatus{}, +// IsStable return ture if observed generation >= generation +func (w *WorkloadInfo) IsStable() bool { + return w.Status.ObservedGeneration >= w.Generation +} + +// IsPromoted return true if replicas == updatedReplicas +func (w *WorkloadInfo) IsPromoted() bool { + return w.Status.Replicas == w.Status.UpdatedReplicas +} + +// IsScaling return true if observed replicas != replicas +func (w *WorkloadInfo) IsScaling(observed int32) bool { + if observed == -1 { + return false } + return w.Replicas != observed +} + +// IsRollback return true if workload stable revision equals to update revision. +// this function is edge-triggerred. +func (w *WorkloadInfo) IsRollback(observedStable, observedUpdate string) bool { + if observedUpdate == "" { + return false + } + // updateRevision == CurrentRevision means CloneSet is rolling back or newly-created. 
+ return w.Status.UpdateRevision == w.Status.StableRevision && + // stableRevision == UpdateRevision means CloneSet is rolling back instead of newly-created. + observedStable == w.Status.UpdateRevision && + // StableRevision != observed UpdateRevision means the rollback event have not been observed. + observedStable != observedUpdate +} + +// IsRevisionNotEqual this function will return true if observed update revision != update revision. +func (w *WorkloadInfo) IsRevisionNotEqual(observed string) bool { + if observed == "" { + return false + } + return w.Status.UpdateRevision != observed } // DeepHashObject writes specified object to hash using the spew library @@ -269,8 +299,76 @@ func IsWorkloadType(object client.Object, t WorkloadType) bool { return WorkloadType(strings.ToLower(object.GetLabels()[WorkloadTypeLabel])) == t } -// GenRandomStr returns a safe encoded string with a specific length -func GenRandomStr(length int) string { - randStr := rand.String(length) - return rand.SafeEncodeString(randStr) +// DeploymentMaxUnavailable returns the maximum unavailable pods a rolling deployment can take. +func DeploymentMaxUnavailable(deployment *apps.Deployment) int32 { + strategy := deployment.Spec.Strategy + if strategy.Type != apps.RollingUpdateDeploymentStrategyType || *(deployment.Spec.Replicas) == 0 { + return int32(0) + } + // Error caught by validation + _, maxUnavailable, _ := resolveFenceposts(strategy.RollingUpdate.MaxSurge, strategy.RollingUpdate.MaxUnavailable, *(deployment.Spec.Replicas)) + if maxUnavailable > *deployment.Spec.Replicas { + return *deployment.Spec.Replicas + } + return maxUnavailable +} + +// resolveFenceposts resolves both maxSurge and maxUnavailable. This needs to happen in one +// step. 
For example: +// +// 2 desired, max unavailable 1%, surge 0% - should scale old(-1), then new(+1), then old(-1), then new(+1) +// 1 desired, max unavailable 1%, surge 0% - should scale old(-1), then new(+1) +// 2 desired, max unavailable 25%, surge 1% - should scale new(+1), then old(-1), then new(+1), then old(-1) +// 1 desired, max unavailable 25%, surge 1% - should scale new(+1), then old(-1) +// 2 desired, max unavailable 0%, surge 1% - should scale new(+1), then old(-1), then new(+1), then old(-1) +// 1 desired, max unavailable 0%, surge 1% - should scale new(+1), then old(-1) +func resolveFenceposts(maxSurge, maxUnavailable *intstr.IntOrString, desired int32) (int32, int32, error) { + surge, err := intstr.GetScaledValueFromIntOrPercent(intstr.ValueOrDefault(maxSurge, intstr.FromInt(0)), int(desired), true) + if err != nil { + return 0, 0, err + } + unavailable, err := intstr.GetScaledValueFromIntOrPercent(intstr.ValueOrDefault(maxUnavailable, intstr.FromInt(0)), int(desired), false) + if err != nil { + return 0, 0, err + } + + if surge == 0 && unavailable == 0 { + // Validation should never allow the user to explicitly use zero values for both maxSurge + // maxUnavailable. Due to rounding down maxUnavailable though, it may resolve to zero. + // If both fenceposts resolve to zero, then we should set maxUnavailable to 1 on the + // theory that surge might not work due to quota. 
+ unavailable = 1 + } + + return int32(surge), int32(unavailable), nil +} + +// GetEmptyObjectWithKey return an empty object with the same namespaced name +func GetEmptyObjectWithKey(object client.Object) client.Object { + var empty client.Object + switch object.(type) { + case *v1.Pod: + empty = &v1.Pod{} + case *v1.Service: + empty = &v1.Service{} + case *netv1.Ingress: + empty = &netv1.Ingress{} + case *apps.Deployment: + empty = &apps.Deployment{} + case *apps.ReplicaSet: + empty = &apps.ReplicaSet{} + case *apps.StatefulSet: + empty = &apps.StatefulSet{} + case *appsv1alpha1.CloneSet: + empty = &appsv1alpha1.CloneSet{} + case *appsv1beta1.StatefulSet: + empty = &appsv1beta1.StatefulSet{} + case *unstructured.Unstructured: + unstructure := &unstructured.Unstructured{} + unstructure.SetGroupVersionKind(object.GetObjectKind().GroupVersionKind()) + empty = unstructure + } + empty.SetName(object.GetName()) + empty.SetNamespace(object.GetNamespace()) + return empty } diff --git a/pkg/util/workloads_utils_test.go b/pkg/util/workloads_utils_test.go index 80ead213..f1f8ef2c 100644 --- a/pkg/util/workloads_utils_test.go +++ b/pkg/util/workloads_utils_test.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package util import ( diff --git a/pkg/webhook/rollout/validating/rollout_create_update_handler_test.go b/pkg/webhook/rollout/validating/rollout_create_update_handler_test.go index 2f2b413c..027f9ce0 100644 --- a/pkg/webhook/rollout/validating/rollout_create_update_handler_test.go +++ b/pkg/webhook/rollout/validating/rollout_create_update_handler_test.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package validating import ( diff --git a/pkg/webhook/workload/mutating/workload_update_handler.go b/pkg/webhook/workload/mutating/workload_update_handler.go index 5f8de092..b848dd91 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler.go +++ b/pkg/webhook/workload/mutating/workload_update_handler.go @@ -19,6 +19,7 @@ package mutating import ( "context" "encoding/json" + "math" "net/http" kruiseappsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" @@ -189,7 +190,7 @@ func (h *WorkloadHandler) handleStatefulSetLikeWorkload(newObj, oldObj *unstruct } changed = true - util.SetStatefulSetPartition(newObj, replicas) + util.SetStatefulSetPartition(newObj, math.MaxInt16) state := &util.RolloutState{RolloutName: rollout.Name} by, _ := json.Marshal(state) annotation := newObj.GetAnnotations() @@ -276,8 +277,7 @@ func (h *WorkloadHandler) handleCloneSet(newObj, oldObj *kruiseappsv1alpha1.Clon klog.Infof("cloneSet(%s/%s) will be in rollout progressing, and paused", newObj.Namespace, newObj.Name) changed = true - 
// need set workload paused = true - newObj.Spec.UpdateStrategy.Paused = true + // need set workload partition = 100% newObj.Spec.UpdateStrategy.Partition = &intstr.IntOrString{Type: intstr.String, StrVal: "100%"} state := &util.RolloutState{RolloutName: rollout.Name} by, _ := json.Marshal(state) diff --git a/pkg/webhook/workload/mutating/workload_update_handler_test.go b/pkg/webhook/workload/mutating/workload_update_handler_test.go index 04098b22..13d6e638 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler_test.go +++ b/pkg/webhook/workload/mutating/workload_update_handler_test.go @@ -19,6 +19,7 @@ package mutating import ( "context" "encoding/json" + "math" "reflect" "testing" @@ -428,7 +429,6 @@ func TestHandlerCloneSet(t *testing.T) { obj := cloneSetDemo.DeepCopy() obj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" obj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo"}` - obj.Spec.UpdateStrategy.Paused = true obj.Spec.UpdateStrategy.Partition = &intstr.IntOrString{Type: intstr.String, StrVal: "100%"} return obj }, @@ -493,7 +493,7 @@ func TestHandleStatefulSet(t *testing.T) { obj := statefulset.DeepCopy() obj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" obj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo"}` - obj.Spec.UpdateStrategy.RollingUpdate.Partition = pointer.Int32(10) + obj.Spec.UpdateStrategy.RollingUpdate.Partition = pointer.Int32(math.MaxInt16) return obj }, getRollout: func() *appsv1alpha1.Rollout { diff --git a/test/e2e/batchrelease_test.go b/test/e2e/batchrelease_test.go index e547f64d..1830f4df 100644 --- a/test/e2e/batchrelease_test.go +++ b/test/e2e/batchrelease_test.go @@ -375,7 +375,7 @@ var _ = SIGDescribe("BatchRelease", func() { clone := &rolloutsv1alpha1.BatchRelease{} Expect(GetObject(release.Namespace, release.Name, clone)).NotTo(HaveOccurred()) return clone.Status.Phase - }, 15*time.Minute, 
5*time.Second).Should(Equal(rolloutsv1alpha1.RolloutPhaseCancelled)) + }, 15*time.Minute, 5*time.Second).Should(Equal(rolloutsv1alpha1.RolloutPhaseCompleted)) }) It("V1->V2: ScalingUp, Percentage, 100%, Succeeded", func() { @@ -799,7 +799,7 @@ var _ = SIGDescribe("BatchRelease", func() { clone := &rolloutsv1alpha1.BatchRelease{} Expect(GetObject(release.Namespace, release.Name, clone)).NotTo(HaveOccurred()) return clone.Status.Phase - }, 15*time.Minute, 5*time.Second).Should(Equal(rolloutsv1alpha1.RolloutPhaseCancelled)) + }, 15*time.Minute, 5*time.Second).Should(Equal(rolloutsv1alpha1.RolloutPhaseCompleted)) }) It("V1->V2: ScalingUp, Percentage, 100%, Succeeded", func() { @@ -1134,7 +1134,7 @@ var _ = SIGDescribe("BatchRelease", func() { clone := &rolloutsv1alpha1.BatchRelease{} Expect(GetObject(release.Namespace, release.Name, clone)).NotTo(HaveOccurred()) return clone.Status.Phase - }, 15*time.Minute, 5*time.Second).Should(Equal(rolloutsv1alpha1.RolloutPhaseCancelled)) + }, 15*time.Minute, 5*time.Second).Should(Equal(rolloutsv1alpha1.RolloutPhaseCompleted)) }) It("Rollback V1->V2: Delete BatchRelease, Percentage, 100%, Succeeded", func() { diff --git a/test/e2e/rollout_test.go b/test/e2e/rollout_test.go index ce4c965b..d165fc81 100644 --- a/test/e2e/rollout_test.go +++ b/test/e2e/rollout_test.go @@ -1366,7 +1366,7 @@ var _ = SIGDescribe("Rollout", func() { workload.Spec.Template.Spec.Containers[0].Env = newEnvs UpdateDeployment(workload) By("Update deployment env NODE_NAME from(version1) -> to(version2)") - time.Sleep(time.Second * 3) + time.Sleep(time.Second * 10) // check workload status & paused Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred()) @@ -1374,7 +1374,7 @@ var _ = SIGDescribe("Rollout", func() { By("check deployment status & paused success") // delete rollout - Expect(k8sClient.DeleteAllOf(context.TODO(), &rolloutsv1alpha1.Rollout{}, client.InNamespace(namespace), 
client.PropagationPolicy(metav1.DeletePropagationForeground))).Should(Succeed()) + Expect(k8sClient.Delete(context.TODO(), rollout, client.PropagationPolicy(metav1.DeletePropagationBackground))).Should(Succeed()) WaitRolloutNotFound(rollout.Name) WaitDeploymentAllPodsReady(workload) // check service & ingress & deployment @@ -4158,9 +4158,7 @@ var _ = SIGDescribe("Rollout", func() { CheckPodBatchLabel(workload.Namespace, workload.Spec.Selector, "2", "4", 1) CheckPodBatchLabel(workload.Namespace, workload.Spec.Selector, "2", "5", 1) }) - }) - KruiseDescribe("Test", func() { It("failure threshold", func() { By("Creating Rollout...") rollout := &rolloutsv1alpha1.Rollout{} diff --git a/test/e2e/test_data/rollout/advanced_statefulset.yaml b/test/e2e/test_data/rollout/advanced_statefulset.yaml index e2d19f17..db9ce702 100644 --- a/test/e2e/test_data/rollout/advanced_statefulset.yaml +++ b/test/e2e/test_data/rollout/advanced_statefulset.yaml @@ -18,7 +18,7 @@ spec: containers: - name: echoserver image: cilium/echoserver:latest - # imagePullPolicy: IfNotPresent + imagePullPolicy: IfNotPresent ports: - containerPort: 8080 env: diff --git a/test/e2e/test_data/rollout/native_statefulset.yaml b/test/e2e/test_data/rollout/native_statefulset.yaml index 11755276..2ced5586 100644 --- a/test/e2e/test_data/rollout/native_statefulset.yaml +++ b/test/e2e/test_data/rollout/native_statefulset.yaml @@ -18,7 +18,7 @@ spec: containers: - name: echoserver image: cilium/echoserver:latest - # imagePullPolicy: IfNotPresent + imagePullPolicy: IfNotPresent ports: - containerPort: 8080 env: