From 6968bd8972ea176a584b676f4cd25379169e9389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=91=B8=E9=B1=BC=E5=96=B5?= Date: Thu, 19 Dec 2024 20:36:32 +0800 Subject: [PATCH] feat: enhanced in-place update module to support vertical scaling (#1353) * feat: enhanced in-place update module to support vertical scaling Signed-off-by: LavenderQAQ * test: fixed pre-delete hook tests for advance daemonset Signed-off-by: LavenderQAQ * refactor: define vertically updated interfaces for different implementations Signed-off-by: LavenderQAQ * feat: remove offstream when vertical update only Signed-off-by: LavenderQAQ * 1. merge into default handler and add uts/e2e 2. add failed case in e2e test and change following comments 3. remove last resource in spec annotation 4. remove container level inplace-vertical-scaling api in interface Signed-off-by: Abner-1 --------- Signed-off-by: LavenderQAQ Signed-off-by: Abner-1 Co-authored-by: Abner-1 --- .github/workflows/e2e-1.18.yaml | 2 +- .github/workflows/e2e-1.24.yaml | 2 +- .github/workflows/e2e-1.28.yaml | 101 +- Makefile | 6 + apis/apps/pub/inplace_update.go | 13 + apis/apps/pub/zz_generated.deepcopy.go | 8 + apis/apps/v1alpha1/cloneset_types.go | 6 +- .../crd/bases/apps.kruise.io_clonesets.yaml | 6 + config/manager/manager.yaml | 4 +- .../cloneset/cloneset_event_handler.go | 7 + pkg/controller/cloneset/core/cloneset_core.go | 15 +- .../cloneset/core/cloneset_core_test.go | 106 ++ .../cloneset/sync/cloneset_sync_utils.go | 7 + .../cloneset/sync/cloneset_update_test.go | 3 + .../container_meta_controller.go | 5 +- .../kuberuntime/kuberuntime_container.go | 15 +- pkg/daemon/kuberuntime/labels.go | 5 + pkg/features/kruise_features.go | 5 + pkg/util/inplaceupdate/inplace_update.go | 29 +- .../inplaceupdate/inplace_update_defaults.go | 154 +- .../inplace_update_defaults_test.go | 1086 ++++++++++++- pkg/util/inplaceupdate/inplace_update_test.go | 5 + .../inplaceupdate/inplace_update_vertical.go | 185 +++ .../inplace_update_vertical_test.go | 271 ++++ .../pod/validating/pod_unavailable_budget.go | 5 +- pkg/webhook/pod/validating/workloadspread.go | 3 +- test/e2e/apps/daemonset.go | 13 +- test/e2e/apps/inplace_vpa.go | 1344 +++++++++++++++++ test/e2e/framework/cloneset_util.go | 18 +- test/e2e/framework/framework.go | 5 + test/e2e/framework/statefulset_utils.go | 10 + ...f-none-fg.yaml => kind-conf-with-vpa.yaml} | 4 +- tools/hack/create-cluster.sh | 2 +- tools/hack/run-kruise-e2e-test.sh | 5 +- 34 files changed, 3387 insertions(+), 68 deletions(-) create mode 100644 pkg/util/inplaceupdate/inplace_update_vertical.go create mode 100644 pkg/util/inplaceupdate/inplace_update_vertical_test.go create mode 100644 test/e2e/apps/inplace_vpa.go rename test/{kind-conf-none-fg.yaml => kind-conf-with-vpa.yaml} (64%) diff --git a/.github/workflows/e2e-1.18.yaml b/.github/workflows/e2e-1.18.yaml index 9b4ae3b3bb..b691404f19 100644 --- a/.github/workflows/e2e-1.18.yaml +++ b/.github/workflows/e2e-1.18.yaml @@ -34,7 +34,7 @@ jobs: with: node_image: ${{ env.KIND_IMAGE }} cluster_name: ${{ env.KIND_CLUSTER_NAME }} - config: ./test/kind-conf-none-fg.yaml + config: ./test/kind-conf.yaml version: ${{ env.KIND_VERSION }} - name: Install-CSI run: | diff --git a/.github/workflows/e2e-1.24.yaml b/.github/workflows/e2e-1.24.yaml index caad76931c..4a4448afd2 100644 --- a/.github/workflows/e2e-1.24.yaml +++ b/.github/workflows/e2e-1.24.yaml @@ -35,7 +35,7 @@ jobs: with: node_image: ${{ env.KIND_IMAGE }} cluster_name: ${{ env.KIND_CLUSTER_NAME }} - config: ./test/kind-conf-none-fg.yaml + 
config: ./test/kind-conf.yaml version: ${{ env.KIND_VERSION }} - name: Install-CSI run: | diff --git a/.github/workflows/e2e-1.28.yaml b/.github/workflows/e2e-1.28.yaml index 1e9b820dbf..1bc8478df5 100644 --- a/.github/workflows/e2e-1.28.yaml +++ b/.github/workflows/e2e-1.28.yaml @@ -34,7 +34,7 @@ jobs: with: node_image: ${{ env.KIND_IMAGE }} cluster_name: ${{ env.KIND_CLUSTER_NAME }} - config: ./test/kind-conf-none-fg.yaml + config: ./test/kind-conf-with-vpa.yaml version: ${{ env.KIND_VERSION }} - name: Install-CSI run: | @@ -117,7 +117,7 @@ jobs: with: node_image: ${{ env.KIND_IMAGE }} cluster_name: ${{ env.KIND_CLUSTER_NAME }} - config: ./test/kind-conf-none-fg.yaml + config: ./test/kind-conf-with-vpa.yaml version: ${{ env.KIND_VERSION }} - name: Build image run: | @@ -196,7 +196,7 @@ jobs: with: node_image: ${{ env.KIND_IMAGE }} cluster_name: ${{ env.KIND_CLUSTER_NAME }} - config: ./test/kind-conf-none-fg.yaml + config: ./test/kind-conf-with-vpa.yaml version: ${{ env.KIND_VERSION }} - name: Build image run: | @@ -288,7 +288,7 @@ jobs: with: node_image: ${{ env.KIND_IMAGE }} cluster_name: ${{ env.KIND_CLUSTER_NAME }} - config: ./test/kind-conf-none-fg.yaml + config: ./test/kind-conf-with-vpa.yaml version: ${{ env.KIND_VERSION }} - name: Build image run: | @@ -380,7 +380,7 @@ jobs: with: node_image: ${{ env.KIND_IMAGE }} cluster_name: ${{ env.KIND_CLUSTER_NAME }} - config: ./test/kind-conf-none-fg.yaml + config: ./test/kind-conf-with-vpa.yaml version: ${{ env.KIND_VERSION }} - name: Build image run: | @@ -472,7 +472,7 @@ jobs: with: node_image: ${{ env.KIND_IMAGE }} cluster_name: ${{ env.KIND_CLUSTER_NAME }} - config: ./test/kind-conf-none-fg.yaml + config: ./test/kind-conf-with-vpa.yaml version: ${{ env.KIND_VERSION }} - name: Build image run: | @@ -542,7 +542,7 @@ jobs: with: node_image: ${{ env.KIND_IMAGE }} cluster_name: ${{ env.KIND_CLUSTER_NAME }} - config: ./test/kind-conf-none-fg.yaml + config: ./test/kind-conf-with-vpa.yaml version: ${{ env.KIND_VERSION }} - name: Build image run: | @@ -592,6 +592,89 @@ jobs: done < <(kubectl get pods -n kruise-system -l control-plane=controller-manager --no-headers | awk '{print $1}') fi exit $retVal + clonesetAndInplace: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + - name: Setup Kind Cluster + uses: helm/kind-action@v1.10.0 + with: + node_image: ${{ env.KIND_IMAGE }} + cluster_name: ${{ env.KIND_CLUSTER_NAME }} + config: ./test/kind-conf-with-vpa.yaml + version: ${{ env.KIND_VERSION }} + - name: Install-CSI + run: | + make install-csi + + - name: Build image + run: | + export IMAGE="openkruise/kruise-manager:e2e-${GITHUB_RUN_ID}" + docker build --pull --no-cache . 
-t $IMAGE + kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; } + - name: Install Kruise + run: | + set -ex + kubectl cluster-info + IMG=openkruise/kruise-manager:e2e-${GITHUB_RUN_ID} ./scripts/deploy_kind.sh + NODES=$(kubectl get node | wc -l) + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | wc -l) + set -e + if [ "$PODS" -eq "$NODES" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | wc -l) + kubectl get node -o yaml + kubectl get all -n kruise-system -o yaml + kubectl get pod -n kruise-system --no-headers | grep daemon | awk '{print $1}' | xargs kubectl logs -n kruise-system + kubectl get pod -n kruise-system --no-headers | grep daemon | awk '{print $1}' | xargs kubectl logs -n kruise-system --previous=true + set -e + if [ "$PODS" -eq "$NODES" ]; then + echo "Wait for kruise-manager and kruise-daemon ready successfully" + else + echo "Timeout to wait for kruise-manager and kruise-daemon ready" + exit 1 + fi + - name: Run E2E Tests + run: | + export KUBECONFIG=/home/runner/.kube/config + make ginkgo + set +e + ./bin/ginkgo -p -timeout 120m -v --focus='\[apps\] (InplaceVPA)' test/e2e + retVal=$? + restartCount=$(kubectl get pod -n kruise-system -l control-plane=controller-manager --no-headers | awk '{print $4}') + if [ "${restartCount}" -eq "0" ];then + echo "Kruise-manager has not restarted" + else + kubectl get pod -n kruise-system -l control-plane=controller-manager --no-headers + echo "Kruise-manager has restarted, abort!!!" + kubectl get pod -n kruise-system --no-headers -l control-plane=controller-manager | awk '{print $1}' | xargs kubectl logs -p -n kruise-system + exit 1 + fi + if [ "$retVal" -ne 0 ];then + echo "test fail, dump kruise-manager logs" + while read pod; do + kubectl logs -n kruise-system $pod + done < <(kubectl get pods -n kruise-system -l control-plane=controller-manager --no-headers | awk '{print $1}') + echo "test fail, dump kruise-daemon logs" + while read pod; do + kubectl logs -n kruise-system $pod + done < <(kubectl get pods -n kruise-system -l control-plane=daemon --no-headers | awk '{print $1}') + fi + exit $retVal + other: runs-on: ubuntu-20.04 steps: @@ -607,7 +690,7 @@ jobs: with: node_image: ${{ env.KIND_IMAGE }} cluster_name: ${{ env.KIND_CLUSTER_NAME }} - config: ./test/kind-conf-none-fg.yaml + config: ./test/kind-conf-with-vpa.yaml version: ${{ env.KIND_VERSION }} - name: Build image run: | @@ -648,7 +731,7 @@ jobs: export KUBECONFIG=/home/runner/.kube/config make ginkgo set +e - ./bin/ginkgo -timeout 90m -v --skip='\[apps\] (AppStatefulSetStorage|StatefulSet|PullImage|PullImages|ContainerRecreateRequest|DaemonSet|SidecarSet|EphemeralJob)' --skip='\[policy\] PodUnavailableBudget' test/e2e + ./bin/ginkgo -timeout 90m -v --skip='\[apps\] (InplaceVPA|AppStatefulSetStorage|StatefulSet|PullImage|PullImages|ContainerRecreateRequest|DaemonSet|SidecarSet|EphemeralJob)' --skip='\[policy\] PodUnavailableBudget' test/e2e retVal=$? 
restartCount=$(kubectl get pod -n kruise-system -l control-plane=controller-manager --no-headers | awk '{print $4}') if [ "${restartCount}" -eq "0" ];then diff --git a/Makefile b/Makefile index 8cb1babf6c..8b7a10be2e 100644 --- a/Makefile +++ b/Makefile @@ -152,9 +152,15 @@ endif create-cluster: $(tools/kind) tools/hack/create-cluster.sh +DISABLE_CSI ?= false + .PHONY: install-csi install-csi: +ifeq ($(DISABLE_CSI), true) + @echo "CSI is disabled, skip" +else cd tools/hack/csi-driver-host-path; ./install-snapshot.sh +endif # delete-cluster deletes a kube cluster. .PHONY: delete-cluster diff --git a/apis/apps/pub/inplace_update.go b/apis/apps/pub/inplace_update.go index 8274473e21..21481b3bc0 100644 --- a/apis/apps/pub/inplace_update.go +++ b/apis/apps/pub/inplace_update.go @@ -62,12 +62,21 @@ type InPlaceUpdateState struct { // UpdateEnvFromMetadata indicates there are envs from annotations/labels that should be in-place update. UpdateEnvFromMetadata bool `json:"updateEnvFromMetadata,omitempty"` + // UpdateResources indicates there are resources that should be in-place update. + UpdateResources bool `json:"updateResources,omitempty"` + + // UpdateImages indicates there are images that should be in-place update. + UpdateImages bool `json:"updateImages,omitempty"` + // NextContainerImages is the containers with lower priority that waiting for in-place update images in next batch. NextContainerImages map[string]string `json:"nextContainerImages,omitempty"` // NextContainerRefMetadata is the containers with lower priority that waiting for in-place update labels/annotations in next batch. NextContainerRefMetadata map[string]metav1.ObjectMeta `json:"nextContainerRefMetadata,omitempty"` + // NextContainerResources is the containers with lower priority that waiting for in-place update resources in next batch. + NextContainerResources map[string]v1.ResourceRequirements `json:"nextContainerResources,omitempty"` + // PreCheckBeforeNext is the pre-check that must pass before the next containers can be in-place update. PreCheckBeforeNext *InPlaceUpdatePreCheckBeforeNext `json:"preCheckBeforeNext,omitempty"` @@ -140,6 +149,10 @@ type RuntimeContainerHashes struct { // PlainHash is the hash that directly calculated from pod.spec.container[x]. // Usually it is calculated by Kubelet and will be in annotation of each runtime container. PlainHash uint64 `json:"plainHash"` + // PlainHashWithoutResources is the hash that directly calculated from pod.spec.container[x] + // over fields with Resources field zero'd out. + // Usually it is calculated by Kubelet and will be in annotation of each runtime container. + PlainHashWithoutResources uint64 `json:"plainHashWithoutResources"` // ExtractedEnvFromMetadataHash is the hash that calculated from pod.spec.container[x], // whose envs from annotations/labels have already been extracted to the real values. ExtractedEnvFromMetadataHash uint64 `json:"extractedEnvFromMetadataHash,omitempty"` diff --git a/apis/apps/pub/zz_generated.deepcopy.go b/apis/apps/pub/zz_generated.deepcopy.go index 375d3de4bd..1326ad437d 100644 --- a/apis/apps/pub/zz_generated.deepcopy.go +++ b/apis/apps/pub/zz_generated.deepcopy.go @@ -21,6 +21,7 @@ limitations under the License. 
package pub import ( + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -105,6 +106,13 @@ func (in *InPlaceUpdateState) DeepCopyInto(out *InPlaceUpdateState) { (*out)[key] = *val.DeepCopy() } } + if in.NextContainerResources != nil { + in, out := &in.NextContainerResources, &out.NextContainerResources + *out = make(map[string]corev1.ResourceRequirements, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } if in.PreCheckBeforeNext != nil { in, out := &in.PreCheckBeforeNext, &out.PreCheckBeforeNext *out = new(InPlaceUpdatePreCheckBeforeNext) diff --git a/apis/apps/v1alpha1/cloneset_types.go b/apis/apps/v1alpha1/cloneset_types.go index 860f6ae9c9..a90f9e9f56 100644 --- a/apis/apps/v1alpha1/cloneset_types.go +++ b/apis/apps/v1alpha1/cloneset_types.go @@ -17,10 +17,11 @@ limitations under the License. package v1alpha1 import ( - appspub "github.com/openkruise/kruise/apis/apps/pub" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + + appspub "github.com/openkruise/kruise/apis/apps/pub" ) const ( @@ -178,6 +179,8 @@ type CloneSetStatus struct { // UpdatedAvailableReplicas is the number of Pods created by the CloneSet controller from the CloneSet version // indicated by updateRevision and have a Ready Condition for at least minReadySeconds. + // Notice: when enable InPlaceWorkloadVerticalScaling, pod during resource resizing will also be unavailable. + // This means these pod will be counted in maxUnavailable. UpdatedAvailableReplicas int32 `json:"updatedAvailableReplicas,omitempty"` // ExpectedUpdatedReplicas is the number of Pods that should be updated by CloneSet controller. @@ -237,6 +240,7 @@ type CloneSetCondition struct { // +kubebuilder:printcolumn:name="DESIRED",type="integer",JSONPath=".spec.replicas",description="The desired number of pods." // +kubebuilder:printcolumn:name="UPDATED",type="integer",JSONPath=".status.updatedReplicas",description="The number of pods updated." // +kubebuilder:printcolumn:name="UPDATED_READY",type="integer",JSONPath=".status.updatedReadyReplicas",description="The number of pods updated and ready." +// +kubebuilder:printcolumn:name="UPDATED_AVAILABLE",type="integer",JSONPath=".status.updatedAvailableReplicas",description="The number of pods updated and available." // +kubebuilder:printcolumn:name="READY",type="integer",JSONPath=".status.readyReplicas",description="The number of pods ready." // +kubebuilder:printcolumn:name="TOTAL",type="integer",JSONPath=".status.replicas",description="The number of currently all pods." // +kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp",description="CreationTimestamp is a timestamp representing the server time when this object was created. It is not guaranteed to be set in happens-before order across separate operations. Clients may not set this value. It is represented in RFC3339 form and is in UTC." diff --git a/config/crd/bases/apps.kruise.io_clonesets.yaml b/config/crd/bases/apps.kruise.io_clonesets.yaml index 0be49002ff..8b30e483e0 100644 --- a/config/crd/bases/apps.kruise.io_clonesets.yaml +++ b/config/crd/bases/apps.kruise.io_clonesets.yaml @@ -29,6 +29,10 @@ spec: jsonPath: .status.updatedReadyReplicas name: UPDATED_READY type: integer + - description: The number of pods updated and available. + jsonPath: .status.updatedAvailableReplicas + name: UPDATED_AVAILABLE + type: integer - description: The number of pods ready. 
jsonPath: .status.readyReplicas name: READY @@ -512,6 +516,8 @@ spec: description: |- UpdatedAvailableReplicas is the number of Pods created by the CloneSet controller from the CloneSet version indicated by updateRevision and have a Ready Condition for at least minReadySeconds. + Notice: when enable InPlaceWorkloadVerticalScaling, pod during resource resizing will also be unavailable. + This means these pod will be counted in maxUnavailable. format: int32 type: integer updatedReadyReplicas: diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 6aacbf55f8..4544cf9f8b 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -60,8 +60,8 @@ spec: port: 8000 resources: limits: - cpu: 100m - memory: 200Mi + cpu: 2 + memory: 2Gi requests: cpu: 100m memory: 200Mi diff --git a/pkg/controller/cloneset/cloneset_event_handler.go b/pkg/controller/cloneset/cloneset_event_handler.go index 1c2ad58fad..d618f13d2b 100644 --- a/pkg/controller/cloneset/cloneset_event_handler.go +++ b/pkg/controller/cloneset/cloneset_event_handler.go @@ -138,6 +138,13 @@ func (e *podEventHandler) Update(ctx context.Context, evt event.UpdateEvent, q w // If it has a ControllerRef, that's all that matters. if curControllerRef != nil { + // TODO(Abner-1): delete it when fixes only resize resource + //old, _ := json.Marshal(oldPod) + //cur, _ := json.Marshal(curPod) + //patches, _ := jsonpatch.CreatePatch(old, cur) + //pjson, _ := json.Marshal(patches) + //klog.V(4).InfoS("Pod updated json", "pod", klog.KObj(curPod), "patch", pjson) + req := resolveControllerRef(curPod.Namespace, curControllerRef) if req == nil { return diff --git a/pkg/controller/cloneset/core/cloneset_core.go b/pkg/controller/cloneset/core/cloneset_core.go index 5c26b22861..0d42b85c8a 100644 --- a/pkg/controller/cloneset/core/cloneset_core.go +++ b/pkg/controller/cloneset/core/cloneset_core.go @@ -32,6 +32,8 @@ import ( appspub "github.com/openkruise/kruise/apis/apps/pub" appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1" clonesetutils "github.com/openkruise/kruise/pkg/controller/cloneset/utils" + "github.com/openkruise/kruise/pkg/features" + utilfeature "github.com/openkruise/kruise/pkg/util/feature" "github.com/openkruise/kruise/pkg/util/inplaceupdate" ) @@ -201,14 +203,25 @@ func (c *commonControl) IgnorePodUpdateEvent(oldPod, curPod *v1.Pod) bool { } return false } + isPodInplaceUpdating := func(pod *v1.Pod) bool { + if len(pod.Labels) > 0 && appspub.LifecycleStateType(pod.Labels[appspub.LifecycleStateKey]) != appspub.LifecycleStateNormal { + return true + } + return false + } - if containsReadinessGate(curPod) { + if containsReadinessGate(curPod) || isPodInplaceUpdating(curPod) { opts := c.GetUpdateOptions() opts = inplaceupdate.SetOptionsDefaults(opts) if err := containersUpdateCompleted(curPod, opts.CheckContainersUpdateCompleted); err == nil { if cond := inplaceupdate.GetCondition(curPod); cond == nil || cond.Status != v1.ConditionTrue { return false } + // if InPlaceWorkloadVerticalScaling is enabled, we should not ignore the update event of updating pod + // for handling only in-place resource resize + if utilfeature.DefaultFeatureGate.Enabled(features.InPlaceWorkloadVerticalScaling) { + return false + } } } diff --git a/pkg/controller/cloneset/core/cloneset_core_test.go b/pkg/controller/cloneset/core/cloneset_core_test.go index 75da20c915..b15858ff17 100644 --- a/pkg/controller/cloneset/core/cloneset_core_test.go +++ b/pkg/controller/cloneset/core/cloneset_core_test.go @@ -4,7 +4,13 @@ import ( 
"reflect" "testing" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + appspub "github.com/openkruise/kruise/apis/apps/pub" appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1" + "github.com/openkruise/kruise/pkg/features" + utilfeature "github.com/openkruise/kruise/pkg/util/feature" "github.com/openkruise/kruise/pkg/util/inplaceupdate" ) @@ -72,3 +78,103 @@ func Test_CommonControl_GetUpdateOptions(t *testing.T) { }) } } + +func TestIgnorePodUpdateEvent(t *testing.T) { + c := commonControl{CloneSet: &appsv1alpha1.CloneSet{}} + + tests := []struct { + name string + option func() + oldPod *v1.Pod + curPod *v1.Pod + expected bool + }{ + { + name: "updating pod without InPlaceWorkloadVerticalScaling", + option: func() { + utilfeature.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlaceWorkloadVerticalScaling, false) + }, + oldPod: &v1.Pod{}, + curPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + appspub.InPlaceUpdateStateKey: "{}", + }, + Labels: map[string]string{ + appspub.LifecycleStateKey: string(appspub.LifecycleStateUpdating), + }, + }, + Status: v1.PodStatus{ + Conditions: []v1.PodCondition{ + { + Type: appspub.InPlaceUpdateReady, + Status: v1.ConditionTrue, + }, + }, + }, + }, + expected: true, + }, + { + name: "updating pod-condition false without InPlaceWorkloadVerticalScaling", + option: func() { + utilfeature.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlaceWorkloadVerticalScaling, false) + }, + oldPod: &v1.Pod{}, + curPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + appspub.InPlaceUpdateStateKey: "{}", + }, + Labels: map[string]string{ + appspub.LifecycleStateKey: string(appspub.LifecycleStateUpdating), + }, + }, + Status: v1.PodStatus{ + Conditions: []v1.PodCondition{ + { + Type: appspub.InPlaceUpdateReady, + Status: v1.ConditionFalse, + }, + }, + }, + }, + expected: false, + }, + { + name: "updating pod with InPlaceWorkloadVerticalScaling", + option: func() { + utilfeature.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlaceWorkloadVerticalScaling, true) + }, + oldPod: &v1.Pod{}, + curPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + appspub.InPlaceUpdateStateKey: "{}", + }, + Labels: map[string]string{ + appspub.LifecycleStateKey: string(appspub.LifecycleStateUpdating), + }, + }, + Status: v1.PodStatus{ + Conditions: []v1.PodCondition{ + { + Type: appspub.InPlaceUpdateReady, + Status: v1.ConditionTrue, + }, + }, + }, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.option() + if got := c.IgnorePodUpdateEvent(tt.oldPod, tt.curPod); got != tt.expected { + t.Errorf("IgnorePodUpdateEvent() = %v, want %v", got, tt.expected) + } + }) + } +} diff --git a/pkg/controller/cloneset/sync/cloneset_sync_utils.go b/pkg/controller/cloneset/sync/cloneset_sync_utils.go index b26aebb5ce..ee7014735e 100644 --- a/pkg/controller/cloneset/sync/cloneset_sync_utils.go +++ b/pkg/controller/cloneset/sync/cloneset_sync_utils.go @@ -17,6 +17,7 @@ limitations under the License. 
package sync import ( + "encoding/json" "flag" "math" "reflect" @@ -87,6 +88,12 @@ func (e expectationDiffs) isEmpty() bool { return reflect.DeepEqual(e, expectationDiffs{}) } +// String implement this to print information in klog +func (e expectationDiffs) String() string { + b, _ := json.Marshal(e) + return string(b) +} + type IsPodUpdateFunc func(pod *v1.Pod, updateRevision string) bool // This is the most important algorithm in cloneset-controller. diff --git a/pkg/controller/cloneset/sync/cloneset_update_test.go b/pkg/controller/cloneset/sync/cloneset_update_test.go index 13dd7d2e2e..e60f102a9f 100644 --- a/pkg/controller/cloneset/sync/cloneset_update_test.go +++ b/pkg/controller/cloneset/sync/cloneset_update_test.go @@ -306,6 +306,7 @@ func TestUpdate(t *testing.T) { Annotations: map[string]string{appspub.InPlaceUpdateStateKey: util.DumpJSON(appspub.InPlaceUpdateState{ Revision: "rev_new", UpdateTimestamp: now, + UpdateImages: true, LastContainerStatuses: map[string]appspub.InPlaceUpdateContainerStatus{"c1": {ImageID: "image-id-xyz"}}, ContainerBatchesRecord: []appspub.InPlaceUpdateContainerBatch{{Timestamp: now, Containers: []string{"c1"}}}, })}, @@ -386,6 +387,7 @@ func TestUpdate(t *testing.T) { appspub.InPlaceUpdateStateKey: util.DumpJSON(appspub.InPlaceUpdateState{ Revision: "rev_new", UpdateTimestamp: now, + UpdateImages: true, }), appspub.InPlaceUpdateGraceKey: `{"revision":"rev_new","containerImages":{"c1":"foo2"},"graceSeconds":3630}`, }, @@ -782,6 +784,7 @@ func TestUpdate(t *testing.T) { Annotations: map[string]string{ appspub.InPlaceUpdateStateKey: util.DumpJSON(appspub.InPlaceUpdateState{ Revision: "rev_new", + UpdateImages: true, UpdateTimestamp: metav1.NewTime(now.Time), LastContainerStatuses: map[string]appspub.InPlaceUpdateContainerStatus{"c1": {ImageID: "image-id-xyz"}}, ContainerBatchesRecord: []appspub.InPlaceUpdateContainerBatch{{Timestamp: now, Containers: []string{"c1"}}}, diff --git a/pkg/daemon/containermeta/container_meta_controller.go b/pkg/daemon/containermeta/container_meta_controller.go index 7ba912166d..dbaf5e748e 100644 --- a/pkg/daemon/containermeta/container_meta_controller.go +++ b/pkg/daemon/containermeta/container_meta_controller.go @@ -344,7 +344,10 @@ func (c *Controller) manageContainerMetaSet(pod *v1.Pod, kubePodStatus *kubeletc Name: status.Name, ContainerID: status.ID.String(), RestartCount: int32(status.RestartCount), - Hashes: appspub.RuntimeContainerHashes{PlainHash: status.Hash}, + Hashes: appspub.RuntimeContainerHashes{ + PlainHash: status.Hash, + PlainHashWithoutResources: status.HashWithoutResources, + }, } } if utilfeature.DefaultFeatureGate.Enabled(features.InPlaceUpdateEnvFromMetadata) { diff --git a/pkg/daemon/kuberuntime/kuberuntime_container.go b/pkg/daemon/kuberuntime/kuberuntime_container.go index 7e15b1772a..45a4082aff 100644 --- a/pkg/daemon/kuberuntime/kuberuntime_container.go +++ b/pkg/daemon/kuberuntime/kuberuntime_container.go @@ -92,13 +92,14 @@ func toKubeContainerStatus(status *runtimeapi.ContainerStatus, runtimeName strin Type: runtimeName, ID: status.Id, }, - Name: labeledInfo.ContainerName, - Image: status.Image.Image, - ImageID: status.ImageRef, - Hash: annotatedInfo.Hash, - RestartCount: annotatedInfo.RestartCount, - State: toKubeContainerState(status.State), - CreatedAt: time.Unix(0, status.CreatedAt), + Name: labeledInfo.ContainerName, + Image: status.Image.Image, + ImageID: status.ImageRef, + Hash: annotatedInfo.Hash, + HashWithoutResources: annotatedInfo.HashWithoutResources, + RestartCount: 
annotatedInfo.RestartCount, + State: toKubeContainerState(status.State), + CreatedAt: time.Unix(0, status.CreatedAt), } if status.State != runtimeapi.ContainerState_CONTAINER_CREATED { diff --git a/pkg/daemon/kuberuntime/labels.go b/pkg/daemon/kuberuntime/labels.go index 732af841e8..aec5f165a7 100644 --- a/pkg/daemon/kuberuntime/labels.go +++ b/pkg/daemon/kuberuntime/labels.go @@ -33,6 +33,7 @@ const ( podTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod" containerHashLabel = "io.kubernetes.container.hash" + containerHashWithoutResourcesLabel = "io.kubernetes.container.hashWithoutResources" containerRestartCountLabel = "io.kubernetes.container.restartCount" containerTerminationMessagePathLabel = "io.kubernetes.container.terminationMessagePath" containerTerminationMessagePolicyLabel = "io.kubernetes.container.terminationMessagePolicy" @@ -49,6 +50,7 @@ type labeledContainerInfo struct { type annotatedContainerInfo struct { Hash uint64 + HashWithoutResources uint64 RestartCount int PodDeletionGracePeriod *int64 PodTerminationGracePeriod *int64 @@ -79,6 +81,9 @@ func getContainerInfoFromAnnotations(annotations map[string]string) *annotatedCo if containerInfo.Hash, err = getUint64ValueFromLabel(annotations, containerHashLabel); err != nil { klog.ErrorS(err, "Unable to get label from annotations", "label", containerHashLabel, "annotations", annotations) } + if containerInfo.HashWithoutResources, err = getUint64ValueFromLabel(annotations, containerHashWithoutResourcesLabel); err != nil { + klog.ErrorS(err, "Unable to get label from annotations", "label", containerHashWithoutResourcesLabel, "annotations", annotations) + } if containerInfo.RestartCount, err = getIntValueFromLabel(annotations, containerRestartCountLabel); err != nil { klog.ErrorS(err, "Unable to get label from annotations", "label", containerRestartCountLabel, "annotations", annotations) } diff --git a/pkg/features/kruise_features.go b/pkg/features/kruise_features.go index fb0de51b9c..bfa086fb05 100644 --- a/pkg/features/kruise_features.go +++ b/pkg/features/kruise_features.go @@ -134,6 +134,10 @@ const ( // ForceDeleteTimeoutExpectationFeatureGate enable delete timeout expectation, for example: cloneSet ScaleExpectation ForceDeleteTimeoutExpectationFeatureGate = "ForceDeleteTimeoutExpectationGate" + + // InPlaceWorkloadVerticalScaling enable CloneSet/Advanced StatefulSet controller to support vertical scaling + // of managed Pods. 
+ InPlaceWorkloadVerticalScaling featuregate.Feature = "InPlaceWorkloadVerticalScaling" ) var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ @@ -170,6 +174,7 @@ var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ EnableExternalCerts: {Default: false, PreRelease: featuregate.Alpha}, StatefulSetAutoResizePVCGate: {Default: false, PreRelease: featuregate.Alpha}, ForceDeleteTimeoutExpectationFeatureGate: {Default: false, PreRelease: featuregate.Alpha}, + InPlaceWorkloadVerticalScaling: {Default: false, PreRelease: featuregate.Alpha}, } func init() { diff --git a/pkg/util/inplaceupdate/inplace_update.go b/pkg/util/inplaceupdate/inplace_update.go index bf6653fef8..0af5aa1bdb 100644 --- a/pkg/util/inplaceupdate/inplace_update.go +++ b/pkg/util/inplaceupdate/inplace_update.go @@ -40,8 +40,9 @@ import ( ) var ( - containerImagePatchRexp = regexp.MustCompile("^/spec/containers/([0-9]+)/image$") - rfc6901Decoder = strings.NewReplacer("~1", "/", "~0", "~") + containerImagePatchRexp = regexp.MustCompile("^/spec/containers/([0-9]+)/image$") + containerResourcesPatchRexp = regexp.MustCompile("^/spec/containers/([0-9]+)/resources/.*$") + rfc6901Decoder = strings.NewReplacer("~1", "/", "~0", "~") Clock clock.Clock = clock.RealClock{} ) @@ -68,6 +69,7 @@ type UpdateOptions struct { PatchSpecToPod func(pod *v1.Pod, spec *UpdateSpec, state *appspub.InPlaceUpdateState) (*v1.Pod, error) CheckPodUpdateCompleted func(pod *v1.Pod) error CheckContainersUpdateCompleted func(pod *v1.Pod, state *appspub.InPlaceUpdateState) error + CheckPodNeedsBeUnready func(pod *v1.Pod, spec *UpdateSpec) bool GetRevision func(rev *apps.ControllerRevision) string } @@ -82,16 +84,21 @@ type Interface interface { type UpdateSpec struct { Revision string `json:"revision"` - ContainerImages map[string]string `json:"containerImages,omitempty"` - ContainerRefMetadata map[string]metav1.ObjectMeta `json:"containerRefMetadata,omitempty"` - MetaDataPatch []byte `json:"metaDataPatch,omitempty"` - UpdateEnvFromMetadata bool `json:"updateEnvFromMetadata,omitempty"` - GraceSeconds int32 `json:"graceSeconds,omitempty"` + ContainerImages map[string]string `json:"containerImages,omitempty"` + ContainerRefMetadata map[string]metav1.ObjectMeta `json:"containerRefMetadata,omitempty"` + ContainerResources map[string]v1.ResourceRequirements `json:"containerResources,omitempty"` + MetaDataPatch []byte `json:"metaDataPatch,omitempty"` + UpdateEnvFromMetadata bool `json:"updateEnvFromMetadata,omitempty"` + GraceSeconds int32 `json:"graceSeconds,omitempty"` OldTemplate *v1.PodTemplateSpec `json:"oldTemplate,omitempty"` NewTemplate *v1.PodTemplateSpec `json:"newTemplate,omitempty"` } +func (u *UpdateSpec) VerticalUpdateOnly() bool { + return len(u.ContainerResources) > 0 && len(u.ContainerImages) == 0 && !u.UpdateEnvFromMetadata +} + type realControl struct { podAdapter podadapter.Adapter revisionAdapter revisionadapter.Interface @@ -134,7 +141,7 @@ func (c *realControl) Refresh(pod *v1.Pod, opts *UpdateOptions) RefreshResult { } // check if there are containers with lower-priority that have to in-place update in next batch - if len(state.NextContainerImages) > 0 || len(state.NextContainerRefMetadata) > 0 { + if len(state.NextContainerImages) > 0 || len(state.NextContainerRefMetadata) > 0 || len(state.NextContainerResources) > 0 { // pre-check the previous updated containers if checkErr := doPreCheckBeforeNext(pod, state.PreCheckBeforeNext); checkErr != nil { @@ -257,6 +264,7 @@ func (c *realControl) 
updateNextBatch(pod *v1.Pod, opts *UpdateOptions) (bool, e ContainerImages: state.NextContainerImages, ContainerRefMetadata: state.NextContainerRefMetadata, UpdateEnvFromMetadata: state.UpdateEnvFromMetadata, + ContainerResources: state.NextContainerResources, } if clone, err = opts.PatchSpecToPod(clone, &spec, &state); err != nil { return err @@ -286,7 +294,8 @@ func (c *realControl) Update(pod *v1.Pod, oldRevision, newRevision *apps.Control // TODO(FillZpp): maybe we should check if the previous in-place update has completed // 2. update condition for pod with readiness-gate - if containsReadinessGate(pod) { + // When only workload resources are updated, they are marked as not needing to remove traffic + if opts.CheckPodNeedsBeUnready(pod, spec) { newCondition := v1.PodCondition{ Type: appspub.InPlaceUpdateReady, LastTransitionTime: metav1.NewTime(Clock.Now()), @@ -332,6 +341,8 @@ func (c *realControl) updatePodInPlace(pod *v1.Pod, spec *UpdateSpec, opts *Upda Revision: spec.Revision, UpdateTimestamp: metav1.NewTime(Clock.Now()), UpdateEnvFromMetadata: spec.UpdateEnvFromMetadata, + UpdateImages: len(spec.ContainerImages) > 0, + UpdateResources: len(spec.ContainerResources) > 0, } inPlaceUpdateStateJSON, _ := json.Marshal(inPlaceUpdateState) clone.Annotations[appspub.InPlaceUpdateStateKey] = string(inPlaceUpdateStateJSON) diff --git a/pkg/util/inplaceupdate/inplace_update_defaults.go b/pkg/util/inplaceupdate/inplace_update_defaults.go index e60b188c54..f2d6d1a319 100644 --- a/pkg/util/inplaceupdate/inplace_update_defaults.go +++ b/pkg/util/inplaceupdate/inplace_update_defaults.go @@ -62,16 +62,20 @@ func SetOptionsDefaults(opts *UpdateOptions) *UpdateOptions { opts.CheckContainersUpdateCompleted = defaultCheckContainersInPlaceUpdateCompleted } + if opts.CheckPodNeedsBeUnready == nil { + opts.CheckPodNeedsBeUnready = defaultCheckPodNeedsBeUnready + } + return opts } // defaultPatchUpdateSpecToPod returns new pod that merges spec into old pod func defaultPatchUpdateSpecToPod(pod *v1.Pod, spec *UpdateSpec, state *appspub.InPlaceUpdateState) (*v1.Pod, error) { - klog.V(5).InfoS("Begin to in-place update pod", "namespace", pod.Namespace, "name", pod.Name, "spec", util.DumpJSON(spec), "state", util.DumpJSON(state)) state.NextContainerImages = make(map[string]string) state.NextContainerRefMetadata = make(map[string]metav1.ObjectMeta) + state.NextContainerResources = make(map[string]v1.ResourceRequirements) if spec.MetaDataPatch != nil { cloneBytes, _ := json.Marshal(pod) @@ -100,7 +104,8 @@ func defaultPatchUpdateSpecToPod(pod *v1.Pod, spec *UpdateSpec, state *appspub.I c := &pod.Spec.Containers[i] _, existImage := spec.ContainerImages[c.Name] _, existMetadata := spec.ContainerRefMetadata[c.Name] - if !existImage && !existMetadata { + _, existResource := spec.ContainerResources[c.Name] + if !existImage && !existMetadata && !existResource { continue } priority := utilcontainerlaunchpriority.GetContainerPriority(c) @@ -140,8 +145,35 @@ func defaultPatchUpdateSpecToPod(pod *v1.Pod, spec *UpdateSpec, state *appspub.I if state.LastContainerStatuses == nil { state.LastContainerStatuses = map[string]appspub.InPlaceUpdateContainerStatus{} } - state.LastContainerStatuses[c.Name] = appspub.InPlaceUpdateContainerStatus{ImageID: c.ImageID} + if cs, ok := state.LastContainerStatuses[c.Name]; !ok { + state.LastContainerStatuses[c.Name] = appspub.InPlaceUpdateContainerStatus{ImageID: c.ImageID} + } else { + // now just update imageID + cs.ImageID = c.ImageID + } + } + } + + // update resources + if 
utilfeature.DefaultFeatureGate.Enabled(features.InPlaceWorkloadVerticalScaling) { + expectedResources := map[string]*v1.ResourceRequirements{} + for i := range pod.Spec.Containers { + c := &pod.Spec.Containers[i] + newResource, resourceExists := spec.ContainerResources[c.Name] + if !resourceExists { + continue + } + + if containersToUpdate.Has(c.Name) { + expectedResources[c.Name] = &newResource + } else { + state.NextContainerResources[c.Name] = newResource + } } + + // vertical update containers in a batch, + // or internal enterprise implementations can update+sync pod resources here at once + verticalUpdateImpl.UpdateResource(pod, expectedResources) } // update annotations and labels for the containers to update @@ -161,7 +193,7 @@ func defaultPatchUpdateSpecToPod(pod *v1.Pod, spec *UpdateSpec, state *appspub.I // add the containers that update this time into PreCheckBeforeNext, so that next containers can only // start to update when these containers have updated ready // TODO: currently we only support ContainersRequiredReady, not sure if we have to add ContainersPreferredReady in future - if len(state.NextContainerImages) > 0 || len(state.NextContainerRefMetadata) > 0 { + if len(state.NextContainerImages) > 0 || len(state.NextContainerRefMetadata) > 0 || len(state.NextContainerResources) > 0 { state.PreCheckBeforeNext = &appspub.InPlaceUpdatePreCheckBeforeNext{ContainersRequiredReady: containersToUpdate.List()} } else { state.PreCheckBeforeNext = nil @@ -259,6 +291,7 @@ func defaultCalculateInPlaceUpdateSpec(oldRevision, newRevision *apps.Controller updateSpec := &UpdateSpec{ Revision: newRevision.Name, ContainerImages: make(map[string]string), + ContainerResources: make(map[string]v1.ResourceRequirements), ContainerRefMetadata: make(map[string]metav1.ObjectMeta), GraceSeconds: opts.GracePeriodSeconds, } @@ -278,16 +311,33 @@ func defaultCalculateInPlaceUpdateSpec(oldRevision, newRevision *apps.Controller } return nil } - if op.Operation != "replace" || !containerImagePatchRexp.MatchString(op.Path) { + + if op.Operation != "replace" { return nil } - // for example: /spec/containers/0/image - words := strings.Split(op.Path, "/") - idx, _ := strconv.Atoi(words[3]) - if len(oldTemp.Spec.Containers) <= idx { - return nil + if containerImagePatchRexp.MatchString(op.Path) { + // for example: /spec/containers/0/image + words := strings.Split(op.Path, "/") + idx, _ := strconv.Atoi(words[3]) + if len(oldTemp.Spec.Containers) <= idx { + return nil + } + updateSpec.ContainerImages[oldTemp.Spec.Containers[idx].Name] = op.Value.(string) + continue + } + + // TODO(Abner-1): if pod qos changed, we should recreate the pod. 
+ // I will resolve it in another PR + if utilfeature.DefaultFeatureGate.Enabled(features.InPlaceWorkloadVerticalScaling) && + containerResourcesPatchRexp.MatchString(op.Path) { + err = verticalUpdateImpl.UpdateInplaceUpdateMetadata(&op, oldTemp, updateSpec) + if err != nil { + klog.InfoS("UpdateInplaceUpdateMetadata error", "err", err) + return nil + } + continue } - updateSpec.ContainerImages[oldTemp.Spec.Containers[idx].Name] = op.Value.(string) + return nil } if len(metadataPatches) > 0 { @@ -344,6 +394,7 @@ func defaultCalculateInPlaceUpdateSpec(oldRevision, newRevision *apps.Controller } updateSpec.MetaDataPatch = patchBytes } + return updateSpec } @@ -361,10 +412,9 @@ func DefaultCheckInPlaceUpdateCompleted(pod *v1.Pod) error { } else if err := json.Unmarshal([]byte(stateStr), &inPlaceUpdateState); err != nil { return err } - if len(inPlaceUpdateState.NextContainerImages) > 0 || len(inPlaceUpdateState.NextContainerRefMetadata) > 0 { + if len(inPlaceUpdateState.NextContainerImages) > 0 || len(inPlaceUpdateState.NextContainerRefMetadata) > 0 || len(inPlaceUpdateState.NextContainerResources) > 0 { return fmt.Errorf("existing containers to in-place update in next batches") } - return defaultCheckContainersInPlaceUpdateCompleted(pod, &inPlaceUpdateState) } @@ -383,8 +433,20 @@ func defaultCheckContainersInPlaceUpdateCompleted(pod *v1.Pod, inPlaceUpdateStat } } + // only UpdateResources, we check resources in status updated + if utilfeature.DefaultFeatureGate.Enabled(features.InPlaceWorkloadVerticalScaling) && inPlaceUpdateState.UpdateResources { + if completed, err := verticalUpdateImpl.IsUpdateCompleted(pod); !completed { + return err + } + } + if runtimeContainerMetaSet != nil { - if checkAllContainersHashConsistent(pod, runtimeContainerMetaSet, plainHash) { + metaHashType := plainHash + if utilfeature.DefaultFeatureGate.Enabled(features.InPlaceWorkloadVerticalScaling) && inPlaceUpdateState.UpdateResources { + // if vertical scaling is enabled and update resources, we should compare plainHashWithoutResources + metaHashType = plainHashWithoutResources + } + if checkAllContainersHashConsistent(pod, runtimeContainerMetaSet, metaHashType) { klog.V(5).InfoS("Check Pod in-place update completed for all container hash consistent", "namespace", pod.Namespace, "name", pod.Name) return nil } @@ -424,6 +486,7 @@ type hashType string const ( plainHash hashType = "PlainHash" + plainHashWithoutResources hashType = "PlainHashWithoutResources" extractedEnvFromMetadataHash hashType = "ExtractedEnvFromMetadataHash" ) @@ -474,6 +537,15 @@ func checkAllContainersHashConsistent(pod *v1.Pod, runtimeContainerMetaSet *apps "metaHash", containerMeta.Hashes.PlainHash, "expectedHash", expectedHash) return false } + case plainHashWithoutResources: + containerSpecCopy := containerSpec.DeepCopy() + containerSpecCopy.Resources = v1.ResourceRequirements{} + if expectedHash := kubeletcontainer.HashContainer(containerSpecCopy); containerMeta.Hashes.PlainHashWithoutResources != expectedHash { + klog.InfoS("Find container in runtime-container-meta for Pod has different plain hash with spec(except resources)", + "containerName", containerSpecCopy.Name, "namespace", pod.Namespace, "podName", pod.Name, + "metaHash", containerMeta.Hashes.PlainHashWithoutResources, "expectedHash", expectedHash) + return false + } case extractedEnvFromMetadataHash: hasher := utilcontainermeta.NewEnvFromMetadataHasher() if expectedHash := hasher.GetExpectHash(containerSpec, pod); containerMeta.Hashes.ExtractedEnvFromMetadataHash != expectedHash 
{ @@ -487,3 +559,57 @@ func checkAllContainersHashConsistent(pod *v1.Pod, runtimeContainerMetaSet *apps return true } + +const ( + cpuMask = 1 + memMask = 2 +) + +func defaultCheckPodNeedsBeUnready(pod *v1.Pod, spec *UpdateSpec) bool { + if !utilfeature.DefaultFeatureGate.Enabled(features.InPlaceWorkloadVerticalScaling) || !spec.VerticalUpdateOnly() { + return containsReadinessGate(pod) + } + + // flag represents whether cpu or memory resource changed + resourceFlag := make(map[string]int) + for c, resizeResources := range spec.ContainerResources { + flag := 0 + _, limitExist := resizeResources.Limits[v1.ResourceCPU] + _, reqExist := resizeResources.Requests[v1.ResourceCPU] + if limitExist || reqExist { + flag |= cpuMask + } + _, limitExist = resizeResources.Limits[v1.ResourceMemory] + _, reqExist = resizeResources.Requests[v1.ResourceMemory] + if limitExist || reqExist { + flag |= memMask + } + resourceFlag[c] = flag + } + + // only changed resources and restart policy are considered + // For example: + // we should not restart the container + // when only resize cpu in container with memory RestartContainer RestartPolicy, + needRestart := false +OuterLoop: + for _, container := range pod.Spec.Containers { + if flag, exist := resourceFlag[container.Name]; exist { + for _, resizePolicy := range container.ResizePolicy { + if resizePolicy.RestartPolicy != v1.RestartContainer { + continue + } + if (resizePolicy.ResourceName == v1.ResourceCPU && (flag&cpuMask) != 0) || + (resizePolicy.ResourceName == v1.ResourceMemory && (flag&memMask) != 0) { + needRestart = true + break OuterLoop + } + } + } + } + if !needRestart { + return false + } + + return containsReadinessGate(pod) +} diff --git a/pkg/util/inplaceupdate/inplace_update_defaults_test.go b/pkg/util/inplaceupdate/inplace_update_defaults_test.go index dc1b12f84d..bbe36b8b23 100644 --- a/pkg/util/inplaceupdate/inplace_update_defaults_test.go +++ b/pkg/util/inplaceupdate/inplace_update_defaults_test.go @@ -20,9 +20,13 @@ import ( "encoding/json" "fmt" "reflect" + "strings" "testing" "time" + "github.com/stretchr/testify/assert" + "k8s.io/apimachinery/pkg/api/resource" + appspub "github.com/openkruise/kruise/apis/apps/pub" "github.com/openkruise/kruise/pkg/features" "github.com/openkruise/kruise/pkg/util" @@ -122,14 +126,6 @@ func TestDefaultPatchUpdateSpecToPod(t *testing.T) { ContainerBatchesRecord: []appspub.InPlaceUpdateContainerBatch{{Timestamp: metav1.NewTime(now), Containers: []string{"c1"}}}, }, expectedPatch: map[string]interface{}{ - //"metadata": map[string]interface{}{ - // "annotations": map[string]interface{}{ - // appspub.InPlaceUpdateStateKey: util.DumpJSON(appspub.InPlaceUpdateState{ - // LastContainerStatuses: map[string]appspub.InPlaceUpdateContainerStatus{"c1": {ImageID: "containerd://c1-img"}}, - // ContainerBatchesRecord: []appspub.InPlaceUpdateContainerBatch{{Timestamp: metav1.NewTime(now), Containers: []string{"c1"}}}, - // }), - // }, - //}, "spec": map[string]interface{}{ "containers": []map[string]interface{}{ { @@ -920,3 +916,1077 @@ func Test_defaultCalculateInPlaceUpdateSpec_VCTHash(t *testing.T) { testWhenEnable(false) } + +func getFakeControllerRevisionData() string { + oldData := `{ + "spec": { + "template": { + "$patch": "replace", + "metadata": { + "creationTimestamp": null, + "labels": { + "app": "nginx" + } + }, + "spec": { + "containers": [ + { + "env": [ + { + "name": "version", + "value": "v1" + } + ], + "image": "nginx:stable-alpine22", + "imagePullPolicy": "Always", + "name": "nginx", + "resources": { 
+ "limits": { + "cpu": "2", + "memory": "4Gi", + "sigma/eni": "2" + }, + "requests": { + "cpu": "1", + "memory": "2Gi", + "sigma/eni": "2" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/usr/share/nginx/html", + "name": "www-data" + } + ] + }, + { + "env": [ + { + "name": "version", + "value": "v1" + } + ], + "image": "nginx:stable-alpine22", + "imagePullPolicy": "Always", + "name": "nginx2", + "resources": { + "limits": { + "cpu": "2", + "memory": "4Gi", + "sigma/eni": "2" + }, + "requests": { + "cpu": "1", + "memory": "2Gi", + "sigma/eni": "2" + } + }, + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "volumeMounts": [ + { + "mountPath": "/usr/share/nginx/html", + "name": "www-data" + } + ] + } + ], + "dnsPolicy": "ClusterFirst", + "restartPolicy": "Always", + "schedulerName": "default-scheduler", + "securityContext": {}, + "terminationGracePeriodSeconds": 30 + } + } + } +}` + return oldData +} + +func TestDefaultCalculateInPlaceUpdateSpec(t *testing.T) { + baseRevision := &apps.ControllerRevision{ + ObjectMeta: metav1.ObjectMeta{ + Name: "old-revision", + Annotations: map[string]string{}, + }, + Data: runtime.RawExtension{ + Raw: []byte(getFakeControllerRevisionData()), + }, + } + revisionGetter := func(imageChanged, resourceChanged, otherChanged bool, updateContainerNum int) *apps.ControllerRevision { + base := getFakeControllerRevisionData() + if imageChanged { + base = strings.Replace(base, `"image": "nginx:stable-alpine22"`, `"image": "nginx:stable-alpine23"`, updateContainerNum) + } + if resourceChanged { + base = strings.Replace(base, `"cpu": "1",`, `"cpu": "2",`, updateContainerNum) + } + if otherChanged { + base = strings.Replace(base, `"imagePullPolicy": "Always",`, `"imagePullPolicy": "222",`, updateContainerNum) + } + return &apps.ControllerRevision{ + ObjectMeta: metav1.ObjectMeta{ + Name: "new-revision", + Annotations: map[string]string{}, + }, + Data: runtime.RawExtension{ + Raw: []byte(base), + }, + } + } + // Define your test cases + tests := []struct { + name string + oldRevision *apps.ControllerRevision + newRevision *apps.ControllerRevision + opts *UpdateOptions + expectedResult *UpdateSpec + vpaEnabled bool + }{ + { + vpaEnabled: true, + name: "only change resource", + oldRevision: baseRevision, + newRevision: revisionGetter(false, true, false, 1), + opts: &UpdateOptions{}, + expectedResult: &UpdateSpec{ + Revision: "new-revision", + ContainerResources: map[string]v1.ResourceRequirements{ + "nginx": { + Requests: v1.ResourceList{ + "cpu": resource.MustParse("2"), + }, + }, + }, + }, + }, + { + vpaEnabled: true, + name: "change image and resource", + oldRevision: baseRevision, + newRevision: revisionGetter(true, true, false, 1), + opts: &UpdateOptions{}, + expectedResult: &UpdateSpec{ + Revision: "new-revision", + ContainerImages: map[string]string{ + "nginx": "nginx:stable-alpine23", + }, + ContainerResources: map[string]v1.ResourceRequirements{ + "nginx": { + Requests: v1.ResourceList{ + "cpu": resource.MustParse("2"), + }, + }, + }, + }, + }, + { + vpaEnabled: true, + name: "change other and resource", + oldRevision: baseRevision, + newRevision: revisionGetter(false, true, true, 1), + opts: &UpdateOptions{}, + expectedResult: nil, + }, + { + vpaEnabled: true, + name: "change all", + oldRevision: baseRevision, + newRevision: revisionGetter(true, true, true, 1), + opts: &UpdateOptions{}, + expectedResult: nil, + }, + // Add more test 
cases as needed + { + vpaEnabled: true, + name: "only change resource of two containers", + oldRevision: baseRevision, + newRevision: revisionGetter(false, true, false, 2), + opts: &UpdateOptions{}, + expectedResult: &UpdateSpec{ + Revision: "new-revision", + ContainerResources: map[string]v1.ResourceRequirements{ + "nginx": { + Requests: v1.ResourceList{ + "cpu": resource.MustParse("2"), + }, + }, + "nginx2": { + Requests: v1.ResourceList{ + "cpu": resource.MustParse("2"), + }, + }, + }, + }, + }, + { + vpaEnabled: true, + name: "change image and resource of two containers", + oldRevision: baseRevision, + newRevision: revisionGetter(true, true, false, 2), + opts: &UpdateOptions{}, + expectedResult: &UpdateSpec{ + Revision: "new-revision", + ContainerImages: map[string]string{ + "nginx": "nginx:stable-alpine23", + "nginx2": "nginx:stable-alpine23", + }, + ContainerResources: map[string]v1.ResourceRequirements{ + "nginx": { + Requests: v1.ResourceList{ + "cpu": resource.MustParse("2"), + }, + }, + "nginx2": { + Requests: v1.ResourceList{ + "cpu": resource.MustParse("2"), + }, + }, + }, + }, + }, + { + vpaEnabled: true, + name: "change other and resource of two containers", + oldRevision: baseRevision, + newRevision: revisionGetter(false, true, true, 2), + opts: &UpdateOptions{}, + expectedResult: nil, + }, + { + vpaEnabled: true, + name: "change all of two containers", + oldRevision: baseRevision, + newRevision: revisionGetter(true, true, true, 2), + opts: &UpdateOptions{}, + expectedResult: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + defer utilfeature.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlaceWorkloadVerticalScaling, tt.vpaEnabled)() + result := defaultCalculateInPlaceUpdateSpec(tt.oldRevision, tt.newRevision, tt.opts) + + if !apiequality.Semantic.DeepEqual(tt.expectedResult, result) { + t.Fatalf("expected updateSpec \n%v\n but got \n%v", util.DumpJSON(tt.expectedResult), util.DumpJSON(result)) + } + }) + } +} + +func getTestPodWithResource() *v1.Pod { + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"label-k1": "foo", "label-k2": "foo"}, + Annotations: map[string]string{"annotation-k1": "foo"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "c1", + Image: "c1-img", + Env: []v1.EnvVar{ + {Name: appspub.ContainerLaunchBarrierEnvName, ValueFrom: &v1.EnvVarSource{ConfigMapKeyRef: &v1.ConfigMapKeySelector{Key: "p_20"}}}, + {Name: "config", ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "metadata.labels['label-k1']"}}}, + }, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("1Gi"), + v1.ResourceCPU: resource.MustParse("1"), + }, + Limits: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + ResizePolicy: []v1.ContainerResizePolicy{ + { + ResourceName: v1.ResourceCPU, + RestartPolicy: v1.NotRequired, + }, + }, + }, + { + Name: "c2", + Image: "c2-img", + Env: []v1.EnvVar{ + {Name: appspub.ContainerLaunchBarrierEnvName, ValueFrom: &v1.EnvVarSource{ConfigMapKeyRef: &v1.ConfigMapKeySelector{Key: "p_10"}}}, + {Name: "config", ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "metadata.labels['label-k2']"}}}, + }, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("1Gi"), + v1.ResourceCPU: resource.MustParse("1"), + }, + Limits: v1.ResourceList{ + 
v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + ResizePolicy: []v1.ContainerResizePolicy{ + { + ResourceName: v1.ResourceMemory, + RestartPolicy: v1.RestartContainer, + }, + }, + }, + }, + }, + Status: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "c1", + ImageID: "containerd://c1-img", + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("1Gi"), + v1.ResourceCPU: resource.MustParse("1"), + }, + Limits: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + }, + { + Name: "c2", + ImageID: "containerd://c2-img", + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("1Gi"), + v1.ResourceCPU: resource.MustParse("1"), + }, + Limits: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + }, + }, + }, + } +} + +func TestDefaultPatchUpdateSpecToPod_Resource(t *testing.T) { + // disableVPA cases already be tested in TestDefaultPatchUpdateSpecToPod + now := time.Now() + Clock = testingclock.NewFakeClock(now) + pod := getTestPodWithResource() + + // Define the test cases + tests := []struct { + name string + spec *UpdateSpec + state *appspub.InPlaceUpdateState + expectedState *appspub.InPlaceUpdateState + expectedPatch map[string]interface{} + vpaEnabled bool + }{ + { + name: "only change container 0 resource cpu", + spec: &UpdateSpec{ + ContainerResources: map[string]v1.ResourceRequirements{ + "c1": { + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + }, + }, + state: &appspub.InPlaceUpdateState{}, + expectedState: &appspub.InPlaceUpdateState{ + ContainerBatchesRecord: []appspub.InPlaceUpdateContainerBatch{{Timestamp: metav1.NewTime(now), Containers: []string{"c1"}}}, + }, + expectedPatch: map[string]interface{}{ + "spec": map[string]interface{}{ + "containers": []map[string]interface{}{ + { + "name": "c1", + "resources": map[string]interface{}{ + "requests": map[string]interface{}{ + "memory": "2Gi", + "cpu": "2", + }, + }, + }, + }, + }, + }, + vpaEnabled: true, + }, + { + name: "change container 0 resource cpu and image", + spec: &UpdateSpec{ + ContainerImages: map[string]string{"c1": "c1-img-new"}, + ContainerResources: map[string]v1.ResourceRequirements{ + "c1": { + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + }, + }, + state: &appspub.InPlaceUpdateState{}, + expectedState: &appspub.InPlaceUpdateState{ + LastContainerStatuses: map[string]appspub.InPlaceUpdateContainerStatus{ + "c1": { + ImageID: "containerd://c1-img", + }, + }, + ContainerBatchesRecord: []appspub.InPlaceUpdateContainerBatch{{Timestamp: metav1.NewTime(now), Containers: []string{"c1"}}}, + }, + expectedPatch: map[string]interface{}{ + "spec": map[string]interface{}{ + "containers": []map[string]interface{}{ + { + "name": "c1", + "image": "c1-img-new", + "resources": map[string]interface{}{ + "requests": map[string]interface{}{ + "memory": "2Gi", + "cpu": "2", + }, + }, + }, + }, + }, + }, + vpaEnabled: true, + }, + { + name: "change two containers resource cpu and image step1", + spec: &UpdateSpec{ + ContainerImages: map[string]string{"c1": "c1-img-new", "c2": "c1-img-new"}, + ContainerResources: map[string]v1.ResourceRequirements{ + "c1": { + Requests: 
v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + "c2": { + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + }, + }, + state: &appspub.InPlaceUpdateState{}, + expectedState: &appspub.InPlaceUpdateState{ + LastContainerStatuses: map[string]appspub.InPlaceUpdateContainerStatus{ + "c1": { + ImageID: "containerd://c1-img", + }, + }, + NextContainerResources: map[string]v1.ResourceRequirements{ + "c2": { + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + }, + PreCheckBeforeNext: &appspub.InPlaceUpdatePreCheckBeforeNext{ + ContainersRequiredReady: []string{"c1"}, + }, + NextContainerImages: map[string]string{"c2": "c1-img-new"}, + ContainerBatchesRecord: []appspub.InPlaceUpdateContainerBatch{{Timestamp: metav1.NewTime(now), Containers: []string{"c1"}}}, + }, + expectedPatch: map[string]interface{}{ + "spec": map[string]interface{}{ + "containers": []map[string]interface{}{ + { + "name": "c1", + "image": "c1-img-new", + "resources": map[string]interface{}{ + "requests": map[string]interface{}{ + "memory": "2Gi", + "cpu": "2", + }, + }, + }, + }, + }, + }, + vpaEnabled: true, + }, + { + name: "change two containers resource cpu and image step2", + spec: &UpdateSpec{ + ContainerImages: map[string]string{"c2": "c1-img-new"}, + ContainerResources: map[string]v1.ResourceRequirements{ + "c2": { + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + }, + }, + state: &appspub.InPlaceUpdateState{ + LastContainerStatuses: map[string]appspub.InPlaceUpdateContainerStatus{ + "c1": { + ImageID: "containerd://c2-img", + }, + }, + NextContainerResources: map[string]v1.ResourceRequirements{ + "c2": { + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + }, + PreCheckBeforeNext: &appspub.InPlaceUpdatePreCheckBeforeNext{ + ContainersRequiredReady: []string{"c1"}, + }, + NextContainerImages: map[string]string{"c2": "c1-img-new"}, + ContainerBatchesRecord: []appspub.InPlaceUpdateContainerBatch{{Timestamp: metav1.NewTime(now), Containers: []string{"c1"}}}, + }, + expectedState: &appspub.InPlaceUpdateState{ + LastContainerStatuses: map[string]appspub.InPlaceUpdateContainerStatus{ + "c1": { + ImageID: "containerd://c2-img", + }, + "c2": { + ImageID: "containerd://c2-img", + }, + }, + ContainerBatchesRecord: []appspub.InPlaceUpdateContainerBatch{{Timestamp: metav1.NewTime(now), Containers: []string{"c1"}}, {Timestamp: metav1.NewTime(now), Containers: []string{"c2"}}}, + }, + expectedPatch: map[string]interface{}{ + "spec": map[string]interface{}{ + "containers": []map[string]interface{}{ + { + "name": "c2", + "image": "c1-img-new", + "resources": map[string]interface{}{ + "requests": map[string]interface{}{ + "memory": "2Gi", + "cpu": "2", + }, + }, + }, + }, + }, + }, + vpaEnabled: true, + }, + } + + // Initialize the vertical update operator + verticalUpdateImpl = &NativeVerticalUpdate{} + + // Run the test cases + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + defer utilfeature.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlaceWorkloadVerticalScaling, tc.vpaEnabled)() + gotPod, err := defaultPatchUpdateSpecToPod(pod.DeepCopy(), tc.spec, tc.state) + if err != nil { + t.Fatal(err) + } + + if 
!apiequality.Semantic.DeepEqual(tc.state, tc.expectedState) { + t.Fatalf("expected state \n%v\n but got \n%v", util.DumpJSON(tc.expectedState), util.DumpJSON(tc.state)) + } + + originPodJS, _ := json.Marshal(pod) + patchJS, _ := json.Marshal(tc.expectedPatch) + expectedPodJS, err := strategicpatch.StrategicMergePatch(originPodJS, patchJS, &v1.Pod{}) + if err != nil { + t.Fatal(err) + } + expectedPod := &v1.Pod{} + if err := json.Unmarshal(expectedPodJS, expectedPod); err != nil { + t.Fatal(err) + } + expectedPod.Annotations[appspub.InPlaceUpdateStateKey] = util.DumpJSON(tc.state) + if !apiequality.Semantic.DeepEqual(gotPod, expectedPod) { + t.Fatalf("expected pod \n%v\n but got \n%v", util.DumpJSON(expectedPod), util.DumpJSON(gotPod)) + } + }) + } +} + +func createFakePod(imageInject, resourceInject, stateInject bool, num, imageOKNum, resourceOKNumber int) *v1.Pod { + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + apps.StatefulSetRevisionLabel: "new-revision", + }, + Annotations: map[string]string{}, + Name: "test-pod", + }, + } + + for i := 0; i < num; i++ { + name := fmt.Sprintf("c%d", i) + pod.Spec.Containers = append(pod.Spec.Containers, v1.Container{ + Name: name, + }) + pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, v1.ContainerStatus{ + Name: name, + }) + } + state := appspub.InPlaceUpdateState{Revision: "new-revision", LastContainerStatuses: map[string]appspub.InPlaceUpdateContainerStatus{}} + for i := 0; i < num; i++ { + imageID := fmt.Sprintf("img0%d", i) + lastStatus := appspub.InPlaceUpdateContainerStatus{} + if imageInject { + pod.Spec.Containers[i].Image = fmt.Sprintf("busybox:test%d", i) + pod.Status.ContainerStatuses[i].ImageID = imageID + + lastImgId := "different-img01" + img := lastImgId + if i < imageOKNum { + // ok => imgId != lastImageId + img = fmt.Sprintf("img0%d", i) + } + pod.Status.ContainerStatuses[i].ImageID = img + lastStatus.ImageID = lastImgId + } + if resourceInject { + defaultCPU := resource.MustParse("200m") + defaultMem := resource.MustParse("200Mi") + lastCPU := resource.MustParse("100m") + lastMem := resource.MustParse("100Mi") + pod.Spec.Containers[i].Resources = v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: defaultCPU, + v1.ResourceMemory: defaultMem, + }, + } + pod.Status.ContainerStatuses[i].Resources = &v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: defaultCPU, + v1.ResourceMemory: defaultMem, + }, + } + if i >= resourceOKNumber { + pod.Status.ContainerStatuses[i].Resources = &v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: lastCPU, + v1.ResourceMemory: lastMem, + }, + } + } + } + state.LastContainerStatuses[pod.Spec.Containers[i].Name] = lastStatus + } + + if stateInject { + if resourceInject { + state.UpdateResources = true + } + v, _ := json.Marshal(state) + pod.Annotations[appspub.InPlaceUpdateStateKey] = string(v) + } + return pod +} + +func TestDefaultCheckInPlaceUpdateCompleted_Resource(t *testing.T) { + // build the test cases + tests := []struct { + name string + pod *v1.Pod + expectError bool + vpaEnabled bool + }{ + // normal case with feature gate disabled + { + name: "empty pod", + pod: createFakePod(false, false, false, 1, 0, 0), + expectError: false, + vpaEnabled: false, + }, + { + name: "image ok", + pod: createFakePod(true, false, true, 1, 1, 0), + expectError: false, + vpaEnabled: false, + }, + { + name: "image not ok", + pod: createFakePod(true,
false, true, 1, 0, 0), + expectError: true, + vpaEnabled: false, + }, + { + name: "all image ok", + pod: createFakePod(true, false, true, 2, 2, 0), + expectError: false, + vpaEnabled: false, + }, + { + name: "remain image not ok", + pod: createFakePod(true, false, true, 2, 1, 0), + expectError: true, + vpaEnabled: false, + }, + { + name: "all image ok with resource ok", + pod: createFakePod(true, true, true, 2, 2, 2), + expectError: false, + vpaEnabled: false, + }, + { + name: "all image ok with resource not ok", + pod: createFakePod(true, true, true, 2, 2, 1), + expectError: false, + vpaEnabled: false, + }, + { + name: "remain image not ok with resource not ok", + pod: createFakePod(true, true, true, 2, 1, 1), + expectError: true, + vpaEnabled: false, + }, + // normal case with feature gate enabled + { + name: "empty pod", + pod: createFakePod(false, false, false, 1, 0, 0), + expectError: false, + vpaEnabled: true, + }, + { + name: "image ok", + pod: createFakePod(true, false, true, 1, 1, 0), + expectError: false, + vpaEnabled: true, + }, + { + name: "image not ok", + pod: createFakePod(true, false, true, 1, 0, 0), + expectError: true, + vpaEnabled: true, + }, + { + name: "all image ok", + pod: createFakePod(true, false, true, 2, 2, 0), + expectError: false, + vpaEnabled: true, + }, + { + name: "remain image not ok", + pod: createFakePod(true, false, true, 2, 1, 0), + expectError: true, + vpaEnabled: true, + }, + { + name: "all image ok with resource ok", + pod: createFakePod(true, true, true, 2, 2, 2), + expectError: false, + vpaEnabled: true, + }, + { + name: "all image ok with resource not ok", + pod: createFakePod(true, true, true, 2, 2, 1), + expectError: true, + vpaEnabled: true, + }, + { + name: "remain image not ok with resource not ok", + pod: createFakePod(true, true, true, 2, 1, 1), + expectError: true, + vpaEnabled: true, + }, + { + name: "only resource not ok", + pod: createFakePod(true, true, true, 3, 3, 1), + expectError: true, + vpaEnabled: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Logf("case: %v vpa-enabled: %v", tt.name, tt.vpaEnabled) + defer utilfeature.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlaceWorkloadVerticalScaling, tt.vpaEnabled)() + err := DefaultCheckInPlaceUpdateCompleted(tt.pod) + if tt.expectError { + //t.Logf("get error: %v", err) + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestDefaultCheckPodNeedsBeUnready(t *testing.T) { + // Setup test cases + tests := []struct { + name string + pod *v1.Pod + spec *UpdateSpec + expected bool + vpaEnabled bool + }{ + { + name: "contains ReadinessGates, vpa disabled", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{}, + ReadinessGates: []v1.PodReadinessGate{ + {ConditionType: appspub.InPlaceUpdateReady}, + }, + }, + }, + spec: &UpdateSpec{ + + ContainerResources: map[string]v1.ResourceRequirements{}, + }, + expected: true, + vpaEnabled: false, + }, + { + name: "contains no ReadinessGates1", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{}, + }, + }, + spec: &UpdateSpec{ + + ContainerResources: map[string]v1.ResourceRequirements{}, + }, + expected: false, + vpaEnabled: false, + }, + { + name: "contains ReadinessGates, vpa enabled and VerticalUpdateOnly", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{}, + ReadinessGates: []v1.PodReadinessGate{ + {ConditionType: appspub.InPlaceUpdateReady}, + }, + }, + }, + spec: &UpdateSpec{ + ContainerResources: 
map[string]v1.ResourceRequirements{ + "c2": { + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + }, + }, + expected: false, + vpaEnabled: true, + }, + { + name: "contains ReadinessGates, vpa enabled but not VerticalUpdateOnly", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{}, + ReadinessGates: []v1.PodReadinessGate{ + {ConditionType: appspub.InPlaceUpdateReady}, + }, + }, + }, + spec: &UpdateSpec{ + ContainerResources: map[string]v1.ResourceRequirements{}, + }, + expected: true, + vpaEnabled: true, + }, + { + name: "contains no ReadinessGates2", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{}, + }, + }, + spec: &UpdateSpec{ + + ContainerResources: map[string]v1.ResourceRequirements{}, + }, + expected: false, + vpaEnabled: true, + }, + { + name: "contains no ReadinessGates3", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{}, + }, + }, + spec: &UpdateSpec{ + + ContainerResources: map[string]v1.ResourceRequirements{}, + }, + expected: false, + vpaEnabled: true, + }, + { + name: "contains ReadinessGates, other restart container", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "11", + ResizePolicy: []v1.ContainerResizePolicy{ + {ResourceName: v1.ResourceCPU, RestartPolicy: v1.RestartContainer}, + }, + }, + { + Name: "c2", + }, + }, + ReadinessGates: []v1.PodReadinessGate{ + {ConditionType: appspub.InPlaceUpdateReady}, + }, + }, + }, + spec: &UpdateSpec{ + ContainerResources: map[string]v1.ResourceRequirements{ + "c2": { + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("2Gi"), + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + }, + }, + expected: false, + vpaEnabled: true, + }, + { + name: "contains ReadinessGates, resize cpu and cpu restart policy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "11", + ResizePolicy: []v1.ContainerResizePolicy{ + {ResourceName: v1.ResourceCPU, RestartPolicy: v1.RestartContainer}, + }, + }, + }, + ReadinessGates: []v1.PodReadinessGate{ + {ConditionType: appspub.InPlaceUpdateReady}, + }, + }, + }, + spec: &UpdateSpec{ + + ContainerResources: map[string]v1.ResourceRequirements{ + "11": { + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("100m"), + }, + }, + }, + }, + expected: true, + vpaEnabled: true, + }, + { + name: "contains ReadinessGates, resize mem and cpu restart policy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "11", + ResizePolicy: []v1.ContainerResizePolicy{ + {ResourceName: v1.ResourceCPU, RestartPolicy: v1.RestartContainer}, + }, + }, + }, + ReadinessGates: []v1.PodReadinessGate{ + {ConditionType: appspub.InPlaceUpdateReady}, + }, + }, + }, + spec: &UpdateSpec{ + + ContainerResources: map[string]v1.ResourceRequirements{ + "11": { + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceMemory: resource.MustParse("100Mi"), + }, + }, + }, + }, + expected: false, + vpaEnabled: true, + }, + { + name: "contains ReadinessGates, resize mem and cpu restart policy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "11", + ResizePolicy: []v1.ContainerResizePolicy{ + {ResourceName: v1.ResourceCPU, RestartPolicy: v1.RestartContainer}, + {ResourceName: v1.ResourceMemory, RestartPolicy: v1.NotRequired}, + }, + }, + }, + ReadinessGates: []v1.PodReadinessGate{ + {ConditionType: appspub.InPlaceUpdateReady}, + }, + }, + }, + spec: 
&UpdateSpec{ + ContainerResources: map[string]v1.ResourceRequirements{ + "11": { + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceMemory: resource.MustParse("100Mi"), + }, + }, + }, + }, + expected: false, + vpaEnabled: true, + }, + { + name: "contains ReadinessGates, vpa disabled and resize mem and cpu restart policy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "11", + ResizePolicy: []v1.ContainerResizePolicy{ + {ResourceName: v1.ResourceCPU, RestartPolicy: v1.RestartContainer}, + {ResourceName: v1.ResourceMemory, RestartPolicy: v1.NotRequired}, + }, + }, + }, + ReadinessGates: []v1.PodReadinessGate{ + {ConditionType: appspub.InPlaceUpdateReady}, + }, + }, + }, + spec: &UpdateSpec{ + + ContainerResources: map[string]v1.ResourceRequirements{ + "11": { + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceMemory: resource.MustParse("100Mi"), + }, + }, + }, + }, + expected: true, + vpaEnabled: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + defer utilfeature.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlaceWorkloadVerticalScaling, tc.vpaEnabled)() + result := defaultCheckPodNeedsBeUnready(tc.pod, tc.spec) + assert.Equal(t, tc.expected, result) + }) + } +} diff --git a/pkg/util/inplaceupdate/inplace_update_test.go b/pkg/util/inplaceupdate/inplace_update_test.go index 7b19102fbe..af3295e7ec 100644 --- a/pkg/util/inplaceupdate/inplace_update_test.go +++ b/pkg/util/inplaceupdate/inplace_update_test.go @@ -64,6 +64,7 @@ func TestCalculateInPlaceUpdateSpec(t *testing.T) { expectedSpec: &UpdateSpec{ Revision: "new-revision", ContainerImages: map[string]string{"c1": "foo2"}, + ContainerResources: map[string]v1.ResourceRequirements{}, ContainerRefMetadata: make(map[string]metav1.ObjectMeta), }, }, @@ -79,6 +80,7 @@ func TestCalculateInPlaceUpdateSpec(t *testing.T) { expectedSpec: &UpdateSpec{ Revision: "new-revision", ContainerImages: map[string]string{"c1": "foo2"}, + ContainerResources: map[string]v1.ResourceRequirements{}, ContainerRefMetadata: make(map[string]metav1.ObjectMeta), }, }, @@ -94,6 +96,7 @@ func TestCalculateInPlaceUpdateSpec(t *testing.T) { expectedSpec: &UpdateSpec{ Revision: "new-revision", ContainerImages: map[string]string{"c1": "foo2"}, + ContainerResources: map[string]v1.ResourceRequirements{}, ContainerRefMetadata: make(map[string]metav1.ObjectMeta), MetaDataPatch: []byte(`{"metadata":{"labels":{"k1":"v1"}}}`), }, @@ -110,6 +113,7 @@ func TestCalculateInPlaceUpdateSpec(t *testing.T) { expectedSpec: &UpdateSpec{ Revision: "new-revision", ContainerImages: map[string]string{"c1": "foo2"}, + ContainerResources: map[string]v1.ResourceRequirements{}, ContainerRefMetadata: map[string]metav1.ObjectMeta{"c1": {Labels: map[string]string{"k": "v2"}}}, MetaDataPatch: []byte(`{"metadata":{"labels":{"k1":"v1"}}}`), UpdateEnvFromMetadata: true, @@ -127,6 +131,7 @@ func TestCalculateInPlaceUpdateSpec(t *testing.T) { expectedSpec: &UpdateSpec{ Revision: "new-revision", ContainerImages: map[string]string{"c1": "foo2"}, + ContainerResources: map[string]v1.ResourceRequirements{}, ContainerRefMetadata: make(map[string]metav1.ObjectMeta), MetaDataPatch: []byte(`{"metadata":{"$deleteFromPrimitiveList/finalizers":["fz1"],"$setElementOrder/finalizers":["fz2"],"labels":{"k1":"v1","k2":null}}}`), }, diff --git a/pkg/util/inplaceupdate/inplace_update_vertical.go b/pkg/util/inplaceupdate/inplace_update_vertical.go new file mode 100644 index 0000000000..ea34d988ac --- /dev/null +++ 
b/pkg/util/inplaceupdate/inplace_update_vertical.go @@ -0,0 +1,185 @@ +/* +Copyright 2023 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inplaceupdate + +import ( + "errors" + "fmt" + "strconv" + "strings" + + "github.com/appscode/jsonpatch" + "github.com/google/go-cmp/cmp" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +// interface for In-place workload vertical scaling +type VerticalUpdateInterface interface { + // UpdateInplaceUpdateMetadata validates and applies the resource patch to the UpdateSpec. + UpdateInplaceUpdateMetadata(op *jsonpatch.Operation, oldTemp *v1.PodTemplateSpec, updateSpec *UpdateSpec) error + + // UpdateResource some or all containers of a pod can be updated at once within this interface. + // container or pod level resources can be updated by this interface + UpdateResource(pod *v1.Pod, expectedResources map[string]*v1.ResourceRequirements) + // IsUpdateCompleted To determine whether the pod has been successfully vertical updated + IsUpdateCompleted(pod *v1.Pod) (bool, error) +} + +var verticalUpdateImpl VerticalUpdateInterface = nil + +// To register vertical update operations, +// you can register different vertical update implementations here +func init() { + // Now, we assume that there is a single standard per cluster, so register in init() + // TODO(Abner-1): Perhaps we should dynamically select the verticalUpdateImpl based on the pod metadata being processed. 
+ // give us more suggestions if you need + if verticalUpdateImpl == nil { + verticalUpdateImpl = &NativeVerticalUpdate{} + } +} + +// NativeVerticalUpdate represents the vertical scaling of k8s standard +type NativeVerticalUpdate struct{} + +var _ VerticalUpdateInterface = &NativeVerticalUpdate{} + +func (v *NativeVerticalUpdate) UpdateInplaceUpdateMetadata(op *jsonpatch.Operation, oldTemp *v1.PodTemplateSpec, updateSpec *UpdateSpec) error { + // for example: /spec/containers/0/resources/limits/cpu + words := strings.Split(op.Path, "/") + if len(words) != 7 { + return fmt.Errorf("invalid resource path: %s", op.Path) + } + idx, err := strconv.Atoi(words[3]) + if err != nil || len(oldTemp.Spec.Containers) <= idx { + return fmt.Errorf("invalid container index: %s", op.Path) + } + if op.Operation == "remove" || op.Operation == "add" { + // Before k8s 1.32, we can not resize resources for a container with no limit or request + // TODO(Abner-1) change it if 1.32 released and allowing this operation + return errors.New("can not add or remove resources") + } + + if op.Value == nil { + return errors.New("json patch value is nil") + } + quantity, err := resource.ParseQuantity(op.Value.(string)) + if err != nil { + return fmt.Errorf("parse quantity error: %v", err) + } + + if !v.CanResourcesResizeInPlace(words[6]) { + return fmt.Errorf("disallowed inplace update resource: %s", words[6]) + } + + cName := oldTemp.Spec.Containers[idx].Name + if _, ok := updateSpec.ContainerResources[cName]; !ok { + updateSpec.ContainerResources[cName] = v1.ResourceRequirements{ + Limits: make(v1.ResourceList), + Requests: make(v1.ResourceList), + } + } + switch words[5] { + case "limits": + updateSpec.ContainerResources[cName].Limits[v1.ResourceName(words[6])] = quantity + case "requests": + updateSpec.ContainerResources[cName].Requests[v1.ResourceName(words[6])] = quantity + } + return nil +} + +// updateContainerResource implements vertical updates by directly modifying the container's resources, +// conforming to the k8s community standard +func (v *NativeVerticalUpdate) updateContainerResource(container *v1.Container, newResource *v1.ResourceRequirements) { + if container == nil || newResource == nil { + return + } + for key, quantity := range newResource.Limits { + if !v.CanResourcesResizeInPlace(string(key)) { + continue + } + container.Resources.Limits[key] = quantity + } + for key, quantity := range newResource.Requests { + if !v.CanResourcesResizeInPlace(string(key)) { + continue + } + container.Resources.Requests[key] = quantity + } +} + +// isContainerUpdateCompleted directly determines whether the current container is vertically updated by the spec and status of the container, +// which conforms to the k8s community standard +func (v *NativeVerticalUpdate) isContainerUpdateCompleted(container *v1.Container, containerStatus *v1.ContainerStatus) bool { + if containerStatus == nil || containerStatus.Resources == nil || container == nil { + return false + } + if !cmp.Equal(container.Resources.Limits, containerStatus.Resources.Limits) || + !cmp.Equal(container.Resources.Requests, containerStatus.Resources.Requests) { + return false + } + return true +} + +func (v *NativeVerticalUpdate) UpdateResource(pod *v1.Pod, expectedResources map[string]*v1.ResourceRequirements) { + if len(expectedResources) == 0 { + // pod level hook, ignore in native implementation + return + } + for i := range pod.Spec.Containers { + c := &pod.Spec.Containers[i] + newResource, resourceExists := expectedResources[c.Name] + if 
!resourceExists { + continue + } + v.updateContainerResource(c, newResource) + } + return +} + +func (v *NativeVerticalUpdate) IsUpdateCompleted(pod *v1.Pod) (bool, error) { + containers := make(map[string]*v1.Container, len(pod.Spec.Containers)) + for i := range pod.Spec.Containers { + c := &pod.Spec.Containers[i] + containers[c.Name] = c + } + if len(pod.Status.ContainerStatuses) != len(containers) { + return false, fmt.Errorf("some container status is not reported") + } + for _, cs := range pod.Status.ContainerStatuses { + if !v.isContainerUpdateCompleted(containers[cs.Name], &cs) { + return false, fmt.Errorf("container %s resources not changed", cs.Name) + } + } + return true, nil +} + +// only cpu and memory are allowed to be inplace updated +var allowedResizeResourceKey = map[string]bool{ + string(v1.ResourceCPU): true, + string(v1.ResourceMemory): true, +} + +func (v *NativeVerticalUpdate) CanResourcesResizeInPlace(resourceKey string) bool { + allowed, exist := allowedResizeResourceKey[resourceKey] + return exist && allowed +} + +// Internal implementation of vertical updates +// type VerticalUpdateInternal struct{} + +// var _ VerticalUpdateInterface = &VerticalUpdateInternal{} diff --git a/pkg/util/inplaceupdate/inplace_update_vertical_test.go b/pkg/util/inplaceupdate/inplace_update_vertical_test.go new file mode 100644 index 0000000000..b0bab45f0c --- /dev/null +++ b/pkg/util/inplaceupdate/inplace_update_vertical_test.go @@ -0,0 +1,271 @@ +package inplaceupdate + +import ( + "testing" + + "github.com/appscode/jsonpatch" + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +func TestValidateResourcePatch(t *testing.T) { + oldTemp := &v1.PodTemplateSpec{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "test-container", + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")}, + Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("512Mi")}, + }, + }, + { + Name: "test-container2", + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")}, + Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("512Mi")}, + }, + }, + }, + }, + } + + tests := []struct { + name string + op *jsonpatch.Operation + validateFn func(updateSpec *UpdateSpec) bool + expectedErr bool + }{ + { + name: "valid patch for cpu limits", + op: &jsonpatch.Operation{ + Path: "/spec/containers/0/resources/limits/cpu", + Value: "200m", + }, + validateFn: func(updateSpec *UpdateSpec) bool { + res := updateSpec.ContainerResources["test-container"].Limits + return res.Cpu().String() == "200m" + }, + expectedErr: false, + }, + { + name: "valid patch for memory requests", + op: &jsonpatch.Operation{ + Path: "/spec/containers/0/resources/requests/memory", + Value: "1Gi", + }, + validateFn: func(updateSpec *UpdateSpec) bool { + res := updateSpec.ContainerResources["test-container"].Requests + return res.Memory().String() == "1Gi" + }, + expectedErr: false, + }, + { + name: "invalid patch for non-existent container", + op: &jsonpatch.Operation{ + Path: "/spec/containers/2/resources/limits/cpu", + Value: "200m", + }, + expectedErr: true, + }, + { + name: "invalid patch for non-standard resource", + op: &jsonpatch.Operation{ + Path: "/spec/containers/0/resources/limits/gpu", + Value: "1", + }, + expectedErr: true, + }, + { + name: "invalid patch for non-quantity value", + op: &jsonpatch.Operation{ + Path: 
"/spec/containers/0/resources/limits/cpu", + Value: "not-a-quantity", + }, + expectedErr: true, + }, + { + name: "add resource", + op: &jsonpatch.Operation{ + Operation: "add", + Path: "/spec/containers/0/resources/limits/cpu", + Value: "10m", + }, + expectedErr: true, + }, + { + name: "remove resource", + op: &jsonpatch.Operation{ + Operation: "remove", + Path: "/spec/containers/0/resources/limits/cpu", + }, + expectedErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + updateSpec := &UpdateSpec{ + ContainerResources: make(map[string]v1.ResourceRequirements), + } + vu := &NativeVerticalUpdate{} + err := vu.UpdateInplaceUpdateMetadata(tt.op, oldTemp, updateSpec) + if tt.expectedErr { + assert.NotNil(t, err) + } else { + assert.Nil(t, err) + } + if tt.validateFn != nil { + ok := tt.validateFn(updateSpec) + assert.True(t, ok) + } + }) + } +} + +func TestIsContainerUpdateCompleted(t *testing.T) { + v := NativeVerticalUpdate{} + + tests := []struct { + name string + container v1.Container + containerStatus v1.ContainerStatus + expectedResult bool + }{ + { + name: "Test status ok", + container: v1.Container{ + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{"cpu": resource.MustParse("100m"), "memory": resource.MustParse("128Mi")}, + Requests: v1.ResourceList{"cpu": resource.MustParse("50m"), "memory": resource.MustParse("64Mi")}, + }, + }, + containerStatus: v1.ContainerStatus{ + Resources: &v1.ResourceRequirements{ + Limits: v1.ResourceList{"cpu": resource.MustParse("100m"), "memory": resource.MustParse("128Mi")}, + Requests: v1.ResourceList{"cpu": resource.MustParse("50m"), "memory": resource.MustParse("64Mi")}, + }, + }, + + expectedResult: true, + }, + { + name: "Test status not ok - cpu limit", + container: v1.Container{ + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{"cpu": resource.MustParse("100m")}, + Requests: v1.ResourceList{"cpu": resource.MustParse("50m")}, + }, + }, + containerStatus: v1.ContainerStatus{ + Resources: &v1.ResourceRequirements{ + Limits: v1.ResourceList{"cpu": resource.MustParse("200m")}, + Requests: v1.ResourceList{"cpu": resource.MustParse("50m")}, + }, + }, + + expectedResult: false, + }, + { + name: "Test status not ok - mem limit", + container: v1.Container{ + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("100Mi")}, + Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("50Mi")}, + }, + }, + containerStatus: v1.ContainerStatus{ + Resources: &v1.ResourceRequirements{ + Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("200Mi")}, + Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("50Mi")}, + }, + }, + expectedResult: false, + }, + { + name: "Test status not ok - only mem limit", + container: v1.Container{ + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("100Mi")}, + }, + }, + containerStatus: v1.ContainerStatus{ + Resources: &v1.ResourceRequirements{ + Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("200Mi")}, + }, + }, + expectedResult: false, + }, + { + name: "Test status not ok - only mem request", + container: v1.Container{ + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("100Mi")}, + }, + }, + containerStatus: v1.ContainerStatus{ + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("50Mi")}, + }, + }, + expectedResult: false, + }, 
+ { + name: "container and containerStatus are empty", + container: v1.Container{ + Resources: v1.ResourceRequirements{}, + }, + containerStatus: v1.ContainerStatus{ + Resources: &v1.ResourceRequirements{}, + }, + expectedResult: true, + }, + { + name: "container is empty", + container: v1.Container{ + Resources: v1.ResourceRequirements{}, + }, + containerStatus: v1.ContainerStatus{ + Resources: &v1.ResourceRequirements{ + Limits: v1.ResourceList{"cpu": resource.MustParse("200m")}, + Requests: v1.ResourceList{"cpu": resource.MustParse("50m")}, + }, + }, + expectedResult: false, + }, + { + name: "containerStatus is nil", + container: v1.Container{ + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{"cpu": resource.MustParse("200m")}, + Requests: v1.ResourceList{"cpu": resource.MustParse("50m")}, + }, + }, + containerStatus: v1.ContainerStatus{ + Resources: nil, + }, + expectedResult: false, + }, + { + name: "containerStatus is empty", + container: v1.Container{ + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{"cpu": resource.MustParse("200m")}, + Requests: v1.ResourceList{"cpu": resource.MustParse("50m")}, + }, + }, + containerStatus: v1.ContainerStatus{ + Resources: &v1.ResourceRequirements{}, + }, + expectedResult: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := v.isContainerUpdateCompleted(&tt.container, &tt.containerStatus) + assert.Equal(t, tt.expectedResult, result) + }) + } +} diff --git a/pkg/webhook/pod/validating/pod_unavailable_budget.go b/pkg/webhook/pod/validating/pod_unavailable_budget.go index a7c1047450..39a8ceb43a 100644 --- a/pkg/webhook/pod/validating/pod_unavailable_budget.go +++ b/pkg/webhook/pod/validating/pod_unavailable_budget.go @@ -19,8 +19,6 @@ package validating import ( "context" - policyv1alpha1 "github.com/openkruise/kruise/apis/policy/v1alpha1" - "github.com/openkruise/kruise/pkg/control/pubcontrol" admissionv1 "k8s.io/api/admission/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -29,6 +27,9 @@ import ( "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/apis/policy" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + + policyv1alpha1 "github.com/openkruise/kruise/apis/policy/v1alpha1" + "github.com/openkruise/kruise/pkg/control/pubcontrol" ) // parameters: diff --git a/pkg/webhook/pod/validating/workloadspread.go b/pkg/webhook/pod/validating/workloadspread.go index 64c1709579..217a239e43 100644 --- a/pkg/webhook/pod/validating/workloadspread.go +++ b/pkg/webhook/pod/validating/workloadspread.go @@ -19,7 +19,6 @@ package validating import ( "context" - wsutil "github.com/openkruise/kruise/pkg/util/workloadspread" admissionv1 "k8s.io/api/admission/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -28,6 +27,8 @@ import ( "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/apis/policy" + wsutil "github.com/openkruise/kruise/pkg/util/workloadspread" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" ) diff --git a/test/e2e/apps/daemonset.go b/test/e2e/apps/daemonset.go index d56bc0c891..007207d239 100644 --- a/test/e2e/apps/daemonset.go +++ b/test/e2e/apps/daemonset.go @@ -7,11 +7,6 @@ import ( "github.com/onsi/ginkgo" "github.com/onsi/gomega" - appspub "github.com/openkruise/kruise/apis/apps/pub" - appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1" - kruiseclientset "github.com/openkruise/kruise/pkg/client/clientset/versioned" - "github.com/openkruise/kruise/pkg/util/lifecycle" - 
"github.com/openkruise/kruise/test/e2e/framework" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" @@ -23,6 +18,12 @@ import ( clientset "k8s.io/client-go/kubernetes" podutil "k8s.io/kubernetes/pkg/api/v1/pod" daemonutil "k8s.io/kubernetes/pkg/controller/daemon/util" + + appspub "github.com/openkruise/kruise/apis/apps/pub" + appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1" + kruiseclientset "github.com/openkruise/kruise/pkg/client/clientset/versioned" + "github.com/openkruise/kruise/pkg/util/lifecycle" + "github.com/openkruise/kruise/test/e2e/framework" ) var _ = SIGDescribe("DaemonSet", func() { @@ -351,6 +352,8 @@ var _ = SIGDescribe("DaemonSet", func() { v1.ResourceCPU: resource.MustParse("120m"), }, } + // when enable InPlaceWorkloadVerticalScaling feature, just resize request will not delete pod + ads.Spec.Template.Spec.Containers[0].Env = append(ads.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "test", Value: "test"}) }) gomega.Expect(err).NotTo(gomega.HaveOccurred(), "error to update daemon") diff --git a/test/e2e/apps/inplace_vpa.go b/test/e2e/apps/inplace_vpa.go new file mode 100644 index 0000000000..485bde160d --- /dev/null +++ b/test/e2e/apps/inplace_vpa.go @@ -0,0 +1,1344 @@ +package apps + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + "strings" + "time" + + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" + apps "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/kubernetes/pkg/util/slice" + imageutils "k8s.io/kubernetes/test/utils/image" + "k8s.io/utils/diff" + utilpointer "k8s.io/utils/pointer" + + appspub "github.com/openkruise/kruise/apis/apps/pub" + appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1" + appsv1beta1 "github.com/openkruise/kruise/apis/apps/v1beta1" + kruiseclientset "github.com/openkruise/kruise/pkg/client/clientset/versioned" + "github.com/openkruise/kruise/pkg/util" + "github.com/openkruise/kruise/test/e2e/framework" +) + +var _ = SIGDescribe("InplaceVPA", func() { + f := framework.NewDefaultFramework("inplace-vpa") + var ns string + var c clientset.Interface + var kc kruiseclientset.Interface + var tester *framework.CloneSetTester + var randStr string + IsKubernetesVersionLessThan127 := func() bool { + if v, err := c.Discovery().ServerVersion(); err != nil { + framework.Logf("Failed to discovery server version: %v", err) + } else if minor, err := strconv.Atoi(v.Minor); err != nil || minor < 27 { + return true + } + return false + } + + ginkgo.BeforeEach(func() { + c = f.ClientSet + kc = f.KruiseClientSet + ns = f.Namespace.Name + tester = framework.NewCloneSetTester(c, kc, ns) + randStr = rand.String(10) + + if IsKubernetesVersionLessThan127() { + ginkgo.Skip("skip this e2e case, it can only run on K8s >= 1.27") + } + }) + + oldResource := v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("200m"), + v1.ResourceMemory: resource.MustParse("200Mi"), + }, + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("1"), + v1.ResourceMemory: resource.MustParse("1Gi"), + }, + } + newResource := v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("100m"), + 
v1.ResourceMemory: resource.MustParse("100Mi"), + }, + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("800m"), + v1.ResourceMemory: resource.MustParse("800Mi"), + }, + } + // TODO(Abner-1)update only inplace resources may fail in kind e2e. + // I will resolve it in another PR + //framework.KruiseDescribe("CloneSet Updating with only inplace resource", func() { + // var err error + // testUpdateResource := func(fn func(spec *v1.PodSpec), resizePolicy []v1.ContainerResizePolicy) { + // cs := tester.NewCloneSet("clone-"+randStr, 1, appsv1alpha1.CloneSetUpdateStrategy{Type: appsv1alpha1.InPlaceIfPossibleCloneSetUpdateStrategyType}) + // cs.Spec.Template.Spec.Containers[0].ResizePolicy = resizePolicy + // imageConfig := imageutils.GetConfig(imageutils.Nginx) + // imageConfig.SetVersion("alpine") + // cs.Spec.Template.Spec.Containers[0].Image = imageConfig.GetE2EImage() + // cs, err = tester.CreateCloneSet(cs) + // gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // gomega.Expect(cs.Spec.UpdateStrategy.Type).To(gomega.Equal(appsv1alpha1.InPlaceIfPossibleCloneSetUpdateStrategyType)) + // + // ginkgo.By("Wait for replicas satisfied") + // gomega.Eventually(func() int32 { + // cs, err = tester.GetCloneSet(cs.Name) + // gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // return cs.Status.Replicas + // }, 3*time.Second, time.Second).Should(gomega.Equal(int32(1))) + // + // ginkgo.By("Wait for all pods ready") + // gomega.Eventually(func() int32 { + // cs, err = tester.GetCloneSet(cs.Name) + // gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // return cs.Status.ReadyReplicas + // }, 60*time.Second, 3*time.Second).Should(gomega.Equal(int32(1))) + // + // pods, err := tester.ListPodsForCloneSet(cs.Name) + // gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // gomega.Expect(len(pods)).Should(gomega.Equal(1)) + // oldPodResource := getPodResource(pods[0]) + // + // err = tester.UpdateCloneSet(cs.Name, func(cs *appsv1alpha1.CloneSet) { + // if cs.Annotations == nil { + // cs.Annotations = map[string]string{} + // } + // fn(&cs.Spec.Template.Spec) + // }) + // gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // lastGeneration := cs.Generation + // ginkgo.By("Wait for CloneSet generation consistent") + // gomega.Eventually(func() bool { + // cs, err = tester.GetCloneSet(cs.Name) + // gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // return cs.Generation == cs.Status.ObservedGeneration + // }, 10*time.Second, 3*time.Second).Should(gomega.Equal(true)) + // + // framework.Logf("CloneSet last %v, generation %v, observedGeneration %v", lastGeneration, cs.Generation, cs.Status.ObservedGeneration) + // start := time.Now() + // ginkgo.By("Wait for all pods updated and ready") + // a, b, c := getResourcesInfo(pods[0]) + // gomega.Eventually(func() int32 { + // cs, err = tester.GetCloneSet(cs.Name) + // gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // + // pods, err = tester.ListPodsForCloneSet(cs.Name) + // gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // a1, b1, c1 := getResourcesInfo(pods[0]) + // if a1 != a || b1 != b || c1 != c { + // framework.Logf("updateSpec %v", a1) + // framework.Logf("spec %v", b1) + // framework.Logf("container status %v ", c1) + // a, b, c = a1, b1, c1 + // } + // SkipTestWhenCgroupError(pods[0]) + // + // return cs.Status.UpdatedAvailableReplicas + // }, 600*time.Second, 3*time.Second).Should(gomega.Equal(int32(1))) + // duration := time.Since(start) + // framework.Logf("cloneset with replica resize resource consume %vs", 
duration.Seconds()) + // + // ginkgo.By("Verify the resource changed and status=spec") + // pods, err = tester.ListPodsForCloneSet(cs.Name) + // gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // gomega.Expect(checkPodResource(pods, oldPodResource, []string{"redis"})).Should(gomega.Equal(true)) + // } + // testWithResizePolicy := func(resizePolicy []v1.ContainerResizePolicy) { + // // This can't be Conformance yet. + // ginkgo.FIt("in-place update resources scale down 1", func() { + // fn := func(spec *v1.PodSpec) { + // ginkgo.By("scale down cpu and memory request") + // spec.Containers[0].Resources.Requests[v1.ResourceCPU] = resource.MustParse("100m") + // spec.Containers[0].Resources.Requests[v1.ResourceMemory] = resource.MustParse("100Mi") + // } + // testUpdateResource(fn, resizePolicy) + // }) + // //ginkgo.FIt("in-place update resources scale down 2", func() { + // // fn := func(spec *v1.PodSpec) { + // // ginkgo.By("scale down cpu and memory limit") + // // spec.Containers[0].Resources.Limits[v1.ResourceCPU] = resource.MustParse("800m") + // // spec.Containers[0].Resources.Limits[v1.ResourceMemory] = resource.MustParse("800Mi") + // // } + // // testUpdateResource(fn, resizePolicy) + // //}) + // //ginkgo.FIt("in-place update resources scale down 3", func() { + // // fn := func(spec *v1.PodSpec) { + // // ginkgo.By("scale down cpu and memory request&limit") + // // spec.Containers[0].Resources.Requests[v1.ResourceCPU] = resource.MustParse("100m") + // // spec.Containers[0].Resources.Requests[v1.ResourceMemory] = resource.MustParse("100Mi") + // // spec.Containers[0].Resources.Limits[v1.ResourceCPU] = resource.MustParse("800m") + // // spec.Containers[0].Resources.Limits[v1.ResourceMemory] = resource.MustParse("800Mi") + // // } + // // testUpdateResource(fn, resizePolicy) + // //}) + // + // // This can't be Conformance yet. + // ginkgo.FIt("in-place update resources scale up 1", func() { + // fn := func(spec *v1.PodSpec) { + // ginkgo.By("scale up cpu and memory request") + // spec.Containers[0].Resources.Requests[v1.ResourceCPU] = resource.MustParse("300m") + // spec.Containers[0].Resources.Requests[v1.ResourceMemory] = resource.MustParse("300Mi") + // } + // testUpdateResource(fn, resizePolicy) + // }) + // //ginkgo.FIt("in-place update resources scale up 2", func() { + // // fn := func(spec *v1.PodSpec) { + // // ginkgo.By("scale up cpu and memory limit") + // // spec.Containers[0].Resources.Limits[v1.ResourceCPU] = resource.MustParse("2") + // // spec.Containers[0].Resources.Limits[v1.ResourceMemory] = resource.MustParse("2Gi") + // // } + // // testUpdateResource(fn, resizePolicy) + // //}) + // ginkgo.FIt("in-place update resources scale up 3", func() { + // fn := func(spec *v1.PodSpec) { + // ginkgo.By("scale up cpu and memory request&limit") + // spec.Containers[0].Resources.Requests[v1.ResourceCPU] = resource.MustParse("300m") + // spec.Containers[0].Resources.Requests[v1.ResourceMemory] = resource.MustParse("300Mi") + // //spec.Containers[0].Resources.Limits[v1.ResourceCPU] = resource.MustParse("2") + // //spec.Containers[0].Resources.Limits[v1.ResourceMemory] = resource.MustParse("2Gi") + // } + // testUpdateResource(fn, resizePolicy) + // }) + // + // // This can't be Conformance yet. 
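[Editorial note on the disabled specs above: while a resize converges, the signals they poll are the in-place update state annotation plus the spec and status resources of the resized container. A rough, illustrative approximation is sketched below; it is not the actual getResourcesInfo helper referenced above, whose definition is outside this excerpt.]

```go
// Assumed imports (all already used by this e2e file):
//   v1 "k8s.io/api/core/v1"
//   appspub "github.com/openkruise/kruise/apis/apps/pub"
//   "github.com/openkruise/kruise/pkg/util"

// dumpResizeProgress returns the three values worth watching while an
// in-place resize converges: the update state annotation, the desired
// resources in spec, and the actual resources reported in status.
func dumpResizeProgress(pod *v1.Pod) (state, specRes, statusRes string) {
	state = pod.Annotations[appspub.InPlaceUpdateStateKey]
	specRes = util.DumpJSON(pod.Spec.Containers[0].Resources)
	if len(pod.Status.ContainerStatuses) > 0 && pod.Status.ContainerStatuses[0].Resources != nil {
		statusRes = util.DumpJSON(pod.Status.ContainerStatuses[0].Resources)
	}
	return state, specRes, statusRes
}
```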
+ // ginkgo.FIt("in-place update resources scale up only cpu 1", func() { + // fn := func(spec *v1.PodSpec) { + // ginkgo.By("scale up cpu request") + // spec.Containers[0].Resources.Requests[v1.ResourceCPU] = resource.MustParse("300m") + // } + // testUpdateResource(fn, resizePolicy) + // }) + // //ginkgo.FIt("in-place update resources scale up only cpu limit", func() { + // // fn := func(spec *v1.PodSpec) { + // // ginkgo.By("scale up cpu limit") + // // spec.Containers[0].Resources.Limits[v1.ResourceCPU] = resource.MustParse("2") + // // } + // // testUpdateResource(fn, resizePolicy) + // //}) + // //ginkgo.FIt("in-place update resources scale up only cpu 3", func() { + // // fn := func(spec *v1.PodSpec) { + // // ginkgo.By("scale up cpu request&limit") + // // spec.Containers[0].Resources.Requests[v1.ResourceCPU] = resource.MustParse("300m") + // // spec.Containers[0].Resources.Limits[v1.ResourceCPU] = resource.MustParse("2") + // // } + // // testUpdateResource(fn, resizePolicy) + // //}) + // + // // This can't be Conformance yet. + // ginkgo.FIt("in-place update resources scale up only mem 1", func() { + // fn := func(spec *v1.PodSpec) { + // ginkgo.By("scale up memory request") + // spec.Containers[0].Resources.Requests[v1.ResourceMemory] = resource.MustParse("300Mi") + // } + // testUpdateResource(fn, resizePolicy) + // }) + // //ginkgo.FIt("in-place update resources scale up only mem limit", func() { + // // fn := func(spec *v1.PodSpec) { + // // ginkgo.By("scale up memory limit") + // // spec.Containers[0].Resources.Limits[v1.ResourceMemory] = resource.MustParse("2Gi") + // // } + // // testUpdateResource(fn, resizePolicy) + // //}) + // //ginkgo.FIt("in-place update resources scale up only mem 3", func() { + // // fn := func(spec *v1.PodSpec) { + // // ginkgo.By("scale up memory request&limit") + // // spec.Containers[0].Resources.Requests[v1.ResourceMemory] = resource.MustParse("300Mi") + // // spec.Containers[0].Resources.Limits[v1.ResourceMemory] = resource.MustParse("2Gi") + // // } + // // testUpdateResource(fn, resizePolicy) + // //}) + // } + // + // ginkgo.By("inplace update resources with RestartContainer policy") + // testWithResizePolicy([]v1.ContainerResizePolicy{ + // { + // ResourceName: v1.ResourceCPU, RestartPolicy: v1.RestartContainer, + // }, + // { + // ResourceName: v1.ResourceMemory, RestartPolicy: v1.RestartContainer, + // }, + // }) + // + // ginkgo.By("inplace update resources with NotRequired policy") + // testWithResizePolicy([]v1.ContainerResizePolicy{ + // { + // ResourceName: v1.ResourceCPU, RestartPolicy: v1.NotRequired, + // }, + // { + // ResourceName: v1.ResourceMemory, RestartPolicy: v1.NotRequired, + // }, + // }) + // + // ginkgo.By("inplace update resources with cpu RestartContainer policy") + // testWithResizePolicy([]v1.ContainerResizePolicy{ + // { + // ResourceName: v1.ResourceCPU, RestartPolicy: v1.RestartContainer, + // }, + // { + // ResourceName: v1.ResourceMemory, RestartPolicy: v1.NotRequired, + // }, + // }) + // + // ginkgo.By("inplace update resources with memory RestartContainer policy") + // testWithResizePolicy([]v1.ContainerResizePolicy{ + // { + // ResourceName: v1.ResourceCPU, RestartPolicy: v1.NotRequired, + // }, + // { + // ResourceName: v1.ResourceMemory, RestartPolicy: v1.RestartContainer, + // }, + // }) + //}) + + framework.KruiseDescribe("CloneSet failed to inplace update resource", func() { + var err error + largeResource := v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{ + 
v1.ResourceCPU: resource.MustParse("100"), + v1.ResourceMemory: resource.MustParse("1000Gi"), + }, + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("800"), + v1.ResourceMemory: resource.MustParse("8000Gi"), + }, + } + testResizePolicyFailed := func(resizePolicy []v1.ContainerResizePolicy) { + testUpdateResource := func(fn func(pod *v1.PodTemplateSpec), resizePolicy []v1.ContainerResizePolicy) { + j, _ := json.Marshal(resizePolicy) + ginkgo.By(fmt.Sprintf("resize policy %v", string(j))) + cs := tester.NewCloneSet("clone-"+randStr, 1, appsv1alpha1.CloneSetUpdateStrategy{Type: appsv1alpha1.InPlaceIfPossibleCloneSetUpdateStrategyType}) + cs.Spec.Template.Spec.Containers[0].ResizePolicy = resizePolicy + cs.Spec.Template.Spec.Containers[0].Image = NginxImage + cs.Spec.Template.ObjectMeta.Labels["test-env"] = "foo" + cs.Spec.Template.Spec.Containers[0].Env = append(cs.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{ + Name: "TEST_ENV", + ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "metadata.labels['test-env']"}}, + }) + cs, err = tester.CreateCloneSet(cs) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(cs.Spec.UpdateStrategy.Type).To(gomega.Equal(appsv1alpha1.InPlaceIfPossibleCloneSetUpdateStrategyType)) + + ginkgo.By("Wait for replicas satisfied") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Status.Replicas + }, 3*time.Second, time.Second).Should(gomega.Equal(int32(1))) + + ginkgo.By("Wait for all pods ready") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Status.ReadyReplicas + }, 60*time.Second, 3*time.Second).Should(gomega.Equal(int32(1))) + + pods, err := tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(pods)).Should(gomega.Equal(1)) + oldPodUID := pods[0].UID + oldContainerStatus := pods[0].Status.ContainerStatuses[0] + oldPodResource := getPodResource(pods[0]) + + ginkgo.By("Update CloneSet with large resource") + err = tester.UpdateCloneSet(cs.Name, func(cs *appsv1alpha1.CloneSet) { + if cs.Annotations == nil { + cs.Annotations = map[string]string{} + } + fn(&cs.Spec.Template) + cs.Spec.Template.Spec.Containers[0].Resources = largeResource + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Wait for CloneSet generation consistent") + gomega.Eventually(func() bool { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Generation == cs.Status.ObservedGeneration + }, 10*time.Second, 3*time.Second).Should(gomega.Equal(true)) + + updatedVersion := cs.Status.UpdateRevision + ginkgo.By("Wait for one pods updated and rejected") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + framework.Logf("Cloneset updatedReplicas %v updatedReady %v updatedAvailableReplicas %v ", + cs.Status.UpdatedReplicas, cs.Status.UpdatedReadyReplicas, cs.Status.UpdatedAvailableReplicas) + + pods, err = tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for _, pod := range pods { + revision := pod.Labels[apps.ControllerRevisionHashLabelKey] + if strings.Contains(updatedVersion, revision) { + if pod.Status.Resize == v1.PodResizeStatusInfeasible { + return 1 + } + } + } + + SkipTestWhenCgroupError(pods[0]) + return 0 + }, 
120*time.Second, 3*time.Second).Should(gomega.Equal(int32(1))) + + ginkgo.By("Update CloneSet with input resource") + err = tester.UpdateCloneSet(cs.Name, func(cs *appsv1alpha1.CloneSet) { + if cs.Annotations == nil { + cs.Annotations = map[string]string{} + } + fn(&cs.Spec.Template) + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Wait for CloneSet generation consistent") + gomega.Eventually(func() bool { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Generation == cs.Status.ObservedGeneration + }, 10*time.Second, 3*time.Second).Should(gomega.Equal(true)) + + a, b, c := getResourcesInfo(pods[0]) + ginkgo.By("Wait for all pods updated and ready") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + framework.Logf("Cloneset updatedReplicas %v updatedReady %v updatedAvailableReplicas %v ", + cs.Status.UpdatedReplicas, cs.Status.UpdatedReadyReplicas, cs.Status.UpdatedAvailableReplicas) + + pods, err = tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + a1, b1, c1 := getResourcesInfo(pods[0]) + if a1 != a || b1 != b || c1 != c { + framework.Logf("updateSpec %v", a1) + framework.Logf("spec %v", b1) + framework.Logf("container status %v ", c1) + a, b, c = a1, b1, c1 + } + SkipTestWhenCgroupError(pods[0]) + return cs.Status.UpdatedAvailableReplicas + }, 600*time.Second, 3*time.Second).Should(gomega.Equal(int32(1))) + + ginkgo.By("Verify the containerID changed and restartCount should not be 0") + pods, err = tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(pods)).Should(gomega.Equal(1)) + newPodUID := pods[0].UID + newContainerStatus := pods[0].Status.ContainerStatuses[0] + + gomega.Expect(oldPodUID).Should(gomega.Equal(newPodUID)) + gomega.Expect(newContainerStatus.ContainerID).NotTo(gomega.Equal(oldContainerStatus.ContainerID)) + gomega.Expect(newContainerStatus.RestartCount).ShouldNot(gomega.Equal(int32(0))) + + pods, err = tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(checkPodResource(pods, oldPodResource, []string{"redis"})).Should(gomega.Equal(true)) + } + // This can't be Conformance yet. + ginkgo.It("in-place update image and resource", func() { + fn := func(pod *v1.PodTemplateSpec) { + spec := &pod.Spec + ginkgo.By("in-place update image and resource") + spec.Containers[0].Image = NewNginxImage + spec.Containers[0].Resources = newResource + } + testUpdateResource(fn, resizePolicy) + }) + + // This can't be Conformance yet. + ginkgo.FIt("in-place update resource and env from label", func() { + fn := func(pod *v1.PodTemplateSpec) { + spec := &pod.Spec + ginkgo.By("in-place update resource and env from label") + pod.Labels["test-env"] = "bar" + spec.Containers[0].Resources = newResource + } + testUpdateResource(fn, resizePolicy) + }) + + // This can't be Conformance yet. 
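[Editorial note: a hypothetical condensed form of the wait condition used above for the rejected update, assuming the same k8s.io/api imports as this file: the pod must carry the updated revision hash and the kubelet must have marked the resize infeasible.]

```go
// Assumed imports: "strings", apps "k8s.io/api/apps/v1", v1 "k8s.io/api/core/v1".

// isUpdatedAndRejected reports whether the pod is on the updated revision and
// the kubelet has declared its pending resize infeasible (illustrative helper;
// the spec above inlines this check).
func isUpdatedAndRejected(pod *v1.Pod, updateRevision string) bool {
	revision := pod.Labels[apps.ControllerRevisionHashLabelKey]
	return strings.Contains(updateRevision, revision) &&
		pod.Status.Resize == v1.PodResizeStatusInfeasible
}
```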
+ ginkgo.It("in-place update image, resource and env from label", func() { + fn := func(pod *v1.PodTemplateSpec) { + spec := &pod.Spec + ginkgo.By("in-place update image, resource and env from label") + spec.Containers[0].Image = NewNginxImage + pod.Labels["test-env"] = "bar" + spec.Containers[0].Resources = newResource + } + testUpdateResource(fn, resizePolicy) + }) + + } + + ginkgo.By("inplace update resources with RestartContainer policy") + testResizePolicyFailed([]v1.ContainerResizePolicy{ + { + ResourceName: v1.ResourceCPU, RestartPolicy: v1.RestartContainer, + }, + { + ResourceName: v1.ResourceMemory, RestartPolicy: v1.RestartContainer, + }, + }) + ginkgo.By("inplace update resources with memory RestartContainer policy") + testResizePolicyFailed([]v1.ContainerResizePolicy{ + { + ResourceName: v1.ResourceMemory, RestartPolicy: v1.RestartContainer, + }, + }) + ginkgo.By("inplace update resources with cpu RestartContainer policy") + testResizePolicyFailed([]v1.ContainerResizePolicy{ + { + ResourceName: v1.ResourceCPU, RestartPolicy: v1.RestartContainer, + }, + }) + ginkgo.By("inplace update resources with NotRequired policy") + testResizePolicyFailed([]v1.ContainerResizePolicy{ + { + ResourceName: v1.ResourceCPU, RestartPolicy: v1.NotRequired, + }, + { + ResourceName: v1.ResourceMemory, RestartPolicy: v1.NotRequired, + }, + }) + }) + + framework.KruiseDescribe("CloneSet Updating with inplace resource", func() { + var err error + testWithResizePolicy := func(resizePolicy []v1.ContainerResizePolicy) { + testUpdateResource := func(fn func(pod *v1.PodTemplateSpec), resizePolicy []v1.ContainerResizePolicy) { + j, _ := json.Marshal(resizePolicy) + ginkgo.By(fmt.Sprintf("resize policy %v", string(j))) + cs := tester.NewCloneSet("clone-"+randStr, 1, appsv1alpha1.CloneSetUpdateStrategy{Type: appsv1alpha1.InPlaceIfPossibleCloneSetUpdateStrategyType}) + cs.Spec.Template.Spec.Containers[0].ResizePolicy = resizePolicy + cs.Spec.Template.Spec.Containers[0].Image = NginxImage + cs.Spec.Template.ObjectMeta.Labels["test-env"] = "foo" + cs.Spec.Template.Spec.Containers[0].Env = append(cs.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{ + Name: "TEST_ENV", + ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "metadata.labels['test-env']"}}, + }) + cs, err = tester.CreateCloneSet(cs) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(cs.Spec.UpdateStrategy.Type).To(gomega.Equal(appsv1alpha1.InPlaceIfPossibleCloneSetUpdateStrategyType)) + + ginkgo.By("Wait for replicas satisfied") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Status.Replicas + }, 3*time.Second, time.Second).Should(gomega.Equal(int32(1))) + + ginkgo.By("Wait for all pods ready") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Status.ReadyReplicas + }, 60*time.Second, 3*time.Second).Should(gomega.Equal(int32(1))) + + pods, err := tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(pods)).Should(gomega.Equal(1)) + oldPodUID := pods[0].UID + oldContainerStatus := pods[0].Status.ContainerStatuses[0] + oldPodResource := getPodResource(pods[0]) + + ginkgo.By("Update CloneSet") + err = tester.UpdateCloneSet(cs.Name, func(cs *appsv1alpha1.CloneSet) { + if cs.Annotations == nil { + cs.Annotations = map[string]string{} + } + fn(&cs.Spec.Template) + }) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Wait for CloneSet generation consistent") + gomega.Eventually(func() bool { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Generation == cs.Status.ObservedGeneration + }, 10*time.Second, 3*time.Second).Should(gomega.Equal(true)) + + a, b, c := getResourcesInfo(pods[0]) + ginkgo.By("Wait for all pods updated and ready") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + framework.Logf("Cloneset updatedReplicas %v updatedReady %v updatedAvailableReplicas %v ", + cs.Status.UpdatedReplicas, cs.Status.UpdatedReadyReplicas, cs.Status.UpdatedAvailableReplicas) + + pods, err = tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + a1, b1, c1 := getResourcesInfo(pods[0]) + if a1 != a || b1 != b || c1 != c { + framework.Logf("updateSpec %v", a1) + framework.Logf("spec %v", b1) + framework.Logf("container status %v ", c1) + a, b, c = a1, b1, c1 + } + SkipTestWhenCgroupError(pods[0]) + return cs.Status.UpdatedAvailableReplicas + }, 600*time.Second, 3*time.Second).Should(gomega.Equal(int32(1))) + + ginkgo.By("Verify the containerID changed and restartCount should be 1") + pods, err = tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(pods)).Should(gomega.Equal(1)) + newPodUID := pods[0].UID + newContainerStatus := pods[0].Status.ContainerStatuses[0] + + gomega.Expect(oldPodUID).Should(gomega.Equal(newPodUID)) + gomega.Expect(newContainerStatus.ContainerID).NotTo(gomega.Equal(oldContainerStatus.ContainerID)) + gomega.Expect(newContainerStatus.RestartCount).Should(gomega.Equal(int32(1))) + + pods, err = tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(checkPodResource(pods, oldPodResource, []string{"redis"})).Should(gomega.Equal(true)) + } + // This can't be Conformance yet. + ginkgo.It("in-place update image and resource", func() { + fn := func(pod *v1.PodTemplateSpec) { + spec := &pod.Spec + ginkgo.By("in-place update image and resource") + spec.Containers[0].Image = NewNginxImage + spec.Containers[0].Resources = newResource + } + testUpdateResource(fn, resizePolicy) + }) + + // This can't be Conformance yet. + ginkgo.FIt("in-place update resource and env from label", func() { + fn := func(pod *v1.PodTemplateSpec) { + spec := &pod.Spec + ginkgo.By("in-place update resource and env from label") + pod.Labels["test-env"] = "bar" + spec.Containers[0].Resources = newResource + } + testUpdateResource(fn, resizePolicy) + }) + + // This can't be Conformance yet. 
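[Editorial note: a hypothetical condensed form of the success-path assertions above: the pod object is reused (same UID) while the container is restarted exactly once in place, since every spec here also changes the image or a label-sourced env var.]

```go
// Assumed import: v1 "k8s.io/api/core/v1".

// restartedInPlaceOnce is an illustrative summary of the checks above; it
// assumes both pods report at least one container status.
func restartedInPlaceOnce(oldPod, curPod *v1.Pod) bool {
	oldCS, curCS := oldPod.Status.ContainerStatuses[0], curPod.Status.ContainerStatuses[0]
	return oldPod.UID == curPod.UID && // pod not recreated
		oldCS.ContainerID != curCS.ContainerID && // container was restarted
		curCS.RestartCount == 1 // exactly once
}
```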
+ ginkgo.It("in-place update image, resource and env from label", func() { + fn := func(pod *v1.PodTemplateSpec) { + spec := &pod.Spec + ginkgo.By("in-place update image, resource and env from label") + spec.Containers[0].Image = NewNginxImage + pod.Labels["test-env"] = "bar" + spec.Containers[0].Resources = newResource + } + testUpdateResource(fn, resizePolicy) + }) + + framework.ConformanceIt("in-place update two container image, resource with priorities successfully", func() { + cs := tester.NewCloneSet("clone-"+randStr, 1, appsv1alpha1.CloneSetUpdateStrategy{Type: appsv1alpha1.InPlaceIfPossibleCloneSetUpdateStrategyType}) + cs.Spec.Template.Spec.Containers[0].ResizePolicy = resizePolicy + cs.Spec.Template.Spec.Containers = append(cs.Spec.Template.Spec.Containers, v1.Container{ + Name: "redis", + Image: RedisImage, + Command: []string{"sleep", "999"}, + Env: []v1.EnvVar{{Name: appspub.ContainerLaunchPriorityEnvName, Value: "10"}}, + Lifecycle: &v1.Lifecycle{PostStart: &v1.LifecycleHandler{Exec: &v1.ExecAction{Command: []string{"sleep", "10"}}}}, + }) + cs.Spec.Template.Spec.TerminationGracePeriodSeconds = utilpointer.Int64(3) + cs, err = tester.CreateCloneSet(cs) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(cs.Spec.UpdateStrategy.Type).To(gomega.Equal(appsv1alpha1.InPlaceIfPossibleCloneSetUpdateStrategyType)) + + ginkgo.By("Wait for replicas satisfied") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Status.Replicas + }, 3*time.Second, time.Second).Should(gomega.Equal(int32(1))) + + ginkgo.By("Wait for all pods ready") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Status.ReadyReplicas + }, 60*time.Second, 3*time.Second).Should(gomega.Equal(int32(1))) + + pods, err := tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(pods)).Should(gomega.Equal(1)) + oldPodResource := getPodResource(pods[0]) + + ginkgo.By("Update images of nginx and redis") + err = tester.UpdateCloneSet(cs.Name, func(cs *appsv1alpha1.CloneSet) { + cs.Spec.Template.Spec.Containers[0].Image = NewNginxImage + cs.Spec.Template.Spec.Containers[1].Image = imageutils.GetE2EImage(imageutils.BusyBox) + cs.Spec.Template.Spec.Containers[0].Resources = newResource + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Wait for CloneSet generation consistent") + gomega.Eventually(func() bool { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Generation == cs.Status.ObservedGeneration + }, 10*time.Second, 3*time.Second).Should(gomega.Equal(true)) + + ginkgo.By("Wait for all pods updated and ready") + a, b, c := getResourcesInfo(pods[0]) + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + pods, err = tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + a1, b1, c1 := getResourcesInfo(pods[0]) + if a1 != a || b1 != b || c1 != c { + framework.Logf("updateSpec %v", a1) + framework.Logf("spec %v", b1) + framework.Logf("container status %v ", c1) + a, b, c = a1, b1, c1 + } + SkipTestWhenCgroupError(pods[0]) + + return cs.Status.UpdatedAvailableReplicas + }, 600*time.Second, 3*time.Second).Should(gomega.Equal(int32(1))) + + ginkgo.By("Verify two containers have all updated in-place") + pods, err = 
tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(pods)).Should(gomega.Equal(1)) + + pod := pods[0] + nginxContainerStatus := util.GetContainerStatus("nginx", pod) + redisContainerStatus := util.GetContainerStatus("redis", pod) + gomega.Expect(nginxContainerStatus.RestartCount).Should(gomega.Equal(int32(1))) + gomega.Expect(redisContainerStatus.RestartCount).Should(gomega.Equal(int32(1))) + + ginkgo.By("Verify nginx should be stopped after new redis has started 10s") + gomega.Expect(nginxContainerStatus.LastTerminationState.Terminated.FinishedAt.After(redisContainerStatus.State.Running.StartedAt.Time.Add(time.Second*10))). + Should(gomega.Equal(true), fmt.Sprintf("nginx finish at %v is not after redis start %v + 10s", + nginxContainerStatus.LastTerminationState.Terminated.FinishedAt, + redisContainerStatus.State.Running.StartedAt)) + + ginkgo.By("Verify in-place update state in two batches") + inPlaceUpdateState := appspub.InPlaceUpdateState{} + gomega.Expect(pod.Annotations[appspub.InPlaceUpdateStateKey]).ShouldNot(gomega.BeEmpty()) + err = json.Unmarshal([]byte(pod.Annotations[appspub.InPlaceUpdateStateKey]), &inPlaceUpdateState) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(inPlaceUpdateState.ContainerBatchesRecord)).Should(gomega.Equal(2)) + gomega.Expect(inPlaceUpdateState.ContainerBatchesRecord[0].Containers).Should(gomega.Equal([]string{"redis"})) + gomega.Expect(inPlaceUpdateState.ContainerBatchesRecord[1].Containers).Should(gomega.Equal([]string{"nginx"})) + gomega.Expect(checkPodResource(pods, oldPodResource, []string{"redis"})).Should(gomega.Equal(true)) + }) + + framework.ConformanceIt("in-place update two container image, resource with priorities, should not update the next when the previous one failed", func() { + cs := tester.NewCloneSet("clone-"+randStr, 1, appsv1alpha1.CloneSetUpdateStrategy{Type: appsv1alpha1.InPlaceIfPossibleCloneSetUpdateStrategyType}) + cs.Spec.Template.Spec.Containers = append(cs.Spec.Template.Spec.Containers, v1.Container{ + Name: "redis", + Image: RedisImage, + Env: []v1.EnvVar{{Name: appspub.ContainerLaunchPriorityEnvName, Value: "10"}}, + Lifecycle: &v1.Lifecycle{PostStart: &v1.LifecycleHandler{Exec: &v1.ExecAction{Command: []string{"sleep", "10"}}}}, + }) + cs.Spec.Template.Spec.TerminationGracePeriodSeconds = utilpointer.Int64(3) + cs, err = tester.CreateCloneSet(cs) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(cs.Spec.UpdateStrategy.Type).To(gomega.Equal(appsv1alpha1.InPlaceIfPossibleCloneSetUpdateStrategyType)) + + ginkgo.By("Wait for replicas satisfied") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Status.Replicas + }, 3*time.Second, time.Second).Should(gomega.Equal(int32(1))) + + ginkgo.By("Wait for all pods ready") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Status.ReadyReplicas + }, 60*time.Second, 3*time.Second).Should(gomega.Equal(int32(1))) + + pods, err := tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(pods)).Should(gomega.Equal(1)) + oldPodResource := getPodResource(pods[0]) + + ginkgo.By("Update images of nginx and redis") + err = tester.UpdateCloneSet(cs.Name, func(cs *appsv1alpha1.CloneSet) { + cs.Spec.Template.Spec.Containers[0].Image = NewNginxImage + 
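+ // The busybox image below has no long-running command here, so the higher-priority redis container is expected to fail to start and block the nginx batch.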
cs.Spec.Template.Spec.Containers[1].Image = imageutils.GetE2EImage(imageutils.BusyBox) + cs.Spec.Template.Spec.Containers[0].Resources = newResource + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Wait for CloneSet generation consistent") + gomega.Eventually(func() bool { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Generation == cs.Status.ObservedGeneration + }, 10*time.Second, 3*time.Second).Should(gomega.Equal(true)) + + ginkgo.By("Wait for redis failed to start") + var pod *v1.Pod + gomega.Eventually(func() *v1.ContainerStateTerminated { + pods, err = tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(pods)).Should(gomega.Equal(1)) + pod = pods[0] + redisContainerStatus := util.GetContainerStatus("redis", pod) + return redisContainerStatus.LastTerminationState.Terminated + }, 60*time.Second, time.Second).ShouldNot(gomega.BeNil()) + + gomega.Eventually(func() *v1.ContainerStateWaiting { + pods, err = tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(pods)).Should(gomega.Equal(1)) + pod = pods[0] + redisContainerStatus := util.GetContainerStatus("redis", pod) + return redisContainerStatus.State.Waiting + }, 60*time.Second, time.Second).ShouldNot(gomega.BeNil()) + + nginxContainerStatus := util.GetContainerStatus("nginx", pod) + gomega.Expect(nginxContainerStatus.RestartCount).Should(gomega.Equal(int32(0))) + + ginkgo.By("Verify in-place update state only one batch and remain next") + inPlaceUpdateState := appspub.InPlaceUpdateState{} + gomega.Expect(pod.Annotations[appspub.InPlaceUpdateStateKey]).ShouldNot(gomega.BeEmpty()) + err = json.Unmarshal([]byte(pod.Annotations[appspub.InPlaceUpdateStateKey]), &inPlaceUpdateState) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(inPlaceUpdateState.ContainerBatchesRecord)).Should(gomega.Equal(1)) + gomega.Expect(inPlaceUpdateState.ContainerBatchesRecord[0].Containers).Should(gomega.Equal([]string{"redis"})) + gomega.Expect(inPlaceUpdateState.NextContainerImages).Should(gomega.Equal(map[string]string{"nginx": NewNginxImage})) + gomega.Expect(checkPodResource(pods, oldPodResource, []string{"redis"})).Should(gomega.Equal(false)) + }) + + //This can't be Conformance yet. 
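+ // Same two-container priority flow as above, except redis consumes the 'config' annotation via the downward API, so the annotation change alone restarts it in place.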
+ ginkgo.It("in-place update two container image, resource and env from metadata with priorities", func() { + cs := tester.NewCloneSet("clone-"+randStr, 1, appsv1alpha1.CloneSetUpdateStrategy{Type: appsv1alpha1.InPlaceIfPossibleCloneSetUpdateStrategyType}) + cs.Spec.Template.Spec.Containers[0].ResizePolicy = resizePolicy + cs.Spec.Template.Annotations = map[string]string{"config": "foo"} + cs.Spec.Template.Spec.Containers = append(cs.Spec.Template.Spec.Containers, v1.Container{ + Name: "redis", + Image: RedisImage, + Env: []v1.EnvVar{ + {Name: appspub.ContainerLaunchPriorityEnvName, Value: "10"}, + {Name: "CONFIG", ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "metadata.annotations['config']"}}}, + }, + Lifecycle: &v1.Lifecycle{PostStart: &v1.LifecycleHandler{Exec: &v1.ExecAction{Command: []string{"sleep", "10"}}}}, + }) + cs, err = tester.CreateCloneSet(cs) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(cs.Spec.UpdateStrategy.Type).To(gomega.Equal(appsv1alpha1.InPlaceIfPossibleCloneSetUpdateStrategyType)) + + ginkgo.By("Wait for replicas satisfied") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Status.Replicas + }, 3*time.Second, time.Second).Should(gomega.Equal(int32(1))) + + ginkgo.By("Wait for all pods ready") + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Status.ReadyReplicas + }, 60*time.Second, 3*time.Second).Should(gomega.Equal(int32(1))) + + pods, err := tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(pods)).Should(gomega.Equal(1)) + oldPodResource := getPodResource(pods[0]) + + ginkgo.By("Update nginx image and config annotation") + err = tester.UpdateCloneSet(cs.Name, func(cs *appsv1alpha1.CloneSet) { + cs.Spec.Template.Spec.Containers[0].Image = NewNginxImage + cs.Spec.Template.Annotations["config"] = "bar" + cs.Spec.Template.Spec.Containers[0].Resources = newResource + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Wait for CloneSet generation consistent") + gomega.Eventually(func() bool { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return cs.Generation == cs.Status.ObservedGeneration + }, 10*time.Second, 3*time.Second).Should(gomega.Equal(true)) + + ginkgo.By("Wait for all pods updated and ready") + a, b, c := getResourcesInfo(pods[0]) + gomega.Eventually(func() int32 { + cs, err = tester.GetCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + pods, err = tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + a1, b1, c1 := getResourcesInfo(pods[0]) + if a1 != a || b1 != b || c1 != c { + framework.Logf("updateSpec %v", a1) + framework.Logf("spec %v", b1) + framework.Logf("container status %v ", c1) + a, b, c = a1, b1, c1 + } + SkipTestWhenCgroupError(pods[0]) + + return cs.Status.UpdatedAvailableReplicas + }, 600*time.Second, 3*time.Second).Should(gomega.Equal(int32(1))) + + ginkgo.By("Verify two containers have all updated in-place") + pods, err = tester.ListPodsForCloneSet(cs.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(pods)).Should(gomega.Equal(1)) + + pod := pods[0] + nginxContainerStatus := util.GetContainerStatus("nginx", pod) + redisContainerStatus := util.GetContainerStatus("redis", pod) + 
gomega.Expect(nginxContainerStatus.RestartCount).Should(gomega.Equal(int32(1))) + gomega.Expect(redisContainerStatus.RestartCount).Should(gomega.Equal(int32(1))) + + ginkgo.By("Verify nginx should be stopped after new redis has started") + gomega.Expect(nginxContainerStatus.LastTerminationState.Terminated.FinishedAt.After(redisContainerStatus.State.Running.StartedAt.Time.Add(time.Second*10))). + Should(gomega.Equal(true), fmt.Sprintf("nginx finish at %v is not after redis start %v + 10s", + nginxContainerStatus.LastTerminationState.Terminated.FinishedAt, + redisContainerStatus.State.Running.StartedAt)) + + ginkgo.By("Verify in-place update state in two batches") + inPlaceUpdateState := appspub.InPlaceUpdateState{} + gomega.Expect(pod.Annotations[appspub.InPlaceUpdateStateKey]).ShouldNot(gomega.BeEmpty()) + err = json.Unmarshal([]byte(pod.Annotations[appspub.InPlaceUpdateStateKey]), &inPlaceUpdateState) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(inPlaceUpdateState.ContainerBatchesRecord)).Should(gomega.Equal(2)) + gomega.Expect(inPlaceUpdateState.ContainerBatchesRecord[0].Containers).Should(gomega.Equal([]string{"redis"})) + gomega.Expect(inPlaceUpdateState.ContainerBatchesRecord[1].Containers).Should(gomega.Equal([]string{"nginx"})) + gomega.Expect(checkPodResource(pods, oldPodResource, []string{"redis"})).Should(gomega.Equal(true)) + }) + } + + ginkgo.By("inplace update resources with RestartContainer policy") + testWithResizePolicy([]v1.ContainerResizePolicy{ + { + ResourceName: v1.ResourceCPU, RestartPolicy: v1.RestartContainer, + }, + { + ResourceName: v1.ResourceMemory, RestartPolicy: v1.RestartContainer, + }, + }) + ginkgo.By("inplace update resources with memory RestartContainer policy") + testWithResizePolicy([]v1.ContainerResizePolicy{ + { + ResourceName: v1.ResourceMemory, RestartPolicy: v1.RestartContainer, + }, + }) + ginkgo.By("inplace update resources with cpu RestartContainer policy") + testWithResizePolicy([]v1.ContainerResizePolicy{ + { + ResourceName: v1.ResourceCPU, RestartPolicy: v1.RestartContainer, + }, + }) + ginkgo.By("inplace update resources with NotRequired policy") + testWithResizePolicy([]v1.ContainerResizePolicy{ + { + ResourceName: v1.ResourceCPU, RestartPolicy: v1.NotRequired, + }, + { + ResourceName: v1.ResourceMemory, RestartPolicy: v1.NotRequired, + }, + }) + }) + + framework.KruiseDescribe("Basic StatefulSet functionality [StatefulSetBasic]", func() { + ssName := "ss" + labels := map[string]string{ + "foo": "bar", + "baz": "blah", + } + headlessSvcName := "test" + var statefulPodMounts, podMounts []v1.VolumeMount + var ss *appsv1beta1.StatefulSet + + ginkgo.BeforeEach(func() { + statefulPodMounts = []v1.VolumeMount{{Name: "datadir", MountPath: "/data/"}} + podMounts = []v1.VolumeMount{{Name: "home", MountPath: "/home"}} + ss = framework.NewStatefulSet(ssName, ns, headlessSvcName, 2, statefulPodMounts, podMounts, labels) + + ginkgo.By("Creating service " + headlessSvcName + " in namespace " + ns) + headlessService := framework.CreateServiceSpec(headlessSvcName, "", true, labels) + _, err := c.CoreV1().Services(ns).Create(context.TODO(), headlessService, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + testfn := func(ss *appsv1beta1.StatefulSet) { + e := v1.EnvVar{ + Name: "test-env", + ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "metadata.labels['label-k2']"}}, + } + ss.Spec.Template.Spec.Containers[0].Env = append(ss.Spec.Template.Spec.Containers[0].Env, e) + if 
len(ss.Spec.Template.Labels) == 0 {
+ ss.Spec.Template.Labels = map[string]string{}
+ }
+ ss.Spec.Template.Labels["label-k2"] = "hello"
+
+ oldImage := ss.Spec.Template.Spec.Containers[0].Image
+ currentRevision, updateRevision := ss.Status.CurrentRevision, ss.Status.UpdateRevision
+ updateFn := func(set *appsv1beta1.StatefulSet) {
+ currentRevision = set.Status.CurrentRevision
+ set.Spec.Template.Labels["label-k2"] = "test"
+ container := &set.Spec.Template.Spec.Containers[0]
+ container.Resources = newResource
+ }
+ sst := framework.NewStatefulSetTester(c, kc)
+
+ validaFn := func(pods []v1.Pod) {
+ ss = sst.WaitForStatus(ss)
+ updateRevision = ss.Status.UpdateRevision
+ baseValidFn(pods, oldImage, updateRevision)
+ for i := range pods {
+ gomega.Expect(ResourceEqual(&pods[i].Spec.Containers[0].Resources, &newResource)).Should(gomega.BeTrue(),
+ diff.ObjectDiff(pods[i].Spec.Containers[0].Resources, newResource))
+ }
+ }
+ rollbackFn := func(set *appsv1beta1.StatefulSet) {
+ set.Spec.Template.Labels["label-k2"] = "hello"
+ container := &set.Spec.Template.Spec.Containers[0]
+ container.Resources = oldResource
+ }
+ validaFn2 := func(pods []v1.Pod) {
+ ss = sst.WaitForStatus(ss)
+ baseValidFn(pods, oldImage, currentRevision)
+ for i := range pods {
+ gomega.Expect(ResourceEqual(&pods[i].Spec.Containers[0].Resources, &oldResource)).Should(gomega.BeTrue(),
+ diff.ObjectDiff(pods[i].Spec.Containers[0].Resources, oldResource))
+ }
+ }
+ rollbackWithUpdateFnTest(c, kc, ns, ss, updateFn, rollbackFn, validaFn, validaFn2)
+ }
+
+ ginkgo.It("should perform rolling updates(including resources) and roll backs with pvcs", func() {
+ ginkgo.By("Creating a new StatefulSet")
+ ss.Spec.UpdateStrategy.RollingUpdate = &appsv1beta1.RollingUpdateStatefulSetStrategy{
+ PodUpdatePolicy: appsv1beta1.InPlaceIfPossiblePodUpdateStrategyType,
+ }
+ ss.Spec.Template.Spec.ReadinessGates = []v1.PodReadinessGate{{ConditionType: appspub.InPlaceUpdateReady}}
+ testfn(ss)
+ })
+
+ ginkgo.It("should perform rolling updates(including resources) and roll backs", func() {
+ ginkgo.By("Creating a new StatefulSet")
+ ss = framework.NewStatefulSet("ss2", ns, headlessSvcName, 3, nil, nil, labels)
+ ss.Spec.UpdateStrategy.RollingUpdate = &appsv1beta1.RollingUpdateStatefulSetStrategy{
+ PodUpdatePolicy: appsv1beta1.InPlaceIfPossiblePodUpdateStrategyType,
+ }
+ ss.Spec.Template.Spec.ReadinessGates = []v1.PodReadinessGate{{ConditionType: appspub.InPlaceUpdateReady}}
+ testfn(ss)
+ })
+
+ })
+
+ framework.KruiseDescribe("Basic DaemonSet functionality [DaemonSetBasic]", func() {
+ var tester *framework.DaemonSetTester
+ ginkgo.BeforeEach(func() {
+ c = f.ClientSet
+ kc = f.KruiseClientSet
+ ns = f.Namespace.Name
+ tester = framework.NewDaemonSetTester(c, kc, ns)
+ })
+ dsName := "e2e-ds"
+
+ ginkgo.AfterEach(func() {
+ if ginkgo.CurrentGinkgoTestDescription().Failed {
+ framework.DumpDebugInfo(c, ns)
+ }
+ framework.Logf("Deleting DaemonSet %s/%s in cluster", ns, dsName)
+ tester.DeleteDaemonSet(ns, dsName)
+ })
+ newImage := NewNginxImage
+ framework.ConformanceIt("should upgrade resources and image one by one on steps if there is pre-update hook", func() {
+ label := map[string]string{framework.DaemonSetNameLabel: dsName}
+ hookKey := "my-pre-update"
+
+ ginkgo.By(fmt.Sprintf("Creating DaemonSet %q with pre-update hook", dsName))
+ maxUnavailable := intstr.IntOrString{IntVal: int32(1)}
+ ads := tester.NewDaemonSet(dsName, label, WebserverImage, appsv1alpha1.DaemonSetUpdateStrategy{
+ Type: appsv1alpha1.RollingUpdateDaemonSetStrategyType,
+ RollingUpdate: &appsv1alpha1.RollingUpdateDaemonSet{
+ Type: appsv1alpha1.InplaceRollingUpdateType,
+ MaxUnavailable: &maxUnavailable,
+ },
+ })
+ ads.Spec.Template.Labels =
map[string]string{framework.DaemonSetNameLabel: dsName, hookKey: "true"} + ads.Spec.Template.Spec.Containers[0].Resources = oldResource + ds, err := tester.CreateDaemonSet(ads) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Check that daemon pods launch on every node of the cluster") + err = wait.PollImmediate(framework.DaemonSetRetryPeriod, framework.DaemonSetRetryTimeout, tester.CheckRunningOnAllNodes(ds)) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "error waiting for daemon pod to start") + + err = tester.CheckDaemonStatus(dsName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + oldPodList, err := tester.ListDaemonPods(label) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Only update daemonset resources and image") + err = tester.UpdateDaemonSet(ds.Name, func(ads *appsv1alpha1.DaemonSet) { + ads.Spec.Template.Spec.Containers[0].Image = newImage + ads.Spec.Template.Spec.Containers[0].Resources = newResource + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "error to update daemon") + + gomega.Eventually(func() int64 { + ads, err = tester.GetDaemonSet(dsName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return ads.Status.ObservedGeneration + }, time.Second*30, time.Second*3).Should(gomega.Equal(int64(2))) + + ginkgo.By("Wait for all pods updated and ready") + lastId := len(oldPodList.Items) - 1 + a, b, c := getResourcesInfo(&oldPodList.Items[lastId]) + gomega.Eventually(func() int32 { + ads, err = tester.GetDaemonSet(dsName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + updatedAvailable := ads.Status.UpdatedNumberScheduled + updatedAvailable -= ads.Status.NumberUnavailable + framework.Logf("UpdatedNumber %v Unavailable %v UpdatedAvailable %v", ads.Status.UpdatedNumberScheduled, + ads.Status.NumberUnavailable, updatedAvailable) + + oldPodList, err = tester.ListDaemonPods(label) + pod := &oldPodList.Items[lastId] + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + a1, b1, c1 := getResourcesInfo(pod) + if a1 != a || b1 != b || c1 != c { + framework.Logf("updateSpec %v", a1) + framework.Logf("spec %v", b1) + framework.Logf("container status %v ", c1) + a, b, c = a1, b1, c1 + } + SkipTestWhenCgroupError(pod) + return updatedAvailable + }, 600*time.Second, 3*time.Second).Should(gomega.Equal(int32(len(oldPodList.Items)))) + + ginkgo.By("Verify the podUId changed and restartCount should be 1") + pods, err := tester.ListDaemonPods(label) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + lastStatus := map[string]*v1.ResourceRequirements{"busybox": &oldResource} + for _, pod := range pods.Items { + gomega.Expect(checkPodResource([]*v1.Pod{&pod}, lastStatus, []string{"redis"})).Should(gomega.Equal(true)) + gomega.Expect(pod.Status.ContainerStatuses[0].RestartCount).Should(gomega.Equal(int32(1))) + } + }) + }) +}) + +func ResourceEqual(spec, status *v1.ResourceRequirements) bool { + + if spec == nil && status == nil { + return true + } + if status == nil || spec == nil { + return false + } + if spec.Requests != nil { + if status.Requests == nil { + return false + } + if !spec.Requests.Cpu().Equal(*status.Requests.Cpu()) || + !spec.Requests.Memory().Equal(*status.Requests.Memory()) { + return false + } + } + if spec.Limits != nil { + if status.Limits == nil { + return false + } + if !spec.Limits.Cpu().Equal(*status.Limits.Cpu()) || + !spec.Limits.Memory().Equal(*status.Limits.Memory()) { + return false + } + } + return true +} + +func getPodResource(pod *v1.Pod) map[string]*v1.ResourceRequirements { + containerResource := 
map[string]*v1.ResourceRequirements{} + for _, c := range pod.Spec.Containers { + c := c + containerResource[c.Name] = &c.Resources + } + return containerResource +} + +func checkPodResource(pods []*v1.Pod, last map[string]*v1.ResourceRequirements, ignoreContainer []string) (res bool) { + defer func() { + if !res && len(last) == 1 { + pjson, _ := json.Marshal(pods) + ljson, _ := json.Marshal(last) + framework.Logf("pod %v, last resource %v", string(pjson), string(ljson)) + } + }() + for _, pod := range pods { + containerResource := getPodResource(pod) + for _, c := range pod.Spec.Containers { + if slice.ContainsString(ignoreContainer, c.Name, nil) { + continue + } + lastResource := last[c.Name] + if ResourceEqual(&c.Resources, lastResource) { + framework.Logf("container %s resource unchanged", c.Name) + // resource unchanged + return false + } + } + for _, cs := range pod.Status.ContainerStatuses { + cname := cs.Name + spec := containerResource[cname] + if !ResourceEqual(spec, cs.Resources) { + framework.Logf("container %v spec != status", cname) + // resource spec != status + return false + } + } + } + + // resource changed and spec = status + return true +} + +func baseValidFn(pods []v1.Pod, image string, revision string) { + for i := range pods { + gomega.Expect(pods[i].Spec.Containers[0].Image).To(gomega.Equal(image), + fmt.Sprintf(" Pod %s/%s has image %s not have image %s", + pods[i].Namespace, + pods[i].Name, + pods[i].Spec.Containers[0].Image, + image)) + gomega.Expect(pods[i].Labels[apps.StatefulSetRevisionLabel]).To(gomega.Equal(revision), + fmt.Sprintf("Pod %s/%s revision %s is not equal to revision %s", + pods[i].Namespace, + pods[i].Name, + pods[i].Labels[apps.StatefulSetRevisionLabel], + revision)) + } +} +func rollbackWithUpdateFnTest(c clientset.Interface, kc kruiseclientset.Interface, ns string, ss *appsv1beta1.StatefulSet, + updateFn, rollbackFn func(update *appsv1beta1.StatefulSet), validateFn1, validateFn2 func([]v1.Pod)) { + sst := framework.NewStatefulSetTester(c, kc) + sst.SetHTTPProbe(ss) + ss, err := kc.AppsV1beta1().StatefulSets(ns).Create(context.TODO(), ss, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + sst.WaitForRunningAndReady(*ss.Spec.Replicas, ss) + ss = sst.WaitForStatus(ss) + currentRevision, updateRevision := ss.Status.CurrentRevision, ss.Status.UpdateRevision + gomega.Expect(currentRevision).To(gomega.Equal(updateRevision), + fmt.Sprintf("StatefulSet %s/%s created with update revision %s not equal to current revision %s", + ss.Namespace, ss.Name, updateRevision, currentRevision)) + pods := sst.GetPodList(ss) + for i := range pods.Items { + gomega.Expect(pods.Items[i].Labels[apps.StatefulSetRevisionLabel]).To(gomega.Equal(currentRevision), + fmt.Sprintf("Pod %s/%s revision %s is not equal to current revision %s", + pods.Items[i].Namespace, + pods.Items[i].Name, + pods.Items[i].Labels[apps.StatefulSetRevisionLabel], + currentRevision)) + } + sst.SortStatefulPods(pods) + err = sst.BreakPodHTTPProbe(ss, &pods.Items[1]) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ss, pods = sst.WaitForPodNotReady(ss, pods.Items[1].Name) + newImage := NewNginxImage + oldImage := ss.Spec.Template.Spec.Containers[0].Image + + ginkgo.By(fmt.Sprintf("Updating StatefulSet template: update image from %s to %s", oldImage, newImage)) + gomega.Expect(oldImage).NotTo(gomega.Equal(newImage), "Incorrect test setup: should update to a different image") + ss, err = framework.UpdateStatefulSetWithRetries(kc, ns, ss.Name, updateFn) + 
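+ // updateFn mutates the fetched template to create a new revision; rollbackFn applied later should bring the set back to the prior revision.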
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Creating a new revision") + ss = sst.WaitForStatus(ss) + currentRevision, updateRevision = ss.Status.CurrentRevision, ss.Status.UpdateRevision + gomega.Expect(currentRevision).NotTo(gomega.Equal(updateRevision), + "Current revision should not equal update revision during rolling update") + + ginkgo.By("Updating Pods in reverse ordinal order") + pods = sst.GetPodList(ss) + sst.SortStatefulPods(pods) + err = sst.RestorePodHTTPProbe(ss, &pods.Items[1]) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ss, pods = sst.WaitForPodReady(ss, pods.Items[1].Name) + ss, pods = sst.WaitForRollingUpdate(ss) + gomega.Expect(ss.Status.CurrentRevision).To(gomega.Equal(updateRevision), + fmt.Sprintf("StatefulSet %s/%s current revision %s does not equal update revision %s on update completion", + ss.Namespace, + ss.Name, + ss.Status.CurrentRevision, + updateRevision)) + validateFn1(pods.Items) + + ginkgo.By("Rolling back to a previous revision") + err = sst.BreakPodHTTPProbe(ss, &pods.Items[1]) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ss, pods = sst.WaitForPodNotReady(ss, pods.Items[1].Name) + priorRevision := currentRevision + ss, err = framework.UpdateStatefulSetWithRetries(kc, ns, ss.Name, rollbackFn) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ss = sst.WaitForStatus(ss) + currentRevision, updateRevision = ss.Status.CurrentRevision, ss.Status.UpdateRevision + gomega.Expect(currentRevision).NotTo(gomega.Equal(updateRevision), + "Current revision should not equal update revision during roll back") + gomega.Expect(priorRevision).To(gomega.Equal(updateRevision), + "Prior revision should equal update revision during roll back") + + ginkgo.By("Rolling back update in reverse ordinal order") + pods = sst.GetPodList(ss) + sst.SortStatefulPods(pods) + sst.RestorePodHTTPProbe(ss, &pods.Items[1]) + ss, pods = sst.WaitForPodReady(ss, pods.Items[1].Name) + ss, pods = sst.WaitForRollingUpdate(ss) + gomega.Expect(ss.Status.CurrentRevision).To(gomega.Equal(priorRevision), + fmt.Sprintf("StatefulSet %s/%s current revision %s does not equal prior revision %s on rollback completion", + ss.Namespace, + ss.Name, + ss.Status.CurrentRevision, + updateRevision)) + validateFn2(pods.Items) +} + +func SkipTestWhenCgroupError(pod *v1.Pod) { + if IsPodCreateError(pod) { + ginkgo.Skip("create pod error and message is cpu.cfs_quota_us: invalid argument: unknown\n" + + "This may be caused by runc version or kernel version.") + } +} + +func getResourcesInfo(po *v1.Pod) (string, string, string) { + if po == nil { + return "", "", "" + } + lastState := "" + if len(po.Annotations) > 0 { + lastState = po.Annotations[appspub.InPlaceUpdateStateKey] + } + specResources := po.Spec.Containers[0].Resources + containerStatus := po.Status.ContainerStatuses[0] + + specResourcesJson, _ := json.Marshal(specResources) + containerStatusJson, _ := json.Marshal(containerStatus) + return lastState, string(specResourcesJson), string(containerStatusJson) +} +func IsPodCreateError(pod *v1.Pod) bool { + if pod == nil { + return false + } + if len(pod.Status.ContainerStatuses) == 0 { + return false + } + for _, cs := range pod.Status.ContainerStatuses { + if cs.LastTerminationState.Terminated != nil { + lastTermination := cs.LastTerminationState.Terminated + if lastTermination.Reason == "StartError" && + strings.Contains(lastTermination.Message, "cpu.cfs_quota_us: invalid argument: unknown") { + return true + } + } + } + return false +} diff --git 
a/test/e2e/framework/cloneset_util.go b/test/e2e/framework/cloneset_util.go index f6c0b17388..b65e61afae 100644 --- a/test/e2e/framework/cloneset_util.go +++ b/test/e2e/framework/cloneset_util.go @@ -20,14 +20,16 @@ import ( "context" "sort" - appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1" - kruiseclientset "github.com/openkruise/kruise/pkg/client/clientset/versioned" - "github.com/openkruise/kruise/pkg/util" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/util/retry" imageutils "k8s.io/kubernetes/test/utils/image" + + appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1" + kruiseclientset "github.com/openkruise/kruise/pkg/client/clientset/versioned" + "github.com/openkruise/kruise/pkg/util" ) type CloneSetTester struct { @@ -66,6 +68,16 @@ func (t *CloneSetTester) NewCloneSet(name string, replicas int32, updateStrategy Env: []v1.EnvVar{ {Name: "test", Value: "foo"}, }, + Resources: v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("200m"), + v1.ResourceMemory: resource.MustParse("200Mi"), + }, + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("1"), + v1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, }, }, }, diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index 7dfbb159f1..cb8348c4a5 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -315,6 +315,11 @@ func KruiseDescribe(text string, body func()) bool { return ginkgo.Describe("[kruise.io] "+text, body) } +// KruisePDescribe is a wrapper function for ginkgo describe. Adds namespacing. +func KruisePDescribe(text string, body func()) bool { + return ginkgo.PDescribe("[kruise.io] "+text, body) +} + // ConformanceIt is a wrapper function for ginkgo It. Adds "[Conformance]" tag and makes static analysis easier. func ConformanceIt(text string, body interface{}, timeout ...float64) bool { return ginkgo.It(text+" [Conformance]", body, timeout...) 
diff --git a/test/e2e/framework/statefulset_utils.go b/test/e2e/framework/statefulset_utils.go index e9d1fac8ce..d57cd0d331 100644 --- a/test/e2e/framework/statefulset_utils.go +++ b/test/e2e/framework/statefulset_utils.go @@ -874,6 +874,16 @@ func NewStatefulSet(name, ns, governingSvcName string, replicas int32, statefulP Image: imageutils.GetE2EImage(imageutils.Nginx), VolumeMounts: mounts, ImagePullPolicy: v1.PullIfNotPresent, + Resources: v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("200m"), + v1.ResourceMemory: resource.MustParse("200Mi"), + }, + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("1"), + v1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, }, }, Volumes: vols, diff --git a/test/kind-conf-none-fg.yaml b/test/kind-conf-with-vpa.yaml similarity index 64% rename from test/kind-conf-none-fg.yaml rename to test/kind-conf-with-vpa.yaml index 25d972d418..0aeb86d0c1 100644 --- a/test/kind-conf-none-fg.yaml +++ b/test/kind-conf-with-vpa.yaml @@ -4,4 +4,6 @@ nodes: - role: control-plane - role: worker - role: worker - - role: worker \ No newline at end of file + - role: worker +featureGates: + InPlacePodVerticalScaling: true \ No newline at end of file diff --git a/tools/hack/create-cluster.sh b/tools/hack/create-cluster.sh index 0f99c72b24..9a2c962093 100755 --- a/tools/hack/create-cluster.sh +++ b/tools/hack/create-cluster.sh @@ -18,7 +18,7 @@ set -euo pipefail # Setup default values CLUSTER_NAME=${CLUSTER_NAME:-"ci-testing"} KIND_NODE_TAG=${KIND_NODE_TAG:-"v1.28.7"} -KIND_CONFIG=${KIND_CONFIG:-"test/kind-conf-none-fg.yaml"} +KIND_CONFIG=${KIND_CONFIG:-"test/kind-conf-with-vpa.yaml"} echo "$KIND_NODE_TAG" echo "$CLUSTER_NAME" diff --git a/tools/hack/run-kruise-e2e-test.sh b/tools/hack/run-kruise-e2e-test.sh index d65fd61060..3a995442a9 100755 --- a/tools/hack/run-kruise-e2e-test.sh +++ b/tools/hack/run-kruise-e2e-test.sh @@ -18,7 +18,10 @@ set -ex export KUBECONFIG=${HOME}/.kube/config make ginkgo set +e -./bin/ginkgo -timeout 60m -v --focus='\[apps\] StatefulSet' test/e2e +./bin/ginkgo -p -timeout 60m -v --focus='\[apps\] InplaceVPA' test/e2e +#./bin/ginkgo -p -timeout 60m -v --focus='\[apps\] CloneSet' test/e2e +#./bin/ginkgo -p -timeout 60m -v --focus='\[apps\] (CloneSet|InplaceVPA)' test/e2e + retVal=$? restartCount=$(kubectl get pod -n kruise-system -l control-plane=controller-manager --no-headers | awk '{print $4}') if [ "${restartCount}" -eq "0" ];then