From 6a437ce0c507addbed83c2d719226e008c2f40e6 Mon Sep 17 00:00:00 2001 From: vadasambar Date: Tue, 14 Mar 2023 11:01:58 +0530 Subject: [PATCH] feat: add annotation to ignore local storage volume during scale down - this is so that scale down is not blocked on local storage volume - for pods where it is okay to ignore local storage volume Signed-off-by: vadasambar fix: tests failing - there was a problem in the logic Signed-off-by: vadasambar test: add unit test for `IgnoreLocalStorageVolumeKey` Signed-off-by: vadasambar refactor: use `IgnoreLocalStorageVolumeKey` in tests instead of hardcoding the annotation Signed-off-by: vadasambar fix: wording for test name - `pod with EmptyDir but IgnoreLocalStorageVolumeKey annotation` -> `pod with EmptyDir and IgnoreLocalStorageVolumeKey annotation` Signed-off-by: vadasambar fix: simulator drain tests failing - set local storage vol name (required) Signed-off-by: vadasambar refactor: add support for multiple vals in `safe-to-evict-local-volume` annotation - add more unit tests Signed-off-by: vadasambar refactor: rename ignore local vol key `safe-to-evict-local-volume` -> `safe-to-evict-local-volumes` - abtract code to process annotation into a separate fn - shorten name for test cases Signed-off-by: vadasambar docs: update FAQ with info about `safe-to-evict-local-volumes` annotation Signed-off-by: vadasambar docs: add the FAQ for `safe-to-evict-local-volumes` annotation Signed-off-by: vadasambar docs: fix formatting for `safe-to-evict-local-volumes` in FAQ Signed-off-by: vadasambar docs: format the `safe-to-evict-local-volumes` as a bullet Signed-off-by: vadasambar docs: fix `Unless` -> `unless` to make it consistent with other lines Signed-off-by: vadasambar test: add an extra test for mismatching local vol value in annotation Signed-off-by: vadasambar docs: make the wording clearer - for `safe-to-evict-local-volumes` annotation Signed-off-by: vadasambar (cherry picked from commit b663f138a4c8a1bbdd033be2e1449b097f08e7b2) --- cluster-autoscaler/FAQ.md | 7 +- cluster-autoscaler/simulator/drain_test.go | 2 + cluster-autoscaler/utils/drain/drain.go | 25 +- cluster-autoscaler/utils/drain/drain_test.go | 242 +++++++++++++++++++ 4 files changed, 271 insertions(+), 5 deletions(-) diff --git a/cluster-autoscaler/FAQ.md b/cluster-autoscaler/FAQ.md index 853adc00688d..a23fbfecb163 100644 --- a/cluster-autoscaler/FAQ.md +++ b/cluster-autoscaler/FAQ.md @@ -88,7 +88,12 @@ Cluster Autoscaler decreases the size of the cluster when some nodes are consist * are not run on the node by default, * * don't have a [pod disruption budget](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#how-disruption-budgets-work) set or their PDB is too restrictive (since CA 0.6). * Pods that are not backed by a controller object (so not created by deployment, replica set, job, stateful set etc). * -* Pods with local storage. * +* Pods with local storage. * + - unless the pod has the following annotation set: + ``` + "cluster-autoscaler.kubernetes.io/safe-to-evict-local-volumes": "volume-1,volume-2,.." + ``` + and all of the pod's local volumes are listed in the annotation value. * Pods that cannot be moved elsewhere due to various constraints (lack of resources, non-matching node selectors or affinity, matching anti-affinity, etc) * Pods that have the following annotation set: diff --git a/cluster-autoscaler/simulator/drain_test.go b/cluster-autoscaler/simulator/drain_test.go index 46ebb7542a47..dee97f4f2f9c 100644 --- a/cluster-autoscaler/simulator/drain_test.go +++ b/cluster-autoscaler/simulator/drain_test.go @@ -115,6 +115,7 @@ func TestGetPodsToMove(t *testing.T) { Spec: apiv1.PodSpec{ Volumes: []apiv1.Volume{ { + Name: "empty-vol", VolumeSource: apiv1.VolumeSource{ EmptyDir: &apiv1.EmptyDirVolumeSource{}, }, @@ -136,6 +137,7 @@ func TestGetPodsToMove(t *testing.T) { Spec: apiv1.PodSpec{ Volumes: []apiv1.Volume{ { + Name: "my-repo", VolumeSource: apiv1.VolumeSource{ GitRepo: &apiv1.GitRepoVolumeSource{ Repository: "my-repo", diff --git a/cluster-autoscaler/utils/drain/drain.go b/cluster-autoscaler/utils/drain/drain.go index 4dd8efc58ff8..fe1c8e8cbc11 100644 --- a/cluster-autoscaler/utils/drain/drain.go +++ b/cluster-autoscaler/utils/drain/drain.go @@ -18,6 +18,7 @@ package drain import ( "fmt" + "strings" "time" apiv1 "k8s.io/api/core/v1" @@ -38,6 +39,8 @@ const ( // PodSafeToEvictKey - annotation that ignores constraints to evict a pod like not being replicated, being on // kube-system namespace or having a local storage. PodSafeToEvictKey = "cluster-autoscaler.kubernetes.io/safe-to-evict" + // SafeToEvictLocalVolumesKey - annotation that ignores (doesn't block on) a local storage volume during node scale down + SafeToEvictLocalVolumesKey = "cluster-autoscaler.kubernetes.io/safe-to-evict-local-volumes" ) // BlockingPod represents a pod which is blocking the scale down of a node. @@ -219,7 +222,7 @@ func GetPodsForDeletionOnNodeDrain( return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: UnmovableKubeSystemPod}, fmt.Errorf("non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present: %s", pod.Name) } } - if HasLocalStorage(pod) && skipNodesWithLocalStorage { + if HasBlockingLocalStorage(pod) && skipNodesWithLocalStorage { return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: LocalStorageRequested}, fmt.Errorf("pod with local storage present: %s", pod.Name) } if hasNotSafeToEvictAnnotation(pod) { @@ -250,16 +253,30 @@ func isPodTerminal(pod *apiv1.Pod) bool { return pod.Status.Phase == apiv1.PodFailed } -// HasLocalStorage returns true if pod has any local storage. -func HasLocalStorage(pod *apiv1.Pod) bool { +// HasBlockingLocalStorage returns true if pod has any local storage +// without pod annotation `: ,...` +func HasBlockingLocalStorage(pod *apiv1.Pod) bool { + isNonBlocking := getNonBlockingVolumes(pod) for _, volume := range pod.Spec.Volumes { - if isLocalVolume(&volume) { + if isLocalVolume(&volume) && !isNonBlocking[volume.Name] { return true } } return false } +func getNonBlockingVolumes(pod *apiv1.Pod) map[string]bool { + isNonBlocking := map[string]bool{} + annotationVal := pod.GetAnnotations()[SafeToEvictLocalVolumesKey] + if annotationVal != "" { + vols := strings.Split(annotationVal, ",") + for _, v := range vols { + isNonBlocking[v] = true + } + } + return isNonBlocking +} + func isLocalVolume(volume *apiv1.Volume) bool { return volume.HostPath != nil || volume.EmptyDir != nil } diff --git a/cluster-autoscaler/utils/drain/drain_test.go b/cluster-autoscaler/utils/drain/drain_test.go index f6e5de7a7638..8042d51e3cfd 100644 --- a/cluster-autoscaler/utils/drain/drain_test.go +++ b/cluster-autoscaler/utils/drain/drain_test.go @@ -210,6 +210,178 @@ func TestDrain(t *testing.T) { }, } + emptyDirSafeToEvictVolumeSingleVal := &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + SafeToEvictLocalVolumesKey: "scratch", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeSingleValEmpty := &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + SafeToEvictLocalVolumesKey: "", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeSingleValNonMatching := &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + SafeToEvictLocalVolumesKey: "scratch-2", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeMultiValAllMatching := &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-3", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeMultiValNonMatching := &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-5", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals := &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + SafeToEvictLocalVolumesKey: "scratch-1,scratch-2", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeMultiValEmpty := &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + SafeToEvictLocalVolumesKey: ",", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + terminalPod := &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "bar", @@ -489,6 +661,76 @@ func TestDrain(t *testing.T) { expectBlockingPod: &BlockingPod{Pod: emptydirPod, Reason: LocalStorageRequested}, expectDaemonSetPods: []*apiv1.Pod{}, }, + { + description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation", + pods: []*apiv1.Pod{emptyDirSafeToEvictVolumeSingleVal}, + pdbs: []*policyv1.PodDisruptionBudget{}, + rcs: []*apiv1.ReplicationController{&rc}, + expectFatal: false, + expectPods: []*apiv1.Pod{emptyDirSafeToEvictVolumeSingleVal}, + expectBlockingPod: &BlockingPod{}, + expectDaemonSetPods: []*apiv1.Pod{}, + }, + { + description: "pod with EmptyDir and empty value for SafeToEvictLocalVolumesKey annotation", + pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeSingleValEmpty}, + pdbs: []*policyv1.PodDisruptionBudget{}, + rcs: []*apiv1.ReplicationController{&rc}, + expectFatal: true, + expectPods: []*apiv1.Pod{}, + expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeSingleValEmpty, Reason: LocalStorageRequested}, + expectDaemonSetPods: []*apiv1.Pod{}, + }, + { + description: "pod with EmptyDir and non-matching value for SafeToEvictLocalVolumesKey annotation", + pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeSingleValNonMatching}, + pdbs: []*policyv1.PodDisruptionBudget{}, + rcs: []*apiv1.ReplicationController{&rc}, + expectFatal: true, + expectPods: []*apiv1.Pod{}, + expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeSingleValNonMatching, Reason: LocalStorageRequested}, + expectDaemonSetPods: []*apiv1.Pod{}, + }, + { + description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with matching values", + pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, + pdbs: []*policyv1.PodDisruptionBudget{}, + rcs: []*apiv1.ReplicationController{&rc}, + expectFatal: false, + expectPods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, + expectBlockingPod: &BlockingPod{}, + expectDaemonSetPods: []*apiv1.Pod{}, + }, + { + description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with non-matching values", + pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValNonMatching}, + pdbs: []*policyv1.PodDisruptionBudget{}, + rcs: []*apiv1.ReplicationController{&rc}, + expectFatal: true, + expectPods: []*apiv1.Pod{}, + expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeMultiValNonMatching, Reason: LocalStorageRequested}, + expectDaemonSetPods: []*apiv1.Pod{}, + }, + { + description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with some matching values", + pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals}, + pdbs: []*policyv1.PodDisruptionBudget{}, + rcs: []*apiv1.ReplicationController{&rc}, + expectFatal: true, + expectPods: []*apiv1.Pod{}, + expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals, Reason: LocalStorageRequested}, + expectDaemonSetPods: []*apiv1.Pod{}, + }, + { + description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation empty values", + pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValEmpty}, + pdbs: []*policyv1.PodDisruptionBudget{}, + rcs: []*apiv1.ReplicationController{&rc}, + expectFatal: true, + expectPods: []*apiv1.Pod{}, + expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeMultiValEmpty, Reason: LocalStorageRequested}, + expectDaemonSetPods: []*apiv1.Pod{}, + }, { description: "failed pod", pods: []*apiv1.Pod{failedPod},