Skip to content

Commit

Permalink
Move CrashLoopBackOff container state from PodLifeTime to TooManyRest…
Browse files Browse the repository at this point in the history
…arts plugin
  • Loading branch information
a7i committed Jun 8, 2023
1 parent 9aad51f commit 59bb1fd
Show file tree
Hide file tree
Showing 8 changed files with 197 additions and 4 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,13 @@ include `podRestartThreshold`, which is the number of restarts (summed over all
should be evicted, and `includingInitContainers`, which determines whether init container restarts should be factored
into that calculation.

You can also specify `states` parameter to **only** evict pods matching the following conditions:
- [Pod Phase](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase) status of: `Running`
- [Container State Waiting](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-state-waiting) of: `CrashLoopBackOff`

If a value for `states` or `podStatusPhases` is not specified,
Pods in any state (even `Running`) are considered for eviction.

**Parameters:**

|Name|Type|
Expand All @@ -569,6 +576,7 @@ into that calculation.
|`includingInitContainers`|bool|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`states`|list(string)|Only supported in v0.28+|

**Example:**

Expand Down
15 changes: 15 additions & 0 deletions examples/too-many-restarts.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
apiVersion: "descheduler/v1alpha2"
kind: "DeschedulerPolicy"
profiles:
- name: ProfileName
pluginConfig:
- name: "RemovePodsHavingTooManyRestarts"
args:
podRestartThreshold: 100
includingInitContainers: true
states:
- CrashLoopBackOff
plugins:
deschedule:
enabled:
- "RemovePodsHavingTooManyRestarts"
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,28 @@ func New(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plug
excludedNamespaces = sets.New(tooManyRestartsArgs.Namespaces.Exclude...)
}

podFilter := podutil.WrapFilterFuncs(handle.Evictor().Filter, handle.Evictor().PreEvictionFilter)

if len(tooManyRestartsArgs.States) > 0 {
states := sets.New(tooManyRestartsArgs.States...)
podFilter = podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool {
if states.Has(string(pod.Status.Phase)) {
return true
}

for _, containerStatus := range pod.Status.ContainerStatuses {
if containerStatus.State.Waiting != nil && states.Has(containerStatus.State.Waiting.Reason) {
return true
}
}

return false
})
}

// We can combine Filter and PreEvictionFilter since for this strategy it does not matter where we run PreEvictionFilter
podFilter, err := podutil.NewOptions().
WithFilter(podutil.WrapFilterFuncs(handle.Evictor().Filter, handle.Evictor().PreEvictionFilter)).
WithFilter(podFilter).
WithNamespaces(includedNamespaces).
WithoutNamespaces(excludedNamespaces).
WithLabelSelector(tooManyRestartsArgs.LabelSelector).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,6 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
node4 := test.BuildTestNode("node4", 200, 3000, 10, nil)
node5 := test.BuildTestNode("node5", 2000, 3000, 10, nil)

pods := append(append(initPods(node1), test.BuildTestPod("CPU-consumer-1", 150, 100, node4.Name, nil)), test.BuildTestPod("CPU-consumer-2", 150, 100, node5.Name, nil))

createRemovePodsHavingTooManyRestartsAgrs := func(
podRestartThresholds int32,
includingInitContainers bool,
Expand All @@ -126,6 +124,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
maxPodsToEvictPerNode *uint
maxNoOfPodsToEvictPerNamespace *uint
nodeFit bool
applyFunc func([]*v1.Pod)
}{
{
description: "All pods have total restarts under threshold, no pod evictions",
Expand Down Expand Up @@ -191,7 +190,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
maxNoOfPodsToEvictPerNamespace: &uint3,
},
{
description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node is tained, 0 pod evictions",
description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node is tainted, 0 pod evictions",
args: createRemovePodsHavingTooManyRestartsAgrs(1, true),
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
Expand Down Expand Up @@ -222,10 +221,68 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
maxPodsToEvictPerNode: &uint3,
nodeFit: true,
},
{
description: "pods are in CrashLoopBackOff with states=CrashLoopBackOff, 3 pod evictions",
args: RemovePodsHavingTooManyRestartsArgs{PodRestartThreshold: 1, States: []string{"CrashLoopBackOff"}},
nodes: []*v1.Node{node1, node5},
expectedEvictedPodCount: 3,
maxPodsToEvictPerNode: &uint3,
nodeFit: true,
applyFunc: func(pods []*v1.Pod) {
for _, pod := range pods {
if len(pod.Status.ContainerStatuses) > 0 {
pod.Status.ContainerStatuses[0].State = v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{Reason: "CrashLoopBackOff"},
}
}
}
},
},
{
description: "pods without CrashLoopBackOff with states=CrashLoopBackOff, 0 pod evictions",
args: RemovePodsHavingTooManyRestartsArgs{PodRestartThreshold: 1, States: []string{"CrashLoopBackOff"}},
nodes: []*v1.Node{node1, node5},
expectedEvictedPodCount: 0,
maxPodsToEvictPerNode: &uint3,
nodeFit: true,
},
{
description: "pods running with state=Running, 3 pod evictions",
args: RemovePodsHavingTooManyRestartsArgs{PodRestartThreshold: 1, States: []string{string(v1.PodRunning)}},
nodes: []*v1.Node{node1},
expectedEvictedPodCount: 3,
maxPodsToEvictPerNode: &uint3,
applyFunc: func(pods []*v1.Pod) {
for _, pod := range pods {
pod.Status.Phase = v1.PodRunning
}
},
},
{
description: "pods pending with state=Running, 0 pod evictions",
args: RemovePodsHavingTooManyRestartsArgs{PodRestartThreshold: 1, States: []string{string(v1.PodRunning)}},
nodes: []*v1.Node{node1},
expectedEvictedPodCount: 0,
maxPodsToEvictPerNode: &uint3,
applyFunc: func(pods []*v1.Pod) {
for _, pod := range pods {
pod.Status.Phase = v1.PodPending
}
},
},
}

for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
pods := append(
initPods(node1),
test.BuildTestPod("CPU-consumer-1", 150, 100, node4.Name, nil),
test.BuildTestPod("CPU-consumer-2", 150, 100, node5.Name, nil),
)
if tc.applyFunc != nil {
tc.applyFunc(pods)
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ type RemovePodsHavingTooManyRestartsArgs struct {
LabelSelector *metav1.LabelSelector `json:"labelSelector"`
PodRestartThreshold int32 `json:"podRestartThreshold"`
IncludingInitContainers bool `json:"includingInitContainers"`
States []string `json:"states"`
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ package removepodshavingtoomanyrestarts
import (
"fmt"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
)

// ValidateRemovePodsHavingTooManyRestartsArgs validates RemovePodsHavingTooManyRestarts arguments
Expand All @@ -38,5 +40,17 @@ func ValidateRemovePodsHavingTooManyRestartsArgs(obj runtime.Object) error {
return fmt.Errorf("invalid PodsHavingTooManyRestarts threshold")
}

allowedStates := sets.New(
// Pod phases:
string(v1.PodRunning),

// Container state reasons:
"CrashLoopBackOff",
)

if !allowedStates.HasAll(args.States...) {
return fmt.Errorf("states must be one of %v", allowedStates.UnsortedList())
}

return nil
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package removepodshavingtoomanyrestarts

import (
"testing"

v1 "k8s.io/api/core/v1"
)

func TestValidateRemovePodsHavingTooManyRestartsArgs(t *testing.T) {
testCases := []struct {
description string
args *RemovePodsHavingTooManyRestartsArgs
expectError bool
}{
{
description: "valid arg, no errors",
args: &RemovePodsHavingTooManyRestartsArgs{
PodRestartThreshold: 1,
States: []string{string(v1.PodRunning)},
},
expectError: false,
},
{
description: "invalid PodRestartThreshold arg, expects errors",
args: &RemovePodsHavingTooManyRestartsArgs{
PodRestartThreshold: 0,
},
expectError: true,
},
{
description: "invalid States arg, expects errors",
args: &RemovePodsHavingTooManyRestartsArgs{
PodRestartThreshold: 1,
States: []string{string(v1.PodFailed)},
},
expectError: true,
},
{
description: "allows CrashLoopBackOff state",
args: &RemovePodsHavingTooManyRestartsArgs{
PodRestartThreshold: 1,
States: []string{"CrashLoopBackOff"},
},
expectError: false,
},
}

for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
err := ValidateRemovePodsHavingTooManyRestartsArgs(tc.args)

hasError := err != nil
if tc.expectError != hasError {
t.Error("unexpected arg validation behavior")
}
})
}
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 59bb1fd

Please sign in to comment.