diff --git a/cluster-autoscaler/simulator/utilization/info.go b/cluster-autoscaler/simulator/utilization/info.go index 30c78cb95f29..54365bf84715 100644 --- a/cluster-autoscaler/simulator/utilization/info.go +++ b/cluster-autoscaler/simulator/utilization/info.go @@ -26,6 +26,7 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + resourcehelper "k8s.io/kubernetes/pkg/api/v1/resource" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" klog "k8s.io/klog/v2" @@ -89,40 +90,37 @@ func CalculateUtilizationOfResource(nodeInfo *schedulerframework.NodeInfo, resou if nodeAllocatable.MilliValue() == 0 { return 0, fmt.Errorf("%v is 0 at %s", resourceName, nodeInfo.Node().Name) } - podsRequest := resource.MustParse("0") + + opts := resourcehelper.PodResourcesOptions{} // if skipDaemonSetPods = True, DaemonSet pods resourses will be subtracted // from the node allocatable and won't be added to pods requests // the same with the Mirror pod. + podsRequest := resource.MustParse("0") daemonSetAndMirrorPodsUtilization := resource.MustParse("0") for _, podInfo := range nodeInfo.Pods { + requestedResourceList := resourcehelper.PodRequests(podInfo.Pod, opts) + resourceValue := requestedResourceList[resourceName] + // factor daemonset pods out of the utilization calculations if skipDaemonSetPods && pod_util.IsDaemonSetPod(podInfo.Pod) { - for _, container := range podInfo.Pod.Spec.Containers { - if resourceValue, found := container.Resources.Requests[resourceName]; found { - daemonSetAndMirrorPodsUtilization.Add(resourceValue) - } - } + daemonSetAndMirrorPodsUtilization.Add(resourceValue) continue } + // factor mirror pods out of the utilization calculations if skipMirrorPods && pod_util.IsMirrorPod(podInfo.Pod) { - for _, container := range podInfo.Pod.Spec.Containers { - if resourceValue, found := container.Resources.Requests[resourceName]; found { - daemonSetAndMirrorPodsUtilization.Add(resourceValue) - } - } + daemonSetAndMirrorPodsUtilization.Add(resourceValue) continue } + // ignore Pods that should be terminated if drain.IsPodLongTerminating(podInfo.Pod, currentTime) { continue } - for _, container := range podInfo.Pod.Spec.Containers { - if resourceValue, found := container.Resources.Requests[resourceName]; found { - podsRequest.Add(resourceValue) - } - } + + podsRequest.Add(resourceValue) } + return float64(podsRequest.MilliValue()) / float64(nodeAllocatable.MilliValue()-daemonSetAndMirrorPodsUtilization.MilliValue()), nil } diff --git a/cluster-autoscaler/simulator/utilization/info_test.go b/cluster-autoscaler/simulator/utilization/info_test.go index d6d697bd1a35..d68e92f2b882 100644 --- a/cluster-autoscaler/simulator/utilization/info_test.go +++ b/cluster-autoscaler/simulator/utilization/info_test.go @@ -21,6 +21,7 @@ import ( "time" apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/kubernetes/pkg/kubelet/types" @@ -34,6 +35,43 @@ func TestCalculate(t *testing.T) { pod := BuildTestPod("p1", 100, 200000) pod2 := BuildTestPod("p2", -1, -1) + podWithInitContainers := BuildTestPod("p-init", 100, 200000) + restartAlways := apiv1.ContainerRestartPolicyAlways + podWithInitContainers.Spec.InitContainers = []apiv1.Container{ + // restart always + { + Resources: apiv1.ResourceRequirements{ + Requests: apiv1.ResourceList{ + apiv1.ResourceCPU: *resource.NewMilliQuantity(50, resource.DecimalSI), + apiv1.ResourceMemory: *resource.NewQuantity(100000, resource.DecimalSI), + }, + }, + RestartPolicy: &restartAlways, + }, + // non-restartable, should be excluded from calculations + { + Resources: apiv1.ResourceRequirements{ + Requests: apiv1.ResourceList{ + apiv1.ResourceCPU: *resource.NewMilliQuantity(5, resource.DecimalSI), + apiv1.ResourceMemory: *resource.NewQuantity(100, resource.DecimalSI), + }, + }, + }, + } + + podWithLargeNonRestartableInitContainers := BuildTestPod("p-large-init", 100, 200000) + podWithLargeNonRestartableInitContainers.Spec.InitContainers = []apiv1.Container{ + // large non-restartable should "overwhelm" the pod utilization calc + // see formula: https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/753-sidecar-containers#resources-calculation-for-scheduling-and-pod-admission + { + Resources: apiv1.ResourceRequirements{ + Requests: apiv1.ResourceList{ + apiv1.ResourceCPU: *resource.NewMilliQuantity(50000, resource.DecimalSI), + apiv1.ResourceMemory: *resource.NewQuantity(100000000, resource.DecimalSI), + }, + }, + }, + } node := BuildTestNode("node1", 2000, 2000000) SetNodeReadyState(node, true, time.Time{}) nodeInfo := newNodeInfo(node, pod, pod, pod2) @@ -42,14 +80,25 @@ func TestCalculate(t *testing.T) { utilInfo, err := Calculate(nodeInfo, false, false, gpuConfig, testTime) assert.NoError(t, err) assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01) + assert.Equal(t, 0.1, utilInfo.CpuUtil) - node2 := BuildTestNode("node1", 2000, -1) + node2 := BuildTestNode("node2", 2000, -1) nodeInfo = newNodeInfo(node2, pod, pod, pod2) gpuConfig = GetGpuConfigFromNode(nodeInfo.Node()) _, err = Calculate(nodeInfo, false, false, gpuConfig, testTime) assert.Error(t, err) + node3 := BuildTestNode("node3", 2000, 2000000) + SetNodeReadyState(node3, true, time.Time{}) + nodeInfo = newNodeInfo(node3, pod, podWithInitContainers, podWithLargeNonRestartableInitContainers) + + gpuConfig = GetGpuConfigFromNode(nodeInfo.Node()) + utilInfo, err = Calculate(nodeInfo, false, false, gpuConfig, testTime) + assert.NoError(t, err) + assert.InEpsilon(t, 50.25, utilInfo.Utilization, 0.01) + assert.Equal(t, 25.125, utilInfo.CpuUtil) + daemonSetPod3 := BuildTestPod("p3", 100, 200000) daemonSetPod3.OwnerReferences = GenerateOwnerReferences("ds", "DaemonSet", "apps/v1", "")