From 90114ebabfb7f1b60222c2660ca02dc6f924e675 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Tu=C5=BCnik?= Date: Thu, 21 Nov 2024 19:35:07 +0100 Subject: [PATCH] CA: implement DRA integration tests for StaticAutoscaler --- .../cloudprovider/test/test_cloud_provider.go | 5 + .../core/static_autoscaler_dra_test.go | 777 ++++++++++++++++++ .../core/static_autoscaler_test.go | 111 ++- cluster-autoscaler/core/test/common.go | 4 +- .../testsnapshot/test_snapshot.go | 36 +- 5 files changed, 873 insertions(+), 60 deletions(-) create mode 100644 cluster-autoscaler/core/static_autoscaler_dra_test.go diff --git a/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go b/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go index 4ab9d114a0b5..cb90548581b2 100644 --- a/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go @@ -297,6 +297,11 @@ func (tcp *TestCloudProvider) SetResourceLimiter(resourceLimiter *cloudprovider. tcp.resourceLimiter = resourceLimiter } +// SetMachineTemplates sets template NodeInfos per-machine-type. +func (tcp *TestCloudProvider) SetMachineTemplates(machineTemplates map[string]*framework.NodeInfo) { + tcp.machineTemplates = machineTemplates +} + // Cleanup this is a function to close resources associated with the cloud provider func (tcp *TestCloudProvider) Cleanup() error { return nil diff --git a/cluster-autoscaler/core/static_autoscaler_dra_test.go b/cluster-autoscaler/core/static_autoscaler_dra_test.go new file mode 100644 index 000000000000..779b3f51d579 --- /dev/null +++ b/cluster-autoscaler/core/static_autoscaler_dra_test.go @@ -0,0 +1,777 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package core + +import ( + "flag" + "fmt" + "math" + "slices" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + + appsv1 "k8s.io/api/apps/v1" + apiv1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + resourceapi "k8s.io/api/resource/v1beta1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apiserver/pkg/util/feature" + "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/context" + scaledownstatus "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status" + "k8s.io/autoscaler/cluster-autoscaler/estimator" + "k8s.io/autoscaler/cluster-autoscaler/processors/status" + "k8s.io/autoscaler/cluster-autoscaler/simulator" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" + . "k8s.io/autoscaler/cluster-autoscaler/utils/test" + featuretesting "k8s.io/component-base/featuregate/testing" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/features" +) + +const ( + exampleDriver = "dra.example.com" + + gpuDevice = "gpuDevice" + gpuAttribute = "gpuType" + gpuTypeA = "gpuA" + gpuTypeB = "gpuB" + + nicDevice = "nicDevice" + nicAttribute = "nicType" + nicTypeA = "nicA" + + globalDevice = "globalDev" + globalDevAttribute = "globalDevType" + globalDevTypeA = "globalDevA" +) + +var ( + defaultDeviceClass = &resourceapi.DeviceClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "default-class", + Namespace: "default", + }, + Spec: resourceapi.DeviceClassSpec{ + Selectors: nil, + Config: nil, + }, + } +) + +type fakeScaleUpStatusProcessor struct { + lastStatus *status.ScaleUpStatus +} + +func (f *fakeScaleUpStatusProcessor) Process(context *context.AutoscalingContext, status *status.ScaleUpStatus) { + f.lastStatus = status +} + +func (f *fakeScaleUpStatusProcessor) CleanUp() { +} + +type fakeScaleDownStatusProcessor struct { + lastStatus *scaledownstatus.ScaleDownStatus +} + +func (f *fakeScaleDownStatusProcessor) Process(context *context.AutoscalingContext, status *scaledownstatus.ScaleDownStatus) { + f.lastStatus = status +} + +func (f *fakeScaleDownStatusProcessor) CleanUp() { +} + +type testDeviceRequest struct { + name string + selectors []string + count int64 + all bool +} + +type testDevice struct { + name string + attributes map[string]string + capacity map[string]string +} + +type testAllocation struct { + request string + driver string + pool string + device string +} + +type sliceNodeAvailability struct { + node string + nodes []string + all bool +} + +type testPod struct { + pod *apiv1.Pod + claims []*resourceapi.ResourceClaim +} + +type testNodeGroupDef struct { + name string + cpu, mem int64 + slicesTemplateFunc func(nodeName string) []*resourceapi.ResourceSlice +} + +type noScaleUpDef struct { + podName string + podNamePrefix string + podCount int +} + +type noScaleDownDef struct { + nodeName string + nodeNamePrefix string + nodeCount int + reason simulator.UnremovableReason +} + +func TestStaticAutoscalerDynamicResources(t *testing.T) { + // Uncomment to get logs from the DRA plugin. + var fs flag.FlagSet + klog.InitFlags(&fs) + assert.NoError(t, fs.Set("v", "5")) + + featuretesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.DynamicResourceAllocation, true) + + now := time.Now() + + node1GpuA1slice := &testNodeGroupDef{name: "node1GpuA1slice", cpu: 1000, mem: 1000, slicesTemplateFunc: nodeTemplateResourceSlices(exampleDriver, 1, 0, []testDevice{{name: gpuDevice + "-0", attributes: map[string]string{gpuAttribute: gpuTypeA}}})} + node1GpuB1slice := &testNodeGroupDef{name: "node1GpuB1slice", cpu: 1000, mem: 1000, slicesTemplateFunc: nodeTemplateResourceSlices(exampleDriver, 1, 0, []testDevice{{name: gpuDevice + "-0", attributes: map[string]string{gpuAttribute: gpuTypeB}}})} + node3GpuA1slice := &testNodeGroupDef{name: "node3GpuA1slice", cpu: 1000, mem: 1000, slicesTemplateFunc: nodeTemplateResourceSlices(exampleDriver, 1, 0, testDevices(gpuDevice, 3, map[string]string{gpuAttribute: gpuTypeA}, nil))} + node3GpuA3slice := &testNodeGroupDef{name: "node3GpuA3slice", cpu: 1000, mem: 1000, slicesTemplateFunc: nodeTemplateResourceSlices(exampleDriver, 3, 0, testDevices(gpuDevice, 3, map[string]string{gpuAttribute: gpuTypeA}, nil))} + node1Nic1slice := &testNodeGroupDef{name: "node1Nic1slice", cpu: 1000, mem: 1000, slicesTemplateFunc: nodeTemplateResourceSlices(exampleDriver, 1, 0, []testDevice{{name: nicDevice + "-0", attributes: map[string]string{nicAttribute: nicTypeA}}})} + node1Gpu1Nic1slice := &testNodeGroupDef{name: "node1Gpu1Nic1slice", cpu: 1000, mem: 1000, slicesTemplateFunc: nodeTemplateResourceSlices(exampleDriver, 1, 0, []testDevice{ + {name: gpuDevice + "-0", attributes: map[string]string{gpuAttribute: gpuTypeA}}, + {name: nicDevice + "-0", attributes: map[string]string{nicAttribute: nicTypeA}}, + })} + + baseBigPod := BuildTestPod("", 600, 100) + baseSmallPod := BuildTestPod("", 100, 100) + + req1GpuA := testDeviceRequest{name: "req1GpuA", count: 1, selectors: singleAttrSelector(exampleDriver, gpuAttribute, gpuTypeA)} + req2GpuA := testDeviceRequest{name: "req2GpuA", count: 2, selectors: singleAttrSelector(exampleDriver, gpuAttribute, gpuTypeA)} + req1GpuB := testDeviceRequest{name: "req1GpuB", count: 1, selectors: singleAttrSelector(exampleDriver, gpuAttribute, gpuTypeB)} + req1Nic := testDeviceRequest{name: "req1Nic", count: 1, selectors: singleAttrSelector(exampleDriver, nicAttribute, nicTypeA)} + req1Global := testDeviceRequest{name: "req1Global", count: 1, selectors: singleAttrSelector(exampleDriver, globalDevAttribute, globalDevTypeA)} + + sharedGpuBClaim := testResourceClaim("sharedGpuBClaim", nil, "", []testDeviceRequest{req1GpuB}, nil, nil) + sharedAllocatedGlobalClaim := testResourceClaim("sharedGlobalClaim", nil, "", []testDeviceRequest{req1Global}, []testAllocation{{request: req1Global.name, driver: exampleDriver, pool: "global-pool", device: globalDevice + "-0"}}, nil) + + testCases := map[string]struct { + nodeGroups map[*testNodeGroupDef]int + templatePods map[string][]testPod + pods []testPod + extraResourceSlices []*resourceapi.ResourceSlice + extraResourceClaims []*resourceapi.ResourceClaim + expectedScaleUps map[string]int + expectedScaleDowns map[string][]string + expectedNoScaleUps []noScaleUpDef + expectedNoScaleDowns []noScaleDownDef + }{ + "scale-up: one pod per node, one device per node": { + // 1xGPU nodes, pods requesting 1xGPU: 1 scheduled, 3 unschedulable -> 3 nodes needed + nodeGroups: map[*testNodeGroupDef]int{node1GpuA1slice: 1}, + pods: append( + unscheduledPods(baseSmallPod, "unschedulable", 3, []testDeviceRequest{req1GpuA}), + scheduledPod(baseSmallPod, "scheduled-0", node1GpuA1slice.name+"-0", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-0"}}), + ), + expectedScaleUps: map[string]int{node1GpuA1slice.name: 3}, + }, + "scale-up: multiple pods per node, pods requesting one device": { + // 3xGPU nodes, pods requesting 1xGPU: 2 scheduled, 10 unschedulable -> 3 nodes needed + nodeGroups: map[*testNodeGroupDef]int{node3GpuA1slice: 1}, + pods: append( + unscheduledPods(baseSmallPod, "unschedulable", 10, []testDeviceRequest{req1GpuA}), + scheduledPod(baseSmallPod, "scheduled-0", node3GpuA1slice.name+"-0", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-0"}}), + scheduledPod(baseSmallPod, "scheduled-1", node3GpuA1slice.name+"-0", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-1"}}), + ), + expectedScaleUps: map[string]int{node3GpuA1slice.name: 3}, + }, + "scale-up: multiple pods per node, pods requesting multiple identical devices": { + // 3xGPU nodes, 1 scheduled pod requesting 1xGPU, 4 unschedulable pods requesting 2xGPU -> 3 nodes needed" + nodeGroups: map[*testNodeGroupDef]int{node3GpuA1slice: 1}, + pods: append( + unscheduledPods(baseSmallPod, "unschedulable", 4, []testDeviceRequest{req2GpuA}), + scheduledPod(baseSmallPod, "scheduled-0", node3GpuA1slice.name+"-0", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-0"}}), + ), + expectedScaleUps: map[string]int{node3GpuA1slice.name: 3}, + }, + "scale-up: multiple devices in one slice work correctly": { + // 3xGPU nodes, 1 scheduled pod requesting 2xGPU, 5 unschedulable pods requesting 1xGPU -> 2 nodes needed" + nodeGroups: map[*testNodeGroupDef]int{node3GpuA1slice: 1}, + pods: append( + unscheduledPods(baseSmallPod, "unschedulable", 5, []testDeviceRequest{req1GpuA}), + scheduledPod(baseSmallPod, "scheduled-0", node3GpuA1slice.name+"-0", map[*testDeviceRequest][]string{&req2GpuA: {gpuDevice + "-0", gpuDevice + "-1"}}), + ), + expectedScaleUps: map[string]int{node3GpuA1slice.name: 2}, + }, + "scale-up: multiple devices in multiple slices work correctly": { + // 3xGPU nodes, 1 scheduled pod requesting 2xGPU, 5 unschedulable pods requesting 1xGPU -> 2 nodes needed" + nodeGroups: map[*testNodeGroupDef]int{node3GpuA3slice: 1}, + pods: append( + unscheduledPods(baseSmallPod, "unschedulable", 5, []testDeviceRequest{req1GpuA}), + scheduledPod(baseSmallPod, "scheduled-0", node3GpuA3slice.name+"-0", map[*testDeviceRequest][]string{&req2GpuA: {gpuDevice + "-0", gpuDevice + "-1"}}), + ), + expectedScaleUps: map[string]int{node3GpuA3slice.name: 2}, + }, + "scale-up: one pod per node, pods requesting multiple different devices": { + nodeGroups: map[*testNodeGroupDef]int{node1Gpu1Nic1slice: 1}, + pods: append( + unscheduledPods(baseSmallPod, "unschedulable", 3, []testDeviceRequest{req1GpuA, req1Nic}), + scheduledPod(baseSmallPod, "scheduled-0", node1Gpu1Nic1slice.name+"-0", map[*testDeviceRequest][]string{&req1Nic: {nicDevice + "-0"}, &req1GpuA: {gpuDevice + "-0"}}), + ), + expectedScaleUps: map[string]int{node1Gpu1Nic1slice.name: 3}, + }, + "scale-up: scale from 0 nodes in a node group": { + nodeGroups: map[*testNodeGroupDef]int{node1Gpu1Nic1slice: 0}, + pods: append( + unscheduledPods(baseSmallPod, "unschedulable", 3, []testDeviceRequest{req1GpuA, req1Nic}), + ), + expectedScaleUps: map[string]int{node1Gpu1Nic1slice.name: 3}, + }, + "scale-up: scale from 0 nodes in a node group, with pods on the template nodes consuming DRA resources": { + nodeGroups: map[*testNodeGroupDef]int{node3GpuA1slice: 0}, + templatePods: map[string][]testPod{ + node3GpuA1slice.name: { + scheduledPod(baseSmallPod, "template-0", node3GpuA1slice.name+"-template", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-0"}}), + scheduledPod(baseSmallPod, "template-1", node3GpuA1slice.name+"-template", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-1"}}), + }, + }, + pods: append( + unscheduledPods(baseSmallPod, "unschedulable", 3, []testDeviceRequest{req1GpuA}), + ), + expectedScaleUps: map[string]int{node3GpuA1slice.name: 3}, + }, + "scale-up: scale from 0 nodes in a node group, with pods on the template nodes consuming DRA resources, including shared claims": { + nodeGroups: map[*testNodeGroupDef]int{node3GpuA1slice: 0}, + extraResourceClaims: []*resourceapi.ResourceClaim{sharedAllocatedGlobalClaim}, + templatePods: map[string][]testPod{ + node3GpuA1slice.name: { + scheduledPod(baseSmallPod, "template-0", node3GpuA1slice.name+"-template", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-0"}}, sharedAllocatedGlobalClaim), + scheduledPod(baseSmallPod, "template-1", node3GpuA1slice.name+"-template", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-1"}}, sharedAllocatedGlobalClaim), + }, + }, + pods: append( + unscheduledPods(baseSmallPod, "unschedulable", 3, []testDeviceRequest{req1GpuA}, sharedAllocatedGlobalClaim), + ), + expectedScaleUps: map[string]int{node3GpuA1slice.name: 3}, + }, + "no scale-up: pods requesting multiple different devices, but they're on different nodes": { + nodeGroups: map[*testNodeGroupDef]int{node1GpuA1slice: 1, node1Nic1slice: 1}, + pods: append( + unscheduledPods(baseSmallPod, "unschedulable", 3, []testDeviceRequest{req1GpuA, req1Nic}), + ), + }, + "scale-up: pods requesting a shared, unallocated claim": { + extraResourceClaims: []*resourceapi.ResourceClaim{sharedGpuBClaim}, + nodeGroups: map[*testNodeGroupDef]int{node1GpuB1slice: 1}, + pods: append( + unscheduledPods(baseSmallPod, "unschedulable", 13, nil, sharedGpuBClaim), + scheduledPod(baseSmallPod, "scheduled-0", node1GpuB1slice.name+"-0", map[*testDeviceRequest][]string{&req1GpuB: {gpuDevice + "-0"}}), + ), + // All pods request a shared claim to a node-local resource - only 1 node can work. + expectedScaleUps: map[string]int{node1GpuB1slice.name: 1}, + // The claim is bound to a node, and the node only fits 10 pods because of CPU. The 3 extra pods shouldn't trigger a scale-up. + expectedNoScaleUps: []noScaleUpDef{{podNamePrefix: "unschedulable", podCount: 3}}, + }, + "scale-down: empty single-device nodes": { + nodeGroups: map[*testNodeGroupDef]int{node1GpuA1slice: 3}, + pods: []testPod{ + scheduledPod(baseBigPod, "scheduled-0", node1GpuA1slice.name+"-1", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-0"}}), + }, + expectedScaleDowns: map[string][]string{node1GpuA1slice.name: {node1GpuA1slice.name + "-0", node1GpuA1slice.name + "-2"}}, + expectedNoScaleDowns: []noScaleDownDef{{nodeName: node1GpuA1slice.name + "-1", reason: simulator.NotUnderutilized}}, + }, + "scale-down: single-device nodes with drain": { + nodeGroups: map[*testNodeGroupDef]int{node3GpuA1slice: 3}, + pods: []testPod{ + scheduledPod(baseSmallPod, "scheduled-0", node3GpuA1slice.name+"-0", map[*testDeviceRequest][]string{&req2GpuA: {gpuDevice + "-0", gpuDevice + "-1"}}), + scheduledPod(baseSmallPod, "scheduled-1", node3GpuA1slice.name+"-1", map[*testDeviceRequest][]string{&req2GpuA: {gpuDevice + "-0", gpuDevice + "-1"}}), + scheduledPod(baseSmallPod, "scheduled-2", node3GpuA1slice.name+"-2", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-0"}}), + scheduledPod(baseSmallPod, "scheduled-3", node3GpuA1slice.name+"-2", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-1"}}), + }, + expectedScaleDowns: map[string][]string{node3GpuA1slice.name: {node3GpuA1slice.name + "-2"}}, + expectedNoScaleDowns: []noScaleDownDef{ + {nodeName: node1GpuA1slice.name + "-0", reason: simulator.NoPlaceToMovePods}, + {nodeName: node1GpuA1slice.name + "-1", reason: simulator.NoPlaceToMovePods}, + }, + }, + "no scale-down: no place to reschedule": { + nodeGroups: map[*testNodeGroupDef]int{node3GpuA1slice: 3}, + pods: []testPod{ + scheduledPod(baseBigPod, "scheduled-0", node3GpuA1slice.name+"-0", map[*testDeviceRequest][]string{&req2GpuA: {gpuDevice + "-0", gpuDevice + "-1"}}), + scheduledPod(baseBigPod, "scheduled-1", node3GpuA1slice.name+"-1", map[*testDeviceRequest][]string{&req2GpuA: {gpuDevice + "-0", gpuDevice + "-1"}}), + scheduledPod(baseBigPod, "scheduled-2", node3GpuA1slice.name+"-1", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-2"}}), + scheduledPod(baseSmallPod, "scheduled-3", node3GpuA1slice.name+"-2", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-0"}}), + scheduledPod(baseSmallPod, "scheduled-4", node3GpuA1slice.name+"-2", map[*testDeviceRequest][]string{&req1GpuA: {gpuDevice + "-1"}}), + }, + expectedNoScaleDowns: []noScaleDownDef{ + {nodeName: node3GpuA1slice.name + "-0", reason: simulator.NoPlaceToMovePods}, + {nodeName: node3GpuA1slice.name + "-1", reason: simulator.NotUnderutilized}, + {nodeName: node3GpuA1slice.name + "-2", reason: simulator.NoPlaceToMovePods}, + }, + }, + } + + for tcName, tc := range testCases { + t.Run(tcName, func(t *testing.T) { + var nodeGroups []*nodeGroup + var allNodes []*apiv1.Node + allResourceSlices := tc.extraResourceSlices + for nodeGroupDef, count := range tc.nodeGroups { + var nodes []*apiv1.Node + for i := range count { + node := BuildTestNode(fmt.Sprintf("%s-%d", nodeGroupDef.name, i), nodeGroupDef.cpu, nodeGroupDef.mem) + SetNodeReadyState(node, true, now) + nodes = append(nodes, node) + if nodeGroupDef.slicesTemplateFunc != nil { + slicesForNode := nodeGroupDef.slicesTemplateFunc(node.Name) + allResourceSlices = append(allResourceSlices, slicesForNode...) + } + } + + templateNode := BuildTestNode(fmt.Sprintf("%s-template", nodeGroupDef.name), nodeGroupDef.cpu, nodeGroupDef.mem) + templateSlices := nodeGroupDef.slicesTemplateFunc(templateNode.Name) + templateNodeInfo := framework.NewNodeInfo(templateNode, templateSlices) + for _, pod := range tc.templatePods[nodeGroupDef.name] { + WithDSController()(pod.pod) + templateNodeInfo.AddPod(framework.NewPodInfo(pod.pod, pod.claims)) + } + + nodeGroups = append(nodeGroups, &nodeGroup{ + name: nodeGroupDef.name, + template: templateNodeInfo, + min: 0, + max: 10, + nodes: nodes, + }) + allNodes = append(allNodes, nodes...) + } + + var allPods []*apiv1.Pod + allResourceClaims := tc.extraResourceClaims + for _, pod := range tc.pods { + allPods = append(allPods, pod.pod) + allResourceClaims = append(allResourceClaims, pod.claims...) + } + + allExpectedScaleDowns := 0 + + mocks := newCommonMocks() + mocks.readyNodeLister.SetNodes(allNodes) + mocks.allNodeLister.SetNodes(allNodes) + mocks.daemonSetLister.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil) + mocks.podDisruptionBudgetLister.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil) + mocks.allPodLister.On("List").Return(allPods, nil) + for nodeGroup, delta := range tc.expectedScaleUps { + mocks.onScaleUp.On("ScaleUp", nodeGroup, delta).Return(nil).Once() + } + for nodeGroup, nodes := range tc.expectedScaleDowns { + for _, node := range nodes { + mocks.onScaleDown.On("ScaleDown", nodeGroup, node).Return(nil).Once() + allExpectedScaleDowns++ + } + } + mocks.resourceClaimLister = &fakeAllObjectsLister[*resourceapi.ResourceClaim]{objects: allResourceClaims} + mocks.resourceSliceLister = &fakeAllObjectsLister[*resourceapi.ResourceSlice]{objects: allResourceSlices} + mocks.deviceClassLister = &fakeAllObjectsLister[*resourceapi.DeviceClass]{objects: []*resourceapi.DeviceClass{defaultDeviceClass}} + + setupConfig := &autoscalerSetupConfig{ + autoscalingOptions: config.AutoscalingOptions{ + NodeGroupDefaults: config.NodeGroupAutoscalingOptions{ + ScaleDownUnneededTime: time.Minute, + ScaleDownUnreadyTime: time.Minute, + ScaleDownUtilizationThreshold: 0.7, + MaxNodeProvisionTime: time.Hour, + }, + EstimatorName: estimator.BinpackingEstimatorName, + MaxBinpackingTime: 1 * time.Hour, + MaxNodeGroupBinpackingDuration: 1 * time.Hour, + ScaleDownSimulationTimeout: 1 * time.Hour, + OkTotalUnreadyCount: 9999999, + MaxTotalUnreadyPercentage: 1.0, + ScaleDownEnabled: true, + MaxScaleDownParallelism: 10, + MaxDrainParallelism: 10, + NodeDeletionBatcherInterval: 0 * time.Second, + NodeDeleteDelayAfterTaint: 1 * time.Millisecond, + MaxNodesTotal: 1000, + MaxCoresTotal: 1000, + MaxMemoryTotal: 100000000, + ScaleUpFromZero: true, + DynamicResourceAllocationEnabled: true, + }, + nodeGroups: nodeGroups, + nodeStateUpdateTime: now, + mocks: mocks, + optionsBlockDefaulting: true, + nodesDeleted: make(chan bool, allExpectedScaleDowns), + } + + autoscaler, err := setupAutoscaler(setupConfig) + assert.NoError(t, err) + + scaleUpProcessor := &fakeScaleUpStatusProcessor{} + scaleDownProcessor := &fakeScaleDownStatusProcessor{} + autoscaler.processors.ScaleUpStatusProcessor = scaleUpProcessor + autoscaler.processors.ScaleDownStatusProcessor = scaleDownProcessor + + if len(tc.expectedScaleDowns) > 0 { + err = autoscaler.RunOnce(now) + assert.NoError(t, err) + } + + err = autoscaler.RunOnce(now.Add(2 * time.Minute)) + assert.NoError(t, err) + + for range allExpectedScaleDowns { + select { + case <-setupConfig.nodesDeleted: + return + case <-time.After(20 * time.Second): + t.Fatalf("Node deletes not finished") + } + } + + if len(tc.expectedNoScaleUps) > 0 || len(tc.expectedNoScaleDowns) > 0 { + err = autoscaler.RunOnce(now.Add(4 * time.Minute)) + assert.NoError(t, err) + + for _, noScaleUp := range tc.expectedNoScaleUps { + assertNoScaleUpReported(t, scaleUpProcessor.lastStatus, noScaleUp) + } + for _, noScaleDown := range tc.expectedNoScaleDowns { + assertNoScaleDownReported(t, scaleDownProcessor.lastStatus, noScaleDown) + } + } + + mock.AssertExpectationsForObjects(t, setupConfig.mocks.allPodLister, + setupConfig.mocks.podDisruptionBudgetLister, setupConfig.mocks.daemonSetLister, setupConfig.mocks.onScaleUp, setupConfig.mocks.onScaleDown) + }) + } +} + +func assertNoScaleUpReported(t *testing.T, status *status.ScaleUpStatus, wantNoScaleUp noScaleUpDef) { + matchingPrefix := 0 + for _, noScaleUpPod := range status.PodsRemainUnschedulable { + if wantNoScaleUp.podName != "" && wantNoScaleUp.podName == noScaleUpPod.Pod.Name { + return + } + if wantNoScaleUp.podNamePrefix != "" && strings.HasPrefix(noScaleUpPod.Pod.Name, wantNoScaleUp.podNamePrefix) { + matchingPrefix++ + } + } + assert.Equal(t, wantNoScaleUp.podCount, matchingPrefix) +} + +func assertNoScaleDownReported(t *testing.T, status *scaledownstatus.ScaleDownStatus, wantNoScaleDown noScaleDownDef) { + matchingPrefix := 0 + for _, unremovableNode := range status.UnremovableNodes { + if wantNoScaleDown.nodeName != "" && wantNoScaleDown.nodeName == unremovableNode.Node.Name { + assert.Equal(t, wantNoScaleDown.reason, unremovableNode.Reason) + return + } + if wantNoScaleDown.nodeNamePrefix != "" && strings.HasPrefix(unremovableNode.Node.Name, wantNoScaleDown.nodeNamePrefix) { + assert.Equal(t, wantNoScaleDown.reason, unremovableNode.Reason) + matchingPrefix++ + } + } + assert.Equal(t, wantNoScaleDown.nodeCount, matchingPrefix) +} + +func singleAttrSelector(driver, attribute, value string) []string { + return []string{fmt.Sprintf("device.attributes[%q].%s == %q", driver, attribute, value)} +} + +func unscheduledPods(basePod *apiv1.Pod, podBaseName string, podCount int, requests []testDeviceRequest, sharedClaims ...*resourceapi.ResourceClaim) []testPod { + var result []testPod + for i := range podCount { + pod := unscheduledPod(basePod, fmt.Sprintf("%s-%d", podBaseName, i), podBaseName, requests, sharedClaims...) + result = append(result, pod) + } + return result +} + +func scheduledPod(basePod *apiv1.Pod, podName, nodeName string, requests map[*testDeviceRequest][]string, sharedClaims ...*resourceapi.ResourceClaim) testPod { + allocations := map[string][]testAllocation{} + var reqs []testDeviceRequest + for request, devices := range requests { + reqs = append(reqs, *request) + for _, device := range devices { + allocations[request.name] = append(allocations[request.name], testAllocation{ + request: request.name, + driver: exampleDriver, + pool: nodeName, + device: device, + }) + } + } + return createTestPod(basePod, podName, podName+"-controller", nodeName, len(requests), reqs, allocations, sharedClaims) +} + +func unscheduledPod(basePod *apiv1.Pod, podName, controllerName string, requests []testDeviceRequest, extraClaims ...*resourceapi.ResourceClaim) testPod { + pod := createTestPod(basePod, podName, controllerName, "", len(requests), requests, nil, extraClaims) + MarkUnschedulable()(pod.pod) + return pod +} + +func createTestPod(basePod *apiv1.Pod, podName, controllerName, nodeName string, claimCount int, requests []testDeviceRequest, allocations map[string][]testAllocation, sharedClaims []*resourceapi.ResourceClaim) testPod { + pod := basePod.DeepCopy() + pod.Name = podName + pod.UID = types.UID(podName) + pod.Spec.NodeName = nodeName + pod.OwnerReferences = GenerateOwnerReferences(controllerName, "ReplicaSet", "apps/v1", types.UID(controllerName)) + claims := resourceClaimsForPod(pod, nodeName, claimCount, requests, allocations) + for i, claim := range claims { + claimRef := fmt.Sprintf("claim-ref-%d", i) + claimTemplateName := fmt.Sprintf("%s-%s-template", pod.Name, claimRef) // For completeness only. + WithResourceClaim(claimRef, claim.Name, claimTemplateName)(pod) + } + for i, extraClaim := range sharedClaims { + WithResourceClaim(fmt.Sprintf("claim-ref-extra-%d", i), extraClaim.Name, "")(pod) + } + return testPod{pod: pod, claims: claims} +} + +func resourceClaimsForPod(pod *apiv1.Pod, nodeName string, claimCount int, requests []testDeviceRequest, allocations map[string][]testAllocation) []*resourceapi.ResourceClaim { + if claimCount == 0 || len(requests) == 0 { + return nil + } + + slices.SortFunc(requests, func(a, b testDeviceRequest) int { + if a.name < b.name { + return -1 + } + if a.name == b.name { + return 0 + } + return 1 + }) + + requestsPerClaim := int64(math.Ceil(float64(len(requests)) / float64(claimCount))) + + requestIndex := 0 + var claims []*resourceapi.ResourceClaim + for claimIndex := range claimCount { + name := fmt.Sprintf("%s-claim-%d", pod.Name, claimIndex) + + var claimRequests []testDeviceRequest + var claimAllocations []testAllocation + for range requestsPerClaim { + request := requests[requestIndex] + claimRequests = append(claimRequests, request) + if allocs, found := allocations[request.name]; found { + claimAllocations = append(claimAllocations, allocs...) + } + + requestIndex++ + if requestIndex >= len(requests) { + break + } + } + + claims = append(claims, testResourceClaim(name, pod, nodeName, claimRequests, claimAllocations, nil)) + } + + return claims +} + +func testResourceClaim(claimName string, owningPod *apiv1.Pod, nodeName string, requests []testDeviceRequest, allocations []testAllocation, reservedFor []*apiv1.Pod) *resourceapi.ResourceClaim { + var deviceRequests []resourceapi.DeviceRequest + for _, request := range requests { + var selectors []resourceapi.DeviceSelector + for _, selector := range request.selectors { + selectors = append(selectors, resourceapi.DeviceSelector{CEL: &resourceapi.CELDeviceSelector{Expression: selector}}) + } + deviceRequest := resourceapi.DeviceRequest{ + Name: request.name, + DeviceClassName: "default-class", + Selectors: selectors, + } + if request.all { + deviceRequest.AllocationMode = resourceapi.DeviceAllocationModeAll + } else { + deviceRequest.AllocationMode = resourceapi.DeviceAllocationModeExactCount + deviceRequest.Count = request.count + } + deviceRequests = append(deviceRequests, deviceRequest) + } + + claim := &resourceapi.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: "default", + UID: types.UID(claimName), + }, + Spec: resourceapi.ResourceClaimSpec{ + Devices: resourceapi.DeviceClaim{ + Requests: deviceRequests, + }, + }, + } + if owningPod != nil { + claim.OwnerReferences = GenerateOwnerReferences(owningPod.Name, "Pod", "v1", owningPod.UID) + } + if len(allocations) > 0 { + var deviceAllocations []resourceapi.DeviceRequestAllocationResult + for _, allocation := range allocations { + deviceAllocations = append(deviceAllocations, resourceapi.DeviceRequestAllocationResult{ + Driver: allocation.driver, + Request: allocation.request, + Pool: allocation.pool, + Device: allocation.device, + }) + } + var nodeSelector *apiv1.NodeSelector + if nodeName != "" { + nodeSelector = &apiv1.NodeSelector{ + NodeSelectorTerms: []apiv1.NodeSelectorTerm{{ + MatchFields: []apiv1.NodeSelectorRequirement{ + { + Key: "metadata.name", + Operator: apiv1.NodeSelectorOpIn, + Values: []string{nodeName}, + }, + }, + }}, + } + } + var podReservations []resourceapi.ResourceClaimConsumerReference + if owningPod != nil { + podReservations = []resourceapi.ResourceClaimConsumerReference{ + { + APIGroup: "", + Resource: "pods", + Name: owningPod.Name, + UID: owningPod.UID, + }, + } + } else { + for _, pod := range podReservations { + podReservations = append(podReservations, resourceapi.ResourceClaimConsumerReference{ + APIGroup: "", + Resource: "pods", + Name: pod.Name, + UID: pod.UID, + }) + } + } + claim.Status = resourceapi.ResourceClaimStatus{ + Allocation: &resourceapi.AllocationResult{ + Devices: resourceapi.DeviceAllocationResult{ + Results: deviceAllocations, + }, + NodeSelector: nodeSelector, + }, + ReservedFor: podReservations, + } + } + return claim +} + +func testDevices(namePrefix string, count int, attributes map[string]string, capacity map[string]string) []testDevice { + var result []testDevice + for i := range count { + result = append(result, testDevice{name: fmt.Sprintf("%s-%d", namePrefix, i), attributes: attributes, capacity: capacity}) + } + return result +} + +func nodeTemplateResourceSlices(driver string, poolSliceCount, poolGen int64, deviceDefs []testDevice) func(nodeName string) []*resourceapi.ResourceSlice { + return func(nodeName string) []*resourceapi.ResourceSlice { + return testResourceSlices(driver, nodeName, poolSliceCount, poolGen, sliceNodeAvailability{node: nodeName}, deviceDefs) + } +} + +func testResourceSlices(driver, poolName string, poolSliceCount, poolGen int64, avail sliceNodeAvailability, deviceDefs []testDevice) []*resourceapi.ResourceSlice { + var slices []*resourceapi.ResourceSlice + for sliceIndex := range poolSliceCount { + slice := &resourceapi.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%s-gen%d-slice%d", driver, poolName, poolGen, sliceIndex), + }, + Spec: resourceapi.ResourceSliceSpec{ + Driver: driver, + Pool: resourceapi.ResourcePool{ + Name: poolName, + Generation: poolGen, + ResourceSliceCount: poolSliceCount, + }, + }, + } + + if avail.node != "" { + slice.Spec.NodeName = avail.node + } else if avail.all { + slice.Spec.AllNodes = true + } else if len(avail.nodes) > 0 { + slice.Spec.NodeSelector = &apiv1.NodeSelector{ + NodeSelectorTerms: []apiv1.NodeSelectorTerm{ + {MatchFields: []apiv1.NodeSelectorRequirement{{Key: "metadata.name", Operator: apiv1.NodeSelectorOpIn, Values: avail.nodes}}}, + }, + } + } + + slices = append(slices, slice) + } + + var devices []resourceapi.Device + for _, deviceDef := range deviceDefs { + device := resourceapi.Device{ + Name: deviceDef.name, + Basic: &resourceapi.BasicDevice{ + Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{}, + Capacity: map[resourceapi.QualifiedName]resourceapi.DeviceCapacity{}, + }, + } + for name, val := range deviceDef.attributes { + val := val + device.Basic.Attributes[resourceapi.QualifiedName(driver+"/"+name)] = resourceapi.DeviceAttribute{StringValue: &val} + } + for name, quantity := range deviceDef.capacity { + device.Basic.Capacity[resourceapi.QualifiedName(name)] = resourceapi.DeviceCapacity{Value: resource.MustParse(quantity)} + } + devices = append(devices, device) + } + + devPerSlice := int64(math.Ceil(float64(len(devices)) / float64(poolSliceCount))) + addedToSlice := int64(0) + sliceIndex := 0 + for _, device := range devices { + if addedToSlice >= devPerSlice { + sliceIndex += 1 + addedToSlice = 0 + } + slice := slices[sliceIndex] + slice.Spec.Devices = append(slice.Spec.Devices, device) + addedToSlice += 1 + } + return slices +} diff --git a/cluster-autoscaler/core/static_autoscaler_test.go b/cluster-autoscaler/core/static_autoscaler_test.go index acb0df84e679..aabb4cde56f8 100644 --- a/cluster-autoscaler/core/static_autoscaler_test.go +++ b/cluster-autoscaler/core/static_autoscaler_test.go @@ -35,6 +35,7 @@ import ( appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" + resourceapi "k8s.io/api/resource/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" @@ -55,7 +56,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/estimator" "k8s.io/autoscaler/cluster-autoscaler/observers/loopstart" ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors" - "k8s.io/autoscaler/cluster-autoscaler/processors/callbacks" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups/asyncnodegroups" "k8s.io/autoscaler/cluster-autoscaler/processors/scaledowncandidates" @@ -63,6 +63,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" + draprovider "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/provider" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" "k8s.io/autoscaler/cluster-autoscaler/simulator/utilization" @@ -167,10 +168,11 @@ func setUpScaleDownActuator(ctx *context.AutoscalingContext, autoscalingOptions } type nodeGroup struct { - name string - nodes []*apiv1.Node - min int - max int + name string + nodes []*apiv1.Node + template *framework.NodeInfo + min int + max int } type scaleCall struct { ng string @@ -190,6 +192,17 @@ func (p *scaleDownStatusProcessorMock) Process(_ *context.AutoscalingContext, st func (p *scaleDownStatusProcessorMock) CleanUp() { } +type fakeAllObjectsLister[T any] struct { + objects []T +} + +func (l *fakeAllObjectsLister[T]) ListAll() ([]T, error) { + if l == nil { + return nil, nil + } + return l.objects, nil +} + type commonMocks struct { readyNodeLister *kube_util.TestNodeLister allNodeLister *kube_util.TestNodeLister @@ -198,6 +211,10 @@ type commonMocks struct { daemonSetLister *daemonSetListerMock nodeDeletionTracker *deletiontracker.NodeDeletionTracker + resourceClaimLister *fakeAllObjectsLister[*resourceapi.ResourceClaim] + resourceSliceLister *fakeAllObjectsLister[*resourceapi.ResourceSlice] + deviceClassLister *fakeAllObjectsLister[*resourceapi.DeviceClass] + onScaleUp *onScaleUpMock onScaleDown *onScaleDownMock } @@ -211,16 +228,20 @@ func newCommonMocks() *commonMocks { daemonSetLister: &daemonSetListerMock{}, onScaleUp: &onScaleUpMock{}, onScaleDown: &onScaleDownMock{}, + resourceClaimLister: &fakeAllObjectsLister[*resourceapi.ResourceClaim]{}, + resourceSliceLister: &fakeAllObjectsLister[*resourceapi.ResourceSlice]{}, + deviceClassLister: &fakeAllObjectsLister[*resourceapi.DeviceClass]{}, } } type autoscalerSetupConfig struct { - nodeGroups []*nodeGroup - nodeStateUpdateTime time.Time - autoscalingOptions config.AutoscalingOptions - clusterStateConfig clusterstate.ClusterStateRegistryConfig - mocks *commonMocks - nodesDeleted chan bool + nodeGroups []*nodeGroup + nodeStateUpdateTime time.Time + autoscalingOptions config.AutoscalingOptions + optionsBlockDefaulting bool + clusterStateConfig clusterstate.ClusterStateRegistryConfig + mocks *commonMocks + nodesDeleted chan bool } func setupCloudProvider(config *autoscalerSetupConfig) (*testprovider.TestCloudProvider, error) { @@ -232,7 +253,7 @@ func setupCloudProvider(config *autoscalerSetupConfig) (*testprovider.TestCloudP config.nodesDeleted <- true return ret }) - + nodeGroupTemplates := map[string]*framework.NodeInfo{} for _, ng := range config.nodeGroups { provider.AddNodeGroup(ng.name, ng.min, ng.max, len(ng.nodes)) for _, node := range ng.nodes { @@ -242,16 +263,23 @@ func setupCloudProvider(config *autoscalerSetupConfig) (*testprovider.TestCloudP if reflectedNg == nil { return nil, fmt.Errorf("Nodegroup '%v' found as nil after setting up cloud provider", ng.name) } + if ng.template != nil { + nodeGroupTemplates[ng.name] = ng.template + } } + provider.SetMachineTemplates(nodeGroupTemplates) return provider, nil } -func setupAutoscalingContext(opts config.AutoscalingOptions, provider cloudprovider.CloudProvider, processorCallbacks callbacks.ProcessorCallbacks) (context.AutoscalingContext, error) { - context, err := NewScaleTestAutoscalingContext(opts, &fake.Clientset{}, nil, provider, processorCallbacks, nil) - if err != nil { - return context, err - } - return context, nil +func applySaneDefaultOpts(ctx *context.AutoscalingContext) { + ctx.AutoscalingOptions.MaxScaleDownParallelism = 10 + ctx.AutoscalingOptions.MaxDrainParallelism = 1 + ctx.AutoscalingOptions.NodeDeletionBatcherInterval = 0 * time.Second + ctx.AutoscalingOptions.NodeDeleteDelayAfterTaint = 1 * time.Second + ctx.AutoscalingOptions.ScaleDownSimulationTimeout = 10 * time.Second + ctx.AutoscalingOptions.SkipNodesWithSystemPods = true + ctx.AutoscalingOptions.SkipNodesWithLocalStorage = true + ctx.AutoscalingOptions.SkipNodesWithCustomControllerPods = true } func setupAutoscaler(config *autoscalerSetupConfig) (*StaticAutoscaler, error) { @@ -265,44 +293,49 @@ func setupAutoscaler(config *autoscalerSetupConfig) (*StaticAutoscaler, error) { allNodes = append(allNodes, ng.nodes...) } - // Create context with mocked lister registry. + // Create all necessary autoscaler dependencies, applying the mocks from config. processorCallbacks := newStaticAutoscalerProcessorCallbacks() - - context, err := setupAutoscalingContext(config.autoscalingOptions, provider, processorCallbacks) - + listerRegistry := kube_util.NewListerRegistry(config.mocks.allNodeLister, config.mocks.readyNodeLister, config.mocks.allPodLister, + config.mocks.podDisruptionBudgetLister, config.mocks.daemonSetLister, nil, nil, nil, nil) + ctx, err := NewScaleTestAutoscalingContext(config.autoscalingOptions, &fake.Clientset{}, listerRegistry, provider, processorCallbacks, nil) if err != nil { return nil, err } + if !config.optionsBlockDefaulting { + // Apply sane default options that make testing scale-down etc. possible - if not explicitly stated in the config that this is not desired. + applySaneDefaultOpts(&ctx) + } - setUpScaleDownActuator(&context, config.autoscalingOptions) - - listerRegistry := kube_util.NewListerRegistry(config.mocks.allNodeLister, config.mocks.readyNodeLister, config.mocks.allPodLister, - config.mocks.podDisruptionBudgetLister, config.mocks.daemonSetLister, - nil, nil, nil, nil) - context.ListerRegistry = listerRegistry - - ngConfigProcesssor := nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.autoscalingOptions.NodeGroupDefaults) - - processors := processorstest.NewTestProcessors(&context) - - clusterState := clusterstate.NewClusterStateRegistry(provider, config.clusterStateConfig, context.LogRecorder, NewBackoff(), ngConfigProcesssor, processors.AsyncNodeGroupStateChecker) - + processors := processorstest.NewTestProcessors(&ctx) + clusterState := clusterstate.NewClusterStateRegistry(provider, config.clusterStateConfig, ctx.LogRecorder, NewBackoff(), processors.NodeGroupConfigProcessor, processors.AsyncNodeGroupStateChecker) clusterState.UpdateNodes(allNodes, nil, config.nodeStateUpdateTime) + processors.ScaleStateNotifier.Register(clusterState) - sdPlanner, sdActuator := newScaleDownPlannerAndActuator(&context, processors, clusterState, config.mocks.nodeDeletionTracker) suOrchestrator := orchestrator.New() + suOrchestrator.Initialize(&ctx, processors, clusterState, newEstimatorBuilder(), taints.TaintConfig{}) - suOrchestrator.Initialize(&context, processors, clusterState, newEstimatorBuilder(), taints.TaintConfig{}) + deleteOptions := options.NewNodeDeleteOptions(ctx.AutoscalingOptions) + drainabilityRules := rules.Default(deleteOptions) + draProvider := draprovider.NewProvider(config.mocks.resourceClaimLister, config.mocks.resourceSliceLister, config.mocks.deviceClassLister) + nodeDeletionTracker := config.mocks.nodeDeletionTracker + if nodeDeletionTracker == nil { + nodeDeletionTracker = deletiontracker.NewNodeDeletionTracker(0 * time.Second) + } + ctx.ScaleDownActuator = actuation.NewActuator(&ctx, clusterState, nodeDeletionTracker, deleteOptions, drainabilityRules, processors.NodeGroupConfigProcessor, draProvider) + sdPlanner := planner.New(&ctx, processors, deleteOptions, drainabilityRules) + + processorCallbacks.scaleDownPlanner = sdPlanner autoscaler := &StaticAutoscaler{ - AutoscalingContext: &context, + AutoscalingContext: &ctx, clusterStateRegistry: clusterState, scaleDownPlanner: sdPlanner, - scaleDownActuator: sdActuator, + scaleDownActuator: ctx.ScaleDownActuator, scaleUpOrchestrator: suOrchestrator, processors: processors, loopStartNotifier: loopstart.NewObserversList(nil), processorCallbacks: processorCallbacks, + draProvider: draProvider, } return autoscaler, nil diff --git a/cluster-autoscaler/core/test/common.go b/cluster-autoscaler/core/test/common.go index 5abeffa27e85..9680901727b5 100644 --- a/cluster-autoscaler/core/test/common.go +++ b/cluster-autoscaler/core/test/common.go @@ -44,6 +44,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/utils/labels" "github.com/stretchr/testify/assert" + apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" kube_client "k8s.io/client-go/kubernetes" @@ -178,7 +179,7 @@ func NewScaleTestAutoscalingContext( if debuggingSnapshotter == nil { debuggingSnapshotter = debuggingsnapshot.NewDebuggingSnapshotter(false) } - clusterSnapshot, err := testsnapshot.NewTestSnapshot() + clusterSnapshot, fwHandle, err := testsnapshot.NewTestSnapshotAndHandle() if err != nil { return context.AutoscalingContext{}, err } @@ -192,6 +193,7 @@ func NewScaleTestAutoscalingContext( }, CloudProvider: provider, ClusterSnapshot: clusterSnapshot, + FrameworkHandle: fwHandle, ExpanderStrategy: random.NewStrategy(), ProcessorCallbacks: processorCallbacks, DebuggingSnapshotter: debuggingSnapshotter, diff --git a/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go b/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go index 6e94d6cb4ae0..04941b26c494 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go +++ b/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go @@ -28,38 +28,34 @@ type testFailer interface { Fatalf(format string, args ...any) } -// NewTestSnapshot returns an instance of ClusterSnapshot that can be used in tests. -func NewTestSnapshot() (clustersnapshot.ClusterSnapshot, error) { - testFwHandle, err := framework.NewTestFrameworkHandle() - if err != nil { - return nil, err - } - return predicate.NewPredicateSnapshot(store.NewBasicSnapshotStore(), testFwHandle, true), nil -} - // NewTestSnapshotOrDie returns an instance of ClusterSnapshot that can be used in tests. func NewTestSnapshotOrDie(t testFailer) clustersnapshot.ClusterSnapshot { - snapshot, err := NewTestSnapshot() + snapshot, _, err := NewTestSnapshotAndHandle() if err != nil { t.Fatalf("NewTestSnapshotOrDie: couldn't create test ClusterSnapshot: %v", err) } return snapshot } -// NewCustomTestSnapshot returns an instance of ClusterSnapshot with a specific ClusterSnapshotStore that can be used in tests. -func NewCustomTestSnapshot(snapshotStore clustersnapshot.ClusterSnapshotStore) (clustersnapshot.ClusterSnapshot, error) { - testFwHandle, err := framework.NewTestFrameworkHandle() - if err != nil { - return nil, err - } - return predicate.NewPredicateSnapshot(snapshotStore, testFwHandle, true), nil -} - // NewCustomTestSnapshotOrDie returns an instance of ClusterSnapshot with a specific ClusterSnapshotStore that can be used in tests. func NewCustomTestSnapshotOrDie(t testFailer, snapshotStore clustersnapshot.ClusterSnapshotStore) clustersnapshot.ClusterSnapshot { - result, err := NewCustomTestSnapshot(snapshotStore) + result, _, err := NewCustomTestSnapshotAndHandle(snapshotStore) if err != nil { t.Fatalf("NewCustomTestSnapshotOrDie: couldn't create test ClusterSnapshot: %v", err) } return result } + +// NewTestSnapshotAndHandle returns an instance of ClusterSnapshot and a framework handle that can be used in tests. +func NewTestSnapshotAndHandle() (clustersnapshot.ClusterSnapshot, *framework.Handle, error) { + return NewCustomTestSnapshotAndHandle(store.NewBasicSnapshotStore()) +} + +// NewCustomTestSnapshotAndHandle returns an instance of ClusterSnapshot with a specific ClusterSnapshotStore that can be used in tests. +func NewCustomTestSnapshotAndHandle(snapshotStore clustersnapshot.ClusterSnapshotStore) (clustersnapshot.ClusterSnapshot, *framework.Handle, error) { + testFwHandle, err := framework.NewTestFrameworkHandle() + if err != nil { + return nil, nil, err + } + return predicate.NewPredicateSnapshot(snapshotStore, testFwHandle, true), testFwHandle, nil +}