diff --git a/cluster-autoscaler/clusterstate/clusterstate.go b/cluster-autoscaler/clusterstate/clusterstate.go
index dc8d50093221..21affaeae393 100644
--- a/cluster-autoscaler/clusterstate/clusterstate.go
+++ b/cluster-autoscaler/clusterstate/clusterstate.go
@@ -45,6 +45,10 @@ const (
 	MaxNodeStartupTime = 15 * time.Minute
 )
 
+var (
+	errMaxNodeProvisionTimeProviderNotSet = errors.New("MaxNodeProvisionTimeProvider was not set in cluster state")
+)
+
 type maxNodeProvisionTimeProvider interface {
 	// GetMaxNodeProvisionTime returns MaxNodeProvisionTime value that should be used for the given NodeGroup.
 	GetMaxNodeProvisionTime(nodeGroup cloudprovider.NodeGroup) (time.Duration, error)
@@ -143,7 +147,7 @@ type ClusterStateRegistry struct {
 }
 
 // NewClusterStateRegistry creates new ClusterStateRegistry.
-func NewClusterStateRegistry(cloudProvider cloudprovider.CloudProvider, config ClusterStateRegistryConfig, logRecorder *utils.LogEventRecorder, backoff backoff.Backoff, maxNodeProvisionTimeProvider maxNodeProvisionTimeProvider) *ClusterStateRegistry {
+func NewClusterStateRegistry(cloudProvider cloudprovider.CloudProvider, config ClusterStateRegistryConfig, logRecorder *utils.LogEventRecorder, backoff backoff.Backoff) *ClusterStateRegistry {
 	emptyStatus := &api.ClusterAutoscalerStatus{
 		ClusterwideConditions: make([]api.ClusterAutoscalerCondition, 0),
 		NodeGroupStatuses:     make([]api.NodeGroupStatus, 0),
@@ -167,7 +171,6 @@ func NewClusterStateRegistry(cloudProvider cloudprovider.CloudProvider, config C
 		cloudProviderNodeInstancesCache: utils.NewCloudProviderNodeInstancesCache(cloudProvider),
 		interrupt:                       make(chan struct{}),
 		scaleUpFailures:                 make(map[string][]ScaleUpFailure),
-		maxNodeProvisionTimeProvider:    maxNodeProvisionTimeProvider,
 	}
 }
 
@@ -193,13 +196,21 @@ func (csr *ClusterStateRegistry) RegisterOrUpdateScaleUp(nodeGroup cloudprovider
 	csr.registerOrUpdateScaleUpNoLock(nodeGroup, delta, currentTime)
 }
 
+// RegisterProviders registers providers in the cluster state registry.
+func (csr *ClusterStateRegistry) RegisterProviders(maxNodeProvisionTimeProvider maxNodeProvisionTimeProvider) {
+	csr.maxNodeProvisionTimeProvider = maxNodeProvisionTimeProvider
+}
+
 // MaxNodeProvisionTime returns MaxNodeProvisionTime value that should be used for the given NodeGroup.
 func (csr *ClusterStateRegistry) MaxNodeProvisionTime(nodeGroup cloudprovider.NodeGroup) (time.Duration, error) {
+	if csr.maxNodeProvisionTimeProvider == nil {
+		return 0, errMaxNodeProvisionTimeProviderNotSet
+	}
 	return csr.maxNodeProvisionTimeProvider.GetMaxNodeProvisionTime(nodeGroup)
 }
 
 func (csr *ClusterStateRegistry) registerOrUpdateScaleUpNoLock(nodeGroup cloudprovider.NodeGroup, delta int, currentTime time.Time) {
-	maxNodeProvisionTime, err := csr.maxNodeProvisionTimeProvider.GetMaxNodeProvisionTime(nodeGroup)
+	maxNodeProvisionTime, err := csr.MaxNodeProvisionTime(nodeGroup)
 	if err != nil {
 		klog.Warningf("Couldn't update scale up request: failed to get maxNodeProvisionTime for node group %s: %w", nodeGroup.Id(), err)
 		return
@@ -605,7 +616,7 @@ func (csr *ClusterStateRegistry) updateReadinessStats(currentTime time.Time) {
 			continue
 		}
 		perNgCopy := perNodeGroup[nodeGroup.Id()]
-		maxNodeProvisionTime, err := csr.maxNodeProvisionTimeProvider.GetMaxNodeProvisionTime(nodeGroup)
+		maxNodeProvisionTime, err := csr.MaxNodeProvisionTime(nodeGroup)
 		if err != nil {
 			klog.Warningf("Failed to get maxNodeProvisionTime for node %s in node group %s: %w", unregistered.Node.Name, nodeGroup.Id(), err)
 			continue
diff --git a/cluster-autoscaler/clusterstate/clusterstate_test.go b/cluster-autoscaler/clusterstate/clusterstate_test.go
index 9ff4eea5006f..aedc25836714 100644
--- a/cluster-autoscaler/clusterstate/clusterstate_test.go
+++ b/cluster-autoscaler/clusterstate/clusterstate_test.go
@@ -72,8 +72,8 @@ func TestOKWithScaleUp(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(time.Minute))
 	clusterstate.RegisterOrUpdateScaleUp(provider.GetNodeGroup("ng1"), 4, time.Now())
 	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
 	assert.NoError(t, err)
@@ -114,8 +114,8 @@ func TestEmptyOK(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(time.Minute))
 	err := clusterstate.UpdateNodes([]*apiv1.Node{}, nil, now.Add(-5*time.Second))
 	assert.NoError(t, err)
 	assert.True(t, clusterstate.IsClusterHealthy())
@@ -155,7 +155,8 @@ func TestOKOneUnreadyNode(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(), NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
 	assert.NoError(t, err)
 	assert.True(t, clusterstate.IsClusterHealthy())
@@ -193,8 +194,8 @@ func TestNodeWithoutNodeGroupDontCrash(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	err := clusterstate.UpdateNodes([]*apiv1.Node{noNgNode}, nil, now)
 	assert.NoError(t, err)
 	assert.Empty(t, clusterstate.GetScaleUpFailures())
@@ -221,8 +222,8 @@ func TestOKOneUnreadyNodeWithScaleDownCandidate(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
 	clusterstate.UpdateScaleDownCandidates([]*apiv1.Node{ng1_1}, now)
 
@@ -287,8 +288,8 @@ func TestMissingNodes(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
 	assert.NoError(t, err)
 	assert.True(t, clusterstate.IsClusterHealthy())
@@ -330,8 +331,8 @@ func TestTooManyUnready(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
 	assert.NoError(t, err)
 	assert.False(t, clusterstate.IsClusterHealthy())
@@ -360,8 +361,8 @@ func TestUnreadyLongAfterCreation(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
 	assert.NoError(t, err)
 	assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().Unready))
@@ -393,8 +394,8 @@ func TestNotStarted(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
 	assert.NoError(t, err)
 	assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().NotStarted))
@@ -431,7 +432,8 @@ func TestExpiredScaleUp(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(), NewStaticMaxNodeProvisionTimeProvider(2*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(2 * time.Minute))
 	clusterstate.RegisterOrUpdateScaleUp(provider.GetNodeGroup("ng1"), 4, now.Add(-3*time.Minute))
 	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now)
 	assert.NoError(t, err)
@@ -456,8 +458,8 @@ func TestRegisterScaleDown(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 
 	now := time.Now()
 
@@ -526,8 +528,8 @@ func TestUpcomingNodes(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1, ng3_1, ng4_1, ng5_1, ng5_2}, nil, now)
 	assert.NoError(t, err)
 	assert.Empty(t, clusterstate.GetScaleUpFailures())
@@ -574,8 +576,8 @@ func TestTaintBasedNodeDeletion(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_2}, nil, now)
 	assert.NoError(t, err)
 	assert.Empty(t, clusterstate.GetScaleUpFailures())
@@ -596,8 +598,8 @@ func TestIncorrectSize(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	now := time.Now()
 	clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now.Add(-5*time.Minute))
 	incorrect := clusterstate.incorrectNodeGroupSizes["ng1"]
@@ -633,8 +635,8 @@ func TestUnregisteredNodes(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(10*time.Second))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(10 * time.Second))
 
 	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, time.Now().Add(-time.Minute))
 	assert.NoError(t, err)
@@ -683,8 +685,8 @@ func TestCloudProviderDeletedNodes(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(10*time.Second))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(10 * time.Second))
 
 	now.Add(time.Minute)
 	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_2, noNgNode}, nil, now)
@@ -885,8 +887,8 @@ func TestScaleUpBackoff(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(120*time.Second))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(120 * time.Second))
 
 	// After failed scale-up, node group should be still healthy, but should backoff from scale-ups
 	clusterstate.RegisterOrUpdateScaleUp(provider.GetNodeGroup("ng1"), 1, now.Add(-180*time.Second))
@@ -953,8 +955,8 @@ func TestGetClusterSize(t *testing.T) {
 	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 
 	// There are 2 actual nodes in 2 node groups with target sizes of 5 and 1.
 	clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1, notAutoscaledNode}, nil, now)
@@ -1001,7 +1003,8 @@ func TestUpdateScaleUp(t *testing.T) {
 		},
 		fakeLogRecorder,
 		newBackoff(),
-		NewStaticMaxNodeProvisionTimeProvider(10*time.Second))
+	)
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(10 * time.Second))
 
 	clusterstate.RegisterOrUpdateScaleUp(provider.GetNodeGroup("ng1"), 100, now)
 	assert.Equal(t, clusterstate.scaleUpRequests["ng1"].Increase, 100)
@@ -1039,7 +1042,8 @@ func TestScaleUpFailures(t *testing.T) {
 
 	fakeClient := &fake.Clientset{}
 	fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
-	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{}, fakeLogRecorder, newBackoff(), NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{}, fakeLogRecorder, newBackoff())
+	clusterstate.RegisterProviders(NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 
 	clusterstate.RegisterFailedScaleUp(provider.GetNodeGroup("ng1"), metrics.Timeout, now)
 	clusterstate.RegisterFailedScaleUp(provider.GetNodeGroup("ng2"), metrics.Timeout, now)
diff --git a/cluster-autoscaler/clusterstate/max_node_provision_time_provider.go b/cluster-autoscaler/clusterstate/providers/max_node_provision_time_provider.go
similarity index 66%
rename from cluster-autoscaler/clusterstate/max_node_provision_time_provider.go
rename to cluster-autoscaler/clusterstate/providers/max_node_provision_time_provider.go
index 2c37b9c0f911..076082ba7230 100644
--- a/cluster-autoscaler/clusterstate/max_node_provision_time_provider.go
+++ b/cluster-autoscaler/clusterstate/providers/max_node_provision_time_provider.go
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-package clusterstate
+package providers
 
 import (
 	"time"
@@ -25,7 +25,7 @@ import (
 )
 
 // NewDefaultMaxNodeProvisionTimeProvider returns the default maxNodeProvisionTimeProvider which uses the NodeGroupConfigProcessor.
-func NewDefaultMaxNodeProvisionTimeProvider(context *context.AutoscalingContext, nodeGroupConfigProcessor nodegroupconfig.NodeGroupConfigProcessor) maxNodeProvisionTimeProvider {
+func NewDefaultMaxNodeProvisionTimeProvider(context *context.AutoscalingContext, nodeGroupConfigProcessor nodegroupconfig.NodeGroupConfigProcessor) *defultMaxNodeProvisionTimeProvider {
 	return &defultMaxNodeProvisionTimeProvider{context: context, nodeGroupConfigProcessor: nodeGroupConfigProcessor}
 }
 
@@ -38,17 +38,3 @@ type defultMaxNodeProvisionTimeProvider struct {
 func (p *defultMaxNodeProvisionTimeProvider) GetMaxNodeProvisionTime(nodeGroup cloudprovider.NodeGroup) (time.Duration, error) {
 	return p.nodeGroupConfigProcessor.GetMaxNodeProvisionTime(p.context, nodeGroup)
 }
-
-// NewStaticMaxNodeProvisionTimeProvider returns static maxNodeProvisionTimeProvider which returns constant MaxNodeProvisionTime for every NodeGroup. Can be used for convenient testing.
-func NewStaticMaxNodeProvisionTimeProvider(maxNodeProvisionTime time.Duration) maxNodeProvisionTimeProvider {
-	return &staticMaxNodeProvisionTimeProvider{maxNodeProvisionTime}
-}
-
-type staticMaxNodeProvisionTimeProvider struct {
-	staticMaxNodeProvisionTime time.Duration
-}
-
-// GetMaxNodeProvisionTime returns constant MaxNodeProvisionTime value that should be used for every NodeGroup.
-func (p *staticMaxNodeProvisionTimeProvider) GetMaxNodeProvisionTime(cloudprovider.NodeGroup) (time.Duration, error) {
-	return p.staticMaxNodeProvisionTime, nil
-}
diff --git a/cluster-autoscaler/clusterstate/testutils.go b/cluster-autoscaler/clusterstate/testutils.go
new file mode 100644
index 000000000000..df6004a6a966
--- /dev/null
+++ b/cluster-autoscaler/clusterstate/testutils.go
@@ -0,0 +1,37 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package clusterstate
+
+import (
+	"time"
+
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
+)
+
+// NewMockMaxNodeProvisionTimeProvider returns a static maxNodeProvisionTimeProvider which returns a constant MaxNodeProvisionTime for every NodeGroup.
+func NewMockMaxNodeProvisionTimeProvider(maxNodeProvisionTime time.Duration) *staticMockMaxNodeProvisionTimeProvider {
+	return &staticMockMaxNodeProvisionTimeProvider{maxNodeProvisionTime}
+}
+
+type staticMockMaxNodeProvisionTimeProvider struct {
+	staticMaxNodeProvisionTime time.Duration
+}
+
+// GetMaxNodeProvisionTime returns the constant MaxNodeProvisionTime value that should be used for every NodeGroup.
+func (p *staticMockMaxNodeProvisionTimeProvider) GetMaxNodeProvisionTime(cloudprovider.NodeGroup) (time.Duration, error) {
+	return p.staticMaxNodeProvisionTime, nil
+}
diff --git a/cluster-autoscaler/context/autoscaling_context.go b/cluster-autoscaler/context/autoscaling_context.go
index ffee60eca5cb..040d0db8261e 100644
--- a/cluster-autoscaler/context/autoscaling_context.go
+++ b/cluster-autoscaler/context/autoscaling_context.go
@@ -18,6 +18,7 @@ package context
 
 import (
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
+	"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
 	"k8s.io/autoscaler/cluster-autoscaler/clusterstate/utils"
 	"k8s.io/autoscaler/cluster-autoscaler/config"
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown"
@@ -60,6 +61,8 @@ type AutoscalingContext struct {
 	ScaleDownActuator scaledown.Actuator
 	// RemainingPdbTracker tracks the remaining pod disruption budget
 	RemainingPdbTracker pdb.RemainingPdbTracker
+	// ClusterStateRegistry tracks the health of the node groups and pending scale-ups and scale-downs
+	ClusterStateRegistry *clusterstate.ClusterStateRegistry
 }
 
 // AutoscalingKubeClients contains all Kubernetes API clients,
@@ -105,7 +108,9 @@ func NewAutoscalingContext(
 	estimatorBuilder estimator.EstimatorBuilder,
 	processorCallbacks processor_callbacks.ProcessorCallbacks,
 	debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter,
-	remainingPdbTracker pdb.RemainingPdbTracker) *AutoscalingContext {
+	remainingPdbTracker pdb.RemainingPdbTracker,
+	clusterStateRegistry *clusterstate.ClusterStateRegistry,
+) *AutoscalingContext {
 	return &AutoscalingContext{
 		AutoscalingOptions:   options,
 		CloudProvider:        cloudProvider,
@@ -117,6 +122,7 @@ func NewAutoscalingContext(
 		ProcessorCallbacks:   processorCallbacks,
 		DebuggingSnapshotter: debuggingSnapshotter,
 		RemainingPdbTracker:  remainingPdbTracker,
+		ClusterStateRegistry: clusterStateRegistry,
 	}
 }
 
diff --git a/cluster-autoscaler/core/scaledown/actuation/actuator_test.go b/cluster-autoscaler/core/scaledown/actuation/actuator_test.go
index 93dfa7c01fe5..c9ad13faab57 100644
--- a/cluster-autoscaler/core/scaledown/actuation/actuator_test.go
+++ b/cluster-autoscaler/core/scaledown/actuation/actuator_test.go
@@ -805,7 +805,8 @@ func TestStartDeletion(t *testing.T) {
 			if err != nil {
 				t.Fatalf("Couldn't set up autoscaling context: %v", err)
 			}
-			csr := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, ctx.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+			csr := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, ctx.LogRecorder, NewBackoff())
+			csr.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 			for _, bucket := range tc.emptyNodes {
 				for _, node := range bucket.Nodes {
 					err := ctx.ClusterSnapshot.AddNodeWithPods(node, tc.pods[node.Name])
@@ -1061,7 +1062,8 @@ func TestStartDeletionInBatchBasic(t *testing.T) {
 			if err != nil {
 				t.Fatalf("Couldn't set up autoscaling context: %v", err)
 			}
-			csr := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, ctx.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+			csr := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, ctx.LogRecorder, NewBackoff())
+			csr.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 			ndt := deletiontracker.NewNodeDeletionTracker(0)
 			ndb := NewNodeDeletionBatcher(&ctx, csr, ndt, deleteInterval)
 			evictor := Evictor{EvictionRetryTime: 0, DsEvictionRetryTime: 0, DsEvictionEmptyNodeTimeout: 0, PodEvictionHeadroom: DefaultPodEvictionHeadroom}
diff --git a/cluster-autoscaler/core/scaledown/actuation/delete_in_batch_test.go b/cluster-autoscaler/core/scaledown/actuation/delete_in_batch_test.go
index 9370f538cf43..2e41032fd2c1 100644
--- a/cluster-autoscaler/core/scaledown/actuation/delete_in_batch_test.go
+++ b/cluster-autoscaler/core/scaledown/actuation/delete_in_batch_test.go
@@ -162,7 +162,8 @@ func TestRemove(t *testing.T) {
 			})
 			ctx, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{}, fakeClient, nil, provider, nil, nil)
 
-			clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, fakeLogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+			clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, fakeLogRecorder, NewBackoff())
+			clusterStateRegistry.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 			if err != nil {
 				t.Fatalf("Couldn't set up autoscaling context: %v", err)
 			}
diff --git a/cluster-autoscaler/core/scaledown/legacy/legacy_test.go b/cluster-autoscaler/core/scaledown/legacy/legacy_test.go
index 173df9fdbacb..9197aa8e087a 100644
--- a/cluster-autoscaler/core/scaledown/legacy/legacy_test.go
+++ b/cluster-autoscaler/core/scaledown/legacy/legacy_test.go
@@ -146,7 +146,8 @@ func TestFindUnneededNodes(t *testing.T) {
 	context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, registry, provider, nil, nil)
 	assert.NoError(t, err)
 
-	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterStateRegistry.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	wrapper := newWrapperForTesting(&context, clusterStateRegistry, nil)
 	sd := wrapper.sd
 	allNodes := []*apiv1.Node{n1, n2, n3, n4, n5, n7, n8, n9}
@@ -277,7 +278,8 @@ func TestFindUnneededGPUNodes(t *testing.T) {
 	context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, registry, provider, nil, nil)
 	assert.NoError(t, err)
 
-	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterStateRegistry.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	wrapper := newWrapperForTesting(&context, clusterStateRegistry, nil)
 	sd := wrapper.sd
 	allNodes := []*apiv1.Node{n1, n2, n3}
@@ -392,7 +394,8 @@ func TestFindUnneededWithPerNodeGroupThresholds(t *testing.T) {
 	context, err := NewScaleTestAutoscalingContext(globalOptions, &fake.Clientset{}, registry, provider, nil, nil)
 	assert.NoError(t, err)
 
-	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterStateRegistry.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	wrapper := newWrapperForTesting(&context, clusterStateRegistry, nil)
 	sd := wrapper.sd
 	clustersnapshot.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, allNodes, allPods)
@@ -475,7 +478,8 @@ func TestPodsWithPreemptionsFindUnneededNodes(t *testing.T) {
 	context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, registry, provider, nil, nil)
 	assert.NoError(t, err)
 
-	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterStateRegistry.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	wrapper := newWrapperForTesting(&context, clusterStateRegistry, nil)
 	sd := wrapper.sd
 
@@ -539,7 +543,8 @@ func TestFindUnneededMaxCandidates(t *testing.T) {
 	context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, registry, provider, nil, nil)
 	assert.NoError(t, err)
 
-	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterStateRegistry.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	wrapper := newWrapperForTesting(&context, clusterStateRegistry, nil)
 	sd := wrapper.sd
 
@@ -623,7 +628,8 @@ func TestFindUnneededEmptyNodes(t *testing.T) {
 	context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, registry, provider, nil, nil)
 	assert.NoError(t, err)
 
-	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterStateRegistry.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	wrapper := newWrapperForTesting(&context, clusterStateRegistry, nil)
 	sd := wrapper.sd
 
@@ -680,7 +686,8 @@ func TestFindUnneededNodePool(t *testing.T) {
 	context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, registry, provider, nil, nil)
 	assert.NoError(t, err)
 
-	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterStateRegistry.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	wrapper := newWrapperForTesting(&context, clusterStateRegistry, nil)
 	sd := wrapper.sd
 	clustersnapshot.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, pods)
@@ -771,7 +778,8 @@ func TestScaleDown(t *testing.T) {
 	assert.NoError(t, err)
 	nodes := []*apiv1.Node{n1, n2}
 
-	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterStateRegistry.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	wrapper := newWrapperForTesting(&context, clusterStateRegistry, nil)
 	clustersnapshot.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{p1, p2})
 	autoscalererr = wrapper.UpdateClusterState(nodes, nodes, nil, time.Now().Add(-5*time.Minute))
@@ -1028,7 +1036,8 @@ func simpleScaleDownEmpty(t *testing.T, config *ScaleTestConfig) {
 	context, err := NewScaleTestAutoscalingContext(config.Options, fakeClient, registry, provider, nil, nil)
 	assert.NoError(t, err)
 
-	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterStateRegistry.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	wrapper := newWrapperForTesting(&context, clusterStateRegistry, config.NodeDeletionTracker)
 	clustersnapshot.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{})
 	autoscalererr = wrapper.UpdateClusterState(nodes, nodes, nil, time.Now().Add(-5*time.Minute))
@@ -1123,7 +1132,8 @@ func TestNoScaleDownUnready(t *testing.T) {
 	nodes := []*apiv1.Node{n1, n2}
 
 	// N1 is unready so it requires a bigger unneeded time.
-	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterStateRegistry.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	wrapper := newWrapperForTesting(&context, clusterStateRegistry, nil)
 	clustersnapshot.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{p2})
 	autoscalererr = wrapper.UpdateClusterState(nodes, nodes, nil, time.Now().Add(-5*time.Minute))
@@ -1237,7 +1247,8 @@ func TestScaleDownNoMove(t *testing.T) {
 
 	nodes := []*apiv1.Node{n1, n2}
 
-	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterStateRegistry.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	wrapper := newWrapperForTesting(&context, clusterStateRegistry, nil)
 	clustersnapshot.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{p1, p2})
 	autoscalererr = wrapper.UpdateClusterState(nodes, nodes, nil, time.Now().Add(-5*time.Minute))
diff --git a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go
index da7c4cdee95f..17bf9c81a797 100644
--- a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go
+++ b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go
@@ -970,7 +970,8 @@ func runSimpleScaleUpTest(t *testing.T, config *ScaleUpTestConfig) *ScaleUpTestR
 		Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
 	assert.NoError(t, err)
 	clusterState := clusterstate.
-		NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+		NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
 	processors := NewTestProcessors(&context)
 	orchestrator := New()
@@ -1071,7 +1072,8 @@ func TestScaleUpUnhealthy(t *testing.T) {
 
 	nodes := []*apiv1.Node{n1, n2}
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
 
 	p3 := BuildTestPod("p-new", 550, 0)
@@ -1116,7 +1118,8 @@ func TestBinpackingLimiter(t *testing.T) {
 		Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
 	assert.NoError(t, err)
 
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
 
 	extraPod := BuildTestPod("p-new", 500, 0)
@@ -1171,7 +1174,8 @@ func TestScaleUpNoHelp(t *testing.T) {
 
 	nodes := []*apiv1.Node{n1}
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
 
 	p3 := BuildTestPod("p-new", 500, 0)
@@ -1325,7 +1329,8 @@ func TestComputeSimilarNodeGroups(t *testing.T) {
 	assert.NoError(t, err)
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
 
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, ctx.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, ctx.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	assert.NoError(t, clusterState.UpdateNodes(nodes, nodeInfos, time.Now()))
 
 	suOrchestrator := &ScaleUpOrchestrator{}
@@ -1389,7 +1394,8 @@ func TestScaleUpBalanceGroups(t *testing.T) {
 	assert.NoError(t, err)
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
 
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
 
 	pods := make([]*apiv1.Pod, 0)
@@ -1451,7 +1457,8 @@ func TestScaleUpAutoprovisionedNodeGroup(t *testing.T) {
 	context, err := NewScaleTestAutoscalingContext(options, fakeClient, listers, provider, nil, nil)
 	assert.NoError(t, err)
 
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	processors := NewTestProcessors(&context)
 	processors.NodeGroupListProcessor = &MockAutoprovisioningNodeGroupListProcessor{T: t}
 
@@ -1506,7 +1513,8 @@ func TestScaleUpBalanceAutoprovisionedNodeGroups(t *testing.T) {
 	context, err := NewScaleTestAutoscalingContext(options, fakeClient, listers, provider, nil, nil)
 	assert.NoError(t, err)
 
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	processors := NewTestProcessors(&context)
 	processors.NodeGroupListProcessor = &MockAutoprovisioningNodeGroupListProcessor{T: t}
 
@@ -1567,7 +1575,8 @@ func TestScaleUpToMeetNodeGroupMinSize(t *testing.T) {
 	nodes := []*apiv1.Node{n1, n2}
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, time.Now())
 	processors := NewTestProcessors(&context)
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 	clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
 
 	suOrchestrator := New()
diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go
index 772f2030e218..0868daa75008 100644
--- a/cluster-autoscaler/core/static_autoscaler.go
+++ b/cluster-autoscaler/core/static_autoscaler.go
@@ -35,6 +35,7 @@ import (
 
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
 	"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
+	"k8s.io/autoscaler/cluster-autoscaler/clusterstate/providers"
 	"k8s.io/autoscaler/cluster-autoscaler/clusterstate/utils"
 	"k8s.io/autoscaler/cluster-autoscaler/config"
 	"k8s.io/autoscaler/cluster-autoscaler/context"
@@ -142,6 +143,11 @@ func NewStaticAutoscaler(
 	remainingPdbTracker pdb.RemainingPdbTracker,
 	scaleUpOrchestrator scaleup.Orchestrator) *StaticAutoscaler {
 
+	clusterStateConfig := clusterstate.ClusterStateRegistryConfig{
+		MaxTotalUnreadyPercentage: opts.MaxTotalUnreadyPercentage,
+		OkTotalUnreadyCount:       opts.OkTotalUnreadyCount,
+	}
+	clusterStateRegistry := clusterstate.NewClusterStateRegistry(cloudProvider, clusterStateConfig, autoscalingKubeClients.LogRecorder, backoff)
 	processorCallbacks := newStaticAutoscalerProcessorCallbacks()
 	autoscalingContext := context.NewAutoscalingContext(
 		opts,
@@ -153,15 +159,11 @@ func NewStaticAutoscaler(
 		estimatorBuilder,
 		processorCallbacks,
 		debuggingSnapshotter,
-		remainingPdbTracker)
-
-	clusterStateConfig := clusterstate.ClusterStateRegistryConfig{
-		MaxTotalUnreadyPercentage: opts.MaxTotalUnreadyPercentage,
-		OkTotalUnreadyCount:       opts.OkTotalUnreadyCount,
-	}
+		remainingPdbTracker,
+		clusterStateRegistry)
 
 	taintConfig := taints.NewTaintConfig(opts)
-	clusterStateRegistry := clusterstate.NewClusterStateRegistry(autoscalingContext.CloudProvider, clusterStateConfig, autoscalingContext.LogRecorder, backoff, clusterstate.NewDefaultMaxNodeProvisionTimeProvider(autoscalingContext, processors.NodeGroupConfigProcessor))
+	clusterStateRegistry.RegisterProviders(providers.NewDefaultMaxNodeProvisionTimeProvider(autoscalingContext, processors.NodeGroupConfigProcessor))
 	processors.ScaleDownCandidatesNotifier.Register(clusterStateRegistry)
 
 	deleteOptions := simulator.NewNodeDeleteOptions(opts)
diff --git a/cluster-autoscaler/core/static_autoscaler_test.go b/cluster-autoscaler/core/static_autoscaler_test.go
index 1f30c19cb7d2..a02dc3dcc22c 100644
--- a/cluster-autoscaler/core/static_autoscaler_test.go
+++ b/cluster-autoscaler/core/static_autoscaler_test.go
@@ -235,7 +235,8 @@ func TestStaticAutoscalerRunOnce(t *testing.T) {
 		OkTotalUnreadyCount:       1,
 	}
 	processors := NewTestProcessors(&context)
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime))
 	sdPlanner, sdActuator := newScaleDownPlannerAndActuator(t, &context, processors, clusterState)
 	suOrchestrator := orchestrator.New()
 	suOrchestrator.Initialize(&context, processors, clusterState, taints.TaintConfig{})
@@ -452,7 +453,8 @@ func TestStaticAutoscalerRunOnceWithAutoprovisionedEnabled(t *testing.T) {
 	clusterStateConfig := clusterstate.ClusterStateRegistryConfig{
 		OkTotalUnreadyCount: 0,
 	}
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime))
 	sdPlanner, sdActuator := newScaleDownPlannerAndActuator(t, &context, processors, clusterState)
 
 	suOrchestrator := orchestrator.New()
@@ -596,7 +598,8 @@ func TestStaticAutoscalerRunOnceWithALongUnregisteredNode(t *testing.T) {
 	clusterStateConfig := clusterstate.ClusterStateRegistryConfig{
 		OkTotalUnreadyCount: 1,
 	}
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime))
 
 	// broken node detected as unregistered
 	nodes := []*apiv1.Node{n1}
@@ -758,7 +761,8 @@ func TestStaticAutoscalerRunOncePodsWithPriorities(t *testing.T) {
 	}
 
 	processors := NewTestProcessors(&context)
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime))
 	sdPlanner, sdActuator := newScaleDownPlannerAndActuator(t, &context, processors, clusterState)
 	suOrchestrator := orchestrator.New()
 	suOrchestrator.Initialize(&context, processors, clusterState, taints.TaintConfig{})
@@ -893,7 +897,8 @@ func TestStaticAutoscalerRunOnceWithFilteringOnBinPackingEstimator(t *testing.T)
 	}
 
 	processors := NewTestProcessors(&context)
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime))
 	sdPlanner, sdActuator := newScaleDownPlannerAndActuator(t, &context, processors, clusterState)
 
 	autoscaler := &StaticAutoscaler{
@@ -993,7 +998,8 @@ func TestStaticAutoscalerRunOnceWithFilteringOnUpcomingNodesEnabledNoScaleUp(t *
 	}
 
 	processors := NewTestProcessors(&context)
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime))
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime))
 	sdPlanner, sdActuator := newScaleDownPlannerAndActuator(t, &context, processors, clusterState)
 
 	autoscaler := &StaticAutoscaler{
@@ -1050,9 +1056,9 @@ func TestStaticAutoscalerInstanceCreationErrors(t *testing.T) {
 		OkTotalUnreadyCount: 1,
 	}
 
-	staticMaxNodeProvisionTimeProvider := clusterstate.NewStaticMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime)
-
-	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), staticMaxNodeProvisionTimeProvider)
+	staticMaxNodeProvisionTimeProvider := clusterstate.NewMockMaxNodeProvisionTimeProvider(options.NodeGroupDefaults.MaxNodeProvisionTime)
+	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(staticMaxNodeProvisionTimeProvider)
 	autoscaler := &StaticAutoscaler{
 		AutoscalingContext:   &context,
 		clusterStateRegistry: clusterState,
@@ -1290,7 +1296,8 @@ func TestStaticAutoscalerInstanceCreationErrors(t *testing.T) {
 		return false
 	}, nil)
 
-	clusterState = clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), staticMaxNodeProvisionTimeProvider)
+	clusterState = clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff())
+	clusterState.RegisterProviders(staticMaxNodeProvisionTimeProvider)
 	clusterState.RefreshCloudProviderNodeInstancesCache()
 	autoscaler.clusterStateRegistry = clusterState
 
@@ -1422,7 +1429,8 @@ func TestStaticAutoscalerUpcomingScaleDownCandidates(t *testing.T) {
 
 	// Create CSR with unhealthy cluster protection effectively disabled, to guarantee we reach the tested logic.
 	csrConfig := clusterstate.ClusterStateRegistryConfig{OkTotalUnreadyCount: nodeGroupCount * unreadyNodesCount}
-	csr := clusterstate.NewClusterStateRegistry(provider, csrConfig, ctx.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
+	csr := clusterstate.NewClusterStateRegistry(provider, csrConfig, ctx.LogRecorder, NewBackoff())
+	csr.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
 
 	// Setting the Actuator is necessary for testing any scale-down logic, it shouldn't have anything to do in this test.
 	actuator := actuation.NewActuator(&ctx, csr, deletiontracker.NewNodeDeletionTracker(0*time.Second), simulator.NodeDeleteOptions{})
@@ -1519,8 +1527,8 @@ func TestRemoveFixNodeTargetSize(t *testing.T) {
 	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, NewBackoff(),
-		clusterstate.NewStaticMaxNodeProvisionTimeProvider(context.AutoscalingOptions.NodeGroupDefaults.MaxNodeProvisionTime))
+	}, fakeLogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(context.AutoscalingOptions.NodeGroupDefaults.MaxNodeProvisionTime))
 
 	err := clusterState.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now.Add(-time.Hour))
 	assert.NoError(t, err)
@@ -1568,8 +1576,8 @@ func TestRemoveOldUnregisteredNodes(t *testing.T) {
 	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{
 		MaxTotalUnreadyPercentage: 10,
 		OkTotalUnreadyCount:       1,
-	}, fakeLogRecorder, NewBackoff(),
-		clusterstate.NewStaticMaxNodeProvisionTimeProvider(context.AutoscalingOptions.NodeGroupDefaults.MaxNodeProvisionTime))
+	}, fakeLogRecorder, NewBackoff())
+	clusterState.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(context.AutoscalingOptions.NodeGroupDefaults.MaxNodeProvisionTime))
 
 	err := clusterState.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now.Add(-time.Hour))
 	assert.NoError(t, err)
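Editor's note: the patch above replaces constructor-time injection of the maxNodeProvisionTimeProvider with two-phase initialization. NewClusterStateRegistry is now called first, so the registry can be handed to NewAutoscalingContext before the provider (which itself needs the context) exists; RegisterProviders wires the provider in afterwards, and MaxNodeProvisionTime guards against use before registration. The following self-contained Go sketch models that pattern with toy stand-in types — registry, staticProvider, and errProviderNotSet are illustrative names for this sketch only, not the autoscaler's API:

package main

import (
	"errors"
	"fmt"
	"time"
)

// Toy stand-ins for the autoscaler types; names are illustrative only.
type nodeGroup string

type maxNodeProvisionTimeProvider interface {
	GetMaxNodeProvisionTime(ng nodeGroup) (time.Duration, error)
}

var errProviderNotSet = errors.New("MaxNodeProvisionTimeProvider was not set")

// registry models ClusterStateRegistry: it is constructed without a provider
// and has one registered later via RegisterProviders.
type registry struct {
	provider maxNodeProvisionTimeProvider // nil until RegisterProviders is called
}

func (r *registry) RegisterProviders(p maxNodeProvisionTimeProvider) {
	r.provider = p
}

// MaxNodeProvisionTime guards against use before registration instead of
// dereferencing a nil provider.
func (r *registry) MaxNodeProvisionTime(ng nodeGroup) (time.Duration, error) {
	if r.provider == nil {
		return 0, errProviderNotSet
	}
	return r.provider.GetMaxNodeProvisionTime(ng)
}

// staticProvider plays the role of the test-only mock: one constant value for every node group.
type staticProvider struct{ d time.Duration }

func (p staticProvider) GetMaxNodeProvisionTime(nodeGroup) (time.Duration, error) {
	return p.d, nil
}

func main() {
	r := &registry{} // phase 1: construct without the provider

	if _, err := r.MaxNodeProvisionTime("ng1"); err != nil {
		fmt.Println("before registration:", err) // explicit error, no panic
	}

	r.RegisterProviders(staticProvider{15 * time.Minute}) // phase 2: wire it in

	d, _ := r.MaxNodeProvisionTime("ng1")
	fmt.Println("after registration:", d) // 15m0s
}

The nil guard is what makes the two-phase wiring safe: any call that slips in between construction and registration surfaces as an explicit error (mirroring errMaxNodeProvisionTimeProviderNotSet above) rather than a nil-pointer panic.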