Skip to content

Commit

Permalink
CSR: fix a bug in GetClusterSize
Browse files Browse the repository at this point in the history
Currently, GetClusterSize reports the target number for all autoscaled
node groups, but the actual number for _all_ node groups, even those
that are not autoscaled. This commit fixes that behavior so that both
target and actual size reported are from autoscaled node groups only.
  • Loading branch information
towca committed Nov 20, 2019
1 parent 30256a6 commit f64b6cd
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 12 deletions.
10 changes: 6 additions & 4 deletions cluster-autoscaler/clusterstate/clusterstate.go
Original file line number Diff line number Diff line change
Expand Up @@ -979,16 +979,18 @@ func getNotRegisteredNodes(allNodes []*apiv1.Node, cloudProviderNodeInstances ma
return notRegistered
}

// GetClusterSize calculates and returns cluster's current size and target size. The current size is the
// actual number of nodes provisioned in Kubernetes, the target size is the number of nodes the CA wants.
func (csr *ClusterStateRegistry) GetClusterSize() (currentSize, targetSize int) {
// GetAutoscaledNodesCount calculates and returns the actual and the target number of nodes
// belonging to autoscaled node groups in the cluster.
func (csr *ClusterStateRegistry) GetAutoscaledNodesCount() (currentSize, targetSize int) {
csr.Lock()
defer csr.Unlock()

for _, accRange := range csr.acceptableRanges {
targetSize += accRange.CurrentTarget
}
currentSize = csr.totalReadiness.Registered - csr.totalReadiness.NotStarted - csr.totalReadiness.LongNotStarted
for _, readiness := range csr.perNodeGroupReadiness {
currentSize += readiness.Registered - readiness.NotStarted - readiness.LongNotStarted
}
return currentSize, targetSize
}

Expand Down
22 changes: 14 additions & 8 deletions cluster-autoscaler/clusterstate/clusterstate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,8 @@ func TestGetClusterSize(t *testing.T) {
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
SetNodeReadyState(ng2_1, true, now.Add(-time.Minute))
notAutoscaledNode := BuildTestNode("notAutoscaledNode", 1000, 1000)
SetNodeReadyState(notAutoscaledNode, true, now.Add(-time.Minute))

provider := testprovider.NewTestCloudProvider(nil, nil)
provider.AddNodeGroup("ng1", 1, 10, 5)
Expand All @@ -682,6 +684,10 @@ func TestGetClusterSize(t *testing.T) {
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng2", ng2_1)

// Add a node not belonging to any autoscaled node group. This is to make sure that GetAutoscaledNodesCount doesn't
// take nodes from non-autoscaled node groups into account.
provider.AddNode("notAutoscaledNode", notAutoscaledNode)

fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false)
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
Expand All @@ -690,30 +696,30 @@ func TestGetClusterSize(t *testing.T) {
}, fakeLogRecorder, newBackoff())

// There are 2 actual nodes in 2 node groups with target sizes of 5 and 1.
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
currentSize, targetSize := clusterstate.GetClusterSize()
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1, notAutoscaledNode}, nil, now)
currentSize, targetSize := clusterstate.GetAutoscaledNodesCount()
assert.Equal(t, 2, currentSize)
assert.Equal(t, 6, targetSize)

// Current size should increase after a new node is added.
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_1, ng2_1}, nil, now.Add(time.Minute))
currentSize, targetSize = clusterstate.GetClusterSize()
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_1, notAutoscaledNode, ng2_1}, nil, now.Add(time.Minute))
currentSize, targetSize = clusterstate.GetAutoscaledNodesCount()
assert.Equal(t, 3, currentSize)
assert.Equal(t, 6, targetSize)

// Target size should increase after a new node group is added.
provider.AddNodeGroup("ng3", 1, 10, 1)
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_1, ng2_1}, nil, now.Add(2*time.Minute))
currentSize, targetSize = clusterstate.GetClusterSize()
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_1, notAutoscaledNode, ng2_1}, nil, now.Add(2*time.Minute))
currentSize, targetSize = clusterstate.GetAutoscaledNodesCount()
assert.Equal(t, 3, currentSize)
assert.Equal(t, 7, targetSize)

// Target size should change after a node group changes its target size.
for _, ng := range provider.NodeGroups() {
ng.(*testprovider.TestNodeGroup).SetTargetSize(10)
}
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_1, ng2_1}, nil, now.Add(3*time.Minute))
currentSize, targetSize = clusterstate.GetClusterSize()
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_1, notAutoscaledNode, ng2_1}, nil, now.Add(3*time.Minute))
currentSize, targetSize = clusterstate.GetAutoscaledNodesCount()
assert.Equal(t, 3, currentSize)
assert.Equal(t, 30, targetSize)
}
Expand Down

0 comments on commit f64b6cd

Please sign in to comment.