Skip to content

Commit

Permalink
compare_nodegroups: Tolerate small differences in memory capacity
Browse files Browse the repository at this point in the history
The current comparator expects memory capacity values to be identical.
However across AWS, Azure and GCP I quite often see very small
differences in capacity, typically 8-16Ki. When this occurs the
nodegroups are considered not equal when balancing is in effect which
is unfortunate because, in reality, they are identical.

This change will now tolerate a 128Ki difference before memory
capacity values are considered unequal.
  • Loading branch information
frobware committed Sep 6, 2019
1 parent e87aea4 commit e8b3c2a
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
27 changes: 22 additions & 5 deletions cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ const (
// MaxFreeDifferenceRatio describes how free resources (allocatable - daemon and system pods)
// can differ between groups in the same NodeGroupSet
MaxFreeDifferenceRatio = 0.05
// MaxMemoryDifferenceInKiloBytes describes how much memory
// capacity can differ but still be considered equal.
MaxMemoryDifferenceInKiloBytes = 128000
)

// BasicIgnoredLabels define a set of basic labels that should be ignored when comparing the similarity
Expand Down Expand Up @@ -110,14 +113,28 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo, ignoredL
free[res] = append(free[res], freeRes)
}
}
// For capacity we require exact match.
// If this is ever changed, enforcing MaxCoresTotal and MaxMemoryTotal limits
// as it is now may no longer work.
for _, qtyList := range capacity {
if len(qtyList) != 2 || qtyList[0].Cmp(qtyList[1]) != 0 {

for kind, qtyList := range capacity {
if len(qtyList) != 2 {
return false
}
switch kind {
case apiv1.ResourceMemory:
// For memory capacity we allow a small tolerance
memoryDifference := math.Abs(float64(qtyList[0].Value()) - float64(qtyList[1].Value()))
if memoryDifference > MaxMemoryDifferenceInKiloBytes {
return false
}
default:
// For other capacity types we require exact match.
// If this is ever changed, enforcing MaxCoresTotal limits
// as it is now may no longer work.
if qtyList[0].Cmp(qtyList[1]) != 0 {
return false
}
}
}

// For allocatable and free we allow resource quantities to be within a few % of each other
if !compareResourceMapsWithTolerance(allocatable, MaxAllocatableDifferenceRatio) {
return false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,20 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) {
checkNodesSimilarWithPods(t, n1, n4, []*apiv1.Pod{p1}, []*apiv1.Pod{p4}, IsNodeInfoSimilar, true)
}

func TestNodesSimilarVariousMemoryRequirements(t *testing.T) {
n1 := BuildTestNode("node1", 1000, MaxMemoryDifferenceInKiloBytes)

// Different memory capacity within tolerance
n2 := BuildTestNode("node2", 1000, MaxMemoryDifferenceInKiloBytes)
n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes, resource.DecimalSI)
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)

// Different memory capacity exceeds tolerance
n3 := BuildTestNode("node3", 1000, MaxMemoryDifferenceInKiloBytes)
n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes+1, resource.DecimalSI)
checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false)
}

func TestNodesSimilarVariousLabels(t *testing.T) {
n1 := BuildTestNode("node1", 1000, 2000)
n1.ObjectMeta.Labels["test-label"] = "test-value"
Expand Down

0 comments on commit e8b3c2a

Please sign in to comment.