Skip to content

Commit

Permalink
Allow 1.5% tolerance in memory capacity when comparing nodegroups
Browse files Browse the repository at this point in the history
In testing, AWS M5 instances can on occasion display approximately a 1% difference
in memory capacity between availability zones, deployed with the same launch
configuration and same AMI.
Allow a 1.5% tolerance to give some buffer on the actual amount of memory discrepancy
since in testing, some examples were just over 1% (eg 1.05%, 1.1%).
Tests are included with capacity values taken from real instances to prevent future
regression.
  • Loading branch information
JoelSpeed committed Jun 10, 2020
1 parent 952085e commit be1d9cb
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 20 deletions.
32 changes: 17 additions & 15 deletions cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ const (
// MaxFreeDifferenceRatio describes how free resources (allocatable - daemon and system pods)
// can differ between groups in the same NodeGroupSet
MaxFreeDifferenceRatio = 0.05
// MaxMemoryDifferenceInKiloBytes describes how much memory
// capacity can differ but still be considered equal.
MaxMemoryDifferenceInKiloBytes = 256000
// MaxCapacityMemoryDifferenceRatio describes how Node.Status.Capacity.Memory can differ between
// groups in the same NodeGroupSet
MaxCapacityMemoryDifferenceRatio = 0.015
)

// BasicIgnoredLabels define a set of basic labels that should be ignored when comparing the similarity
Expand All @@ -53,21 +53,25 @@ var BasicIgnoredLabels = map[string]bool{
// similar enough to be considered a part of a single NodeGroupSet.
type NodeInfoComparator func(n1, n2 *schedulerframework.NodeInfo) bool

func compareResourceMapsWithTolerance(resources map[apiv1.ResourceName][]resource.Quantity,
func resourceMapsWithinTolerance(resources map[apiv1.ResourceName][]resource.Quantity,
maxDifferenceRatio float64) bool {
for _, qtyList := range resources {
if len(qtyList) != 2 {
return false
}
larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
if larger-smaller > larger*maxDifferenceRatio {
if !resourceListWithinTolerance(qtyList, maxDifferenceRatio) {
return false
}
}
return true
}

func resourceListWithinTolerance(qtyList []resource.Quantity, maxDifferenceRatio float64) bool {
if len(qtyList) != 2 {
return false
}
larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
return larger-smaller <= larger*maxDifferenceRatio
}

func compareLabels(nodes []*schedulerframework.NodeInfo, ignoredLabels map[string]bool) bool {
labels := make(map[string][]string)
for _, node := range nodes {
Expand Down Expand Up @@ -131,9 +135,7 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignored
}
switch kind {
case apiv1.ResourceMemory:
// For memory capacity we allow a small tolerance
memoryDifference := math.Abs(float64(qtyList[0].Value()) - float64(qtyList[1].Value()))
if memoryDifference > MaxMemoryDifferenceInKiloBytes {
if !resourceListWithinTolerance(qtyList, MaxCapacityMemoryDifferenceRatio) {
return false
}
default:
Expand All @@ -147,10 +149,10 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignored
}

// For allocatable and free we allow resource quantities to be within a few % of each other
if !compareResourceMapsWithTolerance(allocatable, MaxAllocatableDifferenceRatio) {
if !resourceMapsWithinTolerance(allocatable, MaxAllocatableDifferenceRatio) {
return false
}
if !compareResourceMapsWithTolerance(free, MaxFreeDifferenceRatio) {
if !resourceMapsWithinTolerance(free, MaxFreeDifferenceRatio) {
return false
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,16 +101,60 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) {

func TestNodesSimilarVariousMemoryRequirements(t *testing.T) {
comparator := CreateGenericNodeInfoComparator([]string{})
n1 := BuildTestNode("node1", 1000, MaxMemoryDifferenceInKiloBytes)
n1 := BuildTestNode("node1", 1000, 1000)

// Different memory capacity within tolerance
n2 := BuildTestNode("node2", 1000, MaxMemoryDifferenceInKiloBytes)
n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes, resource.DecimalSI)
n2 := BuildTestNode("node2", 1000, 1000)
n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)+1, resource.DecimalSI)
checkNodesSimilar(t, n1, n2, comparator, true)

// Different memory capacity exceeds tolerance
n3 := BuildTestNode("node3", 1000, MaxMemoryDifferenceInKiloBytes)
n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes+1, resource.DecimalSI)
n3 := BuildTestNode("node3", 1000, 1000)
n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)-1, resource.DecimalSI)
checkNodesSimilar(t, n1, n3, comparator, false)
}

func TestNodesSimilarVariousLargeMemoryRequirementsM5XLarge(t *testing.T) {
comparator := CreateGenericNodeInfoComparator([]string{})

// Use realistic memory capacity (taken from real nodes)
// 15944120 KB ~= 16GiB (m5.xLarge)
q1 := resource.MustParse("16116152Ki")
q2 := resource.MustParse("15944120Ki")

n1 := BuildTestNode("node1", 1000, q1.Value())

// Different memory capacity within tolerance
// Value taken from another m5.xLarge in a different zone
n2 := BuildTestNode("node2", 1000, q2.Value())
checkNodesSimilar(t, n1, n2, comparator, true)

// Different memory capacity exceeds tolerance
// Value of q1 * 1.02
q3 := resource.MustParse("16438475Ki")
n3 := BuildTestNode("node3", 1000, q3.Value())
checkNodesSimilar(t, n1, n3, comparator, false)
}

func TestNodesSimilarVariousLargeMemoryRequirementsM516XLarge(t *testing.T) {
comparator := CreateGenericNodeInfoComparator([]string{})

// Use realistic memory capacity (taken from real nodes)
// 257217528 KB ~= 256GiB (m5.16xLarge)
q1 := resource.MustParse("259970052Ki")
q2 := resource.MustParse("257217528Ki")

n1 := BuildTestNode("node1", 1000, q1.Value())

// Different memory capacity within tolerance
// Value taken from another m5.xLarge in a different zone
n2 := BuildTestNode("node2", 1000, q2.Value())
checkNodesSimilar(t, n1, n2, comparator, true)

// Different memory capacity exceeds tolerance
// Value of q1 * 1.02
q3 := resource.MustParse("265169453Ki")
n3 := BuildTestNode("node3", 1000, q3.Value())
checkNodesSimilar(t, n1, n3, comparator, false)
}

Expand Down

0 comments on commit be1d9cb

Please sign in to comment.