diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go index e6d89570d01..4cfe951f201 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go @@ -31,11 +31,9 @@ const ( // MaxFreeDifferenceRatio describes how free resources (allocatable - daemon and system pods) // can differ between groups in the same NodeGroupSet MaxFreeDifferenceRatio = 0.05 -) - -var ( - // MaxMemoryDifference describes how much memory capacity can differ but still be considered equal. - MaxMemoryDifference = resource.MustParse("256Mi") + // MaxCapacityMemoryDifferenceRatio describes how Node.Status.Capacity.Memory can differ between + // groups in the same NodeGroupSet + MaxCapacityMemoryDifferenceRatio = 0.015 ) // BasicIgnoredLabels define a set of basic labels that should be ignored when comparing the similarity @@ -58,18 +56,25 @@ type NodeInfoComparator func(n1, n2 *schedulernodeinfo.NodeInfo) bool func compareResourceMapsWithTolerance(resources map[apiv1.ResourceName][]resource.Quantity, maxDifferenceRatio float64) bool { for _, qtyList := range resources { - if len(qtyList) != 2 { - return false - } - larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue())) - smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue())) - if larger-smaller > larger*maxDifferenceRatio { + if !compareResourceListWithTolerance(qtyList, maxDifferenceRatio) { return false } } return true } +func compareResourceListWithTolerance(qtyList []resource.Quantity, maxDifferenceRatio float64) bool { + if len(qtyList) != 2 { + return false + } + larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue())) + smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue())) + if larger-smaller > larger*maxDifferenceRatio { + return false + } + return true +} + func compareLabels(nodes []*schedulernodeinfo.NodeInfo, ignoredLabels map[string]bool) bool { labels := make(map[string][]string) for _, node := range nodes { @@ -125,11 +130,7 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo, ignoredL } switch kind { case apiv1.ResourceMemory: - // For memory capacity we allow a small tolerance - difference := absSub(qtyList[0], qtyList[1]) - - // Cmp compares two quantities and returns -1 for less than, 0 for equal, 1 for greater than - if difference.Cmp(MaxMemoryDifference) > 0 { + if !compareResourceListWithTolerance(qtyList, MaxCapacityMemoryDifferenceRatio) { return false } default: @@ -156,16 +157,3 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo, ignoredL return true } - -// absSub returns the absolute value of a - b -func absSub(a, b resource.Quantity) *resource.Quantity { - q := &resource.Quantity{} - a.DeepCopyInto(q) - q.Sub(b) - - if q.Sign() == -1 { - q.Neg() - } - - return q -} diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go index e032829677b..069ce476b44 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go @@ -97,20 +97,20 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) { } func TestNodesSimilarVariousMemoryRequirements(t *testing.T) { - n1 := BuildTestNode("node1", 1000, MaxMemoryDifference.Value()) + n1 := BuildTestNode("node1", 1000, 1000) // Different memory capacity within tolerance - n2 := BuildTestNode("node2", 1000, MaxMemoryDifference.Value()) - n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifference.Value(), resource.DecimalSI) + n2 := BuildTestNode("node2", 1000, 1000) + n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)+1, resource.DecimalSI) checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) // Different memory capacity exceeds tolerance - n3 := BuildTestNode("node3", 1000, MaxMemoryDifference.Value()) - n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifference.Value()+1, resource.DecimalSI) + n3 := BuildTestNode("node3", 1000, 1000) + n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)-1, resource.DecimalSI) checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false) } -func TestNodesSimilarVariousLargeMemoryRequirements(t *testing.T) { +func TestNodesSimilarVariousLargeMemoryRequirementsM5XLarge(t *testing.T) { // Use realistic memory capacity (taken from real nodes) // 15944120 KB ~= 16GiB (m5.xLarge) q1 := resource.MustParse("16116152Ki") @@ -124,10 +124,28 @@ func TestNodesSimilarVariousLargeMemoryRequirements(t *testing.T) { checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) // Different memory capacity exceeds tolerance - // Same value as q1 but with MaxMemoryDifference added twice - q3 := resource.MustParse("16116152Ki") - q3.Add(MaxMemoryDifference) - q3.Add(MaxMemoryDifference) + // Value of q1 * 1.02 + q3 := resource.MustParse("16438475Ki") + n3 := BuildTestNode("node3", 1000, q3.Value()) + checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false) +} + +func TestNodesSimilarVariousLargeMemoryRequirementsM516XLarge(t *testing.T) { + // Use realistic memory capacity (taken from real nodes) + // 257217528 KB ~= 256GiB (m5.16xLarge) + q1 := resource.MustParse("259970052Ki") + q2 := resource.MustParse("257217528Ki") + + n1 := BuildTestNode("node1", 1000, q1.Value()) + + // Different memory capacity within tolerance + // Value taken from another m5.xLarge in a different zone + n2 := BuildTestNode("node2", 1000, q2.Value()) + checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) + + // Different memory capacity exceeds tolerance + // Value of q1 * 1.02 + q3 := resource.MustParse("265169453Ki") n3 := BuildTestNode("node3", 1000, q3.Value()) checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false) }