diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go index 864d3d5484e9..27f7ebc75817 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go @@ -31,9 +31,9 @@ const ( // MaxFreeDifferenceRatio describes how free resources (allocatable - daemon and system pods) // can differ between groups in the same NodeGroupSet MaxFreeDifferenceRatio = 0.05 - // MaxMemoryDifferenceInKiloBytes describes how much memory - // capacity can differ but still be considered equal. - MaxMemoryDifferenceInKiloBytes = 256000 + // MaxCapacityMemoryDifferenceRatio describes how Node.Status.Capacity.Memory can differ between + // groups in the same NodeGroupSet + MaxCapacityMemoryDifferenceRatio = 0.015 ) // BasicIgnoredLabels define a set of basic labels that should be ignored when comparing the similarity @@ -50,21 +50,25 @@ var BasicIgnoredLabels = map[string]bool{ // similar enough to be considered a part of a single NodeGroupSet. type NodeInfoComparator func(n1, n2 *schedulernodeinfo.NodeInfo) bool -func compareResourceMapsWithTolerance(resources map[apiv1.ResourceName][]resource.Quantity, +func resourceMapsWithinTolerance(resources map[apiv1.ResourceName][]resource.Quantity, maxDifferenceRatio float64) bool { for _, qtyList := range resources { - if len(qtyList) != 2 { - return false - } - larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue())) - smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue())) - if larger-smaller > larger*maxDifferenceRatio { + if !resourceListWithinTolerance(qtyList, maxDifferenceRatio) { return false } } return true } +func resourceListWithinTolerance(qtyList []resource.Quantity, maxDifferenceRatio float64) bool { + if len(qtyList) != 2 { + return false + } + larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue())) + smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue())) + return larger-smaller <= larger*maxDifferenceRatio +} + func compareLabels(nodes []*schedulernodeinfo.NodeInfo, ignoredLabels map[string]bool) bool { labels := make(map[string][]string) for _, node := range nodes { @@ -120,9 +124,7 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo, ignoredL } switch kind { case apiv1.ResourceMemory: - // For memory capacity we allow a small tolerance - memoryDifference := math.Abs(float64(qtyList[0].Value()) - float64(qtyList[1].Value())) - if memoryDifference > MaxMemoryDifferenceInKiloBytes { + if !resourceListWithinTolerance(qtyList, MaxCapacityMemoryDifferenceRatio) { return false } default: @@ -136,10 +138,10 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo, ignoredL } // For allocatable and free we allow resource quantities to be within a few % of each other - if !compareResourceMapsWithTolerance(allocatable, MaxAllocatableDifferenceRatio) { + if !resourceMapsWithinTolerance(allocatable, MaxAllocatableDifferenceRatio) { return false } - if !compareResourceMapsWithTolerance(free, MaxFreeDifferenceRatio) { + if !resourceMapsWithinTolerance(free, MaxFreeDifferenceRatio) { return false } diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go index b36ed92b98da..2703a0da940b 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go @@ -97,16 +97,58 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) { } func TestNodesSimilarVariousMemoryRequirements(t *testing.T) { - n1 := BuildTestNode("node1", 1000, MaxMemoryDifferenceInKiloBytes) + n1 := BuildTestNode("node1", 1000, 1000) // Different memory capacity within tolerance - n2 := BuildTestNode("node2", 1000, MaxMemoryDifferenceInKiloBytes) - n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes, resource.DecimalSI) + n2 := BuildTestNode("node2", 1000, 1000) + n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)+1, resource.DecimalSI) checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) // Different memory capacity exceeds tolerance - n3 := BuildTestNode("node3", 1000, MaxMemoryDifferenceInKiloBytes) - n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes+1, resource.DecimalSI) + n3 := BuildTestNode("node3", 1000, 1000) + n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)-1, resource.DecimalSI) + checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false) +} + +func TestNodesSimilarVariousLargeMemoryRequirementsM5XLarge(t *testing.T) { + + // Use realistic memory capacity (taken from real nodes) + // 15944120 KB ~= 16GiB (m5.xLarge) + q1 := resource.MustParse("16116152Ki") + q2 := resource.MustParse("15944120Ki") + + n1 := BuildTestNode("node1", 1000, q1.Value()) + + // Different memory capacity within tolerance + // Value taken from another m5.xLarge in a different zone + n2 := BuildTestNode("node2", 1000, q2.Value()) + checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) + + // Different memory capacity exceeds tolerance + // Value of q1 * 1.02 + q3 := resource.MustParse("16438475Ki") + n3 := BuildTestNode("node3", 1000, q3.Value()) + checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false) +} + +func TestNodesSimilarVariousLargeMemoryRequirementsM516XLarge(t *testing.T) { + + // Use realistic memory capacity (taken from real nodes) + // 257217528 KB ~= 256GiB (m5.16xLarge) + q1 := resource.MustParse("259970052Ki") + q2 := resource.MustParse("257217528Ki") + + n1 := BuildTestNode("node1", 1000, q1.Value()) + + // Different memory capacity within tolerance + // Value taken from another m5.xLarge in a different zone + n2 := BuildTestNode("node2", 1000, q2.Value()) + checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) + + // Different memory capacity exceeds tolerance + // Value of q1 * 1.02 + q3 := resource.MustParse("265169453Ki") + n3 := BuildTestNode("node3", 1000, q3.Value()) checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false) }