From e8b3c2a111eb9b3b4fe15b4d4081175267ff6d76 Mon Sep 17 00:00:00 2001 From: Andrew McDermott Date: Fri, 6 Sep 2019 14:41:18 +0100 Subject: [PATCH] compare_nodegroups: Tolerate small differences in memory capacity The current comparator expects memory capacity values to be identical. However across AWS, Azure and GCP I quite often see very small differences in capacity, typically 8-16Ki. When this occurs the nodegroups are considered not equal when balancing is in effect which is unfortunate because, in reality, they are identical. This change will now tolerate a 128Ki difference before memory capacity values are considered unequal. --- .../nodegroupset/compare_nodegroups.go | 27 +++++++++++++++---- .../nodegroupset/compare_nodegroups_test.go | 14 ++++++++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go index 3b4ac009930..50c0be5c053 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go @@ -31,6 +31,9 @@ const ( // MaxFreeDifferenceRatio describes how free resources (allocatable - daemon and system pods) // can differ between groups in the same NodeGroupSet MaxFreeDifferenceRatio = 0.05 + // MaxMemoryDifferenceInKiloBytes describes how much memory + // capacity can differ but still be considered equal. + MaxMemoryDifferenceInKiloBytes = 128000 ) // BasicIgnoredLabels define a set of basic labels that should be ignored when comparing the similarity @@ -110,14 +113,28 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo, ignoredL free[res] = append(free[res], freeRes) } } - // For capacity we require exact match. - // If this is ever changed, enforcing MaxCoresTotal and MaxMemoryTotal limits - // as it is now may no longer work. - for _, qtyList := range capacity { - if len(qtyList) != 2 || qtyList[0].Cmp(qtyList[1]) != 0 { + + for kind, qtyList := range capacity { + if len(qtyList) != 2 { return false } + switch kind { + case apiv1.ResourceMemory: + // For memory capacity we allow a small tolerance + memoryDifference := math.Abs(float64(qtyList[0].Value()) - float64(qtyList[1].Value())) + if memoryDifference > MaxMemoryDifferenceInKiloBytes { + return false + } + default: + // For other capacity types we require exact match. + // If this is ever changed, enforcing MaxCoresTotal limits + // as it is now may no longer work. + if qtyList[0].Cmp(qtyList[1]) != 0 { + return false + } + } } + // For allocatable and free we allow resource quantities to be within a few % of each other if !compareResourceMapsWithTolerance(allocatable, MaxAllocatableDifferenceRatio) { return false diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go index c66e66c8d71..b36ed92b98d 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go @@ -96,6 +96,20 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) { checkNodesSimilarWithPods(t, n1, n4, []*apiv1.Pod{p1}, []*apiv1.Pod{p4}, IsNodeInfoSimilar, true) } +func TestNodesSimilarVariousMemoryRequirements(t *testing.T) { + n1 := BuildTestNode("node1", 1000, MaxMemoryDifferenceInKiloBytes) + + // Different memory capacity within tolerance + n2 := BuildTestNode("node2", 1000, MaxMemoryDifferenceInKiloBytes) + n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes, resource.DecimalSI) + checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) + + // Different memory capacity exceeds tolerance + n3 := BuildTestNode("node3", 1000, MaxMemoryDifferenceInKiloBytes) + n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes+1, resource.DecimalSI) + checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false) +} + func TestNodesSimilarVariousLabels(t *testing.T) { n1 := BuildTestNode("node1", 1000, 2000) n1.ObjectMeta.Labels["test-label"] = "test-value"