Skip to content

Commit

Permalink
Cherry-pick kubernetes#3124: Allow small tolerance on memory capacity…
Browse files Browse the repository at this point in the history
… when comparing nodegroups
  • Loading branch information
k8s-ci-robot authored and wwentland committed Jul 26, 2020
1 parent 0d7d0f4 commit e9f5dba
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 21 deletions.
33 changes: 17 additions & 16 deletions cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ const (
// MaxFreeDifferenceRatio describes how free resources (allocatable - daemon and system pods)
// can differ between groups in the same NodeGroupSet
MaxFreeDifferenceRatio = 0.05
// MaxMemoryDifferenceInKiloBytes describes how much memory
// capacity can differ but still be considered equal.
MaxMemoryDifferenceInKiloBytes = 256000
// MaxCapacityMemoryDifferenceRatio describes how Node.Status.Capacity.Memory can differ between
// groups in the same NodeGroupSet
MaxCapacityMemoryDifferenceRatio = 0.015
)

// BasicIgnoredLabels define a set of basic labels that should be ignored when comparing the similarity
Expand All @@ -44,28 +44,31 @@ var BasicIgnoredLabels = map[string]bool{
apiv1.LabelZoneFailureDomain: true,
apiv1.LabelZoneRegion: true,
"beta.kubernetes.io/fluentd-ds-ready": true, // this is internal label used for determining if fluentd should be installed as deamon set. Used for migration 1.8 to 1.9.
"kops.k8s.io/instancegroup": true, // this is a label used by kops to identify "instance group" names. it's value is variable, defeating check of similar node groups
}

// NodeInfoComparator is a function that tells if two nodes are from NodeGroups
// similar enough to be considered a part of a single NodeGroupSet.
type NodeInfoComparator func(n1, n2 *schedulernodeinfo.NodeInfo) bool

func compareResourceMapsWithTolerance(resources map[apiv1.ResourceName][]resource.Quantity,
func resourceMapsWithinTolerance(resources map[apiv1.ResourceName][]resource.Quantity,
maxDifferenceRatio float64) bool {
for _, qtyList := range resources {
if len(qtyList) != 2 {
return false
}
larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
if larger-smaller > larger*maxDifferenceRatio {
if !resourceListWithinTolerance(qtyList, maxDifferenceRatio) {
return false
}
}
return true
}

func resourceListWithinTolerance(qtyList []resource.Quantity, maxDifferenceRatio float64) bool {
if len(qtyList) != 2 {
return false
}
larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
return larger-smaller <= larger*maxDifferenceRatio
}

func compareLabels(nodes []*schedulernodeinfo.NodeInfo, ignoredLabels map[string]bool) bool {
labels := make(map[string][]string)
for _, node := range nodes {
Expand Down Expand Up @@ -121,9 +124,7 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo, ignoredL
}
switch kind {
case apiv1.ResourceMemory:
// For memory capacity we allow a small tolerance
memoryDifference := math.Abs(float64(qtyList[0].Value()) - float64(qtyList[1].Value()))
if memoryDifference > MaxMemoryDifferenceInKiloBytes {
if !resourceListWithinTolerance(qtyList, MaxCapacityMemoryDifferenceRatio) {
return false
}
default:
Expand All @@ -137,10 +138,10 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo, ignoredL
}

// For allocatable and free we allow resource quantities to be within a few % of each other
if !compareResourceMapsWithTolerance(allocatable, MaxAllocatableDifferenceRatio) {
if !resourceMapsWithinTolerance(allocatable, MaxAllocatableDifferenceRatio) {
return false
}
if !compareResourceMapsWithTolerance(free, MaxFreeDifferenceRatio) {
if !resourceMapsWithinTolerance(free, MaxFreeDifferenceRatio) {
return false
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,58 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) {
}

func TestNodesSimilarVariousMemoryRequirements(t *testing.T) {
n1 := BuildTestNode("node1", 1000, MaxMemoryDifferenceInKiloBytes)
n1 := BuildTestNode("node1", 1000, 1000)

// Different memory capacity within tolerance
n2 := BuildTestNode("node2", 1000, MaxMemoryDifferenceInKiloBytes)
n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes, resource.DecimalSI)
n2 := BuildTestNode("node2", 1000, 1000)
n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)+1, resource.DecimalSI)
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)

// Different memory capacity exceeds tolerance
n3 := BuildTestNode("node3", 1000, MaxMemoryDifferenceInKiloBytes)
n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes+1, resource.DecimalSI)
n3 := BuildTestNode("node3", 1000, 1000)
n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)-1, resource.DecimalSI)
checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false)
}

func TestNodesSimilarVariousLargeMemoryRequirementsM5XLarge(t *testing.T) {

// Use realistic memory capacity (taken from real nodes)
// 15944120 KB ~= 16GiB (m5.xLarge)
q1 := resource.MustParse("16116152Ki")
q2 := resource.MustParse("15944120Ki")

n1 := BuildTestNode("node1", 1000, q1.Value())

// Different memory capacity within tolerance
// Value taken from another m5.xLarge in a different zone
n2 := BuildTestNode("node2", 1000, q2.Value())
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)

// Different memory capacity exceeds tolerance
// Value of q1 * 1.02
q3 := resource.MustParse("16438475Ki")
n3 := BuildTestNode("node3", 1000, q3.Value())
checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false)
}

func TestNodesSimilarVariousLargeMemoryRequirementsM516XLarge(t *testing.T) {

// Use realistic memory capacity (taken from real nodes)
// 257217528 KB ~= 256GiB (m5.16xLarge)
q1 := resource.MustParse("259970052Ki")
q2 := resource.MustParse("257217528Ki")

n1 := BuildTestNode("node1", 1000, q1.Value())

// Different memory capacity within tolerance
// Value taken from another m5.xLarge in a different zone
n2 := BuildTestNode("node2", 1000, q2.Value())
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)

// Different memory capacity exceeds tolerance
// Value of q1 * 1.02
q3 := resource.MustParse("265169453Ki")
n3 := BuildTestNode("node3", 1000, q3.Value())
checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false)
}

Expand Down

0 comments on commit e9f5dba

Please sign in to comment.