Skip to content

Commit

Permalink
UPSTREAM: <carry>: openshift: add custom nodeset comparator
Browse files Browse the repository at this point in the history
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1731011

In OpenShift clusters we see very small memory differences between
instances, typically 8Ki. The default comparator which is used when
--balance-similar-node-groups is enabled only considers nodes in a
nodegroup to be equal when the memory capacity is identical. This new
comparator is identical to the default but will tolerate a 128Ki
difference in memory capacity.

With this change, and with --balance-similar-node-groups enabled, I
now see the following:

```console
$ oc get machinesets
NAME                               DESIRED   CURRENT   READY   AVAILABLE   AGE
amcdermo-w6m6z-worker-us-east-2a   3         3         3       3           24h
amcdermo-w6m6z-worker-us-east-2b   4         4         4       4           24h
amcdermo-w6m6z-worker-us-east-2c   4         4         4       4           24h
e2e-59443-w-0                      3         3         1       1           20m
e2e-59443-w-1                      3         3         1       1           20m
e2e-59443-w-2                      3         3         1       1           20m
e2e-59443-w-3                      3         3         1       1           20m
e2e-59443-w-4                      3         3         1       1           20m
e2e-59443-w-5                      3         3         1       1           20m
e2e-59443-w-6                      3         3         1       1           20m
e2e-59443-w-7                      3         3         1       1           20m
e2e-59443-w-8                      3         3         1       1           20m
```
  • Loading branch information
frobware committed Aug 23, 2019
1 parent 49fac1f commit b8ec486
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 0 deletions.
9 changes: 9 additions & 0 deletions cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
cloudBuilder "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/builder"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/openshiftmachineapi"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/core"
"k8s.io/autoscaler/cluster-autoscaler/estimator"
Expand Down Expand Up @@ -281,6 +282,14 @@ func buildAutoscaler() (core.Autoscaler, error) {
processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{
Comparator: nodegroupset.IsGkeNodeInfoSimilar}

}
if autoscalingOptions.CloudProviderName == openshiftmachineapi.ProviderName {
// We do this because of:
// - https://bugzilla.redhat.com/show_bug.cgi?id=1731011
// - https://bugzilla.redhat.com/show_bug.cgi?id=1733235
processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{
Comparator: nodegroupset.IsOpenShiftMachineApiNodeInfoSimilar}

}
opts := core.AutoscalerOptions{
AutoscalingOptions: autoscalingOptions,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package nodegroupset

import (
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)

// maxMemoryDifferenceInKiloBytes describes how much
// Node.Status.Capacity can differ but still be considered equal.
var maxMemoryDifferenceInKiloBytes = resource.MustParse("128Ki")

// IsOpenShiftMachineApiNodeInfoSimilar compares if two nodes should
// be considered part of the same NodeGroupSet.
func IsOpenShiftMachineApiNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool {
return isOpenShiftNodeInfoSimilar(n1, n2)
}

// Note: this is a copy of isNodeInfoSimilar() and the only change is
// to tolerate a small memory capacity difference.
func isOpenShiftNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool {
capacity := make(map[apiv1.ResourceName][]resource.Quantity)
allocatable := make(map[apiv1.ResourceName][]resource.Quantity)
free := make(map[apiv1.ResourceName][]resource.Quantity)
nodes := []*schedulernodeinfo.NodeInfo{n1, n2}
for _, node := range nodes {
for res, quantity := range node.Node().Status.Capacity {
capacity[res] = append(capacity[res], quantity)
}
for res, quantity := range node.Node().Status.Allocatable {
allocatable[res] = append(allocatable[res], quantity)
}
requested := node.RequestedResource()
for res, quantity := range (&requested).ResourceList() {
freeRes := node.Node().Status.Allocatable[res].DeepCopy()
freeRes.Sub(quantity)
free[res] = append(free[res], freeRes)
}
}

// For capacity we allow quantities to be within a few Kb
// because we find that some cloud instances are slightly
// smaller/larger than other, typically 8-16Ki.
// See:
// https://bugzilla.redhat.com/show_bug.cgi?id=1731011
// https://bugzilla.redhat.com/show_bug.cgi?id=1733235
for _, qtyList := range capacity {
if len(qtyList) != 2 || !compareResourceEqualWithTolerance(qtyList[0], qtyList[1], maxMemoryDifferenceInKiloBytes) {
return false
}
}
// For allocatable and free we allow resource quantities to be within a few % of each other
if !compareResourceMapsWithTolerance(allocatable, MaxAllocatableDifferenceRatio) {
return false
}
if !compareResourceMapsWithTolerance(free, MaxFreeDifferenceRatio) {
return false
}

ignoredLabels := map[string]bool{
apiv1.LabelHostname: true,
apiv1.LabelZoneFailureDomain: true,
apiv1.LabelZoneRegion: true,
"beta.kubernetes.io/fluentd-ds-ready": true, // this is internal label used for determining if fluentd should be installed as deamon set. Used for migration 1.8 to 1.9.
}

labels := make(map[string][]string)
for _, node := range nodes {
for label, value := range node.Node().ObjectMeta.Labels {
ignore, _ := ignoredLabels[label]
if !ignore {
labels[label] = append(labels[label], value)
}
}
}
for _, labelValues := range labels {
if len(labelValues) != 2 || labelValues[0] != labelValues[1] {
return false
}
}
return true
}

func compareResourceEqualWithTolerance(x, y, tolerance resource.Quantity) bool {
x.Sub(y)
if x.Sign() == -1 {
x.Neg()
}
return x.Cmp(tolerance) != 1 // <= max
}

0 comments on commit b8ec486

Please sign in to comment.