Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow to balance nodes using specified labels #3839

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ type AutoscalingOptions struct {
MaxBulkSoftTaintTime time.Duration
// IgnoredTaints is a list of taints to ignore when considering a node template for scheduling.
IgnoredTaints []string
// BalancingLabels is a list of labels to use when comparing if two node groups are similar.
BalancingLabels []string
// BalancingExtraIgnoredLabels is a list of labels to additionally ignore when comparing if two node groups are similar.
// Labels in BasicIgnoredLabels and the cloud provider-specific ignored labels are always ignored.
BalancingExtraIgnoredLabels []string
Expand Down
22 changes: 20 additions & 2 deletions cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ var (
newPodScaleUpDelay = flag.Duration("new-pod-scale-up-delay", 0*time.Second, "Pods less than this old will not be considered for scale-up.")

ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
balancingLabelsFlag = multiStringFlag("balancing-label", "Specifies a label to use when comparing if two node groups are similar")
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
concurrentGceRefreshes = flag.Int("gce-concurrent-refreshes", 1, "Maximum number of concurrent refreshes per cloud object type.")
Expand All @@ -195,6 +196,11 @@ func createAutoscalingOptions() config.AutoscalingOptions {
if err != nil {
klog.Fatalf("Failed to parse flags: %v", err)
}

err = validateBalancingLabelFlags(*balancingLabelsFlag, *balancingIgnoreLabelsFlag)
if err != nil {
klog.Fatalf("Failed to parse flags: %v", err)
}
return config.AutoscalingOptions{
CloudConfig: *cloudConfig,
CloudProviderName: *cloudProviderFlag,
Expand Down Expand Up @@ -240,6 +246,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
Regional: *regional,
NewPodScaleUpDelay: *newPodScaleUpDelay,
IgnoredTaints: *ignoreTaintsFlag,
BalancingLabels: *balancingLabelsFlag,
BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
KubeConfigPath: *kubeConfigFile,
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
Expand Down Expand Up @@ -309,7 +316,11 @@ func buildAutoscaler() (core.Autoscaler, error) {
opts.Processors.PodListProcessor = core.NewFilterOutSchedulablePodListProcessor()

nodeInfoComparatorBuilder := nodegroupset.CreateGenericNodeInfoComparator
if autoscalingOptions.CloudProviderName == cloudprovider.AzureProviderName {
nodeInfoComparatorBuilderArgs := autoscalingOptions.BalancingExtraIgnoredLabels
if len(autoscalingOptions.BalancingLabels) > 0 {
nodeInfoComparatorBuilder = nodegroupset.CreateGenericNodeInfoLabelComparator
nodeInfoComparatorBuilderArgs = autoscalingOptions.BalancingLabels
} else if autoscalingOptions.CloudProviderName == cloudprovider.AzureProviderName {
nodeInfoComparatorBuilder = nodegroupset.CreateAzureNodeInfoComparator
} else if autoscalingOptions.CloudProviderName == cloudprovider.AwsProviderName {
nodeInfoComparatorBuilder = nodegroupset.CreateAwsNodeInfoComparator
Expand All @@ -318,7 +329,7 @@ func buildAutoscaler() (core.Autoscaler, error) {
}

opts.Processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{
Comparator: nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels),
Comparator: nodeInfoComparatorBuilder(nodeInfoComparatorBuilderArgs),
}

// These metrics should be published only once.
Expand Down Expand Up @@ -542,3 +553,10 @@ func parseSingleGpuLimit(limits string) (config.GpuLimits, error) {
}
return parsedGpuLimits, nil
}

// validateBalancingLabelFlags rejects configurations that set both the
// --balancing-label and --balancing-ignore-label flags: the first switches
// node group comparison to an explicit label list, so an ignore list given
// alongside it would never be consulted.
//
// It returns nil when at most one of the two lists is non-empty.
func validateBalancingLabelFlags(balancingLabelsFlag, balancingIgnoreLabelsFlag []string) error {
	if len(balancingLabelsFlag) > 0 && len(balancingIgnoreLabelsFlag) > 0 {
		// Name the flags exactly as they are registered so the message can be
		// matched against the command line the user actually typed.
		return fmt.Errorf("cannot set --balancing-label and --balancing-ignore-label at the same time")
	}
	return nil
}
78 changes: 53 additions & 25 deletions cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,13 @@ func resourceListWithinTolerance(qtyList []resource.Quantity, maxDifferenceRatio
return larger-smaller <= larger*maxDifferenceRatio
}

func compareLabels(nodes []*schedulerframework.NodeInfo, ignoredLabels map[string]bool) bool {
func compareLabels(nodes []*schedulerframework.NodeInfo, explicitLabels, ignoredLabels map[string]bool) bool {
labels := make(map[string][]string)
for _, node := range nodes {
for label, value := range node.Node().ObjectMeta.Labels {
ignore, _ := ignoredLabels[label]
if !ignore {
include, _ := explicitLabels[label]
if include || (len(explicitLabels) == 0 && !ignore) {
labels[label] = append(labels[label], value)
}
}
Expand All @@ -90,31 +91,10 @@ func compareLabels(nodes []*schedulerframework.NodeInfo, ignoredLabels map[strin
return true
}

// CreateGenericNodeInfoComparator builds a NodeInfoComparator that delegates to
// IsCloudProviderNodeInfoSimilar with an ignored-label set seeded from
// BasicIgnoredLabels and extended with extraIgnoredLabels.
func CreateGenericNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
	ignored := map[string]bool{}
	// Copy the defaults so the package-level map is never mutated.
	for label, skip := range BasicIgnoredLabels {
		ignored[label] = skip
	}
	// Extra labels are always ignored, overriding any default entry with the same key.
	for i := range extraIgnoredLabels {
		ignored[extraIgnoredLabels[i]] = true
	}

	comparator := func(a, b *schedulerframework.NodeInfo) bool {
		return IsCloudProviderNodeInfoSimilar(a, b, ignored)
	}
	return comparator
}

// IsCloudProviderNodeInfoSimilar returns true if two NodeInfos are similar enough to consider
// that the NodeGroups they come from are part of the same NodeGroupSet. The criteria are
// somewhat arbitrary, but generally we check if resources provided by both nodes
// are similar enough to likely be the same type of machine and if the set of labels
// is the same (except for a set of labels passed in to be ignored like hostname or zone).
func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignoredLabels map[string]bool) bool {
func isCloudProviderNodeTypeSimilar(nodes []*schedulerframework.NodeInfo) bool {
capacity := make(map[apiv1.ResourceName][]resource.Quantity)
allocatable := make(map[apiv1.ResourceName][]resource.Quantity)
free := make(map[apiv1.ResourceName][]resource.Quantity)
nodes := []*schedulerframework.NodeInfo{n1, n2}
for _, node := range nodes {
for res, quantity := range node.Node().Status.Capacity {
capacity[res] = append(capacity[res], quantity)
Expand Down Expand Up @@ -156,7 +136,55 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignored
return false
}

if !compareLabels(nodes, ignoredLabels) {
return true
}

// CreateGenericNodeInfoComparator builds a NodeInfoComparator that delegates to
// IsCloudProviderNodeInfoSimilar with an ignored-label set seeded from
// BasicIgnoredLabels and extended with extraIgnoredLabels.
func CreateGenericNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
	ignored := map[string]bool{}
	// Copy the defaults so the package-level map is never mutated.
	for label, skip := range BasicIgnoredLabels {
		ignored[label] = skip
	}
	// Extra labels are always ignored, overriding any default entry with the same key.
	for i := range extraIgnoredLabels {
		ignored[extraIgnoredLabels[i]] = true
	}

	comparator := func(a, b *schedulerframework.NodeInfo) bool {
		return IsCloudProviderNodeInfoSimilar(a, b, ignored)
	}
	return comparator
}

// CreateGenericNodeInfoLabelComparator returns a comparator that considers two
// nodes similar only when compareLabels, restricted to the given labels,
// accepts the pair and isCloudProviderNodeTypeSimilar accepts it as well.
//
// The label check runs first and short-circuits the node type check. When
// labels is empty, compareLabels falls back to its ignore-list mode with an
// empty ignore list.
func CreateGenericNodeInfoLabelComparator(labels []string) NodeInfoComparator {
	// Both lookup sets depend only on the constructor argument, so build them
	// once here instead of on every comparison.
	includedLabels := make(map[string]bool, len(labels))
	for _, l := range labels {
		includedLabels[l] = true
	}
	noIgnoredLabels := map[string]bool{}

	return func(n1, n2 *schedulerframework.NodeInfo) bool {
		nodes := []*schedulerframework.NodeInfo{n1, n2}
		return compareLabels(nodes, includedLabels, noIgnoredLabels) &&
			isCloudProviderNodeTypeSimilar(nodes)
	}
}

// IsCloudProviderNodeInfoSimilar returns true if two NodeInfos are similar enough to consider
// that the NodeGroups they come from are part of the same NodeGroupSet. The criteria are
// somewhat arbitrary, but generally we check if resources provided by both nodes
// are similar enough to likely be the same type of machine and if the set of labels
// is the same (except for a set of labels passed in to be ignored like hostname or zone).
func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignoredLabels map[string]bool) bool {
nodes := []*schedulerframework.NodeInfo{n1, n2}

if !isCloudProviderNodeTypeSimilar(nodes) {
return false
}
if !compareLabels(nodes, make(map[string]bool), ignoredLabels) {
return false
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,32 @@ func TestNodesSimilarVariousLabels(t *testing.T) {
n2.ObjectMeta.Labels["example.com/ready"] = "false"
checkNodesSimilar(t, n1, n2, comparator, true)
}

// TestNodesWithVariousExplicitLabels exercises the comparator returned by
// CreateGenericNodeInfoLabelComparator: labels outside the explicit list must
// not affect the result, while a mismatch in a listed label or in the node
// type must make the nodes dissimilar.
func TestNodesWithVariousExplicitLabels(t *testing.T) {
	comparator := CreateGenericNodeInfoLabelComparator([]string{"test-label", "foo"})

	// n1 carries an extra "character" label that is not in the explicit list.
	n1 := BuildTestNode("node1", 1000, 2000)
	n1.ObjectMeta.Labels["test-label"] = "test-value"
	n1.ObjectMeta.Labels["character"] = "winnie the pooh"

	n2 := BuildTestNode("node2", 1000, 2000)
	n2.ObjectMeta.Labels["test-label"] = "test-value"

	// n3 differs from n1 and n2 in the last BuildTestNode argument.
	n3 := BuildTestNode("node3", 1000, 1000)
	n3.ObjectMeta.Labels["test-label"] = "test-value"

	// Matching explicit balancing labels
	checkNodesSimilar(t, n1, n2, comparator, true)

	// Matching explicit balancing labels but dissimilar nodes
	checkNodesSimilar(t, n1, n3, comparator, false)

	// Mismatch in one of explicit balancing labels in the nodes
	n2.ObjectMeta.Labels["test-label"] = "something-else"
	checkNodesSimilar(t, n1, n2, comparator, false)

	// Balancing label present on one node, but missing on another.
	// Restore the matching "test-label" value first; otherwise the mismatch
	// introduced above would make this case pass without exercising the
	// missing-label path at all.
	n2.ObjectMeta.Labels["test-label"] = "test-value"
	comparator = CreateGenericNodeInfoLabelComparator([]string{"test-label", "character", "foo"})
	checkNodesSimilar(t, n1, n2, comparator, false)
}