diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go index 4dcd95245f5..df0327dcd14 100644 --- a/cluster-autoscaler/config/autoscaling_options.go +++ b/cluster-autoscaler/config/autoscaling_options.go @@ -148,6 +148,9 @@ type AutoscalingOptions struct { // BalancingExtraIgnoredLabels is a list of labels to additionally ignore when comparing if two node groups are similar. // Labels in BasicIgnoredLabels and the cloud provider-specific ignored labels are always ignored. BalancingExtraIgnoredLabels []string + // BalancingLabels is a list of labels to use when comparing if two node groups are similar. + // If this is set, only labels are used to compare node groups. It is mutually exclusive with BalancingExtraIgnoredLabels. + BalancingLabels []string // AWSUseStaticInstanceList tells if AWS cloud provider use static instance type list or dynamically fetch from remote APIs. AWSUseStaticInstanceList bool // ConcurrentGceRefreshes is the maximum number of concurrently refreshed instance groups or instance templates. diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 37459a175bf..a06e2baa8dd 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -172,6 +172,7 @@ var ( ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group") balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar") + balancingLabelsFlag = multiStringFlag("balancing-label", "Specifies a label to use for comparing if two node groups are similar, rather than the built in heuristics. 
Setting this flag disables all other comparison logic, and cannot be combined with --balancing-ignore-label.") awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only") concurrentGceRefreshes = flag.Int("gce-concurrent-refreshes", 1, "Maximum number of concurrent refreshes per cloud object type.") enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled") @@ -248,6 +249,7 @@ func createAutoscalingOptions() config.AutoscalingOptions { NewPodScaleUpDelay: *newPodScaleUpDelay, IgnoredTaints: *ignoreTaintsFlag, BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag, + BalancingLabels: *balancingLabelsFlag, KubeConfigPath: *kubeConfigFile, NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout, AWSUseStaticInstanceList: *awsUseStaticInstanceList, @@ -318,17 +320,19 @@ func buildAutoscaler() (core.Autoscaler, error) { opts.Processors = ca_processors.DefaultProcessors() opts.Processors.PodListProcessor = core.NewFilterOutSchedulablePodListProcessor() - nodeInfoComparatorBuilder := nodegroupset.CreateGenericNodeInfoComparator - if autoscalingOptions.CloudProviderName == cloudprovider.AzureProviderName { - nodeInfoComparatorBuilder = nodegroupset.CreateAzureNodeInfoComparator + var nodeInfoComparator nodegroupset.NodeInfoComparator + if len(autoscalingOptions.BalancingLabels) > 0 { + nodeInfoComparator = nodegroupset.CreateLabelNodeInfoComparator(autoscalingOptions.BalancingLabels) + } else if autoscalingOptions.CloudProviderName == cloudprovider.AzureProviderName { + nodeInfoComparator = nodegroupset.CreateAzureNodeInfoComparator(autoscalingOptions.BalancingExtraIgnoredLabels) } else if autoscalingOptions.CloudProviderName == cloudprovider.AwsProviderName { - nodeInfoComparatorBuilder = nodegroupset.CreateAwsNodeInfoComparator + nodeInfoComparator = nodegroupset.CreateAwsNodeInfoComparator(autoscalingOptions.BalancingExtraIgnoredLabels) } else if 
autoscalingOptions.CloudProviderName == cloudprovider.GceProviderName { - nodeInfoComparatorBuilder = nodegroupset.CreateGceNodeInfoComparator + nodeInfoComparator = nodegroupset.CreateGceNodeInfoComparator(autoscalingOptions.BalancingExtraIgnoredLabels) } opts.Processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{ - Comparator: nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels), + Comparator: nodeInfoComparator, } // These metrics should be published only once. diff --git a/cluster-autoscaler/processors/nodegroupset/label_nodegroups.go b/cluster-autoscaler/processors/nodegroupset/label_nodegroups.go new file mode 100644 index 00000000000..c38f30fb005 --- /dev/null +++ b/cluster-autoscaler/processors/nodegroupset/label_nodegroups.go @@ -0,0 +1,41 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodegroupset + +import ( + klog "k8s.io/klog/v2" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" +) + +// CreateLabelNodeInfoComparator returns a comparator that checks for node group similarity using the given labels. 
+func CreateLabelNodeInfoComparator(labels []string) NodeInfoComparator {
+	// The closure captures labels, so every comparison made through the
+	// returned comparator looks at exactly this set of label keys.
+	return func(n1, n2 *schedulerframework.NodeInfo) bool {
+		return areLabelsSame(n1, n2, labels)
+	}
+}
+
+// areLabelsSame reports whether both nodes carry every label listed in labels
+// with identical values.
+//
+// A label that is absent from either node counts as a mismatch. A plain map
+// index would read a missing key back as "", which would make two nodes that
+// both lack the label (or one lacking it and one setting it to "") compare as
+// equal; the comma-ok lookups below rule that out.
+//
+// An empty labels slice yields true because there is nothing to compare.
+func areLabelsSame(n1, n2 *schedulerframework.NodeInfo, labels []string) bool {
+	// The nodes do not change between iterations, so fetch them once.
+	node1, node2 := n1.Node(), n2.Node()
+	for _, label := range labels {
+		val1, ok := node1.ObjectMeta.Labels[label]
+		if !ok {
+			klog.V(4).Infof("%s label not present on %s", label, node1.Name)
+			return false
+		}
+		val2, ok := node2.ObjectMeta.Labels[label]
+		if !ok {
+			klog.V(4).Infof("%s label not present on %s", label, node2.Name)
+			return false
+		}
+		if val1 != val2 {
+			klog.V(4).Infof("%s label did not match. %s: %s, %s: %s", label, node1.Name, val1, node2.Name, val2)
+			return false
+		}
+	}
+	return true
+}