diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go index de038ad6c7d..0f107c51417 100644 --- a/cluster-autoscaler/config/autoscaling_options.go +++ b/cluster-autoscaler/config/autoscaling_options.go @@ -135,6 +135,8 @@ type AutoscalingOptions struct { MaxBulkSoftTaintTime time.Duration // IgnoredTaints is a list of taints to ignore when considering a node template for scheduling. IgnoredTaints []string + // BalancingLabels is a list of labels to use when comparing if two node groups are similar. + BalancingLabels []string // BalancingExtraIgnoredLabels is a list of labels to additionally ignore when comparing if two node groups are similar. // Labels in BasicIgnoredLabels and the cloud provider-specific ignored labels are always ignored. BalancingExtraIgnoredLabels []string diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 3e0e37bd5b1..430d73016c2 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -170,6 +170,7 @@ var ( newPodScaleUpDelay = flag.Duration("new-pod-scale-up-delay", 0*time.Second, "Pods less than this old will not be considered for scale-up.") ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group") + balancingLabelsFlag = multiStringFlag("balancing-label", "Specifies a label to use when comparing if two node groups are similar") balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar") awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. 
AWS only") concurrentGceRefreshes = flag.Int("gce-concurrent-refreshes", 1, "Maximum number of concurrent refreshes per cloud object type.") @@ -195,6 +196,11 @@ func createAutoscalingOptions() config.AutoscalingOptions { if err != nil { klog.Fatalf("Failed to parse flags: %v", err) } + + err = validateBalancingLabelFlags(*balancingLabelsFlag, *balancingIgnoreLabelsFlag) + if err != nil { + klog.Fatalf("Failed to parse flags: %v", err) + } return config.AutoscalingOptions{ CloudConfig: *cloudConfig, CloudProviderName: *cloudProviderFlag, @@ -240,6 +246,7 @@ func createAutoscalingOptions() config.AutoscalingOptions { Regional: *regional, NewPodScaleUpDelay: *newPodScaleUpDelay, IgnoredTaints: *ignoreTaintsFlag, + BalancingLabels: *balancingLabelsFlag, BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag, KubeConfigPath: *kubeConfigFile, NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout, @@ -309,7 +316,11 @@ func buildAutoscaler() (core.Autoscaler, error) { opts.Processors.PodListProcessor = core.NewFilterOutSchedulablePodListProcessor() nodeInfoComparatorBuilder := nodegroupset.CreateGenericNodeInfoComparator - if autoscalingOptions.CloudProviderName == cloudprovider.AzureProviderName { + nodeInfoComparatorBuilderArgs := autoscalingOptions.BalancingExtraIgnoredLabels + if len(autoscalingOptions.BalancingLabels) > 0 { + nodeInfoComparatorBuilder = nodegroupset.CreateGenericNodeInfoLabelComparator + nodeInfoComparatorBuilderArgs = autoscalingOptions.BalancingLabels + } else if autoscalingOptions.CloudProviderName == cloudprovider.AzureProviderName { nodeInfoComparatorBuilder = nodegroupset.CreateAzureNodeInfoComparator } else if autoscalingOptions.CloudProviderName == cloudprovider.AwsProviderName { nodeInfoComparatorBuilder = nodegroupset.CreateAwsNodeInfoComparator @@ -318,7 +329,7 @@ func buildAutoscaler() (core.Autoscaler, error) { } opts.Processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{ - Comparator: 
// validateBalancingLabelFlags rejects flag combinations in which both
// --balancing-label and --balancing-ignore-label were supplied.
//
// balancingLabelsFlag holds the values given via --balancing-label and
// balancingIgnoreLabelsFlag the values given via --balancing-ignore-label.
// It returns a non-nil error only when both slices are non-empty; any other
// combination (including both empty or nil) is accepted.
func validateBalancingLabelFlags(balancingLabelsFlag, balancingIgnoreLabelsFlag []string) error {
	if len(balancingLabelsFlag) > 0 && len(balancingIgnoreLabelsFlag) > 0 {
		// Name the flags exactly as they are registered so the user can
		// find the offending arguments on the command line.
		return fmt.Errorf("cannot set --balancing-label and --balancing-ignore-label at the same time")
	}
	return nil
}
BasicIgnoredLabels { - genericIgnoredLabels[k] = v - } - for _, k := range extraIgnoredLabels { - genericIgnoredLabels[k] = true - } - - return func(n1, n2 *schedulerframework.NodeInfo) bool { - return IsCloudProviderNodeInfoSimilar(n1, n2, genericIgnoredLabels) - } -} - -// IsCloudProviderNodeInfoSimilar returns true if two NodeInfos are similar enough to consider -// that the NodeGroups they come from are part of the same NodeGroupSet. The criteria are -// somewhat arbitrary, but generally we check if resources provided by both nodes -// are similar enough to likely be the same type of machine and if the set of labels -// is the same (except for a set of labels passed in to be ignored like hostname or zone). -func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignoredLabels map[string]bool) bool { +func isCloudProviderNodeTypeSimilar(nodes []*schedulerframework.NodeInfo) bool { capacity := make(map[apiv1.ResourceName][]resource.Quantity) allocatable := make(map[apiv1.ResourceName][]resource.Quantity) free := make(map[apiv1.ResourceName][]resource.Quantity) - nodes := []*schedulerframework.NodeInfo{n1, n2} for _, node := range nodes { for res, quantity := range node.Node().Status.Capacity { capacity[res] = append(capacity[res], quantity) @@ -156,7 +136,55 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignored return false } - if !compareLabels(nodes, ignoredLabels) { + return true +} + +// CreateGenericNodeInfoComparator returns a generic comparator that checks for node group similarity +func CreateGenericNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator { + genericIgnoredLabels := make(map[string]bool) + for k, v := range BasicIgnoredLabels { + genericIgnoredLabels[k] = v + } + for _, k := range extraIgnoredLabels { + genericIgnoredLabels[k] = true + } + + return func(n1, n2 *schedulerframework.NodeInfo) bool { + return IsCloudProviderNodeInfoSimilar(n1, n2, genericIgnoredLabels) + } +} + +// 
CreateGenericNodeInfoLabelComparator returns a generic comparator that checks for node with matching labels alongside node group similarity +func CreateGenericNodeInfoLabelComparator(labels []string) NodeInfoComparator { + return func(n1, n2 *schedulerframework.NodeInfo) bool { + includedLabels := make(map[string]bool) + for _, l := range labels { + includedLabels[l] = true + } + + if !compareLabels([]*schedulerframework.NodeInfo{n1, n2}, includedLabels, make(map[string]bool)) { + return false + } + + if !isCloudProviderNodeTypeSimilar([]*schedulerframework.NodeInfo{n1, n2}) { + return false + } + return true + } +} + +// IsCloudProviderNodeInfoSimilar returns true if two NodeInfos are similar enough to consider +// that the NodeGroups they come from are part of the same NodeGroupSet. The criteria are +// somewhat arbitrary, but generally we check if resources provided by both nodes +// are similar enough to likely be the same type of machine and if the set of labels +// is the same (except for a set of labels passed in to be ignored like hostname or zone). 
+func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignoredLabels map[string]bool) bool { + nodes := []*schedulerframework.NodeInfo{n1, n2} + + if !isCloudProviderNodeTypeSimilar(nodes) { + return false + } + if !compareLabels(nodes, make(map[string]bool), ignoredLabels) { return false } diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go index 43236ad9077..7f42ed29e3b 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go @@ -197,3 +197,32 @@ func TestNodesSimilarVariousLabels(t *testing.T) { n2.ObjectMeta.Labels["example.com/ready"] = "false" checkNodesSimilar(t, n1, n2, comparator, true) } + +func TestNodesWithVariousExplicitLabels(t *testing.T) { + comparator := CreateGenericNodeInfoLabelComparator([]string{"test-label", "foo"}) + n1 := BuildTestNode("node1", 1000, 2000) + n1.ObjectMeta.Labels["test-label"] = "test-value" + n1.ObjectMeta.Labels["character"] = "winnie the pooh" + + n2 := BuildTestNode("node2", 1000, 2000) + n2.ObjectMeta.Labels["test-label"] = "test-value" + + n3 := BuildTestNode("node3", 1000, 1000) + n3.ObjectMeta.Labels["test-label"] = "test-value" + + // Matching explicit balancing labels + checkNodesSimilar(t, n1, n2, comparator, true) + + // Matching explicit balancing labels but dissimilar nodes + checkNodesSimilar(t, n1, n3, comparator, false) + + n2.ObjectMeta.Labels["test-label"] = "something-else" + + // Mismatch in one of explicit balancing labels in the nodes + checkNodesSimilar(t, n1, n2, comparator, false) + + // Balancing label present on one node, but missing on another + comparator = CreateGenericNodeInfoLabelComparator([]string{"test-label", "character", "foo"}) + + checkNodesSimilar(t, n1, n2, comparator, false) +}