Skip to content

Commit

Permalink
Initial work on explicity specify labels to use for node balancing.
Browse files Browse the repository at this point in the history
  • Loading branch information
rtnpro committed Jan 21, 2021
1 parent 0972eca commit a76493f
Show file tree
Hide file tree
Showing 12 changed files with 62 additions and 29 deletions.
3 changes: 3 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ type AutoscalingOptions struct {
// BalancingExtraIgnoredLabels is a list of labels to additionally ignore when comparing if two node groups are similar.
// Labels in BasicIgnoredLabels and the cloud provider-specific ignored labels are always ignored.
BalancingExtraIgnoredLabels []string
// BalancingNodeLabels is a list of labels to consider when comparing if two node groups are similar.
// Labels in BasicIgnoredLabels and the cloud provider-specific ignored labels are always ignored.
BalancingNodeLabels []string
// AWSUseStaticInstanceList tells if AWS cloud provider use static instance type list or dynamically fetch from remote APIs.
AWSUseStaticInstanceList bool
// ConcurrentGceRefreshes is the maximum number of concurrently refreshed instance groups or instance templates.
Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/core/scale_test_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ func NewTestProcessors() *processors.AutoscalingProcessors {
return &processors.AutoscalingProcessors{
PodListProcessor: NewFilterOutSchedulablePodListProcessor(),
NodeGroupListProcessor: &nodegroups.NoOpNodeGroupListProcessor{},
NodeGroupSetProcessor: nodegroupset.NewDefaultNodeGroupSetProcessor([]string{}),
NodeGroupSetProcessor: nodegroupset.NewDefaultNodeGroupSetProcessor([]string{}, []string{}),
// TODO(bskiba): change scale up test so that this can be a NoOpProcessor
ScaleUpStatusProcessor: &status.EventingScaleUpStatusProcessor{},
ScaleDownStatusProcessor: &status.NoOpScaleDownStatusProcessor{},
Expand Down
4 changes: 3 additions & 1 deletion cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ var (

ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
balancingNodeLabelsFlag = multiStringFlag("balancing-node-label", "Specifies a label to consider in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
concurrentGceRefreshes = flag.Int("gce-concurrent-refreshes", 1, "Maximum number of concurrent refreshes per cloud object type.")
enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
Expand Down Expand Up @@ -241,6 +242,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
NewPodScaleUpDelay: *newPodScaleUpDelay,
IgnoredTaints: *ignoreTaintsFlag,
BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
BalancingNodeLabels: *balancingNodeLabelsFlag,
KubeConfigPath: *kubeConfigFile,
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
AWSUseStaticInstanceList: *awsUseStaticInstanceList,
Expand Down Expand Up @@ -318,7 +320,7 @@ func buildAutoscaler() (core.Autoscaler, error) {
}

opts.Processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{
Comparator: nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels),
Comparator: nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels, autoscalingOptions.BalancingNodeLabels),
}

// These metrics should be published only once.
Expand Down
9 changes: 7 additions & 2 deletions cluster-autoscaler/processors/nodegroupset/aws_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,15 @@ import (
// CreateAwsNodeInfoComparator returns a comparator that checks if two nodes should be considered
// part of the same NodeGroupSet. This is true if they match usual conditions checked by IsCloudProviderNodeInfoSimilar,
// even if they have different AWS-specific labels.
func CreateAwsNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
func CreateAwsNodeInfoComparator(extraIgnoredLabels, extraNodeLabels []string) NodeInfoComparator {
awsIgnoredLabels := map[string]bool{
"alpha.eksctl.io/instance-id": true, // this is a label used by eksctl to identify instances.
"alpha.eksctl.io/nodegroup-name": true, // this is a label used by eksctl to identify "node group" names.
"eks.amazonaws.com/nodegroup": true, // this is a label used by eks to identify "node group".
"k8s.amazonaws.com/eniConfig": true, // this is a label used by the AWS CNI for custom networking.
"lifecycle": true, // this is a label used by the AWS for spot.
}
awsIncludedLabels := make(map[string]bool)

for k, v := range BasicIgnoredLabels {
awsIgnoredLabels[k] = v
Expand All @@ -40,7 +41,11 @@ func CreateAwsNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator
awsIgnoredLabels[k] = true
}

for _, k := range extraNodeLabels {
awsIncludedLabels[k] = true
}

return func(n1, n2 *schedulerframework.NodeInfo) bool {
return IsCloudProviderNodeInfoSimilar(n1, n2, awsIgnoredLabels)
return IsCloudProviderNodeInfoSimilar(n1, n2, awsIgnoredLabels, awsIncludedLabels)
}
}
10 changes: 8 additions & 2 deletions cluster-autoscaler/processors/nodegroupset/azure_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@ func nodesFromSameAzureNodePool(n1, n2 *schedulerframework.NodeInfo) bool {
// CreateAzureNodeInfoComparator returns a comparator that checks if two nodes should be considered
// part of the same NodeGroupSet. This is true if they either belong to the same Azure agentpool
// or match usual conditions checked by IsCloudProviderNodeInfoSimilar, even if they have different agentpool labels.
func CreateAzureNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
func CreateAzureNodeInfoComparator(extraIgnoredLabels, extraNodeLabels []string) NodeInfoComparator {
azureIgnoredLabels := make(map[string]bool)
azureIncludedLabels := make(map[string]bool)

for k, v := range BasicIgnoredLabels {
azureIgnoredLabels[k] = v
}
Expand All @@ -42,10 +44,14 @@ func CreateAzureNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparat
azureIgnoredLabels[k] = true
}

for _, k := range extraNodeLabels {
azureIncludedLabels[k] = true
}

return func(n1, n2 *schedulerframework.NodeInfo) bool {
if nodesFromSameAzureNodePool(n1, n2) {
return true
}
return IsCloudProviderNodeInfoSimilar(n1, n2, azureIgnoredLabels)
return IsCloudProviderNodeInfoSimilar(n1, n2, azureIgnoredLabels, azureIncludedLabels)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (
)

func TestIsAzureNodeInfoSimilar(t *testing.T) {
comparator := CreateAzureNodeInfoComparator([]string{"example.com/ready"})
comparator := CreateAzureNodeInfoComparator([]string{"example.com/ready"}, []string{})
n1 := BuildTestNode("node1", 1000, 2000)
n1.ObjectMeta.Labels["test-label"] = "test-value"
n1.ObjectMeta.Labels["character"] = "thing"
Expand Down Expand Up @@ -71,12 +71,12 @@ func TestIsAzureNodeInfoSimilar(t *testing.T) {
func TestFindSimilarNodeGroupsAzureBasic(t *testing.T) {
context := &context.AutoscalingContext{}
ni1, ni2, ni3 := buildBasicNodeGroups(context)
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAzureNodeInfoComparator([]string{})}
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAzureNodeInfoComparator([]string{}, []string{})}
basicSimilarNodeGroupsTest(t, context, processor, ni1, ni2, ni3)
}

func TestFindSimilarNodeGroupsAzureByLabel(t *testing.T) {
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAzureNodeInfoComparator([]string{})}
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAzureNodeInfoComparator([]string{}, []string{})}
context := &context.AutoscalingContext{}

n1 := BuildTestNode("n1", 1000, 1000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func basicSimilarNodeGroupsTest(
func TestFindSimilarNodeGroups(t *testing.T) {
context := &context.AutoscalingContext{}
ni1, ni2, ni3 := buildBasicNodeGroups(context)
processor := NewDefaultNodeGroupSetProcessor([]string{})
processor := NewDefaultNodeGroupSetProcessor([]string{}, []string{})
basicSimilarNodeGroupsTest(t, context, processor, ni1, ni2, ni3)
}

Expand All @@ -94,7 +94,7 @@ func TestFindSimilarNodeGroupsCustomLabels(t *testing.T) {
ni1.Node().Labels["example.com/ready"] = "true"
ni2.Node().Labels["example.com/ready"] = "false"

processor := NewDefaultNodeGroupSetProcessor([]string{"example.com/ready"})
processor := NewDefaultNodeGroupSetProcessor([]string{"example.com/ready"}, []string{})
basicSimilarNodeGroupsTest(t, context, processor, ni1, ni2, ni3)
}

Expand All @@ -112,7 +112,7 @@ func TestFindSimilarNodeGroupsCustomComparator(t *testing.T) {
}

func TestBalanceSingleGroup(t *testing.T) {
processor := NewDefaultNodeGroupSetProcessor([]string{})
processor := NewDefaultNodeGroupSetProcessor([]string{}, []string{})
context := &context.AutoscalingContext{}

provider := testprovider.NewTestCloudProvider(nil, nil)
Expand All @@ -132,7 +132,7 @@ func TestBalanceSingleGroup(t *testing.T) {
}

func TestBalanceUnderMaxSize(t *testing.T) {
processor := NewDefaultNodeGroupSetProcessor([]string{})
processor := NewDefaultNodeGroupSetProcessor([]string{}, []string{})
context := &context.AutoscalingContext{}

provider := testprovider.NewTestCloudProvider(nil, nil)
Expand Down Expand Up @@ -182,7 +182,7 @@ func TestBalanceUnderMaxSize(t *testing.T) {
}

func TestBalanceHittingMaxSize(t *testing.T) {
processor := NewDefaultNodeGroupSetProcessor([]string{})
processor := NewDefaultNodeGroupSetProcessor([]string{}, []string{})
context := &context.AutoscalingContext{}

provider := testprovider.NewTestCloudProvider(nil, nil)
Expand Down
18 changes: 15 additions & 3 deletions cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package nodegroupset

import (
"math"
"fmt"

apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand Down Expand Up @@ -91,17 +92,23 @@ func compareLabels(nodes []*schedulerframework.NodeInfo, ignoredLabels map[strin
}

// CreateGenericNodeInfoComparator returns a generic comparator that checks for node group similarity
func CreateGenericNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
func CreateGenericNodeInfoComparator(extraIgnoredLabels, extraNodeLabels []string) NodeInfoComparator {
genericIgnoredLabels := make(map[string]bool)
genericIncludedLabels := make(map[string]bool)

for k, v := range BasicIgnoredLabels {
genericIgnoredLabels[k] = v
}
for _, k := range extraIgnoredLabels {
genericIgnoredLabels[k] = true
}

for _, k := range extraNodeLabels {
genericIncludedLabels[k] = true
}

return func(n1, n2 *schedulerframework.NodeInfo) bool {
return IsCloudProviderNodeInfoSimilar(n1, n2, genericIgnoredLabels)
return IsCloudProviderNodeInfoSimilar(n1, n2, genericIgnoredLabels, genericIncludedLabels)
}
}

Expand All @@ -110,7 +117,7 @@ func CreateGenericNodeInfoComparator(extraIgnoredLabels []string) NodeInfoCompar
// somewhat arbitrary, but generally we check if resources provided by both nodes
// are similar enough to likely be the same type of machine and if the set of labels
// is the same (except for a set of labels passed in to be ignored like hostname or zone).
func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignoredLabels map[string]bool) bool {
func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignoredLabels, includedLabels map[string]bool) bool {
capacity := make(map[apiv1.ResourceName][]resource.Quantity)
allocatable := make(map[apiv1.ResourceName][]resource.Quantity)
free := make(map[apiv1.ResourceName][]resource.Quantity)
Expand Down Expand Up @@ -160,5 +167,10 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulerframework.NodeInfo, ignored
return false
}

fmt.Println(includedLabels, len(includedLabels))
if len(includedLabels) > 0 && !compareLabels(nodes, includedLabels) {
return false
}

return true
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ func checkNodesSimilarWithPods(t *testing.T, n1, n2 *apiv1.Node, pods1, pods2 []
}

func TestIdenticalNodesSimilar(t *testing.T) {
comparator := CreateGenericNodeInfoComparator([]string{})
comparator := CreateGenericNodeInfoComparator([]string{}, []string{})
n1 := BuildTestNode("node1", 1000, 2000)
n2 := BuildTestNode("node2", 1000, 2000)
checkNodesSimilar(t, n1, n2, comparator, true)
}

func TestNodesSimilarVariousRequirements(t *testing.T) {
comparator := CreateGenericNodeInfoComparator([]string{})
comparator := CreateGenericNodeInfoComparator([]string{}, []string{})
n1 := BuildTestNode("node1", 1000, 2000)

// Different CPU capacity
Expand All @@ -74,7 +74,7 @@ func TestNodesSimilarVariousRequirements(t *testing.T) {
}

func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) {
comparator := CreateGenericNodeInfoComparator([]string{})
comparator := CreateGenericNodeInfoComparator([]string{}, []string{})
n1 := BuildTestNode("node1", 1000, 2000)
p1 := BuildTestPod("pod1", 500, 1000)
p1.Spec.NodeName = "node1"
Expand All @@ -100,7 +100,7 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) {
}

func TestNodesSimilarVariousMemoryRequirements(t *testing.T) {
comparator := CreateGenericNodeInfoComparator([]string{})
comparator := CreateGenericNodeInfoComparator([]string{}, []string{})
n1 := BuildTestNode("node1", 1000, 1000)

// Different memory capacity within tolerance
Expand All @@ -115,7 +115,7 @@ func TestNodesSimilarVariousMemoryRequirements(t *testing.T) {
}

func TestNodesSimilarVariousLargeMemoryRequirementsM5XLarge(t *testing.T) {
comparator := CreateGenericNodeInfoComparator([]string{})
comparator := CreateGenericNodeInfoComparator([]string{}, []string{})

// Use realistic memory capacity (taken from real nodes)
// 15944120 KB ~= 16GiB (m5.xLarge)
Expand All @@ -137,7 +137,7 @@ func TestNodesSimilarVariousLargeMemoryRequirementsM5XLarge(t *testing.T) {
}

func TestNodesSimilarVariousLargeMemoryRequirementsM516XLarge(t *testing.T) {
comparator := CreateGenericNodeInfoComparator([]string{})
comparator := CreateGenericNodeInfoComparator([]string{}, []string{})

// Use realistic memory capacity (taken from real nodes)
// 257217528 KB ~= 256GiB (m5.16xLarge)
Expand All @@ -159,7 +159,7 @@ func TestNodesSimilarVariousLargeMemoryRequirementsM516XLarge(t *testing.T) {
}

func TestNodesSimilarVariousLabels(t *testing.T) {
comparator := CreateGenericNodeInfoComparator([]string{"example.com/ready"})
comparator := CreateGenericNodeInfoComparator([]string{"example.com/ready"}, []string{})
n1 := BuildTestNode("node1", 1000, 2000)
n1.ObjectMeta.Labels["test-label"] = "test-value"
n1.ObjectMeta.Labels["character"] = "winnie the pooh"
Expand Down
9 changes: 7 additions & 2 deletions cluster-autoscaler/processors/nodegroupset/gce_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ import (
// CreateGceNodeInfoComparator returns a comparator that checks if two nodes should be considered
// part of the same NodeGroupSet. This is true if they match usual conditions checked by IsCloudProviderNodeInfoSimilar,
// even if they have different GCE-specific labels.
func CreateGceNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
func CreateGceNodeInfoComparator(extraIgnoredLabels, extraNodeLabels []string) NodeInfoComparator {
gceIgnoredLabels := map[string]bool{
"topology.gke.io/zone": true,
}
gceIncludedLabels := make(map[string]bool)

for k, v := range BasicIgnoredLabels {
gceIgnoredLabels[k] = v
Expand All @@ -36,7 +37,11 @@ func CreateGceNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator
gceIgnoredLabels[k] = true
}

for _, k := range extraNodeLabels {
gceIncludedLabels[k] = true
}

return func(n1, n2 *schedulerframework.NodeInfo) bool {
return IsCloudProviderNodeInfoSimilar(n1, n2, gceIgnoredLabels)
return IsCloudProviderNodeInfoSimilar(n1, n2, gceIgnoredLabels, gceIncludedLabels)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ func (n *NoOpNodeGroupSetProcessor) BalanceScaleUpBetweenGroups(context *context
func (n *NoOpNodeGroupSetProcessor) CleanUp() {}

// NewDefaultNodeGroupSetProcessor creates an instance of NodeGroupSetProcessor.
func NewDefaultNodeGroupSetProcessor(ignoredLabels []string) NodeGroupSetProcessor {
func NewDefaultNodeGroupSetProcessor(ignoredLabels, includedLabels []string) NodeGroupSetProcessor {
return &BalancingNodeGroupSetProcessor{
Comparator: CreateGenericNodeInfoComparator(ignoredLabels),
Comparator: CreateGenericNodeInfoComparator(ignoredLabels, includedLabels),
}
}
2 changes: 1 addition & 1 deletion cluster-autoscaler/processors/processors.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func DefaultProcessors() *AutoscalingProcessors {
return &AutoscalingProcessors{
PodListProcessor: pods.NewDefaultPodListProcessor(),
NodeGroupListProcessor: nodegroups.NewDefaultNodeGroupListProcessor(),
NodeGroupSetProcessor: nodegroupset.NewDefaultNodeGroupSetProcessor([]string{}),
NodeGroupSetProcessor: nodegroupset.NewDefaultNodeGroupSetProcessor([]string{}, []string{}),
ScaleUpStatusProcessor: status.NewDefaultScaleUpStatusProcessor(),
ScaleDownNodeProcessor: nodes.NewPreFilteringScaleDownNodeProcessor(),
ScaleDownStatusProcessor: status.NewDefaultScaleDownStatusProcessor(),
Expand Down

0 comments on commit a76493f

Please sign in to comment.