From d9c7e269a907344dcd8c68bdeed446f1338977a3 Mon Sep 17 00:00:00 2001 From: Kubernetes Prow Robot Date: Wed, 27 Sep 2023 02:41:22 -0700 Subject: [PATCH] Merge pull request #6132 from atwamahmoud/status-taints-support Adds support for startup taints and extends support for status taints --- .../config/autoscaling_options.go | 6 +- .../core/scaledown/actuation/actuator_test.go | 161 +++++++++++++++++- cluster-autoscaler/core/static_autoscaler.go | 2 +- cluster-autoscaler/main.go | 7 +- cluster-autoscaler/utils/kubernetes/ready.go | 6 +- cluster-autoscaler/utils/taints/taints.go | 59 ++++--- .../utils/taints/taints_test.go | 89 +++++++--- 7 files changed, 270 insertions(+), 60 deletions(-) diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go index f1b913f84ef4..419776067ef3 100644 --- a/cluster-autoscaler/config/autoscaling_options.go +++ b/cluster-autoscaler/config/autoscaling_options.go @@ -194,10 +194,10 @@ type AutoscalingOptions struct { MaxBulkSoftTaintTime time.Duration // MaxPodEvictionTime sets the maximum time CA tries to evict a pod before giving up. MaxPodEvictionTime time.Duration - // IgnoredTaints is a list of taints CA considers to reflect transient node + // StartupTaints is a list of taints CA considers to reflect transient node // status that should be removed when creating a node template for scheduling. - // The ignored taints are expected to appear during node startup. - IgnoredTaints []string + // startup taints are expected to appear during node startup. + StartupTaints []string // StatusTaints is a list of taints CA considers to reflect transient node // status that should be removed when creating a node template for scheduling. // The status taints are expected to appear during node lifetime, after startup. diff --git a/cluster-autoscaler/core/scaledown/actuation/actuator_test.go b/cluster-autoscaler/core/scaledown/actuation/actuator_test.go index 6c12da493270..bf672378060a 100644 --- a/cluster-autoscaler/core/scaledown/actuation/actuator_test.go +++ b/cluster-autoscaler/core/scaledown/actuation/actuator_test.go @@ -788,11 +788,162 @@ func TestStartDeletion(t *testing.T) { return true, nil, nil }) - // Hook node deletion at the level of cloud provider, to gather which nodes were deleted, and to fail the deletion for - // certain nodes to simulate errors. - provider := testprovider.NewTestCloudProvider(nil, func(nodeGroup string, node string) error { - if tc.failedNodeDeletion[node] { - return fmt.Errorf("SIMULATED ERROR: won't remove node") + // Set up other needed structures and options. + opts := config.AutoscalingOptions{ + MaxScaleDownParallelism: 10, + MaxDrainParallelism: 5, + MaxPodEvictionTime: 0, + DaemonSetEvictionForEmptyNodes: true, + } + + allPods := []*apiv1.Pod{} + + for _, pods := range tc.pods { + allPods = append(allPods, pods...) + } + + podLister := kube_util.NewTestPodLister(allPods) + pdbLister := kube_util.NewTestPodDisruptionBudgetLister([]*policyv1.PodDisruptionBudget{}) + dsLister, err := kube_util.NewTestDaemonSetLister([]*appsv1.DaemonSet{ds}) + if err != nil { + t.Fatalf("Couldn't create daemonset lister") + } + + registry := kube_util.NewListerRegistry(nil, nil, podLister, pdbLister, dsLister, nil, nil, nil, nil) + ctx, err := NewScaleTestAutoscalingContext(opts, fakeClient, registry, provider, nil, nil) + if err != nil { + t.Fatalf("Couldn't set up autoscaling context: %v", err) + } + csr := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, ctx.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute})) + for _, bucket := range emptyNodeGroupViews { + for _, node := range bucket.Nodes { + err := ctx.ClusterSnapshot.AddNodeWithPods(node, tc.pods[node.Name]) + if err != nil { + t.Fatalf("Couldn't add node %q to snapshot: %v", node.Name, err) + } + } + } + for _, bucket := range drainNodeGroupViews { + for _, node := range bucket.Nodes { + pods, found := tc.pods[node.Name] + if !found { + t.Fatalf("Drain node %q doesn't have pods defined in the test case.", node.Name) + } + err := ctx.ClusterSnapshot.AddNodeWithPods(node, pods) + if err != nil { + t.Fatalf("Couldn't add node %q to snapshot: %v", node.Name, err) + } + } + } + + wantScaleDownStatus := &status.ScaleDownStatus{ + Result: tc.wantStatus.result, + } + for _, scaleDownNodeInfo := range tc.wantStatus.scaledDownNodes { + statusScaledDownNode := &status.ScaleDownNode{ + Node: generateNode(scaleDownNodeInfo.name), + NodeGroup: tc.nodeGroups[scaleDownNodeInfo.nodeGroup], + EvictedPods: scaleDownNodeInfo.evictedPods, + UtilInfo: scaleDownNodeInfo.utilInfo, + } + wantScaleDownStatus.ScaledDownNodes = append(wantScaleDownStatus.ScaledDownNodes, statusScaledDownNode) + } + + // Create Actuator, run StartDeletion, and verify the error. + ndt := deletiontracker.NewNodeDeletionTracker(0) + ndb := NewNodeDeletionBatcher(&ctx, csr, ndt, 0*time.Second) + evictor := Evictor{EvictionRetryTime: 0, DsEvictionRetryTime: 0, DsEvictionEmptyNodeTimeout: 0, PodEvictionHeadroom: DefaultPodEvictionHeadroom} + actuator := Actuator{ + ctx: &ctx, clusterState: csr, nodeDeletionTracker: ndt, + nodeDeletionScheduler: NewGroupDeletionScheduler(&ctx, ndt, ndb, evictor), + budgetProcessor: budgets.NewScaleDownBudgetProcessor(&ctx), + configGetter: nodegroupconfig.NewDefaultNodeGroupConfigProcessor(ctx.NodeGroupDefaults), + } + gotStatus, gotErr := actuator.StartDeletion(allEmptyNodes, allDrainNodes) + if diff := cmp.Diff(tc.wantErr, gotErr, cmpopts.EquateErrors()); diff != "" { + t.Errorf("StartDeletion error diff (-want +got):\n%s", diff) + } + + // Verify ScaleDownStatus looks as expected. + ignoreSdNodeOrder := cmpopts.SortSlices(func(a, b *status.ScaleDownNode) bool { return a.Node.Name < b.Node.Name }) + ignoreTimestamps := cmpopts.IgnoreFields(status.ScaleDownStatus{}, "NodeDeleteResultsAsOf") + cmpNg := cmp.Comparer(func(a, b *testprovider.TestNodeGroup) bool { return a.Id() == b.Id() }) + statusCmpOpts := cmp.Options{ignoreSdNodeOrder, ignoreTimestamps, cmpNg, cmpopts.EquateEmpty()} + if diff := cmp.Diff(wantScaleDownStatus, gotStatus, statusCmpOpts); diff != "" { + t.Errorf("StartDeletion status diff (-want +got):\n%s", diff) + } + + // Verify that all expected nodes were deleted using the cloud provider hook. + var gotDeletedNodes []string + nodesLoop: + for i := 0; i < len(tc.wantDeletedNodes); i++ { + select { + case deletedNode := <-deletedNodes: + gotDeletedNodes = append(gotDeletedNodes, deletedNode) + case <-time.After(3 * time.Second): + t.Errorf("Timeout while waiting for deleted nodes.") + break nodesLoop + } + } + ignoreStrOrder := cmpopts.SortSlices(func(a, b string) bool { return a < b }) + if diff := cmp.Diff(tc.wantDeletedNodes, gotDeletedNodes, ignoreStrOrder); diff != "" { + t.Errorf("deletedNodes diff (-want +got):\n%s", diff) + } + + // Verify that all expected pods were deleted using the fake k8s client hook. + var gotDeletedPods []string + podsLoop: + for i := 0; i < len(tc.wantDeletedPods); i++ { + select { + case deletedPod := <-deletedPods: + gotDeletedPods = append(gotDeletedPods, deletedPod) + case <-time.After(3 * time.Second): + t.Errorf("Timeout while waiting for deleted pods.") + break podsLoop + } + } + if diff := cmp.Diff(tc.wantDeletedPods, gotDeletedPods, ignoreStrOrder); diff != "" { + t.Errorf("deletedPods diff (-want +got):\n%s", diff) + } + + // Verify that all expected taint updates happened using the fake k8s client hook. + allUpdatesCount := 0 + for _, updates := range tc.wantTaintUpdates { + allUpdatesCount += len(updates) + } + gotTaintUpdates := make(map[string][][]apiv1.Taint) + taintsLoop: + for i := 0; i < allUpdatesCount; i++ { + select { + case taintUpdate := <-taintUpdates: + gotTaintUpdates[taintUpdate.nodeName] = append(gotTaintUpdates[taintUpdate.nodeName], taintUpdate.taints) + case <-time.After(3 * time.Second): + t.Errorf("Timeout while waiting for taint updates.") + break taintsLoop + } + } + startupTaintValue := cmpopts.IgnoreFields(apiv1.Taint{}, "Value") + if diff := cmp.Diff(tc.wantTaintUpdates, gotTaintUpdates, startupTaintValue, cmpopts.EquateEmpty()); diff != "" { + t.Errorf("taintUpdates diff (-want +got):\n%s", diff) + } + + // Wait for all expected deletions to be reported in NodeDeletionTracker. Reporting happens shortly after the deletion + // in cloud provider we sync to above and so this will usually not wait at all. However, it can still happen + // that there is a delay between cloud provider deletion and reporting, in which case the results are not there yet + // and we need to wait for them before asserting. + err = waitForDeletionResultsCount(actuator.nodeDeletionTracker, len(tc.wantNodeDeleteResults), 3*time.Second, 200*time.Millisecond) + if err != nil { + t.Errorf("Timeout while waiting for node deletion results") + } + + // Run StartDeletion again to gather node deletion results for deletions started in the previous call, and verify + // that they look as expected. + gotNextStatus, gotNextErr := actuator.StartDeletion(nil, nil) + if gotNextErr != nil { + t.Errorf("StartDeletion unexpected error: %v", gotNextErr) + } + if diff := cmp.Diff(tc.wantNodeDeleteResults, gotNextStatus.NodeDeleteResults, cmpopts.EquateEmpty(), cmpopts.EquateErrors()); diff != "" { + t.Errorf("NodeDeleteResults diff (-want +got):\n%s", diff) } deletedNodes <- node return nil diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index e3828149931a..ea61b6f02eeb 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -894,7 +894,7 @@ func (a *StaticAutoscaler) obtainNodeLists(cp cloudprovider.CloudProvider) ([]*a // our normal handling for booting up nodes deal with this. // TODO: Remove this call when we handle dynamically provisioned resources. allNodes, readyNodes = a.processors.CustomResourcesProcessor.FilterOutNodesWithUnreadyResources(a.AutoscalingContext, allNodes, readyNodes) - allNodes, readyNodes = taints.FilterOutNodesWithIgnoredTaints(a.taintConfig.IgnoredTaints, allNodes, readyNodes) + allNodes, readyNodes = taints.FilterOutNodesWithStartupTaints(a.taintConfig, allNodes, readyNodes) return allNodes, readyNodes, nil } diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 1baf7c22ae47..f92273e15d5f 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -190,7 +190,9 @@ var ( regional = flag.Bool("regional", false, "Cluster is regional.") newPodScaleUpDelay = flag.Duration("new-pod-scale-up-delay", 0*time.Second, "Pods less than this old will not be considered for scale-up. Can be increased for individual pods through annotation 'cluster-autoscaler.kubernetes.io/pod-scale-up-delay'.") - ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group") + ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group (Deprecated, use startup-taints instead)") + startupTaintsFlag = multiStringFlag("startup-taint", "Specifies a taint to ignore in node templates when considering to scale a node group (Equivalent to ignore-taint)") + statusTaintsFlag = multiStringFlag("status-taint", "Specifies a taint to ignore in node templates when considering to scale a node group but nodes will not be treated as unready") balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar") balancingLabelsFlag = multiStringFlag("balancing-label", "Specifies a label to use for comparing if two node groups are similar, rather than the built in heuristics. Setting this flag disables all other comparison logic, and cannot be combined with --balancing-ignore-label.") awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only") @@ -327,7 +329,8 @@ func createAutoscalingOptions() config.AutoscalingOptions { ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff, Regional: *regional, NewPodScaleUpDelay: *newPodScaleUpDelay, - IgnoredTaints: *ignoreTaintsFlag, + StartupTaints: append(*ignoreTaintsFlag, *startupTaintsFlag...), + StatusTaints: *statusTaintsFlag, BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag, BalancingLabels: *balancingLabelsFlag, KubeConfigPath: *kubeConfigFile, diff --git a/cluster-autoscaler/utils/kubernetes/ready.go b/cluster-autoscaler/utils/kubernetes/ready.go index 09175de42297..bc01d1d18e0a 100644 --- a/cluster-autoscaler/utils/kubernetes/ready.go +++ b/cluster-autoscaler/utils/kubernetes/ready.go @@ -35,10 +35,10 @@ const ( // still upcoming due to a missing resource (e.g. GPU). ResourceUnready NodeNotReadyReason = "cluster-autoscaler.kubernetes.io/resource-not-ready" - // IgnoreTaint is a fake identifier used internally by Cluster Autoscaler + // StartupNodes is a fake identifier used internally by Cluster Autoscaler // to indicate nodes that appear Ready in the API, but are treated as - // still upcoming due to applied ignore taint. - IgnoreTaint NodeNotReadyReason = "cluster-autoscaler.kubernetes.io/ignore-taint" + // still upcoming due to applied startup taint. + StartupNodes NodeNotReadyReason = "cluster-autoscaler.kubernetes.io/startup-taint" ) // IsNodeReadyAndSchedulable returns true if the node is ready and schedulable. diff --git a/cluster-autoscaler/utils/taints/taints.go b/cluster-autoscaler/utils/taints/taints.go index 80383db74ab7..f6dab9fb78bc 100644 --- a/cluster-autoscaler/utils/taints/taints.go +++ b/cluster-autoscaler/utils/taints/taints.go @@ -46,6 +46,12 @@ const ( // IgnoreTaintPrefix any taint starting with it will be filtered out from autoscaler template node. IgnoreTaintPrefix = "ignore-taint.cluster-autoscaler.kubernetes.io/" + // StartupTaintPrefix (Same as IgnoreTaintPrefix) any taint starting with it will be filtered out from autoscaler template node. + StartupTaintPrefix = "startup-taint.cluster-autoscaler.kubernetes.io/" + + // StatusTaintPrefix any taint starting with it will be filtered out from autoscaler template node but unlike IgnoreTaintPrefix & StartupTaintPrefix it should not be trated as unready. + StatusTaintPrefix = "status-taint.cluster-autoscaler.kubernetes.io/" + gkeNodeTerminationHandlerTaint = "cloud.google.com/impending-node-termination" // AWS: Indicates that a node has volumes stuck in attaching state and hence it is not fit for scheduling more pods @@ -57,16 +63,18 @@ type TaintKeySet map[string]bool // TaintConfig is a config of taints that require special handling type TaintConfig struct { - IgnoredTaints TaintKeySet - StatusTaints TaintKeySet + StartupTaints TaintKeySet + StatusTaints TaintKeySet + StartupTaintPrefixes []string + StatusTaintPrefixes []string } // NewTaintConfig returns the taint config extracted from options func NewTaintConfig(opts config.AutoscalingOptions) TaintConfig { - ignoredTaints := make(TaintKeySet) - for _, taintKey := range opts.IgnoredTaints { - klog.V(4).Infof("Ignoring taint %s on all NodeGroups", taintKey) - ignoredTaints[taintKey] = true + startupTaints := make(TaintKeySet) + for _, taintKey := range opts.StartupTaints { + klog.V(4).Infof("Startup taint %s on all NodeGroups", taintKey) + startupTaints[taintKey] = true } statusTaints := make(TaintKeySet) @@ -76,8 +84,10 @@ func NewTaintConfig(opts config.AutoscalingOptions) TaintConfig { } return TaintConfig{ - IgnoredTaints: ignoredTaints, - StatusTaints: statusTaints, + StartupTaints: startupTaints, + StatusTaints: statusTaints, + StartupTaintPrefixes: []string{IgnoreTaintPrefix, StartupTaintPrefix}, + StatusTaintPrefixes: []string{StatusTaintPrefix}, } } @@ -319,6 +329,15 @@ func CleanAllTaints(nodes []*apiv1.Node, client kube_client.Interface, recorder } } +func matchesAnyPrefix(prefixes []string, key string) bool { + for _, prefix := range prefixes { + if strings.HasPrefix(key, prefix) { + return true + } + } + return false +} + // SanitizeTaints returns filtered taints func SanitizeTaints(taints []apiv1.Taint, taintConfig TaintConfig) []apiv1.Taint { var newTaints []apiv1.Taint @@ -344,12 +363,12 @@ func SanitizeTaints(taints []apiv1.Taint, taintConfig TaintConfig) []apiv1.Taint continue } - if _, exists := taintConfig.IgnoredTaints[taint.Key]; exists { - klog.V(4).Infof("Removing ignored taint %s, when creating template from node", taint.Key) + if _, exists := taintConfig.StartupTaints[taint.Key]; exists { + klog.V(4).Infof("Removing startup taint %s, when creating template from node", taint.Key) continue } - - if strings.HasPrefix(taint.Key, IgnoreTaintPrefix) { + shouldRemoveBasedOnPrefix := matchesAnyPrefix(taintConfig.StartupTaintPrefixes, taint.Key) || matchesAnyPrefix(taintConfig.StatusTaintPrefixes, taint.Key) + if shouldRemoveBasedOnPrefix { klog.V(4).Infof("Removing taint %s based on prefix, when creation template from node", taint.Key) continue } @@ -364,12 +383,12 @@ func SanitizeTaints(taints []apiv1.Taint, taintConfig TaintConfig) []apiv1.Taint return newTaints } -// FilterOutNodesWithIgnoredTaints override the condition status of the given nodes to mark them as NotReady when they have +// FilterOutNodesWithStartupTaints override the condition status of the given nodes to mark them as NotReady when they have // filtered taints. -func FilterOutNodesWithIgnoredTaints(ignoredTaints TaintKeySet, allNodes, readyNodes []*apiv1.Node) ([]*apiv1.Node, []*apiv1.Node) { +func FilterOutNodesWithStartupTaints(taintConfig TaintConfig, allNodes, readyNodes []*apiv1.Node) ([]*apiv1.Node, []*apiv1.Node) { newAllNodes := make([]*apiv1.Node, 0) newReadyNodes := make([]*apiv1.Node, 0) - nodesWithIgnoredTaints := make(map[string]*apiv1.Node) + nodesWithStartupTaints := make(map[string]*apiv1.Node) for _, node := range readyNodes { if len(node.Spec.Taints) == 0 { newReadyNodes = append(newReadyNodes, node) @@ -377,11 +396,11 @@ func FilterOutNodesWithIgnoredTaints(ignoredTaints TaintKeySet, allNodes, readyN } ready := true for _, t := range node.Spec.Taints { - _, hasIgnoredTaint := ignoredTaints[t.Key] - if hasIgnoredTaint || strings.HasPrefix(t.Key, IgnoreTaintPrefix) { + _, hasStartupTaint := taintConfig.StartupTaints[t.Key] + if hasStartupTaint || matchesAnyPrefix(taintConfig.StartupTaintPrefixes, t.Key) { ready = false - nodesWithIgnoredTaints[node.Name] = kubernetes.GetUnreadyNodeCopy(node, kubernetes.IgnoreTaint) - klog.V(3).Infof("Overriding status of node %v, which seems to have ignored taint %q", node.Name, t.Key) + nodesWithStartupTaints[node.Name] = kubernetes.GetUnreadyNodeCopy(node, kubernetes.StartupNodes) + klog.V(3).Infof("Overriding status of node %v, which seems to have startup taint %q", node.Name, t.Key) break } } @@ -391,7 +410,7 @@ func FilterOutNodesWithIgnoredTaints(ignoredTaints TaintKeySet, allNodes, readyN } // Override any node with ignored taint with its "unready" copy for _, node := range allNodes { - if newNode, found := nodesWithIgnoredTaints[node.Name]; found { + if newNode, found := nodesWithStartupTaints[node.Name]; found { newAllNodes = append(newAllNodes, newNode) } else { newAllNodes = append(newAllNodes, node) diff --git a/cluster-autoscaler/utils/taints/taints_test.go b/cluster-autoscaler/utils/taints/taints_test.go index ea8032e59528..4243fd698644 100644 --- a/cluster-autoscaler/utils/taints/taints_test.go +++ b/cluster-autoscaler/utils/taints/taints_test.go @@ -277,7 +277,7 @@ func buildFakeClientWithConflicts(t *testing.T, nodes ...*apiv1.Node) *fake.Clie return fakeClient } -func TestFilterOutNodesWithIgnoredTaints(t *testing.T) { +func TestFilterOutNodesWithStartupTaints(t *testing.T) { isReady := func(t *testing.T, node *apiv1.Node) bool { for _, condition := range node.Status.Conditions { if condition.Type == apiv1.NodeReady { @@ -295,29 +295,30 @@ func TestFilterOutNodesWithIgnoredTaints(t *testing.T) { } for name, tc := range map[string]struct { - readyNodes int - allNodes int - ignoredTaints TaintKeySet - node *apiv1.Node + readyNodes int + allNodes int + startupTaints TaintKeySet + startupTaintsPrefixes []string + node *apiv1.Node }{ - "empty ignored taints, no node": { + "empty startup taints, no node": { readyNodes: 0, allNodes: 0, - ignoredTaints: map[string]bool{}, + startupTaints: map[string]bool{}, node: nil, }, - "one ignored taint, no node": { + "one startup taint, no node": { readyNodes: 0, allNodes: 0, - ignoredTaints: map[string]bool{ + startupTaints: map[string]bool{ "my-taint": true, }, node: nil, }, - "one ignored taint, one ready untainted node": { + "one startup taint, one ready untainted node": { readyNodes: 1, allNodes: 1, - ignoredTaints: map[string]bool{ + startupTaints: map[string]bool{ "my-taint": true, }, node: &apiv1.Node{ @@ -333,10 +334,10 @@ func TestFilterOutNodesWithIgnoredTaints(t *testing.T) { }, }, }, - "one ignored taint, one unready tainted node": { + "one startup taint, one unready tainted node": { readyNodes: 0, allNodes: 1, - ignoredTaints: map[string]bool{ + startupTaints: map[string]bool{ "my-taint": true, }, node: &apiv1.Node{ @@ -358,10 +359,11 @@ func TestFilterOutNodesWithIgnoredTaints(t *testing.T) { }, }, }, - "no ignored taint, one unready prefixed tainted node": { - readyNodes: 0, - allNodes: 1, - ignoredTaints: map[string]bool{}, + "no startup taint, one node unready prefixed with startup taint prefix (Compatibility)": { + readyNodes: 0, + allNodes: 1, + startupTaints: map[string]bool{}, + startupTaintsPrefixes: []string{IgnoreTaintPrefix}, node: &apiv1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "notReadyTainted", @@ -381,10 +383,34 @@ func TestFilterOutNodesWithIgnoredTaints(t *testing.T) { }, }, }, - "no ignored taint, two taints": { + "no startup taint, one node unready prefixed with startup taint prefix": { + readyNodes: 0, + allNodes: 1, + startupTaints: map[string]bool{}, + startupTaintsPrefixes: []string{StartupTaintPrefix}, + node: &apiv1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "notReadyTainted", + CreationTimestamp: metav1.NewTime(time.Now()), + }, + Spec: apiv1.NodeSpec{ + Taints: []apiv1.Taint{ + { + Key: StartupTaintPrefix + "another-taint", + Value: "myValue", + Effect: apiv1.TaintEffectNoSchedule, + }, + }, + }, + Status: apiv1.NodeStatus{ + Conditions: []apiv1.NodeCondition{readyCondition}, + }, + }, + }, + "no startup taint, two taints": { readyNodes: 1, allNodes: 1, - ignoredTaints: map[string]bool{}, + startupTaints: map[string]bool{}, node: &apiv1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "ReadyTainted", @@ -415,7 +441,11 @@ func TestFilterOutNodesWithIgnoredTaints(t *testing.T) { if tc.node != nil { nodes = append(nodes, tc.node) } - allNodes, readyNodes := FilterOutNodesWithIgnoredTaints(tc.ignoredTaints, nodes, nodes) + taintConfig := TaintConfig{ + StartupTaints: tc.startupTaints, + StartupTaintPrefixes: tc.startupTaintsPrefixes, + } + allNodes, readyNodes := FilterOutNodesWithStartupTaints(taintConfig, nodes, nodes) assert.Equal(t, tc.allNodes, len(allNodes)) assert.Equal(t, tc.readyNodes, len(readyNodes)) @@ -456,8 +486,13 @@ func TestSanitizeTaints(t *testing.T) { Effect: apiv1.TaintEffectNoSchedule, }, { - Key: ReschedulerTaintKey, - Value: "test1", + Key: StatusTaintPrefix + "some-taint", + Value: "myValue", + Effect: apiv1.TaintEffectNoSchedule, + }, + { + Key: StartupTaintPrefix + "some-taint", + Value: "myValue", Effect: apiv1.TaintEffectNoSchedule, }, { @@ -497,11 +532,13 @@ func TestSanitizeTaints(t *testing.T) { }, } taintConfig := TaintConfig{ - IgnoredTaints: map[string]bool{"ignore-me": true}, - StatusTaints: map[string]bool{"status-me": true}, + StartupTaints: map[string]bool{"ignore-me": true}, + StatusTaints: map[string]bool{"status-me": true}, + StartupTaintPrefixes: []string{IgnoreTaintPrefix, StartupTaintPrefix}, } newTaints := SanitizeTaints(node.Spec.Taints, taintConfig) - require.Equal(t, len(newTaints), 1) - assert.Equal(t, newTaints[0].Key, "test-taint") + require.Equal(t, 2, len(newTaints)) + assert.Equal(t, newTaints[0].Key, StatusTaintPrefix+"some-taint") + assert.Equal(t, newTaints[1].Key, "test-taint") }