diff --git a/Makefile b/Makefile index 48fc55187d..3484e90b46 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,7 @@ delete: ## Delete the controller from your ~/.kube/config cluster test: ## Run tests go test ./pkg/... \ -race \ - -timeout 15m \ + -timeout 20m \ --ginkgo.focus="${FOCUS}" \ --ginkgo.randomize-all \ --ginkgo.v \ diff --git a/go.mod b/go.mod index ecde6ca4e7..4240b96ec8 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/patrickmn/go-cache v2.1.0+incompatible github.com/prometheus/client_golang v1.19.1 github.com/prometheus/client_model v0.6.1 - github.com/samber/lo v1.43.0 + github.com/samber/lo v1.44.0 go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 golang.org/x/sync v0.7.0 diff --git a/go.sum b/go.sum index 5c2539b1f1..efd14fad50 100644 --- a/go.sum +++ b/go.sum @@ -287,8 +287,8 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/samber/lo v1.43.0 h1:ts0VhPi8+ZQZFVLv/2Vkgt2Cds05FM2v3Enmv+YMBtg= -github.com/samber/lo v1.43.0/go.mod h1:w7R6fO7h2lrnx/s0bWcZ55vXJI89p5UPM6+kyDL373E= +github.com/samber/lo v1.44.0 h1:5il56KxRE+GHsm1IR+sZ/6J42NODigFiqCWpSc2dybA= +github.com/samber/lo v1.44.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= diff --git a/pkg/apis/v1beta1/taints.go b/pkg/apis/v1beta1/taints.go index 9eac470499..ff7ba0de90 100644 --- a/pkg/apis/v1beta1/taints.go +++ b/pkg/apis/v1beta1/taints.go @@ -26,6 +26,7 @@ import ( const ( DisruptionTaintKey = apis.Group + "/disruption" DisruptingNoScheduleTaintValue = "disrupting" + UnregisteredTaintKey = apis.Group + "/unregistered" ) var ( @@ -36,6 +37,10 @@ var ( Effect: v1.TaintEffectNoSchedule, Value: DisruptingNoScheduleTaintValue, } + UnregisteredNoExecuteTaint = v1.Taint{ + Key: UnregisteredTaintKey, + Effect: v1.TaintEffectNoExecute, + } ) func IsDisruptingTaint(taint v1.Taint) bool { diff --git a/pkg/cloudprovider/types.go b/pkg/cloudprovider/types.go index f0a79790e4..3adf4759ac 100644 --- a/pkg/cloudprovider/types.go +++ b/pkg/cloudprovider/types.go @@ -199,7 +199,7 @@ func (its InstanceTypes) SatisfiesMinValues(requirements scheduling.Requirements // Truncate truncates the InstanceTypes based on the passed-in requirements // It returns an error if it isn't possible to truncate the instance types on maxItems without violating minValues func (its InstanceTypes) Truncate(requirements scheduling.Requirements, maxItems int) (InstanceTypes, error) { - truncatedInstanceTypes := InstanceTypes(lo.Slice(its.OrderByPrice(requirements), 0, maxItems)) + truncatedInstanceTypes := lo.Slice(its.OrderByPrice(requirements), 0, maxItems) // Only check for a validity of NodeClaim if its requirement has minValues in it. if requirements.HasMinValues() { if _, err := truncatedInstanceTypes.SatisfiesMinValues(requirements); err != nil { diff --git a/pkg/controllers/nodeclaim/disruption/suite_test.go b/pkg/controllers/nodeclaim/disruption/suite_test.go index 0ecc27f732..b9ea398aad 100644 --- a/pkg/controllers/nodeclaim/disruption/suite_test.go +++ b/pkg/controllers/nodeclaim/disruption/suite_test.go @@ -113,6 +113,7 @@ var _ = Describe("Disruption", func() { Expect(nodeClaim.StatusConditions().Get(v1beta1.ConditionTypeEmpty).IsTrue()).To(BeTrue()) }) It("should remove multiple disruption conditions simultaneously", func() { + cp.Drifted = "" nodePool.Spec.Disruption.ExpireAfter.Duration = nil nodePool.Spec.Disruption.ConsolidateAfter = &v1beta1.NillableDuration{Duration: nil} @@ -124,7 +125,7 @@ var _ = Describe("Disruption", func() { ExpectObjectReconciled(ctx, env.Client, nodeClaimDisruptionController, nodeClaim) nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) - Expect(nodeClaim.StatusConditions().Get(v1beta1.ConditionTypeDrifted).IsTrue()).To(BeTrue()) + Expect(nodeClaim.StatusConditions().Get(v1beta1.ConditionTypeDrifted)).To(BeNil()) Expect(nodeClaim.StatusConditions().Get(v1beta1.ConditionTypeEmpty)).To(BeNil()) }) }) diff --git a/pkg/controllers/nodeclaim/lifecycle/initialization_test.go b/pkg/controllers/nodeclaim/lifecycle/initialization_test.go index d85109bf4b..724401f047 100644 --- a/pkg/controllers/nodeclaim/lifecycle/initialization_test.go +++ b/pkg/controllers/nodeclaim/lifecycle/initialization_test.go @@ -61,11 +61,12 @@ var _ = Describe("Initialization", func() { node := test.Node(test.NodeOptions{ ProviderID: nodeClaim.Status.ProviderID, + Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}, }) ExpectApplied(ctx, env.Client, node) - ExpectMakeNodesReady(ctx, env.Client, node) // Remove the not-ready taint ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) + ExpectMakeNodesReady(ctx, env.Client, node) // Remove the not-ready taint nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) Expect(ExpectStatusConditionExists(nodeClaim, v1beta1.ConditionTypeRegistered).Status).To(Equal(metav1.ConditionTrue)) @@ -122,10 +123,11 @@ var _ = Describe("Initialization", func() { v1.ResourceMemory: resource.MustParse("80Mi"), v1.ResourcePods: resource.MustParse("110"), }, + Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}, }) ExpectApplied(ctx, env.Client, node) + ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) ExpectMakeNodesReady(ctx, env.Client, node) // Remove the not-ready taint - ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) node = ExpectExists(ctx, env.Client, node) @@ -165,7 +167,9 @@ var _ = Describe("Initialization", func() { v1.ResourcePods: resource.MustParse("110"), }, ReadyStatus: v1.ConditionFalse, + Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}, }) + ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) ExpectApplied(ctx, env.Client, node) ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) @@ -213,10 +217,11 @@ var _ = Describe("Initialization", func() { v1.ResourceMemory: resource.MustParse("80Mi"), v1.ResourcePods: resource.MustParse("110"), }, + Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}, }) ExpectApplied(ctx, env.Client, node) + ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) ExpectMakeNodesReady(ctx, env.Client, node) // Remove the not-ready taint - ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) @@ -263,8 +268,10 @@ var _ = Describe("Initialization", func() { v1.ResourceMemory: resource.MustParse("80Mi"), v1.ResourcePods: resource.MustParse("110"), }, + Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}, }) ExpectApplied(ctx, env.Client, node) + ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) ExpectMakeNodesReady(ctx, env.Client, node) // Remove the not-ready taint ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) @@ -330,8 +337,10 @@ var _ = Describe("Initialization", func() { v1.ResourceMemory: resource.MustParse("80Mi"), v1.ResourcePods: resource.MustParse("110"), }, + Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}, }) ExpectApplied(ctx, env.Client, node) + ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) ExpectMakeNodesReady(ctx, env.Client, node) // Remove the not-ready taint // Should add the startup taints to the node @@ -401,8 +410,10 @@ var _ = Describe("Initialization", func() { v1.ResourceMemory: resource.MustParse("80Mi"), v1.ResourcePods: resource.MustParse("110"), }, + Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}, }) ExpectApplied(ctx, env.Client, node) + ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) ExpectMakeNodesReady(ctx, env.Client, node) // Remove the not-ready taint // Shouldn't consider the node ready since the startup taints still exist @@ -480,6 +491,7 @@ var _ = Describe("Initialization", func() { Effect: v1.TaintEffectNoSchedule, Value: "true", }, + v1beta1.UnregisteredNoExecuteTaint, }, }) ExpectApplied(ctx, env.Client, node) @@ -549,6 +561,7 @@ var _ = Describe("Initialization", func() { Effect: v1.TaintEffectNoSchedule, Value: "true", }, + v1beta1.UnregisteredNoExecuteTaint, }, }) ExpectApplied(ctx, env.Client, node) diff --git a/pkg/controllers/nodeclaim/lifecycle/registration.go b/pkg/controllers/nodeclaim/lifecycle/registration.go index 7dec23cef1..fdab67ffb7 100644 --- a/pkg/controllers/nodeclaim/lifecycle/registration.go +++ b/pkg/controllers/nodeclaim/lifecycle/registration.go @@ -61,9 +61,17 @@ func (r *Registration) Reconcile(ctx context.Context, nodeClaim *v1beta1.NodeCla } return reconcile.Result{}, fmt.Errorf("getting node for nodeclaim, %w", err) } + _, hasStartupTaint := lo.Find(node.Spec.Taints, func(t v1.Taint) bool { + return t.MatchTaint(&v1beta1.UnregisteredNoExecuteTaint) + }) + // check if sync succeeded but setting the registered status condition failed + // if sync succeeded, then the label will be present and the taint will be gone + if _, ok := node.Labels[v1beta1.NodeRegisteredLabelKey]; !ok && !hasStartupTaint { + return reconcile.Result{}, fmt.Errorf("missing required startup taint, %s", v1beta1.UnregisteredTaintKey) + } ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("Node", klog.KRef("", node.Name))) if err = r.syncNode(ctx, nodeClaim, node); err != nil { - return reconcile.Result{}, fmt.Errorf("syncing node, %w", err) + return reconcile.Result{}, err } log.FromContext(ctx).Info("registered nodeclaim") nodeClaim.StatusConditions().SetTrue(v1beta1.ConditionTypeRegistered) @@ -88,12 +96,16 @@ func (r *Registration) syncNode(ctx context.Context, nodeClaim *v1beta1.NodeClai // Sync all taints inside NodeClaim into the Node taints node.Spec.Taints = scheduling.Taints(node.Spec.Taints).Merge(nodeClaim.Spec.Taints) node.Spec.Taints = scheduling.Taints(node.Spec.Taints).Merge(nodeClaim.Spec.StartupTaints) + // Remove karpenter.sh/unregistered taint + node.Spec.Taints = lo.Reject(node.Spec.Taints, func(t v1.Taint, _ int) bool { + return t.MatchTaint(&v1beta1.UnregisteredNoExecuteTaint) + }) node.Labels = lo.Assign(node.Labels, nodeClaim.Labels, map[string]string{ v1beta1.NodeRegisteredLabelKey: "true", }) if !equality.Semantic.DeepEqual(stored, node) { - if err := r.kubeClient.Patch(ctx, node, client.StrategicMergeFrom(stored)); err != nil { - return fmt.Errorf("syncing node labels, %w", err) + if err := r.kubeClient.Update(ctx, node); err != nil { + return fmt.Errorf("syncing node, %w", err) } } return nil diff --git a/pkg/controllers/nodeclaim/lifecycle/registration_test.go b/pkg/controllers/nodeclaim/lifecycle/registration_test.go index e47f19cd0d..d65e34f823 100644 --- a/pkg/controllers/nodeclaim/lifecycle/registration_test.go +++ b/pkg/controllers/nodeclaim/lifecycle/registration_test.go @@ -46,7 +46,7 @@ var _ = Describe("Registration", func() { ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) - node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID}) + node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID, Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}}) ExpectApplied(ctx, env.Client, node) ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) @@ -66,14 +66,14 @@ var _ = Describe("Registration", func() { ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) - node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID}) + node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID, Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}}) ExpectApplied(ctx, env.Client, node) ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) node = ExpectExists(ctx, env.Client, node) ExpectOwnerReferenceExists(node, nodeClaim) }) - It("should sync the karpenter.sh/registered label to the Node when the Node comes online", func() { + It("should sync the karpenter.sh/registered label to the Node and remove the karpenter.sh/unregistered taint when the Node comes online", func() { nodeClaim := test.NodeClaim(v1beta1.NodeClaim{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ @@ -85,11 +85,28 @@ var _ = Describe("Registration", func() { ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) - node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID}) + node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID, Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}}) ExpectApplied(ctx, env.Client, node) ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) node = ExpectExists(ctx, env.Client, node) Expect(node.Labels).To(HaveKeyWithValue(v1beta1.NodeRegisteredLabelKey, "true")) + Expect(node.Spec.Taints).To(Not(ContainElement(v1beta1.UnregisteredNoExecuteTaint))) + }) + It("should fail registration if the karpenter.sh/unregistered taint is not present on the node and the node isn't labeled as registered", func() { + nodeClaim := test.NodeClaim(v1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1beta1.NodePoolLabelKey: nodePool.Name, + }, + }, + }) + ExpectApplied(ctx, env.Client, nodePool, nodeClaim) + ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + + node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID}) + ExpectApplied(ctx, env.Client, node) + _ = ExpectObjectReconcileFailed(ctx, env.Client, nodeClaimController, nodeClaim) }) It("should sync the labels to the Node when the Node comes online", func() { nodeClaim := test.NodeClaim(v1beta1.NodeClaim{ @@ -107,7 +124,7 @@ var _ = Describe("Registration", func() { Expect(nodeClaim.Labels).To(HaveKeyWithValue("custom-label", "custom-value")) Expect(nodeClaim.Labels).To(HaveKeyWithValue("other-custom-label", "other-custom-value")) - node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID}) + node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID, Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}}) ExpectApplied(ctx, env.Client, node) ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) node = ExpectExists(ctx, env.Client, node) @@ -135,7 +152,7 @@ var _ = Describe("Registration", func() { Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.DoNotDisruptAnnotationKey, "true")) Expect(nodeClaim.Annotations).To(HaveKeyWithValue("my-custom-annotation", "my-custom-value")) - node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID}) + node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID, Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}}) ExpectApplied(ctx, env.Client, node) ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) node = ExpectExists(ctx, env.Client, node) @@ -183,7 +200,7 @@ var _ = Describe("Registration", func() { }, )) - node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID}) + node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID, Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}}) ExpectApplied(ctx, env.Client, node) ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) node = ExpectExists(ctx, env.Client, node) @@ -251,7 +268,7 @@ var _ = Describe("Registration", func() { }, )) - node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID}) + node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID, Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}}) ExpectApplied(ctx, env.Client, node) ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) node = ExpectExists(ctx, env.Client, node) @@ -305,7 +322,7 @@ var _ = Describe("Registration", func() { ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) - node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID}) + node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID, Taints: []v1.Taint{v1beta1.UnregisteredNoExecuteTaint}}) ExpectApplied(ctx, env.Client, node) ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) node = ExpectExists(ctx, env.Client, node) diff --git a/pkg/test/expectations/expectations.go b/pkg/test/expectations/expectations.go index 849cbd0df1..ce37b73517 100644 --- a/pkg/test/expectations/expectations.go +++ b/pkg/test/expectations/expectations.go @@ -357,6 +357,7 @@ func ExpectNodeClaimDeployed(ctx context.Context, c client.Client, cloudProvider // Mock the nodeclaim launch and node joining at the apiserver node := test.NodeClaimLinkedNode(nc) + node.Spec.Taints = lo.Reject(node.Spec.Taints, func(t v1.Taint, _ int) bool { return t.MatchTaint(&v1beta1.UnregisteredNoExecuteTaint) }) node.Labels = lo.Assign(node.Labels, map[string]string{v1beta1.NodeRegisteredLabelKey: "true"}) ExpectApplied(ctx, c, nc, node) return nc, node, nil @@ -408,6 +409,7 @@ func ExpectMakeNodesInitialized(ctx context.Context, c client.Client, nodes ...* ExpectMakeNodesReady(ctx, c, nodes...) for i := range nodes { + nodes[i].Spec.Taints = lo.Reject(nodes[i].Spec.Taints, func(t v1.Taint, _ int) bool { return t.MatchTaint(&v1beta1.UnregisteredNoExecuteTaint) }) nodes[i].Labels[v1beta1.NodeRegisteredLabelKey] = "true" nodes[i].Labels[v1beta1.NodeInitializedLabelKey] = "true" ExpectApplied(ctx, c, nodes[i]) diff --git a/pkg/test/nodes.go b/pkg/test/nodes.go index 2d9a2a1f83..c26e4bd552 100644 --- a/pkg/test/nodes.go +++ b/pkg/test/nodes.go @@ -68,7 +68,7 @@ func Node(overrides ...NodeOptions) *v1.Node { } func NodeClaimLinkedNode(nodeClaim *v1beta1.NodeClaim) *v1.Node { - return Node( + n := Node( NodeOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: nodeClaim.Labels, @@ -81,4 +81,6 @@ func NodeClaimLinkedNode(nodeClaim *v1beta1.NodeClaim) *v1.Node { ProviderID: nodeClaim.Status.ProviderID, }, ) + n.Spec.Taints = append(n.Spec.Taints, v1beta1.UnregisteredNoExecuteTaint) + return n }