diff --git a/api/v1beta1/common_types.go b/api/v1beta1/common_types.go index 9c201109e613..5fa401dcb67e 100644 --- a/api/v1beta1/common_types.go +++ b/api/v1beta1/common_types.go @@ -146,6 +146,16 @@ const ( VariableDefinitionFromInline = "inline" ) +// NodeUninitializedTaint can be added to Nodes at creation by the bootstrap provider, e.g. the +// KubeadmBootstrap provider will add the taint. +// This taint is used to prevent workloads to be scheduled on Nodes before the node is initialized by Cluster API. +// As of today the Node initialization consists of syncing labels from Machines to Nodes. Once the labels +// have been initially synced the taint is removed from the Node. +var NodeUninitializedTaint = corev1.Taint{ + Key: "node.cluster.x-k8s.io/uninitialized", + Effect: corev1.TaintEffectNoSchedule, +} + const ( // TemplateSuffix is the object kind suffix used by template types. TemplateSuffix = "Template" diff --git a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go index e12305a9c14e..ee8f63994902 100644 --- a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go +++ b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go @@ -63,6 +63,24 @@ const ( KubeadmConfigControllerName = "kubeadmconfig-controller" ) +var ( + // controlPlaneTaint is the taint that kubeadm applies to the control plane nodes + // for Kubernetes version >= v1.24.0. + // The values are copied from kubeadm codebase. + controlPlaneTaint = corev1.Taint{ + Key: "node-role.kubernetes.io/control-plane", + Effect: corev1.TaintEffectNoSchedule, + } + + // oldControlPlaneTaint is the taint that kubeadm applies to the control plane nodes + // for Kubernetes version < v1.25.0. + // The values are copied from kubeadm codebase. + oldControlPlaneTaint = corev1.Taint{ + Key: "node-role.kubernetes.io/master", + Effect: corev1.TaintEffectNoSchedule, + } +) + const ( // DefaultTokenTTL is the default TTL used for tokens. DefaultTokenTTL = 15 * time.Minute @@ -415,7 +433,15 @@ func (r *KubeadmConfigReconciler) handleClusterNotInitialized(ctx context.Contex }, } } - initdata, err := kubeadmtypes.MarshalInitConfigurationForVersion(scope.Config.Spec.InitConfiguration, parsedVersion) + + // Add the node uninitialized taint to the list of taints. + // DeepCopy the InitConfiguration to prevent updating the actual KubeadmConfig. + // Do not modify the KubeadmConfig in etcd as this is a temporary taint that will be dropped after the node + // is initialized by ClusterAPI. + initConfiguration := scope.Config.Spec.InitConfiguration.DeepCopy() + addNodeUninitializedTaint(&initConfiguration.NodeRegistration, true, parsedVersion) + + initdata, err := kubeadmtypes.MarshalInitConfigurationForVersion(initConfiguration, parsedVersion) if err != nil { scope.Error(err, "Failed to marshal init configuration") return ctrl.Result{}, err @@ -551,7 +577,14 @@ func (r *KubeadmConfigReconciler) joinWorker(ctx context.Context, scope *Scope) return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kubernetesVersion) } - joinData, err := kubeadmtypes.MarshalJoinConfigurationForVersion(scope.Config.Spec.JoinConfiguration, parsedVersion) + // Add the node uninitialized taint to the list of taints. + // DeepCopy the JoinConfiguration to prevent updating the actual KubeadmConfig. + // Do not modify the KubeadmConfig in etcd as this is a temporary taint that will be dropped after the node + // is initialized by ClusterAPI. + joinConfiguration := scope.Config.Spec.JoinConfiguration.DeepCopy() + addNodeUninitializedTaint(&joinConfiguration.NodeRegistration, false, parsedVersion) + + joinData, err := kubeadmtypes.MarshalJoinConfigurationForVersion(joinConfiguration, parsedVersion) if err != nil { scope.Error(err, "Failed to marshal join configuration") return ctrl.Result{}, err @@ -657,7 +690,14 @@ func (r *KubeadmConfigReconciler) joinControlplane(ctx context.Context, scope *S return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kubernetesVersion) } - joinData, err := kubeadmtypes.MarshalJoinConfigurationForVersion(scope.Config.Spec.JoinConfiguration, parsedVersion) + // Add the node uninitialized taint to the list of taints. + // DeepCopy the JoinConfiguration to prevent updating the actual KubeadmConfig. + // Do not modify the KubeadmConfig in etcd as this is a temporary taint that will be dropped after the node + // is initialized by ClusterAPI. + joinConfiguration := scope.Config.Spec.JoinConfiguration.DeepCopy() + addNodeUninitializedTaint(&joinConfiguration.NodeRegistration, true, parsedVersion) + + joinData, err := kubeadmtypes.MarshalJoinConfigurationForVersion(joinConfiguration, parsedVersion) if err != nil { scope.Error(err, "Failed to marshal join configuration") return ctrl.Result{}, err @@ -1066,3 +1106,43 @@ func (r *KubeadmConfigReconciler) ensureBootstrapSecretOwnersRef(ctx context.Con } return nil } + +// addNodeUninitializedTaint adds the NodeUninitializedTaint to the nodeRegistration. +// Note: If isControlPlane is true then it also adds the control plane taint if the initial set of taints is nil. +// This is to ensure consistency with kubeadm's defaulting behavior. +func addNodeUninitializedTaint(nodeRegistration *bootstrapv1.NodeRegistrationOptions, isControlPlane bool, kubernetesVersion semver.Version) { + var taints []corev1.Taint + taints = nodeRegistration.Taints + if hasTaint(taints, clusterv1.NodeUninitializedTaint) { + return + } + + // For a control plane, kubeadm adds the default control plane taint if the provided taints are nil. + // Since we are adding the uninitialized taint we also have to add the default taints kubeadm would have added if + // the taints were nil. + if isControlPlane && taints == nil { + // Note: Kubeadm uses a different default control plane taint depending on the kubernetes version. + // Ref: https://github.com/kubernetes/kubeadm/issues/2200 + if kubernetesVersion.LT(semver.MustParse("1.24.0")) { + taints = []corev1.Taint{oldControlPlaneTaint} + } else if kubernetesVersion.GTE(semver.MustParse("1.24.0")) && kubernetesVersion.LT(semver.MustParse("1.25.0")) { + taints = []corev1.Taint{ + oldControlPlaneTaint, + controlPlaneTaint, + } + } else { + taints = []corev1.Taint{controlPlaneTaint} + } + } + taints = append(taints, clusterv1.NodeUninitializedTaint) + nodeRegistration.Taints = taints +} + +func hasTaint(taints []corev1.Taint, targetTaint corev1.Taint) bool { + for _, taint := range taints { + if taint.MatchTaint(&targetTaint) { + return true + } + } + return false +} diff --git a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go index 4038c3a16f88..54e9304cecc3 100644 --- a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go +++ b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go @@ -24,6 +24,7 @@ import ( "testing" "time" + "github.com/blang/semver" ignition "github.com/flatcar/ignition/config/v2_3" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" @@ -2193,6 +2194,112 @@ func TestKubeadmConfigReconciler_ResolveUsers(t *testing.T) { } } +func TestAddNodeUninitializedTaint(t *testing.T) { + dummyTaint := corev1.Taint{ + Key: "dummy-taint", + Value: "", + Effect: corev1.TaintEffectNoSchedule, + } + + tests := []struct { + name string + nodeRegistration *bootstrapv1.NodeRegistrationOptions + kubernetesVersion semver.Version + isControlPlane bool + wantTaints []corev1.Taint + }{ + { + name: "for control plane with version < v1.24.0, if taints is nil it should add the uninitialized and the master taint", + nodeRegistration: &bootstrapv1.NodeRegistrationOptions{ + Taints: nil, + }, + kubernetesVersion: semver.MustParse("1.23.0"), + isControlPlane: true, + wantTaints: []corev1.Taint{ + oldControlPlaneTaint, + clusterv1.NodeUninitializedTaint, + }, + }, + { + name: "for control plane with version >= v1.24.0 and < v1.25.0, if taints is nil it should add the uninitialized, control-plane and the master taints", + nodeRegistration: &bootstrapv1.NodeRegistrationOptions{ + Taints: nil, + }, + kubernetesVersion: semver.MustParse("1.24.5"), + isControlPlane: true, + wantTaints: []corev1.Taint{ + oldControlPlaneTaint, + controlPlaneTaint, + clusterv1.NodeUninitializedTaint, + }, + }, + { + name: "for control plane with version >= v1.25.0, if taints is nil it should add the uninitialized and the control-plane taint", + nodeRegistration: &bootstrapv1.NodeRegistrationOptions{ + Taints: nil, + }, + kubernetesVersion: semver.MustParse("1.25.0"), + isControlPlane: true, + wantTaints: []corev1.Taint{ + controlPlaneTaint, + clusterv1.NodeUninitializedTaint, + }, + }, + { + name: "for control plane, if taints is not nil it should only add the uninitialized taint", + nodeRegistration: &bootstrapv1.NodeRegistrationOptions{ + Taints: []corev1.Taint{}, + }, + kubernetesVersion: semver.MustParse("1.26.0"), + isControlPlane: true, + wantTaints: []corev1.Taint{ + clusterv1.NodeUninitializedTaint, + }, + }, + { + name: "for control plane, if it already has some taints it should add the uninitialized taint", + nodeRegistration: &bootstrapv1.NodeRegistrationOptions{ + Taints: []corev1.Taint{dummyTaint}, + }, + kubernetesVersion: semver.MustParse("1.26.0"), + isControlPlane: true, + wantTaints: []corev1.Taint{ + dummyTaint, + clusterv1.NodeUninitializedTaint, + }, + }, + { + name: "for worker, it should add the uninitialized taint", + nodeRegistration: &bootstrapv1.NodeRegistrationOptions{}, + kubernetesVersion: semver.MustParse("1.26.0"), + isControlPlane: false, + wantTaints: []corev1.Taint{ + clusterv1.NodeUninitializedTaint, + }, + }, + { + name: "for worker, if it already has some taints it should add the uninitialized taint", + nodeRegistration: &bootstrapv1.NodeRegistrationOptions{ + Taints: []corev1.Taint{dummyTaint}, + }, + kubernetesVersion: semver.MustParse("1.26.0"), + isControlPlane: false, + wantTaints: []corev1.Taint{ + dummyTaint, + clusterv1.NodeUninitializedTaint, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + addNodeUninitializedTaint(tt.nodeRegistration, tt.isControlPlane, tt.kubernetesVersion) + g.Expect(tt.nodeRegistration.Taints).To(Equal(tt.wantTaints)) + }) + } +} + // test utils. // newWorkerMachineForCluster returns a Machine with the passed Cluster's information and a pre-configured name. diff --git a/docs/book/src/developer/providers/bootstrap.md b/docs/book/src/developer/providers/bootstrap.md index c25eed0e7e99..1ff1fef5093f 100644 --- a/docs/book/src/developer/providers/bootstrap.md +++ b/docs/book/src/developer/providers/bootstrap.md @@ -123,6 +123,13 @@ The following diagram shows the typical logic for a bootstrap provider: A bootstrap provider's bootstrap data must create `/run/cluster-api/bootstrap-success.complete` (or `C:\run\cluster-api\bootstrap-success.complete` for Windows machines) upon successful bootstrapping of a Kubernetes node. This allows infrastructure providers to detect and act on bootstrap failures. +## Taint Nodes at creation + +A bootstrap provider can optionally taint nodes at creation with `node.cluster.x-k8s.io/uninitialized:NoSchedule`. +This taint is used to prevent workloads to be scheduled on Nodes before the node is initialized by Cluster API. +As of today the Node initialization consists of syncing labels from Machines to Nodes. Once the labels have been +initially synced the taint is removed form the Node. + ## RBAC ### Provider controller diff --git a/exp/internal/controllers/machinepool_controller_noderef.go b/exp/internal/controllers/machinepool_controller_noderef.go index 275027b0b6d4..1d680ed058fe 100644 --- a/exp/internal/controllers/machinepool_controller_noderef.go +++ b/exp/internal/controllers/machinepool_controller_noderef.go @@ -30,6 +30,7 @@ import ( "sigs.k8s.io/cluster-api/controllers/noderefutil" "sigs.k8s.io/cluster-api/controllers/remote" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + "sigs.k8s.io/cluster-api/internal/util/taints" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" @@ -110,9 +111,10 @@ func (r *MachinePoolReconciler) reconcileNodeRefs(ctx context.Context, cluster * clusterv1.OwnerKindAnnotation: mp.Kind, clusterv1.OwnerNameAnnotation: mp.Name, } - if annotations.AddAnnotations(node, desired) { + // Add annotations and drop NodeUninitializedTaint. + if annotations.AddAnnotations(node, desired) || taints.RemoveNodeTaint(node, clusterv1.NodeUninitializedTaint) { if err := patchHelper.Patch(ctx, node); err != nil { - log.V(2).Info("Failed patch node to set annotations", "err", err, "node name", node.Name) + log.V(2).Info("Failed patch node to set annotations and drop taints", "err", err, "node name", node.Name) return ctrl.Result{}, err } } diff --git a/internal/controllers/machine/machine_controller_noderef.go b/internal/controllers/machine/machine_controller_noderef.go index 20b83e39dd6e..89ac0a2dfdde 100644 --- a/internal/controllers/machine/machine_controller_noderef.go +++ b/internal/controllers/machine/machine_controller_noderef.go @@ -34,6 +34,7 @@ import ( "sigs.k8s.io/cluster-api/api/v1beta1/index" "sigs.k8s.io/cluster-api/controllers/noderefutil" "sigs.k8s.io/cluster-api/internal/util/ssa" + "sigs.k8s.io/cluster-api/internal/util/taints" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" @@ -130,6 +131,15 @@ func (r *Reconciler) reconcileNode(ctx context.Context, cluster *clusterv1.Clust if err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to apply labels to Node") } + // Update `node` with the new version of the object. + if err := r.Client.Scheme().Convert(updatedNode, node, nil); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to convert node to structured object %s", klog.KObj(node)) + } + + // Reconcile node taints + if err := r.reconcileNodeTaints(ctx, remoteClient, node); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile taints on Node %s", klog.KObj(node)) + } // Do the remaining node health checks, then set the node health to true if all checks pass. status, message := summarizeNodeConditions(node) @@ -258,3 +268,17 @@ func (r *Reconciler) getNode(ctx context.Context, c client.Reader, providerID *n return &nodeList.Items[0], nil } + +func (r *Reconciler) reconcileNodeTaints(ctx context.Context, remoteClient client.Client, node *corev1.Node) error { + patchHelper, err := patch.NewHelper(node, remoteClient) + if err != nil { + return errors.Wrapf(err, "failed to create patch helper for Node %s", klog.KObj(node)) + } + // Drop the NodeUninitializedTaint taint on the node. + if taints.RemoveNodeTaint(node, clusterv1.NodeUninitializedTaint) { + if err := patchHelper.Patch(ctx, node); err != nil { + return errors.Wrapf(err, "failed to patch Node %s to modify taints", klog.KObj(node)) + } + } + return nil +} diff --git a/internal/controllers/machine/machine_controller_noderef_test.go b/internal/controllers/machine/machine_controller_noderef_test.go index ede053801e7b..f71d109724b4 100644 --- a/internal/controllers/machine/machine_controller_noderef_test.go +++ b/internal/controllers/machine/machine_controller_noderef_test.go @@ -27,6 +27,7 @@ import ( "k8s.io/utils/pointer" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -459,3 +460,70 @@ func TestGetManagedLabels(t *testing.T) { got := getManagedLabels(allLabels) g.Expect(got).To(BeEquivalentTo(managedLabels)) } + +func TestReconcileNodeTaints(t *testing.T) { + tests := []struct { + name string + node *corev1.Node + }{ + { + name: "if the node has the uninitialized taint it should be dropped from the node", + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-1", + }, + Spec: corev1.NodeSpec{ + Taints: []corev1.Taint{ + clusterv1.NodeUninitializedTaint, + }, + }, + }, + }, + { + name: "if the node is a control plane and has the uninitialized taint it should be dropped from the node", + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-1", + }, + Spec: corev1.NodeSpec{ + Taints: []corev1.Taint{ + { + Key: "node-role.kubernetes.io/control-plane", + Effect: corev1.TaintEffectNoSchedule, + }, + clusterv1.NodeUninitializedTaint, + }, + }, + }, + }, + { + name: "if the node does not have the uninitialized taint it should remain absent from the node", + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-1", + }, + Spec: corev1.NodeSpec{ + Taints: []corev1.Taint{}, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + fakeClient := fake.NewClientBuilder().WithObjects(tt.node).WithScheme(fakeScheme).Build() + nodeBefore := tt.node.DeepCopy() + r := &Reconciler{} + g.Expect(r.reconcileNodeTaints(ctx, fakeClient, tt.node)).Should(Succeed()) + // Verify the NodeUninitializedTaint is dropped. + g.Expect(tt.node.Spec.Taints).ShouldNot(ContainElement(clusterv1.NodeUninitializedTaint)) + // Verify all other taints are same. + for _, taint := range nodeBefore.Spec.Taints { + if !taint.MatchTaint(&clusterv1.NodeUninitializedTaint) { + g.Expect(tt.node.Spec.Taints).Should(ContainElement(taint)) + } + } + }) + } +} diff --git a/internal/controllers/machine/suite_test.go b/internal/controllers/machine/suite_test.go index 3143e769999b..d8439e520f34 100644 --- a/internal/controllers/machine/suite_test.go +++ b/internal/controllers/machine/suite_test.go @@ -27,6 +27,7 @@ import ( . "github.com/onsi/gomega" "github.com/onsi/gomega/types" "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -55,6 +56,7 @@ func init() { _ = clientgoscheme.AddToScheme(fakeScheme) _ = clusterv1.AddToScheme(fakeScheme) _ = apiextensionsv1.AddToScheme(fakeScheme) + _ = corev1.AddToScheme(fakeScheme) } func TestMain(m *testing.M) { diff --git a/internal/util/taints/taints.go b/internal/util/taints/taints.go new file mode 100644 index 000000000000..2d07ce4229fd --- /dev/null +++ b/internal/util/taints/taints.go @@ -0,0 +1,38 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package taints implements taint helper functions. +package taints + +import ( + corev1 "k8s.io/api/core/v1" +) + +// RemoveNodeTaint drops the taint from the list of node taints. +// It returns true if the taints are modified, false otherwise. +func RemoveNodeTaint(node *corev1.Node, drop corev1.Taint) bool { + droppedTaint := false + taints := []corev1.Taint{} + for _, taint := range node.Spec.Taints { + if taint.MatchTaint(&drop) { + droppedTaint = true + continue + } + taints = append(taints, taint) + } + node.Spec.Taints = taints + return droppedTaint +} diff --git a/internal/util/taints/taints_test.go b/internal/util/taints/taints_test.go new file mode 100644 index 000000000000..f830c9573756 --- /dev/null +++ b/internal/util/taints/taints_test.go @@ -0,0 +1,68 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package taints + +import ( + "testing" + + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" +) + +func TestRemoveNodeTaint(t *testing.T) { + taint1 := corev1.Taint{Key: "taint1", Effect: corev1.TaintEffectNoSchedule} + taint2 := corev1.Taint{Key: "taint2", Effect: corev1.TaintEffectNoSchedule} + + tests := []struct { + name string + node *corev1.Node + dropTaint corev1.Taint + wantTaints []corev1.Taint + wantModified bool + }{ + { + name: "dropping taint from node should return true", + node: &corev1.Node{Spec: corev1.NodeSpec{ + Taints: []corev1.Taint{ + taint1, + taint2, + }}}, + dropTaint: taint1, + wantTaints: []corev1.Taint{taint2}, + wantModified: true, + }, + { + name: "drop non-existing taint should return false", + node: &corev1.Node{Spec: corev1.NodeSpec{ + Taints: []corev1.Taint{ + taint2, + }}}, + dropTaint: taint1, + wantTaints: []corev1.Taint{taint2}, + wantModified: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + got := RemoveNodeTaint(tt.node, tt.dropTaint) + g.Expect(got).To(Equal(tt.wantModified)) + g.Expect(tt.node.Spec.Taints).To(Equal(tt.wantTaints)) + }) + } +}