✨ Taint nodes with PreferNoSchedule during rollouts #10223

Merged · 1 commit · Mar 15, 2024
8 changes: 8 additions & 0 deletions api/v1beta1/common_types.go
@@ -228,6 +228,14 @@ const (
MachineSetPreflightCheckControlPlaneIsStable MachineSetPreflightCheck = "ControlPlaneIsStable"
)

// NodeOutdatedRevisionTaint can be added to Nodes during rolling updates, which are in general triggered by updating the MachineDeployment.
// This taint is used to prevent unnecessary pod churn: as the first node is drained, pods previously running on
// that node would otherwise be scheduled onto nodes that have yet to be replaced but will be torn down soon.
var NodeOutdatedRevisionTaint = corev1.Taint{
Key: "node.cluster.x-k8s.io/outdated-revision",
Effect: corev1.TaintEffectPreferNoSchedule,
}
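PreferNoSchedule is a soft preference: the scheduler tries to avoid outdated Nodes but can still place pods there if nothing else fits. For illustration only (not part of this PR), a workload that should keep scheduling onto outdated Nodes during a rollout could declare a matching toleration; the key and effect below are taken from the taint definition above.

```go
package example

import (
	corev1 "k8s.io/api/core/v1"
)

// Hypothetical toleration for the outdated-revision taint. Because the taint
// effect is PreferNoSchedule, this is rarely needed; it only removes the soft
// scheduling penalty for pods that should still land on outdated Nodes.
var outdatedRevisionToleration = corev1.Toleration{
	Key:      "node.cluster.x-k8s.io/outdated-revision",
	Operator: corev1.TolerationOpExists,
	Effect:   corev1.TaintEffectPreferNoSchedule,
}
```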

// NodeUninitializedTaint can be added to Nodes at creation by the bootstrap provider, e.g. the
// KubeadmBootstrap provider will add the taint.
// This taint is used to prevent workloads from being scheduled on Nodes before the node is initialized by Cluster API.
16 changes: 16 additions & 0 deletions internal/controllers/machine/machine_controller.go
@@ -100,6 +100,14 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
if err != nil {
return err
}
msToMachines, err := util.MachineSetToObjectsMapper(mgr.GetClient(), &clusterv1.MachineList{}, mgr.GetScheme())
if err != nil {
return err
}
mdToMachines, err := util.MachineDeploymentToObjectsMapper(mgr.GetClient(), &clusterv1.MachineList{}, mgr.GetScheme())
if err != nil {
return err
}

if r.nodeDeletionRetryTimeout.Nanoseconds() == 0 {
r.nodeDeletionRetryTimeout = 10 * time.Second
@@ -122,6 +130,14 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
predicates.ResourceHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue),
),
)).
Watches(
&clusterv1.MachineSet{},
handler.EnqueueRequestsFromMapFunc(msToMachines),
).
Watches(
&clusterv1.MachineDeployment{},
handler.EnqueueRequestsFromMapFunc(mdToMachines),
).
Build(r)
if err != nil {
return errors.Wrap(err, "failed setting up with a controller manager")
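The new watches re-enqueue Machines whenever their owning MachineSet or MachineDeployment changes, so the outdated-revision taint is re-evaluated as a rollout progresses. As a rough sketch of what such a mapper produces (the actual util.MachineSetToObjectsMapper is more general and resolves objects via the scheme), assuming Machines carry the clusterv1.MachineSetNameLabel label:

```go
package example

import (
	"context"

	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// machineSetToMachines is a simplified, hypothetical mapper: for a MachineSet
// event it enqueues a reconcile request for every Machine labeled with that
// MachineSet's name, so the Machine controller re-checks the node taint.
func machineSetToMachines(c client.Client) func(context.Context, client.Object) []reconcile.Request {
	return func(ctx context.Context, o client.Object) []reconcile.Request {
		machines := &clusterv1.MachineList{}
		if err := c.List(ctx, machines,
			client.InNamespace(o.GetNamespace()),
			client.MatchingLabels{clusterv1.MachineSetNameLabel: o.GetName()},
		); err != nil {
			return nil
		}
		requests := make([]reconcile.Request, 0, len(machines.Items))
		for i := range machines.Items {
			requests = append(requests, reconcile.Request{
				NamespacedName: client.ObjectKeyFromObject(&machines.Items[i]),
			})
		}
		return requests
	}
}
```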
67 changes: 65 additions & 2 deletions internal/controllers/machine/machine_controller_noderef.go
@@ -25,12 +25,14 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/api/v1beta1/index"
"sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
"sigs.k8s.io/cluster-api/internal/util/taints"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
@@ -133,7 +135,7 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result,
_, nodeHadInterruptibleLabel := node.Labels[clusterv1.InterruptibleLabel]

// Reconcile node taints
if err := r.patchNode(ctx, remoteClient, node, nodeLabels, nodeAnnotations); err != nil {
if err := r.patchNode(ctx, remoteClient, node, nodeLabels, nodeAnnotations, machine); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile Node %s", klog.KObj(node))
}
if !nodeHadInterruptibleLabel && interruptible {
@@ -255,7 +257,7 @@ func (r *Reconciler) getNode(ctx context.Context, c client.Reader, providerID st

// patchNode is required to work around an issue on Node.Status.Addresses, which is incorrectly annotated as patchStrategy=merge;
// this causes the SSA patch to fail in case there are two addresses with the same key. See https://github.com/kubernetes-sigs/cluster-api/issues/8417
func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client, node *corev1.Node, newLabels, newAnnotations map[string]string) error {
func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client, node *corev1.Node, newLabels, newAnnotations map[string]string, m *clusterv1.Machine) error {
newNode := node.DeepCopy()

// Adds the annotations CAPI sets on the node.
@@ -292,9 +294,70 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client,
// Drop the NodeUninitializedTaint taint on the node given that we are reconciling labels.
hasTaintChanges := taints.RemoveNodeTaint(newNode, clusterv1.NodeUninitializedTaint)

// Set the outdated-revision taint on a Node that belongs to an outdated MachineSet and remove it from a Node in an up-to-date MachineSet.
isOutdated, err := shouldNodeHaveOutdatedTaint(ctx, r.Client, m)
if err != nil {
return errors.Wrapf(err, "failed to check if Node %s is outdated", klog.KRef("", node.Name))
}
if isOutdated {
hasTaintChanges = taints.EnsureNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
} else {
hasTaintChanges = taints.RemoveNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
}

if !hasAnnotationChanges && !hasLabelChanges && !hasTaintChanges {
return nil
}

return remoteClient.Patch(ctx, newNode, client.StrategicMergeFrom(node))
}
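patchNode only issues a patch when something actually changed, which is why each taint helper reports whether it mutated the Node. A simplified, hypothetical sketch of what a remove-style helper such as taints.RemoveNodeTaint does (the real implementation lives in internal/util/taints and may differ):

```go
package example

import (
	corev1 "k8s.io/api/core/v1"
)

// removeTaint drops a matching taint from the Node spec and reports whether
// anything changed, letting the caller skip the patch when the Node is
// already in the desired state.
func removeTaint(node *corev1.Node, taint corev1.Taint) bool {
	filtered := make([]corev1.Taint, 0, len(node.Spec.Taints))
	changed := false
	for i := range node.Spec.Taints {
		if node.Spec.Taints[i].MatchTaint(&taint) {
			changed = true
			continue
		}
		filtered = append(filtered, node.Spec.Taints[i])
	}
	if changed {
		node.Spec.Taints = filtered
	}
	return changed
}
```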

func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *clusterv1.Machine) (bool, error) {
if _, hasLabel := m.Labels[clusterv1.MachineDeploymentNameLabel]; !hasLabel {
return false, nil
}

// Resolve the MachineSet name via owner references because the label value
// could also be a hash.
objKey, err := getOwnerMachineSetObjectKey(m.ObjectMeta)
if err != nil {
return false, err
}
ms := &clusterv1.MachineSet{}
if err := c.Get(ctx, *objKey, ms); err != nil {
return false, err
}
md := &clusterv1.MachineDeployment{}
objKey = &client.ObjectKey{
Namespace: m.ObjectMeta.Namespace,
Name: m.Labels[clusterv1.MachineDeploymentNameLabel],
}
if err := c.Get(ctx, *objKey, md); err != nil {
return false, err
}
msRev, err := mdutil.Revision(ms)
if err != nil {
return false, err
}
mdRev, err := mdutil.Revision(md)
if err != nil {
return false, err
}
if msRev < mdRev {
return true, nil
}
return false, nil
}
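The outdated check boils down to comparing the revision recorded on the MachineSet against the revision on its owning MachineDeployment: during a rollout the MachineDeployment's revision is bumped first, so Machines of MachineSets still at a lower revision get the taint. A simplified, hypothetical stand-in for mdutil.Revision, assuming revisions are stored as integers in the clusterv1.RevisionAnnotation annotation:

```go
package example

import (
	"strconv"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// revisionOf parses the revision annotation, treating a missing or malformed
// value as revision 0. With this, a MachineSet at revision 2 is outdated
// relative to a MachineDeployment at revision 3, so its Nodes get the taint.
func revisionOf(obj metav1.Object) int64 {
	rev, err := strconv.ParseInt(obj.GetAnnotations()[clusterv1.RevisionAnnotation], 10, 64)
	if err != nil {
		return 0
	}
	return rev
}
```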

func getOwnerMachineSetObjectKey(obj metav1.ObjectMeta) (*client.ObjectKey, error) {
for _, ref := range obj.GetOwnerReferences() {
gv, err := schema.ParseGroupVersion(ref.APIVersion)
if err != nil {
return nil, err
}
if ref.Kind == "MachineSet" && gv.Group == clusterv1.GroupVersion.Group {
return &client.ObjectKey{Namespace: obj.Namespace, Name: ref.Name}, nil
}
}
return nil, errors.Errorf("failed to find MachineSet owner reference for Machine %s", klog.KRef(obj.GetNamespace(), obj.GetName()))
}