From 789b3a0847c8bc66a708c28787cff2fa03ed40f6 Mon Sep 17 00:00:00 2001 From: killianmuldoon Date: Wed, 18 Oct 2023 17:08:30 +0100 Subject: [PATCH] Retry Node drain and delete when the ClusterCacheTracker (CCT) is locked Signed-off-by: killianmuldoon --- internal/controllers/machine/machine_controller.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index 7ffe710dff5c..772b371bddba 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -604,7 +604,11 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, restConfig, err := r.Tracker.GetRESTConfig(ctx, util.ObjectKey(cluster)) if err != nil { - log.Error(err, "Error creating a remote client while deleting Machine, won't retry") + if errors.Is(err, remote.ErrClusterLocked) { + log.V(5).Info("Requeuing drain Node because another worker has the lock on the ClusterCacheTracker") + return ctrl.Result{Requeue: true}, nil + } + log.Error(err, "Error creating a remote client for cluster while draining Node, won't retry") return ctrl.Result{}, nil } restConfig = rest.CopyConfig(restConfig) @@ -700,7 +704,10 @@ func (r *Reconciler) deleteNode(ctx context.Context, cluster *clusterv1.Cluster, remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster)) if err != nil { - log.Error(err, "Error creating a remote client for cluster while deleting Machine, won't retry") + if errors.Is(err, remote.ErrClusterLocked) { + return errors.Wrapf(err, "failed deleting Node because another worker has the lock on the ClusterCacheTracker") + } + log.Error(err, "Error creating a remote client for cluster while deleting Node, won't retry") return nil }