diff --git a/controlplane/kubeadm/api/v1alpha4/conversion.go b/controlplane/kubeadm/api/v1alpha4/conversion.go index 9da266d9213d..1637b272a82c 100644 --- a/controlplane/kubeadm/api/v1alpha4/conversion.go +++ b/controlplane/kubeadm/api/v1alpha4/conversion.go @@ -180,6 +180,10 @@ func (src *KubeadmControlPlaneTemplate) ConvertTo(dstRaw conversion.Hub) error { dst.Spec.Template.Spec.KubeadmConfigSpec.InitConfiguration.NodeRegistration.ImagePullPolicy = restored.Spec.Template.Spec.KubeadmConfigSpec.InitConfiguration.NodeRegistration.ImagePullPolicy } + if restored.Spec.Template.Spec.RemediationStrategy != nil { + dst.Spec.Template.Spec.RemediationStrategy = restored.Spec.Template.Spec.RemediationStrategy + } + return nil } diff --git a/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook.go b/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook.go index dd512fe26967..52a6276ba132 100644 --- a/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook.go +++ b/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook.go @@ -171,6 +171,8 @@ func (in *KubeadmControlPlane) ValidateUpdate(old runtime.Object) error { {spec, "machineTemplate", "nodeDeletionTimeout"}, {spec, "replicas"}, {spec, "version"}, + {spec, "remediationStrategy"}, + {spec, "remediationStrategy", "*"}, {spec, "rolloutAfter"}, {spec, "rolloutBefore", "*"}, {spec, "rolloutStrategy", "*"}, diff --git a/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook_test.go b/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook_test.go index 3ead11553c28..360e345ec187 100644 --- a/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook_test.go +++ b/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook_test.go @@ -407,6 +407,11 @@ func TestKubeadmControlPlaneValidateUpdate(t *testing.T) { validUpdate.Spec.RolloutBefore = &RolloutBefore{ CertificatesExpiryDays: pointer.Int32(14), } + validUpdate.Spec.RemediationStrategy = &RemediationStrategy{ + MaxRetry: pointer.Int32(50), + MinHealthyPeriod: &metav1.Duration{Duration: 10 * time.Hour}, + RetryPeriod: metav1.Duration{Duration: 10 * time.Minute}, + } validUpdate.Spec.KubeadmConfigSpec.Format = bootstrapv1.CloudConfig scaleToZero := before.DeepCopy() diff --git a/controlplane/kubeadm/api/v1beta1/kubeadmcontrolplanetemplate_types.go b/controlplane/kubeadm/api/v1beta1/kubeadmcontrolplanetemplate_types.go index 4817b71b18fb..9fab688e84f7 100644 --- a/controlplane/kubeadm/api/v1beta1/kubeadmcontrolplanetemplate_types.go +++ b/controlplane/kubeadm/api/v1beta1/kubeadmcontrolplanetemplate_types.go @@ -91,6 +91,10 @@ type KubeadmControlPlaneTemplateResourceSpec struct { // +optional // +kubebuilder:default={type: "RollingUpdate", rollingUpdate: {maxSurge: 1}} RolloutStrategy *RolloutStrategy `json:"rolloutStrategy,omitempty"` + + // The RemediationStrategy that controls how control plane machine remediation happens. 
+ // +optional + RemediationStrategy *RemediationStrategy `json:"remediationStrategy,omitempty"` } // KubeadmControlPlaneTemplateMachineTemplate defines the template for Machines diff --git a/controlplane/kubeadm/api/v1beta1/zz_generated.deepcopy.go b/controlplane/kubeadm/api/v1beta1/zz_generated.deepcopy.go index 88d4bca9de22..5d6d56bccd09 100644 --- a/controlplane/kubeadm/api/v1beta1/zz_generated.deepcopy.go +++ b/controlplane/kubeadm/api/v1beta1/zz_generated.deepcopy.go @@ -324,6 +324,11 @@ func (in *KubeadmControlPlaneTemplateResourceSpec) DeepCopyInto(out *KubeadmCont *out = new(RolloutStrategy) (*in).DeepCopyInto(*out) } + if in.RemediationStrategy != nil { + in, out := &in.RemediationStrategy, &out.RemediationStrategy + *out = new(RemediationStrategy) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeadmControlPlaneTemplateResourceSpec. diff --git a/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanetemplates.yaml b/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanetemplates.yaml index a70a55892c40..92fcc11ab3d7 100644 --- a/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanetemplates.yaml +++ b/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanetemplates.yaml @@ -2382,6 +2382,55 @@ spec: time limitations. type: string type: object + remediationStrategy: + description: The RemediationStrategy that controls how control + plane machine remediation happens. + properties: + maxRetry: + description: "MaxRetry is the Max number of retries while + attempting to remediate an unhealthy machine. A retry + happens when a machine that was created as a replacement + for an unhealthy machine also fails. For example, given + a control plane with three machines M1, M2, M3: \n M1 + become unhealthy; remediation happens, and M1-1 is created + as a replacement. If M1-1 (replacement of M1) has problems + while bootstrapping it will become unhealthy, and then + be remediated; such operation is considered a retry, + remediation-retry #1. If M1-2 (replacement of M1-2) + becomes unhealthy, remediation-retry #2 will happen, + etc. \n A retry could happen only after RetryPeriod + from the previous retry. If a machine is marked as unhealthy + after MinHealthyPeriod from the previous remediation + expired, this is not considered a retry anymore because + the new issue is assumed unrelated from the previous + one. \n If not set, the remedation will be retried infinitely." + format: int32 + type: integer + minHealthyPeriod: + description: "MinHealthyPeriod defines the duration after + which KCP will consider any failure to a machine unrelated + from the previous one. In this case the remediation + is not considered a retry anymore, and thus the retry + counter restarts from 0. For example, assuming MinHealthyPeriod + is set to 1h (default) \n M1 become unhealthy; remediation + happens, and M1-1 is created as a replacement. If M1-1 + (replacement of M1) has problems within the 1hr after + the creation, also this machine will be remediated and + this operation is considered a retry - a problem related + to the original issue happened to M1 -. \n If instead + the problem on M1-1 is happening after MinHealthyPeriod + expired, e.g. four days after m1-1 has been created + as a remediation of M1, the problem on M1-1 is considered + unrelated to the original issue happened to M1. 
\n If + not set, this value is defaulted to 1h." + type: string + retryPeriod: + description: "RetryPeriod is the duration that KCP should + wait before remediating a machine being created as a + replacement for an unhealthy machine (a retry). \n If + not set, a retry will happen immediately." + type: string + type: object rolloutAfter: description: RolloutAfter is a field to indicate a rollout should be performed after the specified time even if no diff --git a/controlplane/kubeadm/internal/controllers/helpers.go b/controlplane/kubeadm/internal/controllers/helpers.go index f6ef0dc13b48..a94370537cd2 100644 --- a/controlplane/kubeadm/internal/controllers/helpers.go +++ b/controlplane/kubeadm/internal/controllers/helpers.go @@ -341,7 +341,7 @@ func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp } // Remove the annotation tracking that a remediation is in progress (the remediation completed when - // the replacement machine have been created above). + // the replacement machine has been created above). delete(kcp.Annotations, controlplanev1.RemediationInProgressAnnotation) return nil diff --git a/controlplane/kubeadm/internal/controllers/remediation.go b/controlplane/kubeadm/internal/controllers/remediation.go index c0a6b6f9479d..72a0eb8236aa 100644 --- a/controlplane/kubeadm/internal/controllers/remediation.go +++ b/controlplane/kubeadm/internal/controllers/remediation.go @@ -73,11 +73,6 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C return ctrl.Result{}, kerrors.NewAggregate(errList) } - // Returns if another remediation is in progress but the new machine is not yet created. - if _, ok := controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok { - return ctrl.Result{}, nil - } - // Gets all machines that have `MachineHealthCheckSucceeded=False` (indicating a problem was detected on the machine) // and `MachineOwnerRemediated` present, indicating that this controller is responsible for performing remediation. unhealthyMachines := controlPlane.UnhealthyMachines() @@ -99,6 +94,16 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C return ctrl.Result{}, nil } + log = log.WithValues("Machine", klog.KObj(machineToBeRemediated), "initialized", controlPlane.KCP.Status.Initialized) + + // Returns if another remediation is in progress but the new Machine is not yet created. + // Note: This condition is checked after we check for unhealthy Machines and if machineToBeRemediated + // is being deleted to avoid unnecessary logs if no further remediation should be done. + if _, ok := controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok { + log.Info("Another remediation is already in progress. Skipping remediation.") + return ctrl.Result{}, nil + } + patchHelper, err := patch.NewHelper(machineToBeRemediated, r.Client) if err != nil { return ctrl.Result{}, err @@ -118,7 +123,6 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C // Before starting remediation, run preflight checks in order to verify it is safe to remediate. // If any of the following checks fails, we'll surface the reason in the MachineOwnerRemediated condition. - log = log.WithValues("Machine", klog.KObj(machineToBeRemediated), "initialized", controlPlane.KCP.Status.Initialized) // Check if KCP is allowed to remediate considering retry limits: // - Remediation cannot happen because retryPeriod is not yet expired. 
@@ -132,9 +136,11 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C return ctrl.Result{}, nil } - // Executes checks that applies only if the control plane is already initialized; in this case KCP can - // remediate only if it can safely assume that the operation preserves the operation state of the existing cluster (or at least it doesn't make it worst). if controlPlane.KCP.Status.Initialized { + // Executes checks that apply only if the control plane is already initialized; in this case KCP can + // remediate only if it can safely assume that the operation preserves the operation state of the + // existing cluster (or at least it doesn't make it worse). + // The cluster MUST have more than one replica, because this is the smallest cluster size that allows any etcd failure tolerance. if controlPlane.Machines.Len() <= 1 { log.Info("A control plane machine needs remediation, but the number of current replicas is less or equal to 1. Skipping remediation", "Replicas", controlPlane.Machines.Len()) @@ -163,22 +169,14 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C return ctrl.Result{}, nil } } - } - - // Prepare the info for tracking the remediation progress into the RemediationInProgressAnnotation. - remediationInProgressValue, err := remediationInProgressData.Marshal() - if err != nil { - return ctrl.Result{}, err - } - // Start remediating the unhealthy control plane machine by deleting it. - // A new machine will come up completing the operation as part of the regular + // Start remediating the unhealthy control plane machine by deleting it. + // A new machine will come up completing the operation as part of the regular reconcile. - // If the control plane is initialized, before deleting the machine: - // - if the machine hosts the etcd leader, forward etcd leadership to another machine. - // - delete the etcd member hosted on the machine being deleted. - // - remove the etcd member from the kubeadm config map (only for kubernetes version older than v1.22.0) - if controlPlane.KCP.Status.Initialized { + // If the control plane is initialized, before deleting the machine: + // - if the machine hosts the etcd leader, forward etcd leadership to another machine. + // - delete the etcd member hosted on the machine being deleted. + // - remove the etcd member from the kubeadm config map (only for kubernetes version older than v1.22.0) workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster)) if err != nil { log.Error(err, "Failed to create client to workload cluster") @@ -227,6 +225,12 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C log.Info("Remediating unhealthy machine") conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") + // Prepare the info for tracking the remediation progress into the RemediationInProgressAnnotation. + remediationInProgressValue, err := remediationInProgressData.Marshal() + if err != nil { + return ctrl.Result{}, err + } + // Set annotations tracking remediation details so they can be picked up by the machine // that will be created as part of the scale up action that completes the remediation. 
annotations.AddAnnotations(controlPlane.KCP, map[string]string{ @@ -263,49 +267,39 @@ func (r *KubeadmControlPlaneReconciler) checkRetryLimits(log logr.Logger, machin return remediationInProgressData, true, nil } - // Gets MinHealthySeconds and RetryDelaySeconds from the remediation strategy, or use defaults. + // Gets MinHealthyPeriod and RetryPeriod from the remediation strategy, or use defaults. minHealthyPeriod := controlplanev1.DefaultMinHealthyPeriod if controlPlane.KCP.Spec.RemediationStrategy != nil && controlPlane.KCP.Spec.RemediationStrategy.MinHealthyPeriod != nil { minHealthyPeriod = controlPlane.KCP.Spec.RemediationStrategy.MinHealthyPeriod.Duration } - retryPeriod := time.Duration(0) if controlPlane.KCP.Spec.RemediationStrategy != nil { retryPeriod = controlPlane.KCP.Spec.RemediationStrategy.RetryPeriod.Duration } // Gets the timestamp of the last remediation; if missing, default to a value - // that ensures both MinHealthySeconds and RetryDelaySeconds are expired. + // that ensures both MinHealthyPeriod and RetryPeriod are expired. // NOTE: this could potentially lead to executing more retries than expected or to executing retries before than // expected, but this is considered acceptable when the system recovers from someone/something changes or deletes // the RemediationForAnnotation on Machines. - max := func(x, y time.Duration) time.Duration { - if x < y { - return y - } - return x - } - lastRemediationTime := reconciliationTime.Add(-2 * max(minHealthyPeriod, retryPeriod)) if !lastRemediationData.Timestamp.IsZero() { lastRemediationTime = lastRemediationData.Timestamp.Time } - // Check if the machine being remediated has been created as a remediation for a previous unhealthy machine. - // NOTE: if someone/something changes or deletes the RemediationForAnnotation on Machines, this could potentially - // lead to executing more retries than expected, but this is considered acceptable in such a case. - machineRemediationFor := remediationInProgressData.Machine - if lastRemediationData.Machine != "" { - // If the remediation is happening before minHealthyPeriod is expired, then KCP considers this - // as a remediation for the same previously unhealthy machine. - if lastRemediationTime.Add(minHealthyPeriod).After(reconciliationTime) { - machineRemediationFor = lastRemediationData.Machine - log = log.WithValues("RemediationRetryFor", klog.KRef(machineToBeRemediated.Namespace, machineRemediationFor)) - } + // Once we get here we already know that there was a last remediation for the Machine. + // If the current remediation is happening before minHealthyPeriod is expired, then KCP considers this + // as a remediation for the same previously unhealthy machine. + // NOTE: If someone/something changes the RemediationForAnnotation on Machines (e.g. changes the Timestamp), + // this could potentially lead to executing more retries than expected, but this is considered acceptable in such a case. + var retryForSameMachineInProgress bool + if lastRemediationTime.Add(minHealthyPeriod).After(reconciliationTime) { + retryForSameMachineInProgress = true + log = log.WithValues("RemediationRetryFor", klog.KRef(machineToBeRemediated.Namespace, lastRemediationData.Machine)) } - // If remediation is happening for a different machine, this is the first try of a new retry sequence. - if lastRemediationData.Machine != machineRemediationFor { + // If the retry for the same machine is not in progress, this is the first try of a new retry sequence. 
+ if !retryForSameMachineInProgress { return remediationInProgressData, true, nil } @@ -315,7 +309,7 @@ func (r *KubeadmControlPlaneReconciler) checkRetryLimits(log logr.Logger, machin // Check if remediation can happen because retryPeriod is passed. if lastRemediationTime.Add(retryPeriod).After(reconciliationTime) { log.Info(fmt.Sprintf("A control plane machine needs remediation, but the operation already failed in the latest %s. Skipping remediation", retryPeriod)) - conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because the operation already failed in the latest %s (RetryDelay)", retryPeriod) + conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because the operation already failed in the latest %s (RetryPeriod)", retryPeriod) return remediationInProgressData, false, nil } @@ -329,12 +323,20 @@ func (r *KubeadmControlPlaneReconciler) checkRetryLimits(log logr.Logger, machin } } - // All the check passed, increase the remediation number. + // All the check passed, increase the remediation retry count. remediationInProgressData.RetryCount++ return remediationInProgressData, true, nil } +// max calculates the maximum duration. +func max(x, y time.Duration) time.Duration { + if x < y { + return y + } + return x +} + // canSafelyRemoveEtcdMember assess if it is possible to remove the member hosted on the machine to be remediated // without loosing etcd quorum. // diff --git a/controlplane/kubeadm/internal/controllers/remediation_test.go b/controlplane/kubeadm/internal/controllers/remediation_test.go index 3766cbacbf56..26183baa1cc7 100644 --- a/controlplane/kubeadm/internal/controllers/remediation_test.go +++ b/controlplane/kubeadm/internal/controllers/remediation_test.go @@ -106,7 +106,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { t.Run("reconcileUnhealthyMachines return early if another remediation is in progress", func(t *testing.T) { g := NewWithT(t) - m := getDeletingMachine(ns.Name, "m1-unhealthy-deleting-", withMachineHealthCheckFailed()) + m := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withStuckRemediation()) conditions.MarkFalse(m, clusterv1.MachineHealthCheckSucceededCondition, clusterv1.MachineHasFailureReason, clusterv1.ConditionSeverityWarning, "") conditions.MarkFalse(m, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "") controlPlane := &internal.ControlPlane{ @@ -142,6 +142,8 @@ func TestReconcileUnhealthyMachines(t *testing.T) { } ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) + g.Expect(controlPlane.KCP.Annotations).ToNot(HaveKey(controlplanev1.RemediationInProgressAnnotation)) + g.Expect(ret.IsZero()).To(BeTrue()) // Remediation skipped g.Expect(err).ToNot(HaveOccurred()) }) @@ -309,7 +311,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { removeFinalizer(g, m1) g.Expect(env.Cleanup(ctx, m1, m2, m3)).To(Succeed()) }) - t.Run("Remediation does not happen if RetryDelay is not yet passed", func(t *testing.T) { + t.Run("Remediation does not happen if RetryPeriod is not yet passed", func(t *testing.T) { g := NewWithT(t) m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed(), withWaitBeforeDeleteFinalizer(), 
withRemediateForAnnotation(MustMarshalRemediationData(&RemediationData{ @@ -327,7 +329,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { Version: "v1.19.1", RemediationStrategy: &controlplanev1.RemediationStrategy{ MaxRetry: utilpointer.Int32(3), - RetryPeriod: metav1.Duration{Duration: controlplanev1.DefaultMinHealthyPeriod}, // RetryDelaySeconds not yet expired. + RetryPeriod: metav1.Duration{Duration: controlplanev1.DefaultMinHealthyPeriod}, // RetryPeriod not yet expired. }, }, }, @@ -352,7 +354,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(controlPlane.KCP.Annotations).ToNot(HaveKey(controlplanev1.RemediationInProgressAnnotation)) - assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because the operation already failed in the latest 1h0m0s (RetryDelay)") + assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because the operation already failed in the latest 1h0m0s (RetryPeriod)") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -401,7 +403,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(env.Cleanup(ctx, m)).To(Succeed()) }) - t.Run("Remediation does not happen if there is a deleting machine", func(t *testing.T) { + t.Run("Remediation does not happen if there is another machine being deleted (not the one to be remediated)", func(t *testing.T) { g := NewWithT(t) m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed()) @@ -1146,7 +1148,7 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { err = env.Get(ctx, client.ObjectKey{Namespace: m3.Namespace, Name: m3.Name}, m3) g.Expect(err).ToNot(HaveOccurred()) - g.Expect(m2.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) + g.Expect(m3.ObjectMeta.DeletionTimestamp.IsZero()).To(BeFalse()) removeFinalizer(g, m3) g.Expect(env.Cleanup(ctx, m3)).To(Succeed()) diff --git a/docs/book/src/reference/labels_and_annotations.md b/docs/book/src/reference/labels_and_annotations.md index be03817863d3..5a6e5089bf24 100644 --- a/docs/book/src/reference/labels_and_annotations.md +++ b/docs/book/src/reference/labels_and_annotations.md @@ -49,5 +49,5 @@ | controlplane.cluster.x-k8s.io/skip-coredns | It explicitly skips reconciling CoreDNS if set. | | controlplane.cluster.x-k8s.io/skip-kube-proxy | It explicitly skips reconciling kube-proxy if set. | | controlplane.cluster.x-k8s.io/kubeadm-cluster-configuration | It is a machine annotation that stores the json-marshalled string of KCP ClusterConfiguration. This annotation is used to detect any changes in ClusterConfiguration and trigger machine rollout in KCP. | -| controlplane.cluster.x-k8s.io/remediation-in-progress | It is a KCP remediation is that tracks that the system is in between having deleted an unhealthy machine and recreating its replacement. | +| controlplane.cluster.x-k8s.io/remediation-in-progress | It is a KCP annotation that tracks that the system is in between having deleted an unhealthy machine and recreating its replacement. | | controlplane.cluster.x-k8s.io/remediation-for | It is a machine annotation that links a new machine to the unhealthy machine it is replacing. 
| diff --git a/test/e2e/kcp_remediations.go b/test/e2e/kcp_remediations.go index c0e69cd0a0bc..64fc229b0b59 100644 --- a/test/e2e/kcp_remediations.go +++ b/test/e2e/kcp_remediations.go @@ -19,16 +19,13 @@ package e2e import ( "context" "fmt" - "io" "os" - "os/exec" "path/filepath" - "runtime" - "strings" "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + authenticationv1 "k8s.io/api/authentication/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -117,7 +114,7 @@ func KCPRemediationSpec(ctx context.Context, inputGetter func() KCPRemediationSp Proxy: input.BootstrapClusterProxy, ArtifactFolder: input.ArtifactFolder, SpecName: specName, - Flavor: input.Flavor, + Flavor: pointer.StringDeref(input.Flavor, "kcp-remediation"), // values to be injected in the template @@ -126,7 +123,7 @@ func KCPRemediationSpec(ctx context.Context, inputGetter func() KCPRemediationSp // NOTE: this func also setups credentials/RBAC rules and everything necessary to get the authenticationToken. AuthenticationToken: getAuthenticationToken(ctx, input.BootstrapClusterProxy, namespace.Name), // Address to be used for accessing the management cluster from a workload cluster. - ServerAddr: getServerAddr(input.BootstrapClusterProxy.GetKubeconfigPath()), + ServerAddr: getServerAddr(ctx, input.BootstrapClusterProxy), }) // The first CP machine comes up but it does not complete bootstrap @@ -235,7 +232,7 @@ func KCPRemediationSpec(ctx context.Context, inputGetter func() KCPRemediationSp Expect(secondMachine.Status.NodeRef).To(BeNil()) log.Logf("Machine %s is up but still bootstrapping", secondMachineName) - // Intentionally trigger remediation on the second CP, and validate and validate also this one is deleted and a replacement should come up. + // Intentionally trigger remediation on the second CP and validate that also this one is deleted and a replacement should come up. By("REMEDIATING SECOND CONTROL PLANE MACHINE") @@ -432,17 +429,15 @@ type createWorkloadClusterAndWaitInput struct { Proxy framework.ClusterProxy ArtifactFolder string SpecName string - Flavor *string + Flavor string Namespace string - AuthenticationToken []byte + AuthenticationToken string ServerAddr string } -// createWorkloadClusterAndWait creates a workload cluster ard return as soon as the cluster infrastructure is ready. +// createWorkloadClusterAndWait creates a workload cluster and return as soon as the cluster infrastructure is ready. // NOTE: we are not using the same func used by other tests because it would fail if the control plane doesn't come up, -// -// which instead is expected in this case. -// +// which instead is expected in this case. // NOTE: clusterResources is filled only partially. 
func createWorkloadClusterAndWait(ctx context.Context, input createWorkloadClusterAndWaitInput) (clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult) { clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) @@ -457,7 +452,7 @@ func createWorkloadClusterAndWait(ctx context.Context, input createWorkloadClust KubeconfigPath: input.Proxy.GetKubeconfigPath(), // select template - Flavor: pointer.StringDeref(input.Flavor, "kcp-remediation"), + Flavor: input.Flavor, // define template variables Namespace: input.Namespace, ClusterName: clusterName, @@ -469,7 +464,7 @@ func createWorkloadClusterAndWait(ctx context.Context, input createWorkloadClust LogFolder: filepath.Join(input.ArtifactFolder, "clusters", input.Proxy.GetName()), // Adds authenticationToken, server address and namespace variables to be injected in the cluster template. ClusterctlVariables: map[string]string{ - "TOKEN": string(input.AuthenticationToken), + "TOKEN": input.AuthenticationToken, "SERVER": input.ServerAddr, "NAMESPACE": input.Namespace, }, @@ -560,11 +555,10 @@ func waitForMachines(ctx context.Context, input waitForMachinesInput) (allMachin // Waits for the desired set of machines to exist. log.Logf("Waiting for %d machines, must have %s, must not have %s", input.ExpectedReplicas, expectedOldMachines.UnsortedList(), expectedDeletedMachines.UnsortedList()) - Eventually(func() bool { + Eventually(func(g Gomega) { // Gets the list of machines - if err := input.Lister.List(ctx, machineList, inClustersNamespaceListOption, matchClusterListOption); err != nil { - return false - } + g.Expect(input.Lister.List(ctx, machineList, inClustersNamespaceListOption, matchClusterListOption)).To(Succeed()) + allMachines = sets.Set[string]{} for i := range machineList.Items { allMachines.Insert(machineList.Items[i].Name) @@ -578,17 +572,17 @@ func waitForMachines(ctx context.Context, input waitForMachinesInput) (allMachin log.Logf(" - expected %d, got %d: %s, of which new %s, must have check: %t, must not have check: %t", input.ExpectedReplicas, allMachines.Len(), allMachines.UnsortedList(), newMachines.UnsortedList(), allMachines.HasAll(expectedOldMachines.UnsortedList()...), !allMachines.HasAny(expectedDeletedMachines.UnsortedList()...)) // Ensures all the expected old machines are still there. - if !allMachines.HasAll(expectedOldMachines.UnsortedList()...) { - return false - } + g.Expect(allMachines.HasAll(expectedOldMachines.UnsortedList()...)).To(BeTrue(), + "Got machines: %s, must contain all of: %s", allMachines.UnsortedList(), expectedOldMachines.UnsortedList()) // Ensures none of the machines to be deleted is still there. - if allMachines.HasAny(expectedDeletedMachines.UnsortedList()...) 
{ - return false - } + g.Expect(!allMachines.HasAny(expectedDeletedMachines.UnsortedList()...)).To(BeTrue(), + "Got machines: %s, must not contain any of: %s", allMachines.UnsortedList(), expectedDeletedMachines.UnsortedList()) - return allMachines.Len() == input.ExpectedReplicas - }, input.WaitForMachinesIntervals...).Should(BeTrue(), "Failed to get the expected list of machines: got %s (expected %d machines, must have %s, must not have %s)", allMachines.UnsortedList(), input.ExpectedReplicas, expectedOldMachines.UnsortedList(), expectedDeletedMachines.UnsortedList()) + g.Expect(allMachines).To(HaveLen(input.ExpectedReplicas), "Got %d machines, must be %d", len(allMachines), input.ExpectedReplicas) + }, input.WaitForMachinesIntervals...).Should(Succeed(), + "Failed to get the expected list of machines: got %s (expected %d machines, must have %s, must not have %s)", + allMachines.UnsortedList(), input.ExpectedReplicas, expectedOldMachines.UnsortedList(), expectedDeletedMachines.UnsortedList()) log.Logf("Got %d machines: %s", allMachines.Len(), allMachines.UnsortedList()) // Ensures the desired set of machines is stable (no further machines are created or deleted). @@ -611,27 +605,31 @@ func waitForMachines(ctx context.Context, input waitForMachinesInput) (allMachin } // getServerAddr returns the address to be used for accessing the management cluster from a workload cluster. -func getServerAddr(kubeconfigPath string) string { - kubeConfig, err := clientcmd.LoadFromFile(kubeconfigPath) - Expect(err).ToNot(HaveOccurred(), "failed to load management cluster's kubeconfig file") +func getServerAddr(ctx context.Context, clusterProxy framework.ClusterProxy) string { + // With CAPD, we can't just access the bootstrap cluster via 127.0.0.1: from the + // workload cluster. Instead we retrieve the server name from the cluster-info ConfigMap in the bootstrap + // cluster (e.g. "https://test-z45p9k-control-plane:6443") + // Note: This has been tested with MacOS,Linux and Prow. + clusterInfoCM := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster-info", + Namespace: metav1.NamespacePublic, + }, + } + Expect(clusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(clusterInfoCM), clusterInfoCM)).To(Succeed()) + Expect(clusterInfoCM.Data).To(HaveKey("kubeconfig")) - clusterName := kubeConfig.Contexts[kubeConfig.CurrentContext].Cluster - Expect(clusterName).ToNot(BeEmpty(), "failed to identify current cluster name in management cluster's kubeconfig file") + kubeConfigString := clusterInfoCM.Data["kubeconfig"] - serverAddr := kubeConfig.Clusters[clusterName].Server - Expect(serverAddr).ToNot(BeEmpty(), "failed to identify current server address in management cluster's kubeconfig file") + kubeConfig, err := clientcmd.Load([]byte(kubeConfigString)) + Expect(err).ToNot(HaveOccurred()) - // On CAPD, if not running on Linux, we need to use Docker's proxy to connect back to the host - // to the CAPD cluster. Moby on Linux doesn't use the host.docker.internal DNS name. - if runtime.GOOS != "linux" { - serverAddr = strings.ReplaceAll(serverAddr, "127.0.0.1", "host.docker.internal") - } - return serverAddr + return kubeConfig.Clusters[""].Server } // getAuthenticationToken returns a bearer authenticationToken with minimal RBAC permissions to access the mhc-test ConfigMap that will be used // to control machines bootstrap during the remediation tests. 
-func getAuthenticationToken(ctx context.Context, managementClusterProxy framework.ClusterProxy, namespace string) []byte { +func getAuthenticationToken(ctx context.Context, managementClusterProxy framework.ClusterProxy, namespace string) string { sa := &corev1.ServiceAccount{ ObjectMeta: metav1.ObjectMeta{ Name: "mhc-test", @@ -677,21 +675,13 @@ func getAuthenticationToken(ctx context.Context, managementClusterProxy framewor } Expect(managementClusterProxy.GetClient().Create(ctx, roleBinding)).To(Succeed(), "failed to create mhc-test role binding") - cmd := exec.CommandContext(ctx, "kubectl", fmt.Sprintf("--kubeconfig=%s", managementClusterProxy.GetKubeconfigPath()), fmt.Sprintf("--namespace=%s", namespace), "create", "token", "mhc-test") //nolint:gosec - stdout, err := cmd.StdoutPipe() - Expect(err).ToNot(HaveOccurred(), "failed to get stdout for kubectl create authenticationToken") - stderr, err := cmd.StderrPipe() - Expect(err).ToNot(HaveOccurred(), "failed to get stderr for kubectl create authenticationToken") - - Expect(cmd.Start()).To(Succeed(), "failed to run kubectl create authenticationToken") - - output, err := io.ReadAll(stdout) - Expect(err).ToNot(HaveOccurred(), "failed to read stdout from kubectl create authenticationToken") - errout, err := io.ReadAll(stderr) - Expect(err).ToNot(HaveOccurred(), "failed to read stderr from kubectl create authenticationToken") - - Expect(cmd.Wait()).To(Succeed(), "failed to wait kubectl create authenticationToken") - Expect(errout).To(BeEmpty()) + tokenRequest := &authenticationv1.TokenRequest{ + Spec: authenticationv1.TokenRequestSpec{ + ExpirationSeconds: pointer.Int64(2 * 60 * 60), // 2 hours. + }, + } + tokenRequest, err := managementClusterProxy.GetClientSet().CoreV1().ServiceAccounts(namespace).CreateToken(ctx, "mhc-test", tokenRequest, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) - return output + return tokenRequest.Status.Token } diff --git a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go index 7dd5737d1197..7920065ae40f 100644 --- a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go +++ b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go @@ -316,7 +316,6 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * for { select { case <-timeoutCtx.Done(): - log.Info("Cancelling Bootstrap due to timeout") return default: updatedDockerMachine := &infrav1.DockerMachine{} @@ -353,13 +352,6 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * return ctrl.Result{RequeueAfter: 5 * time.Second}, nil } - // If the control plane is not yet initialized, there is no API server to contact to get the ProviderID for the Node - // hosted on this machine, so return early. - // NOTE: we are using RequeueAfter with a short interval in order to make test execution time more stable. - if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { - return ctrl.Result{RequeueAfter: 15 * time.Second}, nil - } - // Usually a cloud provider will do this, but there is no docker-cloud provider. // Requeue if there is an error, as this is likely momentary load balancer // state changes during control plane provisioning.
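The last two e2e changes replace shelling out to `kubectl create token` with an in-process TokenRequest call, and derive the management-cluster address from the `cluster-info` ConfigMap instead of rewriting the kubeconfig server address for non-Linux hosts. Below is a self-contained sketch of the TokenRequest pattern now used by `getAuthenticationToken`, assuming an already-constructed `*kubernetes.Clientset`; the function name, ServiceAccount name, and error wrapping are illustrative, while the API call and the 2-hour expiry mirror the diff.

```go
// Sketch only: requests a short-lived ServiceAccount token via the TokenRequest API,
// the same client-go call the e2e helper now uses instead of exec'ing kubectl.
package example

import (
	"context"
	"fmt"

	authenticationv1 "k8s.io/api/authentication/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/utils/pointer"
)

func serviceAccountToken(ctx context.Context, cs *kubernetes.Clientset, namespace, name string) (string, error) {
	req := &authenticationv1.TokenRequest{
		Spec: authenticationv1.TokenRequestSpec{
			// Keep the token short-lived; the e2e test uses 2 hours.
			ExpirationSeconds: pointer.Int64(2 * 60 * 60),
		},
	}
	resp, err := cs.CoreV1().ServiceAccounts(namespace).CreateToken(ctx, name, req, metav1.CreateOptions{})
	if err != nil {
		return "", fmt.Errorf("failed to create token for ServiceAccount %s/%s: %w", namespace, name, err)
	}
	return resp.Status.Token, nil
}
```

On the server-address side, reading the kubeconfig embedded in the `cluster-info` ConfigMap returns an address (e.g. "https://test-z45p9k-control-plane:6443") that is already reachable from inside the workload cluster, which is why the previous 127.0.0.1 to host.docker.internal rewrite for non-Linux hosts is no longer needed.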