From c97f63a06fdb8bc5e3c794dcf65dab982de7b5da Mon Sep 17 00:00:00 2001
From: Ben Moss
Date: Wed, 24 Jun 2020 16:20:05 +0000
Subject: [PATCH] Add MHC remediation to KCP

---
 .../kubeadm/controllers/controller.go         |  14 ++-
 controlplane/kubeadm/controllers/scale.go     |  30 ++----
 .../kubeadm/controllers/scale_test.go         |  68 +-----------
 .../kubeadm/controllers/upgrade_test.go       |   9 --
 .../kubeadm/internal/control_plane.go         |  27 ++++-
 .../kubeadm/internal/control_plane_test.go    | 100 ++++++++++++++++++
 .../kubeadm/internal/machine_collection.go    |  10 ++
 .../machinefilters/machine_filters.go         |  17 +++
 8 files changed, 174 insertions(+), 101 deletions(-)

diff --git a/controlplane/kubeadm/controllers/controller.go b/controlplane/kubeadm/controllers/controller.go
index 0d4d7751b011..58eace2548ac 100644
--- a/controlplane/kubeadm/controllers/controller.go
+++ b/controlplane/kubeadm/controllers/controller.go
@@ -279,6 +279,10 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
 
 	controlPlane := internal.NewControlPlane(cluster, kcp, ownedMachines)
 
+	if controlPlane.HasDeletingMachine() || controlPlane.ProvisioningMachines().Len() > 0 {
+		return ctrl.Result{}, nil
+	}
+
 	// Aggregate the operational state of all the machines; while aggregating we are adding the
 	// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
 	conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef())
@@ -305,6 +309,12 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
 	numMachines := len(ownedMachines)
 	desiredReplicas := int(*kcp.Spec.Replicas)
 
+	if numMachines > 0 && controlPlane.UnhealthyMachines().Len() == 0 {
+		if err := r.reconcileHealth(ctx, cluster, kcp, controlPlane); err != nil {
+			return ctrl.Result{}, err
+		}
+	}
+
 	switch {
 	// We are creating the first replica
 	case numMachines < desiredReplicas && numMachines == 0:
@@ -317,8 +327,8 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
 		// Create a new Machine w/ join
 		logger.Info("Scaling up control plane", "Desired", desiredReplicas, "Existing", numMachines)
 		return r.scaleUpControlPlane(ctx, cluster, kcp, controlPlane)
-	// We are scaling down
-	case numMachines > desiredReplicas:
+	// We are scaling down
+	case numMachines > desiredReplicas || controlPlane.UnhealthyMachines().Len() > 0:
 		logger.Info("Scaling down control plane", "Desired", desiredReplicas, "Existing", numMachines)
 		return r.scaleDownControlPlane(ctx, cluster, kcp, controlPlane)
 	}
diff --git a/controlplane/kubeadm/controllers/scale.go b/controlplane/kubeadm/controllers/scale.go
index cedc48bc6b6c..d7b09b16feaf 100644
--- a/controlplane/kubeadm/controllers/scale.go
+++ b/controlplane/kubeadm/controllers/scale.go
@@ -26,7 +26,6 @@ import (
 	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3"
 	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal"
 	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/machinefilters"
-	capierrors "sigs.k8s.io/cluster-api/errors"
 	"sigs.k8s.io/cluster-api/util"
 	ctrl "sigs.k8s.io/controller-runtime"
 )
@@ -63,11 +62,6 @@ func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Conte
 func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
 	logger := controlPlane.Logger()
 
-	// reconcileHealth returns err if there is a machine being delete which is a required condition to check before scaling up
-	if err := r.reconcileHealth(ctx, cluster, kcp, controlPlane); err != nil {
-		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
-	}
-
 	// Create the bootstrap configuration
 	bootstrapSpec := controlPlane.JoinControlPlaneConfig()
 	fd := controlPlane.FailureDomainWithFewestMachines()
@@ -89,10 +83,6 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 ) (ctrl.Result, error) {
 	logger := controlPlane.Logger()
 
-	if err := r.reconcileHealth(ctx, cluster, kcp, controlPlane); err != nil {
-		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
-	}
-
 	workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
 	if err != nil {
 		logger.Error(err, "Failed to create client to workload cluster")
@@ -109,6 +99,11 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 		return ctrl.Result{}, errors.New("failed to pick control plane Machine to delete")
 	}
 
+	if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToDelete); err != nil {
+		logger.Error(err, "Failed to remove machine from kubeadm ConfigMap")
+		return ctrl.Result{}, err
+	}
+
 	// If etcd leadership is on machine that is about to be deleted, move it to the newest member available.
 	etcdLeaderCandidate := controlPlane.Machines.Newest()
 	if err := workloadCluster.ForwardEtcdLeadership(ctx, machineToDelete, etcdLeaderCandidate); err != nil {
@@ -120,18 +115,6 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 		return ctrl.Result{}, err
 	}
 
-	if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster)); err != nil {
-		logger.V(2).Info("Waiting for control plane to pass control plane health check before removing a control plane machine", "cause", err)
-		r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy",
-			"Waiting for control plane to pass control plane health check before removing a control plane machine: %v", err)
-		return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
-
-	}
-	if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToDelete); err != nil {
-		logger.Error(err, "Failed to remove machine from kubeadm ConfigMap")
-		return ctrl.Result{}, err
-	}
-
 	logger = logger.WithValues("machine", machineToDelete)
 	if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) {
 		logger.Error(err, "Failed to delete control plane machine")
@@ -149,5 +132,8 @@ func selectMachineForScaleDown(controlPlane *internal.ControlPlane) (*clusterv1.
 	if needingUpgrade := controlPlane.MachinesNeedingRollout(); needingUpgrade.Len() > 0 {
 		machines = needingUpgrade
 	}
+	if unhealthy := controlPlane.UnhealthyMachines(); unhealthy.Len() > 0 {
+		machines = unhealthy
+	}
 	return controlPlane.MachineInFailureDomainWithMostMachines(machines)
 }
diff --git a/controlplane/kubeadm/controllers/scale_test.go b/controlplane/kubeadm/controllers/scale_test.go
index 42905d35663b..23e1a14bfb7b 100644
--- a/controlplane/kubeadm/controllers/scale_test.go
+++ b/controlplane/kubeadm/controllers/scale_test.go
@@ -33,7 +33,6 @@ import (
 	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3"
 	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal"
 	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/hash"
-	capierrors "sigs.k8s.io/cluster-api/errors"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"
@@ -84,7 +83,7 @@ func TestKubeadmControlPlaneReconciler_initializeControlPlane(t *testing.T) {
 }
 
 func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) {
-	t.Run("creates a control plane Machine if health checks pass", func(t *testing.T) {
+	t.Run("creates a control plane Machine", func(t *testing.T) {
 		g := NewWithT(t)
 
 		cluster, kcp, genericMachineTemplate := createClusterWithControlPlane()
@@ -125,71 +124,6 @@ func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) {
 		g.Expect(fakeClient.List(context.Background(), &controlPlaneMachines)).To(Succeed())
 		g.Expect(controlPlaneMachines.Items).To(HaveLen(3))
 	})
-	t.Run("does not create a control plane Machine if health checks fail", func(t *testing.T) {
-		cluster, kcp, genericMachineTemplate := createClusterWithControlPlane()
-		initObjs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), genericMachineTemplate.DeepCopy()}
-
-		beforeMachines := internal.NewFilterableMachineCollection()
-		for i := 0; i < 2; i++ {
-			m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster.DeepCopy(), kcp.DeepCopy(), true)
-			beforeMachines = beforeMachines.Insert(m)
-			initObjs = append(initObjs, m.DeepCopy())
-		}
-
-		testCases := []struct {
-			name                  string
-			etcdUnHealthy         bool
-			controlPlaneUnHealthy bool
-		}{
-			{
-				name:          "etcd health check fails",
-				etcdUnHealthy: true,
-			},
-			{
-				name:                  "controlplane component health check fails",
-				controlPlaneUnHealthy: true,
-			},
-		}
-		for _, tc := range testCases {
-			g := NewWithT(t)
-
-			fakeClient := newFakeClient(g, initObjs...)
-			fmc := &fakeManagementCluster{
-				Machines:            beforeMachines.DeepCopy(),
-				ControlPlaneHealthy: !tc.controlPlaneUnHealthy,
-				EtcdHealthy:         !tc.etcdUnHealthy,
-			}
-
-			r := &KubeadmControlPlaneReconciler{
-				Client:                    fakeClient,
-				managementCluster:         fmc,
-				managementClusterUncached: fmc,
-				Log:                       log.Log,
-				recorder:                  record.NewFakeRecorder(32),
-			}
-			controlPlane := &internal.ControlPlane{
-				KCP:      kcp,
-				Cluster:  cluster,
-				Machines: beforeMachines,
-			}
-
-			_, err := r.scaleUpControlPlane(context.Background(), cluster.DeepCopy(), kcp.DeepCopy(), controlPlane)
-			g.Expect(err).To(HaveOccurred())
-			g.Expect(err).To(MatchError(&capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}))
-
-			controlPlaneMachines := &clusterv1.MachineList{}
-			g.Expect(fakeClient.List(context.Background(), controlPlaneMachines)).To(Succeed())
-			g.Expect(controlPlaneMachines.Items).To(HaveLen(len(beforeMachines)))
-
-			endMachines := internal.NewFilterableMachineCollectionFromMachineList(controlPlaneMachines)
-			for _, m := range endMachines {
-				bm, ok := beforeMachines[m.Name]
-				bm.SetResourceVersion("1")
-				g.Expect(ok).To(BeTrue())
-				g.Expect(m).To(Equal(bm))
-			}
-		}
-	})
 }
 
 func TestKubeadmControlPlaneReconciler_scaleDownControlPlane_NoError(t *testing.T) {
diff --git a/controlplane/kubeadm/controllers/upgrade_test.go b/controlplane/kubeadm/controllers/upgrade_test.go
index 7dbd859c046b..a2238cd4e138 100644
--- a/controlplane/kubeadm/controllers/upgrade_test.go
+++ b/controlplane/kubeadm/controllers/upgrade_test.go
@@ -27,7 +27,6 @@ import (
 	"k8s.io/utils/pointer"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
 	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal"
-	capierrors "sigs.k8s.io/cluster-api/errors"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"
@@ -87,14 +86,6 @@ func TestKubeadmControlPlaneReconciler_upgradeControlPlane(t *testing.T) {
 	bothMachines := &clusterv1.MachineList{}
 	g.Expect(fakeClient.List(context.Background(), bothMachines, client.InNamespace(cluster.Namespace))).To(Succeed())
 	g.Expect(bothMachines.Items).To(HaveLen(2))
-
-	// run upgrade a second time, simulate that the node has not appeared yet but the machine exists
-	r.managementCluster.(*fakeManagementCluster).ControlPlaneHealthy = false
-	_, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane)
-	g.Expect(err).To(Equal(&capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}))
-	g.Expect(fakeClient.List(context.Background(), bothMachines, client.InNamespace(cluster.Namespace))).To(Succeed())
-	g.Expect(bothMachines.Items).To(HaveLen(2))
-
 	controlPlane.Machines = internal.NewFilterableMachineCollectionFromMachineList(bothMachines)
 
 	// manually increase number of nodes, make control plane healthy again
diff --git a/controlplane/kubeadm/internal/control_plane.go b/controlplane/kubeadm/internal/control_plane.go
index 405d62f278be..69db85836b00 100644
--- a/controlplane/kubeadm/internal/control_plane.go
+++ b/controlplane/kubeadm/internal/control_plane.go
@@ -17,8 +17,9 @@ limitations under the License.
 package internal
 
 import (
+	"errors"
+
 	"github.com/go-logr/logr"
-	"github.com/pkg/errors"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apiserver/pkg/storage/names"
@@ -29,6 +30,9 @@ import (
 	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/machinefilters"
 )
 
+// MachineHealthCheck remediation is only supported on clusters with at least 3 machines, to avoid disrupting etcd consensus.
+const minimumClusterSizeForRemediation = 3
+
 // ControlPlane holds business logic around control planes.
 // It should never need to connect to a service, that responsibility lies outside of this struct.
 type ControlPlane struct {
@@ -222,3 +226,24 @@ func (c *ControlPlane) NeedsReplacementNode() bool {
 func (c *ControlPlane) HasDeletingMachine() bool {
 	return len(c.Machines.Filter(machinefilters.HasDeletionTimestamp)) > 0
 }
+
+// ProvisioningMachines returns machines that are still booting. In clusters
+// with three or more machines, it excludes machines that need remediation.
+func (c *ControlPlane) ProvisioningMachines() FilterableMachineCollection {
+	machines := c.Machines.Filter(machinefilters.IsProvisioning).
+		Filter(machinefilters.Not(machinefilters.IsFailed))
+
+	if c.Machines.Len() < minimumClusterSizeForRemediation {
+		return machines
+	}
+	return machines.Filter(machinefilters.Not(machinefilters.NeedsRemediation))
+}
+
+// UnhealthyMachines returns the machines that need remediation. If the
+// cluster has fewer than three machines, it returns nil.
+func (c *ControlPlane) UnhealthyMachines() FilterableMachineCollection {
+	if c.Machines.Len() < minimumClusterSizeForRemediation {
+		return nil
+	}
+	return c.Machines.Filter(machinefilters.NeedsRemediation)
+}
diff --git a/controlplane/kubeadm/internal/control_plane_test.go b/controlplane/kubeadm/internal/control_plane_test.go
index 17adcbfd62a3..8c46f0d6b6b9 100644
--- a/controlplane/kubeadm/internal/control_plane_test.go
+++ b/controlplane/kubeadm/internal/control_plane_test.go
@@ -29,6 +29,9 @@ import (
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
 	bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1alpha3"
 	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3"
+	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/hash"
+	capierrors "sigs.k8s.io/cluster-api/errors"
+	"sigs.k8s.io/cluster-api/util/conditions"
 )
 
 func TestControlPlane(t *testing.T) {
@@ -200,6 +203,61 @@ var _ = Describe("Control Plane", func() {
 
 })
 
+func TestMachinePhaseFilters(t *testing.T) {
+	testCases := []struct {
+		name string
+		test func(g *WithT, cp *ControlPlane)
+	}{
+		{
+			name: "machines without a node ref or ready infrastructure are provisioning",
+			test: func(g *WithT, cp *ControlPlane) {
+				cp.Machines = NewFilterableMachineCollection(
+					machine("1", withNodeRef(), withInfrastructureReady()),
+					machine("2"),
+					machine("3", withNodeRef(), withInfrastructureReady()),
+				)
+				g.Expect(cp.ProvisioningMachines().Names()).To(ConsistOf("2"))
+				g.Expect(cp.UnhealthyMachines().Names()).To(BeEmpty())
+			},
+		},
+		{
+			name: "machines with a failure message or reason are neither provisioning nor unhealthy",
+			test: func(g *WithT, cp *ControlPlane) {
+				cp.Machines = NewFilterableMachineCollection(
+					machine("1", withNodeRef(), withFailureReason("foo")),
+					machine("2", withNodeRef(), withFailureMessage("foo")),
+					machine("3", withInfrastructureReady(), withFailureReason("bar")),
+					machine("4", withInfrastructureReady(), withFailureMessage("bar")),
+					machine("5", withInfrastructureReady(), withNodeRef(), withFailureMessage("baz")),
+					machine("6", withInfrastructureReady(), withNodeRef(), withFailureReason("baz")),
+				)
+				g.Expect(cp.ProvisioningMachines().Names()).To(BeEmpty())
+				g.Expect(cp.UnhealthyMachines().Names()).To(BeEmpty())
+			},
+		},
+		{
+			name: "machines marked for remediation in clusters with at least 3 machines are unhealthy",
+			test: func(g *WithT, cp *ControlPlane) {
+				cp.Machines = NewFilterableMachineCollection(
+					machine("1", withNeedsRemediationCondition()),
+					machine("2", withNodeRef(), withInfrastructureReady()),
+					machine("3"),
+				)
+				g.Expect(cp.ProvisioningMachines().Names()).To(ConsistOf("3"))
+				g.Expect(cp.UnhealthyMachines().Names()).To(ConsistOf("1"))
+			},
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			g := NewWithT(t)
+			kcp := &controlplanev1.KubeadmControlPlane{}
+			cp := &ControlPlane{KCP: kcp}
+			tc.test(g, cp)
+		})
+	}
+}
+
 func failureDomain(controlPlane bool) clusterv1.FailureDomainSpec {
 	return clusterv1.FailureDomainSpec{
 		ControlPlane: controlPlane,
@@ -217,3 +275,45 @@ func withHash(hash string) machineOpt {
 		m.SetLabels(map[string]string{controlplanev1.KubeadmControlPlaneHashLabelKey: hash})
 	}
 }
+
+func withTimestamp(t time.Time) machineOpt {
+	return func(m *clusterv1.Machine) {
+		m.CreationTimestamp = metav1.NewTime(t)
+	}
+}
+
+func withValidHash(kcp controlplanev1.KubeadmControlPlaneSpec) machineOpt {
+	return func(m *clusterv1.Machine) {
+		withHash(hash.Compute(&kcp))(m)
+	}
+}
+
+func withNeedsRemediationCondition() machineOpt {
+	return func(m *clusterv1.Machine) {
+		conditions.MarkFalse(m, clusterv1.MachineOwnerRemediatedCondition, "some reason", "some severity", "")
+	}
+}
+
+func withNodeRef() machineOpt {
+	return func(m *clusterv1.Machine) {
+		m.Status.NodeRef = &corev1.ObjectReference{}
+	}
+}
+
+func withInfrastructureReady() machineOpt {
+	return func(m *clusterv1.Machine) {
+		m.Status.InfrastructureReady = true
+	}
+}
+
+func withFailureReason(reason string) machineOpt {
+	return func(m *clusterv1.Machine) {
+		failureReason := capierrors.MachineStatusError(reason)
+		m.Status.FailureReason = &failureReason
+	}
+}
+func withFailureMessage(msg string) machineOpt {
+	return func(m *clusterv1.Machine) {
+		m.Status.FailureMessage = pointer.StringPtr(msg)
+	}
+}
diff --git a/controlplane/kubeadm/internal/machine_collection.go b/controlplane/kubeadm/internal/machine_collection.go
index c1db81c8b192..296daa64b902 100644
--- a/controlplane/kubeadm/internal/machine_collection.go
+++ b/controlplane/kubeadm/internal/machine_collection.go
@@ -145,3 +145,13 @@ func (s FilterableMachineCollection) ConditionGetters() []conditions.Getter {
 	}
 	return res
 }
+
+// Names returns a slice of the names of each machine in the collection.
+// Useful for logging and test assertions.
+func (s FilterableMachineCollection) Names() []string {
+	names := make([]string, 0, s.Len())
+	for _, m := range s {
+		names = append(names, m.Name)
+	}
+	return names
+}
diff --git a/controlplane/kubeadm/internal/machinefilters/machine_filters.go b/controlplane/kubeadm/internal/machinefilters/machine_filters.go
index 69d4e70e6dfd..ee9a040a6394 100644
--- a/controlplane/kubeadm/internal/machinefilters/machine_filters.go
+++ b/controlplane/kubeadm/internal/machinefilters/machine_filters.go
@@ -197,3 +197,20 @@ func ControlPlaneSelectorForCluster(clusterName string) labels.Selector {
 		must(labels.NewRequirement(clusterv1.MachineControlPlaneLabelName, selection.Exists, []string{})),
 	)
 }
+
+// NeedsRemediation returns whether the machine has the
+// MachineOwnerRemediatedCondition set to false.
+func NeedsRemediation(m *clusterv1.Machine) bool {
+	return conditions.IsFalse(m, clusterv1.MachineOwnerRemediatedCondition)
+}
+
+// IsProvisioning returns whether the machine is missing its NodeRef or does
+// not have InfrastructureReady set to true.
+func IsProvisioning(m *clusterv1.Machine) bool {
+	return m.Status.NodeRef == nil || !m.Status.InfrastructureReady
+}
+
+// IsFailed returns whether the machine has a FailureMessage or a FailureReason.
+func IsFailed(m *clusterv1.Machine) bool {
+	return m.Status.FailureMessage != nil || m.Status.FailureReason != nil
+}
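
Note for reviewers, separate from the patch itself: the quorum guard introduced above only reports remediation candidates once the control plane has at least three machines. The standalone sketch below illustrates that behaviour; the machine struct, its fields, and the unhealthyMachines helper are simplified stand-ins invented for this example (the real code operates on clusterv1.Machine and FilterableMachineCollection), but the threshold check mirrors UnhealthyMachines.

package main

import "fmt"

// machine is a simplified stand-in for clusterv1.Machine; only the signal the
// new filter cares about is modelled here (an assumption for this sketch).
type machine struct {
	name                 string
	ownerRemediatedFalse bool // mirrors conditions.IsFalse(m, MachineOwnerRemediatedCondition)
}

// Remediation is skipped below three machines to avoid breaking etcd quorum,
// matching minimumClusterSizeForRemediation in the patch.
const minimumClusterSizeForRemediation = 3

// unhealthyMachines mirrors ControlPlane.UnhealthyMachines: no candidates are
// returned for control planes smaller than the minimum size.
func unhealthyMachines(machines []machine) []machine {
	if len(machines) < minimumClusterSizeForRemediation {
		return nil
	}
	var out []machine
	for _, m := range machines {
		if m.ownerRemediatedFalse {
			out = append(out, m)
		}
	}
	return out
}

func main() {
	twoNode := []machine{{name: "a", ownerRemediatedFalse: true}, {name: "b"}}
	threeNode := append(twoNode, machine{name: "c"})

	// A two-machine control plane is never remediated, even with an unhealthy member.
	fmt.Println(unhealthyMachines(twoNode)) // []
	// With three machines, "a" becomes a scale-down candidate.
	fmt.Println(unhealthyMachines(threeNode)) // [{a true}]
}

The real filters additionally exclude machines that are still provisioning or carry a failure reason/message, and selectMachineForScaleDown prefers unhealthy machines over machines that merely need a rollout.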