Merge pull request #9725 from Jont828/mpm-capd-ownerref-fix
🌱 Remove requeues in DockerMachinePool
k8s-ci-robot authored Nov 30, 2023
2 parents 0cd7db2 + 610afe0 commit 0240b58
Showing 2 changed files with 102 additions and 47 deletions.
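Why the requeues can go away: instead of polling every 10 seconds while DockerMachines provision or delete, the controller now records status and conditions and lets its watch on DockerMachines re-trigger reconciliation when their state actually changes. Below is a minimal sketch of that event-driven wiring, assuming controller-runtime's builder API; the ExampleReconciler type, exampleLabel constant, and function bodies are illustrative, not code from this commit:

package controllers

import (
	"context"

	"k8s.io/apimachinery/pkg/types"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/handler"

	infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
	infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
)

// exampleLabel mirrors dockerMachinePoolLabel from the diff below.
const exampleLabel = "docker.cluster.x-k8s.io/machine-pool"

// ExampleReconciler is a stand-in for DockerMachinePoolReconciler.
type ExampleReconciler struct {
	client.Client
}

// Reconcile is elided; the real logic is in the diff below.
func (r *ExampleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	return ctrl.Result{}, nil
}

// SetupWithManager watches DockerMachines in addition to DockerMachinePools,
// so any change to a pool's machines enqueues the owning pool. This is what
// makes a fixed RequeueAfter unnecessary.
func (r *ExampleReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&infraexpv1.DockerMachinePool{}).
		Watches(&infrav1.DockerMachine{},
			handler.EnqueueRequestsFromMapFunc(r.dockerMachineToDockerMachinePool)).
		Complete(r)
}

// dockerMachineToDockerMachinePool maps a DockerMachine event to a reconcile
// request for the DockerMachinePool named in its label.
func (r *ExampleReconciler) dockerMachineToDockerMachinePool(_ context.Context, o client.Object) []ctrl.Request {
	if name, ok := o.GetLabels()[exampleLabel]; ok {
		return []ctrl.Request{{NamespacedName: types.NamespacedName{Namespace: o.GetNamespace(), Name: name}}}
	}
	return nil
}

The real SetupWithManager (first file below) also wires an event recorder, an external object tracker, and now an SSA cache; the sketch keeps only the watch that replaces polling.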
test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go
@@ -20,7 +20,6 @@ package controllers
import (
"context"
"fmt"
"time"

"github.com/pkg/errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -43,6 +42,7 @@ import (
"sigs.k8s.io/cluster-api/controllers/remote"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
utilexp "sigs.k8s.io/cluster-api/exp/util"
"sigs.k8s.io/cluster-api/internal/util/ssa"
"sigs.k8s.io/cluster-api/test/infrastructure/container"
infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
@@ -57,8 +57,7 @@ const (
// dockerMachinePoolLabel is the label used to identify the DockerMachinePool that is responsible for a Docker container.
dockerMachinePoolLabel = "docker.cluster.x-k8s.io/machine-pool"

-	// requeueAfter is how long to wait before checking again to see if the DockerMachines are still provisioning or deleting.
-	requeueAfter = 10 * time.Second
+	dockerMachinePoolControllerName = "dockermachinepool-controller"
)

// DockerMachinePoolReconciler reconciles a DockerMachinePool object.
@@ -71,6 +70,7 @@ type DockerMachinePoolReconciler struct {
// WatchFilterValue is the label value used to filter events prior to reconciliation.
WatchFilterValue string

+	ssaCache ssa.Cache
recorder record.EventRecorder
externalTracker external.ObjectTracker
}
@@ -140,7 +140,7 @@ func (r *DockerMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Re

// Handle deleted machines
if !dockerMachinePool.ObjectMeta.DeletionTimestamp.IsZero() {
-		return r.reconcileDelete(ctx, cluster, machinePool, dockerMachinePool)
+		return ctrl.Result{}, r.reconcileDelete(ctx, cluster, machinePool, dockerMachinePool)
}

// Add finalizer and the InfrastructureMachineKind if they aren't already present, and requeue if either were added.
@@ -194,21 +194,22 @@ func (r *DockerMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr
return errors.Wrap(err, "failed setting up with a controller manager")
}

r.recorder = mgr.GetEventRecorderFor("dockermachinepool-controller")
r.recorder = mgr.GetEventRecorderFor(dockerMachinePoolControllerName)
r.externalTracker = external.ObjectTracker{
Controller: c,
Cache: mgr.GetCache(),
}
+	r.ssaCache = ssa.NewCache()

return nil
}

-func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (ctrl.Result, error) {
+func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) error {
log := ctrl.LoggerFrom(ctx)

dockerMachineList, err := getDockerMachines(ctx, r.Client, *cluster, *machinePool, *dockerMachinePool)
if err != nil {
-		return ctrl.Result{}, err
+		return err
}

if len(dockerMachineList.Items) > 0 {
@@ -229,10 +230,9 @@ func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, clust
}

if len(errs) > 0 {
-			return ctrl.Result{}, kerrors.NewAggregate(errs)
+			return kerrors.NewAggregate(errs)
}

-		return ctrl.Result{RequeueAfter: requeueAfter}, nil
+		return nil
}

// Once there are no DockerMachines left, ensure there are no Docker containers left behind.
@@ -243,21 +243,21 @@ func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, clust
// List Docker containers, i.e. external machines in the cluster.
externalMachines, err := docker.ListMachinesByCluster(ctx, cluster, labelFilters)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to list all machines in the cluster with label \"%s:%s\"", dockerMachinePoolLabel, dockerMachinePool.Name)
return errors.Wrapf(err, "failed to list all machines in the cluster with label \"%s:%s\"", dockerMachinePoolLabel, dockerMachinePool.Name)
}

// Providers should similarly ensure that all infrastructure instances are deleted even if the InfraMachine has not been created yet.
for _, externalMachine := range externalMachines {
log.Info("Deleting Docker container", "container", externalMachine.Name())
if err := externalMachine.Delete(ctx); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to delete machine %s", externalMachine.Name())
return errors.Wrapf(err, "failed to delete machine %s", externalMachine.Name())
}
}

// Once all DockerMachines and Docker containers are deleted, remove the finalizer.
controllerutil.RemoveFinalizer(dockerMachinePool, infraexpv1.MachinePoolFinalizer)

-	return ctrl.Result{}, nil
+	return nil
}

func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (ctrl.Result, error) {
@@ -318,11 +318,7 @@ func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, clust
return ctrl.Result{}, nil
}

-	dockerMachinePool.Status.Ready = false
-	conditions.MarkFalse(dockerMachinePool, expv1.ReplicasReadyCondition, expv1.WaitingForReplicasReadyReason, clusterv1.ConditionSeverityInfo, "")
-
-	// if some machine is still provisioning, force reconcile in few seconds to check again infrastructure.
-	return ctrl.Result{RequeueAfter: requeueAfter}, nil
+	return r.updateStatus(ctx, cluster, machinePool, dockerMachinePool, dockerMachineList.Items)
}

func getDockerMachines(ctx context.Context, c client.Client, cluster clusterv1.Cluster, machinePool expv1.MachinePool, dockerMachinePool infraexpv1.DockerMachinePool) (*infrav1.DockerMachineList, error) {
@@ -380,6 +376,64 @@ func dockerMachineToDockerMachinePool(_ context.Context, o client.Object) []ctrl
return nil
}

+// updateStatus updates the Status field for the DockerMachinePool object.
+// It checks the current state of the replicas and updates the Status of the DockerMachinePool accordingly.
+func (r *DockerMachinePoolReconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool, dockerMachines []infrav1.DockerMachine) (ctrl.Result, error) {
+	log := ctrl.LoggerFrom(ctx)
+
+	// List the Docker containers. This corresponds to an InfraMachinePool instance for providers.
+	labelFilters := map[string]string{dockerMachinePoolLabel: dockerMachinePool.Name}
+	externalMachines, err := docker.ListMachinesByCluster(ctx, cluster, labelFilters)
+	if err != nil {
+		return ctrl.Result{}, errors.Wrapf(err, "failed to list all external machines in the cluster")
+	}
+
+	externalMachineMap := make(map[string]*docker.Machine)
+	for _, externalMachine := range externalMachines {
+		externalMachineMap[externalMachine.Name()] = externalMachine
+	}
+	// We can reuse getDeletionCandidates to get the list of ready DockerMachines and avoid another API call, even though we aren't deleting them here.
+	_, readyMachines, err := r.getDeletionCandidates(ctx, dockerMachines, externalMachineMap, machinePool, dockerMachinePool)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	readyReplicaCount := len(readyMachines)
+	desiredReplicas := int(*machinePool.Spec.Replicas)
+
+	switch {
+	// We are scaling up
+	case readyReplicaCount < desiredReplicas:
+		conditions.MarkFalse(dockerMachinePool, clusterv1.ResizedCondition, clusterv1.ScalingUpReason, clusterv1.ConditionSeverityWarning, "Scaling up MachinePool to %d replicas (actual %d)", desiredReplicas, readyReplicaCount)
+	// We are scaling down
+	case readyReplicaCount > desiredReplicas:
+		conditions.MarkFalse(dockerMachinePool, clusterv1.ResizedCondition, clusterv1.ScalingDownReason, clusterv1.ConditionSeverityWarning, "Scaling down MachinePool to %d replicas (actual %d)", desiredReplicas, readyReplicaCount)
+	default:
+		// Make sure the last resize operation is marked as completed.
+		// NOTE: we are checking the number of machines that are ready so we report resize completed only when the machines
+		// are actually provisioned (vs. reporting completed immediately after the last machine object is created). This convention is also used by KCP.
+		if len(dockerMachines) == readyReplicaCount {
+			if conditions.IsFalse(dockerMachinePool, clusterv1.ResizedCondition) {
+				log.Info("All the replicas are ready", "replicas", readyReplicaCount)
+			}
+			conditions.MarkTrue(dockerMachinePool, clusterv1.ResizedCondition)
+		}
+		// This means that there was no error in generating the desired number of machine objects.
+		conditions.MarkTrue(dockerMachinePool, clusterv1.MachinesCreatedCondition)
+	}
+
+	getters := make([]conditions.Getter, 0, len(dockerMachines))
+	for i := range dockerMachines {
+		getters = append(getters, &dockerMachines[i])
+	}
+
+	// Aggregate the operational state of all the machines; while aggregating we are adding the
+	// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
+	conditions.SetAggregate(dockerMachinePool, expv1.ReplicasReadyCondition, getters, conditions.AddSourceRef())
+
+	return ctrl.Result{}, nil
+}

func patchDockerMachinePool(ctx context.Context, patchHelper *patch.Helper, dockerMachinePool *infraexpv1.DockerMachinePool) error {
conditions.SetSummary(dockerMachinePool,
conditions.WithConditions(
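The new updateStatus above replaces the old requeue-with-status behavior: scaling progress is reported through the Resized and MachinesCreated conditions, and per-machine readiness is rolled up into ReplicasReady. A small sketch of that final aggregation step, assuming Cluster API's public util/conditions package (the aggregateReplicasReady helper is an illustrative name, not part of this commit):

package example

import (
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
	infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/conditions"
)

// aggregateReplicasReady mirrors the tail of updateStatus: it rolls each
// DockerMachine's conditions up into the pool's ReplicasReady condition.
func aggregateReplicasReady(pool *infraexpv1.DockerMachinePool, machines []infrav1.DockerMachine) {
	// conditions.Getter is an interface; take the address of each slice
	// element (not the loop variable) so every getter is distinct.
	getters := make([]conditions.Getter, 0, len(machines))
	for i := range machines {
		getters = append(getters, &machines[i])
	}
	// AddSourceRef records reason@machine/name on the merged condition so a
	// False ReplicasReady can be traced back to the machine that caused it.
	conditions.SetAggregate(pool, expv1.ReplicasReadyCondition, getters, conditions.AddSourceRef())
}

SetAggregate only mutates the in-memory object; the result is persisted later by the patch helper used in patchDockerMachinePool.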
test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller_phases.go
@@ -33,6 +33,7 @@ import (

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/internal/util/ssa"
infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/test/infrastructure/docker/internal/docker"
@@ -142,16 +143,22 @@ func (r *DockerMachinePoolReconciler) reconcileDockerMachines(ctx context.Contex
// Create a DockerMachine for each Docker container so we surface the information to the user. Use the same name as the Docker container for the Docker Machine for ease of lookup.
// Providers should iterate through their infrastructure instances and ensure that each instance has a corresponding InfraMachine.
for _, machine := range externalMachines {
-		if _, ok := dockerMachineMap[machine.Name()]; !ok {
+		if existingMachine, ok := dockerMachineMap[machine.Name()]; ok {
+			log.V(2).Info("Patching existing DockerMachine", "name", existingMachine.Name)
+			desiredMachine := computeDesiredDockerMachine(machine.Name(), cluster, machinePool, dockerMachinePool, &existingMachine)
+			if err := ssa.Patch(ctx, r.Client, dockerMachinePoolControllerName, desiredMachine, ssa.WithCachingProxy{Cache: r.ssaCache, Original: &existingMachine}); err != nil {
+				return errors.Wrapf(err, "failed to update DockerMachine %q", klog.KObj(desiredMachine))
+			}
+
+			dockerMachineMap[desiredMachine.Name] = *desiredMachine
+		} else {
log.V(2).Info("Creating a new DockerMachine for Docker container", "container", machine.Name())
-			dockerMachine, err := r.createDockerMachine(ctx, machine.Name(), cluster, machinePool, dockerMachinePool)
-			if err != nil {
+			desiredMachine := computeDesiredDockerMachine(machine.Name(), cluster, machinePool, dockerMachinePool, nil)
+			if err := ssa.Patch(ctx, r.Client, dockerMachinePoolControllerName, desiredMachine); err != nil {
return errors.Wrap(err, "failed to create a new docker machine")
}

-			dockerMachineMap[dockerMachine.Name] = *dockerMachine
-		} else {
-			log.V(4).Info("DockerMachine already exists, nothing to do", "name", machine.Name())
+			dockerMachineMap[desiredMachine.Name] = *desiredMachine
}
}

@@ -233,30 +240,15 @@ func (r *DockerMachinePoolReconciler) reconcileDockerMachines(ctx context.Contex
return nil
}

-// createDockerMachine creates a DockerMachine to represent a Docker container in a DockerMachinePool.
+// computeDesiredDockerMachine creates a DockerMachine to represent a Docker container in a DockerMachinePool.
// These DockerMachines have the clusterv1.ClusterNameLabel and clusterv1.MachinePoolNameLabel to support MachinePool Machines.
-func (r *DockerMachinePoolReconciler) createDockerMachine(ctx context.Context, name string, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (*infrav1.DockerMachine, error) {
-	log := ctrl.LoggerFrom(ctx)
-
-	labels := map[string]string{
-		clusterv1.ClusterNameLabel:     cluster.Name,
-		clusterv1.MachinePoolNameLabel: format.MustFormatValue(machinePool.Name),
-	}
+func computeDesiredDockerMachine(name string, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool, existingDockerMachine *infrav1.DockerMachine) *infrav1.DockerMachine {
dockerMachine := &infrav1.DockerMachine{
ObjectMeta: metav1.ObjectMeta{
Namespace: dockerMachinePool.Namespace,
Name: name,
-			Labels:      labels,
+			Labels:      make(map[string]string),
+			Annotations: make(map[string]string),
-			OwnerReferences: []metav1.OwnerReference{
-				{
-					APIVersion: dockerMachinePool.APIVersion,
-					Kind:       dockerMachinePool.Kind,
-					Name:       dockerMachinePool.Name,
-					UID:        dockerMachinePool.UID,
-				},
-				// Note: Since the MachinePool controller has not created its owner Machine yet, we want to set the DockerMachinePool as the owner so it's not orphaned.
-			},
},
Spec: infrav1.DockerMachineSpec{
CustomImage: dockerMachinePool.Spec.Template.CustomImage,
@@ -265,13 +257,22 @@ func (r *DockerMachinePoolReconciler) createDockerMachine(ctx context.Context, n
},
}

log.V(2).Info("Creating DockerMachine", "dockerMachine", dockerMachine.Name)

if err := r.Client.Create(ctx, dockerMachine); err != nil {
return nil, errors.Wrap(err, "failed to create dockerMachine")
if existingDockerMachine != nil {
dockerMachine.SetUID(existingDockerMachine.UID)
dockerMachine.SetOwnerReferences(existingDockerMachine.OwnerReferences)
}

-	return dockerMachine, nil
+	// Note: Since the MachinePool controller has not created its owner Machine yet, we want to set the DockerMachinePool as the owner so it's not orphaned.
+	dockerMachine.SetOwnerReferences(util.EnsureOwnerRef(dockerMachine.OwnerReferences, metav1.OwnerReference{
+		APIVersion: dockerMachinePool.APIVersion,
+		Kind:       dockerMachinePool.Kind,
+		Name:       dockerMachinePool.Name,
+		UID:        dockerMachinePool.UID,
+	}))
+	dockerMachine.Labels[clusterv1.ClusterNameLabel] = cluster.Name
+	dockerMachine.Labels[clusterv1.MachinePoolNameLabel] = format.MustFormatValue(machinePool.Name)

+	return dockerMachine
}

// deleteMachinePoolMachine attempts to delete a DockerMachine and its associated owner Machine if it exists.
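In this second file, create and update collapse into one path: computeDesiredDockerMachine builds the full desired object (carrying over UID and owner references when the DockerMachine already exists), and ssa.Patch applies it with Kubernetes server-side apply, so the same code both creates missing DockerMachines and corrects drifted ones; the ssa.WithCachingProxy option appears to skip the API call when an identical apply recently succeeded. Because internal/util/ssa cannot be imported from outside the cluster-api module, a rough out-of-tree equivalent using plain controller-runtime might look like this (a sketch, not the project's implementation; applyDockerMachine is an illustrative name):

package example

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"

	infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
)

// applyDockerMachine creates or updates a DockerMachine with server-side
// apply: the API server merges the desired state and records this controller
// as the field owner, so no separate Create/Update branches are needed.
func applyDockerMachine(ctx context.Context, c client.Client, desired *infrav1.DockerMachine) error {
	// Apply patches must carry apiVersion/kind; cluster-api's ssa.Patch takes
	// care of this by converting the object to unstructured first.
	desired.TypeMeta = metav1.TypeMeta{APIVersion: infrav1.GroupVersion.String(), Kind: "DockerMachine"}
	// ForceOwnership claims fields that another manager set to different
	// values; the field owner must stay stable across reconciles so the
	// server can tell this controller's writes apart.
	return c.Patch(ctx, desired, client.Apply,
		client.ForceOwnership, client.FieldOwner("dockermachinepool-controller"))
}

Carrying the existing UID into the desired object pins the apply to the exact object that was read, so an update cannot silently land on a replacement object that happens to have the same name.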
