Add machinepool support to clusterclass
richardcase authored and willie-yao committed Aug 23, 2023
1 parent 67b5644 commit 8fae422
Showing 36 changed files with 1,969 additions and 84 deletions.
13 changes: 13 additions & 0 deletions api/v1beta1/condition_consts.go
@@ -299,6 +299,19 @@ const (
// not yet completed because the upgrade for at least one of the MachineDeployments has been deferred.
TopologyReconciledMachineDeploymentsUpgradeDeferredReason = "MachineDeploymentsUpgradeDeferred"

// TopologyReconciledMachinePoolsUpgradePendingReason (Severity=Info) documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachinePools is not yet updated to match the desired topology spec.
TopologyReconciledMachinePoolsUpgradePendingReason = "MachinePoolsUpgradePending"

// TopologyReconciledMachinePoolsCreatePendingReason (Severity=Info) documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachinePools is yet to be created.
// This generally happens because new MachinePool creations are held off while the ControlPlane is not stable.
TopologyReconciledMachinePoolsCreatePendingReason = "MachinePoolsCreatePending"

// TopologyReconciledMachinePoolsUpgradeDeferredReason (Severity=Info) documents reconciliation of a Cluster topology
// not yet completed because the upgrade for at least one of the MachinePools has been deferred.
TopologyReconciledMachinePoolsUpgradeDeferredReason = "MachinePoolsUpgradeDeferred"

// TopologyReconciledHookBlockingReason (Severity=Info) documents reconciliation of a Cluster topology
// not yet completed because at least one of the lifecycle hooks is blocking.
TopologyReconciledHookBlockingReason = "LifecycleHookBlocking"
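
For context, these reasons surface on the Cluster's TopologyReconciled condition. Below is a minimal sketch of how one of them could be set via the util/conditions package; markMachinePoolsUpgradePending is a hypothetical helper (the actual call sites are in conditions.go, later in this diff):

package example

import (
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/conditions"
)

// markMachinePoolsUpgradePending flags the Cluster's TopologyReconciled
// condition as False with the new MachinePoolsUpgradePending reason.
func markMachinePoolsUpgradePending(cluster *clusterv1.Cluster, msg string) {
	conditions.Set(cluster, conditions.FalseCondition(
		clusterv1.TopologyReconciledCondition,
		clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason,
		clusterv1.ConditionSeverityInfo,
		"%s", msg,
	))
}
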
11 changes: 11 additions & 0 deletions cmd/clusterctl/client/cluster/topology.go
@@ -802,6 +802,17 @@ func clusterClassUsesTemplate(cc *clusterv1.ClusterClass, templateRef *corev1.Ob
}
}

for _, mpClass := range cc.Spec.Workers.MachinePools {
// Check the bootstrap ref.
if equalRef(mpClass.Template.Bootstrap.Ref, templateRef) {
return true
}
// Check the infrastructure ref.
if equalRef(mpClass.Template.Infrastructure.Ref, templateRef) {
return true
}
}

return false
}

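
The equalRef helper is outside this hunk. A plausible sketch, assuming references are matched on API group, kind, namespace, and name (the actual clusterctl implementation may differ):

package example

import (
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

// equalRef reports whether two object references point at the same object.
// Comparing the group instead of the full apiVersion tolerates version skew.
func equalRef(a, b *corev1.ObjectReference) bool {
	if a == nil || b == nil {
		return a == b
	}
	aGV, errA := schema.ParseGroupVersion(a.APIVersion)
	bGV, errB := schema.ParseGroupVersion(b.APIVersion)
	if errA != nil || errB != nil {
		return false
	}
	return aGV.Group == bGV.Group &&
		a.Kind == b.Kind &&
		a.Namespace == b.Namespace &&
		a.Name == b.Name
}
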
12 changes: 12 additions & 0 deletions config/rbac/role.yaml
@@ -187,6 +187,18 @@ rules:
- patch
- update
- watch
- apiGroups:
- cluster.x-k8s.io
resources:
- machinepools
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- cluster.x-k8s.io
resources:
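
This rule is generated rather than hand-edited: controller-gen derives it from the kubebuilder RBAC marker added to the topology controller later in this diff:

// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools,verbs=get;list;watch;create;update;patch;delete
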
4 changes: 3 additions & 1 deletion controllers/alias.go
@@ -143,7 +143,8 @@ func (r *MachineHealthCheckReconciler) SetupWithManager(ctx context.Context, mgr

// ClusterTopologyReconciler reconciles a managed topology for a Cluster object.
type ClusterTopologyReconciler struct {
-	Client client.Client
+	Client    client.Client
+	Tracker   *remote.ClusterCacheTracker
// APIReader is used to list MachineSets directly via the API server to avoid
// race conditions caused by an outdated cache.
APIReader client.Reader
@@ -162,6 +163,7 @@ func (r *ClusterTopologyReconciler) SetupWithManager(ctx context.Context, mgr ct
return (&clustertopologycontroller.Reconciler{
Client: r.Client,
APIReader: r.APIReader,
Tracker: r.Tracker,
RuntimeClient: r.RuntimeClient,
UnstructuredCachingClient: r.UnstructuredCachingClient,
WatchFilterValue: r.WatchFilterValue,
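
A minimal sketch of how a caller (for example main.go) might wire the new Tracker field, assuming a remote.ClusterCacheTracker has already been constructed; setupTopologyReconciler and its arguments are illustrative, not part of this commit:

package example

import (
	"context"

	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/controller"

	"sigs.k8s.io/cluster-api/controllers"
	"sigs.k8s.io/cluster-api/controllers/remote"
)

// setupTopologyReconciler passes the shared ClusterCacheTracker through the
// exported alias struct down to the internal topology reconciler.
func setupTopologyReconciler(ctx context.Context, mgr ctrl.Manager, tracker *remote.ClusterCacheTracker) error {
	return (&controllers.ClusterTopologyReconciler{
		Client:    mgr.GetClient(),
		APIReader: mgr.GetAPIReader(),
		Tracker:   tracker,
	}).SetupWithManager(ctx, mgr, controller.Options{})
}
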
11 changes: 10 additions & 1 deletion internal/controllers/clusterclass/clusterclass_controller.go
@@ -169,11 +169,20 @@ func (r *Reconciler) reconcileExternalReferences(ctx context.Context, clusterCla
}
}

for _, mpClass := range clusterClass.Spec.Workers.MachinePools {
if mpClass.Template.Bootstrap.Ref != nil {
refs = append(refs, mpClass.Template.Bootstrap.Ref)
}
if mpClass.Template.Infrastructure.Ref != nil {
refs = append(refs, mpClass.Template.Infrastructure.Ref)
}
}

// Ensure all referenced objects are owned by the ClusterClass.
// Nb. Some external objects can be referenced multiple times in the ClusterClass,
// but we only want to set the owner reference once per unique external object.
// For example the same KubeadmConfigTemplate could be referenced in multiple MachineDeployment
-	// classes.
+	// or MachinePool classes.
errs := []error{}
reconciledRefs := sets.Set[string]{}
outdatedRefs := map[*corev1.ObjectReference]*corev1.ObjectReference{}
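
The hunk ends just before the dedup loop. A sketch of the per-unique-reference handling the comment describes; refKey and dedupRefs are hypothetical helpers (the real code keys and patches differently):

package example

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/sets"
)

// refKey builds a unique key for an ObjectReference so the same template
// referenced by several classes is processed only once.
func refKey(ref *corev1.ObjectReference) string {
	return fmt.Sprintf("%s/%s/%s/%s", ref.APIVersion, ref.Kind, ref.Namespace, ref.Name)
}

// dedupRefs returns each reference at most once, mirroring the intent of the
// reconciledRefs set in the hunk above.
func dedupRefs(refs []*corev1.ObjectReference) []*corev1.ObjectReference {
	seen := sets.Set[string]{}
	out := make([]*corev1.ObjectReference, 0, len(refs))
	for _, ref := range refs {
		if key := refKey(ref); !seen.Has(key) {
			seen.Insert(key)
			out = append(out, ref)
		}
	}
	return out
}
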
28 changes: 27 additions & 1 deletion internal/controllers/topology/cluster/blueprint.go
@@ -34,6 +34,7 @@ func (r *Reconciler) getBlueprint(ctx context.Context, cluster *clusterv1.Cluste
Topology: cluster.Spec.Topology,
ClusterClass: clusterClass,
MachineDeployments: map[string]*scope.MachineDeploymentBlueprint{},
MachinePools: map[string]*scope.MachinePoolBlueprint{},
}

var err error
@@ -82,7 +83,7 @@ func (r *Reconciler) getBlueprint(ctx context.Context, cluster *clusterv1.Cluste
// Get the bootstrap machine template.
machineDeploymentBlueprint.BootstrapTemplate, err = r.getReference(ctx, machineDeploymentClass.Template.Bootstrap.Ref)
if err != nil {
return nil, errors.Wrapf(err, "failed to get bootstrap machine template for %s, MachineDeployment class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machineDeploymentClass.Class)
return nil, errors.Wrapf(err, "failed to get bootstrap config template for %s, MachineDeployment class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machineDeploymentClass.Class)
}

// If the machineDeploymentClass defines a MachineHealthCheck add it to the blueprint.
@@ -92,5 +93,30 @@ func (r *Reconciler) getBlueprint(ctx context.Context, cluster *clusterv1.Cluste
blueprint.MachineDeployments[machineDeploymentClass.Class] = machineDeploymentBlueprint
}

// Loop over the MachinePool classes in the ClusterClass
// and fetch the related templates.
for _, machinePoolClass := range blueprint.ClusterClass.Spec.Workers.MachinePools {
machinePoolBlueprint := &scope.MachinePoolBlueprint{}

// Make sure to copy the metadata from the ClusterClass template into the blueprint;
// it is later layered with the additional metadata defined in the Cluster's topology
// section for the MachinePool that is created or updated.
machinePoolClass.Template.Metadata.DeepCopyInto(&machinePoolBlueprint.Metadata)

// Get the InfrastructureMachinePoolTemplate.
machinePoolBlueprint.InfrastructureMachinePoolTemplate, err = r.getReference(ctx, machinePoolClass.Template.Infrastructure.Ref)
if err != nil {
return nil, errors.Wrapf(err, "failed to get InfrastructureMachinePoolTemplate for %s, MachinePool class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machinePoolClass.Class)
}

// Get the bootstrap config template.
machinePoolBlueprint.BootstrapTemplate, err = r.getReference(ctx, machinePoolClass.Template.Bootstrap.Ref)
if err != nil {
return nil, errors.Wrapf(err, "failed to get bootstrap config for %s, MachinePool class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machinePoolClass.Class)
}

blueprint.MachinePools[machinePoolClass.Class] = machinePoolBlueprint
}

return blueprint, nil
}
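
The scope.MachinePoolBlueprint type is not part of this hunk. Inferring from the fields used above, it plausibly looks like the following; the template fields are assumed to be unstructured, matching what getReference returns (the real definition in the scope package may differ):

package scope

import (
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// MachinePoolBlueprint holds the templates referenced by a MachinePool class,
// as assembled in getBlueprint above. Field set inferred from usage.
type MachinePoolBlueprint struct {
	// Metadata holds the metadata from the MachinePool class, later layered
	// with the metadata from the Cluster's topology.
	Metadata clusterv1.ObjectMeta

	// BootstrapTemplate holds the referenced bootstrap config object.
	BootstrapTemplate *unstructured.Unstructured

	// InfrastructureMachinePoolTemplate holds the referenced infrastructure template.
	InfrastructureMachinePoolTemplate *unstructured.Unstructured
}
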
42 changes: 40 additions & 2 deletions internal/controllers/topology/cluster/cluster_controller.go
@@ -36,6 +36,8 @@ import (
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/api/v1beta1/index"
"sigs.k8s.io/cluster-api/controllers/external"
"sigs.k8s.io/cluster-api/controllers/remote"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
runtimecatalog "sigs.k8s.io/cluster-api/exp/runtime/catalog"
runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1"
"sigs.k8s.io/cluster-api/feature"
@@ -57,13 +59,15 @@ import (
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusterclasses,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinehealthchecks,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;create;delete

// Reconciler reconciles a managed topology for a Cluster object.
type Reconciler struct {
-	Client client.Client
+	Client    client.Client
+	Tracker   *remote.ClusterCacheTracker
// APIReader is used to list MachineSets directly via the API server to avoid
// race conditions caused by an outdated cache.
APIReader client.Reader
@@ -103,6 +107,12 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
// Only trigger Cluster reconciliation if the MachineDeployment is topology owned.
builder.WithPredicates(predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx))),
).
Watches(
&expv1.MachinePool{},
handler.EnqueueRequestsFromMapFunc(r.machinePoolToCluster),
// Only trigger Cluster reconciliation if the MachinePool is topology owned.
builder.WithPredicates(predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx))),
).
WithOptions(options).
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
Build(r)
@@ -193,7 +203,16 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
}

// Handle normal reconciliation loop.
-	return r.reconcile(ctx, s)
+	result, err := r.reconcile(ctx, s)
if err != nil {
// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
// the current cluster due to concurrent access.
if errors.Is(err, remote.ErrClusterLocked) {
log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
return ctrl.Result{Requeue: true}, nil
}
}
return result, err
}

// reconcile handles cluster reconciliation.
@@ -360,6 +379,25 @@ func (r *Reconciler) machineDeploymentToCluster(_ context.Context, o client.Obje
}}
}

// machinePoolToCluster is a handler.ToRequestsFunc used to enqueue reconcile requests
// for a Cluster when one of its own MachinePools is updated.
func (r *Reconciler) machinePoolToCluster(_ context.Context, o client.Object) []ctrl.Request {
mp, ok := o.(*expv1.MachinePool)
if !ok {
panic(fmt.Sprintf("Expected a MachinePool but got a %T", o))
}
if mp.Spec.ClusterName == "" {
return nil
}

return []ctrl.Request{{
NamespacedName: types.NamespacedName{
Namespace: mp.Namespace,
Name: mp.Spec.ClusterName,
},
}}
}

func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster) (ctrl.Result, error) {
// Call the BeforeClusterDelete hook if the 'ok-to-delete' annotation is not set
// and add the annotation to the cluster after receiving a successful non-blocking response.
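
Both the MachineDeployment and MachinePool watches filter on topology ownership. A sketch of the check behind predicates.ResourceIsTopologyOwned, assuming it keys off the standard clusterv1.ClusterTopologyOwnedLabel (the real predicate also wires in logging):

package example

import (
	"sigs.k8s.io/controller-runtime/pkg/client"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// isTopologyOwned reports whether an object was created by the topology
// controller, i.e. whether it carries the topology-owned label.
func isTopologyOwned(o client.Object) bool {
	_, ok := o.GetLabels()[clusterv1.ClusterTopologyOwnedLabel]
	return ok
}
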
17 changes: 17 additions & 0 deletions internal/controllers/topology/cluster/conditions.go
@@ -143,6 +143,23 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
s.Blueprint.Topology.Version,
)
reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason
case s.UpgradeTracker.MachinePools.IsAnyPendingUpgrade():
fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s on hold.",
computeNameList(s.UpgradeTracker.MachinePools.PendingUpgradeNames()),
s.Blueprint.Topology.Version,
)
reason = clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason
case s.UpgradeTracker.MachinePools.IsAnyPendingCreate():
fmt.Fprintf(msgBuilder, "MachinePool(s) for Topologies %s creation on hold.",
computeNameList(s.UpgradeTracker.MachinePools.PendingCreateTopologyNames()),
)
reason = clusterv1.TopologyReconciledMachinePoolsCreatePendingReason
case s.UpgradeTracker.MachinePools.DeferredUpgrade():
fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s deferred.",
computeNameList(s.UpgradeTracker.MachinePools.DeferredUpgradeNames()),
s.Blueprint.Topology.Version,
)
reason = clusterv1.TopologyReconciledMachinePoolsUpgradeDeferredReason
}

switch {
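
computeNameList is outside this hunk. A plausible sketch, assuming it simply joins the names for the condition message (the real helper may also truncate long lists):

package example

import "strings"

// computeNameList renders a slice of object names as a single
// comma-separated string for use in condition messages.
func computeNameList(names []string) string {
	return strings.Join(names, ", ")
}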