From 8ae27c16d617af3f8fab433647f3c4afb71ee228 Mon Sep 17 00:00:00 2001 From: Richard Case Date: Thu, 30 Mar 2023 17:31:30 +0100 Subject: [PATCH] Add machinepool support to clusterclass --- api/v1beta1/condition_consts.go | 13 + cmd/clusterctl/client/cluster/topology.go | 11 + config/rbac/role.yaml | 12 + controllers/alias.go | 4 +- .../clusterclass/clusterclass_controller.go | 11 +- .../controllers/topology/cluster/blueprint.go | 28 +- .../topology/cluster/cluster_controller.go | 42 ++- .../topology/cluster/conditions.go | 17 + .../topology/cluster/current_state.go | 129 +++++++- .../topology/cluster/desired_state.go | 306 ++++++++++++++++++ .../topology/cluster/patches/engine.go | 116 ++++++- .../patches/inline/json_patch_generator.go | 29 ++ .../topology/cluster/patches/template.go | 26 +- .../cluster/patches/variables/variables.go | 120 ++++++- .../topology/cluster/reconcile_state.go | 272 +++++++++++++++- .../topology/cluster/scope/blueprint.go | 23 ++ .../topology/cluster/scope/scope.go | 3 + .../topology/cluster/scope/state.go | 59 ++++ .../topology/cluster/scope/upgradetracker.go | 142 ++++++++ .../topology/cluster/suite_test.go | 2 + internal/controllers/topology/cluster/util.go | 5 + internal/log/log.go | 15 + internal/topology/check/compatibility.go | 19 ++ .../variables/cluster_variable_defaulting.go | 4 +- .../variables/cluster_variable_validation.go | 4 +- internal/webhooks/cluster.go | 30 +- internal/webhooks/clusterclass.go | 69 +++- internal/webhooks/patch_validation.go | 109 +++++-- main.go | 1 + .../handlers/topologymutation/handler.go | 105 +++++- ...r.x-k8s.io_dockermachinepooltemplates.yaml | 135 ++++++++ .../docker/config/crd/kustomization.yaml | 1 + .../dockermachinepooltemplate_types.go | 64 ++++ .../exp/api/v1beta1/zz_generated.deepcopy.go | 91 ++++++ .../cluster-template-development.yaml | 4 + .../templates/clusterclass-quick-start.yaml | 37 ++- 36 files changed, 1974 insertions(+), 84 deletions(-) create mode 100644 test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepooltemplates.yaml create mode 100644 test/infrastructure/docker/exp/api/v1beta1/dockermachinepooltemplate_types.go diff --git a/api/v1beta1/condition_consts.go b/api/v1beta1/condition_consts.go index 7b1a5483b7b4..5e2bc212ecdf 100644 --- a/api/v1beta1/condition_consts.go +++ b/api/v1beta1/condition_consts.go @@ -299,6 +299,19 @@ const ( // not yet completed because the upgrade for at least one of the MachineDeployments has been deferred. TopologyReconciledMachineDeploymentsUpgradeDeferredReason = "MachineDeploymentsUpgradeDeferred" + // TopologyReconciledMachinePoolsUpgradePendingReason (Severity=Info) documents reconciliation of a Cluster topology + // not yet completed because at least one of the MachinePools is not yet updated to match the desired topology spec. + TopologyReconciledMachinePoolsUpgradePendingReason = "MachinePoolsUpgradePending" + + // TopologyReconciledMachinePoolsCreatePendingReason (Severity=Info) documents reconciliation of a Cluster topology + // not yet completed because at least one of the MachinePools is yet to be created. + // This generally happens because new MachinePool creations are held off while the ControlPlane is not stable. + TopologyReconciledMachinePoolsCreatePendingReason = "MachinePoolsCreatePending" + + // TopologyReconciledMachinePoolsUpgradeDeferredReason (Severity=Info) documents reconciliation of a Cluster topology + // not yet completed because the upgrade for at least one of the MachinePools has been deferred. + TopologyReconciledMachinePoolsUpgradeDeferredReason = "MachinePoolsUpgradeDeferred" + // TopologyReconciledHookBlockingReason (Severity=Info) documents reconciliation of a Cluster topology // not yet completed because at least one of the lifecycle hooks is blocking. TopologyReconciledHookBlockingReason = "LifecycleHookBlocking" diff --git a/cmd/clusterctl/client/cluster/topology.go b/cmd/clusterctl/client/cluster/topology.go index 8998578e9d80..1c8180a5b959 100644 --- a/cmd/clusterctl/client/cluster/topology.go +++ b/cmd/clusterctl/client/cluster/topology.go @@ -802,6 +802,17 @@ func clusterClassUsesTemplate(cc *clusterv1.ClusterClass, templateRef *corev1.Ob } } + for _, mpClass := range cc.Spec.Workers.MachinePools { + // Check the bootstrap ref + if equalRef(mpClass.Template.Bootstrap.Ref, templateRef) { + return true + } + // Check the infrastructure ref. + if equalRef(mpClass.Template.Infrastructure.Ref, templateRef) { + return true + } + } + return false } diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index d0f5a7d10c66..c78f191ceff1 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -187,6 +187,18 @@ rules: - patch - update - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machinepools + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - cluster.x-k8s.io resources: diff --git a/controllers/alias.go b/controllers/alias.go index a48394c425fa..7123686e5145 100644 --- a/controllers/alias.go +++ b/controllers/alias.go @@ -143,7 +143,8 @@ func (r *MachineHealthCheckReconciler) SetupWithManager(ctx context.Context, mgr // ClusterTopologyReconciler reconciles a managed topology for a Cluster object. type ClusterTopologyReconciler struct { - Client client.Client + Client client.Client + Tracker *remote.ClusterCacheTracker // APIReader is used to list MachineSets directly via the API server to avoid // race conditions caused by an outdated cache. APIReader client.Reader @@ -162,6 +163,7 @@ func (r *ClusterTopologyReconciler) SetupWithManager(ctx context.Context, mgr ct return (&clustertopologycontroller.Reconciler{ Client: r.Client, APIReader: r.APIReader, + Tracker: r.Tracker, RuntimeClient: r.RuntimeClient, UnstructuredCachingClient: r.UnstructuredCachingClient, WatchFilterValue: r.WatchFilterValue, diff --git a/internal/controllers/clusterclass/clusterclass_controller.go b/internal/controllers/clusterclass/clusterclass_controller.go index d524c247007e..d9b86f6fffbc 100644 --- a/internal/controllers/clusterclass/clusterclass_controller.go +++ b/internal/controllers/clusterclass/clusterclass_controller.go @@ -169,11 +169,20 @@ func (r *Reconciler) reconcileExternalReferences(ctx context.Context, clusterCla } } + for _, mpClass := range clusterClass.Spec.Workers.MachinePools { + if mpClass.Template.Bootstrap.Ref != nil { + refs = append(refs, mpClass.Template.Bootstrap.Ref) + } + if mpClass.Template.Infrastructure.Ref != nil { + refs = append(refs, mpClass.Template.Infrastructure.Ref) + } + } + // Ensure all referenced objects are owned by the ClusterClass. // Nb. Some external objects can be referenced multiple times in the ClusterClass, // but we only want to set the owner reference once per unique external object. // For example the same KubeadmConfigTemplate could be referenced in multiple MachineDeployment - // classes. + // or MachinePool classes. errs := []error{} reconciledRefs := sets.Set[string]{} outdatedRefs := map[*corev1.ObjectReference]*corev1.ObjectReference{} diff --git a/internal/controllers/topology/cluster/blueprint.go b/internal/controllers/topology/cluster/blueprint.go index a455beb17aa4..eb5cde78a4b8 100644 --- a/internal/controllers/topology/cluster/blueprint.go +++ b/internal/controllers/topology/cluster/blueprint.go @@ -34,6 +34,7 @@ func (r *Reconciler) getBlueprint(ctx context.Context, cluster *clusterv1.Cluste Topology: cluster.Spec.Topology, ClusterClass: clusterClass, MachineDeployments: map[string]*scope.MachineDeploymentBlueprint{}, + MachinePools: map[string]*scope.MachinePoolBlueprint{}, } var err error @@ -82,7 +83,7 @@ func (r *Reconciler) getBlueprint(ctx context.Context, cluster *clusterv1.Cluste // Get the bootstrap machine template. machineDeploymentBlueprint.BootstrapTemplate, err = r.getReference(ctx, machineDeploymentClass.Template.Bootstrap.Ref) if err != nil { - return nil, errors.Wrapf(err, "failed to get bootstrap machine template for %s, MachineDeployment class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machineDeploymentClass.Class) + return nil, errors.Wrapf(err, "failed to get bootstrap config template for %s, MachineDeployment class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machineDeploymentClass.Class) } // If the machineDeploymentClass defines a MachineHealthCheck add it to the blueprint. @@ -92,5 +93,30 @@ func (r *Reconciler) getBlueprint(ctx context.Context, cluster *clusterv1.Cluste blueprint.MachineDeployments[machineDeploymentClass.Class] = machineDeploymentBlueprint } + // Loop over the machine pool classes in ClusterClass + // and fetch the related templates. + for _, machinePoolClass := range blueprint.ClusterClass.Spec.Workers.MachinePools { + machinePoolBlueprint := &scope.MachinePoolBlueprint{} + + // Make sure to copy the metadata from the blueprint, which is later layered + // with the additional metadata defined in the Cluster's topology section + // for the MachinePool that is created or updated. + machinePoolClass.Template.Metadata.DeepCopyInto(&machinePoolBlueprint.Metadata) + + // Get the InfrastructureMachinePoolTemplate. + machinePoolBlueprint.InfrastructureMachinePoolTemplate, err = r.getReference(ctx, machinePoolClass.Template.Infrastructure.Ref) + if err != nil { + return nil, errors.Wrapf(err, "failed to get InfrastructureMachinePoolTemplate for %s, MachinePool class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machinePoolClass.Class) + } + + // Get the bootstrap config. + machinePoolBlueprint.BootstrapTemplate, err = r.getReference(ctx, machinePoolClass.Template.Bootstrap.Ref) + if err != nil { + return nil, errors.Wrapf(err, "failed to get bootstrap config for %s, MachinePool class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machinePoolClass.Class) + } + + blueprint.MachinePools[machinePoolClass.Class] = machinePoolBlueprint + } + return blueprint, nil } diff --git a/internal/controllers/topology/cluster/cluster_controller.go b/internal/controllers/topology/cluster/cluster_controller.go index 64a2bb36c442..a8ec867f30db 100644 --- a/internal/controllers/topology/cluster/cluster_controller.go +++ b/internal/controllers/topology/cluster/cluster_controller.go @@ -36,6 +36,8 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/api/v1beta1/index" "sigs.k8s.io/cluster-api/controllers/external" + "sigs.k8s.io/cluster-api/controllers/remote" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" runtimecatalog "sigs.k8s.io/cluster-api/exp/runtime/catalog" runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1" "sigs.k8s.io/cluster-api/feature" @@ -57,13 +59,15 @@ import ( // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusterclasses,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinehealthchecks,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;create;delete // Reconciler reconciles a managed topology for a Cluster object. type Reconciler struct { - Client client.Client + Client client.Client + Tracker *remote.ClusterCacheTracker // APIReader is used to list MachineSets directly via the API server to avoid // race conditions caused by an outdated cache. APIReader client.Reader @@ -103,6 +107,12 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt // Only trigger Cluster reconciliation if the MachineDeployment is topology owned. builder.WithPredicates(predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx))), ). + Watches( + &expv1.MachinePool{}, + handler.EnqueueRequestsFromMapFunc(r.machinePoolToCluster), + // Only trigger Cluster reconciliation if the MachinePool is topology owned. + builder.WithPredicates(predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx))), + ). WithOptions(options). WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)). Build(r) @@ -193,7 +203,16 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re } // Handle normal reconciliation loop. - return r.reconcile(ctx, s) + result, err := r.reconcile(ctx, s) + if err != nil { + // Requeue if the reconcile failed because the ClusterCacheTracker was locked for + // the current cluster because of concurrent access. + if errors.Is(err, remote.ErrClusterLocked) { + log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker") + return ctrl.Result{Requeue: true}, nil + } + } + return result, err } // reconcile handles cluster reconciliation. @@ -360,6 +379,25 @@ func (r *Reconciler) machineDeploymentToCluster(_ context.Context, o client.Obje }} } +// machinePoolToCluster is a handler.ToRequestsFunc to be used to enqueue requests for reconciliation +// for Cluster to update when one of its own MachinePools gets updated. +func (r *Reconciler) machinePoolToCluster(_ context.Context, o client.Object) []ctrl.Request { + mp, ok := o.(*expv1.MachinePool) + if !ok { + panic(fmt.Sprintf("Expected a MachinePool but got a %T", o)) + } + if mp.Spec.ClusterName == "" { + return nil + } + + return []ctrl.Request{{ + NamespacedName: types.NamespacedName{ + Namespace: mp.Namespace, + Name: mp.Spec.ClusterName, + }, + }} +} + func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster) (ctrl.Result, error) { // Call the BeforeClusterDelete hook if the 'ok-to-delete' annotation is not set // and add the annotation to the cluster after receiving a successful non-blocking response. diff --git a/internal/controllers/topology/cluster/conditions.go b/internal/controllers/topology/cluster/conditions.go index ed0140ed66d6..30390a5989cc 100644 --- a/internal/controllers/topology/cluster/conditions.go +++ b/internal/controllers/topology/cluster/conditions.go @@ -143,6 +143,23 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste s.Blueprint.Topology.Version, ) reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason + case s.UpgradeTracker.MachinePools.IsAnyPendingUpgrade(): + fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s on hold.", + computeNameList(s.UpgradeTracker.MachinePools.PendingUpgradeNames()), + s.Blueprint.Topology.Version, + ) + reason = clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason + case s.UpgradeTracker.MachinePools.IsAnyPendingCreate(): + fmt.Fprintf(msgBuilder, "MachinePool(s) for Topologies %s creation on hold.", + computeNameList(s.UpgradeTracker.MachinePools.PendingCreateTopologyNames()), + ) + reason = clusterv1.TopologyReconciledMachinePoolsCreatePendingReason + case s.UpgradeTracker.MachinePools.DeferredUpgrade(): + fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s deferred.", + computeNameList(s.UpgradeTracker.MachinePools.DeferredUpgradeNames()), + s.Blueprint.Topology.Version, + ) + reason = clusterv1.TopologyReconciledMachinePoolsUpgradeDeferredReason } switch { diff --git a/internal/controllers/topology/cluster/current_state.go b/internal/controllers/topology/cluster/current_state.go index 9302aa425237..46c891149413 100644 --- a/internal/controllers/topology/cluster/current_state.go +++ b/internal/controllers/topology/cluster/current_state.go @@ -28,6 +28,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/internal/contract" "sigs.k8s.io/cluster-api/internal/controllers/topology/cluster/scope" tlog "sigs.k8s.io/cluster-api/internal/log" @@ -63,11 +64,19 @@ func (r *Reconciler) getCurrentState(ctx context.Context, s *scope.Scope) (*scop // A Cluster may have zero or more MachineDeployments and a Cluster is expected to have zero MachineDeployments on // first reconcile. - m, err := r.getCurrentMachineDeploymentState(ctx, s.Blueprint.MachineDeployments, currentState.Cluster) + md, err := r.getCurrentMachineDeploymentState(ctx, s.Blueprint.MachineDeployments, currentState.Cluster) if err != nil { return nil, err } - currentState.MachineDeployments = m + currentState.MachineDeployments = md + + // A Cluster may have zero or more MachinePools and a Cluster is expected to have zero MachinePools on + // first reconcile. + mp, err := r.getCurrentMachinePoolState(ctx, s.Blueprint.MachinePools, currentState.Cluster) + if err != nil { + return nil, err + } + currentState.MachinePools = mp return currentState, nil } @@ -272,6 +281,108 @@ func (r *Reconciler) getCurrentMachineDeploymentState(ctx context.Context, bluep return state, nil } +// getCurrentMachinePoolState queries for all MachinePools and filters them for their linked Cluster and +// whether they are managed by a ClusterClass using labels. A Cluster may have zero or more MachinePools. Zero is +// expected on first reconcile. If MachinePools are found for the Cluster their Infrastructure and Bootstrap references +// are inspected. Where these are not found the function will throw an error. +func (r *Reconciler) getCurrentMachinePoolState(ctx context.Context, blueprintMachinePools map[string]*scope.MachinePoolBlueprint, cluster *clusterv1.Cluster) (map[string]*scope.MachinePoolState, error) { + state := make(scope.MachinePoolsStateMap) + + // List all the machine pools in the current cluster and in a managed topology. + mp := &expv1.MachinePoolList{} + err := r.Client.List(ctx, mp, + client.MatchingLabels{ + clusterv1.ClusterNameLabel: cluster.Name, + clusterv1.ClusterTopologyOwnedLabel: "", + }, + client.InNamespace(cluster.Namespace), + ) + if err != nil { + return nil, errors.Wrap(err, "failed to read MachinePools for managed topology") + } + + // Loop over each machine pool and create the current + // state by retrieving all required references. + for i := range mp.Items { + m := &mp.Items[i] + + // Retrieve the name which is assigned in Cluster's topology + // from a well-defined label. + mpTopologyName, ok := m.ObjectMeta.Labels[clusterv1.ClusterTopologyMachinePoolNameLabel] + if !ok || mpTopologyName == "" { + return nil, fmt.Errorf("failed to find label %s in %s", clusterv1.ClusterTopologyMachinePoolNameLabel, tlog.KObj{Obj: m}) + } + + // Make sure that the name of the MachinePool stays unique. + // If we've already seen a MachinePool with the same name + // this is an error, probably caused from manual modifications or a race condition. + if _, ok := state[mpTopologyName]; ok { + return nil, fmt.Errorf("duplicate %s found for label %s: %s", tlog.KObj{Obj: m}, clusterv1.ClusterTopologyMachinePoolNameLabel, mpTopologyName) + } + + // Gets the bootstrapRef. + bootstrapRef := m.Spec.Template.Spec.Bootstrap.ConfigRef + if bootstrapRef == nil { + return nil, fmt.Errorf("%s does not have a reference to a Bootstrap Config", tlog.KObj{Obj: m}) + } + // Gets the infraRef. + infraRef := &m.Spec.Template.Spec.InfrastructureRef + if infraRef.Name == "" { + return nil, fmt.Errorf("%s does not have a reference to a InfrastructureMachineTemplate", tlog.KObj{Obj: m}) + } + + // If the mpTopology exists in the Cluster, lookup the corresponding mpBluePrint and align + // the apiVersions in the bootstrapRef and infraRef. + // If the mpTopology doesn't exist, do nothing (this can happen if the mpTopology was deleted). + // **Note** We can't check if the MachinePool has a DeletionTimestamp, because at this point it could not be set yet. + if mpTopologyExistsInCluster, mpClassName := getMPClassName(cluster, mpTopologyName); mpTopologyExistsInCluster { + mpBluePrint, ok := blueprintMachinePools[mpClassName] + if !ok { + return nil, fmt.Errorf("failed to find MachinePool class %s in ClusterClass", mpClassName) + } + bootstrapRef, err = alignRefAPIVersion(mpBluePrint.BootstrapTemplate, bootstrapRef) + if err != nil { + return nil, errors.Wrap(err, fmt.Sprintf("%s Bootstrap reference could not be retrieved", tlog.KObj{Obj: m})) + } + infraRef, err = alignRefAPIVersion(mpBluePrint.InfrastructureMachinePoolTemplate, infraRef) + if err != nil { + return nil, errors.Wrap(err, fmt.Sprintf("%s Infrastructure reference could not be retrieved", tlog.KObj{Obj: m})) + } + } + + // Get the BootstrapObject + bootstrapObject, err := r.getReference(ctx, bootstrapRef) + if err != nil { + return nil, errors.Wrap(err, fmt.Sprintf("%s Bootstrap reference could not be retrieved", tlog.KObj{Obj: m})) + } + // check that the referenced object has the ClusterTopologyOwnedLabel label. + // Nb. This is to make sure that a managed topology cluster does not have a reference to an object that is not + // owned by the topology. + if !labels.IsTopologyOwned(bootstrapObject) { + return nil, fmt.Errorf("bootstrap object %s referenced from MP %s is not topology owned", tlog.KObj{Obj: bootstrapObject}, tlog.KObj{Obj: m}) + } + + // Get the InfraMachinePoolObject. + infraMachinePoolObject, err := r.getReference(ctx, infraRef) + if err != nil { + return nil, errors.Wrap(err, fmt.Sprintf("%s Infrastructure reference could not be retrieved", tlog.KObj{Obj: m})) + } + // check that the referenced object has the ClusterTopologyOwnedLabel label. + // Nb. This is to make sure that a managed topology cluster does not have a reference to an object that is not + // owned by the topology. + if !labels.IsTopologyOwned(infraMachinePoolObject) { + return nil, fmt.Errorf("InfrastructureMachinePool object %s referenced from MP %s is not topology owned", tlog.KObj{Obj: infraMachinePoolObject}, tlog.KObj{Obj: m}) + } + + state[mpTopologyName] = &scope.MachinePoolState{ + Object: m, + BootstrapObject: bootstrapObject, + InfrastructureMachinePoolObject: infraMachinePoolObject, + } + } + return state, nil +} + // alignRefAPIVersion returns an aligned copy of the currentRef so it matches the apiVersion in ClusterClass. // This is required so the topology controller can diff current and desired state objects of the same // version during reconcile. @@ -312,3 +423,17 @@ func getMDClassName(cluster *clusterv1.Cluster, mdTopologyName string) (bool, st } return false, "" } + +// getMPClassName retrieves the MPClass name by looking up the MPTopology in the Cluster. +func getMPClassName(cluster *clusterv1.Cluster, mpTopologyName string) (bool, string) { + if cluster.Spec.Topology.Workers == nil { + return false, "" + } + + for _, mpTopology := range cluster.Spec.Topology.Workers.MachinePools { + if mpTopology.Name == mpTopologyName { + return true, mpTopology.Class + } + } + return false, "" +} diff --git a/internal/controllers/topology/cluster/desired_state.go b/internal/controllers/topology/cluster/desired_state.go index 08eb7aded783..fe8a7df64fb6 100644 --- a/internal/controllers/topology/cluster/desired_state.go +++ b/internal/controllers/topology/cluster/desired_state.go @@ -31,6 +31,7 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/controllers/external" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" runtimecatalog "sigs.k8s.io/cluster-api/exp/runtime/catalog" runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1" "sigs.k8s.io/cluster-api/feature" @@ -75,6 +76,24 @@ func (r *Reconciler) computeDesiredState(ctx context.Context, s *scope.Scope) (* } s.UpgradeTracker.MachineDeployments.MarkUpgrading(mdUpgradingNames...) + // Mark all the MachinePools that are currently upgrading. + // This captured information is used for: + // - Building the TopologyReconciled condition. + // - Make upgrade decisions on the control plane. + // - Making upgrade decisions on machine pools. + if len(s.Current.MachinePools) > 0 { + client, err := r.Tracker.GetClient(ctx, client.ObjectKeyFromObject(s.Current.Cluster)) + if err != nil { + return nil, err + } + // Mark all the MachinePools that are currently upgrading. + mpUpgradingNames, err := s.Current.MachinePools.Upgrading(ctx, client) + if err != nil { + return nil, errors.Wrap(err, "failed to check if any MachinePool is upgrading") + } + s.UpgradeTracker.MachinePools.MarkUpgrading(mpUpgradingNames...) + } + // Compute the desired state of the ControlPlane object, eventually adding a reference to the // InfrastructureMachineTemplate generated by the previous step. if desiredState.ControlPlane.Object, err = r.computeControlPlane(ctx, s, desiredState.ControlPlane.InfrastructureMachineTemplate); err != nil { @@ -108,6 +127,15 @@ func (r *Reconciler) computeDesiredState(ctx context.Context, s *scope.Scope) (* } } + // If required, compute the desired state of the MachinePools from the list of MachinePoolTopologies + // defined in the cluster. + if s.Blueprint.HasMachinePools() { + desiredState.MachinePools, err = r.computeMachinePools(ctx, s) + if err != nil { + return nil, errors.Wrapf(err, "failed to compute MachinePools") + } + } + // Apply patches the desired state according to the patches from the ClusterClass, variables from the Cluster // and builtin variables. // NOTE: We have to make sure all spec fields that were explicitly set in desired objects during the computation above @@ -873,6 +901,284 @@ func isMachineDeploymentDeferred(clusterTopology *clusterv1.Topology, mdTopology return false } +// computeMachinePools computes the desired state of the list of MachinePools. +func (r *Reconciler) computeMachinePools(ctx context.Context, s *scope.Scope) (scope.MachinePoolsStateMap, error) { + machinePoolsStateMap := make(scope.MachinePoolsStateMap) + for _, mpTopology := range s.Blueprint.Topology.Workers.MachinePools { + desiredMachinePool, err := computeMachinePool(ctx, s, mpTopology) + if err != nil { + return nil, errors.Wrapf(err, "failed to compute MachinePool for topology %q", mpTopology.Name) + } + machinePoolsStateMap[mpTopology.Name] = desiredMachinePool + } + return machinePoolsStateMap, nil +} + +// computeMachinePool computes the desired state for a MachinePoolTopology. +// The generated machinePool object is calculated using the values from the machinePoolTopology and +// the machinePool class. +func computeMachinePool(_ context.Context, s *scope.Scope, machinePoolTopology clusterv1.MachinePoolTopology) (*scope.MachinePoolState, error) { + desiredMachinePool := &scope.MachinePoolState{} + + // Gets the blueprint for the MachinePool class. + className := machinePoolTopology.Class + machinePoolBlueprint, ok := s.Blueprint.MachinePools[className] + if !ok { + return nil, errors.Errorf("MachinePool class %s not found in %s", className, tlog.KObj{Obj: s.Blueprint.ClusterClass}) + } + + var machinePoolClass *clusterv1.MachinePoolClass + for _, mpClass := range s.Blueprint.ClusterClass.Spec.Workers.MachinePools { + mpClass := mpClass + if mpClass.Class == className { + machinePoolClass = &mpClass + break + } + } + if machinePoolClass == nil { + return nil, errors.Errorf("MachinePool class %s not found in %s", className, tlog.KObj{Obj: s.Blueprint.ClusterClass}) + } + + // Compute the bootstrap config. + currentMachinePool := s.Current.MachinePools[machinePoolTopology.Name] + var currentBootstrapConfigRef *corev1.ObjectReference + if currentMachinePool != nil && currentMachinePool.BootstrapObject != nil { + currentBootstrapConfigRef = currentMachinePool.Object.Spec.Template.Spec.Bootstrap.ConfigRef + } + var err error + desiredMachinePool.BootstrapObject, err = templateToObject(templateToInput{ + template: machinePoolBlueprint.BootstrapTemplate, + templateClonedFromRef: contract.ObjToRef(machinePoolBlueprint.BootstrapTemplate), + cluster: s.Current.Cluster, + namePrefix: bootstrapTemplateNamePrefix(s.Current.Cluster.Name, machinePoolTopology.Name), + currentObjectRef: currentBootstrapConfigRef, + }) + if err != nil { + return nil, errors.Wrapf(err, "failed to compute bootstrap object for topology %q", machinePoolTopology.Name) + } + + bootstrapObjectLabels := desiredMachinePool.BootstrapObject.GetLabels() + if bootstrapObjectLabels == nil { + bootstrapObjectLabels = map[string]string{} + } + // Add ClusterTopologyMachinePoolLabel to the generated Bootstrap template + bootstrapObjectLabels[clusterv1.ClusterTopologyMachinePoolNameLabel] = machinePoolTopology.Name + desiredMachinePool.BootstrapObject.SetLabels(bootstrapObjectLabels) + + // Compute the Infrastructure ref. + var currentInfraMachinePoolRef *corev1.ObjectReference + if currentMachinePool != nil && currentMachinePool.InfrastructureMachinePoolObject != nil { + currentInfraMachinePoolRef = ¤tMachinePool.Object.Spec.Template.Spec.InfrastructureRef + } + desiredMachinePool.InfrastructureMachinePoolObject, err = templateToObject(templateToInput{ + template: machinePoolBlueprint.InfrastructureMachinePoolTemplate, + templateClonedFromRef: contract.ObjToRef(machinePoolBlueprint.InfrastructureMachinePoolTemplate), + cluster: s.Current.Cluster, + namePrefix: infrastructureMachinePoolNamePrefix(s.Current.Cluster.Name, machinePoolTopology.Name), + currentObjectRef: currentInfraMachinePoolRef, + }) + if err != nil { + return nil, errors.Wrapf(err, "failed to compute infrastructure object for topology %q", machinePoolTopology.Name) + } + + infraMachinePoolObjectLabels := desiredMachinePool.InfrastructureMachinePoolObject.GetLabels() + if infraMachinePoolObjectLabels == nil { + infraMachinePoolObjectLabels = map[string]string{} + } + // Add ClusterTopologyMachinePoolLabel to the generated InfrastructureMachinePool object + infraMachinePoolObjectLabels[clusterv1.ClusterTopologyMachinePoolNameLabel] = machinePoolTopology.Name + desiredMachinePool.InfrastructureMachinePoolObject.SetLabels(infraMachinePoolObjectLabels) + version := computeMachinePoolVersion(s, machinePoolTopology, currentMachinePool) + + // Compute values that can be set both in the MachinePoolClass and in the MachinePoolTopology + failureDomains := machinePoolClass.FailureDomains + if machinePoolTopology.FailureDomains != nil { + failureDomains = machinePoolTopology.FailureDomains + } + + nodeDrainTimeout := machinePoolClass.NodeDrainTimeout + if machinePoolTopology.NodeDrainTimeout != nil { + nodeDrainTimeout = machinePoolTopology.NodeDrainTimeout + } + + nodeVolumeDetachTimeout := machinePoolClass.NodeVolumeDetachTimeout + if machinePoolTopology.NodeVolumeDetachTimeout != nil { + nodeVolumeDetachTimeout = machinePoolTopology.NodeVolumeDetachTimeout + } + + nodeDeletionTimeout := machinePoolClass.NodeDeletionTimeout + if machinePoolTopology.NodeDeletionTimeout != nil { + nodeDeletionTimeout = machinePoolTopology.NodeDeletionTimeout + } + + // Compute the MachinePool object. + desiredBootstrapConfigRef, err := calculateRefDesiredAPIVersion(currentBootstrapConfigRef, desiredMachinePool.BootstrapObject) + if err != nil { + return nil, errors.Wrap(err, "failed to calculate desired bootstrap config ref") + } + desiredInfraMachinePoolRef, err := calculateRefDesiredAPIVersion(currentInfraMachinePoolRef, desiredMachinePool.InfrastructureMachinePoolObject) + if err != nil { + return nil, errors.Wrap(err, "failed to calculate desired infrastructure machine pool ref") + } + + desiredMachinePoolObj := &expv1.MachinePool{ + TypeMeta: metav1.TypeMeta{ + Kind: expv1.GroupVersion.WithKind("MachinePool").Kind, + APIVersion: expv1.GroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: names.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-%s-", s.Current.Cluster.Name, machinePoolTopology.Name)), + Namespace: s.Current.Cluster.Namespace, + }, + Spec: expv1.MachinePoolSpec{ + ClusterName: s.Current.Cluster.Name, + FailureDomains: failureDomains, + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + ClusterName: s.Current.Cluster.Name, + Version: pointer.String(version), + Bootstrap: clusterv1.Bootstrap{ConfigRef: desiredBootstrapConfigRef}, + InfrastructureRef: *desiredInfraMachinePoolRef, + NodeDrainTimeout: nodeDrainTimeout, + NodeVolumeDetachTimeout: nodeVolumeDetachTimeout, + NodeDeletionTimeout: nodeDeletionTimeout, + }, + }, + }, + } + + // If an existing MachinePool is present, override the MachinePool generate name + // re-using the existing name (this will help in reconcile). + if currentMachinePool != nil && currentMachinePool.Object != nil { + desiredMachinePoolObj.SetName(currentMachinePool.Object.Name) + } + + // Apply annotations + machinePoolAnnotations := util.MergeMap(machinePoolTopology.Metadata.Annotations, machinePoolBlueprint.Metadata.Annotations) + // Ensure the annotations used to control the upgrade sequence are never propagated. + delete(machinePoolAnnotations, clusterv1.ClusterTopologyHoldUpgradeSequenceAnnotation) + delete(machinePoolAnnotations, clusterv1.ClusterTopologyDeferUpgradeAnnotation) + desiredMachinePoolObj.SetAnnotations(machinePoolAnnotations) + desiredMachinePoolObj.Spec.Template.Annotations = machinePoolAnnotations + + // Apply Labels + // NOTE: On top of all the labels applied to managed objects we are applying the ClusterTopologyMachinePoolLabel + // keeping track of the MachinePool name from the Topology; this will be used to identify the object in next reconcile loops. + machinePoolLabels := util.MergeMap(machinePoolTopology.Metadata.Labels, machinePoolBlueprint.Metadata.Labels) + if machinePoolLabels == nil { + machinePoolLabels = map[string]string{} + } + machinePoolLabels[clusterv1.ClusterNameLabel] = s.Current.Cluster.Name + machinePoolLabels[clusterv1.ClusterTopologyOwnedLabel] = "" + machinePoolLabels[clusterv1.ClusterTopologyMachinePoolNameLabel] = machinePoolTopology.Name + desiredMachinePoolObj.SetLabels(machinePoolLabels) + + // Also set the labels in .spec.template.labels so that they are propagated to + // MachineSet.labels and MachineSet.spec.template.labels and thus to Machine.labels. + // Note: the labels in MachineSet are used to properly cleanup templates when the MachineSet is deleted. + desiredMachinePoolObj.Spec.Template.Labels = machinePoolLabels + + // Set the desired replicas. + desiredMachinePoolObj.Spec.Replicas = machinePoolTopology.Replicas + + desiredMachinePool.Object = desiredMachinePoolObj + + return desiredMachinePool, nil +} + +// computeMachinePoolVersion calculates the version of the desired machine pool. +// The version is calculated using the state of the current machine pools, +// the current control plane and the version defined in the topology. +func computeMachinePoolVersion(s *scope.Scope, machinePoolTopology clusterv1.MachinePoolTopology, currentMPState *scope.MachinePoolState) string { + desiredVersion := s.Blueprint.Topology.Version + // If creating a new machine pool, mark it as pending if the control plane is not + // yet stable. Creating a new MP while the control plane is upgrading can lead to unexpected race conditions. + // Example: join could fail if the load balancers are slow in detecting when CP machines are + // being deleted. + if currentMPState == nil || currentMPState.Object == nil { + if !isControlPlaneStable(s) || s.HookResponseTracker.IsBlocking(runtimehooksv1.AfterControlPlaneUpgrade) { + s.UpgradeTracker.MachinePools.MarkPendingCreate(machinePoolTopology.Name) + } + return desiredVersion + } + + // Get the current version of the machine pool. + currentVersion := *currentMPState.Object.Spec.Template.Spec.Version + + // Return early if the currentVersion is already equal to the desiredVersion + // no further checks required. + if currentVersion == desiredVersion { + return currentVersion + } + + // Return early if the upgrade for the MachinePool is deferred. + if isMachinePoolDeferred(s.Blueprint.Topology, machinePoolTopology) { + s.UpgradeTracker.MachinePools.MarkDeferredUpgrade(currentMPState.Object.Name) + s.UpgradeTracker.MachinePools.MarkPendingUpgrade(currentMPState.Object.Name) + return currentVersion + } + + // Return early if the AfterControlPlaneUpgrade hook returns a blocking response. + if s.HookResponseTracker.IsBlocking(runtimehooksv1.AfterControlPlaneUpgrade) { + s.UpgradeTracker.MachinePools.MarkPendingUpgrade(currentMPState.Object.Name) + return currentVersion + } + + // Return early if the upgrade concurrency is reached. + if s.UpgradeTracker.MachinePools.UpgradeConcurrencyReached() { + s.UpgradeTracker.MachinePools.MarkPendingUpgrade(currentMPState.Object.Name) + return currentVersion + } + + // Return early if the Control Plane is not stable. Do not pick up the desiredVersion yet. + // Return the current version of the machine pool. We will pick up the new version after the control + // plane is stable. + if !isControlPlaneStable(s) { + s.UpgradeTracker.MachinePools.MarkPendingUpgrade(currentMPState.Object.Name) + return currentVersion + } + + // Control plane and machine pools are stable. + // Ready to pick up the topology version. + s.UpgradeTracker.MachinePools.MarkUpgrading(currentMPState.Object.Name) + return desiredVersion +} + +// isMachinePoolDeferred returns true if the upgrade for the mpTopology is deferred. +// This is the case when either: +// - the mpTopology has the ClusterTopologyDeferUpgradeAnnotation annotation. +// - the mpTopology has the ClusterTopologyHoldUpgradeSequenceAnnotation annotation. +// - another mp topology which is before mpTopology in the workers.machinePools list has the +// ClusterTopologyHoldUpgradeSequenceAnnotation annotation. +func isMachinePoolDeferred(clusterTopology *clusterv1.Topology, mpTopology clusterv1.MachinePoolTopology) bool { + // If mpTopology has the ClusterTopologyDeferUpgradeAnnotation annotation => mp is deferred. + if _, ok := mpTopology.Metadata.Annotations[clusterv1.ClusterTopologyDeferUpgradeAnnotation]; ok { + return true + } + + // If mpTopology has the ClusterTopologyHoldUpgradeSequenceAnnotation annotation => mp is deferred. + if _, ok := mpTopology.Metadata.Annotations[clusterv1.ClusterTopologyHoldUpgradeSequenceAnnotation]; ok { + return true + } + + for _, mp := range clusterTopology.Workers.MachinePools { + // If another mp topology with the ClusterTopologyHoldUpgradeSequenceAnnotation annotation + // is found before the mpTopology => mp is deferred. + if _, ok := mp.Metadata.Annotations[clusterv1.ClusterTopologyHoldUpgradeSequenceAnnotation]; ok { + return true + } + + // If mpTopology is found before a mp topology with the ClusterTopologyHoldUpgradeSequenceAnnotation + // annotation => mp is not deferred. + if mp.Name == mpTopology.Name { + return false + } + } + + // This case should be impossible as mpTopology should have been found in workers.machinePools. + return false +} + type templateToInput struct { template *unstructured.Unstructured templateClonedFromRef *corev1.ObjectReference diff --git a/internal/controllers/topology/cluster/patches/engine.go b/internal/controllers/topology/cluster/patches/engine.go index aa0f866999a0..54bfe9cb0b55 100644 --- a/internal/controllers/topology/cluster/patches/engine.go +++ b/internal/controllers/topology/cluster/patches/engine.go @@ -29,6 +29,7 @@ import ( "k8s.io/klog/v2" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1" "sigs.k8s.io/cluster-api/feature" "sigs.k8s.io/cluster-api/internal/contract" @@ -173,6 +174,10 @@ func addVariablesForPatch(blueprint *scope.ClusterBlueprint, desired *scope.Clus for _, md := range desired.MachineDeployments { mdStateIndex[md.Object.Name] = md } + mpStateIndex := map[string]*scope.MachinePoolState{} + for _, mp := range desired.MachinePools { + mpStateIndex[mp.Object.Name] = mp + } for i, item := range req.Items { // If the item is a Control Plane add the Control Plane variables. if item.HolderReference.FieldPath == "spec.controlPlaneRef" { @@ -201,6 +206,22 @@ func addVariablesForPatch(blueprint *scope.ClusterBlueprint, desired *scope.Clus return errors.Wrapf(err, "failed to calculate variables for %s", klog.KObj(md.Object)) } item.Variables = mdVariables + } else if item.HolderReference.Kind == "MachinePool" { + mp, ok := mpStateIndex[item.HolderReference.Name] + if !ok { + return errors.Errorf("could not find desired state for MachinePool %s", klog.KObj(mp.Object)) + } + mpTopology, err := getMPTopologyFromMP(blueprint, mp.Object) + if err != nil { + return err + } + + // Calculate MachinePool variables. + mpVariables, err := variables.MachinePool(mpTopology, mp.Object, mp.BootstrapObject, mp.InfrastructureMachinePoolObject, definitionFrom, patchVariableDefinitions) + if err != nil { + return errors.Wrapf(err, "failed to calculate variables for %s", klog.KObj(mp.Object)) + } + item.Variables = mpVariables } req.Items[i] = item } @@ -219,6 +240,18 @@ func getMDTopologyFromMD(blueprint *scope.ClusterBlueprint, md *clusterv1.Machin return mdTopology, nil } +func getMPTopologyFromMP(blueprint *scope.ClusterBlueprint, mp *expv1.MachinePool) (*clusterv1.MachinePoolTopology, error) { + topologyName, ok := mp.Labels[clusterv1.ClusterTopologyMachinePoolNameLabel] + if !ok { + return nil, errors.Errorf("failed to get topology name for %s", klog.KObj(mp)) + } + mpTopology, err := lookupMPTopology(blueprint.Topology, topologyName) + if err != nil { + return nil, err + } + return mpTopology, nil +} + // createRequest creates a GeneratePatchesRequest based on the ClusterBlueprint and the desired state. // NOTE: GenerateRequestTemplates are created for the templates of each individual MachineDeployment in the desired // state. This is necessary because some builtin variables are MachineDeployment specific. For example version and @@ -301,6 +334,46 @@ func createRequest(blueprint *scope.ClusterBlueprint, desired *scope.ClusterStat req.Items = append(req.Items, *t) } + // Add BootstrapConfigTemplate and InfrastructureMachinePoolTemplate for all MachinePoolTopologies + // in the Cluster. + // NOTE: We intentionally iterate over MachinePool in the Cluster instead of over + // MachinePoolClasses in the ClusterClass because each MachinePool in a topology + // has its own state, e.g. version or replicas. This state is used to calculate builtin variables, + // which can then be used e.g. to compute the machine image for a specific Kubernetes version. + for mpTopologyName, mp := range desired.MachinePools { + // Lookup MachinePoolTopology definition from cluster.spec.topology. + mpTopology, err := lookupMPTopology(blueprint.Topology, mpTopologyName) + if err != nil { + return nil, err + } + + // Get corresponding MachinePoolClass from the ClusterClass. + mpClass, ok := blueprint.MachinePools[mpTopology.Class] + if !ok { + return nil, errors.Errorf("failed to lookup MachinePool class %q in ClusterClass", mpTopology.Class) + } + + // Add the BootstrapTemplate. + t, err := newRequestItemBuilder(mpClass.BootstrapTemplate). + WithHolder(mp.Object, "spec.template.spec.bootstrap.configRef"). + Build() + if err != nil { + return nil, errors.Wrapf(err, "failed to prepare BootstrapConfig template %s for MachinePool topology %s for patching", + tlog.KObj{Obj: mpClass.BootstrapTemplate}, mpTopologyName) + } + req.Items = append(req.Items, *t) + + // Add the InfrastructureMachineTemplate. + t, err = newRequestItemBuilder(mpClass.InfrastructureMachinePoolTemplate). + WithHolder(mp.Object, "spec.template.spec.infrastructureRef"). + Build() + if err != nil { + return nil, errors.Wrapf(err, "failed to prepare InfrastructureMachinePoolTemplate %s for MachinePool topology %s for patching", + tlog.KObj{Obj: mpClass.InfrastructureMachinePoolTemplate}, mpTopologyName) + } + req.Items = append(req.Items, *t) + } + return req, nil } @@ -314,6 +387,16 @@ func lookupMDTopology(topology *clusterv1.Topology, mdTopologyName string) (*clu return nil, errors.Errorf("failed to lookup MachineDeployment topology %q in Cluster.spec.topology.workers.machineDeployments", mdTopologyName) } +// lookupMPTopology looks up the MachinePoolTopology based on a mpTopologyName in a topology. +func lookupMPTopology(topology *clusterv1.Topology, mpTopologyName string) (*clusterv1.MachinePoolTopology, error) { + for _, mpTopology := range topology.Workers.MachinePools { + if mpTopology.Name == mpTopologyName { + return &mpTopology, nil + } + } + return nil, errors.Errorf("failed to lookup MachinePool topology %q in Cluster.spec.topology.workers.machinePools", mpTopologyName) +} + // createPatchGenerator creates a patch generator for the given patch. // NOTE: Currently only inline JSON patches are supported; in the future we will add // external patches as well. @@ -427,7 +510,7 @@ func updateDesiredState(ctx context.Context, req *runtimehooksv1.GeneratePatches var err error // Update the InfrastructureCluster. - infrastructureClusterTemplate, err := getTemplateAsUnstructured(req, "Cluster", "spec.infrastructureRef", "") + infrastructureClusterTemplate, err := getTemplateAsUnstructured(req, "Cluster", "spec.infrastructureRef", requestTopologyName{}) if err != nil { return err } @@ -436,7 +519,7 @@ func updateDesiredState(ctx context.Context, req *runtimehooksv1.GeneratePatches } // Update the ControlPlane. - controlPlaneTemplate, err := getTemplateAsUnstructured(req, "Cluster", "spec.controlPlaneRef", "") + controlPlaneTemplate, err := getTemplateAsUnstructured(req, "Cluster", "spec.controlPlaneRef", requestTopologyName{}) if err != nil { return err } @@ -455,7 +538,7 @@ func updateDesiredState(ctx context.Context, req *runtimehooksv1.GeneratePatches // If the ClusterClass mandates the ControlPlane has InfrastructureMachines, // update the InfrastructureMachineTemplate for ControlPlane machines. if blueprint.HasControlPlaneInfrastructureMachine() { - infrastructureMachineTemplate, err := getTemplateAsUnstructured(req, desired.ControlPlane.Object.GetKind(), strings.Join(contract.ControlPlane().MachineTemplate().InfrastructureRef().Path(), "."), "") + infrastructureMachineTemplate, err := getTemplateAsUnstructured(req, desired.ControlPlane.Object.GetKind(), strings.Join(contract.ControlPlane().MachineTemplate().InfrastructureRef().Path(), "."), requestTopologyName{}) if err != nil { return err } @@ -466,8 +549,9 @@ func updateDesiredState(ctx context.Context, req *runtimehooksv1.GeneratePatches // Update the templates for all MachineDeployments. for mdTopologyName, md := range desired.MachineDeployments { + topologyName := requestTopologyName{mdTopologyName: mdTopologyName} // Update the BootstrapConfigTemplate. - bootstrapTemplate, err := getTemplateAsUnstructured(req, "MachineDeployment", "spec.template.spec.bootstrap.configRef", mdTopologyName) + bootstrapTemplate, err := getTemplateAsUnstructured(req, "MachineDeployment", "spec.template.spec.bootstrap.configRef", topologyName) if err != nil { return err } @@ -476,7 +560,7 @@ func updateDesiredState(ctx context.Context, req *runtimehooksv1.GeneratePatches } // Update the InfrastructureMachineTemplate. - infrastructureMachineTemplate, err := getTemplateAsUnstructured(req, "MachineDeployment", "spec.template.spec.infrastructureRef", mdTopologyName) + infrastructureMachineTemplate, err := getTemplateAsUnstructured(req, "MachineDeployment", "spec.template.spec.infrastructureRef", topologyName) if err != nil { return err } @@ -485,6 +569,28 @@ func updateDesiredState(ctx context.Context, req *runtimehooksv1.GeneratePatches } } + // Update the templates for all MachinePools. + for mpTopologyName, mp := range desired.MachinePools { + topologyName := requestTopologyName{mpTopologyName: mpTopologyName} + // Update the BootstrapConfig. + bootstrapTemplate, err := getTemplateAsUnstructured(req, "MachinePool", "spec.template.spec.bootstrap.configRef", topologyName) + if err != nil { + return err + } + if err := patchObject(ctx, mp.BootstrapObject, bootstrapTemplate); err != nil { + return err + } + + // Update the InfrastructureMachinePool. + infrastructureMachinePoolTemplate, err := getTemplateAsUnstructured(req, "MachinePool", "spec.template.spec.infrastructureRef", topologyName) + if err != nil { + return err + } + if err := patchObject(ctx, mp.InfrastructureMachinePoolObject, infrastructureMachinePoolTemplate); err != nil { + return err + } + } + return nil } diff --git a/internal/controllers/topology/cluster/patches/inline/json_patch_generator.go b/internal/controllers/topology/cluster/patches/inline/json_patch_generator.go index bcbf80cd9c06..1c80acb634a7 100644 --- a/internal/controllers/topology/cluster/patches/inline/json_patch_generator.go +++ b/internal/controllers/topology/cluster/patches/inline/json_patch_generator.go @@ -186,6 +186,35 @@ func matchesSelector(req *runtimehooksv1.GeneratePatchesRequestItem, templateVar } } + // Check if the request is for a BootstrapConfigTemplate or an InfrastructureMachinePoolTemplate + // of one of the configured MachinePoolClasses. + if selector.MatchResources.MachinePoolClass != nil { + if req.HolderReference.Kind == "MachinePool" && + (req.HolderReference.FieldPath == "spec.template.spec.bootstrap.configRef" || + req.HolderReference.FieldPath == "spec.template.spec.infrastructureRef") { + // Read the builtin.machinePool.class variable. + templateMPClassJSON, err := patchvariables.GetVariableValue(templateVariables, "builtin.machinePool.class") + + // If the builtin variable could be read. + if err == nil { + // If templateMPClass matches one of the configured MachinePoolClasses. + for _, mpClass := range selector.MatchResources.MachinePoolClass.Names { + // We have to quote mpClass as templateMPClassJSON is a JSON string (e.g. "default-worker"). + if mpClass == "*" || string(templateMPClassJSON.Raw) == strconv.Quote(mpClass) { + return true + } + unquoted, _ := strconv.Unquote(string(templateMPClassJSON.Raw)) + if strings.HasPrefix(mpClass, "*") && strings.HasSuffix(unquoted, strings.TrimPrefix(mpClass, "*")) { + return true + } + if strings.HasSuffix(mpClass, "*") && strings.HasPrefix(unquoted, strings.TrimSuffix(mpClass, "*")) { + return true + } + } + } + } + } + return false } diff --git a/internal/controllers/topology/cluster/patches/template.go b/internal/controllers/topology/cluster/patches/template.go index b156145c3010..892058b7b054 100644 --- a/internal/controllers/topology/cluster/patches/template.go +++ b/internal/controllers/topology/cluster/patches/template.go @@ -42,6 +42,12 @@ type requestItemBuilder struct { holder runtimehooksv1.HolderReference } +// requestTopologyName is used to specify the topology name to match in a GeneratePatchesRequest. +type requestTopologyName struct { + mdTopologyName string + mpTopologyName string +} + // newRequestItemBuilder returns a new requestItemBuilder. func newRequestItemBuilder(template *unstructured.Unstructured) *requestItemBuilder { return &requestItemBuilder{ @@ -86,12 +92,12 @@ func (t *requestItemBuilder) Build() (*runtimehooksv1.GeneratePatchesRequestItem // getTemplateAsUnstructured is a utility func that returns a template matching the holderKind, holderFieldPath // and mdTopologyName from a GeneratePatchesRequest. -func getTemplateAsUnstructured(req *runtimehooksv1.GeneratePatchesRequest, holderKind, holderFieldPath, mdTopologyName string) (*unstructured.Unstructured, error) { +func getTemplateAsUnstructured(req *runtimehooksv1.GeneratePatchesRequest, holderKind, holderFieldPath string, topologyNames requestTopologyName) (*unstructured.Unstructured, error) { // Find the requestItem. - requestItem := getRequestItem(req, holderKind, holderFieldPath, mdTopologyName) + requestItem := getRequestItem(req, holderKind, holderFieldPath, topologyNames) if requestItem == nil { - return nil, errors.Errorf("failed to get request item with holder kind %q, holder field path %q and MD topology name %q", holderKind, holderFieldPath, mdTopologyName) + return nil, errors.Errorf("failed to get request item with holder kind %q, holder field path %q, MD topology name %q, and MP topology name %q", holderKind, holderFieldPath, topologyNames.mdTopologyName, topologyNames.mpTopologyName) } // Unmarshal the template. @@ -114,7 +120,7 @@ func getRequestItemByUID(req *runtimehooksv1.GeneratePatchesRequest, uid types.U } // getRequestItem is a utility func that returns a template matching the holderKind, holderFiledPath and mdTopologyName from a GeneratePatchesRequest. -func getRequestItem(req *runtimehooksv1.GeneratePatchesRequest, holderKind, holderFieldPath, mdTopologyName string) *runtimehooksv1.GeneratePatchesRequestItem { +func getRequestItem(req *runtimehooksv1.GeneratePatchesRequest, holderKind, holderFieldPath string, topologyNames requestTopologyName) *runtimehooksv1.GeneratePatchesRequestItem { for _, template := range req.Items { if holderKind != "" && template.HolderReference.Kind != holderKind { continue @@ -122,11 +128,17 @@ func getRequestItem(req *runtimehooksv1.GeneratePatchesRequest, holderKind, hold if holderFieldPath != "" && template.HolderReference.FieldPath != holderFieldPath { continue } - if mdTopologyName != "" { + if topologyNames.mdTopologyName != "" { templateVariables := toMap(template.Variables) - v, err := variables.GetVariableValue(templateVariables, "builtin.machineDeployment.topologyName") - if err != nil || string(v.Raw) != strconv.Quote(mdTopologyName) { + if err != nil || string(v.Raw) != strconv.Quote(topologyNames.mdTopologyName) { + continue + } + } + if topologyNames.mpTopologyName != "" { + templateVariables := toMap(template.Variables) + v, err := variables.GetVariableValue(templateVariables, "builtin.machinePool.topologyName") + if err != nil || string(v.Raw) != strconv.Quote(topologyNames.mpTopologyName) { continue } } diff --git a/internal/controllers/topology/cluster/patches/variables/variables.go b/internal/controllers/topology/cluster/patches/variables/variables.go index 27d98167098c..6fc7ff8f1457 100644 --- a/internal/controllers/topology/cluster/patches/variables/variables.go +++ b/internal/controllers/topology/cluster/patches/variables/variables.go @@ -26,6 +26,7 @@ import ( "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1" "sigs.k8s.io/cluster-api/internal/contract" ) @@ -42,6 +43,7 @@ type Builtins struct { Cluster *ClusterBuiltins `json:"cluster,omitempty"` ControlPlane *ControlPlaneBuiltins `json:"controlPlane,omitempty"` MachineDeployment *MachineDeploymentBuiltins `json:"machineDeployment,omitempty"` + MachinePool *MachinePoolBuiltins `json:"machinePool,omitempty"` } // ClusterBuiltins represents builtin cluster variables. @@ -144,29 +146,62 @@ type MachineDeploymentBuiltins struct { Replicas *int64 `json:"replicas,omitempty"` // Bootstrap is the value of the .spec.template.spec.bootstrap field of the MachineDeployment. - Bootstrap *MachineDeploymentBootstrapBuiltins `json:"bootstrap,omitempty"` + Bootstrap *MachineBootstrapBuiltins `json:"bootstrap,omitempty"` - // InfrastructureRef is the value of the .spec.template.spec.bootstrap field of the MachineDeployment. - InfrastructureRef *MachineDeploymentInfrastructureRefBuiltins `json:"infrastructureRef,omitempty"` + // InfrastructureRef is the value of the .spec.template.spec.infrastructureRef field of the MachineDeployment. + InfrastructureRef *MachineInfrastructureRefBuiltins `json:"infrastructureRef,omitempty"` } -// MachineDeploymentBootstrapBuiltins is the value of the .spec.template.spec.bootstrap field -// of the MachineDeployment. -type MachineDeploymentBootstrapBuiltins struct { +// MachinePoolBuiltins represents builtin MachinePool variables. +// NOTE: These variables are only set for templates belonging to a MachinePool. +type MachinePoolBuiltins struct { + // Version is the Kubernetes version of the MachinePool, + // to which the current template belongs to. + // NOTE: Please note that this version is the version we are currently reconciling towards. + // It can differ from the current version of the MachinePool machines while an upgrade process is + // being orchestrated. + Version string `json:"version,omitempty"` + + // Class is the class name of the MachinePool, + // to which the current template belongs to. + Class string `json:"class,omitempty"` + + // Name is the name of the MachinePool, + // to which the current template belongs to. + Name string `json:"name,omitempty"` + + // TopologyName is the topology name of the MachinePool, + // to which the current template belongs to. + TopologyName string `json:"topologyName,omitempty"` + + // Replicas is the value of the replicas field of the MachinePool, + // to which the current template belongs to. + Replicas *int64 `json:"replicas,omitempty"` + + // Bootstrap is the value of the .spec.template.spec.bootstrap field of the MachinePool. + Bootstrap *MachineBootstrapBuiltins `json:"bootstrap,omitempty"` + + // InfrastructureRef is the value of the .spec.template.spec.infrastructureRef field of the MachinePool. + InfrastructureRef *MachineInfrastructureRefBuiltins `json:"infrastructureRef,omitempty"` +} + +// MachineBootstrapBuiltins is the value of the .spec.template.spec.bootstrap field +// of the MachineDeployment or MachinePool. +type MachineBootstrapBuiltins struct { // ConfigRef is the value of the .spec.template.spec.bootstrap.configRef field of the MachineDeployment. - ConfigRef *MachineDeploymentBootstrapConfigRefBuiltins `json:"configRef,omitempty"` + ConfigRef *MachineBootstrapConfigRefBuiltins `json:"configRef,omitempty"` } -// MachineDeploymentBootstrapConfigRefBuiltins is the value of the .spec.template.spec.bootstrap.configRef -// field of the MachineDeployment. -type MachineDeploymentBootstrapConfigRefBuiltins struct { +// MachineBootstrapConfigRefBuiltins is the value of the .spec.template.spec.bootstrap.configRef +// field of the MachineDeployment or MachinePool. +type MachineBootstrapConfigRefBuiltins struct { // Name of the bootstrap.configRef. Name string `json:"name,omitempty"` } -// MachineDeploymentInfrastructureRefBuiltins is the value of the .spec.template.spec.infrastructureRef field -// of the MachineDeployment. -type MachineDeploymentInfrastructureRefBuiltins struct { +// MachineInfrastructureRefBuiltins is the value of the .spec.template.spec.infrastructureRef field +// of the MachineDeployment or MachinePool. +type MachineInfrastructureRefBuiltins struct { // Name of the infrastructureRef. Name string `json:"name,omitempty"` } @@ -305,15 +340,15 @@ func MachineDeployment(mdTopology *clusterv1.MachineDeploymentTopology, md *clus } if mdBootstrapTemplate != nil { - builtin.MachineDeployment.Bootstrap = &MachineDeploymentBootstrapBuiltins{ - ConfigRef: &MachineDeploymentBootstrapConfigRefBuiltins{ + builtin.MachineDeployment.Bootstrap = &MachineBootstrapBuiltins{ + ConfigRef: &MachineBootstrapConfigRefBuiltins{ Name: mdBootstrapTemplate.GetName(), }, } } if mdInfrastructureMachineTemplate != nil { - builtin.MachineDeployment.InfrastructureRef = &MachineDeploymentInfrastructureRefBuiltins{ + builtin.MachineDeployment.InfrastructureRef = &MachineInfrastructureRefBuiltins{ Name: mdInfrastructureMachineTemplate.GetName(), } } @@ -327,6 +362,59 @@ func MachineDeployment(mdTopology *clusterv1.MachineDeploymentTopology, md *clus return variables, nil } +// MachinePool returns variables that apply to templates belonging to a MachinePool. +func MachinePool(mpTopology *clusterv1.MachinePoolTopology, mp *expv1.MachinePool, mpBootstrapObject, mpInfrastructureMachinePool *unstructured.Unstructured, definitionFrom string, patchVariableDefinitions map[string]bool) ([]runtimehooksv1.Variable, error) { + variables := []runtimehooksv1.Variable{} + + // Add variables overrides for the MachinePool. + if mpTopology.Variables != nil { + for _, variable := range mpTopology.Variables.Overrides { + // Add the variable if it is defined for the current patch or it is defined for all the patches. + if variable.DefinitionFrom == emptyDefinitionFrom || variable.DefinitionFrom == definitionFrom { + // Add the variable if it has a definition from this patch in the ClusterClass. + if _, ok := patchVariableDefinitions[variable.Name]; ok { + variables = append(variables, runtimehooksv1.Variable{Name: variable.Name, Value: variable.Value}) + } + } + } + } + + // Construct builtin variable. + builtin := Builtins{ + MachinePool: &MachinePoolBuiltins{ + Version: *mp.Spec.Template.Spec.Version, + Class: mpTopology.Class, + Name: mp.Name, + TopologyName: mpTopology.Name, + }, + } + if mp.Spec.Replicas != nil { + builtin.MachinePool.Replicas = pointer.Int64(int64(*mp.Spec.Replicas)) + } + + if mpBootstrapObject != nil { + builtin.MachinePool.Bootstrap = &MachineBootstrapBuiltins{ + ConfigRef: &MachineBootstrapConfigRefBuiltins{ + Name: mpBootstrapObject.GetName(), + }, + } + } + + if mpInfrastructureMachinePool != nil { + builtin.MachinePool.InfrastructureRef = &MachineInfrastructureRefBuiltins{ + Name: mpInfrastructureMachinePool.GetName(), + } + } + + variable, err := toVariable(BuiltinsName, builtin) + if err != nil { + return nil, err + } + variables = append(variables, *variable) + + return variables, nil +} + // toVariable converts name and value to a variable. func toVariable(name string, value interface{}) (*runtimehooksv1.Variable, error) { marshalledValue, err := json.Marshal(value) diff --git a/internal/controllers/topology/cluster/reconcile_state.go b/internal/controllers/topology/cluster/reconcile_state.go index ce90b6048352..6fc58cb21ba9 100644 --- a/internal/controllers/topology/cluster/reconcile_state.go +++ b/internal/controllers/topology/cluster/reconcile_state.go @@ -36,6 +36,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1" "sigs.k8s.io/cluster-api/feature" "sigs.k8s.io/cluster-api/internal/contract" @@ -89,7 +90,12 @@ func (r *Reconciler) reconcileState(ctx context.Context, s *scope.Scope) error { } // Reconcile desired state of the MachineDeployment objects. - return r.reconcileMachineDeployments(ctx, s) + if err := r.reconcileMachineDeployments(ctx, s); err != nil { + return err + } + + // Reconcile desired state of the MachinePool object and return. + return r.reconcileMachinePools(ctx, s) } // Reconcile the Cluster shim, a temporary object used a mean to collect objects/templates @@ -251,7 +257,11 @@ func (r *Reconciler) callAfterClusterUpgrade(ctx context.Context, s *scope.Scope len(s.UpgradeTracker.MachineDeployments.UpgradingNames()) == 0 && // Machine deployments are not upgrading or not about to upgrade !s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate() && // No MachineDeployments are pending create !s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade() && // No MachineDeployments are pending an upgrade - !s.UpgradeTracker.MachineDeployments.DeferredUpgrade() { // No MachineDeployments have deferred an upgrade + !s.UpgradeTracker.MachineDeployments.DeferredUpgrade() && // No MachineDeployments have deferred an upgrade + len(s.UpgradeTracker.MachinePools.UpgradingNames()) == 0 && // Machine pools are not upgrading or not about to upgrade + !s.UpgradeTracker.MachinePools.IsAnyPendingCreate() && // No MachinePools are pending create + !s.UpgradeTracker.MachinePools.IsAnyPendingUpgrade() && // No MachinePools are pending an upgrade + !s.UpgradeTracker.MachinePools.DeferredUpgrade() { // No MachinePools have deferred an upgrade // Everything is stable and the cluster can be considered fully upgraded. hookRequest := &runtimehooksv1.AfterClusterUpgradeRequest{ Cluster: *s.Current.Cluster, @@ -721,14 +731,244 @@ func (r *Reconciler) deleteMachineDeployment(ctx context.Context, cluster *clust return nil } -type machineDeploymentDiff struct { +// reconcileMachinePools reconciles the desired state of the MachinePool objects. +func (r *Reconciler) reconcileMachinePools(ctx context.Context, s *scope.Scope) error { + diff := calculateMachinePoolDiff(s.Current.MachinePools, s.Desired.MachinePools) + + // Create MachinePools. + if len(diff.toCreate) > 0 { + currentMPTopologyNames, err := r.getCurrentMachinePools(ctx, s) + if err != nil { + return err + } + for _, mpTopologyName := range diff.toCreate { + mp := s.Desired.MachinePools[mpTopologyName] + + // Skip the MP creation if the MP already exists. + if currentMPTopologyNames.Has(mpTopologyName) { + log := tlog.LoggerFrom(ctx).WithMachinePool(mp.Object) + log.V(3).Infof(fmt.Sprintf("Skipping creation of MachinePool %s because MachinePool for topology %s already exists (only considered creation because of stale cache)", tlog.KObj{Obj: mp.Object}, mpTopologyName)) + continue + } + + if err := r.createMachinePool(ctx, s, mp); err != nil { + return err + } + } + } + + // Update MachinePools. + for _, mpTopologyName := range diff.toUpdate { + currentMP := s.Current.MachinePools[mpTopologyName] + desiredMP := s.Desired.MachinePools[mpTopologyName] + if err := r.updateMachinePool(ctx, s, currentMP, desiredMP); err != nil { + return err + } + } + + // Delete MachinePools. + for _, mpTopologyName := range diff.toDelete { + mp := s.Current.MachinePools[mpTopologyName] + if err := r.deleteMachinePool(ctx, s.Current.Cluster, mp); err != nil { + return err + } + } + + return nil +} + +// getCurrentMachinePools gets the current list of MachinePools via the APIReader. +func (r *Reconciler) getCurrentMachinePools(ctx context.Context, s *scope.Scope) (sets.Set[string], error) { + // TODO: We should consider using PartialObjectMetadataList here. Currently this doesn't work as our + // implementation for topology dryrun doesn't support PartialObjectMetadataList. + mpList := &expv1.MachinePoolList{} + err := r.APIReader.List(ctx, mpList, + client.MatchingLabels{ + clusterv1.ClusterNameLabel: s.Current.Cluster.Name, + clusterv1.ClusterTopologyOwnedLabel: "", + }, + client.InNamespace(s.Current.Cluster.Namespace), + ) + if err != nil { + return nil, errors.Wrap(err, "failed to read MachinePools for managed topology") + } + + currentMPs := sets.Set[string]{} + for _, mp := range mpList.Items { + mpTopologyName, ok := mp.ObjectMeta.Labels[clusterv1.ClusterTopologyMachinePoolNameLabel] + if ok || mpTopologyName != "" { + currentMPs.Insert(mpTopologyName) + } + } + return currentMPs, nil +} + +// createMachinePool creates a MachinePool and the corresponding templates. +func (r *Reconciler) createMachinePool(ctx context.Context, s *scope.Scope, mp *scope.MachinePoolState) error { + // Do not create the MachinePool if it is marked as pending create. + // This will also block MHC creation because creating the MHC without the corresponding + // MachinePool is unnecessary. + mpTopologyName, ok := mp.Object.Labels[clusterv1.ClusterTopologyMachinePoolNameLabel] + if !ok || mpTopologyName == "" { + // Note: This is only an additional safety check and should not happen. The label will always be added when computing + // the desired MachinePool. + return errors.Errorf("new MachinePool is missing the %q label", clusterv1.ClusterTopologyMachinePoolNameLabel) + } + // Return early if the MachinePool is pending create. + if s.UpgradeTracker.MachinePools.IsPendingCreate(mpTopologyName) { + return nil + } + + log := tlog.LoggerFrom(ctx).WithMachinePool(mp.Object) + cluster := s.Current.Cluster + infraCtx, _ := log.WithObject(mp.InfrastructureMachinePoolObject).Into(ctx) + if err := r.reconcileReferencedObject(infraCtx, reconcileReferencedObjectInput{ + cluster: cluster, + desired: mp.InfrastructureMachinePoolObject, + }); err != nil { + return errors.Wrapf(err, "failed to create %s", mp.Object.Kind) + } + + bootstrapCtx, _ := log.WithObject(mp.BootstrapObject).Into(ctx) + if err := r.reconcileReferencedObject(bootstrapCtx, reconcileReferencedObjectInput{ + cluster: cluster, + desired: mp.BootstrapObject, + }); err != nil { + return errors.Wrapf(err, "failed to create %s", mp.Object.Kind) + } + + log = log.WithObject(mp.Object) + log.Infof(fmt.Sprintf("Creating %s", tlog.KObj{Obj: mp.Object})) + helper, err := r.patchHelperFactory(ctx, nil, mp.Object) + if err != nil { + return createErrorWithoutObjectName(ctx, err, mp.Object) + } + if err := helper.Patch(ctx); err != nil { + return createErrorWithoutObjectName(ctx, err, mp.Object) + } + r.recorder.Eventf(cluster, corev1.EventTypeNormal, createEventReason, "Created %q", tlog.KObj{Obj: mp.Object}) + + // Wait until MachinePool is visible in the cache. + // Note: We have to do this because otherwise using a cached client in current state could + // miss a newly created MachinePool (because the cache might be stale). + err = wait.PollUntilContextTimeout(ctx, 5*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) { + key := client.ObjectKey{Namespace: mp.Object.Namespace, Name: mp.Object.Name} + if err := r.Client.Get(ctx, key, &expv1.MachinePool{}); err != nil { + if apierrors.IsNotFound(err) { + return false, nil + } + return false, err + } + return true, nil + }) + if err != nil { + return errors.Wrapf(err, "failed waiting for MachinePool %s to be visible in the cache after create", mp.Object.Kind) + } + + return nil +} + +// updateMachinePool updates a MachinePool. Also rotates the corresponding Templates if necessary. +func (r *Reconciler) updateMachinePool(ctx context.Context, s *scope.Scope, currentMP, desiredMP *scope.MachinePoolState) error { + log := tlog.LoggerFrom(ctx).WithMachinePool(desiredMP.Object) + + // Return early if the MachinePool is pending an upgrade. + // Do not reconcile the MachinePool yet to avoid updating the MachinePool while it is still pending a + // version upgrade. This will prevent the MachinePool from performing a double rollout. + if s.UpgradeTracker.MachinePools.IsPendingUpgrade(currentMP.Object.Name) { + return nil + } + + cluster := s.Current.Cluster + infraCtx, _ := log.WithObject(desiredMP.InfrastructureMachinePoolObject).Into(ctx) + if err := r.reconcileReferencedObject(infraCtx, reconcileReferencedObjectInput{ + cluster: cluster, + current: currentMP.InfrastructureMachinePoolObject, + desired: desiredMP.InfrastructureMachinePoolObject, + versionGetter: contract.ControlPlane().Version().Get, + }); err != nil { + return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMP.Object}) + } + + bootstrapCtx, _ := log.WithObject(desiredMP.BootstrapObject).Into(ctx) + if err := r.reconcileReferencedObject(bootstrapCtx, reconcileReferencedObjectInput{ + cluster: cluster, + current: currentMP.BootstrapObject, + desired: desiredMP.BootstrapObject, + versionGetter: contract.ControlPlane().Version().Get, + }); err != nil { + return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMP.Object}) + } + + // Check differences between current and desired MachinePool, and eventually patch the current object. + log = log.WithObject(desiredMP.Object) + patchHelper, err := r.patchHelperFactory(ctx, currentMP.Object, desiredMP.Object) + if err != nil { + return errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: currentMP.Object}) + } + if !patchHelper.HasChanges() { + log.V(3).Infof("No changes for %s", tlog.KObj{Obj: currentMP.Object}) + return nil + } + + log.Infof("Patching %s", tlog.KObj{Obj: currentMP.Object}) + if err := patchHelper.Patch(ctx); err != nil { + return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: currentMP.Object}) + } + r.recorder.Eventf(cluster, corev1.EventTypeNormal, updateEventReason, "Updated %q%s", tlog.KObj{Obj: currentMP.Object}, logMachinePoolVersionChange(currentMP.Object, desiredMP.Object)) + + // Wait until MachinePool is updated in the cache. + // Note: We have to do this because otherwise using a cached client in current state could + // return a stale state of a MachinePool we just patched (because the cache might be stale). + // Note: It is good enough to check that the resource version changed. Other controllers might have updated the + // MachinePool as well, but the combination of the patch call above without a conflict and a changed resource + // version here guarantees that we see the changes of our own update. + err = wait.PollUntilContextTimeout(ctx, 5*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) { + key := client.ObjectKey{Namespace: currentMP.Object.GetNamespace(), Name: currentMP.Object.GetName()} + cachedMP := &expv1.MachinePool{} + if err := r.Client.Get(ctx, key, cachedMP); err != nil { + return false, err + } + return currentMP.Object.GetResourceVersion() != cachedMP.GetResourceVersion(), nil + }) + if err != nil { + return errors.Wrapf(err, "failed waiting for MachinePool %s to be updated in the cache after patch", tlog.KObj{Obj: currentMP.Object}) + } + + // We want to call both cleanup functions even if one of them fails to clean up as much as possible. + return nil +} + +func logMachinePoolVersionChange(current, desired *expv1.MachinePool) string { + if current.Spec.Template.Spec.Version == nil || desired.Spec.Template.Spec.Version == nil { + return "" + } + + if *current.Spec.Template.Spec.Version != *desired.Spec.Template.Spec.Version { + return fmt.Sprintf(" with version change from %s to %s", *current.Spec.Template.Spec.Version, *desired.Spec.Template.Spec.Version) + } + return "" +} + +// deleteMachinePool deletes a MachinePool. +func (r *Reconciler) deleteMachinePool(ctx context.Context, cluster *clusterv1.Cluster, mp *scope.MachinePoolState) error { + log := tlog.LoggerFrom(ctx).WithMachinePool(mp.Object).WithObject(mp.Object) + log.Infof("Deleting %s", tlog.KObj{Obj: mp.Object}) + if err := r.Client.Delete(ctx, mp.Object); err != nil && !apierrors.IsNotFound(err) { + return errors.Wrapf(err, "failed to delete %s", tlog.KObj{Obj: mp.Object}) + } + r.recorder.Eventf(cluster, corev1.EventTypeNormal, deleteEventReason, "Deleted %q", tlog.KObj{Obj: mp.Object}) + return nil +} + +type machineDiff struct { toCreate, toUpdate, toDelete []string } // calculateMachineDeploymentDiff compares two maps of MachineDeploymentState and calculates which // MachineDeployments should be created, updated or deleted. -func calculateMachineDeploymentDiff(current, desired map[string]*scope.MachineDeploymentState) machineDeploymentDiff { - var diff machineDeploymentDiff +func calculateMachineDeploymentDiff(current, desired map[string]*scope.MachineDeploymentState) machineDiff { + var diff machineDiff for md := range desired { if _, ok := current[md]; ok { @@ -747,6 +987,28 @@ func calculateMachineDeploymentDiff(current, desired map[string]*scope.MachineDe return diff } +// calculateMachinePoolDiff compares two maps of MachinePoolState and calculates which +// MachinePools should be created, updated or deleted. +func calculateMachinePoolDiff(current, desired map[string]*scope.MachinePoolState) machineDiff { + var diff machineDiff + + for mp := range desired { + if _, ok := current[mp]; ok { + diff.toUpdate = append(diff.toUpdate, mp) + } else { + diff.toCreate = append(diff.toCreate, mp) + } + } + + for mp := range current { + if _, ok := desired[mp]; !ok { + diff.toDelete = append(diff.toDelete, mp) + } + } + + return diff +} + type unstructuredVersionGetter func(obj *unstructured.Unstructured) (*string, error) type reconcileReferencedObjectInput struct { diff --git a/internal/controllers/topology/cluster/scope/blueprint.go b/internal/controllers/topology/cluster/scope/blueprint.go index 54a9f71750cb..1f93ba84258e 100644 --- a/internal/controllers/topology/cluster/scope/blueprint.go +++ b/internal/controllers/topology/cluster/scope/blueprint.go @@ -39,6 +39,9 @@ type ClusterBlueprint struct { // MachineDeployments holds the MachineDeploymentBlueprints derived from ClusterClass. MachineDeployments map[string]*MachineDeploymentBlueprint + + // MachinePools holds the MachinePoolBlueprints derived from ClusterClass. + MachinePools map[string]*MachinePoolBlueprint } // ControlPlaneBlueprint holds the templates required for computing the desired state of a managed control plane. @@ -73,6 +76,21 @@ type MachineDeploymentBlueprint struct { MachineHealthCheck *clusterv1.MachineHealthCheckClass } +// MachinePoolBlueprint holds the templates required for computing the desired state of a managed MachinePool; +// it also holds a copy of the MachinePool metadata from Cluster.Topology, thus providing all the required info +// in a single place. +type MachinePoolBlueprint struct { + // Metadata holds the metadata for a MachinePool. + // NOTE: This is a convenience copy of the metadata field from Cluster.Spec.Topology.Workers.MachinePools[x]. + Metadata clusterv1.ObjectMeta + + // BootstrapTemplate holds the bootstrap template for a MachinePool referenced from ClusterClass. + BootstrapTemplate *unstructured.Unstructured + + // InfrastructureMachinePoolTemplate holds the infrastructure machine template for a MachinePool referenced from ClusterClass. + InfrastructureMachinePoolTemplate *unstructured.Unstructured +} + // HasControlPlaneInfrastructureMachine checks whether the clusterClass mandates the controlPlane has infrastructureMachines. func (b *ClusterBlueprint) HasControlPlaneInfrastructureMachine() bool { return b.ClusterClass.Spec.ControlPlane.MachineInfrastructure != nil && b.ClusterClass.Spec.ControlPlane.MachineInfrastructure.Ref != nil @@ -137,3 +155,8 @@ func (b *ClusterBlueprint) MachineDeploymentMachineHealthCheckClass(md *clusterv func (b *ClusterBlueprint) HasMachineDeployments() bool { return b.Topology.Workers != nil && len(b.Topology.Workers.MachineDeployments) > 0 } + +// HasMachinePools checks whether the topology has MachinePools. +func (b *ClusterBlueprint) HasMachinePools() bool { + return b.Topology.Workers != nil && len(b.Topology.Workers.MachinePools) > 0 +} diff --git a/internal/controllers/topology/cluster/scope/scope.go b/internal/controllers/topology/cluster/scope/scope.go index 272bfe69c27c..9ef41844d0f7 100644 --- a/internal/controllers/topology/cluster/scope/scope.go +++ b/internal/controllers/topology/cluster/scope/scope.go @@ -50,9 +50,11 @@ func New(cluster *clusterv1.Cluster) *Scope { // Determine the maximum upgrade concurrency from the annotation on the cluster. maxMDUpgradeConcurrency := 1 + maxMPUpgradeConcurrency := 1 if concurrency, ok := cluster.Annotations[clusterv1.ClusterTopologyUpgradeConcurrencyAnnotation]; ok { // The error can be ignored because the webhook ensures that the value is a positive integer. maxMDUpgradeConcurrency, _ = strconv.Atoi(concurrency) + maxMPUpgradeConcurrency, _ = strconv.Atoi(concurrency) } return &Scope{ Blueprint: &ClusterBlueprint{}, @@ -61,6 +63,7 @@ func New(cluster *clusterv1.Cluster) *Scope { }, UpgradeTracker: NewUpgradeTracker( MaxMDUpgradeConcurrency(maxMDUpgradeConcurrency), + MaxMPUpgradeConcurrency(maxMPUpgradeConcurrency), ), HookResponseTracker: NewHookResponseTracker(), } diff --git a/internal/controllers/topology/cluster/scope/state.go b/internal/controllers/topology/cluster/scope/state.go index 9a40cee53249..7b2ca82a12a5 100644 --- a/internal/controllers/topology/cluster/scope/state.go +++ b/internal/controllers/topology/cluster/scope/state.go @@ -21,11 +21,13 @@ import ( "fmt" "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" ) // ClusterState holds all the objects representing the state of a managed Cluster topology. @@ -43,6 +45,9 @@ type ClusterState struct { // MachineDeployments holds the machine deployments in the Cluster. MachineDeployments MachineDeploymentsStateMap + + // MachinePools holds the MachinePools in the Cluster. + MachinePools MachinePoolsStateMap } // ControlPlaneState holds all the objects representing the state of a managed control plane. @@ -123,3 +128,57 @@ func (md *MachineDeploymentState) IsUpgrading(ctx context.Context, c client.Clie } return false, nil } + +// MachinePoolsStateMap holds a collection of MachinePool states. +type MachinePoolsStateMap map[string]*MachinePoolState + +// Upgrading returns the list of the machine pools +// that are upgrading. +func (mps MachinePoolsStateMap) Upgrading(ctx context.Context, c client.Client) ([]string, error) { + names := []string{} + for _, mp := range mps { + upgrading, err := mp.IsUpgrading(ctx, c) + if err != nil { + return nil, errors.Wrap(err, "failed to list upgrading MachinePools") + } + if upgrading { + names = append(names, mp.Object.Name) + } + } + return names, nil +} + +// MachinePoolState holds all the objects representing the state of a managed pool. +type MachinePoolState struct { + // Object holds the MachinePool object. + Object *expv1.MachinePool + + // BootstrapObject holds the MachinePool bootstrap object. + BootstrapObject *unstructured.Unstructured + + // InfrastructureMachinePoolObject holds the infrastructure machine template referenced by the MachinePool object. + InfrastructureMachinePoolObject *unstructured.Unstructured +} + +// IsUpgrading determines if the MachinePool is upgrading. +// A machine deployment is considered upgrading if at least one of the Machines of this +// MachinePool has a different version. +func (mp *MachinePoolState) IsUpgrading(ctx context.Context, c client.Client) (bool, error) { + // If the MachinePool has no version there is no definitive way to check if it is upgrading. Therefore, return false. + // Note: This case should not happen. + if mp.Object.Spec.Template.Spec.Version == nil { + return false, nil + } + mpVersion := *mp.Object.Spec.Template.Spec.Version + // Check if the kubelet versions of the MachinePool noderefs match the MachinePool version. + for _, nodeRef := range mp.Object.Status.NodeRefs { + node := &corev1.Node{} + if err := c.Get(ctx, client.ObjectKey{Name: nodeRef.Name}, node); err != nil { + return false, fmt.Errorf("failed to check if MachinePool %s is upgrading: failed to get Node %s", mp.Object.Name, nodeRef.Name) + } + if mpVersion != node.Status.NodeInfo.KubeletVersion { + return true, nil + } + } + return false, nil +} diff --git a/internal/controllers/topology/cluster/scope/upgradetracker.go b/internal/controllers/topology/cluster/scope/upgradetracker.go index 3ac1590365a9..9f7897d284fc 100644 --- a/internal/controllers/topology/cluster/scope/upgradetracker.go +++ b/internal/controllers/topology/cluster/scope/upgradetracker.go @@ -22,6 +22,7 @@ import "k8s.io/apimachinery/pkg/util/sets" type UpgradeTracker struct { ControlPlane ControlPlaneUpgradeTracker MachineDeployments MachineDeploymentUpgradeTracker + MachinePools MachinePoolUpgradeTracker } // ControlPlaneUpgradeTracker holds the current upgrade status of the Control Plane. @@ -99,9 +100,46 @@ type MachineDeploymentUpgradeTracker struct { maxMachineDeploymentUpgradeConcurrency int } +// MachinePoolUpgradeTracker holds the current upgrade status and makes upgrade +// decisions for MachinePools. +type MachinePoolUpgradeTracker struct { + // pendingCreateTopologyNames is the set of MachinePool topology names that are newly added to the + // Cluster Topology but will not be created in the current reconcile loop. + // By marking a MachinePool topology as pendingCreate we skip creating the MachinePool. + // Nb. We use MachinePool topology names instead of MachinePool names because the new MachinePool + // names can keep changing for each reconcile loop leading to continuous updates to the TopologyReconciled condition. + pendingCreateTopologyNames sets.Set[string] + + // pendingUpgradeNames is the set of MachinePool names that are not going to pick up the new version + // in the current reconcile loop. + // By marking a MachinePool as pendingUpgrade we skip reconciling the MachinePool. + pendingUpgradeNames sets.Set[string] + + // deferredNames is the set of MachinePool names that are not going to pick up the new version + // in the current reconcile loop because they are deferred by the user. + // Note: If a MachinePool is marked as deferred it should also be marked as pendingUpgrade. + deferredNames sets.Set[string] + + // upgradingNames is the set of MachinePool names that are upgrading. This set contains the names of + // MachinePools that are currently upgrading and the names of MachinePools that will pick up the upgrade + // in the current reconcile loop. + // Note: This information is used to: + // - decide if ControlPlane can be upgraded. + // - calculate MachinePool upgrade concurrency. + // - update TopologyReconciled Condition. + // - decide if the AfterClusterUpgrade hook can be called. + upgradingNames sets.Set[string] + + // maxMachinePoolUpgradeConcurrency defines the maximum number of MachinePools that should be in an + // upgrading state. This includes the MachinePools that are currently upgrading and the MachinePools that + // will start the upgrade after the current reconcile loop. + maxMachinePoolUpgradeConcurrency int +} + // UpgradeTrackerOptions contains the options for NewUpgradeTracker. type UpgradeTrackerOptions struct { maxMDUpgradeConcurrency int + maxMPUpgradeConcurrency int } // UpgradeTrackerOption returns an option for the NewUpgradeTracker function. @@ -118,6 +156,15 @@ func (m MaxMDUpgradeConcurrency) ApplyToUpgradeTracker(options *UpgradeTrackerOp options.maxMDUpgradeConcurrency = int(m) } +// MaxMPUpgradeConcurrency sets the upper limit for the number of Machine Pools that can upgrade +// concurrently. +type MaxMPUpgradeConcurrency int + +// ApplyToUpgradeTracker applies the given UpgradeTrackerOptions. +func (m MaxMPUpgradeConcurrency) ApplyToUpgradeTracker(options *UpgradeTrackerOptions) { + options.maxMPUpgradeConcurrency = int(m) +} + // NewUpgradeTracker returns an upgrade tracker with empty tracking information. func NewUpgradeTracker(opts ...UpgradeTrackerOption) *UpgradeTracker { options := &UpgradeTrackerOptions{} @@ -128,6 +175,10 @@ func NewUpgradeTracker(opts ...UpgradeTrackerOption) *UpgradeTracker { // The concurrency should be at least 1. options.maxMDUpgradeConcurrency = 1 } + if options.maxMPUpgradeConcurrency < 1 { + // The concurrency should be at least 1. + options.maxMPUpgradeConcurrency = 1 + } return &UpgradeTracker{ MachineDeployments: MachineDeploymentUpgradeTracker{ pendingCreateTopologyNames: sets.Set[string]{}, @@ -136,6 +187,13 @@ func NewUpgradeTracker(opts ...UpgradeTrackerOption) *UpgradeTracker { upgradingNames: sets.Set[string]{}, maxMachineDeploymentUpgradeConcurrency: options.maxMDUpgradeConcurrency, }, + MachinePools: MachinePoolUpgradeTracker{ + pendingCreateTopologyNames: sets.Set[string]{}, + pendingUpgradeNames: sets.Set[string]{}, + deferredNames: sets.Set[string]{}, + upgradingNames: sets.Set[string]{}, + maxMachinePoolUpgradeConcurrency: options.maxMPUpgradeConcurrency, + }, } } @@ -222,3 +280,87 @@ func (m *MachineDeploymentUpgradeTracker) DeferredUpgradeNames() []string { func (m *MachineDeploymentUpgradeTracker) DeferredUpgrade() bool { return len(m.deferredNames) != 0 } + +// MarkUpgrading marks a MachinePool as currently upgrading or about to upgrade. +func (m *MachinePoolUpgradeTracker) MarkUpgrading(names ...string) { + for _, name := range names { + m.upgradingNames.Insert(name) + } +} + +// UpgradingNames returns the list of machine pools that are upgrading or +// are about to upgrade. +func (m *MachinePoolUpgradeTracker) UpgradingNames() []string { + return sets.List(m.upgradingNames) +} + +// UpgradeConcurrencyReached returns true if the number of MachinePools upgrading is at the concurrency limit. +func (m *MachinePoolUpgradeTracker) UpgradeConcurrencyReached() bool { + return m.upgradingNames.Len() >= m.maxMachinePoolUpgradeConcurrency +} + +// MarkPendingCreate marks a machine pool topology that is pending to be created. +// This is generally used to capture machine pools that are yet to be created +// because the control plane is not yet stable. +func (m *MachinePoolUpgradeTracker) MarkPendingCreate(mdTopologyName string) { + m.pendingCreateTopologyNames.Insert(mdTopologyName) +} + +// IsPendingCreate returns true is the MachinePool topology is marked as pending create. +func (m *MachinePoolUpgradeTracker) IsPendingCreate(mdTopologyName string) bool { + return m.pendingCreateTopologyNames.Has(mdTopologyName) +} + +// IsAnyPendingCreate returns true if any of the machine pools are pending +// to be created. Returns false, otherwise. +func (m *MachinePoolUpgradeTracker) IsAnyPendingCreate() bool { + return len(m.pendingCreateTopologyNames) != 0 +} + +// PendingCreateTopologyNames returns the list of machine pool topology names that +// are pending create. +func (m *MachinePoolUpgradeTracker) PendingCreateTopologyNames() []string { + return sets.List(m.pendingCreateTopologyNames) +} + +// MarkPendingUpgrade marks a machine pool as in need of an upgrade. +// This is generally used to capture machine pools that have not yet +// picked up the topology version. +func (m *MachinePoolUpgradeTracker) MarkPendingUpgrade(name string) { + m.pendingUpgradeNames.Insert(name) +} + +// IsPendingUpgrade returns true is the MachinePool marked as pending upgrade. +func (m *MachinePoolUpgradeTracker) IsPendingUpgrade(name string) bool { + return m.pendingUpgradeNames.Has(name) +} + +// IsAnyPendingUpgrade returns true if any of the machine pools are pending +// an upgrade. Returns false, otherwise. +func (m *MachinePoolUpgradeTracker) IsAnyPendingUpgrade() bool { + return len(m.pendingUpgradeNames) != 0 +} + +// PendingUpgradeNames returns the list of machine pool names that +// are pending an upgrade. +func (m *MachinePoolUpgradeTracker) PendingUpgradeNames() []string { + return sets.List(m.pendingUpgradeNames) +} + +// MarkDeferredUpgrade marks that the upgrade for a MachinePool +// has been deferred. +func (m *MachinePoolUpgradeTracker) MarkDeferredUpgrade(name string) { + m.deferredNames.Insert(name) +} + +// DeferredUpgradeNames returns the list of MachinePool names for +// which the upgrade has been deferred. +func (m *MachinePoolUpgradeTracker) DeferredUpgradeNames() []string { + return sets.List(m.deferredNames) +} + +// DeferredUpgrade returns true if the upgrade has been deferred for any of the +// MachinePools. Returns false, otherwise. +func (m *MachinePoolUpgradeTracker) DeferredUpgrade() bool { + return len(m.deferredNames) != 0 +} diff --git a/internal/controllers/topology/cluster/suite_test.go b/internal/controllers/topology/cluster/suite_test.go index 5befa6a1a5f8..ce29c90d006d 100644 --- a/internal/controllers/topology/cluster/suite_test.go +++ b/internal/controllers/topology/cluster/suite_test.go @@ -33,6 +33,7 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/api/v1beta1/index" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/internal/controllers/clusterclass" "sigs.k8s.io/cluster-api/internal/test/envtest" ) @@ -47,6 +48,7 @@ func init() { _ = clientgoscheme.AddToScheme(fakeScheme) _ = clusterv1.AddToScheme(fakeScheme) _ = apiextensionsv1.AddToScheme(fakeScheme) + _ = expv1.AddToScheme(fakeScheme) } func TestMain(m *testing.M) { setupIndexes := func(ctx context.Context, mgr ctrl.Manager) { diff --git a/internal/controllers/topology/cluster/util.go b/internal/controllers/topology/cluster/util.go index ecbb68c91ed7..29eed8494b2d 100644 --- a/internal/controllers/topology/cluster/util.go +++ b/internal/controllers/topology/cluster/util.go @@ -37,6 +37,11 @@ func infrastructureMachineTemplateNamePrefix(clusterName, machineDeploymentTopol return fmt.Sprintf("%s-%s-", clusterName, machineDeploymentTopologyName) } +// infrastructureMachinePoolNamePrefix calculates the name prefix for a InfrastructureMachinePool. +func infrastructureMachinePoolNamePrefix(clusterName, machinePoolTopologyName string) string { + return fmt.Sprintf("%s-%s-", clusterName, machinePoolTopologyName) +} + // infrastructureMachineTemplateNamePrefix calculates the name prefix for a InfrastructureMachineTemplate. func controlPlaneInfrastructureMachineTemplateNamePrefix(clusterName string) string { return fmt.Sprintf("%s-", clusterName) diff --git a/internal/log/log.go b/internal/log/log.go index e7055e4830fc..1ba151f0c5ba 100644 --- a/internal/log/log.go +++ b/internal/log/log.go @@ -28,6 +28,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" ) // LoggerFrom returns a logger with predefined values from a context.Context. @@ -60,6 +61,9 @@ type Logger interface { // WithMachineDeployment adds to the logger information about the MachineDeployment object being processed. WithMachineDeployment(md *clusterv1.MachineDeployment) Logger + // WithMachinePool adds to the logger information about the MachinePool object being processed. + WithMachinePool(mp *expv1.MachinePool) Logger + // WithValues adds key-value pairs of context to a logger. WithValues(keysAndValues ...interface{}) Logger @@ -123,6 +127,17 @@ func (l *topologyReconcileLogger) WithMachineDeployment(md *clusterv1.MachineDep } } +// WithMachinePool adds to the logger information about the MachinePool object being processed. +func (l *topologyReconcileLogger) WithMachinePool(mp *expv1.MachinePool) Logger { + topologyName := mp.Labels[clusterv1.ClusterTopologyMachinePoolNameLabel] + return &topologyReconcileLogger{ + Logger: l.Logger.WithValues( + "MachinePool", klog.KObj(mp), + "MachinePoolTopology", topologyName, + ), + } +} + // WithValues adds key-value pairs of context to a logger. func (l *topologyReconcileLogger) WithValues(keysAndValues ...interface{}) Logger { l.Logger = l.Logger.WithValues(keysAndValues...) diff --git a/internal/topology/check/compatibility.go b/internal/topology/check/compatibility.go index 13dac608b845..f57a56fb75b0 100644 --- a/internal/topology/check/compatibility.go +++ b/internal/topology/check/compatibility.go @@ -267,6 +267,25 @@ func MachineDeploymentClassesAreUnique(clusterClass *clusterv1.ClusterClass) fie return allErrs } +// MachinePoolClassesAreUnique checks that no two MachinePoolClasses in a ClusterClass share a name. +func MachinePoolClassesAreUnique(clusterClass *clusterv1.ClusterClass) field.ErrorList { + var allErrs field.ErrorList + classes := sets.Set[string]{} + for i, class := range clusterClass.Spec.Workers.MachinePools { + if classes.Has(class.Class) { + allErrs = append(allErrs, + field.Invalid( + field.NewPath("spec", "workers", "machinePools").Index(i).Child("class"), + class.Class, + fmt.Sprintf("MachinePool class must be unique. MachinePool with class %q is defined more than once", class.Class), + ), + ) + } + classes.Insert(class.Class) + } + return allErrs +} + // MachineDeploymentTopologiesAreValidAndDefinedInClusterClass checks that each MachineDeploymentTopology name is not empty // and unique, and each class in use is defined in ClusterClass.spec.Workers.MachineDeployments. func MachineDeploymentTopologiesAreValidAndDefinedInClusterClass(desired *clusterv1.Cluster, clusterClass *clusterv1.ClusterClass) field.ErrorList { diff --git a/internal/topology/variables/cluster_variable_defaulting.go b/internal/topology/variables/cluster_variable_defaulting.go index f0393db22d81..442998f2fe40 100644 --- a/internal/topology/variables/cluster_variable_defaulting.go +++ b/internal/topology/variables/cluster_variable_defaulting.go @@ -34,8 +34,8 @@ func DefaultClusterVariables(values []clusterv1.ClusterVariable, definitions []c return defaultClusterVariables(values, definitions, true, fldPath) } -// DefaultMachineDeploymentVariables defaults MachineDeploymentVariables. -func DefaultMachineDeploymentVariables(values []clusterv1.ClusterVariable, definitions []clusterv1.ClusterClassStatusVariable, fldPath *field.Path) ([]clusterv1.ClusterVariable, field.ErrorList) { +// DefaultMachineVariables defaults MachineDeploymentVariables and MachinePoolVariables. +func DefaultMachineVariables(values []clusterv1.ClusterVariable, definitions []clusterv1.ClusterClassStatusVariable, fldPath *field.Path) ([]clusterv1.ClusterVariable, field.ErrorList) { return defaultClusterVariables(values, definitions, false, fldPath) } diff --git a/internal/topology/variables/cluster_variable_validation.go b/internal/topology/variables/cluster_variable_validation.go index b586b1b447b6..992bc600c67d 100644 --- a/internal/topology/variables/cluster_variable_validation.go +++ b/internal/topology/variables/cluster_variable_validation.go @@ -35,8 +35,8 @@ func ValidateClusterVariables(values []clusterv1.ClusterVariable, definitions [] return validateClusterVariables(values, definitions, true, fldPath) } -// ValidateMachineDeploymentVariables validates ValidateMachineDeploymentVariables. -func ValidateMachineDeploymentVariables(values []clusterv1.ClusterVariable, definitions []clusterv1.ClusterClassStatusVariable, fldPath *field.Path) field.ErrorList { +// ValidateMachineVariables validates MachineDeployment and MachinePool variables. +func ValidateMachineVariables(values []clusterv1.ClusterVariable, definitions []clusterv1.ClusterClassStatusVariable, fldPath *field.Path) field.ErrorList { return validateClusterVariables(values, definitions, false, fldPath) } diff --git a/internal/webhooks/cluster.go b/internal/webhooks/cluster.go index 61eff982db7f..b9dbfde380a5 100644 --- a/internal/webhooks/cluster.go +++ b/internal/webhooks/cluster.go @@ -506,9 +506,17 @@ func DefaultAndValidateVariables(cluster *clusterv1.Cluster, clusterClass *clust if md.Variables == nil || len(md.Variables.Overrides) == 0 { continue } - allErrs = append(allErrs, variables.ValidateMachineDeploymentVariables(md.Variables.Overrides, clusterClass.Status.Variables, + allErrs = append(allErrs, variables.ValidateMachineVariables(md.Variables.Overrides, clusterClass.Status.Variables, field.NewPath("spec", "topology", "workers", "machineDeployments").Index(i).Child("variables", "overrides"))...) } + for i, mp := range cluster.Spec.Topology.Workers.MachinePools { + // Continue if there are no variable overrides. + if mp.Variables == nil || len(mp.Variables.Overrides) == 0 { + continue + } + allErrs = append(allErrs, variables.ValidateMachineVariables(mp.Variables.Overrides, clusterClass.Status.Variables, + field.NewPath("spec", "topology", "workers", "machinePools").Index(i).Child("variables", "overrides"))...) + } } return allErrs } @@ -536,7 +544,7 @@ func DefaultVariables(cluster *clusterv1.Cluster, clusterClass *clusterv1.Cluste if md.Variables == nil || len(md.Variables.Overrides) == 0 { continue } - defaultedVariables, errs := variables.DefaultMachineDeploymentVariables(md.Variables.Overrides, clusterClass.Status.Variables, + defaultedVariables, errs := variables.DefaultMachineVariables(md.Variables.Overrides, clusterClass.Status.Variables, field.NewPath("spec", "topology", "workers", "machineDeployments").Index(i).Child("variables", "overrides")) if len(errs) > 0 { allErrs = append(allErrs, errs...) @@ -544,6 +552,19 @@ func DefaultVariables(cluster *clusterv1.Cluster, clusterClass *clusterv1.Cluste md.Variables.Overrides = defaultedVariables } } + for i, mp := range cluster.Spec.Topology.Workers.MachinePools { + // Continue if there are no variable overrides. + if mp.Variables == nil || len(mp.Variables.Overrides) == 0 { + continue + } + defaultedVariables, errs := variables.DefaultMachineVariables(mp.Variables.Overrides, clusterClass.Status.Variables, + field.NewPath("spec", "topology", "workers", "machinePools").Index(i).Child("variables", "overrides")) + if len(errs) > 0 { + allErrs = append(allErrs, errs...) + } else { + mp.Variables.Overrides = defaultedVariables + } + } } return allErrs } @@ -642,6 +663,11 @@ func validateTopologyMetadata(topology *clusterv1.Topology, fldPath *field.Path) fldPath.Child("workers", "machineDeployments").Index(idx).Child("metadata"), )...) } + for idx, mp := range topology.Workers.MachinePools { + allErrs = append(allErrs, mp.Metadata.Validate( + fldPath.Child("workers", "machinePools").Index(idx).Child("metadata"), + )...) + } } return allErrs } diff --git a/internal/webhooks/clusterclass.go b/internal/webhooks/clusterclass.go index ecf3bb9b7f0b..f1368c041a3a 100644 --- a/internal/webhooks/clusterclass.go +++ b/internal/webhooks/clusterclass.go @@ -77,6 +77,12 @@ func (webhook *ClusterClass) Default(_ context.Context, obj runtime.Object) erro defaultNamespace(in.Spec.Workers.MachineDeployments[i].Template.Bootstrap.Ref, in.Namespace) defaultNamespace(in.Spec.Workers.MachineDeployments[i].Template.Infrastructure.Ref, in.Namespace) } + + for i := range in.Spec.Workers.MachinePools { + defaultNamespace(in.Spec.Workers.MachinePools[i].Template.Bootstrap.Ref, in.Namespace) + defaultNamespace(in.Spec.Workers.MachinePools[i].Template.Infrastructure.Ref, in.Namespace) + } + return nil } @@ -145,6 +151,9 @@ func (webhook *ClusterClass) validate(ctx context.Context, oldClusterClass, newC // Ensure all MachineDeployment classes are unique. allErrs = append(allErrs, check.MachineDeploymentClassesAreUnique(newClusterClass)...) + // Ensure all MachinePool classes are unique. + allErrs = append(allErrs, check.MachinePoolClassesAreUnique(newClusterClass)...) + // Ensure MachineHealthChecks are valid. allErrs = append(allErrs, validateMachineHealthCheckClasses(newClusterClass)...) @@ -176,6 +185,10 @@ func (webhook *ClusterClass) validate(ctx context.Context, oldClusterClass, newC allErrs = append(allErrs, webhook.validateRemovedMachineDeploymentClassesAreNotUsed(clusters, oldClusterClass, newClusterClass)...) + // Ensure no MachinePoolClass currently in use has been removed from the ClusterClass. + allErrs = append(allErrs, + webhook.validateRemovedMachinePoolClassesAreNotUsed(clusters, oldClusterClass, newClusterClass)...) + // Ensure no MachineHealthCheck currently in use has been removed from the ClusterClass. allErrs = append(allErrs, validateUpdatesToMachineHealthCheckClasses(clusters, oldClusterClass, newClusterClass)...) @@ -256,7 +269,7 @@ func validateUpdatesToMachineHealthCheckClasses(clusters []clusterv1.Cluster, ol func (webhook *ClusterClass) validateRemovedMachineDeploymentClassesAreNotUsed(clusters []clusterv1.Cluster, oldClusterClass, newClusterClass *clusterv1.ClusterClass) field.ErrorList { var allErrs field.ErrorList - removedClasses := webhook.removedMachineClasses(oldClusterClass, newClusterClass) + removedClasses := webhook.removedMachineDeploymentClasses(oldClusterClass, newClusterClass) // If no classes have been removed return early as no further checks are needed. if len(removedClasses) == 0 { return nil @@ -277,20 +290,55 @@ func (webhook *ClusterClass) validateRemovedMachineDeploymentClassesAreNotUsed(c return allErrs } -func (webhook *ClusterClass) removedMachineClasses(oldClusterClass, newClusterClass *clusterv1.ClusterClass) sets.Set[string] { +func (webhook *ClusterClass) validateRemovedMachinePoolClassesAreNotUsed(clusters []clusterv1.Cluster, oldClusterClass, newClusterClass *clusterv1.ClusterClass) field.ErrorList { + var allErrs field.ErrorList + + removedClasses := webhook.removedMachinePoolClasses(oldClusterClass, newClusterClass) + // If no classes have been removed return early as no further checks are needed. + if len(removedClasses) == 0 { + return nil + } + // Error if any Cluster using the ClusterClass uses a MachinePoolClass that has been removed. + for _, c := range clusters { + for _, machinePoolTopology := range c.Spec.Topology.Workers.MachinePools { + if removedClasses.Has(machinePoolTopology.Class) { + // TODO(killianmuldoon): Same as above for MachineDeployments + allErrs = append(allErrs, field.Forbidden(field.NewPath("spec", "workers", "machinePools"), + fmt.Sprintf("MachinePoolClass %q cannot be deleted because it is used by Cluster %q", + machinePoolTopology.Class, c.Name), + )) + } + } + } + return allErrs +} + +func (webhook *ClusterClass) removedMachineDeploymentClasses(oldClusterClass, newClusterClass *clusterv1.ClusterClass) sets.Set[string] { removedClasses := sets.Set[string]{} - classes := webhook.classNamesFromWorkerClass(newClusterClass.Spec.Workers) + mdClasses := webhook.classNamesFromMDWorkerClass(newClusterClass.Spec.Workers) for _, oldClass := range oldClusterClass.Spec.Workers.MachineDeployments { - if !classes.Has(oldClass.Class) { + if !mdClasses.Has(oldClass.Class) { removedClasses.Insert(oldClass.Class) } } return removedClasses } -// classNamesFromWorkerClass returns the set of MachineDeployment class names. -func (webhook *ClusterClass) classNamesFromWorkerClass(w clusterv1.WorkersClass) sets.Set[string] { +func (webhook *ClusterClass) removedMachinePoolClasses(oldClusterClass, newClusterClass *clusterv1.ClusterClass) sets.Set[string] { + removedClasses := sets.Set[string]{} + + mpClasses := webhook.classNamesFromMPWorkerClass(newClusterClass.Spec.Workers) + for _, oldClass := range oldClusterClass.Spec.Workers.MachinePools { + if !mpClasses.Has(oldClass.Class) { + removedClasses.Insert(oldClass.Class) + } + } + return removedClasses +} + +// classNamesFromMDWorkerClass returns the set of MachineDeployment class names. +func (webhook *ClusterClass) classNamesFromMDWorkerClass(w clusterv1.WorkersClass) sets.Set[string] { classes := sets.Set[string]{} for _, class := range w.MachineDeployments { classes.Insert(class.Class) @@ -298,6 +346,15 @@ func (webhook *ClusterClass) classNamesFromWorkerClass(w clusterv1.WorkersClass) return classes } +// classNamesFromMPWorkerClass returns the set of MachinePool class names. +func (webhook *ClusterClass) classNamesFromMPWorkerClass(w clusterv1.WorkersClass) sets.Set[string] { + classes := sets.Set[string]{} + for _, class := range w.MachinePools { + classes.Insert(class.Class) + } + return classes +} + func (webhook *ClusterClass) getClustersUsingClusterClass(ctx context.Context, clusterClass *clusterv1.ClusterClass) ([]clusterv1.Cluster, error) { clusters := &clusterv1.ClusterList{} err := webhook.Client.List(ctx, clusters, diff --git a/internal/webhooks/patch_validation.go b/internal/webhooks/patch_validation.go index 8e7f98892246..5fe57c1136b7 100644 --- a/internal/webhooks/patch_validation.go +++ b/internal/webhooks/patch_validation.go @@ -167,7 +167,8 @@ func validateSelectors(selector clusterv1.PatchSelector, class *clusterv1.Cluste // Return an error if none of the possible selectors are enabled. if !(selector.MatchResources.InfrastructureCluster || selector.MatchResources.ControlPlane || - (selector.MatchResources.MachineDeploymentClass != nil && len(selector.MatchResources.MachineDeploymentClass.Names) > 0)) { + (selector.MatchResources.MachineDeploymentClass != nil && len(selector.MatchResources.MachineDeploymentClass.Names) > 0) || + (selector.MatchResources.MachinePoolClass != nil && len(selector.MatchResources.MachinePoolClass.Names) > 0)) { return append(allErrs, field.Invalid( path, @@ -207,33 +208,9 @@ func validateSelectors(selector clusterv1.PatchSelector, class *clusterv1.Cluste if selector.MatchResources.MachineDeploymentClass != nil && len(selector.MatchResources.MachineDeploymentClass.Names) > 0 { for i, name := range selector.MatchResources.MachineDeploymentClass.Names { match := false - if strings.Contains(name, "*") { - // selector can at most have a single * rune - if strings.Count(name, "*") > 1 { - allErrs = append(allErrs, field.Invalid( - path.Child("matchResources", "machineDeploymentClass", "names").Index(i), - name, - "selector can at most contain a single \"*\" rune")) - break - } - - // the * rune can appear only at the beginning, or ending of the selector. - if strings.Contains(name, "*") && !(strings.HasPrefix(name, "*") || strings.HasSuffix(name, "*")) { - // templateMDClass can only have "*" rune at the start or end of the string - allErrs = append(allErrs, field.Invalid( - path.Child("matchResources", "machineDeploymentClass", "names").Index(i), - name, - "\"*\" rune can only appear at the beginning, or ending of the selector")) - break - } - // a valid selector without "*" should comply with Kubernetes naming standards. - if validation.IsQualifiedName(strings.ReplaceAll(name, "*", "a")) != nil { - allErrs = append(allErrs, field.Invalid( - path.Child("matchResources", "machineDeploymentClass", "names").Index(i), - name, - "selector does not comply with the Kubernetes naming standards")) - break - } + err := validateSelectorName(name, path, "machineDeploymentClass", i) + if err != nil { + allErrs = append(allErrs, err) } for _, md := range class.Spec.Workers.MachineDeployments { var matches bool @@ -263,9 +240,74 @@ func validateSelectors(selector clusterv1.PatchSelector, class *clusterv1.Cluste } } + if selector.MatchResources.MachinePoolClass != nil && len(selector.MatchResources.MachinePoolClass.Names) > 0 { + for i, name := range selector.MatchResources.MachinePoolClass.Names { + match := false + err := validateSelectorName(name, path, "machinePoolClass", i) + if err != nil { + allErrs = append(allErrs, err) + } + for _, mp := range class.Spec.Workers.MachinePools { + var matches bool + if mp.Class == name || name == "*" { + matches = true + } else if strings.HasPrefix(name, "*") && strings.HasSuffix(mp.Class, strings.TrimPrefix(name, "*")) { + matches = true + } else if strings.HasSuffix(name, "*") && strings.HasPrefix(mp.Class, strings.TrimSuffix(name, "*")) { + matches = true + } + + if matches { + if selectorMatchTemplate(selector, mp.Template.Infrastructure.Ref) || + selectorMatchTemplate(selector, mp.Template.Bootstrap.Ref) { + match = true + break + } + } + } + if !match { + allErrs = append(allErrs, field.Invalid( + path.Child("matchResources", "machinePoolClass", "names").Index(i), + name, + "selector is enabled but matches neither the bootstrap ref nor the infrastructure ref of a MachinePool class", + )) + } + } + } + return allErrs } +// validateSelectorName validates if the selector name is valid. +func validateSelectorName(name string, path *field.Path, resourceName string, index int) *field.Error { + if strings.Contains(name, "*") { + // selector can at most have a single * rune + if strings.Count(name, "*") > 1 { + return field.Invalid( + path.Child("matchResources", resourceName, "names").Index(index), + name, + "selector can at most contain a single \"*\" rune") + } + + // the * rune can appear only at the beginning, or ending of the selector. + if strings.Contains(name, "*") && !(strings.HasPrefix(name, "*") || strings.HasSuffix(name, "*")) { + // templateMDClass or templateMPClass can only have "*" rune at the start or end of the string + return field.Invalid( + path.Child("matchResources", resourceName, "names").Index(index), + name, + "\"*\" rune can only appear at the beginning, or ending of the selector") + } + // a valid selector without "*" should comply with Kubernetes naming standards. + if validation.IsQualifiedName(strings.ReplaceAll(name, "*", "a")) != nil { + return field.Invalid( + path.Child("matchResources", resourceName, "names").Index(index), + name, + "selector does not comply with the Kubernetes naming standards") + } + } + return nil +} + // selectorMatchTemplate returns true if APIVersion and Kind for the given selector match the reference. func selectorMatchTemplate(selector clusterv1.PatchSelector, reference *corev1.ObjectReference) bool { if reference == nil { @@ -458,6 +500,17 @@ var builtinVariables = sets.Set[string]{}.Insert( // MachineDeployment ref builtins. "builtin.machineDeployment.bootstrap.configRef.name", "builtin.machineDeployment.infrastructureRef.name", + + // MachinePool builtins. + "builtin.machinePool", + "builtin.machinePool.class", + "builtin.machinePool.name", + "builtin.machinePool.replicas", + "builtin.machinePool.topologyName", + "builtin.machinePool.version", + // MachinePool ref builtins. + "builtin.machinePool.bootstrap.configRef.name", + "builtin.machinePool.infrastructureRef.name", ) // validateIndexAccess checks to see if the jsonPath is attempting to add an element in the array i.e. access by number diff --git a/main.go b/main.go index 60bae5354570..dcb64fb2f3b1 100644 --- a/main.go +++ b/main.go @@ -446,6 +446,7 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { Client: mgr.GetClient(), APIReader: mgr.GetAPIReader(), RuntimeClient: runtimeClient, + Tracker: tracker, UnstructuredCachingClient: unstructuredCachingClient, WatchFilterValue: watchFilterValue, }).SetupWithManager(ctx, mgr, concurrency(clusterTopologyConcurrency)); err != nil { diff --git a/test/extension/handlers/topologymutation/handler.go b/test/extension/handlers/topologymutation/handler.go index dc61ce92140d..a364d763713c 100644 --- a/test/extension/handlers/topologymutation/handler.go +++ b/test/extension/handlers/topologymutation/handler.go @@ -39,6 +39,7 @@ import ( runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1" "sigs.k8s.io/cluster-api/exp/runtime/topologymutation" infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1" + infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1" "sigs.k8s.io/cluster-api/test/infrastructure/kind" "sigs.k8s.io/cluster-api/util/version" ) @@ -114,6 +115,11 @@ func (h *ExtensionHandlers) GeneratePatches(ctx context.Context, req *runtimehoo log.Error(err, "error patching DockerMachineTemplate") return errors.Wrap(err, "error patching DockerMachineTemplate") } + case *infraexpv1.DockerMachinePoolTemplate: + if err := patchDockerMachinePoolTemplate(ctx, obj, variables); err != nil { + log.Error(err, "error patching DockerMachinePoolTemplate") + return errors.Wrap(err, "error patching DockerMachinePoolTemplate") + } } return nil }) @@ -210,16 +216,21 @@ func patchKubeadmControlPlaneTemplate(ctx context.Context, kcpTemplate *controlp func patchKubeadmConfigTemplate(ctx context.Context, k *bootstrapv1.KubeadmConfigTemplate, templateVariables map[string]apiextensionsv1.JSON) error { log := ctrl.LoggerFrom(ctx) - // Only patch the customImage if this DockerMachineTemplate belongs to a MachineDeployment with class "default-class" + // Only patch the customImage if this DockerMachineTemplate belongs to a MachineDeployment or MachinePool with class "default-class" // NOTE: This works by checking the existence of a builtin variable that exists only for templates liked to MachineDeployments. - mdClass, found, err := topologymutation.GetStringVariable(templateVariables, "builtin.machineDeployment.class") + mdClass, mdFound, err := topologymutation.GetStringVariable(templateVariables, "builtin.machineDeployment.class") + if err != nil { + return errors.Wrap(err, "could not set cgroup-driver to KubeadmConfigTemplate template kubeletExtraArgs") + } + + mpClass, mpFound, err := topologymutation.GetStringVariable(templateVariables, "builtin.machinePool.class") if err != nil { return errors.Wrap(err, "could not set cgroup-driver to KubeadmConfigTemplate template kubeletExtraArgs") } // This is a required variable. Return an error if it's not found. // NOTE: this should never happen because it is enforced by the patch engine. - if !found { + if !mdFound && !mpFound { return errors.New("could not set cgroup-driver to KubeadmConfigTemplate template kubeletExtraArgs: variable \"builtin.machineDeployment.class\" not found") } @@ -256,6 +267,41 @@ func patchKubeadmConfigTemplate(ctx context.Context, k *bootstrapv1.KubeadmConfi k.Spec.Template.Spec.JoinConfiguration.NodeRegistration.KubeletExtraArgs["cgroup-driver"] = cgroupDriverCgroupfs } } + + if mpClass == "default-worker" { + // If the Kubernetes version from builtin.machinePool.version is below 1.24.0 set "cgroup-driver": "cgroupDriverCgroupfs" to + // - InitConfiguration.KubeletExtraArgs + // - JoinConfiguration.KubeletExtraArgs + // NOTE: machinePool version might be different than Cluster.version or other machinePool's versions; + // the builtin variables provides the right version to use. + mpVersion, found, err := topologymutation.GetStringVariable(templateVariables, "builtin.machinePool.version") + if err != nil { + return errors.Wrap(err, "could not set cgroup-driver to KubeadmConfigTemplate template kubeletExtraArgs") + } + + // This is a required variable. Return an error if it's not found. + if !found { + return errors.New("could not set cgroup-driver to KubeadmConfigTemplate template kubeletExtraArgs: variable \"builtin.machinePool.version\" not found") + } + machinePoolVersion, err := version.ParseMajorMinorPatchTolerant(mpVersion) + if err != nil { + return errors.Wrap(err, "could not set cgroup-driver to KubeadmConfigTemplate template kubeletExtraArgs") + } + if version.Compare(machinePoolVersion, cgroupDriverPatchVersionCeiling) == -1 { + log.Info(fmt.Sprintf("Setting KubeadmConfigTemplate cgroup-driver to %q", cgroupDriverCgroupfs)) + + // Set the cgroupDriver in the JoinConfiguration. + if k.Spec.Template.Spec.JoinConfiguration == nil { + k.Spec.Template.Spec.JoinConfiguration = &bootstrapv1.JoinConfiguration{} + } + if k.Spec.Template.Spec.JoinConfiguration.NodeRegistration.KubeletExtraArgs == nil { + k.Spec.Template.Spec.JoinConfiguration.NodeRegistration.KubeletExtraArgs = map[string]string{} + } + + k.Spec.Template.Spec.JoinConfiguration.NodeRegistration.KubeletExtraArgs["cgroup-driver"] = cgroupDriverCgroupfs + } + } + return nil } @@ -313,6 +359,59 @@ func patchDockerMachineTemplate(ctx context.Context, dockerMachineTemplate *infr return errors.New("no version variables found for DockerMachineTemplate patch") } +// patchDockerMachinePoolTemplate patches the DockerMachinePoolTemplate. +// It sets the CustomImage to an image for the version in use by the MachinePool. +// NOTE: this patch is not required anymore after the introduction of the kind mapper in kind, however we keep it +// as example of version aware patches. +func patchDockerMachinePoolTemplate(ctx context.Context, dockerMachinePoolTemplate *infraexpv1.DockerMachinePoolTemplate, templateVariables map[string]apiextensionsv1.JSON) error { + log := ctrl.LoggerFrom(ctx) + + // If the DockerMachinePoolTemplate belongs to the ControlPlane, set the images using the ControlPlane version. + // NOTE: ControlPlane version might be different than Cluster.version or MachinePool's versions; + // the builtin variables provides the right version to use. + // NOTE: This works by checking the existence of a builtin variable that exists only for templates liked to the ControlPlane. + cpVersion, found, err := topologymutation.GetStringVariable(templateVariables, "builtin.controlPlane.version") + if err != nil { + return errors.Wrap(err, "could not set customImage to control plane dockerMachinePoolTemplate") + } + if found { + semVer, err := version.ParseMajorMinorPatchTolerant(cpVersion) + if err != nil { + return errors.Wrap(err, "could not parse control plane version") + } + kindMapping := kind.GetMapping(semVer, "") + + log.Info(fmt.Sprintf("Setting MachinePool custom image to %q", kindMapping.Image)) + dockerMachinePoolTemplate.Spec.Template.Spec.Template.CustomImage = kindMapping.Image + // return early if we have successfully patched a control plane dockerMachineTemplate + return nil + } + + // If the DockerMachinePoolTemplate belongs to a MachinePool, set the images the MachinePool version. + // NOTE: MachinePool version might be different than Cluster.version or other MachinePool's versions; + // the builtin variables provides the right version to use. + // NOTE: This works by checking the existence of a built in variable that exists only for templates liked to MachinePools. + mpVersion, found, err := topologymutation.GetStringVariable(templateVariables, "builtin.machinePool.version") + if err != nil { + return errors.Wrap(err, "could not set customImage to MachinePool DockerMachinePoolTemplate") + } + if found { + semVer, err := version.ParseMajorMinorPatchTolerant(mpVersion) + if err != nil { + return errors.Wrap(err, "could not parse MachinePool version") + } + kindMapping := kind.GetMapping(semVer, "") + + log.Info(fmt.Sprintf("Setting MachinePool customImage to %q", kindMapping.Image)) + dockerMachinePoolTemplate.Spec.Template.Spec.Template.CustomImage = kindMapping.Image + return nil + } + + // If the Docker Machine didn't have variables for either a control plane or a machinePool return an error. + // NOTE: this should never happen because it is enforced by the patch engine. + return errors.New("no version variables found for DockerMachinePoolTemplate patch") +} + // ValidateTopology implements the HandlerFunc for the ValidateTopology hook. // Cluster API E2E currently are just validating the hook gets called. // NOTE: custom RuntimeExtension must implement the body of this func according to the specific use case. diff --git a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepooltemplates.yaml b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepooltemplates.yaml new file mode 100644 index 000000000000..a53918c8b13c --- /dev/null +++ b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepooltemplates.yaml @@ -0,0 +1,135 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.13.0 + name: dockermachinepooltemplates.infrastructure.cluster.x-k8s.io +spec: + group: infrastructure.cluster.x-k8s.io + names: + categories: + - cluster-api + kind: DockerMachinePoolTemplate + listKind: DockerMachinePoolTemplateList + plural: dockermachinepooltemplates + singular: dockermachinepooltemplate + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Time duration since creation of DockerMachinePoolTemplate + jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: DockerMachinePoolTemplate is the Schema for the dockermachinepooltemplates + API. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: DockerMachinePoolTemplateSpec defines the desired state of + DockerMachinePoolTemplate. + properties: + template: + description: DockerMachinePoolTemplateResource describes the data + needed to create a DockerMachine from a template. + properties: + metadata: + description: 'Standard object''s metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata' + properties: + annotations: + additionalProperties: + type: string + description: 'Annotations is an unstructured key value map + stored with a resource that may be set by external tools + to store and retrieve arbitrary metadata. They are not queryable + and should be preserved when modifying objects. More info: + http://kubernetes.io/docs/user-guide/annotations' + type: object + labels: + additionalProperties: + type: string + description: 'Map of string keys and values that can be used + to organize and categorize (scope and select) objects. May + match selectors of replication controllers and services. + More info: http://kubernetes.io/docs/user-guide/labels' + type: object + type: object + spec: + description: DockerMachinePoolSpec defines the desired state of + DockerMachinePool. + properties: + providerID: + description: ProviderID is the identification ID of the Machine + Pool + type: string + providerIDList: + description: ProviderIDList is the list of identification + IDs of machine instances managed by this Machine Pool + items: + type: string + type: array + template: + description: Template contains the details used to build a + replica machine within the Machine Pool + properties: + customImage: + description: CustomImage allows customizing the container + image that is used for running the machine + type: string + extraMounts: + description: ExtraMounts describes additional mount points + for the node container These may be used to bind a hostPath + items: + description: Mount specifies a host volume to mount + into a container. This is a simplified version of + kind v1alpha4.Mount types. + properties: + containerPath: + description: Path of the mount within the container. + type: string + hostPath: + description: Path of the mount on the host. If the + hostPath doesn't exist, then runtimes should report + error. If the hostpath is a symbolic link, runtimes + should follow the symlink and mount the real destination + to container. + type: string + readOnly: + description: If set, the mount is read-only. + type: boolean + type: object + type: array + preLoadImages: + description: PreLoadImages allows to pre-load images in + a newly created machine. This can be used to speed up + tests by avoiding e.g. to download CNI images on all + the containers. + items: + type: string + type: array + type: object + type: object + required: + - spec + type: object + required: + - template + type: object + type: object + served: true + storage: true + subresources: {} diff --git a/test/infrastructure/docker/config/crd/kustomization.yaml b/test/infrastructure/docker/config/crd/kustomization.yaml index 427d52d4bed6..3bcde1a5c866 100644 --- a/test/infrastructure/docker/config/crd/kustomization.yaml +++ b/test/infrastructure/docker/config/crd/kustomization.yaml @@ -14,6 +14,7 @@ resources: - bases/infrastructure.cluster.x-k8s.io_dockermachinetemplates.yaml - bases/infrastructure.cluster.x-k8s.io_dockermachinepools.yaml - bases/infrastructure.cluster.x-k8s.io_dockerclustertemplates.yaml + - bases/infrastructure.cluster.x-k8s.io_dockermachinepooltemplates.yaml # +kubebuilder:scaffold:crdkustomizeresource patchesStrategicMerge: diff --git a/test/infrastructure/docker/exp/api/v1beta1/dockermachinepooltemplate_types.go b/test/infrastructure/docker/exp/api/v1beta1/dockermachinepooltemplate_types.go new file mode 100644 index 000000000000..b4f16926739e --- /dev/null +++ b/test/infrastructure/docker/exp/api/v1beta1/dockermachinepooltemplate_types.go @@ -0,0 +1,64 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" +) + +// DockerMachinePoolTemplateSpec defines the desired state of DockerMachinePoolTemplate. +type DockerMachinePoolTemplateSpec struct { + Template DockerMachinePoolTemplateResource `json:"template"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:resource:path=dockermachinepooltemplates,scope=Namespaced,categories=cluster-api +// +kubebuilder:storageversion +// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description="Time duration since creation of DockerMachinePoolTemplate" + +// DockerMachinePoolTemplate is the Schema for the dockermachinepooltemplates API. +type DockerMachinePoolTemplate struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec DockerMachinePoolTemplateSpec `json:"spec,omitempty"` +} + +// +kubebuilder:object:root=true + +// DockerMachinePoolTemplateList contains a list of DockerMachinePoolTemplate. +type DockerMachinePoolTemplateList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []DockerMachinePoolTemplate `json:"items"` +} + +func init() { + objectTypes = append(objectTypes, &DockerMachinePool{}, &DockerMachinePoolList{}) +} + +// DockerMachinePoolTemplateResource describes the data needed to create a DockerMachine from a template. +type DockerMachinePoolTemplateResource struct { + // Standard object's metadata. + // More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + // +optional + ObjectMeta clusterv1.ObjectMeta `json:"metadata,omitempty"` + + Spec DockerMachinePoolSpec `json:"spec"` +} diff --git a/test/infrastructure/docker/exp/api/v1beta1/zz_generated.deepcopy.go b/test/infrastructure/docker/exp/api/v1beta1/zz_generated.deepcopy.go index 97e75d41f55b..60d859b5e249 100644 --- a/test/infrastructure/docker/exp/api/v1beta1/zz_generated.deepcopy.go +++ b/test/infrastructure/docker/exp/api/v1beta1/zz_generated.deepcopy.go @@ -189,3 +189,94 @@ func (in *DockerMachinePoolStatus) DeepCopy() *DockerMachinePoolStatus { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DockerMachinePoolTemplate) DeepCopyInto(out *DockerMachinePoolTemplate) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DockerMachinePoolTemplate. +func (in *DockerMachinePoolTemplate) DeepCopy() *DockerMachinePoolTemplate { + if in == nil { + return nil + } + out := new(DockerMachinePoolTemplate) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *DockerMachinePoolTemplate) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DockerMachinePoolTemplateList) DeepCopyInto(out *DockerMachinePoolTemplateList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]DockerMachinePoolTemplate, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DockerMachinePoolTemplateList. +func (in *DockerMachinePoolTemplateList) DeepCopy() *DockerMachinePoolTemplateList { + if in == nil { + return nil + } + out := new(DockerMachinePoolTemplateList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *DockerMachinePoolTemplateList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DockerMachinePoolTemplateResource) DeepCopyInto(out *DockerMachinePoolTemplateResource) { + *out = *in + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DockerMachinePoolTemplateResource. +func (in *DockerMachinePoolTemplateResource) DeepCopy() *DockerMachinePoolTemplateResource { + if in == nil { + return nil + } + out := new(DockerMachinePoolTemplateResource) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DockerMachinePoolTemplateSpec) DeepCopyInto(out *DockerMachinePoolTemplateSpec) { + *out = *in + in.Template.DeepCopyInto(&out.Template) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DockerMachinePoolTemplateSpec. +func (in *DockerMachinePoolTemplateSpec) DeepCopy() *DockerMachinePoolTemplateSpec { + if in == nil { + return nil + } + out := new(DockerMachinePoolTemplateSpec) + in.DeepCopyInto(out) + return out +} diff --git a/test/infrastructure/docker/templates/cluster-template-development.yaml b/test/infrastructure/docker/templates/cluster-template-development.yaml index 03a4c8efa8b0..da40be954ca9 100644 --- a/test/infrastructure/docker/templates/cluster-template-development.yaml +++ b/test/infrastructure/docker/templates/cluster-template-development.yaml @@ -34,4 +34,8 @@ spec: - class: default-worker name: md-0 replicas: ${WORKER_MACHINE_COUNT} + machinePools: + - class: default-worker + name: mp-0 + replicas: ${WORKER_MACHINE_COUNT} --- diff --git a/test/infrastructure/docker/templates/clusterclass-quick-start.yaml b/test/infrastructure/docker/templates/clusterclass-quick-start.yaml index f3cc62e674b6..497b5d024be9 100644 --- a/test/infrastructure/docker/templates/clusterclass-quick-start.yaml +++ b/test/infrastructure/docker/templates/clusterclass-quick-start.yaml @@ -32,6 +32,19 @@ spec: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerMachineTemplate name: quick-start-default-worker-machinetemplate + machinePools: + - class: default-worker + template: + bootstrap: + ref: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + name: quick-start-default-worker-bootstraptemplate + infrastructure: + ref: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerMachinePoolTemplate + name: quick-start-default-worker-machinepooltemplate variables: - name: imageRepository required: true @@ -139,6 +152,19 @@ spec: valueFrom: template: | kindest/node:{{ .builtin.machineDeployment.version | replace "+" "_" }} + - selector: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerMachinePoolTemplate + matchResources: + machinePoolClass: + names: + - default-worker + jsonPatches: + - op: add + path: "/spec/template/spec/template/customImage" + valueFrom: + template: | + kindest/node:{{ .builtin.machinePool.version | replace "+" "_" }} - selector: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerMachineTemplate @@ -246,6 +272,15 @@ spec: - containerPath: "/var/run/docker.sock" hostPath: "/var/run/docker.sock" --- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerMachinePoolTemplate +metadata: + name: quick-start-default-worker-machinepooltemplate +spec: + template: + spec: + template: {} +--- apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 kind: KubeadmConfigTemplate metadata: @@ -254,4 +289,4 @@ spec: template: spec: joinConfiguration: - nodeRegistration: {} # node registration parameters are automatically injected by CAPD according to the kindest/node image in use. + nodeRegistration: {} # node registration parameters are automatically injected by CAPD according to the kindest/node image in use. \ No newline at end of file