Skip to content

Commit

Permalink
ClusterClass: use exact versions from ClusterClass, stop api bump in CC
Browse files Browse the repository at this point in the history
  • Loading branch information
sbueringer committed Sep 16, 2022
1 parent cd9e8e5 commit ceea81f
Show file tree
Hide file tree
Showing 9 changed files with 396 additions and 90 deletions.
49 changes: 14 additions & 35 deletions internal/controllers/clusterclass/clusterclass_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ import (
"sigs.k8s.io/cluster-api/controllers/external"
tlog "sigs.k8s.io/cluster-api/internal/log"
"sigs.k8s.io/cluster-api/util/annotations"
utilconversion "sigs.k8s.io/cluster-api/util/conversion"
"sigs.k8s.io/cluster-api/util/patch"
"sigs.k8s.io/cluster-api/util/predicates"
)
Expand Down Expand Up @@ -91,21 +90,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
return ctrl.Result{}, nil
}

// We use the patchHelper to patch potential changes to the ObjectReferences in ClusterClass.
patchHelper, err := patch.NewHelper(clusterClass, r.Client)
if err != nil {
return ctrl.Result{}, err
}

defer func() {
if err := patchHelper.Patch(ctx, clusterClass); err != nil {
reterr = kerrors.NewAggregate([]error{
reterr,
errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: clusterClass})},
)
}
}()

return r.reconcile(ctx, clusterClass)
}

Expand Down Expand Up @@ -133,38 +117,33 @@ func (r *Reconciler) reconcile(ctx context.Context, clusterClass *clusterv1.Clus
}
}

// Ensure all the referenced objects are owned by the ClusterClass and that references are
// upgraded to the latest contract.
// Nb. Some external objects can be referenced multiple times in the ClusterClass. We
// update the API contracts of all the references but we set the owner reference on the unique
// external object only once.
// Ensure all the referenced objects are owned by the ClusterClass.
// Nb. Some external objects can be referenced multiple times in the ClusterClass.
// We set the owner reference on the unique external object only once.
errs := []error{}
patchedRefs := sets.NewString()
reconciledRefs := sets.NewString()
for i := range refs {
ref := refs[i]
uniqueKey := uniqueObjectRefKey(ref)
if err := r.reconcileExternal(ctx, clusterClass, ref, !patchedRefs.Has(uniqueKey)); err != nil {

// Continue as we only have to reconcile every referenced object once.
if reconciledRefs.Has(uniqueKey) {
continue
}

if err := r.reconcileExternal(ctx, clusterClass, ref); err != nil {
errs = append(errs, err)
continue
}
patchedRefs.Insert(uniqueKey)
reconciledRefs.Insert(uniqueKey)
}

return ctrl.Result{}, kerrors.NewAggregate(errs)
}

func (r *Reconciler) reconcileExternal(ctx context.Context, clusterClass *clusterv1.ClusterClass, ref *corev1.ObjectReference, setOwnerRef bool) error {
func (r *Reconciler) reconcileExternal(ctx context.Context, clusterClass *clusterv1.ClusterClass, ref *corev1.ObjectReference) error {
log := ctrl.LoggerFrom(ctx)

if err := utilconversion.UpdateReferenceAPIContract(ctx, r.Client, r.APIReader, ref); err != nil {
return errors.Wrapf(err, "failed to update reference API contract of %s", tlog.KRef{Ref: ref})
}

// If we dont need to set the ownerReference then return early.
if !setOwnerRef {
return nil
}

obj, err := external.Get(ctx, r.UnstructuredCachingClient, ref, clusterClass.Namespace)
if err != nil {
if apierrors.IsNotFound(errors.Cause(err)) {
Expand All @@ -173,7 +152,7 @@ func (r *Reconciler) reconcileExternal(ctx context.Context, clusterClass *cluste
return errors.Wrapf(err, "failed to get the external object for the cluster class. refGroupVersionKind: %s, refName: %s", ref.GroupVersionKind(), ref.Name)
}

// If external ref is paused, return early.
// If referenced object is paused, return early.
if annotations.HasPaused(obj) {
log.V(3).Info("External object referenced is paused", "refGroupVersionKind", ref.GroupVersionKind(), "refName", ref.Name)
return nil
Expand Down
99 changes: 88 additions & 11 deletions internal/controllers/topology/cluster/current_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ import (
"fmt"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/controller-runtime/pkg/client"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
Expand All @@ -41,7 +43,7 @@ func (r *Reconciler) getCurrentState(ctx context.Context, s *scope.Scope) (*scop
// Reference to the InfrastructureCluster can be nil and is expected to be on the first reconcile.
// In this case the method should still be allowed to continue.
if currentState.Cluster.Spec.InfrastructureRef != nil {
infra, err := r.getCurrentInfrastructureClusterState(ctx, currentState.Cluster)
infra, err := r.getCurrentInfrastructureClusterState(ctx, s.Blueprint, currentState.Cluster)
if err != nil {
return nil, err
}
Expand All @@ -52,7 +54,7 @@ func (r *Reconciler) getCurrentState(ctx context.Context, s *scope.Scope) (*scop
// should still be allowed to continue.
currentState.ControlPlane = &scope.ControlPlaneState{}
if currentState.Cluster.Spec.ControlPlaneRef != nil {
cp, err := r.getCurrentControlPlaneState(ctx, currentState.Cluster, s.Blueprint)
cp, err := r.getCurrentControlPlaneState(ctx, s.Blueprint, currentState.Cluster)
if err != nil {
return nil, err
}
Expand All @@ -61,7 +63,7 @@ func (r *Reconciler) getCurrentState(ctx context.Context, s *scope.Scope) (*scop

// A Cluster may have zero or more MachineDeployments and a Cluster is expected to have zero MachineDeployments on
// first reconcile.
m, err := r.getCurrentMachineDeploymentState(ctx, currentState.Cluster)
m, err := r.getCurrentMachineDeploymentState(ctx, s.Blueprint, currentState.Cluster)
if err != nil {
return nil, err
}
Expand All @@ -72,8 +74,12 @@ func (r *Reconciler) getCurrentState(ctx context.Context, s *scope.Scope) (*scop

// getCurrentInfrastructureClusterState looks for the state of the InfrastructureCluster. If a reference is set but not
// found, either from an error or the object not being found, an error is thrown.
func (r *Reconciler) getCurrentInfrastructureClusterState(ctx context.Context, cluster *clusterv1.Cluster) (*unstructured.Unstructured, error) {
infra, err := r.getReference(ctx, cluster.Spec.InfrastructureRef)
func (r *Reconciler) getCurrentInfrastructureClusterState(ctx context.Context, blueprint *scope.ClusterBlueprint, cluster *clusterv1.Cluster) (*unstructured.Unstructured, error) {
ref, err := calculateCurrentRef(blueprint.InfrastructureClusterTemplate, cluster.Spec.InfrastructureRef)
if err != nil {
return nil, errors.Wrapf(err, "failed to read %s", tlog.KRef{Ref: cluster.Spec.InfrastructureRef})
}
infra, err := r.getReference(ctx, ref)
if err != nil {
return nil, errors.Wrapf(err, "failed to read %s", tlog.KRef{Ref: cluster.Spec.InfrastructureRef})
}
Expand All @@ -89,12 +95,16 @@ func (r *Reconciler) getCurrentInfrastructureClusterState(ctx context.Context, c
// getCurrentControlPlaneState returns information on the ControlPlane being used by the Cluster. If a reference is not found,
// an error is thrown. If the ControlPlane requires MachineInfrastructure according to its ClusterClass an error will be
// thrown if the ControlPlane has no MachineTemplates.
func (r *Reconciler) getCurrentControlPlaneState(ctx context.Context, cluster *clusterv1.Cluster, blueprint *scope.ClusterBlueprint) (*scope.ControlPlaneState, error) {
func (r *Reconciler) getCurrentControlPlaneState(ctx context.Context, blueprint *scope.ClusterBlueprint, cluster *clusterv1.Cluster) (*scope.ControlPlaneState, error) {
var err error
res := &scope.ControlPlaneState{}

// Get the control plane object.
res.Object, err = r.getReference(ctx, cluster.Spec.ControlPlaneRef)
ref, err := calculateCurrentRef(blueprint.ControlPlane.Template, cluster.Spec.ControlPlaneRef)
if err != nil {
return nil, errors.Wrapf(err, "failed to read %s", tlog.KRef{Ref: cluster.Spec.ControlPlaneRef})
}
res.Object, err = r.getReference(ctx, ref)
if err != nil {
return nil, errors.Wrapf(err, "failed to read %s", tlog.KRef{Ref: cluster.Spec.ControlPlaneRef})
}
Expand All @@ -115,7 +125,11 @@ func (r *Reconciler) getCurrentControlPlaneState(ctx context.Context, cluster *c
if err != nil {
return res, errors.Wrapf(err, "failed to get InfrastructureMachineTemplate reference for %s", tlog.KObj{Obj: res.Object})
}
res.InfrastructureMachineTemplate, err = r.getReference(ctx, machineInfrastructureRef)
ref, err = calculateCurrentRef(blueprint.ControlPlane.InfrastructureMachineTemplate, machineInfrastructureRef)
if err != nil {
return nil, errors.Wrapf(err, "failed to get InfrastructureMachineTemplate for %s", tlog.KObj{Obj: res.Object})
}
res.InfrastructureMachineTemplate, err = r.getReference(ctx, ref)
if err != nil {
return nil, errors.Wrapf(err, "failed to get InfrastructureMachineTemplate for %s", tlog.KObj{Obj: res.Object})
}
Expand Down Expand Up @@ -143,7 +157,7 @@ func (r *Reconciler) getCurrentControlPlaneState(ctx context.Context, cluster *c
// whether they are managed by a ClusterClass using labels. A Cluster may have zero or more MachineDeployments. Zero is
// expected on first reconcile. If MachineDeployments are found for the Cluster their Infrastructure and Bootstrap references
// are inspected. Where these are not found the function will throw an error.
func (r *Reconciler) getCurrentMachineDeploymentState(ctx context.Context, cluster *clusterv1.Cluster) (map[string]*scope.MachineDeploymentState, error) {
func (r *Reconciler) getCurrentMachineDeploymentState(ctx context.Context, blueprint *scope.ClusterBlueprint, cluster *clusterv1.Cluster) (map[string]*scope.MachineDeploymentState, error) {
state := make(scope.MachineDeploymentsStateMap)

// List all the machine deployments in the current cluster and in a managed topology.
Expand Down Expand Up @@ -178,12 +192,26 @@ func (r *Reconciler) getCurrentMachineDeploymentState(ctx context.Context, clust
return nil, fmt.Errorf("duplicate %s found for label %s: %s", tlog.KObj{Obj: m}, clusterv1.ClusterTopologyMachineDeploymentLabelName, mdTopologyName)
}

mdClassName := getMDClassName(cluster, mdTopologyName)
if mdClassName == "" {
return nil, fmt.Errorf("failed to find MachineDeployment topology %s in %s", mdTopologyName, tlog.KObj{Obj: cluster})
}

mdBluePrint, ok := blueprint.MachineDeployments[mdClassName]
if !ok {
return nil, fmt.Errorf("failed to find MachineDeployment class %s in ClusterClass", mdClassName)
}

// Gets the BootstrapTemplate
bootstrapRef := m.Spec.Template.Spec.Bootstrap.ConfigRef
if bootstrapRef == nil {
return nil, fmt.Errorf("%s does not have a reference to a Bootstrap Config", tlog.KObj{Obj: m})
}
b, err := r.getReference(ctx, bootstrapRef)
ref, err := calculateCurrentRef(mdBluePrint.BootstrapTemplate, bootstrapRef)
if err != nil {
return nil, errors.Wrap(err, fmt.Sprintf("%s Bootstrap reference could not be retrieved", tlog.KObj{Obj: m}))
}
b, err := r.getReference(ctx, ref)
if err != nil {
return nil, errors.Wrap(err, fmt.Sprintf("%s Bootstrap reference could not be retrieved", tlog.KObj{Obj: m}))
}
Expand All @@ -199,7 +227,11 @@ func (r *Reconciler) getCurrentMachineDeploymentState(ctx context.Context, clust
if infraRef.Name == "" {
return nil, fmt.Errorf("%s does not have a reference to a InfrastructureMachineTemplate", tlog.KObj{Obj: m})
}
infra, err := r.getReference(ctx, &infraRef)
ref, err = calculateCurrentRef(mdBluePrint.InfrastructureMachineTemplate, &infraRef)
if err != nil {
return nil, errors.Wrap(err, fmt.Sprintf("%s Infrastructure reference could not be retrieved", tlog.KObj{Obj: m}))
}
infra, err := r.getReference(ctx, ref)
if err != nil {
return nil, errors.Wrap(err, fmt.Sprintf("%s Infrastructure reference could not be retrieved", tlog.KObj{Obj: m}))
}
Expand Down Expand Up @@ -232,3 +264,48 @@ func (r *Reconciler) getCurrentMachineDeploymentState(ctx context.Context, clust
}
return state, nil
}

// calculateCurrentRef calculates the ref to use to get an object based on the ref in ClusterClass
// and the ref in the managed topology (Cluster, KCP, MD, ...).
// If group and kind in both refs are the same we use the version from ClusterClass.
// We have to do this so the topology controller can work on the exact version specified
// in the ClusterClass by diffing current and desired state objects of the same version during reconcile.
// If group or kind are different we use the ref from the managed topology. In this case
// the group/kind was changed in the ClusterClass. We can't use the version from ClusterClass as
// it might not match the old group/kind. But that's fine as a change in group/kind will end up in a diff and a
// rollout anyway.
// FIXME(sbueringer) unit test
func calculateCurrentRef(templateFromClusterClass *unstructured.Unstructured, topologyRef *corev1.ObjectReference) (*corev1.ObjectReference, error) {
topologyGK, err := schema.ParseGroupVersion(topologyRef.APIVersion)
if err != nil {
return nil, errors.Wrapf(err, "failed to parse apiVersion: %q", topologyRef.APIVersion)
}

apiVersion := topologyRef.APIVersion
// Use apiVersion from ClusterClass if group and kind is the same.
if templateFromClusterClass.GroupVersionKind().Group == topologyGK.Group &&
templateFromClusterClass.GetKind() == topologyRef.Kind {
apiVersion = templateFromClusterClass.GetAPIVersion()
}

return &corev1.ObjectReference{
APIVersion: apiVersion,
Kind: topologyRef.Kind,
Namespace: topologyRef.Namespace,
Name: topologyRef.Name,
}, nil
}

// getMDClassName retrieves the MDClass name by looking up the MDTopology in the Cluster.
func getMDClassName(cluster *clusterv1.Cluster, mdTopologyName string) string {
if cluster.Spec.Topology.Workers == nil {
return ""
}

for _, mdTopology := range cluster.Spec.Topology.Workers.MachineDeployments {
if mdTopology.Name == mdTopologyName {
return mdTopology.Class
}
}
return ""
}
Loading

0 comments on commit ceea81f

Please sign in to comment.