From ae31c7c8fca559ff2a76d951e2d324872ab5cfdf Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Tue, 31 Aug 2021 19:53:07 +0200 Subject: [PATCH] Implement template deletion for topology-owned MD and MS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stefan Büringer buringerst@vmware.com Co-authored-by: fabriziopandini --- api/v1alpha4/machinedeployment_types.go | 6 + api/v1alpha4/machineset_types.go | 6 + config/rbac/role.yaml | 51 ++++- controllers/external/util.go | 13 ++ controllers/machinedeployment_sync.go | 16 +- .../{controller.go => cluster_controller.go} | 5 +- controllers/topology/desired_state.go | 8 + controllers/topology/desired_state_test.go | 3 + controllers/topology/internal/log/log.go | 15 ++ .../topology/machinedeployment_controller.go | 151 ++++++++++++++ .../machinedeployment_controller_test.go | 138 +++++++++++++ controllers/topology/machineset_controller.go | 193 ++++++++++++++++++ .../topology/machineset_controller_test.go | 188 +++++++++++++++++ controllers/topology/reconcile_state.go | 10 +- controllers/topology/util.go | 111 ++++++++++ controllers/topology/util_test.go | 96 +++++++++ internal/testtypes/builders.go | 65 ++++++ main.go | 18 ++ util/labels/helpers.go | 6 + util/predicates/generic_predicates.go | 32 +++ util/util.go | 2 +- 21 files changed, 1122 insertions(+), 11 deletions(-) rename controllers/topology/{controller.go => cluster_controller.go} (97%) create mode 100644 controllers/topology/machinedeployment_controller.go create mode 100644 controllers/topology/machinedeployment_controller_test.go create mode 100644 controllers/topology/machineset_controller.go create mode 100644 controllers/topology/machineset_controller_test.go diff --git a/api/v1alpha4/machinedeployment_types.go b/api/v1alpha4/machinedeployment_types.go index 805a9dcc924c..3389aa0b26eb 100644 --- a/api/v1alpha4/machinedeployment_types.go +++ b/api/v1alpha4/machinedeployment_types.go @@ -21,6 +21,12 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" ) +const ( + // MachineDeploymentTopologyFinalizer is the finalizer used by the topology MachineDeployment controller to + // clean up referenced template resources if necessary when a MachineDeployment is being deleted. + MachineDeploymentTopologyFinalizer = "machinedeployment.topology.cluster.x-k8s.io" +) + // MachineDeploymentStrategyType defines the type of MachineDeployment rollout strategies. type MachineDeploymentStrategyType string diff --git a/api/v1alpha4/machineset_types.go b/api/v1alpha4/machineset_types.go index 8dd526b65269..e043a5e1a2f4 100644 --- a/api/v1alpha4/machineset_types.go +++ b/api/v1alpha4/machineset_types.go @@ -24,6 +24,12 @@ import ( capierrors "sigs.k8s.io/cluster-api/errors" ) +const ( + // MachineSetTopologyFinalizer is the finalizer used by the topology MachineDeployment controller to + // clean up referenced template resources if necessary when a MachineSet is being deleted. + MachineSetTopologyFinalizer = "machineset.topology.cluster.x-k8s.io" +) + // ANCHOR: MachineSetSpec // MachineSetSpec defines the desired state of MachineSet. 
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 153c80e7cb34..29a39004c3f3 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -66,7 +66,14 @@ rules: - cluster.x-k8s.io resources: - clusterclasses - - machinedeployments + verbs: + - get + - list + - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - clusters verbs: - get - list @@ -98,6 +105,29 @@ rules: - patch - update - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machinedeployments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machinedeployments + - machinedeployments/finalizers + verbs: + - get + - list + - patch + - update + - watch - apiGroups: - cluster.x-k8s.io resources: @@ -162,6 +192,25 @@ rules: - get - list - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machinesets + verbs: + - get + - list + - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machinesets + - machinesets/finalizers + verbs: + - get + - list + - patch + - update + - watch - apiGroups: - cluster.x-k8s.io resources: diff --git a/controllers/external/util.go b/controllers/external/util.go index 3e45a2034701..ce06ff5d5d89 100644 --- a/controllers/external/util.go +++ b/controllers/external/util.go @@ -50,6 +50,19 @@ func Get(ctx context.Context, c client.Client, ref *corev1.ObjectReference, name return obj, nil } +// Delete uses the client and reference to delete an external, unstructured object. +func Delete(ctx context.Context, c client.Client, ref *corev1.ObjectReference) error { + obj := new(unstructured.Unstructured) + obj.SetAPIVersion(ref.APIVersion) + obj.SetKind(ref.Kind) + obj.SetName(ref.Name) + obj.SetNamespace(ref.Namespace) + if err := c.Delete(ctx, obj); err != nil { + return errors.Wrapf(err, "failed to delete %s external object %q/%q", obj.GetKind(), obj.GetNamespace(), obj.GetName()) + } + return nil +} + // CloneTemplateInput is the input to CloneTemplate. type CloneTemplateInput struct { // Client is the controller runtime client. diff --git a/controllers/machinedeployment_sync.go b/controllers/machinedeployment_sync.go index 83693c0a94dd..c3264ff7a00f 100644 --- a/controllers/machinedeployment_sync.go +++ b/controllers/machinedeployment_sync.go @@ -31,11 +31,14 @@ import ( "k8s.io/client-go/util/retry" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" "sigs.k8s.io/cluster-api/controllers/mdutil" + "sigs.k8s.io/cluster-api/feature" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/labels" "sigs.k8s.io/cluster-api/util/patch" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) // sync is responsible for reconciling deployments on scaling events or when they @@ -173,13 +176,24 @@ func (r *MachineDeploymentReconciler) getNewMachineSet(ctx context.Context, d *c }, } + if feature.Gates.Enabled(feature.ClusterTopology) { + // If the MachineDeployment is owned by a Cluster Topology, + // add the finalizer to allow the topology controller to + // clean up resources when the MachineSet is deleted. + // MachineSets are deleted during rollout (e.g. template rotation) and + // after MachineDeployment deletion. 
+ if labels.IsTopologyOwned(d) { + controllerutil.AddFinalizer(&newMS, clusterv1.MachineSetTopologyFinalizer) + } + } + if d.Spec.Strategy.RollingUpdate.DeletePolicy != nil { newMS.Spec.DeletePolicy = *d.Spec.Strategy.RollingUpdate.DeletePolicy } // Add foregroundDeletion finalizer to MachineSet if the MachineDeployment has it if sets.NewString(d.Finalizers...).Has(metav1.FinalizerDeleteDependents) { - newMS.Finalizers = []string{metav1.FinalizerDeleteDependents} + controllerutil.AddFinalizer(&newMS, metav1.FinalizerDeleteDependents) } allMSs := append(oldMSs, &newMS) diff --git a/controllers/topology/controller.go b/controllers/topology/cluster_controller.go similarity index 97% rename from controllers/topology/controller.go rename to controllers/topology/cluster_controller.go index 050768f04d6b..57f6b4bb5007 100644 --- a/controllers/topology/controller.go +++ b/controllers/topology/cluster_controller.go @@ -39,7 +39,8 @@ import ( // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io;controlplane.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusterclasses;machinedeployments,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusterclasses,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch // ClusterReconciler reconciles a managed topology for a Cluster object. @@ -57,7 +58,7 @@ type ClusterReconciler struct { func (r *ClusterReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { c, err := ctrl.NewControllerManagedBy(mgr). For(&clusterv1.Cluster{}). - Named("cluster/topology"). + Named("topology/cluster"). Watches( &source.Kind{Type: &clusterv1.ClusterClass{}}, handler.EnqueueRequestsFromMapFunc(r.clusterClassToCluster), diff --git a/controllers/topology/desired_state.go b/controllers/topology/desired_state.go index bf4478f6c1ac..84dc83fb7b66 100644 --- a/controllers/topology/desired_state.go +++ b/controllers/topology/desired_state.go @@ -31,6 +31,7 @@ import ( "sigs.k8s.io/cluster-api/controllers/topology/internal/contract" tlog "sigs.k8s.io/cluster-api/controllers/topology/internal/log" "sigs.k8s.io/cluster-api/controllers/topology/internal/scope" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) // computeDesiredState computes the desired state of the cluster topology. @@ -301,6 +302,13 @@ func computeMachineDeployment(_ context.Context, s *scope.Scope, machineDeployme }, } + // If it's a new MachineDeployment, set the finalizer. + // Note: we only add it on creation to avoid race conditions later on when + // the MachineDeployment topology controller removes the finalizer. + if currentMachineDeployment == nil { + controllerutil.AddFinalizer(desiredMachineDeploymentObj, clusterv1.MachineDeploymentTopologyFinalizer) + } + // If an existing MachineDeployment is present, override the MachineDeployment generate name // re-using the existing name (this will help in reconcile). 
if currentMachineDeployment != nil && currentMachineDeployment.Object != nil { diff --git a/controllers/topology/desired_state_test.go b/controllers/topology/desired_state_test.go index d1e0c93e2ba3..ea53a0405f9d 100644 --- a/controllers/topology/desired_state_test.go +++ b/controllers/topology/desired_state_test.go @@ -21,6 +21,7 @@ import ( "testing" "sigs.k8s.io/cluster-api/internal/testtypes" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" @@ -512,6 +513,7 @@ func TestComputeMachineDeployment(t *testing.T) { g.Expect(actualMd.Labels).To(HaveKeyWithValue(clusterv1.ClusterTopologyMachineDeploymentLabelName, "big-pool-of-machines")) g.Expect(actualMd.Labels).To(HaveKey(clusterv1.ClusterTopologyOwnedLabel)) + g.Expect(controllerutil.ContainsFinalizer(actualMd, clusterv1.MachineDeploymentTopologyFinalizer)).To(BeTrue()) g.Expect(actualMd.Spec.Template.ObjectMeta.Labels).To(HaveKeyWithValue("foo", "baz")) g.Expect(actualMd.Spec.Template.ObjectMeta.Labels).To(HaveKeyWithValue("fizz", "buzz")) @@ -559,6 +561,7 @@ func TestComputeMachineDeployment(t *testing.T) { g.Expect(actualMd.Labels).To(HaveKeyWithValue(clusterv1.ClusterTopologyMachineDeploymentLabelName, "big-pool-of-machines")) g.Expect(actualMd.Labels).To(HaveKey(clusterv1.ClusterTopologyOwnedLabel)) + g.Expect(controllerutil.ContainsFinalizer(actualMd, clusterv1.MachineDeploymentTopologyFinalizer)).To(BeFalse()) g.Expect(actualMd.Spec.Template.ObjectMeta.Labels).To(HaveKeyWithValue("foo", "baz")) g.Expect(actualMd.Spec.Template.ObjectMeta.Labels).To(HaveKeyWithValue("fizz", "buzz")) diff --git a/controllers/topology/internal/log/log.go b/controllers/topology/internal/log/log.go index dee8e3977705..fae4b537ba71 100644 --- a/controllers/topology/internal/log/log.go +++ b/controllers/topology/internal/log/log.go @@ -47,6 +47,10 @@ type Logger interface { // a resources being part of the Cluster by reconciled. WithObject(obj client.Object) Logger + // WithRef adds to the logger information about the object ref being modified by reconcile, which in most cases is + // a resource being part of the Cluster being reconciled. + WithRef(ref *corev1.ObjectReference) Logger + // WithMachineDeployment adds to the logger information about the MachineDeployment object being processed. WithMachineDeployment(md *clusterv1.MachineDeployment) Logger @@ -80,6 +84,17 @@ func (l *topologyReconcileLogger) WithObject(obj client.Object) Logger { return l } +// WithRef adds to the logger information about the object ref being modified by reconcile, which in most cases is +// a resource being part of the Cluster being reconciled. +func (l *topologyReconcileLogger) WithRef(ref *corev1.ObjectReference) Logger { + l.Logger = l.Logger.WithValues( + "object groupVersion", ref.APIVersion, + "object kind", ref.Kind, + "object", ref.Name, + ) + return l +} + // WithMachineDeployment adds to the logger information about the MachineDeployment object being processed. func (l *topologyReconcileLogger) WithMachineDeployment(md *clusterv1.MachineDeployment) Logger { topologyName := md.Labels[clusterv1.ClusterTopologyMachineDeploymentLabelName] diff --git a/controllers/topology/machinedeployment_controller.go b/controllers/topology/machinedeployment_controller.go new file mode 100644 index 000000000000..6b65c7f5c26d --- /dev/null +++ b/controllers/topology/machinedeployment_controller.go @@ -0,0 +1,151 @@ +/* +Copyright 2021 The Kubernetes Authors.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package topology + +import ( + "context" + + "github.com/pkg/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" + clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" + tlog "sigs.k8s.io/cluster-api/controllers/topology/internal/log" + "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/annotations" + "sigs.k8s.io/cluster-api/util/patch" + "sigs.k8s.io/cluster-api/util/predicates" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io,resources=*,verbs=delete +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments;machinedeployments/finalizers,verbs=get;list;watch;update;patch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinesets,verbs=get;list;watch + +// MachineDeploymentReconciler deletes referenced templates during deletion of topology-owned MachineDeployments. +// The templates are only deleted, if they are not used in other MachineDeployments or MachineSets which are not in deleting state, +// i.e. the templates would otherwise be orphaned after the MachineDeployment deletion completes. +// Note: To achieve this the cluster topology controller sets a finalizer to hook into the MachineDeployment deletions. +type MachineDeploymentReconciler struct { + Client client.Client + // APIReader is used to list MachineSets directly via the API server to avoid + // race conditions caused by an outdated cache. + APIReader client.Reader + WatchFilterValue string +} + +func (r *MachineDeploymentReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { + err := ctrl.NewControllerManagedBy(mgr). + For(&clusterv1.MachineDeployment{}). + Named("topology/machinedeployment"). + WithOptions(options). + WithEventFilter(predicates.All(ctrl.LoggerFrom(ctx), + predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue), + predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx)), + )). + Complete(r) + if err != nil { + return errors.Wrap(err, "failed setting up with a controller manager") + } + + return nil +} + +// Reconcile deletes referenced templates during deletion of topology-owned MachineDeployments. +// The templates are only deleted, if they are not used in other MachineDeployments or MachineSets which are not in deleting state, +// i.e. the templates would otherwise be orphaned after the MachineDeployment deletion completes. +// Additional context: +// * MachineDeployment deletion: +// * MachineDeployments are deleted and garbage collected first (without waiting until all MachineSets are also deleted). +// * After that, deletion of MachineSets is automatically triggered by Kubernetes based on owner references. 
+// Note: We assume templates are not reused by different MachineDeployments, which is only true for topology-owned +// MachineDeployments. +// We don't have to set the finalizer, as it's already set during MachineDeployment creation +// in the cluster topology controller. +func (r *MachineDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + + // Fetch the MachineDeployment instance. + md := &clusterv1.MachineDeployment{} + if err := r.Client.Get(ctx, req.NamespacedName, md); err != nil { + if apierrors.IsNotFound(err) { + // Object not found, return. + return ctrl.Result{}, nil + } + // Error reading the object - requeue the request. + return ctrl.Result{}, errors.Wrapf(err, "failed to get MachineDeployment/%s", req.NamespacedName.Name) + } + + cluster, err := util.GetClusterByName(ctx, r.Client, md.Namespace, md.Spec.ClusterName) + if err != nil { + return ctrl.Result{}, err + } + + // Return early if the object or Cluster is paused. + if annotations.IsPaused(cluster, md) { + log.Info("Reconciliation is paused for this object") + return ctrl.Result{}, nil + } + + // Handle deletion reconciliation loop. + if !md.ObjectMeta.DeletionTimestamp.IsZero() { + return r.reconcileDelete(ctx, md) + } + + // Nothing to do. + return ctrl.Result{}, nil +} + +// reconcileDelete deletes templates referenced in a MachineDeployment, if the templates are not used by other +// MachineDeployments or MachineSets. +func (r *MachineDeploymentReconciler) reconcileDelete(ctx context.Context, md *clusterv1.MachineDeployment) (ctrl.Result, error) { + // Get the corresponding MachineSets. + msList, err := getMachineSetsForDeployment(ctx, r.APIReader, client.ObjectKeyFromObject(md)) + if err != nil { + return ctrl.Result{}, err + } + + // Calculate which templates are still in use by MachineDeployments or MachineSets which are not in deleting state. + templatesInUse, err := calculateTemplatesInUse(md, msList) + if err != nil { + return ctrl.Result{}, err + } + + // Delete unused templates. + ref := md.Spec.Template.Spec.Bootstrap.ConfigRef + if err := deleteTemplateIfUnused(ctx, r.Client, templatesInUse, ref); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete bootstrap template for %s", tlog.KObj{Obj: md}) + } + ref = &md.Spec.Template.Spec.InfrastructureRef + if err := deleteTemplateIfUnused(ctx, r.Client, templatesInUse, ref); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete infrastructure template for %s", tlog.KObj{Obj: md}) + } + + // Remove the finalizer so the MachineDeployment can be garbage collected by Kubernetes. + patchHelper, err := patch.NewHelper(md, r.Client) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: md}) + } + controllerutil.RemoveFinalizer(md, clusterv1.MachineDeploymentTopologyFinalizer) + if err := patchHelper.Patch(ctx, md); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: md}) + } + + return ctrl.Result{}, nil +} diff --git a/controllers/topology/machinedeployment_controller_test.go b/controllers/topology/machinedeployment_controller_test.go new file mode 100644 index 000000000000..3fb591aed469 --- /dev/null +++ b/controllers/topology/machinedeployment_controller_test.go @@ -0,0 +1,138 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package topology + +import ( + "testing" + + . "github.com/onsi/gomega" + "github.com/pkg/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" + "sigs.k8s.io/cluster-api/internal/testtypes" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +func TestMachineDeploymentReconciler_ReconcileDelete(t *testing.T) { + deletionTimeStamp := metav1.Now() + + mdBT := testtypes.NewBootstrapTemplateBuilder(metav1.NamespaceDefault, "mdBT").Build() + mdIMT := testtypes.NewInfrastructureMachineTemplateBuilder(metav1.NamespaceDefault, "mdIMT").Build() + md := testtypes.NewMachineDeploymentBuilder(metav1.NamespaceDefault, "md"). + WithBootstrapTemplate(mdBT). + WithInfrastructureTemplate(mdIMT). + Build() + md.SetDeletionTimestamp(&deletionTimeStamp) + + t.Run("Should delete templates of a MachineDeployment", func(t *testing.T) { + g := NewWithT(t) + + fakeClient := fake.NewClientBuilder(). + WithScheme(fakeScheme). + WithObjects(md, mdBT, mdIMT). + Build() + + r := &MachineDeploymentReconciler{ + Client: fakeClient, + APIReader: fakeClient, + } + _, err := r.reconcileDelete(ctx, md) + g.Expect(err).ToNot(HaveOccurred()) + + afterMD := &clusterv1.MachineDeployment{} + g.Expect(fakeClient.Get(ctx, client.ObjectKeyFromObject(md), afterMD)).To(Succeed()) + + g.Expect(controllerutil.ContainsFinalizer(afterMD, clusterv1.MachineDeploymentTopologyFinalizer)).To(BeFalse()) + g.Expect(templateExists(fakeClient, mdBT)).To(BeFalse()) + g.Expect(templateExists(fakeClient, mdIMT)).To(BeFalse()) + }) + + t.Run("Should delete infra template of a MachineDeployment without a bootstrap template", func(t *testing.T) { + g := NewWithT(t) + + mdWithoutBootstrapTemplateIMT := testtypes.NewInfrastructureMachineTemplateBuilder(metav1.NamespaceDefault, "mdWithoutBootstrapTemplateIMT").Build() + mdWithoutBootstrapTemplate := testtypes.NewMachineDeploymentBuilder(metav1.NamespaceDefault, "mdWithoutBootstrapTemplate"). + WithInfrastructureTemplate(mdWithoutBootstrapTemplateIMT). + Build() + mdWithoutBootstrapTemplate.SetDeletionTimestamp(&deletionTimeStamp) + + fakeClient := fake.NewClientBuilder(). + WithScheme(fakeScheme). + WithObjects(mdWithoutBootstrapTemplate, mdWithoutBootstrapTemplateIMT). 
+ Build() + + r := &MachineDeploymentReconciler{ + Client: fakeClient, + APIReader: fakeClient, + } + _, err := r.reconcileDelete(ctx, mdWithoutBootstrapTemplate) + g.Expect(err).ToNot(HaveOccurred()) + + afterMD := &clusterv1.MachineDeployment{} + g.Expect(fakeClient.Get(ctx, client.ObjectKeyFromObject(mdWithoutBootstrapTemplate), afterMD)).To(Succeed()) + + g.Expect(controllerutil.ContainsFinalizer(afterMD, clusterv1.MachineDeploymentTopologyFinalizer)).To(BeFalse()) + g.Expect(templateExists(fakeClient, mdWithoutBootstrapTemplateIMT)).To(BeFalse()) + }) + + t.Run("Should not delete templates of a MachineDeployment when they are still in use in a MachineSet", func(t *testing.T) { + g := NewWithT(t) + + ms := testtypes.NewMachineSetBuilder(md.Namespace, "ms"). + WithBootstrapTemplate(mdBT). + WithInfrastructureTemplate(mdIMT). + WithLabels(map[string]string{ + clusterv1.MachineDeploymentLabelName: md.Name, + }). + Build() + + fakeClient := fake.NewClientBuilder(). + WithScheme(fakeScheme). + WithObjects(md, ms, mdBT, mdIMT). + Build() + + r := &MachineDeploymentReconciler{ + Client: fakeClient, + APIReader: fakeClient, + } + _, err := r.reconcileDelete(ctx, md) + g.Expect(err).ToNot(HaveOccurred()) + + afterMD := &clusterv1.MachineDeployment{} + g.Expect(fakeClient.Get(ctx, client.ObjectKeyFromObject(md), afterMD)).To(Succeed()) + + g.Expect(controllerutil.ContainsFinalizer(afterMD, clusterv1.MachineDeploymentTopologyFinalizer)).To(BeFalse()) + g.Expect(templateExists(fakeClient, mdBT)).To(BeTrue()) + g.Expect(templateExists(fakeClient, mdIMT)).To(BeTrue()) + }) +} + +func templateExists(fakeClient client.Reader, template *unstructured.Unstructured) bool { + obj := &unstructured.Unstructured{} + obj.SetKind(template.GetKind()) + obj.SetAPIVersion(template.GetAPIVersion()) + + err := fakeClient.Get(ctx, client.ObjectKeyFromObject(template), obj) + if err != nil && !apierrors.IsNotFound(err) { + panic(errors.Wrapf(err, "failed to get %s/%s", template.GetKind(), template.GetName())) + } + return err == nil +} diff --git a/controllers/topology/machineset_controller.go b/controllers/topology/machineset_controller.go new file mode 100644 index 000000000000..f289f1cfd7a6 --- /dev/null +++ b/controllers/topology/machineset_controller.go @@ -0,0 +1,193 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +package topology + +import ( + "context" + + "github.com/pkg/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" + tlog "sigs.k8s.io/cluster-api/controllers/topology/internal/log" + "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/annotations" + "sigs.k8s.io/cluster-api/util/patch" + "sigs.k8s.io/cluster-api/util/predicates" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io,resources=*,verbs=delete +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinesets;machinesets/finalizers,verbs=get;list;watch;update;patch + +// MachineSetReconciler deletes referenced templates during deletion of topology-owned MachineSets. +// The templates are only deleted, if they are not used in other MachineDeployments or MachineSets which are not in deleting state, +// i.e. the templates would otherwise be orphaned after the MachineSet deletion completes. +// Note: To achieve this the reconciler sets a finalizer to hook into the MachineSet deletions. +type MachineSetReconciler struct { + Client client.Client + // APIReader is used to list MachineSets directly via the API server to avoid + // race conditions caused by an outdated cache. + APIReader client.Reader + WatchFilterValue string +} + +func (r *MachineSetReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { + err := ctrl.NewControllerManagedBy(mgr). + For(&clusterv1.MachineSet{}). + Named("topology/machineset"). + WithOptions(options). + WithEventFilter(predicates.All(ctrl.LoggerFrom(ctx), + predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue), + predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx)), + )). + Complete(r) + if err != nil { + return errors.Wrap(err, "failed setting up with a controller manager") + } + + return nil +} + +// Reconcile deletes referenced templates during deletion of topology-owned MachineSets. +// The templates are only deleted, if they are not used in other MachineDeployments or MachineSets which are not in deleting state, +// i.e. the templates would otherwise be orphaned after the MachineSet deletion completes. +// Additional context: +// * MachineSet deletion: +// * MachineSets are deleted and garbage collected first (without waiting until all Machines are also deleted) +// * After that, deletion of Machines is automatically triggered by Kubernetes based on owner references. +// Note: We assume templates are not reused by different MachineDeployments, which is (only) true for topology-owned +// MachineDeployments. +// We don't have to set the finalizer, as it's already set during MachineSet creation +// in the MachineDeployment controller. +func (r *MachineSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + + // Fetch the MachineSet instance.
+ ms := &clusterv1.MachineSet{} + if err := r.Client.Get(ctx, req.NamespacedName, ms); err != nil { + if apierrors.IsNotFound(err) { + // Object not found, return. + return ctrl.Result{}, nil + } + // Error reading the object - requeue the request. + return ctrl.Result{}, errors.Wrapf(err, "failed to get MachineSet/%s", req.NamespacedName.Name) + } + + cluster, err := util.GetClusterByName(ctx, r.Client, ms.Namespace, ms.Spec.ClusterName) + if err != nil { + return ctrl.Result{}, err + } + + // Return early if the object or Cluster is paused. + if annotations.IsPaused(cluster, ms) { + log.Info("Reconciliation is paused for this object") + return ctrl.Result{}, nil + } + + // Handle deletion reconciliation loop. + if !ms.ObjectMeta.DeletionTimestamp.IsZero() { + return r.reconcileDelete(ctx, ms) + } + + // Nothing to do. + return ctrl.Result{}, nil +} + +// reconcileDelete deletes templates referenced in a MachineSet, if the templates are not used by other +// MachineDeployments or MachineSets. +func (r *MachineSetReconciler) reconcileDelete(ctx context.Context, ms *clusterv1.MachineSet) (ctrl.Result, error) { + // Gets the name of the MachineDeployment that controls this MachineSet. + mdName, err := getMachineDeploymentName(ms) + if err != nil { + return ctrl.Result{}, err + } + + // Get all the MachineSets for the MachineDeployment. + msList, err := getMachineSetsForDeployment(ctx, r.APIReader, *mdName) + if err != nil { + return ctrl.Result{}, err + } + + // Fetch the MachineDeployment instance, if it still exists. + // Note: This can happen because MachineDeployments are deleted before their corresponding MachineSets. + md := &clusterv1.MachineDeployment{} + if err := r.Client.Get(ctx, *mdName, md); err != nil { + if !apierrors.IsNotFound(err) { + // Error reading the object - requeue the request. + return ctrl.Result{}, errors.Wrapf(err, "failed to get MachineDeployment/%s", mdName.Name) + } + // If the MachineDeployment doesn't exist anymore, set md to nil, so we can handle that case correctly below. + md = nil + } + + // Calculate which templates are still in use by MachineDeployments or MachineSets which are not in deleting state. + templatesInUse, err := calculateTemplatesInUse(md, msList) + if err != nil { + return ctrl.Result{}, err + } + + // Delete unused templates. + ref := ms.Spec.Template.Spec.Bootstrap.ConfigRef + if err := deleteTemplateIfUnused(ctx, r.Client, templatesInUse, ref); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete bootstrap template for %s", tlog.KObj{Obj: ms}) + } + ref = &ms.Spec.Template.Spec.InfrastructureRef + if err := deleteTemplateIfUnused(ctx, r.Client, templatesInUse, ref); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete infrastructure template for %s", tlog.KObj{Obj: ms}) + } + + // Remove the finalizer so the MachineSet can be garbage collected by Kubernetes. + patchHelper, err := patch.NewHelper(ms, r.Client) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: ms}) + } + controllerutil.RemoveFinalizer(ms, clusterv1.MachineSetTopologyFinalizer) + if err := patchHelper.Patch(ctx, ms); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: ms}) + } + + return ctrl.Result{}, nil +} + +// getMachineDeploymentName calculates the MachineDeployment name based on owner references. 
+func getMachineDeploymentName(ms *clusterv1.MachineSet) (*types.NamespacedName, error) { + for _, ref := range ms.OwnerReferences { + if ref.Kind != "MachineDeployment" { + continue + } + gv, err := schema.ParseGroupVersion(ref.APIVersion) + if err != nil { + return nil, errors.Errorf("could not calculate MachineDeployment name for %s: invalid apiVersion %q: %v", + tlog.KObj{Obj: ms}, ref.APIVersion, err) + } + if gv.Group == clusterv1.GroupVersion.Group { + return &client.ObjectKey{Namespace: ms.Namespace, Name: ref.Name}, nil + } + } + + // Note: Once we set an owner reference to a MachineDeployment in a MachineSet it stays there + // and is not deleted when the MachineDeployment is deleted. So we assume there's something wrong, + // if we couldn't find a MachineDeployment owner reference. + return nil, errors.Errorf("could not calculate MachineDeployment name for %s", tlog.KObj{Obj: ms}) +} diff --git a/controllers/topology/machineset_controller_test.go b/controllers/topology/machineset_controller_test.go new file mode 100644 index 000000000000..6336166638ce --- /dev/null +++ b/controllers/topology/machineset_controller_test.go @@ -0,0 +1,188 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package topology + +import ( + "testing" + + . "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" + "sigs.k8s.io/cluster-api/internal/testtypes" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +func TestMachineSetReconciler_ReconcileDelete(t *testing.T) { + deletionTimeStamp := metav1.Now() + + mdName := "md" + + msBT := testtypes.NewBootstrapTemplateBuilder(metav1.NamespaceDefault, "msBT").Build() + msIMT := testtypes.NewInfrastructureMachineTemplateBuilder(metav1.NamespaceDefault, "msIMT").Build() + ms := testtypes.NewMachineSetBuilder(metav1.NamespaceDefault, "ms"). + WithBootstrapTemplate(msBT). + WithInfrastructureTemplate(msIMT). + WithLabels(map[string]string{ + clusterv1.MachineDeploymentLabelName: mdName, + }). + Build() + ms.SetDeletionTimestamp(&deletionTimeStamp) + ms.SetOwnerReferences([]metav1.OwnerReference{ + { + Kind: "MachineDeployment", + APIVersion: clusterv1.GroupVersion.String(), + Name: "md", + }, + }) + + t.Run("Should delete templates of a MachineSet", func(t *testing.T) { + g := NewWithT(t) + + fakeClient := fake.NewClientBuilder(). + WithScheme(fakeScheme). + WithObjects(ms, msBT, msIMT). 
+ Build() + + r := &MachineSetReconciler{ + Client: fakeClient, + APIReader: fakeClient, + } + _, err := r.reconcileDelete(ctx, ms) + g.Expect(err).ToNot(HaveOccurred()) + + afterMS := &clusterv1.MachineSet{} + g.Expect(fakeClient.Get(ctx, client.ObjectKeyFromObject(ms), afterMS)).To(Succeed()) + + g.Expect(controllerutil.ContainsFinalizer(afterMS, clusterv1.MachineSetTopologyFinalizer)).To(BeFalse()) + g.Expect(templateExists(fakeClient, msBT)).To(BeFalse()) + g.Expect(templateExists(fakeClient, msIMT)).To(BeFalse()) + }) + + t.Run("Should delete infra template of a MachineSet without a bootstrap template", func(t *testing.T) { + g := NewWithT(t) + + msWithoutBootstrapTemplateIMT := testtypes.NewInfrastructureMachineTemplateBuilder(metav1.NamespaceDefault, "msWithoutBootstrapTemplateIMT").Build() + msWithoutBootstrapTemplate := testtypes.NewMachineSetBuilder(metav1.NamespaceDefault, "msWithoutBootstrapTemplate"). + WithInfrastructureTemplate(msWithoutBootstrapTemplateIMT). + WithLabels(map[string]string{ + clusterv1.MachineDeploymentLabelName: mdName, + }). + Build() + msWithoutBootstrapTemplate.SetDeletionTimestamp(&deletionTimeStamp) + msWithoutBootstrapTemplate.SetOwnerReferences([]metav1.OwnerReference{ + { + Kind: "MachineDeployment", + APIVersion: clusterv1.GroupVersion.String(), + Name: "md", + }, + }) + + fakeClient := fake.NewClientBuilder(). + WithScheme(fakeScheme). + WithObjects(msWithoutBootstrapTemplate, msWithoutBootstrapTemplateIMT). + Build() + + r := &MachineSetReconciler{ + Client: fakeClient, + APIReader: fakeClient, + } + _, err := r.reconcileDelete(ctx, msWithoutBootstrapTemplate) + g.Expect(err).ToNot(HaveOccurred()) + + afterMS := &clusterv1.MachineSet{} + g.Expect(fakeClient.Get(ctx, client.ObjectKeyFromObject(msWithoutBootstrapTemplate), afterMS)).To(Succeed()) + + g.Expect(controllerutil.ContainsFinalizer(afterMS, clusterv1.MachineSetTopologyFinalizer)).To(BeFalse()) + g.Expect(templateExists(fakeClient, msWithoutBootstrapTemplateIMT)).To(BeFalse()) + }) + + t.Run("Should not delete templates of a MachineSet when they are still in use in a MachineDeployment", func(t *testing.T) { + g := NewWithT(t) + + md := testtypes.NewMachineDeploymentBuilder(metav1.NamespaceDefault, "md"). + WithBootstrapTemplate(msBT). + WithInfrastructureTemplate(msIMT). + Build() + + fakeClient := fake.NewClientBuilder(). + WithScheme(fakeScheme). + WithObjects(md, ms, msBT, msIMT). + Build() + + r := &MachineSetReconciler{ + Client: fakeClient, + APIReader: fakeClient, + } + _, err := r.reconcileDelete(ctx, ms) + g.Expect(err).ToNot(HaveOccurred()) + + afterMS := &clusterv1.MachineSet{} + g.Expect(fakeClient.Get(ctx, client.ObjectKeyFromObject(ms), afterMS)).To(Succeed()) + + g.Expect(controllerutil.ContainsFinalizer(afterMS, clusterv1.MachineSetTopologyFinalizer)).To(BeFalse()) + g.Expect(templateExists(fakeClient, msBT)).To(BeTrue()) + g.Expect(templateExists(fakeClient, msIMT)).To(BeTrue()) + }) + + t.Run("Should not delete templates of a MachineSet when they are still in use in another MachineSet", func(t *testing.T) { + g := NewWithT(t) + + md := testtypes.NewMachineDeploymentBuilder(metav1.NamespaceDefault, "md"). + WithBootstrapTemplate(msBT). + WithInfrastructureTemplate(msIMT). + Build() + md.SetDeletionTimestamp(&deletionTimeStamp) + + // anotherMS is another MachineSet of the same MachineDeployment using the same templates. + // Because anotherMS is not in deleting, reconcileDelete should not delete the templates. 
+ anotherMS := testtypes.NewMachineSetBuilder(metav1.NamespaceDefault, "anotherMS"). + WithBootstrapTemplate(msBT). + WithInfrastructureTemplate(msIMT). + WithLabels(map[string]string{ + clusterv1.MachineDeploymentLabelName: mdName, + }). + Build() + anotherMS.SetOwnerReferences([]metav1.OwnerReference{ + { + Kind: "MachineDeployment", + APIVersion: clusterv1.GroupVersion.String(), + Name: "md", + }, + }) + + fakeClient := fake.NewClientBuilder(). + WithScheme(fakeScheme). + WithObjects(md, anotherMS, ms, msBT, msIMT). + Build() + + r := &MachineSetReconciler{ + Client: fakeClient, + APIReader: fakeClient, + } + _, err := r.reconcileDelete(ctx, ms) + g.Expect(err).ToNot(HaveOccurred()) + + afterMS := &clusterv1.MachineSet{} + g.Expect(fakeClient.Get(ctx, client.ObjectKeyFromObject(ms), afterMS)).To(Succeed()) + + g.Expect(controllerutil.ContainsFinalizer(afterMS, clusterv1.MachineSetTopologyFinalizer)).To(BeFalse()) + g.Expect(templateExists(fakeClient, msBT)).To(BeTrue()) + g.Expect(templateExists(fakeClient, msIMT)).To(BeTrue()) + }) +} diff --git a/controllers/topology/reconcile_state.go b/controllers/topology/reconcile_state.go index b6971e21b7b2..3af9360234ac 100644 --- a/controllers/topology/reconcile_state.go +++ b/controllers/topology/reconcile_state.go @@ -206,26 +206,24 @@ func (r *ClusterReconciler) updateMachineDeployment(ctx context.Context, cluster log := tlog.LoggerFrom(ctx).WithMachineDeployment(desiredMD.Object) ctx, _ = log.WithObject(desiredMD.InfrastructureMachineTemplate).Into(ctx) - _, err := r.reconcileReferencedTemplate(ctx, reconcileReferencedTemplateInput{ + if _, err := r.reconcileReferencedTemplate(ctx, reconcileReferencedTemplateInput{ ref: &desiredMD.Object.Spec.Template.Spec.InfrastructureRef, current: currentMD.InfrastructureMachineTemplate, desired: desiredMD.InfrastructureMachineTemplate, templateNamePrefix: infrastructureMachineTemplateNamePrefix(clusterName, mdTopologyName), compatibilityChecker: check.ReferencedObjectsAreCompatible, - }) - if err != nil { + }); err != nil { return errors.Wrapf(err, "failed to update %s", tlog.KObj{Obj: currentMD.Object}) } ctx, _ = log.WithObject(desiredMD.BootstrapTemplate).Into(ctx) - _, err = r.reconcileReferencedTemplate(ctx, reconcileReferencedTemplateInput{ + if _, err := r.reconcileReferencedTemplate(ctx, reconcileReferencedTemplateInput{ ref: desiredMD.Object.Spec.Template.Spec.Bootstrap.ConfigRef, current: currentMD.BootstrapTemplate, desired: desiredMD.BootstrapTemplate, templateNamePrefix: bootstrapTemplateNamePrefix(clusterName, mdTopologyName), compatibilityChecker: check.ObjectsAreInTheSameNamespace, - }) - if err != nil { + }); err != nil { return errors.Wrapf(err, "failed to update %s", tlog.KObj{Obj: currentMD.Object}) } diff --git a/controllers/topology/util.go b/controllers/topology/util.go index 1e2f6132de1f..18a18c5b9408 100644 --- a/controllers/topology/util.go +++ b/controllers/topology/util.go @@ -22,9 +22,15 @@ import ( "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" "sigs.k8s.io/cluster-api/controllers/external" + tlog "sigs.k8s.io/cluster-api/controllers/topology/internal/log" utilconversion "sigs.k8s.io/cluster-api/util/conversion" + "sigs.k8s.io/controller-runtime/pkg/client" ) // bootstrapTemplateNamePrefix calculates the name prefix for a BootstrapTemplate. 
@@ -68,3 +74,108 @@ func refToUnstructured(ref *corev1.ObjectReference) *unstructured.Unstructured { uns.SetName(ref.Name) return uns } + +// getMachineSetsForDeployment returns a list of MachineSets associated with a MachineDeployment. +func getMachineSetsForDeployment(ctx context.Context, c client.Reader, md types.NamespacedName) ([]*clusterv1.MachineSet, error) { + // List MachineSets based on the MachineDeployment label. + msList := &clusterv1.MachineSetList{} + if err := c.List(ctx, msList, + client.InNamespace(md.Namespace), client.MatchingLabels{clusterv1.MachineDeploymentLabelName: md.Name}); err != nil { + return nil, errors.Wrapf(err, "failed to list MachineSets for MachineDeployment/%s", md.Name) + } + + // Copy the MachineSets to an array of MachineSet pointers, to avoid MachineSet copying later. + res := make([]*clusterv1.MachineSet, 0, len(msList.Items)) + for i := range msList.Items { + res = append(res, &msList.Items[i]) + } + return res, nil +} + +// calculateTemplatesInUse returns all templates referenced in non-deleting MachineDeployment and MachineSets. +func calculateTemplatesInUse(md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet) (map[string]bool, error) { + templatesInUse := map[string]bool{} + + // Templates of the MachineSet are still in use if the MachineSet is not in deleting state. + for _, ms := range msList { + if !ms.DeletionTimestamp.IsZero() { + continue + } + + bootstrapRef := ms.Spec.Template.Spec.Bootstrap.ConfigRef + infrastructureRef := &ms.Spec.Template.Spec.InfrastructureRef + if err := addTemplateRef(templatesInUse, bootstrapRef, infrastructureRef); err != nil { + return nil, errors.Wrapf(err, "failed to add templates of %s to templatesInUse", tlog.KObj{Obj: ms}) + } + } + + // If MachineDeployment has already been deleted or still exists and is in deleting state, then there + // are no templates referenced in the MachineDeployment which are still in use, so let's return here. + if md == nil || !md.DeletionTimestamp.IsZero() { + return templatesInUse, nil + } + + // Otherwise, the templates of the MachineDeployment are still in use. + bootstrapRef := md.Spec.Template.Spec.Bootstrap.ConfigRef + infrastructureRef := &md.Spec.Template.Spec.InfrastructureRef + if err := addTemplateRef(templatesInUse, bootstrapRef, infrastructureRef); err != nil { + return nil, errors.Wrapf(err, "failed to add templates of %s to templatesInUse", tlog.KObj{Obj: md}) + } + return templatesInUse, nil +} + +// deleteTemplateIfUnused deletes the template (ref), if it is not in use (i.e. in templatesInUse). +func deleteTemplateIfUnused(ctx context.Context, c client.Client, templatesInUse map[string]bool, ref *corev1.ObjectReference) error { + // If ref is nil, do nothing (this can happen, because bootstrap templates are optional). + if ref == nil { + return nil + } + + log := tlog.LoggerFrom(ctx).WithRef(ref) + + refID, err := templateRefID(ref) + if err != nil { + return errors.Wrapf(err, "failed to calculate templateRefID") + } + + // If the template is still in use, do nothing. + if templatesInUse[refID] { + log.V(3).Infof("Not deleting %s, because it's still in use", tlog.KRef{Ref: ref}) + return nil + } + + log.Infof("Deleting %s", tlog.KRef{Ref: ref}) + if err := external.Delete(ctx, c, ref); err != nil && !apierrors.IsNotFound(err) { + return errors.Wrapf(err, "failed to delete %s", tlog.KRef{Ref: ref}) + } + return nil +} + +// addTemplateRef adds the refs to the refMap with the templateRefID as key. 
+func addTemplateRef(refMap map[string]bool, refs ...*corev1.ObjectReference) error { + for _, ref := range refs { + if ref != nil { + refID, err := templateRefID(ref) + if err != nil { + return errors.Wrapf(err, "failed to calculate templateRefID") + } + refMap[refID] = true + } + } + return nil +} + +// templateRefID returns the templateRefID of a ObjectReference in the format: g/k/name. +// Note: We don't include the version as references with different versions should be treated as equal. +func templateRefID(ref *corev1.ObjectReference) (string, error) { + if ref == nil { + return "", nil + } + + gv, err := schema.ParseGroupVersion(ref.APIVersion) + if err != nil { + return "", errors.Wrapf(err, "failed to parse apiVersion %q", ref.APIVersion) + } + + return fmt.Sprintf("%s/%s/%s", gv.Group, ref.Kind, ref.Name), nil +} diff --git a/controllers/topology/util_test.go b/controllers/topology/util_test.go index ca5f4de7734b..de8baa0d9cce 100644 --- a/controllers/topology/util_test.go +++ b/controllers/topology/util_test.go @@ -19,6 +19,8 @@ package topology import ( "testing" + "github.com/pkg/errors" + clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" "sigs.k8s.io/cluster-api/internal/testtypes" "github.com/google/go-cmp/cmp" @@ -114,3 +116,97 @@ func TestGetReference(t *testing.T) { }) } } + +func TestCalculateTemplatesInUse(t *testing.T) { + t.Run("Calculate templates in use with regular MachineDeployment and MachineSet", func(t *testing.T) { + g := NewWithT(t) + + md := testtypes.NewMachineDeploymentBuilder(metav1.NamespaceDefault, "md"). + WithBootstrapTemplate(testtypes.NewBootstrapTemplateBuilder(metav1.NamespaceDefault, "mdBT").Build()). + WithInfrastructureTemplate(testtypes.NewInfrastructureMachineTemplateBuilder(metav1.NamespaceDefault, "mdIMT").Build()). + Build() + ms := testtypes.NewMachineSetBuilder(metav1.NamespaceDefault, "ms"). + WithBootstrapTemplate(testtypes.NewBootstrapTemplateBuilder(metav1.NamespaceDefault, "msBT").Build()). + WithInfrastructureTemplate(testtypes.NewInfrastructureMachineTemplateBuilder(metav1.NamespaceDefault, "msIMT").Build()). + Build() + + actual, err := calculateTemplatesInUse(md, []*clusterv1.MachineSet{ms}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actual).To(HaveLen(4)) + + g.Expect(actual).To(HaveKey(mustTemplateRefID(md.Spec.Template.Spec.Bootstrap.ConfigRef))) + g.Expect(actual).To(HaveKey(mustTemplateRefID(&md.Spec.Template.Spec.InfrastructureRef))) + + g.Expect(actual).To(HaveKey(mustTemplateRefID(ms.Spec.Template.Spec.Bootstrap.ConfigRef))) + g.Expect(actual).To(HaveKey(mustTemplateRefID(&ms.Spec.Template.Spec.InfrastructureRef))) + }) + + t.Run("Calculate templates in use with MachineDeployment and MachineSet without BootstrapTemplate", func(t *testing.T) { + g := NewWithT(t) + + mdWithoutBootstrapTemplate := testtypes.NewMachineDeploymentBuilder(metav1.NamespaceDefault, "md"). + WithInfrastructureTemplate(testtypes.NewInfrastructureMachineTemplateBuilder(metav1.NamespaceDefault, "mdIMT").Build()). + Build() + msWithoutBootstrapTemplate := testtypes.NewMachineSetBuilder(metav1.NamespaceDefault, "ms"). + WithInfrastructureTemplate(testtypes.NewInfrastructureMachineTemplateBuilder(metav1.NamespaceDefault, "msIMT").Build()). 
+ Build() + + actual, err := calculateTemplatesInUse(mdWithoutBootstrapTemplate, []*clusterv1.MachineSet{msWithoutBootstrapTemplate}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actual).To(HaveLen(2)) + + g.Expect(actual).To(HaveKey(mustTemplateRefID(&mdWithoutBootstrapTemplate.Spec.Template.Spec.InfrastructureRef))) + + g.Expect(actual).To(HaveKey(mustTemplateRefID(&msWithoutBootstrapTemplate.Spec.Template.Spec.InfrastructureRef))) + }) + + t.Run("Calculate templates in use with MachineDeployment and MachineSet ignore templates when resources in deleting", func(t *testing.T) { + g := NewWithT(t) + + deletionTimeStamp := metav1.Now() + + mdInDeleting := testtypes.NewMachineDeploymentBuilder(metav1.NamespaceDefault, "md"). + WithInfrastructureTemplate(testtypes.NewInfrastructureMachineTemplateBuilder(metav1.NamespaceDefault, "mdIMT").Build()). + Build() + mdInDeleting.SetDeletionTimestamp(&deletionTimeStamp) + + msInDeleting := testtypes.NewMachineSetBuilder(metav1.NamespaceDefault, "ms"). + WithInfrastructureTemplate(testtypes.NewInfrastructureMachineTemplateBuilder(metav1.NamespaceDefault, "msIMT").Build()). + Build() + msInDeleting.SetDeletionTimestamp(&deletionTimeStamp) + + actual, err := calculateTemplatesInUse(mdInDeleting, []*clusterv1.MachineSet{msInDeleting}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actual).To(HaveLen(0)) + + g.Expect(actual).ToNot(HaveKey(mustTemplateRefID(&mdInDeleting.Spec.Template.Spec.InfrastructureRef))) + + g.Expect(actual).ToNot(HaveKey(mustTemplateRefID(&msInDeleting.Spec.Template.Spec.InfrastructureRef))) + }) + + t.Run("Calculate templates in use without MachineDeployment and with MachineSet", func(t *testing.T) { + g := NewWithT(t) + + ms := testtypes.NewMachineSetBuilder(metav1.NamespaceDefault, "ms"). + WithBootstrapTemplate(testtypes.NewBootstrapTemplateBuilder(metav1.NamespaceDefault, "msBT").Build()). + WithInfrastructureTemplate(testtypes.NewInfrastructureMachineTemplateBuilder(metav1.NamespaceDefault, "msIMT").Build()). + Build() + + actual, err := calculateTemplatesInUse(nil, []*clusterv1.MachineSet{ms}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(actual).To(HaveLen(2)) + + g.Expect(actual).To(HaveKey(mustTemplateRefID(ms.Spec.Template.Spec.Bootstrap.ConfigRef))) + g.Expect(actual).To(HaveKey(mustTemplateRefID(&ms.Spec.Template.Spec.InfrastructureRef))) + }) +} + +// mustTemplateRefID returns the templateRefID as calculated by templateRefID, but panics +// if templateRefID returns an error. +func mustTemplateRefID(ref *corev1.ObjectReference) string { + refID, err := templateRefID(ref) + if err != nil { + panic(errors.Wrap(err, "failed to calculate templateRefID")) + } + return refID +} diff --git a/internal/testtypes/builders.go b/internal/testtypes/builders.go index 5d5af0eb5caf..75a79e8a526f 100644 --- a/internal/testtypes/builders.go +++ b/internal/testtypes/builders.go @@ -522,6 +522,71 @@ func (m *MachineDeploymentBuilder) Build() *clusterv1.MachineDeployment { return obj } +// MachineSetBuilder holds the variables and objects needed to build a generic MachineSet. +type MachineSetBuilder struct { + namespace string + name string + bootstrapTemplate *unstructured.Unstructured + infrastructureTemplate *unstructured.Unstructured + replicas *int32 + labels map[string]string +} + +// NewMachineSetBuilder creates a MachineSetBuilder with the given name and namespace. 
+func NewMachineSetBuilder(namespace, name string) *MachineSetBuilder { + return &MachineSetBuilder{ + name: name, + namespace: namespace, + } +} + +// WithBootstrapTemplate adds the passed Unstructured object to the MachineSetBuilder as a bootstrapTemplate. +func (m *MachineSetBuilder) WithBootstrapTemplate(ref *unstructured.Unstructured) *MachineSetBuilder { + m.bootstrapTemplate = ref + return m +} + +// WithInfrastructureTemplate adds the passed unstructured object to the MachineSetBuilder as an infrastructureMachineTemplate. +func (m *MachineSetBuilder) WithInfrastructureTemplate(ref *unstructured.Unstructured) *MachineSetBuilder { + m.infrastructureTemplate = ref + return m +} + +// WithLabels adds the given labels to the MachineSetBuilder. +func (m *MachineSetBuilder) WithLabels(labels map[string]string) *MachineSetBuilder { + m.labels = labels + return m +} + +// WithReplicas sets the number of replicas for the MachineSetBuilder. +func (m *MachineSetBuilder) WithReplicas(replicas *int32) *MachineSetBuilder { + m.replicas = replicas + return m +} + +// Build creates a new MachineSet with the variables and objects passed to the MachineSetBuilder. +func (m *MachineSetBuilder) Build() *clusterv1.MachineSet { + obj := &clusterv1.MachineSet{ + TypeMeta: metav1.TypeMeta{ + Kind: "MachineSet", + APIVersion: clusterv1.GroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: m.name, + Namespace: m.namespace, + Labels: m.labels, + }, + } + obj.Spec.Replicas = m.replicas + if m.bootstrapTemplate != nil { + obj.Spec.Template.Spec.Bootstrap.ConfigRef = objToRef(m.bootstrapTemplate) + } + if m.infrastructureTemplate != nil { + obj.Spec.Template.Spec.InfrastructureRef = *objToRef(m.infrastructureTemplate) + } + return obj +} + // objToRef returns a reference to the given object. func objToRef(obj client.Object) *corev1.ObjectReference { gvk := obj.GetObjectKind().GroupVersionKind() diff --git a/main.go b/main.go index 66eeb92cc131..c7d3abc0ee88 100644 --- a/main.go +++ b/main.go @@ -284,6 +284,24 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { setupLog.Error(err, "unable to create controller", "controller", "ClusterTopology") os.Exit(1) } + + if err := (&topology.MachineDeploymentReconciler{ + Client: mgr.GetClient(), + APIReader: mgr.GetAPIReader(), + WatchFilterValue: watchFilterValue, + }).SetupWithManager(ctx, mgr, controller.Options{}); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "MachineDeploymentTopology") + os.Exit(1) + } + + if err := (&topology.MachineSetReconciler{ + Client: mgr.GetClient(), + APIReader: mgr.GetAPIReader(), + WatchFilterValue: watchFilterValue, + }).SetupWithManager(ctx, mgr, controller.Options{}); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "MachineSetTopology") + os.Exit(1) + } } if err := (&controllers.ClusterReconciler{ Client: mgr.GetClient(), diff --git a/util/labels/helpers.go b/util/labels/helpers.go index 187825204b9f..6de98c118929 100644 --- a/util/labels/helpers.go +++ b/util/labels/helpers.go @@ -22,6 +22,12 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" ) +// IsTopologyOwned returns true if the object has the `topology.cluster.x-k8s.io/owned` label. +func IsTopologyOwned(o metav1.Object) bool { + _, ok := o.GetLabels()[clusterv1.ClusterTopologyOwnedLabel] + return ok +} + // HasWatchLabel returns true if the object has a label with the WatchLabel key matching the given value.
func HasWatchLabel(o metav1.Object, labelValue string) bool { val, ok := o.GetLabels()[clusterv1.WatchLabel] diff --git a/util/predicates/generic_predicates.go b/util/predicates/generic_predicates.go index 3a75a36d1738..47e33b0ca208 100644 --- a/util/predicates/generic_predicates.go +++ b/util/predicates/generic_predicates.go @@ -241,3 +241,35 @@ func processIfNotExternallyManaged(logger logr.Logger, obj client.Object) bool { log.V(6).Info("Resource is managed, will attempt to map resource") return true } + +// ResourceIsTopologyOwned returns a predicate that returns true only if the resource has +// the `topology.cluster.x-k8s.io/owned` label. +func ResourceIsTopologyOwned(logger logr.Logger) predicate.Funcs { + return predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + return processIfTopologyOwned(logger.WithValues("predicate", "updateEvent"), e.ObjectNew) + }, + CreateFunc: func(e event.CreateEvent) bool { + return processIfTopologyOwned(logger.WithValues("predicate", "createEvent"), e.Object) + }, + DeleteFunc: func(e event.DeleteEvent) bool { + return processIfTopologyOwned(logger.WithValues("predicate", "deleteEvent"), e.Object) + }, + GenericFunc: func(e event.GenericEvent) bool { + return processIfTopologyOwned(logger.WithValues("predicate", "genericEvent"), e.Object) + }, + } +} + +func processIfTopologyOwned(logger logr.Logger, obj client.Object) bool { + kind := strings.ToLower(obj.GetObjectKind().GroupVersionKind().Kind) + log := logger.WithValues("namespace", obj.GetNamespace(), kind, obj.GetName()) + if labels.IsTopologyOwned(obj) { + log.V(6).Info("Resource is topology owned, will attempt to map resource") + return true + } + // We intentionally log this line only on level 6, because it will be very frequently + // logged for MachineDeployments and MachineSets not owned by a topology. + log.V(6).Info("Resource is not topology owned, will not attempt to map resource") + return false +} diff --git a/util/util.go b/util/util.go index 9fa0818617b7..a4d9de37794f 100644 --- a/util/util.go +++ b/util/util.go @@ -176,7 +176,7 @@ func GetClusterByName(ctx context.Context, c client.Client, namespace, name stri } if err := c.Get(ctx, key, cluster); err != nil { - return nil, err + return nil, errors.Wrapf(err, "failed to get Cluster/%s", name) } return cluster, nil