From e83192cff35b5f90747fa43bb02f6b60bd1ac594 Mon Sep 17 00:00:00 2001 From: Kashif Khan Date: Tue, 13 Aug 2024 15:25:13 +0300 Subject: [PATCH] Add ClusterCacheTracker Signed-off-by: Kashif Khan --- controllers/metal3cluster_controller.go | 20 ++++++- controllers/metal3data_controller.go | 2 + controllers/metal3datatemplate_controller.go | 2 + controllers/metal3labelsync_controller.go | 2 + controllers/metal3machine_controller.go | 2 + .../metal3machinetemplate_controller.go | 2 + controllers/metal3remediation_controller.go | 2 + main.go | 55 ++++++++++++++++++- 8 files changed, 84 insertions(+), 3 deletions(-) diff --git a/controllers/metal3cluster_controller.go b/controllers/metal3cluster_controller.go index 7ae22201f2..70107cf438 100644 --- a/controllers/metal3cluster_controller.go +++ b/controllers/metal3cluster_controller.go @@ -28,6 +28,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/controllers/remote" capierrors "sigs.k8s.io/cluster-api/errors" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" @@ -53,6 +54,7 @@ const ( // Metal3ClusterReconciler reconciles a Metal3Cluster object. type Metal3ClusterReconciler struct { Client client.Client + Tracker *remote.ClusterCacheTracker ManagerFactory baremetal.ManagerFactoryInterface Log logr.Logger WatchFilterValue string @@ -124,11 +126,25 @@ func (r *Metal3ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reques // Handle deleted clusters if !metal3Cluster.DeletionTimestamp.IsZero() { - return reconcileDelete(ctx, clusterMgr) + res, err := reconcileDelete(ctx, clusterMgr) + // Requeue if the reconcile failed because the ClusterCacheTracker was locked for + // the current cluster because of concurrent access. 
+ if errors.Is(err, remote.ErrClusterLocked) { + clusterLog.Info("Requeuing because another worker has the lock on the ClusterCacheTracker") + return ctrl.Result{Requeue: true}, nil + } + return res, err } // Handle non-deleted clusters - return reconcileNormal(ctx, clusterMgr) + res, err := reconcileNormal(ctx, clusterMgr) + // Requeue if the reconcile failed because the ClusterCacheTracker was locked for + // the current cluster because of concurrent access. + if errors.Is(err, remote.ErrClusterLocked) { + clusterLog.Info("Requeuing because another worker has the lock on the ClusterCacheTracker") + return ctrl.Result{Requeue: true}, nil + } + return res, err } func patchMetal3Cluster(ctx context.Context, patchHelper *patch.Helper, metal3Cluster *infrav1.Metal3Cluster, options ...patch.Option) error { diff --git a/controllers/metal3data_controller.go b/controllers/metal3data_controller.go index 077e89bdc4..ede644f44f 100644 --- a/controllers/metal3data_controller.go +++ b/controllers/metal3data_controller.go @@ -28,6 +28,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/controllers/remote" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/patch" @@ -45,6 +46,7 @@ const ( // Metal3DataReconciler reconciles a Metal3Data object. 
type Metal3DataReconciler struct { Client client.Client + Tracker *remote.ClusterCacheTracker ManagerFactory baremetal.ManagerFactoryInterface Log logr.Logger WatchFilterValue string diff --git a/controllers/metal3datatemplate_controller.go b/controllers/metal3datatemplate_controller.go index d23a6fed64..d5d136e43f 100644 --- a/controllers/metal3datatemplate_controller.go +++ b/controllers/metal3datatemplate_controller.go @@ -26,6 +26,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/controllers/remote" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/predicates" @@ -43,6 +44,7 @@ const ( // Metal3DataTemplateReconciler reconciles a Metal3DataTemplate object. type Metal3DataTemplateReconciler struct { Client client.Client + Tracker *remote.ClusterCacheTracker ManagerFactory baremetal.ManagerFactoryInterface Log logr.Logger WatchFilterValue string diff --git a/controllers/metal3labelsync_controller.go b/controllers/metal3labelsync_controller.go index fccff77ef8..686cc555cc 100644 --- a/controllers/metal3labelsync_controller.go +++ b/controllers/metal3labelsync_controller.go @@ -35,6 +35,7 @@ import ( "k8s.io/client-go/tools/cache" k8strings "k8s.io/utils/strings" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/controllers/remote" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/patch" @@ -60,6 +61,7 @@ const ( // Metal3LabelSyncReconciler reconciles label updates to BareMetalHost objects with the corresponding K Node objects in the workload cluster. 
type Metal3LabelSyncReconciler struct { Client client.Client + Tracker *remote.ClusterCacheTracker ManagerFactory baremetal.ManagerFactoryInterface Log logr.Logger CapiClientGetter baremetal.ClientGetter diff --git a/controllers/metal3machine_controller.go b/controllers/metal3machine_controller.go index 3737ea3e96..fac410724d 100644 --- a/controllers/metal3machine_controller.go +++ b/controllers/metal3machine_controller.go @@ -29,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/controllers/remote" capierrors "sigs.k8s.io/cluster-api/errors" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" @@ -50,6 +51,7 @@ const ( type Metal3MachineReconciler struct { Client client.Client ManagerFactory baremetal.ManagerFactoryInterface + Tracker *remote.ClusterCacheTracker Log logr.Logger CapiClientGetter baremetal.ClientGetter WatchFilterValue string diff --git a/controllers/metal3machinetemplate_controller.go b/controllers/metal3machinetemplate_controller.go index 63cbd6603c..f94f5a142a 100644 --- a/controllers/metal3machinetemplate_controller.go +++ b/controllers/metal3machinetemplate_controller.go @@ -23,6 +23,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/controllers/remote" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/predicates" @@ -45,6 +46,7 @@ const ( // Metal3MachineTemplateReconciler reconciles a Metal3MachineTemplate object. 
type Metal3MachineTemplateReconciler struct { Client client.Client + Tracker *remote.ClusterCacheTracker ManagerFactory baremetal.ManagerFactoryInterface Log logr.Logger WatchFilterValue string diff --git a/controllers/metal3remediation_controller.go b/controllers/metal3remediation_controller.go index 60a403502c..0f06aa4be9 100644 --- a/controllers/metal3remediation_controller.go +++ b/controllers/metal3remediation_controller.go @@ -29,6 +29,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" v1 "k8s.io/client-go/kubernetes/typed/core/v1" + "sigs.k8s.io/cluster-api/controllers/remote" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/patch" ctrl "sigs.k8s.io/controller-runtime" @@ -39,6 +40,7 @@ import ( // Metal3RemediationReconciler reconciles a Metal3Remediation object. type Metal3RemediationReconciler struct { client.Client + Tracker *remote.ClusterCacheTracker ManagerFactory baremetal.ManagerFactoryInterface Log logr.Logger IsOutOfServiceTaintEnabled bool diff --git a/main.go b/main.go index 7c0f384ccc..ed982ac531 100644 --- a/main.go +++ b/main.go @@ -48,6 +48,7 @@ import ( _ "k8s.io/component-base/logs/json/register" "k8s.io/klog/v2/klogr" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/controllers/remote" caipamv1 "sigs.k8s.io/cluster-api/exp/ipam/api/v1alpha1" "sigs.k8s.io/cluster-api/util/flags" ctrl "sigs.k8s.io/controller-runtime" @@ -67,12 +68,15 @@ const ( var ( myscheme = runtime.NewScheme() setupLog = ctrl.Log.WithName("setup") + controllerName = "cluster-api-provider-metal3-manager" waitForMetal3Controller = false enableLeaderElection bool leaderElectionLeaseDuration time.Duration leaderElectionRenewDeadline time.Duration leaderElectionRetryPeriod time.Duration syncPeriod time.Duration + clusterCacheTrackerClientQPS float32 + clusterCacheTrackerClientBurst int metal3MachineConcurrency int metal3ClusterConcurrency int metal3DataTemplateConcurrency int @@ 
-120,7 +124,7 @@ func main() { restConfig := ctrl.GetConfigOrDie() restConfig.QPS = restConfigQPS restConfig.Burst = restConfigBurst - restConfig.UserAgent = "cluster-api-provider-metal3-manager" + restConfig.UserAgent = controllerName tlsOptions, metricsOptions, err := flags.GetManagerOptions(managerOptions) if err != nil { @@ -267,6 +271,20 @@ func initFlags(fs *pflag.FlagSet) { "The minimum interval at which watched resources are reconciled (e.g. 15m)", ) + fs.Float32Var( + &clusterCacheTrackerClientQPS, + "clustercachetracker-client-qps", + 20, + "Maximum queries per second from the cluster cache tracker clients to the Kubernetes API server of workload clusters.", + ) + + fs.IntVar( + &clusterCacheTrackerClientBurst, + "clustercachetracker-client-burst", + 30, + "Maximum number of queries that should be allowed in one burst from the cluster cache tracker clients to the Kubernetes API server of workload clusters.", + ) + fs.IntVar( &webhookPort, "webhook-port", @@ -356,8 +374,37 @@ func setupChecks(mgr ctrl.Manager) { } func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { + secretCachingClient, err := client.New(mgr.GetConfig(), client.Options{ + HTTPClient: mgr.GetHTTPClient(), + Cache: &client.CacheOptions{ + Reader: mgr.GetCache(), + }, + }) + if err != nil { + setupLog.Error(err, "unable to create secret caching client") + os.Exit(1) + } + + // Set up a ClusterCacheTracker to provide to controllers + // requiring a connection to a remote cluster + tracker, err := remote.NewClusterCacheTracker( + mgr, + remote.ClusterCacheTrackerOptions{ + SecretCachingClient: secretCachingClient, + ControllerName: controllerName, + Log: &ctrl.Log, + ClientQPS: clusterCacheTrackerClientQPS, + ClientBurst: clusterCacheTrackerClientBurst, + }, + ) + if err != nil { + setupLog.Error(err, "Unable to create cluster cache tracker") + os.Exit(1) + } + if err := (&controllers.Metal3MachineReconciler{ Client: mgr.GetClient(), + Tracker: tracker, 
ManagerFactory: baremetal.NewManagerFactory(mgr.GetClient()), Log: ctrl.Log.WithName("controllers").WithName("Metal3Machine"), CapiClientGetter: infraremote.NewClusterClient, @@ -369,6 +416,7 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { if err := (&controllers.Metal3ClusterReconciler{ Client: mgr.GetClient(), + Tracker: tracker, ManagerFactory: baremetal.NewManagerFactory(mgr.GetClient()), Log: ctrl.Log.WithName("controllers").WithName("Metal3Cluster"), WatchFilterValue: watchFilterValue, @@ -379,6 +427,7 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { if err := (&controllers.Metal3DataTemplateReconciler{ Client: mgr.GetClient(), + Tracker: tracker, ManagerFactory: baremetal.NewManagerFactory(mgr.GetClient()), Log: ctrl.Log.WithName("controllers").WithName("Metal3DataTemplate"), WatchFilterValue: watchFilterValue, @@ -389,6 +438,7 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { if err := (&controllers.Metal3DataReconciler{ Client: mgr.GetClient(), + Tracker: tracker, ManagerFactory: baremetal.NewManagerFactory(mgr.GetClient()), Log: ctrl.Log.WithName("controllers").WithName("Metal3Data"), WatchFilterValue: watchFilterValue, @@ -399,6 +449,7 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { if err := (&controllers.Metal3LabelSyncReconciler{ Client: mgr.GetClient(), + Tracker: tracker, ManagerFactory: baremetal.NewManagerFactory(mgr.GetClient()), Log: ctrl.Log.WithName("controllers").WithName("Metal3LabelSync"), CapiClientGetter: infraremote.NewClusterClient, @@ -409,6 +460,7 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { if err := (&controllers.Metal3MachineTemplateReconciler{ Client: mgr.GetClient(), + Tracker: tracker, ManagerFactory: baremetal.NewManagerFactory(mgr.GetClient()), Log: ctrl.Log.WithName("controllers").WithName("Metal3MachineTemplate"), }).SetupWithManager(ctx, mgr, concurrency(metal3MachineTemplateConcurrency)); err != nil { @@ -422,6 +474,7 @@ func 
setupReconcilers(ctx context.Context, mgr ctrl.Manager) { } if err := (&controllers.Metal3RemediationReconciler{ Client: mgr.GetClient(), + Tracker: tracker, ManagerFactory: baremetal.NewManagerFactory(mgr.GetClient()), Log: ctrl.Log.WithName("controllers").WithName("Metal3Remediation"), IsOutOfServiceTaintEnabled: isOOSTSupported,