From 479ca19b2987d9ebf0d59645b51d1947bd38edda Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Wed, 21 Jun 2023 12:48:36 +0200 Subject: [PATCH] Improve KCP to use only one workload cluster for each reconcile --- .../kubeadm/internal/control_plane.go | 31 ++- .../internal/controllers/controller.go | 221 ++++++++++-------- .../internal/controllers/controller_test.go | 95 ++++++-- .../kubeadm/internal/controllers/helpers.go | 12 +- .../internal/controllers/helpers_test.go | 30 ++- .../internal/controllers/remediation.go | 9 +- .../internal/controllers/remediation_test.go | 21 ++ .../kubeadm/internal/controllers/scale.go | 31 ++- .../internal/controllers/scale_test.go | 19 +- .../kubeadm/internal/controllers/status.go | 70 +++--- .../internal/controllers/status_test.go | 44 +++- .../internal/controllers/suite_test.go | 8 +- .../kubeadm/internal/controllers/upgrade.go | 43 ++-- .../internal/controllers/upgrade_test.go | 22 +- 14 files changed, 423 insertions(+), 233 deletions(-) diff --git a/controlplane/kubeadm/internal/control_plane.go b/controlplane/kubeadm/internal/control_plane.go index 0a255bd61fb5..7e7ba17d65e7 100644 --- a/controlplane/kubeadm/internal/control_plane.go +++ b/controlplane/kubeadm/internal/control_plane.go @@ -51,10 +51,13 @@ type ControlPlane struct { // See discussion on https://github.com/kubernetes-sigs/cluster-api/pull/3405 KubeadmConfigs map[string]*bootstrapv1.KubeadmConfig InfraResources map[string]*unstructured.Unstructured + + managementCluster ManagementCluster + workloadCluster WorkloadCluster } // NewControlPlane returns an instantiated ControlPlane. -func NewControlPlane(ctx context.Context, client client.Client, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, ownedMachines collections.Machines) (*ControlPlane, error) { +func NewControlPlane(ctx context.Context, managementCluster ManagementCluster, client client.Client, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, ownedMachines collections.Machines) (*ControlPlane, error) { infraObjects, err := getInfraResources(ctx, client, ownedMachines) if err != nil { return nil, err @@ -80,6 +83,7 @@ func NewControlPlane(ctx context.Context, client client.Client, cluster *cluster KubeadmConfigs: kubeadmConfigs, InfraResources: infraObjects, reconciliationTime: metav1.Now(), + managementCluster: managementCluster, }, nil } @@ -268,3 +272,28 @@ func (c *ControlPlane) SetPatchHelpers(patchHelpers map[string]*patch.Helper) { c.machinesPatchHelpers[machineName] = patchHelper } } + +// GetWorkloadCluster builds a cluster object. +// The cluster comes with an etcd client generator to connect to any etcd pod living on a managed machine. +func (c *ControlPlane) GetWorkloadCluster(ctx context.Context) (WorkloadCluster, error) { + if c.workloadCluster != nil { + return c.workloadCluster, nil + } + + workloadCluster, err := c.managementCluster.GetWorkloadCluster(ctx, client.ObjectKeyFromObject(c.Cluster)) + if err != nil { + return nil, err + } + c.workloadCluster = workloadCluster + return c.workloadCluster, nil +} + +// InjectTestManagementCluster allows to inject a test ManagementCluster during tests. +// NOTE: This approach allows to keep the managementCluster field private, which will +// prevent people from using managementCluster.GetWorkloadCluster because it creates a new +// instance of WorkloadCluster at every call. People instead should use ControlPlane.GetWorkloadCluster +// that creates only a single instance of WorkloadCluster for each reconcile. +func (c *ControlPlane) InjectTestManagementCluster(managementCluster ManagementCluster) { + c.managementCluster = managementCluster + c.workloadCluster = nil +} diff --git a/controlplane/kubeadm/internal/controllers/controller.go b/controlplane/kubeadm/internal/controllers/controller.go index c55791bec6f9..f52825cdeaf2 100644 --- a/controlplane/kubeadm/internal/controllers/controller.go +++ b/controlplane/kubeadm/internal/controllers/controller.go @@ -188,9 +188,21 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl. return ctrl.Result{}, nil } + // Initialize the control plane scope; this includes also checking for orphan machines and + // adopt them if necessary. + controlPlane, adoptableMachineFound, err := r.initControlPlaneScope(ctx, cluster, kcp) + if err != nil { + return ctrl.Result{}, err + } + if adoptableMachineFound { + // if there are no errors but at least one CP machine has been adopted, then requeue and + // wait for the update event for the ownership to be set. + return ctrl.Result{}, nil + } + defer func() { // Always attempt to update status. - if err := r.updateStatus(ctx, kcp, cluster); err != nil { + if err := r.updateStatus(ctx, controlPlane); err != nil { var connFailure *internal.RemoteClusterConnectionError if errors.As(err, &connFailure) { log.Info("Could not connect to workload cluster to fetch status", "err", err.Error()) @@ -218,7 +230,7 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl. if !kcp.ObjectMeta.DeletionTimestamp.IsZero() { // Handle deletion reconciliation loop. - res, err = r.reconcileDelete(ctx, cluster, kcp) + res, err = r.reconcileDelete(ctx, controlPlane) // Requeue if the reconcile failed because the ClusterCacheTracker was locked for // the current cluster because of concurrent access. if errors.Is(err, remote.ErrClusterLocked) { @@ -229,7 +241,7 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl. } // Handle normal reconciliation loop. - res, err = r.reconcile(ctx, cluster, kcp) + res, err = r.reconcile(ctx, controlPlane) // Requeue if the reconcile failed because the ClusterCacheTracker was locked for // the current cluster because of concurrent access. if errors.Is(err, remote.ErrClusterLocked) { @@ -239,6 +251,51 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl. return res, err } +// initControlPlaneScope initializes the control plane scope; this includes also checking for orphan machines and +// adopt them if necessary. +// The func also returns a boolean indicating if adoptableMachine have been found and processed, but this doesn't imply those machines +// have been actually adopted). +func (r *KubeadmControlPlaneReconciler) initControlPlaneScope(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane) (*internal.ControlPlane, bool, error) { + log := ctrl.LoggerFrom(ctx) + + // Return early if the cluster is not yet in a state where control plane machines exists + if !cluster.Status.InfrastructureReady || !cluster.Spec.ControlPlaneEndpoint.IsValid() { + controlPlane, err := internal.NewControlPlane(ctx, r.managementCluster, r.Client, cluster, kcp, collections.Machines{}) + if err != nil { + log.Error(err, "failed to initialize control plane scope") + return nil, false, err + } + return controlPlane, false, nil + } + + // Read control plane machines + controlPlaneMachines, err := r.managementClusterUncached.GetMachinesForCluster(ctx, cluster, collections.ControlPlaneMachines(cluster.Name)) + if err != nil { + log.Error(err, "failed to retrieve control plane machines for cluster") + return nil, false, err + } + + // If we are not deleting the CP, adopt stand alone CP machines if any + adoptableMachines := controlPlaneMachines.Filter(collections.AdoptableControlPlaneMachines(cluster.Name)) + if kcp.ObjectMeta.DeletionTimestamp.IsZero() && len(adoptableMachines) > 0 { + return nil, true, r.adoptMachines(ctx, kcp, adoptableMachines, cluster) + } + + ownedMachines := controlPlaneMachines.Filter(collections.OwnedMachines(kcp)) + if kcp.ObjectMeta.DeletionTimestamp.IsZero() && len(ownedMachines) != len(controlPlaneMachines) { + err := errors.New("not all control plane machines are owned by this KubeadmControlPlane, refusing to operate in mixed management mode") + log.Error(err, "KCP cannot reconcile") + return nil, false, err + } + + controlPlane, err := internal.NewControlPlane(ctx, r.managementCluster, r.Client, cluster, kcp, ownedMachines) + if err != nil { + log.Error(err, "failed to initialize control plane scope") + return nil, false, err + } + return controlPlane, false, nil +} + func patchKubeadmControlPlane(ctx context.Context, patchHelper *patch.Helper, kcp *controlplanev1.KubeadmControlPlane) error { // Always update the readyCondition by summarizing the state of other conditions. conditions.SetSummary(kcp, @@ -270,78 +327,40 @@ func patchKubeadmControlPlane(ctx context.Context, patchHelper *patch.Helper, kc } // reconcile handles KubeadmControlPlane reconciliation. -func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane) (res ctrl.Result, reterr error) { +func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPlane *internal.ControlPlane) (res ctrl.Result, reterr error) { log := ctrl.LoggerFrom(ctx) log.Info("Reconcile KubeadmControlPlane") // Make sure to reconcile the external infrastructure reference. - if err := r.reconcileExternalReference(ctx, cluster, &kcp.Spec.MachineTemplate.InfrastructureRef); err != nil { + if err := r.reconcileExternalReference(ctx, controlPlane.Cluster, &controlPlane.KCP.Spec.MachineTemplate.InfrastructureRef); err != nil { return ctrl.Result{}, err } // Wait for the cluster infrastructure to be ready before creating machines - if !cluster.Status.InfrastructureReady { + if !controlPlane.Cluster.Status.InfrastructureReady { log.Info("Cluster infrastructure is not ready yet") return ctrl.Result{}, nil } - // Generate Cluster Certificates if needed - config := kcp.Spec.KubeadmConfigSpec.DeepCopy() - config.JoinConfiguration = nil - if config.ClusterConfiguration == nil { - config.ClusterConfiguration = &bootstrapv1.ClusterConfiguration{} - } - certificates := secret.NewCertificatesForInitialControlPlane(config.ClusterConfiguration) - controllerRef := metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind(kubeadmControlPlaneKind)) - if err := certificates.LookupOrGenerate(ctx, r.Client, util.ObjectKey(cluster), *controllerRef); err != nil { - log.Error(err, "unable to lookup or create cluster certificates") - conditions.MarkFalse(kcp, controlplanev1.CertificatesAvailableCondition, controlplanev1.CertificatesGenerationFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) - return ctrl.Result{}, err + // Reconcile cluster certificates. + if result, err := r.reconcileClusterCertificates(ctx, controlPlane); !result.IsZero() || err != nil { + return result, err } - conditions.MarkTrue(kcp, controlplanev1.CertificatesAvailableCondition) // If ControlPlaneEndpoint is not set, return early - if !cluster.Spec.ControlPlaneEndpoint.IsValid() { + if !controlPlane.Cluster.Spec.ControlPlaneEndpoint.IsValid() { log.Info("Cluster does not yet have a ControlPlaneEndpoint defined") return ctrl.Result{}, nil } // Generate Cluster Kubeconfig if needed - if result, err := r.reconcileKubeconfig(ctx, cluster, kcp); !result.IsZero() || err != nil { + if result, err := r.reconcileKubeconfig(ctx, controlPlane); !result.IsZero() || err != nil { if err != nil { log.Error(err, "failed to reconcile Kubeconfig") } return result, err } - controlPlaneMachines, err := r.managementClusterUncached.GetMachinesForCluster(ctx, cluster, collections.ControlPlaneMachines(cluster.Name)) - if err != nil { - log.Error(err, "failed to retrieve control plane machines for cluster") - return ctrl.Result{}, err - } - - adoptableMachines := controlPlaneMachines.Filter(collections.AdoptableControlPlaneMachines(cluster.Name)) - if len(adoptableMachines) > 0 { - // We adopt the Machines and then wait for the update event for the ownership reference to re-queue them so the cache is up-to-date - err = r.adoptMachines(ctx, kcp, adoptableMachines, cluster) - return ctrl.Result{}, err - } - if err := ensureCertificatesOwnerRef(ctx, r.Client, util.ObjectKey(cluster), certificates, *controllerRef); err != nil { - return ctrl.Result{}, err - } - - ownedMachines := controlPlaneMachines.Filter(collections.OwnedMachines(kcp)) - if len(ownedMachines) != len(controlPlaneMachines) { - log.Info("Not all control plane machines are owned by this KubeadmControlPlane, refusing to operate in mixed management mode") - return ctrl.Result{}, nil - } - - controlPlane, err := internal.NewControlPlane(ctx, r.Client, cluster, kcp, ownedMachines) - if err != nil { - log.Error(err, "failed to initialize control plane") - return ctrl.Result{}, err - } - if !r.disableInPlacePropagation { if err := r.syncMachines(ctx, controlPlane); err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to sync Machines") @@ -350,7 +369,7 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * // Aggregate the operational state of all the machines; while aggregating we are adding the // source ref (reason@machine/name) so the problem can be easily tracked down to its source machine. - conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef(), conditions.WithStepCounterIf(false)) + conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, controlPlane.Machines.ConditionGetters(), conditions.AddSourceRef(), conditions.WithStepCounterIf(false)) // Updates conditions reporting the status of static pods and the status of the etcd cluster. // NOTE: Conditions reporting KCP operation progress like e.g. Resized or SpecUpToDate are inlined with the rest of the execution. @@ -376,7 +395,7 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * case len(needRollout) > 0: log.Info("Rolling out Control Plane machines", "needRollout", needRollout.Names()) conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), len(controlPlane.Machines)-len(needRollout)) - return r.upgradeControlPlane(ctx, cluster, kcp, controlPlane, needRollout) + return r.upgradeControlPlane(ctx, controlPlane, needRollout) default: // make sure last upgrade operation is marked as completed. // NOTE: we are checking the condition already exists in order to avoid to set this condition at the first @@ -387,8 +406,8 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * } // If we've made it this far, we can assume that all ownedMachines are up to date - numMachines := len(ownedMachines) - desiredReplicas := int(*kcp.Spec.Replicas) + numMachines := len(controlPlane.Machines) + desiredReplicas := int(*controlPlane.KCP.Spec.Replicas) switch { // We are creating the first replica @@ -396,21 +415,21 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * // Create new Machine w/ init log.Info("Initializing control plane", "Desired", desiredReplicas, "Existing", numMachines) conditions.MarkFalse(controlPlane.KCP, controlplanev1.AvailableCondition, controlplanev1.WaitingForKubeadmInitReason, clusterv1.ConditionSeverityInfo, "") - return r.initializeControlPlane(ctx, cluster, kcp, controlPlane) + return r.initializeControlPlane(ctx, controlPlane) // We are scaling up case numMachines < desiredReplicas && numMachines > 0: // Create a new Machine w/ join log.Info("Scaling up control plane", "Desired", desiredReplicas, "Existing", numMachines) - return r.scaleUpControlPlane(ctx, cluster, kcp, controlPlane) + return r.scaleUpControlPlane(ctx, controlPlane) // We are scaling down case numMachines > desiredReplicas: log.Info("Scaling down control plane", "Desired", desiredReplicas, "Existing", numMachines) // The last parameter (i.e. machines needing to be rolled out) should always be empty here. - return r.scaleDownControlPlane(ctx, cluster, kcp, controlPlane, collections.Machines{}) + return r.scaleDownControlPlane(ctx, controlPlane, collections.Machines{}) } // Get the workload cluster client. - workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster)) + workloadCluster, err := controlPlane.GetWorkloadCluster(ctx) if err != nil { log.V(2).Info("cannot get remote client to workload cluster, will requeue", "cause", err) return ctrl.Result{Requeue: true}, nil @@ -423,19 +442,19 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * // We intentionally only parse major/minor/patch so that the subsequent code // also already applies to beta versions of new releases. - parsedVersion, err := version.ParseMajorMinorPatchTolerant(kcp.Spec.Version) + parsedVersion, err := version.ParseMajorMinorPatchTolerant(controlPlane.KCP.Spec.Version) if err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kcp.Spec.Version) + return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", controlPlane.KCP.Spec.Version) } // Update kube-proxy daemonset. - if err := workloadCluster.UpdateKubeProxyImageInfo(ctx, kcp, parsedVersion); err != nil { + if err := workloadCluster.UpdateKubeProxyImageInfo(ctx, controlPlane.KCP, parsedVersion); err != nil { log.Error(err, "failed to update kube-proxy daemonset") return ctrl.Result{}, err } // Update CoreDNS deployment. - if err := workloadCluster.UpdateCoreDNS(ctx, kcp, parsedVersion); err != nil { + if err := workloadCluster.UpdateCoreDNS(ctx, controlPlane.KCP, parsedVersion); err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to update CoreDNS deployment") } @@ -450,32 +469,46 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * return ctrl.Result{}, nil } +// reconcileClusterCertificates ensures that all the cluster certificates exists and +// enforces all the expected owner ref on them. +func (r *KubeadmControlPlaneReconciler) reconcileClusterCertificates(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + + // Generate Cluster Certificates if needed + config := controlPlane.KCP.Spec.KubeadmConfigSpec.DeepCopy() + config.JoinConfiguration = nil + if config.ClusterConfiguration == nil { + config.ClusterConfiguration = &bootstrapv1.ClusterConfiguration{} + } + certificates := secret.NewCertificatesForInitialControlPlane(config.ClusterConfiguration) + controllerRef := metav1.NewControllerRef(controlPlane.KCP, controlplanev1.GroupVersion.WithKind(kubeadmControlPlaneKind)) + if err := certificates.LookupOrGenerate(ctx, r.Client, util.ObjectKey(controlPlane.Cluster), *controllerRef); err != nil { + log.Error(err, "unable to lookup or create cluster certificates") + conditions.MarkFalse(controlPlane.KCP, controlplanev1.CertificatesAvailableCondition, controlplanev1.CertificatesGenerationFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) + return ctrl.Result{}, err + } + + if err := r.ensureCertificatesOwnerRef(ctx, util.ObjectKey(controlPlane.Cluster), certificates, *controllerRef); err != nil { + return ctrl.Result{}, err + } + + conditions.MarkTrue(controlPlane.KCP, controlplanev1.CertificatesAvailableCondition) + return ctrl.Result{}, nil +} + // reconcileDelete handles KubeadmControlPlane deletion. // The implementation does not take non-control plane workloads into consideration. This may or may not change in the future. // Please see https://github.com/kubernetes-sigs/cluster-api/issues/2064. -func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane) (ctrl.Result, error) { +func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) log.Info("Reconcile KubeadmControlPlane deletion") - // Gets all machines, not just control plane machines. - allMachines, err := r.managementCluster.GetMachinesForCluster(ctx, cluster) - if err != nil { - return ctrl.Result{}, err - } - ownedMachines := allMachines.Filter(collections.OwnedMachines(kcp)) - // If no control plane machines remain, remove the finalizer - if len(ownedMachines) == 0 { - controllerutil.RemoveFinalizer(kcp, controlplanev1.KubeadmControlPlaneFinalizer) + if len(controlPlane.Machines) == 0 { + controllerutil.RemoveFinalizer(controlPlane.KCP, controlplanev1.KubeadmControlPlaneFinalizer) return ctrl.Result{}, nil } - controlPlane, err := internal.NewControlPlane(ctx, r.Client, cluster, kcp, ownedMachines) - if err != nil { - log.Error(err, "failed to initialize control plane") - return ctrl.Result{}, err - } - // Updates conditions reporting the status of static pods and the status of the etcd cluster. // NOTE: Ignoring failures given that we are deleting if _, err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil { @@ -486,25 +519,31 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, clu // source ref (reason@machine/name) so the problem can be easily tracked down to its source machine. // However, during delete we are hiding the counter (1 of x) because it does not make sense given that // all the machines are deleted in parallel. - conditions.SetAggregate(kcp, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef(), conditions.WithStepCounterIf(false)) + conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, controlPlane.Machines.ConditionGetters(), conditions.AddSourceRef(), conditions.WithStepCounterIf(false)) + + // Gets all machines, not just control plane machines. + allMachines, err := r.managementCluster.GetMachinesForCluster(ctx, controlPlane.Cluster) + if err != nil { + return ctrl.Result{}, err + } allMachinePools := &expv1.MachinePoolList{} // Get all machine pools. if feature.Gates.Enabled(feature.MachinePool) { - allMachinePools, err = r.managementCluster.GetMachinePoolsForCluster(ctx, cluster) + allMachinePools, err = r.managementCluster.GetMachinePoolsForCluster(ctx, controlPlane.Cluster) if err != nil { return ctrl.Result{}, err } } // Verify that only control plane machines remain - if len(allMachines) != len(ownedMachines) || len(allMachinePools.Items) != 0 { + if len(allMachines) != len(controlPlane.Machines) || len(allMachinePools.Items) != 0 { log.Info("Waiting for worker nodes to be deleted first") - conditions.MarkFalse(kcp, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "Waiting for worker nodes to be deleted first") + conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "Waiting for worker nodes to be deleted first") return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil } // Delete control plane machines in parallel - machinesToDelete := ownedMachines.Filter(collections.Not(collections.HasDeletionTimestamp)) + machinesToDelete := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp)) var errs []error for i := range machinesToDelete { m := machinesToDelete[i] @@ -516,11 +555,11 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, clu } if len(errs) > 0 { err := kerrors.NewAggregate(errs) - r.recorder.Eventf(kcp, corev1.EventTypeWarning, "FailedDelete", - "Failed to delete control plane Machines for cluster %s/%s control plane: %v", cluster.Namespace, cluster.Name, err) + r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedDelete", + "Failed to delete control plane Machines for cluster %s control plane: %v", klog.KObj(controlPlane.Cluster), err) return ctrl.Result{}, err } - conditions.MarkFalse(kcp, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil } @@ -632,7 +671,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneConditions(ctx cont return ctrl.Result{}, nil } - workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster)) + workloadCluster, err := controlPlane.GetWorkloadCluster(ctx) if err != nil { return ctrl.Result{}, errors.Wrap(err, "cannot get remote client to workload cluster") } @@ -683,7 +722,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context return ctrl.Result{}, nil } - workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster)) + workloadCluster, err := controlPlane.GetWorkloadCluster(ctx) if err != nil { // Failing at connecting to the workload cluster can mean workload cluster is unhealthy for a variety of reasons such as etcd quorum loss. return ctrl.Result{}, errors.Wrap(err, "cannot get remote client to workload cluster") @@ -722,7 +761,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileCertificateExpiries(ctx context // Ignore machines which are being deleted. machines := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp)) - workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster)) + workloadCluster, err := controlPlane.GetWorkloadCluster(ctx) if err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to reconcile certificate expiries: cannot get remote client to workload cluster") } @@ -889,15 +928,15 @@ func (r *KubeadmControlPlaneReconciler) adoptOwnedSecrets(ctx context.Context, k } // ensureCertificatesOwnerRef ensures an ownerReference to the owner is added on the Secrets holding certificates. -func ensureCertificatesOwnerRef(ctx context.Context, ctrlclient client.Client, clusterKey client.ObjectKey, certificates secret.Certificates, owner metav1.OwnerReference) error { +func (r *KubeadmControlPlaneReconciler) ensureCertificatesOwnerRef(ctx context.Context, clusterKey client.ObjectKey, certificates secret.Certificates, owner metav1.OwnerReference) error { for _, c := range certificates { s := &corev1.Secret{} secretKey := client.ObjectKey{Namespace: clusterKey.Namespace, Name: secret.Name(clusterKey.Name, c.Purpose)} - if err := ctrlclient.Get(ctx, secretKey, s); err != nil { + if err := r.Client.Get(ctx, secretKey, s); err != nil { return errors.Wrapf(err, "failed to get Secret %s", secretKey) } - patchHelper, err := patch.NewHelper(s, ctrlclient) + patchHelper, err := patch.NewHelper(s, r.Client) if err != nil { return errors.Wrapf(err, "failed to create patchHelper for Secret %s", secretKey) } diff --git a/controlplane/kubeadm/internal/controllers/controller_test.go b/controlplane/kubeadm/internal/controllers/controller_test.go index 79e698e64625..8952b43594a8 100644 --- a/controlplane/kubeadm/internal/controllers/controller_test.go +++ b/controlplane/kubeadm/internal/controllers/controller_test.go @@ -520,7 +520,9 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) { managementClusterUncached: fmc, } - g.Expect(r.reconcile(ctx, cluster, kcp)).To(Equal(ctrl.Result{})) + _, adoptableMachineFound, err := r.initControlPlaneScope(ctx, cluster, kcp) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(adoptableMachineFound).To(BeTrue()) machineList := &clusterv1.MachineList{} g.Expect(fakeClient.List(ctx, machineList, client.InNamespace(cluster.Namespace))).To(Succeed()) @@ -614,7 +616,9 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) { managementClusterUncached: fmc, } - g.Expect(r.reconcile(ctx, cluster, kcp)).To(Equal(ctrl.Result{})) + _, adoptableMachineFound, err := r.initControlPlaneScope(ctx, cluster, kcp) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(adoptableMachineFound).To(BeTrue()) machineList := &clusterv1.MachineList{} g.Expect(fakeClient.List(ctx, machineList, client.InNamespace(cluster.Namespace))).To(Succeed()) @@ -698,10 +702,9 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) { managementClusterUncached: fmc, } - result, err := r.reconcile(ctx, cluster, kcp) - g.Expect(result).To(Equal(ctrl.Result{})) - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("has just been deleted")) + _, adoptableMachineFound, err := r.initControlPlaneScope(ctx, cluster, kcp) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(adoptableMachineFound).To(BeFalse()) machineList := &clusterv1.MachineList{} g.Expect(fakeClient.List(ctx, machineList, client.InNamespace(cluster.Namespace))).To(Succeed()) @@ -711,7 +714,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) { } }) - t.Run("refuses to adopt Machines that are more than one version old", func(t *testing.T) { + t.Run("Do not adopt Machines that are more than one version old", func(t *testing.T) { g := NewWithT(t) cluster, kcp, tmpl := createClusterWithControlPlane(metav1.NamespaceDefault) @@ -753,7 +756,10 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) { managementClusterUncached: fmc, } - g.Expect(r.reconcile(ctx, cluster, kcp)).To(Equal(ctrl.Result{})) + _, adoptableMachineFound, err := r.initControlPlaneScope(ctx, cluster, kcp) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(adoptableMachineFound).To(BeTrue()) + // Message: Warning AdoptionFailed Could not adopt Machine test/test0: its version ("v1.15.0") is outside supported +/- one minor version skew from KCP's ("v1.17.0") g.Expect(recorder.Events).To(Receive(ContainSubstring("minor version"))) @@ -818,7 +824,8 @@ func TestKubeadmControlPlaneReconciler_ensureOwnerReferences(t *testing.T) { fakeClient := newFakeClient(objs...) - err = ensureCertificatesOwnerRef(ctx, fakeClient, client.ObjectKeyFromObject(cluster), certificates, kcpOwner) + r := KubeadmControlPlaneReconciler{Client: fakeClient} + err = r.ensureCertificatesOwnerRef(ctx, client.ObjectKeyFromObject(cluster), certificates, kcpOwner) g.Expect(err).To(BeNil()) secrets := &corev1.SecretList{} @@ -858,7 +865,9 @@ func TestKubeadmControlPlaneReconciler_ensureOwnerReferences(t *testing.T) { } fakeClient := newFakeClient(objs...) - err := ensureCertificatesOwnerRef(ctx, fakeClient, client.ObjectKeyFromObject(cluster), certificates, kcpOwner) + + r := KubeadmControlPlaneReconciler{Client: fakeClient} + err := r.ensureCertificatesOwnerRef(ctx, client.ObjectKeyFromObject(cluster), certificates, kcpOwner) g.Expect(err).To(BeNil()) secrets := &corev1.SecretList{} @@ -901,7 +910,9 @@ func TestKubeadmControlPlaneReconciler_ensureOwnerReferences(t *testing.T) { } fakeClient := newFakeClient(objs...) - err := ensureCertificatesOwnerRef(ctx, fakeClient, client.ObjectKeyFromObject(cluster), certificates, kcpOwner) + + r := KubeadmControlPlaneReconciler{Client: fakeClient} + err := r.ensureCertificatesOwnerRef(ctx, client.ObjectKeyFromObject(cluster), certificates, kcpOwner) g.Expect(err).To(BeNil()) secrets := &corev1.SecretList{} @@ -1088,18 +1099,20 @@ func TestReconcileCertificateExpiries(t *testing.T) { machineWithoutNodeRefKubeadmConfig, ) - controlPlane, err := internal.NewControlPlane(ctx, fakeClient, cluster, kcp, ownedMachines) - g.Expect(err).ToNot(HaveOccurred()) + managementCluster := &fakeManagementCluster{ + Workload: fakeWorkloadCluster{ + APIServerCertificateExpiry: &detectedExpiry, + }, + } r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, - managementCluster: &fakeManagementCluster{ - Workload: fakeWorkloadCluster{ - APIServerCertificateExpiry: &detectedExpiry, - }, - }, + Client: fakeClient, + managementCluster: managementCluster, } + controlPlane, err := internal.NewControlPlane(ctx, managementCluster, fakeClient, cluster, kcp, ownedMachines) + g.Expect(err).ToNot(HaveOccurred()) + _, err = r.reconcileCertificateExpiries(ctx, controlPlane) g.Expect(err).NotTo(HaveOccurred()) @@ -2014,9 +2027,11 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) { controllerutil.AddFinalizer(kcp, controlplanev1.KubeadmControlPlaneFinalizer) initObjs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy()} + machines := collections.New() for i := 0; i < 3; i++ { m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster, kcp, true) initObjs = append(initObjs, m) + machines.Insert(m) } fakeClient := newFakeClient(initObjs...) @@ -2031,7 +2046,13 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) { recorder: record.NewFakeRecorder(32), } - result, err := r.reconcileDelete(ctx, cluster, kcp) + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + Machines: machines, + } + + result, err := r.reconcileDelete(ctx, controlPlane) g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: deleteRequeueAfter})) g.Expect(err).ToNot(HaveOccurred()) g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer)) @@ -2040,7 +2061,12 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) { g.Expect(fakeClient.List(ctx, &controlPlaneMachines)).To(Succeed()) g.Expect(controlPlaneMachines.Items).To(BeEmpty()) - result, err = r.reconcileDelete(ctx, cluster, kcp) + controlPlane = &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + } + + result, err = r.reconcileDelete(ctx, controlPlane) g.Expect(result).To(Equal(ctrl.Result{})) g.Expect(err).NotTo(HaveOccurred()) g.Expect(kcp.Finalizers).To(BeEmpty()) @@ -2064,9 +2090,11 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) { initObjs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy(), workerMachine.DeepCopy()} + machines := collections.New() for i := 0; i < 3; i++ { m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster, kcp, true) initObjs = append(initObjs, m) + machines.Insert(m) } fakeClient := newFakeClient(initObjs...) @@ -2080,7 +2108,13 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) { recorder: record.NewFakeRecorder(32), } - result, err := r.reconcileDelete(ctx, cluster, kcp) + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + Machines: machines, + } + + result, err := r.reconcileDelete(ctx, controlPlane) g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: deleteRequeueAfter})) g.Expect(err).ToNot(HaveOccurred()) @@ -2113,9 +2147,11 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) { initObjs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy(), workerMachinePool.DeepCopy()} + machines := collections.New() for i := 0; i < 3; i++ { m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster, kcp, true) initObjs = append(initObjs, m) + machines.Insert(m) } fakeClient := newFakeClient(initObjs...) @@ -2129,7 +2165,13 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) { recorder: record.NewFakeRecorder(32), } - result, err := r.reconcileDelete(ctx, cluster, kcp) + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + Machines: machines, + } + + result, err := r.reconcileDelete(ctx, controlPlane) g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: deleteRequeueAfter})) g.Expect(err).ToNot(HaveOccurred()) @@ -2160,7 +2202,12 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) { recorder: record.NewFakeRecorder(32), } - result, err := r.reconcileDelete(ctx, cluster, kcp) + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + } + + result, err := r.reconcileDelete(ctx, controlPlane) g.Expect(result).To(Equal(ctrl.Result{})) g.Expect(err).NotTo(HaveOccurred()) g.Expect(kcp.Finalizers).To(BeEmpty()) diff --git a/controlplane/kubeadm/internal/controllers/helpers.go b/controlplane/kubeadm/internal/controllers/helpers.go index 67562ecaf7cf..5083c04c55fc 100644 --- a/controlplane/kubeadm/internal/controllers/helpers.go +++ b/controlplane/kubeadm/internal/controllers/helpers.go @@ -47,16 +47,16 @@ import ( "sigs.k8s.io/cluster-api/util/secret" ) -func (r *KubeadmControlPlaneReconciler) reconcileKubeconfig(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane) (ctrl.Result, error) { +func (r *KubeadmControlPlaneReconciler) reconcileKubeconfig(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) - endpoint := cluster.Spec.ControlPlaneEndpoint + endpoint := controlPlane.Cluster.Spec.ControlPlaneEndpoint if endpoint.IsZero() { return ctrl.Result{}, nil } - controllerOwnerRef := *metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind(kubeadmControlPlaneKind)) - clusterName := util.ObjectKey(cluster) + controllerOwnerRef := *metav1.NewControllerRef(controlPlane.KCP, controlplanev1.GroupVersion.WithKind(kubeadmControlPlaneKind)) + clusterName := util.ObjectKey(controlPlane.Cluster) configSecret, err := secret.GetFromNamespacedName(ctx, r.Client, clusterName, secret.Kubeconfig) switch { case apierrors.IsNotFound(err): @@ -76,12 +76,12 @@ func (r *KubeadmControlPlaneReconciler) reconcileKubeconfig(ctx context.Context, return ctrl.Result{}, errors.Wrap(err, "failed to retrieve kubeconfig Secret") } - if err := r.adoptKubeconfigSecret(ctx, configSecret, kcp); err != nil { + if err := r.adoptKubeconfigSecret(ctx, configSecret, controlPlane.KCP); err != nil { return ctrl.Result{}, err } // only do rotation on owned secrets - if !util.IsControlledBy(configSecret, kcp) { + if !util.IsControlledBy(configSecret, controlPlane.KCP) { return ctrl.Result{}, nil } diff --git a/controlplane/kubeadm/internal/controllers/helpers_test.go b/controlplane/kubeadm/internal/controllers/helpers_test.go index d5ef09233eb8..53b161eabf3d 100644 --- a/controlplane/kubeadm/internal/controllers/helpers_test.go +++ b/controlplane/kubeadm/internal/controllers/helpers_test.go @@ -34,6 +34,7 @@ import ( bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controllers/external" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" + "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/kubeconfig" "sigs.k8s.io/cluster-api/util/secret" @@ -77,7 +78,12 @@ func TestReconcileKubeconfigEmptyAPIEndpoints(t *testing.T) { recorder: record.NewFakeRecorder(32), } - result, err := r.reconcileKubeconfig(ctx, cluster, kcp) + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + } + + result, err := r.reconcileKubeconfig(ctx, controlPlane) g.Expect(err).ToNot(HaveOccurred()) g.Expect(result).To(BeZero()) @@ -126,7 +132,12 @@ func TestReconcileKubeconfigMissingCACertificate(t *testing.T) { recorder: record.NewFakeRecorder(32), } - result, err := r.reconcileKubeconfig(ctx, cluster, kcp) + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + } + + result, err := r.reconcileKubeconfig(ctx, controlPlane) g.Expect(err).ToNot(HaveOccurred()) g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: dependentCertRequeueAfter})) @@ -192,7 +203,12 @@ func TestReconcileKubeconfigSecretDoesNotAdoptsUserSecrets(t *testing.T) { recorder: record.NewFakeRecorder(32), } - result, err := r.reconcileKubeconfig(ctx, cluster, kcp) + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + } + + result, err := r.reconcileKubeconfig(ctx, controlPlane) g.Expect(err).To(Succeed()) g.Expect(result).To(BeZero()) @@ -251,7 +267,13 @@ func TestKubeadmControlPlaneReconciler_reconcileKubeconfig(t *testing.T) { Client: fakeClient, recorder: record.NewFakeRecorder(32), } - result, err := r.reconcileKubeconfig(ctx, cluster, kcp) + + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + } + + result, err := r.reconcileKubeconfig(ctx, controlPlane) g.Expect(err).ToNot(HaveOccurred()) g.Expect(result).To(Equal(ctrl.Result{})) diff --git a/controlplane/kubeadm/internal/controllers/remediation.go b/controlplane/kubeadm/internal/controllers/remediation.go index 72a0eb8236aa..d1c8fd44efe9 100644 --- a/controlplane/kubeadm/internal/controllers/remediation.go +++ b/controlplane/kubeadm/internal/controllers/remediation.go @@ -29,12 +29,10 @@ import ( kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" - "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" @@ -177,7 +175,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C // - if the machine hosts the etcd leader, forward etcd leadership to another machine. // - delete the etcd member hosted on the machine being deleted. // - remove the etcd member from the kubeadm config map (only for kubernetes version older than v1.22.0) - workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster)) + workloadCluster, err := controlPlane.GetWorkloadCluster(ctx) if err != nil { log.Error(err, "Failed to create client to workload cluster") return ctrl.Result{}, errors.Wrapf(err, "failed to create client to workload cluster") @@ -355,10 +353,7 @@ func max(x, y time.Duration) time.Duration { func (r *KubeadmControlPlaneReconciler) canSafelyRemoveEtcdMember(ctx context.Context, controlPlane *internal.ControlPlane, machineToBeRemediated *clusterv1.Machine) (bool, error) { log := ctrl.LoggerFrom(ctx) - workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, client.ObjectKey{ - Namespace: controlPlane.Cluster.Namespace, - Name: controlPlane.Cluster.Name, - }) + workloadCluster, err := controlPlane.GetWorkloadCluster(ctx) if err != nil { return false, errors.Wrapf(err, "failed to get client for workload cluster %s", controlPlane.Cluster.Name) } diff --git a/controlplane/kubeadm/internal/controllers/remediation_test.go b/controlplane/kubeadm/internal/controllers/remediation_test.go index 26183baa1cc7..8d5342a83cb1 100644 --- a/controlplane/kubeadm/internal/controllers/remediation_test.go +++ b/controlplane/kubeadm/internal/controllers/remediation_test.go @@ -461,6 +461,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) @@ -504,6 +505,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) @@ -686,6 +688,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) @@ -737,6 +740,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) @@ -789,6 +793,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) @@ -841,6 +846,8 @@ func TestReconcileUnhealthyMachines(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) + _, err = r.reconcileUnhealthyMachines(ctx, controlPlane) g.Expect(err).ToNot(HaveOccurred()) @@ -1097,6 +1104,7 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) @@ -1132,6 +1140,7 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { EtcdMembersResult: nodes(controlPlane.Machines), }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err = r.reconcileUnhealthyMachines(ctx, controlPlane) @@ -1207,6 +1216,7 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.reconcileUnhealthyMachines(ctx, controlPlane) @@ -1278,6 +1288,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeFalse()) @@ -1309,6 +1320,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeTrue()) @@ -1346,6 +1358,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeTrue()) @@ -1376,6 +1389,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeFalse()) @@ -1407,6 +1421,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeTrue()) @@ -1445,6 +1460,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeTrue()) @@ -1476,6 +1492,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeFalse()) @@ -1509,6 +1526,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeTrue()) @@ -1542,6 +1560,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeFalse()) @@ -1577,6 +1596,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeTrue()) @@ -1612,6 +1632,7 @@ func TestCanSafelyRemoveEtcdMember(t *testing.T) { }, }, } + controlPlane.InjectTestManagementCluster(r.managementCluster) ret, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, m1) g.Expect(ret).To(BeFalse()) diff --git a/controlplane/kubeadm/internal/controllers/scale.go b/controlplane/kubeadm/internal/controllers/scale.go index 68d966f904da..67268c204f70 100644 --- a/controlplane/kubeadm/internal/controllers/scale.go +++ b/controlplane/kubeadm/internal/controllers/scale.go @@ -31,33 +31,32 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" - "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" ) -func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) { +func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) { logger := ctrl.LoggerFrom(ctx) // Perform an uncached read of all the owned machines. This check is in place to make sure // that the controller cache is not misbehaving and we end up initializing the cluster more than once. - ownedMachines, err := r.managementClusterUncached.GetMachinesForCluster(ctx, cluster, collections.OwnedMachines(kcp)) + ownedMachines, err := r.managementClusterUncached.GetMachinesForCluster(ctx, controlPlane.Cluster, collections.OwnedMachines(controlPlane.KCP)) if err != nil { logger.Error(err, "failed to perform an uncached read of control plane machines for cluster") return ctrl.Result{}, err } if len(ownedMachines) > 0 { return ctrl.Result{}, errors.Errorf( - "control plane has already been initialized, found %d owned machine for cluster %s/%s: controller cache or management cluster is misbehaving", - len(ownedMachines), cluster.Namespace, cluster.Name, + "control plane has already been initialized, found %d owned machine for cluster %s: controller cache or management cluster is misbehaving", + len(ownedMachines), klog.KObj(controlPlane.Cluster), ) } bootstrapSpec := controlPlane.InitialControlPlaneConfig() fd := controlPlane.NextFailureDomainForScaleUp() - if err := r.cloneConfigsAndGenerateMachine(ctx, cluster, kcp, bootstrapSpec, fd); err != nil { + if err := r.cloneConfigsAndGenerateMachine(ctx, controlPlane.Cluster, controlPlane.KCP, bootstrapSpec, fd); err != nil { logger.Error(err, "Failed to create initial control plane Machine") - r.recorder.Eventf(kcp, corev1.EventTypeWarning, "FailedInitialization", "Failed to create initial control plane Machine for cluster %s/%s control plane: %v", cluster.Namespace, cluster.Name, err) + r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedInitialization", "Failed to create initial control plane Machine for cluster %s control plane: %v", klog.KObj(controlPlane.Cluster), err) return ctrl.Result{}, err } @@ -65,7 +64,7 @@ func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Conte return ctrl.Result{Requeue: true}, nil } -func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) { +func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) { logger := ctrl.LoggerFrom(ctx) // Run preflight checks to ensure that the control plane is stable before proceeding with a scale up/scale down operation; if not, wait. @@ -76,9 +75,9 @@ func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, // Create the bootstrap configuration bootstrapSpec := controlPlane.JoinControlPlaneConfig() fd := controlPlane.NextFailureDomainForScaleUp() - if err := r.cloneConfigsAndGenerateMachine(ctx, cluster, kcp, bootstrapSpec, fd); err != nil { + if err := r.cloneConfigsAndGenerateMachine(ctx, controlPlane.Cluster, controlPlane.KCP, bootstrapSpec, fd); err != nil { logger.Error(err, "Failed to create additional control plane Machine") - r.recorder.Eventf(kcp, corev1.EventTypeWarning, "FailedScaleUp", "Failed to create additional control plane Machine for cluster %s/%s control plane: %v", cluster.Namespace, cluster.Name, err) + r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedScaleUp", "Failed to create additional control plane Machine for cluster % control plane: %v", klog.KObj(controlPlane.Cluster), err) return ctrl.Result{}, err } @@ -88,8 +87,6 @@ func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane( ctx context.Context, - cluster *clusterv1.Cluster, - kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane, outdatedMachines collections.Machines, ) (ctrl.Result, error) { @@ -107,7 +104,7 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane( return result, err } - workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster)) + workloadCluster, err := controlPlane.GetWorkloadCluster(ctx) if err != nil { logger.Error(err, "Failed to create client to workload cluster") return ctrl.Result{}, errors.Wrapf(err, "failed to create client to workload cluster") @@ -131,9 +128,9 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane( } } - parsedVersion, err := semver.ParseTolerant(kcp.Spec.Version) + parsedVersion, err := semver.ParseTolerant(controlPlane.KCP.Spec.Version) if err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kcp.Spec.Version) + return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", controlPlane.KCP.Spec.Version) } if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToDelete, parsedVersion); err != nil { @@ -144,8 +141,8 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane( logger = logger.WithValues("Machine", klog.KObj(machineToDelete)) if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) { logger.Error(err, "Failed to delete control plane machine") - r.recorder.Eventf(kcp, corev1.EventTypeWarning, "FailedScaleDown", - "Failed to delete control plane Machine %s for cluster %s/%s control plane: %v", machineToDelete.Name, cluster.Namespace, cluster.Name, err) + r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedScaleDown", + "Failed to delete control plane Machine %s for cluster %s control plane: %v", machineToDelete.Name, klog.KObj(controlPlane.Cluster), err) return ctrl.Result{}, err } diff --git a/controlplane/kubeadm/internal/controllers/scale_test.go b/controlplane/kubeadm/internal/controllers/scale_test.go index 045e2fc83c3a..a9caad651687 100644 --- a/controlplane/kubeadm/internal/controllers/scale_test.go +++ b/controlplane/kubeadm/internal/controllers/scale_test.go @@ -78,7 +78,7 @@ func TestKubeadmControlPlaneReconciler_initializeControlPlane(t *testing.T) { KCP: kcp, } - result, err := r.initializeControlPlane(ctx, cluster, kcp, controlPlane) + result, err := r.initializeControlPlane(ctx, controlPlane) g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) g.Expect(err).NotTo(HaveOccurred()) @@ -155,7 +155,7 @@ func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) { Machines: fmc.Machines, } - result, err := r.scaleUpControlPlane(ctx, cluster, kcp, controlPlane) + result, err := r.scaleUpControlPlane(ctx, controlPlane) g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) g.Expect(err).ToNot(HaveOccurred()) @@ -216,7 +216,11 @@ func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) { disableInPlacePropagation: true, } - result, err := r.reconcile(context.Background(), cluster, kcp) + controlPlane, adoptableMachineFound, err := r.initControlPlaneScope(ctx, cluster, kcp) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(adoptableMachineFound).To(BeFalse()) + + result, err := r.reconcile(context.Background(), controlPlane) g.Expect(err).ToNot(HaveOccurred()) g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: preflightFailedRequeueAfter})) @@ -267,8 +271,9 @@ func TestKubeadmControlPlaneReconciler_scaleDownControlPlane_NoError(t *testing. Cluster: cluster, Machines: machines, } + controlPlane.InjectTestManagementCluster(r.managementCluster) - result, err := r.scaleDownControlPlane(context.Background(), cluster, kcp, controlPlane, controlPlane.Machines) + result, err := r.scaleDownControlPlane(context.Background(), controlPlane, controlPlane.Machines) g.Expect(err).ToNot(HaveOccurred()) g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) @@ -307,8 +312,9 @@ func TestKubeadmControlPlaneReconciler_scaleDownControlPlane_NoError(t *testing. Cluster: cluster, Machines: machines, } + controlPlane.InjectTestManagementCluster(r.managementCluster) - result, err := r.scaleDownControlPlane(context.Background(), cluster, kcp, controlPlane, controlPlane.Machines) + result, err := r.scaleDownControlPlane(context.Background(), controlPlane, controlPlane.Machines) g.Expect(err).ToNot(HaveOccurred()) g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) @@ -343,8 +349,9 @@ func TestKubeadmControlPlaneReconciler_scaleDownControlPlane_NoError(t *testing. Cluster: cluster, Machines: machines, } + controlPlane.InjectTestManagementCluster(r.managementCluster) - result, err := r.scaleDownControlPlane(context.Background(), cluster, kcp, controlPlane, controlPlane.Machines) + result, err := r.scaleDownControlPlane(context.Background(), controlPlane, controlPlane.Machines) g.Expect(err).ToNot(HaveOccurred()) g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: preflightFailedRequeueAfter})) diff --git a/controlplane/kubeadm/internal/controllers/status.go b/controlplane/kubeadm/internal/controllers/status.go index 426fec4b0977..f135baf0aae2 100644 --- a/controlplane/kubeadm/internal/controllers/status.go +++ b/controlplane/kubeadm/internal/controllers/status.go @@ -20,82 +20,68 @@ import ( "context" "github.com/pkg/errors" - ctrl "sigs.k8s.io/controller-runtime" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" - "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" ) // updateStatus is called after every reconcilitation loop in a defer statement to always make sure we have the // resource status subresourcs up-to-date. -func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, cluster *clusterv1.Cluster) error { - log := ctrl.LoggerFrom(ctx) - - selector := collections.ControlPlaneSelectorForCluster(cluster.Name) +func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, controlPlane *internal.ControlPlane) error { + selector := collections.ControlPlaneSelectorForCluster(controlPlane.Cluster.Name) // Copy label selector to its status counterpart in string format. // This is necessary for CRDs including scale subresources. - kcp.Status.Selector = selector.String() - - ownedMachines, err := r.managementCluster.GetMachinesForCluster(ctx, cluster, collections.OwnedMachines(kcp)) - if err != nil { - return errors.Wrap(err, "failed to get list of owned machines") - } + controlPlane.KCP.Status.Selector = selector.String() - controlPlane, err := internal.NewControlPlane(ctx, r.Client, cluster, kcp, ownedMachines) - if err != nil { - log.Error(err, "failed to initialize control plane") - return err - } - kcp.Status.UpdatedReplicas = int32(len(controlPlane.UpToDateMachines())) + controlPlane.KCP.Status.UpdatedReplicas = int32(len(controlPlane.UpToDateMachines())) - replicas := int32(len(ownedMachines)) - desiredReplicas := *kcp.Spec.Replicas + replicas := int32(len(controlPlane.Machines)) + desiredReplicas := *controlPlane.KCP.Spec.Replicas // set basic data that does not require interacting with the workload cluster - kcp.Status.Replicas = replicas - kcp.Status.ReadyReplicas = 0 - kcp.Status.UnavailableReplicas = replicas + controlPlane.KCP.Status.Replicas = replicas + controlPlane.KCP.Status.ReadyReplicas = 0 + controlPlane.KCP.Status.UnavailableReplicas = replicas // Return early if the deletion timestamp is set, because we don't want to try to connect to the workload cluster // and we don't want to report resize condition (because it is set to deleting into reconcile delete). - if !kcp.DeletionTimestamp.IsZero() { + if !controlPlane.KCP.DeletionTimestamp.IsZero() { return nil } - machinesWithHealthyAPIServer := ownedMachines.Filter(collections.HealthyAPIServer()) + machinesWithHealthyAPIServer := controlPlane.Machines.Filter(collections.HealthyAPIServer()) lowestVersion := machinesWithHealthyAPIServer.LowestVersion() if lowestVersion != nil { - kcp.Status.Version = lowestVersion + controlPlane.KCP.Status.Version = lowestVersion } switch { // We are scaling up case replicas < desiredReplicas: - conditions.MarkFalse(kcp, controlplanev1.ResizedCondition, controlplanev1.ScalingUpReason, clusterv1.ConditionSeverityWarning, "Scaling up control plane to %d replicas (actual %d)", desiredReplicas, replicas) + conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, controlplanev1.ScalingUpReason, clusterv1.ConditionSeverityWarning, "Scaling up control plane to %d replicas (actual %d)", desiredReplicas, replicas) // We are scaling down case replicas > desiredReplicas: - conditions.MarkFalse(kcp, controlplanev1.ResizedCondition, controlplanev1.ScalingDownReason, clusterv1.ConditionSeverityWarning, "Scaling down control plane to %d replicas (actual %d)", desiredReplicas, replicas) + conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, controlplanev1.ScalingDownReason, clusterv1.ConditionSeverityWarning, "Scaling down control plane to %d replicas (actual %d)", desiredReplicas, replicas) // This means that there was no error in generating the desired number of machine objects - conditions.MarkTrue(kcp, controlplanev1.MachinesCreatedCondition) + conditions.MarkTrue(controlPlane.KCP, controlplanev1.MachinesCreatedCondition) default: // make sure last resize operation is marked as completed. // NOTE: we are checking the number of machines ready so we report resize completed only when the machines // are actually provisioned (vs reporting completed immediately after the last machine object is created). - readyMachines := ownedMachines.Filter(collections.IsReady()) + readyMachines := controlPlane.Machines.Filter(collections.IsReady()) if int32(len(readyMachines)) == replicas { - conditions.MarkTrue(kcp, controlplanev1.ResizedCondition) + conditions.MarkTrue(controlPlane.KCP, controlplanev1.ResizedCondition) } // This means that there was no error in generating the desired number of machine objects - conditions.MarkTrue(kcp, controlplanev1.MachinesCreatedCondition) + conditions.MarkTrue(controlPlane.KCP, controlplanev1.MachinesCreatedCondition) } - workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster)) + workloadCluster, err := controlPlane.GetWorkloadCluster(ctx) if err != nil { return errors.Wrap(err, "failed to create remote cluster client") } @@ -103,17 +89,17 @@ func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, kcp *c if err != nil { return err } - kcp.Status.ReadyReplicas = status.ReadyNodes - kcp.Status.UnavailableReplicas = replicas - status.ReadyNodes + controlPlane.KCP.Status.ReadyReplicas = status.ReadyNodes + controlPlane.KCP.Status.UnavailableReplicas = replicas - status.ReadyNodes // This only gets initialized once and does not change if the kubeadm config map goes away. if status.HasKubeadmConfig { - kcp.Status.Initialized = true - conditions.MarkTrue(kcp, controlplanev1.AvailableCondition) + controlPlane.KCP.Status.Initialized = true + conditions.MarkTrue(controlPlane.KCP, controlplanev1.AvailableCondition) } - if kcp.Status.ReadyReplicas > 0 { - kcp.Status.Ready = true + if controlPlane.KCP.Status.ReadyReplicas > 0 { + controlPlane.KCP.Status.Ready = true } // Surface lastRemediation data in status. @@ -121,14 +107,14 @@ func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, kcp *c // most recent of the remediation we are keeping track on machines. var lastRemediation *RemediationData - if v, ok := kcp.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok { + if v, ok := controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok { remediationData, err := RemediationDataFromAnnotation(v) if err != nil { return err } lastRemediation = remediationData } else { - for _, m := range ownedMachines.UnsortedList() { + for _, m := range controlPlane.Machines.UnsortedList() { if v, ok := m.Annotations[controlplanev1.RemediationForAnnotation]; ok { remediationData, err := RemediationDataFromAnnotation(v) if err != nil { @@ -142,7 +128,7 @@ func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, kcp *c } if lastRemediation != nil { - kcp.Status.LastRemediation = lastRemediation.ToStatus() + controlPlane.KCP.Status.LastRemediation = lastRemediation.ToStatus() } return nil } diff --git a/controlplane/kubeadm/internal/controllers/status_test.go b/controlplane/kubeadm/internal/controllers/status_test.go index 6dd67b466802..67fe09b25e0c 100644 --- a/controlplane/kubeadm/internal/controllers/status_test.go +++ b/controlplane/kubeadm/internal/controllers/status_test.go @@ -81,7 +81,13 @@ func TestKubeadmControlPlaneReconciler_updateStatusNoMachines(t *testing.T) { recorder: record.NewFakeRecorder(32), } - g.Expect(r.updateStatus(ctx, kcp, cluster)).To(Succeed()) + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + } + controlPlane.InjectTestManagementCluster(r.managementCluster) + + g.Expect(r.updateStatus(ctx, controlPlane)).To(Succeed()) g.Expect(kcp.Status.Replicas).To(BeEquivalentTo(0)) g.Expect(kcp.Status.ReadyReplicas).To(BeEquivalentTo(0)) g.Expect(kcp.Status.UnavailableReplicas).To(BeEquivalentTo(0)) @@ -147,7 +153,14 @@ func TestKubeadmControlPlaneReconciler_updateStatusAllMachinesNotReady(t *testin recorder: record.NewFakeRecorder(32), } - g.Expect(r.updateStatus(ctx, kcp, cluster)).To(Succeed()) + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + Machines: machines, + } + controlPlane.InjectTestManagementCluster(r.managementCluster) + + g.Expect(r.updateStatus(ctx, controlPlane)).To(Succeed()) g.Expect(kcp.Status.Replicas).To(BeEquivalentTo(3)) g.Expect(kcp.Status.ReadyReplicas).To(BeEquivalentTo(0)) g.Expect(kcp.Status.UnavailableReplicas).To(BeEquivalentTo(3)) @@ -219,7 +232,14 @@ func TestKubeadmControlPlaneReconciler_updateStatusAllMachinesReady(t *testing.T recorder: record.NewFakeRecorder(32), } - g.Expect(r.updateStatus(ctx, kcp, cluster)).To(Succeed()) + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + Machines: machines, + } + controlPlane.InjectTestManagementCluster(r.managementCluster) + + g.Expect(r.updateStatus(ctx, controlPlane)).To(Succeed()) g.Expect(kcp.Status.Replicas).To(BeEquivalentTo(3)) g.Expect(kcp.Status.ReadyReplicas).To(BeEquivalentTo(3)) g.Expect(kcp.Status.UnavailableReplicas).To(BeEquivalentTo(0)) @@ -294,7 +314,14 @@ func TestKubeadmControlPlaneReconciler_updateStatusMachinesReadyMixed(t *testing recorder: record.NewFakeRecorder(32), } - g.Expect(r.updateStatus(ctx, kcp, cluster)).To(Succeed()) + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + Machines: machines, + } + controlPlane.InjectTestManagementCluster(r.managementCluster) + + g.Expect(r.updateStatus(ctx, controlPlane)).To(Succeed()) g.Expect(kcp.Status.Replicas).To(BeEquivalentTo(5)) g.Expect(kcp.Status.ReadyReplicas).To(BeEquivalentTo(1)) g.Expect(kcp.Status.UnavailableReplicas).To(BeEquivalentTo(4)) @@ -368,7 +395,14 @@ func TestKubeadmControlPlaneReconciler_machinesCreatedIsIsTrueEvenWhenTheNodesAr recorder: record.NewFakeRecorder(32), } - g.Expect(r.updateStatus(ctx, kcp, cluster)).To(Succeed()) + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + Machines: machines, + } + controlPlane.InjectTestManagementCluster(r.managementCluster) + + g.Expect(r.updateStatus(ctx, controlPlane)).To(Succeed()) g.Expect(kcp.Status.Replicas).To(BeEquivalentTo(3)) g.Expect(kcp.Status.ReadyReplicas).To(BeEquivalentTo(0)) g.Expect(kcp.Status.UnavailableReplicas).To(BeEquivalentTo(3)) diff --git a/controlplane/kubeadm/internal/controllers/suite_test.go b/controlplane/kubeadm/internal/controllers/suite_test.go index 865f39ef5fcc..c3d10ccb2a23 100644 --- a/controlplane/kubeadm/internal/controllers/suite_test.go +++ b/controlplane/kubeadm/internal/controllers/suite_test.go @@ -20,7 +20,9 @@ import ( "os" "testing" + corev1 "k8s.io/api/core/v1" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/cluster-api/internal/test/envtest" ) @@ -32,7 +34,11 @@ var ( func TestMain(m *testing.M) { os.Exit(envtest.Run(ctx, envtest.RunInput{ - M: m, + M: m, + ManagerUncachedObjs: []client.Object{ + &corev1.ConfigMap{}, + &corev1.Secret{}, + }, SetupEnv: func(e *envtest.Environment) { env = e }, })) } diff --git a/controlplane/kubeadm/internal/controllers/upgrade.go b/controlplane/kubeadm/internal/controllers/upgrade.go index a3c0a6f8ee56..b1528283d959 100644 --- a/controlplane/kubeadm/internal/controllers/upgrade.go +++ b/controlplane/kubeadm/internal/controllers/upgrade.go @@ -23,7 +23,6 @@ import ( "github.com/pkg/errors" ctrl "sigs.k8s.io/controller-runtime" - clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" "sigs.k8s.io/cluster-api/util" @@ -33,28 +32,26 @@ import ( func (r *KubeadmControlPlaneReconciler) upgradeControlPlane( ctx context.Context, - cluster *clusterv1.Cluster, - kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane, machinesRequireUpgrade collections.Machines, ) (ctrl.Result, error) { logger := ctrl.LoggerFrom(ctx) - if kcp.Spec.RolloutStrategy == nil || kcp.Spec.RolloutStrategy.RollingUpdate == nil { + if controlPlane.KCP.Spec.RolloutStrategy == nil || controlPlane.KCP.Spec.RolloutStrategy.RollingUpdate == nil { return ctrl.Result{}, errors.New("rolloutStrategy is not set") } // TODO: handle reconciliation of etcd members and kubeadm config in case they get out of sync with cluster - workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster)) + workloadCluster, err := controlPlane.GetWorkloadCluster(ctx) if err != nil { - logger.Error(err, "failed to get remote client for workload cluster", "cluster key", util.ObjectKey(cluster)) + logger.Error(err, "failed to get remote client for workload cluster", "cluster key", util.ObjectKey(controlPlane.Cluster)) return ctrl.Result{}, err } - parsedVersion, err := semver.ParseTolerant(kcp.Spec.Version) + parsedVersion, err := semver.ParseTolerant(controlPlane.KCP.Spec.Version) if err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kcp.Spec.Version) + return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", controlPlane.KCP.Spec.Version) } if err := workloadCluster.ReconcileKubeletRBACRole(ctx, parsedVersion); err != nil { @@ -75,43 +72,43 @@ func (r *KubeadmControlPlaneReconciler) upgradeControlPlane( return ctrl.Result{}, errors.Wrap(err, "failed to update the kubernetes version in the kubeadm config map") } - if kcp.Spec.KubeadmConfigSpec.ClusterConfiguration != nil { + if controlPlane.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration != nil { // We intentionally only parse major/minor/patch so that the subsequent code // also already applies to beta versions of new releases. - parsedVersionTolerant, err := version.ParseMajorMinorPatchTolerant(kcp.Spec.Version) + parsedVersionTolerant, err := version.ParseMajorMinorPatchTolerant(controlPlane.KCP.Spec.Version) if err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kcp.Spec.Version) + return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", controlPlane.KCP.Spec.Version) } // Get the imageRepository or the correct value if nothing is set and a migration is necessary. - imageRepository := internal.ImageRepositoryFromClusterConfig(kcp.Spec.KubeadmConfigSpec.ClusterConfiguration, parsedVersionTolerant) + imageRepository := internal.ImageRepositoryFromClusterConfig(controlPlane.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration, parsedVersionTolerant) if err := workloadCluster.UpdateImageRepositoryInKubeadmConfigMap(ctx, imageRepository, parsedVersion); err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to update the image repository in the kubeadm config map") } } - if kcp.Spec.KubeadmConfigSpec.ClusterConfiguration != nil && kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.Local != nil { - meta := kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.Local.ImageMeta + if controlPlane.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration != nil && controlPlane.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.Local != nil { + meta := controlPlane.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.Local.ImageMeta if err := workloadCluster.UpdateEtcdVersionInKubeadmConfigMap(ctx, meta.ImageRepository, meta.ImageTag, parsedVersion); err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to update the etcd version in the kubeadm config map") } - extraArgs := kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.Local.ExtraArgs + extraArgs := controlPlane.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.Local.ExtraArgs if err := workloadCluster.UpdateEtcdExtraArgsInKubeadmConfigMap(ctx, extraArgs, parsedVersion); err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to update the etcd extra args in the kubeadm config map") } } - if kcp.Spec.KubeadmConfigSpec.ClusterConfiguration != nil { - if err := workloadCluster.UpdateAPIServerInKubeadmConfigMap(ctx, kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.APIServer, parsedVersion); err != nil { + if controlPlane.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration != nil { + if err := workloadCluster.UpdateAPIServerInKubeadmConfigMap(ctx, controlPlane.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration.APIServer, parsedVersion); err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to update api server in the kubeadm config map") } - if err := workloadCluster.UpdateControllerManagerInKubeadmConfigMap(ctx, kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.ControllerManager, parsedVersion); err != nil { + if err := workloadCluster.UpdateControllerManagerInKubeadmConfigMap(ctx, controlPlane.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration.ControllerManager, parsedVersion); err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to update controller manager in the kubeadm config map") } - if err := workloadCluster.UpdateSchedulerInKubeadmConfigMap(ctx, kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Scheduler, parsedVersion); err != nil { + if err := workloadCluster.UpdateSchedulerInKubeadmConfigMap(ctx, controlPlane.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration.Scheduler, parsedVersion); err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to update scheduler in the kubeadm config map") } } @@ -120,16 +117,16 @@ func (r *KubeadmControlPlaneReconciler) upgradeControlPlane( return ctrl.Result{}, errors.Wrap(err, "failed to upgrade kubelet config map") } - switch kcp.Spec.RolloutStrategy.Type { + switch controlPlane.KCP.Spec.RolloutStrategy.Type { case controlplanev1.RollingUpdateStrategyType: // RolloutStrategy is currently defaulted and validated to be RollingUpdate // We can ignore MaxUnavailable because we are enforcing health checks before we get here. - maxNodes := *kcp.Spec.Replicas + int32(kcp.Spec.RolloutStrategy.RollingUpdate.MaxSurge.IntValue()) + maxNodes := *controlPlane.KCP.Spec.Replicas + int32(controlPlane.KCP.Spec.RolloutStrategy.RollingUpdate.MaxSurge.IntValue()) if int32(controlPlane.Machines.Len()) < maxNodes { // scaleUp ensures that we don't continue scaling up while waiting for Machines to have NodeRefs - return r.scaleUpControlPlane(ctx, cluster, kcp, controlPlane) + return r.scaleUpControlPlane(ctx, controlPlane) } - return r.scaleDownControlPlane(ctx, cluster, kcp, controlPlane, machinesRequireUpgrade) + return r.scaleDownControlPlane(ctx, controlPlane, machinesRequireUpgrade) default: logger.Info("RolloutStrategy type is not set to RollingUpdateStrategyType, unable to determine the strategy for rolling out machines") return ctrl.Result{}, nil diff --git a/controlplane/kubeadm/internal/controllers/upgrade_test.go b/controlplane/kubeadm/internal/controllers/upgrade_test.go index 75f26fe3d860..569cb1a23c0a 100644 --- a/controlplane/kubeadm/internal/controllers/upgrade_test.go +++ b/controlplane/kubeadm/internal/controllers/upgrade_test.go @@ -100,8 +100,9 @@ func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleUp(t *testing.T) { Cluster: cluster, Machines: nil, } + controlPlane.InjectTestManagementCluster(r.managementCluster) - result, err := r.initializeControlPlane(ctx, cluster, kcp, controlPlane) + result, err := r.initializeControlPlane(ctx, controlPlane) g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) g.Expect(err).NotTo(HaveOccurred()) @@ -123,7 +124,7 @@ func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleUp(t *testing.T) { // run upgrade the first time, expect we scale up needingUpgrade := collections.FromMachineList(initialMachine) controlPlane.Machines = needingUpgrade - result, err = r.upgradeControlPlane(ctx, cluster, kcp, controlPlane, needingUpgrade) + result, err = r.upgradeControlPlane(ctx, controlPlane, needingUpgrade) g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) g.Expect(err).To(BeNil()) bothMachines := &clusterv1.MachineList{} @@ -135,7 +136,14 @@ func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleUp(t *testing.T) { // run upgrade a second time, simulate that the node has not appeared yet but the machine exists // Unhealthy control plane will be detected during reconcile loop and upgrade will never be called. - result, err = r.reconcile(context.Background(), cluster, kcp) + controlPlane = &internal.ControlPlane{ + KCP: kcp, + Cluster: cluster, + Machines: collections.FromMachineList(bothMachines), + } + controlPlane.InjectTestManagementCluster(r.managementCluster) + + result, err = r.reconcile(context.Background(), controlPlane) g.Expect(err).ToNot(HaveOccurred()) g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: preflightFailedRequeueAfter})) g.Eventually(func(g Gomega) { @@ -158,7 +166,7 @@ func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleUp(t *testing.T) { } // run upgrade the second time, expect we scale down - result, err = r.upgradeControlPlane(ctx, cluster, kcp, controlPlane, machinesRequireUpgrade) + result, err = r.upgradeControlPlane(ctx, controlPlane, machinesRequireUpgrade) g.Expect(err).To(BeNil()) g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) finalMachine := &clusterv1.MachineList{} @@ -230,8 +238,9 @@ func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleDown(t *testing.T) { Cluster: cluster, Machines: nil, } + controlPlane.InjectTestManagementCluster(r.managementCluster) - result, err := r.reconcile(ctx, cluster, kcp) + result, err := r.reconcile(ctx, controlPlane) g.Expect(result).To(Equal(ctrl.Result{})) g.Expect(err).NotTo(HaveOccurred()) @@ -248,7 +257,8 @@ func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleDown(t *testing.T) { // run upgrade, expect we scale down needingUpgrade := collections.FromMachineList(machineList) controlPlane.Machines = needingUpgrade - result, err = r.upgradeControlPlane(ctx, cluster, kcp, controlPlane, needingUpgrade) + + result, err = r.upgradeControlPlane(ctx, controlPlane, needingUpgrade) g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) g.Expect(err).To(BeNil()) remainingMachines := &clusterv1.MachineList{}