From fbc594ad3500cd88086bc6d26e442905fce2f7b2 Mon Sep 17 00:00:00 2001
From: willie-yao
Date: Fri, 11 Aug 2023 18:48:31 +0000
Subject: [PATCH] Re-diff desired and reconcile state

---
 .../topology/cluster/desired_state.go   | 19 ++++---
 .../topology/cluster/reconcile_state.go | 49 ++++++++++++++++---
 2 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/internal/controllers/topology/cluster/desired_state.go b/internal/controllers/topology/cluster/desired_state.go
index 13865e89d1e8..62d3c43d86f1 100644
--- a/internal/controllers/topology/cluster/desired_state.go
+++ b/internal/controllers/topology/cluster/desired_state.go
@@ -75,6 +75,13 @@ func (r *Reconciler) computeDesiredState(ctx context.Context, s *scope.Scope) (*
 	}
 	s.UpgradeTracker.MachineDeployments.MarkUpgrading(mdUpgradingNames...)
 
+	// Mark all the MachinePools that are currently upgrading.
+	mpUpgradingNames, err := s.Current.MachinePools.Upgrading(ctx, r.Client)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to check if any MachinePool is upgrading")
+	}
+	s.UpgradeTracker.MachinePools.MarkUpgrading(mpUpgradingNames...)
+
 	// Compute the desired state of the ControlPlane object, eventually adding a reference to the
 	// InfrastructureMachineTemplate generated by the previous step.
 	if desiredState.ControlPlane.Object, err = r.computeControlPlane(ctx, s, desiredState.ControlPlane.InfrastructureMachineTemplate); err != nil {
@@ -920,9 +927,9 @@ func computeMachinePool(_ context.Context, s *scope.Scope, machinePoolTopology c
 
 	// Compute the bootstrap template.
 	currentMachinePool := s.Current.MachinePools[machinePoolTopology.Name]
-	var currentBootstrapTemplateRef *corev1.ObjectReference
+	var currentBootstrapConfigRef *corev1.ObjectReference
 	if currentMachinePool != nil && currentMachinePool.BootstrapObject != nil {
-		currentBootstrapTemplateRef = currentMachinePool.Object.Spec.Template.Spec.Bootstrap.ConfigRef
+		currentBootstrapConfigRef = currentMachinePool.Object.Spec.Template.Spec.Bootstrap.ConfigRef
 	}
 	var err error
 	desiredMachinePool.BootstrapObject, err = templateToObject(templateToInput{
@@ -930,8 +937,8 @@ func computeMachinePool(_ context.Context, s *scope.Scope, machinePoolTopology c
 		templateClonedFromRef: contract.ObjToRef(machinePoolBlueprint.BootstrapConfig),
 		cluster:               s.Current.Cluster,
 		namePrefix:            bootstrapTemplateNamePrefix(s.Current.Cluster.Name, machinePoolTopology.Name),
-		currentObjectRef:      currentBootstrapTemplateRef,
+		currentObjectRef:      currentBootstrapConfigRef,
 	})
 	if err != nil {
 		return nil, errors.Wrapf(err, "failed to compute bootstrap object for topology %q", machinePoolTopology.Name)
 	}
@@ -991,9 +998,9 @@ func computeMachinePool(_ context.Context, s *scope.Scope, machinePoolTopology c
 	}
 
 	// Compute the MachinePool object.
-	desiredBootstrapTemplateRef, err := calculateRefDesiredAPIVersion(currentBootstrapTemplateRef, desiredMachinePool.BootstrapObject)
+	desiredBootstrapConfigRef, err := calculateRefDesiredAPIVersion(currentBootstrapConfigRef, desiredMachinePool.BootstrapObject)
 	if err != nil {
-		return nil, errors.Wrap(err, "failed to calculate desired bootstrap template ref")
+		return nil, errors.Wrap(err, "failed to calculate desired bootstrap config ref")
 	}
 	desiredInfraMachineTemplateRef, err := calculateRefDesiredAPIVersion(currentInfraMachineTemplateRef, desiredMachinePool.InfrastructureMachinePoolObject)
 	if err != nil {
@@ -1016,7 +1023,7 @@ func computeMachinePool(_ context.Context, s *scope.Scope, machinePoolTopology c
 		Spec: clusterv1.MachineSpec{
 			ClusterName:             s.Current.Cluster.Name,
 			Version:                 pointer.String(version),
-			Bootstrap:               clusterv1.Bootstrap{ConfigRef: desiredBootstrapTemplateRef},
+			Bootstrap:               clusterv1.Bootstrap{ConfigRef: desiredBootstrapConfigRef},
 			InfrastructureRef:       *desiredInfraMachineTemplateRef,
 			NodeDrainTimeout:        nodeDrainTimeout,
 			NodeVolumeDetachTimeout: nodeVolumeDetachTimeout,
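Note for reviewers: both halves of this change funnel state through s.UpgradeTracker.MachinePools, so the AfterClusterUpgrade gate below only fires once every worker is stable. The following is a minimal, self-contained sketch of the set-like bookkeeping those calls assume; the type and its internals are invented for illustration and are not CAPI's actual UpgradeTracker.

```go
package main

import (
	"fmt"
	"sort"
)

// workerUpgradeTracker is an illustrative stand-in for the per-worker-kind
// bookkeeping that scope.UpgradeTracker provides. It only demonstrates the
// set-like semantics the calls in this patch (MarkUpgrading, UpgradingNames,
// IsPendingCreate, IsAnyPendingCreate) rely on.
type workerUpgradeTracker struct {
	upgrading     map[string]struct{}
	pendingCreate map[string]struct{}
}

func newWorkerUpgradeTracker() *workerUpgradeTracker {
	return &workerUpgradeTracker{
		upgrading:     map[string]struct{}{},
		pendingCreate: map[string]struct{}{},
	}
}

// MarkUpgrading records names of MachinePools observed to be upgrading,
// mirroring s.UpgradeTracker.MachinePools.MarkUpgrading(mpUpgradingNames...).
func (t *workerUpgradeTracker) MarkUpgrading(names ...string) {
	for _, n := range names {
		t.upgrading[n] = struct{}{}
	}
}

// UpgradingNames returns the recorded names in a stable order; the
// AfterClusterUpgrade gate checks len(...) == 0 on this.
func (t *workerUpgradeTracker) UpgradingNames() []string {
	names := make([]string, 0, len(t.upgrading))
	for n := range t.upgrading {
		names = append(names, n)
	}
	sort.Strings(names)
	return names
}

// MarkPendingCreate/IsPendingCreate model the "pending create" state that
// createMachinePool consults before creating anything.
func (t *workerUpgradeTracker) MarkPendingCreate(topologyName string) {
	t.pendingCreate[topologyName] = struct{}{}
}

func (t *workerUpgradeTracker) IsPendingCreate(topologyName string) bool {
	_, ok := t.pendingCreate[topologyName]
	return ok
}

func (t *workerUpgradeTracker) IsAnyPendingCreate() bool {
	return len(t.pendingCreate) > 0
}

func main() {
	t := newWorkerUpgradeTracker()
	t.MarkUpgrading("mp-0", "mp-1")
	t.MarkPendingCreate("mp-2")
	fmt.Println(t.UpgradingNames())        // [mp-0 mp-1]
	fmt.Println(t.IsPendingCreate("mp-2")) // true
	fmt.Println(t.IsAnyPendingCreate())    // true
}
```

Keying the pending-create state by topology name (taken from a label) rather than by object name appears to be what lets the check in createMachinePool work before the MachinePool object itself exists.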
diff --git a/internal/controllers/topology/cluster/reconcile_state.go b/internal/controllers/topology/cluster/reconcile_state.go
index 596ae6a8c023..0657cb7d471c 100644
--- a/internal/controllers/topology/cluster/reconcile_state.go
+++ b/internal/controllers/topology/cluster/reconcile_state.go
@@ -257,7 +257,11 @@ func (r *Reconciler) callAfterClusterUpgrade(ctx context.Context, s *scope.Scope
 		len(s.UpgradeTracker.MachineDeployments.UpgradingNames()) == 0 && // Machine deployments are not upgrading or not about to upgrade
 		!s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate() && // No MachineDeployments are pending create
 		!s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade() && // No MachineDeployments are pending an upgrade
-		!s.UpgradeTracker.MachineDeployments.DeferredUpgrade() { // No MachineDeployments have deferred an upgrade
+		!s.UpgradeTracker.MachineDeployments.DeferredUpgrade() && // No MachineDeployments have deferred an upgrade
+		len(s.UpgradeTracker.MachinePools.UpgradingNames()) == 0 && // No MachinePools are upgrading or about to upgrade
+		!s.UpgradeTracker.MachinePools.IsAnyPendingCreate() && // No MachinePools are pending create
+		!s.UpgradeTracker.MachinePools.IsAnyPendingUpgrade() && // No MachinePools are pending an upgrade
+		!s.UpgradeTracker.MachinePools.DeferredUpgrade() { // No MachinePools have deferred an upgrade
 		// Everything is stable and the cluster can be considered fully upgraded.
 		hookRequest := &runtimehooksv1.AfterClusterUpgradeRequest{
 			Cluster: *s.Current.Cluster,
@@ -747,7 +751,7 @@ func (r *Reconciler) reconcileMachinePools(ctx context.Context, s *scope.Scope)
 			continue
 		}
 
-		if err := r.createMachinePool(ctx, s.Current.Cluster, mp); err != nil {
+		if err := r.createMachinePool(ctx, s, mp); err != nil {
 			return err
 		}
 	}
@@ -800,9 +804,23 @@ func (r *Reconciler) getCurrentMachinePools(ctx context.Context, s *scope.Scope)
 }
 
 // createMachinePool creates a MachinePool and the corresponding templates.
-func (r *Reconciler) createMachinePool(ctx context.Context, cluster *clusterv1.Cluster, mp *scope.MachinePoolState) error {
-	log := tlog.LoggerFrom(ctx).WithMachinePool(mp.Object)
+func (r *Reconciler) createMachinePool(ctx context.Context, s *scope.Scope, mp *scope.MachinePoolState) error {
+	// Do not create the MachinePool if it is marked as pending create.
+	mpTopologyName, ok := mp.Object.Labels[clusterv1.ClusterTopologyMachinePoolNameLabel]
+	if !ok || mpTopologyName == "" {
+		// Note: This is only an additional safety check and should not happen. The label will always be added when computing
+		// the desired MachinePool.
+		return errors.Errorf("new MachinePool is missing the %q label", clusterv1.ClusterTopologyMachinePoolNameLabel)
+	}
+
+	// Return early if the MachinePool is pending create.
+	if s.UpgradeTracker.MachinePools.IsPendingCreate(mpTopologyName) {
+		return nil
+	}
+
+	log := tlog.LoggerFrom(ctx).WithMachinePool(mp.Object)
+	cluster := s.Current.Cluster
 
 	infraCtx, _ := log.WithObject(mp.InfrastructureMachinePoolObject).Into(ctx)
 	if err := r.reconcileReferencedObject(infraCtx, reconcileReferencedObjectInput{
 		cluster: cluster,
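Note for reviewers: the new guard in createMachinePool derives the topology name from a label that computeMachinePool always sets, treats a missing label as a programming error, and silently skips creation while the pool is pending create. Here is a standalone sketch of that control flow; the label key value and the helper types are assumptions for illustration only.

```go
package main

import (
	"fmt"
)

// Assumed value for illustration; the patch references it as
// clusterv1.ClusterTopologyMachinePoolNameLabel.
const clusterTopologyMachinePoolNameLabel = "topology.cluster.x-k8s.io/pool-name"

// pendingCreateChecker stands in for s.UpgradeTracker.MachinePools.
type pendingCreateChecker map[string]bool

func (c pendingCreateChecker) IsPendingCreate(name string) bool { return c[name] }

// shouldSkipCreate mirrors the guard added to createMachinePool: resolve the
// topology name from the object's labels, fail fast if the label is missing
// (a safety check that should never trigger), and skip creation while the
// MachinePool is marked pending create.
func shouldSkipCreate(checker pendingCreateChecker, labels map[string]string) (bool, error) {
	mpTopologyName, ok := labels[clusterTopologyMachinePoolNameLabel]
	if !ok || mpTopologyName == "" {
		return false, fmt.Errorf("new MachinePool is missing the %q label", clusterTopologyMachinePoolNameLabel)
	}
	return checker.IsPendingCreate(mpTopologyName), nil
}

func main() {
	checker := pendingCreateChecker{"pool-a": true}

	skip, err := shouldSkipCreate(checker, map[string]string{
		clusterTopologyMachinePoolNameLabel: "pool-a",
	})
	fmt.Println(skip, err) // true <nil>

	_, err = shouldSkipCreate(checker, map[string]string{})
	fmt.Println(err) // reports the missing label
}
```

Returning nil rather than an error keeps the surrounding reconcile loop going; the create simply happens on a later pass once the tracker no longer marks the pool as pending.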
@@ -830,6 +848,23 @@
 	}
 	r.recorder.Eventf(cluster, corev1.EventTypeNormal, createEventReason, "Created %q", tlog.KObj{Obj: mp.Object})
 
+	// Wait until MachinePool is visible in the cache.
+	// Note: We have to do this because otherwise using a cached client in current state could
+	// miss a newly created MachinePool (because the cache might be stale).
+	err = wait.PollUntilContextTimeout(ctx, 5*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
+		key := client.ObjectKey{Namespace: mp.Object.Namespace, Name: mp.Object.Name}
+		if err := r.Client.Get(ctx, key, &expv1.MachinePool{}); err != nil {
+			if apierrors.IsNotFound(err) {
+				return false, nil
+			}
+			return false, err
+		}
+		return true, nil
+	})
+	if err != nil {
+		return errors.Wrapf(err, "failed waiting for MachinePool %s to be visible in the cache after create", tlog.KObj{Obj: mp.Object})
+	}
+
 	return nil
 }
 
@@ -857,7 +892,7 @@ func (r *Reconciler) updateMachinePool(ctx context.Context, cluster *clusterv1.C
 		return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMP.Object})
 	}
 
-	// Check differences between current and desired MachineDeployment, and eventually patch the current object.
+	// Check differences between current and desired MachinePool, and eventually patch the current object.
 	log = log.WithObject(desiredMP.Object)
 	patchHelper, err := r.patchHelperFactory(ctx, currentMP.Object, desiredMP.Object)
 	if err != nil {
@@ -904,8 +939,8 @@ type machineDiff struct {
 	toCreate, toUpdate, toDelete []string
 }
 
-// calculateMachineDeploymentDiff compares two maps of MachineDeploymentState and calculates which
-// MachineDeployments should be created, updated or deleted.
+// calculateMachinePoolDiff compares two maps of MachinePoolState and calculates which
+// MachinePools should be created, updated or deleted.
 func calculateMachinePoolDiff(current, desired map[string]*scope.MachinePoolState) machineDiff {
 	var diff machineDiff
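Note for reviewers: the wait-for-cache block guards against the usual stale-informer race: a Get through the cached client immediately after Create can miss the new object and make the next reconcile recreate it. Below is a self-contained sketch of the same wait.PollUntilContextTimeout usage against a stubbed getter; the stub replaces r.Client.Get, while the interval and timeout mirror the hunk above.

```go
package main

import (
	"context"
	"fmt"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/wait"
)

// newLaggyGet returns a stand-in for r.Client.Get that reports NotFound for
// the first few calls and then succeeds, mimicking an informer cache that
// lags the API server right after a Create.
func newLaggyGet(succeedOnCall int) func() error {
	calls := 0
	gr := schema.GroupResource{Group: "cluster.x-k8s.io", Resource: "machinepools"}
	return func() error {
		calls++
		if calls < succeedOnCall {
			return apierrors.NewNotFound(gr, "cluster1-pool-a")
		}
		return nil
	}
}

func main() {
	get := newLaggyGet(3)

	// Same shape as the patch: poll every 5ms for up to 5s, treating
	// NotFound as "keep waiting" and any other error as fatal.
	err := wait.PollUntilContextTimeout(context.Background(), 5*time.Millisecond, 5*time.Second, true,
		func(ctx context.Context) (bool, error) {
			if err := get(); err != nil {
				if apierrors.IsNotFound(err) {
					return false, nil
				}
				return false, err
			}
			return true, nil
		})
	fmt.Println("MachinePool visible in cache:", err == nil)
}
```

Passing immediate=true makes PollUntilContextTimeout run the first check right away instead of sleeping one interval first, so the common case (cache already caught up) costs a single Get.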