Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
avoid deadlock when MHC and upgrade occur together
Browse files Browse the repository at this point in the history
Yuvaraj Kakaraparthi committed Apr 4, 2023
1 parent a2318d1 commit 4fe4e6c
Showing 2 changed files with 83 additions and 11 deletions.
28 changes: 18 additions & 10 deletions internal/controllers/topology/cluster/desired_state.go
Original file line number Diff line number Diff line change
@@ -88,7 +88,7 @@ func (r *Reconciler) computeDesiredState(ctx context.Context, s *scope.Scope) (*
// If required, compute the desired state of the MachineDeployments from the list of MachineDeploymentTopologies
// defined in the cluster.
if s.Blueprint.HasMachineDeployments() {
desiredState.MachineDeployments, err = computeMachineDeployments(ctx, s, desiredState.ControlPlane)
desiredState.MachineDeployments, err = r.computeMachineDeployments(ctx, s, desiredState.ControlPlane)
if err != nil {
return nil, errors.Wrapf(err, "failed to compute MachineDeployments")
}
@@ -522,15 +522,19 @@ func calculateRefDesiredAPIVersion(currentRef *corev1.ObjectReference, desiredRe
}

// computeMachineDeployments computes the desired state of the list of MachineDeployments.
func computeMachineDeployments(ctx context.Context, s *scope.Scope, desiredControlPlaneState *scope.ControlPlaneState) (scope.MachineDeploymentsStateMap, error) {
func (r *Reconciler) computeMachineDeployments(ctx context.Context, s *scope.Scope, desiredControlPlaneState *scope.ControlPlaneState) (scope.MachineDeploymentsStateMap, error) {
// Mark all the machine deployments that are currently rolling out.
// This captured information will be used for
// - Building the TopologyReconciled condition.
// - Making upgrade decisions on machine deployments.
s.UpgradeTracker.MachineDeployments.MarkRollingOut(s.Current.MachineDeployments.RollingOut()...)
upgradingMDs, err := s.Current.MachineDeployments.Upgrading(ctx, r.Client)
if err != nil {
return nil, errors.Wrap(err, "failed to list upgrading MachineDeployments")
}
s.UpgradeTracker.MachineDeployments.MarkRollingOut(upgradingMDs...)
machineDeploymentsStateMap := make(scope.MachineDeploymentsStateMap)
for _, mdTopology := range s.Blueprint.Topology.Workers.MachineDeployments {
desiredMachineDeployment, err := computeMachineDeployment(ctx, s, desiredControlPlaneState, mdTopology)
desiredMachineDeployment, err := r.computeMachineDeployment(ctx, s, desiredControlPlaneState, mdTopology)
if err != nil {
return nil, errors.Wrapf(err, "failed to compute MachineDepoyment for topology %q", mdTopology.Name)
}
@@ -542,7 +546,7 @@ func computeMachineDeployments(ctx context.Context, s *scope.Scope, desiredContr
// computeMachineDeployment computes the desired state for a MachineDeploymentTopology.
// The generated machineDeployment object is calculated using the values from the machineDeploymentTopology and
// the machineDeployment class.
func computeMachineDeployment(_ context.Context, s *scope.Scope, desiredControlPlaneState *scope.ControlPlaneState, machineDeploymentTopology clusterv1.MachineDeploymentTopology) (*scope.MachineDeploymentState, error) {
func (r *Reconciler) computeMachineDeployment(ctx context.Context, s *scope.Scope, desiredControlPlaneState *scope.ControlPlaneState, machineDeploymentTopology clusterv1.MachineDeploymentTopology) (*scope.MachineDeploymentState, error) {
desiredMachineDeployment := &scope.MachineDeploymentState{}

// Gets the blueprint for the MachineDeployment class.
@@ -614,7 +618,7 @@ func computeMachineDeployment(_ context.Context, s *scope.Scope, desiredControlP
// Add ClusterTopologyMachineDeploymentLabel to the generated InfrastructureMachine template
infraMachineTemplateLabels[clusterv1.ClusterTopologyMachineDeploymentNameLabel] = machineDeploymentTopology.Name
desiredMachineDeployment.InfrastructureMachineTemplate.SetLabels(infraMachineTemplateLabels)
version, err := computeMachineDeploymentVersion(s, machineDeploymentTopology, desiredControlPlaneState, currentMachineDeployment)
version, err := r.computeMachineDeploymentVersion(ctx, s, machineDeploymentTopology, desiredControlPlaneState, currentMachineDeployment)
if err != nil {
return nil, errors.Wrapf(err, "failed to compute version for %s", machineDeploymentTopology.Name)
}
@@ -750,7 +754,7 @@ func computeMachineDeployment(_ context.Context, s *scope.Scope, desiredControlP
// Nb: No MachineDeployment upgrades will be triggered while any MachineDeployment is in the middle
// of an upgrade. Even if the number of MachineDeployments that are being upgraded is less
// than the number of allowed concurrent upgrades.
func computeMachineDeploymentVersion(s *scope.Scope, machineDeploymentTopology clusterv1.MachineDeploymentTopology, desiredControlPlaneState *scope.ControlPlaneState, currentMDState *scope.MachineDeploymentState) (string, error) {
func (r *Reconciler) computeMachineDeploymentVersion(ctx context.Context, s *scope.Scope, machineDeploymentTopology clusterv1.MachineDeploymentTopology, desiredControlPlaneState *scope.ControlPlaneState, currentMDState *scope.MachineDeploymentState) (string, error) {
desiredVersion := s.Blueprint.Topology.Version
// If creating a new machine deployment, we can pick up the desired version
// Note: We are not blocking the creation of new machine deployments when
@@ -842,9 +846,13 @@ func computeMachineDeploymentVersion(s *scope.Scope, machineDeploymentTopology c
}

// At this point the control plane is stable (not scaling, not upgrading, not being upgraded).
// Checking to see if the machine deployments are also stable.
// If any of the MachineDeployments is rolling out, do not upgrade the machine deployment yet.
if s.Current.MachineDeployments.IsAnyRollingOut() {
// Checking to see if the machine deployments are also not upgrading.
// If any of the MachineDeployments is upgrading, do not upgrade the machine deployment yet.
machineDeploymentsUpgrading, err := s.Current.MachineDeployments.IsAnyUpgrading(ctx, r.Client)
if err != nil {
return currentVersion, errors.Wrap(err, "failed to check if any MachineDeployments are upgrading")
}
if machineDeploymentsUpgrading {
s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name)
return currentVersion, nil
}
66 changes: 65 additions & 1 deletion internal/controllers/topology/cluster/scope/state.go
Original file line number Diff line number Diff line change
@@ -17,7 +17,12 @@ limitations under the License.
package scope

import (
"context"
"fmt"

"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"sigs.k8s.io/controller-runtime/pkg/client"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
@@ -74,6 +79,32 @@ func (mds MachineDeploymentsStateMap) IsAnyRollingOut() bool {
return len(mds.RollingOut()) != 0
}

// Upgrading returns the list of the machine deployments
// that are upgrading.
func (mds MachineDeploymentsStateMap) Upgrading(ctx context.Context, c client.Client) ([]string, error) {
names := []string{}
for _, md := range mds {
upgrading, err := md.IsUpgrading(ctx, c)
if err != nil {
return nil, err
}
if upgrading {
names = append(names, md.Object.Name)
}
}
return names, nil
}

// IsAnyUpgrading returns true if at least one of the
// machine deployments is upgrading. False, otherwise.
func (mds MachineDeploymentsStateMap) IsAnyUpgrading(ctx context.Context, c client.Client) (bool, error) {
upgradingMDs, err := mds.Upgrading(ctx, c)
if err != nil {
return false, errors.Wrap(err, "failed to list upgrading MachineDeployments")
}
return len(upgradingMDs) > 0, nil
}

// MachineDeploymentState holds all the objects representing the state of a managed deployment.
type MachineDeploymentState struct {
// Object holds the MachineDeployment object.
@@ -90,11 +121,44 @@ type MachineDeploymentState struct {
MachineHealthCheck *clusterv1.MachineHealthCheck
}

// IsRollingOut determines if the machine deployment is upgrading.
// IsRollingOut determines if the machine deployment is rolling out.
// A machine deployment is considered upgrading if:
// - if any of the replicas of the machine deployment is not ready.
func (md *MachineDeploymentState) IsRollingOut() bool {
return !mdutil.DeploymentComplete(md.Object, &md.Object.Status) ||
*md.Object.Spec.Replicas != md.Object.Status.ReadyReplicas ||
md.Object.Status.UnavailableReplicas > 0
}

// IsUpgrading determines if the MachineDeployment is upgrading.
// A machine deployment is considered upgrading if at least one of the Machines of this
// MachineDeployment has a different version.
func (md *MachineDeploymentState) IsUpgrading(ctx context.Context, c client.Client) (bool, error) {
// If the MachineDeployment has no version then we cannot determine if it is upgrading.
// Note: This case should not happen.
if md.Object.Spec.Template.Spec.Version == nil {
return false, nil
}
clusterName := md.Object.Labels[clusterv1.ClusterNameLabel]
mdTopology := md.Object.Labels[clusterv1.ClusterTopologyMachineDeploymentNameLabel]
machines := &clusterv1.MachineList{}
if err := c.List(ctx, machines, client.InNamespace(md.Object.Namespace), client.MatchingLabels{
clusterv1.ClusterNameLabel: clusterName,
clusterv1.ClusterTopologyOwnedLabel: "",
clusterv1.ClusterTopologyMachineDeploymentNameLabel: mdTopology,
}); err != nil {
return false, errors.Wrapf(err, "Failed to check if MachineDeployment %s is upgrading: failed to list Machines", md.Object.Name)
}
mdVersion := *md.Object.Spec.Template.Spec.Version
// Check if the versions of the all the Machines match the MachineDeployment version.
for i := range machines.Items {
machine := machines.Items[i]
if machine.Spec.Version == nil {
return false, fmt.Errorf("failed to check if MachineDeployment %s is upgrading: Machine %s has no version", md.Object.Name, machine.Name)
}
if *machine.Spec.Version != mdVersion {
return true, nil
}
}
return false, nil
}

0 comments on commit 4fe4e6c

Please sign in to comment.