Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 avoid errors when MHC and upgrade occur together in classy clusters #8464

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions internal/controllers/topology/cluster/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
case s.UpgradeTracker.ControlPlane.IsScaling:
msgBuilder.WriteString(" Control plane is reconciling desired replicas")

case len(s.UpgradeTracker.MachineDeployments.UpgradingNames()) > 0:
fmt.Fprintf(msgBuilder, " MachineDeployment(s) %s are upgrading",
computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.UpgradingNames()),
)

case s.Current.MachineDeployments.IsAnyRollingOut():
fmt.Fprintf(msgBuilder, " MachineDeployment(s) %s are rolling out",
computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.RolloutNames()),
Expand Down
106 changes: 42 additions & 64 deletions internal/controllers/topology/cluster/conditions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ import (
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1"
Expand All @@ -32,11 +35,16 @@ import (
)

func TestReconcileTopologyReconciledCondition(t *testing.T) {
g := NewWithT(t)
scheme := runtime.NewScheme()
g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed())

tests := []struct {
name string
reconcileErr error
s *scope.Scope
cluster *clusterv1.Cluster
machines []*clusterv1.Machine
wantConditionStatus corev1.ConditionStatus
wantConditionReason string
wantConditionMessage string
Expand Down Expand Up @@ -382,7 +390,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
wantConditionStatus: corev1.ConditionTrue,
},
{
name: "should set the condition to false is some machine deployments have not picked the new version because other machine deployments are rolling out (not all replicas ready)",
name: "should set the condition to false is some machine deployments have not picked the new version because other machine deployments are upgrading",
reconcileErr: nil,
cluster: &clusterv1.Cluster{},
s: &scope.Scope{
Expand All @@ -404,6 +412,11 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
Object: builder.MachineDeployment("ns1", "md0-abc123").
WithReplicas(2).
WithVersion("v1.22.0").
WithSelector(metav1.LabelSelector{
MatchLabels: map[string]string{
clusterv1.ClusterTopologyMachineDeploymentNameLabel: "md0",
},
}).
WithStatus(clusterv1.MachineDeploymentStatus{
// MD is not ready because we don't have 2 updated, ready and available replicas.
Replicas: int32(2),
Expand All @@ -418,67 +431,11 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
Object: builder.MachineDeployment("ns1", "md1-abc123").
WithReplicas(2).
WithVersion("v1.21.2").
WithStatus(clusterv1.MachineDeploymentStatus{
Replicas: int32(2),
UpdatedReplicas: int32(2),
ReadyReplicas: int32(2),
AvailableReplicas: int32(2),
UnavailableReplicas: int32(0),
WithSelector(metav1.LabelSelector{
MatchLabels: map[string]string{
clusterv1.ClusterTopologyMachineDeploymentNameLabel: "md1",
},
}).
Build(),
},
},
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = false
ut.MachineDeployments.MarkRollingOut("md0-abc123")
ut.MachineDeployments.MarkPendingUpgrade("md1-abc123")
return ut
}(),
HookResponseTracker: scope.NewHookResponseTracker(),
},
wantConditionStatus: corev1.ConditionFalse,
wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason,
wantConditionMessage: "MachineDeployment(s) md1-abc123 upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are rolling out",
},
{
name: "should set the condition to false if some machine deployments have not picked the new version because other machine deployments are rolling out (unavailable replica)",
reconcileErr: nil,
cluster: &clusterv1.Cluster{},
s: &scope.Scope{
Blueprint: &scope.ClusterBlueprint{
Topology: &clusterv1.Topology{
Version: "v1.22.0",
},
},
Current: &scope.ClusterState{
Cluster: &clusterv1.Cluster{},
ControlPlane: &scope.ControlPlaneState{
Object: builder.ControlPlane("ns1", "controlplane1").
WithVersion("v1.22.0").
WithReplicas(3).
Build(),
},
MachineDeployments: scope.MachineDeploymentsStateMap{
"md0": &scope.MachineDeploymentState{
Object: builder.MachineDeployment("ns1", "md0-abc123").
WithReplicas(2).
WithVersion("v1.22.0").
WithStatus(clusterv1.MachineDeploymentStatus{
// MD is not ready because we still have an unavailable replica.
Replicas: int32(2),
UpdatedReplicas: int32(2),
ReadyReplicas: int32(2),
AvailableReplicas: int32(2),
UnavailableReplicas: int32(1),
}).
Build(),
},
"md1": &scope.MachineDeploymentState{
Object: builder.MachineDeployment("ns1", "md1-abc123").
WithReplicas(2).
WithVersion("v1.21.2").
WithStatus(clusterv1.MachineDeploymentStatus{
Replicas: int32(2),
UpdatedReplicas: int32(2),
Expand All @@ -493,15 +450,25 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = false
ut.MachineDeployments.MarkRollingOut("md0-abc123")
ut.MachineDeployments.MarkUpgradingAndRollingOut("md0-abc123")
ut.MachineDeployments.MarkPendingUpgrade("md1-abc123")
return ut
}(),
HookResponseTracker: scope.NewHookResponseTracker(),
},
machines: []*clusterv1.Machine{
builder.Machine("ns1", "md0-machine0").
WithLabels(map[string]string{clusterv1.ClusterTopologyMachineDeploymentNameLabel: "md0"}).
WithVersion("v1.21.2"). // Machine's version does not match MachineDeployment's version
Build(),
builder.Machine("ns1", "md1-machine0").
WithLabels(map[string]string{clusterv1.ClusterTopologyMachineDeploymentNameLabel: "md1"}).
WithVersion("v1.21.2").
Build(),
},
wantConditionStatus: corev1.ConditionFalse,
wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason,
wantConditionMessage: "MachineDeployment(s) md1-abc123 upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are rolling out",
wantConditionMessage: "MachineDeployment(s) md1-abc123 upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading",
},
{
name: "should set the condition to false if some machine deployments have not picked the new version because their upgrade has been deferred",
Expand Down Expand Up @@ -624,7 +591,18 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

r := &Reconciler{}
objs := []client.Object{}
if tt.s != nil && tt.s.Current != nil {
for _, mds := range tt.s.Current.MachineDeployments {
objs = append(objs, mds.Object)
}
}
for _, m := range tt.machines {
objs = append(objs, m)
}
fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objs...).Build()

r := &Reconciler{Client: fakeClient}
err := r.reconcileTopologyReconciledCondition(tt.s, tt.cluster, tt.reconcileErr)
if tt.wantErr {
g.Expect(err).To(HaveOccurred())
Expand Down
24 changes: 17 additions & 7 deletions internal/controllers/topology/cluster/desired_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ func (r *Reconciler) computeDesiredState(ctx context.Context, s *scope.Scope) (*
// If required, compute the desired state of the MachineDeployments from the list of MachineDeploymentTopologies
// defined in the cluster.
if s.Blueprint.HasMachineDeployments() {
desiredState.MachineDeployments, err = computeMachineDeployments(ctx, s, desiredState.ControlPlane)
desiredState.MachineDeployments, err = r.computeMachineDeployments(ctx, s, desiredState.ControlPlane)
if err != nil {
return nil, errors.Wrapf(err, "failed to compute MachineDeployments")
}
Expand Down Expand Up @@ -523,12 +523,22 @@ func calculateRefDesiredAPIVersion(currentRef *corev1.ObjectReference, desiredRe
}

// computeMachineDeployments computes the desired state of the list of MachineDeployments.
func computeMachineDeployments(ctx context.Context, s *scope.Scope, desiredControlPlaneState *scope.ControlPlaneState) (scope.MachineDeploymentsStateMap, error) {
// Mark all the machine deployments that are currently rolling out.
// This captured information will be used for
// - Building the TopologyReconciled condition.
// - Making upgrade decisions on machine deployments.
func (r *Reconciler) computeMachineDeployments(ctx context.Context, s *scope.Scope, desiredControlPlaneState *scope.ControlPlaneState) (scope.MachineDeploymentsStateMap, error) {
// Mark all the MachineDeployments that are currently upgrading.
// This captured information is used for:
// - Building the TopologyReconciled condition.
// - Making upgrade decisions on machine deployments.
upgradingMDs, err := s.Current.MachineDeployments.Upgrading(ctx, r.Client)
if err != nil {
return nil, err
}
s.UpgradeTracker.MachineDeployments.MarkUpgradingAndRollingOut(upgradingMDs...)

// Mark all MachineDeployments that are currently rolling out.
// This captured information is used for:
// - Building the TopologyReconciled condition (when control plane upgrade is pending)
s.UpgradeTracker.MachineDeployments.MarkRollingOut(s.Current.MachineDeployments.RollingOut()...)

machineDeploymentsStateMap := make(scope.MachineDeploymentsStateMap)
for _, mdTopology := range s.Blueprint.Topology.Workers.MachineDeployments {
desiredMachineDeployment, err := computeMachineDeployment(ctx, s, desiredControlPlaneState, mdTopology)
Expand Down Expand Up @@ -844,7 +854,7 @@ func computeMachineDeploymentVersion(s *scope.Scope, machineDeploymentTopology c

// Control plane and machine deployments are stable.
// Ready to pick up the topology version.
s.UpgradeTracker.MachineDeployments.MarkRollingOut(currentMDState.Object.Name)
s.UpgradeTracker.MachineDeployments.MarkUpgradingAndRollingOut(currentMDState.Object.Name)
return desiredVersion, nil
}

Expand Down
Loading