Rollout only migrates configuration
Scale up / scale down are only called from reconcile directly
Ben Moss committed Jun 22, 2020
1 parent 7bea1f2 commit 1f081d4
Showing 5 changed files with 72 additions and 118 deletions.
27 changes: 8 additions & 19 deletions controlplane/kubeadm/controllers/controller.go
@@ -292,22 +292,19 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef())

// Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations.
needRollout := controlPlane.MachinesNeedingRollout()
switch {
case len(needRollout) > 0:
logger.Info("Rolling out Control Plane machines")
if controlPlane.MachinesNeedingRollout().Any() {
// NOTE: we are using Status.UpdatedReplicas from the previous reconciliation only to provide a meaningful message
// and this does not influence any reconciliation logic.
conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), kcp.Status.UpdatedReplicas)
return r.upgradeControlPlane(ctx, cluster, kcp, controlPlane)
default:
conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning,
"Rolling %d replicas with outdated spec (%d replicas up to date)", controlPlane.MachinesNeedingRollout().Len(), kcp.Status.UpdatedReplicas)
if err := r.migrateSpecChanges(ctx, controlPlane); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to roll out control plane")
}
} else if conditions.Has(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition) {
// make sure last upgrade operation is marked as completed.
// NOTE: we are checking the condition already exists in order to avoid to set this condition at the first
// reconciliation/before a rolling upgrade actually starts.
if conditions.Has(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition) {
conditions.MarkTrue(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition)
}
conditions.MarkTrue(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition)
}

if controlPlane.UnhealthyMachines().None() {
@@ -316,7 +313,6 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
}
}

// If we've made it this far, we can assume that all ownedMachines are up to date
numMachines := len(ownedMachines)
desiredReplicas := int(*kcp.Spec.Replicas)

@@ -449,13 +445,6 @@ func (r *KubeadmControlPlaneReconciler) reconcileHealth(ctx context.Context, clu
return &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
}

// We need this check for scale up as well as down to avoid scaling up when there is a machine being deleted.
// This should be at the end of this method as no need to wait for machine to be completely deleted to reconcile etcd.
// TODO: Revisit during machine remediation implementation which may need to cover other machine phases.
if controlPlane.HasDeletingMachine() {
return &capierrors.RequeueAfterError{RequeueAfter: deleteRequeueAfter}
}

return nil
}

58 changes: 58 additions & 0 deletions controlplane/kubeadm/controllers/helpers.go
@@ -20,6 +20,7 @@ import (
"context"
"strings"

"github.com/blang/semver"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -246,3 +247,60 @@ func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp
}
return nil
}

func (r *KubeadmControlPlaneReconciler) migrateSpecChanges(ctx context.Context, controlPlane *internal.ControlPlane) error {
logger := controlPlane.Logger
kcp := controlPlane.KCP
cluster := controlPlane.Cluster

// TODO: handle reconciliation of etcd members and kubeadm config in case they get out of sync with cluster

workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
if err != nil {
logger.Error(err, "failed to get remote client for workload cluster", "cluster key", util.ObjectKey(cluster))
return err
}

parsedVersion, err := semver.ParseTolerant(kcp.Spec.Version)
if err != nil {
return errors.Wrapf(err, "failed to parse kubernetes version %q", kcp.Spec.Version)
}

if err := workloadCluster.ReconcileKubeletRBACRole(ctx, parsedVersion); err != nil {
return errors.Wrap(err, "failed to reconcile the remote kubelet RBAC role")
}

if err := workloadCluster.ReconcileKubeletRBACBinding(ctx, parsedVersion); err != nil {
return errors.Wrap(err, "failed to reconcile the remote kubelet RBAC binding")
}

// Ensure kubeadm cluster role & bindings for v1.18+
// as per https://github.com/kubernetes/kubernetes/commit/b117a928a6c3f650931bdac02a41fca6680548c4
if err := workloadCluster.AllowBootstrapTokensToGetNodes(ctx); err != nil {
return errors.Wrap(err, "failed to set role and role binding for kubeadm")
}

if err := workloadCluster.UpdateKubernetesVersionInKubeadmConfigMap(ctx, parsedVersion); err != nil {
return errors.Wrap(err, "failed to update the kubernetes version in the kubeadm config map")
}

if kcp.Spec.KubeadmConfigSpec.ClusterConfiguration != nil {
imageRepository := kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.ImageRepository
if err := workloadCluster.UpdateImageRepositoryInKubeadmConfigMap(ctx, imageRepository); err != nil {
return errors.Wrap(err, "failed to update the image repository in the kubeadm config map")
}
}

if kcp.Spec.KubeadmConfigSpec.ClusterConfiguration != nil && kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.Local != nil {
meta := kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.Local.ImageMeta
if err := workloadCluster.UpdateEtcdVersionInKubeadmConfigMap(ctx, meta.ImageRepository, meta.ImageTag); err != nil {
return errors.Wrap(err, "failed to update the etcd version in the kubeadm config map")
}
}

if err := workloadCluster.UpdateKubeletConfigMap(ctx, parsedVersion); err != nil {
return errors.Wrap(err, "failed to upgrade kubelet config map")
}

return nil
}
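
The new helper parses kcp.Spec.Version with semver.ParseTolerant before updating the kubeadm and kubelet config maps. A minimal standalone sketch of why the tolerant parser is the right fit here, assuming the usual "v"-prefixed version strings found in KCP specs (the sample values are illustrative, not taken from this commit):

package main

import (
	"fmt"

	"github.com/blang/semver"
)

func main() {
	// KCP version fields typically look like "v1.18.2"; ParseTolerant accepts
	// the "v" prefix (and missing minor/patch components), while the strict
	// semver.Parse would return an error for it.
	for _, raw := range []string{"v1.18.2", "1.17.5"} {
		v, err := semver.ParseTolerant(raw)
		if err != nil {
			fmt.Printf("%s: %v\n", raw, err)
			continue
		}
		fmt.Printf("%s -> major=%d minor=%d patch=%d\n", raw, v.Major, v.Minor, v.Patch)
	}
}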
92 changes: 0 additions & 92 deletions controlplane/kubeadm/controllers/upgrade.go

This file was deleted.

4 changes: 2 additions & 2 deletions controlplane/kubeadm/controllers/upgrade_test.go
@@ -81,7 +81,7 @@ func TestKubeadmControlPlaneReconciler_upgradeControlPlane(t *testing.T) {
// run upgrade the first time, expect we scale up
needingUpgrade := internal.NewFilterableMachineCollectionFromMachineList(initialMachine)
controlPlane.Machines = needingUpgrade
result, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane)
result, err = r.reconcile(context.Background(), cluster, kcp)
g.Expect(result).To(Equal(ctrl.Result{Requeue: true}))
g.Expect(err).To(BeNil())
bothMachines := &clusterv1.MachineList{}
@@ -91,7 +91,7 @@ func TestKubeadmControlPlaneReconciler_upgradeControlPlane(t *testing.T) {
controlPlane.Machines = internal.NewFilterableMachineCollectionFromMachineList(bothMachines)

// run upgrade the second time, expect we scale down
result, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane)
result, err = r.reconcile(context.Background(), cluster, kcp)
g.Expect(err).To(BeNil())
g.Expect(result).To(Equal(ctrl.Result{Requeue: true}))
finalMachine := &clusterv1.MachineList{}
9 changes: 4 additions & 5 deletions controlplane/kubeadm/internal/control_plane.go
@@ -295,28 +295,27 @@ func (c *ControlPlane) NeedsScaleDown() bool {
return true
}
if c.Machines.Len() > int(*c.KCP.Spec.Replicas) {
c.Logger.Info("Scale down is required", "existing", c.Machines.Len(), "desired", int(*c.KCP.Spec.Replicas))
return true
}
return false
}

// NeedsScaleUp returns whether the control plane needs to add a machine to the
// cluster. Scale up can be caused by an upgrade, where the cluster will
// cluster. Scale up can be caused by a rollout, where the cluster will
// replace outdated machines one by one, or by the more common case of having
// fewer machines than the number of desired replicas.
func (c *ControlPlane) NeedsScaleUp() bool {
if c.NeedsRollout() {
if c.needsReplacementMachine() {
c.Logger.Info("Scale up is required because rollout is in progress")
return true
}
if c.Machines.Len() < int(*c.KCP.Spec.Replicas) {
c.Logger.Info("Scale up is required", "desired", int(*c.KCP.Spec.Replicas), "existing", c.Machines.Len())
return true
}
return false
}

func (c *ControlPlane) NeedsRollout() bool {
func (c *ControlPlane) needsReplacementMachine() bool {
return c.MachinesNeedingRollout().Any() && !c.NeedsScaleDown()
}

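
Taken together, the reworked predicates let a rollout ride the ordinary scale paths: with a full set of replicas and one outdated machine, needsReplacementMachine (and so NeedsScaleUp) is true, the surge machine then pushes the count above the desired replicas, and the following pass reports NeedsScaleDown, matching the scale-up-then-scale-down sequence the upgrade_test.go hunk above expects. A small self-contained walk-through of that arithmetic, using plain ints in place of the real Machine collections and ignoring any scale-down checks not visible in this hunk:

package main

import "fmt"

func main() {
	// Desired replicas, current machines, and machines whose spec is outdated.
	desired, existing, needingRollout := 3, 3, 1

	needsScaleDown := existing > desired                             // false: 3 is not > 3
	needsReplacementMachine := needingRollout > 0 && !needsScaleDown // true: rollout in progress
	needsScaleUp := needsReplacementMachine || existing < desired    // true: surge to a 4th machine

	fmt.Println("first pass  -> scale up:", needsScaleUp, "scale down:", needsScaleDown)

	// Once the surge machine has joined, the count exceeds the desired
	// replicas, so the next reconcile scales down an outdated machine.
	existing = 4
	fmt.Println("second pass -> scale down:", existing > desired)
}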
