From 1f081d4f80c1b3e371dd7ce519f995741387651a Mon Sep 17 00:00:00 2001
From: Ben Moss
Date: Mon, 22 Jun 2020 18:23:31 +0000
Subject: [PATCH] Rollout only migrates configuration

Scale up / scale down are only called from reconcile directly
---
 .../kubeadm/controllers/controller.go       | 27 ++----
 controlplane/kubeadm/controllers/helpers.go | 58 ++++++++++++
 controlplane/kubeadm/controllers/upgrade.go | 92 -------------------
 .../kubeadm/controllers/upgrade_test.go     |  4 +-
 .../kubeadm/internal/control_plane.go       |  9 +-
 5 files changed, 72 insertions(+), 118 deletions(-)
 delete mode 100644 controlplane/kubeadm/controllers/upgrade.go

diff --git a/controlplane/kubeadm/controllers/controller.go b/controlplane/kubeadm/controllers/controller.go
index b1a98aad8a50..adb985947158 100644
--- a/controlplane/kubeadm/controllers/controller.go
+++ b/controlplane/kubeadm/controllers/controller.go
@@ -292,22 +292,19 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
 	// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
 	conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef())
 
-	// Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations.
-	needRollout := controlPlane.MachinesNeedingRollout()
-	switch {
-	case len(needRollout) > 0:
-		logger.Info("Rolling out Control Plane machines")
+	if controlPlane.MachinesNeedingRollout().Any() {
 		// NOTE: we are using Status.UpdatedReplicas from the previous reconciliation only to provide a meaningful message
 		// and this does not influence any reconciliation logic.
-		conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), kcp.Status.UpdatedReplicas)
-		return r.upgradeControlPlane(ctx, cluster, kcp, controlPlane)
-	default:
+		conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning,
+			"Rolling %d replicas with outdated spec (%d replicas up to date)", controlPlane.MachinesNeedingRollout().Len(), kcp.Status.UpdatedReplicas)
+		if err := r.migrateSpecChanges(ctx, controlPlane); err != nil {
+			return ctrl.Result{}, errors.Wrap(err, "failed to roll out control plane")
+		}
+	} else if conditions.Has(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition) {
 		// make sure last upgrade operation is marked as completed.
 		// NOTE: we are checking the condition already exists in order to avoid to set this condition at the first
 		// reconciliation/before a rolling upgrade actually starts.
-		if conditions.Has(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition) {
-			conditions.MarkTrue(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition)
-		}
+		conditions.MarkTrue(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition)
 	}
 
 	if controlPlane.UnhealthyMachines().None() {
@@ -316,7 +313,6 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
 		}
 	}
 
-	// If we've made it this far, we can assume that all ownedMachines are up to date
 	numMachines := len(ownedMachines)
 	desiredReplicas := int(*kcp.Spec.Replicas)
 
@@ -449,13 +445,6 @@ func (r *KubeadmControlPlaneReconciler) reconcileHealth(ctx context.Context, clu
 		return &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
 	}
 
-	// We need this check for scale up as well as down to avoid scaling up when there is a machine being deleted.
-	// This should be at the end of this method as no need to wait for machine to be completely deleted to reconcile etcd.
-	// TODO: Revisit during machine remediation implementation which may need to cover other machine phases.
-	if controlPlane.HasDeletingMachine() {
-		return &capierrors.RequeueAfterError{RequeueAfter: deleteRequeueAfter}
-	}
-
 	return nil
 }
 
diff --git a/controlplane/kubeadm/controllers/helpers.go b/controlplane/kubeadm/controllers/helpers.go
index e157be831344..7fa69d1c29b6 100644
--- a/controlplane/kubeadm/controllers/helpers.go
+++ b/controlplane/kubeadm/controllers/helpers.go
@@ -20,6 +20,7 @@ import (
 	"context"
 	"strings"
 
+	"github.com/blang/semver"
 	"github.com/pkg/errors"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -246,3 +247,60 @@ func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp
 	}
 	return nil
 }
+
+func (r *KubeadmControlPlaneReconciler) migrateSpecChanges(ctx context.Context, controlPlane *internal.ControlPlane) error {
+	logger := controlPlane.Logger
+	kcp := controlPlane.KCP
+	cluster := controlPlane.Cluster
+
+	// TODO: handle reconciliation of etcd members and kubeadm config in case they get out of sync with cluster
+
+	workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
+	if err != nil {
+		logger.Error(err, "failed to get remote client for workload cluster", "cluster key", util.ObjectKey(cluster))
+		return err
+	}
+
+	parsedVersion, err := semver.ParseTolerant(kcp.Spec.Version)
+	if err != nil {
+		return errors.Wrapf(err, "failed to parse kubernetes version %q", kcp.Spec.Version)
+	}
+
+	if err := workloadCluster.ReconcileKubeletRBACRole(ctx, parsedVersion); err != nil {
+		return errors.Wrap(err, "failed to reconcile the remote kubelet RBAC role")
+	}
+
+	if err := workloadCluster.ReconcileKubeletRBACBinding(ctx, parsedVersion); err != nil {
+		return errors.Wrap(err, "failed to reconcile the remote kubelet RBAC binding")
+	}
+
+	// Ensure kubeadm cluster role & bindings for v1.18+
+	// as per https://github.com/kubernetes/kubernetes/commit/b117a928a6c3f650931bdac02a41fca6680548c4
+	if err := workloadCluster.AllowBootstrapTokensToGetNodes(ctx); err != nil {
+		return errors.Wrap(err, "failed to set role and role binding for kubeadm")
+	}
+
+	if err := workloadCluster.UpdateKubernetesVersionInKubeadmConfigMap(ctx, parsedVersion); err != nil {
+		return errors.Wrap(err, "failed to update the kubernetes version in the kubeadm config map")
+	}
+
+	if kcp.Spec.KubeadmConfigSpec.ClusterConfiguration != nil {
+		imageRepository := kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.ImageRepository
+		if err := workloadCluster.UpdateImageRepositoryInKubeadmConfigMap(ctx, imageRepository); err != nil {
+			return errors.Wrap(err, "failed to update the image repository in the kubeadm config map")
+		}
+	}
+
+	if kcp.Spec.KubeadmConfigSpec.ClusterConfiguration != nil && kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.Local != nil {
+		meta := kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.Local.ImageMeta
+		if err := workloadCluster.UpdateEtcdVersionInKubeadmConfigMap(ctx, meta.ImageRepository, meta.ImageTag); err != nil {
+			return errors.Wrap(err, "failed to update the etcd version in the kubeadm config map")
+		}
+	}
+
+	if err := workloadCluster.UpdateKubeletConfigMap(ctx, parsedVersion); err != nil {
+		return errors.Wrap(err, "failed to upgrade kubelet config map")
+	}
+
+	return nil
+}
diff --git a/controlplane/kubeadm/controllers/upgrade.go b/controlplane/kubeadm/controllers/upgrade.go
deleted file mode 100644
index 4c5c8c8e6ae1..000000000000
--- a/controlplane/kubeadm/controllers/upgrade.go
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
-Copyright 2020 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package controllers
-
-import (
-	"context"
-
-	"github.com/blang/semver"
-	"github.com/pkg/errors"
-	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
-	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3"
-	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal"
-	"sigs.k8s.io/cluster-api/util"
-	ctrl "sigs.k8s.io/controller-runtime"
-)
-
-func (r *KubeadmControlPlaneReconciler) upgradeControlPlane(
-	ctx context.Context,
-	cluster *clusterv1.Cluster,
-	kcp *controlplanev1.KubeadmControlPlane,
-	controlPlane *internal.ControlPlane,
-) (ctrl.Result, error) {
-	logger := controlPlane.Logger
-
-	// TODO: handle reconciliation of etcd members and kubeadm config in case they get out of sync with cluster
-
-	workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
-	if err != nil {
-		logger.Error(err, "failed to get remote client for workload cluster", "cluster key", util.ObjectKey(cluster))
-		return ctrl.Result{}, err
-	}
-
-	parsedVersion, err := semver.ParseTolerant(kcp.Spec.Version)
-	if err != nil {
-		return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kcp.Spec.Version)
-	}
-
-	if err := workloadCluster.ReconcileKubeletRBACRole(ctx, parsedVersion); err != nil {
-		return ctrl.Result{}, errors.Wrap(err, "failed to reconcile the remote kubelet RBAC role")
-	}
-
-	if err := workloadCluster.ReconcileKubeletRBACBinding(ctx, parsedVersion); err != nil {
-		return ctrl.Result{}, errors.Wrap(err, "failed to reconcile the remote kubelet RBAC binding")
-	}
-
-	// Ensure kubeadm cluster role & bindings for v1.18+
-	// as per https://github.com/kubernetes/kubernetes/commit/b117a928a6c3f650931bdac02a41fca6680548c4
-	if err := workloadCluster.AllowBootstrapTokensToGetNodes(ctx); err != nil {
-		return ctrl.Result{}, errors.Wrap(err, "failed to set role and role binding for kubeadm")
-	}
-
-	if err := workloadCluster.UpdateKubernetesVersionInKubeadmConfigMap(ctx, parsedVersion); err != nil {
-		return ctrl.Result{}, errors.Wrap(err, "failed to update the kubernetes version in the kubeadm config map")
-	}
-
-	if kcp.Spec.KubeadmConfigSpec.ClusterConfiguration != nil {
-		imageRepository := kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.ImageRepository
-		if err := workloadCluster.UpdateImageRepositoryInKubeadmConfigMap(ctx, imageRepository); err != nil {
-			return ctrl.Result{}, errors.Wrap(err, "failed to update the image repository in the kubeadm config map")
-		}
-	}
-
-	if kcp.Spec.KubeadmConfigSpec.ClusterConfiguration != nil && kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.Local != nil {
-		meta := kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.Local.ImageMeta
-		if err := workloadCluster.UpdateEtcdVersionInKubeadmConfigMap(ctx, meta.ImageRepository, meta.ImageTag); err != nil {
-			return ctrl.Result{}, errors.Wrap(err, "failed to update the etcd version in the kubeadm config map")
-		}
-	}
-
-	if err := workloadCluster.UpdateKubeletConfigMap(ctx, parsedVersion); err != nil {
-		return ctrl.Result{}, errors.Wrap(err, "failed to upgrade kubelet config map")
-	}
-
-	if controlPlane.NeedsScaleUp() {
-		return r.scaleUpControlPlane(ctx, cluster, kcp, controlPlane)
-	}
-	return r.scaleDownControlPlane(ctx, cluster, kcp, controlPlane)
-}
diff --git a/controlplane/kubeadm/controllers/upgrade_test.go b/controlplane/kubeadm/controllers/upgrade_test.go
index bd459c1c8c90..12f395a8f3f6 100644
--- a/controlplane/kubeadm/controllers/upgrade_test.go
+++ b/controlplane/kubeadm/controllers/upgrade_test.go
@@ -81,7 +81,7 @@ func TestKubeadmControlPlaneReconciler_upgradeControlPlane(t *testing.T) {
 	// run upgrade the first time, expect we scale up
 	needingUpgrade := internal.NewFilterableMachineCollectionFromMachineList(initialMachine)
 	controlPlane.Machines = needingUpgrade
-	result, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane)
+	result, err = r.reconcile(context.Background(), cluster, kcp)
 	g.Expect(result).To(Equal(ctrl.Result{Requeue: true}))
 	g.Expect(err).To(BeNil())
 	bothMachines := &clusterv1.MachineList{}
@@ -91,7 +91,7 @@ func TestKubeadmControlPlaneReconciler_upgradeControlPlane(t *testing.T) {
 	controlPlane.Machines = internal.NewFilterableMachineCollectionFromMachineList(bothMachines)
 
 	// run upgrade the second time, expect we scale down
-	result, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane)
+	result, err = r.reconcile(context.Background(), cluster, kcp)
 	g.Expect(err).To(BeNil())
 	g.Expect(result).To(Equal(ctrl.Result{Requeue: true}))
 	finalMachine := &clusterv1.MachineList{}
diff --git a/controlplane/kubeadm/internal/control_plane.go b/controlplane/kubeadm/internal/control_plane.go
index 6c6a7f92e8c5..78edb0caedeb 100644
--- a/controlplane/kubeadm/internal/control_plane.go
+++ b/controlplane/kubeadm/internal/control_plane.go
@@ -295,28 +295,27 @@ func (c *ControlPlane) NeedsScaleDown() bool {
 		return true
 	}
 	if c.Machines.Len() > int(*c.KCP.Spec.Replicas) {
-		c.Logger.Info("Scale down is required", "existing", c.Machines.Len(), "desired", int(*c.KCP.Spec.Replicas))
 		return true
 	}
 	return false
 }
 
 // NeedsScaleUp returns whether the control plane needs to add a machine to the
-// cluster. Scale up can be caused by an upgrade, where the cluster will
+// cluster. Scale up can be caused by a rollout, where the cluster will
 // replace outdated machines one by one, or by the more common case of having
 // fewer machines than the number of desired replicas.
 func (c *ControlPlane) NeedsScaleUp() bool {
-	if c.NeedsRollout() {
+	if c.needsReplacementMachine() {
+		c.Logger.Info("Scale up is required because rollout is in progress")
 		return true
 	}
 	if c.Machines.Len() < int(*c.KCP.Spec.Replicas) {
-		c.Logger.Info("Scale up is required", "desired", int(*c.KCP.Spec.Replicas), "existing", c.Machines.Len())
 		return true
 	}
 	return false
 }
 
-func (c *ControlPlane) NeedsRollout() bool {
+func (c *ControlPlane) needsReplacementMachine() bool {
 	return c.MachinesNeedingRollout().Any() && !c.NeedsScaleDown()
 }