Skip to content

Commit

Permalink
Merge pull request #2958 from sedefsavas/kcp_upgrade_fix
Browse files Browse the repository at this point in the history
🐛 KCP should check if it has machines being deleted before proceeding with scale up/down
  • Loading branch information
k8s-ci-robot authored Apr 28, 2020
2 parents ef72ed6 + 8bbdd95 commit 7cda00b
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 17 deletions.
4 changes: 0 additions & 4 deletions controllers/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,10 +322,6 @@ func (r *MachineReconciler) isDeleteNodeAllowed(ctx context.Context, cluster *cl
// Do not delete the NodeRef if there are no remaining members of
// the control plane.
return errNoControlPlaneNodes
case numControlPlaneMachines == 1 && util.IsControlPlaneMachine(machine):
// Do not delete the NodeRef if this is the last member of the
// control plane.
return errLastControlPlaneNode
default:
// Otherwise it is okay to delete the NodeRef.
return nil
Expand Down
8 changes: 5 additions & 3 deletions controllers/machine_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package controllers
import (
"context"
"testing"
"time"

. "github.com/onsi/gomega"

Expand Down Expand Up @@ -836,7 +837,7 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
expectedError: errNoControlPlaneNodes,
},
{
name: "is last control plane members",
name: "is last control plane member",
cluster: &clusterv1.Cluster{},
machine: &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -846,7 +847,8 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
clusterv1.ClusterLabelName: "test",
clusterv1.MachineControlPlaneLabelName: "",
},
Finalizers: []string{clusterv1.MachineFinalizer, metav1.FinalizerDeleteDependents},
Finalizers: []string{clusterv1.MachineFinalizer, metav1.FinalizerDeleteDependents},
DeletionTimestamp: &metav1.Time{Time: time.Now().UTC()},
},
Spec: clusterv1.MachineSpec{
ClusterName: "test-cluster",
Expand All @@ -859,7 +861,7 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
},
},
},
expectedError: errLastControlPlaneNode,
expectedError: errNoControlPlaneNodes,
},
{
name: "has nodeRef and control plane is healthy",
Expand Down
7 changes: 7 additions & 0 deletions controlplane/kubeadm/controllers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o handler.M

// reconcileHealth performs health checks for control plane components and etcd
// It removes any etcd members that do not have a corresponding node.
// Also, as a final step, checks if there is any machines that is being deleted.
func (r *KubeadmControlPlaneReconciler) reconcileHealth(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) error {
logger := controlPlane.Logger()

Expand Down Expand Up @@ -361,5 +362,11 @@ func (r *KubeadmControlPlaneReconciler) reconcileHealth(ctx context.Context, clu
return &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
}

// We need this check for scale up as well as down to avoid scaling up when there is a machine being deleted.
// This should be at the end of this method as no need to wait for machine to be completely deleted to reconcile etcd.
// TODO: Revisit during machine remediation implementation which may need to cover other machine phases.
if controlPlane.HasDeletingMachine() {
return &capierrors.RequeueAfterError{RequeueAfter: deleteRequeueAfter}
}
return nil
}
14 changes: 5 additions & 9 deletions controlplane/kubeadm/controllers/scale.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Conte
func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
logger := controlPlane.Logger()

// reconcileHealth returns err if there is a machine being delete which is a required condition to check before scaling up
if err := r.reconcileHealth(ctx, cluster, kcp, controlPlane); err != nil {
return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
}
Expand All @@ -73,21 +74,16 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
) (ctrl.Result, error) {
logger := controlPlane.Logger()

if err := r.reconcileHealth(ctx, cluster, kcp, controlPlane); err != nil {
return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
}

workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
if err != nil {
logger.Error(err, "Failed to create client to workload cluster")
return ctrl.Result{}, errors.Wrapf(err, "failed to create client to workload cluster")
}

// Wait for any delete in progress to complete before deleting another Machine
if controlPlane.HasDeletingMachine() {
return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: deleteRequeueAfter}
}

if err := r.reconcileHealth(ctx, cluster, kcp, controlPlane); err != nil {
return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}
}

machineToDelete, err := selectMachineForScaleDown(controlPlane)
if err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to select machine for scale down")
Expand Down
44 changes: 43 additions & 1 deletion test/e2e/kcp_upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,49 @@ func KCPUpgradeSpec(ctx context.Context, inputGetter func() KCPUpgradeSpecInput)
namespace, cancelWatches = setupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder)
})

It("Should successfully upgrade Kubernetes, DNS, kube-proxy, and etcd", func() {
It("Should successfully upgrade Kubernetes, DNS, kube-proxy, and etcd in a single control plane cluster", func() {

By("Creating a workload cluster")
Expect(input.E2EConfig.Variables).To(HaveKey(clusterctl.KubernetesVersion))
Expect(input.E2EConfig.Variables).To(HaveKey(clusterctl.CNIPath))

cluster, controlPlane, _ = clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
ConfigCluster: clusterctl.ConfigClusterInput{
LogFolder: filepath.Join(input.ArtifactFolder, "clusters", input.BootstrapClusterProxy.GetName()),
ClusterctlConfigPath: input.ClusterctlConfigPath,
KubeconfigPath: input.BootstrapClusterProxy.GetKubeconfigPath(),
InfrastructureProvider: clusterctl.DefaultInfrastructureProvider,
Flavor: clusterctl.DefaultFlavor,
Namespace: namespace.Name,
ClusterName: fmt.Sprintf("cluster-%s", util.RandomString(6)),
KubernetesVersion: input.E2EConfig.GetKubernetesVersion(),
ControlPlaneMachineCount: pointer.Int64Ptr(1),
WorkerMachineCount: pointer.Int64Ptr(1),
},
CNIManifestPath: input.E2EConfig.GetCNIPath(),
WaitForClusterIntervals: input.E2EConfig.GetIntervals(specName, "wait-cluster"),
WaitForControlPlaneIntervals: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
})

By("Upgrading Kubernetes, DNS, kube-proxy, and etcd versions")
framework.UpgradeControlPlaneAndWaitForUpgrade(ctx, framework.UpgradeControlPlaneAndWaitForUpgradeInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: cluster,
ControlPlane: controlPlane,
//Valid image tags for v1.17.2
EtcdImageTag: "3.4.3-0",
DNSImageTag: "1.6.6",
KubernetesUpgradeVersion: "v1.17.2",
WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
WaitForDNSUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
WaitForEtcdUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
})

By("PASSED!")
})
It("Should successfully upgrade Kubernetes, DNS, kube-proxy, and etcd in a HA cluster", func() {

By("Creating a workload cluster")
Expect(input.E2EConfig.Variables).To(HaveKey(clusterctl.KubernetesVersion))
Expand Down

0 comments on commit 7cda00b

Please sign in to comment.