diff --git a/controllers/machine_controller.go b/controllers/machine_controller.go index 5461c7aa2bca..2298b70f0a86 100644 --- a/controllers/machine_controller.go +++ b/controllers/machine_controller.go @@ -322,10 +322,6 @@ func (r *MachineReconciler) isDeleteNodeAllowed(ctx context.Context, cluster *cl // Do not delete the NodeRef if there are no remaining members of // the control plane. return errNoControlPlaneNodes - case numControlPlaneMachines == 1 && util.IsControlPlaneMachine(machine): - // Do not delete the NodeRef if this is the last member of the - // control plane. - return errLastControlPlaneNode default: // Otherwise it is okay to delete the NodeRef. return nil diff --git a/controllers/machine_controller_test.go b/controllers/machine_controller_test.go index 04eaa017ca29..81b2fa526394 100644 --- a/controllers/machine_controller_test.go +++ b/controllers/machine_controller_test.go @@ -19,6 +19,7 @@ package controllers import ( "context" "testing" + "time" . "github.com/onsi/gomega" @@ -836,7 +837,7 @@ func TestIsDeleteNodeAllowed(t *testing.T) { expectedError: errNoControlPlaneNodes, }, { - name: "is last control plane members", + name: "is last control plane member", cluster: &clusterv1.Cluster{}, machine: &clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{ @@ -846,7 +847,8 @@ func TestIsDeleteNodeAllowed(t *testing.T) { clusterv1.ClusterLabelName: "test", clusterv1.MachineControlPlaneLabelName: "", }, - Finalizers: []string{clusterv1.MachineFinalizer, metav1.FinalizerDeleteDependents}, + Finalizers: []string{clusterv1.MachineFinalizer, metav1.FinalizerDeleteDependents}, + DeletionTimestamp: &metav1.Time{Time: time.Now().UTC()}, }, Spec: clusterv1.MachineSpec{ ClusterName: "test-cluster", @@ -859,7 +861,7 @@ func TestIsDeleteNodeAllowed(t *testing.T) { }, }, }, - expectedError: errLastControlPlaneNode, + expectedError: errNoControlPlaneNodes, }, { name: "has nodeRef and control plane is healthy", diff --git 
a/controlplane/kubeadm/controllers/controller.go b/controlplane/kubeadm/controllers/controller.go index 4d56e78c09e1..333a394a4375 100644 --- a/controlplane/kubeadm/controllers/controller.go +++ b/controlplane/kubeadm/controllers/controller.go @@ -332,6 +332,7 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o handler.M // reconcileHealth performs health checks for control plane components and etcd // It removes any etcd members that do not have a corresponding node. +// Also, as a final step, checks if there are any machines being deleted. func (r *KubeadmControlPlaneReconciler) reconcileHealth(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) error { logger := controlPlane.Logger() @@ -361,5 +362,11 @@ func (r *KubeadmControlPlaneReconciler) reconcileHealth(ctx context.Context, clu return &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter} } + // We need this check for scale up as well as down to avoid scaling up when there is a machine being deleted. + // This should be at the end of this method as there is no need to wait for the machine to be completely deleted before reconciling etcd. + // TODO: Revisit during machine remediation implementation which may need to cover other machine phases. 
+ if controlPlane.HasDeletingMachine() { + return &capierrors.RequeueAfterError{RequeueAfter: deleteRequeueAfter} + } return nil } diff --git a/controlplane/kubeadm/controllers/scale.go b/controlplane/kubeadm/controllers/scale.go index 2a2915f76e49..8cb1332f3204 100644 --- a/controlplane/kubeadm/controllers/scale.go +++ b/controlplane/kubeadm/controllers/scale.go @@ -48,6 +48,7 @@ func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Conte func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) { logger := controlPlane.Logger() + // reconcileHealth returns err if there is a machine being deleted, which is a required condition to check before scaling up if err := r.reconcileHealth(ctx, cluster, kcp, controlPlane); err != nil { return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter} } @@ -73,21 +74,16 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane( ) (ctrl.Result, error) { logger := controlPlane.Logger() + if err := r.reconcileHealth(ctx, cluster, kcp, controlPlane); err != nil { + return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter} + } + workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster)) if err != nil { logger.Error(err, "Failed to create client to workload cluster") return ctrl.Result{}, errors.Wrapf(err, "failed to create client to workload cluster") } - // Wait for any delete in progress to complete before deleting another Machine - if controlPlane.HasDeletingMachine() { - return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: deleteRequeueAfter} - } - - if err := r.reconcileHealth(ctx, cluster, kcp, controlPlane); err != nil { - return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter} - } - 
machineToDelete, err := selectMachineForScaleDown(controlPlane) if err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to select machine for scale down") diff --git a/test/e2e/kcp_upgrade.go b/test/e2e/kcp_upgrade.go index 9fb80b235371..13e1c68a74a4 100644 --- a/test/e2e/kcp_upgrade.go +++ b/test/e2e/kcp_upgrade.go @@ -66,7 +66,49 @@ func KCPUpgradeSpec(ctx context.Context, inputGetter func() KCPUpgradeSpecInput) namespace, cancelWatches = setupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder) }) - It("Should successfully upgrade Kubernetes, DNS, kube-proxy, and etcd", func() { + It("Should successfully upgrade Kubernetes, DNS, kube-proxy, and etcd in a single control plane cluster", func() { + + By("Creating a workload cluster") + Expect(input.E2EConfig.Variables).To(HaveKey(clusterctl.KubernetesVersion)) + Expect(input.E2EConfig.Variables).To(HaveKey(clusterctl.CNIPath)) + + cluster, controlPlane, _ = clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + ConfigCluster: clusterctl.ConfigClusterInput{ + LogFolder: filepath.Join(input.ArtifactFolder, "clusters", input.BootstrapClusterProxy.GetName()), + ClusterctlConfigPath: input.ClusterctlConfigPath, + KubeconfigPath: input.BootstrapClusterProxy.GetKubeconfigPath(), + InfrastructureProvider: clusterctl.DefaultInfrastructureProvider, + Flavor: clusterctl.DefaultFlavor, + Namespace: namespace.Name, + ClusterName: fmt.Sprintf("cluster-%s", util.RandomString(6)), + KubernetesVersion: input.E2EConfig.GetKubernetesVersion(), + ControlPlaneMachineCount: pointer.Int64Ptr(1), + WorkerMachineCount: pointer.Int64Ptr(1), + }, + CNIManifestPath: input.E2EConfig.GetCNIPath(), + WaitForClusterIntervals: input.E2EConfig.GetIntervals(specName, "wait-cluster"), + WaitForControlPlaneIntervals: input.E2EConfig.GetIntervals(specName, "wait-control-plane"), + WaitForMachineDeployments: 
input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), + }) + + By("Upgrading Kubernetes, DNS, kube-proxy, and etcd versions") + framework.UpgradeControlPlaneAndWaitForUpgrade(ctx, framework.UpgradeControlPlaneAndWaitForUpgradeInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: cluster, + ControlPlane: controlPlane, + // Valid image tags for v1.17.2 + EtcdImageTag: "3.4.3-0", + DNSImageTag: "1.6.6", + KubernetesUpgradeVersion: "v1.17.2", + WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + WaitForDNSUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + WaitForEtcdUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + }) + + By("PASSED!") + }) + It("Should successfully upgrade Kubernetes, DNS, kube-proxy, and etcd in a HA cluster", func() { By("Creating a workload cluster") Expect(input.E2EConfig.Variables).To(HaveKey(clusterctl.KubernetesVersion))