From f111b67549d7def65c06bfe590479477947c7c2a Mon Sep 17 00:00:00 2001 From: Michael McCune Date: Tue, 7 Apr 2020 17:13:24 -0400 Subject: [PATCH] Improve delete node mechanisms in cluster-autoscaler CAPI provider This change adds a function to remove the annotations associated with marking a node for deletion. It also adds logic to unmark a node in the event that an error is returned after the node has been annotated but before it has been removed. In the case where a node cannot be removed (eg due to minimum size), the node is unmarked before we return from the error condition. --- .../clusterapi_machinedeployment.go | 4 +++ .../clusterapi/clusterapi_machineset.go | 4 +++ .../clusterapi/clusterapi_nodegroup.go | 1 + .../clusterapi/clusterapi_scalableresource.go | 29 +++++++++++++++++++ 4 files changed, 38 insertions(+) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go index f0cf66bf1512..67682a028f04 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go @@ -98,6 +98,10 @@ func (r machineDeploymentScalableResource) SetSize(nreplicas int32) error { return updateErr } +func (r machineDeploymentScalableResource) UnmarkMachineForDeletion(machine *Machine) error { + return unmarkMachineForDeletion(r.controller, machine) +} + func (r machineDeploymentScalableResource) MarkMachineForDeletion(machine *Machine) error { u, err := r.controller.dynamicclient.Resource(*r.controller.machineResource).Namespace(machine.Namespace).Get(machine.Name, metav1.GetOptions{}) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go index 617dd698e3aa..d679deecd562 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go @@ -106,6 +106,10 @@ func (r machineSetScalableResource) MarkMachineForDeletion(machine *Machine) err return updateErr } +func (r machineSetScalableResource) UnmarkMachineForDeletion(machine *Machine) error { + return unmarkMachineForDeletion(r.controller, machine) +} + func newMachineSetScalableResource(controller *machineController, machineSet *MachineSet) (*machineSetScalableResource, error) { minSize, maxSize, err := parseScalingBounds(machineSet.Annotations) if err != nil { diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go index 8c13da3cb296..b7401a5944a8 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go @@ -139,6 +139,7 @@ func (ng *nodegroup) DeleteNodes(nodes []*corev1.Node) error { } if err := ng.scalableResource.SetSize(replicas - 1); err != nil { + nodeGroup.scalableResource.UnmarkMachineForDeletion(machine) return err } diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go index 6c47da3004db..35bf7a493ffa 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go @@ -16,6 +16,12 @@ limitations under the License. package clusterapi +import ( + "fmt" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + // scalableResource is a resource that can be scaled up and down by // adjusting its replica count field. type scalableResource interface { @@ -46,4 +52,27 @@ type scalableResource interface { // MarkMachineForDeletion marks machine for deletion MarkMachineForDeletion(machine *Machine) error + + // UnmarkMachineForDeletion unmarks machine for deletion + UnmarkMachineForDeletion(machine *Machine) error +} + +func unmarkMachineForDeletion(controller *machineController, machine *Machine) error { + u, err := controller.dynamicclient.Resource(*controller.machineResource).Namespace(machine.Namespace).Get(machine.Name, metav1.GetOptions{}) + + if err != nil { + return err + } + if u == nil { + return fmt.Errorf("unknown machine %s", machine.Name) + } + + annotations := u.GetAnnotations() + if _, ok := annotations[machineDeleteAnnotationKey]; ok { + delete(annotations, machineDeleteAnnotationKey) + u.SetAnnotations(annotations) + _, updateErr := controller.dynamicclient.Resource(*controller.machineResource).Namespace(u.GetNamespace()).Update(u, metav1.UpdateOptions{}) + return updateErr + } + return nil }