From 60151cdde1487059fe8209fc0f65869ef293d43a Mon Sep 17 00:00:00 2001 From: Michael McCune Date: Tue, 7 Apr 2020 17:13:24 -0400 Subject: [PATCH 1/2] Improve delete node mechanisms in cluster-autoscaler CAPI provider This change adds a function to remove the annotations associated with marking a node for deletion. It also adds logic to unmark a node in the event that an error is returned after the node has been annotated but before it has been removed. In the case where a node cannot be removed (eg due to minimum size), the node is unmarked before we return from the error condition. --- .../clusterapi_machinedeployment.go | 4 +++ .../clusterapi/clusterapi_machineset.go | 4 +++ .../clusterapi/clusterapi_nodegroup.go | 1 + .../clusterapi/clusterapi_scalableresource.go | 30 +++++++++++++++++++ 4 files changed, 39 insertions(+) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go index 100c1ff6bbf8..ba9ab3749b74 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go @@ -99,6 +99,10 @@ func (r machineDeploymentScalableResource) SetSize(nreplicas int32) error { return updateErr } +func (r machineDeploymentScalableResource) UnmarkMachineForDeletion(machine *Machine) error { + return unmarkMachineForDeletion(r.controller, machine) +} + func (r machineDeploymentScalableResource) MarkMachineForDeletion(machine *Machine) error { u, err := r.controller.dynamicclient.Resource(*r.controller.machineResource).Namespace(machine.Namespace).Get(context.TODO(), machine.Name, metav1.GetOptions{}) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go index 98655bfaf3b5..e4f02096b0a7 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go @@ -107,6 +107,10 @@ func (r machineSetScalableResource) MarkMachineForDeletion(machine *Machine) err return updateErr } +func (r machineSetScalableResource) UnmarkMachineForDeletion(machine *Machine) error { + return unmarkMachineForDeletion(r.controller, machine) +} + func newMachineSetScalableResource(controller *machineController, machineSet *MachineSet) (*machineSetScalableResource, error) { minSize, maxSize, err := parseScalingBounds(machineSet.Annotations) if err != nil { diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go index 8c13da3cb296..b7401a5944a8 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go @@ -139,6 +139,7 @@ func (ng *nodegroup) DeleteNodes(nodes []*corev1.Node) error { } if err := ng.scalableResource.SetSize(replicas - 1); err != nil { + nodeGroup.scalableResource.UnmarkMachineForDeletion(machine) return err } diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go index 6c47da3004db..ae32b8548b39 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go @@ -16,6 +16,13 @@ limitations under the License. package clusterapi +import ( + "context" + "fmt" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + // scalableResource is a resource that can be scaled up and down by // adjusting its replica count field. type scalableResource interface { @@ -46,4 +53,27 @@ type scalableResource interface { // MarkMachineForDeletion marks machine for deletion MarkMachineForDeletion(machine *Machine) error + + // UnmarkMachineForDeletion unmarks machine for deletion + UnmarkMachineForDeletion(machine *Machine) error +} + +func unmarkMachineForDeletion(controller *machineController, machine *Machine) error { + u, err := controller.dynamicclient.Resource(*controller.machineResource).Namespace(machine.Namespace).Get(context.TODO(), machine.Name, metav1.GetOptions{}) + + if err != nil { + return err + } + if u == nil { + return fmt.Errorf("unknown machine %s", machine.Name) + } + + annotations := u.GetAnnotations() + if _, ok := annotations[machineDeleteAnnotationKey]; ok { + delete(annotations, machineDeleteAnnotationKey) + u.SetAnnotations(annotations) + _, updateErr := controller.dynamicclient.Resource(*controller.machineResource).Namespace(u.GetNamespace()).Update(context.TODO(), u, metav1.UpdateOptions{}) + return updateErr + } + return nil } From f527b397b5f0f1d49627e58f0b353c3087e8f785 Mon Sep 17 00:00:00 2001 From: Michael McCune Date: Tue, 2 Jun 2020 16:24:30 -0400 Subject: [PATCH 2/2] remove redundant error checks in mark/unmark deletion functions This change removes a few nil checks against resources returned in the Mark and Unmark deletion functions of the cluster-autoscaler CAPI provider. These checks look to see if the returned value for a resource are nil, but the function will not return nil if it returns an error[0]. We only need to check the error return as discussed here[1]. [0] https://github.com/kubernetes/client-go/blob/master/dynamic/simple.go#L234 [1] https://github.com/openshift/kubernetes-autoscaler/pull/141/files#r414480960 --- .../cloudprovider/clusterapi/clusterapi_machinedeployment.go | 4 ---- .../cloudprovider/clusterapi/clusterapi_machineset.go | 4 ---- .../cloudprovider/clusterapi/clusterapi_scalableresource.go | 5 ----- 3 files changed, 13 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go index ba9ab3749b74..8dc976185e91 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go @@ -105,13 +105,9 @@ func (r machineDeploymentScalableResource) UnmarkMachineForDeletion(machine *Mac func (r machineDeploymentScalableResource) MarkMachineForDeletion(machine *Machine) error { u, err := r.controller.dynamicclient.Resource(*r.controller.machineResource).Namespace(machine.Namespace).Get(context.TODO(), machine.Name, metav1.GetOptions{}) - if err != nil { return err } - if u == nil { - return fmt.Errorf("unknown machine %s", machine.Name) - } u = u.DeepCopy() diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go index e4f02096b0a7..dc494e354f68 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go @@ -86,13 +86,9 @@ func (r machineSetScalableResource) SetSize(nreplicas int32) error { func (r machineSetScalableResource) MarkMachineForDeletion(machine *Machine) error { u, err := r.controller.dynamicclient.Resource(*r.controller.machineResource).Namespace(machine.Namespace).Get(context.TODO(), machine.Name, metav1.GetOptions{}) - if err != nil { return err } - if u == nil { - return fmt.Errorf("unknown machine %s", machine.Name) - } u = u.DeepCopy() diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go index ae32b8548b39..24c443111936 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go @@ -18,7 +18,6 @@ package clusterapi import ( "context" - "fmt" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -60,13 +59,9 @@ type scalableResource interface { func unmarkMachineForDeletion(controller *machineController, machine *Machine) error { u, err := controller.dynamicclient.Resource(*controller.machineResource).Namespace(machine.Namespace).Get(context.TODO(), machine.Name, metav1.GetOptions{}) - if err != nil { return err } - if u == nil { - return fmt.Errorf("unknown machine %s", machine.Name) - } annotations := u.GetAnnotations() if _, ok := annotations[machineDeleteAnnotationKey]; ok {