Skip to content

Commit

Permalink
Merge pull request #64 from atlassian-labs/vportella/add-finalizer-an…
Browse files Browse the repository at this point in the history
…d-safety-checks

Add a finalizer before detaching a node and don't error if a node has been deleted
  • Loading branch information
vincentportella authored Sep 7, 2023
2 parents 2cddb71 + 203e0b2 commit 31cdc7b
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 4 deletions.
15 changes: 14 additions & 1 deletion pkg/controller/cyclenoderequest/transitioner/transitions.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

var (
const (
scaleUpWait = 1 * time.Minute
scaleUpLimit = 20 * time.Minute
nodeEquilibriumWaitLimit = 5 * time.Minute
Expand Down Expand Up @@ -274,6 +274,14 @@ func (t *CycleNodeRequestTransitioner) transitionInitialised() (reconcile.Result
var validNodes []v1.CycleNodeRequestNode

for _, node := range t.cycleNodeRequest.Status.CurrentNodes {
t.rm.Logger.Info("Adding finalizer to node", "node", node.Name)

// Add the finalizer to the node before detaching it
if err := t.rm.AddFinalizerToNode(node.Name); err != nil {
t.rm.LogEvent(t.cycleNodeRequest, "AddFinalizerToNodeError", err.Error())
return t.transitionToHealing(err)
}

alreadyDetaching, err := nodeGroups.DetachInstance(node.ProviderID)

if alreadyDetaching {
Expand Down Expand Up @@ -531,6 +539,11 @@ func (t *CycleNodeRequestTransitioner) transitionHealing() (reconcile.Result, er
continue
}

if err := t.rm.RemoveFinalizerFromNode(node.Name); err != nil {
t.rm.LogEvent(t.cycleNodeRequest, "RemoveFinalizerFromNodeError", err.Error())
return t.transitionToFailed(err)
}

// try and re-attach the nodes, if any were un-attached
t.rm.LogEvent(t.cycleNodeRequest, "AttachingNodes", "Attaching instances to nodes group: %v", node.Name)
// if the node is already attached, ignore the error and continue to un-cordoning, otherwise return with error
Expand Down
8 changes: 7 additions & 1 deletion pkg/controller/cyclenodestatus/transitioner/transitions.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,20 @@ func (t *CycleNodeStatusTransitioner) transitionDraining() (reconcile.Result, er
}

// transitionDeleting transitions any CycleNodeStatuses in the Deleting phase to the Terminating phase
// It will delete the node out of the Kubernetes API.
// It will delete the node out of the Kubernetes API and remove the finalizer.
func (t *CycleNodeStatusTransitioner) transitionDeleting() (reconcile.Result, error) {
t.rm.LogEvent(t.cycleNodeStatus, "DeletingNode", "Deleting node: %v", t.cycleNodeStatus.Status.CurrentNode.Name)
err := t.rm.DeleteNode(t.cycleNodeStatus.Status.CurrentNode.Name)
if err != nil {
return t.transitionToFailed(err)
}

// Remove the finalizer to allow the node to be deleted
if err := t.rm.RemoveFinalizerFromNode(t.cycleNodeStatus.Status.CurrentNode.Name); err != nil {
t.rm.LogEvent(t.cycleNodeStatus, "RemoveFinalizerFromNodeError", err.Error())
return t.transitionToFailed(err)
}

return t.transitionObject(v1.CycleNodeStatusTerminatingNode)
}

Expand Down
64 changes: 62 additions & 2 deletions pkg/controller/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,17 @@ import (

"github.com/atlassian-labs/cyclops/pkg/k8s"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
nodeFinalizerName = "cyclops.atlassian.com/finalizer"
)

// ListNodes lists nodes from Kubernetes, optionally filtered by a selector.
func (rm *ResourceManager) ListNodes(selector labels.Selector) ([]v1.Node, error) {
// List the nodes
Expand Down Expand Up @@ -41,12 +47,29 @@ func (rm *ResourceManager) GetNode(name string) (*v1.Node, error) {
func (rm *ResourceManager) DeleteNode(name string) error {
// Get the node
node, err := rm.GetNode(name)

// If the node is not found then skip trying to delete it
if err != nil && apierrors.IsNotFound(err) {
rm.Logger.Info("node already deleted, skip deleting", "node", name)
return nil
}

// Account for any other possible errors
if err != nil {
return err
}

// Delete the node
return rm.Client.Delete(context.TODO(), node)
// The node exists as of the previous step, try deleting it now
err = rm.Client.Delete(context.TODO(), node)

// Account for possible race conditions with other controllers managing
// nodes in the cluster
if apierrors.IsNotFound(err) {
rm.Logger.Info("node deletion attemp failed, node already deleted", "node", name)
return nil
}

return err
}

// DrainPods drains the pods off the named node.
Expand All @@ -71,3 +94,40 @@ func (rm *ResourceManager) DrainPods(nodeName string, unhealthyAfter time.Durati

return false, k8s.DrainPods(pods, rm.RawClient, unhealthyAfter)
}

func (rm *ResourceManager) AddFinalizerToNode(nodeName string) error {
return rm.manageFinalizerOnNode(nodeName, k8s.AddFinalizerToNode)
}

func (rm *ResourceManager) RemoveFinalizerFromNode(nodeName string) error {
return rm.manageFinalizerOnNode(nodeName, k8s.RemoveFinalizerFromNode)
}

func (rm *ResourceManager) manageFinalizerOnNode(nodeName string, fn func(*v1.Node, string, kubernetes.Interface) error) error {
// Get the node
node, err := rm.GetNode(nodeName)

// If the node is not found then skip the finalizer operation
if err != nil && apierrors.IsNotFound(err) {
rm.Logger.Info("node deleted, skip adding finalizer", "node", nodeName)
return nil
}

// Account for any other possible errors
if err != nil {
return err
}

// The node exists as of the previous step, try managing the finalizer for
// it now
err = fn(node, nodeFinalizerName, rm.RawClient)

// Account for possible race conditions with other controllers managing
// nodes in the cluster
if apierrors.IsNotFound(err) {
rm.Logger.Info("adding finalizer failed, node already deleted", "node", nodeName)
return nil
}

return err
}
15 changes: 15 additions & 0 deletions pkg/k8s/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// CordonNode performs a patch operation on a node to mark it as unschedulable
Expand Down Expand Up @@ -60,6 +61,20 @@ func AddLabelToNode(nodeName string, labelName string, labelValue string, client
return PatchNode(nodeName, patches, client)
}

// AddFinalizerToNode updates a node to add a finalizer to it
func AddFinalizerToNode(node *v1.Node, finalizerName string, client kubernetes.Interface) error {
controllerutil.AddFinalizer(node, finalizerName)
_, err := client.CoreV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{})
return err
}

// RemoveFinalizerFromNode updates a node to remove a finalizer from it
func RemoveFinalizerFromNode(node *v1.Node, finalizerName string, client kubernetes.Interface) error {
controllerutil.RemoveFinalizer(node, finalizerName)
_, err := client.CoreV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{})
return err
}

// NodeLister defines an object that can list nodes with a label selector
type NodeLister interface {
List(labels.Selector) ([]*v1.Node, error)
Expand Down

0 comments on commit 31cdc7b

Please sign in to comment.