Skip to content

Commit

Permalink
Mark node with condition when due for termination
Browse files Browse the repository at this point in the history
Switch from deleting the Machine to marking the Node with a
`Terminating` condition. This will integrate with an MHC to ensure that
the Machine is deleted.
This should be possible using the Node's own credentials; as such, the
termination handler will not need its own credentials or extra
permissions.
  • Loading branch information
JoelSpeed committed Nov 17, 2020
1 parent bc5addc commit 48c98a0
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 151 deletions.
100 changes: 73 additions & 27 deletions pkg/termination/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,28 @@ import (
"time"

"github.com/go-logr/logr"
machinev1 "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
// awsTerminationEndpointURL is the instance metadata URL for the spot
// termination time of the instance.
awsTerminationEndpointURL = "http://169.254.169.254/latest/meta-data/spot/termination-time"
awsTerminationEndpointURL = "http://169.254.169.254/latest/meta-data/spot/termination-time"
// terminatingConditionType is the Node condition type added to a Node
// once its instance has been marked for termination.
terminatingConditionType corev1.NodeConditionType = "Terminating"
// terminationRequestedReason is the Reason recorded on the Terminating
// condition.
terminationRequestedReason = "TerminationRequested"
)

// Handler represents a handler that will run to check the termination
// notice endpoint and delete Machine's if the instance termination notice is fulfilled.
// Handler represents a handler that will run to check the termination notice
// endpoint and mark node for deletion if the instance termination notice is fulfilled.
type Handler interface {
// Run starts the handler and returns any error it encounters.
// NOTE(review): presumably closing stop ends the run early — confirm
// against the implementation, which is not fully visible here.
Run(stop <-chan struct{}) error
}

// NewHandler constructs a new Handler
func NewHandler(logger logr.Logger, cfg *rest.Config, pollInterval time.Duration, namespace, nodeName string) (Handler, error) {
machinev1.AddToScheme(scheme.Scheme)
c, err := client.New(cfg, client.Options{Scheme: scheme.Scheme})
if err != nil {
return nil, fmt.Errorf("error creating client: %v", err)
Expand All @@ -52,8 +54,8 @@ func NewHandler(logger logr.Logger, cfg *rest.Config, pollInterval time.Duration
}, nil
}

// handler implements the logic to check the termination endpoint and delete the
// machine associated with the node
// handler implements the logic to check the termination endpoint and
// marks the node for termination
type handler struct {
client client.Client
pollURL *url.URL
Expand Down Expand Up @@ -89,13 +91,8 @@ func (h *handler) Run(stop <-chan struct{}) error {
func (h *handler) run(ctx context.Context, wg *sync.WaitGroup) error {
defer wg.Done()

machine, err := h.getMachineForNode(ctx)
if err != nil {
return fmt.Errorf("error fetching machine for node (%q): %v", h.nodeName, err)
}

logger := h.log.WithValues("machine", machine.Name)
logger.V(1).Info("Monitoring node for machine")
logger := h.log.WithValues("node", h.nodeName)
logger.V(1).Info("Monitoring node termination")

if err := wait.PollImmediateUntil(h.pollInterval, func() (bool, error) {
resp, err := http.Get(h.pollURL.String())
Expand All @@ -119,27 +116,76 @@ func (h *handler) run(ctx context.Context, wg *sync.WaitGroup) error {
}

// Will only get here if the termination endpoint returned 200
logger.V(1).Info("Instance marked for termination, deleting Machine")
if err := h.client.Delete(ctx, machine); err != nil {
return fmt.Errorf("error deleting machine: %v", err)
logger.V(1).Info("Instance marked for termination, marking Node for deletion")
if err := h.markNodeForDeletion(ctx); err != nil {
return fmt.Errorf("error marking node: %v", err)
}

return nil
}

// getMachineForNodeName finds the Machine associated with the Node name given
func (h *handler) getMachineForNode(ctx context.Context) (*machinev1.Machine, error) {
machineList := &machinev1.MachineList{}
err := h.client.List(ctx, machineList, client.InNamespace(h.namespace))
if err != nil {
return nil, fmt.Errorf("error listing machines: %v", err)
// markNodeForDeletion fetches the Node named h.nodeName, adds (or corrects)
// its Terminating condition in memory and then persists the change through
// the status subresource.
//
// It returns an error if the Node cannot be fetched or if the status update
// is rejected; in both cases the underlying client error is included in the
// returned message.
func (h *handler) markNodeForDeletion(ctx context.Context) error {
	node := &corev1.Node{}
	if err := h.client.Get(ctx, client.ObjectKey{Name: h.nodeName}, node); err != nil {
		return fmt.Errorf("error fetching node: %v", err)
	}

	// Conditions live under Node.Status, so the write has to go through the
	// status writer rather than a plain Update.
	addNodeTerminationCondition(node)
	if err := h.client.Status().Update(ctx, node); err != nil {
		// Include the cause; without it callers cannot tell a conflict
		// from a permissions problem or a transport failure.
		return fmt.Errorf("error updating node status: %v", err)
	}
	return nil
}

for _, machine := range machineList.Items {
if machine.Status.NodeRef != nil && machine.Status.NodeRef.Name == h.nodeName {
return &machine, nil
// nodeHasTerminationCondition reports whether any of the node's status
// conditions is of type terminatingConditionType. Only the condition type
// is inspected; its status value is not considered.
func nodeHasTerminationCondition(node *corev1.Node) bool {
	existing := node.Status.Conditions
	for i := range existing {
		if existing[i].Type == terminatingConditionType {
			return true
		}
	}
	return false
}

// addNodeTerminationCondition ensures that node.Status.Conditions contains
// a condition of type terminatingConditionType with status True.
//
// The node is modified in place; nothing is written to the API server here.
// If no such condition exists, a new one is appended. If one exists and is
// already True it is left untouched. If one exists with any other status it
// is replaced by the newly built condition.
func addNodeTerminationCondition(node *corev1.Node) {
// Use a single timestamp so the heartbeat and transition times agree.
now := metav1.Now()
terminatingCondition := corev1.NodeCondition{
Type: terminatingConditionType,
Status: corev1.ConditionTrue,
LastHeartbeatTime: now,
LastTransitionTime: now,
Reason: terminationRequestedReason,
Message: "The cloud provider has marked this instance for termination",
}

if !nodeHasTerminationCondition(node) {
// No existing terminating condition, so there is nothing to merge:
// append the new condition to the end and finish.
node.Status.Conditions = append(node.Status.Conditions, terminatingCondition)
return
}

// The node already has a terminating condition, so rebuild the list,
// keeping the original order and making sure that condition ends up
// with the correct status.
conditions := []corev1.NodeCondition{}
for _, condition := range node.Status.Conditions {
if condition.Type != terminatingConditionType {
// Unrelated conditions are carried over unchanged.
conditions = append(conditions, condition)
continue
}

// Condition type is terminating
if condition.Status == corev1.ConditionTrue {
// Already marked true: keep the existing condition as-is, which
// also keeps its original timestamps, reason and message.
conditions = append(conditions, condition)
continue
}

// The existing terminating condition had a status other than True,
// so substitute the freshly built condition in its place.
conditions = append(conditions, terminatingCondition)
}

return nil, fmt.Errorf("machine not found for node %q", h.nodeName)
node.Status.Conditions = conditions
}
Loading

0 comments on commit 48c98a0

Please sign in to comment.