Skip to content

Commit

Permalink
UPSTREAM: <carry>: openshift: Machine controller: drain node before m…
Browse files Browse the repository at this point in the history
…achine deletion

The node draining code itself is imported from github.com/openshift/kubernetes-drain.

At the same time, it's currently impossible to use the controller-runtime client for node draining
due to the missing Patch operation (kubernetes-sigs/controller-runtime#235).
Thus, the machine controller needs to initialize a kubeclient as well in order to
implement the node-draining logic. Once the Patch operation is implemented,
the draining logic can be updated to replace the kube client with the controller-runtime client.

Also, initialize an event recorder to generate node-draining events.
  • Loading branch information
ingvagabund committed Mar 7, 2019
1 parent 0c3e884 commit a3f3130
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 5 deletions.
5 changes: 5 additions & 0 deletions pkg/controller/machine/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,15 @@ go_library(
"//pkg/apis/machine/v1beta1:go_default_library",
"//pkg/controller/error:go_default_library",
"//pkg/util:go_default_library",
"//vendor/github.com/go-log/log/info:go_default_library",
"//vendor/github.com/openshift/kubernetes-drain:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/sigs.k8s.io/controller-runtime/pkg/client:go_default_library",
"//vendor/sigs.k8s.io/controller-runtime/pkg/controller:go_default_library",
Expand Down
72 changes: 67 additions & 5 deletions pkg/controller/machine/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,22 @@ package machine
import (
"context"
"errors"
"fmt"
"os"
"time"

"github.com/go-log/log/info"
machinev1 "github.com/openshift/cluster-api/pkg/apis/machine/v1beta1"
controllerError "github.com/openshift/cluster-api/pkg/controller/error"
"github.com/openshift/cluster-api/pkg/util"
kubedrain "github.com/openshift/kubernetes-drain"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
"k8s.io/klog"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
Expand All @@ -37,7 +44,12 @@ import (
"sigs.k8s.io/controller-runtime/pkg/source"
)

const NodeNameEnvVar = "NODE_NAME"
const (
// NodeNameEnvVar is the environment variable from which the controller reads
// the name of the node it is running on (see newReconciler's os.Getenv call).
NodeNameEnvVar = "NODE_NAME"

// ExcludeNodeDrainingAnnotation annotation explicitly skips node draining if set
ExcludeNodeDrainingAnnotation = "machine.openshift.io/exclude-node-draining"
)

var DefaultActuator Actuator

Expand All @@ -48,10 +60,12 @@ func AddWithActuator(mgr manager.Manager, actuator Actuator) error {
// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager, actuator Actuator) reconcile.Reconciler {
r := &ReconcileMachine{
Client: mgr.GetClient(),
scheme: mgr.GetScheme(),
nodeName: os.Getenv(NodeNameEnvVar),
actuator: actuator,
Client: mgr.GetClient(),
eventRecorder: mgr.GetRecorder("machine-controller"),
config: mgr.GetConfig(),
scheme: mgr.GetScheme(),
nodeName: os.Getenv(NodeNameEnvVar),
actuator: actuator,
}

if r.nodeName == "" {
Expand Down Expand Up @@ -83,8 +97,11 @@ var _ reconcile.Reconciler = &ReconcileMachine{}
// ReconcileMachine reconciles a Machine object
type ReconcileMachine struct {
client.Client
config *rest.Config
scheme *runtime.Scheme

eventRecorder record.EventRecorder

actuator Actuator

// nodeName is the name of the node on which the machine controller is running, if not present, it is loaded from NODE_NAME.
Expand Down Expand Up @@ -145,6 +162,51 @@ func (r *ReconcileMachine) Reconcile(request reconcile.Request) (reconcile.Resul
return reconcile.Result{}, nil
}
klog.Infof("reconciling machine object %v triggers delete.", name)

// Drain node before deletion
// If a machine is not linked to a node, just delete the machine. Since a node
// can be unlinked from a machine when the node goes NotReady and is removed
// by cloud controller manager. In that case some machines would never get
// deleted without a manual intervention.
if _, exists := m.ObjectMeta.Annotations[ExcludeNodeDrainingAnnotation]; !exists && m.Status.NodeRef != nil {
if err := func() error {
kubeClient, err := kubernetes.NewForConfig(r.config)
if err != nil {
return fmt.Errorf("unable to build kube client: %v", err)
}
node, err := kubeClient.CoreV1().Nodes().Get(m.Status.NodeRef.Name, metav1.GetOptions{})
if err != nil {
return fmt.Errorf("unable to get node %q: %v", m.Status.NodeRef.Name, err)
}

if err := kubedrain.Drain(
kubeClient,
[]*corev1.Node{node},
&kubedrain.DrainOptions{
Force: true,
IgnoreDaemonsets: true,
DeleteLocalData: true,
GracePeriodSeconds: -1,
Logger: info.New(klog.V(0)),
// If a pod is not evicted in 20 second, retry the eviction next time the
// machine gets reconciled again (to allow other machines to be reconciled)
Timeout: 20 * time.Second,
},
); err != nil {
// Machine still tries to terminate after drain failure
klog.Warningf("drain failed for machine %q: %v", m.Name, err)
return &controllerError.RequeueAfterError{RequeueAfter: 20 * time.Second}
}

klog.Infof("drain successful for machine %q", m.Name)
r.eventRecorder.Eventf(m, corev1.EventTypeNormal, "Deleted", "Node %q drained", node.Name)

return nil
}(); err != nil {
return reconcile.Result{}, err
}
}

if err := r.actuator.Delete(ctx, cluster, m); err != nil {
klog.Errorf("Error deleting machine object %v; %v", name, err)
if requeueErr, ok := err.(*controllerError.RequeueAfterError); ok {
Expand Down

0 comments on commit a3f3130

Please sign in to comment.