Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UPSTREAM: <carry>: openshift: Machine controller: drain node before machine deletion #11

Merged
merged 2 commits into from
Mar 12, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
276 changes: 45 additions & 231 deletions Gopkg.lock

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions pkg/controller/machine/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,15 @@ go_library(
"//pkg/apis/machine/v1beta1:go_default_library",
"//pkg/controller/error:go_default_library",
"//pkg/util:go_default_library",
"//vendor/github.com/go-log/log/info:go_default_library",
"//vendor/github.com/openshift/kubernetes-drain:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/sigs.k8s.io/controller-runtime/pkg/client:go_default_library",
"//vendor/sigs.k8s.io/controller-runtime/pkg/controller:go_default_library",
Expand Down
74 changes: 69 additions & 5 deletions pkg/controller/machine/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,22 @@ package machine
import (
"context"
"errors"
"fmt"
"os"
"time"

"github.com/go-log/log/info"
machinev1 "github.com/openshift/cluster-api/pkg/apis/machine/v1beta1"
controllerError "github.com/openshift/cluster-api/pkg/controller/error"
"github.com/openshift/cluster-api/pkg/util"
kubedrain "github.com/openshift/kubernetes-drain"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
"k8s.io/klog"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
Expand All @@ -37,7 +44,12 @@ import (
"sigs.k8s.io/controller-runtime/pkg/source"
)

const NodeNameEnvVar = "NODE_NAME"
const (
// NodeNameEnvVar is the name of the environment variable that newReconciler
// reads (via os.Getenv) to populate the reconciler's nodeName field.
NodeNameEnvVar = "NODE_NAME"

// ExcludeNodeDrainingAnnotation is the machine annotation key that explicitly
// skips node draining before deletion. Only the presence of the key is
// checked by Reconcile; its value is ignored.
ExcludeNodeDrainingAnnotation = "machine.openshift.io/exclude-node-draining"
)

var DefaultActuator Actuator

Expand All @@ -48,10 +60,12 @@ func AddWithActuator(mgr manager.Manager, actuator Actuator) error {
// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager, actuator Actuator) reconcile.Reconciler {
r := &ReconcileMachine{
Client: mgr.GetClient(),
scheme: mgr.GetScheme(),
nodeName: os.Getenv(NodeNameEnvVar),
actuator: actuator,
Client: mgr.GetClient(),
eventRecorder: mgr.GetRecorder("machine-controller"),
config: mgr.GetConfig(),
scheme: mgr.GetScheme(),
nodeName: os.Getenv(NodeNameEnvVar),
actuator: actuator,
}

if r.nodeName == "" {
Expand Down Expand Up @@ -83,8 +97,11 @@ var _ reconcile.Reconciler = &ReconcileMachine{}
// ReconcileMachine reconciles a Machine object
type ReconcileMachine struct {
client.Client
config *rest.Config
scheme *runtime.Scheme

eventRecorder record.EventRecorder

actuator Actuator

// nodeName is the name of the node on which the machine controller is running, if not present, it is loaded from NODE_NAME.
Expand Down Expand Up @@ -145,6 +162,18 @@ func (r *ReconcileMachine) Reconcile(request reconcile.Request) (reconcile.Resul
return reconcile.Result{}, nil
}
klog.Infof("reconciling machine object %v triggers delete.", name)

// Drain node before deletion
// If a machine is not linked to a node, just delete the machine: a node
// can be unlinked from a machine when the node goes NotReady and is removed
// by the cloud controller manager. If draining were required in that case,
// some machines would never get deleted without manual intervention.
if _, exists := m.ObjectMeta.Annotations[ExcludeNodeDrainingAnnotation]; !exists && m.Status.NodeRef != nil {
if err := r.drainNode(m); err != nil {
return reconcile.Result{}, err
}
}

if err := r.actuator.Delete(ctx, cluster, m); err != nil {
klog.Errorf("Error deleting machine object %v; %v", name, err)
if requeueErr, ok := err.(*controllerError.RequeueAfterError); ok {
Expand Down Expand Up @@ -193,6 +222,41 @@ func (r *ReconcileMachine) Reconcile(request reconcile.Request) (reconcile.Resul
return reconcile.Result{}, nil
}

// drainNode cordons and drains the node referenced by machine.Status.NodeRef
// so that its pods are evicted before the machine is deleted.
//
// It returns nil when the drain succeeded or when there is nothing to drain
// (the machine has no node reference, or the referenced node no longer
// exists). It returns a plain error when the kube client cannot be built or
// the node lookup fails for a reason other than NotFound, and a
// *controllerError.RequeueAfterError when the drain itself fails or times
// out, so that the caller can retry on a later reconciliation.
func (r *ReconcileMachine) drainNode(machine *machinev1.Machine) error {
	// drainRetryInterval bounds a single drain attempt and is also the delay
	// requested before the next attempt. Keeping each attempt short lets other
	// machines be reconciled while pods on this node are still terminating.
	const drainRetryInterval = 20 * time.Second

	// Defensive guard: a machine that is not linked to a node has nothing to drain.
	if machine.Status.NodeRef == nil {
		return nil
	}
	nodeName := machine.Status.NodeRef.Name

	kubeClient, err := kubernetes.NewForConfig(r.config)
	if err != nil {
		return fmt.Errorf("unable to build kube client: %v", err)
	}

	node, err := kubeClient.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
	if err != nil {
		if apierrors.IsNotFound(err) {
			// The node may already have been removed (for example after going
			// NotReady) while the machine still references it. Failing here
			// would block machine deletion forever, so skip the drain instead.
			klog.Infof("node %q referenced by machine %q not found, skipping drain: %v", nodeName, machine.Name, err)
			return nil
		}
		return fmt.Errorf("unable to get node %q: %v", nodeName, err)
	}

	drainOpts := &kubedrain.DrainOptions{
		Force:              true,
		IgnoreDaemonsets:   true,
		DeleteLocalData:    true,
		GracePeriodSeconds: -1,
		Logger:             info.New(klog.V(0)),
		// If a pod is not evicted within drainRetryInterval, give up for now
		// and retry the eviction the next time the machine is reconciled.
		Timeout: drainRetryInterval,
	}

	if err := kubedrain.Drain(kubeClient, []*corev1.Node{node}, drainOpts); err != nil {
		// The machine is NOT deleted after a failed drain: the error is
		// returned to the caller so that the drain is attempted again later.
		klog.Warningf("drain failed for machine %q: %v", machine.Name, err)
		return &controllerError.RequeueAfterError{RequeueAfter: drainRetryInterval}
	}

	klog.Infof("drain successful for machine %q", machine.Name)
	r.eventRecorder.Eventf(machine, corev1.EventTypeNormal, "Deleted", "Node %q drained", node.Name)

	return nil
}

func (r *ReconcileMachine) getCluster(ctx context.Context, machine *machinev1.Machine) (*machinev1.Cluster, error) {
clusterList := machinev1.ClusterList{}
listOptions := &client.ListOptions{
Expand Down
9 changes: 9 additions & 0 deletions vendor/github.com/go-log/log/BUILD.bazel

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions vendor/github.com/go-log/log/LICENSE

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

92 changes: 92 additions & 0 deletions vendor/github.com/go-log/log/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions vendor/github.com/go-log/log/info/BUILD.bazel

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions vendor/github.com/go-log/log/info/info.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions vendor/github.com/go-log/log/log.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions vendor/github.com/openshift/kubernetes-drain/BUILD.bazel

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading