From d4fe42f8f6751c186fb7273ff87731a223d9ff6f Mon Sep 17 00:00:00 2001
From: Nam Xuan Nguyen
Date: Wed, 30 Sep 2020 13:17:52 +0300
Subject: [PATCH] Add the ability to specify a drain timeout for machines

Add an option `nodeDrainTimeout` to KCP and to the machineSpec of
MachineDeployment. `nodeDrainTimeout` defines the total amount of time
the controller spends draining a node; once that time is exceeded, the
node is removed forcefully.
Note: leaving the option unset means there is no time limit.
---
 api/v1alpha2/conversion.go | 1 +
 api/v1alpha2/zz_generated.conversion.go | 1 +
 api/v1alpha3/machine_types.go | 6 +
 api/v1alpha3/zz_generated.deepcopy.go | 5 +
 .../cluster.x-k8s.io_machinedeployments.yaml | 7 +
 .../crd/bases/cluster.x-k8s.io_machines.yaml | 6 +
 .../bases/cluster.x-k8s.io_machinesets.yaml | 7 +
 .../exp.cluster.x-k8s.io_machinepools.yaml | 7 +
 controllers/machine_controller.go | 40 +++++-
 controllers/machine_controller_test.go | 127 ++++++++++++++++++
 .../v1alpha3/kubeadm_control_plane_types.go | 6 +
 .../v1alpha3/kubeadm_control_plane_webhook.go | 1 +
 .../api/v1alpha3/zz_generated.deepcopy.go | 6 +
 ...cluster.x-k8s.io_kubeadmcontrolplanes.yaml | 7 +
 controlplane/kubeadm/controllers/helpers.go | 3 +-
 15 files changed, 227 insertions(+), 3 deletions(-)

diff --git a/api/v1alpha2/conversion.go b/api/v1alpha2/conversion.go
index 00974460d07e..757b56df72f2 100644
--- a/api/v1alpha2/conversion.go
+++ b/api/v1alpha2/conversion.go
@@ -129,6 +129,7 @@ func restoreMachineSpec(restored *v1alpha3.MachineSpec, dst *v1alpha3.MachineSpe
 	}
 	dst.Bootstrap.DataSecretName = restored.Bootstrap.DataSecretName
 	dst.FailureDomain = restored.FailureDomain
+	dst.NodeDrainTimeout = restored.NodeDrainTimeout
 }
 
 func (dst *Machine) ConvertFrom(srcRaw conversion.Hub) error {
diff --git a/api/v1alpha2/zz_generated.conversion.go b/api/v1alpha2/zz_generated.conversion.go
index c096b057d71d..435282bf0890 100644
--- a/api/v1alpha2/zz_generated.conversion.go
+++ b/api/v1alpha2/zz_generated.conversion.go
@@ -891,6 +891,7 @@ func autoConvert_v1alpha3_MachineSpec_To_v1alpha2_MachineSpec(in *v1alpha3.Machi
 	out.Version = (*string)(unsafe.Pointer(in.Version))
 	out.ProviderID = (*string)(unsafe.Pointer(in.ProviderID))
 	// WARNING: in.FailureDomain requires manual conversion: does not exist in peer-type
+	// WARNING: in.NodeDrainTimeout requires manual conversion: does not exist in peer-type
 	return nil
 }
 
diff --git a/api/v1alpha3/machine_types.go b/api/v1alpha3/machine_types.go
index 54e3c8aadd52..43ab63c312cc 100644
--- a/api/v1alpha3/machine_types.go
+++ b/api/v1alpha3/machine_types.go
@@ -89,6 +89,12 @@ type MachineSpec struct {
 	// Must match a key in the FailureDomains map stored on the cluster object.
 	// +optional
 	FailureDomain *string `json:"failureDomain,omitempty"`
+
+	// NodeDrainTimeout is the total amount of time that the controller will spend on draining a node.
+	// The default value is 0, meaning that the node can be drained without any time limitations.
+ // NOTE: NodeDrainTimeout is different from `kubectl drain --timeout` + // +optional + NodeDrainTimeout *metav1.Duration `json:"nodeDrainTimeout,omitempty"` } // ANCHOR_END: MachineSpec diff --git a/api/v1alpha3/zz_generated.deepcopy.go b/api/v1alpha3/zz_generated.deepcopy.go index c0c5a506ef42..9d577525b330 100644 --- a/api/v1alpha3/zz_generated.deepcopy.go +++ b/api/v1alpha3/zz_generated.deepcopy.go @@ -802,6 +802,11 @@ func (in *MachineSpec) DeepCopyInto(out *MachineSpec) { *out = new(string) **out = **in } + if in.NodeDrainTimeout != nil { + in, out := &in.NodeDrainTimeout, &out.NodeDrainTimeout + *out = new(metav1.Duration) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSpec. diff --git a/config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml b/config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml index cb9993dd5f71..dd02945d862a 100644 --- a/config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml @@ -926,6 +926,13 @@ spec: description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids' type: string type: object + nodeDrainTimeout: + description: 'NodeDrainTimeout is the total amount of time + that the controller will spend on draining a node. The default + value is 0, meaning that the node can be drained without + any time limitations. NOTE: NodeDrainTimeout is different + from `kubectl drain --timeout`' + type: string providerID: description: ProviderID is the identification ID of the machine provided by the provider. This field must match the provider diff --git a/config/crd/bases/cluster.x-k8s.io_machines.yaml b/config/crd/bases/cluster.x-k8s.io_machines.yaml index 90d10bd477e6..98fcb91f1ebf 100644 --- a/config/crd/bases/cluster.x-k8s.io_machines.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machines.yaml @@ -514,6 +514,12 @@ spec: description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids' type: string type: object + nodeDrainTimeout: + description: 'NodeDrainTimeout is the total amount of time that the + controller will spend on draining a node. The default value is 0, + meaning that the node can be drained without any time limitations. + NOTE: NodeDrainTimeout is different from `kubectl drain --timeout`' + type: string providerID: description: ProviderID is the identification ID of the machine provided by the provider. This field must match the provider ID as seen on diff --git a/config/crd/bases/cluster.x-k8s.io_machinesets.yaml b/config/crd/bases/cluster.x-k8s.io_machinesets.yaml index 6276d4f38e7a..22663b5cece9 100644 --- a/config/crd/bases/cluster.x-k8s.io_machinesets.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machinesets.yaml @@ -825,6 +825,13 @@ spec: description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids' type: string type: object + nodeDrainTimeout: + description: 'NodeDrainTimeout is the total amount of time + that the controller will spend on draining a node. The default + value is 0, meaning that the node can be drained without + any time limitations. NOTE: NodeDrainTimeout is different + from `kubectl drain --timeout`' + type: string providerID: description: ProviderID is the identification ID of the machine provided by the provider. 
This field must match the provider
diff --git a/config/crd/bases/exp.cluster.x-k8s.io_machinepools.yaml b/config/crd/bases/exp.cluster.x-k8s.io_machinepools.yaml
index c31ccd5f7389..afba16712268 100644
--- a/config/crd/bases/exp.cluster.x-k8s.io_machinepools.yaml
+++ b/config/crd/bases/exp.cluster.x-k8s.io_machinepools.yaml
@@ -346,6 +346,13 @@ spec:
                           description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids'
                           type: string
                       type: object
+                    nodeDrainTimeout:
+                      description: 'NodeDrainTimeout is the total amount of time
+                        that the controller will spend on draining a node. The default
+                        value is 0, meaning that the node can be drained without
+                        any time limitations. NOTE: NodeDrainTimeout is different
+                        from `kubectl drain --timeout`'
+                      type: string
                     providerID:
                       description: ProviderID is the identification ID of the machine
                         provided by the provider. This field must match the provider
diff --git a/controllers/machine_controller.go b/controllers/machine_controller.go
index addb94700036..b4a0fc2f951a 100644
--- a/controllers/machine_controller.go
+++ b/controllers/machine_controller.go
@@ -299,14 +299,20 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste
 	conditions.MarkTrue(m, clusterv1.PreDrainDeleteHookSucceededCondition)
 
 	// Drain node before deletion and issue a patch in order to make this operation visible to the users.
-	if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeNodeDrainingAnnotation]; !exists {
+	if r.isNodeDrainAllowed(m) {
 		patchHelper, err := patch.NewHelper(m, r.Client)
 		if err != nil {
 			return ctrl.Result{}, err
 		}
 
 		logger.Info("Draining node", "node", m.Status.NodeRef.Name)
-		conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion")
+		// The DrainingSucceededCondition never exists before the node is drained for the first time,
+		// so its transition time can be used to record when draining first started.
+		// This `if` condition prevents the transition time from being changed more than once.
+		if conditions.Get(m, clusterv1.DrainingSucceededCondition) == nil {
+			conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion")
+		}
+
 		if err := patchMachine(ctx, patchHelper, m); err != nil {
 			return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
 		}
@@ -363,6 +369,36 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste
 	return ctrl.Result{}, nil
 }
 
+func (r *MachineReconciler) isNodeDrainAllowed(m *clusterv1.Machine) bool {
+	if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeNodeDrainingAnnotation]; exists {
+		return false
+	}
+
+	if r.nodeDrainTimeoutExceeded(m) {
+		return false
+	}
+
+	return true
+
+}
+
+func (r *MachineReconciler) nodeDrainTimeoutExceeded(machine *clusterv1.Machine) bool {
+	// if NodeDrainTimeout is not set by the user, there is no timeout
+	if machine.Spec.NodeDrainTimeout == nil || machine.Spec.NodeDrainTimeout.Seconds() <= 0 {
+		return false
+	}
+
+	// if the DrainingSucceeded condition does not exist, draining has not started yet
+	if conditions.Get(machine, clusterv1.DrainingSucceededCondition) == nil {
+		return false
+	}
+
+	now := time.Now()
+	firstTimeDrain := conditions.GetLastTransitionTime(machine, clusterv1.DrainingSucceededCondition)
+	diff := now.Sub(firstTimeDrain.Time)
+	return diff.Seconds() >= machine.Spec.NodeDrainTimeout.Seconds()
+}
+
 // isDeleteNodeAllowed returns nil only if the Machine's NodeRef is not nil
 // and if the Machine is not the last control plane node in the cluster.
 func (r *MachineReconciler) isDeleteNodeAllowed(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) error {
diff --git a/controllers/machine_controller_test.go b/controllers/machine_controller_test.go
index 9ded9a2468e8..918ad5e043cd 100644
--- a/controllers/machine_controller_test.go
+++ b/controllers/machine_controller_test.go
@@ -681,6 +681,133 @@ func Test_clusterToActiveMachines(t *testing.T) {
 	}
 }
 
+func TestIsNodeDrainedAllowed(t *testing.T) {
+	testCluster := &clusterv1.Cluster{
+		TypeMeta:   metav1.TypeMeta{Kind: "Cluster", APIVersion: clusterv1.GroupVersion.String()},
+		ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "test-cluster"},
+	}
+
+	tests := []struct {
+		name     string
+		machine  *clusterv1.Machine
+		expected bool
+	}{
+		{
+			name: "Exclude node draining annotation exists",
+			machine: &clusterv1.Machine{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:        "test-machine",
+					Namespace:   "default",
+					Finalizers:  []string{clusterv1.MachineFinalizer},
+					Annotations: map[string]string{clusterv1.ExcludeNodeDrainingAnnotation: "existed!!"},
+				},
+				Spec: clusterv1.MachineSpec{
+					ClusterName:       "test-cluster",
+					InfrastructureRef: corev1.ObjectReference{},
+					Bootstrap:         clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
+				},
+				Status: clusterv1.MachineStatus{},
+			},
+			expected: false,
+		},
+		{
+			name: "Node draining timeout is over",
+			machine: &clusterv1.Machine{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:       "test-machine",
+					Namespace:  "default",
+					Finalizers: []string{clusterv1.MachineFinalizer},
+				},
+				Spec: clusterv1.MachineSpec{
+					ClusterName:       "test-cluster",
+					InfrastructureRef: corev1.ObjectReference{},
+					Bootstrap:         clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
+					NodeDrainTimeout:  &metav1.Duration{Duration: time.Second * 60},
+				},
+
+				Status: clusterv1.MachineStatus{
+					Conditions: clusterv1.Conditions{
+						{
+							Type:               clusterv1.DrainingSucceededCondition,
+							Status:             corev1.ConditionFalse,
+							LastTransitionTime: metav1.Time{Time: 
time.Now().Add(-(time.Second * 70)).UTC()}, + }, + }, + }, + }, + expected: false, + }, + { + name: "Node draining timeout is not yet over", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-machine", + Namespace: "default", + Finalizers: []string{clusterv1.MachineFinalizer}, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: "test-cluster", + InfrastructureRef: corev1.ObjectReference{}, + Bootstrap: clusterv1.Bootstrap{Data: pointer.StringPtr("data")}, + NodeDrainTimeout: &metav1.Duration{Duration: time.Second * 60}, + }, + Status: clusterv1.MachineStatus{ + Conditions: clusterv1.Conditions{ + { + Type: clusterv1.DrainingSucceededCondition, + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()}, + }, + }, + }, + }, + expected: true, + }, + { + name: "NodeDrainTimeout option is set to its default value 0", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-machine", + Namespace: "default", + Finalizers: []string{clusterv1.MachineFinalizer}, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: "test-cluster", + InfrastructureRef: corev1.ObjectReference{}, + Bootstrap: clusterv1.Bootstrap{Data: pointer.StringPtr("data")}, + }, + Status: clusterv1.MachineStatus{ + Conditions: clusterv1.Conditions{ + { + Type: clusterv1.DrainingSucceededCondition, + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()}, + }, + }, + }, + }, + expected: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + var objs []runtime.Object + objs = append(objs, testCluster, tt.machine) + + r := &MachineReconciler{ + Client: helpers.NewFakeClientWithScheme(scheme.Scheme, objs...), + Log: log.Log, + scheme: scheme.Scheme, + } + + got := r.isNodeDrainAllowed(tt.machine) + g.Expect(got).To(Equal(tt.expected)) + }) + } +} + func TestIsDeleteNodeAllowed(t *testing.T) { deletionts := metav1.Now() diff --git a/controlplane/kubeadm/api/v1alpha3/kubeadm_control_plane_types.go b/controlplane/kubeadm/api/v1alpha3/kubeadm_control_plane_types.go index 4bbc7eec6402..48f374d20533 100644 --- a/controlplane/kubeadm/api/v1alpha3/kubeadm_control_plane_types.go +++ b/controlplane/kubeadm/api/v1alpha3/kubeadm_control_plane_types.go @@ -66,6 +66,12 @@ type KubeadmControlPlaneSpec struct { // KubeadmControlPlane // +optional UpgradeAfter *metav1.Time `json:"upgradeAfter,omitempty"` + + // NodeDrainTimeout is the total amount of time that the controller will spend on draining a controlplane node + // The default value is 0, meaning that the node can be drained without any time limitations. + // NOTE: NodeDrainTimeout is different from `kubectl drain --timeout` + // +optional + NodeDrainTimeout *metav1.Duration `json:"nodeDrainTimeout,omitempty"` } // KubeadmControlPlaneStatus defines the observed state of KubeadmControlPlane. 
diff --git a/controlplane/kubeadm/api/v1alpha3/kubeadm_control_plane_webhook.go b/controlplane/kubeadm/api/v1alpha3/kubeadm_control_plane_webhook.go index c842c0fb1ac8..6bd273019c5d 100644 --- a/controlplane/kubeadm/api/v1alpha3/kubeadm_control_plane_webhook.go +++ b/controlplane/kubeadm/api/v1alpha3/kubeadm_control_plane_webhook.go @@ -110,6 +110,7 @@ func (in *KubeadmControlPlane) ValidateUpdate(old runtime.Object) error { {spec, "replicas"}, {spec, "version"}, {spec, "upgradeAfter"}, + {spec, "nodeDrainTimeout"}, } allErrs := in.validateCommon() diff --git a/controlplane/kubeadm/api/v1alpha3/zz_generated.deepcopy.go b/controlplane/kubeadm/api/v1alpha3/zz_generated.deepcopy.go index 5b1ffc58e057..be97eb20a684 100644 --- a/controlplane/kubeadm/api/v1alpha3/zz_generated.deepcopy.go +++ b/controlplane/kubeadm/api/v1alpha3/zz_generated.deepcopy.go @@ -21,6 +21,7 @@ limitations under the License. package v1alpha3 import ( + "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" apiv1alpha3 "sigs.k8s.io/cluster-api/api/v1alpha3" ) @@ -98,6 +99,11 @@ func (in *KubeadmControlPlaneSpec) DeepCopyInto(out *KubeadmControlPlaneSpec) { in, out := &in.UpgradeAfter, &out.UpgradeAfter *out = (*in).DeepCopy() } + if in.NodeDrainTimeout != nil { + in, out := &in.NodeDrainTimeout, &out.NodeDrainTimeout + *out = new(v1.Duration) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeadmControlPlaneSpec. diff --git a/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanes.yaml b/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanes.yaml index 4b75b3247f04..9a33944a845a 100644 --- a/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanes.yaml +++ b/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanes.yaml @@ -1032,6 +1032,13 @@ spec: format: int32 type: integer type: object + nodeDrainTimeout: + description: 'NodeDrainTimeout is the total amount of time that the + controller will spend on draining a controlplane node The default + value is 0, meaning that the node can be drained without any time + limitations. NOTE: NodeDrainTimeout is different from `kubectl drain + --timeout`' + type: string replicas: description: Number of desired machines. Defaults to 1. When stacked etcd is used only odd numbers are permitted, as per [etcd best practice](https://etcd.io/docs/v3.3.12/faq/#why-an-odd-number-of-cluster-members). diff --git a/controlplane/kubeadm/controllers/helpers.go b/controlplane/kubeadm/controllers/helpers.go index fc89f2c2cee8..b5ce5fe63bb3 100644 --- a/controlplane/kubeadm/controllers/helpers.go +++ b/controlplane/kubeadm/controllers/helpers.go @@ -237,7 +237,8 @@ func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp Bootstrap: clusterv1.Bootstrap{ ConfigRef: bootstrapRef, }, - FailureDomain: failureDomain, + FailureDomain: failureDomain, + NodeDrainTimeout: kcp.Spec.NodeDrainTimeout, }, }
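
For illustration only (not part of the patch): a user sets the new field as a
metav1.Duration string, either in a MachineDeployment's machine template or on
a KubeadmControlPlane. The manifests below are a minimal sketch; the resource
names and the 10m value are hypothetical, and unrelated required fields are
omitted.

apiVersion: cluster.x-k8s.io/v1alpha3
kind: MachineDeployment
metadata:
  name: my-workers            # hypothetical name
spec:
  clusterName: my-cluster     # hypothetical name
  template:
    spec:
      clusterName: my-cluster
      # Give each worker node up to 10 minutes to drain before it is
      # removed forcefully; any metav1.Duration string (e.g. "100s") works.
      nodeDrainTimeout: 10m
---
apiVersion: controlplane.cluster.x-k8s.io/v1alpha3
kind: KubeadmControlPlane
metadata:
  name: my-control-plane      # hypothetical name
spec:
  replicas: 3
  version: v1.19.1
  # The same knob applies to the Machines that KCP creates for control plane nodes.
  nodeDrainTimeout: 10m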