Merge pull request #3662 from Nordix/node-drain-nam
✨ Add the ability to specify a drain timeout for machines
k8s-ci-robot authored Sep 30, 2020
2 parents 5754973 + d4fe42f commit 7523f71
Showing 15 changed files with 227 additions and 3 deletions.
1 change: 1 addition & 0 deletions api/v1alpha2/conversion.go
@@ -129,6 +129,7 @@ func restoreMachineSpec(restored *v1alpha3.MachineSpec, dst *v1alpha3.MachineSpe
}
dst.Bootstrap.DataSecretName = restored.Bootstrap.DataSecretName
dst.FailureDomain = restored.FailureDomain
dst.NodeDrainTimeout = restored.NodeDrainTimeout
}

func (dst *Machine) ConvertFrom(srcRaw conversion.Hub) error {
1 change: 1 addition & 0 deletions api/v1alpha2/zz_generated.conversion.go

Some generated files are not rendered by default.

6 changes: 6 additions & 0 deletions api/v1alpha3/machine_types.go
@@ -89,6 +89,12 @@ type MachineSpec struct {
// Must match a key in the FailureDomains map stored on the cluster object.
// +optional
FailureDomain *string `json:"failureDomain,omitempty"`

// NodeDrainTimeout is the total amount of time that the controller will spend on draining a node.
// The default value is 0, meaning that the node can be drained without any time limitations.
// NOTE: NodeDrainTimeout is different from `kubectl drain --timeout`
// +optional
NodeDrainTimeout *metav1.Duration `json:"nodeDrainTimeout,omitempty"`
}

// ANCHOR_END: MachineSpec
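For illustration, a Machine that opts into the new behaviour might look like the sketch below. Object and reference names are hypothetical and bootstrap/infrastructure details are trimmed; metav1.Duration values are written as Go-style duration strings (for example "10m" or "600s"), and leaving the field unset keeps the default of draining without a time limit.

apiVersion: cluster.x-k8s.io/v1alpha3
kind: Machine
metadata:
  name: example-machine              # hypothetical name
  namespace: default
spec:
  clusterName: example-cluster       # hypothetical cluster
  nodeDrainTimeout: 10m              # give up on draining after 10 minutes and proceed with deletion
  bootstrap:
    dataSecretName: example-bootstrap-data   # hypothetical secret
  infrastructureRef:                 # hypothetical infrastructure provider reference
    apiVersion: infrastructure.cluster.x-k8s.io/v1alpha3
    kind: DockerMachine
    name: example-machine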
5 changes: 5 additions & 0 deletions api/v1alpha3/zz_generated.deepcopy.go

Some generated files are not rendered by default.

7 changes: 7 additions & 0 deletions config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml
@@ -926,6 +926,13 @@ spec:
description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids'
type: string
type: object
nodeDrainTimeout:
description: 'NodeDrainTimeout is the total amount of time
that the controller will spend on draining a node. The default
value is 0, meaning that the node can be drained without
any time limitations. NOTE: NodeDrainTimeout is different
from `kubectl drain --timeout`'
type: string
providerID:
description: ProviderID is the identification ID of the machine
provided by the provider. This field must match the provider
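Because a MachineDeployment embeds the same MachineSpec under spec.template.spec, the timeout can also be set on a template so every Machine created from it inherits it. A minimal sketch with hypothetical names; the selector and the required bootstrap/infrastructure references are omitted for brevity:

apiVersion: cluster.x-k8s.io/v1alpha3
kind: MachineDeployment
metadata:
  name: example-md                   # hypothetical name
spec:
  clusterName: example-cluster       # hypothetical cluster
  replicas: 3
  template:
    spec:
      clusterName: example-cluster
      nodeDrainTimeout: 5m           # applied to each Machine stamped out from this template
      # bootstrap and infrastructureRef omitted for brevity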
6 changes: 6 additions & 0 deletions config/crd/bases/cluster.x-k8s.io_machines.yaml
@@ -514,6 +514,12 @@ spec:
description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids'
type: string
type: object
nodeDrainTimeout:
description: 'NodeDrainTimeout is the total amount of time that the
controller will spend on draining a node. The default value is 0,
meaning that the node can be drained without any time limitations.
NOTE: NodeDrainTimeout is different from `kubectl drain --timeout`'
type: string
providerID:
description: ProviderID is the identification ID of the machine provided
by the provider. This field must match the provider ID as seen on
7 changes: 7 additions & 0 deletions config/crd/bases/cluster.x-k8s.io_machinesets.yaml
@@ -825,6 +825,13 @@ spec:
description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids'
type: string
type: object
nodeDrainTimeout:
description: 'NodeDrainTimeout is the total amount of time
that the controller will spend on draining a node. The default
value is 0, meaning that the node can be drained without
any time limitations. NOTE: NodeDrainTimeout is different
from `kubectl drain --timeout`'
type: string
providerID:
description: ProviderID is the identification ID of the machine
provided by the provider. This field must match the provider
7 changes: 7 additions & 0 deletions config/crd/bases/exp.cluster.x-k8s.io_machinepools.yaml
@@ -346,6 +346,13 @@ spec:
description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids'
type: string
type: object
nodeDrainTimeout:
description: 'NodeDrainTimeout is the total amount of time
that the controller will spend on draining a node. The default
value is 0, meaning that the node can be drained without
any time limitations. NOTE: NodeDrainTimeout is different
from `kubectl drain --timeout`'
type: string
providerID:
description: ProviderID is the identification ID of the machine
provided by the provider. This field must match the provider
40 changes: 38 additions & 2 deletions controllers/machine_controller.go
@@ -299,14 +299,20 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste
conditions.MarkTrue(m, clusterv1.PreDrainDeleteHookSucceededCondition)

// Drain node before deletion and issue a patch in order to make this operation visible to the users.
if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeNodeDrainingAnnotation]; !exists {
if r.isNodeDrainAllowed(m) {
patchHelper, err := patch.NewHelper(m, r.Client)
if err != nil {
return ctrl.Result{}, err
}

logger.Info("Draining node", "node", m.Status.NodeRef.Name)
conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion")
// The DrainingSucceededCondition never exists before the node is drained for the first time,
// so its transition time can be used to record when draining first started.
// This `if` condition prevents the transition time from being changed more than once.
if conditions.Get(m, clusterv1.DrainingSucceededCondition) == nil {
conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion")
}

if err := patchMachine(ctx, patchHelper, m); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
}
@@ -363,6 +369,36 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste
return ctrl.Result{}, nil
}

func (r *MachineReconciler) isNodeDrainAllowed(m *clusterv1.Machine) bool {
if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeNodeDrainingAnnotation]; exists {
return false
}

if r.nodeDrainTimeoutExceeded(m) {
return false
}

return true
}

func (r *MachineReconciler) nodeDrainTimeoutExceeded(machine *clusterv1.Machine) bool {
// if the NodeDrainTimeout is not set by the user
if machine.Spec.NodeDrainTimeout == nil || machine.Spec.NodeDrainTimeout.Seconds() <= 0 {
return false
}

// if the draining succeeded condition does not exist
if conditions.Get(machine, clusterv1.DrainingSucceededCondition) == nil {
return false
}

now := time.Now()
firstTimeDrain := conditions.GetLastTransitionTime(machine, clusterv1.DrainingSucceededCondition)
diff := now.Sub(firstTimeDrain.Time)
return diff.Seconds() >= machine.Spec.NodeDrainTimeout.Seconds()
}

// isDeleteNodeAllowed returns nil only if the Machine's NodeRef is not nil
// and if the Machine is not the last control plane node in the cluster.
func (r *MachineReconciler) isDeleteNodeAllowed(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) error {
127 changes: 127 additions & 0 deletions controllers/machine_controller_test.go
@@ -681,6 +681,133 @@ func Test_clusterToActiveMachines(t *testing.T) {
}
}

func TestIsNodeDrainedAllowed(t *testing.T) {
testCluster := &clusterv1.Cluster{
TypeMeta: metav1.TypeMeta{Kind: "Cluster", APIVersion: clusterv1.GroupVersion.String()},
ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "test-cluster"},
}

tests := []struct {
name string
machine *clusterv1.Machine
expected bool
}{
{
name: "Exclude node draining annotation exists",
machine: &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "test-machine",
Namespace: "default",
Finalizers: []string{clusterv1.MachineFinalizer},
Annotations: map[string]string{clusterv1.ExcludeNodeDrainingAnnotation: "existed!!"},
},
Spec: clusterv1.MachineSpec{
ClusterName: "test-cluster",
InfrastructureRef: corev1.ObjectReference{},
Bootstrap: clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
},
Status: clusterv1.MachineStatus{},
},
expected: false,
},
{
name: "Node draining timeout is over",
machine: &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "test-machine",
Namespace: "default",
Finalizers: []string{clusterv1.MachineFinalizer},
},
Spec: clusterv1.MachineSpec{
ClusterName: "test-cluster",
InfrastructureRef: corev1.ObjectReference{},
Bootstrap: clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
NodeDrainTimeout: &metav1.Duration{Duration: time.Second * 60},
},

Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.DrainingSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 70)).UTC()},
},
},
},
},
expected: false,
},
{
name: "Node draining timeout is not yet over",
machine: &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "test-machine",
Namespace: "default",
Finalizers: []string{clusterv1.MachineFinalizer},
},
Spec: clusterv1.MachineSpec{
ClusterName: "test-cluster",
InfrastructureRef: corev1.ObjectReference{},
Bootstrap: clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
NodeDrainTimeout: &metav1.Duration{Duration: time.Second * 60},
},
Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.DrainingSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
},
},
},
},
expected: true,
},
{
name: "NodeDrainTimeout option is set to its default value 0",
machine: &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "test-machine",
Namespace: "default",
Finalizers: []string{clusterv1.MachineFinalizer},
},
Spec: clusterv1.MachineSpec{
ClusterName: "test-cluster",
InfrastructureRef: corev1.ObjectReference{},
Bootstrap: clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
},
Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.DrainingSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
},
},
},
},
expected: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

var objs []runtime.Object
objs = append(objs, testCluster, tt.machine)

r := &MachineReconciler{
Client: helpers.NewFakeClientWithScheme(scheme.Scheme, objs...),
Log: log.Log,
scheme: scheme.Scheme,
}

got := r.isNodeDrainAllowed(tt.machine)
g.Expect(got).To(Equal(tt.expected))
})
}
}

func TestIsDeleteNodeAllowed(t *testing.T) {
deletionts := metav1.Now()

@@ -66,6 +66,12 @@ type KubeadmControlPlaneSpec struct {
// KubeadmControlPlane
// +optional
UpgradeAfter *metav1.Time `json:"upgradeAfter,omitempty"`

// NodeDrainTimeout is the total amount of time that the controller will spend on draining a controlplane node.
// The default value is 0, meaning that the node can be drained without any time limitations.
// NOTE: NodeDrainTimeout is different from `kubectl drain --timeout`
// +optional
NodeDrainTimeout *metav1.Duration `json:"nodeDrainTimeout,omitempty"`
}

// KubeadmControlPlaneStatus defines the observed state of KubeadmControlPlane.
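As a usage sketch (names and versions hypothetical), a KubeadmControlPlane could set the field as shown below; per the helpers.go change further down, the value is copied onto every control-plane Machine the controller generates:

apiVersion: controlplane.cluster.x-k8s.io/v1alpha3
kind: KubeadmControlPlane
metadata:
  name: example-control-plane        # hypothetical name
spec:
  replicas: 3
  version: v1.19.1                   # hypothetical Kubernetes version
  nodeDrainTimeout: 10m              # propagated to each generated control-plane Machine
  # infrastructureTemplate and kubeadmConfigSpec omitted for brevity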
@@ -110,6 +110,7 @@ func (in *KubeadmControlPlane) ValidateUpdate(old runtime.Object) error {
{spec, "replicas"},
{spec, "version"},
{spec, "upgradeAfter"},
{spec, "nodeDrainTimeout"},
}

allErrs := in.validateCommon()
6 changes: 6 additions & 0 deletions controlplane/kubeadm/api/v1alpha3/zz_generated.deepcopy.go

Some generated files are not rendered by default.

@@ -1032,6 +1032,13 @@ spec:
format: int32
type: integer
type: object
nodeDrainTimeout:
description: 'NodeDrainTimeout is the total amount of time that the
controller will spend on draining a controlplane node. The default
value is 0, meaning that the node can be drained without any time
limitations. NOTE: NodeDrainTimeout is different from `kubectl drain
--timeout`'
type: string
replicas:
description: Number of desired machines. Defaults to 1. When stacked
etcd is used only odd numbers are permitted, as per [etcd best practice](https://etcd.io/docs/v3.3.12/faq/#why-an-odd-number-of-cluster-members).
3 changes: 2 additions & 1 deletion controlplane/kubeadm/controllers/helpers.go
@@ -237,7 +237,8 @@ func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp
Bootstrap: clusterv1.Bootstrap{
ConfigRef: bootstrapRef,
},
FailureDomain: failureDomain,
FailureDomain: failureDomain,
NodeDrainTimeout: kcp.Spec.NodeDrainTimeout,
},
}

