From 6c0e2c07382b7ae60ed7b86760cfcb1b555ac743 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Wed, 11 Dec 2024 12:23:53 -0800 Subject: [PATCH 1/3] feat: drain and volume detachment status conditions --- pkg/apis/v1/nodeclaim_status.go | 2 + .../node/termination/controller.go | 160 ++++++---- .../node/termination/suite_test.go | 274 +++++++++++------- .../termination/terminator/events/events.go | 10 + .../node/termination/terminator/terminator.go | 2 +- 5 files changed, 285 insertions(+), 163 deletions(-) diff --git a/pkg/apis/v1/nodeclaim_status.go b/pkg/apis/v1/nodeclaim_status.go index 8970e50486..f8d1551fcb 100644 --- a/pkg/apis/v1/nodeclaim_status.go +++ b/pkg/apis/v1/nodeclaim_status.go @@ -28,6 +28,8 @@ const ( ConditionTypeInitialized = "Initialized" ConditionTypeConsolidatable = "Consolidatable" ConditionTypeDrifted = "Drifted" + ConditionTypeDrained = "Drained" + ConditionTypeVolumesDetached = "VolumesDetached" ConditionTypeInstanceTerminating = "InstanceTerminating" ConditionTypeConsistentStateFound = "ConsistentStateFound" ConditionTypeDisruptionReason = "DisruptionReason" diff --git a/pkg/controllers/node/termination/controller.go b/pkg/controllers/node/termination/controller.go index 67e12c6ebf..ffdb5f8231 100644 --- a/pkg/controllers/node/termination/controller.go +++ b/pkg/controllers/node/termination/controller.go @@ -29,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" "k8s.io/utils/clock" controllerruntime "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -76,6 +77,7 @@ func NewController(clk clock.Clock, kubeClient client.Client, cloudProvider clou func (c *Controller) Reconcile(ctx context.Context, n *corev1.Node) (reconcile.Result, error) { ctx = injection.WithControllerName(ctx, "node.termination") + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("Node", klog.KRef(n.Namespace, n.Name))) if !n.GetDeletionTimestamp().IsZero() { return c.finalize(ctx, n) @@ -92,82 +94,137 @@ func (c *Controller) finalize(ctx context.Context, node *corev1.Node) (reconcile return reconcile.Result{}, nil } - nodeClaims, err := nodeutils.GetNodeClaims(ctx, c.kubeClient, node) + nodeClaim, err := nodeutils.NodeClaimForNode(ctx, c.kubeClient, node) if err != nil { - return reconcile.Result{}, fmt.Errorf("listing nodeclaims, %w", err) + if nodeutils.IsDuplicateNodeClaimError(err) || nodeutils.IsNodeClaimNotFoundError(err) { + log.FromContext(ctx).Error(err, "failed to terminate node") + return reconcile.Result{}, nil + } + return reconcile.Result{}, err + } + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("NodeClaim", klog.KRef(nodeClaim.Namespace, nodeClaim.Name))) + if nodeClaim.DeletionTimestamp.IsZero() { + if err := c.kubeClient.Delete(ctx, nodeClaim); err != nil { + if errors.IsNotFound(err) { + return reconcile.Result{Requeue: true}, nil + } + return reconcile.Result{}, fmt.Errorf("deleting nodeclaim, %w", err) + } } - if err = c.deleteAllNodeClaims(ctx, nodeClaims...); err != nil { - return reconcile.Result{}, fmt.Errorf("deleting nodeclaims, %w", err) + // If the underlying NodeClaim no longer exists, we want to delete to avoid trying to gracefully drain nodes that are + // no longer alive. We do a check on the Ready condition of the node since, even though the CloudProvider says the + // instance is not around, we know that the kubelet process is still running if the Node Ready condition is true. 
+ // Similar logic to: https://github.com/kubernetes/kubernetes/blob/3a75a8c8d9e6a1ebd98d8572132e675d4980f184/staging/src/k8s.io/cloud-provider/controllers/nodelifecycle/node_lifecycle_controller.go#L144 + if nodeutils.GetCondition(node, corev1.NodeReady).Status != corev1.ConditionTrue { + if _, err = c.cloudProvider.Get(ctx, node.Spec.ProviderID); err != nil { + if cloudprovider.IsNodeClaimNotFoundError(err) { + return reconcile.Result{}, c.removeFinalizer(ctx, node) + } + return reconcile.Result{}, fmt.Errorf("getting nodeclaim, %w", err) + } } - nodeTerminationTime, err := c.nodeTerminationTime(node, nodeClaims...) + nodeTerminationTime, err := c.nodeTerminationTime(node, nodeClaim) if err != nil { return reconcile.Result{}, err } - if err = c.terminator.Taint(ctx, node, v1.DisruptedNoScheduleTaint); err != nil { - if errors.IsConflict(err) { + if errors.IsConflict(err) || errors.IsNotFound(err) { return reconcile.Result{Requeue: true}, nil } - return reconcile.Result{}, client.IgnoreNotFound(fmt.Errorf("tainting node with %s, %w", pretty.Taint(v1.DisruptedNoScheduleTaint), err)) + return reconcile.Result{}, fmt.Errorf("tainting node with %s, %w", pretty.Taint(v1.DisruptedNoScheduleTaint), err) } if err = c.terminator.Drain(ctx, node, nodeTerminationTime); err != nil { if !terminator.IsNodeDrainError(err) { return reconcile.Result{}, fmt.Errorf("draining node, %w", err) } c.recorder.Publish(terminatorevents.NodeFailedToDrain(node, err)) - // If the underlying NodeClaim no longer exists, we want to delete to avoid trying to gracefully draining - // on nodes that are no longer alive. We do a check on the Ready condition of the node since, even - // though the CloudProvider says the instance is not around, we know that the kubelet process is still running - // if the Node Ready condition is true - // Similar logic to: https://github.com/kubernetes/kubernetes/blob/3a75a8c8d9e6a1ebd98d8572132e675d4980f184/staging/src/k8s.io/cloud-provider/controllers/nodelifecycle/node_lifecycle_controller.go#L144 - if nodeutils.GetCondition(node, corev1.NodeReady).Status != corev1.ConditionTrue { - if _, err = c.cloudProvider.Get(ctx, node.Spec.ProviderID); err != nil { - if cloudprovider.IsNodeClaimNotFoundError(err) { - return reconcile.Result{}, c.removeFinalizer(ctx, node) + stored := nodeClaim.DeepCopy() + if modified := nodeClaim.StatusConditions().SetFalse(v1.ConditionTypeDrained, "Draining", "Draining"); modified { + if err := c.kubeClient.Status().Patch(ctx, nodeClaim, client.MergeFromWithOptions(stored, client.MergeFromWithOptimisticLock{})); err != nil { + if errors.IsConflict(err) || errors.IsNotFound(err) { + return reconcile.Result{Requeue: true}, nil } - return reconcile.Result{}, fmt.Errorf("getting nodeclaim, %w", err) + return reconcile.Result{}, err } } - return reconcile.Result{RequeueAfter: 1 * time.Second}, nil } - NodesDrainedTotal.Inc(map[string]string{ - metrics.NodePoolLabel: node.Labels[v1.NodePoolLabelKey], - }) + if !nodeClaim.StatusConditions().Get(v1.ConditionTypeDrained).IsTrue() { + stored := nodeClaim.DeepCopy() + _ = nodeClaim.StatusConditions().SetTrue(v1.ConditionTypeDrained) + if err := c.kubeClient.Status().Patch(ctx, nodeClaim, client.MergeFromWithOptions(stored, client.MergeFromWithOptimisticLock{})); err != nil { + if errors.IsConflict(err) || errors.IsNotFound(err) { + return reconcile.Result{Requeue: true}, nil + } + return reconcile.Result{}, err + } + NodesDrainedTotal.Inc(map[string]string{ + metrics.NodePoolLabel: node.Labels[v1.NodePoolLabelKey], + }) + // We 
requeue after a patch operation since we want to ensure we read our own writes before any subsequent + // operations on the NodeClaim. + return reconcile.Result{RequeueAfter: 1 * time.Second}, nil + } + // In order for Pods associated with PersistentVolumes to smoothly migrate from the terminating Node, we wait // for VolumeAttachments of drain-able Pods to be cleaned up before terminating Node and removing its finalizer. // However, if TerminationGracePeriod is configured for Node, and we are past that period, we will skip waiting. - if nodeTerminationTime == nil || c.clock.Now().Before(*nodeTerminationTime) { - areVolumesDetached, err := c.ensureVolumesDetached(ctx, node) - if err != nil { - return reconcile.Result{}, fmt.Errorf("ensuring no volume attachments, %w", err) - } - if !areVolumesDetached { - return reconcile.Result{RequeueAfter: 1 * time.Second}, nil - } - } - nodeClaims, err = nodeutils.GetNodeClaims(ctx, c.kubeClient, node) + volumesDetached, err := c.ensureVolumesDetached(ctx, node) if err != nil { - return reconcile.Result{}, fmt.Errorf("deleting nodeclaims, %w", err) + return reconcile.Result{}, fmt.Errorf("ensuring no volume attachments, %w", err) } - for _, nodeClaim := range nodeClaims { - isInstanceTerminated, err := termination.EnsureTerminated(ctx, c.kubeClient, nodeClaim, c.cloudProvider) - if err != nil { - // 404 = the nodeClaim no longer exists - if errors.IsNotFound(err) { - continue + if volumesDetached { + stored := nodeClaim.DeepCopy() + if modified := nodeClaim.StatusConditions().SetTrue(v1.ConditionTypeVolumesDetached); modified { + if err := c.kubeClient.Status().Patch(ctx, nodeClaim, client.MergeFromWithOptions(stored, client.MergeFromWithOptimisticLock{})); err != nil { + if errors.IsConflict(err) || errors.IsNotFound(err) { + return reconcile.Result{Requeue: true}, nil + } + return reconcile.Result{}, err } - // 409 - The nodeClaim exists, but its status has already been modified - if errors.IsConflict(err) { - return reconcile.Result{Requeue: true}, nil + // We requeue after a patch operation since we want to ensure we read our own writes before any subsequent + // operations on the NodeClaim. 
+ return reconcile.Result{RequeueAfter: 1 * time.Second}, nil + } + } else if !c.hasTerminationGracePeriodElapsed(nodeTerminationTime) { + c.recorder.Publish(terminatorevents.NodeAwaitingVolumeDetachmentEvent(node)) + stored := nodeClaim.DeepCopy() + if modified := nodeClaim.StatusConditions().SetFalse(v1.ConditionTypeVolumesDetached, "AwaitingVolumeDetachment", "AwaitingVolumeDetachment"); modified { + if err := c.kubeClient.Status().Patch(ctx, nodeClaim, client.MergeFromWithOptions(stored, client.MergeFromWithOptimisticLock{})); err != nil { + if errors.IsConflict(err) || errors.IsNotFound(err) { + return reconcile.Result{Requeue: true}, nil + } + return reconcile.Result{}, err + } + } + return reconcile.Result{RequeueAfter: 1 * time.Second}, nil + } else { + stored := nodeClaim.DeepCopy() + if modified := nodeClaim.StatusConditions().SetFalse(v1.ConditionTypeVolumesDetached, "TerminationGracePeriodElapsed", "TerminationGracePeriodElapsed"); modified { + if err := c.kubeClient.Status().Patch(ctx, nodeClaim, client.MergeFromWithOptions(stored, client.MergeFromWithOptimisticLock{})); err != nil { + if errors.IsConflict(err) || errors.IsNotFound(err) { + return reconcile.Result{Requeue: true}, nil + } + return reconcile.Result{}, err } - return reconcile.Result{}, fmt.Errorf("ensuring instance termination, %w", err) + // We requeue after a patch operation since we want to ensure we read our own writes before any subsequent + // operations on the NodeClaim. + return reconcile.Result{RequeueAfter: 1 * time.Second}, nil } - if !isInstanceTerminated { - return reconcile.Result{RequeueAfter: 5 * time.Second}, nil + } + + isInstanceTerminated, err := termination.EnsureTerminated(ctx, c.kubeClient, nodeClaim, c.cloudProvider) + if client.IgnoreNotFound(err) != nil { + // 409 - The nodeClaim exists, but its status has already been modified + if errors.IsConflict(err) { + return reconcile.Result{Requeue: true}, nil } + return reconcile.Result{}, fmt.Errorf("ensuring instance termination, %w", err) + } + if !isInstanceTerminated { + return reconcile.Result{RequeueAfter: 5 * time.Second}, nil } if err := c.removeFinalizer(ctx, node); err != nil { return reconcile.Result{}, err @@ -175,16 +232,11 @@ func (c *Controller) finalize(ctx context.Context, node *corev1.Node) (reconcile return reconcile.Result{}, nil } -func (c *Controller) deleteAllNodeClaims(ctx context.Context, nodeClaims ...*v1.NodeClaim) error { - for _, nodeClaim := range nodeClaims { - // If we still get the NodeClaim, but it's already marked as terminating, we don't need to call Delete again - if nodeClaim.DeletionTimestamp.IsZero() { - if err := c.kubeClient.Delete(ctx, nodeClaim); err != nil { - return client.IgnoreNotFound(err) - } - } +func (c *Controller) hasTerminationGracePeriodElapsed(nodeTerminationTime *time.Time) bool { + if nodeTerminationTime == nil { + return false } - return nil + return !c.clock.Now().Before(*nodeTerminationTime) } func (c *Controller) ensureVolumesDetached(ctx context.Context, node *corev1.Node) (volumesDetached bool, err error) { diff --git a/pkg/controllers/node/termination/suite_test.go b/pkg/controllers/node/termination/suite_test.go index 2382627ebf..ec977a740b 100644 --- a/pkg/controllers/node/termination/suite_test.go +++ b/pkg/controllers/node/termination/suite_test.go @@ -30,6 +30,7 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" clock "k8s.io/utils/clock/testing" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/karpenter/pkg/apis" v1 
"sigs.k8s.io/karpenter/pkg/apis/v1" @@ -107,9 +108,10 @@ var _ = Describe("Termination", func() { ExpectApplied(ctx, env.Client, node, nodeClaim) Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // Drain + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation ExpectNotFound(ctx, env.Client, node) }) It("should ignore nodes not managed by this Karpenter instance", func() { @@ -118,10 +120,7 @@ var _ = Describe("Termination", func() { ExpectApplied(ctx, env.Client, node) Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) ExpectExists(ctx, env.Client, node) }) It("should delete nodeclaims associated with nodes", func() { @@ -129,15 +128,16 @@ var _ = Describe("Termination", func() { Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - // The first reconciliation should trigger the Delete, and set the terminating status condition - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // Drain + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation nc := ExpectExists(ctx, env.Client, nodeClaim) Expect(nc.StatusConditions().Get(v1.ConditionTypeInstanceTerminating).IsTrue()).To(BeTrue()) ExpectNodeExists(ctx, env.Client, node.Name) - // The second reconciliation should call get, see the "instance" is terminated, and remove the node. + // The final reconciliation should call get, see the "instance" is terminated, and remove the node. // We should have deleted the NodeClaim from the node termination controller, so removing it's finalizer should result in it being removed. - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) ExpectFinalizersRemoved(ctx, env.Client, nodeClaim) ExpectNotFound(ctx, env.Client, node, nodeClaim) }) @@ -151,8 +151,9 @@ var _ = Describe("Termination", func() { *node = *ExpectNodeExists(ctx, env.Client, node.Name) } - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). 
- for range 2 { + // Reconcile four times for each stage: drain, volume detachment, instance termination initiation, and + // instance termination verification. + for range 4 { var wg sync.WaitGroup // this is enough to trip the race detector for i := range nodes { @@ -186,7 +187,8 @@ var _ = Describe("Termination", func() { ExpectApplied(ctx, env.Client, node, nodeClaim, pod, pdb) ExpectManualBinding(ctx, env.Client, pod, node) Expect(env.Client.Delete(ctx, node)).To(Succeed()) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + // We only reconcile once since this label should be applied before draining the node + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) node = ExpectNodeExists(ctx, env.Client, node.Name) Expect(node.Labels[corev1.LabelNodeExcludeBalancers]).Should(Equal("karpenter")) }) @@ -202,7 +204,7 @@ var _ = Describe("Termination", func() { // Trigger Termination Controller Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation Expect(queue.Has(node, podSkip)).To(BeFalse()) ExpectSingletonReconciled(ctx, queue) @@ -215,9 +217,10 @@ var _ = Describe("Termination", func() { // Reconcile to delete node node = ExpectNodeExists(ctx, env.Client, node.Name) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation ExpectNotFound(ctx, env.Client, node) }) It("should not evict pods that tolerate karpenter disruption taint with exists operator", func() { @@ -232,7 +235,7 @@ var _ = Describe("Termination", func() { // Trigger Termination Controller Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation Expect(queue.Has(node, podSkip)).To(BeFalse()) ExpectSingletonReconciled(ctx, queue) @@ -247,9 +250,10 @@ var _ = Describe("Termination", func() { // Reconcile to delete node node = ExpectNodeExists(ctx, env.Client, node.Name) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). 
- ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation ExpectNotFound(ctx, env.Client, node) }) It("should evict pods that tolerate the node.kubernetes.io/unschedulable taint", func() { @@ -263,7 +267,7 @@ var _ = Describe("Termination", func() { // Trigger Termination Controller Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation ExpectSingletonReconciled(ctx, queue) // Expect node to exist and be draining @@ -275,9 +279,10 @@ var _ = Describe("Termination", func() { // Reconcile to delete node node = ExpectNodeExists(ctx, env.Client, node.Name) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation ExpectNotFound(ctx, env.Client, node) }) It("should delete nodes that have pods without an ownerRef", func() { @@ -291,7 +296,7 @@ var _ = Describe("Termination", func() { ExpectApplied(ctx, env.Client, node, nodeClaim, pod) Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation ExpectSingletonReconciled(ctx, queue) // Expect pod with no owner ref to be enqueued for eviction @@ -305,9 +310,10 @@ var _ = Describe("Termination", func() { // Reconcile node to evict pod and delete node node = ExpectNodeExists(ctx, env.Client, node.Name) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). 
- ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation ExpectNotFound(ctx, env.Client, node) }) It("should delete nodes with terminal pods", func() { @@ -324,9 +330,10 @@ var _ = Describe("Termination", func() { Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) // Trigger Termination Controller, which should ignore these pods and delete the node - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // Drain + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation ExpectNotFound(ctx, env.Client, node) }) It("should fail to evict pods that violate a PDB", func() { @@ -351,7 +358,7 @@ var _ = Describe("Termination", func() { // Trigger Termination Controller Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation // Expect node to exist and be draining ExpectNodeWithNodeClaimDraining(env.Client, node.Name) @@ -371,9 +378,10 @@ var _ = Describe("Termination", func() { // Reconcile to delete node node = ExpectNodeExists(ctx, env.Client, node.Name) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). 
- ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation ExpectNotFound(ctx, env.Client, node) }) It("should evict pods in order and wait until pods are fully deleted", func() { @@ -421,7 +429,7 @@ var _ = Describe("Termination", func() { for _, p := range podGroup { ExpectPodExists(ctx, env.Client, p.Name, p.Namespace) } - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) ExpectNodeWithNodeClaimDraining(env.Client, node.Name) for range podGroup { ExpectSingletonReconciled(ctx, queue) @@ -442,9 +450,10 @@ var _ = Describe("Termination", func() { // Reconcile to delete node node = ExpectNodeExists(ctx, env.Client, node.Name) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation ExpectNotFound(ctx, env.Client, node) }) It("should evict non-critical pods first", func() { @@ -457,7 +466,7 @@ var _ = Describe("Termination", func() { // Trigger Termination Controller Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation (non-critical) ExpectSingletonReconciled(ctx, queue) // Expect node to exist and be draining @@ -469,7 +478,7 @@ var _ = Describe("Termination", func() { // Expect the critical pods to be evicted and deleted node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation (critical) ExpectSingletonReconciled(ctx, queue) ExpectSingletonReconciled(ctx, queue) @@ -479,9 +488,10 @@ var _ = Describe("Termination", func() { // Reconcile to delete node node = ExpectNodeExists(ctx, env.Client, node.Name) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). 
- ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation ExpectNotFound(ctx, env.Client, node) }) It("should not evict static pods", func() { @@ -503,7 +513,7 @@ var _ = Describe("Termination", func() { Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation ExpectSingletonReconciled(ctx, queue) // Expect mirror pod to not be queued for eviction @@ -515,54 +525,62 @@ var _ = Describe("Termination", func() { // Expect node to exist and be draining ExpectNodeWithNodeClaimDraining(env.Client, node.Name) - // Reconcile node to evict pod - node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - // Delete pod to simulate successful eviction ExpectDeleted(ctx, env.Client, podEvict) // Reconcile to delete node node = ExpectNodeExists(ctx, env.Client, node.Name) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation ExpectNotFound(ctx, env.Client, node) }) It("should not delete nodes until all pods are deleted", func() { pods := test.Pods(2, test.PodOptions{NodeName: node.Name, ObjectMeta: metav1.ObjectMeta{OwnerReferences: defaultOwnerRefs}}) ExpectApplied(ctx, env.Client, node, nodeClaim, pods[0], pods[1]) + Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeDrained).IsUnknown()).To(BeTrue()) // Trigger Termination Controller Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation ExpectSingletonReconciled(ctx, queue) ExpectSingletonReconciled(ctx, queue) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeDrained).IsFalse()).To(BeTrue()) + Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeDrained).Reason).To(Equal("Draining")) + // Expect the pods to be evicted EventuallyExpectTerminating(ctx, env.Client, pods[0], pods[1]) // Expect node to exist and be draining, but not deleted node = ExpectNodeExists(ctx, env.Client, 
node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation ExpectNodeWithNodeClaimDraining(env.Client, node.Name) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeDrained).IsFalse()).To(BeTrue()) + Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeDrained).Reason).To(Equal("Draining")) ExpectDeleted(ctx, env.Client, pods[1]) // Expect node to exist and be draining, but not deleted node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // Drain Validation ExpectNodeWithNodeClaimDraining(env.Client, node.Name) ExpectDeleted(ctx, env.Client, pods[0]) // Reconcile to delete node node = ExpectNodeExists(ctx, env.Client, node.Name) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation ExpectNotFound(ctx, env.Client, node) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeDrained).IsTrue()).To(BeTrue()) }) It("should delete nodes with no underlying instance even if not fully drained", func() { pods := test.Pods(2, test.PodOptions{NodeName: node.Name, ObjectMeta: metav1.ObjectMeta{OwnerReferences: defaultOwnerRefs}}) @@ -574,7 +592,7 @@ var _ = Describe("Termination", func() { // Trigger Termination Controller Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation ExpectSingletonReconciled(ctx, queue) ExpectSingletonReconciled(ctx, queue) @@ -583,7 +601,7 @@ var _ = Describe("Termination", func() { // Expect node to exist and be draining, but not deleted node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation ExpectNodeWithNodeClaimDraining(env.Client, node.Name) // After this, the node still has one pod that is evicting. @@ -594,9 +612,7 @@ var _ = Describe("Termination", func() { // Reconcile to delete node node = ExpectNodeExists(ctx, env.Client, node.Name) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). 
- ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) ExpectNotFound(ctx, env.Client, node) }) It("should not delete nodes with no underlying instance if the node is still Ready", func() { @@ -606,7 +622,7 @@ var _ = Describe("Termination", func() { // Trigger Termination Controller Expect(env.Client.Delete(ctx, node)).To(Succeed()) node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation ExpectSingletonReconciled(ctx, queue) ExpectSingletonReconciled(ctx, queue) @@ -615,7 +631,7 @@ var _ = Describe("Termination", func() { // Expect node to exist and be draining, but not deleted node = ExpectNodeExists(ctx, env.Client, node.Name) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation ExpectNodeWithNodeClaimDraining(env.Client, node.Name) // After this, the node still has one pod that is evicting. @@ -627,19 +643,17 @@ var _ = Describe("Termination", func() { // Reconcile to try to delete the node, but don't succeed because the readiness condition // of the node still won't let us delete it node = ExpectNodeExists(ctx, env.Client, node.Name) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). - ExpectObjectReconciled(ctx, env.Client, terminationController, node) ExpectObjectReconciled(ctx, env.Client, terminationController, node) ExpectExists(ctx, env.Client, node) }) - It("should wait for pods to terminate", func() { + It("should bypass pods which are stuck terminating past their grace period", func() { pod := test.Pod(test.PodOptions{NodeName: node.Name, ObjectMeta: metav1.ObjectMeta{OwnerReferences: defaultOwnerRefs}}) fakeClock.SetTime(time.Now()) // make our fake clock match the pod creation time ExpectApplied(ctx, env.Client, node, nodeClaim, pod) // Before grace period, node should not delete Expect(env.Client.Delete(ctx, node)).To(Succeed()) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation ExpectSingletonReconciled(ctx, queue) ExpectNodeExists(ctx, env.Client, node.Name) EventuallyExpectTerminating(ctx, env.Client, pod) @@ -648,9 +662,10 @@ var _ = Describe("Termination", func() { // to eliminate test-flakiness we reset the time to current time + 90 seconds instead of just advancing // the clock by 90 seconds. fakeClock.SetTime(time.Now().Add(90 * time.Second)) - // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node). 
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation
+ ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation
ExpectNotFound(ctx, env.Client, node)
})
It("should not evict a new pod with the same name using the old pod's eviction queue key", func() {
@@ -666,7 +681,7 @@
// Trigger Termination Controller
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
// Don't trigger a call into the queue to make sure that we effectively aren't triggering eviction
// We'll use this to try to leave pods in the queue
@@ -677,8 +692,11 @@
// Delete the pod directly to act like something else is doing the pod termination
ExpectDeleted(ctx, env.Client, pod)
- // Requeue the termination controller to completely delete the node
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainValidation
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation
+ ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationValidation
+ ExpectNotFound(ctx, env.Client, node)
// Expect that the old pod's key still exists in the queue
Expect(queue.Has(node, pod)).To(BeTrue())
@@ -724,10 +742,10 @@
ExpectApplied(ctx, env.Client, node, nodeClaim, nodePool, pod)
Expect(env.Client.Delete(ctx, node)).To(Succeed())
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
- ExpectSingletonReconciled(ctx, queue)
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
ExpectNodeExists(ctx, env.Client, node.Name)
- ExpectDeleted(ctx, env.Client, pod)
+ pod = ExpectExists(ctx, env.Client, pod)
+ Expect(pod.DeletionTimestamp.IsZero()).To(BeFalse())
})
It("should only delete pods when their terminationGracePeriodSeconds is less than the node's remaining terminationGracePeriod", func() {
nodeClaim.Spec.TerminationGracePeriod = &metav1.Duration{Duration: time.Second * 300}
@@ -750,16 +768,17 @@
// expect pod still exists
fakeClock.Step(90 * time.Second)
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
ExpectNodeWithNodeClaimDraining(env.Client, node.Name)
ExpectNodeExists(ctx, env.Client, node.Name)
- ExpectPodExists(ctx, env.Client, pod.Name, pod.Namespace)
+ pod = ExpectExists(ctx, env.Client, pod)
+ Expect(pod.DeletionTimestamp.IsZero()).To(BeTrue())
- // 
expect pod is now deleted + // The pod should be deleted 60 seconds before the node's TGP expires fakeClock.Step(175 * time.Second) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectSingletonReconciled(ctx, queue) - ExpectDeleted(ctx, env.Client, pod) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) + pod = ExpectExists(ctx, env.Client, pod) + Expect(pod.DeletionTimestamp.IsZero()).To(BeFalse()) }) Context("VolumeAttachments", func() { It("should wait for volume attachments", func() { @@ -769,15 +788,24 @@ var _ = Describe("Termination", func() { }) ExpectApplied(ctx, env.Client, node, nodeClaim, nodePool, va) Expect(env.Client.Delete(ctx, node)).To(Succeed()) + ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeVolumesDetached).IsUnknown()).To(BeTrue()) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // Drain + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachmentInitiation ExpectExists(ctx, env.Client, node) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeVolumesDetached).IsFalse()).To(BeTrue()) + Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeVolumesDetached).Reason).To(Equal("AwaitingVolumeDetachment")) ExpectDeleted(ctx, env.Client, va) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachmentFinalization + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation + ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationFinalization ExpectNotFound(ctx, env.Client, node) + + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeVolumesDetached).IsTrue()).To(BeTrue()) }) It("should only wait for volume attachments associated with drainable pods", func() { vaDrainable := test.VolumeAttachment(test.VolumeAttachmentOptions{ @@ -805,13 +833,21 @@ var _ = Describe("Termination", func() { ExpectManualBinding(ctx, env.Client, pod, node) Expect(env.Client.Delete(ctx, node)).To(Succeed()) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // Drain + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment ExpectExists(ctx, env.Client, node) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeVolumesDetached).IsFalse()).To(BeTrue()) + Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeVolumesDetached).Reason).To(Equal("AwaitingVolumeDetachment")) ExpectDeleted(ctx, env.Client, vaDrainable) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) - ExpectObjectReconciled(ctx, env.Client, terminationController, node) + ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment + 
ExpectExists(ctx, env.Client, node)
+ nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
+ Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeVolumesDetached).IsTrue()).To(BeTrue())
+
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation
+ ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationFinalization
ExpectNotFound(ctx, env.Client, node)
})
It("should wait for volume attachments until the nodeclaim's termination grace period expires", func() {
@@ -825,13 +861,21 @@
ExpectApplied(ctx, env.Client, node, nodeClaim, nodePool, va)
Expect(env.Client.Delete(ctx, node)).To(Succeed())
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // Drain
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment
ExpectExists(ctx, env.Client, node)
+ nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
+ Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeVolumesDetached).IsFalse()).To(BeTrue())
fakeClock.Step(5 * time.Minute)
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment
+ ExpectExists(ctx, env.Client, node)
+ nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
+ Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeVolumesDetached).IsFalse()).To(BeTrue())
+ Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeVolumesDetached).Reason).To(Equal("TerminationGracePeriodElapsed"))
+
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation
+ ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationFinalization
ExpectNotFound(ctx, env.Client, node)
})
})
@@ -842,8 +886,10 @@
ExpectApplied(ctx, env.Client, node, nodeClaim)
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)
- // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node).
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+ // Reconcile four times, once per termination stage: drain, volume detachment, instance termination initiation, and finalization.
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // Drain
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation
+ ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationFinalization
m, ok := FindMetricWithLabelValues("karpenter_nodes_termination_duration_seconds", map[string]string{"nodepool": node.Labels[v1.NodePoolLabelKey]})
Expect(ok).To(BeTrue())
@@ -853,10 +899,11 @@
ExpectApplied(ctx, env.Client, node, nodeClaim)
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)
- // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node).
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // Drain
ExpectMetricCounterValue(termination.NodesDrainedTotal, 1, map[string]string{"nodepool": node.Labels[v1.NodePoolLabelKey]})
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation
+ ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationFinalization
m, ok := FindMetricWithLabelValues("karpenter_nodes_terminated_total", map[string]string{"nodepool": node.Labels[v1.NodePoolLabelKey]})
Expect(ok).To(BeTrue())
@@ -866,9 +913,10 @@
ExpectApplied(ctx, env.Client, node, nodeClaim)
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)
- // Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node).
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // Drain
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // VolumeDetachment
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationInitiation
+ ExpectNotRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // InstanceTerminationFinalization
m, ok := FindMetricWithLabelValues("karpenter_nodes_lifetime_duration_seconds", map[string]string{"nodepool": node.Labels[v1.NodePoolLabelKey]})
Expect(ok).To(BeTrue())
@@ -882,7 +930,7 @@
// Don't let any pod evict
MinAvailable: &minAvailable,
})
- ExpectApplied(ctx, env.Client, pdb, node)
+ ExpectApplied(ctx, env.Client, pdb, node, nodeClaim)
pods := test.Pods(5, test.PodOptions{NodeName: node.Name, ObjectMeta: metav1.ObjectMeta{
OwnerReferences: defaultOwnerRefs,
Labels: labelSelector,
@@ -892,7 +940,7 @@
wqDepthBefore, _ := FindMetricWithLabelValues("workqueue_adds_total", map[string]string{"name": "eviction.workqueue"})
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)
- ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+ ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // Drain
wqDepthAfter, ok := FindMetricWithLabelValues("workqueue_adds_total", map[string]string{"name": "eviction.workqueue"})
Expect(ok).To(BeTrue())
Expect(lo.FromPtr(wqDepthAfter.GetCounter().Value) - lo.FromPtr(wqDepthBefore.GetCounter().Value)).To(BeNumerically("==", 5))
@@ -908,3 +956,13 @@ func ExpectNodeWithNodeClaimDraining(c client.Client, nodeName string) *corev1.N
Expect(node.DeletionTimestamp).ToNot(BeNil())
return node
}
+
+func ExpectNotRequeued(result reconcile.Result) {
+ GinkgoHelper()
+ Expect(!result.Requeue && result.RequeueAfter == 0).To(BeTrue())
+}
+
+func ExpectRequeued(result reconcile.Result) {
+ GinkgoHelper()
+ Expect(result.Requeue || result.RequeueAfter != 0).To(BeTrue())
+}
diff --git a/pkg/controllers/node/termination/terminator/events/events.go b/pkg/controllers/node/termination/terminator/events/events.go
index 0e790978f3..4300f60467 100644
--- a/pkg/controllers/node/termination/terminator/events/events.go
+++ b/pkg/controllers/node/termination/terminator/events/events.go
@@ -56,6 +56,16 @@ func NodeFailedToDrain(node *corev1.Node, err error) events.Event {
}
}
+func NodeAwaitingVolumeDetachmentEvent(node *corev1.Node) events.Event {
+ return events.Event{
+ InvolvedObject: node,
+ Type: corev1.EventTypeNormal,
+ Reason: "AwaitingVolumeDetachment",
+ Message: "Awaiting deletion of VolumeAttachments bound to node",
+ DedupeValues: []string{node.Name},
+ }
+}
+
func NodeTerminationGracePeriodExpiring(node *corev1.Node, terminationTime string) events.Event {
return events.Event{
InvolvedObject: node,
diff --git a/pkg/controllers/node/termination/terminator/terminator.go b/pkg/controllers/node/termination/terminator/terminator.go
index bedab8a6d2..7154df4e7e 100644
--- a/pkg/controllers/node/termination/terminator/terminator.go
+++ b/pkg/controllers/node/termination/terminator/terminator.go
@@ -141,7 +141,7 @@ func (t *Terminator) DeleteExpiringPods(ctx context.Context, pods []*corev1.Pod, 
for _, pod := range pods { // check if the node has an expiration time and the pod needs to be deleted deleteTime := t.podDeleteTimeWithGracePeriod(nodeGracePeriodTerminationTime, pod) - if deleteTime != nil && time.Now().After(*deleteTime) { + if deleteTime != nil && t.clock.Now().After(*deleteTime) { // delete pod proactively to give as much of its terminationGracePeriodSeconds as possible for deletion // ensure that we clamp the maximum pod terminationGracePeriodSeconds to the node's remaining expiration time in the delete command gracePeriodSeconds := lo.ToPtr(int64(time.Until(*nodeGracePeriodTerminationTime).Seconds())) From 4718acb1fe67e3ac6637c3b2e00c8ab44b6d20ba Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Wed, 15 Jan 2025 12:20:44 -0800 Subject: [PATCH 2/3] fix: initialize fake clock in terminator test --- pkg/controllers/node/termination/terminator/suite_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/controllers/node/termination/terminator/suite_test.go b/pkg/controllers/node/termination/terminator/suite_test.go index 39bd3ce16b..0aa3454c09 100644 --- a/pkg/controllers/node/termination/terminator/suite_test.go +++ b/pkg/controllers/node/termination/terminator/suite_test.go @@ -59,6 +59,7 @@ func TestAPIs(t *testing.T) { } var _ = BeforeSuite(func() { + fakeClock = clock.NewFakeClock(time.Now()) env = test.NewEnvironment(test.WithCRDs(apis.CRDs...), test.WithCRDs(v1alpha1.CRDs...)) ctx = options.ToContext(ctx, test.Options()) recorder = test.NewEventRecorder() From 43949ef7999b7e0ca884002fad041e6f5b66577c Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Wed, 15 Jan 2025 15:30:16 -0800 Subject: [PATCH 3/3] use TerminalError --- pkg/controllers/node/termination/controller.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/controllers/node/termination/controller.go b/pkg/controllers/node/termination/controller.go index ffdb5f8231..4ee7931843 100644 --- a/pkg/controllers/node/termination/controller.go +++ b/pkg/controllers/node/termination/controller.go @@ -97,8 +97,7 @@ func (c *Controller) finalize(ctx context.Context, node *corev1.Node) (reconcile nodeClaim, err := nodeutils.NodeClaimForNode(ctx, c.kubeClient, node) if err != nil { if nodeutils.IsDuplicateNodeClaimError(err) || nodeutils.IsNodeClaimNotFoundError(err) { - log.FromContext(ctx).Error(err, "failed to terminate node") - return reconcile.Result{}, nil + return reconcile.Result{}, reconcile.TerminalError(fmt.Errorf("failed to terminate node, %w", err)) } return reconcile.Result{}, err }
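A note on the recurring pattern in PATCH 1: every status-condition update above follows the same shape — deep-copy the NodeClaim as a patch baseline, mutate its conditions, patch the status subresource with an optimistic lock, map conflict and not-found errors to a requeue, and requeue after a successful write so the next reconcile reads its own write rather than a stale cache entry. Below is a minimal sketch of that shape, assuming a hypothetical helper name (patchNodeClaimStatus does not exist in the patch); the controller-runtime and apimachinery calls are the ones the diff actually uses.

package termination

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/api/errors"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
)

// patchNodeClaimStatus is a hypothetical helper distilling the pattern repeated in
// finalize(): mutate conditions against a deep-copied baseline, then patch with an
// optimistic lock so concurrent writers surface as conflicts instead of lost updates.
func patchNodeClaimStatus(ctx context.Context, kubeClient client.Client, nodeClaim *v1.NodeClaim, mutate func(*v1.NodeClaim) bool) (reconcile.Result, error) {
	stored := nodeClaim.DeepCopy()
	if modified := mutate(nodeClaim); !modified {
		// Nothing changed; the caller can continue with the current stage.
		return reconcile.Result{}, nil
	}
	// MergeFromWithOptimisticLock adds a resourceVersion precondition, so a racing
	// status writer produces an IsConflict error rather than a silent overwrite.
	if err := kubeClient.Status().Patch(ctx, nodeClaim, client.MergeFromWithOptions(stored, client.MergeFromWithOptimisticLock{})); err != nil {
		if errors.IsConflict(err) || errors.IsNotFound(err) {
			return reconcile.Result{Requeue: true}, nil
		}
		return reconcile.Result{}, err
	}
	// Requeue after the write lands so the next reconcile observes its own update
	// (read-your-own-writes) before making further decisions about the NodeClaim.
	return reconcile.Result{RequeueAfter: 1 * time.Second}, nil
}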
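Two smaller points round out the series: PATCH 1 switches DeleteExpiringPods from time.Now() to the injected t.clock.Now(), PATCH 2 initializes the terminator suite's fake clock before it is used, and PATCH 3 wraps the unrecoverable duplicate/not-found NodeClaim case in reconcile.TerminalError so the failure is reported without being retried. The standalone toy example below (illustrative variable names only) shows why the injected clock matters for testing grace-period expiry, using the same k8s.io/utils/clock/testing package as the suite.

package main

import (
	"fmt"
	"time"

	clocktesting "k8s.io/utils/clock/testing"
)

func main() {
	// PATCH 2 seeds the suite's fake clock the same way before any test runs.
	fakeClock := clocktesting.NewFakeClock(time.Now())
	deleteTime := fakeClock.Now().Add(60 * time.Second)

	// Stepping the fake clock is visible through its Now()...
	fakeClock.Step(90 * time.Second)
	fmt.Println(fakeClock.Now().After(deleteTime)) // true: the deadline has "passed"

	// ...but a bare time.Now() ignores the step, which is why the pre-fix
	// comparison in DeleteExpiringPods could never observe simulated expiry.
	fmt.Println(time.Now().After(deleteTime)) // false
}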