Skip to content

Commit

Permalink
revert changes made in kubernetes-sigs#1164, kubernetes-sigs#1170 and k…
Browse files Browse the repository at this point in the history
  • Loading branch information
jigisha620 committed May 10, 2024
1 parent ea25c3e commit 9e7ef04
Show file tree
Hide file tree
Showing 5 changed files with 0 additions and 99 deletions.
30 changes: 0 additions & 30 deletions pkg/cloudprovider/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,33 +268,3 @@ func IgnoreNodeClassNotReadyError(err error) error {
}
return err
}

// RetryableError is an error type returned by CloudProviders when the action emitting the error has to be retried
type RetryableError struct {
error
}

func NewRetryableError(err error) *RetryableError {
return &RetryableError{
error: err,
}
}

func (e *RetryableError) Error() string {
return fmt.Sprintf("retryable error, %s", e.error)
}

func IsRetryableError(err error) bool {
if err == nil {
return false
}
var retryableError *RetryableError
return errors.As(err, &retryableError)
}

func IgnoreRetryableError(err error) error {
if IsRetryableError(err) {
return nil
}
return err
}
6 changes: 0 additions & 6 deletions pkg/controllers/node/termination/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,6 @@ func (c *Controller) Finalize(ctx context.Context, node *v1.Node) (reconcile.Res
// This delete call is needed so that we ensure that we don't remove the node from the cluster
// until the full instance shutdown has taken place
if err := c.cloudProvider.Delete(ctx, nodeclaimutil.NewFromNode(node)); cloudprovider.IgnoreNodeClaimNotFoundError(err) != nil {
// We expect cloudProvider to emit a Retryable Error when the underlying instance is not terminated and if that
// happens, we want to re-enqueue reconciliation until we terminate the underlying instance before removing
// finalizer from the node.
if cloudprovider.IsRetryableError(err) {
return reconcile.Result{RequeueAfter: 10 * time.Second}, nil
}
return reconcile.Result{}, fmt.Errorf("terminating cloudprovider instance, %w", err)
}
if err := c.removeFinalizer(ctx, node); err != nil {
Expand Down
24 changes: 0 additions & 24 deletions pkg/controllers/node/termination/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,13 @@ package termination_test

import (
"context"
"fmt"
"sync"
"testing"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

"sigs.k8s.io/karpenter/pkg/cloudprovider"

"github.com/samber/lo"
clock "k8s.io/utils/clock/testing"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -718,27 +715,6 @@ var _ = Describe("Termination", func() {

ExpectMetricGaugeValue(terminator.EvictionQueueDepth, 5, map[string]string{})
})
It("should retry node deletion when cloudProvider returns retryable error", func() {
ExpectApplied(ctx, env.Client, node)
cloudProvider.NextDeleteErr = cloudprovider.NewRetryableError(fmt.Errorf("underlying instance not terminated"))

// Expect the node to stick around and requeue while it gets a retryable error
Expect(env.Client.Delete(ctx, node)).To(Succeed())
result := ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(node))
Expect(result.RequeueAfter).To(Equal(time.Second * 10))

// Instance still exists
_, err := cloudProvider.Get(ctx, node.Spec.ProviderID)
Expect(err).ToNot(HaveOccurred())

// Requeue again and we should no longer get an error and succeed to delete the instance
ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(node)) // re-enqueue reconciliation since we got retryable error previously
ExpectNotFound(ctx, env.Client, node)

// Expect the instance to be gone from the cloudprovider
_, err = cloudProvider.Get(ctx, node.Spec.ProviderID)
Expect(cloudprovider.IsNodeClaimNotFoundError(err)).To(BeTrue())
})
})
})

Expand Down
6 changes: 0 additions & 6 deletions pkg/controllers/nodeclaim/termination/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,6 @@ func (c *Controller) Finalize(ctx context.Context, nodeClaim *v1beta1.NodeClaim)
}
if nodeClaim.Status.ProviderID != "" {
if err = c.cloudProvider.Delete(ctx, nodeClaim); cloudprovider.IgnoreNodeClaimNotFoundError(err) != nil {
// We expect cloudProvider to emit a Retryable Error when the underlying instance is not terminated and if that
// happens, we want to re-enqueue reconciliation until we terminate the underlying instance before removing
// finalizer from the nodeClaim.
if cloudprovider.IsRetryableError(err) {
return reconcile.Result{RequeueAfter: 10 * time.Second}, nil
}
return reconcile.Result{}, fmt.Errorf("terminating cloudprovider instance, %w", err)
}
}
Expand Down
33 changes: 0 additions & 33 deletions pkg/controllers/nodeclaim/termination/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package termination_test

import (
"context"
"fmt"
"testing"
"time"

Expand Down Expand Up @@ -217,36 +216,4 @@ var _ = Describe("Termination", func() {
ExpectExists(ctx, env.Client, node)
}
})
It("should retry nodeClaim deletion when cloudProvider returns retryable error", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
ExpectReconcileSucceeded(ctx, nodeClaimLifecycleController, client.ObjectKeyFromObject(nodeClaim))

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
_, err := cloudProvider.Get(ctx, nodeClaim.Status.ProviderID)
Expect(err).ToNot(HaveOccurred())

node := test.NodeClaimLinkedNode(nodeClaim)
ExpectApplied(ctx, env.Client, node)

// Expect the node and the nodeClaim to both be gone
Expect(env.Client.Delete(ctx, nodeClaim)).To(Succeed())
ExpectReconcileSucceeded(ctx, nodeClaimTerminationController, client.ObjectKeyFromObject(nodeClaim)) // triggers the node deletion
ExpectFinalizersRemoved(ctx, env.Client, node)
ExpectNotFound(ctx, env.Client, node)

cloudProvider.NextDeleteErr = cloudprovider.NewRetryableError(fmt.Errorf("underlying instance not terminated"))
result := ExpectReconcileSucceeded(ctx, nodeClaimTerminationController, client.ObjectKeyFromObject(nodeClaim))
Expect(result.RequeueAfter).To(Equal(time.Second * 10))

// Instance still exists
_, err = cloudProvider.Get(ctx, node.Spec.ProviderID)
Expect(err).ToNot(HaveOccurred())

ExpectReconcileSucceeded(ctx, nodeClaimTerminationController, client.ObjectKeyFromObject(nodeClaim)) //re-enqueue reconciliation since we got retryable error previously
ExpectNotFound(ctx, env.Client, nodeClaim, node)

// Expect the nodeClaim to be gone from the cloudprovider
_, err = cloudProvider.Get(ctx, nodeClaim.Status.ProviderID)
Expect(cloudprovider.IsNodeClaimNotFoundError(err)).To(BeTrue())
})
})

0 comments on commit 9e7ef04

Please sign in to comment.