Skip to content

Commit

Permalink
Merge pull request #2666 from fabriziopandini/clusterctl-use-exponent…
Browse files Browse the repository at this point in the history
…ial-backoff

🏃clusterctl: use exponential backoff
  • Loading branch information
k8s-ci-robot authored Mar 13, 2020
2 parents 54331c6 + a0110f6 commit ba1aa04
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 47 deletions.
6 changes: 2 additions & 4 deletions cmd/clusterctl/client/cluster/cert_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,6 @@ const (
waitCertManagerInterval = 1 * time.Second
waitCertManagerTimeout = 10 * time.Minute

retryCreateCertManagerObject = 3
retryIntervalCreateCertManagerObject = 1 * time.Second

certManagerImageComponent = "cert-manager"
)

Expand Down Expand Up @@ -120,14 +117,15 @@ func (cm *certManagerClient) EnsureWebhook() error {
}

// installs the web-hook
createCertManagerBackoff := newBackoff()
objs = sortResourcesForCreate(objs)
for i := range objs {
o := objs[i]
log.V(5).Info("Creating", logf.UnstructuredToValues(o)...)

// Create the Kubernetes object.
// Nb. The operation is wrapped in a retry loop to make ensureCerts more resilient to unexpected conditions.
if err := retry(retryCreateCertManagerObject, retryIntervalCreateCertManagerObject, func() error {
if err := retryWithExponentialBackoff(createCertManagerBackoff, func() error {
return cm.createObj(o)
}); err != nil {
return err
Expand Down
37 changes: 37 additions & 0 deletions cmd/clusterctl/client/cluster/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@ import (
"context"
"time"

"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/util/wait"
"sigs.k8s.io/cluster-api/cmd/clusterctl/client/config"
"sigs.k8s.io/cluster-api/cmd/clusterctl/client/repository"
"sigs.k8s.io/cluster-api/cmd/clusterctl/internal/test"
logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log"
"sigs.k8s.io/controller-runtime/pkg/client"
)

Expand Down Expand Up @@ -193,3 +195,38 @@ type Proxy interface {
}

var _ Proxy = &test.FakeProxy{}

// retryWithExponentialBackoff repeats an operation until it passes or the exponential backoff times out.
func retryWithExponentialBackoff(opts wait.Backoff, operation func() error) error { //nolint:unparam
log := logf.Log

i := 0
err := wait.ExponentialBackoff(opts, func() (bool, error) {
i++
if err := operation(); err != nil {
if i < opts.Steps {
log.V(5).Info("Operation failed, retry", "Error", err)
return false, nil
}
return false, err
}
return true, nil
})
if err != nil {
return errors.Wrapf(err, "action failed after %d attempts", i)
}
return nil
}

// newBackoff creates a new API Machinery backoff parameter set suitable for use with clusterctl operations.
func newBackoff() wait.Backoff {
// Return a exponential backoff configuration which returns durations for a total time of ~40s.
// Example: 0, .5s, 1.2s, 2.3s, 4s, 6s, 10s, 16s, 24s, 37s
// Jitter is added as a random fraction of the duration multiplied by the jitter factor.
return wait.Backoff{
Duration: 500 * time.Millisecond,
Factor: 1.5,
Steps: 10,
Jitter: 0.4,
}
}
9 changes: 2 additions & 7 deletions cmd/clusterctl/client/cluster/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package cluster
import (
"fmt"
"strings"
"time"

"github.com/pkg/errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -34,11 +33,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
retryCreateComponentObject = 3
retryIntervalComponentObject = 1 * time.Second
)

type DeleteOptions struct {
Provider clusterctlv1.Provider
IncludeNamespace bool
Expand All @@ -63,12 +57,13 @@ type providerComponents struct {
}

func (p *providerComponents) Create(objs []unstructured.Unstructured) error {
createComponentObjectBackoff := newBackoff()
for i := range objs {
obj := objs[i]

// Create the Kubernetes object.
// Nb. The operation is wrapped in a retry loop to make Create more resilient to unexpected conditions.
if err := retry(retryCreateComponentObject, retryIntervalComponentObject, func() error {
if err := retryWithExponentialBackoff(createComponentObjectBackoff, func() error {
return p.createObj(obj)
}); err != nil {
return err
Expand Down
6 changes: 2 additions & 4 deletions cmd/clusterctl/client/cluster/inventory.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@ import (
const (
embeddedCustomResourceDefinitionPath = "cmd/clusterctl/config/manifest/clusterctl-api.yaml"

retryCreateInventoryObject = 3
retryIntervalCreateInventoryObject = 1 * time.Second

waitInventoryCRDInterval = 250 * time.Millisecond
waitInventoryCRDTimeout = 1 * time.Minute
)
Expand Down Expand Up @@ -123,13 +120,14 @@ func (p *inventoryClient) EnsureCustomResourceDefinitions() error {
}

// Install the CRDs.
createInventoryObjectBackoff := newBackoff()
for i := range objs {
o := objs[i]
log.V(5).Info("Creating", logf.UnstructuredToValues(o)...)

// Create the Kubernetes object.
// Nb. The operation is wrapped in a retry loop to make EnsureCustomResourceDefinitions more resilient to unexpected conditions.
if err := retry(retryCreateInventoryObject, retryIntervalCreateInventoryObject, func() error {
if err := retryWithExponentialBackoff(createInventoryObjectBackoff, func() error {
return p.createObj(o)
}); err != nil {
return err
Expand Down
36 changes: 4 additions & 32 deletions cmd/clusterctl/client/cluster/mover.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package cluster

import (
"fmt"
"time"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
Expand All @@ -29,7 +28,6 @@ import (
kerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/version"
"k8s.io/apimachinery/pkg/util/wait"
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -389,20 +387,16 @@ func (o *objectMover) ensureNamespaces(graph *objectGraph, toProxy Proxy) error
return nil
}

const (
retryCreateTargetObject = 3
retryIntervalCreateTargetObject = 1 * time.Second
)

// createGroup creates all the Kubernetes objects into the target management cluster corresponding to the object graph nodes in a moveGroup.
func (o *objectMover) createGroup(group moveGroup, toProxy Proxy) error {
createTargetObjectBackoff := newBackoff()
errList := []error{}
for i := range group {
nodeToCreate := group[i]

// Creates the Kubernetes object corresponding to the nodeToCreate.
// Nb. The operation is wrapped in a retry loop to make move more resilient to unexpected conditions.
err := retry(retryCreateTargetObject, retryIntervalCreateTargetObject, func() error {
err := retryWithExponentialBackoff(createTargetObjectBackoff, func() error {
return o.createTargetObject(nodeToCreate, toProxy)
})
if err != nil {
Expand Down Expand Up @@ -509,20 +503,16 @@ func (o *objectMover) createTargetObject(nodeToCreate *node, toProxy Proxy) erro
return nil
}

const (
retryDeleteSourceObject = 3
retryIntervalDeleteSourceObject = 1 * time.Second
)

// deleteGroup deletes all the Kubernetes objects from the source management cluster corresponding to the object graph nodes in a moveGroup.
func (o *objectMover) deleteGroup(group moveGroup) error {
deleteSourceObjectBackoff := newBackoff()
errList := []error{}
for i := range group {
nodeToDelete := group[i]

// Delete the Kubernetes object corresponding to the current node.
// Nb. The operation is wrapped in a retry loop to make move more resilient to unexpected conditions.
err := retry(retryDeleteSourceObject, retryIntervalDeleteSourceObject, func() error {
err := retryWithExponentialBackoff(deleteSourceObjectBackoff, func() error {
return o.deleteSourceObject(nodeToDelete)
})

Expand Down Expand Up @@ -583,24 +573,6 @@ func (o *objectMover) deleteSourceObject(nodeToDelete *node) error {
return nil
}

func retry(attempts int, interval time.Duration, action func() error) error { //nolint:unparam
log := logf.Log

var errorToReturn error
for i := 0; i < attempts; i++ {
if err := action(); err != nil {
errorToReturn = err

log.V(5).Info("Operation failed, retry", "Error", err)
pause := wait.Jitter(interval, 1)
time.Sleep(pause)
continue
}
return nil
}
return errors.Wrapf(errorToReturn, "action failed after %d attempts", attempts)
}

// checkTargetProviders checks that all the providers installed in the source cluster exists in the target cluster as well (with a version >= of the current version).
func (o *objectMover) checkTargetProviders(namespace string, toInventory InventoryClient) error {
// Gets the list of providers in the source/target cluster.
Expand Down

0 comments on commit ba1aa04

Please sign in to comment.