Skip to content

Commit

Permalink
Merge pull request #2776 from fabriziopandini/clusterctl-add-retries
Browse files Browse the repository at this point in the history
🏃clusterctl: add retries to clustectl move
  • Loading branch information
k8s-ci-robot authored Mar 25, 2020
2 parents 4672438 + b8026b3 commit 2b2bfb9
Show file tree
Hide file tree
Showing 8 changed files with 295 additions and 132 deletions.
2 changes: 1 addition & 1 deletion cmd/clusterctl/client/cluster/cert_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ func (cm *certManagerClient) EnsureWebhook() error {
}

// installs the web-hook
createCertManagerBackoff := newBackoff()
createCertManagerBackoff := newWriteBackoff()
objs = sortResourcesForCreate(objs)
for i := range objs {
o := objs[i]
Expand Down
32 changes: 29 additions & 3 deletions cmd/clusterctl/client/cluster/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ func retryWithExponentialBackoff(opts wait.Backoff, operation func() error) erro
i++
if err := operation(); err != nil {
if i < opts.Steps {
log.V(5).Info("Operation failed, retry", "Error", err)
log.V(5).Info("Operation failed, retrying with backoff", "Cause", err.Error())
return false, nil
}
return false, err
Expand All @@ -218,8 +218,8 @@ func retryWithExponentialBackoff(opts wait.Backoff, operation func() error) erro
return nil
}

// newBackoff creates a new API Machinery backoff parameter set suitable for use with clusterctl operations.
func newBackoff() wait.Backoff {
// newWriteBackoff creates a new API Machinery backoff parameter set suitable for use with clusterctl write operations.
func newWriteBackoff() wait.Backoff {
// Return a exponential backoff configuration which returns durations for a total time of ~40s.
// Example: 0, .5s, 1.2s, 2.3s, 4s, 6s, 10s, 16s, 24s, 37s
// Jitter is added as a random fraction of the duration multiplied by the jitter factor.
Expand All @@ -230,3 +230,29 @@ func newBackoff() wait.Backoff {
Jitter: 0.4,
}
}

// newConnectBackoff creates a new API Machinery backoff parameter set suitable for use when clusterctl connect to a cluster.
func newConnectBackoff() wait.Backoff {
// Return a exponential backoff configuration which returns durations for a total time of ~15s.
// Example: 0, .25s, .6s, 1.2, 2.1s, 3.4s, 5.5s, 8s, 12s
// Jitter is added as a random fraction of the duration multiplied by the jitter factor.
return wait.Backoff{
Duration: 250 * time.Millisecond,
Factor: 1.5,
Steps: 9,
Jitter: 0.1,
}
}

// newReadBackoff creates a new API Machinery backoff parameter set suitable for use with clusterctl read operations.
func newReadBackoff() wait.Backoff {
// Return a exponential backoff configuration which returns durations for a total time of ~15s.
// Example: 0, .25s, .6s, 1.2, 2.1s, 3.4s, 5.5s, 8s, 12s
// Jitter is added as a random fraction of the duration multiplied by the jitter factor.
return wait.Backoff{
Duration: 250 * time.Millisecond,
Factor: 1.5,
Steps: 9,
Jitter: 0.1,
}
}
2 changes: 1 addition & 1 deletion cmd/clusterctl/client/cluster/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ type providerComponents struct {
}

func (p *providerComponents) Create(objs []unstructured.Unstructured) error {
createComponentObjectBackoff := newBackoff()
createComponentObjectBackoff := newWriteBackoff()
for i := range objs {
obj := objs[i]

Expand Down
68 changes: 54 additions & 14 deletions cmd/clusterctl/client/cluster/inventory.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,18 +91,29 @@ func newInventoryClient(proxy Proxy, pollImmediateWaiter PollImmediateWaiter) *i
func (p *inventoryClient) EnsureCustomResourceDefinitions() error {
log := logf.Log

c, err := p.proxy.NewClient()
// Being this the first connection of many clusterctl operations, we want to fail fast if there is no
// connectivity to the cluster, so we try to get a client as a first thing.
// NB. NewClient has an internal retry loop that should mitigate temporary connection glitch; here we are
// trying to detect persistent connection problems (>10s) before entering in longer retry loops while executing
// clusterctl operations.
_, err := p.proxy.NewClient()
if err != nil {
return err
}

// Check the CRDs already exists, if yes, exit immediately.
l := &clusterctlv1.ProviderList{}
if err = c.List(ctx, l); err == nil {
return nil
// Nb. The operation is wrapped in a retry loop to make EnsureCustomResourceDefinitions more resilient to unexpected conditions.
var crdIsIstalled bool
listInventoryBackoff := newReadBackoff()
if err := retryWithExponentialBackoff(listInventoryBackoff, func() error {
var err error
crdIsIstalled, err = checkInventoryCRDs(p.proxy)
return err
}); err != nil {
return err
}
if !apimeta.IsNoMatchError(err) {
return errors.Wrap(err, "failed to check if the clusterctl inventory CRD exists")
if crdIsIstalled {
return nil
}

log.V(1).Info("Installing the clusterctl inventory CRD")
Expand All @@ -120,7 +131,7 @@ func (p *inventoryClient) EnsureCustomResourceDefinitions() error {
}

// Install the CRDs.
createInventoryObjectBackoff := newBackoff()
createInventoryObjectBackoff := newWriteBackoff()
for i := range objs {
o := objs[i]
log.V(5).Info("Creating", logf.UnstructuredToValues(o)...)
Expand Down Expand Up @@ -166,6 +177,23 @@ func (p *inventoryClient) EnsureCustomResourceDefinitions() error {
return nil
}

// checkInventoryCRDs checks if the inventory CRDs are installed in the cluster.
func checkInventoryCRDs(proxy Proxy) (bool, error) {
c, err := proxy.NewClient()
if err != nil {
return false, err
}

l := &clusterctlv1.ProviderList{}
if err = c.List(ctx, l); err == nil {
return true, nil
}
if !apimeta.IsNoMatchError(err) {
return false, errors.Wrap(err, "failed to check if the clusterctl inventory CRD exists")
}
return false, nil
}

func (p *inventoryClient) createObj(o unstructured.Unstructured) error {
c, err := p.proxy.NewClient()
if err != nil {
Expand All @@ -190,8 +218,7 @@ func (p *inventoryClient) createObj(o unstructured.Unstructured) error {

func (p *inventoryClient) Create(m clusterctlv1.Provider) error {
// Create the Kubernetes object.
// Nb. The operation is wrapped in a retry loop to make Create more resilient to unexpected conditions.
createInventoryObjectBackoff := newBackoff()
createInventoryObjectBackoff := newWriteBackoff()
return retryWithExponentialBackoff(createInventoryObjectBackoff, func() error {
cl, err := p.proxy.NewClient()
if err != nil {
Expand Down Expand Up @@ -227,16 +254,29 @@ func (p *inventoryClient) Create(m clusterctlv1.Provider) error {
}

func (p *inventoryClient) List() (*clusterctlv1.ProviderList, error) {
cl, err := p.proxy.NewClient()
if err != nil {
providerList := &clusterctlv1.ProviderList{}

listProvidersBackoff := newReadBackoff()
if err := retryWithExponentialBackoff(listProvidersBackoff, func() error {
return listProviders(p.proxy, providerList)
}); err != nil {
return nil, err
}

providerList := &clusterctlv1.ProviderList{}
return providerList, nil
}

// listProviders retrieves the list of provider inventory objects.
func listProviders(proxy Proxy, providerList *clusterctlv1.ProviderList) error {
cl, err := proxy.NewClient()
if err != nil {
return err
}

if err := cl.List(ctx, providerList); err != nil {
return nil, errors.Wrap(err, "failed get providers")
return errors.Wrap(err, "failed get providers")
}
return providerList, nil
return nil
}

func (p *inventoryClient) GetDefaultProviderName(providerType clusterctlv1.ProviderType) (string, error) {
Expand Down
Loading

0 comments on commit 2b2bfb9

Please sign in to comment.