Skip to content

Commit

Permalink
detect Retry-After during async “does resource exist?” flow
Browse files Browse the repository at this point in the history
  • Loading branch information
jackfrancis committed Sep 30, 2022
1 parent 071dd13 commit ea7b62f
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 6 deletions.
48 changes: 42 additions & 6 deletions azure/services/async/async.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@ package async

import (
"context"
"net/http"
"strconv"
"time"

"github.com/Azure/go-autorest/autorest"
azureautorest "github.com/Azure/go-autorest/autorest/azure"
"github.com/pkg/errors"
infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
Expand All @@ -29,6 +32,9 @@ import (
"sigs.k8s.io/cluster-api-provider-azure/util/tele"
)

// defaultHTTP429RetryAfter is a default backoff wait time when we get a HTTP 429 response with no Retry-After data.
const defaultHTTP429RetryAfter = 1 * time.Minute

// Service is an implementation of the Reconciler interface. It handles asynchronous creation and deletion of resources.
type Service struct {
Scope FutureScope
Expand Down Expand Up @@ -76,7 +82,7 @@ func processOngoingOperation(ctx context.Context, scope FutureScope, client Futu

// Operation is still in progress, update conditions and requeue.
log.V(2).Info("long running operation is still ongoing", "service", serviceName, "resource", resourceName)
return nil, azure.WithTransientError(azure.NewOperationNotDoneError(future), retryAfter(sdkFuture))
return nil, azure.WithTransientError(azure.NewOperationNotDoneError(future), getRequeueAfterFromFuture(sdkFuture))
}
if err != nil {
log.V(2).Error(err, "error checking long running operation status after it finished")
Expand Down Expand Up @@ -111,7 +117,8 @@ func (s *Service) CreateResource(ctx context.Context, spec azure.ResourceSpecGet
// Get the resource if it already exists, and use it to construct the desired resource parameters.
var existingResource interface{}
if existing, err := s.Creator.Get(ctx, spec); err != nil && !azure.ResourceNotFound(err) {
return nil, errors.Wrapf(err, "failed to get existing resource %s/%s (service: %s)", rgName, resourceName, serviceName)
errWrapped := errors.Wrapf(err, "failed to get existing resource %s/%s (service: %s)", rgName, resourceName, serviceName)
return nil, azure.WithTransientError(errWrapped, getRetryAfterFromError(err))
} else if err == nil {
existingResource = existing
log.V(2).Info("successfully got existing resource", "service", serviceName, "resource", resourceName, "resourceGroup", rgName)
Expand All @@ -136,7 +143,7 @@ func (s *Service) CreateResource(ctx context.Context, spec azure.ResourceSpecGet
return nil, errors.Wrapf(err, "failed to create resource %s/%s (service: %s)", rgName, resourceName, serviceName)
}
s.Scope.SetLongRunningOperationState(future)
return nil, azure.WithTransientError(azure.NewOperationNotDoneError(future), retryAfter(sdkFuture))
return nil, azure.WithTransientError(azure.NewOperationNotDoneError(future), getRequeueAfterFromFuture(sdkFuture))
} else if err != nil {
return nil, errors.Wrapf(err, "failed to create resource %s/%s (service: %s)", rgName, resourceName, serviceName)
}
Expand Down Expand Up @@ -170,7 +177,7 @@ func (s *Service) DeleteResource(ctx context.Context, spec azure.ResourceSpecGet
return errors.Wrapf(err, "failed to delete resource %s/%s (service: %s)", rgName, resourceName, serviceName)
}
s.Scope.SetLongRunningOperationState(future)
return azure.WithTransientError(azure.NewOperationNotDoneError(future), retryAfter(sdkFuture))
return azure.WithTransientError(azure.NewOperationNotDoneError(future), getRequeueAfterFromFuture(sdkFuture))
} else if err != nil {
if azure.ResourceNotFound(err) {
// already deleted
Expand All @@ -183,12 +190,41 @@ func (s *Service) DeleteResource(ctx context.Context, spec azure.ResourceSpecGet
return nil
}

// retryAfter returns the max between the `RETRY-AFTER` header and the default requeue time.
// getRequeueAfterFromFuture returns the max between the `RETRY-AFTER` header and the default requeue time.
// This ensures we respect the retry-after header if it is set and avoid retrying too often during an API throttling event.
func retryAfter(sdkFuture azureautorest.FutureAPI) time.Duration {
func getRequeueAfterFromFuture(sdkFuture azureautorest.FutureAPI) time.Duration {
retryAfter, _ := sdkFuture.GetPollingDelay()
if retryAfter < reconciler.DefaultReconcilerRequeue {
retryAfter = reconciler.DefaultReconcilerRequeue
}
return retryAfter
}

// getRetryAfterFromError returns the time.Duration from the http.Response in the autorest.DetailedError.
// If there is no Response object, or if there is no meaningful Retry-After header data, we return a default.
func getRetryAfterFromError(err error) (ret time.Duration) {
// In case we aren't able to introspect Retry-After from the error type, we'll return this default
ret = reconciler.DefaultReconcilerRequeue
var detailedError autorest.DetailedError
// if we have a strongly typed autorest.DetailedError then we can introspect the HTTP response data
if errors.As(err, &detailedError) {
if detailedError.Response != nil {
// If we have Retry-After HTTP header data for any reason, prefer it
if retryAfter := detailedError.Response.Header.Get("Retry-After"); retryAfter != "" {
// This handles the case where Retry-After data is in the form of units of seconds
if rai, err := strconv.Atoi(retryAfter); err == nil {
ret = time.Duration(rai) * time.Second
// This handles the case where Retry-After data is in the form of absolute time
} else if t, err := time.Parse(time.RFC1123, retryAfter); err == nil {
// We round up to the second to make this easier to UT
ret = time.Until(t).Round(time.Second)
}
// If we didn't find Retry-After HTTP header data but the response type is 429,
// we'll have to come up with our sane default.
} else if detailedError.Response.StatusCode == http.StatusTooManyRequests {
ret = defaultHTTP429RetryAfter
}
}
}
return
}
78 changes: 78 additions & 0 deletions azure/services/async/async_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"errors"
"net/http"
"testing"
"time"

"github.com/Azure/azure-sdk-for-go/services/resources/mgmt/2019-05-01/resources"
"github.com/Azure/go-autorest/autorest"
Expand All @@ -31,6 +32,7 @@ import (
"sigs.k8s.io/cluster-api-provider-azure/azure/mock_azure"
"sigs.k8s.io/cluster-api-provider-azure/azure/services/async/mock_async"
gomockinternal "sigs.k8s.io/cluster-api-provider-azure/internal/test/matchers/gomock"
"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
)

var (
Expand Down Expand Up @@ -434,3 +436,79 @@ func TestDeleteResource(t *testing.T) {
})
}
}

func TestGetRetryAfterFromError(t *testing.T) {
cases := []struct {
name string
input error
expected time.Duration
}{
{
name: "Retry-After header data present in the form of units of seconds",
input: autorest.DetailedError{
Response: &http.Response{
Header: http.Header{
"Retry-After": []string{"2"},
},
},
},
expected: 2 * time.Second,
},
{
name: "Retry-After header data present in the form of units of absolute time",
input: autorest.DetailedError{
Response: &http.Response{
Header: http.Header{
"Retry-After": []string{time.Now().Add(1 * time.Minute).Format(time.RFC1123)},
},
},
},
expected: 1 * time.Minute,
},
{
name: "Retry-After header data not present",
input: autorest.DetailedError{
Response: &http.Response{
Header: http.Header{
"foo": []string{"bar"},
},
},
},
expected: reconciler.DefaultReconcilerRequeue,
},
{
name: "Retry-After header data not present in HTTP 429",
input: autorest.DetailedError{
Response: &http.Response{
StatusCode: http.StatusTooManyRequests,
Header: http.Header{
"foo": []string{"bar"},
},
},
},
expected: defaultHTTP429RetryAfter,
},
{
name: "nil http.Response",
input: autorest.DetailedError{
Response: nil,
},
expected: reconciler.DefaultReconcilerRequeue,
},
{
name: "error type is not autorest.DetailedError",
input: errors.New("error"),
expected: reconciler.DefaultReconcilerRequeue,
},
}

for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
g := NewWithT(t)
ret := getRetryAfterFromError(c.input)
g.Expect(ret).To(Equal(c.expected))
})
}
}

0 comments on commit ea7b62f

Please sign in to comment.