Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Don't retry function upload on 400 or 422 status #478

Merged
merged 21 commits into from
Sep 22, 2023
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 27 additions & 10 deletions go/porcelain/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ type DeployOptions struct {
BuildDir string
LargeMediaEnabled bool

IsDraft bool
IsDraft bool
SkipRetry bool
jenae-janzen marked this conversation as resolved.
Show resolved Hide resolved

Title string
Branch string
Expand Down Expand Up @@ -344,12 +345,14 @@ func (n *Netlify) DoDeploy(ctx context.Context, options *DeployOptions, deploy *
return deploy, nil
}

if err := n.uploadFiles(ctx, deploy, options.files, options.Observer, fileUpload, options.UploadTimeout); err != nil {
skipRetry := options.SkipRetry

if err := n.uploadFiles(ctx, deploy, options.files, options.Observer, fileUpload, options.UploadTimeout, skipRetry); err != nil {
return nil, err
}

if options.functions != nil {
if err := n.uploadFiles(ctx, deploy, options.functions, options.Observer, functionUpload, options.UploadTimeout); err != nil {
if err := n.uploadFiles(ctx, deploy, options.functions, options.Observer, functionUpload, options.UploadTimeout, skipRetry); err != nil {
return nil, err
}
}
Expand Down Expand Up @@ -401,7 +404,7 @@ func (n *Netlify) WaitUntilDeployLive(ctx context.Context, d *models.Deploy) (*m
return n.waitForState(ctx, d, "ready")
}

func (n *Netlify) uploadFiles(ctx context.Context, d *models.Deploy, files *deployFiles, observer DeployObserver, t uploadType, timeout time.Duration) error {
func (n *Netlify) uploadFiles(ctx context.Context, d *models.Deploy, files *deployFiles, observer DeployObserver, t uploadType, timeout time.Duration, skipRetry bool) error {
sharedErr := &uploadError{err: nil, mutex: &sync.Mutex{}}
sem := make(chan int, n.uploadLimit)
wg := &sync.WaitGroup{}
Expand Down Expand Up @@ -431,7 +434,7 @@ func (n *Netlify) uploadFiles(ctx context.Context, d *models.Deploy, files *depl
select {
case sem <- 1:
wg.Add(1)
go n.uploadFile(ctx, d, file, observer, t, timeout, wg, sem, sharedErr)
go n.uploadFile(ctx, d, file, observer, t, timeout, wg, sem, sharedErr, skipRetry)
case <-ctx.Done():
log.Info("Context terminated, aborting file upload")
return errors.Wrap(ctx.Err(), "aborted file upload early")
Expand All @@ -451,7 +454,7 @@ func (n *Netlify) uploadFiles(ctx context.Context, d *models.Deploy, files *depl
return sharedErr.err
}

func (n *Netlify) uploadFile(ctx context.Context, d *models.Deploy, f *FileBundle, c DeployObserver, t uploadType, timeout time.Duration, wg *sync.WaitGroup, sem chan int, sharedErr *uploadError) {
func (n *Netlify) uploadFile(ctx context.Context, d *models.Deploy, f *FileBundle, c DeployObserver, t uploadType, timeout time.Duration, wg *sync.WaitGroup, sem chan int, sharedErr *uploadError, skipRetry bool) {
defer func() {
wg.Done()
<-sem
Expand Down Expand Up @@ -538,10 +541,24 @@ func (n *Netlify) uploadFile(ctx context.Context, d *models.Deploy, f *FileBundl
context.GetLogger(ctx).WithError(operationError).Errorf("Failed to upload file %v", f.Name)
apiErr, ok := operationError.(apierrors.Error)

if ok && apiErr.Code() == 401 {
sharedErr.mutex.Lock()
sharedErr.err = operationError
sharedErr.mutex.Unlock()
// In testing, we repeatedly get to this line, see "Failed to upload file", but no matter
// what we try, ok is always `false`, and the code we're trying to test gets skipped.
// There isn't a test case for the original code (the 401 error)

// The operation error we're receiving is "(*models.Error) is not supported by the TextConsumer, can be resolved by supporting TextUnmarshaler interface"
// which seems like a problem with how we're stubbing the response body from the server in the test,
// but it's similar to how it's stubbed in

if ok {
if apiErr.Code() == 401 {
sharedErr.mutex.Lock()
sharedErr.err = operationError
sharedErr.mutex.Unlock()
}

if skipRetry && (apiErr.Code() == 400 || apiErr.Code() == 422) {
operationError = backoff.Permanent(operationError)
}
}
}

Expand Down
113 changes: 108 additions & 5 deletions go/porcelain/deploy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ func TestUploadFiles_Cancelation(t *testing.T) {
for _, bundle := range files.Files {
d.Required = append(d.Required, bundle.Sum)
}
err = client.uploadFiles(ctx, d, files, nil, fileUpload, time.Minute)
err = client.uploadFiles(ctx, d, files, nil, fileUpload, time.Minute, false)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wondering if we could add a test here which exercises the new code paths we introduced (i.e. checking we don't retry 400 and 422 errors when uploading).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JGAntunes do you have any pointers for the testing? I seem to be a bit stuck here. We are also having trouble running the test suite locally, which is slowing this down a bit 😬

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

about running the test suite locally, if it seems like nothing is running it's probably because of the retries loop.

try running the tests as:

 go test -race github.com/netlify/open-api/v2/go/porcelain --run TestUploadFiles400Errors -v

And you should get some output (hopefully)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the tip @4xposed!! We got the tests running and can see the output which has been very helpful :D

require.ErrorIs(t, err, gocontext.Canceled)
}

Expand Down Expand Up @@ -317,10 +317,113 @@ func TestUploadFiles_Errors(t *testing.T) {
for _, bundle := range files.Files {
d.Required = append(d.Required, bundle.Sum)
}
err = client.uploadFiles(ctx, d, files, nil, fileUpload, time.Minute)
err = client.uploadFiles(ctx, d, files, nil, fileUpload, time.Minute, false)
require.Equal(t, err.Error(), "[PUT /deploys/{deploy_id}/files/{path}][500] uploadDeployFile default &{Code:0 Message:}")
}

func TestUploadFiles400Error_SkipsRetry(t *testing.T) {
attempts := 0
ctx := gocontext.Background()

server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, _ *http.Request) {
jenae-janzen marked this conversation as resolved.
Show resolved Hide resolved
defer func() {
attempts++
}()

rw.WriteHeader(http.StatusUnprocessableEntity)
Copy link
Contributor

@4xposed 4xposed Aug 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think to properly assert the error message in the requre.Equal in line 350 we need to make the mock server to return a body with a json that has code: and message keys:

Suggested change
rw.WriteHeader(http.StatusUnprocessableEntity)
rw.WriteHeader(http.StatusUnprocessableEntity)
rw.Header().Set("Content-Type", "application/json; charset=utf-8")
rw.Write([]byte(`{"message": "Unprocessable Entity", "code": 422}`))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is super helpful and definitely makes sense! -- looking at other test cases I see this is done similarly. However, when we set rw.Write([]byte(`{"message": "Unprocessable Entity", "code": 422}`)) we keep getting the error&{0 } (*models.Error) is not supported by the TextConsumer, can be resolved by supporting TextUnmarshaler interface.

So then the apiError isn't set and then our test skips over the code we're trying to test and keeps retrying 🙈

Looking at other examples in other tests, it seems like it works to set the response this way when calling client.Operations.UploadDeployFile, but not when calling client.uploadFiles -- when calling uploadFiles the error is thrown from UploadDeployFile.

We've been trying to debug this by writing the body different ways, trying to marshal the response etc, but haven't had any success yet 🤔 Is there maybe something silly we're missing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think It might be the order on which those are set, mockserver is a bit quirky at times. try if setting the headers in this order helps:

rw.Header().Set("Content-Type", "application/json; charset=utf-8")
rw.WriteHeader(http.StatusUnprocessableEntity)
rw.Write([]byte(`{"message": "Unprocessable Entity", "code": 422 }`))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👋 @4xposed thanks so much for the tip! That worked and now as far as I can tell, the server is responding as expected. However, now we're hitting a different problem and struggling to debug it.

We now receive the error with the correct code and message, but when we get to this line
apiErr, ok := operationError.(apierrors.Error)
no matter what we try, apiErr is always nil and ok is always false.

operationError is an error that looks like:
error(*github.com/netlify/open-api/v2/go/plumbing/operations.UploadDeployFunctionDefault) *{_statusCode: 422, Payload: *github.com/netlify/open-api/v2/go/models.Error {Code: 422, Message: "Unprocessable Entity"}}

In my debugger, I can access the status code by calling either operationError._statusCode or operationError.Payload.Code, but not by calling operationError.(apierrors.Error). However, I can't call operationError.Payload.Code in the actual code, because Payload/Code isn't defined on the Error type.

So since operationError.(apierrors.Error) never seems to evaluate to anything and there wasn't a test for the original code, we're uncertain if we're doing something wrong here, or if the original code was actually working as intended.

Are we missing something here? Is there a better way to test this (perhaps non-locally?)

cc @jrwhitmer @JGAntunes

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@4xposed Thanks, I really appreciate the offer! I'm in Europe now 🎉

Testing manually using the debugger, it looks like everything's working as expected, thanks so much for the fix. The one last thing I'm trying to do to wrap up this PR is to fix my tests:

require.Equal(t, err, "[PUT /deploys/{deploy_id}/files/{path}][422] uploadDeployFile default  &{Code:422 Message: Unprocessable Entity}")

doesn't work because the error's now wrapped by the backoff.Permanent error. Is it sufficient to just requireError(t, err) here? Or is there a better way to assert this that I'm missing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pairing might not be necessary since most of this seems to be working, but I'd appreciate you taking another look when you have the time 🙂

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could call err.Unwrap() seems like backoff.PermanentError has a few methods for this: https://github.com/cenkalti/backoff/blob/v4/retry.go#L129

I don't think it's strictly necessary, but it would be nice to at least have some way that we have the right error and not any error, if that makes sense.

What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll give it a shot! I do agree it would be nice to have the right error if possible

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I used Require. ErrorContains instead and that seemed to do the trick for verifying we have the correct error message

rw.Header().Set("Content-Type", "application/json; charset=utf-8")
rw.Write([]byte(`{"message": "Unprocessable Entity", "code": 422 }`))
}))
defer server.Close()

// // File upload:
// hu, _ := url.Parse(server.URL)
// tr := apiClient.NewWithClient(hu.Host, "/api/v1", []string{"http"}, http.DefaultClient)
// client := NewRetryable(tr, strfmt.Default, 1)
// client.uploadLimit = 1
// ctx = context.WithAuthInfo(ctx, apiClient.BearerToken("token"))

// // Create some files to deploy
// dir, err := ioutil.TempDir("", "deploy")
// require.NoError(t, err)
// defer os.RemoveAll(dir)
// require.NoError(t, ioutil.WriteFile(filepath.Join(dir, "foo.html"), []byte("Hello"), 0644))

// files, err := walk(dir, nil, false, false)
// require.NoError(t, err)
// d := &models.Deploy{}
// for _, bundle := range files.Files {
// d.Required = append(d.Required, bundle.Sum)
// }
// // Set SkipRetry to true
// err = client.uploadFiles(ctx, d, files, nil, fileUpload, time.Minute, true)

// // Function upload:
jenae-janzen marked this conversation as resolved.
Show resolved Hide resolved
hu, _ := url.Parse(server.URL)
tr := apiClient.NewWithClient(hu.Host, "/api/v1", []string{"http"}, http.DefaultClient)
client := NewRetryable(tr, strfmt.Default, 1)
client.uploadLimit = 1
apiCtx := context.WithAuthInfo(ctx, apiClient.BearerToken("token"))

dir, err := ioutil.TempDir("", "deploy")
functionsPath := filepath.Join(dir, ".netlify", "functions")
os.MkdirAll(functionsPath, os.ModePerm)
require.NoError(t, err)
defer os.RemoveAll(dir)
require.NoError(t, ioutil.WriteFile(filepath.Join(functionsPath, "foo.js"), []byte("module.exports = () => {}"), 0644))

files, _, _, err := bundle(ctx, functionsPath, mockObserver{})
require.NoError(t, err)
d := &models.Deploy{}
for _, bundle := range files.Files {
d.RequiredFunctions = append(d.RequiredFunctions, bundle.Sum)
}
// Set SkipRetry to true
require.NoError(t, client.uploadFiles(apiCtx, d, files, nil, functionUpload, time.Minute, true))
require.Equal(t, err, "[PUT /deploys/{deploy_id}/files/{path}][422] uploadDeployFile default &{Code:422 Message: Unprocessable Entity}")
require.Equal(t, attempts, 1)
}

func TestUploadFiles400Error_NoSkipRetry(t *testing.T) {
attempts := 0
ctx := gocontext.Background()

server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, _ *http.Request) {
defer func() {
attempts++
}()

rw.WriteHeader(http.StatusUnprocessableEntity)
rw.Header().Set("Content-Type", "application/json; charset=utf-8")
rw.Write([]byte(`{"message": "Unprocessable Entity", "code": 422 }`))
return
}))
defer server.Close()

hu, _ := url.Parse(server.URL)
tr := apiClient.NewWithClient(hu.Host, "/api/v1", []string{"http"}, http.DefaultClient)
client := NewRetryable(tr, strfmt.Default, 1)
client.uploadLimit = 1
ctx = context.WithAuthInfo(ctx, apiClient.BearerToken("token"))

// Create some files to deploy
dir, err := ioutil.TempDir("", "deploy")
require.NoError(t, err)
defer os.RemoveAll(dir)
require.NoError(t, ioutil.WriteFile(filepath.Join(dir, "foo.html"), []byte("Hello"), 0644))

files, err := walk(dir, nil, false, false)
require.NoError(t, err)
d := &models.Deploy{}
for _, bundle := range files.Files {
d.Required = append(d.Required, bundle.Sum)
}
// Set SkipRetry to false
err = client.uploadFiles(ctx, d, files, nil, fileUpload, time.Minute, false)
// require.Equal(t, err, "[PUT /deploys/{deploy_id}/files/{path}][422] uploadDeployFile default &{Code:422 Message: Unprocessable Entity}")
require.Equal(t, attempts, 12)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The number of attempts doesn't seem to be consistent, so maybe there's a better way to assert this

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@4xposed this is one other tiny detail that sometimes makes the test fail - every once in a while it retries 13 times 🤔

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes I think (emphasis on think) it retries infinitely until the context times out or it's cancelled.

I think checking that there's more than 1 attempt (or some arbitrary number that's not too high) might be enough, maybe?

I will try to look more into the retry mechanism tomorrow morning to try to figure out if we can have more control in the test of how many times we retry

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a check to just make sure that the attempts are greater than 1 :) That should probably be sufficient for what we're testing

}

func TestUploadFiles_SkipEqualFiles(t *testing.T) {
ctx := gocontext.Background()

Expand Down Expand Up @@ -377,11 +480,11 @@ func TestUploadFiles_SkipEqualFiles(t *testing.T) {
d.Required = []string{files.Sums["a.html"]}
d.RequiredFunctions = []string{functions.Sums["a"]}

err = client.uploadFiles(ctx, d, files, nil, fileUpload, time.Minute)
err = client.uploadFiles(ctx, d, files, nil, fileUpload, time.Minute, false)
require.NoError(t, err)
assert.Equal(t, 1, serverRequests)

err = client.uploadFiles(ctx, d, functions, nil, functionUpload, time.Minute)
err = client.uploadFiles(ctx, d, functions, nil, functionUpload, time.Minute, false)
require.NoError(t, err)
assert.Equal(t, 2, serverRequests)
}
Expand Down Expand Up @@ -437,7 +540,7 @@ func TestUploadFunctions_RetryCountHeader(t *testing.T) {
d.RequiredFunctions = append(d.RequiredFunctions, bundle.Sum)
}

require.NoError(t, client.uploadFiles(apiCtx, d, files, nil, functionUpload, time.Minute))
require.NoError(t, client.uploadFiles(apiCtx, d, files, nil, functionUpload, time.Minute, false))
}

func TestBundle(t *testing.T) {
Expand Down
Loading