From e766e14e89f44f6c9d17998e6cd5f709604241f9 Mon Sep 17 00:00:00 2001 From: Christian Winther Date: Fri, 10 May 2024 00:18:25 +0200 Subject: [PATCH] fix: implement backoff + retry when GitLab SetCommitStatus returns 409 (#4503) Co-authored-by: PePe Amengual --- go.mod | 1 + go.sum | 1 + server/events/vcs/gitlab_client.go | 77 +++++++++++++++++++++++------- 3 files changed, 63 insertions(+), 16 deletions(-) diff --git a/go.mod b/go.mod index a67e96eace..9055769590 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,7 @@ require ( github.com/hashicorp/go-version v1.6.0 github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/hashicorp/terraform-config-inspect v0.0.0-20231204233900-a34142ec2a72 + github.com/jpillora/backoff v1.0.0 github.com/kr/pretty v0.3.1 github.com/mcdafydd/go-azuredevops v0.12.1 github.com/microcosm-cc/bluemonday v1.0.26 diff --git a/go.sum b/go.sum index 2e06e7cd8f..88a1f0862f 100644 --- a/go.sum +++ b/go.sum @@ -270,6 +270,7 @@ github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+h github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= +github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= diff --git a/server/events/vcs/gitlab_client.go b/server/events/vcs/gitlab_client.go index 0a85353adc..4003f33ca4 100644 --- a/server/events/vcs/gitlab_client.go +++ b/server/events/vcs/gitlab_client.go @@ -22,14 +22,13 @@ import ( "strings" "time" - "github.com/runatlantis/atlantis/server/events/command" - "github.com/runatlantis/atlantis/server/events/vcs/common" - version "github.com/hashicorp/go-version" + "github.com/jpillora/backoff" "github.com/pkg/errors" - "github.com/runatlantis/atlantis/server/logging" - + "github.com/runatlantis/atlantis/server/events/command" "github.com/runatlantis/atlantis/server/events/models" + "github.com/runatlantis/atlantis/server/events/vcs/common" + "github.com/runatlantis/atlantis/server/logging" gitlab "github.com/xanzy/go-gitlab" ) @@ -439,17 +438,63 @@ func (g *GitlabClient) UpdateStatus(logger logging.SimpleLogging, repo models.Re } } - _, resp, err := g.Client.Commits.SetCommitStatus(repo.FullName, pull.HeadCommit, &gitlab.SetCommitStatusOptions{ - State: gitlabState, - Context: gitlab.Ptr(src), - Description: gitlab.Ptr(description), - TargetURL: &url, - Ref: gitlab.Ptr(refTarget), - }) - if resp != nil { - logger.Debug("POST /projects/%s/statuses/%s returned: %d", repo.FullName, pull.HeadCommit, resp.StatusCode) + var ( + resp *gitlab.Response + maxAttempts = 10 + b = &backoff.Backoff{Jitter: true} + ) + + for i := 0; i <= maxAttempts; i++ { + logger := logger.With( + "attempt", i+1, + "max_attempts", maxAttempts, + "repo", repo.FullName, + "commit", pull.HeadCommit, + "state", state.String(), + ) + + _, resp, err = g.Client.Commits.SetCommitStatus(repo.FullName, pull.HeadCommit, &gitlab.SetCommitStatusOptions{ + State: gitlabState, + Context: gitlab.Ptr(src), + Description: gitlab.Ptr(description), + TargetURL: &url, + Ref: gitlab.Ptr(refTarget), + }) + + if resp != nil { + logger.Debug("POST /projects/%s/statuses/%s returned: %d", repo.FullName, pull.HeadCommit, resp.StatusCode) + + // GitLab returns a `409 Conflict` status when the commit pipeline status is being changed/locked by another request, + // which is likely to happen if you use [`--parallel-pool-size > 1`] and [`parallel-plan|apply`]. + // + // The likelihood of this happening is increased when the number of parallel apply jobs is increased. + // + // Returning the [err] without retrying will permanently leave the GitLab commit status in a "running" state, + // which would prevent Atlantis from merging the merge request on [apply]. + // + // GitLab does not allow merge requests to be merged when the pipeline status is "running." + + if resp.StatusCode == http.StatusConflict { + sleep := b.ForAttempt(float64(i)) + + logger.With("retry_in", sleep).Warn("GitLab returned HTTP [409 Conflict] when updating commit status") + time.Sleep(sleep) + + continue + } + } + + // Log we got a 200 OK response from GitLab after at least one retry to help with debugging/understanding delays/errors. + if err == nil && i > 0 { + logger.Info("GitLab returned HTTP [200 OK] after updating commit status") + } + + // Return the err, which might be nil if everything worked out + return err } - return err + + // If we got here, we've exhausted all attempts to update the commit status and still failed, so return the error upstream + return errors.Wrap(err, fmt.Sprintf("failed to update commit status for '%s' @ '%s' to '%s' after %d attempts", repo.FullName, pull.HeadCommit, src, maxAttempts)) } func (g *GitlabClient) GetMergeRequest(logger logging.SimpleLogging, repoFullName string, pullNum int) (*gitlab.MergeRequest, error) { @@ -471,7 +516,7 @@ func (g *GitlabClient) WaitForSuccessPipeline(logger logging.SimpleLogging, ctx case <-ctx.Done(): // validation check time out cancel() - return //ctx.Err() + return // ctx.Err() default: mr, _ := g.GetMergeRequest(logger, pull.BaseRepo.FullName, pull.Num)