Skip to content

Commit

Permalink
feat: shallow repo clone in merge checkout strategy (runatlantis#2758)
Browse files Browse the repository at this point in the history
* Implement shallow repo clone in merge checkout strategy

---------

Co-authored-by: Ilya Lukyanov <[email protected]>
Co-authored-by: Nikolai Røed Kristiansen <[email protected]>
Co-authored-by: PePe Amengual <[email protected]>
Co-authored-by: Ken Kaizu <[email protected]>
  • Loading branch information
5 people authored and ijames-gc committed Feb 13, 2024
1 parent 47ccc0f commit 97be92e
Show file tree
Hide file tree
Showing 8 changed files with 191 additions and 49 deletions.
13 changes: 12 additions & 1 deletion cmd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ const (
BitbucketTokenFlag = "bitbucket-token"
BitbucketUserFlag = "bitbucket-user"
BitbucketWebhookSecretFlag = "bitbucket-webhook-secret"
ConfigFlag = "config"
CheckoutDepthFlag = "checkout-depth"
CheckoutStrategyFlag = "checkout-strategy"
ConfigFlag = "config"
DataDirFlag = "data-dir"
DefaultTFVersionFlag = "default-tf-version"
DisableApplyAllFlag = "disable-apply-all"
Expand Down Expand Up @@ -139,6 +140,7 @@ const (
DefaultAutoplanFileList = "**/*.tf,**/*.tfvars,**/*.tfvars.json,**/terragrunt.hcl,**/.terraform.lock.hcl"
DefaultAllowCommands = "version,plan,apply,unlock,approve_policies"
DefaultCheckoutStrategy = "branch"
DefaultCheckoutDepth = 0
DefaultBitbucketBaseURL = bitbucketcloud.BaseURL
DefaultDataDir = "~/.atlantis"
DefaultExecutableName = "atlantis"
Expand Down Expand Up @@ -531,6 +533,12 @@ var boolFlags = map[string]boolFlag{
},
}
var intFlags = map[string]intFlag{
CheckoutDepthFlag: {
description: fmt.Sprintf("Used only if --%s=merge.", CheckoutStrategyFlag) +
" How many commits to include in each of base and feature branches when cloning repository." +
" If merge base is further behind than this number of commits from any of branches heads, full fetch will be performed.",
defaultValue: DefaultCheckoutDepth,
},
ParallelPoolSize: {
description: "Max size of the wait group that runs parallel plans and applies (if enabled).",
defaultValue: DefaultParallelPoolSize,
Expand Down Expand Up @@ -758,6 +766,9 @@ func (s *ServerCmd) setDefaults(c *server.UserConfig) {
if c.AutoplanFileList == "" {
c.AutoplanFileList = DefaultAutoplanFileList
}
if c.CheckoutDepth <= 0 {
c.CheckoutDepth = DefaultCheckoutDepth
}
if c.AllowCommands == "" {
c.AllowCommands = DefaultAllowCommands
}
Expand Down
9 changes: 9 additions & 0 deletions runatlantis.io/docs/checkout-strategy.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,12 @@ Atlantis doesn't actually commit this merge anywhere. It just uses it locally.
Atlantis only performs this merge during the `terraform plan` phase. If another
commit is pushed to `main` **after** Atlantis runs `plan`, nothing will happen.
:::

To optimize cloning time, Atlantis can perform a shallow clone by specifying the `--checkout-depth` flag. The cloning is performed in a following manner:

- Shallow clone of the default branch is performed with depth of `--checkout-depth` value of zero (full clone).
- `branch` is retrieved, including the same amount of commits.
- Merge base of the default branch and `branch` is checked for existence in the shallow clone.
- If the merge base is not present, it means that either of the branches are ahead of the merge base by more than `--checkout-depth` commits. In this case full repo history is fetched.

If the commit history often diverges by more than the default checkout depth then the `--checkout-depth` flag should be tuned to avoid full fetches.
9 changes: 9 additions & 0 deletions runatlantis.io/docs/server-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,15 @@ and set `--autoplan-modules` to `false`.
This means that an attacker could spoof calls to Atlantis and cause it to perform malicious actions.
:::

### `--checkout-depth`
```bash
atlantis server --checkout-depth=0
# or
ATLANTIS_CHECKOUT_DEPTH=0
```
The number of commits to fetch from the branch. Used if `--checkout-strategy=merge` since the `--checkout-strategy=branch` (default) checkout strategy always defaults to a shallow clone using a depth of 1.
Defaults to `0`. See [Checkout Strategy](checkout-strategy.html) for more details.

### `--checkout-strategy`
```bash
atlantis server --checkout-strategy="<branch|merge>"
Expand Down
16 changes: 16 additions & 0 deletions server/events/pending_plan_finder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,22 @@ func TestPendingPlanFinder_FindPlanCheckedIn(t *testing.T) {
Equals(t, 0, len(actPlans))
}

func runCmdErrCode(t *testing.T, dir string, errCode int, name string, args ...string) string {
t.Helper()
cpCmd := exec.Command(name, args...)
cpCmd.Dir = dir
cpOut, err := cpCmd.CombinedOutput()
cmd := strings.Join(append([]string{name}, args...), " ")
if err != nil {
if eerr, ok := err.(*exec.ExitError); ok {
Assert(t, errCode == eerr.ExitCode(), "unexpected exit code: want %v, got %v, running %q: %s", errCode, eerr.ExitCode(), cmd, cpCmd)
return string(cpOut)
}
}
Assert(t, false, "invalid exit code, running %q: %s", cmd, cpOut)
return string(cpOut)
}

// Test that it deletes pending plans.
func TestPendingPlanFinder_DeletePlans(t *testing.T) {
files := map[string]interface{}{
Expand Down
97 changes: 49 additions & 48 deletions server/events/working_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ type FileWorkspace struct {
// If this is false, then we will check out the head branch from the pull
// request.
CheckoutMerge bool
// CheckoutDepth is how many commits of feature branch and main branch we'll
// retrieve by default. If their merge base is not retrieved with this depth,
// full fetch will be performed. Only matters if CheckoutMerge=true.
CheckoutDepth int
// TestingOverrideHeadCloneURL can be used during testing to override the
// URL of the head repo to be cloned. If it's empty then we clone normally.
TestingOverrideHeadCloneURL string
Expand Down Expand Up @@ -267,53 +271,8 @@ func (w *FileWorkspace) forceClone(log logging.SimpleLogging,
baseCloneURL = w.TestingOverrideBaseCloneURL
}

var cmds [][]string
if w.CheckoutMerge {
// NOTE: We can't do a shallow clone when we're merging because we'll
// get merge conflicts if our clone doesn't have the commits that the
// branch we're merging branched off at.
// See https://groups.google.com/forum/#!topic/git-users/v3MkuuiDJ98.
fetchRef := fmt.Sprintf("+refs/heads/%s:", p.HeadBranch)
fetchRemote := "head"
if w.GithubAppEnabled {
fetchRef = fmt.Sprintf("pull/%d/head:", p.Num)
fetchRemote = "origin"
}
cmds = [][]string{
{
"git", "clone", "--branch", p.BaseBranch, "--single-branch", baseCloneURL, cloneDir,
},
{
"git", "remote", "add", "head", headCloneURL,
},
{
"git", "fetch", fetchRemote, fetchRef,
},
}
if w.GpgNoSigningEnabled {
cmds = append(cmds, []string{
"git", "config", "--local", "commit.gpgsign", "false",
})
}
// We use --no-ff because we always want there to be a merge commit.
// This way, our branch will look the same regardless if the merge
// could be fast forwarded. This is useful later when we run
// git rev-parse HEAD^2 to get the head commit because it will
// always succeed whereas without --no-ff, if the merge was fast
// forwarded then git rev-parse HEAD^2 would fail.
cmds = append(cmds, []string{
"git", "merge", "-q", "--no-ff", "-m", "atlantis-merge", "FETCH_HEAD",
})
} else {
cmds = [][]string{
{
"git", "clone", "--branch", p.HeadBranch, "--depth=1", "--single-branch", headCloneURL, cloneDir,
},
}
}

for _, args := range cmds {
cmd := exec.Command(args[0], args[1:]...) // nolint: gosec
runGit := func(args ...string) error {
cmd := exec.Command("git", args...) // nolint: gosec
cmd.Dir = cloneDir
// The git merge command requires these env vars are set.
cmd.Env = append(os.Environ(), []string{
Expand All @@ -330,8 +289,50 @@ func (w *FileWorkspace) forceClone(log logging.SimpleLogging,
return fmt.Errorf("running %s: %s: %s", cmdStr, sanitizedOutput, sanitizedErrMsg)
}
log.Debug("ran: %s. Output: %s", cmdStr, strings.TrimSuffix(sanitizedOutput, "\n"))
return nil
}

if !w.CheckoutMerge {
return runGit("clone", "--depth=1", "--branch", p.HeadBranch, "--single-branch", headCloneURL, cloneDir)
}

if err := runGit("clone", "--depth", fmt.Sprint(w.CheckoutDepth), "--branch", p.BaseBranch, "--single-branch", baseCloneURL, cloneDir); err != nil {
return err
}
if err := runGit("remote", "add", "head", headCloneURL); err != nil {
return err
}

fetchRef := fmt.Sprintf("+refs/heads/%s:", p.HeadBranch)
fetchRemote := "head"
if w.GithubAppEnabled {
fetchRef = fmt.Sprintf("pull/%d/head:", p.Num)
fetchRemote = "origin"
}
if err := runGit("fetch", "--depth", fmt.Sprint(w.CheckoutDepth), fetchRemote, fetchRef); err != nil {
return err
}
return nil

if w.GpgNoSigningEnabled {
if err := runGit("config", "--local", "commit.gpgsign", "false"); err != nil {
return err
}
}
if err := runGit("merge-base", p.BaseBranch, "FETCH_HEAD"); err != nil {
// git merge-base returning error means that we did not receive enough commits in shallow clone.
// Fall back to retrieving full repo history.
if err := runGit("fetch", "--unshallow"); err != nil {
return err
}
}

// We use --no-ff because we always want there to be a merge commit.
// This way, our branch will look the same regardless if the merge
// could be fast forwarded. This is useful later when we run
// git rev-parse HEAD^2 to get the head commit because it will
// always succeed whereas without --no-ff, if the merge was fast
// forwarded then git rev-parse HEAD^2 would fail.
return runGit("merge", "-q", "--no-ff", "-m", "atlantis-merge", "FETCH_HEAD")
}

// GetWorkingDir returns the path to the workspace for this repo and pull.
Expand Down
94 changes: 94 additions & 0 deletions server/events/working_dir_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"net/http"
"os"
"path/filepath"
"strings"
"testing"

"github.com/runatlantis/atlantis/server/events"
Expand Down Expand Up @@ -84,6 +85,7 @@ func TestClone_CheckoutMergeNoneExisting(t *testing.T) {
wd := &events.FileWorkspace{
DataDir: dataDir,
CheckoutMerge: true,
CheckoutDepth: 50,
TestingOverrideHeadCloneURL: overrideURL,
TestingOverrideBaseCloneURL: overrideURL,
GpgNoSigningEnabled: true,
Expand Down Expand Up @@ -132,6 +134,7 @@ func TestClone_CheckoutMergeNoReclone(t *testing.T) {
wd := &events.FileWorkspace{
DataDir: dataDir,
CheckoutMerge: true,
CheckoutDepth: 50,
TestingOverrideHeadCloneURL: overrideURL,
TestingOverrideBaseCloneURL: overrideURL,
GpgNoSigningEnabled: true,
Expand Down Expand Up @@ -181,6 +184,7 @@ func TestClone_CheckoutMergeNoRecloneFastForward(t *testing.T) {
wd := &events.FileWorkspace{
DataDir: dataDir,
CheckoutMerge: true,
CheckoutDepth: 50,
TestingOverrideHeadCloneURL: overrideURL,
TestingOverrideBaseCloneURL: overrideURL,
GpgNoSigningEnabled: true,
Expand Down Expand Up @@ -235,6 +239,7 @@ func TestClone_CheckoutMergeConflict(t *testing.T) {
wd := &events.FileWorkspace{
DataDir: dataDir,
CheckoutMerge: true,
CheckoutDepth: 50,
TestingOverrideHeadCloneURL: overrideURL,
TestingOverrideBaseCloneURL: overrideURL,
GpgNoSigningEnabled: true,
Expand All @@ -254,6 +259,93 @@ func TestClone_CheckoutMergeConflict(t *testing.T) {
ErrContains(t, "exit status 1", err)
}

func TestClone_CheckoutMergeShallow(t *testing.T) {
// Initialize the git repo.
repoDir := initRepo(t)

runCmd(t, repoDir, "git", "commit", "--allow-empty", "-m", "should not be cloned")
oldCommit := strings.TrimSpace(runCmd(t, repoDir, "git", "rev-parse", "HEAD"))

runCmd(t, repoDir, "git", "commit", "--allow-empty", "-m", "merge-base")
baseCommit := strings.TrimSpace(runCmd(t, repoDir, "git", "rev-parse", "HEAD"))

runCmd(t, repoDir, "git", "branch", "-f", "branch", "HEAD")

// Add a commit to branch 'branch' that's not on master.
runCmd(t, repoDir, "git", "checkout", "branch")
runCmd(t, repoDir, "touch", "branch-file")
runCmd(t, repoDir, "git", "add", "branch-file")
runCmd(t, repoDir, "git", "commit", "-m", "branch-commit")

// Now switch back to master and advance the master branch by another
// commit.
runCmd(t, repoDir, "git", "checkout", "main")
runCmd(t, repoDir, "touch", "main-file")
runCmd(t, repoDir, "git", "add", "main-file")
runCmd(t, repoDir, "git", "commit", "-m", "main-commit")

overrideURL := fmt.Sprintf("file://%s", repoDir)

// Test that we don't check out full repo if using CheckoutMerge strategy
t.Run("Shallow", func(t *testing.T) {
dataDir := t.TempDir()

wd := &events.FileWorkspace{
DataDir: dataDir,
CheckoutMerge: true,
// retrieve two commits in each branch:
// master: master-commit, merge-base
// branch: branch-commit, merge-base
CheckoutDepth: 2,
TestingOverrideHeadCloneURL: overrideURL,
TestingOverrideBaseCloneURL: overrideURL,
GpgNoSigningEnabled: true,
}

cloneDir, hasDiverged, err := wd.Clone(logging.NewNoopLogger(t), models.Repo{}, models.PullRequest{
BaseRepo: models.Repo{},
HeadBranch: "branch",
BaseBranch: "main",
}, "default")
Ok(t, err)
Equals(t, false, hasDiverged)

gotBaseCommitType := runCmd(t, cloneDir, "git", "cat-file", "-t", baseCommit)
Assert(t, gotBaseCommitType == "commit\n", "should have merge-base in shallow repo")
gotOldCommitType := runCmdErrCode(t, cloneDir, 128, "git", "cat-file", "-t", oldCommit)
Assert(t, strings.Contains(gotOldCommitType, "could not get object info"), "should not have old commit in shallow repo")
})

// Test that we will check out full repo if CheckoutDepth is too small
t.Run("FullClone", func(t *testing.T) {
dataDir := t.TempDir()

wd := &events.FileWorkspace{
DataDir: dataDir,
CheckoutMerge: true,
// 1 is not enough to retrieve merge-base, so full clone should be performed
CheckoutDepth: 1,
TestingOverrideHeadCloneURL: overrideURL,
TestingOverrideBaseCloneURL: overrideURL,
GpgNoSigningEnabled: true,
}

cloneDir, hasDiverged, err := wd.Clone(logging.NewNoopLogger(t), models.Repo{}, models.PullRequest{
BaseRepo: models.Repo{},
HeadBranch: "branch",
BaseBranch: "main",
}, "default")
Ok(t, err)
Equals(t, false, hasDiverged)

gotBaseCommitType := runCmd(t, cloneDir, "git", "cat-file", "-t", baseCommit)
Assert(t, gotBaseCommitType == "commit\n", "should have merge-base in full repo")
gotOldCommitType := runCmd(t, cloneDir, "git", "cat-file", "-t", oldCommit)
Assert(t, gotOldCommitType == "commit\n", "should have old commit in full repo")
})

}

// Test that if the repo is already cloned and is at the right commit, we
// don't reclone.
func TestClone_NoReclone(t *testing.T) {
Expand Down Expand Up @@ -372,6 +464,7 @@ func TestClone_MasterHasDiverged(t *testing.T) {
wd := &events.FileWorkspace{
DataDir: repoDir,
CheckoutMerge: true,
CheckoutDepth: 50,
GpgNoSigningEnabled: true,
}
_, hasDiverged, err := wd.Clone(logging.NewNoopLogger(t), models.Repo{CloneURL: repoDir}, models.PullRequest{
Expand Down Expand Up @@ -448,6 +541,7 @@ func TestHasDiverged_MasterHasDiverged(t *testing.T) {
wd := &events.FileWorkspace{
DataDir: repoDir,
CheckoutMerge: true,
CheckoutDepth: 50,
GpgNoSigningEnabled: true,
}
hasDiverged := wd.HasDiverged(logging.NewNoopLogger(t), repoDir+"/repos/0/default")
Expand Down
1 change: 1 addition & 0 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,7 @@ func NewServer(userConfig UserConfig, config Config) (*Server, error) {
var workingDir events.WorkingDir = &events.FileWorkspace{
DataDir: userConfig.DataDir,
CheckoutMerge: userConfig.CheckoutStrategy == "merge",
CheckoutDepth: userConfig.CheckoutDepth,
GithubAppEnabled: githubAppEnabled,
}
// provide fresh tokens before clone from the GitHub Apps integration, proxy workingDir
Expand Down
1 change: 1 addition & 0 deletions server/user_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type UserConfig struct {
BitbucketToken string `mapstructure:"bitbucket-token"`
BitbucketUser string `mapstructure:"bitbucket-user"`
BitbucketWebhookSecret string `mapstructure:"bitbucket-webhook-secret"`
CheckoutDepth int `mapstructure:"checkout-depth"`
CheckoutStrategy string `mapstructure:"checkout-strategy"`
DataDir string `mapstructure:"data-dir"`
DisableApplyAll bool `mapstructure:"disable-apply-all"`
Expand Down

0 comments on commit 97be92e

Please sign in to comment.