From d3e3f3944ba6aa978cec89e1d93c2650c3e7a697 Mon Sep 17 00:00:00 2001 From: Michael Crenshaw Date: Tue, 29 Mar 2022 18:13:48 -0400 Subject: [PATCH] fix: prevent excessive repo-server disk usage for large repos (#8845) (#8897) fix: prevent excessive repo-server disk usage for large repos (#8845) (#8897) Signed-off-by: Michael Crenshaw Signed-off-by: wojtekidd --- reposerver/repository/repository.go | 37 ++++++++++-------- reposerver/repository/repository_test.go | 48 ++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 15 deletions(-) diff --git a/reposerver/repository/repository.go b/reposerver/repository/repository.go index 9c897af055693..f6fc8aa859c6c 100644 --- a/reposerver/repository/repository.go +++ b/reposerver/repository/repository.go @@ -1678,33 +1678,40 @@ func directoryPermissionInitializer(rootPath string) goio.Closer { // nolint:unparam func (s *Service) checkoutRevision(gitClient git.Client, revision string, submoduleEnabled bool) (goio.Closer, error) { closer := s.gitRepoInitializer(gitClient.Root()) + return closer, checkoutRevision(gitClient, revision, submoduleEnabled) +} + +func checkoutRevision(gitClient git.Client, revision string, submoduleEnabled bool) error { err := gitClient.Init() if err != nil { - return closer, status.Errorf(codes.Internal, "Failed to initialize git repo: %v", err) + return status.Errorf(codes.Internal, "Failed to initialize git repo: %v", err) } - err = gitClient.Fetch(revision) + // Fetching with no revision first. Fetching with an explicit version can cause repo bloat. https://github.com/argoproj/argo-cd/issues/8845 + err = gitClient.Fetch("") + if err != nil { + return status.Errorf(codes.Internal, "Failed to fetch default: %v", err) + } + err = gitClient.Checkout(revision, submoduleEnabled) if err != nil { - log.Infof("Failed to fetch revision %s: %v", revision, err) - log.Infof("Fallback to fetch default") - err = gitClient.Fetch("") + // When fetching with no revision, only refs/heads/* and refs/remotes/origin/* are fetched. If checkout fails + // for the given revision, try explicitly fetching it. + log.Infof("Failed to checkout revision %s: %v", revision, err) + log.Infof("Fallback to fetching specific revision %s. ref might not have been in the default refspec fetched.", revision) + + err = gitClient.Fetch(revision) if err != nil { - return closer, status.Errorf(codes.Internal, "Failed to fetch default: %v", err) + return status.Errorf(codes.Internal, "Failed to checkout revision %s: %v", revision, err) } - err = gitClient.Checkout(revision, submoduleEnabled) + + err = gitClient.Checkout("FETCH_HEAD", submoduleEnabled) if err != nil { - return closer, status.Errorf(codes.Internal, "Failed to checkout revision %s: %v", revision, err) + return status.Errorf(codes.Internal, "Failed to checkout FETCH_HEAD: %v", err) } - return closer, err - } - - err = gitClient.Checkout("FETCH_HEAD", submoduleEnabled) - if err != nil { - return closer, status.Errorf(codes.Internal, "Failed to checkout FETCH_HEAD: %v", err) } - return closer, err + return err } func (s *Service) GetHelmCharts(ctx context.Context, q *apiclient.HelmChartsRequest) (*apiclient.HelmChartsResponse, error) { diff --git a/reposerver/repository/repository_test.go b/reposerver/repository/repository_test.go index 1727991bdfd2e..6c2a96a6782b1 100644 --- a/reposerver/repository/repository_test.go +++ b/reposerver/repository/repository_test.go @@ -1779,3 +1779,51 @@ func TestInit(t *testing.T) { require.Error(t, err) require.NoError(t, initGitRepo(path.Join(dir, "repo2"), "https://github.com/argo-cd/test-repo2")) } + +// TestCheckoutRevisionCanGetNonstandardRefs shows that we can fetch a revision that points to a non-standard ref. In +// other words, we haven't regressed and caused this issue again: https://github.com/argoproj/argo-cd/issues/4935 +func TestCheckoutRevisionCanGetNonstandardRefs(t *testing.T) { + rootPath, err := ioutil.TempDir("", "") + require.NoError(t, err) + + sourceRepoPath, err := ioutil.TempDir(rootPath, "") + require.NoError(t, err) + + // Create a repo such that one commit is on a non-standard ref _and nowhere else_. This is meant to simulate, for + // example, a GitHub ref for a pull into one repo from a fork of that repo. + runGit(t, sourceRepoPath, "init") + runGit(t, sourceRepoPath, "checkout", "-b", "main") // make sure there's a main branch to switch back to + runGit(t, sourceRepoPath, "commit", "-m", "empty", "--allow-empty") + runGit(t, sourceRepoPath, "checkout", "-b", "branch") + runGit(t, sourceRepoPath, "commit", "-m", "empty", "--allow-empty") + sha := runGit(t, sourceRepoPath, "rev-parse", "HEAD") + runGit(t, sourceRepoPath, "update-ref", "refs/pull/123/head", strings.TrimSuffix(sha, "\n")) + runGit(t, sourceRepoPath, "checkout", "main") + runGit(t, sourceRepoPath, "branch", "-D", "branch") + + destRepoPath, err := ioutil.TempDir(rootPath, "") + require.NoError(t, err) + + gitClient, err := git.NewClientExt("file://"+sourceRepoPath, destRepoPath, &git.NopCreds{}, true, false, "") + require.NoError(t, err) + + pullSha, err := gitClient.LsRemote("refs/pull/123/head") + require.NoError(t, err) + + err = checkoutRevision(gitClient, "does-not-exist", false) + assert.Error(t, err) + + err = checkoutRevision(gitClient, pullSha, false) + assert.NoError(t, err) +} + +// runGit runs a git command in the given working directory. If the command succeeds, it returns the combined standard +// and error output. If it fails, it stops the test with a failure message. +func runGit(t *testing.T, workDir string, args ...string) string { + cmd := exec.Command("git", args...) + cmd.Dir = workDir + out, err := cmd.CombinedOutput() + stringOut := string(out) + require.NoError(t, err, stringOut) + return stringOut +}