Skip to content

Commit

Permalink
Only download repo tarball when necessary
Browse files Browse the repository at this point in the history
Previously, the tarball for github.com/google/oss-fuzz was downloaded every time scorecard was run, whether or not it was needed.
  • Loading branch information
Chris McGehee authored and naveensrinivasan committed Mar 7, 2022
1 parent 0268747 commit c1761a8
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 30 deletions.
4 changes: 1 addition & 3 deletions clients/githubrepo/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,7 @@ func (client *Client) InitRepo(inputRepo clients.Repo, commitSHA string) error {
}

// Init tarballHandler.
if err := client.tarball.init(client.ctx, client.repo, commitSHA); err != nil {
return fmt.Errorf("error during tarballHandler.init: %w", err)
}
client.tarball.init(client.ctx, client.repo, commitSHA)

// Setup GraphQL.
client.graphClient.init(client.ctx, client.repourl)
Expand Down
72 changes: 47 additions & 25 deletions clients/githubrepo/tarball.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"os"
"path/filepath"
"strings"
"sync"

"github.com/google/go-github/v38/github"

Expand Down Expand Up @@ -64,45 +65,60 @@ func extractAndValidateArchivePath(path, dest string) (string, error) {
}

// tarballHandler holds the state used to fetch a repository tarball on demand
// and to serve file listings and file contents from it. The download is not
// performed by init(); it is triggered by setup(), which listFiles and
// getFileContent call before doing their work.
type tarballHandler struct {
// errSetup is the error recorded by setup(); setup() returns it on every call.
errSetup error
// once guards the body of setup() so it runs at most once per init() call.
once *sync.Once
// ctx is the context used when building the tarball HTTP request.
ctx context.Context
// repo supplies the archive URL template for the tarball download.
repo *github.Repository
// commitSHA selects the ref substituted into the archive URL; when it equals
// clients.HeadSHA (case-insensitively) the ref placeholder is left empty.
commitSHA string
// tempDir is the directory that getFileContent joins with a filename to read
// a file's content.
tempDir string
// tempTarFile is presumably the path of the downloaded tarball file —
// TODO confirm against getTarball/extractTarball.
tempTarFile string
// files is the list of names that listFiles iterates and passes to its predicate.
files []string
}

func (handler *tarballHandler) init(ctx context.Context, repo *github.Repository, commitSHA string) error {
// Cleanup any previous state.
if err := handler.cleanup(); err != nil {
return sce.WithMessage(sce.ErrScorecardInternal, err.Error())
}
// init records the context, repository and commit SHA for later use and resets
// the handler's setup state. It performs no download and no cleanup itself:
// that work happens inside setup(), guarded by handler.once.
func (handler *tarballHandler) init(ctx context.Context, repo *github.Repository, commitSHA string) {
// Clear any error left over from a previous setup() run.
handler.errSetup = nil
// A fresh Once lets setup() run again for the newly configured repo/commit.
handler.once = new(sync.Once)
handler.ctx = ctx
handler.repo = repo
handler.commitSHA = commitSHA
}

// Setup temp dir/files and download repo tarball.
if err := handler.getTarball(ctx, repo, commitSHA); errors.Is(err, errTarballNotFound) {
log.Printf("unable to get tarball %v. Skipping...", err)
return nil
} else if err != nil {
return sce.WithMessage(sce.ErrScorecardInternal, err.Error())
}
func (handler *tarballHandler) setup() error {
handler.once.Do(func() {
// Cleanup any previous state.
if err := handler.cleanup(); err != nil {
handler.errSetup = sce.WithMessage(sce.ErrScorecardInternal, err.Error())
return
}

// Extract file names and content from tarball.
if err := handler.extractTarball(); errors.Is(err, errTarballCorrupted) {
log.Printf("unable to extract tarball %v. Skipping...", err)
return nil
} else if err != nil {
return sce.WithMessage(sce.ErrScorecardInternal, err.Error())
}
// Setup temp dir/files and download repo tarball.
if err := handler.getTarball(); errors.Is(err, errTarballNotFound) {
log.Printf("unable to get tarball %v. Skipping...", err)
return
} else if err != nil {
handler.errSetup = sce.WithMessage(sce.ErrScorecardInternal, err.Error())
return
}

return nil
// Extract file names and content from tarball.
if err := handler.extractTarball(); errors.Is(err, errTarballCorrupted) {
log.Printf("unable to extract tarball %v. Skipping...", err)
} else if err != nil {
handler.errSetup = sce.WithMessage(sce.ErrScorecardInternal, err.Error())
}
})
return handler.errSetup
}

func (handler *tarballHandler) getTarball(ctx context.Context, repo *github.Repository, commitSHA string) error {
url := repo.GetArchiveURL()
func (handler *tarballHandler) getTarball() error {
url := handler.repo.GetArchiveURL()
url = strings.Replace(url, "{archive_format}", "tarball/", 1)
if strings.EqualFold(commitSHA, clients.HeadSHA) {
if strings.EqualFold(handler.commitSHA, clients.HeadSHA) {
url = strings.Replace(url, "{/ref}", "", 1)
} else {
url = strings.Replace(url, "{/ref}", commitSHA, 1)
url = strings.Replace(url, "{/ref}", handler.commitSHA, 1)
}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
req, err := http.NewRequestWithContext(handler.ctx, http.MethodGet, url, nil)
if err != nil {
return fmt.Errorf("http.NewRequestWithContext: %w", err)
}
Expand Down Expand Up @@ -210,6 +226,9 @@ func (handler *tarballHandler) extractTarball() error {
}

func (handler *tarballHandler) listFiles(predicate func(string) (bool, error)) ([]string, error) {
if err := handler.setup(); err != nil {
return nil, fmt.Errorf("error during tarballHandler.setup: %w", err)
}
ret := make([]string, 0)
for _, file := range handler.files {
matches, err := predicate(file)
Expand All @@ -224,6 +243,9 @@ func (handler *tarballHandler) listFiles(predicate func(string) (bool, error)) (
}

func (handler *tarballHandler) getFileContent(filename string) ([]byte, error) {
if err := handler.setup(); err != nil {
return nil, fmt.Errorf("error during tarballHandler.setup: %w", err)
}
content, err := os.ReadFile(filepath.Join(handler.tempDir, filename))
if err != nil {
return content, fmt.Errorf("os.ReadFile: %w", err)
Expand Down
11 changes: 9 additions & 2 deletions clients/githubrepo/tarball_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"io"
"os"
"strings"
"sync"
"testing"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -58,10 +59,16 @@ func setup(inputFile string) (tarballHandler, error) {
if _, err := io.Copy(tempFile, testFile); err != nil {
return tarballHandler{}, fmt.Errorf("unable to do io.Copy: %w", err)
}
return tarballHandler{
tarballHandler := tarballHandler{
tempDir: tempDir,
tempTarFile: tempFile.Name(),
}, nil
once: new(sync.Once),
}
tarballHandler.once.Do(func() {
// We don't want to run the code in tarballHandler.setup(), so if we execute tarballHandler.once.Do() right
// here, it won't get executed later when setup() is called.
})
return tarballHandler, nil
}

// nolint: gocognit
Expand Down

0 comments on commit c1761a8

Please sign in to comment.