diff --git a/clients/azuredevopsrepo/client.go b/clients/azuredevopsrepo/client.go index 0d5e417769c..35495f18e91 100644 --- a/clients/azuredevopsrepo/client.go +++ b/clients/azuredevopsrepo/client.go @@ -42,6 +42,7 @@ type Client struct { repo *git.GitRepository branches *branchesHandler commits *commitsHandler + zip *zipHandler commitDepth int } @@ -84,6 +85,8 @@ func (c *Client) InitRepo(inputRepo clients.Repo, commitSHA string, commitDepth c.commits.init(c.ctx, c.repourl, c.commitDepth) + c.zip.init(c.ctx, c.repourl) + return nil } @@ -96,15 +99,15 @@ func (c *Client) IsArchived() (bool, error) { } func (c *Client) ListFiles(predicate func(string) (bool, error)) ([]string, error) { - return []string{}, clients.ErrUnsupportedFeature + return c.zip.listFiles(predicate) } func (c *Client) LocalPath() (string, error) { - return "", clients.ErrUnsupportedFeature + return c.zip.getLocalPath() } func (c *Client) GetFileReader(filename string) (io.ReadCloser, error) { - return nil, clients.ErrUnsupportedFeature + return c.zip.getFile(filename) } func (c *Client) GetBranch(branch string) (*clients.BranchRef, error) { @@ -180,7 +183,7 @@ func (c *Client) SearchCommits(request clients.SearchCommitsOptions) ([]clients. } func (c *Client) Close() error { - return nil + return c.zip.cleanup() } func CreateAzureDevOpsClient(ctx context.Context, repo clients.Repo) (*Client, error) { @@ -193,6 +196,8 @@ func CreateAzureDevOpsClientWithToken(ctx context.Context, token string, repo cl url := "https://" + repo.Host() + "/" + strings.Split(repo.Path(), "/")[0] connection := azuredevops.NewPatConnection(url, token) + client := connection.GetClientByUrl(url) + gitClient, err := git.NewClient(ctx, connection) if err != nil { return nil, fmt.Errorf("could not create azure devops git client with error: %w", err) @@ -207,5 +212,8 @@ func CreateAzureDevOpsClientWithToken(ctx context.Context, token string, repo cl commits: &commitsHandler{ gitClient: gitClient, }, + zip: &zipHandler{ + client: client, + }, }, nil } diff --git a/clients/azuredevopsrepo/testdata/basic.zip b/clients/azuredevopsrepo/testdata/basic.zip new file mode 100644 index 00000000000..280d8545a4d Binary files /dev/null and b/clients/azuredevopsrepo/testdata/basic.zip differ diff --git a/clients/azuredevopsrepo/zip.go b/clients/azuredevopsrepo/zip.go new file mode 100644 index 00000000000..6ac179ffe30 --- /dev/null +++ b/clients/azuredevopsrepo/zip.go @@ -0,0 +1,243 @@ +// Copyright 2024 OpenSSF Scorecard Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package azuredevopsrepo + +import ( + "archive/zip" + "context" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/microsoft/azure-devops-go-api/azuredevops/v7" + + sce "github.com/ossf/scorecard/v5/errors" +) + +const ( + repoDir = "repo*" + repoFilename = "azuredevopsrepo*.zip" + maxSize = 100 * 1024 * 1024 // 100MB limit +) + +var ( + errUnexpectedStatusCode = errors.New("unexpected status code") + errZipNotFound = errors.New("zip not found") + errInvalidFilePath = errors.New("invalid zip file: contains file path outside of target directory") + errFileTooLarge = errors.New("file too large, possible zip bomb") +) + +type zipHandler struct { + client *azuredevops.Client + errSetup error + once *sync.Once + ctx context.Context + repourl *Repo + tempDir string + tempZipFile string + files []string +} + +func (z *zipHandler) init(ctx context.Context, repourl *Repo) { + z.errSetup = nil + z.once = new(sync.Once) + z.ctx = ctx + z.repourl = repourl +} + +func (z *zipHandler) setup() error { + z.once.Do(func() { + if err := z.cleanup(); err != nil { + z.errSetup = sce.WithMessage(sce.ErrScorecardInternal, err.Error()) + return + } + + if err := z.getZipfile(); err != nil { + z.errSetup = sce.WithMessage(sce.ErrScorecardInternal, err.Error()) + return + } + + if err := z.extractZip(); err != nil { + z.errSetup = sce.WithMessage(sce.ErrScorecardInternal, err.Error()) + return + } + }) + + return z.errSetup +} + +func (z *zipHandler) getZipfile() error { + tempDir, err := os.MkdirTemp("", repoDir) + if err != nil { + return fmt.Errorf("os.MkdirTemp: %w", err) + } + repoFile, err := os.CreateTemp(tempDir, repoFilename) + if err != nil { + return fmt.Errorf("%w io.Copy: %w", errZipNotFound, err) + } + defer repoFile.Close() + + // The zip download API is not exposed in the Azure DevOps Go SDK, so we need to construct the request manually. + baseURL := fmt.Sprintf( + "https://%s/%s/%s/_apis/git/repositories/%s/items", + z.repourl.host, + z.repourl.organization, + z.repourl.project, + z.repourl.id) + + queryParams := url.Values{} + queryParams.Add("path", "/") + queryParams.Add("download", "true") + queryParams.Add("api-version", "7.1-preview.1") + queryParams.Add("resolveLfs", "true") + queryParams.Add("$format", "zip") + + if z.repourl.commitSHA == "HEAD" { + queryParams.Add("versionDescriptor.versionType", "branch") + queryParams.Add("versionDescriptor.version", z.repourl.defaultBranch) + } else { + queryParams.Add("versionDescriptor.versionType", "commit") + queryParams.Add("versionDescriptor.version", z.repourl.commitSHA) + } + + parsedURL, err := url.Parse(baseURL + "?" + queryParams.Encode()) + if err != nil { + return fmt.Errorf("url.Parse: %w", err) + } + + req := &http.Request{ + Method: http.MethodGet, + URL: parsedURL, + } + res, err := z.client.SendRequest(req) + if err != nil { + return fmt.Errorf("client.SendRequest: %w", err) + } + defer res.Body.Close() + + if res.StatusCode != http.StatusOK { + return fmt.Errorf("%w: status code %d", errUnexpectedStatusCode, res.StatusCode) + } + + if _, err := io.Copy(repoFile, res.Body); err != nil { + return fmt.Errorf("io.Copy: %w", err) + } + + z.tempDir = tempDir + z.tempZipFile = repoFile.Name() + + return nil +} + +func (z *zipHandler) getLocalPath() (string, error) { + if err := z.setup(); err != nil { + return "", fmt.Errorf("error during zipHandler.setup: %w", err) + } + absTempDir, err := filepath.Abs(z.tempDir) + if err != nil { + return "", fmt.Errorf("error during filepath.Abs: %w", err) + } + return absTempDir, nil +} + +func (z *zipHandler) extractZip() error { + zipReader, err := zip.OpenReader(z.tempZipFile) + if err != nil { + return fmt.Errorf("zip.OpenReader: %w", err) + } + defer zipReader.Close() + + destinationPrefix := filepath.Clean(z.tempDir) + string(os.PathSeparator) + z.files = make([]string, 0, len(zipReader.File)) + for _, file := range zipReader.File { + //nolint:gosec // G305: Handling of file paths is done below + filenamepath := filepath.Join(z.tempDir, file.Name) + if !strings.HasPrefix(filepath.Clean(filenamepath), destinationPrefix) { + return errInvalidFilePath + } + if file.FileInfo().IsDir() { + if err := os.MkdirAll(filenamepath, 0o755); err != nil { + return fmt.Errorf("error during os.MkdirAll: %w", err) + } + continue + } + + outFile, err := os.OpenFile(filenamepath, os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("os.OpenFile: %w", err) + } + + rc, err := file.Open() + if err != nil { + return fmt.Errorf("file.Open: %w", err) + } + + written, err := io.CopyN(outFile, rc, maxSize) + if err != nil && !errors.Is(err, io.EOF) { + return fmt.Errorf("%w io.Copy: %w", errZipNotFound, err) + } + if written > maxSize { + return errFileTooLarge + } + outFile.Close() + + filename := strings.TrimPrefix(filenamepath, destinationPrefix) + z.files = append(z.files, filename) + } + return nil +} + +func (z *zipHandler) listFiles(predicate func(string) (bool, error)) ([]string, error) { + if err := z.setup(); err != nil { + return nil, fmt.Errorf("error during zipHandler.setup: %w", err) + } + ret := make([]string, 0) + for _, file := range z.files { + matches, err := predicate(file) + if err != nil { + return nil, err + } + if matches { + ret = append(ret, file) + } + } + return ret, nil +} + +func (z *zipHandler) getFile(filename string) (*os.File, error) { + if err := z.setup(); err != nil { + return nil, fmt.Errorf("error during zipHandler.setup: %w", err) + } + f, err := os.Open(filepath.Join(z.tempDir, filename)) + if err != nil { + return nil, fmt.Errorf("open file: %w", err) + } + return f, nil +} + +func (z *zipHandler) cleanup() error { + if err := os.RemoveAll(z.tempDir); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("os.Remove: %w", err) + } + + z.files = nil + return nil +} diff --git a/clients/azuredevopsrepo/zip_test.go b/clients/azuredevopsrepo/zip_test.go new file mode 100644 index 00000000000..9e65bddd5c2 --- /dev/null +++ b/clients/azuredevopsrepo/zip_test.go @@ -0,0 +1,183 @@ +// Copyright 2024 OpenSSF Scorecard Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package azuredevopsrepo + +import ( + "errors" + "fmt" + "io" + "os" + "strings" + "sync" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" +) + +type listfileTest struct { + predicate func(string) (bool, error) + err error + outcome []string +} + +type getcontentTest struct { + err error + filename string + output []byte +} + +func isSortedString(x, y string) bool { + return x < y +} + +func setup(inputFile string) (zipHandler, error) { + tempDir, err := os.MkdirTemp("", repoDir) + if err != nil { + return zipHandler{}, fmt.Errorf("test failed to create TempDir: %w", err) + } + tempFile, err := os.CreateTemp(tempDir, repoFilename) + if err != nil { + return zipHandler{}, fmt.Errorf("test failed to create TempFile: %w", err) + } + testFile, err := os.OpenFile(inputFile, os.O_RDONLY, 0o644) + if err != nil { + return zipHandler{}, fmt.Errorf("unable to open testfile: %w", err) + } + if _, err := io.Copy(tempFile, testFile); err != nil { + return zipHandler{}, fmt.Errorf("unable to do io.Copy: %w", err) + } + zipHandler := zipHandler{ + tempDir: tempDir, + tempZipFile: tempFile.Name(), + once: new(sync.Once), + } + zipHandler.once.Do(func() { + // We don't want to run the code in zipHandler.setup(), so if we execute zipHandler.once.Do() right + // here, it won't get executed later when setup() is called. + }) + return zipHandler, nil +} + +//nolint:gocognit +func TestExtractZip(t *testing.T) { + t.Parallel() + testcases := []struct { + name string + inputFile string + listfileTests []listfileTest + getcontentTests []getcontentTest + }{ + { + name: "Basic", + inputFile: "testdata/basic.zip", + listfileTests: []listfileTest{ + { + // Returns all files in the zip. + predicate: func(string) (bool, error) { return true, nil }, + outcome: []string{"file0", "dir1/file1", "dir1/dir2/file2"}, + }, + { + // Skips all files inside `dir1/dir2` directory. + predicate: func(fn string) (bool, error) { return !strings.HasPrefix(fn, "dir1/dir2"), nil }, + outcome: []string{"file0", "dir1/file1"}, + }, + { + // Skips all files. + predicate: func(fn string) (bool, error) { return false, nil }, + outcome: []string{}, + }, + }, + getcontentTests: []getcontentTest{ + { + filename: "file0", + output: []byte("content0\n"), + }, + { + filename: "dir1/file1", + output: []byte("content1\n"), + }, + { + filename: "dir1/dir2/file2", + output: []byte("content2\n"), + }, + { + filename: "does/not/exist", + err: os.ErrNotExist, + }, + }, + }, + } + + for _, testcase := range testcases { + testcase := testcase + t.Run(testcase.name, func(t *testing.T) { + t.Parallel() + + // Setup + handler, err := setup(testcase.inputFile) + if err != nil { + t.Fatalf("test setup failed: %v", err) + } + + // Extract zip. + if err := handler.extractZip(); err != nil { + t.Fatalf("test failed: %v", err) + } + + // Test ListFiles API. + for _, listfiletest := range testcase.listfileTests { + matchedFiles, err := handler.listFiles(listfiletest.predicate) + if !errors.Is(err, listfiletest.err) { + t.Errorf("test failed: expected - %v, got - %v", listfiletest.err, err) + continue + } + if !cmp.Equal(listfiletest.outcome, + matchedFiles, + cmpopts.SortSlices(isSortedString)) { + t.Errorf("test failed: expected - %q, got - %q", listfiletest.outcome, matchedFiles) + } + } + + // Test GetFileContent API. + for _, getcontenttest := range testcase.getcontentTests { + f, err := handler.getFile(getcontenttest.filename) + if getcontenttest.err != nil && !errors.Is(err, getcontenttest.err) { + t.Errorf("test failed: expected - %v, got - %v", getcontenttest.err, err) + } + if getcontenttest.err == nil { + content, err := io.ReadAll(f) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !cmp.Equal(getcontenttest.output, content) { + t.Errorf("test failed: expected - %s, got - %s", string(getcontenttest.output), string(content)) + } + } + } + + // Test that files get deleted. + if err := handler.cleanup(); err != nil { + t.Errorf("test failed: %v", err) + } + if _, err := os.Stat(handler.tempDir); !os.IsNotExist(err) { + t.Errorf("%v", err) + } + if len(handler.files) != 0 { + t.Error("client.files not cleaned up!") + } + }) + } +}