From c77939291b8996aebdf41d1f771e6ceeb0ab7cce Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Wed, 6 Mar 2024 13:39:40 -0800 Subject: [PATCH] :bug: Limit Binary Artifact file reads to first 1024 bytes (#3923) * add OnMatchingFileReaderDo Signed-off-by: Spencer Schrock * switch binary artifact to using reader Signed-off-by: Spencer Schrock --------- Signed-off-by: Spencer Schrock --- checks/fileparser/listing.go | 46 +++++++++++++++++++++++++++++------ checks/raw/binary_artifact.go | 25 +++++++++++++------ 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/checks/fileparser/listing.go b/checks/fileparser/listing.go index 82f0c20ef7b..a51519dd12c 100644 --- a/checks/fileparser/listing.go +++ b/checks/fileparser/listing.go @@ -64,6 +64,21 @@ type PathMatcher struct { CaseSensitive bool } +// DoWhileTrueOnFileReader takes a filepath, its reader and +// optional variadic args. It returns a boolean indicating whether +// iterating over next files should continue. +type DoWhileTrueOnFileReader func(path string, reader io.Reader, args ...interface{}) (bool, error) + +// OnMatchingFileReaderDo matches all files listed by `repoClient` against `matchPathTo` +// and on every successful match, runs onFileReader fn on the file's reader. +// Continues iterating along the matched files until onFileReader returns +// either a false value or an error. +func OnMatchingFileReaderDo(repoClient clients.RepoClient, matchPathTo PathMatcher, + onFileReader DoWhileTrueOnFileReader, args ...interface{}, +) error { + return onMatchingFileDo(repoClient, matchPathTo, onFileReader, args...) +} + // DoWhileTrueOnFileContent takes a filepath, its content and // optional variadic args. It returns a boolean indicating whether // iterating over next files should continue. @@ -75,6 +90,12 @@ type DoWhileTrueOnFileContent func(path string, content []byte, args ...interfac // either a false value or an error. func OnMatchingFileContentDo(repoClient clients.RepoClient, matchPathTo PathMatcher, onFileContent DoWhileTrueOnFileContent, args ...interface{}, +) error { + return onMatchingFileDo(repoClient, matchPathTo, onFileContent, args...) +} + +func onMatchingFileDo(repoClient clients.RepoClient, matchPathTo PathMatcher, + onFile any, args ...interface{}, ) error { predicate := func(filepath string) (bool, error) { // Filter out test files. @@ -95,17 +116,28 @@ func OnMatchingFileContentDo(repoClient clients.RepoClient, matchPathTo PathMatc } for _, file := range matchedFiles { - rc, err := repoClient.GetFileReader(file) + reader, err := repoClient.GetFileReader(file) if err != nil { return fmt.Errorf("error during GetFileReader: %w", err) } - content, err := io.ReadAll(rc) - rc.Close() - if err != nil { - return fmt.Errorf("reading from file: %w", err) - } - continueIter, err := onFileContent(file, content, args...) + var continueIter bool + switch f := onFile.(type) { + case DoWhileTrueOnFileReader: + continueIter, err = f(file, reader, args...) + reader.Close() + case DoWhileTrueOnFileContent: + var content []byte + content, err = io.ReadAll(reader) + reader.Close() + if err != nil { + return fmt.Errorf("reading from file: %w", err) + } + continueIter, err = f(file, content, args...) + default: + msg := fmt.Sprintf("invalid type (%T) passed to onMatchingFileDo", f) + return sce.WithMessage(sce.ErrScorecardInternal, msg) + } if err != nil { return err } diff --git a/checks/raw/binary_artifact.go b/checks/raw/binary_artifact.go index 1c7a82f246d..ad963b092bc 100644 --- a/checks/raw/binary_artifact.go +++ b/checks/raw/binary_artifact.go @@ -17,6 +17,7 @@ package raw import ( "errors" "fmt" + "io" "path/filepath" "regexp" "strings" @@ -39,6 +40,9 @@ var ( gradleWrapperValidationActionVersionConstraint = mustParseConstraint(`>= 1.0.0`) ) +// how many bytes are considered when determining if a file is text or binary. +const binaryTestLen = 1024 + // mustParseConstraint attempts parse of semver constraint, panics if fail. func mustParseConstraint(c string) *semver.Constraints { if c, err := semver.NewConstraint(c); err != nil { @@ -52,10 +56,10 @@ func mustParseConstraint(c string) *semver.Constraints { func BinaryArtifacts(req *checker.CheckRequest) (checker.BinaryArtifactData, error) { c := req.RepoClient files := []checker.File{} - err := fileparser.OnMatchingFileContentDo(c, fileparser.PathMatcher{ + err := fileparser.OnMatchingFileReaderDo(c, fileparser.PathMatcher{ Pattern: "*", CaseSensitive: false, - }, checkBinaryFileContent, &files) + }, checkBinaryFileReader, &files) if err != nil { return checker.BinaryArtifactData{}, fmt.Errorf("%w", err) } @@ -96,17 +100,17 @@ func excludeValidatedGradleWrappers(c clients.RepoClient, files []checker.File) return files, nil } -var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path string, content []byte, +var checkBinaryFileReader fileparser.DoWhileTrueOnFileReader = func(path string, reader io.Reader, args ...interface{}, ) (bool, error) { if len(args) != 1 { return false, fmt.Errorf( - "checkBinaryFileContent requires exactly one argument: %w", errInvalidArgLength) + "checkBinaryFileReader requires exactly one argument: %w", errInvalidArgLength) } pfiles, ok := args[0].(*[]checker.File) if !ok { return false, fmt.Errorf( - "checkBinaryFileContent requires argument of type *[]checker.File: %w", errInvalidArgType) + "checkBinaryFileReader requires argument of type *[]checker.File: %w", errInvalidArgType) } binaryFileTypes := map[string]bool{ @@ -138,8 +142,13 @@ var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path strin "wasm": true, "whl": true, } + + content, err := io.ReadAll(io.LimitReader(reader, binaryTestLen)) + if err != nil { + return false, fmt.Errorf("reading file: %w", err) + } + var t types.Type - var err error if len(content) == 0 { return true, nil } @@ -169,12 +178,12 @@ var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path strin return true, nil } -// determines if the first 1024 bytes are text +// determines if the first binaryTestLen bytes are text // // A version of golang.org/x/tools/godoc/util modified to allow carriage returns // and utf8.RuneError (0xFFFD), as the file may not be utf8 encoded. func isText(s []byte) bool { - const max = 1024 // at least utf8.UTFMax + const max = binaryTestLen // at least utf8.UTFMax (4) if len(s) > max { s = s[0:max] }