Skip to content

Commit

Permalink
enable more performant isText (ossf#2433)
Browse files Browse the repository at this point in the history
Signed-off-by: Spencer Schrock <[email protected]>

Signed-off-by: Spencer Schrock <[email protected]>
Signed-off-by: nathaniel.wert <[email protected]>
  • Loading branch information
spencerschrock authored and nathaniel.wert committed Nov 28, 2022
1 parent c98dda2 commit 594fc14
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 29 deletions.
35 changes: 8 additions & 27 deletions checks/raw/binary_artifact.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,9 @@ package raw

import (
"fmt"
"log"
"os"
"path/filepath"
"regexp"
"strings"
"unicode"
"unicode/utf8"

semver "github.com/Masterminds/semver/v3"
Expand Down Expand Up @@ -158,12 +155,7 @@ var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path strin
}

exists2 := binaryFileTypes[strings.ReplaceAll(filepath.Ext(path), ".", "")]
// TODO remove the comparison after testing in release-test worker
isTextFile := isText(content)
if _, enabled := os.LookupEnv("SCORECARD_COMPARE_ISTEXT"); enabled && isTextFile != isText2(content) {
log.Printf("isText implementations differ (raw.isText: %t) for file: %s", isTextFile, path)
}
if !isTextFile && exists2 {
if !isText(content) && exists2 {
*pfiles = append(*pfiles, checker.File{
Path: path,
Type: checker.FileTypeBinary,
Expand All @@ -174,22 +166,11 @@ var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path strin
return true, nil
}

// TODO: refine this function.
func isText(content []byte) bool {
for _, c := range string(content) {
if c == '\t' || c == '\n' || c == '\r' {
continue
}
if !unicode.IsPrint(c) {
return false
}
}
return true
}

// TODO decine between isText and isText2 after testing in release-test worker
// modified version of golang.org/x/tools/godoc/util.
func isText2(s []byte) bool {
// determines if the first 1024 bytes are text
//
// A version of golang.org/x/tools/godoc/util modified to allow carriage returns
// and utf8.RuneError (0xFFFD), as the file may not be utf8 encoded.
func isText(s []byte) bool {
const max = 1024 // at least utf8.UTFMax
if len(s) > max {
s = s[0:max]
Expand All @@ -199,8 +180,8 @@ func isText2(s []byte) bool {
// last char may be incomplete - ignore
break
}
if c == 0xFFFD || c < ' ' && c != '\n' && c != '\t' && c != '\r' {
// decoding error or control character - not a text file
if c < ' ' && c != '\n' && c != '\t' && c != '\r' {
// control character - not a text file
return false
}
}
Expand Down
2 changes: 0 additions & 2 deletions cron/k8s/worker.release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ spec:
value: "10.4.4.210:80"
- name: "SCORECARD_API_RESULTS_BUCKET_URL"
value: "gs://ossf-scorecard-cron-releasetest-results"
- name: SCORECARD_COMPARE_ISTEXT
value: "1"
resources:
requests:
memory: 5Gi
Expand Down

0 comments on commit 594fc14

Please sign in to comment.