From b59a16990abd42e7cb80b55139269ca778a5a94b Mon Sep 17 00:00:00 2001 From: afdesk Date: Mon, 30 Sep 2024 18:42:46 +0600 Subject: [PATCH] feat(secret): enhance secret scanning for python binary files (#7223) Signed-off-by: knqyf263 Co-authored-by: knqyf263 --- docs/docs/scanner/secret.md | 4 +- pkg/fanal/analyzer/secret/secret.go | 28 ++++++++++--- pkg/fanal/analyzer/secret/secret_test.go | 25 ++++++++++++ .../secret/testdata/secret.cpython-310.pyc | Bin 0 -> 226 bytes pkg/fanal/secret/scanner.go | 10 ++++- pkg/fanal/utils/utils.go | 38 ++++++++++++++++++ 6 files changed, 96 insertions(+), 9 deletions(-) create mode 100755 pkg/fanal/analyzer/secret/testdata/secret.cpython-310.pyc diff --git a/docs/docs/scanner/secret.md b/docs/docs/scanner/secret.md index 49d8c8488ece..ea74ae64c930 100644 --- a/docs/docs/scanner/secret.md +++ b/docs/docs/scanner/secret.md @@ -3,7 +3,9 @@ Trivy scans any container image, filesystem and git repository to detect exposed secrets like passwords, api keys, and tokens. Secret scanning is enabled by default. -Trivy will scan every plaintext file, according to builtin rules or configuration. There are plenty of builtin rules: +Trivy will scan every plaintext file, according to builtin rules or configuration. Also, Trivy can detect secrets in compiled Python files (`.pyc`). + +There are plenty of builtin rules: - AWS access key - GCP service account diff --git a/pkg/fanal/analyzer/secret/secret.go b/pkg/fanal/analyzer/secret/secret.go index d2627a840c1b..bdb3e96d1428 100644 --- a/pkg/fanal/analyzer/secret/secret.go +++ b/pkg/fanal/analyzer/secret/secret.go @@ -54,6 +54,9 @@ var ( ".gz", ".gzip", ".tar", + } + + allowedBinaries = []string{ ".pyc", } ) @@ -63,6 +66,10 @@ func init() { analyzer.RegisterAnalyzer(NewSecretAnalyzer(secret.Scanner{}, "")) } +func allowedBinary(filename string) bool { + return slices.Contains(allowedBinaries, filepath.Ext(filename)) +} + // SecretAnalyzer is an analyzer for secrets type SecretAnalyzer struct { scanner secret.Scanner @@ -96,7 +103,7 @@ func (a *SecretAnalyzer) Init(opt analyzer.AnalyzerOptions) error { func (a *SecretAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput) (*analyzer.AnalysisResult, error) { // Do not scan binaries binary, err := utils.IsBinary(input.Content, input.Info.Size()) - if binary || err != nil { + if err != nil || (binary && !allowedBinary(input.FilePath)) { return nil, nil } @@ -104,12 +111,20 @@ func (a *SecretAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput log.WithPrefix("secret").Warn("The size of the scanned file is too large. It is recommended to use `--skip-files` for this file to avoid high memory consumption.", log.FilePath(input.FilePath), log.Int64("size (MB)", size/1048576)) } - content, err := io.ReadAll(input.Content) - if err != nil { - return nil, xerrors.Errorf("read error %s: %w", input.FilePath, err) - } + var content []byte - content = bytes.ReplaceAll(content, []byte("\r"), []byte("")) + if !binary { + content, err = io.ReadAll(input.Content) + if err != nil { + return nil, xerrors.Errorf("read error %s: %w", input.FilePath, err) + } + content = bytes.ReplaceAll(content, []byte("\r"), []byte("")) + } else { + content, err = utils.ExtractPrintableBytes(input.Content) + if err != nil { + return nil, xerrors.Errorf("binary read error %s: %w", input.FilePath, err) + } + } filePath := input.FilePath // Files extracted from the image have an empty input.Dir. @@ -122,6 +137,7 @@ func (a *SecretAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput result := a.scanner.Scan(secret.ScanArgs{ FilePath: filePath, Content: content, + Binary: binary, }) if len(result.Findings) == 0 { diff --git a/pkg/fanal/analyzer/secret/secret_test.go b/pkg/fanal/analyzer/secret/secret_test.go index 7cba1d137e8f..5187ebc341bf 100644 --- a/pkg/fanal/analyzer/secret/secret_test.go +++ b/pkg/fanal/analyzer/secret/secret_test.go @@ -95,6 +95,16 @@ func TestSecretAnalyzer(t *testing.T) { }, }, } + wantFindingGH_PAT := types.SecretFinding{ + RuleID: "github-fine-grained-pat", + Category: "GitHub", + Title: "GitHub Fine-grained personal access tokens", + Severity: "CRITICAL", + StartLine: 1, + EndLine: 1, + Match: "Binary file \"/testdata/secret.cpython-310.pyc\" matches a rule \"GitHub Fine-grained personal access tokens\"", + } + tests := []struct { name string configPath string @@ -153,6 +163,21 @@ func TestSecretAnalyzer(t *testing.T) { filePath: "testdata/binaryfile", want: nil, }, + { + name: "python binary file", + configPath: "testdata/skip-tests-config.yaml", + filePath: "testdata/secret.cpython-310.pyc", + want: &analyzer.AnalysisResult{ + Secrets: []types.Secret{ + { + FilePath: "/testdata/secret.cpython-310.pyc", + Findings: []types.SecretFinding{ + wantFindingGH_PAT, + }, + }, + }, + }, + }, } for _, tt := range tests { diff --git a/pkg/fanal/analyzer/secret/testdata/secret.cpython-310.pyc b/pkg/fanal/analyzer/secret/testdata/secret.cpython-310.pyc new file mode 100755 index 0000000000000000000000000000000000000000..2bb659e7eb21abd1fed3da8d1f085600619d6f4b GIT binary patch literal 226 zcmd1j<>g`kf~l#i(<*@UV-N=!FabFZKwK;UBvKfn7*ZKi8JZax8B!R788n%0#inPL zWRxbw7bKR%8yY&fxVreddm0qydZb1gR(g7bC%VKZd6tJ1dsQZeB)a>Ag{D`fx>Xh# zI;KWsS45O#Mmjr(W*3&arFy3MmPa}Hc!rvIxca9?IfeN+R+jkzZD21>O)g3;F}%fE tP?VWhvXY^Q1?Uto@rzqeA0n$)P minLength { + // add a newline between printable lines to separate them + _ = currentPrintableLine.WriteByte('\n') + result = append(result, currentPrintableLine.Bytes()...) + } + currentPrintableLine.Reset() + } + if currentPrintableLine.Len() > minLength { + // add a newline between printable lines to separate them + _ = currentPrintableLine.WriteByte('\n') + result = append(result, currentPrintableLine.Bytes()...) + } + return result, nil +}