diff --git a/pkg/detectors/falsepositives.go b/pkg/detectors/falsepositives.go
index 3d7a576be60e..7fb1a51d0f2b 100644
--- a/pkg/detectors/falsepositives.go
+++ b/pkg/detectors/falsepositives.go
@@ -13,7 +13,15 @@ import (
 	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
 )
 
-var DefaultFalsePositives = []FalsePositive{"example", "xxxxxx", "aaaaaa", "abcde", "00000", "sample", "*****"}
+var (
+	// DefaultFalsePositives is a set of common placeholder terms.
+	DefaultFalsePositives = map[FalsePositive]struct{}{
+		"example": {}, "xxxxxx": {}, "aaaaaa": {}, "abcde": {}, "00000": {}, "sample": {}, "*****": {},
+	}
+	// UuidFalsePositives is the set of placeholder UUIDs embedded from
+	// fp_uuids.txt. It is nil until init has run.
+	UuidFalsePositives map[FalsePositive]struct{}
+)
 
 type FalsePositive string
 
@@ -24,18 +32,21 @@ type CustomFalsePositiveChecker interface {
 	IsFalsePositive(result Result) (bool, string)
 }
 
-//go:embed "badlist.txt"
-var badList []byte
-
-//go:embed "words.txt"
-var wordList []byte
-
-//go:embed "programmingbooks.txt"
-var programmingBookWords []byte
-
-var filter *ahocorasick.Trie
+var (
+	filter *ahocorasick.Trie
+
+	//go:embed "fp_badlist.txt"
+	badList []byte
+	//go:embed "fp_words.txt"
+	wordList []byte
+	//go:embed "fp_programmingbooks.txt"
+	programmingBookWords []byte
+	//go:embed "fp_uuids.txt"
+	uuidList []byte
+)
 
 func init() {
+	// Populate trie.
 	builder := ahocorasick.NewTrieBuilder()
 
 	wordList := bytesToCleanWordList(wordList)
@@ -47,7 +58,16 @@ func init() {
 	programmingBookWords := bytesToCleanWordList(programmingBookWords)
 	builder.AddStrings(programmingBookWords)
 
+	uuidList := bytesToCleanWordList(uuidList)
+	builder.AddStrings(uuidList)
+
 	filter = builder.Build()
+
+	// Populate custom FalsePositive list
+	UuidFalsePositives = make(map[FalsePositive]struct{}, len(uuidList))
+	for _, uuid := range uuidList {
+		UuidFalsePositives[FalsePositive(uuid)] = struct{}{}
+	}
 }
 
 func GetFalsePositiveCheck(detector Detector) func(Result) (bool, string) {
@@ -65,15 +85,30 @@ func GetFalsePositiveCheck(detector Detector) func(Result) (bool, string) {
 //
 // Currently, this includes: english word in key or matches common example patterns.
 // Only the secret key material should be passed into this function
-func IsKnownFalsePositive(match string, falsePositives []FalsePositive, wordCheck bool) (bool, string) {
+func IsKnownFalsePositive(match string, falsePositives map[FalsePositive]struct{}, wordCheck bool) (bool, string) {
 	if !utf8.ValidString(match) {
 		return true, "invalid utf8"
 	}
 	lower := strings.ToLower(match)
-	for _, fp := range falsePositives {
+
+	// Fast path: the whole lower-cased match is itself a known term.
+	if _, exists := falsePositives[FalsePositive(lower)]; exists {
+		return true, "matches term: " + lower
+	}
+
+	// Map iteration order is unspecified, so scan every term and report the
+	// lexicographically smallest hit; this keeps the reason stable across runs.
+	var (
+		hit   string
+		found bool
+	)
+	for fp := range falsePositives {
 		fps := string(fp)
-		if strings.Contains(lower, fps) {
-			return true, "matches term: " + fps
+		if strings.Contains(lower, fps) && (!found || fps < hit) {
+			hit, found = fps, true
 		}
 	}
+	if found {
+		return true, "contains term: " + hit
+	}
 
diff --git a/pkg/detectors/falsepositives_test.go b/pkg/detectors/falsepositives_test.go
index e022a539f124..a90672892042 100644
--- a/pkg/detectors/falsepositives_test.go
+++ b/pkg/detectors/falsepositives_test.go
@@ -1,6 +1,3 @@
-//go:build detectors
-// +build detectors
-
 package detectors
 
 import (
@@ -32,17 +29,23 @@ func (d fakeDetector) Type() detectorspb.DetectorType {
 func (f fakeDetector) Description() string { return "" }
 
 func (d customFalsePositiveChecker) IsFalsePositive(result Result) (bool, string) {
-	return IsKnownFalsePositive(string(result.Raw), []FalsePositive{"a specific magic string"}, false)
+	return IsKnownFalsePositive(string(result.Raw), map[FalsePositive]struct{}{"a specific magic string": {}}, false)
 }
 
 func TestFilterKnownFalsePositives_DefaultLogic(t *testing.T) {
 	results := []Result{
-		{Raw: []byte("00000")},           // "default" false positive list
-		{Raw: []byte("number")},          // from wordlist
-		{Raw: []byte("hga8adshla3434g")}, // real secret
+		{Raw: []byte("00000")},  // "default" false positive list
+		{Raw: []byte("number")}, // from wordlist
+		// from uuid list
+		{Raw: []byte("00000000-0000-0000-0000-000000000000")},
+		{Raw: []byte("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")},
+		// real secrets
+		{Raw: []byte("hga8adshla3434g")},
+		{Raw: []byte("f795f7db-2dfe-4095-96f3-8f8370c735f9")},
 	}
 	expected := []Result{
 		{Raw: []byte("hga8adshla3434g")},
+		{Raw: []byte("f795f7db-2dfe-4095-96f3-8f8370c735f9")},
 	}
 	filtered := FilterKnownFalsePositives(logContext.Background(), fakeDetector{}, results)
 	assert.ElementsMatch(t, expected, filtered)
@@ -67,7 +70,7 @@ func TestFilterKnownFalsePositives_CustomLogic(t *testing.T) {
 func TestIsFalsePositive(t *testing.T) {
 	type args struct {
 		match          string
-		falsePositives []FalsePositive
+		falsePositives map[FalsePositive]struct{}
 		useWordlist    bool
 	}
 	tests := []struct {
diff --git a/pkg/detectors/badlist.txt b/pkg/detectors/fp_badlist.txt
similarity index 100%
rename from pkg/detectors/badlist.txt
rename to pkg/detectors/fp_badlist.txt
diff --git a/pkg/detectors/programmingbooks.txt b/pkg/detectors/fp_programmingbooks.txt
similarity index 100%
rename from pkg/detectors/programmingbooks.txt
rename to pkg/detectors/fp_programmingbooks.txt
diff --git a/pkg/detectors/fp_uuids.txt b/pkg/detectors/fp_uuids.txt
new file mode 100644
index 000000000000..89a42a2efc36
--- /dev/null
+++ b/pkg/detectors/fp_uuids.txt
@@ -0,0 +1,37 @@
+00000000-0000-0000-0000-000000000000
+11111111-1111-1111-1111-111111111111
+22222222-2222-2222-2222-222222222222
+33333333-3333-3333-3333-333333333333
+44444444-4444-4444-4444-444444444444
+55555555-5555-5555-5555-555555555555
+66666666-6666-6666-6666-666666666666
+77777777-7777-7777-7777-777777777777
+88888888-8888-8888-8888-888888888888
+99999999-9999-9999-9999-999999999999
+12345678-1234-1234-1234-123456789abc
+23456789-2345-2345-2345-23456789abcd
+34567890-3456-3456-3456-34567890bcde
+45678901-4567-4567-4567-45678901cdef
+56789012-5678-5678-5678-56789012def0
+aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
+bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb
+cccccccc-cccc-cccc-cccc-cccccccccccc
+dddddddd-dddd-dddd-dddd-dddddddddddd
+eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee
+ffffffff-ffff-ffff-ffff-ffffffffffff
+deadbeef-dead-beef-dead-beefdeadbeef
+cafebabe-cafe-babe-cafe-babecafebabe
+badc0ffee-badc-0ffe-badc-0ffeebadc0f
+deadface-dead-face-dead-facedeadface
+feedface-feed-face-feed-facefeedface
+a1b2c3d4-a1b2-c3d4-a1b2-c3d4a1b2c3d4
+98765432-9876-5432-9876-543298765432
+abcdefab-cdef-abcd-efab-cdefabcdefab
+a0a0a0a0-a0a0-a0a0-a0a0-a0a0a0a0a0a0
+b0b0b0b0-b0b0-b0b0-b0b0-b0b0b0b0b0b0
+c0c0c0c0-c0c0-c0c0-c0c0-c0c0c0c0c0c0
+d0d0d0d0-d0d0-d0d0-d0d0-d0d0d0d0d0d0
+e0e0e0e0-e0e0-e0e0-e0e0-e0e0e0e0e0e0
+f0f0f0f0-f0f0-f0f0-f0f0-f0f0f0f0f0f0
+xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+-xxxx-xxxx-xxxx-xxxxxxxxxxxx
diff --git a/pkg/detectors/words.txt b/pkg/detectors/fp_words.txt
similarity index 100%
rename from pkg/detectors/words.txt
rename to pkg/detectors/fp_words.txt
diff --git a/pkg/detectors/ftp/ftp.go b/pkg/detectors/ftp/ftp.go
index 02d99cc41389..fc4a30382a4c 100644
--- a/pkg/detectors/ftp/ftp.go
+++ b/pkg/detectors/ftp/ftp.go
@@ -103,8 +103,12 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
 	return results, nil
 }
 
+var ftpFalsePositives = map[detectors.FalsePositive]struct{}{
+	detectors.FalsePositive("@ftp.freebsd.org"): {},
+}
+
 func (s Scanner) IsFalsePositive(result detectors.Result) (bool, string) {
-	return detectors.IsKnownFalsePositive(string(result.Raw), []detectors.FalsePositive{"@ftp.freebsd.org"}, false)
+	return detectors.IsKnownFalsePositive(string(result.Raw), ftpFalsePositives, false)
 }
 
 func isErrDeterminate(e error) bool {
diff --git a/pkg/detectors/github/v1/github_old.go b/pkg/detectors/github/v1/github_old.go
index 53a41478be4f..2ec2a8a564df 100644
--- a/pkg/detectors/github/v1/github_old.go
+++ b/pkg/detectors/github/v1/github_old.go
@@ -58,6 +58,10 @@ func (s Scanner) Keywords() []string {
 	return []string{"github", "gh", "pat", "token"}
 }
 
+var ghFalsePositives = map[detectors.FalsePositive]struct{}{
+	detectors.FalsePositive("github commit"): {},
+}
+
 // FromData will find and optionally verify GitHub secrets in a given set of bytes.
 func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
 	dataStr := string(data)
@@ -74,8 +78,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
 
 		// Note that this false positive check happens **before** verification! I don't know why it's written this way
 		// but that's why this logic wasn't moved into a CustomFalsePositiveChecker implementation.
-		specificFPs := []detectors.FalsePositive{"github commit"}
-		if isFp, _ := detectors.IsKnownFalsePositive(token, specificFPs, false); isFp {
+		if isFp, _ := detectors.IsKnownFalsePositive(token, ghFalsePositives, false); isFp {
 			continue
 		}
 