Skip to content

Commit

Permalink
Add support for scanning APK files (#3517)
Browse files Browse the repository at this point in the history
* apk initial

* added a couple tests

* Update pkg/handlers/handlers.go

Co-authored-by: Richard Gomez <[email protected]>

* updating log status

* refactored + added new integration test

* updated test

* fixed linting error

* added keyword-value pairs during dex file scanning

* fixed test

* performance updates

* fixing decodeXML memory issue

* added feature flag

* refactored keyword matching + ahocorasick

* small updates

* refactored for dextk.WithReadCache()

---------

Co-authored-by: Richard Gomez <[email protected]>
  • Loading branch information
joeleonjr and rgmz authored Nov 15, 2024
1 parent a6e2b99 commit 8f2ebc9
Show file tree
Hide file tree
Showing 15 changed files with 795 additions and 26 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ require (
github.com/apache/arrow/go/v14 v14.0.2 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/atotto/clipboard v0.1.4 // indirect
github.com/avast/apkparser v0.0.0-20240729092610-90591e0804ae // indirect
github.com/aws/smithy-go v1.20.1 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
Expand All @@ -181,6 +182,7 @@ require (
github.com/couchbase/goprotostellar v1.0.2 // indirect
github.com/couchbaselabs/gocbconnstr/v2 v2.0.0-20240607131231-fb385523de28 // indirect
github.com/cpuguy83/dockercfg v0.3.2 // indirect
github.com/csnewman/dextk v0.3.0 // indirect
github.com/cyphar/filepath-securejoin v0.2.4 // indirect
github.com/danieljoos/wincred v1.1.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3d
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/avast/apkparser v0.0.0-20240729092610-90591e0804ae h1:rDNramK9mnAbvUBJyIRZnzHchM45cXexHIX9pS9da4Q=
github.com/avast/apkparser v0.0.0-20240729092610-90591e0804ae/go.mod h1:GNvprXNmXaDjpHmN3RFxz5QdK5VXTUvmQludCbjoBy4=
github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU=
github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU=
github.com/aws/smithy-go v1.20.1 h1:4SZlSlMr36UEqC7XOyRVb27XMeZubNcBNN+9IgEPIQw=
Expand Down Expand Up @@ -277,6 +279,8 @@ github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
github.com/crewjam/rfc5424 v0.1.0 h1:MSeXJm22oKovLzWj44AHwaItjIMUMugYGkEzfa831H8=
github.com/crewjam/rfc5424 v0.1.0/go.mod h1:RCi9M3xHVOeerf6ULZzqv2xOGRO/zYaVUeRyPnBW3gQ=
github.com/csnewman/dextk v0.3.0 h1:gigNZlZRNfCuARV7depunRlafEAzGhyvgBQo1FT3/0M=
github.com/csnewman/dextk v0.3.0/go.mod h1:FcDoI3258ea0KPQogyv4iazQRGcLFNOW+I4pHBUfNO0=
github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg=
github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/danieljoos/wincred v1.1.2 h1:QLdCxFs1/Yl4zduvBdcHB8goaYk9RARS2SgLLRuAyr0=
Expand Down
4 changes: 2 additions & 2 deletions hack/snifftest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/defaults"
"github.com/trufflesecurity/trufflehog/v3/pkg/log"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
Expand Down Expand Up @@ -247,7 +247,7 @@ func main() {

func getAllScanners() map[string]detectors.Detector {
allScanners := map[string]detectors.Detector{}
for _, s := range engine.DefaultDetectors() {
for _, s := range defaults.DefaultDetectors() {
secretType := reflect.Indirect(reflect.ValueOf(s)).Type().PkgPath()
path := strings.Split(secretType, "/")[len(strings.Split(secretType, "/"))-1]
allScanners[path] = s
Expand Down
6 changes: 5 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/config"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/defaults"
"github.com/trufflesecurity/trufflehog/v3/pkg/feature"
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
"github.com/trufflesecurity/trufflehog/v3/pkg/log"
Expand Down Expand Up @@ -409,6 +410,9 @@ func run(state overseer.State) {
feature.UserAgentSuffix.Store(*userAgentSuffix)
}

// The OSS build enables the APK handler by default.
feature.EnableAPKHandler.Store(true)

conf := &config.Config{}
if *configFilename != "" {
var err error
Expand Down Expand Up @@ -461,7 +465,7 @@ func run(state overseer.State) {
// default detectors, which can be further filtered by the
// user. The filters are applied by the engine and are only
// subtractive.
Detectors: append(engine.DefaultDetectors(), conf.Detectors...),
Detectors: append(defaults.DefaultDetectors(), conf.Detectors...),
Verify: !*noVerification,
IncludeDetectors: *includeDetectors,
ExcludeDetectors: *excludeDetectors,
Expand Down
72 changes: 69 additions & 3 deletions pkg/engine/defaults.go → pkg/engine/defaults/defaults.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
package engine
package defaults

import (
"bytes"
"strings"
"sync"

ahocorasick "github.com/BobuSumisu/aho-corasick"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/abbysale"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/abuseipdb"
Expand Down Expand Up @@ -811,8 +816,8 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

func DefaultDetectors() []detectors.Detector {
detectorList := []detectors.Detector{
func buildDetectorList() []detectors.Detector {
return []detectors.Detector{
&abbysale.Scanner{},
// &abstract.Scanner{},
&abuseipdb.Scanner{},
Expand Down Expand Up @@ -1647,6 +1652,10 @@ func DefaultDetectors() []detectors.Detector {
&zonkafeedback.Scanner{},
&zulipchat.Scanner{},
}
}

func DefaultDetectors() []detectors.Detector {
detectorList := buildDetectorList()

// Automatically initialize all detectors that implement
// EndpointCustomizer and/or CloudProvider interfaces.
Expand Down Expand Up @@ -1675,3 +1684,60 @@ func DefaultDetectorTypesImplementing[T any]() map[detectorspb.DetectorType]stru
}
return out
}

// defaultDetectorKeywords collects the lowercased keywords of every detector
// returned by buildDetectorList, dropping any keyword that appears on a fixed
// exclusion list of terms that cause lots of false positives.
//
// Keywords are appended in detector order and are not de-duplicated. The
// result is nil when no keyword survives the filter.
func defaultDetectorKeywords() []string {
	// Terms that cause lots of false positives. Repeated entries are harmless:
	// the list is only used to populate the lookup set below.
	noisy := []string{
		"AKIA", "SG.", "pat", "token", "gh", "github", "sql", "database", "http", "key", "api-", "sdk-", "float", "-us", "gh", "pat", "token", "sid", "http", "private", "key", "segment", "close", "protocols", "verifier", "box", "privacy", "dm", "sl.", "vf", "flat",
	}

	// Lowercase the exclusions so they compare equal to lowercased keywords.
	skip := make(map[string]struct{}, len(noisy))
	for _, term := range noisy {
		skip[strings.ToLower(term)] = struct{}{}
	}

	var collected []string
	for _, d := range buildDetectorList() {
		for _, raw := range d.Keywords() {
			lowered := strings.ToLower(raw)
			if _, drop := skip[lowered]; drop {
				continue
			}
			collected = append(collected, lowered)
		}
	}
	return collected
}

// DefaultDetectorKeywordMatcher encapsulates the Aho-Corasick trie for keyword matching.
// Instances are created with NewDefaultDetectorKeywordMatcher and queried with FindKeywords.
type DefaultDetectorKeywordMatcher struct {
// mu is read-locked by FindKeywords for the duration of each lookup.
mu sync.RWMutex
// trie is the automaton built from the default detector keywords.
trie *ahocorasick.Trie
}

// NewDefaultDetectorKeywordMatcher returns a matcher whose trie is built from
// the keywords produced by defaultDetectorKeywords.
func NewDefaultDetectorKeywordMatcher() *DefaultDetectorKeywordMatcher {
	kws := defaultDetectorKeywords()
	builder := ahocorasick.NewTrieBuilder().AddStrings(kws)

	matcher := new(DefaultDetectorKeywordMatcher)
	matcher.trie = builder.Build()
	return matcher
}

// FindKeywords lowercases text, runs it through the trie, and returns every
// distinct keyword that matched, each listed once in the order the trie first
// reported it. The returned slice is non-nil even when nothing matches.
func (km *DefaultDetectorKeywordMatcher) FindKeywords(text []byte) []string {
	km.mu.RLock()
	defer km.mu.RUnlock()

	hits := km.trie.Match(bytes.ToLower(text))

	unique := make([]string, 0, len(hits))
	recorded := make(map[string]struct{}) // keywords already appended to unique
	for _, hit := range hits {
		kw := hit.MatchString()
		if _, dup := recorded[kw]; dup {
			continue
		}
		recorded[kw] = struct{}{}
		unique = append(unique, kw)
	}
	return unique
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package engine
package defaults

import (
"testing"
Expand Down
9 changes: 5 additions & 4 deletions pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/defaults"
"github.com/trufflesecurity/trufflehog/v3/pkg/giturl"
"github.com/trufflesecurity/trufflehog/v3/pkg/output"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand Down Expand Up @@ -345,7 +346,7 @@ func (e *Engine) setDefaults(ctx context.Context) {

// Only use the default detectors if none are provided.
if len(e.detectors) == 0 {
e.detectors = DefaultDetectors()
e.detectors = defaults.DefaultDetectors()
}

if e.dispatcher == nil {
Expand Down Expand Up @@ -398,7 +399,7 @@ func parseCustomVerifierEndpoints(endpoints map[string]string) (map[config.Detec
return nil, fmt.Errorf("invalid verifier detector configuration id %v: %w", id, err)
}
// Extra check for endpoint customization.
isEndpointCustomizer := DefaultDetectorTypesImplementing[detectors.EndpointCustomizer]()
isEndpointCustomizer := defaults.DefaultDetectorTypesImplementing[detectors.EndpointCustomizer]()
for id := range customVerifierEndpoints {
if _, ok := isEndpointCustomizer[id.ID]; !ok {
return nil, fmt.Errorf("endpoint provided but detector does not support endpoint customization: %w", err)
Expand Down Expand Up @@ -435,7 +436,7 @@ func getWithDetectorID[T any](d detectors.Detector, data map[config.DetectorID]T
// verifyDetectorsAreVersioner checks all keys in a provided map to verify the
// provided type is actually a Versioner.
func verifyDetectorsAreVersioner[T any](data map[config.DetectorID]T) (config.DetectorID, error) {
isVersioner := DefaultDetectorTypesImplementing[detectors.Versioner]()
isVersioner := defaults.DefaultDetectorTypesImplementing[detectors.Versioner]()
for id := range data {
if id.Version == 0 {
// Version not provided.
Expand Down Expand Up @@ -564,7 +565,7 @@ func (e *Engine) GetDetectorsMetrics() map[string]time.Duration {
e.metrics.mu.RLock()
defer e.metrics.mu.RUnlock()

result := make(map[string]time.Duration, len(DefaultDetectors()))
result := make(map[string]time.Duration, len(defaults.DefaultDetectors()))
for detectorName, durations := range e.DetectorAvgTime() {
var total time.Duration
for _, d := range durations {
Expand Down
17 changes: 9 additions & 8 deletions pkg/engine/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/defaults"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/custom_detectorspb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
Expand Down Expand Up @@ -259,7 +260,7 @@ func TestEngine_DuplicateSecrets(t *testing.T) {
conf := Config{
Concurrency: 1,
Decoders: decoders.DefaultDecoders(),
Detectors: DefaultDetectors(),
Detectors: defaults.DefaultDetectors(),
Verify: false,
SourceManager: sourceManager,
Dispatcher: NewPrinterDispatcher(new(discardPrinter)),
Expand Down Expand Up @@ -360,7 +361,7 @@ even more`,
conf := Config{
Concurrency: 1,
Decoders: decoders.DefaultDecoders(),
Detectors: DefaultDetectors(),
Detectors: defaults.DefaultDetectors(),
Verify: false,
SourceManager: sourceManager,
Dispatcher: lineCapturer,
Expand Down Expand Up @@ -891,12 +892,12 @@ func TestLikelyDuplicate(t *testing.T) {
// Initialize detectors
// (not actually calling detector FromData or anything, just using detector struct for key creation)
detectorA := ahocorasick.DetectorMatch{
Key: ahocorasick.CreateDetectorKey(DefaultDetectors()[0]),
Detector: DefaultDetectors()[0],
Key: ahocorasick.CreateDetectorKey(defaults.DefaultDetectors()[0]),
Detector: defaults.DefaultDetectors()[0],
}
detectorB := ahocorasick.DetectorMatch{
Key: ahocorasick.CreateDetectorKey(DefaultDetectors()[1]),
Detector: DefaultDetectors()[1],
Key: ahocorasick.CreateDetectorKey(defaults.DefaultDetectors()[1]),
Detector: defaults.DefaultDetectors()[1],
}

// Define test cases
Expand Down Expand Up @@ -1037,7 +1038,7 @@ func TestFilterResults_CustomCleaner(t *testing.T) {
}

func BenchmarkPopulateMatchingDetectors(b *testing.B) {
allDetectors := DefaultDetectors()
allDetectors := defaults.DefaultDetectors()
ac := ahocorasick.NewAhoCorasickCore(allDetectors)

// Generate sample data with keywords from detectors.
Expand Down Expand Up @@ -1164,7 +1165,7 @@ func TestEngineInitializesCloudProviderDetectors(t *testing.T) {
ctx := context.Background()
conf := Config{
Concurrency: 1,
Detectors: DefaultDetectors(),
Detectors: defaults.DefaultDetectors(),
Verify: false,
SourceManager: sources.NewManager(),
Dispatcher: NewPrinterDispatcher(new(discardPrinter)),
Expand Down
3 changes: 2 additions & 1 deletion pkg/engine/gcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/defaults"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

Expand Down Expand Up @@ -72,7 +73,7 @@ func TestScanGCS(t *testing.T) {
conf := Config{
Concurrency: 1,
Decoders: decoders.DefaultDecoders(),
Detectors: DefaultDetectors(),
Detectors: defaults.DefaultDetectors(),
Verify: false,
SourceManager: sourceManager,
Dispatcher: NewPrinterDispatcher(new(discardPrinter)),
Expand Down
5 changes: 3 additions & 2 deletions pkg/engine/git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/defaults"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources/git"
Expand Down Expand Up @@ -73,7 +74,7 @@ func TestGitEngine(t *testing.T) {
conf := Config{
Concurrency: 1,
Decoders: decoders.DefaultDecoders(),
Detectors: DefaultDetectors(),
Detectors: defaults.DefaultDetectors(),
Verify: true,
SourceManager: sourceManager,
Dispatcher: NewPrinterDispatcher(new(discardPrinter)),
Expand Down Expand Up @@ -135,7 +136,7 @@ func BenchmarkGitEngine(b *testing.B) {
conf := Config{
Concurrency: runtime.NumCPU(),
Decoders: decoders.DefaultDecoders(),
Detectors: DefaultDetectors(),
Detectors: defaults.DefaultDetectors(),
Verify: false,
SourceManager: sourceManager,
Dispatcher: NewPrinterDispatcher(new(discardPrinter)),
Expand Down
3 changes: 2 additions & 1 deletion pkg/engine/postman_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/defaults"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

Expand Down Expand Up @@ -55,7 +56,7 @@ func TestPostmanEngine(t *testing.T) {
conf := Config{
Concurrency: 1,
Decoders: decoders.DefaultDecoders(),
Detectors: DefaultDetectors(),
Detectors: defaults.DefaultDetectors(),
Verify: false,
SourceManager: sourceManager,
Dispatcher: NewPrinterDispatcher(new(discardPrinter)),
Expand Down
1 change: 1 addition & 0 deletions pkg/feature/feature.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ var (
ForceSkipBinaries atomic.Bool
ForceSkipArchives atomic.Bool
SkipAdditionalRefs atomic.Bool
EnableAPKHandler atomic.Bool
UserAgentSuffix AtomicString
)

Expand Down
Loading

0 comments on commit 8f2ebc9

Please sign in to comment.