From 3e5741c7333df706ce8eda793e9cddb87a16580e Mon Sep 17 00:00:00 2001 From: Richard Gomez Date: Fri, 25 Oct 2024 19:25:53 -0400 Subject: [PATCH] feat(sumologic): update detector --- pkg/detectors/sumologickey/sumologickey.go | 163 +++++++++++++----- .../sumologickey_integration_test.go | 121 +++++++++++++ .../sumologickey/sumologickey_test.go | 148 ++++++---------- 3 files changed, 297 insertions(+), 135 deletions(-) create mode 100644 pkg/detectors/sumologickey/sumologickey_integration_test.go diff --git a/pkg/detectors/sumologickey/sumologickey.go b/pkg/detectors/sumologickey/sumologickey.go index 3dc310edd117..fbda29d42a48 100644 --- a/pkg/detectors/sumologickey/sumologickey.go +++ b/pkg/detectors/sumologickey/sumologickey.go @@ -2,88 +2,165 @@ package sumologickey import ( "context" - b64 "encoding/base64" "fmt" - regexp "github.com/wasilibs/go-re2" + "io" "net/http" "strings" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + + regexp "github.com/wasilibs/go-re2" + "golang.org/x/exp/maps" ) -type Scanner struct{ +type Scanner struct { + client *http.Client + detectors.EndpointSetter detectors.DefaultMultiPartCredentialProvider } // Ensure the Scanner satisfies the interface at compile time. -var _ detectors.Detector = (*Scanner)(nil) +var ( + _ detectors.Detector = (*Scanner)(nil) + _ detectors.EndpointCustomizer = (*Scanner)(nil) +) var ( - client = common.SaneHttpClient() + defaultClient = common.SaneHttpClient() + + // Detect which instance the key is associated with. + // https://help.sumologic.com/docs/api/getting-started/#documentation + urlPat = regexp.MustCompile(`(?i)api\.(?:au|ca|de|eu|fed|jp|kr|in|us2)\.sumologic\.com`) // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. - idPat = regexp.MustCompile(detectors.PrefixRegex([]string{"sumo"}) + `\b([A-Za-z0-9]{14})\b`) - keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"sumo"}) + `\b([A-Za-z0-9]{64})\b`) + idPat = regexp.MustCompile(detectors.PrefixRegex([]string{"sumo", "accessId"}) + `\b(su[A-Za-z0-9]{12})\b`) + keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"sumo", "accessKey"}) + `\b([A-Za-z0-9]{64})\b`) ) // Keywords are used for efficiently pre-filtering chunks. // Use identifiers in the secret preferably, or the provider name. func (s Scanner) Keywords() []string { - return []string{"sumologic"} + return []string{"sumo", "accessId", "accessKey"} } +// Default US API endpoint. +func (Scanner) CloudEndpoint() string { return "api.sumologic.com" } + // FromData will find and optionally verify SumoLogicKey secrets in a given set of bytes. func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) - idMatches := idPat.FindAllStringSubmatch(dataStr, -1) - matches := keyPat.FindAllStringSubmatch(dataStr, -1) - for _, idMatch := range idMatches { - if len(idMatch) != 2 { - continue - } - resIdMatch := strings.TrimSpace(idMatch[1]) - for _, match := range matches { - if len(match) != 2 { - continue - } - resMatch := strings.TrimSpace(match[1]) - - s1 := detectors.Result{ - DetectorType: detectorspb.DetectorType_SumoLogicKey, - Raw: []byte(resMatch), - ExtraData: map[string]string{ - "rotation_guide": "https://howtorotate.com/docs/tutorials/sumologic/", - }, - } + idMatches := make(map[string]struct{}) + for _, match := range idPat.FindAllStringSubmatch(dataStr, -1) { + idMatches[match[1]] = struct{}{} + } + keyMatches := make(map[string]struct{}) + for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) { + keyMatches[match[1]] = struct{}{} + } + endpointMatches := make(map[string]struct{}) + for _, match := range urlPat.FindAllStringSubmatch(dataStr, -1) { + endpointMatches[match[0]] = struct{}{} + } + endpoints := s.Endpoints(maps.Keys(endpointMatches)...) + + for accessKey := range keyMatches { + var ( + r *detectors.Result + accessId string + apiEndpoint string + ) + + IdLoop: + for id := range idMatches { + accessId = id + + for _, e := range endpoints { + apiEndpoint = e + + if verify { + client := s.client + if client == nil { + client = defaultClient + } - if verify { - data := fmt.Sprintf("%s:%s", resIdMatch, resMatch) - encoded := b64.StdEncoding.EncodeToString([]byte(data)) - req, err := http.NewRequestWithContext(ctx, "GET", "https://api.us2.sumologic.com/api/v1/users", nil) - if err != nil { - continue - } - req.Header.Add("Authorization", fmt.Sprintf("Basic %s", encoded)) - res, err := client.Do(req) - if err == nil { - defer res.Body.Close() - if res.StatusCode >= 200 && res.StatusCode < 300 { - s1.Verified = true + isVerified, verificationErr := verifyMatch(ctx, client, apiEndpoint, accessId, accessKey) + if isVerified || (len(idMatches) == 1 && len(endpoints) == 1) { + r = createResult(accessId, accessKey, apiEndpoint, isVerified, verificationErr) + break IdLoop } } } - - results = append(results, s1) } + if r == nil { + // Only include the accessId if we're confident which one it is. + if len(idMatches) != 1 { + accessId = "" + } + r = createResult(accessId, accessKey, apiEndpoint, false, nil) + } + results = append(results, *r) } return results, nil } +func verifyMatch(ctx context.Context, client *http.Client, endpoint string, id string, key string) (bool, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("https://%s/api/v1/users", endpoint), nil) + if err != nil { + return false, nil + } + + req.SetBasicAuth(id, key) + res, err := client.Do(req) + if err != nil { + return false, err + } + defer func() { + _, _ = io.Copy(io.Discard, res.Body) + _ = res.Body.Close() + }() + + switch res.StatusCode { + case http.StatusOK: + // If the endpoint returns useful information, we can return it as a map. + return true, nil + case http.StatusUnauthorized: + // The secret is determinately not verified (nothing to do) + return false, nil + default: + return false, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode) + } +} + +func createResult(accessId string, accessKey string, endpoint string, verified bool, err error) *detectors.Result { + r := &detectors.Result{ + DetectorType: detectorspb.DetectorType_SumoLogicKey, + Raw: []byte(accessKey), + Verified: verified, + ExtraData: map[string]string{ + "rotation_guide": "https://howtorotate.com/docs/tutorials/sumologic/", + }, + } + r.SetVerificationError(err, accessKey) + + // |endpoint| and |accessId| won't be specified unless there's a confident match. + if endpoint != "" && accessId != "" { + var sb strings.Builder + sb.WriteString(`{`) + sb.WriteString(`"url":"` + endpoint + `"`) + sb.WriteString(`,"accessId":"` + accessId + `"`) + sb.WriteString(`,"accessKey":"` + accessKey + `"`) + sb.WriteString(`}`) + r.RawV2 = []byte(sb.String()) + } + + return r +} + func (s Scanner) Type() detectorspb.DetectorType { return detectorspb.DetectorType_SumoLogicKey } diff --git a/pkg/detectors/sumologickey/sumologickey_integration_test.go b/pkg/detectors/sumologickey/sumologickey_integration_test.go new file mode 100644 index 000000000000..e95e34041d37 --- /dev/null +++ b/pkg/detectors/sumologickey/sumologickey_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package sumologickey + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestSumoLogicKey_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + id := testSecrets.MustGetField("SUMOLOGIC_ACCESSID") + secret := testSecrets.MustGetField("SUMOLOGIC_ACCESSKEY") + inactiveId := testSecrets.MustGetField("SUMOLOGIC_ACCESSKEY_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a sumologickey secret %s within %s", id, secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_SumoLogicKey, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a sumologickey secret %s within %s but not valid", inactiveId, secret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_SumoLogicKey, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("SumoLogicKey.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("SumoLogicKey.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/sumologickey/sumologickey_test.go b/pkg/detectors/sumologickey/sumologickey_test.go index e95e34041d37..29f6b46c7846 100644 --- a/pkg/detectors/sumologickey/sumologickey_test.go +++ b/pkg/detectors/sumologickey/sumologickey_test.go @@ -1,121 +1,85 @@ -//go:build detectors -// +build detectors - package sumologickey import ( "context" - "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/google/go-cmp/cmp" ) -func TestSumoLogicKey_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - id := testSecrets.MustGetField("SUMOLOGIC_ACCESSID") - secret := testSecrets.MustGetField("SUMOLOGIC_ACCESSKEY") - inactiveId := testSecrets.MustGetField("SUMOLOGIC_ACCESSKEY_INACTIVE") - - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestSumoLogicKey_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a sumologickey secret %s within %s", id, secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_SumoLogicKey, - Verified: true, - }, - }, - wantErr: false, + name: "typical pattern", + input: `sumologic: + accessId: suDkVYKjXZAwsz + accessKey: Khk3i2ugMxMgkb8bIA2auj4I8juZ3HiimDNssjzYdGqfizPZcxHK70a0LckgRSCL + clusterName: Kubernetes_cluster-2024-10-25T21:34:23.096Z`, + want: []string{"Khk3i2ugMxMgkb8bIA2auj4I8juZ3HiimDNssjzYdGqfizPZcxHK70a0LckgRSCL"}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a sumologickey secret %s within %s but not valid", inactiveId, secret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_SumoLogicKey, - Verified: false, - }, - }, - wantErr: false, + name: "finds all matches", + input: `sumoId1 = 'suaRYt6iLL8cxl' +sumoKey1 = 'CzrMhR8zzy1eH1F0XlY1tu5ywqa2yaSFoWGg2cqE43XkfnUVCytnPQfv1enUYrzv' +sumoId2 = 'suDkVYKjXZBwsz' +sumoKey2 = 'Khk3i2ugMxMgkb8bIA2auj4I8juZ3HiimDNssjzYdGqfizPZcxHK21a0LckgRSCL'`, + want: []string{"CzrMhR8zzy1eH1F0XlY1tu5ywqa2yaSFoWGg2cqE43XkfnUVCytnPQfv1enUYrzv", "Khk3i2ugMxMgkb8bIA2auj4I8juZ3HiimDNssjzYdGqfizPZcxHK21a0LckgRSCL"}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invald pattern", + input: "sumoId = 'doDkVYKjXZAwsz'", + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("SumoLogicKey.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("SumoLogicKey.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } }