feat(sumologic): update detector (#3511)

trufflesecurity · Nov 7, 2024 · d421a94 · d421a94
1 parent 3077215
commit d421a94
Show file tree

Hide file tree

Showing 3 changed files with 310 additions and 135 deletions.
diff --git a/pkg/detectors/sumologickey/sumologickey.go b/pkg/detectors/sumologickey/sumologickey.go
@@ -2,88 +2,169 @@ package sumologickey
 
 import (
 	"context"
-	b64 "encoding/base64"
 	"fmt"
-	regexp "github.com/wasilibs/go-re2"
+	"io"
 	"net/http"
 	"strings"
 
 	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
+
+	regexp "github.com/wasilibs/go-re2"
 )
 
-type Scanner struct{
+// Scanner is the SumoLogicKey detector.
+// client may be left nil, in which case FromData verifies with the package-level defaultClient.
+type Scanner struct {
+	client *http.Client
+	detectors.EndpointSetter
 	detectors.DefaultMultiPartCredentialProvider
 }
 
 // Ensure the Scanner satisfies the interface at compile time.
-var _ detectors.Detector = (*Scanner)(nil)
+var (
+	_ detectors.Detector           = (*Scanner)(nil)
+	_ detectors.EndpointCustomizer = (*Scanner)(nil)
+)
 
 var (
-	client = common.SaneHttpClient()
+	// defaultClient is the HTTP client used for verification when Scanner.client is nil.
+	defaultClient = common.SaneHttpClient()
+
+	// Detect which instance the key is associated with.
+	// https://help.sumologic.com/docs/api/getting-started/#documentation
+	// Only region-prefixed hosts are matched here; the un-prefixed host returned by
+	// CloudEndpoint is not matched and is used by FromData as the fallback.
+	urlPat = regexp.MustCompile(`(?i)api\.(?:au|ca|de|eu|fed|jp|kr|in|us2)\.sumologic\.com`)
 
 	// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
-	idPat  = regexp.MustCompile(detectors.PrefixRegex([]string{"sumo"}) + `\b([A-Za-z0-9]{14})\b`)
-	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"sumo"}) + `\b([A-Za-z0-9]{64})\b`)
+	// idPat captures a 14-character access ID: the literal "su" followed by 12 alphanumerics.
+	// keyPat captures a 64-character alphanumeric access key.
+	idPat  = regexp.MustCompile(detectors.PrefixRegex([]string{"sumo", "accessId"}) + `\b(su[A-Za-z0-9]{12})\b`)
+	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"sumo", "accessKey"}) + `\b([A-Za-z0-9]{64})\b`)
 )
 
 // Keywords are used for efficiently pre-filtering chunks.
 // Use identifiers in the secret preferably, or the provider name.
+// The returned keywords are the same prefixes passed to detectors.PrefixRegex for idPat and keyPat.
 func (s Scanner) Keywords() []string {
-	return []string{"sumologic"}
+	return []string{"sumo", "accessId", "accessKey"}
 }
 
+// CloudEndpoint returns the default (US) API host, "api.sumologic.com".
+// FromData falls back to this host when urlPat finds no regional host in the scanned data.
+func (Scanner) CloudEndpoint() string { return "api.sumologic.com" }
+
 // FromData will find and optionally verify SumoLogicKey secrets in a given set of bytes.
+//
+// Access IDs, access keys and regional API hosts are de-duplicated into sets; when no
+// regional host is present the default CloudEndpoint is used. One result is emitted per
+// access key. With verify set, each (access ID, endpoint) combination is tried until one
+// verifies. If none verifies, the result is unverified and carries the last verification
+// error seen (if any), so an inconclusive check is not reported as a definite rejection.
 func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
 	dataStr := string(data)
-	idMatches := idPat.FindAllStringSubmatch(dataStr, -1)
-	matches := keyPat.FindAllStringSubmatch(dataStr, -1)
 
-	for _, idMatch := range idMatches {
-		if len(idMatch) != 2 {
-			continue
-		}
-		resIdMatch := strings.TrimSpace(idMatch[1])
-		for _, match := range matches {
-			if len(match) != 2 {
-				continue
-			}
-			resMatch := strings.TrimSpace(match[1])
-
-			s1 := detectors.Result{
-				DetectorType: detectorspb.DetectorType_SumoLogicKey,
-				Raw:          []byte(resMatch),
-				ExtraData: map[string]string{
-					"rotation_guide": "https://howtorotate.com/docs/tutorials/sumologic/",
-				},
-			}
+	// Sets keep each distinct candidate once, however often it appears in the chunk.
+	idMatches := make(map[string]struct{})
+	for _, match := range idPat.FindAllStringSubmatch(dataStr, -1) {
+		idMatches[match[1]] = struct{}{}
+	}
+	keyMatches := make(map[string]struct{})
+	for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) {
+		keyMatches[match[1]] = struct{}{}
+	}
+	endpointMatches := make(map[string]struct{})
+	for _, match := range urlPat.FindAllStringSubmatch(dataStr, -1) {
+		endpointMatches[match[0]] = struct{}{}
+	}
+	if len(endpointMatches) == 0 {
+		endpointMatches[s.CloudEndpoint()] = struct{}{}
+	}
 
-			if verify {
-				data := fmt.Sprintf("%s:%s", resIdMatch, resMatch)
-				encoded := b64.StdEncoding.EncodeToString([]byte(data))
-				req, err := http.NewRequestWithContext(ctx, "GET", "https://api.us2.sumologic.com/api/v1/users", nil)
-				if err != nil {
-					continue
-				}
-				req.Header.Add("Authorization", fmt.Sprintf("Basic %s", encoded))
-				res, err := client.Do(req)
-				if err == nil {
-					defer res.Body.Close()
-					if res.StatusCode >= 200 && res.StatusCode < 300 {
-						s1.Verified = true
+	for accessKey := range keyMatches {
+		// verifyErr holds the last error returned by verifyMatch for this key.
+		var (
+			r           *detectors.Result
+			accessId    string
+			apiEndpoint string
+			verifyErr   error
+		)
+
+	idLoop:
+		for id := range idMatches {
+			accessId = id
+
+			for e := range endpointMatches {
+				apiEndpoint = e
+
+				if verify {
+					client := s.client
+					if client == nil {
+						client = defaultClient
+					}
+
+					isVerified, verificationErr := verifyMatch(ctx, client, apiEndpoint, accessId, accessKey)
+					if verificationErr != nil {
+						// Keep the error so a failed attempt is not mistaken for a rejected credential.
+						verifyErr = verificationErr
+					}
+					if isVerified {
+						r = createResult(accessId, accessKey, apiEndpoint, isVerified, nil)
+						// The key is confirmed; skip the remaining ID/endpoint combinations.
+						break idLoop
 					}
 				}
 			}
-
-			results = append(results, s1)
 		}
 
+		if r == nil {
+			// Only include the accessId if we're confident which one it is.
+			if len(idMatches) != 1 {
+				accessId = ""
+			}
+			if len(endpointMatches) != 1 || apiEndpoint == s.CloudEndpoint() {
+				apiEndpoint = ""
+			}
+			r = createResult(accessId, accessKey, apiEndpoint, false, verifyErr)
+		}
+		results = append(results, *r)
 	}
 
 	return results, nil
 }
 
+// verifyMatch checks an access ID / access key pair by sending a basic-auth GET request
+// to https://<endpoint>/api/v1/users using client.
+//
+// It returns (true, nil) on HTTP 200 and (false, nil) on HTTP 401. Any other outcome
+// (the request cannot be built, the request fails, or another status is returned) is
+// reported as (false, err) because the validity of the credentials is then unknown.
+func verifyMatch(ctx context.Context, client *http.Client, endpoint string, id string, key string) (bool, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("https://%s/api/v1/users", endpoint), nil)
+	if err != nil {
+		// The check never ran, so this must not look like a rejected credential.
+		return false, err
+	}
+
+	req.SetBasicAuth(id, key)
+	res, err := client.Do(req)
+	if err != nil {
+		return false, err
+	}
+	defer func() {
+		// Drain before closing so the transport can reuse the connection.
+		_, _ = io.Copy(io.Discard, res.Body)
+		_ = res.Body.Close()
+	}()
+
+	switch res.StatusCode {
+	case http.StatusOK:
+		// The credentials were accepted.
+		return true, nil
+	case http.StatusUnauthorized:
+		// The secret is determinately not verified (nothing to do)
+		return false, nil
+	default:
+		return false, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
+	}
+}
+
+// createResult builds the detector result for one access key.
+// Raw is always the access key. RawV2 is a small JSON-style object holding the access ID,
+// the access key and, when endpoint is non-empty, the API host; it is only set when
+// accessId is non-empty. err is recorded via SetVerificationError.
+func createResult(accessId string, accessKey string, endpoint string, verified bool, err error) *detectors.Result {
+	result := &detectors.Result{
+		DetectorType: detectorspb.DetectorType_SumoLogicKey,
+		Raw:          []byte(accessKey),
+		Verified:     verified,
+		ExtraData: map[string]string{
+			"rotation_guide": "https://howtorotate.com/docs/tutorials/sumologic/",
+		},
+	}
+	result.SetVerificationError(err, accessKey)
+
+	// Callers pass an empty accessId when they cannot tell which ID belongs to the key.
+	if accessId == "" {
+		return result
+	}
+
+	var rawV2 strings.Builder
+	rawV2.WriteString(`{"accessId":"`)
+	rawV2.WriteString(accessId)
+	rawV2.WriteString(`","accessKey":"`)
+	rawV2.WriteString(accessKey)
+	rawV2.WriteString(`"`)
+	if endpoint != "" {
+		rawV2.WriteString(`,"url":"`)
+		rawV2.WriteString(endpoint)
+		rawV2.WriteString(`"`)
+	}
+	rawV2.WriteString(`}`)
+	result.RawV2 = []byte(rawV2.String())
+
+	return result
+}
+
+// Type returns detectorspb.DetectorType_SumoLogicKey, the same detector type that
+// createResult sets on every result.
 func (s Scanner) Type() detectorspb.DetectorType {
 	return detectorspb.DetectorType_SumoLogicKey
 }

diff --git a/pkg/detectors/sumologickey/sumologickey_integration_test.go b/pkg/detectors/sumologickey/sumologickey_integration_test.go
@@ -0,0 +1,121 @@
+//go:build detectors
+// +build detectors
+
+package sumologickey
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/kylelemons/godebug/pretty"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
+
+	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
+)
+
+// TestSumoLogicKey_FromChunk runs FromData against live test credentials loaded via
+// common.GetSecret and compares the detector type, verification status and extra data
+// of the results. Raw and RawV2 contain the secrets themselves and are cleared before
+// the comparison.
+func TestSumoLogicKey_FromChunk(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
+	defer cancel()
+	testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3")
+	if err != nil {
+		t.Fatalf("could not get test secrets from GCP: %s", err)
+	}
+	id := testSecrets.MustGetField("SUMOLOGIC_ACCESSID")
+	secret := testSecrets.MustGetField("SUMOLOGIC_ACCESSKEY")
+	// This field is an access key, not an access ID.
+	inactiveKey := testSecrets.MustGetField("SUMOLOGIC_ACCESSKEY_INACTIVE")
+
+	// createResult attaches this map to every result, so found cases must expect it.
+	wantExtraData := map[string]string{
+		"rotation_guide": "https://howtorotate.com/docs/tutorials/sumologic/",
+	}
+
+	type args struct {
+		ctx    context.Context
+		data   []byte
+		verify bool
+	}
+	tests := []struct {
+		name    string
+		s       Scanner
+		args    args
+		want    []detectors.Result
+		wantErr bool
+	}{
+		{
+			name: "found, verified",
+			s:    Scanner{},
+			args: args{
+				ctx:    context.Background(),
+				data:   []byte(fmt.Sprintf("You can find a sumologickey secret %s within %s", id, secret)),
+				verify: true,
+			},
+			want: []detectors.Result{
+				{
+					DetectorType: detectorspb.DetectorType_SumoLogicKey,
+					Verified:     true,
+					ExtraData:    wantExtraData,
+				},
+			},
+			wantErr: false,
+		},
+		{
+			name: "found, unverified",
+			s:    Scanner{},
+			args: args{
+				ctx:    context.Background(),
+				data:   []byte(fmt.Sprintf("You can find a sumologickey secret %s within %s but not valid", inactiveKey, secret)), // the secret would satisfy the regex but not pass validation
+				verify: true,
+			},
+			want: []detectors.Result{
+				{
+					DetectorType: detectorspb.DetectorType_SumoLogicKey,
+					Verified:     false,
+					ExtraData:    wantExtraData,
+				},
+			},
+			wantErr: false,
+		},
+		{
+			name: "not found",
+			s:    Scanner{},
+			args: args{
+				ctx:    context.Background(),
+				data:   []byte("You cannot find the secret within"),
+				verify: true,
+			},
+			want:    nil,
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Use the scanner configured for this case rather than a fresh one.
+			got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("SumoLogicKey.FromData() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			for i := range got {
+				if len(got[i].Raw) == 0 {
+					t.Fatalf("no raw secret present: \n %+v", got[i])
+				}
+				// Both fields embed the live secret and cannot be spelled out in want.
+				got[i].Raw = nil
+				got[i].RawV2 = nil
+			}
+			if diff := pretty.Compare(got, tt.want); diff != "" {
+				t.Errorf("SumoLogicKey.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
+			}
+		})
+	}
+}
+
+// BenchmarkFromData times FromData, with verification disabled, as one sub-benchmark
+// per entry returned by detectors.MustGetBenchmarkData.
+func BenchmarkFromData(benchmark *testing.B) {
+	scanner := Scanner{}
+	background := context.Background()
+	for label, payload := range detectors.MustGetBenchmarkData() {
+		benchmark.Run(label, func(b *testing.B) {
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				if _, err := scanner.FromData(background, false, payload); err != nil {
+					b.Fatal(err)
+				}
+			}
+		})
+	}
+}