Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat] Gitlab inclusion globbing #3500

Merged
merged 7 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ var (
gitlabScanToken = gitlabScan.Flag("token", "GitLab token. Can be provided with environment variable GITLAB_TOKEN.").Envar("GITLAB_TOKEN").Required().String()
gitlabScanIncludePaths = gitlabScan.Flag("include-paths", "Path to file with newline separated regexes for files to include in scan.").Short('i').String()
gitlabScanExcludePaths = gitlabScan.Flag("exclude-paths", "Path to file with newline separated regexes for files to exclude in scan.").Short('x').String()
gitlabScanIncludeRepos = gitlabScan.Flag("include-repos", `Repositories to include in an org scan. This can also be a glob pattern. You can repeat this flag. Must use Gitlab repo full name. Example: "trufflesecurity/trufflehog", "trufflesecurity/t*"`).Strings()
gitlabScanExcludeRepos = gitlabScan.Flag("exclude-repos", `Repositories to exclude in an org scan. This can also be a glob pattern. You can repeat this flag. Must use Gitlab repo full name. Example: "trufflesecurity/driftwood", "trufflesecurity/d*"`).Strings()

filesystemScan = cli.Command("filesystem", "Find credentials in a filesystem.")
filesystemPaths = filesystemScan.Arg("path", "Path to file or directory to scan.").Strings()
Expand Down Expand Up @@ -674,10 +676,12 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
}

cfg := sources.GitlabConfig{
Endpoint: *gitlabScanEndpoint,
Token: *gitlabScanToken,
Repos: *gitlabScanRepos,
Filter: filter,
Endpoint: *gitlabScanEndpoint,
Token: *gitlabScanToken,
Repos: *gitlabScanRepos,
IncludeRepos: *gitlabScanIncludeRepos,
ExcludeRepos: *gitlabScanExcludeRepos,
Filter: filter,
}
if err := eng.ScanGitLab(ctx, cfg); err != nil {
return scanMetrics, fmt.Errorf("failed to scan GitLab: %v", err)
Expand Down
8 changes: 8 additions & 0 deletions pkg/engine/gitlab.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ func (e *Engine) ScanGitLab(ctx context.Context, c sources.GitlabConfig) error {
connection.Repositories = c.Repos
}

if len(c.IncludeRepos) > 0 {
connection.IncludeRepos = c.IncludeRepos
}

if len(c.ExcludeRepos) > 0 {
connection.IgnoreRepos = c.ExcludeRepos
}

var conn anypb.Any
err := anypb.MarshalFrom(&conn, connection, proto.MarshalOptions{})
if err != nil {
Expand Down
946 changes: 478 additions & 468 deletions pkg/pb/sourcespb/sources.pb.go

Large diffs are not rendered by default.

105 changes: 75 additions & 30 deletions pkg/sources/gitlab/gitlab.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,14 @@ type Source struct {
jobID sources.JobID
verify bool

authMethod string
user string
password string
token string
url string
repos []string
ignoreRepos []string
authMethod string
user string
password string
token string
url string
repos []string
ignoreRepos []string
includeRepos []string

useCustomContentWriter bool
git *git.Git
Expand Down Expand Up @@ -82,6 +83,56 @@ func (s *Source) JobID() sources.JobID {
return s.jobID
}

// globRepoFilter holds the compiled include and exclude glob patterns used
// to decide whether a repository should be scanned.
type globRepoFilter struct {
	// include holds the compiled include globs. When it is empty,
	// includeRepo accepts every repo.
	include []glob.Glob
	// exclude holds the compiled exclude globs checked by ignoreRepo.
	exclude []glob.Glob
}

// newGlobRepoFilter compiles the include and exclude patterns into a
// globRepoFilter.
//
// A pattern that fails to compile is reported through onCompileErr (together
// with the offending pattern) and skipped, so the returned filter only ever
// contains valid globs. Include patterns are compiled before exclude
// patterns, which fixes the order in which errors are reported. A nil
// onCompileErr is tolerated; compile failures are then skipped without being
// reported.
func newGlobRepoFilter(include, exclude []string, onCompileErr func(err error, pattern string)) *globRepoFilter {
	// Both lists are compiled identically, so share one closure instead of
	// repeating the loop.
	compile := func(patterns []string) []glob.Glob {
		globs := make([]glob.Glob, 0, len(patterns))
		for _, pattern := range patterns {
			g, err := glob.Compile(pattern)
			if err != nil {
				if onCompileErr != nil {
					onCompileErr(err, pattern)
				}
				continue
			}
			globs = append(globs, g)
		}
		return globs
	}

	includeGlobs := compile(include)
	excludeGlobs := compile(exclude)
	return &globRepoFilter{include: includeGlobs, exclude: excludeGlobs}
}

// ignoreRepo reports whether s matches at least one exclude glob. With no
// exclude globs it always returns false.
func (c *globRepoFilter) ignoreRepo(s string) bool {
	excluded := false
	// Stop at the first matching glob; later globs cannot change the answer.
	for i := 0; i < len(c.exclude) && !excluded; i++ {
		excluded = c.exclude[i].Match(s)
	}
	return excluded
}

// includeRepo reports whether s passes the include filter. When no include
// globs are present every repo passes; otherwise s must match at least one
// of them.
func (c *globRepoFilter) includeRepo(s string) bool {
	// An empty include list means "no restriction".
	included := len(c.include) == 0
	for i := 0; i < len(c.include) && !included; i++ {
		included = c.include[i].Match(s)
	}
	return included
}

// Init returns an initialized Gitlab source.
func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sourceId sources.SourceID, verify bool, connection *anypb.Any, concurrency int) error {
s.name = name
Expand All @@ -101,9 +152,12 @@ func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sou
return fmt.Errorf("error unmarshalling connection: %w", err)
}

s.repos = conn.Repositories
s.ignoreRepos = conn.IgnoreRepos
s.repos = conn.GetRepositories()
s.ignoreRepos = conn.GetIgnoreRepos()
s.includeRepos = conn.GetIncludeRepos()

ctx.Logger().V(3).Info("setting ignore repos patterns", "patterns", s.ignoreRepos)
ctx.Logger().V(3).Info("setting include repos patterns", "patterns", s.includeRepos)

switch cred := conn.GetCredential().(type) {
case *sourcespb.GitLab_Token:
Expand Down Expand Up @@ -192,8 +246,8 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, tar
// Get all repos if not specified.
if len(repos) == 0 {
ctx.Logger().Info("no repositories configured, enumerating")
ignoreRepo := buildIgnorer(s.ignoreRepos, func(err error, pattern string) {
ctx.Logger().Error(err, "could not compile ignore repo glob", "glob", pattern)
ignoreRepo := buildIgnorer(s.includeRepos, s.ignoreRepos, func(err error, pattern string) {
ctx.Logger().Error(err, "could not compile include/exclude repo glob", "glob", pattern)
})
reporter := sources.VisitorReporter{
VisitUnit: func(ctx context.Context, unit sources.SourceUnit) error {
Expand Down Expand Up @@ -316,8 +370,8 @@ func (s *Source) Validate(ctx context.Context) []error {
return errs
}

ignoreProject := buildIgnorer(s.ignoreRepos, func(err error, pattern string) {
errs = append(errs, fmt.Errorf("could not compile ignore repo pattern %q: %w", pattern, err))
ignoreProject := buildIgnorer(s.includeRepos, s.ignoreRepos, func(err error, pattern string) {
errs = append(errs, fmt.Errorf("could not compile include/exclude repo pattern %q: %w", pattern, err))
})

// Query GitLab for the list of configured repos.
Expand Down Expand Up @@ -646,23 +700,14 @@ func (s *Source) WithScanOptions(scanOptions *git.ScanOptions) {
s.scanOptions = scanOptions
}

func buildIgnorer(patterns []string, onCompileErr func(err error, pattern string)) func(repo string) bool {
var globs []glob.Glob
func buildIgnorer(include, exclude []string, onCompile func(err error, pattern string)) func(repo string) bool {

for _, pattern := range patterns {
g, err := glob.Compile(pattern)
if err != nil {
onCompileErr(err, pattern)
continue
}
globs = append(globs, g)
}
// compile and load globRepoFilter
globRepoFilter := newGlobRepoFilter(include, exclude, onCompile)

f := func(repo string) bool {
for _, g := range globs {
if g.Match(repo) {
return true
}
if !globRepoFilter.includeRepo(repo) || globRepoFilter.ignoreRepo(repo) {
return true
}
return false
}
Expand Down Expand Up @@ -765,10 +810,10 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e
}

// Otherwise, enumerate all repos.
ignoreRepo := buildIgnorer(s.ignoreRepos, func(err error, pattern string) {
ctx.Logger().Error(err, "could not compile ignore repo glob", "glob", pattern)
ignoreRepo := buildIgnorer(s.includeRepos, s.ignoreRepos, func(err error, pattern string) {
ctx.Logger().Error(err, "could not compile include/exclude repo glob", "glob", pattern)
// TODO: Handle error returned from UnitErr.
_ = reporter.UnitErr(ctx, fmt.Errorf("could not compile ignore repo glob: %w", err))
_ = reporter.UnitErr(ctx, fmt.Errorf("could not compile include/exclude repo glob: %w", err))
})
return s.getAllProjectRepos(ctx, apiClient, ignoreRepo, reporter)
}
Expand Down
127 changes: 127 additions & 0 deletions pkg/sources/gitlab/gitlab_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,24 @@ func TestSource_Validate(t *testing.T) {
},
wantErrCount: 2,
},

{
name: "could not compile include glob(s)",
connection: &sourcespb.GitLab{
Credential: &sourcespb.GitLab_Token{
Token: token,
},
IncludeRepos: []string{
"tes1188/*-gitlab",
"[", // glob doesn't compile
"[a-]", // glob doesn't compile
},
IgnoreRepos: []string{
"[",
},
},
wantErrCount: 3,
},
{
name: "repositories do not exist or are not accessible",
connection: &sourcespb.GitLab{
Expand Down Expand Up @@ -444,3 +462,112 @@ func TestSource_Chunks_TargetedScan(t *testing.T) {
})
}
}

// TestSource_InclusionGlobbing enumerates GitLab projects through
// getAllProjectRepos with different include/ignore glob sets and checks both
// the number of repositories visited and the number of patterns reported as
// failing to compile.
func TestSource_InclusionGlobbing(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()

	secret, err := common.GetTestSecret(ctx)
	if err != nil {
		t.Fatal(fmt.Errorf("failed to access secret: %v", err))
	}

	token := secret.MustGetField("GITLAB_TOKEN")

	tests := []struct {
		name             string
		connection       *sourcespb.GitLab
		wantReposScanned int
		wantErrCount     int
	}{
		{
			name: "Get all Repos",
			connection: &sourcespb.GitLab{
				Credential: &sourcespb.GitLab_Token{
					Token: token,
				},
				IncludeRepos: []string{"*"},
				IgnoreRepos:  nil,
			},
			wantReposScanned: 6,
			wantErrCount:     0,
		},
		{
			name: "Ignore testy repo, include all others",
			connection: &sourcespb.GitLab{
				Credential: &sourcespb.GitLab_Token{
					Token: token,
				},
				IncludeRepos: []string{"*"},
				IgnoreRepos:  []string{"*testy*"},
			},
			wantReposScanned: 5,
			wantErrCount:     0,
		},
		{
			name: "Ignore all repos",
			connection: &sourcespb.GitLab{
				Credential: &sourcespb.GitLab_Token{
					Token: token,
				},
				IncludeRepos: nil,
				IgnoreRepos:  []string{"*"},
			},
			wantReposScanned: 0,
			wantErrCount:     0,
		},
		{
			// Both include patterns are invalid and get skipped, leaving the
			// include list empty (everything included); the valid "*" ignore
			// pattern then filters every repo out. Three patterns in total
			// fail to compile.
			name: "Ignore all repos, but glob doesn't compile",
			connection: &sourcespb.GitLab{
				Credential: &sourcespb.GitLab_Token{
					Token: token,
				},
				IncludeRepos: []string{
					"[",    // glob doesn't compile
					"[a-]", // glob doesn't compile
				},
				IgnoreRepos: []string{
					"*", // ignore all repos
					"[", // glob doesn't compile
				},
			},
			wantReposScanned: 0,
			wantErrCount:     3,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			src := &Source{}
			conn, err := anypb.New(tt.connection)
			if !assert.NoError(t, err) {
				return
			}

			err = src.Init(ctx, tt.name, 0, 0, false, conn, 1)
			if !assert.NoError(t, err) {
				return
			}

			// Collect the ID of every repo the enumeration reports.
			var repos []string
			visitor := sources.VisitorReporter{
				VisitUnit: func(ctx context.Context, unit sources.SourceUnit) error {
					id, _ := unit.SourceUnitID()
					repos = append(repos, id)
					return nil
				},
			}

			// Stop here on failure: the client is used by the enumeration below.
			apiClient, err := src.newClient()
			if !assert.NoError(t, err) {
				return
			}

			// buildIgnorer takes (include, exclude, onCompile); it has no
			// context parameter.
			var errs []error
			ignoreRepo := buildIgnorer(src.includeRepos, src.ignoreRepos, func(err error, pattern string) {
				errs = append(errs, err)
			})
			err = src.getAllProjectRepos(ctx, apiClient, ignoreRepo, visitor)
			assert.NoError(t, err)

			assert.Equal(t, tt.wantErrCount, len(errs))
			assert.Equal(t, tt.wantReposScanned, len(repos))
		})
	}
}
4 changes: 4 additions & 0 deletions pkg/sources/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,10 @@ type GitlabConfig struct {
Filter *common.Filter
// SkipBinaries allows skipping binary files from the scan.
SkipBinaries bool
// IncludeRepos is a list of repository glob patterns to include in the scan.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a list of repositories or repository glob patterns?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

List of repository glob patterns.

IncludeRepos []string
// ExcludeRepos is a list of repository glob patterns to exclude from the scan.
ExcludeRepos []string
}

// FilesystemConfig defines the optional configuration for a filesystem source.
Expand Down
1 change: 1 addition & 0 deletions proto/sources.proto
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ message GitLab {
repeated string ignore_repos = 6;
bool skip_binaries = 7;
bool skip_archives = 8;
repeated string include_repos = 9;
}

message GitHub {
Expand Down
Loading