Skip to content

Commit

Permalink
feat: add code-search repository selection flag
Browse files Browse the repository at this point in the history
(GitHub only) Introduces the `--code-search` flag that allows the
provision of GitHub Code Search terms (legacy) to define the selection
of repositories to be visited.

The repository list will be the unique set of repositories referenced in
the code search results.
  • Loading branch information
jamestelfer committed Oct 25, 2023
1 parent 7da2928 commit a024ba5
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 2 deletions.
7 changes: 5 additions & 2 deletions cmd/platform.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ func configurePlatform(cmd *cobra.Command) {
flags.StringP("username", "u", "", "The Bitbucket server username.")
flags.StringP("token", "T", "", "The personal access token for the targeting platform. Can also be set using the GITHUB_TOKEN/GITLAB_TOKEN/GITEA_TOKEN/BITBUCKET_SERVER_TOKEN environment variable.")

flags.StringP("code-search", "", "", "Use a code search to find a set of repositories to target. Repeated results from a given repository will be ignored.")
flags.StringSliceP("org", "O", nil, "The name of a GitHub organization. All repositories in that organization will be used.")
flags.StringSliceP("group", "G", nil, "The name of a GitLab organization. All repositories in that group will be used.")
flags.StringSliceP("user", "U", nil, "The name of a user. All repositories owned by that user will be used.")
Expand Down Expand Up @@ -120,6 +121,7 @@ func getVersionController(flag *flag.FlagSet, verifyFlags bool, readOnly bool) (

func createGithubClient(flag *flag.FlagSet, verifyFlags bool, readOnly bool) (multigitter.VersionController, error) {
gitBaseURL, _ := flag.GetString("base-url")
codeSearch, _ := flag.GetString("code-search")
orgs, _ := flag.GetStringSlice("org")
users, _ := flag.GetStringSlice("user")
repos, _ := flag.GetStringSlice("repo")
Expand All @@ -130,8 +132,8 @@ func createGithubClient(flag *flag.FlagSet, verifyFlags bool, readOnly bool) (mu
sshAuth, _ := flag.GetBool("ssh-auth")
skipForks, _ := flag.GetBool("skip-forks")

if verifyFlags && len(orgs) == 0 && len(users) == 0 && len(repos) == 0 && repoSearch == "" {
return nil, errors.New("no organization, user, repo or repo-search set")
if verifyFlags && len(orgs) == 0 && len(users) == 0 && len(repos) == 0 && repoSearch == "" && codeSearch == "" {
return nil, errors.New("no organization, user, repo, repo-search or code-search set")
}

token, err := getToken(flag)
Expand Down Expand Up @@ -166,6 +168,7 @@ func createGithubClient(flag *flag.FlagSet, verifyFlags bool, readOnly bool) (mu
BaseURL: gitBaseURL,
TransportMiddleware: http.NewLoggingRoundTripper,
RepoListing: github.RepositoryListing{
CodeSearch: codeSearch,
Organizations: orgs,
Users: users,
Repositories: repoRefs,
Expand Down
77 changes: 77 additions & 0 deletions internal/scm/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ type Github struct {

// RepositoryListing contains information about which repositories that should be fetched
type RepositoryListing struct {
CodeSearch string
Organizations []string
Users []string
Repositories []RepositoryReference
Expand Down Expand Up @@ -222,6 +223,14 @@ func (g *Github) getRepositories(ctx context.Context) ([]*github.Repository, err
allRepos = append(allRepos, repos...)
}

if len(g.CodeSearch) > 0 {
repos, err := g.getCodeSearchRepositories(ctx, g.CodeSearch)
if err != nil {
return nil, errors.Wrapf(err, "could not get code search results for '%s'", g.CodeSearch)
}
allRepos = append(allRepos, repos...)
}

// Remove duplicate repos
repoMap := map[string]*github.Repository{}
for _, repo := range allRepos {
Expand Down Expand Up @@ -333,6 +342,74 @@ func (g *Github) getSearchRepositories(ctx context.Context, search string) ([]*g
return repos, nil
}

// getCodeSearchRepositories runs a GitHub code search and returns the
// repositories referenced by its results, each repository at most once.
//
// The search term is prefixed with "fork:true" so that forks are included.
// Result pages are requested one after another until a page comes back that is
// not full. If the API flags a page as incomplete, the whole lookup fails
// rather than returning a partial repository set.
//
// Every distinct repository found is fetched again through getAllRepositories,
// because code search results do not carry full repository details. The order
// of the returned repositories follows map iteration and is therefore not
// deterministic.
func (g *Github) getCodeSearchRepositories(ctx context.Context, search string) ([]*github.Repository, error) {
	// Page size requested from the API; a page shorter than this is the last one.
	const perPage = 100

	// Include forks in the search, same as repository searches
	query := "fork:true " + search

	// Keyed by full repository name so repeated hits in one repository collapse.
	resultRepos := make(map[string]RepositoryReference)

	for page := 1; ; page++ {
		results, _, err := retry(ctx, func() ([]*github.CodeResult, *github.Response, error) {
			searchResult, resp, err := g.ghClient.Search.Code(ctx, query, &github.SearchOptions{
				ListOptions: github.ListOptions{
					Page:    page,
					PerPage: perPage,
				},
			})
			if err != nil {
				// Hand the response back together with the error instead of
				// dropping it.
				// NOTE(review): this assumes retry inspects the response on
				// failure (e.g. for rate-limit details) — confirm against retry.
				return nil, resp, err
			}

			if searchResult.IncompleteResults != nil && *searchResult.IncompleteResults {
				// can occur when search times out on the server: for now, fail instead
				// of handling the issue
				return nil, resp, fmt.Errorf("search results incomplete")
			}

			return searchResult.CodeResults, resp, nil
		})
		if err != nil {
			return nil, err
		}

		for _, result := range results {
			repo := result.Repository

			resultRepos[repo.GetFullName()] = RepositoryReference{
				OwnerName: repo.GetOwner().GetLogin(),
				Name:      repo.GetName(),
			}
		}

		if len(results) != perPage {
			break
		}
	}

	// Code search does not return full details (like permissions). So for each
	// repo discovered, we have to query it again.
	return g.getAllRepositories(ctx, mapValues(resultRepos))
}

// getAllRepositories looks up every referenced repository, one at a time and in
// the order given, and returns the fetched repositories in that same order.
// The first lookup that fails stops the loop; its error is returned unchanged
// together with a nil slice.
func (g *Github) getAllRepositories(ctx context.Context, repoRefs []RepositoryReference) ([]*github.Repository, error) {
	var fetched []*github.Repository

	for idx := range repoRefs {
		repo, err := g.getRepository(ctx, repoRefs[idx])
		if err != nil {
			return nil, err
		}

		fetched = append(fetched, repo)
	}

	return fetched, nil
}

func (g *Github) getRepository(ctx context.Context, repoRef RepositoryReference) (*github.Repository, error) {
repo, _, err := retry(ctx, func() (*github.Repository, *github.Response, error) {
return g.ghClient.Repositories.Get(ctx, repoRef.OwnerName, repoRef.Name)
Expand Down
14 changes: 14 additions & 0 deletions internal/scm/github/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,17 @@ func chunkSlice[T any](stack []T, chunkSize int) [][]T {

return append(chunks, stack)
}

// mapValues collects every value stored in source into a freshly allocated
// slice. Values appear in map iteration order, so the ordering is not
// deterministic. The result is always non-nil, even for an empty or nil map.
func mapValues[K comparable, V any](source map[K]V) []V {
	collected := make([]V, 0, len(source))

	for key := range source {
		collected = append(collected, source[key])
	}

	return collected
}

0 comments on commit a024ba5

Please sign in to comment.