From 846fb19724435f0c6465af44adf505ad6b2fc941 Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Fri, 3 Mar 2023 00:50:12 +0530 Subject: [PATCH] Refactor githubrepo CheckRun logic (#2710) Signed-off-by: Azeem Shaikh --- clients/githubrepo/checkruns.go | 131 ++++++++++++++++++- clients/githubrepo/checkruns_e2e_test.go | 51 ++++++++ clients/githubrepo/client.go | 15 +-- clients/githubrepo/graphql.go | 153 ++--------------------- clients/githubrepo/graphql_e2e_test.go | 84 ++++++------- clients/githubrepo/repo.go | 8 ++ 6 files changed, 236 insertions(+), 206 deletions(-) create mode 100644 clients/githubrepo/checkruns_e2e_test.go diff --git a/clients/githubrepo/checkruns.go b/clients/githubrepo/checkruns.go index 5514762f086..6ed5c04d5b3 100644 --- a/clients/githubrepo/checkruns.go +++ b/clients/githubrepo/checkruns.go @@ -17,31 +17,152 @@ package githubrepo import ( "context" "fmt" + "strings" + "sync" "github.com/google/go-github/v38/github" + "github.com/shurcooL/githubv4" "github.com/ossf/scorecard/v4/clients" sce "github.com/ossf/scorecard/v4/errors" + "github.com/ossf/scorecard/v4/log" ) +//nolint:govet +type checkRunsGraphqlData struct { + Repository struct { + Object struct { + Commit struct { + History struct { + Nodes []struct { + AssociatedPullRequests struct { + Nodes []struct { + HeadRefOid githubv4.String + Commits struct { + Nodes []struct { + Commit struct { + CheckSuites struct { + Nodes []struct { + App struct { + Slug githubv4.String + } + Conclusion githubv4.CheckConclusionState + Status githubv4.CheckStatusState + } + } `graphql:"checkSuites(first: $checksToAnalyze)"` + } + } + } `graphql:"commits(last:1)"` + } + } `graphql:"associatedPullRequests(first: $pullRequestsToAnalyze)"` + } + } `graphql:"history(first: $commitsToAnalyze)"` + } `graphql:"... on Commit"` + } `graphql:"object(expression: $commitExpression)"` + } `graphql:"repository(owner: $owner, name: $name)"` + RateLimit struct { + Cost *int + } +} + +type checkRunsByRef = map[string][]clients.CheckRun + +// nolint: govet type checkrunsHandler struct { - client *github.Client - ctx context.Context - repourl *repoURL + client *github.Client + graphClient *githubv4.Client + repourl *repoURL + logger *log.Logger + checkData *checkRunsGraphqlData + setupOnce *sync.Once + ctx context.Context + commitDepth int + checkRunsByRef checkRunsByRef + errSetup error } -func (handler *checkrunsHandler) init(ctx context.Context, repourl *repoURL) { +func (handler *checkrunsHandler) init(ctx context.Context, repourl *repoURL, commitDepth int) { handler.ctx = ctx handler.repourl = repourl + handler.commitDepth = commitDepth + handler.logger = log.NewLogger(log.DefaultLevel) + handler.checkData = new(checkRunsGraphqlData) + handler.setupOnce = new(sync.Once) + handler.checkRunsByRef = checkRunsByRef{} +} + +func (handler *checkrunsHandler) setup() error { + handler.setupOnce.Do(func() { + commitExpression := handler.repourl.commitExpression() + vars := map[string]interface{}{ + "owner": githubv4.String(handler.repourl.owner), + "name": githubv4.String(handler.repourl.repo), + "pullRequestsToAnalyze": githubv4.Int(pullRequestsToAnalyze), + "commitsToAnalyze": githubv4.Int(handler.commitDepth), + "commitExpression": githubv4.String(commitExpression), + "checksToAnalyze": githubv4.Int(checksToAnalyze), + } + // TODO(#2224): + // sast and ci checks causes cache miss if commits dont match number of check runs. + // paging for this needs to be implemented if using higher than 100 --number-of-commits + if handler.commitDepth > 99 { + vars["commitsToAnalyze"] = githubv4.Int(99) + } + if err := handler.graphClient.Query(handler.ctx, handler.checkData, vars); err != nil { + // quit early without setting crsErrSetup for "Resource not accessible by integration" error + // for whatever reason, this check doesn't work with a GITHUB_TOKEN, only a PAT + if strings.Contains(err.Error(), "Resource not accessible by integration") { + return + } + handler.errSetup = err + return + } + handler.checkRunsByRef = parseCheckRuns(handler.checkData) + }) + return handler.errSetup } func (handler *checkrunsHandler) listCheckRunsForRef(ref string) ([]clients.CheckRun, error) { + if err := handler.setup(); err != nil { + return nil, fmt.Errorf("error during graphqlHandler.setupCheckRuns: %w", err) + } + if crs, ok := handler.checkRunsByRef[ref]; ok { + return crs, nil + } + msg := fmt.Sprintf("listCheckRunsForRef cache miss: %s/%s:%s", handler.repourl.owner, handler.repourl.repo, ref) + handler.logger.Info(msg) + checkRuns, _, err := handler.client.Checks.ListCheckRunsForRef( handler.ctx, handler.repourl.owner, handler.repourl.repo, ref, &github.ListCheckRunsOptions{}) if err != nil { return nil, sce.WithMessage(sce.ErrScorecardInternal, fmt.Sprintf("ListCheckRunsForRef: %v", err)) } - return checkRunsFrom(checkRuns), nil + handler.checkRunsByRef[ref] = checkRunsFrom(checkRuns) + return handler.checkRunsByRef[ref], nil +} + +func parseCheckRuns(data *checkRunsGraphqlData) checkRunsByRef { + checkCache := checkRunsByRef{} + for _, commit := range data.Repository.Object.Commit.History.Nodes { + for _, pr := range commit.AssociatedPullRequests.Nodes { + var crs []clients.CheckRun + for _, c := range pr.Commits.Nodes { + for _, checkRun := range c.Commit.CheckSuites.Nodes { + crs = append(crs, clients.CheckRun{ + // the REST API returns lowercase. the graphQL API returns upper + Status: strings.ToLower(string(checkRun.Status)), + Conclusion: strings.ToLower(string(checkRun.Conclusion)), + App: clients.CheckRunApp{ + Slug: string(checkRun.App.Slug), + }, + }) + } + } + headRef := string(pr.HeadRefOid) + checkCache[headRef] = crs + } + } + return checkCache } func checkRunsFrom(data *github.ListCheckRunsResults) []clients.CheckRun { diff --git a/clients/githubrepo/checkruns_e2e_test.go b/clients/githubrepo/checkruns_e2e_test.go new file mode 100644 index 00000000000..45c5fad2bee --- /dev/null +++ b/clients/githubrepo/checkruns_e2e_test.go @@ -0,0 +1,51 @@ +// Copyright 2021 OpenSSF Scorecard Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package githubrepo + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/ossf/scorecard/v4/clients" +) + +var _ = Describe("E2E TEST: githubrepo.checkrunsHandler", func() { + var checkrunshandler *checkrunsHandler + + BeforeEach(func() { + checkrunshandler = &checkrunsHandler{ + graphClient: graphClient, + } + }) + + // TODO: Add e2e tests for commit depth. + + Context("E2E TEST: Validate query cost", func() { + It("Should not have increased query cost", func() { + repourl := &repoURL{ + owner: "ossf", + repo: "scorecard", + commitSHA: clients.HeadSHA, + } + checkrunshandler.init(context.Background(), repourl, 30) + Expect(checkrunshandler.setup()).Should(BeNil()) + Expect(checkrunshandler.checkData).ShouldNot(BeNil()) + Expect(checkrunshandler.checkData.RateLimit.Cost).ShouldNot(BeNil()) + Expect(*checkrunshandler.checkData.RateLimit.Cost).Should(BeNumerically("<=", 1)) + }) + }) +}) diff --git a/clients/githubrepo/client.go b/clients/githubrepo/client.go index 9b3e14fbfd3..612ccc83e5e 100644 --- a/clients/githubrepo/client.go +++ b/clients/githubrepo/client.go @@ -103,7 +103,7 @@ func (client *Client) InitRepo(inputRepo clients.Repo, commitSHA string, commitD client.workflows.init(client.ctx, client.repourl) // Setup checkrunsHandler. - client.checkruns.init(client.ctx, client.repourl) + client.checkruns.init(client.ctx, client.repourl, commitDepth) // Setup statusesHandler. client.statuses.init(client.ctx, client.repourl) @@ -207,15 +207,7 @@ func (client *Client) ListSuccessfulWorkflowRuns(filename string) ([]clients.Wor // ListCheckRunsForRef implements RepoClient.ListCheckRunsForRef. func (client *Client) ListCheckRunsForRef(ref string) ([]clients.CheckRun, error) { - cachedCrs, err := client.graphClient.listCheckRunsForRef(ref) - if errors.Is(err, errNotCached) { - crs, err := client.checkruns.listCheckRunsForRef(ref) - if err == nil { - client.graphClient.cacheCheckRunsForRef(ref, crs) - } - return crs, err - } - return cachedCrs, err + return client.checkruns.listCheckRunsForRef(ref) } // ListStatuses implements RepoClient.ListStatuses. @@ -276,7 +268,8 @@ func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripp client: client, }, checkruns: &checkrunsHandler{ - client: client, + client: client, + graphClient: graphClient, }, statuses: &statusesHandler{ client: client, diff --git a/clients/githubrepo/graphql.go b/clients/githubrepo/graphql.go index eb3302f2704..93e4a9afbb4 100644 --- a/clients/githubrepo/graphql.go +++ b/clients/githubrepo/graphql.go @@ -16,7 +16,6 @@ package githubrepo import ( "context" - "errors" "fmt" "strings" "sync" @@ -26,7 +25,6 @@ import ( "github.com/ossf/scorecard/v4/clients" sce "github.com/ossf/scorecard/v4/errors" - "github.com/ossf/scorecard/v4/log" ) const ( @@ -38,8 +36,6 @@ const ( labelsToAnalyze = 30 ) -var errNotCached = errors.New("result not cached") - //nolint:govet type graphqlData struct { Repository struct { @@ -134,61 +130,17 @@ type graphqlData struct { } } -//nolint:govet -type checkRunsGraphqlData struct { - Repository struct { - Object struct { - Commit struct { - History struct { - Nodes []struct { - AssociatedPullRequests struct { - Nodes []struct { - HeadRefOid githubv4.String - Commits struct { - Nodes []struct { - Commit struct { - CheckSuites struct { - Nodes []struct { - App struct { - Slug githubv4.String - } - Conclusion githubv4.CheckConclusionState - Status githubv4.CheckStatusState - } - } `graphql:"checkSuites(first: $checksToAnalyze)"` - } - } - } `graphql:"commits(last:1)"` - } - } `graphql:"associatedPullRequests(first: $pullRequestsToAnalyze)"` - } - } `graphql:"history(first: $commitsToAnalyze)"` - } `graphql:"... on Commit"` - } `graphql:"object(expression: $commitExpression)"` - } `graphql:"repository(owner: $owner, name: $name)"` - RateLimit struct { - Cost *int - } -} - -type checkRunCache = map[string][]clients.CheckRun - type graphqlHandler struct { - checkRuns checkRunCache - client *githubv4.Client - data *graphqlData - setupOnce *sync.Once - checkData *checkRunsGraphqlData - setupCheckRunsOnce *sync.Once - errSetupCheckRuns error - logger *log.Logger - ctx context.Context - errSetup error - repourl *repoURL - commits []clients.Commit - issues []clients.Issue - archived bool - commitDepth int + client *githubv4.Client + data *graphqlData + setupOnce *sync.Once + ctx context.Context + errSetup error + repourl *repoURL + commits []clients.Commit + issues []clients.Issue + archived bool + commitDepth int } func (handler *graphqlHandler) init(ctx context.Context, repourl *repoURL, commitDepth int) { @@ -197,10 +149,6 @@ func (handler *graphqlHandler) init(ctx context.Context, repourl *repoURL, commi handler.data = new(graphqlData) handler.errSetup = nil handler.setupOnce = new(sync.Once) - handler.checkData = new(checkRunsGraphqlData) - handler.setupCheckRunsOnce = new(sync.Once) - handler.checkRuns = checkRunCache{} - handler.logger = log.NewLogger(log.DefaultLevel) handler.commitDepth = commitDepth handler.commits = nil handler.issues = nil @@ -233,7 +181,7 @@ func populateCommits(handler *graphqlHandler, vars map[string]interface{}) ([]cl func (handler *graphqlHandler) setup() error { handler.setupOnce.Do(func() { - commitExpression := handler.commitExpression() + commitExpression := handler.repourl.commitExpression() vars := map[string]interface{}{ "owner": githubv4.String(handler.repourl.owner), "name": githubv4.String(handler.repourl.repo), @@ -264,37 +212,6 @@ func (handler *graphqlHandler) setup() error { return handler.errSetup } -func (handler *graphqlHandler) setupCheckRuns() error { - handler.setupCheckRunsOnce.Do(func() { - commitExpression := handler.commitExpression() - vars := map[string]interface{}{ - "owner": githubv4.String(handler.repourl.owner), - "name": githubv4.String(handler.repourl.repo), - "pullRequestsToAnalyze": githubv4.Int(pullRequestsToAnalyze), - "commitsToAnalyze": githubv4.Int(handler.commitDepth), - "commitExpression": githubv4.String(commitExpression), - "checksToAnalyze": githubv4.Int(checksToAnalyze), - } - // TODO(#2224): - // sast and ci checks causes cache miss if commits dont match number of check runs. - // paging for this needs to be implemented if using higher than 100 --number-of-commits - if handler.commitDepth > 99 { - vars["commitsToAnalyze"] = githubv4.Int(99) - } - if err := handler.client.Query(handler.ctx, handler.checkData, vars); err != nil { - // quit early without setting crsErrSetup for "Resource not accessible by integration" error - // for whatever reason, this check doesn't work with a GITHUB_TOKEN, only a PAT - if strings.Contains(err.Error(), "Resource not accessible by integration") { - return - } - handler.errSetupCheckRuns = err - return - } - handler.checkRuns = parseCheckRuns(handler.checkData) - }) - return handler.errSetupCheckRuns -} - func (handler *graphqlHandler) getCommits() ([]clients.Commit, error) { if err := handler.setup(); err != nil { return nil, fmt.Errorf("error during graphqlHandler.setup: %w", err) @@ -302,22 +219,6 @@ func (handler *graphqlHandler) getCommits() ([]clients.Commit, error) { return handler.commits, nil } -func (handler *graphqlHandler) cacheCheckRunsForRef(ref string, crs []clients.CheckRun) { - handler.checkRuns[ref] = crs -} - -func (handler *graphqlHandler) listCheckRunsForRef(ref string) ([]clients.CheckRun, error) { - if err := handler.setupCheckRuns(); err != nil { - return nil, fmt.Errorf("error during graphqlHandler.setupCheckRuns: %w", err) - } - if crs, ok := handler.checkRuns[ref]; ok { - return crs, nil - } - msg := fmt.Sprintf("listCheckRunsForRef cache miss: %s/%s:%s", handler.repourl.owner, handler.repourl.repo, ref) - handler.logger.Info(msg) - return nil, errNotCached -} - func (handler *graphqlHandler) getIssues() ([]clients.Issue, error) { if !strings.EqualFold(handler.repourl.commitSHA, clients.HeadSHA) { return nil, fmt.Errorf("%w: ListIssues only supported for HEAD queries", clients.ErrUnsupportedFeature) @@ -338,38 +239,6 @@ func (handler *graphqlHandler) isArchived() (bool, error) { return handler.archived, nil } -func (handler *graphqlHandler) commitExpression() string { - if strings.EqualFold(handler.repourl.commitSHA, clients.HeadSHA) { - // TODO(#575): Confirm that this works as expected. - return fmt.Sprintf("heads/%s", handler.repourl.defaultBranch) - } - return handler.repourl.commitSHA -} - -func parseCheckRuns(data *checkRunsGraphqlData) checkRunCache { - checkCache := checkRunCache{} - for _, commit := range data.Repository.Object.Commit.History.Nodes { - for _, pr := range commit.AssociatedPullRequests.Nodes { - var crs []clients.CheckRun - for _, c := range pr.Commits.Nodes { - for _, checkRun := range c.Commit.CheckSuites.Nodes { - crs = append(crs, clients.CheckRun{ - // the REST API returns lowercase. the graphQL API returns upper - Status: strings.ToLower(string(checkRun.Status)), - Conclusion: strings.ToLower(string(checkRun.Conclusion)), - App: clients.CheckRunApp{ - Slug: string(checkRun.App.Slug), - }, - }) - } - } - headRef := string(pr.HeadRefOid) - checkCache[headRef] = crs - } - } - return checkCache -} - // nolint func commitsFrom(data *graphqlData, repoOwner, repoName string) ([]clients.Commit, error) { ret := make([]clients.Commit, 0) diff --git a/clients/githubrepo/graphql_e2e_test.go b/clients/githubrepo/graphql_e2e_test.go index 9eb860ac183..eb4bc225f7d 100644 --- a/clients/githubrepo/graphql_e2e_test.go +++ b/clients/githubrepo/graphql_e2e_test.go @@ -38,12 +38,12 @@ var _ = Describe("E2E TEST: githubrepo.graphqlHandler", func() { Context("E2E TEST: Confirm Paging Commits Works", func() { It("Should only have 1 commit", func() { - _repourl := &repoURL{ + repourl := &repoURL{ owner: "ossf", repo: "scorecard", commitSHA: clients.HeadSHA, } - _vars := map[string]interface{}{ + vars := map[string]interface{}{ "owner": githubv4.String("ossf"), "name": githubv4.String("scorecard"), "pullRequestsToAnalyze": githubv4.Int(1), @@ -55,28 +55,28 @@ var _ = Describe("E2E TEST: githubrepo.graphqlHandler", func() { "commitExpression": githubv4.String("heads/main"), "historyCursor": (*githubv4.String)(nil), } - _ctx := context.Background() - _logger := log.NewLogger(log.DebugLevel) - _rt := roundtripper.NewTransport(_ctx, _logger) - _httpClient := &http.Client{ - Transport: _rt, + ctx := context.Background() + logger := log.NewLogger(log.DebugLevel) + rt := roundtripper.NewTransport(ctx, logger) + httpClient := &http.Client{ + Transport: rt, } - _graphClient := githubv4.NewClient(_httpClient) - _handler := &graphqlHandler{ - client: _graphClient, + graphClient := githubv4.NewClient(httpClient) + handler := &graphqlHandler{ + client: graphClient, } - _handler.init(context.Background(), _repourl, 1) - commits, err := populateCommits(_handler, _vars) + handler.init(context.Background(), repourl, 1) + commits, err := populateCommits(handler, vars) Expect(err).To(BeNil()) Expect(len(commits)).Should(BeEquivalentTo(1)) }) It("Should have 30 commits", func() { - _repourl := &repoURL{ + repourl := &repoURL{ owner: "ossf", repo: "scorecard", commitSHA: clients.HeadSHA, } - _vars := map[string]interface{}{ + vars := map[string]interface{}{ "owner": githubv4.String("ossf"), "name": githubv4.String("scorecard"), "pullRequestsToAnalyze": githubv4.Int(1), @@ -88,28 +88,28 @@ var _ = Describe("E2E TEST: githubrepo.graphqlHandler", func() { "commitExpression": githubv4.String("heads/main"), "historyCursor": (*githubv4.String)(nil), } - _ctx := context.Background() - _logger := log.NewLogger(log.DebugLevel) - _rt := roundtripper.NewTransport(_ctx, _logger) - _httpClient := &http.Client{ - Transport: _rt, + ctx := context.Background() + logger := log.NewLogger(log.DebugLevel) + rt := roundtripper.NewTransport(ctx, logger) + httpClient := &http.Client{ + Transport: rt, } - _graphClient := githubv4.NewClient(_httpClient) - _handler := &graphqlHandler{ - client: _graphClient, + graphClient := githubv4.NewClient(httpClient) + handler := &graphqlHandler{ + client: graphClient, } - _handler.init(context.Background(), _repourl, 30) - commits, err := populateCommits(_handler, _vars) + handler.init(context.Background(), repourl, 30) + commits, err := populateCommits(handler, vars) Expect(err).To(BeNil()) Expect(len(commits)).Should(BeEquivalentTo(30)) }) It("Should have 101 commits", func() { - _repourl := &repoURL{ + repourl := &repoURL{ owner: "ossf", repo: "scorecard", commitSHA: clients.HeadSHA, } - _vars := map[string]interface{}{ + vars := map[string]interface{}{ "owner": githubv4.String("ossf"), "name": githubv4.String("scorecard"), "pullRequestsToAnalyze": githubv4.Int(1), @@ -121,18 +121,18 @@ var _ = Describe("E2E TEST: githubrepo.graphqlHandler", func() { "commitExpression": githubv4.String("heads/main"), "historyCursor": (*githubv4.String)(nil), } - _ctx := context.Background() - _logger := log.NewLogger(log.DebugLevel) - _rt := roundtripper.NewTransport(_ctx, _logger) - _httpClient := &http.Client{ - Transport: _rt, + ctx := context.Background() + logger := log.NewLogger(log.DebugLevel) + rt := roundtripper.NewTransport(ctx, logger) + httpClient := &http.Client{ + Transport: rt, } - _graphClient := githubv4.NewClient(_httpClient) - _handler := &graphqlHandler{ - client: _graphClient, + graphClient := githubv4.NewClient(httpClient) + handler := &graphqlHandler{ + client: graphClient, } - _handler.init(context.Background(), _repourl, 101) - commits, err := populateCommits(_handler, _vars) + handler.init(context.Background(), repourl, 101) + commits, err := populateCommits(handler, vars) Expect(err).To(BeNil()) Expect(len(commits)).Should(BeEquivalentTo(101)) }) @@ -163,17 +163,5 @@ var _ = Describe("E2E TEST: githubrepo.graphqlHandler", func() { Expect(graphqlhandler.data.RateLimit.Cost).ShouldNot(BeNil()) Expect(*graphqlhandler.data.RateLimit.Cost).Should(BeNumerically("<=", 1)) }) - It("Should not have increased for check run query", func() { - repourl := &repoURL{ - owner: "ossf", - repo: "scorecard", - commitSHA: clients.HeadSHA, - } - graphqlhandler.init(context.Background(), repourl, 30) - Expect(graphqlhandler.setupCheckRuns()).Should(BeNil()) - Expect(graphqlhandler.checkData).ShouldNot(BeNil()) - Expect(graphqlhandler.checkData.RateLimit.Cost).ShouldNot(BeNil()) - Expect(*graphqlhandler.checkData.RateLimit.Cost).Should(BeNumerically("<=", 1)) - }) }) }) diff --git a/clients/githubrepo/repo.go b/clients/githubrepo/repo.go index f0f24da28b8..0f288cf7205 100644 --- a/clients/githubrepo/repo.go +++ b/clients/githubrepo/repo.go @@ -114,6 +114,14 @@ func (r *repoURL) Metadata() []string { return r.metadata } +func (r *repoURL) commitExpression() string { + if strings.EqualFold(r.commitSHA, clients.HeadSHA) { + // TODO(#575): Confirm that this works as expected. + return fmt.Sprintf("heads/%s", r.defaultBranch) + } + return r.commitSHA +} + // MakeGithubRepo takes input of form "owner/repo" or "github.com/owner/repo" // and returns an implementation of clients.Repo interface. func MakeGithubRepo(input string) (clients.Repo, error) {