From 77d8c739a7fd62c1844aff9f047670234b6ad120 Mon Sep 17 00:00:00 2001 From: laurentsimon Date: Tue, 2 Nov 2021 01:15:54 +0000 Subject: [PATCH 1/4] draft --- docs/checks/internal/validate/main.go | 204 ++++++++++++++++++++++++-- 1 file changed, 190 insertions(+), 14 deletions(-) diff --git a/docs/checks/internal/validate/main.go b/docs/checks/internal/validate/main.go index 267e3ce7c0b..e679f6e0360 100644 --- a/docs/checks/internal/validate/main.go +++ b/docs/checks/internal/validate/main.go @@ -15,8 +15,14 @@ package main import ( "fmt" + "io/ioutil" + "path" + "regexp" "strings" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/ossf/scorecard/v3/checks" docs "github.com/ossf/scorecard/v3/docs/checks" ) @@ -24,41 +30,212 @@ import ( var ( allowedRisks = map[string]bool{"Critical": true, "High": true, "Medium": true, "Low": true} allowedRepoTypes = map[string]bool{"GitHub": true, "local": true} + supportedAPIs = map[string][]string{ + "InitRepo": {"GitHub"}, + "URI": {"GitHub", "local"}, + "IsArchived": {"GitHub"}, + "ListFiles": {"GitHub", "local"}, + "GetFileContent": {"GitHub", "local"}, + "ListMergedPRs": {"GitHub"}, + "ListBranches": {"GitHub"}, + "GetDefaultBranch": {"GitHub"}, + "ListCommits": {"GitHub"}, + "ListReleases": {"GitHub"}, + "ListContributors": {"GitHub"}, + "ListSuccessfulWorkflowRuns": {"GitHub"}, + "ListCheckRunsForRef": {"GitHub"}, + "ListStatuses": {"GitHub"}, + "Search": {"GitHub", "local"}, + "Close": {"GitHub", "local"}, + } ) +func listCheckFiles() (map[string]string, error) { + checkFiles := make(map[string]string) + // Use regex to determine the file that contains the entry point. + regex := regexp.MustCompile(`const\s+[^"]*=\s+"(.*)"`) + files, err := ioutil.ReadDir("checks/") + if err != nil { + return nil, fmt.Errorf("ioutil.ReadDir: %w", err) + } + + for _, file := range files { + if !strings.HasSuffix(file.Name(), ".go") || file.IsDir() { + continue + } + + fullpath := path.Join("checks/", file.Name()) + content, err := ioutil.ReadFile(fullpath) + if err != nil { + return nil, fmt.Errorf("ioutil.ReadFile: %s: %w", fullpath, err) + } + + res := regex.FindStringSubmatch(string(content)) + if len(res) != 2 { + continue + } + + r := res[1] + if entry, exists := checkFiles[r]; exists { + //nolint:goerr113 + return nil, fmt.Errorf("check %s already exists: %v", r, entry) + } + checkFiles[r] = fullpath + } + return checkFiles, nil +} + +func extractAPINames() ([]string, error) { + fns := []string{} + interfaceRe := regexp.MustCompile(`type\s+RepoClient\s+interface\s+{\s*`) + content, err := ioutil.ReadFile("clients/repo_client.go") + if err != nil { + return nil, fmt.Errorf("ioutil.ReadFile: %s: %w", "clients/repo_client.go", err) + } + + locs := interfaceRe.FindIndex(content) + if len(locs) != 2 || locs[1] == 0 { + //nolint:goerr113 + return nil, fmt.Errorf("FindIndex: cannot find Doc interface definition") + } + + nameRe := regexp.MustCompile(`[\s]+([A-Z]\S+)\s*\(.*\).+[\n]+`) + matches := nameRe.FindAllStringSubmatch(string(content[locs[1]-1:]), -1) + if len(matches) == 0 { + //nolint:goerr113 + return nil, fmt.Errorf("FindAllStringSubmatch: no match found") + } + + for _, v := range matches { + if len(v) != 2 { + //nolint:goerr113 + return nil, fmt.Errorf("invalid length: %d", len(v)) + } + fns = append(fns, v[1]) + } + return fns, nil +} + +func isSubsetOf(a, b []string) bool { + mb := make(map[string]bool) + for _, vb := range b { + mb[vb] = true + } + for _, va := range a { + if _, exists := mb[va]; !exists { + return false + } + } + return true +} + +func validateRepoTypeAPIs(checkName string, repoTypes []string, checkFiles map[string]string) error { + // For now, we only list APIs in a check's implementation. + // Long-term, we should use the callgraph feature using + // https://github.com/golang/tools/blob/master/cmd/callgraph/main.go. + // + + pathfn, exists := checkFiles[checkName] + if !exists { + //nolint:goerr113 + return fmt.Errorf("check %s does not exists", checkName) + } + + content, err := ioutil.ReadFile(pathfn) + if err != nil { + return fmt.Errorf("ioutil.ReadFile: %s: %w", pathfn, err) + } + + // For each API, check if it's used or not. + for api, supportedInterfaces := range supportedAPIs { + regex := fmt.Sprintf(`\.%s`, api) + re := regexp.MustCompile(regex) + r := re.Match(content) + // Check if the API is used but should not. + if r && !isSubsetOf(repoTypes, supportedInterfaces) { + //nolint:goerr113 + return fmt.Errorf("mismatch for check %s, file %s: found unexpected API use: %s(): %s not a subset of %s", + checkName, pathfn, api, repoTypes, supportedInterfaces) + } + } + + return nil +} + +func validateAPINames() error { + // Extract API names. + fns, err := extractAPINames() + if err != nil { + return fmt.Errorf("invalid functions: %w", err) + } + + // Validate function names. + functions := []string{} + for v := range supportedAPIs { + functions = append(functions, v) + } + + if !cmp.Equal(functions, fns, cmpopts.SortSlices(func(x, y string) bool { return x < y })) { + //nolint:goerr113 + return fmt.Errorf("got diff: %s", cmp.Diff(functions, fns)) + } + + return nil +} + func main() { m, err := docs.Read() if err != nil { - panic(fmt.Errorf("docs.Read: %w", err)) + panic(fmt.Sprintf("docs.Read: %v", err)) + } + + if err := validateAPINames(); err != nil { + panic(fmt.Sprintf("cannot extract function names: %v", err)) + } + + checkFiles, err := listCheckFiles() + if err != nil { + panic(err) } allChecks := checks.AllChecks for check := range allChecks { c, e := m.GetCheck(check) if e != nil { - panic(fmt.Errorf("GetCheck: %w: %s", e, check)) + panic(fmt.Sprintf("GetCheck: %v: %s", e, check)) } + if check != c.GetName() { + panic(fmt.Sprintf("invalid checkName: %s != %s", check, c.GetName())) + } if c.GetDescription() == "" { - // nolint: goerr113 - panic(fmt.Errorf("description for checkName: %s is empty", check)) + panic(fmt.Sprintf("description for checkName: %s is empty", check)) } if strings.TrimSpace(strings.Join(c.GetRemediation(), "")) == "" { - // nolint: goerr113 - panic(fmt.Errorf("remediation for checkName: %s is empty", check)) + panic(fmt.Sprintf("remediation for checkName: %s is empty", check)) } if c.GetShort() == "" { - // nolint: goerr113 - panic(fmt.Errorf("short for checkName: %s is empty", check)) + panic(fmt.Sprintf("short for checkName: %s is empty", check)) } if len(c.GetTags()) == 0 { - // nolint: goerr113 - panic(fmt.Errorf("tags for checkName: %s is empty", check)) + panic(fmt.Sprintf("tags for checkName: %s is empty", check)) } r := c.GetRisk() if _, exists := allowedRisks[r]; !exists { - // nolint: goerr113 - panic(fmt.Errorf("risk for checkName: %s is invalid: '%s'", check, r)) + panic(fmt.Sprintf("risk for checkName: %s is invalid: '%s'", check, r)) + } + repoTypes := c.GetSupportedRepoTypes() + if len(repoTypes) == 0 { + panic(fmt.Sprintf("repos for checkName: %s is empty", check)) + } + for _, rt := range repoTypes { + if _, exists := allowedRepoTypes[rt]; !exists { + panic(fmt.Sprintf("repo type for checkName: %s is invalid: '%s'", check, rt)) + } + } + // Validate that the check only calls API the interface supports. + if err := validateRepoTypeAPIs(check, repoTypes, checkFiles); err != nil { + panic(fmt.Sprintf("validateRepoTypeAPIs: %v", err)) } repoTypes := c.GetSupportedRepoTypes() if len(repoTypes) == 0 { @@ -74,8 +251,7 @@ func main() { } for _, check := range m.GetChecks() { if _, exists := allChecks[check.GetName()]; !exists { - // nolint: goerr113 - panic(fmt.Errorf("check present in checks.yaml is not part of `checks.AllChecks`: %s", check.GetName())) + panic(fmt.Sprintf("check present in checks.yaml is not part of `checks.AllChecks`: %s", check.GetName())) } } } From 7332a510c7dc2a9544a0f453551f8ee6f61b3db9 Mon Sep 17 00:00:00 2001 From: laurentsimon Date: Tue, 2 Nov 2021 17:42:36 +0000 Subject: [PATCH 2/4] validate --- docs/checks/internal/validate/main.go | 75 +++++++++++++++++++-------- 1 file changed, 52 insertions(+), 23 deletions(-) diff --git a/docs/checks/internal/validate/main.go b/docs/checks/internal/validate/main.go index e679f6e0360..7ca1f25d6d8 100644 --- a/docs/checks/internal/validate/main.go +++ b/docs/checks/internal/validate/main.go @@ -129,36 +129,75 @@ func isSubsetOf(a, b []string) bool { return true } -func validateRepoTypeAPIs(checkName string, repoTypes []string, checkFiles map[string]string) error { - // For now, we only list APIs in a check's implementation. - // Long-term, we should use the callgraph feature using - // https://github.com/golang/tools/blob/master/cmd/callgraph/main.go. - // +func contains(l []string, elt string) bool { + for _, v := range l { + if v == elt { + return true + } + } + return false +} + +func supportedInterfacesFromImplementation(checkName string, checkFiles map[string]string) ([]string, error) { + // Special case. No APIs are used, + // but we need the repo name for a db lookup. + if checkName == "CII-Best-Practices" { + return []string{"GitHub"}, nil + } + // Create our map. + s := make(map[string]bool) + for k := range allowedRepoTypes { + s[k] = true + } + + // Read the source file for the check. pathfn, exists := checkFiles[checkName] if !exists { //nolint:goerr113 - return fmt.Errorf("check %s does not exists", checkName) + return nil, fmt.Errorf("check %s does not exists", checkName) } content, err := ioutil.ReadFile(pathfn) if err != nil { - return fmt.Errorf("ioutil.ReadFile: %s: %w", pathfn, err) + return nil, fmt.Errorf("ioutil.ReadFile: %s: %w", pathfn, err) } // For each API, check if it's used or not. + // Adjust supported repo types accordingly. for api, supportedInterfaces := range supportedAPIs { regex := fmt.Sprintf(`\.%s`, api) re := regexp.MustCompile(regex) r := re.Match(content) - // Check if the API is used but should not. - if r && !isSubsetOf(repoTypes, supportedInterfaces) { - //nolint:goerr113 - return fmt.Errorf("mismatch for check %s, file %s: found unexpected API use: %s(): %s not a subset of %s", - checkName, pathfn, api, repoTypes, supportedInterfaces) + if r { + for k := range allowedRepoTypes { + if !contains(supportedInterfaces, k) { + delete(s, k) + } + } } } + r := []string{} + for k := range s { + r = append(r, k) + } + return r, nil +} + +func validateRepoTypeAPIs(checkName string, repoTypes []string, checkFiles map[string]string) error { + // For now, we only list APIs in a check's implementation. + // Long-term, we should use the callgraph feature using + // https://github.com/golang/tools/blob/master/cmd/callgraph/main.go + l, err := supportedInterfacesFromImplementation(checkName, checkFiles) + if err != nil { + return fmt.Errorf("supportedInterfacesFromImplementation: %w", err) + } + + if !cmp.Equal(l, repoTypes, cmpopts.SortSlices(func(x, y string) bool { return x < y })) { + //nolint: goerr113 + return fmt.Errorf("%s: got diff: %s", checkName, cmp.Diff(l, repoTypes)) + } return nil } @@ -233,21 +272,11 @@ func main() { panic(fmt.Sprintf("repo type for checkName: %s is invalid: '%s'", check, rt)) } } + // Validate that the check only calls API the interface supports. if err := validateRepoTypeAPIs(check, repoTypes, checkFiles); err != nil { panic(fmt.Sprintf("validateRepoTypeAPIs: %v", err)) } - repoTypes := c.GetSupportedRepoTypes() - if len(repoTypes) == 0 { - // nolint: goerr113 - panic(fmt.Errorf("repos for checkName: %s is empty", check)) - } - for _, rt := range repoTypes { - if _, exists := allowedRepoTypes[rt]; !exists { - // nolint: goerr113 - panic(fmt.Errorf("repo type for checkName: %s is invalid: '%s'", check, rt)) - } - } } for _, check := range m.GetChecks() { if _, exists := allChecks[check.GetName()]; !exists { From 306c278ec53094711cfa52a7a6fe60dad71f2d2f Mon Sep 17 00:00:00 2001 From: laurentsimon Date: Tue, 2 Nov 2021 17:46:10 +0000 Subject: [PATCH 3/4] comment --- docs/checks/internal/validate/main.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/checks/internal/validate/main.go b/docs/checks/internal/validate/main.go index 7ca1f25d6d8..f2b78283d17 100644 --- a/docs/checks/internal/validate/main.go +++ b/docs/checks/internal/validate/main.go @@ -31,6 +31,9 @@ var ( allowedRisks = map[string]bool{"Critical": true, "High": true, "Medium": true, "Low": true} allowedRepoTypes = map[string]bool{"GitHub": true, "local": true} supportedAPIs = map[string][]string{ + // InitRepo is supported for local reepos in general. However, in the context of checks, + // this is only used to look up remote data, e.g. in Fuzzinng check. + // So we only have "GitHub" supported. "InitRepo": {"GitHub"}, "URI": {"GitHub", "local"}, "IsArchived": {"GitHub"}, From cfc2b83c562bfc914e4ba280f1192135a18713f7 Mon Sep 17 00:00:00 2001 From: laurentsimon Date: Tue, 2 Nov 2021 17:50:32 +0000 Subject: [PATCH 4/4] commments --- docs/checks/internal/validate/main.go | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/docs/checks/internal/validate/main.go b/docs/checks/internal/validate/main.go index f2b78283d17..5a071a13dee 100644 --- a/docs/checks/internal/validate/main.go +++ b/docs/checks/internal/validate/main.go @@ -53,9 +53,11 @@ var ( } ) +// Indentify the source file that declares each check. func listCheckFiles() (map[string]string, error) { checkFiles := make(map[string]string) // Use regex to determine the file that contains the entry point. + // We're looking for `const someVarName = "CheckName"`. regex := regexp.MustCompile(`const\s+[^"]*=\s+"(.*)"`) files, err := ioutil.ReadDir("checks/") if err != nil { @@ -88,6 +90,7 @@ func listCheckFiles() (map[string]string, error) { return checkFiles, nil } +// extract API names for RepoClient interface. func extractAPINames() ([]string, error) { fns := []string{} interfaceRe := regexp.MustCompile(`type\s+RepoClient\s+interface\s+{\s*`) @@ -119,19 +122,6 @@ func extractAPINames() ([]string, error) { return fns, nil } -func isSubsetOf(a, b []string) bool { - mb := make(map[string]bool) - for _, vb := range b { - mb[vb] = true - } - for _, va := range a { - if _, exists := mb[va]; !exists { - return false - } - } - return true -} - func contains(l []string, elt string) bool { for _, v := range l { if v == elt { @@ -141,9 +131,10 @@ func contains(l []string, elt string) bool { return false } +// extract supported interfaces frmo a check implementation file. func supportedInterfacesFromImplementation(checkName string, checkFiles map[string]string) ([]string, error) { // Special case. No APIs are used, - // but we need the repo name for a db lookup. + // but we need the repo name for an online database lookup. if checkName == "CII-Best-Practices" { return []string{"GitHub"}, nil }