Skip to content

Commit

Permalink
✨ Add Language struct and optimize result parsing for GHClient.ListPr…
Browse files Browse the repository at this point in the history
…ogrammingLanguages (ossf#1992)

* temp save 05262022

* finished golang fuzz func check, getLang interface to be done next week

* temp save 05/31/2022

* temp save 06/01/2022

* temp save-2 06/01/2022

* temp save-1 06032022

* temp save-2 06022022

* temp save

* temp save 06032022

* temp save 06032022 (2)

* update err def

* temp save 3

* update docs for fuzzing

* update docs for fuzzing

* update checks.yaml to gen docs

* temp save 0606

* temp save-2 0606

* temp save-3 0606

* temp save-4 0606

* fix linter errors

* fix linter errs-2

* fix e2e errors

* 0608

* 0608-2

* optimize Language struct & parsing

* add more lang const

* resolved nits

Co-authored-by: Aiden Wang <[email protected]>
  • Loading branch information
aidenwang9867 and Aiden Wang authored Jun 10, 2022
1 parent 64cd053 commit e42af75
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 64 deletions.
35 changes: 25 additions & 10 deletions checks/fuzzing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func TestFuzzing(t *testing.T) {
tests := []struct {
name string
want checker.CheckResult
langs map[clients.Language]int
langs []clients.Language
response clients.SearchResponse
wantErr bool
wantFuzzErr bool
Expand All @@ -45,8 +45,11 @@ func TestFuzzing(t *testing.T) {
{
name: "empty response",
response: clients.SearchResponse{},
langs: map[clients.Language]int{
clients.Go: 300,
langs: []clients.Language{
{
Name: clients.Go,
NumLines: 300,
},
},
wantErr: false,
},
Expand All @@ -55,9 +58,15 @@ func TestFuzzing(t *testing.T) {
response: clients.SearchResponse{
Hits: 1,
},
langs: map[clients.Language]int{
clients.Go: 100,
clients.Java: 70,
langs: []clients.Language{
{
Name: clients.Go,
NumLines: 100,
},
{
Name: clients.Java,
NumLines: 70,
},
},
wantErr: false,
want: checker.CheckResult{Score: 10},
Expand All @@ -70,8 +79,11 @@ func TestFuzzing(t *testing.T) {
},
{
name: "nil response",
langs: map[clients.Language]int{
clients.Python: 256,
langs: []clients.Language{
{
Name: clients.Python,
NumLines: 256,
},
},
wantErr: true,
want: checker.CheckResult{Score: -1},
Expand All @@ -85,8 +97,11 @@ func TestFuzzing(t *testing.T) {
},
{
name: "min score since lang not supported",
langs: map[clients.Language]int{
clients.Language("not_supported_lang"): 1490,
langs: []clients.Language{
{
Name: clients.LanguageName("a_not_supported_lang"),
NumLines: 500,
},
},
wantFuzzErr: false,
want: checker.CheckResult{Score: 0},
Expand Down
22 changes: 11 additions & 11 deletions checks/raw/fuzzing.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ type languageFuzzConfig struct {

// Contains fuzzing specifications for programming languages.
// Please use the type LanguageName defined in clients/languages.go rather than a raw string.
var languageFuzzSpecs = map[clients.Language]languageFuzzConfig{
var languageFuzzSpecs = map[clients.LanguageName]languageFuzzConfig{
// Default fuzz patterns for Go.
clients.Go: {
filePattern: "*_test.go",
Expand Down Expand Up @@ -93,11 +93,11 @@ func Fuzzing(c *checker.CheckRequest) (checker.FuzzingData, error) {
)
}

langMap, err := c.RepoClient.ListProgrammingLanguages()
langs, err := c.RepoClient.ListProgrammingLanguages()
if err != nil {
return checker.FuzzingData{}, fmt.Errorf("cannot get langs of repo: %w", err)
}
prominentLangs := getProminentLanguages(langMap)
prominentLangs := getProminentLanguages(langs)

for _, lang := range prominentLangs {
usingFuzzFunc, files, e := checkFuzzFunc(c, lang)
Expand Down Expand Up @@ -151,7 +151,7 @@ func checkOSSFuzz(c *checker.CheckRequest) (bool, error) {
return result.Hits > 0, nil
}

func checkFuzzFunc(c *checker.CheckRequest, lang clients.Language) (bool, []checker.File, error) {
func checkFuzzFunc(c *checker.CheckRequest, lang clients.LanguageName) (bool, []checker.File, error) {
if c.RepoClient == nil {
return false, nil, nil
}
Expand Down Expand Up @@ -217,24 +217,24 @@ var getFuzzFunc fileparser.DoWhileTrueOnFileContent = func(
return true, nil
}

func getProminentLanguages(langs map[clients.Language]int) []clients.Language {
func getProminentLanguages(langs []clients.Language) []clients.LanguageName {
numLangs := len(langs)
if numLangs == 0 {
return nil
}
totalLoC := 0
for _, LoC := range langs {
totalLoC += LoC
for _, l := range langs {
totalLoC += l.NumLines
}
// avgLoC is the average number of lines of code per language in the current repo.
// Integer division is sufficient here; a float value is not needed.
avgLoC := totalLoC / numLangs

// Languages that have lines of code above average will be considered prominent.
ret := []clients.Language{}
for lang, LoC := range langs {
if LoC >= avgLoC {
lang = clients.Language(strings.ToLower(string(lang)))
ret := []clients.LanguageName{}
for _, l := range langs {
if l.NumLines >= avgLoC {
lang := clients.LanguageName(strings.ToLower(string(l.Name)))
ret = append(ret, lang)
}
}
Expand Down
23 changes: 13 additions & 10 deletions checks/raw/fuzzing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ func Test_fuzzFileAndFuncMatchPattern(t *testing.T) {
name string
expectedFileMatch bool
expectedFuncMatch bool
lang clients.Language
lang clients.LanguageName
fileName string
fileContent string
wantErr bool
Expand All @@ -174,7 +174,7 @@ func Test_fuzzFileAndFuncMatchPattern(t *testing.T) {
name: "Test_fuzzFuncRegex file success & func success",
expectedFileMatch: true,
expectedFuncMatch: true,
lang: "go",
lang: clients.LanguageName("go"),
fileName: "FOOoo_fOOff_BaRRR_test.go",
fileContent: `func FuzzSomething (fOo_bAR_1234 *testing.F)`,
wantErr: false,
Expand All @@ -183,7 +183,7 @@ func Test_fuzzFileAndFuncMatchPattern(t *testing.T) {
name: "Test_fuzzFuncRegex file success & func failure",
expectedFileMatch: true,
expectedFuncMatch: false,
lang: "go",
lang: clients.LanguageName("go"),
fileName: "a_unit_test.go",
fileContent: `func TestSomethingUnitTest (t *testing.T)`,
wantErr: true,
Expand All @@ -192,7 +192,7 @@ func Test_fuzzFileAndFuncMatchPattern(t *testing.T) {
name: "Test_fuzzFuncRegex file failure & func failure",
expectedFileMatch: false,
expectedFuncMatch: false,
lang: "go",
lang: clients.LanguageName("go"),
fileName: "not_a_fuzz_test_file.go",
fileContent: `func main (t *testing.T)`,
wantErr: true,
Expand All @@ -201,7 +201,7 @@ func Test_fuzzFileAndFuncMatchPattern(t *testing.T) {
name: "Test_fuzzFuncRegex not a support language",
expectedFileMatch: false,
expectedFuncMatch: false,
lang: "not_a_supported_one",
lang: clients.LanguageName("not_a_supported_one"),
fileName: "a_fuzz_test.py",
fileContent: `def NotSupported (foo)`,
wantErr: true,
Expand Down Expand Up @@ -237,7 +237,7 @@ func Test_checkFuzzFunc(t *testing.T) {
name string
want bool
wantErr bool
langs map[clients.Language]int
langs []clients.Language
fileName []string
fileContent string
}{
Expand All @@ -250,8 +250,11 @@ func Test_checkFuzzFunc(t *testing.T) {
"foo_test.go",
"main.go",
},
langs: map[clients.Language]int{
clients.Go: 100,
langs: []clients.Language{
{
Name: clients.Go,
NumLines: 100,
},
},
fileContent: "func TestFoo (t *testing.T)",
},
Expand All @@ -274,8 +277,8 @@ func Test_checkFuzzFunc(t *testing.T) {
req := checker.CheckRequest{
RepoClient: mockClient,
}
for l := range tt.langs {
got, _, err := checkFuzzFunc(&req, l)
for _, l := range tt.langs {
got, _, err := checkFuzzFunc(&req, l.Name)
if (got != tt.want || err != nil) && !tt.wantErr {
t.Errorf("checkFuzzFunc() = %v, want %v for %v", got, tt.want, tt.name)
}
Expand Down
2 changes: 1 addition & 1 deletion clients/githubrepo/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ func (client *Client) ListStatuses(ref string) ([]clients.Status, error) {
}

// ListProgrammingLanguages implements RepoClient.ListProgrammingLanguages.
func (client *Client) ListProgrammingLanguages() (map[clients.Language]int, error) {
func (client *Client) ListProgrammingLanguages() ([]clients.Language, error) {
return client.languages.listProgrammingLanguages()
}

Expand Down
23 changes: 18 additions & 5 deletions clients/githubrepo/languages.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ type languagesHandler struct {
ctx context.Context
errSetup error
repourl *repoURL
languages map[clients.Language]int
languages []clients.Language
}

func (handler *languagesHandler) init(ctx context.Context, repourl *repoURL) {
Expand All @@ -52,21 +52,34 @@ func (handler *languagesHandler) setup() error {
handler.errSetup = fmt.Errorf("request for repo languages failed with %w", err)
return
}
handler.languages = map[clients.Language]int{}
// The client.repoClient.Do API writes the response body to the handler.languages,
bodyJSON := map[clients.LanguageName]int{}
// The client.repoClient.Do API writes the response body to var bodyJSON,
// so we can ignore the first returned variable (the entire http response object)
// since we only need the response body here.
_, err = client.Do(handler.ctx, req, &handler.languages)
_, err = client.Do(handler.ctx, req, &bodyJSON)
if err != nil {
handler.errSetup = fmt.Errorf("response for repo languages failed with %w", err)
return
}
// Parse the raw JSON to an array of languages.
for k, v := range bodyJSON {
// TODO: once the constants defined in clients/languages.go become a complete list of supported languages,
// add support here so that an unsupported language emits a "not-supported" error and stops the parsing.
// Currently, all languages returned in the JSON are parsed into the result since the constant list is incomplete.
handler.languages = append(handler.languages,
clients.Language{
Name: k,
NumLines: v,
},
)
}
handler.errSetup = nil
})

return handler.errSetup
}

func (handler *languagesHandler) listProgrammingLanguages() (map[clients.Language]int, error) {
func (handler *languagesHandler) listProgrammingLanguages() ([]clients.Language, error) {
if err := handler.setup(); err != nil {
return nil, fmt.Errorf("error during languagesHandler.setup: %w", err)
}
Expand Down
65 changes: 42 additions & 23 deletions clients/languages.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,60 +14,79 @@

package clients

// Language represents a customized string for languages used by clients.
// A language could be a programming language, or more general,
// such as Dockerfile, CMake, HTML, YAML, etc.
type Language string
// LanguageName is the name of a language, a customized type of string.
type LanguageName string

// TODO: retrieve all languages supported by GitHub.
// TODO: retrieve all languages supported by GitHub, or add one manually if needed.
// Currently, this is still an incomplete list of languages.
const (
// Go: https://go.dev/
Go Language = "go"
Go LanguageName = "go"

// Python: https://www.python.org/
Python Language = "python"
Python LanguageName = "python"

// JavaScript: https://www.javascript.com/
JavaScript Language = "javascript"
JavaScript LanguageName = "javascript"

// C++: https://cplusplus.com/
Cpp Language = "c++"
Cpp LanguageName = "c++"

// C: https://www.open-std.org/jtc1/sc22/wg14/
C Language = "c"
C LanguageName = "c"

// TypeScript: https://www.typescriptlang.org/
TypeScript Language = "typescript"
TypeScript LanguageName = "typescript"

// Java: https://www.java.com/en/
Java Language = "java"
Java LanguageName = "java"

// C#: https://docs.microsoft.com/en-us/dotnet/csharp/
CSharp Language = "c#"
CSharp LanguageName = "c#"

// Ruby: https://www.ruby-lang.org/
Ruby Language = "ruby"
Ruby LanguageName = "ruby"

// PHP: https://www.php.net/
PHP Language = "php"
PHP LanguageName = "php"

// Starlark: https://github.com/bazelbuild/starlark
StarLark Language = "starlark"
StarLark LanguageName = "starlark"

// Scala: https://www.scala-lang.org/
Scala Language = "scala"
Scala LanguageName = "scala"

// Kotlin: https://kotlinlang.org/
Kotlin Language = "kotlin"
Kotlin LanguageName = "kotlin"

// Swift: https://github.com/apple/swift
Swift Language = "swift"
Swift LanguageName = "swift"

// Rust: https://github.com/rust-lang/rust
Rust Language = "rust"
Rust LanguageName = "rust"

// CMake: https://cmake.org/
CMake LanguageName = "cmake"

// Other indicates other programming languages not listed by the GitHub API.
Other Language = "other"
// Dockerfile: https://docs.docker.com/engine/reference/builder/
Dockerfile LanguageName = "dockerfile"

// Add more programming languages here if needed, please use lower cases.
// Other indicates other languages not listed by the GitHub API.
Other LanguageName = "other"

// Add more languages here if needed,
// please use lowercase for the LanguageName value.
)

// Language holds the per-language information used by clients:
// the language's name and its size within a repository.
// A language could be a programming language, or more general,
// such as Dockerfile, CMake, HTML, YAML, etc.
type Language struct {
// Name is the name of this language.
Name LanguageName

// NumLines is the total number of code lines of this language in the repo.
// NOTE(review): GitHub's "list repository languages" endpoint reports sizes in
// bytes rather than lines; confirm which unit the populating client stores here.
NumLines int

// TODO: add more properties for Language.
}
2 changes: 1 addition & 1 deletion clients/localdir/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ func (client *localDirClient) Close() error {

// ListProgrammingLanguages implements RepoClient.ListProgrammingLanguages.
// TODO: add ListProgrammingLanguages support for local directories
func (client *localDirClient) ListProgrammingLanguages() (map[clients.Language]int, error) {
func (client *localDirClient) ListProgrammingLanguages() ([]clients.Language, error) {
return nil, fmt.Errorf("ListProgrammingLanguages: %w", clients.ErrUnsupportedFeature)
}

Expand Down
4 changes: 2 additions & 2 deletions clients/mockclients/repo_client.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit e42af75

Please sign in to comment.