Flaky test reporting(part knative#5) Automatic Slack notification for…

… flaky tests (knative#541) * initialize commit for Slack notification * slack notification * update based on PR comments * Update tools/flaky-test-reporter/slack_notification.go Co-Authored-By: chaodaiG <[email protected]> * Update tools/flaky-test-reporter/slack_notification.go Co-Authored-By: chaodaiG <[email protected]> * Update tools/flaky-test-reporter/slack_notification.go Co-Authored-By: chaodaiG <[email protected]> * updates for PR comments * updates for PR comments
coryrc · Mar 8, 2019 · 56d792c · 56d792c
1 parent e7cb9c7
commit 56d792c
Show file tree

Hide file tree

Showing 7 changed files with 244 additions and 36 deletions.
diff --git a/shared/testgrid/testgrid.go b/shared/testgrid/testgrid.go
@@ -19,21 +19,28 @@ limitations under the License.
 package testgrid
 
 import (
-	"path"
 	"fmt"
 	"log"
 	"os"
+	"path"
 
-	"github.com/knative/test-infra/shared/prow"
 	"github.com/knative/test-infra/shared/junit"
+	"github.com/knative/test-infra/shared/prow"
 )
 
 const (
 	filePrefix = "junit_"
 	extension  = ".xml"
+	// BaseURL is Knative testgrid base URL
+	BaseURL = "https://testgrid.knative.dev"
 )
 
-// createDir creates dir if does not exist. 
+// jobNameTestgridURLMap contains a hard-coded mapping from job name to Testgrid tab URL, relative to BaseURL
+var jobNameTestgridURLMap = map[string]string{
+	"ci-knative-serving-continuous": "knative-serving#continuous",
+}
+
+// createDir creates dir if does not exist.
 func createDir(dirPath string) error {
 	if _, err := os.Stat(dirPath); os.IsNotExist(err) {
 		if err = os.MkdirAll(dirPath, 0777); err != nil {
@@ -43,6 +50,18 @@ func createDir(dirPath string) error {
 	return nil
 }
 
+// GetTestgridTabURL gets the Testgrid tab URL for the given job, with the given filters appended
+func GetTestgridTabURL(jobName string, filters []string) (string, error) {
+	url, ok := jobNameTestgridURLMap[jobName]
+	if !ok {
+		return "", fmt.Errorf("cannot find Testgrid tab for job '%s'", jobName)
+	}
+	for _, filter := range filters {
+		url += "&" + filter
+	}
+	return fmt.Sprintf("%s/%s", BaseURL, url), nil
+}
+
 // CreateXMLOutput creates the junit xml file in the provided artifacts directory
 func CreateXMLOutput(tc []junit.TestCase, testName string) error {
 	ts := junit.TestSuites{}
@@ -58,7 +77,7 @@ func CreateXMLOutput(tc []junit.TestCase, testName string) error {
 		return err
 	}
 
-	outputFile := path.Join(artifactsDir, filePrefix + testName + extension)
+	outputFile := path.Join(artifactsDir, filePrefix+testName+extension)
 	log.Printf("Storing output in %s", outputFile)
 	f, err := os.OpenFile(outputFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
 	defer f.Close()

diff --git a/tools/flaky-test-reporter/error.go b/tools/flaky-test-reporter/error.go
@@ -25,7 +25,7 @@ import (
 
 // combineErrors combines slice of errors and return a single error
 func combineErrors(errs []error) error {
-	if nil == errs || 0 == len(errs) {
+	if len(errs) == 0 {
 		return nil
 	}
 	var errStrs []string

diff --git a/tools/flaky-test-reporter/github_issue.go b/tools/flaky-test-reporter/github_issue.go
@@ -96,6 +96,11 @@ type flakyIssue struct {
 	comment  *github.IssueComment // The first auto comment, updated for every history
 }
 
+// getIdentityForTest creates a unique string for a test, which is used for identifying its Github issue
+func getIdentityForTest(testFullName, repoName string) string {
+	return fmt.Sprintf("'%s' in repo '%s'", testFullName, repoName)
+}
+
 // GithubIssue handles methods for github issues
 type GithubIssue struct {
 	user   *ghutil.GithubUser
@@ -182,7 +187,7 @@ func (gi *GithubIssue) prependComment(oldComment, newComment string) string {
 
 // updateIssue adds comments to an existing issue, close an issue if test passed both in previous day and today,
 // reopens the issue if test becomes flaky while issue is closed.
-func (gi *GithubIssue) updateIssue(fi *flakyIssue, newComment string, ts *TestStat, dryrun *bool) error {
+func (gi *GithubIssue) updateIssue(fi *flakyIssue, newComment string, ts *TestStat, dryrun bool) error {
 	issue := fi.issue
 	passedLastTime := false
 	latestStatus := regexp.MustCompile(reLastestStatus).FindStringSubmatch(fi.comment.GetBody())
@@ -238,7 +243,7 @@ func (gi *GithubIssue) updateIssue(fi *flakyIssue, newComment string, ts *TestSt
 }
 
 // createNewIssue creates an issue, adds flaky label and adds comment.
-func (gi *GithubIssue) createNewIssue(org, repoForIssue, title, body string, comment string, dryrun *bool) error {
+func (gi *GithubIssue) createNewIssue(org, repoForIssue, title, body string, comment string, dryrun bool) error {
 	var newIssue *github.Issue
 	if err := run(
 		"creating issue",
@@ -369,15 +374,12 @@ func (gi *GithubIssue) getFlakyIssues() (map[string][]*flakyIssue, error) {
 // processGithubIssueForRepo reads RepoData and existing issues, and create/close/reopen/comment on issues.
 // The function returns a slice of messages containing performed actions, and a slice of error messages,
 // these can later on be printed as summary at the end of run
-func (gi *GithubIssue) processGithubIssueForRepo(rd *RepoData, flakyIssuesMap map[string][]*flakyIssue, dryrun *bool) ([]string, error) {
+func (gi *GithubIssue) processGithubIssueForRepo(rd *RepoData, flakyIssuesMap map[string][]*flakyIssue, dryrun bool) ([]string, error) {
 	var messages []string
 	var errs []error
 
 	// If there are too many failures, create a single issue tracking it.
-	flakyRate, err := getFlakyRate(rd.TestStats)
-	if nil != err {
-		return nil, err
-	}
+	flakyRate := getFlakyRate(rd)
 	if flakyRate > threshold {
 		log.Printf("flaky rate above '%f', creating a single issue", threshold)
 		identity := fmt.Sprintf("%.2f%% tests failed in repo %s on %s",
@@ -400,7 +402,7 @@ func (gi *GithubIssue) processGithubIssueForRepo(rd *RepoData, flakyIssuesMap ma
 		if !ts.hasEnoughRuns() || (!ts.isFlaky() && !ts.isPassed()) {
 			continue
 		}
-		identity := fmt.Sprintf("'%s' in repo '%s'", testFullName, rd.Config.Repo)
+		identity := getIdentityForTest(testFullName, rd.Config.Repo)
 		comment := gi.createCommentForTest(rd, testFullName)
 		if existIssues, ok := flakyIssuesMap[identity]; ok { // update issue with current result
 			for _, existIssue := range existIssues {
@@ -437,7 +439,7 @@ func (gi *GithubIssue) processGithubIssueForRepo(rd *RepoData, flakyIssuesMap ma
 }
 
 // analyze all results, figure out flaky tests and processing existing auto:flaky issues
-func (gi *GithubIssue) processGithubIssues(repoDataAll []*RepoData, dryrun *bool) error {
+func (gi *GithubIssue) processGithubIssues(repoDataAll []*RepoData, dryrun bool) error {
 	messagesMap := make(map[string][]string)
 	errMap := make(map[string][]error)
 

diff --git a/tools/flaky-test-reporter/main.go b/tools/flaky-test-reporter/main.go
@@ -29,20 +29,31 @@ import (
 )
 
 func main() {
-	serviceAccount := flag.String("service-account", os.Getenv("GOOGLE_APPLICATION_CREDENTIALS"), "JSON key file for service account to use")
+	serviceAccount := flag.String("service-account", os.Getenv("GOOGLE_APPLICATION_CREDENTIALS"), "JSON key file for GCS service account")
 	githubToken := flag.String("github-token", "", "Token file for Github authentication")
+	slackAccount := flag.String("slack-account", "", "slack secret file for authenticating with Slack")
 	dryrun := flag.Bool("dry-run", false, "dry run switch")
 	flag.Parse()
 
 	if nil != dryrun && true == *dryrun {
 		log.Printf("running in [dry run mode]")
 	}
 
-	var repoDataAll []*RepoData
-	prow.Initialize(*serviceAccount) // Explicit authenticate with gcs Client
+	if err := prow.Initialize(*serviceAccount); nil != err { // Explicit authenticate with gcs Client
+		log.Fatalf("Failed authenticating GCS: '%v'", err)
+	}
+	ghi, err := Setup(*githubToken)
+	if err != nil {
+		log.Fatalf("Cannot setup github: %v", err)
+	}
+	slackClient, err := newSlackClient(*slackAccount)
+	if nil != err {
+		log.Fatalf("Failed authenticating Slack: '%v'", err)
+	}
 
+	var repoDataAll []*RepoData
 	// Clean up local artifacts directory, this will be used later for artifacts uploads
-	err := os.RemoveAll(prow.GetLocalArtifactsDir()) // this function returns nil if path not found
+	err = os.RemoveAll(prow.GetLocalArtifactsDir()) // this function returns nil if path not found
 	if nil == err {
 		if _, err = os.Stat(prow.GetLocalArtifactsDir()); os.IsNotExist(err) {
 			err = os.MkdirAll(prow.GetLocalArtifactsDir(), 0777)
@@ -64,9 +75,18 @@ func main() {
 		repoDataAll = append(repoDataAll, rd)
 	}
 
-	ghi, err := Setup(*githubToken)
-	if err != nil {
-		log.Fatalf("Cannot setup github: %v", err)
+	// Errors that could result in inaccurate reporting are treated with fast fail by processGithubIssues,
+	// so any errors returned are Github operation errors, which in most cases wouldn't happen, but in case
+	// they do, the job should fail after the Slack notification
+	githubErr := ghi.processGithubIssues(repoDataAll, *dryrun)
+	slackErr := sendSlackNotifications(repoDataAll, slackClient, ghi, *dryrun)
+	if nil != githubErr {
+		log.Printf("Github step failures:\n%v", githubErr)
+	}
+	if nil != slackErr {
+		log.Printf("Slack step failures:\n%v", slackErr)
+	}
+	if nil != githubErr || nil != slackErr { // Fail this job if there is any error
+		os.Exit(1)
 	}
-	ghi.processGithubIssues(repoDataAll, dryrun)
 }
diff --git a/tools/flaky-test-reporter/result.go b/tools/flaky-test-reporter/result.go
@@ -88,18 +88,23 @@ func (ts *TestStat) getTestStatus() string {
 	}
 }
 
-func getFlakyRate(testStats map[string]*TestStat) (float32, error) {
-	totalCount := len(testStats)
-	if 0 == totalCount {
-		return 0.0, nil
-	}
-	flakyCount := 0
-	for _, ts := range testStats {
+func getFlakyTests(rd *RepoData) []string {
+	var flakyTests []string
+	for testName, ts := range rd.TestStats {
 		if ts.isFlaky() {
-			flakyCount++
+			flakyTests = append(flakyTests, testName)
 		}
 	}
-	return float32(flakyCount)/float32(totalCount), nil
+	return flakyTests
+}
+
+
+func getFlakyRate(rd *RepoData) float32 {
+	totalCount := len(rd.TestStats)
+	if 0 == totalCount {
+		return 0.0
+	}
+	return float32(len(getFlakyTests(rd)))/float32(totalCount)
 }
 
 // createArtifactForRepo marshals RepoData into json format and stores it in a json file,
@@ -204,9 +209,6 @@ func (rd *RepoData) getResultSliceForTest(testName string) []junit.TestStatusEnu
 }
 
 func intSliceContains(its []int, target int) bool {
-	if nil == its {
-		return false
-	}
 	for _, it := range its {
 		if it == target {
 			return true

diff --git a/tools/flaky-test-reporter/run.go b/tools/flaky-test-reporter/run.go
@@ -22,8 +22,8 @@ import (
 	"log"
 )
 
-func run(message string, call func() error, dryrun *bool) error {
-	if nil != dryrun && true == *dryrun {
+func run(message string, call func() error, dryrun bool) error {
+	if dryrun {
 		log.Printf("[dry run] %s", message)
 		return nil
 	}