roachprod-microbench: add threshold flag to compare command

Epic: none Release note: None As part of fixing cockroachdb#106661, we need to run microbenchmarks in a CI job and compare the microbenchmarks with that of the latest master branch commit. For this purpose we want to re-use `roachprod-microbench` compare command. However, as of current implementation of the compare command, it doesn't do threshold comparison and this PR aims to add that capability using a flag. This change also adds a flag `--publish-sheets` to optionally allow publishing google sheets. It is a follow up change to cockroachdb#126810 which exposed `roachprod-microbench` capability of cleaning the benchmark output.
sambhav-jain-16 · Jul 19, 2024 · 69ff927 · 69ff927
1 parent d51907a
commit 69ff927
Show file tree

Hide file tree

Showing 4 changed files with 171 additions and 74 deletions.
diff --git a/pkg/cmd/roachprod-microbench/compare.go b/pkg/cmd/roachprod-microbench/compare.go
@@ -31,12 +31,14 @@ import (
 )
 
 type compareConfig struct {
-	newDir       string
-	oldDir       string
-	sheetDesc    string
-	slackUser    string
-	slackChannel string
-	slackToken   string
+	newDir             string
+	oldDir             string
+	sheetDesc          string
+	slackUser          string
+	slackChannel       string
+	slackToken         string
+	threshold          float64
+	publishGoogleSheet bool
 }
 
 type compare struct {
@@ -50,6 +52,7 @@ const (
 	packageSeparator         = "→"
 	slackPercentageThreshold = 20.0
 	slackReportMax           = 3
+	skipComparison           = math.MaxFloat64
 )
 
 const slackCompareTemplateScript = `
@@ -66,17 +69,22 @@ func newCompare(config compareConfig) (*compare, error) {
 		return nil, err
 	}
 	ctx := context.Background()
-	service, err := google.New(ctx)
-	if err != nil {
-		return nil, err
+	var service *google.Service
+	if config.publishGoogleSheet {
+		service, err = google.New(ctx)
+		if err != nil {
+			return nil, err
+		}
 	}
 	return &compare{compareConfig: config, service: service, packages: packages, ctx: ctx}, nil
 }
 
 func defaultCompareConfig() compareConfig {
 	return compareConfig{
-		slackUser:    "microbench",
-		slackChannel: "perf-ops",
+		threshold:          skipComparison, // Skip comparison by default
+		slackUser:          "microbench",
+		slackChannel:       "perf-ops",
+		publishGoogleSheet: true,
 	}
 }
 
@@ -113,16 +121,71 @@ func (c *compare) readMetrics() (map[string]*model.MetricMap, error) {
 	return metricMaps, nil
 }
 
+func (c *compare) createComparisons(
+	metricMaps map[string]*model.MetricMap, oldID string, newID string,
+) model.ComparisonResultsMap {
+
+	comparisonResultsMap := make(model.ComparisonResultsMap)
+
+	for pkgGroup, metricMap := range metricMaps {
+		var comparisonResults []*model.ComparisonResult
+		metricKeys := maps.Keys(*metricMap)
+		sort.Sort(sort.Reverse(sort.StringSlice(metricKeys)))
+		for _, metricKey := range metricKeys {
+			metric := (*metricMap)[metricKey]
+			// Compute comparisons for each benchmark present in both runs.
+			comparisons := make(map[string]*model.Comparison)
+			for name := range metric.BenchmarkEntries {
+				comparison := metric.ComputeComparison(name, oldID, newID)
+				if comparison != nil {
+					comparisons[name] = comparison
+				}
+			}
+
+			if len(comparisons) != 0 {
+				// Sort comparisons by delta, or the benchmark name if no delta is available.
+				keys := maps.Keys(comparisons)
+				sort.Slice(keys, func(i, j int) bool {
+					d1 := comparisons[keys[i]].Delta * float64(metric.Better)
+					d2 := comparisons[keys[j]].Delta * float64(metric.Better)
+					if d1 == d2 {
+						return keys[i] < keys[j]
+					}
+					return d1 < d2
+				})
+
+				var comparisonDetails []*model.ComparisonDetail
+				for _, name := range keys {
+					comparisonDetails = append(comparisonDetails, &model.ComparisonDetail{
+						BenchmarkName: name,
+						Comparison:    comparisons[name],
+					})
+				}
+
+				comparisonResults = append(comparisonResults, &model.ComparisonResult{
+					Metric:      metric,
+					Comparisons: comparisonDetails,
+				})
+			}
+		}
+
+		comparisonResultsMap[pkgGroup] = comparisonResults
+	}
+
+	return comparisonResultsMap
+}
+
 func (c *compare) publishToGoogleSheets(
-	metricMaps map[string]*model.MetricMap,
+	comparisonResultsMap model.ComparisonResultsMap,
 ) (map[string]string, error) {
 	sheets := make(map[string]string)
-	for pkgGroup, metricMap := range metricMaps {
+	for pkgGroup, comparisonResults := range comparisonResultsMap {
 		sheetName := pkgGroup + "/..."
 		if c.sheetDesc != "" {
 			sheetName = fmt.Sprintf("%s (%s)", sheetName, c.sheetDesc)
 		}
-		url, err := c.service.CreateSheet(c.ctx, sheetName, *metricMap, "old", "new")
+
+		url, err := c.service.CreateSheet(c.ctx, sheetName, comparisonResults, "old", "new")
 		if err != nil {
 			return nil, errors.Wrapf(err, "failed to create sheet for %s", pkgGroup)
 		}
@@ -133,7 +196,7 @@ func (c *compare) publishToGoogleSheets(
 }
 
 func (c *compare) postToSlack(
-	links map[string]string, metricMaps map[string]*model.MetricMap,
+	links map[string]string, comparisonResultsMap model.ComparisonResultsMap,
 ) error {
 	// Template structures used to generate the Slack message.
 	type changeInfo struct {
@@ -146,58 +209,39 @@ func (c *compare) postToSlack(
 		Changes    []changeInfo
 	}
 
-	pkgGroups := maps.Keys(metricMaps)
+	pkgGroups := maps.Keys(comparisonResultsMap)
 	sort.Strings(pkgGroups)
 	var attachments []slack.Attachment
 	for _, pkgGroup := range pkgGroups {
-		metricMap := metricMaps[pkgGroup]
-		metricKeys := maps.Keys(*metricMap)
-		sort.Sort(sort.Reverse(sort.StringSlice(metricKeys)))
-		metrics := make([]metricInfo, 0)
-		var highestPercentChange = 0.0
-		for _, metricKey := range metricKeys {
-			metric := (*metricMap)[metricKey]
-			mi := metricInfo{MetricName: metric.Name}
-
-			// Compute comparisons for each benchmark present in both runs.
-			comparisons := make(map[string]*model.Comparison)
-			for name := range metric.BenchmarkEntries {
-				comparison := metric.ComputeComparison(name, "old", "new")
-				if comparison != nil {
-					comparisons[name] = comparison
-				}
-			}
+		comparisonResults := comparisonResultsMap[pkgGroup]
 
-			// Sort comparisons by delta, or the benchmark name if no delta is available.
-			keys := maps.Keys(comparisons)
-			sort.Slice(keys, func(i, j int) bool {
-				d1 := comparisons[keys[i]].Delta * float64(metric.Better)
-				d2 := comparisons[keys[j]].Delta * float64(metric.Better)
-				if d1 == d2 {
-					return keys[i] < keys[j]
-				}
-				return d1 < d2
-			})
+		var metrics []metricInfo
+		var highestPercentChange = 0.0
+		for _, result := range comparisonResults {
+			mi := metricInfo{MetricName: result.Metric.Name}
 
-			for _, name := range keys {
+			for _, detail := range result.Comparisons {
 				if len(mi.Changes) >= slackReportMax {
 					break
 				}
-				if (comparisons[name].Delta < 0 && metric.Better < 0) ||
-					(comparisons[name].Delta > 0 && metric.Better > 0) ||
-					comparisons[name].Delta == 0 {
+				comparison := detail.Comparison
+				metric := result.Metric
+
+				if (comparison.Delta < 0 && metric.Better < 0) ||
+					(comparison.Delta > 0 && metric.Better > 0) ||
+					comparison.Delta == 0 {
 					continue
 				}
-				nameSplit := strings.Split(name, packageSeparator)
+				nameSplit := strings.Split(detail.BenchmarkName, packageSeparator)
 				ci := changeInfo{
 					BenchmarkName: nameSplit[0] + packageSeparator + truncateBenchmarkName(nameSplit[1], 32),
-					PercentChange: fmt.Sprintf("%.2f%%", comparisons[name].Delta),
+					PercentChange: fmt.Sprintf("%.2f%%", comparison.Delta),
 				}
-				if math.Abs(comparisons[name].Delta) > highestPercentChange {
-					highestPercentChange = math.Abs(comparisons[name].Delta)
+				if math.Abs(comparison.Delta) > highestPercentChange {
+					highestPercentChange = math.Abs(comparison.Delta)
 				}
 				ci.ChangeSymbol = ":small_orange_diamond:"
-				if math.Abs(comparisons[name].Delta) > slackPercentageThreshold {
+				if math.Abs(comparison.Delta) > slackPercentageThreshold {
 					ci.ChangeSymbol = ":small_red_triangle:"
 				}
 				mi.Changes = append(mi.Changes, ci)
@@ -241,6 +285,39 @@ func (c *compare) postToSlack(
 	)
 }
 
+func (c *compare) compareUsingThreshold(comparisonResultsMap model.ComparisonResultsMap) error {
+	var reportStrings []string
+
+	for pkgName, comparisonResults := range comparisonResultsMap {
+		var metrics []string
+
+		for _, result := range comparisonResults {
+			metricKey := result.Metric.Unit
+
+			for _, detail := range result.Comparisons {
+				comparison := detail.Comparison
+
+				// If Delta is more negative than the threshold, then there's a concerning perf regression
+				if (comparison.Delta*float64(result.Metric.Better))+c.threshold < 0 {
+					metrics = append(metrics, fmt.Sprintf("Metric: %s, Benchmark: %s, Change: %s", metricKey, detail.BenchmarkName, comparison.FormattedDelta))
+				}
+			}
+		}
+
+		if len(metrics) > 0 {
+			reportStrings = append(reportStrings, fmt.Sprintf("Package: %s\n%s", pkgName, strings.Join(metrics, "\n")))
+		}
+	}
+
+	if len(reportStrings) > 0 {
+		reportString := strings.Join(reportStrings, "\n\n")
+		return errors.Errorf("there are benchmark regressions of > %.2f%% in the following packages:\n\n%s",
+			c.threshold*100, reportString)
+	}
+
+	return nil
+}
+
 func processReportFile(builder *model.Builder, id, pkg, path string) error {
 	file, err := os.Open(path)
 	if err != nil {

diff --git a/pkg/cmd/roachprod-microbench/google/service.go b/pkg/cmd/roachprod-microbench/google/service.go
@@ -79,31 +79,23 @@ func (srv *Service) testServices(ctx context.Context) error {
 
 // CreateSheet creates a new Google spreadsheet with the provided metric data.
 func (srv *Service) CreateSheet(
-	ctx context.Context, name string, metricMap model.MetricMap, oldID, newID string,
+	ctx context.Context,
+	name string,
+	comparisonResults []*model.ComparisonResult,
+	oldID, newID string,
 ) (string, error) {
 	var s sheets.Spreadsheet
 	s.Properties = &sheets.SpreadsheetProperties{Title: name}
 
-	// Sort sheets by name in reverse order. This ensures `sec/op` is the first
-	// sheet and metric in the summary.
-	sheetNames := make([]string, 0, len(metricMap))
-	for sheetName := range metricMap {
-		sheetNames = append(sheetNames, sheetName)
-	}
-	sort.Sort(sort.Reverse(sort.StringSlice(sheetNames)))
-
 	// Raw data sheets.
-	sheetInfos, idx := make([]rawSheetInfo, 0, len(metricMap)), 0
-	for _, sheetName := range sheetNames {
-		metric := metricMap[sheetName]
-		// Compute comparisons for each benchmark present in both runs.
+	sheetInfos, idx := make([]rawSheetInfo, 0, len(comparisonResults)), 0
+	for _, result := range comparisonResults {
+		metric := result.Metric
 		comparisons := make(map[string]*model.Comparison)
-		for name := range metric.BenchmarkEntries {
-			comparison := metric.ComputeComparison(name, oldID, newID)
-			if comparison != nil {
-				comparisons[name] = comparison
-			}
+		for _, detail := range result.Comparisons {
+			comparisons[detail.BenchmarkName] = detail.Comparison
 		}
+
 		// Only generate a sheet if there are comparisons to show.
 		if len(comparisons) != 0 {
 			sh, info := srv.createRawSheet(metric, comparisons, oldID, newID, idx)

diff --git a/pkg/cmd/roachprod-microbench/main.go b/pkg/cmd/roachprod-microbench/main.go
@@ -142,16 +142,27 @@ func makeCompareCommand() *cobra.Command {
 			return err
 		}
 
-		links, err := c.publishToGoogleSheets(metricMaps)
-		if err != nil {
-			return err
+		comparisonResult := c.createComparisons(metricMaps, "old", "new")
+
+		var links map[string]string
+		if config.publishGoogleSheet {
+			links, err = c.publishToGoogleSheets(comparisonResult)
+			if err != nil {
+				return err
+			}
 		}
+
 		if config.slackToken != "" {
-			err = c.postToSlack(links, metricMaps)
+			err = c.postToSlack(links, comparisonResult)
 			if err != nil {
 				return err
 			}
 		}
+
+		// if the threshold is set, we want to compare and fail the job in case of perf regressions
+		if config.threshold != skipComparison {
+			return c.compareUsingThreshold(comparisonResult)
+		}
 		return nil
 	}
 
@@ -166,6 +177,8 @@ func makeCompareCommand() *cobra.Command {
 	cmd.Flags().StringVar(&config.slackToken, "slack-token", config.slackToken, "pass a slack token to post the results to a slack channel")
 	cmd.Flags().StringVar(&config.slackUser, "slack-user", config.slackUser, "slack user to post the results as")
 	cmd.Flags().StringVar(&config.slackChannel, "slack-channel", config.slackChannel, "slack channel to post the results to")
+	cmd.Flags().Float64Var(&config.threshold, "threshold", config.threshold, "threshold in percentage value for detecting perf regression ")
+	cmd.Flags().BoolVar(&config.publishGoogleSheet, "publish-sheets", config.publishGoogleSheet, "flag to make the command create a google sheet of the benchmark results and publish it")
 	return cmd
 }
 

diff --git a/pkg/cmd/roachprod-microbench/model/metric.go b/pkg/cmd/roachprod-microbench/model/metric.go
@@ -48,3 +48,18 @@ type Comparison struct {
 	Delta          float64
 	FormattedDelta string
 }
+
+// ComparisonResult holds the comparison results for a specific metric.
+type ComparisonResult struct {
+	Metric      *Metric
+	Comparisons []*ComparisonDetail
+}
+
+// ComparisonDetail holds the details of a single comparison.
+type ComparisonDetail struct {
+	BenchmarkName string
+	Comparison    *Comparison
+}
+
+// ComparisonResultsMap holds the comparison results for all package groups.
+type ComparisonResultsMap map[string][]*ComparisonResult