From 39c1a677b8d4f015c3f5cacec64eb6da2200b14c Mon Sep 17 00:00:00 2001 From: Sambhav Jain Date: Mon, 8 Jul 2024 16:17:55 +0530 Subject: [PATCH] roachprod-microbench: add threshold flag to compare command Epic: none Release note: None As part of fixing https://github.com/cockroachdb/cockroach/issues/106661, we need to run microbenchmarks in a CI job and compare the microbenchmarks with that of the latest master branch commit. For this purpose we want to re-use `roachprod-microbench` compare command. However, as of current implementation of the compare command, it doesn't do threshold comparison and this PR aims to add that capability using a flag. This change also adds a flag `--publish-sheets` to optionally allow publishing google sheets or not. It is a follow up change to https://github.com/cockroachdb/cockroach/pull/126810 which exposed `roachprod-microbench` capability of cleaning the benchmark output. --- pkg/cmd/roachprod-microbench/compare.go | 179 +++++++++++++----- .../roachprod-microbench/google/service.go | 28 +-- pkg/cmd/roachprod-microbench/main.go | 21 +- pkg/cmd/roachprod-microbench/model/metric.go | 15 ++ 4 files changed, 169 insertions(+), 74 deletions(-) diff --git a/pkg/cmd/roachprod-microbench/compare.go b/pkg/cmd/roachprod-microbench/compare.go index ec330b9abefa..9efb03bc3546 100644 --- a/pkg/cmd/roachprod-microbench/compare.go +++ b/pkg/cmd/roachprod-microbench/compare.go @@ -31,12 +31,14 @@ import ( ) type compareConfig struct { - newDir string - oldDir string - sheetDesc string - slackUser string - slackChannel string - slackToken string + newDir string + oldDir string + sheetDesc string + slackUser string + slackChannel string + slackToken string + threshold float64 + publishGoogleSheet bool } type compare struct { @@ -50,6 +52,7 @@ const ( packageSeparator = "→" slackPercentageThreshold = 20.0 slackReportMax = 3 + skipComparison = math.MaxFloat64 ) const slackCompareTemplateScript = ` @@ -66,17 +69,22 @@ func newCompare(config compareConfig) (*compare, error) { return nil, err } ctx := context.Background() - service, err := google.New(ctx) - if err != nil { - return nil, err + var service *google.Service + if config.publishGoogleSheet { + service, err = google.New(ctx) + if err != nil { + return nil, err + } } return &compare{compareConfig: config, service: service, packages: packages, ctx: ctx}, nil } func defaultCompareConfig() compareConfig { return compareConfig{ - slackUser: "microbench", - slackChannel: "perf-ops", + threshold: skipComparison, // Skip comparison by default + slackUser: "microbench", + slackChannel: "perf-ops", + publishGoogleSheet: true, } } @@ -113,16 +121,69 @@ func (c *compare) readMetrics() (map[string]*model.MetricMap, error) { return metricMaps, nil } +func (c *compare) createComparisons( + metricMaps map[string]*model.MetricMap, oldID string, newID string, +) model.ComparisonResultsMap { + + comparisonResultsMap := make(model.ComparisonResultsMap) + + for pkgGroup, metricMap := range metricMaps { + var comparisonResults []*model.ComparisonResult + + for _, metric := range *metricMap { + // Compute comparisons for each benchmark present in both runs. + comparisons := make(map[string]*model.Comparison) + for name := range metric.BenchmarkEntries { + comparison := metric.ComputeComparison(name, oldID, newID) + if comparison != nil { + comparisons[name] = comparison + } + } + + if len(comparisons) != 0 { + // Sort comparisons by delta, or the benchmark name if no delta is available. + keys := maps.Keys(comparisons) + sort.Slice(keys, func(i, j int) bool { + d1 := comparisons[keys[i]].Delta * float64(metric.Better) + d2 := comparisons[keys[j]].Delta * float64(metric.Better) + if d1 == d2 { + return keys[i] < keys[j] + } + return d1 < d2 + }) + + var comparisonDetails []*model.ComparisonDetail + for _, name := range keys { + comparisonDetails = append(comparisonDetails, &model.ComparisonDetail{ + BenchmarkName: name, + Comparison: comparisons[name], + }) + } + + comparisonResults = append(comparisonResults, &model.ComparisonResult{ + Metric: metric, + Comparisons: comparisonDetails, + }) + } + } + + comparisonResultsMap[pkgGroup] = comparisonResults + } + + return comparisonResultsMap +} + func (c *compare) publishToGoogleSheets( - metricMaps map[string]*model.MetricMap, + comparisonResultsMap model.ComparisonResultsMap, ) (map[string]string, error) { sheets := make(map[string]string) - for pkgGroup, metricMap := range metricMaps { + for pkgGroup, comparisonResults := range comparisonResultsMap { sheetName := pkgGroup + "/..." if c.sheetDesc != "" { sheetName = fmt.Sprintf("%s (%s)", sheetName, c.sheetDesc) } - url, err := c.service.CreateSheet(c.ctx, sheetName, *metricMap, "old", "new") + + url, err := c.service.CreateSheet(c.ctx, sheetName, comparisonResults, "old", "new") if err != nil { return nil, errors.Wrapf(err, "failed to create sheet for %s", pkgGroup) } @@ -133,7 +194,7 @@ func (c *compare) publishToGoogleSheets( } func (c *compare) postToSlack( - links map[string]string, metricMaps map[string]*model.MetricMap, + links map[string]string, comparisonResultsMap model.ComparisonResultsMap, ) error { // Template structures used to generate the Slack message. type changeInfo struct { @@ -146,58 +207,39 @@ func (c *compare) postToSlack( Changes []changeInfo } - pkgGroups := maps.Keys(metricMaps) + pkgGroups := maps.Keys(comparisonResultsMap) sort.Strings(pkgGroups) var attachments []slack.Attachment for _, pkgGroup := range pkgGroups { - metricMap := metricMaps[pkgGroup] - metricKeys := maps.Keys(*metricMap) - sort.Sort(sort.Reverse(sort.StringSlice(metricKeys))) - metrics := make([]metricInfo, 0) - var highestPercentChange = 0.0 - for _, metricKey := range metricKeys { - metric := (*metricMap)[metricKey] - mi := metricInfo{MetricName: metric.Name} + comparisonResults := comparisonResultsMap[pkgGroup] - // Compute comparisons for each benchmark present in both runs. - comparisons := make(map[string]*model.Comparison) - for name := range metric.BenchmarkEntries { - comparison := metric.ComputeComparison(name, "old", "new") - if comparison != nil { - comparisons[name] = comparison - } - } - - // Sort comparisons by delta, or the benchmark name if no delta is available. - keys := maps.Keys(comparisons) - sort.Slice(keys, func(i, j int) bool { - d1 := comparisons[keys[i]].Delta * float64(metric.Better) - d2 := comparisons[keys[j]].Delta * float64(metric.Better) - if d1 == d2 { - return keys[i] < keys[j] - } - return d1 < d2 - }) + var metrics []metricInfo + var highestPercentChange = 0.0 + for _, result := range comparisonResults { + mi := metricInfo{MetricName: result.Metric.Name} - for _, name := range keys { + for _, detail := range result.Comparisons { if len(mi.Changes) >= slackReportMax { break } - if (comparisons[name].Delta < 0 && metric.Better < 0) || - (comparisons[name].Delta > 0 && metric.Better > 0) || - comparisons[name].Delta == 0 { + comparison := detail.Comparison + metric := result.Metric + + if (comparison.Delta < 0 && metric.Better < 0) || + (comparison.Delta > 0 && metric.Better > 0) || + comparison.Delta == 0 { continue } - nameSplit := strings.Split(name, packageSeparator) + nameSplit := strings.Split(detail.BenchmarkName, packageSeparator) ci := changeInfo{ BenchmarkName: nameSplit[0] + packageSeparator + truncateBenchmarkName(nameSplit[1], 32), - PercentChange: fmt.Sprintf("%.2f%%", comparisons[name].Delta), + PercentChange: fmt.Sprintf("%.2f%%", comparison.Delta), } - if math.Abs(comparisons[name].Delta) > highestPercentChange { - highestPercentChange = math.Abs(comparisons[name].Delta) + if math.Abs(comparison.Delta) > highestPercentChange { + highestPercentChange = math.Abs(comparison.Delta) } ci.ChangeSymbol = ":small_orange_diamond:" - if math.Abs(comparisons[name].Delta) > slackPercentageThreshold { + if math.Abs(comparison.Delta) > slackPercentageThreshold { ci.ChangeSymbol = ":small_red_triangle:" } mi.Changes = append(mi.Changes, ci) @@ -241,6 +283,39 @@ func (c *compare) postToSlack( ) } +func (c *compare) compareUsingThreshold(comparisonResultsMap model.ComparisonResultsMap) error { + var reportStrings []string + + for pkgName, comparisonResults := range comparisonResultsMap { + var metrics []string + + for _, result := range comparisonResults { + metricKey := result.Metric.Unit + + for _, detail := range result.Comparisons { + comparison := detail.Comparison + + // If Delta is more negative than the threshold, then there's a concerning perf regression + if (comparison.Delta*float64(result.Metric.Better))+c.threshold < 0 { + metrics = append(metrics, fmt.Sprintf("Metric: %s, Benchmark: %s, Change: %s", metricKey, detail.BenchmarkName, comparison.FormattedDelta)) + } + } + } + + if len(metrics) > 0 { + reportStrings = append(reportStrings, fmt.Sprintf("Package: %s\n%s", pkgName, strings.Join(metrics, "\n"))) + } + } + + if len(reportStrings) > 0 { + reportString := strings.Join(reportStrings, "\n\n") + return errors.Errorf("there are benchmark regressions of > %.2f%% in the following packages:\n\n%s", + c.threshold*100, reportString) + } + + return nil +} + func processReportFile(builder *model.Builder, id, pkg, path string) error { file, err := os.Open(path) if err != nil { diff --git a/pkg/cmd/roachprod-microbench/google/service.go b/pkg/cmd/roachprod-microbench/google/service.go index 2fc61f006730..ea9c34e3646c 100644 --- a/pkg/cmd/roachprod-microbench/google/service.go +++ b/pkg/cmd/roachprod-microbench/google/service.go @@ -79,31 +79,23 @@ func (srv *Service) testServices(ctx context.Context) error { // CreateSheet creates a new Google spreadsheet with the provided metric data. func (srv *Service) CreateSheet( - ctx context.Context, name string, metricMap model.MetricMap, oldID, newID string, + ctx context.Context, + name string, + comparisonResults []*model.ComparisonResult, + oldID, newID string, ) (string, error) { var s sheets.Spreadsheet s.Properties = &sheets.SpreadsheetProperties{Title: name} - // Sort sheets by name in reverse order. This ensures `sec/op` is the first - // sheet and metric in the summary. - sheetNames := make([]string, 0, len(metricMap)) - for sheetName := range metricMap { - sheetNames = append(sheetNames, sheetName) - } - sort.Sort(sort.Reverse(sort.StringSlice(sheetNames))) - // Raw data sheets. - sheetInfos, idx := make([]rawSheetInfo, 0, len(metricMap)), 0 - for _, sheetName := range sheetNames { - metric := metricMap[sheetName] - // Compute comparisons for each benchmark present in both runs. + sheetInfos, idx := make([]rawSheetInfo, 0, len(comparisonResults)), 0 + for _, result := range comparisonResults { + metric := result.Metric comparisons := make(map[string]*model.Comparison) - for name := range metric.BenchmarkEntries { - comparison := metric.ComputeComparison(name, oldID, newID) - if comparison != nil { - comparisons[name] = comparison - } + for _, detail := range result.Comparisons { + comparisons[detail.BenchmarkName] = detail.Comparison } + // Only generate a sheet if there are comparisons to show. if len(comparisons) != 0 { sh, info := srv.createRawSheet(metric, comparisons, oldID, newID, idx) diff --git a/pkg/cmd/roachprod-microbench/main.go b/pkg/cmd/roachprod-microbench/main.go index 1c050c7b5c6d..2e0d67498fb8 100644 --- a/pkg/cmd/roachprod-microbench/main.go +++ b/pkg/cmd/roachprod-microbench/main.go @@ -142,16 +142,27 @@ func makeCompareCommand() *cobra.Command { return err } - links, err := c.publishToGoogleSheets(metricMaps) - if err != nil { - return err + comparisonResult := c.createComparisons(metricMaps, "old", "new") + + var links map[string]string + if config.publishGoogleSheet { + links, err = c.publishToGoogleSheets(comparisonResult) + if err != nil { + return err + } } + if config.slackToken != "" { - err = c.postToSlack(links, metricMaps) + err = c.postToSlack(links, comparisonResult) if err != nil { return err } } + + // if the threshold is set, we want to compare and fail the job in case of perf regressions + if config.threshold != skipComparison { + return c.compareUsingThreshold(comparisonResult) + } return nil } @@ -166,6 +177,8 @@ func makeCompareCommand() *cobra.Command { cmd.Flags().StringVar(&config.slackToken, "slack-token", config.slackToken, "pass a slack token to post the results to a slack channel") cmd.Flags().StringVar(&config.slackUser, "slack-user", config.slackUser, "slack user to post the results as") cmd.Flags().StringVar(&config.slackChannel, "slack-channel", config.slackChannel, "slack channel to post the results to") + cmd.Flags().Float64Var(&config.threshold, "threshold", config.threshold, "threshold in percentage value for detecting perf regression ") + cmd.Flags().BoolVar(&config.publishGoogleSheet, "publish-sheets", config.publishGoogleSheet, "flag to make the command create a google sheet of the benchmark results and publish it") return cmd } diff --git a/pkg/cmd/roachprod-microbench/model/metric.go b/pkg/cmd/roachprod-microbench/model/metric.go index 1760c1baf5d6..8440c44185a9 100644 --- a/pkg/cmd/roachprod-microbench/model/metric.go +++ b/pkg/cmd/roachprod-microbench/model/metric.go @@ -48,3 +48,18 @@ type Comparison struct { Delta float64 FormattedDelta string } + +// ComparisonResult holds the comparison results for a specific metric. +type ComparisonResult struct { + Metric *Metric + Comparisons []*ComparisonDetail +} + +// ComparisonDetail holds the details of a single comparison. +type ComparisonDetail struct { + BenchmarkName string + Comparison *Comparison +} + +// ComparisonResultsMap holds the comparison results for all package groups. +type ComparisonResultsMap map[string][]*ComparisonResult