Skip to content

Commit

Permalink
roachprod-microbench: add threshold flag to compare command
Browse files Browse the repository at this point in the history
Epic: none

Release note: None

As part of fixing cockroachdb#106661, we need to run microbenchmarks in a CI job and compare the microbenchmarks with that of the latest master branch commit.

For this purpose we want to re-use `roachprod-microbench` compare command. However, as of current implementation of the compare command, it doesn't do threshold comparison and this PR aims to add that capability using a flag.

This change also adds a flag `--publish-sheets` to optionally allow publishing google sheets or not.

It is a follow up change to cockroachdb#126810 which exposed `roachprod-microbench` capability of cleaning the benchmark output.
  • Loading branch information
sambhav-jain-16 committed Jul 17, 2024
1 parent d51907a commit 39c1a67
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 74 deletions.
179 changes: 127 additions & 52 deletions pkg/cmd/roachprod-microbench/compare.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,14 @@ import (
)

type compareConfig struct {
newDir string
oldDir string
sheetDesc string
slackUser string
slackChannel string
slackToken string
newDir string
oldDir string
sheetDesc string
slackUser string
slackChannel string
slackToken string
threshold float64
publishGoogleSheet bool
}

type compare struct {
Expand All @@ -50,6 +52,7 @@ const (
packageSeparator = "→"
slackPercentageThreshold = 20.0
slackReportMax = 3
skipComparison = math.MaxFloat64
)

const slackCompareTemplateScript = `
Expand All @@ -66,17 +69,22 @@ func newCompare(config compareConfig) (*compare, error) {
return nil, err
}
ctx := context.Background()
service, err := google.New(ctx)
if err != nil {
return nil, err
var service *google.Service
if config.publishGoogleSheet {
service, err = google.New(ctx)
if err != nil {
return nil, err
}
}
return &compare{compareConfig: config, service: service, packages: packages, ctx: ctx}, nil
}

func defaultCompareConfig() compareConfig {
return compareConfig{
slackUser: "microbench",
slackChannel: "perf-ops",
threshold: skipComparison, // Skip comparison by default
slackUser: "microbench",
slackChannel: "perf-ops",
publishGoogleSheet: true,
}
}

Expand Down Expand Up @@ -113,16 +121,69 @@ func (c *compare) readMetrics() (map[string]*model.MetricMap, error) {
return metricMaps, nil
}

func (c *compare) createComparisons(
metricMaps map[string]*model.MetricMap, oldID string, newID string,
) model.ComparisonResultsMap {

comparisonResultsMap := make(model.ComparisonResultsMap)

for pkgGroup, metricMap := range metricMaps {
var comparisonResults []*model.ComparisonResult

for _, metric := range *metricMap {
// Compute comparisons for each benchmark present in both runs.
comparisons := make(map[string]*model.Comparison)
for name := range metric.BenchmarkEntries {
comparison := metric.ComputeComparison(name, oldID, newID)
if comparison != nil {
comparisons[name] = comparison
}
}

if len(comparisons) != 0 {
// Sort comparisons by delta, or the benchmark name if no delta is available.
keys := maps.Keys(comparisons)
sort.Slice(keys, func(i, j int) bool {
d1 := comparisons[keys[i]].Delta * float64(metric.Better)
d2 := comparisons[keys[j]].Delta * float64(metric.Better)
if d1 == d2 {
return keys[i] < keys[j]
}
return d1 < d2
})

var comparisonDetails []*model.ComparisonDetail
for _, name := range keys {
comparisonDetails = append(comparisonDetails, &model.ComparisonDetail{
BenchmarkName: name,
Comparison: comparisons[name],
})
}

comparisonResults = append(comparisonResults, &model.ComparisonResult{
Metric: metric,
Comparisons: comparisonDetails,
})
}
}

comparisonResultsMap[pkgGroup] = comparisonResults
}

return comparisonResultsMap
}

func (c *compare) publishToGoogleSheets(
metricMaps map[string]*model.MetricMap,
comparisonResultsMap model.ComparisonResultsMap,
) (map[string]string, error) {
sheets := make(map[string]string)
for pkgGroup, metricMap := range metricMaps {
for pkgGroup, comparisonResults := range comparisonResultsMap {
sheetName := pkgGroup + "/..."
if c.sheetDesc != "" {
sheetName = fmt.Sprintf("%s (%s)", sheetName, c.sheetDesc)
}
url, err := c.service.CreateSheet(c.ctx, sheetName, *metricMap, "old", "new")

url, err := c.service.CreateSheet(c.ctx, sheetName, comparisonResults, "old", "new")
if err != nil {
return nil, errors.Wrapf(err, "failed to create sheet for %s", pkgGroup)
}
Expand All @@ -133,7 +194,7 @@ func (c *compare) publishToGoogleSheets(
}

func (c *compare) postToSlack(
links map[string]string, metricMaps map[string]*model.MetricMap,
links map[string]string, comparisonResultsMap model.ComparisonResultsMap,
) error {
// Template structures used to generate the Slack message.
type changeInfo struct {
Expand All @@ -146,58 +207,39 @@ func (c *compare) postToSlack(
Changes []changeInfo
}

pkgGroups := maps.Keys(metricMaps)
pkgGroups := maps.Keys(comparisonResultsMap)
sort.Strings(pkgGroups)
var attachments []slack.Attachment
for _, pkgGroup := range pkgGroups {
metricMap := metricMaps[pkgGroup]
metricKeys := maps.Keys(*metricMap)
sort.Sort(sort.Reverse(sort.StringSlice(metricKeys)))
metrics := make([]metricInfo, 0)
var highestPercentChange = 0.0
for _, metricKey := range metricKeys {
metric := (*metricMap)[metricKey]
mi := metricInfo{MetricName: metric.Name}
comparisonResults := comparisonResultsMap[pkgGroup]

// Compute comparisons for each benchmark present in both runs.
comparisons := make(map[string]*model.Comparison)
for name := range metric.BenchmarkEntries {
comparison := metric.ComputeComparison(name, "old", "new")
if comparison != nil {
comparisons[name] = comparison
}
}

// Sort comparisons by delta, or the benchmark name if no delta is available.
keys := maps.Keys(comparisons)
sort.Slice(keys, func(i, j int) bool {
d1 := comparisons[keys[i]].Delta * float64(metric.Better)
d2 := comparisons[keys[j]].Delta * float64(metric.Better)
if d1 == d2 {
return keys[i] < keys[j]
}
return d1 < d2
})
var metrics []metricInfo
var highestPercentChange = 0.0
for _, result := range comparisonResults {
mi := metricInfo{MetricName: result.Metric.Name}

for _, name := range keys {
for _, detail := range result.Comparisons {
if len(mi.Changes) >= slackReportMax {
break
}
if (comparisons[name].Delta < 0 && metric.Better < 0) ||
(comparisons[name].Delta > 0 && metric.Better > 0) ||
comparisons[name].Delta == 0 {
comparison := detail.Comparison
metric := result.Metric

if (comparison.Delta < 0 && metric.Better < 0) ||
(comparison.Delta > 0 && metric.Better > 0) ||
comparison.Delta == 0 {
continue
}
nameSplit := strings.Split(name, packageSeparator)
nameSplit := strings.Split(detail.BenchmarkName, packageSeparator)
ci := changeInfo{
BenchmarkName: nameSplit[0] + packageSeparator + truncateBenchmarkName(nameSplit[1], 32),
PercentChange: fmt.Sprintf("%.2f%%", comparisons[name].Delta),
PercentChange: fmt.Sprintf("%.2f%%", comparison.Delta),
}
if math.Abs(comparisons[name].Delta) > highestPercentChange {
highestPercentChange = math.Abs(comparisons[name].Delta)
if math.Abs(comparison.Delta) > highestPercentChange {
highestPercentChange = math.Abs(comparison.Delta)
}
ci.ChangeSymbol = ":small_orange_diamond:"
if math.Abs(comparisons[name].Delta) > slackPercentageThreshold {
if math.Abs(comparison.Delta) > slackPercentageThreshold {
ci.ChangeSymbol = ":small_red_triangle:"
}
mi.Changes = append(mi.Changes, ci)
Expand Down Expand Up @@ -241,6 +283,39 @@ func (c *compare) postToSlack(
)
}

func (c *compare) compareUsingThreshold(comparisonResultsMap model.ComparisonResultsMap) error {
var reportStrings []string

for pkgName, comparisonResults := range comparisonResultsMap {
var metrics []string

for _, result := range comparisonResults {
metricKey := result.Metric.Unit

for _, detail := range result.Comparisons {
comparison := detail.Comparison

// If Delta is more negative than the threshold, then there's a concerning perf regression
if (comparison.Delta*float64(result.Metric.Better))+c.threshold < 0 {
metrics = append(metrics, fmt.Sprintf("Metric: %s, Benchmark: %s, Change: %s", metricKey, detail.BenchmarkName, comparison.FormattedDelta))
}
}
}

if len(metrics) > 0 {
reportStrings = append(reportStrings, fmt.Sprintf("Package: %s\n%s", pkgName, strings.Join(metrics, "\n")))
}
}

if len(reportStrings) > 0 {
reportString := strings.Join(reportStrings, "\n\n")
return errors.Errorf("there are benchmark regressions of > %.2f%% in the following packages:\n\n%s",
c.threshold*100, reportString)
}

return nil
}

func processReportFile(builder *model.Builder, id, pkg, path string) error {
file, err := os.Open(path)
if err != nil {
Expand Down
28 changes: 10 additions & 18 deletions pkg/cmd/roachprod-microbench/google/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,31 +79,23 @@ func (srv *Service) testServices(ctx context.Context) error {

// CreateSheet creates a new Google spreadsheet with the provided metric data.
func (srv *Service) CreateSheet(
ctx context.Context, name string, metricMap model.MetricMap, oldID, newID string,
ctx context.Context,
name string,
comparisonResults []*model.ComparisonResult,
oldID, newID string,
) (string, error) {
var s sheets.Spreadsheet
s.Properties = &sheets.SpreadsheetProperties{Title: name}

// Sort sheets by name in reverse order. This ensures `sec/op` is the first
// sheet and metric in the summary.
sheetNames := make([]string, 0, len(metricMap))
for sheetName := range metricMap {
sheetNames = append(sheetNames, sheetName)
}
sort.Sort(sort.Reverse(sort.StringSlice(sheetNames)))

// Raw data sheets.
sheetInfos, idx := make([]rawSheetInfo, 0, len(metricMap)), 0
for _, sheetName := range sheetNames {
metric := metricMap[sheetName]
// Compute comparisons for each benchmark present in both runs.
sheetInfos, idx := make([]rawSheetInfo, 0, len(comparisonResults)), 0
for _, result := range comparisonResults {
metric := result.Metric
comparisons := make(map[string]*model.Comparison)
for name := range metric.BenchmarkEntries {
comparison := metric.ComputeComparison(name, oldID, newID)
if comparison != nil {
comparisons[name] = comparison
}
for _, detail := range result.Comparisons {
comparisons[detail.BenchmarkName] = detail.Comparison
}

// Only generate a sheet if there are comparisons to show.
if len(comparisons) != 0 {
sh, info := srv.createRawSheet(metric, comparisons, oldID, newID, idx)
Expand Down
21 changes: 17 additions & 4 deletions pkg/cmd/roachprod-microbench/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,16 +142,27 @@ func makeCompareCommand() *cobra.Command {
return err
}

links, err := c.publishToGoogleSheets(metricMaps)
if err != nil {
return err
comparisonResult := c.createComparisons(metricMaps, "old", "new")

var links map[string]string
if config.publishGoogleSheet {
links, err = c.publishToGoogleSheets(comparisonResult)
if err != nil {
return err
}
}

if config.slackToken != "" {
err = c.postToSlack(links, metricMaps)
err = c.postToSlack(links, comparisonResult)
if err != nil {
return err
}
}

// if the threshold is set, we want to compare and fail the job in case of perf regressions
if config.threshold != skipComparison {
return c.compareUsingThreshold(comparisonResult)
}
return nil
}

Expand All @@ -166,6 +177,8 @@ func makeCompareCommand() *cobra.Command {
cmd.Flags().StringVar(&config.slackToken, "slack-token", config.slackToken, "pass a slack token to post the results to a slack channel")
cmd.Flags().StringVar(&config.slackUser, "slack-user", config.slackUser, "slack user to post the results as")
cmd.Flags().StringVar(&config.slackChannel, "slack-channel", config.slackChannel, "slack channel to post the results to")
cmd.Flags().Float64Var(&config.threshold, "threshold", config.threshold, "threshold in percentage value for detecting perf regression ")
cmd.Flags().BoolVar(&config.publishGoogleSheet, "publish-sheets", config.publishGoogleSheet, "flag to make the command create a google sheet of the benchmark results and publish it")
return cmd
}

Expand Down
15 changes: 15 additions & 0 deletions pkg/cmd/roachprod-microbench/model/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,18 @@ type Comparison struct {
Delta float64
FormattedDelta string
}

// ComparisonResult holds the comparison results for a specific metric.
type ComparisonResult struct {
Metric *Metric
Comparisons []*ComparisonDetail
}

// ComparisonDetail holds the details of a single comparison.
type ComparisonDetail struct {
BenchmarkName string
Comparison *Comparison
}

// ComparisonResultsMap holds the comparison results for all package groups.
type ComparisonResultsMap map[string][]*ComparisonResult

0 comments on commit 39c1a67

Please sign in to comment.