diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 52b1f4e792f2..a78e6c540a6e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -341,6 +341,7 @@ /pkg/cmd/returncheck/ @cockroachdb/dev-inf /pkg/cmd/roachprod/ @cockroachdb/test-eng /pkg/cmd/roachprod/vm/azure/auth.go @cockroachdb/test-eng @cockroachdb/prodsec +/pkg/cmd/roachprod-microbench/ @cockroachdb/test-eng /pkg/cmd/roachprod-stress/ @cockroachdb/test-eng /pkg/cmd/roachtest/ @cockroachdb/test-eng /pkg/cmd/label-merged-pr/ @cockroachdb/dev-inf diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 315cd3fac5d7..bfb75fcfec9c 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -134,6 +134,7 @@ ALL_TESTS = [ "//pkg/cmd/reduce/reduce/reducesql:reducesql_test", "//pkg/cmd/reduce/reduce:reduce_test", "//pkg/cmd/release:release_test", + "//pkg/cmd/roachprod-microbench:roachprod-microbench_test", "//pkg/cmd/roachtest/clusterstats:clusterstats_test", "//pkg/cmd/roachtest/tests:tests_test", "//pkg/cmd/roachtest:roachtest_test", @@ -993,6 +994,11 @@ GO_TARGETS = [ "//pkg/cmd/release:release_test", "//pkg/cmd/returncheck:returncheck", "//pkg/cmd/returncheck:returncheck_lib", + "//pkg/cmd/roachprod-microbench/cluster:cluster", + "//pkg/cmd/roachprod-microbench/google:google", + "//pkg/cmd/roachprod-microbench:roachprod-microbench", + "//pkg/cmd/roachprod-microbench:roachprod-microbench_lib", + "//pkg/cmd/roachprod-microbench:roachprod-microbench_test", "//pkg/cmd/roachprod-stress:roachprod-stress", "//pkg/cmd/roachprod-stress:roachprod-stress_lib", "//pkg/cmd/roachprod:roachprod", @@ -2435,6 +2441,9 @@ GET_X_DATA_TARGETS = [ "//pkg/cmd/release:get_x_data", "//pkg/cmd/returncheck:get_x_data", "//pkg/cmd/roachprod:get_x_data", + "//pkg/cmd/roachprod-microbench:get_x_data", + "//pkg/cmd/roachprod-microbench/cluster:get_x_data", + "//pkg/cmd/roachprod-microbench/google:get_x_data", "//pkg/cmd/roachprod-stress:get_x_data", "//pkg/cmd/roachtest:get_x_data", "//pkg/cmd/roachtest/cluster:get_x_data", diff --git a/pkg/cmd/dev/build.go b/pkg/cmd/dev/build.go index 23f9938a79dd..75a5defb9d8e 100644 --- a/pkg/cmd/dev/build.go +++ b/pkg/cmd/dev/build.go @@ -73,41 +73,42 @@ func makeBuildCmd(runE func(cmd *cobra.Command, args []string) error) *cobra.Com // buildTargetMapping maintains shorthands that map 1:1 with bazel targets. var buildTargetMapping = map[string]string{ - "bazel-remote": bazelRemoteTarget, - "buildifier": "@com_github_bazelbuild_buildtools//buildifier:buildifier", - "buildozer": "@com_github_bazelbuild_buildtools//buildozer:buildozer", - "cockroach": "//pkg/cmd/cockroach:cockroach", - "cockroach-sql": "//pkg/cmd/cockroach-sql:cockroach-sql", - "cockroach-oss": "//pkg/cmd/cockroach-oss:cockroach-oss", - "cockroach-short": "//pkg/cmd/cockroach-short:cockroach-short", - "crlfmt": "@com_github_cockroachdb_crlfmt//:crlfmt", - "dev": devTarget, - "docgen": "//pkg/cmd/docgen:docgen", - "docs-issue-gen": "//pkg/cmd/docs-issue-generation:docs-issue-generation", - "execgen": "//pkg/sql/colexec/execgen/cmd/execgen:execgen", - "gofmt": "@com_github_cockroachdb_gostdlib//cmd/gofmt:gofmt", - "goimports": "@com_github_cockroachdb_gostdlib//x/tools/cmd/goimports:goimports", - "label-merged-pr": "//pkg/cmd/label-merged-pr:label-merged-pr", - "geos": geosTarget, - "langgen": "//pkg/sql/opt/optgen/cmd/langgen:langgen", - "libgeos": geosTarget, - "obsservice": "//pkg/obsservice/cmd/obsservice:obsservice", - "optgen": "//pkg/sql/opt/optgen/cmd/optgen:optgen", - "optfmt": "//pkg/sql/opt/optgen/cmd/optfmt:optfmt", - "oss": "//pkg/cmd/cockroach-oss:cockroach-oss", - "reduce": "//pkg/cmd/reduce:reduce", - "roachprod": "//pkg/cmd/roachprod:roachprod", - "roachprod-stress": "//pkg/cmd/roachprod-stress:roachprod-stress", - "roachtest": "//pkg/cmd/roachtest:roachtest", - "short": "//pkg/cmd/cockroach-short:cockroach-short", - "smith": "//pkg/cmd/smith:smith", - "smithcmp": "//pkg/cmd/smithcmp:smithcmp", - "smithtest": "//pkg/cmd/smithtest:smithtest", - "staticcheck": "@co_honnef_go_tools//cmd/staticcheck:staticcheck", - "stress": stressTarget, - "swagger": "@com_github_go_swagger_go_swagger//cmd/swagger:swagger", - "tests": "//pkg:all_tests", - "workload": "//pkg/cmd/workload:workload", + "bazel-remote": bazelRemoteTarget, + "buildifier": "@com_github_bazelbuild_buildtools//buildifier:buildifier", + "buildozer": "@com_github_bazelbuild_buildtools//buildozer:buildozer", + "cockroach": "//pkg/cmd/cockroach:cockroach", + "cockroach-sql": "//pkg/cmd/cockroach-sql:cockroach-sql", + "cockroach-oss": "//pkg/cmd/cockroach-oss:cockroach-oss", + "cockroach-short": "//pkg/cmd/cockroach-short:cockroach-short", + "crlfmt": "@com_github_cockroachdb_crlfmt//:crlfmt", + "dev": devTarget, + "docgen": "//pkg/cmd/docgen:docgen", + "docs-issue-gen": "//pkg/cmd/docs-issue-generation:docs-issue-generation", + "execgen": "//pkg/sql/colexec/execgen/cmd/execgen:execgen", + "gofmt": "@com_github_cockroachdb_gostdlib//cmd/gofmt:gofmt", + "goimports": "@com_github_cockroachdb_gostdlib//x/tools/cmd/goimports:goimports", + "label-merged-pr": "//pkg/cmd/label-merged-pr:label-merged-pr", + "geos": geosTarget, + "langgen": "//pkg/sql/opt/optgen/cmd/langgen:langgen", + "libgeos": geosTarget, + "obsservice": "//pkg/obsservice/cmd/obsservice:obsservice", + "optgen": "//pkg/sql/opt/optgen/cmd/optgen:optgen", + "optfmt": "//pkg/sql/opt/optgen/cmd/optfmt:optfmt", + "oss": "//pkg/cmd/cockroach-oss:cockroach-oss", + "reduce": "//pkg/cmd/reduce:reduce", + "roachprod": "//pkg/cmd/roachprod:roachprod", + "roachprod-stress": "//pkg/cmd/roachprod-stress:roachprod-stress", + "roachprod-microbench": "//pkg/cmd/roachprod-microbench:roachprod-microbench", + "roachtest": "//pkg/cmd/roachtest:roachtest", + "short": "//pkg/cmd/cockroach-short:cockroach-short", + "smith": "//pkg/cmd/smith:smith", + "smithcmp": "//pkg/cmd/smithcmp:smithcmp", + "smithtest": "//pkg/cmd/smithtest:smithtest", + "staticcheck": "@co_honnef_go_tools//cmd/staticcheck:staticcheck", + "stress": stressTarget, + "swagger": "@com_github_go_swagger_go_swagger//cmd/swagger:swagger", + "tests": "//pkg:all_tests", + "workload": "//pkg/cmd/workload:workload", } // allBuildTargets is a sorted list of all the available build targets. diff --git a/pkg/cmd/roachprod-microbench/BUILD.bazel b/pkg/cmd/roachprod-microbench/BUILD.bazel new file mode 100644 index 000000000000..aa5e8175200b --- /dev/null +++ b/pkg/cmd/roachprod-microbench/BUILD.bazel @@ -0,0 +1,51 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test") + +go_library( + name = "roachprod-microbench_lib", + srcs = [ + "benchmark.go", + "compare.go", + "main.go", + "metadata.go", + "report.go", + "storage.go", + ], + importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench", + visibility = ["//visibility:private"], + deps = [ + "//pkg/cmd/roachprod-microbench/cluster", + "//pkg/cmd/roachprod-microbench/google", + "//pkg/roachprod", + "//pkg/roachprod/logger", + "//pkg/roachprod/ssh", + "//pkg/roachprod/vm", + "//pkg/util/syncutil", + "//pkg/util/timeutil", + "@com_github_cockroachdb_errors//:errors", + "@com_github_klauspost_compress//gzip", + "@com_google_cloud_go_storage//:storage", + "@org_golang_x_perf//benchstat", + "@org_golang_x_perf//storage/benchfmt", + ], +) + +go_binary( + name = "roachprod-microbench", + embed = [":roachprod-microbench_lib"], + visibility = ["//visibility:public"], +) + +go_test( + name = "roachprod-microbench_test", + srcs = ["benchmark_test.go"], + args = ["-test.timeout=295s"], + data = glob(["testdata/**"]), + embed = [":roachprod-microbench_lib"], + deps = [ + "//pkg/testutils/datapathutils", + "@com_github_cockroachdb_datadriven//:datadriven", + ], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/cmd/roachprod-microbench/README.md b/pkg/cmd/roachprod-microbench/README.md new file mode 100644 index 000000000000..7d9dee274bab --- /dev/null +++ b/pkg/cmd/roachprod-microbench/README.md @@ -0,0 +1,40 @@ +## Name +**roachprod-microbench** - Execute microbenchmarks across a `roachprod` cluster + +## Synopsis +**roachprod-microbench** ` [] -- []` + +## Examples +> **roachprod-microbench** ./artifacts/log -binaries=bin.tar.gz -cluster=user-bench -- -test.benchtime=1ns + +Execute microbenchmarks present in the `bin.tar.gz` archive across the `user-bench` roachprod cluster. + +Pass the additional test argument `-test.benchtime=1ns` to all executions. + +Microbenchmark results are written to the `./artifacts/log` directory. This directory will also contain logs for any failures that occurred. + +> **roachprod-microbench** ./artifacts/log -binaries=bin.tar.gz -cluster=user-bench -publishdir=gs://microbench/2042 + +Publish the logs captured from the microbenchmarks in the `` `./artifacts/log` to a GCS bucket `gs://microbench/2042`. + +> **roachprod-microbench** ./artifacts/log -binaries=bin.tar.gz -cluster=user-bench -comparedir=gs://microbench/1899 + +Compare the logs captured from the microbenchmarks in the `` `./artifacts/log` to a GCS bucket `gs://microbench/1899` of a previously published **roachprod-microbench** run. + +## Description + +The **roachprod-microbench** command operates on a portable test binaries archive that has been built with the `dev test-binaries` tooling. +Compressed and uncompressed `tar` archives are supported, since the `dev` tooling can produce both. +It should be noted that the archive has to adhere to a specific layout and contains `run.sh` scripts for each package, which facilitates execution. +This tool only supports binaries archives produced in this format by the `dev test-binaries` tooling. + +As part of the orchestration, test binaries are copied to the target cluster and extracted. +Once the cluster is ready, the tool will first probe for all microbenchmarks available and then execute the microbenchmarks according to any flags that have been set. + +The `` must always be specified and all results from the microbenchmarks will be captured to this directory, including any errors. +If the `publishdir` or `comparedir` flags are specified these will also use the `` to publish from or compare against. + +The publish functionality is used to store the results of a run to a specified location. +On a subsequent run the compare functionality can then be pointed to that same location to do a comparison which will publish Google sheets for further inspection. + +For additional information on flags, run the **roachprod-microbench** command without any arguments to display a helpful message. diff --git a/pkg/cmd/roachprod-microbench/benchmark.go b/pkg/cmd/roachprod-microbench/benchmark.go new file mode 100644 index 000000000000..5a77d5c8953b --- /dev/null +++ b/pkg/cmd/roachprod-microbench/benchmark.go @@ -0,0 +1,394 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package main + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + + "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/cluster" + "github.com/cockroachdb/cockroach/pkg/roachprod" + "github.com/cockroachdb/cockroach/pkg/roachprod/logger" + "github.com/cockroachdb/cockroach/pkg/roachprod/vm" + "github.com/cockroachdb/errors" +) + +type benchmark struct { + pkg string + name string +} + +type benchmarkIndexed struct { + benchmark + index int +} + +// prepareCluster prepares the cluster for executing microbenchmarks. It copies +// the binaries to the remote cluster and extracts it. It also copies the lib +// directory to the remote cluster. The number of nodes in the cluster is +// returned, or -1 if the node count could not be determined. +func prepareCluster(binaries []string, remoteDir string) (int, error) { + l.Printf("Setting up roachprod") + if err := initRoachprod(); err != nil { + return -1, err + } + + statuses, err := roachprod.Status(context.Background(), l, *flagCluster, "") + if err != nil { + return -1, err + } + numNodes := len(statuses) + l.Printf("Number of nodes: %d", numNodes) + + // Clear old artifacts, copy and extract new artifacts on to the cluster. + if *flagCopy { + if fi, cmdErr := os.Stat(*flagLibDir); cmdErr == nil && fi.IsDir() { + if putErr := roachprod.Put(context.Background(), l, *flagCluster, *flagLibDir, "lib", true); putErr != nil { + return numNodes, putErr + } + } + + for binIndex, bin := range binaries { + // Specify the binaries' tarball remote location. + remoteBinSrc := fmt.Sprintf("%d_%s", binIndex, filepath.Base(bin)) + remoteBinDest := filepath.Join(remoteDir, strconv.Itoa(binIndex)) + if rpErr := roachprod.Put(context.Background(), l, *flagCluster, bin, remoteBinSrc, true); rpErr != nil { + return numNodes, rpErr + } + if rpErr := roachprodRun(*flagCluster, []string{"rm", "-rf", remoteBinDest}); rpErr != nil { + return numNodes, rpErr + } + if rpErr := roachprodRun(*flagCluster, []string{"mkdir", "-p", remoteBinDest}); rpErr != nil { + return numNodes, rpErr + } + extractFlags := "-xf" + if filepath.Ext(bin) == ".gz" { + extractFlags = "-xzf" + } + if rpErr := roachprodRun(*flagCluster, []string{"tar", extractFlags, remoteBinSrc, "-C", remoteBinDest}); rpErr != nil { + return numNodes, rpErr + } + if rpErr := roachprodRun(*flagCluster, []string{"rm", "-rf", remoteBinSrc}); rpErr != nil { + return numNodes, rpErr + } + } + } + return numNodes, nil +} + +func initRoachprod() error { + _ = roachprod.InitProviders() + _, err := roachprod.Sync(l, vm.ListOptions{}) + return err +} + +func roachprodRun(clusterName string, cmdArray []string) error { + return roachprod.Run(context.Background(), l, clusterName, "", "", false, os.Stdout, os.Stderr, cmdArray) +} + +// listBenchmarks distributes a listing command to test package binaries across +// the cluster. It will filter out any packages that do not contain benchmarks +// or that matches the exclusion regex. A list of valid packages and benchmarks +// are returned. +func listBenchmarks( + log *logger.Logger, remoteDir string, binaries, packages []string, numNodes int, +) ([]string, []benchmark, error) { + exclusionPairs := getRegexExclusionPairs() + + // Generate commands for listing benchmarks. + commands := make([][]cluster.RemoteCommand, 0) + for _, pkg := range packages { + for binIndex := range binaries { + remoteBinDir := fmt.Sprintf("%s/%d/%s/bin", remoteDir, binIndex, pkg) + command := cluster.RemoteCommand{ + Args: []string{"sh", "-c", + fmt.Sprintf(`"[ ! -d %s ] || (cd %s && ./run.sh -test.list=^Benchmark*)"`, + remoteBinDir, remoteBinDir)}, + Metadata: pkg, + } + commands = append(commands, []cluster.RemoteCommand{command}) + } + } + + // Execute commands for listing benchmarks. + l.Printf("Distributing and running benchmark listings across cluster %s", *flagCluster) + isValidBenchmarkName := regexp.MustCompile(`^Benchmark[a-zA-Z0-9_]+$`).MatchString + errorCount := 0 + benchmarkCounts := make(map[benchmark]int) + cluster.ExecuteRemoteCommands(log, *flagCluster, commands, numNodes, true, func(response cluster.RemoteResponse) { + fmt.Print(".") + if response.Err == nil { + pkg := response.Metadata.(string) + outer: + for _, benchmarkName := range strings.Split(response.Stdout, "\n") { + benchmarkName = strings.TrimSpace(benchmarkName) + if benchmarkName == "" { + continue + } + if !isValidBenchmarkName(benchmarkName) { + fmt.Println() + l.Printf("Ignoring invalid benchmark name: %s", benchmarkName) + continue + } + for _, exclusionPair := range exclusionPairs { + if exclusionPair[0].MatchString(pkg) && exclusionPair[1].MatchString(benchmarkName) { + continue outer + } + } + + benchmarkEntry := benchmark{pkg, benchmarkName} + benchmarkCounts[benchmarkEntry]++ + } + } else { + fmt.Println() + l.Errorf("Remote command = {%s}, error = {%v}, stderr output = {%s}", + strings.Join(response.Args, " "), response.Err, response.Stderr) + errorCount++ + } + }) + fmt.Println() + + if errorCount > 0 { + return nil, nil, errors.New("Failed to list benchmarks") + } + + benchmarks := make([]benchmark, 0, len(benchmarkCounts)) + packageCounts := make(map[string]int) + for k, v := range benchmarkCounts { + // Some packages override TestMain to run the tests twice or more thus + // appearing more than once in the listing logic. + if v >= len(binaries) { + packageCounts[k.pkg]++ + benchmarks = append(benchmarks, k) + } else { + l.Printf("Ignoring benchmark %s/%s, missing from %d binaries", k.pkg, k.name, len(binaries)-v) + } + } + + validPackages := make([]string, 0, len(packageCounts)) + for pkg, count := range packageCounts { + l.Printf("Found %d benchmarks in %s", count, pkg) + validPackages = append(validPackages, pkg) + } + return validPackages, benchmarks, nil +} + +// executeBenchmarks executes the microbenchmarks on the remote cluster. Reports +// containing the microbenchmark results for each package are stored in the log +// output directory. Microbenchmark failures are recorded in separate log files, +// that are stored alongside the reports, and are named the same as the +// corresponding microbenchmark. When running in lenient mode errors will not +// fail the execution, and will still be logged to the aforementioned logs. +func executeBenchmarks(binaries, packages []string) error { + remoteDir := fmt.Sprintf("%s/microbench", *flagRemoteDir) + + // Remote execution Logging is captured and saved to appropriate log files and + // the main logger is used for orchestration logging only. Therefore, we use a + // muted logger for remote execution. + muteLogger, err := (&logger.Config{Stdout: io.Discard, Stderr: io.Discard}).NewLogger("") + if err != nil { + return err + } + + numNodes, err := prepareCluster(binaries, remoteDir) + if err != nil { + return err + } + + validPackages, benchmarks, err := listBenchmarks(muteLogger, remoteDir, binaries, packages, numNodes) + if err != nil { + return err + } + if len(validPackages) == 0 { + return errors.New("no packages containing benchmarks found") + } + + // Create reports for each binary. + // Currently, we only support comparing two binaries. + reporters := make([]*Report, 0) + defer func() { + for _, report := range reporters { + report.closeReports() + } + }() + for binIndex := range binaries { + report := &Report{} + dir := workingDir + if binIndex > 0 { + dir = *flagCompareDir + } + err = report.createReports(dir, validPackages) + reporters = append(reporters, report) + if err != nil { + return err + } + } + + // Generate commands for running benchmarks. + commands := make([][]cluster.RemoteCommand, 0) + for _, bench := range benchmarks { + runCommand := fmt.Sprintf("./run.sh %s -test.benchmem -test.bench=^%s$ -test.run=^$", + strings.Join(testArgs, " "), bench.name) + if *flagTimeout != "" { + runCommand = fmt.Sprintf("timeout %s %s", *flagTimeout, runCommand) + } + if *flagShell != "" { + runCommand = fmt.Sprintf("%s && %s", *flagShell, runCommand) + } + commandGroup := make([]cluster.RemoteCommand, 0) + // Weave the commands between binaries and iterations. + for i := 0; i < *flagIterations; i++ { + for binIndex := range binaries { + shellCommand := fmt.Sprintf(`"cd %s/%d/%s/bin && %s"`, remoteDir, binIndex, bench.pkg, runCommand) + command := cluster.RemoteCommand{ + Args: []string{"sh", "-c", shellCommand}, + Metadata: benchmarkIndexed{bench, binIndex}, + } + commandGroup = append(commandGroup, command) + } + } + if *flagAffinity { + commands = append(commands, commandGroup) + } else { + // When affinity is disabled, commands & single iterations can run on any + // node. This has the benefit of not having stragglers, but the downside + // of possibly introducing noise. + for _, command := range commandGroup { + commands = append(commands, []cluster.RemoteCommand{command}) + } + } + } + + // Execute commands. + errorCount := 0 + logIndex := 0 + missingBenchmarks := make(map[benchmark]int, 0) + l.Printf("Found %d benchmarks, distributing and running benchmarks for %d iteration(s) across cluster %s", + len(benchmarks), *flagIterations, *flagCluster) + cluster.ExecuteRemoteCommands(muteLogger, *flagCluster, commands, numNodes, !*flagLenient, func(response cluster.RemoteResponse) { + fmt.Print(".") + benchmarkResults, containsErrors := extractBenchmarkResults(response.Stdout) + benchmarkResponse := response.Metadata.(benchmarkIndexed) + report := reporters[benchmarkResponse.index] + for _, benchmarkResult := range benchmarkResults { + if _, writeErr := report.benchmarkOutput[benchmarkResponse.pkg].WriteString( + fmt.Sprintf("%s\n", strings.Join(benchmarkResult, " "))); writeErr != nil { + l.Errorf("Failed to write benchmark result to file - %v", writeErr) + } + } + if containsErrors || response.Err != nil { + fmt.Println() + err = report.writeBenchmarkErrorLogs(response, logIndex) + if err != nil { + l.Errorf("Failed to write error logs - %v", err) + } + errorCount++ + logIndex++ + } + if _, writeErr := report.analyticsOutput[benchmarkResponse.pkg].WriteString( + fmt.Sprintf("%s %d ms\n", benchmarkResponse.name, + response.Duration.Milliseconds())); writeErr != nil { + l.Errorf("Failed to write analytics to file - %v", writeErr) + } + if len(benchmarkResults) == 0 { + missingBenchmarks[benchmarkResponse.benchmark]++ + } + }) + + fmt.Println() + l.Printf("Completed benchmarks, results located at %s for time %s", workingDir, timestamp.Format(timeFormat)) + if len(missingBenchmarks) > 0 { + l.Errorf("Failed to find results for %d benchmarks", len(missingBenchmarks)) + l.Errorf("Missing benchmarks %v", missingBenchmarks) + } + if errorCount != 0 { + if *flagLenient { + l.Printf("Ignoring errors in benchmark results (lenient flag was set)") + } else { + return errors.Newf("Found %d errors during remote execution", errorCount) + } + } + return nil +} + +// getRegexExclusionPairs returns a list of regex exclusion pairs, separated by +// comma, derived from the command flags. The first element of the pair is the +// package regex and the second is the microbenchmark regex. +func getRegexExclusionPairs() [][]*regexp.Regexp { + if *flagExclude == "" { + return nil + } + excludeRegexes := make([][]*regexp.Regexp, 0) + excludeList := strings.Split(*flagExclude, ",") + for _, pair := range excludeList { + pairSplit := strings.Split(pair, ":") + var pkgRegex, benchRegex *regexp.Regexp + if len(pairSplit) != 2 { + pkgRegex = regexp.MustCompile(".*") + benchRegex = regexp.MustCompile(pairSplit[0]) + } else { + pkgRegex = regexp.MustCompile(pairSplit[0]) + benchRegex = regexp.MustCompile(pairSplit[1]) + } + excludeRegexes = append(excludeRegexes, []*regexp.Regexp{pkgRegex, benchRegex}) + } + return excludeRegexes +} + +// extractBenchmarkResults extracts the microbenchmark results generated by a +// test binary and reports if any failures were found in the output. This method +// makes specific assumptions regarding the format of the output, and attempts +// to ignore any spurious output that the test binary may have logged. The +// returned list of string arrays each represent a row of metrics as outputted +// by the test binary. +func extractBenchmarkResults(benchmarkOutput string) ([][]string, bool) { + keywords := map[string]struct{}{ + "ns/op": {}, + "b/op": {}, + "allocs/op": {}, + } + results := make([][]string, 0) + buf := make([]string, 0) + containsErrors := false + var benchName string + for _, line := range strings.Split(benchmarkOutput, "\n") { + elems := strings.Fields(line) + for _, s := range elems { + if !containsErrors { + containsErrors = strings.Contains(s, "FAIL") || strings.Contains(s, "panic") + } + if strings.HasPrefix(s, "Benchmark") && len(s) > 9 { + benchName = s + } + if _, ok := keywords[s]; ok { + row := elems + if elems[0] == benchName { + row = elems[1:] + } + + buf = append(buf, row...) + if benchName != "" { + buf = append([]string{benchName}, buf...) + results = append(results, buf) + } + buf = make([]string, 0) + benchName = "" + } + } + } + return results, containsErrors +} diff --git a/pkg/cmd/roachprod-microbench/benchmark_test.go b/pkg/cmd/roachprod-microbench/benchmark_test.go new file mode 100644 index 000000000000..360cb17813e8 --- /dev/null +++ b/pkg/cmd/roachprod-microbench/benchmark_test.go @@ -0,0 +1,32 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package main + +import ( + "fmt" + "testing" + + "github.com/cockroachdb/cockroach/pkg/testutils/datapathutils" + "github.com/cockroachdb/datadriven" +) + +func TestExtractBenchmarkResultsDataDriven(t *testing.T) { + datadriven.Walk(t, datapathutils.TestDataPath(t), func(t *testing.T, path string) { + datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string { + if d.Cmd == "benchmark" { + result, fail := extractBenchmarkResults(d.Input) + output := fmt.Sprintf("%v %v", fail, result) + return output + } + return "" + }) + }) +} diff --git a/pkg/cmd/roachprod-microbench/cluster/BUILD.bazel b/pkg/cmd/roachprod-microbench/cluster/BUILD.bazel new file mode 100644 index 000000000000..34e38f88e9b6 --- /dev/null +++ b/pkg/cmd/roachprod-microbench/cluster/BUILD.bazel @@ -0,0 +1,17 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "cluster", + srcs = ["execute.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/cluster", + visibility = ["//visibility:public"], + deps = [ + "//pkg/roachprod", + "//pkg/roachprod/logger", + "//pkg/util/timeutil", + "@com_github_cockroachdb_errors//:errors", + ], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/cmd/roachprod-microbench/cluster/execute.go b/pkg/cmd/roachprod-microbench/cluster/execute.go new file mode 100644 index 000000000000..0bed469e4ffb --- /dev/null +++ b/pkg/cmd/roachprod-microbench/cluster/execute.go @@ -0,0 +1,137 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package cluster + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/cockroachdb/cockroach/pkg/roachprod" + "github.com/cockroachdb/cockroach/pkg/roachprod/logger" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/cockroachdb/errors" +) + +// RemoteCommand is a command to be executed on a remote node. The Metadata field is used to +// store additional information from the original caller. +type RemoteCommand struct { + Args []string + Metadata interface{} +} + +// RemoteResponse is the response to a RemoteCommand. +// A Duration of -1 indicates that the command was cancelled. +type RemoteResponse struct { + RemoteCommand + Stdout string + Stderr string + Err error + Duration time.Duration +} + +func remoteWorker( + ctx context.Context, + log *logger.Logger, + clusterNode string, + receive chan []RemoteCommand, + response chan RemoteResponse, +) { + for { + commands := <-receive + if commands == nil { + return + } + for index, command := range commands { + if errors.Is(ctx.Err(), context.Canceled) { + for _, cancelCommand := range commands[index:] { + response <- RemoteResponse{cancelCommand, "", "", nil, -1} + } + break + } + start := timeutil.Now() + runResult, err := roachprod.RunWithDetails(context.Background(), log, clusterNode, + "", "", false, command.Args) + duration := timeutil.Since(start) + var stdout, stderr string + if len(runResult) > 0 { + stdout = runResult[0].Stdout + stderr = runResult[0].Stderr + err = errors.CombineErrors(err, runResult[0].Err) + } + response <- RemoteResponse{command, stdout, stderr, err, duration} + } + } +} + +// ExecuteRemoteCommands distributes the commands to the cluster nodes and waits +// for the responses. Commands can be grouped as a sub-array to be executed +// serially on the same node. Only one command is executed per node at a time. +// The commands are executed in the order they are provided. The failFast +// parameter indicates whether the execution should stop on the first error. +func ExecuteRemoteCommands( + log *logger.Logger, + cluster string, + commandGroups [][]RemoteCommand, + numNodes int, + failFast bool, + callback func(response RemoteResponse), +) { + workChannel := make(chan []RemoteCommand, numNodes) + responseChannel := make(chan RemoteResponse, numNodes) + + ctx, cancelCtx := context.WithCancel(context.Background()) + + for idx := 1; idx <= numNodes; idx++ { + go remoteWorker(ctx, log, fmt.Sprintf("%s:%d", cluster, idx), workChannel, responseChannel) + } + + var wg sync.WaitGroup + // Receive responses. + go func() { + for { + response := <-responseChannel + if response.Args == nil { + return + } + if response.Duration != -1 { + callback(response) + } + if response.Err != nil && failFast { + cancelCtx() + } + wg.Done() + } + }() + + // Send commands to workers. +done: + for _, commands := range commandGroups { + outer: + for { + select { + case <-ctx.Done(): + break done + case workChannel <- commands: + wg.Add(len(commands)) + break outer + default: + time.Sleep(100 * time.Millisecond) + } + } + } + + wg.Wait() + + close(workChannel) + close(responseChannel) +} diff --git a/pkg/cmd/roachprod-microbench/compare.go b/pkg/cmd/roachprod-microbench/compare.go new file mode 100644 index 000000000000..71ec215de804 --- /dev/null +++ b/pkg/cmd/roachprod-microbench/compare.go @@ -0,0 +1,114 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package main + +import ( + "context" + "strings" + "sync" + + "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/google" + "github.com/cockroachdb/cockroach/pkg/util/syncutil" + "github.com/cockroachdb/errors" + "golang.org/x/perf/benchstat" + "golang.org/x/perf/storage/benchfmt" +) + +func compareBenchmarks( + packages []string, currentDir, previousDir string, +) (map[string][]*benchstat.Table, error) { + packageResults := make(map[string][][]*benchfmt.Result) + var resultMutex syncutil.Mutex + var wg sync.WaitGroup + errorsFound := false + wg.Add(len(packages)) + for _, pkg := range packages { + go func(pkg string) { + defer wg.Done() + basePackage := pkg[:strings.Index(pkg[4:]+"/", "/")+4] + resultMutex.Lock() + results, ok := packageResults[basePackage] + if !ok { + results = [][]*benchfmt.Result{make([]*benchfmt.Result, 0), make([]*benchfmt.Result, 0)} + packageResults[basePackage] = results + } + resultMutex.Unlock() + + // Read the previous and current results. If either is missing, we'll just + // skip it. The not found error is ignored since it can be expected that + // some benchmarks have changed names or been removed. + if err := readReportFile(joinPath(previousDir, getReportLogName(reportLogName, pkg)), + func(result *benchfmt.Result) { + resultMutex.Lock() + results[0] = append(results[0], result) + resultMutex.Unlock() + }); err != nil && + !isNotFoundError(err) { + l.Errorf("failed to add report for %s: %s", pkg, err) + errorsFound = true + } + if err := readReportFile(joinPath(currentDir, getReportLogName(reportLogName, pkg)), + func(result *benchfmt.Result) { + resultMutex.Lock() + results[1] = append(results[1], result) + resultMutex.Unlock() + }); err != nil && + !isNotFoundError(err) { + l.Errorf("failed to add report for %s: %s", pkg, err) + errorsFound = true + } + }(pkg) + } + wg.Wait() + if errorsFound { + return nil, errors.New("failed to process reports") + } + + tableResults := make(map[string][]*benchstat.Table) + for pkgGroup, results := range packageResults { + var c benchstat.Collection + c.Alpha = 0.05 + c.Order = benchstat.Reverse(benchstat.ByDelta) + // Only add the results if both sets are present. + if len(results[0]) > 0 && len(results[1]) > 0 { + c.AddResults("old", results[0]) + c.AddResults("new", results[1]) + tables := c.Tables() + tableResults[pkgGroup] = tables + } else if len(results[0])+len(results[1]) > 0 { + l.Printf("Only one set of results present for %s", pkgGroup) + } + } + return tableResults, nil +} + +func publishToGoogleSheets( + ctx context.Context, srv *google.Service, sheetName string, tables []*benchstat.Table, +) error { + url, err := srv.CreateSheet(ctx, sheetName, tables) + if err != nil { + return err + } + l.Printf("Generated sheet for %s: %s\n", sheetName, url) + return nil +} + +func readReportFile(path string, reportResults func(*benchfmt.Result)) error { + reader, err := createReader(path) + if err != nil { + return errors.Wrapf(err, "failed to create reader for %s", path) + } + br := benchfmt.NewReader(reader) + for br.Next() { + reportResults(br.Result()) + } + return br.Err() +} diff --git a/pkg/cmd/roachprod-microbench/google/BUILD.bazel b/pkg/cmd/roachprod-microbench/google/BUILD.bazel new file mode 100644 index 000000000000..6172022589ee --- /dev/null +++ b/pkg/cmd/roachprod-microbench/google/BUILD.bazel @@ -0,0 +1,19 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "google", + srcs = ["service.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/google", + visibility = ["//visibility:public"], + deps = [ + "@com_github_cockroachdb_errors//:errors", + "@org_golang_google_api//drive/v3:drive", + "@org_golang_google_api//googleapi", + "@org_golang_google_api//option", + "@org_golang_google_api//sheets/v4:sheets", + "@org_golang_x_perf//benchstat", + ], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/cmd/roachprod-microbench/google/service.go b/pkg/cmd/roachprod-microbench/google/service.go new file mode 100644 index 000000000000..448c41727fd7 --- /dev/null +++ b/pkg/cmd/roachprod-microbench/google/service.go @@ -0,0 +1,446 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package google + +import ( + "context" + "fmt" + "math" + "net/http" + "strconv" + "strings" + + "github.com/cockroachdb/errors" + "golang.org/x/perf/benchstat" + "google.golang.org/api/drive/v3" + "google.golang.org/api/googleapi" + "google.golang.org/api/option" + "google.golang.org/api/sheets/v4" +) + +// Service is capable of communicating with the Google Drive API and the Google +// Sheets API to create new spreadsheets and populate them from benchstat +// output. +type Service struct { + drive *drive.Service + sheets *sheets.Service +} + +// New creates a new Service. It verifies that credentials are properly set and +// returns an error if they are not. +func New(ctx context.Context) (*Service, error) { + var srv Service + var err error + if srv.drive, err = newDriveService(ctx); err != nil { + return nil, errors.Wrap(err, "retrieve Drive client") + } + if srv.sheets, err = newSheetsService(ctx); err != nil { + return nil, errors.Wrap(err, "retrieve Sheets client") + } + if err = srv.testServices(ctx); err != nil { + return nil, errors.Wrap(err, "testing Google clients") + } + return &srv, nil +} + +// newDriveService constructs a new Google Drive service. +func newDriveService(ctx context.Context) (*drive.Service, error) { + return drive.NewService(ctx, option.WithScopes(drive.DriveFileScope)) +} + +// newSheetsService constructs a new Google Sheets service. +func newSheetsService(ctx context.Context) (*sheets.Service, error) { + return sheets.NewService(ctx, option.WithScopes(sheets.SpreadsheetsScope)) +} + +func (srv *Service) testServices(ctx context.Context) error { + if _, err := srv.drive.About.Get().Fields("user").Context(ctx).Do(); err != nil { + return errors.Wrap(err, "testing Drive client") + } + if _, err := srv.sheets.Spreadsheets.Get("none").Context(ctx).Do(); err != nil { + // We expect a 404. + var apiError *googleapi.Error + if errors.As(err, &apiError) && apiError.Code != http.StatusNotFound { + return errors.Wrap(err, "testing Sheets client") + } + } + return nil +} + +// CreateSheet creates a new Google spreadsheet with the provided metric data. +func (srv *Service) CreateSheet( + ctx context.Context, name string, tables []*benchstat.Table, +) (string, error) { + var s sheets.Spreadsheet + s.Properties = &sheets.SpreadsheetProperties{Title: name} + + // Raw data sheets. + sheetInfos := make([]rawSheetInfo, len(tables)) + for i, t := range tables { + sh, info := srv.createRawSheet(t, i) + s.Sheets = append(s.Sheets, sh) + sheetInfos[i] = info + } + + // Pivot table overview sheet. Place in front. + overview := srv.createOverviewSheet(sheetInfos) + s.Sheets = append([]*sheets.Sheet{overview}, s.Sheets...) + + // Create the spreadsheet. + res, err := srv.createSheet(ctx, s) + if err != nil { + return "", err + } + + // Update the new spreadsheet's permissions. + if err := srv.updatePerms(ctx, res.SpreadsheetId); err != nil { + return "", err + } + + return res.SpreadsheetUrl, nil +} + +type rawSheetInfo struct { + id int64 + table *benchstat.Table + grid *sheets.GridProperties + deltaCol int64 + nonZeroVals []string +} + +// createRawSheet creates a new sheet that corresponds to the raw metric data in +// a single benchstat table. The sheet is formatted like: +// +// +------------+---------------------+---------------------+---------+-----------------+ +// | name | old time/op (ns/op) | new time/op (ns/op) | delta | note | +// +------------+---------------------+---------------------+---------+-----------------+ +// | Benchmark1 | 290026.2 | 290075 | -34.29% | (p=0.008 n=5+5) | +// | Benchmark2 | 15588 | 15717.6 | ~ | (p=0.841 n=5+5) | +// ... +func (srv *Service) createRawSheet(t *benchstat.Table, tIdx int) (*sheets.Sheet, rawSheetInfo) { + sheetID := sheetIDForTable(tIdx) + + var info rawSheetInfo + info.table = t + info.id = sheetID + + props := &sheets.SheetProperties{ + Title: "Raw: " + t.Metric, + SheetId: sheetID, + } + + var data []*sheets.RowData + var metadata []*sheets.DimensionProperties + var numCols int64 + + // Header row. + { + var vals []*sheets.CellData + + // Column: Benchmark name. + vals = append(vals, strCell("name")) + metadata = append(metadata, withSize(400)) + + // Columns: Metric names. + for j, cfg := range t.Configs { + unit := fmt.Sprintf("%s %s", cfg, t.Metric) + if len(t.Rows) > 0 { + metric := t.Rows[0].Metrics[j] + unit = fmt.Sprintf("%s (%s)", unit, metric.Unit) + } + vals = append(vals, strCell(unit)) + metadata = append(metadata, withSize(150)) + } + + // Column: delta. + info.deltaCol = int64(len(vals)) + vals = append(vals, strCell("delta")) + metadata = append(metadata, withSize(100)) + + // Column: note. + vals = append(vals, strCell("note")) + metadata = append(metadata, withSize(150)) + + numCols = int64(len(vals)) + data = append(data, &sheets.RowData{Values: vals}) + } + + // Data rows. + for _, row := range t.Rows { + var vals []*sheets.CellData + vals = append(vals, strCell(row.Benchmark)) + for _, val := range row.Metrics { + vals = append(vals, numCell(val.Mean)) + } + if row.Delta == "~" { + vals = append(vals, strCell(row.Delta)) + } else { + vals = append(vals, percentCell(deltaToNum(row.Delta))) + info.nonZeroVals = append(info.nonZeroVals, deltaToPercentString(row.Delta)) + } + vals = append(vals, strCell(row.Note)) + data = append(data, &sheets.RowData{Values: vals}) + } + + // Conditional formatting. + cf := condFormatting(sheetID, info.deltaCol, isSmallerBetter(t)) + + // Grid properties. + grid := &sheets.GridProperties{ + ColumnCount: numCols, + RowCount: int64(len(data)), + FrozenRowCount: 1, + } + info.grid = grid + props.GridProperties = grid + + // Construct the new sheet. + sheet := &sheets.Sheet{ + Properties: props, + Data: []*sheets.GridData{{ + RowData: data, + ColumnMetadata: metadata, + }}, + ConditionalFormats: []*sheets.ConditionalFormatRule{cf}, + } + return sheet, info +} + +// createRawSheet creates a new sheet that contains an overview of all raw +// metric data using pivot tables. The sheet is formatted like: +// +// +------------+---------+----+------------+----------+ +// | name | time/op | | name | alloc/op | +// +------------+---------+----+------------+----------+ +// | Benchmark1 | -34.29% | | Benchmark3 | -12.99% | +// | Benchmark2 | 4.02% | | Benchmark4 | 0.11% | +// ... +func (srv *Service) createOverviewSheet(rawInfos []rawSheetInfo) *sheets.Sheet { + const title = "Overview: Significant Changes" + sheetID := sheetIDForTable(len(rawInfos)) + props := &sheets.SheetProperties{ + Title: title, + SheetId: sheetID, + } + + var vals []*sheets.CellData + var metadata []*sheets.DimensionProperties + var cfs []*sheets.ConditionalFormatRule + + for i, info := range rawInfos { + if i != 0 { + metadata = append(metadata, withSize(50)) + vals = append(vals, &sheets.CellData{}) + } + + // If there were no significant changes in this table, don't create + // a pivot table. + if len(info.nonZeroVals) == 0 { + noChanges := fmt.Sprintf("no change in %s", info.table.Metric) + vals = append(vals, strCell(noChanges)) + metadata = append(metadata, withSize(200)) + continue + } + + smallerBetter := isSmallerBetter(info.table) + sortOrder := "DESCENDING" + if smallerBetter { + sortOrder = "ASCENDING" + } + vals = append(vals, &sheets.CellData{ + PivotTable: &sheets.PivotTable{ + Source: &sheets.GridRange{ + SheetId: sheetIDForTable(i), + StartColumnIndex: 0, + EndColumnIndex: info.grid.ColumnCount, + StartRowIndex: 0, + EndRowIndex: info.grid.RowCount, + }, + Rows: []*sheets.PivotGroup{{ + SourceColumnOffset: 0, + SortOrder: sortOrder, + ValueBucket: &sheets.PivotGroupSortValueBucket{ + ValuesIndex: 0, + }, + ForceSendFields: []string{"SourceColumnOffset"}, + }}, + Values: []*sheets.PivotValue{{ + SourceColumnOffset: info.deltaCol, + Name: info.table.Metric, + SummarizeFunction: "AVERAGE", + }}, + Criteria: map[string]sheets.PivotFilterCriteria{ + strconv.Itoa(int(info.deltaCol)): { + VisibleValues: info.nonZeroVals, + }, + }, + }, + }) + vals = append(vals, &sheets.CellData{}) + metadata = append(metadata, withSize(200), withSize(100)) + + deltaCol := int64(len(vals)) - 1 + cf := condFormatting(props.SheetId, deltaCol, smallerBetter) + cfs = append(cfs, cf) + } + + // Construct the new sheet. + props.GridProperties = &sheets.GridProperties{ + ColumnCount: int64(len(vals)), + FrozenRowCount: 1, + } + data := []*sheets.RowData{{ + Values: vals, + }} + pivot := &sheets.Sheet{ + Properties: props, + Data: []*sheets.GridData{{ + RowData: data, + ColumnMetadata: metadata, + }}, + ConditionalFormats: cfs, + } + return pivot +} + +func (srv *Service) createSheet( + ctx context.Context, s sheets.Spreadsheet, +) (*sheets.Spreadsheet, error) { + res, err := srv.sheets.Spreadsheets.Create(&s).Context(ctx).Do() + if err != nil { + return nil, errors.Wrap(err, "create new Spreadsheet") + } + return res, nil +} + +func (srv *Service) updatePerms(ctx context.Context, spreadsheetID string) error { + // Update the new spreadsheet's permissions. By default, the spreadsheet is + // owned by the Service Account that the Sheets service was authenticated + // with, and is not accessible to anyone else. We open the file up to anyone + // with the link. + perm := &drive.Permission{ + Type: "anyone", + Role: "writer", + } + _, err := srv.drive.Permissions.Create(spreadsheetID, perm).Context(ctx).Do() + return errors.Wrap(err, "update Spreadsheet permissions") +} + +func sheetIDForTable(i int) int64 { + return int64(i + 1) +} + +func strCell(s string) *sheets.CellData { + return &sheets.CellData{ + UserEnteredValue: &sheets.ExtendedValue{ + StringValue: &s, + }, + } +} + +func numCell(f float64) *sheets.CellData { + switch { + case math.IsInf(f, +1): + f = math.MaxFloat64 + case math.IsInf(f, -1): + f = -math.MaxFloat64 + } + return &sheets.CellData{ + UserEnteredValue: &sheets.ExtendedValue{ + NumberValue: &f, + }, + } +} + +func percentCell(f float64) *sheets.CellData { + c := numCell(f) + c.UserEnteredFormat = &sheets.CellFormat{ + NumberFormat: &sheets.NumberFormat{Type: "PERCENT"}, + } + return c +} + +func deltaToNum(delta string) float64 { + delta = strings.TrimRight(delta, `%`) + f, err := strconv.ParseFloat(delta, 64) + if err != nil { + panic(err) + } + return f / 100 +} + +func deltaToPercentString(delta string) string { + delta = strings.TrimLeft(delta, "+") + for strings.Contains(delta, `.`) { + if strings.Contains(delta, `.%`) { + delta = strings.ReplaceAll(delta, `.%`, `%`) + } else if strings.Contains(delta, `0%`) { + delta = strings.ReplaceAll(delta, `0%`, `%`) + } else { + break + } + } + return delta +} + +func isSmallerBetter(table *benchstat.Table) bool { + // "smaller is better, except speeds" + // https://github.com/golang/perf/blob/master/benchstat/table.go#L110 + return (table.Metric != "speed") +} + +func withSize(pixels int64) *sheets.DimensionProperties { + return &sheets.DimensionProperties{PixelSize: pixels} +} + +var red = rgb(230, 124, 115) +var white = rgb(255, 255, 255) +var green = rgb(87, 187, 138) + +func rgb(red, green, blue int) *sheets.Color { + return &sheets.Color{ + Red: float64(red) / 255, + Green: float64(green) / 255, + Blue: float64(blue) / 255, + } +} + +func condFormatting(sheetID, col int64, smallerBetter bool) *sheets.ConditionalFormatRule { + minColor, maxColor := red, green + if smallerBetter { + minColor, maxColor = maxColor, minColor + } + return &sheets.ConditionalFormatRule{ + GradientRule: &sheets.GradientRule{ + Minpoint: &sheets.InterpolationPoint{ + Color: minColor, + Type: "NUMBER", + Value: "-1.0", + }, + Midpoint: &sheets.InterpolationPoint{ + Color: white, + Type: "NUMBER", + Value: "0.0", + }, + Maxpoint: &sheets.InterpolationPoint{ + Color: maxColor, + Type: "NUMBER", + Value: "1.0", + }, + }, + Ranges: []*sheets.GridRange{{ + SheetId: sheetID, + StartColumnIndex: col, + EndColumnIndex: col + 1, + StartRowIndex: 1, + }}, + } +} diff --git a/pkg/cmd/roachprod-microbench/main.go b/pkg/cmd/roachprod-microbench/main.go new file mode 100644 index 000000000000..6d3491c0a403 --- /dev/null +++ b/pkg/cmd/roachprod-microbench/main.go @@ -0,0 +1,215 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package main + +import ( + "bytes" + "context" + "flag" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/google" + "github.com/cockroachdb/cockroach/pkg/roachprod/logger" + "github.com/cockroachdb/cockroach/pkg/roachprod/ssh" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/cockroachdb/errors" +) + +const timeFormat = "2006-01-02T15_04_05" + +var ( + l *logger.Logger + flags = flag.NewFlagSet(os.Args[0], flag.ContinueOnError) + flagCluster = flags.String("cluster", "", "cluster to run the benchmarks on") + flagLibDir = flags.String("libdir", "bin/lib", "location of libraries required by test binaries") + flagBinaries = flags.String("binaries", "bin/test_binaries.tar.gz", "portable test binaries archive built with dev test-binaries") + flagCompareBinaries = flags.String("compare-binaries", "", "run additional binaries from this archive and compare the results") + flagRemoteDir = flags.String("remotedir", "/mnt/data1", "working directory on the target cluster") + flagCompareDir = flags.String("comparedir", "", "directory with reports to compare the results of the benchmarks against (produces a comparison sheet)") + flagPublishDir = flags.String("publishdir", "", "directory to publish the reports of the benchmarks to") + flagSheetDesc = flags.String("sheet-desc", "", "append a description to the sheet title when doing a comparison") + flagExclude = flags.String("exclude", "", "comma-separated regex of packages and benchmarks to exclude e.g. 'pkg/util/.*:BenchmarkIntPool,pkg/sql:.*'") + flagTimeout = flags.String("timeout", "", "timeout for each benchmark e.g. 10m") + flagShell = flags.String("shell", "", "additional shell command to run on node before benchmark execution") + flagCopy = flags.Bool("copy", true, "copy and extract test binaries and libraries to the target cluster") + flagLenient = flags.Bool("lenient", true, "tolerate errors in the benchmark results") + flagAffinity = flags.Bool("affinity", true, "run benchmarks with iterations and binaries having affinity to the same node") + flagIterations = flags.Int("iterations", 1, "number of iterations to run each benchmark") + workingDir string + testArgs []string + timestamp time.Time +) + +func verifyPathFlag(flagName, path string, expectDir bool) error { + if fi, err := os.Stat(path); err != nil { + return fmt.Errorf("the %s flag points to a path %s that does not exist", flagName, path) + } else { + switch isDir := fi.Mode().IsDir(); { + case expectDir && !isDir: + return fmt.Errorf("the %s flag must point to a directory not a file", flagName) + case !expectDir && isDir: + return fmt.Errorf("the %s flag must point to a file not a directory", flagName) + } + } + return nil +} + +func setupVars() error { + flags.Usage = func() { + _, _ = fmt.Fprintf(flags.Output(), "usage: %s [] -- []\n", flags.Name()) + flags.PrintDefaults() + } + + if len(os.Args) < 2 { + var b bytes.Buffer + flags.SetOutput(&b) + flags.Usage() + return errors.Newf("%s", b.String()) + } + + workingDir = strings.TrimRight(os.Args[1], "/") + err := os.MkdirAll(workingDir, os.ModePerm) + if err != nil { + return err + } + err = verifyPathFlag("working dir", workingDir, true) + if err != nil { + return err + } + + testArgs = getTestArgs() + if err = flags.Parse(os.Args[2:]); err != nil { + return err + } + + // Only require binaries if we are going to run microbenchmarks. + if *flagCluster != "" { + if err = verifyPathFlag("binaries archive", *flagBinaries, false); err != nil { + return err + } + if !strings.HasSuffix(*flagBinaries, ".tar") && !strings.HasSuffix(*flagBinaries, ".tar.gz") { + return fmt.Errorf("the binaries archive must have the extention .tar or .tar.gz") + } + } + + if *flagCompareBinaries != "" { + if err = verifyPathFlag("binaries compare archive", *flagCompareBinaries, false); err != nil { + return err + } + if *flagCompareDir != "" { + return fmt.Errorf("cannot specify both --compare-binaries and --comparedir") + } + *flagCompareDir = filepath.Join(workingDir, "compare") + err = os.MkdirAll(*flagCompareDir, os.ModePerm) + if err != nil { + return err + } + } + + timestamp = timeutil.Now() + initLogger(filepath.Join(workingDir, fmt.Sprintf("roachprod-microbench-%s.log", timestamp.Format(timeFormat)))) + l.Printf("roachprod-microbench %s", strings.Join(os.Args, " ")) + + return nil +} + +func run() error { + err := setupVars() + if err != nil { + return err + } + + var packages []string + if *flagCluster != "" { + binaries := []string{*flagBinaries} + if *flagCompareBinaries != "" { + binaries = append(binaries, *flagCompareBinaries) + } + packages, err = readArchivePackages(*flagBinaries) + if err != nil { + return err + } + err = executeBenchmarks(binaries, packages) + if err != nil { + return err + } + } else { + packages, err = getPackagesFromLogs(workingDir) + if err != nil { + return err + } + l.Printf("No cluster specified, skipping microbenchmark execution") + } + + if *flagPublishDir != "" { + err = publishDirectory(workingDir, *flagPublishDir) + if err != nil { + return err + } + } + + if *flagCompareDir != "" { + ctx := context.Background() + service, cErr := google.New(ctx) + if cErr != nil { + return cErr + } + tableResults, cErr := compareBenchmarks(packages, workingDir, *flagCompareDir) + if cErr != nil { + return cErr + } + for pkgGroup, tables := range tableResults { + sheetName := pkgGroup + "/..." + if *flagSheetDesc != "" { + sheetName += " " + *flagSheetDesc + } + if pubErr := publishToGoogleSheets(ctx, service, sheetName, tables); pubErr != nil { + return pubErr + } + } + } else { + l.Printf("No comparison directory specified, skipping comparison\n") + } + + return nil +} + +func main() { + ssh.InsecureIgnoreHostKey = true + if err := run(); err != nil { + if l != nil { + l.Errorf("Failed with error: %v", err) + } else { + _, _ = fmt.Fprintln(os.Stderr, err) + } + fmt.Println("FAIL") + os.Exit(1) + } else { + fmt.Println("SUCCESS") + } +} + +func getTestArgs() (ret []string) { + if len(os.Args) > 2 { + flagsAndArgs := os.Args[2:] + for i, arg := range flagsAndArgs { + if arg == "--" { + ret = flagsAndArgs[i+1:] + break + } + } + } + return ret +} diff --git a/pkg/cmd/roachprod-microbench/metadata.go b/pkg/cmd/roachprod-microbench/metadata.go new file mode 100644 index 000000000000..ccaf34493720 --- /dev/null +++ b/pkg/cmd/roachprod-microbench/metadata.go @@ -0,0 +1,77 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package main + +import ( + "archive/tar" + "io" + "os" + "strings" + + "github.com/klauspost/compress/gzip" +) + +const binRunSuffix = "/bin/run.sh" + +// readArchivePackages reads the entries in the provided archive file and +// returns a list of packages for which test binaries have been built. +func readArchivePackages(archivePath string) ([]string, error) { + file, err := os.Open(archivePath) + defer func() { _ = file.Close() }() + if err != nil { + return nil, err + } + + reader := io.Reader(file) + if strings.HasSuffix(*flagBinaries, ".gz") { + reader, err = gzip.NewReader(reader) + if err != nil { + return nil, err + } + } + + packages := make([]string, 0) + tarScan := tar.NewReader(reader) + for { + entry, nextErr := tarScan.Next() + if nextErr == io.EOF { + break + } + if nextErr != nil { + return nil, err + } + + if strings.HasSuffix(entry.Name, binRunSuffix) { + packages = append(packages, strings.TrimSuffix(entry.Name, binRunSuffix)) + } + } + return packages, nil +} + +// getPackagesFromLogs scans a directory for benchmark report logs and +// creates a list of packages that were used to generate the logs. +func getPackagesFromLogs(dir string) ([]string, error) { + packages := make([]string, 0) + files, err := os.ReadDir(dir) + if err != nil { + return nil, err + } + + for _, file := range files { + if file.IsDir() { + continue + } + if isReportLog(file.Name()) { + packages = append(packages, getPackageFromReportLogName(file.Name())) + } + } + return packages, nil +} diff --git a/pkg/cmd/roachprod-microbench/report.go b/pkg/cmd/roachprod-microbench/report.go new file mode 100644 index 000000000000..c82ae42b2902 --- /dev/null +++ b/pkg/cmd/roachprod-microbench/report.go @@ -0,0 +1,115 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package main + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/cluster" + "github.com/cockroachdb/cockroach/pkg/roachprod/logger" +) + +const ( + reportLogName = "report" + analyticsLogName = "analytics" +) + +type Report struct { + benchmarkOutput map[string]*os.File + analyticsOutput map[string]*os.File + path string +} + +func initLogger(path string) { + loggerCfg := logger.Config{Stdout: os.Stdout, Stderr: os.Stderr} + var loggerError error + l, loggerError = loggerCfg.NewLogger(path) + if loggerError != nil { + _, _ = fmt.Fprintf(os.Stderr, "unable to configure logger: %s\n", loggerError) + os.Exit(1) + } +} + +func getReportLogName(name string, pkg string) string { + pkgFormatted := strings.ReplaceAll(pkg, "/", "_") + return fmt.Sprintf("%s-%s.log", pkgFormatted, name) +} + +func getPackageFromReportLogName(name string) string { + pkg := strings.ReplaceAll(name, "_", "/") + pkg = strings.TrimSuffix(pkg, "-"+reportLogName+".log") + return pkg +} + +func isReportLog(name string) bool { + return strings.HasSuffix(name, reportLogName+".log") +} + +func (report *Report) createReports(path string, packages []string) error { + report.benchmarkOutput = make(map[string]*os.File) + report.analyticsOutput = make(map[string]*os.File) + report.path = path + + var err error + for _, pkg := range packages { + report.benchmarkOutput[pkg], err = os.Create(filepath.Join(path, getReportLogName(reportLogName, pkg))) + if err != nil { + return err + } + report.analyticsOutput[pkg], err = os.Create(filepath.Join(path, getReportLogName(analyticsLogName, pkg))) + if err != nil { + return err + } + } + return nil +} + +func (report *Report) closeReports() { + for _, file := range report.benchmarkOutput { + err := file.Close() + if err != nil { + l.Errorf("Error closing benchmark output file: %s", err) + } + } + for _, file := range report.analyticsOutput { + err := file.Close() + if err != nil { + l.Errorf("Error closing analytics output file: %s", err) + } + } +} + +func (report *Report) writeBenchmarkErrorLogs(response cluster.RemoteResponse, index int) error { + benchmarkResponse := response.Metadata.(benchmarkIndexed) + stdoutLogName := fmt.Sprintf("%s-%d-stdout.log", benchmarkResponse.name, index) + stderrLogName := fmt.Sprintf("%s-%d-stderr.log", benchmarkResponse.name, index) + l.Printf("Writing error logs for benchmark at %s, %s\n", stdoutLogName, stderrLogName) + + if err := os.WriteFile(filepath.Join(report.path, stdoutLogName), []byte(response.Stdout), 0644); err != nil { + return err + } + + var buffer strings.Builder + buffer.WriteString(fmt.Sprintf("Remote command: %s\n", strings.Join(response.Args, " "))) + if response.Err != nil { + buffer.WriteString(fmt.Sprintf("Remote error: %s\n", response.Err)) + } + buffer.WriteString(response.Stderr) + + if err := os.WriteFile(filepath.Join(report.path, stderrLogName), []byte(buffer.String()), 0644); err != nil { + return err + } + + return nil +} diff --git a/pkg/cmd/roachprod-microbench/storage.go b/pkg/cmd/roachprod-microbench/storage.go new file mode 100644 index 000000000000..b6fdd8f5c44a --- /dev/null +++ b/pkg/cmd/roachprod-microbench/storage.go @@ -0,0 +1,144 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package main + +import ( + "context" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "sync" + + "cloud.google.com/go/storage" +) + +var client *storage.Client + +func initGCS() error { + var err error + ctx := context.Background() + client, err = storage.NewClient(ctx) + return err +} + +func createWriter(path string) (io.WriteCloser, error) { + if strings.HasPrefix(path, "gs://") { + return createGCSWriter(path) + } else { + return os.Create(path) + } +} + +func createGCSWriter(path string) (io.WriteCloser, error) { + if client == nil { + if err := initGCS(); err != nil { + return nil, err + } + } + bucket, obj, err := parseGCSPath(path) + if err != nil { + return nil, err + } + return bucket.Object(obj).NewWriter(context.Background()), nil +} + +func parseGCSPath(path string) (*storage.BucketHandle, string, error) { + parts := strings.SplitN(path, "/", 4) + if len(parts) != 4 { + return nil, "", fmt.Errorf("invalid GCS path") + } + return client.Bucket(parts[2]), parts[3], nil +} + +func createReader(path string) (io.ReadCloser, error) { + if strings.HasPrefix(path, "gs://") { + return createGCSReader(path) + } else { + return os.Open(path) + } +} + +func createGCSReader(path string) (io.ReadCloser, error) { + if client == nil { + if err := initGCS(); err != nil { + return nil, err + } + } + bucket, obj, err := parseGCSPath(path) + if err != nil { + return nil, err + } + return bucket.Object(obj).NewReader(context.Background()) +} + +func joinPath(elem ...string) string { + if len(elem) == 0 { + return "" + } + if strings.HasPrefix(elem[0], "gs://") { + elem[0] = strings.TrimPrefix(elem[0], "gs://") + return "gs://" + filepath.Join(elem...) + } + return filepath.Join(elem...) +} + +func publishDirectory(localSrcDir, dstDir string) error { + files, readErr := os.ReadDir(localSrcDir) + if readErr != nil { + return readErr + } + var wg sync.WaitGroup + errorsFound := false + wg.Add(len(files)) + for index := range files { + go func(index int) { + defer wg.Done() + path := files[index] + reader, wErr := createReader(filepath.Join(localSrcDir, path.Name())) + if wErr != nil { + l.Errorf("Failed to create reader for %s - %v", path.Name(), wErr) + errorsFound = true + } + writer, wErr := createWriter(joinPath(dstDir, path.Name())) + if wErr != nil { + l.Errorf("Failed to create writer for %s - %v", path.Name(), wErr) + errorsFound = true + } + _, cErr := io.Copy(writer, reader) + if cErr != nil { + l.Errorf("Failed to copy %s - %v", path.Name(), cErr) + errorsFound = true + } + cErr = writer.Close() + if cErr != nil { + l.Errorf("Failed to close writer for %s - %v", path.Name(), cErr) + errorsFound = true + } + cErr = reader.Close() + if cErr != nil { + l.Errorf("Failed to close reader for %s - %v", path.Name(), cErr) + errorsFound = true + } + }(index) + } + wg.Wait() + if errorsFound { + return fmt.Errorf("failed to publish directory %s", localSrcDir) + } + return nil +} + +func isNotFoundError(err error) bool { + return errors.Is(err, storage.ErrObjectNotExist) || errors.Is(err, os.ErrNotExist) +} diff --git a/pkg/cmd/roachprod-microbench/testdata/benchmark b/pkg/cmd/roachprod-microbench/testdata/benchmark new file mode 100644 index 000000000000..3a1aa2eb523f --- /dev/null +++ b/pkg/cmd/roachprod-microbench/testdata/benchmark @@ -0,0 +1,61 @@ +# Well formed benchmark output +benchmark +goarch: amd64 +cpu: Intel(R) Xeon(R) CPU @ 2.80GHz +BenchmarkFastIntMap/4x4-4/fastintmap-24 1 603.0 ns/op 0 B/op 0 allocs/op +BenchmarkFastIntMap/4x4-4/map-24 1 1039 ns/op 0 B/op 0 allocs/op +BenchmarkFastIntMap/4x4-4/map-sized-24 1 977.0 ns/op 0 B/op 0 allocs/op +BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op +PASS +---- +false [[BenchmarkFastIntMap/4x4-4/fastintmap-24 1 603.0 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-24 1 1039 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-sized-24 1 977.0 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]] + +# Scattered benchmark output +benchmark +goarch: amd64 +cpu: Intel(R) Xeon(R) CPU @ 2.80GHz +BenchmarkFastIntMap/4x4-4/fastintmap-24 + 1 603.0 ns/op 0 B/op 0 allocs/op +BenchmarkFastIntMap/4x4-4/map-24 +log noise + 1 1039 ns/op 0 B/op 0 allocs/op +BenchmarkFastIntMap/4x4-4/map-sized-24 1 977.0 ns/op 0 B/op 0 allocs/op +more noise +BenchmarkFastIntMap/4x4-4/slice-24 +extra log noise + + +1 1406 ns/op 64 B/op 3 allocs/op +PASS +---- +false [[BenchmarkFastIntMap/4x4-4/fastintmap-24 1 603.0 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-24 1 1039 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-sized-24 1 977.0 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]] + +# Missing benchmark name, and missing benchmark metrics (incorrect column count) +benchmark +goarch: amd64 +cpu: Intel(R) Xeon(R) CPU @ 2.80GHz +1 603.0 ns/op 0 B/op 0 allocs/op +BenchmarkFastIntMap/4x4-4/map-24 1 1039 ns/op 0 B/op 0 allocs/op +BenchmarkFastIntMap/4x4-4/map-sized-24 1 977.0 ns/op +BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op +PASS +---- +false [[BenchmarkFastIntMap/4x4-4/map-24 1 1039 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-sized-24 1 977.0 ns/op] [BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]] + +# Failed benchmark +benchmark +goarch: amd64 +cpu: Intel(R) Xeon(R) CPU @ 2.80GHz +BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op +FAIL +---- +true [[BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]] + +# Benchmark with panic +benchmark +goarch: amd64 +cpu: Intel(R) Xeon(R) CPU @ 2.80GHz +BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op +panic: something went wrong +---- +true [[BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]]