Skip to content

Commit

Permalink
roachtest: require perf. tests to opt in via TestSpec.Benchmark
Browse files Browse the repository at this point in the history
Previously, roachtests which benchmark performance (cf. correctness)
were indistinguishable from correctness tests. That is, a performance
test is like any other test with the exception of _optionally_ writing
stats.json under 'Test.PerfArtifactsDir'; these artifacts are
automatically exported to a gcs bucket, used in conjunction with the
roachperf dashboard.

Having no direct way to distinguish a performance test from a correctness
test has several challenges. E.g., performance tests may require a specific
machine type or architecture; background workloads like incremental backup
may cause a performance regression; new metamorphic configurations like
arm64 and fips may require a "bake-in" time before performance tests
can be enabled. In future, the test runner may make specialized
decisions (e.g., don't reuse a cluster) when executing a performance
test. Thus, we need a (standard) mechanism to enumerate all performance tests.
Given their specific requirements, the test author must explicitly opt in,
by setting TestSpec.Benchmark to 'true'.

This PR applies the above change retroactively, i.e., setting 'TestSpec.Benchmark'
for all _known_ performance tests, including those which _assert_ on performance
instead of exporting stats.json.
It also fixes `roachtest list --bench` and `roachtest bench`, which were
out-of-date, albeit not actively used.

Epic: none
Release note: None
  • Loading branch information
srosenberg committed Jun 9, 2023
1 parent 8a2050b commit c2606a5
Show file tree
Hide file tree
Showing 27 changed files with 188 additions and 123 deletions.
7 changes: 6 additions & 1 deletion pkg/cmd/roachtest/github_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ func TestShouldPost(t *testing.T) {
{false, 1, "token", "master", true},
}

reg, _ := makeTestRegistry(spec.GCE, "", "", false)
reg := makeTestRegistry(spec.GCE, "", "", false, false)

for _, c := range testCases {
t.Setenv("GITHUB_API_TOKEN", c.envGithubAPIToken)
t.Setenv("TC_BUILD_BRANCH", c.envTcBuildBranch)
Expand Down Expand Up @@ -145,8 +146,12 @@ func TestCreatePostRequest(t *testing.T) {
{true, false, true, false, otherErr, false, nil},
}

<<<<<<< HEAD
reg, _ := makeTestRegistry(spec.GCE, "", "", false)

=======
reg := makeTestRegistry(spec.GCE, "", "", false, false)
>>>>>>> 0df3a03e781 (roachtest: require perf. tests to opt in via TestSpec.Benchmark)
for _, c := range testCases {
clusterSpec := reg.MakeClusterSpec(1)

Expand Down
33 changes: 16 additions & 17 deletions pkg/cmd/roachtest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,17 +173,13 @@ Examples:
roachtest list tag:weekly
`,
RunE: func(_ *cobra.Command, args []string) error {
r, err := makeTestRegistry(cloud, instanceType, zonesF, localSSDArg)
r, err := makeTestRegistry(cloud, instanceType, zonesF, localSSDArg, listBench)
if err != nil {
return err
}
if !listBench {
tests.RegisterTests(&r)
} else {
tests.RegisterBenchmarks(&r)
}
tests.RegisterTests(&r)

matchedTests := r.List(context.Background(), args)
matchedTests := r.List(args)
for _, test := range matchedTests {
var skip string
if test.Skip != "" {
Expand Down Expand Up @@ -228,7 +224,8 @@ runner itself.
user: username,
clusterID: clusterID,
versionsBinaryOverride: versionsBinaryOverride,
})
enableFIPS: enableFIPS,
}, false /* benchOnly */)
},
}

Expand All @@ -255,7 +252,7 @@ runner itself.
if literalArtifacts == "" {
literalArtifacts = artifacts
}
return runTests(tests.RegisterBenchmarks, cliCfg{
return runTests(tests.RegisterTests, cliCfg{
args: args,
count: count,
cpuQuota: cpuQuota,
Expand All @@ -266,7 +263,8 @@ runner itself.
user: username,
clusterID: clusterID,
versionsBinaryOverride: versionsBinaryOverride,
})
enableFIPS: enableFIPS,
}, true /* benchOnly */)
},
}

Expand Down Expand Up @@ -360,14 +358,17 @@ type cliCfg struct {
versionsBinaryOverride map[string]string
}

func runTests(register func(registry.Registry), cfg cliCfg) error {
func runTests(register func(registry.Registry), cfg cliCfg, benchOnly bool) error {
if cfg.count <= 0 {
return fmt.Errorf("--count (%d) must by greater than 0", cfg.count)
}
r, err := makeTestRegistry(cloud, instanceType, zonesF, localSSDArg)
r, err := makeTestRegistry(cloud, instanceType, zonesF, localSSDArg, benchOnly)
if err != nil {
return err
}

// actual registering of tests
// TODO: don't register if we can't run on the specified registry cloud
register(&r)
cr := newClusterRegistry()
stopper := stop.NewStopper()
Expand Down Expand Up @@ -403,7 +404,7 @@ func runTests(register func(registry.Registry), cfg cliCfg) error {
return err
}

tests := testsToRun(context.Background(), r, filter)
tests := testsToRun(r, filter)
n := len(tests)
if n*cfg.count < cfg.parallelism {
// Don't spin up more workers than necessary. This has particular
Expand Down Expand Up @@ -535,10 +536,8 @@ func testRunnerLogger(
return l, teeOpt
}

func testsToRun(
ctx context.Context, r testRegistryImpl, filter *registry.TestFilter,
) []registry.TestSpec {
tests := r.GetTests(ctx, filter)
func testsToRun(r testRegistryImpl, filter *registry.TestFilter) []registry.TestSpec {
tests := r.GetTests(filter)

var notSkipped []registry.TestSpec
for _, s := range tests {
Expand Down
8 changes: 8 additions & 0 deletions pkg/cmd/roachtest/registry/test_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ type TestSpec struct {
// associated cluster expires. The timeout is always truncated to 10m before
// the test's cluster expires.
Timeout time.Duration
// Denotes whether the test is a roachperf benchmark. If true, the test is expected, but not required, to produce
// artifacts in Test.PerfArtifactsDir(), which get exported to the roachperf dashboard
// (see getPerfArtifacts() in test_runner.go).
// N.B. performance tests may choose to _assert_ on latency, throughput, or some other perf. metric, _without_
// exporting artifacts to the dashboard. E.g., see 'registerKVContention' and 'verifyTxnPerSecond'.
// N.B. performance tests may have different requirements than correctness tests, e.g., machine type/architecture.
// Thus, they must be opted into explicitly via this field.
Benchmark bool
// Tags is a set of tags associated with the test that allow grouping
// tests. If no tags are specified, the set ["default"] is automatically
// given.
Expand Down
21 changes: 14 additions & 7 deletions pkg/cmd/roachtest/test_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
package main

import (
"context"
"fmt"
"os"
"os/exec"
Expand Down Expand Up @@ -43,18 +42,21 @@ type testRegistryImpl struct {
preferSSD bool
// buildVersion is the version of the Cockroach binary that tests will run against.
buildVersion version.Version

benchOnly bool
}

// makeTestRegistry constructs a testRegistryImpl and configures it with opts.
func makeTestRegistry(
cloud string, instanceType string, zones string, preferSSD bool,
cloud string, instanceType string, zones string, preferSSD bool, benchOnly bool,
) (testRegistryImpl, error) {
r := testRegistryImpl{
cloud: cloud,
instanceType: instanceType,
zones: zones,
preferSSD: preferSSD,
m: make(map[string]*registry.TestSpec),
benchOnly: benchOnly,
}
v := buildTag
if v == "" {
Expand All @@ -76,6 +78,11 @@ func (r *testRegistryImpl) Add(spec registry.TestSpec) {
fmt.Fprintf(os.Stderr, "test %s already registered\n", spec.Name)
os.Exit(1)
}
if r.benchOnly && !spec.Benchmark {
// Skip non-benchmarks.
return
}

if err := r.prepareSpec(&spec); err != nil {
fmt.Fprintf(os.Stderr, "%+v\n", err)
os.Exit(1)
Expand Down Expand Up @@ -158,9 +165,8 @@ func (r *testRegistryImpl) prepareSpec(spec *registry.TestSpec) error {
// GetTests returns all the tests that match the given regexp.
// Skipped tests are included, and tests that don't match their minVersion spec
// are also included but marked as skipped.
func (r testRegistryImpl) GetTests(
ctx context.Context, filter *registry.TestFilter,
) []registry.TestSpec {
func (r testRegistryImpl) GetTests(filter *registry.TestFilter) []registry.TestSpec {

var tests []registry.TestSpec
for _, t := range r.m {
if !t.MatchOrSkip(filter) {
Expand All @@ -175,9 +181,10 @@ func (r testRegistryImpl) GetTests(
}

// List lists tests that match one of the filters.
func (r testRegistryImpl) List(ctx context.Context, filters []string) []registry.TestSpec {
func (r testRegistryImpl) List(filters []string) []registry.TestSpec {
filter := registry.NewTestFilter(filters)
tests := r.GetTests(ctx, filter)
tests := r.GetTests(filter)

sort.Slice(tests, func(i, j int) bool { return tests[i].Name < tests[j].Name })
return tests
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/cmd/roachtest/test_registry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ import (

func TestMakeTestRegistry(t *testing.T) {
testutils.RunTrueAndFalse(t, "preferSSD", func(t *testing.T, preferSSD bool) {
r, err := makeTestRegistry(spec.AWS, "foo", "zone123", preferSSD)
r, err := makeTestRegistry(spec.AWS, "foo", "zone123", preferSSD, false)
require.NoError(t, err)

require.Equal(t, preferSSD, r.preferSSD)
require.Equal(t, "zone123", r.zones)
require.Equal(t, "foo", r.instanceType)
Expand All @@ -41,5 +42,4 @@ func TestMakeTestRegistry(t *testing.T) {
require.EqualValues(t, 4, s.CPUs)
require.True(t, s.TerminateOnMigration)
})

}
4 changes: 3 additions & 1 deletion pkg/cmd/roachtest/test_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,9 @@ func (r *testRunner) runWorker(
}
} else {
// Upon success fetch the perf artifacts from the remote hosts.
getPerfArtifacts(ctx, l, c, t)
if t.spec.Benchmark {
getPerfArtifacts(ctx, l, c, t)
}
}
}
}
Expand Down
12 changes: 7 additions & 5 deletions pkg/cmd/roachtest/test_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ const defaultParallelism = 10

func mkReg(t *testing.T) testRegistryImpl {
t.Helper()
r, err := makeTestRegistry(spec.GCE, "", "", false /* preferSSD */)

r, err := makeTestRegistry(spec.GCE, "", "", false /* preferSSD */, false /* benchOnly */)
require.NoError(t, err)
return r
}
Expand Down Expand Up @@ -244,8 +245,7 @@ type runnerTest struct {

func setupRunnerTest(t *testing.T, r testRegistryImpl, testFilters []string) *runnerTest {
ctx := context.Background()

tests := testsToRun(ctx, r, registry.NewTestFilter(testFilters))
tests := testsToRun(r, registry.NewTestFilter(testFilters))
cr := newClusterRegistry()

stopper := stop.NewStopper()
Expand Down Expand Up @@ -402,11 +402,12 @@ func TestRegistryPrepareSpec(t *testing.T) {
}
for _, c := range testCases {
t.Run("", func(t *testing.T) {
r, err := makeTestRegistry(spec.GCE, "", "", false /* preferSSD */)
r, err := makeTestRegistry(spec.GCE, "", "", false /* preferSSD */, false /* benchOnly */)
if err != nil {
t.Fatal(err)
}
err = r.prepareSpec(&c.spec)

if !testutils.IsError(err, c.expectedErr) {
t.Fatalf("expected %q, but found %q", c.expectedErr, err.Error())
}
Expand Down Expand Up @@ -439,7 +440,8 @@ func runExitCodeTest(t *testing.T, injectedError error) error {
}
},
})
tests := testsToRun(ctx, r, registry.NewTestFilter(nil))

tests := testsToRun(r, registry.NewTestFilter(nil))
lopt := loggingOpt{
l: nilLogger(),
tee: logger.NoTee,
Expand Down
27 changes: 16 additions & 11 deletions pkg/cmd/roachtest/tests/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,27 +145,32 @@ func registerAllocator(r registry.Registry) {
}

r.Add(registry.TestSpec{
Name: `replicate/up/1to3`,
Owner: registry.OwnerKV,
Cluster: r.MakeClusterSpec(4),
Name: `replicate/up/1to3`,
Owner: registry.OwnerKV,
Benchmark: true,
Cluster: r.MakeClusterSpec(4),

Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runAllocator(ctx, t, c, 1, 10.0)
},
})
r.Add(registry.TestSpec{
Name: `replicate/rebalance/3to5`,
Owner: registry.OwnerKV,
Cluster: r.MakeClusterSpec(6),
Name: `replicate/rebalance/3to5`,
Owner: registry.OwnerKV,
Benchmark: true,
Cluster: r.MakeClusterSpec(6),

Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runAllocator(ctx, t, c, 3, 42.0)
},
})
r.Add(registry.TestSpec{
Name: `replicate/wide`,
Owner: registry.OwnerKV,
Timeout: 10 * time.Minute,
Cluster: r.MakeClusterSpec(9, spec.CPU(1)),
Run: runWideReplication,
Name: `replicate/wide`,
Owner: registry.OwnerKV,
Benchmark: true,
Timeout: 10 * time.Minute,
Cluster: r.MakeClusterSpec(9, spec.CPU(1)),
Run: runWideReplication,
})
}

Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,7 @@ func registerBackup(r registry.Registry) {
r.Add(registry.TestSpec{
Name: fmt.Sprintf("backup/2TB/%s", backup2TBSpec),
Owner: registry.OwnerDisasterRecovery,
Benchmark: true,
Cluster: backup2TBSpec,
EncryptionSupport: registry.EncryptionMetamorphic,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
Expand Down
19 changes: 11 additions & 8 deletions pkg/cmd/roachtest/tests/connection_latency.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,9 @@ func registerConnectionLatencyTest(r registry.Registry) {
// Single region test.
numNodes := 3
r.Add(registry.TestSpec{
Name: fmt.Sprintf("connection_latency/nodes=%d/certs", numNodes),
Owner: registry.OwnerSQLFoundations,
Name: fmt.Sprintf("connection_latency/nodes=%d/certs", numNodes),
Owner: registry.OwnerSQLFoundations,
Benchmark: true,
// Add one more node for load node.
Cluster: r.MakeClusterSpec(numNodes+1, spec.Zones(regionUsCentral)),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
Expand All @@ -133,18 +134,20 @@ func registerConnectionLatencyTest(r registry.Registry) {
loadNodes := numZones

r.Add(registry.TestSpec{
Name: fmt.Sprintf("connection_latency/nodes=%d/multiregion/certs", numMultiRegionNodes),
Owner: registry.OwnerSQLFoundations,
Cluster: r.MakeClusterSpec(numMultiRegionNodes+loadNodes, spec.Geo(), spec.Zones(geoZonesStr)),
Name: fmt.Sprintf("connection_latency/nodes=%d/multiregion/certs", numMultiRegionNodes),
Owner: registry.OwnerSQLFoundations,
Benchmark: true,
Cluster: r.MakeClusterSpec(numMultiRegionNodes+loadNodes, spec.Geo(), spec.Zones(geoZonesStr)),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runConnectionLatencyTest(ctx, t, c, numMultiRegionNodes, numZones, false /*password*/)
},
})

r.Add(registry.TestSpec{
Name: fmt.Sprintf("connection_latency/nodes=%d/multiregion/password", numMultiRegionNodes),
Owner: registry.OwnerSQLFoundations,
Cluster: r.MakeClusterSpec(numMultiRegionNodes+loadNodes, spec.Geo(), spec.Zones(geoZonesStr)),
Name: fmt.Sprintf("connection_latency/nodes=%d/multiregion/password", numMultiRegionNodes),
Owner: registry.OwnerSQLFoundations,
Benchmark: true,
Cluster: r.MakeClusterSpec(numMultiRegionNodes+loadNodes, spec.Geo(), spec.Zones(geoZonesStr)),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runConnectionLatencyTest(ctx, t, c, numMultiRegionNodes, numZones, true /*password*/)
},
Expand Down
Loading

0 comments on commit c2606a5

Please sign in to comment.