diff --git a/pkg/cmd/roachtest/test.go b/pkg/cmd/roachtest/test.go
index b6eb562acbc3..4951bac33cfe 100644
--- a/pkg/cmd/roachtest/test.go
+++ b/pkg/cmd/roachtest/test.go
@@ -654,6 +654,12 @@ type test struct {
 	runnerID int64
 	start    time.Time
 	end      time.Time
+
+	// debugEnabled is a test scoped value which enables automated tests to
+	// enable debugging without enabling debugging for all tests.
+	// It is a bit of a hack added to help debug #34458.
+	debugEnabled bool
+
 	// artifactsDir is the path to the directory holding all the artifacts for
 	// this test. It will contain a test.log file, cluster logs, and
 	// subdirectories for subtests.
@@ -1097,7 +1103,7 @@ func (r *registry) runAsync(
 		}
 		if c != nil {
 			defer func() {
-				if !debugEnabled || !t.Failed() {
+				if (!debugEnabled && !t.debugEnabled) || !t.Failed() {
 					c.Destroy(ctx)
 				} else {
 					c.l.Printf("not destroying cluster to allow debugging\n")
diff --git a/pkg/cmd/roachtest/ycsb.go b/pkg/cmd/roachtest/ycsb.go
index 6b83edbbd4d5..4010a4e7222f 100644
--- a/pkg/cmd/roachtest/ycsb.go
+++ b/pkg/cmd/roachtest/ycsb.go
@@ -18,10 +18,51 @@ package main
 import (
 	"context"
 	"fmt"
+	"strconv"
+	"strings"
+
+	"github.com/pkg/errors"
 )
 
+// performanceExpectations is a map from workload to a map from core count to
+// expected throughput below which we consider the test to have failed.
+var performanceExpectations = map[string]map[int]float64{
+	// The below numbers are minimum expectations based on historical data.
+	"A": {8: 2000},
+	"B": {8: 15000},
+}
+
+// getPerformanceExpectation returns the minimum expected throughput for
+// workload wl on machines with the given number of cpus. The boolean is false
+// when no expectation is recorded for that combination.
+func getPerformanceExpectation(wl string, cpus int) (float64, bool) {
+	// Indexing a missing workload yields a nil inner map, and reading from a
+	// nil map reports "not found", so a single two-level lookup suffices.
+	expected, exists := performanceExpectations[wl][cpus]
+	return expected, exists
+}
+
+// parseThroughputFromOutput extracts the throughput from the workload output.
+// It looks at the text following the last "__result\n" marker and parses the
+// fourth whitespace-separated field as a float. An error is returned if the
+// marker is missing, if fewer than four fields follow it, or if the field is
+// not a valid float.
+func parseThroughputFromOutput(output string) (opsPerSec float64, _ error) {
+	prefix := "__result\n" // this string precedes the cumulative results
+	idx := strings.LastIndex(output, prefix)
+	if idx == -1 {
+		return 0, fmt.Errorf("failed to find %q in output", prefix)
+	}
+	// Guard the index below: truncated output would otherwise panic.
+	const throughputField = 3
+	fields := strings.Fields(output[idx+len(prefix):])
+	if len(fields) <= throughputField {
+		return 0, fmt.Errorf("found %d fields after %q in output, expected at least %d",
+			len(fields), prefix, throughputField+1)
+	}
+	return strconv.ParseFloat(fields[throughputField], 64)
+}
+
 func registerYCSB(r *registry) {
-	runYCSB := func(ctx context.Context, t *test, c *cluster, wl string) {
+	runYCSB := func(ctx context.Context, t *test, c *cluster, wl string, cpus int) {
 		nodes := c.nodes - 1
 
 		c.Put(ctx, cockroach, "./cockroach", c.Range(1, nodes))
@@ -38,7 +79,23 @@ func registerYCSB(r *registry) {
 				" --workload=%s --concurrency=64 --histograms=logs/stats.json"+
 					ramp+duration+" {pgurl:1-%d}",
 				wl, nodes)
-			c.Run(ctx, c.Node(nodes+1), cmd)
+			out, err := c.RunWithBuffer(ctx, t.l, c.Node(nodes+1), cmd)
+			if err != nil {
+				return errors.Wrapf(err, "failed with output %q", string(out))
+			}
+			if expected, ok := getPerformanceExpectation(wl, cpus); ok {
+				throughput, err := parseThroughputFromOutput(string(out))
+				if err != nil {
+					return err
+				}
+				// Set before the check so a failure below leaves the cluster up.
+				t.debugEnabled = teamCity
+				if throughput < expected {
+					return fmt.Errorf("%v failed to meet throughput expectations: "+
+						"observed %v, expected at least %v", t.Name(), throughput, expected)
+				}
+				c.l.Printf("Observed throughput of %v > %v", throughput, expected)
+			}
 			return nil
 		})
 		m.Wait()
@@ -51,18 +108,18 @@ func registerYCSB(r *registry) {
 			continue
 		}
 		for _, cpus := range []int{8, 32} {
-			wl := wl
 			var name string
 			if cpus == 8 { // support legacy test name which didn't include cpu
 				name = fmt.Sprintf("ycsb/%s/nodes=3", wl)
 			} else {
 				name = fmt.Sprintf("ycsb/%s/nodes=3/cpu=%d", wl, cpus)
 			}
+			wl, cpus := wl, cpus
 			r.Add(testSpec{
 				Name:    name,
 				Cluster: makeClusterSpec(4, cpu(cpus)),
 				Run: func(ctx context.Context, t *test, c *cluster) {
-					runYCSB(ctx, t, c, wl)
+					runYCSB(ctx, t, c, wl, cpus)
 				},
 			})
 		}