roachtest: unskip large decommissionBench test
This extends the timeout of the large, 3000-warehouse decommission
benchmark roachtest to 3 hours, since it can take up to an hour for the
test to import data, achieve range count balance, and ramp up its
workload. The test was skipped in #83445 due to frequent timeouts at the
1-hour mark.

It also adds a `--max-rate` parameter to the workload generator to
ensure the cluster avoids overload.

Release note: None
AlexTalks committed Jul 1, 2022
1 parent d45ca84 commit e5d33a1
Showing 2 changed files with 22 additions and 7 deletions.
17 changes: 13 additions & 4 deletions pkg/cmd/roachtest/tests/decommissionbench.go
@@ -44,6 +44,9 @@ type decommissionBenchSpec struct {
 	// When true, the test will attempt to stop the node prior to decommission.
 	whileDown bool
 
+	// An override for the default timeout, if needed.
+	timeout time.Duration
+
 	skip string
 }

@@ -86,7 +89,9 @@ func registerDecommissionBench(r registry.Registry) {
 			warehouses:       3000,
 			load:             true,
 			admissionControl: true,
-			skip:             "https://github.com/cockroachdb/cockroach/issues/82870",
+			// This test can take nearly an hour to import and achieve balance, so
+			// we extend the timeout to let it complete.
+			timeout: 3 * time.Hour,
 		},
 	} {
 		registerDecommissionBenchSpec(r, benchSpec)
@@ -96,6 +101,9 @@ func registerDecommissionBench(r registry.Registry) {
 // registerDecommissionBenchSpec adds a test using the specified configuration to the registry.
 func registerDecommissionBenchSpec(r registry.Registry, benchSpec decommissionBenchSpec) {
 	timeout := defaultTimeout
+	if benchSpec.timeout != time.Duration(0) {
+		timeout = benchSpec.timeout
+	}
 	extraNameParts := []string{""}
 
 	if benchSpec.snapshotRate != 0 {
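
As an aside, the zero-value fallback in this hunk is the standard Go pattern for optional struct fields. A minimal, self-contained sketch of the pattern, assuming a one-hour default (the real defaultTimeout is defined elsewhere in the roachtest package):

package main

import (
	"fmt"
	"time"
)

// Assumed stand-in for the registry-wide default; the real defaultTimeout
// is defined elsewhere in the roachtest package.
const defaultTimeout = 1 * time.Hour

type benchSpec struct {
	timeout time.Duration // zero value means "use the default"
}

func resolveTimeout(s benchSpec) time.Duration {
	if s.timeout != 0 {
		return s.timeout
	}
	return defaultTimeout
}

func main() {
	fmt.Println(resolveTimeout(benchSpec{}))                       // 1h0m0s
	fmt.Println(resolveTimeout(benchSpec{timeout: 3 * time.Hour})) // 3h0m0s
}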
@@ -166,12 +174,13 @@ func runDecommissionBench(
 		c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), c.Node(i))
 	}
 
+	maxRate := tpccMaxRate(benchSpec.warehouses)
 	rampDuration := 3 * time.Minute
 	rampStarted := make(chan struct{}, 1)
 	importCmd := fmt.Sprintf(`./cockroach workload fixtures import tpcc --warehouses=%d`,
 		benchSpec.warehouses)
-	workloadCmd := fmt.Sprintf("./workload run tpcc --warehouses=%d --duration=%s "+
-		"--histograms=%s/stats.json --ramp=%s --tolerate-errors {pgurl:1-%d}", benchSpec.warehouses,
+	workloadCmd := fmt.Sprintf("./workload run tpcc --warehouses=%d --max-rate=%d --duration=%s "+
+		"--histograms=%s/stats.json --ramp=%s --tolerate-errors {pgurl:1-%d}", benchSpec.warehouses, maxRate,
 		testTimeout, t.PerfArtifactsDir(), rampDuration, benchSpec.nodes)
 	t.Status(fmt.Sprintf("initializing cluster with %d warehouses", benchSpec.warehouses))
 	c.Run(ctx, c.Node(pinnedNode), importCmd)
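
Note the argument order must match the format verbs: warehouses first, then maxRate. For a concrete sense of what this builds: with 3000 warehouses, the 3-hour timeout, and the cap from tpccMaxRate (roughly 736 txns/sec; see tpcc.go below), the rendered command would look something like the following, where the artifacts path and node count are placeholders:

./workload run tpcc --warehouses=3000 --max-rate=736 --duration=3h0m0s --histograms=<perf-dir>/stats.json --ramp=3m0s --tolerate-errors {pgurl:1-N}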
@@ -230,7 +239,7 @@ func runDecommissionBench(
 	// per-second "tick", we will simply tick at the start of the decommission
 	// and again at the completion. Roachperf will use the elapsed time between
 	// these ticks to plot the duration of the decommission.
-	tick, perfBuf := initBulkJobPerfArtifacts("decommission", defaultTimeout)
+	tick, perfBuf := initBulkJobPerfArtifacts("decommission", testTimeout)
 	recorder := &decommBenchTicker{pre: tick, post: tick}
 
 	m.Go(func(ctx context.Context) error {
12 changes: 9 additions & 3 deletions pkg/cmd/roachtest/tests/tpcc.go
@@ -291,6 +291,14 @@ var tpccSupportedWarehouses = []struct {
 	{hardware: "gce-n5cpu16", v: version.MustParse(`v2.1.0-0`), warehouses: 1300},
 }
 
+// tpccMaxRate calculates the max rate of the workload given a number of warehouses.
+func tpccMaxRate(warehouses int) int {
+	const txnsPerWarehousePerSecond = 12.8 * (23.0 / 10.0) * (1.0 / 60.0) // max_tpmC/warehouse * all_txns/new_order_txns * minutes/seconds
+	rateAtExpected := txnsPerWarehousePerSecond * float64(warehouses)
+	maxRate := int(rateAtExpected / 2)
+	return maxRate
+}
+
 func maxSupportedTPCCWarehouses(
 	buildVersion version.Version, cloud string, nodes spec.ClusterSpec,
 ) int {
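
To make the arithmetic concrete: 12.8 tpmC per warehouse at the theoretical maximum, times 23/10 (the ratio of all TPC-C transactions to new-order transactions), divided by 60, gives roughly 0.49 transactions per warehouse per second; 3000 warehouses therefore imply about 1472 txns/sec, halved to about 736. A minimal standalone sketch mirroring the new helper:

package main

import "fmt"

// Mirrors tpccMaxRate above: theoretical max tpmC per warehouse, scaled by
// the all-transactions-to-new-order ratio, converted to per-second, then
// halved so the cluster stays comfortably below overload.
func tpccMaxRate(warehouses int) int {
	const txnsPerWarehousePerSecond = 12.8 * (23.0 / 10.0) * (1.0 / 60.0)
	rateAtExpected := txnsPerWarehousePerSecond * float64(warehouses)
	return int(rateAtExpected / 2)
}

func main() {
	fmt.Println(tpccMaxRate(3000)) // ≈ 736 txns/sec
}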
@@ -1016,9 +1024,7 @@ func loadTPCCBench(
 	// the desired distribution. This should allow for load-based rebalancing to
 	// help distribute load. Optionally pass some load configuration-specific
 	// flags.
-	const txnsPerWarehousePerSecond = 12.8 * (23.0 / 10.0) * (1.0 / 60.0) // max_tpmC/warehouse * all_txns/new_order_txns * minutes/seconds
-	rateAtExpected := txnsPerWarehousePerSecond * float64(b.EstimatedMax)
-	maxRate := int(rateAtExpected / 2)
+	maxRate := tpccMaxRate(b.EstimatedMax)
 	rampTime := (1 * rebalanceWait) / 4
 	loadTime := (3 * rebalanceWait) / 4
 	cmd = fmt.Sprintf("./cockroach workload run tpcc --warehouses=%d --workers=%d --max-rate=%d "+