-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
roachtest: introduce admission-control/elastic-cdc
This test sets up a 3-node CRDB cluster on 8vCPU machines running 1000-warehouse TPC-C, and kicks off a few changefeed backfills concurrently. We've observed latency spikes during backfills because of their CPU/scan-heavy nature -- they can elevate CPU scheduling latencies, which in turn translates to an increase in foreground latency. Also in this commit: the std{err,out} output of the prometheus/grafana setup performed by roachtests is now routed to the logger in scope. Release note: None
- Loading branch information
1 parent
fa47f7b
commit a3f7ea1
Showing
5 changed files
with
163 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
138 changes: 138 additions & 0 deletions
138
pkg/cmd/roachtest/tests/admission_control_elastic_cdc.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
// Copyright 2022 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
package tests | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"time" | ||
|
||
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" | ||
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" | ||
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" | ||
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" | ||
"github.com/cockroachdb/cockroach/pkg/roachprod/prometheus" | ||
) | ||
|
||
// This test sets up a 3-node CRDB cluster on 8vCPU machines running | ||
// 1000-warehouse TPC-C, and kicks off a few changefeed backfills concurrently. | ||
// We've observed latency spikes during backfills because of its CPU/scan-heavy | ||
// nature -- it can elevate CPU scheduling latencies which in turn translates to | ||
// an increase in foreground latency. | ||
func registerElasticControlForCDC(r registry.Registry) { | ||
r.Add(registry.TestSpec{ | ||
Name: "admission-control/elastic-cdc", | ||
Owner: registry.OwnerAdmissionControl, | ||
// TODO(irfansharif): After two weeks of nightly baking time, reduce | ||
// this to a weekly cadence. This is a long-running test and serves only | ||
// as a coarse-grained benchmark. | ||
// Tags: []string{`weekly`}, | ||
Cluster: r.MakeClusterSpec(4, spec.CPU(8)), | ||
RequiresLicense: true, | ||
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { | ||
if c.Spec().NodeCount < 4 { | ||
t.Fatalf("expected at least 4 nodes, found %d", c.Spec().NodeCount) | ||
} | ||
|
||
crdbNodes := c.Spec().NodeCount - 1 | ||
workloadNode := crdbNodes + 1 | ||
numWarehouses, workloadDuration, estimatedSetupTime := 1000, 45*time.Minute, 10*time.Minute | ||
if c.IsLocal() { | ||
numWarehouses, workloadDuration, estimatedSetupTime = 1, time.Minute, 2*time.Minute | ||
} | ||
|
||
promCfg := &prometheus.Config{} | ||
promCfg.WithPrometheusNode(c.Node(workloadNode).InstallNodes()[0]). | ||
WithNodeExporter(c.Range(1, c.Spec().NodeCount-1).InstallNodes()). | ||
WithCluster(c.Range(1, c.Spec().NodeCount-1).InstallNodes()). | ||
WithGrafanaDashboard("http://go.crdb.dev/p/changefeed-admission-control-grafana"). | ||
WithScrapeConfigs( | ||
prometheus.MakeWorkloadScrapeConfig("workload", "/", | ||
makeWorkloadScrapeNodes( | ||
c.Node(workloadNode).InstallNodes()[0], | ||
[]workloadInstance{{nodes: c.Node(workloadNode)}}, | ||
), | ||
), | ||
) | ||
|
||
if t.SkipInit() { | ||
t.Status(fmt.Sprintf("running tpcc for %s (<%s)", workloadDuration, time.Minute)) | ||
} else { | ||
t.Status(fmt.Sprintf("initializing + running tpcc for %s (<%s)", workloadDuration, 10*time.Minute)) | ||
} | ||
|
||
padDuration, err := time.ParseDuration(ifLocal(c, "5s", "10m")) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
runTPCC(ctx, t, c, tpccOptions{ | ||
Warehouses: numWarehouses, | ||
Duration: workloadDuration, | ||
SetupType: usingImport, | ||
EstimatedSetupTime: estimatedSetupTime, | ||
SkipPostRunCheck: true, | ||
ExtraSetupArgs: "--checks=false", | ||
PrometheusConfig: promCfg, | ||
During: func(ctx context.Context) error { | ||
db := c.Conn(ctx, t.L(), crdbNodes) | ||
defer db.Close() | ||
|
||
t.Status(fmt.Sprintf("during: cluster settings (<%s)", 30*time.Second)) | ||
{ | ||
setAdmissionControl(ctx, t, c, true) | ||
|
||
// Changefeeds depend on rangefeeds being enabled. | ||
if _, err := db.Exec("SET CLUSTER SETTING kv.rangefeed.enabled = true"); err != nil { | ||
return err | ||
} | ||
} | ||
|
||
t.Status(fmt.Sprintf("during: setting performance baseline (<%s)", padDuration)) | ||
time.Sleep(padDuration) | ||
|
||
defer func() { // stop change feeds after the fact | ||
_, _ = db.Exec(` | ||
CANCEL JOBS ( | ||
SELECT job_id FROM [SHOW JOBS] WHERE status = 'running' AND job_type = 'CHANGEFEED' | ||
) | ||
`) | ||
}() | ||
|
||
m := c.NewMonitor(ctx, c.Range(1, crdbNodes)) | ||
const numChangefeeds = 5 | ||
for i := 0; i < numChangefeeds; i++ { | ||
i := i | ||
m.Go(func(ctx context.Context) error { | ||
for j := 0; j < i; j++ { | ||
time.Sleep(padDuration) | ||
} | ||
|
||
t.Status(fmt.Sprintf("during: creating changefeed %d (<%s)", i, time.Minute)) | ||
stmt := ` | ||
CREATE CHANGEFEED FOR tpcc.order_line, tpcc.stock, tpcc.customer | ||
INTO 'null://' WITH initial_scan = 'only' | ||
` | ||
|
||
_, err := db.ExecContext(ctx, stmt) | ||
return err | ||
}) | ||
} | ||
|
||
t.Status(fmt.Sprintf("during: waiting for workload to finish (<%s)", workloadDuration)) | ||
m.Wait() | ||
|
||
return nil | ||
}, | ||
}) | ||
}, | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters