release-23.1: roachtest: tpcc: don't look at cloud during registration #112553

Merged
4 changes: 4 additions & 0 deletions pkg/cmd/roachtest/cluster.go
@@ -2525,6 +2525,10 @@ func (c *clusterImpl) MakeNodes(opts ...option.Option) string {
return c.name + r.String()
}

+func (c *clusterImpl) Cloud() string {
+	return c.spec.Cloud
+}
+
func (c *clusterImpl) IsLocal() bool {
return config.IsLocalClusterName(c.name)
}
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/cluster/cluster_interface.go
@@ -108,6 +108,7 @@ type Cluster interface {

Spec() spec.ClusterSpec
Name() string
+	Cloud() string
IsLocal() bool
// IsSecure returns true iff the cluster uses TLS.
IsSecure() bool
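Note for readers: the one-line interface change above is the crux of the PR. Registration code used to peek at `r.MakeClusterSpec(1).Cloud` before any cluster existed; with `Cloud()` on the `Cluster` interface, cloud-dependent decisions move into the test's `Run` function, where a real cluster is available. A minimal, self-contained sketch of the pattern — the `Cluster` interface below is cut down to the single method this PR adds, and `fakeCluster` and `pickPerCloud` are illustrative stand-ins, not roachtest code:

package main

import "fmt"

// Cluster is reduced to the one accessor added in this PR.
type Cluster interface {
	Cloud() string
}

// fakeCluster stands in for clusterImpl, which returns c.spec.Cloud.
type fakeCluster struct{ cloud string }

func (c fakeCluster) Cloud() string { return c.cloud }

// pickPerCloud mirrors the gceOrAws helper later in this diff: the
// choice happens at run time, once the cluster actually knows which
// cloud it was provisioned on.
func pickPerCloud(c Cluster, gce, aws int) int {
	switch c.Cloud() {
	case "gce":
		return gce
	case "aws":
		return aws
	default:
		panic(fmt.Sprintf("unknown cloud %s", c.Cloud()))
	}
}

func main() {
	c := fakeCluster{cloud: "aws"}
	fmt.Println(pickPerCloud(c, 3500, 3900)) // prints 3900
}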
10 changes: 7 additions & 3 deletions pkg/cmd/roachtest/spec/cluster_spec.go
@@ -62,9 +62,13 @@ func (m MemPerCPU) String() string {
// ClusterSpec represents a test's description of what its cluster needs to
// look like. It becomes part of a clusterConfig when the cluster is created.
type ClusterSpec struct {
-	Cloud        string
-	Arch         vm.CPUArch // CPU architecture; auto-chosen if left empty
-	InstanceType string     // auto-chosen if left empty
+	// TODO(#104029): We should remove the Cloud field; the tests now specify
+	// their compatible clouds.
+	Cloud string
+	Arch  vm.CPUArch // CPU architecture; auto-chosen if left empty
+	// TODO(radu): An InstanceType can only make sense in the context of a
+	// specific cloud. We should replace this with cloud-specific arguments.
+	InstanceType string // auto-chosen if left empty
NodeCount int
// CPUs is the number of CPUs per node.
CPUs int
146 changes: 92 additions & 54 deletions pkg/cmd/roachtest/tests/tpcc.go
@@ -357,12 +357,12 @@ func maxSupportedTPCCWarehouses(
// TPCC workload is running. The number of database upgrades is
// controlled by the `versionsToUpgrade` parameter.
func runTPCCMixedHeadroom(
-	ctx context.Context, t test.Test, c cluster.Cluster, cloud string, versionsToUpgrade int,
+	ctx context.Context, t test.Test, c cluster.Cluster, versionsToUpgrade int,
) {
crdbNodes := c.Range(1, c.Spec().NodeCount-1)
workloadNode := c.Node(c.Spec().NodeCount)

-	maxWarehouses := maxSupportedTPCCWarehouses(*t.BuildVersion(), cloud, c.Spec())
+	maxWarehouses := maxSupportedTPCCWarehouses(*t.BuildVersion(), c.Cloud(), c.Spec())
headroomWarehouses := int(float64(maxWarehouses) * 0.7)
if c.IsLocal() {
headroomWarehouses = 10
@@ -484,7 +484,6 @@ func runTPCCMixedHeadroom(
}

func registerTPCC(r registry.Registry) {
-	cloud := r.MakeClusterSpec(1).Cloud
headroomSpec := r.MakeClusterSpec(4, spec.CPU(16), spec.RandomlyUseZfs())
r.Add(registry.TestSpec{
// w=headroom runs tpcc for a semi-extended period with some amount of
@@ -499,7 +498,7 @@ func registerTPCC(r registry.Registry) {
EncryptionSupport: registry.EncryptionMetamorphic,
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
-			maxWarehouses := maxSupportedTPCCWarehouses(*t.BuildVersion(), cloud, t.Spec().(*registry.TestSpec).Cluster)
+			maxWarehouses := maxSupportedTPCCWarehouses(*t.BuildVersion(), c.Cloud(), c.Spec())
headroomWarehouses := int(float64(maxWarehouses) * 0.7)
t.L().Printf("computed headroom warehouses of %d\n", headroomWarehouses)
runTPCC(ctx, t, c, tpccOptions{
@@ -526,7 +525,7 @@ func registerTPCC(r registry.Registry) {
Cluster: mixedHeadroomSpec,
EncryptionSupport: registry.EncryptionMetamorphic,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
-			runTPCCMixedHeadroom(ctx, t, c, cloud, 1)
+			runTPCCMixedHeadroom(ctx, t, c, 1)
},
})

@@ -540,7 +539,7 @@ func registerTPCC(r registry.Registry) {
Cluster: mixedHeadroomSpec,
EncryptionSupport: registry.EncryptionMetamorphic,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
-			runTPCCMixedHeadroom(ctx, t, c, cloud, 2)
+			runTPCCMixedHeadroom(ctx, t, c, 2)
},
})
r.Add(registry.TestSpec{
@@ -839,8 +838,10 @@ func registerTPCC(r registry.Registry) {
Nodes: 3,
CPUs: 4,

-		LoadWarehouses: 1000,
-		EstimatedMax:   gceOrAws(cloud, 750, 900),
+		LoadWarehousesGCE: 1000,
+		LoadWarehousesAWS: 1000,
+		EstimatedMaxGCE:   750,
+		EstimatedMaxAWS:   900,

Clouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Nightly),
@@ -849,18 +850,22 @@ func registerTPCC(r registry.Registry) {
Nodes: 3,
CPUs: 16,

-		LoadWarehouses: gceOrAws(cloud, 3500, 3900),
-		EstimatedMax:   gceOrAws(cloud, 2900, 3500),
-		Clouds:         registry.AllClouds,
-		Suites:         registry.Suites(registry.Nightly),
-		Tags:           registry.Tags(`aws`),
+		LoadWarehousesGCE: 3500,
+		LoadWarehousesAWS: 3900,
+		EstimatedMaxGCE:   2900,
+		EstimatedMaxAWS:   3500,
+		Clouds:            registry.AllClouds,
+		Suites:            registry.Suites(registry.Nightly),
+		Tags:              registry.Tags(`aws`),
})
registerTPCCBenchSpec(r, tpccBenchSpec{
Nodes: 12,
CPUs: 16,

-		LoadWarehouses: gceOrAws(cloud, 11500, 11500),
-		EstimatedMax:   gceOrAws(cloud, 10000, 10000),
+		LoadWarehousesGCE: 11500,
+		LoadWarehousesAWS: 11500,
+		EstimatedMaxGCE:   10000,
+		EstimatedMaxAWS:   10000,

Clouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Weekly),
@@ -871,8 +876,10 @@ func registerTPCC(r registry.Registry) {
CPUs: 16,
Distribution: multiZone,

-		LoadWarehouses: 6500,
-		EstimatedMax:   5000,
+		LoadWarehousesGCE: 6500,
+		LoadWarehousesAWS: 6500,
+		EstimatedMaxGCE:   5000,
+		EstimatedMaxAWS:   5000,

Clouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Nightly),
@@ -884,8 +891,10 @@ func registerTPCC(r registry.Registry) {
Distribution: multiRegion,
LoadConfig: multiLoadgen,

-		LoadWarehouses: 3000,
-		EstimatedMax:   2000,
+		LoadWarehousesGCE: 3000,
+		LoadWarehousesAWS: 3000,
+		EstimatedMaxGCE:   2000,
+		EstimatedMaxAWS:   2000,

Clouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Nightly),
@@ -896,8 +905,10 @@ func registerTPCC(r registry.Registry) {
Chaos: true,
LoadConfig: singlePartitionedLoadgen,

-		LoadWarehouses: 2000,
-		EstimatedMax:   900,
+		LoadWarehousesGCE: 2000,
+		LoadWarehousesAWS: 2000,
+		EstimatedMaxGCE:   900,
+		EstimatedMaxAWS:   900,

Clouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Nightly),
@@ -909,8 +920,10 @@ func registerTPCC(r registry.Registry) {
Nodes: 3,
CPUs: 4,

-		LoadWarehouses: 1000,
-		EstimatedMax:   gceOrAws(cloud, 750, 900),
+		LoadWarehousesGCE: 1000,
+		LoadWarehousesAWS: 1000,
+		EstimatedMaxGCE:   750,
+		EstimatedMaxAWS:   900,
EncryptionEnabled: true,

Clouds: registry.AllExceptAWS,
@@ -920,8 +933,10 @@ func registerTPCC(r registry.Registry) {
Nodes: 3,
CPUs: 16,

-		LoadWarehouses: gceOrAws(cloud, 3500, 3900),
-		EstimatedMax:   gceOrAws(cloud, 2900, 3500),
+		LoadWarehousesGCE: 3500,
+		LoadWarehousesAWS: 3900,
+		EstimatedMaxGCE:   2900,
+		EstimatedMaxAWS:   3500,
EncryptionEnabled: true,
Clouds: registry.AllClouds,
Suites: registry.Suites(registry.Nightly),
@@ -931,8 +946,10 @@ func registerTPCC(r registry.Registry) {
Nodes: 12,
CPUs: 16,

-		LoadWarehouses: gceOrAws(cloud, 11500, 11500),
-		EstimatedMax:   gceOrAws(cloud, 10000, 10000),
+		LoadWarehousesGCE: 11500,
+		LoadWarehousesAWS: 11500,
+		EstimatedMaxGCE:   10000,
+		EstimatedMaxAWS:   10000,
EncryptionEnabled: true,

Clouds: registry.AllExceptAWS,
@@ -945,9 +962,11 @@ func registerTPCC(r registry.Registry) {
Nodes: 3,
CPUs: 4,

-		LoadWarehouses:   1000,
-		EstimatedMax:     gceOrAws(cloud, 750, 900),
-		ExpirationLeases: true,
+		LoadWarehousesGCE: 1000,
+		LoadWarehousesAWS: 1000,
+		EstimatedMaxGCE:   750,
+		EstimatedMaxAWS:   900,
+		ExpirationLeases:  true,

Clouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Nightly),
@@ -956,20 +975,24 @@ func registerTPCC(r registry.Registry) {
Nodes: 3,
CPUs: 16,

-		LoadWarehouses:   gceOrAws(cloud, 3500, 3900),
-		EstimatedMax:     gceOrAws(cloud, 2900, 3500),
-		ExpirationLeases: true,
-		Clouds:           registry.AllClouds,
-		Suites:           registry.Suites(registry.Nightly),
-		Tags:             registry.Tags(`aws`),
+		LoadWarehousesGCE: 3500,
+		LoadWarehousesAWS: 3900,
+		EstimatedMaxGCE:   2900,
+		EstimatedMaxAWS:   3500,
+		ExpirationLeases:  true,
+		Clouds:            registry.AllClouds,
+		Suites:            registry.Suites(registry.Nightly),
+		Tags:              registry.Tags(`aws`),
})
registerTPCCBenchSpec(r, tpccBenchSpec{
Nodes: 12,
CPUs: 16,

-		LoadWarehouses:   gceOrAws(cloud, 11500, 11500),
-		EstimatedMax:     gceOrAws(cloud, 10000, 10000),
-		ExpirationLeases: true,
+		LoadWarehousesGCE: 11500,
+		LoadWarehousesAWS: 11500,
+		EstimatedMaxGCE:   10000,
+		EstimatedMaxAWS:   10000,
+		ExpirationLeases:  true,

Clouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Weekly),
@@ -978,10 +1001,14 @@ func registerTPCC(r registry.Registry) {
}

func gceOrAws(cloud string, gce, aws int) int {
if cloud == "aws" {
switch cloud {
case spec.AWS:
return aws
case spec.GCE:
return gce
default:
panic(fmt.Sprintf("unknown cloud %s", cloud))
}
return gce
}

// tpccBenchDistribution represents a distribution of nodes in a tpccbench
@@ -1052,13 +1079,15 @@ type tpccBenchSpec struct {
// The number of warehouses to load into the cluster before beginning
// benchmarking. Should be larger than EstimatedMax and should be a
// value that is unlikely to be achievable.
-	LoadWarehouses int
+	LoadWarehousesGCE int
+	LoadWarehousesAWS int
// An estimate of the maximum number of warehouses achievable in the
// cluster config. The closer this is to the actual max achievable
// warehouse count, the faster the benchmark will be in producing a
// result. This can be adjusted over time as performance characteristics
// change (i.e. CockroachDB gets faster!).
-	EstimatedMax int
+	EstimatedMaxGCE int
+	EstimatedMaxAWS int

// MinVersion to pass to testRegistryImpl.Add.
MinVersion string
@@ -1074,6 +1103,14 @@ type tpccBenchSpec struct {
ExpirationLeases bool
}

+func (s tpccBenchSpec) EstimatedMax(cloud string) int {
+	return gceOrAws(cloud, s.EstimatedMaxGCE, s.EstimatedMaxAWS)
+}
+
+func (s tpccBenchSpec) LoadWarehouses(cloud string) int {
+	return gceOrAws(cloud, s.LoadWarehousesGCE, s.LoadWarehousesAWS)
+}

// partitions returns the number of partitions specified to the load generator.
func (s tpccBenchSpec) partitions() int {
switch s.LoadConfig {
@@ -1205,7 +1242,7 @@ func loadTPCCBench(
).Scan(&curWarehouses); err != nil {
return err
}
-	if curWarehouses >= b.LoadWarehouses {
+	if curWarehouses >= b.LoadWarehouses(c.Cloud()) {
// The cluster has enough warehouses. Nothing to do.
return nil
}
@@ -1222,17 +1259,18 @@

var loadArgs string
var rebalanceWait time.Duration
+	loadWarehouses := b.LoadWarehouses(c.Cloud())
switch b.LoadConfig {
case singleLoadgen:
loadArgs = `--checks=false`
-		rebalanceWait = time.Duration(b.LoadWarehouses/250) * time.Minute
+		rebalanceWait = time.Duration(loadWarehouses/250) * time.Minute
case singlePartitionedLoadgen:
loadArgs = fmt.Sprintf(`--checks=false --partitions=%d`, b.partitions())
-		rebalanceWait = time.Duration(b.LoadWarehouses/125) * time.Minute
+		rebalanceWait = time.Duration(loadWarehouses/125) * time.Minute
case multiLoadgen:
loadArgs = fmt.Sprintf(`--checks=false --partitions=%d --zones="%s"`,
b.partitions(), strings.Join(b.Distribution.zones(), ","))
-		rebalanceWait = time.Duration(b.LoadWarehouses/50) * time.Minute
+		rebalanceWait = time.Duration(loadWarehouses/50) * time.Minute
default:
panic("unexpected")
}
@@ -1241,7 +1279,7 @@
t.L().Printf("restoring tpcc fixture\n")
err := WaitFor3XReplication(ctx, t, db)
require.NoError(t, err)
-		cmd := tpccImportCmd(b.LoadWarehouses, loadArgs)
+		cmd := tpccImportCmd(loadWarehouses, loadArgs)
if err = c.RunE(ctx, roachNodes[:1], cmd); err != nil {
return err
}
@@ -1259,12 +1297,12 @@
// the desired distribution. This should allow for load-based rebalancing to
// help distribute load. Optionally pass some load configuration-specific
// flags.
-	maxRate := tpccMaxRate(b.EstimatedMax)
+	maxRate := tpccMaxRate(b.EstimatedMax(c.Cloud()))
rampTime := (1 * rebalanceWait) / 4
loadTime := (3 * rebalanceWait) / 4
cmd = fmt.Sprintf("./cockroach workload run tpcc --warehouses=%d --workers=%d --max-rate=%d "+
"--wait=false --ramp=%s --duration=%s --scatter --tolerate-errors {pgurl%s}",
-		b.LoadWarehouses, b.LoadWarehouses, maxRate, rampTime, loadTime, roachNodes)
+		b.LoadWarehouses(c.Cloud()), b.LoadWarehouses(c.Cloud()), maxRate, rampTime, loadTime, roachNodes)
if _, err := c.RunWithDetailsSingleNode(ctx, t.L(), loadNode, cmd); err != nil {
return err
}
@@ -1327,7 +1365,7 @@ func runTPCCBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpccBen
// 10k warehouses requires at least 20,000 connections, so add a
// bit of breathing room and check the warehouse count.
c.Run(ctx, loadNodes, "sed -i 's/maxconn [0-9]\\+/maxconn 21000/' haproxy.cfg")
-		if b.LoadWarehouses > 1e4 {
+		if b.LoadWarehouses(c.Cloud()) > 1e4 {
t.Fatal("HAProxy config supports up to 10k warehouses")
}
c.Run(ctx, loadNodes, "haproxy -f haproxy.cfg -D")
@@ -1344,7 +1382,7 @@ func runTPCCBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpccBen
// Search between 1 and b.LoadWarehouses for the largest number of
// warehouses that can be operated on while sustaining a throughput
// threshold, set to a fraction of max tpmC.
-	precision := int(math.Max(1.0, float64(b.LoadWarehouses/200)))
+	precision := int(math.Max(1.0, float64(b.LoadWarehouses(c.Cloud())/200)))
initStepSize := precision

// Create a temp directory to store the local copy of results from the
@@ -1361,7 +1399,7 @@ func runTPCCBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpccBen
c.Start(ctx, t.L(), startOpts, settings, roachNodes)
}

-	s := search.NewLineSearcher(1, b.LoadWarehouses, b.EstimatedMax, initStepSize, precision)
+	s := search.NewLineSearcher(1, b.LoadWarehouses(c.Cloud()), b.EstimatedMax(c.Cloud()), initStepSize, precision)
iteration := 0
if res, err := s.Search(func(warehouses int) (bool, error) {
iteration++
@@ -1434,7 +1472,7 @@ func runTPCCBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpccBen
histogramsPath := fmt.Sprintf("%s/warehouses=%d/stats.json", t.PerfArtifactsDir(), warehouses)
cmd := fmt.Sprintf("./cockroach workload run tpcc --warehouses=%d --active-warehouses=%d "+
"--tolerate-errors --ramp=%s --duration=%s%s --histograms=%s {pgurl%s}",
-			b.LoadWarehouses, warehouses, rampDur,
+			b.LoadWarehouses(c.Cloud()), warehouses, rampDur,
loadDur, extraFlags, histogramsPath, sqlGateways)
err := c.RunE(ctx, group.loadNodes, cmd)
loadDone <- timeutil.Now()
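Taken together, the tpcc.go changes replace every registration-time `gceOrAws(cloud, ...)` call with per-cloud struct fields plus run-time accessor methods. A compact, runnable sketch of that end state — field and method names follow the diff, while the string constants stand in for `spec.GCE`/`spec.AWS` and the registry plumbing is elided:

package main

import "fmt"

// Stand-ins for spec.GCE and spec.AWS.
const (
	gceCloud = "gce"
	awsCloud = "aws"
)

// gceOrAws matches the post-PR helper: unknown clouds now panic
// instead of silently falling back to the GCE value.
func gceOrAws(cloud string, gce, aws int) int {
	switch cloud {
	case awsCloud:
		return aws
	case gceCloud:
		return gce
	default:
		panic(fmt.Sprintf("unknown cloud %s", cloud))
	}
}

// tpccBenchSpec is reduced here to the four per-cloud fields from the diff.
type tpccBenchSpec struct {
	LoadWarehousesGCE int
	LoadWarehousesAWS int
	EstimatedMaxGCE   int
	EstimatedMaxAWS   int
}

func (s tpccBenchSpec) LoadWarehouses(cloud string) int {
	return gceOrAws(cloud, s.LoadWarehousesGCE, s.LoadWarehousesAWS)
}

func (s tpccBenchSpec) EstimatedMax(cloud string) int {
	return gceOrAws(cloud, s.EstimatedMaxGCE, s.EstimatedMaxAWS)
}

func main() {
	b := tpccBenchSpec{
		LoadWarehousesGCE: 3500,
		LoadWarehousesAWS: 3900,
		EstimatedMaxGCE:   2900,
		EstimatedMaxAWS:   3500,
	}
	// In a real test the cloud comes from c.Cloud() inside Run.
	fmt.Println(b.LoadWarehouses(awsCloud), b.EstimatedMax(awsCloud)) // 3900 3500
}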