roachtest: increase scale for ac/index-backfill test
Release note: None
irfansharif committed May 24, 2023
1 parent 22a2eb5 commit 7b176c1
Showing 2 changed files with 67 additions and 28 deletions.
50 changes: 38 additions & 12 deletions pkg/cmd/roachtest/tests/admission_control_index_backfill.go
@@ -29,7 +29,7 @@ import (

func registerIndexBackfill(r registry.Registry) {
clusterSpec := r.MakeClusterSpec(
4, /* nodeCount */
10, /* nodeCount */
spec.CPU(8),
spec.Zones("us-east1-b"),
spec.VolumeSize(500),
@@ -39,9 +39,11 @@ func registerIndexBackfill(r registry.Registry) {
clusterSpec.GCEMinCPUPlatform = "Intel Ice Lake"
clusterSpec.GCEVolumeType = "pd-ssd"

// XXX: Subsume https://github.com/cockroachdb/cockroach/pull/90005/files
r.Add(registry.TestSpec{
Name: "admission-control/index-backfill",
Owner: registry.OwnerAdmissionControl,
Name: "admission-control/index-backfill",
Timeout: 6 * time.Hour,
Owner: registry.OwnerAdmissionControl,
// TODO(irfansharif): Reduce to weekly cadence once stabilized.
// Tags: registry.Tags(`weekly`),
Cluster: clusterSpec,
@@ -77,7 +79,9 @@ func registerIndexBackfill(r registry.Registry) {
t.L().Printf("no existing snapshots found for %s (%s), doing pre-work",
t.Name(), t.SnapshotPrefix())

// XXX: Do pre-work. Set up tpc-e dataset.
// Set up TPC-E with 100k customers. Do so using a published
// CRDB release, since we'll use this state to generate disk
// snapshots.
runTPCE(ctx, t, c, tpceOptions{
start: func(ctx context.Context, t test.Test, c cluster.Cluster) {
pred, err := version.PredecessorVersion(*t.BuildVersion())
@@ -102,21 +106,21 @@ func registerIndexBackfill(r registry.Registry) {
t.Fatal(err)
}
},
customers: 1_000,
customers: 100_000,
disablePrometheus: true,
setupType: usingTPCEInit,
estimatedSetupTime: 20 * time.Minute,
estimatedSetupTime: 4 * time.Hour,
nodes: crdbNodes,
owner: registry.OwnerAdmissionControl,
cpus: clusterSpec.CPUs,
ssds: 1,
onlySetup: true,
timeout: 4 * time.Hour,
})

// Stop all nodes before capturing cluster snapshots.
c.Stop(ctx, t.L(), option.DefaultStopOpts())

// Create the aforementioned snapshots.
if err := c.CreateSnapshot(ctx, t.SnapshotPrefix()); err != nil {
t.Fatal(err)
}
@@ -135,8 +139,8 @@ func registerIndexBackfill(r registry.Registry) {
t.Fatal(err)
}

// XXX: Run the workload. Run index-backfills during.

// Run the foreground TPC-E workload. Run a large index backfill
// while it's running.
runTPCE(ctx, t, c, tpceOptions{
start: func(ctx context.Context, t test.Test, c cluster.Cluster) {
settings := install.MakeClusterSettings(install.NumRacksOption(crdbNodes))
@@ -145,17 +149,39 @@ func registerIndexBackfill(r registry.Registry) {
c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), settings, c.Node(i))
}
},
customers: 1_000,
customers: 100_000,
activeCustomers: 20_000,
threads: 400,
skipCleanup: true,
ssds: 1,
setupType: usingExistingTPCEData,
nodes: clusterSpec.NodeCount - 1,
owner: registry.OwnerAdmissionControl,
cpus: clusterSpec.CPUs,
prometheusConfig: promCfg,
timeout: 4 * time.Hour,
during: func(ctx context.Context) error {
return nil // XXX: run index backfills
duration := 5 * time.Minute
t.Status(fmt.Sprintf("recording baseline performance (<%s)", duration))
time.Sleep(duration)

// Choose an index creation that takes ~10-12 minutes.
t.Status(fmt.Sprintf("starting index creation (<%s)", duration*6))

db := c.Conn(ctx, t.L(), 1)
defer db.Close()

if _, err := db.ExecContext(ctx,
fmt.Sprintf("CREATE INDEX index_%s ON tpce.cash_transaction (ct_dts)",
time.Now().Format("20060102_T150405"),
),
); err != nil {
t.Fatalf("failed to create index: %v", err)
}

t.Status("index creation complete, waiting for workload to finish")
return nil
},
workloadDuration: time.Hour,
})
},
})
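
Note on the new during hook above: the index name is suffixed with a timestamp so that repeated runs against the same restored disk snapshots don't collide on an existing index. The following is a minimal standalone sketch of that pattern outside the roachtest harness; the lib/pq driver, connection string, and main function are illustrative assumptions, while the table and column (tpce.cash_transaction, ct_dts) come from the diff itself.

package main

import (
	"context"
	"database/sql"
	"fmt"
	"log"
	"time"

	_ "github.com/lib/pq" // assumed Postgres-wire driver; the roachtest harness supplies its own connection via c.Conn
)

func main() {
	ctx := context.Background()

	// Placeholder connection string for a locally running CockroachDB node.
	db, err := sql.Open("postgres", "postgresql://root@localhost:26257/tpce?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Timestamp-suffixed index name: each run (or retry against the same
	// restored snapshots) creates a distinct index.
	name := fmt.Sprintf("index_%s", time.Now().Format("20060102_T150405"))
	stmt := fmt.Sprintf("CREATE INDEX %s ON tpce.cash_transaction (ct_dts)", name)
	if _, err := db.ExecContext(ctx, stmt); err != nil {
		log.Fatalf("failed to create index: %v", err)
	}
	log.Printf("created %s", name)
}
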
45 changes: 29 additions & 16 deletions pkg/cmd/roachtest/tests/tpce.go
@@ -36,17 +36,21 @@ type tpceSpec struct {
}

type tpceCmdOptions struct {
customers int
racks int
duration time.Duration
threads int
customers int
activeCustomers int
racks int
duration time.Duration
threads int
skipCleanup bool
}

func (to tpceCmdOptions) AddCommandOptions(cmd *roachtestutil.Command) {
cmd.MaybeFlag(to.customers != 0, "customers", to.customers)
cmd.MaybeFlag(to.activeCustomers != 0, "active-customers", to.activeCustomers)
cmd.MaybeFlag(to.racks != 0, "racks", to.racks)
cmd.MaybeFlag(to.duration != 0, "duration", to.duration)
cmd.MaybeFlag(to.threads != 0, "threads", to.threads)
cmd.MaybeFlag(to.skipCleanup, "skip-cleanup", "")
}

func initTPCESpec(
@@ -116,6 +120,10 @@ type tpceOptions struct {
disablePrometheus bool // forces prometheus to not start up
onlySetup bool
during func(ctx context.Context) error
workloadDuration time.Duration
activeCustomers int
threads int
skipCleanup bool
}
type tpceSetupType int

@@ -137,10 +145,6 @@ func runTPCE(ctx context.Context, t test.Test, c cluster.Cluster, opts tpceOptio
startOpts := option.DefaultStartOpts()
startOpts.RoachprodOpts.StoreCount = opts.ssds
settings := install.MakeClusterSettings(install.NumRacksOption(racks))
if c.IsLocal() { // XXX: Does local make sense?
settings.Env = append(settings.Env, "COCKROACH_SCAN_INTERVAL=200ms")
settings.Env = append(settings.Env, "COCKROACH_SCAN_MAX_IDLE_TIME=5ms")
}
c.Start(ctx, t.L(), startOpts, settings, crdbNodes)
}
}
@@ -165,11 +169,6 @@ func runTPCE(ctx context.Context, t test.Test, c cluster.Cluster, opts tpceOptio
}
}

if c.IsLocal() {
opts.customers = 10
opts.timeout = 5 * time.Minute
}

if !opts.disablePrometheus {
// TODO(irfansharif): Move this after the import step? The statistics
// during import itself is uninteresting and pollutes actual workload
@@ -204,12 +203,26 @@ func runTPCE(ctx context.Context, t test.Test, c cluster.Cluster, opts tpceOptio
m := c.NewMonitor(ctx, crdbNodes)
m.Go(func(ctx context.Context) error {
t.Status("running workload")
result, err := tpceSpec.run(ctx, t, c, tpceCmdOptions{
workloadDuration := opts.workloadDuration
if workloadDuration == 0 {
workloadDuration = 2 * time.Hour
}
runOptions := tpceCmdOptions{
customers: opts.customers,
racks: racks,
duration: 2 * time.Hour,
duration: workloadDuration,
threads: opts.nodes * opts.cpus,
})
}
if opts.activeCustomers != 0 {
runOptions.activeCustomers = opts.activeCustomers
}
if opts.threads != 0 {
runOptions.threads = opts.threads
}
if opts.skipCleanup {
runOptions.skipCleanup = opts.skipCleanup
}
result, err := tpceSpec.run(ctx, t, c, runOptions)
if err != nil {
t.Fatal(err.Error())
}
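
Note on the new tpceCmdOptions fields above: AddCommandOptions uses roachtestutil.Command's MaybeFlag to emit a flag only when its guard condition holds, and the skip-cleanup case passes an empty string value (presumably rendering a bare --skip-cleanup). The toy builder below sketches that pattern; cmdBuilder is a hypothetical stand-in, not the real roachtestutil implementation, and the option values mirror the foreground run configured in this commit (100k customers, 20k active, 400 threads, skip-cleanup).

package main

import (
	"fmt"
	"strings"
)

// cmdBuilder is a toy stand-in for roachtestutil.Command: it accumulates
// rendered flags and skips the ones whose guard condition is false.
type cmdBuilder struct {
	parts []string
}

// MaybeFlag appends --name=value only when cond is true, mirroring how
// AddCommandOptions omits zero-valued tpceCmdOptions fields. An empty
// string value renders a bare boolean-style flag such as --skip-cleanup.
func (b *cmdBuilder) MaybeFlag(cond bool, name string, value interface{}) *cmdBuilder {
	if !cond {
		return b
	}
	if s, ok := value.(string); ok && s == "" {
		b.parts = append(b.parts, "--"+name)
	} else {
		b.parts = append(b.parts, fmt.Sprintf("--%s=%v", name, value))
	}
	return b
}

func (b *cmdBuilder) String() string { return strings.Join(b.parts, " ") }

func main() {
	opts := struct {
		customers, activeCustomers, threads int
		skipCleanup                         bool
	}{customers: 100_000, activeCustomers: 20_000, threads: 400, skipCleanup: true}

	b := &cmdBuilder{parts: []string{"./tpce"}}
	b.MaybeFlag(opts.customers != 0, "customers", opts.customers).
		MaybeFlag(opts.activeCustomers != 0, "active-customers", opts.activeCustomers).
		MaybeFlag(opts.threads != 0, "threads", opts.threads).
		MaybeFlag(opts.skipCleanup, "skip-cleanup", "")

	// Prints: ./tpce --customers=100000 --active-customers=20000 --threads=400 --skip-cleanup
	fmt.Println(b.String())
}
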
