From 24d115e907a772effccfbb10d8998c735275e94c Mon Sep 17 00:00:00 2001 From: Nick Travers Date: Wed, 29 Mar 2023 18:35:19 +0000 Subject: [PATCH] roachtest: use persistent disks for disk-stall failover tests The disk-stalled roachtests were updated in #99747 to use PDs in favor of local SSDs. This change broke the `failover/*/disk-stall` tests, which look for `/dev/sdb` on GCE (the device name used for GCE Persistent Disks), but the tests still create clusters with local SSDs (the roachtest default). Update the disk-stall failover tests to create their clusters with PDs as well. Fix #99902. Fix #99926. Fix #99930. Touches #97968. Release note: None. --- pkg/cmd/roachtest/tests/disk_stall.go | 2 ++ pkg/cmd/roachtest/tests/failover.go | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/pkg/cmd/roachtest/tests/disk_stall.go b/pkg/cmd/roachtest/tests/disk_stall.go index 0c0918c986e0..3318f017be91 100644 --- a/pkg/cmd/roachtest/tests/disk_stall.go +++ b/pkg/cmd/roachtest/tests/disk_stall.go @@ -47,6 +47,8 @@ func registerDiskStalledDetection(r registry.Registry) { } makeSpec := func() spec.ClusterSpec { s := r.MakeClusterSpec(4, spec.ReuseNone()) + // Use PDs in an attempt to work around flakes encountered when using SSDs. + // See #97968. s.PreferLocalSSD = false return s } diff --git a/pkg/cmd/roachtest/tests/failover.go b/pkg/cmd/roachtest/tests/failover.go index 5181a6e56552..5927035bccfd 100644 --- a/pkg/cmd/roachtest/tests/failover.go +++ b/pkg/cmd/roachtest/tests/failover.go @@ -45,11 +45,20 @@ func registerFailover(r registry.Registry) { failureModeDiskStall, } { failureMode := failureMode // pin loop variable + makeSpec := func(nNodes, nCPU int) spec.ClusterSpec { + s := r.MakeClusterSpec(nNodes, spec.CPU(nCPU)) + if failureMode == failureModeDiskStall { + // Use PDs in an attempt to work around flakes encountered when using + // SSDs. See #97968. 
+ s.PreferLocalSSD = false + } + return s + } r.Add(registry.TestSpec{ Name: fmt.Sprintf("failover/non-system/%s", failureMode), Owner: registry.OwnerKV, Timeout: 30 * time.Minute, - Cluster: r.MakeClusterSpec(7, spec.CPU(4)), + Cluster: makeSpec(7 /* nodes */, 4 /* cpus */), Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { runFailoverNonSystem(ctx, t, c, failureMode) }, @@ -58,7 +67,7 @@ func registerFailover(r registry.Registry) { Name: fmt.Sprintf("failover/liveness/%s", failureMode), Owner: registry.OwnerKV, Timeout: 30 * time.Minute, - Cluster: r.MakeClusterSpec(5, spec.CPU(4)), + Cluster: makeSpec(5 /* nodes */, 4 /* cpus */), Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { runFailoverLiveness(ctx, t, c, failureMode) }, @@ -67,7 +76,7 @@ func registerFailover(r registry.Registry) { Name: fmt.Sprintf("failover/system-non-liveness/%s", failureMode), Owner: registry.OwnerKV, Timeout: 30 * time.Minute, - Cluster: r.MakeClusterSpec(7, spec.CPU(4)), + Cluster: makeSpec(7 /* nodes */, 4 /* cpus */), Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { runFailoverSystemNonLiveness(ctx, t, c, failureMode) },