From 099bd82c4aac266d53ee3a22d73b0212eb380a7c Mon Sep 17 00:00:00 2001 From: Andrew Baptist Date: Thu, 3 Aug 2023 10:56:03 -0400 Subject: [PATCH] roachtest: don't reuse clusters that call dmsetup Certain tests need to modify the block device and they are prone to failures during setup because the device is still busy. Ideally we would figure out what is still holding onto the disk handle, but it is safer to simply not reuse clusters that perform this by adding `spec.ReuseNone()`. Fixes: #107865 Epic: none Release note: None --- pkg/cmd/roachtest/tests/failover.go | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pkg/cmd/roachtest/tests/failover.go b/pkg/cmd/roachtest/tests/failover.go index dc47bd3da6ee..38bdceca3737 100644 --- a/pkg/cmd/roachtest/tests/failover.go +++ b/pkg/cmd/roachtest/tests/failover.go @@ -76,7 +76,7 @@ func registerFailover(r registry.Registry) { Name: "failover/chaos" + suffix, Owner: registry.OwnerKV, Timeout: 60 * time.Minute, - Cluster: r.MakeClusterSpec(10, spec.CPU(2), spec.PreferLocalSSD(false)), + Cluster: r.MakeClusterSpec(10, spec.CPU(2), spec.PreferLocalSSD(false), spec.ReuseNone()), // uses disk stalls Leases: leases, SkipPostValidations: registry.PostValidationNoDeadNodes, // cleanup kills nodes Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { @@ -118,14 +118,17 @@ func registerFailover(r registry.Registry) { for _, failureMode := range allFailureModes { failureMode := failureMode // pin loop variable - var usePD bool + clusterOpts := make([]spec.Option, 0) + clusterOpts = append(clusterOpts, spec.CPU(2)) + + var postValidation registry.PostValidation if failureMode == failureModeDiskStall { // Use PDs in an attempt to work around flakes encountered when using // SSDs. See #97968. 
- usePD = true - } - var postValidation registry.PostValidation = 0 - if failureMode == failureModeDiskStall { + clusterOpts = append(clusterOpts, spec.PreferLocalSSD(false)) + // Don't reuse the cluster for tests that call dmsetup to avoid + // spurious flakes from previous runs. See #107865 + clusterOpts = append(clusterOpts, spec.ReuseNone()) postValidation = registry.PostValidationNoDeadNodes } r.Add(registry.TestSpec{ @@ -133,7 +136,7 @@ func registerFailover(r registry.Registry) { Owner: registry.OwnerKV, Benchmark: true, Timeout: 30 * time.Minute, - Cluster: r.MakeClusterSpec(7, spec.CPU(2), spec.PreferLocalSSD(!usePD)), + Cluster: r.MakeClusterSpec(7, clusterOpts...), Leases: leases, SkipPostValidations: postValidation, Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { @@ -146,7 +149,7 @@ func registerFailover(r registry.Registry) { Tags: []string{"weekly"}, Benchmark: true, Timeout: 30 * time.Minute, - Cluster: r.MakeClusterSpec(5, spec.CPU(2), spec.PreferLocalSSD(!usePD)), + Cluster: r.MakeClusterSpec(5, clusterOpts...), Leases: leases, SkipPostValidations: postValidation, Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { @@ -159,7 +162,7 @@ func registerFailover(r registry.Registry) { Tags: []string{"weekly"}, Benchmark: true, Timeout: 30 * time.Minute, - Cluster: r.MakeClusterSpec(7, spec.CPU(2), spec.PreferLocalSSD(!usePD)), + Cluster: r.MakeClusterSpec(7, clusterOpts...), Leases: leases, SkipPostValidations: postValidation, Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {