From 704e6e958103f7575dc64dc656408c60170fe197 Mon Sep 17 00:00:00 2001 From: Andrew Baptist Date: Thu, 3 Aug 2023 10:56:03 -0400 Subject: [PATCH] roachtest: don't reuse clusters that call dmsetup Certain tests need to modify the block device and they are prone to failures during setup because the device is still busy. Ideally we would figure out what is still holding onto the disk handle, but it is safer to simply not reuse clusters that perform this by adding `spec.ReuseNone()`. Fixes: #107865 Epic: none Release note: None --- pkg/cmd/roachtest/tests/failover.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pkg/cmd/roachtest/tests/failover.go b/pkg/cmd/roachtest/tests/failover.go index 2f428d2c30c0..ee7887a500e7 100644 --- a/pkg/cmd/roachtest/tests/failover.go +++ b/pkg/cmd/roachtest/tests/failover.go @@ -80,7 +80,7 @@ func registerFailover(r registry.Registry) { Owner: registry.OwnerKV, Benchmark: true, Timeout: 60 * time.Minute, - Cluster: r.MakeClusterSpec(10, spec.CPU(2), spec.PreferLocalSSD(false)), // uses disk stalls + Cluster: r.MakeClusterSpec(10, spec.CPU(2), spec.PreferLocalSSD(false), spec.ReuseNone()), // uses disk stalls Leases: leases, SkipPostValidations: registry.PostValidationNoDeadNodes, // cleanup kills nodes Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { @@ -122,12 +122,17 @@ func registerFailover(r registry.Registry) { for _, failureMode := range allFailureModes { failureMode := failureMode // pin loop variable - var usePD bool + clusterOpts := make([]spec.Option, 0) + clusterOpts = append(clusterOpts, spec.CPU(2)) + var postValidation registry.PostValidation if failureMode == failureModeDiskStall { // Use PDs in an attempt to work around flakes encountered when using // SSDs. See #97968. - usePD = true + clusterOpts = append(clusterOpts, spec.PreferLocalSSD(false)) + // Don't reuse the cluster for tests that call dmsetup to avoid + // spurious flakes from previous runs. 
See #107865 + clusterOpts = append(clusterOpts, spec.ReuseNone()) postValidation = registry.PostValidationNoDeadNodes } r.Add(registry.TestSpec{ @@ -136,7 +141,7 @@ func registerFailover(r registry.Registry) { Benchmark: true, Timeout: 30 * time.Minute, SkipPostValidations: postValidation, - Cluster: r.MakeClusterSpec(7, spec.CPU(2), spec.PreferLocalSSD(!usePD)), + Cluster: r.MakeClusterSpec(7, clusterOpts...), Leases: leases, Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { runFailoverNonSystem(ctx, t, c, failureMode) @@ -149,7 +154,7 @@ func registerFailover(r registry.Registry) { Benchmark: true, Timeout: 30 * time.Minute, SkipPostValidations: postValidation, - Cluster: r.MakeClusterSpec(5, spec.CPU(2), spec.PreferLocalSSD(!usePD)), + Cluster: r.MakeClusterSpec(5, clusterOpts...), Leases: leases, Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { runFailoverLiveness(ctx, t, c, failureMode) @@ -162,7 +167,7 @@ func registerFailover(r registry.Registry) { Benchmark: true, Timeout: 30 * time.Minute, SkipPostValidations: postValidation, - Cluster: r.MakeClusterSpec(7, spec.CPU(2), spec.PreferLocalSSD(!usePD)), + Cluster: r.MakeClusterSpec(7, clusterOpts...), Leases: leases, Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { runFailoverSystemNonLiveness(ctx, t, c, failureMode)