Skip to content

Commit

Permalink
roachtest: don't reuse clusters that call dmsetup
Browse files Browse the repository at this point in the history
Certain tests need to modify the block device, and they are prone to
failures during setup because the device is still busy. Ideally we would
figure out what is still holding onto the disk handle, but it is safer
to simply not reuse clusters that perform this, which is done by adding
`spec.ReuseNone()`.

Fixes: cockroachdb#107865
Epic: none

Release note: None
  • Loading branch information
andrewbaptist committed Aug 4, 2023
1 parent 9355e10 commit 704e6e9
Showing 1 changed file with 11 additions and 6 deletions.
17 changes: 11 additions & 6 deletions pkg/cmd/roachtest/tests/failover.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func registerFailover(r registry.Registry) {
Owner: registry.OwnerKV,
Benchmark: true,
Timeout: 60 * time.Minute,
Cluster: r.MakeClusterSpec(10, spec.CPU(2), spec.PreferLocalSSD(false)), // uses disk stalls
Cluster: r.MakeClusterSpec(10, spec.CPU(2), spec.PreferLocalSSD(false), spec.ReuseNone()), // uses disk stalls
Leases: leases,
SkipPostValidations: registry.PostValidationNoDeadNodes, // cleanup kills nodes
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
Expand Down Expand Up @@ -122,12 +122,17 @@ func registerFailover(r registry.Registry) {
for _, failureMode := range allFailureModes {
failureMode := failureMode // pin loop variable

var usePD bool
clusterOpts := make([]spec.Option, 0)
clusterOpts = append(clusterOpts, spec.CPU(2))

var postValidation registry.PostValidation
if failureMode == failureModeDiskStall {
// Use PDs in an attempt to work around flakes encountered when using
// SSDs. See #97968.
usePD = true
clusterOpts = append(clusterOpts, spec.PreferLocalSSD(false))
// Don't reuse the cluster for tests that call dmsetup to avoid
// spurious flakes from previous runs. See #107865
clusterOpts = append(clusterOpts, spec.ReuseNone())
postValidation = registry.PostValidationNoDeadNodes
}
r.Add(registry.TestSpec{
Expand All @@ -136,7 +141,7 @@ func registerFailover(r registry.Registry) {
Benchmark: true,
Timeout: 30 * time.Minute,
SkipPostValidations: postValidation,
Cluster: r.MakeClusterSpec(7, spec.CPU(2), spec.PreferLocalSSD(!usePD)),
Cluster: r.MakeClusterSpec(7, clusterOpts...),
Leases: leases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runFailoverNonSystem(ctx, t, c, failureMode)
Expand All @@ -149,7 +154,7 @@ func registerFailover(r registry.Registry) {
Benchmark: true,
Timeout: 30 * time.Minute,
SkipPostValidations: postValidation,
Cluster: r.MakeClusterSpec(5, spec.CPU(2), spec.PreferLocalSSD(!usePD)),
Cluster: r.MakeClusterSpec(5, clusterOpts...),
Leases: leases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runFailoverLiveness(ctx, t, c, failureMode)
Expand All @@ -162,7 +167,7 @@ func registerFailover(r registry.Registry) {
Benchmark: true,
Timeout: 30 * time.Minute,
SkipPostValidations: postValidation,
Cluster: r.MakeClusterSpec(7, spec.CPU(2), spec.PreferLocalSSD(!usePD)),
Cluster: r.MakeClusterSpec(7, clusterOpts...),
Leases: leases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runFailoverSystemNonLiveness(ctx, t, c, failureMode)
Expand Down

0 comments on commit 704e6e9

Please sign in to comment.