From f30c4f4427f990bbabb06929f714c33016ed0c03 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 20 Jun 2023 14:31:05 +0000 Subject: [PATCH] roachtest: let `failover` clusters recover Previously, `failover` tests would begin teardown as soon as the last node was recovered. However, this didn't actually give the node time to recover. This could cause problems with post-test assertions, e.g. if replica circuit breakers were still tripped. We'd also like to get proper data for the last failure. This patch adds a 1-minute wait after recovering the final node, allowing the cluster to recover. Epic: none Release note: None --- pkg/cmd/roachtest/tests/failover.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pkg/cmd/roachtest/tests/failover.go b/pkg/cmd/roachtest/tests/failover.go index b774ce4e0390..3b9f41a251dd 100644 --- a/pkg/cmd/roachtest/tests/failover.go +++ b/pkg/cmd/roachtest/tests/failover.go @@ -321,6 +321,8 @@ func runFailoverChaos(ctx context.Context, t test.Test, c cluster.Cluster, readO failer.Recover(ctx, node) } } + + sleepFor(ctx, t, time.Minute) // let cluster recover return nil }) m.Wait() @@ -464,6 +466,8 @@ func runFailoverPartialLeaseGateway(ctx context.Context, t test.Test, c cluster. 
} } } + + sleepFor(ctx, t, time.Minute) // let cluster recover return nil }) m.Wait() @@ -597,6 +601,8 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C failer.Recover(ctx, node) } } + + sleepFor(ctx, t, time.Minute) // let cluster recover return nil }) m.Wait() @@ -711,6 +717,8 @@ func runFailoverPartialLeaseLiveness(ctx context.Context, t test.Test, c cluster failer.Recover(ctx, node) } } + + sleepFor(ctx, t, time.Minute) // let cluster recover return nil }) m.Wait() @@ -931,6 +939,8 @@ func runFailoverLiveness( failer.Recover(ctx, 4) relocateLeases(t, ctx, conn, `range_id = 2`, 4) } + + sleepFor(ctx, t, time.Minute) // let cluster recover return nil }) m.Wait() @@ -1046,6 +1056,8 @@ func runFailoverSystemNonLiveness( failer.Recover(ctx, node) } } + + sleepFor(ctx, t, time.Minute) // let cluster recover return nil }) m.Wait()