From fe2156eb9aaafd418cf282de7f69611fe662a067 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Wed, 9 Jan 2019 15:45:46 +0100 Subject: [PATCH] storage: deflake TestStoreRangeRemoveDead This took me longer to track down than I'm comfortable admitting. The test wasn't properly checking for full upreplication because it was only looking at the first range. It still is, but that's now all the ranges -- it's finicky to upgrade the check. Fixes #33497. Closes #33486. Release note: None --- pkg/storage/client_raft_test.go | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/pkg/storage/client_raft_test.go b/pkg/storage/client_raft_test.go index ca319f86301e..d3ff034f48bf 100644 --- a/pkg/storage/client_raft_test.go +++ b/pkg/storage/client_raft_test.go @@ -3012,7 +3012,12 @@ func TestStoreRangeRemoveDead(t *testing.T) { defer leaktest.AfterTest(t)() sc := storage.TestStoreConfig(nil) sc.TestingKnobs.DisableReplicaRebalancing = true - mtc := &multiTestContext{storeConfig: &sc} + mtc := &multiTestContext{ + storeConfig: &sc, + // This test needs to verify full replication, but as written the check + // only works if there's only one range initially. + startWithSingleRange: true, + } storage.TimeUntilStoreDead.Override(&sc.Settings.SV, storage.TestTimeUntilStoreDead) // Disable declined and failed reservations. Their default values are on @@ -3066,13 +3071,26 @@ func TestStoreRangeRemoveDead(t *testing.T) { panic(err) } } + + case <-stopper.ShouldQuiesce(): + return + } + } + }) + stopper.RunWorker(ctx, func(ctx context.Context) { + tickerDur := storage.TestTimeUntilStoreDead / 2 + ticker := time.NewTicker(tickerDur) + defer ticker.Stop() + + for { + select { + case <-ticker.C: // Force the repair queues on all alive stores to run. for _, s := range nonDeadStores { if err := s.ForceReplicationScanAndProcess(); err != nil { panic(err) } } - case <-stopper.ShouldQuiesce(): return } @@ -3081,6 +3099,8 @@ func TestStoreRangeRemoveDead(t *testing.T) { // Wait for up-replication, including at least one replica on s2 (deadStore). testutils.SucceedsSoon(t, func() error { + // NB: this only checks the first range, so it'll give false positives + // if there's more than one (see the note at the beginning of the test). replicas := getRangeMetadata(roachpb.RKeyMin, mtc, t).Replicas var found bool for _, replica := range replicas {