From 8253aea0546dfa9c69ee060c59ff32525a946e2a Mon Sep 17 00:00:00 2001 From: Bilal Akhtar Date: Wed, 12 Jul 2023 11:02:17 -0400 Subject: [PATCH] vfs: Deflake TestDiskHealthChecking_Filesystem_{Create,Close} Previously we were relying on sleeps and timing-based ways of synchronization between observing a stall in the disk health checking goroutine and confirming for it in the test code itself. This change adds a more direct synchronization between the two events through the use of a channel, to deflake both tests. Furthermore, the TestDiskHealthChecking_Filesystem_Close test was previously doing a relatively thread-unsafe use of a map, which increased the chances of a flake. Fixes #1718. --- vfs/disk_health_test.go | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/vfs/disk_health_test.go b/vfs/disk_health_test.go index a3246ad062..37968df032 100644 --- a/vfs/disk_health_test.go +++ b/vfs/disk_health_test.go @@ -493,11 +493,17 @@ func TestDiskHealthChecking_Filesystem(t *testing.T) { // Wrap with disk-health checking, counting each stall via stallCount. var expectedOpType OpType var stallCount atomic.Uint64 + onStall := make(chan struct{}, 10) + var lastOpType OpType fs, closer := WithDiskHealthChecks(filesystemOpsMockFS(sleepDur), stallThreshold, func(info DiskSlowInfo) { require.Equal(t, 0, info.WriteSize) require.Equal(t, expectedOpType, info.OpType) stallCount.Add(1) + if lastOpType != info.OpType { + lastOpType = info.OpType + onStall <- struct{}{} + } }) defer closer.Close() fs.(*diskHealthCheckingFS).tickInterval = 5 * time.Millisecond @@ -507,6 +513,11 @@ func TestDiskHealthChecking_Filesystem(t *testing.T) { expectedOpType = o.opType before := stallCount.Load() o.f() + select { + case <-onStall: + case <-time.After(10 * stallThreshold): + t.Fatal("timed out waiting for stall") + } after := stallCount.Load() require.Greater(t, int(after-before), 0) }) @@ -528,12 +539,18 @@ func TestDiskHealthChecking_Filesystem_Close(t *testing.T) { }, } - stalled := map[string]time.Duration{} + files := []string{"foo", "bar", "bax"} + var lastPath string + stalled := make(chan string, len(files)*3) fs, closer := WithDiskHealthChecks(mockFS, stallThreshold, - func(info DiskSlowInfo) { stalled[info.Path] = info.Duration }) + func(info DiskSlowInfo) { + if lastPath != info.Path { + lastPath = info.Path + stalled <- info.Path + } + }) fs.(*diskHealthCheckingFS).tickInterval = 5 * time.Millisecond - files := []string{"foo", "bar", "bax"} for _, filename := range files { // Create will stall, and the detector should write to the stalled map // with the filename. @@ -542,6 +559,11 @@ func TestDiskHealthChecking_Filesystem_Close(t *testing.T) { // exit, but the fs should still be usable and should still detect // subsequent stalls on the next iteration. require.NoError(t, closer.Close()) - require.Contains(t, stalled, filename) + select { + case stalledPath := <-stalled: + require.Equal(t, filename, stalledPath) + case <-time.After(10 * stallThreshold): + t.Fatalf("timed out waiting for stall") + } } }