From f2ca885177d798dfb8b370a32b0784e856614d65 Mon Sep 17 00:00:00 2001
From: Matheus Degiovani <opensource@matheusd.com>
Date: Wed, 3 Jul 2024 08:53:04 -0300
Subject: [PATCH] bbr: Fix test flakes

This fixes test flakes that happen due to timing of clock actions.

The tests involve multiple goroutines that access the current time of
the clock, but it may happen that an Advance() call in the test happens
before a read of the clock time to create a new timer, thus causing a
test flake.

This issue is fixed by adding appropriate synchronization.
---
 exp/clock/clock.go           | 10 ++++++++++
 flowcontrol/bbr/util_test.go | 31 ++++++++++++++++++++++++-------
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/exp/clock/clock.go b/exp/clock/clock.go
index 9288d0a4..643f3b2d 100644
--- a/exp/clock/clock.go
+++ b/exp/clock/clock.go
@@ -91,6 +91,7 @@ func (m *Manual) NewTimer(d time.Duration) Timer {
 
 // Advance advances the clock forward by the given duration.
 func (m *Manual) Advance(d time.Duration) {
+	triggeredTimer := false
 	syncutil.With(&m.mu, func() {
 		before := m.now
 		m.now = before.Add(d)
@@ -100,9 +101,18 @@ func (m *Manual) Advance(d time.Duration) {
 
 			if before.Before(t.deadline) && !m.now.Before(t.deadline) {
 				t.ch <- m.now
+				triggeredTimer = true
 			}
 		}
 	})
+
+	// Before returning, give a chance for any timer handlers to run. This
+	// helps avoid test flakes when 2 .Advance() calls happen back-to-back
+	// before timer handlers had a chance to run and update their
+	// deadlines.
+	if triggeredTimer {
+		time.Sleep(50 * time.Millisecond)
+	}
 }
 
 type manualTimer struct {
diff --git a/flowcontrol/bbr/util_test.go b/flowcontrol/bbr/util_test.go
index 0ba4fce4..710139d9 100644
--- a/flowcontrol/bbr/util_test.go
+++ b/flowcontrol/bbr/util_test.go
@@ -43,15 +43,21 @@ func startTestPath(ctx context.Context, clock clock.Clock, links ...testLink) (*
 		rx := &q.Rx
 		q = mpsc.New[testPacket]()
 		tx := &q.Tx
-		go l.run(ctx, clock, rx, tx)
+		ready := make(chan struct{})
+		go l.run(ctx, clock, rx, tx, ready)
+
+		// Wait until the link is ready to receive (avoids races with
+		// the test clock).
+		<-ready
 	}
 
 	return initTx, &q.Rx
 }
 
-func (l testLink) run(ctx context.Context, clock clock.Clock, rx *mpsc.Rx[testPacket], tx *mpsc.Tx[testPacket]) {
+func (l testLink) run(ctx context.Context, clock clock.Clock, rx *mpsc.Rx[testPacket], tx *mpsc.Tx[testPacket], ready chan struct{}) {
 	timer := clock.NewTimer(0)
 	<-timer.Chan()
+	close(ready)
 	for {
 
 		// Wait for a packet to arrive:
@@ -95,6 +101,17 @@ func assertNotDone(t *testing.T, done <-chan struct{}) {
 	}
 }
 
+// assertDone asserts that the done channel is written to at a reasonable time
+// or else it fails the test.
+func assertDone(t *testing.T, done <-chan struct{}) {
+	t.Helper()
+	select {
+	case <-done:
+	case <-time.After(10 * time.Second):
+		t.Fatal("Not done before timeout")
+	}
+}
+
 // Try sending a packet through a path with one link of a given known delay;
 // make sure it arrives at the correct time.
 func TestLinkDelay(t *testing.T) {
@@ -119,7 +136,7 @@ func TestLinkDelay(t *testing.T) {
 	assertNotDone(t, done)
 
 	clock.Advance(3 * time.Second) // This puts us at t = 5, after our delay.
-	<-done                         // would deadlock if the packet still did't send.
+	assertDone(t, done)            // would deadlock if the packet still did't send.
 }
 
 // Try sending a packet through a path with two links of known delay;
@@ -152,7 +169,7 @@ func TestMultiLinkDelay(t *testing.T) {
 	assertNotDone(t, done)         // ...but it still needs to get through the second link.
 
 	clock.Advance(2 * time.Second) // This should get us all the way to the end.
-	<-done                         // would deadlock if the packet still did't send.
+	assertDone(t, done)            // would deadlock if the packet still did't send.
 }
 
 const (
@@ -185,7 +202,7 @@ func TestLinkBandwidth(t *testing.T) {
 	clock.Advance(2 * time.Second)
 	assertNotDone(t, done)
 	clock.Advance(1 * time.Second)
-	<-done
+	assertDone(t, done)
 }
 
 func TestLinkBandwidthMultiPacket(t *testing.T) {
@@ -224,9 +241,9 @@ func TestLinkBandwidthMultiPacket(t *testing.T) {
 	assertNotDone(t, done2)
 
 	clock.Advance(1 * time.Second)
-	<-done1
+	assertDone(t, done1)
 	assertNotDone(t, done2)
 
 	clock.Advance(3 * time.Second)
-	<-done2
+	assertDone(t, done2)
 }