cockroachdb · craig · Nov 19, 2024 · Nov 15, 2024
@@ -51,6 +51,12 @@ import (
 	"github.com/petermattis/goid"
 )
 
+func init() {
+	pollPreemptionInterval.Lock()
+	defer pollPreemptionInterval.Unlock()
+	pollPreemptionInterval.interval = 5 * time.Minute
+}
+
 var (
 	errTestsFailed = fmt.Errorf("some tests failed")
 
@@ -2132,20 +2138,29 @@ var getPreemptedVMsHook = func(c cluster.Cluster, ctx context.Context, l *logger
 	return c.GetPreemptedVMs(ctx, l)
 }
 
-// pollPreemptionInterval is how often to poll for preempted VMs.
-var pollPreemptionInterval = 5 * time.Minute
+// pollPreemptionInterval is how often to poll for preempted VMs. It lives in
+// a mutex-protected struct so that unit tests can safely modify it. The
+// interval is not a zero-value default: init() sets it to 5 minutes.
+var pollPreemptionInterval struct {
+	syncutil.Mutex
+	interval time.Duration
+}
 
 func monitorForPreemptedVMs(ctx context.Context, t test.Test, c cluster.Cluster, l *logger.Logger) {
 	if c.IsLocal() || !c.Spec().UseSpotVMs {
 		return
 	}
 
+	pollPreemptionInterval.Lock()
+	defer pollPreemptionInterval.Unlock()
+	interval := pollPreemptionInterval.interval
+
 	go func() {
 		for {
 			select {
 			case <-ctx.Done():
 				return
-			case <-time.After(pollPreemptionInterval):
+			case <-time.After(interval):
 				preemptedVMs, err := getPreemptedVMsHook(c, ctx, l)
 				if err != nil {
 					l.Printf("WARN: monitorForPreemptedVMs: failed to check preempted VMs:\n%+v", err)

@@ -719,6 +719,12 @@ func TestVMPreemptionPolling(t *testing.T) {
 		},
 	}
 
+	setPollPreemptionInterval := func(interval time.Duration) {
+		pollPreemptionInterval.Lock()
+		defer pollPreemptionInterval.Unlock()
+		pollPreemptionInterval.interval = interval
+	}
+
 	getPreemptedVMsHook = func(c cluster.Cluster, ctx context.Context, l *logger.Logger) ([]vm.PreemptedVM, error) {
 		preemptedVMs := []vm.PreemptedVM{{
 			Name:        "test_node",
@@ -731,13 +737,13 @@ func TestVMPreemptionPolling(t *testing.T) {
 		getPreemptedVMsHook = func(c cluster.Cluster, ctx context.Context, l *logger.Logger) ([]vm.PreemptedVM, error) {
 			return c.GetPreemptedVMs(ctx, l)
 		}
-		pollPreemptionInterval = 5 * time.Minute
+		setPollPreemptionInterval(5 * time.Minute)
 	}()
 
 	// Test that if a VM is preempted, the VM preemption monitor will catch
 	// it and cancel the test before it times out.
 	t.Run("polling cancels test", func(t *testing.T) {
-		pollPreemptionInterval = 50 * time.Millisecond
+		setPollPreemptionInterval(50 * time.Millisecond)
 
 		err := runner.Run(ctx, []registry.TestSpec{mockTest}, 1, /* count */
 			defaultParallelism, copt, testOpts{}, lopt)
@@ -750,7 +756,7 @@ func TestVMPreemptionPolling(t *testing.T) {
 	// test finished first, the post failure checks will check again and mark it as a flake.
 	t.Run("polling doesn't catch preemption", func(t *testing.T) {
 		// Set the interval very high so we don't poll for preemptions.
-		pollPreemptionInterval = 1 * time.Hour
+		setPollPreemptionInterval(1 * time.Hour)
 
 		mockTest.Run = func(ctx context.Context, t test.Test, c cluster.Cluster) {
 			t.Error("Should be ignored")