From 3ac7f3cb6640505c2277703b96093d85e6e166c2 Mon Sep 17 00:00:00 2001 From: Matt Jibson Date: Mon, 11 Dec 2017 16:57:28 -0500 Subject: [PATCH] sql/jobs: fix races in TestRegistryResumeExpiredLease Release note: None Sometimes jobs will resume more times than usual. Move lock and unlocking to places to prevent deadlocks. Move the done close to a place that will allow all jobs to proceed without receiving from resumeCalled. Fixes #20623 --- pkg/sql/jobs/registry_external_test.go | 31 ++++++++++++++------------ 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/pkg/sql/jobs/registry_external_test.go b/pkg/sql/jobs/registry_external_test.go index fcb89dcae695..2552d096107e 100644 --- a/pkg/sql/jobs/registry_external_test.go +++ b/pkg/sql/jobs/registry_external_test.go @@ -118,7 +118,6 @@ func TestRegistryResumeExpiredLease(t *testing.T) { resumeCounts := make(map[int64]int) // done prevents jobs from finishing. done := make(chan struct{}) - defer close(done) // resumeCalled does a locked, blocking send when a job is started/resumed. A // receive on it will block until a job is running. resumeCalled := make(chan struct{}) @@ -128,11 +127,11 @@ func TestRegistryResumeExpiredLease(t *testing.T) { hookCallCount++ lock.Unlock() return jobs.FakeResumer{OnResume: func(job *jobs.Job) error { - lock.Lock() select { case resumeCalled <- struct{}{}: case <-done: } + lock.Lock() resumeCounts[*job.ID()]++ lock.Unlock() <-done @@ -175,31 +174,35 @@ func TestRegistryResumeExpiredLease(t *testing.T) { return nil }) - lock.Lock() - if e, a := 2, resumeCounts[jobMap[1]]; e != a { - t.Fatalf("expected resumeCount to be %d, but got %d", e, a) - } - lock.Unlock() + testutils.SucceedsSoon(t, func() error { + lock.Lock() + defer lock.Unlock() + if e, a := 2, resumeCounts[jobMap[1]]; e != a { + return errors.Errorf("expected resumeCount to be %d, but got %d", e, a) + } + return nil + }) nodeLiveness.FakeIncrementEpoch(3) <-resumeCalled + close(done) testutils.SucceedsSoon(t, func() error { lock.Lock() - if e, a := 2, resumeCounts[jobMap[3]]; e != a { - return errors.Errorf("expected resumeCount to be %d, but got %d", e, a) + defer lock.Unlock() + if e, a := 2, resumeCounts[jobMap[3]]; e > a { + return errors.Errorf("expected resumeCount to be > %d, but got %d", e, a) } - if e, a := 1, resumeCounts[jobMap[2]]; e != a { - return errors.Errorf("expected resumeCount to be %d, but got %d", e, a) + if e, a := 1, resumeCounts[jobMap[2]]; e > a { + return errors.Errorf("expected resumeCount to be > %d, but got %d", e, a) } count := 0 for _, ct := range resumeCounts { count += ct } - if e, a := 5, count; e != a { - return errors.Errorf("expected total jobs to be %d, but got %d", e, a) + if e, a := 5, count; e > a { + return errors.Errorf("expected total jobs to be > %d, but got %d", e, a) } - lock.Unlock() return nil }) }