From 7fe8580da5233e88d4b14771ea246c324284d618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 2 Sep 2020 16:40:43 +0200 Subject: [PATCH 1/2] sealing sched: Fix deadlock between worker.wndLk / workersLk --- extern/sector-storage/sched.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/extern/sector-storage/sched.go b/extern/sector-storage/sched.go index 93ba89aceed..2ebe8c73762 100644 --- a/extern/sector-storage/sched.go +++ b/extern/sector-storage/sched.go @@ -563,6 +563,7 @@ func (sh *scheduler) runWorker(wid WorkerID) { return } + sh.workersLk.RLock() worker.wndLk.Lock() windowsRequested -= sh.workerCompactWindows(worker, wid) @@ -574,8 +575,6 @@ func (sh *scheduler) runWorker(wid WorkerID) { // process tasks within a window, preferring tasks at lower indexes for len(firstWindow.todo) > 0 { - sh.workersLk.RLock() - tidx := -1 worker.lk.Lock() @@ -589,7 +588,6 @@ func (sh *scheduler) runWorker(wid WorkerID) { worker.lk.Unlock() if tidx == -1 { - sh.workersLk.RUnlock() break assignLoop } @@ -597,7 +595,6 @@ func (sh *scheduler) runWorker(wid WorkerID) { log.Debugf("assign worker sector %d", todo.sector.Number) err := sh.assignWorker(taskDone, wid, worker, todo) - sh.workersLk.RUnlock() if err != nil { log.Error("assignWorker error: %+v", err) @@ -618,6 +615,7 @@ func (sh *scheduler) runWorker(wid WorkerID) { } worker.wndLk.Unlock() + sh.workersLk.RUnlock() } }() } From 5a2b4397733cd70652da136e9494aac96d4a822a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 2 Sep 2020 17:37:19 +0200 Subject: [PATCH 2/2] sched: Fix tests --- extern/sector-storage/sched.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/extern/sector-storage/sched.go b/extern/sector-storage/sched.go index 2ebe8c73762..831a2615fed 100644 --- a/extern/sector-storage/sched.go +++ b/extern/sector-storage/sched.go @@ -774,14 +774,19 @@ func (sh *scheduler) dropWorker(wid WorkerID) { } func (sh *scheduler) workerCleanup(wid WorkerID, w *workerHandle) { - if !w.cleanupStarted { + select { + case <-w.closingMgr: + default: close(w.closingMgr) } + + sh.workersLk.Unlock() select { case <-w.closedMgr: case <-time.After(time.Second): log.Errorf("timeout closing worker manager goroutine %d", wid) } + sh.workersLk.Lock() if !w.cleanupStarted { w.cleanupStarted = true