From 1ca36292825fd4686931370a87b7c368ebab82dd Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Fri, 8 Mar 2019 17:44:54 +0100 Subject: [PATCH] sqlbase: avoid a race in sqlbase.CancelChecker Prior to the distsql-ification of planNode execution, all the execution steps in a local query were interleaved (using coroutine-style concurrency), so that the calls to `(*CancelChecker).Check()` were all sequential. With distsql now it's possible for different planNode `Next()` or `startExec()` methods to be running in concurrent goroutines on multiple cores, i.e. truly parallel. This in turn requires atomic access to the counter in the cancel checker. Without atomic access, there is a race condition and the possibility that the cancel checker does not work well (some increments can be performed two times, which could cause the condition of the check to occasionally fail). Found with SQLSmith. Release note: None --- pkg/sql/sqlbase/cancel_checker.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pkg/sql/sqlbase/cancel_checker.go b/pkg/sql/sqlbase/cancel_checker.go index 4e0c97fa27ac..e2f7ee9dc9ef 100644 --- a/pkg/sql/sqlbase/cancel_checker.go +++ b/pkg/sql/sqlbase/cancel_checker.go @@ -14,7 +14,10 @@ package sqlbase -import "context" +import ( + "context" + "sync/atomic" +) // CancelChecker is a helper object for repeatedly checking whether the associated context // has been canceled or not. Encapsulates all logic for waiting for cancelCheckInterval @@ -25,7 +28,7 @@ type CancelChecker struct { ctx context.Context // Number of times Check() has been called since last context cancellation check. - callsSinceLastCheck int32 + callsSinceLastCheck uint32 } // NewCancelChecker returns a new CancelChecker. @@ -42,7 +45,7 @@ func (c *CancelChecker) Check() error { // bitwise AND instead of division. const cancelCheckInterval = 1024 - if c.callsSinceLastCheck%cancelCheckInterval == 0 { + if atomic.LoadUint32(&c.callsSinceLastCheck)%cancelCheckInterval == 0 { select { case <-c.ctx.Done(): // Once the context is canceled, we no longer increment @@ -52,7 +55,10 @@ func (c *CancelChecker) Check() error { default: } } - c.callsSinceLastCheck++ + + // Increment. This may rollover when the 32-bit capacity is reached, + // but that's all right. + atomic.AddUint32(&c.callsSinceLastCheck, 1) return nil }