Skip to content

Commit

Permalink
kvprober: make the planner thread-safe to fix test-only race
Browse files Browse the repository at this point in the history
This commit makes the planner thread-safe, in order to fix a test-only data
race. This commit also un-skips the test that was skipped, when this bug was
discovered.

Fixes: #109564
Release note: None.
  • Loading branch information
joshimhoff committed Sep 7, 2023
1 parent 5dd3ab1 commit 8bf46dc
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
1 change: 0 additions & 1 deletion pkg/kv/kvprober/kvprober_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ func TestProberDoesReadsAndWrites(t *testing.T) {
defer log.Scope(t).Close(t)

skip.UnderShort(t)
skip.WithIssue(t, 109564)

ctx := context.Background()

Expand Down
15 changes: 13 additions & 2 deletions pkg/kv/kvprober/planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
)
Expand All @@ -34,8 +35,6 @@ type Step struct {

// planner abstracts deciding on ranges to probe. It is public to integration
// test from kvprober_test.
//
// planner is NOT thread-safe.
type planner interface {
// Next returns a Step for the prober to execute on. A Step is a decision on
// on what range to probe next. Executing on a series of Steps returned by
Expand All @@ -54,6 +53,14 @@ type planner interface {
type meta2Planner struct {
db *kv.DB
settings *cluster.Settings

// The production kvprober as of Sep 7th 2023 does not need this mutex, as
// there is one planner per probe loop we run. At the same time, making the
// planner thread-safe makes for easier testing in kvprober_integration_test.go.
// Given that in this context there is no meaningful performance cost to
// synchronizing with a mutex, it makes sense to do it for the test use case
// specifically.
mu syncutil.Mutex
// cursor points to a key in meta2 at which scanning should resume when new
// plans are needed.
cursor roachpb.Key
Expand Down Expand Up @@ -148,6 +155,10 @@ func newMeta2Planner(
// number, we pay a higher CPU cost less often; if it's set to a low number,
// we pay a smaller CPU cost more often.
func (p *meta2Planner) next(ctx context.Context) (Step, error) {
// See comment where mu is defined.
p.mu.Lock()
defer p.mu.Unlock()

if len(p.plan) == 0 {
// Protect CRDB from planning executing too often, due to either issues
// with CRDB (meta2 unavailability) or bugs in kvprober.
Expand Down

0 comments on commit 8bf46dc

Please sign in to comment.