Skip to content

Commit

Permalink
kvserver: avoid load-based splitting between rows
Browse files Browse the repository at this point in the history
It was possible for a SQL row to be torn across two ranges due to the
load-based splitter not rejecting potentially unsafe split keys. It is
impossible to determine with just the sampled request keys, whether a
key is certainly unsafe or safe.

This commit side steps this problem by re-using the
`adminSplitWithDescriptor` command to the next real key, after the
provided `args.SplitKey`. This ensures that the split key will always be
a real key whilst not requiring any checks in the splitter itself.

As such, all safe split key checks are also removed from the `split`
pkg, with a warning added.

Resolves: cockroachdb#103483

Release note (bug fix): It was possible for a SQL row to be split across
two ranges. When this occurred, SQL queries could return unexpected
errors. This bug is resolved by these changes, as we now sample the real
keys, rather than just request keys to determine load-based split points.
  • Loading branch information
kvoli committed May 22, 2023
1 parent 72ae87f commit b341eb0
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 122 deletions.
54 changes: 50 additions & 4 deletions pkg/kv/kvserver/replica_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func (r *Replica) AdminSplit(

err := r.executeAdminCommandWithDescriptor(ctx, func(desc *roachpb.RangeDescriptor) error {
var err error
reply, err = r.adminSplitWithDescriptor(ctx, args, desc, true /* delayable */, reason)
reply, err = r.adminSplitWithDescriptor(ctx, args, desc, true /* delayable */, reason, false /* findNextSafeKey */)
return err
})
return reply, err
Expand Down Expand Up @@ -292,7 +292,6 @@ func splitTxnStickyUpdateAttempt(
// affirmative the descriptor is passed to AdminSplit, which performs a
// Conditional Put on the RangeDescriptor to ensure that no other operation has
// modified the range in the time the decision was being made.
// TODO(tschottdorf): should assert that split key is not a local key.
//
// See the comment on splitTrigger for details on the complexities.
func (r *Replica) adminSplitWithDescriptor(
Expand All @@ -301,6 +300,7 @@ func (r *Replica) adminSplitWithDescriptor(
desc *roachpb.RangeDescriptor,
delayable bool,
reason string,
findNextSafeKey bool,
) (kvpb.AdminSplitResponse, error) {
var err error
var reply kvpb.AdminSplitResponse
Expand Down Expand Up @@ -341,11 +341,57 @@ func (r *Replica) adminSplitWithDescriptor(
ri := r.GetRangeInfo(ctx)
return reply, kvpb.NewRangeKeyMismatchErrorWithCTPolicy(ctx, args.Key, args.Key, desc, &ri.Lease, ri.ClosedTimestampPolicy)
}
foundSplitKey = args.SplitKey
// When findNextSafeKey is true, we find the first key after
// args.SplitKey which is a safe split key.The primary user of
// findNextSafeKey is load based splitting, which only has knowledge of
// sampled keys from batch requests. These sampled keys can be
// arbitrarily within SQL rows due to column family keys.
//
// Not every caller requires a real key as a split point (creating empty
// table), however when we cannot verify the split key as safe, the most
// reliable method is checking existing keys.
//
// TODO(kvoli): We will never split a table with just two keys because of
// this limitation. Instead of piggybacking the MVCCFindSplitKey
// function, we should add a new function which allows splitting before
// the args.SplitKey, provided that it is a table key.
if findNextSafeKey {
// NOTE: The requested split key cannot be less than the start key for
// the range, if it is we risk returning a key which straddles a row.
// This is treat as an error below. Catch this case here as well to
// prevent unecessary work.
if !kvserverbase.ContainsKey(desc, args.SplitKey) {
return reply, errors.Errorf("requested split key %s out of bounds of %s",
args.SplitKey, r)
}
// Ensure the requested split key is addressable and not less than the
// start key of the range.
var desiredSplitKey roachpb.RKey
desiredSplitKey, err = keys.Addr(args.SplitKey)
if err != nil {
return reply, err
}
// We re-use the size based splitting method to ensure finding a safe
// key. To make this work, the target size of the lhs/rhs is set to
// zero and the args.SplitKey is given in place of the start key for
// the range.
targetSize := int64(0)
foundSplitKey, err = storage.MVCCFindSplitKey(
ctx, r.store.TODOEngine(), desiredSplitKey, desc.EndKey, targetSize)
if err != nil {
return reply, errors.Wrap(err, "unable to determine split key")
}
if foundSplitKey == nil {
return reply, unsplittableRangeError{}
}
} else {
foundSplitKey = args.SplitKey
}
}

if !kvserverbase.ContainsKey(desc, foundSplitKey) {
return reply, errors.Errorf("requested split key %s out of bounds of %s", args.SplitKey, r)
return reply, errors.Errorf("requested split key %s (found=%s) out of bounds of %s",
args.SplitKey, foundSplitKey, r)
}

// If predicate keys are specified, make sure they are contained by this
Expand Down
19 changes: 18 additions & 1 deletion pkg/kv/kvserver/replica_split_load.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,13 +254,30 @@ func (r *Replica) recordBatchForLoadBasedSplitting(

// loadSplitKey returns a suggested load split key for the range if it exists,
// otherwise it returns nil. If there were any errors encountered when
// validating the split key, the error is returned as well.
// validating the split key, the error is returned as well. It is guaranteed
// that the key returned, if non-nil, will be greater than the start key of the
// range and also within the range bounds.
//
// NOTE: The returned split key CAN BE BETWEEN A SQL ROW, The split key
// returned should only be used to engage a split via adminSplitWithDescriptor
// where findNextSafeKey is set to true.
func (r *Replica) loadSplitKey(ctx context.Context, now time.Time) roachpb.Key {
splitKey := r.loadBasedSplitter.MaybeSplitKey(ctx, now)
if splitKey == nil {
return nil
}

// If the splitKey belongs to a Table range, try and shorten the key to just
// the row prefix. This allows us to check that splitKey doesn't map to the
// first key of the range here.
//
// NB: We handle unsafe split keys in replica.adminSplitWithDescriptor, so it
// isn't an issue if we return an unsafe key here. See the case where
// findNextSafeKey is true.
if keyRowPrefix, err := keys.EnsureSafeSplitKey(splitKey); err == nil {
splitKey = keyRowPrefix
}

// We swallow the error here and instead log an event. It is currently
// expected that the load based splitter may return the start key of the
// range.
Expand Down
2 changes: 0 additions & 2 deletions pkg/kv/kvserver/split/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ go_library(
importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/split",
visibility = ["//visibility:public"],
deps = [
"//pkg/keys",
"//pkg/roachpb",
"//pkg/util/humanizeutil",
"//pkg/util/log",
Expand All @@ -38,7 +37,6 @@ go_test(
"//pkg/roachpb",
"//pkg/testutils/datapathutils",
"//pkg/testutils/skip",
"//pkg/util/encoding",
"//pkg/util/leaktest",
"//pkg/util/metric",
"//pkg/util/stop",
Expand Down
49 changes: 26 additions & 23 deletions pkg/kv/kvserver/split/decider.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"strings"
"time"

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/metric"
Expand Down Expand Up @@ -261,7 +260,9 @@ func (d *Decider) recordLocked(
if d.mu.splitFinder.Ready(now) &&
now.Sub(d.mu.lastSplitSuggestion) > minSplitSuggestionInterval {
if splitKey := d.mu.splitFinder.Key(); splitKey != nil {
log.KvDistribution.VEventf(ctx, 3, "suggesting split key %s splitter_state=%s", splitKey, d.stringLocked())
log.KvDistribution.VEventf(ctx, 3,
"suggesting split key %s splitter_state=%s",
splitKey, d.stringLocked())
d.mu.lastSplitSuggestion = now
d.mu.suggestionsMade++
return true
Expand All @@ -271,9 +272,16 @@ func (d *Decider) recordLocked(
noSplitKeyCauseLogMsg := d.mu.splitFinder.NoSplitKeyCauseLogMsg()
if noSplitKeyCauseLogMsg != "" {
popularKeyFrequency := d.mu.splitFinder.PopularKeyFrequency()
noSplitKeyCauseLogMsg += fmt.Sprintf(", most popular key occurs in %d%% of samples", int(popularKeyFrequency*100))
noSplitKeyCauseLogMsg += fmt.Sprintf(
", most popular key occurs in %d%% of samples",
int(popularKeyFrequency*100),
)
if log.ExpensiveLogEnabled(ctx, 3) {
noSplitKeyCauseLogMsg += fmt.Sprintf(" splitter_state=%s",
d.stringLocked(),
)
}
log.KvDistribution.Infof(ctx, "%s", noSplitKeyCauseLogMsg)
log.KvDistribution.VInfof(ctx, 3, "splitter_state=%s")
if popularKeyFrequency >= splitKeyThreshold {
d.loadSplitterMetrics.PopularKeyCount.Inc(1)
}
Expand Down Expand Up @@ -315,6 +323,8 @@ func (d *Decider) maxStatLocked(ctx context.Context, now time.Time) (float64, bo
// or if it wasn't able to determine a suitable split key.
//
// It is legal to call MaybeSplitKey at any time.
// WARNING: The key returned from MaybeSplitKey has no guarantee of being a
// safe split key. The key is derived from sampled spans. See below.
func (d *Decider) MaybeSplitKey(ctx context.Context, now time.Time) roachpb.Key {
var key roachpb.Key

Expand All @@ -338,27 +348,20 @@ func (d *Decider) MaybeSplitKey(ctx context.Context, now time.Time) roachpb.Key
//
// /Table/51/52/53/54/55
//
// (see TestDeciderCallsEnsureSafeSplitKey).
//
// The key found here isn't guaranteed to be a valid SQL column family
// key. This is because the keys are sampled from StartKey of requests
// hitting this replica. Ranged operations may well wish to exclude the
// start point by calling .Next() or may span multiple ranges, and so
// such a key may end up being passed to EnsureSafeSplitKey here.
// The key found here isn't guaranteed to be a valid SQL column family key.
// This is because the keys are sampled from StartKey and EndKey of
// requests hitting this replica. Ranged operations may well wish to
// exclude the start point by calling .Next() or may span multiple ranges,
// and so such a key may end up being returned. This is more common than
// one might think since SQL issues plenty of scans over all column
// families, meaning that we'll frequently find a key that has no column
// family suffix and thus errors out in EnsureSafeSplitKey.
//
// We take the risk that the result may sometimes not be a good split
// point (or even in this range).
//
// Note that we ignore EnsureSafeSplitKey when it returns an error since
// that error only tells us that this key couldn't possibly be a SQL
// key. This is more common than one might think since SQL issues plenty
// of scans over all column families, meaning that we'll frequently find
// a key that has no column family suffix and thus errors out in
// EnsureSafeSplitKey.
// We do not attempt to validate the key is safe here, simply return it to
// the caller as the best possible split point found so far. See
// replica.adminSplitWithDescriptor for how split keys are handled when we
// aren't certain the provided key is safe.
key = d.mu.splitFinder.Key()
if safeKey, err := keys.EnsureSafeSplitKey(key); err == nil {
key = safeKey
}
}
return key
}
Expand Down
92 changes: 0 additions & 92 deletions pkg/kv/kvserver/split/decider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,12 @@ package split

import (
"context"
"math"
"math/rand"
"testing"
"time"

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -290,96 +288,6 @@ func TestDecider_MaxStat(t *testing.T) {
assertMaxStat(25000, 6, true)
}

func TestDeciderCallsEnsureSafeSplitKey(t *testing.T) {
defer leaktest.AfterTest(t)()
rand := rand.New(rand.NewSource(11))

var d Decider
loadSplitConfig := testLoadSplitConfig{
randSource: rand,
useWeighted: false,
statRetention: time.Second,
statThreshold: 1,
}

Init(&d, &loadSplitConfig, &LoadSplitterMetrics{
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
}, SplitCPU)

baseKey := keys.SystemSQLCodec.TablePrefix(51)
for i := 0; i < 4; i++ {
baseKey = encoding.EncodeUvarintAscending(baseKey, uint64(52+i))
}
c0 := func() roachpb.Span { return roachpb.Span{Key: append([]byte(nil), keys.MakeFamilyKey(baseKey, 1)...)} }
c1 := func() roachpb.Span { return roachpb.Span{Key: append([]byte(nil), keys.MakeFamilyKey(baseKey, 9)...)} }

expK, err := keys.EnsureSafeSplitKey(c1().Key)
require.NoError(t, err)

var k roachpb.Key
var now time.Time
for i := 0; i < 2*int(minSplitSuggestionInterval/time.Second); i++ {
now = now.Add(500 * time.Millisecond)
d.Record(context.Background(), now, ld(1), c0)
now = now.Add(500 * time.Millisecond)
d.Record(context.Background(), now, ld(1), c1)
k = d.MaybeSplitKey(context.Background(), now)
if len(k) != 0 {
break
}
}

require.Equal(t, expK, k)
}

func TestDeciderIgnoresEnsureSafeSplitKeyOnError(t *testing.T) {
defer leaktest.AfterTest(t)()
rand := rand.New(rand.NewSource(11))
var d Decider

loadSplitConfig := testLoadSplitConfig{
randSource: rand,
useWeighted: false,
statRetention: time.Second,
statThreshold: 1,
}

Init(&d, &loadSplitConfig, &LoadSplitterMetrics{
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
}, SplitCPU)

baseKey := keys.SystemSQLCodec.TablePrefix(51)
for i := 0; i < 4; i++ {
baseKey = encoding.EncodeUvarintAscending(baseKey, uint64(52+i))
}
c0 := func() roachpb.Span {
return roachpb.Span{Key: append([]byte(nil), encoding.EncodeUvarintAscending(baseKey, math.MaxInt32+1)...)}
}
c1 := func() roachpb.Span {
return roachpb.Span{Key: append([]byte(nil), encoding.EncodeUvarintAscending(baseKey, math.MaxInt32+2)...)}
}

_, err := keys.EnsureSafeSplitKey(c1().Key)
require.Error(t, err)

var k roachpb.Key
var now time.Time
for i := 0; i < 2*int(minSplitSuggestionInterval/time.Second); i++ {
now = now.Add(500 * time.Millisecond)
d.Record(context.Background(), now, ld(1), c0)
now = now.Add(500 * time.Millisecond)
d.Record(context.Background(), now, ld(1), c1)
k = d.MaybeSplitKey(context.Background(), now)
if len(k) != 0 {
break
}
}

require.Equal(t, c1().Key, k)
}

func TestMaxStatTracker(t *testing.T) {
defer leaktest.AfterTest(t)()

Expand Down
7 changes: 7 additions & 0 deletions pkg/kv/kvserver/split_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ func (sq *splitQueue) processAttempt(
desc,
false, /* delayable */
"span config",
false, /* findFirstSafeSplitKey */
); err != nil {
return false, errors.Wrapf(err, "unable to split %s at key %q", r, splitKey)
}
Expand All @@ -277,6 +278,7 @@ func (sq *splitQueue) processAttempt(
desc,
false, /* delayable */
fmt.Sprintf("%s above threshold size %s", humanizeutil.IBytes(size), humanizeutil.IBytes(maxBytes)),
false, /* findFirstSafeSplitKey */
); err != nil {
return false, err
}
Expand Down Expand Up @@ -312,6 +314,10 @@ func (sq *splitQueue) processAttempt(
if expDelay := kvserverbase.SplitByLoadMergeDelay.Get(&sq.store.cfg.Settings.SV); expDelay > 0 {
expTime = sq.store.Clock().Now().Add(expDelay.Nanoseconds(), 0)
}
// The splitByLoadKey has no guarantee of being a safe key to split at (not
// between SQL rows). To sanitize the split point, pass
// findFirstSafeSplitKey set to true, so that the first key after the
// suggested split point which is safe to split at is used.
if _, pErr := r.adminSplitWithDescriptor(
ctx,
kvpb.AdminSplitRequest{
Expand All @@ -324,6 +330,7 @@ func (sq *splitQueue) processAttempt(
desc,
false, /* delayable */
reason,
true, /* findFirstSafeSplitKey */
); pErr != nil {
return false, errors.Wrapf(pErr, "unable to split %s at key %q", r, splitByLoadKey)
}
Expand Down

0 comments on commit b341eb0

Please sign in to comment.