Skip to content

Commit

Permalink
kvserver: avoid load-based splitting between rows
Browse files Browse the repository at this point in the history
It was possible for a SQL row to be torn across two ranges due to the
load-based splitter not rejecting potentially unsafe split keys. It is
impossible to determine with keys sampled from response spans, whether a
key is certainly unsafe or safe.

This commit side steps this problem by re-using the
`adminSplitWithDescriptor` command to find the first real key, after or
at the provided `args.SplitKey`. This ensures that the split key will
always be a real key whilst not requiring any checks in the splitter
itself.

The updated `adminSplitWithDescriptor` is local only and requires opting
into finding the first safe key by setting `findFirstSafeKey` to `true`.

As such, all safe split key checks are also removed from the `split`
pkg, with a warning added that the any split key returned is unsafe.

Resolves: cockroachdb#103483

Release note (bug fix): It was possible for a SQL row to be split across
two ranges. When this occurred, SQL queries could return unexpected
errors. This bug is resolved by these changes, as we now inspect the real
keys, rather than just request keys to determine load-based split points.
  • Loading branch information
kvoli committed May 25, 2023
1 parent 279a219 commit a145013
Show file tree
Hide file tree
Showing 10 changed files with 637 additions and 190 deletions.
246 changes: 246 additions & 0 deletions pkg/kv/kvserver/client_split_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3685,3 +3685,249 @@ func TestStoreRangeSplitAndMergeWithGlobalReads(t *testing.T) {
repl = store.LookupReplica(roachpb.RKey(splitKey))
require.Equal(t, descKey, repl.Desc().StartKey.AsRawKey())
}

// TestLBSplitUnsafeKeys tests that load based splits do not split between table
// rows, even when the suggested load based split key is itself between a table row.
func TestLBSplitUnsafeKeys(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
ctx := context.Background()
const indexID = 1

// The test is expensive and prone to timing out under race.
skip.UnderRace(t)
skip.UnderStressRace(t)
skip.UnderDeadlock(t)

makeTestKey := func(tableID uint32, suffix []byte) roachpb.Key {
tableKey := keys.MakeTableIDIndexID(nil, tableID, indexID)
return append(tableKey, suffix...)
}

es := func(vals ...int64) []byte {
k := []byte{}
for _, v := range vals {
k = encoding.EncodeVarintAscending(k, v)
}
return k
}

fk := func(k []byte, famID uint32) []byte {
return keys.MakeFamilyKey(k, famID)
}

testCases := []struct {
splitKey roachpb.Key
existingKeys []int
expSplitKey roachpb.Key
expErrStr string
}{
// We don't know the table ID here, we append the splitKey to the
// table/index prefix. e.g. /1 will be /Table/table_id/index_id/1.
{
// /1 -> /2
splitKey: es(1),
existingKeys: []int{1, 2, 3},
expSplitKey: es(2),
},
{
// /1/0 -> /2
splitKey: fk(es(1), 0),
existingKeys: []int{1, 2, 3},
expSplitKey: es(2),
},
{
// /1/3 -> /2
splitKey: fk(es(1), 3),
existingKeys: []int{1, 2, 3},
expSplitKey: es(2),
},
{
// /1/0/0 -> /2
splitKey: roachpb.Key(fk(es(1), 0)).Next(),
existingKeys: []int{1, 2, 3},
expSplitKey: es(2),
},
{
// /1/3/0 -> /2
splitKey: roachpb.Key(fk(es(1), 3)).Next(),
existingKeys: []int{1, 2, 3},
expSplitKey: es(2),
},
{
// /1/0/0/0 -> /2
splitKey: roachpb.Key(fk(es(1), 0)).Next().Next(),
existingKeys: []int{1, 2, 3},
expSplitKey: es(2),
},
{
// /1/3/0/0 -> /2
splitKey: roachpb.Key(fk(es(1), 3)).Next().Next(),
existingKeys: []int{1, 2, 3},
expSplitKey: es(2),
},
{
// /0 -> /3
// We will not split at the first row in a table, so expect the split at
// /3 intead of /2.
splitKey: es(0),
existingKeys: []int{2, 3},
expSplitKey: es(3),
},
{
// /2 -> /3
// Same case as above, despite the key being safe, the split would create
// an empty LHS.
splitKey: es(2),
existingKeys: []int{2, 3},
expSplitKey: es(3),
},
{
// /1 -> error
// There are no rows to split on.
splitKey: es(1),
existingKeys: []int{},
expErrStr: "could not find valid split key",
},
{
// /1 -> error
// There is only one row to split on, the range should not be split.
splitKey: es(1),
existingKeys: []int{2},
expErrStr: "could not find valid split key",
},
{
// /1/0 -> error
splitKey: fk(es(1), 0),
existingKeys: []int{2},
expErrStr: "could not find valid split key",
},
{
// /1/3 -> error
splitKey: fk(es(1), 3),
existingKeys: []int{2},
expErrStr: "could not find valid split key",
},
{
// /1/3/0/0 -> error
splitKey: roachpb.Key(fk(es(1), 0)).Next().Next(),
existingKeys: []int{2},
expErrStr: "could not find valid split key",
},
{
// /2 -> /2
splitKey: es(2),
existingKeys: []int{1, 2},
expSplitKey: es(2),
},
}

for _, tc := range testCases {
var expectStr string
if tc.expErrStr != "" {
expectStr = tc.expErrStr
} else {
expectStr = makeTestKey(1, tc.expSplitKey).String()
}
t.Run(fmt.Sprintf("%s%v -> %s", makeTestKey(1, tc.splitKey), tc.existingKeys, expectStr), func(t *testing.T) {
var targetRange atomic.Int32
var splitKeyOverride atomic.Value
splitKeyOverride.Store(roachpb.Key{})

// Mock the load based splitter key finding method. This function will be
// checked in splitQueue.shouldQueue() and splitQueue.process via
// replica.loadSplitKey. When a key is returned, the split queue calls
// replica.adminSplitWithDescriptor(...findFirstSafeSplitKey=true).
overrideLBSplitFn := func(rangeID roachpb.RangeID) (splitKey roachpb.Key, useSplitKey bool) {
if rangeID == roachpb.RangeID(targetRange.Load()) {
override := splitKeyOverride.Load()
// It is possible that the split queue is checking the range before
// we manually enqueued it.
if override == nil {
return nil, false
}
overrideKey, ok := override.(roachpb.Key)
require.Truef(t, ok, "stored value not key %+v", override)

if len(overrideKey) == 0 {
return nil, false
}

return override.(roachpb.Key), true
}
return nil, false
}

serv, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{
DisableSpanConfigs: true,
Knobs: base.TestingKnobs{
Store: &kvserver.StoreTestingKnobs{
LoadBasedSplittingOverrideKey: overrideLBSplitFn,
DisableMergeQueue: true,
},
},
})
s := serv.(*server.TestServer)
defer s.Stopper().Stop(ctx)
tdb := sqlutils.MakeSQLRunner(sqlDB)
store, err := s.Stores().GetStore(1)
require.NoError(t, err)

// We want to exercise the case where there are column family keys.
// Create a simple table and insert the existing keys.
_ = tdb.Exec(t,
"CREATE TABLE t (k INT PRIMARY KEY, "+
"t0 INT, t1 INT, t2 INT, t3 INT, "+
"FAMILY (k), FAMILY (t0), FAMILY (t1), FAMILY (t2), FAMILY (t3))")
for _, k := range tc.existingKeys {
_ = tdb.Exec(t, fmt.Sprintf("INSERT INTO t VALUES (%d, %d, %d, %d, %d)",
k, k, k, k, k))
}

// Force a table scan to resolve descriptors.
var keyCount int
tdb.QueryRow(t, "SELECT count(k) FROM t").Scan(&keyCount)
require.Equal(t, len(tc.existingKeys), keyCount)
var tableID uint32
tdb.QueryRow(t, "SELECT table_id FROM crdb_internal.leases where name = 't'").Scan(&tableID)

// Split off the table range for the test, otherwise the range may
// contain multiple tables with existing values.
splitArgs := adminSplitArgs(keys.SystemSQLCodec.TablePrefix(tableID))
_, pErr := kv.SendWrapped(ctx, store.TestSender(), splitArgs)
require.Nil(t, pErr)

var rangeID roachpb.RangeID
tdb.QueryRow(t, "SELECT range_id FROM [SHOW RANGES FROM TABLE t] LIMIT 1").Scan(&rangeID)
targetRange.Store(int32(rangeID))
repl, err := store.GetReplica(rangeID)
require.NoError(t, err)

// Keep the previous end key around, we will use this to assert that no
// split has occurred when expecting an error.
prevEndKey := repl.Desc().EndKey.AsRawKey()
splitKey := makeTestKey(tableID, tc.splitKey)

// Update the split key override so that the split queue will enqueue and
// process the range. Remove it afterwards to avoid retrying the LHS.
splitKeyOverride.Store(splitKey)
_, processErr, enqueueErr := store.Enqueue(ctx, "split", repl, false /* shouldSkipQueue */, false /* async */)
splitKeyOverride.Store(roachpb.Key{})
require.NoError(t, enqueueErr)

endKey := repl.Desc().EndKey.AsRawKey()
if tc.expErrStr != "" {
// We expect this split not to process, assert that the expected error
// matches the returned error and the range has the same end key.
require.ErrorContainsf(t, processErr, tc.expErrStr,
"end key %s, previous end key %s", endKey, prevEndKey)
require.Equal(t, prevEndKey, endKey)
} else {
// Otherwise, assert that the new range end key matches the expected
// end key.
require.NoError(t, processErr)
require.Equal(t, makeTestKey(tableID, tc.expSplitKey), endKey)
}
})
}
}
33 changes: 29 additions & 4 deletions pkg/kv/kvserver/replica_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func (r *Replica) AdminSplit(

err := r.executeAdminCommandWithDescriptor(ctx, func(desc *roachpb.RangeDescriptor) error {
var err error
reply, err = r.adminSplitWithDescriptor(ctx, args, desc, true /* delayable */, reason)
reply, err = r.adminSplitWithDescriptor(ctx, args, desc, true /* delayable */, reason, false /* findFirstSafeKey */)
return err
})
return reply, err
Expand Down Expand Up @@ -292,7 +292,6 @@ func splitTxnStickyUpdateAttempt(
// affirmative the descriptor is passed to AdminSplit, which performs a
// Conditional Put on the RangeDescriptor to ensure that no other operation has
// modified the range in the time the decision was being made.
// TODO(tschottdorf): should assert that split key is not a local key.
//
// See the comment on splitTrigger for details on the complexities.
func (r *Replica) adminSplitWithDescriptor(
Expand All @@ -301,6 +300,7 @@ func (r *Replica) adminSplitWithDescriptor(
desc *roachpb.RangeDescriptor,
delayable bool,
reason string,
findFirstSafeKey bool,
) (kvpb.AdminSplitResponse, error) {
var err error
var reply kvpb.AdminSplitResponse
Expand Down Expand Up @@ -341,11 +341,36 @@ func (r *Replica) adminSplitWithDescriptor(
ri := r.GetRangeInfo(ctx)
return reply, kvpb.NewRangeKeyMismatchErrorWithCTPolicy(ctx, args.Key, args.Key, desc, &ri.Lease, ri.ClosedTimestampPolicy)
}
foundSplitKey = args.SplitKey
// When findFirstSafeKey is true, we find the first key after or at
// args.SplitKey which is a safe split to split at. The current user of
// findFirstSafeKey is load based splitting, which only has knowledge of
// sampled keys from batch requests. These sampled keys can be
// arbitrarily within SQL rows due to column family keys.
//
// Not every caller requires a real key as a split point (creating empty
// table), however when we cannot verify the split key as safe, the most
// reliable method is checking existing keys.
if findFirstSafeKey {
var desiredSplitKey roachpb.RKey
if desiredSplitKey, err = keys.Addr(args.SplitKey); err != nil {
return reply, err
}
if foundSplitKey, err = storage.MVCCFirstSplitKey(
ctx, r.store.TODOEngine(), desiredSplitKey,
desc.StartKey, desc.EndKey,
); err != nil {
return reply, errors.Wrap(err, "unable to determine split key")
} else if foundSplitKey == nil {
return reply, unsplittableRangeError{}
}
} else {
foundSplitKey = args.SplitKey
}
}

if !kvserverbase.ContainsKey(desc, foundSplitKey) {
return reply, errors.Errorf("requested split key %s out of bounds of %s", args.SplitKey, r)
return reply, errors.Errorf("requested split key %s (found=%s) out of bounds of %s",
args.SplitKey, foundSplitKey, r)
}

// If predicate keys are specified, make sure they are contained by this
Expand Down
39 changes: 37 additions & 2 deletions pkg/kv/kvserver/replica_split_load.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,13 +254,48 @@ func (r *Replica) recordBatchForLoadBasedSplitting(

// loadSplitKey returns a suggested load split key for the range if it exists,
// otherwise it returns nil. If there were any errors encountered when
// validating the split key, the error is returned as well.
// validating the split key, the error is returned as well. It is guaranteed
// that the key returned, if non-nil, will be greater than the start key of the
// range and also within the range bounds.
//
// NOTE: The returned split key CAN BE BETWEEN A SQL ROW, The split key
// returned should only be used to engage a split via adminSplitWithDescriptor
// where findFirstSafeKey is set to true.
func (r *Replica) loadSplitKey(ctx context.Context, now time.Time) roachpb.Key {
splitKey := r.loadBasedSplitter.MaybeSplitKey(ctx, now)
var splitKey roachpb.Key
if overrideFn := r.store.cfg.TestingKnobs.LoadBasedSplittingOverrideKey; overrideFn != nil {
var useSplitKey bool
if splitKey, useSplitKey = overrideFn(r.GetRangeID()); useSplitKey {
return splitKey
}
} else {
splitKey = r.loadBasedSplitter.MaybeSplitKey(ctx, now)
}

if splitKey == nil {
return nil
}

// If the splitKey belongs to a Table range, try and shorten the key to just
// the row prefix. This allows us to check that splitKey doesn't map to the
// first key of the range here. If the split key contains column families, it
// is possible that the full key is strictly after every existing key for
// that row. e.g. for a table row where the table ID is 100, index ID is 1,
// primary key is a, and the column family ID is 3 (length=1):
//
// splitKey = /Table/100/1/"a"/3/1
// existing = [..., /Table/100/1/"a"/2/1]
//
// We would not split at /Table/100/1/"a" as there's no key >= the splitKey
// in the range.
//
// NB: We handle unsafe split keys in replica.adminSplitWithDescriptor, so it
// isn't an issue if we return an unsafe key here. See the case where
// findFirstSafeKey is true.
if keyRowPrefix, err := keys.EnsureSafeSplitKey(splitKey); err == nil {
splitKey = keyRowPrefix
}

// We swallow the error here and instead log an event. It is currently
// expected that the load based splitter may return the start key of the
// range.
Expand Down
2 changes: 0 additions & 2 deletions pkg/kv/kvserver/split/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ go_library(
importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/split",
visibility = ["//visibility:public"],
deps = [
"//pkg/keys",
"//pkg/roachpb",
"//pkg/util/humanizeutil",
"//pkg/util/log",
Expand All @@ -39,7 +38,6 @@ go_test(
"//pkg/roachpb",
"//pkg/testutils/datapathutils",
"//pkg/testutils/skip",
"//pkg/util/encoding",
"//pkg/util/leaktest",
"//pkg/util/metric",
"//pkg/util/stop",
Expand Down
Loading

0 comments on commit a145013

Please sign in to comment.