diff --git a/pkg/kv/kvserver/BUILD.bazel b/pkg/kv/kvserver/BUILD.bazel index 7a3d982cf125..992bff96c772 100644 --- a/pkg/kv/kvserver/BUILD.bazel +++ b/pkg/kv/kvserver/BUILD.bazel @@ -298,6 +298,7 @@ go_test( "replica_rangefeed_test.go", "replica_rankings_test.go", "replica_sideload_test.go", + "replica_split_load_test.go", "replica_sst_snapshot_storage_test.go", "replica_test.go", "replica_tscache_test.go", diff --git a/pkg/kv/kvserver/asim/state/split_decider.go b/pkg/kv/kvserver/asim/state/split_decider.go index 270358e9d7de..f03ec4b4c9b0 100644 --- a/pkg/kv/kvserver/asim/state/split_decider.go +++ b/pkg/kv/kvserver/asim/state/split_decider.go @@ -11,12 +11,14 @@ package state import ( + "context" "math/rand" "time" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/workload" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/split" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/util/metric" ) // LoadSplitter provides an abstraction for load based splitting. It records @@ -66,7 +68,10 @@ func (s *SplitDecider) newDecider() *split.Decider { } decider := &split.Decider{} - split.Init(decider, intN, s.qpsThreshold, s.qpsRetention) + split.Init(decider, intN, s.qpsThreshold, s.qpsRetention, &split.LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) return decider } @@ -81,7 +86,7 @@ func (s *SplitDecider) Record(tick time.Time, rangeID RangeID, le workload.LoadE } qps := LoadEventQPS(le) - shouldSplit := decider.Record(tick, int(qps), func() roachpb.Span { + shouldSplit := decider.Record(context.Background(), tick, int(qps), func() roachpb.Span { return roachpb.Span{ Key: Key(le.Key).ToRKey().AsRawKey(), } @@ -102,7 +107,7 @@ func (s *SplitDecider) SplitKey(tick time.Time, rangeID RangeID) (Key, bool) { return InvalidKey, false } - key := decider.MaybeSplitKey(tick) + key := decider.MaybeSplitKey(context.Background(), tick) if key == nil { return InvalidKey, false } diff --git a/pkg/kv/kvserver/batcheval/cmd_range_stats.go b/pkg/kv/kvserver/batcheval/cmd_range_stats.go index 183ae552e74d..1e1d14611232 100644 --- a/pkg/kv/kvserver/batcheval/cmd_range_stats.go +++ b/pkg/kv/kvserver/batcheval/cmd_range_stats.go @@ -44,8 +44,8 @@ func RangeStats( ) (result.Result, error) { reply := resp.(*roachpb.RangeStatsResponse) reply.MVCCStats = cArgs.EvalCtx.GetMVCCStats() - reply.DeprecatedLastQueriesPerSecond = cArgs.EvalCtx.GetLastSplitQPS() - if qps, ok := cArgs.EvalCtx.GetMaxSplitQPS(); ok { + reply.DeprecatedLastQueriesPerSecond = cArgs.EvalCtx.GetLastSplitQPS(ctx) + if qps, ok := cArgs.EvalCtx.GetMaxSplitQPS(ctx); ok { reply.MaxQueriesPerSecond = qps } else { // See comment on MaxQueriesPerSecond. -1 means !ok. diff --git a/pkg/kv/kvserver/batcheval/eval_context.go b/pkg/kv/kvserver/batcheval/eval_context.go index 7f89b273c6cf..277e01c02c43 100644 --- a/pkg/kv/kvserver/batcheval/eval_context.go +++ b/pkg/kv/kvserver/batcheval/eval_context.go @@ -88,7 +88,7 @@ type EvalContext interface { // // NOTE: This should not be used when the load based splitting cluster setting // is disabled. - GetMaxSplitQPS() (float64, bool) + GetMaxSplitQPS(context.Context) (float64, bool) // GetLastSplitQPS returns the Replica's most recent queries/s request rate. // @@ -96,7 +96,7 @@ type EvalContext interface { // is disabled. // // TODO(nvanbenschoten): remove this method in v22.1. 
- GetLastSplitQPS() float64 + GetLastSplitQPS(context.Context) float64 GetGCThreshold() hlc.Timestamp ExcludeDataFromBackup() bool @@ -240,10 +240,10 @@ func (m *mockEvalCtxImpl) ContainsKey(key roachpb.Key) bool { func (m *mockEvalCtxImpl) GetMVCCStats() enginepb.MVCCStats { return m.Stats } -func (m *mockEvalCtxImpl) GetMaxSplitQPS() (float64, bool) { +func (m *mockEvalCtxImpl) GetMaxSplitQPS(context.Context) (float64, bool) { return m.QPS, true } -func (m *mockEvalCtxImpl) GetLastSplitQPS() float64 { +func (m *mockEvalCtxImpl) GetLastSplitQPS(context.Context) float64 { return m.QPS } func (m *mockEvalCtxImpl) CanCreateTxnRecord( diff --git a/pkg/kv/kvserver/client_split_test.go b/pkg/kv/kvserver/client_split_test.go index 3c4a981895f3..2c5720790fc3 100644 --- a/pkg/kv/kvserver/client_split_test.go +++ b/pkg/kv/kvserver/client_split_test.go @@ -3649,3 +3649,249 @@ func TestStoreRangeSplitAndMergeWithGlobalReads(t *testing.T) { repl = store.LookupReplica(roachpb.RKey(splitKey)) require.Equal(t, descKey, repl.Desc().StartKey.AsRawKey()) } + +// TestLBSplitUnsafeKeys tests that load based splits do not split between table +// rows, even when the suggested load based split key is itself between a table row. +func TestLBSplitUnsafeKeys(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + const indexID = 1 + + // The test is expensive and prone to timing out under race. + skip.UnderRace(t) + skip.UnderStressRace(t) + skip.UnderDeadlock(t) + + makeTestKey := func(tableID uint32, suffix []byte) roachpb.Key { + tableKey := keys.MakeTableIDIndexID(nil, tableID, indexID) + return append(tableKey, suffix...) + } + + es := func(vals ...int64) []byte { + k := []byte{} + for _, v := range vals { + k = encoding.EncodeVarintAscending(k, v) + } + return k + } + + fk := func(k []byte, famID uint32) []byte { + return keys.MakeFamilyKey(k, famID) + } + + testCases := []struct { + splitKey roachpb.Key + existingKeys []int + expSplitKey roachpb.Key + expErrStr string + }{ + // We don't know the table ID here, we append the splitKey to the + // table/index prefix. e.g. /1 will be /Table/table_id/index_id/1. + { + // /1 -> /2 + splitKey: es(1), + existingKeys: []int{1, 2, 3}, + expSplitKey: es(2), + }, + { + // /1/0 -> /2 + splitKey: fk(es(1), 0), + existingKeys: []int{1, 2, 3}, + expSplitKey: es(2), + }, + { + // /1/3 -> /2 + splitKey: fk(es(1), 3), + existingKeys: []int{1, 2, 3}, + expSplitKey: es(2), + }, + { + // /1/0/0 -> /2 + splitKey: roachpb.Key(fk(es(1), 0)).Next(), + existingKeys: []int{1, 2, 3}, + expSplitKey: es(2), + }, + { + // /1/3/0 -> /2 + splitKey: roachpb.Key(fk(es(1), 3)).Next(), + existingKeys: []int{1, 2, 3}, + expSplitKey: es(2), + }, + { + // /1/0/0/0 -> /2 + splitKey: roachpb.Key(fk(es(1), 0)).Next().Next(), + existingKeys: []int{1, 2, 3}, + expSplitKey: es(2), + }, + { + // /1/3/0/0 -> /2 + splitKey: roachpb.Key(fk(es(1), 3)).Next().Next(), + existingKeys: []int{1, 2, 3}, + expSplitKey: es(2), + }, + { + // /0 -> /3 + // We will not split at the first row in a table, so expect the split at + // /3 intead of /2. + splitKey: es(0), + existingKeys: []int{2, 3}, + expSplitKey: es(3), + }, + { + // /2 -> /3 + // Same case as above, despite the key being safe, the split would create + // an empty LHS. + splitKey: es(2), + existingKeys: []int{2, 3}, + expSplitKey: es(3), + }, + { + // /1 -> error + // There are no rows to split on. 
+ splitKey: es(1), + existingKeys: []int{}, + expErrStr: "could not find valid split key", + }, + { + // /1 -> error + // There is only one row to split on, the range should not be split. + splitKey: es(1), + existingKeys: []int{2}, + expErrStr: "could not find valid split key", + }, + { + // /1/0 -> error + splitKey: fk(es(1), 0), + existingKeys: []int{2}, + expErrStr: "could not find valid split key", + }, + { + // /1/3 -> error + splitKey: fk(es(1), 3), + existingKeys: []int{2}, + expErrStr: "could not find valid split key", + }, + { + // /1/3/0/0 -> error + splitKey: roachpb.Key(fk(es(1), 0)).Next().Next(), + existingKeys: []int{2}, + expErrStr: "could not find valid split key", + }, + { + // /2 -> /2 + splitKey: es(2), + existingKeys: []int{1, 2}, + expSplitKey: es(2), + }, + } + + for _, tc := range testCases { + var expectStr string + if tc.expErrStr != "" { + expectStr = tc.expErrStr + } else { + expectStr = makeTestKey(1, tc.expSplitKey).String() + } + t.Run(fmt.Sprintf("%s%v -> %s", makeTestKey(1, tc.splitKey), tc.existingKeys, expectStr), func(t *testing.T) { + var targetRange atomic.Int32 + var splitKeyOverride atomic.Value + splitKeyOverride.Store(roachpb.Key{}) + + // Mock the load based splitter key finding method. This function will be + // checked in splitQueue.shouldQueue() and splitQueue.process via + // replica.loadSplitKey. When a key is returned, the split queue calls + // replica.adminSplitWithDescriptor(...findFirstSafeSplitKey=true). + overrideLBSplitFn := func(rangeID roachpb.RangeID) (splitKey roachpb.Key, useSplitKey bool) { + if rangeID == roachpb.RangeID(targetRange.Load()) { + override := splitKeyOverride.Load() + // It is possible that the split queue is checking the range before + // we manually enqueued it. + if override == nil { + return nil, false + } + overrideKey, ok := override.(roachpb.Key) + require.Truef(t, ok, "stored value not key %+v", override) + + if len(overrideKey) == 0 { + return nil, false + } + + return override.(roachpb.Key), true + } + return nil, false + } + + serv, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{ + DisableSpanConfigs: true, + Knobs: base.TestingKnobs{ + Store: &kvserver.StoreTestingKnobs{ + LoadBasedSplittingOverrideKey: overrideLBSplitFn, + DisableMergeQueue: true, + }, + }, + }) + s := serv.(*server.TestServer) + defer s.Stopper().Stop(ctx) + tdb := sqlutils.MakeSQLRunner(sqlDB) + store, err := s.Stores().GetStore(1) + require.NoError(t, err) + + // We want to exercise the case where there are column family keys. + // Create a simple table and insert the existing keys. + _ = tdb.Exec(t, + "CREATE TABLE t (k INT PRIMARY KEY, "+ + "t0 INT, t1 INT, t2 INT, t3 INT, "+ + "FAMILY (k), FAMILY (t0), FAMILY (t1), FAMILY (t2), FAMILY (t3))") + for _, k := range tc.existingKeys { + _ = tdb.Exec(t, fmt.Sprintf("INSERT INTO t VALUES (%d, %d, %d, %d, %d)", + k, k, k, k, k)) + } + + // Force a table scan to resolve descriptors. + var keyCount int + tdb.QueryRow(t, "SELECT count(k) FROM t").Scan(&keyCount) + require.Equal(t, len(tc.existingKeys), keyCount) + var tableID uint32 + tdb.QueryRow(t, "SELECT table_id FROM crdb_internal.leases where name = 't'").Scan(&tableID) + + // Split off the table range for the test, otherwise the range may + // contain multiple tables with existing values. 
+ splitArgs := adminSplitArgs(keys.SystemSQLCodec.TablePrefix(tableID)) + _, pErr := kv.SendWrapped(ctx, store.TestSender(), splitArgs) + require.Nil(t, pErr) + + var rangeID roachpb.RangeID + tdb.QueryRow(t, "SELECT range_id FROM [SHOW RANGES FROM TABLE t] LIMIT 1").Scan(&rangeID) + targetRange.Store(int32(rangeID)) + repl, err := store.GetReplica(rangeID) + require.NoError(t, err) + + // Keep the previous end key around, we will use this to assert that no + // split has occurred when expecting an error. + prevEndKey := repl.Desc().EndKey.AsRawKey() + splitKey := makeTestKey(tableID, tc.splitKey) + + // Update the split key override so that the split queue will enqueue and + // process the range. Remove it afterwards to avoid retrying the LHS. + splitKeyOverride.Store(splitKey) + _, processErr, enqueueErr := store.Enqueue(ctx, "split", repl, false /* shouldSkipQueue */, false /* async */) + splitKeyOverride.Store(roachpb.Key{}) + require.NoError(t, enqueueErr) + + endKey := repl.Desc().EndKey.AsRawKey() + if tc.expErrStr != "" { + // We expect this split not to process, assert that the expected error + // matches the returned error and the range has the same end key. + require.ErrorContainsf(t, processErr, tc.expErrStr, + "end key %s, previous end key %s", endKey, prevEndKey) + require.Equal(t, prevEndKey, endKey) + } else { + // Otherwise, assert that the new range end key matches the expected + // end key. + require.NoError(t, processErr) + require.Equal(t, makeTestKey(tableID, tc.expSplitKey), endKey) + } + }) + } +} diff --git a/pkg/kv/kvserver/merge_queue.go b/pkg/kv/kvserver/merge_queue.go index 0cf20d24feb1..b6eb35448354 100644 --- a/pkg/kv/kvserver/merge_queue.go +++ b/pkg/kv/kvserver/merge_queue.go @@ -217,7 +217,7 @@ func (mq *mergeQueue) process( lhsDesc := lhsRepl.Desc() lhsStats := lhsRepl.GetMVCCStats() - lhsQPS, lhsQPSOK := lhsRepl.GetMaxSplitQPS() + lhsQPS, lhsQPSOK := lhsRepl.GetMaxSplitQPS(ctx) minBytes := lhsRepl.GetMinBytes() if lhsStats.Total() >= minBytes { log.VEventf(ctx, 2, "skipping merge: LHS meets minimum size threshold %d with %d bytes", diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go index 0ac65b7c33fc..5f09f366c09e 100644 --- a/pkg/kv/kvserver/metrics.go +++ b/pkg/kv/kvserver/metrics.go @@ -21,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rangefeed" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/split" "github.com/cockroachdb/cockroach/pkg/multitenant" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage" @@ -1644,6 +1645,20 @@ Note that the measurement does not include the duration for replicating the eval Measurement: "Nanoseconds", Unit: metric.Unit_NANOSECONDS, } + + metaPopularKeyCount = metric.Metadata{ + Name: "kv.loadsplitter.popularkey", + Help: "Load-based splitter could not find a split key and the most popular sampled split key occurs in >= 25% of the samples.", + Measurement: "Occurrences", + Unit: metric.Unit_COUNT, + } + + metaNoSplitKeyCount = metric.Metadata{ + Name: "kv.loadsplitter.nosplitkey", + Help: "Load-based splitter could not find a split key.", + Measurement: "Occurrences", + Unit: metric.Unit_COUNT, + } ) // StoreMetrics is the set of metrics for a given store. @@ -1654,6 +1669,9 @@ type StoreMetrics struct { // tenant basis. 
*TenantsStorageMetrics + // LoadSplitterMetrics stores metrics for load-based splitter split key. + *split.LoadSplitterMetrics + // Replica metrics. ReplicaCount *metric.Gauge // Does not include uninitialized or reserved replicas. ReservedReplicaCount *metric.Gauge @@ -2185,6 +2203,10 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { sm := &StoreMetrics{ registry: storeRegistry, TenantsStorageMetrics: newTenantsStorageMetrics(), + LoadSplitterMetrics: &split.LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metaPopularKeyCount), + NoSplitKeyCount: metric.NewCounter(metaNoSplitKeyCount), + }, // Replica metrics. ReplicaCount: metric.NewGauge(metaReplicaCount), @@ -2516,6 +2538,7 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { } storeRegistry.AddMetricStruct(sm) + storeRegistry.AddMetricStruct(sm.LoadSplitterMetrics) return sm } diff --git a/pkg/kv/kvserver/replica.go b/pkg/kv/kvserver/replica.go index e3f8e39c2cc2..027c60123182 100644 --- a/pkg/kv/kvserver/replica.go +++ b/pkg/kv/kvserver/replica.go @@ -1150,8 +1150,8 @@ func (r *Replica) SetMVCCStatsForTesting(stats *enginepb.MVCCStats) { // works when the load based splitting cluster setting is enabled. // // Use QueriesPerSecond() for current QPS stats for all other purposes. -func (r *Replica) GetMaxSplitQPS() (float64, bool) { - return r.loadBasedSplitter.MaxQPS(r.Clock().PhysicalTime()) +func (r *Replica) GetMaxSplitQPS(ctx context.Context) (float64, bool) { + return r.loadBasedSplitter.MaxQPS(ctx, r.Clock().PhysicalTime()) } // GetLastSplitQPS returns the Replica's most recent queries/s request rate. @@ -1160,8 +1160,8 @@ func (r *Replica) GetMaxSplitQPS() (float64, bool) { // works when the load based splitting cluster setting is enabled. // // Use QueriesPerSecond() for current QPS stats for all other purposes. -func (r *Replica) GetLastSplitQPS() float64 { - return r.loadBasedSplitter.LastQPS(r.Clock().PhysicalTime()) +func (r *Replica) GetLastSplitQPS(ctx context.Context) float64 { + return r.loadBasedSplitter.LastQPS(ctx, r.Clock().PhysicalTime()) } // ContainsKey returns whether this range contains the specified key. diff --git a/pkg/kv/kvserver/replica_command.go b/pkg/kv/kvserver/replica_command.go index 148b0493612d..acc70a38bc18 100644 --- a/pkg/kv/kvserver/replica_command.go +++ b/pkg/kv/kvserver/replica_command.go @@ -77,7 +77,7 @@ func (r *Replica) AdminSplit( err := r.executeAdminCommandWithDescriptor(ctx, func(desc *roachpb.RangeDescriptor) error { var err error - reply, err = r.adminSplitWithDescriptor(ctx, args, desc, true /* delayable */, reason) + reply, err = r.adminSplitWithDescriptor(ctx, args, desc, true /* delayable */, reason, false /* findFirstSafeKey */) return err }) return reply, err @@ -296,7 +296,6 @@ func splitTxnStickyUpdateAttempt( // affirmative the descriptor is passed to AdminSplit, which performs a // Conditional Put on the RangeDescriptor to ensure that no other operation has // modified the range in the time the decision was being made. -// TODO(tschottdorf): should assert that split key is not a local key. // // See the comment on splitTrigger for details on the complexities. 
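// findFirstSafeKey, when set, relaxes the requirement that args.SplitKey be a
// safe, existing key: the command instead searches for the first safe split
// key at or after args.SplitKey. Its only current user is load-based
// splitting, whose suggested keys are sampled from requests and may fall
// inside a SQL row (see the findFirstSafeKey handling below).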
func (r *Replica) adminSplitWithDescriptor( @@ -305,6 +304,7 @@ func (r *Replica) adminSplitWithDescriptor( desc *roachpb.RangeDescriptor, delayable bool, reason string, + findFirstSafeKey bool, ) (roachpb.AdminSplitResponse, error) { var err error var reply roachpb.AdminSplitResponse @@ -345,11 +345,36 @@ func (r *Replica) adminSplitWithDescriptor( ri := r.GetRangeInfo(ctx) return reply, roachpb.NewRangeKeyMismatchErrorWithCTPolicy(ctx, args.Key, args.Key, desc, &ri.Lease, ri.ClosedTimestampPolicy) } - foundSplitKey = args.SplitKey + // When findFirstSafeKey is true, we find the first key after or at + // args.SplitKey which is a safe split to split at. The current user of + // findFirstSafeKey is load based splitting, which only has knowledge of + // sampled keys from batch requests. These sampled keys can be + // arbitrarily within SQL rows due to column family keys. + // + // Not every caller requires a real key as a split point (creating empty + // table), however when we cannot verify the split key as safe, the most + // reliable method is checking existing keys. + if findFirstSafeKey { + var desiredSplitKey roachpb.RKey + if desiredSplitKey, err = keys.Addr(args.SplitKey); err != nil { + return reply, err + } + if foundSplitKey, err = storage.MVCCFirstSplitKey( + ctx, r.store.engine, desiredSplitKey, + desc.StartKey, desc.EndKey, + ); err != nil { + return reply, errors.Wrap(err, "unable to determine split key") + } else if foundSplitKey == nil { + return reply, unsplittableRangeError{} + } + } else { + foundSplitKey = args.SplitKey + } } if !kvserverbase.ContainsKey(desc, foundSplitKey) { - return reply, errors.Errorf("requested split key %s out of bounds of %s", args.SplitKey, r) + return reply, errors.Errorf("requested split key %s (found=%s) out of bounds of %s", + args.SplitKey, foundSplitKey, r) } // If predicate keys are specified, make sure they are contained by this diff --git a/pkg/kv/kvserver/replica_eval_context_span.go b/pkg/kv/kvserver/replica_eval_context_span.go index 4cee31883efa..ea103a9ec97f 100644 --- a/pkg/kv/kvserver/replica_eval_context_span.go +++ b/pkg/kv/kvserver/replica_eval_context_span.go @@ -131,14 +131,14 @@ func (rec SpanSetReplicaEvalContext) GetMVCCStats() enginepb.MVCCStats { // GetMaxSplitQPS returns the Replica's maximum queries/s rate for splitting and // merging purposes. -func (rec SpanSetReplicaEvalContext) GetMaxSplitQPS() (float64, bool) { - return rec.i.GetMaxSplitQPS() +func (rec SpanSetReplicaEvalContext) GetMaxSplitQPS(ctx context.Context) (float64, bool) { + return rec.i.GetMaxSplitQPS(ctx) } // GetLastSplitQPS returns the Replica's most recent queries/s rate for // splitting and merging purposes. 
-func (rec SpanSetReplicaEvalContext) GetLastSplitQPS() float64 { - return rec.i.GetLastSplitQPS() +func (rec SpanSetReplicaEvalContext) GetLastSplitQPS(ctx context.Context) float64 { + return rec.i.GetLastSplitQPS(ctx) } // CanCreateTxnRecord determines whether a transaction record can be created diff --git a/pkg/kv/kvserver/replica_init.go b/pkg/kv/kvserver/replica_init.go index d60bf9600cfb..34203d9ab9c2 100644 --- a/pkg/kv/kvserver/replica_init.go +++ b/pkg/kv/kvserver/replica_init.go @@ -98,7 +98,7 @@ func newUnloadedReplica( return float64(SplitByLoadQPSThreshold.Get(&store.cfg.Settings.SV)) }, func() time.Duration { return kvserverbase.SplitByLoadMergeDelay.Get(&store.cfg.Settings.SV) - }) + }, store.metrics.LoadSplitterMetrics) r.mu.proposals = map[kvserverbase.CmdIDKey]*ProposalData{} r.mu.checksums = map[uuid.UUID]*replicaChecksum{} r.mu.proposalBuf.Init((*replicaProposer)(r), tracker.NewLockfreeTracker(), r.Clock(), r.ClusterSettings()) diff --git a/pkg/kv/kvserver/replica_send.go b/pkg/kv/kvserver/replica_send.go index ab5a705ff0a2..aa920d3f544e 100644 --- a/pkg/kv/kvserver/replica_send.go +++ b/pkg/kv/kvserver/replica_send.go @@ -401,6 +401,17 @@ func (r *Replica) executeBatchWithConcurrencyRetries( var requestEvalKind concurrency.RequestEvalKind var g *concurrency.Guard defer func() { + // Handle load-based splitting, if necessary. + if pErr == nil && br != nil { + if len(ba.Requests) != len(br.Responses) { + log.KvDistribution.Errorf(ctx, + "Requests and responses should be equal lengths: # of requests = %d, # of responses = %d", + len(ba.Requests), len(br.Responses)) + } else { + r.recordBatchForLoadBasedSplitting(ctx, ba, br) + } + } + // NB: wrapped to delay g evaluation to its value when returning. if g != nil { r.concMgr.FinishReq(g) @@ -412,7 +423,7 @@ func (r *Replica) executeBatchWithConcurrencyRetries( // commands and wait even if the circuit breaker is tripped. pp = poison.Policy_Wait } - for first := true; ; first = false { + for { // Exit loop if context has been canceled or timed out. if err := ctx.Err(); err != nil { return nil, nil, roachpb.NewError(errors.Wrap(err, "aborted during Replica.Send")) @@ -432,11 +443,6 @@ func (r *Replica) executeBatchWithConcurrencyRetries( } } - // Handle load-based splitting, if necessary. - if first { - r.recordBatchForLoadBasedSplitting(ctx, ba, latchSpans) - } - // Acquire latches to prevent overlapping requests from executing until // this request completes. After latching, wait on any conflicting locks // to ensure that the request has full isolation during evaluation. This diff --git a/pkg/kv/kvserver/replica_split_load.go b/pkg/kv/kvserver/replica_split_load.go index 15adcc0dad81..34d26e3ead83 100644 --- a/pkg/kv/kvserver/replica_split_load.go +++ b/pkg/kv/kvserver/replica_split_load.go @@ -12,11 +12,14 @@ package kvserver import ( "context" + "time" - "github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset" + "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/cockroachdb/errors" ) // SplitByLoadEnabled wraps "kv.range_split.by_load_enabled". @@ -48,18 +51,169 @@ func (r *Replica) SplitByLoadEnabled() bool { !r.store.TestingKnobs().DisableLoadBasedSplitting } +// getResponseBoundarySpan computes the union span of the true spans that were +// iterated over using the request span and the response's resumeSpan. 
+// +// Assumptions: +// 1. br != nil +// 2. len(ba.Requests) == len(br.Responses) +// Assumptions are checked in executeBatchWithConcurrencyRetries. +func getResponseBoundarySpan( + ba *roachpb.BatchRequest, br *roachpb.BatchResponse, +) (responseBoundarySpan roachpb.Span) { + addSpanToBoundary := func(span roachpb.Span) { + if !responseBoundarySpan.Valid() { + responseBoundarySpan = span + } else { + responseBoundarySpan = responseBoundarySpan.Combine(span) + } + } + for i, respUnion := range br.Responses { + reqHeader := ba.Requests[i].GetInner().Header() + resp := respUnion.GetInner() + resumeSpan := resp.Header().ResumeSpan + if resumeSpan == nil { + // Fully evaluated. + addSpanToBoundary(reqHeader.Span()) + continue + } + + switch resp.(type) { + case *roachpb.GetResponse: + // The request did not evaluate. Ignore it. + continue + case *roachpb.ScanResponse: + // Not reverse (->) + // Request: [key...............endKey) + // ResumeSpan: [key......endKey) + // True span: [key......key) + // + // Assumptions (not checked to minimize overhead): + // reqHeader.EndKey == resumeSpan.EndKey + // reqHeader.Key <= resumeSpan.Key. + if reqHeader.Key.Equal(resumeSpan.Key) { + // The request did not evaluate. Ignore it. + continue + } + addSpanToBoundary(roachpb.Span{ + Key: reqHeader.Key, + EndKey: resumeSpan.Key, + }) + case *roachpb.ReverseScanResponse: + // Reverse (<-) + // Request: [key...............endKey) + // ResumeSpan: [key......endKey) + // True span: [endKey...endKey) + // + // Assumptions (not checked to minimize overhead): + // reqHeader.Key == resumeSpan.Key + // resumeSpan.EndKey <= reqHeader.EndKey. + if reqHeader.EndKey.Equal(resumeSpan.EndKey) { + // The request did not evaluate. Ignore it. + continue + } + addSpanToBoundary(roachpb.Span{ + Key: resumeSpan.EndKey, + EndKey: reqHeader.EndKey, + }) + default: + // Consider it fully evaluated, which is safe. + addSpanToBoundary(reqHeader.Span()) + } + } + return +} + // recordBatchForLoadBasedSplitting records the batch's spans to be considered // for load based splitting. func (r *Replica) recordBatchForLoadBasedSplitting( - ctx context.Context, ba *roachpb.BatchRequest, spans *spanset.SpanSet, + ctx context.Context, ba *roachpb.BatchRequest, br *roachpb.BatchResponse, ) { if !r.SplitByLoadEnabled() { return } - shouldInitSplit := r.loadBasedSplitter.Record(timeutil.Now(), len(ba.Requests), func() roachpb.Span { - return spans.BoundarySpan(spanset.SpanGlobal) + shouldInitSplit := r.loadBasedSplitter.Record(ctx, timeutil.Now(), len(ba.Requests), func() roachpb.Span { + return getResponseBoundarySpan(ba, br) }) if shouldInitSplit { r.store.splitQueue.MaybeAddAsync(ctx, r, r.store.Clock().NowAsClockTimestamp()) } } + +// loadSplitKey returns a suggested load split key for the range if it exists, +// otherwise it returns nil. If there were any errors encountered when +// validating the split key, the error is returned as well. It is guaranteed +// that the key returned, if non-nil, will be greater than the start key of the +// range and also within the range bounds. +// +// NOTE: The returned split key CAN BE BETWEEN A SQL ROW, The split key +// returned should only be used to engage a split via adminSplitWithDescriptor +// where findFirstSafeKey is set to true. 
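+//
+// A minimal sketch of the intended call pattern (illustrative only; the real
+// caller is the split queue, so the exact wiring below is an assumption):
+//
+//	if key := r.loadSplitKey(ctx, r.Clock().PhysicalTime()); key != nil {
+//		args := roachpb.AdminSplitRequest{SplitKey: key}
+//		// findFirstSafeKey must be true because key may fall inside a SQL row.
+//		_, _ = r.adminSplitWithDescriptor(
+//			ctx, args, r.Desc(), false /* delayable */, "load", true /* findFirstSafeKey */)
+//	}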
+func (r *Replica) loadSplitKey(ctx context.Context, now time.Time) roachpb.Key { + var splitKey roachpb.Key + if overrideFn := r.store.cfg.TestingKnobs.LoadBasedSplittingOverrideKey; overrideFn != nil { + var useSplitKey bool + if splitKey, useSplitKey = overrideFn(r.GetRangeID()); useSplitKey { + return splitKey + } + } else { + splitKey = r.loadBasedSplitter.MaybeSplitKey(ctx, now) + } + + if splitKey == nil { + return nil + } + + // If the splitKey belongs to a Table range, try and shorten the key to just + // the row prefix. This allows us to check that splitKey doesn't map to the + // first key of the range here. If the split key contains column families, it + // is possible that the full key is strictly after every existing key for + // that row. e.g. for a table row where the table ID is 100, index ID is 1, + // primary key is a, and the column family ID is 3 (length=1): + // + // splitKey = /Table/100/1/"a"/3/1 + // existing = [..., /Table/100/1/"a"/2/1] + // + // We would not split at /Table/100/1/"a" as there's no key >= the splitKey + // in the range. + // + // NB: We handle unsafe split keys in replica.adminSplitWithDescriptor, so it + // isn't an issue if we return an unsafe key here. See the case where + // findFirstSafeKey is true. + if keyRowPrefix, err := keys.EnsureSafeSplitKey(splitKey); err == nil { + splitKey = keyRowPrefix + } + + // We swallow the error here and instead log an event. It is currently + // expected that the load based splitter may return the start key of the + // range. + if err := splitKeyPreCheck(r.Desc().RSpan(), splitKey); err != nil { + log.KvDistribution.VEventf(ctx, 1, "suggested load split key not usable: %s", err) + return nil + } + + return splitKey +} + +// splitKeyPreCheck checks that a split key is addressable and not the same as +// the start key. An error is returned if these are not true. Additional checks +// are made in adminSplitWithDescriptor when a split request is processed by +// the replica. +func splitKeyPreCheck(rspan roachpb.RSpan, splitKey roachpb.Key) error { + splitRKey, err := keys.Addr(splitKey) + if err != nil { + return err + } + + // If the split key is equal to the start key of the range, it is treated as + // a no-op in adminSplitWithDescriptor, however it is treated as an error + // here because we shouldn't be suggesting split keys that are identical to + // the start key of the range. + if splitRKey.Equal(rspan.Key) { + return errors.Errorf( + "split key is equal to range start key (split_key=%s)", + splitRKey) + } + + return nil +} diff --git a/pkg/kv/kvserver/replica_split_load_test.go b/pkg/kv/kvserver/replica_split_load_test.go new file mode 100644 index 000000000000..3d04da3546b9 --- /dev/null +++ b/pkg/kv/kvserver/replica_split_load_test.go @@ -0,0 +1,297 @@ +/// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package kvserver + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/keys" + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/stretchr/testify/assert" +) + +func roachpbKey(key uint32) roachpb.Key { + return keys.SystemSQLCodec.TablePrefix(key) +} + +func requestHeaderWithNilEndKey(key uint32) roachpb.RequestHeader { + return roachpb.RequestHeader{ + Key: roachpbKey(key), + } +} + +func requestHeader(key uint32, endKey uint32) roachpb.RequestHeader { + return roachpb.RequestHeader{ + Key: roachpbKey(key), + EndKey: roachpbKey(endKey), + } +} + +func responseHeaderWithNilResumeSpan() roachpb.ResponseHeader { + return roachpb.ResponseHeader{ + ResumeSpan: nil, + } +} + +func responseHeaderWithNilEndKey(key uint32) roachpb.ResponseHeader { + return roachpb.ResponseHeader{ + ResumeSpan: &roachpb.Span{ + Key: roachpbKey(key), + }, + } +} + +func responseHeader(key uint32, endKey uint32) roachpb.ResponseHeader { + return roachpb.ResponseHeader{ + ResumeSpan: &roachpb.Span{ + Key: roachpbKey(key), + EndKey: roachpbKey(endKey), + }, + } +} + +func requestUnionGet(requestHeader roachpb.RequestHeader) roachpb.RequestUnion { + return roachpb.RequestUnion{ + Value: &roachpb.RequestUnion_Get{ + Get: &roachpb.GetRequest{ + RequestHeader: requestHeader, + }, + }, + } +} + +func responseUnionGet(responseHeader roachpb.ResponseHeader) roachpb.ResponseUnion { + return roachpb.ResponseUnion{ + Value: &roachpb.ResponseUnion_Get{ + Get: &roachpb.GetResponse{ + ResponseHeader: responseHeader, + }, + }, + } +} + +func requestUnionScan(requestHeader roachpb.RequestHeader) roachpb.RequestUnion { + return roachpb.RequestUnion{ + Value: &roachpb.RequestUnion_Scan{ + Scan: &roachpb.ScanRequest{ + RequestHeader: requestHeader, + }, + }, + } +} + +func responseUnionScan(responseHeader roachpb.ResponseHeader) roachpb.ResponseUnion { + return roachpb.ResponseUnion{ + Value: &roachpb.ResponseUnion_Scan{ + Scan: &roachpb.ScanResponse{ + ResponseHeader: responseHeader, + }, + }, + } +} + +func requestUnionReverseScan(requestHeader roachpb.RequestHeader) roachpb.RequestUnion { + return roachpb.RequestUnion{ + Value: &roachpb.RequestUnion_ReverseScan{ + ReverseScan: &roachpb.ReverseScanRequest{ + RequestHeader: requestHeader, + }, + }, + } +} + +func responseUnionReverseScan(responseHeader roachpb.ResponseHeader) roachpb.ResponseUnion { + return roachpb.ResponseUnion{ + Value: &roachpb.ResponseUnion_ReverseScan{ + ReverseScan: &roachpb.ReverseScanResponse{ + ResponseHeader: responseHeader, + }, + }, + } +} + +func requestUnionDeleteRange(requestHeader roachpb.RequestHeader) roachpb.RequestUnion { + return roachpb.RequestUnion{ + Value: &roachpb.RequestUnion_DeleteRange{ + DeleteRange: &roachpb.DeleteRangeRequest{ + RequestHeader: requestHeader, + }, + }, + } +} + +func responseUnionDeleteRange(responseHeader roachpb.ResponseHeader) roachpb.ResponseUnion { + return roachpb.ResponseUnion{ + Value: &roachpb.ResponseUnion_DeleteRange{ + DeleteRange: &roachpb.DeleteRangeResponse{ + ResponseHeader: responseHeader, + }, + }, + } +} + +func TestGetResponseBoundarySpan(t *testing.T) { + defer leaktest.AfterTest(t)() + testCases := []struct { + ba *roachpb.BatchRequest + br *roachpb.BatchResponse + expectedResponseBoundarySpan roachpb.Span + }{ + { + ba: &roachpb.BatchRequest{ + Requests: []roachpb.RequestUnion{ + requestUnionGet(requestHeaderWithNilEndKey(100)), + }, + }, + br: &roachpb.BatchResponse{ + Responses: []roachpb.ResponseUnion{ + 
responseUnionGet(responseHeaderWithNilResumeSpan()), + }, + }, + expectedResponseBoundarySpan: roachpb.Span{ + Key: roachpbKey(100), + }, + }, + { + ba: &roachpb.BatchRequest{ + Requests: []roachpb.RequestUnion{ + requestUnionScan(requestHeader(100, 900)), + }, + }, + br: &roachpb.BatchResponse{ + Responses: []roachpb.ResponseUnion{ + responseUnionScan(responseHeaderWithNilResumeSpan()), + }, + }, + expectedResponseBoundarySpan: roachpb.Span{ + Key: roachpbKey(100), + EndKey: roachpbKey(900), + }, + }, + { + ba: &roachpb.BatchRequest{ + Requests: []roachpb.RequestUnion{ + requestUnionScan(requestHeader(100, 900)), + }, + }, + br: &roachpb.BatchResponse{ + Responses: []roachpb.ResponseUnion{ + responseUnionScan(responseHeader(113, 900)), + }, + }, + expectedResponseBoundarySpan: roachpb.Span{ + Key: roachpbKey(100), + EndKey: roachpbKey(113), + }, + }, + { + ba: &roachpb.BatchRequest{ + Requests: []roachpb.RequestUnion{ + requestUnionReverseScan(requestHeader(100, 900)), + }, + }, + br: &roachpb.BatchResponse{ + Responses: []roachpb.ResponseUnion{ + responseUnionReverseScan(responseHeader(100, 879)), + }, + }, + expectedResponseBoundarySpan: roachpb.Span{ + Key: roachpbKey(879), + EndKey: roachpbKey(900), + }, + }, + { + ba: &roachpb.BatchRequest{ + Requests: []roachpb.RequestUnion{ + requestUnionDeleteRange(requestHeader(100, 900)), + }, + }, + br: &roachpb.BatchResponse{ + Responses: []roachpb.ResponseUnion{ + responseUnionDeleteRange(responseHeader(113, 900)), + }, + }, + expectedResponseBoundarySpan: roachpb.Span{ + Key: roachpbKey(100), + EndKey: roachpbKey(900), + }, + }, + { + ba: &roachpb.BatchRequest{ + Requests: []roachpb.RequestUnion{ + requestUnionGet(requestHeaderWithNilEndKey(100)), + }, + }, + br: &roachpb.BatchResponse{ + Responses: []roachpb.ResponseUnion{ + responseUnionGet(responseHeaderWithNilEndKey(100)), + }, + }, + expectedResponseBoundarySpan: roachpb.Span{}, + }, + { + ba: &roachpb.BatchRequest{ + Requests: []roachpb.RequestUnion{ + requestUnionScan(requestHeader(100, 900)), + }, + }, + br: &roachpb.BatchResponse{ + Responses: []roachpb.ResponseUnion{ + responseUnionScan(responseHeader(100, 900)), + }, + }, + expectedResponseBoundarySpan: roachpb.Span{}, + }, + { + ba: &roachpb.BatchRequest{ + Requests: []roachpb.RequestUnion{ + requestUnionReverseScan(requestHeader(100, 900)), + }, + }, + br: &roachpb.BatchResponse{ + Responses: []roachpb.ResponseUnion{ + responseUnionReverseScan(responseHeader(100, 900)), + }, + }, + expectedResponseBoundarySpan: roachpb.Span{}, + }, + { + ba: &roachpb.BatchRequest{ + Requests: []roachpb.RequestUnion{ + requestUnionScan(requestHeader(500, 600)), + requestUnionReverseScan(requestHeader(475, 625)), + requestUnionGet(requestHeaderWithNilEndKey(480)), + requestUnionReverseScan(requestHeader(500, 510)), + requestUnionScan(requestHeader(700, 800)), + }, + }, + br: &roachpb.BatchResponse{ + Responses: []roachpb.ResponseUnion{ + responseUnionScan(responseHeader(550, 600)), + responseUnionReverseScan(responseHeader(475, 525)), + responseUnionGet(responseHeaderWithNilResumeSpan()), + responseUnionReverseScan(responseHeaderWithNilResumeSpan()), + responseUnionScan(responseHeader(700, 800)), + }, + }, + expectedResponseBoundarySpan: roachpb.Span{ + Key: roachpbKey(480), + EndKey: roachpbKey(625), + }, + }, + } + for i, test := range testCases { + responseBoundarySpan := getResponseBoundarySpan(test.ba, test.br) + assert.Equal(t, test.expectedResponseBoundarySpan, responseBoundarySpan, "Expected response boundary span %s, got %s in test %d", + 
test.expectedResponseBoundarySpan, responseBoundarySpan, i) + } +} diff --git a/pkg/kv/kvserver/split/BUILD.bazel b/pkg/kv/kvserver/split/BUILD.bazel index 354061351004..5a6e668deafc 100644 --- a/pkg/kv/kvserver/split/BUILD.bazel +++ b/pkg/kv/kvserver/split/BUILD.bazel @@ -10,8 +10,9 @@ go_library( importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/split", visibility = ["//visibility:public"], deps = [ - "//pkg/keys", "//pkg/roachpb", + "//pkg/util/log", + "//pkg/util/metric", "//pkg/util/syncutil", ], ) @@ -28,8 +29,8 @@ go_test( deps = [ "//pkg/keys", "//pkg/roachpb", - "//pkg/util/encoding", "//pkg/util/leaktest", + "//pkg/util/metric", "//pkg/util/stop", "//pkg/util/timeutil", "@com_github_stretchr_testify//assert", diff --git a/pkg/kv/kvserver/split/decider.go b/pkg/kv/kvserver/split/decider.go index c29e316d2eee..c2c2f8fb796e 100644 --- a/pkg/kv/kvserver/split/decider.go +++ b/pkg/kv/kvserver/split/decider.go @@ -13,14 +13,17 @@ package split import ( + "context" "time" - "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/syncutil" ) const minSplitSuggestionInterval = time.Minute +const minNoSplitKeyLoggingMetricsInterval = time.Minute const minQueriesPerSecondSampleDuration = time.Second // A Decider collects measurements about the activity (measured in qps) on a @@ -48,10 +51,21 @@ const minQueriesPerSecondSampleDuration = time.Second // prevent load-based splits from being merged away until the resulting ranges // have consistently remained below a certain QPS threshold for a sufficiently // long period of time. + +// LoadSplitterMetrics consists of metrics for load-based splitter split key. +type LoadSplitterMetrics struct { + PopularKeyCount *metric.Counter + NoSplitKeyCount *metric.Counter +} + +// Decider tracks the latest QPS and if certain conditions are met, records +// incoming requests to find potential split keys and checks if sampled +// candidate split keys satisfy certain requirements. type Decider struct { - intn func(n int) int // supplied to Init - qpsThreshold func() float64 // supplied to Init - qpsRetention func() time.Duration // supplied to Init + intn func(n int) int // supplied to Init + qpsThreshold func() float64 // supplied to Init + qpsRetention func() time.Duration // supplied to Init + loadSplitterMetrics *LoadSplitterMetrics // supplied to Init mu struct { syncutil.Mutex @@ -67,6 +81,9 @@ type Decider struct { // Fields tracking split key suggestions. splitFinder *Finder // populated when engaged or decided lastSplitSuggestion time.Time // last stipulation to client to carry out split + + // Fields tracking logging / metrics around load-based splitter split key. + lastNoSplitKeyLoggingMetrics time.Time } } @@ -79,10 +96,12 @@ func Init( intn func(n int) int, qpsThreshold func() float64, qpsRetention func() time.Duration, + loadSplitterMetrics *LoadSplitterMetrics, ) { lbs.intn = intn lbs.qpsThreshold = qpsThreshold lbs.qpsRetention = qpsRetention + lbs.loadSplitterMetrics = loadSplitterMetrics } // Record notifies the Decider that 'n' operations are being carried out which @@ -93,14 +112,16 @@ func Init( // If the returned boolean is true, a split key is available (though it may // disappear as more keys are sampled) and should be initiated by the caller, // which can call MaybeSplitKey to retrieve the suggested key. 
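//
// Abridged usage, mirroring the kvserver call site introduced in this change:
//
//	if d.Record(ctx, timeutil.Now(), len(ba.Requests), func() roachpb.Span {
//		return getResponseBoundarySpan(ba, br)
//	}) {
//		// Queue the range for a load-based split; the split key itself is
//		// retrieved later via d.MaybeSplitKey(ctx, timeutil.Now()).
//	}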
-func (d *Decider) Record(now time.Time, n int, span func() roachpb.Span) bool { +func (d *Decider) Record(ctx context.Context, now time.Time, n int, span func() roachpb.Span) bool { d.mu.Lock() defer d.mu.Unlock() - return d.recordLocked(now, n, span) + return d.recordLocked(ctx, now, n, span) } -func (d *Decider) recordLocked(now time.Time, n int, span func() roachpb.Span) bool { +func (d *Decider) recordLocked( + ctx context.Context, now time.Time, n int, span func() roachpb.Span, +) bool { d.mu.count += int64(n) // First compute requests per second since the last check. @@ -137,9 +158,28 @@ func (d *Decider) recordLocked(now time.Time, n int, span func() roachpb.Span) b if s.Key != nil { d.mu.splitFinder.Record(span(), d.intn) } - if now.Sub(d.mu.lastSplitSuggestion) > minSplitSuggestionInterval && d.mu.splitFinder.Ready(now) && d.mu.splitFinder.Key() != nil { - d.mu.lastSplitSuggestion = now - return true + if d.mu.splitFinder.Ready(now) { + if d.mu.splitFinder.Key() != nil { + if now.Sub(d.mu.lastSplitSuggestion) > minSplitSuggestionInterval { + d.mu.lastSplitSuggestion = now + return true + } + } else { + if now.Sub(d.mu.lastNoSplitKeyLoggingMetrics) > minNoSplitKeyLoggingMetricsInterval { + d.mu.lastNoSplitKeyLoggingMetrics = now + insufficientCounters, imbalance, tooManyContained, imbalanceAndTooManyContained := d.mu.splitFinder.NoSplitKeyCause() + if insufficientCounters < splitKeySampleSize { + popularKeyFrequency := d.mu.splitFinder.PopularKeyFrequency() + if popularKeyFrequency >= splitKeyThreshold { + d.loadSplitterMetrics.PopularKeyCount.Inc(1) + } + d.loadSplitterMetrics.NoSplitKeyCount.Inc(1) + log.KvDistribution.Infof(ctx, + "No split key found: insufficient counters = %d, imbalance = %d, too many contained = %d, imbalance and too many contained = %d, most popular key occurs in %d%% of samples", + insufficientCounters, imbalance, tooManyContained, imbalanceAndTooManyContained, int(popularKeyFrequency*100)) + } + } + } } } return false @@ -156,22 +196,22 @@ func (d *Decider) RecordMax(now time.Time, qps float64) { } // LastQPS returns the most recent QPS measurement. -func (d *Decider) LastQPS(now time.Time) float64 { +func (d *Decider) LastQPS(ctx context.Context, now time.Time) float64 { d.mu.Lock() defer d.mu.Unlock() - d.recordLocked(now, 0, nil) // force QPS computation + d.recordLocked(ctx, now, 0, nil) // force QPS computation return d.mu.lastQPS } // MaxQPS returns the maximum QPS measurement recorded over the retention // period. If the Decider has not been recording for a full retention period, // the method returns false. -func (d *Decider) MaxQPS(now time.Time) (float64, bool) { +func (d *Decider) MaxQPS(ctx context.Context, now time.Time) (float64, bool) { d.mu.Lock() defer d.mu.Unlock() - d.recordLocked(now, 0, nil) // force QPS computation + d.recordLocked(ctx, now, 0, nil) // force QPS computation return d.mu.maxQPS.maxQPS(now, d.qpsRetention()) } @@ -180,13 +220,15 @@ func (d *Decider) MaxQPS(now time.Time) (float64, bool) { // or if it wasn't able to determine a suitable split key. // // It is legal to call MaybeSplitKey at any time. -func (d *Decider) MaybeSplitKey(now time.Time) roachpb.Key { +// WARNING: The key returned from MaybeSplitKey has no guarantee of being a +// safe split key. The key is derived from sampled spans. See below. 
+func (d *Decider) MaybeSplitKey(ctx context.Context, now time.Time) roachpb.Key { var key roachpb.Key d.mu.Lock() defer d.mu.Unlock() - d.recordLocked(now, 0, nil) + d.recordLocked(ctx, now, 0, nil) if d.mu.splitFinder != nil && d.mu.splitFinder.Ready(now) { // We've found a key to split at. This key might be in the middle of a // SQL row. If we fail to rectify that, we'll cause SQL crashes: @@ -203,27 +245,20 @@ func (d *Decider) MaybeSplitKey(now time.Time) roachpb.Key { // // /Table/51/52/53/54/55 // - // (see TestDeciderCallsEnsureSafeSplitKey). - // - // The key found here isn't guaranteed to be a valid SQL column family - // key. This is because the keys are sampled from StartKey of requests - // hitting this replica. Ranged operations may well wish to exclude the - // start point by calling .Next() or may span multiple ranges, and so - // such a key may end up being passed to EnsureSafeSplitKey here. + // The key found here isn't guaranteed to be a valid SQL column family key. + // This is because the keys are sampled from StartKey and EndKey of + // requests hitting this replica. Ranged operations may well wish to + // exclude the start point by calling .Next() or may span multiple ranges, + // and so such a key may end up being returned. This is more common than + // one might think since SQL issues plenty of scans over all column + // families, meaning that we'll frequently find a key that has no column + // family suffix and thus errors out in EnsureSafeSplitKey. // - // We take the risk that the result may sometimes not be a good split - // point (or even in this range). - // - // Note that we ignore EnsureSafeSplitKey when it returns an error since - // that error only tells us that this key couldn't possibly be a SQL - // key. This is more common than one might think since SQL issues plenty - // of scans over all column families, meaning that we'll frequently find - // a key that has no column family suffix and thus errors out in - // EnsureSafeSplitKey. + // We do not attempt to validate the key is safe here, simply return it to + // the caller as the best possible split point found so far. See + // replica.adminSplitWithDescriptor for how split keys are handled when we + // aren't certain the provided key is safe. 
key = d.mu.splitFinder.Key() - if safeKey, err := keys.EnsureSafeSplitKey(key); err == nil { - key = safeKey - } } return key } @@ -240,6 +275,7 @@ func (d *Decider) Reset(now time.Time) { d.mu.maxQPS.reset(now, d.qpsRetention()) d.mu.splitFinder = nil d.mu.lastSplitSuggestion = time.Time{} + d.mu.lastNoSplitKeyLoggingMetrics = time.Time{} } // maxQPSTracker collects a series of queries-per-second measurement samples and diff --git a/pkg/kv/kvserver/split/decider_test.go b/pkg/kv/kvserver/split/decider_test.go index 3ea1821c3734..c41ca8edfe6f 100644 --- a/pkg/kv/kvserver/split/decider_test.go +++ b/pkg/kv/kvserver/split/decider_test.go @@ -11,15 +11,15 @@ package split import ( - "math" + "context" "math/rand" "testing" "time" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -38,7 +38,10 @@ func TestDecider(t *testing.T) { intn := rand.New(rand.NewSource(12)).Intn var d Decider - Init(&d, intn, func() float64 { return 10.0 }, func() time.Duration { return 2 * time.Second }) + Init(&d, intn, func() float64 { return 10.0 }, func() time.Duration { return 2 * time.Second }, &LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) op := func(s string) func() roachpb.Span { return func() roachpb.Span { return roachpb.Span{Key: roachpb.Key(s)} } @@ -46,38 +49,38 @@ func TestDecider(t *testing.T) { assertQPS := func(i int, expQPS float64) { t.Helper() - qps := d.LastQPS(ms(i)) + qps := d.LastQPS(context.Background(), ms(i)) assert.Equal(t, expQPS, qps) } assertMaxQPS := func(i int, expMaxQPS float64, expOK bool) { t.Helper() - maxQPS, ok := d.MaxQPS(ms(i)) + maxQPS, ok := d.MaxQPS(context.Background(), ms(i)) assert.Equal(t, expMaxQPS, maxQPS) assert.Equal(t, expOK, ok) } - assert.Equal(t, false, d.Record(ms(100), 1, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(100), 1, nil)) assertQPS(100, 0) assertMaxQPS(100, 0, false) assert.Equal(t, ms(100), d.mu.lastQPSRollover) assert.EqualValues(t, 1, d.mu.count) - assert.Equal(t, false, d.Record(ms(400), 3, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(400), 3, nil)) assertQPS(100, 0) assertQPS(700, 0) assertMaxQPS(400, 0, false) - assert.Equal(t, false, d.Record(ms(300), 3, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(300), 3, nil)) assertQPS(100, 0) assertMaxQPS(300, 0, false) - assert.Equal(t, false, d.Record(ms(900), 1, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(900), 1, nil)) assertQPS(0, 0) assertMaxQPS(900, 0, false) - assert.Equal(t, false, d.Record(ms(1099), 1, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(1099), 1, nil)) assertQPS(0, 0) assertMaxQPS(1099, 0, false) @@ -86,7 +89,7 @@ func TestDecider(t *testing.T) { // It won't engage because the duration between the rollovers is 1.1s, and // we had 10 events over that interval. 
- assert.Equal(t, false, d.Record(ms(1200), 1, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(1200), 1, nil)) assertQPS(0, float64(10)/float64(1.1)) assert.Equal(t, ms(1200), d.mu.lastQPSRollover) assertMaxQPS(1099, 0, false) @@ -95,11 +98,11 @@ func TestDecider(t *testing.T) { assert.Equal(t, nilFinder, d.mu.splitFinder) - assert.Equal(t, false, d.Record(ms(2199), 12, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(2199), 12, nil)) assert.Equal(t, nilFinder, d.mu.splitFinder) // 2200 is the next rollover point, and 12+1=13 qps should be computed. - assert.Equal(t, false, d.Record(ms(2200), 1, op("a"))) + assert.Equal(t, false, d.Record(context.Background(), ms(2200), 1, op("a"))) assert.Equal(t, ms(2200), d.mu.lastQPSRollover) assertQPS(0, float64(13)) assertMaxQPS(2200, 13, true) @@ -111,7 +114,7 @@ func TestDecider(t *testing.T) { // to split. We don't test the details of exactly when that happens because // this is done in the finder tests. tick := 2200 - for o := op("a"); !d.Record(ms(tick), 11, o); tick += 1000 { + for o := op("a"); !d.Record(context.Background(), ms(tick), 11, o); tick += 1000 { if tick/1000%2 == 0 { o = op("z") } else { @@ -119,7 +122,7 @@ func TestDecider(t *testing.T) { } } - assert.Equal(t, roachpb.Key("z"), d.MaybeSplitKey(ms(tick))) + assert.Equal(t, roachpb.Key("z"), d.MaybeSplitKey(context.Background(), ms(tick))) // We were told to split, but won't be told to split again for some time // to avoid busy-looping on split attempts. @@ -128,35 +131,35 @@ func TestDecider(t *testing.T) { if i%2 != 0 { o = op("a") } - assert.False(t, d.Record(ms(tick), 11, o)) - assert.True(t, d.LastQPS(ms(tick)) > 1.0) + assert.False(t, d.Record(context.Background(), ms(tick), 11, o)) + assert.True(t, d.LastQPS(context.Background(), ms(tick)) > 1.0) // Even though the split key remains. - assert.Equal(t, roachpb.Key("z"), d.MaybeSplitKey(ms(tick+999))) + assert.Equal(t, roachpb.Key("z"), d.MaybeSplitKey(context.Background(), ms(tick+999))) tick += 1000 } // But after minSplitSuggestionInterval of ticks, we get another one. - assert.True(t, d.Record(ms(tick), 11, op("a"))) + assert.True(t, d.Record(context.Background(), ms(tick), 11, op("a"))) assertQPS(tick, float64(11)) assertMaxQPS(tick, 11, true) // Split key suggestion vanishes once qps drops. tick += 1000 - assert.False(t, d.Record(ms(tick), 9, op("a"))) - assert.Equal(t, roachpb.Key(nil), d.MaybeSplitKey(ms(tick))) + assert.False(t, d.Record(context.Background(), ms(tick), 9, op("a"))) + assert.Equal(t, roachpb.Key(nil), d.MaybeSplitKey(context.Background(), ms(tick))) assert.Equal(t, nilFinder, d.mu.splitFinder) // Hammer a key with writes above threshold. There shouldn't be a split // since everyone is hitting the same key and load can't be balanced. for i := 0; i < 1000; i++ { - assert.False(t, d.Record(ms(tick), 11, op("q"))) + assert.False(t, d.Record(context.Background(), ms(tick), 11, op("q"))) tick += 1000 } assert.True(t, d.mu.splitFinder.Ready(ms(tick))) - assert.Equal(t, roachpb.Key(nil), d.MaybeSplitKey(ms(tick))) + assert.Equal(t, roachpb.Key(nil), d.MaybeSplitKey(context.Background(), ms(tick))) // But the finder keeps sampling to adapt to changing workload... for i := 0; i < 1000; i++ { - assert.False(t, d.Record(ms(tick), 11, op("p"))) + assert.False(t, d.Record(context.Background(), ms(tick), 11, op("p"))) tick += 1000 } @@ -168,7 +171,7 @@ func TestDecider(t *testing.T) { // Since the new workload is also not partitionable, nothing changes in // the decision. 
assert.True(t, d.mu.splitFinder.Ready(ms(tick))) - assert.Equal(t, roachpb.Key(nil), d.MaybeSplitKey(ms(tick))) + assert.Equal(t, roachpb.Key(nil), d.MaybeSplitKey(context.Background(), ms(tick))) // Get the decider engaged again so that we can test Reset(). for i := 0; i < 1000; i++ { @@ -176,16 +179,16 @@ func TestDecider(t *testing.T) { if i%2 != 0 { o = op("a") } - d.Record(ms(tick), 11, o) + d.Record(context.Background(), ms(tick), 11, o) tick += 500 } // The finder wants to split, until Reset is called, at which point it starts // back up at zero. assert.True(t, d.mu.splitFinder.Ready(ms(tick))) - assert.Equal(t, roachpb.Key("z"), d.MaybeSplitKey(ms(tick))) + assert.Equal(t, roachpb.Key("z"), d.MaybeSplitKey(context.Background(), ms(tick))) d.Reset(ms(tick)) - assert.Nil(t, d.MaybeSplitKey(ms(tick))) + assert.Nil(t, d.MaybeSplitKey(context.Background(), ms(tick))) assert.Nil(t, d.mu.splitFinder) } @@ -194,11 +197,14 @@ func TestDecider_MaxQPS(t *testing.T) { intn := rand.New(rand.NewSource(11)).Intn var d Decider - Init(&d, intn, func() float64 { return 100.0 }, func() time.Duration { return 10 * time.Second }) + Init(&d, intn, func() float64 { return 100.0 }, func() time.Duration { return 10 * time.Second }, &LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) assertMaxQPS := func(i int, expMaxQPS float64, expOK bool) { t.Helper() - maxQPS, ok := d.MaxQPS(ms(i)) + maxQPS, ok := d.MaxQPS(context.Background(), ms(i)) assert.Equal(t, expMaxQPS, maxQPS) assert.Equal(t, expOK, ok) } @@ -206,22 +212,22 @@ func TestDecider_MaxQPS(t *testing.T) { assertMaxQPS(1000, 0, false) // Record a large number of samples. - d.Record(ms(1500), 5, nil) - d.Record(ms(2000), 5, nil) - d.Record(ms(4500), 1, nil) - d.Record(ms(5000), 15, nil) - d.Record(ms(5500), 2, nil) - d.Record(ms(8000), 5, nil) - d.Record(ms(10000), 9, nil) + d.Record(context.Background(), ms(1500), 5, nil) + d.Record(context.Background(), ms(2000), 5, nil) + d.Record(context.Background(), ms(4500), 1, nil) + d.Record(context.Background(), ms(5000), 15, nil) + d.Record(context.Background(), ms(5500), 2, nil) + d.Record(context.Background(), ms(8000), 5, nil) + d.Record(context.Background(), ms(10000), 9, nil) assertMaxQPS(10000, 0, false) assertMaxQPS(11000, 17, true) // Record more samples with a lower QPS. 
- d.Record(ms(12000), 1, nil) - d.Record(ms(13000), 4, nil) - d.Record(ms(15000), 2, nil) - d.Record(ms(19000), 3, nil) + d.Record(context.Background(), ms(12000), 1, nil) + d.Record(context.Background(), ms(13000), 4, nil) + d.Record(context.Background(), ms(15000), 2, nil) + d.Record(context.Background(), ms(19000), 3, nil) assertMaxQPS(20000, 4.5, true) assertMaxQPS(21000, 4, true) @@ -232,76 +238,6 @@ func TestDecider_MaxQPS(t *testing.T) { assertMaxQPS(25000, 6, true) } -func TestDeciderCallsEnsureSafeSplitKey(t *testing.T) { - defer leaktest.AfterTest(t)() - intn := rand.New(rand.NewSource(11)).Intn - - var d Decider - Init(&d, intn, func() float64 { return 1.0 }, func() time.Duration { return time.Second }) - - baseKey := keys.SystemSQLCodec.TablePrefix(51) - for i := 0; i < 4; i++ { - baseKey = encoding.EncodeUvarintAscending(baseKey, uint64(52+i)) - } - c0 := func() roachpb.Span { return roachpb.Span{Key: append([]byte(nil), keys.MakeFamilyKey(baseKey, 1)...)} } - c1 := func() roachpb.Span { return roachpb.Span{Key: append([]byte(nil), keys.MakeFamilyKey(baseKey, 9)...)} } - - expK, err := keys.EnsureSafeSplitKey(c1().Key) - require.NoError(t, err) - - var k roachpb.Key - var now time.Time - for i := 0; i < 2*int(minSplitSuggestionInterval/time.Second); i++ { - now = now.Add(500 * time.Millisecond) - d.Record(now, 1, c0) - now = now.Add(500 * time.Millisecond) - d.Record(now, 1, c1) - k = d.MaybeSplitKey(now) - if len(k) != 0 { - break - } - } - - require.Equal(t, expK, k) -} - -func TestDeciderIgnoresEnsureSafeSplitKeyOnError(t *testing.T) { - defer leaktest.AfterTest(t)() - intn := rand.New(rand.NewSource(11)).Intn - - var d Decider - Init(&d, intn, func() float64 { return 1.0 }, func() time.Duration { return time.Second }) - - baseKey := keys.SystemSQLCodec.TablePrefix(51) - for i := 0; i < 4; i++ { - baseKey = encoding.EncodeUvarintAscending(baseKey, uint64(52+i)) - } - c0 := func() roachpb.Span { - return roachpb.Span{Key: append([]byte(nil), encoding.EncodeUvarintAscending(baseKey, math.MaxInt32+1)...)} - } - c1 := func() roachpb.Span { - return roachpb.Span{Key: append([]byte(nil), encoding.EncodeUvarintAscending(baseKey, math.MaxInt32+2)...)} - } - - _, err := keys.EnsureSafeSplitKey(c1().Key) - require.Error(t, err) - - var k roachpb.Key - var now time.Time - for i := 0; i < 2*int(minSplitSuggestionInterval/time.Second); i++ { - now = now.Add(500 * time.Millisecond) - d.Record(now, 1, c0) - now = now.Add(500 * time.Millisecond) - d.Record(now, 1, c1) - k = d.MaybeSplitKey(now) - if len(k) != 0 { - break - } - } - - require.Equal(t, c1().Key, k) -} - func TestMaxQPSTracker(t *testing.T) { defer leaktest.AfterTest(t)() @@ -392,3 +328,70 @@ func TestMaxQPSTracker(t *testing.T) { require.Equal(t, [6]float64{20, 27, 0, 0, 0, 0}, mt.windows) require.Equal(t, 1, mt.curIdx) } + +func TestDeciderMetrics(t *testing.T) { + defer leaktest.AfterTest(t)() + intn := rand.New(rand.NewSource(11)).Intn + timeStart := 1000 + + var dPopular Decider + Init(&dPopular, intn, func() float64 { return 1.0 }, func() time.Duration { return time.Second }, &LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) + + // No split key, popular key + for i := 0; i < 20; i++ { + dPopular.Record(context.Background(), ms(timeStart), 1, func() roachpb.Span { + return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))} + }) + } + for i := 1; i <= 2000; i++ { + dPopular.Record(context.Background(), ms(timeStart+i*50), 
1, func() roachpb.Span { + return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))} + }) + } + + assert.Equal(t, dPopular.loadSplitterMetrics.PopularKeyCount.Count(), int64(2)) + assert.Equal(t, dPopular.loadSplitterMetrics.NoSplitKeyCount.Count(), int64(2)) + + // No split key, not popular key + var dNotPopular Decider + Init(&dNotPopular, intn, func() float64 { return 1.0 }, func() time.Duration { return time.Second }, &LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) + for i := 0; i < 20; i++ { + dNotPopular.Record(context.Background(), ms(timeStart), 1, func() roachpb.Span { + return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))} + }) + } + for i := 1; i <= 2000; i++ { + dNotPopular.Record(context.Background(), ms(timeStart+i*50), 1, func() roachpb.Span { + return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(i))} + }) + } + + assert.Equal(t, dNotPopular.loadSplitterMetrics.PopularKeyCount.Count(), int64(0)) + assert.Equal(t, dNotPopular.loadSplitterMetrics.NoSplitKeyCount.Count(), int64(2)) + + // No split key, all insufficient counters + var dAllInsufficientCounters Decider + Init(&dAllInsufficientCounters, intn, func() float64 { return 1.0 }, func() time.Duration { return time.Second }, &LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) + for i := 0; i < 20; i++ { + dAllInsufficientCounters.Record(context.Background(), ms(timeStart), 1, func() roachpb.Span { + return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))} + }) + } + for i := 1; i <= 80; i++ { + dAllInsufficientCounters.Record(context.Background(), ms(timeStart+i*1000), 1, func() roachpb.Span { + return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))} + }) + } + + assert.Equal(t, dAllInsufficientCounters.loadSplitterMetrics.PopularKeyCount.Count(), int64(0)) + assert.Equal(t, dAllInsufficientCounters.loadSplitterMetrics.NoSplitKeyCount.Count(), int64(0)) +} diff --git a/pkg/kv/kvserver/split/finder.go b/pkg/kv/kvserver/split/finder.go index 942216a36db6..e622e6a6e044 100644 --- a/pkg/kv/kvserver/split/finder.go +++ b/pkg/kv/kvserver/split/finder.go @@ -13,6 +13,7 @@ package split import ( "bytes" "math" + "sort" "time" "github.com/cockroachdb/cockroach/pkg/roachpb" @@ -149,3 +150,53 @@ func (f *Finder) Key() roachpb.Key { } return f.samples[bestIdx].key } + +// NoSplitKeyCause iterates over all sampled candidate split keys and +// determines the number of samples that don't pass each split key requirement +// (e.g. insufficient counters, imbalance in left and right counters, too many +// contained counters, or a combination of the last two). 
+func (f *Finder) NoSplitKeyCause() ( + insufficientCounters, imbalance, tooManyContained, imbalanceAndTooManyContained int, +) { + for _, s := range f.samples { + if s.left+s.right+s.contained < splitKeyMinCounter { + insufficientCounters++ + } else { + balanceScore := math.Abs(float64(s.left-s.right)) / float64(s.left+s.right) + imbalanceBool := balanceScore >= splitKeyThreshold + containedScore := float64(s.contained) / float64(s.left+s.right+s.contained) + tooManyContainedBool := containedScore >= splitKeyContainedThreshold + if imbalanceBool && !tooManyContainedBool { + imbalance++ + } else if !imbalanceBool && tooManyContainedBool { + tooManyContained++ + } else if imbalanceBool && tooManyContainedBool { + imbalanceAndTooManyContained++ + } + } + } + return +} + +// PopularKeyFrequency returns the percentage that the most popular key appears +// in f.samples. +func (f *Finder) PopularKeyFrequency() float64 { + sort.Slice(f.samples[:], func(i, j int) bool { + return bytes.Compare(f.samples[i].key, f.samples[j].key) < 0 + }) + + currentKeyCount := 1 + popularKeyCount := 1 + for i := 1; i < len(f.samples); i++ { + if bytes.Equal(f.samples[i].key, f.samples[i-1].key) { + currentKeyCount++ + } else { + currentKeyCount = 1 + } + if popularKeyCount < currentKeyCount { + popularKeyCount = currentKeyCount + } + } + + return float64(popularKeyCount) / float64(splitKeySampleSize) +} diff --git a/pkg/kv/kvserver/split/finder_test.go b/pkg/kv/kvserver/split/finder_test.go index 6f6783868e78..0afd1844fc4c 100644 --- a/pkg/kv/kvserver/split/finder_test.go +++ b/pkg/kv/kvserver/split/finder_test.go @@ -13,6 +13,7 @@ package split import ( "bytes" "context" + "math/rand" "reflect" "testing" @@ -21,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/stop" "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/stretchr/testify/assert" ) // TestSplitFinderKey verifies the Key() method correctly @@ -271,3 +273,129 @@ func TestSplitFinderRecorder(t *testing.T) { } } } + +func TestFinderNoSplitKeyCause(t *testing.T) { + samples := [splitKeySampleSize]sample{} + for i, idx := range rand.Perm(splitKeySampleSize) { + if i < 5 { + // insufficient counters + samples[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(uint32(i)), + left: 0, + right: 0, + contained: splitKeyMinCounter - 1, + } + } else if i < 7 { + // imbalance + deviationLeft := rand.Intn(5) + deviationRight := rand.Intn(5) + samples[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(uint32(i)), + left: 25 + deviationLeft, + right: 15 - deviationRight, + contained: int(max(float64(splitKeyMinCounter-40-deviationLeft+deviationRight), float64(40+deviationLeft-deviationRight))), + } + } else if i < 13 { + // imbalance + deviationLeft := rand.Intn(5) + deviationRight := rand.Intn(5) + samples[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(uint32(i)), + left: 50 + deviationLeft, + right: 30 - deviationRight, + contained: int(max(float64(splitKeyMinCounter-80-deviationLeft+deviationRight), 0)), + } + } else { + // too many contained + contained := int(splitKeyMinCounter*splitKeyContainedThreshold + 1) + left := (splitKeyMinCounter - contained) / 2 + samples[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(uint32(i)), + left: left, + right: splitKeyMinCounter - left - contained, + contained: contained, + } + } + } + + finder := NewFinder(timeutil.Now()) + finder.samples = samples + insufficientCounters, imbalance, tooManyContained, imbalanceAndTooManyContained 
:= finder.NoSplitKeyCause() + assert.Equal(t, 5, insufficientCounters, "unexpected insufficient counters") + assert.Equal(t, 6, imbalance, "unexpected imbalance counters") + assert.Equal(t, 7, tooManyContained, "unexpected too many contained counters") + assert.Equal(t, 2, imbalanceAndTooManyContained, "unexpected imbalance and too many contained counters") +} + +func TestFinderPopularKeyFrequency(t *testing.T) { + uniqueKeySample := [splitKeySampleSize]sample{} + for i, idx := range rand.Perm(splitKeySampleSize) { + uniqueKeySample[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(uint32(i)), + } + } + twentyPercentPopularKeySample := [splitKeySampleSize]sample{} + for i, idx := range rand.Perm(splitKeySampleSize) { + var tableID uint32 + if i <= 15 { + tableID = uint32(i / 3) + } else { + tableID = 6 + } + twentyPercentPopularKeySample[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(tableID), + } + } + twentyFivePercentPopularKeySample := [splitKeySampleSize]sample{} + for i, idx := range rand.Perm(splitKeySampleSize) { + var tableID uint32 + if i < 8 || i >= 13 { + tableID = uint32(i / 4) + } else { + tableID = 2 + } + twentyFivePercentPopularKeySample[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(tableID), + } + } + fiftyPercentPopularKeySample := [splitKeySampleSize]sample{} + for i, idx := range rand.Perm(splitKeySampleSize) { + fiftyPercentPopularKeySample[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(uint32(i / 10)), + } + } + fiftyFivePercentPopularKeySample := [splitKeySampleSize]sample{} + for i, idx := range rand.Perm(splitKeySampleSize) { + var tableID uint32 + if i >= 11 { + tableID = uint32(1) + } + fiftyFivePercentPopularKeySample[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(tableID), + } + } + sameKeySample := [splitKeySampleSize]sample{} + for _, idx := range rand.Perm(splitKeySampleSize) { + sameKeySample[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(0), + } + } + + testCases := []struct { + samples [splitKeySampleSize]sample + expectedPopularKeyFrequency float64 + }{ + {uniqueKeySample, 0.05}, + {twentyPercentPopularKeySample, 0.2}, + {twentyFivePercentPopularKeySample, 0.25}, + {fiftyPercentPopularKeySample, 0.5}, + {fiftyFivePercentPopularKeySample, 0.55}, + {sameKeySample, 1}, + } + for i, test := range testCases { + finder := NewFinder(timeutil.Now()) + finder.samples = test.samples + popularKeyFrequency := finder.PopularKeyFrequency() + assert.Equal(t, test.expectedPopularKeyFrequency, popularKeyFrequency, "unexpected popular key frequency in test %d", i) + } +} diff --git a/pkg/kv/kvserver/split_queue.go b/pkg/kv/kvserver/split_queue.go index 514aaf98a48f..72c8e0508a2d 100644 --- a/pkg/kv/kvserver/split_queue.go +++ b/pkg/kv/kvserver/split_queue.go @@ -151,7 +151,7 @@ func (sq *splitQueue) shouldQueue( repl.GetMaxBytes(), repl.shouldBackpressureWrites(), confReader) if !shouldQ && repl.SplitByLoadEnabled() { - if splitKey := repl.loadBasedSplitter.MaybeSplitKey(timeutil.Now()); splitKey != nil { + if splitKey := repl.loadSplitKey(ctx, timeutil.Now()); splitKey != nil { shouldQ, priority = true, 1.0 // default priority } } @@ -209,6 +209,7 @@ func (sq *splitQueue) processAttempt( desc, false, /* delayable */ "span config", + false, /* findFirstSafeSplitKey */ ); err != nil { return false, errors.Wrapf(err, "unable to split %s at key %q", r, splitKey) } @@ -227,16 +228,17 @@ func (sq *splitQueue) processAttempt( desc, false, /* delayable */ fmt.Sprintf("%s above threshold size %s", humanizeutil.IBytes(size), 
humanizeutil.IBytes(maxBytes)), + false, /* findFirstSafeSplitKey */ ) return err == nil, err } now := timeutil.Now() - if splitByLoadKey := r.loadBasedSplitter.MaybeSplitKey(now); splitByLoadKey != nil { + if splitByLoadKey := r.loadSplitKey(ctx, now); splitByLoadKey != nil { batchHandledQPS, _ := r.QueriesPerSecond() raftAppliedQPS := r.WritesPerSecond() - splitQPS := r.loadBasedSplitter.LastQPS(now) + splitQPS := r.loadBasedSplitter.LastQPS(ctx, now) reason := fmt.Sprintf( "load at key %s (%.2f splitQPS, %.2f batches/sec, %.2f raft mutations/sec)", splitByLoadKey, @@ -256,6 +258,10 @@ func (sq *splitQueue) processAttempt( if expDelay := kvserverbase.SplitByLoadMergeDelay.Get(&sq.store.cfg.Settings.SV); expDelay > 0 { expTime = sq.store.Clock().Now().Add(expDelay.Nanoseconds(), 0) } + // The splitByLoadKey has no guarantee of being a safe key to split at (not + // between SQL rows). To sanitize the split point, pass + // findFirstSafeSplitKey set to true, so that the first key after the + // suggested split point which is safe to split at is used. if _, pErr := r.adminSplitWithDescriptor( ctx, roachpb.AdminSplitRequest{ @@ -268,6 +274,7 @@ func (sq *splitQueue) processAttempt( desc, false, /* delayable */ reason, + true, /* findFirstSafeSplitKey */ ); pErr != nil { return false, errors.Wrapf(pErr, "unable to split %s at key %q", r, splitByLoadKey) } diff --git a/pkg/kv/kvserver/testing_knobs.go b/pkg/kv/kvserver/testing_knobs.go index 271155e9e06c..56c58806e3fa 100644 --- a/pkg/kv/kvserver/testing_knobs.go +++ b/pkg/kv/kvserver/testing_knobs.go @@ -137,6 +137,10 @@ type StoreTestingKnobs struct { DisableReplicaRebalancing bool // DisableLoadBasedSplitting turns off LBS so no splits happen because of load. DisableLoadBasedSplitting bool + // LoadBasedSplittingOverrideKey returns a key which should be used for load + // based splitting, overriding any value returned from the real load based + // splitter. + LoadBasedSplittingOverrideKey func(rangeID roachpb.RangeID) (splitKey roachpb.Key, useSplitKey bool) // DisableSplitQueue disables the split queue. DisableSplitQueue bool // DisableTimeSeriesMaintenanceQueue disables the time series maintenance diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index 9ae85491a83b..f01d9a69540b 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -5436,7 +5436,27 @@ func MVCCFindSplitKey( // was dangerous because partitioning can split off ranges that do not start // at valid row keys. The keys that are present in the range, by contrast, are // necessarily valid row keys. - it.SeekGE(MakeMVCCMetadataKey(key.AsRawKey())) + minSplitKey, err := mvccMinSplitKey(it, key.AsRawKey()) + if err != nil { + return nil, err + } else if minSplitKey == nil { + return nil, nil + } + + splitKey, err := it.FindSplitKey(key.AsRawKey(), endKey.AsRawKey(), minSplitKey, targetSize) + if err != nil { + return nil, err + } + // Ensure the key is a valid split point that does not fall in the middle of a + // SQL row by removing the column family ID, if any, from the end of the key. + return keys.EnsureSafeSplitKey(splitKey.Key) +} + +// mvccMinSplitKey returns the minimum key that a range may be split at. The +// caller is responsible for setting the iterator upper bound to the range end +// key. The caller is also responsible for closing the iterator. 
+func mvccMinSplitKey(it MVCCIterator, startKey roachpb.Key) (roachpb.Key, error) { + it.SeekGE(MakeMVCCMetadataKey(startKey)) if ok, err := it.Valid(); err != nil { return nil, err } else if !ok { @@ -5460,14 +5480,60 @@ func MVCCFindSplitKey( // Allow a split at any key that sorts after it. minSplitKey = it.Key().Key.Next() } + return minSplitKey, nil +} - splitKey, err := it.FindSplitKey(key.AsRawKey(), endKey.AsRawKey(), minSplitKey, targetSize) +// MVCCFirstSplitKey returns the first key which is safe to split at and no +// less than desiredSplitKey in the range which spans [startKey,endKey). If a +// non-nil key is returned, it is safe to split at. If a nil key is returned, no +// safe split key could be determined. The safe split key returned is +// guaranteed to be: +// +// 1. Within [startKey,endKey). +// 2. No less than desiredSplitKey. +// 3. Greater than the first key in [startKey,endKey); or greater than all of the +// first row's keys if a table range. +// 4. Not in between the start and end of a row for table ranges. +// +// The returned split key is NOT guaranteed to be outside a no-split span, such +// as Meta2Max or Node Liveness. +func MVCCFirstSplitKey( + _ context.Context, reader Reader, desiredSplitKey, startKey, endKey roachpb.RKey, +) (roachpb.Key, error) { + // If the start key of the range is within the meta1 key space, the range + // cannot be split. + if startKey.Less(roachpb.RKey(keys.LocalMax)) { + return nil, nil + } + + it := reader.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{UpperBound: endKey.AsRawKey()}) + defer it.Close() + + // If the caller has provided a desiredSplitKey less than the minimum split + // key, we update the desired split key to be the minimum split key. This + // prevents splitting before the first row in a Table range, which would + // result in the LHS having no rows. + minSplitKey, err := mvccMinSplitKey(it, startKey.AsRawKey()) if err != nil { return nil, err + } else if minSplitKey == nil { + return nil, nil } - // Ensure the key is a valid split point that does not fall in the middle of a - // SQL row by removing the column family ID, if any, from the end of the key. - return keys.EnsureSafeSplitKey(splitKey.Key) + var seekKey roachpb.Key + if minSplitKey.Compare(desiredSplitKey.AsRawKey()) > 0 { + seekKey = minSplitKey + } else { + seekKey = desiredSplitKey.AsRawKey() + } + + it.SeekGE(MakeMVCCMetadataKey(seekKey)) + if ok, err := it.Valid(); err != nil { + return nil, err + } else if !ok { + return nil, nil + } + + return keys.EnsureSafeSplitKey(it.UnsafeKey().Key.Clone()) } // willOverflow returns true iff adding both inputs would under- or overflow diff --git a/pkg/storage/mvcc_test.go b/pkg/storage/mvcc_test.go index 72475e3383b7..427a4cd0cb7a 100644 --- a/pkg/storage/mvcc_test.go +++ b/pkg/storage/mvcc_test.go @@ -4121,23 +4121,6 @@ func TestFindValidSplitKeys(t *testing.T) { defer log.Scope(t).Close(t) userID := TestingUserDescID(0) - // Manually creates rows corresponding to the schema: - // CREATE TABLE t (id1 STRING, id2 STRING, ... PRIMARY KEY (id1, id2, ...)) - addTablePrefix := func(prefix roachpb.Key, id uint32, rowVals ...string) roachpb.Key { - tableKey := append(prefix, keys.SystemSQLCodec.TablePrefix(id)...) - rowKey := roachpb.Key(encoding.EncodeVarintAscending(tableKey, 1)) - for _, rowVal := range rowVals { - rowKey = encoding.EncodeStringAscending(rowKey, rowVal) - } - return rowKey - } - tablePrefix := func(id uint32, rowVals ...string) roachpb.Key { - return addTablePrefix(nil, id, rowVals...)
- } - addColFam := func(rowKey roachpb.Key, colFam uint32) roachpb.Key { - return keys.MakeFamilyKey(append([]byte(nil), rowKey...), colFam) - } - type testCase struct { keys []roachpb.Key rangeStart roachpb.Key // optional @@ -4276,16 +4259,16 @@ func TestFindValidSplitKeys(t *testing.T) { // or return the start key of the range. { keys: []roachpb.Key{ - addColFam(tablePrefix(userID, "a"), 1), - addColFam(tablePrefix(userID, "a"), 2), - addColFam(tablePrefix(userID, "a"), 3), - addColFam(tablePrefix(userID, "a"), 4), - addColFam(tablePrefix(userID, "a"), 5), - addColFam(tablePrefix(userID, "b"), 1), - addColFam(tablePrefix(userID, "c"), 1), + testAddColFam(testTablePrefix(userID, "a"), 1), + testAddColFam(testTablePrefix(userID, "a"), 2), + testAddColFam(testTablePrefix(userID, "a"), 3), + testAddColFam(testTablePrefix(userID, "a"), 4), + testAddColFam(testTablePrefix(userID, "a"), 5), + testAddColFam(testTablePrefix(userID, "b"), 1), + testAddColFam(testTablePrefix(userID, "c"), 1), }, - rangeStart: tablePrefix(userID, "a"), - expSplit: tablePrefix(userID, "b"), + rangeStart: testTablePrefix(userID, "a"), + expSplit: testTablePrefix(userID, "b"), expError: false, }, // More example table data. Make sure ranges at the start of a table can @@ -4293,58 +4276,58 @@ func TestFindValidSplitKeys(t *testing.T) { // break for such ranges. { keys: []roachpb.Key{ - addColFam(tablePrefix(userID, "a"), 1), - addColFam(tablePrefix(userID, "b"), 1), - addColFam(tablePrefix(userID, "c"), 1), - addColFam(tablePrefix(userID, "d"), 1), + testAddColFam(testTablePrefix(userID, "a"), 1), + testAddColFam(testTablePrefix(userID, "b"), 1), + testAddColFam(testTablePrefix(userID, "c"), 1), + testAddColFam(testTablePrefix(userID, "d"), 1), }, rangeStart: keys.SystemSQLCodec.TablePrefix(userID), - expSplit: tablePrefix(userID, "c"), + expSplit: testTablePrefix(userID, "c"), expError: false, }, // More example table data. Make sure ranges at the start of a table can // be split properly even in the presence of a large first row. { keys: []roachpb.Key{ - addColFam(tablePrefix(userID, "a"), 1), - addColFam(tablePrefix(userID, "a"), 2), - addColFam(tablePrefix(userID, "a"), 3), - addColFam(tablePrefix(userID, "a"), 4), - addColFam(tablePrefix(userID, "a"), 5), - addColFam(tablePrefix(userID, "b"), 1), - addColFam(tablePrefix(userID, "c"), 1), + testAddColFam(testTablePrefix(userID, "a"), 1), + testAddColFam(testTablePrefix(userID, "a"), 2), + testAddColFam(testTablePrefix(userID, "a"), 3), + testAddColFam(testTablePrefix(userID, "a"), 4), + testAddColFam(testTablePrefix(userID, "a"), 5), + testAddColFam(testTablePrefix(userID, "b"), 1), + testAddColFam(testTablePrefix(userID, "c"), 1), }, rangeStart: keys.SystemSQLCodec.TablePrefix(TestingUserDescID(0)), - expSplit: tablePrefix(userID, "b"), + expSplit: testTablePrefix(userID, "b"), expError: false, }, // One partition where partition key is the first column. Checks that // split logic is not confused by the special partition start key. 
{ keys: []roachpb.Key{ - addColFam(tablePrefix(userID, "a", "a"), 1), - addColFam(tablePrefix(userID, "a", "b"), 1), - addColFam(tablePrefix(userID, "a", "c"), 1), - addColFam(tablePrefix(userID, "a", "d"), 1), + testAddColFam(testTablePrefix(userID, "a", "a"), 1), + testAddColFam(testTablePrefix(userID, "a", "b"), 1), + testAddColFam(testTablePrefix(userID, "a", "c"), 1), + testAddColFam(testTablePrefix(userID, "a", "d"), 1), }, - rangeStart: tablePrefix(userID, "a"), - expSplit: tablePrefix(userID, "a", "c"), + rangeStart: testTablePrefix(userID, "a"), + expSplit: testTablePrefix(userID, "a", "c"), expError: false, }, // One partition with a large first row. Checks that our logic to avoid // splitting in the middle of a row still applies. { keys: []roachpb.Key{ - addColFam(tablePrefix(userID, "a", "a"), 1), - addColFam(tablePrefix(userID, "a", "a"), 2), - addColFam(tablePrefix(userID, "a", "a"), 3), - addColFam(tablePrefix(userID, "a", "a"), 4), - addColFam(tablePrefix(userID, "a", "a"), 5), - addColFam(tablePrefix(userID, "a", "b"), 1), - addColFam(tablePrefix(userID, "a", "c"), 1), + testAddColFam(testTablePrefix(userID, "a", "a"), 1), + testAddColFam(testTablePrefix(userID, "a", "a"), 2), + testAddColFam(testTablePrefix(userID, "a", "a"), 3), + testAddColFam(testTablePrefix(userID, "a", "a"), 4), + testAddColFam(testTablePrefix(userID, "a", "a"), 5), + testAddColFam(testTablePrefix(userID, "a", "b"), 1), + testAddColFam(testTablePrefix(userID, "a", "c"), 1), }, - rangeStart: tablePrefix(userID, "a"), - expSplit: tablePrefix(userID, "a", "b"), + rangeStart: testTablePrefix(userID, "a"), + expSplit: testTablePrefix(userID, "a", "b"), expError: false, }, } @@ -4366,17 +4349,7 @@ func TestFindValidSplitKeys(t *testing.T) { defer engine.Close() ms := &enginepb.MVCCStats{} - val := roachpb.MakeValueFromString(strings.Repeat("X", 10)) - for _, k := range test.keys { - // Add three MVCC versions of every key. Splits are not allowed - // between MVCC versions, so this shouldn't have any effect. - for j := 1; j <= 3; j++ { - ts := hlc.Timestamp{Logical: int32(j)} - if err := MVCCPut(ctx, engine, ms, []byte(k), ts, hlc.ClockTimestamp{}, val, nil); err != nil { - t.Fatal(err) - } - } - } + testPopulateKeysWithVersions(ctx, t, engine, ms, test.keys) rangeStart := test.keys[0] if len(test.rangeStart) > 0 { rangeStart = test.rangeStart @@ -4487,6 +4460,223 @@ func TestFindBalancedSplitKeys(t *testing.T) { } } +// testAddPrefix manually creates rows corresponding to the schema e.g. +// CREATE TABLE t (id1 STRING, id2 STRING, ... PRIMARY KEY (id1, id2, ...)) +func testAddPrefix(prefix roachpb.Key, id uint32, rowVals ...string) roachpb.Key { + tableKey := append(prefix, keys.SystemSQLCodec.TablePrefix(id)...) + rowKey := roachpb.Key(encoding.EncodeVarintAscending(tableKey, 1)) + for _, rowVal := range rowVals { + rowKey = encoding.EncodeStringAscending(rowKey, rowVal) + } + return rowKey +} + +func testTablePrefix(id uint32, rowVals ...string) roachpb.Key { + return testAddPrefix(nil, id, rowVals...) +} +func testAddColFam(rowKey roachpb.Key, colFam uint32) roachpb.Key { + return keys.MakeFamilyKey(append([]byte(nil), rowKey...), colFam) +} + +// testPopulateKeysWithVersions puts the keys into the engine provided. Each +// key is added with 3 MVCC versions with a XX.. value. 
+func testPopulateKeysWithVersions( + ctx context.Context, t *testing.T, engine Engine, ms *enginepb.MVCCStats, keys []roachpb.Key, +) { + val := roachpb.MakeValueFromString(strings.Repeat("X", 10)) + for _, k := range keys { + // Add three MVCC versions of every key. Splits are not allowed + // between MVCC versions, so this shouldn't have any effect. + for j := 1; j <= 3; j++ { + ts := hlc.Timestamp{Logical: int32(j)} + require.NoError( + t, + MVCCPut(ctx, engine, ms, []byte(k), ts, hlc.ClockTimestamp{}, val, nil), + ) + } + } +} + +// TestMVCCFirstSplitKey checks that the split key returned from +// MVCCFirstSplitKey is: +// (1) Within a range's bounds +// (2) No less than the desired split key. +// (3) Greater than the first key, or first row's keys in table ranges. +// (4) Not in between the start and end of a row for table ranges. +func TestMVCCFirstSplitKey(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + userID := TestingUserDescID(0) + + type splitExpect struct { + desired roachpb.Key + expected roachpb.Key + } + + testCases := []struct { + keys []roachpb.Key + startKey, endKey roachpb.Key + splits []splitExpect + }{ + { + // No keys, no splits. + keys: []roachpb.Key{}, + startKey: roachpb.Key("a"), + endKey: roachpb.Key("z"), + splits: []splitExpect{ + {desired: roachpb.Key("a"), expected: nil}, + {desired: roachpb.Key("m"), expected: nil}, + {desired: roachpb.Key("z"), expected: nil}, + }, + }, + { + // meta1 cannot be split. Previously, this test would cause a panic in + // mvccMinSplitKey, called by MVCCFirstSplitKey. The iterator is + // initialized with a global key constraint from the endKey + // ("\x02\xff\xff"), but we seekGE the start key (MinKey="") which is + // local because it is before LocalMax (0x02). + keys: []roachpb.Key{ + roachpb.Key("\x02"), + roachpb.Key("\x02\x00"), + roachpb.Key("\x02\xff"), + }, + startKey: keys.MinKey, + endKey: keys.Meta1KeyMax, + splits: []splitExpect{ + {desired: keys.MinKey, expected: nil}, + {desired: roachpb.Key("\x02"), expected: nil}, + {desired: roachpb.Key("\x02\x00"), expected: nil}, + }, + }, + { + // All keys are outside the range, no keys to split at so expect no + // splits. + keys: []roachpb.Key{ + roachpb.Key("0"), + roachpb.Key("c"), + roachpb.Key("d"), + }, + startKey: roachpb.Key("a"), + endKey: roachpb.Key("c"), + splits: []splitExpect{ + {desired: roachpb.Key("a"), expected: nil}, + {desired: roachpb.Key("b"), expected: nil}, + {desired: roachpb.Key("c"), expected: nil}, + }, + }, + { + // Only one key within the range, require at least two keys to split. + keys: []roachpb.Key{ + // (0) is outside the range [a,c) + roachpb.Key("0"), + roachpb.Key("a"), + // (c) is outside of the range [a,c). + roachpb.Key("c"), + }, + startKey: roachpb.Key("a"), + endKey: roachpb.Key("c"), + splits: []splitExpect{ + {desired: roachpb.Key("a"), expected: nil}, + {desired: roachpb.Key("b"), expected: nil}, + {desired: roachpb.Key("c"), expected: nil}, + }, + }, + { + // Enough keys to realize a split on c. Only desiredSplitKeys <= c should + // split at c. + keys: []roachpb.Key{ + // (0) is outside the range [a,e) + roachpb.Key("0"), + roachpb.Key("b"), + roachpb.Key("c"), + // (e) is outside of the range [a,e). + roachpb.Key("e"), + }, + startKey: roachpb.Key("a"), + endKey: roachpb.Key("e"), + splits: []splitExpect{ + // Should iterate to the first split key after minSplitKey which is (c).
+ {desired: roachpb.Key("0"), expected: roachpb.Key("c")}, + {desired: roachpb.Key("b"), expected: roachpb.Key("c")}, + {desired: roachpb.Key("c"), expected: roachpb.Key("c")}, + {desired: keys.MinKey, expected: roachpb.Key("c")}, + // Desired split key is after the last key in the range (c), shouldn't + // split. + {desired: roachpb.Key("d"), expected: nil}, + }, + }, + { + keys: []roachpb.Key{ + testAddColFam(testTablePrefix(userID, "a"), 1), + testAddColFam(testTablePrefix(userID, "b"), 1), + testAddColFam(testTablePrefix(userID, "b"), 2), + testAddColFam(testTablePrefix(userID, "b"), 3), + testAddColFam(testTablePrefix(userID, "d"), 1), + // (e,1) is outside of the range [a,e) + testAddColFam(testTablePrefix(userID, "e"), 1), + }, + startKey: testTablePrefix(userID, "a"), + endKey: testTablePrefix(userID, "e"), + splits: []splitExpect{ + {desired: testAddColFam(testTablePrefix(userID, "a"), 0), expected: testTablePrefix(userID, "b")}, + {desired: testAddColFam(testTablePrefix(userID, "b"), 3), expected: testTablePrefix(userID, "b")}, + // The first key after the desired split key is (d,1), expect a split + // at the prefix (d). + {desired: testAddColFam(testTablePrefix(userID, "b"), 4), expected: testTablePrefix(userID, "d")}, + // Desired split key is after the last key in the range (d,1), + // shouldn't split. + {desired: testAddColFam(testTablePrefix(userID, "d"), 2), expected: nil}, + }, + }, + { + // One partition key, where the partition key is the first column (a). + keys: []roachpb.Key{ + testAddColFam(testTablePrefix(userID, "a", "a"), 1), + testAddColFam(testTablePrefix(userID, "a", "a"), 3), + testAddColFam(testTablePrefix(userID, "a", "b"), 1), + testAddColFam(testTablePrefix(userID, "a", "c"), 1), + // (a,d,0) is outside the range [a,(a,d)). + testAddColFam(testTablePrefix(userID, "a", "d"), 0), + }, + startKey: testTablePrefix(userID, "a"), + endKey: testTablePrefix(userID, "a", "d"), + splits: []splitExpect{ + {desired: testTablePrefix(userID, "a"), expected: testTablePrefix(userID, "a", "b")}, + {desired: testAddColFam(testTablePrefix(userID, "a", "a"), 3), expected: testTablePrefix(userID, "a", "b")}, + {desired: testAddColFam(testTablePrefix(userID, "a", "b"), 2), expected: testTablePrefix(userID, "a", "c")}, + // Desired split key is after the last key in the range (a,c,1), + // shouldn't split. + {desired: testAddColFam(testTablePrefix(userID, "a", "c"), 2), expected: nil}, + {desired: testTablePrefix(userID, "a", "e"), expected: nil}, + }, + }, + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("%v", tc.keys), + func(t *testing.T) { + ctx := context.Background() + engine := NewDefaultInMemForTesting() + defer engine.Close() + + testPopulateKeysWithVersions(ctx, t, engine, &enginepb.MVCCStats{}, tc.keys) + rangeStartAddr := keys.MustAddr(tc.startKey) + rangeEndAddr := keys.MustAddr(tc.endKey) + for _, split := range tc.splits { + t.Run(fmt.Sprintf("%v", split.desired), func(t *testing.T) { + desiredSplitAddr := keys.MustAddr(split.desired) + splitKey, err := MVCCFirstSplitKey(ctx, engine, desiredSplitAddr, rangeStartAddr, rangeEndAddr) + // NB: We don't expect errors. If no split key can be found, we + // expect a nil splitKey to be returned. + require.NoError(t, err) + require.Equal(t, split.expected, splitKey) + }) + } + }) + } +} + // TestMVCCGarbageCollect writes a series of gc'able bytes and then // sends an MVCC GC request and verifies cleared values and updated // stats.
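Note: as background for the row-boundary sanitization exercised by the tests above, the following is a minimal standalone sketch (not part of the patch; the table ID 104 and row value "a" are arbitrary) showing how keys.EnsureSafeSplitKey maps a column-family key back to its row prefix. MVCCFirstSplitKey applies the same helper to the first key at or past the desired split point.

package main

import (
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/util/encoding"
)

func main() {
	// Build the key /Table/104/1/"a"/1, i.e. column family 1 of row "a".
	rowKey := encoding.EncodeStringAscending(
		encoding.EncodeVarintAscending(keys.SystemSQLCodec.TablePrefix(104), 1), "a")
	famKey := keys.MakeFamilyKey(append([]byte(nil), rowKey...), 1)

	// EnsureSafeSplitKey strips the column family suffix, returning the row
	// prefix /Table/104/1/"a", which is a legal split point; splitting at
	// famKey itself would cut the row in half.
	safeKey, err := keys.EnsureSafeSplitKey(famKey)
	fmt.Println(safeKey, err)
}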
diff --git a/pkg/ts/catalog/chart_catalog.go b/pkg/ts/catalog/chart_catalog.go index c91f7b93110a..62d38fde0654 100644 --- a/pkg/ts/catalog/chart_catalog.go +++ b/pkg/ts/catalog/chart_catalog.go @@ -741,6 +741,18 @@ var charts = []sectionDescription{ }, }, }, + { + Organization: [][]string{{DistributionLayer, "Load", "Splitter"}}, + Charts: []chartDescription{ + { + Title: "Load Splitter", + Metrics: []string{ + "kv.loadsplitter.popularkey", + "kv.loadsplitter.nosplitkey", + }, + }, + }, + }, { Organization: [][]string{ {DistributionLayer, "Split Queue"},
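Note: the chart catalog entry above only references the metric names. The sketch below is an illustrative guess at how the two counters behind LoadSplitterMetrics could be declared; the package placement, variable names, constructor, and Help/Measurement strings are assumptions, only the metric names come from the chart catalog entry.

package split

import "github.com/cockroachdb/cockroach/pkg/util/metric"

// Hypothetical metadata for the load splitter counters.
var (
	metaPopularKeyCount = metric.Metadata{
		Name:        "kv.loadsplitter.popularkey",
		Help:        "Load-based splitter could not find a split key and the most popular sampled split key accounts for a large fraction of the samples.",
		Measurement: "Occurrences",
		Unit:        metric.Unit_COUNT,
	}
	metaNoSplitKeyCount = metric.Metadata{
		Name:        "kv.loadsplitter.nosplitkey",
		Help:        "Load-based splitter could not find a split key.",
		Measurement: "Occurrences",
		Unit:        metric.Unit_COUNT,
	}
)

// newLoadSplitterMetrics is a hypothetical constructor mirroring how the
// tests in this patch build LoadSplitterMetrics with metric.NewCounter.
func newLoadSplitterMetrics() *LoadSplitterMetrics {
	return &LoadSplitterMetrics{
		PopularKeyCount: metric.NewCounter(metaPopularKeyCount),
		NoSplitKeyCount: metric.NewCounter(metaNoSplitKeyCount),
	}
}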