diff --git a/pkg/kv/kvserver/split/weighted_finder.go b/pkg/kv/kvserver/split/weighted_finder.go index 773c548e613f..a1a66e354a39 100644 --- a/pkg/kv/kvserver/split/weighted_finder.go +++ b/pkg/kv/kvserver/split/weighted_finder.go @@ -16,6 +16,7 @@ import ( "sort" "time" + "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" ) @@ -123,6 +124,23 @@ func (f *WeightedFinder) record(key roachpb.Key, weight float64) { idx = f.randSource.Intn(splitKeySampleSize) } + // We only wish to retain safe split keys as samples, as they are the split + // keys that will eventually be returned from Key(). If instead we kept every + // key, it is possible for all sample keys to map to the same split key + // implicitly with column families. Note this doesn't stop every sample being + // the same key, however it will cause no split key logging and bump metrics. + // TODO(kvoli): When the single key situation arises, we should backoff + // attempting to split. There is a fixed overhead on the hotpath when the + // finder is active. + if safeKey, err := keys.EnsureSafeSplitKey(key); err == nil { + key = safeKey + } else { + // If the key is not a safe split key, instead ignore it and don't bump any + // counters. This biases the algorithm slightly, as keys which would be + // invalid are not sampled, nor their impact recorded if they reach here. + return + } + // Note we always use the start key of the span. We could // take the average of the byte slices, but that seems // unnecessarily complex for practical usage. diff --git a/pkg/kv/kvserver/split/weighted_finder_test.go b/pkg/kv/kvserver/split/weighted_finder_test.go index 6db32a6bae3f..5a86b79c052d 100644 --- a/pkg/kv/kvserver/split/weighted_finder_test.go +++ b/pkg/kv/kvserver/split/weighted_finder_test.go @@ -191,6 +191,17 @@ func TestSplitWeightedFinderRecorder(t *testing.T) { stopper := stop.NewStopper() defer stopper.Stop(context.Background()) + colKey := func(prefix roachpb.Key) roachpb.Key { + return keys.MakeFamilyKey(prefix, 9) + } + + colFamSpan := func(span roachpb.Span) roachpb.Span { + return roachpb.Span{ + Key: colKey(span.Key), + EndKey: colKey(span.EndKey), + } + } + const ReservoirKeyOffset = 1000 // Test recording a key query before the reservoir is full. @@ -284,12 +295,16 @@ func TestSplitWeightedFinderRecorder(t *testing.T) { }{ // Test recording a key query before the reservoir is full. {basicSpan, basicWeight, WFLargestRandSource{}, 0, basicReservoir, expectedBasicReservoir}, + {colFamSpan(basicSpan), basicWeight, WFLargestRandSource{}, 0, basicReservoir, expectedBasicReservoir}, // Test recording a key query after the reservoir is full with replacement. {replacementSpan, replacementWeight, ZeroRandSource{}, splitKeySampleSize + 1, replacementReservoir, expectedReplacementReservoir}, + {colFamSpan(replacementSpan), replacementWeight, ZeroRandSource{}, splitKeySampleSize + 1, replacementReservoir, expectedReplacementReservoir}, // Test recording a key query after the reservoir is full without replacement. {fullSpan, fullWeight, WFLargestRandSource{}, splitKeySampleSize + 1, fullReservoir, expectedFullReservoir}, + {colFamSpan(fullSpan), fullWeight, WFLargestRandSource{}, splitKeySampleSize + 1, fullReservoir, expectedFullReservoir}, // Test recording a spanning query. {spanningSpan, spanningWeight, WFLargestRandSource{}, splitKeySampleSize + 1, spanningReservoir, expectedSpanningReservoir}, + {colFamSpan(spanningSpan), spanningWeight, WFLargestRandSource{}, splitKeySampleSize + 1, spanningReservoir, expectedSpanningReservoir}, } for i, test := range testCases {