split: only retain safe sql key samples

Previously, the weighted load split finder could retain samples for any key that was a start key of a request span. This was problematic as the split key which is used, always has user column suffixes removed by calling `keys.EnsureSafeSplitKey` - meaning that it was possible for every sampled key to refer to the same key after this call. We could convert every incoming key into the safe split key, however this doesn't seem like a worthwhile trade off the additional overhead - as every request touches this hot path. This patch updates the sampling logic to always store the safe split key when a sampled key is replaced or added. The samples may still contain only a single key, however now it is much more likely that we will log due imbalance (all requests to the RHS) and bump the split finder no split key metrics. Release note: None
kvoli · Apr 27, 2023 · e4f003b · e4f003b
1 parent b959020
commit e4f003b
Show file tree

Hide file tree

Showing 2 changed files with 33 additions and 0 deletions.
diff --git a/pkg/kv/kvserver/split/weighted_finder.go b/pkg/kv/kvserver/split/weighted_finder.go
@@ -16,6 +16,7 @@ import (
 	"sort"
 	"time"
 
+	"github.com/cockroachdb/cockroach/pkg/keys"
 	"github.com/cockroachdb/cockroach/pkg/roachpb"
 )
 
@@ -123,6 +124,23 @@ func (f *WeightedFinder) record(key roachpb.Key, weight float64) {
 		idx = f.randSource.Intn(splitKeySampleSize)
 	}
 
+	// We only wish to retain safe split keys as samples, as they are the split
+	// keys that will eventually be returned from Key(). If instead we kept every
+	// key, it is possible for all sample keys to map to the same split key
+	// implicitly with column families. Note this doesn't stop every sample being
+	// the same key, however it will cause no split key logging and bump metrics.
+	// TODO(kvoli): When the single key situation arises, we should backoff
+	// attempting to split. There is a fixed overhead on the hotpath when the
+	// finder is active.
+	if safeKey, err := keys.EnsureSafeSplitKey(key); err == nil {
+		key = safeKey
+	} else {
+		// If the key is not a safe split key, instead ignore it and don't bump any
+		// counters. This biases the algorithm slightly, as keys which would be
+		// invalid are not sampled, nor their impact recorded if they reach here.
+		return
+	}
+
 	// Note we always use the start key of the span. We could
 	// take the average of the byte slices, but that seems
 	// unnecessarily complex for practical usage.

diff --git a/pkg/kv/kvserver/split/weighted_finder_test.go b/pkg/kv/kvserver/split/weighted_finder_test.go
@@ -191,6 +191,17 @@ func TestSplitWeightedFinderRecorder(t *testing.T) {
 	stopper := stop.NewStopper()
 	defer stopper.Stop(context.Background())
 
+	colKey := func(prefix roachpb.Key) roachpb.Key {
+		return keys.MakeFamilyKey(prefix, 9)
+	}
+
+	colFamSpan := func(span roachpb.Span) roachpb.Span {
+		return roachpb.Span{
+			Key:    colKey(span.Key),
+			EndKey: colKey(span.EndKey),
+		}
+	}
+
 	const ReservoirKeyOffset = 1000
 
 	// Test recording a key query before the reservoir is full.
@@ -284,12 +295,16 @@ func TestSplitWeightedFinderRecorder(t *testing.T) {
 	}{
 		// Test recording a key query before the reservoir is full.
 		{basicSpan, basicWeight, WFLargestRandSource{}, 0, basicReservoir, expectedBasicReservoir},
+		{colFamSpan(basicSpan), basicWeight, WFLargestRandSource{}, 0, basicReservoir, expectedBasicReservoir},
 		// Test recording a key query after the reservoir is full with replacement.
 		{replacementSpan, replacementWeight, ZeroRandSource{}, splitKeySampleSize + 1, replacementReservoir, expectedReplacementReservoir},
+		{colFamSpan(replacementSpan), replacementWeight, ZeroRandSource{}, splitKeySampleSize + 1, replacementReservoir, expectedReplacementReservoir},
 		// Test recording a key query after the reservoir is full without replacement.
 		{fullSpan, fullWeight, WFLargestRandSource{}, splitKeySampleSize + 1, fullReservoir, expectedFullReservoir},
+		{colFamSpan(fullSpan), fullWeight, WFLargestRandSource{}, splitKeySampleSize + 1, fullReservoir, expectedFullReservoir},
 		// Test recording a spanning query.
 		{spanningSpan, spanningWeight, WFLargestRandSource{}, splitKeySampleSize + 1, spanningReservoir, expectedSpanningReservoir},
+		{colFamSpan(spanningSpan), spanningWeight, WFLargestRandSource{}, splitKeySampleSize + 1, spanningReservoir, expectedSpanningReservoir},
 	}
 
 	for i, test := range testCases {