Skip to content

Commit

Permalink
split: only retain safe sql key samples
Browse files Browse the repository at this point in the history
Previously, the weighted load split finder could retain samples for any
key that was a start key of a request span.

This was problematic as the split key which is used, always has user
column suffixes removed by calling `keys.EnsureSafeSplitKey` - meaning
that it was possible for every sampled key to refer to the same key
after this call.

We could convert every incoming key into the safe split key, however
this doesn't seem like a worthwhile trade off the additional overhead -
as every request touches this hot path.

This patch updates the sampling logic to always store the safe split
key when a sampled key is replaced or added.

The samples may still contain only a single key, however now it is much
more likely that we will log due imbalance (all requests to the RHS) and
bump the split finder no split key metrics.

Release note: None
  • Loading branch information
kvoli committed Apr 27, 2023
1 parent b959020 commit e4f003b
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
18 changes: 18 additions & 0 deletions pkg/kv/kvserver/split/weighted_finder.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"sort"
"time"

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
)

Expand Down Expand Up @@ -123,6 +124,23 @@ func (f *WeightedFinder) record(key roachpb.Key, weight float64) {
idx = f.randSource.Intn(splitKeySampleSize)
}

// We only wish to retain safe split keys as samples, as they are the split
// keys that will eventually be returned from Key(). If instead we kept every
// key, it is possible for all sample keys to map to the same split key
// implicitly with column families. Note this doesn't stop every sample being
// the same key, however it will cause no split key logging and bump metrics.
// TODO(kvoli): When the single key situation arises, we should backoff
// attempting to split. There is a fixed overhead on the hotpath when the
// finder is active.
if safeKey, err := keys.EnsureSafeSplitKey(key); err == nil {
key = safeKey
} else {
// If the key is not a safe split key, instead ignore it and don't bump any
// counters. This biases the algorithm slightly, as keys which would be
// invalid are not sampled, nor their impact recorded if they reach here.
return
}

// Note we always use the start key of the span. We could
// take the average of the byte slices, but that seems
// unnecessarily complex for practical usage.
Expand Down
15 changes: 15 additions & 0 deletions pkg/kv/kvserver/split/weighted_finder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,17 @@ func TestSplitWeightedFinderRecorder(t *testing.T) {
stopper := stop.NewStopper()
defer stopper.Stop(context.Background())

colKey := func(prefix roachpb.Key) roachpb.Key {
return keys.MakeFamilyKey(prefix, 9)
}

colFamSpan := func(span roachpb.Span) roachpb.Span {
return roachpb.Span{
Key: colKey(span.Key),
EndKey: colKey(span.EndKey),
}
}

const ReservoirKeyOffset = 1000

// Test recording a key query before the reservoir is full.
Expand Down Expand Up @@ -284,12 +295,16 @@ func TestSplitWeightedFinderRecorder(t *testing.T) {
}{
// Test recording a key query before the reservoir is full.
{basicSpan, basicWeight, WFLargestRandSource{}, 0, basicReservoir, expectedBasicReservoir},
{colFamSpan(basicSpan), basicWeight, WFLargestRandSource{}, 0, basicReservoir, expectedBasicReservoir},
// Test recording a key query after the reservoir is full with replacement.
{replacementSpan, replacementWeight, ZeroRandSource{}, splitKeySampleSize + 1, replacementReservoir, expectedReplacementReservoir},
{colFamSpan(replacementSpan), replacementWeight, ZeroRandSource{}, splitKeySampleSize + 1, replacementReservoir, expectedReplacementReservoir},
// Test recording a key query after the reservoir is full without replacement.
{fullSpan, fullWeight, WFLargestRandSource{}, splitKeySampleSize + 1, fullReservoir, expectedFullReservoir},
{colFamSpan(fullSpan), fullWeight, WFLargestRandSource{}, splitKeySampleSize + 1, fullReservoir, expectedFullReservoir},
// Test recording a spanning query.
{spanningSpan, spanningWeight, WFLargestRandSource{}, splitKeySampleSize + 1, spanningReservoir, expectedSpanningReservoir},
{colFamSpan(spanningSpan), spanningWeight, WFLargestRandSource{}, splitKeySampleSize + 1, spanningReservoir, expectedSpanningReservoir},
}

for i, test := range testCases {
Expand Down

0 comments on commit e4f003b

Please sign in to comment.