Skip to content

Commit

Permalink
split: add string methods for debugging/logging
Browse files Browse the repository at this point in the history
Previously, there was no way to peak the contents of the load based
splitter samples when inspecting nodes. This commit adds string methods
for the `UnweightedFinder`, `WeightedFinder` and `Decider`.

This commit also swaps the order of the should split check to avoid
computation. As a result the output of `cpu_decider_cartesian` changed
slightly as the no split key logging message is now ordered differently.

Informs: #103672
Informs: #103483

Release note: None
  • Loading branch information
kvoli committed Jun 5, 2023
1 parent 6b81f53 commit 7887654
Show file tree
Hide file tree
Showing 9 changed files with 191 additions and 35 deletions.
7 changes: 4 additions & 3 deletions pkg/cmd/roachtest/tests/split.go
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,8 @@ func runLoadSplits(ctx context.Context, t test.Test, c cluster.Cluster, params s
c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.Node(4))
startOpts := option.DefaultStartOptsNoBackups()
startOpts.RoachprodOpts.ExtraArgs = append(startOpts.RoachprodOpts.ExtraArgs,
"--vmodule=split_queue=2,store_rebalancer=2,allocator=2,replicate_queue=2",
"--vmodule=split_queue=2,store_rebalancer=2,allocator=2,replicate_queue=2,"+
"decider=3,replica_split_load=1",
)
c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), c.All())

Expand All @@ -424,7 +425,7 @@ func runLoadSplits(ctx context.Context, t test.Test, c cluster.Cluster, params s
// Set the objective to QPS or CPU and update the load split threshold
// appropriately.
if params.qpsThreshold > 0 {
t.Status("setting split objective to QPS with threshold %d", params.qpsThreshold)
t.Status("setting split objective to QPS with threshold ", params.qpsThreshold)
if err := setLoadBasedRebalancingObjective(ctx, db, "qps"); err != nil {
return err
}
Expand All @@ -433,7 +434,7 @@ func runLoadSplits(ctx context.Context, t test.Test, c cluster.Cluster, params s
return err
}
} else if params.cpuThreshold > 0 {
t.Status("setting split objective to CPU with threshold %s", params.cpuThreshold)
t.Status("setting split objective to CPU with threshold ", params.cpuThreshold)
if err := setLoadBasedRebalancingObjective(ctx, db, "cpu"); err != nil {
return err
}
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/split/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ go_library(
"//pkg/util/log",
"//pkg/util/metric",
"//pkg/util/syncutil",
"@com_github_cockroachdb_redact//:redact",
],
)

Expand Down
52 changes: 40 additions & 12 deletions pkg/kv/kvserver/split/decider.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ package split

import (
"context"
"fmt"
"math/rand"
"time"

Expand All @@ -23,13 +22,15 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/redact"
)

const minSplitSuggestionInterval = time.Minute
const minNoSplitKeyLoggingMetricsInterval = time.Minute
const minPerSecondSampleDuration = time.Second

type LoadBasedSplitter interface {
redact.SafeFormatter
// Record informs the LoadBasedSplitter about where the span lies with regard
// to the keys in the samples.
Record(span roachpb.Span, weight float64)
Expand All @@ -46,11 +47,14 @@ type LoadBasedSplitter interface {
// number of samples that don't pass each split key requirement if not all
// samples are invalid due to insufficient counters, otherwise returns an
// empty string.
NoSplitKeyCauseLogMsg() string
NoSplitKeyCauseLogMsg() redact.RedactableString

// PopularKeyFrequency returns the percentage that the most popular key
// appears in the sampled candidate split keys.
PopularKeyFrequency() float64

// String formats the state of the load based splitter.
String() string
}

type LoadSplitConfig interface {
Expand Down Expand Up @@ -154,6 +158,7 @@ type Decider struct {
// Fields tracking split key suggestions.
splitFinder LoadBasedSplitter // populated when engaged or decided
lastSplitSuggestion time.Time // last stipulation to client to carry out split
suggestionsMade int // suggestions made since last reset

// Fields tracking logging / metrics around load-based splitter split key.
lastNoSplitKeyLoggingMetrics time.Time
Expand All @@ -175,6 +180,23 @@ func Init(
lbs.mu.objective = objective
}

type lockedDecider Decider

func (ld *lockedDecider) SafeFormat(w redact.SafePrinter, r rune) {
w.Printf(
"objective=%v count=%d suggestions=%d last=%.1f last-roll=%v last-suggest=%v",
ld.mu.objective, ld.mu.count, ld.mu.suggestionsMade, ld.mu.lastStatVal,
ld.mu.lastStatRollover, ld.mu.lastSplitSuggestion,
)
if ld.mu.splitFinder != nil {
w.Printf(" %v", ld.mu.splitFinder)
}
}

func (ld *lockedDecider) String() string {
return redact.StringWithoutMarkers(ld)
}

// Record notifies the Decider that 'n' operations are being carried out which
// operate on the span returned by the supplied method. The closure will only
// be called when necessary, that is, when the Decider is considering a split
Expand Down Expand Up @@ -231,20 +253,25 @@ func (d *Decider) recordLocked(
if s.Key != nil {
d.mu.splitFinder.Record(span(), float64(n))
}
if d.mu.splitFinder.Ready(now) {
if d.mu.splitFinder.Key() != nil {
if now.Sub(d.mu.lastSplitSuggestion) > minSplitSuggestionInterval {
d.mu.lastSplitSuggestion = now
return true
}
// We don't want to check for a split key if we don't need to as it
// requires some computation. When the splitFinder isn't ready or we
// recently suggested a split, skip the key check.
if d.mu.splitFinder.Ready(now) &&
now.Sub(d.mu.lastSplitSuggestion) > minSplitSuggestionInterval {
if splitKey := d.mu.splitFinder.Key(); splitKey != nil {
log.KvDistribution.VEventf(ctx, 3, "suggesting split key %v splitter_state=%v",
splitKey, (*lockedDecider)(d))
d.mu.lastSplitSuggestion = now
d.mu.suggestionsMade++
return true
} else {
if now.Sub(d.mu.lastNoSplitKeyLoggingMetrics) > minNoSplitKeyLoggingMetricsInterval {
d.mu.lastNoSplitKeyLoggingMetrics = now
noSplitKeyCauseLogMsg := d.mu.splitFinder.NoSplitKeyCauseLogMsg()
if noSplitKeyCauseLogMsg != "" {
if causeMsg := d.mu.splitFinder.NoSplitKeyCauseLogMsg(); causeMsg != "" {
popularKeyFrequency := d.mu.splitFinder.PopularKeyFrequency()
noSplitKeyCauseLogMsg += fmt.Sprintf(", most popular key occurs in %d%% of samples", int(popularKeyFrequency*100))
log.KvDistribution.Infof(ctx, "%s", noSplitKeyCauseLogMsg)
log.KvDistribution.Infof(ctx, "%s, most popular key occurs in %d%% of samples",
causeMsg, int(popularKeyFrequency*100))
log.KvDistribution.VInfof(ctx, 3, "splitter_state=%v", (*lockedDecider)(d))
if popularKeyFrequency >= splitKeyThreshold {
d.loadSplitterMetrics.PopularKeyCount.Inc(1)
}
Expand Down Expand Up @@ -349,6 +376,7 @@ func (d *Decider) resetLocked(now time.Time) {
d.mu.count = 0
d.mu.maxStat.reset(now, d.config.StatRetention())
d.mu.splitFinder = nil
d.mu.suggestionsMade = 0
d.mu.lastSplitSuggestion = time.Time{}
d.mu.lastNoSplitKeyLoggingMetrics = time.Time{}
}
Expand Down
39 changes: 35 additions & 4 deletions pkg/kv/kvserver/split/load_based_splitter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"fmt"
"math"
"sort"
"strings"
"testing"
"text/tabwriter"
"time"
Expand Down Expand Up @@ -342,6 +343,7 @@ type lbsTestSettings struct {
finderConfig *finderConfig
seed uint64
iterations int
showLastState bool
}

func uint32ToKey(key uint32) roachpb.Key {
Expand Down Expand Up @@ -444,6 +446,7 @@ type repeatedResult struct {
avgOptimalPercentDifference, maxOptimalPercentDifference float64
avgRecordExecutionTime, avgKeyExecutionTime time.Duration
noKeyFoundPercent float64
lastStateString string
}

func resultTable(configs []multiReqConfig, rr []repeatedResult, showTiming bool) string {
Expand Down Expand Up @@ -506,7 +509,6 @@ func resultTable(configs []multiReqConfig, rr []repeatedResult, showTiming bool)
_ = w.Flush()

return buf.String()

}

func runTestRepeated(settings *lbsTestSettings) repeatedResult {
Expand All @@ -515,13 +517,15 @@ func runTestRepeated(settings *lbsTestSettings) repeatedResult {
avgOptimalPercentDifference, maxOptimalPercentDifference float64
avgRecordExecutionTime, avgKeyExecutionTime time.Duration
noKeyFoundPercent float64
lastStateString string
)
randSource := rand.New(rand.NewSource(settings.seed))
requestGen := settings.requestConfig.makeGenerator(randSource)

for i := 0; i < settings.iterations; i++ {
var recordFn func(span roachpb.Span, weight int)
var keyFn func() roachpb.Key
var stringFn func() string

if settings.deciderConfig != nil {
d := settings.deciderConfig.makeDecider(randSource)
Expand All @@ -540,6 +544,7 @@ func runTestRepeated(settings *lbsTestSettings) repeatedResult {
keyFn = func() roachpb.Key {
return d.MaybeSplitKey(ctx, now)
}
stringFn = (*lockedDecider)(d).String
} else {
f := settings.finderConfig.makeFinder(randSource)
recordFn = func(span roachpb.Span, weight int) {
Expand All @@ -548,6 +553,7 @@ func runTestRepeated(settings *lbsTestSettings) repeatedResult {
keyFn = func() roachpb.Key {
return f.Key()
}
stringFn = f.String
}
ret := runTest(recordFn, keyFn, randSource, requestGen)

Expand All @@ -571,6 +577,9 @@ func runTestRepeated(settings *lbsTestSettings) repeatedResult {
avgRecordExecutionTime += ret.recordExecutionTime
avgKeyExecutionTime += ret.keyExecutionTime

if i == settings.iterations-1 && settings.showLastState {
lastStateString = stringFn()
}
}

nRuns := settings.iterations
Expand All @@ -592,6 +601,7 @@ func runTestRepeated(settings *lbsTestSettings) repeatedResult {
avgRecordExecutionTime: avgRecordExecutionTime,
avgKeyExecutionTime: avgKeyExecutionTime,
noKeyFoundPercent: noKeyFoundPercent,
lastStateString: lastStateString,
}
}

Expand Down Expand Up @@ -821,7 +831,7 @@ func TestDataDriven(t *testing.T) {
findConfig = &finderConfig{
weighted: weighted,
}

decConfig = nil
case "decider":
var duration int
var threshold int
Expand All @@ -841,10 +851,11 @@ func TestDataDriven(t *testing.T) {
objective: splitObj,
duration: time.Duration(duration) * time.Second,
}
findConfig = nil
case "eval":
var seed uint64
var iterations, mixCount int
var showTiming, cartesian, all bool
var showTiming, cartesian, all, showLastState bool
var mix string
var mixT mixType

Expand All @@ -859,6 +870,9 @@ func TestDataDriven(t *testing.T) {
if d.HasArg("all") {
d.ScanArgs(t, "all", &all)
}
if d.HasArg("show_last") {
d.ScanArgs(t, "show_last", &showLastState)
}
if d.HasArg("mix") {
d.ScanArgs(t, "mix", &mix)
d.ScanArgs(t, "mix_count", &mixCount)
Expand Down Expand Up @@ -900,9 +914,26 @@ func TestDataDriven(t *testing.T) {
finderConfig: findConfig,
seed: seed,
iterations: iterations,
showLastState: showLastState,
})
}
return resultTable(evalRequestConfigs, repeatedResults, showTiming)
retTable := resultTable(evalRequestConfigs, repeatedResults, showTiming)
if showLastState {
showRequestConfgDesc := len(evalRequestConfigs) > 1
var buf strings.Builder
for i := range evalRequestConfigs {
if i > 0 {
buf.WriteString("\n")
}
if showRequestConfgDesc {
buf.WriteString(evalRequestConfigs[i].String())
}
fmt.Fprintf(&buf, "\t%s",
repeatedResults[i].lastStateString)
}
return fmt.Sprintf("%s%s", retTable, buf.String())
}
return retTable
default:
return fmt.Sprintf("unknown command: %s", d.Cmd)
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/kv/kvserver/split/testdata/cpu_decider_cartesian
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ w=zip(10000)/k=zip(1000000)/s=zip(1000)/s(%)=95/10000
mixed_requests(2) 0.00 3.69 19.97 0.03 0.08
w=uni(100)/k=zip(10000)/s=zip(1000)/s(%)=20/10000
w=zip(10000)/k=zip(10000)/s=uni(1000)/s(%)=20/10000
mixed_requests(2) 0.00 6.36 15.63 0.01 0.02
mixed_requests(2) 0.00 6.64 21.33 0.01 0.02
w=zip(100)/k=uni(10000)/s=uni(1)/s(%)=0/10000
w=zip(100)/k=uni(10000)/s=zip(1000)/s(%)=95/10000
mixed_requests(2) 0.00 4.11 16.75 0.00 0.01
Expand Down Expand Up @@ -87,7 +87,7 @@ w=zip(100)/k=uni(1000000)/s=uni(1000)/s(%)=20/10000
mixed_requests(2) 0.00 6.29 24.99 0.00 0.02
w=uni(10000)/k=zip(10000)/s=uni(1)/s(%)=0/10000
w=zip(10000)/k=uni(1000000)/s=uni(1000)/s(%)=95/10000
mixed_requests(2) 0.00 6.39 13.28 0.02 0.07
mixed_requests(2) 0.00 6.24 13.28 0.02 0.07
w=zip(100)/k=uni(1000000)/s=zip(1000)/s(%)=95/10000
w=zip(10000)/k=uni(10000)/s=zip(1000)/s(%)=20/10000
mixed_requests(2) 0.00 4.08 11.02 0.03 0.08
Expand All @@ -102,7 +102,7 @@ w=zip(100)/k=uni(1000000)/s=uni(1000)/s(%)=95/10000
mixed_requests(2) 0.00 4.89 12.20 0.00 0.01
w=uni(100)/k=zip(1000000)/s=uni(1000)/s(%)=95/10000
w=uni(100)/k=uni(1000000)/s=uni(1000)/s(%)=20/10000
mixed_requests(2) 0.00 5.03 16.77 0.00 0.00
mixed_requests(2) 0.00 4.99 16.77 0.00 0.00
w=uni(10000)/k=uni(1000000)/s=uni(1000)/s(%)=95/10000
w=uni(10000)/k=zip(1000000)/s=zip(1000)/s(%)=95/10000
mixed_requests(2) 0.00 5.07 15.26 0.02 0.06
Expand Down
49 changes: 49 additions & 0 deletions pkg/kv/kvserver/split/testdata/format
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
requests key_dist=zipfian key_max=10000 span_dist=uniform span_max=1000 weight_dist=uniform weight_max=10 range_request_percent=5 request_count=10000
----

requests key_dist=zipfian key_max=10000 span_dist=uniform span_max=1000 weight_dist=zipfian weight_max=1000 range_request_percent=50 request_count=10000
----

finder weighted=true
----

eval seed=42 iterations=1 all=true mix=sequential mix_count=2 show_last=true
----
description no_key(%) avg_diff(%) max_diff(%) avg_optimal_diff(%) max_optimal_diff(%)
mixed_requests(2) 0.00 2.35 2.35 0.00 0.00
w=zip(1000)/k=zip(10000)/s=uni(1000)/s(%)=50/10000
w=uni(10)/k=zip(10000)/s=uni(1000)/s(%)=5/10000
key=/Table/223 start=2022-03-21 11:00:00 +0000 UTC count=25555 total=1419849.00 samples=[/Table/1710(l=12086.5 r=2801.5 c=2840 w=9.0) /Table/2(l=2045.0 r=18033.0 c=3856 w=8.0) /Table/13(l=282841.0 r=802019.0 c=22062 w=549.0) /Table/354(l=245816.0 r=201859.0 c=14956 w=225.0) /Table/514(l=559428.0 r=354804.0 c=20232 w=260.5) /Table/305(l=576211.5 r=511062.5 c=22091 w=226.0) /Table/950(l=262150.5 r=98121.5 c=14042 w=17.0) /Table/80(l=279974.5 r=416784.0 c=17812 w=330.0) /Table/463(l=381587.5 r=249268.0 c=17112 w=179.0) /Table/223(l=224001.5 r=226421.5 c=14987 w=557.0) /Table/6344(l=91668.5 r=6660.5 c=10958 w=613.0) /Table/1346(l=378507.0 r=106093.5 c=15360 w=949.0) /Table/2(l=29973.5 r=337055.5 c=14118 w=453.0) /Table/97(l=378809.5 r=533071.5 c=20187 w=360.0) /Table/46(l=81138.0 r=136007.5 c=12342 w=448.5) /Table/806(l=901622.5 r=409269.5 c=24587 w=206.5) /Table/1(l=0.0 r=90149.0 c=10869 w=80.0) /Table/1873(l=233979.5 r=54757.0 c=13156 w=317.0) /Table/4(l=47772.5 r=254390.0 c=13337 w=286.0) /Table/798(l=171737.5 r=74976.0 c=12665 w=919.0)]

finder weighted=false
----

eval seed=42 iterations=1 all=true mix=sequential mix_count=2 show_last=true
----
description no_key(%) avg_diff(%) max_diff(%) avg_optimal_diff(%) max_optimal_diff(%)
mixed_requests(2) 0.00 10.26 10.26 0.00 0.00
w=zip(1000)/k=zip(10000)/s=uni(1000)/s(%)=50/10000
w=uni(10)/k=zip(10000)/s=uni(1000)/s(%)=5/10000
key=/Table/150 start=2022-03-21 11:00:00 +0000 UTC count=20000 samples=[/Table/1(l=0 r=10765 c=0) /Table/6471(l=11888 r=569 c=19) /Table/445(l=9595 r=5229 c=1548) /Table/2(l=994 r=9976 c=106) /Table/208(l=933 r=674 c=42) /Table/3(l=2033 r=14906 c=641) /Table/59(l=6677 r=9756 c=2234) /Table/1(l=0 r=8197 c=0) /Table/5(l=1228 r=4715 c=54) /Table/4166(l=15285 r=1574 c=47) /Table/41(l=1160 r=1583 c=66) /Table/6012(l=7018 r=394 c=4) /Table/191(l=5414 r=3893 c=238) /Table/351(l=1441 r=788 c=49) /Table/150(l=3610 r=3009 c=172) /Table/9(l=1565 r=4339 c=73) /Table/1(l=0 r=11452 c=0) /Table/2(l=1380 r=16499 c=475) /Table/8882(l=10942 r=142 c=10) /Table/650(l=1009 r=382 c=22)]

decider duration=100 retention=200 objective=cpu threshold=100
----

eval seed=42 iterations=1 all=true mix=sequential mix_count=2 show_last=true
----
description no_key(%) avg_diff(%) max_diff(%) avg_optimal_diff(%) max_optimal_diff(%)
mixed_requests(2) 0.00 9.55 9.55 0.02 0.02
w=zip(1000)/k=zip(10000)/s=uni(1000)/s(%)=50/10000
w=uni(10)/k=zip(10000)/s=uni(1000)/s(%)=5/10000
objective=cpu count=0 suggestions=2 last=1095.0 last-roll=2022-03-21 11:01:40 +0000 UTC last-suggest=2022-03-21 11:01:11.01 +0000 UTC key=/Table/152 start=2022-03-21 11:00:01 +0000 UTC count=25155 total=1393083.00 samples=[/Table/814(l=9562.0 r=3339.0 c=2434 w=5.0) /Table/46(l=7820.5 r=10403.5 c=3450 w=8.0) /Table/3193(l=1149400.5 r=169368.0 c=24541 w=444.0) /Table/6(l=213149.0 r=980453.5 c=23200 w=423.5) /Table/648(l=391124.0 r=208895.0 c=16669 w=98.0) /Table/1(l=0.0 r=1241723.0 c=23711 w=77.0) /Table/824(l=227827.0 r=102958.0 c=13644 w=9.0) /Table/68(l=507405.0 r=814941.0 c=24564 w=140.0) /Table/5224(l=53022.0 r=3727.0 c=10461 w=351.0) /Table/781(l=608857.5 r=274689.0 c=19721 w=241.0) /Table/718(l=753749.0 r=380244.5 c=22412 w=426.0) /Table/975(l=941479.0 r=349419.5 c=24188 w=214.5) /Table/3310(l=1162395.0 r=166671.0 c=24643 w=474.0) /Table/1138(l=677104.0 r=211530.5 c=19793 w=71.0) /Table/1366(l=147664.0 r=41503.0 c=11938 w=336.0) /Table/52(l=229715.5 r=395288.0 c=16949 w=954.0) /Table/6(l=138512.5 r=631612.0 c=18555 w=270.0) /Table/996(l=701757.5 r=245949.5 c=20520 w=69.5) /Table/2487(l=231011.0 r=42289.0 c=12937 w=53.0) /Table/152(l=609437.5 r=740302.0 c=24879 w=397.5)]

decider duration=100 retention=200 objective=qps threshold=100
----

eval seed=42 iterations=1 all=true mix=sequential mix_count=2 show_last=true
----
description no_key(%) avg_diff(%) max_diff(%) avg_optimal_diff(%) max_optimal_diff(%)
mixed_requests(2) 0.00 17.42 17.42 0.00 0.00
w=zip(1000)/k=zip(10000)/s=uni(1000)/s(%)=50/10000
w=uni(10)/k=zip(10000)/s=uni(1000)/s(%)=5/10000
objective=qps count=0 suggestions=2 last=1095.0 last-roll=2022-03-21 11:01:40 +0000 UTC last-suggest=2022-03-21 11:01:11.01 +0000 UTC key=/Table/99 start=2022-03-21 11:00:01 +0000 UTC count=19800 samples=[/Table/261(l=6244 r=3962 c=359) /Table/1010(l=9201 r=2937 c=138) /Table/2(l=1249 r=14557 c=366) /Table/1(l=0 r=10876 c=0) /Table/3479(l=1284 r=164 c=1) /Table/1(l=0 r=17380 c=0) /Table/7362(l=17817 r=616 c=34) /Table/47(l=3401 r=4434 c=162) /Table/1(l=0 r=5797 c=0) /Table/1(l=0 r=16706 c=0) /Table/2(l=262 r=2332 c=15) /Table/313(l=4436 r=2607 c=173) /Table/3(l=1340 r=7933 c=72) /Table/99(l=1032 r=1002 c=44) /Table/7359(l=6364 r=226 c=1) /Table/1(l=0 r=5777 c=0) /Table/422(l=7096 r=3682 c=474) /Table/520(l=10856 r=5591 c=1707) /Table/4741(l=10027 r=851 c=16) /Table/1(l=0 r=1213 c=0)]
Loading

0 comments on commit 7887654

Please sign in to comment.