From 7887654542737ccc13e619435aedc2ce369b6d05 Mon Sep 17 00:00:00 2001 From: Austen McClernon Date: Fri, 19 May 2023 15:29:18 +0000 Subject: [PATCH] split: add string methods for debugging/logging Previously, there was no way to peak the contents of the load based splitter samples when inspecting nodes. This commit adds string methods for the `UnweightedFinder`, `WeightedFinder` and `Decider`. This commit also swaps the order of the should split check to avoid computation. As a result the output of `cpu_decider_cartesian` changed slightly as the no split key logging message is now ordered differently. Informs: #103672 Informs: #103483 Release note: None --- pkg/cmd/roachtest/tests/split.go | 7 +-- pkg/kv/kvserver/split/BUILD.bazel | 1 + pkg/kv/kvserver/split/decider.go | 52 ++++++++++++++----- .../split/load_based_splitter_test.go | 39 ++++++++++++-- .../split/testdata/cpu_decider_cartesian | 6 +-- pkg/kv/kvserver/split/testdata/format | 49 +++++++++++++++++ .../kvserver/split/testdata/weighted_finder | 6 +-- pkg/kv/kvserver/split/unweighted_finder.go | 33 +++++++++--- pkg/kv/kvserver/split/weighted_finder.go | 33 ++++++++++-- 9 files changed, 191 insertions(+), 35 deletions(-) create mode 100644 pkg/kv/kvserver/split/testdata/format diff --git a/pkg/cmd/roachtest/tests/split.go b/pkg/cmd/roachtest/tests/split.go index 83b1ecc4d8e1..3461757a3b43 100644 --- a/pkg/cmd/roachtest/tests/split.go +++ b/pkg/cmd/roachtest/tests/split.go @@ -407,7 +407,8 @@ func runLoadSplits(ctx context.Context, t test.Test, c cluster.Cluster, params s c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.Node(4)) startOpts := option.DefaultStartOptsNoBackups() startOpts.RoachprodOpts.ExtraArgs = append(startOpts.RoachprodOpts.ExtraArgs, - "--vmodule=split_queue=2,store_rebalancer=2,allocator=2,replicate_queue=2", + "--vmodule=split_queue=2,store_rebalancer=2,allocator=2,replicate_queue=2,"+ + "decider=3,replica_split_load=1", ) c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), c.All()) @@ -424,7 +425,7 @@ func runLoadSplits(ctx context.Context, t test.Test, c cluster.Cluster, params s // Set the objective to QPS or CPU and update the load split threshold // appropriately. if params.qpsThreshold > 0 { - t.Status("setting split objective to QPS with threshold %d", params.qpsThreshold) + t.Status("setting split objective to QPS with threshold ", params.qpsThreshold) if err := setLoadBasedRebalancingObjective(ctx, db, "qps"); err != nil { return err } @@ -433,7 +434,7 @@ func runLoadSplits(ctx context.Context, t test.Test, c cluster.Cluster, params s return err } } else if params.cpuThreshold > 0 { - t.Status("setting split objective to CPU with threshold %s", params.cpuThreshold) + t.Status("setting split objective to CPU with threshold ", params.cpuThreshold) if err := setLoadBasedRebalancingObjective(ctx, db, "cpu"); err != nil { return err } diff --git a/pkg/kv/kvserver/split/BUILD.bazel b/pkg/kv/kvserver/split/BUILD.bazel index 2536fc4f959a..8eb21bc3c05b 100644 --- a/pkg/kv/kvserver/split/BUILD.bazel +++ b/pkg/kv/kvserver/split/BUILD.bazel @@ -18,6 +18,7 @@ go_library( "//pkg/util/log", "//pkg/util/metric", "//pkg/util/syncutil", + "@com_github_cockroachdb_redact//:redact", ], ) diff --git a/pkg/kv/kvserver/split/decider.go b/pkg/kv/kvserver/split/decider.go index ac0998257442..c6dab3a4cedc 100644 --- a/pkg/kv/kvserver/split/decider.go +++ b/pkg/kv/kvserver/split/decider.go @@ -14,7 +14,6 @@ package split import ( "context" - "fmt" "math/rand" "time" @@ -23,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/syncutil" + "github.com/cockroachdb/redact" ) const minSplitSuggestionInterval = time.Minute @@ -30,6 +30,7 @@ const minNoSplitKeyLoggingMetricsInterval = time.Minute const minPerSecondSampleDuration = time.Second type LoadBasedSplitter interface { + redact.SafeFormatter // Record informs the LoadBasedSplitter about where the span lies with regard // to the keys in the samples. Record(span roachpb.Span, weight float64) @@ -46,11 +47,14 @@ type LoadBasedSplitter interface { // number of samples that don't pass each split key requirement if not all // samples are invalid due to insufficient counters, otherwise returns an // empty string. - NoSplitKeyCauseLogMsg() string + NoSplitKeyCauseLogMsg() redact.RedactableString // PopularKeyFrequency returns the percentage that the most popular key // appears in the sampled candidate split keys. PopularKeyFrequency() float64 + + // String formats the state of the load based splitter. + String() string } type LoadSplitConfig interface { @@ -154,6 +158,7 @@ type Decider struct { // Fields tracking split key suggestions. splitFinder LoadBasedSplitter // populated when engaged or decided lastSplitSuggestion time.Time // last stipulation to client to carry out split + suggestionsMade int // suggestions made since last reset // Fields tracking logging / metrics around load-based splitter split key. lastNoSplitKeyLoggingMetrics time.Time @@ -175,6 +180,23 @@ func Init( lbs.mu.objective = objective } +type lockedDecider Decider + +func (ld *lockedDecider) SafeFormat(w redact.SafePrinter, r rune) { + w.Printf( + "objective=%v count=%d suggestions=%d last=%.1f last-roll=%v last-suggest=%v", + ld.mu.objective, ld.mu.count, ld.mu.suggestionsMade, ld.mu.lastStatVal, + ld.mu.lastStatRollover, ld.mu.lastSplitSuggestion, + ) + if ld.mu.splitFinder != nil { + w.Printf(" %v", ld.mu.splitFinder) + } +} + +func (ld *lockedDecider) String() string { + return redact.StringWithoutMarkers(ld) +} + // Record notifies the Decider that 'n' operations are being carried out which // operate on the span returned by the supplied method. The closure will only // be called when necessary, that is, when the Decider is considering a split @@ -231,20 +253,25 @@ func (d *Decider) recordLocked( if s.Key != nil { d.mu.splitFinder.Record(span(), float64(n)) } - if d.mu.splitFinder.Ready(now) { - if d.mu.splitFinder.Key() != nil { - if now.Sub(d.mu.lastSplitSuggestion) > minSplitSuggestionInterval { - d.mu.lastSplitSuggestion = now - return true - } + // We don't want to check for a split key if we don't need to as it + // requires some computation. When the splitFinder isn't ready or we + // recently suggested a split, skip the key check. + if d.mu.splitFinder.Ready(now) && + now.Sub(d.mu.lastSplitSuggestion) > minSplitSuggestionInterval { + if splitKey := d.mu.splitFinder.Key(); splitKey != nil { + log.KvDistribution.VEventf(ctx, 3, "suggesting split key %v splitter_state=%v", + splitKey, (*lockedDecider)(d)) + d.mu.lastSplitSuggestion = now + d.mu.suggestionsMade++ + return true } else { if now.Sub(d.mu.lastNoSplitKeyLoggingMetrics) > minNoSplitKeyLoggingMetricsInterval { d.mu.lastNoSplitKeyLoggingMetrics = now - noSplitKeyCauseLogMsg := d.mu.splitFinder.NoSplitKeyCauseLogMsg() - if noSplitKeyCauseLogMsg != "" { + if causeMsg := d.mu.splitFinder.NoSplitKeyCauseLogMsg(); causeMsg != "" { popularKeyFrequency := d.mu.splitFinder.PopularKeyFrequency() - noSplitKeyCauseLogMsg += fmt.Sprintf(", most popular key occurs in %d%% of samples", int(popularKeyFrequency*100)) - log.KvDistribution.Infof(ctx, "%s", noSplitKeyCauseLogMsg) + log.KvDistribution.Infof(ctx, "%s, most popular key occurs in %d%% of samples", + causeMsg, int(popularKeyFrequency*100)) + log.KvDistribution.VInfof(ctx, 3, "splitter_state=%v", (*lockedDecider)(d)) if popularKeyFrequency >= splitKeyThreshold { d.loadSplitterMetrics.PopularKeyCount.Inc(1) } @@ -349,6 +376,7 @@ func (d *Decider) resetLocked(now time.Time) { d.mu.count = 0 d.mu.maxStat.reset(now, d.config.StatRetention()) d.mu.splitFinder = nil + d.mu.suggestionsMade = 0 d.mu.lastSplitSuggestion = time.Time{} d.mu.lastNoSplitKeyLoggingMetrics = time.Time{} } diff --git a/pkg/kv/kvserver/split/load_based_splitter_test.go b/pkg/kv/kvserver/split/load_based_splitter_test.go index 07eeecceaa85..c27441132433 100644 --- a/pkg/kv/kvserver/split/load_based_splitter_test.go +++ b/pkg/kv/kvserver/split/load_based_splitter_test.go @@ -16,6 +16,7 @@ import ( "fmt" "math" "sort" + "strings" "testing" "text/tabwriter" "time" @@ -342,6 +343,7 @@ type lbsTestSettings struct { finderConfig *finderConfig seed uint64 iterations int + showLastState bool } func uint32ToKey(key uint32) roachpb.Key { @@ -444,6 +446,7 @@ type repeatedResult struct { avgOptimalPercentDifference, maxOptimalPercentDifference float64 avgRecordExecutionTime, avgKeyExecutionTime time.Duration noKeyFoundPercent float64 + lastStateString string } func resultTable(configs []multiReqConfig, rr []repeatedResult, showTiming bool) string { @@ -506,7 +509,6 @@ func resultTable(configs []multiReqConfig, rr []repeatedResult, showTiming bool) _ = w.Flush() return buf.String() - } func runTestRepeated(settings *lbsTestSettings) repeatedResult { @@ -515,6 +517,7 @@ func runTestRepeated(settings *lbsTestSettings) repeatedResult { avgOptimalPercentDifference, maxOptimalPercentDifference float64 avgRecordExecutionTime, avgKeyExecutionTime time.Duration noKeyFoundPercent float64 + lastStateString string ) randSource := rand.New(rand.NewSource(settings.seed)) requestGen := settings.requestConfig.makeGenerator(randSource) @@ -522,6 +525,7 @@ func runTestRepeated(settings *lbsTestSettings) repeatedResult { for i := 0; i < settings.iterations; i++ { var recordFn func(span roachpb.Span, weight int) var keyFn func() roachpb.Key + var stringFn func() string if settings.deciderConfig != nil { d := settings.deciderConfig.makeDecider(randSource) @@ -540,6 +544,7 @@ func runTestRepeated(settings *lbsTestSettings) repeatedResult { keyFn = func() roachpb.Key { return d.MaybeSplitKey(ctx, now) } + stringFn = (*lockedDecider)(d).String } else { f := settings.finderConfig.makeFinder(randSource) recordFn = func(span roachpb.Span, weight int) { @@ -548,6 +553,7 @@ func runTestRepeated(settings *lbsTestSettings) repeatedResult { keyFn = func() roachpb.Key { return f.Key() } + stringFn = f.String } ret := runTest(recordFn, keyFn, randSource, requestGen) @@ -571,6 +577,9 @@ func runTestRepeated(settings *lbsTestSettings) repeatedResult { avgRecordExecutionTime += ret.recordExecutionTime avgKeyExecutionTime += ret.keyExecutionTime + if i == settings.iterations-1 && settings.showLastState { + lastStateString = stringFn() + } } nRuns := settings.iterations @@ -592,6 +601,7 @@ func runTestRepeated(settings *lbsTestSettings) repeatedResult { avgRecordExecutionTime: avgRecordExecutionTime, avgKeyExecutionTime: avgKeyExecutionTime, noKeyFoundPercent: noKeyFoundPercent, + lastStateString: lastStateString, } } @@ -821,7 +831,7 @@ func TestDataDriven(t *testing.T) { findConfig = &finderConfig{ weighted: weighted, } - + decConfig = nil case "decider": var duration int var threshold int @@ -841,10 +851,11 @@ func TestDataDriven(t *testing.T) { objective: splitObj, duration: time.Duration(duration) * time.Second, } + findConfig = nil case "eval": var seed uint64 var iterations, mixCount int - var showTiming, cartesian, all bool + var showTiming, cartesian, all, showLastState bool var mix string var mixT mixType @@ -859,6 +870,9 @@ func TestDataDriven(t *testing.T) { if d.HasArg("all") { d.ScanArgs(t, "all", &all) } + if d.HasArg("show_last") { + d.ScanArgs(t, "show_last", &showLastState) + } if d.HasArg("mix") { d.ScanArgs(t, "mix", &mix) d.ScanArgs(t, "mix_count", &mixCount) @@ -900,9 +914,26 @@ func TestDataDriven(t *testing.T) { finderConfig: findConfig, seed: seed, iterations: iterations, + showLastState: showLastState, }) } - return resultTable(evalRequestConfigs, repeatedResults, showTiming) + retTable := resultTable(evalRequestConfigs, repeatedResults, showTiming) + if showLastState { + showRequestConfgDesc := len(evalRequestConfigs) > 1 + var buf strings.Builder + for i := range evalRequestConfigs { + if i > 0 { + buf.WriteString("\n") + } + if showRequestConfgDesc { + buf.WriteString(evalRequestConfigs[i].String()) + } + fmt.Fprintf(&buf, "\t%s", + repeatedResults[i].lastStateString) + } + return fmt.Sprintf("%s%s", retTable, buf.String()) + } + return retTable default: return fmt.Sprintf("unknown command: %s", d.Cmd) } diff --git a/pkg/kv/kvserver/split/testdata/cpu_decider_cartesian b/pkg/kv/kvserver/split/testdata/cpu_decider_cartesian index c9cc81c583d6..e48c2b6c3f9e 100644 --- a/pkg/kv/kvserver/split/testdata/cpu_decider_cartesian +++ b/pkg/kv/kvserver/split/testdata/cpu_decider_cartesian @@ -33,7 +33,7 @@ w=zip(10000)/k=zip(1000000)/s=zip(1000)/s(%)=95/10000 mixed_requests(2) 0.00 3.69 19.97 0.03 0.08 w=uni(100)/k=zip(10000)/s=zip(1000)/s(%)=20/10000 w=zip(10000)/k=zip(10000)/s=uni(1000)/s(%)=20/10000 -mixed_requests(2) 0.00 6.36 15.63 0.01 0.02 +mixed_requests(2) 0.00 6.64 21.33 0.01 0.02 w=zip(100)/k=uni(10000)/s=uni(1)/s(%)=0/10000 w=zip(100)/k=uni(10000)/s=zip(1000)/s(%)=95/10000 mixed_requests(2) 0.00 4.11 16.75 0.00 0.01 @@ -87,7 +87,7 @@ w=zip(100)/k=uni(1000000)/s=uni(1000)/s(%)=20/10000 mixed_requests(2) 0.00 6.29 24.99 0.00 0.02 w=uni(10000)/k=zip(10000)/s=uni(1)/s(%)=0/10000 w=zip(10000)/k=uni(1000000)/s=uni(1000)/s(%)=95/10000 -mixed_requests(2) 0.00 6.39 13.28 0.02 0.07 +mixed_requests(2) 0.00 6.24 13.28 0.02 0.07 w=zip(100)/k=uni(1000000)/s=zip(1000)/s(%)=95/10000 w=zip(10000)/k=uni(10000)/s=zip(1000)/s(%)=20/10000 mixed_requests(2) 0.00 4.08 11.02 0.03 0.08 @@ -102,7 +102,7 @@ w=zip(100)/k=uni(1000000)/s=uni(1000)/s(%)=95/10000 mixed_requests(2) 0.00 4.89 12.20 0.00 0.01 w=uni(100)/k=zip(1000000)/s=uni(1000)/s(%)=95/10000 w=uni(100)/k=uni(1000000)/s=uni(1000)/s(%)=20/10000 -mixed_requests(2) 0.00 5.03 16.77 0.00 0.00 +mixed_requests(2) 0.00 4.99 16.77 0.00 0.00 w=uni(10000)/k=uni(1000000)/s=uni(1000)/s(%)=95/10000 w=uni(10000)/k=zip(1000000)/s=zip(1000)/s(%)=95/10000 mixed_requests(2) 0.00 5.07 15.26 0.02 0.06 diff --git a/pkg/kv/kvserver/split/testdata/format b/pkg/kv/kvserver/split/testdata/format new file mode 100644 index 000000000000..5b834039e324 --- /dev/null +++ b/pkg/kv/kvserver/split/testdata/format @@ -0,0 +1,49 @@ +requests key_dist=zipfian key_max=10000 span_dist=uniform span_max=1000 weight_dist=uniform weight_max=10 range_request_percent=5 request_count=10000 +---- + +requests key_dist=zipfian key_max=10000 span_dist=uniform span_max=1000 weight_dist=zipfian weight_max=1000 range_request_percent=50 request_count=10000 +---- + +finder weighted=true +---- + +eval seed=42 iterations=1 all=true mix=sequential mix_count=2 show_last=true +---- +description no_key(%) avg_diff(%) max_diff(%) avg_optimal_diff(%) max_optimal_diff(%) +mixed_requests(2) 0.00 2.35 2.35 0.00 0.00 +w=zip(1000)/k=zip(10000)/s=uni(1000)/s(%)=50/10000 +w=uni(10)/k=zip(10000)/s=uni(1000)/s(%)=5/10000 + key=/Table/223 start=2022-03-21 11:00:00 +0000 UTC count=25555 total=1419849.00 samples=[/Table/1710(l=12086.5 r=2801.5 c=2840 w=9.0) /Table/2(l=2045.0 r=18033.0 c=3856 w=8.0) /Table/13(l=282841.0 r=802019.0 c=22062 w=549.0) /Table/354(l=245816.0 r=201859.0 c=14956 w=225.0) /Table/514(l=559428.0 r=354804.0 c=20232 w=260.5) /Table/305(l=576211.5 r=511062.5 c=22091 w=226.0) /Table/950(l=262150.5 r=98121.5 c=14042 w=17.0) /Table/80(l=279974.5 r=416784.0 c=17812 w=330.0) /Table/463(l=381587.5 r=249268.0 c=17112 w=179.0) /Table/223(l=224001.5 r=226421.5 c=14987 w=557.0) /Table/6344(l=91668.5 r=6660.5 c=10958 w=613.0) /Table/1346(l=378507.0 r=106093.5 c=15360 w=949.0) /Table/2(l=29973.5 r=337055.5 c=14118 w=453.0) /Table/97(l=378809.5 r=533071.5 c=20187 w=360.0) /Table/46(l=81138.0 r=136007.5 c=12342 w=448.5) /Table/806(l=901622.5 r=409269.5 c=24587 w=206.5) /Table/1(l=0.0 r=90149.0 c=10869 w=80.0) /Table/1873(l=233979.5 r=54757.0 c=13156 w=317.0) /Table/4(l=47772.5 r=254390.0 c=13337 w=286.0) /Table/798(l=171737.5 r=74976.0 c=12665 w=919.0)] + +finder weighted=false +---- + +eval seed=42 iterations=1 all=true mix=sequential mix_count=2 show_last=true +---- +description no_key(%) avg_diff(%) max_diff(%) avg_optimal_diff(%) max_optimal_diff(%) +mixed_requests(2) 0.00 10.26 10.26 0.00 0.00 +w=zip(1000)/k=zip(10000)/s=uni(1000)/s(%)=50/10000 +w=uni(10)/k=zip(10000)/s=uni(1000)/s(%)=5/10000 + key=/Table/150 start=2022-03-21 11:00:00 +0000 UTC count=20000 samples=[/Table/1(l=0 r=10765 c=0) /Table/6471(l=11888 r=569 c=19) /Table/445(l=9595 r=5229 c=1548) /Table/2(l=994 r=9976 c=106) /Table/208(l=933 r=674 c=42) /Table/3(l=2033 r=14906 c=641) /Table/59(l=6677 r=9756 c=2234) /Table/1(l=0 r=8197 c=0) /Table/5(l=1228 r=4715 c=54) /Table/4166(l=15285 r=1574 c=47) /Table/41(l=1160 r=1583 c=66) /Table/6012(l=7018 r=394 c=4) /Table/191(l=5414 r=3893 c=238) /Table/351(l=1441 r=788 c=49) /Table/150(l=3610 r=3009 c=172) /Table/9(l=1565 r=4339 c=73) /Table/1(l=0 r=11452 c=0) /Table/2(l=1380 r=16499 c=475) /Table/8882(l=10942 r=142 c=10) /Table/650(l=1009 r=382 c=22)] + +decider duration=100 retention=200 objective=cpu threshold=100 +---- + +eval seed=42 iterations=1 all=true mix=sequential mix_count=2 show_last=true +---- +description no_key(%) avg_diff(%) max_diff(%) avg_optimal_diff(%) max_optimal_diff(%) +mixed_requests(2) 0.00 9.55 9.55 0.02 0.02 +w=zip(1000)/k=zip(10000)/s=uni(1000)/s(%)=50/10000 +w=uni(10)/k=zip(10000)/s=uni(1000)/s(%)=5/10000 + objective=cpu count=0 suggestions=2 last=1095.0 last-roll=2022-03-21 11:01:40 +0000 UTC last-suggest=2022-03-21 11:01:11.01 +0000 UTC key=/Table/152 start=2022-03-21 11:00:01 +0000 UTC count=25155 total=1393083.00 samples=[/Table/814(l=9562.0 r=3339.0 c=2434 w=5.0) /Table/46(l=7820.5 r=10403.5 c=3450 w=8.0) /Table/3193(l=1149400.5 r=169368.0 c=24541 w=444.0) /Table/6(l=213149.0 r=980453.5 c=23200 w=423.5) /Table/648(l=391124.0 r=208895.0 c=16669 w=98.0) /Table/1(l=0.0 r=1241723.0 c=23711 w=77.0) /Table/824(l=227827.0 r=102958.0 c=13644 w=9.0) /Table/68(l=507405.0 r=814941.0 c=24564 w=140.0) /Table/5224(l=53022.0 r=3727.0 c=10461 w=351.0) /Table/781(l=608857.5 r=274689.0 c=19721 w=241.0) /Table/718(l=753749.0 r=380244.5 c=22412 w=426.0) /Table/975(l=941479.0 r=349419.5 c=24188 w=214.5) /Table/3310(l=1162395.0 r=166671.0 c=24643 w=474.0) /Table/1138(l=677104.0 r=211530.5 c=19793 w=71.0) /Table/1366(l=147664.0 r=41503.0 c=11938 w=336.0) /Table/52(l=229715.5 r=395288.0 c=16949 w=954.0) /Table/6(l=138512.5 r=631612.0 c=18555 w=270.0) /Table/996(l=701757.5 r=245949.5 c=20520 w=69.5) /Table/2487(l=231011.0 r=42289.0 c=12937 w=53.0) /Table/152(l=609437.5 r=740302.0 c=24879 w=397.5)] + +decider duration=100 retention=200 objective=qps threshold=100 +---- + +eval seed=42 iterations=1 all=true mix=sequential mix_count=2 show_last=true +---- +description no_key(%) avg_diff(%) max_diff(%) avg_optimal_diff(%) max_optimal_diff(%) +mixed_requests(2) 0.00 17.42 17.42 0.00 0.00 +w=zip(1000)/k=zip(10000)/s=uni(1000)/s(%)=50/10000 +w=uni(10)/k=zip(10000)/s=uni(1000)/s(%)=5/10000 + objective=qps count=0 suggestions=2 last=1095.0 last-roll=2022-03-21 11:01:40 +0000 UTC last-suggest=2022-03-21 11:01:11.01 +0000 UTC key=/Table/99 start=2022-03-21 11:00:01 +0000 UTC count=19800 samples=[/Table/261(l=6244 r=3962 c=359) /Table/1010(l=9201 r=2937 c=138) /Table/2(l=1249 r=14557 c=366) /Table/1(l=0 r=10876 c=0) /Table/3479(l=1284 r=164 c=1) /Table/1(l=0 r=17380 c=0) /Table/7362(l=17817 r=616 c=34) /Table/47(l=3401 r=4434 c=162) /Table/1(l=0 r=5797 c=0) /Table/1(l=0 r=16706 c=0) /Table/2(l=262 r=2332 c=15) /Table/313(l=4436 r=2607 c=173) /Table/3(l=1340 r=7933 c=72) /Table/99(l=1032 r=1002 c=44) /Table/7359(l=6364 r=226 c=1) /Table/1(l=0 r=5777 c=0) /Table/422(l=7096 r=3682 c=474) /Table/520(l=10856 r=5591 c=1707) /Table/4741(l=10027 r=851 c=16) /Table/1(l=0 r=1213 c=0)] diff --git a/pkg/kv/kvserver/split/testdata/weighted_finder b/pkg/kv/kvserver/split/testdata/weighted_finder index bd651033a453..905a83086b25 100644 --- a/pkg/kv/kvserver/split/testdata/weighted_finder +++ b/pkg/kv/kvserver/split/testdata/weighted_finder @@ -20,7 +20,7 @@ requests key_dist=zipfian key_max=10000000000 span_dist=uniform span_max=1000 we eval seed=2022 iterations=100 all=true mix=sequential mix_count=5 ---- description no_key(%) avg_diff(%) max_diff(%) avg_optimal_diff(%) max_optimal_diff(%) -mixed_requests(5) 3.00 10.87 29.92 0.01 0.05 +mixed_requests(5) 1.00 10.55 28.44 0.01 0.05 w=uni(10)/k=zip(1000)/s=uni(100)/s(%)=95/10000 w=uni(10)/k=zip(1000)/s=uni(1000)/s(%)=0/10000 w=uni(10)/k=zip(10000000000)/s=uni(1000)/s(%)=95/10000 @@ -30,10 +30,10 @@ w=zip(1000)/k=zip(10000000000)/s=uni(1000)/s(%)=50/10000 eval seed=42 iterations=100 all=true mix=sequential mix_count=2 ---- description no_key(%) avg_diff(%) max_diff(%) avg_optimal_diff(%) max_optimal_diff(%) -mixed_requests(2) 3.00 36.26 62.78 0.00 0.01 +mixed_requests(2) 1.00 33.75 56.57 0.00 0.01 w=uni(10)/k=zip(1000)/s=uni(100)/s(%)=95/10000 w=uni(10)/k=zip(10000000000)/s=uni(1000)/s(%)=95/10000 -mixed_requests(2) 0.00 6.22 26.87 0.01 0.05 +mixed_requests(2) 1.00 6.42 23.03 0.01 0.06 w=uni(10)/k=zip(100000)/s=uni(1000)/s(%)=95/10000 w=zip(1000)/k=zip(10000000000)/s=uni(1000)/s(%)=50/10000 w=uni(10)/k=zip(1000)/s=uni(1000)/s(%)=0/10000 2.00 4.90 23.89 0.01 0.02 diff --git a/pkg/kv/kvserver/split/unweighted_finder.go b/pkg/kv/kvserver/split/unweighted_finder.go index cbab193abf91..f659d727d0f3 100644 --- a/pkg/kv/kvserver/split/unweighted_finder.go +++ b/pkg/kv/kvserver/split/unweighted_finder.go @@ -12,12 +12,12 @@ package split import ( "bytes" - "fmt" "math" "sort" "time" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/redact" ) // Load-based splitting. @@ -59,6 +59,16 @@ type sample struct { left, right, contained int } +// SafeFormat implements the redact.SafeFormatter interface. +func (s sample) SafeFormat(w redact.SafePrinter, _ rune) { + w.Printf("%s(l=%d r=%d c=%d)", + s.key, s.left, s.right, s.contained) +} + +func (s sample) String() string { + return redact.StringWithoutMarkers(s) +} + // UnweightedFinder is a structure that is used to determine the split point // using the Reservoir Sampling method. type UnweightedFinder struct { @@ -184,15 +194,16 @@ func (f *UnweightedFinder) noSplitKeyCause() ( } // NoSplitKeyCauseLogMsg implements the LoadBasedSplitter interface. -func (f *UnweightedFinder) NoSplitKeyCauseLogMsg() string { +func (f *UnweightedFinder) NoSplitKeyCauseLogMsg() redact.RedactableString { insufficientCounters, imbalance, tooManyContained, imbalanceAndTooManyContained := f.noSplitKeyCause() if insufficientCounters == splitKeySampleSize { return "" } - noSplitKeyCauseLogMsg := fmt.Sprintf( - "No split key found: insufficient counters = %d, imbalance = %d, too many contained = %d, imbalance and too many contained = %d", - insufficientCounters, imbalance, tooManyContained, imbalanceAndTooManyContained) - return noSplitKeyCauseLogMsg + return redact.Sprintf( + "no split key found: insufficient counters = %d, imbalance = %d, "+ + "too many contained = %d, imbalance and too many contained = %d", + insufficientCounters, imbalance, tooManyContained, + imbalanceAndTooManyContained) } // PopularKeyFrequency implements the LoadBasedSplitter interface. @@ -216,3 +227,13 @@ func (f *UnweightedFinder) PopularKeyFrequency() float64 { return float64(popularKeyCount) / float64(splitKeySampleSize) } + +// SafeFormat implements the redact.SafeFormatter interface. +func (f *UnweightedFinder) SafeFormat(w redact.SafePrinter, r rune) { + w.Printf("key=%v start=%v count=%d samples=%v", + f.Key(), f.startTime, f.count, f.samples) +} + +func (f *UnweightedFinder) String() string { + return redact.StringWithoutMarkers(f) +} diff --git a/pkg/kv/kvserver/split/weighted_finder.go b/pkg/kv/kvserver/split/weighted_finder.go index a1a66e354a39..0acb3ce36e17 100644 --- a/pkg/kv/kvserver/split/weighted_finder.go +++ b/pkg/kv/kvserver/split/weighted_finder.go @@ -11,13 +11,13 @@ package split import ( - "fmt" "math" "sort" "time" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/redact" ) // Load-based splitting. @@ -54,6 +54,16 @@ type weightedSample struct { count int } +// SafeFormat implements the redact.SafeFormatter interface. +func (ws weightedSample) SafeFormat(w redact.SafePrinter, _ rune) { + w.Printf("%s(l=%.1f r=%.1f c=%d w=%.1f)", + ws.key, ws.left, ws.right, ws.count, ws.weight) +} + +func (ws weightedSample) String() string { + return redact.StringWithoutMarkers(ws) +} + // WeightedFinder is a structure that is used to determine the split point // using the Weighted Reservoir Sampling method (a simplified version of A-Chao // algorithm). @@ -254,17 +264,22 @@ func (f *WeightedFinder) noSplitKeyCause() (insufficientCounters, imbalance int) } // NoSplitKeyCauseLogMsg implements the LoadBasedSplitter interface. -func (f *WeightedFinder) NoSplitKeyCauseLogMsg() string { +func (f *WeightedFinder) NoSplitKeyCauseLogMsg() redact.RedactableString { insufficientCounters, imbalance := f.noSplitKeyCause() if insufficientCounters == splitKeySampleSize { return "" } - noSplitKeyCauseLogMsg := fmt.Sprintf("No split key found: insufficient counters = %d, imbalance = %d", insufficientCounters, imbalance) - return noSplitKeyCauseLogMsg + return redact.Sprintf( + "no split key found: insufficient counters = %d, imbalance = %d", + insufficientCounters, imbalance) } // PopularKeyFrequency implements the LoadBasedSplitter interface. func (f *WeightedFinder) PopularKeyFrequency() float64 { + // Sort the sample slice to determine the frequency that a popular key + // appears. We could copy the slice, however it would require an allocation. + // The probability a sample is replaced doesn't change as it is independent + // of position. sort.Slice(f.samples[:], func(i, j int) bool { return f.samples[i].key.Compare(f.samples[j].key) < 0 }) @@ -288,3 +303,13 @@ func (f *WeightedFinder) PopularKeyFrequency() float64 { return popularKeyWeight / totalWeight } + +// SafeFormat implements the redact.SafeFormatter interface. +func (f *WeightedFinder) SafeFormat(w redact.SafePrinter, _ rune) { + w.Printf("key=%v start=%v count=%d total=%.2f samples=%v", + f.Key(), f.startTime, f.count, f.totalWeight, f.samples) +} + +func (f *WeightedFinder) String() string { + return redact.StringWithoutMarkers(f) +}