From d2db70ad7448d21dacad7621863fad86041a2566 Mon Sep 17 00:00:00 2001 From: Kai Sun Date: Mon, 19 Sep 2022 13:46:49 -0400 Subject: [PATCH] split: add observability for when load based splitting cannot find a key Previously, there were no metrics or logging in the load-based splitter, which made it hard to see why the load splitter could not find a split key. To address this, this patch adds metrics and logging to the load splitter: counter metrics that track how often no split key could be found and how often the most popular sampled key appears in at least 25% of the samples, plus logging that reports the causes for the missing split key (insufficient counters, imbalance, too many contained). Release note (ops change): Added metrics and logging that report why load-based splitting could not find a split key, making it easier to debug why a range is not splitting. --- pkg/kv/kvserver/asim/state/split_decider.go | 11 +- pkg/kv/kvserver/batcheval/cmd_range_stats.go | 4 +- pkg/kv/kvserver/batcheval/eval_context.go | 8 +- pkg/kv/kvserver/merge_queue.go | 2 +- pkg/kv/kvserver/metrics.go | 23 +++ pkg/kv/kvserver/replica.go | 8 +- pkg/kv/kvserver/replica_eval_context_span.go | 8 +- pkg/kv/kvserver/replica_init.go | 2 +- pkg/kv/kvserver/replica_split_load.go | 2 +- pkg/kv/kvserver/split/BUILD.bazel | 3 + pkg/kv/kvserver/split/decider.go | 72 ++++++-- pkg/kv/kvserver/split/decider_test.go | 175 ++++++++++++++----- pkg/kv/kvserver/split/finder.go | 51 ++++++ pkg/kv/kvserver/split/finder_test.go | 128 ++++++++++++++ pkg/kv/kvserver/split_queue.go | 6 +- pkg/ts/catalog/chart_catalog.go | 12 ++ 16 files changed, 430 insertions(+), 85 deletions(-) diff --git a/pkg/kv/kvserver/asim/state/split_decider.go b/pkg/kv/kvserver/asim/state/split_decider.go index 270358e9d7de..f03ec4b4c9b0 100644 --- a/pkg/kv/kvserver/asim/state/split_decider.go +++ b/pkg/kv/kvserver/asim/state/split_decider.go @@ -11,12 +11,14 @@ package state import ( + "context" "math/rand" "time" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/workload" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/split" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/util/metric" ) // LoadSplitter provides an abstraction for load based splitting.
It records @@ -66,7 +68,10 @@ func (s *SplitDecider) newDecider() *split.Decider { } decider := &split.Decider{} - split.Init(decider, intN, s.qpsThreshold, s.qpsRetention) + split.Init(decider, intN, s.qpsThreshold, s.qpsRetention, &split.LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) return decider } @@ -81,7 +86,7 @@ func (s *SplitDecider) Record(tick time.Time, rangeID RangeID, le workload.LoadE } qps := LoadEventQPS(le) - shouldSplit := decider.Record(tick, int(qps), func() roachpb.Span { + shouldSplit := decider.Record(context.Background(), tick, int(qps), func() roachpb.Span { return roachpb.Span{ Key: Key(le.Key).ToRKey().AsRawKey(), } @@ -102,7 +107,7 @@ func (s *SplitDecider) SplitKey(tick time.Time, rangeID RangeID) (Key, bool) { return InvalidKey, false } - key := decider.MaybeSplitKey(tick) + key := decider.MaybeSplitKey(context.Background(), tick) if key == nil { return InvalidKey, false } diff --git a/pkg/kv/kvserver/batcheval/cmd_range_stats.go b/pkg/kv/kvserver/batcheval/cmd_range_stats.go index 183ae552e74d..1e1d14611232 100644 --- a/pkg/kv/kvserver/batcheval/cmd_range_stats.go +++ b/pkg/kv/kvserver/batcheval/cmd_range_stats.go @@ -44,8 +44,8 @@ func RangeStats( ) (result.Result, error) { reply := resp.(*roachpb.RangeStatsResponse) reply.MVCCStats = cArgs.EvalCtx.GetMVCCStats() - reply.DeprecatedLastQueriesPerSecond = cArgs.EvalCtx.GetLastSplitQPS() - if qps, ok := cArgs.EvalCtx.GetMaxSplitQPS(); ok { + reply.DeprecatedLastQueriesPerSecond = cArgs.EvalCtx.GetLastSplitQPS(ctx) + if qps, ok := cArgs.EvalCtx.GetMaxSplitQPS(ctx); ok { reply.MaxQueriesPerSecond = qps } else { // See comment on MaxQueriesPerSecond. -1 means !ok. diff --git a/pkg/kv/kvserver/batcheval/eval_context.go b/pkg/kv/kvserver/batcheval/eval_context.go index 7f89b273c6cf..277e01c02c43 100644 --- a/pkg/kv/kvserver/batcheval/eval_context.go +++ b/pkg/kv/kvserver/batcheval/eval_context.go @@ -88,7 +88,7 @@ type EvalContext interface { // // NOTE: This should not be used when the load based splitting cluster setting // is disabled. - GetMaxSplitQPS() (float64, bool) + GetMaxSplitQPS(context.Context) (float64, bool) // GetLastSplitQPS returns the Replica's most recent queries/s request rate. // @@ -96,7 +96,7 @@ type EvalContext interface { // is disabled. // // TODO(nvanbenschoten): remove this method in v22.1. 
- GetLastSplitQPS() float64 + GetLastSplitQPS(context.Context) float64 GetGCThreshold() hlc.Timestamp ExcludeDataFromBackup() bool @@ -240,10 +240,10 @@ func (m *mockEvalCtxImpl) ContainsKey(key roachpb.Key) bool { func (m *mockEvalCtxImpl) GetMVCCStats() enginepb.MVCCStats { return m.Stats } -func (m *mockEvalCtxImpl) GetMaxSplitQPS() (float64, bool) { +func (m *mockEvalCtxImpl) GetMaxSplitQPS(context.Context) (float64, bool) { return m.QPS, true } -func (m *mockEvalCtxImpl) GetLastSplitQPS() float64 { +func (m *mockEvalCtxImpl) GetLastSplitQPS(context.Context) float64 { return m.QPS } func (m *mockEvalCtxImpl) CanCreateTxnRecord( diff --git a/pkg/kv/kvserver/merge_queue.go b/pkg/kv/kvserver/merge_queue.go index 0cf20d24feb1..b6eb35448354 100644 --- a/pkg/kv/kvserver/merge_queue.go +++ b/pkg/kv/kvserver/merge_queue.go @@ -217,7 +217,7 @@ func (mq *mergeQueue) process( lhsDesc := lhsRepl.Desc() lhsStats := lhsRepl.GetMVCCStats() - lhsQPS, lhsQPSOK := lhsRepl.GetMaxSplitQPS() + lhsQPS, lhsQPSOK := lhsRepl.GetMaxSplitQPS(ctx) minBytes := lhsRepl.GetMinBytes() if lhsStats.Total() >= minBytes { log.VEventf(ctx, 2, "skipping merge: LHS meets minimum size threshold %d with %d bytes", diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go index 0ac65b7c33fc..5f09f366c09e 100644 --- a/pkg/kv/kvserver/metrics.go +++ b/pkg/kv/kvserver/metrics.go @@ -21,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rangefeed" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/split" "github.com/cockroachdb/cockroach/pkg/multitenant" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage" @@ -1644,6 +1645,20 @@ Note that the measurement does not include the duration for replicating the eval Measurement: "Nanoseconds", Unit: metric.Unit_NANOSECONDS, } + + metaPopularKeyCount = metric.Metadata{ + Name: "kv.loadsplitter.popularkey", + Help: "Load-based splitter could not find a split key and the most popular sampled split key occurs in >= 25% of the samples.", + Measurement: "Occurrences", + Unit: metric.Unit_COUNT, + } + + metaNoSplitKeyCount = metric.Metadata{ + Name: "kv.loadsplitter.nosplitkey", + Help: "Load-based splitter could not find a split key.", + Measurement: "Occurrences", + Unit: metric.Unit_COUNT, + } ) // StoreMetrics is the set of metrics for a given store. @@ -1654,6 +1669,9 @@ type StoreMetrics struct { // tenant basis. *TenantsStorageMetrics + // LoadSplitterMetrics stores metrics for load-based splitter split key. + *split.LoadSplitterMetrics + // Replica metrics. ReplicaCount *metric.Gauge // Does not include uninitialized or reserved replicas. ReservedReplicaCount *metric.Gauge @@ -2185,6 +2203,10 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { sm := &StoreMetrics{ registry: storeRegistry, TenantsStorageMetrics: newTenantsStorageMetrics(), + LoadSplitterMetrics: &split.LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metaPopularKeyCount), + NoSplitKeyCount: metric.NewCounter(metaNoSplitKeyCount), + }, // Replica metrics. 
ReplicaCount: metric.NewGauge(metaReplicaCount), @@ -2516,6 +2538,7 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { } storeRegistry.AddMetricStruct(sm) + storeRegistry.AddMetricStruct(sm.LoadSplitterMetrics) return sm } diff --git a/pkg/kv/kvserver/replica.go b/pkg/kv/kvserver/replica.go index e3f8e39c2cc2..027c60123182 100644 --- a/pkg/kv/kvserver/replica.go +++ b/pkg/kv/kvserver/replica.go @@ -1150,8 +1150,8 @@ func (r *Replica) SetMVCCStatsForTesting(stats *enginepb.MVCCStats) { // works when the load based splitting cluster setting is enabled. // // Use QueriesPerSecond() for current QPS stats for all other purposes. -func (r *Replica) GetMaxSplitQPS() (float64, bool) { - return r.loadBasedSplitter.MaxQPS(r.Clock().PhysicalTime()) +func (r *Replica) GetMaxSplitQPS(ctx context.Context) (float64, bool) { + return r.loadBasedSplitter.MaxQPS(ctx, r.Clock().PhysicalTime()) } // GetLastSplitQPS returns the Replica's most recent queries/s request rate. @@ -1160,8 +1160,8 @@ func (r *Replica) GetMaxSplitQPS() (float64, bool) { // works when the load based splitting cluster setting is enabled. // // Use QueriesPerSecond() for current QPS stats for all other purposes. -func (r *Replica) GetLastSplitQPS() float64 { - return r.loadBasedSplitter.LastQPS(r.Clock().PhysicalTime()) +func (r *Replica) GetLastSplitQPS(ctx context.Context) float64 { + return r.loadBasedSplitter.LastQPS(ctx, r.Clock().PhysicalTime()) } // ContainsKey returns whether this range contains the specified key. diff --git a/pkg/kv/kvserver/replica_eval_context_span.go b/pkg/kv/kvserver/replica_eval_context_span.go index 4cee31883efa..ea103a9ec97f 100644 --- a/pkg/kv/kvserver/replica_eval_context_span.go +++ b/pkg/kv/kvserver/replica_eval_context_span.go @@ -131,14 +131,14 @@ func (rec SpanSetReplicaEvalContext) GetMVCCStats() enginepb.MVCCStats { // GetMaxSplitQPS returns the Replica's maximum queries/s rate for splitting and // merging purposes. -func (rec SpanSetReplicaEvalContext) GetMaxSplitQPS() (float64, bool) { - return rec.i.GetMaxSplitQPS() +func (rec SpanSetReplicaEvalContext) GetMaxSplitQPS(ctx context.Context) (float64, bool) { + return rec.i.GetMaxSplitQPS(ctx) } // GetLastSplitQPS returns the Replica's most recent queries/s rate for // splitting and merging purposes. 
-func (rec SpanSetReplicaEvalContext) GetLastSplitQPS() float64 { - return rec.i.GetLastSplitQPS() +func (rec SpanSetReplicaEvalContext) GetLastSplitQPS(ctx context.Context) float64 { + return rec.i.GetLastSplitQPS(ctx) } // CanCreateTxnRecord determines whether a transaction record can be created diff --git a/pkg/kv/kvserver/replica_init.go b/pkg/kv/kvserver/replica_init.go index d60bf9600cfb..34203d9ab9c2 100644 --- a/pkg/kv/kvserver/replica_init.go +++ b/pkg/kv/kvserver/replica_init.go @@ -98,7 +98,7 @@ func newUnloadedReplica( return float64(SplitByLoadQPSThreshold.Get(&store.cfg.Settings.SV)) }, func() time.Duration { return kvserverbase.SplitByLoadMergeDelay.Get(&store.cfg.Settings.SV) - }) + }, store.metrics.LoadSplitterMetrics) r.mu.proposals = map[kvserverbase.CmdIDKey]*ProposalData{} r.mu.checksums = map[uuid.UUID]*replicaChecksum{} r.mu.proposalBuf.Init((*replicaProposer)(r), tracker.NewLockfreeTracker(), r.Clock(), r.ClusterSettings()) diff --git a/pkg/kv/kvserver/replica_split_load.go b/pkg/kv/kvserver/replica_split_load.go index 15adcc0dad81..ea42bbabbdde 100644 --- a/pkg/kv/kvserver/replica_split_load.go +++ b/pkg/kv/kvserver/replica_split_load.go @@ -56,7 +56,7 @@ func (r *Replica) recordBatchForLoadBasedSplitting( if !r.SplitByLoadEnabled() { return } - shouldInitSplit := r.loadBasedSplitter.Record(timeutil.Now(), len(ba.Requests), func() roachpb.Span { + shouldInitSplit := r.loadBasedSplitter.Record(ctx, timeutil.Now(), len(ba.Requests), func() roachpb.Span { return spans.BoundarySpan(spanset.SpanGlobal) }) if shouldInitSplit { diff --git a/pkg/kv/kvserver/split/BUILD.bazel b/pkg/kv/kvserver/split/BUILD.bazel index 354061351004..02f19930667f 100644 --- a/pkg/kv/kvserver/split/BUILD.bazel +++ b/pkg/kv/kvserver/split/BUILD.bazel @@ -12,6 +12,8 @@ go_library( deps = [ "//pkg/keys", "//pkg/roachpb", + "//pkg/util/log", + "//pkg/util/metric", "//pkg/util/syncutil", ], ) @@ -30,6 +32,7 @@ go_test( "//pkg/roachpb", "//pkg/util/encoding", "//pkg/util/leaktest", + "//pkg/util/metric", "//pkg/util/stop", "//pkg/util/timeutil", "@com_github_stretchr_testify//assert", diff --git a/pkg/kv/kvserver/split/decider.go b/pkg/kv/kvserver/split/decider.go index c29e316d2eee..ce7d54e6c3fd 100644 --- a/pkg/kv/kvserver/split/decider.go +++ b/pkg/kv/kvserver/split/decider.go @@ -13,14 +13,18 @@ package split import ( + "context" "time" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/syncutil" ) const minSplitSuggestionInterval = time.Minute +const minNoSplitKeyLoggingMetricsInterval = time.Minute const minQueriesPerSecondSampleDuration = time.Second // A Decider collects measurements about the activity (measured in qps) on a @@ -48,10 +52,21 @@ const minQueriesPerSecondSampleDuration = time.Second // prevent load-based splits from being merged away until the resulting ranges // have consistently remained below a certain QPS threshold for a sufficiently // long period of time. + +// LoadSplitterMetrics consists of metrics for load-based splitter split key. +type LoadSplitterMetrics struct { + PopularKeyCount *metric.Counter + NoSplitKeyCount *metric.Counter +} + +// Decider tracks the latest QPS and if certain conditions are met, records +// incoming requests to find potential split keys and checks if sampled +// candidate split keys satisfy certain requirements. 
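+//
+// Illustrative usage only: the QPS threshold, retention, and metric metadata
+// below are placeholders (not the values the store actually wires in), and
+// ctx/spanFn are assumed to be supplied by the caller.
+//
+//	var d Decider
+//	Init(&d, rand.New(rand.NewSource(0)).Intn,
+//		func() float64 { return 100 },                    // QPS threshold
+//		func() time.Duration { return 10 * time.Minute }, // QPS retention
+//		&LoadSplitterMetrics{
+//			PopularKeyCount: metric.NewCounter(metric.Metadata{}),
+//			NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
+//		})
+//	if d.Record(ctx, timeutil.Now(), 1, spanFn) {
+//		// A suggested split key is available via d.MaybeSplitKey(ctx, now).
+//	}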
type Decider struct { - intn func(n int) int // supplied to Init - qpsThreshold func() float64 // supplied to Init - qpsRetention func() time.Duration // supplied to Init + intn func(n int) int // supplied to Init + qpsThreshold func() float64 // supplied to Init + qpsRetention func() time.Duration // supplied to Init + loadSplitterMetrics *LoadSplitterMetrics // supplied to Init mu struct { syncutil.Mutex @@ -67,6 +82,9 @@ type Decider struct { // Fields tracking split key suggestions. splitFinder *Finder // populated when engaged or decided lastSplitSuggestion time.Time // last stipulation to client to carry out split + + // Fields tracking logging / metrics around load-based splitter split key. + lastNoSplitKeyLoggingMetrics time.Time } } @@ -79,10 +97,12 @@ func Init( intn func(n int) int, qpsThreshold func() float64, qpsRetention func() time.Duration, + loadSplitterMetrics *LoadSplitterMetrics, ) { lbs.intn = intn lbs.qpsThreshold = qpsThreshold lbs.qpsRetention = qpsRetention + lbs.loadSplitterMetrics = loadSplitterMetrics } // Record notifies the Decider that 'n' operations are being carried out which @@ -93,14 +113,16 @@ func Init( // If the returned boolean is true, a split key is available (though it may // disappear as more keys are sampled) and should be initiated by the caller, // which can call MaybeSplitKey to retrieve the suggested key. -func (d *Decider) Record(now time.Time, n int, span func() roachpb.Span) bool { +func (d *Decider) Record(ctx context.Context, now time.Time, n int, span func() roachpb.Span) bool { d.mu.Lock() defer d.mu.Unlock() - return d.recordLocked(now, n, span) + return d.recordLocked(ctx, now, n, span) } -func (d *Decider) recordLocked(now time.Time, n int, span func() roachpb.Span) bool { +func (d *Decider) recordLocked( + ctx context.Context, now time.Time, n int, span func() roachpb.Span, +) bool { d.mu.count += int64(n) // First compute requests per second since the last check. @@ -137,9 +159,28 @@ func (d *Decider) recordLocked(now time.Time, n int, span func() roachpb.Span) b if s.Key != nil { d.mu.splitFinder.Record(span(), d.intn) } - if now.Sub(d.mu.lastSplitSuggestion) > minSplitSuggestionInterval && d.mu.splitFinder.Ready(now) && d.mu.splitFinder.Key() != nil { - d.mu.lastSplitSuggestion = now - return true + if d.mu.splitFinder.Ready(now) { + if d.mu.splitFinder.Key() != nil { + if now.Sub(d.mu.lastSplitSuggestion) > minSplitSuggestionInterval { + d.mu.lastSplitSuggestion = now + return true + } + } else { + if now.Sub(d.mu.lastNoSplitKeyLoggingMetrics) > minNoSplitKeyLoggingMetricsInterval { + d.mu.lastNoSplitKeyLoggingMetrics = now + insufficientCounters, imbalance, tooManyContained, imbalanceAndTooManyContained := d.mu.splitFinder.NoSplitKeyCause() + if insufficientCounters < splitKeySampleSize { + popularKeyFrequency := d.mu.splitFinder.PopularKeyFrequency() + if popularKeyFrequency >= splitKeyThreshold { + d.loadSplitterMetrics.PopularKeyCount.Inc(1) + } + d.loadSplitterMetrics.NoSplitKeyCount.Inc(1) + log.KvDistribution.Infof(ctx, + "No split key found: insufficient counters = %d, imbalance = %d, too many contained = %d, imbalance and too many contained = %d, most popular key occurs in %d%% of samples", + insufficientCounters, imbalance, tooManyContained, imbalanceAndTooManyContained, int(popularKeyFrequency*100)) + } + } + } } } return false @@ -156,22 +197,22 @@ func (d *Decider) RecordMax(now time.Time, qps float64) { } // LastQPS returns the most recent QPS measurement. 
-func (d *Decider) LastQPS(now time.Time) float64 { +func (d *Decider) LastQPS(ctx context.Context, now time.Time) float64 { d.mu.Lock() defer d.mu.Unlock() - d.recordLocked(now, 0, nil) // force QPS computation + d.recordLocked(ctx, now, 0, nil) // force QPS computation return d.mu.lastQPS } // MaxQPS returns the maximum QPS measurement recorded over the retention // period. If the Decider has not been recording for a full retention period, // the method returns false. -func (d *Decider) MaxQPS(now time.Time) (float64, bool) { +func (d *Decider) MaxQPS(ctx context.Context, now time.Time) (float64, bool) { d.mu.Lock() defer d.mu.Unlock() - d.recordLocked(now, 0, nil) // force QPS computation + d.recordLocked(ctx, now, 0, nil) // force QPS computation return d.mu.maxQPS.maxQPS(now, d.qpsRetention()) } @@ -180,13 +221,13 @@ func (d *Decider) MaxQPS(now time.Time) (float64, bool) { // or if it wasn't able to determine a suitable split key. // // It is legal to call MaybeSplitKey at any time. -func (d *Decider) MaybeSplitKey(now time.Time) roachpb.Key { +func (d *Decider) MaybeSplitKey(ctx context.Context, now time.Time) roachpb.Key { var key roachpb.Key d.mu.Lock() defer d.mu.Unlock() - d.recordLocked(now, 0, nil) + d.recordLocked(ctx, now, 0, nil) if d.mu.splitFinder != nil && d.mu.splitFinder.Ready(now) { // We've found a key to split at. This key might be in the middle of a // SQL row. If we fail to rectify that, we'll cause SQL crashes: @@ -240,6 +281,7 @@ func (d *Decider) Reset(now time.Time) { d.mu.maxQPS.reset(now, d.qpsRetention()) d.mu.splitFinder = nil d.mu.lastSplitSuggestion = time.Time{} + d.mu.lastNoSplitKeyLoggingMetrics = time.Time{} } // maxQPSTracker collects a series of queries-per-second measurement samples and diff --git a/pkg/kv/kvserver/split/decider_test.go b/pkg/kv/kvserver/split/decider_test.go index 3ea1821c3734..6db6a2e4223a 100644 --- a/pkg/kv/kvserver/split/decider_test.go +++ b/pkg/kv/kvserver/split/decider_test.go @@ -11,6 +11,7 @@ package split import ( + "context" "math" "math/rand" "testing" @@ -20,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -38,7 +40,10 @@ func TestDecider(t *testing.T) { intn := rand.New(rand.NewSource(12)).Intn var d Decider - Init(&d, intn, func() float64 { return 10.0 }, func() time.Duration { return 2 * time.Second }) + Init(&d, intn, func() float64 { return 10.0 }, func() time.Duration { return 2 * time.Second }, &LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) op := func(s string) func() roachpb.Span { return func() roachpb.Span { return roachpb.Span{Key: roachpb.Key(s)} } @@ -46,38 +51,38 @@ func TestDecider(t *testing.T) { assertQPS := func(i int, expQPS float64) { t.Helper() - qps := d.LastQPS(ms(i)) + qps := d.LastQPS(context.Background(), ms(i)) assert.Equal(t, expQPS, qps) } assertMaxQPS := func(i int, expMaxQPS float64, expOK bool) { t.Helper() - maxQPS, ok := d.MaxQPS(ms(i)) + maxQPS, ok := d.MaxQPS(context.Background(), ms(i)) assert.Equal(t, expMaxQPS, maxQPS) assert.Equal(t, expOK, ok) } - assert.Equal(t, false, d.Record(ms(100), 1, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(100), 1, nil)) assertQPS(100, 0) assertMaxQPS(100, 0, false) assert.Equal(t, 
ms(100), d.mu.lastQPSRollover) assert.EqualValues(t, 1, d.mu.count) - assert.Equal(t, false, d.Record(ms(400), 3, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(400), 3, nil)) assertQPS(100, 0) assertQPS(700, 0) assertMaxQPS(400, 0, false) - assert.Equal(t, false, d.Record(ms(300), 3, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(300), 3, nil)) assertQPS(100, 0) assertMaxQPS(300, 0, false) - assert.Equal(t, false, d.Record(ms(900), 1, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(900), 1, nil)) assertQPS(0, 0) assertMaxQPS(900, 0, false) - assert.Equal(t, false, d.Record(ms(1099), 1, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(1099), 1, nil)) assertQPS(0, 0) assertMaxQPS(1099, 0, false) @@ -86,7 +91,7 @@ func TestDecider(t *testing.T) { // It won't engage because the duration between the rollovers is 1.1s, and // we had 10 events over that interval. - assert.Equal(t, false, d.Record(ms(1200), 1, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(1200), 1, nil)) assertQPS(0, float64(10)/float64(1.1)) assert.Equal(t, ms(1200), d.mu.lastQPSRollover) assertMaxQPS(1099, 0, false) @@ -95,11 +100,11 @@ func TestDecider(t *testing.T) { assert.Equal(t, nilFinder, d.mu.splitFinder) - assert.Equal(t, false, d.Record(ms(2199), 12, nil)) + assert.Equal(t, false, d.Record(context.Background(), ms(2199), 12, nil)) assert.Equal(t, nilFinder, d.mu.splitFinder) // 2200 is the next rollover point, and 12+1=13 qps should be computed. - assert.Equal(t, false, d.Record(ms(2200), 1, op("a"))) + assert.Equal(t, false, d.Record(context.Background(), ms(2200), 1, op("a"))) assert.Equal(t, ms(2200), d.mu.lastQPSRollover) assertQPS(0, float64(13)) assertMaxQPS(2200, 13, true) @@ -111,7 +116,7 @@ func TestDecider(t *testing.T) { // to split. We don't test the details of exactly when that happens because // this is done in the finder tests. tick := 2200 - for o := op("a"); !d.Record(ms(tick), 11, o); tick += 1000 { + for o := op("a"); !d.Record(context.Background(), ms(tick), 11, o); tick += 1000 { if tick/1000%2 == 0 { o = op("z") } else { @@ -119,7 +124,7 @@ func TestDecider(t *testing.T) { } } - assert.Equal(t, roachpb.Key("z"), d.MaybeSplitKey(ms(tick))) + assert.Equal(t, roachpb.Key("z"), d.MaybeSplitKey(context.Background(), ms(tick))) // We were told to split, but won't be told to split again for some time // to avoid busy-looping on split attempts. @@ -128,35 +133,35 @@ func TestDecider(t *testing.T) { if i%2 != 0 { o = op("a") } - assert.False(t, d.Record(ms(tick), 11, o)) - assert.True(t, d.LastQPS(ms(tick)) > 1.0) + assert.False(t, d.Record(context.Background(), ms(tick), 11, o)) + assert.True(t, d.LastQPS(context.Background(), ms(tick)) > 1.0) // Even though the split key remains. - assert.Equal(t, roachpb.Key("z"), d.MaybeSplitKey(ms(tick+999))) + assert.Equal(t, roachpb.Key("z"), d.MaybeSplitKey(context.Background(), ms(tick+999))) tick += 1000 } // But after minSplitSuggestionInterval of ticks, we get another one. - assert.True(t, d.Record(ms(tick), 11, op("a"))) + assert.True(t, d.Record(context.Background(), ms(tick), 11, op("a"))) assertQPS(tick, float64(11)) assertMaxQPS(tick, 11, true) // Split key suggestion vanishes once qps drops. 
tick += 1000 - assert.False(t, d.Record(ms(tick), 9, op("a"))) - assert.Equal(t, roachpb.Key(nil), d.MaybeSplitKey(ms(tick))) + assert.False(t, d.Record(context.Background(), ms(tick), 9, op("a"))) + assert.Equal(t, roachpb.Key(nil), d.MaybeSplitKey(context.Background(), ms(tick))) assert.Equal(t, nilFinder, d.mu.splitFinder) // Hammer a key with writes above threshold. There shouldn't be a split // since everyone is hitting the same key and load can't be balanced. for i := 0; i < 1000; i++ { - assert.False(t, d.Record(ms(tick), 11, op("q"))) + assert.False(t, d.Record(context.Background(), ms(tick), 11, op("q"))) tick += 1000 } assert.True(t, d.mu.splitFinder.Ready(ms(tick))) - assert.Equal(t, roachpb.Key(nil), d.MaybeSplitKey(ms(tick))) + assert.Equal(t, roachpb.Key(nil), d.MaybeSplitKey(context.Background(), ms(tick))) // But the finder keeps sampling to adapt to changing workload... for i := 0; i < 1000; i++ { - assert.False(t, d.Record(ms(tick), 11, op("p"))) + assert.False(t, d.Record(context.Background(), ms(tick), 11, op("p"))) tick += 1000 } @@ -168,7 +173,7 @@ func TestDecider(t *testing.T) { // Since the new workload is also not partitionable, nothing changes in // the decision. assert.True(t, d.mu.splitFinder.Ready(ms(tick))) - assert.Equal(t, roachpb.Key(nil), d.MaybeSplitKey(ms(tick))) + assert.Equal(t, roachpb.Key(nil), d.MaybeSplitKey(context.Background(), ms(tick))) // Get the decider engaged again so that we can test Reset(). for i := 0; i < 1000; i++ { @@ -176,16 +181,16 @@ func TestDecider(t *testing.T) { if i%2 != 0 { o = op("a") } - d.Record(ms(tick), 11, o) + d.Record(context.Background(), ms(tick), 11, o) tick += 500 } // The finder wants to split, until Reset is called, at which point it starts // back up at zero. assert.True(t, d.mu.splitFinder.Ready(ms(tick))) - assert.Equal(t, roachpb.Key("z"), d.MaybeSplitKey(ms(tick))) + assert.Equal(t, roachpb.Key("z"), d.MaybeSplitKey(context.Background(), ms(tick))) d.Reset(ms(tick)) - assert.Nil(t, d.MaybeSplitKey(ms(tick))) + assert.Nil(t, d.MaybeSplitKey(context.Background(), ms(tick))) assert.Nil(t, d.mu.splitFinder) } @@ -194,11 +199,14 @@ func TestDecider_MaxQPS(t *testing.T) { intn := rand.New(rand.NewSource(11)).Intn var d Decider - Init(&d, intn, func() float64 { return 100.0 }, func() time.Duration { return 10 * time.Second }) + Init(&d, intn, func() float64 { return 100.0 }, func() time.Duration { return 10 * time.Second }, &LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) assertMaxQPS := func(i int, expMaxQPS float64, expOK bool) { t.Helper() - maxQPS, ok := d.MaxQPS(ms(i)) + maxQPS, ok := d.MaxQPS(context.Background(), ms(i)) assert.Equal(t, expMaxQPS, maxQPS) assert.Equal(t, expOK, ok) } @@ -206,22 +214,22 @@ func TestDecider_MaxQPS(t *testing.T) { assertMaxQPS(1000, 0, false) // Record a large number of samples. 
- d.Record(ms(1500), 5, nil) - d.Record(ms(2000), 5, nil) - d.Record(ms(4500), 1, nil) - d.Record(ms(5000), 15, nil) - d.Record(ms(5500), 2, nil) - d.Record(ms(8000), 5, nil) - d.Record(ms(10000), 9, nil) + d.Record(context.Background(), ms(1500), 5, nil) + d.Record(context.Background(), ms(2000), 5, nil) + d.Record(context.Background(), ms(4500), 1, nil) + d.Record(context.Background(), ms(5000), 15, nil) + d.Record(context.Background(), ms(5500), 2, nil) + d.Record(context.Background(), ms(8000), 5, nil) + d.Record(context.Background(), ms(10000), 9, nil) assertMaxQPS(10000, 0, false) assertMaxQPS(11000, 17, true) // Record more samples with a lower QPS. - d.Record(ms(12000), 1, nil) - d.Record(ms(13000), 4, nil) - d.Record(ms(15000), 2, nil) - d.Record(ms(19000), 3, nil) + d.Record(context.Background(), ms(12000), 1, nil) + d.Record(context.Background(), ms(13000), 4, nil) + d.Record(context.Background(), ms(15000), 2, nil) + d.Record(context.Background(), ms(19000), 3, nil) assertMaxQPS(20000, 4.5, true) assertMaxQPS(21000, 4, true) @@ -237,7 +245,10 @@ func TestDeciderCallsEnsureSafeSplitKey(t *testing.T) { intn := rand.New(rand.NewSource(11)).Intn var d Decider - Init(&d, intn, func() float64 { return 1.0 }, func() time.Duration { return time.Second }) + Init(&d, intn, func() float64 { return 1.0 }, func() time.Duration { return time.Second }, &LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) baseKey := keys.SystemSQLCodec.TablePrefix(51) for i := 0; i < 4; i++ { @@ -253,10 +264,10 @@ func TestDeciderCallsEnsureSafeSplitKey(t *testing.T) { var now time.Time for i := 0; i < 2*int(minSplitSuggestionInterval/time.Second); i++ { now = now.Add(500 * time.Millisecond) - d.Record(now, 1, c0) + d.Record(context.Background(), now, 1, c0) now = now.Add(500 * time.Millisecond) - d.Record(now, 1, c1) - k = d.MaybeSplitKey(now) + d.Record(context.Background(), now, 1, c1) + k = d.MaybeSplitKey(context.Background(), now) if len(k) != 0 { break } @@ -270,7 +281,10 @@ func TestDeciderIgnoresEnsureSafeSplitKeyOnError(t *testing.T) { intn := rand.New(rand.NewSource(11)).Intn var d Decider - Init(&d, intn, func() float64 { return 1.0 }, func() time.Duration { return time.Second }) + Init(&d, intn, func() float64 { return 1.0 }, func() time.Duration { return time.Second }, &LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) baseKey := keys.SystemSQLCodec.TablePrefix(51) for i := 0; i < 4; i++ { @@ -290,10 +304,10 @@ func TestDeciderIgnoresEnsureSafeSplitKeyOnError(t *testing.T) { var now time.Time for i := 0; i < 2*int(minSplitSuggestionInterval/time.Second); i++ { now = now.Add(500 * time.Millisecond) - d.Record(now, 1, c0) + d.Record(context.Background(), now, 1, c0) now = now.Add(500 * time.Millisecond) - d.Record(now, 1, c1) - k = d.MaybeSplitKey(now) + d.Record(context.Background(), now, 1, c1) + k = d.MaybeSplitKey(context.Background(), now) if len(k) != 0 { break } @@ -392,3 +406,70 @@ func TestMaxQPSTracker(t *testing.T) { require.Equal(t, [6]float64{20, 27, 0, 0, 0, 0}, mt.windows) require.Equal(t, 1, mt.curIdx) } + +func TestDeciderMetrics(t *testing.T) { + defer leaktest.AfterTest(t)() + intn := rand.New(rand.NewSource(11)).Intn + timeStart := 1000 + + var dPopular Decider + Init(&dPopular, intn, func() float64 { return 1.0 }, func() time.Duration { return time.Second }, &LoadSplitterMetrics{ + PopularKeyCount: 
metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) + + // No split key, popular key + for i := 0; i < 20; i++ { + dPopular.Record(context.Background(), ms(timeStart), 1, func() roachpb.Span { + return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))} + }) + } + for i := 1; i <= 2000; i++ { + dPopular.Record(context.Background(), ms(timeStart+i*50), 1, func() roachpb.Span { + return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))} + }) + } + + assert.Equal(t, dPopular.loadSplitterMetrics.PopularKeyCount.Count(), int64(2)) + assert.Equal(t, dPopular.loadSplitterMetrics.NoSplitKeyCount.Count(), int64(2)) + + // No split key, not popular key + var dNotPopular Decider + Init(&dNotPopular, intn, func() float64 { return 1.0 }, func() time.Duration { return time.Second }, &LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) + for i := 0; i < 20; i++ { + dNotPopular.Record(context.Background(), ms(timeStart), 1, func() roachpb.Span { + return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))} + }) + } + for i := 1; i <= 2000; i++ { + dNotPopular.Record(context.Background(), ms(timeStart+i*50), 1, func() roachpb.Span { + return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(i))} + }) + } + + assert.Equal(t, dNotPopular.loadSplitterMetrics.PopularKeyCount.Count(), int64(0)) + assert.Equal(t, dNotPopular.loadSplitterMetrics.NoSplitKeyCount.Count(), int64(2)) + + // No split key, all insufficient counters + var dAllInsufficientCounters Decider + Init(&dAllInsufficientCounters, intn, func() float64 { return 1.0 }, func() time.Duration { return time.Second }, &LoadSplitterMetrics{ + PopularKeyCount: metric.NewCounter(metric.Metadata{}), + NoSplitKeyCount: metric.NewCounter(metric.Metadata{}), + }) + for i := 0; i < 20; i++ { + dAllInsufficientCounters.Record(context.Background(), ms(timeStart), 1, func() roachpb.Span { + return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))} + }) + } + for i := 1; i <= 80; i++ { + dAllInsufficientCounters.Record(context.Background(), ms(timeStart+i*1000), 1, func() roachpb.Span { + return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))} + }) + } + + assert.Equal(t, dAllInsufficientCounters.loadSplitterMetrics.PopularKeyCount.Count(), int64(0)) + assert.Equal(t, dAllInsufficientCounters.loadSplitterMetrics.NoSplitKeyCount.Count(), int64(0)) +} diff --git a/pkg/kv/kvserver/split/finder.go b/pkg/kv/kvserver/split/finder.go index 942216a36db6..e622e6a6e044 100644 --- a/pkg/kv/kvserver/split/finder.go +++ b/pkg/kv/kvserver/split/finder.go @@ -13,6 +13,7 @@ package split import ( "bytes" "math" + "sort" "time" "github.com/cockroachdb/cockroach/pkg/roachpb" @@ -149,3 +150,53 @@ func (f *Finder) Key() roachpb.Key { } return f.samples[bestIdx].key } + +// NoSplitKeyCause iterates over all sampled candidate split keys and +// determines the number of samples that don't pass each split key requirement +// (e.g. insufficient counters, imbalance in left and right counters, too many +// contained counters, or a combination of the last two). 
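+// Each sample is attributed to at most one cause; samples that satisfy all
+// requirements (i.e. would yield a valid split key) are not counted.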
+func (f *Finder) NoSplitKeyCause() ( + insufficientCounters, imbalance, tooManyContained, imbalanceAndTooManyContained int, +) { + for _, s := range f.samples { + if s.left+s.right+s.contained < splitKeyMinCounter { + insufficientCounters++ + } else { + balanceScore := math.Abs(float64(s.left-s.right)) / float64(s.left+s.right) + imbalanceBool := balanceScore >= splitKeyThreshold + containedScore := float64(s.contained) / float64(s.left+s.right+s.contained) + tooManyContainedBool := containedScore >= splitKeyContainedThreshold + if imbalanceBool && !tooManyContainedBool { + imbalance++ + } else if !imbalanceBool && tooManyContainedBool { + tooManyContained++ + } else if imbalanceBool && tooManyContainedBool { + imbalanceAndTooManyContained++ + } + } + } + return +} + +// PopularKeyFrequency returns the percentage that the most popular key appears +// in f.samples. +func (f *Finder) PopularKeyFrequency() float64 { + sort.Slice(f.samples[:], func(i, j int) bool { + return bytes.Compare(f.samples[i].key, f.samples[j].key) < 0 + }) + + currentKeyCount := 1 + popularKeyCount := 1 + for i := 1; i < len(f.samples); i++ { + if bytes.Equal(f.samples[i].key, f.samples[i-1].key) { + currentKeyCount++ + } else { + currentKeyCount = 1 + } + if popularKeyCount < currentKeyCount { + popularKeyCount = currentKeyCount + } + } + + return float64(popularKeyCount) / float64(splitKeySampleSize) +} diff --git a/pkg/kv/kvserver/split/finder_test.go b/pkg/kv/kvserver/split/finder_test.go index 6f6783868e78..0afd1844fc4c 100644 --- a/pkg/kv/kvserver/split/finder_test.go +++ b/pkg/kv/kvserver/split/finder_test.go @@ -13,6 +13,7 @@ package split import ( "bytes" "context" + "math/rand" "reflect" "testing" @@ -21,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/stop" "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/stretchr/testify/assert" ) // TestSplitFinderKey verifies the Key() method correctly @@ -271,3 +273,129 @@ func TestSplitFinderRecorder(t *testing.T) { } } } + +func TestFinderNoSplitKeyCause(t *testing.T) { + samples := [splitKeySampleSize]sample{} + for i, idx := range rand.Perm(splitKeySampleSize) { + if i < 5 { + // insufficient counters + samples[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(uint32(i)), + left: 0, + right: 0, + contained: splitKeyMinCounter - 1, + } + } else if i < 7 { + // imbalance + deviationLeft := rand.Intn(5) + deviationRight := rand.Intn(5) + samples[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(uint32(i)), + left: 25 + deviationLeft, + right: 15 - deviationRight, + contained: int(max(float64(splitKeyMinCounter-40-deviationLeft+deviationRight), float64(40+deviationLeft-deviationRight))), + } + } else if i < 13 { + // imbalance + deviationLeft := rand.Intn(5) + deviationRight := rand.Intn(5) + samples[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(uint32(i)), + left: 50 + deviationLeft, + right: 30 - deviationRight, + contained: int(max(float64(splitKeyMinCounter-80-deviationLeft+deviationRight), 0)), + } + } else { + // too many contained + contained := int(splitKeyMinCounter*splitKeyContainedThreshold + 1) + left := (splitKeyMinCounter - contained) / 2 + samples[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(uint32(i)), + left: left, + right: splitKeyMinCounter - left - contained, + contained: contained, + } + } + } + + finder := NewFinder(timeutil.Now()) + finder.samples = samples + insufficientCounters, imbalance, tooManyContained, imbalanceAndTooManyContained 
:= finder.NoSplitKeyCause() + assert.Equal(t, 5, insufficientCounters, "unexpected insufficient counters") + assert.Equal(t, 6, imbalance, "unexpected imbalance counters") + assert.Equal(t, 7, tooManyContained, "unexpected too many contained counters") + assert.Equal(t, 2, imbalanceAndTooManyContained, "unexpected imbalance and too many contained counters") +} + +func TestFinderPopularKeyFrequency(t *testing.T) { + uniqueKeySample := [splitKeySampleSize]sample{} + for i, idx := range rand.Perm(splitKeySampleSize) { + uniqueKeySample[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(uint32(i)), + } + } + twentyPercentPopularKeySample := [splitKeySampleSize]sample{} + for i, idx := range rand.Perm(splitKeySampleSize) { + var tableID uint32 + if i <= 15 { + tableID = uint32(i / 3) + } else { + tableID = 6 + } + twentyPercentPopularKeySample[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(tableID), + } + } + twentyFivePercentPopularKeySample := [splitKeySampleSize]sample{} + for i, idx := range rand.Perm(splitKeySampleSize) { + var tableID uint32 + if i < 8 || i >= 13 { + tableID = uint32(i / 4) + } else { + tableID = 2 + } + twentyFivePercentPopularKeySample[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(tableID), + } + } + fiftyPercentPopularKeySample := [splitKeySampleSize]sample{} + for i, idx := range rand.Perm(splitKeySampleSize) { + fiftyPercentPopularKeySample[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(uint32(i / 10)), + } + } + fiftyFivePercentPopularKeySample := [splitKeySampleSize]sample{} + for i, idx := range rand.Perm(splitKeySampleSize) { + var tableID uint32 + if i >= 11 { + tableID = uint32(1) + } + fiftyFivePercentPopularKeySample[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(tableID), + } + } + sameKeySample := [splitKeySampleSize]sample{} + for _, idx := range rand.Perm(splitKeySampleSize) { + sameKeySample[idx] = sample{ + key: keys.SystemSQLCodec.TablePrefix(0), + } + } + + testCases := []struct { + samples [splitKeySampleSize]sample + expectedPopularKeyFrequency float64 + }{ + {uniqueKeySample, 0.05}, + {twentyPercentPopularKeySample, 0.2}, + {twentyFivePercentPopularKeySample, 0.25}, + {fiftyPercentPopularKeySample, 0.5}, + {fiftyFivePercentPopularKeySample, 0.55}, + {sameKeySample, 1}, + } + for i, test := range testCases { + finder := NewFinder(timeutil.Now()) + finder.samples = test.samples + popularKeyFrequency := finder.PopularKeyFrequency() + assert.Equal(t, test.expectedPopularKeyFrequency, popularKeyFrequency, "unexpected popular key frequency in test %d", i) + } +} diff --git a/pkg/kv/kvserver/split_queue.go b/pkg/kv/kvserver/split_queue.go index 514aaf98a48f..5eecb2d1c475 100644 --- a/pkg/kv/kvserver/split_queue.go +++ b/pkg/kv/kvserver/split_queue.go @@ -151,7 +151,7 @@ func (sq *splitQueue) shouldQueue( repl.GetMaxBytes(), repl.shouldBackpressureWrites(), confReader) if !shouldQ && repl.SplitByLoadEnabled() { - if splitKey := repl.loadBasedSplitter.MaybeSplitKey(timeutil.Now()); splitKey != nil { + if splitKey := repl.loadBasedSplitter.MaybeSplitKey(ctx, timeutil.Now()); splitKey != nil { shouldQ, priority = true, 1.0 // default priority } } @@ -233,10 +233,10 @@ func (sq *splitQueue) processAttempt( } now := timeutil.Now() - if splitByLoadKey := r.loadBasedSplitter.MaybeSplitKey(now); splitByLoadKey != nil { + if splitByLoadKey := r.loadBasedSplitter.MaybeSplitKey(ctx, now); splitByLoadKey != nil { batchHandledQPS, _ := r.QueriesPerSecond() raftAppliedQPS := r.WritesPerSecond() - splitQPS := 
r.loadBasedSplitter.LastQPS(now) + splitQPS := r.loadBasedSplitter.LastQPS(ctx, now) reason := fmt.Sprintf( "load at key %s (%.2f splitQPS, %.2f batches/sec, %.2f raft mutations/sec)", splitByLoadKey, diff --git a/pkg/ts/catalog/chart_catalog.go b/pkg/ts/catalog/chart_catalog.go index c91f7b93110a..62d38fde0654 100644 --- a/pkg/ts/catalog/chart_catalog.go +++ b/pkg/ts/catalog/chart_catalog.go @@ -741,6 +741,18 @@ var charts = []sectionDescription{ }, }, }, + { + Organization: [][]string{{DistributionLayer, "Load", "Splitter"}}, + Charts: []chartDescription{ + { + Title: "Load Splitter", + Metrics: []string{ + "kv.loadsplitter.popularkey", + "kv.loadsplitter.nosplitkey", + }, + }, + }, + }, { Organization: [][]string{ {DistributionLayer, "Split Queue"},