From d5571a275e44cd27727f390209c046dfb5d8518e Mon Sep 17 00:00:00 2001
From: Kai Sun
Date: Mon, 12 Sep 2022 17:33:57 -0400
Subject: [PATCH] kvserver: add admission.io.overload metric

Resolves #87424.

Previously, only the unnormalized LSM L0 sub-level and file counts were
exposed externally, not the store's IOThreshold. This was inadequate because
normalizing and comparing the sub-level and file counts is tedious (each must
be divided by a different threshold). To address this, this patch adds a
metric `admission.io.overload` that tracks the store's IOThreshold score.

Release note (ops change): Added a metric `admission.io.overload` which
tracks the store's IOThreshold score.
---
 pkg/kv/kvserver/metrics.go                     | 14 ++++++++++++++
 pkg/kv/kvserver/store.go                       |  8 ++++++++
 pkg/ts/catalog/chart_catalog.go                |  6 ++++++
 pkg/util/admission/admissionpb/io_threshold.go |  9 ++++++++-
 4 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go
index 501e8e2f3750..9b238ec09886 100644
--- a/pkg/kv/kvserver/metrics.go
+++ b/pkg/kv/kvserver/metrics.go
@@ -1023,6 +1023,18 @@ The messages are dropped to help these replicas to recover from I/O overload.`,
 		Unit:        metric.Unit_COUNT,
 	}
 
+	metaIOOverload = metric.Metadata{
+		Name: "admission.io.overload",
+		Help: `1-normalized float used to pause replication to raft group followers if it exceeds a given threshold.
+
+The threshold is the admission.kv.pause_replication_io_threshold cluster setting
+(the pause-replication feature is disabled when the setting is 0, which is the default);
+see pkg/kv/kvserver/replica_raft_overload.go for more details. The score is composed of
+the LSM L0 sub-level and file counts.`,
+		Measurement: "Threshold",
+		Unit:        metric.Unit_COUNT,
+	}
+
 	// Replica queue metrics.
 	metaMVCCGCQueueSuccesses = metric.Metadata{
 		Name:        "queue.gc.process.success",
@@ -1770,6 +1782,7 @@ type StoreMetrics struct {
 	RaftPausedFollowerCount       *metric.Gauge
 	RaftPausedFollowerDroppedMsgs *metric.Counter
+	IOOverload                    *metric.GaugeFloat64
 
 	RaftCoalescedHeartbeatsPending *metric.Gauge
@@ -2293,6 +2306,7 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
 		RaftPausedFollowerCount:       metric.NewGauge(metaRaftFollowerPaused),
 		RaftPausedFollowerDroppedMsgs: metric.NewCounter(metaRaftPausedFollowerDroppedMsgs),
+		IOOverload:                    metric.NewGaugeFloat64(metaIOOverload),
 
 		// This Gauge measures the number of heartbeats queued up just before
 		// the queue is cleared, to avoid flapping wildly.
diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go
index f038393854ee..1c88c88b3ede 100644
--- a/pkg/kv/kvserver/store.go
+++ b/pkg/kv/kvserver/store.go
@@ -3127,6 +3127,7 @@ func (s *Store) updateReplicationGauges(ctx context.Context) error {
 		overreplicatedRangeCount int64
 		behindCount              int64
 		pausedFollowerCount      int64
+		ioOverload               float64
 		slowRaftProposalCount    int64
 
 		locks                    int64
@@ -3152,6 +3153,12 @@ func (s *Store) updateReplicationGauges(ctx context.Context) error {
 	uninitializedCount = int64(len(s.mu.uninitReplicas))
 	s.mu.RUnlock()
 
+	// TODO(kaisun314,kvoli): move this to a per-store admission control metrics
+	// struct when available. See pkg/util/admission/granter.go.
+	s.ioThreshold.Lock()
+	ioOverload, _ = s.ioThreshold.t.Score()
+	s.ioThreshold.Unlock()
+
 	newStoreReplicaVisitor(s).Visit(func(rep *Replica) bool {
 		metrics := rep.Metrics(ctx, now, livenessMap, clusterNodes)
 		if metrics.Leader {
@@ -3249,6 +3256,7 @@ func (s *Store) updateReplicationGauges(ctx context.Context) error {
 	s.metrics.OverReplicatedRangeCount.Update(overreplicatedRangeCount)
 	s.metrics.RaftLogFollowerBehindCount.Update(behindCount)
 	s.metrics.RaftPausedFollowerCount.Update(pausedFollowerCount)
+	s.metrics.IOOverload.Update(ioOverload)
 	s.metrics.SlowRaftRequests.Update(slowRaftProposalCount)
 
 	var averageLockHoldDurationNanos int64
diff --git a/pkg/ts/catalog/chart_catalog.go b/pkg/ts/catalog/chart_catalog.go
index fc77a12e07c1..a60e7830bcfa 100644
--- a/pkg/ts/catalog/chart_catalog.go
+++ b/pkg/ts/catalog/chart_catalog.go
@@ -3503,6 +3503,12 @@ var charts = []sectionDescription{
 				"admission.granter.io_tokens_exhausted_duration.kv",
 			},
 		},
+		{
+			Title: "IO Overload - IOThreshold Score",
+			Metrics: []string{
+				"admission.io.overload",
+			},
+		},
 	},
 },
 {
diff --git a/pkg/util/admission/admissionpb/io_threshold.go b/pkg/util/admission/admissionpb/io_threshold.go
index ab088938aaca..4024ff92c900 100644
--- a/pkg/util/admission/admissionpb/io_threshold.go
+++ b/pkg/util/admission/admissionpb/io_threshold.go
@@ -32,7 +32,14 @@ import (
 // max number of compactions). And we will need to incorporate overload due to
 // disk bandwidth bottleneck.
 func (iot *IOThreshold) Score() (float64, bool) {
-	if iot == nil {
+	// iot.L0NumFilesThreshold and iot.L0NumSubLevelsThreshold are initialized to
+	// 0 by default, and there appears to be a period of time before we update
+	// iot.L0NumFilesThreshold and iot.L0NumSubLevelsThreshold to their
+	// appropriate values. During this period, to prevent dividing by 0 below
+	// and Score() returning NaN, we check whether iot.L0NumFilesThreshold or
+	// iot.L0NumSubLevelsThreshold is 0 (i.e. currently uninitialized) and
+	// return 0 as the score if so.
+	if iot == nil || iot.L0NumFilesThreshold == 0 || iot.L0NumSubLevelsThreshold == 0 {
 		return 0, false
 	}
 	f := math.Max(
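
For reference, the normalization this gauge reports can be illustrated with a
small standalone sketch. This is not the CockroachDB implementation; it only
mirrors the shape of IOThreshold.Score() as the hunks above suggest (the score
is the larger of the two ratios, L0 sub-levels and L0 files each divided by
their thresholds, with a zero threshold treated as "not yet initialized"). The
type and field layout below are illustrative stand-ins, not the admissionpb
definitions:

    package main

    import (
        "fmt"
        "math"
    )

    // ioThreshold is an illustrative stand-in for admissionpb.IOThreshold,
    // carrying only the fields referenced in the patch above.
    type ioThreshold struct {
        L0NumSubLevels          int64
        L0NumSubLevelsThreshold int64
        L0NumFiles              int64
        L0NumFilesThreshold     int64
    }

    // score mirrors the shape of IOThreshold.Score(): the larger of the two
    // normalized ratios, plus whether the store is considered overloaded
    // (score > 1). Zero thresholds are treated as uninitialized so we never
    // divide by zero and never return NaN.
    func (iot *ioThreshold) score() (float64, bool) {
        if iot == nil || iot.L0NumFilesThreshold == 0 || iot.L0NumSubLevelsThreshold == 0 {
            return 0, false
        }
        f := math.Max(
            float64(iot.L0NumSubLevels)/float64(iot.L0NumSubLevelsThreshold),
            float64(iot.L0NumFiles)/float64(iot.L0NumFilesThreshold),
        )
        return f, f > 1
    }

    func main() {
        iot := &ioThreshold{
            L0NumSubLevels: 10, L0NumSubLevelsThreshold: 20,
            L0NumFiles: 4000, L0NumFilesThreshold: 1000,
        }
        s, overloaded := iot.score()
        // The file-count ratio (4.0) dominates the sub-level ratio (0.5),
        // so the gauge would read 4.00 and the store counts as overloaded.
        fmt.Printf("admission.io.overload=%.2f overloaded=%t\n", s, overloaded)
    }

Because the score is 1-normalized, a value above 1 means the store is at or
past its configured L0 limits; per the metric help text above, the same value
is what gets compared against the admission.kv.pause_replication_io_threshold
cluster setting when deciding whether to pause replication to followers.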