Skip to content

Commit

Permalink
Merge #119028
Browse files Browse the repository at this point in the history
119028: kvserver: split upreplication from recovery metrics r=kvoli a=andrewbaptist

Previously, both raft recovery and upreplication snapshots were counted as recovery metrics. This PR splits them into two separate categories.

Epic: none
Fixes: #115729

Release note (ops change): Adds two new metrics,
range.snapshots.upreplication.rcvd-bytes and
range.snapshots.upreplication.sent-bytes. Also changes the meaning of
range.snapshots.recovery.rcvd-bytes and range.snapshots.recovery.sent-bytes
so that they only include raft snapshots.

Co-authored-by: Andrew Baptist <[email protected]>
  • Loading branch information
craig[bot] and andrewbaptist committed Apr 25, 2024
2 parents 744da4c + def6033 commit d772813
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 8 deletions.
6 changes: 4 additions & 2 deletions docs/generated/metrics/metrics.html
Original file line number Diff line number Diff line change
Expand Up @@ -498,8 +498,8 @@
<tr><td>STORAGE</td><td>range.snapshots.rcvd-bytes</td><td>Number of snapshot bytes received</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.rebalancing.rcvd-bytes</td><td>Number of rebalancing snapshot bytes received</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.rebalancing.sent-bytes</td><td>Number of rebalancing snapshot bytes sent</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recovery.rcvd-bytes</td><td>Number of recovery snapshot bytes received</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recovery.sent-bytes</td><td>Number of recovery snapshot bytes sent</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recovery.rcvd-bytes</td><td>Number of raft recovery snapshot bytes received</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recovery.sent-bytes</td><td>Number of raft recovery snapshot bytes sent</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recv-failed</td><td>Number of range snapshot initialization messages that errored out on the recipient, typically before any data is transferred</td><td>Snapshots</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recv-in-progress</td><td>Number of non-empty snapshots being received</td><td>Snapshots</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recv-queue</td><td>Number of snapshots queued to receive</td><td>Snapshots</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
Expand All @@ -513,6 +513,8 @@
<tr><td>STORAGE</td><td>range.snapshots.sent-bytes</td><td>Number of snapshot bytes sent</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.unknown.rcvd-bytes</td><td>Number of unknown snapshot bytes received</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.unknown.sent-bytes</td><td>Number of unknown snapshot bytes sent</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.upreplication.rcvd-bytes</td><td>Number of upreplication snapshot bytes received</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.upreplication.sent-bytes</td><td>Number of upreplication snapshot bytes sent</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.splits</td><td>Number of range splits</td><td>Range Ops</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>rangekeybytes</td><td>Number of bytes taken up by range keys (e.g. MVCC range tombstones)</td><td>Storage</td><td>GAUGE</td><td>BYTES</td><td>AVG</td><td>NONE</td></tr>
<tr><td>STORAGE</td><td>rangekeycount</td><td>Count of all range keys (e.g. MVCC range tombstones)</td><td>Keys</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
Expand Down
20 changes: 18 additions & 2 deletions pkg/kv/kvserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -1057,13 +1057,25 @@ var (
}
metaRangeSnapshotRecoveryRcvdBytes = metric.Metadata{
Name: "range.snapshots.recovery.rcvd-bytes",
Help: "Number of recovery snapshot bytes received",
Help: "Number of raft recovery snapshot bytes received",
Measurement: "Bytes",
Unit: metric.Unit_BYTES,
}
metaRangeSnapshotRecoverySentBytes = metric.Metadata{
Name: "range.snapshots.recovery.sent-bytes",
Help: "Number of recovery snapshot bytes sent",
Help: "Number of raft recovery snapshot bytes sent",
Measurement: "Bytes",
Unit: metric.Unit_BYTES,
}
metaRangeSnapshotUpreplicationRcvdBytes = metric.Metadata{
Name: "range.snapshots.upreplication.rcvd-bytes",
Help: "Number of upreplication snapshot bytes received",
Measurement: "Bytes",
Unit: metric.Unit_BYTES,
}
metaRangeSnapshotUpreplicationSentBytes = metric.Metadata{
Name: "range.snapshots.upreplication.sent-bytes",
Help: "Number of upreplication snapshot bytes sent",
Measurement: "Bytes",
Unit: metric.Unit_BYTES,
}
Expand Down Expand Up @@ -2652,6 +2664,8 @@ type StoreMetrics struct {
RangeSnapshotUnknownSentBytes *metric.Counter
RangeSnapshotRecoveryRcvdBytes *metric.Counter
RangeSnapshotRecoverySentBytes *metric.Counter
RangeSnapshotUpreplicationRcvdBytes *metric.Counter
RangeSnapshotUpreplicationSentBytes *metric.Counter
RangeSnapshotRebalancingRcvdBytes *metric.Counter
RangeSnapshotRebalancingSentBytes *metric.Counter
RangeSnapshotRecvFailed *metric.Counter
Expand Down Expand Up @@ -3355,6 +3369,8 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
RangeSnapshotUnknownSentBytes: metric.NewCounter(metaRangeSnapshotUnknownSentBytes),
RangeSnapshotRecoveryRcvdBytes: metric.NewCounter(metaRangeSnapshotRecoveryRcvdBytes),
RangeSnapshotRecoverySentBytes: metric.NewCounter(metaRangeSnapshotRecoverySentBytes),
RangeSnapshotUpreplicationRcvdBytes: metric.NewCounter(metaRangeSnapshotUpreplicationRcvdBytes),
RangeSnapshotUpreplicationSentBytes: metric.NewCounter(metaRangeSnapshotUpreplicationSentBytes),
RangeSnapshotRebalancingRcvdBytes: metric.NewCounter(metaRangeSnapshotRebalancingRcvdBytes),
RangeSnapshotRebalancingSentBytes: metric.NewCounter(metaRangeSnapshotRebalancingSentBytes),
RangeSnapshotRecvFailed: metric.NewCounter(metaRangeSnapshotRecvFailed),
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/replica_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -3349,7 +3349,7 @@ func (r *Replica) followerSendSnapshot(
// it is used for rebalance.
// See AllocatorAction.Priority
if header.SenderQueuePriority > 0 {
r.store.metrics.RangeSnapshotRecoverySentBytes.Inc(inc)
r.store.metrics.RangeSnapshotUpreplicationSentBytes.Inc(inc)
} else {
r.store.metrics.RangeSnapshotRebalancingSentBytes.Inc(inc)
}
Expand Down
4 changes: 1 addition & 3 deletions pkg/kv/kvserver/store_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -1452,12 +1452,10 @@ func (s *Store) receiveSnapshot(
} else if header.SenderQueueName == kvserverpb.SnapshotRequest_OTHER {
s.metrics.RangeSnapshotRebalancingRcvdBytes.Inc(inc)
} else {
// TODO(baptist): This logic is pretty messy. Consider refactoring all the
// snapshot related metrics.
// Replicate queue does both types, so split based on priority.
// See AllocatorAction.Priority
if header.SenderQueuePriority > 0 {
s.metrics.RangeSnapshotRecoveryRcvdBytes.Inc(inc)
s.metrics.RangeSnapshotUpreplicationRcvdBytes.Inc(inc)
} else {
s.metrics.RangeSnapshotRebalancingRcvdBytes.Inc(inc)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,15 @@ export default function (props: GraphDashboardProps) {
sources={storeIDsForNode(storeIDsByNodeID, nid)}
nonNegativeRate
/>
<Metric
key={nid}
name="cr.store.range.snapshots.upreplication.rcvd-bytes"
title={
nodeDisplayName(nodeDisplayNameByID, nid) + "-upreplication"
}
sources={storeIDsForNode(storeIDsByNodeID, nid)}
nonNegativeRate
/>
</>
))}
</Axis>
Expand Down

0 comments on commit d772813

Please sign in to comment.