Skip to content

Commit

Permalink
kvserver: split upreplication from recovery metrics
Browse files Browse the repository at this point in the history
Previously, both raft recovery snapshots and upreplication snapshots were
counted under the recovery metrics. This PR splits them into two separate categories.

Epic: none
Fixes: #115729

Release note (ops change): Adds two new metrics,
range.snapshots.upreplication.rcvd-bytes and
range.snapshots.upreplication.sent-bytes. It also changes the meaning of
range.snapshots.recovery.rcvd-bytes and
range.snapshots.recovery.sent-bytes so that they only include raft snapshots.
Additionally, it adds a new upreplication line to the "Snapshot Data Received"
graph.
  • Loading branch information
andrewbaptist committed Feb 9, 2024
1 parent 91b49df commit 14ae779
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 8 deletions.
6 changes: 4 additions & 2 deletions docs/generated/metrics/metrics.html
Original file line number Diff line number Diff line change
Expand Up @@ -498,8 +498,8 @@
<tr><td>STORAGE</td><td>range.snapshots.rcvd-bytes</td><td>Number of snapshot bytes received</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.rebalancing.rcvd-bytes</td><td>Number of rebalancing snapshot bytes received</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.rebalancing.sent-bytes</td><td>Number of rebalancing snapshot bytes sent</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recovery.rcvd-bytes</td><td>Number of recovery snapshot bytes received</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recovery.sent-bytes</td><td>Number of recovery snapshot bytes sent</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recovery.rcvd-bytes</td><td>Number of raft recovery snapshot bytes received</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recovery.sent-bytes</td><td>Number of raft recovery snapshot bytes sent</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recv-failed</td><td>Number of range snapshot initialization messages that errored out on the recipient, typically before any data is transferred</td><td>Snapshots</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recv-in-progress</td><td>Number of non-empty snapshots being received</td><td>Snapshots</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.recv-queue</td><td>Number of snapshots queued to receive</td><td>Snapshots</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
Expand All @@ -513,6 +513,8 @@
<tr><td>STORAGE</td><td>range.snapshots.sent-bytes</td><td>Number of snapshot bytes sent</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.unknown.rcvd-bytes</td><td>Number of unknown snapshot bytes received</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.unknown.sent-bytes</td><td>Number of unknown snapshot bytes sent</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.upreplication.rcvd-bytes</td><td>Number of upreplication snapshot bytes received</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.snapshots.upreplication.sent-bytes</td><td>Number of upreplication snapshot bytes sent</td><td>Bytes</td><td>COUNTER</td><td>BYTES</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>range.splits</td><td>Number of range splits</td><td>Range Ops</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>rangekeybytes</td><td>Number of bytes taken up by range keys (e.g. MVCC range tombstones)</td><td>Storage</td><td>GAUGE</td><td>BYTES</td><td>AVG</td><td>NONE</td></tr>
<tr><td>STORAGE</td><td>rangekeycount</td><td>Count of all range keys (e.g. MVCC range tombstones)</td><td>Keys</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
Expand Down
20 changes: 18 additions & 2 deletions pkg/kv/kvserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -1056,13 +1056,25 @@ var (
}
metaRangeSnapshotRecoveryRcvdBytes = metric.Metadata{
Name: "range.snapshots.recovery.rcvd-bytes",
Help: "Number of recovery snapshot bytes received",
Help: "Number of raft recovery snapshot bytes received",
Measurement: "Bytes",
Unit: metric.Unit_BYTES,
}
metaRangeSnapshotRecoverySentBytes = metric.Metadata{
Name: "range.snapshots.recovery.sent-bytes",
Help: "Number of recovery snapshot bytes sent",
Help: "Number of raft recovery snapshot bytes sent",
Measurement: "Bytes",
Unit: metric.Unit_BYTES,
}
// metaRangeSnapshotUpreplicationRcvdBytes is the metadata for the counter
// range.snapshots.upreplication.rcvd-bytes, which tracks the number of
// upreplication snapshot bytes received, measured in bytes.
metaRangeSnapshotUpreplicationRcvdBytes = metric.Metadata{
Name: "range.snapshots.upreplication.rcvd-bytes",
Help: "Number of upreplication snapshot bytes received",
Measurement: "Bytes",
Unit: metric.Unit_BYTES,
}
// metaRangeSnapshotUpreplicationSentBytes is the metadata for the counter
// range.snapshots.upreplication.sent-bytes, which tracks the number of
// upreplication snapshot bytes sent, measured in bytes.
metaRangeSnapshotUpreplicationSentBytes = metric.Metadata{
Name: "range.snapshots.upreplication.sent-bytes",
Help: "Number of upreplication snapshot bytes sent",
Measurement: "Bytes",
Unit: metric.Unit_BYTES,
}
Expand Down Expand Up @@ -2636,6 +2648,8 @@ type StoreMetrics struct {
RangeSnapshotUnknownSentBytes *metric.Counter
RangeSnapshotRecoveryRcvdBytes *metric.Counter
RangeSnapshotRecoverySentBytes *metric.Counter
RangeSnapshotUpreplicationRcvdBytes *metric.Counter
RangeSnapshotUpreplicationSentBytes *metric.Counter
RangeSnapshotRebalancingRcvdBytes *metric.Counter
RangeSnapshotRebalancingSentBytes *metric.Counter
RangeSnapshotRecvFailed *metric.Counter
Expand Down Expand Up @@ -3334,6 +3348,8 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
RangeSnapshotUnknownSentBytes: metric.NewCounter(metaRangeSnapshotUnknownSentBytes),
RangeSnapshotRecoveryRcvdBytes: metric.NewCounter(metaRangeSnapshotRecoveryRcvdBytes),
RangeSnapshotRecoverySentBytes: metric.NewCounter(metaRangeSnapshotRecoverySentBytes),
RangeSnapshotUpreplicationRcvdBytes: metric.NewCounter(metaRangeSnapshotUpreplicationRcvdBytes),
RangeSnapshotUpreplicationSentBytes: metric.NewCounter(metaRangeSnapshotUpreplicationSentBytes),
RangeSnapshotRebalancingRcvdBytes: metric.NewCounter(metaRangeSnapshotRebalancingRcvdBytes),
RangeSnapshotRebalancingSentBytes: metric.NewCounter(metaRangeSnapshotRebalancingSentBytes),
RangeSnapshotRecvFailed: metric.NewCounter(metaRangeSnapshotRecvFailed),
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/replica_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -3349,7 +3349,7 @@ func (r *Replica) followerSendSnapshot(
// it is used for rebalance.
// See AllocatorAction.Priority
if header.SenderQueuePriority > 0 {
r.store.metrics.RangeSnapshotRecoverySentBytes.Inc(inc)
r.store.metrics.RangeSnapshotUpreplicationSentBytes.Inc(inc)
} else {
r.store.metrics.RangeSnapshotRebalancingSentBytes.Inc(inc)
}
Expand Down
4 changes: 1 addition & 3 deletions pkg/kv/kvserver/store_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -1448,12 +1448,10 @@ func (s *Store) receiveSnapshot(
} else if header.SenderQueueName == kvserverpb.SnapshotRequest_OTHER {
s.metrics.RangeSnapshotRebalancingRcvdBytes.Inc(inc)
} else {
// TODO(baptist): This logic is pretty messy. Consider refactoring all the
// snapshot related metrics.
// Replicate queue does both types, so split based on priority.
// See AllocatorAction.Priority
if header.SenderQueuePriority > 0 {
s.metrics.RangeSnapshotRecoveryRcvdBytes.Inc(inc)
s.metrics.RangeSnapshotUpreplicationRcvdBytes.Inc(inc)
} else {
s.metrics.RangeSnapshotRebalancingRcvdBytes.Inc(inc)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,15 @@ export default function (props: GraphDashboardProps) {
sources={storeIDsForNode(storeIDsByNodeID, nid)}
nonNegativeRate
/>
<Metric
key={nid}
name="cr.store.range.snapshots.upreplication.rcvd-bytes"
title={
nodeDisplayName(nodeDisplayNameByID, nid) + "-upreplication"
}
sources={storeIDsForNode(storeIDsByNodeID, nid)}
nonNegativeRate
/>
</>
))}
</Axis>
Expand Down

0 comments on commit 14ae779

Please sign in to comment.