Skip to content

Commit

Permalink
kv: Add stats for delegate snapshots
Browse files Browse the repository at this point in the history
Fixes: cockroachdb#98243
This PR adds two new stats for delegate snapshots to track failure of
sending snapshots. There are failures either before data is transferred
or after the snapshot is received. Both stats are useful.

Epic: none

Release note: None
  • Loading branch information
andrewbaptist committed Apr 7, 2023
1 parent 22ab7ed commit d11d509
Show file tree
Hide file tree
Showing 8 changed files with 304 additions and 35 deletions.
16 changes: 16 additions & 0 deletions pkg/kv/kvserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,18 @@ var (
Measurement: "Bytes",
Unit: metric.Unit_BYTES,
}
// metaRangeSnapshotRecvFailed is the metadata for the counter of range
// snapshot initialization messages that errored out on the recipient,
// typically before any snapshot data is transferred.
metaRangeSnapshotRecvFailed = metric.Metadata{
Name: "range.snapshots.recv-failed",
Help: "Number of range snapshot initialization messages that errored out on the recipient, typically before any data is transferred",
Measurement: "Snapshots",
Unit: metric.Unit_COUNT,
}
// metaRangeSnapshotRecvUnusable is the metadata for the counter of range
// snapshots that were fully transmitted but then determined to be unusable.
// The metric name uses the "recv-" prefix so that it matches the
// RangeSnapshotRecvUnusable counter it backs and sits alongside
// "range.snapshots.recv-failed"; it does not count send-side failures.
metaRangeSnapshotRecvUnusable = metric.Metadata{
Name: "range.snapshots.recv-unusable",
Help: "Number of range snapshot that were fully transmitted but determined to be unusable",
Measurement: "Snapshots",
Unit: metric.Unit_COUNT,
}
metaRangeSnapshotSendQueueLength = metric.Metadata{
Name: "range.snapshots.send-queue",
Help: "Number of snapshots queued to send",
Expand Down Expand Up @@ -2038,6 +2050,8 @@ type StoreMetrics struct {
RangeSnapshotRecoverySentBytes *metric.Counter
RangeSnapshotRebalancingRcvdBytes *metric.Counter
RangeSnapshotRebalancingSentBytes *metric.Counter
RangeSnapshotRecvFailed *metric.Counter
RangeSnapshotRecvUnusable *metric.Counter

// Range snapshot queue metrics.
RangeSnapshotSendQueueLength *metric.Gauge
Expand Down Expand Up @@ -2639,6 +2653,8 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
RangeSnapshotRecoverySentBytes: metric.NewCounter(metaRangeSnapshotRecoverySentBytes),
RangeSnapshotRebalancingRcvdBytes: metric.NewCounter(metaRangeSnapshotRebalancingRcvdBytes),
RangeSnapshotRebalancingSentBytes: metric.NewCounter(metaRangeSnapshotRebalancingSentBytes),
RangeSnapshotRecvFailed: metric.NewCounter(metaRangeSnapshotRecvFailed),
RangeSnapshotRecvUnusable: metric.NewCounter(metaRangeSnapshotRecvUnusable),
RangeSnapshotSendQueueLength: metric.NewGauge(metaRangeSnapshotSendQueueLength),
RangeSnapshotRecvQueueLength: metric.NewGauge(metaRangeSnapshotRecvQueueLength),
RangeSnapshotSendInProgress: metric.NewGauge(metaRangeSnapshotSendInProgress),
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/replica_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -2839,7 +2839,7 @@ func (r *Replica) sendSnapshotUsingDelegate(
ctx, 2, "delegating snapshot transmission attempt %v for %v to %v", n+1, recipient, sender,
)

selfDelegate := n == len(senders)-1
selfDelegate := sender.StoreID == r.StoreID()

// On the last attempt, always queue on the delegate to time out naturally.
if selfDelegate {
Expand Down
Loading

0 comments on commit d11d509

Please sign in to comment.