Skip to content

Commit

Permalink
kvserver: add storage time-series metrics for level size and score
Browse files Browse the repository at this point in the history
Currently, the only ways to infer the compaction scores and heuristics are
to read the LSM printout in the logs (emitted once every ten minutes) or
to call the `/debug/lsm` endpoint manually and track the values over
time. This makes it difficult to debug issues retroactively.

Add two new sets of per-LSM-level time-series metrics for level size and
level score. These new metrics have names of the form
`storage.$LEVEL-level-{size,score}`.

Closes #88415.

Release note (ops change): Adds two new sets of per-LSM-level
time-series metrics, one for level size and another for level score.
These metrics are of the form `storage.$LEVEL-level-{size,score}`.
  • Loading branch information
nicktrav committed Sep 22, 2022
1 parent 30ba7c3 commit d41cce0
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 1 deletion.
32 changes: 31 additions & 1 deletion pkg/kv/kvserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,20 @@ var metaRdbBytesIngested = storageLevelMetricMetadata(
metric.Unit_BYTES,
)

// metaRdbLevelSize holds the metadata for the per-level "level-size"
// metrics, measured in bytes. It is passed to storageLevelGaugeSlice to
// build one gauge per entry.
// NOTE(review): the %d in the help text is presumably filled in with the
// level index by storageLevelMetricMetadata (not shown here) — confirm.
var metaRdbLevelSize = storageLevelMetricMetadata(
"level-size",
"Size of the SSTables in level %d",
"Bytes",
metric.Unit_BYTES,
)

// metaRdbLevelScores holds the metadata for the per-level "level-score"
// (compaction score) metrics. It is passed to storageLevelGaugeFloat64Slice
// to build one float64 gauge per entry.
// NOTE(review): the %d in the help text is presumably filled in with the
// level index by storageLevelMetricMetadata (not shown here) — confirm.
var metaRdbLevelScores = storageLevelMetricMetadata(
"level-score",
"Compaction score of level %d",
"Score",
metric.Unit_COUNT,
)

var (
metaRdbWriteStalls = metric.Metadata{
Name: "storage.write-stalls",
Expand Down Expand Up @@ -1708,7 +1722,9 @@ type StoreMetrics struct {
RdbL0BytesFlushed *metric.Gauge
RdbL0Sublevels *metric.Gauge
RdbL0NumFiles *metric.Gauge
RdbBytesIngested [7]*metric.Gauge // idx = level
RdbBytesIngested [7]*metric.Gauge // idx = level
RdbLevelSize [7]*metric.Gauge // idx = level
RdbLevelScore [7]*metric.GaugeFloat64 // idx = level
RdbWriteStalls *metric.Gauge
RdbWriteStallNanos *metric.Gauge

Expand Down Expand Up @@ -2137,6 +2153,8 @@ func newTenantsStorageMetrics() *TenantsStorageMetrics {
func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
storeRegistry := metric.NewRegistry()
rdbBytesIngested := storageLevelGaugeSlice(metaRdbBytesIngested)
rdbLevelSize := storageLevelGaugeSlice(metaRdbLevelSize)
rdbLevelScore := storageLevelGaugeFloat64Slice(metaRdbLevelScores)

sm := &StoreMetrics{
registry: storeRegistry,
Expand Down Expand Up @@ -2219,6 +2237,8 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
RdbL0Sublevels: metric.NewGauge(metaRdbL0Sublevels),
RdbL0NumFiles: metric.NewGauge(metaRdbL0NumFiles),
RdbBytesIngested: rdbBytesIngested,
RdbLevelSize: rdbLevelSize,
RdbLevelScore: rdbLevelScore,
RdbWriteStalls: metric.NewGauge(metaRdbWriteStalls),
RdbWriteStallNanos: metric.NewGauge(metaRdbWriteStallNanos),

Expand Down Expand Up @@ -2523,6 +2543,8 @@ func (sm *StoreMetrics) updateEngineMetrics(m storage.Metrics) {
sm.RdbL0BytesFlushed.Update(int64(m.Levels[0].BytesFlushed))
for level, stats := range m.Levels {
sm.RdbBytesIngested[level].Update(int64(stats.BytesIngested))
sm.RdbLevelSize[level].Update(stats.Size)
sm.RdbLevelScore[level].Update(stats.Score)
}
}

Expand Down Expand Up @@ -2577,3 +2599,11 @@ func storageLevelGaugeSlice(sl [7]metric.Metadata) [7]*metric.Gauge {
}
return gs
}

// storageLevelGaugeFloat64Slice creates one float64 gauge for each metadata
// entry in sl. The result is index-aligned with the input: element i of the
// returned array is the gauge constructed from sl[i].
func storageLevelGaugeFloat64Slice(sl [7]metric.Metadata) [7]*metric.GaugeFloat64 {
	var gauges [7]*metric.GaugeFloat64
	for level, meta := range sl {
		gauges[level] = metric.NewGaugeFloat64(meta)
	}
	return gauges
}
25 changes: 25 additions & 0 deletions pkg/ts/catalog/chart_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -3004,6 +3004,31 @@ var charts = []sectionDescription{
Metrics: []string{"storage.write-stall-nanos"},
AxisLabel: "Duration (nanos)",
},
{
Title: "Bytes Used Per Level",
Metrics: []string{
"storage.l0-level-size",
"storage.l1-level-size",
"storage.l2-level-size",
"storage.l3-level-size",
"storage.l4-level-size",
"storage.l5-level-size",
"storage.l6-level-size",
},
AxisLabel: "Bytes",
},
{
Title: "Compaction Score Per Level",
Metrics: []string{
"storage.l0-level-score",
"storage.l1-level-score",
"storage.l2-level-score",
"storage.l3-level-score",
"storage.l4-level-score",
"storage.l5-level-score",
"storage.l6-level-score",
},
},
},
},
{
Expand Down

0 comments on commit d41cce0

Please sign in to comment.