db: improve metrics formatting

This change improves the formatting of the Pebble metrics:
 - we no longer try to artificially fit non-tabular information into a single table
 - all byte figures end in B/KB/MB/etc.
 - counts and their corresponding sizes are grouped together
 - flushable ingest information is moved under ingestions

These metrics are periodically logged and can also be obtained through the DB console.

Fixes cockroachdb#1473.
RaduBerinde · Jul 13, 2023 · f233ee7 · f233ee7
1 parent 02413ad
commit f233ee7
Show file tree

Hide file tree

Showing 7 changed files with 390 additions and 400 deletions.
diff --git a/internal/humanize/humanize.go b/internal/humanize/humanize.go
@@ -41,6 +41,12 @@ var IEC = config{1024, []string{" B", " K", " M", " G", " T", " P", " E"}}
 // SI produces human readable representations of integer values in SI units.
 var SI = config{1000, []string{"", " K", " M", " G", " T", " P", " E"}}
 
+// Bytes produces human readable representations of byte values in IEC units.
+var Bytes = config{1024, []string{"B", "KB", "MB", "GB", "TB", "PB", "EB"}}
+
+// Count produces human readable representations of unitless values in SI units.
+var Count = config{1000, []string{"", "K", "M", "G", "T", "P", "E"}}
+
 // Int64 produces a human readable representation of the value.
 func (c *config) Int64(s int64) FormattedString {
 	if s < 0 {

diff --git a/metrics.go b/metrics.go
@@ -6,7 +6,6 @@ package pebble
 
 import (
 	"fmt"
-	"strings"
 	"time"
 
 	"github.com/cockroachdb/pebble/internal/base"
@@ -29,10 +28,10 @@ type FilterMetrics = sstable.FilterMetrics
 type ThroughputMetric = base.ThroughputMetric
 
 func formatCacheMetrics(w redact.SafePrinter, m *CacheMetrics, name redact.SafeString) {
-	w.Printf("%7s %9s %7s %6.1f%%  (score == hit-rate)\n",
+	w.Printf("%s: %s (%s)  hit rate: %.1f%%\n",
 		name,
-		humanize.SI.Int64(m.Count),
-		humanize.IEC.Int64(m.Size),
+		humanize.Count.Int64(m.Count),
+		humanize.Bytes.Int64(m.Size),
 		redact.Safe(hitRate(m.Hits, m.Misses)))
 }
 
@@ -126,22 +125,25 @@ func (m *LevelMetrics) WriteAmp() float64 {
 func (m *LevelMetrics) format(
 	w redact.SafePrinter, score redact.SafeValue, includeValueBlocksSize bool,
 ) {
-	w.Printf("%9d %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7d %7.1f",
+	//w.Printf("%9d %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7d %7.1f",
+
+	w.Printf("| %9d %7s | %7s | %7s | %7s %7s | %7s %7s | %7s %7s | %7s | %7d %7.1f",
 		redact.Safe(m.NumFiles),
-		humanize.IEC.Int64(m.Size),
+		humanize.Bytes.Int64(m.Size),
 		score,
-		humanize.IEC.Uint64(m.BytesIn),
-		humanize.IEC.Uint64(m.BytesIngested),
-		humanize.SI.Uint64(m.TablesIngested),
-		humanize.IEC.Uint64(m.BytesMoved),
-		humanize.SI.Uint64(m.TablesMoved),
-		humanize.IEC.Uint64(m.BytesFlushed+m.BytesCompacted),
-		humanize.SI.Uint64(m.TablesFlushed+m.TablesCompacted),
-		humanize.IEC.Uint64(m.BytesRead),
+		humanize.Bytes.Uint64(m.BytesIn),
+		humanize.Count.Uint64(m.TablesIngested),
+		humanize.Bytes.Uint64(m.BytesIngested),
+		humanize.Count.Uint64(m.TablesMoved),
+		humanize.Bytes.Uint64(m.BytesMoved),
+		humanize.Count.Uint64(m.TablesFlushed+m.TablesCompacted),
+		humanize.Bytes.Uint64(m.BytesFlushed+m.BytesCompacted),
+		humanize.Bytes.Uint64(m.BytesRead),
 		redact.Safe(m.Sublevels),
 		redact.Safe(m.WriteAmp()))
+
 	if includeValueBlocksSize {
-		w.Printf(" %7s\n", humanize.IEC.Uint64(m.Additional.ValueBlocksSize))
+		w.Printf("| %7s\n", humanize.Bytes.Uint64(m.Additional.ValueBlocksSize))
 	} else {
 		w.SafeString("\n")
 	}
@@ -364,27 +366,6 @@ func (m *Metrics) Total() LevelMetrics {
 
 const notApplicable = redact.SafeString("-")
 
-func (m *Metrics) formatWAL(w redact.SafePrinter) {
-	var writeAmp float64
-	if m.WAL.BytesIn > 0 {
-		writeAmp = float64(m.WAL.BytesWritten) / float64(m.WAL.BytesIn)
-	}
-	w.Printf("    WAL %9d %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7.1f\n",
-		redact.Safe(m.WAL.Files),
-		humanize.Uint64(m.WAL.Size),
-		notApplicable,
-		humanize.Uint64(m.WAL.BytesIn),
-		notApplicable,
-		notApplicable,
-		notApplicable,
-		notApplicable,
-		humanize.Uint64(m.WAL.BytesWritten),
-		notApplicable,
-		notApplicable,
-		notApplicable,
-		redact.Safe(writeAmp))
-}
-
 // String pretty-prints the metrics, showing a line for the WAL, a line per-level, and
 // a total:
 //
@@ -436,18 +417,23 @@ func (m *Metrics) SafeFormat(w redact.SafePrinter, _ rune) {
 	// RedactableStrings. https://github.com/cockroachdb/redact/issues/17
 
 	haveValueBlocks := false
-	var valueBlocksHeading redact.SafeString
 	for level := 0; level < numLevels; level++ {
 		if m.Levels[level].Additional.ValueBlocksSize > 0 {
 			haveValueBlocks = true
-			valueBlocksHeading = "__val-bl"
 			break
 		}
 	}
+	// print a string and optionally include a second string if we have value blocks.
+	print := func(s redact.SafeString, valBlocks redact.SafeString) {
+		w.SafeString(s)
+		if haveValueBlocks {
+			w.SafeString(valBlocks)
+		}
+		w.SafeString("\n")
+	}
+	print("  level |            tables |   score |      in |          ingest |            move |           write |    read |   r-amp   w-amp", "|  val-bl")
+	print("--------+-------------------+---------+---------+-----------------+-----------------+-----------------+---------+----------------", "+--------")
 	var total LevelMetrics
-	w.Printf("__level_____count____size___score______in__ingest(sz_cnt)"+
-		"____move(sz_cnt)___write(sz_cnt)____read___r-amp___w-amp%s\n", valueBlocksHeading)
-	m.formatWAL(w)
 	for level := 0; level < numLevels; level++ {
 		l := &m.Levels[level]
 		w.Printf("%7d ", redact.Safe(level))
@@ -470,57 +456,65 @@ func (m *Metrics) SafeFormat(w redact.SafePrinter, _ rune) {
 	w.SafeString("  total ")
 	total.format(w, notApplicable, haveValueBlocks)
 
-	w.Printf("  flush %9d %31s %7d %7d  %s\n",
-		redact.Safe(m.Flush.Count),
-		humanize.IEC.Uint64(m.Flush.AsIngestBytes),
-		redact.Safe(m.Flush.AsIngestTableCount),
-		redact.Safe(m.Flush.AsIngestCount),
-		redact.SafeString(`(ingest = tables-ingested, move = ingested-as-flushable)`))
-	w.Printf("compact %9d %7s %7s %7d %s %s\n",
+	print("---------------------------------------------------------------------------------------------------------------------------------", "---------")
+
+	w.Printf("WAL: %d (%s)  in: %s  written: %s (%.0f%% overhead)\n",
+		redact.Safe(m.WAL.Files),
+		humanize.Bytes.Uint64(m.WAL.Size),
+		humanize.Bytes.Uint64(m.WAL.BytesIn),
+		humanize.Bytes.Uint64(m.WAL.BytesWritten),
+		redact.Safe(percent(int64(m.WAL.BytesWritten)-int64(m.WAL.BytesIn), int64(m.WAL.BytesIn))))
+
+	w.Printf("Flushes: %d\n", redact.Safe(m.Flush.Count))
+
+	w.Printf("Compactions: %d  estimated debt: %s  in progress: %d (%s)\n",
 		redact.Safe(m.Compact.Count),
-		humanize.IEC.Uint64(m.Compact.EstimatedDebt),
-		humanize.IEC.Int64(m.Compact.InProgressBytes),
+		humanize.Bytes.Uint64(m.Compact.EstimatedDebt),
 		redact.Safe(m.Compact.NumInProgress),
-		redact.SafeString(strings.Repeat(" ", 24)),
-		redact.SafeString(`(size == estimated-debt, score = in-progress-bytes, in = num-in-progress)`))
-	w.Printf("  ctype %9d %7d %7d %7d %7d %7d %7d  %s\n",
+		humanize.Bytes.Int64(m.Compact.InProgressBytes))
+
+	w.Printf("             default: %d  delete: %d  elision: %d  move: %d  read: %d  rewrite: %d  multi-level: %d\n",
 		redact.Safe(m.Compact.DefaultCount),
 		redact.Safe(m.Compact.DeleteOnlyCount),
 		redact.Safe(m.Compact.ElisionOnlyCount),
 		redact.Safe(m.Compact.MoveCount),
 		redact.Safe(m.Compact.ReadCount),
 		redact.Safe(m.Compact.RewriteCount),
-		redact.Safe(m.Compact.MultiLevelCount),
-		redact.SafeString(`(default, delete, elision, move, read, rewrite, multi-level)`))
-	w.Printf(" memtbl %9d %7s\n",
+		redact.Safe(m.Compact.MultiLevelCount))
+
+	w.Printf("MemTables: %d (%s)  zombie: %d (%s)\n",
 		redact.Safe(m.MemTable.Count),
-		humanize.IEC.Uint64(m.MemTable.Size))
-	w.Printf("zmemtbl %9d %7s\n",
+		humanize.Bytes.Uint64(m.MemTable.Size),
 		redact.Safe(m.MemTable.ZombieCount),
-		humanize.IEC.Uint64(m.MemTable.ZombieSize))
-	w.Printf("   ztbl %9d %7s\n",
+		humanize.Bytes.Uint64(m.MemTable.ZombieSize))
+
+	w.Printf("Zombie tables: %d (%s)\n",
 		redact.Safe(m.Table.ZombieCount),
-		humanize.IEC.Uint64(m.Table.ZombieSize))
-	formatCacheMetrics(w, &m.BlockCache, "bcache")
-	formatCacheMetrics(w, &m.TableCache, "tcache")
-	w.Printf("  snaps %9d %7s %7d  (score == earliest seq num)\n",
+		humanize.Bytes.Uint64(m.Table.ZombieSize))
+
+	formatCacheMetrics(w, &m.BlockCache, "Block cache")
+	formatCacheMetrics(w, &m.TableCache, "Table cache")
+
+	w.Printf("Snapshots: %d  earliest seq num: %d\n",
 		redact.Safe(m.Snapshots.Count),
-		notApplicable,
 		redact.Safe(m.Snapshots.EarliestSeqNum))
-	w.Printf(" titers %9d\n", redact.Safe(m.TableIters))
-	w.Printf(" filter %9s %7s %6.1f%%  (score == utility)\n",
-		notApplicable,
-		notApplicable,
-		redact.Safe(hitRate(m.Filter.Hits, m.Filter.Misses)))
-	w.Printf(" ingest %9d\n",
+
+	w.Printf("Table iters: %d\n", redact.Safe(m.TableIters))
+	w.Printf("Filter utility: %.1f%%\n", redact.Safe(hitRate(m.Filter.Hits, m.Filter.Misses)))
+	w.Printf("Ingestions: %d  as flushable: %d (%s in %d tables)\n",
 		redact.Safe(m.Ingest.Count),
-	)
+		redact.Safe(m.Flush.AsIngestCount),
+		humanize.Bytes.Uint64(m.Flush.AsIngestBytes),
+		redact.Safe(m.Flush.AsIngestTableCount))
 }
 
 func hitRate(hits, misses int64) float64 {
-	sum := hits + misses
-	if sum == 0 {
+	return percent(hits, hits+misses)
+}
+
+func percent(numerator, denominator int64) float64 {
+	if denominator == 0 {
 		return 0
 	}
-	return 100 * float64(hits) / float64(sum)
+	return 100 * float64(numerator) / float64(denominator)
 }
diff --git a/metrics_test.go b/metrics_test.go
@@ -18,7 +18,7 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-func TestMetricsFormat(t *testing.T) {
+func exampleMetrics() Metrics {
 	var m Metrics
 	m.BlockCache.Size = 1
 	m.BlockCache.Count = 2
@@ -79,34 +79,7 @@ func TestMetricsFormat(t *testing.T) {
 		l.TablesIngested = base + 12
 		l.TablesMoved = base + 13
 	}
-
-	const expected = `
-__level_____count____size___score______in__ingest(sz_cnt)____move(sz_cnt)___write(sz_cnt)____read___r-amp___w-amp
-    WAL        22    24 B       -    25 B       -       -       -       -    26 B       -       -       -     1.0
-      0       101   102 B  103.00   104 B   104 B     112   106 B     113   217 B     221   107 B       1     2.1
-      1       201   202 B  203.00   204 B   204 B     212   206 B     213   417 B     421   207 B       2     2.0
-      2       301   302 B  303.00   304 B   304 B     312   306 B     313   617 B     621   307 B       3     2.0
-      3       401   402 B  403.00   404 B   404 B     412   406 B     413   817 B     821   407 B       4     2.0
-      4       501   502 B  503.00   504 B   504 B     512   506 B     513  1017 B   1.0 K   507 B       5     2.0
-      5       601   602 B  603.00   604 B   604 B     612   606 B     613   1.2 K   1.2 K   607 B       6     2.0
-      6       701   702 B       -   704 B   704 B     712   706 B     713   1.4 K   1.4 K   707 B       7     2.0
-  total      2807   2.7 K       -   2.8 K   2.8 K   2.9 K   2.8 K   2.9 K   8.4 K   5.7 K   2.8 K      28     3.0
-  flush         8                            34 B      35      36  (ingest = tables-ingested, move = ingested-as-flushable)
-compact         5     6 B     7 B       2                          (size == estimated-debt, score = in-progress-bytes, in = num-in-progress)
-  ctype        27      28      29      30      31      32      33  (default, delete, elision, move, read, rewrite, multi-level)
- memtbl        12    11 B
-zmemtbl        14    13 B
-   ztbl        16    15 B
- bcache         2     1 B   42.9%  (score == hit-rate)
- tcache        18    17 B   48.7%  (score == hit-rate)
-  snaps         4       -    1024  (score == earliest seq num)
- titers        21
- filter         -       -   47.4%  (score == utility)
- ingest        27
-`
-	if s := "\n" + m.String(); expected != s {
-		t.Fatalf("expected%s\nbut found%s", expected, s)
-	}
+	return m
 }
 
 func TestMetrics(t *testing.T) {
@@ -143,6 +116,17 @@ func TestMetrics(t *testing.T) {
 
 	datadriven.RunTest(t, "testdata/metrics", func(t *testing.T, td *datadriven.TestData) string {
 		switch td.Cmd {
+		case "example":
+			m := exampleMetrics()
+			res := m.String()
+
+			// Nothing in the metrics should be redacted.
+			redacted := string(redact.Sprintf("%s", &m).Redact())
+			if redacted != res {
+				td.Fatalf(t, "redacted metrics don't match\nunredacted:\n%s\nredacted:%s\n", res, redacted)
+			}
+			return res
+
 		case "batch":
 			b := d.NewBatch()
 			if err := runBatchDefineCmd(td, b); err != nil {
@@ -270,38 +254,6 @@ func TestMetrics(t *testing.T) {
 	})
 }
 
-func TestMetricsRedact(t *testing.T) {
-	const expected = `
-__level_____count____size___score______in__ingest(sz_cnt)____move(sz_cnt)___write(sz_cnt)____read___r-amp___w-amp
-    WAL         0     0 B       -     0 B       -       -       -       -     0 B       -       -       -     0.0
-      0         0     0 B    0.00     0 B     0 B       0     0 B       0     0 B       0     0 B       0     0.0
-      1         0     0 B    0.00     0 B     0 B       0     0 B       0     0 B       0     0 B       0     0.0
-      2         0     0 B    0.00     0 B     0 B       0     0 B       0     0 B       0     0 B       0     0.0
-      3         0     0 B    0.00     0 B     0 B       0     0 B       0     0 B       0     0 B       0     0.0
-      4         0     0 B    0.00     0 B     0 B       0     0 B       0     0 B       0     0 B       0     0.0
-      5         0     0 B    0.00     0 B     0 B       0     0 B       0     0 B       0     0 B       0     0.0
-      6         0     0 B       -     0 B     0 B       0     0 B       0     0 B       0     0 B       0     0.0
-  total         0     0 B       -     0 B     0 B       0     0 B       0     0 B       0     0 B       0     0.0
-  flush         0                             0 B       0       0  (ingest = tables-ingested, move = ingested-as-flushable)
-compact         0     0 B     0 B       0                          (size == estimated-debt, score = in-progress-bytes, in = num-in-progress)
-  ctype         0       0       0       0       0       0       0  (default, delete, elision, move, read, rewrite, multi-level)
- memtbl         0     0 B
-zmemtbl         0     0 B
-   ztbl         0     0 B
- bcache         0     0 B    0.0%  (score == hit-rate)
- tcache         0     0 B    0.0%  (score == hit-rate)
-  snaps         0       -       0  (score == earliest seq num)
- titers         0
- filter         -       -    0.0%  (score == utility)
- ingest         0
-`
-
-	got := redact.Sprintf("%s", &Metrics{}).Redact()
-	if s := "\n" + got; expected != s {
-		t.Fatalf("expected%s\nbut found%s", expected, s)
-	}
-}
-
 func TestMetricsWAmpDisableWAL(t *testing.T) {
 	d, err := Open("", &Options{FS: vfs.NewMem(), DisableWAL: true})
 	require.NoError(t, err)