tool: add more summaries to manifest summarize
These were motivated by some disaggregated storage experimentation. There
are new summaries for compaction input bytes and files, and a histogram
of file lifetimes, recorded when a file is deleted. The classification of
what counts as a compaction is slightly improved, and there is some small
cleanup (removal of an unused field).

On a real manifest, in verbose mode (which enables the lifetime histograms),
the output looks like:
                        _______L0_______L1_______L2_______L3_______L4_______L5_______L6_____TOTAL
2024-05-01T12:00:00Z
     Ingest+Flush Bytes       16GB        .        .        .        .        .        .     16GB
   Ingest+Flush Bytes/s    4.5MB/s        .        .        .        .        .        .  4.5MB/s
      Compact Out Bytes       16GB        .     32GB     36GB     36GB     32GB        .    151GB
    Compact Out Bytes/s    4.5MB/s        .  9.0MB/s   10MB/s   10MB/s  9.1MB/s        .   43MB/s
     Compact In Bytes/s          .        .  9.0MB/s   10MB/s   10MB/s  9.5MB/s  7.7MB/s   47MB/s
     Compact In Files/s          .        .    2.8/s    2.1/s    1.2/s    0.6/s    0.2/s    6.8/s
<snip>
Lifetime histograms
2024-05-01T12:00:00Z
  L0: mean: 15s p25: 10s p50: 15s p75: 21s p90: 26s
  L2: mean: 14s p25: 6s p50: 13s p75: 22s p90: 28s
  L3: mean: 1m7s p25: 13s p50: 1m3s p75: 1m39s p90: 2m23s
  L4: mean: 5m53s p25: 15s p50: 5m19s p75: 10m7s p90: 14m23s
  L5: mean: 34m10s p25: 35s p50: 30m55s p75: 59m43s p90: 1h21m3s
  L6: mean: 3h34m3s p25: 2h42m7s p50: 4h24m31s p75: 5h7m11s p90: 5h41m19s
<snip>
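
The lifetime numbers above are recorded with the hdrhistogram-go package that
the diff below imports. The following is a minimal, standalone sketch of that
pattern rather than the tool's own code: the sample lifetimes are made up, a
single histogram stands in for the per-level histograms, and the percentile
lookup uses hdrhistogram's ValueAtQuantile, which takes a percentile in
[0, 100].

package main

import (
	"fmt"
	"time"

	"github.com/HdrHistogram/hdrhistogram-go"
)

func main() {
	// The tool keeps one histogram per LSM level; one histogram is enough
	// to show the pattern. Values are file lifetimes in seconds, capped at
	// 365 days.
	const maxLifetimeSec = 365 * 24 * 60 * 60
	hist := hdrhistogram.New(0, maxLifetimeSec, 1)

	// Record a few made-up lifetimes (deletion time minus creation time).
	for _, lifetime := range []time.Duration{
		15 * time.Second, 70 * time.Second, 5 * time.Minute, 3 * time.Hour,
	} {
		sec := int64(lifetime / time.Second)
		if sec > maxLifetimeSec {
			sec = maxLifetimeSec
		}
		if err := hist.RecordValue(sec); err != nil {
			fmt.Println("record error:", err)
		}
	}

	// Durations are printed the same way as in the diff: seconds wrapped
	// back into a time.Duration, e.g. "1h21m3s".
	formatSec := func(sec int64) string {
		return (time.Second * time.Duration(sec)).String()
	}
	fmt.Printf("mean: %s p50: %s p90: %s\n",
		formatSec(int64(hist.Mean())),
		formatSec(hist.ValueAtQuantile(50)),
		formatSec(hist.ValueAtQuantile(90)))
}

Capping values at maxLifetimeSec keeps every recorded lifetime inside the
histogram's trackable range, so RecordValue only fails on unexpected inputs.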
sumeerbhola committed May 15, 2024
1 parent 5e941d8 commit 864cb26
Showing 2 changed files with 115 additions and 37 deletions.
128 changes: 101 additions & 27 deletions tool/manifest.go
@@ -11,6 +11,7 @@ import (
"slices"
"time"

"github.com/HdrHistogram/hdrhistogram-go"
"github.com/cockroachdb/pebble"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/humanize"
@@ -309,7 +310,12 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
type summaryBucket struct {
bytesAdded [manifest.NumLevels]uint64
bytesCompactOut [manifest.NumLevels]uint64
bytesCompactIn [manifest.NumLevels]uint64
filesCompactIn [manifest.NumLevels]uint64
fileLifetimeSec [manifest.NumLevels]*hdrhistogram.Histogram
}
// 365 days. Arbitrary.
const maxLifetimeSec = 365 * 24 * 60 * 60
var (
bve manifest.BulkVersionEdit
newestOverall time.Time
@@ -319,6 +325,7 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
)
bve.AddedByFileNum = make(map[base.FileNum]*manifest.FileMetadata)
rr := record.NewReader(f, 0 /* logNum */)
numHistErrors := 0
for i := 0; ; i++ {
r, err := rr.Next()
if err == io.EOF {
@@ -336,9 +343,27 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
return err
}

veNewest, veOldest := newestOverall, newestOverall
// !isLikelyCompaction corresponds to flushes or ingests, which will be
// counted in bytesAdded. This is imperfect since ingests that excise can
// have deleted files without creating backing tables, and be counted as
// compactions. Also, copy compactions have no deleted files and create
// backing tables, so will be counted as flush/ingest.
//
// The bytesAdded metric overcounts since existing files virtualized by an
// ingest are also included.
//
// TODO(sumeer): this summarization needs a rewrite. We could do that
// after adding an enum to the VersionEdit to aid the summarization.
isLikelyCompaction := len(ve.NewFiles) > 0 && len(ve.DeletedFiles) > 0 && len(ve.CreatedBackingTables) == 0
isIntraL0Compaction := isLikelyCompaction && ve.NewFiles[0].Level == 0
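// For example, a version edit produced by an L5->L6 compaction adds new
// files at L6, deletes files at L5 and L6, and creates no backing tables,
// so it is classified as a compaction; a flush adds L0 files and deletes
// nothing, so its bytes are counted in bytesAdded below.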
veNewest := newestOverall
for _, nf := range ve.NewFiles {
_, seen := metadatas[nf.Meta.FileNum]
if seen && !isLikelyCompaction {
// Output error and continue processing as usual.
fmt.Fprintf(stdout, "error: flush/ingest has file that is already known %d size %s\n",
nf.Meta.FileNum, humanize.Bytes.Uint64(nf.Meta.Size))
}
metadatas[nf.Meta.FileNum] = nf.Meta
if nf.Meta.CreationTime == 0 {
continue
@@ -348,12 +373,6 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
if veNewest.Before(t) {
veNewest = t
}
// Only update the oldest if we haven't already seen this
// file; it might've been moved in which case the sstable's
// creation time is from when it was originally created.
if veOldest.After(t) && !seen {
veOldest = t
}
}
// Ratchet up the most recent timestamp we've seen.
if newestOverall.Before(veNewest) {
@@ -377,26 +396,36 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
buckets[bucketKey] = b
}

// Increase `bytesAdded` for any version edits that only add files.
// These are either flushes or ingests.
if len(ve.NewFiles) > 0 && len(ve.DeletedFiles) == 0 {
for _, nf := range ve.NewFiles {
for _, nf := range ve.NewFiles {
if !isLikelyCompaction {
b.bytesAdded[nf.Level] += nf.Meta.Size
} else if !isIntraL0Compaction {
b.bytesCompactIn[nf.Level] += nf.Meta.Size
b.filesCompactIn[nf.Level]++
}
continue
}

// Increase `bytesCompactOut` for the input level of any compactions
// that remove bytes from a level (excluding intra-L0 compactions).
// compactions.
destLevel := -1
if len(ve.NewFiles) > 0 {
destLevel = ve.NewFiles[0].Level
}
for dfe := range ve.DeletedFiles {
if dfe.Level != destLevel {
// Increase `bytesCompactOut` for the input level of any compactions
// that remove bytes from a level (excluding intra-L0 compactions).
if isLikelyCompaction && !isIntraL0Compaction && dfe.Level != manifest.NumLevels-1 {
b.bytesCompactOut[dfe.Level] += metadatas[dfe.FileNum].Size
}
meta, ok := metadatas[dfe.FileNum]
if m.verbose && ok && meta.CreationTime > 0 {
hist := b.fileLifetimeSec[dfe.Level]
if hist == nil {
hist = hdrhistogram.New(0, maxLifetimeSec, 1)
b.fileLifetimeSec[dfe.Level] = hist
}
lifetimeSec := int64((newestOverall.Sub(time.Unix(meta.CreationTime, 0).UTC())) / time.Second)
if lifetimeSec > maxLifetimeSec {
lifetimeSec = maxLifetimeSec
}
if err := hist.RecordValue(lifetimeSec); err != nil {
numHistErrors++
}
}
}
}

@@ -406,7 +435,7 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
}
return humanize.Bytes.Uint64(v).String()
}
formatRate := func(v uint64, dur time.Duration) string {
formatByteRate := func(v uint64, dur time.Duration) string {
if v == 0 {
return "."
}
@@ -416,6 +445,16 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
}
return humanize.Bytes.Uint64(uint64(float64(v)/secs)).String() + "/s"
}
formatRate := func(v uint64, dur time.Duration) string {
if v == 0 {
return "."
}
secs := dur.Seconds()
if secs == 0 {
secs = 1
}
return fmt.Sprintf("%.1f/s", float64(v)/secs)
}

if newestOverall.IsZero() {
fmt.Fprintf(stdout, "(no timestamps)\n")
@@ -432,7 +471,7 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
}

if bi%10 == 0 {
fmt.Fprintf(stdout, " ")
fmt.Fprintf(stdout, " ")
fmt.Fprintf(stdout, "_______L0_______L1_______L2_______L3_______L4_______L5_______L6_____TOTAL\n")
}
fmt.Fprintf(stdout, "%s\n", bt.Format(time.RFC3339))
@@ -450,17 +489,19 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
format func(uint64, time.Duration) string
vals [manifest.NumLevels]uint64
}{
{"Ingest+Flush", formatUint64, bucket.bytesAdded},
{"Ingest+Flush", formatRate, bucket.bytesAdded},
{"Compact (out)", formatUint64, bucket.bytesCompactOut},
{"Compact (out)", formatRate, bucket.bytesCompactOut},
{"Ingest+Flush Bytes", formatUint64, bucket.bytesAdded},
{"Ingest+Flush Bytes/s", formatByteRate, bucket.bytesAdded},
{"Compact Out Bytes", formatUint64, bucket.bytesCompactOut},
{"Compact Out Bytes/s", formatByteRate, bucket.bytesCompactOut},
{"Compact In Bytes/s", formatByteRate, bucket.bytesCompactIn},
{"Compact In Files/s", formatRate, bucket.filesCompactIn},
}
for _, stat := range stats {
var sum uint64
for _, v := range stat.vals {
sum += v
}
fmt.Fprintf(stdout, "%20s %8s %8s %8s %8s %8s %8s %8s %8s\n",
fmt.Fprintf(stdout, "%23s %8s %8s %8s %8s %8s %8s %8s %8s\n",
stat.label,
stat.format(stat.vals[0], dur),
stat.format(stat.vals[1], dur),
@@ -473,13 +514,46 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
}
}
fmt.Fprintf(stdout, "%s\n", newestOverall.Format(time.RFC3339))

formatSec := func(sec int64) string {
return (time.Second * time.Duration(sec)).String()
}
if m.verbose {
fmt.Fprintf(stdout, "\nLifetime histograms\n")
for bi, bt := 0, oldestOverall; !bt.After(newestOverall); bi, bt = bi+1, bt.Truncate(m.summarizeDur).Add(m.summarizeDur) {
// Truncate the start time to calculate the bucket key, and
// retrieve the appropriate bucket.
bk := bt.Truncate(m.summarizeDur)
var bucket summaryBucket
if buckets[bk] != nil {
bucket = *buckets[bk]
}
fmt.Fprintf(stdout, "%s\n", bt.Format(time.RFC3339))
formatHist := func(level int, hist *hdrhistogram.Histogram) {
if hist == nil {
return
}
fmt.Fprintf(stdout, " L%d: mean: %s p25: %s p50: %s p75: %s p90: %s\n", level,
formatSec(int64(hist.Mean())), formatSec(hist.ValueAtPercentile(25)),
formatSec(hist.ValueAtPercentile(50)), formatSec(hist.ValueAtPercentile(75)),
formatSec(hist.ValueAtPercentile(90)))
}
for i := range bucket.fileLifetimeSec {
formatHist(i, bucket.fileLifetimeSec[i])
}
}
fmt.Fprintf(stdout, "%s\n", newestOverall.Format(time.RFC3339))
}
}

dur := newestOverall.Sub(oldestOverall)
fmt.Fprintf(stdout, "---\n")
fmt.Fprintf(stdout, "Estimated start time: %s\n", oldestOverall.Format(time.RFC3339))
fmt.Fprintf(stdout, "Estimated end time: %s\n", newestOverall.Format(time.RFC3339))
fmt.Fprintf(stdout, "Estimated duration: %s\n", dur.String())
if numHistErrors > 0 {
fmt.Fprintf(stdout, "Errors in lifetime histograms: %d\n", numHistErrors)
}

return nil
}
24 changes: 14 additions & 10 deletions tool/testdata/manifest_summarize
@@ -16,12 +16,14 @@ manifest summarize
./testdata/find-db/MANIFEST-000001
----
MANIFEST-000001
_______L0_______L1_______L2_______L3_______L4_______L5_______L6_____TOTAL
_______L0_______L1_______L2_______L3_______L4_______L5_______L6_____TOTAL
2023-12-12T18:55:00Z
Ingest+Flush 2.0KB . . . . . 671B 2.7KB
Ingest+Flush 2.0KB/s . . . . . 671B/s 2.7KB/s
Compact (out) 2.0KB . . . . . . 2.0KB
Compact (out) 2.0KB/s . . . . . . 2.0KB/s
Ingest+Flush Bytes 2.0KB . . . . . 671B 2.7KB
Ingest+Flush Bytes/s 2.0KB/s . . . . . 671B/s 2.7KB/s
Compact Out Bytes 2.0KB . . . . . . 2.0KB
Compact Out Bytes/s 2.0KB/s . . . . . . 2.0KB/s
Compact In Bytes/s . . . . . . 2.2KB/s 2.2KB/s
Compact In Files/s . . . . . . 3.0/s 3.0/s
2023-12-12T18:55:00Z
---
Estimated start time: 2023-12-12T18:55:00Z
@@ -32,12 +34,14 @@ manifest summarize
./testdata/mixed/MANIFEST-000001
----
MANIFEST-000001
_______L0_______L1_______L2_______L3_______L4_______L5_______L6_____TOTAL
_______L0_______L1_______L2_______L3_______L4_______L5_______L6_____TOTAL
2023-12-11T18:59:04Z
Ingest+Flush 1.0KB . . . . . . 1.0KB
Ingest+Flush 1.0KB/s . . . . . . 1.0KB/s
Compact (out) . . . . . . . .
Compact (out) . . . . . . . .
Ingest+Flush Bytes 1.0KB . . . . . . 1.0KB
Ingest+Flush Bytes/s 1.0KB/s . . . . . . 1.0KB/s
Compact Out Bytes . . . . . . . .
Compact Out Bytes/s . . . . . . . .
Compact In Bytes/s . . . . . . . .
Compact In Files/s . . . . . . . .
2023-12-11T18:59:04Z
---
Estimated start time: 2023-12-11T18:59:04Z
