From 0e70f5d841d41455be92f6c7d3c4e00467b7bdf1 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Mon, 19 Sep 2022 18:34:42 -0400 Subject: [PATCH] runtime: export total GC Assist ns in MemStats and GCStats At a high level, the runtime garbage collector can impact user goroutine latency in two ways. The first is that it pauses all goroutines during its stop-the-world sweep termination and mark termination phases. The second is that it backpressures memory allocations by instructing user goroutines to assist with scanning and marking in response to a high allocation rate. There is plenty of observability into the first of these sources of user-visible latency. There is significantly less observability into the second. As a result, it is often more difficult to diagnose latency problems due to over-assist (e.g. #14812, #27732, #40225). To this point, the ways to determine that GC assist was a problem were to use execution tracing or to use GODEBUG=gctrace=1 tracing, neither of which is easy to access programmatically in a running system. CPU profiles also give some insight, but are rarely as instructive as one might expect because heavy GC assist time is scattered across a profile. Notice even in https://tip.golang.org/doc/gc-guide, the guidance on recognizing and remedying performance problems due to GC assist is sparse. This commit adds a counter to the MemStats and GCStats structs called AssistTotalNs, which tracks the cumulative nanoseconds in GC assist since the program started. This provides a new form of observability into GC assist delays, and one that can be manipulated programmatically. There's more work to be done in this area. This feels like a reasonable first step. --- src/runtime/debug/garbage.go | 8 +++++--- src/runtime/debug/garbage_test.go | 3 +++ src/runtime/malloc_test.go | 4 ++-- src/runtime/mgc.go | 1 + src/runtime/mstats.go | 17 +++++++++++++++-- src/runtime/pprof/pprof.go | 2 ++ 6 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/runtime/debug/garbage.go b/src/runtime/debug/garbage.go index 0f53928fe8530f..f8117270992b52 100644 --- a/src/runtime/debug/garbage.go +++ b/src/runtime/debug/garbage.go @@ -18,6 +18,7 @@ type GCStats struct { Pause []time.Duration // pause history, most recent first PauseEnd []time.Time // pause end times history, most recent first PauseQuantiles []time.Duration + AssistTotal time.Duration // total assist across all collections } // ReadGCStats reads statistics about garbage collection into stats. @@ -35,8 +36,8 @@ func ReadGCStats(stats *GCStats) { // for end times history and as a temporary buffer for // computing quantiles. const maxPause = len(((*runtime.MemStats)(nil)).PauseNs) - if cap(stats.Pause) < 2*maxPause+3 { - stats.Pause = make([]time.Duration, 2*maxPause+3) + if cap(stats.Pause) < 2*maxPause+4 { + stats.Pause = make([]time.Duration, 2*maxPause+4) } // readGCStats fills in the pause and end times histories (up to @@ -46,10 +47,11 @@ func ReadGCStats(stats *GCStats) { // nanoseconds, so the pauses and the total pause time do not need // any conversion. readGCStats(&stats.Pause) - n := len(stats.Pause) - 3 + n := len(stats.Pause) - 4 stats.LastGC = time.Unix(0, int64(stats.Pause[n])) stats.NumGC = int64(stats.Pause[n+1]) stats.PauseTotal = stats.Pause[n+2] + stats.AssistTotal = stats.Pause[n+3] n /= 2 // buffer holds pauses and end times stats.Pause = stats.Pause[:n] diff --git a/src/runtime/debug/garbage_test.go b/src/runtime/debug/garbage_test.go index 7213bbe641dd6e..058a9a49aff54a 100644 --- a/src/runtime/debug/garbage_test.go +++ b/src/runtime/debug/garbage_test.go @@ -36,6 +36,9 @@ func TestReadGCStats(t *testing.T) { if stats.PauseTotal != time.Duration(mstats.PauseTotalNs) { t.Errorf("stats.PauseTotal = %d, but mstats.PauseTotalNs = %d", stats.PauseTotal, mstats.PauseTotalNs) } + if stats.AssistTotal != time.Duration(mstats.AssistTotalNs) { + t.Errorf("stats.AssistTotal = %d, but mstats.AssistTotalNs = %d", stats.AssistTotal, mstats.AssistTotalNs) + } if stats.LastGC.UnixNano() != int64(mstats.LastGC) { t.Errorf("stats.LastGC.UnixNano = %d, but mstats.LastGC = %d", stats.LastGC.UnixNano(), mstats.LastGC) } diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go index cc2007604ddb6f..8bf8d099b9b8ec 100644 --- a/src/runtime/malloc_test.go +++ b/src/runtime/malloc_test.go @@ -62,7 +62,7 @@ func TestMemStats(t *testing.T) { return fmt.Errorf("want %v", x) } } - // Of the uint fields, HeapReleased, HeapIdle can be 0. + // Of the uint fields, HeapReleased, HeapIdle, AssistTotalNs can be 0. // PauseTotalNs can be 0 if timer resolution is poor. fields := map[string][]func(any) error{ "Alloc": {nz, le(1e10)}, "TotalAlloc": {nz, le(1e11)}, "Sys": {nz, le(1e10)}, @@ -74,7 +74,7 @@ func TestMemStats(t *testing.T) { "MCacheInuse": {nz, le(1e10)}, "MCacheSys": {nz, le(1e10)}, "BuckHashSys": {nz, le(1e10)}, "GCSys": {nz, le(1e10)}, "OtherSys": {nz, le(1e10)}, "NextGC": {nz, le(1e10)}, "LastGC": {nz}, - "PauseTotalNs": {le(1e11)}, "PauseNs": nil, "PauseEnd": nil, + "PauseTotalNs": {le(1e11)}, "PauseNs": nil, "PauseEnd": nil, "AssistTotalNs": {le(1e10)}, "NumGC": {nz, le(1e9)}, "NumForcedGC": {nz, le(1e9)}, "GCCPUFraction": {le(0.99)}, "EnableGC": {eq(true)}, "DebugGC": {eq(false)}, "BySize": nil, diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 6a9d278187fb79..183a4c68db839a 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1003,6 +1003,7 @@ func gcMarkTermination() { memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS) memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow) memstats.pause_total_ns += uint64(work.pauseNS) + memstats.assist_total_ns += uint64(gcController.assistTime) sweepTermCpu := int64(work.stwprocs) * (work.tMark - work.tSweepTerm) // We report idle marking time below, but omit it from the diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 3a5273f3619cf6..ba8f49135eb2d2 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -36,6 +36,7 @@ type mstats struct { pause_total_ns uint64 pause_ns [256]uint64 // circular buffer of recent gc pause lengths pause_end [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970) + assist_total_ns uint64 numgc uint32 numforcedgc uint32 // number of user-forced GCs gc_cpu_fraction float64 // fraction of CPU time used by GC @@ -275,6 +276,16 @@ type MemStats struct { // last pause in a cycle. PauseEnd [256]uint64 + // AssistTotalNs is the cumulative nanoseconds in GC assist since + // the program started. + // + // During GC assist, user goroutines yield some of their time to + // assist the GC with scanning and marking in response to a high + // allocation rate. A large amount of cumulative time spent here + // indicates that the application is likely out-pacing the GC + // with respect to how fast it is allocating. + AssistTotalNs uint64 + // NumGC is the number of completed GC cycles. NumGC uint32 @@ -530,6 +541,7 @@ func readmemstats_m(stats *MemStats) { stats.PauseTotalNs = memstats.pause_total_ns stats.PauseNs = memstats.pause_ns stats.PauseEnd = memstats.pause_end + stats.AssistTotalNs = memstats.assist_total_ns stats.NumGC = memstats.numgc stats.NumForcedGC = memstats.numforcedgc stats.GCCPUFraction = memstats.gc_cpu_fraction @@ -556,7 +568,7 @@ func readGCStats(pauses *[]uint64) { func readGCStats_m(pauses *[]uint64) { p := *pauses // Calling code in runtime/debug should make the slice large enough. - if cap(p) < len(memstats.pause_ns)+3 { + if cap(p) < len(memstats.pause_ns)+4 { throw("short slice passed to readGCStats") } @@ -582,8 +594,9 @@ func readGCStats_m(pauses *[]uint64) { p[n+n] = memstats.last_gc_unix p[n+n+1] = uint64(memstats.numgc) p[n+n+2] = memstats.pause_total_ns + p[n+n+3] = memstats.assist_total_ns unlock(&mheap_.lock) - *pauses = p[:n+n+3] + *pauses = p[:n+n+4] } // flushmcache flushes the mcache of allp[i]. diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go index 3a7191e092e7b9..2b6b53b5899b62 100644 --- a/src/runtime/pprof/pprof.go +++ b/src/runtime/pprof/pprof.go @@ -636,8 +636,10 @@ func writeHeapInternal(w io.Writer, debug int, defaultSampleType string) error { fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC) fmt.Fprintf(w, "# LastGC = %d\n", s.LastGC) + fmt.Fprintf(w, "# PauseTotalNs = %d\n", s.PauseTotalNs) fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs) fmt.Fprintf(w, "# PauseEnd = %d\n", s.PauseEnd) + fmt.Fprintf(w, "# AssistTotalNs = %d\n", s.AssistTotalNs) fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC) fmt.Fprintf(w, "# NumForcedGC = %d\n", s.NumForcedGC) fmt.Fprintf(w, "# GCCPUFraction = %v\n", s.GCCPUFraction)