From cfe0ae0b7070048ceda021988b01fbc6a8589a1b Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 23 Oct 2024 16:28:52 +0000 Subject: [PATCH] [release-branch.go1.23] runtime: uphold goroutine profile invariants in coroswitch Goroutine profiles require checking in with the profiler before any goroutine starts running. coroswitch is a place where a goroutine may start running, but where we do not check in with the profiler, which leads to crashes. Fix this by checking in with the profiler the same way execute does. For #69998. Fixes #70001. Change-Id: Idef6dd31b70a73dd1c967b56c307c7a46a26ba73 Reviewed-on: https://go-review.googlesource.com/c/go/+/622016 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI (cherry picked from commit 2a98a1849f059ffa94ab23a1ab7d8fa0fd0b48dd) Reviewed-on: https://go-review.googlesource.com/c/go/+/622375 Reviewed-by: Michael Pratt --- src/runtime/coro.go | 12 +++++++++ src/runtime/pprof/pprof_test.go | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/src/runtime/coro.go b/src/runtime/coro.go index 30ada455e4985e..d93817f92f1caa 100644 --- a/src/runtime/coro.go +++ b/src/runtime/coro.go @@ -208,6 +208,18 @@ func coroswitch_m(gp *g) { // directly if possible. setGNoWB(&mp.curg, gnext) setMNoWB(&gnext.m, mp) + + // Synchronize with any out-standing goroutine profile. We're about to start + // executing, and an invariant of the profiler is that we tryRecordGoroutineProfile + // whenever a goroutine is about to start running. + // + // N.B. We must do this before transitioning to _Grunning but after installing gnext + // in curg, so that we have a valid curg for allocation (tryRecordGoroutineProfile + // may allocate). + if goroutineProfile.active { + tryRecordGoroutineProfile(gnext, nil, osyield) + } + if !gnext.atomicstatus.CompareAndSwap(_Gwaiting, _Grunning) { // The CAS failed: use casgstatus, which will take care of // coordinating with the garbage collector about the state change. diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go index d16acf54dabd98..41952ff147c4b7 100644 --- a/src/runtime/pprof/pprof_test.go +++ b/src/runtime/pprof/pprof_test.go @@ -15,6 +15,7 @@ import ( "internal/syscall/unix" "internal/testenv" "io" + "iter" "math" "math/big" "os" @@ -1754,6 +1755,50 @@ func TestGoroutineProfileConcurrency(t *testing.T) { } } +// Regression test for #69998. +func TestGoroutineProfileCoro(t *testing.T) { + testenv.MustHaveParallelism(t) + + goroutineProf := Lookup("goroutine") + + // Set up a goroutine to just create and run coroutine goroutines all day. + iterFunc := func() { + p, stop := iter.Pull2( + func(yield func(int, int) bool) { + for i := 0; i < 10000; i++ { + if !yield(i, i) { + return + } + } + }, + ) + defer stop() + for { + _, _, ok := p() + if !ok { + break + } + } + } + var wg sync.WaitGroup + done := make(chan struct{}) + wg.Add(1) + go func() { + defer wg.Done() + for { + iterFunc() + select { + case <-done: + default: + } + } + }() + + // Take a goroutine profile. If the bug in #69998 is present, this will crash + // with high probability. We don't care about the output for this bug. + goroutineProf.WriteTo(io.Discard, 1) +} + func BenchmarkGoroutine(b *testing.B) { withIdle := func(n int, fn func(b *testing.B)) func(b *testing.B) { return func(b *testing.B) {