diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go index 64e03aeccfb963..20b44e1e013a92 100644 --- a/src/runtime/pprof/pprof_test.go +++ b/src/runtime/pprof/pprof_test.go @@ -1183,6 +1183,32 @@ func TestTryAdd(t *testing.T) { {Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}}, {Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}}}, }, + }, { + name: "truncated_stack_trace_later", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 5, 0, 50, inlinedCalleePtr, inlinedCallerPtr, + 4, 0, 60, inlinedCalleePtr, + }, + wantLocs: [][]string{{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}}, + {Value: []int64{60, 60 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + name: "truncated_stack_trace_first", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 4, 0, 70, inlinedCalleePtr, + 5, 0, 80, inlinedCalleePtr, inlinedCallerPtr, + }, + wantLocs: [][]string{ // the inline info is screwed up, but better than a crash. + {"runtime/pprof.inlinedCallee"}, + {"runtime/pprof.inlinedCaller"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}}, + {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}}, + }, }} for _, tc := range testCases { diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go index a42cd80c153b2d..8a30c7151d2709 100644 --- a/src/runtime/pprof/proto.go +++ b/src/runtime/pprof/proto.go @@ -394,7 +394,23 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo // then, record the cached location. locs = append(locs, l.id) - stk = stk[len(l.pcs):] // skip the matching pcs. + + // The stk may be truncated due to the stack depth limit + // (e.g. See maxStack and maxCPUProfStack in runtime) or + // bugs in runtime. Avoid the crash in either case. + // TODO(hyangah): The correct fix may require using the exact + // pcs as the key for b.locs cache management instead of just + // relying on the very first pc. We are late in the go1.14 dev + // cycle, so this is a workaround with little code change. + if len(l.pcs) > len(stk) { + stk = nil + // TODO(hyangah): would be nice if we can enable + // debug print out on demand and report the problematic + // cached location entry and stack traces. Do we already + // have such facility to utilize (e.g. GODEBUG)? + } else { + stk = stk[len(l.pcs):] // skip the matching pcs. + } continue }