WIP: Share callstack b/w Caller and Stack
A log entry that includes both the caller and the stack trace requests stack
information twice: once to determine the caller and once to build the stack
trace.

This change avoids the duplicate work by requesting the call stack information
once and sharing it between the two.
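
To make the idea concrete, here is a minimal, self-contained sketch of the
approach (illustrative only; `logSite` and the fixed 64-entry buffer are not
zap's actual implementation): the program counters are captured once with
`runtime.Callers`, and both the caller and the stack trace are derived from
that single slice via `runtime.CallersFrames`.

```go
package main

import (
	"fmt"
	"runtime"
	"strings"
)

// logSite captures the call stack once and derives both pieces of
// information from it: the immediate caller and a full stack trace.
func logSite() (caller, stack string) {
	pcs := make([]uintptr, 64)
	// Skip runtime.Callers and logSite itself so that the first recorded
	// frame is logSite's caller.
	n := runtime.Callers(2, pcs)

	// Caller: the first recorded frame.
	frames := runtime.CallersFrames(pcs[:n])
	first, _ := frames.Next()
	caller = fmt.Sprintf("%s:%d", first.File, first.Line)

	// Stack trace: walk every frame recorded by the same capture.
	var b strings.Builder
	frames = runtime.CallersFrames(pcs[:n])
	for {
		frame, more := frames.Next()
		fmt.Fprintf(&b, "%s\n\t%s:%d\n", frame.Function, frame.File, frame.Line)
		if !more {
			break
		}
	}
	return caller, b.String()
}

func main() {
	caller, stack := logSite()
	fmt.Println("caller:", caller)
	fmt.Print(stack)
}
```

In the actual change, the captured program counters additionally come from a
`sync.Pool` and honor the logger's configured caller skip, as the diffs below
show.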

---

I ran the benchmarks with `-count 3`.

**Before**

```
BenchmarkAddCallerHook-4         1265024               912 ns/op             248 B/op          3 allocs/op
BenchmarkAddCallerHook-4         1389937               949 ns/op             248 B/op          3 allocs/op
BenchmarkAddCallerHook-4         1327134               862 ns/op             248 B/op          3 allocs/op
BenchmarkAddStack-4               565006              1911 ns/op             304 B/op          2 allocs/op
BenchmarkAddStack-4               644364              2180 ns/op             304 B/op          2 allocs/op
BenchmarkAddStack-4               547028              1896 ns/op             304 B/op          2 allocs/op
BenchmarkAddCallerAndStack-4      405864              2471 ns/op             552 B/op          5 allocs/op
BenchmarkAddCallerAndStack-4      492368              2940 ns/op             552 B/op          5 allocs/op
BenchmarkAddCallerAndStack-4      414658              2588 ns/op             552 B/op          5 allocs/op
```

**After**

```
BenchmarkAddCallerHook-4         1000000              1054 ns/op             240 B/op          2 allocs/op
BenchmarkAddCallerHook-4         1245952              1009 ns/op             240 B/op          2 allocs/op
BenchmarkAddCallerHook-4         1102441               974 ns/op             240 B/op          2 allocs/op
BenchmarkAddStack-4               908425              1269 ns/op             304 B/op          2 allocs/op
BenchmarkAddStack-4               774799              1345 ns/op             304 B/op          2 allocs/op
BenchmarkAddStack-4               836653              1575 ns/op             304 B/op          2 allocs/op
BenchmarkAddCallerAndStack-4      716456              1623 ns/op             544 B/op          4 allocs/op
BenchmarkAddCallerAndStack-4      562585              1826 ns/op             544 B/op          4 allocs/op
BenchmarkAddCallerAndStack-4      705979              1830 ns/op             544 B/op          4 allocs/op
```

On average,

1. Adding the caller alone is ~104 nanoseconds *slower*
2. Adding the stack alone is ~600 nanoseconds *faster*
3. Adding both the caller and the stack is ~906 nanoseconds *faster*
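
(For reference, the raw averages: caller alone is (912 + 949 + 862) / 3 ≈ 908 ns before versus (1054 + 1009 + 974) / 3 ≈ 1012 ns after; stack alone is ≈ 1996 ns before versus ≈ 1396 ns after; caller and stack together is ≈ 2666 ns before versus ≈ 1760 ns after.)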

The second of these rightfully raises eyebrows: at best, no change was expected
there. A cursory investigation doesn't reveal any obvious reason for the
speed-up, unless there's something incredibly wrong with the change.
abhinav committed Mar 27, 2020
1 parent 512b342 commit 7cae0d4
Showing 2 changed files with 57 additions and 27 deletions.
24 changes: 21 additions & 3 deletions logger.go
```diff
@@ -296,15 +296,33 @@ func (log *Logger) check(lvl zapcore.Level, msg string) *zapcore.CheckedEntry {
 
 	// Thread the error output through to the CheckedEntry.
 	ce.ErrorOutput = log.errorOutput
+
+	addStack := log.addStack.Enabled(ce.Entry.Level)
+	if !log.addCaller && !addStack {
+		return ce
+	}
+
+	programCounters := newProgramCounters()
+	defer programCounters.Release()
+
+	pcs := programCounters.Callers(callerSkipOffset)
 	if log.addCaller {
-		ce.Entry.Caller = zapcore.NewEntryCaller(runtime.Caller(log.callerSkip + callerSkipOffset))
+		callerIdx := log.callerSkip
+		if len(pcs) <= callerIdx {
+			ce.Entry.Caller = zapcore.NewEntryCaller(0, "", 0, false)
+		} else {
+			frame, _ := runtime.CallersFrames(pcs[callerIdx:]).Next()
+			ce.Entry.Caller = zapcore.NewEntryCaller(frame.PC, frame.File, frame.Line, true)
+		}
+
 		if !ce.Entry.Caller.Defined {
 			fmt.Fprintf(log.errorOutput, "%v Logger.check error: failed to get caller\n", time.Now().UTC())
 			log.errorOutput.Sync()
 		}
 	}
-	if log.addStack.Enabled(ce.Entry.Level) {
-		ce.Entry.Stack = Stack("").String
+
+	if addStack {
+		ce.Entry.Stack = makeStacktrace(pcs)
 	}
 
 	return ce
```
60 changes: 36 additions & 24 deletions stacktrace.go
```diff
@@ -31,40 +31,26 @@ import (
 const _zapPackage = "go.uber.org/zap"
 
 var (
-	_stacktracePool = sync.Pool{
-		New: func() interface{} {
-			return newProgramCounters(64)
-		},
-	}
-
 	// We add "." and "/" suffixes to the package name to ensure we only match
 	// the exact package and not any package with the same prefix.
 	_zapStacktracePrefixes       = addPrefix(_zapPackage, ".", "/")
 	_zapStacktraceVendorContains = addPrefix("/vendor/", _zapStacktracePrefixes...)
 )
 
 func takeStacktrace() string {
+	pcs := _stacktracePool.Get().(*programCounters)
+	defer pcs.Release()
+
+	return makeStacktrace(pcs.Callers(1))
+}
+
+func makeStacktrace(pcs []uintptr) string {
 	buffer := bufferpool.Get()
 	defer buffer.Free()
-	programCounters := _stacktracePool.Get().(*programCounters)
-	defer _stacktracePool.Put(programCounters)
-
-	var numFrames int
-	for {
-		// Skip the call to runtime.Counters and takeStacktrace so that the
-		// program counters start at the caller of takeStacktrace.
-		numFrames = runtime.Callers(2, programCounters.pcs)
-		if numFrames < len(programCounters.pcs) {
-			break
-		}
-		// Don't put the too-short counter slice back into the pool; this lets
-		// the pool adjust if we consistently take deep stacktraces.
-		programCounters = newProgramCounters(len(programCounters.pcs) * 2)
-	}
 
 	i := 0
 	skipZapFrames := true // skip all consecutive zap frames at the beginning.
-	frames := runtime.CallersFrames(programCounters.pcs[:numFrames])
+	frames := runtime.CallersFrames(pcs)
 
 	// Note: On the last iteration, frames.Next() returns false, with a valid
 	// frame, but we ignore this frame. The last frame is a a runtime frame which
@@ -113,8 +99,34 @@ type programCounters struct {
 	pcs []uintptr
 }
 
-func newProgramCounters(size int) *programCounters {
-	return &programCounters{make([]uintptr, size)}
+var _stacktracePool = sync.Pool{
+	New: func() interface{} {
+		return &programCounters{make([]uintptr, 64)}
+	},
 }
 
+func newProgramCounters() *programCounters {
+	return _stacktracePool.Get().(*programCounters)
+}
+
+func (pcs *programCounters) Release() {
+	_stacktracePool.Put(pcs)
+}
+
+func (pcs *programCounters) Callers(skip int) []uintptr {
+	var numFrames int
+	for {
+		// Skip the call to runtime.Callers and programCounter.Callers
+		// so that the program counters start at our caller.
+		numFrames = runtime.Callers(skip+2, pcs.pcs)
+		if numFrames < len(pcs.pcs) {
+			return pcs.pcs[:numFrames]
+		}
+
+		// Don't put the too-short counter slice back into the pool; this lets
+		// the pool adjust if we consistently take deep stacktraces.
+		pcs.pcs = make([]uintptr, len(pcs.pcs)*2)
+	}
+}
+
 func addPrefix(prefix string, ss ...string) []string {
```
