From a93cd4a71e41f04964651740153ca6598d4bf50b Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Mon, 31 Jul 2023 22:00:34 -0700 Subject: [PATCH] zap.Any: Reduce stack size with generics (#1310) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Yet another attempt at reducing the stack size of zap.Any, borrowing from #1301, #1303, #1304, #1305, #1307, and #1308. This approach defines a generic data type for field constructors of a specific type. This is similar to the lookup map in #1307, minus the map lookup, the interface match, or reflection. type anyFieldC[T any] func(string, T) Field The generic data type provides a non-generic method matching the interface: interface{ Any(string, any) Field } **Stack size**: The stack size of zap.Any following this change is 0xc0 (192 bytes). % go build -gcflags -S 2>&1 | grep ^go.uber.org/zap.Any go.uber.org/zap.Any STEXT size=5861 args=0x20 locals=0xc0 funcid=0x0 align=0x0 This is just 8 bytes more than #1305, which is the smallest stack size of all other attempts. **Allocations**: Everything appears to get inlined with no heap escapes: % go build -gcflags -m 2>&1 | grep field.go | perl -n -e 'next unless m{^./field.go:(\d+)}; print if ($1 >= 413)' | grep 'escapes' [no output] (Line 413 declares anyFieldC) Besides that, the output of `-m` for the relevant section of code consists of almost entirely: ./field.go:415:6: can inline anyFieldC[go.shape.bool].Any ./field.go:415:6: can inline anyFieldC[go.shape.[]bool].Any ./field.go:415:6: can inline anyFieldC[go.shape.complex128].Any [...] ./field.go:415:6: inlining call to anyFieldC[go.shape.complex128].Any ./field.go:415:6: inlining call to anyFieldC[go.shape.[]bool].Any ./field.go:415:6: inlining call to anyFieldC[go.shape.bool].Any Followed by: ./field.go:428:10: leaking param: key ./field.go:428:22: leaking param: value **Maintainability**: Unlike some of the other approaches, this variant is more maintainable. The `zap.Any` function looks roughly the same. Adding new branches there is obvious, and requires no duplication. **Performance**: This is a net improvement against master on BenchmarkAny's log-go checks that log inside a new goroutine. ``` name old time/op new time/op delta Any/string/field-only/typed 25.2ns ± 1% 25.6ns ± 2% ~ (p=0.460 n=5+5) Any/string/field-only/any 56.9ns ± 3% 79.4ns ± 0% +39.55% (p=0.008 n=5+5) Any/string/log/typed 1.47µs ± 0% 1.49µs ± 4% +1.58% (p=0.016 n=4+5) Any/string/log/any 1.53µs ± 2% 1.55µs ± 1% +1.37% (p=0.016 n=5+5) Any/string/log-go/typed 5.97µs ± 6% 5.99µs ± 1% ~ (p=0.151 n=5+5) Any/string/log-go/any 10.9µs ± 0% 6.2µs ± 0% -43.32% (p=0.008 n=5+5) Any/stringer/field-only/typed 25.3ns ± 1% 25.5ns ± 1% +1.09% (p=0.008 n=5+5) Any/stringer/field-only/any 85.5ns ± 1% 124.5ns ± 0% +45.66% (p=0.008 n=5+5) Any/stringer/log/typed 1.43µs ± 1% 1.42µs ± 2% ~ (p=0.175 n=4+5) Any/stringer/log/any 1.50µs ± 1% 1.56µs ± 6% +4.20% (p=0.008 n=5+5) Any/stringer/log-go/typed 5.94µs ± 0% 5.92µs ± 0% -0.40% (p=0.032 n=5+5) Any/stringer/log-go/any 11.1µs ± 2% 6.3µs ± 0% -42.93% (p=0.008 n=5+5) name old alloc/op new alloc/op delta Any/string/field-only/typed 0.00B 0.00B ~ (all equal) Any/string/field-only/any 0.00B 0.00B ~ (all equal) Any/string/log/typed 64.0B ± 0% 64.0B ± 0% ~ (all equal) Any/string/log/any 64.0B ± 0% 64.0B ± 0% ~ (all equal) Any/string/log-go/typed 112B ± 0% 112B ± 0% ~ (all equal) Any/string/log-go/any 128B ± 0% 128B ± 0% ~ (all equal) Any/stringer/field-only/typed 0.00B 0.00B ~ (all equal) Any/stringer/field-only/any 0.00B 0.00B ~ (all equal) Any/stringer/log/typed 64.0B ± 0% 64.0B ± 0% ~ (all equal) Any/stringer/log/any 64.0B ± 0% 64.0B ± 0% ~ (all equal) Any/stringer/log-go/typed 112B ± 0% 112B ± 0% ~ (all equal) Any/stringer/log-go/any 128B ± 0% 128B ± 0% ~ (all equal) name old allocs/op new allocs/op delta Any/string/field-only/typed 0.00 0.00 ~ (all equal) Any/string/field-only/any 0.00 0.00 ~ (all equal) Any/string/log/typed 1.00 ± 0% 1.00 ± 0% ~ (all equal) Any/string/log/any 1.00 ± 0% 1.00 ± 0% ~ (all equal) Any/string/log-go/typed 3.00 ± 0% 3.00 ± 0% ~ (all equal) Any/string/log-go/any 3.00 ± 0% 3.00 ± 0% ~ (all equal) Any/stringer/field-only/typed 0.00 0.00 ~ (all equal) Any/stringer/field-only/any 0.00 0.00 ~ (all equal) Any/stringer/log/typed 1.00 ± 0% 1.00 ± 0% ~ (all equal) Any/stringer/log/any 1.00 ± 0% 1.00 ± 0% ~ (all equal) Any/stringer/log-go/typed 3.00 ± 0% 3.00 ± 0% ~ (all equal) Any/stringer/log-go/any 3.00 ± 0% 3.00 ± 0% ~ (all equal) ``` It causes a regression in "field-only" which calls the field constructor and discards the result without using it in a logger. I believe this is acceptable because that's not a real use case; we expect the result to be used with a logger. --- field.go | 169 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 105 insertions(+), 64 deletions(-) diff --git a/field.go b/field.go index bbb745db5..7f22c5349 100644 --- a/field.go +++ b/field.go @@ -410,6 +410,43 @@ func Inline(val zapcore.ObjectMarshaler) Field { } } +// We discovered an issue where zap.Any can cause a performance degradation +// when used in new goroutines. +// +// This happens because the compiler assigns 4.8kb (one zap.Field per arm of +// switch statement) of stack space for zap.Any when it takes the form: +// +// switch v := v.(type) { +// case string: +// return String(key, v) +// case int: +// return Int(key, v) +// // ... +// default: +// return Reflect(key, v) +// } +// +// To avoid this, we use the type switch to assign a value to a single local variable +// and then call a function on it. +// The local variable is just a function reference so it doesn't allocate +// when converted to an interface{}. +// +// A fair bit of experimentation went into this. +// See also: +// +// - https://github.com/uber-go/zap/pull/1301 +// - https://github.com/uber-go/zap/pull/1303 +// - https://github.com/uber-go/zap/pull/1304 +// - https://github.com/uber-go/zap/pull/1305 +// - https://github.com/uber-go/zap/pull/1308 +type anyFieldC[T any] func(string, T) Field + +func (f anyFieldC[T]) Any(key string, val any) Field { + v, _ := val.(T) + // val is guaranteed to be a T, except when it's nil. + return f(key, v) +} + // Any takes a key and an arbitrary value and chooses the best way to represent // them as a field, falling back to a reflection-based approach only if // necessary. @@ -418,132 +455,136 @@ func Inline(val zapcore.ObjectMarshaler) Field { // them. To minimize surprises, []byte values are treated as binary blobs, byte // values are treated as uint8, and runes are always treated as integers. func Any(key string, value interface{}) Field { - switch val := value.(type) { + var c interface{ Any(string, any) Field } + + switch value.(type) { case zapcore.ObjectMarshaler: - return Object(key, val) + c = anyFieldC[zapcore.ObjectMarshaler](Object) case zapcore.ArrayMarshaler: - return Array(key, val) + c = anyFieldC[zapcore.ArrayMarshaler](Array) case bool: - return Bool(key, val) + c = anyFieldC[bool](Bool) case *bool: - return Boolp(key, val) + c = anyFieldC[*bool](Boolp) case []bool: - return Bools(key, val) + c = anyFieldC[[]bool](Bools) case complex128: - return Complex128(key, val) + c = anyFieldC[complex128](Complex128) case *complex128: - return Complex128p(key, val) + c = anyFieldC[*complex128](Complex128p) case []complex128: - return Complex128s(key, val) + c = anyFieldC[[]complex128](Complex128s) case complex64: - return Complex64(key, val) + c = anyFieldC[complex64](Complex64) case *complex64: - return Complex64p(key, val) + c = anyFieldC[*complex64](Complex64p) case []complex64: - return Complex64s(key, val) + c = anyFieldC[[]complex64](Complex64s) case float64: - return Float64(key, val) + c = anyFieldC[float64](Float64) case *float64: - return Float64p(key, val) + c = anyFieldC[*float64](Float64p) case []float64: - return Float64s(key, val) + c = anyFieldC[[]float64](Float64s) case float32: - return Float32(key, val) + c = anyFieldC[float32](Float32) case *float32: - return Float32p(key, val) + c = anyFieldC[*float32](Float32p) case []float32: - return Float32s(key, val) + c = anyFieldC[[]float32](Float32s) case int: - return Int(key, val) + c = anyFieldC[int](Int) case *int: - return Intp(key, val) + c = anyFieldC[*int](Intp) case []int: - return Ints(key, val) + c = anyFieldC[[]int](Ints) case int64: - return Int64(key, val) + c = anyFieldC[int64](Int64) case *int64: - return Int64p(key, val) + c = anyFieldC[*int64](Int64p) case []int64: - return Int64s(key, val) + c = anyFieldC[[]int64](Int64s) case int32: - return Int32(key, val) + c = anyFieldC[int32](Int32) case *int32: - return Int32p(key, val) + c = anyFieldC[*int32](Int32p) case []int32: - return Int32s(key, val) + c = anyFieldC[[]int32](Int32s) case int16: - return Int16(key, val) + c = anyFieldC[int16](Int16) case *int16: - return Int16p(key, val) + c = anyFieldC[*int16](Int16p) case []int16: - return Int16s(key, val) + c = anyFieldC[[]int16](Int16s) case int8: - return Int8(key, val) + c = anyFieldC[int8](Int8) case *int8: - return Int8p(key, val) + c = anyFieldC[*int8](Int8p) case []int8: - return Int8s(key, val) + c = anyFieldC[[]int8](Int8s) case string: - return String(key, val) + c = anyFieldC[string](String) case *string: - return Stringp(key, val) + c = anyFieldC[*string](Stringp) case []string: - return Strings(key, val) + c = anyFieldC[[]string](Strings) case uint: - return Uint(key, val) + c = anyFieldC[uint](Uint) case *uint: - return Uintp(key, val) + c = anyFieldC[*uint](Uintp) case []uint: - return Uints(key, val) + c = anyFieldC[[]uint](Uints) case uint64: - return Uint64(key, val) + c = anyFieldC[uint64](Uint64) case *uint64: - return Uint64p(key, val) + c = anyFieldC[*uint64](Uint64p) case []uint64: - return Uint64s(key, val) + c = anyFieldC[[]uint64](Uint64s) case uint32: - return Uint32(key, val) + c = anyFieldC[uint32](Uint32) case *uint32: - return Uint32p(key, val) + c = anyFieldC[*uint32](Uint32p) case []uint32: - return Uint32s(key, val) + c = anyFieldC[[]uint32](Uint32s) case uint16: - return Uint16(key, val) + c = anyFieldC[uint16](Uint16) case *uint16: - return Uint16p(key, val) + c = anyFieldC[*uint16](Uint16p) case []uint16: - return Uint16s(key, val) + c = anyFieldC[[]uint16](Uint16s) case uint8: - return Uint8(key, val) + c = anyFieldC[uint8](Uint8) case *uint8: - return Uint8p(key, val) + c = anyFieldC[*uint8](Uint8p) case []byte: - return Binary(key, val) + c = anyFieldC[[]byte](Binary) case uintptr: - return Uintptr(key, val) + c = anyFieldC[uintptr](Uintptr) case *uintptr: - return Uintptrp(key, val) + c = anyFieldC[*uintptr](Uintptrp) case []uintptr: - return Uintptrs(key, val) + c = anyFieldC[[]uintptr](Uintptrs) case time.Time: - return Time(key, val) + c = anyFieldC[time.Time](Time) case *time.Time: - return Timep(key, val) + c = anyFieldC[*time.Time](Timep) case []time.Time: - return Times(key, val) + c = anyFieldC[[]time.Time](Times) case time.Duration: - return Duration(key, val) + c = anyFieldC[time.Duration](Duration) case *time.Duration: - return Durationp(key, val) + c = anyFieldC[*time.Duration](Durationp) case []time.Duration: - return Durations(key, val) + c = anyFieldC[[]time.Duration](Durations) case error: - return NamedError(key, val) + c = anyFieldC[error](NamedError) case []error: - return Errors(key, val) + c = anyFieldC[[]error](Errors) case fmt.Stringer: - return Stringer(key, val) + c = anyFieldC[fmt.Stringer](Stringer) default: - return Reflect(key, val) + c = anyFieldC[any](Reflect) } + + return c.Any(key, value) }