From a93cd4a71e41f04964651740153ca6598d4bf50b Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <mail@abhinavg.net>
Date: Mon, 31 Jul 2023 22:00:34 -0700
Subject: [PATCH] zap.Any: Reduce stack size with generics (#1310)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Yet another attempt at reducing the stack size of zap.Any,
borrowing from #1301, #1303, #1304, #1305, #1307, and #1308.

This approach defines a generic data type for field constructors
of a specific type. This is similar to the lookup map in #1307,
minus the map lookup, the interface match, or reflection.

    type anyFieldC[T any] func(string, T) Field

The generic data type provides a non-generic method
matching the interface:

    interface{ Any(string, any) Field }

**Stack size**:
The stack size of zap.Any following this change is 0xc0 (192 bytes).

    % go build -gcflags -S 2>&1 | grep ^go.uber.org/zap.Any
    go.uber.org/zap.Any STEXT size=5861 args=0x20 locals=0xc0 funcid=0x0 align=0x0

This is just 8 bytes more than #1305,
which is the smallest stack size of all other attempts.

**Allocations**:
Everything appears to get inlined with no heap escapes:

    % go build -gcflags -m 2>&1 |
      grep field.go |
      perl -n -e 'next unless m{^./field.go:(\d+)}; print if ($1 >= 413)' |
      grep 'escapes'
    [no output]

(Line 413 declares anyFieldC)

Besides that, the output of `-m` for the relevant section of code
consists of almost entirely:

    ./field.go:415:6: can inline anyFieldC[go.shape.bool].Any
    ./field.go:415:6: can inline anyFieldC[go.shape.[]bool].Any
    ./field.go:415:6: can inline anyFieldC[go.shape.complex128].Any
    [...]
    ./field.go:415:6: inlining call to anyFieldC[go.shape.complex128].Any
    ./field.go:415:6: inlining call to anyFieldC[go.shape.[]bool].Any
    ./field.go:415:6: inlining call to anyFieldC[go.shape.bool].Any

Followed by:

    ./field.go:428:10: leaking param: key
    ./field.go:428:22: leaking param: value

**Maintainability**:
Unlike some of the other approaches, this variant is more maintainable.
The `zap.Any` function looks roughly the same.
Adding new branches there is obvious, and requires no duplication.

**Performance**:
This is a net improvement against master on BenchmarkAny's
log-go checks that log inside a new goroutine.

```
name                           old time/op    new time/op    delta
Any/string/field-only/typed      25.2ns ± 1%    25.6ns ± 2%     ~     (p=0.460 n=5+5)
Any/string/field-only/any        56.9ns ± 3%    79.4ns ± 0%  +39.55%  (p=0.008 n=5+5)
Any/string/log/typed             1.47µs ± 0%    1.49µs ± 4%   +1.58%  (p=0.016 n=4+5)
Any/string/log/any               1.53µs ± 2%    1.55µs ± 1%   +1.37%  (p=0.016 n=5+5)
Any/string/log-go/typed          5.97µs ± 6%    5.99µs ± 1%     ~     (p=0.151 n=5+5)
Any/string/log-go/any            10.9µs ± 0%     6.2µs ± 0%  -43.32%  (p=0.008 n=5+5)
Any/stringer/field-only/typed    25.3ns ± 1%    25.5ns ± 1%   +1.09%  (p=0.008 n=5+5)
Any/stringer/field-only/any      85.5ns ± 1%   124.5ns ± 0%  +45.66%  (p=0.008 n=5+5)
Any/stringer/log/typed           1.43µs ± 1%    1.42µs ± 2%     ~     (p=0.175 n=4+5)
Any/stringer/log/any             1.50µs ± 1%    1.56µs ± 6%   +4.20%  (p=0.008 n=5+5)
Any/stringer/log-go/typed        5.94µs ± 0%    5.92µs ± 0%   -0.40%  (p=0.032 n=5+5)
Any/stringer/log-go/any          11.1µs ± 2%     6.3µs ± 0%  -42.93%  (p=0.008 n=5+5)

name                           old alloc/op   new alloc/op   delta
Any/string/field-only/typed       0.00B          0.00B          ~     (all equal)
Any/string/field-only/any         0.00B          0.00B          ~     (all equal)
Any/string/log/typed              64.0B ± 0%     64.0B ± 0%     ~     (all equal)
Any/string/log/any                64.0B ± 0%     64.0B ± 0%     ~     (all equal)
Any/string/log-go/typed            112B ± 0%      112B ± 0%     ~     (all equal)
Any/string/log-go/any              128B ± 0%      128B ± 0%     ~     (all equal)
Any/stringer/field-only/typed     0.00B          0.00B          ~     (all equal)
Any/stringer/field-only/any       0.00B          0.00B          ~     (all equal)
Any/stringer/log/typed            64.0B ± 0%     64.0B ± 0%     ~     (all equal)
Any/stringer/log/any              64.0B ± 0%     64.0B ± 0%     ~     (all equal)
Any/stringer/log-go/typed          112B ± 0%      112B ± 0%     ~     (all equal)
Any/stringer/log-go/any            128B ± 0%      128B ± 0%     ~     (all equal)

name                           old allocs/op  new allocs/op  delta
Any/string/field-only/typed        0.00           0.00          ~     (all equal)
Any/string/field-only/any          0.00           0.00          ~     (all equal)
Any/string/log/typed               1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Any/string/log/any                 1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Any/string/log-go/typed            3.00 ± 0%      3.00 ± 0%     ~     (all equal)
Any/string/log-go/any              3.00 ± 0%      3.00 ± 0%     ~     (all equal)
Any/stringer/field-only/typed      0.00           0.00          ~     (all equal)
Any/stringer/field-only/any        0.00           0.00          ~     (all equal)
Any/stringer/log/typed             1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Any/stringer/log/any               1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Any/stringer/log-go/typed          3.00 ± 0%      3.00 ± 0%     ~     (all equal)
Any/stringer/log-go/any            3.00 ± 0%      3.00 ± 0%     ~     (all equal)
```

It causes a regression in "field-only"
which calls the field constructor and discards the result
without using it in a logger.
I believe this is acceptable because that's not a real use case;
we expect the result to be used with a logger.
---
 field.go | 169 ++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 105 insertions(+), 64 deletions(-)

diff --git a/field.go b/field.go
index bbb745db5..7f22c5349 100644
--- a/field.go
+++ b/field.go
@@ -410,6 +410,43 @@ func Inline(val zapcore.ObjectMarshaler) Field {
 	}
 }
 
+// We discovered an issue where zap.Any can cause a performance degradation
+// when used in new goroutines.
+//
+// This happens because the compiler assigns 4.8kb (one zap.Field per arm of
+// switch statement) of stack space for zap.Any when it takes the form:
+//
+//	switch v := v.(type) {
+//	case string:
+//		return String(key, v)
+//	case int:
+//		return Int(key, v)
+//		// ...
+//	default:
+//		return Reflect(key, v)
+//	}
+//
+// To avoid this, we use the type switch to assign a value to a single local variable
+// and then call a function on it.
+// The local variable is just a function reference so it doesn't allocate
+// when converted to an interface{}.
+//
+// A fair bit of experimentation went into this.
+// See also:
+//
+// - https://github.com/uber-go/zap/pull/1301
+// - https://github.com/uber-go/zap/pull/1303
+// - https://github.com/uber-go/zap/pull/1304
+// - https://github.com/uber-go/zap/pull/1305
+// - https://github.com/uber-go/zap/pull/1308
+type anyFieldC[T any] func(string, T) Field
+
+func (f anyFieldC[T]) Any(key string, val any) Field {
+	v, _ := val.(T)
+	// val is guaranteed to be a T, except when it's nil.
+	return f(key, v)
+}
+
 // Any takes a key and an arbitrary value and chooses the best way to represent
 // them as a field, falling back to a reflection-based approach only if
 // necessary.
@@ -418,132 +455,136 @@ func Inline(val zapcore.ObjectMarshaler) Field {
 // them. To minimize surprises, []byte values are treated as binary blobs, byte
 // values are treated as uint8, and runes are always treated as integers.
 func Any(key string, value interface{}) Field {
-	switch val := value.(type) {
+	var c interface{ Any(string, any) Field }
+
+	switch value.(type) {
 	case zapcore.ObjectMarshaler:
-		return Object(key, val)
+		c = anyFieldC[zapcore.ObjectMarshaler](Object)
 	case zapcore.ArrayMarshaler:
-		return Array(key, val)
+		c = anyFieldC[zapcore.ArrayMarshaler](Array)
 	case bool:
-		return Bool(key, val)
+		c = anyFieldC[bool](Bool)
 	case *bool:
-		return Boolp(key, val)
+		c = anyFieldC[*bool](Boolp)
 	case []bool:
-		return Bools(key, val)
+		c = anyFieldC[[]bool](Bools)
 	case complex128:
-		return Complex128(key, val)
+		c = anyFieldC[complex128](Complex128)
 	case *complex128:
-		return Complex128p(key, val)
+		c = anyFieldC[*complex128](Complex128p)
 	case []complex128:
-		return Complex128s(key, val)
+		c = anyFieldC[[]complex128](Complex128s)
 	case complex64:
-		return Complex64(key, val)
+		c = anyFieldC[complex64](Complex64)
 	case *complex64:
-		return Complex64p(key, val)
+		c = anyFieldC[*complex64](Complex64p)
 	case []complex64:
-		return Complex64s(key, val)
+		c = anyFieldC[[]complex64](Complex64s)
 	case float64:
-		return Float64(key, val)
+		c = anyFieldC[float64](Float64)
 	case *float64:
-		return Float64p(key, val)
+		c = anyFieldC[*float64](Float64p)
 	case []float64:
-		return Float64s(key, val)
+		c = anyFieldC[[]float64](Float64s)
 	case float32:
-		return Float32(key, val)
+		c = anyFieldC[float32](Float32)
 	case *float32:
-		return Float32p(key, val)
+		c = anyFieldC[*float32](Float32p)
 	case []float32:
-		return Float32s(key, val)
+		c = anyFieldC[[]float32](Float32s)
 	case int:
-		return Int(key, val)
+		c = anyFieldC[int](Int)
 	case *int:
-		return Intp(key, val)
+		c = anyFieldC[*int](Intp)
 	case []int:
-		return Ints(key, val)
+		c = anyFieldC[[]int](Ints)
 	case int64:
-		return Int64(key, val)
+		c = anyFieldC[int64](Int64)
 	case *int64:
-		return Int64p(key, val)
+		c = anyFieldC[*int64](Int64p)
 	case []int64:
-		return Int64s(key, val)
+		c = anyFieldC[[]int64](Int64s)
 	case int32:
-		return Int32(key, val)
+		c = anyFieldC[int32](Int32)
 	case *int32:
-		return Int32p(key, val)
+		c = anyFieldC[*int32](Int32p)
 	case []int32:
-		return Int32s(key, val)
+		c = anyFieldC[[]int32](Int32s)
 	case int16:
-		return Int16(key, val)
+		c = anyFieldC[int16](Int16)
 	case *int16:
-		return Int16p(key, val)
+		c = anyFieldC[*int16](Int16p)
 	case []int16:
-		return Int16s(key, val)
+		c = anyFieldC[[]int16](Int16s)
 	case int8:
-		return Int8(key, val)
+		c = anyFieldC[int8](Int8)
 	case *int8:
-		return Int8p(key, val)
+		c = anyFieldC[*int8](Int8p)
 	case []int8:
-		return Int8s(key, val)
+		c = anyFieldC[[]int8](Int8s)
 	case string:
-		return String(key, val)
+		c = anyFieldC[string](String)
 	case *string:
-		return Stringp(key, val)
+		c = anyFieldC[*string](Stringp)
 	case []string:
-		return Strings(key, val)
+		c = anyFieldC[[]string](Strings)
 	case uint:
-		return Uint(key, val)
+		c = anyFieldC[uint](Uint)
 	case *uint:
-		return Uintp(key, val)
+		c = anyFieldC[*uint](Uintp)
 	case []uint:
-		return Uints(key, val)
+		c = anyFieldC[[]uint](Uints)
 	case uint64:
-		return Uint64(key, val)
+		c = anyFieldC[uint64](Uint64)
 	case *uint64:
-		return Uint64p(key, val)
+		c = anyFieldC[*uint64](Uint64p)
 	case []uint64:
-		return Uint64s(key, val)
+		c = anyFieldC[[]uint64](Uint64s)
 	case uint32:
-		return Uint32(key, val)
+		c = anyFieldC[uint32](Uint32)
 	case *uint32:
-		return Uint32p(key, val)
+		c = anyFieldC[*uint32](Uint32p)
 	case []uint32:
-		return Uint32s(key, val)
+		c = anyFieldC[[]uint32](Uint32s)
 	case uint16:
-		return Uint16(key, val)
+		c = anyFieldC[uint16](Uint16)
 	case *uint16:
-		return Uint16p(key, val)
+		c = anyFieldC[*uint16](Uint16p)
 	case []uint16:
-		return Uint16s(key, val)
+		c = anyFieldC[[]uint16](Uint16s)
 	case uint8:
-		return Uint8(key, val)
+		c = anyFieldC[uint8](Uint8)
 	case *uint8:
-		return Uint8p(key, val)
+		c = anyFieldC[*uint8](Uint8p)
 	case []byte:
-		return Binary(key, val)
+		c = anyFieldC[[]byte](Binary)
 	case uintptr:
-		return Uintptr(key, val)
+		c = anyFieldC[uintptr](Uintptr)
 	case *uintptr:
-		return Uintptrp(key, val)
+		c = anyFieldC[*uintptr](Uintptrp)
 	case []uintptr:
-		return Uintptrs(key, val)
+		c = anyFieldC[[]uintptr](Uintptrs)
 	case time.Time:
-		return Time(key, val)
+		c = anyFieldC[time.Time](Time)
 	case *time.Time:
-		return Timep(key, val)
+		c = anyFieldC[*time.Time](Timep)
 	case []time.Time:
-		return Times(key, val)
+		c = anyFieldC[[]time.Time](Times)
 	case time.Duration:
-		return Duration(key, val)
+		c = anyFieldC[time.Duration](Duration)
 	case *time.Duration:
-		return Durationp(key, val)
+		c = anyFieldC[*time.Duration](Durationp)
 	case []time.Duration:
-		return Durations(key, val)
+		c = anyFieldC[[]time.Duration](Durations)
 	case error:
-		return NamedError(key, val)
+		c = anyFieldC[error](NamedError)
 	case []error:
-		return Errors(key, val)
+		c = anyFieldC[[]error](Errors)
 	case fmt.Stringer:
-		return Stringer(key, val)
+		c = anyFieldC[fmt.Stringer](Stringer)
 	default:
-		return Reflect(key, val)
+		c = anyFieldC[any](Reflect)
 	}
+
+	return c.Any(key, value)
 }