From b81cba0e88558739aa4786fa0ca2e26c5709a67d Mon Sep 17 00:00:00 2001 From: Aaron Zinger Date: Sat, 24 Sep 2022 16:25:28 -0400 Subject: [PATCH] util: avoid allocations when escaping multibyte characters EncodeEscapedChar (which is called in EncodeSQLStringWithFlags) is pretty optimized, but for escaping a multibyte character it was using fmt.Fprintf, which means every multibyte character ended up on the heap due to https://github.com/golang/go/issues/8618. This had a noticeable impact in changefeed benchmarking. This commit just hand-compiles the two formatting strings that were being used into reasonably efficient Go, eliminating the allocs. A benchmark encoding the first 10000 runes shows a 4x speedup: Before: BenchmarkEncodeNonASCIISQLString-16 944 1216130 ns/op After: BenchmarkEncodeNonASCIISQLString-16 3468 300777 ns/op Release note: None --- pkg/sql/lexbase/encode_test.go | 11 +++++++++++ pkg/util/stringencoding/string_encoding.go | 23 +++++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/pkg/sql/lexbase/encode_test.go b/pkg/sql/lexbase/encode_test.go index d89660620c7e..2ddbfd76a28f 100644 --- a/pkg/sql/lexbase/encode_test.go +++ b/pkg/sql/lexbase/encode_test.go @@ -123,3 +123,14 @@ func BenchmarkEncodeSQLString(b *testing.B) { lexbase.EncodeSQLStringWithFlags(bytes.NewBuffer(nil), str, lexbase.EncBareStrings) } } + +func BenchmarkEncodeNonASCIISQLString(b *testing.B) { + builder := strings.Builder{} + for r := rune(0); r < 10000; r++ { + builder.WriteRune(r) + } + str := builder.String() + for i := 0; i < b.N; i++ { + lexbase.EncodeSQLStringWithFlags(bytes.NewBuffer(nil), str, lexbase.EncBareStrings) + } +} diff --git a/pkg/util/stringencoding/string_encoding.go b/pkg/util/stringencoding/string_encoding.go index f7aeed0fb4b5..8df3cf85a0d1 100644 --- a/pkg/util/stringencoding/string_encoding.go +++ b/pkg/util/stringencoding/string_encoding.go @@ -21,7 +21,6 @@ package stringencoding import ( "bytes" - "fmt" "unicode/utf8" ) @@ 
-109,11 +108,25 @@ func EncodeEscapedChar( // Escape non-printable characters. buf.Write(HexMap[currentByte]) } - } else if ln == 2 { - // For multi-byte runes, print them based on their width. - fmt.Fprintf(buf, `\u%04X`, currentRune) } else { - fmt.Fprintf(buf, `\U%08X`, currentRune) + writeMultibyteRuneAsHex(buf, currentRune, ln) + } +} + +const uppercaseHex = `0123456789ABCDEF` + +// writeMultibyteRuneAsHex is equivalent to either +// fmt.Fprintf(buf, `\u%04X`, r) or fmt.Fprintf(buf, `\U%08X`, r). +// We can't quite just use strconv since we need uppercase hex. +func writeMultibyteRuneAsHex(buf *bytes.Buffer, r rune, ln int) { + if ln == 2 { + buf.WriteString(`\u0000`) + } else { + buf.WriteString(`\U00000000`) + } + for i := 1; r > 0; r /= 16 { + buf.Bytes()[buf.Len()-i] = uppercaseHex[r%16] + i++ } }