From b81cba0e88558739aa4786fa0ca2e26c5709a67d Mon Sep 17 00:00:00 2001
From: Aaron Zinger <zinger@cockroachlabs.com>
Date: Sat, 24 Sep 2022 16:25:28 -0400
Subject: [PATCH] util: avoid allocations when escaping multibyte characters

EncodeEscapedChar (which is called in EncodeSQLStringWithFlags)
is pretty optimized, but for escaping a multibyte character it
was using fmt.Fprintf, which means every multibyte character
ended up on the heap due to https://github.com/golang/go/issues/8618.
This had a noticeable impact in changefeed benchmarking.

This commit just hand-compiles the two formatting strings that
were being used into reasonably efficient Go, eliminating the allocs.

A benchmark encoding the first 10000 runes shows a 4x speedup:

Before: BenchmarkEncodeNonASCIISQLString-16    	     944	   1216130 ns/op
After: BenchmarkEncodeNonASCIISQLString-16    	    3468	    300777 ns/op

Release note: None
---
 pkg/sql/lexbase/encode_test.go             | 11 +++++++++++
 pkg/util/stringencoding/string_encoding.go | 23 +++++++++++++++++-----
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/pkg/sql/lexbase/encode_test.go b/pkg/sql/lexbase/encode_test.go
index d89660620c7e..2ddbfd76a28f 100644
--- a/pkg/sql/lexbase/encode_test.go
+++ b/pkg/sql/lexbase/encode_test.go
@@ -123,3 +123,14 @@ func BenchmarkEncodeSQLString(b *testing.B) {
 		lexbase.EncodeSQLStringWithFlags(bytes.NewBuffer(nil), str, lexbase.EncBareStrings)
 	}
 }
+
+func BenchmarkEncodeNonASCIISQLString(b *testing.B) { // measures EncodeSQLStringWithFlags on mostly non-ASCII input
+	builder := strings.Builder{}
+	for r := rune(0); r < 10000; r++ { // code points 0..9999: the ASCII range followed by multibyte runes
+		builder.WriteRune(r)
+	}
+	str := builder.String()
+	for i := 0; i < b.N; i++ { // NOTE(review): no b.ResetTimer, so building str above is part of the timed region
+		lexbase.EncodeSQLStringWithFlags(bytes.NewBuffer(nil), str, lexbase.EncBareStrings) // fresh buffer per iteration
+	}
+}
diff --git a/pkg/util/stringencoding/string_encoding.go b/pkg/util/stringencoding/string_encoding.go
index f7aeed0fb4b5..8df3cf85a0d1 100644
--- a/pkg/util/stringencoding/string_encoding.go
+++ b/pkg/util/stringencoding/string_encoding.go
@@ -21,7 +21,6 @@ package stringencoding
 
 import (
 	"bytes"
-	"fmt"
 	"unicode/utf8"
 )
 
@@ -109,11 +108,25 @@ func EncodeEscapedChar(
 			// Escape non-printable characters.
 			buf.Write(HexMap[currentByte])
 		}
-	} else if ln == 2 {
-		// For multi-byte runes, print them based on their width.
-		fmt.Fprintf(buf, `\u%04X`, currentRune)
 	} else {
-		fmt.Fprintf(buf, `\U%08X`, currentRune)
+		writeMultibyteRuneAsHex(buf, currentRune, ln)
+	}
+}
+
+const uppercaseHex = `0123456789ABCDEF` // digit table: index is the nibble value 0-15
+
+// writeMultibyteRuneAsHex appends r to buf as `\uXXXX` when ln == 2 and as
+// `\UXXXXXXXX` otherwise, like fmt.Fprintf with `\u%04X` or `\U%08X`.
+// We can't quite just use strconv since we need uppercase hex.
+func writeMultibyteRuneAsHex(buf *bytes.Buffer, r rune, ln int) {
+	if ln == 2 {
+		buf.WriteString(`\u0000`) // placeholder with four zero digits
+	} else {
+		buf.WriteString(`\U00000000`) // placeholder with eight zero digits
+	}
+	for i := 1; r > 0; r /= 16 { // overwrite digits right to left; untouched leading digits stay '0'
+		buf.Bytes()[buf.Len()-i] = uppercaseHex[r%16] // assumes r fits the placeholder width; i is not bounded here
+		i++
+	}
+}