util: avoid allocations when escaping multibyte characters

EncodeEscapedChar (which is called by EncodeSQLStringWithFlags) is already well optimized, but to escape a multibyte character it used fmt.Fprintf, so every multibyte character ended up on the heap due to golang/go#8618. This had a noticeable impact in changefeed benchmarking. This commit hand-compiles the two format strings that were in use into reasonably efficient Go, eliminating the allocations. A benchmark that encodes the first 10000 runes shows a 4x speedup:
Before: BenchmarkEncodeNonASCIISQLString-16 944 1216130 ns/op
After: BenchmarkEncodeNonASCIISQLString-16 3468 300777 ns/op
Release note: None
cockroachdb · Sep 24, 2022 · cb7a002 · cb7a002
1 parent 7b98d9c
commit cb7a002
Show file tree

Hide file tree

Showing 2 changed files with 30 additions and 5 deletions.
diff --git a/pkg/sql/lexbase/encode_test.go b/pkg/sql/lexbase/encode_test.go
@@ -123,3 +123,14 @@ func BenchmarkEncodeSQLString(b *testing.B) {
 		lexbase.EncodeSQLStringWithFlags(bytes.NewBuffer(nil), str, lexbase.EncBareStrings)
 	}
 }
+
+func BenchmarkEncodeNonASCIISQLString(b *testing.B) { // times EncodeSQLStringWithFlags on input that is mostly multibyte runes
+	builder := strings.Builder{}
+	for r := rune(0); r < 10000; r++ { // code points 0..9999: 128 ASCII, the rest encode to 2 or 3 bytes in UTF-8
+		builder.WriteRune(r)
+	}
+	str := builder.String() // NOTE(review): no b.ResetTimer call follows, so building the input is part of the measurement
+	for i := 0; i < b.N; i++ {
+		lexbase.EncodeSQLStringWithFlags(bytes.NewBuffer(nil), str, lexbase.EncBareStrings) // fresh empty buffer per iteration
+	}
+}
diff --git a/pkg/util/stringencoding/string_encoding.go b/pkg/util/stringencoding/string_encoding.go
@@ -21,7 +21,6 @@ package stringencoding
 
 import (
 	"bytes"
-	"fmt"
 	"unicode/utf8"
 )
 
@@ -109,14 +108,29 @@ func EncodeEscapedChar(
 			// Escape non-printable characters.
 			buf.Write(HexMap[currentByte])
 		}
-	} else if ln == 2 {
-		// For multi-byte runes, print them based on their width.
-		fmt.Fprintf(buf, `\u%04X`, currentRune)
 	} else {
-		fmt.Fprintf(buf, `\U%08X`, currentRune)
+		writeMultibyteRuneAsHex(buf, currentRune, ln)
 	}
 }
 
+const uppercaseHex = `0123456789ABCDEF` // uppercaseHex[v] is the uppercase hex digit for a nibble value v in 0..15
+
+// writeMultibyteRuneAsHex appends r to buf as `\uXXXX` when ln == 2 and as `\UXXXXXXXX` otherwise.
+// It stands in for fmt.Fprintf(buf, `\u%04X`, r) and fmt.Fprintf(buf, `\U%08X`, r) without calling fmt.
+// strconv is not used because the escapes need uppercase hex digits.
+func writeMultibyteRuneAsHex(buf *bytes.Buffer, r rune, ln int) {
+	if ln == 2 { // ln is the rune's width as reported by the caller (computed outside this view)
+		buf.WriteString(`\u0000`) // zero-filled template; the digits are patched in place below
+	} else {
+		buf.WriteString(`\U00000000`) // every other width uses the 8-digit form
+	}
+	for i := 1; r > 0; r >>= 4 { // consume r one nibble at a time, least significant first
+		buf.Bytes()[buf.Len()-i] = uppercaseHex[r&0x0f] // overwrite the i-th byte from the end of buf
+		i++
+	}
+	// NOTE(review): the loop is not bounded by the template width; assumes r fits in 4 hex digits when ln == 2 — confirm.
+}
+
 func writeHexDigit(buf *bytes.Buffer, v int) {
 	if v < 10 {
 		buf.WriteByte('0' + byte(v))