diff --git a/pkg/sql/lexbase/encode_test.go b/pkg/sql/lexbase/encode_test.go index d89660620c7e..2ddbfd76a28f 100644 --- a/pkg/sql/lexbase/encode_test.go +++ b/pkg/sql/lexbase/encode_test.go @@ -123,3 +123,14 @@ func BenchmarkEncodeSQLString(b *testing.B) { lexbase.EncodeSQLStringWithFlags(bytes.NewBuffer(nil), str, lexbase.EncBareStrings) } } + +func BenchmarkEncodeNonASCIISQLString(b *testing.B) { + builder := strings.Builder{} + for r := rune(0); r < 10000; r++ { + builder.WriteRune(r) + } + str := builder.String() + for i := 0; i < b.N; i++ { + lexbase.EncodeSQLStringWithFlags(bytes.NewBuffer(nil), str, lexbase.EncBareStrings) + } +} diff --git a/pkg/util/stringencoding/string_encoding.go b/pkg/util/stringencoding/string_encoding.go index f7aeed0fb4b5..3fe32636639d 100644 --- a/pkg/util/stringencoding/string_encoding.go +++ b/pkg/util/stringencoding/string_encoding.go @@ -21,7 +21,6 @@ package stringencoding import ( "bytes" - "fmt" "unicode/utf8" ) @@ -109,14 +108,29 @@ func EncodeEscapedChar( // Escape non-printable characters. buf.Write(HexMap[currentByte]) } - } else if ln == 2 { - // For multi-byte runes, print them based on their width. - fmt.Fprintf(buf, `\u%04X`, currentRune) } else { - fmt.Fprintf(buf, `\U%08X`, currentRune) + writeMultibyteRuneAsHex(buf, currentRune, ln) } } +const uppercaseHex = `0123456789ABCDEF` + +// writeMultibyteRuneAsHex is equivalent to either +// fmt.Fprintf(`\u%04X`) or fmt.Fprintf(`\U%08X`). +// We can't quite just use strconv since we need uppercase hex. +func writeMultibyteRuneAsHex(buf *bytes.Buffer, r rune, ln int) { + if ln == 2 { + buf.WriteString(`\u0000`) + } else { + buf.WriteString(`\U00000000`) + } + for i := 1; r > 0; r >>= 4 { + buf.Bytes()[buf.Len()-i] = uppercaseHex[r&0x0f] + i++ + } + +} + func writeHexDigit(buf *bytes.Buffer, v int) { if v < 10 { buf.WriteByte('0' + byte(v))