From f6f8b27a0d58acff793049873902286e35d0de4b Mon Sep 17 00:00:00 2001 From: Evan Wallace Date: Sun, 4 Dec 2022 18:13:17 -0500 Subject: [PATCH] js: improve lexer identifier parsing performance --- internal/js_lexer/js_lexer.go | 42 ++++++++++++++++++++++++------ internal/js_lexer/js_lexer_test.go | 2 ++ 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/internal/js_lexer/js_lexer.go b/internal/js_lexer/js_lexer.go index 12af1859d6b..82b90f83b28 100644 --- a/internal/js_lexer/js_lexer.go +++ b/internal/js_lexer/js_lexer.go @@ -1757,23 +1757,49 @@ func (lexer *Lexer) Next() { lexer.addRangeError(lexer.Range(), "JSON strings must use double quotes") } + // Note: This case is hot in profiles case '_', '$', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z': + // This is a fast path for long ASCII identifiers. Doing this in a loop + // first instead of doing "step()" and "IsIdentifierContinue()" like we + // do after this is noticeably faster in the common case of ASCII-only + // text. For example, doing this sped up end-to-end consuming of a large + // TypeScript type declaration file from 97ms to 79ms (around 20% faster). 
+ contents := lexer.source.Contents + n := len(contents) + i := lexer.current + for i < n { + c := contents[i] + if (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && (c < '0' || c > '9') && c != '_' && c != '$' { + break + } + i++ + } + lexer.current = i + + // Now do the slow path for any remaining non-ASCII identifier characters lexer.step() - for IsIdentifierContinue(lexer.codePoint) { - lexer.step() + if lexer.codePoint >= 0x80 { + for IsIdentifierContinue(lexer.codePoint) { + lexer.step() + } } + + // If there's a backslash, then we're in the extra-slow (and extra-rare) case + // where the identifier has embedded escapes if lexer.codePoint == '\\' { lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier) - } else { - lexer.Identifier = lexer.rawIdentifier() - lexer.Token = Keywords[lexer.Raw()] - if lexer.Token == 0 { - lexer.Token = TIdentifier - } + break + } + + // Otherwise (if there was no escape) we can slice the code verbatim + lexer.Identifier = lexer.rawIdentifier() + lexer.Token = Keywords[lexer.Raw()] + if lexer.Token == 0 { + lexer.Token = TIdentifier } case '\\': diff --git a/internal/js_lexer/js_lexer_test.go b/internal/js_lexer/js_lexer_test.go index 363d686d152..47275ea85c6 100644 --- a/internal/js_lexer/js_lexer_test.go +++ b/internal/js_lexer/js_lexer_test.go @@ -133,6 +133,8 @@ func TestIdentifier(t *testing.T) { expectIdentifier(t, "a\u200C", "a\u200C") expectIdentifier(t, "a\u200D", "a\u200D") + expectIdentifier(t, "a\u200Cb", "a\u200Cb") + expectIdentifier(t, "a\u200Db", "a\u200Db") } func expectNumber(t *testing.T, contents string, expected float64) {