Rework readWord, switch identifier and whitespace to use lookup tables (#358)

Rather than generating a big tree of nested switches in the code, I generate a tree as a lookup table with pointers to other values in the table. Theoretically the two aren't so different, but this seems to perform a lot better in practice, maybe because of better branch prediction. I also changed whitespace and character detection to use a lookup table of size 64K (one for each UTF-16 code unit), which seems to actually work out fine especially since we'd only expect the first page to be in cache anyway. Still, I may want to explore an ASCII-only mode in the future. In the benchmark, this reduces readWord time from about 50ms to about 20ms, or about a 6% improvement overall. When running on a realistic codebase, this seems to improve overall performance by about 40%.
alangpierce · Nov 25, 2018 · 196f86e · 196f86e
1 parent 9f62c81
commit 196f86e
Show file tree

Hide file tree

Showing 25 changed files with 908 additions and 1,305 deletions.
diff --git a/benchmark/microbenchmark.ts b/benchmark/microbenchmark.ts
@@ -1,37 +1,13 @@
 #!./node_modules/.bin/sucrase-node
 /* eslint-disable no-console */
-import * as fs from "fs";
 import {next} from "../src/parser/tokenizer";
 import {initParser} from "../src/parser/traverser/base";
 import {hasPrecedingLineBreak} from "../src/parser/traverser/util";
-import {isWhitespace} from "../src/parser/util/whitespace";
 import runBenchmark from "./runBenchmark";
 
 function main(): void {
   const benchmark = process.argv[2] || "all";
   console.log(`Running microbenchmark ${benchmark}`);
-  const code = fs.readFileSync(`./benchmark/sample/sample.tsx`).toString();
-  if (benchmark === "all" || benchmark === "isWhitespace") {
-    runBenchmark(
-      "isWhitespace",
-      () => {
-        for (let i = 0; i < code.length; i++) {
-          const char = code.charCodeAt(i);
-          isWhitespace(char);
-          isWhitespace(char);
-          isWhitespace(char);
-          isWhitespace(char);
-          isWhitespace(char);
-          isWhitespace(char);
-          isWhitespace(char);
-          isWhitespace(char);
-          isWhitespace(char);
-          isWhitespace(char);
-        }
-      },
-      1000,
-    );
-  }
   if (benchmark === "all" || benchmark === "hasPredecingLineBreak") {
     initParser("let x\nx++;", false, false, false);
     next();

diff --git a/generator/generate.ts b/generator/generate.ts
@@ -2,7 +2,7 @@
 /* eslint-disable no-console */
 import {writeFile} from "mz/fs";
 import run from "../script/run";
-import generateReadWord from "./generateReadWord";
+import generateReadWordTree from "./generateReadWordTree";
 import generateTokenTypes from "./generateTokenTypes";
 
 /**
@@ -11,8 +11,8 @@ import generateTokenTypes from "./generateTokenTypes";
 async function generate(): Promise<void> {
   await writeFile("./src/parser/tokenizer/types.ts", generateTokenTypes());
   await run("./node_modules/.bin/prettier --write ./src/parser/tokenizer/types.ts");
-  await writeFile("./src/parser/tokenizer/readWord.ts", generateReadWord());
-  await run("./node_modules/.bin/prettier --write ./src/parser/tokenizer/readWord.ts");
+  await writeFile("./src/parser/tokenizer/readWordTree.ts", generateReadWordTree());
+  await run("./node_modules/.bin/prettier --write ./src/parser/tokenizer/readWordTree.ts");
   console.log("Done with code generation.");
 }
 

diff --git a/generator/generateReadWord.ts b/generator/generateReadWord.ts