From 4a04c491aff43c05b9bbdbe77213ed4e91ad5f1f Mon Sep 17 00:00:00 2001 From: Alan Pierce Date: Mon, 27 Jun 2022 14:44:55 -0700 Subject: [PATCH] Fix bug in scientific notation parsing Fixes #676 Scientific notation parsing was using the shared `readInt` code path, which I had made more flexible to handle hex digits, but that means that literals like `1e2` were being treated as a complete number (with the hex digit `e`) rather than as scientific notation. Normally this wouldn't be a problem since it's all a number token anyway, but it caused trouble when using dot-style property access like `1e2.toString()`. Expressions like `1.toString()` are expected to fail because `.` is interpreted as a decimal point, but when using scientific notation, the parser needs to be smart enough to see that a decimal point wouldn't be valid, and therefore not include it as part of the token. The fix was to change `readInt` to only handle decimal digits, not hex digits, and to inline the hex case into `readRadixNumber`. This should make number parsing slightly faster because it's not checking for hex digits anymore. --- src/parser/tokenizer/index.ts | 40 ++++++++++++++++--------------- test/sucrase-test.ts | 44 +++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 19 deletions(-) diff --git a/src/parser/tokenizer/index.ts b/src/parser/tokenizer/index.ts index 63bdc423..ca62c1aa 100644 --- a/src/parser/tokenizer/index.ts +++ b/src/parser/tokenizer/index.ts @@ -751,9 +751,26 @@ function readRegexp(): void { finishToken(tt.regexp); } -// Read an integer. We allow any valid digit, including hex digits, plus numeric separators, and -// stop at any other character. +/** + * Read a decimal integer. Note that this can't be unified with the similar code + * in readRadixNumber (which also handles hex digits) because "e" needs to be + * the end of the integer so that we can properly handle scientific notation. 
+ */ function readInt(): void { + while (true) { + const code = input.charCodeAt(state.pos); + if ((code >= charCodes.digit0 && code <= charCodes.digit9) || code === charCodes.underscore) { + state.pos++; + } else { + break; + } + } +} + +function readRadixNumber(): void { + state.pos += 2; // 0x + + // Walk to the end of the number, allowing hex digits. while (true) { const code = input.charCodeAt(state.pos); if ( @@ -767,29 +784,14 @@ function readInt(): void { break; } } -} - -function readRadixNumber(): void { - let isBigInt = false; - const start = state.pos; - - state.pos += 2; // 0x - readInt(); const nextChar = input.charCodeAt(state.pos); if (nextChar === charCodes.lowercaseN) { ++state.pos; - isBigInt = true; - } else if (nextChar === charCodes.lowercaseM) { - unexpected("Invalid decimal", start); - } - - if (isBigInt) { finishToken(tt.bigint); - return; + } else { + finishToken(tt.num); } - - finishToken(tt.num); } // Read an integer, octal integer, or floating-point number. diff --git a/test/sucrase-test.ts b/test/sucrase-test.ts index 5f41d50c..de25deef 100644 --- a/test/sucrase-test.ts +++ b/test/sucrase-test.ts @@ -1469,4 +1469,48 @@ describe("sucrase", () => { {transforms: ["typescript"]}, ); }); + + it("parses scientific notation number literals followed by dot", () => { + assertResult( + ` + console.log(1e5.toString()); + `, + `"use strict"; + console.log(1e5.toString()); + `, + ); + }); + + it("handles parsing of hex literals", () => { + assertResult( + ` + const x = 0x8badf00d; + `, + `"use strict"; + const x = 0x8badf00d; + `, + ); + }); + + it("handles parsing of hex bigint literals", () => { + assertResult( + ` + const x = 0xabcden; + `, + `"use strict"; + const x = 0xabcden; + `, + ); + }); + + it("handles parsing of negative exponents", () => { + assertResult( + ` + const x = 1e-10; + `, + `"use strict"; + const x = 1e-10; + `, + ); + }); });