diff --git a/CHANGELOG.md b/CHANGELOG.md index ca2c4155357..41326bb149a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,25 @@ import k2 from "keep"; ``` +* Avoid regular expression syntax errors in older browsers ([#2215](https://github.com/evanw/esbuild/issues/2215)) + + Previously esbuild always passed JavaScript regular expression literals through unmodified from the input to the output. This is undesirable when the regular expression uses newer features that the configured target environment doesn't support. For example, the `d` flag (i.e. the [match indices feature](https://v8.dev/features/regexp-match-indices)) is new in ES2022 and doesn't work in older browsers. If esbuild generated a regular expression literal containing the `d` flag, then older browsers would consider esbuild's output to be a syntax error and none of the code would run. + + With this release, esbuild now detects when an unsupported feature is being used and converts the regular expression literal into a `new RegExp()` constructor instead. One consequence of this is that the syntax error is transformed into a run-time error, which allows the output code to run (and to potentially handle the run-time error). Another consequence of this is that it allows you to include a polyfill that overwrites the `RegExp` constructor in older browsers with one that supports modern features. Note that esbuild does not handle polyfills for you, so you will need to include a `RegExp` polyfill yourself if you want one. + + ```js + // Original code + console.log(/b/d.exec('abc').indices) + + // New output (with --target=chrome90) + console.log(/b/d.exec("abc").indices); + + // New output (with --target=chrome89) + console.log(new RegExp("b", "d").exec("abc").indices); + ``` + + This is currently done transparently without a warning. 
If you would like to debug this transformation to see where in your code esbuild is transforming regular expression literals and why, you can pass `--log-level=debug` to esbuild and review the information present in esbuild's debug logs. + * Add Opera to more internal feature compatibility tables ([#2247](https://github.com/evanw/esbuild/issues/2247), [#2252](https://github.com/evanw/esbuild/pull/2252)) The internal compatibility tables that esbuild uses to determine which environments support which features are derived from multiple sources. Most of it is automatically derived from [these ECMAScript compatibility tables](https://kangax.github.io/compat-table/), but missing information is manually copied from [MDN](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/), GitHub PR comments, and various other websites. Version 0.14.35 of esbuild introduced Opera as a possible target environment which was automatically picked up by the compatibility table script, but the manually-copied information wasn't updated to include Opera. This release fixes this omission so Opera feature compatibility should now be accurate. 
diff --git a/internal/bundler/bundler_lower_test.go b/internal/bundler/bundler_lower_test.go index dd4b227dbec..a04639f4038 100644 --- a/internal/bundler/bundler_lower_test.go +++ b/internal/bundler/bundler_lower_test.go @@ -2280,3 +2280,21 @@ func TestStaticClassBlockES2021(t *testing.T) { }, }) } + +func TestLowerRegExpNameCollision(t *testing.T) { + lower_suite.expectBundled(t, bundled{ + files: map[string]string{ + "/entry.js": ` + export function foo(RegExp) { + return new RegExp(/./d, 'd') + } + `, + }, + entryPaths: []string{"/entry.js"}, + options: config.Options{ + Mode: config.ModeBundle, + AbsOutputFile: "/out.js", + UnsupportedJSFeatures: es(2021), + }, + }) +} diff --git a/internal/bundler/snapshots/snapshots_lower.txt b/internal/bundler/snapshots/snapshots_lower.txt index 46fec517fba..33e81908c24 100644 --- a/internal/bundler/snapshots/snapshots_lower.txt +++ b/internal/bundler/snapshots/snapshots_lower.txt @@ -1650,6 +1650,17 @@ export { Foo }; +================================================================================ +TestLowerRegExpNameCollision +---------- /out.js ---------- +// entry.js +function foo(RegExp2) { + return new RegExp2(new RegExp(".", "d"), "d"); +} +export { + foo +}; + ================================================================================ TestLowerStaticAsyncArrowSuperES2016 ---------- /out.js ---------- diff --git a/internal/compat/js_table.go b/internal/compat/js_table.go index 5bdb8c802ba..93d3654e8dc 100644 --- a/internal/compat/js_table.go +++ b/internal/compat/js_table.go @@ -84,6 +84,12 @@ const ( ObjectRestSpread OptionalCatchBinding OptionalChain + RegExpDotAllFlag + RegExpLookbehindAssertions + RegExpMatchIndices + RegExpNamedCaptureGroups + RegExpStickyAndUnicodeFlags + RegExpUnicodePropertyEscapes RestArgument TemplateLiteral TopLevelAwait @@ -436,7 +442,7 @@ var jsTable = map[JSFeature]map[Engine][]versionRange{ Firefox: {{start: v{2, 0, 0}}}, IE: {{start: v{9, 0, 0}}}, IOS: {{start: v{6, 0, 0}}}, - 
Node: {{start: v{0, 4, 0}}}, + Node: {{start: v{0, 10, 0}}}, Opera: {{start: v{10, 10, 0}}}, Safari: {{start: v{3, 1, 0}}}, }, @@ -477,6 +483,63 @@ var jsTable = map[JSFeature]map[Engine][]versionRange{ Opera: {{start: v{77, 0, 0}}}, Safari: {{start: v{13, 1, 0}}}, }, + RegExpDotAllFlag: { + Chrome: {{start: v{62, 0, 0}}}, + Edge: {{start: v{79, 0, 0}}}, + ES: {{start: v{2018, 0, 0}}}, + Firefox: {{start: v{78, 0, 0}}}, + IOS: {{start: v{11, 3, 0}}}, + Node: {{start: v{8, 10, 0}}}, + Opera: {{start: v{49, 0, 0}}}, + Safari: {{start: v{11, 1, 0}}}, + }, + RegExpLookbehindAssertions: { + Chrome: {{start: v{62, 0, 0}}}, + Edge: {{start: v{79, 0, 0}}}, + ES: {{start: v{2018, 0, 0}}}, + Firefox: {{start: v{78, 0, 0}}}, + Node: {{start: v{8, 10, 0}}}, + Opera: {{start: v{49, 0, 0}}}, + }, + RegExpMatchIndices: { + Chrome: {{start: v{90, 0, 0}}}, + Edge: {{start: v{90, 0, 0}}}, + ES: {{start: v{2022, 0, 0}}}, + Firefox: {{start: v{88, 0, 0}}}, + IOS: {{start: v{15, 0, 0}}}, + Opera: {{start: v{76, 0, 0}}}, + Safari: {{start: v{15, 0, 0}}}, + }, + RegExpNamedCaptureGroups: { + Chrome: {{start: v{64, 0, 0}}}, + Edge: {{start: v{79, 0, 0}}}, + ES: {{start: v{2018, 0, 0}}}, + Firefox: {{start: v{78, 0, 0}}}, + IOS: {{start: v{11, 3, 0}}}, + Node: {{start: v{10, 0, 0}}}, + Opera: {{start: v{51, 0, 0}}}, + Safari: {{start: v{11, 1, 0}}}, + }, + RegExpStickyAndUnicodeFlags: { + Chrome: {{start: v{50, 0, 0}}}, + Edge: {{start: v{13, 0, 0}}}, + ES: {{start: v{2015, 0, 0}}}, + Firefox: {{start: v{46, 0, 0}}}, + IOS: {{start: v{12, 0, 0}}}, + Node: {{start: v{6, 0, 0}}}, + Opera: {{start: v{37, 0, 0}}}, + Safari: {{start: v{12, 0, 0}}}, + }, + RegExpUnicodePropertyEscapes: { + Chrome: {{start: v{64, 0, 0}}}, + Edge: {{start: v{79, 0, 0}}}, + ES: {{start: v{2018, 0, 0}}}, + Firefox: {{start: v{78, 0, 0}}}, + IOS: {{start: v{11, 3, 0}}}, + Node: {{start: v{10, 0, 0}}}, + Opera: {{start: v{51, 0, 0}}}, + Safari: {{start: v{11, 1, 0}}}, + }, RestArgument: { Chrome: {{start: v{47, 0, 
0}}}, Edge: {{start: v{12, 0, 0}}}, diff --git a/internal/js_lexer/js_lexer.go b/internal/js_lexer/js_lexer.go index 7e94faf5410..a884ccbde94 100644 --- a/internal/js_lexer/js_lexer.go +++ b/internal/js_lexer/js_lexer.go @@ -2208,7 +2208,7 @@ func (lexer *Lexer) ScanRegExp() { bits := uint32(0) for IsIdentifierContinue(lexer.codePoint) { switch lexer.codePoint { - case 'g', 'i', 'm', 's', 'u', 'y': + case 'd', 'g', 'i', 'm', 's', 'u', 'y': bit := uint32(1) << uint32(lexer.codePoint-'a') if (bit & bits) != 0 { // Reject duplicate flags diff --git a/internal/js_parser/js_parser.go b/internal/js_parser/js_parser.go index e56c7db4151..ea9541e77d4 100644 --- a/internal/js_parser/js_parser.go +++ b/internal/js_parser/js_parser.go @@ -200,6 +200,7 @@ type parser struct { moduleRef js_ast.Ref importMetaRef js_ast.Ref promiseRef js_ast.Ref + regExpRef js_ast.Ref runtimePublicFieldImport js_ast.Ref superCtorRef js_ast.Ref @@ -1510,6 +1511,14 @@ func (p *parser) makePromiseRef() js_ast.Ref { return p.promiseRef } +func (p *parser) makeRegExpRef() js_ast.Ref { + if p.regExpRef == js_ast.InvalidRef { + p.regExpRef = p.newSymbol(js_ast.SymbolUnbound, "RegExp") + p.moduleScope.Generated = append(p.moduleScope.Generated, p.regExpRef) + } + return p.regExpRef +} + // The name is temporarily stored in the ref until the scope traversal pass // happens, at which point a symbol will be generated and the ref will point // to the symbol instead. 
@@ -11526,6 +11535,156 @@ func containsClosingScriptTag(text string) bool {
 	return false
 }
 
+// isUnsupportedRegularExpression splits the regular expression literal "value"
+// (written as "/pattern/flags") at its last "/" and checks whether it uses a
+// pattern feature or a flag that is in the set of unsupported JS features.
+//
+// It returns the pattern text between the slashes, the flag text after the
+// last slash, and "isUnsupported". When "isUnsupported" is true, a debug log
+// message naming the offending feature has been emitted. When an unbalanced
+// ")" is encountered, an error is logged and "isUnsupported" stays false.
+func (p *parser) isUnsupportedRegularExpression(loc logger.Loc, value string) (pattern string, flags string, isUnsupported bool) {
+	var feature compat.JSFeature
+	var what string
+	var r logger.Range
+
+	end := strings.LastIndexByte(value, '/')
+	pattern = value[1:end]
+	flags = value[end+1:]
+	isUnicode := strings.IndexByte(flags, 'u') >= 0
+	parenDepth := 0
+	i := 0
+
+	// Do a simple scan for unsupported features assuming the regular expression
+	// is valid. This doesn't do a full validation of the regular expression
+	// because regular expression grammar is complicated. If it contains a syntax
+	// error that we don't catch, then we will just generate output code with a
+	// syntax error. Garbage in, garbage out.
+pattern:
+	for i < len(pattern) {
+		c := pattern[i]
+		i++
+
+		switch c {
+		case '[':
+			// Consume the whole character class so that "(", ")", and "\" inside
+			// it are not misread by the outer scan. Only an unescaped "]" ends it.
+		class:
+			for i < len(pattern) {
+				c := pattern[i]
+				i++
+
+				switch c {
+				case ']':
+					break class
+
+				case '\\':
+					i++ // Skip the escaped character
+				}
+			}
+
+		case '(':
+			tail := pattern[i:]
+
+			// NOTE(review): the "?<!" check and the lookbehind branch below were
+			// restored from a damaged copy of this hunk; confirm against upstream.
+			if strings.HasPrefix(tail, "?<=") || strings.HasPrefix(tail, "?<!") {
+				if p.options.unsupportedJSFeatures.Has(compat.RegExpLookbehindAssertions) {
+					feature = compat.RegExpLookbehindAssertions
+					what = "Lookbehind assertions in regular expressions are not available"
+					r = logger.Range{Loc: logger.Loc{Start: loc.Start + int32(i) + 1}, Len: 3}
+					isUnsupported = true
+					break pattern
+				}
+			} else if strings.HasPrefix(tail, "?<") {
+				if p.options.unsupportedJSFeatures.Has(compat.RegExpNamedCaptureGroups) {
+					if end := strings.IndexByte(tail, '>'); end >= 0 {
+						feature = compat.RegExpNamedCaptureGroups
+						what = "Named capture groups in regular expressions are not available"
+						r = logger.Range{Loc: logger.Loc{Start: loc.Start + int32(i) + 1}, Len: int32(end) + 1}
+						isUnsupported = true
+						break pattern
+					}
+				}
+			}
+
+			parenDepth++
+
+		case ')':
+			if parenDepth == 0 {
+				r := logger.Range{Loc: logger.Loc{Start: loc.Start + int32(i)}, Len: 1}
+				p.log.Add(logger.Error, &p.tracker, r, "Unexpected \")\" in regular expression")
+				return
+			}
+
+			parenDepth--
+
+		case '\\':
+			tail := pattern[i:]
+
+			if isUnicode && (strings.HasPrefix(tail, "p{") || strings.HasPrefix(tail, "P{")) {
+				if p.options.unsupportedJSFeatures.Has(compat.RegExpUnicodePropertyEscapes) {
+					if end := strings.IndexByte(tail, '}'); end >= 0 {
+						feature = compat.RegExpUnicodePropertyEscapes
+						what = "Unicode property escapes in regular expressions are not available"
+						r = logger.Range{Loc: logger.Loc{Start: loc.Start + int32(i)}, Len: int32(end) + 2}
+						isUnsupported = true
+						break pattern
+					}
+				}
+			}
+
+			i++ // Skip the escaped character
+		}
+	}
+
+	if !isUnsupported {
+		for i, c := range flags {
+			switch c {
+			case 'g', 'i', 'm':
+				continue // These are part of ES5 and are always supported
+
+			case 's':
+				if !p.options.unsupportedJSFeatures.Has(compat.RegExpDotAllFlag) {
+					continue // This is part of ES2018
+				}
+				feature = compat.RegExpDotAllFlag
+
+			case 'y', 'u':
+				if !p.options.unsupportedJSFeatures.Has(compat.RegExpStickyAndUnicodeFlags) {
+					continue // These are part of ES2015
+				}
+				feature = compat.RegExpStickyAndUnicodeFlags
+
+			case 'd':
+				if !p.options.unsupportedJSFeatures.Has(compat.RegExpMatchIndices) {
+					continue // This is part of ES2022
+				}
+				feature = compat.RegExpMatchIndices
+
+			default:
+				// Unknown flags are never supported
+			}
+
+			// Report only the first unsupported flag
+			r = logger.Range{Loc: logger.Loc{Start: loc.Start + int32(end+1) + int32(i)}, Len: 1}
+			what = fmt.Sprintf("The regular expression flag \"%c\" is not available", c)
+			isUnsupported = true
+			break
+		}
+	}
+
+	if isUnsupported {
+		where, notes := p.prettyPrintTargetEnvironment(feature)
+		p.log.AddWithNotes(logger.Debug, &p.tracker, r, fmt.Sprintf("%s in %s", what, where), append(notes, logger.MsgData{
+			Text: "This regular expression literal has been converted to a \"new RegExp()\" constructor " +
+				"to avoid generating code with a syntax error. However, you will need to include a " +
+				"polyfill for \"RegExp\" for your code to have the correct behavior at run-time."}))
+	}
+
+	return
+}
+
 // This function takes "exprIn" as input from the caller and produces "exprOut"
 // for the caller to pass along extra data. This is mostly for optional chaining.
func (p *parser) visitExprInOut(expr js_ast.Expr, in exprIn) (js_ast.Expr, exprOut) { @@ -11534,9 +11681,29 @@ func (p *parser) visitExprInOut(expr js_ast.Expr, in exprIn) (js_ast.Expr, exprO } switch e := expr.Data.(type) { - case *js_ast.ENull, *js_ast.ESuper, - *js_ast.EBoolean, *js_ast.EBigInt, - *js_ast.ERegExp, *js_ast.EUndefined: + case *js_ast.ENull, *js_ast.ESuper, *js_ast.EBoolean, *js_ast.EBigInt, *js_ast.EUndefined: + + case *js_ast.ERegExp: + // "/pattern/flags" => "new RegExp('pattern', 'flags')" + if pattern, flags, ok := p.isUnsupportedRegularExpression(expr.Loc, e.Value); ok { + args := []js_ast.Expr{{ + Loc: logger.Loc{Start: expr.Loc.Start + 1}, + Data: &js_ast.EString{Value: helpers.StringToUTF16(pattern)}, + }} + if flags != "" { + args = append(args, js_ast.Expr{ + Loc: logger.Loc{Start: expr.Loc.Start + int32(len(pattern)) + 2}, + Data: &js_ast.EString{Value: helpers.StringToUTF16(flags)}, + }) + } + regExpRef := p.makeRegExpRef() + p.recordUsage(regExpRef) + return js_ast.Expr{Loc: expr.Loc, Data: &js_ast.ENew{ + Target: js_ast.Expr{Loc: expr.Loc, Data: &js_ast.EIdentifier{Ref: regExpRef}}, + Args: args, + CloseParenLoc: logger.Loc{Start: expr.Loc.Start + int32(len(e.Value))}, + }}, exprOut{} + } case *js_ast.ENewTarget: if !p.fnOnlyDataVisit.isNewTargetAllowed { @@ -14952,6 +15119,7 @@ func newParser(log logger.Log, source logger.Source, lexer js_lexer.Lexer, optio options: *options, runtimeImports: make(map[string]js_ast.Ref), promiseRef: js_ast.InvalidRef, + regExpRef: js_ast.InvalidRef, afterArrowBodyLoc: logger.Loc{Start: -1}, importMetaRef: js_ast.InvalidRef, runtimePublicFieldImport: js_ast.InvalidRef, diff --git a/scripts/compat-table.js b/scripts/compat-table.js index 1b0b164fafd..46198276425 100644 --- a/scripts/compat-table.js +++ b/scripts/compat-table.js @@ -37,6 +37,7 @@ const features = { 'class': { target: 'Class' }, 'generators': { target: 'Generator' }, 'Unicode code point escapes': { target: 'UnicodeEscapes' }, + 'RegExp 
"y" and "u" flags': { target: 'RegExpStickyAndUnicodeFlags' }, // >ES6 features 'exponentiation (**) operator': { target: 'ExponentOperator' }, @@ -44,6 +45,10 @@ const features = { 'nested rest destructuring, parameters': { target: 'NestedRestBinding' }, 'async functions': { target: 'AsyncAwait' }, 'object rest/spread properties': { target: 'ObjectRestSpread' }, + 'RegExp Lookbehind Assertions': { target: 'RegExpLookbehindAssertions' }, + 'RegExp named capture groups': { target: 'RegExpNamedCaptureGroups' }, + 'RegExp Unicode Property Escapes': { target: 'RegExpUnicodePropertyEscapes' }, + 's (dotAll) flag for regular expressions': { target: 'RegExpDotAllFlag' }, 'Asynchronous Iterators: async generators': { target: 'AsyncGenerator' }, 'Asynchronous Iterators: for-await-of loops': { target: 'ForAwait' }, 'optional catch binding': { target: 'OptionalCatchBinding' }, @@ -139,6 +144,7 @@ mergeVersions('Generator', { es2015: true }) mergeVersions('Let', { es2015: true }) mergeVersions('NewTarget', { es2015: true }) mergeVersions('ObjectExtensions', { es2015: true }) +mergeVersions('RegExpStickyAndUnicodeFlags', { es2015: true }) mergeVersions('RestArgument', { es2015: true }) mergeVersions('TemplateLiteral', { es2015: true }) mergeVersions('UnicodeEscapes', { es2015: true }) @@ -150,6 +156,10 @@ mergeVersions('AsyncAwait', { es2017: true }) mergeVersions('AsyncGenerator', { es2018: true }) mergeVersions('ForAwait', { es2018: true }) mergeVersions('ObjectRestSpread', { es2018: true }) +mergeVersions('RegExpDotAllFlag', { es2018: true }) +mergeVersions('RegExpLookbehindAssertions', { es2018: true }) +mergeVersions('RegExpNamedCaptureGroups', { es2018: true }) +mergeVersions('RegExpUnicodePropertyEscapes', { es2018: true }) mergeVersions('OptionalCatchBinding', { es2019: true }) mergeVersions('BigInt', { es2020: true }) mergeVersions('ImportMeta', { es2020: true }) @@ -169,6 +179,7 @@ mergeVersions('ClassStaticBlocks', { es2022: true }) mergeVersions('ClassStaticField', 
{ es2022: true })
 mergeVersions('TopLevelAwait', { es2022: true })
 mergeVersions('ArbitraryModuleNamespaceNames', { es2022: true })
+mergeVersions('RegExpMatchIndices', { es2022: true })
 mergeVersions('ImportAssertions', {})
 
 // Manually copied from https://caniuse.com/?search=export%20*%20as
@@ -284,6 +295,16 @@ mergeVersions('ClassStaticBlocks', {
   opera80: true,
 })
 
+// Manually copied from https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/hasIndices
+mergeVersions('RegExpMatchIndices', {
+  chrome90: true,
+  edge90: true,
+  firefox88: true,
+  ios15: true,
+  opera76: true,
+  safari15: true,
+})
+
 for (const test of [...es5.tests, ...es6.tests, ...stage4.tests, ...stage1to3.tests]) {
   const feature = features[test.name]
   if (feature) {
diff --git a/scripts/js-api-tests.js b/scripts/js-api-tests.js
index de5ac104020..27a20ac8006 100644
--- a/scripts/js-api-tests.js
+++ b/scripts/js-api-tests.js
@@ -4350,6 +4350,42 @@ let transformTests = {
     }
   },
 
+  // Each pair transforms the same kind of literal for a target that supports the
+  // feature (literal kept as-is) and one that does not (rewritten to "new RegExp()").
+  async regExpFeatures({ esbuild }) {
+    const check = async (target, input, expected) =>
+      assert.strictEqual((await esbuild.transform(input, { target })).code, expected)
+    await Promise.all([
+      // RegExpStickyAndUnicodeFlags
+      check('es6', `x1 = /./y`, `x1 = /./y;\n`),
+      check('es6', `x2 = /./u`, `x2 = /./u;\n`),
+      check('es5', `x3 = /./y`, `x3 = new RegExp(".", "y");\n`),
+      check('es5', `x4 = /./u`, `x4 = new RegExp(".", "u");\n`),
+
+      // RegExpDotAllFlag
+      check('es2018', `x1 = /a.b/s`, `x1 = /a.b/s;\n`),
+      check('es2017', `x2 = /a.b/s`, `x2 = new RegExp("a.b", "s");\n`),
+
+      // RegExpLookbehindAssertions
+      check('es2018', `x1 = /(?<=x)/`, `x1 = /(?<=x)/;\n`),
+      check('es2018', `x2 = /(?<!x)/`, `x2 = /(?<!x)/;\n`),
+      check('es2017', `x3 = /(?<=x)/`, `x3 = new RegExp("(?<=x)");\n`),
+      check('es2017', `x4 = /(?<!x)/`, `x4 = new RegExp("(?<!x)");\n`),
+
+      // RegExpNamedCaptureGroups
+      check('es2018', `x1 = /(?<a>b)/`, `x1 = /(?<a>b)/;\n`),
+      check('es2017', `x2 = /(?<a>b)/`, `x2 = new RegExp("(?<a>b)");\n`),
+
+      // RegExpUnicodePropertyEscapes
+      check('es2018', `x1 = /\\p{Emoji}/u`, `x1 = /\\p{Emoji}/u;\n`),
+      check('es2017', `x2 = /\\p{Emoji}/u`, `x2 = new RegExp("\\\\p{Emoji}", "u");\n`),
+
+      // RegExpMatchIndices
+      check('es2022', `x1 = /y/d`, `x1 = /y/d;\n`),
+      check('es2021', `x2 = /y/d`, `x2 = new RegExp("y", "d");\n`),
+    ])
+  },
+
   // Future syntax
   forAwait: ({ esbuild }) => futureSyntax(esbuild, 'async function foo() { for await (let x of y) {} }', 'es2017', 'es2018'),
   bigInt: ({ esbuild }) => futureSyntax(esbuild, '123n', 'es2019', 'es2020'),