From 5aa3528b4763402c33734db0eb323fff87f3e97f Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Thu, 14 Mar 2024 21:54:28 -0700 Subject: [PATCH 01/16] module: eliminate performance cost of detection for cjs entry --- lib/internal/modules/cjs/loader.js | 13 ++++ lib/internal/modules/helpers.js | 32 ++++++++ lib/internal/modules/run_main.js | 34 ++++++--- src/node_contextify.cc | 79 ++++++++++++++++++++ src/node_contextify.h | 1 + test/es-module/test-esm-detect-ambiguous.mjs | 17 +++++ 6 files changed, 166 insertions(+), 10 deletions(-) diff --git a/lib/internal/modules/cjs/loader.js b/lib/internal/modules/cjs/loader.js index 85bf598192d8f1..95aab0e0c22d46 100644 --- a/lib/internal/modules/cjs/loader.js +++ b/lib/internal/modules/cjs/loader.js @@ -70,10 +70,18 @@ const cjsSourceCache = new SafeWeakMap(); */ const cjsExportsCache = new SafeWeakMap(); +/** + * Map of CJS modules where the initial attempt to parse as CommonJS failed; + * we want to retry as ESM but avoid reading the module from disk again. + * @type {SafeMap} filename: contents + */ +const cjsRetryAsESMCache = new SafeMap(); + // Set first due to cycle with ESM loader functions. module.exports = { cjsExportsCache, cjsSourceCache, + cjsRetryAsESMCache, initializeCJS, Module, wrapSafe, @@ -1337,6 +1345,11 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) { return result; } catch (err) { if (process.mainModule === cjsModuleInstance) { + if (getOptionValue('--experimental-detect-module')) { + // For the main entry point, cache the source to potentially retry as ESM. + cjsRetryAsESMCache.set(filename, content); + } + const { enrichCJSError } = require('internal/modules/esm/translators'); enrichCJSError(err, content, filename); } diff --git a/lib/internal/modules/helpers.js b/lib/internal/modules/helpers.js index 665019e93d1ade..690130297b4ce7 100644 --- a/lib/internal/modules/helpers.js +++ b/lib/internal/modules/helpers.js @@ -320,6 +320,37 @@ function normalizeReferrerURL(referrerName) { } +const esmSyntaxErrorMessages = new SafeSet([ + 'Cannot use import statement outside a module', // `import` statements + "Unexpected token 'export'", // `export` statements + "Cannot use 'import.meta' outside a module", // `import.meta` references +]); +const throwsOnlyInCommonJSErrorMessages = new SafeSet([ + "Identifier 'module' has already been declared", + "Identifier 'exports' has already been declared", + "Identifier 'require' has already been declared", + "Identifier '__filename' has already been declared", + "Identifier '__dirname' has already been declared", + 'await is only valid in async functions and the top level bodies of modules', // Top-level `await` +]); +/** + * After an attempt to parse a module as CommonJS throws an error, should we try again as ESM? + * @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS + * @param {string} source Module contents + */ +function shouldRetryAsESM(errorMessage, source) { + if (esmSyntaxErrorMessages.has(errorMessage)) { + return true; + } + + if (throwsOnlyInCommonJSErrorMessages.has(errorMessage)) { + return /** @type {boolean} */(internalBinding('contextify').shouldRetryAsESM(source)); + } + + return false; +} + + // Whether we have started executing any user-provided CJS code. // This is set right before we call the wrapped CJS code (not after, // in case we are half-way in the execution when internals check this). @@ -339,6 +370,7 @@ module.exports = { loadBuiltinModule, makeRequireFunction, normalizeReferrerURL, + shouldRetryAsESM, stripBOM, toRealPath, hasStartedUserCJSExecution() { diff --git a/lib/internal/modules/run_main.js b/lib/internal/modules/run_main.js index b134a54cfec92a..596fae7b279a75 100644 --- a/lib/internal/modules/run_main.js +++ b/lib/internal/modules/run_main.js @@ -5,7 +5,6 @@ const { globalThis, } = primordials; -const { containsModuleSyntax } = internalBinding('contextify'); const { getNearestParentPackageJSONType } = internalBinding('modules'); const { getOptionValue } = require('internal/options'); const { checkPackageJSONIntegrity } = require('internal/modules/package_json_reader'); @@ -87,10 +86,6 @@ function shouldUseESMLoader(mainPath) { // No package.json or no `type` field. if (response === undefined || response[0] === 'none') { - if (getOptionValue('--experimental-detect-module')) { - // If the first argument of `containsModuleSyntax` is undefined, it will read `mainPath` from the file system. - return containsModuleSyntax(undefined, mainPath); - } return false; } @@ -162,7 +157,30 @@ function runEntryPointWithESMLoader(callback) { function executeUserEntryPoint(main = process.argv[1]) { const resolvedMain = resolveMainPath(main); const useESMLoader = shouldUseESMLoader(resolvedMain); - if (useESMLoader) { + + // We might retry as ESM if the CommonJS loader throws because the file is ESM, so + // try the CommonJS loader first unless we know it's supposed to be parsed as ESM. + let retryAsESM = false; + if (!useESMLoader) { + // Module._load is the monkey-patchable CJS module loader. + const { Module } = require('internal/modules/cjs/loader'); + try { + Module._load(main, null, true); + } catch (error) { + if (getOptionValue('--experimental-detect-module')) { + const { cjsRetryAsESMCache } = require('internal/modules/cjs/loader'); + const source = cjsRetryAsESMCache.get(resolvedMain); + cjsRetryAsESMCache.delete(resolvedMain); + const { shouldRetryAsESM } = require('internal/modules/helpers'); + retryAsESM = shouldRetryAsESM(error.message, source); + } + if (!retryAsESM) { + throw error; + } + } + } + + if (useESMLoader || retryAsESM) { const mainPath = resolvedMain || main; const mainURL = pathToFileURL(mainPath).href; @@ -171,10 +189,6 @@ function executeUserEntryPoint(main = process.argv[1]) { // even after the event loop stops running. return cascadedLoader.import(mainURL, undefined, { __proto__: null }, true); }); - } else { - // Module._load is the monkey-patchable CJS module loader. - const { Module } = require('internal/modules/cjs/loader'); - Module._load(main, null, true); } } diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 09079e963c7036..e6a9c341d02448 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -345,6 +345,7 @@ void ContextifyContext::CreatePerIsolateProperties( SetMethod(isolate, target, "makeContext", MakeContext); SetMethod(isolate, target, "compileFunction", CompileFunction); SetMethod(isolate, target, "containsModuleSyntax", ContainsModuleSyntax); + SetMethod(isolate, target, "shouldRetryAsESM", ShouldRetryAsESM); } void ContextifyContext::RegisterExternalReferences( @@ -352,6 +353,7 @@ void ContextifyContext::RegisterExternalReferences( registry->Register(MakeContext); registry->Register(CompileFunction); registry->Register(ContainsModuleSyntax); + registry->Register(ShouldRetryAsESM); registry->Register(PropertyGetterCallback); registry->Register(PropertySetterCallback); registry->Register(PropertyDescriptorCallback); @@ -1566,6 +1568,83 @@ void ContextifyContext::ContainsModuleSyntax( args.GetReturnValue().Set(should_retry_as_esm); } +void ContextifyContext::ShouldRetryAsESM( + const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + Local context = env->context(); + + if (args.Length() != 1) { + return THROW_ERR_MISSING_ARGS(env, "shouldRetryAsESM needs 1 argument"); + } + // Argument 1: source code + Local code; + CHECK(args[0]->IsString()); + code = args[0].As(); + + Local script_id = + String::NewFromUtf8(isolate, "throwaway").ToLocalChecked(); + Local id_symbol = Symbol::New(isolate, script_id); + + Local host_defined_options = + GetHostDefinedOptions(isolate, id_symbol); + ScriptCompiler::Source source = GetCommonJSSourceInstance( + isolate, code, script_id, 0, 0, host_defined_options, nullptr); + ScriptCompiler::CompileOptions options = GetCompileOptions(source); + + TryCatchScope try_catch(env); + ShouldNotAbortOnUncaughtScope no_abort_scope(env); + + // Try parsing where instead of the CommonJS wrapper we use an async function + // wrapper. If the parse succeeds, then any CommonJS parse error for this + // module was caused by either a top-level declaration of one of the CommonJS + // module variables, or a top-level `await`. + code = String::Concat( + isolate, + String::NewFromUtf8(isolate, "(async function() {").ToLocalChecked(), + code); + code = String::Concat( + isolate, code, String::NewFromUtf8(isolate, "})();").ToLocalChecked()); + + ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance( + isolate, code, script_id, 0, 0, host_defined_options, nullptr); + + std::vector> params = GetCJSParameters(env->isolate_data()); + std::ignore = ScriptCompiler::CompileFunction( + context, + &wrapped_source, + params.size(), + params.data(), + 0, + nullptr, + options, + v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason); + + bool should_retry_as_esm = false; + if (!try_catch.HasTerminated()) { + if (try_catch.HasCaught()) { + // If on the second parse an error is thrown by ESM syntax, then + // what happened was that the user had top-level `await` or a + // top-level declaration of one of the CommonJS module variables + // above the first `import` or `export`. + Utf8Value message_value(env->isolate(), try_catch.Message()->Get()); + auto message_view = message_value.ToStringView(); + for (const auto& error_message : esm_syntax_error_messages) { + if (message_view.find(error_message) != std::string_view::npos) { + should_retry_as_esm = true; + break; + } + } + } else { + // No errors thrown in the second parse, so most likely the error + // was caused by a top-level `await` or a top-level declaration of + // one of the CommonJS module variables. + should_retry_as_esm = true; + } + } + args.GetReturnValue().Set(should_retry_as_esm); +} + static void CompileFunctionForCJSLoader( const FunctionCallbackInfo& args) { CHECK(args[0]->IsString()); diff --git a/src/node_contextify.h b/src/node_contextify.h index e96df803b7ec2a..196461c35636ff 100644 --- a/src/node_contextify.h +++ b/src/node_contextify.h @@ -106,6 +106,7 @@ class ContextifyContext : public BaseObject { const v8::ScriptCompiler::Source& source); static void ContainsModuleSyntax( const v8::FunctionCallbackInfo& args); + static void ShouldRetryAsESM(const v8::FunctionCallbackInfo& args); static void WeakCallback( const v8::WeakCallbackInfo& data); static void PropertyGetterCallback( diff --git a/test/es-module/test-esm-detect-ambiguous.mjs b/test/es-module/test-esm-detect-ambiguous.mjs index 2067040114a86b..02800e07013418 100644 --- a/test/es-module/test-esm-detect-ambiguous.mjs +++ b/test/es-module/test-esm-detect-ambiguous.mjs @@ -8,6 +8,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { describe('string input', { concurrency: true }, () => { it('permits ESM syntax in --eval input without requiring --input-type=module', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', '--eval', 'import { version } from "node:process"; console.log(version);', @@ -23,6 +24,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('permits ESM syntax in STDIN input without requiring --input-type=module', async () => { const child = spawn(process.execPath, [ + '--no-warnings', '--experimental-detect-module', ]); child.stdin.end('console.log(typeof import.meta.resolve)'); @@ -32,6 +34,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('should be overridden by --input-type', async () => { const { code, signal, stdout, stderr } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', '--input-type=commonjs', '--eval', @@ -46,6 +49,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('should be overridden by --experimental-default-type', async () => { const { code, signal, stdout, stderr } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', '--experimental-default-type=commonjs', '--eval', @@ -60,6 +64,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('does not trigger detection via source code `eval()`', async () => { const { code, signal, stdout, stderr } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', '--eval', 'eval("import \'nonexistent\';");', @@ -101,6 +106,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { ]) { it(testName, async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', entryPath, ]); @@ -142,6 +148,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { ]) { it(testName, async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', entryPath, ]); @@ -156,6 +163,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('should not hint wrong format in resolve hook', async () => { let writeSync; const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', '--no-warnings', '--loader', @@ -194,6 +202,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { ]) { it(testName, async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', entryPath, ]); @@ -223,6 +232,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { ]) { it(testName, async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', entryPath, ]); @@ -239,6 +249,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { describe('syntax that errors in CommonJS but works in ESM', { concurrency: true }, () => { it('permits top-level `await`', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', '--eval', 'await Promise.resolve(); console.log("executed");', @@ -252,6 +263,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('permits top-level `await` above import/export syntax', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', '--eval', 'await Promise.resolve(); import "node:os"; console.log("executed");', @@ -265,6 +277,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('still throws on `await` in an ordinary sync function', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', '--eval', 'function fn() { await Promise.resolve(); } fn();', @@ -278,6 +291,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('throws on undefined `require` when top-level `await` triggers ESM parsing', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', '--eval', 'const fs = require("node:fs"); await Promise.resolve();', @@ -291,6 +305,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('permits declaration of CommonJS module variables', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', fixtures.path('es-modules/package-without-type/commonjs-wrapper-variables.js'), ]); @@ -303,6 +318,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('permits declaration of CommonJS module variables above import/export', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', '--eval', 'const module = 3; import "node:os"; console.log("executed");', @@ -316,6 +332,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('still throws on double `const` declaration not at the top level', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--no-warnings', '--experimental-detect-module', '--eval', 'function fn() { const require = 1; const require = 2; } fn();', From 29f5a6d33fa22a1bea4a4a7f320c4f1608f0f071 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Fri, 15 Mar 2024 11:48:19 -0700 Subject: [PATCH 02/16] DRY the C++ functions --- src/node_contextify.cc | 123 ++++++++--------------------------------- src/node_contextify.h | 2 + 2 files changed, 26 insertions(+), 99 deletions(-) diff --git a/src/node_contextify.cc b/src/node_contextify.cc index e6a9c341d02448..9f4f64ffd35ac0 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1441,6 +1441,11 @@ void ContextifyContext::ContainsModuleSyntax( env, "containsModuleSyntax needs at least 1 argument"); } + // Argument 1: source code + Local code; + CHECK(args[0]->IsString()); + code = args[0].As(); + // Argument 2: filename; if undefined, use empty string Local filename = String::Empty(isolate); if (!args[1]->IsUndefined()) { @@ -1448,28 +1453,6 @@ void ContextifyContext::ContainsModuleSyntax( filename = args[1].As(); } - // Argument 1: source code; if undefined, read from filename in argument 2 - Local code; - if (args[0]->IsUndefined()) { - CHECK(!filename.IsEmpty()); - const char* filename_str = Utf8Value(isolate, filename).out(); - std::string contents; - int result = ReadFileSync(&contents, filename_str); - if (result != 0) { - isolate->ThrowException( - ERR_MODULE_NOT_FOUND(isolate, "Cannot read file %s", filename_str)); - return; - } - code = String::NewFromUtf8(isolate, - contents.c_str(), - v8::NewStringType::kNormal, - contents.length()) - .ToLocalChecked(); - } else { - CHECK(args[0]->IsString()); - code = args[0].As(); - } - // TODO(geoffreybooth): Centralize this rather than matching the logic in // cjs/loader.js and translators.js Local script_id = String::Concat( @@ -1499,71 +1482,8 @@ void ContextifyContext::ContainsModuleSyntax( bool should_retry_as_esm = false; if (try_catch.HasCaught() && !try_catch.HasTerminated()) { - Utf8Value message_value(env->isolate(), try_catch.Message()->Get()); - auto message = message_value.ToStringView(); - - for (const auto& error_message : esm_syntax_error_messages) { - if (message.find(error_message) != std::string_view::npos) { - should_retry_as_esm = true; - break; - } - } - - if (!should_retry_as_esm) { - for (const auto& error_message : throws_only_in_cjs_error_messages) { - if (message.find(error_message) != std::string_view::npos) { - // Try parsing again where the CommonJS wrapper is replaced by an - // async function wrapper. If the new parse succeeds, then the error - // was caused by either a top-level declaration of one of the CommonJS - // module variables, or a top-level `await`. - TryCatchScope second_parse_try_catch(env); - code = - String::Concat(isolate, - String::NewFromUtf8(isolate, "(async function() {") - .ToLocalChecked(), - code); - code = String::Concat( - isolate, - code, - String::NewFromUtf8(isolate, "})();").ToLocalChecked()); - ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance( - isolate, code, filename, 0, 0, host_defined_options, nullptr); - std::ignore = ScriptCompiler::CompileFunction( - context, - &wrapped_source, - params.size(), - params.data(), - 0, - nullptr, - options, - v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason); - if (!second_parse_try_catch.HasTerminated()) { - if (second_parse_try_catch.HasCaught()) { - // If on the second parse an error is thrown by ESM syntax, then - // what happened was that the user had top-level `await` or a - // top-level declaration of one of the CommonJS module variables - // above the first `import` or `export`. - Utf8Value second_message_value( - env->isolate(), second_parse_try_catch.Message()->Get()); - auto second_message = second_message_value.ToStringView(); - for (const auto& error_message : esm_syntax_error_messages) { - if (second_message.find(error_message) != - std::string_view::npos) { - should_retry_as_esm = true; - break; - } - } - } else { - // No errors thrown in the second parse, so most likely the error - // was caused by a top-level `await` or a top-level declaration of - // one of the CommonJS module variables. - should_retry_as_esm = true; - } - } - break; - } - } - } + should_retry_as_esm = + ContextifyContext::ShouldRetryAsESMInternal(env, code); } args.GetReturnValue().Set(should_retry_as_esm); } @@ -1571,19 +1491,25 @@ void ContextifyContext::ContainsModuleSyntax( void ContextifyContext::ShouldRetryAsESM( const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); - Isolate* isolate = env->isolate(); - Local context = env->context(); - if (args.Length() != 1) { - return THROW_ERR_MISSING_ARGS(env, "shouldRetryAsESM needs 1 argument"); - } + CHECK_EQ(args.Length(), 1); + // Argument 1: source code Local code; CHECK(args[0]->IsString()); code = args[0].As(); - Local script_id = - String::NewFromUtf8(isolate, "throwaway").ToLocalChecked(); + bool should_retry_as_esm = + ContextifyContext::ShouldRetryAsESMInternal(env, code); + + args.GetReturnValue().Set(should_retry_as_esm); +} + +bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env, + Local code) { + Isolate* isolate = env->isolate(); + + Local script_id = FIXED_ONE_BYTE_STRING(isolate, "throwaway"); Local id_symbol = Symbol::New(isolate, script_id); Local host_defined_options = @@ -1609,6 +1535,7 @@ void ContextifyContext::ShouldRetryAsESM( ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance( isolate, code, script_id, 0, 0, host_defined_options, nullptr); + Local context = env->context(); std::vector> params = GetCJSParameters(env->isolate_data()); std::ignore = ScriptCompiler::CompileFunction( context, @@ -1620,7 +1547,6 @@ void ContextifyContext::ShouldRetryAsESM( options, v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason); - bool should_retry_as_esm = false; if (!try_catch.HasTerminated()) { if (try_catch.HasCaught()) { // If on the second parse an error is thrown by ESM syntax, then @@ -1631,18 +1557,17 @@ void ContextifyContext::ShouldRetryAsESM( auto message_view = message_value.ToStringView(); for (const auto& error_message : esm_syntax_error_messages) { if (message_view.find(error_message) != std::string_view::npos) { - should_retry_as_esm = true; - break; + return true; } } } else { // No errors thrown in the second parse, so most likely the error // was caused by a top-level `await` or a top-level declaration of // one of the CommonJS module variables. - should_retry_as_esm = true; + return true; } } - args.GetReturnValue().Set(should_retry_as_esm); + return false; } static void CompileFunctionForCJSLoader( diff --git a/src/node_contextify.h b/src/node_contextify.h index 196461c35636ff..66a9f765fe9212 100644 --- a/src/node_contextify.h +++ b/src/node_contextify.h @@ -107,6 +107,8 @@ class ContextifyContext : public BaseObject { static void ContainsModuleSyntax( const v8::FunctionCallbackInfo& args); static void ShouldRetryAsESM(const v8::FunctionCallbackInfo& args); + static bool ShouldRetryAsESMInternal(Environment* env, + v8::Local code); static void WeakCallback( const v8::WeakCallbackInfo& data); static void PropertyGetterCallback( From 75dba329a8c3f9ede66bc7409273f1319bc43c0d Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Fri, 15 Mar 2024 20:37:55 -0700 Subject: [PATCH 03/16] restore warning to be printed only on error --- lib/internal/modules/cjs/loader.js | 8 +++++--- lib/internal/modules/run_main.js | 4 ++++ test/es-module/test-esm-detect-ambiguous.mjs | 17 ----------------- 3 files changed, 9 insertions(+), 20 deletions(-) diff --git a/lib/internal/modules/cjs/loader.js b/lib/internal/modules/cjs/loader.js index 95aab0e0c22d46..c7b0691f5e5941 100644 --- a/lib/internal/modules/cjs/loader.js +++ b/lib/internal/modules/cjs/loader.js @@ -1348,10 +1348,12 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) { if (getOptionValue('--experimental-detect-module')) { // For the main entry point, cache the source to potentially retry as ESM. cjsRetryAsESMCache.set(filename, content); + } else { + // We only enrich the error (print a warning) if we're sure we're going to for-sure throw it; so if we're + // retrying as ESM, wait until we know whether we're going to retry before calling `enrichCJSError`. + const { enrichCJSError } = require('internal/modules/esm/translators'); + enrichCJSError(err, content, filename); } - - const { enrichCJSError } = require('internal/modules/esm/translators'); - enrichCJSError(err, content, filename); } throw err; } diff --git a/lib/internal/modules/run_main.js b/lib/internal/modules/run_main.js index 596fae7b279a75..8c290510346742 100644 --- a/lib/internal/modules/run_main.js +++ b/lib/internal/modules/run_main.js @@ -173,6 +173,10 @@ function executeUserEntryPoint(main = process.argv[1]) { cjsRetryAsESMCache.delete(resolvedMain); const { shouldRetryAsESM } = require('internal/modules/helpers'); retryAsESM = shouldRetryAsESM(error.message, source); + if (!retryAsESM) { + const { enrichCJSError } = require('internal/modules/esm/translators'); + enrichCJSError(error, source, resolvedMain); + } } if (!retryAsESM) { throw error; diff --git a/test/es-module/test-esm-detect-ambiguous.mjs b/test/es-module/test-esm-detect-ambiguous.mjs index 02800e07013418..2067040114a86b 100644 --- a/test/es-module/test-esm-detect-ambiguous.mjs +++ b/test/es-module/test-esm-detect-ambiguous.mjs @@ -8,7 +8,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { describe('string input', { concurrency: true }, () => { it('permits ESM syntax in --eval input without requiring --input-type=module', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', '--eval', 'import { version } from "node:process"; console.log(version);', @@ -24,7 +23,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('permits ESM syntax in STDIN input without requiring --input-type=module', async () => { const child = spawn(process.execPath, [ - '--no-warnings', '--experimental-detect-module', ]); child.stdin.end('console.log(typeof import.meta.resolve)'); @@ -34,7 +32,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('should be overridden by --input-type', async () => { const { code, signal, stdout, stderr } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', '--input-type=commonjs', '--eval', @@ -49,7 +46,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('should be overridden by --experimental-default-type', async () => { const { code, signal, stdout, stderr } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', '--experimental-default-type=commonjs', '--eval', @@ -64,7 +60,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('does not trigger detection via source code `eval()`', async () => { const { code, signal, stdout, stderr } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', '--eval', 'eval("import \'nonexistent\';");', @@ -106,7 +101,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { ]) { it(testName, async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', entryPath, ]); @@ -148,7 +142,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { ]) { it(testName, async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', entryPath, ]); @@ -163,7 +156,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('should not hint wrong format in resolve hook', async () => { let writeSync; const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', '--no-warnings', '--loader', @@ -202,7 +194,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { ]) { it(testName, async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', entryPath, ]); @@ -232,7 +223,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { ]) { it(testName, async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', entryPath, ]); @@ -249,7 +239,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { describe('syntax that errors in CommonJS but works in ESM', { concurrency: true }, () => { it('permits top-level `await`', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', '--eval', 'await Promise.resolve(); console.log("executed");', @@ -263,7 +252,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('permits top-level `await` above import/export syntax', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', '--eval', 'await Promise.resolve(); import "node:os"; console.log("executed");', @@ -277,7 +265,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('still throws on `await` in an ordinary sync function', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', '--eval', 'function fn() { await Promise.resolve(); } fn();', @@ -291,7 +278,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('throws on undefined `require` when top-level `await` triggers ESM parsing', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', '--eval', 'const fs = require("node:fs"); await Promise.resolve();', @@ -305,7 +291,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('permits declaration of CommonJS module variables', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', fixtures.path('es-modules/package-without-type/commonjs-wrapper-variables.js'), ]); @@ -318,7 +303,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('permits declaration of CommonJS module variables above import/export', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', '--eval', 'const module = 3; import "node:os"; console.log("executed");', @@ -332,7 +316,6 @@ describe('--experimental-detect-module', { concurrency: true }, () => { it('still throws on double `const` declaration not at the top level', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ - '--no-warnings', '--experimental-detect-module', '--eval', 'function fn() { const require = 1; const require = 2; } fn();', From f06b12ee92b2005a4a4b0dd0eecad51546110abc Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Fri, 15 Mar 2024 21:04:54 -0700 Subject: [PATCH 04/16] add TypeScript fixture to benchmark --- benchmark/misc/startup-core.js | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmark/misc/startup-core.js b/benchmark/misc/startup-core.js index 07c0701d128899..d16a989fd2ce75 100644 --- a/benchmark/misc/startup-core.js +++ b/benchmark/misc/startup-core.js @@ -9,6 +9,7 @@ const bench = common.createBenchmark(main, { script: [ 'benchmark/fixtures/require-builtins', 'test/fixtures/semicolon', + 'test/fixtures/snapshot/typescript', ], mode: ['process', 'worker'], count: [30], From c82a15a40a0116a3f124285d6036fe984e6a62ce Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sat, 16 Mar 2024 17:34:21 -0700 Subject: [PATCH 05/16] put try/catch inside our flag, to postpone for now having to deal with the exception handling for unflagged usage --- lib/internal/modules/run_main.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/internal/modules/run_main.js b/lib/internal/modules/run_main.js index 8c290510346742..888f22cfe0ec5c 100644 --- a/lib/internal/modules/run_main.js +++ b/lib/internal/modules/run_main.js @@ -164,10 +164,10 @@ function executeUserEntryPoint(main = process.argv[1]) { if (!useESMLoader) { // Module._load is the monkey-patchable CJS module loader. const { Module } = require('internal/modules/cjs/loader'); - try { - Module._load(main, null, true); - } catch (error) { - if (getOptionValue('--experimental-detect-module')) { + if (getOptionValue('--experimental-detect-module')) { + try { + Module._load(main, null, true); + } catch (error) { const { cjsRetryAsESMCache } = require('internal/modules/cjs/loader'); const source = cjsRetryAsESMCache.get(resolvedMain); cjsRetryAsESMCache.delete(resolvedMain); @@ -176,11 +176,11 @@ function executeUserEntryPoint(main = process.argv[1]) { if (!retryAsESM) { const { enrichCJSError } = require('internal/modules/esm/translators'); enrichCJSError(error, source, resolvedMain); + throw error; } } - if (!retryAsESM) { - throw error; - } + } else { + Module._load(main, null, true); } } From 4b6a52a78a6c2fddc2836b7b992ceac1c90d89aa Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 17 Mar 2024 09:56:26 -0700 Subject: [PATCH 06/16] Apply suggestions from code review Co-authored-by: Joyee Cheung --- src/node_contextify.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 9f4f64ffd35ac0..4e940769501f4c 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1509,7 +1509,8 @@ bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env, Local code) { Isolate* isolate = env->isolate(); - Local script_id = FIXED_ONE_BYTE_STRING(isolate, "throwaway"); + Local script_id = FIXED_ONE_BYTE_STRING(isolate, + "[retry_as_esm_check]"); Local id_symbol = Symbol::New(isolate, script_id); Local host_defined_options = @@ -1527,10 +1528,10 @@ bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env, // module variables, or a top-level `await`. code = String::Concat( isolate, - String::NewFromUtf8(isolate, "(async function() {").ToLocalChecked(), + FIXED_ONE_BYTE_STRING(isolate, "(async function() {"), code); code = String::Concat( - isolate, code, String::NewFromUtf8(isolate, "})();").ToLocalChecked()); + isolate, code, FIXED_ONE_BYTE_STRING(isolate, "})();")); ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance( isolate, code, script_id, 0, 0, host_defined_options, nullptr); From 65d690b5ef0ceb854569220de5bc8af9da72e83a Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 17 Mar 2024 16:27:11 -0700 Subject: [PATCH 07/16] Remove cjsRetryAsESMCache --- lib/internal/modules/cjs/loader.js | 10 +--------- lib/internal/modules/run_main.js | 6 +++--- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/lib/internal/modules/cjs/loader.js b/lib/internal/modules/cjs/loader.js index c7b0691f5e5941..7aecf53789745c 100644 --- a/lib/internal/modules/cjs/loader.js +++ b/lib/internal/modules/cjs/loader.js @@ -70,18 +70,10 @@ const cjsSourceCache = new SafeWeakMap(); */ const cjsExportsCache = new SafeWeakMap(); -/** - * Map of CJS modules where the initial attempt to parse as CommonJS failed; - * we want to retry as ESM but avoid reading the module from disk again. - * @type {SafeMap} filename: contents - */ -const cjsRetryAsESMCache = new SafeMap(); - // Set first due to cycle with ESM loader functions. module.exports = { cjsExportsCache, cjsSourceCache, - cjsRetryAsESMCache, initializeCJS, Module, wrapSafe, @@ -1347,7 +1339,7 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) { if (process.mainModule === cjsModuleInstance) { if (getOptionValue('--experimental-detect-module')) { // For the main entry point, cache the source to potentially retry as ESM. - cjsRetryAsESMCache.set(filename, content); + process.mainModule._retryAsESMSource = content; } else { // We only enrich the error (print a warning) if we're sure we're going to for-sure throw it; so if we're // retrying as ESM, wait until we know whether we're going to retry before calling `enrichCJSError`. diff --git a/lib/internal/modules/run_main.js b/lib/internal/modules/run_main.js index 888f22cfe0ec5c..bb77d60388ab47 100644 --- a/lib/internal/modules/run_main.js +++ b/lib/internal/modules/run_main.js @@ -168,9 +168,9 @@ function executeUserEntryPoint(main = process.argv[1]) { try { Module._load(main, null, true); } catch (error) { - const { cjsRetryAsESMCache } = require('internal/modules/cjs/loader'); - const source = cjsRetryAsESMCache.get(resolvedMain); - cjsRetryAsESMCache.delete(resolvedMain); + const source = process.mainModule._retryAsESMSource; + // In case the entry point is a large file, such as a bundle, release it from memory since we're done with it. + process.mainModule._retryAsESMSource = undefined; // No need to `delete` since we don't care if the key exists. const { shouldRetryAsESM } = require('internal/modules/helpers'); retryAsESM = shouldRetryAsESM(error.message, source); if (!retryAsESM) { From 9ad9acea0a4f7cef14690092dbf07743f7f6fbc0 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 17 Mar 2024 16:31:13 -0700 Subject: [PATCH 08/16] ScriptCompiler::kNoCompileOptions --- src/node_contextify.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 4e940769501f4c..466a4d3eb99c49 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1517,7 +1517,6 @@ bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env, GetHostDefinedOptions(isolate, id_symbol); ScriptCompiler::Source source = GetCommonJSSourceInstance( isolate, code, script_id, 0, 0, host_defined_options, nullptr); - ScriptCompiler::CompileOptions options = GetCompileOptions(source); TryCatchScope try_catch(env); ShouldNotAbortOnUncaughtScope no_abort_scope(env); @@ -1545,7 +1544,7 @@ bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env, params.data(), 0, nullptr, - options, + ScriptCompiler::kNoCompileOptions, v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason); if (!try_catch.HasTerminated()) { From 4ad99585282f3a4f45f0492195c7ea9baf04fac0 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 17 Mar 2024 17:14:38 -0700 Subject: [PATCH 09/16] export error messages from c++ as constants --- lib/internal/modules/helpers.js | 32 +++++++++++++++--------------- src/node_contextify.cc | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/lib/internal/modules/helpers.js b/lib/internal/modules/helpers.js index 690130297b4ce7..020a4185215e89 100644 --- a/lib/internal/modules/helpers.js +++ b/lib/internal/modules/helpers.js @@ -19,6 +19,15 @@ const { } = require('internal/errors').codes; const { BuiltinModule } = require('internal/bootstrap/realm'); +const { + shouldRetryAsESM: contextifyShouldRetryAsESM, + constants: { + syntaxDetectionErrors: { + esmSyntaxErrorMessages, + throwsOnlyInCommonJSErrorMessages, + }, + }, +} = internalBinding('contextify'); const { validateString } = require('internal/validators'); const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched. const internalFS = require('internal/fs/utils'); @@ -320,31 +329,22 @@ function normalizeReferrerURL(referrerName) { } -const esmSyntaxErrorMessages = new SafeSet([ - 'Cannot use import statement outside a module', // `import` statements - "Unexpected token 'export'", // `export` statements - "Cannot use 'import.meta' outside a module", // `import.meta` references -]); -const throwsOnlyInCommonJSErrorMessages = new SafeSet([ - "Identifier 'module' has already been declared", - "Identifier 'exports' has already been declared", - "Identifier 'require' has already been declared", - "Identifier '__filename' has already been declared", - "Identifier '__dirname' has already been declared", - 'await is only valid in async functions and the top level bodies of modules', // Top-level `await` -]); +let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM +let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM /** * After an attempt to parse a module as CommonJS throws an error, should we try again as ESM? * @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS * @param {string} source Module contents */ function shouldRetryAsESM(errorMessage, source) { - if (esmSyntaxErrorMessages.has(errorMessage)) { + esmSyntaxErrorMessagesSet ??= new SafeSet(esmSyntaxErrorMessages); + if (esmSyntaxErrorMessagesSet.has(errorMessage)) { return true; } - if (throwsOnlyInCommonJSErrorMessages.has(errorMessage)) { - return /** @type {boolean} */(internalBinding('contextify').shouldRetryAsESM(source)); + throwsOnlyInCommonJSErrorMessagesSet ??= new SafeSet(throwsOnlyInCommonJSErrorMessages); + if (throwsOnlyInCommonJSErrorMessagesSet.has(errorMessage)) { + return /** @type {boolean} */(contextifyShouldRetryAsESM(source)); } return false; diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 466a4d3eb99c49..4a54a73aa16e12 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1728,6 +1728,7 @@ static void CreatePerContextProperties(Local target, Local constants = Object::New(env->isolate()); Local measure_memory = Object::New(env->isolate()); Local memory_execution = Object::New(env->isolate()); + Local syntax_detection_errors = Object::New(env->isolate()); { Local memory_mode = Object::New(env->isolate()); @@ -1748,6 +1749,40 @@ static void CreatePerContextProperties(Local target, READONLY_PROPERTY(constants, "measureMemory", measure_memory); + { + Local esm_syntax_error_messages_array = + Array::New(env->isolate(), esm_syntax_error_messages.size()); + for (size_t i = 0; i < esm_syntax_error_messages.size(); i++) { + const char* message = esm_syntax_error_messages[i].data(); + (void)esm_syntax_error_messages_array->Set( + context, + static_cast(i), + String::NewFromUtf8(env->isolate(), message) + .ToLocalChecked()); + } + READONLY_PROPERTY(syntax_detection_errors, "esmSyntaxErrorMessages", + esm_syntax_error_messages_array); + } + + { + Local throws_only_in_cjs_error_messages_array = + Array::New(env->isolate(), throws_only_in_cjs_error_messages.size()); + for (size_t i = 0; i < throws_only_in_cjs_error_messages.size(); i++) { + const char* message = throws_only_in_cjs_error_messages[i].data(); + (void)throws_only_in_cjs_error_messages_array->Set( + context, + static_cast(i), + String::NewFromUtf8(env->isolate(), message) + .ToLocalChecked()); + } + READONLY_PROPERTY(syntax_detection_errors, + "throwsOnlyInCommonJSErrorMessages", + throws_only_in_cjs_error_messages_array); + } + + READONLY_PROPERTY(constants, "syntaxDetectionErrors", + syntax_detection_errors); + target->Set(context, env->constants_string(), constants).Check(); } From 43eb1b5f6f1171dfe23ceb0647b7021919e2b4dd Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 17 Mar 2024 17:16:24 -0700 Subject: [PATCH 10/16] USE() --- src/node_contextify.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 4a54a73aa16e12..5f504e85db9f28 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1537,7 +1537,7 @@ bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env, Local context = env->context(); std::vector> params = GetCJSParameters(env->isolate_data()); - std::ignore = ScriptCompiler::CompileFunction( + USE(ScriptCompiler::CompileFunction( context, &wrapped_source, params.size(), @@ -1545,7 +1545,7 @@ bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env, 0, nullptr, ScriptCompiler::kNoCompileOptions, - v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason); + v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason)); if (!try_catch.HasTerminated()) { if (try_catch.HasCaught()) { @@ -1754,11 +1754,11 @@ static void CreatePerContextProperties(Local target, Array::New(env->isolate(), esm_syntax_error_messages.size()); for (size_t i = 0; i < esm_syntax_error_messages.size(); i++) { const char* message = esm_syntax_error_messages[i].data(); - (void)esm_syntax_error_messages_array->Set( + USE(esm_syntax_error_messages_array->Set( context, static_cast(i), String::NewFromUtf8(env->isolate(), message) - .ToLocalChecked()); + .ToLocalChecked())); } READONLY_PROPERTY(syntax_detection_errors, "esmSyntaxErrorMessages", esm_syntax_error_messages_array); @@ -1769,11 +1769,11 @@ static void CreatePerContextProperties(Local target, Array::New(env->isolate(), throws_only_in_cjs_error_messages.size()); for (size_t i = 0; i < throws_only_in_cjs_error_messages.size(); i++) { const char* message = throws_only_in_cjs_error_messages[i].data(); - (void)throws_only_in_cjs_error_messages_array->Set( + USE(throws_only_in_cjs_error_messages_array->Set( context, static_cast(i), String::NewFromUtf8(env->isolate(), message) - .ToLocalChecked()); + .ToLocalChecked())); } READONLY_PROPERTY(syntax_detection_errors, "throwsOnlyInCommonJSErrorMessages", From 51268ec3975ffe0df7f7165c6c72d0fbac8bfac6 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 17 Mar 2024 19:47:33 -0700 Subject: [PATCH 11/16] format --- src/node_contextify.cc | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 5f504e85db9f28..471e0bef9538e0 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1509,8 +1509,8 @@ bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env, Local code) { Isolate* isolate = env->isolate(); - Local script_id = FIXED_ONE_BYTE_STRING(isolate, - "[retry_as_esm_check]"); + Local script_id = + FIXED_ONE_BYTE_STRING(isolate, "[retry_as_esm_check]"); Local id_symbol = Symbol::New(isolate, script_id); Local host_defined_options = @@ -1526,11 +1526,8 @@ bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env, // module was caused by either a top-level declaration of one of the CommonJS // module variables, or a top-level `await`. code = String::Concat( - isolate, - FIXED_ONE_BYTE_STRING(isolate, "(async function() {"), - code); - code = String::Concat( - isolate, code, FIXED_ONE_BYTE_STRING(isolate, "})();")); + isolate, FIXED_ONE_BYTE_STRING(isolate, "(async function() {"), code); + code = String::Concat(isolate, code, FIXED_ONE_BYTE_STRING(isolate, "})();")); ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance( isolate, code, script_id, 0, 0, host_defined_options, nullptr); @@ -1757,10 +1754,10 @@ static void CreatePerContextProperties(Local target, USE(esm_syntax_error_messages_array->Set( context, static_cast(i), - String::NewFromUtf8(env->isolate(), message) - .ToLocalChecked())); + String::NewFromUtf8(env->isolate(), message).ToLocalChecked())); } - READONLY_PROPERTY(syntax_detection_errors, "esmSyntaxErrorMessages", + READONLY_PROPERTY(syntax_detection_errors, + "esmSyntaxErrorMessages", esm_syntax_error_messages_array); } @@ -1772,16 +1769,15 @@ static void CreatePerContextProperties(Local target, USE(throws_only_in_cjs_error_messages_array->Set( context, static_cast(i), - String::NewFromUtf8(env->isolate(), message) - .ToLocalChecked())); + String::NewFromUtf8(env->isolate(), message).ToLocalChecked())); } READONLY_PROPERTY(syntax_detection_errors, "throwsOnlyInCommonJSErrorMessages", throws_only_in_cjs_error_messages_array); } - READONLY_PROPERTY(constants, "syntaxDetectionErrors", - syntax_detection_errors); + READONLY_PROPERTY( + constants, "syntaxDetectionErrors", syntax_detection_errors); target->Set(context, env->constants_string(), constants).Check(); } From 1c6496c15b9c84f161365e701c6022718d05c515 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Mon, 18 Mar 2024 18:52:06 -0700 Subject: [PATCH 12/16] review notes --- lib/internal/modules/cjs/loader.js | 5 ++++- lib/internal/modules/helpers.js | 3 +++ lib/internal/modules/run_main.js | 13 ++++++++----- src/node_contextify.cc | 15 ++++++++++----- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/lib/internal/modules/cjs/loader.js b/lib/internal/modules/cjs/loader.js index 7aecf53789745c..8da6844bf30385 100644 --- a/lib/internal/modules/cjs/loader.js +++ b/lib/internal/modules/cjs/loader.js @@ -70,11 +70,14 @@ const cjsSourceCache = new SafeWeakMap(); */ const cjsExportsCache = new SafeWeakMap(); +const kEntryPointSource = Symbol('kEntryPointSource'); + // Set first due to cycle with ESM loader functions. module.exports = { cjsExportsCache, cjsSourceCache, initializeCJS, + kEntryPointSource, Module, wrapSafe, }; @@ -1339,7 +1342,7 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) { if (process.mainModule === cjsModuleInstance) { if (getOptionValue('--experimental-detect-module')) { // For the main entry point, cache the source to potentially retry as ESM. - process.mainModule._retryAsESMSource = content; + process.mainModule[kEntryPointSource] = content; } else { // We only enrich the error (print a warning) if we're sure we're going to for-sure throw it; so if we're // retrying as ESM, wait until we know whether we're going to retry before calling `enrichCJSError`. diff --git a/lib/internal/modules/helpers.js b/lib/internal/modules/helpers.js index 020a4185215e89..940f83e49af86f 100644 --- a/lib/internal/modules/helpers.js +++ b/lib/internal/modules/helpers.js @@ -333,6 +333,9 @@ let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM /** * After an attempt to parse a module as CommonJS throws an error, should we try again as ESM? + * We only want to try again as ESM if the error is due to syntax that is only valid in ESM; and if the CommonJS parse + * throws on an error that would not have been a syntax error in ESM (like via top-level `await` or a lexical + * redeclaration of one of the CommonJS variables) then we need to parse again to see if it would have thrown in ESM. * @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS * @param {string} source Module contents */ diff --git a/lib/internal/modules/run_main.js b/lib/internal/modules/run_main.js index bb77d60388ab47..fba440ad3b1830 100644 --- a/lib/internal/modules/run_main.js +++ b/lib/internal/modules/run_main.js @@ -152,14 +152,16 @@ function runEntryPointWithESMLoader(callback) { * by `require('module')`) even when the entry point is ESM. * This monkey-patchable code is bypassed under `--experimental-default-type=module`. * Because of backwards compatibility, this function is exposed publicly via `import { runMain } from 'node:module'`. + * When `--experimental-detect-module` is passed, this function will attempt to run ambiguous (no explicit extension, no + * `package.json` type field) entry points as CommonJS first; under certain conditions, it will retry running as ESM. * @param {string} main - First positional CLI argument, such as `'entry.js'` from `node entry.js` */ function executeUserEntryPoint(main = process.argv[1]) { const resolvedMain = resolveMainPath(main); const useESMLoader = shouldUseESMLoader(resolvedMain); - // We might retry as ESM if the CommonJS loader throws because the file is ESM, so - // try the CommonJS loader first unless we know it's supposed to be parsed as ESM. + // Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first + // try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM. let retryAsESM = false; if (!useESMLoader) { // Module._load is the monkey-patchable CJS module loader. @@ -168,9 +170,10 @@ function executeUserEntryPoint(main = process.argv[1]) { try { Module._load(main, null, true); } catch (error) { - const source = process.mainModule._retryAsESMSource; + const { kEntryPointSource } = require('internal/modules/cjs/loader'); + const source = process.mainModule[kEntryPointSource]; // In case the entry point is a large file, such as a bundle, release it from memory since we're done with it. - process.mainModule._retryAsESMSource = undefined; // No need to `delete` since we don't care if the key exists. + process.mainModule[kEntryPointSource] = undefined; const { shouldRetryAsESM } = require('internal/modules/helpers'); retryAsESM = shouldRetryAsESM(error.message, source); if (!retryAsESM) { @@ -179,7 +182,7 @@ function executeUserEntryPoint(main = process.argv[1]) { throw error; } } - } else { + } else { // `--experimental-detect-module` is not passed Module._load(main, null, true); } } diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 471e0bef9538e0..792f698661dbe6 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1442,9 +1442,8 @@ void ContextifyContext::ContainsModuleSyntax( } // Argument 1: source code - Local code; CHECK(args[0]->IsString()); - code = args[0].As(); + auto code = args[0].As(); // Argument 2: filename; if undefined, use empty string Local filename = String::Empty(isolate); @@ -1492,7 +1491,7 @@ void ContextifyContext::ShouldRetryAsESM( const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); - CHECK_EQ(args.Length(), 1); + CHECK_EQ(args.Length(), 1); // code // Argument 1: source code Local code; @@ -1515,8 +1514,14 @@ bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env, Local host_defined_options = GetHostDefinedOptions(isolate, id_symbol); - ScriptCompiler::Source source = GetCommonJSSourceInstance( - isolate, code, script_id, 0, 0, host_defined_options, nullptr); + ScriptCompiler::Source source = + GetCommonJSSourceInstance(isolate, + code, + script_id, // filename + 0, // line offset + 0, // column offset + host_defined_options, + nullptr); // cached_data TryCatchScope try_catch(env); ShouldNotAbortOnUncaughtScope no_abort_scope(env); From 1685245c85b64d888a370364ae3a79cd6ca64ec6 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Tue, 19 Mar 2024 09:12:43 -0700 Subject: [PATCH 13/16] Use string approach --- lib/internal/modules/cjs/loader.js | 6 ++---- lib/internal/modules/run_main.js | 12 ++++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/internal/modules/cjs/loader.js b/lib/internal/modules/cjs/loader.js index 8da6844bf30385..22e77d8bff70c0 100644 --- a/lib/internal/modules/cjs/loader.js +++ b/lib/internal/modules/cjs/loader.js @@ -70,14 +70,12 @@ const cjsSourceCache = new SafeWeakMap(); */ const cjsExportsCache = new SafeWeakMap(); -const kEntryPointSource = Symbol('kEntryPointSource'); - // Set first due to cycle with ESM loader functions. module.exports = { cjsExportsCache, cjsSourceCache, initializeCJS, - kEntryPointSource, + entryPointSource: undefined, // Set below. Module, wrapSafe, }; @@ -1342,7 +1340,7 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) { if (process.mainModule === cjsModuleInstance) { if (getOptionValue('--experimental-detect-module')) { // For the main entry point, cache the source to potentially retry as ESM. - process.mainModule[kEntryPointSource] = content; + module.exports.entryPointSource = content; } else { // We only enrich the error (print a warning) if we're sure we're going to for-sure throw it; so if we're // retrying as ESM, wait until we know whether we're going to retry before calling `enrichCJSError`. diff --git a/lib/internal/modules/run_main.js b/lib/internal/modules/run_main.js index fba440ad3b1830..332f8a1a8fed1b 100644 --- a/lib/internal/modules/run_main.js +++ b/lib/internal/modules/run_main.js @@ -164,18 +164,18 @@ function executeUserEntryPoint(main = process.argv[1]) { // try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM. let retryAsESM = false; if (!useESMLoader) { - // Module._load is the monkey-patchable CJS module loader. - const { Module } = require('internal/modules/cjs/loader'); + const cjsLoader = require('internal/modules/cjs/loader'); + const { Module } = cjsLoader; if (getOptionValue('--experimental-detect-module')) { try { + // Module._load is the monkey-patchable CJS module loader. Module._load(main, null, true); } catch (error) { - const { kEntryPointSource } = require('internal/modules/cjs/loader'); - const source = process.mainModule[kEntryPointSource]; - // In case the entry point is a large file, such as a bundle, release it from memory since we're done with it. - process.mainModule[kEntryPointSource] = undefined; + const source = cjsLoader.entryPointSource; const { shouldRetryAsESM } = require('internal/modules/helpers'); retryAsESM = shouldRetryAsESM(error.message, source); + // In case the entry point is a large file, such as a bundle, release it from memory since we're done with it. + cjsLoader.entryPointSource = undefined; if (!retryAsESM) { const { enrichCJSError } = require('internal/modules/esm/translators'); enrichCJSError(error, source, resolvedMain); From 8d7e077b0e367aafe2ca30a31666b02b20e3388c Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Tue, 19 Mar 2024 09:52:39 -0700 Subject: [PATCH 14/16] simplify array conversion --- src/node_contextify.cc | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 792f698661dbe6..69cf2d60f5de26 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1406,7 +1406,7 @@ Local ContextifyContext::CompileFunctionAndCacheResult( // While top-level `await` is not permitted in CommonJS, it returns the same // error message as when `await` is used in a sync function, so we don't use it // as a disambiguation. -constexpr std::array esm_syntax_error_messages = { +static std::vector esm_syntax_error_messages = { "Cannot use import statement outside a module", // `import` statements "Unexpected token 'export'", // `export` statements "Cannot use 'import.meta' outside a module"}; // `import.meta` references @@ -1421,7 +1421,7 @@ constexpr std::array esm_syntax_error_messages = { // - Top-level `await`: if the user writes `await` at the top level of a // CommonJS module, it will throw a syntax error; but the same code is valid // in ESM. -constexpr std::array throws_only_in_cjs_error_messages = { +static std::vector throws_only_in_cjs_error_messages = { "Identifier 'module' has already been declared", "Identifier 'exports' has already been declared", "Identifier 'require' has already been declared", @@ -1752,30 +1752,16 @@ static void CreatePerContextProperties(Local target, READONLY_PROPERTY(constants, "measureMemory", measure_memory); { - Local esm_syntax_error_messages_array = - Array::New(env->isolate(), esm_syntax_error_messages.size()); - for (size_t i = 0; i < esm_syntax_error_messages.size(); i++) { - const char* message = esm_syntax_error_messages[i].data(); - USE(esm_syntax_error_messages_array->Set( - context, - static_cast(i), - String::NewFromUtf8(env->isolate(), message).ToLocalChecked())); - } + Local esm_syntax_error_messages_array = + ToV8Value(context, esm_syntax_error_messages).ToLocalChecked(); READONLY_PROPERTY(syntax_detection_errors, "esmSyntaxErrorMessages", esm_syntax_error_messages_array); } { - Local throws_only_in_cjs_error_messages_array = - Array::New(env->isolate(), throws_only_in_cjs_error_messages.size()); - for (size_t i = 0; i < throws_only_in_cjs_error_messages.size(); i++) { - const char* message = throws_only_in_cjs_error_messages[i].data(); - USE(throws_only_in_cjs_error_messages_array->Set( - context, - static_cast(i), - String::NewFromUtf8(env->isolate(), message).ToLocalChecked())); - } + Local throws_only_in_cjs_error_messages_array = + ToV8Value(context, throws_only_in_cjs_error_messages).ToLocalChecked(); READONLY_PROPERTY(syntax_detection_errors, "throwsOnlyInCommonJSErrorMessages", throws_only_in_cjs_error_messages_array); From d69398a7a79ccdc334b655255a44fc9c4f92f649 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Tue, 19 Mar 2024 11:01:09 -0700 Subject: [PATCH 15/16] lint --- src/node_contextify.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 69cf2d60f5de26..1bc99765ba9f6e 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1521,7 +1521,7 @@ bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env, 0, // line offset 0, // column offset host_defined_options, - nullptr); // cached_data + nullptr); // cached_data TryCatchScope try_catch(env); ShouldNotAbortOnUncaughtScope no_abort_scope(env); From fd2e3305873f2a8412526e7511d99030aea06a84 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Tue, 19 Mar 2024 12:31:17 -0700 Subject: [PATCH 16/16] nit --- lib/internal/modules/run_main.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/internal/modules/run_main.js b/lib/internal/modules/run_main.js index 332f8a1a8fed1b..c06c8cc72cc9f4 100644 --- a/lib/internal/modules/run_main.js +++ b/lib/internal/modules/run_main.js @@ -174,7 +174,8 @@ function executeUserEntryPoint(main = process.argv[1]) { const source = cjsLoader.entryPointSource; const { shouldRetryAsESM } = require('internal/modules/helpers'); retryAsESM = shouldRetryAsESM(error.message, source); - // In case the entry point is a large file, such as a bundle, release it from memory since we're done with it. + // In case the entry point is a large file, such as a bundle, + // ensure no further references can prevent it being garbage-collected. cjsLoader.entryPointSource = undefined; if (!retryAsESM) { const { enrichCJSError } = require('internal/modules/esm/translators');