From 264d94b882af459f75e0f16682c9a1fc4fb9790d Mon Sep 17 00:00:00 2001 From: Dan Lapid Date: Thu, 17 Oct 2024 15:58:58 +0000 Subject: [PATCH] Instantiate Emscripten Runtime for python workers earlier. Move ownership of metrics and limitEnforcer to the api type. Currently ownership is shared even though the Isolate class encapsulates the api class. Moving complete ownership to the underlying api class allows the isolate class to be constructed in a different scope to the api class. This is useful for preinitialization of the api class before a request has come in. Add updateConfiguration function to jsg Isolates This can be used to update the given configuration at runtime. Note that while some jsg structs are lazily using the configuration, others can use it at construction and will have the original configuration value. --- src/pyodide/BUILD.bazel | 20 +- src/pyodide/emscripten_setup.capnp | 13 ++ src/pyodide/internal/pool/emscriptenSetup.ts | 10 +- src/pyodide/internal/python.ts | 39 +--- src/pyodide/types/emscripten.d.ts | 4 + src/pyodide/types/setup-emscripten.d.ts | 5 + src/workerd/api/BUILD.bazel | 4 + src/workerd/api/modules.h | 2 +- src/workerd/api/pyodide/pyodide.c++ | 20 ++ src/workerd/api/pyodide/pyodide.h | 25 ++- src/workerd/api/pyodide/setup-emscripten.c++ | 105 ++++++++++ src/workerd/api/pyodide/setup-emscripten.h | 15 ++ src/workerd/io/worker.c++ | 63 +++--- src/workerd/io/worker.h | 8 + src/workerd/jsg/promise.h | 7 +- src/workerd/jsg/resource.h | 5 + src/workerd/jsg/setup.h | 5 + src/workerd/jsg/type-wrapper.h | 15 ++ src/workerd/jsg/value.h | 6 +- src/workerd/server/workerd-api.c++ | 199 +++++++++++-------- src/workerd/server/workerd-api.h | 5 +- 21 files changed, 402 insertions(+), 173 deletions(-) create mode 100644 src/pyodide/emscripten_setup.capnp create mode 100644 src/pyodide/types/setup-emscripten.d.ts create mode 100644 src/workerd/api/pyodide/setup-emscripten.c++ create mode 100644 src/workerd/api/pyodide/setup-emscripten.h diff --git 
a/src/pyodide/BUILD.bazel b/src/pyodide/BUILD.bazel index 0eabada074b..dd1e1055427 100644 --- a/src/pyodide/BUILD.bazel +++ b/src/pyodide/BUILD.bazel @@ -197,8 +197,6 @@ INTERNAL_MODULES = glob( [ "internal/*.ts", "internal/topLevelEntropy/*.ts", - # The pool directory is only needed by typescript, it shouldn't be used at runtime. - "internal/pool/*.ts", "types/*.ts", "types/*/*.ts", ], @@ -211,27 +209,27 @@ INTERNAL_DATA_MODULES = glob([ "internal/*.py", "internal/patches/*.py", "internal/topLevelEntropy/*.py", -]) +]) + [ + "generated/python_stdlib.zip", + "generated/pyodide.asm.wasm", + "generated/emscriptenSetup.js", +] wd_ts_bundle( name = "pyodide", eslintrc_json = "eslint.config.mjs", import_name = "pyodide", - internal_data_modules = ["generated/python_stdlib.zip"] + INTERNAL_DATA_MODULES, + internal_data_modules = INTERNAL_DATA_MODULES, internal_json_modules = [ "generated/pyodide-lock.json", "generated/pyodide-bucket.json", ], - internal_modules = [ - "generated/emscriptenSetup.js", - ] + INTERNAL_MODULES, - internal_wasm_modules = ["generated/pyodide.asm.wasm"], + internal_modules = INTERNAL_MODULES, js_deps = [ "generated/emscriptenSetup", - "pyodide.asm.js@rule", "pyodide.asm.wasm@rule", - "pyodide-lock.js@rule", "python_stdlib.zip@rule", + "pyodide-lock.js@rule", "pyodide-bucket.json@rule", ], lint = False, @@ -264,7 +262,7 @@ genrule( for m in INTERNAL_DATA_MODULES if m.endswith(".py") ] + [ - ":pyodide-internal_generated_emscriptenSetup", + ":pyodide-internal_generated_emscriptenSetup.js", ":pyodide-internal_generated_pyodide.asm.wasm", ":pyodide-internal_generated_python_stdlib.zip", ":pyodide-internal_generated_pyodide-lock.json", diff --git a/src/pyodide/emscripten_setup.capnp b/src/pyodide/emscripten_setup.capnp new file mode 100644 index 00000000000..a47d99256c6 --- /dev/null +++ b/src/pyodide/emscripten_setup.capnp @@ -0,0 +1,13 @@ +@0xc00ad00cc650fb45; + +struct EmscriptenSetup { + code @0 :Text; + pyodideAsmWasm @1 :Data; + pythonStdlibZip 
@2 :Data; +} + +const emscriptenSetup :EmscriptenSetup = ( + code = embed "emscriptenSetup.js", + pyodideAsmWasm = embed "pyodide.asm.wasm", + pythonStdlibZip = embed "python_stdlib.zip", +); diff --git a/src/pyodide/internal/pool/emscriptenSetup.ts b/src/pyodide/internal/pool/emscriptenSetup.ts index b9b37c0ee99..d72946898c6 100644 --- a/src/pyodide/internal/pool/emscriptenSetup.ts +++ b/src/pyodide/internal/pool/emscriptenSetup.ts @@ -13,7 +13,7 @@ import { reportError } from 'pyodide-internal:util'; */ import { _createPyodideModule } from 'pyodide-internal:generated/pyodide.asm'; -export { +import { setUnsafeEval, setGetRandomValues, } from 'pyodide-internal:pool/builtin_wrappers'; @@ -56,7 +56,7 @@ function getWaitForDynlibs(resolveReadyPromise: PreRunHook): PreRunHook { * This is a simplified version of the `prepareFileSystem` function here: * https://github.com/pyodide/pyodide/blob/main/src/js/module.ts */ -function getPrepareFileSystem(pythonStdlib: Uint8Array): PreRunHook { +function getPrepareFileSystem(pythonStdlib: ArrayBuffer): PreRunHook { return function prepareFileSystem(Module: Module): void { try { const pymajor = Module._py_version_major(); @@ -118,7 +118,7 @@ function getInstantiateWasm( */ function getEmscriptenSettings( isWorkerd: boolean, - pythonStdlib: Uint8Array, + pythonStdlib: ArrayBuffer, pyodideWasmModule: WebAssembly.Module ): EmscriptenSettings { const config: PyodideConfig = { @@ -193,7 +193,7 @@ function* featureDetectionMonkeyPatchesContextManager() { */ export async function instantiateEmscriptenModule( isWorkerd: boolean, - pythonStdlib: Uint8Array, + pythonStdlib: ArrayBuffer, wasmModule: WebAssembly.Module ): Promise { const emscriptenSettings = getEmscriptenSettings( @@ -210,6 +210,8 @@ export async function instantiateEmscriptenModule( // Wait until we've executed all the preRun hooks before proceeding const emscriptenModule = await emscriptenSettings.readyPromise; + emscriptenModule.setUnsafeEval = setUnsafeEval; + 
emscriptenModule.setGetRandomValues = setGetRandomValues; return emscriptenModule; } catch (e) { console.warn('Error in instantiateEmscriptenModule'); diff --git a/src/pyodide/internal/python.ts b/src/pyodide/internal/python.ts index 3fd959690a9..67018b87aec 100644 --- a/src/pyodide/internal/python.ts +++ b/src/pyodide/internal/python.ts @@ -18,39 +18,11 @@ import { entropyBeforeTopLevel, getRandomValues, } from 'pyodide-internal:topLevelEntropy/lib'; +import { default as SetupEmscripten } from 'internal:setup-emscripten'; + import { default as UnsafeEval } from 'internal:unsafe-eval'; import { simpleRunPython } from 'pyodide-internal:util'; -/** - * This file is a simplified version of the Pyodide loader: - * https://github.com/pyodide/pyodide/blob/main/src/js/pyodide.ts - * - * In particular, it drops the package lock, which disables - * `pyodide.loadPackage`. In trade we add memory snapshots here. - */ - -/** - * _createPyodideModule and pyodideWasmModule together are produced by the - * Emscripten linker - */ -import pyodideWasmModule from 'pyodide-internal:generated/pyodide.asm.wasm'; - -/** - * The Python and Pyodide stdlib zipped together. The zip format is convenient - * because Python has a "ziploader" that allows one to import directly from a - * zip file. - * - * The ziploader solves bootstrapping problems around unpacking: Python comes - * with a bunch of C libs to unpack various archive formats, but they need stuff - * in this zip file to initialize their runtime state. - */ -import pythonStdlib from 'pyodide-internal:generated/python_stdlib.zip'; -import { - instantiateEmscriptenModule, - setUnsafeEval, - setGetRandomValues, -} from 'pyodide-internal:generated/emscriptenSetup'; - /** * After running `instantiateEmscriptenModule` but before calling into any C * APIs, we call this function. 
If `MEMORY` is defined, then we will have passed @@ -90,14 +62,15 @@ export async function loadPyodide( indexURL: string ): Promise { const Module = await enterJaegerSpan('instantiate_emscripten', () => - instantiateEmscriptenModule(isWorkerd, pythonStdlib, pyodideWasmModule) + SetupEmscripten.getModule() ); + Module.API.config.jsglobals = globalThis; if (isWorkerd) { Module.API.config.indexURL = indexURL; Module.API.config.resolveLockFilePromise!(lockfile); } - setUnsafeEval(UnsafeEval); - setGetRandomValues(getRandomValues); + Module.setUnsafeEval(UnsafeEval); + Module.setGetRandomValues(getRandomValues); await enterJaegerSpan('prepare_wasm_linear_memory', () => prepareWasmLinearMemory(Module) ); diff --git a/src/pyodide/types/emscripten.d.ts b/src/pyodide/types/emscripten.d.ts index e465da6c377..5b2ccba9a6f 100644 --- a/src/pyodide/types/emscripten.d.ts +++ b/src/pyodide/types/emscripten.d.ts @@ -68,4 +68,8 @@ interface Module { addRunDependency(x: string): void; removeRunDependency(x: string): void; noInitialRun: boolean; + setUnsafeEval(mod: typeof import('internal:unsafe-eval').default): void; + setGetRandomValues( + func: typeof import('pyodide-internal:topLevelEntropy/lib').getRandomValues + ): void; } diff --git a/src/pyodide/types/setup-emscripten.d.ts b/src/pyodide/types/setup-emscripten.d.ts new file mode 100644 index 00000000000..191e2ed3c5d --- /dev/null +++ b/src/pyodide/types/setup-emscripten.d.ts @@ -0,0 +1,5 @@ +declare namespace SetupEmscripten { + const getModule: () => Module; +} + +export default SetupEmscripten; diff --git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel index 719018cabbf..930ab74e9e3 100644 --- a/src/workerd/api/BUILD.bazel +++ b/src/workerd/api/BUILD.bazel @@ -14,6 +14,7 @@ filegroup( "html-rewriter.c++", "hyperdrive.c++", "pyodide/pyodide.c++", + "pyodide/setup-emscripten.c++", "memory-cache.c++", "r2*.c++", "rtti.c++", @@ -37,6 +38,7 @@ filegroup( "hyperdrive.h", "memory-cache.h", "pyodide/pyodide.h", + 
"pyodide/setup-emscripten.h", "modules.h", "r2*.h", "rtti.h", @@ -126,9 +128,11 @@ wd_cc_library( name = "pyodide", srcs = [ "pyodide/pyodide.c++", + "pyodide/setup-emscripten.c++", ], hdrs = [ "pyodide/pyodide.h", + "pyodide/setup-emscripten.h", "//src/pyodide:generated/pyodide_extra.capnp.h", ], implementation_deps = ["//src/workerd/util:string-buffer"], diff --git a/src/workerd/api/modules.h b/src/workerd/api/modules.h index 4587c403266..d45dfaf059a 100644 --- a/src/workerd/api/modules.h +++ b/src/workerd/api/modules.h @@ -49,7 +49,7 @@ void registerBuiltinModules(jsg::modules::ModuleRegistry::Builder& builder, auto if (featureFlags.getPythonWorkers()) { builder.add(pyodide::getExternalPyodideModuleBundle(featureFlags)); - builder.add(pyodide::getInternalPyodideModuleBundle(featureFlags)); + builder.add(pyodide::getInternalPyodideModuleBundle(featureFlags)); } if (featureFlags.getRttiApi()) { diff --git a/src/workerd/api/pyodide/pyodide.c++ b/src/workerd/api/pyodide/pyodide.c++ index dc9e9f1935e..f1791167e7f 100644 --- a/src/workerd/api/pyodide/pyodide.c++ +++ b/src/workerd/api/pyodide/pyodide.c++ @@ -3,6 +3,8 @@ // https://opensource.org/licenses/Apache-2.0 #include "pyodide.h" +#include +#include #include #include @@ -484,6 +486,24 @@ void DiskCache::put(jsg::Lock& js, kj::String key, kj::Array data) { } } +jsg::JsValue SetupEmscripten::getModule(jsg::Lock& js) { + KJ_IF_SOME(module, emscriptenModule) { + return module.getHandle(js); + } else { + auto& runtime = KJ_ASSERT_NONNULL(workerd::Worker::Api::current().getEmscriptenRuntime()); + js.v8Context()->SetSecurityToken(runtime.contextToken.getHandle(js)); + emscriptenModule = runtime.emscriptenRuntime; + return KJ_ASSERT_NONNULL(emscriptenModule).getHandle(js); + } +} + +void SetupEmscripten::visitForGc(jsg::GcVisitor& visitor) { + // const_cast is ok because the GcVisitor doesn't actually change the underlying value of the object. 
+ KJ_IF_SOME(module, emscriptenModule) { + visitor.visit(const_cast&>(module)); + } +} + bool hasPythonModules(capnp::List::Reader modules) { for (auto module: modules) { if (module.isPythonModule()) { diff --git a/src/workerd/api/pyodide/pyodide.h b/src/workerd/api/pyodide/pyodide.h index 6dde213bb6b..bf7064c3d18 100644 --- a/src/workerd/api/pyodide/pyodide.h +++ b/src/workerd/api/pyodide/pyodide.h @@ -409,6 +409,24 @@ class SimplePythonLimiter: public jsg::Object { } }; +class SetupEmscripten: public jsg::Object { +public: + SetupEmscripten() {}; + SetupEmscripten(jsg::Lock& js, const jsg::Url&) {} + + jsg::JsValue getModule(jsg::Lock& js); + + JSG_RESOURCE_TYPE(SetupEmscripten) { + JSG_METHOD(getModule); + } + +private: + // Reference to the api value of the emscripten module. + // Used for visitForGc when no js is currently running. + kj::Maybe&> emscriptenModule; + void visitForGc(jsg::GcVisitor& visitor); +}; + using Worker = server::config::Worker; jsg::Ref makePyodideMetadataReader( @@ -420,7 +438,7 @@ bool hasPythonModules(capnp::List::Reader module api::pyodide::PackagesTarReader, api::pyodide::PyodideMetadataReader, \ api::pyodide::ArtifactBundler, api::pyodide::DiskCache, \ api::pyodide::DisabledInternalJaeger, api::pyodide::SimplePythonLimiter, \ - api::pyodide::MemorySnapshotResult + api::pyodide::MemorySnapshotResult, api::pyodide::SetupEmscripten template void registerPyodideModules(Registry& registry, auto featureFlags) { @@ -431,8 +449,11 @@ void registerPyodideModules(Registry& registry, auto featureFlags) { } registry.template addBuiltinModule( "pyodide-internal:packages_tar_reader", workerd::jsg::ModuleRegistry::Type::INTERNAL); + registry.template addBuiltinModule( + "internal:setup-emscripten", workerd::jsg::ModuleRegistry::Type::INTERNAL); } +template kj::Own getInternalPyodideModuleBundle(auto featureFlags) { jsg::modules::ModuleBundle::BuiltinBuilder builder( jsg::modules::ModuleBundle::BuiltinBuilder::Type::BUILTIN_ONLY); @@ -440,6 +461,8 
@@ kj::Own getInternalPyodideModuleBundle(auto featureF !util::Autogate::isEnabled(util::AutogateKey::PYTHON_EXTERNAL_BUNDLE)) { jsg::modules::ModuleBundle::getBuiltInBundleFromCapnp(builder, PYODIDE_BUNDLE); } + static const auto kSpecifier = "internal:setup-emscripten"_url; + builder.addObject(kSpecifier); return builder.finish(); } diff --git a/src/workerd/api/pyodide/setup-emscripten.c++ b/src/workerd/api/pyodide/setup-emscripten.c++ new file mode 100644 index 00000000000..6801db5ab5a --- /dev/null +++ b/src/workerd/api/pyodide/setup-emscripten.c++ @@ -0,0 +1,105 @@ +#include "setup-emscripten.h" + +#include "workerd/util/autogate.h" + +#include +#include + +#include +namespace workerd::api::pyodide { + +v8::Local loadEmscriptenSetupModule( + jsg::Lock& js, capnp::Data::Reader emsciptenSetupJsReader) { + v8::Local contentStr = jsg::v8Str(js.v8Isolate, emsciptenSetupJsReader.asChars()); + v8::ScriptOrigin origin( + jsg::v8StrIntern(js.v8Isolate, "pyodide-internal:generated/emscriptenSetup"), 0, 0, false, -1, + {}, false, false, true); + v8::ScriptCompiler::Source source(contentStr, origin); + return jsg::check(v8::ScriptCompiler::CompileModule(js.v8Isolate, &source)); +} + +jsg::JsValue resolvePromise(jsg::Lock& js, jsg::JsValue prom) { + auto promise = KJ_ASSERT_NONNULL(prom.tryCast()); + if (promise.state() == jsg::PromiseState::PENDING) { + js.runMicrotasks(); + } + KJ_ASSERT(promise.state() == jsg::PromiseState::FULFILLED); + return promise.result(); +} + +void instantiateEmscriptenSetupModule(jsg::Lock& js, v8::Local& module) { + jsg::instantiateModule(js, module); + auto evalPromise = KJ_ASSERT_NONNULL( + jsg::JsValue(jsg::check(module->Evaluate(js.v8Context()))).tryCast()); + resolvePromise(js, evalPromise); + KJ_ASSERT(module->GetStatus() == v8::Module::kEvaluated); +} + +v8::Local getInstantiateEmscriptenModule( + jsg::Lock& js, v8::Local& module) { + auto instantiateEmscriptenModule = + js.v8Get(module->GetModuleNamespace().As(), 
"instantiateEmscriptenModule"_kj); + KJ_ASSERT(instantiateEmscriptenModule->IsFunction()); + return instantiateEmscriptenModule.As(); +} + +template +jsg::JsValue callFunction(jsg::Lock& js, v8::Local& func, Args... args) { + v8::LocalVector argv( + js.v8Isolate, std::initializer_list>{args...}); + return jsg::JsValue( + jsg::check(func->Call(js.v8Context(), js.v8Null(), argv.size(), argv.data()))); +} + +jsg::JsValue callInstantiateEmscriptenModule(jsg::Lock& js, + v8::Local& func, + bool isWorkerd, + capnp::Data::Reader pythonStdlibZipReader, + capnp::Data::Reader pyodideAsmWasmReader) { + AllowV8BackgroundThreadsScope scope; + js.setAllowEval(true); + KJ_DEFER(js.setAllowEval(false)); + + auto pythonStdlibZip = v8::ArrayBuffer::New(js.v8Isolate, pythonStdlibZipReader.size(), + v8::BackingStoreInitializationMode::kUninitialized); + memcpy(pythonStdlibZip->Data(), pythonStdlibZipReader.begin(), pythonStdlibZipReader.size()); + auto pyodideAsmWasm = jsg::check(v8::WasmModuleObject::Compile(js.v8Isolate, + v8::MemorySpan(pyodideAsmWasmReader.begin(), pyodideAsmWasmReader.size()))); + return resolvePromise(js, + callFunction( + js, func, js.boolean(isWorkerd), kj::mv(pythonStdlibZip), kj::mv(pyodideAsmWasm))); +} + +EmscriptenRuntime EmscriptenRuntime::initialize(jsg::Lock& js, + bool isWorkerd, + const CompatibilityFlags::Reader& featureFlags, + kj::Maybe bundle) { + kj::Maybe emsciptenSetupJsReader; + kj::Maybe pythonStdlibZipReader; + kj::Maybe pyodideAsmWasmReader; + if (!featureFlags.getPythonExternalBundle() && + !util::Autogate::isEnabled(util::AutogateKey::PYTHON_EXTERNAL_BUNDLE)) { + KJ_ASSERT(bundle == kj::none); + bundle = PYODIDE_BUNDLE; + } + KJ_ASSERT(bundle != kj::none); + for (auto module: PYODIDE_BUNDLE->getModules()) { + if (module.getName().endsWith("emscriptenSetup.js")) { + emsciptenSetupJsReader = module.getData(); + } else if (module.getName().endsWith("python_stdlib.zip")) { + pythonStdlibZipReader = module.getData(); + } else if 
(module.getName().endsWith("pyodide.asm.wasm")) { + pyodideAsmWasmReader = module.getData(); + } + } + auto context = js.v8Context(); + Worker::setupContext(js, context, Worker::ConsoleMode::INSPECTOR_ONLY); + auto module = loadEmscriptenSetupModule(js, KJ_ASSERT_NONNULL(emsciptenSetupJsReader)); + instantiateEmscriptenSetupModule(js, module); + auto instantiateEmscriptenModule = getInstantiateEmscriptenModule(js, module); + auto emscriptenModule = callInstantiateEmscriptenModule(js, instantiateEmscriptenModule, + isWorkerd, KJ_ASSERT_NONNULL(pythonStdlibZipReader), KJ_ASSERT_NONNULL(pyodideAsmWasmReader)); + auto contextToken = jsg::JsValue(context->GetSecurityToken()); + return EmscriptenRuntime{contextToken.addRef(js), emscriptenModule.addRef(js)}; +} +} // namespace workerd::api::pyodide diff --git a/src/workerd/api/pyodide/setup-emscripten.h b/src/workerd/api/pyodide/setup-emscripten.h new file mode 100644 index 00000000000..579b143e0a0 --- /dev/null +++ b/src/workerd/api/pyodide/setup-emscripten.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +namespace workerd::api::pyodide { +struct EmscriptenRuntime { + jsg::JsRef contextToken; + jsg::JsRef emscriptenRuntime; + static EmscriptenRuntime initialize(jsg::Lock& js, + bool isWorkerd, + const CompatibilityFlags::Reader& featureFlags, + kj::Maybe bundle); +}; +} // namespace workerd::api::pyodide diff --git a/src/workerd/io/worker.c++ b/src/workerd/io/worker.c++ index 6dfa35e8a2b..c56b49438bb 100644 --- a/src/workerd/io/worker.c++ +++ b/src/workerd/io/worker.c++ @@ -467,9 +467,6 @@ private: kj::MutexGuarded state; }; -// Defined later in this file. 
-void setWebAssemblyModuleHasInstance(jsg::Lock& lock, v8::Local context); - static thread_local const Worker::Api* currentApi = nullptr; const Worker::Api& Worker::Api::current() { @@ -599,40 +596,12 @@ struct Worker::Isolate::Impl { KJ_DISALLOW_COPY_AND_MOVE(Lock); void setupContext(v8::Local context) { - // Set WebAssembly.Module @@HasInstance - setWebAssemblyModuleHasInstance(*lock, context); - // The V8Inspector implements the `console` object. KJ_IF_SOME(i, impl.inspector) { i.get()->contextCreated( v8_inspector::V8ContextInfo(context, 1, jsg::toInspectorStringView("Worker"))); } - - // We replace the default V8 console.log(), etc. methods, to give the worker access to - // logged content, and log formatted values to stdout/stderr locally. - auto global = context->Global(); - auto consoleStr = jsg::v8StrIntern(lock->v8Isolate, "console"); - auto console = jsg::check(global->Get(context, consoleStr)).As(); - auto mode = consoleMode; - - auto setHandler = [&](const char* method, LogLevel level) { - auto methodStr = jsg::v8StrIntern(lock->v8Isolate, method); - v8::Global original( - lock->v8Isolate, jsg::check(console->Get(context, methodStr)).As()); - - auto f = lock->wrapSimpleFunction(context, - [mode, level, original = kj::mv(original)]( - jsg::Lock& js, const v8::FunctionCallbackInfo& info) { - handleLog(js, mode, level, original, info); - }); - jsg::check(console->Set(context, methodStr, f)); - }; - - setHandler("debug", LogLevel::DEBUG_); - setHandler("error", LogLevel::ERROR); - setHandler("info", LogLevel::INFO); - setHandler("log", LogLevel::LOG); - setHandler("warn", LogLevel::WARN); + Worker::setupContext(*lock, context, consoleMode); } void disposeContext(jsg::JsContext context) { @@ -1471,6 +1440,36 @@ void setWebAssemblyModuleHasInstance(jsg::Lock& lock, v8::Local con module->DefineOwnProperty(context, v8::Symbol::GetHasInstance(lock.v8Isolate), function)); } +void Worker::setupContext( + jsg::Lock& lock, v8::Local context, Worker::ConsoleMode 
consoleMode) { + // Set WebAssembly.Module @@HasInstance + setWebAssemblyModuleHasInstance(lock, context); + + // We replace the default V8 console.log(), etc. methods, to give the worker access to + // logged content, and log formatted values to stdout/stderr locally. + auto global = context->Global(); + auto consoleStr = jsg::v8StrIntern(lock.v8Isolate, "console"); + auto console = jsg::check(global->Get(context, consoleStr)).As(); + + auto setHandler = [&](const char* method, LogLevel level) { + auto methodStr = jsg::v8StrIntern(lock.v8Isolate, method); + v8::Global original( + lock.v8Isolate, jsg::check(console->Get(context, methodStr)).As()); + + auto f = lock.wrapSimpleFunction(context, + [consoleMode, level, original = kj::mv(original)]( + jsg::Lock& js, const v8::FunctionCallbackInfo& info) { + handleLog(js, consoleMode, level, original, info); + }); + jsg::check(console->Set(context, methodStr, f)); + }; + + setHandler("debug", LogLevel::DEBUG_); + setHandler("error", LogLevel::ERROR); + setHandler("info", LogLevel::INFO); + setHandler("log", LogLevel::LOG); + setHandler("warn", LogLevel::WARN); +} // ======================================================================================= namespace { diff --git a/src/workerd/io/worker.h b/src/workerd/io/worker.h index d6d304ccb64..b265e0d2881 100644 --- a/src/workerd/io/worker.h +++ b/src/workerd/io/worker.h @@ -41,6 +41,9 @@ class Socket; class WebSocket; class WebSocketRequestResponsePair; class ExecutionContext; +namespace pyodide { +struct EmscriptenRuntime; +} } // namespace api class ThreadContext; @@ -153,6 +156,9 @@ class Worker: public kj::AtomicRefcounted { void setConnectOverride(kj::String networkAddress, ConnectFn connectFn); kj::Maybe getConnectOverride(kj::StringPtr networkAddress); + static void setupContext( + jsg::Lock& lock, v8::Local context, Worker::ConsoleMode consoleMode); + private: kj::Own script; @@ -532,6 +538,8 @@ class Worker::Api { virtual IsolateObserver& getMetrics() = 0; 
virtual const IsolateObserver& getMetrics() const = 0; + virtual const kj::Maybe& getEmscriptenRuntime() const = 0; + // Set the module fallback service callback, if any. using ModuleFallbackCallback = kj::Maybe>( jsg::Lock& js, diff --git a/src/workerd/jsg/promise.h b/src/workerd/jsg/promise.h index 554e4b9539b..07bc724c15c 100644 --- a/src/workerd/jsg/promise.h +++ b/src/workerd/jsg/promise.h @@ -572,6 +572,11 @@ class PromiseWrapper { // std::nullptr_t). The getConfig allows us to handle any case using reasonable defaults. PromiseWrapper(const auto& config): config(getConfig(config)) {} + template + void updateConfiguration(MetaConfiguration&& configuration) { + config = getConfig(kj::fwd(configuration)); + } + template static constexpr const char* getName(Promise*) { return "Promise"; @@ -668,7 +673,7 @@ class PromiseWrapper { } private: - const JsgConfig config; + JsgConfig config; static bool isThenable(v8::Local context, v8::Local handle) { if (handle->IsObject()) { diff --git a/src/workerd/jsg/resource.h b/src/workerd/jsg/resource.h index 9896e0e949c..760607745f3 100644 --- a/src/workerd/jsg/resource.h +++ b/src/workerd/jsg/resource.h @@ -1349,6 +1349,11 @@ class ResourceWrapper { ResourceWrapper(MetaConfiguration&& configuration) : configuration(kj::fwd(configuration)) {} + template + void updateConfiguration(MetaConfiguration&& config) { + configuration = kj::fwd(config); + } + inline void initTypeWrapper() { TypeWrapper& wrapper = static_cast(*this); wrapper.resourceTypeMap.insert(typeid(T), diff --git a/src/workerd/jsg/setup.h b/src/workerd/jsg/setup.h index c3ad32cb5d4..e2d3d429e6d 100644 --- a/src/workerd/jsg/setup.h +++ b/src/workerd/jsg/setup.h @@ -409,6 +409,11 @@ class Isolate: public IsolateBase { dropWrappers(kj::mv(wrapper)); } + template + void updateConfiguration(MetaConfiguration&& configuration) { + wrapper->updateConfiguration(kj::fwd(configuration)); + } + kj::Exception unwrapException( v8::Local context, v8::Local exception) override { 
return wrapper->template unwrap( diff --git a/src/workerd/jsg/type-wrapper.h b/src/workerd/jsg/type-wrapper.h index a1c261b489e..c6299f17a4d 100644 --- a/src/workerd/jsg/type-wrapper.h +++ b/src/workerd/jsg/type-wrapper.h @@ -245,6 +245,8 @@ class TypeWrapperBase TypeWrapperBase(MetaConfiguration& config) {} inline void initTypeWrapper() {} + template + void updateConfiguration(MetaConfiguration&& configuration) {} void unwrap() = delete; // StructWrapper only implements tryUnwrap(), not unwrap() }; @@ -274,6 +276,8 @@ class TypeWrapperBase, JsgKind::EXTENSION> void unwrap() = delete; // extensions only implement tryUnwrap(), not unwrap() inline void initTypeWrapper() {} + template + void updateConfiguration(MetaConfiguration&& configuration) {} }; // Specialization of TypeWrapperBase for InjectConfiguration. @@ -297,6 +301,10 @@ class TypeWrapperBase, JsgKind::EXTENSI void getTemplate() = delete; inline void initTypeWrapper() {} + template + void updateConfiguration(MetaConfiguration&& config) { + configuration = kj::fwd(config); + } private: Configuration configuration; @@ -411,6 +419,13 @@ class TypeWrapper: public DynamicResourceTypeMap, (TypeWrapperBase::initTypeWrapper(), ...); } + template + void updateConfiguration(MetaConfiguration&& configuration) { + (TypeWrapperBase::updateConfiguration(kj::fwd(configuration)), ...); + MaybeWrapper::updateConfiguration(kj::fwd(configuration)); + PromiseWrapper::updateConfiguration(kj::fwd(configuration)); + } + static TypeWrapper& from(v8::Isolate* isolate) { return *reinterpret_cast(isolate->GetData(1)); } diff --git a/src/workerd/jsg/value.h b/src/workerd/jsg/value.h index 54c7fca2d63..70c6dbf0097 100644 --- a/src/workerd/jsg/value.h +++ b/src/workerd/jsg/value.h @@ -587,6 +587,10 @@ class MaybeWrapper { // The getConfig allows us to handle any case using reasonable defaults. 
MaybeWrapper(const auto& config): config(getConfig(config)) {} + template + void updateConfiguration(MetaConfiguration&& configuration) { + config = getConfig(kj::fwd(configuration)); + } template static constexpr decltype(auto) getName(kj::Maybe*) { return TypeWrapper::getName((kj::Decay*)nullptr); @@ -623,7 +627,7 @@ class MaybeWrapper { } private: - const JsgConfig config; + JsgConfig config; }; // ======================================================================================= diff --git a/src/workerd/server/workerd-api.c++ b/src/workerd/server/workerd-api.c++ index a0dcf2df17a..9d0fad59bb2 100644 --- a/src/workerd/server/workerd-api.c++ +++ b/src/workerd/server/workerd-api.c++ @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -128,6 +129,92 @@ static const PythonConfig defaultConfig{ }; } // namespace +namespace { +kj::Path getPyodideBundleFileName(kj::StringPtr version) { + return kj::Path(kj::str("pyodide_", version, ".capnp.bin")); +} + +kj::Maybe> getPyodideBundleFile( + const kj::Maybe>& maybeDir, kj::StringPtr version) { + KJ_IF_SOME(dir, maybeDir) { + kj::Path filename = getPyodideBundleFileName(version); + auto file = dir->tryOpenFile(filename); + + return file; + } + + return kj::none; +} + +void writePyodideBundleFileToDisk(const kj::Maybe>& maybeDir, + kj::StringPtr version, + kj::ArrayPtr bytes) { + KJ_IF_SOME(dir, maybeDir) { + kj::Path filename = getPyodideBundleFileName(version); + auto replacer = dir->replaceFile(filename, kj::WriteMode::CREATE | kj::WriteMode::MODIFY); + + replacer->get().writeAll(bytes); + replacer->commit(); + } +} + +kj::Maybe fetchPyodideBundle( + const api::pyodide::PythonConfig& pyConfig, kj::StringPtr version) { + KJ_IF_SOME(version, pyConfig.pyodideBundleManager.getPyodideBundle(version)) { + return version; + } + + auto maybePyodideBundleFile = getPyodideBundleFile(pyConfig.pyodideDiskCacheRoot, version); + KJ_IF_SOME(pyodideBundleFile, maybePyodideBundleFile) { + auto body = 
pyodideBundleFile->readAllBytes(); + pyConfig.pyodideBundleManager.setPyodideBundleData(kj::str(version), kj::mv(body)); + return pyConfig.pyodideBundleManager.getPyodideBundle(version); + } + + if (version == "dev") { + // the "dev" version is special and indicates we're using the tip-of-tree version built for testing + // so we shouldn't fetch it from the internet, only check for its existence in the disk cache + return kj::none; + } + + { + KJ_LOG(INFO, "Loading Pyodide package from internet..."); + kj::Thread([&]() { + kj::AsyncIoContext io = kj::setupAsyncIo(); + kj::HttpHeaderTable table; + + kj::TlsContext::Options options; + options.useSystemTrustStore = true; + + kj::Own tls = kj::heap(kj::mv(options)); + auto& network = io.provider->getNetwork(); + auto tlsNetwork = tls->wrapNetwork(network); + auto& timer = io.provider->getTimer(); + + auto client = kj::newHttpClient(timer, table, network, *tlsNetwork); + + kj::HttpHeaders headers(table); + + kj::String url = + kj::str("https://pyodide.runtime-playground.workers.dev/pyodide-capnp-bin/pyodide_", + version, ".capnp.bin"); + + auto req = client->request(kj::HttpMethod::GET, url.asPtr(), headers); + + auto res = req.response.wait(io.waitScope); + auto body = res.body->readAllBytes().wait(io.waitScope); + + writePyodideBundleFileToDisk(pyConfig.pyodideDiskCacheRoot, version, body); + + pyConfig.pyodideBundleManager.setPyodideBundleData(kj::str(version), kj::mv(body)); + }); + } + + KJ_LOG(INFO, "Loaded Pyodide package from internet"); + return pyConfig.pyodideBundleManager.getPyodideBundle(version); +} +} // namespace + struct WorkerdApi::Impl final { kj::Own features; kj::Maybe> maybeOwnedModuleRegistry; @@ -136,6 +223,7 @@ struct WorkerdApi::Impl final { JsgWorkerdIsolate jsgIsolate; api::MemoryCacheProvider& memoryCacheProvider; const PythonConfig& pythonConfig; + kj::Maybe maybeEmscriptenRuntime; class Configuration { public: @@ -174,8 +262,25 @@ struct WorkerdApi::Impl final { 
limitEnforcer->getCreateParams()), memoryCacheProvider(memoryCacheProvider), pythonConfig(pythonConfig) { - jsgIsolate.runInLockScope( - [&](JsgWorkerdIsolate::Lock& lock) { limitEnforcer->customizeIsolate(lock.v8Isolate); }); + jsgIsolate.runInLockScope([&](JsgWorkerdIsolate::Lock& lock) { + limitEnforcer->customizeIsolate(lock.v8Isolate); + if (features->getPythonWorkers()) { + kj::Maybe bundle; + if (features->getPythonExternalBundle() || + util::Autogate::isEnabled(util::AutogateKey::PYTHON_EXTERNAL_BUNDLE)) { + auto pythonRelease = KJ_ASSERT_NONNULL(getPythonSnapshotRelease(*features)); + auto version = getPythonBundleName(pythonRelease); + bundle = KJ_ASSERT_NONNULL( + fetchPyodideBundle(pythonConfig, version), "Failed to get Pyodide bundle"); + } + auto context = lock.newContext({}, lock.v8Isolate); + v8::Context::Scope scope(context.getHandle(lock)); + // Init emscripten synchronously; the python script will import setup-emscripten and + // call getModule() to retrieve the instantiated module. + maybeEmscriptenRuntime = + api::pyodide::EmscriptenRuntime::initialize(lock, true, *features, bundle); + } + }); } static v8::Local compileTextGlobal( @@ -291,6 +396,10 @@ const IsolateObserver& WorkerdApi::getMetrics() const { return *impl->observer; } +const kj::Maybe& WorkerdApi::getEmscriptenRuntime() const { + return impl->maybeEmscriptenRuntime; +} + Worker::Script::Source WorkerdApi::extractSource(kj::StringPtr name, config::Worker::Reader conf, Worker::ValidationErrorReporter& errorReporter, @@ -435,92 +544,6 @@ kj::Maybe WorkerdApi::tryCompileModule(jsg::Loc KJ_UNREACHABLE; } -namespace { -kj::Path getPyodideBundleFileName(kj::StringPtr version) { - return kj::Path(kj::str("pyodide_", version, ".capnp.bin")); -} - -kj::Maybe> getPyodideBundleFile( - const kj::Maybe>& maybeDir, kj::StringPtr version) { - KJ_IF_SOME(dir, maybeDir) { - kj::Path filename = getPyodideBundleFileName(version); - auto file = dir->tryOpenFile(filename); - - return file; - } - - return kj::none; -} - -void
writePyodideBundleFileToDisk(const kj::Maybe>& maybeDir, - kj::StringPtr version, - kj::ArrayPtr bytes) { - KJ_IF_SOME(dir, maybeDir) { - kj::Path filename = getPyodideBundleFileName(version); - auto replacer = dir->replaceFile(filename, kj::WriteMode::CREATE | kj::WriteMode::MODIFY); - - replacer->get().writeAll(bytes); - replacer->commit(); - } -} - -kj::Maybe fetchPyodideBundle( - const api::pyodide::PythonConfig& pyConfig, kj::StringPtr version) { - KJ_IF_SOME(version, pyConfig.pyodideBundleManager.getPyodideBundle(version)) { - return version; - } - - auto maybePyodideBundleFile = getPyodideBundleFile(pyConfig.pyodideDiskCacheRoot, version); - KJ_IF_SOME(pyodideBundleFile, maybePyodideBundleFile) { - auto body = pyodideBundleFile->readAllBytes(); - pyConfig.pyodideBundleManager.setPyodideBundleData(kj::str(version), kj::mv(body)); - return pyConfig.pyodideBundleManager.getPyodideBundle(version); - } - - if (version == "dev") { - // the "dev" version is special and indicates we're using the tip-of-tree version built for testing - // so we shouldn't fetch it from the internet, only check for its existence in the disk cache - return kj::none; - } - - { - KJ_LOG(INFO, "Loading Pyodide package from internet..."); - kj::Thread([&]() { - kj::AsyncIoContext io = kj::setupAsyncIo(); - kj::HttpHeaderTable table; - - kj::TlsContext::Options options; - options.useSystemTrustStore = true; - - kj::Own tls = kj::heap(kj::mv(options)); - auto& network = io.provider->getNetwork(); - auto tlsNetwork = tls->wrapNetwork(network); - auto& timer = io.provider->getTimer(); - - auto client = kj::newHttpClient(timer, table, network, *tlsNetwork); - - kj::HttpHeaders headers(table); - - kj::String url = - kj::str("https://pyodide.runtime-playground.workers.dev/pyodide-capnp-bin/pyodide_", - version, ".capnp.bin"); - - auto req = client->request(kj::HttpMethod::GET, url.asPtr(), headers); - - auto res = req.response.wait(io.waitScope); - auto body = 
res.body->readAllBytes().wait(io.waitScope); - - writePyodideBundleFileToDisk(pyConfig.pyodideDiskCacheRoot, version, body); - - pyConfig.pyodideBundleManager.setPyodideBundleData(kj::str(version), kj::mv(body)); - }); - } - - KJ_LOG(INFO, "Loaded Pyodide package from internet"); - return pyConfig.pyodideBundleManager.getPyodideBundle(version); -} -} // namespace - void WorkerdApi::compileModules(jsg::Lock& lockParam, config::Worker::Reader conf, Worker::ValidationErrorReporter& errorReporter, diff --git a/src/workerd/server/workerd-api.h b/src/workerd/server/workerd-api.h index 3f09658d220..73ebd60e1c4 100644 --- a/src/workerd/server/workerd-api.h +++ b/src/workerd/server/workerd-api.h @@ -12,7 +12,8 @@ namespace workerd { namespace api { namespace pyodide { struct PythonConfig; -} +struct EmscriptenRuntime; +} // namespace pyodide } // namespace api } // namespace workerd namespace workerd { @@ -60,6 +61,8 @@ class WorkerdApi final: public Worker::Api { IsolateObserver& getMetrics() override; const IsolateObserver& getMetrics() const override; + const kj::Maybe& getEmscriptenRuntime() const override; + static Worker::Script::Source extractSource(kj::StringPtr name, config::Worker::Reader conf, Worker::ValidationErrorReporter& errorReporter,