From 7cbd91af34a25436c4706f3fe1fba206fa753961 Mon Sep 17 00:00:00 2001 From: Alistair Date: Thu, 14 Nov 2024 13:19:42 +0000 Subject: [PATCH 1/2] feat(jstz_engine): add safe wrapper for JS `Script`s --- crates/jstz_engine/src/lib.rs | 2 +- crates/jstz_engine/src/script.rs | 137 +++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 crates/jstz_engine/src/script.rs diff --git a/crates/jstz_engine/src/lib.rs b/crates/jstz_engine/src/lib.rs index e57db1482..21deba376 100644 --- a/crates/jstz_engine/src/lib.rs +++ b/crates/jstz_engine/src/lib.rs @@ -1,8 +1,8 @@ mod compartment; mod context; mod realm; +mod script; -#[allow(dead_code)] pub(crate) trait AsRawPtr { type Ptr; diff --git a/crates/jstz_engine/src/script.rs b/crates/jstz_engine/src/script.rs new file mode 100644 index 000000000..cc281953f --- /dev/null +++ b/crates/jstz_engine/src/script.rs @@ -0,0 +1,137 @@ +//! This module provides an interface for JavaScript scripts in SpiderMonkey. +//! A `Script` encapsulates the parsed and compiled form of a JavaScript program, +//! enabling efficient execution and interaction with the JavaScript engine. +//! +//! ## Overview +//! - **Source Representation**: Links to the source text, including metadata +//! like line numbers and column offsets for debugging / stack traces. +//! - **Bytecode**: Contains the compiled bytecode generated during parsing, +//! optimized for SpiderMonkey's interpreter. +//! +//! For more details, refer to the [ECMAScript Specification on Scripts and Modules](https://tc39.es/ecma262/#sec-scripts). 
+ +use std::{marker::PhantomData, path::Path, ptr::NonNull}; + +use mozjs::{ + jsapi::{Compile1, JSScript, JS_ExecuteScript}, + jsval::{JSVal, UndefinedValue}, + rooted, + rust::CompileOptionsWrapper, +}; + +use crate::{ + compartment::Compartment, + context::{CanAlloc, Context, InCompartment}, + AsRawPtr, +}; + +pub struct Script<'a, C: Compartment> { + script: NonNull, + marker: PhantomData<(&'a (), C)>, +} + +impl<'a, C: Compartment> Script<'a, C> { + /// Compiles a script with a given filename and returns the compiled script. + /// Returns `None` if the script could not be compiled. Panics if `path` is not valid UTF-8. + + // TODO(https://linear.app/tezos/issue/JSTZ-210): + // Add support for error handling / exceptions instead of using `Option` + pub fn compile(path: &Path, script: &str, cx: &'a mut Context) -> Option + where + S: InCompartment + CanAlloc, + { + let mut source = mozjs::rust::transform_str_to_source_text(script); + let options = unsafe { + CompileOptionsWrapper::new(cx.as_raw_ptr(), path.to_str().unwrap(), 1) + }; + + let script = unsafe { Compile1(cx.as_raw_ptr(), options.ptr, &mut source) }; + + Some(Self { + script: NonNull::new(script)?, + marker: PhantomData, + }) + } + + /// Evaluates a script and returns its return value, or `None` if `JS_ExecuteScript` reports failure + + // TODO(https://linear.app/tezos/issue/JSTZ-210): + // Add support for error handling / exceptions instead of using `Option` + // TODO(https://linear.app/tezos/issue/JSTZ-211): + // `JSVal` is not safe, we should return a safe wrapper instead + pub fn evaluate<'b, S>(&self, cx: &'b mut Context) -> Option + where + S: InCompartment + CanAlloc, + 'a: 'b, + { + // TODO(https://linear.app/tezos/issue/JSTZ-196): + // Remove this once we have a proper way to root values + rooted!(in(unsafe { cx.as_raw_ptr() }) let mut rval = UndefinedValue()); + rooted!(in(unsafe { cx.as_raw_ptr() }) let mut rooted_script = unsafe { self.as_raw_ptr() }); + + if unsafe { + JS_ExecuteScript( + cx.as_raw_ptr(), + rooted_script.handle_mut().into(), +
rval.handle_mut().into(), + ) + } { + Some(rval.get()) + } else { + None + } + } +} + +impl<'a, C: Compartment> AsRawPtr for Script<'a, C> { + type Ptr = *mut JSScript; + + unsafe fn as_raw_ptr(&self) -> Self::Ptr { + self.script.as_ptr() + } +} + +#[cfg(test)] +mod test { + + use std::path::PathBuf; + + use mozjs::rust::{JSEngine, Runtime}; + + use crate::{compartment, context::Context, script::Script}; + + #[test] + fn test_compile_and_evaluate() { + // Initialize the JS engine. + let engine = JSEngine::init().unwrap(); + let rt = Runtime::new(engine.handle()); + let rt_cx = &mut Context::from_runtime(&rt); + + // Enter a new realm to evaluate the script in. + let mut cx = rt_cx.new_realm().unwrap(); + + // Some example source in a string. + let filename = PathBuf::from("inline.js"); + let source: &'static str = "40 + 2"; + + // Compile the script + let script = Script::compile(&filename, source, &mut cx).unwrap(); + + // TODO(https://linear.app/tezos/issue/JSTZ-196): + // Remove once we have a proper way of rooting things. + // The script is rooted in the context in `evaluate`, but this doesn't work due to lifetimes. + // So we need to transmute it here. + let rooted_script: Script<'_, compartment::Ref<'_>> = + unsafe { std::mem::transmute(script) }; + + // Evaluate the script + let res = rooted_script.evaluate(&mut cx); + + assert!(res.is_some()); + + let rval = res.unwrap(); + /* Should get a number back from the example source.
*/ + assert!(rval.is_int32()); + assert_eq!(rval.to_int32(), 42); + } +} From d874eafecd6fab643659a12adb8d4b1f3ae165a9 Mon Sep 17 00:00:00 2001 From: Alistair Date: Thu, 14 Nov 2024 13:57:48 +0000 Subject: [PATCH 2/2] feat(jstz_engine): implement a write-barriered GC pointer abstraction --- crates/jstz_engine/src/gc/mod.rs | 22 +++ crates/jstz_engine/src/gc/ptr.rs | 270 +++++++++++++++++++++++++++++++ crates/jstz_engine/src/lib.rs | 1 + 3 files changed, 293 insertions(+) create mode 100644 crates/jstz_engine/src/gc/mod.rs create mode 100644 crates/jstz_engine/src/gc/ptr.rs diff --git a/crates/jstz_engine/src/gc/mod.rs b/crates/jstz_engine/src/gc/mod.rs new file mode 100644 index 000000000..5ddbd9d7a --- /dev/null +++ b/crates/jstz_engine/src/gc/mod.rs @@ -0,0 +1,22 @@ +//! # Garbage Collection in SpiderMonkey +//! +//! This module implements memory-safe abstractions on SpiderMonkey's garbage collection (GC) system, +//! designed for efficient memory management in the JavaScript engine. The GC is a mark-sweep collector +//! with features such as incremental marking, generational collection, and compaction. +//! +//! # Key Concepts +//! - **Cells**: The atomic unit of memory managed by the GC. All GC-allocated objects, such as `JsObject`, derive from `Cell`. +//! - **Compartments & Zones**: Memory is organized into compartments (for security and isolation) and zones (GC boundaries). +//! +//! # Features +//! - **Incremental GC**: Reduces pause times by interleaving marking work with JavaScript execution. +//! - **Write and Read Barriers**: Ensure correctness during incremental GC by maintaining object reachability. +//! - **Generational GC**: Optimizes for short-lived objects, separating them from long-lived ones. +//! +//! # Implementation Notes +//! - Write barriers, triggered during pointer updates, and read barriers for weak references, prevent GC hazards. +//! - Sweeping and additional GC phases like compaction are integrated into the collection process. +//!
+//! For further details, see the [GC Implementation Guide](https://udn.realityripple.com/docs/Mozilla/Projects/SpiderMonkey/Internals/Garbage_collection). + +mod ptr; diff --git a/crates/jstz_engine/src/gc/ptr.rs b/crates/jstz_engine/src/gc/ptr.rs new file mode 100644 index 000000000..dc4dedae8 --- /dev/null +++ b/crates/jstz_engine/src/gc/ptr.rs @@ -0,0 +1,270 @@ +//! A garbage-collected heap pointer used to refer to on-heap objects. +//! All garbage-collected pointers should be wrapped in a `GcPtr` +//! for safety purposes. + +use std::{cell::UnsafeCell, marker::PhantomPinned, mem, pin::Pin, ptr, sync::Arc}; + +use mozjs::{ + jsapi::{ + jsid, HeapBigIntWriteBarriers, HeapObjectWriteBarriers, HeapScriptWriteBarriers, + HeapStringWriteBarriers, HeapValueWriteBarriers, JSFunction, JSObject, JSScript, + JSString, JS::BigInt as JSBigInt, JS::Symbol as JSSymbol, + }, + jsid::VoidId, + jsval::{JSVal, UndefinedValue}, +}; + +/// A GC barrier is a mechanism used to ensure that the garbage collector maintains +/// a valid set of reachable objects. +/// +/// A write barrier is a mechanism used to ensure that the garbage collector is notified +/// when a reference to an object is changed. In general, a write barrier should be invoked +/// whenever a write can cause the set of things traced by the GC to change. +/// +/// Every barriered write should have the following form: +/// ```notrust +/// field = new_value; +/// write_barrier(&mut field, old_value, new_value); +/// ``` +/// +/// # Safety +/// +/// - An incorrect implementation of the trait can result in an invalid reachability snapshot when +/// performing incremental garbage collection. This can result in segfaults / use-after-frees +/// if not correctly handled.
+/// +pub unsafe trait WriteBarrieredPtr: Copy { + /// Creates an uninitialized value + unsafe fn uninit() -> Self; + + /// Performs the write barrier for the slot `v`, whose value changed from `prev` to `next` + unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self); +} + +/// A garbage-collected pointer used to refer to on-heap objects +/// +/// # Safety +/// +/// `GcPtr` should only be used by values on the heap. Garbage collected pointers +/// on the stack should be rooted. +pub struct GcPtr { + // # Safety + // + // For garbage collection to work correctly, when modifying + // the wrapped value that points to a GC cell, the write barrier + // must be invoked. + // + // This means after calling the `set` method, the `GcPtr` *must not* + // be moved in memory. Doing so would invalidate the local reference. + // For safety, we use `Arc::pin` to pin the `GcPtr`. + inner_ptr: UnsafeCell, + _marker: PhantomPinned, +} + +impl GcPtr { + /// Creates an uninitialized [`GcPtr`] + pub fn uninit() -> Self { + Self { + inner_ptr: UnsafeCell::new(unsafe { T::uninit() }), + _marker: PhantomPinned, + } + } + + /// Creates a new [`GcPtr`] from an existing pointer. + /// + /// # Safety + /// + /// The raw pointer `ptr` must point to an object that extends a `js::gc::Cell`. + pub fn pinned(ptr: T) -> Pin> { + let pinned = Arc::pin(Self::uninit()); + pinned.as_ref().set(ptr); + + pinned + } + + /// Compares two [`GcPtr`]s by the address of their inner cell (identity, not the wrapped value) + #[allow(dead_code)] + fn ptr_eq(&self, other: &Self) -> bool { + self.inner_ptr.get() == other.inner_ptr.get() + } + + /// Returns the raw pointer + /// + /// # Safety + /// + /// The caller must guarantee that the pointer is valid for reads and + /// points to a valid `js::gc::Cell`. + pub unsafe fn get(&self) -> T { + // Note: read_unaligned is used since SpiderMonkey doesn't + // guarantee the expected alignment of Rust pointers.
+ self.inner_ptr.get().read_unaligned() + } + + /// Sets the pointer to a new value, then invokes the write barrier with the previous and new values + pub fn set(self: Pin<&Self>, next: T) { + let self_ptr = self.inner_ptr.get(); + unsafe { + let prev = *self_ptr; + + *self_ptr = next; + T::write_barrier(self_ptr, prev, next) + } + } +} + +impl Drop for GcPtr { + fn drop(&mut self) { + unsafe { + let inner_ptr = self.inner_ptr.get(); + T::write_barrier(inner_ptr, *inner_ptr, T::uninit()) + } + } +} + +unsafe impl WriteBarrieredPtr for *mut JSObject { + unsafe fn uninit() -> Self { + ptr::null_mut() + } + + unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self) { + HeapObjectWriteBarriers(v, prev, next) + } +} + +unsafe impl WriteBarrieredPtr for *mut JSString { + unsafe fn uninit() -> Self { + ptr::null_mut() + } + + unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self) { + HeapStringWriteBarriers(v, prev, next) + } +} + +unsafe impl WriteBarrieredPtr for *mut JSFunction { + unsafe fn uninit() -> Self { + ptr::null_mut() + } + + unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self) { + HeapObjectWriteBarriers( + // SAFETY: JSFunction extends JSObject + mem::transmute::<*mut *mut JSFunction, *mut *mut JSObject>(v), + mem::transmute::<*mut JSFunction, *mut JSObject>(prev), + mem::transmute::<*mut JSFunction, *mut JSObject>(next), + ) + } +} + +unsafe impl WriteBarrieredPtr for *mut JSSymbol { + unsafe fn uninit() -> Self { + ptr::null_mut() + } + + unsafe fn write_barrier(_v: *mut Self, _prev: Self, _next: Self) { + // No write barrier needed for JSSymbol + } +} + +unsafe impl WriteBarrieredPtr for *mut JSBigInt { + unsafe fn uninit() -> Self { + ptr::null_mut() + } + + unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self) { + HeapBigIntWriteBarriers(v, prev, next) + } +} + +unsafe impl WriteBarrieredPtr for *mut JSScript { + unsafe fn uninit() -> Self { + ptr::null_mut() + } + + unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self) { + HeapScriptWriteBarriers(v, prev, next) + } +} + +unsafe
impl WriteBarrieredPtr for jsid { + unsafe fn uninit() -> Self { + VoidId() + } + + unsafe fn write_barrier(_v: *mut Self, _prev: Self, _next: Self) { + // No write barrier needed for jsid + } +} + +unsafe impl WriteBarrieredPtr for JSVal { + unsafe fn uninit() -> Self { + UndefinedValue() + } + + unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self) { + HeapValueWriteBarriers(v, &prev, &next) + } +} + +#[cfg(test)] +mod test { + use std::sync::Mutex; + + use crate::gc::ptr::{GcPtr, WriteBarrieredPtr}; + + #[derive(Debug, PartialEq, Eq, Copy, Clone)] + pub struct TestPtr { + value: i32, + } + + const TEST_PTR_UNINIT: TestPtr = TestPtr { value: 0 }; + + static WRITE_BARRIER_LOG: Mutex> = Mutex::new(Vec::new()); + + unsafe impl WriteBarrieredPtr for TestPtr { + unsafe fn uninit() -> Self { + TEST_PTR_UNINIT + } + + unsafe fn write_barrier(_v: *mut Self, _prev: Self, _next: Self) { + // Record the barrier call so tests can assert on it. NOTE(review): the log is a shared static and `Drop` also logs, so parallel tests may interleave entries - confirm. + + WRITE_BARRIER_LOG.lock().unwrap().push((_prev, _next)); + } + } + + #[test] + fn test_new_triggers_barrier() { + WRITE_BARRIER_LOG.lock().unwrap().clear(); + + let _ptr = GcPtr::pinned(TestPtr { value: 42 }); + + let write_barrier_log = WRITE_BARRIER_LOG.lock().unwrap(); + assert_eq!(write_barrier_log.len(), 1); + assert_eq!( + write_barrier_log[0], + (TEST_PTR_UNINIT, TestPtr { value: 42 }) + ); + } + + #[test] + fn test_set_calls_write_barrier() { + WRITE_BARRIER_LOG.lock().unwrap().clear(); + + let ptr = GcPtr::pinned(TestPtr { value: 42 }); + let new_ptr = TestPtr { value: 43 }; + + ptr.as_ref().set(new_ptr); + + let write_barrier_log = WRITE_BARRIER_LOG.lock().unwrap(); + assert_eq!(write_barrier_log.len(), 2); + assert_eq!( + write_barrier_log[0], + (TEST_PTR_UNINIT, TestPtr { value: 42 }) + ); + assert_eq!( + write_barrier_log[1], + (TestPtr { value: 42 }, TestPtr { value: 43 }) + ); + } +} diff --git a/crates/jstz_engine/src/lib.rs b/crates/jstz_engine/src/lib.rs index 21deba376..6e23ae427 100644 ---
a/crates/jstz_engine/src/lib.rs +++ b/crates/jstz_engine/src/lib.rs @@ -1,5 +1,6 @@ mod compartment; mod context; +mod gc; mod realm; mod script;