From e9118625424d3c9ffbd0bbb418039f148239a742 Mon Sep 17 00:00:00 2001 From: Alistair Date: Thu, 14 Nov 2024 13:57:48 +0000 Subject: [PATCH] feat(jstz_engine): implement a write-barriered GC pointer abstraction --- crates/jstz_engine/src/gc/mod.rs | 22 +++ crates/jstz_engine/src/gc/ptr.rs | 266 +++++++++++++++++++++++++++++++ crates/jstz_engine/src/lib.rs | 1 + 3 files changed, 289 insertions(+) create mode 100644 crates/jstz_engine/src/gc/mod.rs create mode 100644 crates/jstz_engine/src/gc/ptr.rs diff --git a/crates/jstz_engine/src/gc/mod.rs b/crates/jstz_engine/src/gc/mod.rs new file mode 100644 index 000000000..5ddbd9d7a --- /dev/null +++ b/crates/jstz_engine/src/gc/mod.rs @@ -0,0 +1,22 @@ +//! # Garbage Collection in SpiderMonkey +//! +//! This module implements memory-safe abstractions on SpiderMonkey's garbage collection (GC) system, +//! designed for efficient memory management in the JavaScript engine. The GC is a mark-sweep collector +//! with features such as incremental marking, generational collection, and compaction. +//! +//! # Key Concepts +//! - **Cells**: The atomic unit of memory managed by the GC. All GC-allocated objects, such as `JsObject`, derive from `Cell`. +//! - **Compartments & Zones**: Memory is organized into compartments (for security and isolation) and zones (GC boundaries). +//! +//! # Features +//! - **Incremental GC**: Reduces pause times by interleaving marking work with JavaScript execution. +//! - **Write and Read Barriers**: Ensure correctness during incremental GC by maintaining object reachability. +//! - **Generational GC**: Optimizes for short-lived objects, separating them from long-lived ones. +//! +//! # Implementation Notes +//! - Write barriers, triggered during pointer updates, and read barriers for weak references, prevent GC hazards. +//! - Sweeping and additional GC phases like compaction are integrated into the collection process. +//! +//! 
For further details, see the [GC Implementation Guide](https://udn.realityripple.com/docs/Mozilla/Projects/SpiderMonkey/Internals/Garbage_collection). + +mod ptr; diff --git a/crates/jstz_engine/src/gc/ptr.rs b/crates/jstz_engine/src/gc/ptr.rs new file mode 100644 index 000000000..7206955cb --- /dev/null +++ b/crates/jstz_engine/src/gc/ptr.rs @@ -0,0 +1,266 @@ +//! A garbage-collected heap pointer used to refer to on-heap objects. +//! All garbage-collected pointers should be wrapped in a `GcPtr` +//! for safety purposes. + +use std::{cell::UnsafeCell, marker::PhantomPinned, mem, pin::Pin, ptr, sync::Arc}; + +use mozjs::{ + jsapi::{ + jsid, HeapBigIntWriteBarriers, HeapObjectWriteBarriers, HeapScriptWriteBarriers, + HeapStringWriteBarriers, HeapValueWriteBarriers, JSFunction, JSObject, JSScript, + JSString, JS::BigInt as JSBigInt, JS::Symbol as JSSymbol, + }, + jsid::VoidId, + jsval::{JSVal, UndefinedValue}, +}; + +/// A GC barrier is a mechanism used to ensure that the garbage collector maintains +/// a valid set of reachable objects. +/// +/// A write barrier is a mechanism used to ensure that the garbage collector is notified +/// when a reference to an object is changed. In general, a write barrier should be invoked +/// whenever a write can cause the set of things traced by the GC to change. +/// +/// Every barriered write should have the following form: +/// ```notrust +/// field = new_value; +/// <write-barrier> +/// ``` +/// +/// # Safety +/// +/// - An incorrect implementation of the trait can result in an invalid reachability snapshot when +/// performing incremental garbage collection. This can result in segfaults / use-after-frees +/// if not correctly handled.
+///
+pub unsafe trait WriteBarrieredPtr: Copy {
+    /// Creates an uninitialized value (the value a fresh, unset slot holds)
+    unsafe fn uninit() -> Self;
+
+    /// Performs a write barrier for the slot `v`, whose value changed from `prev` to `next`
+    unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self);
+}
+
+/// A garbage-collected pointer used to refer to on-heap objects
+///
+/// # Safety
+///
+/// `GcPtr` should only be used by values on the heap. Garbage collected pointers
+/// on the stack should be rooted.
+pub struct GcPtr<T: WriteBarrieredPtr> {
+    // # Safety
+    //
+    // For garbage collection to work correctly, the write barrier must be
+    // invoked whenever the wrapped value (which points to a GC cell) is
+    // modified. The barrier is handed the address of `inner_ptr`.
+    //
+    // This means that after calling the `set` method, the `GcPtr` *must not*
+    // be moved in memory: doing so would invalidate that address.
+    // For safety, `GcPtr::pinned` uses `Arc::pin` to pin the `GcPtr`.
+    inner_ptr: UnsafeCell<T>,
+    _marker: PhantomPinned,
+}
+
+impl<T: WriteBarrieredPtr> GcPtr<T> {
+    /// Creates an uninitialized [`GcPtr`] holding `T::uninit()`; no write barrier is invoked
+    pub fn uninit() -> Self {
+        Self {
+            inner_ptr: UnsafeCell::new(unsafe { T::uninit() }),
+            _marker: PhantomPinned,
+        }
+    }
+
+    /// Creates a new pinned [`GcPtr`] from an existing pointer. The value is stored with
+    /// [`GcPtr::set`], so the write barrier observes the `T::uninit()` -> `ptr` transition.
+    ///
+    /// Although this function is safe to call, the raw pointer `ptr` is expected to point
+    /// to an object that extends a `js::gc::Cell`.
+    pub fn pinned(ptr: T) -> Pin<Arc<Self>> {
+        let pinned = Arc::pin(Self::uninit());
+        pinned.as_ref().set(ptr);
+
+        pinned
+    }
+
+    /// Returns `true` if both are the same slot: compares slot addresses, not stored values
+    #[allow(dead_code)]
+    fn ptr_eq(&self, other: &Self) -> bool {
+        self.inner_ptr.get() == other.inner_ptr.get()
+    }
+
+    /// Returns a copy of the stored raw pointer
+    pub fn get(&self) -> T {
+        // SAFETY: the inner_ptr points to a valid `js::gc::Cell`.
+        // [`pointer::read_unaligned`] is used since SpiderMonkey doesn't
+        // guarantee the expected align of Rust pointers.
+        unsafe { self.inner_ptr.get().read_unaligned() }
+    }
+
+    /// Sets the pointer to a new value and invokes the write barrier
+    pub fn set(self: Pin<&Self>, next: T) {
+        let self_ptr = self.inner_ptr.get();
+        unsafe {
+            let prev = *self_ptr;
+            // Store first, then report the `prev` -> `next` change of this slot to the GC.
+            *self_ptr = next;
+            T::write_barrier(self_ptr, prev, next)
+        }
+    }
+}
+
+impl<T: WriteBarrieredPtr> Drop for GcPtr<T> {
+    fn drop(&mut self) {
+        let inner_ptr = self.inner_ptr.get();
+        // SAFETY: `inner_ptr` is this value's own slot and is still live during `drop`.
+        // The slot is not overwritten; the barrier is told it changes to `T::uninit()`.
+        unsafe { T::write_barrier(inner_ptr, *inner_ptr, T::uninit()) }
+    }
+}
+
+unsafe impl WriteBarrieredPtr for *mut JSObject {
+    unsafe fn uninit() -> Self {
+        ptr::null_mut()
+    }
+
+    unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self) {
+        HeapObjectWriteBarriers(v, prev, next)
+    }
+}
+
+unsafe impl WriteBarrieredPtr for *mut JSString {
+    unsafe fn uninit() -> Self {
+        ptr::null_mut()
+    }
+
+    unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self) {
+        HeapStringWriteBarriers(v, prev, next)
+    }
+}
+
+unsafe impl WriteBarrieredPtr for *mut JSFunction {
+    unsafe fn uninit() -> Self {
+        ptr::null_mut()
+    }
+
+    unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self) {
+        HeapObjectWriteBarriers(
+            // SAFETY: JSFunction extends JSObject, so the object barrier is reused
+            mem::transmute::<*mut *mut JSFunction, *mut *mut JSObject>(v),
+            mem::transmute::<*mut JSFunction, *mut JSObject>(prev),
+            mem::transmute::<*mut JSFunction, *mut JSObject>(next),
+        )
+    }
+}
+
+unsafe impl WriteBarrieredPtr for *mut JSSymbol {
+    unsafe fn uninit() -> Self {
+        ptr::null_mut()
+    }
+
+    unsafe fn write_barrier(_v: *mut Self, _prev: Self, _next: Self) {
+        // No write barrier needed for JSSymbol
+    }
+}
+
+unsafe impl WriteBarrieredPtr for *mut JSBigInt {
+    unsafe fn uninit() -> Self {
+        ptr::null_mut()
+    }
+
+    unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self) {
+        HeapBigIntWriteBarriers(v, prev, next)
+    }
+}
+
+unsafe impl WriteBarrieredPtr for *mut JSScript {
+    unsafe fn uninit() -> Self {
+        ptr::null_mut()
+    }
+
+    unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self) {
+        HeapScriptWriteBarriers(v, prev, next)
+    }
+}
+
+unsafe impl WriteBarrieredPtr for jsid {
+    unsafe fn uninit() -> Self {
+        VoidId()
+    }
+
+    unsafe fn write_barrier(_v: *mut Self, _prev: Self, _next: Self) {
+        // No write barrier needed for jsid
+    }
+}
+
+unsafe impl WriteBarrieredPtr for JSVal {
+    unsafe fn uninit() -> Self {
+        UndefinedValue()
+    }
+
+    unsafe fn write_barrier(v: *mut Self, prev: Self, next: Self) {
+        HeapValueWriteBarriers(v, &prev, &next)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::cell::RefCell;
+
+    use crate::gc::ptr::{GcPtr, WriteBarrieredPtr};
+
+    #[derive(Debug, PartialEq, Eq, Copy, Clone)]
+    pub struct TestPtr {
+        value: i32,
+    }
+
+    const TEST_PTR_UNINIT: TestPtr = TestPtr { value: 0 };
+
+    // Thread-local: the harness runs tests on parallel threads that must not share a log.
+    thread_local! {
+        static WRITE_BARRIER_LOG: RefCell<Vec<(TestPtr, TestPtr)>> =
+            const { RefCell::new(Vec::new()) };
+    }
+
+    unsafe impl WriteBarrieredPtr for TestPtr {
+        unsafe fn uninit() -> Self {
+            TEST_PTR_UNINIT
+        }
+
+        unsafe fn write_barrier(_v: *mut Self, prev: Self, next: Self) {
+            WRITE_BARRIER_LOG.with(|log| log.borrow_mut().push((prev, next)));
+        }
+    }
+
+    #[test]
+    fn test_new_triggers_barrier() {
+        WRITE_BARRIER_LOG.with(|log| log.borrow_mut().clear());
+
+        let _ptr = GcPtr::pinned(TestPtr { value: 42 });
+
+        let write_barrier_log = WRITE_BARRIER_LOG.with(|log| log.borrow().clone());
+        assert_eq!(write_barrier_log.len(), 1);
+        assert_eq!(
+            write_barrier_log[0],
+            (TEST_PTR_UNINIT, TestPtr { value: 42 })
+        );
+    }
+
+    #[test]
+    fn test_set_calls_write_barrier() {
+        WRITE_BARRIER_LOG.with(|log| log.borrow_mut().clear());
+
+        let ptr = GcPtr::pinned(TestPtr { value: 42 });
+        ptr.as_ref().set(TestPtr { value: 43 });
+
+        let write_barrier_log = WRITE_BARRIER_LOG.with(|log| log.borrow().clone());
+        assert_eq!(write_barrier_log.len(), 2);
+        assert_eq!(
+            write_barrier_log[0],
+            (TEST_PTR_UNINIT, TestPtr { value: 42 })
+        );
+        assert_eq!(
+            write_barrier_log[1],
+            (TestPtr { value: 42 }, TestPtr { value: 43 })
+        );
+    }
+}
diff --git a/crates/jstz_engine/src/lib.rs b/crates/jstz_engine/src/lib.rs
index 21deba376..6e23ae427 100644
--- a/crates/jstz_engine/src/lib.rs
+++ b/crates/jstz_engine/src/lib.rs
@@ -1,5 +1,6 @@
 mod compartment;
 mod context;
+mod gc;
 mod realm;
 mod script;