Refactor iterate_meta_bits (#1181)

This PR refactors the mechanism for visiting (reading and/or updating) side metadata in bulk, specifically the function `SideMetadataSpec::iterate_meta_bits`.

The function now uses a single `FnMut` callback instead of two `Fn` callbacks. It uses the enum type `BitByteRange` to distinguish whole byte ranges from bit ranges in a byte. This allows the user to capture variables mutably in the callback. This also removes the `Cell` used in `find_prev_non_zero_value_fast`.

The function is made non-recursive to improve performance. Some test cases are added to cover corner cases.

The function is moved to a dedicated `ranges` module and renamed to `break_bit_range`, for several reasons:

- It was a method of `SideMetadataSpec`, but it does not access any member of `SideMetadataSpec`.
- It needs a non-trivial amount of testing to get corner cases correct, especially after refactoring into a non-recursive function.
- Related types and functions can be added to the `ranges` module in the future, for example:
    - Breaking a range of bytes into a range of aligned words and unaligned bytes at the beginning and the end. It will be used by finding VO bits from internal pointers and finding all VO bits in a region (for heap traversal).
    - Breaking a range of bytes at chunk boundaries. It will be used by `bulk_update_metadata`.
mmtk · Aug 6, 2024 · 33785b9 · 33785b9
1 parent f785236
commit 33785b9
Show file tree

Hide file tree

Showing 11 changed files with 645 additions and 241 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -73,7 +73,10 @@ perf_counter = ["pfm"]
 
 # This feature is only used for tests with MockVM.
 # CI scripts run those tests with this feature.
-mock_test = []
+mock_test = ["test_private"]
+
+# This feature will expose some private functions for testing or benchmarking.
+test_private = []
 
 # .github/scripts/ci-common.sh extracts features from the following part (including from comments).
 # So be careful when editing or adding stuff to the section below.

diff --git a/benches/main.rs b/benches/main.rs
@@ -2,42 +2,25 @@ use criterion::criterion_group;
 use criterion::criterion_main;
 use criterion::Criterion;
 
-// As we can only initialize one MMTk instance, we have to run each benchmark in a separate process.
-// So we only register one benchmark to criterion ('bench_main'), and based on the env var MMTK_BENCH,
-// we pick the right benchmark to run.
+#[cfg(all(feature = "mock_test", feature = "test_private"))]
+pub mod mock_bench;
 
-// The benchmark can be executed with the following command. The feature `mock_test` is required, as the tests use MockVM.
-// MMTK_BENCH=alloc cargo bench --features mock_test
-// MMTK_BENCH=sft   cargo bench --features mock_test
+#[cfg(all(not(feature = "mock_test"), feature = "test_private"))]
+pub mod regular_bench;
 
-// [Yi] I am not sure if these benchmarks are helpful any more after the MockVM refactoring. MockVM is really slow, as it
-// is accessed with a lock, and it dispatches every call to function pointers in a struct. These tests may use MockVM,
-// so they become slower as well. And the slowdown
-// from MockVM may hide the actual performance difference when we change the functions that are benchmarked.
-// We may want to improve the MockVM implementation so we can skip dispatching for benchmarking, or introduce another MockVM
-// implementation for benchmarking.
-// However, I will just keep these benchmarks here. If we find it not useful, and we do not plan to improve MockVM, we can delete
-// them.
-
-#[cfg(feature = "mock_test")]
-mod mock_bench;
-
-pub fn bench_main(_c: &mut Criterion) {
-    #[cfg(feature = "mock_test")]
-    match std::env::var("MMTK_BENCH") {
-        Ok(bench) => match bench.as_str() {
-            "alloc" => mock_bench::alloc::bench(_c),
-            "internal_pointer" => mock_bench::internal_pointer::bench(_c),
-            "sft" => mock_bench::sft::bench(_c),
-            _ => panic!("Unknown benchmark {:?}", bench),
-        },
-        Err(_) => panic!("Need to name a benchmark by the env var MMTK_BENCH"),
-    }
-
-    #[cfg(not(feature = "mock_test"))]
-    {
-        eprintln!("ERROR: Currently there are no benchmarks when the \"mock_test\" feature is not enabled.");
-        std::process::exit(1);
+/// The single benchmark entry point; dispatches to the benchmark suite that matches the
+/// enabled cargo features.
+///
+/// * `mock_test` enabled: runs `mock_bench::bench` only.
+/// * Otherwise, `test_private` enabled: runs `regular_bench::bench`.
+/// * Neither enabled: prints usage hints to stderr and exits the process with status 1.
+pub fn bench_main(c: &mut Criterion) {
+    cfg_if::cfg_if! {
+        if #[cfg(feature = "mock_test")] {
+            // `mock_test` takes precedence: when it is enabled, only the mock benchmarks run.
+            mock_bench::bench(c);
+        } else if #[cfg(feature = "test_private")] {
+            regular_bench::bench(c);
+        } else {
+            // No benchmark is compiled in this configuration; mark `c` as used so the build
+            // does not emit an unused-variable warning.
+            let _ = c;
+            eprintln!("ERROR: Benchmarks in mmtk_core require the test_private feature (implied by mock_test) to run.");
+            eprintln!("  Rerun with `MMTK_BENCH=\"bench_name\" cargo bench --features mock_test` to run mock-test benchmarks.");
+            eprintln!("  Rerun with `cargo bench --features test_private -- bench_name` to run other benchmarks.");
+            std::process::exit(1);
+        }
     }
 }
 

diff --git a/benches/mock_bench/mod.rs b/benches/mock_bench/mod.rs
@@ -1,3 +1,34 @@
+use criterion::Criterion;
+
 pub mod alloc;
 pub mod internal_pointer;
 pub mod sft;
+
+// As we can only initialize one MMTk instance, we have to run each benchmark in a separate process.
+// So we only register one benchmark to criterion ('bench_main'), and based on the env var MMTK_BENCH,
+// we pick the right benchmark to run.
+
+// The benchmark can be executed with the following command. The feature `mock_test` is required, as the tests use MockVM.
+// MMTK_BENCH=alloc cargo bench --features mock_test
+// MMTK_BENCH=sft   cargo bench --features mock_test
+
+// [Yi] I am not sure if these benchmarks are helpful any more after the MockVM refactoring. MockVM is really slow, as it
+// is accessed with a lock, and it dispatches every call to function pointers in a struct. These tests may use MockVM,
+// so they become slower as well. And the slowdown
+// from MockVM may hide the actual performance difference when we change the functions that are benchmarked.
+// We may want to improve the MockVM implementation so we can skip dispatching for benchmarking, or introduce another MockVM
+// implementation for benchmarking.
+// However, I will just keep these benchmarks here. If we find it not useful, and we do not plan to improve MockVM, we can delete
+// them.
+
+/// Runs the mock benchmark selected by the `MMTK_BENCH` environment variable.
+///
+/// Recognized names are `alloc`, `internal_pointer` and `sft`.
+///
+/// # Panics
+///
+/// Panics if `MMTK_BENCH` cannot be read, or if it names an unknown benchmark.
+pub fn bench(c: &mut Criterion) {
+    let selected = std::env::var("MMTK_BENCH")
+        .unwrap_or_else(|_| panic!("Need to name a benchmark by the env var MMTK_BENCH"));
+
+    match selected.as_str() {
+        "alloc" => alloc::bench(c),
+        "internal_pointer" => internal_pointer::bench(c),
+        "sft" => sft::bench(c),
+        unknown => panic!("Unknown benchmark {:?}", unknown),
+    }
+}
diff --git a/benches/regular_bench/bulk_meta/bzero_bset.rs b/benches/regular_bench/bulk_meta/bzero_bset.rs
@@ -0,0 +1,62 @@
+//! Benchmarks for bulk zeroing and setting.
+
+use std::os::raw::c_void;
+
+use criterion::Criterion;
+use mmtk::util::{constants::LOG_BITS_IN_WORD, test_private, Address};
+
+/// Allocates `size` zeroed bytes whose start address is aligned to `size`.
+///
+/// `size` doubles as the alignment, so it must be a non-zero power of two. The buffer is not
+/// freed anywhere in this file; it only serves as scratch memory for the benchmarks.
+///
+/// # Panics
+///
+/// Panics if `size` does not form a valid layout. Reports the failure through
+/// `std::alloc::handle_alloc_error` if the allocation itself fails.
+fn allocate_aligned(size: usize) -> Address {
+    let layout = std::alloc::Layout::from_size_align(size, size)
+        .expect("size must be a non-zero power of two (it is also used as the alignment)");
+    // SAFETY: `layout` has a non-zero size, because a zero `size` would also be a zero
+    // alignment, which `from_size_align` rejects.
+    let ptr = unsafe { std::alloc::alloc_zeroed(layout) };
+    if ptr.is_null() {
+        // Fail loudly here instead of letting a benchmark write through a null address.
+        std::alloc::handle_alloc_error(layout);
+    }
+    Address::from_mut_ptr(ptr)
+}
+
+const LINE_BYTES: usize = 256usize; // Match an Immix line size.
+const BLOCK_BYTES: usize = 32768usize; // Match an Immix block size.
+
+// Assume one-bit-per-word metadata (matching VO bits).
+// Under that assumption, shifting right by `LOG_BITS_IN_WORD` converts data bytes to metadata bytes.
+const LINE_META_BYTES: usize = LINE_BYTES >> LOG_BITS_IN_WORD;
+const BLOCK_META_BYTES: usize = BLOCK_BYTES >> LOG_BITS_IN_WORD;
+
+/// Registers the bulk set/zero benchmarks with criterion.
+///
+/// Each buffer size (line and block) is measured twice: once through
+/// `test_private::set_meta_bits` followed by `test_private::zero_meta_bits`, and once through
+/// two plain `libc::memset` calls over the same byte range (presumably as a baseline).
+pub fn bench(c: &mut Criterion) {
+    // Line-sized metadata, using the bulk metadata helpers.
+    c.bench_function("bzero_bset_line", |bencher| {
+        let meta_start = allocate_aligned(LINE_META_BYTES);
+        let meta_end = meta_start + LINE_META_BYTES;
+
+        bencher.iter(|| {
+            test_private::set_meta_bits(meta_start, 0, meta_end, 0);
+            test_private::zero_meta_bits(meta_start, 0, meta_end, 0);
+        })
+    });
+
+    // Line-sized metadata, using `memset`.
+    c.bench_function("bzero_bset_line_memset", |bencher| {
+        let meta_start = allocate_aligned(LINE_META_BYTES);
+        let meta_end = meta_start + LINE_META_BYTES;
+
+        // SAFETY: `allocate_aligned` returned a buffer of `LINE_META_BYTES` bytes, which is
+        // exactly the `meta_end - meta_start` bytes written here.
+        bencher.iter(|| unsafe {
+            libc::memset(meta_start.as_mut_ref() as *mut c_void, 0xff, meta_end - meta_start);
+            libc::memset(meta_start.as_mut_ref() as *mut c_void, 0x00, meta_end - meta_start);
+        })
+    });
+
+    // Block-sized metadata, using the bulk metadata helpers.
+    c.bench_function("bzero_bset_block", |bencher| {
+        let meta_start = allocate_aligned(BLOCK_META_BYTES);
+        let meta_end = meta_start + BLOCK_META_BYTES;
+
+        bencher.iter(|| {
+            test_private::set_meta_bits(meta_start, 0, meta_end, 0);
+            test_private::zero_meta_bits(meta_start, 0, meta_end, 0);
+        })
+    });
+
+    // Block-sized metadata, using `memset`.
+    c.bench_function("bzero_bset_block_memset", |bencher| {
+        let meta_start = allocate_aligned(BLOCK_META_BYTES);
+        let meta_end = meta_start + BLOCK_META_BYTES;
+
+        // SAFETY: `allocate_aligned` returned a buffer of `BLOCK_META_BYTES` bytes, which is
+        // exactly the `meta_end - meta_start` bytes written here.
+        bencher.iter(|| unsafe {
+            libc::memset(meta_start.as_mut_ref() as *mut c_void, 0xff, meta_end - meta_start);
+            libc::memset(meta_start.as_mut_ref() as *mut c_void, 0x00, meta_end - meta_start);
+        })
+    });
+}
diff --git a/benches/regular_bench/bulk_meta/mod.rs b/benches/regular_bench/bulk_meta/mod.rs
@@ -0,0 +1,7 @@
+pub mod bzero_bset;
+
+pub use criterion::Criterion;
+
+/// Runs every benchmark in the `bulk_meta` group (currently only `bzero_bset`).
+pub fn bench(c: &mut Criterion) {
+    bzero_bset::bench(c);
+}
diff --git a/benches/regular_bench/mod.rs b/benches/regular_bench/mod.rs
@@ -0,0 +1,7 @@
+pub use criterion::Criterion;
+
+mod bulk_meta;
+
+/// Runs all regular (non-mock) benchmarks; currently this is only the `bulk_meta` group.
+pub fn bench(c: &mut Criterion) {
+    bulk_meta::bench(c);
+}