diff --git a/Cargo.toml b/Cargo.toml
index b089819007..45f528520c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -73,7 +73,10 @@ perf_counter = ["pfm"]
 
 # This feature is only used for tests with MockVM.
 # CI scripts run those tests with this feature.
-mock_test = []
+mock_test = ["test_private"]
+
+# This feature will expose some private functions for testing or benchmarking.
+test_private = []
 
 # .github/scripts/ci-common.sh extracts features from the following part (including from comments).
 # So be careful when editing or adding stuff to the section below.
diff --git a/benches/main.rs b/benches/main.rs
index 6c735ce4e2..651299694f 100644
--- a/benches/main.rs
+++ b/benches/main.rs
@@ -2,42 +2,25 @@ use criterion::criterion_group;
 use criterion::criterion_main;
 use criterion::Criterion;
 
-// As we can only initialize one MMTk instance, we have to run each benchmark in a separate process.
-// So we only register one benchmark to criterion ('bench_main'), and based on the env var MMTK_BENCH,
-// we pick the right benchmark to run.
+#[cfg(all(feature = "mock_test", feature = "test_private"))]
+pub mod mock_bench;
 
-// The benchmark can be executed with the following command. The feature `mock_test` is required, as the tests use MockVM.
-// MMTK_BENCH=alloc cargo bench --features mock_test
-// MMTK_BENCH=sft cargo bench --features mock_test
+#[cfg(all(not(feature = "mock_test"), feature = "test_private"))]
+pub mod regular_bench;
 
-// [Yi] I am not sure if these benchmarks are helpful any more after the MockVM refactoring. MockVM is really slow, as it
-// is accessed with a lock, and it dispatches every call to function pointers in a struct. These tests may use MockVM,
-// so they become slower as well. And the slowdown
-// from MockVM may hide the actual performance difference when we change the functions that are benchmarked.
-// We may want to improve the MockVM implementation so we can skip dispatching for benchmarking, or introduce another MockVM
-// implementation for benchmarking.
-// However, I will just keep these benchmarks here. If we find it not useful, and we do not plan to improve MockVM, we can delete
-// them.
-
-#[cfg(feature = "mock_test")]
-mod mock_bench;
-
-pub fn bench_main(_c: &mut Criterion) {
-    #[cfg(feature = "mock_test")]
-    match std::env::var("MMTK_BENCH") {
-        Ok(bench) => match bench.as_str() {
-            "alloc" => mock_bench::alloc::bench(_c),
-            "internal_pointer" => mock_bench::internal_pointer::bench(_c),
-            "sft" => mock_bench::sft::bench(_c),
-            _ => panic!("Unknown benchmark {:?}", bench),
-        },
-        Err(_) => panic!("Need to name a benchmark by the env var MMTK_BENCH"),
-    }
-
-    #[cfg(not(feature = "mock_test"))]
-    {
-        eprintln!("ERROR: Currently there are no benchmarks when the \"mock_test\" feature is not enabled.");
-        std::process::exit(1);
+pub fn bench_main(c: &mut Criterion) {
+    cfg_if::cfg_if! {
+        if #[cfg(feature = "mock_test")] {
+            // If the "mock_test" feature is enabled, we only run the mock-test benchmarks.
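+            // `mock_bench::bench` selects the actual benchmark to run from the MMTK_BENCH env var.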
+            mock_bench::bench(c);
+        } else if #[cfg(feature = "test_private")] {
+            regular_bench::bench(c);
+        } else {
+            eprintln!("ERROR: Benchmarks in mmtk_core require the test_private feature (implied by mock_test) to run.");
+            eprintln!("  Rerun with `MMTK_BENCH=\"bench_name\" cargo bench --features mock_test` to run mock-test benchmarks.");
+            eprintln!("  Rerun with `cargo bench --features test_private -- bench_name` to run other benchmarks.");
+            std::process::exit(1);
+        }
     }
 }
diff --git a/benches/mock_bench/mod.rs b/benches/mock_bench/mod.rs
index f4ca1c4428..95466bfdc7 100644
--- a/benches/mock_bench/mod.rs
+++ b/benches/mock_bench/mod.rs
@@ -1,3 +1,34 @@
+use criterion::Criterion;
+
 pub mod alloc;
 pub mod internal_pointer;
 pub mod sft;
+
+// As we can only initialize one MMTk instance, we have to run each benchmark in a separate process.
+// So we only register one benchmark to criterion ('bench_main'), and based on the env var MMTK_BENCH,
+// we pick the right benchmark to run.
+
+// The benchmark can be executed with the following command. The feature `mock_test` is required, as the tests use MockVM.
+// MMTK_BENCH=alloc cargo bench --features mock_test
+// MMTK_BENCH=sft cargo bench --features mock_test
+
+// [Yi] I am not sure if these benchmarks are helpful any more after the MockVM refactoring. MockVM is really slow, as it
+// is accessed with a lock, and it dispatches every call to function pointers in a struct. These tests may use MockVM,
+// so they become slower as well. And the slowdown
+// from MockVM may hide the actual performance difference when we change the functions that are benchmarked.
+// We may want to improve the MockVM implementation so we can skip dispatching for benchmarking, or introduce another MockVM
+// implementation for benchmarking.
+// However, I will just keep these benchmarks here. If we find it not useful, and we do not plan to improve MockVM, we can delete
+// them.
+
+pub fn bench(c: &mut Criterion) {
+    match std::env::var("MMTK_BENCH") {
+        Ok(bench) => match bench.as_str() {
+            "alloc" => alloc::bench(c),
+            "internal_pointer" => internal_pointer::bench(c),
+            "sft" => sft::bench(c),
+            _ => panic!("Unknown benchmark {:?}", bench),
+        },
+        Err(_) => panic!("Need to name a benchmark by the env var MMTK_BENCH"),
+    }
+}
diff --git a/benches/regular_bench/bulk_meta/bzero_bset.rs b/benches/regular_bench/bulk_meta/bzero_bset.rs
new file mode 100644
index 0000000000..fc01e58b4c
--- /dev/null
+++ b/benches/regular_bench/bulk_meta/bzero_bset.rs
@@ -0,0 +1,62 @@
+//! Benchmarks for bulk zeroing and setting.
+
+use std::os::raw::c_void;
+
+use criterion::Criterion;
+use mmtk::util::{constants::LOG_BITS_IN_WORD, test_private, Address};
+
+fn allocate_aligned(size: usize) -> Address {
+    let ptr = unsafe {
+        std::alloc::alloc_zeroed(std::alloc::Layout::from_size_align(size, size).unwrap())
+    };
+    Address::from_mut_ptr(ptr)
+}
+
+const LINE_BYTES: usize = 256usize; // Match an Immix line size.
+const BLOCK_BYTES: usize = 32768usize; // Match an Immix block size.
+
+// Assume one-bit-per-word metadata (matching VO bits).
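+// On a 64-bit machine (LOG_BITS_IN_WORD = 6), a 256-byte line therefore maps to 256 >> 6 = 4
+// metadata bytes, and a 32768-byte block maps to 32768 >> 6 = 512 metadata bytes.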
+const LINE_META_BYTES: usize = LINE_BYTES >> LOG_BITS_IN_WORD; +const BLOCK_META_BYTES: usize = BLOCK_BYTES >> LOG_BITS_IN_WORD; + +pub fn bench(c: &mut Criterion) { + c.bench_function("bzero_bset_line", |b| { + let start = allocate_aligned(LINE_META_BYTES); + let end = start + LINE_META_BYTES; + + b.iter(|| { + test_private::set_meta_bits(start, 0, end, 0); + test_private::zero_meta_bits(start, 0, end, 0); + }) + }); + + c.bench_function("bzero_bset_line_memset", |b| { + let start = allocate_aligned(LINE_META_BYTES); + let end = start + LINE_META_BYTES; + + b.iter(|| unsafe { + libc::memset(start.as_mut_ref() as *mut c_void, 0xff, end - start); + libc::memset(start.as_mut_ref() as *mut c_void, 0x00, end - start); + }) + }); + + c.bench_function("bzero_bset_block", |b| { + let start = allocate_aligned(BLOCK_META_BYTES); + let end = start + BLOCK_META_BYTES; + + b.iter(|| { + test_private::set_meta_bits(start, 0, end, 0); + test_private::zero_meta_bits(start, 0, end, 0); + }) + }); + + c.bench_function("bzero_bset_block_memset", |b| { + let start = allocate_aligned(BLOCK_META_BYTES); + let end = start + BLOCK_META_BYTES; + + b.iter(|| unsafe { + libc::memset(start.as_mut_ref() as *mut c_void, 0xff, end - start); + libc::memset(start.as_mut_ref() as *mut c_void, 0x00, end - start); + }) + }); +} diff --git a/benches/regular_bench/bulk_meta/mod.rs b/benches/regular_bench/bulk_meta/mod.rs new file mode 100644 index 0000000000..488258cd96 --- /dev/null +++ b/benches/regular_bench/bulk_meta/mod.rs @@ -0,0 +1,7 @@ +pub mod bzero_bset; + +pub use criterion::Criterion; + +pub fn bench(c: &mut Criterion) { + bzero_bset::bench(c); +} diff --git a/benches/regular_bench/mod.rs b/benches/regular_bench/mod.rs new file mode 100644 index 0000000000..63b1f3a1a5 --- /dev/null +++ b/benches/regular_bench/mod.rs @@ -0,0 +1,7 @@ +pub use criterion::Criterion; + +mod bulk_meta; + +pub fn bench(c: &mut Criterion) { + bulk_meta::bench(c); +} diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs index 39df588b41..23764525a7 100644 --- a/src/util/metadata/side_metadata/global.rs +++ b/src/util/metadata/side_metadata/global.rs @@ -8,6 +8,7 @@ use crate::util::metadata::metadata_val_traits::*; use crate::util::metadata::vo_bit::VO_BIT_SIDE_METADATA_SPEC; use crate::util::Address; use num_traits::FromPrimitive; +use ranges::BitByteRange; use std::fmt; use std::io::Result; use std::sync::atomic::{AtomicU8, Ordering}; @@ -154,188 +155,77 @@ impl SideMetadataSpec { MMAPPER.is_mapped_address(meta_addr) } - /// This method is used for iterating side metadata for a data address range. As we cannot guarantee - /// that the data address range can be mapped to whole metadata bytes, we have to deal with cases that - /// we need to mask and zero certain bits in a metadata byte. The end address and the end bit are exclusive. - /// The end bit for update_bits could be 8, so overflowing needs to be taken care of. - /// - /// Returns true if we iterate through every bits in the range. Return false if we abort iteration early. - /// - /// Arguments: - /// * `forwards`: If true, we iterate forwards (from start/low address to end/high address). Otherwise, - /// we iterate backwards (from end/high address to start/low address). - /// * `visit_bytes`/`visit_bits`: The closures returns whether the itertion is early terminated. - pub(super) fn iterate_meta_bits( + /// This method is used for bulk zeroing side metadata for a data address range. 
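+    /// The end address and the end bit are exclusive.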
+ pub(crate) fn zero_meta_bits( meta_start_addr: Address, meta_start_bit: u8, meta_end_addr: Address, meta_end_bit: u8, - forwards: bool, - visit_bytes: &impl Fn(Address, Address) -> bool, - visit_bits: &impl Fn(Address, u8, u8) -> bool, - ) -> bool { - trace!( - "iterate_meta_bits: {} {}, {} {}", - meta_start_addr, - meta_start_bit, - meta_end_addr, - meta_end_bit - ); - // Start/end is the same, we don't need to do anything. - if meta_start_addr == meta_end_addr && meta_start_bit == meta_end_bit { - return false; - } - - // zeroing bytes - if meta_start_bit == 0 && meta_end_bit == 0 { - return visit_bytes(meta_start_addr, meta_end_addr); - } - - if meta_start_addr == meta_end_addr { - // Update bits in the same byte between start and end bit - visit_bits(meta_start_addr, meta_start_bit, meta_end_bit) - } else if meta_start_addr + 1usize == meta_end_addr && meta_end_bit == 0 { - // Update bits in the same byte after the start bit (between start bit and 8) - visit_bits(meta_start_addr, meta_start_bit, 8) - } else { - // Update each segments. - // Clippy wants to move this if block up as a else-if block. But I think this is logically more clear. So disable the clippy warning. - #[allow(clippy::collapsible_else_if)] - if forwards { - // update bits in the first byte - if Self::iterate_meta_bits( - meta_start_addr, - meta_start_bit, - meta_start_addr + 1usize, - 0, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - // update bytes in the middle - if Self::iterate_meta_bits( - meta_start_addr + 1usize, - 0, - meta_end_addr, - 0, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - // update bits in the last byte - if Self::iterate_meta_bits( - meta_end_addr, - 0, - meta_end_addr, - meta_end_bit, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - false - } else { - // update bits in the last byte - if Self::iterate_meta_bits( - meta_end_addr, - 0, - meta_end_addr, - meta_end_bit, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - // update bytes in the middle - if Self::iterate_meta_bits( - meta_start_addr + 1usize, - 0, - meta_end_addr, - 0, - forwards, - visit_bytes, - visit_bits, - ) { - return true; + ) { + let mut visitor = |range| { + match range { + BitByteRange::Bytes { start, end } => { + memory::zero(start, end - start); + false } - // update bits in the first byte - if Self::iterate_meta_bits( - meta_start_addr, - meta_start_bit, - meta_start_addr + 1usize, - 0, - forwards, - visit_bytes, - visit_bits, - ) { - return true; + BitByteRange::BitsInByte { + addr, + bit_start, + bit_end, + } => { + // we are zeroing selected bit in one byte + // Get a mask that the bits we need to zero are set to zero, and the other bits are 1. + let mask: u8 = + u8::MAX.checked_shl(bit_end as u32).unwrap_or(0) | !(u8::MAX << bit_start); + unsafe { addr.as_ref::() }.fetch_and(mask, Ordering::SeqCst); + false } - false } - } - } - - /// This method is used for bulk zeroing side metadata for a data address range. 
- pub(super) fn zero_meta_bits( - meta_start_addr: Address, - meta_start_bit: u8, - meta_end_addr: Address, - meta_end_bit: u8, - ) { - let zero_bytes = |start: Address, end: Address| -> bool { - memory::zero(start, end - start); - false }; - let zero_bits = |addr: Address, start_bit: u8, end_bit: u8| -> bool { - // we are zeroing selected bits in one byte - let mask: u8 = - u8::MAX.checked_shl(end_bit.into()).unwrap_or(0) | !(u8::MAX << start_bit); // Get a mask that the bits we need to zero are set to zero, and the other bits are 1. - unsafe { addr.as_ref::() }.fetch_and(mask, Ordering::SeqCst); - false - }; - Self::iterate_meta_bits( + ranges::break_bit_range( meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit, true, - &zero_bytes, - &zero_bits, + &mut visitor, ); } /// This method is used for bulk setting side metadata for a data address range. - pub(super) fn set_meta_bits( + pub(crate) fn set_meta_bits( meta_start_addr: Address, meta_start_bit: u8, meta_end_addr: Address, meta_end_bit: u8, ) { - let set_bytes = |start: Address, end: Address| -> bool { - memory::set(start, 0xff, end - start); - false - }; - let set_bits = |addr: Address, start_bit: u8, end_bit: u8| -> bool { - // we are setting selected bits in one byte - let mask: u8 = - !(u8::MAX.checked_shl(end_bit.into()).unwrap_or(0)) & (u8::MAX << start_bit); // Get a mask that the bits we need to set are 1, and the other bits are 0. - unsafe { addr.as_ref::() }.fetch_or(mask, Ordering::SeqCst); - false + let mut visitor = |range| { + match range { + BitByteRange::Bytes { start, end } => { + memory::set(start, 0xff, end - start); + false + } + BitByteRange::BitsInByte { + addr, + bit_start, + bit_end, + } => { + // we are setting selected bits in one byte + // Get a mask that the bits we need to set are 1, and the other bits are 0. + let mask: u8 = !(u8::MAX.checked_shl(bit_end as u32).unwrap_or(0)) + & (u8::MAX << bit_start); + unsafe { addr.as_ref::() }.fetch_or(mask, Ordering::SeqCst); + false + } + } }; - Self::iterate_meta_bits( + ranges::break_bit_range( meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit, true, - &set_bytes, - &set_bits, + &mut visitor, ); } @@ -498,37 +388,44 @@ impl SideMetadataSpec { debug_assert_eq!(dst_meta_start_bit, src_meta_start_bit); - let copy_bytes = |dst_start: Address, dst_end: Address| -> bool { - unsafe { - let byte_offset = dst_start - dst_meta_start_addr; - let src_start = src_meta_start_addr + byte_offset; - let size = dst_end - dst_start; - std::ptr::copy::(src_start.to_ptr(), dst_start.to_mut_ptr(), size); - false + let mut visitor = |range| { + match range { + BitByteRange::Bytes { + start: dst_start, + end: dst_end, + } => unsafe { + let byte_offset = dst_start - dst_meta_start_addr; + let src_start = src_meta_start_addr + byte_offset; + let size = dst_end - dst_start; + std::ptr::copy::(src_start.to_ptr(), dst_start.to_mut_ptr(), size); + false + }, + BitByteRange::BitsInByte { + addr: dst, + bit_start, + bit_end, + } => { + let byte_offset = dst - dst_meta_start_addr; + let src = src_meta_start_addr + byte_offset; + // we are setting selected bits in one byte + let mask: u8 = !(u8::MAX.checked_shl(bit_end as u32).unwrap_or(0)) + & (u8::MAX << bit_start); // Get a mask that the bits we need to set are 1, and the other bits are 0. 
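+                    // Read-modify-write: take the bits selected by the mask from the source byte,
+                    // and keep the destination's bits outside the mask unchanged.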
+ let old_src = unsafe { src.as_ref::() }.load(Ordering::Relaxed); + let old_dst = unsafe { dst.as_ref::() }.load(Ordering::Relaxed); + let new = (old_src & mask) | (old_dst & !mask); + unsafe { dst.as_ref::() }.store(new, Ordering::Relaxed); + false + } } }; - let copy_bits = |dst: Address, start_bit: u8, end_bit: u8| -> bool { - let byte_offset = dst - dst_meta_start_addr; - let src = src_meta_start_addr + byte_offset; - // we are setting selected bits in one byte - let mask: u8 = - !(u8::MAX.checked_shl(end_bit.into()).unwrap_or(0)) & (u8::MAX << start_bit); // Get a mask that the bits we need to set are 1, and the other bits are 0. - let old_src = unsafe { src.as_ref::() }.load(Ordering::Relaxed); - let old_dst = unsafe { dst.as_ref::() }.load(Ordering::Relaxed); - let new = (old_src & mask) | (old_dst & !mask); - unsafe { dst.as_ref::() }.store(new, Ordering::Relaxed); - false - }; - - Self::iterate_meta_bits( + ranges::break_bit_range( dst_meta_start_addr, dst_meta_start_bit, dst_meta_end_addr, dst_meta_end_bit, true, - ©_bytes, - ©_bits, + &mut visitor, ); } @@ -1169,47 +1066,54 @@ impl SideMetadataSpec { // The result will be set by one of the following closures. // Use Cell so it doesn't need to be mutably borrowed by the two closures which Rust will complain. - let res = std::cell::Cell::new(None); - - let check_bytes_backwards = |start: Address, end: Address| -> bool { - match helpers::find_last_non_zero_bit_in_metadata_bytes(start, end) { - helpers::FindMetaBitResult::Found { addr, bit } => { - res.set(Some(contiguous_meta_address_to_address(self, addr, bit))); - // Return true to abort the search. We found the bit. - true + let mut res = None; + + let mut visitor = |range: BitByteRange| { + match range { + BitByteRange::Bytes { start, end } => { + match helpers::find_last_non_zero_bit_in_metadata_bytes(start, end) { + helpers::FindMetaBitResult::Found { addr, bit } => { + res = Some(contiguous_meta_address_to_address(self, addr, bit)); + // Return true to abort the search. We found the bit. + true + } + // If we see unmapped metadata, we don't need to search any more. + helpers::FindMetaBitResult::UnmappedMetadata => true, + // Return false to continue searching. + helpers::FindMetaBitResult::NotFound => false, + } } - // If we see unmapped metadata, we don't need to search any more. - helpers::FindMetaBitResult::UnmappedMetadata => true, - // Return false to continue searching. - helpers::FindMetaBitResult::NotFound => false, - } - }; - let check_bits_backwards = |addr: Address, start_bit: u8, end_bit: u8| -> bool { - match helpers::find_last_non_zero_bit_in_metadata_bits(addr, start_bit, end_bit) { - helpers::FindMetaBitResult::Found { addr, bit } => { - res.set(Some(contiguous_meta_address_to_address(self, addr, bit))); - // Return true to abort the search. We found the bit. - true + BitByteRange::BitsInByte { + addr, + bit_start, + bit_end, + } => { + match helpers::find_last_non_zero_bit_in_metadata_bits(addr, bit_start, bit_end) + { + helpers::FindMetaBitResult::Found { addr, bit } => { + res = Some(contiguous_meta_address_to_address(self, addr, bit)); + // Return true to abort the search. We found the bit. + true + } + // If we see unmapped metadata, we don't need to search any more. + helpers::FindMetaBitResult::UnmappedMetadata => true, + // Return false to continue searching. + helpers::FindMetaBitResult::NotFound => false, + } } - // If we see unmapped metadata, we don't need to search any more. 
- helpers::FindMetaBitResult::UnmappedMetadata => true, - // Return false to continue searching. - helpers::FindMetaBitResult::NotFound => false, } }; - Self::iterate_meta_bits( + ranges::break_bit_range( start_meta_addr, start_meta_shift, end_meta_addr, end_meta_shift, false, - &check_bytes_backwards, - &check_bits_backwards, + &mut visitor, ); - res.get() - .map(|addr| addr.align_down(1 << self.log_bytes_in_region)) + res.map(|addr| addr.align_down(1 << self.log_bytes_in_region)) } } diff --git a/src/util/metadata/side_metadata/mod.rs b/src/util/metadata/side_metadata/mod.rs index 406f91167d..612c69223f 100644 --- a/src/util/metadata/side_metadata/mod.rs +++ b/src/util/metadata/side_metadata/mod.rs @@ -7,6 +7,7 @@ mod helpers; mod helpers_32; mod global; +pub(crate) mod ranges; mod sanity; mod side_metadata_tests; pub(crate) mod spec_defs; diff --git a/src/util/metadata/side_metadata/ranges.rs b/src/util/metadata/side_metadata/ranges.rs new file mode 100644 index 0000000000..48d12a7068 --- /dev/null +++ b/src/util/metadata/side_metadata/ranges.rs @@ -0,0 +1,369 @@ +//! Data types for visiting metadata ranges at different granularities. +//! +//! Currently, the `break_bit_range` function can break a bit range into sub-ranges of whole bytes +//! and in-byte bits. +//! +//! TODO: +//! +//! - Add a function to break a byte range into sub-ranges of whole words and in-word bytes. +//! - And use it for searching side metadata for non-zero bits. +//! - Add a function to break a byte range at chunk boundaries. +//! - And use it for visiting discontiguous side metadata in bulk. + +use crate::util::Address; + +/// The type for bit offset in a byte. +pub type BitOffset = u8; + +/// A range of bytes or bits within a byte. It is the unit of visiting a contiguous bit range of a +/// side metadata. +/// +/// In general, a bit range of a bitmap starts with multiple bits in the byte, followed by many +/// whole bytes, and ends with multiple bits in the last byte. +/// +/// A range is never empty. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BitByteRange { + /// A range of whole bytes. + Bytes { + /// The starting address (inclusive) of the bytes. + start: Address, + /// The ending address (exclusive) of the bytes. + end: Address, + }, + /// A range of bits within a byte. + BitsInByte { + /// The address of the byte. + addr: Address, + /// The starting bit index (inclusive), starting with zero from the low-order bit. + bit_start: BitOffset, + /// The ending bit index (exclusive), starting with zero from the low-order bit. This may + /// be 8 which means the range includes the highest bit. Be careful when shifting a `u8` + /// value because shifting an `u8` by 8 is considered an overflow in Rust. + bit_end: BitOffset, + }, +} + +/// Break a bit range into sub-ranges of whole bytes and in-byte bits. +/// +/// This method is primarily used for iterating side metadata for a data address range. As we cannot +/// guarantee that the data address range can be mapped to whole metadata bytes, we have to deal +/// with visiting only a bit range in a metadata byte. +/// +/// The bit range starts at the bit at index `start_bit` in the byte at address `start_addr`, and +/// ends at (but does not include) the bit at index `end_bit` in the byte at address `end_addr`. +/// +/// Arguments: +/// * `forwards`: If true, we iterate forwards (from start/low address to end/high address). +/// Otherwise, we iterate backwards (from end/high address to start/low address). 
+/// * `visitor`: The callback that visits ranges of bits or bytes. It returns whether the
+///   iteration should be terminated early.
+///
+/// Returns true if the visitor terminated the iteration early; returns false if every bit in the
+/// range was visited.
+pub fn break_bit_range<V>(
+    start_addr: Address,
+    start_bit: BitOffset,
+    end_addr: Address,
+    end_bit: BitOffset,
+    forwards: bool,
+    visitor: &mut V,
+) -> bool
+where
+    V: FnMut(BitByteRange) -> bool,
+{
+    // If the start and the end are the same, we don't need to do anything.
+    if start_addr == end_addr && start_bit == end_bit {
+        return false;
+    }
+
+    // If the range is already byte-aligned, visit the entire range as whole bytes.
+    if start_bit == 0 && end_bit == 0 {
+        return visitor(BitByteRange::Bytes {
+            start: start_addr,
+            end: end_addr,
+        });
+    }
+
+    // If the start and the end are within the same byte,
+    // visit the bit range within the byte.
+    if start_addr == end_addr {
+        return visitor(BitByteRange::BitsInByte {
+            addr: start_addr,
+            bit_start: start_bit,
+            bit_end: end_bit,
+        });
+    }
+
+    // If the end is the 0th bit of the next byte of the start,
+    // visit the bit range from the start bit to the end (bit 8) of the same byte.
+    if start_addr + 1usize == end_addr && end_bit == 0 {
+        return visitor(BitByteRange::BitsInByte {
+            addr: start_addr,
+            bit_start: start_bit,
+            bit_end: 8_u8,
+        });
+    }
+
+    // Otherwise, the range spans multiple bytes and is bit-unaligned at the start, the end, or
+    // both. Try to break it into (at most) three sub-ranges.
+
+    let start_aligned = start_bit == 0;
+    let end_aligned = end_bit == 0;
+
+    // We cannot let multiple closures capture `visitor` mutably at the same time, so we pass the
+    // visitor in as `v` every time.
+
+    // visit bits within the first byte
+    let visit_start = |v: &mut V| {
+        if !start_aligned {
+            v(BitByteRange::BitsInByte {
+                addr: start_addr,
+                bit_start: start_bit,
+                bit_end: 8_u8,
+            })
+        } else {
+            // The start is already aligned. No sub-byte range at the start.
+            false
+        }
+    };
+
+    // visit whole bytes in the middle
+    let visit_middle = |v: &mut V| {
+        let start = if start_aligned {
+            start_addr
+        } else {
+            // If the start is not aligned, the whole-byte range starts after the first byte.
+            start_addr + 1usize
+        };
+        let end = end_addr;
+        if start < end {
+            v(BitByteRange::Bytes { start, end })
+        } else {
+            // There are no whole bytes in the middle.
+            false
+        }
+    };
+
+    // visit bits within the last byte
+    let visit_end = |v: &mut V| {
+        if !end_aligned {
+            v(BitByteRange::BitsInByte {
+                addr: end_addr,
+                bit_start: 0_u8,
+                bit_end: end_bit,
+            })
+        } else {
+            // The end is aligned. No sub-byte range at the end.
+            false
+        }
+    };
+
+    // Visit the three segments forwards or backwards.
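+    // `||` short-circuits: as soon as one segment's visitor call returns true (terminate early),
+    // the remaining segments are skipped and `true` is propagated to the caller.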
+ if forwards { + visit_start(visitor) || visit_middle(visitor) || visit_end(visitor) + } else { + visit_end(visitor) || visit_middle(visitor) || visit_start(visitor) + } +} + +#[cfg(test)] +mod tests { + use crate::util::constants::BITS_IN_BYTE; + + use super::*; + + fn mk_addr(addr: usize) -> Address { + unsafe { Address::from_usize(addr) } + } + + fn break_bit_range_wrapped( + start_addr: Address, + start_bit: usize, + end_addr: Address, + end_bit: usize, + ) -> Vec { + let mut vec = vec![]; + break_bit_range( + start_addr, + start_bit as u8, + end_addr, + end_bit as u8, + true, + &mut |range| { + vec.push(range); + false + }, + ); + vec + } + + #[test] + fn test_empty_range() { + let base = mk_addr(0x1000); + for bit in 0..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base, bit, base, bit); + assert!( + result.is_empty(), + "Not empty. bit: {bit}, result: {result:?}" + ); + } + } + + #[test] + fn test_subbyte_range() { + let base = mk_addr(0x1000); + for bit0 in 0..BITS_IN_BYTE { + for bit1 in (bit0 + 1)..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base, bit0, base, bit1); + assert_eq!( + result, + vec![BitByteRange::BitsInByte { + addr: base, + bit_start: bit0 as u8, + bit_end: bit1 as u8 + }], + "Not equal. bit0: {bit0}, bit1: {bit1}", + ); + } + } + } + + #[test] + fn test_end_byte_range() { + let base = mk_addr(0x1000); + for bit0 in 1..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base, bit0, base + 1usize, 0); + assert_eq!( + result, + vec![BitByteRange::BitsInByte { + addr: base, + bit_start: bit0 as u8, + bit_end: BITS_IN_BYTE as u8 + }], + "Not equal. bit0: {bit0}", + ); + } + } + + #[test] + fn test_adjacent_grain_range() { + let base = mk_addr(0x1000); + for bit0 in 1..BITS_IN_BYTE { + for bit1 in 1..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base, bit0, base + 1usize, bit1); + assert_eq!( + result, + vec![ + BitByteRange::BitsInByte { + addr: base, + bit_start: bit0 as u8, + bit_end: BITS_IN_BYTE as u8, + }, + BitByteRange::BitsInByte { + addr: base + 1usize, + bit_start: 0, + bit_end: bit1 as u8, + }, + ], + "Not equal. bit0: {bit0}, bit1: {bit1}", + ); + } + } + } + + #[test] + fn test_left_and_whole_range() { + let base = mk_addr(0x1000); + for bit0 in 1..BITS_IN_BYTE { + for byte1 in 2usize..8 { + let result = break_bit_range_wrapped(base, bit0, base + byte1, 0); + assert_eq!( + result, + vec![ + BitByteRange::BitsInByte { + addr: base, + bit_start: bit0 as u8, + bit_end: BITS_IN_BYTE as u8, + }, + BitByteRange::Bytes { + start: base + 1usize, + end: base + byte1, + }, + ], + "Not equal. bit0: {bit0}, byte1: {byte1}", + ); + } + } + } + + #[test] + fn test_whole_and_right_range() { + let base = mk_addr(0x1000); + for byte0 in 1..8 { + for bit1 in 1..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base - byte0, 0, base, bit1); + assert_eq!( + result, + vec![ + BitByteRange::Bytes { + start: base - byte0, + end: base, + }, + BitByteRange::BitsInByte { + addr: base, + bit_start: 0, + bit_end: bit1 as u8, + }, + ], + "Not equal. 
byte0: {byte0}, bit1: {bit1}", + ); + } + } + } + + #[test] + fn test_whole_range() { + let base = mk_addr(0x1000); + let result = break_bit_range_wrapped(base, 0, base + 42usize, 0); + assert_eq!( + result, + vec![BitByteRange::Bytes { + start: base, + end: base + 42usize, + },], + ); + } + + #[test] + fn test_left_whole_right_range() { + let base0 = mk_addr(0x1000); + let base1 = mk_addr(0x2000); + + for bit0 in 1..BITS_IN_BYTE { + for bit1 in 1..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base0 - 1usize, bit0, base1, bit1); + assert_eq!( + result, + vec![ + BitByteRange::BitsInByte { + addr: base0 - 1usize, + bit_start: bit0 as u8, + bit_end: BITS_IN_BYTE as u8, + }, + BitByteRange::Bytes { + start: base0, + end: base1, + }, + BitByteRange::BitsInByte { + addr: base1, + bit_start: 0, + bit_end: bit1 as u8, + }, + ], + "Not equal. bit0: {bit0}, bit1: {bit1}", + ); + } + } + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index 54ccc3ba8e..bac77a0b88 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -36,6 +36,8 @@ pub mod metadata; pub mod opaque_pointer; /// MMTk command line options. pub mod options; +#[cfg(feature = "test_private")] +pub mod test_private; /// Test utilities. We need this module for `MockVM` in criterion benches, which does not include code with `cfg(test)`. #[cfg(any(test, feature = "mock_test"))] pub mod test_util; diff --git a/src/util/test_private/mod.rs b/src/util/test_private/mod.rs new file mode 100644 index 0000000000..783dbeeac8 --- /dev/null +++ b/src/util/test_private/mod.rs @@ -0,0 +1,35 @@ +//! This module exposes private items in mmtk-core for testing and benchmarking. They must not be +//! used in production. +//! +//! # Notes on inlining +//! +//! In mmtk-core, we refrain from inserting inlining hints manually. But we use `#[inline(always)]` +//! in this module explicitly because the functions here are simple wrappers of private functions, +//! and the compiler usually fails to make the right decision given that those functions are not +//! used often, and we don't compile the benchmarks using feedback-directed optimizations. + +use crate::util::metadata::side_metadata::SideMetadataSpec; + +use super::Address; + +/// Expose `zero_meta_bits` when running `cargo bench`. +#[inline(always)] +pub fn zero_meta_bits( + meta_start_addr: Address, + meta_start_bit: u8, + meta_end_addr: Address, + meta_end_bit: u8, +) { + SideMetadataSpec::zero_meta_bits(meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit) +} + +/// Expose `set_meta_bits` when running `cargo bench`. +#[inline(always)] +pub fn set_meta_bits( + meta_start_addr: Address, + meta_start_bit: u8, + meta_end_addr: Address, + meta_end_bit: u8, +) { + SideMetadataSpec::set_meta_bits(meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit) +}