Skip to content

Commit

Permalink
Rollup merge of #118029 - saethlin:allocid-gc, r=RalfJung
Browse files Browse the repository at this point in the history
Expand Miri's BorTag GC to a Provenance GC

As suggested in rust-lang/miri#3080 (comment)

We previously solved memory growth issues associated with the Stacked Borrows and Tree Borrows runtimes with a GC. But of course we also have state accumulation associated with whole allocations elsewhere in the interpreter, and this PR starts tackling those.

To do this, we expand the visitor for the GC so that it can visit a BorTag or an AllocId. Instead of collecting all live AllocIds into a single HashSet, we just collect from the Machine itself then go through an accessor `InterpCx::is_alloc_live` which checks a number of allocation data structures in the core interpreter. This avoids the overhead of all the inserts that collecting their keys would require.

r? ``@RalfJung``
  • Loading branch information
Noratrieb authored Nov 21, 2023
2 parents aff407e + 9ada654 commit cbadb2e
Show file tree
Hide file tree
Showing 45 changed files with 459 additions and 316 deletions.
8 changes: 8 additions & 0 deletions compiler/rustc_const_eval/src/const_eval/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ impl<K: Hash + Eq, V> interpret::AllocMap<K, V> for FxIndexMap<K, V> {
FxIndexMap::contains_key(self, k)
}

#[inline(always)]
fn contains_key_ref<Q: ?Sized + Hash + Eq>(&self, k: &Q) -> bool
where
K: Borrow<Q>,
{
FxIndexMap::contains_key(self, k)
}

#[inline(always)]
fn insert(&mut self, k: K, v: V) -> Option<V> {
FxIndexMap::insert(self, k, v)
Expand Down
8 changes: 8 additions & 0 deletions compiler/rustc_const_eval/src/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ pub trait AllocMap<K: Hash + Eq, V> {
where
K: Borrow<Q>;

/// Callers should prefer [`AllocMap::contains_key`] when it is possible to call because it may
/// be more efficient. This function exists for callers that only have a shared reference
/// (which might make it slightly less efficient than `contains_key`, e.g. if
/// the data is stored inside a `RefCell`).
fn contains_key_ref<Q: ?Sized + Hash + Eq>(&self, k: &Q) -> bool
where
K: Borrow<Q>;

/// Inserts a new entry into the map.
fn insert(&mut self, k: K, v: V) -> Option<V>;

Expand Down
9 changes: 9 additions & 0 deletions compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,15 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
Ok((&mut alloc.extra, machine))
}

/// Check whether an allocation is live. This is faster than calling
/// [`InterpCx::get_alloc_info`] if all you need to check is whether the kind is
/// [`AllocKind::Dead`] because it doesn't have to look up the type and layout of statics.
pub fn is_alloc_live(&self, id: AllocId) -> bool {
self.tcx.try_get_global_alloc(id).is_some()
|| self.memory.alloc_map.contains_key_ref(&id)
|| self.memory.extra_fn_ptr_map.contains_key(&id)
}

/// Obtain the size and alignment of an allocation, even if that allocation has
/// been deallocated.
pub fn get_alloc_info(&self, id: AllocId) -> (Size, Align, AllocKind) {
Expand Down
7 changes: 0 additions & 7 deletions compiler/rustc_middle/src/mir/interpret/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -525,13 +525,6 @@ impl<'tcx> TyCtxt<'tcx> {
self.alloc_map.lock().reserve()
}

/// Miri's provenance GC needs to see all live allocations. The interpreter manages most
/// allocations but some are managed by [`TyCtxt`] and without this method the interpreter
/// doesn't know their [`AllocId`]s are in use.
pub fn iter_allocs<F: FnMut(AllocId)>(self, func: F) {
self.alloc_map.lock().alloc_map.keys().copied().for_each(func)
}

/// Reserves a new ID *if* this allocation has not been dedup-reserved before.
/// Should only be used for "symbolic" allocations (function pointers, vtables, statics), we
/// don't want to dedup IDs for "real" memory!
Expand Down
11 changes: 10 additions & 1 deletion src/ci/docker/host-x86_64/x86_64-gnu-tools/checktools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,16 @@ cat /tmp/toolstate/toolstates.json
python3 "$X_PY" test --stage 2 check-tools
python3 "$X_PY" test --stage 2 src/tools/clippy
python3 "$X_PY" test --stage 2 src/tools/rustfmt
python3 "$X_PY" test --stage 2 src/tools/miri

# Testing Miri is a bit more complicated.
# We set the GC interval to the shortest possible value (0 would be off) to increase the chance
# that bugs which only surface when the GC runs at a specific time are more likely to cause CI to fail.
# This significantly increases the runtime of our test suite, or we'd do this in PR CI too.
if [[ -z "${PR_CI_JOB:-}" ]]; then
MIRIFLAGS=-Zmiri-provenance-gc=1 python3 "$X_PY" test --stage 2 src/tools/miri
else
python3 "$X_PY" test --stage 2 src/tools/miri
fi
# We natively run this script on x86_64-unknown-linux-gnu and x86_64-pc-windows-msvc.
# Also cover some other targets via cross-testing, in particular all tier 1 targets.
export BOOTSTRAP_SKIP_TARGET_SANITY=1 # we don't need `cc` for these targets
Expand Down
2 changes: 1 addition & 1 deletion src/tools/miri/.github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:

- name: Set the tag GC interval to 1 on linux
if: runner.os == 'Linux'
run: echo "MIRIFLAGS=-Zmiri-tag-gc=1" >> $GITHUB_ENV
run: echo "MIRIFLAGS=-Zmiri-provenance-gc=1" >> $GITHUB_ENV

# Cache the global cargo directory, but NOT the local `target` directory which
# we cannot reuse anyway when the nightly changes (and it grows quite large
Expand Down
8 changes: 4 additions & 4 deletions src/tools/miri/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -411,10 +411,10 @@ to Miri failing to detect cases of undefined behavior in a program.
without an explicit value), `none` means it never recurses, `scalar` means it only recurses for
types where we would also emit `noalias` annotations in the generated LLVM IR (types passed as
individual scalars or pairs of scalars). Setting this to `none` is **unsound**.
* `-Zmiri-tag-gc=<blocks>` configures how often the pointer tag garbage collector runs. The default
is to search for and remove unreachable tags once every `10000` basic blocks. Setting this to
`0` disables the garbage collector, which causes some programs to have explosive memory usage
and/or super-linear runtime.
* `-Zmiri-provenance-gc=<blocks>` configures how often the pointer provenance garbage collector runs.
The default is to search for and remove unreachable provenance once every `10000` basic blocks. Setting
this to `0` disables the garbage collector, which causes some programs to have explosive memory
usage and/or super-linear runtime.
* `-Zmiri-track-alloc-id=<id1>,<id2>,...` shows a backtrace when the given allocations are
being allocated or freed. This helps in debugging memory leaks and
use after free bugs. Specifying this argument multiple times does not overwrite the previous
Expand Down
4 changes: 2 additions & 2 deletions src/tools/miri/src/bin/miri.rs
Original file line number Diff line number Diff line change
Expand Up @@ -531,10 +531,10 @@ fn main() {
Err(err) => show_error!("-Zmiri-report-progress requires a `u32`: {}", err),
};
miri_config.report_progress = Some(interval);
} else if let Some(param) = arg.strip_prefix("-Zmiri-tag-gc=") {
} else if let Some(param) = arg.strip_prefix("-Zmiri-provenance-gc=") {
let interval = match param.parse::<u32>() {
Ok(i) => i,
Err(err) => show_error!("-Zmiri-tag-gc requires a `u32`: {}", err),
Err(err) => show_error!("-Zmiri-provenance-gc requires a `u32`: {}", err),
};
miri_config.gc_interval = interval;
} else if let Some(param) = arg.strip_prefix("-Zmiri-measureme=") {
Expand Down
22 changes: 13 additions & 9 deletions src/tools/miri/src/borrow_tracker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ pub struct FrameState {
protected_tags: SmallVec<[(AllocId, BorTag); 2]>,
}

impl VisitTags for FrameState {
fn visit_tags(&self, _visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for FrameState {
fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
// `protected_tags` are already recorded by `GlobalStateInner`.
}
}
Expand Down Expand Up @@ -110,10 +110,10 @@ pub struct GlobalStateInner {
unique_is_unique: bool,
}

impl VisitTags for GlobalStateInner {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for GlobalStateInner {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
for &tag in self.protected_tags.keys() {
visit(tag);
visit(None, Some(tag));
}
// The only other candidate is base_ptr_tags, and that does not need visiting since we don't ever
// GC the bottommost/root tag.
Expand Down Expand Up @@ -236,6 +236,10 @@ impl GlobalStateInner {
tag
})
}

pub fn remove_unreachable_allocs(&mut self, allocs: &LiveAllocs<'_, '_, '_>) {
self.base_ptr_tags.retain(|id, _| allocs.is_live(*id));
}
}

/// Which borrow tracking method to use
Expand Down Expand Up @@ -503,11 +507,11 @@ impl AllocState {
}
}

impl VisitTags for AllocState {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for AllocState {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
match self {
AllocState::StackedBorrows(sb) => sb.visit_tags(visit),
AllocState::TreeBorrows(tb) => tb.visit_tags(visit),
AllocState::StackedBorrows(sb) => sb.visit_provenance(visit),
AllocState::TreeBorrows(tb) => tb.visit_provenance(visit),
}
}
}
6 changes: 3 additions & 3 deletions src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,10 +462,10 @@ impl Stacks {
}
}

impl VisitTags for Stacks {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for Stacks {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
for tag in self.exposed_tags.iter().copied() {
visit(tag);
visit(None, Some(tag));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ impl<'tcx> Tree {
/// Climb the tree to get the tag of a distant ancestor.
/// Allows operations on tags that are unreachable by the program
/// but still exist in the tree. Not guaranteed to perform consistently
/// if `tag-gc=1`.
/// if `provenance-gc=1`.
fn nth_parent(&self, tag: BorTag, nth_parent: u8) -> Option<BorTag> {
let mut idx = self.tag_mapping.get(&tag).unwrap();
for _ in 0..nth_parent {
Expand Down
6 changes: 3 additions & 3 deletions src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -742,11 +742,11 @@ impl Tree {
}
}

impl VisitTags for Tree {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for Tree {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
// To ensure that the root never gets removed, we visit it
// (the `root` node of `Tree` is not an `Option<_>`)
visit(self.nodes.get(self.root).unwrap().tag)
visit(None, Some(self.nodes.get(self.root).unwrap().tag))
}
}

Expand Down
10 changes: 5 additions & 5 deletions src/tools/miri/src/concurrency/data_race.rs
Original file line number Diff line number Diff line change
Expand Up @@ -790,9 +790,9 @@ pub struct VClockAlloc {
alloc_ranges: RefCell<RangeMap<MemoryCellClocks>>,
}

impl VisitTags for VClockAlloc {
fn visit_tags(&self, _visit: &mut dyn FnMut(BorTag)) {
// No tags here.
impl VisitProvenance for VClockAlloc {
fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
// No tags or allocIds here.
}
}

Expand Down Expand Up @@ -1404,8 +1404,8 @@ pub struct GlobalState {
pub track_outdated_loads: bool,
}

impl VisitTags for GlobalState {
fn visit_tags(&self, _visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for GlobalState {
fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
// We don't have any tags.
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/tools/miri/src/concurrency/init_once.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ pub(super) struct InitOnce<'mir, 'tcx> {
data_race: VClock,
}

impl<'mir, 'tcx> VisitTags for InitOnce<'mir, 'tcx> {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl<'mir, 'tcx> VisitProvenance for InitOnce<'mir, 'tcx> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
for waiter in self.waiters.iter() {
waiter.callback.visit_tags(visit);
waiter.callback.visit_provenance(visit);
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/tools/miri/src/concurrency/sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,10 @@ pub(crate) struct SynchronizationState<'mir, 'tcx> {
pub(super) init_onces: IndexVec<InitOnceId, InitOnce<'mir, 'tcx>>,
}

impl<'mir, 'tcx> VisitTags for SynchronizationState<'mir, 'tcx> {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl<'mir, 'tcx> VisitProvenance for SynchronizationState<'mir, 'tcx> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
for init_once in self.init_onces.iter() {
init_once.visit_tags(visit);
init_once.visit_provenance(visit);
}
}
}
Expand Down
38 changes: 19 additions & 19 deletions src/tools/miri/src/concurrency/thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ pub enum TlsAllocAction {
}

/// Trait for callbacks that can be executed when some event happens, such as after a timeout.
pub trait MachineCallback<'mir, 'tcx>: VisitTags {
pub trait MachineCallback<'mir, 'tcx>: VisitProvenance {
fn call(&self, ecx: &mut InterpCx<'mir, 'tcx, MiriMachine<'mir, 'tcx>>) -> InterpResult<'tcx>;
}

Expand Down Expand Up @@ -228,8 +228,8 @@ impl<'mir, 'tcx> Thread<'mir, 'tcx> {
}
}

impl VisitTags for Thread<'_, '_> {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for Thread<'_, '_> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let Thread {
panic_payloads: panic_payload,
last_error,
Expand All @@ -242,17 +242,17 @@ impl VisitTags for Thread<'_, '_> {
} = self;

for payload in panic_payload {
payload.visit_tags(visit);
payload.visit_provenance(visit);
}
last_error.visit_tags(visit);
last_error.visit_provenance(visit);
for frame in stack {
frame.visit_tags(visit)
frame.visit_provenance(visit)
}
}
}

impl VisitTags for Frame<'_, '_, Provenance, FrameExtra<'_>> {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for Frame<'_, '_, Provenance, FrameExtra<'_>> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let Frame {
return_place,
locals,
Expand All @@ -266,22 +266,22 @@ impl VisitTags for Frame<'_, '_, Provenance, FrameExtra<'_>> {
} = self;

// Return place.
return_place.visit_tags(visit);
return_place.visit_provenance(visit);
// Locals.
for local in locals.iter() {
match local.as_mplace_or_imm() {
None => {}
Some(Either::Left((ptr, meta))) => {
ptr.visit_tags(visit);
meta.visit_tags(visit);
ptr.visit_provenance(visit);
meta.visit_provenance(visit);
}
Some(Either::Right(imm)) => {
imm.visit_tags(visit);
imm.visit_provenance(visit);
}
}
}

extra.visit_tags(visit);
extra.visit_provenance(visit);
}
}

Expand Down Expand Up @@ -341,8 +341,8 @@ pub struct ThreadManager<'mir, 'tcx> {
timeout_callbacks: FxHashMap<ThreadId, TimeoutCallbackInfo<'mir, 'tcx>>,
}

impl VisitTags for ThreadManager<'_, '_> {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for ThreadManager<'_, '_> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let ThreadManager {
threads,
thread_local_alloc_ids,
Expand All @@ -353,15 +353,15 @@ impl VisitTags for ThreadManager<'_, '_> {
} = self;

for thread in threads {
thread.visit_tags(visit);
thread.visit_provenance(visit);
}
for ptr in thread_local_alloc_ids.borrow().values() {
ptr.visit_tags(visit);
ptr.visit_provenance(visit);
}
for callback in timeout_callbacks.values() {
callback.callback.visit_tags(visit);
callback.callback.visit_provenance(visit);
}
sync.visit_tags(visit);
sync.visit_provenance(visit);
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/tools/miri/src/concurrency/weak_memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,15 @@ pub struct StoreBufferAlloc {
store_buffers: RefCell<RangeObjectMap<StoreBuffer>>,
}

impl VisitTags for StoreBufferAlloc {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for StoreBufferAlloc {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let Self { store_buffers } = self;
for val in store_buffers
.borrow()
.iter()
.flat_map(|buf| buf.buffer.iter().map(|element| &element.val))
{
val.visit_tags(visit);
val.visit_provenance(visit);
}
}
}
Expand Down
Loading

0 comments on commit cbadb2e

Please sign in to comment.