Skip to content

Commit

Permalink
Auto merge of #119977 - Mark-Simulacrum:defid-cache, r=cjgillot
Browse files Browse the repository at this point in the history
Cache local DefId-keyed queries without hashing

This caches local DefId-keyed queries using just an IndexVec. This costs ~5% extra max-rss at most but brings a significant runtime improvement: up to 13% fewer cycles (mean: 4%) on primary benchmarks. Further tweaks could probably reduce the memory overhead, particularly eliminating the present set in non-incremental mode or storing it inline (skip list?) with the main data, but this win seems worth landing despite the increased memory.

We tried applying this scheme to all keys in the [first perf run] but found that it carried a significant memory hit (50%). Instruction and cycle counts were also much more mixed, though that may have been due to the lack of the present set optimization (needed for fast iter() calls in incremental scenarios).

Closes #45275

[first perf run]: https://perf.rust-lang.org/compare.html?start=30dfb9e046aeb878db04332c74de76e52fb7db10&end=6235575300d8e6e2cc6f449cb9048722ef43f9c7&stat=instructions:u
  • Loading branch information
bors committed Jan 16, 2024
2 parents 92f2e0a + 3784964 commit 098d4fd
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 3 deletions.
3 changes: 2 additions & 1 deletion compiler/rustc_middle/src/query/keys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::ty::{self, Ty, TyCtxt};
use crate::ty::{GenericArg, GenericArgsRef};
use rustc_hir::def_id::{CrateNum, DefId, LocalDefId, LocalModDefId, ModDefId, LOCAL_CRATE};
use rustc_hir::hir_id::{HirId, OwnerId};
use rustc_query_system::query::DefIdCacheSelector;
use rustc_query_system::query::{DefaultCacheSelector, SingleCacheSelector, VecCacheSelector};
use rustc_span::symbol::{Ident, Symbol};
use rustc_span::{Span, DUMMY_SP};
Expand Down Expand Up @@ -152,7 +153,7 @@ impl Key for LocalDefId {
}

impl Key for DefId {
type CacheSelector = DefaultCacheSelector<Self>;
type CacheSelector = DefIdCacheSelector;

fn default_span(&self, tcx: TyCtxt<'_>) -> Span {
tcx.def_span(*self)
Expand Down
79 changes: 78 additions & 1 deletion compiler/rustc_query_system/src/query/caches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ use crate::dep_graph::DepNodeIndex;

use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sharded::{self, Sharded};
use rustc_data_structures::sync::OnceLock;
use rustc_data_structures::sync::{Lock, OnceLock};
use rustc_hir::def_id::LOCAL_CRATE;
use rustc_index::{Idx, IndexVec};
use rustc_span::def_id::DefId;
use rustc_span::def_id::DefIndex;
use std::fmt::Debug;
use std::hash::Hash;
use std::marker::PhantomData;
Expand Down Expand Up @@ -148,6 +151,8 @@ where

#[inline(always)]
fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
    // FIXME: `lock_shard_by_hash` selects the shard from the high bits of its argument, and
    // those bits are usually zero for the `index()` passed here. Sharding is therefore
    // essentially useless: the zeroth shard is always the one selected.
    let shard = self.cache.lock_shard_by_hash(key.index() as u64);
    // A hit requires both that the slot exists and that it has been filled.
    match shard.get(*key) {
        Some(&Some(entry)) => Some(entry),
        _ => None,
    }
}
Expand All @@ -168,3 +173,75 @@ where
}
}
}

/// Marker type that picks [`DefIdCache`] as the query cache through its `CacheSelector` impl.
pub struct DefIdCacheSelector;

/// Resolves the selector to its concrete cache type for a value type `V`.
impl<'tcx, V: 'tcx> CacheSelector<'tcx, V> for DefIdCacheSelector {
// The cache type is only defined for `Copy` values.
type Cache = DefIdCache<V>
where
V: Copy;
}

/// Query cache keyed by `DefId` that keeps entries for the local crate and entries for other
/// crates in two separate stores.
pub struct DefIdCache<V> {
/// Stores the local DefIds in a dense map. Local queries are much more often dense, so this is
/// a win over hashing query keys at marginal memory cost (~5% at most) compared to FxHashMap.
///
/// The second element of the tuple is the set of keys actually present in the IndexVec, used
/// for faster iteration in `iter()`.
// FIXME: This may want to be sharded, like VecCache. However *how* to shard an IndexVec isn't
// super clear; VecCache is effectively not sharded today (see FIXME there). For now just omit
// that complexity here.
local: Lock<(IndexVec<DefIndex, Option<(V, DepNodeIndex)>>, Vec<DefIndex>)>,
/// Stores entries whose `DefId` does not belong to `LOCAL_CRATE`, using the default cache.
foreign: DefaultCache<DefId, V>,
}

impl<V> Default for DefIdCache<V> {
fn default() -> Self {
DefIdCache { local: Default::default(), foreign: Default::default() }
}
}

/// `QueryCache` implementation that routes every operation on `key.krate`: keys in
/// `LOCAL_CRATE` go to the dense `local` store, all other keys go to the `foreign` cache.
impl<V> QueryCache for DefIdCache<V>
where
    V: Copy,
{
    type Key = DefId;
    type Value = V;

    /// Returns the cached value and its dep-node index for `key`, or `None` when no result
    /// has been recorded for it.
    #[inline(always)]
    fn lookup(&self, key: &DefId) -> Option<(V, DepNodeIndex)> {
        if key.krate != LOCAL_CRATE {
            return self.foreign.lookup(key);
        }

        let guard = self.local.lock();
        let (entries, _) = &*guard;
        // An index past the end of the vector and an unfilled slot both count as a miss.
        entries.get(key.index).copied().flatten()
    }

    /// Records `value` and `index` as the result for `key`, overwriting any earlier entry.
    #[inline]
    fn complete(&self, key: DefId, value: V, index: DepNodeIndex) {
        if key.krate != LOCAL_CRATE {
            self.foreign.complete(key, value, index);
            return;
        }

        let mut guard = self.local.lock();
        let (entries, present) = &mut *guard;
        // Grow the vector with empty slots as needed, then fill the slot for this key.
        let slot = entries.ensure_contains_elem(key.index, || None);
        let newly_filled = slot.replace((value, index)).is_none();
        if newly_filled {
            // FIXME: Only store the present set when running in incremental mode. `iter` is not
            // used outside of saving caches to disk and self-profile.
            present.push(key.index);
        }
    }

    /// Calls `f` for every cached entry: first the local ones, in the order they were first
    /// completed, then whatever the foreign cache yields.
    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
        let guard = self.local.lock();
        let (entries, present) = &*guard;
        for &def_index in present {
            // `complete` only pushes an index onto `present` for a slot it then fills.
            let (value, dep_node) = entries[def_index].unwrap();
            let key = DefId { krate: LOCAL_CRATE, index: def_index };
            f(&key, &value, dep_node);
        }
        self.foreign.iter(f);
    }
}
3 changes: 2 additions & 1 deletion compiler/rustc_query_system/src/query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ pub use self::job::{

mod caches;
pub use self::caches::{
CacheSelector, DefaultCacheSelector, QueryCache, SingleCacheSelector, VecCacheSelector,
CacheSelector, DefIdCacheSelector, DefaultCacheSelector, QueryCache, SingleCacheSelector,
VecCacheSelector,
};

mod config;
Expand Down

0 comments on commit 098d4fd

Please sign in to comment.