Skip to content

Commit

Permalink
TypeId: use a (v0) mangled type to remain sound in the face of hash c…
Browse files Browse the repository at this point in the history
…ollisions.
  • Loading branch information
eddyb committed Apr 9, 2022
1 parent bb34360 commit 48bed67
Show file tree
Hide file tree
Showing 14 changed files with 305 additions and 47 deletions.
93 changes: 87 additions & 6 deletions compiler/rustc_const_eval/src/const_eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@

use std::convert::TryFrom;

use rustc_hir::lang_items::LangItem;
use rustc_hir::Mutability;
use rustc_middle::ty::{self, TyCtxt};
use rustc_middle::{
mir::{self, interpret::ConstAlloc},
ty::ScalarInt,
};
use rustc_middle::mir::{self, interpret::ConstAlloc};
use rustc_middle::ty::layout::LayoutOf;
use rustc_middle::ty::{self, ScalarInt, Ty, TyCtxt};
use rustc_span::{source_map::DUMMY_SP, symbol::Symbol};
use rustc_target::abi::Size;

use crate::interpret::{
intern_const_alloc_recursive, ConstValue, InternKind, InterpCx, InterpResult, MPlaceTy,
self, intern_const_alloc_recursive, ConstValue, InternKind, InterpCx, InterpResult, MPlaceTy,
MemPlaceMeta, Scalar,
};

Expand Down Expand Up @@ -39,6 +39,87 @@ pub(crate) fn const_caller_location(
ConstValue::Scalar(Scalar::from_pointer(loc_place.ptr.into_pointer_or_addr().unwrap(), &tcx))
}

/// Builds the constant value of the `TypeId` lang item for `ty`.
///
/// The result is a `ConstValue::ByRef` pointing at an interned allocation
/// of the `TypeId` struct, whose fields are initialized as follows:
/// * field 0: the `u64` returned by `tcx.type_id_hash(ty)`
/// * field 1: a thin reference to a freshly allocated `TypeManglingStr`,
///   which stores the mangling length (field 0) followed by the bytes of
///   `tcx.type_id_mangling(param_env.and(ty))` (field 1)
///
/// Both allocations are made with `MemoryKind::IntrinsicGlobal` and are
/// then interned via `intern_const_alloc_recursive` (`InternKind::Constant`).
///
/// # Panics
///
/// Every interpreter step is `unwrap`ped, and a failure to intern the
/// allocation is reported through `bug!`, so any error here aborts
/// compilation instead of being returned to the caller.
pub(crate) fn const_type_id<'tcx>(
tcx: TyCtxt<'tcx>,
param_env: ty::ParamEnv<'tcx>,
ty: Ty<'tcx>,
) -> ConstValue<'tcx> {
trace!("const_type_id: {}", ty);

// Compute (logical) `TypeId` field values, before trying to encode them.
let hash = tcx.type_id_hash(ty);
let mangling = tcx.type_id_mangling(param_env.and(ty)).name;

let mut ecx = mk_eval_cx(tcx, DUMMY_SP, param_env, false);

let type_id_ty = tcx.type_of(tcx.require_lang_item(LangItem::TypeId, None));
let type_id_layout = ecx.layout_of(type_id_ty).unwrap();

// Encode `TypeId` field values, before putting together the allocation.
let hash_val = Scalar::from_u64(hash);
let mangling_val = {
let mangling_len = u64::try_from(mangling.len()).unwrap();
let mangling_len_val = Scalar::from_machine_usize(mangling_len, &ecx);

// The field is `mangling: &TypeManglingStr`, get `TypeManglingStr` from it.
let mangling_field_ty = type_id_layout.field(&ecx, 1).ty;
let type_mangling_str_ty = mangling_field_ty.builtin_deref(true).unwrap().ty;

// Allocate memory for `TypeManglingStr` struct.
let type_mangling_str_layout = ecx.layout_of(type_mangling_str_ty).unwrap();
let type_mangling_str_place = {
// NOTE(eddyb) this is similar to the `ecx.allocate(...)` used below
// for `type_id_place`, except with an additional size for the
// string bytes (`mangling`) being added to the `TypeManglingStr`
// (which is unsized, using an `extern { type }` tail).
let layout = type_mangling_str_layout;
let size = layout.size + Size::from_bytes(mangling_len);
let ptr = ecx
.allocate_ptr(size, layout.align.abi, interpret::MemoryKind::IntrinsicGlobal)
.unwrap();
MPlaceTy::from_aligned_ptr(ptr.into(), layout)
};

// Initialize `TypeManglingStr` fields.
ecx.write_scalar(
mangling_len_val,
&ecx.mplace_field(&type_mangling_str_place, 0).unwrap().into(),
)
.unwrap();
ecx.write_bytes_ptr(
ecx.mplace_field(&type_mangling_str_place, 1).unwrap().ptr,
mangling.bytes(),
)
.unwrap();

// `&TypeManglingStr` has no metadata, thanks to the length being stored
// behind the reference (in the first field of `TypeManglingStr`).
type_mangling_str_place.to_ref(&ecx).to_scalar().unwrap()
};

// FIXME(eddyb) everything below would be unnecessary if `ConstValue` could
// hold a pair of `Scalar`s, or if we moved to valtrees.

// Allocate memory for `TypeId` struct.
let type_id_place =
ecx.allocate(type_id_layout, interpret::MemoryKind::IntrinsicGlobal).unwrap();

// Initialize `TypeId` fields.
ecx.write_scalar(hash_val, &ecx.mplace_field(&type_id_place, 0).unwrap().into()).unwrap();
ecx.write_scalar(mangling_val, &ecx.mplace_field(&type_id_place, 1).unwrap().into()).unwrap();

// Convert the `TypeId` allocation from being in `ecx`, to a global `ConstValue`.
if intern_const_alloc_recursive(&mut ecx, InternKind::Constant, &type_id_place).is_err() {
bug!("intern_const_alloc_recursive should not error in this case")
}
let (type_id_alloc_id, type_id_offset) =
type_id_place.ptr.into_pointer_or_addr().unwrap().into_parts();
ConstValue::ByRef {
alloc: tcx.global_alloc(type_id_alloc_id).unwrap_memory(),
offset: type_id_offset,
}
}

/// Convert an evaluated constant to a type level constant
pub(crate) fn const_to_valtree<'tcx>(
tcx: TyCtxt<'tcx>,
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_const_eval/src/interpret/intern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ fn intern_shallow<'rt, 'mir, 'tcx, M: CompileTimeMachine<'mir, 'tcx, const_eval:
match kind {
MemoryKind::Stack
| MemoryKind::Machine(const_eval::MemoryKind::Heap)
| MemoryKind::CallerLocation => {}
| MemoryKind::IntrinsicGlobal => {}
}
// Set allocation mutability as appropriate. This is used by LLVM to put things into
// read-only memory, and also by Miri when evaluating other globals that
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ crate fn eval_nullary_intrinsic<'tcx>(
}
sym::type_id => {
ensure_monomorphic_enough(tcx, tp_ty)?;
ConstValue::from_u64(tcx.type_id_hash(tp_ty))
crate::const_eval::const_type_id(tcx, param_env, tp_ty)
}
sym::variant_count => match tp_ty.kind() {
// Correctly handles non-monomorphic calls, so there is no need for ensure_monomorphic_enough.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,25 +82,25 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
) -> MPlaceTy<'tcx, M::PointerTag> {
let loc_details = &self.tcx.sess.opts.debugging_opts.location_detail;
let file = if loc_details.file {
self.allocate_str(filename.as_str(), MemoryKind::CallerLocation, Mutability::Not)
self.allocate_str(filename.as_str(), MemoryKind::IntrinsicGlobal, Mutability::Not)
} else {
// FIXME: This creates a new allocation each time. It might be preferable to
// perform this allocation only once, and re-use the `MPlaceTy`.
// See https://github.com/rust-lang/rust/pull/89920#discussion_r730012398
self.allocate_str("<redacted>", MemoryKind::CallerLocation, Mutability::Not)
self.allocate_str("<redacted>", MemoryKind::IntrinsicGlobal, Mutability::Not)
};
let line = if loc_details.line { Scalar::from_u32(line) } else { Scalar::from_u32(0) };
let col = if loc_details.column { Scalar::from_u32(col) } else { Scalar::from_u32(0) };

// Allocate memory for `CallerLocation` struct.
// Allocate memory for `panic::Location` struct.
let loc_ty = self
.tcx
.type_of(self.tcx.require_lang_item(LangItem::PanicLocation, None))
.subst(*self.tcx, self.tcx.mk_substs([self.tcx.lifetimes.re_erased.into()].iter()));
let loc_layout = self.layout_of(loc_ty).unwrap();
// This can fail if rustc runs out of memory right here. Trying to emit an error would be
// pointless, since that would require allocating more memory than a Location.
let location = self.allocate(loc_layout, MemoryKind::CallerLocation).unwrap();
let location = self.allocate(loc_layout, MemoryKind::IntrinsicGlobal).unwrap();

// Initialize fields.
self.write_immediate(file.to_ref(self), &self.mplace_field(&location, 0).unwrap().into())
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ use super::{
pub enum MemoryKind<T> {
/// Stack memory. Error if deallocated except during a stack pop.
Stack,
/// Memory allocated by `caller_location` intrinsic. Error if ever deallocated.
CallerLocation,
/// Global memory allocated by an intrinsic. Error if ever deallocated.
IntrinsicGlobal,
/// Additional memory kinds a machine wishes to distinguish from the builtin ones.
Machine(T),
}
Expand All @@ -40,7 +40,7 @@ impl<T: MayLeak> MayLeak for MemoryKind<T> {
fn may_leak(self) -> bool {
match self {
MemoryKind::Stack => false,
MemoryKind::CallerLocation => true,
MemoryKind::IntrinsicGlobal => true,
MemoryKind::Machine(k) => k.may_leak(),
}
}
Expand All @@ -50,7 +50,7 @@ impl<T: fmt::Display> fmt::Display for MemoryKind<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
MemoryKind::Stack => write!(f, "stack variable"),
MemoryKind::CallerLocation => write!(f, "caller location"),
MemoryKind::IntrinsicGlobal => write!(f, "global memory (from intrinsic)"),
MemoryKind::Machine(m) => write!(f, "{}", m),
}
}
Expand Down
50 changes: 47 additions & 3 deletions compiler/rustc_middle/src/query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -995,14 +995,58 @@ rustc_queries! {
desc { |tcx| "generating MIR shim for `{}`", tcx.def_path_str(key.def_id()) }
}

/// The `symbol_name` query provides the symbol name for calling a
/// given instance from the local crate. In particular, it will also
/// look up the correct symbol name of instances from upstream crates.
/// The `symbol_name` query provides the symbol name for the given instance.
///
/// Both `static` and `fn` instances have symbol names, whether definitions
/// (on the Rust side, either from the local crate or an upstream one), or
/// imports in a "foreign block" (`extern {...}`).
///
/// This symbol name is the canonical one for that instance, and must be
/// used for both linker-level exports (definitions) and imports (uses),
/// of that instance (i.e. it's the sole connection the linker sees).
///
/// By default, Rust definitions have mangled symbols, to avoid conflicts,
/// and to allow for many instances ("monomorphizations") of generic `fn`s.
/// The exact choice of mangling can vary, and not all type information from
/// the instance may always be present in a form that allows demangling back
/// to a human-readable form. See also the `symbol_mangling_version` query
/// and the `rustc_symbol_mangling` crate.
///
/// Note however that `fn` lifetime parameters are erased (and so they never
/// participate in monomorphization), meaning mangled Rust symbol names will
/// never contain information about such lifetimes (mangled lifetimes only
/// occur for higher-ranked types, e.g. `foo::<for<'a> fn(&'a X)>`).
query symbol_name(key: ty::Instance<'tcx>) -> ty::SymbolName<'tcx> {
desc { "computing the symbol for `{}`", key }
cache_on_disk_if { true }
}

/// The `type_id_mangling` query provides the Rust mangling of the given type,
/// for use in `TypeId`, as a guard against `type_id_hash` collisions.
///
/// Unlike the `symbol_name` query, the mangling used for types doesn't vary
/// between crates, and encodes all the type information "structurally"
/// (i.e. lossy encodings such as hashing aren't allowed, as that would
/// effectively defeat the purpose of guarding against hash collisions).
///
/// If this is used outside of `TypeId`, some additional caveats apply:
/// * it's not a full symbol, so it could collide with unrelated exports,
/// if used directly as a linker symbol without a prefix and/or suffix
/// * mangling features such as compression (e.g. `v0` backrefs) mean that
/// it cannot be trivially embedded in a larger mangled Rust symbol - for
/// that usecase, prefer using `symbol_name` with an instance of either
/// a custom `InstanceDef`, or at least a generic lang item (`fn`, though
/// associated `const` may work better for a type-dependent `static`)
/// * every Rust mangling erases most lifetimes, with the only exception
/// being those found in higher-ranked types (e.g. `for<'a> fn(&'a X)`)
//
// FIXME(eddyb) this shouldn't be using `ty::SymbolName`, but `&'tcx str`,
// or `ty::SymbolName` should be renamed to "tcx-interned string".
query type_id_mangling(key: ty::ParamEnvAnd<'tcx, Ty<'tcx>>) -> ty::SymbolName<'tcx> {
desc { "computing the type mangling of `{}`", key.value }
cache_on_disk_if { true }
}

query opt_def_kind(def_id: DefId) -> Option<DefKind> {
desc { |tcx| "looking up definition kind of `{}`", tcx.def_path_str(def_id) }
separate_provide_extern
Expand Down
15 changes: 10 additions & 5 deletions compiler/rustc_symbol_mangling/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,10 @@ pub fn symbol_name_for_instance_in_crate<'tcx>(
}

pub fn provide(providers: &mut Providers) {
*providers = Providers { symbol_name: symbol_name_provider, ..*providers };
*providers = Providers { symbol_name, type_id_mangling, ..*providers };
}

// The `symbol_name` query provides the symbol name for calling a given
// instance from the local crate. In particular, it will also look up the
// correct symbol name of instances from upstream crates.
fn symbol_name_provider<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty::SymbolName<'tcx> {
fn symbol_name<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty::SymbolName<'tcx> {
let symbol_name = compute_symbol_name(tcx, instance, || {
// This closure determines the instantiating crate for instances that
// need an instantiating-crate-suffix for their symbol name, in order
Expand All @@ -150,6 +147,14 @@ fn symbol_name_provider<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty
ty::SymbolName::new(tcx, &symbol_name)
}

/// Provider for the `type_id_mangling` query: mangles the queried type with
/// the `v0` scheme and wraps the resulting string in a `ty::SymbolName`.
fn type_id_mangling<'tcx>(
    tcx: TyCtxt<'tcx>,
    query: ty::ParamEnvAnd<'tcx, Ty<'tcx>>,
) -> ty::SymbolName<'tcx> {
    // Split the query key back into the environment and the type itself.
    let (env, queried_ty) = query.into_parts();
    let mangled: String = v0::mangle_type(tcx, env, queried_ty);
    ty::SymbolName::new(tcx, &mangled)
}

/// This function computes the LLVM CFI typeid for the given `FnAbi`.
pub fn llvm_cfi_typeid_for_fn_abi<'tcx>(
_tcx: TyCtxt<'tcx>,
Expand Down
40 changes: 30 additions & 10 deletions compiler/rustc_symbol_mangling/src/v0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,11 @@ pub(super) fn mangle<'tcx>(
// FIXME(eddyb) this should ideally not be needed.
let substs = tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), instance.substs);

let prefix = "_R";
let mut cx = &mut SymbolMangler {
tcx,
start_offset: prefix.len(),
paths: FxHashMap::default(),
types: FxHashMap::default(),
consts: FxHashMap::default(),
binders: vec![],
out: String::from(prefix),
};
let mut cx = &mut SymbolMangler::new(tcx);

// The `_R` prefix indicates a Rust mangled symbol.
cx.push("_R");
cx.start_offset = cx.out.len();

// Append `::{shim:...#0}` to shims that can coexist with a non-shim instance.
let shim_kind = match instance.def {
Expand All @@ -57,6 +52,19 @@ pub(super) fn mangle<'tcx>(
std::mem::take(&mut cx.out)
}

/// Returns the bare `v0` mangling of `ty`, i.e. only what `print_type`
/// emits into a fresh `SymbolMangler` (no prefix is pushed beforehand).
///
/// The type is normalized (with regions erased) under a reveal-all version
/// of `param_env` before being printed.
pub(super) fn mangle_type<'tcx>(
    tcx: TyCtxt<'tcx>,
    param_env: ty::ParamEnv<'tcx>,
    ty: Ty<'tcx>,
) -> String {
    let reveal_all_env = param_env.with_reveal_all_normalized(tcx);
    let normalized_ty = tcx.normalize_erasing_regions(reveal_all_env, ty);

    let mut mangler = SymbolMangler::new(tcx);
    mangler.print_type(normalized_ty).unwrap();
    mangler.out
}

struct BinderLevel {
/// The range of distances from the root of what's
/// being printed, to the lifetimes in a binder.
Expand Down Expand Up @@ -85,6 +93,18 @@ struct SymbolMangler<'tcx> {
}

impl<'tcx> SymbolMangler<'tcx> {
fn new(tcx: TyCtxt<'tcx>) -> Self {
Self {
tcx,
start_offset: 0,
paths: FxHashMap::default(),
types: FxHashMap::default(),
consts: FxHashMap::default(),
binders: vec![],
out: String::new(),
}
}

fn push(&mut self, s: &str) {
self.out.push_str(s);
}
Expand Down
Loading

0 comments on commit 48bed67

Please sign in to comment.