code coverage foundation for hash and num_counters #73488

Merged
merged 10 commits on Jun 24, 2020
32 changes: 11 additions & 21 deletions src/librustc_codegen_llvm/intrinsic.rs
@@ -23,7 +23,6 @@ use rustc_middle::ty::layout::{FnAbiExt, HasTyCtxt};
use rustc_middle::ty::{self, Ty};
use rustc_middle::{bug, span_bug};
use rustc_span::Span;
use rustc_span::Symbol;
use rustc_target::abi::{self, HasDataLayout, LayoutOf, Primitive};
use rustc_target::spec::PanicStrategy;

@@ -141,26 +140,17 @@ impl IntrinsicCallMethods<'tcx> for Builder<'a, 'll, 'tcx> {
self.call(llfn, &[], None)
}
"count_code_region" => {
if let ty::InstanceDef::Item(fn_def_id) = caller_instance.def {
let caller_fn_path = tcx.def_path_str(fn_def_id);
debug!(
"count_code_region to llvm.instrprof.increment(fn_name={})",
caller_fn_path
);

// FIXME(richkadel): (1) Replace raw function name with mangled function name;
// (2) Replace hardcoded `1234` in `hash` with a computed hash (as discussed in)
// the MCP (compiler-team/issues/278); and replace the hardcoded `1` for
// `num_counters` with the actual number of counters per function (when the
// changes are made to inject more than one counter per function).
let (fn_name, _len_val) = self.const_str(Symbol::intern(&caller_fn_path));
let index = args[0].immediate();
let hash = self.const_u64(1234);
let num_counters = self.const_u32(1);
self.instrprof_increment(fn_name, hash, num_counters, index)
} else {
bug!("intrinsic count_code_region: no src.instance");
}
let coverage_data = tcx.coverage_data(caller_instance.def_id());
let mangled_fn = tcx.symbol_name(caller_instance);
let (mangled_fn_name, _len_val) = self.const_str(mangled_fn.name);
let hash = self.const_u64(coverage_data.hash);
let num_counters = self.const_u32(coverage_data.num_counters);
let index = args[0].immediate();
debug!(
"count_code_region to LLVM intrinsic instrprof.increment(fn_name={}, hash={:?}, num_counters={:?}, index={:?})",
mangled_fn.name, hash, num_counters, index
);
self.instrprof_increment(mangled_fn_name, hash, num_counters, index)
}
"va_start" => self.va_start(args[0].immediate()),
"va_end" => self.va_end(args[0].immediate()),
2 changes: 2 additions & 0 deletions src/librustc_metadata/locator.rs
@@ -488,6 +488,8 @@ impl<'a> CrateLocator<'a> {
&& self.triple != TargetTriple::from_triple(config::host_triple())
{
err.note(&format!("the `{}` target may not be installed", self.triple));
} else if self.crate_name == sym::profiler_builtins {
err.note(&"the compiler may have been built without the profiler runtime");
}
err.span_label(self.span, "can't find crate");
err
2 changes: 1 addition & 1 deletion src/librustc_middle/hir/map/mod.rs
@@ -56,7 +56,7 @@ fn fn_sig<'hir>(node: Node<'hir>) -> Option<&'hir FnSig<'hir>> {
}
}

fn associated_body<'hir>(node: Node<'hir>) -> Option<BodyId> {
pub fn associated_body<'hir>(node: Node<'hir>) -> Option<BodyId> {
match node {
Node::Item(Item {
kind: ItemKind::Const(_, body) | ItemKind::Static(.., body) | ItemKind::Fn(.., body),
33 changes: 31 additions & 2 deletions src/librustc_middle/ich/hcx.rs
@@ -67,13 +67,15 @@ impl<'a> StableHashingContext<'a> {
/// Don't use it for anything else or you'll run the risk of
/// leaking data out of the tracking system.
#[inline]
pub fn new(
fn new_with_or_without_spans(
sess: &'a Session,
krate: &'a hir::Crate<'a>,
definitions: &'a Definitions,
cstore: &'a dyn CrateStore,
always_ignore_spans: bool,
) -> Self {
let hash_spans_initial = !sess.opts.debugging_opts.incremental_ignore_spans;
let hash_spans_initial =
!always_ignore_spans && !sess.opts.debugging_opts.incremental_ignore_spans;

StableHashingContext {
sess,
@@ -88,6 +90,33 @@ impl<'a> StableHashingContext<'a> {
}
}

#[inline]
pub fn new(
sess: &'a Session,
krate: &'a hir::Crate<'a>,
definitions: &'a Definitions,
cstore: &'a dyn CrateStore,
) -> Self {
Self::new_with_or_without_spans(
sess,
krate,
definitions,
cstore,
/*always_ignore_spans=*/ false,
)
}

#[inline]
pub fn ignore_spans(
sess: &'a Session,
krate: &'a hir::Crate<'a>,
definitions: &'a Definitions,
cstore: &'a dyn CrateStore,
) -> Self {
let always_ignore_spans = true;
Self::new_with_or_without_spans(sess, krate, definitions, cstore, always_ignore_spans)
}

#[inline]
pub fn sess(&self) -> &'a Session {
self.sess
19 changes: 17 additions & 2 deletions src/librustc_middle/mir/mod.rs
@@ -164,8 +164,8 @@ pub struct Body<'tcx> {
/// The user may be writing e.g. `&[(SOME_CELL, 42)][i].1` and this would get promoted, because
/// we'd statically know that no thing with interior mutability will ever be available to the
/// user without some serious unsafe code. Now this means that our promoted is actually
/// `&[(SOME_CELL, 42)]` and the MIR using it will do the `&promoted[i].1` projection because the
/// index may be a runtime value. Such a promoted value is illegal because it has reachable
/// `&[(SOME_CELL, 42)]` and the MIR using it will do the `&promoted[i].1` projection because
/// the index may be a runtime value. Such a promoted value is illegal because it has reachable
/// interior mutability. This flag just makes this situation very obvious where the previous
/// implementation without the flag hid this situation silently.
/// FIXME(oli-obk): rewrite the promoted during promotion to eliminate the cell components.
@@ -2919,3 +2919,18 @@ impl Location {
}
}
}

/// Coverage data associated with each function (MIR) instrumented with coverage counters, when
/// compiled with `-Zinstrument_coverage`. The query `tcx.coverage_data(DefId)` computes these
/// values on demand (during code generation). This query is only valid after executing the MIR pass
/// `InstrumentCoverage`.
#[derive(Clone, RustcEncodable, RustcDecodable, Debug, HashStable)]
pub struct CoverageData {
/// A hash value that can be used by the consumer of the coverage profile data to detect
/// changes to the instrumented source of the associated MIR body (typically, for an
/// individual function).
pub hash: u64,

/// The total number of coverage region counters added to the MIR `Body`.
pub num_counters: u32,
}
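
The hash field above is computed with a span-ignoring stable hashing context (see create_no_span_stable_hashing_context and hash_mir_source later in this diff), presumably so that merely moving a function within a file does not invalidate its recorded coverage. A minimal standalone sketch of that idea, using std's DefaultHasher rather than rustc's StableHasher, with a made-up (token, line) representation standing in for HIR nodes and Spans:

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Standalone illustration only; not rustc code. Hash the body's tokens while
// ignoring their positions, so relocating the body leaves the hash unchanged
// but any edit to the body changes it.
fn coverage_hash(tokens_with_lines: &[(&str, u32)]) -> u64 {
    let mut hasher = DefaultHasher::new();
    for (token, _line) in tokens_with_lines {
        // Only the token text is hashed; the line number (a stand-in for a Span)
        // is deliberately ignored, mirroring the "no span" hashing context.
        token.hash(&mut hasher);
    }
    hasher.finish()
}

fn main() {
    let original = coverage_hash(&[("if", 10), ("x", 10), (">", 10), ("0", 10)]);
    let moved = coverage_hash(&[("if", 42), ("x", 42), (">", 42), ("0", 42)]);
    let edited = coverage_hash(&[("if", 10), ("x", 10), (">=", 10), ("0", 10)]);
    assert_eq!(original, moved); // same source at a new location: hash unchanged
    assert_ne!(original, edited); // edited source: hash differs, coverage data is stale
}
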
6 changes: 6 additions & 0 deletions src/librustc_middle/query/mod.rs
@@ -214,6 +214,12 @@ rustc_queries! {
cache_on_disk_if { key.is_local() }
}

query coverage_data(key: DefId) -> mir::CoverageData {
desc { |tcx| "retrieving coverage data from MIR for `{}`", tcx.def_path_str(key) }
storage(ArenaCacheSelector<'tcx>)
cache_on_disk_if { key.is_local() }
}

query promoted_mir(key: DefId) -> IndexVec<mir::Promoted, mir::Body<'tcx>> {
desc { |tcx| "optimizing promoted MIR for `{}`", tcx.def_path_str(key) }
storage(ArenaCacheSelector<'tcx>)
7 changes: 7 additions & 0 deletions src/librustc_middle/ty/context.rs
@@ -1284,6 +1284,13 @@ impl<'tcx> TyCtxt<'tcx> {
StableHashingContext::new(self.sess, krate, self.definitions, &*self.cstore)
}

#[inline(always)]
pub fn create_no_span_stable_hashing_context(self) -> StableHashingContext<'tcx> {
let krate = self.gcx.untracked_crate;

StableHashingContext::ignore_spans(self.sess, krate, self.definitions, &*self.cstore)
}

// This method makes sure that we have a DepNode and a Fingerprint for
// every upstream crate. It needs to be called once right after the tcx is
// created.
191 changes: 136 additions & 55 deletions src/librustc_mir/transform/instrument_coverage.rs
@@ -1,9 +1,18 @@
use crate::transform::{MirPass, MirSource};
use crate::util::patch::MirPatch;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_hir::lang_items;
use rustc_middle::hir;
use rustc_middle::ich::StableHashingContext;
use rustc_middle::mir::interpret::Scalar;
use rustc_middle::mir::*;
use rustc_middle::mir::{
self, traversal, BasicBlock, BasicBlockData, CoverageData, Operand, Place, SourceInfo,
StatementKind, Terminator, TerminatorKind, START_BLOCK,
};
use rustc_middle::ty;
use rustc_middle::ty::query::Providers;
use rustc_middle::ty::FnDef;
use rustc_middle::ty::TyCtxt;
use rustc_span::def_id::DefId;
use rustc_span::Span;
@@ -12,64 +21,119 @@ use rustc_span::Span;
/// the intrinsic llvm.instrprof.increment.
pub struct InstrumentCoverage;

/// The `query` provider for `CoverageData`, requested by `codegen_intrinsic_call()` when
/// constructing the arguments for `llvm.instrprof.increment`.
pub(crate) fn provide(providers: &mut Providers<'_>) {
providers.coverage_data = |tcx, def_id| {
let mir_body = tcx.optimized_mir(def_id);
let count_code_region_fn =
tcx.require_lang_item(lang_items::CountCodeRegionFnLangItem, None);
let mut num_counters: u32 = 0;
for (_, data) in traversal::preorder(mir_body) {
if let Some(terminator) = &data.terminator {
if let TerminatorKind::Call { func: Operand::Constant(func), .. } = &terminator.kind
{
if let FnDef(called_fn_def_id, _) = func.literal.ty.kind {
if called_fn_def_id == count_code_region_fn {
num_counters += 1;
Member

It occurs to me that a MIR optimization could (a) duplicate a counter, or (b) inline counters from other functions. How should we deal with cases like that?

For (a) we could keep a set of counter IDs we've already seen, to deduplicate.

For handling (b), I'm assuming we'll want the name of the function where the counter originated in source? We could pass a def_id of the source function as part of the "intrinsic". But then how would we keep track of monomorphizations? We can always address this point in a follow-up PR, but it's worth thinking about.

Contributor Author

For (a), I'll change the query implementation to return max(counters) + 1. This fits well with the LLVM spec for instrprof.increment.

But (b) raises some interesting questions. Assuming it is possible for the MIR to include BasicBlocks from multiple "functions" in the source (which sounds logical), I'm thinking the same thing you suggested: I'll probably need to add the DefId of the function in the injected Call terminator, rather than assuming the code region to be counted is part of the function represented by the MIR's DefId.

Contributor Author

I addressed (a) in the latest commit.

For (b) I added FIXME comments for now. I'd like to address this in a separate PR, but will start looking into it.

Contributor Author

Oh, I just remembered, we did talk about this (the effect of inlining, for example), and my recommendation at the time is still something I'd like to propose: disable inlining, and any other optimizations that might "break" code coverage, if the instrument_coverage option is turned on.

I'm not sure if we can address all of the edge cases like (b) with this, but I think it's plausible. WDYT?

Member

We can disable inlining in that case, sure (it's still not enabled by default). It seems like we might want a solution for this long-term, but it doesn't seem like a super high priority.

}
}
}
}
}
let hash = if num_counters > 0 { hash_mir_source(tcx, def_id) } else { 0 };
CoverageData { num_counters, hash }
};
}
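
As noted in the review thread above, a MIR transformation could in principle duplicate a counter, so deriving num_counters from the highest counter index observed (max + 1) is more robust than counting call sites, and it fits the llvm.instrprof.increment convention that counter indexes range from 0 to num_counters - 1. A standalone sketch of that rule, in plain Rust rather than the provider above (the helper name is made up for illustration):

// Standalone sketch of the "max(counter index) + 1" rule discussed in the review
// thread; this is not the provider implementation from this PR.
fn num_counters_from_indexes(counter_indexes: &[u32]) -> u32 {
    counter_indexes.iter().copied().max().map_or(0, |max| max + 1)
}

fn main() {
    // A duplicated counter (index 1 appears twice) still yields three counters: 0, 1, 2.
    assert_eq!(num_counters_from_indexes(&[0, 1, 1, 2]), 3);
    // No counters injected: num_counters is 0.
    assert_eq!(num_counters_from_indexes(&[]), 0);
}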

struct Instrumentor<'tcx> {
tcx: TyCtxt<'tcx>,
num_counters: u32,
}

impl<'tcx> MirPass<'tcx> for InstrumentCoverage {
fn run_pass(&self, tcx: TyCtxt<'tcx>, src: MirSource<'tcx>, body: &mut Body<'tcx>) {
fn run_pass(&self, tcx: TyCtxt<'tcx>, src: MirSource<'tcx>, mir_body: &mut mir::Body<'tcx>) {
if tcx.sess.opts.debugging_opts.instrument_coverage {
debug!("instrumenting {:?}", src.def_id());
instrument_coverage(tcx, body);
// If the InstrumentCoverage pass is called on promoted MIRs, skip them.
// See: https://github.com/rust-lang/rust/pull/73011#discussion_r438317601
if src.promoted.is_none() {
debug!(
"instrumenting {:?}, span: {}",
src.def_id(),
tcx.sess.source_map().span_to_string(mir_body.span)
);
Instrumentor::new(tcx).inject_counters(mir_body);
}
}
}
}

// The first counter (start of the function) is index zero.
const INIT_FUNCTION_COUNTER: u32 = 0;

/// Injects calls to placeholder function `count_code_region()`.
// FIXME(richkadel): As a first step, counters are only injected at the top of each function.
// The complete solution will inject counters at each conditional code branch.
pub fn instrument_coverage<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
let span = body.span.shrink_to_lo();

let count_code_region_fn = function_handle(
tcx,
tcx.require_lang_item(lang_items::CountCodeRegionFnLangItem, None),
span,
);
let counter_index = Operand::const_from_scalar(
tcx,
tcx.types.u32,
Scalar::from_u32(INIT_FUNCTION_COUNTER),
span,
);

let mut patch = MirPatch::new(body);

let new_block = patch.new_block(placeholder_block(SourceInfo::outermost(body.span)));
let next_block = START_BLOCK;

let temp = patch.new_temp(tcx.mk_unit(), body.span);
patch.patch_terminator(
new_block,
TerminatorKind::Call {
func: count_code_region_fn,
args: vec![counter_index],
// new_block will swapped with the next_block, after applying patch
destination: Some((Place::from(temp), new_block)),
cleanup: None,
from_hir_call: false,
fn_span: span,
},
);

patch.add_statement(new_block.start_location(), StatementKind::StorageLive(temp));
patch.add_statement(next_block.start_location(), StatementKind::StorageDead(temp));

patch.apply(body);

// To insert the `new_block` in front of the first block in the counted branch (for example,
// the START_BLOCK, at the top of the function), just swap the indexes, leaving the rest of the
// graph unchanged.
body.basic_blocks_mut().swap(next_block, new_block);
impl<'tcx> Instrumentor<'tcx> {
fn new(tcx: TyCtxt<'tcx>) -> Self {
Self { tcx, num_counters: 0 }
}

fn next_counter(&mut self) -> u32 {
let next = self.num_counters;
self.num_counters += 1;
next
}

fn inject_counters(&mut self, mir_body: &mut mir::Body<'tcx>) {
// FIXME(richkadel): As a first step, counters are only injected at the top of each
// function. The complete solution will inject counters at each conditional code branch.
let top_of_function = START_BLOCK;
let entire_function = mir_body.span;

self.inject_counter(mir_body, top_of_function, entire_function);
}

fn inject_counter(
&mut self,
mir_body: &mut mir::Body<'tcx>,
next_block: BasicBlock,
code_region: Span,
) {
let injection_point = code_region.shrink_to_lo();

let count_code_region_fn = function_handle(
self.tcx,
self.tcx.require_lang_item(lang_items::CountCodeRegionFnLangItem, None),
injection_point,
);
let counter_index = Operand::const_from_scalar(
self.tcx,
self.tcx.types.u32,
Scalar::from_u32(self.next_counter()),
injection_point,
);

let mut patch = MirPatch::new(mir_body);

let temp = patch.new_temp(self.tcx.mk_unit(), code_region);
let new_block = patch.new_block(placeholder_block(code_region));
patch.patch_terminator(
new_block,
TerminatorKind::Call {
func: count_code_region_fn,
args: vec![counter_index],
// new_block will be swapped with the next_block after applying the patch
destination: Some((Place::from(temp), new_block)),
cleanup: None,
from_hir_call: false,
fn_span: injection_point,
},
);

patch.add_statement(new_block.start_location(), StatementKind::StorageLive(temp));
patch.add_statement(next_block.start_location(), StatementKind::StorageDead(temp));

patch.apply(mir_body);

// To insert the `new_block` in front of the first block in the counted branch (the
// `next_block`), just swap the indexes, leaving the rest of the graph unchanged.
mir_body.basic_blocks_mut().swap(next_block, new_block);
}
}

fn function_handle<'tcx>(tcx: TyCtxt<'tcx>, fn_def_id: DefId, span: Span) -> Operand<'tcx> {
@@ -79,14 +143,31 @@ fn function_handle<'tcx>(tcx: TyCtxt<'tcx>, fn_def_id: DefId, span: Span) -> Operand<'tcx> {
Operand::function_handle(tcx, fn_def_id, substs, span)
}

fn placeholder_block<'tcx>(source_info: SourceInfo) -> BasicBlockData<'tcx> {
fn placeholder_block(span: Span) -> BasicBlockData<'tcx> {
BasicBlockData {
statements: vec![],
terminator: Some(Terminator {
source_info,
source_info: SourceInfo::outermost(span),
// this gets overwritten by the counter Call
kind: TerminatorKind::Unreachable,
}),
is_cleanup: false,
}
}

fn hash_mir_source<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> u64 {
let hir_node = tcx.hir().get_if_local(def_id).expect("DefId is local");
let fn_body_id = hir::map::associated_body(hir_node).expect("HIR node is a function with body");
let hir_body = tcx.hir().body(fn_body_id);
let mut hcx = tcx.create_no_span_stable_hashing_context();
hash(&mut hcx, &hir_body.value).to_smaller_hash()
}

fn hash(
hcx: &mut StableHashingContext<'tcx>,
node: &impl HashStable<StableHashingContext<'tcx>>,
) -> Fingerprint {
let mut stable_hasher = StableHasher::new();
node.hash_stable(hcx, &mut stable_hasher);
stable_hasher.finish()
}