Skip to content

Commit

Permalink
perf: improve LocalTraceIdentifier performance (#7118)
Browse files Browse the repository at this point in the history
* perf: improve LocalTraceIdentifier performance

Implementation modified from #7105,
see that PR for more information.

Co-authored-by: Arsenii Kulikov <[email protected]>

* docs

* fix: account for multiple matches

* feat: search len*0.9 .. len*1.1

* perf: start searching at same code length for common case

* fix: oob

---------

Co-authored-by: Arsenii Kulikov <[email protected]>
  • Loading branch information
DaniPopes and klkvr authored Feb 14, 2024
1 parent 92e50bf commit 73383b5
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 29 deletions.
10 changes: 0 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions crates/common/src/contracts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use once_cell::sync::Lazy;
use regex::Regex;
use std::{
collections::BTreeMap,
fmt,
ops::{Deref, DerefMut},
path::PathBuf,
};
Expand All @@ -20,6 +21,12 @@ type ArtifactWithContractRef<'a> = (&'a ArtifactId, &'a (JsonAbi, Vec<u8>));
#[derive(Clone, Default)]
pub struct ContractsByArtifact(pub BTreeMap<ArtifactId, (JsonAbi, Vec<u8>)>);

impl fmt::Debug for ContractsByArtifact {
    /// Formats the collection as a debug map keyed by artifact ID.
    ///
    /// Each value is rendered as an `(abi, bytecode)` tuple where the bytecode is hex-encoded
    /// instead of being printed as a list of individual bytes.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut map = f.debug_map();
        for (id, (abi, bytecode)) in self.iter() {
            map.entry(id, &(abi, hex::encode(bytecode)));
        }
        map.finish()
    }
}

impl ContractsByArtifact {
/// Finds a contract which has a similar bytecode as `code`.
pub fn find_by_code(&self, code: &[u8]) -> Option<ArtifactWithContractRef> {
Expand Down
1 change: 0 additions & 1 deletion crates/evm/traces/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ hashbrown = "0.14"
hex.workspace = true
itertools.workspace = true
once_cell = "1"
ordered-float = "4"
serde = "1"
tokio = { version = "1", features = ["time", "macros"] }
tracing = "0.1"
Expand Down
100 changes: 82 additions & 18 deletions crates/evm/traces/src/identifier/local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,24 @@ use alloy_json_abi::JsonAbi;
use alloy_primitives::Address;
use foundry_common::contracts::{bytecode_diff_score, ContractsByArtifact};
use foundry_compilers::ArtifactId;
use ordered_float::OrderedFloat;
use std::borrow::Cow;

/// A trace identifier that tries to identify addresses using local contracts.
///
/// Holds a borrowed set of known contracts together with an index of their code lengths.
pub struct LocalTraceIdentifier<'a> {
/// Known contracts to search through.
known_contracts: &'a ContractsByArtifact,
/// Vector of pairs of artifact ID and the code length of the given artifact.
///
/// The constructor sorts this vector in ascending order of code length, which is what
/// allows it to be binary-searched by length.
ordered_ids: Vec<(&'a ArtifactId, usize)>,
}

impl<'a> LocalTraceIdentifier<'a> {
/// Creates a new local trace identifier.
#[inline]
pub fn new(known_contracts: &'a ContractsByArtifact) -> Self {
Self { known_contracts }
let mut ordered_ids =
known_contracts.iter().map(|(id, contract)| (id, contract.1.len())).collect::<Vec<_>>();
ordered_ids.sort_by_key(|(_, len)| *len);
Self { known_contracts, ordered_ids }
}

/// Returns the known contracts.
Expand All @@ -24,21 +29,73 @@ impl<'a> LocalTraceIdentifier<'a> {
self.known_contracts
}

fn find_contract_from_bytecode(
&mut self,
code: &[u8],
) -> Option<(&'a ArtifactId, &'a JsonAbi)> {
self.known_contracts
.iter()
.filter_map(|(id, (abi, known_code))| {
// Note: the diff score can be inaccurate for small contracts so we're using
// a relatively high threshold here to avoid filtering out too many
// contracts.
let score = bytecode_diff_score(known_code, code);
(score < 0.85).then_some((score, id, abi))
})
.min_by_key(|(score, _, _)| OrderedFloat(*score))
.map(|(_, id, abi)| (id, abi))
/// Tries to find the known artifact whose bytecode is most similar to `code`.
///
/// Only artifacts whose code length lies within roughly 10% of `code.len()` are scored with
/// [`bytecode_diff_score`]. Artifacts of the same or greater length (up to `len * 1.1`) are
/// visited first, in ascending order of length, followed by the shorter ones
/// (`len * 0.9..len`).
///
/// The first artifact with a score of `0.1` or lower is returned immediately. If there is no
/// such artifact, the visited artifact with the lowest score is returned, or `None` if no
/// artifact was scored at all.
pub fn identify_code(&self, code: &[u8]) -> Option<(&'a ArtifactId, &'a JsonAbi)> {
    let code_len = code.len();

    // Lowest score seen so far, and the artifact it belongs to.
    let mut min_score = f64::MAX;
    let mut min_score_id = None;

    // Scores a single artifact. Yields it when it is close enough to stop searching;
    // otherwise only records it as the running best candidate.
    let mut check = |id: &'a ArtifactId| {
        let (abi, known_code) = self.known_contracts.get(id)?;
        let score = bytecode_diff_score(known_code, code);
        if score <= 0.1 {
            trace!(%score, "found close-enough match");
            Some((id, abi))
        } else {
            if score < min_score {
                min_score = score;
                min_score_id = Some((id, abi));
            }
            None
        }
    };

    // Window of code lengths to consider: `[len * 0.9, ..., len * 1.1]`.
    let max_len = (code_len * 11) / 10;
    let min_len = (code_len * 9) / 10;

    // Common case first: artifacts with the same code length and up, `len..=len*1.1`.
    let same_length_idx = self.find_index(code_len);
    let same_or_longer = self
        .ordered_ids
        .iter()
        .skip(same_length_idx)
        .take_while(|(_, artifact_len)| *artifact_len <= max_len);
    for &(id, _) in same_or_longer {
        if let Some(found) = check(id) {
            return Some(found);
        }
    }

    // Then the remaining, shorter artifacts: `len*0.9..len`.
    let shorter_start = self.find_index(min_len);
    let shorter = self.ordered_ids.iter().take(same_length_idx).skip(shorter_start);
    for &(id, _) in shorter {
        if let Some(found) = check(id) {
            return Some(found);
        }
    }

    trace!(%min_score, "no close-enough match found");
    min_score_id
}

/// Returns the index of the first artifact whose code length is greater than or equal to
/// `len`.
///
/// If several artifacts share the code length `len`, the index of the first of them is
/// returned. If every artifact is shorter than `len`, `self.ordered_ids.len()` is returned.
///
/// Relies on `ordered_ids` being sorted in ascending order of code length, which the
/// constructor guarantees.
fn find_index(&self, len: usize) -> usize {
    // `partition_point` lands directly on the first element that is not shorter than `len`,
    // so no backwards walk over equal-length artifacts is needed.
    self.ordered_ids.partition_point(|(_, probe)| *probe < len)
}
}

Expand All @@ -47,9 +104,16 @@ impl TraceIdentifier for LocalTraceIdentifier<'_> {
where
A: Iterator<Item = (&'a Address, Option<&'a [u8]>)>,
{
trace!("identify {:?} addresses", addresses.size_hint().1);

addresses
.filter_map(|(address, code)| {
let (id, abi) = self.find_contract_from_bytecode(code?)?;
let _span = trace_span!("identify", %address).entered();

trace!("identifying");
let (id, abi) = self.identify_code(code?)?;
trace!(id=%id.identifier(), "identified");

Some(AddressIdentity {
address: *address,
contract: Some(id.identifier()),
Expand Down

0 comments on commit 73383b5

Please sign in to comment.