Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add metrics to measure compiled-contract cache hits/misses for chunk production vs. chunk validation. #11178

Merged
merged 24 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
1f8cff8
Initial commit.
tayfunelmas Apr 26, 2024
11e66e1
Connect pieces second commit.
tayfunelmas Apr 27, 2024
deccb30
Move reason to primitives and not use string.
tayfunelmas Apr 27, 2024
162e583
Merge branch 'master' into compile-metric
tayfunelmas Apr 29, 2024
005cde2
Change string to ApplyChunkReason in ApplyState
tayfunelmas Apr 29, 2024
eec1b28
Address clippy findings.
tayfunelmas Apr 29, 2024
7436146
Merge branch 'master' into compile-metric
tayfunelmas Apr 29, 2024
c3e8ccd
Merge branch 'master' into compile-metric
tayfunelmas Apr 30, 2024
4347216
Remove apply_reason from VContext and use exported Metrics structure …
tayfunelmas Apr 30, 2024
9857410
fix clippy errors and format
tayfunelmas Apr 30, 2024
15d66a9
Add one more cache hit record.
tayfunelmas Apr 30, 2024
b4dfbf1
Merge hit and miss functions and also move metrics.record to record o…
tayfunelmas Apr 30, 2024
7beb96d
Address comments v1
tayfunelmas May 1, 2024
d975a96
Make reset and report operations static
tayfunelmas May 1, 2024
292b613
Set the metric correctly to consider memory and disk cache
tayfunelmas May 1, 2024
35f9ec9
Merge branch 'master' into compile-metric
tayfunelmas May 1, 2024
565dd0c
Remove extra call to record_compiled_contract_cache_lookup
tayfunelmas May 1, 2024
a68c34e
Merge branch 'compile-metric' of https://github.com/tayfunelmas/nearc…
tayfunelmas May 1, 2024
e720ebd
Merge branch 'master' into compile-metric
tayfunelmas May 3, 2024
60617ad
Merge branch 'master' into compile-metric
tayfunelmas May 6, 2024
17f0a9b
Merge branch 'master' into compile-metric
tayfunelmas May 6, 2024
21886bf
Merge branch 'master' into compile-metric
tayfunelmas May 6, 2024
7d60706
Add instrumentation to enable record_compiled_contract_cache_lookup f…
tayfunelmas May 6, 2024
b129a73
Also add x86_64 annotation to skip MacOS tests (the codepath is not e…
tayfunelmas May 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions chain/chain/src/chain_update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use near_chain_primitives::error::Error;
use near_epoch_manager::shard_tracker::ShardTracker;
use near_epoch_manager::types::BlockHeaderInfo;
use near_epoch_manager::EpochManagerAdapter;
use near_primitives::apply::ApplyChunkReason;
use near_primitives::block::{Block, Tip};
use near_primitives::block_header::BlockHeader;
use near_primitives::congestion_info::CongestionInfo;
Expand Down Expand Up @@ -764,6 +765,7 @@ impl<'a> ChainUpdate<'a> {

let apply_result = self.runtime_adapter.apply_chunk(
RuntimeStorageConfig::new(chunk_header.prev_state_root(), true),
ApplyChunkReason::UpdateTrackedShard,
ApplyChunkShardContext {
shard_id,
gas_limit,
Expand Down Expand Up @@ -877,6 +879,7 @@ impl<'a> ChainUpdate<'a> {

let apply_result = self.runtime_adapter.apply_chunk(
RuntimeStorageConfig::new(*chunk_extra.state_root(), true),
ApplyChunkReason::UpdateTrackedShard,
ApplyChunkShardContext {
shard_id,
last_validator_proposals: chunk_extra.validator_proposals(),
Expand Down
5 changes: 5 additions & 0 deletions chain/chain/src/runtime/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use near_epoch_manager::{EpochManagerAdapter, EpochManagerHandle};
use near_parameters::{ActionCosts, ExtCosts, RuntimeConfigStore};
use near_pool::types::TransactionGroupIterator;
use near_primitives::account::{AccessKey, Account};
use near_primitives::apply::ApplyChunkReason;
use near_primitives::checked_feature;
use near_primitives::congestion_info::CongestionInfo;
use near_primitives::errors::{InvalidTxError, RuntimeError, StorageError};
Expand Down Expand Up @@ -247,6 +248,7 @@ impl NightshadeRuntime {
fn process_state_update(
&self,
trie: Trie,
apply_reason: ApplyChunkReason,
chunk: ApplyChunkShardContext,
block: ApplyChunkBlockContext,
receipts: &[Receipt],
Expand Down Expand Up @@ -362,6 +364,7 @@ impl NightshadeRuntime {
);

let apply_state = ApplyState {
apply_reason: Some(apply_reason),
block_height,
prev_block_hash: *prev_block_hash,
block_hash,
Expand Down Expand Up @@ -897,6 +900,7 @@ impl RuntimeAdapter for NightshadeRuntime {
fn apply_chunk(
&self,
storage_config: RuntimeStorageConfig,
apply_reason: ApplyChunkReason,
chunk: ApplyChunkShardContext,
block: ApplyChunkBlockContext,
receipts: &[Receipt],
Expand Down Expand Up @@ -936,6 +940,7 @@ impl RuntimeAdapter for NightshadeRuntime {

match self.process_state_update(
trie,
apply_reason,
chunk,
block,
receipts,
Expand Down
2 changes: 2 additions & 0 deletions chain/chain/src/runtime/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use near_epoch_manager::{EpochManager, RngSeed};
use near_pool::{
InsertTransactionResult, PoolIteratorWrapper, TransactionGroupIteratorWrapper, TransactionPool,
};
use near_primitives::apply::ApplyChunkReason;
use near_primitives::checked_feature;
use near_primitives::test_utils::create_test_signer;
use near_primitives::types::validator_stake::{ValidatorStake, ValidatorStakeIter};
Expand Down Expand Up @@ -96,6 +97,7 @@ impl NightshadeRuntime {
let mut result = self
.apply_chunk(
RuntimeStorageConfig::new(*state_root, true),
ApplyChunkReason::UpdateTrackedShard,
ApplyChunkShardContext {
shard_id,
last_validator_proposals,
Expand Down
2 changes: 2 additions & 0 deletions chain/chain/src/test_utils/kv_runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use near_epoch_manager::types::BlockHeaderInfo;
use near_epoch_manager::{EpochManagerAdapter, RngSeed};
use near_pool::types::TransactionGroupIterator;
use near_primitives::account::{AccessKey, Account};
use near_primitives::apply::ApplyChunkReason;
use near_primitives::block::Tip;
use near_primitives::block_header::{Approval, ApprovalInner};
use near_primitives::congestion_info::CongestionInfo;
Expand Down Expand Up @@ -1119,6 +1120,7 @@ impl RuntimeAdapter for KeyValueRuntime {
fn apply_chunk(
&self,
storage_config: RuntimeStorageConfig,
_apply_reason: ApplyChunkReason,
chunk: ApplyChunkShardContext,
block: ApplyChunkBlockContext,
receipts: &[Receipt],
Expand Down
2 changes: 2 additions & 0 deletions chain/chain/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use near_chain_configs::ReshardingConfig;
use near_chain_primitives::Error;
pub use near_epoch_manager::EpochManagerAdapter;
use near_pool::types::TransactionGroupIterator;
use near_primitives::apply::ApplyChunkReason;
pub use near_primitives::block::{Block, BlockHeader, Tip};
use near_primitives::challenge::{ChallengesResult, PartialState};
use near_primitives::checked_feature;
Expand Down Expand Up @@ -450,6 +451,7 @@ pub trait RuntimeAdapter: Send + Sync {
fn apply_chunk(
&self,
storage: RuntimeStorageConfig,
apply_reason: ApplyChunkReason,
chunk: ApplyChunkShardContext,
block: ApplyChunkBlockContext,
receipts: &[Receipt],
Expand Down
7 changes: 7 additions & 0 deletions chain/chain/src/update_shard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::types::{
use near_async::time::Clock;
use near_chain_primitives::Error;
use near_epoch_manager::EpochManagerAdapter;
use near_primitives::apply::ApplyChunkReason;
use near_primitives::hash::CryptoHash;
use near_primitives::receipt::Receipt;
use near_primitives::sandbox::state_patch::SandboxStatePatch;
Expand Down Expand Up @@ -128,13 +129,15 @@ pub(crate) fn process_shard_update(
) -> Result<ShardUpdateResult, Error> {
Ok(match shard_update_reason {
ShardUpdateReason::NewChunk(data) => ShardUpdateResult::NewChunk(apply_new_chunk(
ApplyChunkReason::UpdateTrackedShard,
parent_span,
data,
shard_context,
runtime,
epoch_manager,
)?),
ShardUpdateReason::OldChunk(data) => ShardUpdateResult::OldChunk(apply_old_chunk(
ApplyChunkReason::UpdateTrackedShard,
parent_span,
data,
shard_context,
Expand All @@ -154,6 +157,7 @@ pub(crate) fn process_shard_update(
/// Applies new chunk, which includes applying transactions from chunk and
/// receipts filtered from outgoing receipts from previous chunks.
pub fn apply_new_chunk(
apply_reason: ApplyChunkReason,
parent_span: &tracing::Span,
data: NewChunkData,
shard_context: ShardContext,
Expand Down Expand Up @@ -187,6 +191,7 @@ pub fn apply_new_chunk(
};
match runtime.apply_chunk(
storage_config,
apply_reason,
ApplyChunkShardContext {
shard_id,
last_validator_proposals: chunk_header.prev_validator_proposals(),
Expand Down Expand Up @@ -225,6 +230,7 @@ pub fn apply_new_chunk(
/// (logunov) From what I know, the state update may include only validator
/// accounts update on epoch start.
pub fn apply_old_chunk(
apply_reason: ApplyChunkReason,
parent_span: &tracing::Span,
data: OldChunkData,
shard_context: ShardContext,
Expand All @@ -248,6 +254,7 @@ pub fn apply_old_chunk(
};
match runtime.apply_chunk(
storage_config,
apply_reason,
ApplyChunkShardContext {
shard_id,
last_validator_proposals: prev_chunk_extra.validator_proposals(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use near_chain_primitives::Error;
use near_epoch_manager::EpochManagerAdapter;
use near_network::types::{NetworkRequests, PeerManagerMessageRequest};
use near_pool::TransactionGroupIteratorWrapper;
use near_primitives::apply::ApplyChunkReason;
use near_primitives::hash::{hash, CryptoHash};
use near_primitives::merkle::merklize;
use near_primitives::receipt::Receipt;
Expand Down Expand Up @@ -477,6 +478,7 @@ pub(crate) fn validate_chunk_state_witness(
MainTransition::NewChunk(new_chunk_data) => {
let chunk_header = new_chunk_data.chunk_header.clone();
let NewChunkResult { apply_result: mut main_apply_result, .. } = apply_new_chunk(
ApplyChunkReason::ValidateChunkStateWitness,
&span,
new_chunk_data,
ShardContext {
Expand Down Expand Up @@ -524,6 +526,7 @@ pub(crate) fn validate_chunk_state_witness(
},
};
let OldChunkResult { apply_result, .. } = apply_old_chunk(
ApplyChunkReason::ValidateChunkStateWitness,
&span,
old_chunk_data,
ShardContext {
Expand Down
30 changes: 30 additions & 0 deletions core/primitives-core/src/apply.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use std::fmt::{Display, Formatter};

/// Indicates the phase of block production the apply-chunk operation is invoked.
/// This is currently used to analyze and compare the metrics collected while applying the receipts
/// for different purposes, eg. while updating state of a tracked shard vs. validating a chunk.
/// TODO: Consider combining ApplyChunkReason, ApplyChunkBlockContext, and ApplyChunkBlockContext
/// under a common wrapper struct such as ApplyChunkContext.
#[derive(Clone, Debug)]
pub enum ApplyChunkReason {
/// Apply-chunk is invoked to update the state of a shards being tracked.
UpdateTrackedShard,
/// Apply-chunk is invoked to validate the state witness for a shard in the context of stateless validation.
ValidateChunkStateWitness,
}

impl ApplyChunkReason {
/// Returns a static, short string representation of the reason, to be used for metrics.
pub fn as_str(&self) -> &'static str {
match self {
ApplyChunkReason::UpdateTrackedShard => "update_shard",
ApplyChunkReason::ValidateChunkStateWitness => "validate_chunk",
}
}
}

impl Display for ApplyChunkReason {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
f.write_str(self.as_str())
}
}
1 change: 1 addition & 0 deletions core/primitives-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pub use borsh;
pub use num_rational;

pub mod account;
pub mod apply;
pub mod chains;
pub mod config;
pub mod hash;
Expand Down
1 change: 1 addition & 0 deletions core/primitives/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub use near_primitives_core::account;
pub use near_primitives_core::apply;
pub use near_primitives_core::borsh;
pub use near_primitives_core::config;
pub use near_primitives_core::hash;
Expand Down
1 change: 1 addition & 0 deletions integration-tests/src/user/runtime_user.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ impl RuntimeUser {
let all_shard_ids = [0, 1, 2, 3, 4, 5];

ApplyState {
apply_reason: None,
block_height: 1,
prev_block_hash: Default::default(),
block_hash: Default::default(),
Expand Down
2 changes: 1 addition & 1 deletion runtime/near-vm-runner/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pub use cache::{
NoContractRuntimeCache,
};
pub use code::ContractCode;
pub use metrics::Metrics;
pub use metrics::{report_metrics, reset_metrics};
pub use profile::ProfileDataV3;
pub use runner::{run, VM};

Expand Down
102 changes: 72 additions & 30 deletions runtime/near-vm-runner/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
use near_o11y::metrics::{try_create_histogram_vec, HistogramVec};
use near_o11y::metrics::{
try_create_histogram_vec, try_create_int_counter_vec, HistogramVec, IntCounterVec,
};
use once_cell::sync::Lazy;
use std::{cell::RefCell, time::Duration};

thread_local! {
static METRICS: RefCell<Metrics> = const { RefCell::new(Metrics {
near_vm_compilation_time: Duration::new(0, 0),
wasmtime_compilation_time: Duration::new(0, 0),
compiled_contract_cache_lookups: 0,
compiled_contract_cache_hits: 0,
}) };
}

pub static COMPILATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
static COMPILATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
try_create_histogram_vec(
"near_vm_runner_compilation_seconds",
"Histogram of how long it takes to compile things",
Expand All @@ -19,47 +23,85 @@ pub static COMPILATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
.unwrap()
});

static COMPILED_CONTRACT_CACHE_LOOKUPS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
try_create_int_counter_vec(
"near_vm_compiled_contract_cache_lookups_total",
"The number of times the runtime looks up for an entry in the compiled-contract cache for the given caller context and shard_id",
&["context", "shard_id"],
)
.unwrap()
});

static COMPILED_CONTRACT_CACHE_HITS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
try_create_int_counter_vec(
"near_vm_compiled_contract_cache_hits_total",
"The number of times the runtime finds an entry in the compiled-contract cache for the given caller context and shard_id",
&["context", "shard_id"],
)
.unwrap()
});

#[derive(Default, Copy, Clone)]
pub struct Metrics {
struct Metrics {
near_vm_compilation_time: Duration,
wasmtime_compilation_time: Duration,
/// Number of lookups from the compiled contract cache.
compiled_contract_cache_lookups: u64,
/// Number of times the lookup from the compiled contract cache finds a match.
compiled_contract_cache_hits: u64,
}

#[cfg(any(feature = "near_vm", feature = "wasmtime_vm"))]
pub(crate) fn compilation_duration(kind: near_parameters::vm::VMKind, duration: Duration) {
use near_parameters::vm::VMKind;
METRICS.with_borrow_mut(|m| match kind {
VMKind::Wasmer0 => {}
VMKind::Wasmtime => m.wasmtime_compilation_time += duration,
VMKind::Wasmer2 => {}
VMKind::NearVm => m.near_vm_compilation_time += duration,
});
}

impl Metrics {
pub fn reset() {
METRICS.with_borrow_mut(|m| *m = Self::default());
}
/// Updates metrics to record a compiled-contract cache lookup,
/// where is_hit=true indicates that we found an entry in the cache.
#[cfg(all(feature = "near_vm", target_arch = "x86_64"))]
pub(crate) fn record_compiled_contract_cache_lookup(is_hit: bool) {
METRICS.with_borrow_mut(|m| {
m.compiled_contract_cache_lookups += 1;
if is_hit {
m.compiled_contract_cache_hits += 1;
}
});
}

/// Get the current metrics.
///
/// Note that this is a thread-local operation.
pub fn get() -> Metrics {
METRICS.with_borrow(|m| *m)
}
pub fn reset_metrics() {
METRICS.with_borrow_mut(|m| *m = Metrics::default());
}

pub fn report(&mut self, shard_id: &str) {
if !self.near_vm_compilation_time.is_zero() {
/// Reports the current metrics at the end of a single VM invocation (eg. to run a function call).
pub fn report_metrics(shard_id: &str, caller_context: &str) {
METRICS.with_borrow_mut(|m| {
if !m.near_vm_compilation_time.is_zero() {
COMPILATION_TIME
.with_label_values(&["near_vm", shard_id])
.observe(self.near_vm_compilation_time.as_secs_f64());
self.near_vm_compilation_time = Duration::default();
.observe(m.near_vm_compilation_time.as_secs_f64());
}
if !self.wasmtime_compilation_time.is_zero() {
if !m.wasmtime_compilation_time.is_zero() {
COMPILATION_TIME
.with_label_values(&["wasmtime", shard_id])
.observe(self.wasmtime_compilation_time.as_secs_f64());
self.wasmtime_compilation_time = Duration::default();
.observe(m.wasmtime_compilation_time.as_secs_f64());
}
if m.compiled_contract_cache_lookups > 0 {
COMPILED_CONTRACT_CACHE_LOOKUPS_TOTAL
.with_label_values(&[caller_context, shard_id])
.inc_by(m.compiled_contract_cache_lookups);
}
if m.compiled_contract_cache_hits > 0 {
COMPILED_CONTRACT_CACHE_HITS_TOTAL
.with_label_values(&[caller_context, shard_id])
.inc_by(m.compiled_contract_cache_lookups);
}
}
}

#[cfg(any(feature = "near_vm", feature = "wasmtime_vm"))]
pub(crate) fn compilation_duration(kind: near_parameters::vm::VMKind, duration: Duration) {
use near_parameters::vm::VMKind;
METRICS.with_borrow_mut(|m| match kind {
VMKind::Wasmer0 => {}
VMKind::Wasmtime => m.wasmtime_compilation_time += duration,
VMKind::Wasmer2 => {}
VMKind::NearVm => m.near_vm_compilation_time += duration,
*m = Metrics::default();
});
}
Loading
Loading