Skip to content
This repository has been archived by the owner on Aug 28, 2024. It is now read-only.

Commit

Permalink
feat: Add metrics for transaction execution result in state keeper (m…
Browse files Browse the repository at this point in the history
…atter-labs#2021)

## What ❔

<!-- What are the changes this PR brings about? -->
<!-- Example: This PR adds a PR template to the repo. -->
<!-- (For bigger PRs adding more context is appreciated) -->

## Why ❔

<!-- Why are these changes done? What goal do they contribute to? What
are the principles behind them? -->
<!-- Example: PR templates ensure PR reviewers, observers, and future
iterators are in context about the evolution of repos. -->

## Checklist

<!-- Check your PR fulfills the following items. -->
<!-- For draft PRs check the boxes as you complete them. -->

- [ ] PR title corresponds to the body of PR (we generate changelog
entries from PRs).
- [ ] Tests for the changes have been added / updated.
- [ ] Documentation comments have been added / updated.
- [ ] Code has been formatted via `zk fmt` and `zk lint`.
- [ ] Spellcheck has been run via `zk spellcheck`.
  • Loading branch information
AnastasiiaVashchuk authored Jun 5, 2024
1 parent 800b8f4 commit dde0fc4
Show file tree
Hide file tree
Showing 14 changed files with 217 additions and 62 deletions.
6 changes: 3 additions & 3 deletions core/node/node_sync/src/external_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use zksync_state_keeper::{
L1BatchParams, L2BlockParams, PendingBatchData, StateKeeperIO,
},
metrics::KEEPER_METRICS,
seal_criteria::IoSealCriteria,
seal_criteria::{IoSealCriteria, UnexecutableReason},
updates::UpdatesManager,
};
use zksync_types::{
Expand Down Expand Up @@ -304,10 +304,10 @@ impl StateKeeperIO for ExternalIO {
anyhow::bail!("Rollback requested. Transaction hash: {:?}", tx.hash());
}

async fn reject(&mut self, tx: &Transaction, error: &str) -> anyhow::Result<()> {
async fn reject(&mut self, tx: &Transaction, reason: UnexecutableReason) -> anyhow::Result<()> {
// We are replaying the already executed transactions so no rejections are expected to occur.
anyhow::bail!(
"Requested rejection of transaction {:?} because of the following error: {error}. \
"Requested rejection of transaction {:?} because of the following error: {reason}. \
This is not supported on external node",
tx.hash()
);
Expand Down
23 changes: 16 additions & 7 deletions core/node/state_keeper/src/io/mempool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ use crate::{
},
mempool_actor::l2_tx_filter,
metrics::KEEPER_METRICS,
seal_criteria::{IoSealCriteria, L2BlockMaxPayloadSizeSealer, TimeoutSealer},
seal_criteria::{
IoSealCriteria, L2BlockMaxPayloadSizeSealer, TimeoutSealer, UnexecutableReason,
},
updates::UpdatesManager,
MempoolGuard,
};
Expand Down Expand Up @@ -245,7 +247,8 @@ impl StateKeeperIO for MempoolIO {
tx.hash(),
tx.gas_limit()
);
self.reject(&tx, &Halt::TooBigGasLimit.to_string()).await?;
self.reject(&tx, UnexecutableReason::Halt(Halt::TooBigGasLimit))
.await?;
continue;
}
return Ok(Some(tx));
Expand All @@ -265,25 +268,31 @@ impl StateKeeperIO for MempoolIO {
Ok(())
}

async fn reject(&mut self, rejected: &Transaction, error: &str) -> anyhow::Result<()> {
async fn reject(
&mut self,
rejected: &Transaction,
reason: UnexecutableReason,
) -> anyhow::Result<()> {
anyhow::ensure!(
!rejected.is_l1(),
"L1 transactions should not be rejected: {error}"
"L1 transactions should not be rejected: {reason}"
);

// Reset the nonces in the mempool, but don't insert the transaction back.
self.mempool.rollback(rejected);

// Mark tx as rejected in the storage.
let mut storage = self.pool.connection_tagged("state_keeper").await?;
KEEPER_METRICS.rejected_transactions.inc();

KEEPER_METRICS.inc_rejected_txs(reason.as_metric_label());

tracing::warn!(
"Transaction {} is rejected with error: {error}",
"Transaction {} is rejected with error: {reason}",
rejected.hash()
);
storage
.transactions_dal()
.mark_tx_as_rejected(rejected.hash(), &format!("rejected: {error}"))
.mark_tx_as_rejected(rejected.hash(), &format!("rejected: {reason}"))
.await?;
Ok(())
}
Expand Down
4 changes: 2 additions & 2 deletions core/node/state_keeper/src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub use self::{
output_handler::{OutputHandler, StateKeeperOutputHandler},
persistence::{L2BlockSealerTask, StateKeeperPersistence, TreeWritesPersistence},
};
use super::seal_criteria::IoSealCriteria;
use super::seal_criteria::{IoSealCriteria, UnexecutableReason};

pub mod common;
pub(crate) mod mempool;
Expand Down Expand Up @@ -136,7 +136,7 @@ pub trait StateKeeperIO: 'static + Send + Sync + fmt::Debug + IoSealCriteria {
/// Marks the transaction as "not executed", so it can be retrieved from the IO again.
async fn rollback(&mut self, tx: Transaction) -> anyhow::Result<()>;
/// Marks the transaction as "rejected", e.g. one that is not correct and can't be executed.
async fn reject(&mut self, tx: &Transaction, error: &str) -> anyhow::Result<()>;
async fn reject(&mut self, tx: &Transaction, reason: UnexecutableReason) -> anyhow::Result<()>;

/// Loads base system contracts with the specified version.
async fn load_base_system_contracts(
Expand Down
21 changes: 14 additions & 7 deletions core/node/state_keeper/src/keeper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use super::{
updates::UpdatesManager,
utils::gas_count_from_writes,
};
use crate::seal_criteria::UnexecutableReason;

/// Amount of time to block on waiting for some resource. The exact value is not really important,
/// we only need it to not block on waiting indefinitely and be able to process cancellation requests.
Expand Down Expand Up @@ -581,7 +582,7 @@ impl ZkSyncStateKeeper {
format!("failed rolling back transaction {tx_hash:?} in batch executor")
})?;
self.io
.reject(&tx, reason)
.reject(&tx, reason.clone())
.await
.with_context(|| format!("cannot reject transaction {tx_hash:?}"))?;
}
Expand Down Expand Up @@ -690,23 +691,29 @@ impl ZkSyncStateKeeper {
| TxExecutionResult::RejectedByVm {
reason: Halt::NotEnoughGasProvided,
} => {
let error_message = match &exec_result {
TxExecutionResult::BootloaderOutOfGasForTx => "bootloader_tx_out_of_gas",
let (reason, criterion) = match &exec_result {
TxExecutionResult::BootloaderOutOfGasForTx => (
UnexecutableReason::BootloaderOutOfGas,
"bootloader_tx_out_of_gas",
),
TxExecutionResult::RejectedByVm {
reason: Halt::NotEnoughGasProvided,
} => "not_enough_gas_provided_to_start_tx",
} => (
UnexecutableReason::NotEnoughGasProvided,
"not_enough_gas_provided_to_start_tx",
),
_ => unreachable!(),
};
let resolution = if is_first_tx {
SealResolution::Unexecutable(error_message.to_string())
SealResolution::Unexecutable(reason)
} else {
SealResolution::ExcludeAndSeal
};
AGGREGATION_METRICS.inc(error_message, &resolution);
AGGREGATION_METRICS.inc(criterion, &resolution);
resolution
}
TxExecutionResult::RejectedByVm { reason } => {
SealResolution::Unexecutable(reason.to_string())
UnexecutableReason::Halt(reason.clone()).into()
}
TxExecutionResult::Success {
tx_result,
Expand Down
58 changes: 55 additions & 3 deletions core/node/state_keeper/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::{
time::Duration,
};

use multivm::interface::VmExecutionResultAndLogs;
use multivm::interface::{VmExecutionResultAndLogs, VmRevertReason};
use vise::{
Buckets, Counter, EncodeLabelSet, EncodeLabelValue, Family, Gauge, Histogram, LatencyObserver,
Metrics,
Expand All @@ -30,6 +30,20 @@ pub enum TxExecutionType {
L2,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EncodeLabelValue)]
#[metrics(rename_all = "snake_case")]
pub enum TxExecutionStatus {
Success,
Rejected,
Reverted,
}

#[derive(Debug, Clone, PartialEq, Eq, Hash, EncodeLabelSet)]
pub struct TxExecutionResult {
status: TxExecutionStatus,
reason: Option<&'static str>,
}

impl TxExecutionType {
pub fn from_is_l1(is_l1: bool) -> TxExecutionType {
match is_l1 {
Expand Down Expand Up @@ -57,8 +71,8 @@ pub struct StateKeeperMetrics {
/// Latency of the state keeper getting a transaction from the mempool.
#[metrics(buckets = Buckets::LATENCIES)]
pub get_tx_from_mempool: Histogram<Duration>,
/// Number of transactions rejected by the state keeper.
pub rejected_transactions: Counter,
/// Number of transactions completed with a specific result.
pub tx_execution_result: Family<TxExecutionResult, Counter>,
/// Time spent waiting for the hash of a previous L1 batch.
#[metrics(buckets = Buckets::LATENCIES)]
pub wait_for_prev_hash_time: Histogram<Duration>,
Expand All @@ -77,6 +91,44 @@ pub struct StateKeeperMetrics {
pub blob_base_fee_too_high: Counter,
}

fn vm_revert_reason_as_metric_label(reason: &VmRevertReason) -> &'static str {
match reason {
VmRevertReason::General { .. } => "General",
VmRevertReason::InnerTxError => "InnerTxError",
VmRevertReason::VmError => "VmError",
VmRevertReason::Unknown { .. } => "Unknown",
}
}

impl StateKeeperMetrics {
pub fn inc_rejected_txs(&self, reason: &'static str) {
let result = TxExecutionResult {
status: TxExecutionStatus::Rejected,
reason: Some(reason),
};

self.tx_execution_result[&result].inc();
}

pub fn inc_succeeded_txs(&self) {
let result = TxExecutionResult {
status: TxExecutionStatus::Success,
reason: None,
};

self.tx_execution_result[&result].inc();
}

pub fn inc_reverted_txs(&self, reason: &VmRevertReason) {
let result = TxExecutionResult {
status: TxExecutionStatus::Reverted,
reason: Some(vm_revert_reason_as_metric_label(reason)),
};

self.tx_execution_result[&result].inc();
}
}

#[vise::register]
pub static KEEPER_METRICS: vise::Global<StateKeeperMetrics> = vise::Global::new();

Expand Down
8 changes: 5 additions & 3 deletions core/node/state_keeper/src/seal_criteria/criteria/gas.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use zksync_types::ProtocolVersionId;

use crate::{
seal_criteria::{SealCriterion, SealData, SealResolution, StateKeeperConfig},
seal_criteria::{
SealCriterion, SealData, SealResolution, StateKeeperConfig, UnexecutableReason,
},
utils::new_block_gas_count,
};

Expand Down Expand Up @@ -30,7 +32,7 @@ impl SealCriterion for GasCriterion {
(config.max_single_tx_gas as f64 * config.close_block_at_gas_percentage).round() as u32;

if (tx_data.gas_count + new_block_gas_count()).any_field_greater_than(tx_bound) {
SealResolution::Unexecutable("Transaction requires too much gas".into())
UnexecutableReason::TooMuchGas.into()
} else if block_data
.gas_count
.any_field_greater_than(config.max_single_tx_gas)
Expand Down Expand Up @@ -103,7 +105,7 @@ mod tests {
);
assert_eq!(
huge_transaction_resolution,
SealResolution::Unexecutable("Transaction requires too much gas".into())
UnexecutableReason::TooMuchGas.into()
);

// Check criterion workflow
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use multivm::utils::gas_bootloader_batch_tip_overhead;
use zksync_types::ProtocolVersionId;

use crate::seal_criteria::{SealCriterion, SealData, SealResolution, StateKeeperConfig};
use crate::seal_criteria::{
SealCriterion, SealData, SealResolution, StateKeeperConfig, UnexecutableReason,
};

/// Checks whether we should exclude the transaction because we don't have enough gas for batch tip.
#[derive(Debug)]
Expand All @@ -22,7 +24,7 @@ impl SealCriterion for GasForBatchTipCriterion {

if tx_data.gas_remaining < batch_tip_overhead {
if is_tx_first {
SealResolution::Unexecutable("not_enough_gas_for_batch_tip".to_string())
UnexecutableReason::OutOfGasForBatchTip.into()
} else {
SealResolution::ExcludeAndSeal
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use zksync_config::configs::chain::StateKeeperConfig;
use zksync_types::ProtocolVersionId;

// Local uses
use crate::seal_criteria::{SealCriterion, SealData, SealResolution};
use crate::seal_criteria::{SealCriterion, SealData, SealResolution, UnexecutableReason};

// Collected vm execution metrics should fit into geometry limits.
// Otherwise witness generation will fail and proof won't be generated.
Expand Down Expand Up @@ -52,7 +52,7 @@ impl SealCriterion for CircuitsCriterion {
let used_circuits_batch = block_data.execution_metrics.circuit_statistic.total();

if used_circuits_tx + batch_tip_circuit_overhead >= reject_bound {
SealResolution::Unexecutable("ZK proof cannot be generated for a transaction".into())
UnexecutableReason::ProofWillFail.into()
} else if used_circuits_batch + batch_tip_circuit_overhead >= config.max_circuits_per_batch
{
SealResolution::ExcludeAndSeal
Expand Down Expand Up @@ -162,10 +162,7 @@ mod tests {
protocol_version,
);

assert_eq!(
block_resolution,
SealResolution::Unexecutable("ZK proof cannot be generated for a transaction".into())
);
assert_eq!(block_resolution, UnexecutableReason::ProofWillFail.into());
}

#[test]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use multivm::utils::execution_metrics_bootloader_batch_tip_overhead;
use zksync_types::ProtocolVersionId;

use crate::seal_criteria::{SealCriterion, SealData, SealResolution, StateKeeperConfig};
use crate::seal_criteria::{
SealCriterion, SealData, SealResolution, StateKeeperConfig, UnexecutableReason,
};

#[derive(Debug)]
pub struct PubDataBytesCriterion {
Expand Down Expand Up @@ -41,8 +43,7 @@ impl SealCriterion for PubDataBytesCriterion {
if tx_size + execution_metrics_bootloader_batch_tip_overhead(protocol_version.into())
> reject_bound as usize
{
let message = "Transaction cannot be sent to L1 due to pubdata limits";
SealResolution::Unexecutable(message.into())
UnexecutableReason::PubdataLimit.into()
} else if block_size
+ execution_metrics_bootloader_batch_tip_overhead(protocol_version.into())
> max_pubdata_per_l1_batch
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use multivm::utils::get_bootloader_encoding_space;
use zksync_types::ProtocolVersionId;

use crate::seal_criteria::{SealCriterion, SealData, SealResolution, StateKeeperConfig};
use crate::seal_criteria::{
SealCriterion, SealData, SealResolution, StateKeeperConfig, UnexecutableReason,
};

#[derive(Debug)]
pub struct TxEncodingSizeCriterion;
Expand All @@ -26,8 +28,7 @@ impl SealCriterion for TxEncodingSizeCriterion {
.round();

if tx_data.cumulative_size > reject_bound as usize {
let message = "Transaction cannot be included due to large encoding size";
SealResolution::Unexecutable(message.into())
UnexecutableReason::LargeEncodingSize.into()
} else if block_data.cumulative_size > bootloader_tx_encoding_space as usize {
SealResolution::ExcludeAndSeal
} else if block_data.cumulative_size > include_and_seal_bound as usize {
Expand Down Expand Up @@ -83,9 +84,7 @@ mod tests {
);
assert_eq!(
unexecutable_resolution,
SealResolution::Unexecutable(
"Transaction cannot be included due to large encoding size".into()
)
UnexecutableReason::LargeEncodingSize.into()
);

let exclude_and_seal_resolution = criterion.should_seal(
Expand Down
Loading

0 comments on commit dde0fc4

Please sign in to comment.