diff --git a/Cargo.lock b/Cargo.lock index 050262076aa12..8f4551b38d720 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -438,6 +438,7 @@ dependencies = [ "aptos-mvhashmap", "aptos-state-view", "aptos-types", + "aptos-vm-logging", "arc-swap", "bcs 0.1.4 (git+https://github.com/aptos-labs/bcs.git?rev=d31fab9d81748e2594be5cd5cdf845786a30562d)", "claims", @@ -2975,6 +2976,7 @@ dependencies = [ "aptos-mvhashmap", "aptos-state-view", "aptos-types", + "aptos-vm-logging", "bcs 0.1.4 (git+https://github.com/aptos-labs/bcs.git?rev=d31fab9d81748e2594be5cd5cdf845786a30562d)", "dashmap", "fail 0.5.0", @@ -3022,6 +3024,21 @@ dependencies = [ "serde 1.0.149", ] +[[package]] +name = "aptos-vm-logging" +version = "0.1.0" +dependencies = [ + "aptos-crypto", + "aptos-logger", + "aptos-metrics-core", + "aptos-speculative-state-helper", + "aptos-state-view", + "aptos-types", + "arc-swap", + "once_cell", + "serde 1.0.149", +] + [[package]] name = "aptos-vm-profiling" version = "0.1.0" @@ -6295,6 +6312,7 @@ dependencies = [ "aptos-types", "aptos-vm", "aptos-vm-genesis", + "aptos-vm-logging", "aptos-writeset-generator", "bcs 0.1.4 (git+https://github.com/aptos-labs/bcs.git?rev=d31fab9d81748e2594be5cd5cdf845786a30562d)", "itertools", diff --git a/Cargo.toml b/Cargo.toml index ed5b913e2f796..3d8aacd412e79 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ members = [ "aptos-move/aptos-transactional-test-harness", "aptos-move/aptos-validator-interface", "aptos-move/aptos-vm", + "aptos-move/aptos-vm-logging", "aptos-move/aptos-vm-profiling", "aptos-move/block-executor", "aptos-move/e2e-move-tests", @@ -293,6 +294,7 @@ aptos-types = { path = "types" } aptos-validator-interface = { path = "aptos-move/aptos-validator-interface" } aptos-vault-client = { path = "secure/storage/vault" } aptos-vm = { path = "aptos-move/aptos-vm" } +aptos-vm-logging = { path = "aptos-move/aptos-vm-logging" } aptos-vm-genesis = { path = "aptos-move/vm-genesis" } aptos-vm-validator = { path = "vm-validator" } aptos-warp-webserver = { path = "crates/aptos-warp-webserver" } diff --git a/aptos-move/aptos-vm-logging/Cargo.toml b/aptos-move/aptos-vm-logging/Cargo.toml new file mode 100644 index 0000000000000..28e4155f7bb89 --- /dev/null +++ b/aptos-move/aptos-vm-logging/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "aptos-vm-logging" +description = "Aptos VM speculative logging utility" +version = "0.1.0" + +# Workspace inherited keys +authors = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +license = { workspace = true } +publish = { workspace = true } +repository = { workspace = true } +rust-version = { workspace = true } + +[dependencies] +aptos-crypto = { workspace = true } +aptos-logger = { workspace = true } +aptos-metrics-core = { workspace = true } +aptos-speculative-state-helper = { workspace = true } +aptos-state-view = { workspace = true } +aptos-types = { workspace = true } +arc-swap = { workspace = true } +once_cell = { workspace = true } +serde = { workspace = true } diff --git a/aptos-move/aptos-vm-logging/src/counters.rs b/aptos-move/aptos-vm-logging/src/counters.rs new file mode 100644 index 0000000000000..106541e018e35 --- /dev/null +++ b/aptos-move/aptos-vm-logging/src/counters.rs @@ -0,0 +1,11 @@ +// Copyright © Aptos Foundation +// SPDX-License-Identifier: Apache-2.0 + +use aptos_metrics_core::{register_int_counter, IntCounter}; +use once_cell::sync::Lazy; + +/// Count the number of errors. This is not intended for display on a dashboard, +/// but rather for triggering alerts. +pub static CRITICAL_ERRORS: Lazy = Lazy::new(|| { + register_int_counter!("aptos_vm_critical_errors", "Number of critical errors").unwrap() +}); diff --git a/aptos-move/aptos-vm-logging/src/lib.rs b/aptos-move/aptos-vm-logging/src/lib.rs new file mode 100644 index 0000000000000..cfbf3757bc410 --- /dev/null +++ b/aptos-move/aptos-vm-logging/src/lib.rs @@ -0,0 +1,167 @@ +// Copyright © Aptos Foundation +// SPDX-License-Identifier: Apache-2.0 + +pub mod counters; +pub mod log_schema; + +pub mod prelude { + pub use crate::{ + alert, counters::CRITICAL_ERRORS, speculative_debug, speculative_error, speculative_info, + speculative_log, speculative_trace, speculative_warn, + }; +} + +use crate::{counters::CRITICAL_ERRORS, log_schema::AdapterLogSchema}; +use aptos_logger::{prelude::*, Level}; +use aptos_speculative_state_helper::{SpeculativeEvent, SpeculativeEvents}; +use arc_swap::ArcSwapOption; +use once_cell::sync::Lazy; +use std::sync::Arc; + +struct VMLogEntry { + level: Level, + context: AdapterLogSchema, + message: String, +} + +impl VMLogEntry { + fn new(level: Level, context: AdapterLogSchema, message: String) -> Self { + Self { + level, + context, + message, + } + } +} + +// Implement dispatching for a recorded VM log entry to support recording +// as speculative logging event (and eventual dispatching as needed). +impl SpeculativeEvent for VMLogEntry { + fn dispatch(self) { + match self.level { + Level::Error => { + // TODO: Consider using SpeculativeCounter to increase CRITICAL_ERRORS + // on the critical path instead of async dispatching. + alert!(self.context, "{}", self.message); + }, + Level::Warn => warn!(self.context, "{}", self.message), + Level::Info => info!(self.context, "{}", self.message), + Level::Debug => debug!(self.context, "{}", self.message), + Level::Trace => trace!(self.context, "{}", self.message), + } + } +} + +static BUFFERED_LOG_EVENTS: Lazy>> = + Lazy::new(|| ArcSwapOption::from(None)); + +/// Initializes the storage of speculative logs for num_txns many transactions. +pub fn init_speculative_logs(num_txns: usize) { + BUFFERED_LOG_EVENTS.swap(Some(Arc::new(SpeculativeEvents::new(num_txns)))); +} + +/// Adds a message at a specified logging level and given context (that includes txn index) +/// to speculative buffer. Logs directly and logs a separate (new error) if the speculative +/// events storage is not initialized or appropriately sized. +pub fn speculative_log(level: Level, context: &AdapterLogSchema, message: String) { + let txn_idx = context.get_txn_idx(); + match &*BUFFERED_LOG_EVENTS.load() { + Some(log_events) => { + let log_event = VMLogEntry::new(level, context.clone(), message); + if let Err(e) = log_events.record(txn_idx, log_event) { + alert!("{:?}", e); + }; + }, + None => { + alert!( + "Speculative state not initialized to log message = {}", + message + ); + }, + }; +} + +/// Flushes the currently stored logs, and swaps the speculative log / event storage with None. +/// Must be called after block execution is complete (removes the storage from Arc). +pub fn flush_speculative_logs() { + match BUFFERED_LOG_EVENTS.swap(None) { + Some(log_events_ptr) => match Arc::try_unwrap(log_events_ptr) { + Ok(log_events) => log_events.flush(), + Err(_) => { + alert!("Speculative log storage must be uniquely owned to flush"); + }, + }, + None => { + alert!("Flush called on uninitialized speculative log storage"); + }, + } +} + +/// Clear speculative logs recorded for a specific transction, useful when transaction +/// execution fails validation and aborts - setting stage for the re-execution. +pub fn clear_speculative_txn_logs(txn_idx: usize) { + match &*BUFFERED_LOG_EVENTS.load() { + Some(log_events) => { + if let Err(e) = log_events.clear_txn_events(txn_idx) { + alert!("{:?}", e); + }; + }, + None => { + alert!("Clear all logs called on uninitialized speculative log storage"); + }, + } +} + +/// Combine logging and error and incrementing critical errors counter for alerting. +#[macro_export] +macro_rules! alert { + ($($args:tt)+) => { + error!($($args)+); + CRITICAL_ERRORS.inc(); + }; +} + +#[macro_export] +macro_rules! speculative_error { + ($($args:tt)+) => { + if enabled!(Level::Error) { + speculative_log(Level::Error, $($args)+); + } + }; +} + +#[macro_export] +macro_rules! speculative_warn { + ($($args:tt)+) => { + if enabled!(Level::Warn) { + speculative_log(Level::Warn, $($args)+); + } + }; +} + +#[macro_export] +macro_rules! speculative_info { + ($($args:tt)+) => { + if enabled!(Level::Info) { + speculative_log(Level::Info, $($args)+); + } + }; +} + +#[macro_export] +macro_rules! speculative_debug { + ($($args:tt)+) => { + if enabled!(Level::Debug) { + speculative_log(Level::Debug, $($args)+); + } + }; +} + +#[macro_export] +macro_rules! speculative_trace { + ($($args:tt)+) => { + if enabled!(Level::Trace) { + speculative_log(Level::Trace, $($args)+); + } + }; +} diff --git a/aptos-move/aptos-vm/src/logging.rs b/aptos-move/aptos-vm-logging/src/log_schema.rs similarity index 86% rename from aptos-move/aptos-vm/src/logging.rs rename to aptos-move/aptos-vm-logging/src/log_schema.rs index 9c2b6c5ab65fb..bd6de5ffe18a8 100644 --- a/aptos-move/aptos-vm/src/logging.rs +++ b/aptos-move/aptos-vm-logging/src/log_schema.rs @@ -2,7 +2,6 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. // SPDX-License-Identifier: Apache-2.0 -use crate::counters::CRITICAL_ERRORS; use aptos_crypto::HashValue; use aptos_logger::Schema; use aptos_state_view::StateViewId; @@ -27,46 +26,45 @@ pub struct AdapterLogSchema { // transaction position in the list of transactions in the block, // 0 if the transaction is not part of a block (i.e. validation). - txn_id: usize, + txn_idx: usize, } impl AdapterLogSchema { - pub fn new(view_id: StateViewId, txn_id: usize) -> Self { + pub fn new(view_id: StateViewId, txn_idx: usize) -> Self { match view_id { StateViewId::BlockExecution { block_id } => Self { name: LogEntry::Execution, block_id: Some(block_id), first_version: None, base_version: None, - txn_id, + txn_idx, }, StateViewId::ChunkExecution { first_version } => Self { name: LogEntry::Execution, block_id: None, first_version: Some(first_version), base_version: None, - txn_id, + txn_idx, }, StateViewId::TransactionValidation { base_version } => Self { name: LogEntry::Validation, block_id: None, first_version: None, base_version: Some(base_version), - txn_id, + txn_idx, }, StateViewId::Miscellaneous => Self { name: LogEntry::Miscellaneous, block_id: None, first_version: None, base_version: None, - txn_id, + txn_idx, }, } } - // Increment the `CRITICAL_ERRORS` monitor event that will fire an alert - pub fn alert(&self) { - CRITICAL_ERRORS.inc(); + pub(crate) fn get_txn_idx(&self) -> usize { + self.txn_idx } } diff --git a/aptos-move/aptos-vm/Cargo.toml b/aptos-move/aptos-vm/Cargo.toml index c1e2293ca23d1..64f1d27ce079f 100644 --- a/aptos-move/aptos-vm/Cargo.toml +++ b/aptos-move/aptos-vm/Cargo.toml @@ -25,6 +25,7 @@ aptos-metrics-core = { workspace = true } aptos-mvhashmap = { workspace = true } aptos-state-view = { workspace = true } aptos-types = { workspace = true } +aptos-vm-logging = { workspace = true } bcs = { workspace = true } dashmap = { workspace = true } fail = { workspace = true } diff --git a/aptos-move/aptos-vm/src/adapter_common.rs b/aptos-move/aptos-vm/src/adapter_common.rs index 5eabb15aa8221..6c67f12f9ac9b 100644 --- a/aptos-move/aptos-vm/src/adapter_common.rs +++ b/aptos-move/aptos-vm/src/adapter_common.rs @@ -2,10 +2,7 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. // SPDX-License-Identifier: Apache-2.0 -use crate::{ - logging::AdapterLogSchema, - move_vm_ext::{MoveResolverExt, SessionExt, SessionId}, -}; +use crate::move_vm_ext::{MoveResolverExt, SessionExt, SessionId}; use anyhow::Result; use aptos_aggregator::transaction::TransactionOutputExt; use aptos_types::{ @@ -17,6 +14,7 @@ use aptos_types::{ vm_status::{StatusCode, VMStatus}, write_set::WriteSet, }; +use aptos_vm_logging::log_schema::AdapterLogSchema; /// This trait describes the VM adapter's interface. /// TODO: bring more of the execution logic in aptos_vm into this file. diff --git a/aptos-move/aptos-vm/src/aptos_vm.rs b/aptos-move/aptos-vm/src/aptos_vm.rs index f20b2c4c0e39b..44a94934de6ae 100644 --- a/aptos-move/aptos-vm/src/aptos_vm.rs +++ b/aptos-move/aptos-vm/src/aptos_vm.rs @@ -12,7 +12,6 @@ use crate::{ data_cache::{AsMoveResolver, IntoMoveResolver}, delta_state_view::DeltaStateView, errors::expect_only_successful_execution, - logging::AdapterLogSchema, move_vm_ext::{MoveResolverExt, SessionExt, SessionId}, system_module_names::*, transaction_metadata::TransactionMetadata, @@ -42,6 +41,7 @@ use aptos_types::{ vm_status::{AbortLocation, DiscardedVMStatus, StatusCode, VMStatus}, write_set::WriteSet, }; +use aptos_vm_logging::log_schema::AdapterLogSchema; use fail::fail_point; use move_binary_format::{ access::ModuleAccess, diff --git a/aptos-move/aptos-vm/src/aptos_vm_impl.rs b/aptos-move/aptos-vm/src/aptos_vm_impl.rs index b562af74ad1dd..c624407d15077 100644 --- a/aptos-move/aptos-vm/src/aptos_vm_impl.rs +++ b/aptos-move/aptos-vm/src/aptos_vm_impl.rs @@ -4,10 +4,8 @@ use crate::{ access_path_cache::AccessPathCache, - counters::*, data_cache::{MoveResolverWithVMMetadata, StorageAdapter}, errors::{convert_epilogue_error, convert_prologue_error, expect_only_successful_execution}, - logging::AdapterLogSchema, move_vm_ext::{MoveResolverExt, MoveVmExt, SessionExt, SessionId}, system_module_names::{MULTISIG_ACCOUNT_MODULE, VALIDATE_MULTISIG_TRANSACTION}, transaction_metadata::TransactionMetadata, @@ -18,7 +16,7 @@ use aptos_gas::{ AbstractValueSizeGasParameters, AptosGasParameters, ChangeSetConfigs, FromOnChainGasSchedule, Gas, NativeGasParameters, StorageGasParameters, }; -use aptos_logger::prelude::*; +use aptos_logger::{enabled, prelude::*, Level}; use aptos_state_view::StateView; use aptos_types::{ account_config::{TransactionValidation, APTOS_TRANSACTION_VALIDATION, CORE_CODE_ADDRESS}, @@ -30,6 +28,7 @@ use aptos_types::{ transaction::{AbortInfo, ExecutionStatus, Multisig, TransactionOutput, TransactionStatus}, vm_status::{StatusCode, VMStatus}, }; +use aptos_vm_logging::{log_schema::AdapterLogSchema, prelude::*}; use fail::fail_point; use move_binary_format::{errors::VMResult, CompiledModule}; use move_core_types::{ @@ -199,8 +198,10 @@ impl AptosVMImpl { log_context: &AdapterLogSchema, ) -> Result<&AptosGasParameters, VMStatus> { self.gas_params.as_ref().ok_or_else(|| { - log_context.alert(); - error!(*log_context, "VM Startup Failed. Gas Parameters Not Found"); + speculative_error!( + log_context, + "VM Startup Failed. Gas Parameters Not Found".into() + ); VMStatus::Error(StatusCode::VM_STARTUP_FAILURE) }) } @@ -210,10 +211,9 @@ impl AptosVMImpl { log_context: &AdapterLogSchema, ) -> Result<&StorageGasParameters, VMStatus> { self.storage_gas_params.as_ref().ok_or_else(|| { - log_context.alert(); - error!( - *log_context, - "VM Startup Failed. Storage Gas Parameters Not Found" + speculative_error!( + log_context, + "VM Startup Failed. Storage Gas Parameters Not Found".into() ); VMStatus::Error(StatusCode::VM_STARTUP_FAILURE) }) @@ -225,8 +225,7 @@ impl AptosVMImpl { pub fn get_version(&self) -> Result { self.version.clone().ok_or_else(|| { - CRITICAL_ERRORS.inc(); - error!("VM Startup Failed. Version Not Found"); + alert!("VM Startup Failed. Version Not Found"); VMStatus::Error(StatusCode::VM_STARTUP_FAILURE) }) } @@ -273,11 +272,12 @@ impl AptosVMImpl { }; if !valid { - warn!( - *log_context, - "[VM] Transaction size too big {} (max {})", - raw_bytes_len, - txn_gas_params.max_transaction_size_in_bytes, + speculative_warn!( + log_context, + format!( + "[VM] Transaction size too big {} (max {})", + raw_bytes_len, txn_gas_params.max_transaction_size_in_bytes + ), ); return Err(VMStatus::Error(StatusCode::EXCEEDED_MAX_TRANSACTION_SIZE)); } @@ -287,11 +287,13 @@ impl AptosVMImpl { // maximum number of gas units bound that we have set for any // transaction. if txn_data.max_gas_amount() > txn_gas_params.maximum_number_of_gas_units { - warn!( - *log_context, - "[VM] Gas unit error; max {}, submitted {}", - txn_gas_params.maximum_number_of_gas_units, - txn_data.max_gas_amount(), + speculative_warn!( + log_context, + format!( + "[VM] Gas unit error; max {}, submitted {}", + txn_gas_params.maximum_number_of_gas_units, + txn_data.max_gas_amount() + ), ); return Err(VMStatus::Error( StatusCode::MAX_GAS_UNITS_EXCEEDS_MAX_GAS_UNITS_BOUND, @@ -306,11 +308,13 @@ impl AptosVMImpl { .to_unit_round_up_with_params(txn_gas_params); if txn_data.max_gas_amount() < intrinsic_gas { - warn!( - *log_context, - "[VM] Gas unit error; min {}, submitted {}", - intrinsic_gas, - txn_data.max_gas_amount(), + speculative_warn!( + log_context, + format!( + "[VM] Gas unit error; min {}, submitted {}", + intrinsic_gas, + txn_data.max_gas_amount() + ), ); return Err(VMStatus::Error( StatusCode::MAX_GAS_UNITS_BELOW_MIN_TRANSACTION_GAS_UNITS, @@ -323,22 +327,26 @@ impl AptosVMImpl { #[allow(clippy::absurd_extreme_comparisons)] let below_min_bound = txn_data.gas_unit_price() < txn_gas_params.min_price_per_gas_unit; if below_min_bound { - warn!( - *log_context, - "[VM] Gas unit error; min {}, submitted {}", - txn_gas_params.min_price_per_gas_unit, - txn_data.gas_unit_price(), + speculative_warn!( + log_context, + format!( + "[VM] Gas unit error; min {}, submitted {}", + txn_gas_params.min_price_per_gas_unit, + txn_data.gas_unit_price() + ), ); return Err(VMStatus::Error(StatusCode::GAS_UNIT_PRICE_BELOW_MIN_BOUND)); } // The submitted gas price is greater than the maximum gas unit price set by the VM. if txn_data.gas_unit_price() > txn_gas_params.max_price_per_gas_unit { - warn!( - *log_context, - "[VM] Gas unit error; min {}, submitted {}", - txn_gas_params.max_price_per_gas_unit, - txn_data.gas_unit_price(), + speculative_warn!( + log_context, + format!( + "[VM] Gas unit error; min {}, submitted {}", + txn_gas_params.max_price_per_gas_unit, + txn_data.gas_unit_price() + ), ); return Err(VMStatus::Error(StatusCode::GAS_UNIT_PRICE_ABOVE_MAX_BOUND)); } diff --git a/aptos-move/aptos-vm/src/block_executor/mod.rs b/aptos-move/aptos-vm/src/block_executor/mod.rs index f90513ff2cc61..179c4c46fce86 100644 --- a/aptos-move/aptos-vm/src/block_executor/mod.rs +++ b/aptos-move/aptos-vm/src/block_executor/mod.rs @@ -28,6 +28,7 @@ use aptos_types::{ transaction::{Transaction, TransactionOutput, TransactionStatus}, write_set::{WriteOp, WriteSet, WriteSetMut}, }; +use aptos_vm_logging::{flush_speculative_logs, init_speculative_logs}; use move_core_types::vm_status::VMStatus; use rayon::prelude::*; @@ -104,6 +105,8 @@ impl BlockAptosVM { }); drop(signature_verification_timer); + init_speculative_logs(signature_verified_block.len()); + BLOCK_EXECUTOR_CONCURRENCY.set(concurrency_level as i64); let executor = BlockExecutor::, S>::new( concurrency_level, @@ -125,6 +128,8 @@ impl BlockAptosVM { }) }); + flush_speculative_logs(); + match ret { Ok(outputs) => Ok(outputs), Err(Error::ModulePathReadWrite) => { diff --git a/aptos-move/aptos-vm/src/block_executor/vm_wrapper.rs b/aptos-move/aptos-vm/src/block_executor/vm_wrapper.rs index 8b4e3aba6c260..db454764ea827 100644 --- a/aptos-move/aptos-vm/src/block_executor/vm_wrapper.rs +++ b/aptos-move/aptos-vm/src/block_executor/vm_wrapper.rs @@ -7,12 +7,12 @@ use crate::{ aptos_vm::AptosVM, block_executor::AptosTransactionOutput, data_cache::{AsMoveResolver, StorageAdapter}, - logging::AdapterLogSchema, }; use aptos_aggregator::{delta_change_set::DeltaChangeSet, transaction::TransactionOutputExt}; use aptos_block_executor::task::{ExecutionStatus, ExecutorTask}; -use aptos_logger::prelude::*; +use aptos_logger::{enabled, Level}; use aptos_state_view::StateView; +use aptos_vm_logging::{log_schema::AdapterLogSchema, prelude::*}; use move_core_types::{ ident_str, language_storage::{ModuleId, CORE_CODE_ADDRESS}, @@ -79,19 +79,26 @@ impl<'a, S: 'a + StateView + Sync> ExecutorTask for AptosExecutorTask<'a, S> { if output_ext.txn_output().status().is_discarded() { match sender { - Some(s) => trace!( - log_context, - "Transaction discarded, sender: {}, error: {:?}", - s, - vm_status, + Some(s) => speculative_trace!( + &log_context, + format!( + "Transaction discarded, sender: {}, error: {:?}", + s, vm_status + ), ), None => { - trace!(log_context, "Transaction malformed, error: {:?}", vm_status,) + speculative_trace!( + &log_context, + format!("Transaction malformed, error: {:?}", vm_status), + ) }, }; } if AptosVM::should_restart_execution(output_ext.txn_output()) { - info!(log_context, "Reconfiguration occurred: restart required",); + speculative_info!( + &log_context, + "Reconfiguration occurred: restart required".into() + ); ExecutionStatus::SkipRest(AptosTransactionOutput::new(output_ext)) } else { ExecutionStatus::Success(AptosTransactionOutput::new(output_ext)) diff --git a/aptos-move/aptos-vm/src/counters.rs b/aptos-move/aptos-vm/src/counters.rs index a85ec1950c6dc..e6dc21720c7ba 100644 --- a/aptos-move/aptos-vm/src/counters.rs +++ b/aptos-move/aptos-vm/src/counters.rs @@ -105,9 +105,3 @@ pub static TXN_VALIDATION_SECONDS: Lazy = Lazy::new(|| { pub static TXN_GAS_USAGE: Lazy = Lazy::new(|| { register_histogram!("aptos_vm_txn_gas_usage", "Gas used per transaction").unwrap() }); - -/// Count the number of critical errors. This is not intended for display -/// on a dashboard but rather for triggering alerts. -pub static CRITICAL_ERRORS: Lazy = Lazy::new(|| { - register_int_counter!("aptos_vm_critical_errors", "Number of critical errors").unwrap() -}); diff --git a/aptos-move/aptos-vm/src/errors.rs b/aptos-move/aptos-vm/src/errors.rs index 68fe88998f435..9aab829d23aa0 100644 --- a/aptos-move/aptos-vm/src/errors.rs +++ b/aptos-move/aptos-vm/src/errors.rs @@ -2,9 +2,9 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. // SPDX-License-Identifier: Apache-2.0 -use crate::logging::AdapterLogSchema; -use aptos_logger::prelude::*; +use aptos_logger::{enabled, Level}; use aptos_types::account_config::TransactionValidation; +use aptos_vm_logging::{log_schema::AdapterLogSchema, prelude::*}; use move_binary_format::errors::VMError; use move_core_types::vm_status::{StatusCode, VMStatus}; @@ -82,12 +82,11 @@ pub fn convert_prologue_error( StatusCode::MULTISIG_TRANSACTION_PAYLOAD_DOES_NOT_MATCH_HASH }, (category, reason) => { - log_context.alert(); - error!( - *log_context, - "[aptos_vm] Unexpected prologue Move abort: {:?}::{:?} (Category: {:?} Reason: {:?})", - location, code, category, reason, - ); + speculative_error!( + log_context, + format!("[aptos_vm] Unexpected prologue Move abort: {:?}::{:?} (Category: {:?} Reason: {:?})", + location, code, category, reason), + ); return Err(VMStatus::Error( StatusCode::UNEXPECTED_ERROR_FROM_KNOWN_MOVE_FUNCTION, )); @@ -119,11 +118,10 @@ pub fn convert_prologue_error( StatusCode::SECONDARY_KEYS_ADDRESSES_COUNT_MISMATCH }, (category, reason) => { - log_context.alert(); - error!( - *log_context, - "[aptos_vm] Unexpected prologue Move abort: {:?}::{:?} (Category: {:?} Reason: {:?})", - location, code, category, reason, + speculative_error!( + log_context, + format!("[aptos_vm] Unexpected prologue Move abort: {:?}::{:?} (Category: {:?} Reason: {:?})", + location, code, category, reason), ); return Err(VMStatus::Error( StatusCode::UNEXPECTED_ERROR_FROM_KNOWN_MOVE_FUNCTION, @@ -133,10 +131,9 @@ pub fn convert_prologue_error( VMStatus::Error(new_major_status) }, status @ VMStatus::ExecutionFailure { .. } | status @ VMStatus::Error(_) => { - log_context.alert(); - error!( - *log_context, - "[aptos_vm] Unexpected prologue error: {:?}", status + speculative_error!( + log_context, + format!("[aptos_vm] Unexpected prologue error: {:?}", status), ); VMStatus::Error(StatusCode::UNEXPECTED_ERROR_FROM_KNOWN_MOVE_FUNCTION) }, @@ -158,11 +155,10 @@ pub fn convert_epilogue_error( if !transaction_validation.is_account_module_abort(&location) => { let (category, reason) = error_split(code); - log_context.alert(); - error!( - *log_context, - "[aptos_vm] Unexpected success epilogue Move abort: {:?}::{:?} (Category: {:?} Reason: {:?})", - location, code, category, reason, + speculative_error!( + log_context, + format!("[aptos_vm] Unexpected success epilogue Move abort: {:?}::{:?} (Category: {:?} Reason: {:?})", + location, code, category, reason), ); VMStatus::Error(StatusCode::UNEXPECTED_ERROR_FROM_KNOWN_MOVE_FUNCTION) }, @@ -170,21 +166,19 @@ pub fn convert_epilogue_error( VMStatus::MoveAbort(location, code) => match error_split(code) { (LIMIT_EXCEEDED, ECANT_PAY_GAS_DEPOSIT) => VMStatus::MoveAbort(location, code), (category, reason) => { - log_context.alert(); - error!( - *log_context, - "[aptos_vm] Unexpected success epilogue Move abort: {:?}::{:?} (Category: {:?} Reason: {:?})", - location, code, category, reason, + speculative_error!( + log_context, + format!("[aptos_vm] Unexpected success epilogue Move abort: {:?}::{:?} (Category: {:?} Reason: {:?})", + location, code, category, reason), ); VMStatus::Error(StatusCode::UNEXPECTED_ERROR_FROM_KNOWN_MOVE_FUNCTION) }, }, status => { - log_context.alert(); - error!( - *log_context, - "[aptos_vm] Unexpected success epilogue error: {:?}", status, + speculative_error!( + log_context, + format!("[aptos_vm] Unexpected success epilogue error: {:?}", status), ); VMStatus::Error(StatusCode::UNEXPECTED_ERROR_FROM_KNOWN_MOVE_FUNCTION) }, @@ -206,11 +200,12 @@ pub fn expect_only_successful_execution( status => { // Only trigger a warning here as some errors could be a result of the speculative parallel execution. // We will report the errors after we obtained the final transaction output in update_counters_for_processed_chunk - warn!( - *log_context, - "[aptos_vm] Unexpected error from known Move function, '{}'. Error: {:?}", - function_name, - status, + speculative_warn!( + log_context, + format!( + "[aptos_vm] Unexpected error from known Move function, '{}'. Error: {:?}", + function_name, status + ), ); VMStatus::Error(StatusCode::UNEXPECTED_ERROR_FROM_KNOWN_MOVE_FUNCTION) }, diff --git a/aptos-move/aptos-vm/src/lib.rs b/aptos-move/aptos-vm/src/lib.rs index 101df86509389..854cb2cba36a1 100644 --- a/aptos-move/aptos-vm/src/lib.rs +++ b/aptos-move/aptos-vm/src/lib.rs @@ -115,7 +115,6 @@ mod aptos_vm_impl; pub mod block_executor; mod delta_state_view; mod errors; -pub mod logging; pub mod move_vm_ext; pub mod natives; pub mod read_write_set_analysis; diff --git a/aptos-move/block-executor/Cargo.toml b/aptos-move/block-executor/Cargo.toml index a84cd89ed3d01..74c1d1599a4b0 100644 --- a/aptos-move/block-executor/Cargo.toml +++ b/aptos-move/block-executor/Cargo.toml @@ -21,6 +21,7 @@ aptos-metrics-core = { workspace = true } aptos-mvhashmap = { workspace = true } aptos-state-view = { workspace = true } aptos-types = { workspace = true } +aptos-vm-logging = { workspace = true } arc-swap = { workspace = true } bcs = { workspace = true } criterion = { workspace = true, optional = true } diff --git a/aptos-move/block-executor/src/executor.rs b/aptos-move/block-executor/src/executor.rs index 457cfe67a081c..ea03efe6c673c 100644 --- a/aptos-move/block-executor/src/executor.rs +++ b/aptos-move/block-executor/src/executor.rs @@ -19,6 +19,7 @@ use aptos_logger::debug; use aptos_mvhashmap::{MVHashMap, MVHashMapError, MVHashMapOutput}; use aptos_state_view::TStateView; use aptos_types::write_set::WriteOp; +use aptos_vm_logging::{clear_speculative_txn_logs, init_speculative_logs}; use num_cpus; use once_cell::sync::Lazy; use std::{ @@ -177,6 +178,9 @@ where if aborted { counters::SPECULATIVE_ABORT_COUNT.inc(); + // Any logs from the aborted execution should be cleared and not reported. + clear_speculative_txn_logs(idx_to_validate); + // Not valid and successfully aborted, mark the latest write/delta sets as estimates. for k in last_input_output.modified_keys(idx_to_validate) { versioned_data_cache.mark_estimate(&k, idx_to_validate); @@ -405,6 +409,10 @@ where if matches!(ret, Err(Error::ModulePathReadWrite)) { debug!("[Execution]: Module read & written, sequential fallback"); + // All logs from the parallel execution should be cleared and not reported. + // Clear by re-initializing the speculative logs. + init_speculative_logs(signature_verified_block.len()); + ret = self.execute_transactions_sequential( executor_arguments, &signature_verified_block, diff --git a/aptos-move/block-executor/src/proptest_types/types.rs b/aptos-move/block-executor/src/proptest_types/types.rs index 81c809c0e98e7..4568070426e59 100644 --- a/aptos-move/block-executor/src/proptest_types/types.rs +++ b/aptos-move/block-executor/src/proptest_types/types.rs @@ -389,7 +389,7 @@ impl> + Arbitrary + Clone + Debug + Eq + Sync + Send> Transactio impl TransactionType for Transaction where - K: PartialOrd + Ord + Send + Sync + Clone + Hash + Eq + ModulePath + 'static, + K: PartialOrd + Ord + Send + Sync + Clone + Hash + Eq + ModulePath + Debug + 'static, V: Debug + Send + Sync + Debug + Clone + TransactionWrite + 'static, { type Key = K; @@ -411,7 +411,7 @@ impl Task { impl ExecutorTask for Task where - K: PartialOrd + Ord + Send + Sync + Clone + Hash + Eq + ModulePath + 'static, + K: PartialOrd + Ord + Send + Sync + Clone + Hash + Eq + ModulePath + Debug + 'static, V: Send + Sync + Debug + Clone + TransactionWrite + 'static, { type Argument = (); @@ -467,7 +467,7 @@ pub struct Output(Vec<(K, V)>, Vec<(K, DeltaOp)>, Vec>>); impl TransactionOutput for Output where - K: PartialOrd + Ord + Send + Sync + Clone + Hash + Eq + ModulePath + 'static, + K: PartialOrd + Ord + Send + Sync + Clone + Hash + Eq + ModulePath + Debug + 'static, V: Send + Sync + Debug + Clone + TransactionWrite + 'static, { type Txn = Transaction; diff --git a/aptos-move/block-executor/src/task.rs b/aptos-move/block-executor/src/task.rs index 60fa868d9ca75..e2dbc10214073 100644 --- a/aptos-move/block-executor/src/task.rs +++ b/aptos-move/block-executor/src/task.rs @@ -42,7 +42,7 @@ impl ModulePath for StateKey { /// Trait that defines a transaction that could be parallel executed by the scheduler. Each /// transaction will write to a key value storage as their side effect. pub trait Transaction: Sync + Send + 'static { - type Key: PartialOrd + Ord + Send + Sync + Clone + Hash + Eq + ModulePath; + type Key: PartialOrd + Ord + Send + Sync + Clone + Hash + Eq + ModulePath + Debug; type Value: Send + Sync + TransactionWrite; } diff --git a/aptos-move/block-executor/src/unit_tests/mod.rs b/aptos-move/block-executor/src/unit_tests/mod.rs index 5dac3fa1928a1..a04b7a44acec8 100644 --- a/aptos-move/block-executor/src/unit_tests/mod.rs +++ b/aptos-move/block-executor/src/unit_tests/mod.rs @@ -20,7 +20,7 @@ use std::{ fn run_and_assert(transactions: Vec>) where - K: PartialOrd + Ord + Send + Sync + Clone + Hash + Eq + ModulePath + 'static, + K: PartialOrd + Ord + Send + Sync + Clone + Hash + Eq + ModulePath + Debug + 'static, V: Send + Sync + Debug + Clone + Eq + TransactionWrite + 'static, { let data_view = DeltaDataView:: { diff --git a/aptos-move/block-executor/src/view.rs b/aptos-move/block-executor/src/view.rs index ed8df81f96584..cdb5ddcdaaff8 100644 --- a/aptos-move/block-executor/src/view.rs +++ b/aptos-move/block-executor/src/view.rs @@ -9,6 +9,7 @@ use crate::{ }; use anyhow::Result; use aptos_aggregator::delta_change_set::{deserialize, serialize, DeltaOp}; +use aptos_logger::error; use aptos_mvhashmap::{MVHashMap, MVHashMapError, MVHashMapOutput}; use aptos_state_view::{StateViewId, TStateView}; use aptos_types::{ @@ -16,6 +17,7 @@ use aptos_types::{ vm_status::{StatusCode, VMStatus}, write_set::TransactionWrite, }; +use aptos_vm_logging::{log_schema::AdapterLogSchema, prelude::*}; use move_binary_format::errors::Location; use std::{cell::RefCell, collections::BTreeMap, hash::Hash, sync::Arc}; @@ -177,6 +179,22 @@ impl<'a, T: Transaction, S: TStateView> LatestView<'a, T, S> { txn_idx, } } + + fn get_base_value(&self, state_key: &T::Key) -> anyhow::Result> { + let ret = self.base_view.get_state_value(state_key); + + if ret.is_err() { + // Even speculatively, reading from base view should not return an error. + // Thus, this critical error log and count does not need to be buffered. + let log_context = AdapterLogSchema::new(self.base_view.id(), self.txn_idx); + alert!( + log_context, + "[VM, StateView] Error getting data from storage for {:?}", + state_key + ); + } + ret + } } impl<'a, T: Transaction, S: TStateView> TStateView for LatestView<'a, T, S> { @@ -199,25 +217,10 @@ impl<'a, T: Transaction, S: TStateView> TStateView for LatestView< .map_err(|pe| pe.finish(Location::Undefined).into_vm_status())?; Ok(Some(StateValue::new_legacy(serialize(&result)))) }, - ReadResult::None => self.base_view.get_state_value(state_key), + ReadResult::None => self.get_base_value(state_key), }, ViewMapKind::BTree(map) => map.get(state_key).map_or_else( - || { - // let ret = - self.base_view.get_state_value(state_key) - - // TODO: common treatment with the above case. - // TODO: enable below when logging isn't a circular dependency. - // Even speculatively, reading from base view should not return an error. - // let log_context = AdapterLogSchema::new(self.base_view.id(), self.txn_idx); - // error!( - // log_context, - // "[VM, StateView] Error getting data from storage for {:?}", state_key - // ); - // Alert (increase critical error count). - // log_context.alert(); - // ret - }, + || self.get_base_value(state_key), |v| Ok(v.as_state_value()), ), } diff --git a/aptos-move/e2e-testsuite/Cargo.toml b/aptos-move/e2e-testsuite/Cargo.toml index d55bac44ef268..79531ffd65522 100644 --- a/aptos-move/e2e-testsuite/Cargo.toml +++ b/aptos-move/e2e-testsuite/Cargo.toml @@ -25,6 +25,7 @@ aptos-state-view = { workspace = true } aptos-types = { workspace = true } aptos-vm = { workspace = true } aptos-vm-genesis = { workspace = true } +aptos-vm-logging = { workspace = true } aptos-writeset-generator = { workspace = true } bcs = { workspace = true } itertools = { workspace = true } diff --git a/aptos-move/e2e-testsuite/src/tests/failed_transaction_tests.rs b/aptos-move/e2e-testsuite/src/tests/failed_transaction_tests.rs index c90b6d919957e..d3e4b288abd84 100644 --- a/aptos-move/e2e-testsuite/src/tests/failed_transaction_tests.rs +++ b/aptos-move/e2e-testsuite/src/tests/failed_transaction_tests.rs @@ -11,10 +11,8 @@ use aptos_types::{ transaction::ExecutionStatus, vm_status::{StatusCode, VMStatus}, }; -use aptos_vm::{ - data_cache::AsMoveResolver, logging::AdapterLogSchema, - transaction_metadata::TransactionMetadata, AptosVM, -}; +use aptos_vm::{data_cache::AsMoveResolver, transaction_metadata::TransactionMetadata, AptosVM}; +use aptos_vm_logging::log_schema::AdapterLogSchema; use move_core_types::vm_status::StatusCode::TYPE_MISMATCH; #[test] diff --git a/crates/aptos-speculative-state-helper/src/lib.rs b/crates/aptos-speculative-state-helper/src/lib.rs index f36e3d01dda56..04c55cab679dc 100644 --- a/crates/aptos-speculative-state-helper/src/lib.rs +++ b/crates/aptos-speculative-state-helper/src/lib.rs @@ -86,14 +86,14 @@ impl SpeculativeEvents { Ok(()) } - /// Logically clears events recorded so far for a given transaction. + /// Clears events recorded so far for a given transaction. pub fn clear_txn_events(&self, txn_idx: usize) -> anyhow::Result<()> { let events = self.events_with_checked_length(txn_idx + 1)?; events[txn_idx].lock().clear(); Ok(()) } - /// Logically clears all events. + /// Clears all events. pub fn clear_all_events(&self) { // TODO: Parallelize if needed. for event in &self.events {