From 18bcbd22ecadb818a917ca68b0fe0d3af991ebc8 Mon Sep 17 00:00:00 2001 From: Rati Gelashvili Date: Wed, 2 Oct 2024 22:44:50 +0400 Subject: [PATCH] DeltaApplicationError & predicted value fixes, error fallbacks (#14422) * DeltaApplicationError & latest predicted value fixes, fallbacks to sequential * addressing comments --- Cargo.lock | 2 +- aptos-move/aptos-aggregator/Cargo.toml | 1 - .../src/aggregator_v1_extension.rs | 3 +- .../aptos-aggregator/src/delayed_change.rs | 7 +- .../src/delayed_field_extension.rs | 14 +-- .../aptos-aggregator/src/delta_change_set.rs | 7 +- aptos-move/aptos-aggregator/src/delta_math.rs | 5 +- aptos-move/aptos-aggregator/src/resolver.rs | 7 +- .../aptos-aggregator/src/tests/types.rs | 6 +- aptos-move/aptos-aggregator/src/types.rs | 75 +----------- aptos-move/aptos-vm-types/src/change_set.rs | 5 +- aptos-move/aptos-vm-types/src/output.rs | 5 +- .../src/tests/test_change_set.rs | 5 +- aptos-move/aptos-vm/src/block_executor/mod.rs | 4 +- .../aptos-vm/src/block_executor/vm_wrapper.rs | 4 +- aptos-move/aptos-vm/src/data_cache.rs | 4 +- aptos-move/aptos-vm/src/errors.rs | 6 +- .../session/view_with_change_set.rs | 6 +- aptos-move/aptos-vm/src/natives.rs | 7 +- .../block-executor/src/captured_reads.rs | 109 ++++++++++++------ aptos-move/block-executor/src/errors.rs | 6 +- aptos-move/block-executor/src/executor.rs | 31 ++++- .../block-executor/src/executor_utilities.rs | 8 +- .../src/proptest_types/types.rs | 2 +- aptos-move/block-executor/src/scheduler.rs | 3 +- aptos-move/block-executor/src/task.rs | 2 +- .../src/txn_last_input_output.rs | 36 +++--- .../block-executor/src/value_exchange.rs | 6 +- aptos-move/block-executor/src/view.rs | 24 ++-- .../aggregator_natives/aggregator_v2.rs | 10 +- aptos-move/mvhashmap/src/types.rs | 6 +- aptos-move/mvhashmap/src/unsync_map.rs | 4 +- .../mvhashmap/src/versioned_delayed_fields.rs | 100 ++++++++++++---- .../move/move-core/types/src/vm_status.rs | 16 ++- .../move-vm/types/src/delayed_values/error.rs | 3 +- types/Cargo.toml | 1 + types/src/delayed_fields.rs | 31 +---- types/src/error.rs | 105 ++++++++++++++++- 38 files changed, 402 insertions(+), 274 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b3f5b01055359..6a4b1d2d960cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,7 +405,6 @@ dependencies = [ name = "aptos-aggregator" version = "0.1.0" dependencies = [ - "aptos-logger", "aptos-types", "bcs 0.1.4", "claims", @@ -4348,6 +4347,7 @@ dependencies = [ "strum_macros 0.24.3", "thiserror", "tokio", + "tracing", "url", ] diff --git a/aptos-move/aptos-aggregator/Cargo.toml b/aptos-move/aptos-aggregator/Cargo.toml index 3152a494a97a6..6c73b0b750b5a 100644 --- a/aptos-move/aptos-aggregator/Cargo.toml +++ b/aptos-move/aptos-aggregator/Cargo.toml @@ -13,7 +13,6 @@ repository = { workspace = true } rust-version = { workspace = true } [dependencies] -aptos-logger = { workspace = true } aptos-types = { workspace = true } bcs = { workspace = true } claims = { workspace = true } diff --git a/aptos-move/aptos-aggregator/src/aggregator_v1_extension.rs b/aptos-move/aptos-aggregator/src/aggregator_v1_extension.rs index 4c4b3573ff2d5..6f51b30b5e983 100644 --- a/aptos-move/aptos-aggregator/src/aggregator_v1_extension.rs +++ b/aptos-move/aptos-aggregator/src/aggregator_v1_extension.rs @@ -5,9 +5,10 @@ use crate::{ bounded_math::{BoundedMath, SignedU128}, delta_math::DeltaHistory, resolver::AggregatorV1Resolver, - types::{expect_ok, DelayedFieldsSpeculativeError, DeltaApplicationFailureReason}, + types::{DelayedFieldsSpeculativeError, DeltaApplicationFailureReason}, }; use aptos_types::{ + error::expect_ok, state_store::{state_key::StateKey, table::TableHandle}, PeerId, }; diff --git a/aptos-move/aptos-aggregator/src/delayed_change.rs b/aptos-move/aptos-aggregator/src/delayed_change.rs index 4535d891bd644..64344255b32e0 100644 --- a/aptos-move/aptos-aggregator/src/delayed_change.rs +++ b/aptos-move/aptos-aggregator/src/delayed_change.rs @@ -3,9 +3,12 @@ use crate::{ delta_change_set::{DeltaOp, DeltaWithMax}, - types::{code_invariant_error, DelayedFieldValue, DelayedFieldsSpeculativeError, PanicOr}, + types::{DelayedFieldValue, DelayedFieldsSpeculativeError}, +}; +use aptos_types::{ + delayed_fields::SnapshotToStringFormula, + error::{code_invariant_error, PanicOr}, }; -use aptos_types::delayed_fields::SnapshotToStringFormula; #[derive(Clone, Debug, Eq, PartialEq)] pub enum DelayedApplyChange { diff --git a/aptos-move/aptos-aggregator/src/delayed_field_extension.rs b/aptos-move/aptos-aggregator/src/delayed_field_extension.rs index 9a67226fc335b..1451af79fcec4 100644 --- a/aptos-move/aptos-aggregator/src/delayed_field_extension.rs +++ b/aptos-move/aptos-aggregator/src/delayed_field_extension.rs @@ -6,14 +6,14 @@ use crate::{ delayed_change::{ApplyBase, DelayedApplyChange, DelayedChange}, delta_change_set::DeltaWithMax, resolver::DelayedFieldResolver, - types::{ - code_invariant_error, expect_ok, DelayedFieldValue, DelayedFieldsSpeculativeError, PanicOr, - ReadPosition, - }, + types::{DelayedFieldValue, DelayedFieldsSpeculativeError, ReadPosition}, }; -use aptos_types::delayed_fields::{ - calculate_width_for_constant_string, calculate_width_for_integer_embedded_string, - SnapshotToStringFormula, +use aptos_types::{ + delayed_fields::{ + calculate_width_for_constant_string, calculate_width_for_integer_embedded_string, + SnapshotToStringFormula, + }, + error::{code_invariant_error, expect_ok, PanicOr}, }; use move_binary_format::errors::PartialVMResult; use move_vm_types::delayed_values::delayed_field_id::DelayedFieldID; diff --git a/aptos-move/aptos-aggregator/src/delta_change_set.rs b/aptos-move/aptos-aggregator/src/delta_change_set.rs index a1bea4eb752e3..ab7a616128962 100644 --- a/aptos-move/aptos-aggregator/src/delta_change_set.rs +++ b/aptos-move/aptos-aggregator/src/delta_change_set.rs @@ -8,10 +8,9 @@ use crate::{ bounded_math::{BoundedMath, SignedU128}, delta_math::{merge_data_and_delta, merge_two_deltas, DeltaHistory}, - types::{ - code_invariant_error, DelayedFieldsSpeculativeError, DeltaApplicationFailureReason, PanicOr, - }, + types::{DelayedFieldsSpeculativeError, DeltaApplicationFailureReason}, }; +use aptos_types::error::{code_invariant_error, PanicOr}; #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub struct DeltaWithMax { @@ -219,7 +218,7 @@ mod test { FakeAggregatorView, }; use aptos_types::{ - delayed_fields::PanicError, + error::PanicError, state_store::{ state_key::StateKey, state_value::{StateValue, StateValueMetadata}, diff --git a/aptos-move/aptos-aggregator/src/delta_math.rs b/aptos-move/aptos-aggregator/src/delta_math.rs index 7078b03223a88..b4e079ad73bcf 100644 --- a/aptos-move/aptos-aggregator/src/delta_math.rs +++ b/aptos-move/aptos-aggregator/src/delta_math.rs @@ -4,10 +4,11 @@ use crate::{ bounded_math::{ok_overflow, ok_underflow, BoundedMath, SignedU128}, types::{ - expect_ok, DelayedFieldsSpeculativeError, DeltaApplicationFailureReason, - DeltaHistoryMergeOffsetFailureReason, PanicOr, + DelayedFieldsSpeculativeError, DeltaApplicationFailureReason, + DeltaHistoryMergeOffsetFailureReason, }, }; +use aptos_types::error::{expect_ok, PanicOr}; /// Tracks values seen by aggregator. In particular, stores information about /// the biggest and the smallest deltas that were applied successfully during diff --git a/aptos-move/aptos-aggregator/src/resolver.rs b/aptos-move/aptos-aggregator/src/resolver.rs index c15c6542f0517..0f522753392a1 100644 --- a/aptos-move/aptos-aggregator/src/resolver.rs +++ b/aptos-move/aptos-aggregator/src/resolver.rs @@ -5,13 +5,10 @@ use crate::{ aggregator_v1_extension::{addition_v1_error, subtraction_v1_error}, bounded_math::SignedU128, delta_change_set::{serialize, DeltaOp}, - types::{ - code_invariant_error, DelayedFieldValue, DelayedFieldsSpeculativeError, - DeltaApplicationFailureReason, PanicOr, - }, + types::{DelayedFieldValue, DelayedFieldsSpeculativeError, DeltaApplicationFailureReason}, }; use aptos_types::{ - delayed_fields::PanicError, + error::{code_invariant_error, PanicError, PanicOr}, state_store::{ state_key::StateKey, state_value::{StateValue, StateValueMetadata}, diff --git a/aptos-move/aptos-aggregator/src/tests/types.rs b/aptos-move/aptos-aggregator/src/tests/types.rs index 6cae8d5d36fc3..18096caae79b1 100644 --- a/aptos-move/aptos-aggregator/src/tests/types.rs +++ b/aptos-move/aptos-aggregator/src/tests/types.rs @@ -6,12 +6,10 @@ use crate::{ bounded_math::{BoundedMath, SignedU128}, delta_change_set::serialize, resolver::{TAggregatorV1View, TDelayedFieldView}, - types::{ - code_invariant_error, expect_ok, DelayedFieldValue, DelayedFieldsSpeculativeError, PanicOr, - }, + types::{DelayedFieldValue, DelayedFieldsSpeculativeError}, }; use aptos_types::{ - delayed_fields::PanicError, + error::{code_invariant_error, expect_ok, PanicError, PanicOr}, state_store::{ state_key::StateKey, state_value::{StateValue, StateValueMetadata}, diff --git a/aptos-move/aptos-aggregator/src/types.rs b/aptos-move/aptos-aggregator/src/types.rs index 81b99d6a2445b..9571c0fb9819e 100644 --- a/aptos-move/aptos-aggregator/src/types.rs +++ b/aptos-move/aptos-aggregator/src/types.rs @@ -2,8 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::bounded_math::SignedU128; -use aptos_logger::error; -use aptos_types::delayed_fields::PanicError; +use aptos_types::error::{code_invariant_error, NonPanic, PanicError, PanicOr}; use move_binary_format::errors::PartialVMError; use move_core_types::{ value::{IdentifierMappingKind, MoveTypeLayout}, @@ -20,84 +19,12 @@ use move_vm_types::{ values::{Struct, Value}, }; -// Wrapping another error, to add a variant that represents -// something that should never happen - i.e. a code invariant error, -// which we would generally just panic, but since we are inside of the VM, -// we cannot do that. -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum PanicOr { - CodeInvariantError(String), - Or(T), -} - -impl PanicOr { - pub fn map_non_panic(self, f: impl FnOnce(T) -> E) -> PanicOr { - match self { - PanicOr::CodeInvariantError(msg) => PanicOr::CodeInvariantError(msg), - PanicOr::Or(value) => PanicOr::Or(f(value)), - } - } -} - -pub fn code_invariant_error(message: M) -> PanicError { - let msg = format!( - "Delayed materialization code invariant broken (there is a bug in the code), {:?}", - message - ); - error!("{}", msg); - PanicError::CodeInvariantError(msg) -} - -pub fn expect_ok(value: Result) -> Result { - value.map_err(|e| code_invariant_error(format!("Expected Ok, got Err({:?})", e))) -} - -impl From for PanicOr { - fn from(err: PanicError) -> Self { - match err { - PanicError::CodeInvariantError(e) => PanicOr::CodeInvariantError(e), - } - } -} - -pub trait NonPanic {} - -impl From for PanicOr { - fn from(err: T) -> Self { - PanicOr::Or(err) - } -} - impl From for PartialVMError { fn from(err: DelayedFieldsSpeculativeError) -> Self { PartialVMError::from(PanicOr::from(err)) } } -impl From<&PanicOr> for StatusCode { - fn from(err: &PanicOr) -> Self { - match err { - PanicOr::CodeInvariantError(_) => { - StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR - }, - PanicOr::Or(_) => StatusCode::SPECULATIVE_EXECUTION_ABORT_ERROR, - } - } -} - -impl From> for PartialVMError { - fn from(err: PanicOr) -> Self { - match err { - PanicOr::CodeInvariantError(msg) => { - PartialVMError::new(StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR) - .with_message(msg) - }, - PanicOr::Or(err) => PartialVMError::new(StatusCode::SPECULATIVE_EXECUTION_ABORT_ERROR) - .with_message(format!("{:?}", err)), - } - } -} - /// Different reasons for why applying new start_value doesn't /// satisfy history bounds #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/aptos-move/aptos-vm-types/src/change_set.rs b/aptos-move/aptos-vm-types/src/change_set.rs index 3c2ca78492403..6c539b6b9045e 100644 --- a/aptos-move/aptos-vm-types/src/change_set.rs +++ b/aptos-move/aptos-vm-types/src/change_set.rs @@ -13,11 +13,10 @@ use aptos_aggregator::{ delayed_change::DelayedChange, delta_change_set::{serialize, DeltaOp}, resolver::AggregatorV1Resolver, - types::code_invariant_error, }; use aptos_types::{ contract_event::ContractEvent, - delayed_fields::PanicError, + error::{code_invariant_error, PanicError}, state_store::{ state_key::{inner::StateKeyInner, StateKey}, state_value::StateValueMetadata, @@ -193,7 +192,7 @@ impl VMChangeSet { let (key, value) = element?; if acc.insert(key, value).is_some() { Err(PartialVMError::new( - StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR, + StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR, ) .with_message( "Found duplicate key across resource change sets.".to_string(), diff --git a/aptos-move/aptos-vm-types/src/output.rs b/aptos-move/aptos-vm-types/src/output.rs index 22bf8b0af5a2a..80db4b7e778f0 100644 --- a/aptos-move/aptos-vm-types/src/output.rs +++ b/aptos-move/aptos-vm-types/src/output.rs @@ -8,11 +8,10 @@ use crate::{ }; use aptos_aggregator::{ delayed_change::DelayedChange, delta_change_set::DeltaOp, resolver::AggregatorV1Resolver, - types::code_invariant_error, }; use aptos_types::{ contract_event::ContractEvent, - delayed_fields::PanicError, + error::{code_invariant_error, PanicError}, fee_statement::FeeStatement, state_store::state_key::StateKey, transaction::{TransactionAuxiliaryData, TransactionOutput, TransactionStatus}, @@ -167,7 +166,7 @@ impl VMOutput { self.try_materialize(resolver)?; self.into_transaction_output().map_err(|e| { VMStatus::error( - StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR, + StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR, Some(e.to_string()), ) }) diff --git a/aptos-move/aptos-vm-types/src/tests/test_change_set.rs b/aptos-move/aptos-vm-types/src/tests/test_change_set.rs index af095be82cd8e..2d887e12e5c80 100644 --- a/aptos-move/aptos-vm-types/src/tests/test_change_set.rs +++ b/aptos-move/aptos-vm-types/src/tests/test_change_set.rs @@ -22,7 +22,8 @@ use aptos_aggregator::{ delta_change_set::DeltaWithMax, }; use aptos_types::{ - delayed_fields::{PanicError, SnapshotToStringFormula}, + delayed_fields::SnapshotToStringFormula, + error::PanicError, state_store::{state_key::StateKey, state_value::StateValueMetadata}, transaction::ChangeSet as StorageChangeSet, write_set::{WriteOp, WriteSetMut}, @@ -199,7 +200,7 @@ macro_rules! assert_invariant_violation { assert!( err.major_status() == StatusCode::UNKNOWN_INVARIANT_VIOLATION_ERROR || err.major_status() - == StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR + == StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR ); }; diff --git a/aptos-move/aptos-vm/src/block_executor/mod.rs b/aptos-move/aptos-vm/src/block_executor/mod.rs index 08f3dcd0e3ce9..47a5622ecca58 100644 --- a/aptos-move/aptos-vm/src/block_executor/mod.rs +++ b/aptos-move/aptos-vm/src/block_executor/mod.rs @@ -20,7 +20,7 @@ use aptos_infallible::Mutex; use aptos_types::{ block_executor::config::BlockExecutorConfig, contract_event::ContractEvent, - delayed_fields::PanicError, + error::PanicError, executable::ExecutableTestType, fee_statement::FeeStatement, state_store::{state_key::StateKey, state_value::StateValueMetadata, StateView, StateViewId}, @@ -436,7 +436,7 @@ impl BlockAptosVM { Err(BlockExecutionError::FatalBlockExecutorError(PanicError::CodeInvariantError( err_msg, ))) => Err(VMStatus::Error { - status_code: StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR, + status_code: StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR, sub_status: None, message: Some(err_msg), }), diff --git a/aptos-move/aptos-vm/src/block_executor/vm_wrapper.rs b/aptos-move/aptos-vm/src/block_executor/vm_wrapper.rs index 058ea9de2553d..85e7a7bee305a 100644 --- a/aptos-move/aptos-vm/src/block_executor/vm_wrapper.rs +++ b/aptos-move/aptos-vm/src/block_executor/vm_wrapper.rs @@ -71,7 +71,7 @@ impl ExecutorTask for AptosExecutorTask { vm_status.message().cloned().unwrap_or_default(), ) } else if vm_status.status_code() - == StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR + == StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR { ExecutionStatus::DelayedFieldsCodeInvariantError( vm_status.message().cloned().unwrap_or_default(), @@ -98,7 +98,7 @@ impl ExecutorTask for AptosExecutorTask { err.message().cloned().unwrap_or_default(), ) } else if err.status_code() - == StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR + == StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR { ExecutionStatus::DelayedFieldsCodeInvariantError( err.message().cloned().unwrap_or_default(), diff --git a/aptos-move/aptos-vm/src/data_cache.rs b/aptos-move/aptos-vm/src/data_cache.rs index 7861880b40479..fafd7cb646e4c 100644 --- a/aptos-move/aptos-vm/src/data_cache.rs +++ b/aptos-move/aptos-vm/src/data_cache.rs @@ -13,11 +13,11 @@ use crate::{ use aptos_aggregator::{ bounded_math::SignedU128, resolver::{TAggregatorV1View, TDelayedFieldView}, - types::{DelayedFieldValue, DelayedFieldsSpeculativeError, PanicOr}, + types::{DelayedFieldValue, DelayedFieldsSpeculativeError}, }; use aptos_table_natives::{TableHandle, TableResolver}; use aptos_types::{ - delayed_fields::PanicError, + error::{PanicError, PanicOr}, on_chain_config::{ConfigStorage, Features, OnChainConfig}, state_store::{ errors::StateviewError, diff --git a/aptos-move/aptos-vm/src/errors.rs b/aptos-move/aptos-vm/src/errors.rs index f054a7c819222..695fe6166dd9e 100644 --- a/aptos-move/aptos-vm/src/errors.rs +++ b/aptos-move/aptos-vm/src/errors.rs @@ -151,7 +151,7 @@ pub fn convert_prologue_error( e @ VMStatus::Error { status_code: StatusCode::SPECULATIVE_EXECUTION_ABORT_ERROR - | StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR, + | StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR, .. } => e, status @ VMStatus::ExecutionFailure { .. } | status @ VMStatus::Error { .. } => { @@ -207,7 +207,7 @@ pub fn convert_epilogue_error( e @ VMStatus::Error { status_code: StatusCode::SPECULATIVE_EXECUTION_ABORT_ERROR - | StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR, + | StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR, .. } => e, status => { @@ -237,7 +237,7 @@ pub fn expect_only_successful_execution( e @ VMStatus::Error { status_code: StatusCode::SPECULATIVE_EXECUTION_ABORT_ERROR - | StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR, + | StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR, .. } => e, status => { diff --git a/aptos-move/aptos-vm/src/move_vm_ext/session/view_with_change_set.rs b/aptos-move/aptos-vm/src/move_vm_ext/session/view_with_change_set.rs index 50e94ddba816e..06862df40c7aa 100644 --- a/aptos-move/aptos-vm/src/move_vm_ext/session/view_with_change_set.rs +++ b/aptos-move/aptos-vm/src/move_vm_ext/session/view_with_change_set.rs @@ -6,12 +6,10 @@ use aptos_aggregator::{ delayed_change::{ApplyBase, DelayedApplyChange, DelayedChange}, delta_change_set::DeltaWithMax, resolver::{TAggregatorV1View, TDelayedFieldView}, - types::{ - code_invariant_error, expect_ok, DelayedFieldValue, DelayedFieldsSpeculativeError, PanicOr, - }, + types::{DelayedFieldValue, DelayedFieldsSpeculativeError}, }; use aptos_types::{ - delayed_fields::PanicError, + error::{code_invariant_error, expect_ok, PanicError, PanicOr}, state_store::{ errors::StateviewError, state_key::StateKey, diff --git a/aptos-move/aptos-vm/src/natives.rs b/aptos-move/aptos-vm/src/natives.rs index 4fb5a39b11273..25515277c7bab 100644 --- a/aptos-move/aptos-vm/src/natives.rs +++ b/aptos-move/aptos-vm/src/natives.rs @@ -5,10 +5,7 @@ #[cfg(feature = "testing")] use aptos_aggregator::resolver::TAggregatorV1View; #[cfg(feature = "testing")] -use aptos_aggregator::{ - bounded_math::SignedU128, - types::{DelayedFieldsSpeculativeError, PanicOr}, -}; +use aptos_aggregator::{bounded_math::SignedU128, types::DelayedFieldsSpeculativeError}; #[cfg(feature = "testing")] use aptos_aggregator::{resolver::TDelayedFieldView, types::DelayedFieldValue}; #[cfg(feature = "testing")] @@ -26,7 +23,7 @@ use aptos_types::{ #[cfg(feature = "testing")] use aptos_types::{ chain_id::ChainId, - delayed_fields::PanicError, + error::{PanicError, PanicOr}, state_store::{ state_key::StateKey, state_value::{StateValue, StateValueMetadata}, diff --git a/aptos-move/block-executor/src/captured_reads.rs b/aptos-move/block-executor/src/captured_reads.rs index aaec491043d24..58176d7e4fb61 100644 --- a/aptos-move/block-executor/src/captured_reads.rs +++ b/aptos-move/block-executor/src/captured_reads.rs @@ -5,10 +5,7 @@ use crate::{types::InputOutputKey, value_exchange::filter_value_for_exchange}; use anyhow::bail; use aptos_aggregator::{ delta_math::DeltaHistory, - types::{ - code_invariant_error, DelayedFieldValue, DelayedFieldsSpeculativeError, PanicOr, - ReadPosition, - }, + types::{DelayedFieldValue, DelayedFieldsSpeculativeError, ReadPosition}, }; use aptos_mvhashmap::{ types::{ @@ -20,8 +17,10 @@ use aptos_mvhashmap::{ versioned_group_data::VersionedGroupData, }; use aptos_types::{ - delayed_fields::PanicError, state_store::state_value::StateValueMetadata, - transaction::BlockExecutableTransaction as Transaction, write_set::TransactionWrite, + error::{code_invariant_error, PanicError, PanicOr}, + state_store::state_value::StateValueMetadata, + transaction::BlockExecutableTransaction as Transaction, + write_set::TransactionWrite, }; use aptos_vm_types::resolver::ResourceGroupSize; use derivative::Derivative; @@ -304,12 +303,13 @@ pub(crate) struct CapturedReads { delayed_field_reads: HashMap, - /// If there is a speculative failure (e.g. delta application failure, or an - /// observed inconsistency), the transaction output is irrelevant (must be - /// discarded and transaction re-executed). We have a global flag, as which - /// read observed the inconsistency is irrelevant (moreover, typically, - /// an error is returned to the VM to wrap up the ongoing execution). - speculative_failure: bool, + /// If there is a speculative failure (e.g. delta application failure, or an observed + /// inconsistency), the transaction output is irrelevant (must be discarded and transaction + /// re-executed). We have two global flags, one for speculative failures regarding + /// delayed fields, and the second for all other speculative failures, because these + /// require different validation behavior (delayed fields are validated commit-time). + delayed_field_speculative_failure: bool, + non_delayed_field_speculative_failure: bool, /// Set if the invarint on CapturedReads intended use is violated. Leads to an alert /// and sequential execution fallback. incorrect_use: bool, @@ -444,7 +444,7 @@ impl CapturedReads { }, UpdateResult::Inconsistency(m) => { // Record speculative failure. - self.speculative_failure = true; + self.non_delayed_field_speculative_failure = true; bail!(m); }, UpdateResult::Updated | UpdateResult::Inserted => Ok(()), @@ -521,7 +521,7 @@ impl CapturedReads { }, UpdateResult::Inconsistency(_) => { // Record speculative failure. - self.speculative_failure = true; + self.delayed_field_speculative_failure = true; Err(PanicOr::Or(DelayedFieldsSpeculativeError::InconsistentRead)) }, UpdateResult::Updated | UpdateResult::Inserted => Ok(()), @@ -531,7 +531,7 @@ impl CapturedReads { pub(crate) fn capture_delayed_field_read_error(&mut self, e: &PanicOr) { match e { PanicOr::CodeInvariantError(_) => self.incorrect_use = true, - PanicOr::Or(_) => self.speculative_failure = true, + PanicOr::Or(_) => self.delayed_field_speculative_failure = true, }; } @@ -554,7 +554,7 @@ impl CapturedReads { data_map: &VersionedData, idx_to_validate: TxnIndex, ) -> bool { - if self.speculative_failure { + if self.non_delayed_field_speculative_failure { return false; } @@ -590,7 +590,7 @@ impl CapturedReads { ) -> bool { use MVGroupError::*; - if self.speculative_failure { + if self.non_delayed_field_speculative_failure { return false; } @@ -631,19 +631,19 @@ impl CapturedReads { } // This validation needs to be called at commit time - // (as it internally uses read_latest_committed_value to get the current value). + // (as it internally uses read_latest_predicted_value to get the current value). pub(crate) fn validate_delayed_field_reads( &self, delayed_fields: &dyn TVersionedDelayedFieldView, idx_to_validate: TxnIndex, ) -> Result { - if self.speculative_failure { + if self.delayed_field_speculative_failure { return Ok(false); } use MVDelayedFieldsError::*; for (id, read_value) in &self.delayed_field_reads { - match delayed_fields.read_latest_committed_value( + match delayed_fields.read_latest_predicted_value( id, idx_to_validate, ReadPosition::BeforeCurrentTxn, @@ -707,8 +707,12 @@ impl CapturedReads { ret } - pub(crate) fn mark_failure(&mut self) { - self.speculative_failure = true; + pub(crate) fn mark_failure(&mut self, delayed_field_failure: bool) { + if delayed_field_failure { + self.delayed_field_speculative_failure = true; + } else { + self.non_delayed_field_speculative_failure = true; + } } pub(crate) fn mark_incorrect_use(&mut self) { @@ -756,8 +760,11 @@ impl UnsyncReadSet { mod test { use super::*; use crate::proptest_types::types::{raw_metadata, KeyType, MockEvent, ValueType}; - use aptos_mvhashmap::types::StorageVersion; - use claims::{assert_err, assert_gt, assert_matches, assert_none, assert_ok, assert_some_eq}; + use aptos_mvhashmap::{types::StorageVersion, MVHashMap}; + use aptos_types::executable::ExecutableTestType; + use claims::{ + assert_err, assert_gt, assert_matches, assert_none, assert_ok, assert_ok_eq, assert_some_eq, + }; use move_vm_types::delayed_values::delayed_field_id::DelayedFieldID; use test_case::test_case; @@ -1268,7 +1275,8 @@ mod test { let deletion_metadata = DataRead::Metadata(None); let exists = DataRead::Exists(true); - assert!(!captured_reads.speculative_failure); + assert!(!captured_reads.non_delayed_field_speculative_failure); + assert!(!captured_reads.delayed_field_speculative_failure); let key = KeyType::(20, false); assert_ok!(captured_reads.capture_read(key, use_tag.then_some(30), exists)); assert_err!(captured_reads.capture_read( @@ -1276,22 +1284,57 @@ mod test { use_tag.then_some(30), deletion_metadata.clone() )); - assert!(captured_reads.speculative_failure); + assert!(captured_reads.non_delayed_field_speculative_failure); + assert!(!captured_reads.delayed_field_speculative_failure); + + let mvhashmap = + MVHashMap::, u32, ValueType, ExecutableTestType, DelayedFieldID>::new(); - captured_reads.speculative_failure = false; + captured_reads.non_delayed_field_speculative_failure = false; + captured_reads.delayed_field_speculative_failure = false; let key = KeyType::(21, false); assert_ok!(captured_reads.capture_read(key, use_tag.then_some(30), deletion_metadata)); assert_err!(captured_reads.capture_read(key, use_tag.then_some(30), resolved)); - assert!(captured_reads.speculative_failure); + assert!(captured_reads.non_delayed_field_speculative_failure); + assert!(!captured_reads.validate_data_reads(mvhashmap.data(), 0)); + assert!(!captured_reads.validate_group_reads(mvhashmap.group_data(), 0)); + assert!(!captured_reads.delayed_field_speculative_failure); + assert_ok_eq!( + captured_reads.validate_delayed_field_reads(mvhashmap.delayed_fields(), 0), + true + ); - captured_reads.speculative_failure = false; + captured_reads.non_delayed_field_speculative_failure = false; + captured_reads.delayed_field_speculative_failure = false; let key = KeyType::(22, false); assert_ok!(captured_reads.capture_read(key, use_tag.then_some(30), metadata)); assert_err!(captured_reads.capture_read(key, use_tag.then_some(30), versioned_legacy)); - assert!(captured_reads.speculative_failure); + assert!(captured_reads.non_delayed_field_speculative_failure); + assert!(!captured_reads.delayed_field_speculative_failure); + + let mut captured_reads = CapturedReads::::new(); + captured_reads.non_delayed_field_speculative_failure = false; + captured_reads.delayed_field_speculative_failure = false; + captured_reads.mark_failure(true); + assert!(!captured_reads.non_delayed_field_speculative_failure); + assert!(captured_reads.validate_data_reads(mvhashmap.data(), 0)); + assert!(captured_reads.validate_group_reads(mvhashmap.group_data(), 0)); + assert!(captured_reads.delayed_field_speculative_failure); + assert_ok_eq!( + captured_reads.validate_delayed_field_reads(mvhashmap.delayed_fields(), 0), + false + ); - captured_reads.speculative_failure = false; - captured_reads.mark_failure(); - assert!(captured_reads.speculative_failure); + captured_reads.mark_failure(true); + assert!(!captured_reads.non_delayed_field_speculative_failure); + assert!(captured_reads.delayed_field_speculative_failure); + + captured_reads.delayed_field_speculative_failure = false; + captured_reads.mark_failure(false); + assert!(captured_reads.non_delayed_field_speculative_failure); + assert!(!captured_reads.delayed_field_speculative_failure); + captured_reads.mark_failure(true); + assert!(captured_reads.non_delayed_field_speculative_failure); + assert!(captured_reads.delayed_field_speculative_failure); } } diff --git a/aptos-move/block-executor/src/errors.rs b/aptos-move/block-executor/src/errors.rs index ec1c35724b61e..6e74abe88f027 100644 --- a/aptos-move/block-executor/src/errors.rs +++ b/aptos-move/block-executor/src/errors.rs @@ -2,7 +2,7 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. // SPDX-License-Identifier: Apache-2.0 -use aptos_types::delayed_fields::PanicError; +use aptos_types::error::PanicError; #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) enum ParallelBlockExecutionError { @@ -13,6 +13,10 @@ pub(crate) enum ParallelBlockExecutionError { ModulePathReadWriteError, /// unrecoverable VM error FatalVMError, + // Incarnation number that is higher than a threshold is observed during parallel execution. + // This might be indicative of some sort of livelock, or at least some sort of inefficiency + // that would warrants investigating the root cause. Execution can fallback to sequential. + IncarnationTooHigh, } // This is separate error because we need to match the error variant to provide a specialized diff --git a/aptos-move/block-executor/src/executor.rs b/aptos-move/block-executor/src/executor.rs index cde73c85b7992..6b67ac0b2e95b 100644 --- a/aptos-move/block-executor/src/executor.rs +++ b/aptos-move/block-executor/src/executor.rs @@ -22,7 +22,6 @@ use crate::{ use aptos_aggregator::{ delayed_change::{ApplyBase, DelayedChange}, delta_change_set::serialize, - types::{code_invariant_error, expect_ok, PanicOr}, }; use aptos_drop_helper::DEFAULT_DROPPER; use aptos_logger::{debug, error, info}; @@ -34,7 +33,7 @@ use aptos_mvhashmap::{ }; use aptos_types::{ block_executor::config::BlockExecutorConfig, - delayed_fields::PanicError, + error::{code_invariant_error, expect_ok, PanicError, PanicOr}, executable::Executable, on_chain_config::BlockGasLimitType, state_store::{state_value::StateValue, TStateView}, @@ -543,7 +542,7 @@ where && block_limit_processor.should_end_block_parallel() { // Set the execution output status to be SkipRest, to skip the rest of the txns. - last_input_output.update_to_skip_rest(txn_idx); + last_input_output.update_to_skip_rest(txn_idx)?; } } @@ -732,7 +731,7 @@ where } let mut final_results = final_results.acquire(); - match last_input_output.take_output(txn_idx) { + match last_input_output.take_output(txn_idx)? { ExecutionStatus::Success(t) | ExecutionStatus::SkipRest(t) => { final_results[txn_idx as usize] = t; }, @@ -761,6 +760,7 @@ where num_workers: usize, ) -> Result<(), PanicOr> { // Make executor for each task. TODO: fast concurrent executor. + let num_txns = block.len(); let init_timer = VM_INIT_SECONDS.start_timer(); let executor = E::init(env.clone(), base_view); drop(init_timer); @@ -785,6 +785,16 @@ where }; loop { + if let SchedulerTask::ValidationTask(txn_idx, incarnation, _) = &scheduler_task { + if *incarnation as usize > num_workers.pow(2) + num_txns + 10 { + // Something is wrong if we observe high incarnations (e.g. a bug + // might manifest as an execution-invalidation cycle). Break out + // to fallback to sequential execution. + error!("Observed incarnation {} of txn {txn_idx}", *incarnation); + return Err(PanicOr::Or(ParallelBlockExecutionError::IncarnationTooHigh)); + } + } + while scheduler.should_coordinate_commits() { self.prepare_and_queue_commit_ready_txns( &self.config.onchain.block_gas_limit_type, @@ -941,6 +951,15 @@ where }); drop(timer); + if !shared_maybe_error.load(Ordering::SeqCst) && scheduler.pop_from_commit_queue().is_ok() { + // No error is recorded, parallel execution workers are done, but there is + // still a commit task remaining. Commit tasks must be drained before workers + // exit, hence we log an error and fallback to sequential execution. + alert!("[BlockSTM] error: commit tasks not drained after parallel execution"); + + shared_maybe_error.store(true, Ordering::Relaxed); + } + counters::update_state_counters(versioned_cache.stats(), true); // Explicit async drops. @@ -1219,7 +1238,7 @@ where // fallback is to just skip any transactions that would cause such serialization errors. alert!("Discarding transaction because serialization failed in bcs fallback"); ret.push(E::Output::discard_output( - StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR, + StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR, )); continue; } @@ -1426,7 +1445,7 @@ where // StateCheckpoint will be added afterwards. let error_code = match sequential_error { BlockExecutionError::FatalBlockExecutorError(_) => { - StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR + StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR }, BlockExecutionError::FatalVMError(_) => { StatusCode::UNKNOWN_INVARIANT_VIOLATION_ERROR diff --git a/aptos-move/block-executor/src/executor_utilities.rs b/aptos-move/block-executor/src/executor_utilities.rs index 82274f741c76b..e4fee9852054e 100644 --- a/aptos-move/block-executor/src/executor_utilities.rs +++ b/aptos-move/block-executor/src/executor_utilities.rs @@ -2,12 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{errors::*, view::LatestView}; -use aptos_aggregator::types::code_invariant_error; use aptos_logger::error; use aptos_mvhashmap::types::ValueWithLayout; use aptos_types::{ - contract_event::TransactionEvent, delayed_fields::PanicError, executable::Executable, - state_store::TStateView, transaction::BlockExecutableTransaction as Transaction, + contract_event::TransactionEvent, + error::{code_invariant_error, PanicError}, + executable::Executable, + state_store::TStateView, + transaction::BlockExecutableTransaction as Transaction, write_set::TransactionWrite, }; use aptos_vm_logging::{alert, prelude::*}; diff --git a/aptos-move/block-executor/src/proptest_types/types.rs b/aptos-move/block-executor/src/proptest_types/types.rs index f6d5de2dd1b39..648c8d86d51c4 100644 --- a/aptos-move/block-executor/src/proptest_types/types.rs +++ b/aptos-move/block-executor/src/proptest_types/types.rs @@ -12,7 +12,7 @@ use aptos_mvhashmap::types::TxnIndex; use aptos_types::{ account_address::AccountAddress, contract_event::TransactionEvent, - delayed_fields::PanicError, + error::PanicError, executable::ModulePath, fee_statement::FeeStatement, on_chain_config::CurrentTimeMicroseconds, diff --git a/aptos-move/block-executor/src/scheduler.rs b/aptos-move/block-executor/src/scheduler.rs index 385e0076a606a..27ee7e80d1772 100644 --- a/aptos-move/block-executor/src/scheduler.rs +++ b/aptos-move/block-executor/src/scheduler.rs @@ -3,10 +3,9 @@ // SPDX-License-Identifier: Apache-2.0 use crate::explicit_sync_wrapper::ExplicitSyncWrapper; -use aptos_aggregator::types::code_invariant_error; use aptos_infallible::Mutex; use aptos_mvhashmap::types::{Incarnation, TxnIndex}; -use aptos_types::delayed_fields::PanicError; +use aptos_types::error::{code_invariant_error, PanicError}; use concurrent_queue::{ConcurrentQueue, PopError}; use crossbeam::utils::CachePadded; use parking_lot::{RwLock, RwLockUpgradableReadGuard}; diff --git a/aptos-move/block-executor/src/task.rs b/aptos-move/block-executor/src/task.rs index 350b566175924..edf3242322501 100644 --- a/aptos-move/block-executor/src/task.rs +++ b/aptos-move/block-executor/src/task.rs @@ -8,7 +8,7 @@ use aptos_aggregator::{ }; use aptos_mvhashmap::types::TxnIndex; use aptos_types::{ - delayed_fields::PanicError, + error::PanicError, fee_statement::FeeStatement, state_store::{state_value::StateValueMetadata, TStateView}, transaction::BlockExecutableTransaction as Transaction, diff --git a/aptos-move/block-executor/src/txn_last_input_output.rs b/aptos-move/block-executor/src/txn_last_input_output.rs index c928a88005bac..2de1490bf40c0 100644 --- a/aptos-move/block-executor/src/txn_last_input_output.rs +++ b/aptos-move/block-executor/src/txn_last_input_output.rs @@ -8,13 +8,14 @@ use crate::{ task::{ExecutionStatus, TransactionOutput}, types::{InputOutputKey, ReadWriteSummary}, }; -use aptos_aggregator::types::code_invariant_error; use aptos_logger::error; use aptos_mvhashmap::types::{TxnIndex, ValueWithLayout}; use aptos_types::{ - delayed_fields::PanicError, fee_statement::FeeStatement, + error::{code_invariant_error, PanicError}, + fee_statement::FeeStatement, state_store::state_value::StateValueMetadata, - transaction::BlockExecutableTransaction as Transaction, write_set::WriteOp, + transaction::BlockExecutableTransaction as Transaction, + write_set::WriteOp, }; use arc_swap::ArcSwapOption; use crossbeam::utils::CachePadded; @@ -250,18 +251,21 @@ impl, E: Debug + Send + Clone> } } - pub(crate) fn update_to_skip_rest(&self, txn_idx: TxnIndex) { + pub(crate) fn update_to_skip_rest(&self, txn_idx: TxnIndex) -> Result<(), PanicError> { if self.block_skips_rest_at_idx(txn_idx) { // Already skipping. - return; + return Ok(()); } // check_execution_status_during_commit must be used for checks re:status. // Hence, since the status is not SkipRest, it must be Success. - if let ExecutionStatus::Success(output) = self.take_output(txn_idx) { + if let ExecutionStatus::Success(output) = self.take_output(txn_idx)? { self.outputs[txn_idx as usize].store(Some(Arc::new(ExecutionStatus::SkipRest(output)))); + Ok(()) } else { - unreachable!("Unexpected status, must be Success"); + Err(code_invariant_error( + "Unexpected status to change to SkipRest, must be Success", + )) } } @@ -446,12 +450,16 @@ impl, E: Debug + Send + Clone> // Must be executed after parallel execution is done, grabs outputs. Will panic if // other outstanding references to the recorded outputs exist. - pub(crate) fn take_output(&self, txn_idx: TxnIndex) -> ExecutionStatus { - let owning_ptr = self.outputs[txn_idx as usize] - .swap(None) - .expect("[BlockSTM]: Output must be recorded after execution"); - - Arc::try_unwrap(owning_ptr) - .expect("[BlockSTM]: Output should be uniquely owned after execution") + pub(crate) fn take_output( + &self, + txn_idx: TxnIndex, + ) -> Result, PanicError> { + let owning_ptr = self.outputs[txn_idx as usize].swap(None).ok_or_else(|| { + code_invariant_error("[BlockSTM]: Output must be recorded after execution") + })?; + + Arc::try_unwrap(owning_ptr).map_err(|_| { + code_invariant_error("[BlockSTM]: Output must be uniquely owned after execution") + }) } } diff --git a/aptos-move/block-executor/src/value_exchange.rs b/aptos-move/block-executor/src/value_exchange.rs index 5170291e6dba5..e28ec35dddd03 100644 --- a/aptos-move/block-executor/src/value_exchange.rs +++ b/aptos-move/block-executor/src/value_exchange.rs @@ -4,11 +4,11 @@ use crate::view::{LatestView, ViewState}; use aptos_aggregator::{ resolver::TDelayedFieldView, - types::{code_invariant_error, DelayedFieldValue, ReadPosition}, + types::{DelayedFieldValue, ReadPosition}, }; use aptos_mvhashmap::{types::TxnIndex, versioned_delayed_fields::TVersionedDelayedFieldView}; use aptos_types::{ - delayed_fields::PanicError, + error::{code_invariant_error, PanicError}, executable::Executable, state_store::{state_value::StateValueMetadata, TStateView}, transaction::BlockExecutableTransaction as Transaction, @@ -92,7 +92,7 @@ impl<'a, T: Transaction, S: TStateView, X: Executable> ValueToIden ViewState::Sync(state) => state .versioned_map .delayed_fields() - .read_latest_committed_value( + .read_latest_predicted_value( &identifier, self.txn_idx, ReadPosition::AfterCurrentTxn, diff --git a/aptos-move/block-executor/src/view.rs b/aptos-move/block-executor/src/view.rs index 837a47d419ed9..54841a1dff1db 100644 --- a/aptos-move/block-executor/src/view.rs +++ b/aptos-move/block-executor/src/view.rs @@ -19,10 +19,7 @@ use aptos_aggregator::{ delta_change_set::serialize, delta_math::DeltaHistory, resolver::{TAggregatorV1View, TDelayedFieldView}, - types::{ - code_invariant_error, expect_ok, DelayedFieldValue, DelayedFieldsSpeculativeError, PanicOr, - ReadPosition, - }, + types::{DelayedFieldValue, DelayedFieldsSpeculativeError, ReadPosition}, }; use aptos_logger::error; use aptos_mvhashmap::{ @@ -36,7 +33,7 @@ use aptos_mvhashmap::{ MVHashMap, }; use aptos_types::{ - delayed_fields::PanicError, + error::{code_invariant_error, expect_ok, PanicError, PanicOr}, executable::{Executable, ModulePath}, state_store::{ errors::StateviewError, @@ -362,8 +359,8 @@ fn delayed_field_try_add_delta_outcome_impl( .into()); } - let last_committed_value = loop { - match versioned_delayed_fields.read_latest_committed_value( + let predicted_value = loop { + match versioned_delayed_fields.read_latest_predicted_value( id, txn_idx, ReadPosition::BeforeCurrentTxn, @@ -388,7 +385,7 @@ fn delayed_field_try_add_delta_outcome_impl( compute_delayed_field_try_add_delta_outcome_first_time( delta, max_value, - last_committed_value, + predicted_value, )?; captured_reads @@ -643,7 +640,7 @@ impl<'a, T: Transaction, X: Executable> ResourceState for ParallelState<'a, T )); }, Ok(false) => { - self.captured_reads.borrow_mut().mark_failure(); + self.captured_reads.borrow_mut().mark_failure(false); return ReadResult::HaltSpeculativeExecution( "Interrupted as block execution was halted".to_string(), ); @@ -655,7 +652,7 @@ impl<'a, T: Transaction, X: Executable> ResourceState for ParallelState<'a, T }, Err(DeltaApplicationFailure) => { // AggregatorV1 may have delta application failure due to speculation. - self.captured_reads.borrow_mut().mark_failure(); + self.captured_reads.borrow_mut().mark_failure(false); return ReadResult::HaltSpeculativeExecution( "Delta application failure (must be speculative)".to_string(), ); @@ -1064,7 +1061,7 @@ impl<'a, T: Transaction, S: TStateView, X: Executable> LatestView< ); self.mark_incorrect_use(); return Err(PartialVMError::new( - StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR, + StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR, ) .with_message(format!("{}", err))); }, @@ -1759,7 +1756,7 @@ mod test { use aptos_aggregator::{ bounded_math::{BoundedMath, SignedU128}, delta_math::DeltaHistory, - types::{DelayedFieldValue, DelayedFieldsSpeculativeError, PanicOr, ReadPosition}, + types::{DelayedFieldValue, DelayedFieldsSpeculativeError, ReadPosition}, }; use aptos_mvhashmap::{ types::{MVDelayedFieldsError, TxnIndex}, @@ -1768,6 +1765,7 @@ mod test { MVHashMap, }; use aptos_types::{ + error::PanicOr, executable::Executable, state_store::{ errors::StateviewError, state_storage_usage::StateStorageUsage, @@ -1813,7 +1811,7 @@ mod test { .ok_or(PanicOr::Or(MVDelayedFieldsError::NotFound)) } - fn read_latest_committed_value( + fn read_latest_predicted_value( &self, id: &DelayedFieldID, _current_txn_idx: TxnIndex, diff --git a/aptos-move/framework/src/natives/aggregator_natives/aggregator_v2.rs b/aptos-move/framework/src/natives/aggregator_natives/aggregator_v2.rs index 521d62bd6ae22..6a277b36bd864 100644 --- a/aptos-move/framework/src/natives/aggregator_natives/aggregator_v2.rs +++ b/aptos-move/framework/src/natives/aggregator_natives/aggregator_v2.rs @@ -6,7 +6,6 @@ use aptos_aggregator::{ bounded_math::{BoundedMath, SignedU128}, delayed_field_extension::DelayedFieldData, resolver::DelayedFieldResolver, - types::code_invariant_error, }; use aptos_gas_algebra::NumBytes; use aptos_gas_schedule::gas_params::natives::aptos_framework::*; @@ -14,9 +13,12 @@ use aptos_native_interface::{ safely_pop_arg, RawSafeNative, SafeNativeBuilder, SafeNativeContext, SafeNativeError, SafeNativeResult, }; -use aptos_types::delayed_fields::{ - calculate_width_for_constant_string, calculate_width_for_integer_embedded_string, - SnapshotToStringFormula, +use aptos_types::{ + delayed_fields::{ + calculate_width_for_constant_string, calculate_width_for_integer_embedded_string, + SnapshotToStringFormula, + }, + error::code_invariant_error, }; use move_binary_format::errors::PartialVMError; use move_vm_runtime::native_functions::NativeFunction; diff --git a/aptos-move/mvhashmap/src/types.rs b/aptos-move/mvhashmap/src/types.rs index 62cc81e30eaa6..31967be962d0f 100644 --- a/aptos-move/mvhashmap/src/types.rs +++ b/aptos-move/mvhashmap/src/types.rs @@ -1,12 +1,10 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use aptos_aggregator::{ - delta_change_set::DeltaOp, - types::{DelayedFieldsSpeculativeError, PanicOr}, -}; +use aptos_aggregator::{delta_change_set::DeltaOp, types::DelayedFieldsSpeculativeError}; use aptos_crypto::hash::HashValue; use aptos_types::{ + error::PanicOr, executable::ExecutableDescriptor, write_set::{TransactionWrite, WriteOpKind}, }; diff --git a/aptos-move/mvhashmap/src/unsync_map.rs b/aptos-move/mvhashmap/src/unsync_map.rs index f3bbcef5f404d..6867d56fcd5ea 100644 --- a/aptos-move/mvhashmap/src/unsync_map.rs +++ b/aptos-move/mvhashmap/src/unsync_map.rs @@ -6,10 +6,10 @@ use crate::{ utils::module_hash, BlockStateStats, }; -use aptos_aggregator::types::{code_invariant_error, DelayedFieldValue}; +use aptos_aggregator::types::DelayedFieldValue; use aptos_crypto::hash::HashValue; use aptos_types::{ - delayed_fields::PanicError, + error::{code_invariant_error, PanicError}, executable::{Executable, ExecutableDescriptor, ModulePath}, write_set::TransactionWrite, }; diff --git a/aptos-move/mvhashmap/src/versioned_delayed_fields.rs b/aptos-move/mvhashmap/src/versioned_delayed_fields.rs index 5a5de5f44f09f..bd1c4deccd71e 100644 --- a/aptos-move/mvhashmap/src/versioned_delayed_fields.rs +++ b/aptos-move/mvhashmap/src/versioned_delayed_fields.rs @@ -4,9 +4,9 @@ use crate::types::{AtomicTxnIndex, MVDelayedFieldsError, TxnIndex}; use aptos_aggregator::{ delayed_change::{ApplyBase, DelayedApplyEntry, DelayedEntry}, - types::{code_invariant_error, DelayedFieldValue, PanicOr, ReadPosition}, + types::{DelayedFieldValue, ReadPosition}, }; -use aptos_types::delayed_fields::PanicError; +use aptos_types::error::{code_invariant_error, PanicError, PanicOr}; use claims::assert_matches; use crossbeam::utils::CachePadded; use dashmap::DashMap; @@ -201,8 +201,12 @@ impl VersionedValue { } // Given a transaction index which should be committed next, returns the latest value - // below this version, or an error if such a value does not exist. - fn read_latest_committed_value( + // below this version, or if no such value exists, then the delayed field must have been + // created in the same block. In this case predict the value in the first (lowest) entry, + // or an error if such an entry cannot be found (must be due to speculation). The lowest + // entry is picked without regards to the indices, as it's for optimistic prediction + // purposes only (better to have some value than error). + fn read_latest_predicted_value( &self, next_idx_to_commit: TxnIndex, ) -> Result { @@ -212,10 +216,15 @@ impl VersionedValue { .range(0..next_idx_to_commit) .next_back() .map_or_else( - || { - self.base_value - .clone() - .ok_or(MVDelayedFieldsError::NotFound) + || match &self.base_value { + Some(value) => Ok(value.clone()), + None => match self.versioned_map.first_key_value() { + Some((_, entry)) => match entry.as_ref().deref() { + Value(v, _) => Ok(v.clone()), + Apply(_) | Estimate(_) => Err(MVDelayedFieldsError::NotFound), + }, + None => Err(MVDelayedFieldsError::NotFound), + }, }, |(_, entry)| match entry.as_ref().deref() { Value(v, _) => Ok(v.clone()), @@ -347,10 +356,12 @@ pub trait TVersionedDelayedFieldView { txn_idx: TxnIndex, ) -> Result>; - /// Returns the committed value from largest transaction index that is - /// smaller than the given current_txn_idx (read_position defined whether - /// inclusively or exclusively from the current transaction itself). - fn read_latest_committed_value( + /// Returns the committed value from largest transaction index that is smaller than the + /// given current_txn_idx (read_position defined whether inclusively or exclusively from + /// the current transaction itself). If such a value does not exist, the value might + /// be created in the current block, and the value from the first (lowest) entry is taken + /// as the prediction. + fn read_latest_predicted_value( &self, id: &K, current_txn_idx: TxnIndex, @@ -536,7 +547,7 @@ impl VersionedDelayedFields { // remove delta in the commit VersionEntry::Value(v, Some(_)) => Some(v.clone()), VersionEntry::Apply(AggregatorDelta { delta }) => { - let prev_value = versioned_value.read_latest_committed_value(idx_to_commit) + let prev_value = versioned_value.read_latest_predicted_value(idx_to_commit) .map_err(|e| CommitError::CodeInvariantError(format!("Cannot read latest committed value for Apply(AggregatorDelta) during commit: {:?}", e)))?; if let DelayedFieldValue::Aggregator(base) = prev_value { let new_value = delta.apply_to(base).map_err(|e| { @@ -584,7 +595,7 @@ impl VersionedDelayedFields { let prev_value = self.values .get_mut(&base_aggregator) .ok_or_else(|| CommitError::CodeInvariantError("Cannot find base_aggregator for Apply(SnapshotDelta) during commit".to_string()))? - .read_latest_committed_value(idx_to_commit) + .read_latest_predicted_value(idx_to_commit) .map_err(|e| CommitError::CodeInvariantError(format!("Cannot read latest committed value for base aggregator for ApplySnapshotDelta) during commit: {:?}", e)))?; if let DelayedFieldValue::Aggregator(base) = prev_value { @@ -615,7 +626,7 @@ impl VersionedDelayedFields { .get_mut(&base_snapshot) .ok_or_else(|| CommitError::CodeInvariantError("Cannot find base_aggregator for Apply(SnapshotDelta) during commit".to_string()))? // Read values committed in this commit - .read_latest_committed_value(idx_to_commit + 1) + .read_latest_predicted_value(idx_to_commit + 1) .map_err(|e| CommitError::CodeInvariantError(format!("Cannot read latest committed value for base aggregator for ApplySnapshotDelta) during commit: {:?}", e)))?; if let DelayedFieldValue::Snapshot(base) = prev_value { @@ -705,7 +716,7 @@ impl TVersionedDelayedFieldView /// Returns the committed value from largest transaction index that is /// smaller than the given current_txn_idx (read_position defined whether /// inclusively or exclusively from the current transaction itself). - fn read_latest_committed_value( + fn read_latest_predicted_value( &self, id: &K, current_txn_idx: TxnIndex, @@ -715,7 +726,7 @@ impl TVersionedDelayedFieldView .get_mut(id) .ok_or(MVDelayedFieldsError::NotFound) .and_then(|v| { - v.read_latest_committed_value( + v.read_latest_predicted_value( match read_position { ReadPosition::BeforeCurrentTxn => current_txn_idx, ReadPosition::AfterCurrentTxn => current_txn_idx + 1, @@ -1194,7 +1205,50 @@ mod test { if let Some(entry) = aggregator_entry(type_index) { v.insert_speculative_value(10, entry).unwrap(); } - let _ = v.read_latest_committed_value(11); + let _ = v.read_latest_predicted_value(11); + } + + #[test_case(APPLY_AGGREGATOR)] + #[test_case(APPLY_SNAPSHOT)] + #[test_case(APPLY_DERIVED)] + fn read_first_entry_not_value(type_index: usize) { + let mut v = VersionedValue::new(None); + assert_matches!( + v.read_latest_predicted_value(11), + Err(MVDelayedFieldsError::NotFound) + ); + + if let Some(entry) = aggregator_entry(type_index) { + v.insert_speculative_value(12, entry).unwrap(); + } + assert_matches!( + v.read_latest_predicted_value(11), + Err(MVDelayedFieldsError::NotFound) + ); + } + + #[test] + fn read_first_entry_value() { + let mut v = VersionedValue::new(None); + v.insert_speculative_value(13, aggregator_entry(APPLY_AGGREGATOR).unwrap()) + .unwrap(); + v.insert_speculative_value(12, aggregator_entry(VALUE_AGGREGATOR).unwrap()) + .unwrap(); + + assert_matches!( + v.read_latest_predicted_value(11), + Ok(DelayedFieldValue::Aggregator(10)) + ); + + v.insert_speculative_value( + 9, + VersionEntry::Value(DelayedFieldValue::Aggregator(9), None), + ) + .unwrap(); + assert_matches!( + v.read_latest_predicted_value(11), + Ok(DelayedFieldValue::Aggregator(9)) + ); } #[should_panic] @@ -1204,11 +1258,11 @@ mod test { v.insert_speculative_value(3, aggregator_entry(VALUE_AGGREGATOR).unwrap()) .unwrap(); v.mark_estimate(3); - let _ = v.read_latest_committed_value(11); + let _ = v.read_latest_predicted_value(11); } #[test] - fn read_latest_committed_value() { + fn read_latest_predicted_value() { let mut v = VersionedValue::new(Some(DelayedFieldValue::Aggregator(5))); v.insert_speculative_value(2, aggregator_entry(VALUE_AGGREGATOR).unwrap()) .unwrap(); @@ -1219,15 +1273,15 @@ mod test { .unwrap(); assert_ok_eq!( - v.read_latest_committed_value(5), + v.read_latest_predicted_value(5), DelayedFieldValue::Aggregator(15) ); assert_ok_eq!( - v.read_latest_committed_value(4), + v.read_latest_predicted_value(4), DelayedFieldValue::Aggregator(10) ); assert_ok_eq!( - v.read_latest_committed_value(2), + v.read_latest_predicted_value(2), DelayedFieldValue::Aggregator(5) ); } diff --git a/third_party/move/move-core/types/src/vm_status.rs b/third_party/move/move-core/types/src/vm_status.rs index 9c4fd678b6fa1..0db1e408c3a3b 100644 --- a/third_party/move/move-core/types/src/vm_status.rs +++ b/third_party/move/move-core/types/src/vm_status.rs @@ -763,11 +763,17 @@ pub enum StatusCode { TYPE_RESOLUTION_FAILURE = 2021, DUPLICATE_NATIVE_FUNCTION = 2022, // code invariant error while handling delayed materialization, should never happen, - // always indicates a code bug. - // Delayed materialization includes handling of Resource Groups and Delayed Fields. - // Unlike regular CODE_INVARIANT_ERROR, this is a signal to BlockSTM, - // which it might do something about (i.e. fallback to sequential execution) - DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR = 2023, + // always indicates a code bug. Delayed materialization includes handling of + // Resource Groups and Delayed Fields. Unlike regular CODE_INVARIANT_ERROR, this + // is a signal to BlockSTM, which it might do something about (i.e. fallback to + // sequential execution). + // Note: This status is created both from third_party (move) and block executor + // (aptos-move in the adapter). In the later case, it can now also represent more + // general invariant violations beyond delayed fields, due to the convenience of + // handling such issues with asserts (e.g. by falling back to sequential execution). + // TODO: can be audited and broken down into specific types, once implementation + // is also not duplicated. + DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR = 2023, // Speculative error means that there was an issue because of speculative // reads provided to the transaction, and the transaction needs to // be re-executed. diff --git a/third_party/move/move-vm/types/src/delayed_values/error.rs b/third_party/move/move-vm/types/src/delayed_values/error.rs index fa245735ba14b..50801748f901a 100644 --- a/third_party/move/move-vm/types/src/delayed_values/error.rs +++ b/third_party/move/move-vm/types/src/delayed_values/error.rs @@ -14,5 +14,6 @@ pub fn code_invariant_error(message: M) -> PartialVMError { message ); println!("ERROR: {}", msg); - PartialVMError::new(StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR).with_message(msg) + PartialVMError::new(StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR) + .with_message(msg) } diff --git a/types/Cargo.toml b/types/Cargo.toml index 86fb4c4aa960a..df07844a31988 100644 --- a/types/Cargo.toml +++ b/types/Cargo.toml @@ -68,6 +68,7 @@ serde_yaml = { workspace = true } strum = { workspace = true } strum_macros = { workspace = true } thiserror = { workspace = true } +tracing = { workspace = true } [dev-dependencies] ahash = { workspace = true } diff --git a/types/src/delayed_fields.rs b/types/src/delayed_fields.rs index 6b1898546d527..d7872537938bb 100644 --- a/types/src/delayed_fields.rs +++ b/types/src/delayed_fields.rs @@ -3,41 +3,12 @@ // SPDX-License-Identifier: Apache-2.0 use crate::serde_helper::bcs_utils::bcs_size_of_byte_array; -use move_binary_format::errors::{PartialVMError, PartialVMResult}; -use move_core_types::vm_status::StatusCode; +use move_binary_format::errors::PartialVMResult; use move_vm_types::delayed_values::{ delayed_field_id::{DelayedFieldID, ExtractWidth}, error::code_invariant_error, }; use once_cell::sync::Lazy; -use std::fmt::Display; - -// Represents something that should never happen - i.e. a code invariant error, -// which we would generally just panic, but since we are inside of the VM, -// we cannot do that. -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum PanicError { - CodeInvariantError(String), -} - -impl Display for PanicError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - PanicError::CodeInvariantError(e) => write!(f, "{}", e), - } - } -} - -impl From for PartialVMError { - fn from(err: PanicError) -> Self { - match err { - PanicError::CodeInvariantError(msg) => { - PartialVMError::new(StatusCode::DELAYED_MATERIALIZATION_CODE_INVARIANT_ERROR) - .with_message(msg) - }, - } - } -} static U64_MAX_DIGITS: Lazy = Lazy::new(|| u64::MAX.to_string().len()); static U128_MAX_DIGITS: Lazy = Lazy::new(|| u128::MAX.to_string().len()); diff --git a/types/src/error.rs b/types/src/error.rs index 6e130826b06b0..9d5e5308853f2 100644 --- a/types/src/error.rs +++ b/types/src/error.rs @@ -2,7 +2,110 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. // SPDX-License-Identifier: Apache-2.0 -//! Error codes that follow the Move error convention of the Aptos Framework. +use move_binary_format::errors::PartialVMError; +use move_core_types::vm_status::StatusCode; +use std::fmt::Display; +use tracing::error; + +/// Wrapping other errors, to add a variant that represents something that should never +/// happen - i.e. a code invariant error, which we would generally just panic, but since +/// we are inside of the VM, we cannot do that. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum PanicError { + CodeInvariantError(String), +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum PanicOr { + CodeInvariantError(String), + Or(T), +} + +// code_invariant_error is also redefined in third-party/move-vm (for delayed fields errors). +pub fn code_invariant_error(message: M) -> PanicError { + let msg = format!( + "Code invariant broken (there is a bug in the code), {:?}", + message + ); + error!("{}", msg); + PanicError::CodeInvariantError(msg) +} + +pub fn expect_ok(value: Result) -> Result { + value.map_err(|e| code_invariant_error(format!("Expected Ok, got Err({:?})", e))) +} + +impl Display for PanicError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + PanicError::CodeInvariantError(e) => write!(f, "{}", e), + } + } +} + +impl From for PartialVMError { + fn from(err: PanicError) -> Self { + match err { + PanicError::CodeInvariantError(msg) => { + PartialVMError::new(StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR) + .with_message(msg) + }, + } + } +} + +impl PanicOr { + pub fn map_non_panic(self, f: impl FnOnce(T) -> E) -> PanicOr { + match self { + PanicOr::CodeInvariantError(msg) => PanicOr::CodeInvariantError(msg), + PanicOr::Or(value) => PanicOr::Or(f(value)), + } + } +} + +impl From for PanicOr { + fn from(err: PanicError) -> Self { + match err { + PanicError::CodeInvariantError(e) => PanicOr::CodeInvariantError(e), + } + } +} + +impl From<&PanicOr> for StatusCode { + fn from(err: &PanicOr) -> Self { + match err { + PanicOr::CodeInvariantError(_) => { + StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR + }, + PanicOr::Or(_) => StatusCode::SPECULATIVE_EXECUTION_ABORT_ERROR, + } + } +} + +impl From> for PartialVMError { + fn from(err: PanicOr) -> Self { + match err { + PanicOr::CodeInvariantError(msg) => { + PartialVMError::new(StatusCode::DELAYED_FIELD_OR_BLOCKSTM_CODE_INVARIANT_ERROR) + .with_message(msg) + }, + PanicOr::Or(err) => PartialVMError::new(StatusCode::SPECULATIVE_EXECUTION_ABORT_ERROR) + .with_message(format!("{:?}", err)), + } + } +} + +pub trait NonPanic {} + +impl From for PanicOr { + fn from(err: T) -> Self { + PanicOr::Or(err) + } +} + +/// +/// Error codes that follow the Move error convention of the Aptos Framework. +/// /// Caller specified an invalid argument (http: 400) pub const INVALID_ARGUMENT: u64 = 0x1;