From 574fa48194ca42013b4b5171ec81ebfb5ea5ac14 Mon Sep 17 00:00:00 2001 From: igor-aptos <110557261+igor-aptos@users.noreply.github.com> Date: Sun, 15 Sep 2024 00:49:49 -0700 Subject: [PATCH 01/36] cleanup info logs (#14555) --- consensus/src/liveness/proposal_generator.rs | 4 ++-- consensus/src/pipeline/buffer_manager.rs | 2 +- consensus/src/round_manager.rs | 12 ++++++++++-- types/src/transaction/use_case.rs | 2 +- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/consensus/src/liveness/proposal_generator.rs b/consensus/src/liveness/proposal_generator.rs index 411d24c7ac2fa..334b0a76fbf4e 100644 --- a/consensus/src/liveness/proposal_generator.rs +++ b/consensus/src/liveness/proposal_generator.rs @@ -29,7 +29,7 @@ use aptos_consensus_types::{ }; use aptos_crypto::{hash::CryptoHash, HashValue}; use aptos_infallible::Mutex; -use aptos_logger::{error, info, sample, sample::SampleRate, warn}; +use aptos_logger::{error, sample, sample::SampleRate, warn}; use aptos_types::{on_chain_config::ValidatorTxnConfig, validator_txn::ValidatorTransaction}; use aptos_validator_transaction_pool as vtxn_pool; use futures::future::BoxFuture; @@ -203,7 +203,7 @@ impl PipelineBackpressureConfig { PROPOSER_ESTIMATED_CALIBRATED_BLOCK_TXNS.observe(calibrated_block_size as f64); // Check if calibrated block size is reduction in size, to turn on backpressure. if max_block_txns > calibrated_block_size { - info!( + warn!( block_execution_times = format!("{:?}", block_execution_times), estimated_calibrated_block_sizes = format!("{:?}", sizes), calibrated_block_size = calibrated_block_size, diff --git a/consensus/src/pipeline/buffer_manager.rs b/consensus/src/pipeline/buffer_manager.rs index b3ebe706f608c..603a246b228a0 100644 --- a/consensus/src/pipeline/buffer_manager.rs +++ b/consensus/src/pipeline/buffer_manager.rs @@ -708,7 +708,7 @@ impl BufferManager { // find the corresponding item let author = vote.author(); let commit_info = vote.commit_info().clone(); - info!("Receive commit vote {} from {}", commit_info, author); + trace!("Receive commit vote {} from {}", commit_info, author); let target_block_id = vote.commit_info().id(); let current_cursor = self .buffer diff --git a/consensus/src/round_manager.rs b/consensus/src/round_manager.rs index 748d01f29ad88..03893ac4e79c2 100644 --- a/consensus/src/round_manager.rs +++ b/consensus/src/round_manager.rs @@ -1109,11 +1109,19 @@ impl RoundManager { .await?; } else { ORDER_VOTE_VERY_OLD.inc(); - info!( + sample!( + SampleRate::Duration(Duration::from_secs(30)), + info!( + "[sampled] Received old order vote. Order vote round: {:?}, Highest ordered round: {:?}", + order_vote_msg.order_vote().ledger_info().round(), + self.block_store.sync_info().highest_ordered_round() + ) + ); + debug!( "Received old order vote. 
Order vote round: {:?}, Highest ordered round: {:?}", order_vote_msg.order_vote().ledger_info().round(), self.block_store.sync_info().highest_ordered_round() - ); + ) } } Ok(()) diff --git a/types/src/transaction/use_case.rs b/types/src/transaction/use_case.rs index ee72a61b5d964..d947b76874b44 100644 --- a/types/src/transaction/use_case.rs +++ b/types/src/transaction/use_case.rs @@ -18,7 +18,7 @@ impl std::fmt::Debug for UseCaseKey { match self { Platform => write!(f, "PP"), - ContractAddress(addr) => write!(f, "c{}", hex::encode_upper(&addr[31..])), + ContractAddress(addr) => write!(f, "c{}", hex::encode_upper(&addr[29..])), Others => write!(f, "OO"), } } From bb012f63ebc37cd08c3e35c8cf33e46632153a5f Mon Sep 17 00:00:00 2001 From: igor-aptos <110557261+igor-aptos@users.noreply.github.com> Date: Mon, 16 Sep 2024 09:24:07 -0700 Subject: [PATCH 02/36] cleanup warn logs (#14614) --- consensus/src/round_manager.rs | 26 ++++++++++++------- consensus/src/round_manager_test.rs | 5 +++- crates/reliable-broadcast/src/lib.rs | 4 +-- .../executor/src/components/chunk_output.rs | 15 ++++++----- .../framework/src/application/interface.rs | 2 +- network/framework/src/peer/mod.rs | 19 ++++++++------ network/framework/src/protocols/rpc/mod.rs | 17 +++++++----- .../storage-service/server/src/handler.rs | 2 +- 8 files changed, 54 insertions(+), 36 deletions(-) diff --git a/consensus/src/round_manager.rs b/consensus/src/round_manager.rs index 03893ac4e79c2..aed3118835b9e 100644 --- a/consensus/src/round_manager.rs +++ b/consensus/src/round_manager.rs @@ -555,20 +555,28 @@ impl RoundManager { block_parent_hash = proposal_msg.proposal().quorum_cert().certified_block().id(), ); - ensure!( - self.ensure_round_and_sync_up( + let in_correct_round = self + .ensure_round_and_sync_up( proposal_msg.proposal().round(), proposal_msg.sync_info(), proposal_msg.proposer(), ) .await - .context("[RoundManager] Process proposal")?, - "Stale proposal {}, current round {}", - proposal_msg.proposal(), - self.round_state.current_round() - ); - - self.process_proposal(proposal_msg.take_proposal()).await + .context("[RoundManager] Process proposal")?; + if in_correct_round { + self.process_proposal(proposal_msg.take_proposal()).await + } else { + sample!( + SampleRate::Duration(Duration::from_secs(30)), + warn!( + "[sampled] Stale proposal {}, current round {}", + proposal_msg.proposal(), + self.round_state.current_round() + ) + ); + counters::ERROR_COUNT.inc(); + Ok(()) + } } pub async fn process_delayed_proposal_msg(&mut self, proposal: Block) -> anyhow::Result<()> { diff --git a/consensus/src/round_manager_test.rs b/consensus/src/round_manager_test.rs index cf34840d95a15..a01fef7b06bab 100644 --- a/consensus/src/round_manager_test.rs +++ b/consensus/src/round_manager_test.rs @@ -4,6 +4,7 @@ use crate::{ block_storage::{pending_blocks::PendingBlocks, BlockReader, BlockStore}, + counters, liveness::{ proposal_generator::{ ChainHealthBackoffConfig, PipelineBackpressureConfig, ProposalGenerator, @@ -1147,11 +1148,13 @@ fn new_round_on_timeout_certificate() { None, ), ); + let before = counters::ERROR_COUNT.get(); assert!(node .round_manager .process_proposal_msg(old_good_proposal) .await - .is_err()); + .is_ok()); // we eat the error + assert_eq!(counters::ERROR_COUNT.get(), before + 1); // but increase the counter }); } diff --git a/crates/reliable-broadcast/src/lib.rs b/crates/reliable-broadcast/src/lib.rs index a46e806f9aca9..12647b7a1581a 100644 --- a/crates/reliable-broadcast/src/lib.rs +++ 
b/crates/reliable-broadcast/src/lib.rs @@ -210,8 +210,8 @@ where fn log_rpc_failure(error: anyhow::Error, receiver: Author) { // Log a sampled warning (to prevent spam) sample!( - SampleRate::Duration(Duration::from_secs(1)), - warn!(error = ?error, "rpc to {} failed, error {}", receiver, error) + SampleRate::Duration(Duration::from_secs(30)), + warn!(error = ?error, "[sampled] rpc to {} failed, error {}", receiver, error) ); // Log at the debug level (this is useful for debugging diff --git a/execution/executor/src/components/chunk_output.rs b/execution/executor/src/components/chunk_output.rs index 7de6d39417a0d..3e471f5dcf714 100644 --- a/execution/executor/src/components/chunk_output.rs +++ b/execution/executor/src/components/chunk_output.rs @@ -301,13 +301,6 @@ pub fn update_counters_for_processed_chunk( ), }, TransactionStatus::Discard(discard_status_code) => { - sample!( - SampleRate::Duration(Duration::from_secs(15)), - warn!( - "Txn being discarded is {:?} with status code {:?}", - txn, discard_status_code - ) - ); ( // Specialize duplicate txns for alerts if *discard_status_code == StatusCode::SEQUENCE_NUMBER_TOO_OLD { @@ -317,6 +310,14 @@ pub fn update_counters_for_processed_chunk( } else if *discard_status_code == StatusCode::TRANSACTION_EXPIRED { "discard_transaction_expired" } else { + // Only log if it is an interesting discard + sample!( + SampleRate::Duration(Duration::from_secs(15)), + warn!( + "[sampled] Txn being discarded is {:?} with status code {:?}", + txn, discard_status_code + ) + ); "discard" }, "error_code", diff --git a/network/framework/src/application/interface.rs b/network/framework/src/application/interface.rs index 6ccb2cf36354e..912e34c49e98b 100644 --- a/network/framework/src/application/interface.rs +++ b/network/framework/src/application/interface.rs @@ -177,7 +177,7 @@ impl NetworkClient { sample!( SampleRate::Duration(Duration::from_secs(10)), warn!( - "Unavailable peers (without a common network protocol): {:?}", + "[sampled] Unavailable peers (without a common network protocol): {:?}", peers_without_a_protocol ) ); diff --git a/network/framework/src/peer/mod.rs b/network/framework/src/peer/mod.rs index 094e3d70c0421..651d5fed0eece 100644 --- a/network/framework/src/peer/mod.rs +++ b/network/framework/src/peer/mod.rs @@ -639,14 +639,17 @@ where .outbound_rpcs .handle_outbound_request(request, write_reqs_tx) { - warn!( - NetworkSchema::new(&self.network_context) - .connection_metadata(&self.connection_metadata), - error = %e, - "Failed to send outbound rpc request for protocol {} to peer: {}. Error: {}", - protocol_id, - self.remote_peer_id().short_str(), - e, + sample!( + SampleRate::Duration(Duration::from_secs(10)), + warn!( + NetworkSchema::new(&self.network_context) + .connection_metadata(&self.connection_metadata), + error = %e, + "[sampled] Failed to send outbound rpc request for protocol {} to peer: {}. Error: {}", + protocol_id, + self.remote_peer_id().short_str(), + e, + ) ); } }, diff --git a/network/framework/src/protocols/rpc/mod.rs b/network/framework/src/protocols/rpc/mod.rs index b948226c4cd70..2be2a22a5f667 100644 --- a/network/framework/src/protocols/rpc/mod.rs +++ b/network/framework/src/protocols/rpc/mod.rs @@ -666,13 +666,16 @@ impl OutboundRpcs { FAILED_LABEL, ) .inc(); - warn!( - NetworkSchema::new(network_context).remote_peer(peer_id), - "{} Error making outbound RPC request to {} (request_id {}). 
Error: {}", - network_context, - peer_id.short_str(), - request_id, - error + sample!( + SampleRate::Duration(Duration::from_secs(10)), + warn!( + NetworkSchema::new(network_context).remote_peer(peer_id), + "[sampled] {} Error making outbound RPC request to {} (request_id {}). Error: {}", + network_context, + peer_id.short_str(), + request_id, + error + ) ); } }, diff --git a/state-sync/storage-service/server/src/handler.rs b/state-sync/storage-service/server/src/handler.rs index d1748ae8a72d9..fc642e212ded2 100644 --- a/state-sync/storage-service/server/src/handler.rs +++ b/state-sync/storage-service/server/src/handler.rs @@ -247,7 +247,7 @@ impl Handler { { sample!( SampleRate::Duration(Duration::from_secs(ERROR_LOG_FREQUENCY_SECS)), - warn!(LogSchema::new(LogEntry::OptimisticFetchRequest) + trace!(LogSchema::new(LogEntry::OptimisticFetchRequest) .error(&Error::InvalidRequest( "An active optimistic fetch was already found for the peer!".into() )) From f76560e216157ef094c0973d1e7c0bbf752bb422 Mon Sep 17 00:00:00 2001 From: igor-aptos <110557261+igor-aptos@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:30:27 -0700 Subject: [PATCH 03/36] remove backtrace from common warn logs (#14622) --- consensus/src/round_manager.rs | 4 ++-- crates/reliable-broadcast/src/lib.rs | 4 ++-- network/framework/src/protocols/health_checker/mod.rs | 6 ++---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/consensus/src/round_manager.rs b/consensus/src/round_manager.rs index aed3118835b9e..27382493cc2eb 100644 --- a/consensus/src/round_manager.rs +++ b/consensus/src/round_manager.rs @@ -1535,7 +1535,7 @@ impl RoundManager { Ok(_) => trace!(RoundStateLogSchema::new(round_state)), Err(e) => { counters::ERROR_COUNT.inc(); - warn!(error = ?e, kind = error_kind(&e), RoundStateLogSchema::new(round_state)); + warn!(kind = error_kind(&e), RoundStateLogSchema::new(round_state), "Error: {:#}", e); } } } @@ -1583,7 +1583,7 @@ impl RoundManager { Ok(_) => trace!(RoundStateLogSchema::new(round_state)), Err(e) => { counters::ERROR_COUNT.inc(); - warn!(error = ?e, kind = error_kind(&e), RoundStateLogSchema::new(round_state)); + warn!(kind = error_kind(&e), RoundStateLogSchema::new(round_state), "Error: {:#}", e); } } }, diff --git a/crates/reliable-broadcast/src/lib.rs b/crates/reliable-broadcast/src/lib.rs index 12647b7a1581a..7246f2b729a52 100644 --- a/crates/reliable-broadcast/src/lib.rs +++ b/crates/reliable-broadcast/src/lib.rs @@ -211,12 +211,12 @@ fn log_rpc_failure(error: anyhow::Error, receiver: Author) { // Log a sampled warning (to prevent spam) sample!( SampleRate::Duration(Duration::from_secs(30)), - warn!(error = ?error, "[sampled] rpc to {} failed, error {}", receiver, error) + warn!("[sampled] rpc to {} failed, error {:#}", receiver, error) ); // Log at the debug level (this is useful for debugging // and won't spam the logs in a production environment). 
- debug!(error = ?error, "rpc to {} failed, error {}", receiver, error); + debug!("rpc to {} failed, error {:#}", receiver, error); } pub struct DropGuard { diff --git a/network/framework/src/protocols/health_checker/mod.rs b/network/framework/src/protocols/health_checker/mod.rs index fea7da738dd95..c59bc8a4a3dde 100644 --- a/network/framework/src/protocols/health_checker/mod.rs +++ b/network/framework/src/protocols/health_checker/mod.rs @@ -342,11 +342,9 @@ impl + Unpin> HealthChec }, Err(err) => { warn!( - NetworkSchema::new(&self.network_context) - .remote_peer(&peer_id), - error = ?err, + NetworkSchema::new(&self.network_context).remote_peer(&peer_id), round = round, - "{} Ping failed for peer: {} round: {} with error: {:?}", + "{} Ping failed for peer: {} round: {} with error: {:#}", self.network_context, peer_id.short_str(), round, From 4dd94c673534d30d665d0ac855cdf918ad30acad Mon Sep 17 00:00:00 2001 From: Greg Nazario Date: Mon, 16 Sep 2024 18:17:47 -0700 Subject: [PATCH 04/36] [cli] Release 4.2.0 (#14653) --- Cargo.lock | 2 +- crates/aptos/CHANGELOG.md | 4 ++++ crates/aptos/Cargo.toml | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6e0f8ca54acc8..413ce73564963 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -262,7 +262,7 @@ dependencies = [ [[package]] name = "aptos" -version = "4.1.0" +version = "4.2.0" dependencies = [ "anyhow", "aptos-api-types", diff --git a/crates/aptos/CHANGELOG.md b/crates/aptos/CHANGELOG.md index 79547763c1bd9..516bace3b2591 100644 --- a/crates/aptos/CHANGELOG.md +++ b/crates/aptos/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to the Aptos CLI will be captured in this file. This project ## Unreleased +## [4.2.0] - 2024/09/16 +- Update latest VM and associated changes +- Update to latest compiler + ## [4.1.0] - 2024/08/30 - Marks Move 2 and compiler v2 as stable. - Adds new `--move-2` flag to work with Move 2 without need for multiple other flags. 
diff --git a/crates/aptos/Cargo.toml b/crates/aptos/Cargo.toml index 46e0ee8a6f92d..b1270bc827272 100644 --- a/crates/aptos/Cargo.toml +++ b/crates/aptos/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "aptos" description = "Aptos tool for management of nodes and interacting with the blockchain" -version = "4.1.0" +version = "4.2.0" # Workspace inherited keys authors = { workspace = true } From 637311bda2d070a4e32e7e813570b00bc8fd971e Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 16 Sep 2024 19:30:36 -0700 Subject: [PATCH 05/36] Addressing PR comments --- Cargo.lock | 1 + types/Cargo.toml | 1 + types/src/ledger_info.rs | 50 ++++++++++++++++++--------------- types/src/validator_verifier.rs | 16 +++++------ 4 files changed, 38 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6e0f8ca54acc8..551fdaee2bb63 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4225,6 +4225,7 @@ dependencies = [ "claims", "coset", "criterion", + "dashmap", "derivative", "fixed", "fxhash", diff --git a/types/Cargo.toml b/types/Cargo.toml index 79c5b17de97c1..d52bec17b259b 100644 --- a/types/Cargo.toml +++ b/types/Cargo.toml @@ -28,6 +28,7 @@ arr_macro = { workspace = true } base64 = { workspace = true } bcs = { workspace = true } bytes = { workspace = true } +dashmap = { workspace = true } fixed = { workspace = true } fxhash = { workspace = true } hashbrown = { workspace = true } diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs index 60e737a8cc214..d704defe627ad 100644 --- a/types/src/ledger_info.rs +++ b/types/src/ledger_info.rs @@ -475,9 +475,13 @@ impl LedgerInfoWithMixedSignatures { pub fn check_voting_power( &self, verifier: &ValidatorVerifier, + check_super_majority: bool, ) -> std::result::Result { let all_voters = self.all_voters(); - verifier.check_voting_power(all_voters.iter().collect_vec().into_iter(), true) + verifier.check_voting_power( + all_voters.iter().collect_vec().into_iter(), + check_super_majority, + ) } // Aggregates all the signatures, verifies the aggregate signature, and returns the aggregate signature. @@ -485,7 +489,7 @@ impl LedgerInfoWithMixedSignatures { &mut self, epoch_state: Arc, ) -> Result { - self.check_voting_power(&epoch_state.verifier)?; + self.check_voting_power(&epoch_state.verifier, true)?; let mut all_signatures = self.verified_signatures.clone(); for (author, signature) in self.unverified_signatures.signatures() { @@ -494,7 +498,7 @@ impl LedgerInfoWithMixedSignatures { let aggregated_sig = epoch_state.verifier.aggregate_signatures(&all_signatures)?; - let (verified_aggregate_signature, malicious_authors) = match epoch_state + match epoch_state .verifier .clone() .verify_multi_signatures(self.ledger_info(), &aggregated_sig) @@ -505,7 +509,10 @@ impl LedgerInfoWithMixedSignatures { .add_signature(*account_address, signature.clone()); } self.unverified_signatures = PartialSignatures::empty(); - (aggregated_sig, vec![]) + Ok(LedgerInfoWithSignatures::new( + self.ledger_info.clone(), + aggregated_sig, + )) }, Err(_) => { // Question: Should we assign min tasks per thread here for into_par_iter()? 
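// Sketch of the fallback strategy implemented in the hunk above and the one
// below: if the aggregate signature fails to verify, each unverified
// signature is re-checked individually (in parallel, via rayon's
// into_par_iter) and the authors of bad signatures are split out as
// malicious. `verify_one` here is a hypothetical stand-in for the
// per-signature check; it is not the actual ValidatorVerifier API.
use rayon::prelude::*;

fn partition_signers<A, S>(
    unverified: Vec<(A, S)>,
    verify_one: impl Fn(&A, &S) -> bool + Sync + Send,
) -> (Vec<(A, S)>, Vec<A>)
where
    A: Send,
    S: Send,
{
    let (good, bad): (Vec<(A, S)>, Vec<(A, S)>) = unverified
        .into_par_iter()
        .partition(|(author, sig)| verify_one(author, sig));
    // Keep the valid signatures; report only the authors of invalid ones.
    (good, bad.into_iter().map(|(author, _)| author).collect())
}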
@@ -537,22 +544,21 @@ impl LedgerInfoWithMixedSignatures { .collect(); self.unverified_signatures = PartialSignatures::empty(); - let aggregated_sig = epoch_state + epoch_state .verifier - .aggregate_signatures(&self.verified_signatures)?; - // epoch_state - // .read() - // .verifier - // .verify_multi_signatures(self.ledger_info(), &aggregated_sig)?; - (aggregated_sig, malicious_authors) + .add_malicious_authors(malicious_authors); + + match self.check_voting_power(&epoch_state.verifier, true) { + Ok(_) => Ok(LedgerInfoWithSignatures::new( + self.ledger_info.clone(), + epoch_state + .verifier + .aggregate_signatures(&self.verified_signatures)?, + )), + Err(e) => Err(e), + } }, - }; - epoch_state - .verifier - .add_malicious_authors(malicious_authors); - self.check_voting_power(&epoch_state.verifier).map(|_| { - LedgerInfoWithSignatures::new(self.ledger_info.clone(), verified_aggregate_signature) - }) + } } pub fn ledger_info(&self) -> &LedgerInfo { @@ -746,7 +752,7 @@ mod tests { 2 ); assert_eq!( - ledger_info_with_mixed_signatures.check_voting_power(&validator_verifier), + ledger_info_with_mixed_signatures.check_voting_power(&validator_verifier, true), Err(VerifyError::TooLittleVotingPower { voting_power: 4, expected_voting_power: 5 @@ -776,7 +782,7 @@ mod tests { ); assert_eq!( ledger_info_with_mixed_signatures - .check_voting_power(&validator_verifier) + .check_voting_power(&validator_verifier, true) .unwrap(), 5 ); @@ -831,7 +837,7 @@ mod tests { ); assert_eq!( ledger_info_with_mixed_signatures - .check_voting_power(&validator_verifier) + .check_voting_power(&validator_verifier, true) .unwrap(), 5 ); @@ -872,7 +878,7 @@ mod tests { assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 6); assert_eq!( ledger_info_with_mixed_signatures - .check_voting_power(&validator_verifier) + .check_voting_power(&validator_verifier, true) .unwrap(), 6 ); diff --git a/types/src/validator_verifier.rs b/types/src/validator_verifier.rs index 32ca1572a58a2..6ca856ae7b61f 100644 --- a/types/src/validator_verifier.rs +++ b/types/src/validator_verifier.rs @@ -17,13 +17,13 @@ use aptos_crypto::{ hash::CryptoHash, Signature, VerifyingKey, }; -use aptos_infallible::RwLock; +use dashmap::DashSet; use itertools::Itertools; #[cfg(any(test, feature = "fuzzing"))] use proptest_derive::Arbitrary; use serde::{Deserialize, Deserializer, Serialize}; use std::{ - collections::{BTreeMap, HashMap, HashSet}, + collections::{BTreeMap, HashMap}, fmt, sync::Arc, }; @@ -149,7 +149,7 @@ pub struct ValidatorVerifier { /// submitted bad votes that has resulted in having to verify each vote individually. Further votes by these validators /// will be verified individually bypassing the optimization. #[serde(skip)] - malicious_authors: Arc>>, + malicious_authors: Arc>, } // Implement Eq and PartialEq for ValidatorVerifier. Skip malicious_authors field in the comparison. 
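// Minimal demonstration of the DashSet swap made in this hunk (assuming
// only the dashmap crate): DashSet shards its storage internally, so
// concurrent inserts and lookups take &self and need no RwLock around the
// whole set, which is why the read()/write() calls disappear below.
use dashmap::DashSet;
use std::sync::Arc;

fn dashset_demo() {
    let malicious: Arc<DashSet<u64>> = Arc::new(DashSet::new());
    let handle = malicious.clone();
    handle.insert(7); // no explicit write guard
    assert!(malicious.contains(&7)); // contends only on the key's shard
}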
@@ -200,7 +200,7 @@ impl ValidatorVerifier { quorum_voting_power, total_voting_power, address_to_validator_index, - malicious_authors: Arc::new(RwLock::new(HashSet::new())), + malicious_authors: Arc::new(DashSet::new()), } } @@ -238,16 +238,16 @@ impl ValidatorVerifier { pub fn add_malicious_authors(&self, malicious_authors: Vec) { for author in malicious_authors { - self.malicious_authors.write().insert(author); + self.malicious_authors.insert(author); } } - pub fn malicious_authors(&self) -> HashSet { - self.malicious_authors.read().clone() + pub fn malicious_authors(&self) -> Arc> { + self.malicious_authors.clone() } pub fn is_malicious_author(&self, author: &AccountAddress) -> bool { - self.malicious_authors.read().contains(author) + self.malicious_authors.contains(author) } /// Helper method to initialize with a single author and public key with quorum voting power 1. From 8b4d7e2527c9c0d4c1a034d6bda37712804f668e Mon Sep 17 00:00:00 2001 From: Bo Wu Date: Fri, 13 Sep 2024 10:43:34 -0700 Subject: [PATCH 06/36] add a field for internal indexer version --- api/src/accounts.rs | 23 ++-------- api/src/context.rs | 86 ++++++++++++++++++++---------------- api/src/events.rs | 4 +- api/src/index.rs | 1 - api/src/transactions.rs | 2 +- api/types/src/ledger_info.rs | 20 +++++++++ 6 files changed, 74 insertions(+), 62 deletions(-) diff --git a/api/src/accounts.rs b/api/src/accounts.rs index 3ac9a13005e61..d94454f6b34e3 100644 --- a/api/src/accounts.rs +++ b/api/src/accounts.rs @@ -66,7 +66,7 @@ impl AccountsApi { let context = self.context.clone(); api_spawn_blocking(move || { - let account = Account::new(context, address.0, ledger_version.0, None, None, false)?; + let account = Account::new(context, address.0, ledger_version.0, None, None)?; account.account(&accept_type) }) .await @@ -118,7 +118,6 @@ impl AccountsApi { ledger_version.0, start.0.map(StateKey::from), limit.0, - true, )?; account.resources(&accept_type) }) @@ -171,7 +170,6 @@ impl AccountsApi { ledger_version.0, start.0.map(StateKey::from), limit.0, - true, )?; account.modules(&accept_type) }) @@ -201,24 +199,11 @@ impl Account { requested_ledger_version: Option, start: Option, limit: Option, - require_state_indices: bool, ) -> Result { - let sharding_enabled = context - .node_config - .storage - .rocksdb_configs - .enable_storage_sharding; - - let (latest_ledger_info, requested_version) = if sharding_enabled && require_state_indices { - context.get_latest_ledger_info_and_verify_internal_indexer_lookup_version( + let (latest_ledger_info, requested_version) = context + .get_latest_ledger_info_and_verify_lookup_version( requested_ledger_version.map(|inner| inner.0), - )? - } else { - // Use the latest ledger version, or the requested associated version - context.get_latest_ledger_info_and_verify_lookup_version( - requested_ledger_version.map(|inner| inner.0), - )? 
- }; + )?; Ok(Self { context, diff --git a/api/src/context.rs b/api/src/context.rs index aa9e59848544f..73b3c31b11d91 100644 --- a/api/src/context.rs +++ b/api/src/context.rs @@ -221,20 +221,26 @@ impl Context { .map_err(|e| e.into()) } - pub fn get_latest_ledger_info(&self) -> Result { + pub fn get_oldest_version_and_block_height( + &self, + ) -> Result<(Version, u64), E> { + self.db + .get_first_viable_block() + .context("Failed to retrieve oldest block information") + .map_err(|e| E::service_unavailable_with_code_no_info(e, AptosErrorCode::InternalError)) + } + + pub fn get_latest_storage_ledger_info( + &self, + ) -> Result { let ledger_info = self .get_latest_ledger_info_with_signatures() .context("Failed to retrieve latest ledger info") .map_err(|e| { E::service_unavailable_with_code_no_info(e, AptosErrorCode::InternalError) })?; - let (oldest_version, oldest_block_height) = self - .db - .get_first_viable_block() - .context("Failed to retrieve oldest block information") - .map_err(|e| { - E::service_unavailable_with_code_no_info(e, AptosErrorCode::InternalError) - })?; + + let (oldest_version, oldest_block_height) = self.get_oldest_version_and_block_height()?; let (_, _, newest_block_event) = self .db .get_block_info_by_version(ledger_info.ledger_info().version()) @@ -252,32 +258,12 @@ impl Context { )) } - pub fn get_latest_ledger_info_and_verify_internal_indexer_lookup_version( - &self, - requested_ledger_version: Option, - ) -> Result<(LedgerInfo, Version), E> { - if self.indexer_reader.is_none() { - return Err(E::internal_with_code_no_info( - "Indexer reader doesn't exist", - AptosErrorCode::InternalError, - )); - } - - let (latest_ledger_info, latest_internal_indexer_ledger_version) = - self.get_latest_internal_indexer_ledger_version_and_main_db_info()?; - if let Some(version) = requested_ledger_version { - let request_ledger_version = Version::from(version); - if latest_internal_indexer_ledger_version < request_ledger_version { - return Err(version_not_found( - request_ledger_version, - &latest_ledger_info, - )); - } else if request_ledger_version < latest_ledger_info.oldest_ledger_version.0 { - return Err(version_pruned(request_ledger_version, &latest_ledger_info)); - } - Ok((latest_ledger_info, request_ledger_version)) + pub fn get_latest_ledger_info(&self) -> Result { + if self.indexer_reader.is_some() { + let ledger_info = self.get_latest_internal_indexer_ledger_version_and_ledger_info()?; + Ok(ledger_info) } else { - Ok((latest_ledger_info, latest_internal_indexer_ledger_version)) + self.get_latest_storage_ledger_info() } } @@ -306,20 +292,42 @@ impl Context { Ok((latest_ledger_info, requested_ledger_version)) } - pub fn get_latest_internal_indexer_ledger_version_and_main_db_info( + pub fn get_latest_internal_indexer_ledger_version_and_ledger_info< + E: ServiceUnavailableError, + >( &self, - ) -> Result<(LedgerInfo, Version), E> { + ) -> Result { if let Some(indexer_reader) = self.indexer_reader.as_ref() { if let Some(latest_version) = indexer_reader .get_latest_internal_indexer_ledger_version() - .map_err(|err| E::internal_with_code_no_info(err, AptosErrorCode::InternalError))? + .map_err(|err| { + E::service_unavailable_with_code_no_info(err, AptosErrorCode::InternalError) + })? 
{ - let latest_ledger_info = self.get_latest_ledger_info()?; - return Ok((latest_ledger_info, latest_version)); + let (_, _, new_block_event) = self + .db + .get_block_info_by_version(latest_version) + .map_err(|_| { + E::service_unavailable_with_code_no_info( + "Failed to get block", + AptosErrorCode::InternalError, + ) + })?; + let (oldest_version, oldest_block_height) = + self.get_oldest_version_and_block_height()?; + return Ok(LedgerInfo::new_ledger_info( + &self.chain_id(), + new_block_event.epoch(), + latest_version, + oldest_version, + oldest_block_height, + new_block_event.height(), + new_block_event.proposed_time(), + )); } } - Err(E::internal_with_code_no_info( + Err(E::service_unavailable_with_code_no_info( "Indexer reader doesn't exist, or doesn't have data.", AptosErrorCode::InternalError, )) diff --git a/api/src/events.rs b/api/src/events.rs index 5c9266df373b8..49c4fad21ce9f 100644 --- a/api/src/events.rs +++ b/api/src/events.rs @@ -77,7 +77,7 @@ impl EventsApi { // Ensure that account exists let api = self.clone(); api_spawn_blocking(move || { - let account = Account::new(api.context.clone(), address.0, None, None, None, true)?; + let account = Account::new(api.context.clone(), address.0, None, None, None)?; account.verify_account_or_object_resource()?; api.list( account.latest_ledger_info, @@ -144,7 +144,7 @@ impl EventsApi { let api = self.clone(); api_spawn_blocking(move || { - let account = Account::new(api.context.clone(), address.0, None, None, None, true)?; + let account = Account::new(api.context.clone(), address.0, None, None, None)?; let key = account.find_event_key(event_handle.0, field_name.0.into())?; api.list(account.latest_ledger_info, accept_type, page, key) }) diff --git a/api/src/index.rs b/api/src/index.rs index 94b5289636413..ba91cbb34c342 100644 --- a/api/src/index.rs +++ b/api/src/index.rs @@ -33,7 +33,6 @@ impl IndexApi { self.context .check_api_output_enabled("Get ledger info", &accept_type)?; let ledger_info = self.context.get_latest_ledger_info()?; - let node_role = self.context.node_role(); api_spawn_blocking(move || match accept_type { diff --git a/api/src/transactions.rs b/api/src/transactions.rs index 86a16b8a356bd..1e1214361961b 100644 --- a/api/src/transactions.rs +++ b/api/src/transactions.rs @@ -986,7 +986,7 @@ impl TransactionsApi { address: Address, ) -> BasicResultWith404> { // Verify the account exists - let account = Account::new(self.context.clone(), address, None, None, None, true)?; + let account = Account::new(self.context.clone(), address, None, None, None)?; account.get_account_resource()?; let latest_ledger_info = account.latest_ledger_info; diff --git a/api/types/src/ledger_info.rs b/api/types/src/ledger_info.rs index ef912190c94c9..97438ae104013 100644 --- a/api/types/src/ledger_info.rs +++ b/api/types/src/ledger_info.rs @@ -40,6 +40,26 @@ impl LedgerInfo { } } + pub fn new_ledger_info( + chain_id: &ChainId, + epoch: u64, + ledger_version: u64, + oldest_ledger_version: u64, + oldest_block_height: u64, + block_height: u64, + ledger_timestamp: u64, + ) -> Self { + Self { + chain_id: chain_id.id(), + epoch: epoch.into(), + ledger_version: ledger_version.into(), + oldest_ledger_version: oldest_ledger_version.into(), + block_height: block_height.into(), + oldest_block_height: oldest_block_height.into(), + ledger_timestamp: ledger_timestamp.into(), + } + } + pub fn epoch(&self) -> u64 { self.epoch.into() } From 45f5d6570fa101c242beb02d13197a8c50b4aab7 Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Sun, 8 Sep 2024 06:33:23 -0400 
Subject: [PATCH 07/36] [Consensus Observer] Wrap block and payload stores in locks. --- .../observer/active_state.rs | 51 ++-- .../observer/consensus_observer.rs | 79 ++++-- .../observer/ordered_blocks.rs | 104 +++---- .../observer/payload_store.rs | 116 ++++---- .../observer/pending_blocks.rs | 263 ++++++++++-------- 5 files changed, 343 insertions(+), 270 deletions(-) diff --git a/consensus/src/consensus_observer/observer/active_state.rs b/consensus/src/consensus_observer/observer/active_state.rs index 73c03af670eee..fb5482bba3306 100644 --- a/consensus/src/consensus_observer/observer/active_state.rs +++ b/consensus/src/consensus_observer/observer/active_state.rs @@ -101,8 +101,8 @@ impl ActiveObserverState { /// root ledger info and remove the blocks from the given stores. pub fn create_commit_callback( &self, - pending_ordered_blocks: OrderedBlockStore, - block_payload_store: BlockPayloadStore, + pending_ordered_blocks: Arc>, + block_payload_store: Arc>, ) -> StateComputerCommitCallBackType { // Clone the root pointer let root = self.root.clone(); @@ -282,15 +282,17 @@ async fn extract_on_chain_configs( /// A simple helper function that handles the committed blocks /// (as part of the commit callback). fn handle_committed_blocks( - pending_ordered_blocks: OrderedBlockStore, - block_payload_store: BlockPayloadStore, + pending_ordered_blocks: Arc>, + block_payload_store: Arc>, root: Arc>, blocks: &[Arc], ledger_info: LedgerInfoWithSignatures, ) { // Remove the committed blocks from the payload and pending stores - block_payload_store.remove_committed_blocks(blocks); - pending_ordered_blocks.remove_blocks_for_commit(&ledger_info); + block_payload_store.lock().remove_committed_blocks(blocks); + pending_ordered_blocks + .lock() + .remove_blocks_for_commit(&ledger_info); // Verify the ledger info is for the same epoch let mut root = root.lock(); @@ -407,8 +409,12 @@ mod test { let root = Arc::new(Mutex::new(create_ledger_info(epoch, round))); // Create the ordered block store and block payload store - let ordered_block_store = OrderedBlockStore::new(node_config.consensus_observer); - let mut block_payload_store = BlockPayloadStore::new(node_config.consensus_observer); + let ordered_block_store = Arc::new(Mutex::new(OrderedBlockStore::new( + node_config.consensus_observer, + ))); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + node_config.consensus_observer, + ))); // Handle the committed blocks at the wrong epoch and verify the root is not updated handle_committed_blocks( @@ -432,12 +438,16 @@ mod test { // Add pending ordered blocks let num_ordered_blocks = 10; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, epoch, round); + let ordered_blocks = create_and_add_ordered_blocks( + ordered_block_store.clone(), + num_ordered_blocks, + epoch, + round, + ); // Add block payloads for the ordered blocks for ordered_block in &ordered_blocks { - create_and_add_payloads_for_ordered_block(&mut block_payload_store, ordered_block); + create_and_add_payloads_for_ordered_block(block_payload_store.clone(), ordered_block); } // Create the commit ledger info (for the second to last block) @@ -461,8 +471,11 @@ mod test { ); // Verify the committed blocks are removed from the stores - assert_eq!(ordered_block_store.get_all_ordered_blocks().len(), 1); - assert_eq!(block_payload_store.get_block_payloads().lock().len(), 1); + assert_eq!(ordered_block_store.lock().get_all_ordered_blocks().len(), 1); + assert_eq!( + 
block_payload_store.lock().get_block_payloads().lock().len(), + 1 + ); // Verify the root is updated assert_eq!(root.lock().clone(), committed_ledger_info); @@ -495,7 +508,7 @@ mod test { /// Creates and adds the specified number of ordered blocks to the ordered blocks fn create_and_add_ordered_blocks( - ordered_block_store: &OrderedBlockStore, + ordered_block_store: Arc>, num_ordered_blocks: usize, epoch: u64, starting_round: Round, @@ -532,7 +545,9 @@ mod test { let ordered_block = OrderedBlock::new(blocks, ordered_proof); // Insert the block into the ordered block store - ordered_block_store.insert_ordered_block(ordered_block.clone()); + ordered_block_store + .lock() + .insert_ordered_block(ordered_block.clone()); // Add the block to the ordered blocks ordered_blocks.push(ordered_block); @@ -543,13 +558,15 @@ mod test { /// Creates and adds payloads for the ordered block fn create_and_add_payloads_for_ordered_block( - block_payload_store: &mut BlockPayloadStore, + block_payload_store: Arc>, ordered_block: &OrderedBlock, ) { for block in ordered_block.blocks() { let block_payload = BlockPayload::new(block.block_info(), BlockTransactionPayload::empty()); - block_payload_store.insert_block_payload(block_payload, true); + block_payload_store + .lock() + .insert_block_payload(block_payload, true); } } diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs index 250b338d23344..9917adad0f74a 100644 --- a/consensus/src/consensus_observer/observer/consensus_observer.rs +++ b/consensus/src/consensus_observer/observer/consensus_observer.rs @@ -32,6 +32,7 @@ use aptos_config::config::{ConsensusObserverConfig, NodeConfig}; use aptos_consensus_types::{pipeline, pipelined_block::PipelinedBlock}; use aptos_crypto::{bls12381, Genesis}; use aptos_event_notifications::{DbBackedOnChainConfig, ReconfigNotificationListener}; +use aptos_infallible::Mutex; use aptos_logger::{debug, error, info, warn}; use aptos_network::{ application::interface::NetworkClient, protocols::wire::handshake::v1::ProtocolId, @@ -63,13 +64,13 @@ pub struct ConsensusObserver { active_observer_state: ActiveObserverState, // The block payload store (containing the block transaction payloads) - block_payload_store: BlockPayloadStore, + block_payload_store: Arc>, // The ordered block store (containing ordered blocks that are ready for execution) - ordered_block_store: OrderedBlockStore, + ordered_block_store: Arc>, // The pending block store (containing pending blocks that are without payloads) - pending_block_store: PendingBlockStore, + pending_block_store: Arc>, // The execution client to the buffer manager execution_client: Arc, @@ -116,12 +117,17 @@ impl ConsensusObserver { let active_observer_state = ActiveObserverState::new(node_config, db_reader, reconfig_events, consensus_publisher); + // Create the block and payload stores + let ordered_block_store = OrderedBlockStore::new(consensus_observer_config); + let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let pending_block_store = PendingBlockStore::new(consensus_observer_config); + // Create the consensus observer Self { active_observer_state, - ordered_block_store: OrderedBlockStore::new(consensus_observer_config), - block_payload_store: BlockPayloadStore::new(consensus_observer_config), - pending_block_store: PendingBlockStore::new(consensus_observer_config), + ordered_block_store: Arc::new(Mutex::new(ordered_block_store)), + block_payload_store: 
Arc::new(Mutex::new(block_payload_store)), + pending_block_store: Arc::new(Mutex::new(pending_block_store)), execution_client, sync_notification_sender, sync_handle: None, @@ -137,7 +143,7 @@ impl ConsensusObserver { } // Otherwise, check if all the payloads exist in the payload store - self.block_payload_store.all_payloads_exist(blocks) + self.block_payload_store.lock().all_payloads_exist(blocks) } /// Checks the progress of the consensus observer @@ -171,13 +177,13 @@ impl ConsensusObserver { /// subscriptions, where we want to wipe all state and restart). async fn clear_pending_block_state(&self) { // Clear the payload store - self.block_payload_store.clear_all_payloads(); + self.block_payload_store.lock().clear_all_payloads(); // Clear the pending blocks - self.pending_block_store.clear_missing_blocks(); + self.pending_block_store.lock().clear_missing_blocks(); // Clear the ordered blocks - self.ordered_block_store.clear_all_ordered_blocks(); + self.ordered_block_store.lock().clear_all_ordered_blocks(); // Reset the execution pipeline for the root let root = self.active_observer_state.root(); @@ -256,9 +262,9 @@ impl ConsensusObserver { self.active_observer_state.epoch_state() } - /// Returns the last known block - fn get_last_block(&self) -> BlockInfo { - if let Some(last_pending_block) = self.ordered_block_store.get_last_ordered_block() { + /// Returns the last ordered block + fn get_last_ordered_block(&self) -> BlockInfo { + if let Some(last_pending_block) = self.ordered_block_store.lock().get_last_ordered_block() { last_pending_block } else { // Return the root ledger info @@ -278,12 +284,16 @@ impl ConsensusObserver { /// Orders any ready pending blocks for the given epoch and round async fn order_ready_pending_block(&mut self, block_epoch: u64, block_round: Round) { - if let Some(ordered_block) = self.pending_block_store.remove_ready_block( + // Get any ready ordered block + let ready_ordered_block = self.pending_block_store.lock().remove_ready_block( block_epoch, block_round, - &self.block_payload_store, - ) { - self.process_ordered_block(ordered_block).await; + self.block_payload_store.clone(), + ); + + // Process the ready ordered block (if it exists) + if let Some(ready_ordered_block) = ready_ordered_block { + self.process_ordered_block(ready_ordered_block).await; } } @@ -332,6 +342,7 @@ impl ConsensusObserver { // Update the payload store with the payload self.block_payload_store + .lock() .insert_block_payload(block_payload, verified_payload); // Check if there are blocks that were missing payloads but are @@ -379,7 +390,7 @@ impl ConsensusObserver { // Otherwise, we failed to process the commit decision. If the commit // is for a future epoch or round, we need to state sync. 
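// For reference, a compact sketch of the sharing pattern this patch applies
// to all three stores (ordered, payload, pending). The codebase uses
// aptos_infallible::Mutex, whose lock() cannot poison; plain std types are
// used here so the sketch stands alone.
use std::collections::BTreeMap;
use std::sync::{Arc, Mutex};

struct Store {
    blocks: BTreeMap<u64, &'static str>,
}

fn share_store() {
    let store = Arc::new(Mutex::new(Store { blocks: BTreeMap::new() }));
    let for_callback = store.clone(); // cheap handle, e.g. for a commit callback
    // Lock, mutate, and release promptly so critical sections stay short.
    for_callback.lock().unwrap().blocks.insert(1, "block");
    assert_eq!(store.lock().unwrap().blocks.len(), 1);
}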
- let last_block = self.get_last_block(); + let last_block = self.get_last_ordered_block(); let commit_decision_round = commit_decision.round(); let epoch_changed = commit_decision_epoch > last_block.epoch(); if epoch_changed || commit_decision_round > last_block.round() { @@ -408,8 +419,10 @@ impl ConsensusObserver { self.active_observer_state .update_root(commit_decision.commit_proof().clone()); self.block_payload_store + .lock() .remove_blocks_for_epoch_round(commit_decision_epoch, commit_decision_round); self.ordered_block_store + .lock() .remove_blocks_for_commit(commit_decision.commit_proof()); // Start the state sync process @@ -431,6 +444,7 @@ impl ConsensusObserver { // Get the pending block for the commit decision let pending_block = self .ordered_block_store + .lock() .get_ordered_block(commit_decision.epoch(), commit_decision.round()); // Process the pending block @@ -444,6 +458,7 @@ impl ConsensusObserver { )) ); self.ordered_block_store + .lock() .update_commit_decision(commit_decision); // If we are not in sync mode, forward the commit decision to the execution pipeline @@ -553,7 +568,9 @@ impl ConsensusObserver { if self.all_payloads_exist(ordered_block.blocks()) { self.process_ordered_block(ordered_block).await; } else { - self.pending_block_store.insert_pending_block(ordered_block); + self.pending_block_store + .lock() + .insert_pending_block(ordered_block); } } @@ -587,6 +604,7 @@ impl ConsensusObserver { // Verify the block payloads against the ordered block if let Err(error) = self .block_payload_store + .lock() .verify_payloads_against_ordered_block(&ordered_block) { error!( @@ -601,9 +619,10 @@ impl ConsensusObserver { // The block was verified correctly. If the block is a child of our // last block, we can insert it into the ordered block store. 
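// Illustration of the chain-extension rule enforced just below, with
// hypothetical minimal types (the real code compares HashValue block ids):
// a verified ordered block is stored only when its first block's parent id
// equals the id of the last ordered block we already hold.
#[derive(Clone, Copy, PartialEq, Eq)]
struct BlockId(u64);

struct Header {
    id: BlockId,
    parent_id: BlockId,
}

fn extends_tip(tip: &Header, incoming_first: &Header) -> bool {
    tip.id == incoming_first.parent_id
}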
- if self.get_last_block().id() == ordered_block.first_block().parent_id() { + if self.get_last_ordered_block().id() == ordered_block.first_block().parent_id() { // Insert the ordered block into the pending blocks self.ordered_block_store + .lock() .insert_ordered_block(ordered_block.clone()); // If we're not in sync mode, finalize the ordered blocks @@ -655,6 +674,7 @@ impl ConsensusObserver { let new_epoch_state = self.get_epoch_state(); let verified_payload_rounds = self .block_payload_store + .lock() .verify_payload_signatures(&new_epoch_state); // Order all the pending blocks that are now ready (these were buffered during state sync) @@ -668,9 +688,8 @@ impl ConsensusObserver { self.sync_handle = None; // Process all the newly ordered blocks - for (_, (ordered_block, commit_decision)) in - self.ordered_block_store.get_all_ordered_blocks() - { + let all_ordered_blocks = self.ordered_block_store.lock().get_all_ordered_blocks(); + for (_, (ordered_block, commit_decision)) in all_ordered_blocks { // Finalize the ordered block self.finalize_ordered_block(ordered_block).await; @@ -684,19 +703,25 @@ impl ConsensusObserver { /// Updates the metrics for the processed blocks fn update_processed_blocks_metrics(&self) { // Update the payload store metrics - self.block_payload_store.update_payload_store_metrics(); + self.block_payload_store + .lock() + .update_payload_store_metrics(); // Update the pending block metrics - self.pending_block_store.update_pending_blocks_metrics(); + self.pending_block_store + .lock() + .update_pending_blocks_metrics(); // Update the pending block metrics - self.ordered_block_store.update_ordered_blocks_metrics(); + self.ordered_block_store + .lock() + .update_ordered_blocks_metrics(); } /// Waits for a new epoch to start async fn wait_for_epoch_start(&mut self) { // Wait for the active state epoch to update - let block_payloads = self.block_payload_store.get_block_payloads(); + let block_payloads = self.block_payload_store.lock().get_block_payloads(); let (payload_manager, consensus_config, execution_config, randomness_config) = self .active_observer_state .wait_for_epoch_start(block_payloads) diff --git a/consensus/src/consensus_observer/observer/ordered_blocks.rs b/consensus/src/consensus_observer/observer/ordered_blocks.rs index edfde50a4ed8f..7bb43fbc4108f 100644 --- a/consensus/src/consensus_observer/observer/ordered_blocks.rs +++ b/consensus/src/consensus_observer/observer/ordered_blocks.rs @@ -10,46 +10,43 @@ use crate::consensus_observer::{ }; use aptos_config::config::ConsensusObserverConfig; use aptos_consensus_types::common::Round; -use aptos_infallible::Mutex; use aptos_logger::{debug, warn}; use aptos_types::{block_info::BlockInfo, ledger_info::LedgerInfoWithSignatures}; -use std::{collections::BTreeMap, sync::Arc}; +use std::collections::BTreeMap; /// A simple struct to store ordered blocks -#[derive(Clone)] pub struct OrderedBlockStore { // The configuration of the consensus observer consensus_observer_config: ConsensusObserverConfig, // Ordered blocks. The key is the epoch and round of the last block in the // ordered block. Each entry contains the block and the commit decision (if any). 
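// The signature changes throughout this file follow from the field change
// just below: once the store lives behind one outer Mutex, fields become
// plain containers and mutating methods take &mut self, so exclusive access
// is enforced by the borrow checker instead of a per-field runtime lock.
// A minimal sketch of the resulting shape:
use std::collections::BTreeMap;

struct Inner {
    map: BTreeMap<u64, u64>,
}

impl Inner {
    fn clear(&mut self) {
        // No .lock() here: the caller already holds the outer guard, which
        // dereferences to &mut Inner.
        self.map.clear();
    }
}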
- ordered_blocks: Arc)>>>, + ordered_blocks: BTreeMap<(u64, Round), (OrderedBlock, Option)>, } impl OrderedBlockStore { pub fn new(consensus_observer_config: ConsensusObserverConfig) -> Self { Self { consensus_observer_config, - ordered_blocks: Arc::new(Mutex::new(BTreeMap::new())), + ordered_blocks: BTreeMap::new(), } } /// Clears all ordered blocks - pub fn clear_all_ordered_blocks(&self) { - self.ordered_blocks.lock().clear(); + pub fn clear_all_ordered_blocks(&mut self) { + self.ordered_blocks.clear(); } /// Returns a copy of the ordered blocks pub fn get_all_ordered_blocks( &self, ) -> BTreeMap<(u64, Round), (OrderedBlock, Option)> { - self.ordered_blocks.lock().clone() + self.ordered_blocks.clone() } /// Returns the last ordered block (if any) pub fn get_last_ordered_block(&self) -> Option { self.ordered_blocks - .lock() .last_key_value() .map(|(_, (ordered_block, _))| ordered_block.last_block().block_info()) } @@ -57,7 +54,6 @@ impl OrderedBlockStore { /// Returns the ordered block for the given epoch and round (if any) pub fn get_ordered_block(&self, epoch: u64, round: Round) -> Option { self.ordered_blocks - .lock() .get(&(epoch, round)) .map(|(ordered_block, _)| ordered_block.clone()) } @@ -65,10 +61,10 @@ impl OrderedBlockStore { /// Inserts the given ordered block into the ordered blocks. This function /// assumes the block has already been checked to extend the current ordered /// blocks, and that the ordered proof has been verified. - pub fn insert_ordered_block(&self, ordered_block: OrderedBlock) { + pub fn insert_ordered_block(&mut self, ordered_block: OrderedBlock) { // Verify that the number of ordered blocks doesn't exceed the maximum let max_num_ordered_blocks = self.consensus_observer_config.max_num_pending_blocks as usize; - if self.ordered_blocks.lock().len() >= max_num_ordered_blocks { + if self.ordered_blocks.len() >= max_num_ordered_blocks { warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Exceeded the maximum number of ordered blocks: {:?}. Dropping block: {:?}.", @@ -94,32 +90,32 @@ impl OrderedBlockStore { // Insert the ordered block self.ordered_blocks - .lock() .insert((last_block_epoch, last_block_round), (ordered_block, None)); } /// Removes the ordered blocks for the given commit ledger info. This will /// remove all blocks up to (and including) the epoch and round of the commit. 
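// Sketch of the pruning idiom used by remove_blocks_for_commit below:
// (epoch, round) tuple keys order lexicographically in a BTreeMap, and
// split_off(&k) keeps everything at or above k, so dropping all blocks up
// to and including the commit round is a single call at (epoch, round + 1).
use std::collections::BTreeMap;

fn prune_demo() {
    let mut blocks: BTreeMap<(u64, u64), &str> = BTreeMap::new();
    blocks.insert((1, 9), "already committed");
    blocks.insert((1, 10), "committed now");
    blocks.insert((1, 11), "still pending");
    let (commit_epoch, commit_round) = (1u64, 10u64);
    // Retain only entries strictly after the commit point.
    blocks = blocks.split_off(&(commit_epoch, commit_round + 1));
    assert_eq!(blocks.keys().next(), Some(&(1, 11)));
}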
- pub fn remove_blocks_for_commit(&self, commit_ledger_info: &LedgerInfoWithSignatures) { + pub fn remove_blocks_for_commit(&mut self, commit_ledger_info: &LedgerInfoWithSignatures) { // Determine the epoch and round to split off let split_off_epoch = commit_ledger_info.ledger_info().epoch(); let split_off_round = commit_ledger_info.commit_info().round().saturating_add(1); // Remove the blocks from the ordered blocks - let mut ordered_blocks = self.ordered_blocks.lock(); - *ordered_blocks = ordered_blocks.split_off(&(split_off_epoch, split_off_round)); + self.ordered_blocks = self + .ordered_blocks + .split_off(&(split_off_epoch, split_off_round)); } /// Updates the commit decision of the ordered block (if found) - pub fn update_commit_decision(&self, commit_decision: &CommitDecision) { + pub fn update_commit_decision(&mut self, commit_decision: &CommitDecision) { // Get the epoch and round of the commit decision let commit_decision_epoch = commit_decision.epoch(); let commit_decision_round = commit_decision.round(); // Update the commit decision for the ordered blocks - let mut ordered_blocks = self.ordered_blocks.lock(); - if let Some((_, existing_commit_decision)) = - ordered_blocks.get_mut(&(commit_decision_epoch, commit_decision_round)) + if let Some((_, existing_commit_decision)) = self + .ordered_blocks + .get_mut(&(commit_decision_epoch, commit_decision_round)) { *existing_commit_decision = Some(commit_decision.clone()); } @@ -128,8 +124,7 @@ impl OrderedBlockStore { /// Updates the metrics for the ordered blocks pub fn update_ordered_blocks_metrics(&self) { // Update the number of ordered block entries - let ordered_blocks = self.ordered_blocks.lock(); - let num_entries = ordered_blocks.len() as u64; + let num_entries = self.ordered_blocks.len() as u64; metrics::set_gauge_with_label( &metrics::OBSERVER_NUM_PROCESSED_BLOCKS, metrics::ORDERED_BLOCK_ENTRIES_LABEL, @@ -137,7 +132,8 @@ impl OrderedBlockStore { ); // Update the total number of ordered blocks - let num_ordered_blocks = ordered_blocks + let num_ordered_blocks = self + .ordered_blocks .values() .map(|(ordered_block, _)| ordered_block.blocks().len() as u64) .sum(); @@ -148,7 +144,8 @@ impl OrderedBlockStore { ); // Update the highest round for the ordered blocks - let highest_ordered_round = ordered_blocks + let highest_ordered_round = self + .ordered_blocks .last_key_value() .map(|(_, (ordered_block, _))| ordered_block.last_block().round()) .unwrap_or(0); @@ -173,28 +170,29 @@ mod test { use aptos_types::{ aggregate_signature::AggregateSignature, ledger_info::LedgerInfo, transaction::Version, }; + use std::sync::Arc; #[test] fn test_clear_all_ordered_blocks() { // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = 10; - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, current_epoch); // Clear all ordered blocks ordered_block_store.clear_all_ordered_blocks(); // Check that all the ordered blocks were removed - assert!(ordered_block_store.ordered_blocks.lock().is_empty()); + assert!(ordered_block_store.ordered_blocks.is_empty()); } #[test] fn test_get_last_ordered_block() { // Create a new ordered block store - let ordered_block_store = 
OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Verify that we have no last ordered block assert!(ordered_block_store.get_last_ordered_block().is_none()); @@ -202,8 +200,11 @@ mod test { // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = 50; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); // Verify the last ordered block is the block with the highest round let last_ordered_block = ordered_blocks.last().unwrap(); @@ -217,7 +218,7 @@ mod test { let next_epoch = current_epoch + 1; let num_ordered_blocks = 50; let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, next_epoch); + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, next_epoch); // Verify the last ordered block is the block with the highest epoch and round let last_ordered_block = ordered_blocks.last().unwrap(); @@ -231,13 +232,16 @@ mod test { #[test] fn test_get_ordered_block() { // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = 50; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); // Ensure the ordered blocks were all inserted let all_ordered_blocks = ordered_block_store.get_all_ordered_blocks(); @@ -272,12 +276,12 @@ mod test { }; // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(consensus_observer_config); + let mut ordered_block_store = OrderedBlockStore::new(consensus_observer_config); // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = max_num_pending_blocks * 2; // Insert more than the maximum - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, current_epoch); // Verify the ordered blocks were inserted up to the maximum let all_ordered_blocks = ordered_block_store.get_all_ordered_blocks(); @@ -287,7 +291,7 @@ mod test { let next_epoch = current_epoch + 1; let num_ordered_blocks = max_num_pending_blocks - 1; // Insert one less than the maximum let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, next_epoch); + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, next_epoch); // Verify the ordered blocks were not inserted (they should have just been dropped) for ordered_block in &ordered_blocks { @@ -305,19 +309,22 @@ mod test { #[test] fn test_remove_blocks_for_commit() { // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Insert several ordered blocks for the current epoch let current_epoch = 10; let num_ordered_blocks = 10; - let 
ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); // Insert several ordered blocks for the next epoch let next_epoch = current_epoch + 1; let num_ordered_blocks_next_epoch = 20; let ordered_blocks_next_epoch = create_and_add_ordered_blocks( - &ordered_block_store, + &mut ordered_block_store, num_ordered_blocks_next_epoch, next_epoch, ); @@ -326,7 +333,7 @@ mod test { let future_epoch = next_epoch + 1; let num_ordered_blocks_future_epoch = 30; create_and_add_ordered_blocks( - &ordered_block_store, + &mut ordered_block_store, num_ordered_blocks_future_epoch, future_epoch, ); @@ -399,19 +406,22 @@ mod test { #[test] fn test_update_commit_decision() { // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = 10; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); // Insert several ordered blocks for the next epoch let next_epoch = current_epoch + 1; let num_ordered_blocks_next_epoch = 20; let ordered_blocks_next_epoch = create_and_add_ordered_blocks( - &ordered_block_store, + &mut ordered_block_store, num_ordered_blocks_next_epoch, next_epoch, ); @@ -499,7 +509,7 @@ mod test { /// Creates and adds the specified number of ordered blocks to the ordered blocks fn create_and_add_ordered_blocks( - ordered_block_store: &OrderedBlockStore, + ordered_block_store: &mut OrderedBlockStore, num_ordered_blocks: usize, epoch: u64, ) -> Vec { diff --git a/consensus/src/consensus_observer/observer/payload_store.rs b/consensus/src/consensus_observer/observer/payload_store.rs index bae1225c58118..8781595026194 100644 --- a/consensus/src/consensus_observer/observer/payload_store.rs +++ b/consensus/src/consensus_observer/observer/payload_store.rs @@ -26,12 +26,12 @@ pub enum BlockPayloadStatus { } /// A simple struct to store the block payloads of ordered and committed blocks -#[derive(Clone)] pub struct BlockPayloadStore { // The configuration of the consensus observer consensus_observer_config: ConsensusObserverConfig, - // Block transaction payloads (indexed by epoch and round) + // Block transaction payloads (indexed by epoch and round). + // This is directly accessed by the payload manager. 
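// Why the field just below keeps its own Arc<Mutex<..>> even though the
// store as a whole now sits behind a lock: the payload manager holds a
// clone of the inner map handle and reads payloads without going through
// (or locking) the enclosing BlockPayloadStore. A minimal sketch of that
// shape, with a simplified payload type standing in for BlockPayloadStatus:
use std::collections::BTreeMap;
use std::sync::{Arc, Mutex};

type PayloadMap = Arc<Mutex<BTreeMap<(u64, u64), Vec<u8>>>>;

struct PayloadStore {
    payloads: PayloadMap,
}

impl PayloadStore {
    fn handle(&self) -> PayloadMap {
        // Handed to the payload manager at epoch start; both sides lock the
        // same underlying map independently of the store's outer lock.
        self.payloads.clone()
    }
}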
block_payloads: Arc>>, } @@ -299,16 +299,12 @@ mod test { }; // Create a new block payload store - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some unverified blocks to the payload store let num_blocks_in_store = 100; - let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 1, - false, - ); + let unverified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 1, false); // Verify the payloads don't exist in the block payload store assert!(!block_payload_store.all_payloads_exist(&unverified_blocks)); @@ -320,12 +316,8 @@ mod test { // Add some verified blocks to the payload store let num_blocks_in_store = 100; - let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 0, - true, - ); + let verified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Check that all the payloads exist in the block payload store assert!(block_payload_store.all_payloads_exist(&verified_blocks)); @@ -355,22 +347,18 @@ mod test { fn test_all_payloads_exist_unverified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add several verified blocks to the payload store let num_blocks_in_store = 10; - let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 0, - true, - ); + let verified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Check that the payloads exists in the block payload store assert!(block_payload_store.all_payloads_exist(&verified_blocks)); // Mark the payload of the first block as unverified - mark_payload_as_unverified(block_payload_store.clone(), &verified_blocks[0]); + mark_payload_as_unverified(&block_payload_store, &verified_blocks[0]); // Check that the payload no longer exists in the block payload store assert!(!block_payload_store.all_payloads_exist(&verified_blocks)); @@ -383,19 +371,15 @@ mod test { fn test_clear_all_payloads() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some unverified blocks to the payload store let num_blocks_in_store = 30; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_in_store, 1, false); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 1, false); // Add some verified blocks to the payload store - let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 0, - true, - ); + let verified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Check that the payloads exist in the block payload store assert!(block_payload_store.all_payloads_exist(&verified_blocks)); @@ -423,12 +407,8 @@ mod test { // Add some verified blocks to the payload store let num_blocks_in_store = 20; - let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), 
- num_blocks_in_store, - 0, - true, - ); + let verified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Check that the block payload store contains the new block payloads assert!(block_payload_store.all_payloads_exist(&verified_blocks)); @@ -438,7 +418,7 @@ mod test { check_num_verified_payloads(&block_payload_store, num_blocks_in_store); // Mark the payload of the first block as unverified - mark_payload_as_unverified(block_payload_store.clone(), &verified_blocks[0]); + mark_payload_as_unverified(&block_payload_store, &verified_blocks[0]); // Check that the payload no longer exists in the block payload store assert!(!block_payload_store.all_payloads_exist(&verified_blocks)); @@ -465,11 +445,11 @@ mod test { }; // Create a new block payload store - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add the maximum number of verified blocks to the payload store let num_blocks_in_store = max_num_pending_blocks as usize; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_in_store, 0, true); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Verify the number of blocks in the block payload store check_num_verified_payloads(&block_payload_store, num_blocks_in_store); @@ -477,7 +457,7 @@ mod test { // Add more blocks to the payload store let num_blocks_to_add = 5; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_to_add, 0, true); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_to_add, 0, true); // Verify the number of blocks in the block payload store check_num_verified_payloads(&block_payload_store, max_num_pending_blocks as usize); @@ -485,7 +465,7 @@ mod test { // Add a large number of blocks to the payload store let num_blocks_to_add = 100; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_to_add, 0, true); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_to_add, 0, true); // Verify the number of blocks in the block payload store check_num_verified_payloads(&block_payload_store, max_num_pending_blocks as usize); @@ -502,11 +482,11 @@ mod test { }; // Create a new block payload store - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add the maximum number of unverified blocks to the payload store let num_blocks_in_store = max_num_pending_blocks as usize; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_in_store, 0, false); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, false); // Verify the number of blocks in the block payload store check_num_unverified_payloads(&block_payload_store, num_blocks_in_store); @@ -514,7 +494,7 @@ mod test { // Add more blocks to the payload store let num_blocks_to_add = 5; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_to_add, 0, false); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_to_add, 0, false); // Verify the number of blocks in the block payload store check_num_unverified_payloads(&block_payload_store, max_num_pending_blocks as usize); @@ -522,7 +502,7 @@ mod test { // Add a large number of blocks to the payload store let num_blocks_to_add = 100; - create_and_add_blocks_to_store(block_payload_store.clone(), 
num_blocks_to_add, 0, false); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_to_add, 0, false); // Verify the number of blocks in the block payload store check_num_unverified_payloads(&block_payload_store, max_num_pending_blocks as usize); @@ -533,13 +513,13 @@ mod test { fn test_remove_blocks_for_epoch_round_verified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some verified blocks to the payload store for the current epoch let current_epoch = 0; let num_blocks_in_store = 100; let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, current_epoch, true, @@ -573,7 +553,7 @@ mod test { // Add some verified blocks to the payload store for the next epoch let next_epoch = current_epoch + 1; create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, next_epoch, true, @@ -591,13 +571,13 @@ mod test { fn test_remove_blocks_for_epoch_round_unverified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some unverified blocks to the payload store for the current epoch let current_epoch = 10; let num_blocks_in_store = 100; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, current_epoch, false, @@ -630,7 +610,7 @@ mod test { // Add some unverified blocks to the payload store for the next epoch let next_epoch = current_epoch + 1; create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, next_epoch, false, @@ -648,13 +628,13 @@ mod test { fn test_remove_committed_blocks_verified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some blocks to the payload store for the current epoch let current_epoch = 0; let num_blocks_in_store = 100; let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, current_epoch, true, @@ -700,7 +680,7 @@ mod test { // Add some blocks to the payload store for the next epoch let next_epoch = 1; let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, next_epoch, true, @@ -717,13 +697,13 @@ mod test { fn test_remove_committed_blocks_unverified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some blocks to the payload store for the current epoch let current_epoch = 10; let num_blocks_in_store = 100; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, current_epoch, 
false, @@ -768,7 +748,7 @@ mod test { // Add some blocks to the payload store for the next epoch let next_epoch = 11; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, next_epoch, false, @@ -791,7 +771,7 @@ mod test { let current_epoch = 0; let num_verified_blocks = 10; create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_verified_blocks, current_epoch, true, @@ -801,7 +781,7 @@ mod test { let next_epoch = current_epoch + 1; let num_unverified_blocks = 20; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_unverified_blocks, next_epoch, false, @@ -811,7 +791,7 @@ mod test { let future_epoch = current_epoch + 30; let num_future_blocks = 30; let future_unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_future_blocks, future_epoch, false, @@ -877,7 +857,7 @@ mod test { let current_epoch = 0; let num_verified_blocks = 10; let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_verified_blocks, current_epoch, true, @@ -895,7 +875,7 @@ mod test { .unwrap(); // Mark the first block payload as unverified - mark_payload_as_unverified(block_payload_store.clone(), &verified_blocks[0]); + mark_payload_as_unverified(&block_payload_store, &verified_blocks[0]); // Verify the ordered block and ensure it fails (since the payloads are unverified) let error = block_payload_store @@ -923,7 +903,7 @@ mod test { let current_epoch = 10; let num_verified_blocks = 6; create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_verified_blocks, current_epoch, true, @@ -933,7 +913,7 @@ mod test { let next_epoch = current_epoch + 1; let num_unverified_blocks = 15; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_unverified_blocks, next_epoch, false, @@ -943,7 +923,7 @@ mod test { let future_epoch = next_epoch + 1; let num_future_blocks = 10; let unverified_future_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_future_blocks, future_epoch, false, @@ -986,7 +966,7 @@ mod test { /// Creates and adds the given number of blocks to the block payload store fn create_and_add_blocks_to_store( - mut block_payload_store: BlockPayloadStore, + block_payload_store: &mut BlockPayloadStore, num_blocks: usize, epoch: u64, verified_payload_signatures: bool, @@ -1110,7 +1090,7 @@ mod test { /// Marks the payload of the given block as unverified fn mark_payload_as_unverified( - block_payload_store: BlockPayloadStore, + block_payload_store: &BlockPayloadStore, block: &Arc, ) { // Get the payload entry for the given block diff --git a/consensus/src/consensus_observer/observer/pending_blocks.rs b/consensus/src/consensus_observer/observer/pending_blocks.rs index 46c0586f08130..d3ce297cd5fdb 100644 --- a/consensus/src/consensus_observer/observer/pending_blocks.rs +++ b/consensus/src/consensus_observer/observer/pending_blocks.rs @@ -19,41 +19,36 @@ use std::{ }; /// A simple struct to hold blocks that are waiting for payloads -#[derive(Clone)] pub struct PendingBlockStore { // The configuration of the consensus observer consensus_observer_config: ConsensusObserverConfig, - // A map of ordered blocks that are without payloads. 
The key is the - // (epoch, round) of the first block in the ordered block. - blocks_without_payloads: Arc>>, + // A map of ordered blocks that are without payloads. The key is + // the (epoch, round) of the first block in the ordered block. + blocks_without_payloads: BTreeMap<(u64, Round), OrderedBlock>, } impl PendingBlockStore { pub fn new(consensus_observer_config: ConsensusObserverConfig) -> Self { Self { consensus_observer_config, - blocks_without_payloads: Arc::new(Mutex::new(BTreeMap::new())), + blocks_without_payloads: BTreeMap::new(), } } /// Clears all missing blocks from the store - pub fn clear_missing_blocks(&self) { - self.blocks_without_payloads.lock().clear(); + pub fn clear_missing_blocks(&mut self) { + self.blocks_without_payloads.clear(); } /// Inserts a block (without payloads) into the store - pub fn insert_pending_block(&self, ordered_block: OrderedBlock) { + pub fn insert_pending_block(&mut self, ordered_block: OrderedBlock) { // Get the epoch and round of the first block let first_block = ordered_block.first_block(); let first_block_epoch_round = (first_block.epoch(), first_block.round()); // Insert the block into the store using the round of the first block - match self - .blocks_without_payloads - .lock() - .entry(first_block_epoch_round) - { + match self.blocks_without_payloads.entry(first_block_epoch_round) { Entry::Occupied(_) => { // The block is already in the store warn!( @@ -75,16 +70,15 @@ impl PendingBlockStore { /// Garbage collects the pending blocks store by removing /// the oldest blocks if the store is too large. - fn garbage_collect_pending_blocks(&self) { + fn garbage_collect_pending_blocks(&mut self) { // Calculate the number of blocks to remove - let mut blocks_without_payloads = self.blocks_without_payloads.lock(); - let num_pending_blocks = blocks_without_payloads.len() as u64; + let num_pending_blocks = self.blocks_without_payloads.len() as u64; let max_pending_blocks = self.consensus_observer_config.max_num_pending_blocks; let num_blocks_to_remove = num_pending_blocks.saturating_sub(max_pending_blocks); // Remove the oldest blocks if the store is too large for _ in 0..num_blocks_to_remove { - if let Some((oldest_epoch_round, _)) = blocks_without_payloads.pop_first() { + if let Some((oldest_epoch_round, _)) = self.blocks_without_payloads.pop_first() { warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "The pending block store is too large: {:?} blocks. Removing the block for the oldest epoch and round: {:?}", @@ -98,25 +92,28 @@ impl PendingBlockStore { /// Removes and returns the block from the store that is now ready /// to be processed (after the new payload has been received). pub fn remove_ready_block( - &self, + &mut self, received_payload_epoch: u64, received_payload_round: Round, - block_payload_store: &BlockPayloadStore, + block_payload_store: Arc>, ) -> Option { // Calculate the round at which to split the blocks let split_round = received_payload_round.saturating_add(1); // Split the blocks at the epoch and round - let mut blocks_without_payloads = self.blocks_without_payloads.lock(); - let mut blocks_at_higher_rounds = - blocks_without_payloads.split_off(&(received_payload_epoch, split_round)); + let mut blocks_at_higher_rounds = self + .blocks_without_payloads + .split_off(&(received_payload_epoch, split_round)); // Check if the last block is ready (this should be the only ready block). // Any earlier blocks are considered out-of-date and will be dropped. 
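        // (Note: `split_off(&key)` keeps the entries strictly below `key` and
        // returns those at or above it, so `pop_last()` below yields the
        // pending block whose first (epoch, round) is the highest one not
        // exceeding the received payload's epoch and round.)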
let mut ready_block = None; - if let Some((epoch_and_round, ordered_block)) = blocks_without_payloads.pop_last() { + if let Some((epoch_and_round, ordered_block)) = self.blocks_without_payloads.pop_last() { // If all payloads exist for the block, then the block is ready - if block_payload_store.all_payloads_exist(ordered_block.blocks()) { + if block_payload_store + .lock() + .all_payloads_exist(ordered_block.blocks()) + { ready_block = Some(ordered_block); } else { // Otherwise, check if we're still waiting for higher payloads for the block @@ -127,18 +124,18 @@ impl PendingBlockStore { } // Check if any out-of-date blocks were dropped - if !blocks_without_payloads.is_empty() { + if !self.blocks_without_payloads.is_empty() { info!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Dropped {:?} out-of-date pending blocks before epoch and round: {:?}", - blocks_without_payloads.len(), + self.blocks_without_payloads.len(), (received_payload_epoch, received_payload_round) )) ); } // Update the pending blocks to only include the blocks at higher rounds - *blocks_without_payloads = blocks_at_higher_rounds; + self.blocks_without_payloads = blocks_at_higher_rounds; // Return the ready block (if one exists) ready_block @@ -147,8 +144,7 @@ impl PendingBlockStore { /// Updates the metrics for the pending blocks pub fn update_pending_blocks_metrics(&self) { // Update the number of pending block entries - let blocks_without_payloads = self.blocks_without_payloads.lock(); - let num_entries = blocks_without_payloads.len() as u64; + let num_entries = self.blocks_without_payloads.len() as u64; metrics::set_gauge_with_label( &metrics::OBSERVER_NUM_PROCESSED_BLOCKS, metrics::PENDING_BLOCK_ENTRIES_LABEL, @@ -156,7 +152,8 @@ impl PendingBlockStore { ); // Update the total number of pending blocks - let num_pending_blocks = blocks_without_payloads + let num_pending_blocks = self + .blocks_without_payloads .values() .map(|block| block.blocks().len() as u64) .sum(); @@ -167,7 +164,8 @@ impl PendingBlockStore { ); // Update the highest round for the pending blocks - let highest_pending_round = blocks_without_payloads + let highest_pending_round = self + .blocks_without_payloads .last_key_value() .map(|(_, pending_block)| pending_block.last_block().round()) .unwrap_or(0); @@ -208,13 +206,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 0; let missing_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -223,17 +223,19 @@ mod test { // Verify that the store is not empty verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &missing_blocks, ); // Clear the missing blocks from the store - pending_block_store.clear_missing_blocks(); + pending_block_store.lock().clear_missing_blocks(); // Verify that the store is now empty - let blocks_without_payloads = pending_block_store.blocks_without_payloads.lock(); - assert!(blocks_without_payloads.is_empty()); + assert!(pending_block_store + .lock() + .blocks_without_payloads + .is_empty()); } #[test] @@ -244,13 +246,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks 
as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 0; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -259,7 +263,7 @@ mod test { // Verify that all blocks were inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &pending_blocks, ); @@ -267,7 +271,7 @@ mod test { // Insert the maximum number of blocks into the store again let starting_round = (max_num_pending_blocks * 100) as Round; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -276,7 +280,7 @@ mod test { // Verify that all blocks were inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &pending_blocks, ); @@ -284,12 +288,17 @@ mod test { // Insert one more block into the store (for the next epoch) let next_epoch = 1; let starting_round = 0; - let new_pending_block = - create_and_add_pending_blocks(&pending_block_store, 1, next_epoch, starting_round, 5); + let new_pending_block = create_and_add_pending_blocks( + pending_block_store.clone(), + 1, + next_epoch, + starting_round, + 5, + ); // Verify the new block was inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &new_pending_block, ); @@ -303,13 +312,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 200; let mut pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -318,7 +329,7 @@ mod test { // Verify that all blocks were inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &pending_blocks, ); @@ -329,7 +340,7 @@ mod test { // Insert one more block into the store let starting_round = ((max_num_pending_blocks * 10) + (i * 100)) as Round; let new_pending_block = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), 1, current_epoch, starting_round, @@ -338,7 +349,7 @@ mod test { // Verify the new block was inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &new_pending_block, ); @@ -348,7 +359,8 @@ mod test { let oldest_block_round = oldest_block.first_block().round(); // Verify that the oldest block was garbage collected - let blocks_without_payloads = pending_block_store.blocks_without_payloads.lock(); + let blocks_without_payloads = + pending_block_store.lock().blocks_without_payloads.clone(); assert!(!blocks_without_payloads.contains_key(&(current_epoch, oldest_block_round))); } @@ -359,7 +371,7 @@ mod test { // Insert one more block into the store let 
starting_round = i; let new_pending_block = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), 1, next_epoch, starting_round, @@ -368,7 +380,7 @@ mod test { // Verify the new block was inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &new_pending_block, ); @@ -378,7 +390,8 @@ mod test { let oldest_block_round = oldest_block.first_block().round(); // Verify that the oldest block was garbage collected - let blocks_without_payloads = pending_block_store.blocks_without_payloads.lock(); + let blocks_without_payloads = + pending_block_store.lock().blocks_without_payloads.clone(); assert!(!blocks_without_payloads.contains_key(&(current_epoch, oldest_block_round))); } } @@ -391,13 +404,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 0; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -405,43 +420,45 @@ mod test { ); // Create a new block payload store and insert payloads for the second block - let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); let second_block = pending_blocks[1].clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &second_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &second_block); // Remove the second block (which is now ready) let payload_round = second_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); assert_eq!(ready_block, Some(second_block)); // Verify that the first and second blocks were removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 2, &pending_blocks[2..].to_vec(), ); // Insert payloads for the last block let last_block = pending_blocks.last().unwrap().clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &last_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &last_block); // Remove the last block (which is now ready) let payload_round = last_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); // Verify that the last block was removed assert_eq!(ready_block, Some(last_block)); // Verify that the store is empty - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } #[test] @@ -452,13 +469,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + 
))); // Insert the maximum number of blocks into the store let current_epoch = 10; let starting_round = 100; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -466,7 +485,9 @@ mod test { ); // Create an empty block payload store - let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); // Incrementally insert and process each payload for the first block let first_block = pending_blocks.first().unwrap().clone(); @@ -474,14 +495,16 @@ mod test { // Insert the block let block_payload = BlockPayload::new(block.block_info(), BlockTransactionPayload::empty()); - block_payload_store.insert_block_payload(block_payload, true); + block_payload_store + .lock() + .insert_block_payload(block_payload, true); // Attempt to remove the block (which might not be ready) let payload_round = block.round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); // If the block is ready, verify that it was removed. @@ -492,7 +515,7 @@ mod test { // Verify that the block was removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 1, &pending_blocks[1..].to_vec(), ); @@ -502,7 +525,7 @@ mod test { // Verify that the block still remains verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &pending_blocks, ); @@ -517,14 +540,16 @@ mod test { if payload_round != last_block.first_block().round() { let block_payload = BlockPayload::new(block.block_info(), BlockTransactionPayload::empty()); - block_payload_store.insert_block_payload(block_payload, true); + block_payload_store + .lock() + .insert_block_payload(block_payload, true); } // Attempt to remove the block (which might not be ready) - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); // The block should not be ready @@ -532,14 +557,14 @@ mod test { // Verify that the block still remains or has been removed on the last insert if payload_round == last_block.last_block().round() { - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } else { - verify_pending_blocks(&pending_block_store, 1, &vec![last_block.clone()]); + verify_pending_blocks(pending_block_store.clone(), 1, &vec![last_block.clone()]); } } // Verify that the store is now empty - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } #[test] @@ -550,13 +575,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 0; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, 
starting_round, @@ -564,63 +591,65 @@ mod test { ); // Create a new block payload store and insert payloads for the first block - let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); let first_block = pending_blocks.first().unwrap().clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &first_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &first_block); // Remove the first block (which is now ready) let payload_round = first_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); assert_eq!(ready_block, Some(first_block)); // Verify that the first block was removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 1, &pending_blocks[1..].to_vec(), ); // Insert payloads for the second block let second_block = pending_blocks[1].clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &second_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &second_block); // Remove the second block (which is now ready) let payload_round = second_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); assert_eq!(ready_block, Some(second_block)); // Verify that the first and second blocks were removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 2, &pending_blocks[2..].to_vec(), ); // Insert payloads for the last block let last_block = pending_blocks.last().unwrap().clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &last_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &last_block); // Remove the last block (which is now ready) let payload_round = last_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); // Verify that the last block was removed assert_eq!(ready_block, Some(last_block)); // Verify that the store is empty - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } #[test] @@ -631,13 +660,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 10; let starting_round = 100; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -645,21 +676,23 @@ mod test { ); // Create an empty block payload store - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); // 
Remove the third block (which is not ready) let third_block = pending_blocks[2].clone(); let third_block_round = third_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, third_block_round, - &block_payload_store, + block_payload_store.clone(), ); assert!(ready_block.is_none()); // Verify that the first three blocks were removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 3, &pending_blocks[3..].to_vec(), ); @@ -667,20 +700,20 @@ mod test { // Remove the last block (which is not ready) let last_block = pending_blocks.last().unwrap().clone(); let last_block_round = last_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, last_block_round, - &block_payload_store, + block_payload_store.clone(), ); assert!(ready_block.is_none()); // Verify that the store is now empty - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } /// Creates and adds the specified number of blocks to the pending block store fn create_and_add_pending_blocks( - pending_block_store: &PendingBlockStore, + pending_block_store: Arc>, num_pending_blocks: usize, epoch: u64, starting_round: Round, @@ -732,7 +765,9 @@ mod test { let ordered_block = OrderedBlock::new(pipelined_blocks, ordered_proof.clone()); // Insert the ordered block into the pending block store - pending_block_store.insert_pending_block(ordered_block.clone()); + pending_block_store + .lock() + .insert_pending_block(ordered_block.clone()); // Add the ordered block to the pending blocks pending_blocks.push(ordered_block); @@ -743,31 +778,37 @@ mod test { /// Inserts payloads into the payload store for the ordered block fn insert_payloads_for_ordered_block( - block_payload_store: &mut BlockPayloadStore, + block_payload_store: Arc>, ordered_block: &OrderedBlock, ) { for block in ordered_block.blocks() { let block_payload = BlockPayload::new(block.block_info(), BlockTransactionPayload::empty()); - block_payload_store.insert_block_payload(block_payload, true); + block_payload_store + .lock() + .insert_block_payload(block_payload, true); } } /// Verifies that the pending block store contains the expected blocks fn verify_pending_blocks( - pending_block_store: &PendingBlockStore, + pending_block_store: Arc>, num_expected_blocks: usize, pending_blocks: &Vec, ) { // Check the number of pending blocks - let blocks_without_payloads = pending_block_store.blocks_without_payloads.lock(); - assert_eq!(blocks_without_payloads.len(), num_expected_blocks); + assert_eq!( + pending_block_store.lock().blocks_without_payloads.len(), + num_expected_blocks + ); // Check that all pending blocks are in the store for pending_block in pending_blocks { let first_block = pending_block.first_block(); assert_eq!( - blocks_without_payloads + pending_block_store + .lock() + .blocks_without_payloads .get(&(first_block.epoch(), first_block.round())) .unwrap(), pending_block From 1fef8b8d46997e1937356e819ec5e2c7b09e6bdb Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Sun, 8 Sep 2024 06:33:48 -0400 Subject: [PATCH 08/36] [Consensus Observer] Handle duplicate commit messages. 
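Stale or duplicate commit decisions are now dropped before any signature
verification work: the observer tracks the highest committed (epoch, round)
and ignores any commit message at or below it, bumping a new dropped-message
counter instead. A minimal standalone sketch of the staleness check
(simplified names; the real observer falls back to the root block's
(epoch, round) when nothing has been committed yet):

    /// Returns true if `commit` is a duplicate of, or older than, `highest`.
    fn is_stale_commit(commit: (u64, u64), highest: (u64, u64)) -> bool {
        // Rust tuples compare lexicographically, so (epoch, round) pairs
        // order correctly across epoch boundaries.
        commit <= highest
    }

    fn main() {
        assert!(is_stale_commit((10, 5), (10, 5)));  // duplicate of the latest commit
        assert!(is_stale_commit((9, 99), (10, 5)));  // older epoch, any round
        assert!(!is_stale_commit((10, 6), (10, 5))); // genuinely new commit
    }
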
--- .../src/consensus_observer/common/metrics.rs | 11 + .../observer/consensus_observer.rs | 205 +++++++++++++----- .../observer/ordered_blocks.rs | 149 +++++++++++++ 3 files changed, 310 insertions(+), 55 deletions(-) diff --git a/consensus/src/consensus_observer/common/metrics.rs b/consensus/src/consensus_observer/common/metrics.rs index 8cf8144d25a86..e290d74640c70 100644 --- a/consensus/src/consensus_observer/common/metrics.rs +++ b/consensus/src/consensus_observer/common/metrics.rs @@ -13,6 +13,7 @@ use once_cell::sync::Lazy; // Useful metric labels pub const BLOCK_PAYLOAD_LABEL: &str = "block_payload"; pub const COMMIT_DECISION_LABEL: &str = "commit_decision"; +pub const COMMITTED_BLOCKS_LABEL: &str = "committed_blocks"; pub const CREATED_SUBSCRIPTION_LABEL: &str = "created_subscription"; pub const ORDERED_BLOCK_ENTRIES_LABEL: &str = "ordered_block_entries"; pub const ORDERED_BLOCKS_LABEL: &str = "ordered_blocks"; @@ -30,6 +31,16 @@ pub static OBSERVER_CREATED_SUBSCRIPTIONS: Lazy = Lazy::new(|| { .unwrap() }); +/// Counter for tracking dropped (direct send) messages by the consensus observer +pub static OBSERVER_DROPPED_MESSAGES: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "consensus_observer_dropped_messages", + "Counters related to dropped (direct send) messages by the consensus observer", + &["message_type", "network_id"] + ) + .unwrap() +}); + /// Gauge for tracking the number of active subscriptions for the consensus observer pub static OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS: Lazy = Lazy::new(|| { register_int_gauge_vec!( diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs index 9917adad0f74a..ce641e10457e6 100644 --- a/consensus/src/consensus_observer/observer/consensus_observer.rs +++ b/consensus/src/consensus_observer/observer/consensus_observer.rs @@ -28,7 +28,10 @@ use crate::{ pipeline::execution_client::TExecutionClient, }; use aptos_channels::{aptos_channel, aptos_channel::Receiver, message_queues::QueueStyle}; -use aptos_config::config::{ConsensusObserverConfig, NodeConfig}; +use aptos_config::{ + config::{ConsensusObserverConfig, NodeConfig}, + network_id::PeerNetworkId, +}; use aptos_consensus_types::{pipeline, pipelined_block::PipelinedBlock}; use aptos_crypto::{bls12381, Genesis}; use aptos_event_notifications::{DbBackedOnChainConfig, ReconfigNotificationListener}; @@ -262,10 +265,25 @@ impl ConsensusObserver { self.active_observer_state.epoch_state() } + /// Returns the highest committed block epoch and round + fn get_highest_committed_epoch_round(&self) -> (u64, Round) { + if let Some(epoch_round) = self + .ordered_block_store + .lock() + .get_highest_committed_epoch_round() + { + epoch_round + } else { + // Return the root epoch and round + let root_block_info = self.active_observer_state.root().commit_info().clone(); + (root_block_info.epoch(), root_block_info.round()) + } + } + /// Returns the last ordered block fn get_last_ordered_block(&self) -> BlockInfo { - if let Some(last_pending_block) = self.ordered_block_store.lock().get_last_ordered_block() { - last_pending_block + if let Some(last_ordered_block) = self.ordered_block_store.lock().get_last_ordered_block() { + last_ordered_block } else { // Return the root ledger info self.active_observer_state.root().commit_info().clone() @@ -298,18 +316,18 @@ impl ConsensusObserver { } /// Processes the block payload message - async fn process_block_payload_message(&mut self, block_payload: BlockPayload) { + async fn 
process_block_payload_message( + &mut self, + peer_network_id: PeerNetworkId, + block_payload: BlockPayload, + ) { + // Update the metrics for the received block payload + update_metrics_for_block_payload_message(peer_network_id, &block_payload); + // Get the epoch and round for the block let block_epoch = block_payload.block.epoch(); let block_round = block_payload.block.round(); - // Update the metrics for the received block payload - metrics::set_gauge_with_label( - &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, - metrics::BLOCK_PAYLOAD_LABEL, - block_round, - ); - // Verify the block payload digests if let Err(error) = block_payload.verify_payload_digests() { error!( @@ -355,18 +373,28 @@ impl ConsensusObserver { } /// Processes the commit decision message - fn process_commit_decision_message(&mut self, commit_decision: CommitDecision) { + fn process_commit_decision_message( + &mut self, + peer_network_id: PeerNetworkId, + commit_decision: CommitDecision, + ) { + // Get the commit decision epoch and round + let commit_epoch = commit_decision.epoch(); + let commit_round = commit_decision.round(); + + // If the commit message is behind our highest committed block, ignore it + if (commit_epoch, commit_round) <= self.get_highest_committed_epoch_round() { + // Update the metrics for the dropped commit decision + update_metrics_for_dropped_commit_decision_message(peer_network_id, &commit_decision); + return; + } + // Update the metrics for the received commit decision - metrics::set_gauge_with_label( - &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, - metrics::COMMIT_DECISION_LABEL, - commit_decision.round(), - ); + update_metrics_for_commit_decision_message(peer_network_id, &commit_decision); // If the commit decision is for the current epoch, verify and process it let epoch_state = self.get_epoch_state(); - let commit_decision_epoch = commit_decision.epoch(); - if commit_decision_epoch == epoch_state.epoch { + if commit_epoch == epoch_state.epoch { // Verify the commit decision if let Err(error) = commit_decision.verify_commit_proof(&epoch_state) { error!( @@ -391,9 +419,8 @@ impl ConsensusObserver { // Otherwise, we failed to process the commit decision. If the commit // is for a future epoch or round, we need to state sync. let last_block = self.get_last_ordered_block(); - let commit_decision_round = commit_decision.round(); - let epoch_changed = commit_decision_epoch > last_block.epoch(); - if epoch_changed || commit_decision_round > last_block.round() { + let epoch_changed = commit_epoch > last_block.epoch(); + if epoch_changed || commit_round > last_block.round() { // If we're waiting for state sync to transition into a new epoch, // we should just wait and not issue a new state sync request. 
if self.in_state_sync_epoch_change() { @@ -420,7 +447,7 @@ impl ConsensusObserver { .update_root(commit_decision.commit_proof().clone()); self.block_payload_store .lock() - .remove_blocks_for_epoch_round(commit_decision_epoch, commit_decision_round); + .remove_blocks_for_epoch_round(commit_epoch, commit_round); self.ordered_block_store .lock() .remove_blocks_for_commit(commit_decision.commit_proof()); @@ -428,8 +455,8 @@ impl ConsensusObserver { // Start the state sync process let abort_handle = sync_to_commit_decision( commit_decision, - commit_decision_epoch, - commit_decision_round, + commit_epoch, + commit_round, self.execution_client.clone(), self.sync_notification_sender.clone(), ); @@ -509,39 +536,15 @@ impl ConsensusObserver { // Process the message based on the type match message { ConsensusObserverDirectSend::OrderedBlock(ordered_block) => { - // Log the received ordered block message - let log_message = format!( - "Received ordered block: {}, from peer: {}!", - ordered_block.proof_block_info(), - peer_network_id - ); - log_received_message(log_message); - - // Process the ordered block message - self.process_ordered_block_message(ordered_block).await; + self.process_ordered_block_message(peer_network_id, ordered_block) + .await; }, ConsensusObserverDirectSend::CommitDecision(commit_decision) => { - // Log the received commit decision message - let log_message = format!( - "Received commit decision: {}, from peer: {}!", - commit_decision.proof_block_info(), - peer_network_id - ); - log_received_message(log_message); - - // Process the commit decision message - self.process_commit_decision_message(commit_decision); + self.process_commit_decision_message(peer_network_id, commit_decision); }, ConsensusObserverDirectSend::BlockPayload(block_payload) => { - // Log the received block payload message - let log_message = format!( - "Received block payload: {}, from peer: {}!", - block_payload.block, peer_network_id - ); - log_received_message(log_message); - - // Process the block payload message - self.process_block_payload_message(block_payload).await; + self.process_block_payload_message(peer_network_id, block_payload) + .await; }, } @@ -550,7 +553,14 @@ impl ConsensusObserver { } /// Processes the ordered block - async fn process_ordered_block_message(&mut self, ordered_block: OrderedBlock) { + async fn process_ordered_block_message( + &mut self, + peer_network_id: PeerNetworkId, + ordered_block: OrderedBlock, + ) { + // Update the metrics for the received ordered block + update_metrics_for_ordered_block_message(peer_network_id, &ordered_block); + // Verify the ordered blocks before processing if let Err(error) = ordered_block.verify_ordered_blocks() { error!( @@ -847,3 +857,88 @@ fn sync_to_commit_decision( )); abort_handle } + +/// Updates the metrics for the received block payload message +fn update_metrics_for_block_payload_message( + peer_network_id: PeerNetworkId, + block_payload: &BlockPayload, +) { + // Log the received block payload message + let log_message = format!( + "Received block payload: {}, from peer: {}!", + block_payload.block, peer_network_id + ); + log_received_message(log_message); + + // Update the metrics for the received block payload + metrics::set_gauge_with_label( + &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, + metrics::BLOCK_PAYLOAD_LABEL, + block_payload.block.round(), + ); +} + +/// Updates the metrics for the received commit decision message +fn update_metrics_for_commit_decision_message( + peer_network_id: PeerNetworkId, + commit_decision: 
&CommitDecision, +) { + // Log the received commit decision message + let log_message = format!( + "Received commit decision: {}, from peer: {}!", + commit_decision.proof_block_info(), + peer_network_id + ); + log_received_message(log_message); + + // Update the metrics for the received commit decision + metrics::set_gauge_with_label( + &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, + metrics::COMMIT_DECISION_LABEL, + commit_decision.round(), + ); +} + +/// Updates the metrics for the dropped commit decision message +fn update_metrics_for_dropped_commit_decision_message( + peer_network_id: PeerNetworkId, + commit_decision: &CommitDecision, +) { + // Increment the dropped message counter + metrics::increment_request_counter( + &metrics::OBSERVER_DROPPED_MESSAGES, + metrics::COMMITTED_BLOCKS_LABEL, + &peer_network_id, + ); + + // Log the dropped commit decision message + debug!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Ignoring commit decision message from peer: {:?}! Commit epoch and round: ({}, {})", + peer_network_id, + commit_decision.epoch(), + commit_decision.round() + )) + ); +} + +/// Updates the metrics for the received ordered block message +fn update_metrics_for_ordered_block_message( + peer_network_id: PeerNetworkId, + ordered_block: &OrderedBlock, +) { + // Log the received ordered block message + let log_message = format!( + "Received ordered block: {}, from peer: {}!", + ordered_block.proof_block_info(), + peer_network_id + ); + log_received_message(log_message); + + // Update the metrics for the received ordered block + metrics::set_gauge_with_label( + &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, + metrics::ORDERED_BLOCKS_LABEL, + ordered_block.proof_block_info().round(), + ); +} diff --git a/consensus/src/consensus_observer/observer/ordered_blocks.rs b/consensus/src/consensus_observer/observer/ordered_blocks.rs index 7bb43fbc4108f..60aa56cf48d85 100644 --- a/consensus/src/consensus_observer/observer/ordered_blocks.rs +++ b/consensus/src/consensus_observer/observer/ordered_blocks.rs @@ -19,6 +19,9 @@ pub struct OrderedBlockStore { // The configuration of the consensus observer consensus_observer_config: ConsensusObserverConfig, + // The highest committed block (epoch and round) + highest_committed_epoch_round: Option<(u64, Round)>, + // Ordered blocks. The key is the epoch and round of the last block in the // ordered block. Each entry contains the block and the commit decision (if any). 
ordered_blocks: BTreeMap<(u64, Round), (OrderedBlock, Option)>, @@ -28,6 +31,7 @@ impl OrderedBlockStore { pub fn new(consensus_observer_config: ConsensusObserverConfig) -> Self { Self { consensus_observer_config, + highest_committed_epoch_round: None, ordered_blocks: BTreeMap::new(), } } @@ -44,6 +48,11 @@ impl OrderedBlockStore { self.ordered_blocks.clone() } + /// Returns the highest committed epoch and round (if any) + pub fn get_highest_committed_epoch_round(&self) -> Option<(u64, Round)> { + self.highest_committed_epoch_round + } + /// Returns the last ordered block (if any) pub fn get_last_ordered_block(&self) -> Option { self.ordered_blocks @@ -104,6 +113,9 @@ impl OrderedBlockStore { self.ordered_blocks = self .ordered_blocks .split_off(&(split_off_epoch, split_off_round)); + + // Update the highest committed epoch and round + self.update_highest_committed_epoch_round(commit_ledger_info); } /// Updates the commit decision of the ordered block (if found) @@ -119,6 +131,32 @@ impl OrderedBlockStore { { *existing_commit_decision = Some(commit_decision.clone()); } + + // Update the highest committed epoch and round + self.update_highest_committed_epoch_round(commit_decision.commit_proof()); + } + + /// Updates the highest committed epoch and round based on the commit ledger info + fn update_highest_committed_epoch_round( + &mut self, + commit_ledger_info: &LedgerInfoWithSignatures, + ) { + // Get the epoch and round of the commit ledger info + let commit_epoch = commit_ledger_info.ledger_info().epoch(); + let commit_round = commit_ledger_info.commit_info().round(); + let commit_epoch_round = (commit_epoch, commit_round); + + // Update the highest committed epoch and round (if appropriate) + match self.highest_committed_epoch_round { + Some(highest_committed_epoch_round) => { + if commit_epoch_round > highest_committed_epoch_round { + self.highest_committed_epoch_round = Some(commit_epoch_round); + } + }, + None => { + self.highest_committed_epoch_round = Some(commit_epoch_round); + }, + } } /// Updates the metrics for the ordered blocks @@ -154,6 +192,17 @@ impl OrderedBlockStore { metrics::ORDERED_BLOCKS_LABEL, highest_ordered_round, ); + + // Update the highest round for the committed blocks + let highest_committed_round = self + .highest_committed_epoch_round + .map(|(_, round)| round) + .unwrap_or(0); + metrics::set_gauge_with_label( + &metrics::OBSERVER_PROCESSED_BLOCK_ROUNDS, + metrics::COMMITTED_BLOCKS_LABEL, + highest_committed_round, + ); } } @@ -189,6 +238,91 @@ mod test { assert!(ordered_block_store.ordered_blocks.is_empty()); } + #[test] + fn test_get_highest_committed_epoch_round() { + // Create a new ordered block store + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + + // Verify that we have no highest committed epoch and round + assert!(ordered_block_store + .get_highest_committed_epoch_round() + .is_none()); + + // Insert several ordered blocks for the current epoch + let current_epoch = 10; + let num_ordered_blocks = 50; + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); + + // Create a commit decision for the first ordered block + let first_ordered_block = ordered_blocks.first().unwrap(); + let first_ordered_block_info = first_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(first_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + 
)); + + // Update the commit decision for the first ordered block + ordered_block_store.update_commit_decision(&commit_decision); + + // Verify the highest committed epoch and round is the first ordered block + verify_highest_committed_epoch_round(&ordered_block_store, &first_ordered_block_info); + + // Create a commit decision for the last ordered block + let last_ordered_block = ordered_blocks.last().unwrap(); + let last_ordered_block_info = last_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(last_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Update the commit decision for the last ordered block + ordered_block_store.update_commit_decision(&commit_decision); + + // Verify the highest committed epoch and round is the last ordered block + verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); + + // Insert several ordered blocks for the next epoch + let next_epoch = current_epoch + 1; + let num_ordered_blocks = 10; + let ordered_blocks = + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, next_epoch); + + // Verify the highest committed epoch and round is still the last ordered block + verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); + + // Create a commit decision for the first ordered block (in the next epoch) + let first_ordered_block = ordered_blocks.first().unwrap(); + let first_ordered_block_info = first_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(first_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Update the commit decision for the first ordered block + ordered_block_store.update_commit_decision(&commit_decision); + + // Verify the highest committed epoch and round is the first ordered block (in the next epoch) + verify_highest_committed_epoch_round(&ordered_block_store, &first_ordered_block_info); + + // Create a commit decision for the last ordered block (in the next epoch) + let last_ordered_block = ordered_blocks.last().unwrap(); + let last_ordered_block_info = last_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(last_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Remove the ordered blocks for the commit decision + ordered_block_store.remove_blocks_for_commit(commit_decision.commit_proof()); + + // Verify the highest committed epoch and round is the last ordered block (in the next epoch) + verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); + } + #[test] fn test_get_last_ordered_block() { // Create a new ordered block store @@ -581,4 +715,19 @@ mod test { updated_commit_decision.as_ref().unwrap().clone() ); } + + /// Verifies the highest committed epoch and round matches the given block info + fn verify_highest_committed_epoch_round( + ordered_block_store: &OrderedBlockStore, + block_info: &BlockInfo, + ) { + // Verify the highest committed epoch and round is the block info + let highest_committed_epoch_round = ordered_block_store + .get_highest_committed_epoch_round() + .unwrap(); + assert_eq!( + highest_committed_epoch_round, + (block_info.epoch(), block_info.round()) + ); + } } From 6b06cad8e825f4ccfaeac38eb1808cf6e68d1c1c Mon Sep 17 00:00:00 2001 
From: Josh Lind Date: Sun, 8 Sep 2024 06:33:59 -0400 Subject: [PATCH 09/36] [Consensus Observer] Handle duplicate payload messages. --- .../observer/consensus_observer.rs | 45 ++++++++++++++-- .../observer/payload_store.rs | 51 +++++++++++++++++++ 2 files changed, 93 insertions(+), 3 deletions(-) diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs index ce641e10457e6..fdd2763fd5765 100644 --- a/consensus/src/consensus_observer/observer/consensus_observer.rs +++ b/consensus/src/consensus_observer/observer/consensus_observer.rs @@ -321,13 +321,29 @@ impl ConsensusObserver { peer_network_id: PeerNetworkId, block_payload: BlockPayload, ) { - // Update the metrics for the received block payload - update_metrics_for_block_payload_message(peer_network_id, &block_payload); - // Get the epoch and round for the block let block_epoch = block_payload.block.epoch(); let block_round = block_payload.block.round(); + // Determine if the payload is behind the last ordered block, or it already exists + let last_ordered_block = self.get_last_ordered_block(); + let payload_out_of_date = + (block_epoch, block_round) <= (last_ordered_block.epoch(), last_ordered_block.round()); + let payload_exists = self + .block_payload_store + .lock() + .existing_payload_entry(&block_payload); + + // If the payload already exists, or is behind the last ordered block, we should ignore it + if payload_exists || payload_out_of_date { + // Update the metrics for the dropped block payload + update_metrics_for_dropped_block_payload_message(peer_network_id, &block_payload); + return; + } + + // Update the metrics for the received block payload + update_metrics_for_block_payload_message(peer_network_id, &block_payload); + // Verify the block payload digests if let Err(error) = block_payload.verify_payload_digests() { error!( @@ -899,6 +915,29 @@ fn update_metrics_for_commit_decision_message( ); } +/// Updates the metrics for the dropped block payload message +fn update_metrics_for_dropped_block_payload_message( + peer_network_id: PeerNetworkId, + block_payload: &BlockPayload, +) { + // Increment the dropped message counter + metrics::increment_request_counter( + &metrics::OBSERVER_DROPPED_MESSAGES, + metrics::BLOCK_PAYLOAD_LABEL, + &peer_network_id, + ); + + // Log the dropped block payload message + debug!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Ignoring block payload message from peer: {:?}! 
Block epoch and round: ({}, {})",
+            peer_network_id,
+            block_payload.block.epoch(),
+            block_payload.block.round()
+        ))
+    );
+}
+
 /// Updates the metrics for the dropped commit decision message
 fn update_metrics_for_dropped_commit_decision_message(
     peer_network_id: PeerNetworkId,
diff --git a/consensus/src/consensus_observer/observer/payload_store.rs b/consensus/src/consensus_observer/observer/payload_store.rs
index 8781595026194..edea188be6f0e 100644
--- a/consensus/src/consensus_observer/observer/payload_store.rs
+++ b/consensus/src/consensus_observer/observer/payload_store.rs
@@ -61,6 +61,16 @@ impl BlockPayloadStore {
         self.block_payloads.lock().clear();
     }
 
+    /// Returns true iff we already have a payload entry for the given block
+    pub fn existing_payload_entry(&self, block_payload: &BlockPayload) -> bool {
+        // Get the epoch and round of the payload
+        let block_info = &block_payload.block;
+        let epoch_and_round = (block_info.epoch(), block_info.round());
+
+        // Check if a payload already exists in the store
+        self.block_payloads.lock().contains_key(&epoch_and_round)
+    }
+
     /// Returns a reference to the block payloads
     pub fn get_block_payloads(&self) -> Arc<Mutex<BTreeMap<(u64, Round), BlockPayloadStatus>>> {
         self.block_payloads.clone()
     }
@@ -399,6 +409,41 @@ mod test {
         check_num_verified_payloads(&block_payload_store, 0);
     }
 
+    #[test]
+    fn test_existing_payload_entry() {
+        // Create a new block payload store
+        let consensus_observer_config = ConsensusObserverConfig::default();
+        let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config);
+
+        // Create a new block payload
+        let epoch = 10;
+        let round = 100;
+        let block_payload = create_block_payload(epoch, round);
+
+        // Check that the payload doesn't exist in the block payload store
+        assert!(!block_payload_store.existing_payload_entry(&block_payload));
+
+        // Insert the verified block payload into the block payload store
+        block_payload_store.insert_block_payload(block_payload.clone(), true);
+
+        // Check that the payload now exists in the block payload store
+        assert!(block_payload_store.existing_payload_entry(&block_payload));
+
+        // Create another block payload
+        let epoch = 5;
+        let round = 101;
+        let block_payload = create_block_payload(epoch, round);
+
+        // Check that the payload doesn't exist in the block payload store
+        assert!(!block_payload_store.existing_payload_entry(&block_payload));
+
+        // Insert the unverified block payload into the block payload store
+        block_payload_store.insert_block_payload(block_payload.clone(), false);
+
+        // Check that the payload now exists in the block payload store
+        assert!(block_payload_store.existing_payload_entry(&block_payload));
+    }
+
     #[test]
     fn test_insert_block_payload() {
         // Create a new block payload store
@@ -1040,6 +1085,12 @@ mod test {
         pipelined_blocks
     }
 
+    /// Creates a new block payload with the given epoch and round
+    fn create_block_payload(epoch: u64, round: Round) -> BlockPayload {
+        let block_info = BlockInfo::random_with_epoch(epoch, round);
+        BlockPayload::new(block_info, BlockTransactionPayload::empty())
+    }
+
     /// Checks the number of unverified payloads in the block payload store
     fn check_num_unverified_payloads(
         block_payload_store: &BlockPayloadStore,

From f39c6b137208b2c76aa38ba1cd2dfdea7ec564c4 Mon Sep 17 00:00:00 2001
From: Josh Lind
Date: Sun, 8 Sep 2024 06:34:07 -0400
Subject: [PATCH 10/36] [Consensus Observer] Handle duplicate ordered block
 messages.
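Both this change and the duplicate-payload handling in the previous patch gate incoming messages on a lexicographic comparison of (epoch, round) pairs against the last ordered block. A minimal standalone sketch of why tuple ordering gives the right staleness check (the `Round` alias here is an assumption standing in for the consensus round type; this is not the patch's own code):

type Round = u64;

/// Returns true iff the incoming (epoch, round) pair is at or below the
/// last ordered pair. Rust compares tuples element by element, so an
/// older epoch dominates regardless of the round number.
fn is_out_of_date(incoming: (u64, Round), last_ordered: (u64, Round)) -> bool {
    incoming <= last_ordered
}

fn main() {
    let last_ordered = (10, 100);
    assert!(is_out_of_date((10, 100), last_ordered)); // same pair: a duplicate
    assert!(is_out_of_date((10, 99), last_ordered)); // earlier round: stale
    assert!(is_out_of_date((9, 999), last_ordered)); // earlier epoch: stale, despite the higher round
    assert!(!is_out_of_date((11, 0), last_ordered)); // later epoch: never stale
}

Performing this cheap check (and the payload-store lookup) before digest and signature verification also avoids spending expensive verification work on data that would be discarded anyway.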
--- .../observer/consensus_observer.rs | 55 ++++++++++++-- .../observer/ordered_blocks.rs | 14 ++++ .../observer/pending_blocks.rs | 72 +++++++++++++++++++ 3 files changed, 135 insertions(+), 6 deletions(-) diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs index fdd2763fd5765..6939567dbb178 100644 --- a/consensus/src/consensus_observer/observer/consensus_observer.rs +++ b/consensus/src/consensus_observer/observer/consensus_observer.rs @@ -325,7 +325,7 @@ impl ConsensusObserver { let block_epoch = block_payload.block.epoch(); let block_round = block_payload.block.round(); - // Determine if the payload is behind the last ordered block, or it already exists + // Determine if the payload is behind the last ordered block, or if it already exists let last_ordered_block = self.get_last_ordered_block(); let payload_out_of_date = (block_epoch, block_round) <= (last_ordered_block.epoch(), last_ordered_block.round()); @@ -334,8 +334,8 @@ impl ConsensusObserver { .lock() .existing_payload_entry(&block_payload); - // If the payload already exists, or is behind the last ordered block, we should ignore it - if payload_exists || payload_out_of_date { + // If the payload is out of date or already exists, ignore it + if payload_out_of_date || payload_exists { // Update the metrics for the dropped block payload update_metrics_for_dropped_block_payload_message(peer_network_id, &block_payload); return; @@ -574,9 +574,6 @@ impl ConsensusObserver { peer_network_id: PeerNetworkId, ordered_block: OrderedBlock, ) { - // Update the metrics for the received ordered block - update_metrics_for_ordered_block_message(peer_network_id, &ordered_block); - // Verify the ordered blocks before processing if let Err(error) = ordered_block.verify_ordered_blocks() { error!( @@ -589,6 +586,29 @@ impl ConsensusObserver { return; }; + // Get the epoch and round of the first block + let first_block = ordered_block.first_block(); + let first_block_epoch_round = (first_block.epoch(), first_block.round()); + + // Determine if the block is behind the last ordered block, or if it is already pending + let last_ordered_block = self.get_last_ordered_block(); + let block_out_of_date = + first_block_epoch_round <= (last_ordered_block.epoch(), last_ordered_block.round()); + let block_pending = self + .pending_block_store + .lock() + .existing_pending_block(&ordered_block); + + // If the block is out of date or already pending, ignore it + if block_out_of_date || block_pending { + // Update the metrics for the dropped ordered block + update_metrics_for_dropped_ordered_block_message(peer_network_id, &ordered_block); + return; + } + + // Update the metrics for the received ordered block + update_metrics_for_ordered_block_message(peer_network_id, &ordered_block); + // If all payloads exist, process the block. Otherwise, store it // in the pending block store and wait for the payloads to arrive. 
if self.all_payloads_exist(ordered_block.blocks()) { @@ -961,6 +981,29 @@ fn update_metrics_for_dropped_commit_decision_message( ); } +/// Updates the metrics for the dropped ordered block message +fn update_metrics_for_dropped_ordered_block_message( + peer_network_id: PeerNetworkId, + ordered_block: &OrderedBlock, +) { + // Increment the dropped message counter + metrics::increment_request_counter( + &metrics::OBSERVER_DROPPED_MESSAGES, + metrics::ORDERED_BLOCKS_LABEL, + &peer_network_id, + ); + + // Log the dropped ordered block message + debug!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Ignoring ordered block message from peer: {:?}! Block epoch and round: ({}, {})", + peer_network_id, + ordered_block.proof_block_info().epoch(), + ordered_block.proof_block_info().round() + )) + ); +} + /// Updates the metrics for the received ordered block message fn update_metrics_for_ordered_block_message( peer_network_id: PeerNetworkId, diff --git a/consensus/src/consensus_observer/observer/ordered_blocks.rs b/consensus/src/consensus_observer/observer/ordered_blocks.rs index 60aa56cf48d85..36af25939232e 100644 --- a/consensus/src/consensus_observer/observer/ordered_blocks.rs +++ b/consensus/src/consensus_observer/observer/ordered_blocks.rs @@ -321,6 +321,20 @@ mod test { // Verify the highest committed epoch and round is the last ordered block (in the next epoch) verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); + + // Create a commit decision for an out-of-date ordered block + let out_of_date_ordered_block = ordered_blocks.first().unwrap(); + let out_of_date_ordered_block_info = out_of_date_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(out_of_date_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Update the commit decision for the out-of-date ordered block + ordered_block_store.update_commit_decision(&commit_decision); + + // Verify the highest committed epoch and round is still the last ordered block (in the next epoch) + verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); } #[test] diff --git a/consensus/src/consensus_observer/observer/pending_blocks.rs b/consensus/src/consensus_observer/observer/pending_blocks.rs index d3ce297cd5fdb..2a7ebbde0519f 100644 --- a/consensus/src/consensus_observer/observer/pending_blocks.rs +++ b/consensus/src/consensus_observer/observer/pending_blocks.rs @@ -41,6 +41,17 @@ impl PendingBlockStore { self.blocks_without_payloads.clear(); } + /// Returns true iff the store contains an entry for the given ordered block + pub fn existing_pending_block(&self, ordered_block: &OrderedBlock) -> bool { + // Get the epoch and round of the first block + let first_block = ordered_block.first_block(); + let first_block_epoch_round = (first_block.epoch(), first_block.round()); + + // Check if the block is already in the store + self.blocks_without_payloads + .contains_key(&first_block_epoch_round) + } + /// Inserts a block (without payloads) into the store pub fn insert_pending_block(&mut self, ordered_block: OrderedBlock) { // Get the epoch and round of the first block @@ -238,6 +249,67 @@ mod test { .is_empty()); } + #[test] + fn test_existing_pending_block() { + // Create a new pending block store + let max_num_pending_blocks = 10; + let consensus_observer_config = ConsensusObserverConfig { + max_num_pending_blocks: max_num_pending_blocks as u64, + 
..ConsensusObserverConfig::default()
+        };
+        let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new(
+            consensus_observer_config,
+        )));
+
+        // Insert the maximum number of blocks into the store
+        let current_epoch = 10;
+        let starting_round = 100;
+        let pending_blocks = create_and_add_pending_blocks(
+            pending_block_store.clone(),
+            max_num_pending_blocks,
+            current_epoch,
+            starting_round,
+            5,
+        );
+
+        // Verify that all blocks were inserted correctly
+        for pending_block in &pending_blocks {
+            assert!(pending_block_store
+                .lock()
+                .existing_pending_block(pending_block));
+        }
+
+        // Create a new block payload store and insert payloads for the second block
+        let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new(
+            consensus_observer_config,
+        )));
+        let second_block = pending_blocks[1].clone();
+        insert_payloads_for_ordered_block(block_payload_store.clone(), &second_block);
+
+        // Remove the second block (which is now ready)
+        let payload_round = second_block.first_block().round();
+        let ready_block = pending_block_store.lock().remove_ready_block(
+            current_epoch,
+            payload_round,
+            block_payload_store.clone(),
+        );
+        assert_eq!(ready_block, Some(second_block));
+
+        // Verify that the first and second blocks were removed
+        verify_pending_blocks(
+            pending_block_store.clone(),
+            max_num_pending_blocks - 2,
+            &pending_blocks[2..].to_vec(),
+        );
+
+        // Verify that the first and second blocks are no longer in the store
+        for pending_block in &pending_blocks[..2] {
+            assert!(!pending_block_store
+                .lock()
+                .existing_pending_block(pending_block));
+        }
+    }
+
     #[test]
     fn test_insert_pending_block() {
         // Create a new pending block store

From b015ee41dfa89ae4fc95e4264aad1ada61ee310b Mon Sep 17 00:00:00 2001
From: Josh Lind
Date: Sun, 8 Sep 2024 06:48:00 -0400
Subject: [PATCH 11/36] [Consensus Observer] Small renames and refactors.
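Beyond the counter and label renames, this patch makes the observer message structs' fields private and routes all access through read-only accessors (see the observer_message.rs hunks below). A minimal sketch of that encapsulation pattern, using a hypothetical BlockSummary type rather than the patch's own BlockPayload:

// The module boundary is what makes the privacy meaningful: code outside
// `payload` can only construct the type via `new` and read via accessors.
mod payload {
    #[derive(Clone, Debug)]
    pub struct BlockSummary {
        epoch: u64, // private: no longer reachable as `summary.epoch`
        round: u64,
    }

    impl BlockSummary {
        pub fn new(epoch: u64, round: u64) -> Self {
            Self { epoch, round }
        }

        // Read-only accessors replace direct field access at call sites.
        pub fn epoch(&self) -> u64 {
            self.epoch
        }

        pub fn round(&self) -> u64 {
            self.round
        }
    }
}

fn main() {
    let summary = payload::BlockSummary::new(10, 100);
    // Call sites migrate from `summary.epoch` to `summary.epoch()`.
    println!("epoch {}, round {}", summary.epoch(), summary.round());
}

The payoff is visible in the later hunks of this patch: changes to the internal representation no longer ripple into every caller, since callers only depend on the accessor signatures.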
---
 .../src/consensus_observer/common/metrics.rs  |  6 ++--
 .../network/observer_client.rs                | 12 +++----
 .../network/observer_message.rs               | 32 +++++++++++++++----
 .../observer/consensus_observer.rs            | 30 +++++++++--------
 .../observer/ordered_blocks.rs                |  4 +--
 .../observer/payload_store.rs                 | 12 +++----
 .../observer/subscription_manager.rs          |  4 +--
 .../publisher/consensus_publisher.rs          |  2 +-
 consensus/src/payload_manager.rs              |  2 +-
 9 files changed, 63 insertions(+), 41 deletions(-)

diff --git a/consensus/src/consensus_observer/common/metrics.rs b/consensus/src/consensus_observer/common/metrics.rs
index e290d74640c70..0e91e1d9af702 100644
--- a/consensus/src/consensus_observer/common/metrics.rs
+++ b/consensus/src/consensus_observer/common/metrics.rs
@@ -16,7 +16,7 @@
 pub const COMMIT_DECISION_LABEL: &str = "commit_decision";
 pub const COMMITTED_BLOCKS_LABEL: &str = "committed_blocks";
 pub const CREATED_SUBSCRIPTION_LABEL: &str = "created_subscription";
 pub const ORDERED_BLOCK_ENTRIES_LABEL: &str = "ordered_block_entries";
-pub const ORDERED_BLOCKS_LABEL: &str = "ordered_blocks";
+pub const ORDERED_BLOCK_LABEL: &str = "ordered_block";
 pub const PENDING_BLOCK_ENTRIES_LABEL: &str = "pending_block_entries";
 pub const PENDING_BLOCKS_LABEL: &str = "pending_blocks";
 pub const STORED_PAYLOADS_LABEL: &str = "stored_payloads";
@@ -191,8 +191,8 @@ pub static PUBLISHER_SENT_MESSAGES: Lazy<IntCounterVec> = Lazy::new(|| {
         .unwrap()
 });
 
-/// Increments the given request counter with the provided values
-pub fn increment_request_counter(
+/// Increments the given counter with the provided values
+pub fn increment_counter(
     counter: &Lazy<IntCounterVec>,
     label: &str,
diff --git a/consensus/src/consensus_observer/network/observer_client.rs b/consensus/src/consensus_observer/network/observer_client.rs
index a2f94ff44524f..33c4ce902af33 100644
--- a/consensus/src/consensus_observer/network/observer_client.rs
+++ b/consensus/src/consensus_observer/network/observer_client.rs
@@ -46,7 +46,7 @@ impl>
         message_label: &str,
     ) -> Result<(), Error> {
         // Increment the message counter
-        metrics::increment_request_counter(
+        metrics::increment_counter(
             &metrics::PUBLISHER_SENT_MESSAGES,
             message_label,
             peer_network_id,
@@ -74,7 +74,7 @@ impl>
             .message(&format!("Failed to send message: {:?}", error)));
 
         // Update the direct send error metrics
-        metrics::increment_request_counter(
+        metrics::increment_counter(
             &metrics::PUBLISHER_SENT_MESSAGE_ERRORS,
             error.get_label(),
             peer_network_id,
@@ -125,7 +125,7 @@ impl>
             .message(&format!("Failed to serialize message: {:?}", error)));
 
         // Update the direct send error metrics
-        metrics::increment_request_counter(
+        metrics::increment_counter(
             &metrics::PUBLISHER_SENT_MESSAGE_ERRORS,
             error.get_label(),
             peer_network_id,
@@ -147,7 +147,7 @@ impl>
         let request_id = rand::thread_rng().gen();
 
         // Increment the request counter
-        metrics::increment_request_counter(
+        metrics::increment_counter(
             &metrics::OBSERVER_SENT_REQUESTS,
             request.get_label(),
             peer_network_id,
@@ -174,7 +174,7 @@ impl>
         match result {
             Ok(consensus_observer_response) => {
                 // Update the RPC success metrics
-                metrics::increment_request_counter(
+                metrics::increment_counter(
                     &metrics::OBSERVER_RECEIVED_MESSAGE_RESPONSES,
                     request_label,
                     peer_network_id,
@@ -192,7 +192,7 @@ impl>
             .error(&error));
 
         // Update the RPC error metrics
-        metrics::increment_request_counter(
+        metrics::increment_counter(
             &metrics::OBSERVER_SENT_MESSAGE_ERRORS,
             error.get_label(),
             peer_network_id,
diff --git
a/consensus/src/consensus_observer/network/observer_message.rs b/consensus/src/consensus_observer/network/observer_message.rs
index 6c68384cda32e..6ecb14d7995de 100644
--- a/consensus/src/consensus_observer/network/observer_message.rs
+++ b/consensus/src/consensus_observer/network/observer_message.rs
@@ -312,8 +312,8 @@ impl CommitDecision {
 /// The transaction payload and proof of each block
 #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
 pub struct PayloadWithProof {
-    pub transactions: Vec<SignedTransaction>,
-    pub proofs: Vec<ProofOfStore>,
+    transactions: Vec<SignedTransaction>,
+    proofs: Vec<ProofOfStore>,
 }
 
 impl PayloadWithProof {
@@ -337,8 +337,8 @@ impl PayloadWithProof {
 /// The transaction payload and proof of each block with a transaction limit
 #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
 pub struct PayloadWithProofAndLimit {
-    pub payload_with_proof: PayloadWithProof,
-    pub transaction_limit: Option<u64>,
+    payload_with_proof: PayloadWithProof,
+    transaction_limit: Option<u64>,
 }
 
 impl PayloadWithProofAndLimit {
@@ -629,8 +629,8 @@ impl BlockTransactionPayload {
 /// Payload message contains the block and transaction payload
 #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
 pub struct BlockPayload {
-    pub block: BlockInfo,
-    pub transaction_payload: BlockTransactionPayload,
+    block: BlockInfo,
+    transaction_payload: BlockTransactionPayload,
 }
 
 impl BlockPayload {
@@ -641,6 +641,26 @@ impl BlockPayload {
         }
     }
 
+    /// Returns a reference to the block info
+    pub fn block(&self) -> &BlockInfo {
+        &self.block
+    }
+
+    /// Returns the epoch of the block info
+    pub fn epoch(&self) -> u64 {
+        self.block.epoch()
+    }
+
+    /// Returns the round of the block info
+    pub fn round(&self) -> Round {
+        self.block.round()
+    }
+
+    /// Returns a reference to the block transaction payload
+    pub fn transaction_payload(&self) -> &BlockTransactionPayload {
+        &self.transaction_payload
+    }
+
     /// Verifies the block payload digests and returns an error if the data is invalid
     pub fn verify_payload_digests(&self) -> Result<(), Error> {
         // Verify the proof of store digests against the transaction
diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs
index 6939567dbb178..e1468748a781c 100644
--- a/consensus/src/consensus_observer/observer/consensus_observer.rs
+++ b/consensus/src/consensus_observer/observer/consensus_observer.rs
@@ -322,8 +322,8 @@ impl ConsensusObserver {
         block_payload: BlockPayload,
     ) {
         // Get the epoch and round for the block
-        let block_epoch = block_payload.block.epoch();
-        let block_round = block_payload.block.round();
+        let block_epoch = block_payload.epoch();
+        let block_round = block_payload.round();
 
         // Determine if the payload is behind the last ordered block, or if it already exists
         let last_ordered_block = self.get_last_ordered_block();
@@ -349,7 +349,8 @@ impl ConsensusObserver {
             error!(
                 LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
                     "Failed to verify block payload digests! Ignoring block: {:?}. Error: {:?}",
-                    block_payload.block, error
+                    block_payload.block(),
+                    error
                 ))
             );
             return;
@@ -363,7 +364,7 @@ impl ConsensusObserver {
             error!(
                 LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
                     "Failed to verify block payload signatures! Ignoring block: {:?}.
Error: {:?}", - block_payload.block, error + block_payload.block(), error )) ); return; @@ -543,7 +544,7 @@ impl ConsensusObserver { } // Increment the received message counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_RECEIVED_MESSAGES, message.get_label(), &peer_network_id, @@ -902,7 +903,8 @@ fn update_metrics_for_block_payload_message( // Log the received block payload message let log_message = format!( "Received block payload: {}, from peer: {}!", - block_payload.block, peer_network_id + block_payload.block(), + peer_network_id ); log_received_message(log_message); @@ -910,7 +912,7 @@ fn update_metrics_for_block_payload_message( metrics::set_gauge_with_label( &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, metrics::BLOCK_PAYLOAD_LABEL, - block_payload.block.round(), + block_payload.round(), ); } @@ -941,7 +943,7 @@ fn update_metrics_for_dropped_block_payload_message( block_payload: &BlockPayload, ) { // Increment the dropped message counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_DROPPED_MESSAGES, metrics::BLOCK_PAYLOAD_LABEL, &peer_network_id, @@ -952,8 +954,8 @@ fn update_metrics_for_dropped_block_payload_message( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Ignoring block payload message from peer: {:?}! Block epoch and round: ({}, {})", peer_network_id, - block_payload.block.epoch(), - block_payload.block.round() + block_payload.epoch(), + block_payload.round() )) ); } @@ -964,7 +966,7 @@ fn update_metrics_for_dropped_commit_decision_message( commit_decision: &CommitDecision, ) { // Increment the dropped message counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_DROPPED_MESSAGES, metrics::COMMITTED_BLOCKS_LABEL, &peer_network_id, @@ -987,9 +989,9 @@ fn update_metrics_for_dropped_ordered_block_message( ordered_block: &OrderedBlock, ) { // Increment the dropped message counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_DROPPED_MESSAGES, - metrics::ORDERED_BLOCKS_LABEL, + metrics::ORDERED_BLOCK_LABEL, &peer_network_id, ); @@ -1020,7 +1022,7 @@ fn update_metrics_for_ordered_block_message( // Update the metrics for the received ordered block metrics::set_gauge_with_label( &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, - metrics::ORDERED_BLOCKS_LABEL, + metrics::ORDERED_BLOCK_LABEL, ordered_block.proof_block_info().round(), ); } diff --git a/consensus/src/consensus_observer/observer/ordered_blocks.rs b/consensus/src/consensus_observer/observer/ordered_blocks.rs index 36af25939232e..a2408b3a4b20d 100644 --- a/consensus/src/consensus_observer/observer/ordered_blocks.rs +++ b/consensus/src/consensus_observer/observer/ordered_blocks.rs @@ -177,7 +177,7 @@ impl OrderedBlockStore { .sum(); metrics::set_gauge_with_label( &metrics::OBSERVER_NUM_PROCESSED_BLOCKS, - metrics::ORDERED_BLOCKS_LABEL, + metrics::ORDERED_BLOCK_LABEL, num_ordered_blocks, ); @@ -189,7 +189,7 @@ impl OrderedBlockStore { .unwrap_or(0); metrics::set_gauge_with_label( &metrics::OBSERVER_PROCESSED_BLOCK_ROUNDS, - metrics::ORDERED_BLOCKS_LABEL, + metrics::ORDERED_BLOCK_LABEL, highest_ordered_round, ); diff --git a/consensus/src/consensus_observer/observer/payload_store.rs b/consensus/src/consensus_observer/observer/payload_store.rs index edea188be6f0e..59859ec0b82ea 100644 --- a/consensus/src/consensus_observer/observer/payload_store.rs +++ b/consensus/src/consensus_observer/observer/payload_store.rs @@ -64,8 +64,7 @@ impl 
BlockPayloadStore {
     /// Returns true iff we already have a payload entry for the given block
     pub fn existing_payload_entry(&self, block_payload: &BlockPayload) -> bool {
         // Get the epoch and round of the payload
-        let block_info = &block_payload.block;
-        let epoch_and_round = (block_info.epoch(), block_info.round());
+        let epoch_and_round = (block_payload.epoch(), block_payload.round());
 
         // Check if a payload already exists in the store
         self.block_payloads.lock().contains_key(&epoch_and_round)
@@ -88,14 +87,15 @@ impl BlockPayloadStore {
             warn!(
                 LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
                     "Exceeded the maximum number of payloads: {:?}. Dropping block: {:?}!",
-                    max_num_pending_blocks, block_payload.block,
+                    max_num_pending_blocks,
+                    block_payload.block(),
                 ))
             );
             return; // Drop the block if we've exceeded the maximum
         }
 
         // Create the new payload status
-        let epoch_and_round = (block_payload.block.epoch(), block_payload.block.round());
+        let epoch_and_round = (block_payload.epoch(), block_payload.round());
         let payload_status = if verified_payload_signatures {
             BlockPayloadStatus::AvailableAndVerified(block_payload)
         } else {
@@ -171,7 +171,7 @@ impl BlockPayloadStore {
         // Get the block transaction payload
         let transaction_payload = match entry.get() {
             BlockPayloadStatus::AvailableAndVerified(block_payload) => {
-                &block_payload.transaction_payload
+                block_payload.transaction_payload()
             },
             BlockPayloadStatus::AvailableAndUnverified(_) => {
                 // The payload should have already been verified
@@ -261,7 +261,7 @@ impl BlockPayloadStore {
         // Collect the rounds of all newly verified blocks
         let verified_payload_rounds: Vec<Round> = verified_payloads_to_update
             .iter()
-            .map(|block_payload| block_payload.block.round())
+            .map(|block_payload| block_payload.round())
             .collect();
 
         // Update the verified block payloads.
Note: this will cause diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs index 2f124e5841cd3..8f70fe21b9261 100644 --- a/consensus/src/consensus_observer/observer/subscription_manager.rs +++ b/consensus/src/consensus_observer/observer/subscription_manager.rs @@ -360,7 +360,7 @@ impl SubscriptionManager { ); // Update the number of created subscriptions - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_CREATED_SUBSCRIPTIONS, metrics::CREATED_SUBSCRIPTION_LABEL, &peer_network_id, @@ -381,7 +381,7 @@ impl SubscriptionManager { ); // Update the number of terminated subscriptions - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_TERMINATED_SUBSCRIPTIONS, error.get_label(), &peer_network_id, diff --git a/consensus/src/consensus_observer/publisher/consensus_publisher.rs b/consensus/src/consensus_observer/publisher/consensus_publisher.rs index 11e2f63aa92de..1379c87131cc5 100644 --- a/consensus/src/consensus_observer/publisher/consensus_publisher.rs +++ b/consensus/src/consensus_observer/publisher/consensus_publisher.rs @@ -150,7 +150,7 @@ impl ConsensusPublisher { let (peer_network_id, message, response_sender) = network_message.into_parts(); // Update the RPC request counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::PUBLISHER_RECEIVED_REQUESTS, message.get_label(), &peer_network_id, diff --git a/consensus/src/payload_manager.rs b/consensus/src/payload_manager.rs index 4749efb10c643..c2e7c580fb9b3 100644 --- a/consensus/src/payload_manager.rs +++ b/consensus/src/payload_manager.rs @@ -471,7 +471,7 @@ async fn get_transactions_for_observer( }; // If the payload is valid, publish it to any downstream observers - let transaction_payload = block_payload.transaction_payload; + let transaction_payload = block_payload.transaction_payload(); if let Some(consensus_publisher) = consensus_publisher { let message = ConsensusObserverMessage::new_block_payload_message( block.gen_block_info(HashValue::zero(), 0, None), From 9e4556c330a8bf6a82a4a2c43ba23b4defefcb53 Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Sun, 8 Sep 2024 07:00:45 -0400 Subject: [PATCH 12/36] [Consensus Observer] Support multiple subscriptions. --- .../src/config/consensus_observer_config.rs | 5 +- .../src/consensus_observer/common/error.rs | 4 + .../src/consensus_observer/common/metrics.rs | 27 +- .../observer/consensus_observer.rs | 32 +- .../observer/subscription.rs | 426 ++++---- .../observer/subscription_manager.rs | 976 +++++++++++++----- 6 files changed, 1002 insertions(+), 468 deletions(-) diff --git a/config/src/config/consensus_observer_config.rs b/config/src/config/consensus_observer_config.rs index 8d930cf17c8d3..0ca55c31d50e9 100644 --- a/config/src/config/consensus_observer_config.rs +++ b/config/src/config/consensus_observer_config.rs @@ -30,6 +30,8 @@ pub struct ConsensusObserverConfig { /// Interval (in milliseconds) to garbage collect peer state pub garbage_collection_interval_ms: u64, + /// The maximum number of concurrent subscriptions + pub max_concurrent_subscriptions: u64, /// Maximum number of blocks to keep in memory (e.g., pending blocks, ordered blocks, etc.) 
pub max_num_pending_blocks: u64,
     /// Maximum timeout (in milliseconds) for active subscriptions
@@ -52,8 +54,9 @@ impl Default for ConsensusObserverConfig {
             publisher_enabled: false,
             max_network_channel_size: 1000,
             max_parallel_serialization_tasks: num_cpus::get(), // Default to the number of CPUs
-            network_request_timeout_ms: 10_000, // 10 seconds
+            network_request_timeout_ms: 5_000, // 5 seconds
             garbage_collection_interval_ms: 60_000, // 60 seconds
+            max_concurrent_subscriptions: 2, // 2 streams should be sufficient
             max_num_pending_blocks: 100, // 100 blocks
             max_subscription_timeout_ms: 30_000, // 30 seconds
             max_synced_version_timeout_ms: 60_000, // 60 seconds
diff --git a/consensus/src/consensus_observer/common/error.rs b/consensus/src/consensus_observer/common/error.rs
index 37a516d10115c..7fc6a78785a96 100644
--- a/consensus/src/consensus_observer/common/error.rs
+++ b/consensus/src/consensus_observer/common/error.rs
@@ -21,6 +21,9 @@ pub enum Error {
     #[error("Subscription progress stopped: {0}")]
     SubscriptionProgressStopped(String),
 
+    #[error("Subscriptions reset: {0}")]
+    SubscriptionsReset(String),
+
     #[error("Subscription suboptimal: {0}")]
     SubscriptionSuboptimal(String),
 
@@ -40,6 +43,7 @@ impl Error {
             Self::RpcError(_) => "rpc_error",
             Self::SubscriptionDisconnected(_) => "subscription_disconnected",
             Self::SubscriptionProgressStopped(_) => "subscription_progress_stopped",
+            Self::SubscriptionsReset(_) => "subscriptions_reset",
             Self::SubscriptionSuboptimal(_) => "subscription_suboptimal",
             Self::SubscriptionTimeout(_) => "subscription_timeout",
             Self::UnexpectedError(_) => "unexpected_error",
diff --git a/consensus/src/consensus_observer/common/metrics.rs b/consensus/src/consensus_observer/common/metrics.rs
index 0e91e1d9af702..5888bbfcaca26 100644
--- a/consensus/src/consensus_observer/common/metrics.rs
+++ b/consensus/src/consensus_observer/common/metrics.rs
@@ -5,8 +5,8 @@
 use aptos_config::network_id::{NetworkId, PeerNetworkId};
 use aptos_metrics_core::{
-    register_histogram_vec, register_int_counter_vec, register_int_gauge_vec, HistogramVec,
-    IntCounterVec, IntGaugeVec,
+    register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge_vec,
+    HistogramVec, IntCounter, IntCounterVec, IntGaugeVec,
 };
 use once_cell::sync::Lazy;
 
@@ -31,6 +31,14 @@ pub static OBSERVER_CREATED_SUBSCRIPTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
         .unwrap()
 });
 
+/// Counter for tracking the number of times the block state was cleared by the consensus observer
+pub static OBSERVER_CLEARED_BLOCK_STATE: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "consensus_observer_cleared_block_state",
+        "Counter for tracking the number of times the block state was cleared by the consensus observer",
+    ).unwrap()
+});
+
 /// Counter for tracking dropped (direct send) messages by the consensus observer
 pub static OBSERVER_DROPPED_MESSAGES: Lazy<IntCounterVec> = Lazy::new(|| {
     register_int_counter_vec!(
         "consensus_observer_dropped_messages",
         "Counters related to dropped (direct send) messages by the consensus observer",
         &["message_type", "network_id"]
     )
     .unwrap()
 });
 
+/// Counter for tracking rejected (direct send) messages by the consensus observer
+pub static OBSERVER_REJECTED_MESSAGES: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "consensus_observer_rejected_messages",
+        "Counters related to rejected (direct send) messages by the consensus observer",
+        &["message_type", "network_id"]
+    )
+    .unwrap()
+});
+
 /// Gauge for tracking the number of active subscriptions for the consensus observer
 pub static OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS: Lazy<IntGaugeVec> = Lazy::new(|| {
     register_int_gauge_vec!(
@@
-203,6 +221,11 @@ pub fn increment_counter(
         .inc();
 }
 
+/// Increments the given counter without labels
+pub fn increment_counter_without_labels(counter: &Lazy<IntCounter>) {
+    counter.inc();
+}
+
 /// Observes the value for the provided histogram and label
 pub fn observe_value_with_label(
     histogram: &Lazy<HistogramVec>,
diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs
index e1468748a781c..032a3fa38f8bc 100644
--- a/consensus/src/consensus_observer/observer/consensus_observer.rs
+++ b/consensus/src/consensus_observer/observer/consensus_observer.rs
@@ -85,7 +85,7 @@ pub struct ConsensusObserver {
     // The flag indicates if we're waiting to transition to a new epoch.
     sync_handle: Option<(DropGuard, bool)>,
 
-    // The subscription manager
+    // The consensus observer subscription manager
     subscription_manager: SubscriptionManager,
 }
 
@@ -165,13 +165,15 @@ impl ConsensusObserver {
             return;
         }
 
-        // Otherwise, check the health of the active subscription
-        let new_subscription_created = self
+        // Otherwise, check the health of the active subscriptions
+        if let Err(error) = self
             .subscription_manager
             .check_and_manage_subscriptions()
-            .await;
-        if new_subscription_created {
-            // Clear the pending block state (a new subscription was created)
+            .await
+        {
+            // Log the failure and clear the pending block state
+            warn!(LogSchema::new(LogEntry::ConsensusObserver)
+                .message(&format!("Subscription checks failed! Error: {:?}", error)));
             self.clear_pending_block_state().await;
         }
     }
@@ -198,6 +200,9 @@ impl ConsensusObserver {
                 ))
             );
         }
+
+        // Increment the cleared block state counter
+        metrics::increment_counter_without_labels(&metrics::OBSERVER_CLEARED_BLOCK_STATE);
     }
 
     /// Finalizes the ordered block by sending it to the execution pipeline
@@ -528,18 +533,25 @@ impl ConsensusObserver {
         // Unpack the network message
         let (peer_network_id, message) = network_message.into_parts();
 
-        // Verify the message is from the peer we've subscribed to
+        // Verify the message is from the peers we've subscribed to
         if let Err(error) = self
             .subscription_manager
-            .verify_message_sender(peer_network_id)
+            .verify_message_for_subscription(peer_network_id)
         {
+            // Increment the rejected message counter
+            metrics::increment_counter(
+                &metrics::OBSERVER_REJECTED_MESSAGES,
+                message.get_label(),
+                &peer_network_id,
+            );
+
+            // Log the error and return
             warn!(
                 LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
-                    "Message failed subscription sender verification! Error: {:?}",
+                    "Received message that was not from an active subscription! Error: {:?}",
                     error,
                 ))
             );
-
             return;
         }
diff --git a/consensus/src/consensus_observer/observer/subscription.rs b/consensus/src/consensus_observer/observer/subscription.rs
index fe29aa6a5a577..d3023da292d00 100644
--- a/consensus/src/consensus_observer/observer/subscription.rs
+++ b/consensus/src/consensus_observer/observer/subscription.rs
@@ -31,7 +31,7 @@ pub struct ConsensusObserverSubscription {
     // The peer network id of the active subscription
     peer_network_id: PeerNetworkId,
 
-    // The timestamp of the last message received from the peer
+    // The timestamp of the last message received for the subscription
     last_message_receive_time: Instant,
 
     // The timestamp and connected peers for the last optimality check
@@ -71,7 +71,7 @@ impl ConsensusObserverSubscription {
     /// last check; or (ii) enough time has elapsed to force a refresh.
pub fn check_subscription_peer_optimality(
         &mut self,
-        peers_and_metadata: HashMap<PeerNetworkId, PeerMetadata>,
+        peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
     ) -> Result<(), Error> {
         // Get the last optimality check time and connected peers
         let (last_optimality_check_time, last_optimality_check_peers) =
@@ -106,16 +106,20 @@ impl ConsensusObserverSubscription {
         self.last_optimality_check_time_and_peers = (time_now, current_connected_peers);
 
         // Sort the peers by subscription optimality
-        let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata);
-
-        // Verify that we're subscribed to the most optimal peer
-        if let Some(optimal_peer) = sorted_peers.first() {
-            if *optimal_peer != self.peer_network_id {
-                return Err(Error::SubscriptionSuboptimal(format!(
-                    "Subscription to peer: {} is no longer optimal! New optimal peer: {}",
-                    self.peer_network_id, optimal_peer
-                )));
-            }
+        let sorted_peers = sort_peers_by_subscription_optimality(peers_and_metadata);
+
+        // Verify that this peer is one of the most optimal peers
+        let max_concurrent_subscriptions =
+            self.consensus_observer_config.max_concurrent_subscriptions as usize;
+        if !sorted_peers
+            .iter()
+            .take(max_concurrent_subscriptions)
+            .any(|peer| peer == &self.peer_network_id)
+        {
+            return Err(Error::SubscriptionSuboptimal(format!(
+                "Subscription to peer: {} is no longer optimal! New optimal peers: {:?}",
+                self.peer_network_id, sorted_peers
+            )));
         }
 
         Ok(())
@@ -180,25 +184,9 @@ impl ConsensusObserverSubscription {
         Ok(())
     }
 
-    /// Returns the peer network id of the subscription
-    pub fn get_peer_network_id(&self) -> PeerNetworkId {
-        self.peer_network_id
-    }
-
-    /// Verifies the given message is from the expected peer
-    pub fn verify_message_sender(&mut self, peer_network_id: &PeerNetworkId) -> Result<(), Error> {
-        // Verify the message is from the expected peer
-        if self.peer_network_id != *peer_network_id {
-            return Err(Error::UnexpectedError(format!(
-                "Received message from unexpected peer: {}!
Subscribed to: {}", - peer_network_id, self.peer_network_id - ))); - } - - // Update the last message receive time + /// Updates the last message receive time to the current time + pub fn update_last_message_receive_time(&mut self) { self.last_message_receive_time = self.time_service.now(); - - Ok(()) } } @@ -346,6 +334,7 @@ mod test { }; use aptos_storage_interface::Result; use aptos_types::{network_address::NetworkAddress, transaction::Version}; + use claims::assert_matches; use mockall::mock; // This is a simple mock of the DbReader (it generates a MockDatabaseReader) @@ -357,12 +346,12 @@ mod test { } #[test] - fn check_subscription_peer_optimality() { - // Create a consensus observer config and time service - let consensus_observer_config = ConsensusObserverConfig::default(); - let time_service = TimeService::mock(); + fn test_check_subscription_peer_optimality_single() { + // Create a consensus observer config with a maximum of 1 subscription + let consensus_observer_config = create_observer_config(1); // Create a new observer subscription + let time_service = TimeService::mock(); let peer_network_id = PeerNetworkId::random(); let mut subscription = ConsensusObserverSubscription::new( consensus_observer_config, @@ -372,46 +361,27 @@ mod test { ); // Verify the time and peers for the last optimality check - let (last_check_time, last_check_peers) = - subscription.last_optimality_check_time_and_peers.clone(); - assert_eq!(last_check_time, time_service.now()); - assert!(last_check_peers.is_empty()); + let mock_time_service = time_service.into_mock(); + verify_last_check_time_and_peers(&subscription, mock_time_service.now(), HashSet::new()); // Create a peers and metadata map for the subscription let mut peers_and_metadata = HashMap::new(); - peers_and_metadata.insert( - peer_network_id, - PeerMetadata::new_for_test( - create_connection_metadata(peer_network_id, true), - PeerMonitoringMetadata::new(None, None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); // Add a more optimal peer to the set of peers let new_optimal_peer = PeerNetworkId::random(); - peers_and_metadata.insert( - new_optimal_peer, - PeerMetadata::new_for_test( - create_connection_metadata(new_optimal_peer, true), - PeerMonitoringMetadata::new(Some(0.1), None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, new_optimal_peer, true, true); // Verify that the peer is optimal (not enough time has elapsed to check) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Elapse some amount of time (but not enough to check optimality) - let mock_time_service = time_service.into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.subscription_peer_change_interval_ms / 2, )); // Verify that the peer is still optimal (not enough time has elapsed to check) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Elapse enough time to check the peer optimality mock_time_service.advance(Duration::from_millis( @@ -419,17 +389,13 @@ mod test { )); // Verify that the peer is no longer optimal (a more optimal peer has been added) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_err()); + 
verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); // Verify the time of the last peer optimality check - let (last_check_time, last_check_peers) = - subscription.last_optimality_check_time_and_peers.clone(); - assert_eq!(last_check_time, mock_time_service.now()); - assert_eq!( - last_check_peers, - peers_and_metadata.keys().cloned().collect() + verify_last_check_time_and_peers( + &subscription, + mock_time_service.now(), + peers_and_metadata.keys().cloned().collect(), ); // Elapse enough time to check the peer optimality @@ -438,35 +404,29 @@ mod test { )); // Verify that the peer is now optimal (the peers haven't changed) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Remove the current peer from the list of peers peers_and_metadata.remove(&peer_network_id); // Verify that the peer is not optimal (the peers have changed) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_err()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); // Verify the time of the last peer optimality check - let (last_check_time, last_check_peers) = - subscription.last_optimality_check_time_and_peers.clone(); - assert_eq!(last_check_time, mock_time_service.now()); - assert_eq!( - last_check_peers, - peers_and_metadata.keys().cloned().collect() + verify_last_check_time_and_peers( + &subscription, + mock_time_service.now(), + peers_and_metadata.keys().cloned().collect(), ); } #[test] - fn check_subscription_peer_refresh() { - // Create a consensus observer config and time service - let consensus_observer_config = ConsensusObserverConfig::default(); - let time_service = TimeService::mock(); + fn test_check_subscription_peer_optimality_multiple() { + // Create a consensus observer config with a maximum of 2 subscriptions + let consensus_observer_config = create_observer_config(2); // Create a new observer subscription + let time_service = TimeService::mock(); let peer_network_id = PeerNetworkId::random(); let mut subscription = ConsensusObserverSubscription::new( consensus_observer_config, @@ -477,33 +437,73 @@ mod test { // Create a peers and metadata map for the subscription let mut peers_and_metadata = HashMap::new(); - peers_and_metadata.insert( + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); + + // Add a more optimal peer to the set of peers + let new_optimal_peer = PeerNetworkId::random(); + add_metadata_for_peer(&mut peers_and_metadata, new_optimal_peer, true, true); + + // Elapse enough time to check the peer optimality + let mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_peer_change_interval_ms + 1, + )); + + // Verify that the peer is optimal (it's in the top 2 most optimal peers) + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); + + // Add another more optimal peer to the set of peers + let another_optimal_peer = PeerNetworkId::random(); + add_metadata_for_peer(&mut peers_and_metadata, another_optimal_peer, true, true); + + // Elapse enough time to check the peer optimality + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_peer_change_interval_ms + 1, + )); + + // Verify that the peer is no longer optimal (it's not in the top 2 most optimal peers) + 
verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); + + // Remove the previous optimal peer from the list of peers + peers_and_metadata.remove(&new_optimal_peer); + + // Elapse enough time to check the peer optimality + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_peer_change_interval_ms + 1, + )); + + // Verify that the peer is optimal (it's in the top 2 most optimal peers) + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); + } + + #[test] + fn test_check_subscription_peer_refresh() { + // Create a consensus observer config with a maximum of 1 subscription + let consensus_observer_config = create_observer_config(1); + + // Create a new observer subscription + let time_service = TimeService::mock(); + let peer_network_id = PeerNetworkId::random(); + let mut subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(MockDatabaseReader::new()), peer_network_id, - PeerMetadata::new_for_test( - create_connection_metadata(peer_network_id, true), - PeerMonitoringMetadata::new(None, None, None, None, None), - ), + time_service.clone(), ); + // Create a peers and metadata map for the subscription + let mut peers_and_metadata = HashMap::new(); + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); + // Verify that the peer is optimal (not enough time has elapsed to refresh) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Add a more optimal peer to the set of peers let new_optimal_peer = PeerNetworkId::random(); - peers_and_metadata.insert( - new_optimal_peer, - PeerMetadata::new_for_test( - create_connection_metadata(new_optimal_peer, true), - PeerMonitoringMetadata::new(Some(0.1), None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, new_optimal_peer, true, true); // Verify that the peer is still optimal (not enough time has elapsed to refresh) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Elapse enough time to refresh optimality let mock_time_service = time_service.into_mock(); @@ -512,9 +512,7 @@ mod test { )); // Verify that the peer is no longer optimal - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_err()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); // Elapse some amount of time (but not enough to refresh) mock_time_service.advance(Duration::from_millis( @@ -522,9 +520,7 @@ mod test { )); // Verify that the peer is now optimal (not enough time has elapsed to refresh) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Remove the more optimal peer from the list of peers peers_and_metadata.remove(&new_optimal_peer); @@ -535,23 +531,23 @@ mod test { )); // Verify that the peer is optimal - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Verify the time of the last peer optimality check - let current_time = mock_time_service.now(); - let (last_check_time, _) = 
subscription.last_optimality_check_time_and_peers; - assert_eq!(last_check_time, current_time); + verify_last_check_time_and_peers( + &subscription, + mock_time_service.now(), + peers_and_metadata.keys().cloned().collect(), + ); } #[test] - fn check_subscription_peer_optimality_supported() { - // Create a consensus observer config and time service - let consensus_observer_config = ConsensusObserverConfig::default(); - let time_service = TimeService::mock(); + fn test_check_subscription_peer_optimality_supported() { + // Create a consensus observer config with a maximum of 1 subscription + let consensus_observer_config = create_observer_config(1); // Create a new observer subscription + let time_service = TimeService::mock(); let peer_network_id = PeerNetworkId::random(); let mut subscription = ConsensusObserverSubscription::new( consensus_observer_config, @@ -562,13 +558,7 @@ mod test { // Insert empty metadata for the subscription peer let mut peers_and_metadata = HashMap::new(); - peers_and_metadata.insert( - peer_network_id, - PeerMetadata::new_for_test( - create_connection_metadata(peer_network_id, true), - PeerMonitoringMetadata::new(None, None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); // Elapse enough time to check optimality let mock_time_service = time_service.into_mock(); @@ -577,19 +567,11 @@ mod test { )); // Verify that the peer is still optimal (there are no other peers) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Add a more optimal peer without consensus observer support let unsupported_peer = PeerNetworkId::random(); - peers_and_metadata.insert( - unsupported_peer, - PeerMetadata::new_for_test( - create_connection_metadata(unsupported_peer, false), - PeerMonitoringMetadata::new(Some(0.1), None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, unsupported_peer, false, false); // Elapse enough time to check optimality mock_time_service.advance(Duration::from_millis( @@ -597,19 +579,11 @@ mod test { )); // Verify that the peer is still optimal (the unsupported peer is ignored) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Add another more optimal peer with consensus observer support let supported_peer = PeerNetworkId::random(); - peers_and_metadata.insert( - supported_peer, - PeerMetadata::new_for_test( - create_connection_metadata(supported_peer, true), - PeerMonitoringMetadata::new(Some(0.01), None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, supported_peer, true, true); // Elapse enough time to check optimality mock_time_service.advance(Duration::from_millis( @@ -617,9 +591,7 @@ mod test { )); // Verify that the peer is no longer optimal - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_err()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); } #[test] @@ -637,7 +609,7 @@ mod test { // Verify that the subscription has not timed out and that the last message time is updated let current_time = time_service.now(); - assert!(subscription.check_subscription_timeout().is_ok()); + verify_subscription_time_out(&subscription, false); assert_eq!(subscription.last_message_receive_time, 
current_time); // Elapse some amount of time (but not enough to timeout) @@ -647,17 +619,15 @@ mod test { )); // Verify that the subscription has not timed out - assert!(subscription.check_subscription_timeout().is_ok()); + verify_subscription_time_out(&subscription, false); - // Verify a new message is received successfully and that the last message time is updated + // Update the last message receive time let current_time = mock_time_service.now(); - subscription - .verify_message_sender(&peer_network_id) - .unwrap(); + subscription.update_last_message_receive_time(); assert_eq!(subscription.last_message_receive_time, current_time); // Verify that the subscription has not timed out - assert!(subscription.check_subscription_timeout().is_ok()); + verify_subscription_time_out(&subscription, false); // Elapse enough time to timeout the subscription mock_time_service.advance(Duration::from_millis( @@ -665,7 +635,7 @@ mod test { )); // Verify that the subscription has timed out - assert!(subscription.check_subscription_timeout().is_err()); + verify_subscription_time_out(&subscription, true); } #[test] @@ -694,25 +664,23 @@ mod test { ); // Verify that the DB is making sync progress and that the highest synced version is updated - let current_time = time_service.now(); - assert!(subscription.check_syncing_progress().is_ok()); - assert_eq!( - subscription.highest_synced_version_and_time, - (first_synced_version, current_time) + let mock_time_service = time_service.into_mock(); + verify_subscription_syncing_progress( + &mut subscription, + first_synced_version, + mock_time_service.now(), ); // Elapse some amount of time (not enough to timeout) - let mock_time_service = time_service.into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.max_synced_version_timeout_ms / 2, )); // Verify that the DB is still making sync progress - let current_time = mock_time_service.now(); - assert!(subscription.check_syncing_progress().is_ok()); - assert_eq!( - subscription.highest_synced_version_and_time, - (first_synced_version, current_time) + verify_subscription_syncing_progress( + &mut subscription, + first_synced_version, + mock_time_service.now(), ); // Elapse enough time to timeout the subscription @@ -721,11 +689,10 @@ mod test { )); // Verify that the DB is still making sync progress (the next version is higher) - let current_time = mock_time_service.now(); - assert!(subscription.check_syncing_progress().is_ok()); - assert_eq!( - subscription.highest_synced_version_and_time, - (second_synced_version, current_time) + verify_subscription_syncing_progress( + &mut subscription, + second_synced_version, + mock_time_service.now(), ); // Elapse enough time to timeout the subscription @@ -734,11 +701,14 @@ mod test { )); // Verify that the DB is not making sync progress and that the subscription has timed out - assert!(subscription.check_syncing_progress().is_err()); + assert_matches!( + subscription.check_syncing_progress(), + Err(Error::SubscriptionProgressStopped(_)) + ); } #[test] - fn test_verify_message_sender() { + fn test_update_last_message_receive_time() { // Create a new observer subscription let consensus_observer_config = ConsensusObserverConfig::default(); let peer_network_id = PeerNetworkId::random(); @@ -750,28 +720,18 @@ mod test { time_service.clone(), ); - // Verify that the message sender is valid - let current_time = time_service.now(); - assert!(subscription.verify_message_sender(&peer_network_id).is_ok()); - assert_eq!(subscription.last_message_receive_time, 
current_time);
+        // Verify the initial last message time
+        assert_eq!(subscription.last_message_receive_time, time_service.now());
 
         // Elapse some amount of time
         let mock_time_service = time_service.into_mock();
         mock_time_service.advance(Duration::from_secs(10));
 
-        // Verify that the message sender is not the expected peer
-        let other_peer_network_id = PeerNetworkId::random();
-        assert!(subscription
-            .verify_message_sender(&other_peer_network_id)
-            .is_err());
-        assert_eq!(subscription.last_message_receive_time, current_time);
-
-        // Elapse more time
-        mock_time_service.advance(Duration::from_secs(10));
-
-        // Verify that the message sender is the expected peer and that the last message time is updated
+        // Update the last message time
         let current_time = mock_time_service.now();
-        assert!(subscription.verify_message_sender(&peer_network_id).is_ok());
+        subscription.update_last_message_receive_time();
+
+        // Verify that the last message time is updated
         assert_eq!(subscription.last_message_receive_time, current_time);
     }
 
@@ -886,6 +846,26 @@ mod test {
         assert_eq!(sorted_peers, vec![*supported_peer]);
     }
 
+    /// Adds metadata for the specified peer to the map of peers and metadata
+    fn add_metadata_for_peer(
+        peers_and_metadata: &mut HashMap<PeerNetworkId, PeerMetadata>,
+        peer_network_id: PeerNetworkId,
+        support_consensus_observer: bool,
+        set_ping_latency: bool,
+    ) {
+        // Determine the ping latency to use for the peer
+        let average_ping_latency = if set_ping_latency { Some(0.1) } else { None };
+
+        // Add the peer and metadata to the map
+        peers_and_metadata.insert(
+            peer_network_id,
+            PeerMetadata::new_for_test(
+                create_connection_metadata(peer_network_id, support_consensus_observer),
+                PeerMonitoringMetadata::new(average_ping_latency, None, None, None, None),
+            ),
+        );
+    }
+
     /// Creates a new connection metadata for testing
     fn create_connection_metadata(
         peer_network_id: PeerNetworkId,
@@ -913,6 +893,14 @@ mod test {
         }
     }
 
+    /// Creates a consensus observer config with the given max concurrent subscriptions
+    fn create_observer_config(max_concurrent_subscriptions: u64) -> ConsensusObserverConfig {
+        ConsensusObserverConfig {
+            max_concurrent_subscriptions,
+            ..ConsensusObserverConfig::default()
+        }
+    }
+
     /// Creates a new peer and metadata for testing
     fn create_peer_and_metadata(
         latency: Option<f64>,
@@ -991,4 +979,62 @@ mod test {
             previous_distance = distance;
         }
     }
+
+    /// Verifies that the last check time and peers are as expected
+    fn verify_last_check_time_and_peers(
+        subscription: &ConsensusObserverSubscription,
+        expected_last_check_time: Instant,
+        expected_last_check_peers: HashSet<PeerNetworkId>,
+    ) {
+        // Get the last check time and peers from the subscription
+        let (last_check_time, last_check_peers) =
+            subscription.last_optimality_check_time_and_peers.clone();
+
+        // Verify the last check time and peers match the expected values
+        assert_eq!(last_check_time, expected_last_check_time);
+        assert_eq!(last_check_peers, expected_last_check_peers);
+    }
+
+    /// Verifies that the subscription time out matches the expected value
+    fn verify_subscription_time_out(subscription: &ConsensusObserverSubscription, timed_out: bool) {
+        // Check if the subscription has timed out
+        let result = subscription.check_subscription_timeout();
+
+        // Verify the result
+        if timed_out {
+            assert_matches!(result, Err(Error::SubscriptionTimeout(_)));
+        } else {
+            assert!(result.is_ok());
+        }
+    }
+
+    /// Verifies that the peer optimality matches the expected value
+    fn verify_subscription_peer_optimality(
+        subscription: &mut
ConsensusObserverSubscription, + peers_and_metadata: &HashMap, + is_optimal: bool, + ) { + // Check the subscription peer optimality + let result = subscription.check_subscription_peer_optimality(peers_and_metadata); + + // Verify the result + if is_optimal { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionSuboptimal(_))); + } + } + + /// Verifies that the syncing progress is as expected + fn verify_subscription_syncing_progress( + subscription: &mut ConsensusObserverSubscription, + first_synced_version: Version, + time: Instant, + ) { + assert!(subscription.check_syncing_progress().is_ok()); + assert_eq!( + subscription.highest_synced_version_and_time, + (first_synced_version, time) + ); + } } diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs index 8f70fe21b9261..e63fdfc68fa23 100644 --- a/consensus/src/consensus_observer/observer/subscription_manager.rs +++ b/consensus/src/consensus_observer/observer/subscription_manager.rs @@ -21,12 +21,13 @@ use aptos_logger::{error, info, warn}; use aptos_network::application::{interface::NetworkClient, metadata::PeerMetadata}; use aptos_storage_interface::DbReader; use aptos_time_service::TimeService; +use itertools::Itertools; use std::{collections::HashMap, sync::Arc}; /// The manager for consensus observer subscriptions pub struct SubscriptionManager { - // The currently active consensus observer subscription - active_observer_subscription: Option, + // The currently active set of consensus observer subscriptions + active_observer_subscriptions: HashMap, // The consensus observer client to send network messages consensus_observer_client: @@ -56,7 +57,7 @@ impl SubscriptionManager { time_service: TimeService, ) -> Self { Self { - active_observer_subscription: None, + active_observer_subscriptions: HashMap::new(), consensus_observer_client, consensus_observer_config, consensus_publisher, @@ -65,244 +66,356 @@ impl SubscriptionManager { } } - /// Checks if the active subscription is still healthy. If not, an error is returned. - fn check_active_subscription(&mut self) -> Result<(), Error> { - let active_observer_subscription = self.active_observer_subscription.take(); - if let Some(mut active_subscription) = active_observer_subscription { - // Check if the peer for the subscription is still connected - let peer_network_id = active_subscription.get_peer_network_id(); - let peer_still_connected = self - .get_connected_peers_and_metadata() - .map_or(false, |peers_and_metadata| { - peers_and_metadata.contains_key(&peer_network_id) - }); - - // Verify the peer is still connected - if !peer_still_connected { - return Err(Error::SubscriptionDisconnected( - "The peer is no longer connected!".to_string(), - )); - } + /// Checks if the subscription to the given peer is still healthy. + /// If not, an error explaining why it is unhealthy is returned. 
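Before the implementation that follows, it may help to see the check-and-short-circuit shape in isolation: each health check either passes or returns the error that becomes the subscription's termination reason. A minimal, self-contained sketch (all names below are illustrative stand-ins, not the actual aptos types):

```rust
use std::collections::{HashMap, HashSet};

#[derive(Debug)]
enum Error {
    SubscriptionDisconnected(String),
    SubscriptionTimeout(String),
}

struct Subscription {
    timed_out: bool,
}

impl Subscription {
    fn check_timeout(&self) -> Result<(), Error> {
        if self.timed_out {
            Err(Error::SubscriptionTimeout("no recent messages".into()))
        } else {
            Ok(())
        }
    }
}

fn check_health(
    connected_peers: &HashSet<u64>,
    subscriptions: &mut HashMap<u64, Subscription>,
    peer: u64,
) -> Result<(), Error> {
    // Look up the subscription (the real code returns an error for unknown peers)
    let subscription = subscriptions
        .get_mut(&peer)
        .ok_or_else(|| Error::SubscriptionDisconnected(format!("peer {peer} is unknown")))?;

    // Each check short-circuits with `?`, so the first failing check
    // becomes the termination reason reported to the caller
    if !connected_peers.contains(&peer) {
        return Err(Error::SubscriptionDisconnected(format!("peer {peer} dropped")));
    }
    subscription.check_timeout()?;
    // ... the real code also checks syncing progress and peer optimality here

    Ok(())
}

fn main() {
    let connected = HashSet::from([7]);
    let mut subscriptions = HashMap::from([(7, Subscription { timed_out: false })]);
    assert!(check_health(&connected, &mut subscriptions, 7).is_ok());

    subscriptions.get_mut(&7).unwrap().timed_out = true;
    assert!(check_health(&connected, &mut subscriptions, 7).is_err());
}
```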
+ fn check_subscription_health( + &mut self, + connected_peers_and_metadata: &HashMap, + peer_network_id: PeerNetworkId, + ) -> Result<(), Error> { + match self.active_observer_subscriptions.get_mut(&peer_network_id) { + Some(active_subscription) => { + // Verify the peer is still connected + if !connected_peers_and_metadata.contains_key(&peer_network_id) { + return Err(Error::SubscriptionDisconnected(format!( + "The peer: {:?} is no longer connected!", + peer_network_id + ))); + } - // Verify the subscription has not timed out - active_subscription.check_subscription_timeout()?; + // Verify the subscription has not timed out + active_subscription.check_subscription_timeout()?; - // Verify that the DB is continuing to sync and commit new data - active_subscription.check_syncing_progress()?; + // Verify that the DB is continuing to sync and commit new data + active_subscription.check_syncing_progress()?; - // Verify that the subscription peer is optimal - if let Some(peers_and_metadata) = self.get_connected_peers_and_metadata() { - active_subscription.check_subscription_peer_optimality(peers_and_metadata)?; - } + // Verify that the subscription peer is still optimal + active_subscription + .check_subscription_peer_optimality(connected_peers_and_metadata)?; - // The subscription seems healthy, we can keep it - self.active_observer_subscription = Some(active_subscription); + // The subscription seems healthy + Ok(()) + }, + None => Err(Error::UnexpectedError(format!( + "The subscription to peer: {:?} is not active!", + peer_network_id + ))), } - - Ok(()) } - /// Checks the health of the active subscription. If the subscription is - /// unhealthy, it will be terminated and a new subscription will be created. - /// This returns true iff a new subscription was created. - pub async fn check_and_manage_subscriptions(&mut self) -> bool { - // Get the peer ID of the currently active subscription (if any) - let active_subscription_peer = self - .active_observer_subscription - .as_ref() - .map(|subscription| subscription.get_peer_network_id()); - - // If we have an active subscription, verify that the subscription - // is still healthy. If not, the subscription should be terminated. - if let Some(active_subscription_peer) = active_subscription_peer { - if let Err(error) = self.check_active_subscription() { - // Log the subscription termination - warn!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Terminating subscription to peer: {:?}! Error: {:?}", - active_subscription_peer, error - )) - ); - - // Unsubscribe from the peer - self.unsubscribe_from_peer(active_subscription_peer); + /// Checks the health of the active subscriptions. If any subscription is + /// unhealthy, it will be terminated and new subscriptions will be created. + /// This returns an error iff all subscriptions were unhealthy and terminated. 
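To make the counting logic in the management pass concrete, here is a tiny, self-contained illustration with assumed toy values (three subscription slots, two active subscriptions that both fail their health checks); only the arithmetic mirrors the code below:

```rust
fn main() {
    // Toy values, not real configuration
    let max_concurrent_subscriptions: usize = 3;
    let initial_subscription_peers: usize = 2;
    let num_terminated_subscriptions: usize = 2;

    // All subscriptions were terminated iff we had some and lost every one
    let all_subscriptions_terminated = num_terminated_subscriptions > 0
        && num_terminated_subscriptions == initial_subscription_peers;

    // Top the active set back up to the configured maximum
    let num_active = initial_subscription_peers - num_terminated_subscriptions;
    let num_subscriptions_to_create =
        max_concurrent_subscriptions.saturating_sub(num_active);

    assert!(all_subscriptions_terminated);
    assert_eq!(num_subscriptions_to_create, 3);
}
```

Note that the "all terminated" case is reported as an error even when replacements are created, so the caller can react to a full subscription reset.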
+ pub async fn check_and_manage_subscriptions(&mut self) -> Result<(), Error> { + // Get the subscription and connected peers + let initial_subscription_peers = self.get_active_subscription_peers(); + let connected_peers_and_metadata = self.get_connected_peers_and_metadata(); + + // Terminate any unhealthy subscriptions + let terminated_subscriptions = + self.terminate_unhealthy_subscriptions(&connected_peers_and_metadata); + + // Check if all subscriptions were terminated + let num_terminated_subscriptions = terminated_subscriptions.len(); + let all_subscriptions_terminated = num_terminated_subscriptions > 0 + && num_terminated_subscriptions == initial_subscription_peers.len(); + + // Calculate the number of new subscriptions to create + let max_concurrent_subscriptions = + self.consensus_observer_config.max_concurrent_subscriptions as usize; + let num_subscriptions_to_create = + max_concurrent_subscriptions.saturating_sub(self.active_observer_subscriptions.len()); + + // Create the new subscriptions (if required) + let terminated_subscription_peers = terminated_subscriptions + .iter() + .map(|(peer, _)| *peer) + .collect(); + let new_subscription_peers = self + .create_new_subscriptions( + connected_peers_and_metadata, + num_subscriptions_to_create, + terminated_subscription_peers, + ) + .await; - // Update the subscription termination metrics - self.update_subscription_termination_metrics(active_subscription_peer, error); - } + // Log a warning if we failed to create as many subscriptions as requested + let num_subscriptions_created = new_subscription_peers.len(); + if num_subscriptions_created < num_subscriptions_to_create { + warn!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Failed to create the requested number of subscriptions! Number of subscriptions \ + requested: {:?}, number of subscriptions created: {:?}.", + num_subscriptions_to_create, + num_subscriptions_created + )) + ); } - // If we don't have a subscription, we should select a new peer to - // subscribe to. If we had a previous subscription (and it was - // terminated) it should be excluded from the selection process. - if self.active_observer_subscription.is_none() { - // Create a new observer subscription - self.create_new_observer_subscription(active_subscription_peer) - .await; - - // If we successfully created a new subscription, update the metrics - if let Some(active_subscription) = &self.active_observer_subscription { - // Update the subscription creation metrics - self.update_subscription_creation_metrics( - active_subscription.get_peer_network_id(), - ); + // Update the subscription metrics + self.update_subscription_metrics(&new_subscription_peers, terminated_subscriptions); - return true; // A new subscription was created - } + // Return an error if all subscriptions were terminated + if all_subscriptions_terminated { + Err(Error::SubscriptionsReset(format!( + "All subscriptions were unhealthy and terminated! Number of terminated \ + subscriptions: {:?}, number of new subscriptions created: {:?}.", + num_terminated_subscriptions, num_subscriptions_created, + ))) + } else { + Ok(()) } - - false // No new subscription was created } - /// Creates a new observer subscription by sending subscription requests to - /// appropriate peers and waiting for a successful response. If `previous_subscription_peer` - /// is provided, it will be excluded from the selection process. 
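The replacement selection logic introduced by this patch — try peers in sorted order, drop failed ones from future rounds, repeat until enough subscriptions exist or candidates run out — can be modeled in isolation. A minimal sketch, assuming stubbed integer peers and a fake `try_subscribe` in place of the real subscription RPC:

```rust
/// Stand-in for the subscription RPC: pretend even-numbered peers accept
fn try_subscribe(peer: u32) -> bool {
    peer % 2 == 0
}

/// Walks the sorted candidates, removing failed peers from future
/// rounds, until enough subscriptions are created or peers run out
fn create_subscriptions(mut candidates: Vec<u32>, num_to_create: usize) -> Vec<u32> {
    let mut created = Vec::new();
    while created.len() < num_to_create && !candidates.is_empty() {
        let mut accepted = None;
        let mut failed = Vec::new();
        for peer in &candidates {
            if try_subscribe(*peer) {
                accepted = Some(*peer);
                break;
            }
            failed.push(*peer);
        }

        // Drop the failed peers (and the accepted one) before the next round
        candidates.retain(|peer| !failed.contains(peer) && Some(*peer) != accepted);
        match accepted {
            Some(peer) => created.push(peer),
            None => break, // no remaining peer accepted the request
        }
    }
    created
}

fn main() {
    assert_eq!(create_subscriptions(vec![1, 2, 3, 4], 2), vec![2, 4]);
}
```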
- async fn create_new_observer_subscription( + /// Attempts to create the given number of new subscriptions + /// and returns the peer IDs of the newly created subscriptions. + /// Any `unhealthy_subscription_peers` are excluded from selection. + async fn create_new_subscriptions( &mut self, - previous_subscription_peer: Option, - ) { - // Get a set of sorted peers to service our subscription request - let sorted_peers = match self.sort_peers_for_subscription(previous_subscription_peer) { + connected_peers_and_metadata: HashMap, + num_subscriptions_to_create: usize, + unhealthy_subscription_peers: Vec, + ) -> Vec { + // Return early if we don't need to create any new subscriptions + if num_subscriptions_to_create == 0 { + return vec![]; + } + + // Sort the potential peers for subscription requests + let mut sorted_potential_peers = match self.sort_peers_for_subscriptions( + connected_peers_and_metadata, + unhealthy_subscription_peers, + ) { Some(sorted_peers) => sorted_peers, None => { error!(LogSchema::new(LogEntry::ConsensusObserver) .message("Failed to sort peers for subscription requests!")); - return; + return vec![]; }, }; - // Verify that we have potential peers - if sorted_peers.is_empty() { + // Verify that we have potential peers to subscribe to + if sorted_potential_peers.is_empty() { warn!(LogSchema::new(LogEntry::ConsensusObserver) - .message("There are no peers to subscribe to!")); - return; + .message("There are no potential peers to subscribe to!")); + return vec![]; } - // Go through the sorted peers and attempt to subscribe to a single peer. - // The first peer that responds successfully will be the selected peer. - for selected_peer in &sorted_peers { + // Go through the potential peers and attempt to create new subscriptions + let mut created_subscription_peers = vec![]; + for _ in 0..num_subscriptions_to_create { + // If there are no peers left to subscribe to, return early + if sorted_potential_peers.is_empty() { + info!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "There are no more potential peers to subscribe to! \ + Num created subscriptions: {:?}", + created_subscription_peers.len() + )) + ); + break; + } + + // Attempt to create a subscription + let (subscription_peer, failed_subscription_peers) = self + .create_single_subscription(sorted_potential_peers.clone()) + .await; + + // Remove the failed peers from the sorted list + sorted_potential_peers.retain(|peer| !failed_subscription_peers.contains(peer)); + + // Process a successful subscription creation + if let Some(subscription_peer) = subscription_peer { + // Add the peer to the list of created subscriptions + created_subscription_peers.push(subscription_peer); + + // Remove the peer from the sorted list (for the next selection) + sorted_potential_peers.retain(|peer| peer != &subscription_peer); + } + } + + // Return the list of created subscriptions + created_subscription_peers + } + + /// Attempts to create a new subscription to a single peer from + /// the sorted list of potential peers. If a new subscription is + /// successfully created, the peer is returned. Likewise, any + /// peers with failed subscription attempts are also returned. 
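Each individual attempt reduces to a three-way match on the RPC result: an ack means success, while any other response or a transport error marks the peer as a failed attempt. A self-contained sketch of that shape (the types are illustrative, not the real network messages):

```rust
#[derive(Debug)]
enum Response {
    SubscribeAck,
    UnsubscribeAck,
}

/// Maps the three possible RPC outcomes to success or a failed attempt
fn handle_subscription_response(response: Result<Response, String>) -> Result<(), String> {
    match response {
        Ok(Response::SubscribeAck) => Ok(()), // the subscription was established
        Ok(other) => Err(format!("unexpected response type: {:?}", other)),
        Err(error) => Err(format!("failed to send subscription request: {}", error)),
    }
}

fn main() {
    assert!(handle_subscription_response(Ok(Response::SubscribeAck)).is_ok());
    assert!(handle_subscription_response(Ok(Response::UnsubscribeAck)).is_err());
    assert!(handle_subscription_response(Err("request timed out".into())).is_err());
}
```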
+ async fn create_single_subscription( + &mut self, + sorted_potential_peers: Vec, + ) -> (Option, Vec) { + let mut peers_with_failed_attempts = vec![]; + for potential_peer in sorted_potential_peers { + // Log the subscription attempt info!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Attempting to subscribe to peer: {}!", - selected_peer + "Attempting to subscribe to potential peer: {}!", + potential_peer )) ); // Send a subscription request to the peer and wait for the response. - // Note: it is fine to block here because we assume only a single active subscription. + // TODO: we should make this non-blocking! let subscription_request = ConsensusObserverRequest::Subscribe; let request_timeout_ms = self.consensus_observer_config.network_request_timeout_ms; let response = self .consensus_observer_client - .send_rpc_request_to_peer(selected_peer, subscription_request, request_timeout_ms) + .send_rpc_request_to_peer(&potential_peer, subscription_request, request_timeout_ms) .await; // Process the response and update the active subscription match response { Ok(ConsensusObserverResponse::SubscribeAck) => { + // Log the successful subscription info!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Successfully subscribed to peer: {}!", - selected_peer + potential_peer )) ); - // Update the active subscription + // Create the new subscription let subscription = ConsensusObserverSubscription::new( self.consensus_observer_config, self.db_reader.clone(), - *selected_peer, + potential_peer, self.time_service.clone(), ); - self.active_observer_subscription = Some(subscription); - return; // Return after successfully subscribing + // Add the subscription to the active subscriptions + self.active_observer_subscriptions + .insert(potential_peer, subscription); + + // Return the successful subscription peer + return (Some(potential_peer), peers_with_failed_attempts); }, Ok(response) => { // We received an invalid response warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Got unexpected response type: {:?}", + "Got unexpected response type for subscription request: {:?}", response.get_label() )) ); + + // Add the peer to the list of failed attempts + peers_with_failed_attempts.push(potential_peer); }, Err(error) => { // We encountered an error while sending the request error!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to send subscription request to peer: {}! Error: {:?}", - selected_peer, error + potential_peer, error )) ); + + // Add the peer to the list of failed attempts + peers_with_failed_attempts.push(potential_peer); }, } } - // We failed to connect to any peers - warn!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Failed to subscribe to any peers! Num peers attempted: {:?}", - sorted_peers.len() - )) - ); + // We failed to create a new subscription + (None, peers_with_failed_attempts) } - /// Gets the connected peers and metadata. If an error occurred, - /// it is logged and None is returned. - fn get_connected_peers_and_metadata(&self) -> Option> { - match self - .consensus_observer_client + /// Returns the currently active subscription peers + fn get_active_subscription_peers(&self) -> Vec { + self.active_observer_subscriptions.keys().cloned().collect() + } + + /// Gets the connected peers and metadata. If an error + /// occurred, it is logged and an empty map is returned. 
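The degrade-to-empty behavior described above (log the error and return a neutral value rather than propagating it) is a small pattern worth seeing on its own. A stand-alone sketch with illustrative types:

```rust
use std::collections::HashMap;

/// Logs the error and falls back to an empty map, so callers
/// never have to handle a lookup failure themselves
fn connected_peers(result: Result<HashMap<u32, String>, String>) -> HashMap<u32, String> {
    result.unwrap_or_else(|error| {
        eprintln!("Failed to get connected peers and metadata! Error: {:?}", error);
        HashMap::new()
    })
}

fn main() {
    let ok = connected_peers(Ok(HashMap::from([(1, "validator".to_string())])));
    assert_eq!(ok.len(), 1);
    assert!(connected_peers(Err("lookup failed".into())).is_empty());
}
```

The trade-off is that callers can no longer distinguish "no peers" from "lookup failed", which is acceptable here because both lead to the same behavior: no candidates to subscribe to.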
+ fn get_connected_peers_and_metadata(&self) -> HashMap { + self.consensus_observer_client .get_peers_and_metadata() .get_connected_peers_and_metadata() - { - Ok(connected_peers_and_metadata) => Some(connected_peers_and_metadata), - Err(error) => { + .unwrap_or_else(|error| { + // Log the error error!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to get connected peers and metadata! Error: {:?}", error )) ); - None - }, - } + + // Return an empty map + HashMap::new() + }) } - /// Produces a list of sorted peers to service our subscription request. - /// Note: if `previous_subscription_peer` is provided, it will be excluded + /// Produces a list of sorted peers to service our subscription requests. + /// Note: if `unhealthy_subscription_peers` are provided, they will be excluded /// from the selection process. Likewise, all peers currently subscribed to us /// will be excluded from the selection process. - fn sort_peers_for_subscription( + fn sort_peers_for_subscriptions( &mut self, - previous_subscription_peer: Option, + mut connected_peers_and_metadata: HashMap, + unhealthy_subscription_peers: Vec, ) -> Option> { - if let Some(mut peers_and_metadata) = self.get_connected_peers_and_metadata() { - // Remove the previous subscription peer (if provided) - if let Some(previous_subscription_peer) = previous_subscription_peer { - let _ = peers_and_metadata.remove(&previous_subscription_peer); - } + // Remove any peers we're already subscribed to + for active_subscription_peer in self.get_active_subscription_peers() { + let _ = connected_peers_and_metadata.remove(&active_subscription_peer); + } - // Remove any peers that are currently subscribed to us - if let Some(consensus_publisher) = &self.consensus_publisher { - for peer_network_id in consensus_publisher.get_active_subscribers() { - let _ = peers_and_metadata.remove(&peer_network_id); - } + // Remove any unhealthy subscription peers + for unhealthy_peer in unhealthy_subscription_peers { + let _ = connected_peers_and_metadata.remove(&unhealthy_peer); + } + + // Remove any peers that are currently subscribed to us + if let Some(consensus_publisher) = &self.consensus_publisher { + for peer_network_id in consensus_publisher.get_active_subscribers() { + let _ = connected_peers_and_metadata.remove(&peer_network_id); } + } - // Sort the peers by subscription optimality - let sorted_peers = - subscription::sort_peers_by_subscription_optimality(&peers_and_metadata); + // Sort the peers by subscription optimality + let sorted_peers = + subscription::sort_peers_by_subscription_optimality(&connected_peers_and_metadata); - // Return the sorted peers - Some(sorted_peers) - } else { - None // No connected peers were found + // Return the sorted peers + Some(sorted_peers) + } + + /// Terminates any unhealthy subscriptions and returns the list of terminated subscriptions + fn terminate_unhealthy_subscriptions( + &mut self, + connected_peers_and_metadata: &HashMap, + ) -> Vec<(PeerNetworkId, Error)> { + let mut terminated_subscriptions = vec![]; + for subscription_peer in self.get_active_subscription_peers() { + // Check the health of the subscription and terminate it if needed + if let Err(error) = + self.check_subscription_health(connected_peers_and_metadata, subscription_peer) + { + // Log the subscription termination error + warn!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Terminating subscription to peer: {:?}! 
Termination reason: {:?}", + subscription_peer, error + )) + ); + + // Unsubscribe from the peer and remove the subscription + self.unsubscribe_from_peer(subscription_peer); + + // Add the peer to the list of terminated subscriptions + terminated_subscriptions.push((subscription_peer, error)); + } } + + terminated_subscriptions } /// Unsubscribes from the given peer by sending an unsubscribe request - fn unsubscribe_from_peer(&self, peer_network_id: PeerNetworkId) { + fn unsubscribe_from_peer(&mut self, peer_network_id: PeerNetworkId) { + // Remove the peer from the active subscriptions + self.active_observer_subscriptions.remove(&peer_network_id); + // Send an unsubscribe request to the peer and process the response. // Note: we execute this asynchronously, as we don't need to wait for the response. let consensus_observer_client = self.consensus_observer_client.clone(); @@ -350,63 +463,64 @@ impl SubscriptionManager { }); } - /// Updates the subscription creation metrics for the given peer - fn update_subscription_creation_metrics(&self, peer_network_id: PeerNetworkId) { - // Set the number of active subscriptions - metrics::set_gauge( - &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, - &peer_network_id.network_id(), - 1, - ); - - // Update the number of created subscriptions - metrics::increment_counter( - &metrics::OBSERVER_CREATED_SUBSCRIPTIONS, - metrics::CREATED_SUBSCRIPTION_LABEL, - &peer_network_id, - ); - } - - /// Updates the subscription termination metrics for the given peer - fn update_subscription_termination_metrics( + /// Updates the subscription creation and termination metrics + fn update_subscription_metrics( &self, - peer_network_id: PeerNetworkId, - error: Error, + new_subscription_peers: &[PeerNetworkId], + terminated_subscription_peers: Vec<(PeerNetworkId, Error)>, ) { - // Reset the number of active subscriptions - metrics::set_gauge( - &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, - &peer_network_id.network_id(), - 0, - ); + // Update the created subscriptions metrics + for peer_network_id in new_subscription_peers { + metrics::increment_counter( + &metrics::OBSERVER_CREATED_SUBSCRIPTIONS, + metrics::CREATED_SUBSCRIPTION_LABEL, + peer_network_id, + ); + } - // Update the number of terminated subscriptions - metrics::increment_counter( - &metrics::OBSERVER_TERMINATED_SUBSCRIPTIONS, - error.get_label(), - &peer_network_id, - ); - } + // Update the terminated subscriptions metrics + for (peer_network_id, termination_reason) in terminated_subscription_peers { + metrics::increment_counter( + &metrics::OBSERVER_TERMINATED_SUBSCRIPTIONS, + termination_reason.get_label(), + &peer_network_id, + ); + } - /// Verifies that the message sender is the currently subscribed peer. - /// If the sender is not the subscribed peer, an error is returned. 
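The new message-verification flow (further below) replaces per-peer sender checks with a simple lookup: a message is valid iff its sender has an active subscription, and a valid message refreshes that subscription's last-message time. A minimal model, with integer peer IDs and a plain counter standing in for the time service:

```rust
use std::collections::HashMap;

struct Manager {
    active: HashMap<u32, u64>, // peer id -> last message receive time
    now: u64,                  // stand-in for the time service
}

impl Manager {
    /// Accepts messages only from peers with an active subscription,
    /// refreshing the subscription's last-message time on success
    fn verify_message(&mut self, sender: u32) -> Result<(), String> {
        match self.active.get_mut(&sender) {
            Some(last_message_time) => {
                *last_message_time = self.now;
                Ok(())
            },
            None => {
                // The real code also (re-)sends an unsubscribe request here
                Err(format!("message from non-subscribed peer: {sender}"))
            },
        }
    }
}

fn main() {
    let mut manager = Manager {
        active: HashMap::from([(7, 0)]),
        now: 42,
    };
    assert!(manager.verify_message(7).is_ok());
    assert_eq!(manager.active[&7], 42);
    assert!(manager.verify_message(9).is_err());
}
```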
- pub fn verify_message_sender(&mut self, message_sender: PeerNetworkId) -> Result<(), Error> { - if let Some(active_subscription) = &mut self.active_observer_subscription { - active_subscription - .verify_message_sender(&message_sender) - .map_err(|error| { - // Send another unsubscription request to the peer (in case the previous was lost) - self.unsubscribe_from_peer(message_sender); - error - }) - } else { - // Send another unsubscription request to the peer (in case the previous was lost) - self.unsubscribe_from_peer(message_sender); + // Set the number of active subscriptions (grouped by network ID) + let active_subscription_peers = self.get_active_subscription_peers(); + for (network_id, active_subscription_peers) in &active_subscription_peers + .iter() + .chunk_by(|peer_network_id| peer_network_id.network_id()) + { + metrics::set_gauge( + &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, + &network_id, + active_subscription_peers.collect::>().len() as i64, + ); + } + } - Err(Error::UnexpectedError(format!( - "Received message from unexpected peer: {}! No active subscription found!", - message_sender - ))) + /// Verifies that the message is from an active subscription. + /// If not, an error is returned. + pub fn verify_message_for_subscription( + &mut self, + message_sender: PeerNetworkId, + ) -> Result<(), Error> { + match self.active_observer_subscriptions.get_mut(&message_sender) { + Some(active_subscription) => { + // The message is from an active subscription (update the last message time) + active_subscription.update_last_message_receive_time(); + Ok(()) + }, + None => { + // The message is not from an active subscription (send another unsubscribe request) + self.unsubscribe_from_peer(message_sender); + Err(Error::InvalidMessageError(format!( + "Received message from unexpected peer, and not an active subscription: {}!", + message_sender + ))) + }, } } } @@ -439,7 +553,7 @@ mod test { } #[tokio::test] - async fn test_check_active_subscription_connected() { + async fn test_check_subscription_health_connected() { // Create a consensus observer client let network_id = NetworkId::Public; let (peers_and_metadata, consensus_observer_client) = @@ -457,20 +571,23 @@ mod test { ); // Create a new subscription - let observer_subscription = ConsensusObserverSubscription::new( + let peer_network_id = PeerNetworkId::random(); + create_observer_subscription( + &mut subscription_manager, consensus_observer_config, db_reader.clone(), - PeerNetworkId::random(), + peer_network_id, TimeService::mock(), ); - subscription_manager.active_observer_subscription = Some(observer_subscription); - // Check the active subscription and verify that it is removed (the peer is not connected) - assert_matches!( - subscription_manager.check_active_subscription(), - Err(Error::SubscriptionDisconnected(_)) - ); - assert!(subscription_manager.active_observer_subscription.is_none()); + // Check the active subscription and verify that it unhealthy (the peer is not connected) + check_subscription_connection(&mut subscription_manager, peer_network_id, false); + + // Terminate the subscription + let terminated_subscriptions = + terminate_any_unhealthy_subscriptions(&mut subscription_manager); + assert_eq!(terminated_subscriptions.len(), 1); + assert_eq!(terminated_subscriptions.first().unwrap().0, peer_network_id); // Add a new connected peer let connected_peer = @@ -485,14 +602,17 @@ mod test { TimeService::mock(), ); - // Check the active subscription and verify that it is still active (the peer is connected) - 
assert!(subscription_manager.check_active_subscription().is_ok()); - let active_subscription = subscription_manager.active_observer_subscription.unwrap(); - assert_eq!(active_subscription.get_peer_network_id(), connected_peer); + // Check the active subscriptions is still healthy + check_subscription_connection(&mut subscription_manager, connected_peer, true); + + // Verify that the active subscription is still present + assert!(subscription_manager + .get_active_subscription_peers() + .contains(&connected_peer)); } #[tokio::test] - async fn test_check_active_subscription_progress_stopped() { + async fn test_check_subscription_health_progress_stopped() { // Create a consensus observer config let consensus_observer_config = ConsensusObserverConfig { max_subscription_timeout_ms: 100_000_000, // Use a large value so that we don't time out @@ -528,22 +648,32 @@ mod test { time_service.clone(), ); + // Check the active subscription and verify that it is healthy + check_subscription_progress(&mut subscription_manager, connected_peer, true); + // Elapse time to simulate a DB progress error let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.max_synced_version_timeout_ms + 1, )); - // Check the active subscription and verify that it is removed (the DB is not syncing) - assert_matches!( - subscription_manager.check_active_subscription(), - Err(Error::SubscriptionProgressStopped(_)) - ); - assert!(subscription_manager.active_observer_subscription.is_none()); + // Check the active subscription and verify that it is unhealthy (the DB is not syncing) + check_subscription_progress(&mut subscription_manager, connected_peer, false); + + // Terminate the subscription + let terminated_subscriptions = + terminate_any_unhealthy_subscriptions(&mut subscription_manager); + assert_eq!(terminated_subscriptions.len(), 1); + assert_eq!(terminated_subscriptions.first().unwrap().0, connected_peer); + + // Verify the active subscription is no longer present + assert!(subscription_manager + .get_active_subscription_peers() + .is_empty()); } #[tokio::test] - async fn test_check_active_subscription_timeout() { + async fn test_check_subscription_health_timeout() { // Create a consensus observer client let network_id = NetworkId::Public; let (peers_and_metadata, consensus_observer_client) = @@ -574,25 +704,36 @@ mod test { time_service.clone(), ); + // Check the active subscription and verify that it is healthy + check_subscription_timeout(&mut subscription_manager, connected_peer, true); + // Elapse time to simulate a timeout let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.max_subscription_timeout_ms + 1, )); - // Check the active subscription and verify that it is removed (the subscription timed out) - assert_matches!( - subscription_manager.check_active_subscription(), - Err(Error::SubscriptionTimeout(_)) - ); - assert!(subscription_manager.active_observer_subscription.is_none()); + // Check the active subscription and verify that it is unhealthy (the subscription timed out) + check_subscription_timeout(&mut subscription_manager, connected_peer, false); + + // Terminate the subscription + let terminated_subscriptions = + terminate_any_unhealthy_subscriptions(&mut subscription_manager); + assert_eq!(terminated_subscriptions.len(), 1); + assert_eq!(terminated_subscriptions.first().unwrap().0, connected_peer); + + // Verify the active subscription is no longer 
present + assert!(subscription_manager + .get_active_subscription_peers() + .is_empty()); } #[tokio::test] - async fn test_check_active_subscription_suboptimal() { + async fn test_check_subscription_health_suboptimal() { // Create a consensus observer config let consensus_observer_config = ConsensusObserverConfig { max_subscription_timeout_ms: 100_000_000, // Use a large value so that we don't time out + max_concurrent_subscriptions: 1, // Only allow one subscription max_synced_version_timeout_ms: 100_000_000, // Use a large value so that we don't get DB progress errors ..ConsensusObserverConfig::default() }; @@ -618,7 +759,7 @@ mod test { // Add a suboptimal validator peer let suboptimal_peer = - create_peer_and_connection(network_id, peers_and_metadata.clone(), 0, None, true); + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); // Create a new subscription to the suboptimal peer create_observer_subscription( @@ -629,22 +770,38 @@ mod test { time_service.clone(), ); + // Check the active subscription and verify that it is healthy + check_subscription_optimality(&mut subscription_manager, suboptimal_peer, true); + // Elapse enough time to trigger the peer optimality check let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.subscription_peer_change_interval_ms + 1, )); - // Check the active subscription and verify that it is removed (the peer is suboptimal) - assert_matches!( - subscription_manager.check_active_subscription(), - Err(Error::SubscriptionSuboptimal(_)) - ); - assert!(subscription_manager.active_observer_subscription.is_none()); + // Check the active subscription and verify that it is unhealthy (the peer is suboptimal) + check_subscription_optimality(&mut subscription_manager, suboptimal_peer, false); + + // Elapse enough time to trigger the peer optimality check again + let mock_time_service = time_service.clone().into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_refresh_interval_ms + 1, + )); + + // Terminate the subscription + let terminated_subscriptions = + terminate_any_unhealthy_subscriptions(&mut subscription_manager); + assert_eq!(terminated_subscriptions.len(), 1); + assert_eq!(terminated_subscriptions.first().unwrap().0, suboptimal_peer); + + // Verify the active subscription is no longer present + assert!(subscription_manager + .get_active_subscription_peers() + .is_empty()); } #[tokio::test] - async fn test_sort_peers_for_subscription() { + async fn test_sort_peers_for_subscriptions() { // Create a consensus observer client let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; let (peers_and_metadata, consensus_observer_client) = @@ -661,10 +818,8 @@ mod test { TimeService::mock(), ); - // Sort the peers for a subscription and verify that no peers are returned - let sorted_peers = subscription_manager - .sort_peers_for_subscription(None) - .unwrap(); + // Sort the peers and verify that no peers are returned + let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); assert!(sorted_peers.is_empty()); // Add a connected validator peer, VFN peer and public peer @@ -683,28 +838,34 @@ mod test { ); } - // Sort the peers for a subscription and verify the ordering (according to distance) - let sorted_peers = subscription_manager - .sort_peers_for_subscription(None) - .unwrap(); + // Sort the peers and verify the ordering (according to distance) + let 
sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); assert_eq!(sorted_peers[0].network_id(), NetworkId::Validator); assert_eq!(sorted_peers[1].network_id(), NetworkId::Vfn); assert_eq!(sorted_peers[2].network_id(), NetworkId::Public); assert_eq!(sorted_peers.len(), 3); - // Sort the peers, but mark the validator as the last subscribed peer - let previous_subscription_peer = sorted_peers[0]; - let sorted_peer_subset = subscription_manager - .sort_peers_for_subscription(Some(previous_subscription_peer)) - .unwrap(); + // Sort the peers, but mark the validator as unhealthy (so it's ignored) + let sorted_peer_subset = + sort_subscription_peers(&mut subscription_manager, vec![sorted_peers[0]]); assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Vfn); assert_eq!(sorted_peer_subset[1].network_id(), NetworkId::Public); assert_eq!(sorted_peer_subset.len(), 2); - // Remove all the peers and verify that no peers are returned + // Sort the peers, but mark the VFN and validator as unhealthy (so they're ignored) + let sorted_peer_subset = sort_subscription_peers(&mut subscription_manager, vec![ + sorted_peers[0], + sorted_peers[1], + ]); + assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Public); + assert_eq!(sorted_peer_subset.len(), 1); + + // Remove all the peers and verify that no peers are returned upon sorting for peer_network_id in sorted_peers { remove_peer_and_connection(peers_and_metadata.clone(), peer_network_id); } + let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); + assert!(sorted_peers.is_empty()); // Add multiple validator peers, with different latencies let mut validator_peers = vec![]; @@ -719,16 +880,89 @@ mod test { validator_peers.push(validator_peer); } - // Sort the peers for a subscription and verify the ordering (according to latency) - let sorted_peers = subscription_manager - .sort_peers_for_subscription(None) - .unwrap(); + // Sort the peers and verify the ordering (according to latency) + let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); let expected_peers = validator_peers.into_iter().rev().collect::>(); assert_eq!(sorted_peers, expected_peers); } #[tokio::test] - async fn test_verify_message_sender() { + async fn test_terminate_unhealthy_subscriptions() { + // Create a consensus observer client + let network_id = NetworkId::Public; + let (peers_and_metadata, consensus_observer_client) = + create_consensus_observer_client(&[network_id]); + + // Create a new subscription manager + let consensus_observer_config = ConsensusObserverConfig::default(); + let db_reader = create_mock_db_reader(); + let time_service = TimeService::mock(); + let mut subscription_manager = SubscriptionManager::new( + consensus_observer_client, + consensus_observer_config, + None, + db_reader.clone(), + time_service.clone(), + ); + + // Create two new subscriptions + let subscription_peer_1 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); + let subscription_peer_2 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); + for peer in &[subscription_peer_1, subscription_peer_2] { + // Create the subscription + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + *peer, + time_service.clone(), + ); + } + + // Terminate any unhealthy subscriptions and verify that both subscriptions are still healthy + let terminated_subscriptions = + terminate_any_unhealthy_subscriptions(&mut 
subscription_manager); + assert!(terminated_subscriptions.is_empty()); + assert_eq!( + subscription_manager.get_active_subscription_peers().len(), + 2 + ); + + // Create another subscription + let subscription_peer_3 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + subscription_peer_3, + TimeService::mock(), // Use a different time service (to avoid timeouts) + ); + + // Elapse time to simulate a timeout (on the first two subscriptions) + let mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_subscription_timeout_ms + 1, + )); + + // Terminate the unhealthy subscriptions and verify the first two subscriptions were terminated + let terminated_subscriptions = + terminate_any_unhealthy_subscriptions(&mut subscription_manager); + assert_eq!(terminated_subscriptions.len(), 2); + assert_eq!(subscription_manager.get_active_subscription_peers(), vec![ + subscription_peer_3 + ]); + + // Verify that both subscriptions were terminated due to a timeout + for (_, error) in terminated_subscriptions { + assert_matches!(error, Error::SubscriptionTimeout(_)); + } + } + + #[tokio::test] + async fn test_unsubscribe_from_peer() { // Create a consensus observer client let network_id = NetworkId::Public; let (_, consensus_observer_client) = create_consensus_observer_client(&[network_id]); @@ -744,30 +978,212 @@ mod test { TimeService::mock(), ); - // Check that message verification fails (we have no active subscription) + // Verify that no subscriptions are active assert!(subscription_manager - .verify_message_sender(PeerNetworkId::random()) - .is_err()); + .get_active_subscription_peers() + .is_empty()); // Create a new subscription - let subscription_peer = PeerNetworkId::random(); + let subscription_peer_1 = PeerNetworkId::random(); create_observer_subscription( &mut subscription_manager, consensus_observer_config, db_reader.clone(), - subscription_peer, + subscription_peer_1, TimeService::mock(), ); - // Check that message verification fails if the peer doesn't match the subscription + // Verify the subscription is active assert!(subscription_manager - .verify_message_sender(PeerNetworkId::random()) - .is_err()); + .get_active_subscription_peers() + .contains(&subscription_peer_1)); - // Check that message verification passes if the peer matches the subscription + // Create another subscription + let subscription_peer_2 = PeerNetworkId::random(); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + subscription_peer_2, + TimeService::mock(), + ); + + // Verify the second subscription is active assert!(subscription_manager - .verify_message_sender(subscription_peer) - .is_ok()); + .get_active_subscription_peers() + .contains(&subscription_peer_2)); + + // Unsubscribe from the first peer + subscription_manager.unsubscribe_from_peer(subscription_peer_1); + + // Verify that the first subscription is no longer active + assert!(!subscription_manager + .get_active_subscription_peers() + .contains(&subscription_peer_1)); + + // Verify that only the second subscription is still active + assert!(subscription_manager + .get_active_subscription_peers() + .contains(&subscription_peer_2)); + assert_eq!( + subscription_manager.get_active_subscription_peers().len(), + 1 + ); + } + + #[tokio::test] + async fn 
test_verify_message_for_subscription() { + // Create a consensus observer client + let network_id = NetworkId::Public; + let (_, consensus_observer_client) = create_consensus_observer_client(&[network_id]); + + // Create a new subscription manager + let consensus_observer_config = ConsensusObserverConfig::default(); + let db_reader = Arc::new(MockDatabaseReader::new()); + let mut subscription_manager = SubscriptionManager::new( + consensus_observer_client, + consensus_observer_config, + None, + db_reader.clone(), + TimeService::mock(), + ); + + // Check that message verification fails (we have no active subscriptions) + check_message_verification_result( + &mut subscription_manager, + PeerNetworkId::random(), + false, + ); + + // Create a new subscription + let subscription_peer = PeerNetworkId::random(); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + subscription_peer, + TimeService::mock(), + ); + + // Check that message verification passes for the subscription + check_message_verification_result(&mut subscription_manager, subscription_peer, true); + + // Create another subscription + let second_subscription_peer = PeerNetworkId::random(); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + second_subscription_peer, + TimeService::mock(), + ); + + // Check that message verification passes for the second subscription + check_message_verification_result( + &mut subscription_manager, + second_subscription_peer, + true, + ); + + // Check that message verification fails if the peer doesn't match either subscription + check_message_verification_result( + &mut subscription_manager, + PeerNetworkId::random(), + false, + ); + } + + /// Checks the result of verifying a message from a given peer + fn check_message_verification_result( + subscription_manager: &mut SubscriptionManager, + peer_network_id: PeerNetworkId, + pass_verification: bool, + ) { + // Verify the message for the given peer + let result = subscription_manager.verify_message_for_subscription(peer_network_id); + + // Ensure the result matches the expected value + if pass_verification { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::InvalidMessageError(_))); + } + } + + /// Checks the health of a subscription and verifies the connection status + fn check_subscription_connection( + subscription_manager: &mut SubscriptionManager, + subscription_peer: PeerNetworkId, + expect_connected: bool, + ) { + // Check the health of the subscription + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + let result = subscription_manager + .check_subscription_health(&connected_peers_and_metadata, subscription_peer); + + // Check the result based on the expected connection status + if expect_connected { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionDisconnected(_))); + } + } + + /// Checks the health of a subscription and verifies the optimality status + fn check_subscription_optimality( + subscription_manager: &mut SubscriptionManager, + subscription_peer: PeerNetworkId, + expect_optimal: bool, + ) { + // Check the health of the subscription + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + let result = subscription_manager + .check_subscription_health(&connected_peers_and_metadata, subscription_peer); + + // Check the result based on the expected optimality status + if 
expect_optimal { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionSuboptimal(_))); + } + } + + /// Checks the health of a subscription and verifies the progress status + fn check_subscription_progress( + subscription_manager: &mut SubscriptionManager, + subscription_peer: PeerNetworkId, + expect_progress: bool, + ) { + // Check the health of the subscription + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + let result = subscription_manager + .check_subscription_health(&connected_peers_and_metadata, subscription_peer); + + // Check the result based on the expected progress status + if expect_progress { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionProgressStopped(_))); + } + } + + /// Checks the health of a subscription and verifies the timeout status + fn check_subscription_timeout( + subscription_manager: &mut SubscriptionManager, + subscription_peer: PeerNetworkId, + expect_timeout: bool, + ) { + // Check the health of the subscription + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + let result = subscription_manager + .check_subscription_health(&connected_peers_and_metadata, subscription_peer); + + // Check the result based on the expected timeout status + if expect_timeout { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionTimeout(_))); + } } /// Creates a new consensus observer client and a peers and metadata container @@ -808,7 +1224,9 @@ mod test { subscription_peer, time_service, ); - subscription_manager.active_observer_subscription = Some(observer_subscription); + subscription_manager + .active_observer_subscriptions + .insert(subscription_peer, observer_subscription); } /// Creates a new peer with the specified connection metadata @@ -879,4 +1297,32 @@ mod test { .remove_peer_metadata(peer_network_id, connection_id) .unwrap(); } + + /// A simple helper method that sorts the given peers for a subscription + fn sort_subscription_peers( + subscription_manager: &mut SubscriptionManager, + unhealthy_subscription_peers: Vec, + ) -> Vec { + // Get the connected peers and metadata + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + + // Sort the peers for subscription requests + subscription_manager + .sort_peers_for_subscriptions( + connected_peers_and_metadata, + unhealthy_subscription_peers, + ) + .unwrap() + } + + /// A simple helper method that terminates any unhealthy subscriptions + fn terminate_any_unhealthy_subscriptions( + subscription_manager: &mut SubscriptionManager, + ) -> Vec<(PeerNetworkId, Error)> { + // Get the connected peers and metadata + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + + // Terminate any unhealthy subscriptions + subscription_manager.terminate_unhealthy_subscriptions(&connected_peers_and_metadata) + } } From 994bd3c1389b9c23c003798a7b304adf079e742a Mon Sep 17 00:00:00 2001 From: Greg Nazario Date: Tue, 17 Sep 2024 13:48:56 -0700 Subject: [PATCH 13/36] [cli] Add contribution guide (#14435) --- crates/aptos/CONTRIBUTING.md | 247 +++++++++++++++++++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 crates/aptos/CONTRIBUTING.md diff --git a/crates/aptos/CONTRIBUTING.md b/crates/aptos/CONTRIBUTING.md new file mode 100644 index 0000000000000..7bd0fe7d976ee --- /dev/null +++ b/crates/aptos/CONTRIBUTING.md @@ -0,0 +1,247 @@ +# Aptos CLI 
Development Guide
+
+This is a list of design decisions and guidelines for adding commands to the Aptos CLI.
+
+## Command Groups
+
+Commands should be grouped into the existing categories. The current categories are:
+
+- account
+- config
+- genesis
+- governance
+- key
+- move
+- multisig
+- node
+- stake
+- update
+
+All categories must have a doc comment that describes the command. It must also derive `Parser` and `Subcommand`. For
+example:
+
+```rust
+/// Tool for interacting with accounts
+///
+/// This tool is used to create accounts, get information about the
+/// account's resources, and transfer resources between accounts.
+#[derive(Debug, Subcommand)]
+pub enum AccountTool {
+    Create(create::CreateAccount),
+    CreateResourceAccount(create_resource_account::CreateResourceAccount),
+    DeriveResourceAccountAddress(derive_resource_account::DeriveResourceAccount),
+    FundWithFaucet(fund::FundWithFaucet),
+    Balance(balance::Balance),
+    List(list::ListAccount),
+    LookupAddress(key_rotation::LookupAddress),
+    RotateKey(key_rotation::RotateKey),
+    Transfer(transfer::TransferCoins),
+}
+```
+
+Then it must also be added to the top level command structure:
+
+```rust
+/// Command Line Interface (CLI) for developing and interacting with the Aptos blockchain
+#[derive(Parser)]
+#[clap(name = "aptos", author, version, propagate_version = true, styles = aptos_cli_common::aptos_cli_style())]
+pub enum Tool {
+    #[clap(subcommand)]
+    Account(account::AccountTool),
+    #[clap(subcommand)]
+    Config(config::ConfigTool),
+    #[clap(subcommand)]
+    Genesis(genesis::GenesisTool),
+    #[clap(subcommand)]
+    Governance(governance::GovernanceTool),
+    Info(InfoTool),
+    Init(common::init::InitTool),
+    #[clap(subcommand)]
+    Key(op::key::KeyTool),
+    #[clap(subcommand)]
+    Move(move_tool::MoveTool),
+    #[clap(subcommand)]
+    Multisig(account::MultisigAccountTool),
+    #[clap(subcommand)]
+    Node(node::NodeTool),
+    #[clap(subcommand)]
+    Stake(stake::StakeTool),
+    #[clap(subcommand)]
+    Update(update::UpdateTool),
+}
+```
+
+## Commands
+
+A command is a single top level command for the CLI. The CLI command must complete its action in a single command
+execution.
+
+### Command Names
+
+```rust
+/// Compiles a package and returns the associated ModuleIds
+#[derive(Parser)]
+pub struct CompilePackage {
+    /// Save the package metadata in the package's build directory
+    ///
+    /// If set, package metadata should be generated and stored in the package's build directory.
+    /// This metadata can be used to construct a transaction to publish a package.
+    #[clap(long)]
+    pub(crate) save_metadata: bool,
+
+    #[clap(flatten)]
+    pub(crate) included_artifacts_args: IncludedArtifactsArgs,
+    #[clap(flatten)]
+    pub(crate) move_options: MovePackageDir,
+}
+```
+
+Command names should be simple, identifiable, and easy to use. For example, compilation is grouped in `move` and uses
+the subcommand `compile`.
+
+```bash
+aptos move compile
+```
+
+Once the new command is created, it should have `#[derive(Parser)]` added above. Additionally, it will need to be added
+to the higher level tool:
+
+```rust
+#[derive(Subcommand)]
+pub enum MoveTool {
+    #[clap(alias = "build")]
+    Compile(CompilePackage),
+    #[clap(alias = "build-script")]
+    CompileScript(CompileScript),
+    Init(Init),
+    // ... 
+}
+
+impl MoveTool {
+    pub async fn execute(self) -> CliResult {
+        match self {
+            MoveTool::Compile(tool) => tool.execute_serialized().await,
+            MoveTool::CompileScript(tool) => tool.execute_serialized().await,
+            MoveTool::Init(tool) => tool.execute_serialized_success().await,
+        }
+    }
+}
+```
+
+Note that there are two types of executors here: `execute_serialized()` and `execute_serialized_success()`. If the
+command returns a value, it should call `execute_serialized()`, which serializes the returned value to JSON and writes
+it to `stdout`.
+
+Additionally, `alias` is allowed but discouraged for new commands; it exists mostly to provide backwards
+compatibility or to reduce confusion for new users.
+
+### Command flags
+
+```rust
+#[derive(Parser)]
+pub struct CompilePackage {
+    /// Save the package metadata in the package's build directory
+    ///
+    /// If set, package metadata should be generated and stored in the package's build directory.
+    /// This metadata can be used to construct a transaction to publish a package.
+    #[clap(long)]
+    pub(crate) save_metadata: bool,
+
+    // ...
+}
+```
+
+Command inputs should always be documented so that help text shows up in the CLI; see the documentation on
+`save_metadata` above for an example. Fields should be snake case, and each will show up as a kebab-case flag
+(e.g. `--save-metadata`). Do not use `short` options, as they can be confused across different commands.
+
+```bash
+aptos move compile --save-metadata
+```
+
+### Command flag groupings
+
+```rust
+/// Compiles a package and returns the associated ModuleIds
+#[derive(Parser)]
+pub struct CompilePackage {
+    // ...
+    #[clap(flatten)]
+    pub(crate) included_artifacts_args: IncludedArtifactsArgs,
+    #[clap(flatten)]
+    pub(crate) move_options: MovePackageDir,
+}
+```
+
+Command flags can be grouped into common structs to be used across multiple commands. These should be flattened by
+adding the associated struct and marking it with `#[clap(flatten)]`, as shown above. The flattened fields should not
+have a doc comment, as any doc comment there will not end up in the command. Instead, document the structs directly
+like so:
+
+```rust
+#[derive(Parser)]
+pub struct IncludedArtifactsArgs {
+    /// Artifacts to be generated when building the package
+    ///
+    /// Which artifacts to include in the package. This can be one of `none`, `sparse`, and
+    /// `all`. `none` is the most compact form and does not allow reconstructing a source
+    /// package from chain; `sparse` is the minimal set of artifacts needed to reconstruct
+    /// a source package; `all` includes all available artifacts. The choice of included
+    /// artifacts heavily influences the size and therefore gas cost of publishing: `none`
+    /// is the size of bytecode alone; `sparse` is roughly 2 times as much; and `all` 3-4
+    /// times as much. 
+    #[clap(long, default_value_t = IncludedArtifacts::Sparse)]
+    pub(crate) included_artifacts: IncludedArtifacts,
+}
+```
+
+### Command Implementation
+
+```rust
+#[async_trait]
+impl CliCommand<Vec<String>> for CompilePackage {
+    fn command_name(&self) -> &'static str {
+        "CompilePackage"
+    }
+
+    async fn execute(self) -> CliTypedResult<Vec<String>> {
+        let build_options = BuildOptions {
+            install_dir: self.move_options.output_dir.clone(),
+            ..self
+                .included_artifacts_args
+                .included_artifacts
+                .build_options(
+                    self.move_options.dev,
+                    self.move_options.skip_fetch_latest_git_deps,
+                    self.move_options.named_addresses(),
+                    self.move_options.override_std.clone(),
+                    self.move_options.bytecode_version,
+                    self.move_options.compiler_version,
+                    self.move_options.language_version,
+                    self.move_options.skip_attribute_checks,
+                    self.move_options.check_test_code,
+                )
+        };
+        let pack = BuiltPackage::build(self.move_options.get_package_path()?, build_options)
+            .map_err(|e| CliError::MoveCompilationError(format!("{:#}", e)))?;
+        if self.save_metadata {
+            pack.extract_metadata_and_save()?;
+        }
+        let ids = pack
+            .modules()
+            .map(|m| m.self_id().to_string())
+            .collect::<Vec<_>>();
+        // TODO: Also say how many scripts are compiled
+        Ok(ids)
+    }
+}
+```
+
+Commands should implement the `CliCommand<T>` trait. This allows the command to be called generically upstream,
+and `T` will automatically be serialized to JSON for the output. This allows for typed testing in unit tests, while
+still having the output converted for the full CLI.
+
+It's an anti-pattern to `panic!`; please avoid panicking, and instead return a `CliError` (or a type that converts
+into `CliError`).
+
+All logging and progress output from the CLI should use `eprintln!()` rather than `println!()`: `stdout` is reserved
+for the JSON output at the end of the command, while `stderr` is used for all other output.

From 1f35406cbbb136ce7216539e678235074d14f078 Mon Sep 17 00:00:00 2001
From: Josh Lind 
Date: Wed, 11 Sep 2024 20:30:26 -0400
Subject: [PATCH 14/36] [Consensus Observer] Make subscription creation asynchronous. 
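In spirit, this patch moves subscription creation off the observer's critical path: instead of blocking while each subscription RPC completes, the work runs in the background and results are collected as they arrive. A dependency-free sketch of the idea (a thread and a channel stand in for the async task the real change uses; the values are illustrative):

```rust
use std::{sync::mpsc, thread, time::Duration};

fn main() {
    let (tx, rx) = mpsc::channel();
    let candidate_peers = vec![1u32, 2, 3];

    // Spawn the creation work off the main loop
    // (the real code uses an async task, not a thread)
    thread::spawn(move || {
        for peer in candidate_peers {
            thread::sleep(Duration::from_millis(10)); // simulate the RPC round-trip
            let _ = tx.send(peer); // report each created subscription
        }
    });

    // The main loop stays responsive and drains results as they arrive
    let created: Vec<u32> = rx.iter().collect();
    assert_eq!(created, vec![1, 2, 3]);
}
```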
--- .../src/consensus_observer/observer/mod.rs | 1 + .../observer/subscription.rs | 347 +------- .../observer/subscription_manager.rs | 555 ++++-------- .../observer/subscription_utils.rs | 823 ++++++++++++++++++ .../publisher/consensus_publisher.rs | 20 + 5 files changed, 1024 insertions(+), 722 deletions(-) create mode 100644 consensus/src/consensus_observer/observer/subscription_utils.rs diff --git a/consensus/src/consensus_observer/observer/mod.rs b/consensus/src/consensus_observer/observer/mod.rs index 35dd0ea2ec72e..4a4e5d42881a3 100644 --- a/consensus/src/consensus_observer/observer/mod.rs +++ b/consensus/src/consensus_observer/observer/mod.rs @@ -8,3 +8,4 @@ pub mod payload_store; pub mod pending_blocks; pub mod subscription; pub mod subscription_manager; +pub mod subscription_utils; diff --git a/consensus/src/consensus_observer/observer/subscription.rs b/consensus/src/consensus_observer/observer/subscription.rs index d3023da292d00..7b368fe3417c6 100644 --- a/consensus/src/consensus_observer/observer/subscription.rs +++ b/consensus/src/consensus_observer/observer/subscription.rs @@ -1,25 +1,17 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use crate::consensus_observer::common::{ - error::Error, - logging::{LogEntry, LogSchema}, -}; +use crate::consensus_observer::{common::error::Error, observer::subscription_utils}; use aptos_config::{config::ConsensusObserverConfig, network_id::PeerNetworkId}; -use aptos_logger::{info, warn}; -use aptos_network::{application::metadata::PeerMetadata, ProtocolId}; +use aptos_network::application::metadata::PeerMetadata; use aptos_storage_interface::DbReader; use aptos_time_service::{TimeService, TimeServiceTrait}; -use ordered_float::OrderedFloat; use std::{ - collections::{BTreeMap, HashMap, HashSet}, + collections::{HashMap, HashSet}, sync::Arc, time::{Duration, Instant}, }; -// A useful constant for representing the maximum ping latency -const MAX_PING_LATENCY_SECS: f64 = 10_000.0; - /// A single consensus observer subscription pub struct ConsensusObserverSubscription { // The configuration of the consensus observer @@ -106,7 +98,8 @@ impl ConsensusObserverSubscription { self.last_optimality_check_time_and_peers = (time_now, current_connected_peers); // Sort the peers by subscription optimality - let sorted_peers = sort_peers_by_subscription_optimality(peers_and_metadata); + let sorted_peers = + subscription_utils::sort_peers_by_subscription_optimality(peers_and_metadata); // Verify that this peer is one of the most optimal peers let max_concurrent_subscriptions = @@ -184,142 +177,17 @@ impl ConsensusObserverSubscription { Ok(()) } + /// Returns the peer network id of the subscription + pub fn get_peer_network_id(&self) -> PeerNetworkId { + self.peer_network_id + } + /// Updates the last message receive time to the current time pub fn update_last_message_receive_time(&mut self) { self.last_message_receive_time = self.time_service.now(); } } -/// Gets the distance from the validators for the specified peer from the peer metadata -fn get_distance_for_peer( - peer_network_id: &PeerNetworkId, - peer_metadata: &PeerMetadata, -) -> Option { - // Get the distance for the peer - let peer_monitoring_metadata = peer_metadata.get_peer_monitoring_metadata(); - let distance = peer_monitoring_metadata - .latest_network_info_response - .as_ref() - .map(|response| response.distance_from_validators); - - // If the distance is missing, log a warning - if distance.is_none() { - warn!( - 
LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Unable to get distance for peer! Peer: {:?}", - peer_network_id - )) - ); - } - - distance -} - -/// Gets the latency for the specified peer from the peer metadata -fn get_latency_for_peer( - peer_network_id: &PeerNetworkId, - peer_metadata: &PeerMetadata, -) -> Option { - // Get the latency for the peer - let peer_monitoring_metadata = peer_metadata.get_peer_monitoring_metadata(); - let latency = peer_monitoring_metadata.average_ping_latency_secs; - - // If the latency is missing, log a warning - if latency.is_none() { - warn!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Unable to get latency for peer! Peer: {:?}", - peer_network_id - )) - ); - } - - latency -} - -/// Sorts the peers by subscription optimality (in descending order of -/// optimality). This requires: (i) sorting the peers by distance from the -/// validator set and ping latency (lower values are more optimal); and (ii) -/// filtering out peers that don't support consensus observer. -/// -/// Note: we prioritize distance over latency as we want to avoid close -/// but not up-to-date peers. If peers don't have sufficient metadata -/// for sorting, they are given a lower priority. -pub fn sort_peers_by_subscription_optimality( - peers_and_metadata: &HashMap, -) -> Vec { - // Group peers and latencies by validator distance, i.e., distance -> [(peer, latency)] - let mut unsupported_peers = Vec::new(); - let mut peers_and_latencies_by_distance = BTreeMap::new(); - for (peer_network_id, peer_metadata) in peers_and_metadata { - // Verify that the peer supports consensus observer - if !supports_consensus_observer(peer_metadata) { - unsupported_peers.push(*peer_network_id); - continue; // Skip the peer - } - - // Get the distance and latency for the peer - let distance = get_distance_for_peer(peer_network_id, peer_metadata); - let latency = get_latency_for_peer(peer_network_id, peer_metadata); - - // If the distance is not found, use the maximum distance - let distance = - distance.unwrap_or(aptos_peer_monitoring_service_types::MAX_DISTANCE_FROM_VALIDATORS); - - // If the latency is not found, use a large latency - let latency = latency.unwrap_or(MAX_PING_LATENCY_SECS); - - // Add the peer and latency to the distance group - peers_and_latencies_by_distance - .entry(distance) - .or_insert_with(Vec::new) - .push((*peer_network_id, OrderedFloat(latency))); - } - - // If there are peers that don't support consensus observer, log them - if !unsupported_peers.is_empty() { - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Found {} peers that don't support consensus observer! Peers: {:?}", - unsupported_peers.len(), - unsupported_peers - )) - ); - } - - // Sort the peers by distance and latency. Note: BTreeMaps are - // sorted by key, so the entries will be sorted by distance in ascending order. - let mut sorted_peers = Vec::new(); - for (_, mut peers_and_latencies) in peers_and_latencies_by_distance { - // Sort the peers by latency - peers_and_latencies.sort_by_key(|(_, latency)| *latency); - - // Add the peers to the sorted list (in sorted order) - sorted_peers.extend( - peers_and_latencies - .into_iter() - .map(|(peer_network_id, _)| peer_network_id), - ); - } - - // Log the sorted peers - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Sorted {} peers by subscription optimality! 
Peers: {:?}", - sorted_peers.len(), - sorted_peers - )) - ); - - sorted_peers -} - -/// Returns true iff the peer metadata indicates support for consensus observer -fn supports_consensus_observer(peer_metadata: &PeerMetadata) -> bool { - peer_metadata.supports_protocol(ProtocolId::ConsensusObserver) - && peer_metadata.supports_protocol(ProtocolId::ConsensusObserverRpc) -} - #[cfg(test)] mod test { use super::*; @@ -328,10 +196,9 @@ mod test { use aptos_network::{ protocols::wire::handshake::v1::{MessagingProtocolVersion, ProtocolIdSet}, transport::{ConnectionId, ConnectionMetadata}, + ProtocolId, }; - use aptos_peer_monitoring_service_types::{ - response::NetworkInformationResponse, PeerMonitoringMetadata, - }; + use aptos_peer_monitoring_service_types::PeerMonitoringMetadata; use aptos_storage_interface::Result; use aptos_types::{network_address::NetworkAddress, transaction::Version}; use claims::assert_matches; @@ -735,117 +602,6 @@ mod test { assert_eq!(subscription.last_message_receive_time, current_time); } - #[test] - fn test_sort_peers_by_distance_and_latency() { - // Sort an empty list of peers - let peers_and_metadata = HashMap::new(); - assert!(sort_peers_by_subscription_optimality(&peers_and_metadata).is_empty()); - - // Create a list of peers with empty metadata - let peers_and_metadata = create_peers_and_metadata(true, true, true, 10); - - // Sort the peers and verify the results - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 10); - - // Create a list of peers with valid metadata - let peers_and_metadata = create_peers_and_metadata(false, false, true, 10); - - // Sort the peers - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - - // Verify the order of the peers - verify_increasing_distance_latencies(&peers_and_metadata, &sorted_peers); - assert_eq!(sorted_peers.len(), 10); - - // Create a list of peers with and without metadata - let mut peers_and_metadata = create_peers_and_metadata(false, false, true, 10); - peers_and_metadata.extend(create_peers_and_metadata(true, false, true, 10)); - peers_and_metadata.extend(create_peers_and_metadata(false, true, true, 10)); - peers_and_metadata.extend(create_peers_and_metadata(true, true, true, 10)); - - // Sort the peers - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 40); - - // Verify the order of the first 20 peers - let (first_20_peers, sorted_peers) = sorted_peers.split_at(20); - verify_increasing_distance_latencies(&peers_and_metadata, first_20_peers); - - // Verify that the next 10 peers only have latency metadata - let (next_10_peers, sorted_peers) = sorted_peers.split_at(10); - for sorted_peer in next_10_peers { - let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); - assert!(get_distance_for_peer(sorted_peer, peer_metadata).is_none()); - assert!(get_latency_for_peer(sorted_peer, peer_metadata).is_some()); - } - - // Verify that the last 10 peers have no metadata - let (last_10_peers, remaining_peers) = sorted_peers.split_at(10); - for sorted_peer in last_10_peers { - let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); - assert!(get_distance_for_peer(sorted_peer, peer_metadata).is_none()); - assert!(get_latency_for_peer(sorted_peer, peer_metadata).is_none()); - } - assert!(remaining_peers.is_empty()); - } - - #[test] - fn test_sort_peers_by_distance_and_latency_filter() { - // Sort an empty list of peers - let peers_and_metadata = 
HashMap::new(); - assert!(sort_peers_by_subscription_optimality(&peers_and_metadata).is_empty()); - - // Create a list of peers with empty metadata (with consensus observer support) - let peers_and_metadata = create_peers_and_metadata(true, true, true, 10); - - // Sort the peers and verify the results - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 10); - - // Create a list of peers with empty metadata (without consensus observer support) - let peers_and_metadata = create_peers_and_metadata(true, true, false, 10); - - // Sort the peers and verify the results - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert!(sorted_peers.is_empty()); - - // Create a list of peers with valid metadata (without consensus observer support) - let peers_and_metadata = create_peers_and_metadata(false, false, false, 10); - - // Sort the peers and verify the results - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert!(sorted_peers.is_empty()); - - // Create a list of peers with empty metadata (with and without consensus observer support) - let mut peers_and_metadata = create_peers_and_metadata(true, true, true, 5); - peers_and_metadata.extend(create_peers_and_metadata(true, true, false, 50)); - - // Sort the peers and verify the results (only the supported peers are sorted) - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 5); - - // Create a list of peers with valid metadata (with and without consensus observer support) - let mut peers_and_metadata = create_peers_and_metadata(false, false, true, 50); - peers_and_metadata.extend(create_peers_and_metadata(false, false, false, 10)); - - // Sort the peers and verify the results (only the supported peers are sorted) - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 50); - - // Create a list of peers with valid metadata (with and without consensus observer support) - let supported_peer_and_metadata = create_peers_and_metadata(false, false, true, 1); - let unsupported_peer_and_metadata = create_peers_and_metadata(false, false, false, 1); - let mut peers_and_metadata = HashMap::new(); - peers_and_metadata.extend(supported_peer_and_metadata.clone()); - peers_and_metadata.extend(unsupported_peer_and_metadata); - - // Sort the peers and verify the results (only the supported peer is sorted) - let supported_peer = supported_peer_and_metadata.keys().next().unwrap(); - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers, vec![*supported_peer]); - } - /// Adds metadata for the specified peer to the map of peers and metadata fn add_metadata_for_peer( peers_and_metadata: &mut HashMap, @@ -901,85 +657,6 @@ mod test { } } - /// Creates a new peer and metadata for testing - fn create_peer_and_metadata( - latency: Option, - distance_from_validators: Option, - support_consensus_observer: bool, - ) -> (PeerNetworkId, PeerMetadata) { - // Create a random peer - let peer_network_id = PeerNetworkId::random(); - - // Create a new peer metadata with the given latency and distance - let connection_metadata = - create_connection_metadata(peer_network_id, support_consensus_observer); - let network_information_response = - distance_from_validators.map(|distance| NetworkInformationResponse { - connected_peers: BTreeMap::new(), - distance_from_validators: distance, - }); - let 
peer_monitoring_metadata =
-            PeerMonitoringMetadata::new(latency, None, network_information_response, None, None);
-        let peer_metadata =
-            PeerMetadata::new_for_test(connection_metadata, peer_monitoring_metadata);
-
-        (peer_network_id, peer_metadata)
-    }
-
-    /// Creates a list of peers and metadata for testing
-    fn create_peers_and_metadata(
-        empty_latency: bool,
-        empty_distance: bool,
-        support_consensus_observer: bool,
-        num_peers: u64,
-    ) -> HashMap<PeerNetworkId, PeerMetadata> {
-        let mut peers_and_metadata = HashMap::new();
-        for i in 1..num_peers + 1 {
-            // Determine the distance for the peer
-            let distance = if empty_distance { None } else { Some(i) };
-
-            // Determine the latency for the peer
-            let latency = if empty_latency { None } else { Some(i as f64) };
-
-            // Create a new peer and metadata
-            let (peer_network_id, peer_metadata) =
-                create_peer_and_metadata(latency, distance, support_consensus_observer);
-            peers_and_metadata.insert(peer_network_id, peer_metadata);
-        }
-        peers_and_metadata
-    }
-
-    /// Verifies that the distance and latencies for the peers are in
-    /// increasing order (with the distance taking precedence over the latency).
-    fn verify_increasing_distance_latencies(
-        peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
-        sorted_peers: &[PeerNetworkId],
-    ) {
-        let mut previous_latency = None;
-        let mut previous_distance = 0;
-        for sorted_peer in sorted_peers {
-            // Get the distance and latency for the peer
-            let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap();
-            let distance = get_distance_for_peer(sorted_peer, peer_metadata).unwrap();
-            let latency = get_latency_for_peer(sorted_peer, peer_metadata);
-
-            // Verify the order of the peers
-            if distance == previous_distance {
-                if let Some(latency) = latency {
-                    if let Some(previous_latency) = previous_latency {
-                        assert!(latency >= previous_latency);
-                    }
-                }
-            } else {
-                assert!(distance > previous_distance);
-            }
-
-            // Update the previous latency and distance
-            previous_latency = latency;
-            previous_distance = distance;
-        }
-    }
-
     /// Verifies that the last check time and peers are as expected
     fn verify_last_check_time_and_peers(
         subscription: &ConsensusObserverSubscription,
diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs
index e63fdfc68fa23..16cd756c176dd 100644
--- a/consensus/src/consensus_observer/observer/subscription_manager.rs
+++ b/consensus/src/consensus_observer/observer/subscription_manager.rs
@@ -13,21 +13,27 @@ use crate::consensus_observer::{
             ConsensusObserverMessage, ConsensusObserverRequest, ConsensusObserverResponse,
         },
     },
-    observer::{subscription, subscription::ConsensusObserverSubscription},
+    observer::{subscription::ConsensusObserverSubscription, subscription_utils},
     publisher::consensus_publisher::ConsensusPublisher,
 };
 use aptos_config::{config::ConsensusObserverConfig, network_id::PeerNetworkId};
+use aptos_infallible::Mutex;
 use aptos_logger::{error, info, warn};
 use aptos_network::application::{interface::NetworkClient, metadata::PeerMetadata};
 use aptos_storage_interface::DbReader;
 use aptos_time_service::TimeService;
 use itertools::Itertools;
 use std::{collections::HashMap, sync::Arc};
+use tokio::task::JoinHandle;
 
 /// The manager for consensus observer subscriptions
 pub struct SubscriptionManager {
     // The currently active set of consensus observer subscriptions
-    active_observer_subscriptions: HashMap<PeerNetworkId, ConsensusObserverSubscription>,
+    active_observer_subscriptions:
+        Arc<Mutex<HashMap<PeerNetworkId, ConsensusObserverSubscription>>>,
+
+    // The active subscription creation task (if one is currently running)
+    active_subscription_creation_task: Arc<Mutex<Option<JoinHandle<()>>>>,
+
     // The consensus observer client to send network messages
@@ -57,7 +63,8 @@ impl SubscriptionManager {
         time_service: TimeService,
     ) -> Self {
         Self {
-            active_observer_subscriptions: HashMap::new(),
+            active_observer_subscriptions: Arc::new(Mutex::new(HashMap::new())),
+            active_subscription_creation_task: Arc::new(Mutex::new(None)),
             consensus_observer_client,
             consensus_observer_config,
             consensus_publisher,
@@ -73,7 +80,12 @@ impl SubscriptionManager {
         connected_peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
         peer_network_id: PeerNetworkId,
     ) -> Result<(), Error> {
-        match self.active_observer_subscriptions.get_mut(&peer_network_id) {
+        // Get the active subscription for the peer
+        let mut active_observer_subscriptions = self.active_observer_subscriptions.lock();
+        let active_subscription = active_observer_subscriptions.get_mut(&peer_network_id);
+
+        // Check the health of the subscription
+        match active_subscription {
             Some(active_subscription) => {
                 // Verify the peer is still connected
                 if !connected_peers_and_metadata.contains_key(&peer_network_id) {
@@ -121,210 +133,39 @@ impl SubscriptionManager {
             && num_terminated_subscriptions == initial_subscription_peers.len();
 
         // Calculate the number of new subscriptions to create
+        let remaining_subscription_peers = self.get_active_subscription_peers();
         let max_concurrent_subscriptions =
             self.consensus_observer_config.max_concurrent_subscriptions as usize;
         let num_subscriptions_to_create =
-            max_concurrent_subscriptions.saturating_sub(self.active_observer_subscriptions.len());
-
-        // Create the new subscriptions (if required)
-        let terminated_subscription_peers = terminated_subscriptions
-            .iter()
-            .map(|(peer, _)| *peer)
-            .collect();
-        let new_subscription_peers = self
-            .create_new_subscriptions(
-                connected_peers_and_metadata,
-                num_subscriptions_to_create,
-                terminated_subscription_peers,
-            )
-            .await;
+            max_concurrent_subscriptions.saturating_sub(remaining_subscription_peers.len());
 
-        // Log a warning if we failed to create as many subscriptions as requested
-        let num_subscriptions_created = new_subscription_peers.len();
-        if num_subscriptions_created < num_subscriptions_to_create {
-            warn!(
-                LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
-                    "Failed to create the requested number of subscriptions! Number of subscriptions \
-                    requested: {:?}, number of subscriptions created: {:?}.",
-                    num_subscriptions_to_create,
-                    num_subscriptions_created
-                ))
-            );
-        }
+        // Update the total subscription metrics
+        update_total_subscription_metrics(&remaining_subscription_peers);
 
-        // Update the subscription metrics
-        self.update_subscription_metrics(&new_subscription_peers, terminated_subscriptions);
+        // Spawn a task to create the new subscriptions (asynchronously)
+        self.spawn_subscription_creation_task(
+            num_subscriptions_to_create,
+            remaining_subscription_peers,
+            terminated_subscriptions,
+            connected_peers_and_metadata,
+        )
+        .await;
 
         // Return an error if all subscriptions were terminated
         if all_subscriptions_terminated {
             Err(Error::SubscriptionsReset(format!(
-                "All subscriptions were unhealthy and terminated! 
Number of terminated \ - subscriptions: {:?}, number of new subscriptions created: {:?}.", - num_terminated_subscriptions, num_subscriptions_created, + "All {:?} subscriptions were unhealthy and terminated!", + num_terminated_subscriptions, ))) } else { Ok(()) } } - /// Attempts to create the given number of new subscriptions - /// and returns the peer IDs of the newly created subscriptions. - /// Any `unhealthy_subscription_peers` are excluded from selection. - async fn create_new_subscriptions( - &mut self, - connected_peers_and_metadata: HashMap, - num_subscriptions_to_create: usize, - unhealthy_subscription_peers: Vec, - ) -> Vec { - // Return early if we don't need to create any new subscriptions - if num_subscriptions_to_create == 0 { - return vec![]; - } - - // Sort the potential peers for subscription requests - let mut sorted_potential_peers = match self.sort_peers_for_subscriptions( - connected_peers_and_metadata, - unhealthy_subscription_peers, - ) { - Some(sorted_peers) => sorted_peers, - None => { - error!(LogSchema::new(LogEntry::ConsensusObserver) - .message("Failed to sort peers for subscription requests!")); - return vec![]; - }, - }; - - // Verify that we have potential peers to subscribe to - if sorted_potential_peers.is_empty() { - warn!(LogSchema::new(LogEntry::ConsensusObserver) - .message("There are no potential peers to subscribe to!")); - return vec![]; - } - - // Go through the potential peers and attempt to create new subscriptions - let mut created_subscription_peers = vec![]; - for _ in 0..num_subscriptions_to_create { - // If there are no peers left to subscribe to, return early - if sorted_potential_peers.is_empty() { - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "There are no more potential peers to subscribe to! \ - Num created subscriptions: {:?}", - created_subscription_peers.len() - )) - ); - break; - } - - // Attempt to create a subscription - let (subscription_peer, failed_subscription_peers) = self - .create_single_subscription(sorted_potential_peers.clone()) - .await; - - // Remove the failed peers from the sorted list - sorted_potential_peers.retain(|peer| !failed_subscription_peers.contains(peer)); - - // Process a successful subscription creation - if let Some(subscription_peer) = subscription_peer { - // Add the peer to the list of created subscriptions - created_subscription_peers.push(subscription_peer); - - // Remove the peer from the sorted list (for the next selection) - sorted_potential_peers.retain(|peer| peer != &subscription_peer); - } - } - - // Return the list of created subscriptions - created_subscription_peers - } - - /// Attempts to create a new subscription to a single peer from - /// the sorted list of potential peers. If a new subscription is - /// successfully created, the peer is returned. Likewise, any - /// peers with failed subscription attempts are also returned. - async fn create_single_subscription( - &mut self, - sorted_potential_peers: Vec, - ) -> (Option, Vec) { - let mut peers_with_failed_attempts = vec![]; - for potential_peer in sorted_potential_peers { - // Log the subscription attempt - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Attempting to subscribe to potential peer: {}!", - potential_peer - )) - ); - - // Send a subscription request to the peer and wait for the response. - // TODO: we should make this non-blocking! 
- let subscription_request = ConsensusObserverRequest::Subscribe; - let request_timeout_ms = self.consensus_observer_config.network_request_timeout_ms; - let response = self - .consensus_observer_client - .send_rpc_request_to_peer(&potential_peer, subscription_request, request_timeout_ms) - .await; - - // Process the response and update the active subscription - match response { - Ok(ConsensusObserverResponse::SubscribeAck) => { - // Log the successful subscription - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Successfully subscribed to peer: {}!", - potential_peer - )) - ); - - // Create the new subscription - let subscription = ConsensusObserverSubscription::new( - self.consensus_observer_config, - self.db_reader.clone(), - potential_peer, - self.time_service.clone(), - ); - - // Add the subscription to the active subscriptions - self.active_observer_subscriptions - .insert(potential_peer, subscription); - - // Return the successful subscription peer - return (Some(potential_peer), peers_with_failed_attempts); - }, - Ok(response) => { - // We received an invalid response - warn!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Got unexpected response type for subscription request: {:?}", - response.get_label() - )) - ); - - // Add the peer to the list of failed attempts - peers_with_failed_attempts.push(potential_peer); - }, - Err(error) => { - // We encountered an error while sending the request - error!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Failed to send subscription request to peer: {}! Error: {:?}", - potential_peer, error - )) - ); - - // Add the peer to the list of failed attempts - peers_with_failed_attempts.push(potential_peer); - }, - } - } - - // We failed to create a new subscription - (None, peers_with_failed_attempts) - } - /// Returns the currently active subscription peers fn get_active_subscription_peers(&self) -> Vec { - self.active_observer_subscriptions.keys().cloned().collect() + let active_observer_subscriptions = self.active_observer_subscriptions.lock(); + active_observer_subscriptions.keys().cloned().collect() } /// Gets the connected peers and metadata. If an error @@ -347,38 +188,89 @@ impl SubscriptionManager { }) } - /// Produces a list of sorted peers to service our subscription requests. - /// Note: if `unhealthy_subscription_peers` are provided, they will be excluded - /// from the selection process. Likewise, all peers currently subscribed to us - /// will be excluded from the selection process. - fn sort_peers_for_subscriptions( + /// Spawns a new subscription creation task to create + /// the specified number of new subscriptions. 
+    async fn spawn_subscription_creation_task(
         &mut self,
-        mut connected_peers_and_metadata: HashMap<PeerNetworkId, PeerMetadata>,
-        unhealthy_subscription_peers: Vec<PeerNetworkId>,
-    ) -> Option<Vec<PeerNetworkId>> {
-        // Remove any peers we're already subscribed to
-        for active_subscription_peer in self.get_active_subscription_peers() {
-            let _ = connected_peers_and_metadata.remove(&active_subscription_peer);
+        num_subscriptions_to_create: usize,
+        active_subscription_peers: Vec<PeerNetworkId>,
+        terminated_subscriptions: Vec<(PeerNetworkId, Error)>,
+        connected_peers_and_metadata: HashMap<PeerNetworkId, PeerMetadata>,
+    ) {
+        // If there are no new subscriptions to create, return early
+        if num_subscriptions_to_create == 0 {
+            return;
         }
 
-        // Remove any unhealthy subscription peers
-        for unhealthy_peer in unhealthy_subscription_peers {
-            let _ = connected_peers_and_metadata.remove(&unhealthy_peer);
+        // If there is an active subscription creation task, return early
+        if let Some(subscription_creation_task) = &*self.active_subscription_creation_task.lock() {
+            if !subscription_creation_task.is_finished() {
+                return; // The task is still running
+            }
         }
 
-        // Remove any peers that are currently subscribed to us
-        if let Some(consensus_publisher) = &self.consensus_publisher {
-            for peer_network_id in consensus_publisher.get_active_subscribers() {
-                let _ = connected_peers_and_metadata.remove(&peer_network_id);
+        // Clone the shared state for the task
+        let active_observer_subscriptions = self.active_observer_subscriptions.clone();
+        let consensus_observer_config = self.consensus_observer_config;
+        let consensus_observer_client = self.consensus_observer_client.clone();
+        let consensus_publisher = self.consensus_publisher.clone();
+        let db_reader = self.db_reader.clone();
+        let time_service = self.time_service.clone();
+
+        // Otherwise, we should spawn a new subscription creation task
+        let subscription_creation_task = tokio::spawn(async move {
+            // Identify the terminated subscription peers
+            let terminated_subscription_peers = terminated_subscriptions
+                .iter()
+                .map(|(peer, _)| *peer)
+                .collect();
+
+            // Create the new subscriptions
+            let new_subscriptions = subscription_utils::create_new_subscriptions(
+                consensus_observer_config,
+                consensus_observer_client,
+                consensus_publisher,
+                db_reader,
+                time_service,
+                connected_peers_and_metadata,
+                num_subscriptions_to_create,
+                active_subscription_peers,
+                terminated_subscription_peers,
+            )
+            .await;
+
+            // Identify the new subscription peers
+            let new_subscription_peers = new_subscriptions
+                .iter()
+                .map(|subscription| subscription.get_peer_network_id())
+                .collect::<Vec<_>>();
+
+            // Add the new subscriptions to the list of active subscriptions
+            for subscription in new_subscriptions {
+                active_observer_subscriptions
+                    .lock()
+                    .insert(subscription.get_peer_network_id(), subscription);
             }
-        }
 
-        // Sort the peers by subscription optimality
-        let sorted_peers =
-            subscription::sort_peers_by_subscription_optimality(&connected_peers_and_metadata);
+            // Log a warning if we failed to create as many subscriptions as requested
+            let num_subscriptions_created = new_subscription_peers.len();
+            if num_subscriptions_created < num_subscriptions_to_create {
+                warn!(
+                    LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                        "Failed to create the requested number of subscriptions! 
Number of subscriptions \ + requested: {:?}, number of subscriptions created: {:?}.", + num_subscriptions_to_create, + num_subscriptions_created + )) + ); + } + + // Update the subscription change metrics + update_subscription_change_metrics(new_subscription_peers, terminated_subscriptions); + }); - // Return the sorted peers - Some(sorted_peers) + // Update the active subscription creation task + *self.active_subscription_creation_task.lock() = Some(subscription_creation_task); } /// Terminates any unhealthy subscriptions and returns the list of terminated subscriptions @@ -414,7 +306,9 @@ impl SubscriptionManager { /// Unsubscribes from the given peer by sending an unsubscribe request fn unsubscribe_from_peer(&mut self, peer_network_id: PeerNetworkId) { // Remove the peer from the active subscriptions - self.active_observer_subscriptions.remove(&peer_network_id); + self.active_observer_subscriptions + .lock() + .remove(&peer_network_id); // Send an unsubscribe request to the peer and process the response. // Note: we execute this asynchronously, as we don't need to wait for the response. @@ -463,65 +357,68 @@ impl SubscriptionManager { }); } - /// Updates the subscription creation and termination metrics - fn update_subscription_metrics( - &self, - new_subscription_peers: &[PeerNetworkId], - terminated_subscription_peers: Vec<(PeerNetworkId, Error)>, - ) { - // Update the created subscriptions metrics - for peer_network_id in new_subscription_peers { - metrics::increment_counter( - &metrics::OBSERVER_CREATED_SUBSCRIPTIONS, - metrics::CREATED_SUBSCRIPTION_LABEL, - peer_network_id, - ); - } - - // Update the terminated subscriptions metrics - for (peer_network_id, termination_reason) in terminated_subscription_peers { - metrics::increment_counter( - &metrics::OBSERVER_TERMINATED_SUBSCRIPTIONS, - termination_reason.get_label(), - &peer_network_id, - ); - } - - // Set the number of active subscriptions (grouped by network ID) - let active_subscription_peers = self.get_active_subscription_peers(); - for (network_id, active_subscription_peers) in &active_subscription_peers - .iter() - .chunk_by(|peer_network_id| peer_network_id.network_id()) - { - metrics::set_gauge( - &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, - &network_id, - active_subscription_peers.collect::>().len() as i64, - ); - } - } - - /// Verifies that the message is from an active subscription. - /// If not, an error is returned. + /// Verifies that the message is from an active + /// subscription. If not, an error is returned. 
pub fn verify_message_for_subscription( &mut self, message_sender: PeerNetworkId, ) -> Result<(), Error> { - match self.active_observer_subscriptions.get_mut(&message_sender) { - Some(active_subscription) => { - // The message is from an active subscription (update the last message time) - active_subscription.update_last_message_receive_time(); - Ok(()) - }, - None => { - // The message is not from an active subscription (send another unsubscribe request) - self.unsubscribe_from_peer(message_sender); - Err(Error::InvalidMessageError(format!( - "Received message from unexpected peer, and not an active subscription: {}!", - message_sender - ))) - }, + // Check if the message is from an active subscription + if let Some(active_subscription) = self + .active_observer_subscriptions + .lock() + .get_mut(&message_sender) + { + // Update the last message receive time and return early + active_subscription.update_last_message_receive_time(); + return Ok(()); } + + // Otherwise, the message is not from an active subscription. + // Send another unsubscribe request, and return an error. + self.unsubscribe_from_peer(message_sender); + Err(Error::InvalidMessageError(format!( + "Received message from unexpected peer, and not an active subscription: {}!", + message_sender + ))) + } +} + +/// Updates the subscription creation and termination metrics +fn update_subscription_change_metrics( + new_subscription_peers: Vec, + terminated_subscription_peers: Vec<(PeerNetworkId, Error)>, +) { + // Update the created subscriptions metrics + for peer_network_id in new_subscription_peers { + metrics::increment_counter( + &metrics::OBSERVER_CREATED_SUBSCRIPTIONS, + metrics::CREATED_SUBSCRIPTION_LABEL, + &peer_network_id, + ); + } + + // Update the terminated subscriptions metrics + for (peer_network_id, termination_reason) in terminated_subscription_peers { + metrics::increment_counter( + &metrics::OBSERVER_TERMINATED_SUBSCRIPTIONS, + termination_reason.get_label(), + &peer_network_id, + ); + } +} + +/// Updates the total subscription metrics (grouped by network ID) +fn update_total_subscription_metrics(active_subscription_peers: &[PeerNetworkId]) { + for (network_id, active_subscription_peers) in &active_subscription_peers + .iter() + .chunk_by(|peer_network_id| peer_network_id.network_id()) + { + metrics::set_gauge( + &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, + &network_id, + active_subscription_peers.collect::>().len() as i64, + ); } } @@ -800,92 +697,6 @@ mod test { .is_empty()); } - #[tokio::test] - async fn test_sort_peers_for_subscriptions() { - // Create a consensus observer client - let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; - let (peers_and_metadata, consensus_observer_client) = - create_consensus_observer_client(network_ids); - - // Create a new subscription manager - let consensus_observer_config = ConsensusObserverConfig::default(); - let db_reader = create_mock_db_reader(); - let mut subscription_manager = SubscriptionManager::new( - consensus_observer_client, - consensus_observer_config, - None, - db_reader.clone(), - TimeService::mock(), - ); - - // Sort the peers and verify that no peers are returned - let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); - assert!(sorted_peers.is_empty()); - - // Add a connected validator peer, VFN peer and public peer - for network_id in network_ids { - let distance_from_validators = match network_id { - NetworkId::Validator => 0, - NetworkId::Vfn => 1, - NetworkId::Public => 2, - }; - 
create_peer_and_connection( - *network_id, - peers_and_metadata.clone(), - distance_from_validators, - None, - true, - ); - } - - // Sort the peers and verify the ordering (according to distance) - let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); - assert_eq!(sorted_peers[0].network_id(), NetworkId::Validator); - assert_eq!(sorted_peers[1].network_id(), NetworkId::Vfn); - assert_eq!(sorted_peers[2].network_id(), NetworkId::Public); - assert_eq!(sorted_peers.len(), 3); - - // Sort the peers, but mark the validator as unhealthy (so it's ignored) - let sorted_peer_subset = - sort_subscription_peers(&mut subscription_manager, vec![sorted_peers[0]]); - assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Vfn); - assert_eq!(sorted_peer_subset[1].network_id(), NetworkId::Public); - assert_eq!(sorted_peer_subset.len(), 2); - - // Sort the peers, but mark the VFN and validator as unhealthy (so they're ignored) - let sorted_peer_subset = sort_subscription_peers(&mut subscription_manager, vec![ - sorted_peers[0], - sorted_peers[1], - ]); - assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Public); - assert_eq!(sorted_peer_subset.len(), 1); - - // Remove all the peers and verify that no peers are returned upon sorting - for peer_network_id in sorted_peers { - remove_peer_and_connection(peers_and_metadata.clone(), peer_network_id); - } - let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); - assert!(sorted_peers.is_empty()); - - // Add multiple validator peers, with different latencies - let mut validator_peers = vec![]; - for ping_latency_secs in [0.9, 0.8, 0.5, 0.1, 0.05] { - let validator_peer = create_peer_and_connection( - NetworkId::Validator, - peers_and_metadata.clone(), - 0, - Some(ping_latency_secs), - true, - ); - validator_peers.push(validator_peer); - } - - // Sort the peers and verify the ordering (according to latency) - let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); - let expected_peers = validator_peers.into_iter().rev().collect::>(); - assert_eq!(sorted_peers, expected_peers); - } - #[tokio::test] async fn test_terminate_unhealthy_subscriptions() { // Create a consensus observer client @@ -1226,6 +1037,7 @@ mod test { ); subscription_manager .active_observer_subscriptions + .lock() .insert(subscription_peer, observer_subscription); } @@ -1284,37 +1096,6 @@ mod test { peer_network_id } - /// Removes the peer and connection metadata for the given peer - fn remove_peer_and_connection( - peers_and_metadata: Arc, - peer_network_id: PeerNetworkId, - ) { - let peer_metadata = peers_and_metadata - .get_metadata_for_peer(peer_network_id) - .unwrap(); - let connection_id = peer_metadata.get_connection_metadata().connection_id; - peers_and_metadata - .remove_peer_metadata(peer_network_id, connection_id) - .unwrap(); - } - - /// A simple helper method that sorts the given peers for a subscription - fn sort_subscription_peers( - subscription_manager: &mut SubscriptionManager, - unhealthy_subscription_peers: Vec, - ) -> Vec { - // Get the connected peers and metadata - let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); - - // Sort the peers for subscription requests - subscription_manager - .sort_peers_for_subscriptions( - connected_peers_and_metadata, - unhealthy_subscription_peers, - ) - .unwrap() - } - /// A simple helper method that terminates any unhealthy subscriptions fn terminate_any_unhealthy_subscriptions( subscription_manager: &mut 
SubscriptionManager,
diff --git a/consensus/src/consensus_observer/observer/subscription_utils.rs b/consensus/src/consensus_observer/observer/subscription_utils.rs
new file mode 100644
index 0000000000000..7dd5ffa9b2ace
--- /dev/null
+++ b/consensus/src/consensus_observer/observer/subscription_utils.rs
@@ -0,0 +1,823 @@
+// Copyright © Aptos Foundation
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::consensus_observer::{
+    common::logging::{LogEntry, LogSchema},
+    network::{
+        observer_client::ConsensusObserverClient,
+        observer_message::{
+            ConsensusObserverMessage, ConsensusObserverRequest, ConsensusObserverResponse,
+        },
+    },
+    observer::subscription::ConsensusObserverSubscription,
+    publisher::consensus_publisher::ConsensusPublisher,
+};
+use aptos_config::{config::ConsensusObserverConfig, network_id::PeerNetworkId};
+use aptos_logger::{error, info, warn};
+use aptos_network::{
+    application::{interface::NetworkClient, metadata::PeerMetadata},
+    ProtocolId,
+};
+use aptos_storage_interface::DbReader;
+use aptos_time_service::TimeService;
+use ordered_float::OrderedFloat;
+use std::{
+    collections::{BTreeMap, HashMap},
+    sync::Arc,
+};
+
+// A useful constant for representing the maximum ping latency
+const MAX_PING_LATENCY_SECS: f64 = 10_000.0;
+
+/// Attempts to create the given number of new subscriptions
+/// from the connected peers and metadata. Any active or unhealthy
+/// subscriptions are excluded from the selection process.
+pub async fn create_new_subscriptions(
+    consensus_observer_config: ConsensusObserverConfig,
+    consensus_observer_client: Arc<
+        ConsensusObserverClient<NetworkClient<ConsensusObserverMessage>>,
+    >,
+    consensus_publisher: Option<Arc<ConsensusPublisher>>,
+    db_reader: Arc<dyn DbReader>,
+    time_service: TimeService,
+    connected_peers_and_metadata: HashMap<PeerNetworkId, PeerMetadata>,
+    num_subscriptions_to_create: usize,
+    active_subscription_peers: Vec<PeerNetworkId>,
+    unhealthy_subscription_peers: Vec<PeerNetworkId>,
+) -> Vec<ConsensusObserverSubscription> {
+    // Sort the potential peers for subscription requests
+    let mut sorted_potential_peers = match sort_peers_for_subscriptions(
+        connected_peers_and_metadata,
+        active_subscription_peers,
+        unhealthy_subscription_peers,
+        consensus_publisher,
+    ) {
+        Some(sorted_peers) => sorted_peers,
+        None => {
+            error!(LogSchema::new(LogEntry::ConsensusObserver)
+                .message("Failed to sort peers for subscription requests!"));
+            return vec![];
+        },
+    };
+
+    // Verify that we have potential peers to subscribe to
+    if sorted_potential_peers.is_empty() {
+        warn!(LogSchema::new(LogEntry::ConsensusObserver)
+            .message("There are no potential peers to subscribe to!"));
+        return vec![];
+    }
+
+    // Go through the potential peers and attempt to create new subscriptions
+    let mut created_subscriptions = vec![];
+    for _ in 0..num_subscriptions_to_create {
+        // If there are no peers left to subscribe to, return early
+        if sorted_potential_peers.is_empty() {
+            info!(
+                LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                    "There are no more potential peers to subscribe to! \
+                    Num created subscriptions: {:?}",
+                    created_subscriptions.len()
+                ))
+            );
+            break;
+        }
+
+        // Attempt to create a new subscription
+        let (observer_subscription, failed_subscription_peers) = create_single_subscription(
+            consensus_observer_config,
+            consensus_observer_client.clone(),
+            db_reader.clone(),
+            sorted_potential_peers.clone(),
+            time_service.clone(),
+        )
+        .await;
+
+        // Remove the failed peers from the sorted list
+        sorted_potential_peers.retain(|peer| !failed_subscription_peers.contains(peer));
+
+        // Process a successful subscription creation
+        if let Some(observer_subscription) = observer_subscription {
+            // Remove the peer from the sorted list (for the next selection)
+            sorted_potential_peers
+                .retain(|peer| *peer != observer_subscription.get_peer_network_id());
+
+            // Add the newly created subscription to the subscription list
+            created_subscriptions.push(observer_subscription);
+        }
+    }
+
+    // Return the list of created subscriptions
+    created_subscriptions
+}
+
+/// Attempts to create a new subscription to a single peer from the
+/// sorted list of potential peers. If successful, the new subscription
+/// is returned, alongside any peers with failed attempts.
+async fn create_single_subscription(
+    consensus_observer_config: ConsensusObserverConfig,
+    consensus_observer_client: Arc<
+        ConsensusObserverClient<NetworkClient<ConsensusObserverMessage>>,
+    >,
+    db_reader: Arc<dyn DbReader>,
+    sorted_potential_peers: Vec<PeerNetworkId>,
+    time_service: TimeService,
+) -> (Option<ConsensusObserverSubscription>, Vec<PeerNetworkId>) {
+    let mut peers_with_failed_attempts = vec![];
+    for potential_peer in sorted_potential_peers {
+        // Log the subscription attempt
+        info!(
+            LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                "Attempting to subscribe to potential peer: {}!",
+                potential_peer
+            ))
+        );
+
+        // Send a subscription request to the peer and wait for the response
+        let subscription_request = ConsensusObserverRequest::Subscribe;
+        let request_timeout_ms = consensus_observer_config.network_request_timeout_ms;
+        let response = consensus_observer_client
+            .send_rpc_request_to_peer(&potential_peer, subscription_request, request_timeout_ms)
+            .await;
+
+        // Process the response and update the active subscription
+        match response {
+            Ok(ConsensusObserverResponse::SubscribeAck) => {
+                // Log the successful subscription
+                info!(
+                    LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                        "Successfully subscribed to peer: {}!",
+                        potential_peer
+                    ))
+                );
+
+                // Create the new subscription
+                let subscription = ConsensusObserverSubscription::new(
+                    consensus_observer_config,
+                    db_reader.clone(),
+                    potential_peer,
+                    time_service.clone(),
+                );
+
+                // Return the successful subscription
+                return (Some(subscription), peers_with_failed_attempts);
+            },
+            Ok(response) => {
+                // We received an invalid response
+                warn!(
+                    LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                        "Got unexpected response type for subscription request: {:?}",
+                        response.get_label()
+                    ))
+                );
+
+                // Add the peer to the list of failed attempts
+                peers_with_failed_attempts.push(potential_peer);
+            },
+            Err(error) => {
+                // We encountered an error while sending the request
+                error!(
+                    LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                        "Failed to send subscription request to peer: {}! Error: {:?}",
+                        potential_peer, error
+                    ))
+                );
+
+                // Add the peer to the list of failed attempts
+                peers_with_failed_attempts.push(potential_peer);
+            },
+        }
+    }
+
+    // We failed to create a new subscription
+    (None, peers_with_failed_attempts)
+}
+
+/// Gets the distance from the validators for the specified peer from the peer metadata
+fn get_distance_for_peer(
+    peer_network_id: &PeerNetworkId,
+    peer_metadata: &PeerMetadata,
+) -> Option<u64> {
+    // Get the distance for the peer
+    let peer_monitoring_metadata = peer_metadata.get_peer_monitoring_metadata();
+    let distance = peer_monitoring_metadata
+        .latest_network_info_response
+        .as_ref()
+        .map(|response| response.distance_from_validators);
+
+    // If the distance is missing, log a warning
+    if distance.is_none() {
+        warn!(
+            LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                "Unable to get distance for peer! Peer: {:?}",
+                peer_network_id
+            ))
+        );
+    }
+
+    distance
+}
+
+/// Gets the latency for the specified peer from the peer metadata
+fn get_latency_for_peer(
+    peer_network_id: &PeerNetworkId,
+    peer_metadata: &PeerMetadata,
+) -> Option<f64> {
+    // Get the latency for the peer
+    let peer_monitoring_metadata = peer_metadata.get_peer_monitoring_metadata();
+    let latency = peer_monitoring_metadata.average_ping_latency_secs;
+
+    // If the latency is missing, log a warning
+    if latency.is_none() {
+        warn!(
+            LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                "Unable to get latency for peer! Peer: {:?}",
+                peer_network_id
+            ))
+        );
+    }
+
+    latency
+}
+
+/// Produces a list of sorted peers to service the subscription requests.
+/// Any active or unhealthy subscriptions are excluded from the selection process.
+/// Likewise, any peers currently subscribed to us are also excluded.
+fn sort_peers_for_subscriptions(
+    mut connected_peers_and_metadata: HashMap<PeerNetworkId, PeerMetadata>,
+    active_subscription_peers: Vec<PeerNetworkId>,
+    unhealthy_subscription_peers: Vec<PeerNetworkId>,
+    consensus_publisher: Option<Arc<ConsensusPublisher>>,
+) -> Option<Vec<PeerNetworkId>> {
+    // Remove any peers we're already subscribed to
+    for active_subscription_peer in active_subscription_peers {
+        let _ = connected_peers_and_metadata.remove(&active_subscription_peer);
+    }
+
+    // Remove any unhealthy subscription peers
+    for unhealthy_peer in unhealthy_subscription_peers {
+        let _ = connected_peers_and_metadata.remove(&unhealthy_peer);
+    }
+
+    // Remove any peers that are currently subscribed to us
+    if let Some(consensus_publisher) = consensus_publisher {
+        for peer_network_id in consensus_publisher.get_active_subscribers() {
+            let _ = connected_peers_and_metadata.remove(&peer_network_id);
+        }
+    }
+
+    // Sort the peers by subscription optimality
+    let sorted_peers = sort_peers_by_subscription_optimality(&connected_peers_and_metadata);
+
+    // Return the sorted peers
+    Some(sorted_peers)
+}
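For intuition, the sorting step that the function below implements reduces to: group peers by
distance (ascending), then order each group by ping latency. A tiny, self-contained illustration
with made-up peer data (illustrative only; not part of this patch):

```rust
use ordered_float::OrderedFloat;
use std::collections::BTreeMap;

fn main() {
    // Hypothetical (name, distance, ping latency) tuples for five peers
    let peers: Vec<(&str, u64, f64)> = vec![
        ("a", 1, 0.9),
        ("b", 0, 0.5),
        ("c", 1, 0.1),
        ("d", 0, 0.7),
        ("e", 2, 0.05),
    ];

    // Group by distance; BTreeMap keeps distances in ascending order
    let mut by_distance: BTreeMap<u64, Vec<(&str, OrderedFloat<f64>)>> = BTreeMap::new();
    for (name, distance, latency) in peers {
        by_distance
            .entry(distance)
            .or_default()
            .push((name, OrderedFloat(latency)));
    }

    // Sort each distance bucket by latency, then flatten
    let mut sorted = Vec::new();
    for (_, mut bucket) in by_distance {
        bucket.sort_by_key(|(_, latency)| *latency);
        sorted.extend(bucket.into_iter().map(|(name, _)| name));
    }

    // Closest (and fastest within each distance) peers come first
    assert_eq!(sorted, vec!["b", "d", "c", "a", "e"]);
}
```

Because `BTreeMap` iterates its keys in ascending order, sorting each bucket by latency is all
that is needed to obtain the global ordering.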
+/// Sorts the peers by subscription optimality (in descending order of
+/// optimality). This requires: (i) sorting the peers by distance from the
+/// validator set and ping latency (lower values are more optimal); and (ii)
+/// filtering out peers that don't support consensus observer.
+///
+/// Note: we prioritize distance over latency as we want to avoid close
+/// but not up-to-date peers. If peers don't have sufficient metadata
+/// for sorting, they are given a lower priority.
+pub fn sort_peers_by_subscription_optimality(
+    peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
+) -> Vec<PeerNetworkId> {
+    // Group peers and latencies by validator distance, i.e., distance -> [(peer, latency)]
+    let mut unsupported_peers = Vec::new();
+    let mut peers_and_latencies_by_distance = BTreeMap::new();
+    for (peer_network_id, peer_metadata) in peers_and_metadata {
+        // Verify that the peer supports consensus observer
+        if !supports_consensus_observer(peer_metadata) {
+            unsupported_peers.push(*peer_network_id);
+            continue; // Skip the peer
+        }
+
+        // Get the distance and latency for the peer
+        let distance = get_distance_for_peer(peer_network_id, peer_metadata);
+        let latency = get_latency_for_peer(peer_network_id, peer_metadata);
+
+        // If the distance is not found, use the maximum distance
+        let distance =
+            distance.unwrap_or(aptos_peer_monitoring_service_types::MAX_DISTANCE_FROM_VALIDATORS);
+
+        // If the latency is not found, use a large latency
+        let latency = latency.unwrap_or(MAX_PING_LATENCY_SECS);
+
+        // Add the peer and latency to the distance group
+        peers_and_latencies_by_distance
+            .entry(distance)
+            .or_insert_with(Vec::new)
+            .push((*peer_network_id, OrderedFloat(latency)));
+    }
+
+    // If there are peers that don't support consensus observer, log them
+    if !unsupported_peers.is_empty() {
+        info!(
+            LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                "Found {} peers that don't support consensus observer! Peers: {:?}",
+                unsupported_peers.len(),
+                unsupported_peers
+            ))
+        );
+    }
+
+    // Sort the peers by distance and latency. Note: BTreeMaps are
+    // sorted by key, so the entries will be sorted by distance in ascending order.
+    let mut sorted_peers = Vec::new();
+    for (_, mut peers_and_latencies) in peers_and_latencies_by_distance {
+        // Sort the peers by latency
+        peers_and_latencies.sort_by_key(|(_, latency)| *latency);
+
+        // Add the peers to the sorted list (in sorted order)
+        sorted_peers.extend(
+            peers_and_latencies
+                .into_iter()
+                .map(|(peer_network_id, _)| peer_network_id),
+        );
+    }
+
+    // Log the sorted peers
+    info!(
+        LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+            "Sorted {} peers by subscription optimality! 
Peers: {:?}", + sorted_peers.len(), + sorted_peers + )) + ); + + sorted_peers +} + +/// Returns true iff the peer metadata indicates support for consensus observer +fn supports_consensus_observer(peer_metadata: &PeerMetadata) -> bool { + peer_metadata.supports_protocol(ProtocolId::ConsensusObserver) + && peer_metadata.supports_protocol(ProtocolId::ConsensusObserverRpc) +} + +#[cfg(test)] +mod tests { + use super::*; + use aptos_config::{config::PeerRole, network_id::NetworkId}; + use aptos_netcore::transport::ConnectionOrigin; + use aptos_network::{ + application::storage::PeersAndMetadata, + protocols::wire::handshake::v1::{MessagingProtocolVersion, ProtocolIdSet}, + transport::{ConnectionId, ConnectionMetadata}, + }; + use aptos_peer_monitoring_service_types::{ + response::NetworkInformationResponse, PeerMonitoringMetadata, + }; + use aptos_types::{network_address::NetworkAddress, PeerId}; + use maplit::hashmap; + use std::collections::HashSet; + + #[test] + fn test_sort_peers_by_distance_and_latency() { + // Sort an empty list of peers + let peers_and_metadata = HashMap::new(); + assert!(sort_peers_by_subscription_optimality(&peers_and_metadata).is_empty()); + + // Create a list of peers with empty metadata + let peers_and_metadata = create_peers_and_metadata(true, true, true, 10); + + // Sort the peers and verify the results + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 10); + + // Create a list of peers with valid metadata + let peers_and_metadata = create_peers_and_metadata(false, false, true, 10); + + // Sort the peers + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + + // Verify the order of the peers + verify_increasing_distance_latencies(&peers_and_metadata, &sorted_peers); + assert_eq!(sorted_peers.len(), 10); + + // Create a list of peers with and without metadata + let mut peers_and_metadata = create_peers_and_metadata(false, false, true, 10); + peers_and_metadata.extend(create_peers_and_metadata(true, false, true, 10)); + peers_and_metadata.extend(create_peers_and_metadata(false, true, true, 10)); + peers_and_metadata.extend(create_peers_and_metadata(true, true, true, 10)); + + // Sort the peers + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 40); + + // Verify the order of the first 20 peers + let (first_20_peers, sorted_peers) = sorted_peers.split_at(20); + verify_increasing_distance_latencies(&peers_and_metadata, first_20_peers); + + // Verify that the next 10 peers only have latency metadata + let (next_10_peers, sorted_peers) = sorted_peers.split_at(10); + for sorted_peer in next_10_peers { + let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); + assert!(get_distance_for_peer(sorted_peer, peer_metadata).is_none()); + assert!(get_latency_for_peer(sorted_peer, peer_metadata).is_some()); + } + + // Verify that the last 10 peers have no metadata + let (last_10_peers, remaining_peers) = sorted_peers.split_at(10); + for sorted_peer in last_10_peers { + let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); + assert!(get_distance_for_peer(sorted_peer, peer_metadata).is_none()); + assert!(get_latency_for_peer(sorted_peer, peer_metadata).is_none()); + } + assert!(remaining_peers.is_empty()); + } + + #[test] + fn test_sort_peers_by_distance_and_latency_filter() { + // Sort an empty list of peers + let peers_and_metadata = HashMap::new(); + 
assert!(sort_peers_by_subscription_optimality(&peers_and_metadata).is_empty()); + + // Create a list of peers with empty metadata (with consensus observer support) + let peers_and_metadata = create_peers_and_metadata(true, true, true, 10); + + // Sort the peers and verify the results + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 10); + + // Create a list of peers with empty metadata (without consensus observer support) + let peers_and_metadata = create_peers_and_metadata(true, true, false, 10); + + // Sort the peers and verify the results + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert!(sorted_peers.is_empty()); + + // Create a list of peers with valid metadata (without consensus observer support) + let peers_and_metadata = create_peers_and_metadata(false, false, false, 10); + + // Sort the peers and verify the results + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert!(sorted_peers.is_empty()); + + // Create a list of peers with empty metadata (with and without consensus observer support) + let mut peers_and_metadata = create_peers_and_metadata(true, true, true, 5); + peers_and_metadata.extend(create_peers_and_metadata(true, true, false, 50)); + + // Sort the peers and verify the results (only the supported peers are sorted) + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 5); + + // Create a list of peers with valid metadata (with and without consensus observer support) + let mut peers_and_metadata = create_peers_and_metadata(false, false, true, 50); + peers_and_metadata.extend(create_peers_and_metadata(false, false, false, 10)); + + // Sort the peers and verify the results (only the supported peers are sorted) + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 50); + + // Create a list of peers with valid metadata (with and without consensus observer support) + let supported_peer_and_metadata = create_peers_and_metadata(false, false, true, 1); + let unsupported_peer_and_metadata = create_peers_and_metadata(false, false, false, 1); + let mut peers_and_metadata = HashMap::new(); + peers_and_metadata.extend(supported_peer_and_metadata.clone()); + peers_and_metadata.extend(unsupported_peer_and_metadata); + + // Sort the peers and verify the results (only the supported peer is sorted) + let supported_peer = supported_peer_and_metadata.keys().next().unwrap(); + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers, vec![*supported_peer]); + } + + #[tokio::test] + async fn test_sort_peers_for_subscriptions() { + // Create a consensus observer client + let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; + let (peers_and_metadata, consensus_observer_client) = + create_consensus_observer_client(network_ids); + + // Create a consensus publisher + let consensus_observer_config = ConsensusObserverConfig::default(); + let (consensus_publisher, _) = + ConsensusPublisher::new(consensus_observer_config, consensus_observer_client.clone()); + let consensus_publisher = Arc::new(consensus_publisher); + + // Sort the peers and verify that no peers are returned + let sorted_peers = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![], + vec![], + ); + assert!(sorted_peers.is_empty()); + + // Add a connected validator 
peer, VFN peer and public peer + for network_id in network_ids { + let distance_from_validators = match network_id { + NetworkId::Validator => 0, + NetworkId::Vfn => 1, + NetworkId::Public => 2, + }; + create_peer_and_connection( + *network_id, + peers_and_metadata.clone(), + distance_from_validators, + None, + true, + ); + } + + // Sort the peers and verify the ordering (according to distance) + let sorted_peers = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![], + vec![], + ); + assert_eq!(sorted_peers[0].network_id(), NetworkId::Validator); + assert_eq!(sorted_peers[1].network_id(), NetworkId::Vfn); + assert_eq!(sorted_peers[2].network_id(), NetworkId::Public); + assert_eq!(sorted_peers.len(), 3); + + // Sort the peers, but mark the validator as unhealthy (so it's ignored) + let sorted_peer_subset = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![], + vec![sorted_peers[0]], + ); + assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Vfn); + assert_eq!(sorted_peer_subset[1].network_id(), NetworkId::Public); + assert_eq!(sorted_peer_subset.len(), 2); + + // Sort the peers, but mark the VFN and validator as active subscriptions (so they're ignored) + let sorted_peer_subset = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![sorted_peers[0], sorted_peers[1]], + vec![], + ); + assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Public); + assert_eq!(sorted_peer_subset.len(), 1); + + // Create a consensus publisher with the PFN as an active subscriber + let consensus_publisher_with_subscribers = + Arc::new(ConsensusPublisher::new_with_active_subscribers( + consensus_observer_config, + consensus_observer_client.clone(), + HashSet::from_iter(vec![sorted_peers[2]]), + )); + + // Sort the peers, and verify the PFN is ignored (since it's an active subscriber) + let sorted_peer_subset = sort_subscription_peers( + consensus_publisher_with_subscribers, + peers_and_metadata.clone(), + vec![], + vec![], + ); + assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Validator); + assert_eq!(sorted_peer_subset[1].network_id(), NetworkId::Vfn); + assert_eq!(sorted_peer_subset.len(), 2); + + // Remove all the peers and verify that no peers are returned upon sorting + for peer_network_id in sorted_peers { + remove_peer_and_connection(peers_and_metadata.clone(), peer_network_id); + } + let sorted_peers = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![], + vec![], + ); + assert!(sorted_peers.is_empty()); + + // Add multiple validator peers, with different latencies + let mut validator_peers = vec![]; + for ping_latency_secs in [0.9, 0.8, 0.5, 0.1, 0.05] { + let validator_peer = create_peer_and_connection( + NetworkId::Validator, + peers_and_metadata.clone(), + 0, + Some(ping_latency_secs), + true, + ); + validator_peers.push(validator_peer); + } + + // Sort the peers and verify the ordering (according to latency) + let sorted_peers = sort_subscription_peers( + consensus_publisher, + peers_and_metadata.clone(), + vec![], + vec![], + ); + let expected_peers = validator_peers.into_iter().rev().collect::>(); + assert_eq!(sorted_peers, expected_peers); + } + + /// Creates a new connection metadata for testing + fn create_connection_metadata( + peer_network_id: PeerNetworkId, + support_consensus_observer: bool, + ) -> ConnectionMetadata { + if support_consensus_observer { + // Create a protocol set that supports consensus 
observer
+ let protocol_set = ProtocolIdSet::from_iter(vec![
+ ProtocolId::ConsensusObserver,
+ ProtocolId::ConsensusObserverRpc,
+ ]);
+
+ // Create the connection metadata with the protocol set
+ ConnectionMetadata::new(
+ peer_network_id.peer_id(),
+ ConnectionId::default(),
+ NetworkAddress::mock(),
+ ConnectionOrigin::Inbound,
+ MessagingProtocolVersion::V1,
+ protocol_set,
+ PeerRole::PreferredUpstream,
+ )
+ } else {
+ ConnectionMetadata::mock(peer_network_id.peer_id())
+ }
+ }
+
+ /// Creates a new consensus observer client and a peers and metadata container
+ fn create_consensus_observer_client(
+ network_ids: &[NetworkId],
+ ) -> (
+ Arc<PeersAndMetadata>,
+ Arc<ConsensusObserverClient<NetworkClient<ConsensusObserverMessage>>>,
+ ) {
+ let peers_and_metadata = PeersAndMetadata::new(network_ids);
+ let network_client =
+ NetworkClient::new(vec![], vec![], hashmap![], peers_and_metadata.clone());
+ let consensus_observer_client = Arc::new(ConsensusObserverClient::new(network_client));
+
+ (peers_and_metadata, consensus_observer_client)
+ }
+
+ /// Creates a new peer with the specified connection metadata
+ fn create_peer_and_connection(
+ network_id: NetworkId,
+ peers_and_metadata: Arc<PeersAndMetadata>,
+ distance_from_validators: u64,
+ ping_latency_secs: Option<f64>,
+ support_consensus_observer: bool,
+ ) -> PeerNetworkId {
+ // Create the connection metadata
+ let peer_network_id = PeerNetworkId::new(network_id, PeerId::random());
+ let connection_metadata = if support_consensus_observer {
+ // Create a protocol set that supports consensus observer
+ let protocol_set = ProtocolIdSet::from_iter(vec![
+ ProtocolId::ConsensusObserver,
+ ProtocolId::ConsensusObserverRpc,
+ ]);
+
+ // Create the connection metadata with the protocol set
+ ConnectionMetadata::new(
+ peer_network_id.peer_id(),
+ ConnectionId::default(),
+ NetworkAddress::mock(),
+ ConnectionOrigin::Inbound,
+ MessagingProtocolVersion::V1,
+ protocol_set,
+ PeerRole::PreferredUpstream,
+ )
+ } else {
+ ConnectionMetadata::mock(peer_network_id.peer_id())
+ };
+
+ // Insert the connection into peers and metadata
+ peers_and_metadata
+ .insert_connection_metadata(peer_network_id, connection_metadata.clone())
+ .unwrap();
+
+ // Update the peer monitoring metadata
+ let latest_network_info_response = NetworkInformationResponse {
+ connected_peers: BTreeMap::new(),
+ distance_from_validators,
+ };
+ let monitoring_metadata = PeerMonitoringMetadata::new(
+ ping_latency_secs,
+ ping_latency_secs,
+ Some(latest_network_info_response),
+ None,
+ None,
+ );
+ peers_and_metadata
+ .update_peer_monitoring_metadata(peer_network_id, monitoring_metadata.clone())
+ .unwrap();
+
+ peer_network_id
+ }
+
+ /// Creates a new peer and metadata for testing
+ fn create_peer_and_metadata(
+ latency: Option<f64>,
+ distance_from_validators: Option<u64>,
+ support_consensus_observer: bool,
+ ) -> (PeerNetworkId, PeerMetadata) {
+ // Create a random peer
+ let peer_network_id = PeerNetworkId::random();
+
+ // Create a new peer metadata with the given latency and distance
+ let connection_metadata =
+ create_connection_metadata(peer_network_id, support_consensus_observer);
+ let network_information_response =
+ distance_from_validators.map(|distance| NetworkInformationResponse {
+ connected_peers: BTreeMap::new(),
+ distance_from_validators: distance,
+ });
+ let peer_monitoring_metadata =
+ PeerMonitoringMetadata::new(latency, None, network_information_response, None, None);
+ let peer_metadata =
+ PeerMetadata::new_for_test(connection_metadata, peer_monitoring_metadata);
+
+ (peer_network_id, peer_metadata)
+ }
+
+ /// Creates a list of peers and metadata 
for testing
+ fn create_peers_and_metadata(
+ empty_latency: bool,
+ empty_distance: bool,
+ support_consensus_observer: bool,
+ num_peers: u64,
+ ) -> HashMap<PeerNetworkId, PeerMetadata> {
+ let mut peers_and_metadata = HashMap::new();
+ for i in 1..num_peers + 1 {
+ // Determine the distance for the peer
+ let distance = if empty_distance { None } else { Some(i) };
+
+ // Determine the latency for the peer
+ let latency = if empty_latency { None } else { Some(i as f64) };
+
+ // Create a new peer and metadata
+ let (peer_network_id, peer_metadata) =
+ create_peer_and_metadata(latency, distance, support_consensus_observer);
+ peers_and_metadata.insert(peer_network_id, peer_metadata);
+ }
+ peers_and_metadata
+ }
+
+ /// Removes the peer and connection metadata for the given peer
+ fn remove_peer_and_connection(
+ peers_and_metadata: Arc<PeersAndMetadata>,
+ peer_network_id: PeerNetworkId,
+ ) {
+ let peer_metadata = peers_and_metadata
+ .get_metadata_for_peer(peer_network_id)
+ .unwrap();
+ let connection_id = peer_metadata.get_connection_metadata().connection_id;
+ peers_and_metadata
+ .remove_peer_metadata(peer_network_id, connection_id)
+ .unwrap();
+ }
+
+ /// A simple helper method that sorts the given peers for a subscription
+ fn sort_subscription_peers(
+ consensus_publisher: Arc<ConsensusPublisher>,
+ peers_and_metadata: Arc<PeersAndMetadata>,
+ active_subscription_peers: Vec<PeerNetworkId>,
+ unhealthy_subscription_peers: Vec<PeerNetworkId>,
+ ) -> Vec<PeerNetworkId> {
+ // Get the connected peers and metadata
+ let connected_peers_and_metadata = peers_and_metadata
+ .get_connected_peers_and_metadata()
+ .unwrap();
+
+ // Sort the peers for subscription requests
+ sort_peers_for_subscriptions(
+ connected_peers_and_metadata,
+ unhealthy_subscription_peers,
+ active_subscription_peers,
+ Some(consensus_publisher),
+ )
+ .unwrap()
+ }
+
+ /// Verifies that the distance and latencies for the peers are in
+ /// increasing order (with the distance taking precedence over the latency). 
+ fn verify_increasing_distance_latencies(
+ peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
+ sorted_peers: &[PeerNetworkId],
+ ) {
+ let mut previous_latency = None;
+ let mut previous_distance = 0;
+ for sorted_peer in sorted_peers {
+ // Get the distance and latency for the peer
+ let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap();
+ let distance = get_distance_for_peer(sorted_peer, peer_metadata).unwrap();
+ let latency = get_latency_for_peer(sorted_peer, peer_metadata);
+
+ // Verify the order of the peers
+ if distance == previous_distance {
+ if let Some(latency) = latency {
+ if let Some(previous_latency) = previous_latency {
+ assert!(latency >= previous_latency);
+ }
+ }
+ } else {
+ assert!(distance > previous_distance);
+ }
+
+ // Update the previous latency and distance
+ previous_latency = latency;
+ previous_distance = distance;
+ }
+ }
+}
diff --git a/consensus/src/consensus_observer/publisher/consensus_publisher.rs b/consensus/src/consensus_observer/publisher/consensus_publisher.rs
index 1379c87131cc5..899901593f7ed 100644
--- a/consensus/src/consensus_observer/publisher/consensus_publisher.rs
+++ b/consensus/src/consensus_observer/publisher/consensus_publisher.rs
@@ -70,6 +70,26 @@ impl ConsensusPublisher {
(consensus_publisher, outbound_message_receiver)
}
+ #[cfg(test)]
+ /// Creates a new consensus publisher with the given active subscribers
+ pub fn new_with_active_subscribers(
+ consensus_observer_config: ConsensusObserverConfig,
+ consensus_observer_client: Arc<
+ ConsensusObserverClient<NetworkClient<ConsensusObserverMessage>>,
+ >,
+ active_subscribers: HashSet<PeerNetworkId>,
+ ) -> Self {
+ // Create the consensus publisher
+ let (consensus_publisher, _) =
+ ConsensusPublisher::new(consensus_observer_config, consensus_observer_client);
+
+ // Update the active subscribers
+ *consensus_publisher.active_subscribers.write() = active_subscribers;
+
+ // Return the publisher
+ consensus_publisher
+ }
+
/// Adds the given subscriber to the set of active subscribers
fn add_active_subscriber(&self, peer_network_id: PeerNetworkId) {
self.active_subscribers.write().insert(peer_network_id);
From d779590fb37a710232760c5bca5264ca4fed8181 Mon Sep 17 00:00:00 2001
From: Josh Lind
Date: Thu, 12 Sep 2024 09:05:10 -0400
Subject: [PATCH 15/36] [Consensus Observer] Move subscription health check to
 subscription.rs
---
 .../observer/subscription.rs | 208 +++++++++++++++++-
 .../observer/subscription_manager.rs | 23 +-
 2 files changed, 206 insertions(+), 25 deletions(-)
diff --git a/consensus/src/consensus_observer/observer/subscription.rs b/consensus/src/consensus_observer/observer/subscription.rs
index 7b368fe3417c6..5d9ae4d43def1 100644
--- a/consensus/src/consensus_observer/observer/subscription.rs
+++ b/consensus/src/consensus_observer/observer/subscription.rs
@@ -58,10 +58,38 @@ impl ConsensusObserverSubscription {
}
}
+ /// Checks if the subscription is still healthy. If not, an error
+ /// is returned indicating the reason for the subscription failure. 
+ pub fn check_subscription_health(
+ &mut self,
+ connected_peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
+ ) -> Result<(), Error> {
+ // Verify the subscription peer is still connected
+ let peer_network_id = self.get_peer_network_id();
+ if !connected_peers_and_metadata.contains_key(&peer_network_id) {
+ return Err(Error::SubscriptionDisconnected(format!(
+ "The peer: {:?} is no longer connected!",
+ peer_network_id
+ )));
+ }
+
+ // Verify the subscription has not timed out
+ self.check_subscription_timeout()?;
+
+ // Verify that the DB is continuing to sync and commit new data
+ self.check_syncing_progress()?;
+
+ // Verify that the subscription peer is still optimal
+ self.check_subscription_peer_optimality(connected_peers_and_metadata)?;
+
+ // The subscription seems healthy
+ Ok(())
+ }
+
 /// Verifies that the peer currently selected for the subscription is
 /// optimal. This is only done if: (i) the peers have changed since the
 /// last check; or (ii) enough time has elapsed to force a refresh.
- pub fn check_subscription_peer_optimality(
+ fn check_subscription_peer_optimality(
 &mut self,
 peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
 ) -> Result<(), Error> {
@@ -120,7 +148,7 @@ impl ConsensusObserverSubscription {
 /// Verifies that the subscription has not timed out based
 /// on the last received message time.
- pub fn check_subscription_timeout(&self) -> Result<(), Error> {
+ fn check_subscription_timeout(&self) -> Result<(), Error> {
 // Calculate the duration since the last message
 let time_now = self.time_service.now();
 let duration_since_last_message = time_now.duration_since(self.last_message_receive_time);
@@ -139,7 +167,7 @@ impl ConsensusObserverSubscription {
 }
 /// Verifies that the DB is continuing to sync and commit new data
- pub fn check_syncing_progress(&mut self) -> Result<(), Error> {
+ fn check_syncing_progress(&mut self) -> Result<(), Error> {
 // Get the current synced version from storage
 let current_synced_version = self
 .db_reader
@@ -212,6 +240,161 @@ mod test {
 }
 }
+ #[test]
+ fn test_check_subscription_health_connected_and_timeout() {
+ // Create a consensus observer config
+ let consensus_observer_config = ConsensusObserverConfig {
+ max_synced_version_timeout_ms: 100_000_000, // Use a large value so that we don't get DB progress errors
+ ..ConsensusObserverConfig::default()
+ };
+
+ // Create a new observer subscription
+ let time_service = TimeService::mock();
+ let peer_network_id = PeerNetworkId::random();
+ let mut subscription = ConsensusObserverSubscription::new(
+ consensus_observer_config,
+ Arc::new(MockDatabaseReader::new()),
+ peer_network_id,
+ time_service.clone(),
+ );
+
+ // Verify that the subscription is unhealthy (the peer is not connected)
+ assert_matches!(
+ subscription.check_subscription_health(&HashMap::new()),
+ Err(Error::SubscriptionDisconnected(_))
+ );
+
+ // Create a peers and metadata map for the subscription
+ let mut peers_and_metadata = HashMap::new();
+ add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false);
+
+ // Elapse enough time to timeout the subscription
+ let mock_time_service = time_service.into_mock();
+ mock_time_service.advance(Duration::from_millis(
+ consensus_observer_config.max_subscription_timeout_ms + 1,
+ ));
+
+ // Verify that the subscription has timed out
+ assert_matches!(
+ subscription.check_subscription_health(&peers_and_metadata),
+ Err(Error::SubscriptionTimeout(_))
+ );
+ }
+
+ #[test]
+ fn test_check_subscription_health_progress() {
+ // Create a consensus observer config with a large timeout
+ let 
consensus_observer_config = ConsensusObserverConfig { + max_subscription_timeout_ms: 100_000_000, // Use a large value so that we don't time out + ..ConsensusObserverConfig::default() + }; + + // Create a mock DB reader with expectations + let first_synced_version = 1; + let second_synced_version = 2; + let mut mock_db_reader = MockDatabaseReader::new(); + mock_db_reader + .expect_get_latest_ledger_info_version() + .returning(move || Ok(first_synced_version)) + .times(1); // Only allow one call for the first version + mock_db_reader + .expect_get_latest_ledger_info_version() + .returning(move || Ok(second_synced_version)); // Allow multiple calls for the second version + + // Create a new observer subscription + let peer_network_id = PeerNetworkId::random(); + let time_service = TimeService::mock(); + let mut subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(mock_db_reader), + peer_network_id, + time_service.clone(), + ); + + // Verify that the DB is making sync progress and that the highest synced version is updated + let mock_time_service = time_service.into_mock(); + verify_subscription_syncing_progress( + &mut subscription, + first_synced_version, + mock_time_service.now(), + ); + + // Elapse enough time to timeout the subscription + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_synced_version_timeout_ms + 1, + )); + + // Verify that the DB is still making sync progress (the next version is higher) + verify_subscription_syncing_progress( + &mut subscription, + second_synced_version, + mock_time_service.now(), + ); + + // Elapse enough time to timeout the subscription + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_synced_version_timeout_ms + 1, + )); + + // Verify that the DB is not making sync progress and that the subscription has timed out + assert_matches!( + subscription.check_syncing_progress(), + Err(Error::SubscriptionProgressStopped(_)) + ); + } + + #[test] + fn test_check_subscription_health_optimality() { + // Create a consensus observer config with a single subscription and large timeouts + let consensus_observer_config = ConsensusObserverConfig { + max_concurrent_subscriptions: 1, + max_subscription_timeout_ms: 100_000_000, // Use a large value so that we don't time out + max_synced_version_timeout_ms: 100_000_000, // Use a large value so that we don't get DB progress errors + ..ConsensusObserverConfig::default() + }; + + // Create a mock DB reader with expectations + let mut mock_db_reader = MockDatabaseReader::new(); + mock_db_reader + .expect_get_latest_ledger_info_version() + .returning(move || Ok(1)); + + // Create a new observer subscription + let time_service = TimeService::mock(); + let peer_network_id = PeerNetworkId::random(); + let mut subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(mock_db_reader), + peer_network_id, + time_service.clone(), + ); + + // Create a peers and metadata map for the subscription + let mut peers_and_metadata = HashMap::new(); + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); + + // Verify that the subscription is healthy + assert!(subscription + .check_subscription_health(&peers_and_metadata) + .is_ok()); + + // Add a more optimal peer to the set of peers + let new_optimal_peer = PeerNetworkId::random(); + add_metadata_for_peer(&mut peers_and_metadata, new_optimal_peer, true, true); + + // Elapse enough time for a peer optimality check + let 
mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_peer_change_interval_ms + 1, + )); + + // Verify that the subscription is no longer optimal + assert_matches!( + subscription.check_subscription_health(&peers_and_metadata), + Err(Error::SubscriptionSuboptimal(_)) + ); + } + #[test] fn test_check_subscription_peer_optimality_single() { // Create a consensus observer config with a maximum of 1 subscription @@ -344,7 +527,7 @@ mod test { } #[test] - fn test_check_subscription_peer_refresh() { + fn test_check_subscription_peer_optimality_refresh() { // Create a consensus observer config with a maximum of 1 subscription let consensus_observer_config = create_observer_config(1); @@ -574,6 +757,23 @@ mod test { ); } + #[test] + fn test_get_peer_network_id() { + // Create a new observer subscription + let consensus_observer_config = ConsensusObserverConfig::default(); + let peer_network_id = PeerNetworkId::random(); + let time_service = TimeService::mock(); + let subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(MockDatabaseReader::new()), + peer_network_id, + time_service.clone(), + ); + + // Verify that the peer network id matches the expected value + assert_eq!(subscription.get_peer_network_id(), peer_network_id); + } + #[test] fn test_update_last_message_receive_time() { // Create a new observer subscription diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs index 16cd756c176dd..deba8d52a98e6 100644 --- a/consensus/src/consensus_observer/observer/subscription_manager.rs +++ b/consensus/src/consensus_observer/observer/subscription_manager.rs @@ -87,26 +87,7 @@ impl SubscriptionManager { // Check the health of the subscription match active_subscription { Some(active_subscription) => { - // Verify the peer is still connected - if !connected_peers_and_metadata.contains_key(&peer_network_id) { - return Err(Error::SubscriptionDisconnected(format!( - "The peer: {:?} is no longer connected!", - peer_network_id - ))); - } - - // Verify the subscription has not timed out - active_subscription.check_subscription_timeout()?; - - // Verify that the DB is continuing to sync and commit new data - active_subscription.check_syncing_progress()?; - - // Verify that the subscription peer is still optimal - active_subscription - .check_subscription_peer_optimality(connected_peers_and_metadata)?; - - // The subscription seems healthy - Ok(()) + active_subscription.check_subscription_health(connected_peers_and_metadata) }, None => Err(Error::UnexpectedError(format!( "The subscription to peer: {:?} is not active!", @@ -217,7 +198,7 @@ impl SubscriptionManager { let db_reader = self.db_reader.clone(); let time_service = self.time_service.clone(); - // Otherwise, we should spawn a new subscription creation task + // Spawn a new subscription creation task let subscription_creation_task = tokio::spawn(async move { // Identify the terminated subscription peers let terminated_subscription_peers = terminated_subscriptions From 0e6ca9887234f67731b99f1f909885d33ffbce8a Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Thu, 12 Sep 2024 11:44:48 -0400 Subject: [PATCH 16/36] [Consensus Observer] Improve subscription manager unit tests. 
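The tests in this change replace repeated raw assertions on the subscription manager with small verification helpers. As a rough, self-contained sketch of that helper pattern (the types below are illustrative stand-ins, not the real SubscriptionManager or PeerNetworkId types):

// Illustrative stand-ins only: a hypothetical peer id and a manager that
// tracks which peers currently have active subscriptions.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Peer(u64);

struct Manager {
    active_peers: Vec<Peer>,
}

// One helper captures the repeated assertion pattern: every expected peer
// is active, and no unexpected peers remain.
fn verify_active_peers(manager: &Manager, expected_peers: Vec<Peer>) {
    for peer in &expected_peers {
        assert!(manager.active_peers.contains(peer));
    }
    assert_eq!(manager.active_peers.len(), expected_peers.len());
}

fn main() {
    let manager = Manager {
        active_peers: vec![Peer(1), Peer(2)],
    };
    verify_active_peers(&manager, vec![Peer(2), Peer(1)]);
}

Centralizing the assertions keeps each test focused on the scenario under test and makes failures easier to localize.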
--- .../observer/subscription_manager.rs | 337 ++++++++++++++---- 1 file changed, 260 insertions(+), 77 deletions(-) diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs index deba8d52a98e6..2d89163e1ae86 100644 --- a/consensus/src/consensus_observer/observer/subscription_manager.rs +++ b/consensus/src/consensus_observer/observer/subscription_manager.rs @@ -430,6 +430,95 @@ mod test { } } + #[tokio::test] + async fn test_check_and_manage_subscriptions() { + // Create a consensus observer client + let network_id = NetworkId::Public; + let (peers_and_metadata, consensus_observer_client) = + create_consensus_observer_client(&[network_id]); + + // Create a new subscription manager + let consensus_observer_config = ConsensusObserverConfig::default(); + let db_reader = create_mock_db_reader(); + let time_service = TimeService::mock(); + let mut subscription_manager = SubscriptionManager::new( + consensus_observer_client, + consensus_observer_config, + None, + db_reader.clone(), + time_service.clone(), + ); + + // Verify that no subscriptions are active + verify_active_subscription_peers(&subscription_manager, vec![]); + + // Check and manage the subscriptions + let result = subscription_manager.check_and_manage_subscriptions().await; + + // Verify that no subscriptions were terminated + assert!(result.is_ok()); + verify_active_subscription_peers(&subscription_manager, vec![]); + + // Add a new connected peer and subscription + let connected_peer_1 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + connected_peer_1, + time_service.clone(), + ); + + // Add another connected peer and subscription + let connected_peer_2 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 2, None, true); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + connected_peer_2, + TimeService::mock(), // Use a different time service (to avoid timeouts!) 
+ ); + + // Check and manage the subscriptions + subscription_manager + .check_and_manage_subscriptions() + .await + .unwrap(); + + // Verify that the subscriptions are still active + verify_active_subscription_peers(&subscription_manager, vec![ + connected_peer_1, + connected_peer_2, + ]); + + // Elapse time to simulate a timeout for peer 1 + let mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_subscription_timeout_ms + 1, + )); + + // Check and manage the subscriptions + subscription_manager + .check_and_manage_subscriptions() + .await + .unwrap(); + + // Verify that the first subscription was terminated + verify_active_subscription_peers(&subscription_manager, vec![connected_peer_2]); + + // Disconnect the second peer + remove_peer_and_connection(peers_and_metadata.clone(), connected_peer_2); + + // Check and manage the subscriptions + let result = subscription_manager.check_and_manage_subscriptions().await; + + // Verify that the second subscription was terminated and an error was returned + verify_active_subscription_peers(&subscription_manager, vec![]); + assert_matches!(result, Err(Error::SubscriptionsReset(_))); + } + #[tokio::test] async fn test_check_subscription_health_connected() { // Create a consensus observer client @@ -461,11 +550,8 @@ mod test { // Check the active subscription and verify that it unhealthy (the peer is not connected) check_subscription_connection(&mut subscription_manager, peer_network_id, false); - // Terminate the subscription - let terminated_subscriptions = - terminate_any_unhealthy_subscriptions(&mut subscription_manager); - assert_eq!(terminated_subscriptions.len(), 1); - assert_eq!(terminated_subscriptions.first().unwrap().0, peer_network_id); + // Terminate unhealthy subscriptions and verify the subscription was removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![peer_network_id]); // Add a new connected peer let connected_peer = @@ -480,13 +566,14 @@ mod test { TimeService::mock(), ); - // Check the active subscriptions is still healthy + // Check the active subscription is still healthy check_subscription_connection(&mut subscription_manager, connected_peer, true); + // Terminate unhealthy subscriptions and verify none are removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + // Verify that the active subscription is still present - assert!(subscription_manager - .get_active_subscription_peers() - .contains(&connected_peer)); + verify_active_subscription_peers(&subscription_manager, vec![connected_peer]); } #[tokio::test] @@ -529,6 +616,9 @@ mod test { // Check the active subscription and verify that it is healthy check_subscription_progress(&mut subscription_manager, connected_peer, true); + // Terminate unhealthy subscriptions and verify none are removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + // Elapse time to simulate a DB progress error let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( @@ -538,16 +628,11 @@ mod test { // Check the active subscription and verify that it is unhealthy (the DB is not syncing) check_subscription_progress(&mut subscription_manager, connected_peer, false); - // Terminate the subscription - let terminated_subscriptions = - terminate_any_unhealthy_subscriptions(&mut subscription_manager); - assert_eq!(terminated_subscriptions.len(), 1); - 
assert_eq!(terminated_subscriptions.first().unwrap().0, connected_peer); + // Terminate unhealthy subscriptions and verify the subscription was removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![connected_peer]); // Verify the active subscription is no longer present - assert!(subscription_manager - .get_active_subscription_peers() - .is_empty()); + verify_active_subscription_peers(&subscription_manager, vec![]); } #[tokio::test] @@ -585,6 +670,9 @@ mod test { // Check the active subscription and verify that it is healthy check_subscription_timeout(&mut subscription_manager, connected_peer, true); + // Terminate unhealthy subscriptions and verify none are removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + // Elapse time to simulate a timeout let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( @@ -594,16 +682,11 @@ mod test { // Check the active subscription and verify that it is unhealthy (the subscription timed out) check_subscription_timeout(&mut subscription_manager, connected_peer, false); - // Terminate the subscription - let terminated_subscriptions = - terminate_any_unhealthy_subscriptions(&mut subscription_manager); - assert_eq!(terminated_subscriptions.len(), 1); - assert_eq!(terminated_subscriptions.first().unwrap().0, connected_peer); + // Terminate unhealthy subscriptions and verify the subscription was removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![connected_peer]); // Verify the active subscription is no longer present - assert!(subscription_manager - .get_active_subscription_peers() - .is_empty()); + verify_active_subscription_peers(&subscription_manager, vec![]); } #[tokio::test] @@ -651,6 +734,9 @@ mod test { // Check the active subscription and verify that it is healthy check_subscription_optimality(&mut subscription_manager, suboptimal_peer, true); + // Terminate unhealthy subscriptions and verify none are removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + // Elapse enough time to trigger the peer optimality check let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( @@ -666,20 +752,89 @@ mod test { consensus_observer_config.subscription_refresh_interval_ms + 1, )); - // Terminate the subscription - let terminated_subscriptions = - terminate_any_unhealthy_subscriptions(&mut subscription_manager); - assert_eq!(terminated_subscriptions.len(), 1); - assert_eq!(terminated_subscriptions.first().unwrap().0, suboptimal_peer); + // Terminate any unhealthy subscriptions and verify the subscription was removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![suboptimal_peer]); // Verify the active subscription is no longer present - assert!(subscription_manager - .get_active_subscription_peers() - .is_empty()); + verify_active_subscription_peers(&subscription_manager, vec![]); } #[tokio::test] - async fn test_terminate_unhealthy_subscriptions() { + #[allow(clippy::await_holding_lock)] // Required to wait on the subscription creation task + async fn test_spawn_subscription_creation_task() { + // Create a consensus observer client + let network_id = NetworkId::Public; + let (_, consensus_observer_client) = create_consensus_observer_client(&[network_id]); + + // Create a new subscription manager + let consensus_observer_config = ConsensusObserverConfig::default(); + let db_reader = create_mock_db_reader(); 
+ let time_service = TimeService::mock(); + let mut subscription_manager = SubscriptionManager::new( + consensus_observer_client, + consensus_observer_config, + None, + db_reader.clone(), + time_service.clone(), + ); + + // Verify that the active subscription creation task is empty + verify_subscription_creation_task(&subscription_manager, false); + + // Spawn a subscription creation task with 0 subscriptions to create + subscription_manager + .spawn_subscription_creation_task(0, vec![], vec![], hashmap![]) + .await; + + // Verify that the active subscription creation task is still empty (no task was spawned) + verify_subscription_creation_task(&subscription_manager, false); + + // Spawn a subscription creation task with 1 subscription to create + subscription_manager + .spawn_subscription_creation_task(1, vec![], vec![], hashmap![]) + .await; + + // Verify that the active subscription creation task is now populated + verify_subscription_creation_task(&subscription_manager, true); + + // Wait for the active subscription creation task to finish + if let Some(active_task) = subscription_manager + .active_subscription_creation_task + .lock() + .as_mut() + { + active_task.await.unwrap(); + } + + // Verify that the active subscription creation task is still present + verify_subscription_creation_task(&subscription_manager, true); + + // Verify that the active subscription creation task is finished + if let Some(active_task) = subscription_manager + .active_subscription_creation_task + .lock() + .as_ref() + { + assert!(active_task.is_finished()); + } + + // Spawn a subscription creation task with 2 subscriptions to create + subscription_manager + .spawn_subscription_creation_task(2, vec![], vec![], hashmap![]) + .await; + + // Verify the new active subscription creation task is not finished + if let Some(active_task) = subscription_manager + .active_subscription_creation_task + .lock() + .as_ref() + { + assert!(!active_task.is_finished()); + }; + } + + #[tokio::test] + async fn test_terminate_unhealthy_subscriptions_multiple() { // Create a consensus observer client let network_id = NetworkId::Public; let (peers_and_metadata, consensus_observer_client) = @@ -713,14 +868,8 @@ mod test { ); } - // Terminate any unhealthy subscriptions and verify that both subscriptions are still healthy - let terminated_subscriptions = - terminate_any_unhealthy_subscriptions(&mut subscription_manager); - assert!(terminated_subscriptions.is_empty()); - assert_eq!( - subscription_manager.get_active_subscription_peers().len(), - 2 - ); + // Terminate unhealthy subscriptions and verify that both subscriptions are still healthy + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); // Create another subscription let subscription_peer_3 = @@ -739,18 +888,14 @@ mod test { consensus_observer_config.max_subscription_timeout_ms + 1, )); - // Terminate the unhealthy subscriptions and verify the first two subscriptions were terminated - let terminated_subscriptions = - terminate_any_unhealthy_subscriptions(&mut subscription_manager); - assert_eq!(terminated_subscriptions.len(), 2); - assert_eq!(subscription_manager.get_active_subscription_peers(), vec![ - subscription_peer_3 + // Terminate unhealthy subscriptions and verify the first two subscriptions were terminated + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![ + subscription_peer_1, + subscription_peer_2, ]); - // Verify that both subscriptions were terminated due to a timeout - for (_, error) in 
terminated_subscriptions {
- assert_matches!(error, Error::SubscriptionTimeout(_));
- }
+ // Verify the third subscription is still active
+ verify_active_subscription_peers(&subscription_manager, vec![subscription_peer_3]);
 }
 #[tokio::test]
@@ -771,9 +916,7 @@
 );
 // Verify that no subscriptions are active
- assert!(subscription_manager
- .get_active_subscription_peers()
- .is_empty());
+ verify_active_subscription_peers(&subscription_manager, vec![]);
 // Create a new subscription
 let subscription_peer_1 = PeerNetworkId::random();
@@ -786,9 +929,7 @@
 );
 // Verify the subscription is active
- assert!(subscription_manager
- .get_active_subscription_peers()
- .contains(&subscription_peer_1));
+ verify_active_subscription_peers(&subscription_manager, vec![subscription_peer_1]);
 // Create another subscription
 let subscription_peer_2 = PeerNetworkId::random();
@@ -801,26 +942,16 @@
 );
 // Verify the second subscription is active
- assert!(subscription_manager
- .get_active_subscription_peers()
- .contains(&subscription_peer_2));
+ verify_active_subscription_peers(&subscription_manager, vec![
+ subscription_peer_1,
+ subscription_peer_2,
+ ]);
 // Unsubscribe from the first peer
 subscription_manager.unsubscribe_from_peer(subscription_peer_1);
 // Verify that the first subscription is no longer active
- assert!(!subscription_manager
- .get_active_subscription_peers()
- .contains(&subscription_peer_1));
-
- // Verify that only the second subscription is still active
- assert!(subscription_manager
- .get_active_subscription_peers()
- .contains(&subscription_peer_2));
- assert_eq!(
- subscription_manager.get_active_subscription_peers().len(),
- 1
- );
+ verify_active_subscription_peers(&subscription_manager, vec![subscription_peer_2]);
 }
 #[tokio::test]
@@ -1077,14 +1208,66 @@
 peer_network_id
 }
- /// A simple helper method that terminates any unhealthy subscriptions
- fn terminate_any_unhealthy_subscriptions(
+ /// Removes the peer and connection metadata for the given peer
+ fn remove_peer_and_connection(
+ peers_and_metadata: Arc<PeersAndMetadata>,
+ peer_network_id: PeerNetworkId,
+ ) {
+ let peer_metadata = peers_and_metadata
+ .get_metadata_for_peer(peer_network_id)
+ .unwrap();
+ let connection_id = peer_metadata.get_connection_metadata().connection_id;
+ peers_and_metadata
+ .remove_peer_metadata(peer_network_id, connection_id)
+ .unwrap();
+ }
+
+ /// Verifies the active subscription peers
+ fn verify_active_subscription_peers(
+ subscription_manager: &SubscriptionManager,
+ expected_active_peers: Vec<PeerNetworkId>,
+ ) {
+ // Get the active subscription peers
+ let active_peers = subscription_manager.get_active_subscription_peers();
+
+ // Verify the active subscription peers
+ for peer in &expected_active_peers {
+ assert!(active_peers.contains(peer));
+ }
+ assert_eq!(active_peers.len(), expected_active_peers.len());
+ }
+
+ /// Verifies the status of the active subscription creation task
+ fn verify_subscription_creation_task(
+ subscription_manager: &SubscriptionManager,
+ expect_active_task: bool,
+ ) {
+ let current_active_task = subscription_manager
+ .active_subscription_creation_task
+ .lock()
+ .is_some();
+ assert_eq!(current_active_task, expect_active_task);
+ }
+
+ /// Verifies the list of terminated unhealthy subscriptions
+ fn verify_terminated_unhealthy_subscriptions(
 subscription_manager: &mut SubscriptionManager,
- ) -> Vec<(PeerNetworkId, Error)> {
+ expected_terminated_peers: Vec<PeerNetworkId>,
+ ) {
 // Get the connected peers and metadata
 let connected_peers_and_metadata = 
subscription_manager.get_connected_peers_and_metadata(); // Terminate any unhealthy subscriptions - subscription_manager.terminate_unhealthy_subscriptions(&connected_peers_and_metadata) + let terminated_subscriptions = + subscription_manager.terminate_unhealthy_subscriptions(&connected_peers_and_metadata); + + // Verify the terminated subscriptions + for (terminated_subscription_peer, _) in &terminated_subscriptions { + assert!(expected_terminated_peers.contains(terminated_subscription_peer)); + } + assert_eq!( + terminated_subscriptions.len(), + expected_terminated_peers.len() + ); } } From 4a87ad175d514dd1e1bd1cc766f6fffe5d065d43 Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Fri, 13 Sep 2024 14:03:58 -0400 Subject: [PATCH 17/36] [Consensus Observer] Improve subscription utility unit tests. --- .../observer/subscription_utils.rs | 391 +++++++++++++++++- 1 file changed, 377 insertions(+), 14 deletions(-) diff --git a/consensus/src/consensus_observer/observer/subscription_utils.rs b/consensus/src/consensus_observer/observer/subscription_utils.rs index 7dd5ffa9b2ace..d654af8aaf0d5 100644 --- a/consensus/src/consensus_observer/observer/subscription_utils.rs +++ b/consensus/src/consensus_observer/observer/subscription_utils.rs @@ -358,20 +358,227 @@ fn supports_consensus_observer(peer_metadata: &PeerMetadata) -> bool { #[cfg(test)] mod tests { use super::*; + use aptos_channels::{aptos_channel, message_queues::QueueStyle}; use aptos_config::{config::PeerRole, network_id::NetworkId}; use aptos_netcore::transport::ConnectionOrigin; use aptos_network::{ application::storage::PeersAndMetadata, - protocols::wire::handshake::v1::{MessagingProtocolVersion, ProtocolIdSet}, + peer_manager::{ConnectionRequestSender, PeerManagerRequest, PeerManagerRequestSender}, + protocols::{ + network::{NetworkSender, NewNetworkSender}, + wire::handshake::v1::{MessagingProtocolVersion, ProtocolIdSet}, + }, transport::{ConnectionId, ConnectionMetadata}, }; use aptos_peer_monitoring_service_types::{ response::NetworkInformationResponse, PeerMonitoringMetadata, }; - use aptos_types::{network_address::NetworkAddress, PeerId}; - use maplit::hashmap; + use aptos_storage_interface::Result; + use aptos_types::{network_address::NetworkAddress, transaction::Version, PeerId}; + use bytes::Bytes; + use futures::StreamExt; + use mockall::mock; use std::collections::HashSet; + // This is a simple mock of the DbReader (it generates a MockDatabaseReader) + mock! 
{
+ pub DatabaseReader {}
+ impl DbReader for DatabaseReader {
+ fn get_latest_ledger_info_version(&self) -> Result<Version>;
+ }
+ }
+
+ #[tokio::test(flavor = "multi_thread")]
+ async fn test_create_new_subscriptions() {
+ // Create a consensus observer config and client
+ let consensus_observer_config = ConsensusObserverConfig::default();
+ let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public];
+ let (peers_and_metadata, consensus_observer_client, mut peer_manager_request_receivers) =
+ create_consensus_observer_client(network_ids);
+
+ // Create a list of connected peers (one per network)
+ let mut connected_peers = vec![];
+ for network_id in &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public] {
+ // Create a new peer
+ let peer_network_id = create_peer_and_connection(
+ *network_id,
+ peers_and_metadata.clone(),
+ get_distance_from_validators(network_id),
+ None,
+ true,
+ );
+
+ // Add the peer to the list of sorted peers
+ connected_peers.push(peer_network_id);
+ }
+
+ // Get the connected peers and metadata
+ let connected_peers_and_metadata = peers_and_metadata
+ .get_connected_peers_and_metadata()
+ .unwrap();
+
+ // Spawn the subscription creation task to create 2 subscriptions
+ let num_subscriptions_to_create = 2;
+ let subscription_creation_handle = tokio::spawn(async move {
+ create_new_subscriptions(
+ consensus_observer_config,
+ consensus_observer_client.clone(),
+ None,
+ Arc::new(MockDatabaseReader::new()),
+ TimeService::mock(),
+ connected_peers_and_metadata,
+ num_subscriptions_to_create,
+ vec![],
+ vec![],
+ )
+ .await
+ });
+
+ // Handle the peer manager requests made by the subscription creation task.
+ // The VFN peer should fail the subscription request.
+ for connected_peer in &connected_peers {
+ let network_id = connected_peer.network_id();
+ handle_next_subscription_request(
+ network_id,
+ &mut peer_manager_request_receivers,
+ network_id != NetworkId::Vfn, // The VFN peer should fail the subscription request
+ )
+ .await;
+ }
+
+ // Wait for the subscription creation task to complete
+ let consensus_observer_subscriptions = subscription_creation_handle.await.unwrap();
+
+ // Verify the number of created subscriptions
+ assert_eq!(
+ consensus_observer_subscriptions.len(),
+ num_subscriptions_to_create
+ );
+
+ // Verify the created subscription peers
+ let first_peer = *connected_peers.first().unwrap();
+ let last_peer = *connected_peers.last().unwrap();
+ let expected_subscription_peers = [first_peer, last_peer];
+ for consensus_observer_subscription in consensus_observer_subscriptions {
+ let peer_network_id = consensus_observer_subscription.get_peer_network_id();
+ assert!(expected_subscription_peers.contains(&peer_network_id));
+ }
+ }
+
+ #[tokio::test(flavor = "multi_thread")]
+ async fn test_create_new_subscriptions_multiple() {
+ // Create a consensus observer config and client
+ let consensus_observer_config = ConsensusObserverConfig::default();
+ let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public];
+ let (peers_and_metadata, consensus_observer_client, mut peer_manager_request_receivers) =
+ create_consensus_observer_client(network_ids);
+
+ // Create a list of connected peers (one per network)
+ let mut connected_peers = vec![];
+ for network_id in &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public] {
+ // Create a new peer
+ let peer_network_id = create_peer_and_connection(
+ *network_id,
+ peers_and_metadata.clone(),
+ get_distance_from_validators(network_id),
+ None,
+ true,
+ );
+
+ // 
Add the peer to the list of sorted peers + connected_peers.push(peer_network_id); + } + + // Create multiple sets of subscriptions and verify the results + for num_subscriptions_to_create in [0, 1, 2, 3, 10] { + // Determine the expected subscription peers + let expected_subscription_peers = connected_peers + .iter() + .take(num_subscriptions_to_create) + .cloned() + .collect(); + + // Create the subscriptions and verify the result + create_and_verify_subscriptions( + consensus_observer_config, + peers_and_metadata.clone(), + consensus_observer_client.clone(), + &mut peer_manager_request_receivers, + num_subscriptions_to_create, + expected_subscription_peers, + ) + .await; + } + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_create_single_subscription() { + // Create a consensus observer config and client + let consensus_observer_config = ConsensusObserverConfig::default(); + let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; + let (peers_and_metadata, consensus_observer_client, mut peer_manager_request_receivers) = + create_consensus_observer_client(network_ids); + + // Create a list of connected peers (one per network) + let mut connected_peers = vec![]; + for network_id in &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public] { + // Create a new peer + let peer_network_id = + create_peer_and_connection(*network_id, peers_and_metadata.clone(), 0, None, true); + + // Add the peer to the list of sorted peers + connected_peers.push(peer_network_id); + } + + // Spawn the subscription creation task + let sorted_potential_peers = connected_peers.clone(); + let subscription_creation_handle = tokio::spawn(async move { + create_single_subscription( + consensus_observer_config, + consensus_observer_client.clone(), + Arc::new(MockDatabaseReader::new()), + sorted_potential_peers, + TimeService::mock(), + ) + .await + }); + + // Handle the peer manager requests made by the subscription creation task. + // We should only respond successfully to the peer on the public network. 
+ handle_next_subscription_request(
+ NetworkId::Validator,
+ &mut peer_manager_request_receivers,
+ false,
+ )
+ .await;
+ handle_next_subscription_request(
+ NetworkId::Vfn,
+ &mut peer_manager_request_receivers,
+ false,
+ )
+ .await;
+ handle_next_subscription_request(
+ NetworkId::Public,
+ &mut peer_manager_request_receivers,
+ true,
+ )
+ .await;
+
+ // Wait for the subscription creation task to complete
+ let (observer_subscription, failed_subscription_peers) =
+ subscription_creation_handle.await.unwrap();
+
+ // Verify that the public peer was successfully subscribed to
+ assert_eq!(
+ &observer_subscription.unwrap().get_peer_network_id(),
+ connected_peers.last().unwrap()
+ );
+
+ // Verify that the other peers failed our subscription attempts
+ let expected_failed_peers = connected_peers.iter().take(2).cloned().collect::<Vec<_>>();
+ assert_eq!(failed_subscription_peers, expected_failed_peers);
+ }
+
 #[test]
 fn test_sort_peers_by_distance_and_latency() {
 // Sort an empty list of peers
@@ -487,7 +694,7 @@
 async fn test_sort_peers_for_subscriptions() {
 // Create a consensus observer client
 let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public];
- let (peers_and_metadata, consensus_observer_client) =
+ let (peers_and_metadata, consensus_observer_client, _) =
 create_consensus_observer_client(network_ids);
 // Create a consensus publisher
@@ -507,15 +714,10 @@
 // Add a connected validator peer, VFN peer and public peer
 for network_id in network_ids {
- let distance_from_validators = match network_id {
- NetworkId::Validator => 0,
- NetworkId::Vfn => 1,
- NetworkId::Public => 2,
- };
 create_peer_and_connection(
 *network_id,
 peers_and_metadata.clone(),
- distance_from_validators,
+ get_distance_from_validators(network_id),
 None,
 true,
 );
 }
@@ -609,6 +811,64 @@
 assert_eq!(sorted_peers, expected_peers);
 }
+ /// Creates new subscriptions and verifies the results
+ async fn create_and_verify_subscriptions(
+ consensus_observer_config: ConsensusObserverConfig,
+ peers_and_metadata: Arc<PeersAndMetadata>,
+ consensus_observer_client: Arc<
+ ConsensusObserverClient<NetworkClient<ConsensusObserverMessage>>,
+ >,
+ peer_manager_request_receivers: &mut HashMap<
+ NetworkId,
+ aptos_channel::Receiver<(PeerId, ProtocolId), PeerManagerRequest>,
+ >,
+ num_subscriptions_to_create: usize,
+ expected_subscription_peers: Vec<PeerNetworkId>,
+ ) {
+ // Get the connected peers and metadata
+ let connected_peers_and_metadata = peers_and_metadata
+ .get_connected_peers_and_metadata()
+ .unwrap();
+
+ // Spawn the subscription creation task
+ let subscription_creation_handle = tokio::spawn(async move {
+ create_new_subscriptions(
+ consensus_observer_config,
+ consensus_observer_client.clone(),
+ None,
+ Arc::new(MockDatabaseReader::new()),
+ TimeService::mock(),
+ connected_peers_and_metadata,
+ num_subscriptions_to_create,
+ vec![],
+ vec![],
+ )
+ .await
+ });
+
+ // Handle the peer manager requests made by the subscription creation task
+ for expected_subscription_peer in &expected_subscription_peers {
+ handle_next_subscription_request(
+ expected_subscription_peer.network_id(),
+ peer_manager_request_receivers,
+ true,
+ )
+ .await;
+ }
+
+ // Wait for the subscription creation task to complete
+ let consensus_observer_subscriptions = subscription_creation_handle.await.unwrap();
+
+ // Verify the created subscriptions
+ assert_eq!(
+ consensus_observer_subscriptions.len(),
+ expected_subscription_peers.len()
+ );
+ for subscription in consensus_observer_subscriptions {
 
assert!(expected_subscription_peers.contains(&subscription.get_peer_network_id()));
+ }
+ }
+
 /// Creates a new connection metadata for testing
 fn create_connection_metadata(
 peer_network_id: PeerNetworkId,
@@ -636,19 +896,52 @@
 }
 }
- /// Creates a new consensus observer client and a peers and metadata container
+ /// Creates a new consensus observer client, along with the
+ /// associated network senders and peers and metadata.
 fn create_consensus_observer_client(
 network_ids: &[NetworkId],
 ) -> (
 Arc<PeersAndMetadata>,
 Arc<ConsensusObserverClient<NetworkClient<ConsensusObserverMessage>>>,
+ HashMap<NetworkId, aptos_channel::Receiver<(PeerId, ProtocolId), PeerManagerRequest>>,
 ) {
+ // Create the network senders and receivers for each network
+ let mut network_senders = HashMap::new();
+ let mut peer_manager_request_receivers = HashMap::new();
+ for network_id in network_ids {
+ // Create the request managers
+ let queue_cfg = aptos_channel::Config::new(10).queue_style(QueueStyle::FIFO);
+ let (peer_manager_request_sender, peer_manager_request_receiver) = queue_cfg.build();
+ let (connected_request_sender, _) = queue_cfg.build();
+
+ // Create the network sender
+ let network_sender = NetworkSender::new(
+ PeerManagerRequestSender::new(peer_manager_request_sender),
+ ConnectionRequestSender::new(connected_request_sender),
+ );
+
+ // Save the network sender and the request receiver
+ network_senders.insert(*network_id, network_sender);
+ peer_manager_request_receivers.insert(*network_id, peer_manager_request_receiver);
+ }
+
+ // Create the network client
 let peers_and_metadata = PeersAndMetadata::new(network_ids);
- let network_client =
- NetworkClient::new(vec![], vec![], hashmap![], peers_and_metadata.clone());
+ let network_client = NetworkClient::new(
+ vec![ProtocolId::ConsensusObserver],
+ vec![ProtocolId::ConsensusObserverRpc],
+ network_senders,
+ peers_and_metadata.clone(),
+ );
+
+ // Create the consensus observer client
 let consensus_observer_client = Arc::new(ConsensusObserverClient::new(network_client));
- (peers_and_metadata, consensus_observer_client)
+ (
+ peers_and_metadata,
+ consensus_observer_client,
+ peer_manager_request_receivers,
+ )
 }
 /// Creates a new peer with the specified connection metadata
@@ -754,6 +1047,76 @@
 peers_and_metadata
 }
+ /// Returns the distance from the validators for the specified network
+ fn get_distance_from_validators(network_id: &NetworkId) -> u64 {
+ match network_id {
+ NetworkId::Validator => 0,
+ NetworkId::Vfn => 1,
+ NetworkId::Public => 2,
+ }
+ }
+
+ /// Fetches and handles the next subscription request from the peer manager
+ async fn handle_next_subscription_request(
+ network_id: NetworkId,
+ peer_manager_request_receivers: &mut HashMap<
+ NetworkId,
+ aptos_channel::Receiver<(PeerId, ProtocolId), PeerManagerRequest>,
+ >,
+ return_successfully: bool,
+ ) {
+ // Get the request receiver for the given network
+ let peer_manager_request_receiver =
+ peer_manager_request_receivers.get_mut(&network_id).unwrap();
+
+ // Wait for the next subscription request
+ match peer_manager_request_receiver.next().await {
+ Some(PeerManagerRequest::SendRpc(_, network_request)) => {
+ // Parse the network request
+ let data = network_request.data;
+ let response_sender = network_request.res_tx;
+ let message: ConsensusObserverMessage = bcs::from_bytes(data.as_ref()).unwrap();
+
+ // Process the network message
+ match message {
+ ConsensusObserverMessage::Request(request) => {
+ // Verify the request is for a new subscription
+ match request {
+ ConsensusObserverRequest::Subscribe => (),
+ _ => panic!(
+ "Unexpected consensus observer request received: {:?}!",
+ request
+ ),
 
}
+
+ // Determine the response to send
+ let response = if return_successfully {
+ // Ack the subscription request
+ ConsensusObserverResponse::SubscribeAck
+ } else {
+ // Respond with the wrong message type
+ ConsensusObserverResponse::UnsubscribeAck
+ };
+ let response_message = ConsensusObserverMessage::Response(response);
+
+ // Send the response to the peer
+ let response_bytes =
+ bcs::to_bytes(&response_message).map(Bytes::from).unwrap();
+ let _ = response_sender.send(Ok(response_bytes));
+ },
+ _ => panic!(
+ "Unexpected consensus observer message type received: {:?}!",
+ message
+ ),
+ }
+ },
+ Some(PeerManagerRequest::SendDirectSend(_, _)) => {
+ panic!("Unexpected direct send message received!")
+ },
+ None => panic!("No subscription request received!"),
+ }
+ }
+
 /// Removes the peer and connection metadata for the given peer
 fn remove_peer_and_connection(
 peers_and_metadata: Arc<PeersAndMetadata>,
From f92c74969eae70e5e23dfef4b6f49db675849e64 Mon Sep 17 00:00:00 2001
From: Wolfgang Grieskamp
Date: Tue, 17 Sep 2024 15:17:15 -0700
Subject: [PATCH 18/36] [move-vm] Fixes to enum type implementation (#14657)
---
 aptos-move/framework/src/module_metadata.rs | 22 +++--
 .../move-binary-format/src/check_bounds.rs | 7 +-
 .../src/check_complexity.rs | 1 +
 .../src/proptest_types/types.rs | 17 ++--
 .../src/unit_tests/mod.rs | 1 +
 .../src/unit_tests/variant_name_test.rs | 81 +++++++++++++++++++
 .../src/check_duplication.rs | 67 ++++++++++++++-
 .../move/move-bytecode-verifier/src/limits.rs | 18 ++++-
 .../src/signature_v2.rs | 26 ++++--
 .../move-bytecode-verifier/src/verifier.rs | 13 ++-
 10 files changed, 228 insertions(+), 25 deletions(-)
 create mode 100644 third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/variant_name_test.rs
diff --git a/aptos-move/framework/src/module_metadata.rs b/aptos-move/framework/src/module_metadata.rs
index e0dc1d36b4fa6..18a6178e23bc6 100644
--- a/aptos-move/framework/src/module_metadata.rs
+++ b/aptos-move/framework/src/module_metadata.rs
@@ -624,11 +624,23 @@ fn check_module_complexity(module: &CompiledModule) -> Result<(), MetaDataValida
 check_ident_complexity(module, &mut meter, handle.name)?;
 }
 for def in module.struct_defs() {
- if let StructFieldInformation::Declared(fields) = &def.field_information {
- for field in fields {
- check_ident_complexity(module, &mut meter, field.name)?;
- check_sigtok_complexity(module, &mut meter, &field.signature.0)?
- }
+ match &def.field_information {
+ StructFieldInformation::Native => {},
+ StructFieldInformation::Declared(fields) => {
+ for field in fields {
+ check_ident_complexity(module, &mut meter, field.name)?;
+ check_sigtok_complexity(module, &mut meter, &field.signature.0)?
+ }
+ },
+ StructFieldInformation::DeclaredVariants(variants) => {
+ for variant in variants {
+ check_ident_complexity(module, &mut meter, variant.name)?;
+ for field in &variant.fields {
+ check_ident_complexity(module, &mut meter, field.name)?;
+ check_sigtok_complexity(module, &mut meter, &field.signature.0)? 
+ } + } + }, } } for def in module.function_defs() { diff --git a/third_party/move/move-binary-format/src/check_bounds.rs b/third_party/move/move-binary-format/src/check_bounds.rs index a3629a450af90..cee325ef32f87 100644 --- a/third_party/move/move-binary-format/src/check_bounds.rs +++ b/third_party/move/move-binary-format/src/check_bounds.rs @@ -385,8 +385,11 @@ impl<'a> BoundsChecker<'a> { } }, StructFieldInformation::DeclaredVariants(variants) => { - for field in variants.iter().flat_map(|v| v.fields.iter()) { - self.check_field_def(type_param_count, field)?; + for variant in variants { + check_bounds_impl(self.view.identifiers(), variant.name)?; + for field in &variant.fields { + self.check_field_def(type_param_count, field)?; + } } if variants.is_empty() { // Empty variants are not allowed diff --git a/third_party/move/move-binary-format/src/check_complexity.rs b/third_party/move/move-binary-format/src/check_complexity.rs index 79ccc6b48bfc3..232d530404cc9 100644 --- a/third_party/move/move-binary-format/src/check_complexity.rs +++ b/third_party/move/move-binary-format/src/check_complexity.rs @@ -244,6 +244,7 @@ impl<'a> BinaryComplexityMeter<'a> { }, StructFieldInformation::DeclaredVariants(variants) => { for variant in variants { + self.meter_identifier(variant.name)?; for field in &variant.fields { self.charge(field.signature.0.num_nodes() as u64)?; } diff --git a/third_party/move/move-binary-format/src/proptest_types/types.rs b/third_party/move/move-binary-format/src/proptest_types/types.rs index 03f5a4f7544c3..566d45809a735 100644 --- a/third_party/move/move-binary-format/src/proptest_types/types.rs +++ b/third_party/move/move-binary-format/src/proptest_types/types.rs @@ -230,15 +230,22 @@ impl StructDefinitionGen { for (i, fd) in fields.into_iter().enumerate() { variant_fields[i % self.variants.len()].push(fd) } + let mut seen_names = BTreeSet::new(); StructFieldInformation::DeclaredVariants( variant_fields .into_iter() .zip(self.variants.iter()) - .map(|(fields, name)| VariantDefinition { - name: IdentifierIndex( - name.index(state.identifiers_len) as TableIndex - ), - fields, + .filter_map(|(fields, name)| { + let variant_name = name.index(state.identifiers_len) as TableIndex; + // avoid duplicates + if seen_names.insert(variant_name) { + Some(VariantDefinition { + name: IdentifierIndex(variant_name), + fields, + }) + } else { + None + } }) .collect(), ) diff --git a/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/mod.rs b/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/mod.rs index 8f0d3704aa5ae..0540045fb8b43 100644 --- a/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/mod.rs +++ b/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/mod.rs @@ -22,4 +22,5 @@ pub mod negative_stack_size_tests; pub mod reference_safety_tests; pub mod signature_tests; pub mod struct_defs_tests; +pub mod variant_name_test; pub mod vec_pack_tests; diff --git a/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/variant_name_test.rs b/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/variant_name_test.rs new file mode 100644 index 0000000000000..fd936241cd1af --- /dev/null +++ b/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/variant_name_test.rs @@ -0,0 +1,81 @@ +// Copyright (c) The Move Contributors +// SPDX-License-Identifier: Apache-2.0 + +use move_binary_format::{ + file_format::{ + 
AbilitySet, AddressIdentifierIndex, FieldDefinition, IdentifierIndex, ModuleHandle, + ModuleHandleIndex, Signature, SignatureToken, StructDefinition, StructFieldInformation, + StructHandle, StructHandleIndex, StructTypeParameter, TypeSignature, VariantDefinition, + }, + file_format_common::VERSION_7, + CompiledModule, +}; +use move_bytecode_verifier::{ + verifier::verify_module_with_config_for_test_with_version, VerifierConfig, +}; +use move_core_types::{identifier::Identifier, vm_status::StatusCode}; + +/// Tests whether the name of a variant is in bounds. (That is, the IdentifierIndex +/// is in bounds of the identifier table.) +#[test] +fn test_variant_name() { + // This is a POC produced during auditing + let ty = SignatureToken::Bool; + + let cm = CompiledModule { + version: 7, + self_module_handle_idx: ModuleHandleIndex(0), + module_handles: vec![ModuleHandle { + address: AddressIdentifierIndex(0), + name: IdentifierIndex(0), + }], + struct_handles: vec![StructHandle { + module: ModuleHandleIndex(0), + name: IdentifierIndex(0), + abilities: AbilitySet::ALL, + type_parameters: vec![StructTypeParameter { + constraints: AbilitySet::EMPTY, + is_phantom: true, + }], + }], + function_handles: vec![], + field_handles: vec![], + friend_decls: vec![], + struct_def_instantiations: vec![], + function_instantiations: vec![], + field_instantiations: vec![], + signatures: vec![Signature(vec![]), Signature(vec![ty])], + identifiers: vec![Identifier::new("M").unwrap()], + address_identifiers: vec![], + constant_pool: vec![], + metadata: vec![], + struct_defs: vec![StructDefinition { + struct_handle: StructHandleIndex(0), + field_information: StructFieldInformation::DeclaredVariants(vec![VariantDefinition { + fields: vec![FieldDefinition { + name: IdentifierIndex(0), + signature: TypeSignature(SignatureToken::Bool), + }], + // <---- out of bound + name: IdentifierIndex(1), + }]), + }], + function_defs: vec![], + struct_variant_handles: vec![], + struct_variant_instantiations: vec![], + variant_field_handles: vec![], + variant_field_instantiations: vec![], + }; + + let result = verify_module_with_config_for_test_with_version( + "test_variant_name", + &VerifierConfig::production(), + &cm, + Some(VERSION_7), + ); + + assert_eq!( + result.unwrap_err().major_status(), + StatusCode::INDEX_OUT_OF_BOUNDS, + ); +} diff --git a/third_party/move/move-bytecode-verifier/src/check_duplication.rs b/third_party/move/move-bytecode-verifier/src/check_duplication.rs index e79f279dc021d..ad6d317c7b579 100644 --- a/third_party/move/move-bytecode-verifier/src/check_duplication.rs +++ b/third_party/move/move-bytecode-verifier/src/check_duplication.rs @@ -15,7 +15,7 @@ use move_binary_format::{ file_format::{ CompiledModule, CompiledScript, Constant, FieldDefinition, FunctionHandle, FunctionHandleIndex, FunctionInstantiation, ModuleHandle, Signature, - StructFieldInformation, StructHandle, StructHandleIndex, TableIndex, + StructFieldInformation, StructHandle, StructHandleIndex, TableIndex, VariantDefinition, }, IndexKind, }; @@ -52,6 +52,10 @@ impl<'a> DuplicationChecker<'a> { let checker = Self { module }; checker.check_field_handles()?; checker.check_field_instantiations()?; + checker.check_variant_field_handles()?; + checker.check_variant_field_instantiations()?; + checker.check_struct_variant_handles()?; + checker.check_struct_variant_instantiations()?; checker.check_function_definitions()?; checker.check_struct_definitions()?; checker.check_struct_instantiations() @@ -201,6 +205,50 @@ impl<'a> DuplicationChecker<'a> { 
Ok(()) } + fn check_variant_field_handles(&self) -> PartialVMResult<()> { + match Self::first_duplicate_element(self.module.variant_field_handles()) { + Some(idx) => Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::VariantFieldHandle, + idx, + )), + None => Ok(()), + } + } + + fn check_variant_field_instantiations(&self) -> PartialVMResult<()> { + match Self::first_duplicate_element(self.module.variant_field_instantiations()) { + Some(idx) => Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::VariantFieldInstantiation, + idx, + )), + None => Ok(()), + } + } + + fn check_struct_variant_handles(&self) -> PartialVMResult<()> { + match Self::first_duplicate_element(self.module.struct_variant_handles()) { + Some(idx) => Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::StructVariantHandle, + idx, + )), + None => Ok(()), + } + } + + fn check_struct_variant_instantiations(&self) -> PartialVMResult<()> { + match Self::first_duplicate_element(self.module.struct_variant_instantiations()) { + Some(idx) => Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::StructVariantInstantiation, + idx, + )), + None => Ok(()), + } + } + fn check_struct_definitions(&self) -> PartialVMResult<()> { // StructDefinition - contained StructHandle defines uniqueness if let Some(idx) = @@ -212,7 +260,7 @@ impl<'a> DuplicationChecker<'a> { idx, )); } - // Field names in structs must be unique + // Field names in variants and structs must be unique for (struct_idx, struct_def) in self.module.struct_defs().iter().enumerate() { match &struct_def.field_information { StructFieldInformation::Native => continue, @@ -227,6 +275,7 @@ impl<'a> DuplicationChecker<'a> { Self::check_duplicate_fields(fields.iter())? }, StructFieldInformation::DeclaredVariants(variants) => { + Self::check_duplicate_variants(variants.iter())?; for variant in variants { Self::check_duplicate_fields(variant.fields.iter())? } @@ -278,6 +327,20 @@ impl<'a> DuplicationChecker<'a> { } } + fn check_duplicate_variants<'l>( + variants: impl Iterator, + ) -> PartialVMResult<()> { + if let Some(idx) = Self::first_duplicate_element(variants.map(|x| x.name)) { + Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::VariantDefinition, + idx, + )) + } else { + Ok(()) + } + } + fn check_function_definitions(&self) -> PartialVMResult<()> { // FunctionDefinition - contained FunctionHandle defines uniqueness if let Some(idx) = diff --git a/third_party/move/move-bytecode-verifier/src/limits.rs b/third_party/move/move-bytecode-verifier/src/limits.rs index 8d95b0b55aa13..1fcb2436be6f2 100644 --- a/third_party/move/move-bytecode-verifier/src/limits.rs +++ b/third_party/move/move-bytecode-verifier/src/limits.rs @@ -97,10 +97,20 @@ impl<'a> LimitsVerifier<'a> { } if let Some(sdefs) = self.resolver.struct_defs() { for sdef in sdefs { - if let StructFieldInformation::Declared(fdefs) = &sdef.field_information { - for fdef in fdefs { - self.verify_type_node(config, &fdef.signature.0)? - } + match &sdef.field_information { + StructFieldInformation::Native => {}, + StructFieldInformation::Declared(fdefs) => { + for fdef in fdefs { + self.verify_type_node(config, &fdef.signature.0)? + } + }, + StructFieldInformation::DeclaredVariants(variants) => { + for variant in variants { + for fdef in &variant.fields { + self.verify_type_node(config, &fdef.signature.0)? 
+ } + } + }, } } } diff --git a/third_party/move/move-bytecode-verifier/src/signature_v2.rs b/third_party/move/move-bytecode-verifier/src/signature_v2.rs index e618353a0725f..77388dec740ad 100644 --- a/third_party/move/move-bytecode-verifier/src/signature_v2.rs +++ b/third_party/move/move-bytecode-verifier/src/signature_v2.rs @@ -1151,14 +1151,28 @@ fn max_num_of_ty_params_or_args(resolver: BinaryIndexedView) -> usize { if let Some(struct_defs) = resolver.struct_defs() { for struct_def in struct_defs { - if let StructFieldInformation::Declared(fields) = &struct_def.field_information { - for field in fields { - for ty in field.signature.0.preorder_traversal() { - if let SignatureToken::TypeParameter(ty_param_idx) = ty { - n = n.max(*ty_param_idx as usize + 1) + match &struct_def.field_information { + StructFieldInformation::Native => {}, + StructFieldInformation::Declared(fields) => { + for field in fields { + for ty in field.signature.0.preorder_traversal() { + if let SignatureToken::TypeParameter(ty_param_idx) = ty { + n = n.max(*ty_param_idx as usize + 1) + } } } - } + }, + StructFieldInformation::DeclaredVariants(variants) => { + for variant in variants { + for field in &variant.fields { + for ty in field.signature.0.preorder_traversal() { + if let SignatureToken::TypeParameter(ty_param_idx) = ty { + n = n.max(*ty_param_idx as usize + 1) + } + } + } + } + }, } } } diff --git a/third_party/move/move-bytecode-verifier/src/verifier.rs b/third_party/move/move-bytecode-verifier/src/verifier.rs index 9783d8f33e9e3..506560dacc4cf 100644 --- a/third_party/move/move-bytecode-verifier/src/verifier.rs +++ b/third_party/move/move-bytecode-verifier/src/verifier.rs @@ -63,10 +63,21 @@ pub fn verify_module_with_config_for_test( name: &str, config: &VerifierConfig, module: &CompiledModule, +) -> VMResult<()> { + verify_module_with_config_for_test_with_version(name, config, module, None) +} + +pub fn verify_module_with_config_for_test_with_version( + name: &str, + config: &VerifierConfig, + module: &CompiledModule, + bytecode_version: Option, ) -> VMResult<()> { const MAX_MODULE_SIZE: usize = 65355; let mut bytes = vec![]; - module.serialize(&mut bytes).unwrap(); + module + .serialize_for_version(bytecode_version, &mut bytes) + .unwrap(); let now = Instant::now(); let result = verify_module_with_config(config, module); eprintln!( From a0193e637dd6862662f3535b59f54171a88e6d59 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 17 Sep 2024 15:48:43 -0700 Subject: [PATCH 19/36] Sync up QC in order vote message (#14637) --- .../src/wrapped_ledger_info.rs | 4 + consensus/src/counters.rs | 4 +- consensus/src/pending_order_votes.rs | 94 +++++++---- consensus/src/round_manager.rs | 156 +++++++++++------- 4 files changed, 170 insertions(+), 88 deletions(-) diff --git a/consensus/consensus-types/src/wrapped_ledger_info.rs b/consensus/consensus-types/src/wrapped_ledger_info.rs index 6125f85ca2c94..ee254af17304b 100644 --- a/consensus/consensus-types/src/wrapped_ledger_info.rs +++ b/consensus/consensus-types/src/wrapped_ledger_info.rs @@ -77,6 +77,10 @@ impl WrappedLedgerInfo { &self.signed_ledger_info } + pub fn epoch(&self) -> u64 { + self.ledger_info().ledger_info().epoch() + } + pub fn commit_info(&self) -> &BlockInfo { self.ledger_info().ledger_info().commit_info() } diff --git a/consensus/src/counters.rs b/consensus/src/counters.rs index 214506e6f92bc..1af6f4f8c6da1 100644 --- a/consensus/src/counters.rs +++ b/consensus/src/counters.rs @@ -662,9 +662,9 @@ pub static ORDER_VOTE_ADDED: Lazy = 
Lazy::new(|| {
     .unwrap()
 });
 
-pub static ORDER_VOTE_VERY_OLD: Lazy<IntCounter> = Lazy::new(|| {
+pub static ORDER_VOTE_NOT_IN_RANGE: Lazy<IntCounter> = Lazy::new(|| {
     register_int_counter!(
-        "aptos_consensus_order_vote_very_old",
+        "aptos_consensus_order_vote_not_in_range",
         "Count of the number of order votes that are very old"
     )
     .unwrap()
 });
diff --git a/consensus/src/pending_order_votes.rs b/consensus/src/pending_order_votes.rs
index 7420b565ce3c1..94b1ba6d15451 100644
--- a/consensus/src/pending_order_votes.rs
+++ b/consensus/src/pending_order_votes.rs
@@ -2,7 +2,7 @@
 // Parts of the project are originally copyright © Meta Platforms, Inc.
 // SPDX-License-Identifier: Apache-2.0
 
-use aptos_consensus_types::{common::Author, order_vote::OrderVote};
+use aptos_consensus_types::{common::Author, order_vote::OrderVote, quorum_cert::QuorumCert};
 use aptos_crypto::{hash::CryptoHash, HashValue};
 use aptos_logger::prelude::*;
 use aptos_types::{
@@ -10,7 +10,7 @@ use aptos_types::{
     ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures, LedgerInfoWithSignatures},
     validator_verifier::{ValidatorVerifier, VerifyError},
 };
-use std::collections::HashMap;
+use std::{collections::HashMap, sync::Arc};
 
 /// Result of the order vote processing. The failure case (Verification error) is returned
 /// as the Error part of the result.
@@ -20,7 +20,8 @@ pub enum OrderVoteReceptionResult {
     /// QC currently has.
     VoteAdded(u128),
     /// This block has just been certified after adding the vote.
-    NewLedgerInfoWithSignatures(LedgerInfoWithSignatures),
+    /// Returns the created order certificate and the QC on which the order certificate is based.
+    NewLedgerInfoWithSignatures((Arc<QuorumCert>, LedgerInfoWithSignatures)),
     /// There might be some issues adding a vote
     ErrorAddingVote(VerifyError),
     /// Error happens when aggregating signature
@@ -39,7 +40,9 @@ enum OrderVoteStatus {
 
 pub struct PendingOrderVotes {
     /// Maps LedgerInfo digest to associated signatures (contained in a partial LedgerInfoWithSignatures).
     /// Order vote status caches the information on whether the votes are enough to form a QC.
-    li_digest_to_votes: HashMap<HashValue, OrderVoteStatus>,
+    /// We also store the QC that the order votes certify.
+    li_digest_to_votes:
+        HashMap<HashValue, (QuorumCert, OrderVoteStatus)>,
 }
 
 impl PendingOrderVotes {
@@ -50,29 +53,42 @@ impl PendingOrderVotes {
         }
     }
 
+    pub fn exists(&self, li_digest: &HashValue) -> bool {
+        self.li_digest_to_votes.contains_key(li_digest)
+    }
+
     /// Add a vote to the pending votes
     // TODO: Should we add any counters here?
pub fn insert_order_vote( &mut self, order_vote: &OrderVote, validator_verifier: &ValidatorVerifier, + verified_quorum_cert: Option, ) -> OrderVoteReceptionResult { // derive data from order vote let li_digest = order_vote.ledger_info().hash(); // obtain the ledger info with signatures associated to the order vote's ledger info - let status = self.li_digest_to_votes.entry(li_digest).or_insert_with(|| { + let (quorum_cert, status) = self.li_digest_to_votes.entry(li_digest).or_insert_with(|| { // if the ledger info with signatures doesn't exist yet, create it - OrderVoteStatus::NotEnoughVotes(LedgerInfoWithPartialSignatures::new( - order_vote.ledger_info().clone(), - PartialSignatures::empty(), - )) + ( + verified_quorum_cert.expect( + "Quorum Cert is expected when creating a new entry in pending order votes", + ), + OrderVoteStatus::NotEnoughVotes(LedgerInfoWithPartialSignatures::new( + order_vote.ledger_info().clone(), + PartialSignatures::empty(), + )), + ) }); match status { OrderVoteStatus::EnoughVotes(li_with_sig) => { // we already have enough votes for this ledger info - OrderVoteReceptionResult::NewLedgerInfoWithSignatures(li_with_sig.clone()) + OrderVoteReceptionResult::NewLedgerInfoWithSignatures(( + Arc::new(quorum_cert.clone()), + li_with_sig.clone(), + )) }, OrderVoteStatus::NotEnoughVotes(li_with_sig) => { // we don't have enough votes for this ledger info yet @@ -107,9 +123,10 @@ impl PendingOrderVotes { Ok(ledger_info_with_sig) => { *status = OrderVoteStatus::EnoughVotes(ledger_info_with_sig.clone()); - OrderVoteReceptionResult::NewLedgerInfoWithSignatures( + OrderVoteReceptionResult::NewLedgerInfoWithSignatures(( + Arc::new(quorum_cert.clone()), ledger_info_with_sig, - ) + )) }, Err(e) => OrderVoteReceptionResult::ErrorAggregatingSignature(e), } @@ -135,19 +152,21 @@ impl PendingOrderVotes { // Removes votes older than highest_ordered_round pub fn garbage_collect(&mut self, highest_ordered_round: u64) { - self.li_digest_to_votes.retain(|_, status| match status { - OrderVoteStatus::EnoughVotes(li_with_sig) => { - li_with_sig.ledger_info().round() > highest_ordered_round - }, - OrderVoteStatus::NotEnoughVotes(li_with_sig) => { - li_with_sig.ledger_info().round() > highest_ordered_round - }, - }); + self.li_digest_to_votes + .retain(|_, (_, status)| match status { + OrderVoteStatus::EnoughVotes(li_with_sig) => { + li_with_sig.ledger_info().round() > highest_ordered_round + }, + OrderVoteStatus::NotEnoughVotes(li_with_sig) => { + li_with_sig.ledger_info().round() > highest_ordered_round + }, + }); } pub fn has_enough_order_votes(&self, ledger_info: &LedgerInfo) -> bool { let li_digest = ledger_info.hash(); - if let Some(OrderVoteStatus::EnoughVotes(_)) = self.li_digest_to_votes.get(&li_digest) { + if let Some((_, OrderVoteStatus::EnoughVotes(_))) = self.li_digest_to_votes.get(&li_digest) + { return true; } false @@ -157,7 +176,7 @@ impl PendingOrderVotes { #[cfg(test)] mod tests { use super::{OrderVoteReceptionResult, PendingOrderVotes}; - use aptos_consensus_types::order_vote::OrderVote; + use aptos_consensus_types::{order_vote::OrderVote, quorum_cert::QuorumCert}; use aptos_crypto::HashValue; use aptos_types::{ block_info::BlockInfo, ledger_info::LedgerInfo, @@ -182,6 +201,7 @@ mod tests { // create random vote from validator[0] let li1 = random_ledger_info(); + let qc = QuorumCert::dummy(); let order_vote_1_author_0 = OrderVote::new_with_signature( signers[0].author(), li1.clone(), @@ -190,13 +210,21 @@ mod tests { // first time a new order vote is added -> OrderVoteAdded 
assert_eq!( - pending_order_votes.insert_order_vote(&order_vote_1_author_0, &validator), - OrderVoteReceptionResult::VoteAdded(1) + pending_order_votes.insert_order_vote( + &order_vote_1_author_0, + &validator, + Some(qc.clone()) + ), + OrderVoteReceptionResult::VoteAdded(1), ); // same author voting for the same thing -> OrderVoteAdded assert_eq!( - pending_order_votes.insert_order_vote(&order_vote_1_author_0, &validator), + pending_order_votes.insert_order_vote( + &order_vote_1_author_0, + &validator, + Some(qc.clone()) + ), OrderVoteReceptionResult::VoteAdded(1) ); @@ -208,8 +236,12 @@ mod tests { signers[1].sign(&li2).expect("Unable to sign ledger info"), ); assert_eq!( - pending_order_votes.insert_order_vote(&order_vote_2_author_1, &validator), - OrderVoteReceptionResult::VoteAdded(1) + pending_order_votes.insert_order_vote( + &order_vote_2_author_1, + &validator, + Some(qc.clone()) + ), + OrderVoteReceptionResult::VoteAdded(1), ); assert!(!pending_order_votes.has_enough_order_votes(&li1)); @@ -220,8 +252,12 @@ mod tests { li2.clone(), signers[2].sign(&li2).expect("Unable to sign ledger info"), ); - match pending_order_votes.insert_order_vote(&order_vote_2_author_2, &validator) { - OrderVoteReceptionResult::NewLedgerInfoWithSignatures(li_with_sig) => { + match pending_order_votes.insert_order_vote( + &order_vote_2_author_2, + &validator, + Some(qc.clone()), + ) { + OrderVoteReceptionResult::NewLedgerInfoWithSignatures((_, li_with_sig)) => { assert!(li_with_sig.check_voting_power(&validator).is_ok()); }, _ => { diff --git a/consensus/src/round_manager.rs b/consensus/src/round_manager.rs index 27382493cc2eb..bd7be5172d775 100644 --- a/consensus/src/round_manager.rs +++ b/consensus/src/round_manager.rs @@ -9,8 +9,8 @@ use crate::{ }, counters::{ self, ORDER_CERT_CREATED_WITHOUT_BLOCK_IN_BLOCK_STORE, ORDER_VOTE_ADDED, - ORDER_VOTE_BROADCASTED, ORDER_VOTE_OTHER_ERRORS, ORDER_VOTE_VERY_OLD, PROPOSAL_VOTE_ADDED, - PROPOSAL_VOTE_BROADCASTED, PROPOSED_VTXN_BYTES, PROPOSED_VTXN_COUNT, + ORDER_VOTE_BROADCASTED, ORDER_VOTE_NOT_IN_RANGE, ORDER_VOTE_OTHER_ERRORS, + PROPOSAL_VOTE_ADDED, PROPOSAL_VOTE_BROADCASTED, PROPOSED_VTXN_BYTES, PROPOSED_VTXN_COUNT, QC_AGGREGATED_FROM_VOTES, SYNC_INFO_RECEIVED_WITH_NEWER_CERT, }, error::{error_kind, VerifyError}, @@ -51,7 +51,7 @@ use aptos_consensus_types::{ vote_msg::VoteMsg, wrapped_ledger_info::WrappedLedgerInfo, }; -use aptos_crypto::HashValue; +use aptos_crypto::{hash::CryptoHash, HashValue}; use aptos_infallible::{checked, Mutex}; use aptos_logger::prelude::*; #[cfg(test)] @@ -1090,8 +1090,6 @@ impl RoundManager { }); let order_vote = order_vote_msg.order_vote(); - self.new_qc_from_order_vote_msg(&order_vote_msg).await?; - debug!( self.new_log(LogEvent::ReceiveOrderVote) .remote_peer(order_vote.author()), @@ -1107,26 +1105,53 @@ impl RoundManager { return Ok(()); } - if order_vote_msg.order_vote().ledger_info().round() - > self.block_store.sync_info().highest_ordered_round() + let highest_ordered_round = self.block_store.sync_info().highest_ordered_round(); + let order_vote_round = order_vote_msg.order_vote().ledger_info().round(); + let li_digest = order_vote_msg.order_vote().ledger_info().hash(); + if order_vote_round > highest_ordered_round + && order_vote_round < highest_ordered_round + 100 { - let vote_reception_result = self - .pending_order_votes - .insert_order_vote(order_vote_msg.order_vote(), &self.epoch_state.verifier); - self.process_order_vote_reception_result(vote_reception_result) - .await?; + // If it is the first order vote received for 
the block, verify the QC and insert it along with the QC.
+            // For the subsequent order votes for the same block, we don't have to verify the QC. Just inserting the
+            // order vote is enough.
+            let vote_reception_result = if !self.pending_order_votes.exists(&li_digest) {
+                let start = Instant::now();
+                order_vote_msg
+                    .quorum_cert()
+                    .verify(&self.epoch_state().verifier)
+                    .context("[OrderVoteMsg] QuorumCert verification failed")?;
+                counters::VERIFY_MSG
+                    .with_label_values(&["order_vote_qc"])
+                    .observe(start.elapsed().as_secs_f64());
+                self.pending_order_votes.insert_order_vote(
+                    order_vote_msg.order_vote(),
+                    &self.epoch_state.verifier,
+                    Some(order_vote_msg.quorum_cert().clone()),
+                )
+            } else {
+                self.pending_order_votes.insert_order_vote(
+                    order_vote_msg.order_vote(),
+                    &self.epoch_state.verifier,
+                    None,
+                )
+            };
+            self.process_order_vote_reception_result(
+                vote_reception_result,
+                order_vote_msg.order_vote().author(),
+            )
+            .await?;
         } else {
-            ORDER_VOTE_VERY_OLD.inc();
+            ORDER_VOTE_NOT_IN_RANGE.inc();
             sample!(
-                SampleRate::Duration(Duration::from_secs(30)),
+                SampleRate::Duration(Duration::from_secs(1)),
                 info!(
-                    "[sampled] Received old order vote. Order vote round: {:?}, Highest ordered round: {:?}",
+                    "[sampled] Received an order vote not in the next 100 rounds. Order vote round: {:?}, Highest ordered round: {:?}",
                     order_vote_msg.order_vote().ledger_info().round(),
                     self.block_store.sync_info().highest_ordered_round()
                 )
             );
             debug!(
-                "Received old order vote. Order vote round: {:?}, Highest ordered round: {:?}",
+                "Received an order vote not in the next 100 rounds. Order vote round: {:?}, Highest ordered round: {:?}",
                 order_vote_msg.order_vote().ledger_info().round(),
                 self.block_store.sync_info().highest_ordered_round()
             )
@@ -1315,13 +1340,18 @@ impl RoundManager {
     async fn process_order_vote_reception_result(
         &mut self,
         result: OrderVoteReceptionResult,
+        preferred_peer: Author,
     ) -> anyhow::Result<()> {
         match result {
-            OrderVoteReceptionResult::NewLedgerInfoWithSignatures(ledger_info_with_signatures) => {
-                self.new_ordered_cert(WrappedLedgerInfo::new(
-                    VoteData::dummy(),
-                    ledger_info_with_signatures,
-                ))
+            OrderVoteReceptionResult::NewLedgerInfoWithSignatures((
+                verified_qc,
+                ledger_info_with_signatures,
+            )) => {
+                self.new_ordered_cert(
+                    WrappedLedgerInfo::new(VoteData::dummy(), ledger_info_with_signatures),
+                    verified_qc,
+                    preferred_peer,
+                )
                 .await
             },
             OrderVoteReceptionResult::VoteAdded(_) => {
@@ -1351,49 +1381,61 @@ impl RoundManager {
     async fn new_qc_from_order_vote_msg(
         &mut self,
-        order_vote_msg: &OrderVoteMsg,
+        verified_qc: Arc<QuorumCert>,
+        preferred_peer: Author,
     ) -> anyhow::Result<()> {
-        if let NeedFetchResult::QCAlreadyExist = self
+        match self
             .block_store
-            .need_fetch_for_quorum_cert(order_vote_msg.quorum_cert())
+            .need_fetch_for_quorum_cert(verified_qc.as_ref())
         {
-            return Ok(());
+            NeedFetchResult::QCAlreadyExist => Ok(()),
+            NeedFetchResult::QCBlockExist => {
+                // If the block is already in the block store, but QC isn't available in the block store, insert QC.
+                let result = self
+                    .block_store
+                    .insert_quorum_cert(
+                        verified_qc.as_ref(),
+                        &mut self.create_block_retriever(preferred_peer),
+                    )
+                    .await
+                    .context("[RoundManager] Failed to process the QC from order vote msg");
+                self.process_certificates().await?;
+                result
+            },
+            NeedFetchResult::NeedFetch => {
+                // If the block doesn't exist, we could ideally do sync up based on the qc.
+                // But this could trigger fetching a lot of past blocks in case the node is lagging behind.
+                // So, we just log here to avoid a long sequence of block fetches.
+                // One of the subsequent sync info messages will trigger the block fetch or state sync if required.
+                ORDER_CERT_CREATED_WITHOUT_BLOCK_IN_BLOCK_STORE.inc();
+                sample!(
+                    SampleRate::Duration(Duration::from_millis(200)),
+                    info!(
+                        "Ordered certificate created without block in block store: {:?}",
+                        verified_qc.certified_block()
+                    );
+                );
+                Err(anyhow::anyhow!(
+                    "Ordered certificate created without block in block store"
+                ))
+            },
+            NeedFetchResult::QCRoundBeforeRoot => {
+                Err(anyhow::anyhow!("Ordered certificate is old"))
+            },
         }
-
-        let start = Instant::now();
-        order_vote_msg
-            .quorum_cert()
-            .verify(&self.epoch_state().verifier)
-            .context("[OrderVoteMsg QuorumCert verification failed")?;
-        counters::VERIFY_MSG
-            .with_label_values(&["order_vote_qc"])
-            .observe(start.elapsed().as_secs_f64());
-
-        let result = self
-            .block_store
-            .insert_quorum_cert(
-                order_vote_msg.quorum_cert(),
-                &mut self.create_block_retriever(order_vote_msg.order_vote().author()),
-            )
-            .await
-            .context("[RoundManager] Failed to process the QC from order vote msg");
-        self.process_certificates().await?;
-        result
     }
 
     // Insert ordered certificate formed by aggregating order votes
-    async fn new_ordered_cert(&mut self, ordered_cert: WrappedLedgerInfo) -> anyhow::Result<()> {
-        if self
-            .block_store
-            .get_block(ordered_cert.commit_info().id())
-            .is_none()
-        {
-            ORDER_CERT_CREATED_WITHOUT_BLOCK_IN_BLOCK_STORE.inc();
-            error!(
-                "Ordered certificate created without block in block store: {:?}",
-                ordered_cert
-            );
-        }
+    async fn new_ordered_cert(
+        &mut self,
+        ordered_cert: WrappedLedgerInfo,
+        verified_qc: Arc<QuorumCert>,
+        preferred_peer: Author,
+    ) -> anyhow::Result<()> {
+        self.new_qc_from_order_vote_msg(verified_qc, preferred_peer)
+            .await?;
+
+        // If the block and QC now exist in the block store, insert the ordered cert
         let result = self
             .block_store
             .insert_ordered_cert(&ordered_cert)

From 3f920cba65f5146f68a75eb952dce848bad83222 Mon Sep 17 00:00:00 2001
From: Josh Lind
Date: Fri, 6 Sep 2024 19:42:43 -0400
Subject: [PATCH 20/36] [Consensus Observer] Downgrade unnecessary error logs.

---
 consensus/src/consensus_observer/observer/active_state.rs | 2 +-
 consensus/src/network.rs                                  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/consensus/src/consensus_observer/observer/active_state.rs b/consensus/src/consensus_observer/observer/active_state.rs
index fb5482bba3306..f162fab553e15 100644
--- a/consensus/src/consensus_observer/observer/active_state.rs
+++ b/consensus/src/consensus_observer/observer/active_state.rs
@@ -243,7 +243,7 @@ async fn extract_on_chain_configs(
     let onchain_randomness_config_seq_num: anyhow::Result =
         on_chain_configs.get();
     if let Err(error) = &onchain_randomness_config_seq_num {
-        error!(
+        warn!(
             LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
Error: {:?}", error diff --git a/consensus/src/network.rs b/consensus/src/network.rs index 698e089638513..517c01fce472c 100644 --- a/consensus/src/network.rs +++ b/consensus/src/network.rs @@ -346,7 +346,7 @@ impl NetworkSender { if self.author == peer { let self_msg = Event::Message(self.author, msg.clone()); if let Err(err) = self_sender.send(self_msg).await { - error!(error = ?err, "Error delivering a self msg"); + warn!(error = ?err, "Error delivering a self msg"); } continue; } From ecfa13033b4e674bfdeafc39ade89c498ed683e7 Mon Sep 17 00:00:00 2001 From: Wolfgang Grieskamp Date: Tue, 17 Sep 2024 16:31:41 -0700 Subject: [PATCH 21/36] [compiler-v2] Disallow empty enum types (#14658) Previously, those where compiled to empty structs which in turn lead to ZERO_STRUCT_SIZE bytecode verifier errors. --- .../checking/variants/variants_empty.exp | 7 ++++++ .../checking/variants/variants_empty.move | 3 +++ .../move-model/src/builder/module_builder.rs | 23 ++++++++++++------- 3 files changed, 25 insertions(+), 8 deletions(-) create mode 100644 third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.exp create mode 100644 third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.move diff --git a/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.exp b/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.exp new file mode 100644 index 0000000000000..4ebc0cefaba81 --- /dev/null +++ b/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.exp @@ -0,0 +1,7 @@ + +Diagnostics: +error: enum type `T` must have at least one variant. + ┌─ tests/checking/variants/variants_empty.move:2:5 + │ +2 │ enum T{} + │ ^^^^^^^^ diff --git a/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.move b/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.move new file mode 100644 index 0000000000000..ceecab879f3de --- /dev/null +++ b/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.move @@ -0,0 +1,3 @@ +module 0x42::variants_empty { + enum T{} +} diff --git a/third_party/move/move-model/src/builder/module_builder.rs b/third_party/move/move-model/src/builder/module_builder.rs index c0ebb45a8c29a..7cf73ae32f664 100644 --- a/third_party/move/move-model/src/builder/module_builder.rs +++ b/third_party/move/move-model/src/builder/module_builder.rs @@ -1257,6 +1257,15 @@ impl<'env, 'translator> ModuleBuilder<'env, 'translator> { } }) .collect_vec(); + if variant_maps.is_empty() { + self.parent.error( + &self.parent.to_loc(&def.loc), + &format!( + "enum type `{}` must have at least one variant.", + qsym.symbol.display(self.parent.env.symbol_pool()) + ), + ) + } (StructLayout::Variants(variant_maps), false) }, EA::StructLayout::Native(_) => (StructLayout::None, false), @@ -3480,9 +3489,10 @@ impl<'env, 'translator> ModuleBuilder<'env, 'translator> { let spec = self.struct_specs.remove(&name.symbol).unwrap_or_default(); let mut field_data: BTreeMap = BTreeMap::new(); let mut variants: BTreeMap = BTreeMap::new(); - match &entry.layout { + let is_enum = match &entry.layout { StructLayout::Singleton(fields, _) => { field_data.extend(fields.values().map(|f| (FieldId::new(f.name), f.clone()))); + false }, StructLayout::Variants(entry_variants) => { for (order, variant) in entry_variants.iter().enumerate() { @@ -3501,9 +3511,10 @@ impl<'env, 'translator> ModuleBuilder<'env, 'translator> { field_data.insert(field_id, field); } } + true }, - StructLayout::None => {}, - } + 
StructLayout::None => false, + }; let data = StructData { name: name.symbol, loc: entry.loc.clone(), @@ -3513,11 +3524,7 @@ impl<'env, 'translator> ModuleBuilder<'env, 'translator> { abilities: entry.abilities, spec_var_opt: None, field_data, - variants: if variants.is_empty() { - None - } else { - Some(variants) - }, + variants: if is_enum { Some(variants) } else { None }, spec: RefCell::new(spec), is_native: entry.is_native, }; From 3b73588de37a99fc622d311310fad7729e33b884 Mon Sep 17 00:00:00 2001 From: Guoteng Rao <3603304+grao1991@users.noreply.github.com> Date: Tue, 17 Sep 2024 17:26:08 -0700 Subject: [PATCH 22/36] Remove an unnecessary clone. (#14659) --- third_party/move/tools/move-resource-viewer/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/move/tools/move-resource-viewer/src/lib.rs b/third_party/move/tools/move-resource-viewer/src/lib.rs index 6691bed783f3f..54ffa7f3c65ab 100644 --- a/third_party/move/tools/move-resource-viewer/src/lib.rs +++ b/third_party/move/tools/move-resource-viewer/src/lib.rs @@ -476,8 +476,8 @@ impl MoveValueAnnotator { values .iter() .zip(tys) - .zip(field_names.iter()) - .map(|((v, ty), n)| self.annotate_value(v, ty, limit).map(|v| (n.clone(), v))) + .zip(field_names) + .map(|((v, ty), n)| self.annotate_value(v, ty, limit).map(|v| (n, v))) .collect::>>() }; From e74444909d848e5511132e06fd0ea2cf20bcea50 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 17 Sep 2024 18:29:19 -0700 Subject: [PATCH 23/36] Addressing PR comments --- types/Cargo.toml | 1 + types/src/ledger_info.rs | 43 ++++++++++++------------------ types/src/validator_verifier.rs | 46 +++++++++++++++++---------------- 3 files changed, 42 insertions(+), 48 deletions(-) diff --git a/types/Cargo.toml b/types/Cargo.toml index d52bec17b259b..e4b52e7e8521c 100644 --- a/types/Cargo.toml +++ b/types/Cargo.toml @@ -29,6 +29,7 @@ base64 = { workspace = true } bcs = { workspace = true } bytes = { workspace = true } dashmap = { workspace = true } +derivative = { workspace = true } fixed = { workspace = true } fxhash = { workspace = true } hashbrown = { workspace = true } diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs index d704defe627ad..0a8fc80e35962 100644 --- a/types/src/ledger_info.rs +++ b/types/src/ledger_info.rs @@ -463,13 +463,11 @@ impl LedgerInfoWithMixedSignatures { } // Collecting all the authors from verified signatures, unverified signatures and the aggregated signature. - pub fn all_voters(&self) -> Vec { + pub fn all_voters(&self) -> impl Iterator { self.verified_signatures .signatures() .keys() .chain(self.unverified_signatures.signatures().keys()) - .cloned() - .collect() } pub fn check_voting_power( @@ -478,10 +476,7 @@ impl LedgerInfoWithMixedSignatures { check_super_majority: bool, ) -> std::result::Result { let all_voters = self.all_voters(); - verifier.check_voting_power( - all_voters.iter().collect_vec().into_iter(), - check_super_majority, - ) + verifier.check_voting_power(all_voters, check_super_majority) } // Aggregates all the signatures, verifies the aggregate signature, and returns the aggregate signature. @@ -500,7 +495,6 @@ impl LedgerInfoWithMixedSignatures { match epoch_state .verifier - .clone() .verify_multi_signatures(self.ledger_info(), &aggregated_sig) { Ok(_) => { @@ -515,7 +509,7 @@ impl LedgerInfoWithMixedSignatures { )) }, Err(_) => { - // Question: Should we assign min tasks per thread here for into_par_iter()? 
+ // Question: How to add counters to keep track of the total time spent in the parallel threads? let verified = self .unverified_signatures .signatures() @@ -536,17 +530,14 @@ impl LedgerInfoWithMixedSignatures { .add_signature(account_address, signature.clone()); self.unverified_signatures.remove_signature(account_address); } - let malicious_authors = self - .unverified_signatures - .signatures() - .keys() - .cloned() - .collect(); - self.unverified_signatures = PartialSignatures::empty(); + // For these authors, we will not use optimistic signature verification in the future. + let pessimistic_authors = self.unverified_signatures.signatures().keys().cloned(); epoch_state .verifier - .add_malicious_authors(malicious_authors); + .add_pessimistic_verify_set(pessimistic_authors); + + self.unverified_signatures = PartialSignatures::empty(); match self.check_voting_power(&epoch_state.verifier, true) { Ok(_) => Ok(LedgerInfoWithSignatures::new( @@ -736,7 +727,7 @@ mod tests { validator_signers[3].sign(&ledger_info).unwrap(), ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 4); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 4); assert_eq!( ledger_info_with_mixed_signatures .unverified_signatures @@ -765,7 +756,7 @@ mod tests { VerificationStatus::Unverified, ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 5); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 5); assert_eq!( ledger_info_with_mixed_signatures .unverified_signatures @@ -807,8 +798,8 @@ mod tests { .len(), 4 ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 4); - assert_eq!(epoch_state.verifier.malicious_authors().len(), 1); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 4); + assert_eq!(epoch_state.verifier.pessimistic_verify_set().len(), 1); ledger_info_with_mixed_signatures.add_signature( validator_signers[5].author(), @@ -820,7 +811,7 @@ mod tests { validator_signers[5].sign(&ledger_info).unwrap(), ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 5); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 5); assert_eq!( ledger_info_with_mixed_signatures .unverified_signatures @@ -867,7 +858,7 @@ mod tests { .len(), 5 ); - assert_eq!(epoch_state.verifier.malicious_authors().len(), 1); + assert_eq!(epoch_state.verifier.pessimistic_verify_set().len(), 1); ledger_info_with_mixed_signatures.add_signature( validator_signers[6].author(), @@ -875,7 +866,7 @@ mod tests { VerificationStatus::Unverified, ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 6); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 6); assert_eq!( ledger_info_with_mixed_signatures .check_voting_power(&validator_verifier, true) @@ -902,7 +893,7 @@ mod tests { .len(), 5 ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 5); - assert_eq!(epoch_state.verifier.malicious_authors().len(), 2); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 5); + assert_eq!(epoch_state.verifier.pessimistic_verify_set().len(), 2); } } diff --git a/types/src/validator_verifier.rs b/types/src/validator_verifier.rs index 6ca856ae7b61f..26005cf374a5a 100644 --- a/types/src/validator_verifier.rs +++ b/types/src/validator_verifier.rs @@ -18,6 +18,7 @@ use aptos_crypto::{ Signature, VerifyingKey, }; use dashmap::DashSet; +use derivative::Derivative; use itertools::Itertools; #[cfg(any(test, feature = "fuzzing"))] use proptest_derive::Arbitrary; @@ -130,7 +131,8 
@@ impl TryFrom for ValidatorConsensusInfo {
 /// Supports validation of signatures for known authors with individual voting powers. This struct
 /// can be used for all signature verification operations including block and network signature
 /// verification, respectively.
-#[derive(Clone, Debug, Serialize)]
+#[derive(Clone, Debug, Derivative, Serialize)]
+#[derivative(PartialEq, Eq)]
 pub struct ValidatorVerifier {
     /// A vector of each validator's on-chain account address to its pubkeys and voting power.
     validator_infos: Vec<ValidatorConsensusInfo>,
@@ -149,20 +151,21 @@ pub struct ValidatorVerifier {
     /// submitted bad votes that have resulted in having to verify each vote individually. Further votes by these validators
     /// will be verified individually, bypassing the optimization.
     #[serde(skip)]
-    malicious_authors: Arc<DashSet<AccountAddress>>,
+    #[derivative(PartialEq = "ignore")]
+    pessimistic_verify_set: Arc<DashSet<AccountAddress>>,
 }
 
-// Implement Eq and PartialEq for ValidatorVerifier. Skip malicious_authors field in the comparison.
-impl PartialEq for ValidatorVerifier {
-    fn eq(&self, other: &Self) -> bool {
-        self.validator_infos == other.validator_infos
-            && self.quorum_voting_power == other.quorum_voting_power
-            && self.total_voting_power == other.total_voting_power
-            && self.address_to_validator_index == other.address_to_validator_index
-    }
-}
+// // Implement Eq and PartialEq for ValidatorVerifier.
Skip pessimistic_verify_set field in the comparison. -// impl PartialEq for ValidatorVerifier { -// fn eq(&self, other: &Self) -> bool { -// self.validator_infos == other.validator_infos -// && self.quorum_voting_power == other.quorum_voting_power -// && self.total_voting_power == other.total_voting_power -// && self.address_to_validator_index == other.address_to_validator_index -// } -// } - -// impl Eq for ValidatorVerifier {} - /// Reconstruct fields from the raw data upon deserialization. impl<'de> Deserialize<'de> for ValidatorVerifier { fn deserialize(deserializer: D) -> Result From 09ce976ab373c834b1a3ef4de06a743456765404 Mon Sep 17 00:00:00 2001 From: 0xbe1 <0xbetrue@gmail.com> Date: Wed, 18 Sep 2024 16:28:03 +0800 Subject: [PATCH 25/36] fix `aptos move disassemble` help message (#14594) --- crates/aptos/src/move_tool/bytecode.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/aptos/src/move_tool/bytecode.rs b/crates/aptos/src/move_tool/bytecode.rs index 7a80844d5b55f..6a662300b20db 100644 --- a/crates/aptos/src/move_tool/bytecode.rs +++ b/crates/aptos/src/move_tool/bytecode.rs @@ -39,7 +39,7 @@ const DECOMPILER_EXTENSION: &str = "mv.move"; /// /// For example, if you want to disassemble an on-chain package `PackName` at account `0x42`: /// 1. Download the package with `aptos move download --account 0x42 --package PackName --bytecode` -/// 2. Disassemble the package bytecode with `aptos disassemble --package-path PackName/bytecode_modules` +/// 2. Disassemble the package bytecode with `aptos move disassemble --package-path PackName/bytecode_modules` #[derive(Debug, Parser)] pub struct Disassemble { #[clap(flatten)] From 94966aec081c554b115ab44fbda6eb240f2f1b42 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 18 Sep 2024 10:57:28 -0700 Subject: [PATCH 26/36] Cache commit votes received for future rounds in the buffer manager (#14570) --- config/src/config/consensus_config.rs | 3 + consensus/src/pipeline/buffer_item.rs | 3 +- consensus/src/pipeline/buffer_manager.rs | 61 +++++++++++++++++-- .../src/pipeline/decoupled_execution_utils.rs | 2 + consensus/src/pipeline/execution_client.rs | 2 + .../pipeline/tests/buffer_manager_tests.rs | 1 + 6 files changed, 67 insertions(+), 5 deletions(-) diff --git a/config/src/config/consensus_config.rs b/config/src/config/consensus_config.rs index bc45494f2de4c..90526afc77510 100644 --- a/config/src/config/consensus_config.rs +++ b/config/src/config/consensus_config.rs @@ -89,6 +89,8 @@ pub struct ConsensusConfig { pub rand_rb_config: ReliableBroadcastConfig, pub num_bounded_executor_tasks: u64, pub enable_pre_commit: bool, + + pub max_pending_rounds_in_commit_vote_cache: u64, } #[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] @@ -354,6 +356,7 @@ impl Default for ConsensusConfig { }, num_bounded_executor_tasks: 16, enable_pre_commit: true, + max_pending_rounds_in_commit_vote_cache: 100, } } } diff --git a/consensus/src/pipeline/buffer_item.rs b/consensus/src/pipeline/buffer_item.rs index f44cf291c04ae..46e92047d3d18 100644 --- a/consensus/src/pipeline/buffer_item.rs +++ b/consensus/src/pipeline/buffer_item.rs @@ -146,9 +146,10 @@ impl BufferItem { ordered_blocks: Vec, ordered_proof: LedgerInfoWithSignatures, callback: StateComputerCommitCallBackType, + unverified_signatures: PartialSignatures, ) -> Self { Self::Ordered(Box::new(OrderedItem { - unverified_signatures: PartialSignatures::empty(), + unverified_signatures, commit_proof: None, callback, ordered_blocks, diff --git 
a/consensus/src/pipeline/buffer_manager.rs b/consensus/src/pipeline/buffer_manager.rs
index 603a246b228a0..38d5aa8578893 100644
--- a/consensus/src/pipeline/buffer_manager.rs
+++ b/consensus/src/pipeline/buffer_manager.rs
@@ -28,6 +28,7 @@ use aptos_bounded_executor::BoundedExecutor;
 use aptos_config::config::ConsensusObserverConfig;
 use aptos_consensus_types::{
     common::{Author, Round},
+    pipeline::commit_vote::CommitVote,
     pipelined_block::PipelinedBlock,
 };
 use aptos_crypto::HashValue;
@@ -37,8 +38,8 @@ use aptos_network::protocols::{rpc::error::RpcError, wire::handshake::v1::Protoc
 use aptos_reliable_broadcast::{DropGuard, ReliableBroadcast};
 use aptos_time_service::TimeService;
 use aptos_types::{
-    account_address::AccountAddress, epoch_change::EpochChangeProof, epoch_state::EpochState,
-    ledger_info::LedgerInfoWithSignatures,
+    account_address::AccountAddress, aggregate_signature::PartialSignatures,
+    epoch_change::EpochChangeProof, epoch_state::EpochState, ledger_info::LedgerInfoWithSignatures,
 };
 use bytes::Bytes;
 use futures::{
@@ -51,7 +52,7 @@ use futures::{
 };
 use once_cell::sync::OnceCell;
 use std::{
-    collections::BTreeMap,
+    collections::{BTreeMap, HashMap},
     sync::{
         atomic::{AtomicBool, AtomicU64, Ordering},
         Arc,
@@ -164,6 +165,11 @@ pub struct BufferManager {
     consensus_publisher: Option<Arc<ConsensusPublisher>>,
 
     pending_commit_proofs: BTreeMap<Round, LedgerInfoWithSignatures>,
+
+    max_pending_rounds_in_commit_vote_cache: u64,
+    // If the buffer manager receives a commit vote for a block that is not in buffer items, then
+    // the vote will be cached. We can cache up to max_pending_rounds_in_commit_vote_cache (100) blocks.
+    pending_commit_votes: BTreeMap<Round, HashMap<AccountAddress, CommitVote>>,
 }
 
 impl BufferManager {
@@ -194,6 +200,7 @@ impl BufferManager {
         highest_committed_round: Round,
         consensus_observer_config: ConsensusObserverConfig,
         consensus_publisher: Option<Arc<ConsensusPublisher>>,
+        max_pending_rounds_in_commit_vote_cache: u64,
     ) -> Self {
         let buffer = Buffer::<BufferItem>::new();
@@ -257,6 +264,9 @@ impl BufferManager {
             consensus_publisher,
 
             pending_commit_proofs: BTreeMap::new(),
+
+            max_pending_rounds_in_commit_vote_cache,
+            pending_commit_votes: BTreeMap::new(),
         }
     }
@@ -333,6 +343,30 @@ impl BufferManager {
         }
     }
 
+    fn try_add_pending_commit_vote(&mut self, vote: CommitVote) -> bool {
+        let block_id = vote.commit_info().id();
+        let round = vote.commit_info().round();
+
+        // Store the commit vote only if it is for one of the next 100 rounds.
+        if round > self.highest_committed_round
+            && self.highest_committed_round + self.max_pending_rounds_in_commit_vote_cache > round
+        {
+            self.pending_commit_votes
+                .entry(round)
+                .or_default()
+                .insert(vote.author(), vote);
+            true
+        } else {
+            debug!(
+                round = round,
+                highest_committed_round = self.highest_committed_round,
+                block_id = block_id,
+                "Received a commit vote not in the next 100 rounds, ignored."
+ ); + false + } + } + fn drain_pending_commit_proof_till( &mut self, round: Round, @@ -381,7 +415,23 @@ impl BufferManager { .await .expect("Failed to send execution schedule request"); - let item = BufferItem::new_ordered(ordered_blocks, ordered_proof, callback); + let mut unverified_signatures = PartialSignatures::empty(); + if let Some(block) = ordered_blocks.last() { + if let Some(votes) = self.pending_commit_votes.remove(&block.round()) { + votes + .values() + .filter(|vote| vote.commit_info().id() == block.id()) + .for_each(|vote| { + unverified_signatures.add_signature(vote.author(), vote.signature().clone()) + }); + } + } + let item = BufferItem::new_ordered( + ordered_blocks, + ordered_proof, + callback, + unverified_signatures, + ); self.buffer.push_back(item); } @@ -741,6 +791,8 @@ impl BufferManager { } else { return None; } + } else if self.try_add_pending_commit_vote(vote) { + reply_ack(protocol, response_sender); } else { reply_nack(protocol, response_sender); // TODO: send_commit_vote() doesn't care about the response and this should be direct send not RPC } @@ -944,6 +996,7 @@ impl BufferManager { }, Some(Ok(round)) = self.persisting_phase_rx.next() => { // see where `need_backpressure()` is called. + self.pending_commit_votes.retain(|rnd, _| *rnd > round); self.highest_committed_round = round }, Some(rpc_request) = verified_commit_msg_rx.next() => { diff --git a/consensus/src/pipeline/decoupled_execution_utils.rs b/consensus/src/pipeline/decoupled_execution_utils.rs index 039834497bce9..8178d871e7efc 100644 --- a/consensus/src/pipeline/decoupled_execution_utils.rs +++ b/consensus/src/pipeline/decoupled_execution_utils.rs @@ -44,6 +44,7 @@ pub fn prepare_phases_and_buffer_manager( highest_committed_round: u64, consensus_observer_config: ConsensusObserverConfig, consensus_publisher: Option>, + max_pending_rounds_in_commit_vote_cache: u64, ) -> ( PipelinePhase, PipelinePhase, @@ -134,6 +135,7 @@ pub fn prepare_phases_and_buffer_manager( highest_committed_round, consensus_observer_config, consensus_publisher, + max_pending_rounds_in_commit_vote_cache, ), ) } diff --git a/consensus/src/pipeline/execution_client.rs b/consensus/src/pipeline/execution_client.rs index 9228c2dcaedc8..9d50fe08e4a3f 100644 --- a/consensus/src/pipeline/execution_client.rs +++ b/consensus/src/pipeline/execution_client.rs @@ -282,6 +282,8 @@ impl ExecutionProxyClient { highest_committed_round, consensus_observer_config, consensus_publisher, + self.consensus_config + .max_pending_rounds_in_commit_vote_cache, ); tokio::spawn(execution_schedule_phase.start()); diff --git a/consensus/src/pipeline/tests/buffer_manager_tests.rs b/consensus/src/pipeline/tests/buffer_manager_tests.rs index d8ca6523d1c66..9ef9ed94600cd 100644 --- a/consensus/src/pipeline/tests/buffer_manager_tests.rs +++ b/consensus/src/pipeline/tests/buffer_manager_tests.rs @@ -161,6 +161,7 @@ pub fn prepare_buffer_manager( 0, ConsensusObserverConfig::default(), None, + 100, ); ( From 0a9d6543943313cb373c5dc7bb239cacf24ce969 Mon Sep 17 00:00:00 2001 From: Greg Nazario Date: Wed, 28 Aug 2024 10:33:47 -0700 Subject: [PATCH 27/36] [framework] Disable object burn --- .../framework/aptos-framework/doc/object.md | 90 ++++++++++--------- .../aptos-framework/sources/object.move | 44 ++++++--- .../aptos-framework/sources/object.spec.move | 6 +- .../sources/primary_fungible_store.move | 4 +- .../simple_dispatchable_token_pfs_tests.move | 4 +- 5 files changed, 88 insertions(+), 60 deletions(-) diff --git 
a/aptos-move/framework/aptos-framework/doc/object.md b/aptos-move/framework/aptos-framework/doc/object.md index bba128592ff31..f3dae60d94a88 100644 --- a/aptos-move/framework/aptos-framework/doc/object.md +++ b/aptos-move/framework/aptos-framework/doc/object.md @@ -604,6 +604,16 @@ generate_unique_address uses this for domain separation within its native implem + + +Objects cannot be burnt + + +
const EBURN_NOT_ALLOWED: u64 = 10;
+
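A call that previously succeeded now fails fast with this error. A minimal hypothetical call site, for illustration only (owner and obj here are placeholders):

    // Aborts with 0x5000A, i.e. error::permission_denied(EBURN_NOT_ALLOWED),
    // as asserted by the new test_burn_should_fail test later in this patch.
    object::burn(owner, obj);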
+ + + The object does not allow for deletion @@ -2130,12 +2140,13 @@ objects may have cyclic dependencies. ## Function `burn` -Forcefully transfer an unwanted object to BURN_ADDRESS, ignoring whether ungated_transfer is allowed. -This only works for objects directly owned and for simplicity does not apply to indirectly owned objects. -Original owners can reclaim burnt objects any time in the future by calling unburn. +Previously allowed to burn objects, has now been disabled. Objects can still be unburnt. +Please use the test only [object::burn_object] for testing with previously burned objects. -
public entry fun burn<T: key>(owner: &signer, object: object::Object<T>)
+
+
#[deprecated]
+public entry fun burn<T: key>(_owner: &signer, _object: object::Object<T>)
 
@@ -2144,12 +2155,8 @@ Original owners can reclaim burnt objects any time in the future by calling unbu Implementation -
public entry fun burn<T: key>(owner: &signer, object: Object<T>) acquires ObjectCore {
-    let original_owner = signer::address_of(owner);
-    assert!(is_owner(object, original_owner), error::permission_denied(ENOT_OBJECT_OWNER));
-    let object_addr = object.inner;
-    move_to(&create_signer(object_addr), TombStone { original_owner });
-    transfer_raw_inner(object_addr, BURN_ADDRESS);
+
public entry fun burn<T: key>(_owner: &signer, _object: Object<T>) {
+    abort error::permission_denied(EBURN_NOT_ALLOWED)
 }
 
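For tests that exercised the old behavior, a minimal migration sketch, assuming the test-only helpers that this patch keeps or introduces in object.move (create_hero, burn_object, unburn, is_burnt); the test name is hypothetical:

    #[test(creator = @0x123)]
    fun test_burn_object_then_unburn(creator: &signer) acquires ObjectCore, TombStone {
        let (_, hero) = create_hero(creator);
        // The test-only burn_object keeps the old semantics: it tombstones the
        // object and force-transfers it to BURN_ADDRESS.
        burn_object(creator, hero);
        assert!(is_burnt(hero), 0);
        // Original owners can still reclaim burnt objects.
        unburn(creator, hero);
        assert!(!is_burnt(hero), 0);
    }

Note that unburn remains fully supported, so objects burned before this change can still be reclaimed.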
@@ -2441,6 +2448,33 @@ to determine the identity of the starting point of ownership. + + + + +
fun spec_create_object_address(source: address, seed: vector<u8>): address;
+
+ + + + + + + +
fun spec_create_user_derived_object_address(source: address, derive_from: address): address;
+
+ + + + + + + +
fun spec_create_guid_object_address(source: address, creation_num: u64): address;
+
+ + + ### Function `address_to_object` @@ -3245,17 +3279,14 @@ to determine the identity of the starting point of ownership. ### Function `burn` -
public entry fun burn<T: key>(owner: &signer, object: object::Object<T>)
+
#[deprecated]
+public entry fun burn<T: key>(_owner: &signer, _object: object::Object<T>)
 
-
pragma aborts_if_is_partial;
-let object_address = object.inner;
-aborts_if !exists<ObjectCore>(object_address);
-aborts_if owner(object) != signer::address_of(owner);
-aborts_if is_burnt(object);
+
aborts_if true;
 
@@ -3368,31 +3399,4 @@ to determine the identity of the starting point of ownership.
- - - - - -
fun spec_create_object_address(source: address, seed: vector<u8>): address;
-
- - - - - - - -
fun spec_create_user_derived_object_address(source: address, derive_from: address): address;
-
- - - - - - - -
fun spec_create_guid_object_address(source: address, creation_num: u64): address;
-
- - [move-book]: https://aptos.dev/move/book/SUMMARY diff --git a/aptos-move/framework/aptos-framework/sources/object.move b/aptos-move/framework/aptos-framework/sources/object.move index 6e809e87e8736..c03914fb7675c 100644 --- a/aptos-move/framework/aptos-framework/sources/object.move +++ b/aptos-move/framework/aptos-framework/sources/object.move @@ -50,6 +50,8 @@ module aptos_framework::object { const EOBJECT_NOT_BURNT: u64 = 8; /// Object is untransferable any operations that might result in a transfer are disallowed. const EOBJECT_NOT_TRANSFERRABLE: u64 = 9; + /// Objects cannot be burnt + const EBURN_NOT_ALLOWED: u64 = 10; /// Explicitly separate the GUID space between Object and Account to prevent accidental overlap. const INIT_GUID_CREATION_NUM: u64 = 0x4000000000000; @@ -610,15 +612,12 @@ module aptos_framework::object { }; } - /// Forcefully transfer an unwanted object to BURN_ADDRESS, ignoring whether ungated_transfer is allowed. - /// This only works for objects directly owned and for simplicity does not apply to indirectly owned objects. - /// Original owners can reclaim burnt objects any time in the future by calling unburn. - public entry fun burn(owner: &signer, object: Object) acquires ObjectCore { - let original_owner = signer::address_of(owner); - assert!(is_owner(object, original_owner), error::permission_denied(ENOT_OBJECT_OWNER)); - let object_addr = object.inner; - move_to(&create_signer(object_addr), TombStone { original_owner }); - transfer_raw_inner(object_addr, BURN_ADDRESS); + #[deprecated] + /// Previously allowed to burn objects, has now been disabled. Objects can still be unburnt. + /// + /// Please use the test only [`object::burn_object`] for testing with previously burned objects. + public entry fun burn(_owner: &signer, _object: Object) { + abort error::permission_denied(EBURN_NOT_ALLOWED) } /// Allow origin owners to reclaim any objects they previous burnt. @@ -705,6 +704,20 @@ module aptos_framework::object { #[test_only] const EWEAPON_DOES_NOT_EXIST: u64 = 0x101; + #[test_only] + /// For testing the previous behavior of `object::burn()` + /// + /// Forcefully transfer an unwanted object to BURN_ADDRESS, ignoring whether ungated_transfer is allowed. + /// This only works for objects directly owned and for simplicity does not apply to indirectly owned objects. + /// Original owners can reclaim burnt objects any time in the future by calling unburn. + public fun burn_object(owner: &signer, object: Object) acquires ObjectCore { + let original_owner = signer::address_of(owner); + assert!(is_owner(object, original_owner), error::permission_denied(ENOT_OBJECT_OWNER)); + let object_addr = object.inner; + move_to(&create_signer(object_addr), TombStone { original_owner }); + transfer_raw_inner(object_addr, BURN_ADDRESS); + } + #[test_only] struct HeroEquipEvent has drop, store { weapon_id: Option>, @@ -820,7 +833,7 @@ module aptos_framework::object { #[expected_failure(abort_code = 0x10008, location = Self)] fun test_cannot_unburn_after_transfer_with_ref(creator: &signer) acquires ObjectCore, TombStone { let (hero_constructor, hero) = create_hero(creator); - burn(creator, hero); + burn_object(creator, hero); let transfer_ref = generate_transfer_ref(&hero_constructor); transfer_with_ref(generate_linear_transfer_ref(&transfer_ref), @0x456); unburn(creator, hero); @@ -876,7 +889,7 @@ module aptos_framework::object { disable_ungated_transfer(&transfer_ref); // Owner should be able to burn, despite ungated transfer disallowed. 
- burn(creator, hero); + burn_object(creator, hero); assert!(owner(hero) == BURN_ADDRESS, 0); assert!(!ungated_transfer_allowed(hero), 0); @@ -897,7 +910,7 @@ module aptos_framework::object { // Owner should be not be able to burn weapon directly. assert!(owner(weapon) == object_address(&hero), 0); assert!(owns(weapon, signer::address_of(creator)), 0); - burn(creator, weapon); + burn_object(creator, weapon); } #[test(creator = @0x123)] @@ -907,6 +920,13 @@ module aptos_framework::object { unburn(creator, hero); } + #[test(creator = @0x123)] + #[expected_failure(abort_code = 0x5000A, location = Self)] + fun test_burn_should_fail(creator: &signer) acquires ObjectCore { + let (_, hero) = create_hero(creator); + burn(creator, hero); + } + #[test_only] fun create_simple_object(creator: &signer, seed: vector): Object { object_from_constructor_ref(&create_named_object(creator, seed)) diff --git a/aptos-move/framework/aptos-framework/sources/object.spec.move b/aptos-move/framework/aptos-framework/sources/object.spec.move index d2627d649fd61..51ae05b568368 100644 --- a/aptos-move/framework/aptos-framework/sources/object.spec.move +++ b/aptos-move/framework/aptos-framework/sources/object.spec.move @@ -475,7 +475,11 @@ spec aptos_framework::object { aborts_if !global(object_address).allow_ungated_transfer; } - spec burn(owner: &signer, object: Object) { + spec burn(_owner: &signer, _object: Object) { + aborts_if true; + } + + spec burn_object(owner: &signer, object: Object) { pragma aborts_if_is_partial; let object_address = object.inner; aborts_if !exists(object_address); diff --git a/aptos-move/framework/aptos-framework/sources/primary_fungible_store.move b/aptos-move/framework/aptos-framework/sources/primary_fungible_store.move index fc20e1cf311a6..9e39b97fa2854 100644 --- a/aptos-move/framework/aptos-framework/sources/primary_fungible_store.move +++ b/aptos-move/framework/aptos-framework/sources/primary_fungible_store.move @@ -372,7 +372,7 @@ module aptos_framework::primary_fungible_store { // User 2 burns their primary store but should still be able to transfer afterward. let user_2_primary_store = primary_store(user_2_address, metadata); - object::burn(user_2, user_2_primary_store); + object::burn_object(user_2, user_2_primary_store); assert!(object::is_burnt(user_2_primary_store), 0); // Balance still works assert!(balance(user_2_address, metadata) == 80, 0); @@ -396,7 +396,7 @@ module aptos_framework::primary_fungible_store { // User 2 burns their primary store but should still be able to withdraw afterward. let user_2_primary_store = primary_store(user_2_address, metadata); - object::burn(user_2, user_2_primary_store); + object::burn_object(user_2, user_2_primary_store); assert!(object::is_burnt(user_2_primary_store), 0); let coins = withdraw(user_2, metadata, 70); assert!(balance(user_2_address, metadata) == 10, 0); diff --git a/aptos-move/framework/aptos-framework/tests/simple_dispatchable_token_pfs_tests.move b/aptos-move/framework/aptos-framework/tests/simple_dispatchable_token_pfs_tests.move index 1b80c489024e5..d069923a5f8ef 100644 --- a/aptos-move/framework/aptos-framework/tests/simple_dispatchable_token_pfs_tests.move +++ b/aptos-move/framework/aptos-framework/tests/simple_dispatchable_token_pfs_tests.move @@ -28,7 +28,7 @@ module aptos_framework::simple_token_pfs_tests { // User 2 burns their primary store but should still be able to transfer afterward. 
let user_2_primary_store = primary_store(user_2_address, metadata); - object::burn(user_2, user_2_primary_store); + object::burn_object(user_2, user_2_primary_store); assert!(object::is_burnt(user_2_primary_store), 0); // Balance still works assert!(balance(user_2_address, metadata) == 80, 0); @@ -54,7 +54,7 @@ module aptos_framework::simple_token_pfs_tests { // User 2 burns their primary store but should still be able to withdraw afterward. let user_2_primary_store = primary_store(user_2_address, metadata); - object::burn(user_2, user_2_primary_store); + object::burn_object(user_2, user_2_primary_store); assert!(object::is_burnt(user_2_primary_store), 0); let coins = withdraw(user_2, metadata, 70); assert!(balance(user_2_address, metadata) == 10, 0); From 20335bba8659f1df6b708115aca45db6a40ea32f Mon Sep 17 00:00:00 2001 From: Victor Gao <10379359+vgao1996@users.noreply.github.com> Date: Wed, 18 Sep 2024 11:33:35 -0700 Subject: [PATCH 28/36] [gas] bump gas feature version to 1.20 (#14668) --- aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs | 8 ++++---- aptos-move/aptos-gas-schedule/src/ver.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs b/aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs index 26557162fb157..1e7f1a4860103 100644 --- a/aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs +++ b/aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs @@ -50,15 +50,15 @@ crate::gas_schedule::macros::define_gas_parameters!( [mut_borrow_variant_field: InternalGas, { RELEASE_V1_18.. => "mut_borrow_variant_field" }, 835], [imm_borrow_variant_field_generic: InternalGas, - { RELEASE_V1_18 => "imm_borrow_variant_field_generic" }, 835], + { RELEASE_V1_18.. => "imm_borrow_variant_field_generic" }, 835], [mut_borrow_variant_field_generic: InternalGas, - { RELEASE_V1_18 => "mut_borrow_variant_field_generic" }, 835], + { RELEASE_V1_18.. => "mut_borrow_variant_field_generic" }, 835], // variant testing [test_variant: InternalGas, - { RELEASE_V1_18 => "test_variant" }, 535], + { RELEASE_V1_18.. => "test_variant" }, 535], [test_variant_generic: InternalGas, - { RELEASE_V1_18 => "test_variant_generic" }, 535], + { RELEASE_V1_18.. => "test_variant_generic" }, 535], // locals [copy_loc_base: InternalGas, "copy_loc.base", 294], diff --git a/aptos-move/aptos-gas-schedule/src/ver.rs b/aptos-move/aptos-gas-schedule/src/ver.rs index f8b5d7617151c..2df67131a21f7 100644 --- a/aptos-move/aptos-gas-schedule/src/ver.rs +++ b/aptos-move/aptos-gas-schedule/src/ver.rs @@ -69,7 +69,7 @@ /// global operations. /// - V1 /// - TBA -pub const LATEST_GAS_FEATURE_VERSION: u64 = gas_feature_versions::RELEASE_V1_18; +pub const LATEST_GAS_FEATURE_VERSION: u64 = gas_feature_versions::RELEASE_V1_20; pub mod gas_feature_versions { pub const RELEASE_V1_8: u64 = 11; From f5fa2f8d5044c5811734c68fe25d37f79a46c105 Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Tue, 17 Sep 2024 18:00:03 -0400 Subject: [PATCH 29/36] [Consensus Observer] Enable CO for VFNs. 
--- config/src/config/consensus_observer_config.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/src/config/consensus_observer_config.rs b/config/src/config/consensus_observer_config.rs index 0ca55c31d50e9..02d8572134950 100644 --- a/config/src/config/consensus_observer_config.rs +++ b/config/src/config/consensus_observer_config.rs @@ -9,8 +9,8 @@ use serde::{Deserialize, Serialize}; use serde_yaml::Value; // Useful constants for enabling consensus observer on different node types -const ENABLE_ON_VALIDATORS: bool = false; -const ENABLE_ON_VALIDATOR_FULLNODES: bool = false; +const ENABLE_ON_VALIDATORS: bool = true; +const ENABLE_ON_VALIDATOR_FULLNODES: bool = true; const ENABLE_ON_PUBLIC_FULLNODES: bool = false; #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)] From 25a081116546670e62ca927ba90478de78557056 Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Wed, 18 Sep 2024 11:00:23 -0400 Subject: [PATCH 30/36] [Consensus Observer] Improve error messages for payload verification. --- .../network/network_handler.rs | 2 +- .../network/observer_message.rs | 45 +++++++++++++++---- .../observer/subscription_manager.rs | 6 +-- .../observer/subscription_utils.rs | 2 +- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/consensus/src/consensus_observer/network/network_handler.rs b/consensus/src/consensus_observer/network/network_handler.rs index d8aa1447312f7..bbaeca0dc4843 100644 --- a/consensus/src/consensus_observer/network/network_handler.rs +++ b/consensus/src/consensus_observer/network/network_handler.rs @@ -208,7 +208,7 @@ impl ConsensusObserverNetworkHandler { None => { error!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Missing response sender for RCP request: {:?}", + "Missing response sender for the RPC request: {:?}", request )) ); diff --git a/consensus/src/consensus_observer/network/observer_message.rs b/consensus/src/consensus_observer/network/observer_message.rs index 6ecb14d7995de..8b673f6335f56 100644 --- a/consensus/src/consensus_observer/network/observer_message.rs +++ b/consensus/src/consensus_observer/network/observer_message.rs @@ -663,23 +663,50 @@ impl BlockPayload { /// Verifies the block payload digests and returns an error if the data is invalid pub fn verify_payload_digests(&self) -> Result<(), Error> { - // Verify the proof of store digests against the transaction + // Get the transactions, payload proofs and inline batches let transactions = self.transaction_payload.transactions(); + let payload_proofs = self.transaction_payload.payload_proofs(); + let inline_batches = self.transaction_payload.inline_batches(); + + // Get the number of transactions, payload proofs and inline batches + let num_transactions = transactions.len(); + let num_payload_proofs = payload_proofs.len(); + let num_inline_batches = inline_batches.len(); + + // Verify the payload proof digests using the transactions let mut transactions_iter = transactions.iter(); - for proof_of_store in &self.transaction_payload.payload_proofs() { - reconstruct_and_verify_batch(&mut transactions_iter, proof_of_store.info())?; + for proof_of_store in &payload_proofs { + reconstruct_and_verify_batch(&mut transactions_iter, proof_of_store.info()).map_err( + |error| { + Error::InvalidMessageError(format!( + "Failed to verify payload proof digests! 
Num transactions: {:?}, \ + num batches: {:?}, num inline batches: {:?}, failed batch: {:?}, Error: {:?}", + num_transactions, num_payload_proofs, num_inline_batches, proof_of_store.info(), error + )) + }, + )?; } - // Verify the inline batch digests against the inline batches - for batch_info in self.transaction_payload.inline_batches() { - reconstruct_and_verify_batch(&mut transactions_iter, batch_info)?; + // Verify the inline batch digests using the transactions + for batch_info in inline_batches.into_iter() { + reconstruct_and_verify_batch(&mut transactions_iter, batch_info).map_err( + |error| { + Error::InvalidMessageError(format!( + "Failed to verify inline batch digests! Num transactions: {:?}, \ + num batches: {:?}, num inline batches: {:?}, failed batch: {:?}, Error: {:?}", + num_transactions, num_payload_proofs, num_inline_batches, batch_info, error + )) + }, + )?; } - // Verify that there are no transactions remaining + // Verify that there are no transactions remaining (all transactions should be consumed) let remaining_transactions = transactions_iter.as_slice(); if !remaining_transactions.is_empty() { return Err(Error::InvalidMessageError(format!( - "Failed to verify payload transactions! Transactions remaining: {:?}. Expected: 0", + "Failed to verify payload transactions! Num transactions: {:?}, \ + transactions remaining: {:?}. Expected: 0", + num_transactions, remaining_transactions.len() ))); } @@ -740,7 +767,7 @@ fn reconstruct_and_verify_batch( let expected_digest = expected_batch_info.digest(); if batch_digest != *expected_digest { return Err(Error::InvalidMessageError(format!( - "The reconstructed batch digest does not match the expected digest!\ + "The reconstructed batch digest does not match the expected digest! \ Batch: {:?}, Expected digest: {:?}, Reconstructed digest: {:?}", expected_batch_info, expected_digest, batch_digest ))); diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs index 2d89163e1ae86..24ae1f7d321b4 100644 --- a/consensus/src/consensus_observer/observer/subscription_manager.rs +++ b/consensus/src/consensus_observer/observer/subscription_manager.rs @@ -18,7 +18,7 @@ use crate::consensus_observer::{ }; use aptos_config::{config::ConsensusObserverConfig, network_id::PeerNetworkId}; use aptos_infallible::Mutex; -use aptos_logger::{error, info, warn}; +use aptos_logger::{info, warn}; use aptos_network::application::{interface::NetworkClient, metadata::PeerMetadata}; use aptos_storage_interface::DbReader; use aptos_time_service::TimeService; @@ -157,7 +157,7 @@ impl SubscriptionManager { .get_connected_peers_and_metadata() .unwrap_or_else(|error| { // Log the error - error!( + warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to get connected peers and metadata! Error: {:?}", error @@ -327,7 +327,7 @@ impl SubscriptionManager { }, Err(error) => { // We encountered an error while sending the request - error!( + warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to send unsubscribe request to peer: {}! 
Error: {:?}", peer_network_id, error diff --git a/consensus/src/consensus_observer/observer/subscription_utils.rs b/consensus/src/consensus_observer/observer/subscription_utils.rs index d654af8aaf0d5..0bca7c61b007d 100644 --- a/consensus/src/consensus_observer/observer/subscription_utils.rs +++ b/consensus/src/consensus_observer/observer/subscription_utils.rs @@ -175,7 +175,7 @@ async fn create_single_subscription( }, Err(error) => { // We encountered an error while sending the request - error!( + warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to send subscription request to peer: {}! Error: {:?}", potential_peer, error From 16370eb0265228e801e28492bb16786d54bb3b43 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 18 Sep 2024 12:29:53 -0700 Subject: [PATCH 31/36] Addressing PR comments --- types/src/ledger_info.rs | 35 ++++++++++++++++----------------- types/src/validator_verifier.rs | 9 ++------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs index 0a8fc80e35962..1e2d475833fdd 100644 --- a/types/src/ledger_info.rs +++ b/types/src/ledger_info.rs @@ -21,6 +21,7 @@ use serde::{Deserialize, Serialize}; use std::{ collections::BTreeMap, fmt::{Display, Formatter}, + mem, ops::{Deref, DerefMut}, sync::Arc, }; @@ -380,8 +381,12 @@ impl LedgerInfoWithPartialSignatures { } } -/// Contains the ledger info and partially aggregated signature from a set of validators, this data -/// is only used during the aggregating the votes from different validators and is not persisted in DB. +/// This data structure is used to support the optimistic signature verification feature. +/// Contains the ledger info and the signatures received on the ledger info from different validators. +/// Some of the signatures could be verified before inserting into this data structure. Some of the signatures +/// are not verified. Rather than verifying the signatures immediately, we aggregate all the signatures and +/// verify the aggregated signature at once. If the aggregated signature is invalid, then we verify each individual +/// unverified signature and remove the invalid signatures. #[derive(Clone, Debug, Eq, PartialEq)] pub struct LedgerInfoWithMixedSignatures { ledger_info: LedgerInfo, @@ -447,19 +452,11 @@ impl LedgerInfoWithMixedSignatures { } pub fn verified_voters(&self) -> Vec<&AccountAddress> { - self.verified_signatures - .signatures() - .keys() - .collect_vec() - .clone() + self.verified_signatures.signatures().keys().collect_vec() } pub fn unverified_voters(&self) -> Vec<&AccountAddress> { - self.unverified_signatures - .signatures() - .keys() - .collect_vec() - .clone() + self.unverified_signatures.signatures().keys().collect_vec() } // Collecting all the authors from verified signatures, unverified signatures and the aggregated signature. @@ -532,12 +529,14 @@ impl LedgerInfoWithMixedSignatures { } // For these authors, we will not use optimistic signature verification in the future. 
- let pessimistic_authors = self.unverified_signatures.signatures().keys().cloned(); - epoch_state - .verifier - .add_pessimistic_verify_set(pessimistic_authors); - - self.unverified_signatures = PartialSignatures::empty(); + for author in mem::replace( + &mut self.unverified_signatures.signatures(), + &BTreeMap::new(), + ) + .keys() + { + epoch_state.verifier.add_pessimistic_verify_set(*author); + } match self.check_voting_power(&epoch_state.verifier, true) { Ok(_) => Ok(LedgerInfoWithSignatures::new( diff --git a/types/src/validator_verifier.rs b/types/src/validator_verifier.rs index 763db6c23c30b..45470fc658352 100644 --- a/types/src/validator_verifier.rs +++ b/types/src/validator_verifier.rs @@ -227,13 +227,8 @@ impl ValidatorVerifier { )) } - pub fn add_pessimistic_verify_set( - &self, - pessimistic_authors: impl Iterator, - ) { - for author in pessimistic_authors { - self.pessimistic_verify_set.insert(author); - } + pub fn add_pessimistic_verify_set(&self, author: AccountAddress) { + self.pessimistic_verify_set.insert(author); } pub fn pessimistic_verify_set(&self) -> Arc> { From a7b5e82fa229fc8c1ebb536d903a07b4a22d5381 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 18 Sep 2024 12:59:49 -0700 Subject: [PATCH 32/36] Minor changes --- types/src/ledger_info.rs | 54 +++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs index 1e2d475833fdd..3e9557e62f0f7 100644 --- a/types/src/ledger_info.rs +++ b/types/src/ledger_info.rs @@ -381,6 +381,11 @@ impl LedgerInfoWithPartialSignatures { } } +pub enum SignatureWithStatus { + Verified(bls12381::Signature), + Unverified(bls12381::Signature), +} + /// This data structure is used to support the optimistic signature verification feature. /// Contains the ledger info and the signatures received on the ledger info from different validators. /// Some of the signatures could be verified before inserting into this data structure. Some of the signatures @@ -442,12 +447,15 @@ impl LedgerInfoWithMixedSignatures { pub fn add_signature( &mut self, validator: AccountAddress, - signature: bls12381::Signature, - verification_status: VerificationStatus, + signature_with_status: SignatureWithStatus, ) { - match verification_status { - VerificationStatus::Verified => self.add_verified_signature(validator, signature), - VerificationStatus::Unverified => self.add_unverified_signature(validator, signature), + match signature_with_status { + SignatureWithStatus::Verified(signature) => { + self.add_verified_signature(validator, signature) + }, + SignatureWithStatus::Unverified(signature) => { + self.add_unverified_signature(validator, signature) + }, }; } @@ -495,11 +503,13 @@ impl LedgerInfoWithMixedSignatures { .verify_multi_signatures(self.ledger_info(), &aggregated_sig) { Ok(_) => { - for (account_address, signature) in self.unverified_signatures.signatures() { + for (account_address, signature) in + mem::replace(&mut self.unverified_signatures, PartialSignatures::empty()) + .signatures() + { self.verified_signatures .add_signature(*account_address, signature.clone()); } - self.unverified_signatures = PartialSignatures::empty(); Ok(LedgerInfoWithSignatures::new( self.ledger_info.clone(), aggregated_sig, @@ -529,11 +539,10 @@ impl LedgerInfoWithMixedSignatures { } // For these authors, we will not use optimistic signature verification in the future. 
- for author in mem::replace( - &mut self.unverified_signatures.signatures(), - &BTreeMap::new(), - ) - .keys() + for author in + mem::replace(&mut self.unverified_signatures, PartialSignatures::empty()) + .signatures() + .keys() { epoch_state.verifier.add_pessimistic_verify_set(*author); } @@ -688,8 +697,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[0].author(), - validator_signers[0].sign(&ledger_info).unwrap(), - VerificationStatus::Verified, + SignatureWithStatus::Verified(validator_signers[0].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[0].author(), @@ -698,8 +706,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[1].author(), - validator_signers[1].sign(&ledger_info).unwrap(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(validator_signers[1].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[1].author(), @@ -708,8 +715,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[2].author(), - validator_signers[2].sign(&ledger_info).unwrap(), - VerificationStatus::Verified, + SignatureWithStatus::Verified(validator_signers[2].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[2].author(), @@ -718,8 +724,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[3].author(), - validator_signers[3].sign(&ledger_info).unwrap(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(validator_signers[3].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[3].author(), @@ -751,8 +756,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[4].author(), - bls12381::Signature::dummy_signature(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(bls12381::Signature::dummy_signature()), ); assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 5); @@ -802,8 +806,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[5].author(), - validator_signers[5].sign(&ledger_info).unwrap(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(validator_signers[5].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[5].author(), @@ -861,8 +864,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[6].author(), - bls12381::Signature::dummy_signature(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(bls12381::Signature::dummy_signature()), ); assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 6); From 2e46bb343ae6250026734a10d5c44099b4d8e752 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 18 Sep 2024 13:06:51 -0700 Subject: [PATCH 33/36] Minor change --- types/src/ledger_info.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs index 3e9557e62f0f7..18aff380a54f9 100644 --- a/types/src/ledger_info.rs +++ b/types/src/ledger_info.rs @@ -534,7 +534,7 @@ impl LedgerInfoWithMixedSignatures { .collect::>(); for (account_address, signature) in verified { self.verified_signatures - .add_signature(account_address, signature.clone()); + .add_signature(account_address, signature); self.unverified_signatures.remove_signature(account_address); } From 30f76bfee45d1c419f7ee3cd463ee8d904a97aab Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 18 Sep 2024 13:19:33 -0700 Subject: [PATCH 
34/36] Deprecate delayed QC aggregate msg (#14640) --- config/src/config/consensus_config.rs | 39 +--- .../consensus-types/src/delayed_qc_msg.rs | 32 ---- consensus/consensus-types/src/lib.rs | 1 - .../src/block_storage/block_store_test.rs | 10 +- consensus/src/epoch_manager.rs | 37 +--- consensus/src/lib.rs | 1 - consensus/src/liveness/round_state.rs | 35 +--- consensus/src/liveness/round_state_test.rs | 11 +- consensus/src/pending_votes.rs | 88 ++++----- consensus/src/qc_aggregator.rs | 181 ------------------ consensus/src/round_manager.rs | 39 +--- consensus/src/round_manager_fuzzing.rs | 15 +- consensus/src/round_manager_test.rs | 12 +- 13 files changed, 51 insertions(+), 450 deletions(-) delete mode 100644 consensus/consensus-types/src/delayed_qc_msg.rs delete mode 100644 consensus/src/qc_aggregator.rs diff --git a/config/src/config/consensus_config.rs b/config/src/config/consensus_config.rs index 90526afc77510..021edf0b365b4 100644 --- a/config/src/config/consensus_config.rs +++ b/config/src/config/consensus_config.rs @@ -93,48 +93,11 @@ pub struct ConsensusConfig { pub max_pending_rounds_in_commit_vote_cache: u64, } +/// Deprecated #[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] pub enum QcAggregatorType { #[default] NoDelay, - Delayed(DelayedQcAggregatorConfig), -} - -impl QcAggregatorType { - pub fn default_delayed() -> Self { - // TODO: Enable the delayed aggregation by default once we have tested it more. - Self::Delayed(DelayedQcAggregatorConfig::default()) - } -} - -#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] -pub struct DelayedQcAggregatorConfig { - // Maximum Delay for a QC to be aggregated after round start (in milliseconds). This assumes that - // we have enough voting power to form a QC. If we don't have enough voting power, we will wait - // until we have enough voting power to form a QC. - pub max_delay_after_round_start_ms: u64, - // Percentage of aggregated voting power to wait for before aggregating a QC. For example, if this - // is set to 95% then, a QC is formed as soon as we have 95% of the voting power aggregated without - // any additional waiting. - pub aggregated_voting_power_pct_to_wait: usize, - // This knob control what is the % of the time (as compared to time between round start and time when we - // have enough voting power to form a QC) we wait after we have enough voting power to form a QC. In a sense, - // this knobs controls how much slower we are willing to make consensus to wait for more votes. - pub pct_delay_after_qc_aggregated: usize, - // In summary, let's denote the time we have enough voting power (2f + 1) to form a QC as T1 and - // the time we have aggregated `aggregated_voting_power_pct_to_wait` as T2. Then, we wait for - // min((T1 + `pct_delay_after_qc_aggregated` * T1 / 100), `max_delay_after_round_start_ms`, T2) - // before forming a QC. -} - -impl Default for DelayedQcAggregatorConfig { - fn default() -> Self { - Self { - max_delay_after_round_start_ms: 700, - aggregated_voting_power_pct_to_wait: 90, - pct_delay_after_qc_aggregated: 30, - } - } } /// Execution backpressure which handles gas/s variance, diff --git a/consensus/consensus-types/src/delayed_qc_msg.rs b/consensus/consensus-types/src/delayed_qc_msg.rs deleted file mode 100644 index 75d9752c2ea5b..0000000000000 --- a/consensus/consensus-types/src/delayed_qc_msg.rs +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright © Aptos Foundation -// Parts of the project are originally copyright © Meta Platforms, Inc. 
-// SPDX-License-Identifier: Apache-2.0 - -use crate::vote::Vote; -use serde::{Deserialize, Serialize}; -use std::fmt::{Display, Formatter}; - -/// DelayedQCMsg is the struct that is sent by the proposer to self when it receives enough votes -/// for a QC but it still delays the creation of the QC to ensure that slow nodes are given enough -/// time to catch up to the chain and cast their votes. -#[derive(Deserialize, Serialize, Clone, Debug, PartialEq, Eq)] -pub struct DelayedQcMsg { - /// Vote data for the QC that is being delayed. - pub vote: Vote, -} - -impl Display for DelayedQcMsg { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "DelayedQcMsg: vote [{}]", self.vote,) - } -} - -impl DelayedQcMsg { - pub fn new(vote: Vote) -> Self { - Self { vote } - } - - pub fn vote(&self) -> &Vote { - &self.vote - } -} diff --git a/consensus/consensus-types/src/lib.rs b/consensus/consensus-types/src/lib.rs index c9e555da27a7d..bc70a1ad942f2 100644 --- a/consensus/consensus-types/src/lib.rs +++ b/consensus/consensus-types/src/lib.rs @@ -8,7 +8,6 @@ pub mod block; pub mod block_data; pub mod block_retrieval; pub mod common; -pub mod delayed_qc_msg; pub mod epoch_retrieval; pub mod order_vote; pub mod order_vote_msg; diff --git a/consensus/src/block_storage/block_store_test.rs b/consensus/src/block_storage/block_store_test.rs index 7328688f2f48e..41def8f1c322d 100644 --- a/consensus/src/block_storage/block_store_test.rs +++ b/consensus/src/block_storage/block_store_test.rs @@ -8,9 +8,7 @@ use crate::{ test_utils::{ build_empty_tree, build_simple_tree, consensus_runtime, timed_block_on, TreeInserter, }, - util::mock_time_service::SimulatedTimeService, }; -use aptos_config::config::QcAggregatorType; use aptos_consensus_types::{ block::{ block_test_utils::{ @@ -27,9 +25,8 @@ use aptos_crypto::{HashValue, PrivateKey}; use aptos_types::{ validator_signer::ValidatorSigner, validator_verifier::random_validator_verifier, }; -use futures_channel::mpsc::unbounded; use proptest::prelude::*; -use std::{cmp::min, collections::HashSet, sync::Arc}; +use std::{cmp::min, collections::HashSet}; #[tokio::test] async fn test_highest_block_and_quorum_cert() { @@ -284,11 +281,8 @@ async fn test_insert_vote() { let block = inserter .insert_block_with_qc(certificate_for_genesis(), &genesis, 1) .await; - let time_service = Arc::new(SimulatedTimeService::new()); - let (delayed_qc_tx, _) = unbounded(); - let mut pending_votes = - PendingVotes::new(time_service, delayed_qc_tx, QcAggregatorType::NoDelay); + let mut pending_votes = PendingVotes::new(); assert!(block_store.get_quorum_cert_for_block(block.id()).is_none()); for (i, voter) in signers.iter().enumerate().take(10).skip(1) { diff --git a/consensus/src/epoch_manager.rs b/consensus/src/epoch_manager.rs index da8c23ea509d8..a6c43221f9a0d 100644 --- a/consensus/src/epoch_manager.rs +++ b/consensus/src/epoch_manager.rs @@ -56,12 +56,9 @@ use crate::{ use anyhow::{anyhow, bail, ensure, Context}; use aptos_bounded_executor::BoundedExecutor; use aptos_channels::{aptos_channel, message_queues::QueueStyle}; -use aptos_config::config::{ - ConsensusConfig, DagConsensusConfig, ExecutionConfig, NodeConfig, QcAggregatorType, -}; +use aptos_config::config::{ConsensusConfig, DagConsensusConfig, ExecutionConfig, NodeConfig}; use aptos_consensus_types::{ common::{Author, Round}, - delayed_qc_msg::DelayedQcMsg, epoch_retrieval::EpochRetrievalRequest, proof_of_store::ProofCache, utils::PayloadTxnsSize, @@ -96,11 +93,7 @@ use aptos_types::{ use 
aptos_validator_transaction_pool::VTxnPoolState; use fail::fail_point; use futures::{ - channel::{ - mpsc, - mpsc::{unbounded, Sender, UnboundedSender}, - oneshot, - }, + channel::{mpsc, mpsc::Sender, oneshot}, SinkExt, StreamExt, }; use itertools::Itertools; @@ -265,21 +258,13 @@ impl<P: OnChainConfigProvider> EpochManager<P>
{ &self, time_service: Arc, timeout_sender: aptos_channels::Sender, - delayed_qc_tx: UnboundedSender, - qc_aggregator_type: QcAggregatorType, ) -> RoundState { let time_interval = Box::new(ExponentialTimeInterval::new( Duration::from_millis(self.config.round_initial_timeout_ms), self.config.round_timeout_backoff_exponent_base, self.config.round_timeout_backoff_max_exponent, )); - RoundState::new( - time_interval, - time_service, - timeout_sender, - delayed_qc_tx, - qc_aggregator_type, - ) + RoundState::new(time_interval, time_service, timeout_sender) } /// Create a proposer election handler based on proposers @@ -793,15 +778,10 @@ impl EpochManager
{ "Unable to initialize safety rules.", ); } - let (delayed_qc_tx, delayed_qc_rx) = unbounded(); info!(epoch = epoch, "Create RoundState"); - let round_state = self.create_round_state( - self.time_service.clone(), - self.timeout_sender.clone(), - delayed_qc_tx, - self.config.qc_aggregator_type.clone(), - ); + let round_state = + self.create_round_state(self.time_service.clone(), self.timeout_sender.clone()); info!(epoch = epoch, "Create ProposerElection"); let proposer_election = @@ -913,12 +893,7 @@ impl EpochManager
{ let (close_tx, close_rx) = oneshot::channel(); self.round_manager_close_tx = Some(close_tx); - tokio::spawn(round_manager.start( - round_manager_rx, - buffered_proposal_rx, - delayed_qc_rx, - close_rx, - )); + tokio::spawn(round_manager.start(round_manager_rx, buffered_proposal_rx, close_rx)); self.spawn_block_retrieval_task(epoch, block_store, max_blocks_allowed); } diff --git a/consensus/src/lib.rs b/consensus/src/lib.rs index 87eb81e0f40cc..f8545073966bd 100644 --- a/consensus/src/lib.rs +++ b/consensus/src/lib.rs @@ -58,7 +58,6 @@ mod execution_pipeline; /// AptosNet interface. pub mod network_interface; mod payload_manager; -mod qc_aggregator; mod transaction_deduper; mod transaction_filter; mod transaction_shuffler; diff --git a/consensus/src/liveness/round_state.rs b/consensus/src/liveness/round_state.rs index ea7e6e7f5b362..0dc03ab88d105 100644 --- a/consensus/src/liveness/round_state.rs +++ b/consensus/src/liveness/round_state.rs @@ -7,10 +7,9 @@ use crate::{ pending_votes::{PendingVotes, VoteReceptionResult}, util::time_service::{SendTask, TimeService}, }; -use aptos_config::config::QcAggregatorType; use aptos_consensus_types::{ - common::Round, delayed_qc_msg::DelayedQcMsg, sync_info::SyncInfo, - timeout_2chain::TwoChainTimeoutWithPartialSignatures, vote::Vote, + common::Round, sync_info::SyncInfo, timeout_2chain::TwoChainTimeoutWithPartialSignatures, + vote::Vote, }; use aptos_crypto::HashValue; use aptos_logger::{prelude::*, Schema}; @@ -18,7 +17,6 @@ use aptos_types::{ ledger_info::LedgerInfoWithPartialSignatures, validator_verifier::ValidatorVerifier, }; use futures::future::AbortHandle; -use futures_channel::mpsc::UnboundedSender; use serde::Serialize; use std::{fmt, sync::Arc, time::Duration}; @@ -163,9 +161,6 @@ pub struct RoundState { vote_sent: Option, // The handle to cancel previous timeout task when moving to next round. abort_handle: Option, - // Self sender to send delayed QC aggregation events to the round manager. - delayed_qc_tx: UnboundedSender, - qc_aggregator_type: QcAggregatorType, } #[derive(Default, Schema)] @@ -194,8 +189,6 @@ impl RoundState { time_interval: Box, time_service: Arc, timeout_sender: aptos_channels::Sender, - delayed_qc_tx: UnboundedSender, - qc_aggregator_type: QcAggregatorType, ) -> Self { // Our counters are initialized lazily, so they're not going to appear in // Prometheus if some conditions never happen. Invoking get() function enforces creation. @@ -203,11 +196,7 @@ impl RoundState { counters::TIMEOUT_ROUNDS_COUNT.get(); counters::TIMEOUT_COUNT.get(); - let pending_votes = PendingVotes::new( - time_service.clone(), - delayed_qc_tx.clone(), - qc_aggregator_type.clone(), - ); + let pending_votes = PendingVotes::new(); Self { time_interval, highest_ordered_round: 0, @@ -218,8 +207,6 @@ impl RoundState { pending_votes, vote_sent: None, abort_handle: None, - delayed_qc_tx, - qc_aggregator_type, } } @@ -262,11 +249,7 @@ impl RoundState { // Start a new round. 
self.current_round = new_round; - self.pending_votes = PendingVotes::new( - self.time_service.clone(), - self.delayed_qc_tx.clone(), - self.qc_aggregator_type.clone(), - ); + self.pending_votes = PendingVotes::new(); self.vote_sent = None; let timeout = self.setup_timeout(1); // The new round reason is QCReady in case both QC.round + 1 == new_round, otherwise @@ -310,16 +293,6 @@ impl RoundState { } } - pub fn process_delayed_qc_msg( - &mut self, - validator_verifier: &ValidatorVerifier, - msg: DelayedQcMsg, - ) -> VoteReceptionResult { - let DelayedQcMsg { vote } = msg; - self.pending_votes - .process_delayed_qc(validator_verifier, vote) - } - pub fn vote_sent(&self) -> Option { self.vote_sent.clone() } diff --git a/consensus/src/liveness/round_state_test.rs b/consensus/src/liveness/round_state_test.rs index 03f1d245359d1..ad2eec8809e53 100644 --- a/consensus/src/liveness/round_state_test.rs +++ b/consensus/src/liveness/round_state_test.rs @@ -8,7 +8,6 @@ use crate::{ }, util::mock_time_service::SimulatedTimeService, }; -use aptos_config::config::QcAggregatorType; use aptos_consensus_types::{ common::Round, quorum_cert::QuorumCert, @@ -23,7 +22,6 @@ use aptos_types::{ ledger_info::{LedgerInfo, LedgerInfoWithSignatures}, }; use futures::StreamExt; -use futures_channel::mpsc::unbounded; use std::{sync::Arc, time::Duration}; #[test] @@ -88,15 +86,8 @@ fn make_round_state() -> (RoundState, aptos_channels::Receiver) { let time_interval = Box::new(ExponentialTimeInterval::fixed(Duration::from_millis(2))); let simulated_time = SimulatedTimeService::auto_advance_until(Duration::from_millis(4)); let (timeout_tx, timeout_rx) = aptos_channels::new_test(1_024); - let (delayed_qc_tx, _) = unbounded(); ( - RoundState::new( - time_interval, - Arc::new(simulated_time), - timeout_tx, - delayed_qc_tx, - QcAggregatorType::NoDelay, - ), + RoundState::new(time_interval, Arc::new(simulated_time), timeout_tx), timeout_rx, ) } diff --git a/consensus/src/pending_votes.rs b/consensus/src/pending_votes.rs index ff8bc37a1ae70..05abc30dc63a2 100644 --- a/consensus/src/pending_votes.rs +++ b/consensus/src/pending_votes.rs @@ -8,15 +8,9 @@ //! when enough votes (or timeout votes) have been observed. //! Votes are automatically dropped when the structure goes out of scope. -use crate::{ - counters, - qc_aggregator::{create_qc_aggregator, QcAggregator}, - util::time_service::TimeService, -}; -use aptos_config::config::QcAggregatorType; +use crate::counters; use aptos_consensus_types::{ common::Author, - delayed_qc_msg::DelayedQcMsg, quorum_cert::QuorumCert, timeout_2chain::{TwoChainTimeoutCertificate, TwoChainTimeoutWithPartialSignatures}, vote::Vote, @@ -29,7 +23,6 @@ use aptos_types::{ ledger_info::LedgerInfoWithPartialSignatures, validator_verifier::{ValidatorVerifier, VerifyError}, }; -use futures_channel::mpsc::UnboundedSender; use std::{ collections::{BTreeMap, HashMap}, fmt, @@ -43,9 +36,6 @@ pub enum VoteReceptionResult { /// The vote has been added but QC has not been formed yet. Return the amount of voting power /// QC currently has. VoteAdded(u128), - /// The vote has been added and we have gather enough voting power to form the QC but we have - /// delayed the QC to aggregate as many signatures as possible. - VoteAddedQCDelayed(u128), /// The very same vote message has been processed in past. DuplicateVote, /// The very same author has already voted for another proposal in this round (equivocation). 
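With `VoteAddedQCDelayed` removed, vote handling is left with a single aggregation path: add the signature, count voting power, and form the QC the moment quorum is crossed (the rewritten vote-insertion logic below). A minimal sketch of that decision, with toy types standing in for `ValidatorVerifier` and the pending ledger info — the names and fields here are illustrative assumptions, not the real definitions, and BLS aggregation is elided:

    use std::collections::BTreeMap;

    // Toy stand-ins for the real consensus types (simplified by assumption).
    struct Verifier {
        voting_power: BTreeMap<&'static str, u128>,
        quorum_voting_power: u128,
    }

    enum VoteReception {
        VoteAdded(u128),            // quorum not yet reached
        NewQuorumCertificate(u128), // QC formed at this aggregated power
    }

    // Mirrors the post-patch flow: no timer, no VoteAddedQCDelayed. The QC is
    // aggregated as soon as the collected power crosses the quorum threshold.
    fn on_vote_added(
        verifier: &Verifier,
        signers: &BTreeMap<&'static str, Vec<u8>>,
    ) -> VoteReception {
        let power: u128 = signers
            .keys()
            .filter_map(|author| verifier.voting_power.get(author))
            .sum();
        if power >= verifier.quorum_voting_power {
            // The real code calls li_with_sig.aggregate_signatures(..) here
            // and wraps the result in QuorumCert::new(..).
            VoteReception::NewQuorumCertificate(power)
        } else {
            VoteReception::VoteAdded(power)
        }
    }

    fn main() {
        let verifier = Verifier {
            voting_power: BTreeMap::from([("a", 1), ("b", 1), ("c", 1), ("d", 1)]),
            quorum_voting_power: 3,
        };
        let mut signers = BTreeMap::new();
        for author in ["a", "b", "c"] {
            signers.insert(author, vec![0u8; 96]); // placeholder signature bytes
            match on_vote_added(&verifier, &signers) {
                VoteReception::VoteAdded(p) => println!("{author}: added, power {p}"),
                VoteReception::NewQuorumCertificate(p) => println!("{author}: QC at power {p}"),
            }
        }
    }

The deleted `DelayedQcAggregatorConfig` existed precisely to trade this immediacy for extra signatures; removing it makes the former `NoDelay` behavior the only behavior.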
@@ -79,23 +69,16 @@ pub struct PendingVotes { author_to_vote: HashMap, /// Whether we have echoed timeout for this round. echo_timeout: bool, - - qc_aggregator: Box, } impl PendingVotes { /// Creates an empty PendingVotes structure for a specific epoch and round - pub fn new( - time_service: Arc, - delayed_qc_tx: UnboundedSender, - qc_aggregator_type: QcAggregatorType, - ) -> Self { + pub fn new() -> Self { PendingVotes { li_digest_to_votes: HashMap::new(), maybe_partial_2chain_tc: None, author_to_vote: HashMap::new(), echo_timeout: false, - qc_aggregator: create_qc_aggregator(qc_aggregator_type, time_service, delayed_qc_tx), } } @@ -189,30 +172,37 @@ impl PendingVotes { li_with_sig.add_signature(vote.author(), vote.signature().clone()); // check if we have enough signatures to create a QC - let voting_power = - match validator_verifier.check_voting_power(li_with_sig.signatures().keys(), true) { - // a quorum of signature was reached, a new QC is formed - Ok(aggregated_voting_power) => { - return self.qc_aggregator.handle_aggregated_qc( - validator_verifier, - aggregated_voting_power, - vote, - li_with_sig, + let voting_power = match validator_verifier + .check_voting_power(li_with_sig.signatures().keys(), true) + { + // a quorum of signature was reached, a new QC is formed + Ok(aggregated_voting_power) => { + assert!( + aggregated_voting_power >= validator_verifier.quorum_voting_power(), + "QC aggregation should not be triggered if we don't have enough votes to form a QC" ); - }, + match li_with_sig.aggregate_signatures(validator_verifier) { + Ok(ledger_info_with_sig) => { + return VoteReceptionResult::NewQuorumCertificate(Arc::new( + QuorumCert::new(vote.vote_data().clone(), ledger_info_with_sig), + )) + }, + Err(e) => return VoteReceptionResult::ErrorAggregatingSignature(e), + } + }, - // not enough votes - Err(VerifyError::TooLittleVotingPower { voting_power, .. }) => voting_power, + // not enough votes + Err(VerifyError::TooLittleVotingPower { voting_power, .. }) => voting_power, - // error - Err(error) => { - error!( - "MUST_FIX: vote received could not be added: {}, vote: {}", - error, vote - ); - return VoteReceptionResult::ErrorAddingVote(error); - }, - }; + // error + Err(error) => { + error!( + "MUST_FIX: vote received could not be added: {}, vote: {}", + error, vote + ); + return VoteReceptionResult::ErrorAddingVote(error); + }, + }; // // 4. We couldn't form a QC, let's check if we can create a TC @@ -405,8 +395,6 @@ impl fmt::Display for PendingVotes { #[cfg(test)] mod tests { use super::{PendingVotes, VoteReceptionResult}; - use crate::util::mock_time_service::SimulatedTimeService; - use aptos_config::config::QcAggregatorType; use aptos_consensus_types::{ block::block_test_utils::certificate_for_genesis, vote::Vote, vote_data::VoteData, }; @@ -415,9 +403,7 @@ mod tests { block_info::BlockInfo, ledger_info::LedgerInfo, validator_verifier::random_validator_verifier, }; - use futures_channel::mpsc::unbounded; use itertools::Itertools; - use std::sync::Arc; /// Creates a random ledger info for epoch 1 and round 1. 
fn random_ledger_info() -> LedgerInfo { @@ -440,12 +426,7 @@ mod tests { // set up 4 validators let (signers, validator) = random_validator_verifier(4, Some(2), false); - let (delayed_qc_tx, _) = unbounded(); - let mut pending_votes = PendingVotes::new( - Arc::new(SimulatedTimeService::new()), - delayed_qc_tx, - QcAggregatorType::NoDelay, - ); + let mut pending_votes = PendingVotes::new(); // create random vote from validator[0] let li1 = random_ledger_info(); @@ -512,12 +493,7 @@ mod tests { // set up 4 validators let (signers, validator) = random_validator_verifier(4, None, false); - let (delayed_qc_tx, _) = unbounded(); - let mut pending_votes = PendingVotes::new( - Arc::new(SimulatedTimeService::new()), - delayed_qc_tx, - QcAggregatorType::NoDelay, - ); + let mut pending_votes = PendingVotes::new(); // submit a new vote from validator[0] -> VoteAdded let li0 = random_ledger_info(); diff --git a/consensus/src/qc_aggregator.rs b/consensus/src/qc_aggregator.rs deleted file mode 100644 index 2f695c651927b..0000000000000 --- a/consensus/src/qc_aggregator.rs +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright © Aptos Foundation -// Parts of the project are originally copyright © Meta Platforms, Inc. -// SPDX-License-Identifier: Apache-2.0 - -use crate::{ - pending_votes::{PendingVotes, VoteReceptionResult}, - util::time_service::TimeService, -}; -use aptos_config::config::{DelayedQcAggregatorConfig, QcAggregatorType}; -use aptos_consensus_types::{delayed_qc_msg::DelayedQcMsg, vote::Vote}; -use aptos_logger::{error, info}; -use aptos_types::{ - ledger_info::LedgerInfoWithPartialSignatures, validator_verifier::ValidatorVerifier, -}; -use futures::SinkExt; -use futures_channel::mpsc::UnboundedSender; -use std::{sync::Arc, time::Duration}; -use tokio::time::sleep; - -pub trait QcAggregator: Send + Sync { - fn handle_aggregated_qc( - &mut self, - validator_verifier: &ValidatorVerifier, - aggregated_voting_power: u128, - vote: &Vote, - li_with_sig: &LedgerInfoWithPartialSignatures, - ) -> VoteReceptionResult; -} - -struct NoDelayQcAggregator {} - -pub fn create_qc_aggregator( - qc_aggregator_type: QcAggregatorType, - time_service: Arc, - delayed_qc_tx: UnboundedSender, -) -> Box { - match qc_aggregator_type { - QcAggregatorType::NoDelay => Box::new(NoDelayQcAggregator {}), - QcAggregatorType::Delayed(delay_config) => { - let DelayedQcAggregatorConfig { - max_delay_after_round_start_ms, - aggregated_voting_power_pct_to_wait, - pct_delay_after_qc_aggregated, - } = delay_config; - Box::new(DelayedQcAggregator::new( - Duration::from_millis(max_delay_after_round_start_ms), - aggregated_voting_power_pct_to_wait, - pct_delay_after_qc_aggregated, - time_service, - delayed_qc_tx, - )) - }, - } -} - -impl QcAggregator for NoDelayQcAggregator { - fn handle_aggregated_qc( - &mut self, - validator_verifier: &ValidatorVerifier, - aggregated_voting_power: u128, - vote: &Vote, - li_with_sig: &LedgerInfoWithPartialSignatures, - ) -> VoteReceptionResult { - assert!( - aggregated_voting_power >= validator_verifier.quorum_voting_power(), - "QC aggregation should not be triggered if we don't have enough votes to form a QC" - ); - PendingVotes::aggregate_qc_now(validator_verifier, li_with_sig, vote.vote_data()) - } -} - -struct DelayedQcAggregator { - round_start_time: Duration, - max_delay_after_round_start: Duration, - aggregated_voting_power_pct_to_wait: usize, - pct_delay_after_qc_aggregated: usize, - time_service: Arc, - // True, if we already have enough vote to aggregate a QC, but we have trigged a delayed QC - // 
aggregation event to collect as many votes as possible. - qc_aggregation_delayed: bool, - // To send delayed QC aggregation events to the round manager. - delayed_qc_tx: UnboundedSender, -} - -impl DelayedQcAggregator { - pub fn new( - max_delay_after_round_start: Duration, - aggregated_voting_power_pct_to_wait: usize, - pct_delay_after_qc_aggregated: usize, - time_service: Arc, - delayed_qc_tx: UnboundedSender, - ) -> Self { - let round_start_time = time_service.get_current_timestamp(); - Self { - round_start_time, - max_delay_after_round_start, - aggregated_voting_power_pct_to_wait, - pct_delay_after_qc_aggregated, - time_service, - qc_aggregation_delayed: false, - delayed_qc_tx, - } - } -} - -impl QcAggregator for DelayedQcAggregator { - fn handle_aggregated_qc( - &mut self, - validator_verifier: &ValidatorVerifier, - aggregated_voting_power: u128, - vote: &Vote, - li_with_sig: &LedgerInfoWithPartialSignatures, - ) -> VoteReceptionResult { - assert!( - aggregated_voting_power >= validator_verifier.quorum_voting_power(), - "QC aggregation should not be triggered if we don't have enough votes to form a QC" - ); - let current_time = self.time_service.get_current_timestamp(); - - // If we have reached the aggregated voting power threshold, we should aggregate the QC now. - if aggregated_voting_power - >= self.aggregated_voting_power_pct_to_wait as u128 - * validator_verifier.total_voting_power() - / 100 - { - // Voting power is u128 so there is no overflow here. - info!( - "QC aggregation triggered by aggregated voting power: {}", - aggregated_voting_power - ); - return PendingVotes::aggregate_qc_now( - validator_verifier, - li_with_sig, - vote.vote_data(), - ); - } - - // If we have not reached the aggregated voting power threshold and have - // already triggered a delayed QC aggregation event, we should not trigger another - // one. 
- if self.qc_aggregation_delayed { - return VoteReceptionResult::VoteAddedQCDelayed(aggregated_voting_power); - } - - let time_since_round_start = current_time - self.round_start_time; - if time_since_round_start >= self.max_delay_after_round_start { - info!( - "QC aggregation triggered by time: {} ms", - time_since_round_start.as_millis() - ); - return PendingVotes::aggregate_qc_now( - validator_verifier, - li_with_sig, - vote.vote_data(), - ); - } - - let wait_time = (self.max_delay_after_round_start - time_since_round_start) - .min(time_since_round_start * self.pct_delay_after_qc_aggregated as u32 / 100); - - let delayed_qc_event = DelayedQcMsg::new(vote.clone()); - self.qc_aggregation_delayed = true; - - let mut delayed_qc_sender = self.delayed_qc_tx.clone(); - - info!( - "QC aggregation delayed by {} ms, wait time: {} ms", - time_since_round_start.as_millis(), - wait_time.as_millis() - ); - - tokio::spawn(async move { - sleep(wait_time).await; - if let Err(e) = delayed_qc_sender.send(delayed_qc_event).await { - error!("Failed to send event to round manager {:?}", e); - } - }); - - VoteReceptionResult::VoteAddedQCDelayed(aggregated_voting_power) - } -} diff --git a/consensus/src/round_manager.rs b/consensus/src/round_manager.rs index bd7be5172d775..f423d93d1e0ff 100644 --- a/consensus/src/round_manager.rs +++ b/consensus/src/round_manager.rs @@ -39,7 +39,6 @@ use aptos_consensus_types::{ block::Block, block_data::BlockType, common::{Author, Round}, - delayed_qc_msg::DelayedQcMsg, order_vote_msg::OrderVoteMsg, proof_of_store::{ProofCache, ProofOfStoreMsg, SignedBatchInfoMsg}, proposal_msg::ProposalMsg, @@ -70,7 +69,6 @@ use aptos_types::{ }; use fail::fail_point; use futures::{channel::oneshot, stream::FuturesUnordered, Future, FutureExt, StreamExt}; -use futures_channel::mpsc::UnboundedReceiver; use lru::LruCache; use serde::Serialize; use std::{mem::Discriminant, pin::Pin, sync::Arc, time::Duration}; @@ -591,25 +589,6 @@ impl RoundManager { self.process_verified_proposal(proposal).await } - pub async fn process_delayed_qc_msg(&mut self, msg: DelayedQcMsg) -> anyhow::Result<()> { - ensure!( - msg.vote.vote_data().proposed().round() == self.round_state.current_round(), - "Discarding stale delayed QC for round {}, current round {}", - msg.vote.vote_data().proposed().round(), - self.round_state.current_round() - ); - let vote = msg.vote().clone(); - let vote_reception_result = self - .round_state - .process_delayed_qc_msg(&self.epoch_state.verifier, msg); - trace!( - "Received delayed QC message and vote reception result is {:?}", - vote_reception_result - ); - self.process_vote_reception_result(&vote, vote_reception_result) - .await - } - /// Sync to the sync info sending from peer if it has newer certificates. 
async fn sync_up(&mut self, sync_info: &SyncInfo, author: Author) -> anyhow::Result<()> { let local_sync_info = self.block_store.sync_info(); @@ -1330,9 +1309,7 @@ impl RoundManager { PROPOSAL_VOTE_ADDED.inc(); Ok(()) }, - VoteReceptionResult::VoteAddedQCDelayed(_) - | VoteReceptionResult::EchoTimeout(_) - | VoteReceptionResult::DuplicateVote => Ok(()), + VoteReceptionResult::EchoTimeout(_) | VoteReceptionResult::DuplicateVote => Ok(()), e => Err(anyhow::anyhow!("{:?}", e)), } } @@ -1511,7 +1488,6 @@ impl RoundManager { (Author, VerifiedEvent), >, mut buffered_proposal_rx: aptos_channel::Receiver, - mut delayed_qc_rx: UnboundedReceiver, close_rx: oneshot::Receiver>, ) { info!(epoch = self.epoch_state().epoch, "RoundManager started"); @@ -1524,19 +1500,6 @@ impl RoundManager { ack_sender.send(()).expect("[RoundManager] Fail to ack shutdown"); } break; - } - delayed_qc_msg = delayed_qc_rx.select_next_some() => { - let result = monitor!( - "process_delayed_qc", - self.process_delayed_qc_msg(delayed_qc_msg).await - ); - match result { - Ok(_) => trace!(RoundStateLogSchema::new(self.round_state())), - Err(e) => { - counters::ERROR_COUNT.inc(); - warn!(error = ?e, kind = error_kind(&e), RoundStateLogSchema::new(self.round_state())); - } - } }, proposal = buffered_proposal_rx.select_next_some() => { let mut proposals = vec![proposal]; diff --git a/consensus/src/round_manager_fuzzing.rs b/consensus/src/round_manager_fuzzing.rs index ab7a14740624f..2eefa70b0752d 100644 --- a/consensus/src/round_manager_fuzzing.rs +++ b/consensus/src/round_manager_fuzzing.rs @@ -24,10 +24,7 @@ use crate::{ util::{mock_time_service::SimulatedTimeService, time_service::TimeService}, }; use aptos_channels::{self, aptos_channel, message_queues::QueueStyle}; -use aptos_config::{ - config::{ConsensusConfig, QcAggregatorType}, - network_id::NetworkId, -}; +use aptos_config::{config::ConsensusConfig, network_id::NetworkId}; use aptos_consensus_types::{proposal_msg::ProposalMsg, utils::PayloadTxnsSize}; use aptos_infallible::Mutex; use aptos_network::{ @@ -50,7 +47,6 @@ use aptos_types::{ validator_verifier::ValidatorVerifier, }; use futures::{channel::mpsc, executor::block_on}; -use futures_channel::mpsc::unbounded; use maplit::hashmap; use once_cell::sync::Lazy; use std::{sync::Arc, time::Duration}; @@ -113,16 +109,9 @@ fn create_round_state() -> RoundState { let base_timeout = std::time::Duration::new(60, 0); let time_interval = Box::new(ExponentialTimeInterval::fixed(base_timeout)); let (round_timeout_sender, _) = aptos_channels::new_test(1_024); - let (delayed_qc_tx, _) = unbounded(); let time_service = Arc::new(SimulatedTimeService::new()); - RoundState::new( - time_interval, - time_service, - round_timeout_sender, - delayed_qc_tx, - QcAggregatorType::NoDelay, - ) + RoundState::new(time_interval, time_service, round_timeout_sender) } // Creates an RoundManager for fuzzing diff --git a/consensus/src/round_manager_test.rs b/consensus/src/round_manager_test.rs index a01fef7b06bab..c12e476a7f56c 100644 --- a/consensus/src/round_manager_test.rs +++ b/consensus/src/round_manager_test.rs @@ -30,7 +30,7 @@ use crate::{ }; use aptos_channels::{self, aptos_channel, message_queues::QueueStyle}; use aptos_config::{ - config::{ConsensusConfig, QcAggregatorType}, + config::ConsensusConfig, network_id::{NetworkId, PeerNetworkId}, }; use aptos_consensus_types::{ @@ -83,7 +83,6 @@ use futures::{ stream::select, FutureExt, Stream, StreamExt, }; -use futures_channel::mpsc::unbounded; use maplit::hashmap; use std::{ iter::FromIterator, @@ 
-124,14 +123,7 @@ impl NodeSetup { let base_timeout = Duration::new(60, 0); let time_interval = Box::new(ExponentialTimeInterval::fixed(base_timeout)); let (round_timeout_sender, _) = aptos_channels::new_test(1_024); - let (delayed_qc_tx, _) = unbounded(); - RoundState::new( - time_interval, - time_service, - round_timeout_sender, - delayed_qc_tx, - QcAggregatorType::NoDelay, - ) + RoundState::new(time_interval, time_service, round_timeout_sender) } fn create_proposer_election(proposers: Vec) -> Arc { From 065d7602cf402188c8395ec105265c640d8e7c50 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 18 Sep 2024 16:42:30 -0700 Subject: [PATCH 35/36] Changing names --- consensus/consensus-types/src/block_test.rs | 6 +++--- consensus/consensus-types/src/timeout_2chain.rs | 4 ++-- consensus/safety-rules/src/test_utils.rs | 4 ++-- consensus/src/liveness/round_state.rs | 4 ++-- consensus/src/pending_order_votes.rs | 6 +++--- consensus/src/pending_votes.rs | 10 +++++----- consensus/src/pipeline/buffer_item.rs | 8 ++++---- types/src/ledger_info.rs | 14 +++++++------- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/consensus/consensus-types/src/block_test.rs b/consensus/consensus-types/src/block_test.rs index bc33ddec8bc5a..54ece0539e2c8 100644 --- a/consensus/consensus-types/src/block_test.rs +++ b/consensus/consensus-types/src/block_test.rs @@ -17,7 +17,7 @@ use aptos_types::{ account_address::AccountAddress, aggregate_signature::PartialSignatures, block_info::{BlockInfo, Round}, - ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures}, + ledger_info::{LedgerInfo, LedgerInfoWithVerifiedSignatures}, on_chain_config::ValidatorSet, validator_signer::ValidatorSigner, validator_verifier::{random_validator_verifier, ValidatorVerifier}, @@ -131,7 +131,7 @@ fn test_same_qc_different_authors() { .unwrap(); let signature = signer.sign(genesis_qc.ledger_info().ledger_info()).unwrap(); - let mut ledger_info_altered = LedgerInfoWithPartialSignatures::new( + let mut ledger_info_altered = LedgerInfoWithVerifiedSignatures::new( genesis_qc.ledger_info().ledger_info().clone(), PartialSignatures::empty(), ); @@ -201,7 +201,7 @@ fn test_block_metadata_bitvec() { ); let mut ledger_info_1 = - LedgerInfoWithPartialSignatures::new(ledger_info.clone(), PartialSignatures::empty()); + LedgerInfoWithVerifiedSignatures::new(ledger_info.clone(), PartialSignatures::empty()); let votes_1 = vec![true, false, true, true]; votes_1 .iter() diff --git a/consensus/consensus-types/src/timeout_2chain.rs b/consensus/consensus-types/src/timeout_2chain.rs index c0d62edc6ffb3..87d35bc99bd92 100644 --- a/consensus/consensus-types/src/timeout_2chain.rs +++ b/consensus/consensus-types/src/timeout_2chain.rs @@ -406,7 +406,7 @@ mod tests { use aptos_types::{ aggregate_signature::PartialSignatures, block_info::BlockInfo, - ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures}, + ledger_info::{LedgerInfo, LedgerInfoWithVerifiedSignatures}, validator_verifier::random_validator_verifier, }; @@ -415,7 +415,7 @@ mod tests { let quorum_size = validators.quorum_voting_power() as usize; let generate_quorum = |round, num_of_signature| { let vote_data = VoteData::new(BlockInfo::random(round), BlockInfo::random(0)); - let mut ledger_info = LedgerInfoWithPartialSignatures::new( + let mut ledger_info = LedgerInfoWithVerifiedSignatures::new( LedgerInfo::new(BlockInfo::empty(), vote_data.hash()), PartialSignatures::empty(), ); diff --git a/consensus/safety-rules/src/test_utils.rs b/consensus/safety-rules/src/test_utils.rs index 
07b9159c66a45..ce161c0a5fb14 100644 --- a/consensus/safety-rules/src/test_utils.rs +++ b/consensus/safety-rules/src/test_utils.rs @@ -24,7 +24,7 @@ use aptos_types::{ block_info::BlockInfo, epoch_change::EpochChangeProof, epoch_state::EpochState, - ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures, LedgerInfoWithSignatures}, + ledger_info::{LedgerInfo, LedgerInfoWithSignatures, LedgerInfoWithVerifiedSignatures}, on_chain_config::ValidatorSet, proof::AccumulatorExtensionProof, validator_info::ValidatorInfo, @@ -168,7 +168,7 @@ pub fn make_proposal_with_parent_and_overrides( ) .unwrap(); - let mut ledger_info_with_signatures = LedgerInfoWithPartialSignatures::new( + let mut ledger_info_with_signatures = LedgerInfoWithVerifiedSignatures::new( vote.ledger_info().clone(), PartialSignatures::empty(), ); diff --git a/consensus/src/liveness/round_state.rs b/consensus/src/liveness/round_state.rs index 0dc03ab88d105..74e78e9c9f024 100644 --- a/consensus/src/liveness/round_state.rs +++ b/consensus/src/liveness/round_state.rs @@ -14,7 +14,7 @@ use aptos_consensus_types::{ use aptos_crypto::HashValue; use aptos_logger::{prelude::*, Schema}; use aptos_types::{ - ledger_info::LedgerInfoWithPartialSignatures, validator_verifier::ValidatorVerifier, + ledger_info::LedgerInfoWithVerifiedSignatures, validator_verifier::ValidatorVerifier, }; use futures::future::AbortHandle; use serde::Serialize; @@ -45,7 +45,7 @@ pub struct NewRoundEvent { pub round: Round, pub reason: NewRoundReason, pub timeout: Duration, - pub prev_round_votes: Vec<(HashValue, LedgerInfoWithPartialSignatures)>, + pub prev_round_votes: Vec<(HashValue, LedgerInfoWithVerifiedSignatures)>, pub prev_round_timeout_votes: Option, } diff --git a/consensus/src/pending_order_votes.rs b/consensus/src/pending_order_votes.rs index 94b1ba6d15451..46cf23cfe2b90 100644 --- a/consensus/src/pending_order_votes.rs +++ b/consensus/src/pending_order_votes.rs @@ -7,7 +7,7 @@ use aptos_crypto::{hash::CryptoHash, HashValue}; use aptos_logger::prelude::*; use aptos_types::{ aggregate_signature::PartialSignatures, - ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures, LedgerInfoWithSignatures}, + ledger_info::{LedgerInfo, LedgerInfoWithSignatures, LedgerInfoWithVerifiedSignatures}, validator_verifier::{ValidatorVerifier, VerifyError}, }; use std::{collections::HashMap, sync::Arc}; @@ -33,7 +33,7 @@ pub enum OrderVoteReceptionResult { #[derive(Debug, PartialEq, Eq)] enum OrderVoteStatus { EnoughVotes(LedgerInfoWithSignatures), - NotEnoughVotes(LedgerInfoWithPartialSignatures), + NotEnoughVotes(LedgerInfoWithVerifiedSignatures), } /// A PendingVotes structure keep track of order votes for the last few rounds @@ -75,7 +75,7 @@ impl PendingOrderVotes { verified_quorum_cert.expect( "Quorum Cert is expected when creating a new entry in pending order votes", ), - OrderVoteStatus::NotEnoughVotes(LedgerInfoWithPartialSignatures::new( + OrderVoteStatus::NotEnoughVotes(LedgerInfoWithVerifiedSignatures::new( order_vote.ledger_info().clone(), PartialSignatures::empty(), )), diff --git a/consensus/src/pending_votes.rs b/consensus/src/pending_votes.rs index 05abc30dc63a2..b2177d2c5889a 100644 --- a/consensus/src/pending_votes.rs +++ b/consensus/src/pending_votes.rs @@ -20,7 +20,7 @@ use aptos_crypto::{hash::CryptoHash, HashValue}; use aptos_logger::prelude::*; use aptos_types::{ aggregate_signature::PartialSignatures, - ledger_info::LedgerInfoWithPartialSignatures, + ledger_info::LedgerInfoWithVerifiedSignatures, validator_verifier::{ValidatorVerifier, VerifyError}, 
diff --git a/consensus/src/pending_votes.rs b/consensus/src/pending_votes.rs
index 05abc30dc63a2..b2177d2c5889a 100644
--- a/consensus/src/pending_votes.rs
+++ b/consensus/src/pending_votes.rs
@@ -20,7 +20,7 @@ use aptos_crypto::{hash::CryptoHash, HashValue};
 use aptos_logger::prelude::*;
 use aptos_types::{
     aggregate_signature::PartialSignatures,
-    ledger_info::LedgerInfoWithPartialSignatures,
+    ledger_info::LedgerInfoWithVerifiedSignatures,
     validator_verifier::{ValidatorVerifier, VerifyError},
 };
 use std::{
@@ -62,7 +62,7 @@ pub struct PendingVotes {
     /// This might keep multiple LedgerInfos for the current round: either due to different proposals (byzantine behavior)
     /// or due to different NIL proposals (clients can have a different view of what block to extend).
     li_digest_to_votes:
-        HashMap<HashValue /* LedgerInfo digest */, (usize, LedgerInfoWithPartialSignatures)>,
+        HashMap<HashValue /* LedgerInfo digest */, (usize, LedgerInfoWithVerifiedSignatures)>,
     /// Tracks all the signatures of the 2-chain timeout for the given round.
     maybe_partial_2chain_tc: Option<TwoChainTimeoutWithPartialSignatures>,
     /// Map of Author to (vote, li_digest). This is useful to discard multiple votes.
@@ -138,7 +138,7 @@ impl PendingVotes {
                 // if the ledger info with signatures doesn't exist yet, create it
                 (
                     len,
-                    LedgerInfoWithPartialSignatures::new(
+                    LedgerInfoWithVerifiedSignatures::new(
                         vote.ledger_info().clone(),
                         PartialSignatures::empty(),
                     ),
@@ -264,7 +264,7 @@ impl PendingVotes {
 
     pub fn aggregate_qc_now(
         validator_verifier: &ValidatorVerifier,
-        li_with_sig: &LedgerInfoWithPartialSignatures,
+        li_with_sig: &LedgerInfoWithVerifiedSignatures,
         vote_data: &VoteData,
     ) -> VoteReceptionResult {
         match li_with_sig.aggregate_signatures(validator_verifier) {
@@ -317,7 +317,7 @@ impl PendingVotes {
     pub fn drain_votes(
         &mut self,
     ) -> (
-        Vec<(HashValue, LedgerInfoWithPartialSignatures)>,
+        Vec<(HashValue, LedgerInfoWithVerifiedSignatures)>,
        Option<TwoChainTimeoutWithPartialSignatures>,
    ) {
        for (hash_index, _) in self.li_digest_to_votes.values() {
diff --git a/consensus/src/pipeline/buffer_item.rs b/consensus/src/pipeline/buffer_item.rs
index 46e92047d3d18..3d06658cd4323 100644
--- a/consensus/src/pipeline/buffer_item.rs
+++ b/consensus/src/pipeline/buffer_item.rs
@@ -16,7 +16,7 @@ use aptos_reliable_broadcast::DropGuard;
 use aptos_types::{
     aggregate_signature::PartialSignatures,
     block_info::BlockInfo,
-    ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures, LedgerInfoWithSignatures},
+    ledger_info::{LedgerInfo, LedgerInfoWithSignatures, LedgerInfoWithVerifiedSignatures},
     validator_verifier::ValidatorVerifier,
 };
 use futures::future::BoxFuture;
@@ -68,7 +68,7 @@ fn generate_executed_item_from_ordered(
     order_vote_enabled: bool,
 ) -> BufferItem {
     debug!("{} advance to executed from ordered", commit_info);
-    let partial_commit_proof = LedgerInfoWithPartialSignatures::new(
+    let partial_commit_proof = LedgerInfoWithVerifiedSignatures::new(
         generate_commit_ledger_info(&commit_info, &ordered_proof, order_vote_enabled),
         verified_signatures,
     );
@@ -106,7 +106,7 @@ pub struct OrderedItem {
 
 pub struct ExecutedItem {
     pub executed_blocks: Vec<Arc<PipelinedBlock>>,
-    pub partial_commit_proof: LedgerInfoWithPartialSignatures,
+    pub partial_commit_proof: LedgerInfoWithVerifiedSignatures,
     pub callback: StateComputerCommitCallBackType,
     pub commit_info: BlockInfo,
     pub ordered_proof: LedgerInfoWithSignatures,
@@ -114,7 +114,7 @@ pub struct ExecutedItem {
 
 pub struct SignedItem {
     pub executed_blocks: Vec<Arc<PipelinedBlock>>,
-    pub partial_commit_proof: LedgerInfoWithPartialSignatures,
+    pub partial_commit_proof: LedgerInfoWithVerifiedSignatures,
     pub callback: StateComputerCommitCallBackType,
     pub commit_vote: CommitVote,
     pub rb_handle: Option<(Instant, DropGuard)>,
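The buffer-item structs touched above are stages of the commit pipeline: an item advances from ordered to executed to signed as commit votes accumulate in partial_commit_proof, and becomes aggregated once quorum signs. A toy version of that progression (simplified fields and threshold; not the buffer_item.rs definitions):

/// Simplified commit-proof lifecycle; each stage carries stronger evidence.
#[allow(dead_code)]
enum BufferItem {
    Ordered { ordered_proof: String },    // quorum ordered the block
    Executed { commit_sigs: Vec<u64> },   // executed; collecting commit votes
    Signed { commit_sigs: Vec<u64> },     // own vote broadcast, still collecting
    Aggregated { commit_proof: String },  // quorum of commit votes reached
}

impl BufferItem {
    /// Advance to Aggregated once enough commit votes have arrived.
    fn try_advance(self, quorum: usize) -> Self {
        match self {
            BufferItem::Signed { commit_sigs } if commit_sigs.len() >= quorum => {
                BufferItem::Aggregated {
                    commit_proof: format!("aggregated {} commit votes", commit_sigs.len()),
                }
            },
            other => other,
        }
    }
}

fn main() {
    let item = BufferItem::Signed { commit_sigs: vec![1, 2, 3] };
    assert!(matches!(item.try_advance(3), BufferItem::Aggregated { .. }));
}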
diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs
index 18aff380a54f9..7e0da229864e0 100644
--- a/types/src/ledger_info.rs
+++ b/types/src/ledger_info.rs
@@ -326,18 +326,18 @@ pub enum VerificationStatus {
 /// Contains the ledger info and partially aggregated signature from a set of validators, this data
 /// is only used during the aggregating the votes from different validators and is not persisted in DB.
 #[derive(Clone, Debug, Eq, PartialEq)]
-pub struct LedgerInfoWithPartialSignatures {
+pub struct LedgerInfoWithVerifiedSignatures {
     ledger_info: LedgerInfo,
     partial_sigs: PartialSignatures,
 }
 
-impl Display for LedgerInfoWithPartialSignatures {
+impl Display for LedgerInfoWithVerifiedSignatures {
     fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
         write!(f, "{}", self.ledger_info)
     }
 }
 
-impl LedgerInfoWithPartialSignatures {
+impl LedgerInfoWithVerifiedSignatures {
     pub fn new(ledger_info: LedgerInfo, signatures: PartialSignatures) -> Self {
         Self {
             ledger_info,
@@ -393,7 +393,7 @@ pub enum SignatureWithStatus {
 /// verify the aggregated signature at once. If the aggregated signature is invalid, then we verify each individual
 /// unverified signature and remove the invalid signatures.
 #[derive(Clone, Debug, Eq, PartialEq)]
-pub struct LedgerInfoWithMixedSignatures {
+pub struct LedgerInfoWithUnverifiedSignatures {
     ledger_info: LedgerInfo,
     // These signatures are not yet verified. For efficiency, once enough unverified signatures are collected,
     // they will be aggregated and verified.
@@ -401,13 +401,13 @@ pub struct LedgerInfoWithMixedSignatures {
     verified_signatures: PartialSignatures,
 }
 
-impl Display for LedgerInfoWithMixedSignatures {
+impl Display for LedgerInfoWithUnverifiedSignatures {
     fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
         write!(f, "{}", self.ledger_info)
     }
 }
 
-impl LedgerInfoWithMixedSignatures {
+impl LedgerInfoWithUnverifiedSignatures {
     pub fn new(ledger_info: LedgerInfo) -> Self {
         Self {
             ledger_info,
@@ -691,7 +691,7 @@ mod tests {
         let epoch_state = Arc::new(EpochState::new(10, validator_verifier.clone()));
 
         let mut ledger_info_with_mixed_signatures =
-            LedgerInfoWithMixedSignatures::new(ledger_info.clone());
+            LedgerInfoWithUnverifiedSignatures::new(ledger_info.clone());
 
         let mut partial_sig = PartialSignatures::empty();

From 5ed8ad557de8026aa8592aed38ffafd69466e891 Mon Sep 17 00:00:00 2001
From: Satya Vusirikala
Date: Wed, 18 Sep 2024 23:31:45 -0700
Subject: [PATCH 36/36] Addressing PR comments

---
 types/src/aggregate_signature.rs |  6 +++---
 types/src/ledger_info.rs         | 21 ++++++++-------------
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/types/src/aggregate_signature.rs b/types/src/aggregate_signature.rs
index 3202583b6252d..24ac789da671e 100644
--- a/types/src/aggregate_signature.rs
+++ b/types/src/aggregate_signature.rs
@@ -86,12 +86,12 @@ impl PartialSignatures {
         self.signatures.is_empty()
     }
 
-    pub fn remove_signature(&mut self, validator: AccountAddress) {
-        self.signatures.remove(&validator);
+    pub fn remove_signature(&mut self, validator: AccountAddress) -> Option<bls12381::Signature> {
+        self.signatures.remove(&validator)
     }
 
     pub fn add_signature(&mut self, validator: AccountAddress, signature: bls12381::Signature) {
-        self.signatures.entry(validator).or_insert(signature);
+        self.signatures.insert(validator, signature);
     }
 
     pub fn signatures(&self) -> &BTreeMap<AccountAddress, bls12381::Signature> {
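Two small semantic changes hide in this hunk: add_signature now overwrites an existing entry via insert instead of keeping the first signature with entry().or_insert, and remove_signature returns the removed value. The returned Option is what lets the caller in ledger_info.rs below move a signature from the unverified map to the verified one without cloning and without a separate contains check; the idiom in miniature (a plain BTreeMap standing in for PartialSignatures):

use std::collections::BTreeMap;

fn main() {
    let mut unverified: BTreeMap<u64, Vec<u8>> = BTreeMap::new();
    let mut verified: BTreeMap<u64, Vec<u8>> = BTreeMap::new();
    unverified.insert(7, vec![0x01, 0x02]);

    // remove() hands back the owned signature, so it moves rather than clones,
    // and an absent key simply skips the block instead of panicking.
    if let Some(sig) = unverified.remove(&7) {
        verified.insert(7, sig);
    }
    assert!(unverified.is_empty());
    assert!(verified.contains_key(&7));
}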
diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs
index 7e0da229864e0..9f3aead311f81 100644
--- a/types/src/ledger_info.rs
+++ b/types/src/ledger_info.rs
@@ -318,11 +318,6 @@ impl LedgerInfoWithV0 {
     }
 }
 
-pub enum VerificationStatus {
-    Verified,
-    Unverified,
-}
-
 /// Contains the ledger info and partially aggregated signature from a set of validators, this data
 /// is only used during the aggregating the votes from different validators and is not persisted in DB.
 #[derive(Clone, Debug, Eq, PartialEq)]
@@ -437,9 +432,6 @@ impl LedgerInfoWithUnverifiedSignatures {
         if self.verified_signatures.contains_voter(&validator) {
             return;
         }
-        if self.unverified_signatures.contains_voter(&validator) {
-            self.unverified_signatures.remove_signature(validator);
-        }
         self.unverified_signatures
             .add_signature(validator, signature);
     }
@@ -527,15 +519,18 @@ impl LedgerInfoWithUnverifiedSignatures {
                     .verify(*account_address, self.ledger_info(), signature)
                     .is_ok()
                 {
-                    return Some((*account_address, signature.clone()));
+                    return Some(*account_address);
                 }
                 None
             })
             .collect::<Vec<_>>();
-        for (account_address, signature) in verified {
-            self.verified_signatures
-                .add_signature(account_address, signature);
-            self.unverified_signatures.remove_signature(account_address);
+        for account_address in verified {
+            if let Some(signature) =
+                self.unverified_signatures.remove_signature(account_address)
+            {
+                self.verified_signatures
+                    .add_signature(account_address, signature);
+            }
         }
 
         // For these authors, we will not use optimistic signature verification in the future.
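The rewritten loop is the fallback half of the optimistic verification described in the struct's comments: try one aggregate check over everything unverified, and only if that fails verify signatures individually, promote the valid ones, and remember the invalid authors so their future votes skip the optimistic path. A condensed sketch of that control flow (verify_aggregate and verify_one are illustrative stand-ins, not the aptos_types API):

use std::collections::BTreeMap;

type Author = u64;
type Sig = u8;

// Illustrative stand-ins: a signature is "valid" here iff it is even.
fn verify_aggregate(sigs: &BTreeMap<Author, Sig>) -> bool {
    sigs.values().all(|s| s % 2 == 0)
}
fn verify_one(_author: Author, sig: Sig) -> bool {
    sig % 2 == 0
}

/// Optimistic path: one cheap aggregate check; on failure, verify individually,
/// keep the valid signatures, and report the invalid authors.
fn promote(unverified: &mut BTreeMap<Author, Sig>, verified: &mut BTreeMap<Author, Sig>) -> Vec<Author> {
    if verify_aggregate(unverified) {
        verified.append(unverified); // all good: move everything at once
        return Vec::new();
    }
    let authors: Vec<Author> = unverified.keys().copied().collect();
    let mut invalid = Vec::new();
    for a in authors {
        if let Some(sig) = unverified.remove(&a) {
            if verify_one(a, sig) {
                verified.insert(a, sig);
            } else {
                invalid.push(a); // no optimistic verification for them later
            }
        }
    }
    invalid
}

fn main() {
    let mut unverified = BTreeMap::from([(1, 2), (2, 3), (3, 4)]);
    let mut verified = BTreeMap::new();
    let bad = promote(&mut unverified, &mut verified);
    assert_eq!(bad, vec![2]);
    assert_eq!(verified.len(), 2);
}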