diff --git a/Cargo.lock b/Cargo.lock index 6e0f8ca54acc8..17e87b5ee16bc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -262,7 +262,7 @@ dependencies = [ [[package]] name = "aptos" -version = "4.1.0" +version = "4.2.0" dependencies = [ "anyhow", "aptos-api-types", @@ -4225,6 +4225,7 @@ dependencies = [ "claims", "coset", "criterion", + "dashmap", "derivative", "fixed", "fxhash", diff --git a/api/src/accounts.rs b/api/src/accounts.rs index 3ac9a13005e61..d94454f6b34e3 100644 --- a/api/src/accounts.rs +++ b/api/src/accounts.rs @@ -66,7 +66,7 @@ impl AccountsApi { let context = self.context.clone(); api_spawn_blocking(move || { - let account = Account::new(context, address.0, ledger_version.0, None, None, false)?; + let account = Account::new(context, address.0, ledger_version.0, None, None)?; account.account(&accept_type) }) .await @@ -118,7 +118,6 @@ impl AccountsApi { ledger_version.0, start.0.map(StateKey::from), limit.0, - true, )?; account.resources(&accept_type) }) @@ -171,7 +170,6 @@ impl AccountsApi { ledger_version.0, start.0.map(StateKey::from), limit.0, - true, )?; account.modules(&accept_type) }) @@ -201,24 +199,11 @@ impl Account { requested_ledger_version: Option, start: Option, limit: Option, - require_state_indices: bool, ) -> Result { - let sharding_enabled = context - .node_config - .storage - .rocksdb_configs - .enable_storage_sharding; - - let (latest_ledger_info, requested_version) = if sharding_enabled && require_state_indices { - context.get_latest_ledger_info_and_verify_internal_indexer_lookup_version( + let (latest_ledger_info, requested_version) = context + .get_latest_ledger_info_and_verify_lookup_version( requested_ledger_version.map(|inner| inner.0), - )? - } else { - // Use the latest ledger version, or the requested associated version - context.get_latest_ledger_info_and_verify_lookup_version( - requested_ledger_version.map(|inner| inner.0), - )? 
- }; + )?; Ok(Self { context, diff --git a/api/src/context.rs b/api/src/context.rs index aa9e59848544f..73b3c31b11d91 100644 --- a/api/src/context.rs +++ b/api/src/context.rs @@ -221,20 +221,26 @@ impl Context { .map_err(|e| e.into()) } - pub fn get_latest_ledger_info(&self) -> Result { + pub fn get_oldest_version_and_block_height( + &self, + ) -> Result<(Version, u64), E> { + self.db + .get_first_viable_block() + .context("Failed to retrieve oldest block information") + .map_err(|e| E::service_unavailable_with_code_no_info(e, AptosErrorCode::InternalError)) + } + + pub fn get_latest_storage_ledger_info( + &self, + ) -> Result { let ledger_info = self .get_latest_ledger_info_with_signatures() .context("Failed to retrieve latest ledger info") .map_err(|e| { E::service_unavailable_with_code_no_info(e, AptosErrorCode::InternalError) })?; - let (oldest_version, oldest_block_height) = self - .db - .get_first_viable_block() - .context("Failed to retrieve oldest block information") - .map_err(|e| { - E::service_unavailable_with_code_no_info(e, AptosErrorCode::InternalError) - })?; + + let (oldest_version, oldest_block_height) = self.get_oldest_version_and_block_height()?; let (_, _, newest_block_event) = self .db .get_block_info_by_version(ledger_info.ledger_info().version()) @@ -252,32 +258,12 @@ impl Context { )) } - pub fn get_latest_ledger_info_and_verify_internal_indexer_lookup_version( - &self, - requested_ledger_version: Option, - ) -> Result<(LedgerInfo, Version), E> { - if self.indexer_reader.is_none() { - return Err(E::internal_with_code_no_info( - "Indexer reader doesn't exist", - AptosErrorCode::InternalError, - )); - } - - let (latest_ledger_info, latest_internal_indexer_ledger_version) = - self.get_latest_internal_indexer_ledger_version_and_main_db_info()?; - if let Some(version) = requested_ledger_version { - let request_ledger_version = Version::from(version); - if latest_internal_indexer_ledger_version < request_ledger_version { - return Err(version_not_found( - request_ledger_version, - &latest_ledger_info, - )); - } else if request_ledger_version < latest_ledger_info.oldest_ledger_version.0 { - return Err(version_pruned(request_ledger_version, &latest_ledger_info)); - } - Ok((latest_ledger_info, request_ledger_version)) + pub fn get_latest_ledger_info(&self) -> Result { + if self.indexer_reader.is_some() { + let ledger_info = self.get_latest_internal_indexer_ledger_version_and_ledger_info()?; + Ok(ledger_info) } else { - Ok((latest_ledger_info, latest_internal_indexer_ledger_version)) + self.get_latest_storage_ledger_info() } } @@ -306,20 +292,42 @@ impl Context { Ok((latest_ledger_info, requested_ledger_version)) } - pub fn get_latest_internal_indexer_ledger_version_and_main_db_info( + pub fn get_latest_internal_indexer_ledger_version_and_ledger_info< + E: ServiceUnavailableError, + >( &self, - ) -> Result<(LedgerInfo, Version), E> { + ) -> Result { if let Some(indexer_reader) = self.indexer_reader.as_ref() { if let Some(latest_version) = indexer_reader .get_latest_internal_indexer_ledger_version() - .map_err(|err| E::internal_with_code_no_info(err, AptosErrorCode::InternalError))? + .map_err(|err| { + E::service_unavailable_with_code_no_info(err, AptosErrorCode::InternalError) + })? 
{ - let latest_ledger_info = self.get_latest_ledger_info()?; - return Ok((latest_ledger_info, latest_version)); + let (_, _, new_block_event) = self + .db + .get_block_info_by_version(latest_version) + .map_err(|_| { + E::service_unavailable_with_code_no_info( + "Failed to get block", + AptosErrorCode::InternalError, + ) + })?; + let (oldest_version, oldest_block_height) = + self.get_oldest_version_and_block_height()?; + return Ok(LedgerInfo::new_ledger_info( + &self.chain_id(), + new_block_event.epoch(), + latest_version, + oldest_version, + oldest_block_height, + new_block_event.height(), + new_block_event.proposed_time(), + )); } } - Err(E::internal_with_code_no_info( + Err(E::service_unavailable_with_code_no_info( "Indexer reader doesn't exist, or doesn't have data.", AptosErrorCode::InternalError, )) diff --git a/api/src/events.rs b/api/src/events.rs index 5c9266df373b8..49c4fad21ce9f 100644 --- a/api/src/events.rs +++ b/api/src/events.rs @@ -77,7 +77,7 @@ impl EventsApi { // Ensure that account exists let api = self.clone(); api_spawn_blocking(move || { - let account = Account::new(api.context.clone(), address.0, None, None, None, true)?; + let account = Account::new(api.context.clone(), address.0, None, None, None)?; account.verify_account_or_object_resource()?; api.list( account.latest_ledger_info, @@ -144,7 +144,7 @@ impl EventsApi { let api = self.clone(); api_spawn_blocking(move || { - let account = Account::new(api.context.clone(), address.0, None, None, None, true)?; + let account = Account::new(api.context.clone(), address.0, None, None, None)?; let key = account.find_event_key(event_handle.0, field_name.0.into())?; api.list(account.latest_ledger_info, accept_type, page, key) }) diff --git a/api/src/index.rs b/api/src/index.rs index 94b5289636413..ba91cbb34c342 100644 --- a/api/src/index.rs +++ b/api/src/index.rs @@ -33,7 +33,6 @@ impl IndexApi { self.context .check_api_output_enabled("Get ledger info", &accept_type)?; let ledger_info = self.context.get_latest_ledger_info()?; - let node_role = self.context.node_role(); api_spawn_blocking(move || match accept_type { diff --git a/api/src/transactions.rs b/api/src/transactions.rs index 86a16b8a356bd..1e1214361961b 100644 --- a/api/src/transactions.rs +++ b/api/src/transactions.rs @@ -986,7 +986,7 @@ impl TransactionsApi { address: Address, ) -> BasicResultWith404> { // Verify the account exists - let account = Account::new(self.context.clone(), address, None, None, None, true)?; + let account = Account::new(self.context.clone(), address, None, None, None)?; account.get_account_resource()?; let latest_ledger_info = account.latest_ledger_info; diff --git a/api/types/src/ledger_info.rs b/api/types/src/ledger_info.rs index ef912190c94c9..97438ae104013 100644 --- a/api/types/src/ledger_info.rs +++ b/api/types/src/ledger_info.rs @@ -40,6 +40,26 @@ impl LedgerInfo { } } + pub fn new_ledger_info( + chain_id: &ChainId, + epoch: u64, + ledger_version: u64, + oldest_ledger_version: u64, + oldest_block_height: u64, + block_height: u64, + ledger_timestamp: u64, + ) -> Self { + Self { + chain_id: chain_id.id(), + epoch: epoch.into(), + ledger_version: ledger_version.into(), + oldest_ledger_version: oldest_ledger_version.into(), + block_height: block_height.into(), + oldest_block_height: oldest_block_height.into(), + ledger_timestamp: ledger_timestamp.into(), + } + } + pub fn epoch(&self) -> u64 { self.epoch.into() } diff --git a/aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs b/aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs 
index 26557162fb157..1e7f1a4860103 100644 --- a/aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs +++ b/aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs @@ -50,15 +50,15 @@ crate::gas_schedule::macros::define_gas_parameters!( [mut_borrow_variant_field: InternalGas, { RELEASE_V1_18.. => "mut_borrow_variant_field" }, 835], [imm_borrow_variant_field_generic: InternalGas, - { RELEASE_V1_18 => "imm_borrow_variant_field_generic" }, 835], + { RELEASE_V1_18.. => "imm_borrow_variant_field_generic" }, 835], [mut_borrow_variant_field_generic: InternalGas, - { RELEASE_V1_18 => "mut_borrow_variant_field_generic" }, 835], + { RELEASE_V1_18.. => "mut_borrow_variant_field_generic" }, 835], // variant testing [test_variant: InternalGas, - { RELEASE_V1_18 => "test_variant" }, 535], + { RELEASE_V1_18.. => "test_variant" }, 535], [test_variant_generic: InternalGas, - { RELEASE_V1_18 => "test_variant_generic" }, 535], + { RELEASE_V1_18.. => "test_variant_generic" }, 535], // locals [copy_loc_base: InternalGas, "copy_loc.base", 294], diff --git a/aptos-move/aptos-gas-schedule/src/ver.rs b/aptos-move/aptos-gas-schedule/src/ver.rs index f8b5d7617151c..2df67131a21f7 100644 --- a/aptos-move/aptos-gas-schedule/src/ver.rs +++ b/aptos-move/aptos-gas-schedule/src/ver.rs @@ -69,7 +69,7 @@ /// global operations. /// - V1 /// - TBA -pub const LATEST_GAS_FEATURE_VERSION: u64 = gas_feature_versions::RELEASE_V1_18; +pub const LATEST_GAS_FEATURE_VERSION: u64 = gas_feature_versions::RELEASE_V1_20; pub mod gas_feature_versions { pub const RELEASE_V1_8: u64 = 11; diff --git a/aptos-move/framework/aptos-framework/doc/object.md b/aptos-move/framework/aptos-framework/doc/object.md index bba128592ff31..f3dae60d94a88 100644 --- a/aptos-move/framework/aptos-framework/doc/object.md +++ b/aptos-move/framework/aptos-framework/doc/object.md @@ -604,6 +604,16 @@ generate_unique_address uses this for domain separation within its native implem + + +Objects cannot be burnt + + +
const EBURN_NOT_ALLOWED: u64 = 10;
+
+ + + The object does not allow for deletion @@ -2130,12 +2140,13 @@ objects may have cyclic dependencies. ## Function `burn` -Forcefully transfer an unwanted object to BURN_ADDRESS, ignoring whether ungated_transfer is allowed. -This only works for objects directly owned and for simplicity does not apply to indirectly owned objects. -Original owners can reclaim burnt objects any time in the future by calling unburn. +Previously allowed to burn objects, has now been disabled. Objects can still be unburnt. +Please use the test only [object::burn_object] for testing with previously burned objects. -
public entry fun burn<T: key>(owner: &signer, object: object::Object<T>)
+
+
#[deprecated]
+public entry fun burn<T: key>(_owner: &signer, _object: object::Object<T>)
 
@@ -2144,12 +2155,8 @@ Original owners can reclaim burnt objects any time in the future by calling unbu Implementation -
public entry fun burn<T: key>(owner: &signer, object: Object<T>) acquires ObjectCore {
-    let original_owner = signer::address_of(owner);
-    assert!(is_owner(object, original_owner), error::permission_denied(ENOT_OBJECT_OWNER));
-    let object_addr = object.inner;
-    move_to(&create_signer(object_addr), TombStone { original_owner });
-    transfer_raw_inner(object_addr, BURN_ADDRESS);
+
public entry fun burn<T: key>(_owner: &signer, _object: Object<T>) {
+    abort error::permission_denied(EBURN_NOT_ALLOWED)
 }
 
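The test updates later in this diff migrate call sites from the removed `burn` behavior to the new test-only `burn_object`. A minimal sketch of that migration (the test name is hypothetical; `create_hero`, `is_burnt`, and `unburn` are the existing helpers already used by the tests in `object.move`):

#[test(creator = @0x123)]
fun test_burn_object_then_unburn(creator: &signer) acquires ObjectCore, TombStone {
    let (_, hero) = create_hero(creator);
    // Calling burn(creator, hero) now aborts with EBURN_NOT_ALLOWED; the
    // test-only replacement still moves the object to BURN_ADDRESS.
    burn_object(creator, hero);
    assert!(is_burnt(hero), 0);
    // Original owners can still reclaim previously burnt objects.
    unburn(creator, hero);
}
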
@@ -2441,6 +2448,33 @@ to determine the identity of the starting point of ownership. + + + + +
fun spec_create_object_address(source: address, seed: vector<u8>): address;
+
+ + + + + + + +
fun spec_create_user_derived_object_address(source: address, derive_from: address): address;
+
+ + + + + + + +
fun spec_create_guid_object_address(source: address, creation_num: u64): address;
+
+ + + ### Function `address_to_object` @@ -3245,17 +3279,14 @@ to determine the identity of the starting point of ownership. ### Function `burn` -
public entry fun burn<T: key>(owner: &signer, object: object::Object<T>)
+
#[deprecated]
+public entry fun burn<T: key>(_owner: &signer, _object: object::Object<T>)
 
-
pragma aborts_if_is_partial;
-let object_address = object.inner;
-aborts_if !exists<ObjectCore>(object_address);
-aborts_if owner(object) != signer::address_of(owner);
-aborts_if is_burnt(object);
+
aborts_if true;
 
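The blanket `aborts_if true` matches the new implementation, which unconditionally raises `error::permission_denied(EBURN_NOT_ALLOWED)`. As a sanity check on the abort code expected by the new `test_burn_should_fail` test later in this diff (assuming the standard `std::error` encoding, where the category occupies the upper 16 bits):

permission_denied category = 0x5
EBURN_NOT_ALLOWED          = 10 = 0xA
abort code                 = (0x5 << 16) | 0xA = 0x5000A
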
@@ -3368,31 +3399,4 @@ to determine the identity of the starting point of ownership.
- - - - - -
fun spec_create_object_address(source: address, seed: vector<u8>): address;
-
- - - - - - - -
fun spec_create_user_derived_object_address(source: address, derive_from: address): address;
-
- - - - - - - -
fun spec_create_guid_object_address(source: address, creation_num: u64): address;
-
- - [move-book]: https://aptos.dev/move/book/SUMMARY diff --git a/aptos-move/framework/aptos-framework/sources/object.move b/aptos-move/framework/aptos-framework/sources/object.move index 6e809e87e8736..c03914fb7675c 100644 --- a/aptos-move/framework/aptos-framework/sources/object.move +++ b/aptos-move/framework/aptos-framework/sources/object.move @@ -50,6 +50,8 @@ module aptos_framework::object { const EOBJECT_NOT_BURNT: u64 = 8; /// Object is untransferable any operations that might result in a transfer are disallowed. const EOBJECT_NOT_TRANSFERRABLE: u64 = 9; + /// Objects cannot be burnt + const EBURN_NOT_ALLOWED: u64 = 10; /// Explicitly separate the GUID space between Object and Account to prevent accidental overlap. const INIT_GUID_CREATION_NUM: u64 = 0x4000000000000; @@ -610,15 +612,12 @@ module aptos_framework::object { }; } - /// Forcefully transfer an unwanted object to BURN_ADDRESS, ignoring whether ungated_transfer is allowed. - /// This only works for objects directly owned and for simplicity does not apply to indirectly owned objects. - /// Original owners can reclaim burnt objects any time in the future by calling unburn. - public entry fun burn(owner: &signer, object: Object) acquires ObjectCore { - let original_owner = signer::address_of(owner); - assert!(is_owner(object, original_owner), error::permission_denied(ENOT_OBJECT_OWNER)); - let object_addr = object.inner; - move_to(&create_signer(object_addr), TombStone { original_owner }); - transfer_raw_inner(object_addr, BURN_ADDRESS); + #[deprecated] + /// Previously allowed to burn objects, has now been disabled. Objects can still be unburnt. + /// + /// Please use the test only [`object::burn_object`] for testing with previously burned objects. + public entry fun burn(_owner: &signer, _object: Object) { + abort error::permission_denied(EBURN_NOT_ALLOWED) } /// Allow origin owners to reclaim any objects they previous burnt. @@ -705,6 +704,20 @@ module aptos_framework::object { #[test_only] const EWEAPON_DOES_NOT_EXIST: u64 = 0x101; + #[test_only] + /// For testing the previous behavior of `object::burn()` + /// + /// Forcefully transfer an unwanted object to BURN_ADDRESS, ignoring whether ungated_transfer is allowed. + /// This only works for objects directly owned and for simplicity does not apply to indirectly owned objects. + /// Original owners can reclaim burnt objects any time in the future by calling unburn. + public fun burn_object(owner: &signer, object: Object) acquires ObjectCore { + let original_owner = signer::address_of(owner); + assert!(is_owner(object, original_owner), error::permission_denied(ENOT_OBJECT_OWNER)); + let object_addr = object.inner; + move_to(&create_signer(object_addr), TombStone { original_owner }); + transfer_raw_inner(object_addr, BURN_ADDRESS); + } + #[test_only] struct HeroEquipEvent has drop, store { weapon_id: Option>, @@ -820,7 +833,7 @@ module aptos_framework::object { #[expected_failure(abort_code = 0x10008, location = Self)] fun test_cannot_unburn_after_transfer_with_ref(creator: &signer) acquires ObjectCore, TombStone { let (hero_constructor, hero) = create_hero(creator); - burn(creator, hero); + burn_object(creator, hero); let transfer_ref = generate_transfer_ref(&hero_constructor); transfer_with_ref(generate_linear_transfer_ref(&transfer_ref), @0x456); unburn(creator, hero); @@ -876,7 +889,7 @@ module aptos_framework::object { disable_ungated_transfer(&transfer_ref); // Owner should be able to burn, despite ungated transfer disallowed. 
- burn(creator, hero); + burn_object(creator, hero); assert!(owner(hero) == BURN_ADDRESS, 0); assert!(!ungated_transfer_allowed(hero), 0); @@ -897,7 +910,7 @@ module aptos_framework::object { // Owner should be not be able to burn weapon directly. assert!(owner(weapon) == object_address(&hero), 0); assert!(owns(weapon, signer::address_of(creator)), 0); - burn(creator, weapon); + burn_object(creator, weapon); } #[test(creator = @0x123)] @@ -907,6 +920,13 @@ module aptos_framework::object { unburn(creator, hero); } + #[test(creator = @0x123)] + #[expected_failure(abort_code = 0x5000A, location = Self)] + fun test_burn_should_fail(creator: &signer) acquires ObjectCore { + let (_, hero) = create_hero(creator); + burn(creator, hero); + } + #[test_only] fun create_simple_object(creator: &signer, seed: vector): Object { object_from_constructor_ref(&create_named_object(creator, seed)) diff --git a/aptos-move/framework/aptos-framework/sources/object.spec.move b/aptos-move/framework/aptos-framework/sources/object.spec.move index d2627d649fd61..51ae05b568368 100644 --- a/aptos-move/framework/aptos-framework/sources/object.spec.move +++ b/aptos-move/framework/aptos-framework/sources/object.spec.move @@ -475,7 +475,11 @@ spec aptos_framework::object { aborts_if !global(object_address).allow_ungated_transfer; } - spec burn(owner: &signer, object: Object) { + spec burn(_owner: &signer, _object: Object) { + aborts_if true; + } + + spec burn_object(owner: &signer, object: Object) { pragma aborts_if_is_partial; let object_address = object.inner; aborts_if !exists(object_address); diff --git a/aptos-move/framework/aptos-framework/sources/primary_fungible_store.move b/aptos-move/framework/aptos-framework/sources/primary_fungible_store.move index fc20e1cf311a6..9e39b97fa2854 100644 --- a/aptos-move/framework/aptos-framework/sources/primary_fungible_store.move +++ b/aptos-move/framework/aptos-framework/sources/primary_fungible_store.move @@ -372,7 +372,7 @@ module aptos_framework::primary_fungible_store { // User 2 burns their primary store but should still be able to transfer afterward. let user_2_primary_store = primary_store(user_2_address, metadata); - object::burn(user_2, user_2_primary_store); + object::burn_object(user_2, user_2_primary_store); assert!(object::is_burnt(user_2_primary_store), 0); // Balance still works assert!(balance(user_2_address, metadata) == 80, 0); @@ -396,7 +396,7 @@ module aptos_framework::primary_fungible_store { // User 2 burns their primary store but should still be able to withdraw afterward. let user_2_primary_store = primary_store(user_2_address, metadata); - object::burn(user_2, user_2_primary_store); + object::burn_object(user_2, user_2_primary_store); assert!(object::is_burnt(user_2_primary_store), 0); let coins = withdraw(user_2, metadata, 70); assert!(balance(user_2_address, metadata) == 10, 0); diff --git a/aptos-move/framework/aptos-framework/tests/simple_dispatchable_token_pfs_tests.move b/aptos-move/framework/aptos-framework/tests/simple_dispatchable_token_pfs_tests.move index 1b80c489024e5..d069923a5f8ef 100644 --- a/aptos-move/framework/aptos-framework/tests/simple_dispatchable_token_pfs_tests.move +++ b/aptos-move/framework/aptos-framework/tests/simple_dispatchable_token_pfs_tests.move @@ -28,7 +28,7 @@ module aptos_framework::simple_token_pfs_tests { // User 2 burns their primary store but should still be able to transfer afterward. 
let user_2_primary_store = primary_store(user_2_address, metadata); - object::burn(user_2, user_2_primary_store); + object::burn_object(user_2, user_2_primary_store); assert!(object::is_burnt(user_2_primary_store), 0); // Balance still works assert!(balance(user_2_address, metadata) == 80, 0); @@ -54,7 +54,7 @@ module aptos_framework::simple_token_pfs_tests { // User 2 burns their primary store but should still be able to withdraw afterward. let user_2_primary_store = primary_store(user_2_address, metadata); - object::burn(user_2, user_2_primary_store); + object::burn_object(user_2, user_2_primary_store); assert!(object::is_burnt(user_2_primary_store), 0); let coins = withdraw(user_2, metadata, 70); assert!(balance(user_2_address, metadata) == 10, 0); diff --git a/aptos-move/framework/src/module_metadata.rs b/aptos-move/framework/src/module_metadata.rs index e0dc1d36b4fa6..18a6178e23bc6 100644 --- a/aptos-move/framework/src/module_metadata.rs +++ b/aptos-move/framework/src/module_metadata.rs @@ -624,11 +624,23 @@ fn check_module_complexity(module: &CompiledModule) -> Result<(), MetaDataValida check_ident_complexity(module, &mut meter, handle.name)?; } for def in module.struct_defs() { - if let StructFieldInformation::Declared(fields) = &def.field_information { - for field in fields { - check_ident_complexity(module, &mut meter, field.name)?; - check_sigtok_complexity(module, &mut meter, &field.signature.0)? - } + match &def.field_information { + StructFieldInformation::Native => {}, + StructFieldInformation::Declared(fields) => { + for field in fields { + check_ident_complexity(module, &mut meter, field.name)?; + check_sigtok_complexity(module, &mut meter, &field.signature.0)? + } + }, + StructFieldInformation::DeclaredVariants(variants) => { + for variant in variants { + check_ident_complexity(module, &mut meter, variant.name)?; + for field in &variant.fields { + check_ident_complexity(module, &mut meter, field.name)?; + check_sigtok_complexity(module, &mut meter, &field.signature.0)? 
+ } + } + }, } } for def in module.function_defs() { diff --git a/config/src/config/consensus_config.rs b/config/src/config/consensus_config.rs index 74ac16174b674..75338671dd31d 100644 --- a/config/src/config/consensus_config.rs +++ b/config/src/config/consensus_config.rs @@ -90,6 +90,7 @@ pub struct ConsensusConfig { pub rand_rb_config: ReliableBroadcastConfig, pub num_bounded_executor_tasks: u64, pub enable_pre_commit: bool, + pub max_pending_rounds_in_commit_vote_cache: u64, pub optimistic_sig_verification_for_votes: bool, pub optimistic_sig_verification_for_order_votes: bool, } @@ -319,6 +320,7 @@ impl Default for ConsensusConfig { }, num_bounded_executor_tasks: 16, enable_pre_commit: true, + max_pending_rounds_in_commit_vote_cache: 100, optimistic_sig_verification_for_votes: true, optimistic_sig_verification_for_order_votes: true, } diff --git a/config/src/config/consensus_observer_config.rs b/config/src/config/consensus_observer_config.rs index 8d930cf17c8d3..02d8572134950 100644 --- a/config/src/config/consensus_observer_config.rs +++ b/config/src/config/consensus_observer_config.rs @@ -9,8 +9,8 @@ use serde::{Deserialize, Serialize}; use serde_yaml::Value; // Useful constants for enabling consensus observer on different node types -const ENABLE_ON_VALIDATORS: bool = false; -const ENABLE_ON_VALIDATOR_FULLNODES: bool = false; +const ENABLE_ON_VALIDATORS: bool = true; +const ENABLE_ON_VALIDATOR_FULLNODES: bool = true; const ENABLE_ON_PUBLIC_FULLNODES: bool = false; #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)] @@ -30,6 +30,8 @@ pub struct ConsensusObserverConfig { /// Interval (in milliseconds) to garbage collect peer state pub garbage_collection_interval_ms: u64, + /// The maximum number of concurrent subscriptions + pub max_concurrent_subscriptions: u64, /// Maximum number of blocks to keep in memory (e.g., pending blocks, ordered blocks, etc.) 
pub max_num_pending_blocks: u64, /// Maximum timeout (in milliseconds) for active subscriptions @@ -52,8 +54,9 @@ impl Default for ConsensusObserverConfig { publisher_enabled: false, max_network_channel_size: 1000, max_parallel_serialization_tasks: num_cpus::get(), // Default to the number of CPUs - network_request_timeout_ms: 10_000, // 10 seconds + network_request_timeout_ms: 5_000, // 5 seconds garbage_collection_interval_ms: 60_000, // 60 seconds + max_concurrent_subscriptions: 2, // 2 streams should be sufficient max_num_pending_blocks: 100, // 100 blocks max_subscription_timeout_ms: 30_000, // 30 seconds max_synced_version_timeout_ms: 60_000, // 60 seconds diff --git a/consensus/consensus-types/src/block_test.rs b/consensus/consensus-types/src/block_test.rs index bc33ddec8bc5a..54ece0539e2c8 100644 --- a/consensus/consensus-types/src/block_test.rs +++ b/consensus/consensus-types/src/block_test.rs @@ -17,7 +17,7 @@ use aptos_types::{ account_address::AccountAddress, aggregate_signature::PartialSignatures, block_info::{BlockInfo, Round}, - ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures}, + ledger_info::{LedgerInfo, LedgerInfoWithVerifiedSignatures}, on_chain_config::ValidatorSet, validator_signer::ValidatorSigner, validator_verifier::{random_validator_verifier, ValidatorVerifier}, @@ -131,7 +131,7 @@ fn test_same_qc_different_authors() { .unwrap(); let signature = signer.sign(genesis_qc.ledger_info().ledger_info()).unwrap(); - let mut ledger_info_altered = LedgerInfoWithPartialSignatures::new( + let mut ledger_info_altered = LedgerInfoWithVerifiedSignatures::new( genesis_qc.ledger_info().ledger_info().clone(), PartialSignatures::empty(), ); @@ -201,7 +201,7 @@ fn test_block_metadata_bitvec() { ); let mut ledger_info_1 = - LedgerInfoWithPartialSignatures::new(ledger_info.clone(), PartialSignatures::empty()); + LedgerInfoWithVerifiedSignatures::new(ledger_info.clone(), PartialSignatures::empty()); let votes_1 = vec![true, false, true, true]; votes_1 .iter() diff --git a/consensus/consensus-types/src/timeout_2chain.rs b/consensus/consensus-types/src/timeout_2chain.rs index c0d62edc6ffb3..87d35bc99bd92 100644 --- a/consensus/consensus-types/src/timeout_2chain.rs +++ b/consensus/consensus-types/src/timeout_2chain.rs @@ -406,7 +406,7 @@ mod tests { use aptos_types::{ aggregate_signature::PartialSignatures, block_info::BlockInfo, - ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures}, + ledger_info::{LedgerInfo, LedgerInfoWithVerifiedSignatures}, validator_verifier::random_validator_verifier, }; @@ -415,7 +415,7 @@ mod tests { let quorum_size = validators.quorum_voting_power() as usize; let generate_quorum = |round, num_of_signature| { let vote_data = VoteData::new(BlockInfo::random(round), BlockInfo::random(0)); - let mut ledger_info = LedgerInfoWithPartialSignatures::new( + let mut ledger_info = LedgerInfoWithVerifiedSignatures::new( LedgerInfo::new(BlockInfo::empty(), vote_data.hash()), PartialSignatures::empty(), ); diff --git a/consensus/consensus-types/src/wrapped_ledger_info.rs b/consensus/consensus-types/src/wrapped_ledger_info.rs index 6125f85ca2c94..ee254af17304b 100644 --- a/consensus/consensus-types/src/wrapped_ledger_info.rs +++ b/consensus/consensus-types/src/wrapped_ledger_info.rs @@ -77,6 +77,10 @@ impl WrappedLedgerInfo { &self.signed_ledger_info } + pub fn epoch(&self) -> u64 { + self.ledger_info().ledger_info().epoch() + } + pub fn commit_info(&self) -> &BlockInfo { self.ledger_info().ledger_info().commit_info() } diff --git 
a/consensus/safety-rules/src/test_utils.rs b/consensus/safety-rules/src/test_utils.rs index 07b9159c66a45..ce161c0a5fb14 100644 --- a/consensus/safety-rules/src/test_utils.rs +++ b/consensus/safety-rules/src/test_utils.rs @@ -24,7 +24,7 @@ use aptos_types::{ block_info::BlockInfo, epoch_change::EpochChangeProof, epoch_state::EpochState, - ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures, LedgerInfoWithSignatures}, + ledger_info::{LedgerInfo, LedgerInfoWithSignatures, LedgerInfoWithVerifiedSignatures}, on_chain_config::ValidatorSet, proof::AccumulatorExtensionProof, validator_info::ValidatorInfo, @@ -168,7 +168,7 @@ pub fn make_proposal_with_parent_and_overrides( ) .unwrap(); - let mut ledger_info_with_signatures = LedgerInfoWithPartialSignatures::new( + let mut ledger_info_with_signatures = LedgerInfoWithVerifiedSignatures::new( vote.ledger_info().clone(), PartialSignatures::empty(), ); diff --git a/consensus/src/consensus_observer/common/error.rs b/consensus/src/consensus_observer/common/error.rs index 37a516d10115c..7fc6a78785a96 100644 --- a/consensus/src/consensus_observer/common/error.rs +++ b/consensus/src/consensus_observer/common/error.rs @@ -21,6 +21,9 @@ pub enum Error { #[error("Subscription progress stopped: {0}")] SubscriptionProgressStopped(String), + #[error("Subscriptions reset: {0}")] + SubscriptionsReset(String), + #[error("Subscription suboptimal: {0}")] SubscriptionSuboptimal(String), @@ -40,6 +43,7 @@ impl Error { Self::RpcError(_) => "rpc_error", Self::SubscriptionDisconnected(_) => "subscription_disconnected", Self::SubscriptionProgressStopped(_) => "subscription_progress_stopped", + Self::SubscriptionsReset(_) => "subscriptions_reset", Self::SubscriptionSuboptimal(_) => "subscription_suboptimal", Self::SubscriptionTimeout(_) => "subscription_timeout", Self::UnexpectedError(_) => "unexpected_error", diff --git a/consensus/src/consensus_observer/common/metrics.rs b/consensus/src/consensus_observer/common/metrics.rs index 8cf8144d25a86..5888bbfcaca26 100644 --- a/consensus/src/consensus_observer/common/metrics.rs +++ b/consensus/src/consensus_observer/common/metrics.rs @@ -5,17 +5,18 @@ use aptos_config::network_id::{NetworkId, PeerNetworkId}; use aptos_metrics_core::{ - register_histogram_vec, register_int_counter_vec, register_int_gauge_vec, HistogramVec, - IntCounterVec, IntGaugeVec, + register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge_vec, + HistogramVec, IntCounter, IntCounterVec, IntGaugeVec, }; use once_cell::sync::Lazy; // Useful metric labels pub const BLOCK_PAYLOAD_LABEL: &str = "block_payload"; pub const COMMIT_DECISION_LABEL: &str = "commit_decision"; +pub const COMMITTED_BLOCKS_LABEL: &str = "committed_blocks"; pub const CREATED_SUBSCRIPTION_LABEL: &str = "created_subscription"; pub const ORDERED_BLOCK_ENTRIES_LABEL: &str = "ordered_block_entries"; -pub const ORDERED_BLOCKS_LABEL: &str = "ordered_blocks"; +pub const ORDERED_BLOCK_LABEL: &str = "ordered_block"; pub const PENDING_BLOCK_ENTRIES_LABEL: &str = "pending_block_entries"; pub const PENDING_BLOCKS_LABEL: &str = "pending_blocks"; pub const STORED_PAYLOADS_LABEL: &str = "stored_payloads"; @@ -30,6 +31,34 @@ pub static OBSERVER_CREATED_SUBSCRIPTIONS: Lazy = Lazy::new(|| { .unwrap() }); +/// Counter for tracking the number of times the block state was cleared by the consensus observer +pub static OBSERVER_CLEARED_BLOCK_STATE: Lazy = Lazy::new(|| { + register_int_counter!( + "consensus_observer_cleared_block_state", + "Counter for tracking the number of 
times the block state was cleared by the consensus observer", + ).unwrap() +}); + +/// Counter for tracking dropped (direct send) messages by the consensus observer +pub static OBSERVER_DROPPED_MESSAGES: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "consensus_observer_dropped_messages", + "Counters related to dropped (direct send) messages by the consensus observer", + &["message_type", "network_id"] + ) + .unwrap() +}); + +/// Counter for tracking rejected (direct send) messages by the consensus observer +pub static OBSERVER_REJECTED_MESSAGES: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "consensus_observer_rejected_messages", + "Counters related to rejected (direct send) messages by the consensus observer", + &["message_type", "network_id"] + ) + .unwrap() +}); + /// Gauge for tracking the number of active subscriptions for the consensus observer pub static OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS: Lazy = Lazy::new(|| { register_int_gauge_vec!( @@ -180,8 +209,8 @@ pub static PUBLISHER_SENT_MESSAGES: Lazy = Lazy::new(|| { .unwrap() }); -/// Increments the given request counter with the provided values -pub fn increment_request_counter( +/// Increments the given counter with the provided values +pub fn increment_counter( counter: &Lazy, label: &str, peer_network_id: &PeerNetworkId, @@ -192,6 +221,11 @@ pub fn increment_request_counter( .inc(); } +/// Increments the given counter without labels +pub fn increment_counter_without_labels(counter: &Lazy) { + counter.inc(); +} + /// Observes the value for the provided histogram and label pub fn observe_value_with_label( histogram: &Lazy, diff --git a/consensus/src/consensus_observer/network/network_handler.rs b/consensus/src/consensus_observer/network/network_handler.rs index d8aa1447312f7..bbaeca0dc4843 100644 --- a/consensus/src/consensus_observer/network/network_handler.rs +++ b/consensus/src/consensus_observer/network/network_handler.rs @@ -208,7 +208,7 @@ impl ConsensusObserverNetworkHandler { None => { error!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Missing response sender for RCP request: {:?}", + "Missing response sender for the RPC request: {:?}", request )) ); diff --git a/consensus/src/consensus_observer/network/observer_client.rs b/consensus/src/consensus_observer/network/observer_client.rs index a2f94ff44524f..33c4ce902af33 100644 --- a/consensus/src/consensus_observer/network/observer_client.rs +++ b/consensus/src/consensus_observer/network/observer_client.rs @@ -46,7 +46,7 @@ impl> message_label: &str, ) -> Result<(), Error> { // Increment the message counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::PUBLISHER_SENT_MESSAGES, message_label, peer_network_id, @@ -74,7 +74,7 @@ impl> .message(&format!("Failed to send message: {:?}", error))); // Update the direct send error metrics - metrics::increment_request_counter( + metrics::increment_counter( &metrics::PUBLISHER_SENT_MESSAGE_ERRORS, error.get_label(), peer_network_id, @@ -125,7 +125,7 @@ impl> .message(&format!("Failed to serialize message: {:?}", error))); // Update the direct send error metrics - metrics::increment_request_counter( + metrics::increment_counter( &metrics::PUBLISHER_SENT_MESSAGE_ERRORS, error.get_label(), peer_network_id, @@ -147,7 +147,7 @@ impl> let request_id = rand::thread_rng().gen(); // Increment the request counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_SENT_REQUESTS, request.get_label(), peer_network_id, @@ -174,7 +174,7 @@ impl> match 
result { Ok(consensus_observer_response) => { // Update the RPC success metrics - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_RECEIVED_MESSAGE_RESPONSES, request_label, peer_network_id, @@ -192,7 +192,7 @@ impl> .error(&error)); // Update the RPC error metrics - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_SENT_MESSAGE_ERRORS, error.get_label(), peer_network_id, diff --git a/consensus/src/consensus_observer/network/observer_message.rs b/consensus/src/consensus_observer/network/observer_message.rs index 6c68384cda32e..8b673f6335f56 100644 --- a/consensus/src/consensus_observer/network/observer_message.rs +++ b/consensus/src/consensus_observer/network/observer_message.rs @@ -312,8 +312,8 @@ impl CommitDecision { /// The transaction payload and proof of each block #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct PayloadWithProof { - pub transactions: Vec, - pub proofs: Vec, + transactions: Vec, + proofs: Vec, } impl PayloadWithProof { @@ -337,8 +337,8 @@ impl PayloadWithProof { /// The transaction payload and proof of each block with a transaction limit #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct PayloadWithProofAndLimit { - pub payload_with_proof: PayloadWithProof, - pub transaction_limit: Option, + payload_with_proof: PayloadWithProof, + transaction_limit: Option, } impl PayloadWithProofAndLimit { @@ -629,8 +629,8 @@ impl BlockTransactionPayload { /// Payload message contains the block and transaction payload #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct BlockPayload { - pub block: BlockInfo, - pub transaction_payload: BlockTransactionPayload, + block: BlockInfo, + transaction_payload: BlockTransactionPayload, } impl BlockPayload { @@ -641,25 +641,72 @@ impl BlockPayload { } } + /// Returns a reference to the block info + pub fn block(&self) -> &BlockInfo { + &self.block + } + + /// Returns the epoch of the block info + pub fn epoch(&self) -> u64 { + self.block.epoch() + } + + /// Returns the round of the block info + pub fn round(&self) -> Round { + self.block.round() + } + + /// Returns a reference to the block transaction payload + pub fn transaction_payload(&self) -> &BlockTransactionPayload { + &self.transaction_payload + } + /// Verifies the block payload digests and returns an error if the data is invalid pub fn verify_payload_digests(&self) -> Result<(), Error> { - // Verify the proof of store digests against the transaction + // Get the transactions, payload proofs and inline batches let transactions = self.transaction_payload.transactions(); + let payload_proofs = self.transaction_payload.payload_proofs(); + let inline_batches = self.transaction_payload.inline_batches(); + + // Get the number of transactions, payload proofs and inline batches + let num_transactions = transactions.len(); + let num_payload_proofs = payload_proofs.len(); + let num_inline_batches = inline_batches.len(); + + // Verify the payload proof digests using the transactions let mut transactions_iter = transactions.iter(); - for proof_of_store in &self.transaction_payload.payload_proofs() { - reconstruct_and_verify_batch(&mut transactions_iter, proof_of_store.info())?; + for proof_of_store in &payload_proofs { + reconstruct_and_verify_batch(&mut transactions_iter, proof_of_store.info()).map_err( + |error| { + Error::InvalidMessageError(format!( + "Failed to verify payload proof digests! 
Num transactions: {:?}, \ + num batches: {:?}, num inline batches: {:?}, failed batch: {:?}, Error: {:?}", + num_transactions, num_payload_proofs, num_inline_batches, proof_of_store.info(), error + )) + }, + )?; } - // Verify the inline batch digests against the inline batches - for batch_info in self.transaction_payload.inline_batches() { - reconstruct_and_verify_batch(&mut transactions_iter, batch_info)?; + // Verify the inline batch digests using the transactions + for batch_info in inline_batches.into_iter() { + reconstruct_and_verify_batch(&mut transactions_iter, batch_info).map_err( + |error| { + Error::InvalidMessageError(format!( + "Failed to verify inline batch digests! Num transactions: {:?}, \ + num batches: {:?}, num inline batches: {:?}, failed batch: {:?}, Error: {:?}", + num_transactions, num_payload_proofs, num_inline_batches, batch_info, error + )) + }, + )?; } - // Verify that there are no transactions remaining + // Verify that there are no transactions remaining (all transactions should be consumed) let remaining_transactions = transactions_iter.as_slice(); if !remaining_transactions.is_empty() { return Err(Error::InvalidMessageError(format!( - "Failed to verify payload transactions! Transactions remaining: {:?}. Expected: 0", + "Failed to verify payload transactions! Num transactions: {:?}, \ + transactions remaining: {:?}. Expected: 0", + num_transactions, remaining_transactions.len() ))); } @@ -720,7 +767,7 @@ fn reconstruct_and_verify_batch( let expected_digest = expected_batch_info.digest(); if batch_digest != *expected_digest { return Err(Error::InvalidMessageError(format!( - "The reconstructed batch digest does not match the expected digest!\ + "The reconstructed batch digest does not match the expected digest! \ Batch: {:?}, Expected digest: {:?}, Reconstructed digest: {:?}", expected_batch_info, expected_digest, batch_digest ))); diff --git a/consensus/src/consensus_observer/observer/active_state.rs b/consensus/src/consensus_observer/observer/active_state.rs index 73c03af670eee..f162fab553e15 100644 --- a/consensus/src/consensus_observer/observer/active_state.rs +++ b/consensus/src/consensus_observer/observer/active_state.rs @@ -101,8 +101,8 @@ impl ActiveObserverState { /// root ledger info and remove the blocks from the given stores. pub fn create_commit_callback( &self, - pending_ordered_blocks: OrderedBlockStore, - block_payload_store: BlockPayloadStore, + pending_ordered_blocks: Arc>, + block_payload_store: Arc>, ) -> StateComputerCommitCallBackType { // Clone the root pointer let root = self.root.clone(); @@ -243,7 +243,7 @@ async fn extract_on_chain_configs( let onchain_randomness_config_seq_num: anyhow::Result = on_chain_configs.get(); if let Err(error) = &onchain_randomness_config_seq_num { - error!( + warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to read on-chain randomness config seq num! Error: {:?}", error @@ -282,15 +282,17 @@ async fn extract_on_chain_configs( /// A simple helper function that handles the committed blocks /// (as part of the commit callback). 
fn handle_committed_blocks( - pending_ordered_blocks: OrderedBlockStore, - block_payload_store: BlockPayloadStore, + pending_ordered_blocks: Arc>, + block_payload_store: Arc>, root: Arc>, blocks: &[Arc], ledger_info: LedgerInfoWithSignatures, ) { // Remove the committed blocks from the payload and pending stores - block_payload_store.remove_committed_blocks(blocks); - pending_ordered_blocks.remove_blocks_for_commit(&ledger_info); + block_payload_store.lock().remove_committed_blocks(blocks); + pending_ordered_blocks + .lock() + .remove_blocks_for_commit(&ledger_info); // Verify the ledger info is for the same epoch let mut root = root.lock(); @@ -407,8 +409,12 @@ mod test { let root = Arc::new(Mutex::new(create_ledger_info(epoch, round))); // Create the ordered block store and block payload store - let ordered_block_store = OrderedBlockStore::new(node_config.consensus_observer); - let mut block_payload_store = BlockPayloadStore::new(node_config.consensus_observer); + let ordered_block_store = Arc::new(Mutex::new(OrderedBlockStore::new( + node_config.consensus_observer, + ))); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + node_config.consensus_observer, + ))); // Handle the committed blocks at the wrong epoch and verify the root is not updated handle_committed_blocks( @@ -432,12 +438,16 @@ mod test { // Add pending ordered blocks let num_ordered_blocks = 10; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, epoch, round); + let ordered_blocks = create_and_add_ordered_blocks( + ordered_block_store.clone(), + num_ordered_blocks, + epoch, + round, + ); // Add block payloads for the ordered blocks for ordered_block in &ordered_blocks { - create_and_add_payloads_for_ordered_block(&mut block_payload_store, ordered_block); + create_and_add_payloads_for_ordered_block(block_payload_store.clone(), ordered_block); } // Create the commit ledger info (for the second to last block) @@ -461,8 +471,11 @@ mod test { ); // Verify the committed blocks are removed from the stores - assert_eq!(ordered_block_store.get_all_ordered_blocks().len(), 1); - assert_eq!(block_payload_store.get_block_payloads().lock().len(), 1); + assert_eq!(ordered_block_store.lock().get_all_ordered_blocks().len(), 1); + assert_eq!( + block_payload_store.lock().get_block_payloads().lock().len(), + 1 + ); // Verify the root is updated assert_eq!(root.lock().clone(), committed_ledger_info); @@ -495,7 +508,7 @@ mod test { /// Creates and adds the specified number of ordered blocks to the ordered blocks fn create_and_add_ordered_blocks( - ordered_block_store: &OrderedBlockStore, + ordered_block_store: Arc>, num_ordered_blocks: usize, epoch: u64, starting_round: Round, @@ -532,7 +545,9 @@ mod test { let ordered_block = OrderedBlock::new(blocks, ordered_proof); // Insert the block into the ordered block store - ordered_block_store.insert_ordered_block(ordered_block.clone()); + ordered_block_store + .lock() + .insert_ordered_block(ordered_block.clone()); // Add the block to the ordered blocks ordered_blocks.push(ordered_block); @@ -543,13 +558,15 @@ mod test { /// Creates and adds payloads for the ordered block fn create_and_add_payloads_for_ordered_block( - block_payload_store: &mut BlockPayloadStore, + block_payload_store: Arc>, ordered_block: &OrderedBlock, ) { for block in ordered_block.blocks() { let block_payload = BlockPayload::new(block.block_info(), BlockTransactionPayload::empty()); - block_payload_store.insert_block_payload(block_payload, true); + 
block_payload_store + .lock() + .insert_block_payload(block_payload, true); } } diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs index 250b338d23344..032a3fa38f8bc 100644 --- a/consensus/src/consensus_observer/observer/consensus_observer.rs +++ b/consensus/src/consensus_observer/observer/consensus_observer.rs @@ -28,10 +28,14 @@ use crate::{ pipeline::execution_client::TExecutionClient, }; use aptos_channels::{aptos_channel, aptos_channel::Receiver, message_queues::QueueStyle}; -use aptos_config::config::{ConsensusObserverConfig, NodeConfig}; +use aptos_config::{ + config::{ConsensusObserverConfig, NodeConfig}, + network_id::PeerNetworkId, +}; use aptos_consensus_types::{pipeline, pipelined_block::PipelinedBlock}; use aptos_crypto::{bls12381, Genesis}; use aptos_event_notifications::{DbBackedOnChainConfig, ReconfigNotificationListener}; +use aptos_infallible::Mutex; use aptos_logger::{debug, error, info, warn}; use aptos_network::{ application::interface::NetworkClient, protocols::wire::handshake::v1::ProtocolId, @@ -63,13 +67,13 @@ pub struct ConsensusObserver { active_observer_state: ActiveObserverState, // The block payload store (containing the block transaction payloads) - block_payload_store: BlockPayloadStore, + block_payload_store: Arc>, // The ordered block store (containing ordered blocks that are ready for execution) - ordered_block_store: OrderedBlockStore, + ordered_block_store: Arc>, // The pending block store (containing pending blocks that are without payloads) - pending_block_store: PendingBlockStore, + pending_block_store: Arc>, // The execution client to the buffer manager execution_client: Arc, @@ -81,7 +85,7 @@ pub struct ConsensusObserver { // The flag indicates if we're waiting to transition to a new epoch. 
sync_handle: Option<(DropGuard, bool)>, - // The subscription manager + // The consensus observer subscription manager subscription_manager: SubscriptionManager, } @@ -116,12 +120,17 @@ impl ConsensusObserver { let active_observer_state = ActiveObserverState::new(node_config, db_reader, reconfig_events, consensus_publisher); + // Create the block and payload stores + let ordered_block_store = OrderedBlockStore::new(consensus_observer_config); + let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let pending_block_store = PendingBlockStore::new(consensus_observer_config); + // Create the consensus observer Self { active_observer_state, - ordered_block_store: OrderedBlockStore::new(consensus_observer_config), - block_payload_store: BlockPayloadStore::new(consensus_observer_config), - pending_block_store: PendingBlockStore::new(consensus_observer_config), + ordered_block_store: Arc::new(Mutex::new(ordered_block_store)), + block_payload_store: Arc::new(Mutex::new(block_payload_store)), + pending_block_store: Arc::new(Mutex::new(pending_block_store)), execution_client, sync_notification_sender, sync_handle: None, @@ -137,7 +146,7 @@ impl ConsensusObserver { } // Otherwise, check if all the payloads exist in the payload store - self.block_payload_store.all_payloads_exist(blocks) + self.block_payload_store.lock().all_payloads_exist(blocks) } /// Checks the progress of the consensus observer @@ -156,13 +165,15 @@ impl ConsensusObserver { return; } - // Otherwise, check the health of the active subscription - let new_subscription_created = self + // Otherwise, check the health of the active subscriptions + if let Err(error) = self .subscription_manager .check_and_manage_subscriptions() - .await; - if new_subscription_created { - // Clear the pending block state (a new subscription was created) + .await + { + // Log the failure and clear the pending block state + warn!(LogSchema::new(LogEntry::ConsensusObserver) + .message(&format!("Subscription checks failed! Error: {:?}", error))); self.clear_pending_block_state().await; } } @@ -171,13 +182,13 @@ impl ConsensusObserver { /// subscriptions, where we want to wipe all state and restart). 
async fn clear_pending_block_state(&self) { // Clear the payload store - self.block_payload_store.clear_all_payloads(); + self.block_payload_store.lock().clear_all_payloads(); // Clear the pending blocks - self.pending_block_store.clear_missing_blocks(); + self.pending_block_store.lock().clear_missing_blocks(); // Clear the ordered blocks - self.ordered_block_store.clear_all_ordered_blocks(); + self.ordered_block_store.lock().clear_all_ordered_blocks(); // Reset the execution pipeline for the root let root = self.active_observer_state.root(); @@ -189,6 +200,9 @@ impl ConsensusObserver { )) ); } + + // Increment the cleared block state counter + metrics::increment_counter_without_labels(&metrics::OBSERVER_CLEARED_BLOCK_STATE); } /// Finalizes the ordered block by sending it to the execution pipeline @@ -256,10 +270,25 @@ impl ConsensusObserver { self.active_observer_state.epoch_state() } - /// Returns the last known block - fn get_last_block(&self) -> BlockInfo { - if let Some(last_pending_block) = self.ordered_block_store.get_last_ordered_block() { - last_pending_block + /// Returns the highest committed block epoch and round + fn get_highest_committed_epoch_round(&self) -> (u64, Round) { + if let Some(epoch_round) = self + .ordered_block_store + .lock() + .get_highest_committed_epoch_round() + { + epoch_round + } else { + // Return the root epoch and round + let root_block_info = self.active_observer_state.root().commit_info().clone(); + (root_block_info.epoch(), root_block_info.round()) + } + } + + /// Returns the last ordered block + fn get_last_ordered_block(&self) -> BlockInfo { + if let Some(last_ordered_block) = self.ordered_block_store.lock().get_last_ordered_block() { + last_ordered_block } else { // Return the root ledger info self.active_observer_state.root().commit_info().clone() @@ -278,34 +307,55 @@ impl ConsensusObserver { /// Orders any ready pending blocks for the given epoch and round async fn order_ready_pending_block(&mut self, block_epoch: u64, block_round: Round) { - if let Some(ordered_block) = self.pending_block_store.remove_ready_block( + // Get any ready ordered block + let ready_ordered_block = self.pending_block_store.lock().remove_ready_block( block_epoch, block_round, - &self.block_payload_store, - ) { - self.process_ordered_block(ordered_block).await; + self.block_payload_store.clone(), + ); + + // Process the ready ordered block (if it exists) + if let Some(ready_ordered_block) = ready_ordered_block { + self.process_ordered_block(ready_ordered_block).await; } } /// Processes the block payload message - async fn process_block_payload_message(&mut self, block_payload: BlockPayload) { + async fn process_block_payload_message( + &mut self, + peer_network_id: PeerNetworkId, + block_payload: BlockPayload, + ) { // Get the epoch and round for the block - let block_epoch = block_payload.block.epoch(); - let block_round = block_payload.block.round(); + let block_epoch = block_payload.epoch(); + let block_round = block_payload.round(); + + // Determine if the payload is behind the last ordered block, or if it already exists + let last_ordered_block = self.get_last_ordered_block(); + let payload_out_of_date = + (block_epoch, block_round) <= (last_ordered_block.epoch(), last_ordered_block.round()); + let payload_exists = self + .block_payload_store + .lock() + .existing_payload_entry(&block_payload); + + // If the payload is out of date or already exists, ignore it + if payload_out_of_date || payload_exists { + // Update the metrics for the dropped block payload + 
update_metrics_for_dropped_block_payload_message(peer_network_id, &block_payload); + return; + } // Update the metrics for the received block payload - metrics::set_gauge_with_label( - &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, - metrics::BLOCK_PAYLOAD_LABEL, - block_round, - ); + update_metrics_for_block_payload_message(peer_network_id, &block_payload); // Verify the block payload digests if let Err(error) = block_payload.verify_payload_digests() { error!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to verify block payload digests! Ignoring block: {:?}. Error: {:?}", - block_payload.block, error + block_payload.block(), + error )) ); return; @@ -319,7 +369,7 @@ impl ConsensusObserver { error!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to verify block payload signatures! Ignoring block: {:?}. Error: {:?}", - block_payload.block, error + block_payload.block(), error )) ); return; @@ -332,6 +382,7 @@ impl ConsensusObserver { // Update the payload store with the payload self.block_payload_store + .lock() .insert_block_payload(block_payload, verified_payload); // Check if there are blocks that were missing payloads but are @@ -344,18 +395,28 @@ impl ConsensusObserver { } /// Processes the commit decision message - fn process_commit_decision_message(&mut self, commit_decision: CommitDecision) { + fn process_commit_decision_message( + &mut self, + peer_network_id: PeerNetworkId, + commit_decision: CommitDecision, + ) { + // Get the commit decision epoch and round + let commit_epoch = commit_decision.epoch(); + let commit_round = commit_decision.round(); + + // If the commit message is behind our highest committed block, ignore it + if (commit_epoch, commit_round) <= self.get_highest_committed_epoch_round() { + // Update the metrics for the dropped commit decision + update_metrics_for_dropped_commit_decision_message(peer_network_id, &commit_decision); + return; + } + // Update the metrics for the received commit decision - metrics::set_gauge_with_label( - &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, - metrics::COMMIT_DECISION_LABEL, - commit_decision.round(), - ); + update_metrics_for_commit_decision_message(peer_network_id, &commit_decision); // If the commit decision is for the current epoch, verify and process it let epoch_state = self.get_epoch_state(); - let commit_decision_epoch = commit_decision.epoch(); - if commit_decision_epoch == epoch_state.epoch { + if commit_epoch == epoch_state.epoch { // Verify the commit decision if let Err(error) = commit_decision.verify_commit_proof(&epoch_state) { error!( @@ -379,10 +440,9 @@ impl ConsensusObserver { // Otherwise, we failed to process the commit decision. If the commit // is for a future epoch or round, we need to state sync. - let last_block = self.get_last_block(); - let commit_decision_round = commit_decision.round(); - let epoch_changed = commit_decision_epoch > last_block.epoch(); - if epoch_changed || commit_decision_round > last_block.round() { + let last_block = self.get_last_ordered_block(); + let epoch_changed = commit_epoch > last_block.epoch(); + if epoch_changed || commit_round > last_block.round() { // If we're waiting for state sync to transition into a new epoch, // we should just wait and not issue a new state sync request. 
if self.in_state_sync_epoch_change() { @@ -408,15 +468,17 @@ impl ConsensusObserver { self.active_observer_state .update_root(commit_decision.commit_proof().clone()); self.block_payload_store - .remove_blocks_for_epoch_round(commit_decision_epoch, commit_decision_round); + .lock() + .remove_blocks_for_epoch_round(commit_epoch, commit_round); self.ordered_block_store + .lock() .remove_blocks_for_commit(commit_decision.commit_proof()); // Start the state sync process let abort_handle = sync_to_commit_decision( commit_decision, - commit_decision_epoch, - commit_decision_round, + commit_epoch, + commit_round, self.execution_client.clone(), self.sync_notification_sender.clone(), ); @@ -431,6 +493,7 @@ impl ConsensusObserver { // Get the pending block for the commit decision let pending_block = self .ordered_block_store + .lock() .get_ordered_block(commit_decision.epoch(), commit_decision.round()); // Process the pending block @@ -444,6 +507,7 @@ impl ConsensusObserver { )) ); self.ordered_block_store + .lock() .update_commit_decision(commit_decision); // If we are not in sync mode, forward the commit decision to the execution pipeline @@ -469,23 +533,30 @@ impl ConsensusObserver { // Unpack the network message let (peer_network_id, message) = network_message.into_parts(); - // Verify the message is from the peer we've subscribed to + // Verify the message is from the peers we've subscribed to if let Err(error) = self .subscription_manager - .verify_message_sender(peer_network_id) + .verify_message_for_subscription(peer_network_id) { + // Increment the rejected message counter + metrics::increment_counter( + &metrics::OBSERVER_REJECTED_MESSAGES, + message.get_label(), + &peer_network_id, + ); + + // Log the error and return warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Message failed subscription sender verification! Error: {:?}", + "Received message that was not from an active subscription! 
Error: {:?}", error, )) ); - return; } // Increment the received message counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_RECEIVED_MESSAGES, message.get_label(), &peer_network_id, @@ -494,39 +565,15 @@ impl ConsensusObserver { // Process the message based on the type match message { ConsensusObserverDirectSend::OrderedBlock(ordered_block) => { - // Log the received ordered block message - let log_message = format!( - "Received ordered block: {}, from peer: {}!", - ordered_block.proof_block_info(), - peer_network_id - ); - log_received_message(log_message); - - // Process the ordered block message - self.process_ordered_block_message(ordered_block).await; + self.process_ordered_block_message(peer_network_id, ordered_block) + .await; }, ConsensusObserverDirectSend::CommitDecision(commit_decision) => { - // Log the received commit decision message - let log_message = format!( - "Received commit decision: {}, from peer: {}!", - commit_decision.proof_block_info(), - peer_network_id - ); - log_received_message(log_message); - - // Process the commit decision message - self.process_commit_decision_message(commit_decision); + self.process_commit_decision_message(peer_network_id, commit_decision); }, ConsensusObserverDirectSend::BlockPayload(block_payload) => { - // Log the received block payload message - let log_message = format!( - "Received block payload: {}, from peer: {}!", - block_payload.block, peer_network_id - ); - log_received_message(log_message); - - // Process the block payload message - self.process_block_payload_message(block_payload).await; + self.process_block_payload_message(peer_network_id, block_payload) + .await; }, } @@ -535,7 +582,11 @@ impl ConsensusObserver { } /// Processes the ordered block - async fn process_ordered_block_message(&mut self, ordered_block: OrderedBlock) { + async fn process_ordered_block_message( + &mut self, + peer_network_id: PeerNetworkId, + ordered_block: OrderedBlock, + ) { // Verify the ordered blocks before processing if let Err(error) = ordered_block.verify_ordered_blocks() { error!( @@ -548,12 +599,37 @@ impl ConsensusObserver { return; }; + // Get the epoch and round of the first block + let first_block = ordered_block.first_block(); + let first_block_epoch_round = (first_block.epoch(), first_block.round()); + + // Determine if the block is behind the last ordered block, or if it is already pending + let last_ordered_block = self.get_last_ordered_block(); + let block_out_of_date = + first_block_epoch_round <= (last_ordered_block.epoch(), last_ordered_block.round()); + let block_pending = self + .pending_block_store + .lock() + .existing_pending_block(&ordered_block); + + // If the block is out of date or already pending, ignore it + if block_out_of_date || block_pending { + // Update the metrics for the dropped ordered block + update_metrics_for_dropped_ordered_block_message(peer_network_id, &ordered_block); + return; + } + + // Update the metrics for the received ordered block + update_metrics_for_ordered_block_message(peer_network_id, &ordered_block); + // If all payloads exist, process the block. Otherwise, store it // in the pending block store and wait for the payloads to arrive. 
if self.all_payloads_exist(ordered_block.blocks()) { self.process_ordered_block(ordered_block).await; } else { - self.pending_block_store.insert_pending_block(ordered_block); + self.pending_block_store + .lock() + .insert_pending_block(ordered_block); } } @@ -587,6 +663,7 @@ impl ConsensusObserver { // Verify the block payloads against the ordered block if let Err(error) = self .block_payload_store + .lock() .verify_payloads_against_ordered_block(&ordered_block) { error!( @@ -601,9 +678,10 @@ impl ConsensusObserver { // The block was verified correctly. If the block is a child of our // last block, we can insert it into the ordered block store. - if self.get_last_block().id() == ordered_block.first_block().parent_id() { + if self.get_last_ordered_block().id() == ordered_block.first_block().parent_id() { // Insert the ordered block into the pending blocks self.ordered_block_store + .lock() .insert_ordered_block(ordered_block.clone()); // If we're not in sync mode, finalize the ordered blocks @@ -655,6 +733,7 @@ impl ConsensusObserver { let new_epoch_state = self.get_epoch_state(); let verified_payload_rounds = self .block_payload_store + .lock() .verify_payload_signatures(&new_epoch_state); // Order all the pending blocks that are now ready (these were buffered during state sync) @@ -668,9 +747,8 @@ impl ConsensusObserver { self.sync_handle = None; // Process all the newly ordered blocks - for (_, (ordered_block, commit_decision)) in - self.ordered_block_store.get_all_ordered_blocks() - { + let all_ordered_blocks = self.ordered_block_store.lock().get_all_ordered_blocks(); + for (_, (ordered_block, commit_decision)) in all_ordered_blocks { // Finalize the ordered block self.finalize_ordered_block(ordered_block).await; @@ -684,19 +762,25 @@ impl ConsensusObserver { /// Updates the metrics for the processed blocks fn update_processed_blocks_metrics(&self) { // Update the payload store metrics - self.block_payload_store.update_payload_store_metrics(); + self.block_payload_store + .lock() + .update_payload_store_metrics(); // Update the pending block metrics - self.pending_block_store.update_pending_blocks_metrics(); + self.pending_block_store + .lock() + .update_pending_blocks_metrics(); // Update the pending block metrics - self.ordered_block_store.update_ordered_blocks_metrics(); + self.ordered_block_store + .lock() + .update_ordered_blocks_metrics(); } /// Waits for a new epoch to start async fn wait_for_epoch_start(&mut self) { // Wait for the active state epoch to update - let block_payloads = self.block_payload_store.get_block_payloads(); + let block_payloads = self.block_payload_store.lock().get_block_payloads(); let (payload_manager, consensus_config, execution_config, randomness_config) = self .active_observer_state .wait_for_epoch_start(block_payloads) @@ -822,3 +906,135 @@ fn sync_to_commit_decision( )); abort_handle } + +/// Updates the metrics for the received block payload message +fn update_metrics_for_block_payload_message( + peer_network_id: PeerNetworkId, + block_payload: &BlockPayload, +) { + // Log the received block payload message + let log_message = format!( + "Received block payload: {}, from peer: {}!", + block_payload.block(), + peer_network_id + ); + log_received_message(log_message); + + // Update the metrics for the received block payload + metrics::set_gauge_with_label( + &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, + metrics::BLOCK_PAYLOAD_LABEL, + block_payload.round(), + ); +} + +/// Updates the metrics for the received commit decision message +fn 
update_metrics_for_commit_decision_message( + peer_network_id: PeerNetworkId, + commit_decision: &CommitDecision, +) { + // Log the received commit decision message + let log_message = format!( + "Received commit decision: {}, from peer: {}!", + commit_decision.proof_block_info(), + peer_network_id + ); + log_received_message(log_message); + + // Update the metrics for the received commit decision + metrics::set_gauge_with_label( + &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, + metrics::COMMIT_DECISION_LABEL, + commit_decision.round(), + ); +} + +/// Updates the metrics for the dropped block payload message +fn update_metrics_for_dropped_block_payload_message( + peer_network_id: PeerNetworkId, + block_payload: &BlockPayload, +) { + // Increment the dropped message counter + metrics::increment_counter( + &metrics::OBSERVER_DROPPED_MESSAGES, + metrics::BLOCK_PAYLOAD_LABEL, + &peer_network_id, + ); + + // Log the dropped block payload message + debug!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Ignoring block payload message from peer: {:?}! Block epoch and round: ({}, {})", + peer_network_id, + block_payload.epoch(), + block_payload.round() + )) + ); +} + +/// Updates the metrics for the dropped commit decision message +fn update_metrics_for_dropped_commit_decision_message( + peer_network_id: PeerNetworkId, + commit_decision: &CommitDecision, +) { + // Increment the dropped message counter + metrics::increment_counter( + &metrics::OBSERVER_DROPPED_MESSAGES, + metrics::COMMITTED_BLOCKS_LABEL, + &peer_network_id, + ); + + // Log the dropped commit decision message + debug!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Ignoring commit decision message from peer: {:?}! Commit epoch and round: ({}, {})", + peer_network_id, + commit_decision.epoch(), + commit_decision.round() + )) + ); +} + +/// Updates the metrics for the dropped ordered block message +fn update_metrics_for_dropped_ordered_block_message( + peer_network_id: PeerNetworkId, + ordered_block: &OrderedBlock, +) { + // Increment the dropped message counter + metrics::increment_counter( + &metrics::OBSERVER_DROPPED_MESSAGES, + metrics::ORDERED_BLOCK_LABEL, + &peer_network_id, + ); + + // Log the dropped ordered block message + debug!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Ignoring ordered block message from peer: {:?}! 
Block epoch and round: ({}, {})", + peer_network_id, + ordered_block.proof_block_info().epoch(), + ordered_block.proof_block_info().round() + )) + ); +} + +/// Updates the metrics for the received ordered block message +fn update_metrics_for_ordered_block_message( + peer_network_id: PeerNetworkId, + ordered_block: &OrderedBlock, +) { + // Log the received ordered block message + let log_message = format!( + "Received ordered block: {}, from peer: {}!", + ordered_block.proof_block_info(), + peer_network_id + ); + log_received_message(log_message); + + // Update the metrics for the received ordered block + metrics::set_gauge_with_label( + &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, + metrics::ORDERED_BLOCK_LABEL, + ordered_block.proof_block_info().round(), + ); +} diff --git a/consensus/src/consensus_observer/observer/mod.rs b/consensus/src/consensus_observer/observer/mod.rs index 35dd0ea2ec72e..4a4e5d42881a3 100644 --- a/consensus/src/consensus_observer/observer/mod.rs +++ b/consensus/src/consensus_observer/observer/mod.rs @@ -8,3 +8,4 @@ pub mod payload_store; pub mod pending_blocks; pub mod subscription; pub mod subscription_manager; +pub mod subscription_utils; diff --git a/consensus/src/consensus_observer/observer/ordered_blocks.rs b/consensus/src/consensus_observer/observer/ordered_blocks.rs index edfde50a4ed8f..a2408b3a4b20d 100644 --- a/consensus/src/consensus_observer/observer/ordered_blocks.rs +++ b/consensus/src/consensus_observer/observer/ordered_blocks.rs @@ -10,46 +10,52 @@ use crate::consensus_observer::{ }; use aptos_config::config::ConsensusObserverConfig; use aptos_consensus_types::common::Round; -use aptos_infallible::Mutex; use aptos_logger::{debug, warn}; use aptos_types::{block_info::BlockInfo, ledger_info::LedgerInfoWithSignatures}; -use std::{collections::BTreeMap, sync::Arc}; +use std::collections::BTreeMap; /// A simple struct to store ordered blocks -#[derive(Clone)] pub struct OrderedBlockStore { // The configuration of the consensus observer consensus_observer_config: ConsensusObserverConfig, + // The highest committed block (epoch and round) + highest_committed_epoch_round: Option<(u64, Round)>, + // Ordered blocks. The key is the epoch and round of the last block in the // ordered block. Each entry contains the block and the commit decision (if any). 
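The new highest_committed_epoch_round field caches the largest committed (epoch, round) observed so far, and later updates only ever move it forward. A minimal sketch of that monotonic update rule, using a bare Option<(u64, u64)> and an illustrative helper name (update_highest_committed) rather than the store's method:

    /// Advances the cached highest committed (epoch, round), never moving it backwards.
    fn update_highest_committed(highest: &mut Option<(u64, u64)>, observed: (u64, u64)) {
        let should_update = match *highest {
            Some(current) => observed > current, // only newer commits move the marker
            None => true,                        // first commit seen
        };
        if should_update {
            *highest = Some(observed);
        }
    }

    fn main() {
        let mut highest = None;
        update_highest_committed(&mut highest, (3, 7));
        update_highest_committed(&mut highest, (3, 5)); // stale commit, ignored
        assert_eq!(highest, Some((3, 7)));
        update_highest_committed(&mut highest, (4, 0)); // new epoch moves it forward
        assert_eq!(highest, Some((4, 0)));
    }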
- ordered_blocks: Arc)>>>, + ordered_blocks: BTreeMap<(u64, Round), (OrderedBlock, Option)>, } impl OrderedBlockStore { pub fn new(consensus_observer_config: ConsensusObserverConfig) -> Self { Self { consensus_observer_config, - ordered_blocks: Arc::new(Mutex::new(BTreeMap::new())), + highest_committed_epoch_round: None, + ordered_blocks: BTreeMap::new(), } } /// Clears all ordered blocks - pub fn clear_all_ordered_blocks(&self) { - self.ordered_blocks.lock().clear(); + pub fn clear_all_ordered_blocks(&mut self) { + self.ordered_blocks.clear(); } /// Returns a copy of the ordered blocks pub fn get_all_ordered_blocks( &self, ) -> BTreeMap<(u64, Round), (OrderedBlock, Option)> { - self.ordered_blocks.lock().clone() + self.ordered_blocks.clone() + } + + /// Returns the highest committed epoch and round (if any) + pub fn get_highest_committed_epoch_round(&self) -> Option<(u64, Round)> { + self.highest_committed_epoch_round } /// Returns the last ordered block (if any) pub fn get_last_ordered_block(&self) -> Option { self.ordered_blocks - .lock() .last_key_value() .map(|(_, (ordered_block, _))| ordered_block.last_block().block_info()) } @@ -57,7 +63,6 @@ impl OrderedBlockStore { /// Returns the ordered block for the given epoch and round (if any) pub fn get_ordered_block(&self, epoch: u64, round: Round) -> Option { self.ordered_blocks - .lock() .get(&(epoch, round)) .map(|(ordered_block, _)| ordered_block.clone()) } @@ -65,10 +70,10 @@ impl OrderedBlockStore { /// Inserts the given ordered block into the ordered blocks. This function /// assumes the block has already been checked to extend the current ordered /// blocks, and that the ordered proof has been verified. - pub fn insert_ordered_block(&self, ordered_block: OrderedBlock) { + pub fn insert_ordered_block(&mut self, ordered_block: OrderedBlock) { // Verify that the number of ordered blocks doesn't exceed the maximum let max_num_ordered_blocks = self.consensus_observer_config.max_num_pending_blocks as usize; - if self.ordered_blocks.lock().len() >= max_num_ordered_blocks { + if self.ordered_blocks.len() >= max_num_ordered_blocks { warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Exceeded the maximum number of ordered blocks: {:?}. Dropping block: {:?}.", @@ -94,42 +99,70 @@ impl OrderedBlockStore { // Insert the ordered block self.ordered_blocks - .lock() .insert((last_block_epoch, last_block_round), (ordered_block, None)); } /// Removes the ordered blocks for the given commit ledger info. This will /// remove all blocks up to (and including) the epoch and round of the commit. 
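The pruning performed by the function below leans on BTreeMap::split_off, which removes and returns every entry whose key is greater than or equal to the split point; keeping the returned tail and discarding the rest drops all blocks up to and including the commit. A standalone illustration of that behaviour with (epoch, round) keys and placeholder values:

    use std::collections::BTreeMap;

    fn main() {
        // Ordered blocks keyed by (epoch, round); values elided for brevity.
        let mut ordered_blocks: BTreeMap<(u64, u64), &str> = BTreeMap::new();
        ordered_blocks.insert((1, 8), "block A");
        ordered_blocks.insert((1, 9), "block B");
        ordered_blocks.insert((1, 10), "block C");
        ordered_blocks.insert((2, 0), "block D");

        // A commit at (epoch 1, round 9): split at the next round so that
        // everything up to and including the commit is dropped.
        let (commit_epoch, commit_round) = (1u64, 9u64);
        ordered_blocks = ordered_blocks.split_off(&(commit_epoch, commit_round.saturating_add(1)));

        // Only the blocks strictly after the commit remain.
        let remaining: Vec<_> = ordered_blocks.keys().copied().collect();
        assert_eq!(remaining, vec![(1, 10), (2, 0)]);
    }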
- pub fn remove_blocks_for_commit(&self, commit_ledger_info: &LedgerInfoWithSignatures) { + pub fn remove_blocks_for_commit(&mut self, commit_ledger_info: &LedgerInfoWithSignatures) { // Determine the epoch and round to split off let split_off_epoch = commit_ledger_info.ledger_info().epoch(); let split_off_round = commit_ledger_info.commit_info().round().saturating_add(1); // Remove the blocks from the ordered blocks - let mut ordered_blocks = self.ordered_blocks.lock(); - *ordered_blocks = ordered_blocks.split_off(&(split_off_epoch, split_off_round)); + self.ordered_blocks = self + .ordered_blocks + .split_off(&(split_off_epoch, split_off_round)); + + // Update the highest committed epoch and round + self.update_highest_committed_epoch_round(commit_ledger_info); } /// Updates the commit decision of the ordered block (if found) - pub fn update_commit_decision(&self, commit_decision: &CommitDecision) { + pub fn update_commit_decision(&mut self, commit_decision: &CommitDecision) { // Get the epoch and round of the commit decision let commit_decision_epoch = commit_decision.epoch(); let commit_decision_round = commit_decision.round(); // Update the commit decision for the ordered blocks - let mut ordered_blocks = self.ordered_blocks.lock(); - if let Some((_, existing_commit_decision)) = - ordered_blocks.get_mut(&(commit_decision_epoch, commit_decision_round)) + if let Some((_, existing_commit_decision)) = self + .ordered_blocks + .get_mut(&(commit_decision_epoch, commit_decision_round)) { *existing_commit_decision = Some(commit_decision.clone()); } + + // Update the highest committed epoch and round + self.update_highest_committed_epoch_round(commit_decision.commit_proof()); + } + + /// Updates the highest committed epoch and round based on the commit ledger info + fn update_highest_committed_epoch_round( + &mut self, + commit_ledger_info: &LedgerInfoWithSignatures, + ) { + // Get the epoch and round of the commit ledger info + let commit_epoch = commit_ledger_info.ledger_info().epoch(); + let commit_round = commit_ledger_info.commit_info().round(); + let commit_epoch_round = (commit_epoch, commit_round); + + // Update the highest committed epoch and round (if appropriate) + match self.highest_committed_epoch_round { + Some(highest_committed_epoch_round) => { + if commit_epoch_round > highest_committed_epoch_round { + self.highest_committed_epoch_round = Some(commit_epoch_round); + } + }, + None => { + self.highest_committed_epoch_round = Some(commit_epoch_round); + }, + } } /// Updates the metrics for the ordered blocks pub fn update_ordered_blocks_metrics(&self) { // Update the number of ordered block entries - let ordered_blocks = self.ordered_blocks.lock(); - let num_entries = ordered_blocks.len() as u64; + let num_entries = self.ordered_blocks.len() as u64; metrics::set_gauge_with_label( &metrics::OBSERVER_NUM_PROCESSED_BLOCKS, metrics::ORDERED_BLOCK_ENTRIES_LABEL, @@ -137,26 +170,39 @@ impl OrderedBlockStore { ); // Update the total number of ordered blocks - let num_ordered_blocks = ordered_blocks + let num_ordered_blocks = self + .ordered_blocks .values() .map(|(ordered_block, _)| ordered_block.blocks().len() as u64) .sum(); metrics::set_gauge_with_label( &metrics::OBSERVER_NUM_PROCESSED_BLOCKS, - metrics::ORDERED_BLOCKS_LABEL, + metrics::ORDERED_BLOCK_LABEL, num_ordered_blocks, ); // Update the highest round for the ordered blocks - let highest_ordered_round = ordered_blocks + let highest_ordered_round = self + .ordered_blocks .last_key_value() .map(|(_, (ordered_block, _))| 
ordered_block.last_block().round()) .unwrap_or(0); metrics::set_gauge_with_label( &metrics::OBSERVER_PROCESSED_BLOCK_ROUNDS, - metrics::ORDERED_BLOCKS_LABEL, + metrics::ORDERED_BLOCK_LABEL, highest_ordered_round, ); + + // Update the highest round for the committed blocks + let highest_committed_round = self + .highest_committed_epoch_round + .map(|(_, round)| round) + .unwrap_or(0); + metrics::set_gauge_with_label( + &metrics::OBSERVER_PROCESSED_BLOCK_ROUNDS, + metrics::COMMITTED_BLOCKS_LABEL, + highest_committed_round, + ); } } @@ -173,28 +219,128 @@ mod test { use aptos_types::{ aggregate_signature::AggregateSignature, ledger_info::LedgerInfo, transaction::Version, }; + use std::sync::Arc; #[test] fn test_clear_all_ordered_blocks() { // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = 10; - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, current_epoch); // Clear all ordered blocks ordered_block_store.clear_all_ordered_blocks(); // Check that all the ordered blocks were removed - assert!(ordered_block_store.ordered_blocks.lock().is_empty()); + assert!(ordered_block_store.ordered_blocks.is_empty()); + } + + #[test] + fn test_get_highest_committed_epoch_round() { + // Create a new ordered block store + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + + // Verify that we have no highest committed epoch and round + assert!(ordered_block_store + .get_highest_committed_epoch_round() + .is_none()); + + // Insert several ordered blocks for the current epoch + let current_epoch = 10; + let num_ordered_blocks = 50; + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); + + // Create a commit decision for the first ordered block + let first_ordered_block = ordered_blocks.first().unwrap(); + let first_ordered_block_info = first_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(first_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Update the commit decision for the first ordered block + ordered_block_store.update_commit_decision(&commit_decision); + + // Verify the highest committed epoch and round is the first ordered block + verify_highest_committed_epoch_round(&ordered_block_store, &first_ordered_block_info); + + // Create a commit decision for the last ordered block + let last_ordered_block = ordered_blocks.last().unwrap(); + let last_ordered_block_info = last_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(last_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Update the commit decision for the last ordered block + ordered_block_store.update_commit_decision(&commit_decision); + + // Verify the highest committed epoch and round is the last ordered block + verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); + + // Insert several ordered blocks for the next epoch + let next_epoch = current_epoch + 1; + let 
num_ordered_blocks = 10; + let ordered_blocks = + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, next_epoch); + + // Verify the highest committed epoch and round is still the last ordered block + verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); + + // Create a commit decision for the first ordered block (in the next epoch) + let first_ordered_block = ordered_blocks.first().unwrap(); + let first_ordered_block_info = first_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(first_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Update the commit decision for the first ordered block + ordered_block_store.update_commit_decision(&commit_decision); + + // Verify the highest committed epoch and round is the first ordered block (in the next epoch) + verify_highest_committed_epoch_round(&ordered_block_store, &first_ordered_block_info); + + // Create a commit decision for the last ordered block (in the next epoch) + let last_ordered_block = ordered_blocks.last().unwrap(); + let last_ordered_block_info = last_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(last_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Remove the ordered blocks for the commit decision + ordered_block_store.remove_blocks_for_commit(commit_decision.commit_proof()); + + // Verify the highest committed epoch and round is the last ordered block (in the next epoch) + verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); + + // Create a commit decision for an out-of-date ordered block + let out_of_date_ordered_block = ordered_blocks.first().unwrap(); + let out_of_date_ordered_block_info = out_of_date_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(out_of_date_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Update the commit decision for the out-of-date ordered block + ordered_block_store.update_commit_decision(&commit_decision); + + // Verify the highest committed epoch and round is still the last ordered block (in the next epoch) + verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); } #[test] fn test_get_last_ordered_block() { // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Verify that we have no last ordered block assert!(ordered_block_store.get_last_ordered_block().is_none()); @@ -202,8 +348,11 @@ mod test { // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = 50; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); // Verify the last ordered block is the block with the highest round let last_ordered_block = ordered_blocks.last().unwrap(); @@ -217,7 +366,7 @@ mod test { let next_epoch = current_epoch + 1; let num_ordered_blocks = 50; let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, 
num_ordered_blocks, next_epoch); + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, next_epoch); // Verify the last ordered block is the block with the highest epoch and round let last_ordered_block = ordered_blocks.last().unwrap(); @@ -231,13 +380,16 @@ mod test { #[test] fn test_get_ordered_block() { // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = 50; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); // Ensure the ordered blocks were all inserted let all_ordered_blocks = ordered_block_store.get_all_ordered_blocks(); @@ -272,12 +424,12 @@ mod test { }; // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(consensus_observer_config); + let mut ordered_block_store = OrderedBlockStore::new(consensus_observer_config); // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = max_num_pending_blocks * 2; // Insert more than the maximum - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, current_epoch); // Verify the ordered blocks were inserted up to the maximum let all_ordered_blocks = ordered_block_store.get_all_ordered_blocks(); @@ -287,7 +439,7 @@ mod test { let next_epoch = current_epoch + 1; let num_ordered_blocks = max_num_pending_blocks - 1; // Insert one less than the maximum let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, next_epoch); + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, next_epoch); // Verify the ordered blocks were not inserted (they should have just been dropped) for ordered_block in &ordered_blocks { @@ -305,19 +457,22 @@ mod test { #[test] fn test_remove_blocks_for_commit() { // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Insert several ordered blocks for the current epoch let current_epoch = 10; let num_ordered_blocks = 10; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); // Insert several ordered blocks for the next epoch let next_epoch = current_epoch + 1; let num_ordered_blocks_next_epoch = 20; let ordered_blocks_next_epoch = create_and_add_ordered_blocks( - &ordered_block_store, + &mut ordered_block_store, num_ordered_blocks_next_epoch, next_epoch, ); @@ -326,7 +481,7 @@ mod test { let future_epoch = next_epoch + 1; let num_ordered_blocks_future_epoch = 30; create_and_add_ordered_blocks( - &ordered_block_store, + &mut ordered_block_store, num_ordered_blocks_future_epoch, future_epoch, ); @@ -399,19 +554,22 @@ mod test { #[test] fn test_update_commit_decision() { // Create a new ordered block store - let ordered_block_store = 
OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = 10; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); // Insert several ordered blocks for the next epoch let next_epoch = current_epoch + 1; let num_ordered_blocks_next_epoch = 20; let ordered_blocks_next_epoch = create_and_add_ordered_blocks( - &ordered_block_store, + &mut ordered_block_store, num_ordered_blocks_next_epoch, next_epoch, ); @@ -499,7 +657,7 @@ mod test { /// Creates and adds the specified number of ordered blocks to the ordered blocks fn create_and_add_ordered_blocks( - ordered_block_store: &OrderedBlockStore, + ordered_block_store: &mut OrderedBlockStore, num_ordered_blocks: usize, epoch: u64, ) -> Vec { @@ -571,4 +729,19 @@ mod test { updated_commit_decision.as_ref().unwrap().clone() ); } + + /// Verifies the highest committed epoch and round matches the given block info + fn verify_highest_committed_epoch_round( + ordered_block_store: &OrderedBlockStore, + block_info: &BlockInfo, + ) { + // Verify the highest committed epoch and round is the block info + let highest_committed_epoch_round = ordered_block_store + .get_highest_committed_epoch_round() + .unwrap(); + assert_eq!( + highest_committed_epoch_round, + (block_info.epoch(), block_info.round()) + ); + } } diff --git a/consensus/src/consensus_observer/observer/payload_store.rs b/consensus/src/consensus_observer/observer/payload_store.rs index bae1225c58118..59859ec0b82ea 100644 --- a/consensus/src/consensus_observer/observer/payload_store.rs +++ b/consensus/src/consensus_observer/observer/payload_store.rs @@ -26,12 +26,12 @@ pub enum BlockPayloadStatus { } /// A simple struct to store the block payloads of ordered and committed blocks -#[derive(Clone)] pub struct BlockPayloadStore { // The configuration of the consensus observer consensus_observer_config: ConsensusObserverConfig, - // Block transaction payloads (indexed by epoch and round) + // Block transaction payloads (indexed by epoch and round). + // This is directly accessed by the payload manager. block_payloads: Arc>>, } @@ -61,6 +61,15 @@ impl BlockPayloadStore { self.block_payloads.lock().clear(); } + /// Returns true iff we already have a payload entry for the given block + pub fn existing_payload_entry(&self, block_payload: &BlockPayload) -> bool { + // Get the epoch and round of the payload + let epoch_and_round = (block_payload.epoch(), block_payload.round()); + + // Check if a payload already exists in the store + self.block_payloads.lock().contains_key(&epoch_and_round) + } + /// Returns a reference to the block payloads pub fn get_block_payloads(&self) -> Arc>> { self.block_payloads.clone() @@ -78,14 +87,15 @@ impl BlockPayloadStore { warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Exceeded the maximum number of payloads: {:?}. 
Dropping block: {:?}!", - max_num_pending_blocks, block_payload.block, + max_num_pending_blocks, + block_payload.block(), )) ); return; // Drop the block if we've exceeded the maximum } // Create the new payload status - let epoch_and_round = (block_payload.block.epoch(), block_payload.block.round()); + let epoch_and_round = (block_payload.epoch(), block_payload.round()); let payload_status = if verified_payload_signatures { BlockPayloadStatus::AvailableAndVerified(block_payload) } else { @@ -161,7 +171,7 @@ impl BlockPayloadStore { // Get the block transaction payload let transaction_payload = match entry.get() { BlockPayloadStatus::AvailableAndVerified(block_payload) => { - &block_payload.transaction_payload + block_payload.transaction_payload() }, BlockPayloadStatus::AvailableAndUnverified(_) => { // The payload should have already been verified @@ -251,7 +261,7 @@ impl BlockPayloadStore { // Collect the rounds of all newly verified blocks let verified_payload_rounds: Vec = verified_payloads_to_update .iter() - .map(|block_payload| block_payload.block.round()) + .map(|block_payload| block_payload.round()) .collect(); // Update the verified block payloads. Note: this will cause @@ -299,16 +309,12 @@ mod test { }; // Create a new block payload store - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some unverified blocks to the payload store let num_blocks_in_store = 100; - let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 1, - false, - ); + let unverified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 1, false); // Verify the payloads don't exist in the block payload store assert!(!block_payload_store.all_payloads_exist(&unverified_blocks)); @@ -320,12 +326,8 @@ mod test { // Add some verified blocks to the payload store let num_blocks_in_store = 100; - let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 0, - true, - ); + let verified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Check that all the payloads exist in the block payload store assert!(block_payload_store.all_payloads_exist(&verified_blocks)); @@ -355,22 +357,18 @@ mod test { fn test_all_payloads_exist_unverified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add several verified blocks to the payload store let num_blocks_in_store = 10; - let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 0, - true, - ); + let verified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Check that the payloads exists in the block payload store assert!(block_payload_store.all_payloads_exist(&verified_blocks)); // Mark the payload of the first block as unverified - mark_payload_as_unverified(block_payload_store.clone(), &verified_blocks[0]); + mark_payload_as_unverified(&block_payload_store, &verified_blocks[0]); // Check that the payload no longer exists in the block payload store assert!(!block_payload_store.all_payloads_exist(&verified_blocks)); @@ -383,19 +381,15 @@ mod test { fn 
test_clear_all_payloads() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some unverified blocks to the payload store let num_blocks_in_store = 30; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_in_store, 1, false); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 1, false); // Add some verified blocks to the payload store - let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 0, - true, - ); + let verified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Check that the payloads exist in the block payload store assert!(block_payload_store.all_payloads_exist(&verified_blocks)); @@ -415,6 +409,41 @@ mod test { check_num_verified_payloads(&block_payload_store, 0); } + #[test] + fn test_existing_payload_entry() { + // Create a new block payload store + let consensus_observer_config = ConsensusObserverConfig::default(); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); + + // Create a new block payload + let epoch = 10; + let round = 100; + let block_payload = create_block_payload(epoch, round); + + // Check that the payload doesn't exist in the block payload store + assert!(!block_payload_store.existing_payload_entry(&block_payload)); + + // Insert the verified block payload into the block payload store + block_payload_store.insert_block_payload(block_payload.clone(), true); + + // Check that the payload now exists in the block payload store + assert!(block_payload_store.existing_payload_entry(&block_payload)); + + // Create another block payload + let epoch = 5; + let round = 101; + let block_payload = create_block_payload(epoch, round); + + // Check that the payload doesn't exist in the block payload store + assert!(!block_payload_store.existing_payload_entry(&block_payload)); + + // Insert the unverified block payload into the block payload store + block_payload_store.insert_block_payload(block_payload.clone(), false); + + // Check that the payload now exists in the block payload store + assert!(block_payload_store.existing_payload_entry(&block_payload)); + } + #[test] fn test_insert_block_payload() { // Create a new block payload store @@ -423,12 +452,8 @@ mod test { // Add some verified blocks to the payload store let num_blocks_in_store = 20; - let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 0, - true, - ); + let verified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Check that the block payload store contains the new block payloads assert!(block_payload_store.all_payloads_exist(&verified_blocks)); @@ -438,7 +463,7 @@ mod test { check_num_verified_payloads(&block_payload_store, num_blocks_in_store); // Mark the payload of the first block as unverified - mark_payload_as_unverified(block_payload_store.clone(), &verified_blocks[0]); + mark_payload_as_unverified(&block_payload_store, &verified_blocks[0]); // Check that the payload no longer exists in the block payload store assert!(!block_payload_store.all_payloads_exist(&verified_blocks)); @@ -465,11 +490,11 @@ mod test { }; // Create a new block payload store - let block_payload_store = 
BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add the maximum number of verified blocks to the payload store let num_blocks_in_store = max_num_pending_blocks as usize; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_in_store, 0, true); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Verify the number of blocks in the block payload store check_num_verified_payloads(&block_payload_store, num_blocks_in_store); @@ -477,7 +502,7 @@ mod test { // Add more blocks to the payload store let num_blocks_to_add = 5; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_to_add, 0, true); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_to_add, 0, true); // Verify the number of blocks in the block payload store check_num_verified_payloads(&block_payload_store, max_num_pending_blocks as usize); @@ -485,7 +510,7 @@ mod test { // Add a large number of blocks to the payload store let num_blocks_to_add = 100; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_to_add, 0, true); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_to_add, 0, true); // Verify the number of blocks in the block payload store check_num_verified_payloads(&block_payload_store, max_num_pending_blocks as usize); @@ -502,11 +527,11 @@ mod test { }; // Create a new block payload store - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add the maximum number of unverified blocks to the payload store let num_blocks_in_store = max_num_pending_blocks as usize; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_in_store, 0, false); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, false); // Verify the number of blocks in the block payload store check_num_unverified_payloads(&block_payload_store, num_blocks_in_store); @@ -514,7 +539,7 @@ mod test { // Add more blocks to the payload store let num_blocks_to_add = 5; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_to_add, 0, false); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_to_add, 0, false); // Verify the number of blocks in the block payload store check_num_unverified_payloads(&block_payload_store, max_num_pending_blocks as usize); @@ -522,7 +547,7 @@ mod test { // Add a large number of blocks to the payload store let num_blocks_to_add = 100; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_to_add, 0, false); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_to_add, 0, false); // Verify the number of blocks in the block payload store check_num_unverified_payloads(&block_payload_store, max_num_pending_blocks as usize); @@ -533,13 +558,13 @@ mod test { fn test_remove_blocks_for_epoch_round_verified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some verified blocks to the payload store for the current epoch let current_epoch = 0; let num_blocks_in_store = 100; let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, 
num_blocks_in_store, current_epoch, true, @@ -573,7 +598,7 @@ mod test { // Add some verified blocks to the payload store for the next epoch let next_epoch = current_epoch + 1; create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, next_epoch, true, @@ -591,13 +616,13 @@ mod test { fn test_remove_blocks_for_epoch_round_unverified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some unverified blocks to the payload store for the current epoch let current_epoch = 10; let num_blocks_in_store = 100; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, current_epoch, false, @@ -630,7 +655,7 @@ mod test { // Add some unverified blocks to the payload store for the next epoch let next_epoch = current_epoch + 1; create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, next_epoch, false, @@ -648,13 +673,13 @@ mod test { fn test_remove_committed_blocks_verified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some blocks to the payload store for the current epoch let current_epoch = 0; let num_blocks_in_store = 100; let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, current_epoch, true, @@ -700,7 +725,7 @@ mod test { // Add some blocks to the payload store for the next epoch let next_epoch = 1; let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, next_epoch, true, @@ -717,13 +742,13 @@ mod test { fn test_remove_committed_blocks_unverified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some blocks to the payload store for the current epoch let current_epoch = 10; let num_blocks_in_store = 100; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, current_epoch, false, @@ -768,7 +793,7 @@ mod test { // Add some blocks to the payload store for the next epoch let next_epoch = 11; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, next_epoch, false, @@ -791,7 +816,7 @@ mod test { let current_epoch = 0; let num_verified_blocks = 10; create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_verified_blocks, current_epoch, true, @@ -801,7 +826,7 @@ mod test { let next_epoch = current_epoch + 1; let num_unverified_blocks = 20; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_unverified_blocks, next_epoch, false, @@ -811,7 +836,7 @@ mod test { let future_epoch = current_epoch + 30; let num_future_blocks = 30; let 
future_unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_future_blocks, future_epoch, false, @@ -877,7 +902,7 @@ mod test { let current_epoch = 0; let num_verified_blocks = 10; let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_verified_blocks, current_epoch, true, @@ -895,7 +920,7 @@ mod test { .unwrap(); // Mark the first block payload as unverified - mark_payload_as_unverified(block_payload_store.clone(), &verified_blocks[0]); + mark_payload_as_unverified(&block_payload_store, &verified_blocks[0]); // Verify the ordered block and ensure it fails (since the payloads are unverified) let error = block_payload_store @@ -923,7 +948,7 @@ mod test { let current_epoch = 10; let num_verified_blocks = 6; create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_verified_blocks, current_epoch, true, @@ -933,7 +958,7 @@ mod test { let next_epoch = current_epoch + 1; let num_unverified_blocks = 15; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_unverified_blocks, next_epoch, false, @@ -943,7 +968,7 @@ mod test { let future_epoch = next_epoch + 1; let num_future_blocks = 10; let unverified_future_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_future_blocks, future_epoch, false, @@ -986,7 +1011,7 @@ mod test { /// Creates and adds the given number of blocks to the block payload store fn create_and_add_blocks_to_store( - mut block_payload_store: BlockPayloadStore, + block_payload_store: &mut BlockPayloadStore, num_blocks: usize, epoch: u64, verified_payload_signatures: bool, @@ -1060,6 +1085,12 @@ mod test { pipelined_blocks } + /// Creates a new block payload with the given epoch and round + fn create_block_payload(epoch: u64, round: Round) -> BlockPayload { + let block_info = BlockInfo::random_with_epoch(epoch, round); + BlockPayload::new(block_info, BlockTransactionPayload::empty()) + } + /// Checks the number of unverified payloads in the block payload store fn check_num_unverified_payloads( block_payload_store: &BlockPayloadStore, @@ -1110,7 +1141,7 @@ mod test { /// Marks the payload of the given block as unverified fn mark_payload_as_unverified( - block_payload_store: BlockPayloadStore, + block_payload_store: &BlockPayloadStore, block: &Arc, ) { // Get the payload entry for the given block diff --git a/consensus/src/consensus_observer/observer/pending_blocks.rs b/consensus/src/consensus_observer/observer/pending_blocks.rs index 46c0586f08130..2a7ebbde0519f 100644 --- a/consensus/src/consensus_observer/observer/pending_blocks.rs +++ b/consensus/src/consensus_observer/observer/pending_blocks.rs @@ -19,41 +19,47 @@ use std::{ }; /// A simple struct to hold blocks that are waiting for payloads -#[derive(Clone)] pub struct PendingBlockStore { // The configuration of the consensus observer consensus_observer_config: ConsensusObserverConfig, - // A map of ordered blocks that are without payloads. The key is the - // (epoch, round) of the first block in the ordered block. - blocks_without_payloads: Arc>>, + // A map of ordered blocks that are without payloads. The key is + // the (epoch, round) of the first block in the ordered block. 
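Because pending entries are keyed by the first block's (epoch, round), duplicate ordered blocks can be detected with a plain map lookup, and inserts can refuse to overwrite an existing entry. A small sketch of that insert-or-ignore pattern with BTreeMap's entry API (insert_pending and the String value are stand-ins for the real types):

    use std::collections::{btree_map::Entry, BTreeMap};

    /// Inserts a pending block keyed by its first block's (epoch, round),
    /// ignoring the insert if an entry for that key already exists.
    fn insert_pending(
        pending: &mut BTreeMap<(u64, u64), String>,
        first_block_epoch_round: (u64, u64),
        block: String,
    ) -> bool {
        match pending.entry(first_block_epoch_round) {
            Entry::Occupied(_) => false, // already pending: drop the duplicate
            Entry::Vacant(entry) => {
                entry.insert(block);
                true
            },
        }
    }

    fn main() {
        let mut pending = BTreeMap::new();
        assert!(insert_pending(&mut pending, (2, 4), "ordered block".to_string()));
        assert!(!insert_pending(&mut pending, (2, 4), "duplicate".to_string()));
        assert_eq!(pending.len(), 1);
    }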
+ blocks_without_payloads: BTreeMap<(u64, Round), OrderedBlock>, } impl PendingBlockStore { pub fn new(consensus_observer_config: ConsensusObserverConfig) -> Self { Self { consensus_observer_config, - blocks_without_payloads: Arc::new(Mutex::new(BTreeMap::new())), + blocks_without_payloads: BTreeMap::new(), } } /// Clears all missing blocks from the store - pub fn clear_missing_blocks(&self) { - self.blocks_without_payloads.lock().clear(); + pub fn clear_missing_blocks(&mut self) { + self.blocks_without_payloads.clear(); + } + + /// Returns true iff the store contains an entry for the given ordered block + pub fn existing_pending_block(&self, ordered_block: &OrderedBlock) -> bool { + // Get the epoch and round of the first block + let first_block = ordered_block.first_block(); + let first_block_epoch_round = (first_block.epoch(), first_block.round()); + + // Check if the block is already in the store + self.blocks_without_payloads + .contains_key(&first_block_epoch_round) } /// Inserts a block (without payloads) into the store - pub fn insert_pending_block(&self, ordered_block: OrderedBlock) { + pub fn insert_pending_block(&mut self, ordered_block: OrderedBlock) { // Get the epoch and round of the first block let first_block = ordered_block.first_block(); let first_block_epoch_round = (first_block.epoch(), first_block.round()); // Insert the block into the store using the round of the first block - match self - .blocks_without_payloads - .lock() - .entry(first_block_epoch_round) - { + match self.blocks_without_payloads.entry(first_block_epoch_round) { Entry::Occupied(_) => { // The block is already in the store warn!( @@ -75,16 +81,15 @@ impl PendingBlockStore { /// Garbage collects the pending blocks store by removing /// the oldest blocks if the store is too large. - fn garbage_collect_pending_blocks(&self) { + fn garbage_collect_pending_blocks(&mut self) { // Calculate the number of blocks to remove - let mut blocks_without_payloads = self.blocks_without_payloads.lock(); - let num_pending_blocks = blocks_without_payloads.len() as u64; + let num_pending_blocks = self.blocks_without_payloads.len() as u64; let max_pending_blocks = self.consensus_observer_config.max_num_pending_blocks; let num_blocks_to_remove = num_pending_blocks.saturating_sub(max_pending_blocks); // Remove the oldest blocks if the store is too large for _ in 0..num_blocks_to_remove { - if let Some((oldest_epoch_round, _)) = blocks_without_payloads.pop_first() { + if let Some((oldest_epoch_round, _)) = self.blocks_without_payloads.pop_first() { warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "The pending block store is too large: {:?} blocks. Removing the block for the oldest epoch and round: {:?}", @@ -98,25 +103,28 @@ impl PendingBlockStore { /// Removes and returns the block from the store that is now ready /// to be processed (after the new payload has been received). 
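When the store grows past the configured maximum, the oldest entries (those with the smallest (epoch, round) keys) are evicted first. A compact sketch of that eviction loop using BTreeMap::pop_first; garbage_collect and the &str values are placeholders rather than the store's actual API:

    use std::collections::BTreeMap;

    /// Evicts the oldest entries (smallest keys) until the store
    /// is back within the configured capacity.
    fn garbage_collect(pending: &mut BTreeMap<(u64, u64), &str>, max_pending_blocks: usize) {
        let excess = pending.len().saturating_sub(max_pending_blocks);
        for _ in 0..excess {
            if let Some((evicted_key, _)) = pending.pop_first() {
                println!("Evicting oldest pending block at {:?}", evicted_key);
            }
        }
    }

    fn main() {
        let mut pending = BTreeMap::new();
        for round in 0..5u64 {
            pending.insert((1, round), "pending block");
        }

        garbage_collect(&mut pending, 3);

        // The two oldest rounds were evicted; the newest three remain.
        let remaining: Vec<_> = pending.keys().copied().collect();
        assert_eq!(remaining, vec![(1, 2), (1, 3), (1, 4)]);
    }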
pub fn remove_ready_block( - &self, + &mut self, received_payload_epoch: u64, received_payload_round: Round, - block_payload_store: &BlockPayloadStore, + block_payload_store: Arc>, ) -> Option { // Calculate the round at which to split the blocks let split_round = received_payload_round.saturating_add(1); // Split the blocks at the epoch and round - let mut blocks_without_payloads = self.blocks_without_payloads.lock(); - let mut blocks_at_higher_rounds = - blocks_without_payloads.split_off(&(received_payload_epoch, split_round)); + let mut blocks_at_higher_rounds = self + .blocks_without_payloads + .split_off(&(received_payload_epoch, split_round)); // Check if the last block is ready (this should be the only ready block). // Any earlier blocks are considered out-of-date and will be dropped. let mut ready_block = None; - if let Some((epoch_and_round, ordered_block)) = blocks_without_payloads.pop_last() { + if let Some((epoch_and_round, ordered_block)) = self.blocks_without_payloads.pop_last() { // If all payloads exist for the block, then the block is ready - if block_payload_store.all_payloads_exist(ordered_block.blocks()) { + if block_payload_store + .lock() + .all_payloads_exist(ordered_block.blocks()) + { ready_block = Some(ordered_block); } else { // Otherwise, check if we're still waiting for higher payloads for the block @@ -127,18 +135,18 @@ impl PendingBlockStore { } // Check if any out-of-date blocks were dropped - if !blocks_without_payloads.is_empty() { + if !self.blocks_without_payloads.is_empty() { info!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Dropped {:?} out-of-date pending blocks before epoch and round: {:?}", - blocks_without_payloads.len(), + self.blocks_without_payloads.len(), (received_payload_epoch, received_payload_round) )) ); } // Update the pending blocks to only include the blocks at higher rounds - *blocks_without_payloads = blocks_at_higher_rounds; + self.blocks_without_payloads = blocks_at_higher_rounds; // Return the ready block (if one exists) ready_block @@ -147,8 +155,7 @@ impl PendingBlockStore { /// Updates the metrics for the pending blocks pub fn update_pending_blocks_metrics(&self) { // Update the number of pending block entries - let blocks_without_payloads = self.blocks_without_payloads.lock(); - let num_entries = blocks_without_payloads.len() as u64; + let num_entries = self.blocks_without_payloads.len() as u64; metrics::set_gauge_with_label( &metrics::OBSERVER_NUM_PROCESSED_BLOCKS, metrics::PENDING_BLOCK_ENTRIES_LABEL, @@ -156,7 +163,8 @@ impl PendingBlockStore { ); // Update the total number of pending blocks - let num_pending_blocks = blocks_without_payloads + let num_pending_blocks = self + .blocks_without_payloads .values() .map(|block| block.blocks().len() as u64) .sum(); @@ -167,7 +175,8 @@ impl PendingBlockStore { ); // Update the highest round for the pending blocks - let highest_pending_round = blocks_without_payloads + let highest_pending_round = self + .blocks_without_payloads .last_key_value() .map(|(_, pending_block)| pending_block.last_block().round()) .unwrap_or(0); @@ -208,13 +217,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 0; let missing_blocks = 
create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -223,17 +234,80 @@ mod test { // Verify that the store is not empty verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &missing_blocks, ); // Clear the missing blocks from the store - pending_block_store.clear_missing_blocks(); + pending_block_store.lock().clear_missing_blocks(); // Verify that the store is now empty - let blocks_without_payloads = pending_block_store.blocks_without_payloads.lock(); - assert!(blocks_without_payloads.is_empty()); + assert!(pending_block_store + .lock() + .blocks_without_payloads + .is_empty()); + } + + #[test] + fn test_existing_pending_block() { + // Create a new pending block store + let max_num_pending_blocks = 10; + let consensus_observer_config = ConsensusObserverConfig { + max_num_pending_blocks: max_num_pending_blocks as u64, + ..ConsensusObserverConfig::default() + }; + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + ConsensusObserverConfig::default(), + ))); + + // Insert the maximum number of blocks into the store + let current_epoch = 10; + let starting_round = 100; + let pending_blocks = create_and_add_pending_blocks( + pending_block_store.clone(), + max_num_pending_blocks, + current_epoch, + starting_round, + 5, + ); + + // Verify that all blocks were inserted correctly + for pending_block in &pending_blocks { + assert!(pending_block_store + .lock() + .existing_pending_block(pending_block)); + } + + // Create a new block payload store and insert payloads for the second block + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); + let second_block = pending_blocks[1].clone(); + insert_payloads_for_ordered_block(block_payload_store.clone(), &second_block); + + // Remove the second block (which is now ready) + let payload_round = second_block.first_block().round(); + let ready_block = pending_block_store.lock().remove_ready_block( + current_epoch, + payload_round, + block_payload_store.clone(), + ); + assert_eq!(ready_block, Some(second_block)); + + // Verify that the first and second blocks were removed + verify_pending_blocks( + pending_block_store.clone(), + max_num_pending_blocks - 2, + &pending_blocks[2..].to_vec(), + ); + + // Verify that the first and second blocks are no longer in the store + for pending_block in &pending_blocks[..2] { + assert!(!pending_block_store + .lock() + .existing_pending_block(pending_block)); + } } #[test] @@ -244,13 +318,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 0; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -259,7 +335,7 @@ mod test { // Verify that all blocks were inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &pending_blocks, ); @@ -267,7 +343,7 @@ mod test { // Insert the maximum number of blocks into the store again let starting_round = (max_num_pending_blocks * 100) as Round; let pending_blocks = 
create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -276,7 +352,7 @@ mod test { // Verify that all blocks were inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &pending_blocks, ); @@ -284,12 +360,17 @@ mod test { // Insert one more block into the store (for the next epoch) let next_epoch = 1; let starting_round = 0; - let new_pending_block = - create_and_add_pending_blocks(&pending_block_store, 1, next_epoch, starting_round, 5); + let new_pending_block = create_and_add_pending_blocks( + pending_block_store.clone(), + 1, + next_epoch, + starting_round, + 5, + ); // Verify the new block was inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &new_pending_block, ); @@ -303,13 +384,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 200; let mut pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -318,7 +401,7 @@ mod test { // Verify that all blocks were inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &pending_blocks, ); @@ -329,7 +412,7 @@ mod test { // Insert one more block into the store let starting_round = ((max_num_pending_blocks * 10) + (i * 100)) as Round; let new_pending_block = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), 1, current_epoch, starting_round, @@ -338,7 +421,7 @@ mod test { // Verify the new block was inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &new_pending_block, ); @@ -348,7 +431,8 @@ mod test { let oldest_block_round = oldest_block.first_block().round(); // Verify that the oldest block was garbage collected - let blocks_without_payloads = pending_block_store.blocks_without_payloads.lock(); + let blocks_without_payloads = + pending_block_store.lock().blocks_without_payloads.clone(); assert!(!blocks_without_payloads.contains_key(&(current_epoch, oldest_block_round))); } @@ -359,7 +443,7 @@ mod test { // Insert one more block into the store let starting_round = i; let new_pending_block = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), 1, next_epoch, starting_round, @@ -368,7 +452,7 @@ mod test { // Verify the new block was inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &new_pending_block, ); @@ -378,7 +462,8 @@ mod test { let oldest_block_round = oldest_block.first_block().round(); // Verify that the oldest block was garbage collected - let blocks_without_payloads = pending_block_store.blocks_without_payloads.lock(); + let blocks_without_payloads = + pending_block_store.lock().blocks_without_payloads.clone(); assert!(!blocks_without_payloads.contains_key(&(current_epoch, oldest_block_round))); } } @@ -391,13 +476,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, 
..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 0; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -405,43 +492,45 @@ mod test { ); // Create a new block payload store and insert payloads for the second block - let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); let second_block = pending_blocks[1].clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &second_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &second_block); // Remove the second block (which is now ready) let payload_round = second_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); assert_eq!(ready_block, Some(second_block)); // Verify that the first and second blocks were removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 2, &pending_blocks[2..].to_vec(), ); // Insert payloads for the last block let last_block = pending_blocks.last().unwrap().clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &last_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &last_block); // Remove the last block (which is now ready) let payload_round = last_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); // Verify that the last block was removed assert_eq!(ready_block, Some(last_block)); // Verify that the store is empty - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } #[test] @@ -452,13 +541,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 10; let starting_round = 100; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -466,7 +557,9 @@ mod test { ); // Create an empty block payload store - let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); // Incrementally insert and process each payload for the first block let first_block = pending_blocks.first().unwrap().clone(); @@ -474,14 +567,16 @@ mod test { // Insert the block let block_payload = BlockPayload::new(block.block_info(), BlockTransactionPayload::empty()); - block_payload_store.insert_block_payload(block_payload, 
true); + block_payload_store + .lock() + .insert_block_payload(block_payload, true); // Attempt to remove the block (which might not be ready) let payload_round = block.round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); // If the block is ready, verify that it was removed. @@ -492,7 +587,7 @@ mod test { // Verify that the block was removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 1, &pending_blocks[1..].to_vec(), ); @@ -502,7 +597,7 @@ mod test { // Verify that the block still remains verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &pending_blocks, ); @@ -517,14 +612,16 @@ mod test { if payload_round != last_block.first_block().round() { let block_payload = BlockPayload::new(block.block_info(), BlockTransactionPayload::empty()); - block_payload_store.insert_block_payload(block_payload, true); + block_payload_store + .lock() + .insert_block_payload(block_payload, true); } // Attempt to remove the block (which might not be ready) - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); // The block should not be ready @@ -532,14 +629,14 @@ mod test { // Verify that the block still remains or has been removed on the last insert if payload_round == last_block.last_block().round() { - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } else { - verify_pending_blocks(&pending_block_store, 1, &vec![last_block.clone()]); + verify_pending_blocks(pending_block_store.clone(), 1, &vec![last_block.clone()]); } } // Verify that the store is now empty - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } #[test] @@ -550,13 +647,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 0; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -564,63 +663,65 @@ mod test { ); // Create a new block payload store and insert payloads for the first block - let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); let first_block = pending_blocks.first().unwrap().clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &first_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &first_block); // Remove the first block (which is now ready) let payload_round = first_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); assert_eq!(ready_block, 
Some(first_block)); // Verify that the first block was removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 1, &pending_blocks[1..].to_vec(), ); // Insert payloads for the second block let second_block = pending_blocks[1].clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &second_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &second_block); // Remove the second block (which is now ready) let payload_round = second_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); assert_eq!(ready_block, Some(second_block)); // Verify that the first and second blocks were removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 2, &pending_blocks[2..].to_vec(), ); // Insert payloads for the last block let last_block = pending_blocks.last().unwrap().clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &last_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &last_block); // Remove the last block (which is now ready) let payload_round = last_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); // Verify that the last block was removed assert_eq!(ready_block, Some(last_block)); // Verify that the store is empty - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } #[test] @@ -631,13 +732,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 10; let starting_round = 100; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -645,21 +748,23 @@ mod test { ); // Create an empty block payload store - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); // Remove the third block (which is not ready) let third_block = pending_blocks[2].clone(); let third_block_round = third_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, third_block_round, - &block_payload_store, + block_payload_store.clone(), ); assert!(ready_block.is_none()); // Verify that the first three blocks were removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 3, &pending_blocks[3..].to_vec(), ); @@ -667,20 +772,20 @@ mod test { // Remove the last block (which is not ready) let last_block = pending_blocks.last().unwrap().clone(); let last_block_round = last_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = 
pending_block_store.lock().remove_ready_block( current_epoch, last_block_round, - &block_payload_store, + block_payload_store.clone(), ); assert!(ready_block.is_none()); // Verify that the store is now empty - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } /// Creates and adds the specified number of blocks to the pending block store fn create_and_add_pending_blocks( - pending_block_store: &PendingBlockStore, + pending_block_store: Arc>, num_pending_blocks: usize, epoch: u64, starting_round: Round, @@ -732,7 +837,9 @@ mod test { let ordered_block = OrderedBlock::new(pipelined_blocks, ordered_proof.clone()); // Insert the ordered block into the pending block store - pending_block_store.insert_pending_block(ordered_block.clone()); + pending_block_store + .lock() + .insert_pending_block(ordered_block.clone()); // Add the ordered block to the pending blocks pending_blocks.push(ordered_block); @@ -743,31 +850,37 @@ mod test { /// Inserts payloads into the payload store for the ordered block fn insert_payloads_for_ordered_block( - block_payload_store: &mut BlockPayloadStore, + block_payload_store: Arc>, ordered_block: &OrderedBlock, ) { for block in ordered_block.blocks() { let block_payload = BlockPayload::new(block.block_info(), BlockTransactionPayload::empty()); - block_payload_store.insert_block_payload(block_payload, true); + block_payload_store + .lock() + .insert_block_payload(block_payload, true); } } /// Verifies that the pending block store contains the expected blocks fn verify_pending_blocks( - pending_block_store: &PendingBlockStore, + pending_block_store: Arc>, num_expected_blocks: usize, pending_blocks: &Vec, ) { // Check the number of pending blocks - let blocks_without_payloads = pending_block_store.blocks_without_payloads.lock(); - assert_eq!(blocks_without_payloads.len(), num_expected_blocks); + assert_eq!( + pending_block_store.lock().blocks_without_payloads.len(), + num_expected_blocks + ); // Check that all pending blocks are in the store for pending_block in pending_blocks { let first_block = pending_block.first_block(); assert_eq!( - blocks_without_payloads + pending_block_store + .lock() + .blocks_without_payloads .get(&(first_block.epoch(), first_block.round())) .unwrap(), pending_block diff --git a/consensus/src/consensus_observer/observer/subscription.rs b/consensus/src/consensus_observer/observer/subscription.rs index fe29aa6a5a577..5d9ae4d43def1 100644 --- a/consensus/src/consensus_observer/observer/subscription.rs +++ b/consensus/src/consensus_observer/observer/subscription.rs @@ -1,25 +1,17 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use crate::consensus_observer::common::{ - error::Error, - logging::{LogEntry, LogSchema}, -}; +use crate::consensus_observer::{common::error::Error, observer::subscription_utils}; use aptos_config::{config::ConsensusObserverConfig, network_id::PeerNetworkId}; -use aptos_logger::{info, warn}; -use aptos_network::{application::metadata::PeerMetadata, ProtocolId}; +use aptos_network::application::metadata::PeerMetadata; use aptos_storage_interface::DbReader; use aptos_time_service::{TimeService, TimeServiceTrait}; -use ordered_float::OrderedFloat; use std::{ - collections::{BTreeMap, HashMap, HashSet}, + collections::{HashMap, HashSet}, sync::Arc, time::{Duration, Instant}, }; -// A useful constant for representing the maximum ping latency -const MAX_PING_LATENCY_SECS: f64 = 10_000.0; - /// A single consensus observer 
subscription pub struct ConsensusObserverSubscription { // The configuration of the consensus observer @@ -31,7 +23,7 @@ pub struct ConsensusObserverSubscription { // The peer network id of the active subscription peer_network_id: PeerNetworkId, - // The timestamp of the last message received from the peer + // The timestamp of the last message received for the subscription last_message_receive_time: Instant, // The timestamp and connected peers for the last optimality check @@ -66,12 +58,40 @@ impl ConsensusObserverSubscription { } } + /// Checks if the subscription is still healthy. If not, an error + /// is returned indicating the reason for the subscription failure. + pub fn check_subscription_health( + &mut self, + connected_peers_and_metadata: &HashMap, + ) -> Result<(), Error> { + // Verify the subscription peer is still connected + let peer_network_id = self.get_peer_network_id(); + if !connected_peers_and_metadata.contains_key(&peer_network_id) { + return Err(Error::SubscriptionDisconnected(format!( + "The peer: {:?} is no longer connected!", + peer_network_id + ))); + } + + // Verify the subscription has not timed out + self.check_subscription_timeout()?; + + // Verify that the DB is continuing to sync and commit new data + self.check_syncing_progress()?; + + // Verify that the subscription peer is still optimal + self.check_subscription_peer_optimality(connected_peers_and_metadata)?; + + // The subscription seems healthy + Ok(()) + } + /// Verifies that the peer currently selected for the subscription is /// optimal. This is only done if: (i) the peers have changed since the /// last check; or (ii) enough time has elapsed to force a refresh. - pub fn check_subscription_peer_optimality( + fn check_subscription_peer_optimality( &mut self, - peers_and_metadata: HashMap, + peers_and_metadata: &HashMap, ) -> Result<(), Error> { // Get the last optimality check time and connected peers let (last_optimality_check_time, last_optimality_check_peers) = @@ -106,16 +126,21 @@ impl ConsensusObserverSubscription { self.last_optimality_check_time_and_peers = (time_now, current_connected_peers); // Sort the peers by subscription optimality - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - - // Verify that we're subscribed to the most optimal peer - if let Some(optimal_peer) = sorted_peers.first() { - if *optimal_peer != self.peer_network_id { - return Err(Error::SubscriptionSuboptimal(format!( - "Subscription to peer: {} is no longer optimal! New optimal peer: {}", - self.peer_network_id, optimal_peer - ))); - } + let sorted_peers = + subscription_utils::sort_peers_by_subscription_optimality(peers_and_metadata); + + // Verify that this peer is one of the most optimal peers + let max_concurrent_subscriptions = + self.consensus_observer_config.max_concurrent_subscriptions as usize; + if !sorted_peers + .iter() + .take(max_concurrent_subscriptions) + .any(|peer| peer == &self.peer_network_id) + { + return Err(Error::SubscriptionSuboptimal(format!( + "Subscription to peer: {} is no longer optimal! New optimal peers: {:?}", + self.peer_network_id, sorted_peers + ))); } Ok(()) @@ -123,7 +148,7 @@ impl ConsensusObserverSubscription { /// Verifies that the subscription has not timed out based /// on the last received message time. 
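The reworked health check above runs four verifications in sequence (peer connectivity, message timeout, DB syncing progress, peer optimality) and surfaces the first failure as a typed error, and the optimality check now only requires the subscribed peer to appear within the top max_concurrent_subscriptions entries of the sorted peer list, rather than being the single most optimal peer. A minimal sketch of that top-k membership test, using a plain u64 as a stand-in for PeerNetworkId:

    // Sketch only: sorted_peers is assumed to be ordered from most to least optimal.
    // The subscribed peer is still considered optimal while it sits within the first
    // max_concurrent_subscriptions entries of the sorted list.
    fn is_peer_still_optimal(
        sorted_peers: &[u64],
        subscribed_peer: u64,
        max_concurrent_subscriptions: usize,
    ) -> bool {
        sorted_peers
            .iter()
            .take(max_concurrent_subscriptions)
            .any(|peer| *peer == subscribed_peer)
    }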
- pub fn check_subscription_timeout(&self) -> Result<(), Error> { + fn check_subscription_timeout(&self) -> Result<(), Error> { // Calculate the duration since the last message let time_now = self.time_service.now(); let duration_since_last_message = time_now.duration_since(self.last_message_receive_time); @@ -142,7 +167,7 @@ impl ConsensusObserverSubscription { } /// Verifies that the DB is continuing to sync and commit new data - pub fn check_syncing_progress(&mut self) -> Result<(), Error> { + fn check_syncing_progress(&mut self) -> Result<(), Error> { // Get the current synced version from storage let current_synced_version = self.db_reader @@ -185,151 +210,10 @@ impl ConsensusObserverSubscription { self.peer_network_id } - /// Verifies the given message is from the expected peer - pub fn verify_message_sender(&mut self, peer_network_id: &PeerNetworkId) -> Result<(), Error> { - // Verify the message is from the expected peer - if self.peer_network_id != *peer_network_id { - return Err(Error::UnexpectedError(format!( - "Received message from unexpected peer: {}! Subscribed to: {}", - peer_network_id, self.peer_network_id - ))); - } - - // Update the last message receive time + /// Updates the last message receive time to the current time + pub fn update_last_message_receive_time(&mut self) { self.last_message_receive_time = self.time_service.now(); - - Ok(()) - } -} - -/// Gets the distance from the validators for the specified peer from the peer metadata -fn get_distance_for_peer( - peer_network_id: &PeerNetworkId, - peer_metadata: &PeerMetadata, -) -> Option { - // Get the distance for the peer - let peer_monitoring_metadata = peer_metadata.get_peer_monitoring_metadata(); - let distance = peer_monitoring_metadata - .latest_network_info_response - .as_ref() - .map(|response| response.distance_from_validators); - - // If the distance is missing, log a warning - if distance.is_none() { - warn!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Unable to get distance for peer! Peer: {:?}", - peer_network_id - )) - ); - } - - distance -} - -/// Gets the latency for the specified peer from the peer metadata -fn get_latency_for_peer( - peer_network_id: &PeerNetworkId, - peer_metadata: &PeerMetadata, -) -> Option { - // Get the latency for the peer - let peer_monitoring_metadata = peer_metadata.get_peer_monitoring_metadata(); - let latency = peer_monitoring_metadata.average_ping_latency_secs; - - // If the latency is missing, log a warning - if latency.is_none() { - warn!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Unable to get latency for peer! Peer: {:?}", - peer_network_id - )) - ); - } - - latency -} - -/// Sorts the peers by subscription optimality (in descending order of -/// optimality). This requires: (i) sorting the peers by distance from the -/// validator set and ping latency (lower values are more optimal); and (ii) -/// filtering out peers that don't support consensus observer. -/// -/// Note: we prioritize distance over latency as we want to avoid close -/// but not up-to-date peers. If peers don't have sufficient metadata -/// for sorting, they are given a lower priority. 
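The sorting routine removed below is being moved into subscription_utils, but the ordering it documents is worth illustrating: peers are grouped by their distance from the validator set in a BTreeMap (so distances come out in ascending order), and each distance bucket is then ordered by ping latency. A condensed sketch of that ordering, with plain u64/f64 values standing in for the peer id, distance, and latency:

    use std::collections::BTreeMap;

    // Sketch only: each input tuple is (peer, distance, latency). Peers are grouped by
    // distance first (BTreeMap iterates keys in ascending order), then each group is
    // sorted by latency, so distance always takes precedence over latency.
    fn sort_by_distance_then_latency(peers: Vec<(u64, u64, f64)>) -> Vec<u64> {
        let mut peers_by_distance: BTreeMap<u64, Vec<(u64, f64)>> = BTreeMap::new();
        for (peer, distance, latency) in peers {
            peers_by_distance
                .entry(distance)
                .or_default()
                .push((peer, latency));
        }

        let mut sorted_peers = Vec::new();
        for (_, mut group) in peers_by_distance {
            group.sort_by(|a, b| a.1.total_cmp(&b.1));
            sorted_peers.extend(group.into_iter().map(|(peer, _)| peer));
        }
        sorted_peers
    }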
-pub fn sort_peers_by_subscription_optimality( - peers_and_metadata: &HashMap, -) -> Vec { - // Group peers and latencies by validator distance, i.e., distance -> [(peer, latency)] - let mut unsupported_peers = Vec::new(); - let mut peers_and_latencies_by_distance = BTreeMap::new(); - for (peer_network_id, peer_metadata) in peers_and_metadata { - // Verify that the peer supports consensus observer - if !supports_consensus_observer(peer_metadata) { - unsupported_peers.push(*peer_network_id); - continue; // Skip the peer - } - - // Get the distance and latency for the peer - let distance = get_distance_for_peer(peer_network_id, peer_metadata); - let latency = get_latency_for_peer(peer_network_id, peer_metadata); - - // If the distance is not found, use the maximum distance - let distance = - distance.unwrap_or(aptos_peer_monitoring_service_types::MAX_DISTANCE_FROM_VALIDATORS); - - // If the latency is not found, use a large latency - let latency = latency.unwrap_or(MAX_PING_LATENCY_SECS); - - // Add the peer and latency to the distance group - peers_and_latencies_by_distance - .entry(distance) - .or_insert_with(Vec::new) - .push((*peer_network_id, OrderedFloat(latency))); - } - - // If there are peers that don't support consensus observer, log them - if !unsupported_peers.is_empty() { - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Found {} peers that don't support consensus observer! Peers: {:?}", - unsupported_peers.len(), - unsupported_peers - )) - ); } - - // Sort the peers by distance and latency. Note: BTreeMaps are - // sorted by key, so the entries will be sorted by distance in ascending order. - let mut sorted_peers = Vec::new(); - for (_, mut peers_and_latencies) in peers_and_latencies_by_distance { - // Sort the peers by latency - peers_and_latencies.sort_by_key(|(_, latency)| *latency); - - // Add the peers to the sorted list (in sorted order) - sorted_peers.extend( - peers_and_latencies - .into_iter() - .map(|(peer_network_id, _)| peer_network_id), - ); - } - - // Log the sorted peers - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Sorted {} peers by subscription optimality! 
Peers: {:?}", - sorted_peers.len(), - sorted_peers - )) - ); - - sorted_peers -} - -/// Returns true iff the peer metadata indicates support for consensus observer -fn supports_consensus_observer(peer_metadata: &PeerMetadata) -> bool { - peer_metadata.supports_protocol(ProtocolId::ConsensusObserver) - && peer_metadata.supports_protocol(ProtocolId::ConsensusObserverRpc) } #[cfg(test)] @@ -340,12 +224,12 @@ mod test { use aptos_network::{ protocols::wire::handshake::v1::{MessagingProtocolVersion, ProtocolIdSet}, transport::{ConnectionId, ConnectionMetadata}, + ProtocolId, }; - use aptos_peer_monitoring_service_types::{ - response::NetworkInformationResponse, PeerMonitoringMetadata, - }; + use aptos_peer_monitoring_service_types::PeerMonitoringMetadata; use aptos_storage_interface::Result; use aptos_types::{network_address::NetworkAddress, transaction::Version}; + use claims::assert_matches; use mockall::mock; // This is a simple mock of the DbReader (it generates a MockDatabaseReader) @@ -357,12 +241,15 @@ mod test { } #[test] - fn check_subscription_peer_optimality() { - // Create a consensus observer config and time service - let consensus_observer_config = ConsensusObserverConfig::default(); - let time_service = TimeService::mock(); + fn test_check_subscription_health_connected_and_timeout() { + // Create a consensus observer config + let consensus_observer_config = ConsensusObserverConfig { + max_synced_version_timeout_ms: 100_000_000, // Use a large value so that we don't get DB progress errors + ..ConsensusObserverConfig::default() + }; // Create a new observer subscription + let time_service = TimeService::mock(); let peer_network_id = PeerNetworkId::random(); let mut subscription = ConsensusObserverSubscription::new( consensus_observer_config, @@ -371,47 +258,180 @@ mod test { time_service.clone(), ); - // Verify the time and peers for the last optimality check - let (last_check_time, last_check_peers) = - subscription.last_optimality_check_time_and_peers.clone(); - assert_eq!(last_check_time, time_service.now()); - assert!(last_check_peers.is_empty()); + // Verify that the subscription is unhealthy (the peer is not connected) + assert_matches!( + subscription.check_subscription_health(&HashMap::new()), + Err(Error::SubscriptionDisconnected(_)) + ); // Create a peers and metadata map for the subscription let mut peers_and_metadata = HashMap::new(); - peers_and_metadata.insert( + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); + + // Elapse enough time to timeout the subscription + let mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_subscription_timeout_ms + 1, + )); + + // Verify that the subscription has timed out + assert_matches!( + subscription.check_subscription_health(&peers_and_metadata), + Err(Error::SubscriptionTimeout(_)) + ); + } + + #[test] + fn test_check_subscription_health_progress() { + // Create a consensus observer config with a large timeout + let consensus_observer_config = ConsensusObserverConfig { + max_subscription_timeout_ms: 100_000_000, // Use a large value so that we don't time out + ..ConsensusObserverConfig::default() + }; + + // Create a mock DB reader with expectations + let first_synced_version = 1; + let second_synced_version = 2; + let mut mock_db_reader = MockDatabaseReader::new(); + mock_db_reader + .expect_get_latest_ledger_info_version() + .returning(move || Ok(first_synced_version)) + .times(1); // Only allow one call for the first 
version + mock_db_reader + .expect_get_latest_ledger_info_version() + .returning(move || Ok(second_synced_version)); // Allow multiple calls for the second version + + // Create a new observer subscription + let peer_network_id = PeerNetworkId::random(); + let time_service = TimeService::mock(); + let mut subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(mock_db_reader), peer_network_id, - PeerMetadata::new_for_test( - create_connection_metadata(peer_network_id, true), - PeerMonitoringMetadata::new(None, None, None, None, None), - ), + time_service.clone(), + ); + + // Verify that the DB is making sync progress and that the highest synced version is updated + let mock_time_service = time_service.into_mock(); + verify_subscription_syncing_progress( + &mut subscription, + first_synced_version, + mock_time_service.now(), + ); + + // Elapse enough time to timeout the subscription + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_synced_version_timeout_ms + 1, + )); + + // Verify that the DB is still making sync progress (the next version is higher) + verify_subscription_syncing_progress( + &mut subscription, + second_synced_version, + mock_time_service.now(), + ); + + // Elapse enough time to timeout the subscription + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_synced_version_timeout_ms + 1, + )); + + // Verify that the DB is not making sync progress and that the subscription has timed out + assert_matches!( + subscription.check_syncing_progress(), + Err(Error::SubscriptionProgressStopped(_)) + ); + } + + #[test] + fn test_check_subscription_health_optimality() { + // Create a consensus observer config with a single subscription and large timeouts + let consensus_observer_config = ConsensusObserverConfig { + max_concurrent_subscriptions: 1, + max_subscription_timeout_ms: 100_000_000, // Use a large value so that we don't time out + max_synced_version_timeout_ms: 100_000_000, // Use a large value so that we don't get DB progress errors + ..ConsensusObserverConfig::default() + }; + + // Create a mock DB reader with expectations + let mut mock_db_reader = MockDatabaseReader::new(); + mock_db_reader + .expect_get_latest_ledger_info_version() + .returning(move || Ok(1)); + + // Create a new observer subscription + let time_service = TimeService::mock(); + let peer_network_id = PeerNetworkId::random(); + let mut subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(mock_db_reader), + peer_network_id, + time_service.clone(), ); + // Create a peers and metadata map for the subscription + let mut peers_and_metadata = HashMap::new(); + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); + + // Verify that the subscription is healthy + assert!(subscription + .check_subscription_health(&peers_and_metadata) + .is_ok()); + // Add a more optimal peer to the set of peers let new_optimal_peer = PeerNetworkId::random(); - peers_and_metadata.insert( - new_optimal_peer, - PeerMetadata::new_for_test( - create_connection_metadata(new_optimal_peer, true), - PeerMonitoringMetadata::new(Some(0.1), None, None, None, None), - ), + add_metadata_for_peer(&mut peers_and_metadata, new_optimal_peer, true, true); + + // Elapse enough time for a peer optimality check + let mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_peer_change_interval_ms + 1, + )); + + // 
Verify that the subscription is no longer optimal + assert_matches!( + subscription.check_subscription_health(&peers_and_metadata), + Err(Error::SubscriptionSuboptimal(_)) + ); + } + + #[test] + fn test_check_subscription_peer_optimality_single() { + // Create a consensus observer config with a maximum of 1 subscription + let consensus_observer_config = create_observer_config(1); + + // Create a new observer subscription + let time_service = TimeService::mock(); + let peer_network_id = PeerNetworkId::random(); + let mut subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(MockDatabaseReader::new()), + peer_network_id, + time_service.clone(), ); + // Verify the time and peers for the last optimality check + let mock_time_service = time_service.into_mock(); + verify_last_check_time_and_peers(&subscription, mock_time_service.now(), HashSet::new()); + + // Create a peers and metadata map for the subscription + let mut peers_and_metadata = HashMap::new(); + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); + + // Add a more optimal peer to the set of peers + let new_optimal_peer = PeerNetworkId::random(); + add_metadata_for_peer(&mut peers_and_metadata, new_optimal_peer, true, true); + // Verify that the peer is optimal (not enough time has elapsed to check) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Elapse some amount of time (but not enough to check optimality) - let mock_time_service = time_service.into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.subscription_peer_change_interval_ms / 2, )); // Verify that the peer is still optimal (not enough time has elapsed to check) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Elapse enough time to check the peer optimality mock_time_service.advance(Duration::from_millis( @@ -419,17 +439,13 @@ mod test { )); // Verify that the peer is no longer optimal (a more optimal peer has been added) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_err()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); // Verify the time of the last peer optimality check - let (last_check_time, last_check_peers) = - subscription.last_optimality_check_time_and_peers.clone(); - assert_eq!(last_check_time, mock_time_service.now()); - assert_eq!( - last_check_peers, - peers_and_metadata.keys().cloned().collect() + verify_last_check_time_and_peers( + &subscription, + mock_time_service.now(), + peers_and_metadata.keys().cloned().collect(), ); // Elapse enough time to check the peer optimality @@ -438,35 +454,29 @@ mod test { )); // Verify that the peer is now optimal (the peers haven't changed) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Remove the current peer from the list of peers peers_and_metadata.remove(&peer_network_id); // Verify that the peer is not optimal (the peers have changed) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_err()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, 
false); // Verify the time of the last peer optimality check - let (last_check_time, last_check_peers) = - subscription.last_optimality_check_time_and_peers.clone(); - assert_eq!(last_check_time, mock_time_service.now()); - assert_eq!( - last_check_peers, - peers_and_metadata.keys().cloned().collect() + verify_last_check_time_and_peers( + &subscription, + mock_time_service.now(), + peers_and_metadata.keys().cloned().collect(), ); } #[test] - fn check_subscription_peer_refresh() { - // Create a consensus observer config and time service - let consensus_observer_config = ConsensusObserverConfig::default(); - let time_service = TimeService::mock(); + fn test_check_subscription_peer_optimality_multiple() { + // Create a consensus observer config with a maximum of 2 subscriptions + let consensus_observer_config = create_observer_config(2); // Create a new observer subscription + let time_service = TimeService::mock(); let peer_network_id = PeerNetworkId::random(); let mut subscription = ConsensusObserverSubscription::new( consensus_observer_config, @@ -477,33 +487,73 @@ mod test { // Create a peers and metadata map for the subscription let mut peers_and_metadata = HashMap::new(); - peers_and_metadata.insert( + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); + + // Add a more optimal peer to the set of peers + let new_optimal_peer = PeerNetworkId::random(); + add_metadata_for_peer(&mut peers_and_metadata, new_optimal_peer, true, true); + + // Elapse enough time to check the peer optimality + let mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_peer_change_interval_ms + 1, + )); + + // Verify that the peer is optimal (it's in the top 2 most optimal peers) + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); + + // Add another more optimal peer to the set of peers + let another_optimal_peer = PeerNetworkId::random(); + add_metadata_for_peer(&mut peers_and_metadata, another_optimal_peer, true, true); + + // Elapse enough time to check the peer optimality + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_peer_change_interval_ms + 1, + )); + + // Verify that the peer is no longer optimal (it's not in the top 2 most optimal peers) + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); + + // Remove the previous optimal peer from the list of peers + peers_and_metadata.remove(&new_optimal_peer); + + // Elapse enough time to check the peer optimality + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_peer_change_interval_ms + 1, + )); + + // Verify that the peer is optimal (it's in the top 2 most optimal peers) + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); + } + + #[test] + fn test_check_subscription_peer_optimality_refresh() { + // Create a consensus observer config with a maximum of 1 subscription + let consensus_observer_config = create_observer_config(1); + + // Create a new observer subscription + let time_service = TimeService::mock(); + let peer_network_id = PeerNetworkId::random(); + let mut subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(MockDatabaseReader::new()), peer_network_id, - PeerMetadata::new_for_test( - create_connection_metadata(peer_network_id, true), - PeerMonitoringMetadata::new(None, None, None, None, None), - ), + time_service.clone(), ); 
+ // Create a peers and metadata map for the subscription + let mut peers_and_metadata = HashMap::new(); + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); + // Verify that the peer is optimal (not enough time has elapsed to refresh) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Add a more optimal peer to the set of peers let new_optimal_peer = PeerNetworkId::random(); - peers_and_metadata.insert( - new_optimal_peer, - PeerMetadata::new_for_test( - create_connection_metadata(new_optimal_peer, true), - PeerMonitoringMetadata::new(Some(0.1), None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, new_optimal_peer, true, true); // Verify that the peer is still optimal (not enough time has elapsed to refresh) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Elapse enough time to refresh optimality let mock_time_service = time_service.into_mock(); @@ -512,9 +562,7 @@ mod test { )); // Verify that the peer is no longer optimal - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_err()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); // Elapse some amount of time (but not enough to refresh) mock_time_service.advance(Duration::from_millis( @@ -522,9 +570,7 @@ mod test { )); // Verify that the peer is now optimal (not enough time has elapsed to refresh) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Remove the more optimal peer from the list of peers peers_and_metadata.remove(&new_optimal_peer); @@ -535,23 +581,23 @@ mod test { )); // Verify that the peer is optimal - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Verify the time of the last peer optimality check - let current_time = mock_time_service.now(); - let (last_check_time, _) = subscription.last_optimality_check_time_and_peers; - assert_eq!(last_check_time, current_time); + verify_last_check_time_and_peers( + &subscription, + mock_time_service.now(), + peers_and_metadata.keys().cloned().collect(), + ); } #[test] - fn check_subscription_peer_optimality_supported() { - // Create a consensus observer config and time service - let consensus_observer_config = ConsensusObserverConfig::default(); - let time_service = TimeService::mock(); + fn test_check_subscription_peer_optimality_supported() { + // Create a consensus observer config with a maximum of 1 subscription + let consensus_observer_config = create_observer_config(1); // Create a new observer subscription + let time_service = TimeService::mock(); let peer_network_id = PeerNetworkId::random(); let mut subscription = ConsensusObserverSubscription::new( consensus_observer_config, @@ -562,13 +608,7 @@ mod test { // Insert empty metadata for the subscription peer let mut peers_and_metadata = HashMap::new(); - peers_and_metadata.insert( - peer_network_id, - PeerMetadata::new_for_test( - create_connection_metadata(peer_network_id, true), - PeerMonitoringMetadata::new(None, None, None, None, None), - ), - ); + 
add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); // Elapse enough time to check optimality let mock_time_service = time_service.into_mock(); @@ -577,19 +617,11 @@ mod test { )); // Verify that the peer is still optimal (there are no other peers) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Add a more optimal peer without consensus observer support let unsupported_peer = PeerNetworkId::random(); - peers_and_metadata.insert( - unsupported_peer, - PeerMetadata::new_for_test( - create_connection_metadata(unsupported_peer, false), - PeerMonitoringMetadata::new(Some(0.1), None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, unsupported_peer, false, false); // Elapse enough time to check optimality mock_time_service.advance(Duration::from_millis( @@ -597,19 +629,11 @@ mod test { )); // Verify that the peer is still optimal (the unsupported peer is ignored) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Add another more optimal peer with consensus observer support let supported_peer = PeerNetworkId::random(); - peers_and_metadata.insert( - supported_peer, - PeerMetadata::new_for_test( - create_connection_metadata(supported_peer, true), - PeerMonitoringMetadata::new(Some(0.01), None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, supported_peer, true, true); // Elapse enough time to check optimality mock_time_service.advance(Duration::from_millis( @@ -617,9 +641,7 @@ mod test { )); // Verify that the peer is no longer optimal - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_err()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); } #[test] @@ -637,7 +659,7 @@ mod test { // Verify that the subscription has not timed out and that the last message time is updated let current_time = time_service.now(); - assert!(subscription.check_subscription_timeout().is_ok()); + verify_subscription_time_out(&subscription, false); assert_eq!(subscription.last_message_receive_time, current_time); // Elapse some amount of time (but not enough to timeout) @@ -647,17 +669,15 @@ mod test { )); // Verify that the subscription has not timed out - assert!(subscription.check_subscription_timeout().is_ok()); + verify_subscription_time_out(&subscription, false); - // Verify a new message is received successfully and that the last message time is updated + // Update the last message receive time let current_time = mock_time_service.now(); - subscription - .verify_message_sender(&peer_network_id) - .unwrap(); + subscription.update_last_message_receive_time(); assert_eq!(subscription.last_message_receive_time, current_time); // Verify that the subscription has not timed out - assert!(subscription.check_subscription_timeout().is_ok()); + verify_subscription_time_out(&subscription, false); // Elapse enough time to timeout the subscription mock_time_service.advance(Duration::from_millis( @@ -665,7 +685,7 @@ mod test { )); // Verify that the subscription has timed out - assert!(subscription.check_subscription_timeout().is_err()); + verify_subscription_time_out(&subscription, true); } #[test] @@ -694,25 +714,23 @@ mod test { ); // Verify that the DB is making sync progress and that 
the highest synced version is updated - let current_time = time_service.now(); - assert!(subscription.check_syncing_progress().is_ok()); - assert_eq!( - subscription.highest_synced_version_and_time, - (first_synced_version, current_time) + let mock_time_service = time_service.into_mock(); + verify_subscription_syncing_progress( + &mut subscription, + first_synced_version, + mock_time_service.now(), ); // Elapse some amount of time (not enough to timeout) - let mock_time_service = time_service.into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.max_synced_version_timeout_ms / 2, )); // Verify that the DB is still making sync progress - let current_time = mock_time_service.now(); - assert!(subscription.check_syncing_progress().is_ok()); - assert_eq!( - subscription.highest_synced_version_and_time, - (first_synced_version, current_time) + verify_subscription_syncing_progress( + &mut subscription, + first_synced_version, + mock_time_service.now(), ); // Elapse enough time to timeout the subscription @@ -721,11 +739,10 @@ mod test { )); // Verify that the DB is still making sync progress (the next version is higher) - let current_time = mock_time_service.now(); - assert!(subscription.check_syncing_progress().is_ok()); - assert_eq!( - subscription.highest_synced_version_and_time, - (second_synced_version, current_time) + verify_subscription_syncing_progress( + &mut subscription, + second_synced_version, + mock_time_service.now(), ); // Elapse enough time to timeout the subscription @@ -734,11 +751,31 @@ mod test { )); // Verify that the DB is not making sync progress and that the subscription has timed out - assert!(subscription.check_syncing_progress().is_err()); + assert_matches!( + subscription.check_syncing_progress(), + Err(Error::SubscriptionProgressStopped(_)) + ); + } + + #[test] + fn test_get_peer_network_id() { + // Create a new observer subscription + let consensus_observer_config = ConsensusObserverConfig::default(); + let peer_network_id = PeerNetworkId::random(); + let time_service = TimeService::mock(); + let subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(MockDatabaseReader::new()), + peer_network_id, + time_service.clone(), + ); + + // Verify that the peer network id matches the expected value + assert_eq!(subscription.get_peer_network_id(), peer_network_id); } #[test] - fn test_verify_message_sender() { + fn test_update_last_message_receive_time() { // Create a new observer subscription let consensus_observer_config = ConsensusObserverConfig::default(); let peer_network_id = PeerNetworkId::random(); @@ -750,140 +787,39 @@ mod test { time_service.clone(), ); - // Verify that the message sender is valid - let current_time = time_service.now(); - assert!(subscription.verify_message_sender(&peer_network_id).is_ok()); - assert_eq!(subscription.last_message_receive_time, current_time); + // Verify the initial last message time + assert_eq!(subscription.last_message_receive_time, time_service.now()); // Elapse some amount of time let mock_time_service = time_service.into_mock(); mock_time_service.advance(Duration::from_secs(10)); - // Verify that the message sender is not the expected peer - let other_peer_network_id = PeerNetworkId::random(); - assert!(subscription - .verify_message_sender(&other_peer_network_id) - .is_err()); - assert_eq!(subscription.last_message_receive_time, current_time); - - // Elapse more time - mock_time_service.advance(Duration::from_secs(10)); - - // Verify that the message 
sender is the expected peer and that the last message time is updated + // Update the last message time let current_time = mock_time_service.now(); - assert!(subscription.verify_message_sender(&peer_network_id).is_ok()); - assert_eq!(subscription.last_message_receive_time, current_time); - } + subscription.update_last_message_receive_time(); - #[test] - fn test_sort_peers_by_distance_and_latency() { - // Sort an empty list of peers - let peers_and_metadata = HashMap::new(); - assert!(sort_peers_by_subscription_optimality(&peers_and_metadata).is_empty()); - - // Create a list of peers with empty metadata - let peers_and_metadata = create_peers_and_metadata(true, true, true, 10); - - // Sort the peers and verify the results - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 10); - - // Create a list of peers with valid metadata - let peers_and_metadata = create_peers_and_metadata(false, false, true, 10); - - // Sort the peers - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - - // Verify the order of the peers - verify_increasing_distance_latencies(&peers_and_metadata, &sorted_peers); - assert_eq!(sorted_peers.len(), 10); - - // Create a list of peers with and without metadata - let mut peers_and_metadata = create_peers_and_metadata(false, false, true, 10); - peers_and_metadata.extend(create_peers_and_metadata(true, false, true, 10)); - peers_and_metadata.extend(create_peers_and_metadata(false, true, true, 10)); - peers_and_metadata.extend(create_peers_and_metadata(true, true, true, 10)); - - // Sort the peers - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 40); - - // Verify the order of the first 20 peers - let (first_20_peers, sorted_peers) = sorted_peers.split_at(20); - verify_increasing_distance_latencies(&peers_and_metadata, first_20_peers); - - // Verify that the next 10 peers only have latency metadata - let (next_10_peers, sorted_peers) = sorted_peers.split_at(10); - for sorted_peer in next_10_peers { - let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); - assert!(get_distance_for_peer(sorted_peer, peer_metadata).is_none()); - assert!(get_latency_for_peer(sorted_peer, peer_metadata).is_some()); - } - - // Verify that the last 10 peers have no metadata - let (last_10_peers, remaining_peers) = sorted_peers.split_at(10); - for sorted_peer in last_10_peers { - let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); - assert!(get_distance_for_peer(sorted_peer, peer_metadata).is_none()); - assert!(get_latency_for_peer(sorted_peer, peer_metadata).is_none()); - } - assert!(remaining_peers.is_empty()); + // Verify that the last message time is updated + assert_eq!(subscription.last_message_receive_time, current_time); } - #[test] - fn test_sort_peers_by_distance_and_latency_filter() { - // Sort an empty list of peers - let peers_and_metadata = HashMap::new(); - assert!(sort_peers_by_subscription_optimality(&peers_and_metadata).is_empty()); - - // Create a list of peers with empty metadata (with consensus observer support) - let peers_and_metadata = create_peers_and_metadata(true, true, true, 10); - - // Sort the peers and verify the results - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 10); - - // Create a list of peers with empty metadata (without consensus observer support) - let peers_and_metadata = create_peers_and_metadata(true, true, 
false, 10); - - // Sort the peers and verify the results - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert!(sorted_peers.is_empty()); - - // Create a list of peers with valid metadata (without consensus observer support) - let peers_and_metadata = create_peers_and_metadata(false, false, false, 10); - - // Sort the peers and verify the results - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert!(sorted_peers.is_empty()); - - // Create a list of peers with empty metadata (with and without consensus observer support) - let mut peers_and_metadata = create_peers_and_metadata(true, true, true, 5); - peers_and_metadata.extend(create_peers_and_metadata(true, true, false, 50)); - - // Sort the peers and verify the results (only the supported peers are sorted) - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 5); - - // Create a list of peers with valid metadata (with and without consensus observer support) - let mut peers_and_metadata = create_peers_and_metadata(false, false, true, 50); - peers_and_metadata.extend(create_peers_and_metadata(false, false, false, 10)); - - // Sort the peers and verify the results (only the supported peers are sorted) - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 50); - - // Create a list of peers with valid metadata (with and without consensus observer support) - let supported_peer_and_metadata = create_peers_and_metadata(false, false, true, 1); - let unsupported_peer_and_metadata = create_peers_and_metadata(false, false, false, 1); - let mut peers_and_metadata = HashMap::new(); - peers_and_metadata.extend(supported_peer_and_metadata.clone()); - peers_and_metadata.extend(unsupported_peer_and_metadata); + /// Adds metadata for the specified peer to the map of peers and metadata + fn add_metadata_for_peer( + peers_and_metadata: &mut HashMap, + peer_network_id: PeerNetworkId, + support_consensus_observer: bool, + set_ping_latency: bool, + ) { + // Determine the ping latency to use for the peer + let average_ping_latency = if set_ping_latency { Some(0.1) } else { None }; - // Sort the peers and verify the results (only the supported peer is sorted) - let supported_peer = supported_peer_and_metadata.keys().next().unwrap(); - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers, vec![*supported_peer]); + // Add the peer and metadata to the map + peers_and_metadata.insert( + peer_network_id, + PeerMetadata::new_for_test( + create_connection_metadata(peer_network_id, support_consensus_observer), + PeerMonitoringMetadata::new(average_ping_latency, None, None, None, None), + ), + ); } /// Creates a new connection metadata for testing @@ -913,82 +849,69 @@ mod test { } } - /// Creates a new peer and metadata for testing - fn create_peer_and_metadata( - latency: Option, - distance_from_validators: Option, - support_consensus_observer: bool, - ) -> (PeerNetworkId, PeerMetadata) { - // Create a random peer - let peer_network_id = PeerNetworkId::random(); - - // Create a new peer metadata with the given latency and distance - let connection_metadata = - create_connection_metadata(peer_network_id, support_consensus_observer); - let network_information_response = - distance_from_validators.map(|distance| NetworkInformationResponse { - connected_peers: BTreeMap::new(), - distance_from_validators: distance, - }); - let 
peer_monitoring_metadata = - PeerMonitoringMetadata::new(latency, None, network_information_response, None, None); - let peer_metadata = - PeerMetadata::new_for_test(connection_metadata, peer_monitoring_metadata); - - (peer_network_id, peer_metadata) + /// Creates a consensus observer config with the given max concurrent subscriptions + fn create_observer_config(max_concurrent_subscriptions: u64) -> ConsensusObserverConfig { + ConsensusObserverConfig { + max_concurrent_subscriptions, + ..ConsensusObserverConfig::default() + } } - /// Creates a list of peers and metadata for testing - fn create_peers_and_metadata( - empty_latency: bool, - empty_distance: bool, - support_consensus_observer: bool, - num_peers: u64, - ) -> HashMap { - let mut peers_and_metadata = HashMap::new(); - for i in 1..num_peers + 1 { - // Determine the distance for the peer - let distance = if empty_distance { None } else { Some(i) }; + /// Verifies that the last check time and peers are as expected + fn verify_last_check_time_and_peers( + subscription: &ConsensusObserverSubscription, + expected_last_check_time: Instant, + expected_last_check_peers: HashSet, + ) { + // Get the last check time and peers from the subscription + let (last_check_time, last_check_peers) = + subscription.last_optimality_check_time_and_peers.clone(); + + // Verify the last check time and peers match the expected values + assert_eq!(last_check_time, expected_last_check_time); + assert_eq!(last_check_peers, expected_last_check_peers); + } - // Determine the latency for the peer - let latency = if empty_latency { None } else { Some(i as f64) }; + /// Verifies that the subscription time out matches the expected value + fn verify_subscription_time_out(subscription: &ConsensusObserverSubscription, timed_out: bool) { + // Check if the subscription has timed out + let result = subscription.check_subscription_timeout(); - // Create a new peer and metadata - let (peer_network_id, peer_metadata) = - create_peer_and_metadata(latency, distance, support_consensus_observer); - peers_and_metadata.insert(peer_network_id, peer_metadata); + // Verify the result + if timed_out { + assert_matches!(result, Err(Error::SubscriptionTimeout(_))); + } else { + assert!(result.is_ok()); } - peers_and_metadata } - /// Verifies that the distance and latencies for the peers are in - /// increasing order (with the distance taking precedence over the latency). 
- fn verify_increasing_distance_latencies( + /// Verifies that the peer optimality matches the expected value + fn verify_subscription_peer_optimality( + subscription: &mut ConsensusObserverSubscription, peers_and_metadata: &HashMap, - sorted_peers: &[PeerNetworkId], + is_optimal: bool, ) { - let mut previous_latency = None; - let mut previous_distance = 0; - for sorted_peer in sorted_peers { - // Get the distance and latency for the peer - let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); - let distance = get_distance_for_peer(sorted_peer, peer_metadata).unwrap(); - let latency = get_latency_for_peer(sorted_peer, peer_metadata); - - // Verify the order of the peers - if distance == previous_distance { - if let Some(latency) = latency { - if let Some(previous_latency) = previous_latency { - assert!(latency >= previous_latency); - } - } - } else { - assert!(distance > previous_distance); - } + // Check the subscription peer optimality + let result = subscription.check_subscription_peer_optimality(peers_and_metadata); - // Update the previous latency and distance - previous_latency = latency; - previous_distance = distance; + // Verify the result + if is_optimal { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionSuboptimal(_))); } } + + /// Verifies that the syncing progress is as expected + fn verify_subscription_syncing_progress( + subscription: &mut ConsensusObserverSubscription, + first_synced_version: Version, + time: Instant, + ) { + assert!(subscription.check_syncing_progress().is_ok()); + assert_eq!( + subscription.highest_synced_version_and_time, + (first_synced_version, time) + ); + } } diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs index 2f124e5841cd3..24ae1f7d321b4 100644 --- a/consensus/src/consensus_observer/observer/subscription_manager.rs +++ b/consensus/src/consensus_observer/observer/subscription_manager.rs @@ -13,20 +13,27 @@ use crate::consensus_observer::{ ConsensusObserverMessage, ConsensusObserverRequest, ConsensusObserverResponse, }, }, - observer::{subscription, subscription::ConsensusObserverSubscription}, + observer::{subscription::ConsensusObserverSubscription, subscription_utils}, publisher::consensus_publisher::ConsensusPublisher, }; use aptos_config::{config::ConsensusObserverConfig, network_id::PeerNetworkId}; -use aptos_logger::{error, info, warn}; +use aptos_infallible::Mutex; +use aptos_logger::{info, warn}; use aptos_network::application::{interface::NetworkClient, metadata::PeerMetadata}; use aptos_storage_interface::DbReader; use aptos_time_service::TimeService; +use itertools::Itertools; use std::{collections::HashMap, sync::Arc}; +use tokio::task::JoinHandle; /// The manager for consensus observer subscriptions pub struct SubscriptionManager { - // The currently active consensus observer subscription - active_observer_subscription: Option, + // The currently active set of consensus observer subscriptions + active_observer_subscriptions: + Arc>>, + + // The active subscription creation task (if one is currently running) + active_subscription_creation_task: Arc>>>, // The consensus observer client to send network messages consensus_observer_client: @@ -56,7 +63,8 @@ impl SubscriptionManager { time_service: TimeService, ) -> Self { Self { - active_observer_subscription: None, + active_observer_subscriptions: Arc::new(Mutex::new(HashMap::new())), + active_subscription_creation_task: 
Arc::new(Mutex::new(None)), consensus_observer_client, consensus_observer_config, consensus_publisher, @@ -65,244 +73,224 @@ impl SubscriptionManager { } } - /// Checks if the active subscription is still healthy. If not, an error is returned. - fn check_active_subscription(&mut self) -> Result<(), Error> { - let active_observer_subscription = self.active_observer_subscription.take(); - if let Some(mut active_subscription) = active_observer_subscription { - // Check if the peer for the subscription is still connected - let peer_network_id = active_subscription.get_peer_network_id(); - let peer_still_connected = self - .get_connected_peers_and_metadata() - .map_or(false, |peers_and_metadata| { - peers_and_metadata.contains_key(&peer_network_id) - }); - - // Verify the peer is still connected - if !peer_still_connected { - return Err(Error::SubscriptionDisconnected( - "The peer is no longer connected!".to_string(), - )); - } - - // Verify the subscription has not timed out - active_subscription.check_subscription_timeout()?; - - // Verify that the DB is continuing to sync and commit new data - active_subscription.check_syncing_progress()?; - - // Verify that the subscription peer is optimal - if let Some(peers_and_metadata) = self.get_connected_peers_and_metadata() { - active_subscription.check_subscription_peer_optimality(peers_and_metadata)?; - } - - // The subscription seems healthy, we can keep it - self.active_observer_subscription = Some(active_subscription); + /// Checks if the subscription to the given peer is still healthy. + /// If not, an error explaining why it is unhealthy is returned. + fn check_subscription_health( + &mut self, + connected_peers_and_metadata: &HashMap, + peer_network_id: PeerNetworkId, + ) -> Result<(), Error> { + // Get the active subscription for the peer + let mut active_observer_subscriptions = self.active_observer_subscriptions.lock(); + let active_subscription = active_observer_subscriptions.get_mut(&peer_network_id); + + // Check the health of the subscription + match active_subscription { + Some(active_subscription) => { + active_subscription.check_subscription_health(connected_peers_and_metadata) + }, + None => Err(Error::UnexpectedError(format!( + "The subscription to peer: {:?} is not active!", + peer_network_id + ))), } + } - Ok(()) + /// Checks the health of the active subscriptions. If any subscription is + /// unhealthy, it will be terminated and new subscriptions will be created. + /// This returns an error iff all subscriptions were unhealthy and terminated. 
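// Illustrative sketch (not from the patch): the "all subscriptions terminated" contract
// described in the doc comment above reduces to a simple count comparison. The helper
// and test below are hypothetical, shown only to make the condition used by the
// function explicit.
fn all_subscriptions_terminated(num_initial_subscriptions: usize, num_terminated: usize) -> bool {
    // An error is only warranted if at least one subscription existed and every one was dropped
    num_terminated > 0 && num_terminated == num_initial_subscriptions
}

#[test]
fn reset_error_only_when_every_subscription_was_dropped() {
    assert!(!all_subscriptions_terminated(0, 0)); // Nothing to terminate
    assert!(!all_subscriptions_terminated(3, 2)); // One healthy subscription remains
    assert!(all_subscriptions_terminated(3, 3)); // Everything was unhealthy
}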
+ pub async fn check_and_manage_subscriptions(&mut self) -> Result<(), Error> { + // Get the subscription and connected peers + let initial_subscription_peers = self.get_active_subscription_peers(); + let connected_peers_and_metadata = self.get_connected_peers_and_metadata(); + + // Terminate any unhealthy subscriptions + let terminated_subscriptions = + self.terminate_unhealthy_subscriptions(&connected_peers_and_metadata); + + // Check if all subscriptions were terminated + let num_terminated_subscriptions = terminated_subscriptions.len(); + let all_subscriptions_terminated = num_terminated_subscriptions > 0 + && num_terminated_subscriptions == initial_subscription_peers.len(); + + // Calculate the number of new subscriptions to create + let remaining_subscription_peers = self.get_active_subscription_peers(); + let max_concurrent_subscriptions = + self.consensus_observer_config.max_concurrent_subscriptions as usize; + let num_subscriptions_to_create = + max_concurrent_subscriptions.saturating_sub(remaining_subscription_peers.len()); + + // Update the total subscription metrics + update_total_subscription_metrics(&remaining_subscription_peers); + + // Spawn a task to create the new subscriptions (asynchronously) + self.spawn_subscription_creation_task( + num_subscriptions_to_create, + remaining_subscription_peers, + terminated_subscriptions, + connected_peers_and_metadata, + ) + .await; + + // Return an error if all subscriptions were terminated + if all_subscriptions_terminated { + Err(Error::SubscriptionsReset(format!( + "All {:?} subscriptions were unhealthy and terminated!", + num_terminated_subscriptions, + ))) + } else { + Ok(()) + } } - /// Checks the health of the active subscription. If the subscription is - /// unhealthy, it will be terminated and a new subscription will be created. - /// This returns true iff a new subscription was created. - pub async fn check_and_manage_subscriptions(&mut self) -> bool { - // Get the peer ID of the currently active subscription (if any) - let active_subscription_peer = self - .active_observer_subscription - .as_ref() - .map(|subscription| subscription.get_peer_network_id()); + /// Returns the currently active subscription peers + fn get_active_subscription_peers(&self) -> Vec { + let active_observer_subscriptions = self.active_observer_subscriptions.lock(); + active_observer_subscriptions.keys().cloned().collect() + } - // If we have an active subscription, verify that the subscription - // is still healthy. If not, the subscription should be terminated. - if let Some(active_subscription_peer) = active_subscription_peer { - if let Err(error) = self.check_active_subscription() { - // Log the subscription termination + /// Gets the connected peers and metadata. If an error + /// occurred, it is logged and an empty map is returned. + fn get_connected_peers_and_metadata(&self) -> HashMap { + self.consensus_observer_client + .get_peers_and_metadata() + .get_connected_peers_and_metadata() + .unwrap_or_else(|error| { + // Log the error warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Terminating subscription to peer: {:?}! Error: {:?}", - active_subscription_peer, error + "Failed to get connected peers and metadata! 
Error: {:?}", + error )) ); - // Unsubscribe from the peer - self.unsubscribe_from_peer(active_subscription_peer); - - // Update the subscription termination metrics - self.update_subscription_termination_metrics(active_subscription_peer, error); - } - } - - // If we don't have a subscription, we should select a new peer to - // subscribe to. If we had a previous subscription (and it was - // terminated) it should be excluded from the selection process. - if self.active_observer_subscription.is_none() { - // Create a new observer subscription - self.create_new_observer_subscription(active_subscription_peer) - .await; - - // If we successfully created a new subscription, update the metrics - if let Some(active_subscription) = &self.active_observer_subscription { - // Update the subscription creation metrics - self.update_subscription_creation_metrics( - active_subscription.get_peer_network_id(), - ); - - return true; // A new subscription was created - } - } - - false // No new subscription was created + // Return an empty map + HashMap::new() + }) } - /// Creates a new observer subscription by sending subscription requests to - /// appropriate peers and waiting for a successful response. If `previous_subscription_peer` - /// is provided, it will be excluded from the selection process. - async fn create_new_observer_subscription( + /// Spawns a new subscription creation task to create + /// the specified number of new subscriptions. + async fn spawn_subscription_creation_task( &mut self, - previous_subscription_peer: Option, + num_subscriptions_to_create: usize, + active_subscription_peers: Vec, + terminated_subscriptions: Vec<(PeerNetworkId, Error)>, + connected_peers_and_metadata: HashMap, ) { - // Get a set of sorted peers to service our subscription request - let sorted_peers = match self.sort_peers_for_subscription(previous_subscription_peer) { - Some(sorted_peers) => sorted_peers, - None => { - error!(LogSchema::new(LogEntry::ConsensusObserver) - .message("Failed to sort peers for subscription requests!")); - return; - }, - }; - - // Verify that we have potential peers - if sorted_peers.is_empty() { - warn!(LogSchema::new(LogEntry::ConsensusObserver) - .message("There are no peers to subscribe to!")); + // If there are no new subscriptions to create, return early + if num_subscriptions_to_create == 0 { return; } - // Go through the sorted peers and attempt to subscribe to a single peer. - // The first peer that responds successfully will be the selected peer. - for selected_peer in &sorted_peers { - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Attempting to subscribe to peer: {}!", - selected_peer - )) - ); - - // Send a subscription request to the peer and wait for the response. - // Note: it is fine to block here because we assume only a single active subscription. 
- let subscription_request = ConsensusObserverRequest::Subscribe; - let request_timeout_ms = self.consensus_observer_config.network_request_timeout_ms; - let response = self - .consensus_observer_client - .send_rpc_request_to_peer(selected_peer, subscription_request, request_timeout_ms) - .await; - - // Process the response and update the active subscription - match response { - Ok(ConsensusObserverResponse::SubscribeAck) => { - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Successfully subscribed to peer: {}!", - selected_peer - )) - ); - - // Update the active subscription - let subscription = ConsensusObserverSubscription::new( - self.consensus_observer_config, - self.db_reader.clone(), - *selected_peer, - self.time_service.clone(), - ); - self.active_observer_subscription = Some(subscription); - - return; // Return after successfully subscribing - }, - Ok(response) => { - // We received an invalid response - warn!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Got unexpected response type: {:?}", - response.get_label() - )) - ); - }, - Err(error) => { - // We encountered an error while sending the request - error!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Failed to send subscription request to peer: {}! Error: {:?}", - selected_peer, error - )) - ); - }, + // If there is an active subscription creation task, return early + if let Some(subscription_creation_task) = &*self.active_subscription_creation_task.lock() { + if !subscription_creation_task.is_finished() { + return; // The task is still running } } - // We failed to connect to any peers - warn!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Failed to subscribe to any peers! Num peers attempted: {:?}", - sorted_peers.len() - )) - ); - } + // Clone the shared state for the task + let active_observer_subscriptions = self.active_observer_subscriptions.clone(); + let consensus_observer_config = self.consensus_observer_config; + let consensus_observer_client = self.consensus_observer_client.clone(); + let consensus_publisher = self.consensus_publisher.clone(); + let db_reader = self.db_reader.clone(); + let time_service = self.time_service.clone(); + + // Spawn a new subscription creation task + let subscription_creation_task = tokio::spawn(async move { + // Identify the terminated subscription peers + let terminated_subscription_peers = terminated_subscriptions + .iter() + .map(|(peer, _)| *peer) + .collect(); + + // Create the new subscriptions + let new_subscriptions = subscription_utils::create_new_subscriptions( + consensus_observer_config, + consensus_observer_client, + consensus_publisher, + db_reader, + time_service, + connected_peers_and_metadata, + num_subscriptions_to_create, + active_subscription_peers, + terminated_subscription_peers, + ) + .await; + + // Identify the new subscription peers + let new_subscription_peers = new_subscriptions + .iter() + .map(|subscription| subscription.get_peer_network_id()) + .collect::>(); + + // Add the new subscriptions to the list of active subscriptions + for subscription in new_subscriptions { + active_observer_subscriptions + .lock() + .insert(subscription.get_peer_network_id(), subscription); + } - /// Gets the connected peers and metadata. If an error occurred, - /// it is logged and None is returned. 
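// Illustrative note (not from the patch): the removed helper below returned
// Option<HashMap<..>> and forced every caller to branch on None; its replacement earlier
// in this hunk logs the failure and falls back to an empty map instead. A minimal
// stand-alone sketch of that fallback convention (types and names are illustrative):
use std::collections::HashMap;

fn peers_or_empty(lookup: Result<HashMap<String, u64>, String>) -> HashMap<String, u64> {
    lookup.unwrap_or_else(|error| {
        // Log the failure and degrade to an empty map rather than propagating an Option
        eprintln!("Failed to get connected peers and metadata! Error: {error:?}");
        HashMap::new()
    })
}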
- fn get_connected_peers_and_metadata(&self) -> Option> { - match self - .consensus_observer_client - .get_peers_and_metadata() - .get_connected_peers_and_metadata() - { - Ok(connected_peers_and_metadata) => Some(connected_peers_and_metadata), - Err(error) => { - error!( + // Log a warning if we failed to create as many subscriptions as requested + let num_subscriptions_created = new_subscription_peers.len(); + if num_subscriptions_created < num_subscriptions_to_create { + warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Failed to get connected peers and metadata! Error: {:?}", - error + "Failed to create the requested number of subscriptions! Number of subscriptions \ + requested: {:?}, number of subscriptions created: {:?}.", + num_subscriptions_to_create, + num_subscriptions_created )) ); - None - }, - } + } + + // Update the subscription change metrics + update_subscription_change_metrics(new_subscription_peers, terminated_subscriptions); + }); + + // Update the active subscription creation task + *self.active_subscription_creation_task.lock() = Some(subscription_creation_task); } - /// Produces a list of sorted peers to service our subscription request. - /// Note: if `previous_subscription_peer` is provided, it will be excluded - /// from the selection process. Likewise, all peers currently subscribed to us - /// will be excluded from the selection process. - fn sort_peers_for_subscription( + /// Terminates any unhealthy subscriptions and returns the list of terminated subscriptions + fn terminate_unhealthy_subscriptions( &mut self, - previous_subscription_peer: Option, - ) -> Option> { - if let Some(mut peers_and_metadata) = self.get_connected_peers_and_metadata() { - // Remove the previous subscription peer (if provided) - if let Some(previous_subscription_peer) = previous_subscription_peer { - let _ = peers_and_metadata.remove(&previous_subscription_peer); - } - - // Remove any peers that are currently subscribed to us - if let Some(consensus_publisher) = &self.consensus_publisher { - for peer_network_id in consensus_publisher.get_active_subscribers() { - let _ = peers_and_metadata.remove(&peer_network_id); - } - } + connected_peers_and_metadata: &HashMap, + ) -> Vec<(PeerNetworkId, Error)> { + let mut terminated_subscriptions = vec![]; + for subscription_peer in self.get_active_subscription_peers() { + // Check the health of the subscription and terminate it if needed + if let Err(error) = + self.check_subscription_health(connected_peers_and_metadata, subscription_peer) + { + // Log the subscription termination error + warn!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Terminating subscription to peer: {:?}! 
Termination reason: {:?}", + subscription_peer, error + )) + ); - // Sort the peers by subscription optimality - let sorted_peers = - subscription::sort_peers_by_subscription_optimality(&peers_and_metadata); + // Unsubscribe from the peer and remove the subscription + self.unsubscribe_from_peer(subscription_peer); - // Return the sorted peers - Some(sorted_peers) - } else { - None // No connected peers were found + // Add the peer to the list of terminated subscriptions + terminated_subscriptions.push((subscription_peer, error)); + } } + + terminated_subscriptions } /// Unsubscribes from the given peer by sending an unsubscribe request - fn unsubscribe_from_peer(&self, peer_network_id: PeerNetworkId) { + fn unsubscribe_from_peer(&mut self, peer_network_id: PeerNetworkId) { + // Remove the peer from the active subscriptions + self.active_observer_subscriptions + .lock() + .remove(&peer_network_id); + // Send an unsubscribe request to the peer and process the response. // Note: we execute this asynchronously, as we don't need to wait for the response. let consensus_observer_client = self.consensus_observer_client.clone(); @@ -339,7 +327,7 @@ impl SubscriptionManager { }, Err(error) => { // We encountered an error while sending the request - error!( + warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to send unsubscribe request to peer: {}! Error: {:?}", peer_network_id, error @@ -350,64 +338,68 @@ impl SubscriptionManager { }); } - /// Updates the subscription creation metrics for the given peer - fn update_subscription_creation_metrics(&self, peer_network_id: PeerNetworkId) { - // Set the number of active subscriptions - metrics::set_gauge( - &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, - &peer_network_id.network_id(), - 1, - ); + /// Verifies that the message is from an active + /// subscription. If not, an error is returned. + pub fn verify_message_for_subscription( + &mut self, + message_sender: PeerNetworkId, + ) -> Result<(), Error> { + // Check if the message is from an active subscription + if let Some(active_subscription) = self + .active_observer_subscriptions + .lock() + .get_mut(&message_sender) + { + // Update the last message receive time and return early + active_subscription.update_last_message_receive_time(); + return Ok(()); + } + + // Otherwise, the message is not from an active subscription. + // Send another unsubscribe request, and return an error. 
+ self.unsubscribe_from_peer(message_sender); + Err(Error::InvalidMessageError(format!( + "Received message from unexpected peer, and not an active subscription: {}!", + message_sender + ))) + } +} - // Update the number of created subscriptions - metrics::increment_request_counter( +/// Updates the subscription creation and termination metrics +fn update_subscription_change_metrics( + new_subscription_peers: Vec, + terminated_subscription_peers: Vec<(PeerNetworkId, Error)>, +) { + // Update the created subscriptions metrics + for peer_network_id in new_subscription_peers { + metrics::increment_counter( &metrics::OBSERVER_CREATED_SUBSCRIPTIONS, metrics::CREATED_SUBSCRIPTION_LABEL, &peer_network_id, ); } - /// Updates the subscription termination metrics for the given peer - fn update_subscription_termination_metrics( - &self, - peer_network_id: PeerNetworkId, - error: Error, - ) { - // Reset the number of active subscriptions - metrics::set_gauge( - &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, - &peer_network_id.network_id(), - 0, - ); - - // Update the number of terminated subscriptions - metrics::increment_request_counter( + // Update the terminated subscriptions metrics + for (peer_network_id, termination_reason) in terminated_subscription_peers { + metrics::increment_counter( &metrics::OBSERVER_TERMINATED_SUBSCRIPTIONS, - error.get_label(), + termination_reason.get_label(), &peer_network_id, ); } +} - /// Verifies that the message sender is the currently subscribed peer. - /// If the sender is not the subscribed peer, an error is returned. - pub fn verify_message_sender(&mut self, message_sender: PeerNetworkId) -> Result<(), Error> { - if let Some(active_subscription) = &mut self.active_observer_subscription { - active_subscription - .verify_message_sender(&message_sender) - .map_err(|error| { - // Send another unsubscription request to the peer (in case the previous was lost) - self.unsubscribe_from_peer(message_sender); - error - }) - } else { - // Send another unsubscription request to the peer (in case the previous was lost) - self.unsubscribe_from_peer(message_sender); - - Err(Error::UnexpectedError(format!( - "Received message from unexpected peer: {}! 
No active subscription found!", - message_sender - ))) - } +/// Updates the total subscription metrics (grouped by network ID) +fn update_total_subscription_metrics(active_subscription_peers: &[PeerNetworkId]) { + for (network_id, active_subscription_peers) in &active_subscription_peers + .iter() + .chunk_by(|peer_network_id| peer_network_id.network_id()) + { + metrics::set_gauge( + &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, + &network_id, + active_subscription_peers.collect::>().len() as i64, + ); } } @@ -439,7 +431,96 @@ mod test { } #[tokio::test] - async fn test_check_active_subscription_connected() { + async fn test_check_and_manage_subscriptions() { + // Create a consensus observer client + let network_id = NetworkId::Public; + let (peers_and_metadata, consensus_observer_client) = + create_consensus_observer_client(&[network_id]); + + // Create a new subscription manager + let consensus_observer_config = ConsensusObserverConfig::default(); + let db_reader = create_mock_db_reader(); + let time_service = TimeService::mock(); + let mut subscription_manager = SubscriptionManager::new( + consensus_observer_client, + consensus_observer_config, + None, + db_reader.clone(), + time_service.clone(), + ); + + // Verify that no subscriptions are active + verify_active_subscription_peers(&subscription_manager, vec![]); + + // Check and manage the subscriptions + let result = subscription_manager.check_and_manage_subscriptions().await; + + // Verify that no subscriptions were terminated + assert!(result.is_ok()); + verify_active_subscription_peers(&subscription_manager, vec![]); + + // Add a new connected peer and subscription + let connected_peer_1 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + connected_peer_1, + time_service.clone(), + ); + + // Add another connected peer and subscription + let connected_peer_2 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 2, None, true); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + connected_peer_2, + TimeService::mock(), // Use a different time service (to avoid timeouts!) 
+ ); + + // Check and manage the subscriptions + subscription_manager + .check_and_manage_subscriptions() + .await + .unwrap(); + + // Verify that the subscriptions are still active + verify_active_subscription_peers(&subscription_manager, vec![ + connected_peer_1, + connected_peer_2, + ]); + + // Elapse time to simulate a timeout for peer 1 + let mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_subscription_timeout_ms + 1, + )); + + // Check and manage the subscriptions + subscription_manager + .check_and_manage_subscriptions() + .await + .unwrap(); + + // Verify that the first subscription was terminated + verify_active_subscription_peers(&subscription_manager, vec![connected_peer_2]); + + // Disconnect the second peer + remove_peer_and_connection(peers_and_metadata.clone(), connected_peer_2); + + // Check and manage the subscriptions + let result = subscription_manager.check_and_manage_subscriptions().await; + + // Verify that the second subscription was terminated and an error was returned + verify_active_subscription_peers(&subscription_manager, vec![]); + assert_matches!(result, Err(Error::SubscriptionsReset(_))); + } + + #[tokio::test] + async fn test_check_subscription_health_connected() { // Create a consensus observer client let network_id = NetworkId::Public; let (peers_and_metadata, consensus_observer_client) = @@ -457,20 +538,20 @@ mod test { ); // Create a new subscription - let observer_subscription = ConsensusObserverSubscription::new( + let peer_network_id = PeerNetworkId::random(); + create_observer_subscription( + &mut subscription_manager, consensus_observer_config, db_reader.clone(), - PeerNetworkId::random(), + peer_network_id, TimeService::mock(), ); - subscription_manager.active_observer_subscription = Some(observer_subscription); - // Check the active subscription and verify that it is removed (the peer is not connected) - assert_matches!( - subscription_manager.check_active_subscription(), - Err(Error::SubscriptionDisconnected(_)) - ); - assert!(subscription_manager.active_observer_subscription.is_none()); + // Check the active subscription and verify that it unhealthy (the peer is not connected) + check_subscription_connection(&mut subscription_manager, peer_network_id, false); + + // Terminate unhealthy subscriptions and verify the subscription was removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![peer_network_id]); // Add a new connected peer let connected_peer = @@ -485,14 +566,18 @@ mod test { TimeService::mock(), ); - // Check the active subscription and verify that it is still active (the peer is connected) - assert!(subscription_manager.check_active_subscription().is_ok()); - let active_subscription = subscription_manager.active_observer_subscription.unwrap(); - assert_eq!(active_subscription.get_peer_network_id(), connected_peer); + // Check the active subscription is still healthy + check_subscription_connection(&mut subscription_manager, connected_peer, true); + + // Terminate unhealthy subscriptions and verify none are removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + + // Verify that the active subscription is still present + verify_active_subscription_peers(&subscription_manager, vec![connected_peer]); } #[tokio::test] - async fn test_check_active_subscription_progress_stopped() { + async fn test_check_subscription_health_progress_stopped() { // Create a consensus observer config let 
consensus_observer_config = ConsensusObserverConfig { max_subscription_timeout_ms: 100_000_000, // Use a large value so that we don't time out @@ -528,22 +613,30 @@ mod test { time_service.clone(), ); + // Check the active subscription and verify that it is healthy + check_subscription_progress(&mut subscription_manager, connected_peer, true); + + // Terminate unhealthy subscriptions and verify none are removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + // Elapse time to simulate a DB progress error let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.max_synced_version_timeout_ms + 1, )); - // Check the active subscription and verify that it is removed (the DB is not syncing) - assert_matches!( - subscription_manager.check_active_subscription(), - Err(Error::SubscriptionProgressStopped(_)) - ); - assert!(subscription_manager.active_observer_subscription.is_none()); + // Check the active subscription and verify that it is unhealthy (the DB is not syncing) + check_subscription_progress(&mut subscription_manager, connected_peer, false); + + // Terminate unhealthy subscriptions and verify the subscription was removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![connected_peer]); + + // Verify the active subscription is no longer present + verify_active_subscription_peers(&subscription_manager, vec![]); } #[tokio::test] - async fn test_check_active_subscription_timeout() { + async fn test_check_subscription_health_timeout() { // Create a consensus observer client let network_id = NetworkId::Public; let (peers_and_metadata, consensus_observer_client) = @@ -574,25 +667,34 @@ mod test { time_service.clone(), ); + // Check the active subscription and verify that it is healthy + check_subscription_timeout(&mut subscription_manager, connected_peer, true); + + // Terminate unhealthy subscriptions and verify none are removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + // Elapse time to simulate a timeout let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.max_subscription_timeout_ms + 1, )); - // Check the active subscription and verify that it is removed (the subscription timed out) - assert_matches!( - subscription_manager.check_active_subscription(), - Err(Error::SubscriptionTimeout(_)) - ); - assert!(subscription_manager.active_observer_subscription.is_none()); + // Check the active subscription and verify that it is unhealthy (the subscription timed out) + check_subscription_timeout(&mut subscription_manager, connected_peer, false); + + // Terminate unhealthy subscriptions and verify the subscription was removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![connected_peer]); + + // Verify the active subscription is no longer present + verify_active_subscription_peers(&subscription_manager, vec![]); } #[tokio::test] - async fn test_check_active_subscription_suboptimal() { + async fn test_check_subscription_health_suboptimal() { // Create a consensus observer config let consensus_observer_config = ConsensusObserverConfig { max_subscription_timeout_ms: 100_000_000, // Use a large value so that we don't time out + max_concurrent_subscriptions: 1, // Only allow one subscription max_synced_version_timeout_ms: 100_000_000, // Use a large value so that we don't get DB progress errors 
..ConsensusObserverConfig::default() }; @@ -618,7 +720,7 @@ mod test { // Add a suboptimal validator peer let suboptimal_peer = - create_peer_and_connection(network_id, peers_and_metadata.clone(), 0, None, true); + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); // Create a new subscription to the suboptimal peer create_observer_subscription( @@ -629,106 +731,175 @@ mod test { time_service.clone(), ); + // Check the active subscription and verify that it is healthy + check_subscription_optimality(&mut subscription_manager, suboptimal_peer, true); + + // Terminate unhealthy subscriptions and verify none are removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + // Elapse enough time to trigger the peer optimality check let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.subscription_peer_change_interval_ms + 1, )); - // Check the active subscription and verify that it is removed (the peer is suboptimal) - assert_matches!( - subscription_manager.check_active_subscription(), - Err(Error::SubscriptionSuboptimal(_)) - ); - assert!(subscription_manager.active_observer_subscription.is_none()); + // Check the active subscription and verify that it is unhealthy (the peer is suboptimal) + check_subscription_optimality(&mut subscription_manager, suboptimal_peer, false); + + // Elapse enough time to trigger the peer optimality check again + let mock_time_service = time_service.clone().into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_refresh_interval_ms + 1, + )); + + // Terminate any unhealthy subscriptions and verify the subscription was removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![suboptimal_peer]); + + // Verify the active subscription is no longer present + verify_active_subscription_peers(&subscription_manager, vec![]); } #[tokio::test] - async fn test_sort_peers_for_subscription() { + #[allow(clippy::await_holding_lock)] // Required to wait on the subscription creation task + async fn test_spawn_subscription_creation_task() { // Create a consensus observer client - let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; - let (peers_and_metadata, consensus_observer_client) = - create_consensus_observer_client(network_ids); + let network_id = NetworkId::Public; + let (_, consensus_observer_client) = create_consensus_observer_client(&[network_id]); // Create a new subscription manager let consensus_observer_config = ConsensusObserverConfig::default(); let db_reader = create_mock_db_reader(); + let time_service = TimeService::mock(); let mut subscription_manager = SubscriptionManager::new( consensus_observer_client, consensus_observer_config, None, db_reader.clone(), - TimeService::mock(), + time_service.clone(), ); - // Sort the peers for a subscription and verify that no peers are returned - let sorted_peers = subscription_manager - .sort_peers_for_subscription(None) - .unwrap(); - assert!(sorted_peers.is_empty()); - - // Add a connected validator peer, VFN peer and public peer - for network_id in network_ids { - let distance_from_validators = match network_id { - NetworkId::Validator => 0, - NetworkId::Vfn => 1, - NetworkId::Public => 2, - }; - create_peer_and_connection( - *network_id, - peers_and_metadata.clone(), - distance_from_validators, - None, - true, - ); + // Verify that the active subscription creation task is empty + 
verify_subscription_creation_task(&subscription_manager, false); + + // Spawn a subscription creation task with 0 subscriptions to create + subscription_manager + .spawn_subscription_creation_task(0, vec![], vec![], hashmap![]) + .await; + + // Verify that the active subscription creation task is still empty (no task was spawned) + verify_subscription_creation_task(&subscription_manager, false); + + // Spawn a subscription creation task with 1 subscription to create + subscription_manager + .spawn_subscription_creation_task(1, vec![], vec![], hashmap![]) + .await; + + // Verify that the active subscription creation task is now populated + verify_subscription_creation_task(&subscription_manager, true); + + // Wait for the active subscription creation task to finish + if let Some(active_task) = subscription_manager + .active_subscription_creation_task + .lock() + .as_mut() + { + active_task.await.unwrap(); } - // Sort the peers for a subscription and verify the ordering (according to distance) - let sorted_peers = subscription_manager - .sort_peers_for_subscription(None) - .unwrap(); - assert_eq!(sorted_peers[0].network_id(), NetworkId::Validator); - assert_eq!(sorted_peers[1].network_id(), NetworkId::Vfn); - assert_eq!(sorted_peers[2].network_id(), NetworkId::Public); - assert_eq!(sorted_peers.len(), 3); - - // Sort the peers, but mark the validator as the last subscribed peer - let previous_subscription_peer = sorted_peers[0]; - let sorted_peer_subset = subscription_manager - .sort_peers_for_subscription(Some(previous_subscription_peer)) - .unwrap(); - assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Vfn); - assert_eq!(sorted_peer_subset[1].network_id(), NetworkId::Public); - assert_eq!(sorted_peer_subset.len(), 2); + // Verify that the active subscription creation task is still present + verify_subscription_creation_task(&subscription_manager, true); - // Remove all the peers and verify that no peers are returned - for peer_network_id in sorted_peers { - remove_peer_and_connection(peers_and_metadata.clone(), peer_network_id); + // Verify that the active subscription creation task is finished + if let Some(active_task) = subscription_manager + .active_subscription_creation_task + .lock() + .as_ref() + { + assert!(active_task.is_finished()); } - // Add multiple validator peers, with different latencies - let mut validator_peers = vec![]; - for ping_latency_secs in [0.9, 0.8, 0.5, 0.1, 0.05] { - let validator_peer = create_peer_and_connection( - NetworkId::Validator, - peers_and_metadata.clone(), - 0, - Some(ping_latency_secs), - true, + // Spawn a subscription creation task with 2 subscriptions to create + subscription_manager + .spawn_subscription_creation_task(2, vec![], vec![], hashmap![]) + .await; + + // Verify the new active subscription creation task is not finished + if let Some(active_task) = subscription_manager + .active_subscription_creation_task + .lock() + .as_ref() + { + assert!(!active_task.is_finished()); + }; + } + + #[tokio::test] + async fn test_terminate_unhealthy_subscriptions_multiple() { + // Create a consensus observer client + let network_id = NetworkId::Public; + let (peers_and_metadata, consensus_observer_client) = + create_consensus_observer_client(&[network_id]); + + // Create a new subscription manager + let consensus_observer_config = ConsensusObserverConfig::default(); + let db_reader = create_mock_db_reader(); + let time_service = TimeService::mock(); + let mut subscription_manager = SubscriptionManager::new( + consensus_observer_client, + 
consensus_observer_config, + None, + db_reader.clone(), + time_service.clone(), + ); + + // Create two new subscriptions + let subscription_peer_1 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); + let subscription_peer_2 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); + for peer in &[subscription_peer_1, subscription_peer_2] { + // Create the subscription + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + *peer, + time_service.clone(), ); - validator_peers.push(validator_peer); } - // Sort the peers for a subscription and verify the ordering (according to latency) - let sorted_peers = subscription_manager - .sort_peers_for_subscription(None) - .unwrap(); - let expected_peers = validator_peers.into_iter().rev().collect::>(); - assert_eq!(sorted_peers, expected_peers); + // Terminate unhealthy subscriptions and verify that both subscriptions are still healthy + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + + // Create another subscription + let subscription_peer_3 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + subscription_peer_3, + TimeService::mock(), // Use a different time service (to avoid timeouts) + ); + + // Elapse time to simulate a timeout (on the first two subscriptions) + let mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_subscription_timeout_ms + 1, + )); + + // Terminate unhealthy subscriptions and verify the first two subscriptions were terminated + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![ + subscription_peer_1, + subscription_peer_2, + ]); + + // Verify the third subscription is still active + verify_active_subscription_peers(&subscription_manager, vec![subscription_peer_3]); } #[tokio::test] - async fn test_verify_message_sender() { + async fn test_unsubscribe_from_peer() { // Create a consensus observer client let network_id = NetworkId::Public; let (_, consensus_observer_client) = create_consensus_observer_client(&[network_id]); @@ -744,10 +915,68 @@ mod test { TimeService::mock(), ); - // Check that message verification fails (we have no active subscription) - assert!(subscription_manager - .verify_message_sender(PeerNetworkId::random()) - .is_err()); + // Verify that no subscriptions are active + verify_active_subscription_peers(&subscription_manager, vec![]); + + // Create a new subscription + let subscription_peer_1 = PeerNetworkId::random(); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + subscription_peer_1, + TimeService::mock(), + ); + + // Verify the subscription is active + verify_active_subscription_peers(&subscription_manager, vec![subscription_peer_1]); + + // Create another subscription + let subscription_peer_2 = PeerNetworkId::random(); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + subscription_peer_2, + TimeService::mock(), + ); + + // Verify the second subscription is active + verify_active_subscription_peers(&subscription_manager, vec![ + subscription_peer_1, + subscription_peer_2, + ]); + + // Unsubscribe from the first peer + 
subscription_manager.unsubscribe_from_peer(subscription_peer_1); + + // Verify that the first subscription is no longer active + verify_active_subscription_peers(&subscription_manager, vec![subscription_peer_2]); + } + + #[tokio::test] + async fn test_verify_message_for_subscription() { + // Create a consensus observer client + let network_id = NetworkId::Public; + let (_, consensus_observer_client) = create_consensus_observer_client(&[network_id]); + + // Create a new subscription manager + let consensus_observer_config = ConsensusObserverConfig::default(); + let db_reader = Arc::new(MockDatabaseReader::new()); + let mut subscription_manager = SubscriptionManager::new( + consensus_observer_client, + consensus_observer_config, + None, + db_reader.clone(), + TimeService::mock(), + ); + + // Check that message verification fails (we have no active subscriptions) + check_message_verification_result( + &mut subscription_manager, + PeerNetworkId::random(), + false, + ); // Create a new subscription let subscription_peer = PeerNetworkId::random(); @@ -759,15 +988,125 @@ mod test { TimeService::mock(), ); - // Check that message verification fails if the peer doesn't match the subscription - assert!(subscription_manager - .verify_message_sender(PeerNetworkId::random()) - .is_err()); + // Check that message verification passes for the subscription + check_message_verification_result(&mut subscription_manager, subscription_peer, true); + + // Create another subscription + let second_subscription_peer = PeerNetworkId::random(); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + second_subscription_peer, + TimeService::mock(), + ); + + // Check that message verification passes for the second subscription + check_message_verification_result( + &mut subscription_manager, + second_subscription_peer, + true, + ); + + // Check that message verification fails if the peer doesn't match either subscription + check_message_verification_result( + &mut subscription_manager, + PeerNetworkId::random(), + false, + ); + } + + /// Checks the result of verifying a message from a given peer + fn check_message_verification_result( + subscription_manager: &mut SubscriptionManager, + peer_network_id: PeerNetworkId, + pass_verification: bool, + ) { + // Verify the message for the given peer + let result = subscription_manager.verify_message_for_subscription(peer_network_id); + + // Ensure the result matches the expected value + if pass_verification { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::InvalidMessageError(_))); + } + } + + /// Checks the health of a subscription and verifies the connection status + fn check_subscription_connection( + subscription_manager: &mut SubscriptionManager, + subscription_peer: PeerNetworkId, + expect_connected: bool, + ) { + // Check the health of the subscription + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + let result = subscription_manager + .check_subscription_health(&connected_peers_and_metadata, subscription_peer); + + // Check the result based on the expected connection status + if expect_connected { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionDisconnected(_))); + } + } + + /// Checks the health of a subscription and verifies the optimality status + fn check_subscription_optimality( + subscription_manager: &mut SubscriptionManager, + subscription_peer: PeerNetworkId, + expect_optimal: bool, + ) { + 
// Check the health of the subscription + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + let result = subscription_manager + .check_subscription_health(&connected_peers_and_metadata, subscription_peer); + + // Check the result based on the expected optimality status + if expect_optimal { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionSuboptimal(_))); + } + } + + /// Checks the health of a subscription and verifies the progress status + fn check_subscription_progress( + subscription_manager: &mut SubscriptionManager, + subscription_peer: PeerNetworkId, + expect_progress: bool, + ) { + // Check the health of the subscription + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + let result = subscription_manager + .check_subscription_health(&connected_peers_and_metadata, subscription_peer); + + // Check the result based on the expected progress status + if expect_progress { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionProgressStopped(_))); + } + } - // Check that message verification passes if the peer matches the subscription - assert!(subscription_manager - .verify_message_sender(subscription_peer) - .is_ok()); + /// Checks the health of a subscription and verifies the timeout status + fn check_subscription_timeout( + subscription_manager: &mut SubscriptionManager, + subscription_peer: PeerNetworkId, + expect_timeout: bool, + ) { + // Check the health of the subscription + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + let result = subscription_manager + .check_subscription_health(&connected_peers_and_metadata, subscription_peer); + + // Check the result based on the expected timeout status + if expect_timeout { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionTimeout(_))); + } } /// Creates a new consensus observer client and a peers and metadata container @@ -808,7 +1147,10 @@ mod test { subscription_peer, time_service, ); - subscription_manager.active_observer_subscription = Some(observer_subscription); + subscription_manager + .active_observer_subscriptions + .lock() + .insert(subscription_peer, observer_subscription); } /// Creates a new peer with the specified connection metadata @@ -879,4 +1221,53 @@ mod test { .remove_peer_metadata(peer_network_id, connection_id) .unwrap(); } + + /// Verifies the active subscription peers + fn verify_active_subscription_peers( + subscription_manager: &SubscriptionManager, + expected_active_peers: Vec, + ) { + // Get the active subscription peers + let active_peers = subscription_manager.get_active_subscription_peers(); + + // Verify the active subscription peers + for peer in &expected_active_peers { + assert!(active_peers.contains(peer)); + } + assert_eq!(active_peers.len(), expected_active_peers.len()); + } + + /// Verifies the status of the active subscription creation task + fn verify_subscription_creation_task( + subscription_manager: &SubscriptionManager, + expect_active_task: bool, + ) { + let current_active_task = subscription_manager + .active_subscription_creation_task + .lock() + .is_some(); + assert_eq!(current_active_task, expect_active_task); + } + + /// Verifies the list of terminated unhealthy subscriptions + fn verify_terminated_unhealthy_subscriptions( + subscription_manager: &mut SubscriptionManager, + expected_terminated_peers: Vec, + ) { + // Get the connected peers and 
metadata + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + + // Terminate any unhealthy subscriptions + let terminated_subscriptions = + subscription_manager.terminate_unhealthy_subscriptions(&connected_peers_and_metadata); + + // Verify the terminated subscriptions + for (terminated_subscription_peer, _) in &terminated_subscriptions { + assert!(expected_terminated_peers.contains(terminated_subscription_peer)); + } + assert_eq!( + terminated_subscriptions.len(), + expected_terminated_peers.len() + ); + } } diff --git a/consensus/src/consensus_observer/observer/subscription_utils.rs b/consensus/src/consensus_observer/observer/subscription_utils.rs new file mode 100644 index 0000000000000..0bca7c61b007d --- /dev/null +++ b/consensus/src/consensus_observer/observer/subscription_utils.rs @@ -0,0 +1,1186 @@ +// Copyright © Aptos Foundation +// SPDX-License-Identifier: Apache-2.0 + +use crate::consensus_observer::{ + common::logging::{LogEntry, LogSchema}, + network::{ + observer_client::ConsensusObserverClient, + observer_message::{ + ConsensusObserverMessage, ConsensusObserverRequest, ConsensusObserverResponse, + }, + }, + observer::subscription::ConsensusObserverSubscription, + publisher::consensus_publisher::ConsensusPublisher, +}; +use aptos_config::{config::ConsensusObserverConfig, network_id::PeerNetworkId}; +use aptos_logger::{error, info, warn}; +use aptos_network::{ + application::{interface::NetworkClient, metadata::PeerMetadata}, + ProtocolId, +}; +use aptos_storage_interface::DbReader; +use aptos_time_service::TimeService; +use ordered_float::OrderedFloat; +use std::{ + collections::{BTreeMap, HashMap}, + sync::Arc, +}; + +// A useful constant for representing the maximum ping latency +const MAX_PING_LATENCY_SECS: f64 = 10_000.0; + +/// Attempts to create the given number of new subscriptions +/// from the connected peers and metadata. Any active or unhealthy +/// subscriptions are excluded from the selection process. +pub async fn create_new_subscriptions( + consensus_observer_config: ConsensusObserverConfig, + consensus_observer_client: Arc< + ConsensusObserverClient>, + >, + consensus_publisher: Option>, + db_reader: Arc, + time_service: TimeService, + connected_peers_and_metadata: HashMap, + num_subscriptions_to_create: usize, + active_subscription_peers: Vec, + unhealthy_subscription_peers: Vec, +) -> Vec { + // Sort the potential peers for subscription requests + let mut sorted_potential_peers = match sort_peers_for_subscriptions( + connected_peers_and_metadata, + unhealthy_subscription_peers, + active_subscription_peers, + consensus_publisher, + ) { + Some(sorted_peers) => sorted_peers, + None => { + error!(LogSchema::new(LogEntry::ConsensusObserver) + .message("Failed to sort peers for subscription requests!")); + return vec![]; + }, + }; + + // Verify that we have potential peers to subscribe to + if sorted_potential_peers.is_empty() { + warn!(LogSchema::new(LogEntry::ConsensusObserver) + .message("There are no potential peers to subscribe to!")); + return vec![]; + } + + // Go through the potential peers and attempt to create new subscriptions + let mut created_subscriptions = vec![]; + for _ in 0..num_subscriptions_to_create { + // If there are no peers left to subscribe to, return early + if sorted_potential_peers.is_empty() { + info!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "There are no more potential peers to subscribe to! 
\ + Num created subscriptions: {:?}", + created_subscriptions.len() + )) + ); + break; + } + + // Attempt to create a new subscription + let (observer_subscription, failed_subscription_peers) = create_single_subscription( + consensus_observer_config, + consensus_observer_client.clone(), + db_reader.clone(), + sorted_potential_peers.clone(), + time_service.clone(), + ) + .await; + + // Remove the failed peers from the sorted list + sorted_potential_peers.retain(|peer| !failed_subscription_peers.contains(peer)); + + // Process a successful subscription creation + if let Some(observer_subscription) = observer_subscription { + // Remove the peer from the sorted list (for the next selection) + sorted_potential_peers + .retain(|peer| *peer != observer_subscription.get_peer_network_id()); + + // Add the newly created subscription to the subscription list + created_subscriptions.push(observer_subscription); + } + } + + // Return the list of created subscriptions + created_subscriptions +} + +/// Attempts to create a new subscription to a single peer from the +/// sorted list of potential peers. If successful, the new subscription +/// is returned, alongside any peers with failed attempts. +async fn create_single_subscription( + consensus_observer_config: ConsensusObserverConfig, + consensus_observer_client: Arc< + ConsensusObserverClient>, + >, + db_reader: Arc, + sorted_potential_peers: Vec, + time_service: TimeService, +) -> (Option, Vec) { + let mut peers_with_failed_attempts = vec![]; + for potential_peer in sorted_potential_peers { + // Log the subscription attempt + info!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Attempting to subscribe to potential peer: {}!", + potential_peer + )) + ); + + // Send a subscription request to the peer and wait for the response + let subscription_request = ConsensusObserverRequest::Subscribe; + let request_timeout_ms = consensus_observer_config.network_request_timeout_ms; + let response = consensus_observer_client + .send_rpc_request_to_peer(&potential_peer, subscription_request, request_timeout_ms) + .await; + + // Process the response and update the active subscription + match response { + Ok(ConsensusObserverResponse::SubscribeAck) => { + // Log the successful subscription + info!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Successfully subscribed to peer: {}!", + potential_peer + )) + ); + + // Create the new subscription + let subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + db_reader.clone(), + potential_peer, + time_service.clone(), + ); + + // Return the successful subscription + return (Some(subscription), peers_with_failed_attempts); + }, + Ok(response) => { + // We received an invalid response + warn!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Got unexpected response type for subscription request: {:?}", + response.get_label() + )) + ); + + // Add the peer to the list of failed attempts + peers_with_failed_attempts.push(potential_peer); + }, + Err(error) => { + // We encountered an error while sending the request + warn!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Failed to send subscription request to peer: {}! 
Error: {:?}", + potential_peer, error + )) + ); + + // Add the peer to the list of failed attempts + peers_with_failed_attempts.push(potential_peer); + }, + } + } + + // We failed to create a new subscription + (None, peers_with_failed_attempts) +} + +/// Gets the distance from the validators for the specified peer from the peer metadata +fn get_distance_for_peer( + peer_network_id: &PeerNetworkId, + peer_metadata: &PeerMetadata, +) -> Option { + // Get the distance for the peer + let peer_monitoring_metadata = peer_metadata.get_peer_monitoring_metadata(); + let distance = peer_monitoring_metadata + .latest_network_info_response + .as_ref() + .map(|response| response.distance_from_validators); + + // If the distance is missing, log a warning + if distance.is_none() { + warn!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Unable to get distance for peer! Peer: {:?}", + peer_network_id + )) + ); + } + + distance +} + +/// Gets the latency for the specified peer from the peer metadata +fn get_latency_for_peer( + peer_network_id: &PeerNetworkId, + peer_metadata: &PeerMetadata, +) -> Option { + // Get the latency for the peer + let peer_monitoring_metadata = peer_metadata.get_peer_monitoring_metadata(); + let latency = peer_monitoring_metadata.average_ping_latency_secs; + + // If the latency is missing, log a warning + if latency.is_none() { + warn!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Unable to get latency for peer! Peer: {:?}", + peer_network_id + )) + ); + } + + latency +} + +/// Produces a list of sorted peers to service the subscription requests. +/// Any active or unhealthy subscriptions are excluded from the selection process. +/// Likewise, any peers currently subscribed to us are also excluded. +fn sort_peers_for_subscriptions( + mut connected_peers_and_metadata: HashMap, + active_subscription_peers: Vec, + unhealthy_subscription_peers: Vec, + consensus_publisher: Option>, +) -> Option> { + // Remove any peers we're already subscribed to + for active_subscription_peer in active_subscription_peers { + let _ = connected_peers_and_metadata.remove(&active_subscription_peer); + } + + // Remove any unhealthy subscription peers + for unhealthy_peer in unhealthy_subscription_peers { + let _ = connected_peers_and_metadata.remove(&unhealthy_peer); + } + + // Remove any peers that are currently subscribed to us + if let Some(consensus_publisher) = consensus_publisher { + for peer_network_id in consensus_publisher.get_active_subscribers() { + let _ = connected_peers_and_metadata.remove(&peer_network_id); + } + } + + // Sort the peers by subscription optimality + let sorted_peers = sort_peers_by_subscription_optimality(&connected_peers_and_metadata); + + // Return the sorted peers + Some(sorted_peers) +} + +/// Sorts the peers by subscription optimality (in descending order of +/// optimality). This requires: (i) sorting the peers by distance from the +/// validator set and ping latency (lower values are more optimal); and (ii) +/// filtering out peers that don't support consensus observer. +/// +/// Note: we prioritize distance over latency as we want to avoid close +/// but not up-to-date peers. If peers don't have sufficient metadata +/// for sorting, they are given a lower priority. 
+pub fn sort_peers_by_subscription_optimality( + peers_and_metadata: &HashMap, +) -> Vec { + // Group peers and latencies by validator distance, i.e., distance -> [(peer, latency)] + let mut unsupported_peers = Vec::new(); + let mut peers_and_latencies_by_distance = BTreeMap::new(); + for (peer_network_id, peer_metadata) in peers_and_metadata { + // Verify that the peer supports consensus observer + if !supports_consensus_observer(peer_metadata) { + unsupported_peers.push(*peer_network_id); + continue; // Skip the peer + } + + // Get the distance and latency for the peer + let distance = get_distance_for_peer(peer_network_id, peer_metadata); + let latency = get_latency_for_peer(peer_network_id, peer_metadata); + + // If the distance is not found, use the maximum distance + let distance = + distance.unwrap_or(aptos_peer_monitoring_service_types::MAX_DISTANCE_FROM_VALIDATORS); + + // If the latency is not found, use a large latency + let latency = latency.unwrap_or(MAX_PING_LATENCY_SECS); + + // Add the peer and latency to the distance group + peers_and_latencies_by_distance + .entry(distance) + .or_insert_with(Vec::new) + .push((*peer_network_id, OrderedFloat(latency))); + } + + // If there are peers that don't support consensus observer, log them + if !unsupported_peers.is_empty() { + info!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Found {} peers that don't support consensus observer! Peers: {:?}", + unsupported_peers.len(), + unsupported_peers + )) + ); + } + + // Sort the peers by distance and latency. Note: BTreeMaps are + // sorted by key, so the entries will be sorted by distance in ascending order. + let mut sorted_peers = Vec::new(); + for (_, mut peers_and_latencies) in peers_and_latencies_by_distance { + // Sort the peers by latency + peers_and_latencies.sort_by_key(|(_, latency)| *latency); + + // Add the peers to the sorted list (in sorted order) + sorted_peers.extend( + peers_and_latencies + .into_iter() + .map(|(peer_network_id, _)| peer_network_id), + ); + } + + // Log the sorted peers + info!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Sorted {} peers by subscription optimality! Peers: {:?}", + sorted_peers.len(), + sorted_peers + )) + ); + + sorted_peers +} + +/// Returns true iff the peer metadata indicates support for consensus observer +fn supports_consensus_observer(peer_metadata: &PeerMetadata) -> bool { + peer_metadata.supports_protocol(ProtocolId::ConsensusObserver) + && peer_metadata.supports_protocol(ProtocolId::ConsensusObserverRpc) +} + +#[cfg(test)] +mod tests { + use super::*; + use aptos_channels::{aptos_channel, message_queues::QueueStyle}; + use aptos_config::{config::PeerRole, network_id::NetworkId}; + use aptos_netcore::transport::ConnectionOrigin; + use aptos_network::{ + application::storage::PeersAndMetadata, + peer_manager::{ConnectionRequestSender, PeerManagerRequest, PeerManagerRequestSender}, + protocols::{ + network::{NetworkSender, NewNetworkSender}, + wire::handshake::v1::{MessagingProtocolVersion, ProtocolIdSet}, + }, + transport::{ConnectionId, ConnectionMetadata}, + }; + use aptos_peer_monitoring_service_types::{ + response::NetworkInformationResponse, PeerMonitoringMetadata, + }; + use aptos_storage_interface::Result; + use aptos_types::{network_address::NetworkAddress, transaction::Version, PeerId}; + use bytes::Bytes; + use futures::StreamExt; + use mockall::mock; + use std::collections::HashSet; + + // This is a simple mock of the DbReader (it generates a MockDatabaseReader) + mock! 
{ + pub DatabaseReader {} + impl DbReader for DatabaseReader { + fn get_latest_ledger_info_version(&self) -> Result; + } + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_create_new_subscriptions() { + // Create a consensus observer config and client + let consensus_observer_config = ConsensusObserverConfig::default(); + let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; + let (peers_and_metadata, consensus_observer_client, mut peer_manager_request_receivers) = + create_consensus_observer_client(network_ids); + + // Create a list of connected peers (one per network) + let mut connected_peers = vec![]; + for network_id in &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public] { + // Create a new peer + let peer_network_id = create_peer_and_connection( + *network_id, + peers_and_metadata.clone(), + get_distance_from_validators(network_id), + None, + true, + ); + + // Add the peer to the list of sorted peers + connected_peers.push(peer_network_id); + } + + // Get the connected peers and metadata + let connected_peers_and_metadata = peers_and_metadata + .get_connected_peers_and_metadata() + .unwrap(); + + // Spawn the subscription creation task to create 2 subscriptions + let num_subscriptions_to_create = 2; + let subscription_creation_handle = tokio::spawn(async move { + create_new_subscriptions( + consensus_observer_config, + consensus_observer_client.clone(), + None, + Arc::new(MockDatabaseReader::new()), + TimeService::mock(), + connected_peers_and_metadata, + num_subscriptions_to_create, + vec![], + vec![], + ) + .await + }); + + // Handle the peer manager requests made by the subscription creation task. + // The VFN peer should fail the subscription request. + for connected_peer in &connected_peers { + let network_id = connected_peer.network_id(); + handle_next_subscription_request( + network_id, + &mut peer_manager_request_receivers, + network_id != NetworkId::Vfn, // The VFN peer should fail the subscription request + ) + .await; + } + + // Wait for the subscription creation task to complete + let consensus_observer_subscriptions = subscription_creation_handle.await.unwrap(); + + // Verify the number of created subscriptions + assert_eq!( + consensus_observer_subscriptions.len(), + num_subscriptions_to_create + ); + + // Verify the created subscription peers + let first_peer = *connected_peers.first().unwrap(); + let last_peer = *connected_peers.last().unwrap(); + let expected_subscription_peers = [first_peer, last_peer]; + for consensus_observer_subscription in consensus_observer_subscriptions { + let peer_network_id = consensus_observer_subscription.get_peer_network_id(); + assert!(expected_subscription_peers.contains(&peer_network_id)); + } + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_create_new_subscriptions_multiple() { + // Create a consensus observer config and client + let consensus_observer_config = ConsensusObserverConfig::default(); + let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; + let (peers_and_metadata, consensus_observer_client, mut peer_manager_request_receivers) = + create_consensus_observer_client(network_ids); + + // Create a list of connected peers (one per network) + let mut connected_peers = vec![]; + for network_id in &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public] { + // Create a new peer + let peer_network_id = create_peer_and_connection( + *network_id, + peers_and_metadata.clone(), + get_distance_from_validators(network_id), + None, + true, + ); + + // 
Add the peer to the list of sorted peers + connected_peers.push(peer_network_id); + } + + // Create multiple sets of subscriptions and verify the results + for num_subscriptions_to_create in [0, 1, 2, 3, 10] { + // Determine the expected subscription peers + let expected_subscription_peers = connected_peers + .iter() + .take(num_subscriptions_to_create) + .cloned() + .collect(); + + // Create the subscriptions and verify the result + create_and_verify_subscriptions( + consensus_observer_config, + peers_and_metadata.clone(), + consensus_observer_client.clone(), + &mut peer_manager_request_receivers, + num_subscriptions_to_create, + expected_subscription_peers, + ) + .await; + } + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_create_single_subscription() { + // Create a consensus observer config and client + let consensus_observer_config = ConsensusObserverConfig::default(); + let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; + let (peers_and_metadata, consensus_observer_client, mut peer_manager_request_receivers) = + create_consensus_observer_client(network_ids); + + // Create a list of connected peers (one per network) + let mut connected_peers = vec![]; + for network_id in &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public] { + // Create a new peer + let peer_network_id = + create_peer_and_connection(*network_id, peers_and_metadata.clone(), 0, None, true); + + // Add the peer to the list of sorted peers + connected_peers.push(peer_network_id); + } + + // Spawn the subscription creation task + let sorted_potential_peers = connected_peers.clone(); + let subscription_creation_handle = tokio::spawn(async move { + create_single_subscription( + consensus_observer_config, + consensus_observer_client.clone(), + Arc::new(MockDatabaseReader::new()), + sorted_potential_peers, + TimeService::mock(), + ) + .await + }); + + // Handle the peer manager requests made by the subscription creation task. + // We should only respond successfully to the peer on the public network. 
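+        // Editor's note (inferred from the handle_next_subscription_request helper below): passing
+        // `false` makes the helper answer with an UnsubscribeAck rather than the expected
+        // SubscribeAck, so create_single_subscription records that peer as a failed attempt and
+        // moves on to the next peer in the sorted list.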
+ handle_next_subscription_request( + NetworkId::Validator, + &mut peer_manager_request_receivers, + false, + ) + .await; + handle_next_subscription_request( + NetworkId::Vfn, + &mut peer_manager_request_receivers, + false, + ) + .await; + handle_next_subscription_request( + NetworkId::Public, + &mut peer_manager_request_receivers, + true, + ) + .await; + + // Wait for the subscription creation task to complete + let (observer_subscription, failed_subscription_peers) = + subscription_creation_handle.await.unwrap(); + + // Verify that the public peer was successfully subscribed to + assert_eq!( + &observer_subscription.unwrap().get_peer_network_id(), + connected_peers.last().unwrap() + ); + + // Verify that the other peers failed our subscription attempts + let expected_failed_peers = connected_peers.iter().take(2).cloned().collect::>(); + assert_eq!(failed_subscription_peers, expected_failed_peers); + } + + #[test] + fn test_sort_peers_by_distance_and_latency() { + // Sort an empty list of peers + let peers_and_metadata = HashMap::new(); + assert!(sort_peers_by_subscription_optimality(&peers_and_metadata).is_empty()); + + // Create a list of peers with empty metadata + let peers_and_metadata = create_peers_and_metadata(true, true, true, 10); + + // Sort the peers and verify the results + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 10); + + // Create a list of peers with valid metadata + let peers_and_metadata = create_peers_and_metadata(false, false, true, 10); + + // Sort the peers + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + + // Verify the order of the peers + verify_increasing_distance_latencies(&peers_and_metadata, &sorted_peers); + assert_eq!(sorted_peers.len(), 10); + + // Create a list of peers with and without metadata + let mut peers_and_metadata = create_peers_and_metadata(false, false, true, 10); + peers_and_metadata.extend(create_peers_and_metadata(true, false, true, 10)); + peers_and_metadata.extend(create_peers_and_metadata(false, true, true, 10)); + peers_and_metadata.extend(create_peers_and_metadata(true, true, true, 10)); + + // Sort the peers + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 40); + + // Verify the order of the first 20 peers + let (first_20_peers, sorted_peers) = sorted_peers.split_at(20); + verify_increasing_distance_latencies(&peers_and_metadata, first_20_peers); + + // Verify that the next 10 peers only have latency metadata + let (next_10_peers, sorted_peers) = sorted_peers.split_at(10); + for sorted_peer in next_10_peers { + let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); + assert!(get_distance_for_peer(sorted_peer, peer_metadata).is_none()); + assert!(get_latency_for_peer(sorted_peer, peer_metadata).is_some()); + } + + // Verify that the last 10 peers have no metadata + let (last_10_peers, remaining_peers) = sorted_peers.split_at(10); + for sorted_peer in last_10_peers { + let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); + assert!(get_distance_for_peer(sorted_peer, peer_metadata).is_none()); + assert!(get_latency_for_peer(sorted_peer, peer_metadata).is_none()); + } + assert!(remaining_peers.is_empty()); + } + + #[test] + fn test_sort_peers_by_distance_and_latency_filter() { + // Sort an empty list of peers + let peers_and_metadata = HashMap::new(); + assert!(sort_peers_by_subscription_optimality(&peers_and_metadata).is_empty()); + + // Create a 
list of peers with empty metadata (with consensus observer support) + let peers_and_metadata = create_peers_and_metadata(true, true, true, 10); + + // Sort the peers and verify the results + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 10); + + // Create a list of peers with empty metadata (without consensus observer support) + let peers_and_metadata = create_peers_and_metadata(true, true, false, 10); + + // Sort the peers and verify the results + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert!(sorted_peers.is_empty()); + + // Create a list of peers with valid metadata (without consensus observer support) + let peers_and_metadata = create_peers_and_metadata(false, false, false, 10); + + // Sort the peers and verify the results + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert!(sorted_peers.is_empty()); + + // Create a list of peers with empty metadata (with and without consensus observer support) + let mut peers_and_metadata = create_peers_and_metadata(true, true, true, 5); + peers_and_metadata.extend(create_peers_and_metadata(true, true, false, 50)); + + // Sort the peers and verify the results (only the supported peers are sorted) + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 5); + + // Create a list of peers with valid metadata (with and without consensus observer support) + let mut peers_and_metadata = create_peers_and_metadata(false, false, true, 50); + peers_and_metadata.extend(create_peers_and_metadata(false, false, false, 10)); + + // Sort the peers and verify the results (only the supported peers are sorted) + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 50); + + // Create a list of peers with valid metadata (with and without consensus observer support) + let supported_peer_and_metadata = create_peers_and_metadata(false, false, true, 1); + let unsupported_peer_and_metadata = create_peers_and_metadata(false, false, false, 1); + let mut peers_and_metadata = HashMap::new(); + peers_and_metadata.extend(supported_peer_and_metadata.clone()); + peers_and_metadata.extend(unsupported_peer_and_metadata); + + // Sort the peers and verify the results (only the supported peer is sorted) + let supported_peer = supported_peer_and_metadata.keys().next().unwrap(); + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers, vec![*supported_peer]); + } + + #[tokio::test] + async fn test_sort_peers_for_subscriptions() { + // Create a consensus observer client + let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; + let (peers_and_metadata, consensus_observer_client, _) = + create_consensus_observer_client(network_ids); + + // Create a consensus publisher + let consensus_observer_config = ConsensusObserverConfig::default(); + let (consensus_publisher, _) = + ConsensusPublisher::new(consensus_observer_config, consensus_observer_client.clone()); + let consensus_publisher = Arc::new(consensus_publisher); + + // Sort the peers and verify that no peers are returned + let sorted_peers = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![], + vec![], + ); + assert!(sorted_peers.is_empty()); + + // Add a connected validator peer, VFN peer and public peer + for network_id in network_ids { + create_peer_and_connection( + 
*network_id, + peers_and_metadata.clone(), + get_distance_from_validators(network_id), + None, + true, + ); + } + + // Sort the peers and verify the ordering (according to distance) + let sorted_peers = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![], + vec![], + ); + assert_eq!(sorted_peers[0].network_id(), NetworkId::Validator); + assert_eq!(sorted_peers[1].network_id(), NetworkId::Vfn); + assert_eq!(sorted_peers[2].network_id(), NetworkId::Public); + assert_eq!(sorted_peers.len(), 3); + + // Sort the peers, but mark the validator as unhealthy (so it's ignored) + let sorted_peer_subset = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![], + vec![sorted_peers[0]], + ); + assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Vfn); + assert_eq!(sorted_peer_subset[1].network_id(), NetworkId::Public); + assert_eq!(sorted_peer_subset.len(), 2); + + // Sort the peers, but mark the VFN and validator as active subscriptions (so they're ignored) + let sorted_peer_subset = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![sorted_peers[0], sorted_peers[1]], + vec![], + ); + assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Public); + assert_eq!(sorted_peer_subset.len(), 1); + + // Create a consensus publisher with the PFN as an active subscriber + let consensus_publisher_with_subscribers = + Arc::new(ConsensusPublisher::new_with_active_subscribers( + consensus_observer_config, + consensus_observer_client.clone(), + HashSet::from_iter(vec![sorted_peers[2]]), + )); + + // Sort the peers, and verify the PFN is ignored (since it's an active subscriber) + let sorted_peer_subset = sort_subscription_peers( + consensus_publisher_with_subscribers, + peers_and_metadata.clone(), + vec![], + vec![], + ); + assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Validator); + assert_eq!(sorted_peer_subset[1].network_id(), NetworkId::Vfn); + assert_eq!(sorted_peer_subset.len(), 2); + + // Remove all the peers and verify that no peers are returned upon sorting + for peer_network_id in sorted_peers { + remove_peer_and_connection(peers_and_metadata.clone(), peer_network_id); + } + let sorted_peers = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![], + vec![], + ); + assert!(sorted_peers.is_empty()); + + // Add multiple validator peers, with different latencies + let mut validator_peers = vec![]; + for ping_latency_secs in [0.9, 0.8, 0.5, 0.1, 0.05] { + let validator_peer = create_peer_and_connection( + NetworkId::Validator, + peers_and_metadata.clone(), + 0, + Some(ping_latency_secs), + true, + ); + validator_peers.push(validator_peer); + } + + // Sort the peers and verify the ordering (according to latency) + let sorted_peers = sort_subscription_peers( + consensus_publisher, + peers_and_metadata.clone(), + vec![], + vec![], + ); + let expected_peers = validator_peers.into_iter().rev().collect::>(); + assert_eq!(sorted_peers, expected_peers); + } + + /// Creates new subscriptions and verifies the results + async fn create_and_verify_subscriptions( + consensus_observer_config: ConsensusObserverConfig, + peers_and_metadata: Arc, + consensus_observer_client: Arc< + ConsensusObserverClient>, + >, + peer_manager_request_receivers: &mut HashMap< + NetworkId, + aptos_channel::Receiver<(PeerId, ProtocolId), PeerManagerRequest>, + >, + num_subscriptions_to_create: usize, + expected_subscription_peers: Vec, + ) { + // Get the connected 
peers and metadata + let connected_peers_and_metadata = peers_and_metadata + .get_connected_peers_and_metadata() + .unwrap(); + + // Spawn the subscription creation task + let subscription_creation_handle = tokio::spawn(async move { + create_new_subscriptions( + consensus_observer_config, + consensus_observer_client.clone(), + None, + Arc::new(MockDatabaseReader::new()), + TimeService::mock(), + connected_peers_and_metadata, + num_subscriptions_to_create, + vec![], + vec![], + ) + .await + }); + + // Handle the peer manager requests made by the subscription creation task + for expected_subscription_peer in &expected_subscription_peers { + handle_next_subscription_request( + expected_subscription_peer.network_id(), + peer_manager_request_receivers, + true, + ) + .await; + } + + // Wait for the subscription creation task to complete + let consensus_observer_subscriptions = subscription_creation_handle.await.unwrap(); + + // Verify the created subscriptions + assert_eq!( + consensus_observer_subscriptions.len(), + expected_subscription_peers.len() + ); + for subscription in consensus_observer_subscriptions { + assert!(expected_subscription_peers.contains(&subscription.get_peer_network_id())); + } + } + + /// Creates a new connection metadata for testing + fn create_connection_metadata( + peer_network_id: PeerNetworkId, + support_consensus_observer: bool, + ) -> ConnectionMetadata { + if support_consensus_observer { + // Create a protocol set that supports consensus observer + let protocol_set = ProtocolIdSet::from_iter(vec![ + ProtocolId::ConsensusObserver, + ProtocolId::ConsensusObserverRpc, + ]); + + // Create the connection metadata with the protocol set + ConnectionMetadata::new( + peer_network_id.peer_id(), + ConnectionId::default(), + NetworkAddress::mock(), + ConnectionOrigin::Inbound, + MessagingProtocolVersion::V1, + protocol_set, + PeerRole::PreferredUpstream, + ) + } else { + ConnectionMetadata::mock(peer_network_id.peer_id()) + } + } + + /// Creates a new consensus observer client, along with the + /// associated network senders and peers and metadata. 
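+    /// (Editor's note) The returned triple is the shared PeersAndMetadata store, the observer
+    /// client under test, and one PeerManagerRequest receiver per network; the tests above
+    /// drain those receivers to answer the client's subscription RPCs.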
+ fn create_consensus_observer_client( + network_ids: &[NetworkId], + ) -> ( + Arc, + Arc>>, + HashMap>, + ) { + // Create the network senders and receivers for each network + let mut network_senders = HashMap::new(); + let mut peer_manager_request_receivers = HashMap::new(); + for network_id in network_ids { + // Create the request managers + let queue_cfg = aptos_channel::Config::new(10).queue_style(QueueStyle::FIFO); + let (peer_manager_request_sender, peer_manager_request_receiver) = queue_cfg.build(); + let (connected_request_sender, _) = queue_cfg.build(); + + // Create the network sender + let network_sender = NetworkSender::new( + PeerManagerRequestSender::new(peer_manager_request_sender), + ConnectionRequestSender::new(connected_request_sender), + ); + + // Save the network sender and the request receiver + network_senders.insert(*network_id, network_sender); + peer_manager_request_receivers.insert(*network_id, peer_manager_request_receiver); + } + + // Create the network client + let peers_and_metadata = PeersAndMetadata::new(network_ids); + let network_client = NetworkClient::new( + vec![ProtocolId::ConsensusObserver], + vec![ProtocolId::ConsensusObserverRpc], + network_senders, + peers_and_metadata.clone(), + ); + + // Create the consensus observer client + let consensus_observer_client = Arc::new(ConsensusObserverClient::new(network_client)); + + ( + peers_and_metadata, + consensus_observer_client, + peer_manager_request_receivers, + ) + } + + /// Creates a new peer with the specified connection metadata + fn create_peer_and_connection( + network_id: NetworkId, + peers_and_metadata: Arc, + distance_from_validators: u64, + ping_latency_secs: Option, + support_consensus_observer: bool, + ) -> PeerNetworkId { + // Create the connection metadata + let peer_network_id = PeerNetworkId::new(network_id, PeerId::random()); + let connection_metadata = if support_consensus_observer { + // Create a protocol set that supports consensus observer + let protocol_set = ProtocolIdSet::from_iter(vec![ + ProtocolId::ConsensusObserver, + ProtocolId::ConsensusObserverRpc, + ]); + + // Create the connection metadata with the protocol set + ConnectionMetadata::new( + peer_network_id.peer_id(), + ConnectionId::default(), + NetworkAddress::mock(), + ConnectionOrigin::Inbound, + MessagingProtocolVersion::V1, + protocol_set, + PeerRole::PreferredUpstream, + ) + } else { + ConnectionMetadata::mock(peer_network_id.peer_id()) + }; + + // Insert the connection into peers and metadata + peers_and_metadata + .insert_connection_metadata(peer_network_id, connection_metadata.clone()) + .unwrap(); + + // Update the peer monitoring metadata + let latest_network_info_response = NetworkInformationResponse { + connected_peers: BTreeMap::new(), + distance_from_validators, + }; + let monitoring_metdata = PeerMonitoringMetadata::new( + ping_latency_secs, + ping_latency_secs, + Some(latest_network_info_response), + None, + None, + ); + peers_and_metadata + .update_peer_monitoring_metadata(peer_network_id, monitoring_metdata.clone()) + .unwrap(); + + peer_network_id + } + + /// Creates a new peer and metadata for testing + fn create_peer_and_metadata( + latency: Option, + distance_from_validators: Option, + support_consensus_observer: bool, + ) -> (PeerNetworkId, PeerMetadata) { + // Create a random peer + let peer_network_id = PeerNetworkId::random(); + + // Create a new peer metadata with the given latency and distance + let connection_metadata = + create_connection_metadata(peer_network_id, support_consensus_observer); 
+ let network_information_response = + distance_from_validators.map(|distance| NetworkInformationResponse { + connected_peers: BTreeMap::new(), + distance_from_validators: distance, + }); + let peer_monitoring_metadata = + PeerMonitoringMetadata::new(latency, None, network_information_response, None, None); + let peer_metadata = + PeerMetadata::new_for_test(connection_metadata, peer_monitoring_metadata); + + (peer_network_id, peer_metadata) + } + + /// Creates a list of peers and metadata for testing + fn create_peers_and_metadata( + empty_latency: bool, + empty_distance: bool, + support_consensus_observer: bool, + num_peers: u64, + ) -> HashMap { + let mut peers_and_metadata = HashMap::new(); + for i in 1..num_peers + 1 { + // Determine the distance for the peer + let distance = if empty_distance { None } else { Some(i) }; + + // Determine the latency for the peer + let latency = if empty_latency { None } else { Some(i as f64) }; + + // Create a new peer and metadata + let (peer_network_id, peer_metadata) = + create_peer_and_metadata(latency, distance, support_consensus_observer); + peers_and_metadata.insert(peer_network_id, peer_metadata); + } + peers_and_metadata + } + + /// Returns the distance from the validators for the specified network + fn get_distance_from_validators(network_id: &NetworkId) -> u64 { + match network_id { + NetworkId::Validator => 0, + NetworkId::Vfn => 1, + NetworkId::Public => 2, + } + } + + /// Fetches and handles the next subscription request from the peer manager + async fn handle_next_subscription_request( + network_id: NetworkId, + peer_manager_request_receivers: &mut HashMap< + NetworkId, + aptos_channel::Receiver<(PeerId, ProtocolId), PeerManagerRequest>, + >, + return_successfully: bool, + ) { + // Get the request receiver for the given network + let peer_manager_request_receiver = + peer_manager_request_receivers.get_mut(&network_id).unwrap(); + + // Wait for the next subscription request + match peer_manager_request_receiver.next().await { + Some(PeerManagerRequest::SendRpc(_, network_request)) => { + // Parse the network request + let data = network_request.data; + let response_sender = network_request.res_tx; + let message: ConsensusObserverMessage = bcs::from_bytes(data.as_ref()).unwrap(); + + // Process the network message + match message { + ConsensusObserverMessage::Request(request) => { + // Verify the request is for a new subscription + match request { + ConsensusObserverRequest::Subscribe => (), + _ => panic!( + "Unexpected consensus observer request received: {:?}!", + request + ), + } + + // Determine the response to send + let response = if return_successfully { + // Ack the subscription request + ConsensusObserverResponse::SubscribeAck + } else { + // Respond with the wrong message type + ConsensusObserverResponse::UnsubscribeAck + }; + let response_message = ConsensusObserverMessage::Response(response); + + // Send the response to the peer + let response_bytes = + bcs::to_bytes(&response_message).map(Bytes::from).unwrap(); + let _ = response_sender.send(Ok(response_bytes)); + }, + _ => panic!( + "Unexpected consensus observer message type received: {:?}!", + message + ), + } + }, + Some(PeerManagerRequest::SendDirectSend(_, _)) => { + panic!("Unexpected direct send message received!") + }, + None => panic!("No subscription request received!"), + } + } + + /// Removes the peer and connection metadata for the given peer + fn remove_peer_and_connection( + peers_and_metadata: Arc, + peer_network_id: PeerNetworkId, + ) { + let peer_metadata = 
peers_and_metadata + .get_metadata_for_peer(peer_network_id) + .unwrap(); + let connection_id = peer_metadata.get_connection_metadata().connection_id; + peers_and_metadata + .remove_peer_metadata(peer_network_id, connection_id) + .unwrap(); + } + + /// A simple helper method that sorts the given peers for a subscription + fn sort_subscription_peers( + consensus_publisher: Arc, + peers_and_metadata: Arc, + active_subscription_peers: Vec, + unhealthy_subscription_peers: Vec, + ) -> Vec { + // Get the connected peers and metadata + let connected_peers_and_metadata = peers_and_metadata + .get_connected_peers_and_metadata() + .unwrap(); + + // Sort the peers for subscription requests + sort_peers_for_subscriptions( + connected_peers_and_metadata, + unhealthy_subscription_peers, + active_subscription_peers, + Some(consensus_publisher), + ) + .unwrap() + } + + /// Verifies that the distance and latencies for the peers are in + /// increasing order (with the distance taking precedence over the latency). + fn verify_increasing_distance_latencies( + peers_and_metadata: &HashMap, + sorted_peers: &[PeerNetworkId], + ) { + let mut previous_latency = None; + let mut previous_distance = 0; + for sorted_peer in sorted_peers { + // Get the distance and latency for the peer + let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); + let distance = get_distance_for_peer(sorted_peer, peer_metadata).unwrap(); + let latency = get_latency_for_peer(sorted_peer, peer_metadata); + + // Verify the order of the peers + if distance == previous_distance { + if let Some(latency) = latency { + if let Some(previous_latency) = previous_latency { + assert!(latency >= previous_latency); + } + } + } else { + assert!(distance > previous_distance); + } + + // Update the previous latency and distance + previous_latency = latency; + previous_distance = distance; + } + } +} diff --git a/consensus/src/consensus_observer/publisher/consensus_publisher.rs b/consensus/src/consensus_observer/publisher/consensus_publisher.rs index 11e2f63aa92de..899901593f7ed 100644 --- a/consensus/src/consensus_observer/publisher/consensus_publisher.rs +++ b/consensus/src/consensus_observer/publisher/consensus_publisher.rs @@ -70,6 +70,26 @@ impl ConsensusPublisher { (consensus_publisher, outbound_message_receiver) } + #[cfg(test)] + /// Creates a new consensus publisher with the given active subscribers + pub fn new_with_active_subscribers( + consensus_observer_config: ConsensusObserverConfig, + consensus_observer_client: Arc< + ConsensusObserverClient>, + >, + active_subscribers: HashSet, + ) -> Self { + // Create the consensus publisher + let (consensus_publisher, _) = + ConsensusPublisher::new(consensus_observer_config, consensus_observer_client); + + // Update the active subscribers + *consensus_publisher.active_subscribers.write() = active_subscribers; + + // Return the publisher + consensus_publisher + } + /// Adds the given subscriber to the set of active subscribers fn add_active_subscriber(&self, peer_network_id: PeerNetworkId) { self.active_subscribers.write().insert(peer_network_id); @@ -150,7 +170,7 @@ impl ConsensusPublisher { let (peer_network_id, message, response_sender) = network_message.into_parts(); // Update the RPC request counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::PUBLISHER_RECEIVED_REQUESTS, message.get_label(), &peer_network_id, diff --git a/consensus/src/counters.rs b/consensus/src/counters.rs index 214506e6f92bc..1af6f4f8c6da1 100644 --- a/consensus/src/counters.rs +++ 
b/consensus/src/counters.rs @@ -662,9 +662,9 @@ pub static ORDER_VOTE_ADDED: Lazy = Lazy::new(|| { .unwrap() }); -pub static ORDER_VOTE_VERY_OLD: Lazy = Lazy::new(|| { +pub static ORDER_VOTE_NOT_IN_RANGE: Lazy = Lazy::new(|| { register_int_counter!( - "aptos_consensus_order_vote_very_old", + "aptos_consensus_order_vote_not_in_range", "Count of the number of order votes that are very old" ) .unwrap() diff --git a/consensus/src/liveness/proposal_generator.rs b/consensus/src/liveness/proposal_generator.rs index 411d24c7ac2fa..334b0a76fbf4e 100644 --- a/consensus/src/liveness/proposal_generator.rs +++ b/consensus/src/liveness/proposal_generator.rs @@ -29,7 +29,7 @@ use aptos_consensus_types::{ }; use aptos_crypto::{hash::CryptoHash, HashValue}; use aptos_infallible::Mutex; -use aptos_logger::{error, info, sample, sample::SampleRate, warn}; +use aptos_logger::{error, sample, sample::SampleRate, warn}; use aptos_types::{on_chain_config::ValidatorTxnConfig, validator_txn::ValidatorTransaction}; use aptos_validator_transaction_pool as vtxn_pool; use futures::future::BoxFuture; @@ -203,7 +203,7 @@ impl PipelineBackpressureConfig { PROPOSER_ESTIMATED_CALIBRATED_BLOCK_TXNS.observe(calibrated_block_size as f64); // Check if calibrated block size is reduction in size, to turn on backpressure. if max_block_txns > calibrated_block_size { - info!( + warn!( block_execution_times = format!("{:?}", block_execution_times), estimated_calibrated_block_sizes = format!("{:?}", sizes), calibrated_block_size = calibrated_block_size, diff --git a/consensus/src/network.rs b/consensus/src/network.rs index 698e089638513..517c01fce472c 100644 --- a/consensus/src/network.rs +++ b/consensus/src/network.rs @@ -346,7 +346,7 @@ impl NetworkSender { if self.author == peer { let self_msg = Event::Message(self.author, msg.clone()); if let Err(err) = self_sender.send(self_msg).await { - error!(error = ?err, "Error delivering a self msg"); + warn!(error = ?err, "Error delivering a self msg"); } continue; } diff --git a/consensus/src/payload_manager.rs b/consensus/src/payload_manager.rs index 4749efb10c643..c2e7c580fb9b3 100644 --- a/consensus/src/payload_manager.rs +++ b/consensus/src/payload_manager.rs @@ -471,7 +471,7 @@ async fn get_transactions_for_observer( }; // If the payload is valid, publish it to any downstream observers - let transaction_payload = block_payload.transaction_payload; + let transaction_payload = block_payload.transaction_payload(); if let Some(consensus_publisher) = consensus_publisher { let message = ConsensusObserverMessage::new_block_payload_message( block.gen_block_info(HashValue::zero(), 0, None), diff --git a/consensus/src/pending_order_votes.rs b/consensus/src/pending_order_votes.rs index d8fe2a21484ab..9ca52a7d47f9c 100644 --- a/consensus/src/pending_order_votes.rs +++ b/consensus/src/pending_order_votes.rs @@ -9,10 +9,9 @@ use aptos_logger::prelude::*; use aptos_types::{ epoch_state::EpochState, ledger_info::{ - LedgerInfo, LedgerInfoWithMixedSignatures, LedgerInfoWithSignatures, VerificationStatus, + LedgerInfo, LedgerInfoWithUnverifiedSignatures, LedgerInfoWithSignatures, VerificationStatus, }, validator_verifier::VerifyError, -}; use std::{collections::HashMap, sync::Arc}; /// Result of the order vote processing. The failure case (Verification error) is returned @@ -23,7 +22,8 @@ pub enum OrderVoteReceptionResult { /// QC currently has. VoteAdded(u128), /// This block has just been certified after adding the vote. 
- NewLedgerInfoWithSignatures(LedgerInfoWithSignatures), + /// Returns the created order certificate and the QC on which the order certificate is based. + NewLedgerInfoWithSignatures((Arc, LedgerInfoWithSignatures)), /// There might be some issues adding a vote ErrorAddingVote(VerifyError), /// Error happens when aggregating signature @@ -35,14 +35,16 @@ pub enum OrderVoteReceptionResult { #[derive(Debug, PartialEq, Eq)] enum OrderVoteStatus { EnoughVotes(LedgerInfoWithSignatures), - NotEnoughVotes(LedgerInfoWithMixedSignatures), + NotEnoughVotes(LedgerInfoWithUnverifiedSignatures), } /// A PendingVotes structure keep track of order votes for the last few rounds pub struct PendingOrderVotes { /// Maps LedgerInfo digest to associated signatures. /// Order vote status stores caches the information on whether the votes are enough to form a QC. - li_digest_to_votes: HashMap, + /// We also store the QC that the order votes certify. + li_digest_to_votes: + HashMap, } impl PendingOrderVotes { @@ -53,6 +55,10 @@ impl PendingOrderVotes { } } + pub fn exists(&self, li_digest: &HashValue) -> bool { + self.li_digest_to_votes.contains_key(li_digest) + } + /// Add a vote to the pending votes // TODO: Should we add any counters here? pub fn insert_order_vote( @@ -60,22 +66,32 @@ impl PendingOrderVotes { order_vote: &OrderVote, epoch_state: Arc, verification_status: VerificationStatus, + verified_quorum_cert: Option, ) -> OrderVoteReceptionResult { // derive data from order vote let li_digest = order_vote.ledger_info().hash(); // obtain the ledger info with signatures associated to the order vote's ledger info - let status = self.li_digest_to_votes.entry(li_digest).or_insert_with(|| { + let (quorum_cert, status) = self.li_digest_to_votes.entry(li_digest).or_insert_with(|| { // if the ledger info with signatures doesn't exist yet, create it - OrderVoteStatus::NotEnoughVotes(LedgerInfoWithMixedSignatures::new( - order_vote.ledger_info().clone(), - )) + ( + verified_quorum_cert.expect( + "Quorum Cert is expected when creating a new entry in pending order votes", + ), + OrderVoteStatus::NotEnoughVotes(LedgerInfoWithUnverifiedSignatures::new( + order_vote.ledger_info().clone(), + PartialSignatures::empty(), + )), + ) }); match status { OrderVoteStatus::EnoughVotes(li_with_sig) => { // we already have enough votes for this ledger info - OrderVoteReceptionResult::NewLedgerInfoWithSignatures(li_with_sig.clone()) + OrderVoteReceptionResult::NewLedgerInfoWithSignatures(( + Arc::new(quorum_cert.clone()), + li_with_sig.clone(), + )) }, OrderVoteStatus::NotEnoughVotes(li_with_sig) => { // we don't have enough votes for this ledger info yet @@ -118,9 +134,10 @@ impl PendingOrderVotes { Ok(ledger_info_with_sig) => { *status = OrderVoteStatus::EnoughVotes(ledger_info_with_sig.clone()); - OrderVoteReceptionResult::NewLedgerInfoWithSignatures( + OrderVoteReceptionResult::NewLedgerInfoWithSignatures(( + Arc::new(quorum_cert.clone()), ledger_info_with_sig, - ) + )) }, Err(e) => OrderVoteReceptionResult::ErrorAggregatingSignature(e), } @@ -142,19 +159,21 @@ impl PendingOrderVotes { // Removes votes older than highest_ordered_round pub fn garbage_collect(&mut self, highest_ordered_round: u64) { - self.li_digest_to_votes.retain(|_, status| match status { - OrderVoteStatus::EnoughVotes(li_with_sig) => { - li_with_sig.ledger_info().round() > highest_ordered_round - }, - OrderVoteStatus::NotEnoughVotes(li_with_sig) => { - li_with_sig.ledger_info().round() > highest_ordered_round - }, - }); + self.li_digest_to_votes + .retain(|_, 
(_, status)| match status { + OrderVoteStatus::EnoughVotes(li_with_sig) => { + li_with_sig.ledger_info().round() > highest_ordered_round + }, + OrderVoteStatus::NotEnoughVotes(li_with_sig) => { + li_with_sig.ledger_info().round() > highest_ordered_round + }, + }); } pub fn has_enough_order_votes(&self, ledger_info: &LedgerInfo) -> bool { let li_digest = ledger_info.hash(); - if let Some(OrderVoteStatus::EnoughVotes(_)) = self.li_digest_to_votes.get(&li_digest) { + if let Some((_, OrderVoteStatus::EnoughVotes(_))) = self.li_digest_to_votes.get(&li_digest) + { return true; } false @@ -194,6 +213,7 @@ mod tests { // create random vote from validator[0] let li1 = random_ledger_info(); + let qc = QuorumCert::dummy(); let order_vote_1_author_0 = OrderVote::new_with_signature( signers[0].author(), li1.clone(), @@ -205,7 +225,8 @@ mod tests { pending_order_votes.insert_order_vote( &order_vote_1_author_0, epoch_state.clone(), - VerificationStatus::Verified + VerificationStatus::Verified, + Some(qc.clone()) ), OrderVoteReceptionResult::VoteAdded(1) ); @@ -215,7 +236,8 @@ mod tests { pending_order_votes.insert_order_vote( &order_vote_1_author_0, epoch_state.clone(), - VerificationStatus::Verified + VerificationStatus::Verified, + Some(qc.clone()) ), OrderVoteReceptionResult::VoteAdded(1) ); @@ -232,6 +254,7 @@ mod tests { &order_vote_2_author_1, epoch_state.clone(), VerificationStatus::Verified + Some(qc.clone()) ), OrderVoteReceptionResult::VoteAdded(1) ); @@ -248,6 +271,7 @@ mod tests { &order_vote_2_author_2, epoch_state.clone(), VerificationStatus::Verified, + Some(qc.clone()), ) { OrderVoteReceptionResult::NewLedgerInfoWithSignatures(li_with_sig) => { assert!(li_with_sig diff --git a/consensus/src/pending_votes.rs b/consensus/src/pending_votes.rs index 03f0a3c41b115..35087a9ddfa31 100644 --- a/consensus/src/pending_votes.rs +++ b/consensus/src/pending_votes.rs @@ -19,8 +19,12 @@ use aptos_crypto::{hash::CryptoHash, HashValue}; use aptos_logger::prelude::*; use aptos_types::{ epoch_state::EpochState, - ledger_info::{LedgerInfoWithMixedSignatures, LedgerInfoWithSignatures, VerificationStatus}, + ledger_info::{LedgerInfoWithUnverifiedSignatures, LedgerInfoWithSignatures, VerificationStatus}, validator_verifier::VerifyError, +use std::{ + collections::{BTreeMap, HashMap}, + fmt, + sync::Arc, }; use std::{collections::HashMap, fmt, sync::Arc}; @@ -139,7 +143,7 @@ impl PendingVotes { let (hash_index, status) = self.li_digest_to_votes.entry(li_digest).or_insert_with(|| { ( len, - VoteStatus::NotEnoughVotes(LedgerInfoWithMixedSignatures::new( + VoteStatus::NotEnoughVotes(LedgerInfoWithUnverifiedSignatures::new( vote.ledger_info().clone(), )), ) diff --git a/consensus/src/pipeline/buffer_item.rs b/consensus/src/pipeline/buffer_item.rs index f44cf291c04ae..3d06658cd4323 100644 --- a/consensus/src/pipeline/buffer_item.rs +++ b/consensus/src/pipeline/buffer_item.rs @@ -16,7 +16,7 @@ use aptos_reliable_broadcast::DropGuard; use aptos_types::{ aggregate_signature::PartialSignatures, block_info::BlockInfo, - ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures, LedgerInfoWithSignatures}, + ledger_info::{LedgerInfo, LedgerInfoWithSignatures, LedgerInfoWithVerifiedSignatures}, validator_verifier::ValidatorVerifier, }; use futures::future::BoxFuture; @@ -68,7 +68,7 @@ fn generate_executed_item_from_ordered( order_vote_enabled: bool, ) -> BufferItem { debug!("{} advance to executed from ordered", commit_info); - let partial_commit_proof = LedgerInfoWithPartialSignatures::new( + let partial_commit_proof = 
LedgerInfoWithVerifiedSignatures::new( generate_commit_ledger_info(&commit_info, &ordered_proof, order_vote_enabled), verified_signatures, ); @@ -106,7 +106,7 @@ pub struct OrderedItem { pub struct ExecutedItem { pub executed_blocks: Vec, - pub partial_commit_proof: LedgerInfoWithPartialSignatures, + pub partial_commit_proof: LedgerInfoWithVerifiedSignatures, pub callback: StateComputerCommitCallBackType, pub commit_info: BlockInfo, pub ordered_proof: LedgerInfoWithSignatures, @@ -114,7 +114,7 @@ pub struct ExecutedItem { pub struct SignedItem { pub executed_blocks: Vec, - pub partial_commit_proof: LedgerInfoWithPartialSignatures, + pub partial_commit_proof: LedgerInfoWithVerifiedSignatures, pub callback: StateComputerCommitCallBackType, pub commit_vote: CommitVote, pub rb_handle: Option<(Instant, DropGuard)>, @@ -146,9 +146,10 @@ impl BufferItem { ordered_blocks: Vec, ordered_proof: LedgerInfoWithSignatures, callback: StateComputerCommitCallBackType, + unverified_signatures: PartialSignatures, ) -> Self { Self::Ordered(Box::new(OrderedItem { - unverified_signatures: PartialSignatures::empty(), + unverified_signatures, commit_proof: None, callback, ordered_blocks, diff --git a/consensus/src/pipeline/buffer_manager.rs b/consensus/src/pipeline/buffer_manager.rs index b3ebe706f608c..38d5aa8578893 100644 --- a/consensus/src/pipeline/buffer_manager.rs +++ b/consensus/src/pipeline/buffer_manager.rs @@ -28,6 +28,7 @@ use aptos_bounded_executor::BoundedExecutor; use aptos_config::config::ConsensusObserverConfig; use aptos_consensus_types::{ common::{Author, Round}, + pipeline::commit_vote::CommitVote, pipelined_block::PipelinedBlock, }; use aptos_crypto::HashValue; @@ -37,8 +38,8 @@ use aptos_network::protocols::{rpc::error::RpcError, wire::handshake::v1::Protoc use aptos_reliable_broadcast::{DropGuard, ReliableBroadcast}; use aptos_time_service::TimeService; use aptos_types::{ - account_address::AccountAddress, epoch_change::EpochChangeProof, epoch_state::EpochState, - ledger_info::LedgerInfoWithSignatures, + account_address::AccountAddress, aggregate_signature::PartialSignatures, + epoch_change::EpochChangeProof, epoch_state::EpochState, ledger_info::LedgerInfoWithSignatures, }; use bytes::Bytes; use futures::{ @@ -51,7 +52,7 @@ use futures::{ }; use once_cell::sync::OnceCell; use std::{ - collections::BTreeMap, + collections::{BTreeMap, HashMap}, sync::{ atomic::{AtomicBool, AtomicU64, Ordering}, Arc, @@ -164,6 +165,11 @@ pub struct BufferManager { consensus_publisher: Option>, pending_commit_proofs: BTreeMap, + + max_pending_rounds_in_commit_vote_cache: u64, + // If the buffer manager receives a commit vote for a block that is not in buffer items, then + // the vote will be cached. We can cache upto max_pending_rounds_in_commit_vote_cache (100) blocks. 
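+    // Editor's sketch of the cache shape, with the generic parameters inferred from the
+    // insertion code below (they were lost in this rendering): BTreeMap<Round, HashMap<Author, CommitVote>>.
+    // For example, a commit vote for round 7 from author X is stored at pending_commit_votes[7][X]
+    // and is replayed as an unverified signature once the round-7 ordered blocks reach the buffer.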
+ pending_commit_votes: BTreeMap>, } impl BufferManager { @@ -194,6 +200,7 @@ impl BufferManager { highest_committed_round: Round, consensus_observer_config: ConsensusObserverConfig, consensus_publisher: Option>, + max_pending_rounds_in_commit_vote_cache: u64, ) -> Self { let buffer = Buffer::::new(); @@ -257,6 +264,9 @@ impl BufferManager { consensus_publisher, pending_commit_proofs: BTreeMap::new(), + + max_pending_rounds_in_commit_vote_cache, + pending_commit_votes: BTreeMap::new(), } } @@ -333,6 +343,30 @@ impl BufferManager { } } + fn try_add_pending_commit_vote(&mut self, vote: CommitVote) -> bool { + let block_id = vote.commit_info().id(); + let round = vote.commit_info().round(); + + // Store the commit vote only if it is for one of the next 100 rounds. + if round > self.highest_committed_round + && self.highest_committed_round + self.max_pending_rounds_in_commit_vote_cache > round + { + self.pending_commit_votes + .entry(round) + .or_default() + .insert(vote.author(), vote); + true + } else { + debug!( + round = round, + highest_committed_round = self.highest_committed_round, + block_id = block_id, + "Received a commit vote not in the next 100 rounds, ignored." + ); + false + } + } + fn drain_pending_commit_proof_till( &mut self, round: Round, @@ -381,7 +415,23 @@ impl BufferManager { .await .expect("Failed to send execution schedule request"); - let item = BufferItem::new_ordered(ordered_blocks, ordered_proof, callback); + let mut unverified_signatures = PartialSignatures::empty(); + if let Some(block) = ordered_blocks.last() { + if let Some(votes) = self.pending_commit_votes.remove(&block.round()) { + votes + .values() + .filter(|vote| vote.commit_info().id() == block.id()) + .for_each(|vote| { + unverified_signatures.add_signature(vote.author(), vote.signature().clone()) + }); + } + } + let item = BufferItem::new_ordered( + ordered_blocks, + ordered_proof, + callback, + unverified_signatures, + ); self.buffer.push_back(item); } @@ -708,7 +758,7 @@ impl BufferManager { // find the corresponding item let author = vote.author(); let commit_info = vote.commit_info().clone(); - info!("Receive commit vote {} from {}", commit_info, author); + trace!("Receive commit vote {} from {}", commit_info, author); let target_block_id = vote.commit_info().id(); let current_cursor = self .buffer @@ -741,6 +791,8 @@ impl BufferManager { } else { return None; } + } else if self.try_add_pending_commit_vote(vote) { + reply_ack(protocol, response_sender); } else { reply_nack(protocol, response_sender); // TODO: send_commit_vote() doesn't care about the response and this should be direct send not RPC } @@ -944,6 +996,7 @@ impl BufferManager { }, Some(Ok(round)) = self.persisting_phase_rx.next() => { // see where `need_backpressure()` is called. 
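+                    // Editor's note: once `round` has been persisted, cached commit votes at or below
+                    // it can no longer contribute to a commit proof, so they are pruned here to keep
+                    // pending_commit_votes bounded.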
+ self.pending_commit_votes.retain(|rnd, _| *rnd > round); self.highest_committed_round = round }, Some(rpc_request) = verified_commit_msg_rx.next() => { diff --git a/consensus/src/pipeline/decoupled_execution_utils.rs b/consensus/src/pipeline/decoupled_execution_utils.rs index 039834497bce9..8178d871e7efc 100644 --- a/consensus/src/pipeline/decoupled_execution_utils.rs +++ b/consensus/src/pipeline/decoupled_execution_utils.rs @@ -44,6 +44,7 @@ pub fn prepare_phases_and_buffer_manager( highest_committed_round: u64, consensus_observer_config: ConsensusObserverConfig, consensus_publisher: Option>, + max_pending_rounds_in_commit_vote_cache: u64, ) -> ( PipelinePhase, PipelinePhase, @@ -134,6 +135,7 @@ pub fn prepare_phases_and_buffer_manager( highest_committed_round, consensus_observer_config, consensus_publisher, + max_pending_rounds_in_commit_vote_cache, ), ) } diff --git a/consensus/src/pipeline/execution_client.rs b/consensus/src/pipeline/execution_client.rs index 9228c2dcaedc8..9d50fe08e4a3f 100644 --- a/consensus/src/pipeline/execution_client.rs +++ b/consensus/src/pipeline/execution_client.rs @@ -282,6 +282,8 @@ impl ExecutionProxyClient { highest_committed_round, consensus_observer_config, consensus_publisher, + self.consensus_config + .max_pending_rounds_in_commit_vote_cache, ); tokio::spawn(execution_schedule_phase.start()); diff --git a/consensus/src/pipeline/tests/buffer_manager_tests.rs b/consensus/src/pipeline/tests/buffer_manager_tests.rs index d8ca6523d1c66..9ef9ed94600cd 100644 --- a/consensus/src/pipeline/tests/buffer_manager_tests.rs +++ b/consensus/src/pipeline/tests/buffer_manager_tests.rs @@ -161,6 +161,7 @@ pub fn prepare_buffer_manager( 0, ConsensusObserverConfig::default(), None, + 100, ); ( diff --git a/consensus/src/round_manager.rs b/consensus/src/round_manager.rs index e72910fc3520e..b263bdc1d9c52 100644 --- a/consensus/src/round_manager.rs +++ b/consensus/src/round_manager.rs @@ -9,8 +9,8 @@ use crate::{ }, counters::{ self, ORDER_CERT_CREATED_WITHOUT_BLOCK_IN_BLOCK_STORE, ORDER_VOTE_ADDED, - ORDER_VOTE_BROADCASTED, ORDER_VOTE_OTHER_ERRORS, ORDER_VOTE_VERY_OLD, PROPOSAL_VOTE_ADDED, - PROPOSAL_VOTE_BROADCASTED, PROPOSED_VTXN_BYTES, PROPOSED_VTXN_COUNT, + ORDER_VOTE_BROADCASTED, ORDER_VOTE_NOT_IN_RANGE, ORDER_VOTE_OTHER_ERRORS, + PROPOSAL_VOTE_ADDED, PROPOSAL_VOTE_BROADCASTED, PROPOSED_VTXN_BYTES, PROPOSED_VTXN_COUNT, QC_AGGREGATED_FROM_VOTES, SYNC_INFO_RECEIVED_WITH_NEWER_CERT, }, error::{error_kind, VerifyError}, @@ -51,7 +51,7 @@ use aptos_consensus_types::{ vote_msg::VoteMsg, wrapped_ledger_info::WrappedLedgerInfo, }; -use aptos_crypto::HashValue; +use aptos_crypto::{hash::CryptoHash, HashValue}; use aptos_infallible::{checked, Mutex}; use aptos_logger::prelude::*; #[cfg(test)] @@ -563,20 +563,28 @@ impl RoundManager { block_parent_hash = proposal_msg.proposal().quorum_cert().certified_block().id(), ); - ensure!( - self.ensure_round_and_sync_up( + let in_correct_round = self + .ensure_round_and_sync_up( proposal_msg.proposal().round(), proposal_msg.sync_info(), proposal_msg.proposer(), ) .await - .context("[RoundManager] Process proposal")?, - "Stale proposal {}, current round {}", - proposal_msg.proposal(), - self.round_state.current_round() - ); - - self.process_proposal(proposal_msg.take_proposal()).await + .context("[RoundManager] Process proposal")?; + if in_correct_round { + self.process_proposal(proposal_msg.take_proposal()).await + } else { + sample!( + SampleRate::Duration(Duration::from_secs(30)), + warn!( + "[sampled] Stale proposal {}, current round 
{}", + proposal_msg.proposal(), + self.round_state.current_round() + ) + ); + counters::ERROR_COUNT.inc(); + Ok(()) + } } pub async fn process_delayed_proposal_msg(&mut self, proposal: Block) -> anyhow::Result<()> { @@ -1099,22 +1107,57 @@ impl RoundManager { { return Ok(()); } - - if order_vote.ledger_info().round() > self.block_store.sync_info().highest_ordered_round() { - let vote_reception_result = self.pending_order_votes.insert_order_vote( - order_vote, - self.epoch_state.clone(), - verification_status, - ); - self.process_order_vote_reception_result(vote_reception_result) + let highest_ordered_round = self.block_store.sync_info().highest_ordered_round(); + let order_vote_round = order_vote_msg.order_vote().ledger_info().round(); + let li_digest = order_vote_msg.order_vote().ledger_info().hash(); + if order_vote_round > highest_ordered_round + && order_vote_round < highest_ordered_round + 100 + { + // If it is the first order vote received for the block, verify the QC and insert along with QC. + // For the subsequent order votes for the same block, we don't have to verify the QC. Just inserting the + // order vote is enough. + let vote_reception_result = if !self.pending_order_votes.exists(&li_digest) { + let start = Instant::now(); + order_vote_msg + .quorum_cert() + .verify(&self.epoch_state().verifier) + .context("[OrderVoteMsg QuorumCert verification failed")?; + counters::VERIFY_MSG + .with_label_values(&["order_vote_qc"]) + .observe(start.elapsed().as_secs_f64()); + self.pending_order_votes.insert_order_vote( + order_vote_msg.order_vote(), + &self.epoch_state.verifier, + Some(order_vote_msg.quorum_cert().clone()), + ) + } else { + self.pending_order_votes.insert_order_vote( + order_vote_msg.order_vote(), + &self.epoch_state.verifier, + None, + ) + }; + self.process_order_vote_reception_result( + vote_reception_result, + order_vote_msg.order_vote().author(), + ) .await?; - } else { - ORDER_VOTE_VERY_OLD.inc(); - info!( - "Received old order vote. Order vote round: {:?}, Highest ordered round: {:?}", - order_vote.ledger_info().round(), - self.block_store.sync_info().highest_ordered_round() - ); + } else { + ORDER_VOTE_NOT_IN_RANGE.inc(); + sample!( + SampleRate::Duration(Duration::from_secs(1)), + info!( + "[sampled] Received an order vote not in the 100 rounds. Order vote round: {:?}, Highest ordered round: {:?}", + order_vote_msg.order_vote().ledger_info().round(), + self.block_store.sync_info().highest_ordered_round() + ) + ); + debug!( + "Received an order vote not in the next 100 rounds. 
Order vote round: {:?}, Highest ordered round: {:?}", + order_vote_msg.order_vote().ledger_info().round(), + self.block_store.sync_info().highest_ordered_round() + ) + } } Ok(()) } @@ -1305,13 +1348,18 @@ impl RoundManager { async fn process_order_vote_reception_result( &mut self, result: OrderVoteReceptionResult, + preferred_peer: Author, ) -> anyhow::Result<()> { match result { - OrderVoteReceptionResult::NewLedgerInfoWithSignatures(ledger_info_with_signatures) => { - self.new_ordered_cert(WrappedLedgerInfo::new( - VoteData::dummy(), - ledger_info_with_signatures, - )) + OrderVoteReceptionResult::NewLedgerInfoWithSignatures(( + verified_qc, + ledger_info_with_signatures, + )) => { + self.new_ordered_cert( + WrappedLedgerInfo::new(VoteData::dummy(), ledger_info_with_signatures), + verified_qc, + preferred_peer, + ) .await }, OrderVoteReceptionResult::VoteAdded(_) => { @@ -1341,49 +1389,61 @@ impl RoundManager { async fn new_qc_from_order_vote_msg( &mut self, - order_vote_msg: &OrderVoteMsg, + verified_qc: Arc<QuorumCert>, + preferred_peer: Author, ) -> anyhow::Result<()> { - if let NeedFetchResult::QCAlreadyExist = self + match self .block_store - .need_fetch_for_quorum_cert(order_vote_msg.quorum_cert()) + .need_fetch_for_quorum_cert(verified_qc.as_ref()) { - return Ok(()); + NeedFetchResult::QCAlreadyExist => Ok(()), + NeedFetchResult::QCBlockExist => { + // If the block is already in the block store, but QC isn't available in the block store, insert QC. + let result = self + .block_store + .insert_quorum_cert( + verified_qc.as_ref(), + &mut self.create_block_retriever(preferred_peer), + ) + .await + .context("[RoundManager] Failed to process the QC from order vote msg"); + self.process_certificates().await?; + result + }, + NeedFetchResult::NeedFetch => { + // If the block doesn't exist, we could ideally do sync up based on the qc. + // But this could trigger fetching a lot of past blocks in case the node is lagging behind. + // So, we just log a warning here to avoid a long sequence of block fetches. + // One of the subsequent syncinfo messages will trigger the block fetch or state sync if required.
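For context on the order-vote change above: the QC attached to an order vote is verified only for the first vote seen for a given ledger-info digest, and later votes for the same digest skip that cost. The following is a minimal, self-contained sketch of that idea; `PendingOrderVotes`, `OrderVote`, and `verify_qc` here are simplified stand-ins, not the actual consensus types or API.

```rust
use std::collections::{hash_map::Entry, HashMap};

// Hypothetical, simplified stand-ins for the real consensus types.
struct QuorumCert;

struct OrderVote {
    li_digest: [u8; 32], // hash of the ledger info this vote certifies
    author: u64,
}

/// Placeholder for the expensive BLS verification of the QC attached to an order vote.
fn verify_qc(_qc: &QuorumCert) -> Result<(), String> {
    Ok(())
}

#[derive(Default)]
struct PendingOrderVotes {
    // One entry per ledger-info digest: the (already verified) QC plus the voters seen so far.
    by_digest: HashMap<[u8; 32], (QuorumCert, Vec<u64>)>,
}

impl PendingOrderVotes {
    /// Inserts an order vote, verifying the attached QC only for the first vote per digest.
    fn insert(&mut self, vote: OrderVote, qc: QuorumCert) -> Result<usize, String> {
        let (_qc, voters) = match self.by_digest.entry(vote.li_digest) {
            Entry::Occupied(e) => e.into_mut(),
            Entry::Vacant(v) => {
                // First vote for this digest: pay the QC verification cost exactly once.
                verify_qc(&qc)?;
                v.insert((qc, Vec::new()))
            },
        };
        if !voters.contains(&vote.author) {
            voters.push(vote.author);
        }
        Ok(voters.len())
    }
}

fn main() {
    let mut pending = PendingOrderVotes::default();
    let digest = [0u8; 32];
    for author in 0u64..3 {
        let votes = pending
            .insert(OrderVote { li_digest: digest, author }, QuorumCert)
            .expect("qc verification");
        println!("votes for digest after author {author}: {votes}");
    }
}
```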
+ ORDER_CERT_CREATED_WITHOUT_BLOCK_IN_BLOCK_STORE.inc(); + sample!( + SampleRate::Duration(Duration::from_millis(200)), + info!( + "Ordered certificate created without block in block store: {:?}", + verified_qc.certified_block() + ); + ); + Err(anyhow::anyhow!( + "Ordered certificate created without block in block store" + )) + }, + NeedFetchResult::QCRoundBeforeRoot => { + Err(anyhow::anyhow!("Ordered certificate is old")) + }, } - - let start = Instant::now(); - order_vote_msg - .quorum_cert() - .verify(&self.epoch_state.verifier) - .context("[OrderVoteMsg QuorumCert verification failed")?; - counters::VERIFY_MSG - .with_label_values(&["order_vote_qc"]) - .observe(start.elapsed().as_secs_f64()); - - let result = self - .block_store - .insert_quorum_cert( - order_vote_msg.quorum_cert(), - &mut self.create_block_retriever(order_vote_msg.order_vote().author()), - ) - .await - .context("[RoundManager] Failed to process the QC from order vote msg"); - self.process_certificates().await?; - result } // Insert ordered certificate formed by aggregating order votes - async fn new_ordered_cert(&mut self, ordered_cert: WrappedLedgerInfo) -> anyhow::Result<()> { - if self - .block_store - .get_block(ordered_cert.commit_info().id()) - .is_none() - { - ORDER_CERT_CREATED_WITHOUT_BLOCK_IN_BLOCK_STORE.inc(); - error!( - "Ordered certificate created without block in block store: {:?}", - ordered_cert - ); - } + async fn new_ordered_cert( + &mut self, + ordered_cert: WrappedLedgerInfo, + verified_qc: Arc, + preferred_peer: Author, + ) -> anyhow::Result<()> { + self.new_qc_from_order_vote_msg(verified_qc, preferred_peer) + .await?; + + // If the block and qc now exist in the quorum store, insert the ordered cert let result = self .block_store .insert_ordered_cert(&ordered_cert) @@ -1507,7 +1567,7 @@ impl RoundManager { Ok(_) => trace!(RoundStateLogSchema::new(round_state)), Err(e) => { counters::ERROR_COUNT.inc(); - warn!(error = ?e, kind = error_kind(&e), RoundStateLogSchema::new(round_state)); + warn!(kind = error_kind(&e), RoundStateLogSchema::new(round_state), "Error: {:#}", e); } } } @@ -1561,7 +1621,7 @@ impl RoundManager { Ok(_) => trace!(RoundStateLogSchema::new(round_state)), Err(e) => { counters::ERROR_COUNT.inc(); - warn!(error = ?e, kind = error_kind(&e), RoundStateLogSchema::new(round_state)); + warn!(kind = error_kind(&e), RoundStateLogSchema::new(round_state), "Error: {:#}", e); } } }, diff --git a/consensus/src/round_manager_test.rs b/consensus/src/round_manager_test.rs index 87389c4e870d8..762442c8d05ca 100644 --- a/consensus/src/round_manager_test.rs +++ b/consensus/src/round_manager_test.rs @@ -4,6 +4,7 @@ use crate::{ block_storage::{pending_blocks::PendingBlocks, BlockReader, BlockStore}, + counters, liveness::{ proposal_generator::{ ChainHealthBackoffConfig, PipelineBackpressureConfig, ProposalGenerator, @@ -1144,11 +1145,13 @@ fn new_round_on_timeout_certificate() { None, ), ); + let before = counters::ERROR_COUNT.get(); assert!(node .round_manager .process_proposal_msg(old_good_proposal) .await - .is_err()); + .is_ok()); // we eat the error + assert_eq!(counters::ERROR_COUNT.get(), before + 1); // but increase the counter }); } diff --git a/crates/aptos/CHANGELOG.md b/crates/aptos/CHANGELOG.md index 79547763c1bd9..516bace3b2591 100644 --- a/crates/aptos/CHANGELOG.md +++ b/crates/aptos/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to the Aptos CLI will be captured in this file. 
This project ## Unreleased +## [4.2.0] - 2024/09/16 +- Update latest VM and associated changes +- Update to latest compiler + ## [4.1.0] - 2024/08/30 - Marks Move 2 and compiler v2 as stable. - Adds new `--move-2` flag to work with Move 2 without need for multiple other flags. diff --git a/crates/aptos/CONTRIBUTING.md b/crates/aptos/CONTRIBUTING.md new file mode 100644 index 0000000000000..7bd0fe7d976ee --- /dev/null +++ b/crates/aptos/CONTRIBUTING.md @@ -0,0 +1,247 @@ +# Aptos CLI Development Guide + +This is a list of design decisions and guidelines for adding commands to the Aptos CLI. + +## Command Groups + +Commands should be grouped into the existing categories. The current categories are: + +- account +- config +- genesis +- governance +- key +- move +- multisig +- node +- stake +- update + +All categories must have a doc comment that describes the command. It must also derive `Parser` and `Subcommand`. For +example: + +```rust +/// Tool for interacting with accounts +/// +/// This tool is used to create accounts, get information about the +/// account's resources, and transfer resources between accounts. +#[derive(Debug, Subcommand)] +pub enum AccountTool { + Create(create::CreateAccount), + CreateResourceAccount(create_resource_account::CreateResourceAccount), + DeriveResourceAccountAddress(derive_resource_account::DeriveResourceAccount), + FundWithFaucet(fund::FundWithFaucet), + Balance(balance::Balance), + List(list::ListAccount), + LookupAddress(key_rotation::LookupAddress), + RotateKey(key_rotation::RotateKey), + Transfer(transfer::TransferCoins), +} +``` + +Then it must also be added to the top level command structure: + +```rust +/// Command Line Interface (CLI) for developing and interacting with the Aptos blockchain +#[derive(Parser)] +#[clap(name = "aptos", author, version, propagate_version = true, styles = aptos_cli_common::aptos_cli_style())] +pub enum Tool { + #[clap(subcommand)] + Account(account::AccountTool), + #[clap(subcommand)] + Config(config::ConfigTool), + #[clap(subcommand)] + Genesis(genesis::GenesisTool), + #[clap(subcommand)] + Governance(governance::GovernanceTool), + Info(InfoTool), + Init(common::init::InitTool), + #[clap(subcommand)] + Key(op::key::KeyTool), + #[clap(subcommand)] + Move(move_tool::MoveTool), + #[clap(subcommand)] + Multisig(account::MultisigAccountTool), + #[clap(subcommand)] + Node(node::NodeTool), + #[clap(subcommand)] + Stake(stake::StakeTool), + #[clap(subcommand)] + Update(update::UpdateTool), +} +``` + +## Commands + +A command is a single top level command for the CLI. The CLI command must complete it's action in the single command +execution. + +### Command Names + +```rust +/// Compiles a package and returns the associated ModuleIds +#[derive(Parser)] +pub struct CompilePackage { + /// Save the package metadata in the package's build directory + /// + /// If set, package metadata should be generated and stored in the package's build directory. + /// This metadata can be used to construct a transaction to publish a package. + #[clap(long)] + pub(crate) save_metadata: bool, + + #[clap(flatten)] + pub(crate) included_artifacts_args: IncludedArtifactsArgs, + #[clap(flatten)] + pub(crate) move_options: MovePackageDir, +} +``` + +Command names should be simple, identifiable, and easy to use. For example, compilation is grouped in `move` and uses +the subcommand `compile`. + +```bash +aptos move compile +``` + +Once the new command is created, it should have `#[derive(Parser)]` added above. 
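As a purely illustrative sketch before the real wiring shown next, a brand-new command following these conventions would look roughly like this; the `Hello` command and its `--name` flag are hypothetical and not part of the CLI.

```rust
use clap::Parser;

/// Says hello to an account (hypothetical example command)
///
/// The first doc line becomes the short help; this paragraph becomes the
/// long help shown by `--help`.
#[derive(Debug, Parser)]
pub struct Hello {
    /// Name to greet in the output
    ///
    /// Flags are snake_case fields exposed as `--name`; avoid `short` flags.
    #[clap(long)]
    pub(crate) name: String,
}

fn main() {
    // Parse from an explicit argv so the sketch can run anywhere.
    let cmd = Hello::parse_from(["hello", "--name", "alice"]);
    // Per the guidelines, human-readable output goes to stderr, not stdout.
    eprintln!("greeting {}", cmd.name);
}
```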
Additionally, it will need to be added +the higher level tool: + +```rust +#[derive(Subcommand)] +pub enum MoveTool { + #[clap(alias = "build")] + Compile(CompilePackage), + #[clap(alias = "build-script")] + CompileScript(CompileScript), + Init(Init), + // ... +} + +impl MoveTool { + pub async fn execute(self) -> CliResult { + match self { + MoveTool::Compile(tool) => tool.execute_serialized().await, + MoveTool::CompileScript(tool) => tool.execute_serialized().await, + MoveTool::Init(tool) => tool.execute_serialized_success().await, + } + } +} +``` + +Note that, there are two types of commands here `execute_serialized()` and `execute_serialized_success()`, if the +command must be returning a value, then it should call `execute_serialized()`, which will convert the input type as JSON +to `stdout`. + +Additionally, `alias` is allowed, but discouraged for new commands. This is mostly to provide either backwards +compatibility or reduce confusion for new users. + +### Command flags + +```rust +#[derive(Parser)] +pub struct CompilePackage { + /// Save the package metadata in the package's build directory + /// + /// If set, package metadata should be generated and stored in the package's build directory. + /// This metadata can be used to construct a transaction to publish a package. + #[clap(long)] + pub(crate) save_metadata: bool, + + // ... +} +``` + +Command inputs should always be documented for help to show up in the CLI. for example, below is the example for +`save_metadata`. They should be snake case, and will show up as a flag. Do not use `short` commands, as they can be +confused between different commands. + +```bash +aptos move compile --save-metadata +``` + +### Command flag groupings + +```rust +/// Compiles a package and returns the associated ModuleIds +#[derive(Parser)] +pub struct CompilePackage { + // ... + #[clap(flatten)] + pub(crate) included_artifacts_args: IncludedArtifactsArgs, + #[clap(flatten)] + pub(crate) move_options: MovePackageDir, +} +``` + +Command flags can be grouped into common structs to be used across multiple commands. These should be flattened by +adding the struct associated and using `#[clap(flatten)]` like above. These should not have a doc comment, and any doc +comments will not end up in the command. Instead, document the structs directly like so: + +```rust +#[derive(Parser)] +pub struct IncludedArtifactsArgs { + /// Artifacts to be generated when building the package + /// + /// Which artifacts to include in the package. This can be one of `none`, `sparse`, and + /// `all`. `none` is the most compact form and does not allow to reconstruct a source + /// package from chain; `sparse` is the minimal set of artifacts needed to reconstruct + /// a source package; `all` includes all available artifacts. The choice of included + /// artifacts heavily influences the size and therefore gas cost of publishing: `none` + /// is the size of bytecode alone; `sparse` is roughly 2 times as much; and `all` 3-4 + /// as much. 
+ #[clap(long, default_value_t = IncludedArtifacts::Sparse)] + pub(crate) included_artifacts: IncludedArtifacts, +} +``` + +### Command Implementation + +```rust +#[async_trait] +impl CliCommand> for CompilePackage { + fn command_name(&self) -> &'static str { + "CompilePackage" + } + + async fn execute(self) -> CliTypedResult> { + let build_options = BuildOptions { + install_dir: self.move_options.output_dir.clone(), + ..self + .included_artifacts_args + .included_artifacts + .build_options( + self.move_options.dev, + self.move_options.skip_fetch_latest_git_deps, + self.move_options.named_addresses(), + self.move_options.override_std.clone(), + self.move_options.bytecode_version, + self.move_options.compiler_version, + self.move_options.language_version, + self.move_options.skip_attribute_checks, + self.move_options.check_test_code, + ) + }; + let pack = BuiltPackage::build(self.move_options.get_package_path()?, build_options) + .map_err(|e| CliError::MoveCompilationError(format!("{:#}", e)))?; + if self.save_metadata { + pack.extract_metadata_and_save()?; + } + let ids = pack + .modules() + .map(|m| m.self_id().to_string()) + .collect::>(); + // TODO: Also say how many scripts are compiled + Ok(ids) + } +} +``` + +Commands should implement the `CliCommand` trait for the package. This allows it to be called upstream generically +and `T` will automatically be serialized to JSON for the output. This allows for typed testing in unit tests, while +still having output converted for the total CLI. + +It's an anti-pattern to `panic`, please avoid panicking, and instead provide `CliError` or `CliError` conversion for the +current types. + +All output from the CLI should use `eprintln!()`, rather than `println!()`. `stdout` is reserved for the JSON output at +the end of the command, `stderr` is used for the rest of the output. diff --git a/crates/aptos/Cargo.toml b/crates/aptos/Cargo.toml index 46e0ee8a6f92d..b1270bc827272 100644 --- a/crates/aptos/Cargo.toml +++ b/crates/aptos/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "aptos" description = "Aptos tool for management of nodes and interacting with the blockchain" -version = "4.1.0" +version = "4.2.0" # Workspace inherited keys authors = { workspace = true } diff --git a/crates/aptos/src/move_tool/bytecode.rs b/crates/aptos/src/move_tool/bytecode.rs index 7a80844d5b55f..6a662300b20db 100644 --- a/crates/aptos/src/move_tool/bytecode.rs +++ b/crates/aptos/src/move_tool/bytecode.rs @@ -39,7 +39,7 @@ const DECOMPILER_EXTENSION: &str = "mv.move"; /// /// For example, if you want to disassemble an on-chain package `PackName` at account `0x42`: /// 1. Download the package with `aptos move download --account 0x42 --package PackName --bytecode` -/// 2. Disassemble the package bytecode with `aptos disassemble --package-path PackName/bytecode_modules` +/// 2. 
Disassemble the package bytecode with `aptos move disassemble --package-path PackName/bytecode_modules` #[derive(Debug, Parser)] pub struct Disassemble { #[clap(flatten)] diff --git a/crates/reliable-broadcast/src/lib.rs b/crates/reliable-broadcast/src/lib.rs index a46e806f9aca9..7246f2b729a52 100644 --- a/crates/reliable-broadcast/src/lib.rs +++ b/crates/reliable-broadcast/src/lib.rs @@ -210,13 +210,13 @@ where fn log_rpc_failure(error: anyhow::Error, receiver: Author) { // Log a sampled warning (to prevent spam) sample!( - SampleRate::Duration(Duration::from_secs(1)), - warn!(error = ?error, "rpc to {} failed, error {}", receiver, error) + SampleRate::Duration(Duration::from_secs(30)), + warn!("[sampled] rpc to {} failed, error {:#}", receiver, error) ); // Log at the debug level (this is useful for debugging // and won't spam the logs in a production environment). - debug!(error = ?error, "rpc to {} failed, error {}", receiver, error); + debug!("rpc to {} failed, error {:#}", receiver, error); } pub struct DropGuard { diff --git a/execution/executor/src/components/chunk_output.rs b/execution/executor/src/components/chunk_output.rs index 7de6d39417a0d..3e471f5dcf714 100644 --- a/execution/executor/src/components/chunk_output.rs +++ b/execution/executor/src/components/chunk_output.rs @@ -301,13 +301,6 @@ pub fn update_counters_for_processed_chunk( ), }, TransactionStatus::Discard(discard_status_code) => { - sample!( - SampleRate::Duration(Duration::from_secs(15)), - warn!( - "Txn being discarded is {:?} with status code {:?}", - txn, discard_status_code - ) - ); ( // Specialize duplicate txns for alerts if *discard_status_code == StatusCode::SEQUENCE_NUMBER_TOO_OLD { @@ -317,6 +310,14 @@ pub fn update_counters_for_processed_chunk( } else if *discard_status_code == StatusCode::TRANSACTION_EXPIRED { "discard_transaction_expired" } else { + // Only log if it is an interesting discard + sample!( + SampleRate::Duration(Duration::from_secs(15)), + warn!( + "[sampled] Txn being discarded is {:?} with status code {:?}", + txn, discard_status_code + ) + ); "discard" }, "error_code", diff --git a/network/framework/src/application/interface.rs b/network/framework/src/application/interface.rs index 6ccb2cf36354e..912e34c49e98b 100644 --- a/network/framework/src/application/interface.rs +++ b/network/framework/src/application/interface.rs @@ -177,7 +177,7 @@ impl NetworkClient { sample!( SampleRate::Duration(Duration::from_secs(10)), warn!( - "Unavailable peers (without a common network protocol): {:?}", + "[sampled] Unavailable peers (without a common network protocol): {:?}", peers_without_a_protocol ) ); diff --git a/network/framework/src/peer/mod.rs b/network/framework/src/peer/mod.rs index 094e3d70c0421..651d5fed0eece 100644 --- a/network/framework/src/peer/mod.rs +++ b/network/framework/src/peer/mod.rs @@ -639,14 +639,17 @@ where .outbound_rpcs .handle_outbound_request(request, write_reqs_tx) { - warn!( - NetworkSchema::new(&self.network_context) - .connection_metadata(&self.connection_metadata), - error = %e, - "Failed to send outbound rpc request for protocol {} to peer: {}. Error: {}", - protocol_id, - self.remote_peer_id().short_str(), - e, + sample!( + SampleRate::Duration(Duration::from_secs(10)), + warn!( + NetworkSchema::new(&self.network_context) + .connection_metadata(&self.connection_metadata), + error = %e, + "[sampled] Failed to send outbound rpc request for protocol {} to peer: {}. 
Error: {}", + protocol_id, + self.remote_peer_id().short_str(), + e, + ) ); } }, diff --git a/network/framework/src/protocols/health_checker/mod.rs b/network/framework/src/protocols/health_checker/mod.rs index fea7da738dd95..c59bc8a4a3dde 100644 --- a/network/framework/src/protocols/health_checker/mod.rs +++ b/network/framework/src/protocols/health_checker/mod.rs @@ -342,11 +342,9 @@ impl + Unpin> HealthChec }, Err(err) => { warn!( - NetworkSchema::new(&self.network_context) - .remote_peer(&peer_id), - error = ?err, + NetworkSchema::new(&self.network_context).remote_peer(&peer_id), round = round, - "{} Ping failed for peer: {} round: {} with error: {:?}", + "{} Ping failed for peer: {} round: {} with error: {:#}", self.network_context, peer_id.short_str(), round, diff --git a/network/framework/src/protocols/rpc/mod.rs b/network/framework/src/protocols/rpc/mod.rs index b948226c4cd70..2be2a22a5f667 100644 --- a/network/framework/src/protocols/rpc/mod.rs +++ b/network/framework/src/protocols/rpc/mod.rs @@ -666,13 +666,16 @@ impl OutboundRpcs { FAILED_LABEL, ) .inc(); - warn!( - NetworkSchema::new(network_context).remote_peer(peer_id), - "{} Error making outbound RPC request to {} (request_id {}). Error: {}", - network_context, - peer_id.short_str(), - request_id, - error + sample!( + SampleRate::Duration(Duration::from_secs(10)), + warn!( + NetworkSchema::new(network_context).remote_peer(peer_id), + "[sampled] {} Error making outbound RPC request to {} (request_id {}). Error: {}", + network_context, + peer_id.short_str(), + request_id, + error + ) ); } }, diff --git a/state-sync/storage-service/server/src/handler.rs b/state-sync/storage-service/server/src/handler.rs index d1748ae8a72d9..fc642e212ded2 100644 --- a/state-sync/storage-service/server/src/handler.rs +++ b/state-sync/storage-service/server/src/handler.rs @@ -247,7 +247,7 @@ impl Handler { { sample!( SampleRate::Duration(Duration::from_secs(ERROR_LOG_FREQUENCY_SECS)), - warn!(LogSchema::new(LogEntry::OptimisticFetchRequest) + trace!(LogSchema::new(LogEntry::OptimisticFetchRequest) .error(&Error::InvalidRequest( "An active optimistic fetch was already found for the peer!".into() )) diff --git a/third_party/move/move-binary-format/src/check_bounds.rs b/third_party/move/move-binary-format/src/check_bounds.rs index a3629a450af90..cee325ef32f87 100644 --- a/third_party/move/move-binary-format/src/check_bounds.rs +++ b/third_party/move/move-binary-format/src/check_bounds.rs @@ -385,8 +385,11 @@ impl<'a> BoundsChecker<'a> { } }, StructFieldInformation::DeclaredVariants(variants) => { - for field in variants.iter().flat_map(|v| v.fields.iter()) { - self.check_field_def(type_param_count, field)?; + for variant in variants { + check_bounds_impl(self.view.identifiers(), variant.name)?; + for field in &variant.fields { + self.check_field_def(type_param_count, field)?; + } } if variants.is_empty() { // Empty variants are not allowed diff --git a/third_party/move/move-binary-format/src/check_complexity.rs b/third_party/move/move-binary-format/src/check_complexity.rs index 79ccc6b48bfc3..232d530404cc9 100644 --- a/third_party/move/move-binary-format/src/check_complexity.rs +++ b/third_party/move/move-binary-format/src/check_complexity.rs @@ -244,6 +244,7 @@ impl<'a> BinaryComplexityMeter<'a> { }, StructFieldInformation::DeclaredVariants(variants) => { for variant in variants { + self.meter_identifier(variant.name)?; for field in &variant.fields { self.charge(field.signature.0.num_nodes() as u64)?; } diff --git 
a/third_party/move/move-binary-format/src/proptest_types/types.rs b/third_party/move/move-binary-format/src/proptest_types/types.rs index 03f5a4f7544c3..566d45809a735 100644 --- a/third_party/move/move-binary-format/src/proptest_types/types.rs +++ b/third_party/move/move-binary-format/src/proptest_types/types.rs @@ -230,15 +230,22 @@ impl StructDefinitionGen { for (i, fd) in fields.into_iter().enumerate() { variant_fields[i % self.variants.len()].push(fd) } + let mut seen_names = BTreeSet::new(); StructFieldInformation::DeclaredVariants( variant_fields .into_iter() .zip(self.variants.iter()) - .map(|(fields, name)| VariantDefinition { - name: IdentifierIndex( - name.index(state.identifiers_len) as TableIndex - ), - fields, + .filter_map(|(fields, name)| { + let variant_name = name.index(state.identifiers_len) as TableIndex; + // avoid duplicates + if seen_names.insert(variant_name) { + Some(VariantDefinition { + name: IdentifierIndex(variant_name), + fields, + }) + } else { + None + } }) .collect(), ) diff --git a/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/mod.rs b/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/mod.rs index 8f0d3704aa5ae..0540045fb8b43 100644 --- a/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/mod.rs +++ b/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/mod.rs @@ -22,4 +22,5 @@ pub mod negative_stack_size_tests; pub mod reference_safety_tests; pub mod signature_tests; pub mod struct_defs_tests; +pub mod variant_name_test; pub mod vec_pack_tests; diff --git a/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/variant_name_test.rs b/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/variant_name_test.rs new file mode 100644 index 0000000000000..fd936241cd1af --- /dev/null +++ b/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/variant_name_test.rs @@ -0,0 +1,81 @@ +// Copyright (c) The Move Contributors +// SPDX-License-Identifier: Apache-2.0 + +use move_binary_format::{ + file_format::{ + AbilitySet, AddressIdentifierIndex, FieldDefinition, IdentifierIndex, ModuleHandle, + ModuleHandleIndex, Signature, SignatureToken, StructDefinition, StructFieldInformation, + StructHandle, StructHandleIndex, StructTypeParameter, TypeSignature, VariantDefinition, + }, + file_format_common::VERSION_7, + CompiledModule, +}; +use move_bytecode_verifier::{ + verifier::verify_module_with_config_for_test_with_version, VerifierConfig, +}; +use move_core_types::{identifier::Identifier, vm_status::StatusCode}; + +/// Tests whether the name of a variant is in bounds. (That is, the IdentifierIndex +/// is in bounds of the identifier table.) 
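To make the intent of the new bounds check concrete before the test below, here is a small self-contained sketch; the types and the `check_bounds_impl` helper are simplified stand-ins for the move-binary-format machinery, not its API. The point is that every variant's name index must land inside the identifier table, and every field of every variant is checked too.

```rust
// Simplified stand-ins for the binary-format tables and indices.
struct VariantDefinition {
    name: usize,        // index into the identifier table
    fields: Vec<usize>, // indices into the signature table (simplified)
}

fn check_bounds_impl(table_len: usize, idx: usize) -> Result<(), String> {
    if idx < table_len {
        Ok(())
    } else {
        Err(format!("index {idx} out of bounds (table has {table_len} entries)"))
    }
}

fn check_variants(
    identifiers_len: usize,
    signatures_len: usize,
    variants: &[VariantDefinition],
) -> Result<(), String> {
    for variant in variants {
        // The variant name itself must be a valid identifier index...
        check_bounds_impl(identifiers_len, variant.name)?;
        // ...and so must every field it declares.
        for field in &variant.fields {
            check_bounds_impl(signatures_len, *field)?;
        }
    }
    if variants.is_empty() {
        return Err("enum must declare at least one variant".to_string());
    }
    Ok(())
}

fn main() {
    // One identifier in the table, but the variant claims identifier index 1: out of bounds.
    let bad = vec![VariantDefinition { name: 1, fields: vec![0] }];
    assert!(check_variants(1, 1, &bad).is_err());
}
```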
+#[test] +fn test_variant_name() { + // This is a POC produced during auditing + let ty = SignatureToken::Bool; + + let cm = CompiledModule { + version: 7, + self_module_handle_idx: ModuleHandleIndex(0), + module_handles: vec![ModuleHandle { + address: AddressIdentifierIndex(0), + name: IdentifierIndex(0), + }], + struct_handles: vec![StructHandle { + module: ModuleHandleIndex(0), + name: IdentifierIndex(0), + abilities: AbilitySet::ALL, + type_parameters: vec![StructTypeParameter { + constraints: AbilitySet::EMPTY, + is_phantom: true, + }], + }], + function_handles: vec![], + field_handles: vec![], + friend_decls: vec![], + struct_def_instantiations: vec![], + function_instantiations: vec![], + field_instantiations: vec![], + signatures: vec![Signature(vec![]), Signature(vec![ty])], + identifiers: vec![Identifier::new("M").unwrap()], + address_identifiers: vec![], + constant_pool: vec![], + metadata: vec![], + struct_defs: vec![StructDefinition { + struct_handle: StructHandleIndex(0), + field_information: StructFieldInformation::DeclaredVariants(vec![VariantDefinition { + fields: vec![FieldDefinition { + name: IdentifierIndex(0), + signature: TypeSignature(SignatureToken::Bool), + }], + // <---- out of bound + name: IdentifierIndex(1), + }]), + }], + function_defs: vec![], + struct_variant_handles: vec![], + struct_variant_instantiations: vec![], + variant_field_handles: vec![], + variant_field_instantiations: vec![], + }; + + let result = verify_module_with_config_for_test_with_version( + "test_variant_name", + &VerifierConfig::production(), + &cm, + Some(VERSION_7), + ); + + assert_eq!( + result.unwrap_err().major_status(), + StatusCode::INDEX_OUT_OF_BOUNDS, + ); +} diff --git a/third_party/move/move-bytecode-verifier/src/check_duplication.rs b/third_party/move/move-bytecode-verifier/src/check_duplication.rs index e79f279dc021d..ad6d317c7b579 100644 --- a/third_party/move/move-bytecode-verifier/src/check_duplication.rs +++ b/third_party/move/move-bytecode-verifier/src/check_duplication.rs @@ -15,7 +15,7 @@ use move_binary_format::{ file_format::{ CompiledModule, CompiledScript, Constant, FieldDefinition, FunctionHandle, FunctionHandleIndex, FunctionInstantiation, ModuleHandle, Signature, - StructFieldInformation, StructHandle, StructHandleIndex, TableIndex, + StructFieldInformation, StructHandle, StructHandleIndex, TableIndex, VariantDefinition, }, IndexKind, }; @@ -52,6 +52,10 @@ impl<'a> DuplicationChecker<'a> { let checker = Self { module }; checker.check_field_handles()?; checker.check_field_instantiations()?; + checker.check_variant_field_handles()?; + checker.check_variant_field_instantiations()?; + checker.check_struct_variant_handles()?; + checker.check_struct_variant_instantiations()?; checker.check_function_definitions()?; checker.check_struct_definitions()?; checker.check_struct_instantiations() @@ -201,6 +205,50 @@ impl<'a> DuplicationChecker<'a> { Ok(()) } + fn check_variant_field_handles(&self) -> PartialVMResult<()> { + match Self::first_duplicate_element(self.module.variant_field_handles()) { + Some(idx) => Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::VariantFieldHandle, + idx, + )), + None => Ok(()), + } + } + + fn check_variant_field_instantiations(&self) -> PartialVMResult<()> { + match Self::first_duplicate_element(self.module.variant_field_instantiations()) { + Some(idx) => Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::VariantFieldInstantiation, + idx, + )), + None => Ok(()), + } + } + + fn 
check_struct_variant_handles(&self) -> PartialVMResult<()> { + match Self::first_duplicate_element(self.module.struct_variant_handles()) { + Some(idx) => Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::StructVariantHandle, + idx, + )), + None => Ok(()), + } + } + + fn check_struct_variant_instantiations(&self) -> PartialVMResult<()> { + match Self::first_duplicate_element(self.module.struct_variant_instantiations()) { + Some(idx) => Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::StructVariantInstantiation, + idx, + )), + None => Ok(()), + } + } + fn check_struct_definitions(&self) -> PartialVMResult<()> { // StructDefinition - contained StructHandle defines uniqueness if let Some(idx) = @@ -212,7 +260,7 @@ impl<'a> DuplicationChecker<'a> { idx, )); } - // Field names in structs must be unique + // Field names in variants and structs must be unique for (struct_idx, struct_def) in self.module.struct_defs().iter().enumerate() { match &struct_def.field_information { StructFieldInformation::Native => continue, @@ -227,6 +275,7 @@ impl<'a> DuplicationChecker<'a> { Self::check_duplicate_fields(fields.iter())? }, StructFieldInformation::DeclaredVariants(variants) => { + Self::check_duplicate_variants(variants.iter())?; for variant in variants { Self::check_duplicate_fields(variant.fields.iter())? } @@ -278,6 +327,20 @@ impl<'a> DuplicationChecker<'a> { } } + fn check_duplicate_variants<'l>( + variants: impl Iterator, + ) -> PartialVMResult<()> { + if let Some(idx) = Self::first_duplicate_element(variants.map(|x| x.name)) { + Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::VariantDefinition, + idx, + )) + } else { + Ok(()) + } + } + fn check_function_definitions(&self) -> PartialVMResult<()> { // FunctionDefinition - contained FunctionHandle defines uniqueness if let Some(idx) = diff --git a/third_party/move/move-bytecode-verifier/src/limits.rs b/third_party/move/move-bytecode-verifier/src/limits.rs index 8d95b0b55aa13..1fcb2436be6f2 100644 --- a/third_party/move/move-bytecode-verifier/src/limits.rs +++ b/third_party/move/move-bytecode-verifier/src/limits.rs @@ -97,10 +97,20 @@ impl<'a> LimitsVerifier<'a> { } if let Some(sdefs) = self.resolver.struct_defs() { for sdef in sdefs { - if let StructFieldInformation::Declared(fdefs) = &sdef.field_information { - for fdef in fdefs { - self.verify_type_node(config, &fdef.signature.0)? - } + match &sdef.field_information { + StructFieldInformation::Native => {}, + StructFieldInformation::Declared(fdefs) => { + for fdef in fdefs { + self.verify_type_node(config, &fdef.signature.0)? + } + }, + StructFieldInformation::DeclaredVariants(variants) => { + for variant in variants { + for fdef in &variant.fields { + self.verify_type_node(config, &fdef.signature.0)? 
+ } + } + }, } } } diff --git a/third_party/move/move-bytecode-verifier/src/signature_v2.rs b/third_party/move/move-bytecode-verifier/src/signature_v2.rs index e618353a0725f..77388dec740ad 100644 --- a/third_party/move/move-bytecode-verifier/src/signature_v2.rs +++ b/third_party/move/move-bytecode-verifier/src/signature_v2.rs @@ -1151,14 +1151,28 @@ fn max_num_of_ty_params_or_args(resolver: BinaryIndexedView) -> usize { if let Some(struct_defs) = resolver.struct_defs() { for struct_def in struct_defs { - if let StructFieldInformation::Declared(fields) = &struct_def.field_information { - for field in fields { - for ty in field.signature.0.preorder_traversal() { - if let SignatureToken::TypeParameter(ty_param_idx) = ty { - n = n.max(*ty_param_idx as usize + 1) + match &struct_def.field_information { + StructFieldInformation::Native => {}, + StructFieldInformation::Declared(fields) => { + for field in fields { + for ty in field.signature.0.preorder_traversal() { + if let SignatureToken::TypeParameter(ty_param_idx) = ty { + n = n.max(*ty_param_idx as usize + 1) + } } } - } + }, + StructFieldInformation::DeclaredVariants(variants) => { + for variant in variants { + for field in &variant.fields { + for ty in field.signature.0.preorder_traversal() { + if let SignatureToken::TypeParameter(ty_param_idx) = ty { + n = n.max(*ty_param_idx as usize + 1) + } + } + } + } + }, } } } diff --git a/third_party/move/move-bytecode-verifier/src/verifier.rs b/third_party/move/move-bytecode-verifier/src/verifier.rs index 9783d8f33e9e3..506560dacc4cf 100644 --- a/third_party/move/move-bytecode-verifier/src/verifier.rs +++ b/third_party/move/move-bytecode-verifier/src/verifier.rs @@ -63,10 +63,21 @@ pub fn verify_module_with_config_for_test( name: &str, config: &VerifierConfig, module: &CompiledModule, +) -> VMResult<()> { + verify_module_with_config_for_test_with_version(name, config, module, None) +} + +pub fn verify_module_with_config_for_test_with_version( + name: &str, + config: &VerifierConfig, + module: &CompiledModule, + bytecode_version: Option, ) -> VMResult<()> { const MAX_MODULE_SIZE: usize = 65355; let mut bytes = vec![]; - module.serialize(&mut bytes).unwrap(); + module + .serialize_for_version(bytecode_version, &mut bytes) + .unwrap(); let now = Instant::now(); let result = verify_module_with_config(config, module); eprintln!( diff --git a/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.exp b/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.exp new file mode 100644 index 0000000000000..4ebc0cefaba81 --- /dev/null +++ b/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.exp @@ -0,0 +1,7 @@ + +Diagnostics: +error: enum type `T` must have at least one variant. 
+ ┌─ tests/checking/variants/variants_empty.move:2:5 + │ +2 │ enum T{} + │ ^^^^^^^^ diff --git a/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.move b/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.move new file mode 100644 index 0000000000000..ceecab879f3de --- /dev/null +++ b/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.move @@ -0,0 +1,3 @@ +module 0x42::variants_empty { + enum T{} +} diff --git a/third_party/move/move-model/src/builder/module_builder.rs b/third_party/move/move-model/src/builder/module_builder.rs index c0ebb45a8c29a..7cf73ae32f664 100644 --- a/third_party/move/move-model/src/builder/module_builder.rs +++ b/third_party/move/move-model/src/builder/module_builder.rs @@ -1257,6 +1257,15 @@ impl<'env, 'translator> ModuleBuilder<'env, 'translator> { } }) .collect_vec(); + if variant_maps.is_empty() { + self.parent.error( + &self.parent.to_loc(&def.loc), + &format!( + "enum type `{}` must have at least one variant.", + qsym.symbol.display(self.parent.env.symbol_pool()) + ), + ) + } (StructLayout::Variants(variant_maps), false) }, EA::StructLayout::Native(_) => (StructLayout::None, false), @@ -3480,9 +3489,10 @@ impl<'env, 'translator> ModuleBuilder<'env, 'translator> { let spec = self.struct_specs.remove(&name.symbol).unwrap_or_default(); let mut field_data: BTreeMap = BTreeMap::new(); let mut variants: BTreeMap = BTreeMap::new(); - match &entry.layout { + let is_enum = match &entry.layout { StructLayout::Singleton(fields, _) => { field_data.extend(fields.values().map(|f| (FieldId::new(f.name), f.clone()))); + false }, StructLayout::Variants(entry_variants) => { for (order, variant) in entry_variants.iter().enumerate() { @@ -3501,9 +3511,10 @@ impl<'env, 'translator> ModuleBuilder<'env, 'translator> { field_data.insert(field_id, field); } } + true }, - StructLayout::None => {}, - } + StructLayout::None => false, + }; let data = StructData { name: name.symbol, loc: entry.loc.clone(), @@ -3513,11 +3524,7 @@ impl<'env, 'translator> ModuleBuilder<'env, 'translator> { abilities: entry.abilities, spec_var_opt: None, field_data, - variants: if variants.is_empty() { - None - } else { - Some(variants) - }, + variants: if is_enum { Some(variants) } else { None }, spec: RefCell::new(spec), is_native: entry.is_native, }; diff --git a/third_party/move/tools/move-resource-viewer/src/lib.rs b/third_party/move/tools/move-resource-viewer/src/lib.rs index 6691bed783f3f..54ffa7f3c65ab 100644 --- a/third_party/move/tools/move-resource-viewer/src/lib.rs +++ b/third_party/move/tools/move-resource-viewer/src/lib.rs @@ -476,8 +476,8 @@ impl MoveValueAnnotator { values .iter() .zip(tys) - .zip(field_names.iter()) - .map(|((v, ty), n)| self.annotate_value(v, ty, limit).map(|v| (n.clone(), v))) + .zip(field_names) + .map(|((v, ty), n)| self.annotate_value(v, ty, limit).map(|v| (n, v))) .collect::>>() }; diff --git a/types/Cargo.toml b/types/Cargo.toml index 79c5b17de97c1..e4b52e7e8521c 100644 --- a/types/Cargo.toml +++ b/types/Cargo.toml @@ -28,6 +28,8 @@ arr_macro = { workspace = true } base64 = { workspace = true } bcs = { workspace = true } bytes = { workspace = true } +dashmap = { workspace = true } +derivative = { workspace = true } fixed = { workspace = true } fxhash = { workspace = true } hashbrown = { workspace = true } diff --git a/types/src/aggregate_signature.rs b/types/src/aggregate_signature.rs index 3202583b6252d..24ac789da671e 100644 --- a/types/src/aggregate_signature.rs +++ b/types/src/aggregate_signature.rs 
@@ -86,12 +86,12 @@ impl PartialSignatures { self.signatures.is_empty() } - pub fn remove_signature(&mut self, validator: AccountAddress) { - self.signatures.remove(&validator); + pub fn remove_signature(&mut self, validator: AccountAddress) -> Option { + self.signatures.remove(&validator) } pub fn add_signature(&mut self, validator: AccountAddress, signature: bls12381::Signature) { - self.signatures.entry(validator).or_insert(signature); + self.signatures.insert(validator, signature); } pub fn signatures(&self) -> &BTreeMap { diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs index 60e737a8cc214..9f3aead311f81 100644 --- a/types/src/ledger_info.rs +++ b/types/src/ledger_info.rs @@ -21,6 +21,7 @@ use serde::{Deserialize, Serialize}; use std::{ collections::BTreeMap, fmt::{Display, Formatter}, + mem, ops::{Deref, DerefMut}, sync::Arc, }; @@ -317,26 +318,21 @@ impl LedgerInfoWithV0 { } } -pub enum VerificationStatus { - Verified, - Unverified, -} - /// Contains the ledger info and partially aggregated signature from a set of validators, this data /// is only used during the aggregating the votes from different validators and is not persisted in DB. #[derive(Clone, Debug, Eq, PartialEq)] -pub struct LedgerInfoWithPartialSignatures { +pub struct LedgerInfoWithVerifiedSignatures { ledger_info: LedgerInfo, partial_sigs: PartialSignatures, } -impl Display for LedgerInfoWithPartialSignatures { +impl Display for LedgerInfoWithVerifiedSignatures { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { write!(f, "{}", self.ledger_info) } } -impl LedgerInfoWithPartialSignatures { +impl LedgerInfoWithVerifiedSignatures { pub fn new(ledger_info: LedgerInfo, signatures: PartialSignatures) -> Self { Self { ledger_info, @@ -380,10 +376,19 @@ impl LedgerInfoWithPartialSignatures { } } -/// Contains the ledger info and partially aggregated signature from a set of validators, this data -/// is only used during the aggregating the votes from different validators and is not persisted in DB. +pub enum SignatureWithStatus { + Verified(bls12381::Signature), + Unverified(bls12381::Signature), +} + +/// This data structure is used to support the optimistic signature verification feature. +/// Contains the ledger info and the signatures received on the ledger info from different validators. +/// Some of the signatures could be verified before inserting into this data structure. Some of the signatures +/// are not verified. Rather than verifying the signatures immediately, we aggregate all the signatures and +/// verify the aggregated signature at once. If the aggregated signature is invalid, then we verify each individual +/// unverified signature and remove the invalid signatures. #[derive(Clone, Debug, Eq, PartialEq)] -pub struct LedgerInfoWithMixedSignatures { +pub struct LedgerInfoWithUnverifiedSignatures { ledger_info: LedgerInfo, // These signatures are not yet verified. For efficiency, once enough unverified signatures are collected, // they will be aggregated and verified. 
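The comments above describe the optimistic path: buffer unverified signatures, aggregate everything, and verify one aggregate signature instead of N individual ones, falling back to per-signature verification only when the aggregate check fails. A rough, self-contained sketch of that control flow follows; `verify_one` and `verify_aggregate` are fake stand-ins for the BLS operations, and the fixed threshold of 3 stands in for the quorum voting power check.

```rust
use std::collections::BTreeMap;

type Author = u64;
type Sig = u64; // stand-in for bls12381::Signature

// Pretend verification: a signature is "valid" if it equals the author id.
fn verify_one(author: Author, sig: Sig) -> bool {
    sig == author
}

fn verify_aggregate(sigs: &BTreeMap<Author, Sig>) -> bool {
    sigs.iter().all(|(a, s)| verify_one(*a, *s))
}

struct PendingLedgerInfo {
    verified: BTreeMap<Author, Sig>,
    unverified: BTreeMap<Author, Sig>,
}

impl PendingLedgerInfo {
    fn aggregate_and_verify(&mut self) -> Result<BTreeMap<Author, Sig>, &'static str> {
        // Optimistic path: try the whole set at once.
        let mut all = self.verified.clone();
        all.extend(self.unverified.iter().map(|(a, s)| (*a, *s)));
        if verify_aggregate(&all) {
            // Everything checked out; promote the unverified signatures.
            self.verified = all.clone();
            self.unverified.clear();
            return Ok(all);
        }
        // Fallback: verify individually and keep only the good ones.
        for (author, sig) in std::mem::take(&mut self.unverified) {
            if verify_one(author, sig) {
                self.verified.insert(author, sig);
            }
            // (The real code would also record bad authors in a pessimistic set.)
        }
        // Stand-in for the quorum voting power check.
        if self.verified.len() >= 3 {
            Ok(self.verified.clone())
        } else {
            Err("too little voting power")
        }
    }
}

fn main() {
    let mut li = PendingLedgerInfo {
        verified: BTreeMap::from([(1, 1)]),
        unverified: BTreeMap::from([(2, 2), (3, 3), (4, 999)]), // author 4 sent a bad signature
    };
    let agg = li.aggregate_and_verify().unwrap();
    assert_eq!(agg.len(), 3); // the bad signature was dropped
}
```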
@@ -391,13 +396,13 @@ pub struct LedgerInfoWithMixedSignatures { verified_signatures: PartialSignatures, } -impl Display for LedgerInfoWithMixedSignatures { +impl Display for LedgerInfoWithUnverifiedSignatures { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { write!(f, "{}", self.ledger_info) } } -impl LedgerInfoWithMixedSignatures { +impl LedgerInfoWithUnverifiedSignatures { pub fn new(ledger_info: LedgerInfo) -> Self { Self { ledger_info, @@ -427,9 +432,6 @@ impl LedgerInfoWithMixedSignatures { if self.verified_signatures.contains_voter(&validator) { return; } - if self.unverified_signatures.contains_voter(&validator) { - self.unverified_signatures.remove_signature(validator); - } self.unverified_signatures .add_signature(validator, signature); } @@ -437,47 +439,41 @@ impl LedgerInfoWithMixedSignatures { pub fn add_signature( &mut self, validator: AccountAddress, - signature: bls12381::Signature, - verification_status: VerificationStatus, + signature_with_status: SignatureWithStatus, ) { - match verification_status { - VerificationStatus::Verified => self.add_verified_signature(validator, signature), - VerificationStatus::Unverified => self.add_unverified_signature(validator, signature), + match signature_with_status { + SignatureWithStatus::Verified(signature) => { + self.add_verified_signature(validator, signature) + }, + SignatureWithStatus::Unverified(signature) => { + self.add_unverified_signature(validator, signature) + }, }; } pub fn verified_voters(&self) -> Vec<&AccountAddress> { - self.verified_signatures - .signatures() - .keys() - .collect_vec() - .clone() + self.verified_signatures.signatures().keys().collect_vec() } pub fn unverified_voters(&self) -> Vec<&AccountAddress> { - self.unverified_signatures - .signatures() - .keys() - .collect_vec() - .clone() + self.unverified_signatures.signatures().keys().collect_vec() } // Collecting all the authors from verified signatures, unverified signatures and the aggregated signature. - pub fn all_voters(&self) -> Vec { + pub fn all_voters(&self) -> impl Iterator { self.verified_signatures .signatures() .keys() .chain(self.unverified_signatures.signatures().keys()) - .cloned() - .collect() } pub fn check_voting_power( &self, verifier: &ValidatorVerifier, + check_super_majority: bool, ) -> std::result::Result { let all_voters = self.all_voters(); - verifier.check_voting_power(all_voters.iter().collect_vec().into_iter(), true) + verifier.check_voting_power(all_voters, check_super_majority) } // Aggregates all the signatures, verifies the aggregate signature, and returns the aggregate signature. 
@@ -485,7 +481,7 @@ impl LedgerInfoWithMixedSignatures { &mut self, epoch_state: Arc, ) -> Result { - self.check_voting_power(&epoch_state.verifier)?; + self.check_voting_power(&epoch_state.verifier, true)?; let mut all_signatures = self.verified_signatures.clone(); for (author, signature) in self.unverified_signatures.signatures() { @@ -494,21 +490,25 @@ impl LedgerInfoWithMixedSignatures { let aggregated_sig = epoch_state.verifier.aggregate_signatures(&all_signatures)?; - let (verified_aggregate_signature, malicious_authors) = match epoch_state + match epoch_state .verifier - .clone() .verify_multi_signatures(self.ledger_info(), &aggregated_sig) { Ok(_) => { - for (account_address, signature) in self.unverified_signatures.signatures() { + for (account_address, signature) in + mem::replace(&mut self.unverified_signatures, PartialSignatures::empty()) + .signatures() + { self.verified_signatures .add_signature(*account_address, signature.clone()); } - self.unverified_signatures = PartialSignatures::empty(); - (aggregated_sig, vec![]) + Ok(LedgerInfoWithSignatures::new( + self.ledger_info.clone(), + aggregated_sig, + )) }, Err(_) => { - // Question: Should we assign min tasks per thread here for into_par_iter()? + // Question: How to add counters to keep track of the total time spent in the parallel threads? let verified = self .unverified_signatures .signatures() @@ -519,40 +519,40 @@ impl LedgerInfoWithMixedSignatures { .verify(*account_address, self.ledger_info(), signature) .is_ok() { - return Some((*account_address, signature.clone())); + return Some(*account_address); } None }) .collect::>(); - for (account_address, signature) in verified { - self.verified_signatures - .add_signature(account_address, signature.clone()); - self.unverified_signatures.remove_signature(account_address); + for account_address in verified { + if let Some(signature) = + self.unverified_signatures.remove_signature(account_address) + { + self.verified_signatures + .add_signature(account_address, signature); + } + } + + // For these authors, we will not use optimistic signature verification in the future. 
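The pessimistic set mentioned in the comment above is shared, concurrently updated state; the diff switches it from an `RwLock<HashSet<_>>` to a `DashSet`. A minimal sketch of that pattern is below, assuming the `dashmap` crate (which this PR adds as a dependency); the `Verifier` struct and the `needs_individual_verification` method name are illustrative, not the actual `ValidatorVerifier` API.

```rust
use dashmap::DashSet;
use std::sync::Arc;

type Author = u64;

#[derive(Clone)]
struct Verifier {
    // Authors whose bogus signatures forced individual verification; shared across threads
    // without an explicit lock, since DashSet handles synchronization internally.
    pessimistic_verify_set: Arc<DashSet<Author>>,
}

impl Verifier {
    fn new() -> Self {
        Self {
            pessimistic_verify_set: Arc::new(DashSet::new()),
        }
    }

    fn add_pessimistic_verify_set(&self, author: Author) {
        self.pessimistic_verify_set.insert(author);
    }

    fn needs_individual_verification(&self, author: &Author) -> bool {
        self.pessimistic_verify_set.contains(author)
    }
}

fn main() {
    let verifier = Verifier::new();
    let v2 = verifier.clone();
    // Insertion from another thread needs no &mut access or explicit locking.
    std::thread::spawn(move || v2.add_pessimistic_verify_set(42))
        .join()
        .unwrap();
    assert!(verifier.needs_individual_verification(&42));
}
```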
+ for author in + mem::replace(&mut self.unverified_signatures, PartialSignatures::empty()) + .signatures() + .keys() + { + epoch_state.verifier.add_pessimistic_verify_set(*author); + } + + match self.check_voting_power(&epoch_state.verifier, true) { + Ok(_) => Ok(LedgerInfoWithSignatures::new( + self.ledger_info.clone(), + epoch_state + .verifier + .aggregate_signatures(&self.verified_signatures)?, + )), + Err(e) => Err(e), } - let malicious_authors = self - .unverified_signatures - .signatures() - .keys() - .cloned() - .collect(); - self.unverified_signatures = PartialSignatures::empty(); - - let aggregated_sig = epoch_state - .verifier - .aggregate_signatures(&self.verified_signatures)?; - // epoch_state - // .read() - // .verifier - // .verify_multi_signatures(self.ledger_info(), &aggregated_sig)?; - (aggregated_sig, malicious_authors) }, - }; - epoch_state - .verifier - .add_malicious_authors(malicious_authors); - self.check_voting_power(&epoch_state.verifier).map(|_| { - LedgerInfoWithSignatures::new(self.ledger_info.clone(), verified_aggregate_signature) - }) + } } pub fn ledger_info(&self) -> &LedgerInfo { @@ -686,14 +686,13 @@ mod tests { let epoch_state = Arc::new(EpochState::new(10, validator_verifier.clone())); let mut ledger_info_with_mixed_signatures = - LedgerInfoWithMixedSignatures::new(ledger_info.clone()); + LedgerInfoWithUnverifiedSignatures::new(ledger_info.clone()); let mut partial_sig = PartialSignatures::empty(); ledger_info_with_mixed_signatures.add_signature( validator_signers[0].author(), - validator_signers[0].sign(&ledger_info).unwrap(), - VerificationStatus::Verified, + SignatureWithStatus::Verified(validator_signers[0].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[0].author(), @@ -702,8 +701,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[1].author(), - validator_signers[1].sign(&ledger_info).unwrap(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(validator_signers[1].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[1].author(), @@ -712,8 +710,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[2].author(), - validator_signers[2].sign(&ledger_info).unwrap(), - VerificationStatus::Verified, + SignatureWithStatus::Verified(validator_signers[2].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[2].author(), @@ -722,15 +719,14 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[3].author(), - validator_signers[3].sign(&ledger_info).unwrap(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(validator_signers[3].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[3].author(), validator_signers[3].sign(&ledger_info).unwrap(), ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 4); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 4); assert_eq!( ledger_info_with_mixed_signatures .unverified_signatures @@ -746,7 +742,7 @@ mod tests { 2 ); assert_eq!( - ledger_info_with_mixed_signatures.check_voting_power(&validator_verifier), + ledger_info_with_mixed_signatures.check_voting_power(&validator_verifier, true), Err(VerifyError::TooLittleVotingPower { voting_power: 4, expected_voting_power: 5 @@ -755,11 +751,10 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[4].author(), - bls12381::Signature::dummy_signature(), - VerificationStatus::Unverified, + 
SignatureWithStatus::Unverified(bls12381::Signature::dummy_signature()), ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 5); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 5); assert_eq!( ledger_info_with_mixed_signatures .unverified_signatures @@ -776,7 +771,7 @@ mod tests { ); assert_eq!( ledger_info_with_mixed_signatures - .check_voting_power(&validator_verifier) + .check_voting_power(&validator_verifier, true) .unwrap(), 5 ); @@ -801,20 +796,19 @@ mod tests { .len(), 4 ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 4); - assert_eq!(epoch_state.verifier.malicious_authors().len(), 1); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 4); + assert_eq!(epoch_state.verifier.pessimistic_verify_set().len(), 1); ledger_info_with_mixed_signatures.add_signature( validator_signers[5].author(), - validator_signers[5].sign(&ledger_info).unwrap(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(validator_signers[5].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[5].author(), validator_signers[5].sign(&ledger_info).unwrap(), ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 5); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 5); assert_eq!( ledger_info_with_mixed_signatures .unverified_signatures @@ -831,7 +825,7 @@ mod tests { ); assert_eq!( ledger_info_with_mixed_signatures - .check_voting_power(&validator_verifier) + .check_voting_power(&validator_verifier, true) .unwrap(), 5 ); @@ -861,18 +855,17 @@ mod tests { .len(), 5 ); - assert_eq!(epoch_state.verifier.malicious_authors().len(), 1); + assert_eq!(epoch_state.verifier.pessimistic_verify_set().len(), 1); ledger_info_with_mixed_signatures.add_signature( validator_signers[6].author(), - bls12381::Signature::dummy_signature(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(bls12381::Signature::dummy_signature()), ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 6); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 6); assert_eq!( ledger_info_with_mixed_signatures - .check_voting_power(&validator_verifier) + .check_voting_power(&validator_verifier, true) .unwrap(), 6 ); @@ -896,7 +889,7 @@ mod tests { .len(), 5 ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 5); - assert_eq!(epoch_state.verifier.malicious_authors().len(), 2); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 5); + assert_eq!(epoch_state.verifier.pessimistic_verify_set().len(), 2); } } diff --git a/types/src/transaction/use_case.rs b/types/src/transaction/use_case.rs index ee72a61b5d964..d947b76874b44 100644 --- a/types/src/transaction/use_case.rs +++ b/types/src/transaction/use_case.rs @@ -18,7 +18,7 @@ impl std::fmt::Debug for UseCaseKey { match self { Platform => write!(f, "PP"), - ContractAddress(addr) => write!(f, "c{}", hex::encode_upper(&addr[31..])), + ContractAddress(addr) => write!(f, "c{}", hex::encode_upper(&addr[29..])), Others => write!(f, "OO"), } } diff --git a/types/src/validator_verifier.rs b/types/src/validator_verifier.rs index 564044649fdd9..dfac27817cfc9 100644 --- a/types/src/validator_verifier.rs +++ b/types/src/validator_verifier.rs @@ -17,13 +17,14 @@ use aptos_crypto::{ hash::CryptoHash, Signature, VerifyingKey, }; -use aptos_infallible::RwLock; +use dashmap::DashSet; +use derivative::Derivative; use itertools::Itertools; #[cfg(any(test, feature = "fuzzing"))] use 
proptest_derive::Arbitrary; use serde::{Deserialize, Deserializer, Serialize}; use std::{ - collections::{BTreeMap, HashMap, HashSet}, + collections::{BTreeMap, HashMap}, fmt, sync::Arc, }; @@ -130,7 +131,8 @@ impl TryFrom for ValidatorConsensusInfo { /// Supports validation of signatures for known authors with individual voting powers. This struct /// can be used for all signature verification operations including block and network signature /// verification, respectively. -#[derive(Clone, Debug, Serialize)] +#[derive(Clone, Debug, Derivative, Serialize)] +#[derivative(PartialEq, Eq)] pub struct ValidatorVerifier { /// A vector of each validator's on-chain account address to its pubkeys and voting power. validator_infos: Vec, @@ -149,21 +151,10 @@ pub struct ValidatorVerifier { /// submitted bad votes that has resulted in having to verify each vote individually. Further votes by these validators /// will be verified individually bypassing the optimization. #[serde(skip)] - malicious_authors: Arc>>, + #[derivative(PartialEq = "ignore")] + pessimistic_verify_set: Arc>, } -// Implement Eq and PartialEq for ValidatorVerifier. Skip malicious_authors field in the comparison. -impl PartialEq for ValidatorVerifier { - fn eq(&self, other: &Self) -> bool { - self.validator_infos == other.validator_infos - && self.quorum_voting_power == other.quorum_voting_power - && self.total_voting_power == other.total_voting_power - && self.address_to_validator_index == other.address_to_validator_index - } -} - -impl Eq for ValidatorVerifier {} - /// Reconstruct fields from the raw data upon deserialization. impl<'de> Deserialize<'de> for ValidatorVerifier { fn deserialize(deserializer: D) -> Result @@ -200,7 +191,7 @@ impl ValidatorVerifier { quorum_voting_power, total_voting_power, address_to_validator_index, - malicious_authors: Arc::new(RwLock::new(HashSet::new())), + pessimistic_verify_set: Arc::new(DashSet::new()), } } @@ -236,18 +227,12 @@ impl ValidatorVerifier { )) } - pub fn add_malicious_authors(&self, malicious_authors: Vec) { - for author in malicious_authors { - self.malicious_authors.write().insert(author); - } - } - - pub fn malicious_authors(&self) -> HashSet { - self.malicious_authors.read().clone() + pub fn add_pessimistic_verify_set(&self, author: AccountAddress) { + self.pessimistic_verify_set.insert(author); } - pub fn is_malicious_author(&self, author: &AccountAddress) -> bool { - self.malicious_authors.read().contains(author) + pub fn pessimistic_verify_set(&self) -> Arc> { + self.pessimistic_verify_set.clone() } /// Helper method to initialize with a single author and public key with quorum voting power 1.
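Finally, the `ValidatorVerifier` change above replaces a hand-written `PartialEq` with the `derivative` crate so the runtime-only `pessimistic_verify_set` field is excluded from comparisons. A small sketch of that derive pattern, with illustrative field names rather than the full struct:

```rust
use dashmap::DashSet;
use derivative::Derivative;
use std::sync::Arc;

#[derive(Derivative)]
#[derivative(PartialEq, Eq)]
struct Verifier {
    // Identity-defining state participates in equality as usual.
    quorum_voting_power: u128,
    // Runtime-only bookkeeping is explicitly excluded from comparisons.
    #[derivative(PartialEq = "ignore")]
    pessimistic_verify_set: Arc<DashSet<u64>>,
}

fn main() {
    let a = Verifier {
        quorum_voting_power: 7,
        pessimistic_verify_set: Arc::new(DashSet::new()),
    };
    let b = Verifier {
        quorum_voting_power: 7,
        pessimistic_verify_set: Arc::new(DashSet::new()),
    };
    b.pessimistic_verify_set.insert(1);
    // Still equal: the ignored field does not affect PartialEq.
    assert!(a == b);
}
```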