diff --git a/Cargo.lock b/Cargo.lock
index 6c5b3c7e284a7..f90fd7f8daca4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1678,6 +1678,7 @@ dependencies = [
  "aptos-logger",
  "aptos-rest-client",
  "aptos-retrier",
+ "aptos-runtimes",
  "aptos-sdk",
  "aptos-short-hex-str",
  "aptos-state-sync-driver",
@@ -3945,6 +3946,7 @@ dependencies = [
  "aptos-temppath",
  "aptos-types",
  "assert_approx_eq",
+ "async-trait",
  "bcs 0.1.4",
  "csv",
  "futures",
@@ -3953,6 +3955,7 @@ dependencies = [
  "rand 0.7.3",
  "reqwest",
  "tokio",
+ "tokio-scoped",
 ]
 
 [[package]]
@@ -15696,6 +15699,16 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "tokio-scoped"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e4beb8ba13bc53ac53ce1d52b42f02e5d8060f0f42138862869beb769722b256"
+dependencies = [
+ "tokio",
+ "tokio-stream",
+]
+
 [[package]]
 name = "tokio-stream"
 version = "0.1.14"
diff --git a/crates/transaction-emitter-lib/src/emitter/account_minter.rs b/crates/transaction-emitter-lib/src/emitter/account_minter.rs
index a96e91d11eb1d..6b41ddb4cb7cd 100644
--- a/crates/transaction-emitter-lib/src/emitter/account_minter.rs
+++ b/crates/transaction-emitter-lib/src/emitter/account_minter.rs
@@ -18,6 +18,7 @@ use aptos_sdk::{
 use aptos_transaction_generator_lib::{
     CounterState, ReliableTransactionSubmitter, RootAccountHandle, SEND_AMOUNT,
 };
+use aptos_types::account_address::AccountAddress;
 use core::{
     cmp::min,
     result::Result::{Err, Ok},
@@ -31,7 +32,7 @@ use std::{
 };
 
 pub struct SourceAccountManager<'t> {
-    pub source_account: &'t LocalAccount,
+    pub source_account: Arc<LocalAccount>,
     pub txn_executor: &'t dyn ReliableTransactionSubmitter,
     pub req: &'t EmitJobRequest,
     pub txn_factory: TransactionFactory,
@@ -43,17 +44,21 @@ impl<'t> RootAccountHandle for SourceAccountManager<'t> {
         self.check_approve_funds(amount, reason).await.unwrap();
     }
 
-    fn get_root_account(&self) -> &LocalAccount {
-        self.source_account
+    fn get_root_account(&self) -> Arc<LocalAccount> {
+        self.source_account.clone()
     }
 }
 
 impl<'t> SourceAccountManager<'t> {
+    fn source_account_address(&self) -> AccountAddress {
+        self.source_account.address()
+    }
+
     // returns true if we might want to recheck the volume, as it was auto-approved.
     async fn check_approve_funds(&self, amount: u64, reason: &str) -> Result<bool> {
         let balance = self
             .txn_executor
-            .get_account_balance(self.source_account.address())
+            .get_account_balance(self.source_account_address())
             .await?;
         Ok(if self.req.mint_to_root {
             // We have a root account, so amount of funds minted is not a problem
@@ -63,7 +68,7 @@ impl<'t> SourceAccountManager<'t> {
             if balance < amount.checked_mul(100).unwrap_or(u64::MAX / 2) {
                 info!(
                     "Mint account {} current balance is {}, needing {} for {}, minting to refil it fully",
-                    self.source_account.address(),
+                    self.source_account_address(),
                     balance,
                     amount,
                     reason,
@@ -74,7 +79,7 @@ impl<'t> SourceAccountManager<'t> {
             } else {
                 info!(
                     "Mint account {} current balance is {}, needing {} for {}. Proceeding without minting, as balance would overflow otherwise",
-                    self.source_account.address(),
+                    self.source_account_address(),
                     balance,
                     amount,
                     reason,
@@ -85,7 +90,7 @@ impl<'t> SourceAccountManager<'t> {
         } else {
             info!(
                 "Source account {} current balance is {}, needed {} coins for {}, or {:.3}% of its balance",
-                self.source_account.address(),
+                self.source_account_address(),
                 balance,
                 amount,
                 reason,
@@ -95,7 +100,7 @@ impl<'t> SourceAccountManager<'t> {
             if balance < amount {
                 return Err(anyhow!(
                     "Source ({}) doesn't have enough coins, balance {} < needed {} for {}",
-                    self.source_account.address(),
+                    self.source_account_address(),
                     balance,
                     amount,
                     reason
@@ -128,7 +133,7 @@ impl<'t> SourceAccountManager<'t> {
         let txn = self
             .source_account
             .sign_with_transaction_builder(self.txn_factory.payload(
-                aptos_stdlib::aptos_coin_mint(self.source_account.address(), amount),
+                aptos_stdlib::aptos_coin_mint(self.source_account_address(), amount),
             ));
 
         if let Err(e) = txn_executor.execute_transactions(&[txn]).await {
@@ -136,7 +141,7 @@ impl<'t> SourceAccountManager<'t> {
             // so check on failure if another emitter has refilled it instead
             let balance = txn_executor
-                .get_account_balance(self.source_account.address())
+                .get_account_balance(self.source_account_address())
                 .await?;
             if balance > u64::MAX / 2 {
                 Ok(())
@@ -393,7 +398,7 @@ impl<'t> AccountMinter<'t> {
 
     pub async fn create_and_fund_seed_accounts(
         &mut self,
-        mut new_source_account: Option<LocalAccount>,
+        new_source_account: Option<LocalAccount>,
         txn_executor: &dyn ReliableTransactionSubmitter,
         account_generator: Box<dyn LocalAccountGenerator>,
         seed_account_num: usize,
@@ -407,6 +412,10 @@ impl<'t> AccountMinter<'t> {
         );
         let mut i = 0;
         let mut seed_accounts = vec![];
+        let source_account = match new_source_account {
+            None => self.source_account.get_root_account().clone(),
+            Some(param_account) => Arc::new(param_account),
+        };
         while i < seed_account_num {
             let batch_size = min(max_submit_batch_size, seed_account_num - i);
             let mut rng = StdRng::from_rng(self.rng()).unwrap();
@@ -418,11 +427,7 @@ impl<'t> AccountMinter<'t> {
                 .iter()
                 .map(|account| {
                     create_and_fund_account_request(
-                        if let Some(account) = &mut new_source_account {
-                            account
-                        } else {
-                            self.source_account.get_root_account()
-                        },
+                        source_account.clone(),
                         coins_per_seed_account,
                         account.public_key(),
                         txn_factory,
@@ -470,16 +475,17 @@ impl<'t> AccountMinter<'t> {
         coins_for_source: u64,
     ) -> Result<LocalAccount> {
         const NUM_TRIES: usize = 3;
+        let root_account = self.source_account.get_root_account();
+        let root_address = root_account.address();
         for i in 0..NUM_TRIES {
-            self.source_account.get_root_account().set_sequence_number(
-                txn_executor
-                    .query_sequence_number(self.source_account.get_root_account().address())
-                    .await?,
-            );
+            {
+                let new_sequence_number = txn_executor.query_sequence_number(root_address).await?;
+                root_account.set_sequence_number(new_sequence_number);
+            }
 
             let new_source_account = LocalAccount::generate(self.rng());
             let txn = create_and_fund_account_request(
-                self.source_account.get_root_account(),
+                root_account.clone(),
                 coins_for_source,
                 new_source_account.public_key(),
                 &self.txn_factory,
@@ -530,12 +536,14 @@ async fn create_and_fund_new_accounts(
         .chunks(max_num_accounts_per_batch)
         .map(|chunk| chunk.to_vec())
         .collect::<Vec<_>>();
+    let source_address = source_account.address();
+    let source_account = Arc::new(source_account);
     for batch in accounts_by_batch {
         let creation_requests: Vec<_> = batch
             .iter()
             .map(|account| {
                 create_and_fund_account_request(
-                    &source_account,
+                    source_account.clone(),
                     coins_per_new_account,
                     account.public_key(),
                     txn_factory,
@@ -546,13 +554,13 @@ async fn create_and_fund_new_accounts(
         txn_executor
             .execute_transactions_with_counter(&creation_requests, counters)
             .await
-            .with_context(|| format!("Account {} couldn't mint", source_account.address()))?;
+            .with_context(|| format!("Account {} couldn't mint", source_address))?;
     }
     Ok(())
 }
 
 pub fn create_and_fund_account_request(
-    creation_account: &LocalAccount,
+    creation_account: Arc<LocalAccount>,
     amount: u64,
     pubkey: &Ed25519PublicKey,
     txn_factory: &TransactionFactory,
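The account_minter changes above replace `&LocalAccount` / `&mut LocalAccount` with a shared `Arc<LocalAccount>`; this only type-checks because the account's sequence number is interior-mutable (note `root_account.set_sequence_number(...)` being called through the `Arc`). A minimal sketch of that pattern, using a hypothetical stand-in `Account` type rather than the real aptos-sdk `LocalAccount`:

```rust
use std::sync::{
    atomic::{AtomicU64, Ordering},
    Arc,
};

// Hypothetical stand-in for aptos-sdk's LocalAccount: the sequence number is
// interior-mutable, which is what lets `&mut LocalAccount` become Arc<LocalAccount>.
struct Account {
    sequence_number: AtomicU64,
}

impl Account {
    // Takes &self, not &mut self, so holders of an Arc can still call it.
    fn set_sequence_number(&self, n: u64) {
        self.sequence_number.store(n, Ordering::SeqCst);
    }

    // Atomically claims the next sequence number for a new transaction.
    fn next_sequence_number(&self) -> u64 {
        self.sequence_number.fetch_add(1, Ordering::SeqCst)
    }
}

#[tokio::main]
async fn main() {
    let source_account = Arc::new(Account {
        sequence_number: AtomicU64::new(0),
    });
    source_account.set_sequence_number(0);

    // Several concurrent funding tasks share the one signer, mirroring
    // create_and_fund_account_request(source_account.clone(), ...).
    let tasks: Vec<_> = (0..4)
        .map(|_| {
            let account = source_account.clone();
            tokio::spawn(async move { account.next_sequence_number() })
        })
        .collect();
    for task in tasks {
        task.await.unwrap();
    }
    assert_eq!(source_account.next_sequence_number(), 4);
}
```

Because the counter is atomic, many funding tasks can sign from the same source account without a mutable borrow.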
diff --git a/crates/transaction-emitter-lib/src/emitter/mod.rs b/crates/transaction-emitter-lib/src/emitter/mod.rs
index 9c40a855ed82c..4b31491d81b42 100644
--- a/crates/transaction-emitter-lib/src/emitter/mod.rs
+++ b/crates/transaction-emitter-lib/src/emitter/mod.rs
@@ -17,7 +17,7 @@ use crate::emitter::{
 use again::RetryPolicy;
 use anyhow::{ensure, format_err, Result};
 use aptos_config::config::DEFAULT_MAX_SUBMIT_TRANSACTION_BATCH_SIZE;
-use aptos_logger::{debug, error, info, sample, sample::SampleRate, warn};
+use aptos_logger::{error, info, sample, sample::SampleRate, warn};
 use aptos_rest_client::{aptos_api_types::AptosErrorCode, error::RestError, Client as RestClient};
 use aptos_sdk::{
     move_types::account_address::AccountAddress,
@@ -649,7 +649,7 @@ impl EmitJob {
     }
 }
 
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 pub struct TxnEmitter {
     txn_factory: TransactionFactory,
     rng: StdRng,
@@ -673,7 +673,7 @@ impl TxnEmitter {
 
     pub async fn start_job(
         &mut self,
-        root_account: &LocalAccount,
+        root_account: Arc<LocalAccount>,
         req: EmitJobRequest,
         stats_tracking_phases: usize,
     ) -> Result<EmitJob> {
@@ -708,7 +708,7 @@ impl TxnEmitter {
         let account_generator = create_account_generator(req.account_type);
 
         let mut all_accounts = create_accounts(
-            root_account,
+            root_account.clone(),
             &init_txn_factory,
             account_generator,
             &req,
@@ -730,7 +730,7 @@ impl TxnEmitter {
             retry_after: req.init_retry_interval,
         };
         let source_account_manager = SourceAccountManager {
-            source_account: root_account,
+            source_account: root_account.clone(),
             txn_executor: &txn_executor,
             req: &req,
             txn_factory: init_txn_factory.clone(),
@@ -819,7 +819,7 @@ impl TxnEmitter {
 
     async fn emit_txn_for_impl(
         mut self,
-        source_account: &LocalAccount,
+        source_account: Arc<LocalAccount>,
         emit_job_request: EmitJobRequest,
         duration: Duration,
         print_stats_interval: Option<u64>,
@@ -855,7 +855,7 @@ impl TxnEmitter {
 
     pub async fn emit_txn_for(
         self,
-        source_account: &mut LocalAccount,
+        source_account: Arc<LocalAccount>,
         emit_job_request: EmitJobRequest,
         duration: Duration,
     ) -> Result<TxnStats> {
@@ -865,7 +865,7 @@ impl TxnEmitter {
 
     pub async fn emit_txn_for_with_stats(
         self,
-        source_account: &LocalAccount,
+        source_account: Arc<LocalAccount>,
         emit_job_request: EmitJobRequest,
         duration: Duration,
         interval_secs: u64,
@@ -982,78 +982,6 @@ async fn wait_for_accounts_sequence(
     (latest_fetched_counts, sum_of_completion_timestamps_millis)
 }
 
-fn update_seq_num_and_get_num_expired(
-    accounts: &mut [LocalAccount],
-    account_to_start_and_end_seq_num: HashMap<AccountAddress, (u64, u64)>,
-    latest_fetched_counts: HashMap<AccountAddress, u64>,
-) -> (usize, usize) {
-    accounts.iter_mut().for_each(|account| {
-        let (start_seq_num, end_seq_num) =
-            if let Some(pair) = account_to_start_and_end_seq_num.get(&account.address()) {
-                pair
-            } else {
-                return;
-            };
-        assert!(account.sequence_number() == *end_seq_num);
-
-        match latest_fetched_counts.get(&account.address()) {
-            Some(count) => {
-                if *count != account.sequence_number() {
-                    assert!(account.sequence_number() > *count);
-                    debug!(
-                        "Stale sequence_number for {}, expected {}, setting to {}",
-                        account.address(),
-                        account.sequence_number(),
-                        count
-                    );
-                    account.set_sequence_number(*count);
-                }
-            },
-            None => {
-                debug!(
-                    "Couldn't fetch sequence_number for {}, expected {}, setting to {}",
-                    account.address(),
-                    account.sequence_number(),
-                    start_seq_num
-                );
-                account.set_sequence_number(*start_seq_num);
-            },
-        }
-    });
-
-    account_to_start_and_end_seq_num
-        .iter()
-        .map(
-            |(address, (start_seq_num, end_seq_num))| match latest_fetched_counts.get(address) {
-                Some(count) => {
-                    assert!(
-                        *count <= *end_seq_num,
-                        "{address} :: {count} > {end_seq_num}"
-                    );
-                    if *count >= *start_seq_num {
-                        (
-                            (*count - *start_seq_num) as usize,
-                            (*end_seq_num - *count) as usize,
-                        )
-                    } else {
-                        debug!(
-                            "Stale sequence_number fetched for {}, start_seq_num {}, fetched {}",
-                            address, start_seq_num, *count
-                        );
-                        (0, (*end_seq_num - *start_seq_num) as usize)
-                    }
-                },
-                None => (0, (end_seq_num - start_seq_num) as usize),
-            },
-        )
-        .fold(
-            (0, 0),
-            |(committed, expired), (cur_committed, cur_expired)| {
-                (committed + cur_committed, expired + cur_expired)
-            },
-        )
-}
-
 pub async fn query_sequence_number(client: &RestClient, address: AccountAddress) -> Result<u64> {
     Ok(query_sequence_numbers(client, [address].iter()).await?.0[0].1)
 }
@@ -1133,7 +1061,7 @@ pub fn parse_seed(seed_string: &str) -> [u8; 32] {
 }
 
 pub async fn create_accounts(
-    root_account: &LocalAccount,
+    root_account: Arc<LocalAccount>,
     txn_factory: &TransactionFactory,
     account_generator: Box<dyn LocalAccountGenerator>,
     req: &EmitJobRequest,
{}", - account.address(), - account.sequence_number(), - count - ); - account.set_sequence_number(*count); - } - }, - None => { - debug!( - "Couldn't fetch sequence_number for {}, expected {}, setting to {}", - account.address(), - account.sequence_number(), - start_seq_num - ); - account.set_sequence_number(*start_seq_num); - }, - } - }); - - account_to_start_and_end_seq_num - .iter() - .map( - |(address, (start_seq_num, end_seq_num))| match latest_fetched_counts.get(address) { - Some(count) => { - assert!( - *count <= *end_seq_num, - "{address} :: {count} > {end_seq_num}" - ); - if *count >= *start_seq_num { - ( - (*count - *start_seq_num) as usize, - (*end_seq_num - *count) as usize, - ) - } else { - debug!( - "Stale sequence_number fetched for {}, start_seq_num {}, fetched {}", - address, start_seq_num, *count - ); - (0, (*end_seq_num - *start_seq_num) as usize) - } - }, - None => (0, (end_seq_num - start_seq_num) as usize), - }, - ) - .fold( - (0, 0), - |(committed, expired), (cur_committed, cur_expired)| { - (committed + cur_committed, expired + cur_expired) - }, - ) -} - pub async fn query_sequence_number(client: &RestClient, address: AccountAddress) -> Result { Ok(query_sequence_numbers(client, [address].iter()).await?.0[0].1) } @@ -1133,7 +1061,7 @@ pub fn parse_seed(seed_string: &str) -> [u8; 32] { } pub async fn create_accounts( - root_account: &LocalAccount, + root_account: Arc, txn_factory: &TransactionFactory, account_generator: Box, req: &EmitJobRequest, diff --git a/crates/transaction-emitter-lib/src/emitter/submission_worker.rs b/crates/transaction-emitter-lib/src/emitter/submission_worker.rs index 0f636147ada78..ac9de081ee507 100644 --- a/crates/transaction-emitter-lib/src/emitter/submission_worker.rs +++ b/crates/transaction-emitter-lib/src/emitter/submission_worker.rs @@ -4,11 +4,11 @@ use crate::{ emitter::{ stats::{DynamicStatsTracking, StatsAccumulator}, - update_seq_num_and_get_num_expired, wait_for_accounts_sequence, + wait_for_accounts_sequence, }, EmitModeParams, }; -use aptos_logger::{info, sample, sample::SampleRate, warn}; +use aptos_logger::{debug, info, sample, sample::SampleRate, warn}; use aptos_rest_client::Client as RestClient; use aptos_sdk::{ move_types::account_address::AccountAddress, @@ -25,6 +25,7 @@ use futures::future::join_all; use itertools::Itertools; use rand::seq::IteratorRandom; use std::{ + borrow::Borrow, collections::HashMap, sync::{atomic::AtomicU64, Arc}, time::Instant, @@ -32,7 +33,7 @@ use std::{ use tokio::time::sleep; pub struct SubmissionWorker { - pub(crate) accounts: Vec, + pub(crate) accounts: Vec>, client: RestClient, stop: Arc, params: EmitModeParams, @@ -55,6 +56,7 @@ impl SubmissionWorker { skip_latency_stats: bool, rng: ::rand::rngs::StdRng, ) -> Self { + let accounts = accounts.into_iter().map(Arc::new).collect(); Self { accounts, client, @@ -200,6 +202,9 @@ impl SubmissionWorker { } self.accounts + .into_iter() + .map(|account_arc_mutex| Arc::into_inner(account_arc_mutex).unwrap()) + .collect() } // returns true if it returned early @@ -243,11 +248,15 @@ impl SubmissionWorker { ) .await; - let (num_committed, num_expired) = update_seq_num_and_get_num_expired( - &mut self.accounts, - account_to_start_and_end_seq_num, - latest_fetched_counts, - ); + for account in self.accounts.iter_mut() { + update_account_seq_num( + Arc::get_mut(account).unwrap(), + &account_to_start_and_end_seq_num, + &latest_fetched_counts, + ); + } + let (num_committed, num_expired) = + count_committed_expired_stats(account_to_start_and_end_seq_num, 
diff --git a/crates/transaction-emitter-lib/src/wrappers.rs b/crates/transaction-emitter-lib/src/wrappers.rs
index 9f6659e5e9b98..813c4a1ae07dc 100644
--- a/crates/transaction-emitter-lib/src/wrappers.rs
+++ b/crates/transaction-emitter-lib/src/wrappers.rs
@@ -17,7 +17,10 @@ use aptos_logger::{error, info};
 use aptos_sdk::transaction_builder::TransactionFactory;
 use aptos_transaction_generator_lib::{args::TransactionTypeArg, WorkflowProgress};
 use rand::{rngs::StdRng, Rng, SeedableRng};
-use std::time::{Duration, Instant};
+use std::{
+    sync::Arc,
+    time::{Duration, Instant},
+};
 
 pub async fn emit_transactions(
     cluster_args: &ClusterArgs,
@@ -157,9 +160,10 @@ pub async fn emit_transactions_with_cluster(
         emit_job_request = emit_job_request.skip_minting_accounts();
     }
 
+    let coin_source_account = std::sync::Arc::new(coin_source_account);
     let stats = emitter
         .emit_txn_for_with_stats(
-            &coin_source_account,
+            coin_source_account,
             emit_job_request,
             duration,
             (args.duration / 10).clamp(1, 10),
@@ -177,6 +181,7 @@ pub async fn create_accounts_command(
         .context("Failed to build cluster")?;
     let client = cluster.random_instance().rest_client();
     let coin_source_account = cluster.load_coin_source_account(&client).await?;
+    let coin_source_account = Arc::new(coin_source_account);
     let txn_factory = TransactionFactory::new(cluster.chain_id)
         .with_transaction_expiration_time(60)
         .with_max_gas_amount(create_accounts_args.max_gas_per_txn);
@@ -194,7 +199,7 @@ pub async fn create_accounts_command(
     };
 
     create_accounts(
-        &coin_source_account,
+        coin_source_account,
         &txn_factory,
         Box::new(PrivateKeyAccountGenerator),
        &emit_job_request,
diff --git a/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs b/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs
index 06aab2e1f8577..1dee749e5f019 100644
--- a/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs
+++ b/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs
@@ -40,17 +40,16 @@ impl TransactionGenerator for AccountsPoolWrapperGenerator {
         _account: &LocalAccount,
         num_to_create: usize,
     ) -> Vec<SignedTransaction> {
-        let mut accounts_to_use =
+        let accounts_to_use =
             self.source_accounts_pool
                 .take_from_pool(num_to_create, true, &mut self.rng);
         if accounts_to_use.is_empty() {
             return Vec::new();
         }
         let txns = accounts_to_use
-            .iter_mut()
+            .iter()
             .flat_map(|account| self.generator.generate_transactions(account, 1))
             .collect();
-
         if let Some(destination_accounts_pool) = &self.destination_accounts_pool {
             destination_accounts_pool.add_to_pool(accounts_to_use);
         }
diff --git a/crates/transaction-generator-lib/src/call_custom_modules.rs b/crates/transaction-generator-lib/src/call_custom_modules.rs
index b540478e28966..bcdaafd567dc7 100644
--- a/crates/transaction-generator-lib/src/call_custom_modules.rs
+++ b/crates/transaction-generator-lib/src/call_custom_modules.rs
@@ -13,7 +13,7 @@ use aptos_sdk::{
 };
 use async_trait::async_trait;
 use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng};
-use std::sync::Arc;
+use std::{borrow::Borrow, sync::Arc};
 
 // Fn + Send + Sync, as it will be called from multiple threads simultaneously
 // if you need any coordination, use Arc<RwLock<X>> fields
@@ -220,7 +220,7 @@ impl CustomModulesDelegationGeneratorCreator {
             let publisher = LocalAccount::generate(&mut rng);
             let publisher_address = publisher.address();
             requests_create.push(create_account_transaction(
-                root_account.get_root_account(),
+                root_account.get_root_account().borrow(),
                 publisher_address,
                 &init_txn_factory,
                 publisher_balance,
diff --git a/crates/transaction-generator-lib/src/entry_points.rs b/crates/transaction-generator-lib/src/entry_points.rs
index 1b4dd4927cac3..6c5f5a014243e 100644
--- a/crates/transaction-generator-lib/src/entry_points.rs
+++ b/crates/transaction-generator-lib/src/entry_points.rs
@@ -17,7 +17,7 @@ use aptos_sdk::{
 };
 use async_trait::async_trait;
 use rand::rngs::StdRng;
-use std::sync::Arc;
+use std::{borrow::Borrow, sync::Arc};
 
 pub struct EntryPointTransactionGenerator {
     pub entry_point: EntryPoints,
@@ -75,7 +75,7 @@ impl UserModuleTransactionGenerator for EntryPointTransactionGenerator {
                 .iter()
                 .map(|to| {
                     create_account_transaction(
-                        root_account.get_root_account(),
+                        root_account.get_root_account().borrow(),
                        to.address(),
                        txn_factory,
                        0,
diff --git a/crates/transaction-generator-lib/src/lib.rs b/crates/transaction-generator-lib/src/lib.rs
index 364c6f8421eef..89805cb70a481 100644
--- a/crates/transaction-generator-lib/src/lib.rs
+++ b/crates/transaction-generator-lib/src/lib.rs
@@ -213,15 +213,15 @@ impl CounterState {
 pub trait RootAccountHandle: Send + Sync {
     async fn approve_funds(&self, amount: u64, reason: &str);
 
-    fn get_root_account(&self) -> &LocalAccount;
+    fn get_root_account(&self) -> Arc<LocalAccount>;
 }
 
-pub struct AlwaysApproveRootAccountHandle<'t> {
-    pub root_account: &'t LocalAccount,
+pub struct AlwaysApproveRootAccountHandle {
+    pub root_account: Arc<LocalAccount>,
 }
 
 #[async_trait::async_trait]
-impl<'t> RootAccountHandle for AlwaysApproveRootAccountHandle<'t> {
+impl RootAccountHandle for AlwaysApproveRootAccountHandle {
     async fn approve_funds(&self, amount: u64, reason: &str) {
         println!(
             "Consuming funds from root/source account: up to {} for {}",
@@ -229,8 +229,8 @@ impl<'t> RootAccountHandle for AlwaysApproveRootAccountHandle<'t> {
         );
     }
 
-    fn get_root_account(&self) -> &LocalAccount {
-        self.root_account
+    fn get_root_account(&self) -> Arc<LocalAccount> {
+        self.root_account.clone()
     }
 }
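Several call sites above keep passing `&LocalAccount` to helpers such as `create_account_transaction` by calling `.borrow()` on the `Arc<LocalAccount>` returned from `get_root_account()`. That works because `Arc<T>` implements `Borrow<T>`. A minimal sketch — the `Account` type and the helper signature here are illustrative stand-ins, not the real aptos APIs:

```rust
use std::{borrow::Borrow, sync::Arc};

// Illustrative stand-ins: in the diffs these are LocalAccount and the real
// create_account_transaction helper.
struct Account;

fn create_account_transaction(_creator: &Account) {}

fn main() {
    // What get_root_account() now returns: an owned, shared handle.
    let root_account: Arc<Account> = Arc::new(Account);

    // Arc<T> implements Borrow<T>, so `.borrow()` converts the handle into
    // the &Account the helper still expects.
    create_account_transaction(root_account.borrow());
}
```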
diff --git a/execution/executor-benchmark/src/lib.rs b/execution/executor-benchmark/src/lib.rs
index fc98e217a3f5b..fe1cca4d748f6 100644
--- a/execution/executor-benchmark/src/lib.rs
+++ b/execution/executor-benchmark/src/lib.rs
@@ -124,8 +124,8 @@ pub fn run_benchmark<V>(
     config.storage.storage_pruner_config = pruner_config;
     config.storage.rocksdb_configs.enable_storage_sharding = enable_storage_sharding;
     let (db, executor) = init_db_and_executor::<V>(&config);
-
-    let mut root_account = TransactionGenerator::read_root_account(genesis_key, &db);
+    let root_account = TransactionGenerator::read_root_account(genesis_key, &db);
+    let root_account = Arc::new(root_account);
     let transaction_generators = transaction_mix.clone().map(|transaction_mix| {
         let num_existing_accounts = TransactionGenerator::read_meta(&source_dir);
         let num_accounts_to_be_loaded = std::cmp::min(
@@ -153,7 +153,7 @@
 
         let (transaction_generator_creator, phase) = init_workload::<V>(
             transaction_mix,
-            &mut root_account,
+            root_account.clone(),
             main_signer_accounts,
             burner_accounts,
             db.clone(),
@@ -187,6 +187,7 @@
             }
         }
     }
+    let root_account = Arc::into_inner(root_account).unwrap();
     let mut generator = TransactionGenerator::new_with_existing_db(
         db.clone(),
         root_account,
@@ -247,7 +248,7 @@
 
 fn init_workload<V>(
     transaction_mix: Vec<(TransactionType, usize)>,
-    root_account: &mut LocalAccount,
+    root_account: Arc<LocalAccount>,
     mut main_signer_accounts: Vec<LocalAccount>,
     burner_accounts: Vec<LocalAccount>,
     db: DbReaderWriter,
diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs
index c4131fb3acc97..e25527f5af7bd 100644
--- a/testsuite/forge-cli/src/main.rs
+++ b/testsuite/forge-cli/src/main.rs
@@ -60,6 +60,7 @@ use aptos_testcases::{
     validator_reboot_stress_test::ValidatorRebootStressTest,
     CompositeNetworkTest,
 };
+use async_trait::async_trait;
 use clap::{Parser, Subcommand};
 use futures::stream::{FuturesUnordered, StreamExt};
 use once_cell::sync::Lazy;
@@ -67,6 +68,7 @@ use rand::{rngs::ThreadRng, seq::SliceRandom, Rng};
 use std::{
     env,
     num::NonZeroUsize,
+    ops::DerefMut,
     path::{Path, PathBuf},
     process,
     sync::{
@@ -2660,18 +2662,19 @@ impl Test for RestartValidator {
     }
 }
 
+#[async_trait]
 impl NetworkTest for RestartValidator {
-    fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> {
-        let runtime = Runtime::new()?;
-        runtime.block_on(async {
-            let node = ctx.swarm().validators_mut().next().unwrap();
-            node.health_check().await.expect("node health check failed");
-            node.stop().await.unwrap();
-            println!("Restarting node {}", node.peer_id());
-            node.start().await.unwrap();
-            tokio::time::sleep(Duration::from_secs(1)).await;
-            node.health_check().await.expect("node health check failed");
-        });
+    async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> {
+        let mut ctx_locker = ctx.ctx.lock().await;
+        let ctx = ctx_locker.deref_mut();
+        let swarm = ctx.swarm.read().await;
+        let node = swarm.validators().next().unwrap();
+        node.health_check().await.expect("node health check failed");
+        node.stop().await.unwrap();
+        println!("Restarting node {}", node.peer_id());
+        node.start().await.unwrap();
+        tokio::time::sleep(Duration::from_secs(1)).await;
+        node.health_check().await.expect("node health check failed");
         Ok(())
     }
 }
@@ -2685,17 +2688,23 @@ impl Test for EmitTransaction {
     }
 }
 
+#[async_trait]
 impl NetworkTest for EmitTransaction {
-    fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> {
+    async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> {
+        let mut ctx_locker = ctx.ctx.lock().await;
+        let ctx = ctx_locker.deref_mut();
         let duration = Duration::from_secs(10);
         let all_validators = ctx
-            .swarm()
+            .swarm
+            .read()
+            .await
             .validators()
             .map(|v| v.peer_id())
             .collect::<Vec<_>>();
-        let stats = generate_traffic(ctx, &all_validators, duration).unwrap();
+        let stats = generate_traffic(ctx, &all_validators, duration)
+            .await
+            .unwrap();
         ctx.report.report_txn_stats(self.name().to_string(), &stats);
-
         Ok(())
     }
 }
@@ -2717,10 +2726,11 @@ impl Test for Delay {
     }
 }
 
+#[async_trait]
 impl NetworkTest for Delay {
-    fn run(&self, _ctx: &mut NetworkContext<'_>) -> Result<()> {
+    async fn run<'a>(&self, _ctx: NetworkContextSynchronizer<'a>) -> Result<()> {
         info!("forge sleep {}", self.seconds);
-        std::thread::sleep(Duration::from_secs(self.seconds));
+        tokio::time::sleep(Duration::from_secs(self.seconds)).await;
         Ok(())
     }
 }
@@ -2734,10 +2744,12 @@ impl Test for GatherMetrics {
     }
 }
 
+#[async_trait]
 impl NetworkTest for GatherMetrics {
-    fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> {
-        let runtime = ctx.runtime.handle();
-        runtime.block_on(gather_metrics_one(ctx));
+    async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> {
+        let mut ctx_locker = ctx.ctx.lock().await;
+        let ctx = ctx_locker.deref_mut();
+        gather_metrics_one(ctx).await;
         Ok(())
     }
 }
@@ -2749,14 +2761,17 @@ async fn gather_metrics_one(ctx: &NetworkContext<'_>) {
     let now = chrono::prelude::Utc::now()
         .format("%Y%m%d_%H%M%S")
         .to_string();
-    for val in ctx.swarm.validators() {
-        let mut url = val.inspection_service_endpoint();
-        let valname = val.peer_id().to_string();
-        url.set_path("metrics");
-        let fname = format!("{}.{}.metrics", now, valname);
-        let outpath: PathBuf = outdir.join(fname);
-        let th = handle.spawn(gather_metrics_to_file(url, outpath));
-        gets.push(th);
+    {
+        let swarm = ctx.swarm.read().await;
+        for val in swarm.validators() {
+            let mut url = val.inspection_service_endpoint();
+            let valname = val.peer_id().to_string();
+            url.set_path("metrics");
+            let fname = format!("{}.{}.metrics", now, valname);
+            let outpath: PathBuf = outdir.join(fname);
+            let th = handle.spawn(gather_metrics_to_file(url, outpath));
+            gets.push(th);
+        }
     }
     // join all the join handles
     while !gets.is_empty() {
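The forge-cli tests migrate from a blocking `fn run(&self, ...)` to an async trait method via the `async_trait` crate, which boxes the returned future so `dyn NetworkTest` objects keep working. A simplified sketch of the same migration shape (trait and type names mirror the diff, bodies are reduced to the essentials):

```rust
use anyhow::Result;
use async_trait::async_trait;

// Simplified version of the NetworkTest migration: a blocking trait method
// becomes async behind the async_trait macro.
#[async_trait]
trait NetworkTest: Send + Sync {
    async fn run(&self) -> Result<()>;
}

struct Delay {
    seconds: u64,
}

#[async_trait]
impl NetworkTest for Delay {
    async fn run(&self) -> Result<()> {
        // tokio::time::sleep yields the task instead of parking the worker
        // thread, which is why the diff drops std::thread::sleep.
        tokio::time::sleep(std::time::Duration::from_secs(self.seconds)).await;
        Ok(())
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    // Trait objects still work because async_trait boxes the returned future.
    let test: Box<dyn NetworkTest> = Box::new(Delay { seconds: 1 });
    test.run().await
}
```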
diff --git a/testsuite/forge/Cargo.toml b/testsuite/forge/Cargo.toml
index 8e18c496f73e5..caa3c1f34ba78 100644
--- a/testsuite/forge/Cargo.toml
+++ b/testsuite/forge/Cargo.toml
@@ -28,6 +28,7 @@ aptos-inspection-service = { workspace = true }
 aptos-logger = { workspace = true }
 aptos-rest-client = { workspace = true }
 aptos-retrier = { workspace = true }
+aptos-runtimes = { workspace = true }
 aptos-sdk = { workspace = true }
 aptos-short-hex-str = { workspace = true }
 aptos-state-sync-driver = { workspace = true }
diff --git a/testsuite/forge/src/backend/k8s/fullnode.rs b/testsuite/forge/src/backend/k8s/fullnode.rs
index f7537e3a492cb..bbfad70651728 100644
--- a/testsuite/forge/src/backend/k8s/fullnode.rs
+++ b/testsuite/forge/src/backend/k8s/fullnode.rs
@@ -35,7 +35,7 @@ use std::{
     env,
     net::{Ipv4Addr, SocketAddr, SocketAddrV4},
     path::PathBuf,
-    sync::Arc,
+    sync::{atomic::AtomicU32, Arc},
 };
 use tempfile::TempDir;
 
@@ -504,7 +504,7 @@ pub async fn install_public_fullnode<'a>(
         haproxy_enabled: false,
 
         port_forward_enabled: use_port_forward,
-        rest_api_port: REST_API_SERVICE_PORT, // in the case of port-forward, this port will be changed at runtime
+        rest_api_port: AtomicU32::new(REST_API_SERVICE_PORT), // in the case of port-forward, this port will be changed at runtime
     };
 
     Ok((node_peer_id, ret_node))
diff --git a/testsuite/forge/src/backend/k8s/node.rs b/testsuite/forge/src/backend/k8s/node.rs
index 582da1ae89165..35e76ba93e99f 100644
--- a/testsuite/forge/src/backend/k8s/node.rs
+++ b/testsuite/forge/src/backend/k8s/node.rs
@@ -20,6 +20,7 @@ use std::{
     fmt::{Debug, Formatter},
     process::{Command, Stdio},
     str::FromStr,
+    sync::atomic::{AtomicU32, Ordering},
     thread,
     time::{Duration, Instant},
 };
@@ -32,7 +33,7 @@ pub struct K8sNode {
     pub(crate) peer_id: PeerId,
     pub(crate) index: usize,
     pub(crate) service_name: String,
-    pub(crate) rest_api_port: u32,
+    pub(crate) rest_api_port: AtomicU32,
     pub version: Version,
     pub namespace: String,
     // whether this node has HAProxy in front of it
@@ -43,7 +44,7 @@ pub struct K8sNode {
 
 impl K8sNode {
     fn rest_api_port(&self) -> u32 {
-        self.rest_api_port
+        self.rest_api_port.load(Ordering::SeqCst)
     }
 
     fn service_name(&self) -> String {
@@ -133,19 +134,19 @@ impl Node for K8sNode {
         self.peer_id
     }
 
-    async fn start(&mut self) -> Result<()> {
+    async fn start(&self) -> Result<()> {
         scale_stateful_set_replicas(self.stateful_set_name(), self.namespace(), 1).await?;
         // need to port-forward again since the node is coming back
         // note that we will get a new port
         if self.port_forward_enabled {
-            self.rest_api_port = get_free_port();
+            self.rest_api_port.store(get_free_port(), Ordering::SeqCst);
             self.port_forward_rest_api()?;
         }
         self.wait_until_healthy(Instant::now() + Duration::from_secs(60))
             .await
     }
 
-    async fn stop(&mut self) -> Result<()> {
+    async fn stop(&self) -> Result<()> {
         info!("going to stop node {}", self.stateful_set_name());
         scale_stateful_set_replicas(self.stateful_set_name(), self.namespace(), 0).await
     }
@@ -164,7 +165,7 @@ impl Node for K8sNode {
             .expect("Invalid URL.")
     }
 
-    async fn clear_storage(&mut self) -> Result<()> {
+    async fn clear_storage(&self) -> Result<()> {
         // Remove all storage files
         let ledger_db_path = format!("{}/db/{}", APTOS_DATA_DIR, LEDGER_DB_NAME);
         let state_db_path = format!("{}/db/{}", APTOS_DATA_DIR, STATE_MERKLE_DB_NAME);
@@ -236,7 +237,7 @@ impl Node for K8sNode {
         Ok(port as u64)
     }
 
-    async fn health_check(&mut self) -> Result<(), HealthCheckError> {
+    async fn health_check(&self) -> Result<(), HealthCheckError> {
         self.rest_client()
             .get_ledger_information()
             .await
@@ -256,11 +257,11 @@ impl Node for K8sNode {
             .unwrap()
     }
 
-    async fn get_identity(&mut self) -> Result<String> {
+    async fn get_identity(&self) -> Result<String> {
         stateful_set::get_identity(self.stateful_set_name(), self.namespace()).await
     }
 
-    async fn set_identity(&mut self, k8s_secret_name: String) -> Result<()> {
+    async fn set_identity(&self, k8s_secret_name: String) -> Result<()> {
         stateful_set::set_identity(
             self.stateful_set_name(),
             self.namespace(),
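`K8sNode::start` now takes `&self`, so the REST API port that changes on every port-forward is stored in an `AtomicU32` rather than a plain `u32`. The same interior-mutability move in isolation (a stand-in `Node` type, not the forge one):

```rust
use std::sync::atomic::{AtomicU32, Ordering};

// Stand-in for K8sNode: once Node::start takes &self, the port that changes
// on every restart must be interior-mutable.
struct Node {
    rest_api_port: AtomicU32,
}

impl Node {
    fn rest_api_port(&self) -> u32 {
        self.rest_api_port.load(Ordering::SeqCst)
    }

    // Needs only &self, matching the new `async fn start(&self)` signature.
    fn record_new_port(&self, port: u32) {
        self.rest_api_port.store(port, Ordering::SeqCst);
    }
}

fn main() {
    let node = Node {
        rest_api_port: AtomicU32::new(8080),
    };
    node.record_new_port(55_001); // e.g. a fresh port-forward after restart
    assert_eq!(node.rest_api_port(), 55_001);
}
```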
diff --git a/testsuite/forge/src/backend/k8s/swarm.rs b/testsuite/forge/src/backend/k8s/swarm.rs
index d4cc6512f2a53..15c51cd8b0318 100644
--- a/testsuite/forge/src/backend/k8s/swarm.rs
+++ b/testsuite/forge/src/backend/k8s/swarm.rs
@@ -40,14 +40,15 @@ use std::{
     collections::{BTreeMap, HashMap, HashSet},
     convert::TryFrom,
     env, str,
-    sync::Arc,
+    sync::{atomic::AtomicU32, Arc},
 };
+// use std::sync::Mutex;
 use tokio::{runtime::Runtime, time::Duration};
 
 pub struct K8sSwarm {
     validators: HashMap<PeerId, K8sNode>,
     fullnodes: HashMap<PeerId, K8sNode>,
-    root_account: LocalAccount,
+    root_account: Arc<LocalAccount>,
     kube_client: K8sClient,
     versions: Arc<HashMap<Version, String>>,
     pub chain_id: ChainId,
@@ -86,6 +87,7 @@ impl K8sSwarm {
             )
         })?;
         let root_account = LocalAccount::new(address, account_key, sequence_number);
+        let root_account = Arc::new(root_account);
 
         let mut versions = HashMap::new();
         let cur_version = Version::new(0, image_tag.to_string());
@@ -178,7 +180,7 @@ impl K8sSwarm {
             self.get_kube_client(),
             Some(self.kube_namespace.clone()),
         ));
-        let (peer_id, mut k8snode) = install_public_fullnode(
+        let (peer_id, k8snode) = install_public_fullnode(
             stateful_set_api,
             configmap_api,
             persistent_volume_claim_api,
@@ -201,7 +203,7 @@ impl K8sSwarm {
 
 #[async_trait::async_trait]
 impl Swarm for K8sSwarm {
-    async fn health_check(&mut self) -> Result<()> {
+    async fn health_check(&self) -> Result<()> {
         let nodes = self.validators.values().collect();
         let unhealthy_nodes = nodes_healthcheck(nodes).await.unwrap();
         if !unhealthy_nodes.is_empty() {
@@ -221,26 +223,10 @@ impl Swarm for K8sSwarm {
         Box::new(validators.into_iter())
     }
 
-    fn validators_mut<'a>(&'a mut self) -> Box<dyn Iterator<Item = &'a mut dyn Validator> + 'a> {
-        let mut validators: Vec<_> = self
-            .validators
-            .values_mut()
-            .map(|v| v as &'a mut dyn Validator)
-            .collect();
-        validators.sort_by_key(|v| v.index());
-        Box::new(validators.into_iter())
-    }
-
     fn validator(&self, id: PeerId) -> Option<&dyn Validator> {
         self.validators.get(&id).map(|v| v as &dyn Validator)
     }
 
-    fn validator_mut(&mut self, id: PeerId) -> Option<&mut dyn Validator> {
-        self.validators
-            .get_mut(&id)
-            .map(|v| v as &mut dyn Validator)
-    }
-
     /// TODO: this should really be a method on Node rather than Swarm
     async fn upgrade_validator(&mut self, id: PeerId, version: &Version) -> Result<()> {
         let validator = self
@@ -281,24 +267,10 @@ impl Swarm for K8sSwarm {
         Box::new(full_nodes.into_iter())
     }
 
-    fn full_nodes_mut<'a>(&'a mut self) -> Box<dyn Iterator<Item = &'a mut dyn FullNode> + 'a> {
-        let mut full_nodes: Vec<_> = self
-            .fullnodes
-            .values_mut()
-            .map(|n| n as &'a mut dyn FullNode)
-            .collect();
-        full_nodes.sort_by_key(|n| n.index());
-        Box::new(full_nodes.into_iter())
-    }
-
     fn full_node(&self, id: PeerId) -> Option<&dyn FullNode> {
         self.fullnodes.get(&id).map(|v| v as &dyn FullNode)
     }
 
-    fn full_node_mut(&mut self, id: PeerId) -> Option<&mut dyn FullNode> {
-        self.fullnodes.get_mut(&id).map(|v| v as &mut dyn FullNode)
-    }
-
     fn add_validator(&mut self, _version: &Version, _template: NodeConfig) -> Result<PeerId> {
         todo!()
     }
@@ -337,11 +309,11 @@ impl Swarm for K8sSwarm {
         Box::new(self.versions.keys().cloned())
     }
 
-    fn chain_info(&mut self) -> ChainInfo<'_> {
+    fn chain_info(&self) -> ChainInfo {
         let rest_api_url = self.get_rest_api_url(0);
         let inspection_service_url = self.get_inspection_service_url(0);
         ChainInfo::new(
-            &mut self.root_account,
+            self.root_account.clone(),
             rest_api_url,
             inspection_service_url,
             self.chain_id,
@@ -457,11 +429,11 @@ impl Swarm for K8sSwarm {
         bail!("No prom client");
     }
 
-    fn chain_info_for_node(&mut self, idx: usize) -> ChainInfo<'_> {
+    fn chain_info_for_node(&mut self, idx: usize) -> ChainInfo {
         let rest_api_url = self.get_rest_api_url(idx);
         let inspection_service_url = self.get_inspection_service_url(idx);
         ChainInfo::new(
-            &mut self.root_account,
+            self.root_account.clone(),
             rest_api_url,
             inspection_service_url,
             self.chain_id,
@@ -570,7 +542,7 @@ fn get_k8s_node_from_stateful_set(
         peer_id: PeerId::random(),
         index,
         service_name,
-        rest_api_port,
+        rest_api_port: AtomicU32::new(rest_api_port),
         version: Version::new(0, image_tag),
         namespace: namespace.to_string(),
         haproxy_enabled: enable_haproxy,
diff --git a/testsuite/forge/src/backend/local/node.rs b/testsuite/forge/src/backend/local/node.rs
index 92816a780218c..d0beef57a2746 100644
--- a/testsuite/forge/src/backend/local/node.rs
+++ b/testsuite/forge/src/backend/local/node.rs
@@ -50,7 +50,7 @@ impl Drop for Process {
 #[derive(Debug)]
 pub struct LocalNode {
     version: LocalVersion,
-    process: Option<Process>,
+    process: std::sync::Mutex<Option<Process>>,
     name: String,
     index: usize,
     account_private_key: Option<ConfigKey<Ed25519PrivateKey>>,
@@ -81,7 +81,7 @@ impl LocalNode {
 
         Ok(Self {
             version,
-            process: None,
+            process: std::sync::Mutex::new(None),
             name,
             index,
             account_private_key,
@@ -115,8 +115,13 @@ impl LocalNode {
         &self.account_private_key
     }
 
-    pub fn start(&mut self) -> Result<()> {
-        ensure!(self.process.is_none(), "node {} already running", self.name);
+    pub fn start(&self) -> Result<()> {
+        let mut process_locker = self.process.lock().unwrap();
+        ensure!(
+            process_locker.is_none(),
+            "node {} already running",
+            self.name
+        );
 
         // Ensure log file exists
         let log_file = OpenOptions::new()
@@ -171,13 +176,13 @@ impl LocalNode {
             self.config.storage.backup_service_address.port()
         );
 
-        self.process = Some(Process(process));
+        *process_locker = Some(Process(process));
 
         Ok(())
     }
 
-    pub fn stop(&mut self) {
-        self.process = None;
+    pub fn stop(&self) {
+        *(self.process.lock().unwrap()) = None;
     }
 
     pub fn port(&self) -> u16 {
@@ -206,28 +211,32 @@ impl LocalNode {
         fs::read_to_string(self.log_path()).map_err(Into::into)
     }
 
-    pub async fn health_check(&mut self) -> Result<(), HealthCheckError> {
+    pub async fn health_check(&self) -> Result<(), HealthCheckError> {
         debug!("Health check on node '{}'", self.name);
 
-        if let Some(p) = &mut self.process {
-            match p.0.try_wait() {
-                // This would mean the child process has crashed
-                Ok(Some(status)) => {
-                    let error = format!("Node '{}' crashed with: {}", self.name, status);
-                    return Err(HealthCheckError::NotRunning(error));
-                },
-
-                // This is the case where the node is still running
-                Ok(None) => {},
-
-                // Some other unknown error
-                Err(e) => {
-                    return Err(HealthCheckError::Unknown(e.into()));
-                },
+        {
+            let mut process_locker = self.process.lock().unwrap();
+            let process = process_locker.as_mut();
+            if let Some(p) = process {
+                match p.0.try_wait() {
+                    // This would mean the child process has crashed
+                    Ok(Some(status)) => {
+                        let error = format!("Node '{}' crashed with: {}", self.name, status);
+                        return Err(HealthCheckError::NotRunning(error));
+                    },
+
+                    // This is the case where the node is still running
+                    Ok(None) => {},
+
+                    // Some other unknown error
+                    Err(e) => {
+                        return Err(HealthCheckError::Unknown(e.into()));
+                    },
+                }
+            } else {
+                let error = format!("Node '{}' is stopped", self.name);
+                return Err(HealthCheckError::NotRunning(error));
             }
-        } else {
-            let error = format!("Node '{}' is stopped", self.name);
-            return Err(HealthCheckError::NotRunning(error));
         }
 
         self.inspection_client()
@@ -280,24 +289,24 @@ impl Node for LocalNode {
         self.config()
     }
 
-    async fn start(&mut self) -> Result<()> {
+    async fn start(&self) -> Result<()> {
         self.start()
     }
 
-    async fn stop(&mut self) -> Result<()> {
+    async fn stop(&self) -> Result<()> {
         self.stop();
         Ok(())
     }
 
-    async fn get_identity(&mut self) -> Result<String> {
+    async fn get_identity(&self) -> Result<String> {
         todo!()
     }
 
-    async fn set_identity(&mut self, _k8s_secret_name: String) -> Result<()> {
+    async fn set_identity(&self, _k8s_secret_name: String) -> Result<()> {
         todo!()
     }
 
-    async fn clear_storage(&mut self) -> Result<()> {
+    async fn clear_storage(&self) -> Result<()> {
         // Remove all storage files (i.e., blockchain data, consensus data and state sync data)
         let node_config = self.config();
         let ledger_db_path = node_config.storage.dir().join(LEDGER_DB_NAME);
@@ -354,7 +363,7 @@ impl Node for LocalNode {
         Ok(())
     }
 
-    async fn health_check(&mut self) -> Result<(), HealthCheckError> {
+    async fn health_check(&self) -> Result<(), HealthCheckError> {
         self.health_check().await
     }
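`LocalNode` makes the same `&mut self` to `&self` move, but its child-process handle is not atomic, so it goes behind a `std::sync::Mutex<Option<...>>`. A compact sketch of that start/stop discipline (the `u32` pid is a placeholder for the real process handle):

```rust
use std::sync::Mutex;

// Stand-in for LocalNode: the Option<Process> slot moves behind a Mutex so
// start/stop can take &self.
struct Node {
    process: Mutex<Option<u32>>,
}

impl Node {
    fn start(&self, pid: u32) -> Result<(), String> {
        let mut guard = self.process.lock().unwrap();
        if guard.is_some() {
            return Err("node already running".to_string());
        }
        *guard = Some(pid);
        Ok(())
    }

    // Mirrors `*(self.process.lock().unwrap()) = None;` -- dropping the old
    // value is what actually tears the child down via its Drop impl.
    fn stop(&self) {
        *self.process.lock().unwrap() = None;
    }
}

fn main() {
    let node = Node {
        process: Mutex::new(None),
    };
    node.start(1234).unwrap();
    assert!(node.start(5678).is_err());
    node.stop();
    node.start(5678).unwrap();
}
```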
diff --git a/testsuite/forge/src/backend/local/swarm.rs b/testsuite/forge/src/backend/local/swarm.rs
index 4e8905c2a3305..cb1f8ba2989f7 100644
--- a/testsuite/forge/src/backend/local/swarm.rs
+++ b/testsuite/forge/src/backend/local/swarm.rs
@@ -96,7 +96,7 @@ pub struct LocalSwarm {
     fullnodes: HashMap<PeerId, LocalNode>,
     public_networks: HashMap<PeerId, NetworkConfig>,
     dir: SwarmDirectory,
-    root_account: LocalAccount,
+    root_account: Arc<LocalAccount>,
     chain_id: ChainId,
     root_key: ConfigKey<Ed25519PrivateKey>,
@@ -245,6 +245,7 @@ impl LocalSwarm {
             AccountKey::from_private_key(root_key.private_key()),
             0,
         );
+        let root_account = Arc::new(root_account);
 
         Ok(LocalSwarm {
             node_name_counter: validators.len(),
@@ -367,7 +368,7 @@ impl LocalSwarm {
         )?;
 
         let version = self.versions.get(version).unwrap();
-        let mut fullnode = LocalNode::new(
+        let fullnode = LocalNode::new(
             version.to_owned(),
             fullnode_config.name,
             index,
@@ -397,7 +398,7 @@ impl LocalSwarm {
         )?;
 
         let version = self.versions.get(version).unwrap();
-        let mut fullnode = LocalNode::new(
+        let fullnode = LocalNode::new(
             version.to_owned(),
             fullnode_config.name,
             index,
@@ -477,7 +478,7 @@ impl Drop for LocalSwarm {
 
 #[async_trait::async_trait]
 impl Swarm for LocalSwarm {
-    async fn health_check(&mut self) -> Result<()> {
+    async fn health_check(&self) -> Result<()> {
         Ok(())
     }
 
@@ -491,26 +492,10 @@ impl Swarm for LocalSwarm {
         Box::new(validators.into_iter())
     }
 
-    fn validators_mut<'a>(&'a mut self) -> Box<dyn Iterator<Item = &'a mut dyn Validator> + 'a> {
-        let mut validators: Vec<_> = self
-            .validators
-            .values_mut()
-            .map(|v| v as &'a mut dyn Validator)
-            .collect();
-        validators.sort_by_key(|v| v.index());
-        Box::new(validators.into_iter())
-    }
-
     fn validator(&self, id: PeerId) -> Option<&dyn Validator> {
         self.validators.get(&id).map(|v| v as &dyn Validator)
     }
 
-    fn validator_mut(&mut self, id: PeerId) -> Option<&mut dyn Validator> {
-        self.validators
-            .get_mut(&id)
-            .map(|v| v as &mut dyn Validator)
-    }
-
     async fn upgrade_validator(&mut self, id: PeerId, version: &Version) -> Result<()> {
         let version = self
             .versions
@@ -534,24 +519,10 @@ impl Swarm for LocalSwarm {
         Box::new(full_nodes.into_iter())
     }
 
-    fn full_nodes_mut<'a>(&'a mut self) -> Box<dyn Iterator<Item = &'a mut dyn FullNode> + 'a> {
-        let mut full_nodes: Vec<_> = self
-            .fullnodes
-            .values_mut()
-            .map(|v| v as &'a mut dyn FullNode)
-            .collect();
-        full_nodes.sort_by_key(|n| n.index());
-        Box::new(full_nodes.into_iter())
-    }
-
     fn full_node(&self, id: PeerId) -> Option<&dyn FullNode> {
         self.fullnodes.get(&id).map(|v| v as &dyn FullNode)
     }
 
-    fn full_node_mut(&mut self, id: PeerId) -> Option<&mut dyn FullNode> {
-        self.fullnodes.get_mut(&id).map(|v| v as &mut dyn FullNode)
-    }
-
     fn add_validator(&mut self, _version: &Version, _template: NodeConfig) -> Result<PeerId> {
         todo!()
     }
@@ -578,7 +549,7 @@ impl Swarm for LocalSwarm {
     }
 
     fn remove_full_node(&mut self, id: PeerId) -> Result<()> {
-        if let Some(mut fullnode) = self.fullnodes.remove(&id) {
+        if let Some(fullnode) = self.fullnodes.remove(&id) {
             fullnode.stop();
         }
 
@@ -589,7 +560,7 @@ impl Swarm for LocalSwarm {
         Box::new(self.versions.keys().cloned())
     }
 
-    fn chain_info(&mut self) -> ChainInfo<'_> {
+    fn chain_info(&self) -> ChainInfo {
         let rest_api_url = self
             .validators()
             .next()
@@ -604,7 +575,7 @@ impl Swarm for LocalSwarm {
             .to_string();
 
         ChainInfo::new(
-            &mut self.root_account,
+            self.root_account.clone(),
             rest_api_url,
             inspection_service_url,
             self.chain_id,
@@ -655,7 +626,7 @@ impl Swarm for LocalSwarm {
         todo!()
     }
 
-    fn chain_info_for_node(&mut self, idx: usize) -> ChainInfo<'_> {
+    fn chain_info_for_node(&mut self, idx: usize) -> ChainInfo {
         let rest_api_url = self
             .validators()
             .nth(idx)
@@ -669,7 +640,7 @@ impl Swarm for LocalSwarm {
             .inspection_service_endpoint()
             .to_string();
         ChainInfo::new(
-            &mut self.root_account,
+            self.root_account.clone(),
             rest_api_url,
             inspection_service_url,
             self.chain_id,
diff --git a/testsuite/forge/src/interface/admin.rs b/testsuite/forge/src/interface/admin.rs
index 50d726ced5a6a..0abbbe87039cf 100644
--- a/testsuite/forge/src/interface/admin.rs
+++ b/testsuite/forge/src/interface/admin.rs
@@ -20,12 +20,12 @@ pub trait AdminTest: Test {
 
 pub struct AdminContext<'t> {
     core: CoreContext,
-    chain_info: ChainInfo<'t>,
+    chain_info: ChainInfo,
     pub report: &'t mut TestReport,
 }
 
 impl<'t> AdminContext<'t> {
-    pub fn new(core: CoreContext, chain_info: ChainInfo<'t>, report: &'t mut TestReport) -> Self {
+    pub fn new(core: CoreContext, chain_info: ChainInfo, report: &'t mut TestReport) -> Self {
         Self {
             core,
             chain_info,
@@ -45,7 +45,7 @@ impl<'t> AdminContext<'t> {
         RestClient::new(Url::parse(self.chain_info.rest_api()).unwrap())
     }
 
-    pub fn chain_info(&mut self) -> &mut ChainInfo<'t> {
+    pub fn chain_info(&mut self) -> &mut ChainInfo {
         &mut self.chain_info
     }
diff --git a/testsuite/forge/src/interface/aptos.rs b/testsuite/forge/src/interface/aptos.rs
index b31cbb2655449..6e38ae931dfed 100644
--- a/testsuite/forge/src/interface/aptos.rs
+++ b/testsuite/forge/src/interface/aptos.rs
@@ -25,6 +25,7 @@ use aptos_sdk::{
 use rand::{rngs::OsRng, Rng, SeedableRng};
 use reqwest::Url;
 use serde::{Deserialize, Serialize};
+use std::sync::Arc;
 
 #[async_trait::async_trait]
 pub trait AptosTest: Test {
@@ -34,14 +35,14 @@ pub trait AptosTest: Test {
 
 pub struct AptosContext<'t> {
     core: CoreContext,
-    public_info: AptosPublicInfo<'t>,
+    public_info: AptosPublicInfo,
     pub report: &'t mut TestReport,
 }
 
 impl<'t> AptosContext<'t> {
     pub fn new(
         core: CoreContext,
-        public_info: AptosPublicInfo<'t>,
+        public_info: AptosPublicInfo,
         report: &'t mut TestReport,
     ) -> Self {
         Self {
@@ -107,26 +108,26 @@ impl<'t> AptosContext<'t> {
         self.public_info.get_balance(address).await
     }
 
-    pub fn root_account(&mut self) -> &mut LocalAccount {
-        self.public_info.root_account
+    pub fn root_account(&mut self) -> Arc<LocalAccount> {
+        self.public_info.root_account.clone()
     }
 }
 
-pub struct AptosPublicInfo<'t> {
+pub struct AptosPublicInfo {
     chain_id: ChainId,
     inspection_service_url: Url,
     rest_api_url: Url,
     rest_client: RestClient,
-    root_account: &'t mut LocalAccount,
+    root_account: Arc<LocalAccount>,
     rng: ::rand::rngs::StdRng,
 }
 
-impl<'t> AptosPublicInfo<'t> {
+impl AptosPublicInfo {
     pub fn new(
         chain_id: ChainId,
         inspection_service_url_str: String,
         rest_api_url_str: String,
-        root_account: &'t mut LocalAccount,
+        root_account: Arc<LocalAccount>,
     ) -> Self {
         let rest_api_url = Url::parse(&rest_api_url_str).unwrap();
         let inspection_service_url = Url::parse(&inspection_service_url_str).unwrap();
@@ -152,8 +153,8 @@ impl<'t> AptosPublicInfo<'t> {
         self.inspection_service_url.as_str()
     }
 
-    pub fn root_account(&mut self) -> &mut LocalAccount {
-        self.root_account
+    pub fn root_account(&mut self) -> Arc<LocalAccount> {
+        self.root_account.clone()
     }
 
     pub async fn create_user_account(&mut self, pubkey: &Ed25519PublicKey) -> Result<()> {
@@ -300,12 +301,12 @@ impl<'t> AptosPublicInfo<'t> {
         Ok(account)
     }
 
-    pub async fn reconfig(&mut self) -> State {
+    pub async fn reconfig(&self) -> State {
         // dedupe with smoke-test::test_utils::reconfig
         reconfig(
             &self.rest_client,
             &self.transaction_factory(),
-            self.root_account,
+            self.root_account.clone(),
         )
         .await
     }
@@ -328,21 +329,23 @@ impl<'t> AptosPublicInfo<'t> {
 pub async fn reconfig(
     client: &RestClient,
     transaction_factory: &TransactionFactory,
-    root_account: &mut LocalAccount,
+    root_account: Arc<LocalAccount>,
 ) -> State {
     let aptos_version = client.get_aptos_version().await.unwrap();
     let current = aptos_version.into_inner();
     let current_version = *current.major.inner();
-    let txns = vec![
-        root_account.sign_with_transaction_builder(transaction_factory.clone().payload(
-            aptos_stdlib::version_set_for_next_epoch(current_version + 1),
-        )),
-        root_account.sign_with_transaction_builder(
-            transaction_factory
-                .clone()
-                .payload(aptos_stdlib::aptos_governance_force_end_epoch_test_only()),
-        ),
-    ];
+    let txns = {
+        vec![
+            root_account.sign_with_transaction_builder(transaction_factory.clone().payload(
+                aptos_stdlib::version_set_for_next_epoch(current_version + 1),
+            )),
+            root_account.sign_with_transaction_builder(
+                transaction_factory
+                    .clone()
+                    .payload(aptos_stdlib::aptos_governance_force_end_epoch_test_only()),
+            ),
+        ]
+    };
     submit_and_wait_reconfig(client, txns).await
 }
diff --git a/testsuite/forge/src/interface/chain_info.rs b/testsuite/forge/src/interface/chain_info.rs
index 9baa8c22ac9f1..ff6606f9fab67 100644
--- a/testsuite/forge/src/interface/chain_info.rs
+++ b/testsuite/forge/src/interface/chain_info.rs
@@ -10,18 +10,19 @@ use aptos_sdk::{
     types::{chain_id::ChainId, LocalAccount},
 };
 use reqwest::Url;
+use std::sync::Arc;
 
 #[derive(Debug)]
-pub struct ChainInfo<'t> {
-    pub root_account: &'t mut LocalAccount,
+pub struct ChainInfo {
+    pub root_account: Arc<LocalAccount>,
     pub rest_api_url: String,
     pub inspection_service_url: String,
     pub chain_id: ChainId,
 }
 
-impl<'t> ChainInfo<'t> {
+impl ChainInfo {
     pub fn new(
-        root_account: &'t mut LocalAccount,
+        root_account: Arc<LocalAccount>,
         rest_api_url: String,
         inspection_service_url: String,
         chain_id: ChainId,
@@ -34,15 +35,13 @@ impl ChainInfo {
         }
     }
 
-    pub fn root_account(&mut self) -> &mut LocalAccount {
-        self.root_account
+    pub fn root_account(&self) -> Arc<LocalAccount> {
+        self.root_account.clone()
     }
 
     pub async fn resync_root_account_seq_num(&mut self, client: &RestClient) -> Result<()> {
-        let account = client
-            .get_account(self.root_account.address())
-            .await?
-            .into_inner();
+        let root_address = { self.root_account.address() };
+        let account = client.get_account(root_address).await?.into_inner();
         self.root_account
             .set_sequence_number(account.sequence_number);
         Ok(())
@@ -64,12 +63,12 @@ impl ChainInfo {
         TransactionFactory::new(self.chain_id())
     }
 
-    pub fn into_aptos_public_info(self) -> AptosPublicInfo<'t> {
+    pub fn into_aptos_public_info(self) -> AptosPublicInfo {
         AptosPublicInfo::new(
             self.chain_id,
             self.inspection_service_url.clone(),
             self.rest_api_url.clone(),
-            self.root_account,
+            self.root_account.clone(),
         )
     }
 }
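With `Arc<LocalAccount>` inside it, `ChainInfo` no longer borrows the swarm for `'t`: it can be produced from `&self` and handed out as an owned value, and `root_account()` only needs `&self`. A sketch of the shape this enables (stand-in types, not the forge ones):

```rust
use std::sync::Arc;

// Stand-in types: Account plays the role of LocalAccount.
struct Account;

struct ChainInfo {
    root_account: Arc<Account>,
}

impl ChainInfo {
    // &self is enough now; every caller gets its own handle to the same account.
    fn root_account(&self) -> Arc<Account> {
        self.root_account.clone()
    }
}

fn main() {
    let info = ChainInfo {
        root_account: Arc::new(Account),
    };
    let a = info.root_account();
    let b = info.root_account();
    // Both handles refer to the same underlying account.
    assert!(Arc::ptr_eq(&a, &b));
}
```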
diff --git a/testsuite/forge/src/interface/network.rs b/testsuite/forge/src/interface/network.rs
index 1d4f87fc2a9f4..26b622d58ef98 100644
--- a/testsuite/forge/src/interface/network.rs
+++ b/testsuite/forge/src/interface/network.rs
@@ -9,20 +9,53 @@ use crate::{
     CoreContext, Result, Swarm, TestReport,
 };
 use aptos_transaction_emitter_lib::{EmitJobRequest, TxnStats};
-use std::time::Duration;
-use tokio::runtime::Runtime;
+use async_trait::async_trait;
+use std::{future::Future, sync::Arc, time::Duration};
+use tokio::runtime::{Handle, Runtime};
 
 /// The testing interface which defines a test written with full control over an existing network.
 /// Tests written against this interface will have access to both the Root account as well as the
 /// nodes which comprise the network.
+#[async_trait]
 pub trait NetworkTest: Test {
     /// Executes the test against the given context.
-    fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()>;
+    async fn run<'t>(&self, ctx: NetworkContextSynchronizer<'t>) -> Result<()>;
+}
+
+#[derive(Clone)]
+pub struct NetworkContextSynchronizer<'t> {
+    pub ctx: Arc<tokio::sync::Mutex<NetworkContext<'t>>>,
+    pub handle: tokio::runtime::Handle,
+}
+
+// TODO: some useful things that don't need to hold the lock or make a copy
+impl<'t> NetworkContextSynchronizer<'t> {
+    pub fn new(ctx: NetworkContext<'t>, handle: tokio::runtime::Handle) -> Self {
+        Self {
+            ctx: Arc::new(tokio::sync::Mutex::new(ctx)),
+            handle,
+        }
+    }
+
+    pub async fn report_text(&self, text: String) {
+        let mut locker = self.ctx.lock().await;
+        locker.report.report_text(text);
+    }
+
+    pub fn flex_block_on<F: Future>(&self, future: F) -> F::Output {
+        match Handle::try_current() {
+            Ok(handle) => {
+                // we are in an async context, we don't need block_on
+                handle.block_on(future)
+            },
+            Err(_) => self.handle.block_on(future),
+        }
+    }
 }
 
 pub struct NetworkContext<'t> {
     core: CoreContext,
-    pub swarm: &'t mut dyn Swarm,
+    pub swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>,
     pub report: &'t mut TestReport,
     pub global_duration: Duration,
     pub emit_job: EmitJobRequest,
@@ -33,7 +66,7 @@ pub struct NetworkContext<'t> {
 impl<'t> NetworkContext<'t> {
     pub fn new(
         core: CoreContext,
-        swarm: &'t mut dyn Swarm,
+        swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>,
         report: &'t mut TestReport,
         global_duration: Duration,
         emit_job: EmitJobRequest,
@@ -46,19 +79,15 @@ impl<'t> NetworkContext<'t> {
             global_duration,
             emit_job,
             success_criteria,
-            runtime: Runtime::new().unwrap(),
+            runtime: aptos_runtimes::spawn_named_runtime("emitter".into(), Some(64)),
         }
     }
 
-    pub fn swarm(&mut self) -> &mut dyn Swarm {
-        self.swarm
-    }
-
     pub fn core(&mut self) -> &mut CoreContext {
         &mut self.core
     }
 
-    pub fn check_for_success(
+    pub async fn check_for_success(
         &mut self,
         stats: &TxnStats,
         window: Duration,
@@ -68,18 +97,38 @@ impl<'t> NetworkContext<'t> {
         start_version: u64,
         end_version: u64,
     ) -> Result<()> {
-        self.runtime
-            .block_on(SuccessCriteriaChecker::check_for_success(
-                &self.success_criteria,
-                self.swarm,
-                self.report,
-                stats,
-                window,
-                latency_breakdown,
-                start_time,
-                end_time,
-                start_version,
-                end_version,
-            ))
+        SuccessCriteriaChecker::check_for_success(
+            &self.success_criteria,
+            self.swarm.clone(),
+            self.report,
+            stats,
+            window,
+            latency_breakdown,
+            start_time,
+            end_time,
+            start_version,
+            end_version,
+        )
+        .await
+    }
+
+    pub fn handle(&self) -> Handle {
+        match Handle::try_current() {
+            Ok(handle) => {
+                // we are in an async context, we don't need block_on
+                handle
+            },
+            Err(_) => self.runtime.handle().clone(),
+        }
+    }
+
+    pub fn flex_block_on<F: Future>(&self, future: F) -> F::Output {
+        match Handle::try_current() {
+            Ok(handle) => {
+                // we are in an async context, we don't need block_on
+                handle.block_on(future)
+            },
+            Err(_) => self.runtime.block_on(future),
+        }
     }
 }
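`flex_block_on` above dispatches between an ambient tokio runtime and one owned by the context. A runnable sketch of the same dispatch; note that `Handle::block_on` still panics if called from a thread that is actively executing async tasks, so this pattern only helps synchronous callers that may or may not have entered a runtime:

```rust
use std::future::Future;
use tokio::runtime::{Handle, Runtime};

// Sketch of the flex_block_on idea: prefer an ambient runtime when one has
// been entered, otherwise fall back to a runtime owned by the context.
fn flex_block_on<F: Future>(fallback: &Runtime, future: F) -> F::Output {
    match Handle::try_current() {
        // A runtime context exists (e.g. the caller used Handle::enter()).
        Ok(handle) => handle.block_on(future),
        // Plain synchronous caller: use the owned runtime.
        Err(_) => fallback.block_on(future),
    }
}

fn main() {
    let runtime = Runtime::new().unwrap();
    // main() has no ambient runtime, so the fallback branch runs.
    let value = flex_block_on(&runtime, async { 40 + 2 });
    assert_eq!(value, 42);
}
```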
diff --git a/testsuite/forge/src/interface/node.rs b/testsuite/forge/src/interface/node.rs
index e4dbcf24edfba..e3eaa314c2f46 100644
--- a/testsuite/forge/src/interface/node.rs
+++ b/testsuite/forge/src/interface/node.rs
@@ -55,19 +55,19 @@ pub trait Node: Send + Sync {
 
     /// Start this Node.
     /// This should be a noop if the Node is already running.
-    async fn start(&mut self) -> Result<()>;
+    async fn start(&self) -> Result<()>;
 
     /// Stop this Node.
     /// This should be a noop if the Node isn't running.
-    async fn stop(&mut self) -> Result<()>;
+    async fn stop(&self) -> Result<()>;
 
-    async fn get_identity(&mut self) -> Result<String>;
+    async fn get_identity(&self) -> Result<String>;
 
-    async fn set_identity(&mut self, k8s_secret_name: String) -> Result<()>;
+    async fn set_identity(&self, k8s_secret_name: String) -> Result<()>;
 
     /// Clears this Node's Storage. This stops the node as well
-    async fn clear_storage(&mut self) -> Result<()>;
+    async fn clear_storage(&self) -> Result<()>;
 
-    async fn health_check(&mut self) -> Result<(), HealthCheckError>;
+    async fn health_check(&self) -> Result<(), HealthCheckError>;
 
     fn counter(&self, counter: &str, port: u64) -> Result<f64>;
 
@@ -227,7 +227,7 @@ pub trait NodeExt: Node {
         Ok(self.rest_client().health_check(seconds).await?)
     }
 
-    async fn wait_until_healthy(&mut self, deadline: Instant) -> Result<()> {
+    async fn wait_until_healthy(&self, deadline: Instant) -> Result<()> {
         let mut healthcheck_error =
             HealthCheckError::Unknown(anyhow::anyhow!("No healthcheck performed yet"));
         while Instant::now() < deadline {
diff --git a/testsuite/forge/src/interface/prometheus_metrics.rs b/testsuite/forge/src/interface/prometheus_metrics.rs
index 6edcdbe4f81f1..9ec7d6f82440c 100644
--- a/testsuite/forge/src/interface/prometheus_metrics.rs
+++ b/testsuite/forge/src/interface/prometheus_metrics.rs
@@ -3,7 +3,7 @@
 
 use crate::Swarm;
 use prometheus_http_query::response::Sample;
-use std::{collections::BTreeMap, fmt};
+use std::{collections::BTreeMap, fmt, sync::Arc};
 
 #[derive(Clone)]
 pub struct MetricSamples(Vec<Sample>);
@@ -58,10 +58,16 @@ impl SystemMetrics {
     }
 }
 
-pub async fn fetch_error_metrics(swarm: &dyn Swarm) -> anyhow::Result<i64> {
+pub async fn fetch_error_metrics(
+    swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>,
+) -> anyhow::Result<i64> {
     let error_query = r#"aptos_error_log_count{role=~"validator"}"#;
 
-    let result = swarm.query_metrics(error_query, None, None).await?;
+    let result = swarm
+        .read()
+        .await
+        .query_metrics(error_query, None, None)
+        .await?;
     let error_samples = result.as_instant().unwrap_or(&[]);
 
     Ok(error_samples
@@ -72,13 +78,14 @@
 }
 
 pub async fn fetch_system_metrics(
-    swarm: &dyn Swarm,
+    swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>,
     start_time: i64,
     end_time: i64,
 ) -> anyhow::Result<SystemMetrics> {
     let cpu_query = r#"avg(rate(container_cpu_usage_seconds_total{container=~"validator"}[30s]))"#;
     let memory_query = r#"avg(container_memory_rss{container=~"validator"})"#;
 
+    let swarm = swarm.read().await;
     let cpu_samples = swarm
         .query_range_metrics(cpu_query, start_time, end_time, None)
         .await?;
@@ -119,7 +126,7 @@ impl LatencyBreakdown {
 }
 
 pub async fn fetch_latency_breakdown(
-    swarm: &dyn Swarm,
+    swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>,
     start_time: u64,
     end_time: u64,
 ) -> anyhow::Result<LatencyBreakdown> {
@@ -131,6 +138,7 @@ pub async fn fetch_latency_breakdown(
     let qs_batch_to_pos_query = r#"sum(rate(quorum_store_batch_to_PoS_duration_sum{role=~"validator"}[1m])) / sum(rate(quorum_store_batch_to_PoS_duration_count{role=~"validator"}[1m]))"#;
     let qs_pos_to_proposal_query = r#"sum(rate(quorum_store_pos_to_pull_sum{role=~"validator"}[1m])) / sum(rate(quorum_store_pos_to_pull_count{role=~"validator"}[1m]))"#;
 
+    let swarm = swarm.read().await;
     let consensus_proposal_to_ordered_samples = swarm
         .query_range_metrics(
             consensus_proposal_to_ordered_query,
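The prometheus helpers now take the swarm as `Arc<tokio::sync::RwLock<Box<dyn Swarm>>>` and acquire a read lock before querying. A minimal sketch of that shared, read-mostly access (with a trivial stand-in `Swarm` trait):

```rust
use std::sync::Arc;
use tokio::sync::RwLock;

// Trivial stand-in for the forge Swarm trait object.
trait Swarm: Send + Sync {
    fn validator_count(&self) -> usize;
}

struct FakeSwarm;

impl Swarm for FakeSwarm {
    fn validator_count(&self) -> usize {
        4
    }
}

// Read-mostly consumers take the async read lock, as fetch_system_metrics
// and fetch_latency_breakdown do above.
async fn report_validators(swarm: Arc<RwLock<Box<dyn Swarm>>>) -> usize {
    let swarm = swarm.read().await;
    swarm.validator_count()
}

#[tokio::main]
async fn main() {
    let swarm: Arc<RwLock<Box<dyn Swarm>>> = Arc::new(RwLock::new(Box::new(FakeSwarm)));
    // Multiple readers can hold the lock at once; writers wait for all of them.
    assert_eq!(report_validators(swarm.clone()).await, 4);
}
```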
validators_mut<'a>(&'a mut self) -> Box + 'a>; - /// Returns a reference to the Validator with the provided PeerId fn validator(&self, id: PeerId) -> Option<&dyn Validator>; - /// Returns a mutable reference to the Validator with the provided PeerId - fn validator_mut(&mut self, id: PeerId) -> Option<&mut dyn Validator>; - /// Upgrade a Validator to run specified `Version` async fn upgrade_validator(&mut self, id: PeerId, version: &Version) -> Result<()>; /// Returns an Iterator of references to all the FullNodes in the Swarm fn full_nodes<'a>(&'a self) -> Box + 'a>; - /// Returns an Iterator of mutable references to all the FullNodes in the Swarm - fn full_nodes_mut<'a>(&'a mut self) -> Box + 'a>; - /// Returns a reference to the FullNode with the provided PeerId fn full_node(&self, id: PeerId) -> Option<&dyn FullNode>; - /// Returns a mutable reference to the FullNode with the provided PeerId - fn full_node_mut(&mut self, id: PeerId) -> Option<&mut dyn FullNode>; - /// Adds a Validator to the swarm and returns the PeerId fn add_validator(&mut self, version: &Version, template: NodeConfig) -> Result; @@ -79,7 +66,7 @@ pub trait Swarm: Sync { fn versions<'a>(&'a self) -> Box + 'a>; /// Construct a ChainInfo from this Swarm - fn chain_info(&mut self) -> ChainInfo<'_>; + fn chain_info(&self) -> ChainInfo; fn logs_location(&mut self) -> String; @@ -107,13 +94,13 @@ pub trait Swarm: Sync { timeout: Option, ) -> Result>; - fn aptos_public_info(&mut self) -> AptosPublicInfo<'_> { + fn aptos_public_info(&self) -> AptosPublicInfo { self.chain_info().into_aptos_public_info() } - fn chain_info_for_node(&mut self, idx: usize) -> ChainInfo<'_>; + fn chain_info_for_node(&mut self, idx: usize) -> ChainInfo; - fn aptos_public_info_for_node(&mut self, idx: usize) -> AptosPublicInfo<'_> { + fn aptos_public_info_for_node(&mut self, idx: usize) -> AptosPublicInfo { self.chain_info_for_node(idx).into_aptos_public_info() } @@ -202,12 +189,11 @@ pub trait SwarmExt: Swarm { } /// Perform a safety check, ensuring that no forks have occurred in the network. - fn fork_check(&self, epoch_duration: Duration) -> Result<()> { - let runtime = Runtime::new().unwrap(); - + async fn fork_check(&self, epoch_duration: Duration) -> Result<()> { // Lots of errors can actually occur after an epoch change so guarantee that we change epochs here // This can wait for 2x epoch to at least force the caller to be explicit about the epoch duration - runtime.block_on(self.wait_for_all_nodes_to_change_epoch(epoch_duration * 2))?; + self.wait_for_all_nodes_to_change_epoch(epoch_duration * 2) + .await?; let clients = self .validators() @@ -215,16 +201,16 @@ pub trait SwarmExt: Swarm { .chain(self.full_nodes().map(|node| node.rest_client())) .collect::>(); - let versions = runtime - .block_on(try_join_all( - clients - .iter() - .map(|node| node.get_ledger_information()) - .collect::>(), - ))? - .into_iter() - .map(|resp| resp.into_inner().version) - .collect::>(); + let versions = try_join_all( + clients + .iter() + .map(|node| node.get_ledger_information()) + .collect::>(), + ) + .await? + .into_iter() + .map(|resp| resp.into_inner().version) + .collect::>(); let min_version = versions .iter() .min() @@ -236,21 +222,14 @@ pub trait SwarmExt: Swarm { .copied() .ok_or_else(|| anyhow!("Unable to query nodes for their latest version"))?; - if !runtime.block_on(Self::are_root_hashes_equal_at_version( - &clients, - min_version, - ))? { + if !Self::are_root_hashes_equal_at_version(&clients, min_version).await? 
{ return Err(anyhow!("Fork check failed")); } - runtime.block_on( - self.wait_for_all_nodes_to_catchup_to_version(max_version, Duration::from_secs(10)), - )?; + self.wait_for_all_nodes_to_catchup_to_version(max_version, Duration::from_secs(10)) + .await?; - if !runtime.block_on(Self::are_root_hashes_equal_at_version( - &clients, - max_version, - ))? { + if !Self::are_root_hashes_equal_at_version(&clients, max_version).await? { return Err(anyhow!("Fork check failed")); } diff --git a/testsuite/forge/src/runner.rs b/testsuite/forge/src/runner.rs index f4b15c9facdac..e6083cec11f1e 100644 --- a/testsuite/forge/src/runner.rs +++ b/testsuite/forge/src/runner.rs @@ -547,7 +547,7 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { let initial_version = self.initial_version(); // The genesis version should always match the initial node version let genesis_version = initial_version.clone(); - let runtime = Runtime::new().unwrap(); + let runtime = Runtime::new().unwrap(); // TODO: new multithreaded? let mut rng = ::rand::rngs::StdRng::from_seed(OsRng.gen()); let mut swarm = runtime.block_on(self.factory.launch_swarm( &mut rng, @@ -586,16 +586,26 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { summary.handle_result(test.name().to_owned(), result)?; } + let logs_location = swarm.logs_location(); + let swarm = Arc::new(tokio::sync::RwLock::new(swarm)); for test in self.filter_tests(&self.tests.network_tests) { - let mut network_ctx = NetworkContext::new( + let network_ctx = NetworkContext::new( CoreContext::from_rng(&mut rng), - &mut *swarm, + swarm.clone(), &mut report, self.global_duration, self.tests.emit_job_request.clone(), self.tests.success_criteria.clone(), ); - let result = run_test(|| test.run(&mut network_ctx)); + let handle = network_ctx.runtime.handle().clone(); + let _handle_context = handle.enter(); + let network_ctx = NetworkContextSynchronizer::new(network_ctx, handle.clone()); + let result = run_test(|| handle.block_on(test.run(network_ctx.clone()))); + // explicitly keep the network context in scope so that the tokio Runtime it created is dropped only after everything has finished running.
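To see why the destructuring that follows is needed: the test's NetworkContext owns a tokio Runtime, and every Arc clone handed out during the test must be reclaimed before that Runtime is dropped. A small sketch of the reclaim step (hypothetical names, assuming tokio's async Mutex):

use std::sync::Arc;
use tokio::sync::Mutex;

// Reclaim sole ownership of a shared value at teardown. `Arc::into_inner`
// returns None if any other clone is still alive, which would mean some
// task could outlive the runtime that is about to be dropped.
fn reclaim<T>(shared: Arc<Mutex<T>>) -> T {
    Arc::into_inner(shared)
        .expect("a clone of the context is still alive at teardown")
        .into_inner()
}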
+ let NetworkContextSynchronizer { ctx, handle } = network_ctx; + drop(handle); + let ctx = Arc::into_inner(ctx).unwrap().into_inner(); + drop(ctx); report.report_text(result.to_string()); summary.handle_result(test.name().to_owned(), result)?; } @@ -606,7 +616,7 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { io::stderr().flush()?; if !summary.success() { println!(); - println!("Swarm logs can be found here: {}", swarm.logs_location()); + println!("Swarm logs can be found here: {}", logs_location); } } diff --git a/testsuite/forge/src/success_criteria.rs b/testsuite/forge/src/success_criteria.rs index fa92684e407fd..987ac23643bf1 100644 --- a/testsuite/forge/src/success_criteria.rs +++ b/testsuite/forge/src/success_criteria.rs @@ -13,7 +13,7 @@ use aptos::node::analyze::fetch_metadata::FetchMetadata; use aptos_sdk::types::PeerId; use aptos_transaction_emitter_lib::{TxnStats, TxnStatsRate}; use prometheus_http_query::response::Sample; -use std::{collections::BTreeMap, time::Duration}; +use std::{collections::BTreeMap, sync::Arc, time::Duration}; #[derive(Clone, Debug)] pub struct StateProgressThreshold { @@ -269,7 +269,7 @@ impl SuccessCriteriaChecker { pub async fn check_for_success( success_criteria: &SuccessCriteria, - swarm: &mut dyn Swarm, + swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>, report: &mut TestReport, stats: &TxnStats, window: Duration, @@ -308,34 +308,42 @@ impl SuccessCriteriaChecker { if let Some(timeout) = success_criteria.wait_for_all_nodes_to_catchup { swarm + .read() + .await .wait_for_all_nodes_to_catchup_to_next(timeout) .await .context("Failed waiting for all nodes to catchup to next version")?; } if success_criteria.check_no_restarts { - swarm + let swarm_read = swarm.read().await; + swarm_read .ensure_no_validator_restart() .await .context("Failed ensuring no validator restarted")?; - swarm + swarm_read .ensure_no_fullnode_restart() .await .context("Failed ensuring no fullnode restarted")?; } if success_criteria.check_no_errors { - Self::check_no_errors(swarm).await?; + Self::check_no_errors(swarm.clone()).await?; } if let Some(system_metrics_threshold) = success_criteria.system_metrics_threshold.clone() { - Self::check_system_metrics(swarm, start_time, end_time, system_metrics_threshold) - .await?; + Self::check_system_metrics( + swarm.clone(), + start_time, + end_time, + system_metrics_threshold, + ) + .await?; } if let Some(chain_progress_threshold) = &success_criteria.chain_progress_check { Self::check_chain_progress( - swarm, + swarm.clone(), report, chain_progress_threshold, start_version, @@ -349,17 +357,21 @@ impl SuccessCriteriaChecker { } async fn check_chain_progress( - swarm: &mut dyn Swarm, + swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>, report: &mut TestReport, chain_progress_threshold: &StateProgressThreshold, start_version: u64, end_version: u64, ) -> anyhow::Result<()> { // Choose client with newest ledger version to fetch NewBlockEvents from: - let (_max_v, client) = swarm - .get_client_with_newest_ledger_version() - .await - .context("No clients replied in check_chain_progress")?; + let (_max_v, client) = { + swarm + .read() + .await + .get_client_with_newest_ledger_version() + .await + .context("No clients replied in check_chain_progress")? + }; let epochs = FetchMetadata::fetch_new_block_events(&client, None, None) .await @@ -565,7 +577,9 @@ impl SuccessCriteriaChecker { } } - async fn check_no_errors(swarm: &mut dyn Swarm) -> anyhow::Result<()> { + async fn check_no_errors( + swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>, + ) -> anyhow::Result<()> { let error_count = fetch_error_metrics(swarm).await?; if error_count > 0 { bail!( @@ -579,7 +593,7 @@ impl SuccessCriteriaChecker { } async fn check_system_metrics( - swarm: &mut dyn Swarm, + swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>, start_time: i64, end_time: i64, threshold: SystemMetricsThreshold, diff --git a/testsuite/forge/src/test_utils/consensus_utils.rs b/testsuite/forge/src/test_utils/consensus_utils.rs index 5f9f3716a1c1b..6115a5c2e60de 100644 --- a/testsuite/forge/src/test_utils/consensus_utils.rs +++ b/testsuite/forge/src/test_utils/consensus_utils.rs @@ -1,7 +1,7 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use crate::{wait_for_all_nodes_to_catchup_to_version, Swarm, SwarmExt}; +use crate::{wait_for_all_nodes_to_catchup_to_version, AptosPublicInfo}; use anyhow::{bail, Context, Result}; use aptos_config::config::DEFAULT_MAX_PAGE_SIZE; use aptos_rest_client::Client as RestClient; @@ -53,14 +53,16 @@ async fn get_node_state(validator_client: &RestClient) -> NodeState { /// I.e. if part is shorter than how long it takes for empty block to be /// generated, we can make sure one block gets created on every part. pub async fn test_consensus_fault_tolerance( - swarm: &mut dyn Swarm, + // swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>, + validator_clients: Vec<(String, RestClient)>, + public_info: AptosPublicInfo, cycles: usize, cycle_duration_s: f32, parts_in_cycle: usize, - mut failure_injection: Box<dyn FailureInjection>, + mut failure_injection: Box<dyn FailureInjection + Send>, // (cycle, executed_epochs, executed_rounds, executed_transactions, current_state, previous_state) mut check_cycle: Box< - dyn FnMut(usize, u64, u64, u64, Vec<NodeState>, Vec<NodeState>) -> Result<()>, + dyn FnMut(usize, u64, u64, u64, Vec<NodeState>, Vec<NodeState>) -> Result<()> + Send, >, new_epoch_on_cycle: bool, // Instead of failing on first check, we check the full run, // and fail at the end, if errors were found. // Can allow us to better see if state would've gotten resolved by itself, etc.
raise_check_error_at_the_end: bool, ) -> Result<()> { - let validator_clients = swarm.get_validator_clients_with_names(); - async fn get_all_states(validator_clients: &[(String, RestClient)]) -> Vec { join_all( validator_clients @@ -142,7 +142,7 @@ pub async fn test_consensus_fault_tolerance( } if new_epoch_on_cycle { - swarm.aptos_public_info().reconfig().await; + public_info.reconfig().await; } } @@ -237,7 +237,7 @@ impl FailureInjection for NoFailureInjection { async fn clear(&mut self, _: &[(String, RestClient)]) {} } -pub fn no_failure_injection() -> Box { +pub fn no_failure_injection() -> Box { Box::new(NoFailureInjection {}) } diff --git a/testsuite/smoke-test/src/aptos/error_report.rs b/testsuite/smoke-test/src/aptos/error_report.rs index d3bee2ad9e6ba..96a4a184353a1 100644 --- a/testsuite/smoke-test/src/aptos/error_report.rs +++ b/testsuite/smoke-test/src/aptos/error_report.rs @@ -11,7 +11,7 @@ use aptos_types::{ async fn submit_and_check_err TransactionBuilder>( local_account: &LocalAccount, - info: &mut AptosPublicInfo<'_>, + info: &mut AptosPublicInfo, f: F, expected: &str, ) { @@ -34,7 +34,7 @@ async fn submit_and_check_err TransactionBuilder>( #[tokio::test] async fn test_error_report() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); let local_account = info.random_account(); diff --git a/testsuite/smoke-test/src/aptos/gas_check.rs b/testsuite/smoke-test/src/aptos/gas_check.rs index 68f7d79659ff8..f3f63915486e9 100644 --- a/testsuite/smoke-test/src/aptos/gas_check.rs +++ b/testsuite/smoke-test/src/aptos/gas_check.rs @@ -12,7 +12,7 @@ use std::time::Duration; #[ignore] #[tokio::test] async fn test_gas_check() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); let account1 = info.random_account(); diff --git a/testsuite/smoke-test/src/aptos/mint_transfer.rs b/testsuite/smoke-test/src/aptos/mint_transfer.rs index 6740ec1b952f7..ae2e593c60600 100644 --- a/testsuite/smoke-test/src/aptos/mint_transfer.rs +++ b/testsuite/smoke-test/src/aptos/mint_transfer.rs @@ -9,7 +9,7 @@ use aptos_types::transaction::{ExecutionStatus, TransactionStatus}; #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn test_mint_transfer() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); let account1 = info.random_account(); diff --git a/testsuite/smoke-test/src/aptos/move_test_helpers.rs b/testsuite/smoke-test/src/aptos/move_test_helpers.rs index 013bff5e3874b..d5d0f2aa80b9f 100644 --- a/testsuite/smoke-test/src/aptos/move_test_helpers.rs +++ b/testsuite/smoke-test/src/aptos/move_test_helpers.rs @@ -11,7 +11,7 @@ use std::path::PathBuf; /// New style publishing via `code::publish_package` pub async fn publish_package( - info: &mut AptosPublicInfo<'_>, + info: &mut AptosPublicInfo, move_dir: PathBuf, ) -> Result { let package = BuiltPackage::build(move_dir, BuildOptions::default())?; diff --git a/testsuite/smoke-test/src/aptos/package_publish.rs b/testsuite/smoke-test/src/aptos/package_publish.rs index 7d1b48285c1cc..3c9c408fc8c82 100644 --- a/testsuite/smoke-test/src/aptos/package_publish.rs +++ b/testsuite/smoke-test/src/aptos/package_publish.rs @@ -6,7 +6,7 @@ use aptos_forge::Swarm; #[tokio::test] async fn test_package_publish() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let 
swarm = new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); let base_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")); diff --git a/testsuite/smoke-test/src/aptos_cli/validator.rs b/testsuite/smoke-test/src/aptos_cli/validator.rs index 2f6673c916403..5de3d15fa3e27 100644 --- a/testsuite/smoke-test/src/aptos_cli/validator.rs +++ b/testsuite/smoke-test/src/aptos_cli/validator.rs @@ -42,7 +42,7 @@ use std::{ #[tokio::test] async fn test_analyze_validators() { - let (mut swarm, cli, _faucet) = SwarmBuilder::new_local(1) + let (swarm, cli, _faucet) = SwarmBuilder::new_local(1) .with_aptos() .with_init_genesis_stake(Arc::new(|_i, genesis_stake_amount| { *genesis_stake_amount = 100000; @@ -545,7 +545,7 @@ pub(crate) fn generate_blob(data: &[u8]) -> String { async fn test_large_total_stake() { // just barelly below u64::MAX const BASE: u64 = 10_000_000_000_000_000_000; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_init_genesis_stake(Arc::new(|_, genesis_stake_amount| { // make sure we have quorum *genesis_stake_amount = BASE; @@ -613,7 +613,7 @@ async fn test_nodes_rewards() { // with 10% APY, BASE amount gives 100 rewards per second const BASE: u64 = 3600u64 * 24 * 365 * 10 * 100; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_init_config(Arc::new(|_, conf, _| { // reduce timeout, as we will have dead node during rounds conf.consensus.round_initial_timeout_ms = 200; @@ -946,7 +946,7 @@ async fn test_nodes_rewards() { #[tokio::test] async fn test_register_and_update_validator() { - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) .with_aptos() .build_with_cli(0) .await; @@ -1042,7 +1042,7 @@ async fn test_register_and_update_validator() { #[tokio::test] async fn test_join_and_leave_validator() { - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) .with_aptos() .with_init_config(Arc::new(|_i, conf, _| { // reduce timeout, as we will have dead node during rounds @@ -1207,7 +1207,7 @@ async fn test_join_and_leave_validator() { #[tokio::test] async fn test_owner_create_and_delegate_flow() { - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) .with_aptos() .with_init_config(Arc::new(|_i, conf, _| { // reduce timeout, as we will have dead node during rounds diff --git a/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs b/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs index 312763a539612..56e6cea9fa2d5 100644 --- a/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs +++ b/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs @@ -134,7 +134,8 @@ async fn run_fail_point_test( >, // (cycle, executed_epochs, executed_rounds, executed_transactions, current_state, previous_state) check_cycle: Box< - dyn FnMut(usize, u64, u64, u64, Vec, Vec) -> anyhow::Result<()>, + dyn FnMut(usize, u64, u64, u64, Vec, Vec) -> anyhow::Result<()> + + Send, >, ) { let mut swarm = create_swarm(num_validators, max_block_size).await; @@ -145,8 +146,15 @@ async fn run_fail_point_test( finish_traffic: Arc::new(AtomicBool::new(false)), } }; + let (validator_clients, public_info) = { + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) + 
}; test_consensus_fault_tolerance( - &mut swarm, + validator_clients, + public_info, cycles, cycle_duration_s, parts_in_cycle, @@ -177,10 +185,17 @@ fn successful_criteria(executed_epochs: u64, executed_rounds: u64, executed_tran async fn test_no_failures() { let num_validators = 3; - let mut swarm = create_swarm(num_validators, 1).await; + let swarm = create_swarm(num_validators, 1).await; + let (validator_clients, public_info) = { + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) + }; test_consensus_fault_tolerance( - &mut swarm, + validator_clients, + public_info, 3, 5.0, 1, @@ -202,10 +217,17 @@ async fn test_no_failures() { async fn test_ordered_only_cert() { let num_validators = 3; - let mut swarm = create_swarm(num_validators, 1).await; + let swarm = create_swarm(num_validators, 1).await; + let (validator_clients, public_info) = { + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) + }; test_consensus_fault_tolerance( - &mut swarm, + validator_clients, + public_info, 3, 5.0, 1, @@ -236,10 +258,16 @@ async fn test_ordered_only_cert() { async fn test_execution_retry() { let num_validators = 4; - let mut swarm = create_swarm(num_validators, 1).await; - + let swarm = create_swarm(num_validators, 1).await; + let (validator_clients, public_info) = { + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) + }; test_consensus_fault_tolerance( - &mut swarm, + validator_clients, + public_info, 3, 5.0, 1, diff --git a/testsuite/smoke-test/src/consensus/dag/dag_fault_tolerance.rs b/testsuite/smoke-test/src/consensus/dag/dag_fault_tolerance.rs index e6eba097db66c..2636a3def9491 100644 --- a/testsuite/smoke-test/src/consensus/dag/dag_fault_tolerance.rs +++ b/testsuite/smoke-test/src/consensus/dag/dag_fault_tolerance.rs @@ -10,7 +10,7 @@ use aptos_forge::{ test_utils::consensus_utils::{ no_failure_injection, test_consensus_fault_tolerance, FailPointFailureInjection, NodeState, }, - LocalSwarm, + LocalSwarm, Swarm, SwarmExt, }; use aptos_types::on_chain_config::{ ConsensusAlgorithmConfig, DagConsensusConfigV1, OnChainConsensusConfig, ValidatorTxnConfig, @@ -61,10 +61,16 @@ pub async fn create_dag_swarm(num_nodes: usize) -> LocalSwarm { async fn test_no_failures() { let num_validators = 3; - let mut swarm = create_dag_swarm(num_validators).await; - + let swarm = create_dag_swarm(num_validators).await; + let (validator_clients, public_info) = { + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) + }; test_consensus_fault_tolerance( - &mut swarm, + validator_clients, + public_info, 3, 5.0, 1, @@ -97,7 +103,8 @@ async fn run_dag_fail_point_test( >, // (cycle, executed_epochs, executed_rounds, executed_transactions, current_state, previous_state) check_cycle: Box< - dyn FnMut(usize, u64, u64, u64, Vec, Vec) -> anyhow::Result<()>, + dyn FnMut(usize, u64, u64, u64, Vec, Vec) -> anyhow::Result<()> + + Send, >, ) { let mut swarm = create_dag_swarm(num_validators).await; @@ -108,8 +115,15 @@ async fn run_dag_fail_point_test( finish_traffic: Arc::new(AtomicBool::new(false)), } }; + let (validator_clients, public_info) = { + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) + }; test_consensus_fault_tolerance( - &mut swarm, + validator_clients, + public_info, cycles, cycle_duration_s, parts_in_cycle, diff --git a/testsuite/smoke-test/src/execution.rs b/testsuite/smoke-test/src/execution.rs index 41802c2c713df..a63be389708cf 100644 --- 
a/testsuite/smoke-test/src/execution.rs +++ b/testsuite/smoke-test/src/execution.rs @@ -96,7 +96,7 @@ async fn get_last_non_reconfig_block_ending_txn_name(rest_client: &Client) -> Op #[tokio::test] async fn block_epilogue_upgrade_test() { - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(2) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(2) .with_aptos() // Start with V1 .with_init_genesis_config(Arc::new(|genesis_config| { diff --git a/testsuite/smoke-test/src/full_nodes.rs b/testsuite/smoke-test/src/full_nodes.rs index 28c6b5600d050..254f2825318a1 100644 --- a/testsuite/smoke-test/src/full_nodes.rs +++ b/testsuite/smoke-test/src/full_nodes.rs @@ -34,7 +34,7 @@ async fn test_full_node_basic_flow() { ) .await .unwrap(); - for fullnode in swarm.full_nodes_mut() { + for fullnode in swarm.full_nodes() { fullnode .wait_until_healthy(Instant::now() + Duration::from_secs(MAX_HEALTHY_WAIT_SECS)) .await @@ -126,7 +126,7 @@ async fn test_vfn_failover() { .await; let transaction_factory = swarm.chain_info().transaction_factory(); - for fullnode in swarm.full_nodes_mut() { + for fullnode in swarm.full_nodes() { fullnode .wait_until_healthy(Instant::now() + Duration::from_secs(MAX_HEALTHY_WAIT_SECS)) .await diff --git a/testsuite/smoke-test/src/fullnode.rs b/testsuite/smoke-test/src/fullnode.rs index 8b2c8f30f5b79..35b22ec579ab1 100644 --- a/testsuite/smoke-test/src/fullnode.rs +++ b/testsuite/smoke-test/src/fullnode.rs @@ -48,7 +48,7 @@ async fn test_indexer() { ) .unwrap(); - let fullnode = swarm.full_node_mut(fullnode_peer_id).unwrap(); + let fullnode = swarm.full_node(fullnode_peer_id).unwrap(); fullnode .wait_until_healthy(Instant::now() + Duration::from_secs(MAX_HEALTHY_WAIT_SECS)) .await @@ -146,7 +146,7 @@ async fn test_internal_indexer_with_fast_sync() { .wait_for_all_nodes_to_catchup(Duration::from_secs(60)) .await .unwrap(); - let node = swarm.full_node_mut(peer_id).unwrap(); + let node = swarm.full_node(peer_id).unwrap(); let node_config = node.config().to_owned(); node.stop().await.unwrap(); check_indexer_db(&node_config); diff --git a/testsuite/smoke-test/src/indexer.rs b/testsuite/smoke-test/src/indexer.rs index 9bb854d44df71..45d2e6a2a4200 100644 --- a/testsuite/smoke-test/src/indexer.rs +++ b/testsuite/smoke-test/src/indexer.rs @@ -32,10 +32,7 @@ pub fn setup_indexer() -> anyhow::Result { Ok(conn_pool) } -pub async fn execute_nft_txns<'t>( - creator: LocalAccount, - info: &mut AptosPublicInfo<'t>, -) -> Result<()> { +pub async fn execute_nft_txns<'t>(creator: LocalAccount, info: &mut AptosPublicInfo) -> Result<()> { let collection_name = "collection name".to_owned().into_bytes(); let token_name = "token name".to_owned().into_bytes(); let collection_builder = @@ -98,7 +95,7 @@ async fn test_old_indexer() { let conn_pool = setup_indexer().unwrap(); - let mut swarm = crate::smoke_test_environment::SwarmBuilder::new_local(1) + let swarm = crate::smoke_test_environment::SwarmBuilder::new_local(1) .with_aptos() .with_init_config(Arc::new(|_, config, _| { config.storage.enable_indexer = true; diff --git a/testsuite/smoke-test/src/inspection_service.rs b/testsuite/smoke-test/src/inspection_service.rs index 13341406c0fe3..da83113228628 100644 --- a/testsuite/smoke-test/src/inspection_service.rs +++ b/testsuite/smoke-test/src/inspection_service.rs @@ -6,7 +6,7 @@ use aptos_forge::Swarm; #[tokio::test] async fn test_inspection_service_connection() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let info = 
swarm.aptos_public_info(); // Ping the inspection service index page and verify we get a successful response let resp = reqwest::get(info.inspection_service_url().to_owned()) diff --git a/testsuite/smoke-test/src/jwks/jwk_consensus_basic.rs b/testsuite/smoke-test/src/jwks/jwk_consensus_basic.rs index f572dfae0cfa2..e91af813bc458 100644 --- a/testsuite/smoke-test/src/jwks/jwk_consensus_basic.rs +++ b/testsuite/smoke-test/src/jwks/jwk_consensus_basic.rs @@ -29,7 +29,7 @@ use tokio::time::sleep; async fn jwk_consensus_basic() { let epoch_duration_secs = 30; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/jwks/jwk_consensus_per_issuer.rs b/testsuite/smoke-test/src/jwks/jwk_consensus_per_issuer.rs index ac7f514051c11..76a07be724ae2 100644 --- a/testsuite/smoke-test/src/jwks/jwk_consensus_per_issuer.rs +++ b/testsuite/smoke-test/src/jwks/jwk_consensus_per_issuer.rs @@ -29,7 +29,7 @@ use tokio::time::sleep; async fn jwk_consensus_per_issuer() { let epoch_duration_secs = 30; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/jwks/jwk_consensus_provider_change_mind.rs b/testsuite/smoke-test/src/jwks/jwk_consensus_provider_change_mind.rs index c87671691ff91..f26e1d4bf7489 100644 --- a/testsuite/smoke-test/src/jwks/jwk_consensus_provider_change_mind.rs +++ b/testsuite/smoke-test/src/jwks/jwk_consensus_provider_change_mind.rs @@ -31,7 +31,7 @@ async fn jwk_consensus_provider_change_mind() { // Big epoch duration to ensure epoch change does not help reset validators if they are stuck. let epoch_duration_secs = 1800; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/jwks/mod.rs b/testsuite/smoke-test/src/jwks/mod.rs index ea7cab2002204..d677300ca1c6a 100644 --- a/testsuite/smoke-test/src/jwks/mod.rs +++ b/testsuite/smoke-test/src/jwks/mod.rs @@ -89,7 +89,7 @@ async fn get_patched_jwks(rest_client: &Client) -> PatchedJWKs { /// Patch the JWK with governance proposal and see it is effective. 
#[tokio::test] async fn jwk_patching() { - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_aptos() .build_with_cli(0) .await; diff --git a/testsuite/smoke-test/src/keyless.rs b/testsuite/smoke-test/src/keyless.rs index c7735a39e9ad3..fb77d12e15cf9 100644 --- a/testsuite/smoke-test/src/keyless.rs +++ b/testsuite/smoke-test/src/keyless.rs @@ -45,7 +45,7 @@ use std::{fmt::Debug, time::Duration}; #[tokio::test] async fn test_keyless_oidc_txn_verifies() { - let (_, _, mut swarm, signed_txn) = get_transaction(get_sample_openid_sig_and_pk).await; + let (_, _, swarm, signed_txn) = get_transaction(get_sample_openid_sig_and_pk).await; info!("Submit OpenID transaction"); let result = swarm @@ -61,7 +61,7 @@ async fn test_keyless_oidc_txn_verifies() { #[tokio::test] async fn test_keyless_rotate_vk() { - let (tw_sk, config, jwk, mut swarm, mut cli, root_idx) = setup_local_net().await; + let (tw_sk, config, jwk, swarm, mut cli, root_idx) = setup_local_net().await; let mut info = swarm.aptos_public_info(); let (old_sig, old_pk) = get_sample_groth16_sig_and_pk(); @@ -164,7 +164,7 @@ async fn test_keyless_secure_test_jwk_initialized_at_genesis() { #[tokio::test] async fn test_keyless_oidc_txn_with_bad_jwt_sig() { - let (tw_sk, config, jwk, mut swarm, _, _) = setup_local_net().await; + let (tw_sk, config, jwk, swarm, _, _) = setup_local_net().await; let (mut sig, pk) = get_sample_openid_sig_and_pk(); match &mut sig.cert { @@ -190,7 +190,7 @@ async fn test_keyless_oidc_txn_with_bad_jwt_sig() { #[tokio::test] async fn test_keyless_oidc_txn_with_expired_epk() { - let (tw_sk, config, jwk, mut swarm, _, _) = setup_local_net().await; + let (tw_sk, config, jwk, swarm, _, _) = setup_local_net().await; let (mut sig, pk) = get_sample_openid_sig_and_pk(); sig.exp_date_secs = 1; // This should fail the verification since the expiration date is way in the past @@ -211,7 +211,7 @@ async fn test_keyless_oidc_txn_with_expired_epk() { #[tokio::test] async fn test_keyless_groth16_verifies() { - let (_, _, mut swarm, signed_txn) = get_transaction(get_sample_groth16_sig_and_pk).await; + let (_, _, swarm, signed_txn) = get_transaction(get_sample_groth16_sig_and_pk).await; info!("Submit keyless Groth16 transaction"); let result = swarm @@ -227,7 +227,7 @@ async fn test_keyless_groth16_verifies() { #[tokio::test] async fn test_keyless_no_extra_field_groth16_verifies() { - let (_, _, mut swarm, signed_txn) = + let (_, _, swarm, signed_txn) = get_transaction(get_sample_groth16_sig_and_pk_no_extra_field).await; info!("Submit keyless Groth16 transaction"); @@ -244,7 +244,7 @@ async fn test_keyless_no_extra_field_groth16_verifies() { #[tokio::test] async fn test_keyless_no_training_wheels_groth16_verifies() { - let (_tw_sk, config, jwk, mut swarm, mut cli, root_idx) = setup_local_net().await; + let (_tw_sk, config, jwk, swarm, mut cli, root_idx) = setup_local_net().await; let (sig, pk) = get_sample_groth16_sig_and_pk(); let mut info = swarm.aptos_public_info(); @@ -267,7 +267,7 @@ async fn test_keyless_no_training_wheels_groth16_verifies() { #[tokio::test] async fn test_keyless_groth16_with_mauled_proof() { - let (tw_sk, config, jwk, mut swarm, _, _) = setup_local_net().await; + let (tw_sk, config, jwk, swarm, _, _) = setup_local_net().await; let (sig, pk) = get_sample_groth16_sig_and_pk(); let mut info = swarm.aptos_public_info(); @@ -287,7 +287,7 @@ async fn test_keyless_groth16_with_mauled_proof() { #[tokio::test] async fn 
test_keyless_groth16_with_bad_tw_signature() { - let (_tw_sk, config, jwk, mut swarm, _, _) = setup_local_net().await; + let (_tw_sk, config, jwk, swarm, _, _) = setup_local_net().await; let (sig, pk) = get_sample_groth16_sig_and_pk(); let mut info = swarm.aptos_public_info(); @@ -318,7 +318,7 @@ async fn test_keyless_groth16_with_bad_tw_signature() { } async fn sign_transaction<'a>( - info: &mut AptosPublicInfo<'a>, + info: &mut AptosPublicInfo, mut sig: KeylessSignature, pk: KeylessPublicKey, jwk: &RSA_JWK, @@ -438,7 +438,7 @@ async fn get_transaction( LocalSwarm, SignedTransaction, ) { - let (tw_sk, config, jwk, mut swarm, _, _) = setup_local_net().await; + let (tw_sk, config, jwk, swarm, _, _) = setup_local_net().await; let (sig, pk) = get_pk_and_sig_func(); @@ -477,7 +477,7 @@ async fn setup_local_net() -> ( async fn remove_training_wheels<'a>( cli: &mut CliTestFramework, - info: &mut AptosPublicInfo<'a>, + info: &mut AptosPublicInfo, root_idx: usize, ) { let script = format!( @@ -658,7 +658,7 @@ async fn get_latest_jwkset(rest_client: &Client) -> PatchedJWKs { async fn rotate_vk_by_governance<'a>( cli: &mut CliTestFramework, - info: &mut AptosPublicInfo<'a>, + info: &mut AptosPublicInfo, vk: &Groth16VerificationKey, root_idx: usize, ) { diff --git a/testsuite/smoke-test/src/randomness/disable_feature_0.rs b/testsuite/smoke-test/src/randomness/disable_feature_0.rs index a5273521879f8..f004a95ebb187 100644 --- a/testsuite/smoke-test/src/randomness/disable_feature_0.rs +++ b/testsuite/smoke-test/src/randomness/disable_feature_0.rs @@ -19,7 +19,7 @@ use std::{sync::Arc, time::Duration}; async fn disable_feature_0() { let epoch_duration_secs = 20; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/disable_feature_1.rs b/testsuite/smoke-test/src/randomness/disable_feature_1.rs index 99712c7b385d6..efcfb5c5f3c53 100644 --- a/testsuite/smoke-test/src/randomness/disable_feature_1.rs +++ b/testsuite/smoke-test/src/randomness/disable_feature_1.rs @@ -21,7 +21,7 @@ use std::{sync::Arc, time::Duration}; async fn disable_feature_1() { let epoch_duration_secs = 20; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/dkg_with_validator_join_leave.rs b/testsuite/smoke-test/src/randomness/dkg_with_validator_join_leave.rs index 2f02146368a4e..4ce4dcae4089e 100644 --- a/testsuite/smoke-test/src/randomness/dkg_with_validator_join_leave.rs +++ b/testsuite/smoke-test/src/randomness/dkg_with_validator_join_leave.rs @@ -16,7 +16,7 @@ async fn dkg_with_validator_join_leave() { let estimated_dkg_latency_secs = 80; let time_limit_secs = epoch_duration_secs + estimated_dkg_latency_secs; - let mut swarm = SwarmBuilder::new_local(7) + let swarm = SwarmBuilder::new_local(7) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/e2e_basic_consumption.rs b/testsuite/smoke-test/src/randomness/e2e_basic_consumption.rs index 585a46fcd65cf..44154c16e69d6 100644 --- a/testsuite/smoke-test/src/randomness/e2e_basic_consumption.rs +++ b/testsuite/smoke-test/src/randomness/e2e_basic_consumption.rs @@ -16,7 +16,7 @@ use 
std::{collections::BTreeMap, str::FromStr, sync::Arc, time::Duration}; async fn e2e_basic_consumption() { let epoch_duration_secs = 20; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/enable_feature_0.rs b/testsuite/smoke-test/src/randomness/enable_feature_0.rs index 9aac2afb4af29..1f7e47d1d1445 100644 --- a/testsuite/smoke-test/src/randomness/enable_feature_0.rs +++ b/testsuite/smoke-test/src/randomness/enable_feature_0.rs @@ -22,7 +22,7 @@ async fn enable_feature_0() { let epoch_duration_secs = 20; let estimated_dkg_latency_secs = 40; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/enable_feature_1.rs b/testsuite/smoke-test/src/randomness/enable_feature_1.rs index f622313643812..2288f1f16b57d 100644 --- a/testsuite/smoke-test/src/randomness/enable_feature_1.rs +++ b/testsuite/smoke-test/src/randomness/enable_feature_1.rs @@ -22,7 +22,7 @@ async fn enable_feature_1() { let epoch_duration_secs = 20; let estimated_dkg_latency_secs = 40; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/enable_feature_2.rs b/testsuite/smoke-test/src/randomness/enable_feature_2.rs index e7fef83c420fa..3f008e9fe9c78 100644 --- a/testsuite/smoke-test/src/randomness/enable_feature_2.rs +++ b/testsuite/smoke-test/src/randomness/enable_feature_2.rs @@ -17,7 +17,7 @@ async fn enable_feature_2() { let epoch_duration_secs = 20; let estimated_dkg_latency_secs = 40; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/entry_func_attrs.rs b/testsuite/smoke-test/src/randomness/entry_func_attrs.rs index 49c931dd3df3b..72a8df885ca46 100644 --- a/testsuite/smoke-test/src/randomness/entry_func_attrs.rs +++ b/testsuite/smoke-test/src/randomness/entry_func_attrs.rs @@ -271,7 +271,7 @@ async fn common(params: TestParams) { let epoch_duration_secs = 20; let estimated_dkg_latency_secs = 30; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { conf.epoch_duration_secs = epoch_duration_secs; diff --git a/testsuite/smoke-test/src/rest_api.rs b/testsuite/smoke-test/src/rest_api.rs index 97a6f8c2b0bfb..46cc8eb2205a5 100644 --- a/testsuite/smoke-test/src/rest_api.rs +++ b/testsuite/smoke-test/src/rest_api.rs @@ -30,7 +30,7 @@ use std::{convert::TryFrom, str::FromStr, sync::Arc, time::Duration}; #[tokio::test] async fn test_get_index() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let info = swarm.aptos_public_info(); let resp = reqwest::get(info.url().to_owned()).await.unwrap(); @@ -39,7 +39,7 @@ async fn test_get_index() { #[tokio::test] async fn test_basic_client() { - let mut swarm = 
new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); info.client().get_ledger_information().await.unwrap(); @@ -244,7 +244,7 @@ async fn test_gas_estimation_gas_used_limit() { #[tokio::test] async fn test_bcs() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); // Create accounts @@ -546,7 +546,7 @@ async fn test_bcs() { #[tokio::test] async fn test_view_function() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let info = swarm.aptos_public_info(); let client: &Client = info.client(); diff --git a/testsuite/smoke-test/src/rosetta.rs b/testsuite/smoke-test/src/rosetta.rs index 65d89c5ba63d1..4cd421c8e3312 100644 --- a/testsuite/smoke-test/src/rosetta.rs +++ b/testsuite/smoke-test/src/rosetta.rs @@ -242,7 +242,7 @@ async fn test_network() { #[tokio::test] async fn test_account_balance() { - let (mut swarm, cli, _faucet, rosetta_client) = setup_simple_test(3).await; + let (swarm, cli, _faucet, rosetta_client) = setup_simple_test(3).await; let account_1 = cli.account_id(0); let account_2 = cli.account_id(1); @@ -467,7 +467,7 @@ async fn test_account_balance() { } async fn create_staking_contract( - info: &AptosPublicInfo<'_>, + info: &AptosPublicInfo, account: &mut LocalAccount, operator: AccountAddress, voter: AccountAddress, @@ -491,7 +491,7 @@ async fn create_staking_contract( } async fn unlock_stake( - info: &AptosPublicInfo<'_>, + info: &AptosPublicInfo, account: &mut LocalAccount, operator: AccountAddress, amount: u64, @@ -509,7 +509,7 @@ async fn unlock_stake( } async fn create_delegation_pool( - info: &AptosPublicInfo<'_>, + info: &AptosPublicInfo, account: &mut LocalAccount, commission_percentage: u64, sequence_number: u64, @@ -587,7 +587,7 @@ async fn wait_for_rosetta_block(node_clients: &NodeClients<'_>, block_height: u6 #[tokio::test] async fn test_transfer() { - let (mut swarm, cli, _faucet, rosetta_client) = setup_simple_test(1).await; + let (swarm, cli, _faucet, rosetta_client) = setup_simple_test(1).await; let chain_id = swarm.chain_id(); let client = swarm.aptos_public_info().client().clone(); let sender = cli.account_id(0); @@ -2501,7 +2501,7 @@ async fn withdraw_undelegated_stake_and_wait( async fn test_delegation_pool_operations() { const NUM_TXNS_PER_PAGE: u16 = 2; - let (mut swarm, cli, _, rosetta_client) = setup_test( + let (swarm, cli, _, rosetta_client) = setup_test( 2, Arc::new(|_, config, _| config.api.max_transactions_page_size = NUM_TXNS_PER_PAGE), ) diff --git a/testsuite/smoke-test/src/state_sync_utils.rs b/testsuite/smoke-test/src/state_sync_utils.rs index c5ffc487ed30d..0558d169629b0 100644 --- a/testsuite/smoke-test/src/state_sync_utils.rs +++ b/testsuite/smoke-test/src/state_sync_utils.rs @@ -31,7 +31,7 @@ pub async fn create_fullnode(full_node_config: NodeConfig, swarm: &mut LocalSwar validator_peer_id, ) .unwrap(); - for fullnode in swarm.full_nodes_mut() { + for fullnode in swarm.full_nodes() { fullnode .wait_until_healthy(Instant::now() + Duration::from_secs(MAX_HEALTHY_WAIT_SECS)) .await @@ -62,7 +62,7 @@ pub async fn stop_fullnode_and_delete_storage( fullnode: AccountAddress, clear_storage: bool, ) { - let fullnode = swarm.full_node_mut(fullnode).unwrap(); + let fullnode = swarm.full_node(fullnode).unwrap(); if clear_storage { // The fullnode is implicitly stopped during the clear_storage() call 
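For context on the &mut self to &self migration running through these hunks: once the async node methods take &self, node handles can be shared behind Arc and driven from several tasks, with implementations doing their own internal locking. A rough sketch of the resulting trait shape (hypothetical NodeControl name, assuming the async-trait crate; not the actual forge trait):

use anyhow::Result;
use async_trait::async_trait;

#[async_trait]
trait NodeControl: Send + Sync {
    /// Should be a no-op if the node is already stopped.
    async fn stop(&self) -> Result<()>;
    /// Stops the node first, then wipes its storage, so callers
    /// (like the helper here) don't need a separate stop() call.
    async fn clear_storage(&self) -> Result<()>;
}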
fullnode.clear_storage().await.unwrap(); diff --git a/testsuite/smoke-test/src/test_smoke_tests.rs b/testsuite/smoke-test/src/test_smoke_tests.rs index 45ffd8f0b0ad4..16c28215369ec 100644 --- a/testsuite/smoke-test/src/test_smoke_tests.rs +++ b/testsuite/smoke-test/src/test_smoke_tests.rs @@ -40,7 +40,7 @@ async fn test_aptos_node_after_get_bin() { ) .unwrap(); - for fullnode in swarm.full_nodes_mut() { + for fullnode in swarm.full_nodes() { fullnode .wait_until_healthy(Instant::now() + Duration::from_secs(MAX_HEALTHY_WAIT_SECS)) .await diff --git a/testsuite/smoke-test/src/transaction.rs b/testsuite/smoke-test/src/transaction.rs index c58e36400dc65..1e9d47b85ba77 100644 --- a/testsuite/smoke-test/src/transaction.rs +++ b/testsuite/smoke-test/src/transaction.rs @@ -19,7 +19,7 @@ use aptos_sdk::{ #[ignore] #[tokio::test] async fn test_external_transaction_signer() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); // generate key pair diff --git a/testsuite/smoke-test/src/txn_broadcast.rs b/testsuite/smoke-test/src/txn_broadcast.rs index a041e906eaf2d..018a939bcf1a7 100644 --- a/testsuite/smoke-test/src/txn_broadcast.rs +++ b/testsuite/smoke-test/src/txn_broadcast.rs @@ -39,7 +39,7 @@ async fn test_txn_broadcast() { ) .unwrap(); - for fullnode in swarm.full_nodes_mut() { + for fullnode in swarm.full_nodes() { fullnode .wait_until_healthy(Instant::now() + Duration::from_secs(MAX_HEALTHY_WAIT_SECS)) .await diff --git a/testsuite/smoke-test/src/txn_emitter.rs b/testsuite/smoke-test/src/txn_emitter.rs index 74af50817fa9b..4952cd89d54b1 100644 --- a/testsuite/smoke-test/src/txn_emitter.rs +++ b/testsuite/smoke-test/src/txn_emitter.rs @@ -206,8 +206,9 @@ async fn test_txn_emmitter_low_funds() { mempool_backlog: 10, }); + let account_1 = Arc::new(account_1); let txn_stat = emitter - .emit_txn_for_with_stats(&account_1, emit_job_request, Duration::from_secs(10), 3) + .emit_txn_for_with_stats(account_1, emit_job_request, Duration::from_secs(10), 3) .await .unwrap(); diff --git a/testsuite/smoke-test/src/utils.rs b/testsuite/smoke-test/src/utils.rs index a720671836eb1..dc64bdb686b1c 100644 --- a/testsuite/smoke-test/src/utils.rs +++ b/testsuite/smoke-test/src/utils.rs @@ -12,7 +12,7 @@ use aptos_sdk::{ use aptos_types::on_chain_config::{OnChainConsensusConfig, OnChainExecutionConfig}; use move_core_types::language_storage::CORE_CODE_ADDRESS; use rand::random; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; pub const MAX_CATCH_UP_WAIT_SECS: u64 = 180; // The max time we'll wait for nodes to catch up pub const MAX_CONNECTIVITY_WAIT_SECS: u64 = 180; // The max time we'll wait for nodes to gain connectivity @@ -117,7 +117,7 @@ pub async fn transfer_coins( pub async fn transfer_and_maybe_reconfig( client: &RestClient, transaction_factory: &TransactionFactory, - root_account: &mut LocalAccount, + root_account: Arc, sender: &mut LocalAccount, receiver: &LocalAccount, num_transfers: usize, @@ -125,7 +125,7 @@ pub async fn transfer_and_maybe_reconfig( for _ in 0..num_transfers { // Reconfigurations have a 20% chance of being executed if random::() % 5 == 0 { - reconfig(client, transaction_factory, root_account).await; + reconfig(client, transaction_factory, root_account.clone()).await; } transfer_coins(client, transaction_factory, sender, receiver, 1).await; diff --git a/testsuite/testcases/Cargo.toml b/testsuite/testcases/Cargo.toml index 225c5d96153bf..c7443b02da9c2 100644 --- 
a/testsuite/testcases/Cargo.toml +++ b/testsuite/testcases/Cargo.toml @@ -26,6 +26,7 @@ aptos-runtimes = { workspace = true } aptos-sdk = { workspace = true } aptos-temppath = { workspace = true } aptos-types = { workspace = true } +async-trait = { workspace = true } bcs = { workspace = true } csv = { workspace = true } futures = { workspace = true } @@ -34,6 +35,7 @@ itertools = { workspace = true } rand = { workspace = true } reqwest = { workspace = true } tokio = { workspace = true } +tokio-scoped = { workspace = true } [dev-dependencies] assert_approx_eq = { workspace = true } diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index 88a6a10e419d2..c14203af78ffe 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -2,11 +2,24 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. // SPDX-License-Identifier: Apache-2.0 -use crate::{batch_update, generate_traffic}; +use crate::{batch_update_gradually, create_emitter_and_request, generate_traffic}; use anyhow::bail; -use aptos_forge::{NetworkContext, NetworkTest, Result, SwarmExt, Test}; +use aptos_forge::{ + EmitJobRequest, NetworkContextSynchronizer, NetworkTest, Result, SwarmExt, Test, TxnEmitter, + TxnStats, Version, +}; use aptos_logger::info; -use tokio::{runtime::Runtime, time::Duration}; +use aptos_sdk::types::{LocalAccount, PeerId}; +use async_trait::async_trait; +use rand::SeedableRng; +use std::{ + ops::DerefMut, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, +}; +use tokio::time::Duration; pub struct SimpleValidatorUpgrade; @@ -20,15 +33,127 @@ impl Test for SimpleValidatorUpgrade { } } +async fn stat_gather_task( + emitter: TxnEmitter, + emit_job_request: EmitJobRequest, + source_account: Arc<LocalAccount>, + upgrade_traffic_chunk_duration: Duration, + done: Arc<AtomicBool>, +) -> Result<Option<TxnStats>> { + let mut upgrade_stats = vec![]; + while !done.load(Ordering::Relaxed) { + info!("stat_gather_task some traffic..."); + let upgrading_stats = emitter + .clone() + .emit_txn_for( + source_account.clone(), + emit_job_request.clone(), + upgrade_traffic_chunk_duration, + ) + .await?; + info!("stat_gather_task some stats: {}", &upgrading_stats); + upgrade_stats.push(upgrading_stats); + } + let statsum = upgrade_stats.into_iter().reduce(|a, b| &a + &b); + Ok(statsum) +} + +fn upgrade_and_gather_stats( + ctxa: NetworkContextSynchronizer, + // upgrade args + validators_to_update: &[PeerId], + version: &Version, + wait_until_healthy: bool, + delay: Duration, + max_wait: Duration, + // traffic args + nodes: &[PeerId], +) -> Result<Option<TxnStats>> { + let upgrade_done = Arc::new(AtomicBool::new(false)); + let emitter_ctx = ctxa.clone(); + let mut stats_result: Result<Option<TxnStats>> = Ok(None); + let mut upgrade_result: Result<()> = Ok(()); + tokio_scoped::scope(|scopev| { + // emit traffic and gather stats + scopev.spawn(async { + info!("upgrade_and_gather_stats traffic thread start"); + let (emitter, emit_job_request, source_account) = { + let mut ctx_locker = emitter_ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let emit_job_request = ctx.emit_job.clone(); + let rng = SeedableRng::from_rng(ctx.core().rng()).unwrap(); + let (emitter, emit_job_request) = match create_emitter_and_request( + ctx.swarm.clone(), + emit_job_request, + nodes, + rng, + ) + .await + { + Ok(parts) => parts, + Err(err) => { + stats_result = Err(err); + return; + }, + }; + let source_account = ctx.swarm.read().await.chain_info().root_account; + (emitter, emit_job_request,
source_account) + // release lock on network context + }; + let upgrade_traffic_chunk_duration = Duration::from_secs(15); + info!("upgrade_and_gather_stats traffic thread 1"); + stats_result = stat_gather_task( + emitter, + emit_job_request, + source_account, + upgrade_traffic_chunk_duration, + upgrade_done.clone(), + ) + .await; + info!("upgrade_and_gather_stats traffic thread done"); + }); + // do upgrade + scopev.spawn(async { + info!("upgrade_and_gather_stats upgrade thread start"); + upgrade_result = batch_update_gradually( + ctxa, + validators_to_update, + version, + wait_until_healthy, + delay, + max_wait, + ) + .await; + info!("upgrade_and_gather_stats upgrade thread 1"); + upgrade_done.store(true, Ordering::Relaxed); + info!("upgrade_and_gather_stats upgrade thread done"); + }); + }); + + upgrade_result?; + stats_result +} + +#[async_trait] impl NetworkTest for SimpleValidatorUpgrade { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { - let runtime = Runtime::new()?; + async fn run<'a>(&self, ctxa: NetworkContextSynchronizer<'a>) -> Result<()> { + let upgrade_wait_for_healthy = true; + let upgrade_node_delay = Duration::from_secs(10); + let upgrade_max_wait = Duration::from_secs(40); let epoch_duration = Duration::from_secs(Self::EPOCH_DURATION_SECS); // Get the different versions we're testing with let (old_version, new_version) = { - let mut versions = ctx.swarm().versions().collect::>(); + let mut versions = ctxa + .ctx + .lock() + .await + .swarm + .read() + .await + .versions() + .collect::>(); versions.sort(); if versions.len() != 2 { bail!("exactly two different versions needed to run compat test"); @@ -42,14 +167,29 @@ impl NetworkTest for SimpleValidatorUpgrade { old_version, new_version ); info!("{}", msg); - ctx.report.report_text(msg); + ctxa.report_text(msg).await; // Split the swarm into 2 parts - if ctx.swarm().validators().count() < 4 { + if ctxa + .ctx + .lock() + .await + .swarm + .read() + .await + .validators() + .count() + < 4 + { bail!("compat test requires >= 4 validators"); } - let all_validators = ctx - .swarm() + let all_validators = ctxa + .ctx + .lock() + .await + .swarm + .read() + .await .validators() .map(|v| v.peer_id()) .collect::>(); @@ -63,12 +203,16 @@ impl NetworkTest for SimpleValidatorUpgrade { old_version ); info!("{}", msg); - ctx.report.report_text(msg); + ctxa.report_text(msg).await; // Generate some traffic - let txn_stat = generate_traffic(ctx, &all_validators, duration)?; - ctx.report - .report_txn_stats(format!("{}::liveness-check", self.name()), &txn_stat); + { + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let txn_stat_prior = generate_traffic(ctx, &all_validators, duration).await?; + ctx.report + .report_txn_stats(format!("{}::liveness-check", self.name()), &txn_stat_prior); + } // Update the first Validator let msg = format!( @@ -76,55 +220,114 @@ impl NetworkTest for SimpleValidatorUpgrade { new_version ); info!("{}", msg); - ctx.report.report_text(msg); - runtime.block_on(batch_update(ctx, &[first_node], &new_version))?; + ctxa.report_text(msg).await; + let upgrade_stats = upgrade_and_gather_stats( + ctxa.clone(), + &[first_node], + &new_version, + upgrade_wait_for_healthy, + upgrade_node_delay, + upgrade_max_wait, + &[first_node], + )?; + let upgrade_stats_sum = upgrade_stats.into_iter().reduce(|a, b| &a + &b); + if let Some(upgrade_stats_sum) = upgrade_stats_sum { + ctxa.ctx.lock().await.report.report_txn_stats( + format!("{}::single-validator-upgrading", self.name()), + 
&upgrade_stats_sum, + ); + } // Generate some traffic - let txn_stat = generate_traffic(ctx, &[first_node], duration)?; - ctx.report.report_txn_stats( - format!("{}::single-validator-upgrade", self.name()), - &txn_stat, - ); + { + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let txn_stat_one = generate_traffic(ctx, &[first_node], duration).await?; + ctx.report.report_txn_stats( + format!("{}::single-validator-upgrade", self.name()), + &txn_stat_one, + ); - // Update the rest of the first batch - let msg = format!( - "3. Upgrading rest of first batch to new version: {}", - new_version - ); - info!("{}", msg); - ctx.report.report_text(msg); - runtime.block_on(batch_update(ctx, &first_batch, &new_version))?; + // Update the rest of the first batch + let msg = format!( + "3. Upgrading rest of first batch to new version: {}", + new_version + ); + info!("{}", msg); + ctx.report.report_text(msg); + } - // Generate some traffic - let txn_stat = generate_traffic(ctx, &first_batch, duration)?; - ctx.report.report_txn_stats( - format!("{}::half-validator-upgrade", self.name()), - &txn_stat, - ); + // upgrade the rest of the first half + let upgrade2_stats = upgrade_and_gather_stats( + ctxa.clone(), + &first_batch, + &new_version, + upgrade_wait_for_healthy, + upgrade_node_delay, + upgrade_max_wait, + &first_batch, + )?; + let upgrade2_stats_sum = upgrade2_stats.into_iter().reduce(|a, b| &a + &b); + if let Some(upgrade2_stats_sum) = upgrade2_stats_sum { + ctxa.ctx.lock().await.report.report_txn_stats( + format!("{}::half-validator-upgrading", self.name()), + &upgrade2_stats_sum, + ); + } + { + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); - ctx.swarm().fork_check(epoch_duration)?; + // Generate some traffic + let txn_stat_half = generate_traffic(ctx, &first_batch, duration).await?; + ctx.report.report_txn_stats( + format!("{}::half-validator-upgrade", self.name()), + &txn_stat_half, + ); - // Update the second batch - let msg = format!("4. upgrading second batch to new version: {}", new_version); - info!("{}", msg); - ctx.report.report_text(msg); - runtime.block_on(batch_update(ctx, &second_batch, &new_version))?; + ctx.swarm.read().await.fork_check(epoch_duration).await?; - // Generate some traffic - let txn_stat = generate_traffic(ctx, &second_batch, duration)?; - ctx.report.report_txn_stats( - format!("{}::rest-validator-upgrade", self.name()), - &txn_stat, - ); + // Update the second batch + let msg = format!("4. upgrading second batch to new version: {}", new_version); + info!("{}", msg); + ctx.report.report_text(msg); + } + let upgrade3_stats = upgrade_and_gather_stats( + ctxa.clone(), + &second_batch, + &new_version, + upgrade_wait_for_healthy, + upgrade_node_delay, + upgrade_max_wait, + &second_batch, + )?; + let upgrade3_stats_sum = upgrade3_stats.into_iter().reduce(|a, b| &a + &b); + if let Some(upgrade3_stats_sum) = upgrade3_stats_sum { + ctxa.ctx.lock().await.report.report_txn_stats( + format!("{}::rest-validator-upgrading", self.name()), + &upgrade3_stats_sum, + ); + } + { + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); - let msg = "5. 
check swarm health".to_string(); - info!("{}", msg); - ctx.report.report_text(msg); - ctx.swarm().fork_check(epoch_duration)?; - ctx.report.report_text(format!( - "Compatibility test for {} ==> {} passed", - old_version, new_version - )); + // Generate some traffic + let txn_stat_all = generate_traffic(ctx, &second_batch, duration).await?; + ctx.report.report_txn_stats( + format!("{}::rest-validator-upgrade", self.name()), + &txn_stat_all, + ); + + let msg = "5. check swarm health".to_string(); + info!("{}", msg); + ctx.report.report_text(msg); + ctx.swarm.read().await.fork_check(epoch_duration).await?; + ctx.report.report_text(format!( + "Compatibility test for {} ==> {} passed", + old_version, new_version + )); + } Ok(()) } diff --git a/testsuite/testcases/src/consensus_reliability_tests.rs b/testsuite/testcases/src/consensus_reliability_tests.rs index 99a8d6f18ed41..909e98e7c3826 100644 --- a/testsuite/testcases/src/consensus_reliability_tests.rs +++ b/testsuite/testcases/src/consensus_reliability_tests.rs @@ -7,12 +7,13 @@ use aptos_forge::{ test_utils::consensus_utils::{ test_consensus_fault_tolerance, FailPointFailureInjection, NodeState, }, - NetworkContext, NetworkTest, Result, Swarm, SwarmExt, Test, TestReport, + NetworkContext, NetworkContextSynchronizer, NetworkTest, Result, Swarm, SwarmExt, Test, + TestReport, }; use aptos_logger::{info, warn}; +use async_trait::async_trait; use rand::Rng; -use std::{collections::HashSet, time::Duration}; -use tokio::runtime::Runtime; +use std::{collections::HashSet, sync::Arc, time::Duration}; pub struct ChangingWorkingQuorumTest { pub min_tps: usize, @@ -32,34 +33,37 @@ impl Test for ChangingWorkingQuorumTest { } } +#[async_trait] impl NetworkLoadTest for ChangingWorkingQuorumTest { - fn setup(&self, ctx: &mut NetworkContext) -> Result { + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> Result { // because we are doing failure testing, we should be sending // traffic to nodes that are alive. - if ctx.swarm().full_nodes().count() > 0 { + let full_nodes_count = { ctx.swarm.read().await.full_nodes().count() }; + if full_nodes_count > 0 { Ok(LoadDestination::AllFullnodes) } else if self.always_healthy_nodes > 0 { - Ok(LoadDestination::Peers( - ctx.swarm() + let validator_peer_ids = { + ctx.swarm + .read() + .await .validators() .take(self.always_healthy_nodes) .map(|v| v.peer_id()) - .collect(), - )) + .collect() + }; + Ok(LoadDestination::Peers(validator_peer_ids)) } else { Ok(LoadDestination::AllValidators) } } - fn test( + async fn test( &self, - swarm: &mut dyn Swarm, + swarm: Arc>>, _report: &mut TestReport, duration: Duration, ) -> Result<()> { - let runtime = Runtime::new().unwrap(); - - let validators = swarm.get_validator_clients_with_names(); + let validators = { swarm.read().await.get_validator_clients_with_names() }; let num_validators = validators.len(); @@ -74,12 +78,22 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { ); // On every cycle, we will fail this many next nodes, and make this many previous nodes healthy again. 
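The comment above describes a sliding window of failed validators; a sketch of one way to compute that window (hypothetical free function; the actual down_indices_f closure below captures these values instead of taking them as parameters):

use std::collections::HashSet;

// Cycle `cycle` fails `max_fail` validators, skipping the always-healthy
// prefix and advancing the window by `cycle_offset` each cycle so nodes
// failed earlier become healthy again. Assumes at least one validator is
// outside the always-healthy prefix.
fn down_indices(
    cycle: usize,
    cycle_offset: usize,
    max_fail: usize,
    num_always_healthy: usize,
    num_validators: usize,
) -> HashSet<usize> {
    let rotating = num_validators - num_always_healthy;
    (0..max_fail)
        .map(|i| num_always_healthy + (cycle * cycle_offset + i) % rotating)
        .collect()
}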
let cycle_offset = max_fail_in_test / 4 + 1; - let num_destinations = if swarm.full_nodes().count() > 0 { - swarm.full_nodes().count() - } else if num_always_healthy > 0 { - num_always_healthy - } else { - swarm.validators().count() + let num_destinations = { + let swarm = swarm.read().await; + if swarm.full_nodes().count() > 0 { + swarm.full_nodes().count() + } else if num_always_healthy > 0 { + num_always_healthy + } else { + swarm.validators().count() + } + }; + let (validator_clients, public_info) = { + let swarm = swarm.read().await; + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) }; // Function that returns set of down nodes in a given cycle. let down_indices_f = move |cycle: usize| -> HashSet { @@ -107,34 +121,33 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { num_always_healthy, max_fail_in_test, num_validators, cycle_offset, self.num_large_validators); let slow_allowed_lagging = if self.add_execution_delay { - runtime.block_on(async { - let mut rng = rand::thread_rng(); - let mut slow_allowed_lagging = HashSet::new(); - for (index, (name, validator)) in - validators.iter().enumerate().skip(num_always_healthy) - { - let sleep_time = rng.gen_range(20, 500); - if sleep_time > 100 { - slow_allowed_lagging.insert(index); - } - let name = name.clone(); + let mut slow_allowed_lagging = HashSet::new(); + for (index, (name, validator)) in validators.iter().enumerate().skip(num_always_healthy) + { + let sleep_time = { + let mut rng = rand::thread_rng(); + rng.gen_range(20, 500) + }; + if sleep_time > 100 { + slow_allowed_lagging.insert(index); + } + let name = name.clone(); - validator - .set_failpoint( - "aptos_vm::execution::block_metadata".to_string(), - format!("sleep({})", sleep_time), + validator + .set_failpoint( + "aptos_vm::execution::block_metadata".to_string(), + format!("sleep({})", sleep_time), + ) + .await + .map_err(|e| { + anyhow!( + "set_failpoint to remove execution delay on {} failed, {:?}", + name, + e ) - .await - .map_err(|e| { - anyhow!( - "set_failpoint to remove execution delay on {} failed, {:?}", - name, - e - ) - })?; - } - Ok::, anyhow::Error>(slow_allowed_lagging) - })? + })?; + } + slow_allowed_lagging } else { HashSet::new() }; @@ -142,32 +155,38 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { let min_tps = self.min_tps; let check_period_s = self.check_period_s; - runtime.block_on(test_consensus_fault_tolerance( - swarm, - duration.as_secs() as usize / self.check_period_s, - self.check_period_s as f32, - 1, - Box::new(FailPointFailureInjection::new(Box::new(move |cycle, part| { + let failure_injection = Box::new(FailPointFailureInjection::new(Box::new( + move |cycle, part| { if part == 0 { let down_indices = down_indices_f(cycle); info!("For cycle {} down nodes: {:?}", cycle, down_indices); // For all down nodes, we are going to drop all messages we receive. ( - down_indices.iter().flat_map(|i| { - [ - ( + down_indices + .iter() + .flat_map(|i| { + [( *i, "consensus::process::any".to_string(), "return".to_string(), - ), - ] - }).collect(), + )] + }) + .collect(), true, ) } else { (vec![], false) } - }))), + }, + ))); + + test_consensus_fault_tolerance( + validator_clients, + public_info, + duration.as_secs() as usize / self.check_period_s, + self.check_period_s as f32, + 1, + failure_injection, Box::new(move |cycle, _, _, _, cycle_end, cycle_start| { // we group nodes into 3 groups: // - active - nodes we expect to be making progress, and doing so together. 
we check very strict rule of min(cycle_end) vs max(cycle_start) @@ -266,37 +285,35 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { }), false, true, - )).context("test_consensus_fault_tolerance failed")?; + ).await.context("test_consensus_fault_tolerance failed")?; // undo slowing down. if self.add_execution_delay { - runtime.block_on(async { - for (name, validator) in validators.iter().skip(num_always_healthy) { - let name = name.clone(); + for (name, validator) in validators.iter().skip(num_always_healthy) { + let name = name.clone(); - validator - .set_failpoint( - "aptos_vm::execution::block_metadata".to_string(), - "off".to_string(), + validator + .set_failpoint( + "aptos_vm::execution::block_metadata".to_string(), + "off".to_string(), + ) + .await + .map_err(|e| { + anyhow!( + "set_failpoint to remove execution delay on {} failed, {:?}", + name, + e ) - .await - .map_err(|e| { - anyhow!( - "set_failpoint to remove execution delay on {} failed, {:?}", - name, - e - ) - })?; - } - Ok::<(), anyhow::Error>(()) - })?; + })?; + } } Ok(()) } } +#[async_trait] impl NetworkTest for ChangingWorkingQuorumTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { - <dyn NetworkLoadTest as NetworkTest>::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + <dyn NetworkLoadTest as NetworkTest>::run(self, ctx).await } } diff --git a/testsuite/testcases/src/dag_onchain_enable_test.rs b/testsuite/testcases/src/dag_onchain_enable_test.rs index 09af1aee62ba4..d3b2e0f73e11c 100644 --- a/testsuite/testcases/src/dag_onchain_enable_test.rs +++ b/testsuite/testcases/src/dag_onchain_enable_test.rs @@ -4,7 +4,7 @@ use crate::{generate_onchain_config_blob, NetworkLoadTest}; use anyhow::Ok; use aptos::test::CliTestFramework; -use aptos_forge::{NetworkTest, NodeExt, SwarmExt, Test}; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, NodeExt, SwarmExt, Test}; use aptos_logger::info; use aptos_sdk::bcs; use aptos_types::{ @@ -13,8 +13,8 @@ use aptos_types::{ ConsensusAlgorithmConfig, DagConsensusConfigV1, OnChainConsensusConfig, ValidatorTxnConfig, }, }; -use std::time::Duration; -use tokio::runtime::Runtime; +use async_trait::async_trait; +use std::{sync::Arc, time::Duration}; const MAX_NODE_LAG_SECS: u64 = 360; @@ -26,182 +26,194 @@ impl Test for DagOnChainEnableTest { } } +#[async_trait] impl NetworkLoadTest for DagOnChainEnableTest { - fn test( + async fn test( &self, - swarm: &mut dyn aptos_forge::Swarm, + swarm: Arc<tokio::sync::RwLock<Box<dyn aptos_forge::Swarm>>>, _report: &mut aptos_forge::TestReport, duration: std::time::Duration, ) -> anyhow::Result<()> { - let runtime = Runtime::new().unwrap(); - let faucet_endpoint: reqwest::Url = "http://localhost:8081".parse().unwrap(); - let rest_client = swarm.validators().next().unwrap().rest_client(); - - let mut cli = runtime.block_on(async { - CliTestFramework::new( - swarm.validators().next().unwrap().rest_api_endpoint(), - faucet_endpoint, - /*num_cli_accounts=*/ 0, + let (rest_client, rest_api_endpoint) = { + let swarm = swarm.read().await; + let first_validator = swarm.validators().next().unwrap(); + let rest_client = first_validator.rest_client(); + let rest_api_endpoint = first_validator.rest_api_endpoint(); + (rest_client, rest_api_endpoint) + }; + let mut cli = CliTestFramework::new( + rest_api_endpoint, + faucet_endpoint, + /*num_cli_accounts=*/ 0, + ) + .await; + + tokio::time::sleep(duration / 3).await; + + let root_cli_index = { + let root_account = swarm.read().await.chain_info().root_account(); + cli.add_account_with_address_to_cli( + root_account.private_key().clone(), + root_account.address(), )
- .await - }); - - std::thread::sleep(duration / 3); - - runtime.block_on(async { - - let root_cli_index = cli.add_account_with_address_to_cli( - swarm.chain_info().root_account().private_key().clone(), - swarm.chain_info().root_account().address(), - ); - - let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( - &rest_client - .get_account_resource_bcs::>( - CORE_CODE_ADDRESS, - "0x1::consensus_config::ConsensusConfig", - ) - .await - .unwrap() - .into_inner(), - ) - .unwrap(); - - assert!(matches!(current_consensus_config, OnChainConsensusConfig::V3 { .. })); - - // Change to V2 - let new_consensus_config = OnChainConsensusConfig::V3 { - alg: ConsensusAlgorithmConfig::DAG(DagConsensusConfigV1::default()), - vtxn: ValidatorTxnConfig::default_disabled(), - }; - - let update_consensus_config_script = format!( - r#" - script {{ - use aptos_framework::aptos_governance; - use aptos_framework::consensus_config; - fun main(core_resources: &signer) {{ - let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); - let config_bytes = {}; - consensus_config::set(&framework_signer, config_bytes); - }} + }; + + let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( + &rest_client + .get_account_resource_bcs::>( + CORE_CODE_ADDRESS, + "0x1::consensus_config::ConsensusConfig", + ) + .await + .unwrap() + .into_inner(), + ) + .unwrap(); + + assert!(matches!( + current_consensus_config, + OnChainConsensusConfig::V3 { .. } + )); + + // Change to V2 + let new_consensus_config = OnChainConsensusConfig::V3 { + alg: ConsensusAlgorithmConfig::DAG(DagConsensusConfigV1::default()), + vtxn: ValidatorTxnConfig::default_disabled(), + }; + + let update_consensus_config_script = format!( + r#" + script {{ + use aptos_framework::aptos_governance; + use aptos_framework::consensus_config; + fun main(core_resources: &signer) {{ + let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); + let config_bytes = {}; + consensus_config::set(&framework_signer, config_bytes); }} - "#, - generate_onchain_config_blob(&bcs::to_bytes(&new_consensus_config).unwrap()) - ); + }} + "#, + generate_onchain_config_blob(&bcs::to_bytes(&new_consensus_config).unwrap()) + ); - cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) - .await - })?; - - std::thread::sleep(duration / 3); - - let initial_consensus_config = runtime.block_on(async { - - let root_cli_index = cli.add_account_with_address_to_cli( - swarm.chain_info().root_account().private_key().clone(), - swarm.chain_info().root_account().address(), - ); - - let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( - &rest_client - .get_account_resource_bcs::>( - CORE_CODE_ADDRESS, - "0x1::consensus_config::ConsensusConfig", - ) - .await - .unwrap() - .into_inner(), + cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) + .await?; + + tokio::time::sleep(duration / 3).await; + + let root_cli_index = { + let root_account = swarm.read().await.chain_info().root_account(); + cli.add_account_with_address_to_cli( + root_account.private_key().clone(), + root_account.address(), ) - .unwrap(); - - assert!(matches!(current_consensus_config, OnChainConsensusConfig::V3 { .. 
})); - - // Change to DAG - let new_consensus_config = OnChainConsensusConfig::V3 { - alg: ConsensusAlgorithmConfig::DAG(DagConsensusConfigV1::default()), - vtxn: ValidatorTxnConfig::default_disabled(), - }; - - let update_consensus_config_script = format!( - r#" - script {{ - use aptos_framework::aptos_governance; - use aptos_framework::consensus_config; - fun main(core_resources: &signer) {{ - let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); - let config_bytes = {}; - consensus_config::set(&framework_signer, config_bytes); - }} + }; + + let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( + &rest_client + .get_account_resource_bcs::>( + CORE_CODE_ADDRESS, + "0x1::consensus_config::ConsensusConfig", + ) + .await + .unwrap() + .into_inner(), + ) + .unwrap(); + + assert!(matches!( + current_consensus_config, + OnChainConsensusConfig::V3 { .. } + )); + + // Change to DAG + let new_consensus_config = OnChainConsensusConfig::V3 { + alg: ConsensusAlgorithmConfig::DAG(DagConsensusConfigV1::default()), + vtxn: ValidatorTxnConfig::default_disabled(), + }; + + let update_consensus_config_script = format!( + r#" + script {{ + use aptos_framework::aptos_governance; + use aptos_framework::consensus_config; + fun main(core_resources: &signer) {{ + let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); + let config_bytes = {}; + consensus_config::set(&framework_signer, config_bytes); }} - "#, - generate_onchain_config_blob(&bcs::to_bytes(&new_consensus_config).unwrap()) - ); - - cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) - .await?; - - Ok(current_consensus_config) - })?; - - std::thread::sleep(duration / 3); - - runtime.block_on(async { - - let root_cli_index = cli.add_account_with_address_to_cli( - swarm.chain_info().root_account().private_key().clone(), - swarm.chain_info().root_account().address(), - ); - - let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( - &rest_client - .get_account_resource_bcs::>( - CORE_CODE_ADDRESS, - "0x1::consensus_config::ConsensusConfig", - ) - .await - .unwrap() - .into_inner(), + }} + "#, + generate_onchain_config_blob(&bcs::to_bytes(&new_consensus_config).unwrap()) + ); + + cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) + .await?; + + let initial_consensus_config = current_consensus_config; + + tokio::time::sleep(duration / 3).await; + + let root_cli_index = { + let root_account = swarm.read().await.chain_info().root_account(); + cli.add_account_with_address_to_cli( + root_account.private_key().clone(), + root_account.address(), ) - .unwrap(); - - assert!(matches!(current_consensus_config, OnChainConsensusConfig::V3 { .. 
})); - - // Change back to initial - let update_consensus_config_script = format!( - r#" - script {{ - use aptos_framework::aptos_governance; - use aptos_framework::consensus_config; - fun main(core_resources: &signer) {{ - let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); - let config_bytes = {}; - consensus_config::set(&framework_signer, config_bytes); - }} + }; + + let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( + &rest_client + .get_account_resource_bcs::>( + CORE_CODE_ADDRESS, + "0x1::consensus_config::ConsensusConfig", + ) + .await + .unwrap() + .into_inner(), + ) + .unwrap(); + + assert!(matches!( + current_consensus_config, + OnChainConsensusConfig::V3 { .. } + )); + + // Change back to initial + let update_consensus_config_script = format!( + r#" + script {{ + use aptos_framework::aptos_governance; + use aptos_framework::consensus_config; + fun main(core_resources: &signer) {{ + let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); + let config_bytes = {}; + consensus_config::set(&framework_signer, config_bytes); }} - "#, - generate_onchain_config_blob(&bcs::to_bytes(&initial_consensus_config).unwrap()) - ); + }} + "#, + generate_onchain_config_blob(&bcs::to_bytes(&initial_consensus_config).unwrap()) + ); - cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) - .await - })?; + cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) + .await?; // Wait for all nodes to synchronize and stabilize. info!("Waiting for the validators to be synchronized."); - runtime.block_on(async { - swarm - .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) - .await - })?; + swarm + .read() + .await + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await?; Ok(()) } } +#[async_trait] impl NetworkTest for DagOnChainEnableTest { - fn run(&self, ctx: &mut aptos_forge::NetworkContext<'_>) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/forge_setup_test.rs b/testsuite/testcases/src/forge_setup_test.rs index 6a69224bb1bc4..aba938f235c13 100644 --- a/testsuite/testcases/src/forge_setup_test.rs +++ b/testsuite/testcases/src/forge_setup_test.rs @@ -4,15 +4,15 @@ use crate::generate_traffic; use anyhow::Context; use aptos_config::config::OverrideNodeConfig; -use aptos_forge::{NetworkContext, NetworkTest, Result, Test}; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test}; use aptos_logger::info; +use async_trait::async_trait; use rand::{ rngs::{OsRng, StdRng}, seq::IteratorRandom, Rng, SeedableRng, }; -use std::{thread, time::Duration}; -use tokio::runtime::Runtime; +use std::{ops::DerefMut, thread, time::Duration}; const STATE_SYNC_VERSION_COUNTER_NAME: &str = "aptos_state_sync_version"; @@ -24,58 +24,65 @@ impl Test for ForgeSetupTest { } } +#[async_trait] impl NetworkTest for ForgeSetupTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { let mut rng = StdRng::from_seed(OsRng.gen()); - let runtime = Runtime::new().unwrap(); + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); - let swarm = ctx.swarm(); 
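// The new run() signature trades the per-test Runtime for an async Mutex
// around the shared context. A minimal standalone sketch of that locking
// idiom, with a toy struct standing in for the real NetworkContext:
use std::ops::DerefMut;
use std::sync::Arc;
use tokio::sync::Mutex;

struct ToyCtx {
    report: Vec<String>,
}

async fn with_ctx(sync: Arc<Mutex<ToyCtx>>) {
    let mut guard = sync.lock().await; // serializes tests sharing the context
    let ctx = guard.deref_mut(); // plain &mut ToyCtx for the body below
    ctx.report.push("ok".to_string());
} // guard drops here, releasing the lock for the next user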
+ // TODO: decrease lock shadow on swarm for this test + { + let swarm = ctx.swarm.read().await; - let all_fullnodes = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); - let fullnode_id = all_fullnodes.iter().choose(&mut rng).unwrap(); + let all_fullnodes = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); + let fullnode_id = all_fullnodes.iter().choose(&mut rng).unwrap(); - info!("Pick one fullnode to stop and wipe"); - let fullnode = swarm.full_node_mut(*fullnode_id).unwrap(); - runtime.block_on(fullnode.clear_storage())?; - runtime.block_on(fullnode.start())?; + info!("Pick one fullnode to stop and wipe"); + let fullnode = swarm.full_node(*fullnode_id).unwrap(); + fullnode.clear_storage().await?; + fullnode.start().await?; - let fullnode = swarm.full_node(*fullnode_id).unwrap(); - let fullnode_name = fullnode.name(); + let fullnode = swarm.full_node(*fullnode_id).unwrap(); + let fullnode_name = fullnode.name(); - for _ in 0..10 { - let query = format!( - "{}{{instance=\"{}\",type=\"synced\"}}", - STATE_SYNC_VERSION_COUNTER_NAME, &fullnode_name - ); - info!("PromQL Query {}", query); - let r = runtime.block_on(swarm.query_metrics(&query, None, None))?; - let ivs = r.as_instant().unwrap(); - for iv in ivs { - info!( - "{}: {}", - STATE_SYNC_VERSION_COUNTER_NAME, - iv.sample().value() + for _ in 0..10 { + let query = format!( + "{}{{instance=\"{}\",type=\"synced\"}}", + STATE_SYNC_VERSION_COUNTER_NAME, &fullnode_name ); + info!("PromQL Query {}", query); + let r = swarm.query_metrics(&query, None, None).await?; + let ivs = r.as_instant().unwrap(); + for iv in ivs { + info!( + "{}: {}", + STATE_SYNC_VERSION_COUNTER_NAME, + iv.sample().value() + ); + } + thread::sleep(std::time::Duration::from_secs(5)); } - thread::sleep(std::time::Duration::from_secs(5)); } // add some PFNs and send load to them let mut pfns = Vec::new(); let num_pfns = 5; - for _ in 0..num_pfns { - let pfn_version = swarm.versions().max().unwrap(); - let pfn_node_config = - OverrideNodeConfig::new_with_default_base(swarm.get_default_pfn_node_config()); - let pfn_peer_id = - runtime.block_on(swarm.add_full_node(&pfn_version, pfn_node_config))?; + { + let mut swarm = ctx.swarm.write().await; + for _ in 0..num_pfns { + let pfn_version = swarm.versions().max().unwrap(); + let pfn_node_config = + OverrideNodeConfig::new_with_default_base(swarm.get_default_pfn_node_config()); + let pfn_peer_id = swarm.add_full_node(&pfn_version, pfn_node_config).await?; - let _pfn = swarm.full_node(pfn_peer_id).context("pfn not found")?; - pfns.push(pfn_peer_id); + let _pfn = swarm.full_node(pfn_peer_id).context("pfn not found")?; + pfns.push(pfn_peer_id); + } } let duration = Duration::from_secs(10 * num_pfns); - let txn_stat = generate_traffic(ctx, &pfns, duration)?; + let txn_stat = generate_traffic(ctx, &pfns, duration).await?; ctx.report .report_txn_stats(self.name().to_string(), &txn_stat); diff --git a/testsuite/testcases/src/framework_upgrade.rs b/testsuite/testcases/src/framework_upgrade.rs index cd49107a815d6..974ab91894eb4 100644 --- a/testsuite/testcases/src/framework_upgrade.rs +++ b/testsuite/testcases/src/framework_upgrade.rs @@ -4,14 +4,17 @@ use crate::{batch_update, generate_traffic}; use anyhow::bail; use aptos_forge::{ - NetworkContext, NetworkTest, Result, SwarmExt, Test, DEFAULT_ROOT_PRIV_KEY, FORGE_KEY_SEED, + NetworkContextSynchronizer, NetworkTest, Result, SwarmExt, Test, DEFAULT_ROOT_PRIV_KEY, + FORGE_KEY_SEED, }; use aptos_keygen::KeyGen; use aptos_logger::info; use 
aptos_sdk::crypto::{ed25519::Ed25519PrivateKey, PrivateKey}; use aptos_temppath::TempPath; use aptos_types::transaction::authenticator::AuthenticationKey; -use tokio::{runtime::Runtime, time::Duration}; +use async_trait::async_trait; +use std::ops::DerefMut; +use tokio::time::Duration; pub struct FrameworkUpgrade; @@ -25,15 +28,17 @@ impl Test for FrameworkUpgrade { } } +#[async_trait] impl NetworkTest for FrameworkUpgrade { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { - let runtime = Runtime::new()?; + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); let epoch_duration = Duration::from_secs(Self::EPOCH_DURATION_SECS); // Get the different versions we're testing with let (old_version, new_version) = { - let mut versions = ctx.swarm().versions().collect::>(); + let mut versions = ctx.swarm.read().await.versions().collect::>(); versions.sort(); if versions.len() != 2 { bail!("exactly two different versions needed to run compat test"); @@ -42,11 +47,14 @@ impl NetworkTest for FrameworkUpgrade { (versions[0].clone(), versions[1].clone()) }; - let all_validators = ctx - .swarm() - .validators() - .map(|v| v.peer_id()) - .collect::>(); + let all_validators = { + ctx.swarm + .read() + .await + .validators() + .map(|v| v.peer_id()) + .collect::>() + }; let msg = format!( "Compatibility test results for {} ==> {} (PR)", @@ -60,17 +68,19 @@ impl NetworkTest for FrameworkUpgrade { let msg = format!("Upgrade the nodes to version: {}", new_version); info!("{}", msg); ctx.report.report_text(msg); - runtime.block_on(batch_update(ctx, first_half, &new_version))?; + batch_update(ctx, first_half, &new_version).await?; // Generate some traffic let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(ctx, &all_validators, duration)?; + let txn_stat = generate_traffic(ctx, &all_validators, duration).await?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, ); - ctx.swarm().fork_check(epoch_duration)?; + { + ctx.swarm.read().await.fork_check(epoch_duration).await?; + } // Apply the framework release bundle. 
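// The braces around the fork_check calls above scope the RwLock read guard to
// a single statement. A standalone sketch of why that matters (toy element
// type; the real lock wraps the boxed Swarm trait object):
use std::sync::Arc;
use tokio::sync::RwLock;

async fn check_then_mutate(shared: Arc<RwLock<Vec<u64>>>) {
    {
        let nodes = shared.read().await; // read guard lives only in this block
        let _count = nodes.len();
    }
    // With the read guard dropped, taking the write lock below cannot
    // deadlock against our own read on the same task.
    shared.write().await.push(1);
}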
let root_key_path = TempPath::new(); @@ -98,54 +108,62 @@ impl NetworkTest for FrameworkUpgrade { AuthenticationKey::ed25519(&validator_key.public_key()).account_address(); let network_info = aptos_release_builder::validate::NetworkConfig { - endpoint: ctx.swarm().validators().last().unwrap().rest_api_endpoint(), + endpoint: ctx + .swarm + .read() + .await + .validators() + .last() + .unwrap() + .rest_api_endpoint(), root_key_path: root_key_path.path().to_path_buf(), validator_account, validator_key, framework_git_rev: None, }; - runtime.block_on(network_info.mint_to_validator())?; + network_info.mint_to_validator().await?; let release_config = aptos_release_builder::current_release_config(); - runtime.block_on(aptos_release_builder::validate::validate_config( - release_config.clone(), - network_info, - ))?; + aptos_release_builder::validate::validate_config(release_config.clone(), network_info) + .await?; // Update the sequence number for the root account - let root_account = ctx.swarm().chain_info().root_account().address(); + let root_account = { ctx.swarm.read().await.chain_info().root_account().address() }; // Test the module publishing workflow - let sequence_number = runtime - .block_on( - ctx.swarm() - .chain_info() - .rest_client() - .get_account(root_account), - ) - .unwrap() - .inner() - .sequence_number; - ctx.swarm() - .chain_info() - .root_account() - .set_sequence_number(sequence_number); + { + let chain_info = ctx.swarm.read().await.chain_info(); + let sequence_number = chain_info + .rest_client() + .get_account(root_account) + .await + .unwrap() + .inner() + .sequence_number; + chain_info + .root_account() + .set_sequence_number(sequence_number); + } // Generate some traffic let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(ctx, &all_validators, duration)?; + let txn_stat = generate_traffic(ctx, &all_validators, duration).await?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, ); - ctx.swarm().fork_check(epoch_duration)?; + { + ctx.swarm.read().await.fork_check(epoch_duration).await?; + } let msg = "5. 
check swarm health".to_string(); info!("{}", msg); ctx.report.report_text(msg); - ctx.swarm().fork_check(epoch_duration)?; + { + ctx.swarm.read().await.fork_check(epoch_duration).await?; + } ctx.report.report_text(format!( "Compatibility test for {} ==> {} passed", old_version, new_version @@ -156,16 +174,18 @@ impl NetworkTest for FrameworkUpgrade { let msg = format!("Upgrade the remaining nodes to version: {}", new_version); info!("{}", msg); ctx.report.report_text(msg); - runtime.block_on(batch_update(ctx, second_half, &new_version))?; + batch_update(ctx, second_half, &new_version).await?; let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(ctx, &all_validators, duration)?; + let txn_stat = generate_traffic(ctx, &all_validators, duration).await?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, ); - ctx.swarm().fork_check(epoch_duration)?; + { + ctx.swarm.read().await.fork_check(epoch_duration).await?; + } Ok(()) } diff --git a/testsuite/testcases/src/fullnode_reboot_stress_test.rs b/testsuite/testcases/src/fullnode_reboot_stress_test.rs index a2d8702e402a8..8fdd177b5af9b 100644 --- a/testsuite/testcases/src/fullnode_reboot_stress_test.rs +++ b/testsuite/testcases/src/fullnode_reboot_stress_test.rs @@ -2,10 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{NetworkContext, NetworkTest, Result, Swarm, Test, TestReport}; +use aptos_forge::{ + NetworkContext, NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport, +}; +use async_trait::async_trait; use rand::{seq::SliceRandom, thread_rng}; -use std::time::Duration; -use tokio::{runtime::Runtime, time::Instant}; +use std::{sync::Arc, time::Duration}; +use tokio::time::Instant; pub struct FullNodeRebootStressTest; @@ -15,39 +18,51 @@ impl Test for FullNodeRebootStressTest { } } +#[async_trait] impl NetworkLoadTest for FullNodeRebootStressTest { - fn setup(&self, _ctx: &mut NetworkContext) -> Result { + async fn setup<'a>(&self, _ctx: &mut NetworkContext<'a>) -> Result { Ok(LoadDestination::AllFullnodes) } - fn test( + async fn test( &self, - swarm: &mut dyn Swarm, + swarm: Arc>>, _report: &mut TestReport, duration: Duration, ) -> Result<()> { let start = Instant::now(); - let runtime = Runtime::new().unwrap(); - let all_fullnodes = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); - - let mut rng = thread_rng(); + let all_fullnodes = { + swarm + .read() + .await + .full_nodes() + .map(|v| v.peer_id()) + .collect::>() + }; while start.elapsed() < duration { - let fullnode_to_reboot = swarm - .full_node_mut(*all_fullnodes.choose(&mut rng).unwrap()) - .unwrap(); - runtime.block_on(async { fullnode_to_reboot.stop().await })?; - runtime.block_on(async { fullnode_to_reboot.start().await })?; - std::thread::sleep(Duration::from_secs(10)); + { + let swarm = swarm.read().await; + let fullnode_to_reboot = { + let mut rng = thread_rng(); + swarm + .full_node(*all_fullnodes.choose(&mut rng).unwrap()) + .unwrap() + }; + fullnode_to_reboot.stop().await?; + fullnode_to_reboot.start().await?; + } + tokio::time::sleep(Duration::from_secs(10)).await; } Ok(()) } } +#[async_trait] impl NetworkTest for FullNodeRebootStressTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index 
a620211693382..15ec3b1ffbf2a 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -29,19 +29,22 @@ pub mod validator_reboot_stress_test; use anyhow::Context; use aptos_forge::{ prometheus_metrics::{fetch_latency_breakdown, LatencyBreakdown}, - EmitJobRequest, NetworkContext, NetworkTest, NodeExt, Result, Swarm, SwarmExt, Test, - TestReport, TxnEmitter, TxnStats, Version, + EmitJobRequest, NetworkContext, NetworkContextSynchronizer, NetworkTest, NodeExt, Result, + Swarm, SwarmExt, Test, TestReport, TxnEmitter, TxnStats, Version, }; use aptos_logger::info; use aptos_rest_client::Client as RestClient; use aptos_sdk::{transaction_builder::TransactionFactory, types::PeerId}; +use async_trait::async_trait; use futures::future::join_all; use rand::{rngs::StdRng, SeedableRng}; use std::{ fmt::Write, + ops::DerefMut, + sync::Arc, time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; -use tokio::runtime::Runtime; +use tokio::runtime::{Handle, Runtime}; const WARMUP_DURATION_FRACTION: f32 = 0.07; const COOLDOWN_DURATION_FRACTION: f32 = 0.04; @@ -52,14 +55,20 @@ async fn batch_update( version: &Version, ) -> Result<()> { for validator in validators_to_update { - ctx.swarm().upgrade_validator(*validator, version).await?; + ctx.swarm + .write() + .await + .upgrade_validator(*validator, version) + .await?; } - ctx.swarm().health_check().await?; + ctx.swarm.read().await.health_check().await?; let deadline = Instant::now() + Duration::from_secs(60); for validator in validators_to_update { - ctx.swarm() - .validator_mut(*validator) + ctx.swarm + .read() + .await + .validator(*validator) .unwrap() .wait_until_healthy(deadline) .await?; @@ -68,8 +77,62 @@ async fn batch_update( Ok(()) } -pub fn create_emitter_and_request( - swarm: &mut dyn Swarm, +async fn batch_update_gradually( + ctxa: NetworkContextSynchronizer<'_>, + validators_to_update: &[PeerId], + version: &Version, + wait_until_healthy: bool, + delay: Duration, + max_wait: Duration, +) -> Result<()> { + for validator in validators_to_update { + info!("batch_update_gradually upgrade start: {}", validator); + { + ctxa.ctx + .lock() + .await + .swarm + .write() + .await + .upgrade_validator(*validator, version) + .await?; + } + if wait_until_healthy { + info!("batch_update_gradually upgrade waiting: {}", validator); + let deadline = Instant::now() + max_wait; + ctxa.ctx + .lock() + .await + .swarm + .read() + .await + .validator(*validator) + .unwrap() + .wait_until_healthy(deadline) + .await?; + info!("batch_update_gradually upgrade healthy: {}", validator); + } + if !delay.is_zero() { + info!("batch_update_gradually upgrade delay: {:?}", delay); + tokio::time::sleep(delay).await; + } + info!("batch_update_gradually upgrade done: {}", validator); + } + + ctxa.ctx + .lock() + .await + .swarm + .read() + .await + .health_check() + .await?; + + Ok(()) +} + +pub async fn create_emitter_and_request( + swarm: Arc>>, mut emit_job_request: EmitJobRequest, nodes: &[PeerId], rng: StdRng, @@ -77,12 +140,16 @@ pub fn create_emitter_and_request( // as we are loading nodes, use higher client timeout let client_timeout = Duration::from_secs(30); - let chain_info = swarm.chain_info(); + let chain_info = swarm.read().await.chain_info(); let transaction_factory = TransactionFactory::new(chain_info.chain_id); let emitter = TxnEmitter::new(transaction_factory, rng); - emit_job_request = - emit_job_request.rest_clients(swarm.get_clients_for_peers(nodes, client_timeout)); + emit_job_request = emit_job_request.rest_clients( + swarm + .read() + 
.await + .get_clients_for_peers(nodes, client_timeout), + ); Ok((emitter, emit_job_request)) } @@ -91,7 +158,7 @@ pub fn traffic_emitter_runtime() -> Result { Ok(runtime) } -pub fn generate_traffic( +pub async fn generate_traffic( ctx: &mut NetworkContext<'_>, nodes: &[PeerId], duration: Duration, @@ -99,14 +166,15 @@ pub fn generate_traffic( let emit_job_request = ctx.emit_job.clone(); let rng = SeedableRng::from_rng(ctx.core().rng())?; let (emitter, emit_job_request) = - create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng)?; + create_emitter_and_request(ctx.swarm.clone(), emit_job_request, nodes, rng).await?; - let rt = traffic_emitter_runtime()?; - let stats = rt.block_on(emitter.emit_txn_for( - ctx.swarm().chain_info().root_account, - emit_job_request, - duration, - ))?; + let stats = emitter + .emit_txn_for( + ctx.swarm.read().await.chain_info().root_account, + emit_job_request, + duration, + ) + .await?; Ok(stats) } @@ -121,7 +189,11 @@ pub enum LoadDestination { } impl LoadDestination { - fn get_destination_nodes(self, swarm: &mut dyn Swarm) -> Vec { + async fn get_destination_nodes( + self, + swarm: Arc>>, + ) -> Vec { + let swarm = swarm.read().await; let all_validators = swarm.validators().map(|v| v.peer_id()).collect::>(); let all_fullnodes = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); @@ -141,50 +213,59 @@ impl LoadDestination { } } +#[async_trait] pub trait NetworkLoadTest: Test { - fn setup(&self, _ctx: &mut NetworkContext) -> Result { + async fn setup<'a>(&self, _ctx: &mut NetworkContext<'a>) -> Result { Ok(LoadDestination::FullnodesOtherwiseValidators) } // Load is started before this function is called, and stops after this function returns. // Expected duration is passed into this function, expecting this function to take that much // time to finish. How long this function takes will dictate how long the actual test lasts. 
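// NetworkLoadTest::test below gets a default body that simply holds the load
// for the requested duration. A self-contained sketch of an async default
// method via the async_trait macro (toy trait, not the real one):
use async_trait::async_trait;
use std::time::Duration;

#[async_trait]
trait HoldLoad {
    // Default bodies may await; async_trait boxes the returned future.
    async fn hold(&self, duration: Duration) -> anyhow::Result<()> {
        tokio::time::sleep(duration).await;
        Ok(())
    }
}

struct NoopLoad;

#[async_trait]
impl HoldLoad for NoopLoad {} // inherits the sleeping default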
- fn test( + async fn test( &self, - _swarm: &mut dyn Swarm, + _swarm: Arc>>, _report: &mut TestReport, duration: Duration, ) -> Result<()> { - std::thread::sleep(duration); + tokio::time::sleep(duration).await; Ok(()) } - fn finish(&self, _ctx: &mut NetworkContext) -> Result<()> { + async fn finish<'a>(&self, _ctx: &mut NetworkContext<'a>) -> Result<()> { Ok(()) } } +#[async_trait] impl NetworkTest for dyn NetworkLoadTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { - let runtime = Runtime::new().unwrap(); + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); let start_timestamp = SystemTime::now() .duration_since(UNIX_EPOCH) .expect("Time went backwards") .as_secs(); - let (start_version, _) = runtime - .block_on(ctx.swarm().get_client_with_newest_ledger_version()) + let (start_version, _) = ctx + .swarm + .read() + .await + .get_client_with_newest_ledger_version() + .await .context("no clients replied for start version")?; let emit_job_request = ctx.emit_job.clone(); let rng = SeedableRng::from_rng(ctx.core().rng())?; let duration = ctx.global_duration; - let stats_by_phase = self.network_load_test( - ctx, - emit_job_request, - duration, - WARMUP_DURATION_FRACTION, - COOLDOWN_DURATION_FRACTION, - rng, - )?; + let stats_by_phase = self + .network_load_test( + ctx, + emit_job_request, + duration, + WARMUP_DURATION_FRACTION, + COOLDOWN_DURATION_FRACTION, + rng, + ) + .await?; let phased = stats_by_phase.len() > 1; for (phase, phase_stats) in stats_by_phase.iter().enumerate() { @@ -219,11 +300,15 @@ impl NetworkTest for dyn NetworkLoadTest { .duration_since(UNIX_EPOCH) .expect("Time went backwards") .as_secs(); - let (end_version, _) = runtime - .block_on(ctx.swarm().get_client_with_newest_ledger_version()) + let (end_version, _) = ctx + .swarm + .read() + .await + .get_client_with_newest_ledger_version() + .await .context("no clients replied for end version")?; - self.finish(ctx).context("finish NetworkLoadTest ")?; + self.finish(ctx).await.context("finish NetworkLoadTest ")?; for phase_stats in stats_by_phase.into_iter() { ctx.check_for_success( @@ -235,6 +320,7 @@ impl NetworkTest for dyn NetworkLoadTest { start_version, end_version, ) + .await .context("check for success")?; } @@ -242,28 +328,34 @@ impl NetworkTest for dyn NetworkLoadTest { } } -impl dyn NetworkLoadTest { - pub fn network_load_test( +impl dyn NetworkLoadTest + '_ { + pub async fn network_load_test<'a>( &self, - ctx: &mut NetworkContext, + ctx: &mut NetworkContext<'a>, emit_job_request: EmitJobRequest, duration: Duration, warmup_duration_fraction: f32, cooldown_duration_fraction: f32, rng: StdRng, ) -> Result> { - let destination = self.setup(ctx).context("setup NetworkLoadTest")?; - let nodes_to_send_load_to = destination.get_destination_nodes(ctx.swarm()); + let destination = self.setup(ctx).await.context("setup NetworkLoadTest")?; + let nodes_to_send_load_to = destination.get_destination_nodes(ctx.swarm.clone()).await; // Generate some traffic - let (mut emitter, emit_job_request) = - create_emitter_and_request(ctx.swarm(), emit_job_request, &nodes_to_send_load_to, rng) - .context("create emitter")?; + let (mut emitter, emit_job_request) = create_emitter_and_request( + ctx.swarm.clone(), + emit_job_request, + &nodes_to_send_load_to, + rng, + ) + .await + .context("create emitter")?; - let rt = traffic_emitter_runtime()?; let clients = ctx - .swarm() + .swarm + .read() + .await 
.get_clients_for_peers(&nodes_to_send_load_to, Duration::from_secs(10)); let mut stats_tracking_phases = emit_job_request.get_num_phases(); @@ -273,12 +365,13 @@ impl dyn NetworkLoadTest { } info!("Starting emitting txns for {}s", duration.as_secs()); - let mut job = rt - .block_on(emitter.start_job( - ctx.swarm().chain_info().root_account, + let mut job = emitter + .start_job( + ctx.swarm.read().await.chain_info().root_account, emit_job_request, stats_tracking_phases, - )) + ) + .await .context("start emitter job")?; let total_start = PhaseTimingStart::now(); @@ -288,14 +381,14 @@ impl dyn NetworkLoadTest { let test_duration = duration - warmup_duration - cooldown_duration; let phase_duration = test_duration.div_f32((stats_tracking_phases - 2) as f32); - job = rt.block_on(job.periodic_stat_forward(warmup_duration, 60)); + job = job.periodic_stat_forward(warmup_duration, 60).await; info!("{}s warmup finished", warmup_duration.as_secs()); let mut phase_timing = Vec::new(); let mut phase_start_network_state = Vec::new(); let test_start = Instant::now(); for i in 0..stats_tracking_phases - 2 { - phase_start_network_state.push(rt.block_on(NetworkState::new(&clients))); + phase_start_network_state.push(NetworkState::new(&clients).await); job.start_next_phase(); if i > 0 { @@ -307,10 +400,11 @@ impl dyn NetworkLoadTest { } let phase_start = PhaseTimingStart::now(); - let join_stats = rt.spawn(job.periodic_stat_forward(phase_duration, 60)); - self.test(ctx.swarm, ctx.report, phase_duration) + let join_stats = Handle::current().spawn(job.periodic_stat_forward(phase_duration, 60)); + self.test(ctx.swarm.clone(), ctx.report, phase_duration) + .await .context("test NetworkLoadTest")?; - job = rt.block_on(join_stats).context("join stats")?; + job = join_stats.await.context("join stats")?; phase_timing.push(phase_start.elapsed()); } let actual_test_duration = test_start.elapsed(); @@ -320,13 +414,15 @@ impl dyn NetworkLoadTest { actual_test_duration.as_secs() ); - phase_start_network_state.push(rt.block_on(NetworkState::new(&clients))); + phase_start_network_state.push(NetworkState::new(&clients).await); job.start_next_phase(); let cooldown_start = Instant::now(); let cooldown_used = cooldown_start.elapsed(); if cooldown_used < cooldown_duration { - job = rt.block_on(job.periodic_stat_forward(cooldown_duration - cooldown_used, 60)); + job = job + .periodic_stat_forward(cooldown_duration - cooldown_used, 60) + .await; } info!("{}s cooldown finished", cooldown_duration.as_secs()); @@ -337,7 +433,7 @@ impl dyn NetworkLoadTest { total_timing.start_unixtime_s, total_timing.end_unixtime_s, ); - let stats_by_phase = rt.block_on(job.stop_job()); + let stats_by_phase = job.stop_job().await; info!("Stopped job"); info!("Warmup stats: {}", stats_by_phase[0].rate()); @@ -353,11 +449,12 @@ impl dyn NetworkLoadTest { } else { Some(cur.clone()) }; - let latency_breakdown = rt.block_on(fetch_latency_breakdown( - ctx.swarm(), + let latency_breakdown = fetch_latency_breakdown( + ctx.swarm.clone(), phase_timing[i].start_unixtime_s, phase_timing[i].end_unixtime_s, - ))?; + ) + .await?; info!( "Latency breakdown details for phase {}: from {} to {}: {:?}", i, @@ -491,14 +588,23 @@ impl CompositeNetworkTest { } } +#[async_trait] impl NetworkTest for CompositeNetworkTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { - for wrapper in &self.wrappers { - wrapper.setup(ctx)?; + async fn run<'a>(&self, ctxa: NetworkContextSynchronizer<'a>) -> Result<()> { + { + let mut ctx_locker = ctxa.ctx.lock().await; 
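// Lock-ordering note for CompositeNetworkTest::run below: each wrapper's
// setup() runs under one context guard, that guard is dropped before the
// inner test runs (run() locks the same async Mutex itself), and a fresh
// guard is taken for finish(). Holding the first guard across
// self.test.run(ctxa.clone()) would deadlock.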
+ let ctx = ctx_locker.deref_mut(); + for wrapper in &self.wrappers { + wrapper.setup(ctx).await?; + } } - self.test.run(ctx)?; - for wrapper in &self.wrappers { - wrapper.finish(ctx)?; + self.test.run(ctxa.clone()).await?; + { + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + for wrapper in &self.wrappers { + wrapper.finish(ctx).await?; + } } Ok(()) } diff --git a/testsuite/testcases/src/load_vs_perf_benchmark.rs b/testsuite/testcases/src/load_vs_perf_benchmark.rs index 60d794375fcd3..64123c184453f 100644 --- a/testsuite/testcases/src/load_vs_perf_benchmark.rs +++ b/testsuite/testcases/src/load_vs_perf_benchmark.rs @@ -7,13 +7,13 @@ use aptos_forge::{ args::TransactionTypeArg, prometheus_metrics::{LatencyBreakdown, LatencyBreakdownSlice}, success_criteria::{SuccessCriteria, SuccessCriteriaChecker}, - EmitJobMode, EmitJobRequest, NetworkContext, NetworkTest, Result, Test, TxnStats, - WorkflowProgress, + EmitJobMode, EmitJobRequest, NetworkContext, NetworkContextSynchronizer, NetworkTest, Result, + Test, TxnStats, WorkflowProgress, }; use aptos_logger::info; +use async_trait::async_trait; use rand::SeedableRng; -use std::{fmt::Debug, time::Duration}; -use tokio::runtime::Runtime; +use std::{fmt::Debug, ops::DerefMut, time::Duration}; // add larger warmup, as when we are exceeding the max load, // it takes more time to fill mempool. @@ -181,7 +181,7 @@ impl Test for LoadVsPerfBenchmark { } impl LoadVsPerfBenchmark { - fn evaluate_single( + async fn evaluate_single( &self, ctx: &mut NetworkContext<'_>, workloads: &Workloads, @@ -190,14 +190,17 @@ impl LoadVsPerfBenchmark { ) -> Result> { let rng = SeedableRng::from_rng(ctx.core().rng())?; let emit_job_request = workloads.configure(index, ctx.emit_job.clone()); - let stats_by_phase = self.test.network_load_test( - ctx, - emit_job_request, - duration, - PER_TEST_WARMUP_DURATION_FRACTION, - PER_TEST_COOLDOWN_DURATION_FRACTION, - rng, - )?; + let stats_by_phase = self + .test + .network_load_test( + ctx, + emit_job_request, + duration, + PER_TEST_WARMUP_DURATION_FRACTION, + PER_TEST_COOLDOWN_DURATION_FRACTION, + rng, + ) + .await?; let mut result = vec![]; for (phase, phase_stats) in stats_by_phase.into_iter().enumerate() { @@ -214,8 +217,9 @@ impl LoadVsPerfBenchmark { } } +#[async_trait] impl NetworkTest for LoadVsPerfBenchmark { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { assert!( self.criteria.is_empty() || self.criteria.len() == self.workloads.len(), "Invalid config, {} criteria and {} workloads given", @@ -223,26 +227,30 @@ impl NetworkTest for LoadVsPerfBenchmark { self.workloads.len(), ); - let rt = Runtime::new().unwrap(); + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); let mut continous_job = if let Some(continuous_traffic) = &self.continuous_traffic { - let nodes_to_send_load_to = - LoadDestination::FullnodesOtherwiseValidators.get_destination_nodes(ctx.swarm()); + let nodes_to_send_load_to = LoadDestination::FullnodesOtherwiseValidators + .get_destination_nodes(ctx.swarm.clone()) + .await; let rng = SeedableRng::from_rng(ctx.core().rng())?; let (mut emitter, emit_job_request) = create_emitter_and_request( - ctx.swarm(), + ctx.swarm.clone(), continuous_traffic.traffic.clone(), &nodes_to_send_load_to, rng, ) + .await .context("create emitter")?; - let job = rt - .block_on(emitter.start_job( - ctx.swarm().chain_info().root_account, + let job = emitter + .start_job( + 
ctx.swarm.read().await.chain_info().root_account, emit_job_request, 1 + 2 * self.workloads.len(), - )) + ) + .await .context("start emitter job")?; Some(job) } else { @@ -271,7 +279,8 @@ impl NetworkTest for LoadVsPerfBenchmark { phase_duration .checked_mul(self.workloads.num_phases(index) as u32) .unwrap(), - )?, + ) + .await?, ); if let Some(job) = continous_job.as_mut() { @@ -293,26 +302,29 @@ impl NetworkTest for LoadVsPerfBenchmark { ctx.report.report_text(line); } - let continuous_results = continous_job.map(|job| { - let stats_by_phase = rt.block_on(job.stop_job()); - - let mut result = vec![]; - for (phase, phase_stats) in stats_by_phase.into_iter().enumerate() { - if phase % 2 != 0 { - result.push(( - format!("continuous with traffic {}", phase / 2), - phase_stats, - )); + let continuous_results = match continous_job { + Some(job) => { + let stats_by_phase = job.stop_job().await; + + let mut result = vec![]; + for (phase, phase_stats) in stats_by_phase.into_iter().enumerate() { + if phase % 2 != 0 { + result.push(( + format!("continuous with traffic {}", phase / 2), + phase_stats, + )); + } } - } - let table = to_table_continuous("continuous traffic".to_string(), &result); - for line in table { - ctx.report.report_text(line); - } + let table = to_table_continuous("continuous traffic".to_string(), &result); + for line in table { + ctx.report.report_text(line); + } - result - }); + Some(result) + }, + None => None, + }; for (index, result) in results.iter().enumerate() { // always take last phase for success criteria diff --git a/testsuite/testcases/src/modifiers.rs b/testsuite/testcases/src/modifiers.rs index 2cdda468a1418..a881780b17d2f 100644 --- a/testsuite/testcases/src/modifiers.rs +++ b/testsuite/testcases/src/modifiers.rs @@ -3,72 +3,75 @@ use crate::{multi_region_network_test::chunk_peers, LoadDestination, NetworkLoadTest}; use aptos_forge::{ - GroupCpuStress, NetworkContext, NetworkTest, Swarm, SwarmChaos, SwarmCpuStress, SwarmExt, Test, + GroupCpuStress, NetworkContext, NetworkContextSynchronizer, NetworkTest, Swarm, SwarmChaos, + SwarmCpuStress, SwarmExt, Test, }; use aptos_logger::info; use aptos_types::PeerId; +use async_trait::async_trait; use rand::Rng; -use tokio::runtime::Runtime; +use std::sync::Arc; -fn add_execution_delay(swarm: &mut dyn Swarm, config: &ExecutionDelayConfig) -> anyhow::Result<()> { - let runtime = Runtime::new().unwrap(); - let validators = swarm.get_validator_clients_with_names(); +async fn add_execution_delay( + swarm: Arc>>, + config: &ExecutionDelayConfig, +) -> anyhow::Result<()> { + let validators = { swarm.read().await.get_validator_clients_with_names() }; - runtime.block_on(async { - let mut rng = rand::thread_rng(); - for (name, validator) in validators { - let sleep_percentage = if rng.gen_bool(config.inject_delay_node_fraction) { + for (name, validator) in validators { + let sleep_percentage = { + let mut rng = rand::thread_rng(); + if rng.gen_bool(config.inject_delay_node_fraction) { rng.gen_range(1_u32, config.inject_delay_max_transaction_percentage) } else { 0 - }; - info!( - "Validator {} adding {}% of transactions with {}ms execution delay", - name, sleep_percentage, config.inject_delay_per_transaction_ms - ); - validator - .set_failpoint( - "aptos_vm::execution::user_transaction".to_string(), - format!( - "{}%delay({})", - sleep_percentage, config.inject_delay_per_transaction_ms - ), + } + }; + info!( + "Validator {} adding {}% of transactions with {}ms execution delay", + name, sleep_percentage, 
config.inject_delay_per_transaction_ms + ); + validator + .set_failpoint( + "aptos_vm::execution::user_transaction".to_string(), + format!( + "{}%delay({})", + sleep_percentage, config.inject_delay_per_transaction_ms + ), + ) + .await + .map_err(|e| { + anyhow::anyhow!( + "set_failpoint to add execution delay on {} failed, {:?}", + name, + e ) - .await - .map_err(|e| { - anyhow::anyhow!( - "set_failpoint to add execution delay on {} failed, {:?}", - name, - e - ) - })?; - } - Ok(()) - }) + })?; + } + Ok(()) } -fn remove_execution_delay(swarm: &mut dyn Swarm) -> anyhow::Result<()> { - let runtime = Runtime::new().unwrap(); - let validators = swarm.get_validator_clients_with_names(); - - runtime.block_on(async { - for (name, validator) in validators { - validator - .set_failpoint( - "aptos_vm::execution::block_metadata".to_string(), - "off".to_string(), +async fn remove_execution_delay( + swarm: Arc>>, +) -> anyhow::Result<()> { + let validators = { swarm.read().await.get_validator_clients_with_names() }; + + for (name, validator) in validators { + validator + .set_failpoint( + "aptos_vm::execution::block_metadata".to_string(), + "off".to_string(), + ) + .await + .map_err(|e| { + anyhow::anyhow!( + "set_failpoint to remove execution delay on {} failed, {:?}", + name, + e ) - .await - .map_err(|e| { - anyhow::anyhow!( - "set_failpoint to remove execution delay on {} failed, {:?}", - name, - e - ) - })?; - } - Ok(()) - }) + })?; + } + Ok(()) } /// Config for adding variable processing overhead/delay into @@ -90,20 +93,22 @@ pub struct ExecutionDelayTest { pub add_execution_delay: ExecutionDelayConfig, } +#[async_trait] impl NetworkLoadTest for ExecutionDelayTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { - add_execution_delay(ctx.swarm(), &self.add_execution_delay)?; + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { + add_execution_delay(ctx.swarm.clone(), &self.add_execution_delay).await?; Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { - remove_execution_delay(ctx.swarm()) + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { + remove_execution_delay(ctx.swarm.clone()).await } } +#[async_trait] impl NetworkTest for ExecutionDelayTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } @@ -122,16 +127,15 @@ pub struct NetworkUnreliabilityTest { pub config: NetworkUnreliabilityConfig, } +#[async_trait] impl NetworkLoadTest for NetworkUnreliabilityTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { - let swarm = ctx.swarm(); - let runtime = Runtime::new().unwrap(); - let validators = swarm.get_validator_clients_with_names(); + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { + let validators = { ctx.swarm.read().await.get_validator_clients_with_names() }; - runtime.block_on(async { - let mut rng = rand::thread_rng(); - for (name, validator) in validators { - let drop_percentage = if rng.gen_bool(self.config.inject_unreliability_fraction) { + for (name, validator) in validators { + let drop_percentage = { + let mut rng = rand::thread_rng(); + if rng.gen_bool(self.config.inject_unreliability_fraction) { rng.gen_range( 1_u32, (self.config.inject_max_unreliability_percentage * 1000.0) as u32, @@ -139,56 +143,53 @@ impl NetworkLoadTest for 
NetworkUnreliabilityTest { / 1000.0 } else { 0.0 - }; - info!( - "Validator {} dropping {}% of messages", - name, drop_percentage - ); - validator - .set_failpoint( - "consensus::send::any".to_string(), - format!("{}%return", drop_percentage), + } + }; + info!( + "Validator {} dropping {}% of messages", + name, drop_percentage + ); + validator + .set_failpoint( + "consensus::send::any".to_string(), + format!("{}%return", drop_percentage), + ) + .await + .map_err(|e| { + anyhow::anyhow!( + "set_failpoint to add unreliability on {} failed, {:?}", + name, + e ) - .await - .map_err(|e| { - anyhow::anyhow!( - "set_failpoint to add unreliability on {} failed, {:?}", - name, - e - ) - })?; - } - Ok::<(), anyhow::Error>(()) - })?; + })?; + } Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { - let runtime = Runtime::new().unwrap(); - let validators = ctx.swarm().get_validator_clients_with_names(); - - runtime.block_on(async { - for (name, validator) in validators { - validator - .set_failpoint("consensus::send::any".to_string(), "off".to_string()) - .await - .map_err(|e| { - anyhow::anyhow!( - "set_failpoint to remove unreliability on {} failed, {:?}", - name, - e - ) - })?; - } - Ok(()) - }) + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { + let validators = { ctx.swarm.read().await.get_validator_clients_with_names() }; + + for (name, validator) in validators { + validator + .set_failpoint("consensus::send::any".to_string(), "off".to_string()) + .await + .map_err(|e| { + anyhow::anyhow!( + "set_failpoint to remove unreliability on {} failed, {:?}", + name, + e + ) + })?; + } + Ok(()) } } +#[async_trait] impl NetworkTest for NetworkUnreliabilityTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } @@ -226,8 +227,16 @@ impl CpuChaosTest { /// Creates a new SwarmCpuStress to be injected via chaos. Note: /// CPU chaos is only done for the validators in the swarm (and /// not the fullnodes). 
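// create_cpu_chaos below snapshots the validator ids under a short-lived read
// lock and only then builds the chaos spec. A standalone sketch of the
// snapshot idiom (toy id type; the real lock wraps the boxed Swarm):
use std::sync::Arc;
use tokio::sync::RwLock;

async fn snapshot_ids(swarm: Arc<RwLock<Vec<u64>>>) -> Vec<u64> {
    // Clone the ids out so the guard is gone before any later write lock.
    swarm.read().await.clone()
}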
- fn create_cpu_chaos(&self, swarm: &mut dyn Swarm) -> SwarmCpuStress { - let all_validators = swarm.validators().map(|v| v.peer_id()).collect::>(); + async fn create_cpu_chaos( + &self, + swarm: Arc>>, + ) -> SwarmCpuStress { + let all_validators = swarm + .read() + .await + .validators() + .map(|v| v.peer_id()) + .collect::>(); let cpu_chaos_config = self.cpu_chaos_config.clone(); create_swarm_cpu_stress(all_validators, Some(cpu_chaos_config)) } @@ -279,30 +288,34 @@ pub fn create_swarm_cpu_stress( SwarmCpuStress { group_cpu_stresses } } +#[async_trait] impl NetworkLoadTest for CpuChaosTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { - let swarm_cpu_stress = self.create_cpu_chaos(ctx.swarm()); + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { + let swarm_cpu_stress = self.create_cpu_chaos(ctx.swarm.clone()).await; - ctx.runtime.block_on( - ctx.swarm - .inject_chaos(SwarmChaos::CpuStress(swarm_cpu_stress)), - )?; + ctx.swarm + .write() + .await + .inject_chaos(SwarmChaos::CpuStress(swarm_cpu_stress)) + .await?; Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { - let swarm_cpu_stress = self.create_cpu_chaos(ctx.swarm()); + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { + let swarm_cpu_stress = self.create_cpu_chaos(ctx.swarm.clone()).await; - ctx.runtime.block_on( - ctx.swarm - .remove_chaos(SwarmChaos::CpuStress(swarm_cpu_stress)), - ) + ctx.swarm + .write() + .await + .remove_chaos(SwarmChaos::CpuStress(swarm_cpu_stress)) + .await } } +#[async_trait] impl NetworkTest for CpuChaosTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/multi_region_network_test.rs b/testsuite/testcases/src/multi_region_network_test.rs index 9e44a837e5d4e..840d37699152e 100644 --- a/testsuite/testcases/src/multi_region_network_test.rs +++ b/testsuite/testcases/src/multi_region_network_test.rs @@ -2,11 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{GroupNetEm, NetworkContext, NetworkTest, Swarm, SwarmChaos, SwarmNetEm, Test}; +use aptos_forge::{ + GroupNetEm, NetworkContext, NetworkContextSynchronizer, NetworkTest, Swarm, SwarmChaos, + SwarmNetEm, Test, +}; use aptos_logger::info; use aptos_types::PeerId; +use async_trait::async_trait; use itertools::{self, EitherOrBoth, Itertools}; -use std::collections::BTreeMap; +use std::{collections::BTreeMap, sync::Arc}; /// The link stats are obtained from https://github.com/doitintl/intercloud-throughput/blob/master/results_202202/results.csv /// The four regions were hand-picked from the dataset to simulate a multi-region setup @@ -251,9 +255,16 @@ impl MultiRegionNetworkEmulationTest { /// Creates a new SwarmNetEm to be injected via chaos. Note: network /// emulation is only done for the validators in the swarm (and not /// the fullnodes). 
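// create_netem_chaos below collects validators and VFNs inside one read-lock
// scope and returns both, so the lock is taken exactly once. Sketch with
// assumed toy types:
use std::sync::Arc;
use tokio::sync::RwLock;

struct Nodes {
    validators: Vec<u64>,
    vfns: Vec<u64>,
}

async fn node_sets(swarm: Arc<RwLock<Nodes>>) -> (Vec<u64>, Vec<u64>) {
    let guard = swarm.read().await;
    (guard.validators.clone(), guard.vfns.clone())
} // guard dropped at end of scope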
- fn create_netem_chaos(&self, swarm: &mut dyn Swarm) -> SwarmNetEm { - let all_validators = swarm.validators().map(|v| v.peer_id()).collect::>(); - let all_vfns = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); + async fn create_netem_chaos( + &self, + swarm: Arc>>, + ) -> SwarmNetEm { + let (all_validators, all_vfns) = { + let swarm = swarm.read().await; + let all_validators = swarm.validators().map(|v| v.peer_id()).collect::>(); + let all_vfns = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); + (all_validators, all_vfns) + }; let all_pairs: Vec<_> = all_validators .iter() @@ -309,26 +320,34 @@ pub fn create_multi_region_swarm_network_chaos( } } +#[async_trait] impl NetworkLoadTest for MultiRegionNetworkEmulationTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { - let chaos = self.create_netem_chaos(ctx.swarm); - ctx.runtime - .block_on(ctx.swarm.inject_chaos(SwarmChaos::NetEm(chaos)))?; + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { + let chaos = self.create_netem_chaos(ctx.swarm.clone()).await; + ctx.swarm + .write() + .await + .inject_chaos(SwarmChaos::NetEm(chaos)) + .await?; Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { - let chaos = self.create_netem_chaos(ctx.swarm); - ctx.runtime - .block_on(ctx.swarm.remove_chaos(SwarmChaos::NetEm(chaos)))?; + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { + let chaos = self.create_netem_chaos(ctx.swarm.clone()).await; + ctx.swarm + .write() + .await + .remove_chaos(SwarmChaos::NetEm(chaos)) + .await?; Ok(()) } } +#[async_trait] impl NetworkTest for MultiRegionNetworkEmulationTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/network_bandwidth_test.rs b/testsuite/testcases/src/network_bandwidth_test.rs index ba70db54942c9..30c7bf4bba4ea 100644 --- a/testsuite/testcases/src/network_bandwidth_test.rs +++ b/testsuite/testcases/src/network_bandwidth_test.rs @@ -3,8 +3,10 @@ use crate::{LoadDestination, NetworkLoadTest}; use aptos_forge::{ - GroupNetworkBandwidth, NetworkContext, NetworkTest, SwarmChaos, SwarmNetworkBandwidth, Test, + GroupNetworkBandwidth, NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, + SwarmNetworkBandwidth, Test, }; +use async_trait::async_trait; /// This is deprecated. 
Use [crate::multi_region_network_test::MultiRegionNetworkEmulationTest] instead pub struct NetworkBandwidthTest; @@ -23,20 +25,21 @@ impl Test for NetworkBandwidthTest { } } +#[async_trait] impl NetworkLoadTest for NetworkBandwidthTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { - ctx.runtime - .block_on( - ctx.swarm - .inject_chaos(SwarmChaos::Bandwidth(SwarmNetworkBandwidth { - group_network_bandwidths: vec![GroupNetworkBandwidth { - name: format!("forge-namespace-{}mbps-bandwidth", RATE_MBPS), - rate: RATE_MBPS, - limit: LIMIT_BYTES, - buffer: BUFFER_BYTES, - }], - })), - )?; + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { + ctx.swarm + .write() + .await + .inject_chaos(SwarmChaos::Bandwidth(SwarmNetworkBandwidth { + group_network_bandwidths: vec![GroupNetworkBandwidth { + name: format!("forge-namespace-{}mbps-bandwidth", RATE_MBPS), + rate: RATE_MBPS, + limit: LIMIT_BYTES, + buffer: BUFFER_BYTES, + }], + })) + .await?; let msg = format!( "Limited bandwidth to {}mbps with limit {} and buffer {} to namespace", @@ -48,25 +51,26 @@ impl NetworkLoadTest for NetworkBandwidthTest { Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { - ctx.runtime - .block_on( - ctx.swarm - .remove_chaos(SwarmChaos::Bandwidth(SwarmNetworkBandwidth { - group_network_bandwidths: vec![GroupNetworkBandwidth { - name: format!("forge-namespace-{}mbps-bandwidth", RATE_MBPS), - rate: RATE_MBPS, - limit: LIMIT_BYTES, - buffer: BUFFER_BYTES, - }], - })), - )?; + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { + ctx.swarm + .write() + .await + .remove_chaos(SwarmChaos::Bandwidth(SwarmNetworkBandwidth { + group_network_bandwidths: vec![GroupNetworkBandwidth { + name: format!("forge-namespace-{}mbps-bandwidth", RATE_MBPS), + rate: RATE_MBPS, + limit: LIMIT_BYTES, + buffer: BUFFER_BYTES, + }], + })) + .await?; Ok(()) } } +#[async_trait] impl NetworkTest for NetworkBandwidthTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/network_loss_test.rs b/testsuite/testcases/src/network_loss_test.rs index 7fd83aa344994..925d04dfd82f2 100644 --- a/testsuite/testcases/src/network_loss_test.rs +++ b/testsuite/testcases/src/network_loss_test.rs @@ -2,7 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{NetworkContext, NetworkTest, SwarmChaos, SwarmNetworkLoss, Test}; +use aptos_forge::{ + NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkLoss, Test, +}; +use async_trait::async_trait; /// This is deprecated. 
Use [crate::multi_region_network_test::MultiRegionNetworkEmulationTest] instead pub struct NetworkLossTest; @@ -17,13 +20,17 @@ impl Test for NetworkLossTest { } } +#[async_trait] impl NetworkLoadTest for NetworkLossTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { - ctx.runtime - .block_on(ctx.swarm.inject_chaos(SwarmChaos::Loss(SwarmNetworkLoss { + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { + ctx.swarm + .write() + .await + .inject_chaos(SwarmChaos::Loss(SwarmNetworkLoss { loss_percentage: LOSS_PERCENTAGE, correlation_percentage: CORRELATION_PERCENTAGE, - })))?; + })) + .await?; let msg = format!( "Injected {}% loss with {}% correlation loss to namespace", @@ -34,18 +41,22 @@ impl NetworkLoadTest for NetworkLossTest { Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { - ctx.runtime - .block_on(ctx.swarm.remove_chaos(SwarmChaos::Loss(SwarmNetworkLoss { + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { + ctx.swarm + .write() + .await + .remove_chaos(SwarmChaos::Loss(SwarmNetworkLoss { loss_percentage: LOSS_PERCENTAGE, correlation_percentage: CORRELATION_PERCENTAGE, - })))?; + })) + .await?; Ok(()) } } +#[async_trait] impl NetworkTest for NetworkLossTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/network_partition_test.rs b/testsuite/testcases/src/network_partition_test.rs index 8846b73a94ed6..48f192f91c2be 100644 --- a/testsuite/testcases/src/network_partition_test.rs +++ b/testsuite/testcases/src/network_partition_test.rs @@ -2,7 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{NetworkContext, NetworkTest, SwarmChaos, SwarmNetworkPartition, Test}; +use aptos_forge::{ + NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkPartition, + Test, +}; +use async_trait::async_trait; /// This is deprecated. 
Use [crate::multi_region_network_test::MultiRegionNetworkEmulationTest] instead pub struct NetworkPartitionTest; @@ -16,15 +20,16 @@ impl Test for NetworkPartitionTest { } } +#[async_trait] impl NetworkLoadTest for NetworkPartitionTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { - ctx.runtime - .block_on( - ctx.swarm - .inject_chaos(SwarmChaos::Partition(SwarmNetworkPartition { - partition_percentage: PARTITION_PERCENTAGE, - })), - )?; + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { + ctx.swarm + .write() + .await + .inject_chaos(SwarmChaos::Partition(SwarmNetworkPartition { + partition_percentage: PARTITION_PERCENTAGE, + })) + .await?; let msg = format!( "Partitioned {}% validators in namespace", @@ -35,26 +40,29 @@ impl NetworkLoadTest for NetworkPartitionTest { // Just send the load to last validator which is not included in the partition Ok(LoadDestination::Peers(vec![ctx .swarm + .read() + .await .validators() .last() .map(|v| v.peer_id()) .unwrap()])) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { - ctx.runtime - .block_on( - ctx.swarm - .remove_chaos(SwarmChaos::Partition(SwarmNetworkPartition { - partition_percentage: PARTITION_PERCENTAGE, - })), - )?; + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { + ctx.swarm + .write() + .await + .remove_chaos(SwarmChaos::Partition(SwarmNetworkPartition { + partition_percentage: PARTITION_PERCENTAGE, + })) + .await?; Ok(()) } } +#[async_trait] impl NetworkTest for NetworkPartitionTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/partial_nodes_down_test.rs b/testsuite/testcases/src/partial_nodes_down_test.rs index 2d6c126907cc4..84cb944df3882 100644 --- a/testsuite/testcases/src/partial_nodes_down_test.rs +++ b/testsuite/testcases/src/partial_nodes_down_test.rs @@ -3,8 +3,9 @@ // SPDX-License-Identifier: Apache-2.0 use crate::generate_traffic; -use aptos_forge::{NetworkContext, NetworkTest, Result, Test}; -use std::thread; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test}; +use async_trait::async_trait; +use std::{ops::DerefMut, thread}; use tokio::{runtime::Runtime, time::Duration}; pub struct PartialNodesDown; @@ -15,30 +16,37 @@ impl Test for PartialNodesDown { } } +#[async_trait] impl NetworkTest for PartialNodesDown { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); let runtime = Runtime::new()?; let duration = Duration::from_secs(120); let all_validators = ctx - .swarm() + .swarm + .read() + .await .validators() .map(|v| v.peer_id()) .collect::>(); let mut down_nodes = all_validators.clone(); let up_nodes = down_nodes.split_off(all_validators.len() / 10); for n in &down_nodes { - let node = ctx.swarm().validator_mut(*n).unwrap(); + let swarm = ctx.swarm.read().await; + let node = swarm.validator(*n).unwrap(); println!("Node {} is going to stop", node.name()); runtime.block_on(node.stop())?; } thread::sleep(Duration::from_secs(5)); // Generate some traffic - let txn_stat = generate_traffic(ctx, &up_nodes, duration)?; + let txn_stat = generate_traffic(ctx, &up_nodes, duration).await?; ctx.report .report_txn_stats(self.name().to_string(), 
         for n in &down_nodes {
-            let node = ctx.swarm().validator_mut(*n).unwrap();
+            let swarm = ctx.swarm.read().await;
+            let node = swarm.validator(*n).unwrap();
             println!("Node {} is going to restart", node.name());
             runtime.block_on(node.start())?;
         }
diff --git a/testsuite/testcases/src/performance_test.rs b/testsuite/testcases/src/performance_test.rs
index f602ede7d437f..63786565d1f98 100644
--- a/testsuite/testcases/src/performance_test.rs
+++ b/testsuite/testcases/src/performance_test.rs
@@ -3,7 +3,8 @@
 // SPDX-License-Identifier: Apache-2.0

 use crate::NetworkLoadTest;
-use aptos_forge::{NetworkContext, NetworkTest, Result, Test};
+use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test};
+use async_trait::async_trait;

 pub struct PerformanceBenchmark;

@@ -15,8 +16,9 @@ impl Test for PerformanceBenchmark {

 impl NetworkLoadTest for PerformanceBenchmark {}

+#[async_trait]
 impl NetworkTest for PerformanceBenchmark {
-    fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> {
-        <dyn NetworkLoadTest>::run(self, ctx)
+    async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> {
+        <dyn NetworkLoadTest>::run(self, ctx).await
     }
 }
diff --git a/testsuite/testcases/src/public_fullnode_performance.rs b/testsuite/testcases/src/public_fullnode_performance.rs
index a57ad39a9c05d..88b3fac41b72c 100644
--- a/testsuite/testcases/src/public_fullnode_performance.rs
+++ b/testsuite/testcases/src/public_fullnode_performance.rs
@@ -9,20 +9,20 @@ use crate::{
 use anyhow::Error;
 use aptos_config::config::{NodeConfig, OverrideNodeConfig};
 use aptos_forge::{
-    NetworkContext, NetworkTest, OverrideNodeConfigFn, Result, Swarm, SwarmChaos, SwarmCpuStress,
-    SwarmNetEm, Test,
+    NetworkContext, NetworkContextSynchronizer, NetworkTest, OverrideNodeConfigFn, Result, Swarm,
+    SwarmChaos, SwarmCpuStress, SwarmNetEm, Test,
 };
 use aptos_logger::info;
 use aptos_sdk::move_types::account_address::AccountAddress;
 use aptos_types::PeerId;
+use async_trait::async_trait;
 use itertools::{EitherOrBoth, Itertools};
 use rand::{
     rngs::{OsRng, StdRng},
     seq::SliceRandom,
     Rng, SeedableRng,
 };
-use std::iter::once;
-use tokio::runtime::Runtime;
+use std::{iter::once, sync::Arc};

 /// A simple test that adds multiple public fullnodes (PFNs) to the swarm
 /// and submits transactions through them. Network emulation chaos can also
@@ -57,9 +57,12 @@ impl PFNPerformance {
     /// Creates CPU chaos for the swarm. Note: CPU chaos is added
     /// to all validators, VFNs and PFNs in the swarm.
-    fn create_cpu_chaos(&self, swarm: &mut dyn Swarm) -> SwarmCpuStress {
+    async fn create_cpu_chaos(
+        &self,
+        swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>,
+    ) -> SwarmCpuStress {
         // Gather and shuffle all peers IDs (so that we get random CPU chaos)
-        let shuffled_peer_ids = self.gather_and_shuffle_peer_ids(swarm);
+        let shuffled_peer_ids = self.gather_and_shuffle_peer_ids(swarm).await;

         // Create CPU chaos for the swarm
         create_swarm_cpu_stress(shuffled_peer_ids, None)
@@ -67,19 +70,31 @@ impl PFNPerformance {

     /// Creates network emulation chaos for the swarm. Note: network chaos
     /// is added to all validators, VFNs and PFNs in the swarm.
-    fn create_network_emulation_chaos(&self, swarm: &mut dyn Swarm) -> SwarmNetEm {
+    async fn create_network_emulation_chaos(
+        &self,
+        swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>,
+    ) -> SwarmNetEm {
         // Gather and shuffle all peers IDs (so that we get random network emulation)
-        let shuffled_peer_ids = self.gather_and_shuffle_peer_ids_with_colocation(swarm);
+        let shuffled_peer_ids = self
+            .gather_and_shuffle_peer_ids_with_colocation(swarm)
+            .await;

         // Create network emulation chaos for the swarm
         create_multi_region_swarm_network_chaos(shuffled_peer_ids, None)
     }

     /// Gathers and shuffles all peer IDs in the swarm
-    fn gather_and_shuffle_peer_ids(&self, swarm: &mut dyn Swarm) -> Vec<AccountAddress> {
+    async fn gather_and_shuffle_peer_ids(
+        &self,
+        swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>,
+    ) -> Vec<AccountAddress> {
         // Identify the validators and fullnodes in the swarm
-        let validator_peer_ids = swarm.validators().map(|v| v.peer_id()).collect::<Vec<_>>();
-        let fullnode_peer_ids = swarm.full_nodes().map(|v| v.peer_id()).collect::<Vec<_>>();
+        let (validator_peer_ids, fullnode_peer_ids) = {
+            let swarm = swarm.read().await;
+            let validator_peer_ids = swarm.validators().map(|v| v.peer_id()).collect::<Vec<_>>();
+            let fullnode_peer_ids = swarm.full_nodes().map(|v| v.peer_id()).collect::<Vec<_>>();
+            (validator_peer_ids, fullnode_peer_ids)
+        };

         // Gather and shuffle all peers IDs
         let mut all_peer_ids = validator_peer_ids
@@ -93,13 +108,17 @@ impl PFNPerformance {
     }

     /// Gathers and shuffles all peer IDs in the swarm, colocating VFNs with their validator
-    fn gather_and_shuffle_peer_ids_with_colocation(
+    async fn gather_and_shuffle_peer_ids_with_colocation(
         &self,
-        swarm: &mut dyn Swarm,
+        swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>,
     ) -> Vec<Vec<AccountAddress>> {
         // Identify the validators and fullnodes in the swarm
-        let validator_peer_ids = swarm.validators().map(|v| v.peer_id()).collect::<Vec<_>>();
-        let fullnode_peer_ids = swarm.full_nodes().map(|v| v.peer_id()).collect::<Vec<_>>();
+        let (validator_peer_ids, fullnode_peer_ids) = {
+            let swarm = swarm.read().await;
+            let validator_peer_ids = swarm.validators().map(|v| v.peer_id()).collect::<Vec<_>>();
+            let fullnode_peer_ids = swarm.full_nodes().map(|v| v.peer_id()).collect::<Vec<_>>();
+            (validator_peer_ids, fullnode_peer_ids)
+        };
         let (vfn_peer_ids, pfn_peer_ids) =
             fullnode_peer_ids.split_at(fullnode_peer_ids.len() - self.num_pfns as usize);
         let mut vfn_and_vn_ids: Vec<_> = validator_peer_ids
@@ -124,51 +143,65 @@ impl Test for PFNPerformance {
     }
 }

+#[async_trait]
 impl NetworkTest for PFNPerformance {
-    fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> {
-        <dyn NetworkLoadTest>::run(self, ctx)
+    async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> {
+        <dyn NetworkLoadTest>::run(self, ctx).await
     }
 }

+#[async_trait]
 impl NetworkLoadTest for PFNPerformance {
     /// We must override the setup function to: (i) create PFNs in
     /// the swarm; and (ii) use those PFNs as the load destination.
-    fn setup(&self, ctx: &mut NetworkContext) -> Result<LoadDestination> {
+    async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> Result<LoadDestination> {
         // Add the PFNs to the swarm
         let pfn_peer_ids =
-            create_and_add_pfns(ctx, self.num_pfns, self.config_override_fn.clone())?;
+            create_and_add_pfns(ctx, self.num_pfns, self.config_override_fn.clone()).await?;

         // Add CPU chaos to the swarm
         if self.add_cpu_chaos {
-            let cpu_chaos = self.create_cpu_chaos(ctx.swarm);
-            ctx.runtime
-                .block_on(ctx.swarm.inject_chaos(SwarmChaos::CpuStress(cpu_chaos)))?;
+            let cpu_chaos = self.create_cpu_chaos(ctx.swarm.clone()).await;
+            ctx.swarm
+                .write()
+                .await
+                .inject_chaos(SwarmChaos::CpuStress(cpu_chaos))
+                .await?;
         }

         // Add network emulation to the swarm
         if self.add_network_emulation {
-            let network_chaos = self.create_network_emulation_chaos(ctx.swarm);
-            ctx.runtime
-                .block_on(ctx.swarm.inject_chaos(SwarmChaos::NetEm(network_chaos)))?;
+            let network_chaos = self.create_network_emulation_chaos(ctx.swarm.clone()).await;
+            ctx.swarm
+                .write()
+                .await
+                .inject_chaos(SwarmChaos::NetEm(network_chaos))
+                .await?;
         }

         // Use the PFNs as the load destination
         Ok(LoadDestination::Peers(pfn_peer_ids))
     }

-    fn finish(&self, ctx: &mut NetworkContext) -> Result<()> {
+    async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> Result<()> {
         // Remove CPU chaos from the swarm
         if self.add_cpu_chaos {
-            let cpu_chaos = self.create_cpu_chaos(ctx.swarm);
-            ctx.runtime
-                .block_on(ctx.swarm.remove_chaos(SwarmChaos::CpuStress(cpu_chaos)))?;
+            let cpu_chaos = self.create_cpu_chaos(ctx.swarm.clone()).await;
+            ctx.swarm
+                .write()
+                .await
+                .remove_chaos(SwarmChaos::CpuStress(cpu_chaos))
+                .await?;
         }

         // Remove network emulation from the swarm
         if self.add_network_emulation {
-            let network_chaos = self.create_network_emulation_chaos(ctx.swarm);
-            ctx.runtime
-                .block_on(ctx.swarm.remove_chaos(SwarmChaos::NetEm(network_chaos)))?;
+            let network_chaos = self.create_network_emulation_chaos(ctx.swarm.clone()).await;
+            ctx.swarm
+                .write()
+                .await
+                .remove_chaos(SwarmChaos::NetEm(network_chaos))
+                .await?;
         }

         Ok(())
@@ -176,48 +209,49 @@ impl NetworkLoadTest for PFNPerformance {
 }

 /// Adds a number of PFNs to the network and returns the peer IDs
-fn create_and_add_pfns(
-    ctx: &mut NetworkContext,
+async fn create_and_add_pfns<'a>(
+    ctx: &mut NetworkContext<'a>,
     num_pfns: u64,
     config_override_fn: Option<OverrideNodeConfigFn>,
 ) -> Result<Vec<AccountAddress>, Error> {
     info!("Creating {} public fullnodes!", num_pfns);

     // Identify the version for the PFNs
-    let swarm = ctx.swarm();
-    let pfn_version = swarm.versions().max().unwrap();
+    let pfn_version = { ctx.swarm.read().await.versions().max().unwrap() };

     // Create the PFN swarm
-    let runtime = Runtime::new().unwrap();
-    let pfn_peer_ids: Vec<AccountAddress> = (0..num_pfns)
-        .map(|i| {
-            // Create a config for the PFN. Note: this needs to be done here
-            // because the config will generate a unique peer ID for the PFN.
-            let mut pfn_config = swarm.get_default_pfn_node_config();
-            let mut base_config = NodeConfig::default();
-            if let Some(f) = config_override_fn.as_ref() {
-                f(&mut pfn_config, &mut base_config);
-            }
-            let pfn_override_config = OverrideNodeConfig::new(pfn_config, base_config);
-
-            // Add the PFN to the swarm
-            let peer_id = runtime
-                .block_on(swarm.add_full_node(&pfn_version, pfn_override_config))
-                .unwrap();
-
-            // Verify the PFN was added
-            if swarm.full_node(peer_id).is_none() {
-                panic!(
-                    "Failed to locate PFN {:?} in the swarm! Peer ID: {:?}",
-                    i, peer_id
-                );
-            }
-
-            // Return the peer ID
-            info!("Created PFN {:?} with peer ID: {:?}", i, peer_id);
-            peer_id
-        })
-        .collect();
+    let mut pfn_peer_ids = Vec::with_capacity(num_pfns as usize);
+    for i in 0..num_pfns {
+        // Create a config for the PFN. Note: this needs to be done here
+        // because the config will generate a unique peer ID for the PFN.
+        let mut pfn_config = ctx.swarm.read().await.get_default_pfn_node_config();
+        let mut base_config = NodeConfig::default();
+        if let Some(f) = config_override_fn.as_ref() {
+            f(&mut pfn_config, &mut base_config);
+        }
+        let pfn_override_config = OverrideNodeConfig::new(pfn_config, base_config);
+
+        // Add the PFN to the swarm
+        let peer_id = ctx
+            .swarm
+            .write()
+            .await
+            .add_full_node(&pfn_version, pfn_override_config)
+            .await
+            .unwrap();
+
+        // Verify the PFN was added
+        if ctx.swarm.read().await.full_node(peer_id).is_none() {
+            panic!(
+                "Failed to locate PFN {:?} in the swarm! Peer ID: {:?}",
+                i, peer_id
+            );
+        }
+
+        // Return the peer ID
+        info!("Created PFN {:?} with peer ID: {:?}", i, peer_id);
+        pfn_peer_ids.push(peer_id);
+    }

     Ok(pfn_peer_ids)
 }
diff --git a/testsuite/testcases/src/quorum_store_onchain_enable_test.rs b/testsuite/testcases/src/quorum_store_onchain_enable_test.rs
index 63c8e6e505fa6..873ae870e7a1b 100644
--- a/testsuite/testcases/src/quorum_store_onchain_enable_test.rs
+++ b/testsuite/testcases/src/quorum_store_onchain_enable_test.rs
@@ -4,15 +4,15 @@
 use crate::{generate_onchain_config_blob, NetworkLoadTest};
 use anyhow::Ok;
 use aptos::test::CliTestFramework;
-use aptos_forge::{NetworkTest, NodeExt, SwarmExt, Test};
+use aptos_forge::{NetworkContextSynchronizer, NetworkTest, NodeExt, SwarmExt, Test};
 use aptos_logger::info;
 use aptos_sdk::bcs;
 use aptos_types::{
     account_config::CORE_CODE_ADDRESS,
     on_chain_config::{ConsensusConfigV1, OnChainConsensusConfig},
 };
-use std::time::Duration;
-use tokio::runtime::Runtime;
+use async_trait::async_trait;
+use std::{sync::Arc, time::Duration};

 const MAX_NODE_LAG_SECS: u64 = 360;

@@ -24,92 +24,95 @@ impl Test for QuorumStoreOnChainEnableTest {
     }
 }

+#[async_trait]
 impl NetworkLoadTest for QuorumStoreOnChainEnableTest {
-    fn test(
+    async fn test(
         &self,
-        swarm: &mut dyn aptos_forge::Swarm,
+        swarm: Arc<tokio::sync::RwLock<Box<dyn aptos_forge::Swarm>>>,
         _report: &mut aptos_forge::TestReport,
         duration: std::time::Duration,
     ) -> anyhow::Result<()> {
-        let runtime = Runtime::new().unwrap();
         let faucet_endpoint: reqwest::Url = "http://localhost:8081".parse().unwrap();
-        let rest_client = swarm.validators().next().unwrap().rest_client();
-
-        let mut cli = runtime.block_on(async {
-            CliTestFramework::new(
-                swarm.validators().next().unwrap().rest_api_endpoint(),
-                faucet_endpoint,
-                /*num_cli_accounts=*/ 0,
-            )
-            .await
-        });
-
-        std::thread::sleep(duration / 2);
-
-        runtime.block_on(async {
-
-            let root_cli_index = cli.add_account_with_address_to_cli(
-                swarm.chain_info().root_account().private_key().clone(),
-                swarm.chain_info().root_account().address(),
-            );
-
-            let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes(
-                &rest_client
-                    .get_account_resource_bcs::<Vec<u8>>(
-                        CORE_CODE_ADDRESS,
-                        "0x1::consensus_config::ConsensusConfig",
-                    )
-                    .await
-                    .unwrap()
-                    .into_inner(),
+        let (rest_client, rest_api_endpoint) = {
+            let swarm = swarm.read().await;
+            let first_validator = swarm.validators().next().unwrap();
+            let rest_client = first_validator.rest_client();
+            let rest_api_endpoint = first_validator.rest_api_endpoint();
+            (rest_client, rest_api_endpoint)
+        };
+        let mut cli = CliTestFramework::new(
+            rest_api_endpoint,
+            faucet_endpoint,
+            /*num_cli_accounts=*/ 0,
+        )
+        .await;
+
+        tokio::time::sleep(duration / 2).await;
+
+        let root_cli_index = {
+            let root_account = swarm.read().await.chain_info().root_account();
+            cli.add_account_with_address_to_cli(
+                root_account.private_key().clone(),
+                root_account.address(),
             )
-            .unwrap();
-
-            let inner = match current_consensus_config {
-                OnChainConsensusConfig::V1(inner) => inner,
-                OnChainConsensusConfig::V2(_) => panic!("Unexpected V2 config"),
-                _ => unimplemented!()
-            };
-
-            // Change to V2
-            let new_consensus_config = OnChainConsensusConfig::V2(ConsensusConfigV1 { ..inner });
-
-            let update_consensus_config_script = format!(
-                r#"
-        script {{
-            use aptos_framework::aptos_governance;
-            use aptos_framework::consensus_config;
-            fun main(core_resources: &signer) {{
-                let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001);
-                let config_bytes = {};
-                consensus_config::set(&framework_signer, config_bytes);
-            }}
+        };
+
+        let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes(
+            &rest_client
+                .get_account_resource_bcs::<Vec<u8>>(
+                    CORE_CODE_ADDRESS,
+                    "0x1::consensus_config::ConsensusConfig",
+                )
+                .await
+                .unwrap()
+                .into_inner(),
+        )
+        .unwrap();
+
+        let inner = match current_consensus_config {
+            OnChainConsensusConfig::V1(inner) => inner,
+            OnChainConsensusConfig::V2(_) => panic!("Unexpected V2 config"),
+            _ => unimplemented!(),
+        };
+
+        // Change to V2
+        let new_consensus_config = OnChainConsensusConfig::V2(ConsensusConfigV1 { ..inner });
+
+        let update_consensus_config_script = format!(
+            r#"
+    script {{
+        use aptos_framework::aptos_governance;
+        use aptos_framework::consensus_config;
+        fun main(core_resources: &signer) {{
+            let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001);
+            let config_bytes = {};
+            consensus_config::set(&framework_signer, config_bytes);
         }}
-        "#,
-                generate_onchain_config_blob(&bcs::to_bytes(&new_consensus_config).unwrap())
-            );
+    }}
+    "#,
+            generate_onchain_config_blob(&bcs::to_bytes(&new_consensus_config).unwrap())
+        );

-            cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script)
-                .await
-        })?;
+        cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script)
+            .await?;

-        std::thread::sleep(duration / 2);
+        tokio::time::sleep(duration / 2).await;

         // Wait for all nodes to synchronize and stabilize.
info!("Waiting for the validators to be synchronized."); - runtime.block_on(async { - swarm - .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) - .await - })?; + swarm + .read() + .await + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await?; Ok(()) } } +#[async_trait] impl NetworkTest for QuorumStoreOnChainEnableTest { - fn run(&self, ctx: &mut aptos_forge::NetworkContext<'_>) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/reconfiguration_test.rs b/testsuite/testcases/src/reconfiguration_test.rs index 57f99767eb194..eb0c0a7e38409 100644 --- a/testsuite/testcases/src/reconfiguration_test.rs +++ b/testsuite/testcases/src/reconfiguration_test.rs @@ -3,7 +3,8 @@ // SPDX-License-Identifier: Apache-2.0 use anyhow::anyhow; -use aptos_forge::{NetworkContext, NetworkTest, Result, Test}; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test}; +use async_trait::async_trait; pub struct ReconfigurationTest; @@ -13,8 +14,9 @@ impl Test for ReconfigurationTest { } } +#[async_trait] impl NetworkTest for ReconfigurationTest { - fn run(&self, _ctx: &mut NetworkContext<'_>) -> Result<()> { + async fn run<'a>(&self, _ctx: NetworkContextSynchronizer<'a>) -> Result<()> { Err(anyhow!("Not supported in aptos-framework yet")) } // TODO(https://github.com/aptos-labs/aptos-core/issues/317): add back after support those transactions in aptos-framework diff --git a/testsuite/testcases/src/state_sync_performance.rs b/testsuite/testcases/src/state_sync_performance.rs index 22f43cc7a1569..2e0948ccc48bf 100644 --- a/testsuite/testcases/src/state_sync_performance.rs +++ b/testsuite/testcases/src/state_sync_performance.rs @@ -5,12 +5,13 @@ use crate::generate_traffic; use anyhow::bail; use aptos_forge::{ - get_highest_synced_epoch, get_highest_synced_version, NetworkContext, NetworkTest, Result, - SwarmExt, Test, + get_highest_synced_epoch, get_highest_synced_version, NetworkContext, + NetworkContextSynchronizer, NetworkTest, Result, SwarmExt, Test, }; use aptos_logger::info; use aptos_sdk::move_types::account_address::AccountAddress; -use std::time::Instant; +use async_trait::async_trait; +use std::{ops::DerefMut, time::Instant}; use tokio::{runtime::Runtime, time::Duration}; const MAX_EPOCH_CHANGE_SECS: u64 = 300; // Max amount of time (in seconds) to wait for an epoch change @@ -27,15 +28,18 @@ impl Test for StateSyncFullnodePerformance { } } +#[async_trait] impl NetworkTest for StateSyncFullnodePerformance { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { - let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name())?; + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name()).await?; // Emit a lot of traffic and ensure the fullnodes can all sync - emit_traffic_and_ensure_bounded_sync(ctx, &all_fullnodes)?; + emit_traffic_and_ensure_bounded_sync(ctx, &all_fullnodes).await?; // Stop and reset the fullnodes so they start syncing from genesis - stop_and_reset_nodes(ctx, &all_fullnodes, &[])?; + stop_and_reset_nodes(ctx, &all_fullnodes, &[]).await?; // Wait for all nodes to catch up to the highest synced version // then calculate and display the throughput results. 
@@ -53,28 +57,32 @@ impl Test for StateSyncFullnodeFastSyncPerformance { } } +#[async_trait] impl NetworkTest for StateSyncFullnodeFastSyncPerformance { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { - let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name())?; + async fn run<'a>(&self, ctxa: NetworkContextSynchronizer<'a>) -> Result<()> { + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name()).await?; // Emit a lot of traffic and ensure the fullnodes can all sync - emit_traffic_and_ensure_bounded_sync(ctx, &all_fullnodes)?; + emit_traffic_and_ensure_bounded_sync(ctx, &all_fullnodes).await?; // Wait for an epoch change to ensure fast sync can download all the latest states info!("Waiting for an epoch change."); - let runtime = Runtime::new().unwrap(); - runtime.block_on(async { - ctx.swarm() - .wait_for_all_nodes_to_change_epoch(Duration::from_secs(MAX_EPOCH_CHANGE_SECS)) + { + ctx.swarm + .read() .await - })?; + .wait_for_all_nodes_to_change_epoch(Duration::from_secs(MAX_EPOCH_CHANGE_SECS)) + .await?; + } // Get the highest known epoch in the chain - let highest_synced_epoch = runtime.block_on(async { - get_highest_synced_epoch(&ctx.swarm().get_all_nodes_clients_with_names()) + let highest_synced_epoch = { + get_highest_synced_epoch(&ctx.swarm.read().await.get_all_nodes_clients_with_names()) .await .unwrap_or(0) - }); + }; if highest_synced_epoch == 0 { return Err(anyhow::format_err!( "The swarm has synced 0 epochs! Something has gone wrong!" @@ -82,12 +90,19 @@ impl NetworkTest for StateSyncFullnodeFastSyncPerformance { } // Fetch the number of state values held on-chain - let fullnode_name = ctx.swarm().full_nodes().next().unwrap().name(); - let prom_query = format!( - "{}{{instance=\"{}\"}}", - NUM_STATE_VALUE_COUNTER_NAME, &fullnode_name - ); - let promql_result = runtime.block_on(ctx.swarm().query_metrics(&prom_query, None, None))?; + let prom_query = { + let swarm = ctx.swarm.read().await; + let fullnode_name = swarm.full_nodes().next().unwrap().name(); + format!( + "{}{{instance=\"{}\"}}", + NUM_STATE_VALUE_COUNTER_NAME, &fullnode_name + ) + }; + + let promql_result = { + let swarm = ctx.swarm.read().await; + swarm.query_metrics(&prom_query, None, None).await? + }; let number_of_state_values = match promql_result.as_instant().unwrap().first() { Some(instant_vector) => instant_vector.sample().value() as u64, None => { @@ -103,7 +118,7 @@ impl NetworkTest for StateSyncFullnodeFastSyncPerformance { ); // Stop and reset the fullnodes so they start syncing from genesis - stop_and_reset_nodes(ctx, &all_fullnodes, &[])?; + stop_and_reset_nodes(ctx, &all_fullnodes, &[]).await?; // Wait for all nodes to catch up to the highest synced epoch // then calculate and display the throughput results. @@ -129,15 +144,21 @@ impl Test for StateSyncValidatorPerformance { } } +#[async_trait] impl NetworkTest for StateSyncValidatorPerformance { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + async fn run<'a>(&self, ctxa: NetworkContextSynchronizer<'a>) -> Result<()> { + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); // Verify we have at least 7 validators (i.e., 3f+1, where f is 2) // so we can kill 2 validators but still make progress. 
- let all_validators = ctx - .swarm() - .validators() - .map(|v| v.peer_id()) - .collect::>(); + let all_validators = { + ctx.swarm + .read() + .await + .validators() + .map(|v| v.peer_id()) + .collect::>() + }; let num_validators = all_validators.len(); if num_validators < 7 { return Err(anyhow::format_err!( @@ -155,12 +176,12 @@ impl NetworkTest for StateSyncValidatorPerformance { ); // Generate some traffic through the validators. - emit_traffic_and_ensure_bounded_sync(ctx, &all_validators)?; + emit_traffic_and_ensure_bounded_sync(ctx, &all_validators).await?; // Stop and reset two validators so they start syncing from genesis info!("Deleting data for two validators!"); let validators_to_reset = &all_validators[0..2]; - stop_and_reset_nodes(ctx, &[], validators_to_reset)?; + stop_and_reset_nodes(ctx, &[], validators_to_reset).await?; // Wait for all nodes to catch up to the highest synced version // then calculate and display the throughput results. @@ -170,16 +191,19 @@ impl NetworkTest for StateSyncValidatorPerformance { /// Verifies the setup for the given fullnode test and returns the /// set of fullnodes. -fn get_fullnodes_and_check_setup( - ctx: &mut NetworkContext, +async fn get_fullnodes_and_check_setup<'a>( + ctx: &mut NetworkContext<'a>, test_name: &'static str, ) -> Result> { // Verify we have at least 1 fullnode - let all_fullnodes = ctx - .swarm() - .full_nodes() - .map(|v| v.peer_id()) - .collect::>(); + let all_fullnodes = { + ctx.swarm + .read() + .await + .full_nodes() + .map(|v| v.peer_id()) + .collect::>() + }; if all_fullnodes.is_empty() { return Err(anyhow::format_err!( "Fullnode test {} requires at least 1 fullnode!", @@ -191,7 +215,7 @@ fn get_fullnodes_and_check_setup( info!( "Running state sync test {:?} with {:?} validators and {:?} fullnodes.", test_name, - ctx.swarm().validators().count(), + ctx.swarm.read().await.validators().count(), all_fullnodes.len() ); @@ -200,8 +224,8 @@ fn get_fullnodes_and_check_setup( /// Emits traffic through all specified nodes and ensures all nodes can /// sync within a reasonable time bound. -fn emit_traffic_and_ensure_bounded_sync( - ctx: &mut NetworkContext, +async fn emit_traffic_and_ensure_bounded_sync<'a>( + ctx: &mut NetworkContext<'a>, nodes_to_send_traffic: &[AccountAddress], ) -> Result<()> { // Generate some traffic through the specified nodes. @@ -211,52 +235,54 @@ fn emit_traffic_and_ensure_bounded_sync( "Generating the initial traffic for {:?} seconds.", emit_txn_duration.as_secs() ); - let _txn_stat = generate_traffic(ctx, nodes_to_send_traffic, emit_txn_duration)?; + let _txn_stat = generate_traffic(ctx, nodes_to_send_traffic, emit_txn_duration).await?; // Wait for all nodes to synchronize. We time bound this to ensure // nodes don't fall too far behind. 
info!("Waiting for the validators and fullnodes to be synchronized."); - Runtime::new().unwrap().block_on(async { - ctx.swarm() - .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) - .await - })?; + ctx.swarm + .read() + .await + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await?; Ok(()) } /// Stops and resets all specified nodes -fn stop_and_reset_nodes( - ctx: &mut NetworkContext, +async fn stop_and_reset_nodes<'a>( + ctx: &mut NetworkContext<'a>, fullnodes_to_reset: &[AccountAddress], validators_to_reset: &[AccountAddress], ) -> Result<()> { - let runtime = Runtime::new().unwrap(); - // Stop and reset all fullnodes info!("Deleting all fullnode data!"); for fullnode_id in fullnodes_to_reset { - let fullnode = ctx.swarm().full_node_mut(*fullnode_id).unwrap(); - runtime.block_on(async { fullnode.clear_storage().await })?; + let swarm = ctx.swarm.read().await; + let fullnode = swarm.full_node(*fullnode_id).unwrap(); + fullnode.clear_storage().await?; } // Stop and reset all validators info!("Deleting all validator data!"); for valdiator_id in validators_to_reset { - let validator = ctx.swarm().validator_mut(*valdiator_id).unwrap(); - runtime.block_on(async { validator.clear_storage().await })?; + let swarm = ctx.swarm.read().await; + let validator = swarm.validator(*valdiator_id).unwrap(); + validator.clear_storage().await?; } // Restart the fullnodes so they start syncing from a fresh state for fullnode_id in fullnodes_to_reset { - let fullnode = ctx.swarm().full_node_mut(*fullnode_id).unwrap(); - runtime.block_on(async { fullnode.start().await })?; + let swarm = ctx.swarm.read().await; + let fullnode = swarm.full_node(*fullnode_id).unwrap(); + fullnode.start().await?; } // Restart the validators so they start syncing from a fresh state for valdiator_id in validators_to_reset { - let validator = ctx.swarm().validator_mut(*valdiator_id).unwrap(); - runtime.block_on(async { validator.start().await })?; + let swarm = ctx.swarm.read().await; + let validator = swarm.validator(*valdiator_id).unwrap(); + validator.start().await?; } Ok(()) @@ -278,7 +304,9 @@ fn display_state_sync_state_throughput( // We allow up to half the test time to do this. let node_sync_duration = ctx.global_duration.checked_div(2).unwrap(); runtime.block_on(async { - ctx.swarm() + ctx.swarm + .read() + .await .wait_for_all_nodes_to_catchup_to_epoch(highest_synced_epoch, node_sync_duration) .await })?; @@ -320,7 +348,7 @@ fn ensure_state_sync_transaction_throughput( // Get the highest synced version for the chain let runtime = Runtime::new().unwrap(); let highest_synced_version = runtime.block_on(async { - get_highest_synced_version(&ctx.swarm().get_all_nodes_clients_with_names()) + get_highest_synced_version(&ctx.swarm.read().await.get_all_nodes_clients_with_names()) .await .unwrap_or(0) }); @@ -334,7 +362,9 @@ fn ensure_state_sync_transaction_throughput( // We allow up to half the test time to do this. 
     let node_sync_duration = ctx.global_duration.checked_div(2).unwrap();
     runtime.block_on(async {
-        ctx.swarm()
+        ctx.swarm
+            .read()
+            .await
             .wait_for_all_nodes_to_catchup(node_sync_duration)
             .await
     })?;
diff --git a/testsuite/testcases/src/three_region_simulation_test.rs b/testsuite/testcases/src/three_region_simulation_test.rs
index c7d97fd3ecf29..3fb63ef4972e4 100644
--- a/testsuite/testcases/src/three_region_simulation_test.rs
+++ b/testsuite/testcases/src/three_region_simulation_test.rs
@@ -3,10 +3,12 @@

 use crate::{LoadDestination, NetworkLoadTest};
 use aptos_forge::{
-    GroupNetworkBandwidth, GroupNetworkDelay, NetworkContext, NetworkTest, Swarm, SwarmChaos,
-    SwarmNetworkBandwidth, SwarmNetworkDelay, Test,
+    GroupNetworkBandwidth, GroupNetworkDelay, NetworkContext, NetworkContextSynchronizer,
+    NetworkTest, SwarmChaos, SwarmNetworkBandwidth, SwarmNetworkDelay, Test,
 };
 use aptos_logger::info;
+use aptos_types::account_address::AccountAddress;
+use async_trait::async_trait;

 /// Represents a test that simulates a network with 3 regions, all in the same cloud.
 pub struct ThreeRegionSameCloudSimulationTest;
@@ -24,9 +26,9 @@ impl Test for ThreeRegionSameCloudSimulationTest {
 /// 4. Currently simulating a 50 percentile network delay between us-west <--> af-south <--> eu-north
 ///
 /// This is deprecated and flawed. Use [crate::multi_region_network_test::MultiRegionNetworkEmulationTest] instead
-fn create_three_region_swarm_network_delay(swarm: &dyn Swarm) -> SwarmNetworkDelay {
-    let all_validators = swarm.validators().map(|v| v.peer_id()).collect::<Vec<_>>();
-
+fn create_three_region_swarm_network_delay(
+    all_validators: Vec<AccountAddress>,
+) -> SwarmNetworkDelay {
     // each region has 1/3 of the validators
     let region_size = all_validators.len() / 3;
     let mut us_west = all_validators;
@@ -82,29 +84,40 @@ fn create_bandwidth_limit() -> SwarmNetworkBandwidth {
     }
 }

+#[async_trait]
 impl NetworkLoadTest for ThreeRegionSameCloudSimulationTest {
-    fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result<LoadDestination> {
+    async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<LoadDestination> {
         // inject network delay
-        let delay = create_three_region_swarm_network_delay(ctx.swarm());
+        let all_validators = {
+            ctx.swarm
+                .read()
+                .await
+                .validators()
+                .map(|v| v.peer_id())
+                .collect::<Vec<_>>()
+        };
+        let delay = create_three_region_swarm_network_delay(all_validators);
+        let mut swarm = ctx.swarm.write().await;
         let chaos = SwarmChaos::Delay(delay);
-        ctx.runtime.block_on(ctx.swarm.inject_chaos(chaos))?;
+        swarm.inject_chaos(chaos).await?;

         // inject bandwidth limit
         let bandwidth = create_bandwidth_limit();
         let chaos = SwarmChaos::Bandwidth(bandwidth);
-        ctx.runtime.block_on(ctx.swarm.inject_chaos(chaos))?;
+        swarm.inject_chaos(chaos).await?;

         Ok(LoadDestination::FullnodesOtherwiseValidators)
     }

-    fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> {
-        ctx.runtime.block_on(ctx.swarm.remove_all_chaos())?;
+    async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> {
+        ctx.swarm.write().await.remove_all_chaos().await?;
         Ok(())
     }
 }

+#[async_trait]
 impl NetworkTest for ThreeRegionSameCloudSimulationTest {
-    fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> {
-        <dyn NetworkLoadTest>::run(self, ctx)
+    async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> {
+        <dyn NetworkLoadTest>::run(self, ctx).await
     }
 }
diff --git a/testsuite/testcases/src/twin_validator_test.rs b/testsuite/testcases/src/twin_validator_test.rs
index 53905027f22b5..0cd9a50ae9afd 100644
--- a/testsuite/testcases/src/twin_validator_test.rs
+++ b/testsuite/testcases/src/twin_validator_test.rs
@@ -3,10 +3,13 @@

 use crate::NetworkLoadTest;
 use anyhow::Context;
-use aptos_forge::{NetworkContext, NetworkTest, NodeExt, Test};
+use aptos_forge::{NetworkContextSynchronizer, NetworkTest, NodeExt, Test};
 use aptos_sdk::move_types::account_address::AccountAddress;
-use std::time::{Duration, Instant};
-use tokio::runtime::Runtime;
+use async_trait::async_trait;
+use std::{
+    ops::DerefMut,
+    time::{Duration, Instant},
+};

 pub struct TwinValidatorTest;

@@ -18,57 +21,62 @@ impl Test for TwinValidatorTest {

 impl NetworkLoadTest for TwinValidatorTest {}

+#[async_trait]
 impl NetworkTest for TwinValidatorTest {
-    fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> {
-        let runtime = Runtime::new().unwrap();
+    async fn run<'a>(&self, ctxa: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> {
+        {
+            let mut ctx_locker = ctxa.ctx.lock().await;
+            let ctx = ctx_locker.deref_mut();
+
+            let all_validators_ids = {
+                ctx.swarm
+                    .read()
+                    .await
+                    .validators()
+                    .map(|v| v.peer_id())
+                    .collect::<Vec<_>>()
+            };
+            let validator_count = all_validators_ids.len();
+            let twin_count = 2;

-        let all_validators_ids = ctx
-            .swarm()
-            .validators()
-            .map(|v| v.peer_id())
-            .collect::<Vec<_>>();
-        let validator_count = all_validators_ids.len();
-        let twin_count = 2;
-        runtime.block_on(async {
             for i in 0..twin_count {
                 let main_id: AccountAddress = all_validators_ids[i];
                 let twin_id = all_validators_ids[i + validator_count - twin_count];
-                ctx.swarm()
-                    .validator_mut(twin_id)
+                let swarm = ctx.swarm.read().await;
+                swarm
+                    .validator(twin_id)
                     .unwrap()
                     .clear_storage()
                     .await
                     .context(format!(
                         "Error while clearing storage and stopping {twin_id}"
                     ))?;
-                let main_identity = ctx
-                    .swarm()
-                    .validator_mut(main_id)
+                let main_identity = swarm
+                    .validator(main_id)
                     .unwrap()
                     .get_identity()
                     .await
                     .context(format!("Error while getting identity for {main_id}"))?;
-                ctx.swarm()
-                    .validator_mut(twin_id)
+                swarm
+                    .validator(twin_id)
                     .unwrap()
                     .set_identity(main_identity)
                     .await
                     .context(format!("Error while setting identity for {twin_id}"))?;
-                ctx.swarm()
-                    .validator_mut(twin_id)
+                swarm
+                    .validator(twin_id)
                     .unwrap()
                     .start()
                     .await
                     .context(format!("Error while starting {twin_id}"))?;
-                ctx.swarm()
-                    .validator_mut(twin_id)
+                swarm
+                    .validator(twin_id)
                     .unwrap()
                     .wait_until_healthy(Instant::now() + Duration::from_secs(300))
                     .await
                     .context(format!("Error while waiting for {twin_id}"))?;
             }
-            Ok::<(), anyhow::Error>(())
-        })?;
-        <dyn NetworkLoadTest>::run(self, ctx)
+        }
+        <dyn NetworkLoadTest>::run(self, ctxa).await
     }
 }
diff --git a/testsuite/testcases/src/two_traffics_test.rs b/testsuite/testcases/src/two_traffics_test.rs
index dd824174a4ee9..931881f4e6956 100644
--- a/testsuite/testcases/src/two_traffics_test.rs
+++ b/testsuite/testcases/src/two_traffics_test.rs
@@ -1,16 +1,18 @@
 // Copyright © Aptos Foundation
 // SPDX-License-Identifier: Apache-2.0

-use crate::{
-    create_emitter_and_request, traffic_emitter_runtime, LoadDestination, NetworkLoadTest,
-};
+use crate::{create_emitter_and_request, LoadDestination, NetworkLoadTest};
 use aptos_forge::{
     success_criteria::{SuccessCriteria, SuccessCriteriaChecker},
-    EmitJobRequest, NetworkContext, NetworkTest, Result, Swarm, Test, TestReport,
+    EmitJobRequest, NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport,
 };
 use aptos_logger::info;
+use async_trait::async_trait;
 use rand::{rngs::OsRng, Rng, SeedableRng};
-use std::time::{Duration, Instant};
+use std::{
+    sync::Arc,
+    time::{Duration, Instant},
+};

 pub struct TwoTrafficsTest {
     pub inner_traffic: EmitJobRequest,
@@ -23,10 +25,11 @@ impl Test for TwoTrafficsTest {
     }
 }

+#[async_trait]
 impl NetworkLoadTest for TwoTrafficsTest {
-    fn test(
+    async fn test(
         &self,
-        swarm: &mut dyn Swarm,
+        swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>,
         report: &mut TestReport,
         duration: Duration,
     ) -> Result<()> {
@@ -34,26 +37,28 @@ impl NetworkLoadTest for TwoTrafficsTest {
             "Running TwoTrafficsTest test for duration {}s",
             duration.as_secs_f32()
         );
-        let nodes_to_send_load_to =
-            LoadDestination::FullnodesOtherwiseValidators.get_destination_nodes(swarm);
+        let nodes_to_send_load_to = LoadDestination::FullnodesOtherwiseValidators
+            .get_destination_nodes(swarm.clone())
+            .await;
         let rng = ::rand::rngs::StdRng::from_seed(OsRng.gen());

         let (emitter, emit_job_request) = create_emitter_and_request(
-            swarm,
+            swarm.clone(),
             self.inner_traffic.clone(),
             &nodes_to_send_load_to,
             rng,
-        )?;
-
-        let rt = traffic_emitter_runtime()?;
+        )
+        .await?;

         let test_start = Instant::now();
-        let stats = rt.block_on(emitter.emit_txn_for(
-            swarm.chain_info().root_account,
-            emit_job_request,
-            duration,
-        ))?;
+        let stats = emitter
+            .emit_txn_for(
+                swarm.read().await.chain_info().root_account,
+                emit_job_request,
+                duration,
+            )
+            .await?;
         let actual_test_duration = test_start.elapsed();

         info!(
@@ -77,8 +82,9 @@ impl NetworkLoadTest for TwoTrafficsTest {
     }
 }

+#[async_trait]
 impl NetworkTest for TwoTrafficsTest {
-    fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> {
-        <dyn NetworkLoadTest>::run(self, ctx)
+    async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> {
+        <dyn NetworkLoadTest>::run(self, ctx).await
     }
 }
diff --git a/testsuite/testcases/src/validator_join_leave_test.rs b/testsuite/testcases/src/validator_join_leave_test.rs
index 092871293febc..edf9c59fc62c5 100644
--- a/testsuite/testcases/src/validator_join_leave_test.rs
+++ b/testsuite/testcases/src/validator_join_leave_test.rs
@@ -4,15 +4,15 @@
 use crate::{LoadDestination, NetworkLoadTest};
 use aptos::{account::create::DEFAULT_FUNDED_COINS, test::CliTestFramework};
 use aptos_forge::{
-    reconfig, NetworkContext, NetworkTest, NodeExt, Result, Swarm, SwarmExt, Test, TestReport,
-    FORGE_KEY_SEED,
+    reconfig, NetworkContext, NetworkContextSynchronizer, NetworkTest, NodeExt, Result, Swarm,
+    SwarmExt, Test, TestReport, FORGE_KEY_SEED,
 };
 use aptos_keygen::KeyGen;
 use aptos_logger::info;
 use aptos_sdk::crypto::{ed25519::Ed25519PrivateKey, PrivateKey};
 use aptos_types::{account_address::AccountAddress, transaction::authenticator::AuthenticationKey};
-use std::time::Duration;
-use tokio::runtime::Runtime;
+use async_trait::async_trait;
+use std::{sync::Arc, time::Duration};

 const MAX_NODE_LAG_SECS: u64 = 360;

@@ -24,20 +24,21 @@ impl Test for ValidatorJoinLeaveTest {
     }
 }

+#[async_trait]
 impl NetworkLoadTest for ValidatorJoinLeaveTest {
-    fn setup(&self, _ctx: &mut NetworkContext) -> Result<LoadDestination> {
+    async fn setup<'a>(&self, _ctx: &mut NetworkContext<'a>) -> Result<LoadDestination> {
         Ok(LoadDestination::FullnodesOtherwiseValidators)
     }

-    fn test(
+    async fn test(
         &self,
-        swarm: &mut dyn Swarm,
+        swarm: Arc<tokio::sync::RwLock<Box<dyn Swarm>>>,
         _report: &mut TestReport,
         duration: Duration,
     ) -> Result<()> {
         // Verify we have at least 7 validators (i.e., 3f+1, where f is 2)
         // so we can lose 2 validators but still make progress.
-        let num_validators = swarm.validators().count();
+        let num_validators = { swarm.read().await.validators().count() };
         if num_validators < 7 {
             return Err(anyhow::format_err!(
                 "ValidatorSet leaving and rejoining test requires at least 7 validators! Given: {:?}.",
Given: {:?}.", @@ -47,20 +48,23 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { let faucet_endpoint: reqwest::Url = "http://localhost:8081".parse().unwrap(); // Connect the operator tool to the node's JSON RPC API - let rest_client = swarm.validators().next().unwrap().rest_client(); - let transaction_factory = swarm.chain_info().transaction_factory(); - let runtime = Runtime::new().unwrap(); - - let mut cli = runtime.block_on(async { - CliTestFramework::new( - swarm.validators().next().unwrap().rest_api_endpoint(), - faucet_endpoint, - /*num_cli_accounts=*/ 0, - ) - .await - }); - - let mut public_info = swarm.chain_info().into_aptos_public_info(); + let transaction_factory = { swarm.read().await.chain_info().transaction_factory() }; + + let (rest_client, rest_api_endpoint) = { + let swarm = swarm.read().await; + let first_validator = swarm.validators().next().unwrap(); + let rest_client = first_validator.rest_client(); + let rest_api_endpoint = first_validator.rest_api_endpoint(); + (rest_client, rest_api_endpoint) + }; + let mut cli = CliTestFramework::new( + rest_api_endpoint, + faucet_endpoint, + /*num_cli_accounts=*/ 0, + ) + .await; + + let mut public_info = { swarm.read().await.chain_info().into_aptos_public_info() }; let mut validator_cli_indices = Vec::new(); @@ -77,30 +81,25 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { let mut keygen = KeyGen::from_seed(seed_slice); - let (validator_cli_index, _keys, account_balance) = runtime.block_on(async { - let (validator_cli_index, keys) = - init_validator_account(&mut cli, &mut keygen).await; - - let auth_key = AuthenticationKey::ed25519(&keys.account_private_key.public_key()); - let validator_account_address = AccountAddress::new(*auth_key.account_address()); + let (validator_cli_index, keys) = init_validator_account(&mut cli, &mut keygen).await; - public_info - .mint(validator_account_address, DEFAULT_FUNDED_COINS) - .await - .unwrap(); + let auth_key = AuthenticationKey::ed25519(&keys.account_private_key.public_key()); + let validator_account_address = AccountAddress::new(*auth_key.account_address()); - let account_balance = public_info - .get_balance(validator_account_address) - .await - .unwrap(); + public_info + .mint(validator_account_address, DEFAULT_FUNDED_COINS) + .await + .unwrap(); - (validator_cli_index, keys, account_balance) - }); + let account_balance = public_info + .get_balance(validator_account_address) + .await + .unwrap(); assert_eq!(account_balance, DEFAULT_FUNDED_COINS); validator_cli_indices.push(validator_cli_index); assert_eq!( - runtime.block_on(get_validator_state(&cli, validator_cli_index)), + get_validator_state(&cli, validator_cli_index).await, ValidatorState::ACTIVE ); } @@ -114,79 +113,66 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { // Wait for all nodes to synchronize and stabilize. info!("Waiting for the validators to be synchronized."); - runtime.block_on(async { + { swarm - .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .read() .await - })?; + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await?; + } // Wait for 1/3 of the test duration. - std::thread::sleep(duration / 3); - - runtime.block_on(async { - // 1/3 validators leave the validator set. 
- info!("Make the last 1/3 validators leave the validator set!"); - for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { - cli.leave_validator_set(*operator_index, None) - .await - .unwrap(); - - reconfig( - &rest_client, - &transaction_factory, - swarm.chain_info().root_account(), - ) - .await; - } - - reconfig( - &rest_client, - &transaction_factory, - swarm.chain_info().root_account(), - ) - .await; - }); + tokio::time::sleep(duration / 3).await; + + // 1/3 validators leave the validator set. + info!("Make the last 1/3 validators leave the validator set!"); + for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { + cli.leave_validator_set(*operator_index, None) + .await + .unwrap(); + + let root_account = swarm.read().await.chain_info().root_account(); + reconfig(&rest_client, &transaction_factory, root_account).await; + } + + { + let root_account = swarm.read().await.chain_info().root_account(); + reconfig(&rest_client, &transaction_factory, root_account).await; + } // Wait for 1/3 of the test duration. - std::thread::sleep(duration / 3); - - runtime.block_on(async { - // Rejoining validator set. - info!("Make the last 1/3 validators rejoin the validator set!"); - for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { - cli.join_validator_set(*operator_index, None).await.unwrap(); - - reconfig( - &rest_client, - &transaction_factory, - swarm.chain_info().root_account(), - ) - .await; - } - - reconfig( - &rest_client, - &transaction_factory, - swarm.chain_info().root_account(), - ) - .await; - }); + tokio::time::sleep(duration / 3).await; + + // Rejoining validator set. + info!("Make the last 1/3 validators rejoin the validator set!"); + for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { + cli.join_validator_set(*operator_index, None).await.unwrap(); + + let root_account = swarm.read().await.chain_info().root_account(); + reconfig(&rest_client, &transaction_factory, root_account).await; + } + + { + let root_account = swarm.read().await.chain_info().root_account(); + reconfig(&rest_client, &transaction_factory, root_account).await; + } // Wait for all nodes to synchronize and stabilize. 
info!("Waiting for the validators to be synchronized."); - runtime.block_on(async { - swarm - .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) - .await - })?; + swarm + .read() + .await + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await?; Ok(()) } } +#[async_trait] impl NetworkTest for ValidatorJoinLeaveTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/validator_reboot_stress_test.rs b/testsuite/testcases/src/validator_reboot_stress_test.rs index 9355ac97a99d8..1a83dde6f908c 100644 --- a/testsuite/testcases/src/validator_reboot_stress_test.rs +++ b/testsuite/testcases/src/validator_reboot_stress_test.rs @@ -2,10 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 use crate::NetworkLoadTest; -use aptos_forge::{NetworkContext, NetworkTest, Result, Swarm, Test, TestReport}; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport}; +use async_trait::async_trait; use rand::{seq::SliceRandom, thread_rng}; -use std::time::Duration; -use tokio::{runtime::Runtime, time::Instant}; +use std::{sync::Arc, time::Duration}; +use tokio::time::Instant; pub struct ValidatorRebootStressTest { pub num_simultaneously: usize, @@ -19,40 +20,50 @@ impl Test for ValidatorRebootStressTest { } } +#[async_trait] impl NetworkLoadTest for ValidatorRebootStressTest { - fn test( + async fn test( &self, - swarm: &mut dyn Swarm, + swarm: Arc>>, _report: &mut TestReport, duration: Duration, ) -> Result<()> { let start = Instant::now(); - let runtime = Runtime::new().unwrap(); - let all_validators = swarm.validators().map(|v| v.peer_id()).collect::>(); - - let mut rng = thread_rng(); + let all_validators = { + swarm + .read() + .await + .validators() + .map(|v| v.peer_id()) + .collect::>() + }; while start.elapsed() < duration { - let addresses: Vec<_> = all_validators - .choose_multiple(&mut rng, self.num_simultaneously) - .cloned() - .collect(); + let addresses: Vec<_> = { + let mut rng = thread_rng(); + all_validators + .choose_multiple(&mut rng, self.num_simultaneously) + .cloned() + .collect() + }; for adr in &addresses { - let validator_to_reboot = swarm.validator_mut(*adr).unwrap(); - runtime.block_on(async { validator_to_reboot.stop().await })?; + let swarm = swarm.read().await; + let validator_to_reboot = swarm.validator(*adr).unwrap(); + validator_to_reboot.stop().await?; } if self.down_time_secs > 0.0 { - std::thread::sleep(Duration::from_secs_f32(self.down_time_secs)); + tokio::time::sleep(Duration::from_secs_f32(self.down_time_secs)).await; } for adr in &addresses { - let validator_to_reboot = swarm.validator_mut(*adr).unwrap(); - runtime.block_on(async { validator_to_reboot.start().await })?; + let swarm = swarm.read().await; + let validator_to_reboot = swarm.validator(*adr).unwrap(); + validator_to_reboot.start().await?; } if self.pause_secs > 0.0 { - std::thread::sleep(Duration::from_secs_f32(self.pause_secs)); + tokio::time::sleep(Duration::from_secs_f32(self.pause_secs)).await; } } @@ -60,8 +71,9 @@ impl NetworkLoadTest for ValidatorRebootStressTest { } } +#[async_trait] impl NetworkTest for ValidatorRebootStressTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + ::run(self, ctx).await } }