From ff39282f7b6ab3c7b5705076406e18d92abefba8 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Wed, 8 May 2024 12:59:52 -0400 Subject: [PATCH 01/28] batch_update_gradually --- testsuite/testcases/src/compatibility_test.rs | 29 +++++++++++-------- testsuite/testcases/src/lib.rs | 24 +++++++++++++++ 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index 88a6a10e419d2..3737113670007 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -2,7 +2,7 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. // SPDX-License-Identifier: Apache-2.0 -use crate::{batch_update, generate_traffic}; +use crate::{batch_update_gradually, generate_traffic}; use anyhow::bail; use aptos_forge::{NetworkContext, NetworkTest, Result, SwarmExt, Test}; use aptos_logger::info; @@ -23,6 +23,9 @@ impl Test for SimpleValidatorUpgrade { impl NetworkTest for SimpleValidatorUpgrade { fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { let runtime = Runtime::new()?; + let upgrade_wait_for_healthy = true; + let upgrade_node_delay = Duration::from_secs(10); + let upgrade_max_wait = Duration::from_secs(40); let epoch_duration = Duration::from_secs(Self::EPOCH_DURATION_SECS); @@ -53,6 +56,8 @@ impl NetworkTest for SimpleValidatorUpgrade { .validators() .map(|v| v.peer_id()) .collect::>(); + // TODO: this is the "compat" test. Expand and refine to properly validate network2. + // TODO: Ensure sustained TPS during upgrade. Slower upgrade rollout. 
let mut first_batch = all_validators.clone(); let second_batch = first_batch.split_off(first_batch.len() / 2); let first_node = first_batch.pop().unwrap(); @@ -66,9 +71,9 @@ impl NetworkTest for SimpleValidatorUpgrade { ctx.report.report_text(msg); // Generate some traffic - let txn_stat = generate_traffic(ctx, &all_validators, duration)?; + let txn_stat_prior = generate_traffic(ctx, &all_validators, duration)?; ctx.report - .report_txn_stats(format!("{}::liveness-check", self.name()), &txn_stat); + .report_txn_stats(format!("{}::liveness-check", self.name()), &txn_stat_prior); // Update the first Validator let msg = format!( @@ -77,13 +82,13 @@ impl NetworkTest for SimpleValidatorUpgrade { ); info!("{}", msg); ctx.report.report_text(msg); - runtime.block_on(batch_update(ctx, &[first_node], &new_version))?; + runtime.block_on(batch_update_gradually(ctx, &[first_node], &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; // Generate some traffic - let txn_stat = generate_traffic(ctx, &[first_node], duration)?; + let txn_stat_one = generate_traffic(ctx, &[first_node], duration)?; ctx.report.report_txn_stats( format!("{}::single-validator-upgrade", self.name()), - &txn_stat, + &txn_stat_one, ); // Update the rest of the first batch @@ -93,13 +98,13 @@ impl NetworkTest for SimpleValidatorUpgrade { ); info!("{}", msg); ctx.report.report_text(msg); - runtime.block_on(batch_update(ctx, &first_batch, &new_version))?; + runtime.block_on(batch_update_gradually(ctx, &first_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; // Generate some traffic - let txn_stat = generate_traffic(ctx, &first_batch, duration)?; + let txn_stat_half = generate_traffic(ctx, &first_batch, duration)?; ctx.report.report_txn_stats( format!("{}::half-validator-upgrade", self.name()), - &txn_stat, + &txn_stat_half, ); ctx.swarm().fork_check(epoch_duration)?; @@ -108,13 +113,13 @@ impl NetworkTest for SimpleValidatorUpgrade { let msg = 
format!("4. upgrading second batch to new version: {}", new_version); info!("{}", msg); ctx.report.report_text(msg); - runtime.block_on(batch_update(ctx, &second_batch, &new_version))?; + runtime.block_on(batch_update_gradually(ctx, &second_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; // Generate some traffic - let txn_stat = generate_traffic(ctx, &second_batch, duration)?; + let txn_stat_all = generate_traffic(ctx, &second_batch, duration)?; ctx.report.report_txn_stats( format!("{}::rest-validator-upgrade", self.name()), - &txn_stat, + &txn_stat_all, ); let msg = "5. check swarm health".to_string(); diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index a620211693382..dece19d2e19ed 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -68,6 +68,30 @@ async fn batch_update( Ok(()) } +async fn batch_update_gradually( + ctx: &mut NetworkContext<'_>, + validators_to_update: &[PeerId], + version: &Version, + wait_until_healthy: bool, + delay: Duration, + max_wait: Duration, +) -> Result<()> { + for validator in validators_to_update { + ctx.swarm().upgrade_validator(*validator, version).await?; + if wait_until_healthy { + let deadline = Instant::now() + max_wait; + ctx.swarm().validator_mut(*validator).unwrap().wait_until_healthy(deadline).await?; + } + if !delay.is_zero() { + tokio::time::sleep(delay).await; + } + } + + ctx.swarm().health_check().await?; + + Ok(()) +} + pub fn create_emitter_and_request( swarm: &mut dyn Swarm, mut emit_job_request: EmitJobRequest, From cd603974c3b764c435dc2d88f2b42a2a4bad09d3 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Thu, 16 May 2024 08:55:45 -0400 Subject: [PATCH 02/28] big refactor towards multi-threading lots of `&foo` and `&mut foo` replaced with Arc> --- .../src/emitter/account_minter.rs | 55 ++-- .../src/emitter/mod.rs | 21 +- .../src/emitter/submission_worker.rs | 109 ++++++- .../transaction-emitter-lib/src/wrappers.rs | 7 
+- .../src/account_generator.rs | 4 +- .../src/accounts_pool_wrapper.rs | 22 +- .../src/batch_transfer.rs | 4 +- .../src/bounded_batch_wrapper.rs | 3 +- .../src/call_custom_modules.rs | 6 +- crates/transaction-generator-lib/src/lib.rs | 18 +- .../src/p2p_transaction_generator.rs | 9 +- .../src/publish_modules.rs | 5 +- .../src/transaction_mix_generator.rs | 2 +- .../src/workflow_delegator.rs | 2 +- testsuite/forge-cli/src/main.rs | 15 +- testsuite/forge/src/backend/k8s/swarm.rs | 12 +- testsuite/forge/src/backend/local/swarm.rs | 11 +- testsuite/forge/src/interface/admin.rs | 6 +- testsuite/forge/src/interface/aptos.rs | 34 +- testsuite/forge/src/interface/chain_info.rs | 21 +- testsuite/forge/src/interface/network.rs | 17 +- testsuite/forge/src/interface/swarm.rs | 10 +- testsuite/forge/src/runner.rs | 6 +- testsuite/testcases/src/compatibility_test.rs | 299 +++++++++++++++--- .../src/consensus_reliability_tests.rs | 11 +- .../testcases/src/dag_onchain_enable_test.rs | 40 ++- testsuite/testcases/src/forge_setup_test.rs | 9 +- testsuite/testcases/src/framework_upgrade.rs | 23 +- .../src/fullnode_reboot_stress_test.rs | 4 +- testsuite/testcases/src/lib.rs | 72 +++-- .../testcases/src/load_vs_perf_benchmark.rs | 15 +- testsuite/testcases/src/modifiers.rs | 10 +- .../src/multi_region_network_test.rs | 4 +- .../testcases/src/network_bandwidth_test.rs | 6 +- testsuite/testcases/src/network_loss_test.rs | 4 +- .../testcases/src/network_partition_test.rs | 4 +- .../testcases/src/partial_nodes_down_test.rs | 9 +- testsuite/testcases/src/performance_test.rs | 4 +- .../src/public_fullnode_performance.rs | 7 +- .../src/quorum_store_onchain_enable_test.rs | 16 +- .../testcases/src/reconfiguration_test.rs | 4 +- .../testcases/src/state_sync_performance.rs | 40 +-- .../src/three_region_simulation_test.rs | 7 +- .../testcases/src/twin_validator_test.rs | 107 ++++--- testsuite/testcases/src/two_traffics_test.rs | 7 +- .../src/validator_join_leave_test.rs | 48 +-- 
.../src/validator_reboot_stress_test.rs | 4 +- 47 files changed, 784 insertions(+), 369 deletions(-) diff --git a/crates/transaction-emitter-lib/src/emitter/account_minter.rs b/crates/transaction-emitter-lib/src/emitter/account_minter.rs index a96e91d11eb1d..abedb58cf18b0 100644 --- a/crates/transaction-emitter-lib/src/emitter/account_minter.rs +++ b/crates/transaction-emitter-lib/src/emitter/account_minter.rs @@ -29,9 +29,11 @@ use std::{ sync::Arc, time::{Duration, Instant}, }; +use std::ops::Deref; +use aptos_types::account_address::AccountAddress; pub struct SourceAccountManager<'t> { - pub source_account: &'t LocalAccount, + pub source_account: Arc>, pub txn_executor: &'t dyn ReliableTransactionSubmitter, pub req: &'t EmitJobRequest, pub txn_factory: TransactionFactory, @@ -43,17 +45,21 @@ impl<'t> RootAccountHandle for SourceAccountManager<'t> { self.check_approve_funds(amount, reason).await.unwrap(); } - fn get_root_account(&self) -> &LocalAccount { - self.source_account + fn get_root_account(&self) -> Arc> { + self.source_account.clone() } } impl<'t> SourceAccountManager<'t> { + fn source_account_address(&self) -> AccountAddress { + self.source_account.lock().unwrap().address() + } + // returns true if we might want to recheck the volume, as it was auto-approved. 
async fn check_approve_funds(&self, amount: u64, reason: &str) -> Result { let balance = self .txn_executor - .get_account_balance(self.source_account.address()) + .get_account_balance(self.source_account_address()) .await?; Ok(if self.req.mint_to_root { // We have a root account, so amount of funds minted is not a problem @@ -63,7 +69,7 @@ impl<'t> SourceAccountManager<'t> { if balance < amount.checked_mul(100).unwrap_or(u64::MAX / 2) { info!( "Mint account {} current balance is {}, needing {} for {}, minting to refil it fully", - self.source_account.address(), + self.source_account_address(), balance, amount, reason, @@ -74,7 +80,7 @@ impl<'t> SourceAccountManager<'t> { } else { info!( "Mint account {} current balance is {}, needing {} for {}. Proceeding without minting, as balance would overflow otherwise", - self.source_account.address(), + self.source_account_address(), balance, amount, reason, @@ -85,7 +91,7 @@ impl<'t> SourceAccountManager<'t> { } else { info!( "Source account {} current balance is {}, needed {} coins for {}, or {:.3}% of its balance", - self.source_account.address(), + self.source_account_address(), balance, amount, reason, @@ -95,7 +101,7 @@ impl<'t> SourceAccountManager<'t> { if balance < amount { return Err(anyhow!( "Source ({}) doesn't have enough coins, balance {} < needed {} for {}", - self.source_account.address(), + self.source_account_address(), balance, amount, reason @@ -126,9 +132,9 @@ impl<'t> SourceAccountManager<'t> { info!("Minting new coins to root"); let txn = self - .source_account + .source_account.lock().unwrap() .sign_with_transaction_builder(self.txn_factory.payload( - aptos_stdlib::aptos_coin_mint(self.source_account.address(), amount), + aptos_stdlib::aptos_coin_mint(self.source_account_address(), amount), )); if let Err(e) = txn_executor.execute_transactions(&[txn]).await { @@ -136,7 +142,7 @@ impl<'t> SourceAccountManager<'t> { // so check on failure if another emitter has refilled it instead let balance = 
txn_executor - .get_account_balance(self.source_account.address()) + .get_account_balance(self.source_account_address()) .await?; if balance > u64::MAX / 2 { Ok(()) @@ -417,16 +423,21 @@ impl<'t> AccountMinter<'t> { let create_requests: Vec<_> = batch .iter() .map(|account| { - create_and_fund_account_request( if let Some(account) = &mut new_source_account { - account + create_and_fund_account_request( + account, + coins_per_seed_account, + account.public_key(), + txn_factory, + ) } else { - self.source_account.get_root_account() - }, - coins_per_seed_account, - account.public_key(), - txn_factory, - ) + create_and_fund_account_request( + self.source_account.get_root_account().lock().unwrap().deref(), + coins_per_seed_account, + account.public_key(), + txn_factory, + ) + } }) .collect(); txn_executor @@ -471,15 +482,15 @@ impl<'t> AccountMinter<'t> { ) -> Result { const NUM_TRIES: usize = 3; for i in 0..NUM_TRIES { - self.source_account.get_root_account().set_sequence_number( + self.source_account.get_root_account().lock().unwrap().set_sequence_number( txn_executor - .query_sequence_number(self.source_account.get_root_account().address()) + .query_sequence_number(self.source_account.get_root_account().lock().unwrap().address()) .await?, ); let new_source_account = LocalAccount::generate(self.rng()); let txn = create_and_fund_account_request( - self.source_account.get_root_account(), + self.source_account.get_root_account().lock().unwrap().deref(), coins_for_source, new_source_account.public_key(), &self.txn_factory, diff --git a/crates/transaction-emitter-lib/src/emitter/mod.rs b/crates/transaction-emitter-lib/src/emitter/mod.rs index 1923e45d503c0..248f3e595c51e 100644 --- a/crates/transaction-emitter-lib/src/emitter/mod.rs +++ b/crates/transaction-emitter-lib/src/emitter/mod.rs @@ -41,6 +41,7 @@ use std::{ }, time::{Duration, Instant}, }; +use std::ops::DerefMut; use tokio::{runtime::Handle, task::JoinHandle, time}; // Max is 100k TPS for 3 hours @@ -645,7 +646,7 
@@ impl EmitJob { } } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct TxnEmitter { txn_factory: TransactionFactory, rng: StdRng, @@ -669,7 +670,7 @@ impl TxnEmitter { pub async fn start_job( &mut self, - root_account: &LocalAccount, + root_account: Arc>, req: EmitJobRequest, stats_tracking_phases: usize, ) -> Result { @@ -704,7 +705,7 @@ impl TxnEmitter { let account_generator = create_account_generator(req.account_type); let mut all_accounts = create_accounts( - root_account, + root_account.clone(), &init_txn_factory, account_generator, &req, @@ -726,7 +727,7 @@ impl TxnEmitter { retry_after: req.init_retry_interval, }; let source_account_manager = SourceAccountManager { - source_account: root_account, + source_account: root_account.clone(), txn_executor: &txn_executor, req: &req, txn_factory: init_txn_factory.clone(), @@ -815,7 +816,7 @@ impl TxnEmitter { async fn emit_txn_for_impl( mut self, - source_account: &LocalAccount, + source_account: Arc>, emit_job_request: EmitJobRequest, duration: Duration, print_stats_interval: Option, @@ -851,7 +852,7 @@ impl TxnEmitter { pub async fn emit_txn_for( self, - source_account: &mut LocalAccount, + source_account: Arc>, emit_job_request: EmitJobRequest, duration: Duration, ) -> Result { @@ -861,7 +862,7 @@ impl TxnEmitter { pub async fn emit_txn_for_with_stats( self, - source_account: &LocalAccount, + source_account: Arc>, emit_job_request: EmitJobRequest, duration: Duration, interval_secs: u64, @@ -978,12 +979,16 @@ async fn wait_for_accounts_sequence( (latest_fetched_counts, sum_of_completion_timestamps_millis) } +#[cfg(unused)] fn update_seq_num_and_get_num_expired( accounts: &mut [LocalAccount], + account: Arc>, account_to_start_and_end_seq_num: HashMap, latest_fetched_counts: HashMap, ) -> (usize, usize) { accounts.iter_mut().for_each(|account| { + // let mut account_lock = account.lock().unwrap(); + // let account = account_lock.deref_mut(); let (start_seq_num, end_seq_num) = if let Some(pair) = 
account_to_start_and_end_seq_num.get(&account.address()) { pair @@ -1129,7 +1134,7 @@ pub fn parse_seed(seed_string: &str) -> [u8; 32] { } pub async fn create_accounts( - root_account: &LocalAccount, + root_account: Arc>, txn_factory: &TransactionFactory, account_generator: Box, req: &EmitJobRequest, diff --git a/crates/transaction-emitter-lib/src/emitter/submission_worker.rs b/crates/transaction-emitter-lib/src/emitter/submission_worker.rs index 0f636147ada78..98d2aab15f5e2 100644 --- a/crates/transaction-emitter-lib/src/emitter/submission_worker.rs +++ b/crates/transaction-emitter-lib/src/emitter/submission_worker.rs @@ -4,11 +4,11 @@ use crate::{ emitter::{ stats::{DynamicStatsTracking, StatsAccumulator}, - update_seq_num_and_get_num_expired, wait_for_accounts_sequence, + wait_for_accounts_sequence, }, EmitModeParams, }; -use aptos_logger::{info, sample, sample::SampleRate, warn}; +use aptos_logger::{debug, info, sample, sample::SampleRate, warn}; use aptos_rest_client::Client as RestClient; use aptos_sdk::{ move_types::account_address::AccountAddress, @@ -26,13 +26,14 @@ use itertools::Itertools; use rand::seq::IteratorRandom; use std::{ collections::HashMap, - sync::{atomic::AtomicU64, Arc}, + sync::{atomic::AtomicU64, Arc, Mutex}, time::Instant, }; +use std::ops::DerefMut; use tokio::time::sleep; pub struct SubmissionWorker { - pub(crate) accounts: Vec, + pub(crate) accounts: Vec>>, client: RestClient, stop: Arc, params: EmitModeParams, @@ -55,6 +56,7 @@ impl SubmissionWorker { skip_latency_stats: bool, rng: ::rand::rngs::StdRng, ) -> Self { + let accounts = accounts.into_iter().map(|account| Arc::new(Mutex::new(account))).collect(); Self { accounts, client, @@ -199,7 +201,7 @@ impl SubmissionWorker { } } - self.accounts + self.accounts.into_iter().map(|account_arc_mutex| Arc::into_inner(account_arc_mutex).unwrap().into_inner().unwrap()).collect() } // returns true if it returned early @@ -243,11 +245,25 @@ impl SubmissionWorker { ) .await; - let 
(num_committed, num_expired) = update_seq_num_and_get_num_expired( - &mut self.accounts, + // self.accounts.iter().for_each(|account| {}) + for account in self.accounts.iter() { + let account = account.clone(); + let mut locker = account.lock().unwrap(); + update_account_seq_num( + locker.deref_mut(), + &account_to_start_and_end_seq_num, + &latest_fetched_counts, + ); + } + let (num_committed, num_expired) = count_committed_expired_stats( account_to_start_and_end_seq_num, latest_fetched_counts, ); + // let (num_committed, num_expired) = update_seq_num_and_get_num_expired( + // self.accounts.clone(), + // account_to_start_and_end_seq_num, + // latest_fetched_counts, + // ); if num_expired > 0 { loop_stats @@ -261,7 +277,7 @@ impl SubmissionWorker { num_expired, self.accounts .iter() - .map(|a| a.address()) + .map(|a| a.lock().unwrap().address()) .collect::>(), ) ); @@ -306,12 +322,87 @@ impl SubmissionWorker { .into_iter() .flat_map(|account| { self.txn_generator - .generate_transactions(account, self.params.transactions_per_account) + .generate_transactions(account.clone(), self.params.transactions_per_account) }) .collect() } } +fn update_account_seq_num( + account: &mut LocalAccount, + account_to_start_and_end_seq_num: &HashMap, + latest_fetched_counts: &HashMap, +) { + let (start_seq_num, end_seq_num) = + if let Some(pair) = account_to_start_and_end_seq_num.get(&account.address()) { + pair + } else { + return; + }; + assert!(account.sequence_number() == *end_seq_num); + + match latest_fetched_counts.get(&account.address()) { + Some(count) => { + if *count != account.sequence_number() { + assert!(account.sequence_number() > *count); + debug!( + "Stale sequence_number for {}, expected {}, setting to {}", + account.address(), + account.sequence_number(), + count + ); + account.set_sequence_number(*count); + } + }, + None => { + debug!( + "Couldn't fetch sequence_number for {}, expected {}, setting to {}", + account.address(), + account.sequence_number(), + 
start_seq_num + ); + account.set_sequence_number(*start_seq_num); + }, + } +} + +fn count_committed_expired_stats( + account_to_start_and_end_seq_num: HashMap, + latest_fetched_counts: HashMap, +) -> (usize, usize) { + account_to_start_and_end_seq_num + .iter() + .map( + |(address, (start_seq_num, end_seq_num))| match latest_fetched_counts.get(address) { + Some(count) => { + assert!( + *count <= *end_seq_num, + "{address} :: {count} > {end_seq_num}" + ); + if *count >= *start_seq_num { + ( + (*count - *start_seq_num) as usize, + (*end_seq_num - *count) as usize, + ) + } else { + debug!( + "Stale sequence_number fetched for {}, start_seq_num {}, fetched {}", + address, start_seq_num, *count + ); + (0, (*end_seq_num - *start_seq_num) as usize) + } + }, + None => (0, (end_seq_num - start_seq_num) as usize), + }, + ) + .fold( + (0, 0), + |(committed, expired), (cur_committed, cur_expired)| { + (committed + cur_committed, expired + cur_expired) + }, + ) +} + pub async fn submit_transactions( client: &RestClient, txns: &[SignedTransaction], diff --git a/crates/transaction-emitter-lib/src/wrappers.rs b/crates/transaction-emitter-lib/src/wrappers.rs index 9f6659e5e9b98..c4a103a34eb44 100644 --- a/crates/transaction-emitter-lib/src/wrappers.rs +++ b/crates/transaction-emitter-lib/src/wrappers.rs @@ -18,6 +18,7 @@ use aptos_sdk::transaction_builder::TransactionFactory; use aptos_transaction_generator_lib::{args::TransactionTypeArg, WorkflowProgress}; use rand::{rngs::StdRng, Rng, SeedableRng}; use std::time::{Duration, Instant}; +use std::sync::{Arc, Mutex}; pub async fn emit_transactions( cluster_args: &ClusterArgs, @@ -157,9 +158,10 @@ pub async fn emit_transactions_with_cluster( emit_job_request = emit_job_request.skip_minting_accounts(); } + let coin_source_account = std::sync::Arc::new(std::sync::Mutex::new(coin_source_account)); let stats = emitter .emit_txn_for_with_stats( - &coin_source_account, + coin_source_account, emit_job_request, duration, (args.duration / 
10).clamp(1, 10), @@ -177,6 +179,7 @@ pub async fn create_accounts_command( .context("Failed to build cluster")?; let client = cluster.random_instance().rest_client(); let coin_source_account = cluster.load_coin_source_account(&client).await?; + let coin_source_account = Arc::new(Mutex::new(coin_source_account)); let txn_factory = TransactionFactory::new(cluster.chain_id) .with_transaction_expiration_time(60) .with_max_gas_amount(create_accounts_args.max_gas_per_txn); @@ -194,7 +197,7 @@ pub async fn create_accounts_command( }; create_accounts( - &coin_source_account, + coin_source_account, &txn_factory, Box::new(PrivateKeyAccountGenerator), &emit_job_request, diff --git a/crates/transaction-generator-lib/src/account_generator.rs b/crates/transaction-generator-lib/src/account_generator.rs index 374f8c4562a39..ac2d47374daaf 100644 --- a/crates/transaction-generator-lib/src/account_generator.rs +++ b/crates/transaction-generator-lib/src/account_generator.rs @@ -43,7 +43,7 @@ impl AccountGenerator { impl TransactionGenerator for AccountGenerator { fn generate_transactions( &mut self, - account: &LocalAccount, + account: Arc>, num_to_create: usize, ) -> Vec { let mut requests = Vec::with_capacity(num_to_create); @@ -53,7 +53,7 @@ impl TransactionGenerator for AccountGenerator { let receiver = LocalAccount::generate(&mut self.rng); let receiver_address = receiver.address(); let request = create_account_transaction( - account, + account.clone(), receiver_address, &self.txn_factory, self.creation_balance, diff --git a/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs b/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs index 06aab2e1f8577..613c5e141c894 100644 --- a/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs +++ b/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs @@ -37,7 +37,7 @@ impl AccountsPoolWrapperGenerator { impl TransactionGenerator for AccountsPoolWrapperGenerator { fn generate_transactions( &mut self, - 
_account: &LocalAccount, + _account: Arc>, num_to_create: usize, ) -> Vec { let mut accounts_to_use = @@ -46,11 +46,23 @@ impl TransactionGenerator for AccountsPoolWrapperGenerator { if accounts_to_use.is_empty() { return Vec::new(); } - let txns = accounts_to_use - .iter_mut() - .flat_map(|account| self.generator.generate_transactions(account, 1)) - .collect(); + // Wrap LocalAccount in Arc+Mutex + let account_arcs : Vec>> = accounts_to_use.into_iter().map(|account| Arc::new(std::sync::Mutex::new(account))).collect(); + // get txns + let txns = account_arcs.iter().flat_map(|account| self.generator.generate_transactions(account.clone(), 1)).collect(); + // let txns = accounts_to_use + // .iter_mut() + // .flat_map(|account| { + // + // self.generator.generate_transactions(account, 1) + // }) + // .collect(); + // back to plain LocalAccount, add to accounts + let accounts_to_use = account_arcs.into_iter().map(|account| { + let account_mutex = Arc::into_inner(account).unwrap(); + account_mutex.into_inner().unwrap() + }).collect(); if let Some(destination_accounts_pool) = &self.destination_accounts_pool { destination_accounts_pool.add_to_pool(accounts_to_use); } diff --git a/crates/transaction-generator-lib/src/batch_transfer.rs b/crates/transaction-generator-lib/src/batch_transfer.rs index ff58614f9c908..22965a3b19614 100644 --- a/crates/transaction-generator-lib/src/batch_transfer.rs +++ b/crates/transaction-generator-lib/src/batch_transfer.rs @@ -39,7 +39,7 @@ impl BatchTransferTransactionGenerator { impl TransactionGenerator for BatchTransferTransactionGenerator { fn generate_transactions( &mut self, - account: &LocalAccount, + account: Arc>, num_to_create: usize, ) -> Vec { let mut requests = Vec::with_capacity(num_to_create); @@ -48,7 +48,7 @@ impl TransactionGenerator for BatchTransferTransactionGenerator { .all_addresses .clone_from_pool(self.batch_size, &mut self.rng); requests.push( - account.sign_with_transaction_builder(self.txn_factory.payload( + 
account.lock().unwrap().sign_with_transaction_builder(self.txn_factory.payload( aptos_stdlib::aptos_account_batch_transfer(receivers, vec![ self.send_amount; self.batch_size diff --git a/crates/transaction-generator-lib/src/bounded_batch_wrapper.rs b/crates/transaction-generator-lib/src/bounded_batch_wrapper.rs index c3b79dc621578..c4a0410965f5f 100644 --- a/crates/transaction-generator-lib/src/bounded_batch_wrapper.rs +++ b/crates/transaction-generator-lib/src/bounded_batch_wrapper.rs @@ -1,6 +1,7 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 +use std::sync::Arc; use crate::{TransactionGenerator, TransactionGeneratorCreator}; use aptos_sdk::types::{transaction::SignedTransaction, LocalAccount}; @@ -12,7 +13,7 @@ struct BoundedBatchWrapperTransactionGenerator { impl TransactionGenerator for BoundedBatchWrapperTransactionGenerator { fn generate_transactions( &mut self, - account: &LocalAccount, + account: Arc>, num_to_create: usize, ) -> Vec { self.generator diff --git a/crates/transaction-generator-lib/src/call_custom_modules.rs b/crates/transaction-generator-lib/src/call_custom_modules.rs index 71537fee71213..13c7ed1ee4ddb 100644 --- a/crates/transaction-generator-lib/src/call_custom_modules.rs +++ b/crates/transaction-generator-lib/src/call_custom_modules.rs @@ -1,6 +1,7 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 +use std::ops::Deref; use super::{publishing::publish_util::Package, ReliableTransactionSubmitter}; use crate::{ create_account_transaction, publishing::publish_util::PackageHandler, RootAccountHandle, @@ -82,15 +83,16 @@ impl CustomModulesDelegationGenerator { impl TransactionGenerator for CustomModulesDelegationGenerator { fn generate_transactions( &mut self, - account: &LocalAccount, + account: Arc>, num_to_create: usize, ) -> Vec { let mut requests = Vec::with_capacity(num_to_create); for _ in 0..num_to_create { let (package, publisher) = self.packages.choose(&mut self.rng).unwrap(); + let 
account = account.lock().unwrap(); let request = (self.txn_generator)( - account, + account.deref(), package, publisher, &self.txn_factory, diff --git a/crates/transaction-generator-lib/src/lib.rs b/crates/transaction-generator-lib/src/lib.rs index 364c6f8421eef..8c28827e94652 100644 --- a/crates/transaction-generator-lib/src/lib.rs +++ b/crates/transaction-generator-lib/src/lib.rs @@ -123,7 +123,7 @@ impl Default for TransactionType { pub trait TransactionGenerator: Sync + Send { fn generate_transactions( &mut self, - account: &LocalAccount, + account: Arc>, num_to_create: usize, ) -> Vec; } @@ -213,15 +213,15 @@ impl CounterState { pub trait RootAccountHandle: Send + Sync { async fn approve_funds(&self, amount: u64, reason: &str); - fn get_root_account(&self) -> &LocalAccount; + fn get_root_account(&self) -> Arc>; } -pub struct AlwaysApproveRootAccountHandle<'t> { - pub root_account: &'t LocalAccount, +pub struct AlwaysApproveRootAccountHandle{ + pub root_account: Arc>, } #[async_trait::async_trait] -impl<'t> RootAccountHandle for AlwaysApproveRootAccountHandle<'t> { +impl RootAccountHandle for AlwaysApproveRootAccountHandle { async fn approve_funds(&self, amount: u64, reason: &str) { println!( "Consuming funds from root/source account: up to {} for {}", @@ -229,8 +229,8 @@ impl<'t> RootAccountHandle for AlwaysApproveRootAccountHandle<'t> { ); } - fn get_root_account(&self) -> &LocalAccount { - self.root_account + fn get_root_account(&self) -> Arc> { + self.root_account.clone() } } @@ -519,12 +519,12 @@ impl ObjectPool { } pub fn create_account_transaction( - from: &LocalAccount, + from: Arc>, to: AccountAddress, txn_factory: &TransactionFactory, creation_balance: u64, ) -> SignedTransaction { - from.sign_with_transaction_builder(txn_factory.payload( + from.lock().unwrap().sign_with_transaction_builder(txn_factory.payload( if creation_balance > 0 { aptos_stdlib::aptos_account_transfer(to, creation_balance) } else { diff --git 
a/crates/transaction-generator-lib/src/p2p_transaction_generator.rs b/crates/transaction-generator-lib/src/p2p_transaction_generator.rs index 0175865a3f634..83b37b12f78e9 100644 --- a/crates/transaction-generator-lib/src/p2p_transaction_generator.rs +++ b/crates/transaction-generator-lib/src/p2p_transaction_generator.rs @@ -16,6 +16,7 @@ use std::{ cmp::{max, min}, sync::Arc, }; +use std::ops::Deref; pub enum SamplingMode { /// See `BasicSampler`. @@ -249,7 +250,7 @@ impl Distribution for Standard { impl TransactionGenerator for P2PTransactionGenerator { fn generate_transactions( &mut self, - account: &LocalAccount, + account: Arc>, num_to_create: usize, ) -> Vec { let mut requests = Vec::with_capacity(num_to_create); @@ -277,11 +278,13 @@ impl TransactionGenerator for P2PTransactionGenerator { let receiver = receivers.get(i).expect("all_addresses can't be empty"); let request = if num_valid_tx > 0 { num_valid_tx -= 1; - self.gen_single_txn(account, receiver, self.send_amount, &self.txn_factory) + let account = account.lock().unwrap(); + self.gen_single_txn(account.deref(), receiver, self.send_amount, &self.txn_factory) } else { + let account = account.lock().unwrap(); self.generate_invalid_transaction( &mut self.rng.clone(), - account, + account.deref(), receiver, &requests, ) diff --git a/crates/transaction-generator-lib/src/publish_modules.rs b/crates/transaction-generator-lib/src/publish_modules.rs index 4b1838dc85521..33de9bd3141d3 100644 --- a/crates/transaction-generator-lib/src/publish_modules.rs +++ b/crates/transaction-generator-lib/src/publish_modules.rs @@ -1,3 +1,4 @@ +use std::ops::DerefMut; // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 use crate::{ @@ -34,9 +35,11 @@ impl PublishPackageGenerator { impl TransactionGenerator for PublishPackageGenerator { fn generate_transactions( &mut self, - account: &LocalAccount, + account: Arc>, num_to_create: usize, ) -> Vec { + let mut account_locker = account.lock().unwrap(); + let 
account = account_locker.deref_mut(); let mut requests = Vec::with_capacity(num_to_create); // First publish the module and then use it diff --git a/crates/transaction-generator-lib/src/transaction_mix_generator.rs b/crates/transaction-generator-lib/src/transaction_mix_generator.rs index eef89c664cb86..d4a9cdbcbe9bf 100644 --- a/crates/transaction-generator-lib/src/transaction_mix_generator.rs +++ b/crates/transaction-generator-lib/src/transaction_mix_generator.rs @@ -38,7 +38,7 @@ impl PhasedTxnMixGenerator { impl TransactionGenerator for PhasedTxnMixGenerator { fn generate_transactions( &mut self, - account: &LocalAccount, + account: Arc>, num_to_create: usize, ) -> Vec { let phase = if self.txn_mix_per_phase.len() == 1 { diff --git a/crates/transaction-generator-lib/src/workflow_delegator.rs b/crates/transaction-generator-lib/src/workflow_delegator.rs index 439b68e056780..bffae2f2c41cf 100644 --- a/crates/transaction-generator-lib/src/workflow_delegator.rs +++ b/crates/transaction-generator-lib/src/workflow_delegator.rs @@ -113,7 +113,7 @@ impl WorkflowTxnGenerator { impl TransactionGenerator for WorkflowTxnGenerator { fn generate_transactions( &mut self, - account: &LocalAccount, + account: Arc>, mut num_to_create: usize, ) -> Vec { assert_ne!(num_to_create, 0); diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index a18c3787d152b..baff55067b5d3 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -76,6 +76,7 @@ use std::{ thread, time::Duration, }; +use std::ops::DerefMut; use suites::dag::get_dag_test; use tokio::{runtime::Runtime, select}; use url::Url; @@ -2660,9 +2661,11 @@ impl Test for RestartValidator { } impl NetworkTest for RestartValidator { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { let runtime = Runtime::new()?; runtime.block_on(async { + let mut ctx_locker = ctxa.ctx.lock().unwrap(); + let mut ctx = 
ctx_locker.deref_mut(); let node = ctx.swarm().validators_mut().next().unwrap(); node.health_check().await.expect("node health check failed"); node.stop().await.unwrap(); @@ -2685,7 +2688,9 @@ impl Test for EmitTransaction { } impl NetworkTest for EmitTransaction { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); let duration = Duration::from_secs(10); let all_validators = ctx .swarm() @@ -2717,7 +2722,7 @@ impl Test for Delay { } impl NetworkTest for Delay { - fn run(&self, _ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { info!("forge sleep {}", self.seconds); std::thread::sleep(Duration::from_secs(self.seconds)); Ok(()) @@ -2734,7 +2739,9 @@ impl Test for GatherMetrics { } impl NetworkTest for GatherMetrics { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); let runtime = ctx.runtime.handle(); runtime.block_on(gather_metrics_one(ctx)); Ok(()) diff --git a/testsuite/forge/src/backend/k8s/swarm.rs b/testsuite/forge/src/backend/k8s/swarm.rs index d4cc6512f2a53..21a42f230f838 100644 --- a/testsuite/forge/src/backend/k8s/swarm.rs +++ b/testsuite/forge/src/backend/k8s/swarm.rs @@ -42,12 +42,13 @@ use std::{ env, str, sync::Arc, }; +use std::sync::Mutex; use tokio::{runtime::Runtime, time::Duration}; pub struct K8sSwarm { validators: HashMap, fullnodes: HashMap, - root_account: LocalAccount, + root_account: Arc>, kube_client: K8sClient, versions: Arc>, pub chain_id: ChainId, @@ -86,6 +87,7 @@ impl K8sSwarm { ) })?; let root_account = LocalAccount::new(address, account_key, sequence_number); + let root_account = Arc::new(Mutex::new(root_account)); let mut versions = HashMap::new(); 
let cur_version = Version::new(0, image_tag.to_string()); @@ -337,11 +339,11 @@ impl Swarm for K8sSwarm { Box::new(self.versions.keys().cloned()) } - fn chain_info(&mut self) -> ChainInfo<'_> { + fn chain_info(&mut self) -> ChainInfo { let rest_api_url = self.get_rest_api_url(0); let inspection_service_url = self.get_inspection_service_url(0); ChainInfo::new( - &mut self.root_account, + self.root_account.clone(), rest_api_url, inspection_service_url, self.chain_id, @@ -457,11 +459,11 @@ impl Swarm for K8sSwarm { bail!("No prom client"); } - fn chain_info_for_node(&mut self, idx: usize) -> ChainInfo<'_> { + fn chain_info_for_node(&mut self, idx: usize) -> ChainInfo { let rest_api_url = self.get_rest_api_url(idx); let inspection_service_url = self.get_inspection_service_url(idx); ChainInfo::new( - &mut self.root_account, + self.root_account.clone(), rest_api_url, inspection_service_url, self.chain_id, diff --git a/testsuite/forge/src/backend/local/swarm.rs b/testsuite/forge/src/backend/local/swarm.rs index 4e8905c2a3305..4a215f19ae843 100644 --- a/testsuite/forge/src/backend/local/swarm.rs +++ b/testsuite/forge/src/backend/local/swarm.rs @@ -96,7 +96,7 @@ pub struct LocalSwarm { fullnodes: HashMap, public_networks: HashMap, dir: SwarmDirectory, - root_account: LocalAccount, + root_account: Arc>, chain_id: ChainId, root_key: ConfigKey, @@ -245,6 +245,7 @@ impl LocalSwarm { AccountKey::from_private_key(root_key.private_key()), 0, ); + let root_account = Arc::new(std::sync::Mutex::new(root_account)); Ok(LocalSwarm { node_name_counter: validators.len(), @@ -589,7 +590,7 @@ impl Swarm for LocalSwarm { Box::new(self.versions.keys().cloned()) } - fn chain_info(&mut self) -> ChainInfo<'_> { + fn chain_info(&mut self) -> ChainInfo { let rest_api_url = self .validators() .next() @@ -604,7 +605,7 @@ impl Swarm for LocalSwarm { .to_string(); ChainInfo::new( - &mut self.root_account, + self.root_account.clone(), rest_api_url, inspection_service_url, self.chain_id, @@ -655,7 
+656,7 @@ impl Swarm for LocalSwarm { todo!() } - fn chain_info_for_node(&mut self, idx: usize) -> ChainInfo<'_> { + fn chain_info_for_node(&mut self, idx: usize) -> ChainInfo { let rest_api_url = self .validators() .nth(idx) @@ -669,7 +670,7 @@ impl Swarm for LocalSwarm { .inspection_service_endpoint() .to_string(); ChainInfo::new( - &mut self.root_account, + self.root_account.clone(), rest_api_url, inspection_service_url, self.chain_id, diff --git a/testsuite/forge/src/interface/admin.rs b/testsuite/forge/src/interface/admin.rs index 50d726ced5a6a..0abbbe87039cf 100644 --- a/testsuite/forge/src/interface/admin.rs +++ b/testsuite/forge/src/interface/admin.rs @@ -20,12 +20,12 @@ pub trait AdminTest: Test { pub struct AdminContext<'t> { core: CoreContext, - chain_info: ChainInfo<'t>, + chain_info: ChainInfo, pub report: &'t mut TestReport, } impl<'t> AdminContext<'t> { - pub fn new(core: CoreContext, chain_info: ChainInfo<'t>, report: &'t mut TestReport) -> Self { + pub fn new(core: CoreContext, chain_info: ChainInfo, report: &'t mut TestReport) -> Self { Self { core, chain_info, @@ -45,7 +45,7 @@ impl<'t> AdminContext<'t> { RestClient::new(Url::parse(self.chain_info.rest_api()).unwrap()) } - pub fn chain_info(&mut self) -> &mut ChainInfo<'t> { + pub fn chain_info(&mut self) -> &mut ChainInfo { &mut self.chain_info } diff --git a/testsuite/forge/src/interface/aptos.rs b/testsuite/forge/src/interface/aptos.rs index b31cbb2655449..a3c3290371ca7 100644 --- a/testsuite/forge/src/interface/aptos.rs +++ b/testsuite/forge/src/interface/aptos.rs @@ -1,6 +1,8 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 +use std::ops::DerefMut; +use std::sync::{Arc, Mutex}; use super::Test; use crate::{CoreContext, Result, TestReport}; use anyhow::anyhow; @@ -34,14 +36,14 @@ pub trait AptosTest: Test { pub struct AptosContext<'t> { core: CoreContext, - public_info: AptosPublicInfo<'t>, + public_info: AptosPublicInfo, pub report: &'t mut TestReport, } impl<'t> 
AptosContext<'t> { pub fn new( core: CoreContext, - public_info: AptosPublicInfo<'t>, + public_info: AptosPublicInfo, report: &'t mut TestReport, ) -> Self { Self { @@ -107,26 +109,26 @@ impl<'t> AptosContext<'t> { self.public_info.get_balance(address).await } - pub fn root_account(&mut self) -> &mut LocalAccount { - self.public_info.root_account + pub fn root_account(&mut self) -> Arc> { + self.public_info.root_account.clone() } } -pub struct AptosPublicInfo<'t> { +pub struct AptosPublicInfo { chain_id: ChainId, inspection_service_url: Url, rest_api_url: Url, rest_client: RestClient, - root_account: &'t mut LocalAccount, + root_account: Arc>, rng: ::rand::rngs::StdRng, } -impl<'t> AptosPublicInfo<'t> { +impl AptosPublicInfo { pub fn new( chain_id: ChainId, inspection_service_url_str: String, rest_api_url_str: String, - root_account: &'t mut LocalAccount, + root_account: Arc>, ) -> Self { let rest_api_url = Url::parse(&rest_api_url_str).unwrap(); let inspection_service_url = Url::parse(&inspection_service_url_str).unwrap(); @@ -152,14 +154,14 @@ impl<'t> AptosPublicInfo<'t> { self.inspection_service_url.as_str() } - pub fn root_account(&mut self) -> &mut LocalAccount { - self.root_account + pub fn root_account(&mut self) -> Arc> { + self.root_account.clone() } pub async fn create_user_account(&mut self, pubkey: &Ed25519PublicKey) -> Result<()> { let auth_key = AuthenticationKey::ed25519(pubkey); let create_account_txn = - self.root_account + self.root_account.lock().unwrap() .sign_with_transaction_builder(self.transaction_factory().payload( aptos_stdlib::aptos_account_create_account(auth_key.account_address()), )); @@ -175,7 +177,7 @@ impl<'t> AptosPublicInfo<'t> { ) -> Result { let auth_key = AuthenticationKey::any_key(pubkey.clone()); let create_account_txn = - self.root_account + self.root_account.lock().unwrap() .sign_with_transaction_builder(self.transaction_factory().payload( aptos_stdlib::aptos_account_create_account(auth_key.account_address()), )); @@ 
-186,7 +188,7 @@ impl<'t> AptosPublicInfo<'t> { } pub async fn mint(&mut self, addr: AccountAddress, amount: u64) -> Result<()> { - let mint_txn = self.root_account.sign_with_transaction_builder( + let mint_txn = self.root_account.lock().unwrap().sign_with_transaction_builder( self.transaction_factory() .payload(aptos_stdlib::aptos_coin_mint(addr, amount)), ); @@ -305,14 +307,14 @@ impl<'t> AptosPublicInfo<'t> { reconfig( &self.rest_client, &self.transaction_factory(), - self.root_account, + self.root_account.lock().unwrap().deref_mut(), ) .await } /// Syncs the root account to it's sequence number in the event that a faucet changed it's value pub async fn sync_root_account_sequence_number(&mut self) { - let root_address = self.root_account().address(); + let root_address = self.root_account().lock().unwrap().address(); let root_sequence_number = self .client() .get_account_bcs(root_address) @@ -320,7 +322,7 @@ impl<'t> AptosPublicInfo<'t> { .unwrap() .into_inner() .sequence_number(); - self.root_account() + self.root_account().lock().unwrap() .set_sequence_number(root_sequence_number); } } diff --git a/testsuite/forge/src/interface/chain_info.rs b/testsuite/forge/src/interface/chain_info.rs index 9baa8c22ac9f1..58c0456859a1e 100644 --- a/testsuite/forge/src/interface/chain_info.rs +++ b/testsuite/forge/src/interface/chain_info.rs @@ -2,6 +2,7 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. 
// SPDX-License-Identifier: Apache-2.0 +use std::sync::{Arc, Mutex}; use crate::AptosPublicInfo; use anyhow::Result; use aptos_rest_client::Client as RestClient; @@ -12,16 +13,16 @@ use aptos_sdk::{ use reqwest::Url; #[derive(Debug)] -pub struct ChainInfo<'t> { - pub root_account: &'t mut LocalAccount, +pub struct ChainInfo { + pub root_account: Arc>, pub rest_api_url: String, pub inspection_service_url: String, pub chain_id: ChainId, } -impl<'t> ChainInfo<'t> { +impl ChainInfo { pub fn new( - root_account: &'t mut LocalAccount, + root_account: Arc>, rest_api_url: String, inspection_service_url: String, chain_id: ChainId, @@ -34,16 +35,16 @@ impl<'t> ChainInfo<'t> { } } - pub fn root_account(&mut self) -> &mut LocalAccount { - self.root_account + pub fn root_account(&mut self) -> Arc> { + self.root_account.clone() } pub async fn resync_root_account_seq_num(&mut self, client: &RestClient) -> Result<()> { let account = client - .get_account(self.root_account.address()) + .get_account(self.root_account.lock().unwrap().address()) .await? .into_inner(); - self.root_account + self.root_account.lock().unwrap() .set_sequence_number(account.sequence_number); Ok(()) } @@ -64,12 +65,12 @@ impl<'t> ChainInfo<'t> { TransactionFactory::new(self.chain_id()) } - pub fn into_aptos_public_info(self) -> AptosPublicInfo<'t> { + pub fn into_aptos_public_info(self) -> AptosPublicInfo { AptosPublicInfo::new( self.chain_id, self.inspection_service_url.clone(), self.rest_api_url.clone(), - self.root_account, + self.root_account.clone(), ) } } diff --git a/testsuite/forge/src/interface/network.rs b/testsuite/forge/src/interface/network.rs index 1d4f87fc2a9f4..14752533f8073 100644 --- a/testsuite/forge/src/interface/network.rs +++ b/testsuite/forge/src/interface/network.rs @@ -2,6 +2,7 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. 
// SPDX-License-Identifier: Apache-2.0 +use std::sync::{Arc, Mutex}; use super::Test; use crate::{ prometheus_metrics::LatencyBreakdown, @@ -17,7 +18,21 @@ use tokio::runtime::Runtime; /// nodes which comprise the network. pub trait NetworkTest: Test { /// Executes the test against the given context. - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()>; + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()>; +} + +#[derive(Clone)] +pub struct NetworkContextSynchronizer<'t> { + pub ctx: Arc>>, +} + +// TODO: some useful things that don't need to hold the lock or make a copy +impl<'t> NetworkContextSynchronizer<'t> { + pub fn new(ctx: NetworkContext<'t>) -> Self { + Self{ + ctx: Arc::new(Mutex::new(ctx)), + } + } } pub struct NetworkContext<'t> { diff --git a/testsuite/forge/src/interface/swarm.rs b/testsuite/forge/src/interface/swarm.rs index ce21f44f18f28..a133645ecf97a 100644 --- a/testsuite/forge/src/interface/swarm.rs +++ b/testsuite/forge/src/interface/swarm.rs @@ -20,7 +20,7 @@ use tokio::runtime::Runtime; /// Trait used to represent a running network comprised of Validators and FullNodes #[async_trait::async_trait] -pub trait Swarm: Sync { +pub trait Swarm: Sync + Send { /// Performs a health check on the entire swarm, ensuring all Nodes are Live and that no forks /// have occurred async fn health_check(&mut self) -> Result<()>; @@ -79,7 +79,7 @@ pub trait Swarm: Sync { fn versions<'a>(&'a self) -> Box + 'a>; /// Construct a ChainInfo from this Swarm - fn chain_info(&mut self) -> ChainInfo<'_>; + fn chain_info(&mut self) -> ChainInfo; fn logs_location(&mut self) -> String; @@ -107,13 +107,13 @@ pub trait Swarm: Sync { timeout: Option, ) -> Result>; - fn aptos_public_info(&mut self) -> AptosPublicInfo<'_> { + fn aptos_public_info(&mut self) -> AptosPublicInfo { self.chain_info().into_aptos_public_info() } - fn chain_info_for_node(&mut self, idx: usize) -> ChainInfo<'_>; + fn chain_info_for_node(&mut self, idx: usize) -> ChainInfo; - fn 
aptos_public_info_for_node(&mut self, idx: usize) -> AptosPublicInfo<'_> { + fn aptos_public_info_for_node(&mut self, idx: usize) -> AptosPublicInfo { self.chain_info_for_node(idx).into_aptos_public_info() } diff --git a/testsuite/forge/src/runner.rs b/testsuite/forge/src/runner.rs index f4b15c9facdac..42ee9ace49ff5 100644 --- a/testsuite/forge/src/runner.rs +++ b/testsuite/forge/src/runner.rs @@ -587,7 +587,7 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { } for test in self.filter_tests(&self.tests.network_tests) { - let mut network_ctx = NetworkContext::new( + let network_ctx = NetworkContext::new( CoreContext::from_rng(&mut rng), &mut *swarm, &mut report, @@ -595,7 +595,9 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { self.tests.emit_job_request.clone(), self.tests.success_criteria.clone(), ); - let result = run_test(|| test.run(&mut network_ctx)); + // let network_ctx = Arc::new(Mutex::new(network_ctx)); + let network_ctx = NetworkContextSynchronizer::new(network_ctx); + let result = run_test(|| test.run(network_ctx)); report.report_text(result.to_string()); summary.handle_result(test.name().to_owned(), result)?; } diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index 3737113670007..b04b34eba1ea1 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -2,11 +2,17 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. 
// SPDX-License-Identifier: Apache-2.0 -use crate::{batch_update_gradually, generate_traffic}; +use std::ops::DerefMut; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; +use crate::{batch_update_gradually, create_emitter_and_request, generate_traffic, traffic_emitter_runtime}; use anyhow::bail; -use aptos_forge::{NetworkContext, NetworkTest, Result, SwarmExt, Test}; +use rand::SeedableRng; +use aptos_forge::{EmitJobRequest, NetworkContextSynchronizer, NetworkTest, Result, SwarmExt, Test, TxnEmitter, TxnStats, Version}; use aptos_logger::info; use tokio::{runtime::Runtime, time::Duration}; +use aptos_sdk::transaction_builder::TransactionFactory; +use aptos_sdk::types::{LocalAccount, PeerId}; pub struct SimpleValidatorUpgrade; @@ -20,9 +26,171 @@ impl Test for SimpleValidatorUpgrade { } } +#[cfg(unused)] +async fn upgrade_task( + // ctx: &mut NetworkContext<'_>, + ctxa: NetworkContextSynchronizer<'_>, + validators_to_update: &[PeerId], + version: &Version, + wait_until_healthy: bool, + delay: Duration, + max_wait: Duration, + done: Arc, +) -> Result<()> { + let result = batch_update_gradually(ctxa, validators_to_update, version, wait_until_healthy, delay, max_wait).await; + done.store(true, Ordering::Relaxed); + result +} +async fn stat_gather_task( + emitter: TxnEmitter, + emit_job_request: EmitJobRequest, + source_account: Arc>, + upgrade_traffic_chunk_duration: Duration, + // handle: &Handle, + done: Arc, +) -> Result>{ + let mut upgrade_stats = vec![]; + while done.load(Ordering::Relaxed) == false { + // let upgrading_stats = spawn_generate_traffic(emitter.clone(), emit_job_request.clone(), &source_account, upgrade_traffic_chunk_duration, handle.clone()).await??; + // let mut account_locker = source_account.lock().unwrap(); + // let source_account = account_locker.deref_mut(); + let upgrading_stats = emitter.clone().emit_txn_for( + source_account.clone(), + emit_job_request.clone(), + upgrade_traffic_chunk_duration, + ).await?; + 
upgrade_stats.push(upgrading_stats); + } + let statsum = upgrade_stats.into_iter().reduce(|a,b| &a + &b); + Ok(statsum) +} + +fn traffic_task( + ctxa: NetworkContextSynchronizer, + nodes: &[PeerId], + upgrade_done: Arc, +) -> Result> { + let (emitter, emit_job_request, source_account) = { + let mut ctx_locker = ctxa.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); + // spawn_generate_traffic_setup(ctx, nodes)? + let mut emit_job_request = ctx.emit_job.clone(); + let rng = SeedableRng::from_rng(ctx.core().rng()).unwrap(); + let swarm = ctx.swarm(); + let client_timeout = Duration::from_secs(30); + + let chain_info = swarm.chain_info(); + let transaction_factory = TransactionFactory::new(chain_info.chain_id); + let emitter = TxnEmitter::new(transaction_factory, rng); + + emit_job_request = + emit_job_request.rest_clients(swarm.get_clients_for_peers(nodes, client_timeout)); + let source_account = chain_info.root_account.clone(); + (emitter, emit_job_request, source_account) + }; + // match create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng) { + // Ok(parts) => parts, + // Err(err) => { + // stats_result = Err(err); + // return; + // } + // }; + // let source_account = ctx.swarm().chain_info().root_account; + let traffic_runtime = traffic_emitter_runtime()?; + // let upgrade_joiner = handle.spawn(upgrade_task(ctx, validators_to_update, version, wait_until_healthy, delay, max_wait, upgrade_done.clone())); + let upgrade_traffic_chunk_duration = Duration::from_secs(15); + traffic_runtime.block_on(stat_gather_task( + emitter, + emit_job_request, + source_account, + upgrade_traffic_chunk_duration, + // traffic_runtime.handle(), + upgrade_done.clone(), + )) +} + + +fn upgrade_and_gather_stats( + ctxa: NetworkContextSynchronizer, + // upgrade args + validators_to_update: &[PeerId], + version: &Version, + wait_until_healthy: bool, + delay: Duration, + max_wait: Duration, + // handle: &Handle, + // traffic args + nodes: &[PeerId], + 
//traffic_handle: &Handle, +) -> Result> { + let upgrade_done = Arc::new(AtomicBool::new(false)); + // let (emitter,emit_job_request,source_account) = { + // (emitter, emit_job_request, root_account) + // }; + let mut emitter_ctx = ctxa.clone(); + let mut stats_result : Result> = Ok(None); + let mut upgrade_result : Result<()> = Ok(()); + std::thread::scope(|scopev| { + scopev.spawn(|| { + let mut ctx_locker = emitter_ctx.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); + // spawn_generate_traffic_setup(ctx, nodes)? + let emit_job_request = ctx.emit_job.clone(); + let rng = SeedableRng::from_rng(ctx.core().rng()).unwrap(); + let (emitter, emit_job_request) = + match create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng) { + Ok(parts) => parts, + Err(err) => { + stats_result = Err(err); + return; + } + }; + let source_account = ctx.swarm().chain_info().root_account; + let traffic_runtime = match traffic_emitter_runtime() { + Ok(x) => x, + Err(err) => { + stats_result = Err(err); + return; + } + }; + // let upgrade_joiner = handle.spawn(upgrade_task(ctx, validators_to_update, version, wait_until_healthy, delay, max_wait, upgrade_done.clone())); + let upgrade_traffic_chunk_duration = Duration::from_secs(15); + stats_result = traffic_runtime.block_on(stat_gather_task( + emitter, + emit_job_request, + source_account, + upgrade_traffic_chunk_duration, + // traffic_runtime.handle(), + upgrade_done.clone(), + )); + }); + scopev.spawn(|| { + // let mut ctx = ctxmut.lock().unwrap(); + // let mut ctx = ctx.get_mut(); + let runtime = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap(); + upgrade_result = runtime.block_on(batch_update_gradually(ctxa, validators_to_update, version, wait_until_healthy, delay, max_wait)); + upgrade_done.store(true, Ordering::Relaxed); + }); + }); + + // let mut upgrade_stats = vec![]; + // while upgrade_done.load(Ordering::Relaxed) == false { + // let upgrading_stats = 
spawn_generate_traffic(emitter, emit_job_request, source_account, upgrade_traffic_chunk_duration, traffic_handle.clone()).await??; + // upgrade_stats.push(upgrading_stats); + // } + // upgrade_joiner.await??; + // let result = batch_update_gradually(ctx.swarm(), validators_to_update, version, wait_until_healthy, delay, max_wait).await; + // upgrade_done.store(true, Ordering::Relaxed); + // let stats_result = stats_joiner.await; + // traffic_runtime.shutdown_timeout(Duration::from_millis(500)); + // result?; + stats_result +} + impl NetworkTest for SimpleValidatorUpgrade { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { let runtime = Runtime::new()?; + // let traffic_runtime = traffic_emitter_runtime()?; let upgrade_wait_for_healthy = true; let upgrade_node_delay = Duration::from_secs(10); let upgrade_max_wait = Duration::from_secs(40); @@ -31,7 +199,7 @@ impl NetworkTest for SimpleValidatorUpgrade { // Get the different versions we're testing with let (old_version, new_version) = { - let mut versions = ctx.swarm().versions().collect::>(); + let mut versions = ctxa.ctx.lock().unwrap().swarm().versions().collect::>(); versions.sort(); if versions.len() != 2 { bail!("exactly two different versions needed to run compat test"); @@ -45,13 +213,13 @@ impl NetworkTest for SimpleValidatorUpgrade { old_version, new_version ); info!("{}", msg); - ctx.report.report_text(msg); + ctxa.ctx.lock().unwrap().report.report_text(msg); // Split the swarm into 2 parts - if ctx.swarm().validators().count() < 4 { + if ctxa.ctx.lock().unwrap().swarm().validators().count() < 4 { bail!("compat test requires >= 4 validators"); } - let all_validators = ctx + let all_validators = ctxa.ctx.lock().unwrap() .swarm() .validators() .map(|v| v.peer_id()) @@ -68,12 +236,16 @@ impl NetworkTest for SimpleValidatorUpgrade { old_version ); info!("{}", msg); - ctx.report.report_text(msg); + 
ctxa.ctx.lock().unwrap().report.report_text(msg); // Generate some traffic - let txn_stat_prior = generate_traffic(ctx, &all_validators, duration)?; - ctx.report - .report_txn_stats(format!("{}::liveness-check", self.name()), &txn_stat_prior); + { + let mut ctx_locker = ctxa.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); + let txn_stat_prior = generate_traffic(&mut ctx, &all_validators, duration)?; + ctx.report + .report_txn_stats(format!("{}::liveness-check", self.name()), &txn_stat_prior); + } // Update the first Validator let msg = format!( @@ -81,55 +253,78 @@ impl NetworkTest for SimpleValidatorUpgrade { new_version ); info!("{}", msg); - ctx.report.report_text(msg); - runtime.block_on(batch_update_gradually(ctx, &[first_node], &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; + ctxa.ctx.lock().unwrap().report.report_text(msg); + // runtime.block_on(batch_update_gradually(ctx.swarm(), &[first_node], &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; + let upgrade_stats = upgrade_and_gather_stats( + ctxa.clone(), + &[first_node], + &new_version, + upgrade_wait_for_healthy, + upgrade_node_delay, + upgrade_max_wait, + // runtime.handle(), + &[first_node], + //traffic_runtime.handle(), + )?; + let upgrade_stats_sum = upgrade_stats.into_iter().reduce(|a,b| &a + &b); + if let Some(upgrade_stats_sum) = upgrade_stats_sum { + ctxa.ctx.lock().unwrap().report.report_txn_stats( + format!("{}::single-validator-upgrading", self.name()), + &upgrade_stats_sum, + ); + } // Generate some traffic - let txn_stat_one = generate_traffic(ctx, &[first_node], duration)?; - ctx.report.report_txn_stats( - format!("{}::single-validator-upgrade", self.name()), - &txn_stat_one, - ); + { + let mut ctx_locker = ctxa.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); + let txn_stat_one = generate_traffic(&mut ctx, &[first_node], duration)?; + ctx.report.report_txn_stats( + 
format!("{}::single-validator-upgrade", self.name()), + &txn_stat_one, + ); - // Update the rest of the first batch - let msg = format!( - "3. Upgrading rest of first batch to new version: {}", - new_version - ); - info!("{}", msg); - ctx.report.report_text(msg); - runtime.block_on(batch_update_gradually(ctx, &first_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; + // Update the rest of the first batch + let msg = format!( + "3. Upgrading rest of first batch to new version: {}", + new_version + ); + info!("{}", msg); + ctx.report.report_text(msg); - // Generate some traffic - let txn_stat_half = generate_traffic(ctx, &first_batch, duration)?; - ctx.report.report_txn_stats( - format!("{}::half-validator-upgrade", self.name()), - &txn_stat_half, - ); + runtime.block_on(batch_update_gradually(ctxa.clone(), &first_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; - ctx.swarm().fork_check(epoch_duration)?; + // Generate some traffic + let txn_stat_half = generate_traffic(&mut ctx, &first_batch, duration)?; + ctx.report.report_txn_stats( + format!("{}::half-validator-upgrade", self.name()), + &txn_stat_half, + ); - // Update the second batch - let msg = format!("4. upgrading second batch to new version: {}", new_version); - info!("{}", msg); - ctx.report.report_text(msg); - runtime.block_on(batch_update_gradually(ctx, &second_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; + ctx.swarm().fork_check(epoch_duration)?; - // Generate some traffic - let txn_stat_all = generate_traffic(ctx, &second_batch, duration)?; - ctx.report.report_txn_stats( - format!("{}::rest-validator-upgrade", self.name()), - &txn_stat_all, - ); + // Update the second batch + let msg = format!("4. 
upgrading second batch to new version: {}", new_version); + info!("{}", msg); + ctx.report.report_text(msg); + runtime.block_on(batch_update_gradually(ctxa.clone(), &second_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; - let msg = "5. check swarm health".to_string(); - info!("{}", msg); - ctx.report.report_text(msg); - ctx.swarm().fork_check(epoch_duration)?; - ctx.report.report_text(format!( - "Compatibility test for {} ==> {} passed", - old_version, new_version - )); + // Generate some traffic + let txn_stat_all = generate_traffic(&mut ctx, &second_batch, duration)?; + ctx.report.report_txn_stats( + format!("{}::rest-validator-upgrade", self.name()), + &txn_stat_all, + ); + + let msg = "5. check swarm health".to_string(); + info!("{}", msg); + ctx.report.report_text(msg); + ctx.swarm().fork_check(epoch_duration)?; + ctx.report.report_text(format!( + "Compatibility test for {} ==> {} passed", + old_version, new_version + )); + } Ok(()) } diff --git a/testsuite/testcases/src/consensus_reliability_tests.rs b/testsuite/testcases/src/consensus_reliability_tests.rs index 99a8d6f18ed41..96bc0456846cc 100644 --- a/testsuite/testcases/src/consensus_reliability_tests.rs +++ b/testsuite/testcases/src/consensus_reliability_tests.rs @@ -3,12 +3,9 @@ use crate::{LoadDestination, NetworkLoadTest}; use anyhow::{anyhow, bail, Context}; -use aptos_forge::{ - test_utils::consensus_utils::{ - test_consensus_fault_tolerance, FailPointFailureInjection, NodeState, - }, - NetworkContext, NetworkTest, Result, Swarm, SwarmExt, Test, TestReport, -}; +use aptos_forge::{test_utils::consensus_utils::{ + test_consensus_fault_tolerance, FailPointFailureInjection, NodeState, +}, NetworkContext, NetworkTest, Result, Swarm, SwarmExt, Test, TestReport, NetworkContextSynchronizer}; use aptos_logger::{info, warn}; use rand::Rng; use std::{collections::HashSet, time::Duration}; @@ -296,7 +293,7 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { } impl 
NetworkTest for ChangingWorkingQuorumTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/dag_onchain_enable_test.rs b/testsuite/testcases/src/dag_onchain_enable_test.rs index 09af1aee62ba4..5838dfa96dff5 100644 --- a/testsuite/testcases/src/dag_onchain_enable_test.rs +++ b/testsuite/testcases/src/dag_onchain_enable_test.rs @@ -4,7 +4,7 @@ use crate::{generate_onchain_config_blob, NetworkLoadTest}; use anyhow::Ok; use aptos::test::CliTestFramework; -use aptos_forge::{NetworkTest, NodeExt, SwarmExt, Test}; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, NodeExt, SwarmExt, Test}; use aptos_logger::info; use aptos_sdk::bcs; use aptos_types::{ @@ -51,10 +51,14 @@ impl NetworkLoadTest for DagOnChainEnableTest { runtime.block_on(async { - let root_cli_index = cli.add_account_with_address_to_cli( - swarm.chain_info().root_account().private_key().clone(), - swarm.chain_info().root_account().address(), - ); + let root_cli_index = { + let root_account_arc = swarm.chain_info().root_account(); + let root_account = root_account_arc.lock().unwrap(); + cli.add_account_with_address_to_cli( + root_account.private_key().clone(), + root_account.address(), + ) + }; let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( &rest_client @@ -99,10 +103,14 @@ impl NetworkLoadTest for DagOnChainEnableTest { let initial_consensus_config = runtime.block_on(async { - let root_cli_index = cli.add_account_with_address_to_cli( - swarm.chain_info().root_account().private_key().clone(), - swarm.chain_info().root_account().address(), - ); + let root_cli_index = { + let root_account_arc = swarm.chain_info().root_account(); + let root_account = root_account_arc.lock().unwrap(); + cli.add_account_with_address_to_cli( + root_account.private_key().clone(), + root_account.address(), + ) + }; let current_consensus_config: 
OnChainConsensusConfig = bcs::from_bytes( &rest_client @@ -149,10 +157,14 @@ impl NetworkLoadTest for DagOnChainEnableTest { runtime.block_on(async { - let root_cli_index = cli.add_account_with_address_to_cli( - swarm.chain_info().root_account().private_key().clone(), - swarm.chain_info().root_account().address(), - ); + let root_cli_index = { + let root_account_arc = swarm.chain_info().root_account(); + let root_account = root_account_arc.lock().unwrap(); + cli.add_account_with_address_to_cli( + root_account.private_key().clone(), + root_account.address(), + ) + }; let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( &rest_client @@ -201,7 +213,7 @@ impl NetworkLoadTest for DagOnChainEnableTest { } impl NetworkTest for DagOnChainEnableTest { - fn run(&self, ctx: &mut aptos_forge::NetworkContext<'_>) -> anyhow::Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/forge_setup_test.rs b/testsuite/testcases/src/forge_setup_test.rs index 6a69224bb1bc4..54cf15a6f3632 100644 --- a/testsuite/testcases/src/forge_setup_test.rs +++ b/testsuite/testcases/src/forge_setup_test.rs @@ -4,7 +4,7 @@ use crate::generate_traffic; use anyhow::Context; use aptos_config::config::OverrideNodeConfig; -use aptos_forge::{NetworkContext, NetworkTest, Result, Test}; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test}; use aptos_logger::info; use rand::{ rngs::{OsRng, StdRng}, @@ -12,6 +12,7 @@ use rand::{ Rng, SeedableRng, }; use std::{thread, time::Duration}; +use std::ops::DerefMut; use tokio::runtime::Runtime; const STATE_SYNC_VERSION_COUNTER_NAME: &str = "aptos_state_sync_version"; @@ -25,9 +26,11 @@ impl Test for ForgeSetupTest { } impl NetworkTest for ForgeSetupTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { let mut rng = StdRng::from_seed(OsRng.gen()); let runtime = 
Runtime::new().unwrap(); + let mut ctx_locker = ctx.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); let swarm = ctx.swarm(); @@ -75,7 +78,7 @@ impl NetworkTest for ForgeSetupTest { } let duration = Duration::from_secs(10 * num_pfns); - let txn_stat = generate_traffic(ctx, &pfns, duration)?; + let txn_stat = generate_traffic(&mut ctx, &pfns, duration)?; ctx.report .report_txn_stats(self.name().to_string(), &txn_stat); diff --git a/testsuite/testcases/src/framework_upgrade.rs b/testsuite/testcases/src/framework_upgrade.rs index cd49107a815d6..6901010a43a57 100644 --- a/testsuite/testcases/src/framework_upgrade.rs +++ b/testsuite/testcases/src/framework_upgrade.rs @@ -1,11 +1,10 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 +use std::ops::DerefMut; use crate::{batch_update, generate_traffic}; use anyhow::bail; -use aptos_forge::{ - NetworkContext, NetworkTest, Result, SwarmExt, Test, DEFAULT_ROOT_PRIV_KEY, FORGE_KEY_SEED, -}; +use aptos_forge::{NetworkTest, Result, SwarmExt, Test, DEFAULT_ROOT_PRIV_KEY, FORGE_KEY_SEED, NetworkContextSynchronizer}; use aptos_keygen::KeyGen; use aptos_logger::info; use aptos_sdk::crypto::{ed25519::Ed25519PrivateKey, PrivateKey}; @@ -26,7 +25,9 @@ impl Test for FrameworkUpgrade { } impl NetworkTest for FrameworkUpgrade { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); let runtime = Runtime::new()?; let epoch_duration = Duration::from_secs(Self::EPOCH_DURATION_SECS); @@ -60,11 +61,11 @@ impl NetworkTest for FrameworkUpgrade { let msg = format!("Upgrade the nodes to version: {}", new_version); info!("{}", msg); ctx.report.report_text(msg); - runtime.block_on(batch_update(ctx, first_half, &new_version))?; + runtime.block_on(batch_update(&mut ctx, first_half, &new_version))?; // Generate some traffic let duration = 
Duration::from_secs(30); - let txn_stat = generate_traffic(ctx, &all_validators, duration)?; + let txn_stat = generate_traffic(&mut ctx, &all_validators, duration)?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, @@ -115,7 +116,7 @@ impl NetworkTest for FrameworkUpgrade { ))?; // Update the sequence number for the root account - let root_account = ctx.swarm().chain_info().root_account().address(); + let root_account = ctx.swarm().chain_info().root_account().lock().unwrap().address(); // Test the module publishing workflow let sequence_number = runtime .block_on( @@ -130,11 +131,11 @@ impl NetworkTest for FrameworkUpgrade { ctx.swarm() .chain_info() .root_account() - .set_sequence_number(sequence_number); + .lock().unwrap().set_sequence_number(sequence_number); // Generate some traffic let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(ctx, &all_validators, duration)?; + let txn_stat = generate_traffic(&mut ctx, &all_validators, duration)?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, @@ -156,10 +157,10 @@ impl NetworkTest for FrameworkUpgrade { let msg = format!("Upgrade the remaining nodes to version: {}", new_version); info!("{}", msg); ctx.report.report_text(msg); - runtime.block_on(batch_update(ctx, second_half, &new_version))?; + runtime.block_on(batch_update(&mut ctx, second_half, &new_version))?; let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(ctx, &all_validators, duration)?; + let txn_stat = generate_traffic(&mut ctx, &all_validators, duration)?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, diff --git a/testsuite/testcases/src/fullnode_reboot_stress_test.rs b/testsuite/testcases/src/fullnode_reboot_stress_test.rs index a2d8702e402a8..b54c78de9c0f5 100644 --- a/testsuite/testcases/src/fullnode_reboot_stress_test.rs +++ 
b/testsuite/testcases/src/fullnode_reboot_stress_test.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{NetworkContext, NetworkTest, Result, Swarm, Test, TestReport}; +use aptos_forge::{NetworkContext, NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport}; use rand::{seq::SliceRandom, thread_rng}; use std::time::Duration; use tokio::{runtime::Runtime, time::Instant}; @@ -47,7 +47,7 @@ impl NetworkLoadTest for FullNodeRebootStressTest { } impl NetworkTest for FullNodeRebootStressTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index dece19d2e19ed..9e0ca5ed5279c 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -27,11 +27,7 @@ pub mod validator_join_leave_test; pub mod validator_reboot_stress_test; use anyhow::Context; -use aptos_forge::{ - prometheus_metrics::{fetch_latency_breakdown, LatencyBreakdown}, - EmitJobRequest, NetworkContext, NetworkTest, NodeExt, Result, Swarm, SwarmExt, Test, - TestReport, TxnEmitter, TxnStats, Version, -}; +use aptos_forge::{prometheus_metrics::{fetch_latency_breakdown, LatencyBreakdown}, EmitJobRequest, NetworkContext, NetworkTest, NodeExt, Result, Swarm, SwarmExt, Test, TestReport, TxnEmitter, TxnStats, Version, NetworkContextSynchronizer}; use aptos_logger::info; use aptos_rest_client::Client as RestClient; use aptos_sdk::{transaction_builder::TransactionFactory, types::PeerId}; @@ -41,6 +37,7 @@ use std::{ fmt::Write, time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; +use std::ops::DerefMut; use tokio::runtime::Runtime; const WARMUP_DURATION_FRACTION: f32 = 0.07; @@ -69,25 +66,26 @@ async fn batch_update( } async fn batch_update_gradually( - ctx: &mut NetworkContext<'_>, + ctxa: NetworkContextSynchronizer<'_>, 
validators_to_update: &[PeerId], version: &Version, wait_until_healthy: bool, delay: Duration, max_wait: Duration, ) -> Result<()> { + // let mut swarm = ctx.swarm(); for validator in validators_to_update { - ctx.swarm().upgrade_validator(*validator, version).await?; + ctxa.ctx.lock().unwrap().swarm().upgrade_validator(*validator, version).await?; if wait_until_healthy { let deadline = Instant::now() + max_wait; - ctx.swarm().validator_mut(*validator).unwrap().wait_until_healthy(deadline).await?; + ctxa.ctx.lock().unwrap().swarm().validator_mut(*validator).unwrap().wait_until_healthy(deadline).await?; } if !delay.is_zero() { tokio::time::sleep(delay).await; } } - ctx.swarm().health_check().await?; + ctxa.ctx.lock().unwrap().swarm().health_check().await?; Ok(()) } @@ -135,6 +133,34 @@ pub fn generate_traffic( Ok(stats) } +#[cfg(unused)] +pub fn spawn_generate_traffic_setup<'a>( + ctx: &mut NetworkContext<'a>, + nodes: &[PeerId], +) -> Result<(TxnEmitter, EmitJobRequest, &'a mut LocalAccount)> { + let emit_job_request = ctx.emit_job.clone(); + let rng = SeedableRng::from_rng(ctx.core().rng())?; + let (emitter, emit_job_request) = + create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng)?; + let root_account = ctx.swarm().chain_info().root_account; + return Ok((emitter, emit_job_request, root_account)); +} + +#[cfg(unused)] +pub fn spawn_generate_traffic( + emitter: TxnEmitter, + emit_job_request: EmitJobRequest, + root_account: &LocalAccount, + duration: Duration, + handle: Handle, +) -> JoinHandle> { + handle.spawn(emitter.emit_txn_for( + root_account, + emit_job_request, + duration, + )) +} + pub enum LoadDestination { AllNodes, AllValidators, @@ -189,7 +215,9 @@ pub trait NetworkLoadTest: Test { } impl NetworkTest for dyn NetworkLoadTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); 
let runtime = Runtime::new().unwrap(); let start_timestamp = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -202,7 +230,7 @@ impl NetworkTest for dyn NetworkLoadTest { let rng = SeedableRng::from_rng(ctx.core().rng())?; let duration = ctx.global_duration; let stats_by_phase = self.network_load_test( - ctx, + &mut ctx, emit_job_request, duration, WARMUP_DURATION_FRACTION, @@ -247,7 +275,7 @@ impl NetworkTest for dyn NetworkLoadTest { .block_on(ctx.swarm().get_client_with_newest_ledger_version()) .context("no clients replied for end version")?; - self.finish(ctx).context("finish NetworkLoadTest ")?; + self.finish(&mut ctx).context("finish NetworkLoadTest ")?; for phase_stats in stats_by_phase.into_iter() { ctx.check_for_success( @@ -516,13 +544,21 @@ impl CompositeNetworkTest { } impl NetworkTest for CompositeNetworkTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { - for wrapper in &self.wrappers { - wrapper.setup(ctx)?; + fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { + { + let mut ctx_locker = ctxa.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); + for wrapper in &self.wrappers { + wrapper.setup(&mut ctx)?; + } } - self.test.run(ctx)?; - for wrapper in &self.wrappers { - wrapper.finish(ctx)?; + self.test.run(ctxa.clone())?; + { + let mut ctx_locker = ctxa.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); + for wrapper in &self.wrappers { + wrapper.finish(&mut ctx)?; + } } Ok(()) } diff --git a/testsuite/testcases/src/load_vs_perf_benchmark.rs b/testsuite/testcases/src/load_vs_perf_benchmark.rs index 60d794375fcd3..1f69b1b469234 100644 --- a/testsuite/testcases/src/load_vs_perf_benchmark.rs +++ b/testsuite/testcases/src/load_vs_perf_benchmark.rs @@ -3,16 +3,11 @@ use crate::{create_emitter_and_request, LoadDestination, NetworkLoadTest}; use anyhow::Context; -use aptos_forge::{ - args::TransactionTypeArg, - prometheus_metrics::{LatencyBreakdown, LatencyBreakdownSlice}, - 
success_criteria::{SuccessCriteria, SuccessCriteriaChecker}, - EmitJobMode, EmitJobRequest, NetworkContext, NetworkTest, Result, Test, TxnStats, - WorkflowProgress, -}; +use aptos_forge::{args::TransactionTypeArg, prometheus_metrics::{LatencyBreakdown, LatencyBreakdownSlice}, success_criteria::{SuccessCriteria, SuccessCriteriaChecker}, EmitJobMode, EmitJobRequest, NetworkContext, NetworkTest, Result, Test, TxnStats, WorkflowProgress, NetworkContextSynchronizer}; use aptos_logger::info; use rand::SeedableRng; use std::{fmt::Debug, time::Duration}; +use std::ops::DerefMut; use tokio::runtime::Runtime; // add larger warmup, as when we are exceeding the max load, @@ -215,7 +210,7 @@ impl LoadVsPerfBenchmark { } impl NetworkTest for LoadVsPerfBenchmark { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { assert!( self.criteria.is_empty() || self.criteria.len() == self.workloads.len(), "Invalid config, {} criteria and {} workloads given", @@ -223,6 +218,8 @@ impl NetworkTest for LoadVsPerfBenchmark { self.workloads.len(), ); + let mut ctx_locker = ctx.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); let rt = Runtime::new().unwrap(); let mut continous_job = if let Some(continuous_traffic) = &self.continuous_traffic { @@ -265,7 +262,7 @@ impl NetworkTest for LoadVsPerfBenchmark { info!("Starting for {:?}", self.workloads); results.push( self.evaluate_single( - ctx, + &mut ctx, &self.workloads, index, phase_duration diff --git a/testsuite/testcases/src/modifiers.rs b/testsuite/testcases/src/modifiers.rs index 2cdda468a1418..6b6ba34c7dfaa 100644 --- a/testsuite/testcases/src/modifiers.rs +++ b/testsuite/testcases/src/modifiers.rs @@ -2,9 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{multi_region_network_test::chunk_peers, LoadDestination, NetworkLoadTest}; -use aptos_forge::{ - GroupCpuStress, NetworkContext, NetworkTest, Swarm, SwarmChaos, SwarmCpuStress, SwarmExt, Test, -}; 
+use aptos_forge::{GroupCpuStress, NetworkContext, NetworkContextSynchronizer, NetworkTest, Swarm, SwarmChaos, SwarmCpuStress, SwarmExt, Test}; use aptos_logger::info; use aptos_types::PeerId; use rand::Rng; @@ -102,7 +100,7 @@ impl NetworkLoadTest for ExecutionDelayTest { } impl NetworkTest for ExecutionDelayTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { ::run(self, ctx) } } @@ -187,7 +185,7 @@ impl NetworkLoadTest for NetworkUnreliabilityTest { } impl NetworkTest for NetworkUnreliabilityTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { ::run(self, ctx) } } @@ -302,7 +300,7 @@ impl NetworkLoadTest for CpuChaosTest { } impl NetworkTest for CpuChaosTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/multi_region_network_test.rs b/testsuite/testcases/src/multi_region_network_test.rs index a4a60a062f818..ecd729f1c3c6b 100644 --- a/testsuite/testcases/src/multi_region_network_test.rs +++ b/testsuite/testcases/src/multi_region_network_test.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{GroupNetEm, NetworkContext, NetworkTest, Swarm, SwarmChaos, SwarmNetEm, Test}; +use aptos_forge::{GroupNetEm, NetworkContext, NetworkContextSynchronizer, NetworkTest, Swarm, SwarmChaos, SwarmNetEm, Test}; use aptos_logger::info; use aptos_types::PeerId; use itertools::{self, EitherOrBoth, Itertools}; @@ -313,7 +313,7 @@ impl NetworkLoadTest for MultiRegionNetworkEmulationTest { } impl NetworkTest for MultiRegionNetworkEmulationTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> 
anyhow::Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/network_bandwidth_test.rs b/testsuite/testcases/src/network_bandwidth_test.rs index eb8f701565440..a41f96f086a51 100644 --- a/testsuite/testcases/src/network_bandwidth_test.rs +++ b/testsuite/testcases/src/network_bandwidth_test.rs @@ -2,9 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{ - GroupNetworkBandwidth, NetworkContext, NetworkTest, SwarmChaos, SwarmNetworkBandwidth, Test, -}; +use aptos_forge::{GroupNetworkBandwidth, NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkBandwidth, Test}; pub struct NetworkBandwidthTest; @@ -65,7 +63,7 @@ impl NetworkLoadTest for NetworkBandwidthTest { } impl NetworkTest for NetworkBandwidthTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/network_loss_test.rs b/testsuite/testcases/src/network_loss_test.rs index f7f175555f7aa..5048fcf9de86e 100644 --- a/testsuite/testcases/src/network_loss_test.rs +++ b/testsuite/testcases/src/network_loss_test.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{NetworkContext, NetworkTest, SwarmChaos, SwarmNetworkLoss, Test}; +use aptos_forge::{NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkLoss, Test}; pub struct NetworkLossTest; @@ -44,7 +44,7 @@ impl NetworkLoadTest for NetworkLossTest { } impl NetworkTest for NetworkLossTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/network_partition_test.rs b/testsuite/testcases/src/network_partition_test.rs index 41659cf5c8468..066494f047c9d 100644 --- 
a/testsuite/testcases/src/network_partition_test.rs +++ b/testsuite/testcases/src/network_partition_test.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{NetworkContext, NetworkTest, SwarmChaos, SwarmNetworkPartition, Test}; +use aptos_forge::{NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkPartition, Test}; pub struct NetworkPartitionTest; @@ -53,7 +53,7 @@ impl NetworkLoadTest for NetworkPartitionTest { } impl NetworkTest for NetworkPartitionTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/partial_nodes_down_test.rs b/testsuite/testcases/src/partial_nodes_down_test.rs index 2d6c126907cc4..d90841042e96c 100644 --- a/testsuite/testcases/src/partial_nodes_down_test.rs +++ b/testsuite/testcases/src/partial_nodes_down_test.rs @@ -2,8 +2,9 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. 
// SPDX-License-Identifier: Apache-2.0 +use std::ops::DerefMut; use crate::generate_traffic; -use aptos_forge::{NetworkContext, NetworkTest, Result, Test}; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test}; use std::thread; use tokio::{runtime::Runtime, time::Duration}; @@ -16,7 +17,9 @@ impl Test for PartialNodesDown { } impl NetworkTest for PartialNodesDown { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); let runtime = Runtime::new()?; let duration = Duration::from_secs(120); let all_validators = ctx @@ -34,7 +37,7 @@ impl NetworkTest for PartialNodesDown { thread::sleep(Duration::from_secs(5)); // Generate some traffic - let txn_stat = generate_traffic(ctx, &up_nodes, duration)?; + let txn_stat = generate_traffic(&mut ctx, &up_nodes, duration)?; ctx.report .report_txn_stats(self.name().to_string(), &txn_stat); for n in &down_nodes { diff --git a/testsuite/testcases/src/performance_test.rs b/testsuite/testcases/src/performance_test.rs index f602ede7d437f..2b0018c25f16a 100644 --- a/testsuite/testcases/src/performance_test.rs +++ b/testsuite/testcases/src/performance_test.rs @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::NetworkLoadTest; -use aptos_forge::{NetworkContext, NetworkTest, Result, Test}; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test}; pub struct PerformanceBenchmark; @@ -16,7 +16,7 @@ impl Test for PerformanceBenchmark { impl NetworkLoadTest for PerformanceBenchmark {} impl NetworkTest for PerformanceBenchmark { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/public_fullnode_performance.rs b/testsuite/testcases/src/public_fullnode_performance.rs index a57ad39a9c05d..108fe0ed43d16 
100644 --- a/testsuite/testcases/src/public_fullnode_performance.rs +++ b/testsuite/testcases/src/public_fullnode_performance.rs @@ -8,10 +8,7 @@ use crate::{ }; use anyhow::Error; use aptos_config::config::{NodeConfig, OverrideNodeConfig}; -use aptos_forge::{ - NetworkContext, NetworkTest, OverrideNodeConfigFn, Result, Swarm, SwarmChaos, SwarmCpuStress, - SwarmNetEm, Test, -}; +use aptos_forge::{NetworkContext, NetworkContextSynchronizer, NetworkTest, OverrideNodeConfigFn, Result, Swarm, SwarmChaos, SwarmCpuStress, SwarmNetEm, Test}; use aptos_logger::info; use aptos_sdk::move_types::account_address::AccountAddress; use aptos_types::PeerId; @@ -125,7 +122,7 @@ impl Test for PFNPerformance { } impl NetworkTest for PFNPerformance { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/quorum_store_onchain_enable_test.rs b/testsuite/testcases/src/quorum_store_onchain_enable_test.rs index 63c8e6e505fa6..f8ab04dc62a69 100644 --- a/testsuite/testcases/src/quorum_store_onchain_enable_test.rs +++ b/testsuite/testcases/src/quorum_store_onchain_enable_test.rs @@ -4,7 +4,7 @@ use crate::{generate_onchain_config_blob, NetworkLoadTest}; use anyhow::Ok; use aptos::test::CliTestFramework; -use aptos_forge::{NetworkTest, NodeExt, SwarmExt, Test}; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, NodeExt, SwarmExt, Test}; use aptos_logger::info; use aptos_sdk::bcs; use aptos_types::{ @@ -49,10 +49,14 @@ impl NetworkLoadTest for QuorumStoreOnChainEnableTest { runtime.block_on(async { - let root_cli_index = cli.add_account_with_address_to_cli( - swarm.chain_info().root_account().private_key().clone(), - swarm.chain_info().root_account().address(), - ); + let root_cli_index = { + let root_account_arc = swarm.chain_info().root_account(); + let root_account = root_account_arc.lock().unwrap(); + cli.add_account_with_address_to_cli( + 
root_account.private_key().clone(), + root_account.address(), + ) + }; let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( &rest_client @@ -109,7 +113,7 @@ impl NetworkLoadTest for QuorumStoreOnChainEnableTest { } impl NetworkTest for QuorumStoreOnChainEnableTest { - fn run(&self, ctx: &mut aptos_forge::NetworkContext<'_>) -> anyhow::Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/reconfiguration_test.rs b/testsuite/testcases/src/reconfiguration_test.rs index 57f99767eb194..ece02cd15d891 100644 --- a/testsuite/testcases/src/reconfiguration_test.rs +++ b/testsuite/testcases/src/reconfiguration_test.rs @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 use anyhow::anyhow; -use aptos_forge::{NetworkContext, NetworkTest, Result, Test}; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test}; pub struct ReconfigurationTest; @@ -14,7 +14,7 @@ impl Test for ReconfigurationTest { } impl NetworkTest for ReconfigurationTest { - fn run(&self, _ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { Err(anyhow!("Not supported in aptos-framework yet")) } // TODO(https://github.com/aptos-labs/aptos-core/issues/317): add back after support those transactions in aptos-framework diff --git a/testsuite/testcases/src/state_sync_performance.rs b/testsuite/testcases/src/state_sync_performance.rs index 22f43cc7a1569..6556ffa8638b6 100644 --- a/testsuite/testcases/src/state_sync_performance.rs +++ b/testsuite/testcases/src/state_sync_performance.rs @@ -2,12 +2,10 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. 
// SPDX-License-Identifier: Apache-2.0 +use std::ops::DerefMut; use crate::generate_traffic; use anyhow::bail; -use aptos_forge::{ - get_highest_synced_epoch, get_highest_synced_version, NetworkContext, NetworkTest, Result, - SwarmExt, Test, -}; +use aptos_forge::{get_highest_synced_epoch, get_highest_synced_version, NetworkContext, NetworkContextSynchronizer, NetworkTest, Result, SwarmExt, Test}; use aptos_logger::info; use aptos_sdk::move_types::account_address::AccountAddress; use std::time::Instant; @@ -28,18 +26,20 @@ impl Test for StateSyncFullnodePerformance { } impl NetworkTest for StateSyncFullnodePerformance { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { - let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name())?; + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); + let all_fullnodes = get_fullnodes_and_check_setup(&mut ctx, self.name())?; // Emit a lot of traffic and ensure the fullnodes can all sync - emit_traffic_and_ensure_bounded_sync(ctx, &all_fullnodes)?; + emit_traffic_and_ensure_bounded_sync(&mut ctx, &all_fullnodes)?; // Stop and reset the fullnodes so they start syncing from genesis - stop_and_reset_nodes(ctx, &all_fullnodes, &[])?; + stop_and_reset_nodes(&mut ctx, &all_fullnodes, &[])?; // Wait for all nodes to catch up to the highest synced version // then calculate and display the throughput results. 
- ensure_state_sync_transaction_throughput(ctx, self.name()) + ensure_state_sync_transaction_throughput(&mut ctx, self.name()) } } @@ -54,11 +54,13 @@ impl Test for StateSyncFullnodeFastSyncPerformance { } impl NetworkTest for StateSyncFullnodeFastSyncPerformance { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { - let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name())?; + fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { + let mut ctx_locker = ctxa.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); + let all_fullnodes = get_fullnodes_and_check_setup(&mut ctx, self.name())?; // Emit a lot of traffic and ensure the fullnodes can all sync - emit_traffic_and_ensure_bounded_sync(ctx, &all_fullnodes)?; + emit_traffic_and_ensure_bounded_sync(&mut ctx, &all_fullnodes)?; // Wait for an epoch change to ensure fast sync can download all the latest states info!("Waiting for an epoch change."); @@ -103,12 +105,12 @@ impl NetworkTest for StateSyncFullnodeFastSyncPerformance { ); // Stop and reset the fullnodes so they start syncing from genesis - stop_and_reset_nodes(ctx, &all_fullnodes, &[])?; + stop_and_reset_nodes(&mut ctx, &all_fullnodes, &[])?; // Wait for all nodes to catch up to the highest synced epoch // then calculate and display the throughput results. display_state_sync_state_throughput( - ctx, + &mut ctx, self.name(), highest_synced_epoch, number_of_state_values, @@ -130,7 +132,9 @@ impl Test for StateSyncValidatorPerformance { } impl NetworkTest for StateSyncValidatorPerformance { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { + let mut ctx_locker = ctxa.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); // Verify we have at least 7 validators (i.e., 3f+1, where f is 2) // so we can kill 2 validators but still make progress. 
let all_validators = ctx @@ -155,16 +159,16 @@ impl NetworkTest for StateSyncValidatorPerformance { ); // Generate some traffic through the validators. - emit_traffic_and_ensure_bounded_sync(ctx, &all_validators)?; + emit_traffic_and_ensure_bounded_sync(&mut ctx, &all_validators)?; // Stop and reset two validators so they start syncing from genesis info!("Deleting data for two validators!"); let validators_to_reset = &all_validators[0..2]; - stop_and_reset_nodes(ctx, &[], validators_to_reset)?; + stop_and_reset_nodes(&mut ctx, &[], validators_to_reset)?; // Wait for all nodes to catch up to the highest synced version // then calculate and display the throughput results. - ensure_state_sync_transaction_throughput(ctx, self.name()) + ensure_state_sync_transaction_throughput(&mut ctx, self.name()) } } diff --git a/testsuite/testcases/src/three_region_simulation_test.rs b/testsuite/testcases/src/three_region_simulation_test.rs index 916bbcc86c322..a72ad243c8487 100644 --- a/testsuite/testcases/src/three_region_simulation_test.rs +++ b/testsuite/testcases/src/three_region_simulation_test.rs @@ -2,10 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{ - GroupNetworkBandwidth, GroupNetworkDelay, NetworkContext, NetworkTest, Swarm, SwarmChaos, - SwarmNetworkBandwidth, SwarmNetworkDelay, Test, -}; +use aptos_forge::{GroupNetworkBandwidth, GroupNetworkDelay, NetworkContext, NetworkContextSynchronizer, NetworkTest, Swarm, SwarmChaos, SwarmNetworkBandwidth, SwarmNetworkDelay, Test}; use aptos_logger::info; /// Represents a test that simulates a network with 3 regions, all in the same cloud. 
@@ -102,7 +99,7 @@ impl NetworkLoadTest for ThreeRegionSameCloudSimulationTest { } impl NetworkTest for ThreeRegionSameCloudSimulationTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/twin_validator_test.rs b/testsuite/testcases/src/twin_validator_test.rs index 53905027f22b5..64f93ad453095 100644 --- a/testsuite/testcases/src/twin_validator_test.rs +++ b/testsuite/testcases/src/twin_validator_test.rs @@ -1,9 +1,10 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 +use std::ops::DerefMut; use crate::NetworkLoadTest; use anyhow::Context; -use aptos_forge::{NetworkContext, NetworkTest, NodeExt, Test}; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, NodeExt, Test}; use aptos_sdk::move_types::account_address::AccountAddress; use std::time::{Duration, Instant}; use tokio::runtime::Runtime; @@ -19,56 +20,60 @@ impl Test for TwinValidatorTest { impl NetworkLoadTest for TwinValidatorTest {} impl NetworkTest for TwinValidatorTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> anyhow::Result<()> { - let runtime = Runtime::new().unwrap(); + fn run(&self, ctxa: NetworkContextSynchronizer) -> anyhow::Result<()> { + { + let mut ctx_locker = ctxa.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); + let runtime = Runtime::new().unwrap(); - let all_validators_ids = ctx - .swarm() - .validators() - .map(|v| v.peer_id()) - .collect::>(); - let validator_count = all_validators_ids.len(); - let twin_count = 2; - runtime.block_on(async { - for i in 0..twin_count { - let main_id: AccountAddress = all_validators_ids[i]; - let twin_id = all_validators_ids[i + validator_count - twin_count]; - ctx.swarm() - .validator_mut(twin_id) - .unwrap() - .clear_storage() - .await - .context(format!( - "Error while clearing storage and stopping {twin_id}" - ))?; - let main_identity = ctx - .swarm() 
- .validator_mut(main_id) - .unwrap() - .get_identity() - .await - .context(format!("Error while getting identity for {main_id}"))?; - ctx.swarm() - .validator_mut(twin_id) - .unwrap() - .set_identity(main_identity) - .await - .context(format!("Error while setting identity for {twin_id}"))?; - ctx.swarm() - .validator_mut(twin_id) - .unwrap() - .start() - .await - .context(format!("Error while starting {twin_id}"))?; - ctx.swarm() - .validator_mut(twin_id) - .unwrap() - .wait_until_healthy(Instant::now() + Duration::from_secs(300)) - .await - .context(format!("Error while waiting for {twin_id}"))?; - } - Ok::<(), anyhow::Error>(()) - })?; - ::run(self, ctx) + let all_validators_ids = ctx + .swarm() + .validators() + .map(|v| v.peer_id()) + .collect::>(); + let validator_count = all_validators_ids.len(); + let twin_count = 2; + runtime.block_on(async { + for i in 0..twin_count { + let main_id: AccountAddress = all_validators_ids[i]; + let twin_id = all_validators_ids[i + validator_count - twin_count]; + ctx.swarm() + .validator_mut(twin_id) + .unwrap() + .clear_storage() + .await + .context(format!( + "Error while clearing storage and stopping {twin_id}" + ))?; + let main_identity = ctx + .swarm() + .validator_mut(main_id) + .unwrap() + .get_identity() + .await + .context(format!("Error while getting identity for {main_id}"))?; + ctx.swarm() + .validator_mut(twin_id) + .unwrap() + .set_identity(main_identity) + .await + .context(format!("Error while setting identity for {twin_id}"))?; + ctx.swarm() + .validator_mut(twin_id) + .unwrap() + .start() + .await + .context(format!("Error while starting {twin_id}"))?; + ctx.swarm() + .validator_mut(twin_id) + .unwrap() + .wait_until_healthy(Instant::now() + Duration::from_secs(300)) + .await + .context(format!("Error while waiting for {twin_id}"))?; + } + Ok::<(), anyhow::Error>(()) + })?; + } + ::run(self, ctxa) } } diff --git a/testsuite/testcases/src/two_traffics_test.rs b/testsuite/testcases/src/two_traffics_test.rs 
index dd824174a4ee9..6af851413f251 100644 --- a/testsuite/testcases/src/two_traffics_test.rs +++ b/testsuite/testcases/src/two_traffics_test.rs @@ -4,10 +4,7 @@ use crate::{ create_emitter_and_request, traffic_emitter_runtime, LoadDestination, NetworkLoadTest, }; -use aptos_forge::{ - success_criteria::{SuccessCriteria, SuccessCriteriaChecker}, - EmitJobRequest, NetworkContext, NetworkTest, Result, Swarm, Test, TestReport, -}; +use aptos_forge::{success_criteria::{SuccessCriteria, SuccessCriteriaChecker}, EmitJobRequest, NetworkTest, Result, Swarm, Test, TestReport, NetworkContextSynchronizer}; use aptos_logger::info; use rand::{rngs::OsRng, Rng, SeedableRng}; use std::time::{Duration, Instant}; @@ -78,7 +75,7 @@ impl NetworkLoadTest for TwoTrafficsTest { } impl NetworkTest for TwoTrafficsTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/validator_join_leave_test.rs b/testsuite/testcases/src/validator_join_leave_test.rs index 092871293febc..735b465fa0554 100644 --- a/testsuite/testcases/src/validator_join_leave_test.rs +++ b/testsuite/testcases/src/validator_join_leave_test.rs @@ -1,12 +1,10 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 +use std::ops::DerefMut; use crate::{LoadDestination, NetworkLoadTest}; use aptos::{account::create::DEFAULT_FUNDED_COINS, test::CliTestFramework}; -use aptos_forge::{ - reconfig, NetworkContext, NetworkTest, NodeExt, Result, Swarm, SwarmExt, Test, TestReport, - FORGE_KEY_SEED, -}; +use aptos_forge::{reconfig, NetworkContext, NetworkTest, NodeExt, Result, Swarm, SwarmExt, Test, TestReport, FORGE_KEY_SEED, NetworkContextSynchronizer}; use aptos_keygen::KeyGen; use aptos_logger::info; use aptos_sdk::crypto::{ed25519::Ed25519PrivateKey, PrivateKey}; @@ -131,20 +129,26 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { .await .unwrap(); + let root_account_arcmutex = 
swarm.chain_info().root_account(); + let mut root_account_locker = root_account_arcmutex.lock().unwrap(); reconfig( &rest_client, &transaction_factory, - swarm.chain_info().root_account(), + root_account_locker.deref_mut(), ) .await; } - reconfig( - &rest_client, - &transaction_factory, - swarm.chain_info().root_account(), - ) - .await; + { + let root_account_arcmutex = swarm.chain_info().root_account(); + let mut root_account_locker = root_account_arcmutex.lock().unwrap(); + reconfig( + &rest_client, + &transaction_factory, + root_account_locker.deref_mut(), + ) + .await; + } }); // Wait for 1/3 of the test duration. @@ -156,20 +160,26 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { cli.join_validator_set(*operator_index, None).await.unwrap(); + let root_account_arcmutex = swarm.chain_info().root_account(); + let mut root_account_locker = root_account_arcmutex.lock().unwrap(); reconfig( &rest_client, &transaction_factory, - swarm.chain_info().root_account(), + root_account_locker.deref_mut(), ) .await; } - reconfig( - &rest_client, - &transaction_factory, - swarm.chain_info().root_account(), - ) - .await; + { + let root_account_arcmutex = swarm.chain_info().root_account(); + let mut root_account_locker = root_account_arcmutex.lock().unwrap(); + reconfig( + &rest_client, + &transaction_factory, + root_account_locker.deref_mut(), + ) + .await; + } }); // Wait for all nodes to synchronize and stabilize. 
@@ -185,7 +195,7 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { } impl NetworkTest for ValidatorJoinLeaveTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { ::run(self, ctx) } } diff --git a/testsuite/testcases/src/validator_reboot_stress_test.rs b/testsuite/testcases/src/validator_reboot_stress_test.rs index 9355ac97a99d8..37d9fddc6ef4e 100644 --- a/testsuite/testcases/src/validator_reboot_stress_test.rs +++ b/testsuite/testcases/src/validator_reboot_stress_test.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::NetworkLoadTest; -use aptos_forge::{NetworkContext, NetworkTest, Result, Swarm, Test, TestReport}; +use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport}; use rand::{seq::SliceRandom, thread_rng}; use std::time::Duration; use tokio::{runtime::Runtime, time::Instant}; @@ -61,7 +61,7 @@ impl NetworkLoadTest for ValidatorRebootStressTest { } impl NetworkTest for ValidatorRebootStressTest { - fn run(&self, ctx: &mut NetworkContext<'_>) -> Result<()> { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { ::run(self, ctx) } } From 633dca22bbc4d03fb2de2bd5e6712932c394fd9b Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Thu, 16 May 2024 16:11:57 -0400 Subject: [PATCH 03/28] upgrade_and_gather_stats() on all phases cleanup --- testsuite/forge/src/interface/network.rs | 5 ++ testsuite/testcases/src/compatibility_test.rs | 83 +++++++++++-------- 2 files changed, 55 insertions(+), 33 deletions(-) diff --git a/testsuite/forge/src/interface/network.rs b/testsuite/forge/src/interface/network.rs index 14752533f8073..b90cd80f7103f 100644 --- a/testsuite/forge/src/interface/network.rs +++ b/testsuite/forge/src/interface/network.rs @@ -33,6 +33,11 @@ impl<'t> NetworkContextSynchronizer<'t> { ctx: Arc::new(Mutex::new(ctx)), } } + + pub fn report_text(&self, text: String) { + let mut locker = self.ctx.lock().unwrap(); + 
locker.report.report_text(text); + } } pub struct NetworkContext<'t> { diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index b04b34eba1ea1..b3640f649d155 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -46,14 +46,10 @@ async fn stat_gather_task( emit_job_request: EmitJobRequest, source_account: Arc>, upgrade_traffic_chunk_duration: Duration, - // handle: &Handle, done: Arc, ) -> Result>{ let mut upgrade_stats = vec![]; while done.load(Ordering::Relaxed) == false { - // let upgrading_stats = spawn_generate_traffic(emitter.clone(), emit_job_request.clone(), &source_account, upgrade_traffic_chunk_duration, handle.clone()).await??; - // let mut account_locker = source_account.lock().unwrap(); - // let source_account = account_locker.deref_mut(); let upgrading_stats = emitter.clone().emit_txn_for( source_account.clone(), emit_job_request.clone(), @@ -65,6 +61,7 @@ async fn stat_gather_task( Ok(statsum) } +#[cfg(unused)] fn traffic_task( ctxa: NetworkContextSynchronizer, nodes: &[PeerId], @@ -104,7 +101,6 @@ fn traffic_task( emit_job_request, source_account, upgrade_traffic_chunk_duration, - // traffic_runtime.handle(), upgrade_done.clone(), )) } @@ -118,23 +114,18 @@ fn upgrade_and_gather_stats( wait_until_healthy: bool, delay: Duration, max_wait: Duration, - // handle: &Handle, // traffic args nodes: &[PeerId], - //traffic_handle: &Handle, ) -> Result> { let upgrade_done = Arc::new(AtomicBool::new(false)); - // let (emitter,emit_job_request,source_account) = { - // (emitter, emit_job_request, root_account) - // }; let mut emitter_ctx = ctxa.clone(); let mut stats_result : Result> = Ok(None); let mut upgrade_result : Result<()> = Ok(()); std::thread::scope(|scopev| { + // emit trafic and gather stats scopev.spawn(|| { let mut ctx_locker = emitter_ctx.ctx.lock().unwrap(); let mut ctx = ctx_locker.deref_mut(); - // spawn_generate_traffic_setup(ctx, 
nodes)? let emit_job_request = ctx.emit_job.clone(); let rng = SeedableRng::from_rng(ctx.core().rng()).unwrap(); let (emitter, emit_job_request) = @@ -153,37 +144,24 @@ fn upgrade_and_gather_stats( return; } }; - // let upgrade_joiner = handle.spawn(upgrade_task(ctx, validators_to_update, version, wait_until_healthy, delay, max_wait, upgrade_done.clone())); let upgrade_traffic_chunk_duration = Duration::from_secs(15); stats_result = traffic_runtime.block_on(stat_gather_task( emitter, emit_job_request, source_account, upgrade_traffic_chunk_duration, - // traffic_runtime.handle(), upgrade_done.clone(), )); }); + // do upgrade scopev.spawn(|| { - // let mut ctx = ctxmut.lock().unwrap(); - // let mut ctx = ctx.get_mut(); let runtime = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap(); upgrade_result = runtime.block_on(batch_update_gradually(ctxa, validators_to_update, version, wait_until_healthy, delay, max_wait)); upgrade_done.store(true, Ordering::Relaxed); }); }); - // let mut upgrade_stats = vec![]; - // while upgrade_done.load(Ordering::Relaxed) == false { - // let upgrading_stats = spawn_generate_traffic(emitter, emit_job_request, source_account, upgrade_traffic_chunk_duration, traffic_handle.clone()).await??; - // upgrade_stats.push(upgrading_stats); - // } - // upgrade_joiner.await??; - // let result = batch_update_gradually(ctx.swarm(), validators_to_update, version, wait_until_healthy, delay, max_wait).await; - // upgrade_done.store(true, Ordering::Relaxed); - // let stats_result = stats_joiner.await; - // traffic_runtime.shutdown_timeout(Duration::from_millis(500)); - // result?; + upgrade_result?; stats_result } @@ -213,7 +191,7 @@ impl NetworkTest for SimpleValidatorUpgrade { old_version, new_version ); info!("{}", msg); - ctxa.ctx.lock().unwrap().report.report_text(msg); + ctxa.report_text(msg); // Split the swarm into 2 parts if ctxa.ctx.lock().unwrap().swarm().validators().count() < 4 { @@ -236,7 +214,7 @@ impl NetworkTest for 
SimpleValidatorUpgrade { old_version ); info!("{}", msg); - ctxa.ctx.lock().unwrap().report.report_text(msg); + ctxa.report_text(msg); // Generate some traffic { @@ -253,7 +231,7 @@ impl NetworkTest for SimpleValidatorUpgrade { new_version ); info!("{}", msg); - ctxa.ctx.lock().unwrap().report.report_text(msg); + ctxa.report_text(msg); // runtime.block_on(batch_update_gradually(ctx.swarm(), &[first_node], &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; let upgrade_stats = upgrade_and_gather_stats( ctxa.clone(), @@ -262,9 +240,7 @@ impl NetworkTest for SimpleValidatorUpgrade { upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait, - // runtime.handle(), &[first_node], - //traffic_runtime.handle(), )?; let upgrade_stats_sum = upgrade_stats.into_iter().reduce(|a,b| &a + &b); if let Some(upgrade_stats_sum) = upgrade_stats_sum { @@ -291,8 +267,29 @@ impl NetworkTest for SimpleValidatorUpgrade { ); info!("{}", msg); ctx.report.report_text(msg); + } - runtime.block_on(batch_update_gradually(ctxa.clone(), &first_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; + // upgrade the rest of the first half + let upgrade2_stats = upgrade_and_gather_stats( + ctxa.clone(), + &first_batch, + &new_version, + upgrade_wait_for_healthy, + upgrade_node_delay, + upgrade_max_wait, + &first_batch, + )?; + let upgrade2_stats_sum = upgrade2_stats.into_iter().reduce(|a,b| &a + &b); + if let Some(upgrade2_stats_sum) = upgrade2_stats_sum { + ctxa.ctx.lock().unwrap().report.report_txn_stats( + format!("{}::half-validator-upgrading", self.name()), + &upgrade2_stats_sum, + ); + } + // runtime.block_on(batch_update_gradually(ctxa.clone(), &first_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; + { + let mut ctx_locker = ctxa.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); // Generate some traffic let txn_stat_half = generate_traffic(&mut ctx, &first_batch, duration)?; 
@@ -307,7 +304,27 @@ impl NetworkTest for SimpleValidatorUpgrade { let msg = format!("4. upgrading second batch to new version: {}", new_version); info!("{}", msg); ctx.report.report_text(msg); - runtime.block_on(batch_update_gradually(ctxa.clone(), &second_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; + } + let upgrade3_stats = upgrade_and_gather_stats( + ctxa.clone(), + &second_batch, + &new_version, + upgrade_wait_for_healthy, + upgrade_node_delay, + upgrade_max_wait, + &second_batch, + )?; + let upgrade3_stats_sum = upgrade3_stats.into_iter().reduce(|a,b| &a + &b); + if let Some(upgrade3_stats_sum) = upgrade3_stats_sum { + ctxa.ctx.lock().unwrap().report.report_txn_stats( + format!("{}::rest-validator-upgrading", self.name()), + &upgrade3_stats_sum, + ); + } + // runtime.block_on(batch_update_gradually(ctxa.clone(), &second_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; + { + let mut ctx_locker = ctxa.ctx.lock().unwrap(); + let mut ctx = ctx_locker.deref_mut(); // Generate some traffic let txn_stat_all = generate_traffic(&mut ctx, &second_batch, duration)?; From e5d5d5ec338766338d49e7a753d6523f5f8385e1 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 17 May 2024 16:38:42 -0400 Subject: [PATCH 04/28] lock reference fix --- .../src/emitter/account_minter.rs | 54 ++++++++++--------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/crates/transaction-emitter-lib/src/emitter/account_minter.rs b/crates/transaction-emitter-lib/src/emitter/account_minter.rs index abedb58cf18b0..2d309ba381f7d 100644 --- a/crates/transaction-emitter-lib/src/emitter/account_minter.rs +++ b/crates/transaction-emitter-lib/src/emitter/account_minter.rs @@ -413,6 +413,14 @@ impl<'t> AccountMinter<'t> { ); let mut i = 0; let mut seed_accounts = vec![]; + let source_account = match new_source_account { + None => { + self.source_account.get_root_account().clone() + }, + Some(param_account) => { 
+ Arc::new(std::sync::Mutex::new(param_account)) + }, + }; while i < seed_account_num { let batch_size = min(max_submit_batch_size, seed_account_num - i); let mut rng = StdRng::from_rng(self.rng()).unwrap(); @@ -423,21 +431,12 @@ impl<'t> AccountMinter<'t> { let create_requests: Vec<_> = batch .iter() .map(|account| { - if let Some(account) = &mut new_source_account { - create_and_fund_account_request( - account, - coins_per_seed_account, - account.public_key(), - txn_factory, - ) - } else { - create_and_fund_account_request( - self.source_account.get_root_account().lock().unwrap().deref(), - coins_per_seed_account, - account.public_key(), - txn_factory, - ) - } + create_and_fund_account_request( + source_account.clone(), + coins_per_seed_account, + account.public_key(), + txn_factory, + ) }) .collect(); txn_executor @@ -481,16 +480,19 @@ impl<'t> AccountMinter<'t> { coins_for_source: u64, ) -> Result { const NUM_TRIES: usize = 3; + let root_account = self.source_account.get_root_account(); + let root_address = root_account.lock().unwrap().address(); for i in 0..NUM_TRIES { - self.source_account.get_root_account().lock().unwrap().set_sequence_number( - txn_executor - .query_sequence_number(self.source_account.get_root_account().lock().unwrap().address()) - .await?, - ); + { + let new_sequence_number = txn_executor + .query_sequence_number(root_address) + .await?; + root_account.lock().unwrap().set_sequence_number(new_sequence_number); + } let new_source_account = LocalAccount::generate(self.rng()); let txn = create_and_fund_account_request( - self.source_account.get_root_account().lock().unwrap().deref(), + root_account.clone(), coins_for_source, new_source_account.public_key(), &self.txn_factory, @@ -541,12 +543,14 @@ async fn create_and_fund_new_accounts( .chunks(max_num_accounts_per_batch) .map(|chunk| chunk.to_vec()) .collect::>(); + let source_address = source_account.address(); + let source_account = Arc::new(std::sync::Mutex::new(source_account)); for batch 
in accounts_by_batch { let creation_requests: Vec<_> = batch .iter() .map(|account| { create_and_fund_account_request( - &source_account, + source_account.clone(), coins_per_new_account, account.public_key(), txn_factory, @@ -557,19 +561,19 @@ async fn create_and_fund_new_accounts( txn_executor .execute_transactions_with_counter(&creation_requests, counters) .await - .with_context(|| format!("Account {} couldn't mint", source_account.address()))?; + .with_context(|| format!("Account {} couldn't mint", source_address))?; } Ok(()) } pub fn create_and_fund_account_request( - creation_account: &LocalAccount, + creation_account: Arc>, amount: u64, pubkey: &Ed25519PublicKey, txn_factory: &TransactionFactory, ) -> SignedTransaction { let auth_key = AuthenticationKey::ed25519(pubkey); - creation_account.sign_with_transaction_builder(txn_factory.payload( + creation_account.lock().unwrap().sign_with_transaction_builder(txn_factory.payload( aptos_stdlib::aptos_account_transfer(auth_key.account_address(), amount), )) } From 0fafd8e51e2e17ae3405d929dd4d4a077082f3d3 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 31 May 2024 16:19:15 -0400 Subject: [PATCH 05/28] forge re-re-refactor Arc> -> Arc because it contains an atomic counter which hides mutability and is safe, no additional mutex needed or desired Some back to just &LocalAccount Add tokio Handle to NetworkContextSynchronizer and use it for async-ness inside NetworkTest run() implementations. 
NetworkContextSynchronizer Arc<Mutex<NetworkContext<'t>>> -> Arc<tokio::sync::Mutex<NetworkContext<'t>>> because tokio contaminates and enblobifies all --- Cargo.lock | 11 ++ .../src/emitter/account_minter.rs | 23 ++- .../src/emitter/mod.rs | 13 +- .../src/emitter/submission_worker.rs | 20 +- .../transaction-emitter-lib/src/wrappers.rs | 6 +- .../src/account_generator.rs | 4 +- .../src/accounts_pool_wrapper.rs | 15 +- .../src/batch_transfer.rs | 4 +- .../src/bounded_batch_wrapper.rs | 3 +- .../src/call_custom_modules.rs | 9 +- .../src/entry_points.rs | 3 +- crates/transaction-generator-lib/src/lib.rs | 12 +- .../src/p2p_transaction_generator.rs | 9 +- .../src/publish_modules.rs | 5 +- .../src/transaction_mix_generator.rs | 2 +- .../src/workflow_delegator.rs | 2 +- execution/executor-benchmark/src/lib.rs | 8 +- testsuite/forge-cli/src/main.rs | 41 ++-- testsuite/forge/src/backend/k8s/swarm.rs | 6 +- testsuite/forge/src/backend/local/swarm.rs | 4 +- testsuite/forge/src/interface/aptos.rs | 47 ++--- testsuite/forge/src/interface/chain_info.rs | 15 +- testsuite/forge/src/interface/network.rs | 14 +- testsuite/forge/src/runner.rs | 4 +- .../smoke-test/src/aptos/error_report.rs | 2 +- .../smoke-test/src/aptos/move_test_helpers.rs | 2 +- testsuite/smoke-test/src/indexer.rs | 2 +- testsuite/smoke-test/src/keyless.rs | 6 +- testsuite/smoke-test/src/rosetta.rs | 6 +- testsuite/smoke-test/src/test_utils.rs | 8 +- testsuite/smoke-test/src/txn_emitter.rs | 3 +- testsuite/testcases/Cargo.toml | 1 + testsuite/testcases/src/compatibility_test.rs | 97 +++++----- .../testcases/src/dag_onchain_enable_test.rs | 9 +- testsuite/testcases/src/forge_setup_test.rs | 28 +-- testsuite/testcases/src/framework_upgrade.rs | 40 ++-- testsuite/testcases/src/lib.rs | 181 +++++++++--------- .../testcases/src/load_vs_perf_benchmark.rs | 18 +- .../testcases/src/partial_nodes_down_test.rs | 16 +- .../src/quorum_store_onchain_enable_test.rs | 3 +- .../testcases/src/reconfiguration_test.rs | 2 +- .../testcases/src/state_sync_performance.rs | 96 ++++++----
.../testcases/src/twin_validator_test.rs | 30 +-- .../src/validator_join_leave_test.rs | 22 +-- 44 files changed, 458 insertions(+), 394 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 197dee80062a6..d8c6404d7d846 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3827,6 +3827,7 @@ dependencies = [ "rand 0.7.3", "reqwest", "tokio", + "tokio-scoped", ] [[package]] @@ -15467,6 +15468,16 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-scoped" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4beb8ba13bc53ac53ce1d52b42f02e5d8060f0f42138862869beb769722b256" +dependencies = [ + "tokio", + "tokio-stream", +] + [[package]] name = "tokio-stream" version = "0.1.14" diff --git a/crates/transaction-emitter-lib/src/emitter/account_minter.rs b/crates/transaction-emitter-lib/src/emitter/account_minter.rs index 2d309ba381f7d..6001903c4b6ae 100644 --- a/crates/transaction-emitter-lib/src/emitter/account_minter.rs +++ b/crates/transaction-emitter-lib/src/emitter/account_minter.rs @@ -29,11 +29,10 @@ use std::{ sync::Arc, time::{Duration, Instant}, }; -use std::ops::Deref; use aptos_types::account_address::AccountAddress; pub struct SourceAccountManager<'t> { - pub source_account: Arc>, + pub source_account: Arc, pub txn_executor: &'t dyn ReliableTransactionSubmitter, pub req: &'t EmitJobRequest, pub txn_factory: TransactionFactory, @@ -45,14 +44,14 @@ impl<'t> RootAccountHandle for SourceAccountManager<'t> { self.check_approve_funds(amount, reason).await.unwrap(); } - fn get_root_account(&self) -> Arc> { + fn get_root_account(&self) -> Arc { self.source_account.clone() } } impl<'t> SourceAccountManager<'t> { fn source_account_address(&self) -> AccountAddress { - self.source_account.lock().unwrap().address() + self.source_account.address() } // returns true if we might want to recheck the volume, as it was auto-approved. 
@@ -132,7 +131,7 @@ impl<'t> SourceAccountManager<'t> { info!("Minting new coins to root"); let txn = self - .source_account.lock().unwrap() + .source_account .sign_with_transaction_builder(self.txn_factory.payload( aptos_stdlib::aptos_coin_mint(self.source_account_address(), amount), )); @@ -399,7 +398,7 @@ impl<'t> AccountMinter<'t> { pub async fn create_and_fund_seed_accounts( &mut self, - mut new_source_account: Option, + new_source_account: Option, txn_executor: &dyn ReliableTransactionSubmitter, account_generator: Box, seed_account_num: usize, @@ -418,7 +417,7 @@ impl<'t> AccountMinter<'t> { self.source_account.get_root_account().clone() }, Some(param_account) => { - Arc::new(std::sync::Mutex::new(param_account)) + Arc::new(param_account) }, }; while i < seed_account_num { @@ -481,13 +480,13 @@ impl<'t> AccountMinter<'t> { ) -> Result { const NUM_TRIES: usize = 3; let root_account = self.source_account.get_root_account(); - let root_address = root_account.lock().unwrap().address(); + let root_address = root_account.address(); for i in 0..NUM_TRIES { { let new_sequence_number = txn_executor .query_sequence_number(root_address) .await?; - root_account.lock().unwrap().set_sequence_number(new_sequence_number); + root_account.set_sequence_number(new_sequence_number); } let new_source_account = LocalAccount::generate(self.rng()); @@ -544,7 +543,7 @@ async fn create_and_fund_new_accounts( .map(|chunk| chunk.to_vec()) .collect::>(); let source_address = source_account.address(); - let source_account = Arc::new(std::sync::Mutex::new(source_account)); + let source_account = Arc::new(source_account); for batch in accounts_by_batch { let creation_requests: Vec<_> = batch .iter() @@ -567,13 +566,13 @@ async fn create_and_fund_new_accounts( } pub fn create_and_fund_account_request( - creation_account: Arc>, + creation_account: Arc, amount: u64, pubkey: &Ed25519PublicKey, txn_factory: &TransactionFactory, ) -> SignedTransaction { let auth_key = 
AuthenticationKey::ed25519(pubkey); - creation_account.lock().unwrap().sign_with_transaction_builder(txn_factory.payload( + creation_account.sign_with_transaction_builder(txn_factory.payload( aptos_stdlib::aptos_account_transfer(auth_key.account_address(), amount), )) } diff --git a/crates/transaction-emitter-lib/src/emitter/mod.rs b/crates/transaction-emitter-lib/src/emitter/mod.rs index 248f3e595c51e..f8ae30e7cb147 100644 --- a/crates/transaction-emitter-lib/src/emitter/mod.rs +++ b/crates/transaction-emitter-lib/src/emitter/mod.rs @@ -17,7 +17,7 @@ use crate::emitter::{ use again::RetryPolicy; use anyhow::{ensure, format_err, Result}; use aptos_config::config::DEFAULT_MAX_SUBMIT_TRANSACTION_BATCH_SIZE; -use aptos_logger::{debug, error, info, sample, sample::SampleRate, warn}; +use aptos_logger::{error, info, sample, sample::SampleRate, warn}; use aptos_rest_client::{aptos_api_types::AptosErrorCode, error::RestError, Client as RestClient}; use aptos_sdk::{ move_types::account_address::AccountAddress, @@ -41,7 +41,6 @@ use std::{ }, time::{Duration, Instant}, }; -use std::ops::DerefMut; use tokio::{runtime::Handle, task::JoinHandle, time}; // Max is 100k TPS for 3 hours @@ -670,7 +669,7 @@ impl TxnEmitter { pub async fn start_job( &mut self, - root_account: Arc>, + root_account: Arc, req: EmitJobRequest, stats_tracking_phases: usize, ) -> Result { @@ -816,7 +815,7 @@ impl TxnEmitter { async fn emit_txn_for_impl( mut self, - source_account: Arc>, + source_account: Arc, emit_job_request: EmitJobRequest, duration: Duration, print_stats_interval: Option, @@ -852,7 +851,7 @@ impl TxnEmitter { pub async fn emit_txn_for( self, - source_account: Arc>, + source_account: Arc, emit_job_request: EmitJobRequest, duration: Duration, ) -> Result { @@ -862,7 +861,7 @@ impl TxnEmitter { pub async fn emit_txn_for_with_stats( self, - source_account: Arc>, + source_account: Arc, emit_job_request: EmitJobRequest, duration: Duration, interval_secs: u64, @@ -1134,7 +1133,7 @@ pub fn 
parse_seed(seed_string: &str) -> [u8; 32] { } pub async fn create_accounts( - root_account: Arc>, + root_account: Arc, txn_factory: &TransactionFactory, account_generator: Box, req: &EmitJobRequest, diff --git a/crates/transaction-emitter-lib/src/emitter/submission_worker.rs b/crates/transaction-emitter-lib/src/emitter/submission_worker.rs index 98d2aab15f5e2..5625ad2a84705 100644 --- a/crates/transaction-emitter-lib/src/emitter/submission_worker.rs +++ b/crates/transaction-emitter-lib/src/emitter/submission_worker.rs @@ -25,15 +25,15 @@ use futures::future::join_all; use itertools::Itertools; use rand::seq::IteratorRandom; use std::{ + borrow::Borrow, collections::HashMap, - sync::{atomic::AtomicU64, Arc, Mutex}, + sync::{atomic::AtomicU64, Arc}, time::Instant, }; -use std::ops::DerefMut; use tokio::time::sleep; pub struct SubmissionWorker { - pub(crate) accounts: Vec>>, + pub(crate) accounts: Vec>, client: RestClient, stop: Arc, params: EmitModeParams, @@ -56,7 +56,7 @@ impl SubmissionWorker { skip_latency_stats: bool, rng: ::rand::rngs::StdRng, ) -> Self { - let accounts = accounts.into_iter().map(|account| Arc::new(Mutex::new(account))).collect(); + let accounts = accounts.into_iter().map(Arc::new).collect(); Self { accounts, client, @@ -201,7 +201,7 @@ impl SubmissionWorker { } } - self.accounts.into_iter().map(|account_arc_mutex| Arc::into_inner(account_arc_mutex).unwrap().into_inner().unwrap()).collect() + self.accounts.into_iter().map(|account_arc_mutex| Arc::into_inner(account_arc_mutex).unwrap()).collect() } // returns true if it returned early @@ -246,11 +246,9 @@ impl SubmissionWorker { .await; // self.accounts.iter().for_each(|account| {}) - for account in self.accounts.iter() { - let account = account.clone(); - let mut locker = account.lock().unwrap(); + for account in self.accounts.iter_mut() { update_account_seq_num( - locker.deref_mut(), + Arc::get_mut(account).unwrap(), &account_to_start_and_end_seq_num, &latest_fetched_counts, ); @@ -277,7 
+275,7 @@ impl SubmissionWorker { num_expired, self.accounts .iter() - .map(|a| a.lock().unwrap().address()) + .map(|a| a.address()) .collect::>(), ) ); @@ -322,7 +320,7 @@ impl SubmissionWorker { .into_iter() .flat_map(|account| { self.txn_generator - .generate_transactions(account.clone(), self.params.transactions_per_account) + .generate_transactions(account.borrow(), self.params.transactions_per_account) }) .collect() } diff --git a/crates/transaction-emitter-lib/src/wrappers.rs b/crates/transaction-emitter-lib/src/wrappers.rs index c4a103a34eb44..7ae79af53667b 100644 --- a/crates/transaction-emitter-lib/src/wrappers.rs +++ b/crates/transaction-emitter-lib/src/wrappers.rs @@ -18,7 +18,7 @@ use aptos_sdk::transaction_builder::TransactionFactory; use aptos_transaction_generator_lib::{args::TransactionTypeArg, WorkflowProgress}; use rand::{rngs::StdRng, Rng, SeedableRng}; use std::time::{Duration, Instant}; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; pub async fn emit_transactions( cluster_args: &ClusterArgs, @@ -158,7 +158,7 @@ pub async fn emit_transactions_with_cluster( emit_job_request = emit_job_request.skip_minting_accounts(); } - let coin_source_account = std::sync::Arc::new(std::sync::Mutex::new(coin_source_account)); + let coin_source_account = std::sync::Arc::new(coin_source_account); let stats = emitter .emit_txn_for_with_stats( coin_source_account, @@ -179,7 +179,7 @@ pub async fn create_accounts_command( .context("Failed to build cluster")?; let client = cluster.random_instance().rest_client(); let coin_source_account = cluster.load_coin_source_account(&client).await?; - let coin_source_account = Arc::new(Mutex::new(coin_source_account)); + let coin_source_account = Arc::new(coin_source_account); let txn_factory = TransactionFactory::new(cluster.chain_id) .with_transaction_expiration_time(60) .with_max_gas_amount(create_accounts_args.max_gas_per_txn); diff --git a/crates/transaction-generator-lib/src/account_generator.rs 
b/crates/transaction-generator-lib/src/account_generator.rs index ac2d47374daaf..374f8c4562a39 100644 --- a/crates/transaction-generator-lib/src/account_generator.rs +++ b/crates/transaction-generator-lib/src/account_generator.rs @@ -43,7 +43,7 @@ impl AccountGenerator { impl TransactionGenerator for AccountGenerator { fn generate_transactions( &mut self, - account: Arc>, + account: &LocalAccount, num_to_create: usize, ) -> Vec { let mut requests = Vec::with_capacity(num_to_create); @@ -53,7 +53,7 @@ impl TransactionGenerator for AccountGenerator { let receiver = LocalAccount::generate(&mut self.rng); let receiver_address = receiver.address(); let request = create_account_transaction( - account.clone(), + account, receiver_address, &self.txn_factory, self.creation_balance, diff --git a/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs b/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs index 613c5e141c894..6fecf3590129f 100644 --- a/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs +++ b/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs @@ -37,19 +37,19 @@ impl AccountsPoolWrapperGenerator { impl TransactionGenerator for AccountsPoolWrapperGenerator { fn generate_transactions( &mut self, - _account: Arc>, + _account: &LocalAccount, num_to_create: usize, ) -> Vec { - let mut accounts_to_use = + let accounts_to_use = self.source_accounts_pool .take_from_pool(num_to_create, true, &mut self.rng); if accounts_to_use.is_empty() { return Vec::new(); } // Wrap LocalAccount in Arc+Mutex - let account_arcs : Vec>> = accounts_to_use.into_iter().map(|account| Arc::new(std::sync::Mutex::new(account))).collect(); + // let account_arcs : Vec> = accounts_to_use.into_iter().map(Arc::new).collect(); // get txns - let txns = account_arcs.iter().flat_map(|account| self.generator.generate_transactions(account.clone(), 1)).collect(); + let txns = accounts_to_use.iter().flat_map(|account| self.generator.generate_transactions(account, 
1)).collect(); // let txns = accounts_to_use // .iter_mut() // .flat_map(|account| { @@ -59,10 +59,9 @@ impl TransactionGenerator for AccountsPoolWrapperGenerator { // .collect(); // back to plain LocalAccount, add to accounts - let accounts_to_use = account_arcs.into_iter().map(|account| { - let account_mutex = Arc::into_inner(account).unwrap(); - account_mutex.into_inner().unwrap() - }).collect(); + // let accounts_to_use = account_arcs.into_iter().map(|account| { + // Arc::into_inner(account).unwrap() + // }).collect(); if let Some(destination_accounts_pool) = &self.destination_accounts_pool { destination_accounts_pool.add_to_pool(accounts_to_use); } diff --git a/crates/transaction-generator-lib/src/batch_transfer.rs b/crates/transaction-generator-lib/src/batch_transfer.rs index 22965a3b19614..ff58614f9c908 100644 --- a/crates/transaction-generator-lib/src/batch_transfer.rs +++ b/crates/transaction-generator-lib/src/batch_transfer.rs @@ -39,7 +39,7 @@ impl BatchTransferTransactionGenerator { impl TransactionGenerator for BatchTransferTransactionGenerator { fn generate_transactions( &mut self, - account: Arc>, + account: &LocalAccount, num_to_create: usize, ) -> Vec { let mut requests = Vec::with_capacity(num_to_create); @@ -48,7 +48,7 @@ impl TransactionGenerator for BatchTransferTransactionGenerator { .all_addresses .clone_from_pool(self.batch_size, &mut self.rng); requests.push( - account.lock().unwrap().sign_with_transaction_builder(self.txn_factory.payload( + account.sign_with_transaction_builder(self.txn_factory.payload( aptos_stdlib::aptos_account_batch_transfer(receivers, vec![ self.send_amount; self.batch_size diff --git a/crates/transaction-generator-lib/src/bounded_batch_wrapper.rs b/crates/transaction-generator-lib/src/bounded_batch_wrapper.rs index c4a0410965f5f..c3b79dc621578 100644 --- a/crates/transaction-generator-lib/src/bounded_batch_wrapper.rs +++ b/crates/transaction-generator-lib/src/bounded_batch_wrapper.rs @@ -1,7 +1,6 @@ // Copyright © 
Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use std::sync::Arc; use crate::{TransactionGenerator, TransactionGeneratorCreator}; use aptos_sdk::types::{transaction::SignedTransaction, LocalAccount}; @@ -13,7 +12,7 @@ struct BoundedBatchWrapperTransactionGenerator { impl TransactionGenerator for BoundedBatchWrapperTransactionGenerator { fn generate_transactions( &mut self, - account: Arc>, + account: &LocalAccount, num_to_create: usize, ) -> Vec { self.generator diff --git a/crates/transaction-generator-lib/src/call_custom_modules.rs b/crates/transaction-generator-lib/src/call_custom_modules.rs index 13c7ed1ee4ddb..b22077a98f15c 100644 --- a/crates/transaction-generator-lib/src/call_custom_modules.rs +++ b/crates/transaction-generator-lib/src/call_custom_modules.rs @@ -1,7 +1,6 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use std::ops::Deref; use super::{publishing::publish_util::Package, ReliableTransactionSubmitter}; use crate::{ create_account_transaction, publishing::publish_util::PackageHandler, RootAccountHandle, @@ -14,6 +13,7 @@ use aptos_sdk::{ }; use async_trait::async_trait; use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng}; +use std::borrow::Borrow; use std::sync::Arc; // Fn + Send + Sync, as it will be called from multiple threads simultaneously @@ -83,16 +83,15 @@ impl CustomModulesDelegationGenerator { impl TransactionGenerator for CustomModulesDelegationGenerator { fn generate_transactions( &mut self, - account: Arc>, + account: &LocalAccount, num_to_create: usize, ) -> Vec { let mut requests = Vec::with_capacity(num_to_create); for _ in 0..num_to_create { let (package, publisher) = self.packages.choose(&mut self.rng).unwrap(); - let account = account.lock().unwrap(); let request = (self.txn_generator)( - account.deref(), + account, package, publisher, &self.txn_factory, @@ -222,7 +221,7 @@ impl CustomModulesDelegationGeneratorCreator { let publisher = LocalAccount::generate(&mut rng); let 
publisher_address = publisher.address(); requests_create.push(create_account_transaction( - root_account.get_root_account(), + root_account.get_root_account().borrow(), publisher_address, &init_txn_factory, publisher_balance, diff --git a/crates/transaction-generator-lib/src/entry_points.rs b/crates/transaction-generator-lib/src/entry_points.rs index 1b4dd4927cac3..ce5f9e2ac20b2 100644 --- a/crates/transaction-generator-lib/src/entry_points.rs +++ b/crates/transaction-generator-lib/src/entry_points.rs @@ -17,6 +17,7 @@ use aptos_sdk::{ }; use async_trait::async_trait; use rand::rngs::StdRng; +use std::borrow::Borrow; use std::sync::Arc; pub struct EntryPointTransactionGenerator { @@ -75,7 +76,7 @@ impl UserModuleTransactionGenerator for EntryPointTransactionGenerator { .iter() .map(|to| { create_account_transaction( - root_account.get_root_account(), + root_account.get_root_account().borrow(), to.address(), txn_factory, 0, diff --git a/crates/transaction-generator-lib/src/lib.rs b/crates/transaction-generator-lib/src/lib.rs index 8c28827e94652..ff563cacf8df3 100644 --- a/crates/transaction-generator-lib/src/lib.rs +++ b/crates/transaction-generator-lib/src/lib.rs @@ -123,7 +123,7 @@ impl Default for TransactionType { pub trait TransactionGenerator: Sync + Send { fn generate_transactions( &mut self, - account: Arc>, + account: &LocalAccount, num_to_create: usize, ) -> Vec; } @@ -213,11 +213,11 @@ impl CounterState { pub trait RootAccountHandle: Send + Sync { async fn approve_funds(&self, amount: u64, reason: &str); - fn get_root_account(&self) -> Arc>; + fn get_root_account(&self) -> Arc; } pub struct AlwaysApproveRootAccountHandle{ - pub root_account: Arc>, + pub root_account: Arc, } #[async_trait::async_trait] @@ -229,7 +229,7 @@ impl RootAccountHandle for AlwaysApproveRootAccountHandle { ); } - fn get_root_account(&self) -> Arc> { + fn get_root_account(&self) -> Arc { self.root_account.clone() } } @@ -519,12 +519,12 @@ impl ObjectPool { } pub fn 
create_account_transaction( - from: Arc>, + from: &LocalAccount, to: AccountAddress, txn_factory: &TransactionFactory, creation_balance: u64, ) -> SignedTransaction { - from.lock().unwrap().sign_with_transaction_builder(txn_factory.payload( + from.sign_with_transaction_builder(txn_factory.payload( if creation_balance > 0 { aptos_stdlib::aptos_account_transfer(to, creation_balance) } else { diff --git a/crates/transaction-generator-lib/src/p2p_transaction_generator.rs b/crates/transaction-generator-lib/src/p2p_transaction_generator.rs index 83b37b12f78e9..0175865a3f634 100644 --- a/crates/transaction-generator-lib/src/p2p_transaction_generator.rs +++ b/crates/transaction-generator-lib/src/p2p_transaction_generator.rs @@ -16,7 +16,6 @@ use std::{ cmp::{max, min}, sync::Arc, }; -use std::ops::Deref; pub enum SamplingMode { /// See `BasicSampler`. @@ -250,7 +249,7 @@ impl Distribution for Standard { impl TransactionGenerator for P2PTransactionGenerator { fn generate_transactions( &mut self, - account: Arc>, + account: &LocalAccount, num_to_create: usize, ) -> Vec { let mut requests = Vec::with_capacity(num_to_create); @@ -278,13 +277,11 @@ impl TransactionGenerator for P2PTransactionGenerator { let receiver = receivers.get(i).expect("all_addresses can't be empty"); let request = if num_valid_tx > 0 { num_valid_tx -= 1; - let account = account.lock().unwrap(); - self.gen_single_txn(account.deref(), receiver, self.send_amount, &self.txn_factory) + self.gen_single_txn(account, receiver, self.send_amount, &self.txn_factory) } else { - let account = account.lock().unwrap(); self.generate_invalid_transaction( &mut self.rng.clone(), - account.deref(), + account, receiver, &requests, ) diff --git a/crates/transaction-generator-lib/src/publish_modules.rs b/crates/transaction-generator-lib/src/publish_modules.rs index 33de9bd3141d3..4b1838dc85521 100644 --- a/crates/transaction-generator-lib/src/publish_modules.rs +++ b/crates/transaction-generator-lib/src/publish_modules.rs @@ 
-1,4 +1,3 @@ -use std::ops::DerefMut; // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 use crate::{ @@ -35,11 +34,9 @@ impl PublishPackageGenerator { impl TransactionGenerator for PublishPackageGenerator { fn generate_transactions( &mut self, - account: Arc>, + account: &LocalAccount, num_to_create: usize, ) -> Vec { - let mut account_locker = account.lock().unwrap(); - let account = account_locker.deref_mut(); let mut requests = Vec::with_capacity(num_to_create); // First publish the module and then use it diff --git a/crates/transaction-generator-lib/src/transaction_mix_generator.rs b/crates/transaction-generator-lib/src/transaction_mix_generator.rs index d4a9cdbcbe9bf..eef89c664cb86 100644 --- a/crates/transaction-generator-lib/src/transaction_mix_generator.rs +++ b/crates/transaction-generator-lib/src/transaction_mix_generator.rs @@ -38,7 +38,7 @@ impl PhasedTxnMixGenerator { impl TransactionGenerator for PhasedTxnMixGenerator { fn generate_transactions( &mut self, - account: Arc>, + account: &LocalAccount, num_to_create: usize, ) -> Vec { let phase = if self.txn_mix_per_phase.len() == 1 { diff --git a/crates/transaction-generator-lib/src/workflow_delegator.rs b/crates/transaction-generator-lib/src/workflow_delegator.rs index bffae2f2c41cf..439b68e056780 100644 --- a/crates/transaction-generator-lib/src/workflow_delegator.rs +++ b/crates/transaction-generator-lib/src/workflow_delegator.rs @@ -113,7 +113,7 @@ impl WorkflowTxnGenerator { impl TransactionGenerator for WorkflowTxnGenerator { fn generate_transactions( &mut self, - account: Arc>, + account: &LocalAccount, mut num_to_create: usize, ) -> Vec { assert_ne!(num_to_create, 0); diff --git a/execution/executor-benchmark/src/lib.rs b/execution/executor-benchmark/src/lib.rs index 17e7b073863a1..534321d161bcb 100644 --- a/execution/executor-benchmark/src/lib.rs +++ b/execution/executor-benchmark/src/lib.rs @@ -123,7 +123,8 @@ pub fn run_benchmark( 
config.storage.rocksdb_configs.enable_storage_sharding = enable_storage_sharding; let (db, executor) = init_db_and_executor::(&config); - let mut root_account = TransactionGenerator::read_root_account(genesis_key, &db); + let root_account = TransactionGenerator::read_root_account(genesis_key, &db); + let root_account = Arc::new(root_account); let transaction_generators = transaction_mix.clone().map(|transaction_mix| { let num_existing_accounts = TransactionGenerator::read_meta(&source_dir); let num_accounts_to_be_loaded = std::cmp::min( @@ -151,7 +152,7 @@ pub fn run_benchmark( let (transaction_generator_creator, phase) = init_workload::( transaction_mix, - &mut root_account, + root_account.clone(), main_signer_accounts, burner_accounts, db.clone(), @@ -185,6 +186,7 @@ pub fn run_benchmark( } } } + let root_account = Arc::into_inner(root_account).unwrap(); let mut generator = TransactionGenerator::new_with_existing_db( db.clone(), root_account, @@ -245,7 +247,7 @@ pub fn run_benchmark( fn init_workload( transaction_mix: Vec<(TransactionType, usize)>, - root_account: &mut LocalAccount, + root_account: Arc, mut main_signer_accounts: Vec, burner_accounts: Vec, db: DbReaderWriter, diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index baff55067b5d3..41b11028646ce 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -2662,10 +2662,9 @@ impl Test for RestartValidator { impl NetworkTest for RestartValidator { fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { - let runtime = Runtime::new()?; - runtime.block_on(async { - let mut ctx_locker = ctxa.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); + ctxa.handle.clone().block_on(async { + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); let node = ctx.swarm().validators_mut().next().unwrap(); node.health_check().await.expect("node health check failed"); node.stop().await.unwrap(); @@ -2689,17 +2688,18 @@ impl 
Test for EmitTransaction { impl NetworkTest for EmitTransaction { fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - let mut ctx_locker = ctx.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); - let duration = Duration::from_secs(10); - let all_validators = ctx - .swarm() - .validators() - .map(|v| v.peer_id()) - .collect::>(); - let stats = generate_traffic(ctx, &all_validators, duration).unwrap(); - ctx.report.report_txn_stats(self.name().to_string(), &stats); - + ctx.handle.clone().block_on(async { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let duration = Duration::from_secs(10); + let all_validators = ctx + .swarm() + .validators() + .map(|v| v.peer_id()) + .collect::>(); + let stats = generate_traffic(ctx, &all_validators, duration).unwrap(); + ctx.report.report_txn_stats(self.name().to_string(), &stats); + }); Ok(()) } } @@ -2722,7 +2722,7 @@ impl Test for Delay { } impl NetworkTest for Delay { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + fn run(&self, _ctx: NetworkContextSynchronizer) -> Result<()> { info!("forge sleep {}", self.seconds); std::thread::sleep(Duration::from_secs(self.seconds)); Ok(()) @@ -2740,10 +2740,11 @@ impl Test for GatherMetrics { impl NetworkTest for GatherMetrics { fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - let mut ctx_locker = ctx.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); - let runtime = ctx.runtime.handle(); - runtime.block_on(gather_metrics_one(ctx)); + ctx.handle.clone().block_on(async { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + gather_metrics_one(ctx).await; + }); Ok(()) } } diff --git a/testsuite/forge/src/backend/k8s/swarm.rs b/testsuite/forge/src/backend/k8s/swarm.rs index 21a42f230f838..14b73974f13c1 100644 --- a/testsuite/forge/src/backend/k8s/swarm.rs +++ b/testsuite/forge/src/backend/k8s/swarm.rs @@ -42,13 +42,13 @@ use std::{ env, str, sync::Arc, }; -use 
std::sync::Mutex; +// use std::sync::Mutex; use tokio::{runtime::Runtime, time::Duration}; pub struct K8sSwarm { validators: HashMap, fullnodes: HashMap, - root_account: Arc>, + root_account: Arc, kube_client: K8sClient, versions: Arc>, pub chain_id: ChainId, @@ -87,7 +87,7 @@ impl K8sSwarm { ) })?; let root_account = LocalAccount::new(address, account_key, sequence_number); - let root_account = Arc::new(Mutex::new(root_account)); + let root_account = Arc::new(root_account); let mut versions = HashMap::new(); let cur_version = Version::new(0, image_tag.to_string()); diff --git a/testsuite/forge/src/backend/local/swarm.rs b/testsuite/forge/src/backend/local/swarm.rs index 4a215f19ae843..90d007161941c 100644 --- a/testsuite/forge/src/backend/local/swarm.rs +++ b/testsuite/forge/src/backend/local/swarm.rs @@ -96,7 +96,7 @@ pub struct LocalSwarm { fullnodes: HashMap, public_networks: HashMap, dir: SwarmDirectory, - root_account: Arc>, + root_account: Arc, chain_id: ChainId, root_key: ConfigKey, @@ -245,7 +245,7 @@ impl LocalSwarm { AccountKey::from_private_key(root_key.private_key()), 0, ); - let root_account = Arc::new(std::sync::Mutex::new(root_account)); + let root_account = Arc::new(root_account); Ok(LocalSwarm { node_name_counter: validators.len(), diff --git a/testsuite/forge/src/interface/aptos.rs b/testsuite/forge/src/interface/aptos.rs index a3c3290371ca7..ed3e9f15dcbdf 100644 --- a/testsuite/forge/src/interface/aptos.rs +++ b/testsuite/forge/src/interface/aptos.rs @@ -1,8 +1,7 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use std::ops::DerefMut; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use super::Test; use crate::{CoreContext, Result, TestReport}; use anyhow::anyhow; @@ -109,7 +108,7 @@ impl<'t> AptosContext<'t> { self.public_info.get_balance(address).await } - pub fn root_account(&mut self) -> Arc> { + pub fn root_account(&mut self) -> Arc { self.public_info.root_account.clone() } } @@ -119,7 +118,7 @@ pub struct 
AptosPublicInfo { inspection_service_url: Url, rest_api_url: Url, rest_client: RestClient, - root_account: Arc>, + root_account: Arc, rng: ::rand::rngs::StdRng, } @@ -128,7 +127,7 @@ impl AptosPublicInfo { chain_id: ChainId, inspection_service_url_str: String, rest_api_url_str: String, - root_account: Arc>, + root_account: Arc, ) -> Self { let rest_api_url = Url::parse(&rest_api_url_str).unwrap(); let inspection_service_url = Url::parse(&inspection_service_url_str).unwrap(); @@ -154,14 +153,14 @@ impl AptosPublicInfo { self.inspection_service_url.as_str() } - pub fn root_account(&mut self) -> Arc> { + pub fn root_account(&mut self) -> Arc { self.root_account.clone() } pub async fn create_user_account(&mut self, pubkey: &Ed25519PublicKey) -> Result<()> { let auth_key = AuthenticationKey::ed25519(pubkey); let create_account_txn = - self.root_account.lock().unwrap() + self.root_account .sign_with_transaction_builder(self.transaction_factory().payload( aptos_stdlib::aptos_account_create_account(auth_key.account_address()), )); @@ -177,7 +176,7 @@ impl AptosPublicInfo { ) -> Result { let auth_key = AuthenticationKey::any_key(pubkey.clone()); let create_account_txn = - self.root_account.lock().unwrap() + self.root_account .sign_with_transaction_builder(self.transaction_factory().payload( aptos_stdlib::aptos_account_create_account(auth_key.account_address()), )); @@ -188,7 +187,7 @@ impl AptosPublicInfo { } pub async fn mint(&mut self, addr: AccountAddress, amount: u64) -> Result<()> { - let mint_txn = self.root_account.lock().unwrap().sign_with_transaction_builder( + let mint_txn = self.root_account.sign_with_transaction_builder( self.transaction_factory() .payload(aptos_stdlib::aptos_coin_mint(addr, amount)), ); @@ -307,14 +306,14 @@ impl AptosPublicInfo { reconfig( &self.rest_client, &self.transaction_factory(), - self.root_account.lock().unwrap().deref_mut(), + self.root_account.clone(), ) .await } /// Syncs the root account to it's sequence number in the event that a 
faucet changed it's value pub async fn sync_root_account_sequence_number(&mut self) { - let root_address = self.root_account().lock().unwrap().address(); + let root_address = self.root_account().address(); let root_sequence_number = self .client() .get_account_bcs(root_address) @@ -322,7 +321,7 @@ impl AptosPublicInfo { .unwrap() .into_inner() .sequence_number(); - self.root_account().lock().unwrap() + self.root_account() .set_sequence_number(root_sequence_number); } } @@ -330,21 +329,23 @@ impl AptosPublicInfo { pub async fn reconfig( client: &RestClient, transaction_factory: &TransactionFactory, - root_account: &mut LocalAccount, + root_account: Arc, ) -> State { let aptos_version = client.get_aptos_version().await.unwrap(); let current = aptos_version.into_inner(); let current_version = *current.major.inner(); - let txns = vec![ - root_account.sign_with_transaction_builder(transaction_factory.clone().payload( - aptos_stdlib::version_set_for_next_epoch(current_version + 1), - )), - root_account.sign_with_transaction_builder( - transaction_factory - .clone() - .payload(aptos_stdlib::aptos_governance_force_end_epoch_test_only()), - ), - ]; + let txns = { + vec![ + root_account.sign_with_transaction_builder(transaction_factory.clone().payload( + aptos_stdlib::version_set_for_next_epoch(current_version + 1), + )), + root_account.sign_with_transaction_builder( + transaction_factory + .clone() + .payload(aptos_stdlib::aptos_governance_force_end_epoch_test_only()), + ), + ] + }; submit_and_wait_reconfig(client, txns).await } diff --git a/testsuite/forge/src/interface/chain_info.rs b/testsuite/forge/src/interface/chain_info.rs index 58c0456859a1e..61b4d114ba2ba 100644 --- a/testsuite/forge/src/interface/chain_info.rs +++ b/testsuite/forge/src/interface/chain_info.rs @@ -2,7 +2,7 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. 
// SPDX-License-Identifier: Apache-2.0 -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use crate::AptosPublicInfo; use anyhow::Result; use aptos_rest_client::Client as RestClient; @@ -14,7 +14,7 @@ use reqwest::Url; #[derive(Debug)] pub struct ChainInfo { - pub root_account: Arc>, + pub root_account: Arc, pub rest_api_url: String, pub inspection_service_url: String, pub chain_id: ChainId, @@ -22,7 +22,7 @@ pub struct ChainInfo { impl ChainInfo { pub fn new( - root_account: Arc>, + root_account: Arc, rest_api_url: String, inspection_service_url: String, chain_id: ChainId, @@ -35,16 +35,19 @@ impl ChainInfo { } } - pub fn root_account(&mut self) -> Arc> { + pub fn root_account(&mut self) -> Arc { self.root_account.clone() } pub async fn resync_root_account_seq_num(&mut self, client: &RestClient) -> Result<()> { + let root_address = { + self.root_account.address() + }; let account = client - .get_account(self.root_account.lock().unwrap().address()) + .get_account(root_address) .await? .into_inner(); - self.root_account.lock().unwrap() + self.root_account .set_sequence_number(account.sequence_number); Ok(()) } diff --git a/testsuite/forge/src/interface/network.rs b/testsuite/forge/src/interface/network.rs index b90cd80f7103f..8b7896712afad 100644 --- a/testsuite/forge/src/interface/network.rs +++ b/testsuite/forge/src/interface/network.rs @@ -2,7 +2,7 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. 
// SPDX-License-Identifier: Apache-2.0 -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use super::Test; use crate::{ prometheus_metrics::LatencyBreakdown, @@ -23,19 +23,21 @@ pub trait NetworkTest: Test { #[derive(Clone)] pub struct NetworkContextSynchronizer<'t> { - pub ctx: Arc>>, + pub ctx: Arc>>, + pub handle: tokio::runtime::Handle, } // TODO: some useful things that don't need to hold the lock or make a copy impl<'t> NetworkContextSynchronizer<'t> { - pub fn new(ctx: NetworkContext<'t>) -> Self { + pub fn new(ctx: NetworkContext<'t>, handle: tokio::runtime::Handle) -> Self { Self{ - ctx: Arc::new(Mutex::new(ctx)), + ctx: Arc::new(tokio::sync::Mutex::new(ctx)), + handle, } } - pub fn report_text(&self, text: String) { - let mut locker = self.ctx.lock().unwrap(); + pub async fn report_text(&self, text: String) { + let mut locker = self.ctx.lock().await; locker.report.report_text(text); } } diff --git a/testsuite/forge/src/runner.rs b/testsuite/forge/src/runner.rs index 42ee9ace49ff5..ec31af39b0e00 100644 --- a/testsuite/forge/src/runner.rs +++ b/testsuite/forge/src/runner.rs @@ -547,7 +547,7 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { let initial_version = self.initial_version(); // The genesis version should always match the initial node version let genesis_version = initial_version.clone(); - let runtime = Runtime::new().unwrap(); + let runtime = Runtime::new().unwrap(); // TODO: new multithreaded? 
let mut rng = ::rand::rngs::StdRng::from_seed(OsRng.gen()); let mut swarm = runtime.block_on(self.factory.launch_swarm( &mut rng, @@ -596,7 +596,7 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { self.tests.success_criteria.clone(), ); // let network_ctx = Arc::new(Mutex::new(network_ctx)); - let network_ctx = NetworkContextSynchronizer::new(network_ctx); + let network_ctx = NetworkContextSynchronizer::new(network_ctx, runtime.handle().clone()); let result = run_test(|| test.run(network_ctx)); report.report_text(result.to_string()); summary.handle_result(test.name().to_owned(), result)?; diff --git a/testsuite/smoke-test/src/aptos/error_report.rs b/testsuite/smoke-test/src/aptos/error_report.rs index d3bee2ad9e6ba..e9e30e7089007 100644 --- a/testsuite/smoke-test/src/aptos/error_report.rs +++ b/testsuite/smoke-test/src/aptos/error_report.rs @@ -11,7 +11,7 @@ use aptos_types::{ async fn submit_and_check_err TransactionBuilder>( local_account: &LocalAccount, - info: &mut AptosPublicInfo<'_>, + info: &mut AptosPublicInfo, f: F, expected: &str, ) { diff --git a/testsuite/smoke-test/src/aptos/move_test_helpers.rs b/testsuite/smoke-test/src/aptos/move_test_helpers.rs index 013bff5e3874b..d5d0f2aa80b9f 100644 --- a/testsuite/smoke-test/src/aptos/move_test_helpers.rs +++ b/testsuite/smoke-test/src/aptos/move_test_helpers.rs @@ -11,7 +11,7 @@ use std::path::PathBuf; /// New style publishing via `code::publish_package` pub async fn publish_package( - info: &mut AptosPublicInfo<'_>, + info: &mut AptosPublicInfo, move_dir: PathBuf, ) -> Result { let package = BuiltPackage::build(move_dir, BuildOptions::default())?; diff --git a/testsuite/smoke-test/src/indexer.rs b/testsuite/smoke-test/src/indexer.rs index 9bb854d44df71..f07bbfb59a2ae 100644 --- a/testsuite/smoke-test/src/indexer.rs +++ b/testsuite/smoke-test/src/indexer.rs @@ -34,7 +34,7 @@ pub fn setup_indexer() -> anyhow::Result { pub async fn execute_nft_txns<'t>( creator: LocalAccount, - info: &mut AptosPublicInfo<'t>, + info: 
&mut AptosPublicInfo, ) -> Result<()> { let collection_name = "collection name".to_owned().into_bytes(); let token_name = "token name".to_owned().into_bytes(); diff --git a/testsuite/smoke-test/src/keyless.rs b/testsuite/smoke-test/src/keyless.rs index 41273edaad9bf..735a3af5e6c9a 100644 --- a/testsuite/smoke-test/src/keyless.rs +++ b/testsuite/smoke-test/src/keyless.rs @@ -318,7 +318,7 @@ async fn test_keyless_groth16_with_bad_tw_signature() { } async fn sign_transaction<'a>( - info: &mut AptosPublicInfo<'a>, + info: &mut AptosPublicInfo, mut sig: KeylessSignature, pk: KeylessPublicKey, jwk: &RSA_JWK, @@ -477,7 +477,7 @@ async fn setup_local_net() -> ( async fn remove_training_wheels<'a>( cli: &mut CliTestFramework, - info: &mut AptosPublicInfo<'a>, + info: &mut AptosPublicInfo, root_idx: usize, ) { let script = format!( @@ -642,7 +642,7 @@ async fn get_latest_jwkset(rest_client: &Client) -> PatchedJWKs { async fn rotate_vk_by_governance<'a>( cli: &mut CliTestFramework, - info: &mut AptosPublicInfo<'a>, + info: &mut AptosPublicInfo, vk: &Groth16VerificationKey, root_idx: usize, ) { diff --git a/testsuite/smoke-test/src/rosetta.rs b/testsuite/smoke-test/src/rosetta.rs index f14cc2ffc0e54..bfdc0344ef579 100644 --- a/testsuite/smoke-test/src/rosetta.rs +++ b/testsuite/smoke-test/src/rosetta.rs @@ -467,7 +467,7 @@ async fn test_account_balance() { } async fn create_staking_contract( - info: &AptosPublicInfo<'_>, + info: &AptosPublicInfo, account: &mut LocalAccount, operator: AccountAddress, voter: AccountAddress, @@ -491,7 +491,7 @@ async fn create_staking_contract( } async fn unlock_stake( - info: &AptosPublicInfo<'_>, + info: &AptosPublicInfo, account: &mut LocalAccount, operator: AccountAddress, amount: u64, @@ -509,7 +509,7 @@ async fn unlock_stake( } async fn create_delegation_pool( - info: &AptosPublicInfo<'_>, + info: &AptosPublicInfo, account: &mut LocalAccount, commission_percentage: u64, sequence_number: u64, diff --git 
a/testsuite/smoke-test/src/test_utils.rs b/testsuite/smoke-test/src/test_utils.rs index da2258ab9ff71..28455af82fbfe 100644 --- a/testsuite/smoke-test/src/test_utils.rs +++ b/testsuite/smoke-test/src/test_utils.rs @@ -10,6 +10,8 @@ use aptos_sdk::{ types::{transaction::SignedTransaction, LocalAccount}, }; use rand::random; +// use std::borrow::Borrow; +use std::sync::Arc; use std::time::Duration; pub const MAX_CATCH_UP_WAIT_SECS: u64 = 180; // The max time we'll wait for nodes to catch up @@ -46,7 +48,7 @@ pub async fn execute_transactions( transfer_and_maybe_reconfig( client, &transaction_factory, - swarm.chain_info().root_account, + swarm.chain_info().root_account.clone(), sender, receiver, num_transfers, @@ -115,7 +117,7 @@ pub async fn transfer_coins( pub async fn transfer_and_maybe_reconfig( client: &RestClient, transaction_factory: &TransactionFactory, - root_account: &mut LocalAccount, + root_account: Arc, sender: &mut LocalAccount, receiver: &LocalAccount, num_transfers: usize, @@ -123,7 +125,7 @@ pub async fn transfer_and_maybe_reconfig( for _ in 0..num_transfers { // Reconfigurations have a 20% chance of being executed if random::() % 5 == 0 { - reconfig(client, transaction_factory, root_account).await; + reconfig(client, transaction_factory, root_account.clone()).await; } transfer_coins(client, transaction_factory, sender, receiver, 1).await; diff --git a/testsuite/smoke-test/src/txn_emitter.rs b/testsuite/smoke-test/src/txn_emitter.rs index 3023f89c70cff..5b42558c09353 100644 --- a/testsuite/smoke-test/src/txn_emitter.rs +++ b/testsuite/smoke-test/src/txn_emitter.rs @@ -206,8 +206,9 @@ async fn test_txn_emmitter_low_funds() { mempool_backlog: 10, }); + let account_1 = Arc::new(account_1); let txn_stat = emitter - .emit_txn_for_with_stats(&account_1, emit_job_request, Duration::from_secs(10), 3) + .emit_txn_for_with_stats(account_1, emit_job_request, Duration::from_secs(10), 3) .await .unwrap(); diff --git a/testsuite/testcases/Cargo.toml 
b/testsuite/testcases/Cargo.toml index 225c5d96153bf..89b3e2f6e6fc0 100644 --- a/testsuite/testcases/Cargo.toml +++ b/testsuite/testcases/Cargo.toml @@ -34,6 +34,7 @@ itertools = { workspace = true } rand = { workspace = true } reqwest = { workspace = true } tokio = { workspace = true } +tokio-scoped = { workspace = true } [dev-dependencies] assert_approx_eq = { workspace = true } diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index b3640f649d155..c6ac1196cf812 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -5,13 +5,13 @@ use std::ops::DerefMut; use std::sync::Arc; use std::sync::atomic::{AtomicBool, Ordering}; -use crate::{batch_update_gradually, create_emitter_and_request, generate_traffic, traffic_emitter_runtime}; +use crate::{batch_update_gradually, create_emitter_and_request, generate_traffic}; use anyhow::bail; use rand::SeedableRng; use aptos_forge::{EmitJobRequest, NetworkContextSynchronizer, NetworkTest, Result, SwarmExt, Test, TxnEmitter, TxnStats, Version}; use aptos_logger::info; -use tokio::{runtime::Runtime, time::Duration}; -use aptos_sdk::transaction_builder::TransactionFactory; +use tokio::time::Duration; +// use aptos_sdk::transaction_builder::TransactionFactory; use aptos_sdk::types::{LocalAccount, PeerId}; pub struct SimpleValidatorUpgrade; @@ -44,12 +44,12 @@ async fn upgrade_task( async fn stat_gather_task( emitter: TxnEmitter, emit_job_request: EmitJobRequest, - source_account: Arc>, + source_account: Arc, upgrade_traffic_chunk_duration: Duration, done: Arc, ) -> Result>{ let mut upgrade_stats = vec![]; - while done.load(Ordering::Relaxed) == false { + while !done.load(Ordering::Relaxed) { let upgrading_stats = emitter.clone().emit_txn_for( source_account.clone(), emit_job_request.clone(), @@ -118,14 +118,15 @@ fn upgrade_and_gather_stats( nodes: &[PeerId], ) -> Result> { let upgrade_done = 
Arc::new(AtomicBool::new(false)); - let mut emitter_ctx = ctxa.clone(); + let emitter_ctx = ctxa.clone(); let mut stats_result : Result> = Ok(None); let mut upgrade_result : Result<()> = Ok(()); - std::thread::scope(|scopev| { + // std::thread::scope(|scopev| { + tokio_scoped::scope(|scopev| { // emit trafic and gather stats - scopev.spawn(|| { - let mut ctx_locker = emitter_ctx.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); + scopev.spawn(async { + let mut ctx_locker = emitter_ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); let emit_job_request = ctx.emit_job.clone(); let rng = SeedableRng::from_rng(ctx.core().rng()).unwrap(); let (emitter, emit_job_request) = @@ -137,26 +138,27 @@ fn upgrade_and_gather_stats( } }; let source_account = ctx.swarm().chain_info().root_account; - let traffic_runtime = match traffic_emitter_runtime() { - Ok(x) => x, - Err(err) => { - stats_result = Err(err); - return; - } - }; + // let traffic_runtime = match traffic_emitter_runtime() { + // Ok(x) => x, + // Err(err) => { + // stats_result = Err(err); + // return; + // } + // }; let upgrade_traffic_chunk_duration = Duration::from_secs(15); - stats_result = traffic_runtime.block_on(stat_gather_task( + stats_result = stat_gather_task( emitter, emit_job_request, source_account, upgrade_traffic_chunk_duration, upgrade_done.clone(), - )); + ).await; }); // do upgrade - scopev.spawn(|| { - let runtime = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap(); - upgrade_result = runtime.block_on(batch_update_gradually(ctxa, validators_to_update, version, wait_until_healthy, delay, max_wait)); + scopev.spawn(async { + // let runtime = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap(); + // upgrade_result = runtime.block_on(batch_update_gradually(ctxa, validators_to_update, version, wait_until_healthy, delay, max_wait)); + upgrade_result = batch_update_gradually(ctxa, validators_to_update, version, wait_until_healthy, delay, 
max_wait).await; upgrade_done.store(true, Ordering::Relaxed); }); }); @@ -167,7 +169,14 @@ fn upgrade_and_gather_stats( impl NetworkTest for SimpleValidatorUpgrade { fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { - let runtime = Runtime::new()?; + let handle = ctxa.handle.clone(); + handle.block_on(self.async_run(ctxa)) + } +} + +impl SimpleValidatorUpgrade { + async fn async_run(&self, ctxa: NetworkContextSynchronizer<'_>) -> Result<()> { + // let runtime = Runtime::new()?; // let traffic_runtime = traffic_emitter_runtime()?; let upgrade_wait_for_healthy = true; let upgrade_node_delay = Duration::from_secs(10); @@ -177,7 +186,7 @@ impl NetworkTest for SimpleValidatorUpgrade { // Get the different versions we're testing with let (old_version, new_version) = { - let mut versions = ctxa.ctx.lock().unwrap().swarm().versions().collect::>(); + let mut versions = ctxa.ctx.lock().await.swarm().versions().collect::>(); versions.sort(); if versions.len() != 2 { bail!("exactly two different versions needed to run compat test"); @@ -191,13 +200,13 @@ impl NetworkTest for SimpleValidatorUpgrade { old_version, new_version ); info!("{}", msg); - ctxa.report_text(msg); + ctxa.report_text(msg).await; // Split the swarm into 2 parts - if ctxa.ctx.lock().unwrap().swarm().validators().count() < 4 { + if ctxa.ctx.lock().await.swarm().validators().count() < 4 { bail!("compat test requires >= 4 validators"); } - let all_validators = ctxa.ctx.lock().unwrap() + let all_validators = ctxa.ctx.lock().await .swarm() .validators() .map(|v| v.peer_id()) @@ -214,13 +223,13 @@ impl NetworkTest for SimpleValidatorUpgrade { old_version ); info!("{}", msg); - ctxa.report_text(msg); + ctxa.report_text(msg).await; // Generate some traffic { - let mut ctx_locker = ctxa.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); - let txn_stat_prior = generate_traffic(&mut ctx, &all_validators, duration)?; + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = 
ctx_locker.deref_mut(); + let txn_stat_prior = generate_traffic(ctx, &all_validators, duration)?; ctx.report .report_txn_stats(format!("{}::liveness-check", self.name()), &txn_stat_prior); } @@ -231,7 +240,7 @@ impl NetworkTest for SimpleValidatorUpgrade { new_version ); info!("{}", msg); - ctxa.report_text(msg); + ctxa.report_text(msg).await; // runtime.block_on(batch_update_gradually(ctx.swarm(), &[first_node], &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; let upgrade_stats = upgrade_and_gather_stats( ctxa.clone(), @@ -244,7 +253,7 @@ impl NetworkTest for SimpleValidatorUpgrade { )?; let upgrade_stats_sum = upgrade_stats.into_iter().reduce(|a,b| &a + &b); if let Some(upgrade_stats_sum) = upgrade_stats_sum { - ctxa.ctx.lock().unwrap().report.report_txn_stats( + ctxa.ctx.lock().await.report.report_txn_stats( format!("{}::single-validator-upgrading", self.name()), &upgrade_stats_sum, ); @@ -252,9 +261,9 @@ impl NetworkTest for SimpleValidatorUpgrade { // Generate some traffic { - let mut ctx_locker = ctxa.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); - let txn_stat_one = generate_traffic(&mut ctx, &[first_node], duration)?; + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let txn_stat_one = generate_traffic(ctx, &[first_node], duration)?; ctx.report.report_txn_stats( format!("{}::single-validator-upgrade", self.name()), &txn_stat_one, @@ -281,18 +290,18 @@ impl NetworkTest for SimpleValidatorUpgrade { )?; let upgrade2_stats_sum = upgrade2_stats.into_iter().reduce(|a,b| &a + &b); if let Some(upgrade2_stats_sum) = upgrade2_stats_sum { - ctxa.ctx.lock().unwrap().report.report_txn_stats( + ctxa.ctx.lock().await.report.report_txn_stats( format!("{}::half-validator-upgrading", self.name()), &upgrade2_stats_sum, ); } // runtime.block_on(batch_update_gradually(ctxa.clone(), &first_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; { - let mut ctx_locker 
= ctxa.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); // Generate some traffic - let txn_stat_half = generate_traffic(&mut ctx, &first_batch, duration)?; + let txn_stat_half = generate_traffic(ctx, &first_batch, duration)?; ctx.report.report_txn_stats( format!("{}::half-validator-upgrade", self.name()), &txn_stat_half, @@ -316,18 +325,18 @@ impl NetworkTest for SimpleValidatorUpgrade { )?; let upgrade3_stats_sum = upgrade3_stats.into_iter().reduce(|a,b| &a + &b); if let Some(upgrade3_stats_sum) = upgrade3_stats_sum { - ctxa.ctx.lock().unwrap().report.report_txn_stats( + ctxa.ctx.lock().await.report.report_txn_stats( format!("{}::rest-validator-upgrading", self.name()), &upgrade3_stats_sum, ); } // runtime.block_on(batch_update_gradually(ctxa.clone(), &second_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; { - let mut ctx_locker = ctxa.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); // Generate some traffic - let txn_stat_all = generate_traffic(&mut ctx, &second_batch, duration)?; + let txn_stat_all = generate_traffic(ctx, &second_batch, duration)?; ctx.report.report_txn_stats( format!("{}::rest-validator-upgrade", self.name()), &txn_stat_all, diff --git a/testsuite/testcases/src/dag_onchain_enable_test.rs b/testsuite/testcases/src/dag_onchain_enable_test.rs index 5838dfa96dff5..74425b7107a1d 100644 --- a/testsuite/testcases/src/dag_onchain_enable_test.rs +++ b/testsuite/testcases/src/dag_onchain_enable_test.rs @@ -52,8 +52,7 @@ impl NetworkLoadTest for DagOnChainEnableTest { runtime.block_on(async { let root_cli_index = { - let root_account_arc = swarm.chain_info().root_account(); - let root_account = root_account_arc.lock().unwrap(); + let root_account = swarm.chain_info().root_account(); cli.add_account_with_address_to_cli( 
root_account.private_key().clone(), root_account.address(), @@ -104,8 +103,7 @@ impl NetworkLoadTest for DagOnChainEnableTest { let initial_consensus_config = runtime.block_on(async { let root_cli_index = { - let root_account_arc = swarm.chain_info().root_account(); - let root_account = root_account_arc.lock().unwrap(); + let root_account = swarm.chain_info().root_account(); cli.add_account_with_address_to_cli( root_account.private_key().clone(), root_account.address(), @@ -158,8 +156,7 @@ impl NetworkLoadTest for DagOnChainEnableTest { runtime.block_on(async { let root_cli_index = { - let root_account_arc = swarm.chain_info().root_account(); - let root_account = root_account_arc.lock().unwrap(); + let root_account = swarm.chain_info().root_account(); cli.add_account_with_address_to_cli( root_account.private_key().clone(), root_account.address(), diff --git a/testsuite/testcases/src/forge_setup_test.rs b/testsuite/testcases/src/forge_setup_test.rs index 54cf15a6f3632..9a71d9b46393e 100644 --- a/testsuite/testcases/src/forge_setup_test.rs +++ b/testsuite/testcases/src/forge_setup_test.rs @@ -19,18 +19,12 @@ const STATE_SYNC_VERSION_COUNTER_NAME: &str = "aptos_state_sync_version"; pub struct ForgeSetupTest; -impl Test for ForgeSetupTest { - fn name(&self) -> &'static str { - "verify_forge_setup" - } -} - -impl NetworkTest for ForgeSetupTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { +impl ForgeSetupTest { + async fn async_run(&self, ctx: NetworkContextSynchronizer<'_>) -> Result<()> { let mut rng = StdRng::from_seed(OsRng.gen()); let runtime = Runtime::new().unwrap(); - let mut ctx_locker = ctx.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); let swarm = ctx.swarm(); @@ -78,7 +72,7 @@ impl NetworkTest for ForgeSetupTest { } let duration = Duration::from_secs(10 * num_pfns); - let txn_stat = generate_traffic(&mut ctx, &pfns, duration)?; + let txn_stat = 
generate_traffic(ctx, &pfns, duration)?; ctx.report .report_txn_stats(self.name().to_string(), &txn_stat); @@ -86,3 +80,15 @@ impl NetworkTest for ForgeSetupTest { Ok(()) } } + +impl Test for ForgeSetupTest { + fn name(&self) -> &'static str { + "verify_forge_setup" + } +} + +impl NetworkTest for ForgeSetupTest { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + ctx.handle.clone().block_on(self.async_run(ctx)) + } +} diff --git a/testsuite/testcases/src/framework_upgrade.rs b/testsuite/testcases/src/framework_upgrade.rs index 6901010a43a57..4b7596affb5a5 100644 --- a/testsuite/testcases/src/framework_upgrade.rs +++ b/testsuite/testcases/src/framework_upgrade.rs @@ -16,18 +16,10 @@ pub struct FrameworkUpgrade; impl FrameworkUpgrade { pub const EPOCH_DURATION_SECS: u64 = 10; -} - -impl Test for FrameworkUpgrade { - fn name(&self) -> &'static str { - "framework_upgrade::framework-upgrade" - } -} -impl NetworkTest for FrameworkUpgrade { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - let mut ctx_locker = ctx.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); + async fn async_run(&self, ctx: NetworkContextSynchronizer<'_>) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); let runtime = Runtime::new()?; let epoch_duration = Duration::from_secs(Self::EPOCH_DURATION_SECS); @@ -61,11 +53,11 @@ impl NetworkTest for FrameworkUpgrade { let msg = format!("Upgrade the nodes to version: {}", new_version); info!("{}", msg); ctx.report.report_text(msg); - runtime.block_on(batch_update(&mut ctx, first_half, &new_version))?; + runtime.block_on(batch_update(ctx, first_half, &new_version))?; // Generate some traffic let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(&mut ctx, &all_validators, duration)?; + let txn_stat = generate_traffic(ctx, &all_validators, duration)?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, @@ -116,7 
+108,7 @@ impl NetworkTest for FrameworkUpgrade { ))?; // Update the sequence number for the root account - let root_account = ctx.swarm().chain_info().root_account().lock().unwrap().address(); + let root_account = ctx.swarm().chain_info().root_account().address(); // Test the module publishing workflow let sequence_number = runtime .block_on( @@ -131,11 +123,11 @@ impl NetworkTest for FrameworkUpgrade { ctx.swarm() .chain_info() .root_account() - .lock().unwrap().set_sequence_number(sequence_number); + .set_sequence_number(sequence_number); // Generate some traffic let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(&mut ctx, &all_validators, duration)?; + let txn_stat = generate_traffic(ctx, &all_validators, duration)?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, @@ -157,10 +149,10 @@ impl NetworkTest for FrameworkUpgrade { let msg = format!("Upgrade the remaining nodes to version: {}", new_version); info!("{}", msg); ctx.report.report_text(msg); - runtime.block_on(batch_update(&mut ctx, second_half, &new_version))?; + runtime.block_on(batch_update(ctx, second_half, &new_version))?; let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(&mut ctx, &all_validators, duration)?; + let txn_stat = generate_traffic(ctx, &all_validators, duration)?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, @@ -171,3 +163,15 @@ impl NetworkTest for FrameworkUpgrade { Ok(()) } } + +impl Test for FrameworkUpgrade { + fn name(&self) -> &'static str { + "framework_upgrade::framework-upgrade" + } +} + +impl NetworkTest for FrameworkUpgrade { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + ctx.handle.clone().block_on(self.async_run(ctx)) + } +} diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index 9e0ca5ed5279c..99049bdbe5ca6 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ 
-75,17 +75,17 @@ async fn batch_update_gradually( ) -> Result<()> { // let mut swarm = ctx.swarm(); for validator in validators_to_update { - ctxa.ctx.lock().unwrap().swarm().upgrade_validator(*validator, version).await?; + ctxa.ctx.lock().await.swarm().upgrade_validator(*validator, version).await?; if wait_until_healthy { let deadline = Instant::now() + max_wait; - ctxa.ctx.lock().unwrap().swarm().validator_mut(*validator).unwrap().wait_until_healthy(deadline).await?; + ctxa.ctx.lock().await.swarm().validator_mut(*validator).unwrap().wait_until_healthy(deadline).await?; } if !delay.is_zero() { tokio::time::sleep(delay).await; } } - ctxa.ctx.lock().unwrap().swarm().health_check().await?; + ctxa.ctx.lock().await.swarm().health_check().await?; Ok(()) } @@ -214,87 +214,90 @@ pub trait NetworkLoadTest: Test { } } -impl NetworkTest for dyn NetworkLoadTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - let mut ctx_locker = ctx.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); - let runtime = Runtime::new().unwrap(); - let start_timestamp = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("Time went backwards") - .as_secs(); - let (start_version, _) = runtime - .block_on(ctx.swarm().get_client_with_newest_ledger_version()) - .context("no clients replied for start version")?; - let emit_job_request = ctx.emit_job.clone(); - let rng = SeedableRng::from_rng(ctx.core().rng())?; - let duration = ctx.global_duration; - let stats_by_phase = self.network_load_test( - &mut ctx, - emit_job_request, - duration, - WARMUP_DURATION_FRACTION, - COOLDOWN_DURATION_FRACTION, - rng, - )?; - - let phased = stats_by_phase.len() > 1; - for (phase, phase_stats) in stats_by_phase.iter().enumerate() { - let test_name = if phased { - format!("{}_phase_{}", self.name(), phase) - } else { - self.name().to_string() - }; - ctx.report - .report_txn_stats(test_name, &phase_stats.emitter_stats); - ctx.report.report_text(format!( - "Latency breakdown for phase {}: 
{:?}", - phase, - phase_stats - .latency_breakdown - .keys() - .into_iter() - .map(|slice| { - let slice_samples = phase_stats.latency_breakdown.get_samples(&slice); - format!( - "{:?}: max: {:.3}, avg: {:.3}", - slice, - slice_samples.max_sample(), - slice_samples.avg_sample() - ) - }) - .collect::>() - )); - } +async fn async_run_network_load_test(nlt: &dyn NetworkLoadTest, ctx: NetworkContextSynchronizer<'_>) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let runtime = Runtime::new().unwrap(); + let start_timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time went backwards") + .as_secs(); + let (start_version, _) = runtime + .block_on(ctx.swarm().get_client_with_newest_ledger_version()) + .context("no clients replied for start version")?; + let emit_job_request = ctx.emit_job.clone(); + let rng = SeedableRng::from_rng(ctx.core().rng())?; + let duration = ctx.global_duration; + let stats_by_phase = nlt.network_load_test( + ctx, + emit_job_request, + duration, + WARMUP_DURATION_FRACTION, + COOLDOWN_DURATION_FRACTION, + rng, + )?; + + let phased = stats_by_phase.len() > 1; + for (phase, phase_stats) in stats_by_phase.iter().enumerate() { + let test_name = if phased { + format!("{}_phase_{}", nlt.name(), phase) + } else { + nlt.name().to_string() + }; + ctx.report + .report_txn_stats(test_name, &phase_stats.emitter_stats); + ctx.report.report_text(format!( + "Latency breakdown for phase {}: {:?}", + phase, + phase_stats + .latency_breakdown + .keys() + .into_iter() + .map(|slice| { + let slice_samples = phase_stats.latency_breakdown.get_samples(&slice); + format!( + "{:?}: max: {:.3}, avg: {:.3}", + slice, + slice_samples.max_sample(), + slice_samples.avg_sample() + ) + }) + .collect::>() + )); + } - let end_timestamp = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("Time went backwards") - .as_secs(); - let (end_version, _) = runtime - 
.block_on(ctx.swarm().get_client_with_newest_ledger_version()) - .context("no clients replied for end version")?; - - self.finish(&mut ctx).context("finish NetworkLoadTest ")?; - - for phase_stats in stats_by_phase.into_iter() { - ctx.check_for_success( - &phase_stats.emitter_stats, - phase_stats.actual_duration, - &phase_stats.latency_breakdown, - start_timestamp as i64, - end_timestamp as i64, - start_version, - end_version, - ) + let end_timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time went backwards") + .as_secs(); + let (end_version, _) = runtime + .block_on(ctx.swarm().get_client_with_newest_ledger_version()) + .context("no clients replied for end version")?; + + nlt.finish(ctx).context("finish NetworkLoadTest ")?; + + for phase_stats in stats_by_phase.into_iter() { + ctx.check_for_success( + &phase_stats.emitter_stats, + phase_stats.actual_duration, + &phase_stats.latency_breakdown, + start_timestamp as i64, + end_timestamp as i64, + start_version, + end_version, + ) .context("check for success")?; - } + } - Ok(()) + Ok(())} + +impl NetworkTest for dyn NetworkLoadTest { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + ctx.handle.clone().block_on(async_run_network_load_test(self, ctx)) } } -impl dyn NetworkLoadTest { +impl dyn NetworkLoadTest + '_ { pub fn network_load_test( &self, ctx: &mut NetworkContext, @@ -541,29 +544,33 @@ impl CompositeNetworkTest { test: Box::new(test), } } -} -impl NetworkTest for CompositeNetworkTest { - fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { + async fn async_run(&self, ctxa: NetworkContextSynchronizer<'_>) -> Result<()> { { - let mut ctx_locker = ctxa.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); for wrapper in &self.wrappers { - wrapper.setup(&mut ctx)?; + wrapper.setup(ctx)?; } } self.test.run(ctxa.clone())?; { - let mut ctx_locker = ctxa.ctx.lock().unwrap(); - let 
mut ctx = ctx_locker.deref_mut(); + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); for wrapper in &self.wrappers { - wrapper.finish(&mut ctx)?; + wrapper.finish(ctx)?; } } Ok(()) } } +impl NetworkTest for CompositeNetworkTest { + fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { + ctxa.handle.clone().block_on(self.async_run(ctxa)) + } +} + impl Test for CompositeNetworkTest { fn name(&self) -> &'static str { "CompositeNetworkTest" diff --git a/testsuite/testcases/src/load_vs_perf_benchmark.rs b/testsuite/testcases/src/load_vs_perf_benchmark.rs index 1f69b1b469234..665ad6cd8b9a1 100644 --- a/testsuite/testcases/src/load_vs_perf_benchmark.rs +++ b/testsuite/testcases/src/load_vs_perf_benchmark.rs @@ -207,10 +207,8 @@ impl LoadVsPerfBenchmark { Ok(result) } -} -impl NetworkTest for LoadVsPerfBenchmark { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + async fn async_run(&self, ctx: NetworkContextSynchronizer<'_>) -> Result<()> { assert!( self.criteria.is_empty() || self.criteria.len() == self.workloads.len(), "Invalid config, {} criteria and {} workloads given", @@ -218,8 +216,8 @@ impl NetworkTest for LoadVsPerfBenchmark { self.workloads.len(), ); - let mut ctx_locker = ctx.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); let rt = Runtime::new().unwrap(); let mut continous_job = if let Some(continuous_traffic) = &self.continuous_traffic { @@ -232,7 +230,7 @@ impl NetworkTest for LoadVsPerfBenchmark { &nodes_to_send_load_to, rng, ) - .context("create emitter")?; + .context("create emitter")?; let job = rt .block_on(emitter.start_job( @@ -262,7 +260,7 @@ impl NetworkTest for LoadVsPerfBenchmark { info!("Starting for {:?}", self.workloads); results.push( self.evaluate_single( - &mut ctx, + ctx, &self.workloads, index, phase_duration @@ -345,6 +343,12 @@ impl NetworkTest for LoadVsPerfBenchmark { } } +impl 
NetworkTest for LoadVsPerfBenchmark { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + ctx.handle.clone().block_on(self.async_run(ctx)) + } +} + fn to_table(type_name: String, results: &[Vec]) -> Vec { let mut table = Vec::new(); table.push(format!( diff --git a/testsuite/testcases/src/partial_nodes_down_test.rs b/testsuite/testcases/src/partial_nodes_down_test.rs index d90841042e96c..6e2d2afff97a2 100644 --- a/testsuite/testcases/src/partial_nodes_down_test.rs +++ b/testsuite/testcases/src/partial_nodes_down_test.rs @@ -16,10 +16,10 @@ impl Test for PartialNodesDown { } } -impl NetworkTest for PartialNodesDown { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - let mut ctx_locker = ctx.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); +impl PartialNodesDown { + async fn async_run(&self, ctx: NetworkContextSynchronizer<'_>) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); let runtime = Runtime::new()?; let duration = Duration::from_secs(120); let all_validators = ctx @@ -37,7 +37,7 @@ impl NetworkTest for PartialNodesDown { thread::sleep(Duration::from_secs(5)); // Generate some traffic - let txn_stat = generate_traffic(&mut ctx, &up_nodes, duration)?; + let txn_stat = generate_traffic(ctx, &up_nodes, duration)?; ctx.report .report_txn_stats(self.name().to_string(), &txn_stat); for n in &down_nodes { @@ -49,3 +49,9 @@ impl NetworkTest for PartialNodesDown { Ok(()) } } + +impl NetworkTest for PartialNodesDown { + fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + ctx.handle.clone().block_on(self.async_run(ctx)) + } +} diff --git a/testsuite/testcases/src/quorum_store_onchain_enable_test.rs b/testsuite/testcases/src/quorum_store_onchain_enable_test.rs index f8ab04dc62a69..d6338871de9ce 100644 --- a/testsuite/testcases/src/quorum_store_onchain_enable_test.rs +++ b/testsuite/testcases/src/quorum_store_onchain_enable_test.rs @@ -50,8 +50,7 @@ impl NetworkLoadTest 
for QuorumStoreOnChainEnableTest { runtime.block_on(async { let root_cli_index = { - let root_account_arc = swarm.chain_info().root_account(); - let root_account = root_account_arc.lock().unwrap(); + let root_account = swarm.chain_info().root_account(); cli.add_account_with_address_to_cli( root_account.private_key().clone(), root_account.address(), diff --git a/testsuite/testcases/src/reconfiguration_test.rs b/testsuite/testcases/src/reconfiguration_test.rs index ece02cd15d891..7b45e50504f59 100644 --- a/testsuite/testcases/src/reconfiguration_test.rs +++ b/testsuite/testcases/src/reconfiguration_test.rs @@ -14,7 +14,7 @@ impl Test for ReconfigurationTest { } impl NetworkTest for ReconfigurationTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { + fn run(&self, _ctx: NetworkContextSynchronizer) -> Result<()> { Err(anyhow!("Not supported in aptos-framework yet")) } // TODO(https://github.com/aptos-labs/aptos-core/issues/317): add back after support those transactions in aptos-framework diff --git a/testsuite/testcases/src/state_sync_performance.rs b/testsuite/testcases/src/state_sync_performance.rs index 6556ffa8638b6..c545f95af6cbe 100644 --- a/testsuite/testcases/src/state_sync_performance.rs +++ b/testsuite/testcases/src/state_sync_performance.rs @@ -19,6 +19,24 @@ const NUM_STATE_VALUE_COUNTER_NAME: &str = "aptos_jellyfish_leaf_count"; // The /// In the test, all fullnodes are wiped, restarted and timed to synchronize. 
pub struct StateSyncFullnodePerformance; +impl StateSyncFullnodePerformance { + async fn async_run(&self, ctx: NetworkContextSynchronizer<'_>) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name())?; + + // Emit a lot of traffic and ensure the fullnodes can all sync + emit_traffic_and_ensure_bounded_sync(ctx, &all_fullnodes)?; + + // Stop and reset the fullnodes so they start syncing from genesis + stop_and_reset_nodes(ctx, &all_fullnodes, &[])?; + + // Wait for all nodes to catch up to the highest synced version + // then calculate and display the throughput results. + ensure_state_sync_transaction_throughput(ctx, self.name()) + } +} + impl Test for StateSyncFullnodePerformance { fn name(&self) -> &'static str { "StateSyncFullnodePerformance" @@ -27,19 +45,7 @@ impl Test for StateSyncFullnodePerformance { impl NetworkTest for StateSyncFullnodePerformance { fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - let mut ctx_locker = ctx.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); - let all_fullnodes = get_fullnodes_and_check_setup(&mut ctx, self.name())?; - - // Emit a lot of traffic and ensure the fullnodes can all sync - emit_traffic_and_ensure_bounded_sync(&mut ctx, &all_fullnodes)?; - - // Stop and reset the fullnodes so they start syncing from genesis - stop_and_reset_nodes(&mut ctx, &all_fullnodes, &[])?; - - // Wait for all nodes to catch up to the highest synced version - // then calculate and display the throughput results. - ensure_state_sync_transaction_throughput(&mut ctx, self.name()) + ctx.handle.clone().block_on(self.async_run(ctx)) } } @@ -47,20 +53,14 @@ impl NetworkTest for StateSyncFullnodePerformance { /// In the test, all fullnodes are wiped, restarted and timed to synchronize. 
pub struct StateSyncFullnodeFastSyncPerformance; -impl Test for StateSyncFullnodeFastSyncPerformance { - fn name(&self) -> &'static str { - "StateSyncFullnodeFastSyncPerformance" - } -} - -impl NetworkTest for StateSyncFullnodeFastSyncPerformance { - fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { - let mut ctx_locker = ctxa.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); - let all_fullnodes = get_fullnodes_and_check_setup(&mut ctx, self.name())?; +impl StateSyncFullnodeFastSyncPerformance { + async fn async_run(&self, ctxa: NetworkContextSynchronizer<'_>) -> Result<()> { + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name())?; // Emit a lot of traffic and ensure the fullnodes can all sync - emit_traffic_and_ensure_bounded_sync(&mut ctx, &all_fullnodes)?; + emit_traffic_and_ensure_bounded_sync(ctx, &all_fullnodes)?; // Wait for an epoch change to ensure fast sync can download all the latest states info!("Waiting for an epoch change."); @@ -105,12 +105,12 @@ impl NetworkTest for StateSyncFullnodeFastSyncPerformance { ); // Stop and reset the fullnodes so they start syncing from genesis - stop_and_reset_nodes(&mut ctx, &all_fullnodes, &[])?; + stop_and_reset_nodes(ctx, &all_fullnodes, &[])?; // Wait for all nodes to catch up to the highest synced epoch // then calculate and display the throughput results. display_state_sync_state_throughput( - &mut ctx, + ctx, self.name(), highest_synced_epoch, number_of_state_values, @@ -121,20 +121,26 @@ impl NetworkTest for StateSyncFullnodeFastSyncPerformance { } } -/// A state sync performance test that measures validator sync performance. -/// In the test, 2 validators are wiped, restarted and timed to synchronize. 
-pub struct StateSyncValidatorPerformance; - -impl Test for StateSyncValidatorPerformance { +impl Test for StateSyncFullnodeFastSyncPerformance { fn name(&self) -> &'static str { - "StateSyncValidatorPerformance" + "StateSyncFullnodeFastSyncPerformance" } } -impl NetworkTest for StateSyncValidatorPerformance { +impl NetworkTest for StateSyncFullnodeFastSyncPerformance { fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { - let mut ctx_locker = ctxa.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); + ctxa.handle.clone().block_on(self.async_run(ctxa)) + } +} + +/// A state sync performance test that measures validator sync performance. +/// In the test, 2 validators are wiped, restarted and timed to synchronize. +pub struct StateSyncValidatorPerformance; + +impl StateSyncValidatorPerformance { + async fn async_run(&self, ctxa: NetworkContextSynchronizer<'_>) -> Result<()> { + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); // Verify we have at least 7 validators (i.e., 3f+1, where f is 2) // so we can kill 2 validators but still make progress. let all_validators = ctx @@ -159,16 +165,28 @@ impl NetworkTest for StateSyncValidatorPerformance { ); // Generate some traffic through the validators. - emit_traffic_and_ensure_bounded_sync(&mut ctx, &all_validators)?; + emit_traffic_and_ensure_bounded_sync(ctx, &all_validators)?; // Stop and reset two validators so they start syncing from genesis info!("Deleting data for two validators!"); let validators_to_reset = &all_validators[0..2]; - stop_and_reset_nodes(&mut ctx, &[], validators_to_reset)?; + stop_and_reset_nodes(ctx, &[], validators_to_reset)?; // Wait for all nodes to catch up to the highest synced version // then calculate and display the throughput results. 
- ensure_state_sync_transaction_throughput(&mut ctx, self.name()) + ensure_state_sync_transaction_throughput(ctx, self.name()) + } +} + +impl Test for StateSyncValidatorPerformance { + fn name(&self) -> &'static str { + "StateSyncValidatorPerformance" + } +} + +impl NetworkTest for StateSyncValidatorPerformance { + fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { + ctxa.handle.clone().block_on(self.async_run(ctxa)) } } diff --git a/testsuite/testcases/src/twin_validator_test.rs b/testsuite/testcases/src/twin_validator_test.rs index 64f93ad453095..18500dd42ba5d 100644 --- a/testsuite/testcases/src/twin_validator_test.rs +++ b/testsuite/testcases/src/twin_validator_test.rs @@ -11,19 +11,11 @@ use tokio::runtime::Runtime; pub struct TwinValidatorTest; -impl Test for TwinValidatorTest { - fn name(&self) -> &'static str { - "twin validator" - } -} - -impl NetworkLoadTest for TwinValidatorTest {} - -impl NetworkTest for TwinValidatorTest { - fn run(&self, ctxa: NetworkContextSynchronizer) -> anyhow::Result<()> { +impl TwinValidatorTest { + async fn async_run(&self, ctxa: NetworkContextSynchronizer<'_>) -> anyhow::Result<()> { { - let mut ctx_locker = ctxa.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); + let mut ctx_locker = ctxa.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); let runtime = Runtime::new().unwrap(); let all_validators_ids = ctx @@ -77,3 +69,17 @@ impl NetworkTest for TwinValidatorTest { ::run(self, ctxa) } } + +impl Test for TwinValidatorTest { + fn name(&self) -> &'static str { + "twin validator" + } +} + +impl NetworkLoadTest for TwinValidatorTest {} + +impl NetworkTest for TwinValidatorTest { + fn run(&self, ctxa: NetworkContextSynchronizer) -> anyhow::Result<()> { + ctxa.handle.clone().block_on(self.async_run(ctxa)) + } +} diff --git a/testsuite/testcases/src/validator_join_leave_test.rs b/testsuite/testcases/src/validator_join_leave_test.rs index 735b465fa0554..b6ded09434dfd 100644 --- 
a/testsuite/testcases/src/validator_join_leave_test.rs +++ b/testsuite/testcases/src/validator_join_leave_test.rs @@ -1,7 +1,7 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use std::ops::DerefMut; +// use std::ops::DerefMut; use crate::{LoadDestination, NetworkLoadTest}; use aptos::{account::create::DEFAULT_FUNDED_COINS, test::CliTestFramework}; use aptos_forge::{reconfig, NetworkContext, NetworkTest, NodeExt, Result, Swarm, SwarmExt, Test, TestReport, FORGE_KEY_SEED, NetworkContextSynchronizer}; @@ -129,23 +129,21 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { .await .unwrap(); - let root_account_arcmutex = swarm.chain_info().root_account(); - let mut root_account_locker = root_account_arcmutex.lock().unwrap(); + let root_account = swarm.chain_info().root_account(); reconfig( &rest_client, &transaction_factory, - root_account_locker.deref_mut(), + root_account, ) .await; } { - let root_account_arcmutex = swarm.chain_info().root_account(); - let mut root_account_locker = root_account_arcmutex.lock().unwrap(); + let root_account = swarm.chain_info().root_account(); reconfig( &rest_client, &transaction_factory, - root_account_locker.deref_mut(), + root_account, ) .await; } @@ -160,23 +158,21 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { cli.join_validator_set(*operator_index, None).await.unwrap(); - let root_account_arcmutex = swarm.chain_info().root_account(); - let mut root_account_locker = root_account_arcmutex.lock().unwrap(); + let root_account = swarm.chain_info().root_account(); reconfig( &rest_client, &transaction_factory, - root_account_locker.deref_mut(), + root_account, ) .await; } { - let root_account_arcmutex = swarm.chain_info().root_account(); - let mut root_account_locker = root_account_arcmutex.lock().unwrap(); + let root_account = swarm.chain_info().root_account(); reconfig( &rest_client, &transaction_factory, - 
root_account_locker.deref_mut(), + root_account, ) .await; } From 59808498675b4a835066579c408cc56f8c61339e Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Mon, 3 Jun 2024 16:38:19 -0400 Subject: [PATCH 06/28] try not to create a tokio runtime within a tokio runtime --- Cargo.lock | 1 + testsuite/forge-cli/src/main.rs | 6 ++++-- testsuite/forge/Cargo.toml | 1 + testsuite/forge/src/interface/network.rs | 2 +- testsuite/testcases/src/compatibility_test.rs | 8 ++++---- testsuite/testcases/src/forge_setup_test.rs | 2 +- testsuite/testcases/src/framework_upgrade.rs | 6 +++--- testsuite/testcases/src/lib.rs | 9 +++++++-- testsuite/testcases/src/partial_nodes_down_test.rs | 2 +- testsuite/testcases/src/state_sync_performance.rs | 2 +- 10 files changed, 24 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d8c6404d7d846..23888f214708b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1647,6 +1647,7 @@ dependencies = [ "aptos-logger", "aptos-rest-client", "aptos-retrier", + "aptos-runtimes", "aptos-sdk", "aptos-short-hex-str", "aptos-state-sync-driver", diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index 41b11028646ce..a2083dcf94198 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -2688,7 +2688,9 @@ impl Test for EmitTransaction { impl NetworkTest for EmitTransaction { fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ctx.handle.clone().block_on(async { + let handle = ctx.handle.clone(); + let traffic_handle = ctx.handle.clone(); + handle.block_on(async { let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); let duration = Duration::from_secs(10); @@ -2697,7 +2699,7 @@ impl NetworkTest for EmitTransaction { .validators() .map(|v| v.peer_id()) .collect::>(); - let stats = generate_traffic(ctx, &all_validators, duration).unwrap(); + let stats = generate_traffic(ctx, &all_validators, duration, Some(traffic_handle)).unwrap(); 
ctx.report.report_txn_stats(self.name().to_string(), &stats); }); Ok(()) diff --git a/testsuite/forge/Cargo.toml b/testsuite/forge/Cargo.toml index 8e18c496f73e5..caa3c1f34ba78 100644 --- a/testsuite/forge/Cargo.toml +++ b/testsuite/forge/Cargo.toml @@ -28,6 +28,7 @@ aptos-inspection-service = { workspace = true } aptos-logger = { workspace = true } aptos-rest-client = { workspace = true } aptos-retrier = { workspace = true } +aptos-runtimes = { workspace = true } aptos-sdk = { workspace = true } aptos-short-hex-str = { workspace = true } aptos-state-sync-driver = { workspace = true } diff --git a/testsuite/forge/src/interface/network.rs b/testsuite/forge/src/interface/network.rs index 8b7896712afad..669843ce3f3d0 100644 --- a/testsuite/forge/src/interface/network.rs +++ b/testsuite/forge/src/interface/network.rs @@ -68,7 +68,7 @@ impl<'t> NetworkContext<'t> { global_duration, emit_job, success_criteria, - runtime: Runtime::new().unwrap(), + runtime: aptos_runtimes::spawn_named_runtime("emitter".into(), Some(64)), } } diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index c6ac1196cf812..2705b1c313c66 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -229,7 +229,7 @@ impl SimpleValidatorUpgrade { { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let txn_stat_prior = generate_traffic(ctx, &all_validators, duration)?; + let txn_stat_prior = generate_traffic(ctx, &all_validators, duration, Some(ctx.runtime.handle().clone()))?; ctx.report .report_txn_stats(format!("{}::liveness-check", self.name()), &txn_stat_prior); } @@ -263,7 +263,7 @@ impl SimpleValidatorUpgrade { { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let txn_stat_one = generate_traffic(ctx, &[first_node], duration)?; + let txn_stat_one = generate_traffic(ctx, &[first_node], duration, Some(ctx.runtime.handle().clone()))?; 
ctx.report.report_txn_stats( format!("{}::single-validator-upgrade", self.name()), &txn_stat_one, @@ -301,7 +301,7 @@ impl SimpleValidatorUpgrade { let ctx = ctx_locker.deref_mut(); // Generate some traffic - let txn_stat_half = generate_traffic(ctx, &first_batch, duration)?; + let txn_stat_half = generate_traffic(ctx, &first_batch, duration, Some(ctx.runtime.handle().clone()))?; ctx.report.report_txn_stats( format!("{}::half-validator-upgrade", self.name()), &txn_stat_half, @@ -336,7 +336,7 @@ impl SimpleValidatorUpgrade { let ctx = ctx_locker.deref_mut(); // Generate some traffic - let txn_stat_all = generate_traffic(ctx, &second_batch, duration)?; + let txn_stat_all = generate_traffic(ctx, &second_batch, duration, Some(ctx.runtime.handle().clone()))?; ctx.report.report_txn_stats( format!("{}::rest-validator-upgrade", self.name()), &txn_stat_all, diff --git a/testsuite/testcases/src/forge_setup_test.rs b/testsuite/testcases/src/forge_setup_test.rs index 9a71d9b46393e..042058341c7d8 100644 --- a/testsuite/testcases/src/forge_setup_test.rs +++ b/testsuite/testcases/src/forge_setup_test.rs @@ -72,7 +72,7 @@ impl ForgeSetupTest { } let duration = Duration::from_secs(10 * num_pfns); - let txn_stat = generate_traffic(ctx, &pfns, duration)?; + let txn_stat = generate_traffic(ctx, &pfns, duration, Some(ctx.runtime.handle().clone()))?; ctx.report .report_txn_stats(self.name().to_string(), &txn_stat); diff --git a/testsuite/testcases/src/framework_upgrade.rs b/testsuite/testcases/src/framework_upgrade.rs index 4b7596affb5a5..891f56b67955f 100644 --- a/testsuite/testcases/src/framework_upgrade.rs +++ b/testsuite/testcases/src/framework_upgrade.rs @@ -57,7 +57,7 @@ impl FrameworkUpgrade { // Generate some traffic let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(ctx, &all_validators, duration)?; + let txn_stat = generate_traffic(ctx, &all_validators, duration, Some(ctx.runtime.handle().clone()))?; ctx.report.report_txn_stats( 
format!("{}::full-framework-upgrade", self.name()), &txn_stat, @@ -127,7 +127,7 @@ impl FrameworkUpgrade { // Generate some traffic let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(ctx, &all_validators, duration)?; + let txn_stat = generate_traffic(ctx, &all_validators, duration, Some(ctx.runtime.handle().clone()))?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, @@ -152,7 +152,7 @@ impl FrameworkUpgrade { runtime.block_on(batch_update(ctx, second_half, &new_version))?; let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(ctx, &all_validators, duration)?; + let txn_stat = generate_traffic(ctx, &all_validators, duration, Some(ctx.runtime.handle().clone()))?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index 99049bdbe5ca6..154b740ca5f80 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -38,7 +38,7 @@ use std::{ time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; use std::ops::DerefMut; -use tokio::runtime::Runtime; +use tokio::runtime::{Handle, Runtime}; const WARMUP_DURATION_FRACTION: f32 = 0.07; const COOLDOWN_DURATION_FRACTION: f32 = 0.04; @@ -117,13 +117,18 @@ pub fn generate_traffic( ctx: &mut NetworkContext<'_>, nodes: &[PeerId], duration: Duration, + rt: Option, ) -> Result { let emit_job_request = ctx.emit_job.clone(); let rng = SeedableRng::from_rng(ctx.core().rng())?; let (emitter, emit_job_request) = create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng)?; - let rt = traffic_emitter_runtime()?; + let rt = match rt { + Some(x) => x, + None => traffic_emitter_runtime()?.handle().clone() + }; + // let rt = traffic_emitter_runtime()?; let stats = rt.block_on(emitter.emit_txn_for( ctx.swarm().chain_info().root_account, emit_job_request, diff --git a/testsuite/testcases/src/partial_nodes_down_test.rs 
b/testsuite/testcases/src/partial_nodes_down_test.rs index 6e2d2afff97a2..e370a2e46dcf2 100644 --- a/testsuite/testcases/src/partial_nodes_down_test.rs +++ b/testsuite/testcases/src/partial_nodes_down_test.rs @@ -37,7 +37,7 @@ impl PartialNodesDown { thread::sleep(Duration::from_secs(5)); // Generate some traffic - let txn_stat = generate_traffic(ctx, &up_nodes, duration)?; + let txn_stat = generate_traffic(ctx, &up_nodes, duration, Some(runtime.handle().clone()))?; ctx.report .report_txn_stats(self.name().to_string(), &txn_stat); for n in &down_nodes { diff --git a/testsuite/testcases/src/state_sync_performance.rs b/testsuite/testcases/src/state_sync_performance.rs index c545f95af6cbe..90abb3803807d 100644 --- a/testsuite/testcases/src/state_sync_performance.rs +++ b/testsuite/testcases/src/state_sync_performance.rs @@ -233,7 +233,7 @@ fn emit_traffic_and_ensure_bounded_sync( "Generating the initial traffic for {:?} seconds.", emit_txn_duration.as_secs() ); - let _txn_stat = generate_traffic(ctx, nodes_to_send_traffic, emit_txn_duration)?; + let _txn_stat = generate_traffic(ctx, nodes_to_send_traffic, emit_txn_duration, Some(ctx.runtime.handle().clone()))?; // Wait for all nodes to synchronize. We time bound this to ensure // nodes don't fall too far behind. 
From 835735b753e36ae72e28a8cca1ac556b83e4e76e Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Mon, 3 Jun 2024 22:59:18 -0400 Subject: [PATCH 07/28] async fn generate_traffic() --- testsuite/forge-cli/src/main.rs | 3 +-- testsuite/testcases/src/compatibility_test.rs | 8 ++++---- testsuite/testcases/src/forge_setup_test.rs | 2 +- testsuite/testcases/src/framework_upgrade.rs | 6 +++--- testsuite/testcases/src/lib.rs | 14 ++++---------- testsuite/testcases/src/partial_nodes_down_test.rs | 2 +- testsuite/testcases/src/state_sync_performance.rs | 3 ++- 7 files changed, 16 insertions(+), 22 deletions(-) diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index a2083dcf94198..34ed83b84999c 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -2689,7 +2689,6 @@ impl Test for EmitTransaction { impl NetworkTest for EmitTransaction { fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { let handle = ctx.handle.clone(); - let traffic_handle = ctx.handle.clone(); handle.block_on(async { let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); @@ -2699,7 +2698,7 @@ impl NetworkTest for EmitTransaction { .validators() .map(|v| v.peer_id()) .collect::>(); - let stats = generate_traffic(ctx, &all_validators, duration, Some(traffic_handle)).unwrap(); + let stats = generate_traffic(ctx, &all_validators, duration).await.unwrap(); ctx.report.report_txn_stats(self.name().to_string(), &stats); }); Ok(()) diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index 2705b1c313c66..59eef0d319219 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -229,7 +229,7 @@ impl SimpleValidatorUpgrade { { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let txn_stat_prior = generate_traffic(ctx, &all_validators, duration, Some(ctx.runtime.handle().clone()))?; + let 
txn_stat_prior = generate_traffic(ctx, &all_validators, duration).await?; ctx.report .report_txn_stats(format!("{}::liveness-check", self.name()), &txn_stat_prior); } @@ -263,7 +263,7 @@ impl SimpleValidatorUpgrade { { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let txn_stat_one = generate_traffic(ctx, &[first_node], duration, Some(ctx.runtime.handle().clone()))?; + let txn_stat_one = generate_traffic(ctx, &[first_node], duration).await?; ctx.report.report_txn_stats( format!("{}::single-validator-upgrade", self.name()), &txn_stat_one, @@ -301,7 +301,7 @@ impl SimpleValidatorUpgrade { let ctx = ctx_locker.deref_mut(); // Generate some traffic - let txn_stat_half = generate_traffic(ctx, &first_batch, duration, Some(ctx.runtime.handle().clone()))?; + let txn_stat_half = generate_traffic(ctx, &first_batch, duration).await?; ctx.report.report_txn_stats( format!("{}::half-validator-upgrade", self.name()), &txn_stat_half, @@ -336,7 +336,7 @@ impl SimpleValidatorUpgrade { let ctx = ctx_locker.deref_mut(); // Generate some traffic - let txn_stat_all = generate_traffic(ctx, &second_batch, duration, Some(ctx.runtime.handle().clone()))?; + let txn_stat_all = generate_traffic(ctx, &second_batch, duration).await?; ctx.report.report_txn_stats( format!("{}::rest-validator-upgrade", self.name()), &txn_stat_all, diff --git a/testsuite/testcases/src/forge_setup_test.rs b/testsuite/testcases/src/forge_setup_test.rs index 042058341c7d8..14c374eb3dcdf 100644 --- a/testsuite/testcases/src/forge_setup_test.rs +++ b/testsuite/testcases/src/forge_setup_test.rs @@ -72,7 +72,7 @@ impl ForgeSetupTest { } let duration = Duration::from_secs(10 * num_pfns); - let txn_stat = generate_traffic(ctx, &pfns, duration, Some(ctx.runtime.handle().clone()))?; + let txn_stat = generate_traffic(ctx, &pfns, duration).await?; ctx.report .report_txn_stats(self.name().to_string(), &txn_stat); diff --git a/testsuite/testcases/src/framework_upgrade.rs 
b/testsuite/testcases/src/framework_upgrade.rs index 891f56b67955f..0fc17a3cd6982 100644 --- a/testsuite/testcases/src/framework_upgrade.rs +++ b/testsuite/testcases/src/framework_upgrade.rs @@ -57,7 +57,7 @@ impl FrameworkUpgrade { // Generate some traffic let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(ctx, &all_validators, duration, Some(ctx.runtime.handle().clone()))?; + let txn_stat = generate_traffic(ctx, &all_validators, duration).await?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, @@ -127,7 +127,7 @@ impl FrameworkUpgrade { // Generate some traffic let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(ctx, &all_validators, duration, Some(ctx.runtime.handle().clone()))?; + let txn_stat = generate_traffic(ctx, &all_validators, duration).await?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, @@ -152,7 +152,7 @@ impl FrameworkUpgrade { runtime.block_on(batch_update(ctx, second_half, &new_version))?; let duration = Duration::from_secs(30); - let txn_stat = generate_traffic(ctx, &all_validators, duration, Some(ctx.runtime.handle().clone()))?; + let txn_stat = generate_traffic(ctx, &all_validators, duration).await?; ctx.report.report_txn_stats( format!("{}::full-framework-upgrade", self.name()), &txn_stat, diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index 154b740ca5f80..230222a11204b 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -38,7 +38,7 @@ use std::{ time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; use std::ops::DerefMut; -use tokio::runtime::{Handle, Runtime}; +use tokio::runtime::Runtime; const WARMUP_DURATION_FRACTION: f32 = 0.07; const COOLDOWN_DURATION_FRACTION: f32 = 0.04; @@ -113,27 +113,21 @@ pub fn traffic_emitter_runtime() -> Result { Ok(runtime) } -pub fn generate_traffic( +pub async fn generate_traffic( ctx: &mut NetworkContext<'_>, 
nodes: &[PeerId], duration: Duration, - rt: Option, ) -> Result { let emit_job_request = ctx.emit_job.clone(); let rng = SeedableRng::from_rng(ctx.core().rng())?; let (emitter, emit_job_request) = create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng)?; - let rt = match rt { - Some(x) => x, - None => traffic_emitter_runtime()?.handle().clone() - }; - // let rt = traffic_emitter_runtime()?; - let stats = rt.block_on(emitter.emit_txn_for( + let stats = emitter.emit_txn_for( ctx.swarm().chain_info().root_account, emit_job_request, duration, - ))?; + ).await?; Ok(stats) } diff --git a/testsuite/testcases/src/partial_nodes_down_test.rs b/testsuite/testcases/src/partial_nodes_down_test.rs index e370a2e46dcf2..ff93c7abbb0c6 100644 --- a/testsuite/testcases/src/partial_nodes_down_test.rs +++ b/testsuite/testcases/src/partial_nodes_down_test.rs @@ -37,7 +37,7 @@ impl PartialNodesDown { thread::sleep(Duration::from_secs(5)); // Generate some traffic - let txn_stat = generate_traffic(ctx, &up_nodes, duration, Some(runtime.handle().clone()))?; + let txn_stat = generate_traffic(ctx, &up_nodes, duration).await?; ctx.report .report_txn_stats(self.name().to_string(), &txn_stat); for n in &down_nodes { diff --git a/testsuite/testcases/src/state_sync_performance.rs b/testsuite/testcases/src/state_sync_performance.rs index 90abb3803807d..15aebf5f56c4e 100644 --- a/testsuite/testcases/src/state_sync_performance.rs +++ b/testsuite/testcases/src/state_sync_performance.rs @@ -233,7 +233,8 @@ fn emit_traffic_and_ensure_bounded_sync( "Generating the initial traffic for {:?} seconds.", emit_txn_duration.as_secs() ); - let _txn_stat = generate_traffic(ctx, nodes_to_send_traffic, emit_txn_duration, Some(ctx.runtime.handle().clone()))?; + let handle = ctx.runtime.handle().clone(); + let _txn_stat = handle.block_on(generate_traffic(ctx, nodes_to_send_traffic, emit_txn_duration))?; // Wait for all nodes to synchronize. 
We time bound this to ensure // nodes don't fall too far behind. From dded3545deb12e1a8342adf6aabb26279ce1855f Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Tue, 4 Jun 2024 15:16:28 -0400 Subject: [PATCH 08/28] tweak scope-drop stuff --- testsuite/forge/src/runner.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/testsuite/forge/src/runner.rs b/testsuite/forge/src/runner.rs index ec31af39b0e00..e31246fe0db99 100644 --- a/testsuite/forge/src/runner.rs +++ b/testsuite/forge/src/runner.rs @@ -595,9 +595,14 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { self.tests.emit_job_request.clone(), self.tests.success_criteria.clone(), ); - // let network_ctx = Arc::new(Mutex::new(network_ctx)); - let network_ctx = NetworkContextSynchronizer::new(network_ctx, runtime.handle().clone()); - let result = run_test(|| test.run(network_ctx)); + let handle = network_ctx.runtime.handle().clone(); + let network_ctx = NetworkContextSynchronizer::new(network_ctx, handle); + let result = run_test(|| test.run(network_ctx.clone())); + // explicitly keep network context in scope so that its created tokio Runtime drops after all the stuff has run. 
+ let NetworkContextSynchronizer{ctx, handle} = network_ctx; + drop(handle); + let ctx = Arc::into_inner(ctx).unwrap().into_inner(); + drop(ctx); report.report_text(result.to_string()); summary.handle_result(test.name().to_owned(), result)?; } From 323d334d60212afe2a92c8f4856bef5588be4632 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Tue, 4 Jun 2024 17:08:17 -0400 Subject: [PATCH 09/28] another try at not recursively breaking tokio runtimes --- testsuite/forge/src/interface/network.rs | 39 ++++++++++++++++++- testsuite/forge/src/runner.rs | 1 + .../src/multi_region_network_test.rs | 8 ++-- 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/testsuite/forge/src/interface/network.rs b/testsuite/forge/src/interface/network.rs index 669843ce3f3d0..6a822e19470ee 100644 --- a/testsuite/forge/src/interface/network.rs +++ b/testsuite/forge/src/interface/network.rs @@ -2,6 +2,7 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. // SPDX-License-Identifier: Apache-2.0 +use std::future::Future; use std::sync::Arc; use super::Test; use crate::{ @@ -11,7 +12,7 @@ use crate::{ }; use aptos_transaction_emitter_lib::{EmitJobRequest, TxnStats}; use std::time::Duration; -use tokio::runtime::Runtime; +use tokio::runtime::{Handle, Runtime}; /// The testing interface which defines a test written with full control over an existing network. 
/// Tests written against this interface will have access to both the Root account as well as the @@ -40,6 +41,18 @@ impl<'t> NetworkContextSynchronizer<'t> { let mut locker = self.ctx.lock().await; locker.report.report_text(text); } + + pub fn flex_block_on(&self, future: F) -> F::Output { + match Handle::try_current() { + Ok(handle) => { + // we are in an async context, we don't need block_on + handle.block_on(future) + } + Err(_) => { + self.handle.block_on(future) + } + } + } } pub struct NetworkContext<'t> { @@ -104,4 +117,28 @@ impl<'t> NetworkContext<'t> { end_version, )) } + + pub fn handle(&self) -> Handle { + match Handle::try_current() { + Ok(handle) => { + // we are in an async context, we don't need block_on + handle + } + Err(_) => { + self.runtime.handle().clone() + } + } + } + + pub fn flex_block_on(&self, future: F) -> F::Output { + match Handle::try_current() { + Ok(handle) => { + // we are in an async context, we don't need block_on + handle.block_on(future) + } + Err(_) => { + self.runtime.block_on(future) + } + } + } } diff --git a/testsuite/forge/src/runner.rs b/testsuite/forge/src/runner.rs index e31246fe0db99..53fded0a841ec 100644 --- a/testsuite/forge/src/runner.rs +++ b/testsuite/forge/src/runner.rs @@ -596,6 +596,7 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { self.tests.success_criteria.clone(), ); let handle = network_ctx.runtime.handle().clone(); + let _handle_context = handle.enter(); let network_ctx = NetworkContextSynchronizer::new(network_ctx, handle); let result = run_test(|| test.run(network_ctx.clone())); // explicitly keep network context in scope so that its created tokio Runtime drops after all the stuff has run. 
diff --git a/testsuite/testcases/src/multi_region_network_test.rs b/testsuite/testcases/src/multi_region_network_test.rs index 5af41794699f0..5497523d23b36 100644 --- a/testsuite/testcases/src/multi_region_network_test.rs +++ b/testsuite/testcases/src/multi_region_network_test.rs @@ -312,16 +312,16 @@ pub fn create_multi_region_swarm_network_chaos( impl NetworkLoadTest for MultiRegionNetworkEmulationTest { fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { let chaos = self.create_netem_chaos(ctx.swarm); - ctx.runtime - .block_on(ctx.swarm.inject_chaos(SwarmChaos::NetEm(chaos)))?; + let handle = ctx.handle(); + handle.block_on(ctx.swarm.inject_chaos(SwarmChaos::NetEm(chaos)))?; Ok(LoadDestination::FullnodesOtherwiseValidators) } fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { let chaos = self.create_netem_chaos(ctx.swarm); - ctx.runtime - .block_on(ctx.swarm.remove_chaos(SwarmChaos::NetEm(chaos)))?; + let handle = ctx.handle(); + handle.block_on(ctx.swarm.remove_chaos(SwarmChaos::NetEm(chaos)))?; Ok(()) } } From 8b1ceda6dc6f23861f53d423029d6be47d4cbae0 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Tue, 4 Jun 2024 17:10:25 -0400 Subject: [PATCH 10/28] fmt --- .../src/emitter/account_minter.rs | 24 ++--- .../src/emitter/mod.rs | 4 +- .../src/emitter/submission_worker.rs | 31 ++++--- .../transaction-emitter-lib/src/wrappers.rs | 6 +- .../src/accounts_pool_wrapper.rs | 5 +- .../src/call_custom_modules.rs | 3 +- .../src/entry_points.rs | 3 +- crates/transaction-generator-lib/src/lib.rs | 2 +- testsuite/forge-cli/src/main.rs | 6 +- testsuite/forge/src/interface/aptos.rs | 2 +- testsuite/forge/src/interface/chain_info.rs | 11 +-- testsuite/forge/src/interface/network.rs | 24 ++--- testsuite/forge/src/runner.rs | 2 +- testsuite/smoke-test/src/indexer.rs | 5 +- testsuite/testcases/src/compatibility_test.rs | 91 ++++++++++++------- .../src/consensus_reliability_tests.rs | 10 +- testsuite/testcases/src/forge_setup_test.rs | 3 +- 
testsuite/testcases/src/framework_upgrade.rs | 7 +- .../src/fullnode_reboot_stress_test.rs | 4 +- testsuite/testcases/src/lib.rs | 56 ++++++++---- .../testcases/src/load_vs_perf_benchmark.rs | 13 ++- testsuite/testcases/src/modifiers.rs | 5 +- .../src/multi_region_network_test.rs | 5 +- .../testcases/src/network_bandwidth_test.rs | 5 +- testsuite/testcases/src/network_loss_test.rs | 4 +- .../testcases/src/network_partition_test.rs | 5 +- .../testcases/src/partial_nodes_down_test.rs | 3 +- .../src/public_fullnode_performance.rs | 5 +- .../testcases/src/state_sync_performance.rs | 14 ++- .../src/three_region_simulation_test.rs | 5 +- .../testcases/src/twin_validator_test.rs | 6 +- testsuite/testcases/src/two_traffics_test.rs | 5 +- .../src/validator_join_leave_test.rs | 33 ++----- 33 files changed, 233 insertions(+), 174 deletions(-) diff --git a/crates/transaction-emitter-lib/src/emitter/account_minter.rs b/crates/transaction-emitter-lib/src/emitter/account_minter.rs index 6001903c4b6ae..6b41ddb4cb7cd 100644 --- a/crates/transaction-emitter-lib/src/emitter/account_minter.rs +++ b/crates/transaction-emitter-lib/src/emitter/account_minter.rs @@ -18,6 +18,7 @@ use aptos_sdk::{ use aptos_transaction_generator_lib::{ CounterState, ReliableTransactionSubmitter, RootAccountHandle, SEND_AMOUNT, }; +use aptos_types::account_address::AccountAddress; use core::{ cmp::min, result::Result::{Err, Ok}, @@ -29,7 +30,6 @@ use std::{ sync::Arc, time::{Duration, Instant}, }; -use aptos_types::account_address::AccountAddress; pub struct SourceAccountManager<'t> { pub source_account: Arc, @@ -413,12 +413,8 @@ impl<'t> AccountMinter<'t> { let mut i = 0; let mut seed_accounts = vec![]; let source_account = match new_source_account { - None => { - self.source_account.get_root_account().clone() - }, - Some(param_account) => { - Arc::new(param_account) - }, + None => self.source_account.get_root_account().clone(), + Some(param_account) => Arc::new(param_account), }; while i < 
seed_account_num { let batch_size = min(max_submit_batch_size, seed_account_num - i); @@ -431,11 +427,11 @@ impl<'t> AccountMinter<'t> { .iter() .map(|account| { create_and_fund_account_request( - source_account.clone(), - coins_per_seed_account, - account.public_key(), - txn_factory, - ) + source_account.clone(), + coins_per_seed_account, + account.public_key(), + txn_factory, + ) }) .collect(); txn_executor @@ -483,9 +479,7 @@ impl<'t> AccountMinter<'t> { let root_address = root_account.address(); for i in 0..NUM_TRIES { { - let new_sequence_number = txn_executor - .query_sequence_number(root_address) - .await?; + let new_sequence_number = txn_executor.query_sequence_number(root_address).await?; root_account.set_sequence_number(new_sequence_number); } diff --git a/crates/transaction-emitter-lib/src/emitter/mod.rs b/crates/transaction-emitter-lib/src/emitter/mod.rs index b65996e513ea2..5bf604b211109 100644 --- a/crates/transaction-emitter-lib/src/emitter/mod.rs +++ b/crates/transaction-emitter-lib/src/emitter/mod.rs @@ -990,8 +990,8 @@ fn update_seq_num_and_get_num_expired( latest_fetched_counts: HashMap, ) -> (usize, usize) { accounts.iter_mut().for_each(|account| { - // let mut account_lock = account.lock().unwrap(); - // let account = account_lock.deref_mut(); + // let mut account_lock = account.lock().unwrap(); + // let account = account_lock.deref_mut(); let (start_seq_num, end_seq_num) = if let Some(pair) = account_to_start_and_end_seq_num.get(&account.address()) { pair diff --git a/crates/transaction-emitter-lib/src/emitter/submission_worker.rs b/crates/transaction-emitter-lib/src/emitter/submission_worker.rs index 5625ad2a84705..8c9c1095e869f 100644 --- a/crates/transaction-emitter-lib/src/emitter/submission_worker.rs +++ b/crates/transaction-emitter-lib/src/emitter/submission_worker.rs @@ -201,7 +201,10 @@ impl SubmissionWorker { } } - self.accounts.into_iter().map(|account_arc_mutex| Arc::into_inner(account_arc_mutex).unwrap()).collect() + self.accounts 
+ .into_iter() + .map(|account_arc_mutex| Arc::into_inner(account_arc_mutex).unwrap()) + .collect() } // returns true if it returned early @@ -253,10 +256,8 @@ impl SubmissionWorker { &latest_fetched_counts, ); } - let (num_committed, num_expired) = count_committed_expired_stats( - account_to_start_and_end_seq_num, - latest_fetched_counts, - ); + let (num_committed, num_expired) = + count_committed_expired_stats(account_to_start_and_end_seq_num, latest_fetched_counts); // let (num_committed, num_expired) = update_seq_num_and_get_num_expired( // self.accounts.clone(), // account_to_start_and_end_seq_num, @@ -344,21 +345,21 @@ fn update_account_seq_num( if *count != account.sequence_number() { assert!(account.sequence_number() > *count); debug!( - "Stale sequence_number for {}, expected {}, setting to {}", - account.address(), - account.sequence_number(), - count - ); + "Stale sequence_number for {}, expected {}, setting to {}", + account.address(), + account.sequence_number(), + count + ); account.set_sequence_number(*count); } }, None => { debug!( - "Couldn't fetch sequence_number for {}, expected {}, setting to {}", - account.address(), - account.sequence_number(), - start_seq_num - ); + "Couldn't fetch sequence_number for {}, expected {}, setting to {}", + account.address(), + account.sequence_number(), + start_seq_num + ); account.set_sequence_number(*start_seq_num); }, } diff --git a/crates/transaction-emitter-lib/src/wrappers.rs b/crates/transaction-emitter-lib/src/wrappers.rs index 7ae79af53667b..813c4a1ae07dc 100644 --- a/crates/transaction-emitter-lib/src/wrappers.rs +++ b/crates/transaction-emitter-lib/src/wrappers.rs @@ -17,8 +17,10 @@ use aptos_logger::{error, info}; use aptos_sdk::transaction_builder::TransactionFactory; use aptos_transaction_generator_lib::{args::TransactionTypeArg, WorkflowProgress}; use rand::{rngs::StdRng, Rng, SeedableRng}; -use std::time::{Duration, Instant}; -use std::sync::Arc; +use std::{ + sync::Arc, + time::{Duration, 
Instant}, +}; pub async fn emit_transactions( cluster_args: &ClusterArgs, diff --git a/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs b/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs index 6fecf3590129f..f7406fb845a6b 100644 --- a/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs +++ b/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs @@ -49,7 +49,10 @@ impl TransactionGenerator for AccountsPoolWrapperGenerator { // Wrap LocalAccount in Arc+Mutex // let account_arcs : Vec> = accounts_to_use.into_iter().map(Arc::new).collect(); // get txns - let txns = accounts_to_use.iter().flat_map(|account| self.generator.generate_transactions(account, 1)).collect(); + let txns = accounts_to_use + .iter() + .flat_map(|account| self.generator.generate_transactions(account, 1)) + .collect(); // let txns = accounts_to_use // .iter_mut() // .flat_map(|account| { diff --git a/crates/transaction-generator-lib/src/call_custom_modules.rs b/crates/transaction-generator-lib/src/call_custom_modules.rs index b22077a98f15c..6ea8012d2f144 100644 --- a/crates/transaction-generator-lib/src/call_custom_modules.rs +++ b/crates/transaction-generator-lib/src/call_custom_modules.rs @@ -13,8 +13,7 @@ use aptos_sdk::{ }; use async_trait::async_trait; use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng}; -use std::borrow::Borrow; -use std::sync::Arc; +use std::{borrow::Borrow, sync::Arc}; // Fn + Send + Sync, as it will be called from multiple threads simultaneously // if you need any coordination, use Arc> fields diff --git a/crates/transaction-generator-lib/src/entry_points.rs b/crates/transaction-generator-lib/src/entry_points.rs index ce5f9e2ac20b2..6c5f5a014243e 100644 --- a/crates/transaction-generator-lib/src/entry_points.rs +++ b/crates/transaction-generator-lib/src/entry_points.rs @@ -17,8 +17,7 @@ use aptos_sdk::{ }; use async_trait::async_trait; use rand::rngs::StdRng; -use std::borrow::Borrow; -use std::sync::Arc; +use 
std::{borrow::Borrow, sync::Arc}; pub struct EntryPointTransactionGenerator { pub entry_point: EntryPoints, diff --git a/crates/transaction-generator-lib/src/lib.rs b/crates/transaction-generator-lib/src/lib.rs index ff563cacf8df3..89805cb70a481 100644 --- a/crates/transaction-generator-lib/src/lib.rs +++ b/crates/transaction-generator-lib/src/lib.rs @@ -216,7 +216,7 @@ pub trait RootAccountHandle: Send + Sync { fn get_root_account(&self) -> Arc; } -pub struct AlwaysApproveRootAccountHandle{ +pub struct AlwaysApproveRootAccountHandle { pub root_account: Arc, } diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index 554315d5367fc..19ef8ea6b4422 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -67,6 +67,7 @@ use rand::{rngs::ThreadRng, seq::SliceRandom, Rng}; use std::{ env, num::NonZeroUsize, + ops::DerefMut, path::{Path, PathBuf}, process, sync::{ @@ -76,7 +77,6 @@ use std::{ thread, time::Duration, }; -use std::ops::DerefMut; use suites::dag::get_dag_test; use tokio::{runtime::Runtime, select}; use url::Url; @@ -2696,7 +2696,9 @@ impl NetworkTest for EmitTransaction { .validators() .map(|v| v.peer_id()) .collect::>(); - let stats = generate_traffic(ctx, &all_validators, duration).await.unwrap(); + let stats = generate_traffic(ctx, &all_validators, duration) + .await + .unwrap(); ctx.report.report_txn_stats(self.name().to_string(), &stats); }); Ok(()) diff --git a/testsuite/forge/src/interface/aptos.rs b/testsuite/forge/src/interface/aptos.rs index ed3e9f15dcbdf..da5f76d49aac2 100644 --- a/testsuite/forge/src/interface/aptos.rs +++ b/testsuite/forge/src/interface/aptos.rs @@ -1,7 +1,6 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use std::sync::Arc; use super::Test; use crate::{CoreContext, Result, TestReport}; use anyhow::anyhow; @@ -26,6 +25,7 @@ use aptos_sdk::{ use rand::{rngs::OsRng, Rng, SeedableRng}; use reqwest::Url; use serde::{Deserialize, Serialize}; +use 
std::sync::Arc; #[async_trait::async_trait] pub trait AptosTest: Test { diff --git a/testsuite/forge/src/interface/chain_info.rs b/testsuite/forge/src/interface/chain_info.rs index 61b4d114ba2ba..949bddf201059 100644 --- a/testsuite/forge/src/interface/chain_info.rs +++ b/testsuite/forge/src/interface/chain_info.rs @@ -2,7 +2,6 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. // SPDX-License-Identifier: Apache-2.0 -use std::sync::Arc; use crate::AptosPublicInfo; use anyhow::Result; use aptos_rest_client::Client as RestClient; @@ -11,6 +10,7 @@ use aptos_sdk::{ types::{chain_id::ChainId, LocalAccount}, }; use reqwest::Url; +use std::sync::Arc; #[derive(Debug)] pub struct ChainInfo { @@ -40,13 +40,8 @@ impl ChainInfo { } pub async fn resync_root_account_seq_num(&mut self, client: &RestClient) -> Result<()> { - let root_address = { - self.root_account.address() - }; - let account = client - .get_account(root_address) - .await? - .into_inner(); + let root_address = { self.root_account.address() }; + let account = client.get_account(root_address).await?.into_inner(); self.root_account .set_sequence_number(account.sequence_number); Ok(()) diff --git a/testsuite/forge/src/interface/network.rs b/testsuite/forge/src/interface/network.rs index 6a822e19470ee..2a10a505a980c 100644 --- a/testsuite/forge/src/interface/network.rs +++ b/testsuite/forge/src/interface/network.rs @@ -2,8 +2,6 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. 
// SPDX-License-Identifier: Apache-2.0 -use std::future::Future; -use std::sync::Arc; use super::Test; use crate::{ prometheus_metrics::LatencyBreakdown, @@ -11,7 +9,7 @@ use crate::{ CoreContext, Result, Swarm, TestReport, }; use aptos_transaction_emitter_lib::{EmitJobRequest, TxnStats}; -use std::time::Duration; +use std::{future::Future, sync::Arc, time::Duration}; use tokio::runtime::{Handle, Runtime}; /// The testing interface which defines a test written with full control over an existing network. @@ -31,7 +29,7 @@ pub struct NetworkContextSynchronizer<'t> { // TODO: some useful things that don't need to hold the lock or make a copy impl<'t> NetworkContextSynchronizer<'t> { pub fn new(ctx: NetworkContext<'t>, handle: tokio::runtime::Handle) -> Self { - Self{ + Self { ctx: Arc::new(tokio::sync::Mutex::new(ctx)), handle, } @@ -47,10 +45,8 @@ impl<'t> NetworkContextSynchronizer<'t> { Ok(handle) => { // we are in an async context, we don't need block_on handle.block_on(future) - } - Err(_) => { - self.handle.block_on(future) - } + }, + Err(_) => self.handle.block_on(future), } } } @@ -123,10 +119,8 @@ impl<'t> NetworkContext<'t> { Ok(handle) => { // we are in an async context, we don't need block_on handle - } - Err(_) => { - self.runtime.handle().clone() - } + }, + Err(_) => self.runtime.handle().clone(), } } @@ -135,10 +129,8 @@ impl<'t> NetworkContext<'t> { Ok(handle) => { // we are in an async context, we don't need block_on handle.block_on(future) - } - Err(_) => { - self.runtime.block_on(future) - } + }, + Err(_) => self.runtime.block_on(future), } } } diff --git a/testsuite/forge/src/runner.rs b/testsuite/forge/src/runner.rs index 53fded0a841ec..a1761556d3eb4 100644 --- a/testsuite/forge/src/runner.rs +++ b/testsuite/forge/src/runner.rs @@ -600,7 +600,7 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { let network_ctx = NetworkContextSynchronizer::new(network_ctx, handle); let result = run_test(|| test.run(network_ctx.clone())); // explicitly keep network context 
in scope so that its created tokio Runtime drops after all the stuff has run. - let NetworkContextSynchronizer{ctx, handle} = network_ctx; + let NetworkContextSynchronizer { ctx, handle } = network_ctx; drop(handle); let ctx = Arc::into_inner(ctx).unwrap().into_inner(); drop(ctx); diff --git a/testsuite/smoke-test/src/indexer.rs b/testsuite/smoke-test/src/indexer.rs index f07bbfb59a2ae..1510eb51b86ac 100644 --- a/testsuite/smoke-test/src/indexer.rs +++ b/testsuite/smoke-test/src/indexer.rs @@ -32,10 +32,7 @@ pub fn setup_indexer() -> anyhow::Result { Ok(conn_pool) } -pub async fn execute_nft_txns<'t>( - creator: LocalAccount, - info: &mut AptosPublicInfo, -) -> Result<()> { +pub async fn execute_nft_txns<'t>(creator: LocalAccount, info: &mut AptosPublicInfo) -> Result<()> { let collection_name = "collection name".to_owned().into_bytes(); let token_name = "token name".to_owned().into_bytes(); let collection_builder = diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index 59eef0d319219..d9dcfdb885ba4 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -2,17 +2,24 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. 
// SPDX-License-Identifier: Apache-2.0 -use std::ops::DerefMut; -use std::sync::Arc; -use std::sync::atomic::{AtomicBool, Ordering}; use crate::{batch_update_gradually, create_emitter_and_request, generate_traffic}; use anyhow::bail; -use rand::SeedableRng; -use aptos_forge::{EmitJobRequest, NetworkContextSynchronizer, NetworkTest, Result, SwarmExt, Test, TxnEmitter, TxnStats, Version}; +use aptos_forge::{ + EmitJobRequest, NetworkContextSynchronizer, NetworkTest, Result, SwarmExt, Test, TxnEmitter, + TxnStats, Version, +}; use aptos_logger::info; -use tokio::time::Duration; // use aptos_sdk::transaction_builder::TransactionFactory; use aptos_sdk::types::{LocalAccount, PeerId}; +use rand::SeedableRng; +use std::{ + ops::DerefMut, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, +}; +use tokio::time::Duration; pub struct SimpleValidatorUpgrade; @@ -37,7 +44,15 @@ async fn upgrade_task( max_wait: Duration, done: Arc, ) -> Result<()> { - let result = batch_update_gradually(ctxa, validators_to_update, version, wait_until_healthy, delay, max_wait).await; + let result = batch_update_gradually( + ctxa, + validators_to_update, + version, + wait_until_healthy, + delay, + max_wait, + ) + .await; done.store(true, Ordering::Relaxed); result } @@ -47,17 +62,20 @@ async fn stat_gather_task( source_account: Arc, upgrade_traffic_chunk_duration: Duration, done: Arc, -) -> Result>{ +) -> Result> { let mut upgrade_stats = vec![]; while !done.load(Ordering::Relaxed) { - let upgrading_stats = emitter.clone().emit_txn_for( - source_account.clone(), - emit_job_request.clone(), - upgrade_traffic_chunk_duration, - ).await?; + let upgrading_stats = emitter + .clone() + .emit_txn_for( + source_account.clone(), + emit_job_request.clone(), + upgrade_traffic_chunk_duration, + ) + .await?; upgrade_stats.push(upgrading_stats); } - let statsum = upgrade_stats.into_iter().reduce(|a,b| &a + &b); + let statsum = upgrade_stats.into_iter().reduce(|a, b| &a + &b); Ok(statsum) } @@ -85,13 +103,13 
@@ fn traffic_task( let source_account = chain_info.root_account.clone(); (emitter, emit_job_request, source_account) }; - // match create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng) { - // Ok(parts) => parts, - // Err(err) => { - // stats_result = Err(err); - // return; - // } - // }; + // match create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng) { + // Ok(parts) => parts, + // Err(err) => { + // stats_result = Err(err); + // return; + // } + // }; // let source_account = ctx.swarm().chain_info().root_account; let traffic_runtime = traffic_emitter_runtime()?; // let upgrade_joiner = handle.spawn(upgrade_task(ctx, validators_to_update, version, wait_until_healthy, delay, max_wait, upgrade_done.clone())); @@ -105,7 +123,6 @@ fn traffic_task( )) } - fn upgrade_and_gather_stats( ctxa: NetworkContextSynchronizer, // upgrade args @@ -119,8 +136,8 @@ fn upgrade_and_gather_stats( ) -> Result> { let upgrade_done = Arc::new(AtomicBool::new(false)); let emitter_ctx = ctxa.clone(); - let mut stats_result : Result> = Ok(None); - let mut upgrade_result : Result<()> = Ok(()); + let mut stats_result: Result> = Ok(None); + let mut upgrade_result: Result<()> = Ok(()); // std::thread::scope(|scopev| { tokio_scoped::scope(|scopev| { // emit trafic and gather stats @@ -135,7 +152,7 @@ fn upgrade_and_gather_stats( Err(err) => { stats_result = Err(err); return; - } + }, }; let source_account = ctx.swarm().chain_info().root_account; // let traffic_runtime = match traffic_emitter_runtime() { @@ -152,13 +169,22 @@ fn upgrade_and_gather_stats( source_account, upgrade_traffic_chunk_duration, upgrade_done.clone(), - ).await; + ) + .await; }); // do upgrade scopev.spawn(async { // let runtime = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap(); // upgrade_result = runtime.block_on(batch_update_gradually(ctxa, validators_to_update, version, wait_until_healthy, delay, max_wait)); - upgrade_result = batch_update_gradually(ctxa, 
validators_to_update, version, wait_until_healthy, delay, max_wait).await; + upgrade_result = batch_update_gradually( + ctxa, + validators_to_update, + version, + wait_until_healthy, + delay, + max_wait, + ) + .await; upgrade_done.store(true, Ordering::Relaxed); }); }); @@ -206,7 +232,10 @@ impl SimpleValidatorUpgrade { if ctxa.ctx.lock().await.swarm().validators().count() < 4 { bail!("compat test requires >= 4 validators"); } - let all_validators = ctxa.ctx.lock().await + let all_validators = ctxa + .ctx + .lock() + .await .swarm() .validators() .map(|v| v.peer_id()) @@ -251,7 +280,7 @@ impl SimpleValidatorUpgrade { upgrade_max_wait, &[first_node], )?; - let upgrade_stats_sum = upgrade_stats.into_iter().reduce(|a,b| &a + &b); + let upgrade_stats_sum = upgrade_stats.into_iter().reduce(|a, b| &a + &b); if let Some(upgrade_stats_sum) = upgrade_stats_sum { ctxa.ctx.lock().await.report.report_txn_stats( format!("{}::single-validator-upgrading", self.name()), @@ -288,7 +317,7 @@ impl SimpleValidatorUpgrade { upgrade_max_wait, &first_batch, )?; - let upgrade2_stats_sum = upgrade2_stats.into_iter().reduce(|a,b| &a + &b); + let upgrade2_stats_sum = upgrade2_stats.into_iter().reduce(|a, b| &a + &b); if let Some(upgrade2_stats_sum) = upgrade2_stats_sum { ctxa.ctx.lock().await.report.report_txn_stats( format!("{}::half-validator-upgrading", self.name()), @@ -323,7 +352,7 @@ impl SimpleValidatorUpgrade { upgrade_max_wait, &second_batch, )?; - let upgrade3_stats_sum = upgrade3_stats.into_iter().reduce(|a,b| &a + &b); + let upgrade3_stats_sum = upgrade3_stats.into_iter().reduce(|a, b| &a + &b); if let Some(upgrade3_stats_sum) = upgrade3_stats_sum { ctxa.ctx.lock().await.report.report_txn_stats( format!("{}::rest-validator-upgrading", self.name()), diff --git a/testsuite/testcases/src/consensus_reliability_tests.rs b/testsuite/testcases/src/consensus_reliability_tests.rs index 96bc0456846cc..b46f2052e9c25 100644 --- a/testsuite/testcases/src/consensus_reliability_tests.rs +++ 
b/testsuite/testcases/src/consensus_reliability_tests.rs @@ -3,9 +3,13 @@ use crate::{LoadDestination, NetworkLoadTest}; use anyhow::{anyhow, bail, Context}; -use aptos_forge::{test_utils::consensus_utils::{ - test_consensus_fault_tolerance, FailPointFailureInjection, NodeState, -}, NetworkContext, NetworkTest, Result, Swarm, SwarmExt, Test, TestReport, NetworkContextSynchronizer}; +use aptos_forge::{ + test_utils::consensus_utils::{ + test_consensus_fault_tolerance, FailPointFailureInjection, NodeState, + }, + NetworkContext, NetworkContextSynchronizer, NetworkTest, Result, Swarm, SwarmExt, Test, + TestReport, +}; use aptos_logger::{info, warn}; use rand::Rng; use std::{collections::HashSet, time::Duration}; diff --git a/testsuite/testcases/src/forge_setup_test.rs b/testsuite/testcases/src/forge_setup_test.rs index 14c374eb3dcdf..023e3de889ae0 100644 --- a/testsuite/testcases/src/forge_setup_test.rs +++ b/testsuite/testcases/src/forge_setup_test.rs @@ -11,8 +11,7 @@ use rand::{ seq::IteratorRandom, Rng, SeedableRng, }; -use std::{thread, time::Duration}; -use std::ops::DerefMut; +use std::{ops::DerefMut, thread, time::Duration}; use tokio::runtime::Runtime; const STATE_SYNC_VERSION_COUNTER_NAME: &str = "aptos_state_sync_version"; diff --git a/testsuite/testcases/src/framework_upgrade.rs b/testsuite/testcases/src/framework_upgrade.rs index 0fc17a3cd6982..e8235bd2d4c99 100644 --- a/testsuite/testcases/src/framework_upgrade.rs +++ b/testsuite/testcases/src/framework_upgrade.rs @@ -1,15 +1,18 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use std::ops::DerefMut; use crate::{batch_update, generate_traffic}; use anyhow::bail; -use aptos_forge::{NetworkTest, Result, SwarmExt, Test, DEFAULT_ROOT_PRIV_KEY, FORGE_KEY_SEED, NetworkContextSynchronizer}; +use aptos_forge::{ + NetworkContextSynchronizer, NetworkTest, Result, SwarmExt, Test, DEFAULT_ROOT_PRIV_KEY, + FORGE_KEY_SEED, +}; use aptos_keygen::KeyGen; use aptos_logger::info; use 
aptos_sdk::crypto::{ed25519::Ed25519PrivateKey, PrivateKey}; use aptos_temppath::TempPath; use aptos_types::transaction::authenticator::AuthenticationKey; +use std::ops::DerefMut; use tokio::{runtime::Runtime, time::Duration}; pub struct FrameworkUpgrade; diff --git a/testsuite/testcases/src/fullnode_reboot_stress_test.rs b/testsuite/testcases/src/fullnode_reboot_stress_test.rs index b54c78de9c0f5..0f5cdeafb3a9d 100644 --- a/testsuite/testcases/src/fullnode_reboot_stress_test.rs +++ b/testsuite/testcases/src/fullnode_reboot_stress_test.rs @@ -2,7 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{NetworkContext, NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport}; +use aptos_forge::{ + NetworkContext, NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport, +}; use rand::{seq::SliceRandom, thread_rng}; use std::time::Duration; use tokio::{runtime::Runtime, time::Instant}; diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index 230222a11204b..295c7a6de0f09 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -27,7 +27,11 @@ pub mod validator_join_leave_test; pub mod validator_reboot_stress_test; use anyhow::Context; -use aptos_forge::{prometheus_metrics::{fetch_latency_breakdown, LatencyBreakdown}, EmitJobRequest, NetworkContext, NetworkTest, NodeExt, Result, Swarm, SwarmExt, Test, TestReport, TxnEmitter, TxnStats, Version, NetworkContextSynchronizer}; +use aptos_forge::{ + prometheus_metrics::{fetch_latency_breakdown, LatencyBreakdown}, + EmitJobRequest, NetworkContext, NetworkContextSynchronizer, NetworkTest, NodeExt, Result, + Swarm, SwarmExt, Test, TestReport, TxnEmitter, TxnStats, Version, +}; use aptos_logger::info; use aptos_rest_client::Client as RestClient; use aptos_sdk::{transaction_builder::TransactionFactory, types::PeerId}; @@ -35,9 +39,9 @@ use futures::future::join_all; use rand::{rngs::StdRng, 
SeedableRng}; use std::{ fmt::Write, + ops::DerefMut, time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; -use std::ops::DerefMut; use tokio::runtime::Runtime; const WARMUP_DURATION_FRACTION: f32 = 0.07; @@ -75,10 +79,22 @@ async fn batch_update_gradually( ) -> Result<()> { // let mut swarm = ctx.swarm(); for validator in validators_to_update { - ctxa.ctx.lock().await.swarm().upgrade_validator(*validator, version).await?; + ctxa.ctx + .lock() + .await + .swarm() + .upgrade_validator(*validator, version) + .await?; if wait_until_healthy { let deadline = Instant::now() + max_wait; - ctxa.ctx.lock().await.swarm().validator_mut(*validator).unwrap().wait_until_healthy(deadline).await?; + ctxa.ctx + .lock() + .await + .swarm() + .validator_mut(*validator) + .unwrap() + .wait_until_healthy(deadline) + .await?; } if !delay.is_zero() { tokio::time::sleep(delay).await; @@ -123,11 +139,13 @@ pub async fn generate_traffic( let (emitter, emit_job_request) = create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng)?; - let stats = emitter.emit_txn_for( - ctx.swarm().chain_info().root_account, - emit_job_request, - duration, - ).await?; + let stats = emitter + .emit_txn_for( + ctx.swarm().chain_info().root_account, + emit_job_request, + duration, + ) + .await?; Ok(stats) } @@ -153,11 +171,7 @@ pub fn spawn_generate_traffic( duration: Duration, handle: Handle, ) -> JoinHandle> { - handle.spawn(emitter.emit_txn_for( - root_account, - emit_job_request, - duration, - )) + handle.spawn(emitter.emit_txn_for(root_account, emit_job_request, duration)) } pub enum LoadDestination { @@ -213,7 +227,10 @@ pub trait NetworkLoadTest: Test { } } -async fn async_run_network_load_test(nlt: &dyn NetworkLoadTest, ctx: NetworkContextSynchronizer<'_>) -> Result<()> { +async fn async_run_network_load_test( + nlt: &dyn NetworkLoadTest, + ctx: NetworkContextSynchronizer<'_>, +) -> Result<()> { let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); let runtime = 
Runtime::new().unwrap(); @@ -285,14 +302,17 @@ async fn async_run_network_load_test(nlt: &dyn NetworkLoadTest, ctx: NetworkCont start_version, end_version, ) - .context("check for success")?; + .context("check for success")?; } - Ok(())} + Ok(()) +} impl NetworkTest for dyn NetworkLoadTest { fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ctx.handle.clone().block_on(async_run_network_load_test(self, ctx)) + ctx.handle + .clone() + .block_on(async_run_network_load_test(self, ctx)) } } diff --git a/testsuite/testcases/src/load_vs_perf_benchmark.rs b/testsuite/testcases/src/load_vs_perf_benchmark.rs index 665ad6cd8b9a1..4d789a3778410 100644 --- a/testsuite/testcases/src/load_vs_perf_benchmark.rs +++ b/testsuite/testcases/src/load_vs_perf_benchmark.rs @@ -3,11 +3,16 @@ use crate::{create_emitter_and_request, LoadDestination, NetworkLoadTest}; use anyhow::Context; -use aptos_forge::{args::TransactionTypeArg, prometheus_metrics::{LatencyBreakdown, LatencyBreakdownSlice}, success_criteria::{SuccessCriteria, SuccessCriteriaChecker}, EmitJobMode, EmitJobRequest, NetworkContext, NetworkTest, Result, Test, TxnStats, WorkflowProgress, NetworkContextSynchronizer}; +use aptos_forge::{ + args::TransactionTypeArg, + prometheus_metrics::{LatencyBreakdown, LatencyBreakdownSlice}, + success_criteria::{SuccessCriteria, SuccessCriteriaChecker}, + EmitJobMode, EmitJobRequest, NetworkContext, NetworkContextSynchronizer, NetworkTest, Result, + Test, TxnStats, WorkflowProgress, +}; use aptos_logger::info; use rand::SeedableRng; -use std::{fmt::Debug, time::Duration}; -use std::ops::DerefMut; +use std::{fmt::Debug, ops::DerefMut, time::Duration}; use tokio::runtime::Runtime; // add larger warmup, as when we are exceeding the max load, @@ -230,7 +235,7 @@ impl LoadVsPerfBenchmark { &nodes_to_send_load_to, rng, ) - .context("create emitter")?; + .context("create emitter")?; let job = rt .block_on(emitter.start_job( diff --git a/testsuite/testcases/src/modifiers.rs 
b/testsuite/testcases/src/modifiers.rs index 6b6ba34c7dfaa..11b9b8e1c3bfb 100644 --- a/testsuite/testcases/src/modifiers.rs +++ b/testsuite/testcases/src/modifiers.rs @@ -2,7 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{multi_region_network_test::chunk_peers, LoadDestination, NetworkLoadTest}; -use aptos_forge::{GroupCpuStress, NetworkContext, NetworkContextSynchronizer, NetworkTest, Swarm, SwarmChaos, SwarmCpuStress, SwarmExt, Test}; +use aptos_forge::{ + GroupCpuStress, NetworkContext, NetworkContextSynchronizer, NetworkTest, Swarm, SwarmChaos, + SwarmCpuStress, SwarmExt, Test, +}; use aptos_logger::info; use aptos_types::PeerId; use rand::Rng; diff --git a/testsuite/testcases/src/multi_region_network_test.rs b/testsuite/testcases/src/multi_region_network_test.rs index 5497523d23b36..ad9d0fd7a1b04 100644 --- a/testsuite/testcases/src/multi_region_network_test.rs +++ b/testsuite/testcases/src/multi_region_network_test.rs @@ -2,7 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{GroupNetEm, NetworkContext, NetworkContextSynchronizer, NetworkTest, Swarm, SwarmChaos, SwarmNetEm, Test}; +use aptos_forge::{ + GroupNetEm, NetworkContext, NetworkContextSynchronizer, NetworkTest, Swarm, SwarmChaos, + SwarmNetEm, Test, +}; use aptos_logger::info; use aptos_types::PeerId; use itertools::{self, EitherOrBoth, Itertools}; diff --git a/testsuite/testcases/src/network_bandwidth_test.rs b/testsuite/testcases/src/network_bandwidth_test.rs index adcc48613b311..d0cd9082eb35d 100644 --- a/testsuite/testcases/src/network_bandwidth_test.rs +++ b/testsuite/testcases/src/network_bandwidth_test.rs @@ -2,7 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{GroupNetworkBandwidth, NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkBandwidth, Test}; +use aptos_forge::{ + GroupNetworkBandwidth, NetworkContext, 
NetworkContextSynchronizer, NetworkTest, SwarmChaos, + SwarmNetworkBandwidth, Test, +}; /// This is deprecated. Use [crate::multi_region_network_test::MultiRegionNetworkEmulationTest] instead pub struct NetworkBandwidthTest; diff --git a/testsuite/testcases/src/network_loss_test.rs b/testsuite/testcases/src/network_loss_test.rs index 8757bfc515838..6239b9be49d5c 100644 --- a/testsuite/testcases/src/network_loss_test.rs +++ b/testsuite/testcases/src/network_loss_test.rs @@ -2,7 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkLoss, Test}; +use aptos_forge::{ + NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkLoss, Test, +}; /// This is deprecated. Use [crate::multi_region_network_test::MultiRegionNetworkEmulationTest] instead pub struct NetworkLossTest; diff --git a/testsuite/testcases/src/network_partition_test.rs b/testsuite/testcases/src/network_partition_test.rs index c13aa3f2e6bfc..54689d15a6f0d 100644 --- a/testsuite/testcases/src/network_partition_test.rs +++ b/testsuite/testcases/src/network_partition_test.rs @@ -2,7 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkPartition, Test}; +use aptos_forge::{ + NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkPartition, + Test, +}; /// This is deprecated. 
Use [crate::multi_region_network_test::MultiRegionNetworkEmulationTest] instead pub struct NetworkPartitionTest; diff --git a/testsuite/testcases/src/partial_nodes_down_test.rs b/testsuite/testcases/src/partial_nodes_down_test.rs index ff93c7abbb0c6..6abde03878859 100644 --- a/testsuite/testcases/src/partial_nodes_down_test.rs +++ b/testsuite/testcases/src/partial_nodes_down_test.rs @@ -2,10 +2,9 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. // SPDX-License-Identifier: Apache-2.0 -use std::ops::DerefMut; use crate::generate_traffic; use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test}; -use std::thread; +use std::{ops::DerefMut, thread}; use tokio::{runtime::Runtime, time::Duration}; pub struct PartialNodesDown; diff --git a/testsuite/testcases/src/public_fullnode_performance.rs b/testsuite/testcases/src/public_fullnode_performance.rs index 108fe0ed43d16..83310371476ab 100644 --- a/testsuite/testcases/src/public_fullnode_performance.rs +++ b/testsuite/testcases/src/public_fullnode_performance.rs @@ -8,7 +8,10 @@ use crate::{ }; use anyhow::Error; use aptos_config::config::{NodeConfig, OverrideNodeConfig}; -use aptos_forge::{NetworkContext, NetworkContextSynchronizer, NetworkTest, OverrideNodeConfigFn, Result, Swarm, SwarmChaos, SwarmCpuStress, SwarmNetEm, Test}; +use aptos_forge::{ + NetworkContext, NetworkContextSynchronizer, NetworkTest, OverrideNodeConfigFn, Result, Swarm, + SwarmChaos, SwarmCpuStress, SwarmNetEm, Test, +}; use aptos_logger::info; use aptos_sdk::move_types::account_address::AccountAddress; use aptos_types::PeerId; diff --git a/testsuite/testcases/src/state_sync_performance.rs b/testsuite/testcases/src/state_sync_performance.rs index 15aebf5f56c4e..9729789987482 100644 --- a/testsuite/testcases/src/state_sync_performance.rs +++ b/testsuite/testcases/src/state_sync_performance.rs @@ -2,13 +2,15 @@ // Parts of the project are originally copyright © Meta Platforms, Inc. 
// SPDX-License-Identifier: Apache-2.0 -use std::ops::DerefMut; use crate::generate_traffic; use anyhow::bail; -use aptos_forge::{get_highest_synced_epoch, get_highest_synced_version, NetworkContext, NetworkContextSynchronizer, NetworkTest, Result, SwarmExt, Test}; +use aptos_forge::{ + get_highest_synced_epoch, get_highest_synced_version, NetworkContext, + NetworkContextSynchronizer, NetworkTest, Result, SwarmExt, Test, +}; use aptos_logger::info; use aptos_sdk::move_types::account_address::AccountAddress; -use std::time::Instant; +use std::{ops::DerefMut, time::Instant}; use tokio::{runtime::Runtime, time::Duration}; const MAX_EPOCH_CHANGE_SECS: u64 = 300; // Max amount of time (in seconds) to wait for an epoch change @@ -234,7 +236,11 @@ fn emit_traffic_and_ensure_bounded_sync( emit_txn_duration.as_secs() ); let handle = ctx.runtime.handle().clone(); - let _txn_stat = handle.block_on(generate_traffic(ctx, nodes_to_send_traffic, emit_txn_duration))?; + let _txn_stat = handle.block_on(generate_traffic( + ctx, + nodes_to_send_traffic, + emit_txn_duration, + ))?; // Wait for all nodes to synchronize. We time bound this to ensure // nodes don't fall too far behind. 
diff --git a/testsuite/testcases/src/three_region_simulation_test.rs b/testsuite/testcases/src/three_region_simulation_test.rs index 8ffe45abcf2ce..77e1287674abb 100644 --- a/testsuite/testcases/src/three_region_simulation_test.rs +++ b/testsuite/testcases/src/three_region_simulation_test.rs @@ -2,7 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{LoadDestination, NetworkLoadTest}; -use aptos_forge::{GroupNetworkBandwidth, GroupNetworkDelay, NetworkContext, NetworkContextSynchronizer, NetworkTest, Swarm, SwarmChaos, SwarmNetworkBandwidth, SwarmNetworkDelay, Test}; +use aptos_forge::{ + GroupNetworkBandwidth, GroupNetworkDelay, NetworkContext, NetworkContextSynchronizer, + NetworkTest, Swarm, SwarmChaos, SwarmNetworkBandwidth, SwarmNetworkDelay, Test, +}; use aptos_logger::info; /// Represents a test that simulates a network with 3 regions, all in the same cloud. diff --git a/testsuite/testcases/src/twin_validator_test.rs b/testsuite/testcases/src/twin_validator_test.rs index 18500dd42ba5d..e1743f14182e3 100644 --- a/testsuite/testcases/src/twin_validator_test.rs +++ b/testsuite/testcases/src/twin_validator_test.rs @@ -1,12 +1,14 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use std::ops::DerefMut; use crate::NetworkLoadTest; use anyhow::Context; use aptos_forge::{NetworkContextSynchronizer, NetworkTest, NodeExt, Test}; use aptos_sdk::move_types::account_address::AccountAddress; -use std::time::{Duration, Instant}; +use std::{ + ops::DerefMut, + time::{Duration, Instant}, +}; use tokio::runtime::Runtime; pub struct TwinValidatorTest; diff --git a/testsuite/testcases/src/two_traffics_test.rs b/testsuite/testcases/src/two_traffics_test.rs index 6af851413f251..58575052709f5 100644 --- a/testsuite/testcases/src/two_traffics_test.rs +++ b/testsuite/testcases/src/two_traffics_test.rs @@ -4,7 +4,10 @@ use crate::{ create_emitter_and_request, traffic_emitter_runtime, LoadDestination, NetworkLoadTest, }; -use 
aptos_forge::{success_criteria::{SuccessCriteria, SuccessCriteriaChecker}, EmitJobRequest, NetworkTest, Result, Swarm, Test, TestReport, NetworkContextSynchronizer}; +use aptos_forge::{ + success_criteria::{SuccessCriteria, SuccessCriteriaChecker}, + EmitJobRequest, NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport, +}; use aptos_logger::info; use rand::{rngs::OsRng, Rng, SeedableRng}; use std::time::{Duration, Instant}; diff --git a/testsuite/testcases/src/validator_join_leave_test.rs b/testsuite/testcases/src/validator_join_leave_test.rs index b6ded09434dfd..04dc2ac4030fb 100644 --- a/testsuite/testcases/src/validator_join_leave_test.rs +++ b/testsuite/testcases/src/validator_join_leave_test.rs @@ -4,7 +4,10 @@ // use std::ops::DerefMut; use crate::{LoadDestination, NetworkLoadTest}; use aptos::{account::create::DEFAULT_FUNDED_COINS, test::CliTestFramework}; -use aptos_forge::{reconfig, NetworkContext, NetworkTest, NodeExt, Result, Swarm, SwarmExt, Test, TestReport, FORGE_KEY_SEED, NetworkContextSynchronizer}; +use aptos_forge::{ + reconfig, NetworkContext, NetworkContextSynchronizer, NetworkTest, NodeExt, Result, Swarm, + SwarmExt, Test, TestReport, FORGE_KEY_SEED, +}; use aptos_keygen::KeyGen; use aptos_logger::info; use aptos_sdk::crypto::{ed25519::Ed25519PrivateKey, PrivateKey}; @@ -130,22 +133,12 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { .unwrap(); let root_account = swarm.chain_info().root_account(); - reconfig( - &rest_client, - &transaction_factory, - root_account, - ) - .await; + reconfig(&rest_client, &transaction_factory, root_account).await; } { let root_account = swarm.chain_info().root_account(); - reconfig( - &rest_client, - &transaction_factory, - root_account, - ) - .await; + reconfig(&rest_client, &transaction_factory, root_account).await; } }); @@ -159,22 +152,12 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { cli.join_validator_set(*operator_index, None).await.unwrap(); let root_account = 
swarm.chain_info().root_account(); - reconfig( - &rest_client, - &transaction_factory, - root_account, - ) - .await; + reconfig(&rest_client, &transaction_factory, root_account).await; } { let root_account = swarm.chain_info().root_account(); - reconfig( - &rest_client, - &transaction_factory, - root_account, - ) - .await; + reconfig(&rest_client, &transaction_factory, root_account).await; } }); From 8d05c3dbe9643c7aa26426d5070fd914d8779199 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Thu, 6 Jun 2024 14:03:10 -0400 Subject: [PATCH 11/28] async_trait NetworkTest.run --- Cargo.lock | 1 + testsuite/forge-cli/src/main.rs | 70 ++++--- testsuite/forge/src/interface/network.rs | 4 +- testsuite/forge/src/runner.rs | 4 +- testsuite/testcases/Cargo.toml | 1 + testsuite/testcases/src/compatibility_test.rs | 13 +- .../src/consensus_reliability_tests.rs | 6 +- .../testcases/src/dag_onchain_enable_test.rs | 6 +- testsuite/testcases/src/forge_setup_test.rs | 24 +-- testsuite/testcases/src/framework_upgrade.rs | 24 ++- .../src/fullnode_reboot_stress_test.rs | 6 +- testsuite/testcases/src/lib.rs | 174 +++++++++--------- .../testcases/src/load_vs_perf_benchmark.rs | 12 +- testsuite/testcases/src/modifiers.rs | 16 +- .../src/multi_region_network_test.rs | 6 +- .../testcases/src/network_bandwidth_test.rs | 6 +- testsuite/testcases/src/network_loss_test.rs | 6 +- .../testcases/src/network_partition_test.rs | 6 +- .../testcases/src/partial_nodes_down_test.rs | 12 +- testsuite/testcases/src/performance_test.rs | 6 +- .../src/public_fullnode_performance.rs | 6 +- .../src/quorum_store_onchain_enable_test.rs | 6 +- .../testcases/src/reconfiguration_test.rs | 4 +- .../testcases/src/state_sync_performance.rs | 70 +++---- .../src/three_region_simulation_test.rs | 6 +- .../testcases/src/twin_validator_test.rs | 30 ++- testsuite/testcases/src/two_traffics_test.rs | 6 +- .../src/validator_join_leave_test.rs | 6 +- .../src/validator_reboot_stress_test.rs | 6 +- 29 files changed, 266 
insertions(+), 277 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b97a6c434b523..a5a53b66fd992 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3871,6 +3871,7 @@ dependencies = [ "aptos-temppath", "aptos-types", "assert_approx_eq", + "async-trait", "bcs 0.1.4", "csv", "futures", diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index 19ef8ea6b4422..98589a5f74d20 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -60,6 +60,7 @@ use aptos_testcases::{ validator_reboot_stress_test::ValidatorRebootStressTest, CompositeNetworkTest, }; +use async_trait::async_trait; use clap::{Parser, Subcommand}; use futures::stream::{FuturesUnordered, StreamExt}; use once_cell::sync::Lazy; @@ -2658,19 +2659,18 @@ impl Test for RestartValidator { } } +#[async_trait] impl NetworkTest for RestartValidator { - fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { - ctxa.handle.clone().block_on(async { - let mut ctx_locker = ctxa.ctx.lock().await; - let ctx = ctx_locker.deref_mut(); - let node = ctx.swarm().validators_mut().next().unwrap(); - node.health_check().await.expect("node health check failed"); - node.stop().await.unwrap(); - println!("Restarting node {}", node.peer_id()); - node.start().await.unwrap(); - tokio::time::sleep(Duration::from_secs(1)).await; - node.health_check().await.expect("node health check failed"); - }); + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let node = ctx.swarm().validators_mut().next().unwrap(); + node.health_check().await.expect("node health check failed"); + node.stop().await.unwrap(); + println!("Restarting node {}", node.peer_id()); + node.start().await.unwrap(); + tokio::time::sleep(Duration::from_secs(1)).await; + node.health_check().await.expect("node health check failed"); Ok(()) } } @@ -2684,23 +2684,21 @@ impl Test for EmitTransaction { } } 
+#[async_trait] impl NetworkTest for EmitTransaction { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - let handle = ctx.handle.clone(); - handle.block_on(async { - let mut ctx_locker = ctx.ctx.lock().await; - let ctx = ctx_locker.deref_mut(); - let duration = Duration::from_secs(10); - let all_validators = ctx - .swarm() - .validators() - .map(|v| v.peer_id()) - .collect::>(); - let stats = generate_traffic(ctx, &all_validators, duration) - .await - .unwrap(); - ctx.report.report_txn_stats(self.name().to_string(), &stats); - }); + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let duration = Duration::from_secs(10); + let all_validators = ctx + .swarm() + .validators() + .map(|v| v.peer_id()) + .collect::>(); + let stats = generate_traffic(ctx, &all_validators, duration) + .await + .unwrap(); + ctx.report.report_txn_stats(self.name().to_string(), &stats); Ok(()) } } @@ -2722,10 +2720,11 @@ impl Test for Delay { } } +#[async_trait] impl NetworkTest for Delay { - fn run(&self, _ctx: NetworkContextSynchronizer) -> Result<()> { + async fn run<'a>(&self, _ctx: NetworkContextSynchronizer<'a>) -> Result<()> { info!("forge sleep {}", self.seconds); - std::thread::sleep(Duration::from_secs(self.seconds)); + tokio::time::sleep(Duration::from_secs(self.seconds)).await; Ok(()) } } @@ -2739,13 +2738,12 @@ impl Test for GatherMetrics { } } +#[async_trait] impl NetworkTest for GatherMetrics { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ctx.handle.clone().block_on(async { - let mut ctx_locker = ctx.ctx.lock().await; - let ctx = ctx_locker.deref_mut(); - gather_metrics_one(ctx).await; - }); + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + let mut ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + gather_metrics_one(ctx).await; Ok(()) } } diff --git 
a/testsuite/forge/src/interface/network.rs b/testsuite/forge/src/interface/network.rs index 2a10a505a980c..7b77832b4920e 100644 --- a/testsuite/forge/src/interface/network.rs +++ b/testsuite/forge/src/interface/network.rs @@ -9,15 +9,17 @@ use crate::{ CoreContext, Result, Swarm, TestReport, }; use aptos_transaction_emitter_lib::{EmitJobRequest, TxnStats}; +use async_trait::async_trait; use std::{future::Future, sync::Arc, time::Duration}; use tokio::runtime::{Handle, Runtime}; /// The testing interface which defines a test written with full control over an existing network. /// Tests written against this interface will have access to both the Root account as well as the /// nodes which comprise the network. +#[async_trait] pub trait NetworkTest: Test { /// Executes the test against the given context. - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()>; + async fn run<'t>(&self, ctx: NetworkContextSynchronizer<'t>) -> Result<()>; } #[derive(Clone)] diff --git a/testsuite/forge/src/runner.rs b/testsuite/forge/src/runner.rs index a1761556d3eb4..b9f17cebf48f5 100644 --- a/testsuite/forge/src/runner.rs +++ b/testsuite/forge/src/runner.rs @@ -597,8 +597,8 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { ); let handle = network_ctx.runtime.handle().clone(); let _handle_context = handle.enter(); - let network_ctx = NetworkContextSynchronizer::new(network_ctx, handle); - let result = run_test(|| test.run(network_ctx.clone())); + let network_ctx = NetworkContextSynchronizer::new(network_ctx, handle.clone()); + let result = run_test(|| handle.block_on(test.run(network_ctx.clone()))); // explicitly keep network context in scope so that its created tokio Runtime drops after all the stuff has run. 
let NetworkContextSynchronizer { ctx, handle } = network_ctx; drop(handle); diff --git a/testsuite/testcases/Cargo.toml b/testsuite/testcases/Cargo.toml index 89b3e2f6e6fc0..6541a240698e3 100644 --- a/testsuite/testcases/Cargo.toml +++ b/testsuite/testcases/Cargo.toml @@ -35,6 +35,7 @@ rand = { workspace = true } reqwest = { workspace = true } tokio = { workspace = true } tokio-scoped = { workspace = true } +async-trait = { workspace = true } [dev-dependencies] assert_approx_eq = { workspace = true } diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index d9dcfdb885ba4..8e325fc339c3d 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -11,6 +11,7 @@ use aptos_forge::{ use aptos_logger::info; // use aptos_sdk::transaction_builder::TransactionFactory; use aptos_sdk::types::{LocalAccount, PeerId}; +use async_trait::async_trait; use rand::SeedableRng; use std::{ ops::DerefMut, @@ -193,17 +194,9 @@ fn upgrade_and_gather_stats( stats_result } +#[async_trait] impl NetworkTest for SimpleValidatorUpgrade { - fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { - let handle = ctxa.handle.clone(); - handle.block_on(self.async_run(ctxa)) - } -} - -impl SimpleValidatorUpgrade { - async fn async_run(&self, ctxa: NetworkContextSynchronizer<'_>) -> Result<()> { - // let runtime = Runtime::new()?; - // let traffic_runtime = traffic_emitter_runtime()?; + async fn run<'a>(&self, ctxa: NetworkContextSynchronizer<'a>) -> Result<()> { let upgrade_wait_for_healthy = true; let upgrade_node_delay = Duration::from_secs(10); let upgrade_max_wait = Duration::from_secs(40); diff --git a/testsuite/testcases/src/consensus_reliability_tests.rs b/testsuite/testcases/src/consensus_reliability_tests.rs index b46f2052e9c25..f445af13f68af 100644 --- a/testsuite/testcases/src/consensus_reliability_tests.rs +++ b/testsuite/testcases/src/consensus_reliability_tests.rs @@ -11,6 
+11,7 @@ use aptos_forge::{ TestReport, }; use aptos_logger::{info, warn}; +use async_trait::async_trait; use rand::Rng; use std::{collections::HashSet, time::Duration}; use tokio::runtime::Runtime; @@ -296,8 +297,9 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { } } +#[async_trait] impl NetworkTest for ChangingWorkingQuorumTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/dag_onchain_enable_test.rs b/testsuite/testcases/src/dag_onchain_enable_test.rs index 74425b7107a1d..0adc712baa830 100644 --- a/testsuite/testcases/src/dag_onchain_enable_test.rs +++ b/testsuite/testcases/src/dag_onchain_enable_test.rs @@ -13,6 +13,7 @@ use aptos_types::{ ConsensusAlgorithmConfig, DagConsensusConfigV1, OnChainConsensusConfig, ValidatorTxnConfig, }, }; +use async_trait::async_trait; use std::time::Duration; use tokio::runtime::Runtime; @@ -209,8 +210,9 @@ impl NetworkLoadTest for DagOnChainEnableTest { } } +#[async_trait] impl NetworkTest for DagOnChainEnableTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/forge_setup_test.rs b/testsuite/testcases/src/forge_setup_test.rs index 023e3de889ae0..88c4fa49d6de2 100644 --- a/testsuite/testcases/src/forge_setup_test.rs +++ b/testsuite/testcases/src/forge_setup_test.rs @@ -6,6 +6,7 @@ use anyhow::Context; use aptos_config::config::OverrideNodeConfig; use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test}; use aptos_logger::info; +use async_trait::async_trait; use rand::{ rngs::{OsRng, StdRng}, seq::IteratorRandom, @@ -18,8 +19,15 @@ const STATE_SYNC_VERSION_COUNTER_NAME: &str = "aptos_state_sync_version"; pub struct ForgeSetupTest; 
-impl ForgeSetupTest { - async fn async_run(&self, ctx: NetworkContextSynchronizer<'_>) -> Result<()> { +impl Test for ForgeSetupTest { + fn name(&self) -> &'static str { + "verify_forge_setup" + } +} + +#[async_trait] +impl NetworkTest for ForgeSetupTest { + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { let mut rng = StdRng::from_seed(OsRng.gen()); let runtime = Runtime::new().unwrap(); let mut ctx_locker = ctx.ctx.lock().await; @@ -79,15 +87,3 @@ impl ForgeSetupTest { Ok(()) } } - -impl Test for ForgeSetupTest { - fn name(&self) -> &'static str { - "verify_forge_setup" - } -} - -impl NetworkTest for ForgeSetupTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ctx.handle.clone().block_on(self.async_run(ctx)) - } -} diff --git a/testsuite/testcases/src/framework_upgrade.rs b/testsuite/testcases/src/framework_upgrade.rs index e8235bd2d4c99..1904e36fead12 100644 --- a/testsuite/testcases/src/framework_upgrade.rs +++ b/testsuite/testcases/src/framework_upgrade.rs @@ -12,6 +12,7 @@ use aptos_logger::info; use aptos_sdk::crypto::{ed25519::Ed25519PrivateKey, PrivateKey}; use aptos_temppath::TempPath; use aptos_types::transaction::authenticator::AuthenticationKey; +use async_trait::async_trait; use std::ops::DerefMut; use tokio::{runtime::Runtime, time::Duration}; @@ -19,8 +20,17 @@ pub struct FrameworkUpgrade; impl FrameworkUpgrade { pub const EPOCH_DURATION_SECS: u64 = 10; +} - async fn async_run(&self, ctx: NetworkContextSynchronizer<'_>) -> Result<()> { +impl Test for FrameworkUpgrade { + fn name(&self) -> &'static str { + "framework_upgrade::framework-upgrade" + } +} + +#[async_trait] +impl NetworkTest for FrameworkUpgrade { + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); let runtime = Runtime::new()?; @@ -166,15 +176,3 @@ impl FrameworkUpgrade { Ok(()) } } - -impl Test for FrameworkUpgrade { - fn 
name(&self) -> &'static str { - "framework_upgrade::framework-upgrade" - } -} - -impl NetworkTest for FrameworkUpgrade { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ctx.handle.clone().block_on(self.async_run(ctx)) - } -} diff --git a/testsuite/testcases/src/fullnode_reboot_stress_test.rs b/testsuite/testcases/src/fullnode_reboot_stress_test.rs index 0f5cdeafb3a9d..7ba712e8d7f62 100644 --- a/testsuite/testcases/src/fullnode_reboot_stress_test.rs +++ b/testsuite/testcases/src/fullnode_reboot_stress_test.rs @@ -5,6 +5,7 @@ use crate::{LoadDestination, NetworkLoadTest}; use aptos_forge::{ NetworkContext, NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport, }; +use async_trait::async_trait; use rand::{seq::SliceRandom, thread_rng}; use std::time::Duration; use tokio::{runtime::Runtime, time::Instant}; @@ -48,8 +49,9 @@ impl NetworkLoadTest for FullNodeRebootStressTest { } } +#[async_trait] impl NetworkTest for FullNodeRebootStressTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index 295c7a6de0f09..5a004ee535c76 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -35,6 +35,7 @@ use aptos_forge::{ use aptos_logger::info; use aptos_rest_client::Client as RestClient; use aptos_sdk::{transaction_builder::TransactionFactory, types::PeerId}; +use async_trait::async_trait; use futures::future::join_all; use rand::{rngs::StdRng, SeedableRng}; use std::{ @@ -227,92 +228,84 @@ pub trait NetworkLoadTest: Test { } } -async fn async_run_network_load_test( - nlt: &dyn NetworkLoadTest, - ctx: NetworkContextSynchronizer<'_>, -) -> Result<()> { - let mut ctx_locker = ctx.ctx.lock().await; - let ctx = ctx_locker.deref_mut(); - let runtime = Runtime::new().unwrap(); - let start_timestamp = 
SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("Time went backwards") - .as_secs(); - let (start_version, _) = runtime - .block_on(ctx.swarm().get_client_with_newest_ledger_version()) - .context("no clients replied for start version")?; - let emit_job_request = ctx.emit_job.clone(); - let rng = SeedableRng::from_rng(ctx.core().rng())?; - let duration = ctx.global_duration; - let stats_by_phase = nlt.network_load_test( - ctx, - emit_job_request, - duration, - WARMUP_DURATION_FRACTION, - COOLDOWN_DURATION_FRACTION, - rng, - )?; - - let phased = stats_by_phase.len() > 1; - for (phase, phase_stats) in stats_by_phase.iter().enumerate() { - let test_name = if phased { - format!("{}_phase_{}", nlt.name(), phase) - } else { - nlt.name().to_string() - }; - ctx.report - .report_txn_stats(test_name, &phase_stats.emitter_stats); - ctx.report.report_text(format!( - "Latency breakdown for phase {}: {:?}", - phase, - phase_stats - .latency_breakdown - .keys() - .into_iter() - .map(|slice| { - let slice_samples = phase_stats.latency_breakdown.get_samples(&slice); - format!( - "{:?}: max: {:.3}, avg: {:.3}", - slice, - slice_samples.max_sample(), - slice_samples.avg_sample() - ) - }) - .collect::>() - )); - } - - let end_timestamp = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("Time went backwards") - .as_secs(); - let (end_version, _) = runtime - .block_on(ctx.swarm().get_client_with_newest_ledger_version()) - .context("no clients replied for end version")?; - - nlt.finish(ctx).context("finish NetworkLoadTest ")?; - - for phase_stats in stats_by_phase.into_iter() { - ctx.check_for_success( - &phase_stats.emitter_stats, - phase_stats.actual_duration, - &phase_stats.latency_breakdown, - start_timestamp as i64, - end_timestamp as i64, - start_version, - end_version, - ) - .context("check for success")?; - } +#[async_trait] +impl NetworkTest for dyn NetworkLoadTest { + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + let mut 
ctx_locker = ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let runtime = Runtime::new().unwrap(); + let start_timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time went backwards") + .as_secs(); + let (start_version, _) = runtime + .block_on(ctx.swarm().get_client_with_newest_ledger_version()) + .context("no clients replied for start version")?; + let emit_job_request = ctx.emit_job.clone(); + let rng = SeedableRng::from_rng(ctx.core().rng())?; + let duration = ctx.global_duration; + let stats_by_phase = self.network_load_test( + ctx, + emit_job_request, + duration, + WARMUP_DURATION_FRACTION, + COOLDOWN_DURATION_FRACTION, + rng, + )?; + + let phased = stats_by_phase.len() > 1; + for (phase, phase_stats) in stats_by_phase.iter().enumerate() { + let test_name = if phased { + format!("{}_phase_{}", self.name(), phase) + } else { + self.name().to_string() + }; + ctx.report + .report_txn_stats(test_name, &phase_stats.emitter_stats); + ctx.report.report_text(format!( + "Latency breakdown for phase {}: {:?}", + phase, + phase_stats + .latency_breakdown + .keys() + .into_iter() + .map(|slice| { + let slice_samples = phase_stats.latency_breakdown.get_samples(&slice); + format!( + "{:?}: max: {:.3}, avg: {:.3}", + slice, + slice_samples.max_sample(), + slice_samples.avg_sample() + ) + }) + .collect::>() + )); + } - Ok(()) -} + let end_timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time went backwards") + .as_secs(); + let (end_version, _) = runtime + .block_on(ctx.swarm().get_client_with_newest_ledger_version()) + .context("no clients replied for end version")?; + + self.finish(ctx).context("finish NetworkLoadTest ")?; + + for phase_stats in stats_by_phase.into_iter() { + ctx.check_for_success( + &phase_stats.emitter_stats, + phase_stats.actual_duration, + &phase_stats.latency_breakdown, + start_timestamp as i64, + end_timestamp as i64, + start_version, + end_version, + ) + .context("check for success")?; + } -impl 
NetworkTest for dyn NetworkLoadTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ctx.handle - .clone() - .block_on(async_run_network_load_test(self, ctx)) + Ok(()) } } @@ -563,8 +556,11 @@ impl CompositeNetworkTest { test: Box::new(test), } } +} - async fn async_run(&self, ctxa: NetworkContextSynchronizer<'_>) -> Result<()> { +#[async_trait] +impl NetworkTest for CompositeNetworkTest { + async fn run<'a>(&self, ctxa: NetworkContextSynchronizer<'a>) -> Result<()> { { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); @@ -572,7 +568,7 @@ impl CompositeNetworkTest { wrapper.setup(ctx)?; } } - self.test.run(ctxa.clone())?; + self.test.run(ctxa.clone()).await?; { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); @@ -584,12 +580,6 @@ impl CompositeNetworkTest { } } -impl NetworkTest for CompositeNetworkTest { - fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { - ctxa.handle.clone().block_on(self.async_run(ctxa)) - } -} - impl Test for CompositeNetworkTest { fn name(&self) -> &'static str { "CompositeNetworkTest" diff --git a/testsuite/testcases/src/load_vs_perf_benchmark.rs b/testsuite/testcases/src/load_vs_perf_benchmark.rs index 4d789a3778410..8384170910ed9 100644 --- a/testsuite/testcases/src/load_vs_perf_benchmark.rs +++ b/testsuite/testcases/src/load_vs_perf_benchmark.rs @@ -11,6 +11,7 @@ use aptos_forge::{ Test, TxnStats, WorkflowProgress, }; use aptos_logger::info; +use async_trait::async_trait; use rand::SeedableRng; use std::{fmt::Debug, ops::DerefMut, time::Duration}; use tokio::runtime::Runtime; @@ -212,8 +213,11 @@ impl LoadVsPerfBenchmark { Ok(result) } +} - async fn async_run(&self, ctx: NetworkContextSynchronizer<'_>) -> Result<()> { +#[async_trait] +impl NetworkTest for LoadVsPerfBenchmark { + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { assert!( self.criteria.is_empty() || self.criteria.len() == self.workloads.len(), "Invalid 
config, {} criteria and {} workloads given", @@ -348,12 +352,6 @@ impl LoadVsPerfBenchmark { } } -impl NetworkTest for LoadVsPerfBenchmark { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ctx.handle.clone().block_on(self.async_run(ctx)) - } -} - fn to_table(type_name: String, results: &[Vec]) -> Vec { let mut table = Vec::new(); table.push(format!( diff --git a/testsuite/testcases/src/modifiers.rs b/testsuite/testcases/src/modifiers.rs index 11b9b8e1c3bfb..c33587be6b21f 100644 --- a/testsuite/testcases/src/modifiers.rs +++ b/testsuite/testcases/src/modifiers.rs @@ -8,6 +8,7 @@ use aptos_forge::{ }; use aptos_logger::info; use aptos_types::PeerId; +use async_trait::async_trait; use rand::Rng; use tokio::runtime::Runtime; @@ -102,9 +103,10 @@ impl NetworkLoadTest for ExecutionDelayTest { } } +#[async_trait] impl NetworkTest for ExecutionDelayTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } @@ -187,9 +189,10 @@ impl NetworkLoadTest for NetworkUnreliabilityTest { } } +#[async_trait] impl NetworkTest for NetworkUnreliabilityTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } @@ -302,8 +305,9 @@ impl NetworkLoadTest for CpuChaosTest { } } +#[async_trait] impl NetworkTest for CpuChaosTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/multi_region_network_test.rs b/testsuite/testcases/src/multi_region_network_test.rs index ad9d0fd7a1b04..0a881cbb7d20b 100644 --- a/testsuite/testcases/src/multi_region_network_test.rs +++ 
b/testsuite/testcases/src/multi_region_network_test.rs @@ -8,6 +8,7 @@ use aptos_forge::{ }; use aptos_logger::info; use aptos_types::PeerId; +use async_trait::async_trait; use itertools::{self, EitherOrBoth, Itertools}; use std::collections::BTreeMap; @@ -329,9 +330,10 @@ impl NetworkLoadTest for MultiRegionNetworkEmulationTest { } } +#[async_trait] impl NetworkTest for MultiRegionNetworkEmulationTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/network_bandwidth_test.rs b/testsuite/testcases/src/network_bandwidth_test.rs index d0cd9082eb35d..459aa721d230e 100644 --- a/testsuite/testcases/src/network_bandwidth_test.rs +++ b/testsuite/testcases/src/network_bandwidth_test.rs @@ -6,6 +6,7 @@ use aptos_forge::{ GroupNetworkBandwidth, NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkBandwidth, Test, }; +use async_trait::async_trait; /// This is deprecated. 
Use [crate::multi_region_network_test::MultiRegionNetworkEmulationTest] instead pub struct NetworkBandwidthTest; @@ -66,8 +67,9 @@ impl NetworkLoadTest for NetworkBandwidthTest { } } +#[async_trait] impl NetworkTest for NetworkBandwidthTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/network_loss_test.rs b/testsuite/testcases/src/network_loss_test.rs index 6239b9be49d5c..ac90b9d44c68e 100644 --- a/testsuite/testcases/src/network_loss_test.rs +++ b/testsuite/testcases/src/network_loss_test.rs @@ -5,6 +5,7 @@ use crate::{LoadDestination, NetworkLoadTest}; use aptos_forge::{ NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkLoss, Test, }; +use async_trait::async_trait; /// This is deprecated. Use [crate::multi_region_network_test::MultiRegionNetworkEmulationTest] instead pub struct NetworkLossTest; @@ -46,8 +47,9 @@ impl NetworkLoadTest for NetworkLossTest { } } +#[async_trait] impl NetworkTest for NetworkLossTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/network_partition_test.rs b/testsuite/testcases/src/network_partition_test.rs index 54689d15a6f0d..f9281564d4170 100644 --- a/testsuite/testcases/src/network_partition_test.rs +++ b/testsuite/testcases/src/network_partition_test.rs @@ -6,6 +6,7 @@ use aptos_forge::{ NetworkContext, NetworkContextSynchronizer, NetworkTest, SwarmChaos, SwarmNetworkPartition, Test, }; +use async_trait::async_trait; /// This is deprecated. 
Use [crate::multi_region_network_test::MultiRegionNetworkEmulationTest] instead pub struct NetworkPartitionTest; @@ -56,8 +57,9 @@ impl NetworkLoadTest for NetworkPartitionTest { } } +#[async_trait] impl NetworkTest for NetworkPartitionTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/partial_nodes_down_test.rs b/testsuite/testcases/src/partial_nodes_down_test.rs index 6abde03878859..d2c12758be12a 100644 --- a/testsuite/testcases/src/partial_nodes_down_test.rs +++ b/testsuite/testcases/src/partial_nodes_down_test.rs @@ -4,6 +4,7 @@ use crate::generate_traffic; use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test}; +use async_trait::async_trait; use std::{ops::DerefMut, thread}; use tokio::{runtime::Runtime, time::Duration}; @@ -15,8 +16,9 @@ impl Test for PartialNodesDown { } } -impl PartialNodesDown { - async fn async_run(&self, ctx: NetworkContextSynchronizer<'_>) -> Result<()> { +#[async_trait] +impl NetworkTest for PartialNodesDown { + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); let runtime = Runtime::new()?; @@ -48,9 +50,3 @@ impl PartialNodesDown { Ok(()) } } - -impl NetworkTest for PartialNodesDown { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ctx.handle.clone().block_on(self.async_run(ctx)) - } -} diff --git a/testsuite/testcases/src/performance_test.rs b/testsuite/testcases/src/performance_test.rs index 2b0018c25f16a..63786565d1f98 100644 --- a/testsuite/testcases/src/performance_test.rs +++ b/testsuite/testcases/src/performance_test.rs @@ -4,6 +4,7 @@ use crate::NetworkLoadTest; use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test}; +use async_trait::async_trait; pub struct 
PerformanceBenchmark; @@ -15,8 +16,9 @@ impl Test for PerformanceBenchmark { impl NetworkLoadTest for PerformanceBenchmark {} +#[async_trait] impl NetworkTest for PerformanceBenchmark { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/public_fullnode_performance.rs b/testsuite/testcases/src/public_fullnode_performance.rs index 83310371476ab..32c08d9682fea 100644 --- a/testsuite/testcases/src/public_fullnode_performance.rs +++ b/testsuite/testcases/src/public_fullnode_performance.rs @@ -15,6 +15,7 @@ use aptos_forge::{ use aptos_logger::info; use aptos_sdk::move_types::account_address::AccountAddress; use aptos_types::PeerId; +use async_trait::async_trait; use itertools::{EitherOrBoth, Itertools}; use rand::{ rngs::{OsRng, StdRng}, @@ -124,9 +125,10 @@ impl Test for PFNPerformance { } } +#[async_trait] impl NetworkTest for PFNPerformance { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/quorum_store_onchain_enable_test.rs b/testsuite/testcases/src/quorum_store_onchain_enable_test.rs index d6338871de9ce..f8974efc47c16 100644 --- a/testsuite/testcases/src/quorum_store_onchain_enable_test.rs +++ b/testsuite/testcases/src/quorum_store_onchain_enable_test.rs @@ -11,6 +11,7 @@ use aptos_types::{ account_config::CORE_CODE_ADDRESS, on_chain_config::{ConsensusConfigV1, OnChainConsensusConfig}, }; +use async_trait::async_trait; use std::time::Duration; use tokio::runtime::Runtime; @@ -111,8 +112,9 @@ impl NetworkLoadTest for QuorumStoreOnChainEnableTest { } } +#[async_trait] impl NetworkTest for QuorumStoreOnChainEnableTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { - ::run(self, ctx) + 
async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/reconfiguration_test.rs b/testsuite/testcases/src/reconfiguration_test.rs index 7b45e50504f59..eb0c0a7e38409 100644 --- a/testsuite/testcases/src/reconfiguration_test.rs +++ b/testsuite/testcases/src/reconfiguration_test.rs @@ -4,6 +4,7 @@ use anyhow::anyhow; use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Test}; +use async_trait::async_trait; pub struct ReconfigurationTest; @@ -13,8 +14,9 @@ impl Test for ReconfigurationTest { } } +#[async_trait] impl NetworkTest for ReconfigurationTest { - fn run(&self, _ctx: NetworkContextSynchronizer) -> Result<()> { + async fn run<'a>(&self, _ctx: NetworkContextSynchronizer<'a>) -> Result<()> { Err(anyhow!("Not supported in aptos-framework yet")) } // TODO(https://github.com/aptos-labs/aptos-core/issues/317): add back after support those transactions in aptos-framework diff --git a/testsuite/testcases/src/state_sync_performance.rs b/testsuite/testcases/src/state_sync_performance.rs index 9729789987482..0a83c44ad20a8 100644 --- a/testsuite/testcases/src/state_sync_performance.rs +++ b/testsuite/testcases/src/state_sync_performance.rs @@ -10,6 +10,7 @@ use aptos_forge::{ }; use aptos_logger::info; use aptos_sdk::move_types::account_address::AccountAddress; +use async_trait::async_trait; use std::{ops::DerefMut, time::Instant}; use tokio::{runtime::Runtime, time::Duration}; @@ -21,8 +22,15 @@ const NUM_STATE_VALUE_COUNTER_NAME: &str = "aptos_jellyfish_leaf_count"; // The /// In the test, all fullnodes are wiped, restarted and timed to synchronize. 
pub struct StateSyncFullnodePerformance; -impl StateSyncFullnodePerformance { - async fn async_run(&self, ctx: NetworkContextSynchronizer<'_>) -> Result<()> { +impl Test for StateSyncFullnodePerformance { + fn name(&self) -> &'static str { + "StateSyncFullnodePerformance" + } +} + +#[async_trait] +impl NetworkTest for StateSyncFullnodePerformance { + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name())?; @@ -39,24 +47,19 @@ impl StateSyncFullnodePerformance { } } -impl Test for StateSyncFullnodePerformance { - fn name(&self) -> &'static str { - "StateSyncFullnodePerformance" - } -} - -impl NetworkTest for StateSyncFullnodePerformance { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ctx.handle.clone().block_on(self.async_run(ctx)) - } -} - /// A state sync performance test that measures fast sync performance. /// In the test, all fullnodes are wiped, restarted and timed to synchronize. 
pub struct StateSyncFullnodeFastSyncPerformance; -impl StateSyncFullnodeFastSyncPerformance { - async fn async_run(&self, ctxa: NetworkContextSynchronizer<'_>) -> Result<()> { +impl Test for StateSyncFullnodeFastSyncPerformance { + fn name(&self) -> &'static str { + "StateSyncFullnodeFastSyncPerformance" + } +} + +#[async_trait] +impl NetworkTest for StateSyncFullnodeFastSyncPerformance { + async fn run<'a>(&self, ctxa: NetworkContextSynchronizer<'a>) -> Result<()> { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name())?; @@ -123,24 +126,19 @@ impl StateSyncFullnodeFastSyncPerformance { } } -impl Test for StateSyncFullnodeFastSyncPerformance { - fn name(&self) -> &'static str { - "StateSyncFullnodeFastSyncPerformance" - } -} - -impl NetworkTest for StateSyncFullnodeFastSyncPerformance { - fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { - ctxa.handle.clone().block_on(self.async_run(ctxa)) - } -} - /// A state sync performance test that measures validator sync performance. /// In the test, 2 validators are wiped, restarted and timed to synchronize. 
pub struct StateSyncValidatorPerformance; -impl StateSyncValidatorPerformance { - async fn async_run(&self, ctxa: NetworkContextSynchronizer<'_>) -> Result<()> { +impl Test for StateSyncValidatorPerformance { + fn name(&self) -> &'static str { + "StateSyncValidatorPerformance" + } +} + +#[async_trait] +impl NetworkTest for StateSyncValidatorPerformance { + async fn run<'a>(&self, ctxa: NetworkContextSynchronizer<'a>) -> Result<()> { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); // Verify we have at least 7 validators (i.e., 3f+1, where f is 2) @@ -180,18 +178,6 @@ impl StateSyncValidatorPerformance { } } -impl Test for StateSyncValidatorPerformance { - fn name(&self) -> &'static str { - "StateSyncValidatorPerformance" - } -} - -impl NetworkTest for StateSyncValidatorPerformance { - fn run(&self, ctxa: NetworkContextSynchronizer) -> Result<()> { - ctxa.handle.clone().block_on(self.async_run(ctxa)) - } -} - /// Verifies the setup for the given fullnode test and returns the /// set of fullnodes. fn get_fullnodes_and_check_setup( diff --git a/testsuite/testcases/src/three_region_simulation_test.rs b/testsuite/testcases/src/three_region_simulation_test.rs index 77e1287674abb..5efc2d4d10974 100644 --- a/testsuite/testcases/src/three_region_simulation_test.rs +++ b/testsuite/testcases/src/three_region_simulation_test.rs @@ -7,6 +7,7 @@ use aptos_forge::{ NetworkTest, Swarm, SwarmChaos, SwarmNetworkBandwidth, SwarmNetworkDelay, Test, }; use aptos_logger::info; +use async_trait::async_trait; /// Represents a test that simulates a network with 3 regions, all in the same cloud. 
pub struct ThreeRegionSameCloudSimulationTest; @@ -103,8 +104,9 @@ impl NetworkLoadTest for ThreeRegionSameCloudSimulationTest { } } +#[async_trait] impl NetworkTest for ThreeRegionSameCloudSimulationTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> anyhow::Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/twin_validator_test.rs b/testsuite/testcases/src/twin_validator_test.rs index e1743f14182e3..15d76a228c68e 100644 --- a/testsuite/testcases/src/twin_validator_test.rs +++ b/testsuite/testcases/src/twin_validator_test.rs @@ -5,6 +5,7 @@ use crate::NetworkLoadTest; use anyhow::Context; use aptos_forge::{NetworkContextSynchronizer, NetworkTest, NodeExt, Test}; use aptos_sdk::move_types::account_address::AccountAddress; +use async_trait::async_trait; use std::{ ops::DerefMut, time::{Duration, Instant}, @@ -13,8 +14,17 @@ use tokio::runtime::Runtime; pub struct TwinValidatorTest; -impl TwinValidatorTest { - async fn async_run(&self, ctxa: NetworkContextSynchronizer<'_>) -> anyhow::Result<()> { +impl Test for TwinValidatorTest { + fn name(&self) -> &'static str { + "twin validator" + } +} + +impl NetworkLoadTest for TwinValidatorTest {} + +#[async_trait] +impl NetworkTest for TwinValidatorTest { + async fn run<'a>(&self, ctxa: NetworkContextSynchronizer<'a>) -> anyhow::Result<()> { { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); @@ -68,20 +78,6 @@ impl TwinValidatorTest { Ok::<(), anyhow::Error>(()) })?; } - ::run(self, ctxa) - } -} - -impl Test for TwinValidatorTest { - fn name(&self) -> &'static str { - "twin validator" - } -} - -impl NetworkLoadTest for TwinValidatorTest {} - -impl NetworkTest for TwinValidatorTest { - fn run(&self, ctxa: NetworkContextSynchronizer) -> anyhow::Result<()> { - ctxa.handle.clone().block_on(self.async_run(ctxa)) + ::run(self, ctxa).await } } diff --git 
a/testsuite/testcases/src/two_traffics_test.rs b/testsuite/testcases/src/two_traffics_test.rs index 58575052709f5..1d02e6a26a768 100644 --- a/testsuite/testcases/src/two_traffics_test.rs +++ b/testsuite/testcases/src/two_traffics_test.rs @@ -9,6 +9,7 @@ use aptos_forge::{ EmitJobRequest, NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport, }; use aptos_logger::info; +use async_trait::async_trait; use rand::{rngs::OsRng, Rng, SeedableRng}; use std::time::{Duration, Instant}; @@ -77,8 +78,9 @@ impl NetworkLoadTest for TwoTrafficsTest { } } +#[async_trait] impl NetworkTest for TwoTrafficsTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/validator_join_leave_test.rs b/testsuite/testcases/src/validator_join_leave_test.rs index 04dc2ac4030fb..e6634d27d6d3e 100644 --- a/testsuite/testcases/src/validator_join_leave_test.rs +++ b/testsuite/testcases/src/validator_join_leave_test.rs @@ -12,6 +12,7 @@ use aptos_keygen::KeyGen; use aptos_logger::info; use aptos_sdk::crypto::{ed25519::Ed25519PrivateKey, PrivateKey}; use aptos_types::{account_address::AccountAddress, transaction::authenticator::AuthenticationKey}; +use async_trait::async_trait; use std::time::Duration; use tokio::runtime::Runtime; @@ -173,9 +174,10 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { } } +#[async_trait] impl NetworkTest for ValidatorJoinLeaveTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + ::run(self, ctx).await } } diff --git a/testsuite/testcases/src/validator_reboot_stress_test.rs b/testsuite/testcases/src/validator_reboot_stress_test.rs index 37d9fddc6ef4e..a9e7457a9cca1 100644 --- a/testsuite/testcases/src/validator_reboot_stress_test.rs +++ 
b/testsuite/testcases/src/validator_reboot_stress_test.rs @@ -3,6 +3,7 @@ use crate::NetworkLoadTest; use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport}; +use async_trait::async_trait; use rand::{seq::SliceRandom, thread_rng}; use std::time::Duration; use tokio::{runtime::Runtime, time::Instant}; @@ -60,8 +61,9 @@ impl NetworkLoadTest for ValidatorRebootStressTest { } } +#[async_trait] impl NetworkTest for ValidatorRebootStressTest { - fn run(&self, ctx: NetworkContextSynchronizer) -> Result<()> { - ::run(self, ctx) + async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { + ::run(self, ctx).await } } From ef91fbf7482aacab2eb38cec9c0869ca50bae259 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 7 Jun 2024 11:28:07 -0400 Subject: [PATCH 12/28] NetworkLoadTest -> async_trait --- .../src/consensus_reliability_tests.rs | 5 +- .../testcases/src/dag_onchain_enable_test.rs | 293 +++++++++--------- .../src/fullnode_reboot_stress_test.rs | 25 +- testsuite/testcases/src/lib.rs | 40 +-- .../testcases/src/load_vs_perf_benchmark.rs | 24 +- testsuite/testcases/src/modifiers.rs | 106 +++---- .../src/multi_region_network_test.rs | 11 +- .../testcases/src/network_bandwidth_test.rs | 49 ++- testsuite/testcases/src/network_loss_test.rs | 19 +- .../testcases/src/network_partition_test.rs | 29 +- .../src/public_fullnode_performance.rs | 25 +- .../src/quorum_store_onchain_enable_test.rs | 122 ++++---- .../src/three_region_simulation_test.rs | 11 +- testsuite/testcases/src/two_traffics_test.rs | 17 +- .../src/validator_join_leave_test.rs | 129 ++++---- .../src/validator_reboot_stress_test.rs | 27 +- 16 files changed, 454 insertions(+), 478 deletions(-) diff --git a/testsuite/testcases/src/consensus_reliability_tests.rs b/testsuite/testcases/src/consensus_reliability_tests.rs index f445af13f68af..9397ce353609e 100644 --- a/testsuite/testcases/src/consensus_reliability_tests.rs +++ 
b/testsuite/testcases/src/consensus_reliability_tests.rs @@ -34,8 +34,9 @@ impl Test for ChangingWorkingQuorumTest { } } +#[async_trait] impl NetworkLoadTest for ChangingWorkingQuorumTest { - fn setup(&self, ctx: &mut NetworkContext) -> Result { + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> Result { // because we are doing failure testing, we should be sending // traffic to nodes that are alive. if ctx.swarm().full_nodes().count() > 0 { @@ -53,7 +54,7 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { } } - fn test( + async fn test( &self, swarm: &mut dyn Swarm, _report: &mut TestReport, diff --git a/testsuite/testcases/src/dag_onchain_enable_test.rs b/testsuite/testcases/src/dag_onchain_enable_test.rs index 0adc712baa830..6a4ad2bc16069 100644 --- a/testsuite/testcases/src/dag_onchain_enable_test.rs +++ b/testsuite/testcases/src/dag_onchain_enable_test.rs @@ -15,7 +15,6 @@ use aptos_types::{ }; use async_trait::async_trait; use std::time::Duration; -use tokio::runtime::Runtime; const MAX_NODE_LAG_SECS: u64 = 360; @@ -27,184 +26,180 @@ impl Test for DagOnChainEnableTest { } } +#[async_trait] impl NetworkLoadTest for DagOnChainEnableTest { - fn test( + async fn test( &self, swarm: &mut dyn aptos_forge::Swarm, _report: &mut aptos_forge::TestReport, duration: std::time::Duration, ) -> anyhow::Result<()> { - let runtime = Runtime::new().unwrap(); - let faucet_endpoint: reqwest::Url = "http://localhost:8081".parse().unwrap(); let rest_client = swarm.validators().next().unwrap().rest_client(); - let mut cli = runtime.block_on(async { - CliTestFramework::new( - swarm.validators().next().unwrap().rest_api_endpoint(), - faucet_endpoint, - /*num_cli_accounts=*/ 0, + let rest_api_endpoint = swarm.validators().next().unwrap().rest_api_endpoint(); + let mut cli = CliTestFramework::new( + rest_api_endpoint, + faucet_endpoint, + /*num_cli_accounts=*/ 0, + ) + .await; + + tokio::time::sleep(duration / 3).await; + + let root_cli_index = { + let root_account = 
swarm.chain_info().root_account(); + cli.add_account_with_address_to_cli( + root_account.private_key().clone(), + root_account.address(), ) - .await - }); - - std::thread::sleep(duration / 3); - - runtime.block_on(async { + }; - let root_cli_index = { - let root_account = swarm.chain_info().root_account(); - cli.add_account_with_address_to_cli( - root_account.private_key().clone(), - root_account.address(), + let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( + &rest_client + .get_account_resource_bcs::>( + CORE_CODE_ADDRESS, + "0x1::consensus_config::ConsensusConfig", ) - }; - - let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( - &rest_client - .get_account_resource_bcs::>( - CORE_CODE_ADDRESS, - "0x1::consensus_config::ConsensusConfig", - ) - .await - .unwrap() - .into_inner(), - ) - .unwrap(); - - assert!(matches!(current_consensus_config, OnChainConsensusConfig::V3 { .. })); - - // Change to V2 - let new_consensus_config = OnChainConsensusConfig::V3 { - alg: ConsensusAlgorithmConfig::DAG(DagConsensusConfigV1::default()), - vtxn: ValidatorTxnConfig::default_disabled(), - }; - - let update_consensus_config_script = format!( - r#" - script {{ - use aptos_framework::aptos_governance; - use aptos_framework::consensus_config; - fun main(core_resources: &signer) {{ - let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); - let config_bytes = {}; - consensus_config::set(&framework_signer, config_bytes); - }} + .await + .unwrap() + .into_inner(), + ) + .unwrap(); + + assert!(matches!( + current_consensus_config, + OnChainConsensusConfig::V3 { .. 
} + )); + + // Change to V2 + let new_consensus_config = OnChainConsensusConfig::V3 { + alg: ConsensusAlgorithmConfig::DAG(DagConsensusConfigV1::default()), + vtxn: ValidatorTxnConfig::default_disabled(), + }; + + let update_consensus_config_script = format!( + r#" + script {{ + use aptos_framework::aptos_governance; + use aptos_framework::consensus_config; + fun main(core_resources: &signer) {{ + let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); + let config_bytes = {}; + consensus_config::set(&framework_signer, config_bytes); }} - "#, - generate_onchain_config_blob(&bcs::to_bytes(&new_consensus_config).unwrap()) - ); + }} + "#, + generate_onchain_config_blob(&bcs::to_bytes(&new_consensus_config).unwrap()) + ); - cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) - .await - })?; + cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) + .await?; - std::thread::sleep(duration / 3); + tokio::time::sleep(duration / 3).await; - let initial_consensus_config = runtime.block_on(async { + let root_cli_index = { + let root_account = swarm.chain_info().root_account(); + cli.add_account_with_address_to_cli( + root_account.private_key().clone(), + root_account.address(), + ) + }; - let root_cli_index = { - let root_account = swarm.chain_info().root_account(); - cli.add_account_with_address_to_cli( - root_account.private_key().clone(), - root_account.address(), + let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( + &rest_client + .get_account_resource_bcs::>( + CORE_CODE_ADDRESS, + "0x1::consensus_config::ConsensusConfig", ) - }; - - let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( - &rest_client - .get_account_resource_bcs::>( - CORE_CODE_ADDRESS, - "0x1::consensus_config::ConsensusConfig", - ) - .await - .unwrap() - .into_inner(), - ) - .unwrap(); - - 
assert!(matches!(current_consensus_config, OnChainConsensusConfig::V3 { .. })); - - // Change to DAG - let new_consensus_config = OnChainConsensusConfig::V3 { - alg: ConsensusAlgorithmConfig::DAG(DagConsensusConfigV1::default()), - vtxn: ValidatorTxnConfig::default_disabled(), - }; - - let update_consensus_config_script = format!( - r#" - script {{ - use aptos_framework::aptos_governance; - use aptos_framework::consensus_config; - fun main(core_resources: &signer) {{ - let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); - let config_bytes = {}; - consensus_config::set(&framework_signer, config_bytes); - }} + .await + .unwrap() + .into_inner(), + ) + .unwrap(); + + assert!(matches!( + current_consensus_config, + OnChainConsensusConfig::V3 { .. } + )); + + // Change to DAG + let new_consensus_config = OnChainConsensusConfig::V3 { + alg: ConsensusAlgorithmConfig::DAG(DagConsensusConfigV1::default()), + vtxn: ValidatorTxnConfig::default_disabled(), + }; + + let update_consensus_config_script = format!( + r#" + script {{ + use aptos_framework::aptos_governance; + use aptos_framework::consensus_config; + fun main(core_resources: &signer) {{ + let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); + let config_bytes = {}; + consensus_config::set(&framework_signer, config_bytes); }} - "#, - generate_onchain_config_blob(&bcs::to_bytes(&new_consensus_config).unwrap()) - ); + }} + "#, + generate_onchain_config_blob(&bcs::to_bytes(&new_consensus_config).unwrap()) + ); - cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) - .await?; + cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) + .await?; - Ok(current_consensus_config) - })?; + let initial_consensus_config = current_consensus_config; - std::thread::sleep(duration 
/ 3); + tokio::time::sleep(duration / 3).await; - runtime.block_on(async { + let root_cli_index = { + let root_account = swarm.chain_info().root_account(); + cli.add_account_with_address_to_cli( + root_account.private_key().clone(), + root_account.address(), + ) + }; - let root_cli_index = { - let root_account = swarm.chain_info().root_account(); - cli.add_account_with_address_to_cli( - root_account.private_key().clone(), - root_account.address(), + let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( + &rest_client + .get_account_resource_bcs::>( + CORE_CODE_ADDRESS, + "0x1::consensus_config::ConsensusConfig", ) - }; - - let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( - &rest_client - .get_account_resource_bcs::>( - CORE_CODE_ADDRESS, - "0x1::consensus_config::ConsensusConfig", - ) - .await - .unwrap() - .into_inner(), - ) - .unwrap(); - - assert!(matches!(current_consensus_config, OnChainConsensusConfig::V3 { .. })); - - // Change back to initial - let update_consensus_config_script = format!( - r#" - script {{ - use aptos_framework::aptos_governance; - use aptos_framework::consensus_config; - fun main(core_resources: &signer) {{ - let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); - let config_bytes = {}; - consensus_config::set(&framework_signer, config_bytes); - }} + .await + .unwrap() + .into_inner(), + ) + .unwrap(); + + assert!(matches!( + current_consensus_config, + OnChainConsensusConfig::V3 { .. 
} + )); + + // Change back to initial + let update_consensus_config_script = format!( + r#" + script {{ + use aptos_framework::aptos_governance; + use aptos_framework::consensus_config; + fun main(core_resources: &signer) {{ + let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); + let config_bytes = {}; + consensus_config::set(&framework_signer, config_bytes); }} - "#, - generate_onchain_config_blob(&bcs::to_bytes(&initial_consensus_config).unwrap()) - ); + }} + "#, + generate_onchain_config_blob(&bcs::to_bytes(&initial_consensus_config).unwrap()) + ); - cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) - .await - })?; + cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) + .await?; // Wait for all nodes to synchronize and stabilize. info!("Waiting for the validators to be synchronized."); - runtime.block_on(async { - swarm - .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) - .await - })?; + swarm + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await?; Ok(()) } diff --git a/testsuite/testcases/src/fullnode_reboot_stress_test.rs b/testsuite/testcases/src/fullnode_reboot_stress_test.rs index 7ba712e8d7f62..3abcc881b03ba 100644 --- a/testsuite/testcases/src/fullnode_reboot_stress_test.rs +++ b/testsuite/testcases/src/fullnode_reboot_stress_test.rs @@ -8,7 +8,7 @@ use aptos_forge::{ use async_trait::async_trait; use rand::{seq::SliceRandom, thread_rng}; use std::time::Duration; -use tokio::{runtime::Runtime, time::Instant}; +use tokio::time::Instant; pub struct FullNodeRebootStressTest; @@ -18,31 +18,32 @@ impl Test for FullNodeRebootStressTest { } } +#[async_trait] impl NetworkLoadTest for FullNodeRebootStressTest { - fn setup(&self, _ctx: &mut NetworkContext) -> Result { + async fn setup<'a>(&self, _ctx: &mut NetworkContext<'a>) -> Result { 
Ok(LoadDestination::AllFullnodes) } - fn test( + async fn test( &self, swarm: &mut dyn Swarm, _report: &mut TestReport, duration: Duration, ) -> Result<()> { let start = Instant::now(); - let runtime = Runtime::new().unwrap(); let all_fullnodes = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); - let mut rng = thread_rng(); - while start.elapsed() < duration { - let fullnode_to_reboot = swarm - .full_node_mut(*all_fullnodes.choose(&mut rng).unwrap()) - .unwrap(); - runtime.block_on(async { fullnode_to_reboot.stop().await })?; - runtime.block_on(async { fullnode_to_reboot.start().await })?; - std::thread::sleep(Duration::from_secs(10)); + let fullnode_to_reboot = { + let mut rng = thread_rng(); + swarm + .full_node_mut(*all_fullnodes.choose(&mut rng).unwrap()) + .unwrap() + }; + fullnode_to_reboot.stop().await?; + fullnode_to_reboot.start().await?; + tokio::time::sleep(Duration::from_secs(10)).await; } Ok(()) diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index 5a004ee535c76..1c5bccbd70e27 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -205,25 +205,26 @@ impl LoadDestination { } } +#[async_trait] pub trait NetworkLoadTest: Test { - fn setup(&self, _ctx: &mut NetworkContext) -> Result { + async fn setup<'a>(&self, _ctx: &mut NetworkContext<'a>) -> Result { Ok(LoadDestination::FullnodesOtherwiseValidators) } // Load is started before this function is called, and stops after this function returns. // Expected duration is passed into this function, expecting this function to take that much // time to finish. How long this function takes will dictate how long the actual test lasts. 
- fn test( + async fn test( &self, _swarm: &mut dyn Swarm, _report: &mut TestReport, duration: Duration, ) -> Result<()> { - std::thread::sleep(duration); + tokio::time::sleep(duration).await; Ok(()) } - fn finish(&self, _ctx: &mut NetworkContext) -> Result<()> { + async fn finish<'a>(&self, _ctx: &mut NetworkContext<'a>) -> Result<()> { Ok(()) } } @@ -244,14 +245,16 @@ impl NetworkTest for dyn NetworkLoadTest { let emit_job_request = ctx.emit_job.clone(); let rng = SeedableRng::from_rng(ctx.core().rng())?; let duration = ctx.global_duration; - let stats_by_phase = self.network_load_test( - ctx, - emit_job_request, - duration, - WARMUP_DURATION_FRACTION, - COOLDOWN_DURATION_FRACTION, - rng, - )?; + let stats_by_phase = self + .network_load_test( + ctx, + emit_job_request, + duration, + WARMUP_DURATION_FRACTION, + COOLDOWN_DURATION_FRACTION, + rng, + ) + .await?; let phased = stats_by_phase.len() > 1; for (phase, phase_stats) in stats_by_phase.iter().enumerate() { @@ -290,7 +293,7 @@ impl NetworkTest for dyn NetworkLoadTest { .block_on(ctx.swarm().get_client_with_newest_ledger_version()) .context("no clients replied for end version")?; - self.finish(ctx).context("finish NetworkLoadTest ")?; + self.finish(ctx).await.context("finish NetworkLoadTest ")?; for phase_stats in stats_by_phase.into_iter() { ctx.check_for_success( @@ -310,16 +313,16 @@ impl NetworkTest for dyn NetworkLoadTest { } impl dyn NetworkLoadTest + '_ { - pub fn network_load_test( + pub async fn network_load_test<'a>( &self, - ctx: &mut NetworkContext, + ctx: &mut NetworkContext<'a>, emit_job_request: EmitJobRequest, duration: Duration, warmup_duration_fraction: f32, cooldown_duration_fraction: f32, rng: StdRng, ) -> Result> { - let destination = self.setup(ctx).context("setup NetworkLoadTest")?; + let destination = self.setup(ctx).await.context("setup NetworkLoadTest")?; let nodes_to_send_load_to = destination.get_destination_nodes(ctx.swarm()); // Generate some traffic @@ -376,6 +379,7 @@ impl dyn 
NetworkLoadTest + '_ { let join_stats = rt.spawn(job.periodic_stat_forward(phase_duration, 60)); self.test(ctx.swarm, ctx.report, phase_duration) + .await .context("test NetworkLoadTest")?; job = rt.block_on(join_stats).context("join stats")?; phase_timing.push(phase_start.elapsed()); @@ -565,7 +569,7 @@ impl NetworkTest for CompositeNetworkTest { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); for wrapper in &self.wrappers { - wrapper.setup(ctx)?; + wrapper.setup(ctx).await?; } } self.test.run(ctxa.clone()).await?; @@ -573,7 +577,7 @@ impl NetworkTest for CompositeNetworkTest { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); for wrapper in &self.wrappers { - wrapper.finish(ctx)?; + wrapper.finish(ctx).await?; } } Ok(()) diff --git a/testsuite/testcases/src/load_vs_perf_benchmark.rs b/testsuite/testcases/src/load_vs_perf_benchmark.rs index 8384170910ed9..72c60ff0780c5 100644 --- a/testsuite/testcases/src/load_vs_perf_benchmark.rs +++ b/testsuite/testcases/src/load_vs_perf_benchmark.rs @@ -182,7 +182,7 @@ impl Test for LoadVsPerfBenchmark { } impl LoadVsPerfBenchmark { - fn evaluate_single( + async fn evaluate_single( &self, ctx: &mut NetworkContext<'_>, workloads: &Workloads, @@ -191,14 +191,17 @@ impl LoadVsPerfBenchmark { ) -> Result> { let rng = SeedableRng::from_rng(ctx.core().rng())?; let emit_job_request = workloads.configure(index, ctx.emit_job.clone()); - let stats_by_phase = self.test.network_load_test( - ctx, - emit_job_request, - duration, - PER_TEST_WARMUP_DURATION_FRACTION, - PER_TEST_COOLDOWN_DURATION_FRACTION, - rng, - )?; + let stats_by_phase = self + .test + .network_load_test( + ctx, + emit_job_request, + duration, + PER_TEST_WARMUP_DURATION_FRACTION, + PER_TEST_COOLDOWN_DURATION_FRACTION, + rng, + ) + .await?; let mut result = vec![]; for (phase, phase_stats) in stats_by_phase.into_iter().enumerate() { @@ -275,7 +278,8 @@ impl NetworkTest for LoadVsPerfBenchmark { phase_duration 
.checked_mul(self.workloads.num_phases(index) as u32) .unwrap(), - )?, + ) + .await?, ); if let Some(job) = continous_job.as_mut() { diff --git a/testsuite/testcases/src/modifiers.rs b/testsuite/testcases/src/modifiers.rs index c33587be6b21f..6eac71da34b5d 100644 --- a/testsuite/testcases/src/modifiers.rs +++ b/testsuite/testcases/src/modifiers.rs @@ -92,13 +92,14 @@ pub struct ExecutionDelayTest { pub add_execution_delay: ExecutionDelayConfig, } +#[async_trait] impl NetworkLoadTest for ExecutionDelayTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { add_execution_delay(ctx.swarm(), &self.add_execution_delay)?; Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { remove_execution_delay(ctx.swarm()) } } @@ -125,16 +126,16 @@ pub struct NetworkUnreliabilityTest { pub config: NetworkUnreliabilityConfig, } +#[async_trait] impl NetworkLoadTest for NetworkUnreliabilityTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { let swarm = ctx.swarm(); - let runtime = Runtime::new().unwrap(); let validators = swarm.get_validator_clients_with_names(); - runtime.block_on(async { - let mut rng = rand::thread_rng(); - for (name, validator) in validators { - let drop_percentage = if rng.gen_bool(self.config.inject_unreliability_fraction) { + for (name, validator) in validators { + let drop_percentage = { + let mut rng = rand::thread_rng(); + if rng.gen_bool(self.config.inject_unreliability_fraction) { rng.gen_range( 1_u32, (self.config.inject_max_unreliability_percentage * 1000.0) as u32, @@ -142,50 +143,46 @@ impl NetworkLoadTest for NetworkUnreliabilityTest { / 1000.0 } else { 0.0 - }; - info!( - "Validator {} dropping {}% of messages", - 
name, drop_percentage - ); - validator - .set_failpoint( - "consensus::send::any".to_string(), - format!("{}%return", drop_percentage), + } + }; + info!( + "Validator {} dropping {}% of messages", + name, drop_percentage + ); + validator + .set_failpoint( + "consensus::send::any".to_string(), + format!("{}%return", drop_percentage), + ) + .await + .map_err(|e| { + anyhow::anyhow!( + "set_failpoint to add unreliability on {} failed, {:?}", + name, + e ) - .await - .map_err(|e| { - anyhow::anyhow!( - "set_failpoint to add unreliability on {} failed, {:?}", - name, - e - ) - })?; - } - Ok::<(), anyhow::Error>(()) - })?; + })?; + } Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { - let runtime = Runtime::new().unwrap(); + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { let validators = ctx.swarm().get_validator_clients_with_names(); - runtime.block_on(async { - for (name, validator) in validators { - validator - .set_failpoint("consensus::send::any".to_string(), "off".to_string()) - .await - .map_err(|e| { - anyhow::anyhow!( - "set_failpoint to remove unreliability on {} failed, {:?}", - name, - e - ) - })?; - } - Ok(()) - }) + for (name, validator) in validators { + validator + .set_failpoint("consensus::send::any".to_string(), "off".to_string()) + .await + .map_err(|e| { + anyhow::anyhow!( + "set_failpoint to remove unreliability on {} failed, {:?}", + name, + e + ) + })?; + } + Ok(()) } } @@ -283,25 +280,24 @@ pub fn create_swarm_cpu_stress( SwarmCpuStress { group_cpu_stresses } } +#[async_trait] impl NetworkLoadTest for CpuChaosTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { let swarm_cpu_stress = self.create_cpu_chaos(ctx.swarm()); - ctx.runtime.block_on( - ctx.swarm - .inject_chaos(SwarmChaos::CpuStress(swarm_cpu_stress)), - )?; + ctx.swarm + 
.inject_chaos(SwarmChaos::CpuStress(swarm_cpu_stress)) + .await?; Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { let swarm_cpu_stress = self.create_cpu_chaos(ctx.swarm()); - ctx.runtime.block_on( - ctx.swarm - .remove_chaos(SwarmChaos::CpuStress(swarm_cpu_stress)), - ) + ctx.swarm + .remove_chaos(SwarmChaos::CpuStress(swarm_cpu_stress)) + .await } } diff --git a/testsuite/testcases/src/multi_region_network_test.rs b/testsuite/testcases/src/multi_region_network_test.rs index 0a881cbb7d20b..3969a72be6877 100644 --- a/testsuite/testcases/src/multi_region_network_test.rs +++ b/testsuite/testcases/src/multi_region_network_test.rs @@ -313,19 +313,18 @@ pub fn create_multi_region_swarm_network_chaos( } } +#[async_trait] impl NetworkLoadTest for MultiRegionNetworkEmulationTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { let chaos = self.create_netem_chaos(ctx.swarm); - let handle = ctx.handle(); - handle.block_on(ctx.swarm.inject_chaos(SwarmChaos::NetEm(chaos)))?; + ctx.swarm.inject_chaos(SwarmChaos::NetEm(chaos)).await?; Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { let chaos = self.create_netem_chaos(ctx.swarm); - let handle = ctx.handle(); - handle.block_on(ctx.swarm.remove_chaos(SwarmChaos::NetEm(chaos)))?; + ctx.swarm.remove_chaos(SwarmChaos::NetEm(chaos)).await?; Ok(()) } } diff --git a/testsuite/testcases/src/network_bandwidth_test.rs b/testsuite/testcases/src/network_bandwidth_test.rs index 459aa721d230e..d8514c630333e 100644 --- a/testsuite/testcases/src/network_bandwidth_test.rs +++ b/testsuite/testcases/src/network_bandwidth_test.rs @@ -25,20 +25,19 @@ 
impl Test for NetworkBandwidthTest { } } +#[async_trait] impl NetworkLoadTest for NetworkBandwidthTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { - ctx.runtime - .block_on( - ctx.swarm - .inject_chaos(SwarmChaos::Bandwidth(SwarmNetworkBandwidth { - group_network_bandwidths: vec![GroupNetworkBandwidth { - name: format!("forge-namespace-{}mbps-bandwidth", RATE_MBPS), - rate: RATE_MBPS, - limit: LIMIT_BYTES, - buffer: BUFFER_BYTES, - }], - })), - )?; + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { + ctx.swarm + .inject_chaos(SwarmChaos::Bandwidth(SwarmNetworkBandwidth { + group_network_bandwidths: vec![GroupNetworkBandwidth { + name: format!("forge-namespace-{}mbps-bandwidth", RATE_MBPS), + rate: RATE_MBPS, + limit: LIMIT_BYTES, + buffer: BUFFER_BYTES, + }], + })) + .await?; let msg = format!( "Limited bandwidth to {}mbps with limit {} and buffer {} to namespace", @@ -50,19 +49,17 @@ impl NetworkLoadTest for NetworkBandwidthTest { Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { - ctx.runtime - .block_on( - ctx.swarm - .remove_chaos(SwarmChaos::Bandwidth(SwarmNetworkBandwidth { - group_network_bandwidths: vec![GroupNetworkBandwidth { - name: format!("forge-namespace-{}mbps-bandwidth", RATE_MBPS), - rate: RATE_MBPS, - limit: LIMIT_BYTES, - buffer: BUFFER_BYTES, - }], - })), - )?; + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { + ctx.swarm + .remove_chaos(SwarmChaos::Bandwidth(SwarmNetworkBandwidth { + group_network_bandwidths: vec![GroupNetworkBandwidth { + name: format!("forge-namespace-{}mbps-bandwidth", RATE_MBPS), + rate: RATE_MBPS, + limit: LIMIT_BYTES, + buffer: BUFFER_BYTES, + }], + })) + .await?; Ok(()) } } diff --git a/testsuite/testcases/src/network_loss_test.rs b/testsuite/testcases/src/network_loss_test.rs index ac90b9d44c68e..c5e93a3e2d87b 100644 --- a/testsuite/testcases/src/network_loss_test.rs 
+++ b/testsuite/testcases/src/network_loss_test.rs @@ -20,13 +20,15 @@ impl Test for NetworkLossTest { } } +#[async_trait] impl NetworkLoadTest for NetworkLossTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { - ctx.runtime - .block_on(ctx.swarm.inject_chaos(SwarmChaos::Loss(SwarmNetworkLoss { + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { + ctx.swarm + .inject_chaos(SwarmChaos::Loss(SwarmNetworkLoss { loss_percentage: LOSS_PERCENTAGE, correlation_percentage: CORRELATION_PERCENTAGE, - })))?; + })) + .await?; let msg = format!( "Injected {}% loss with {}% correlation loss to namespace", @@ -37,12 +39,13 @@ impl NetworkLoadTest for NetworkLossTest { Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { - ctx.runtime - .block_on(ctx.swarm.remove_chaos(SwarmChaos::Loss(SwarmNetworkLoss { + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { + ctx.swarm + .remove_chaos(SwarmChaos::Loss(SwarmNetworkLoss { loss_percentage: LOSS_PERCENTAGE, correlation_percentage: CORRELATION_PERCENTAGE, - })))?; + })) + .await?; Ok(()) } } diff --git a/testsuite/testcases/src/network_partition_test.rs b/testsuite/testcases/src/network_partition_test.rs index f9281564d4170..00efe7a4e38e7 100644 --- a/testsuite/testcases/src/network_partition_test.rs +++ b/testsuite/testcases/src/network_partition_test.rs @@ -20,15 +20,14 @@ impl Test for NetworkPartitionTest { } } +#[async_trait] impl NetworkLoadTest for NetworkPartitionTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { - ctx.runtime - .block_on( - ctx.swarm - .inject_chaos(SwarmChaos::Partition(SwarmNetworkPartition { - partition_percentage: PARTITION_PERCENTAGE, - })), - )?; + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { + ctx.swarm + .inject_chaos(SwarmChaos::Partition(SwarmNetworkPartition { + partition_percentage: PARTITION_PERCENTAGE, + 
})) + .await?; let msg = format!( "Partitioned {}% validators in namespace", @@ -45,14 +44,12 @@ impl NetworkLoadTest for NetworkPartitionTest { .unwrap()])) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { - ctx.runtime - .block_on( - ctx.swarm - .remove_chaos(SwarmChaos::Partition(SwarmNetworkPartition { - partition_percentage: PARTITION_PERCENTAGE, - })), - )?; + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { + ctx.swarm + .remove_chaos(SwarmChaos::Partition(SwarmNetworkPartition { + partition_percentage: PARTITION_PERCENTAGE, + })) + .await?; Ok(()) } } diff --git a/testsuite/testcases/src/public_fullnode_performance.rs b/testsuite/testcases/src/public_fullnode_performance.rs index 32c08d9682fea..afa3148492b43 100644 --- a/testsuite/testcases/src/public_fullnode_performance.rs +++ b/testsuite/testcases/src/public_fullnode_performance.rs @@ -132,10 +132,11 @@ impl NetworkTest for PFNPerformance { } } +#[async_trait] impl NetworkLoadTest for PFNPerformance { /// We must override the setup function to: (i) create PFNs in /// the swarm; and (ii) use those PFNs as the load destination. 
- fn setup(&self, ctx: &mut NetworkContext) -> Result { + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> Result { // Add the PFNs to the swarm let pfn_peer_ids = create_and_add_pfns(ctx, self.num_pfns, self.config_override_fn.clone())?; @@ -143,34 +144,38 @@ impl NetworkLoadTest for PFNPerformance { // Add CPU chaos to the swarm if self.add_cpu_chaos { let cpu_chaos = self.create_cpu_chaos(ctx.swarm); - ctx.runtime - .block_on(ctx.swarm.inject_chaos(SwarmChaos::CpuStress(cpu_chaos)))?; + ctx.swarm + .inject_chaos(SwarmChaos::CpuStress(cpu_chaos)) + .await?; } // Add network emulation to the swarm if self.add_network_emulation { let network_chaos = self.create_network_emulation_chaos(ctx.swarm); - ctx.runtime - .block_on(ctx.swarm.inject_chaos(SwarmChaos::NetEm(network_chaos)))?; + ctx.swarm + .inject_chaos(SwarmChaos::NetEm(network_chaos)) + .await?; } // Use the PFNs as the load destination Ok(LoadDestination::Peers(pfn_peer_ids)) } - fn finish(&self, ctx: &mut NetworkContext) -> Result<()> { + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> Result<()> { // Remove CPU chaos from the swarm if self.add_cpu_chaos { let cpu_chaos = self.create_cpu_chaos(ctx.swarm); - ctx.runtime - .block_on(ctx.swarm.remove_chaos(SwarmChaos::CpuStress(cpu_chaos)))?; + ctx.swarm + .remove_chaos(SwarmChaos::CpuStress(cpu_chaos)) + .await?; } // Remove network emulation from the swarm if self.add_network_emulation { let network_chaos = self.create_network_emulation_chaos(ctx.swarm); - ctx.runtime - .block_on(ctx.swarm.remove_chaos(SwarmChaos::NetEm(network_chaos)))?; + ctx.swarm + .remove_chaos(SwarmChaos::NetEm(network_chaos)) + .await?; } Ok(()) diff --git a/testsuite/testcases/src/quorum_store_onchain_enable_test.rs b/testsuite/testcases/src/quorum_store_onchain_enable_test.rs index f8974efc47c16..6dfa25a1ffe04 100644 --- a/testsuite/testcases/src/quorum_store_onchain_enable_test.rs +++ b/testsuite/testcases/src/quorum_store_onchain_enable_test.rs @@ -13,7 
+13,6 @@ use aptos_types::{ }; use async_trait::async_trait; use std::time::Duration; -use tokio::runtime::Runtime; const MAX_NODE_LAG_SECS: u64 = 360; @@ -25,88 +24,81 @@ impl Test for QuorumStoreOnChainEnableTest { } } +#[async_trait] impl NetworkLoadTest for QuorumStoreOnChainEnableTest { - fn test( + async fn test( &self, swarm: &mut dyn aptos_forge::Swarm, _report: &mut aptos_forge::TestReport, duration: std::time::Duration, ) -> anyhow::Result<()> { - let runtime = Runtime::new().unwrap(); - let faucet_endpoint: reqwest::Url = "http://localhost:8081".parse().unwrap(); let rest_client = swarm.validators().next().unwrap().rest_client(); - let mut cli = runtime.block_on(async { - CliTestFramework::new( - swarm.validators().next().unwrap().rest_api_endpoint(), - faucet_endpoint, - /*num_cli_accounts=*/ 0, + let rest_api_endpoint = swarm.validators().next().unwrap().rest_api_endpoint(); + let mut cli = CliTestFramework::new( + rest_api_endpoint, + faucet_endpoint, + /*num_cli_accounts=*/ 0, + ) + .await; + + tokio::time::sleep(duration / 2).await; + + let root_cli_index = { + let root_account = swarm.chain_info().root_account(); + cli.add_account_with_address_to_cli( + root_account.private_key().clone(), + root_account.address(), ) - .await - }); - - std::thread::sleep(duration / 2); - - runtime.block_on(async { + }; - let root_cli_index = { - let root_account = swarm.chain_info().root_account(); - cli.add_account_with_address_to_cli( - root_account.private_key().clone(), - root_account.address(), + let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( + &rest_client + .get_account_resource_bcs::>( + CORE_CODE_ADDRESS, + "0x1::consensus_config::ConsensusConfig", ) - }; - - let current_consensus_config: OnChainConsensusConfig = bcs::from_bytes( - &rest_client - .get_account_resource_bcs::>( - CORE_CODE_ADDRESS, - "0x1::consensus_config::ConsensusConfig", - ) - .await - .unwrap() - .into_inner(), - ) - .unwrap(); - - let inner = match 
current_consensus_config { - OnChainConsensusConfig::V1(inner) => inner, - OnChainConsensusConfig::V2(_) => panic!("Unexpected V2 config"), - _ => unimplemented!() - }; - - // Change to V2 - let new_consensus_config = OnChainConsensusConfig::V2(ConsensusConfigV1 { ..inner }); - - let update_consensus_config_script = format!( - r#" - script {{ - use aptos_framework::aptos_governance; - use aptos_framework::consensus_config; - fun main(core_resources: &signer) {{ - let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); - let config_bytes = {}; - consensus_config::set(&framework_signer, config_bytes); - }} + .await + .unwrap() + .into_inner(), + ) + .unwrap(); + + let inner = match current_consensus_config { + OnChainConsensusConfig::V1(inner) => inner, + OnChainConsensusConfig::V2(_) => panic!("Unexpected V2 config"), + _ => unimplemented!(), + }; + + // Change to V2 + let new_consensus_config = OnChainConsensusConfig::V2(ConsensusConfigV1 { ..inner }); + + let update_consensus_config_script = format!( + r#" + script {{ + use aptos_framework::aptos_governance; + use aptos_framework::consensus_config; + fun main(core_resources: &signer) {{ + let framework_signer = aptos_governance::get_signer_testnet_only(core_resources, @0000000000000000000000000000000000000000000000000000000000000001); + let config_bytes = {}; + consensus_config::set(&framework_signer, config_bytes); }} - "#, - generate_onchain_config_blob(&bcs::to_bytes(&new_consensus_config).unwrap()) - ); + }} + "#, + generate_onchain_config_blob(&bcs::to_bytes(&new_consensus_config).unwrap()) + ); - cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) - .await - })?; + cli.run_script_with_default_framework(root_cli_index, &update_consensus_config_script) + .await?; - std::thread::sleep(duration / 2); + tokio::time::sleep(duration / 2).await; // Wait for all nodes to synchronize and 
stabilize. info!("Waiting for the validators to be synchronized."); - runtime.block_on(async { - swarm - .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) - .await - })?; + swarm + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await?; Ok(()) } diff --git a/testsuite/testcases/src/three_region_simulation_test.rs b/testsuite/testcases/src/three_region_simulation_test.rs index 5efc2d4d10974..f4718d383d6d7 100644 --- a/testsuite/testcases/src/three_region_simulation_test.rs +++ b/testsuite/testcases/src/three_region_simulation_test.rs @@ -83,23 +83,24 @@ fn create_bandwidth_limit() -> SwarmNetworkBandwidth { } } +#[async_trait] impl NetworkLoadTest for ThreeRegionSameCloudSimulationTest { - fn setup(&self, ctx: &mut NetworkContext) -> anyhow::Result { + async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { // inject network delay let delay = create_three_region_swarm_network_delay(ctx.swarm()); let chaos = SwarmChaos::Delay(delay); - ctx.runtime.block_on(ctx.swarm.inject_chaos(chaos))?; + ctx.swarm.inject_chaos(chaos).await?; // inject bandwidth limit let bandwidth = create_bandwidth_limit(); let chaos = SwarmChaos::Bandwidth(bandwidth); - ctx.runtime.block_on(ctx.swarm.inject_chaos(chaos))?; + ctx.swarm.inject_chaos(chaos).await?; Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn finish(&self, ctx: &mut NetworkContext) -> anyhow::Result<()> { - ctx.runtime.block_on(ctx.swarm.remove_all_chaos())?; + async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { + ctx.swarm.remove_all_chaos().await?; Ok(()) } } diff --git a/testsuite/testcases/src/two_traffics_test.rs b/testsuite/testcases/src/two_traffics_test.rs index 1d02e6a26a768..6dc5febbdbc5d 100644 --- a/testsuite/testcases/src/two_traffics_test.rs +++ b/testsuite/testcases/src/two_traffics_test.rs @@ -1,9 +1,7 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use crate::{ - 
create_emitter_and_request, traffic_emitter_runtime, LoadDestination, NetworkLoadTest, -}; +use crate::{create_emitter_and_request, LoadDestination, NetworkLoadTest}; use aptos_forge::{ success_criteria::{SuccessCriteria, SuccessCriteriaChecker}, EmitJobRequest, NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport, @@ -24,8 +22,9 @@ impl Test for TwoTrafficsTest { } } +#[async_trait] impl NetworkLoadTest for TwoTrafficsTest { - fn test( + async fn test( &self, swarm: &mut dyn Swarm, report: &mut TestReport, @@ -46,15 +45,11 @@ impl NetworkLoadTest for TwoTrafficsTest { rng, )?; - let rt = traffic_emitter_runtime()?; - let test_start = Instant::now(); - let stats = rt.block_on(emitter.emit_txn_for( - swarm.chain_info().root_account, - emit_job_request, - duration, - ))?; + let stats = emitter + .emit_txn_for(swarm.chain_info().root_account, emit_job_request, duration) + .await?; let actual_test_duration = test_start.elapsed(); info!( diff --git a/testsuite/testcases/src/validator_join_leave_test.rs b/testsuite/testcases/src/validator_join_leave_test.rs index e6634d27d6d3e..f1b9bc5030b2f 100644 --- a/testsuite/testcases/src/validator_join_leave_test.rs +++ b/testsuite/testcases/src/validator_join_leave_test.rs @@ -14,7 +14,6 @@ use aptos_sdk::crypto::{ed25519::Ed25519PrivateKey, PrivateKey}; use aptos_types::{account_address::AccountAddress, transaction::authenticator::AuthenticationKey}; use async_trait::async_trait; use std::time::Duration; -use tokio::runtime::Runtime; const MAX_NODE_LAG_SECS: u64 = 360; @@ -26,12 +25,13 @@ impl Test for ValidatorJoinLeaveTest { } } +#[async_trait] impl NetworkLoadTest for ValidatorJoinLeaveTest { - fn setup(&self, _ctx: &mut NetworkContext) -> Result { + async fn setup<'a>(&self, _ctx: &mut NetworkContext<'a>) -> Result { Ok(LoadDestination::FullnodesOtherwiseValidators) } - fn test( + async fn test( &self, swarm: &mut dyn Swarm, _report: &mut TestReport, @@ -51,16 +51,14 @@ impl NetworkLoadTest for 
ValidatorJoinLeaveTest { // Connect the operator tool to the node's JSON RPC API let rest_client = swarm.validators().next().unwrap().rest_client(); let transaction_factory = swarm.chain_info().transaction_factory(); - let runtime = Runtime::new().unwrap(); - let mut cli = runtime.block_on(async { - CliTestFramework::new( - swarm.validators().next().unwrap().rest_api_endpoint(), - faucet_endpoint, - /*num_cli_accounts=*/ 0, - ) - .await - }); + let rest_api_endpoint = swarm.validators().next().unwrap().rest_api_endpoint(); + let mut cli = CliTestFramework::new( + rest_api_endpoint, + faucet_endpoint, + /*num_cli_accounts=*/ 0, + ) + .await; let mut public_info = swarm.chain_info().into_aptos_public_info(); @@ -79,30 +77,25 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { let mut keygen = KeyGen::from_seed(seed_slice); - let (validator_cli_index, _keys, account_balance) = runtime.block_on(async { - let (validator_cli_index, keys) = - init_validator_account(&mut cli, &mut keygen).await; - - let auth_key = AuthenticationKey::ed25519(&keys.account_private_key.public_key()); - let validator_account_address = AccountAddress::new(*auth_key.account_address()); + let (validator_cli_index, keys) = init_validator_account(&mut cli, &mut keygen).await; - public_info - .mint(validator_account_address, DEFAULT_FUNDED_COINS) - .await - .unwrap(); + let auth_key = AuthenticationKey::ed25519(&keys.account_private_key.public_key()); + let validator_account_address = AccountAddress::new(*auth_key.account_address()); - let account_balance = public_info - .get_balance(validator_account_address) - .await - .unwrap(); + public_info + .mint(validator_account_address, DEFAULT_FUNDED_COINS) + .await + .unwrap(); - (validator_cli_index, keys, account_balance) - }); + let account_balance = public_info + .get_balance(validator_account_address) + .await + .unwrap(); assert_eq!(account_balance, DEFAULT_FUNDED_COINS); validator_cli_indices.push(validator_cli_index); assert_eq!( - 
runtime.block_on(get_validator_state(&cli, validator_cli_index)), + get_validator_state(&cli, validator_cli_index).await, ValidatorState::ACTIVE ); } @@ -116,59 +109,51 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { // Wait for all nodes to synchronize and stabilize. info!("Waiting for the validators to be synchronized."); - runtime.block_on(async { - swarm - .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) - .await - })?; + swarm + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await?; // Wait for 1/3 of the test duration. - std::thread::sleep(duration / 3); - - runtime.block_on(async { - // 1/3 validators leave the validator set. - info!("Make the last 1/3 validators leave the validator set!"); - for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { - cli.leave_validator_set(*operator_index, None) - .await - .unwrap(); - - let root_account = swarm.chain_info().root_account(); - reconfig(&rest_client, &transaction_factory, root_account).await; - } - - { - let root_account = swarm.chain_info().root_account(); - reconfig(&rest_client, &transaction_factory, root_account).await; - } - }); + tokio::time::sleep(duration / 3).await; + + // 1/3 validators leave the validator set. + info!("Make the last 1/3 validators leave the validator set!"); + for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { + cli.leave_validator_set(*operator_index, None) + .await + .unwrap(); + + let root_account = swarm.chain_info().root_account(); + reconfig(&rest_client, &transaction_factory, root_account).await; + } + + { + let root_account = swarm.chain_info().root_account(); + reconfig(&rest_client, &transaction_factory, root_account).await; + } // Wait for 1/3 of the test duration. - std::thread::sleep(duration / 3); + tokio::time::sleep(duration / 3).await; - runtime.block_on(async { - // Rejoining validator set. 
- info!("Make the last 1/3 validators rejoin the validator set!"); - for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { - cli.join_validator_set(*operator_index, None).await.unwrap(); + // Rejoining validator set. + info!("Make the last 1/3 validators rejoin the validator set!"); + for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { + cli.join_validator_set(*operator_index, None).await.unwrap(); - let root_account = swarm.chain_info().root_account(); - reconfig(&rest_client, &transaction_factory, root_account).await; - } + let root_account = swarm.chain_info().root_account(); + reconfig(&rest_client, &transaction_factory, root_account).await; + } - { - let root_account = swarm.chain_info().root_account(); - reconfig(&rest_client, &transaction_factory, root_account).await; - } - }); + { + let root_account = swarm.chain_info().root_account(); + reconfig(&rest_client, &transaction_factory, root_account).await; + } // Wait for all nodes to synchronize and stabilize. 
info!("Waiting for the validators to be synchronized."); - runtime.block_on(async { - swarm - .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) - .await - })?; + swarm + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await?; Ok(()) } diff --git a/testsuite/testcases/src/validator_reboot_stress_test.rs b/testsuite/testcases/src/validator_reboot_stress_test.rs index a9e7457a9cca1..07334f593730d 100644 --- a/testsuite/testcases/src/validator_reboot_stress_test.rs +++ b/testsuite/testcases/src/validator_reboot_stress_test.rs @@ -6,7 +6,7 @@ use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, use async_trait::async_trait; use rand::{seq::SliceRandom, thread_rng}; use std::time::Duration; -use tokio::{runtime::Runtime, time::Instant}; +use tokio::time::Instant; pub struct ValidatorRebootStressTest { pub num_simultaneously: usize, @@ -20,40 +20,41 @@ impl Test for ValidatorRebootStressTest { } } +#[async_trait] impl NetworkLoadTest for ValidatorRebootStressTest { - fn test( + async fn test( &self, swarm: &mut dyn Swarm, _report: &mut TestReport, duration: Duration, ) -> Result<()> { let start = Instant::now(); - let runtime = Runtime::new().unwrap(); let all_validators = swarm.validators().map(|v| v.peer_id()).collect::>(); - let mut rng = thread_rng(); - while start.elapsed() < duration { - let addresses: Vec<_> = all_validators - .choose_multiple(&mut rng, self.num_simultaneously) - .cloned() - .collect(); + let addresses: Vec<_> = { + let mut rng = thread_rng(); + all_validators + .choose_multiple(&mut rng, self.num_simultaneously) + .cloned() + .collect() + }; for adr in &addresses { let validator_to_reboot = swarm.validator_mut(*adr).unwrap(); - runtime.block_on(async { validator_to_reboot.stop().await })?; + validator_to_reboot.stop().await?; } if self.down_time_secs > 0.0 { - std::thread::sleep(Duration::from_secs_f32(self.down_time_secs)); + 
tokio::time::sleep(Duration::from_secs_f32(self.down_time_secs)).await; } for adr in &addresses { let validator_to_reboot = swarm.validator_mut(*adr).unwrap(); - runtime.block_on(async { validator_to_reboot.start().await })?; + validator_to_reboot.start().await?; } if self.pause_secs > 0.0 { - std::thread::sleep(Duration::from_secs_f32(self.pause_secs)); + tokio::time::sleep(Duration::from_secs_f32(self.pause_secs)).await; } } From d4d7a91833984f79c86b6a87b88773a6ef9c6606 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 7 Jun 2024 12:40:58 -0400 Subject: [PATCH 13/28] one less block_on --- testsuite/testcases/src/lib.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index 1c5bccbd70e27..a927939aad555 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -234,13 +234,14 @@ impl NetworkTest for dyn NetworkLoadTest { async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let runtime = Runtime::new().unwrap(); let start_timestamp = SystemTime::now() .duration_since(UNIX_EPOCH) .expect("Time went backwards") .as_secs(); - let (start_version, _) = runtime - .block_on(ctx.swarm().get_client_with_newest_ledger_version()) + let (start_version, _) = ctx + .swarm() + .get_client_with_newest_ledger_version() + .await .context("no clients replied for start version")?; let emit_job_request = ctx.emit_job.clone(); let rng = SeedableRng::from_rng(ctx.core().rng())?; @@ -289,8 +290,10 @@ impl NetworkTest for dyn NetworkLoadTest { .duration_since(UNIX_EPOCH) .expect("Time went backwards") .as_secs(); - let (end_version, _) = runtime - .block_on(ctx.swarm().get_client_with_newest_ledger_version()) + let (end_version, _) = ctx + .swarm() + .get_client_with_newest_ledger_version() + .await .context("no clients replied for end version")?; 
self.finish(ctx).await.context("finish NetworkLoadTest ")?; From 2876d2650ca04856186acb074d3fac4ab48d29ed Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 7 Jun 2024 13:28:26 -0400 Subject: [PATCH 14/28] less block_on --- testsuite/testcases/src/lib.rs | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index a927939aad555..da13a8463a63c 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -43,7 +43,7 @@ use std::{ ops::DerefMut, time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; -use tokio::runtime::Runtime; +use tokio::runtime::{Handle, Runtime}; const WARMUP_DURATION_FRACTION: f32 = 0.07; const COOLDOWN_DURATION_FRACTION: f32 = 0.04; @@ -334,7 +334,6 @@ impl dyn NetworkLoadTest + '_ { create_emitter_and_request(ctx.swarm(), emit_job_request, &nodes_to_send_load_to, rng) .context("create emitter")?; - let rt = traffic_emitter_runtime()?; let clients = ctx .swarm() .get_clients_for_peers(&nodes_to_send_load_to, Duration::from_secs(10)); @@ -346,12 +345,12 @@ impl dyn NetworkLoadTest + '_ { } info!("Starting emitting txns for {}s", duration.as_secs()); - let mut job = rt - .block_on(emitter.start_job( + let mut job = emitter.start_job( ctx.swarm().chain_info().root_account, emit_job_request, stats_tracking_phases, - )) + ) + .await .context("start emitter job")?; let total_start = PhaseTimingStart::now(); @@ -361,14 +360,14 @@ impl dyn NetworkLoadTest + '_ { let test_duration = duration - warmup_duration - cooldown_duration; let phase_duration = test_duration.div_f32((stats_tracking_phases - 2) as f32); - job = rt.block_on(job.periodic_stat_forward(warmup_duration, 60)); + job = job.periodic_stat_forward(warmup_duration, 60).await; info!("{}s warmup finished", warmup_duration.as_secs()); let mut phase_timing = Vec::new(); let mut phase_start_network_state = Vec::new(); let test_start = Instant::now(); for i in 
0..stats_tracking_phases - 2 { - phase_start_network_state.push(rt.block_on(NetworkState::new(&clients))); + phase_start_network_state.push(NetworkState::new(&clients).await); job.start_next_phase(); if i > 0 { @@ -380,11 +379,11 @@ impl dyn NetworkLoadTest + '_ { } let phase_start = PhaseTimingStart::now(); - let join_stats = rt.spawn(job.periodic_stat_forward(phase_duration, 60)); + let join_stats = Handle::current().spawn(job.periodic_stat_forward(phase_duration, 60)); self.test(ctx.swarm, ctx.report, phase_duration) .await .context("test NetworkLoadTest")?; - job = rt.block_on(join_stats).context("join stats")?; + job = join_stats.await.context("join stats")?; phase_timing.push(phase_start.elapsed()); } let actual_test_duration = test_start.elapsed(); @@ -394,13 +393,13 @@ impl dyn NetworkLoadTest + '_ { actual_test_duration.as_secs() ); - phase_start_network_state.push(rt.block_on(NetworkState::new(&clients))); + phase_start_network_state.push(NetworkState::new(&clients).await); job.start_next_phase(); let cooldown_start = Instant::now(); let cooldown_used = cooldown_start.elapsed(); if cooldown_used < cooldown_duration { - job = rt.block_on(job.periodic_stat_forward(cooldown_duration - cooldown_used, 60)); + job = job.periodic_stat_forward(cooldown_duration - cooldown_used, 60).await; } info!("{}s cooldown finished", cooldown_duration.as_secs()); @@ -411,7 +410,7 @@ impl dyn NetworkLoadTest + '_ { total_timing.start_unixtime_s, total_timing.end_unixtime_s, ); - let stats_by_phase = rt.block_on(job.stop_job()); + let stats_by_phase = job.stop_job().await; info!("Stopped job"); info!("Warmup stats: {}", stats_by_phase[0].rate()); @@ -427,11 +426,11 @@ impl dyn NetworkLoadTest + '_ { } else { Some(cur.clone()) }; - let latency_breakdown = rt.block_on(fetch_latency_breakdown( + let latency_breakdown = fetch_latency_breakdown( ctx.swarm(), phase_timing[i].start_unixtime_s, phase_timing[i].end_unixtime_s, - ))?; + ).await?; info!( "Latency breakdown details for 
phase {}: from {} to {}: {:?}", i, From 2fd0b985b4ad0f97f3ab8b15ea27a0872dcb9ba5 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 7 Jun 2024 14:25:30 -0400 Subject: [PATCH 15/28] async contagion spreads --- testsuite/forge/src/interface/network.rs | 28 ++++++++++++------------ testsuite/testcases/src/lib.rs | 11 +++++++--- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/testsuite/forge/src/interface/network.rs b/testsuite/forge/src/interface/network.rs index 7b77832b4920e..b2ce759931d46 100644 --- a/testsuite/forge/src/interface/network.rs +++ b/testsuite/forge/src/interface/network.rs @@ -91,7 +91,7 @@ impl<'t> NetworkContext<'t> { &mut self.core } - pub fn check_for_success( + pub async fn check_for_success( &mut self, stats: &TxnStats, window: Duration, @@ -101,19 +101,19 @@ impl<'t> NetworkContext<'t> { start_version: u64, end_version: u64, ) -> Result<()> { - self.runtime - .block_on(SuccessCriteriaChecker::check_for_success( - &self.success_criteria, - self.swarm, - self.report, - stats, - window, - latency_breakdown, - start_time, - end_time, - start_version, - end_version, - )) + SuccessCriteriaChecker::check_for_success( + &self.success_criteria, + self.swarm, + self.report, + stats, + window, + latency_breakdown, + start_time, + end_time, + start_version, + end_version, + ) + .await } pub fn handle(&self) -> Handle { diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index da13a8463a63c..43dc1d851325a 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -308,6 +308,7 @@ impl NetworkTest for dyn NetworkLoadTest { start_version, end_version, ) + .await .context("check for success")?; } @@ -345,7 +346,8 @@ impl dyn NetworkLoadTest + '_ { } info!("Starting emitting txns for {}s", duration.as_secs()); - let mut job = emitter.start_job( + let mut job = emitter + .start_job( ctx.swarm().chain_info().root_account, emit_job_request, stats_tracking_phases, @@ -399,7 +401,9 @@ impl dyn 
NetworkLoadTest + '_ { let cooldown_used = cooldown_start.elapsed(); if cooldown_used < cooldown_duration { - job = job.periodic_stat_forward(cooldown_duration - cooldown_used, 60).await; + job = job + .periodic_stat_forward(cooldown_duration - cooldown_used, 60) + .await; } info!("{}s cooldown finished", cooldown_duration.as_secs()); @@ -430,7 +434,8 @@ impl dyn NetworkLoadTest + '_ { ctx.swarm(), phase_timing[i].start_unixtime_s, phase_timing[i].end_unixtime_s, - ).await?; + ) + .await?; info!( "Latency breakdown details for phase {}: from {} to {}: {:?}", i, From ce7b718a0af9867d900a9b7bccb305a34fa5d147 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Tue, 11 Jun 2024 10:05:44 -0400 Subject: [PATCH 16/28] fix --- testsuite/smoke-test/src/utils.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testsuite/smoke-test/src/utils.rs b/testsuite/smoke-test/src/utils.rs index a720671836eb1..dc64bdb686b1c 100644 --- a/testsuite/smoke-test/src/utils.rs +++ b/testsuite/smoke-test/src/utils.rs @@ -12,7 +12,7 @@ use aptos_sdk::{ use aptos_types::on_chain_config::{OnChainConsensusConfig, OnChainExecutionConfig}; use move_core_types::language_storage::CORE_CODE_ADDRESS; use rand::random; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; pub const MAX_CATCH_UP_WAIT_SECS: u64 = 180; // The max time we'll wait for nodes to catch up pub const MAX_CONNECTIVITY_WAIT_SECS: u64 = 180; // The max time we'll wait for nodes to gain connectivity @@ -117,7 +117,7 @@ pub async fn transfer_coins( pub async fn transfer_and_maybe_reconfig( client: &RestClient, transaction_factory: &TransactionFactory, - root_account: &mut LocalAccount, + root_account: Arc, sender: &mut LocalAccount, receiver: &LocalAccount, num_transfers: usize, @@ -125,7 +125,7 @@ pub async fn transfer_and_maybe_reconfig( for _ in 0..num_transfers { // Reconfigurations have a 20% chance of being executed if random::() % 5 == 0 { - reconfig(client, transaction_factory, 
root_account).await; + reconfig(client, transaction_factory, root_account.clone()).await; } transfer_coins(client, transaction_factory, sender, receiver, 1).await; From 1eb9a051297a6bcee55a96303cf87e831158b1ae Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Tue, 11 Jun 2024 10:17:39 -0400 Subject: [PATCH 17/28] hack compat duration --- testsuite/forge-cli/src/main.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index 651b45e8db3c1..3c50c1ac4cc5b 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -279,7 +279,13 @@ fn main() -> Result<()> { logger.build(); let args = Args::parse(); - let duration = Duration::from_secs(args.duration_secs as u64); + let duration = if args.suite == "compat" { + // TODO: if this needs to be more perminent than hacking into this branch, edit + // .github/workflows/docker-build-test.yaml + Duration::from_secs(30 * 60) + } else { + Duration::from_secs(args.duration_secs as u64) + }; let suite_name: &str = args.suite.as_ref(); let runtime = Runtime::new()?; From 8a1878297827e3447085eb630612d066e948be3e Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Tue, 11 Jun 2024 12:14:14 -0400 Subject: [PATCH 18/28] logging --- testsuite/testcases/src/compatibility_test.rs | 18 ++++++++---------- testsuite/testcases/src/lib.rs | 5 +++++ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index 8e325fc339c3d..053ceec035a48 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -66,6 +66,7 @@ async fn stat_gather_task( ) -> Result> { let mut upgrade_stats = vec![]; while !done.load(Ordering::Relaxed) { + info!("stat_gather_task some traffic..."); let upgrading_stats = emitter .clone() .emit_txn_for( @@ -74,6 +75,7 @@ async fn stat_gather_task( 
upgrade_traffic_chunk_duration, ) .await?; + info!("stat_gather_task some stats: {}", &upgrading_stats); upgrade_stats.push(upgrading_stats); } let statsum = upgrade_stats.into_iter().reduce(|a, b| &a + &b); @@ -139,10 +141,10 @@ fn upgrade_and_gather_stats( let emitter_ctx = ctxa.clone(); let mut stats_result: Result> = Ok(None); let mut upgrade_result: Result<()> = Ok(()); - // std::thread::scope(|scopev| { tokio_scoped::scope(|scopev| { // emit trafic and gather stats scopev.spawn(async { + info!("upgrade_and_gather_stats traffic thread start"); let mut ctx_locker = emitter_ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); let emit_job_request = ctx.emit_job.clone(); @@ -156,14 +158,8 @@ fn upgrade_and_gather_stats( }, }; let source_account = ctx.swarm().chain_info().root_account; - // let traffic_runtime = match traffic_emitter_runtime() { - // Ok(x) => x, - // Err(err) => { - // stats_result = Err(err); - // return; - // } - // }; let upgrade_traffic_chunk_duration = Duration::from_secs(15); + info!("upgrade_and_gather_stats traffic thread 1"); stats_result = stat_gather_task( emitter, emit_job_request, @@ -172,11 +168,11 @@ fn upgrade_and_gather_stats( upgrade_done.clone(), ) .await; + info!("upgrade_and_gather_stats traffic thread done"); }); // do upgrade scopev.spawn(async { - // let runtime = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap(); - // upgrade_result = runtime.block_on(batch_update_gradually(ctxa, validators_to_update, version, wait_until_healthy, delay, max_wait)); + info!("upgrade_and_gather_stats upgrade thread start"); upgrade_result = batch_update_gradually( ctxa, validators_to_update, @@ -186,7 +182,9 @@ fn upgrade_and_gather_stats( max_wait, ) .await; + info!("upgrade_and_gather_stats upgrade thread 1"); upgrade_done.store(true, Ordering::Relaxed); + info!("upgrade_and_gather_stats upgrade thread done"); }); }); diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index 
43dc1d851325a..dd468bb7c49a0 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -80,6 +80,7 @@ async fn batch_update_gradually( ) -> Result<()> { // let mut swarm = ctx.swarm(); for validator in validators_to_update { + info!("batch_update_gradually upgrade start: {}", validator); ctxa.ctx .lock() .await @@ -87,6 +88,7 @@ async fn batch_update_gradually( .upgrade_validator(*validator, version) .await?; if wait_until_healthy { + info!("batch_update_gradually upgrade waiting: {}", validator); let deadline = Instant::now() + max_wait; ctxa.ctx .lock() @@ -96,10 +98,13 @@ async fn batch_update_gradually( .unwrap() .wait_until_healthy(deadline) .await?; + info!("batch_update_gradually upgrade healthy: {}", validator); } if !delay.is_zero() { + info!("batch_update_gradually upgrade delay: {:?}", delay); tokio::time::sleep(delay).await; } + info!("batch_update_gradually upgrade done: {}", validator); } ctxa.ctx.lock().await.swarm().health_check().await?; From 36ba0701aa9c3f9ae2cd5d0f017d9c6d7382c971 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Wed, 12 Jun 2024 15:21:26 -0400 Subject: [PATCH 19/28] making parts of Swarm and Node traits &self from &mut self --- testsuite/forge/src/backend/k8s/fullnode.rs | 3 +- testsuite/forge/src/backend/k8s/node.rs | 15 +++-- testsuite/forge/src/backend/k8s/swarm.rs | 7 +- testsuite/forge/src/backend/local/node.rs | 65 ++++++++++--------- testsuite/forge/src/backend/local/swarm.rs | 8 +-- testsuite/forge/src/interface/node.rs | 10 +-- testsuite/forge/src/interface/swarm.rs | 2 +- testsuite/testcases/src/compatibility_test.rs | 30 +++++---- 8 files changed, 76 insertions(+), 64 deletions(-) diff --git a/testsuite/forge/src/backend/k8s/fullnode.rs b/testsuite/forge/src/backend/k8s/fullnode.rs index f7537e3a492cb..311cb89e347ec 100644 --- a/testsuite/forge/src/backend/k8s/fullnode.rs +++ b/testsuite/forge/src/backend/k8s/fullnode.rs @@ -37,6 +37,7 @@ use std::{ path::PathBuf, sync::Arc, }; +use 
std::sync::atomic::AtomicU32; use tempfile::TempDir; // these are constants given by the aptos-node helm chart @@ -504,7 +505,7 @@ pub async fn install_public_fullnode<'a>( haproxy_enabled: false, port_forward_enabled: use_port_forward, - rest_api_port: REST_API_SERVICE_PORT, // in the case of port-forward, this port will be changed at runtime + rest_api_port: AtomicU32::new(REST_API_SERVICE_PORT), // in the case of port-forward, this port will be changed at runtime }; Ok((node_peer_id, ret_node)) diff --git a/testsuite/forge/src/backend/k8s/node.rs b/testsuite/forge/src/backend/k8s/node.rs index 582da1ae89165..f52b5a19f76a1 100644 --- a/testsuite/forge/src/backend/k8s/node.rs +++ b/testsuite/forge/src/backend/k8s/node.rs @@ -23,6 +23,7 @@ use std::{ thread, time::{Duration, Instant}, }; +use std::sync::atomic::{AtomicU32, Ordering}; const APTOS_DATA_DIR: &str = "/opt/aptos/data"; @@ -32,7 +33,7 @@ pub struct K8sNode { pub(crate) peer_id: PeerId, pub(crate) index: usize, pub(crate) service_name: String, - pub(crate) rest_api_port: u32, + pub(crate) rest_api_port: AtomicU32, pub version: Version, pub namespace: String, // whether this node has HAProxy in front of it @@ -43,7 +44,7 @@ pub struct K8sNode { impl K8sNode { fn rest_api_port(&self) -> u32 { - self.rest_api_port + self.rest_api_port.load(Ordering::SeqCst) } fn service_name(&self) -> String { @@ -133,19 +134,19 @@ impl Node for K8sNode { self.peer_id } - async fn start(&mut self) -> Result<()> { + async fn start(&self) -> Result<()> { scale_stateful_set_replicas(self.stateful_set_name(), self.namespace(), 1).await?; // need to port-forward again since the node is coming back // note that we will get a new port if self.port_forward_enabled { - self.rest_api_port = get_free_port(); + self.rest_api_port.store(get_free_port(), Ordering::SeqCst); self.port_forward_rest_api()?; } self.wait_until_healthy(Instant::now() + Duration::from_secs(60)) .await } - async fn stop(&mut self) -> Result<()> { + async fn 
stop(&self) -> Result<()> { info!("going to stop node {}", self.stateful_set_name()); scale_stateful_set_replicas(self.stateful_set_name(), self.namespace(), 0).await } @@ -236,7 +237,7 @@ impl Node for K8sNode { Ok(port as u64) } - async fn health_check(&mut self) -> Result<(), HealthCheckError> { + async fn health_check(&self) -> Result<(), HealthCheckError> { self.rest_client() .get_ledger_information() .await @@ -256,7 +257,7 @@ impl Node for K8sNode { .unwrap() } - async fn get_identity(&mut self) -> Result { + async fn get_identity(&self) -> Result { stateful_set::get_identity(self.stateful_set_name(), self.namespace()).await } diff --git a/testsuite/forge/src/backend/k8s/swarm.rs b/testsuite/forge/src/backend/k8s/swarm.rs index 14b73974f13c1..c795a3adb515d 100644 --- a/testsuite/forge/src/backend/k8s/swarm.rs +++ b/testsuite/forge/src/backend/k8s/swarm.rs @@ -42,6 +42,7 @@ use std::{ env, str, sync::Arc, }; +use std::sync::atomic::AtomicU32; // use std::sync::Mutex; use tokio::{runtime::Runtime, time::Duration}; @@ -180,7 +181,7 @@ impl K8sSwarm { self.get_kube_client(), Some(self.kube_namespace.clone()), )); - let (peer_id, mut k8snode) = install_public_fullnode( + let (peer_id, k8snode) = install_public_fullnode( stateful_set_api, configmap_api, persistent_volume_claim_api, @@ -203,7 +204,7 @@ impl K8sSwarm { #[async_trait::async_trait] impl Swarm for K8sSwarm { - async fn health_check(&mut self) -> Result<()> { + async fn health_check(&self) -> Result<()> { let nodes = self.validators.values().collect(); let unhealthy_nodes = nodes_healthcheck(nodes).await.unwrap(); if !unhealthy_nodes.is_empty() { @@ -572,7 +573,7 @@ fn get_k8s_node_from_stateful_set( peer_id: PeerId::random(), index, service_name, - rest_api_port, + rest_api_port: AtomicU32::new(rest_api_port), version: Version::new(0, image_tag), namespace: namespace.to_string(), haproxy_enabled: enable_haproxy, diff --git a/testsuite/forge/src/backend/local/node.rs 
b/testsuite/forge/src/backend/local/node.rs index a2384d0febd2b..5f57692224e95 100644 --- a/testsuite/forge/src/backend/local/node.rs +++ b/testsuite/forge/src/backend/local/node.rs @@ -50,7 +50,7 @@ impl Drop for Process { #[derive(Debug)] pub struct LocalNode { version: LocalVersion, - process: Option, + process: std::sync::Mutex>, name: String, index: usize, account_private_key: Option>, @@ -81,7 +81,7 @@ impl LocalNode { Ok(Self { version, - process: None, + process: std::sync::Mutex::new(None), name, index, account_private_key, @@ -115,8 +115,9 @@ impl LocalNode { &self.account_private_key } - pub fn start(&mut self) -> Result<()> { - ensure!(self.process.is_none(), "node {} already running", self.name); + pub fn start(&self) -> Result<()> { + let mut process_locker = self.process.lock().unwrap(); + ensure!(process_locker.is_none(), "node {} already running", self.name); // Ensure log file exists let log_file = OpenOptions::new() @@ -172,13 +173,13 @@ impl LocalNode { self.config.storage.backup_service_address.port() ); - self.process = Some(Process(process)); + *process_locker = Some(Process(process)); Ok(()) } - pub fn stop(&mut self) { - self.process = None; + pub fn stop(&self) { + *(self.process.lock().unwrap()) = None; } pub fn port(&self) -> u16 { @@ -207,28 +208,32 @@ impl LocalNode { fs::read_to_string(self.log_path()).map_err(Into::into) } - pub async fn health_check(&mut self) -> Result<(), HealthCheckError> { + pub async fn health_check(&self) -> Result<(), HealthCheckError> { debug!("Health check on node '{}'", self.name); - if let Some(p) = &mut self.process { - match p.0.try_wait() { - // This would mean the child process has crashed - Ok(Some(status)) => { - let error = format!("Node '{}' crashed with: {}", self.name, status); - return Err(HealthCheckError::NotRunning(error)); - }, - - // This is the case where the node is still running - Ok(None) => {}, - - // Some other unknown error - Err(e) => { - return 
Err(HealthCheckError::Unknown(e.into())); - }, + { + let mut process_locker = self.process.lock().unwrap(); + let process = process_locker.as_mut(); + if let Some(p) = process { + match p.0.try_wait() { + // This would mean the child process has crashed + Ok(Some(status)) => { + let error = format!("Node '{}' crashed with: {}", self.name, status); + return Err(HealthCheckError::NotRunning(error)); + }, + + // This is the case where the node is still running + Ok(None) => {}, + + // Some other unknown error + Err(e) => { + return Err(HealthCheckError::Unknown(e.into())); + }, + } + } else { + let error = format!("Node '{}' is stopped", self.name); + return Err(HealthCheckError::NotRunning(error)); } - } else { - let error = format!("Node '{}' is stopped", self.name); - return Err(HealthCheckError::NotRunning(error)); } self.inspection_client() @@ -281,16 +286,16 @@ impl Node for LocalNode { self.config() } - async fn start(&mut self) -> Result<()> { + async fn start(&self) -> Result<()> { self.start() } - async fn stop(&mut self) -> Result<()> { + async fn stop(&self) -> Result<()> { self.stop(); Ok(()) } - async fn get_identity(&mut self) -> Result { + async fn get_identity(&self) -> Result { todo!() } @@ -355,7 +360,7 @@ impl Node for LocalNode { Ok(()) } - async fn health_check(&mut self) -> Result<(), HealthCheckError> { + async fn health_check(&self) -> Result<(), HealthCheckError> { self.health_check().await } diff --git a/testsuite/forge/src/backend/local/swarm.rs b/testsuite/forge/src/backend/local/swarm.rs index 90d007161941c..02b4b24ea1e83 100644 --- a/testsuite/forge/src/backend/local/swarm.rs +++ b/testsuite/forge/src/backend/local/swarm.rs @@ -368,7 +368,7 @@ impl LocalSwarm { )?; let version = self.versions.get(version).unwrap(); - let mut fullnode = LocalNode::new( + let fullnode = LocalNode::new( version.to_owned(), fullnode_config.name, index, @@ -398,7 +398,7 @@ impl LocalSwarm { )?; let version = self.versions.get(version).unwrap(); - let mut 
fullnode = LocalNode::new( + let fullnode = LocalNode::new( version.to_owned(), fullnode_config.name, index, @@ -478,7 +478,7 @@ impl Drop for LocalSwarm { #[async_trait::async_trait] impl Swarm for LocalSwarm { - async fn health_check(&mut self) -> Result<()> { + async fn health_check(&self) -> Result<()> { Ok(()) } @@ -579,7 +579,7 @@ impl Swarm for LocalSwarm { } fn remove_full_node(&mut self, id: PeerId) -> Result<()> { - if let Some(mut fullnode) = self.fullnodes.remove(&id) { + if let Some(fullnode) = self.fullnodes.remove(&id) { fullnode.stop(); } diff --git a/testsuite/forge/src/interface/node.rs b/testsuite/forge/src/interface/node.rs index e4dbcf24edfba..c149cd61b1545 100644 --- a/testsuite/forge/src/interface/node.rs +++ b/testsuite/forge/src/interface/node.rs @@ -55,19 +55,19 @@ pub trait Node: Send + Sync { /// Start this Node. /// This should be a noop if the Node is already running. - async fn start(&mut self) -> Result<()>; + async fn start(&self) -> Result<()>; /// Stop this Node. /// This should be a noop if the Node isn't running. - async fn stop(&mut self) -> Result<()>; + async fn stop(&self) -> Result<()>; - async fn get_identity(&mut self) -> Result; + async fn get_identity(&self) -> Result; async fn set_identity(&mut self, k8s_secret_name: String) -> Result<()>; /// Clears this Node's Storage. This stops the node as well async fn clear_storage(&mut self) -> Result<()>; - async fn health_check(&mut self) -> Result<(), HealthCheckError>; + async fn health_check(&self) -> Result<(), HealthCheckError>; fn counter(&self, counter: &str, port: u64) -> Result; @@ -227,7 +227,7 @@ pub trait NodeExt: Node { Ok(self.rest_client().health_check(seconds).await?) 
} - async fn wait_until_healthy(&mut self, deadline: Instant) -> Result<()> { + async fn wait_until_healthy(&self, deadline: Instant) -> Result<()> { let mut healthcheck_error = HealthCheckError::Unknown(anyhow::anyhow!("No healthcheck performed yet")); while Instant::now() < deadline { diff --git a/testsuite/forge/src/interface/swarm.rs b/testsuite/forge/src/interface/swarm.rs index 0030ca9a3c767..330ebe5f2be6b 100644 --- a/testsuite/forge/src/interface/swarm.rs +++ b/testsuite/forge/src/interface/swarm.rs @@ -23,7 +23,7 @@ use tokio::runtime::Runtime; pub trait Swarm: Sync + Send { /// Performs a health check on the entire swarm, ensuring all Nodes are Live and that no forks /// have occurred - async fn health_check(&mut self) -> Result<()>; + async fn health_check(&self) -> Result<()>; /// Returns an Iterator of references to all the Validators in the Swarm fn validators<'a>(&'a self) -> Box + 'a>; diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index 053ceec035a48..3a3753648ec16 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -145,19 +145,23 @@ fn upgrade_and_gather_stats( // emit trafic and gather stats scopev.spawn(async { info!("upgrade_and_gather_stats traffic thread start"); - let mut ctx_locker = emitter_ctx.ctx.lock().await; - let ctx = ctx_locker.deref_mut(); - let emit_job_request = ctx.emit_job.clone(); - let rng = SeedableRng::from_rng(ctx.core().rng()).unwrap(); - let (emitter, emit_job_request) = - match create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng) { - Ok(parts) => parts, - Err(err) => { - stats_result = Err(err); - return; - }, - }; - let source_account = ctx.swarm().chain_info().root_account; + let (emitter, emit_job_request, source_account) = { + let mut ctx_locker = emitter_ctx.ctx.lock().await; + let ctx = ctx_locker.deref_mut(); + let emit_job_request = ctx.emit_job.clone(); + let rng = 
SeedableRng::from_rng(ctx.core().rng()).unwrap(); + let (emitter, emit_job_request) = + match create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng) { + Ok(parts) => parts, + Err(err) => { + stats_result = Err(err); + return; + }, + }; + let source_account = ctx.swarm().chain_info().root_account; + (emitter, emit_job_request, source_account) + // release lock on network context + }; let upgrade_traffic_chunk_duration = Duration::from_secs(15); info!("upgrade_and_gather_stats traffic thread 1"); stats_result = stat_gather_task( From 27486a5dadc135b0e597efb2cea7deb7b85e4003 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Wed, 12 Jun 2024 16:16:48 -0400 Subject: [PATCH 20/28] swarm loses a bunch of &mut things as mutability goes hidden by internal mutexes --- testsuite/forge-cli/src/main.rs | 2 +- testsuite/forge/src/backend/k8s/node.rs | 4 +-- testsuite/forge/src/backend/k8s/swarm.rs | 30 ------------------- testsuite/forge/src/backend/local/node.rs | 4 +-- testsuite/forge/src/backend/local/swarm.rs | 30 ------------------- testsuite/forge/src/interface/node.rs | 4 +-- testsuite/forge/src/interface/swarm.rs | 12 -------- testsuite/testcases/src/forge_setup_test.rs | 2 +- .../src/fullnode_reboot_stress_test.rs | 2 +- testsuite/testcases/src/lib.rs | 4 +-- .../testcases/src/partial_nodes_down_test.rs | 4 +-- .../testcases/src/state_sync_performance.rs | 8 ++--- .../testcases/src/twin_validator_test.rs | 10 +++---- .../src/validator_reboot_stress_test.rs | 4 +-- 14 files changed, 24 insertions(+), 96 deletions(-) diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index 3c50c1ac4cc5b..7a6c40198f9e4 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -2673,7 +2673,7 @@ impl NetworkTest for RestartValidator { async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let node = 
ctx.swarm().validators_mut().next().unwrap(); + let node = ctx.swarm().validators().next().unwrap(); node.health_check().await.expect("node health check failed"); node.stop().await.unwrap(); println!("Restarting node {}", node.peer_id()); diff --git a/testsuite/forge/src/backend/k8s/node.rs b/testsuite/forge/src/backend/k8s/node.rs index f52b5a19f76a1..04577d22e6525 100644 --- a/testsuite/forge/src/backend/k8s/node.rs +++ b/testsuite/forge/src/backend/k8s/node.rs @@ -165,7 +165,7 @@ impl Node for K8sNode { .expect("Invalid URL.") } - async fn clear_storage(&mut self) -> Result<()> { + async fn clear_storage(&self) -> Result<()> { // Remove all storage files let ledger_db_path = format!("{}/db/{}", APTOS_DATA_DIR, LEDGER_DB_NAME); let state_db_path = format!("{}/db/{}", APTOS_DATA_DIR, STATE_MERKLE_DB_NAME); @@ -261,7 +261,7 @@ impl Node for K8sNode { stateful_set::get_identity(self.stateful_set_name(), self.namespace()).await } - async fn set_identity(&mut self, k8s_secret_name: String) -> Result<()> { + async fn set_identity(&self, k8s_secret_name: String) -> Result<()> { stateful_set::set_identity( self.stateful_set_name(), self.namespace(), diff --git a/testsuite/forge/src/backend/k8s/swarm.rs b/testsuite/forge/src/backend/k8s/swarm.rs index c795a3adb515d..27a6f2e5a2259 100644 --- a/testsuite/forge/src/backend/k8s/swarm.rs +++ b/testsuite/forge/src/backend/k8s/swarm.rs @@ -224,26 +224,10 @@ impl Swarm for K8sSwarm { Box::new(validators.into_iter()) } - fn validators_mut<'a>(&'a mut self) -> Box + 'a> { - let mut validators: Vec<_> = self - .validators - .values_mut() - .map(|v| v as &'a mut dyn Validator) - .collect(); - validators.sort_by_key(|v| v.index()); - Box::new(validators.into_iter()) - } - fn validator(&self, id: PeerId) -> Option<&dyn Validator> { self.validators.get(&id).map(|v| v as &dyn Validator) } - fn validator_mut(&mut self, id: PeerId) -> Option<&mut dyn Validator> { - self.validators - .get_mut(&id) - .map(|v| v as &mut dyn Validator) - } - 
/// TODO: this should really be a method on Node rather than Swarm async fn upgrade_validator(&mut self, id: PeerId, version: &Version) -> Result<()> { let validator = self @@ -284,24 +268,10 @@ impl Swarm for K8sSwarm { Box::new(full_nodes.into_iter()) } - fn full_nodes_mut<'a>(&'a mut self) -> Box + 'a> { - let mut full_nodes: Vec<_> = self - .fullnodes - .values_mut() - .map(|n| n as &'a mut dyn FullNode) - .collect(); - full_nodes.sort_by_key(|n| n.index()); - Box::new(full_nodes.into_iter()) - } - fn full_node(&self, id: PeerId) -> Option<&dyn FullNode> { self.fullnodes.get(&id).map(|v| v as &dyn FullNode) } - fn full_node_mut(&mut self, id: PeerId) -> Option<&mut dyn FullNode> { - self.fullnodes.get_mut(&id).map(|v| v as &mut dyn FullNode) - } - fn add_validator(&mut self, _version: &Version, _template: NodeConfig) -> Result { todo!() } diff --git a/testsuite/forge/src/backend/local/node.rs b/testsuite/forge/src/backend/local/node.rs index 5f57692224e95..227d53f9dd239 100644 --- a/testsuite/forge/src/backend/local/node.rs +++ b/testsuite/forge/src/backend/local/node.rs @@ -299,11 +299,11 @@ impl Node for LocalNode { todo!() } - async fn set_identity(&mut self, _k8s_secret_name: String) -> Result<()> { + async fn set_identity(&self, _k8s_secret_name: String) -> Result<()> { todo!() } - async fn clear_storage(&mut self) -> Result<()> { + async fn clear_storage(&self) -> Result<()> { // Remove all storage files (i.e., blockchain data, consensus data and state sync data) let node_config = self.config(); let ledger_db_path = node_config.storage.dir().join(LEDGER_DB_NAME); diff --git a/testsuite/forge/src/backend/local/swarm.rs b/testsuite/forge/src/backend/local/swarm.rs index 02b4b24ea1e83..8ff320651b23d 100644 --- a/testsuite/forge/src/backend/local/swarm.rs +++ b/testsuite/forge/src/backend/local/swarm.rs @@ -492,26 +492,10 @@ impl Swarm for LocalSwarm { Box::new(validators.into_iter()) } - fn validators_mut<'a>(&'a mut self) -> Box + 'a> { - let mut 
validators: Vec<_> = self - .validators - .values_mut() - .map(|v| v as &'a mut dyn Validator) - .collect(); - validators.sort_by_key(|v| v.index()); - Box::new(validators.into_iter()) - } - fn validator(&self, id: PeerId) -> Option<&dyn Validator> { self.validators.get(&id).map(|v| v as &dyn Validator) } - fn validator_mut(&mut self, id: PeerId) -> Option<&mut dyn Validator> { - self.validators - .get_mut(&id) - .map(|v| v as &mut dyn Validator) - } - async fn upgrade_validator(&mut self, id: PeerId, version: &Version) -> Result<()> { let version = self .versions @@ -535,24 +519,10 @@ impl Swarm for LocalSwarm { Box::new(full_nodes.into_iter()) } - fn full_nodes_mut<'a>(&'a mut self) -> Box + 'a> { - let mut full_nodes: Vec<_> = self - .fullnodes - .values_mut() - .map(|v| v as &'a mut dyn FullNode) - .collect(); - full_nodes.sort_by_key(|n| n.index()); - Box::new(full_nodes.into_iter()) - } - fn full_node(&self, id: PeerId) -> Option<&dyn FullNode> { self.fullnodes.get(&id).map(|v| v as &dyn FullNode) } - fn full_node_mut(&mut self, id: PeerId) -> Option<&mut dyn FullNode> { - self.fullnodes.get_mut(&id).map(|v| v as &mut dyn FullNode) - } - fn add_validator(&mut self, _version: &Version, _template: NodeConfig) -> Result { todo!() } diff --git a/testsuite/forge/src/interface/node.rs b/testsuite/forge/src/interface/node.rs index c149cd61b1545..e3eaa314c2f46 100644 --- a/testsuite/forge/src/interface/node.rs +++ b/testsuite/forge/src/interface/node.rs @@ -63,9 +63,9 @@ pub trait Node: Send + Sync { async fn get_identity(&self) -> Result; - async fn set_identity(&mut self, k8s_secret_name: String) -> Result<()>; + async fn set_identity(&self, k8s_secret_name: String) -> Result<()>; /// Clears this Node's Storage. 
This stops the node as well - async fn clear_storage(&mut self) -> Result<()>; + async fn clear_storage(&self) -> Result<()>; async fn health_check(&self) -> Result<(), HealthCheckError>; diff --git a/testsuite/forge/src/interface/swarm.rs b/testsuite/forge/src/interface/swarm.rs index 330ebe5f2be6b..d36c70f274ed4 100644 --- a/testsuite/forge/src/interface/swarm.rs +++ b/testsuite/forge/src/interface/swarm.rs @@ -28,30 +28,18 @@ pub trait Swarm: Sync + Send { /// Returns an Iterator of references to all the Validators in the Swarm fn validators<'a>(&'a self) -> Box + 'a>; - /// Returns an Iterator of mutable references to all the Validators in the Swarm - fn validators_mut<'a>(&'a mut self) -> Box + 'a>; - /// Returns a reference to the Validator with the provided PeerId fn validator(&self, id: PeerId) -> Option<&dyn Validator>; - /// Returns a mutable reference to the Validator with the provided PeerId - fn validator_mut(&mut self, id: PeerId) -> Option<&mut dyn Validator>; - /// Upgrade a Validator to run specified `Version` async fn upgrade_validator(&mut self, id: PeerId, version: &Version) -> Result<()>; /// Returns an Iterator of references to all the FullNodes in the Swarm fn full_nodes<'a>(&'a self) -> Box + 'a>; - /// Returns an Iterator of mutable references to all the FullNodes in the Swarm - fn full_nodes_mut<'a>(&'a mut self) -> Box + 'a>; - /// Returns a reference to the FullNode with the provided PeerId fn full_node(&self, id: PeerId) -> Option<&dyn FullNode>; - /// Returns a mutable reference to the FullNode with the provided PeerId - fn full_node_mut(&mut self, id: PeerId) -> Option<&mut dyn FullNode>; - /// Adds a Validator to the swarm and returns the PeerId fn add_validator(&mut self, version: &Version, template: NodeConfig) -> Result; diff --git a/testsuite/testcases/src/forge_setup_test.rs b/testsuite/testcases/src/forge_setup_test.rs index 88c4fa49d6de2..9044164a08cea 100644 --- a/testsuite/testcases/src/forge_setup_test.rs +++ 
b/testsuite/testcases/src/forge_setup_test.rs @@ -39,7 +39,7 @@ impl NetworkTest for ForgeSetupTest { let fullnode_id = all_fullnodes.iter().choose(&mut rng).unwrap(); info!("Pick one fullnode to stop and wipe"); - let fullnode = swarm.full_node_mut(*fullnode_id).unwrap(); + let fullnode = swarm.full_node(*fullnode_id).unwrap(); runtime.block_on(fullnode.clear_storage())?; runtime.block_on(fullnode.start())?; diff --git a/testsuite/testcases/src/fullnode_reboot_stress_test.rs b/testsuite/testcases/src/fullnode_reboot_stress_test.rs index 3abcc881b03ba..cd1f9f74f1dbd 100644 --- a/testsuite/testcases/src/fullnode_reboot_stress_test.rs +++ b/testsuite/testcases/src/fullnode_reboot_stress_test.rs @@ -38,7 +38,7 @@ impl NetworkLoadTest for FullNodeRebootStressTest { let fullnode_to_reboot = { let mut rng = thread_rng(); swarm - .full_node_mut(*all_fullnodes.choose(&mut rng).unwrap()) + .full_node(*all_fullnodes.choose(&mut rng).unwrap()) .unwrap() }; fullnode_to_reboot.stop().await?; diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index dd468bb7c49a0..161d48d9684b1 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -61,7 +61,7 @@ async fn batch_update( let deadline = Instant::now() + Duration::from_secs(60); for validator in validators_to_update { ctx.swarm() - .validator_mut(*validator) + .validator(*validator) .unwrap() .wait_until_healthy(deadline) .await?; @@ -94,7 +94,7 @@ async fn batch_update_gradually( .lock() .await .swarm() - .validator_mut(*validator) + .validator(*validator) .unwrap() .wait_until_healthy(deadline) .await?; diff --git a/testsuite/testcases/src/partial_nodes_down_test.rs b/testsuite/testcases/src/partial_nodes_down_test.rs index d2c12758be12a..23813ce0d5e48 100644 --- a/testsuite/testcases/src/partial_nodes_down_test.rs +++ b/testsuite/testcases/src/partial_nodes_down_test.rs @@ -31,7 +31,7 @@ impl NetworkTest for PartialNodesDown { let mut down_nodes = all_validators.clone(); let 
up_nodes = down_nodes.split_off(all_validators.len() / 10); for n in &down_nodes { - let node = ctx.swarm().validator_mut(*n).unwrap(); + let node = ctx.swarm().validator(*n).unwrap(); println!("Node {} is going to stop", node.name()); runtime.block_on(node.stop())?; } @@ -42,7 +42,7 @@ impl NetworkTest for PartialNodesDown { ctx.report .report_txn_stats(self.name().to_string(), &txn_stat); for n in &down_nodes { - let node = ctx.swarm().validator_mut(*n).unwrap(); + let node = ctx.swarm().validator(*n).unwrap(); println!("Node {} is going to restart", node.name()); runtime.block_on(node.start())?; } diff --git a/testsuite/testcases/src/state_sync_performance.rs b/testsuite/testcases/src/state_sync_performance.rs index 0a83c44ad20a8..24086f50e6a5a 100644 --- a/testsuite/testcases/src/state_sync_performance.rs +++ b/testsuite/testcases/src/state_sync_performance.rs @@ -251,26 +251,26 @@ fn stop_and_reset_nodes( // Stop and reset all fullnodes info!("Deleting all fullnode data!"); for fullnode_id in fullnodes_to_reset { - let fullnode = ctx.swarm().full_node_mut(*fullnode_id).unwrap(); + let fullnode = ctx.swarm().full_node(*fullnode_id).unwrap(); runtime.block_on(async { fullnode.clear_storage().await })?; } // Stop and reset all validators info!("Deleting all validator data!"); for valdiator_id in validators_to_reset { - let validator = ctx.swarm().validator_mut(*valdiator_id).unwrap(); + let validator = ctx.swarm().validator(*valdiator_id).unwrap(); runtime.block_on(async { validator.clear_storage().await })?; } // Restart the fullnodes so they start syncing from a fresh state for fullnode_id in fullnodes_to_reset { - let fullnode = ctx.swarm().full_node_mut(*fullnode_id).unwrap(); + let fullnode = ctx.swarm().full_node(*fullnode_id).unwrap(); runtime.block_on(async { fullnode.start().await })?; } // Restart the validators so they start syncing from a fresh state for valdiator_id in validators_to_reset { - let validator = 
ctx.swarm().validator_mut(*valdiator_id).unwrap(); + let validator = ctx.swarm().validator(*valdiator_id).unwrap(); runtime.block_on(async { validator.start().await })?; } diff --git a/testsuite/testcases/src/twin_validator_test.rs b/testsuite/testcases/src/twin_validator_test.rs index 15d76a228c68e..1fdfc6c873510 100644 --- a/testsuite/testcases/src/twin_validator_test.rs +++ b/testsuite/testcases/src/twin_validator_test.rs @@ -42,7 +42,7 @@ impl NetworkTest for TwinValidatorTest { let main_id: AccountAddress = all_validators_ids[i]; let twin_id = all_validators_ids[i + validator_count - twin_count]; ctx.swarm() - .validator_mut(twin_id) + .validator(twin_id) .unwrap() .clear_storage() .await @@ -51,25 +51,25 @@ impl NetworkTest for TwinValidatorTest { ))?; let main_identity = ctx .swarm() - .validator_mut(main_id) + .validator(main_id) .unwrap() .get_identity() .await .context(format!("Error while getting identity for {main_id}"))?; ctx.swarm() - .validator_mut(twin_id) + .validator(twin_id) .unwrap() .set_identity(main_identity) .await .context(format!("Error while setting identity for {twin_id}"))?; ctx.swarm() - .validator_mut(twin_id) + .validator(twin_id) .unwrap() .start() .await .context(format!("Error while starting {twin_id}"))?; ctx.swarm() - .validator_mut(twin_id) + .validator(twin_id) .unwrap() .wait_until_healthy(Instant::now() + Duration::from_secs(300)) .await diff --git a/testsuite/testcases/src/validator_reboot_stress_test.rs b/testsuite/testcases/src/validator_reboot_stress_test.rs index 07334f593730d..8f4fe9b36d356 100644 --- a/testsuite/testcases/src/validator_reboot_stress_test.rs +++ b/testsuite/testcases/src/validator_reboot_stress_test.rs @@ -41,7 +41,7 @@ impl NetworkLoadTest for ValidatorRebootStressTest { .collect() }; for adr in &addresses { - let validator_to_reboot = swarm.validator_mut(*adr).unwrap(); + let validator_to_reboot = swarm.validator(*adr).unwrap(); validator_to_reboot.stop().await?; } if self.down_time_secs > 0.0 { @@ 
-49,7 +49,7 @@ impl NetworkLoadTest for ValidatorRebootStressTest { } for adr in &addresses { - let validator_to_reboot = swarm.validator_mut(*adr).unwrap(); + let validator_to_reboot = swarm.validator(*adr).unwrap(); validator_to_reboot.start().await?; } From 9049438f420d4b0078ae00fc45b331be550c8fa4 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Thu, 13 Jun 2024 16:27:34 -0400 Subject: [PATCH 21/28] &mut swarm -> Arc>> more mut refs to non-mut --- testsuite/forge-cli/src/main.rs | 26 +-- testsuite/forge/src/backend/k8s/fullnode.rs | 3 +- testsuite/forge/src/backend/k8s/node.rs | 2 +- testsuite/forge/src/backend/k8s/swarm.rs | 5 +- testsuite/forge/src/backend/local/node.rs | 6 +- testsuite/forge/src/backend/local/swarm.rs | 2 +- testsuite/forge/src/interface/aptos.rs | 2 +- testsuite/forge/src/interface/chain_info.rs | 2 +- testsuite/forge/src/interface/network.rs | 12 +- .../forge/src/interface/prometheus_metrics.rs | 18 +- testsuite/forge/src/interface/swarm.rs | 4 +- testsuite/forge/src/runner.rs | 6 +- testsuite/forge/src/success_criteria.rs | 44 +++-- .../forge/src/test_utils/consensus_utils.rs | 19 +- .../smoke-test/src/aptos/error_report.rs | 2 +- testsuite/smoke-test/src/aptos/gas_check.rs | 2 +- .../smoke-test/src/aptos/mint_transfer.rs | 2 +- .../smoke-test/src/aptos/package_publish.rs | 2 +- .../smoke-test/src/aptos_cli/validator.rs | 12 +- .../consensus/consensus_fault_tolerance.rs | 34 +++- .../src/consensus/dag/dag_fault_tolerance.rs | 26 ++- testsuite/smoke-test/src/execution.rs | 2 +- testsuite/smoke-test/src/full_nodes.rs | 4 +- testsuite/smoke-test/src/fullnode.rs | 2 +- testsuite/smoke-test/src/indexer.rs | 2 +- .../smoke-test/src/inspection_service.rs | 2 +- .../src/jwks/jwk_consensus_basic.rs | 2 +- .../src/jwks/jwk_consensus_per_issuer.rs | 2 +- .../jwk_consensus_provider_change_mind.rs | 2 +- testsuite/smoke-test/src/jwks/mod.rs | 2 +- testsuite/smoke-test/src/keyless.rs | 20 +-- .../src/randomness/disable_feature_0.rs | 2 +- 
.../src/randomness/disable_feature_1.rs | 2 +- .../dkg_with_validator_join_leave.rs | 2 +- .../src/randomness/e2e_basic_consumption.rs | 2 +- .../src/randomness/enable_feature_0.rs | 2 +- .../src/randomness/enable_feature_1.rs | 2 +- .../src/randomness/enable_feature_2.rs | 2 +- .../src/randomness/entry_func_attrs.rs | 2 +- testsuite/smoke-test/src/rest_api.rs | 8 +- testsuite/smoke-test/src/rosetta.rs | 6 +- testsuite/smoke-test/src/state_sync_utils.rs | 4 +- testsuite/smoke-test/src/test_smoke_tests.rs | 2 +- testsuite/smoke-test/src/transaction.rs | 2 +- testsuite/smoke-test/src/txn_broadcast.rs | 2 +- testsuite/testcases/src/compatibility_test.rs | 54 ++++-- .../src/consensus_reliability_tests.rs | 163 ++++++++++-------- .../testcases/src/dag_onchain_enable_test.rs | 22 ++- testsuite/testcases/src/forge_setup_test.rs | 69 ++++---- testsuite/testcases/src/framework_upgrade.rs | 84 +++++---- .../src/fullnode_reboot_stress_test.rs | 32 ++-- testsuite/testcases/src/lib.rs | 99 +++++++---- .../testcases/src/load_vs_perf_benchmark.rs | 56 +++--- testsuite/testcases/src/modifiers.rs | 138 ++++++++------- .../src/multi_region_network_test.rs | 31 +++- .../testcases/src/network_bandwidth_test.rs | 4 + testsuite/testcases/src/network_loss_test.rs | 4 + .../testcases/src/network_partition_test.rs | 6 + .../testcases/src/partial_nodes_down_test.rs | 10 +- .../src/public_fullnode_performance.rs | 131 ++++++++------ .../src/quorum_store_onchain_enable_test.rs | 18 +- .../testcases/src/state_sync_performance.rs | 141 ++++++++------- .../src/three_region_simulation_test.rs | 26 ++- .../testcases/src/twin_validator_test.rs | 93 +++++----- testsuite/testcases/src/two_traffics_test.rs | 23 ++- .../src/validator_join_leave_test.rs | 41 +++-- .../src/validator_reboot_stress_test.rs | 15 +- 67 files changed, 945 insertions(+), 626 deletions(-) diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index 7a6c40198f9e4..d28a4c41bce99 100644 --- 
a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -2673,7 +2673,8 @@ impl NetworkTest for RestartValidator { async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let node = ctx.swarm().validators().next().unwrap(); + let swarm = ctx.swarm.read().await; + let node = swarm.validators().next().unwrap(); node.health_check().await.expect("node health check failed"); node.stop().await.unwrap(); println!("Restarting node {}", node.peer_id()); @@ -2700,7 +2701,9 @@ impl NetworkTest for EmitTransaction { let ctx = ctx_locker.deref_mut(); let duration = Duration::from_secs(10); let all_validators = ctx - .swarm() + .swarm + .read() + .await .validators() .map(|v| v.peer_id()) .collect::>(); @@ -2764,14 +2767,17 @@ async fn gather_metrics_one(ctx: &NetworkContext<'_>) { let now = chrono::prelude::Utc::now() .format("%Y%m%d_%H%M%S") .to_string(); - for val in ctx.swarm.validators() { - let mut url = val.inspection_service_endpoint(); - let valname = val.peer_id().to_string(); - url.set_path("metrics"); - let fname = format!("{}.{}.metrics", now, valname); - let outpath: PathBuf = outdir.join(fname); - let th = handle.spawn(gather_metrics_to_file(url, outpath)); - gets.push(th); + { + let swarm = ctx.swarm.read().await; + for val in swarm.validators() { + let mut url = val.inspection_service_endpoint(); + let valname = val.peer_id().to_string(); + url.set_path("metrics"); + let fname = format!("{}.{}.metrics", now, valname); + let outpath: PathBuf = outdir.join(fname); + let th = handle.spawn(gather_metrics_to_file(url, outpath)); + gets.push(th); + } } // join all the join handles while !gets.is_empty() { diff --git a/testsuite/forge/src/backend/k8s/fullnode.rs b/testsuite/forge/src/backend/k8s/fullnode.rs index 311cb89e347ec..bbfad70651728 100644 --- a/testsuite/forge/src/backend/k8s/fullnode.rs +++ b/testsuite/forge/src/backend/k8s/fullnode.rs @@ 
-35,9 +35,8 @@ use std::{ env, net::{Ipv4Addr, SocketAddr, SocketAddrV4}, path::PathBuf, - sync::Arc, + sync::{atomic::AtomicU32, Arc}, }; -use std::sync::atomic::AtomicU32; use tempfile::TempDir; // these are constants given by the aptos-node helm chart diff --git a/testsuite/forge/src/backend/k8s/node.rs b/testsuite/forge/src/backend/k8s/node.rs index 04577d22e6525..35e76ba93e99f 100644 --- a/testsuite/forge/src/backend/k8s/node.rs +++ b/testsuite/forge/src/backend/k8s/node.rs @@ -20,10 +20,10 @@ use std::{ fmt::{Debug, Formatter}, process::{Command, Stdio}, str::FromStr, + sync::atomic::{AtomicU32, Ordering}, thread, time::{Duration, Instant}, }; -use std::sync::atomic::{AtomicU32, Ordering}; const APTOS_DATA_DIR: &str = "/opt/aptos/data"; diff --git a/testsuite/forge/src/backend/k8s/swarm.rs b/testsuite/forge/src/backend/k8s/swarm.rs index 27a6f2e5a2259..15c51cd8b0318 100644 --- a/testsuite/forge/src/backend/k8s/swarm.rs +++ b/testsuite/forge/src/backend/k8s/swarm.rs @@ -40,9 +40,8 @@ use std::{ collections::{BTreeMap, HashMap, HashSet}, convert::TryFrom, env, str, - sync::Arc, + sync::{atomic::AtomicU32, Arc}, }; -use std::sync::atomic::AtomicU32; // use std::sync::Mutex; use tokio::{runtime::Runtime, time::Duration}; @@ -310,7 +309,7 @@ impl Swarm for K8sSwarm { Box::new(self.versions.keys().cloned()) } - fn chain_info(&mut self) -> ChainInfo { + fn chain_info(&self) -> ChainInfo { let rest_api_url = self.get_rest_api_url(0); let inspection_service_url = self.get_inspection_service_url(0); ChainInfo::new( diff --git a/testsuite/forge/src/backend/local/node.rs b/testsuite/forge/src/backend/local/node.rs index 227d53f9dd239..cdb5fe9466c4a 100644 --- a/testsuite/forge/src/backend/local/node.rs +++ b/testsuite/forge/src/backend/local/node.rs @@ -117,7 +117,11 @@ impl LocalNode { pub fn start(&self) -> Result<()> { let mut process_locker = self.process.lock().unwrap(); - ensure!(process_locker.is_none(), "node {} already running", self.name); + ensure!( + 
process_locker.is_none(), + "node {} already running", + self.name + ); // Ensure log file exists let log_file = OpenOptions::new() diff --git a/testsuite/forge/src/backend/local/swarm.rs b/testsuite/forge/src/backend/local/swarm.rs index 8ff320651b23d..cb1f8ba2989f7 100644 --- a/testsuite/forge/src/backend/local/swarm.rs +++ b/testsuite/forge/src/backend/local/swarm.rs @@ -560,7 +560,7 @@ impl Swarm for LocalSwarm { Box::new(self.versions.keys().cloned()) } - fn chain_info(&mut self) -> ChainInfo { + fn chain_info(&self) -> ChainInfo { let rest_api_url = self .validators() .next() diff --git a/testsuite/forge/src/interface/aptos.rs b/testsuite/forge/src/interface/aptos.rs index da5f76d49aac2..6e38ae931dfed 100644 --- a/testsuite/forge/src/interface/aptos.rs +++ b/testsuite/forge/src/interface/aptos.rs @@ -301,7 +301,7 @@ impl AptosPublicInfo { Ok(account) } - pub async fn reconfig(&mut self) -> State { + pub async fn reconfig(&self) -> State { // dedupe with smoke-test::test_utils::reconfig reconfig( &self.rest_client, diff --git a/testsuite/forge/src/interface/chain_info.rs b/testsuite/forge/src/interface/chain_info.rs index 949bddf201059..ff6606f9fab67 100644 --- a/testsuite/forge/src/interface/chain_info.rs +++ b/testsuite/forge/src/interface/chain_info.rs @@ -35,7 +35,7 @@ impl ChainInfo { } } - pub fn root_account(&mut self) -> Arc { + pub fn root_account(&self) -> Arc { self.root_account.clone() } diff --git a/testsuite/forge/src/interface/network.rs b/testsuite/forge/src/interface/network.rs index b2ce759931d46..0fbb4a2432165 100644 --- a/testsuite/forge/src/interface/network.rs +++ b/testsuite/forge/src/interface/network.rs @@ -55,7 +55,7 @@ impl<'t> NetworkContextSynchronizer<'t> { pub struct NetworkContext<'t> { core: CoreContext, - pub swarm: &'t mut dyn Swarm, + pub swarm: Arc>>, pub report: &'t mut TestReport, pub global_duration: Duration, pub emit_job: EmitJobRequest, @@ -66,7 +66,7 @@ pub struct NetworkContext<'t> { impl<'t> NetworkContext<'t> { 
pub fn new( core: CoreContext, - swarm: &'t mut dyn Swarm, + swarm: Arc>>, report: &'t mut TestReport, global_duration: Duration, emit_job: EmitJobRequest, @@ -83,9 +83,9 @@ impl<'t> NetworkContext<'t> { } } - pub fn swarm(&mut self) -> &mut dyn Swarm { - self.swarm - } + // pub fn swarm(&mut self) -> &mut dyn Swarm { + // self.swarm + // } pub fn core(&mut self) -> &mut CoreContext { &mut self.core @@ -103,7 +103,7 @@ impl<'t> NetworkContext<'t> { ) -> Result<()> { SuccessCriteriaChecker::check_for_success( &self.success_criteria, - self.swarm, + self.swarm.clone(), self.report, stats, window, diff --git a/testsuite/forge/src/interface/prometheus_metrics.rs b/testsuite/forge/src/interface/prometheus_metrics.rs index 6edcdbe4f81f1..9ec7d6f82440c 100644 --- a/testsuite/forge/src/interface/prometheus_metrics.rs +++ b/testsuite/forge/src/interface/prometheus_metrics.rs @@ -3,7 +3,7 @@ use crate::Swarm; use prometheus_http_query::response::Sample; -use std::{collections::BTreeMap, fmt}; +use std::{collections::BTreeMap, fmt, sync::Arc}; #[derive(Clone)] pub struct MetricSamples(Vec); @@ -58,10 +58,16 @@ impl SystemMetrics { } } -pub async fn fetch_error_metrics(swarm: &dyn Swarm) -> anyhow::Result { +pub async fn fetch_error_metrics( + swarm: Arc>>, +) -> anyhow::Result { let error_query = r#"aptos_error_log_count{role=~"validator"}"#; - let result = swarm.query_metrics(error_query, None, None).await?; + let result = swarm + .read() + .await + .query_metrics(error_query, None, None) + .await?; let error_samples = result.as_instant().unwrap_or(&[]); Ok(error_samples @@ -72,13 +78,14 @@ pub async fn fetch_error_metrics(swarm: &dyn Swarm) -> anyhow::Result { } pub async fn fetch_system_metrics( - swarm: &dyn Swarm, + swarm: Arc>>, start_time: i64, end_time: i64, ) -> anyhow::Result { let cpu_query = r#"avg(rate(container_cpu_usage_seconds_total{container=~"validator"}[30s]))"#; let memory_query = r#"avg(container_memory_rss{container=~"validator"})"#; + let swarm = 
swarm.read().await; let cpu_samples = swarm .query_range_metrics(cpu_query, start_time, end_time, None) .await?; @@ -119,7 +126,7 @@ impl LatencyBreakdown { } pub async fn fetch_latency_breakdown( - swarm: &dyn Swarm, + swarm: Arc>>, start_time: u64, end_time: u64, ) -> anyhow::Result { @@ -131,6 +138,7 @@ pub async fn fetch_latency_breakdown( let qs_batch_to_pos_query = r#"sum(rate(quorum_store_batch_to_PoS_duration_sum{role=~"validator"}[1m])) / sum(rate(quorum_store_batch_to_PoS_duration_count{role=~"validator"}[1m]))"#; let qs_pos_to_proposal_query = r#"sum(rate(quorum_store_pos_to_pull_sum{role=~"validator"}[1m])) / sum(rate(quorum_store_pos_to_pull_count{role=~"validator"}[1m]))"#; + let swarm = swarm.read().await; let consensus_proposal_to_ordered_samples = swarm .query_range_metrics( consensus_proposal_to_ordered_query, diff --git a/testsuite/forge/src/interface/swarm.rs b/testsuite/forge/src/interface/swarm.rs index d36c70f274ed4..d0a6ea1c26b5a 100644 --- a/testsuite/forge/src/interface/swarm.rs +++ b/testsuite/forge/src/interface/swarm.rs @@ -67,7 +67,7 @@ pub trait Swarm: Sync + Send { fn versions<'a>(&'a self) -> Box + 'a>; /// Construct a ChainInfo from this Swarm - fn chain_info(&mut self) -> ChainInfo; + fn chain_info(&self) -> ChainInfo; fn logs_location(&mut self) -> String; @@ -95,7 +95,7 @@ pub trait Swarm: Sync + Send { timeout: Option, ) -> Result>; - fn aptos_public_info(&mut self) -> AptosPublicInfo { + fn aptos_public_info(&self) -> AptosPublicInfo { self.chain_info().into_aptos_public_info() } diff --git a/testsuite/forge/src/runner.rs b/testsuite/forge/src/runner.rs index b9f17cebf48f5..e6083cec11f1e 100644 --- a/testsuite/forge/src/runner.rs +++ b/testsuite/forge/src/runner.rs @@ -586,10 +586,12 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { summary.handle_result(test.name().to_owned(), result)?; } + let logs_location = swarm.logs_location(); + let swarm = Arc::new(tokio::sync::RwLock::new(swarm)); for test in 
self.filter_tests(&self.tests.network_tests) { let network_ctx = NetworkContext::new( CoreContext::from_rng(&mut rng), - &mut *swarm, + swarm.clone(), &mut report, self.global_duration, self.tests.emit_job_request.clone(), @@ -614,7 +616,7 @@ impl<'cfg, F: Factory> Forge<'cfg, F> { io::stderr().flush()?; if !summary.success() { println!(); - println!("Swarm logs can be found here: {}", swarm.logs_location()); + println!("Swarm logs can be found here: {}", logs_location); } } diff --git a/testsuite/forge/src/success_criteria.rs b/testsuite/forge/src/success_criteria.rs index 6283fbf8a57dc..e7c770d0967fd 100644 --- a/testsuite/forge/src/success_criteria.rs +++ b/testsuite/forge/src/success_criteria.rs @@ -13,7 +13,7 @@ use aptos::node::analyze::fetch_metadata::FetchMetadata; use aptos_sdk::types::PeerId; use aptos_transaction_emitter_lib::{TxnStats, TxnStatsRate}; use prometheus_http_query::response::Sample; -use std::{collections::BTreeMap, time::Duration}; +use std::{collections::BTreeMap, sync::Arc, time::Duration}; #[derive(Clone, Debug)] pub struct StateProgressThreshold { @@ -269,7 +269,7 @@ impl SuccessCriteriaChecker { pub async fn check_for_success( success_criteria: &SuccessCriteria, - swarm: &mut dyn Swarm, + swarm: Arc>>, report: &mut TestReport, stats: &TxnStats, window: Duration, @@ -308,34 +308,42 @@ impl SuccessCriteriaChecker { if let Some(timeout) = success_criteria.wait_for_all_nodes_to_catchup { swarm + .read() + .await .wait_for_all_nodes_to_catchup_to_next(timeout) .await .context("Failed waiting for all nodes to catchup to next version")?; } if success_criteria.check_no_restarts { - swarm + let swarm_read = swarm.read().await; + swarm_read .ensure_no_validator_restart() .await .context("Failed ensuring no validator restarted")?; - swarm + swarm_read .ensure_no_fullnode_restart() .await .context("Failed ensuring no fullnode restarted")?; } if success_criteria.check_no_errors { - Self::check_no_errors(swarm).await?; + 
Self::check_no_errors(swarm.clone()).await?; } if let Some(system_metrics_threshold) = success_criteria.system_metrics_threshold.clone() { - Self::check_system_metrics(swarm, start_time, end_time, system_metrics_threshold) - .await?; + Self::check_system_metrics( + swarm.clone(), + start_time, + end_time, + system_metrics_threshold, + ) + .await?; } if let Some(chain_progress_threshold) = &success_criteria.chain_progress_check { Self::check_chain_progress( - swarm, + swarm.clone(), report, chain_progress_threshold, start_version, @@ -349,17 +357,21 @@ impl SuccessCriteriaChecker { } async fn check_chain_progress( - swarm: &mut dyn Swarm, + swarm: Arc>>, report: &mut TestReport, chain_progress_threshold: &StateProgressThreshold, start_version: u64, end_version: u64, ) -> anyhow::Result<()> { // Choose client with newest ledger version to fetch NewBlockEvents from: - let (_max_v, client) = swarm - .get_client_with_newest_ledger_version() - .await - .context("No clients replied in check_chain_progress")?; + let (_max_v, client) = { + swarm + .read() + .await + .get_client_with_newest_ledger_version() + .await + .context("No clients replied in check_chain_progress")? 
+ }; let epochs = FetchMetadata::fetch_new_block_events(&client, None, None) .await @@ -565,7 +577,9 @@ impl SuccessCriteriaChecker { } } - async fn check_no_errors(swarm: &mut dyn Swarm) -> anyhow::Result<()> { + async fn check_no_errors( + swarm: Arc>>, + ) -> anyhow::Result<()> { let error_count = fetch_error_metrics(swarm).await?; if error_count > 0 { bail!( @@ -579,7 +593,7 @@ impl SuccessCriteriaChecker { } async fn check_system_metrics( - swarm: &mut dyn Swarm, + swarm: Arc>>, start_time: i64, end_time: i64, threshold: SystemMetricsThreshold, diff --git a/testsuite/forge/src/test_utils/consensus_utils.rs b/testsuite/forge/src/test_utils/consensus_utils.rs index 9eba7b81fdb60..e05bbf12b4e57 100644 --- a/testsuite/forge/src/test_utils/consensus_utils.rs +++ b/testsuite/forge/src/test_utils/consensus_utils.rs @@ -1,7 +1,7 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use crate::{wait_for_all_nodes_to_catchup_to_version, Swarm, SwarmExt}; +use crate::{wait_for_all_nodes_to_catchup_to_version, AptosPublicInfo}; use anyhow::{bail, Context, Result}; use aptos_config::config::DEFAULT_MAX_PAGE_SIZE; use aptos_rest_client::Client as RestClient; @@ -53,14 +53,16 @@ async fn get_node_state(validator_client: &RestClient) -> NodeState { /// I.e. if part is shorter than how long it takes for empty block to be /// generated, we can make sure one block gets created on every part. 
pub async fn test_consensus_fault_tolerance( - swarm: &mut dyn Swarm, + // swarm: Arc>>, + validator_clients: Vec<(String, RestClient)>, + public_info: AptosPublicInfo, cycles: usize, cycle_duration_s: f32, parts_in_cycle: usize, - mut failure_injection: Box, + mut failure_injection: Box, // (cycle, executed_epochs, executed_rounds, executed_transactions, current_state, previous_state) mut check_cycle: Box< - dyn FnMut(usize, u64, u64, u64, Vec, Vec) -> Result<()>, + dyn FnMut(usize, u64, u64, u64, Vec, Vec) -> Result<()> + Send, >, new_epoch_on_cycle: bool, // Instead of failing on first check, we check the full run, @@ -68,7 +70,9 @@ pub async fn test_consensus_fault_tolerance( // Can allow us to better see if state would've gotten resolved by itself, etc. raise_check_error_at_the_end: bool, ) -> Result<()> { - let validator_clients = swarm.get_validator_clients_with_names(); + // let validator_clients = { + // swarm.read().await.get_validator_clients_with_names() + // }; async fn get_all_states(validator_clients: &[(String, RestClient)]) -> Vec { join_all( @@ -145,7 +149,8 @@ pub async fn test_consensus_fault_tolerance( } if new_epoch_on_cycle { - swarm.aptos_public_info().reconfig().await; + // swarm.read().await.aptos_public_info().reconfig().await; + public_info.reconfig().await; } } @@ -240,7 +245,7 @@ impl FailureInjection for NoFailureInjection { async fn clear(&mut self, _: &[(String, RestClient)]) {} } -pub fn no_failure_injection() -> Box { +pub fn no_failure_injection() -> Box { Box::new(NoFailureInjection {}) } diff --git a/testsuite/smoke-test/src/aptos/error_report.rs b/testsuite/smoke-test/src/aptos/error_report.rs index e9e30e7089007..96a4a184353a1 100644 --- a/testsuite/smoke-test/src/aptos/error_report.rs +++ b/testsuite/smoke-test/src/aptos/error_report.rs @@ -34,7 +34,7 @@ async fn submit_and_check_err TransactionBuilder>( #[tokio::test] async fn test_error_report() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = 
new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); let local_account = info.random_account(); diff --git a/testsuite/smoke-test/src/aptos/gas_check.rs b/testsuite/smoke-test/src/aptos/gas_check.rs index 68f7d79659ff8..f3f63915486e9 100644 --- a/testsuite/smoke-test/src/aptos/gas_check.rs +++ b/testsuite/smoke-test/src/aptos/gas_check.rs @@ -12,7 +12,7 @@ use std::time::Duration; #[ignore] #[tokio::test] async fn test_gas_check() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); let account1 = info.random_account(); diff --git a/testsuite/smoke-test/src/aptos/mint_transfer.rs b/testsuite/smoke-test/src/aptos/mint_transfer.rs index 6740ec1b952f7..ae2e593c60600 100644 --- a/testsuite/smoke-test/src/aptos/mint_transfer.rs +++ b/testsuite/smoke-test/src/aptos/mint_transfer.rs @@ -9,7 +9,7 @@ use aptos_types::transaction::{ExecutionStatus, TransactionStatus}; #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn test_mint_transfer() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); let account1 = info.random_account(); diff --git a/testsuite/smoke-test/src/aptos/package_publish.rs b/testsuite/smoke-test/src/aptos/package_publish.rs index 7d1b48285c1cc..3c9c408fc8c82 100644 --- a/testsuite/smoke-test/src/aptos/package_publish.rs +++ b/testsuite/smoke-test/src/aptos/package_publish.rs @@ -6,7 +6,7 @@ use aptos_forge::Swarm; #[tokio::test] async fn test_package_publish() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); let base_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")); diff --git a/testsuite/smoke-test/src/aptos_cli/validator.rs b/testsuite/smoke-test/src/aptos_cli/validator.rs index 2f6673c916403..5de3d15fa3e27 100644 --- 
a/testsuite/smoke-test/src/aptos_cli/validator.rs +++ b/testsuite/smoke-test/src/aptos_cli/validator.rs @@ -42,7 +42,7 @@ use std::{ #[tokio::test] async fn test_analyze_validators() { - let (mut swarm, cli, _faucet) = SwarmBuilder::new_local(1) + let (swarm, cli, _faucet) = SwarmBuilder::new_local(1) .with_aptos() .with_init_genesis_stake(Arc::new(|_i, genesis_stake_amount| { *genesis_stake_amount = 100000; @@ -545,7 +545,7 @@ pub(crate) fn generate_blob(data: &[u8]) -> String { async fn test_large_total_stake() { // just barelly below u64::MAX const BASE: u64 = 10_000_000_000_000_000_000; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_init_genesis_stake(Arc::new(|_, genesis_stake_amount| { // make sure we have quorum *genesis_stake_amount = BASE; @@ -613,7 +613,7 @@ async fn test_nodes_rewards() { // with 10% APY, BASE amount gives 100 rewards per second const BASE: u64 = 3600u64 * 24 * 365 * 10 * 100; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_init_config(Arc::new(|_, conf, _| { // reduce timeout, as we will have dead node during rounds conf.consensus.round_initial_timeout_ms = 200; @@ -946,7 +946,7 @@ async fn test_nodes_rewards() { #[tokio::test] async fn test_register_and_update_validator() { - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) .with_aptos() .build_with_cli(0) .await; @@ -1042,7 +1042,7 @@ async fn test_register_and_update_validator() { #[tokio::test] async fn test_join_and_leave_validator() { - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) .with_aptos() .with_init_config(Arc::new(|_i, conf, _| { // reduce timeout, as we will have dead node during rounds @@ -1207,7 +1207,7 @@ async fn test_join_and_leave_validator() { 
#[tokio::test] async fn test_owner_create_and_delegate_flow() { - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) .with_aptos() .with_init_config(Arc::new(|_i, conf, _| { // reduce timeout, as we will have dead node during rounds diff --git a/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs b/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs index 83718c4b2ab52..1ae2b0c688fae 100644 --- a/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs +++ b/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs @@ -134,7 +134,8 @@ async fn run_fail_point_test( >, // (cycle, executed_epochs, executed_rounds, executed_transactions, current_state, previous_state) check_cycle: Box< - dyn FnMut(usize, u64, u64, u64, Vec, Vec) -> anyhow::Result<()>, + dyn FnMut(usize, u64, u64, u64, Vec, Vec) -> anyhow::Result<()> + + Send, >, ) { let mut swarm = create_swarm(num_validators, max_block_size).await; @@ -145,8 +146,15 @@ async fn run_fail_point_test( finish_traffic: Arc::new(AtomicBool::new(false)), } }; + let (validator_clients, public_info) = { + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) + }; test_consensus_fault_tolerance( - &mut swarm, + validator_clients, + public_info, cycles, cycle_duration_s, parts_in_cycle, @@ -163,10 +171,17 @@ async fn run_fail_point_test( async fn test_no_failures() { let num_validators = 3; - let mut swarm = create_swarm(num_validators, 1).await; + let swarm = create_swarm(num_validators, 1).await; + let (validator_clients, public_info) = { + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) + }; test_consensus_fault_tolerance( - &mut swarm, + validator_clients, + public_info, 3, 5.0, 1, @@ -195,10 +210,17 @@ async fn test_no_failures() { async fn test_ordered_only_cert() { let num_validators = 3; - let mut swarm = create_swarm(num_validators, 1).await; + let swarm = 
create_swarm(num_validators, 1).await; + let (validator_clients, public_info) = { + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) + }; test_consensus_fault_tolerance( - &mut swarm, + validator_clients, + public_info, 3, 5.0, 1, diff --git a/testsuite/smoke-test/src/consensus/dag/dag_fault_tolerance.rs b/testsuite/smoke-test/src/consensus/dag/dag_fault_tolerance.rs index e6eba097db66c..2636a3def9491 100644 --- a/testsuite/smoke-test/src/consensus/dag/dag_fault_tolerance.rs +++ b/testsuite/smoke-test/src/consensus/dag/dag_fault_tolerance.rs @@ -10,7 +10,7 @@ use aptos_forge::{ test_utils::consensus_utils::{ no_failure_injection, test_consensus_fault_tolerance, FailPointFailureInjection, NodeState, }, - LocalSwarm, + LocalSwarm, Swarm, SwarmExt, }; use aptos_types::on_chain_config::{ ConsensusAlgorithmConfig, DagConsensusConfigV1, OnChainConsensusConfig, ValidatorTxnConfig, @@ -61,10 +61,16 @@ pub async fn create_dag_swarm(num_nodes: usize) -> LocalSwarm { async fn test_no_failures() { let num_validators = 3; - let mut swarm = create_dag_swarm(num_validators).await; - + let swarm = create_dag_swarm(num_validators).await; + let (validator_clients, public_info) = { + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) + }; test_consensus_fault_tolerance( - &mut swarm, + validator_clients, + public_info, 3, 5.0, 1, @@ -97,7 +103,8 @@ async fn run_dag_fail_point_test( >, // (cycle, executed_epochs, executed_rounds, executed_transactions, current_state, previous_state) check_cycle: Box< - dyn FnMut(usize, u64, u64, u64, Vec, Vec) -> anyhow::Result<()>, + dyn FnMut(usize, u64, u64, u64, Vec, Vec) -> anyhow::Result<()> + + Send, >, ) { let mut swarm = create_dag_swarm(num_validators).await; @@ -108,8 +115,15 @@ async fn run_dag_fail_point_test( finish_traffic: Arc::new(AtomicBool::new(false)), } }; + let (validator_clients, public_info) = { + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), 
+ ) + }; test_consensus_fault_tolerance( - &mut swarm, + validator_clients, + public_info, cycles, cycle_duration_s, parts_in_cycle, diff --git a/testsuite/smoke-test/src/execution.rs b/testsuite/smoke-test/src/execution.rs index 41802c2c713df..a63be389708cf 100644 --- a/testsuite/smoke-test/src/execution.rs +++ b/testsuite/smoke-test/src/execution.rs @@ -96,7 +96,7 @@ async fn get_last_non_reconfig_block_ending_txn_name(rest_client: &Client) -> Op #[tokio::test] async fn block_epilogue_upgrade_test() { - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(2) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(2) .with_aptos() // Start with V1 .with_init_genesis_config(Arc::new(|genesis_config| { diff --git a/testsuite/smoke-test/src/full_nodes.rs b/testsuite/smoke-test/src/full_nodes.rs index 28c6b5600d050..254f2825318a1 100644 --- a/testsuite/smoke-test/src/full_nodes.rs +++ b/testsuite/smoke-test/src/full_nodes.rs @@ -34,7 +34,7 @@ async fn test_full_node_basic_flow() { ) .await .unwrap(); - for fullnode in swarm.full_nodes_mut() { + for fullnode in swarm.full_nodes() { fullnode .wait_until_healthy(Instant::now() + Duration::from_secs(MAX_HEALTHY_WAIT_SECS)) .await @@ -126,7 +126,7 @@ async fn test_vfn_failover() { .await; let transaction_factory = swarm.chain_info().transaction_factory(); - for fullnode in swarm.full_nodes_mut() { + for fullnode in swarm.full_nodes() { fullnode .wait_until_healthy(Instant::now() + Duration::from_secs(MAX_HEALTHY_WAIT_SECS)) .await diff --git a/testsuite/smoke-test/src/fullnode.rs b/testsuite/smoke-test/src/fullnode.rs index 2b49abe245f92..089578ddc90a9 100644 --- a/testsuite/smoke-test/src/fullnode.rs +++ b/testsuite/smoke-test/src/fullnode.rs @@ -32,7 +32,7 @@ async fn test_indexer() { ) .unwrap(); - let fullnode = swarm.full_node_mut(fullnode_peer_id).unwrap(); + let fullnode = swarm.full_node(fullnode_peer_id).unwrap(); fullnode .wait_until_healthy(Instant::now() + Duration::from_secs(MAX_HEALTHY_WAIT_SECS)) 
.await diff --git a/testsuite/smoke-test/src/indexer.rs b/testsuite/smoke-test/src/indexer.rs index 1510eb51b86ac..45d2e6a2a4200 100644 --- a/testsuite/smoke-test/src/indexer.rs +++ b/testsuite/smoke-test/src/indexer.rs @@ -95,7 +95,7 @@ async fn test_old_indexer() { let conn_pool = setup_indexer().unwrap(); - let mut swarm = crate::smoke_test_environment::SwarmBuilder::new_local(1) + let swarm = crate::smoke_test_environment::SwarmBuilder::new_local(1) .with_aptos() .with_init_config(Arc::new(|_, config, _| { config.storage.enable_indexer = true; diff --git a/testsuite/smoke-test/src/inspection_service.rs b/testsuite/smoke-test/src/inspection_service.rs index 13341406c0fe3..da83113228628 100644 --- a/testsuite/smoke-test/src/inspection_service.rs +++ b/testsuite/smoke-test/src/inspection_service.rs @@ -6,7 +6,7 @@ use aptos_forge::Swarm; #[tokio::test] async fn test_inspection_service_connection() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let info = swarm.aptos_public_info(); // Ping the inspection service index page and verify we get a successful response let resp = reqwest::get(info.inspection_service_url().to_owned()) diff --git a/testsuite/smoke-test/src/jwks/jwk_consensus_basic.rs b/testsuite/smoke-test/src/jwks/jwk_consensus_basic.rs index f572dfae0cfa2..e91af813bc458 100644 --- a/testsuite/smoke-test/src/jwks/jwk_consensus_basic.rs +++ b/testsuite/smoke-test/src/jwks/jwk_consensus_basic.rs @@ -29,7 +29,7 @@ use tokio::time::sleep; async fn jwk_consensus_basic() { let epoch_duration_secs = 30; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/jwks/jwk_consensus_per_issuer.rs b/testsuite/smoke-test/src/jwks/jwk_consensus_per_issuer.rs index ac7f514051c11..76a07be724ae2 100644 --- 
a/testsuite/smoke-test/src/jwks/jwk_consensus_per_issuer.rs +++ b/testsuite/smoke-test/src/jwks/jwk_consensus_per_issuer.rs @@ -29,7 +29,7 @@ use tokio::time::sleep; async fn jwk_consensus_per_issuer() { let epoch_duration_secs = 30; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/jwks/jwk_consensus_provider_change_mind.rs b/testsuite/smoke-test/src/jwks/jwk_consensus_provider_change_mind.rs index c87671691ff91..f26e1d4bf7489 100644 --- a/testsuite/smoke-test/src/jwks/jwk_consensus_provider_change_mind.rs +++ b/testsuite/smoke-test/src/jwks/jwk_consensus_provider_change_mind.rs @@ -31,7 +31,7 @@ async fn jwk_consensus_provider_change_mind() { // Big epoch duration to ensure epoch change does not help reset validators if they are stuck. let epoch_duration_secs = 1800; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/jwks/mod.rs b/testsuite/smoke-test/src/jwks/mod.rs index ea7cab2002204..d677300ca1c6a 100644 --- a/testsuite/smoke-test/src/jwks/mod.rs +++ b/testsuite/smoke-test/src/jwks/mod.rs @@ -89,7 +89,7 @@ async fn get_patched_jwks(rest_client: &Client) -> PatchedJWKs { /// Patch the JWK with governance proposal and see it is effective. 
#[tokio::test] async fn jwk_patching() { - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_aptos() .build_with_cli(0) .await; diff --git a/testsuite/smoke-test/src/keyless.rs b/testsuite/smoke-test/src/keyless.rs index 213b2c9c65157..65defccaf5be0 100644 --- a/testsuite/smoke-test/src/keyless.rs +++ b/testsuite/smoke-test/src/keyless.rs @@ -45,7 +45,7 @@ use std::{fmt::Debug, time::Duration}; #[tokio::test] async fn test_keyless_oidc_txn_verifies() { - let (_, _, mut swarm, signed_txn) = get_transaction(get_sample_openid_sig_and_pk).await; + let (_, _, swarm, signed_txn) = get_transaction(get_sample_openid_sig_and_pk).await; info!("Submit OpenID transaction"); let result = swarm @@ -61,7 +61,7 @@ async fn test_keyless_oidc_txn_verifies() { #[tokio::test] async fn test_keyless_rotate_vk() { - let (tw_sk, config, jwk, mut swarm, mut cli, root_idx) = setup_local_net().await; + let (tw_sk, config, jwk, swarm, mut cli, root_idx) = setup_local_net().await; let mut info = swarm.aptos_public_info(); let (old_sig, old_pk) = get_sample_groth16_sig_and_pk(); @@ -164,7 +164,7 @@ async fn test_keyless_secure_test_jwk_initialized_at_genesis() { #[tokio::test] async fn test_keyless_oidc_txn_with_bad_jwt_sig() { - let (tw_sk, config, jwk, mut swarm, _, _) = setup_local_net().await; + let (tw_sk, config, jwk, swarm, _, _) = setup_local_net().await; let (mut sig, pk) = get_sample_openid_sig_and_pk(); match &mut sig.cert { @@ -190,7 +190,7 @@ async fn test_keyless_oidc_txn_with_bad_jwt_sig() { #[tokio::test] async fn test_keyless_oidc_txn_with_expired_epk() { - let (tw_sk, config, jwk, mut swarm, _, _) = setup_local_net().await; + let (tw_sk, config, jwk, swarm, _, _) = setup_local_net().await; let (mut sig, pk) = get_sample_openid_sig_and_pk(); sig.exp_date_secs = 1; // This should fail the verification since the expiration date is way in the past @@ -211,7 +211,7 @@ async fn 
test_keyless_oidc_txn_with_expired_epk() { #[tokio::test] async fn test_keyless_groth16_verifies() { - let (_, _, mut swarm, signed_txn) = get_transaction(get_sample_groth16_sig_and_pk).await; + let (_, _, swarm, signed_txn) = get_transaction(get_sample_groth16_sig_and_pk).await; info!("Submit keyless Groth16 transaction"); let result = swarm @@ -227,7 +227,7 @@ async fn test_keyless_groth16_verifies() { #[tokio::test] async fn test_keyless_no_extra_field_groth16_verifies() { - let (_, _, mut swarm, signed_txn) = + let (_, _, swarm, signed_txn) = get_transaction(get_sample_groth16_sig_and_pk_no_extra_field).await; info!("Submit keyless Groth16 transaction"); @@ -244,7 +244,7 @@ async fn test_keyless_no_extra_field_groth16_verifies() { #[tokio::test] async fn test_keyless_no_training_wheels_groth16_verifies() { - let (_tw_sk, config, jwk, mut swarm, mut cli, root_idx) = setup_local_net().await; + let (_tw_sk, config, jwk, swarm, mut cli, root_idx) = setup_local_net().await; let (sig, pk) = get_sample_groth16_sig_and_pk(); let mut info = swarm.aptos_public_info(); @@ -267,7 +267,7 @@ async fn test_keyless_no_training_wheels_groth16_verifies() { #[tokio::test] async fn test_keyless_groth16_with_mauled_proof() { - let (tw_sk, config, jwk, mut swarm, _, _) = setup_local_net().await; + let (tw_sk, config, jwk, swarm, _, _) = setup_local_net().await; let (sig, pk) = get_sample_groth16_sig_and_pk(); let mut info = swarm.aptos_public_info(); @@ -287,7 +287,7 @@ async fn test_keyless_groth16_with_mauled_proof() { #[tokio::test] async fn test_keyless_groth16_with_bad_tw_signature() { - let (_tw_sk, config, jwk, mut swarm, _, _) = setup_local_net().await; + let (_tw_sk, config, jwk, swarm, _, _) = setup_local_net().await; let (sig, pk) = get_sample_groth16_sig_and_pk(); let mut info = swarm.aptos_public_info(); @@ -438,7 +438,7 @@ async fn get_transaction( LocalSwarm, SignedTransaction, ) { - let (tw_sk, config, jwk, mut swarm, _, _) = setup_local_net().await; + let (tw_sk, 
config, jwk, swarm, _, _) = setup_local_net().await; let (sig, pk) = get_pk_and_sig_func(); diff --git a/testsuite/smoke-test/src/randomness/disable_feature_0.rs b/testsuite/smoke-test/src/randomness/disable_feature_0.rs index a5273521879f8..f004a95ebb187 100644 --- a/testsuite/smoke-test/src/randomness/disable_feature_0.rs +++ b/testsuite/smoke-test/src/randomness/disable_feature_0.rs @@ -19,7 +19,7 @@ use std::{sync::Arc, time::Duration}; async fn disable_feature_0() { let epoch_duration_secs = 20; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/disable_feature_1.rs b/testsuite/smoke-test/src/randomness/disable_feature_1.rs index 99712c7b385d6..efcfb5c5f3c53 100644 --- a/testsuite/smoke-test/src/randomness/disable_feature_1.rs +++ b/testsuite/smoke-test/src/randomness/disable_feature_1.rs @@ -21,7 +21,7 @@ use std::{sync::Arc, time::Duration}; async fn disable_feature_1() { let epoch_duration_secs = 20; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/dkg_with_validator_join_leave.rs b/testsuite/smoke-test/src/randomness/dkg_with_validator_join_leave.rs index 2f02146368a4e..4ce4dcae4089e 100644 --- a/testsuite/smoke-test/src/randomness/dkg_with_validator_join_leave.rs +++ b/testsuite/smoke-test/src/randomness/dkg_with_validator_join_leave.rs @@ -16,7 +16,7 @@ async fn dkg_with_validator_join_leave() { let estimated_dkg_latency_secs = 80; let time_limit_secs = epoch_duration_secs + estimated_dkg_latency_secs; - let mut swarm = SwarmBuilder::new_local(7) + let swarm = SwarmBuilder::new_local(7) .with_num_fullnodes(1) .with_aptos() 
.with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/e2e_basic_consumption.rs b/testsuite/smoke-test/src/randomness/e2e_basic_consumption.rs index 585a46fcd65cf..44154c16e69d6 100644 --- a/testsuite/smoke-test/src/randomness/e2e_basic_consumption.rs +++ b/testsuite/smoke-test/src/randomness/e2e_basic_consumption.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, str::FromStr, sync::Arc, time::Duration}; async fn e2e_basic_consumption() { let epoch_duration_secs = 20; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/enable_feature_0.rs b/testsuite/smoke-test/src/randomness/enable_feature_0.rs index 9aac2afb4af29..1f7e47d1d1445 100644 --- a/testsuite/smoke-test/src/randomness/enable_feature_0.rs +++ b/testsuite/smoke-test/src/randomness/enable_feature_0.rs @@ -22,7 +22,7 @@ async fn enable_feature_0() { let epoch_duration_secs = 20; let estimated_dkg_latency_secs = 40; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/enable_feature_1.rs b/testsuite/smoke-test/src/randomness/enable_feature_1.rs index f622313643812..2288f1f16b57d 100644 --- a/testsuite/smoke-test/src/randomness/enable_feature_1.rs +++ b/testsuite/smoke-test/src/randomness/enable_feature_1.rs @@ -22,7 +22,7 @@ async fn enable_feature_1() { let epoch_duration_secs = 20; let estimated_dkg_latency_secs = 40; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git 
a/testsuite/smoke-test/src/randomness/enable_feature_2.rs b/testsuite/smoke-test/src/randomness/enable_feature_2.rs index e7fef83c420fa..3f008e9fe9c78 100644 --- a/testsuite/smoke-test/src/randomness/enable_feature_2.rs +++ b/testsuite/smoke-test/src/randomness/enable_feature_2.rs @@ -17,7 +17,7 @@ async fn enable_feature_2() { let epoch_duration_secs = 20; let estimated_dkg_latency_secs = 40; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(4) .with_num_fullnodes(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { diff --git a/testsuite/smoke-test/src/randomness/entry_func_attrs.rs b/testsuite/smoke-test/src/randomness/entry_func_attrs.rs index 49c931dd3df3b..72a8df885ca46 100644 --- a/testsuite/smoke-test/src/randomness/entry_func_attrs.rs +++ b/testsuite/smoke-test/src/randomness/entry_func_attrs.rs @@ -271,7 +271,7 @@ async fn common(params: TestParams) { let epoch_duration_secs = 20; let estimated_dkg_latency_secs = 30; - let (mut swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) + let (swarm, mut cli, _faucet) = SwarmBuilder::new_local(1) .with_aptos() .with_init_genesis_config(Arc::new(move |conf| { conf.epoch_duration_secs = epoch_duration_secs; diff --git a/testsuite/smoke-test/src/rest_api.rs b/testsuite/smoke-test/src/rest_api.rs index 97a6f8c2b0bfb..46cc8eb2205a5 100644 --- a/testsuite/smoke-test/src/rest_api.rs +++ b/testsuite/smoke-test/src/rest_api.rs @@ -30,7 +30,7 @@ use std::{convert::TryFrom, str::FromStr, sync::Arc, time::Duration}; #[tokio::test] async fn test_get_index() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let info = swarm.aptos_public_info(); let resp = reqwest::get(info.url().to_owned()).await.unwrap(); @@ -39,7 +39,7 @@ async fn test_get_index() { #[tokio::test] async fn test_basic_client() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = 
new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); info.client().get_ledger_information().await.unwrap(); @@ -244,7 +244,7 @@ async fn test_gas_estimation_gas_used_limit() { #[tokio::test] async fn test_bcs() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); // Create accounts @@ -546,7 +546,7 @@ async fn test_bcs() { #[tokio::test] async fn test_view_function() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let info = swarm.aptos_public_info(); let client: &Client = info.client(); diff --git a/testsuite/smoke-test/src/rosetta.rs b/testsuite/smoke-test/src/rosetta.rs index bfdc0344ef579..b80a567cffec0 100644 --- a/testsuite/smoke-test/src/rosetta.rs +++ b/testsuite/smoke-test/src/rosetta.rs @@ -242,7 +242,7 @@ async fn test_network() { #[tokio::test] async fn test_account_balance() { - let (mut swarm, cli, _faucet, rosetta_client) = setup_simple_test(3).await; + let (swarm, cli, _faucet, rosetta_client) = setup_simple_test(3).await; let account_1 = cli.account_id(0); let account_2 = cli.account_id(1); @@ -587,7 +587,7 @@ async fn wait_for_rosetta_block(node_clients: &NodeClients<'_>, block_height: u6 #[tokio::test] async fn test_transfer() { - let (mut swarm, cli, _faucet, rosetta_client) = setup_simple_test(1).await; + let (swarm, cli, _faucet, rosetta_client) = setup_simple_test(1).await; let chain_id = swarm.chain_id(); let client = swarm.aptos_public_info().client().clone(); let sender = cli.account_id(0); @@ -2494,7 +2494,7 @@ async fn withdraw_undelegated_stake_and_wait( async fn test_delegation_pool_operations() { const NUM_TXNS_PER_PAGE: u16 = 2; - let (mut swarm, cli, _, rosetta_client) = setup_test( + let (swarm, cli, _, rosetta_client) = setup_test( 2, Arc::new(|_, config, _| config.api.max_transactions_page_size = NUM_TXNS_PER_PAGE), ) diff --git 
a/testsuite/smoke-test/src/state_sync_utils.rs b/testsuite/smoke-test/src/state_sync_utils.rs index c5ffc487ed30d..0558d169629b0 100644 --- a/testsuite/smoke-test/src/state_sync_utils.rs +++ b/testsuite/smoke-test/src/state_sync_utils.rs @@ -31,7 +31,7 @@ pub async fn create_fullnode(full_node_config: NodeConfig, swarm: &mut LocalSwar validator_peer_id, ) .unwrap(); - for fullnode in swarm.full_nodes_mut() { + for fullnode in swarm.full_nodes() { fullnode .wait_until_healthy(Instant::now() + Duration::from_secs(MAX_HEALTHY_WAIT_SECS)) .await @@ -62,7 +62,7 @@ pub async fn stop_fullnode_and_delete_storage( fullnode: AccountAddress, clear_storage: bool, ) { - let fullnode = swarm.full_node_mut(fullnode).unwrap(); + let fullnode = swarm.full_node(fullnode).unwrap(); if clear_storage { // The fullnode is implicitly stopped during the clear_storage() call fullnode.clear_storage().await.unwrap(); diff --git a/testsuite/smoke-test/src/test_smoke_tests.rs b/testsuite/smoke-test/src/test_smoke_tests.rs index 45ffd8f0b0ad4..16c28215369ec 100644 --- a/testsuite/smoke-test/src/test_smoke_tests.rs +++ b/testsuite/smoke-test/src/test_smoke_tests.rs @@ -40,7 +40,7 @@ async fn test_aptos_node_after_get_bin() { ) .unwrap(); - for fullnode in swarm.full_nodes_mut() { + for fullnode in swarm.full_nodes() { fullnode .wait_until_healthy(Instant::now() + Duration::from_secs(MAX_HEALTHY_WAIT_SECS)) .await diff --git a/testsuite/smoke-test/src/transaction.rs b/testsuite/smoke-test/src/transaction.rs index c58e36400dc65..1e9d47b85ba77 100644 --- a/testsuite/smoke-test/src/transaction.rs +++ b/testsuite/smoke-test/src/transaction.rs @@ -19,7 +19,7 @@ use aptos_sdk::{ #[ignore] #[tokio::test] async fn test_external_transaction_signer() { - let mut swarm = new_local_swarm_with_aptos(1).await; + let swarm = new_local_swarm_with_aptos(1).await; let mut info = swarm.aptos_public_info(); // generate key pair diff --git a/testsuite/smoke-test/src/txn_broadcast.rs 
b/testsuite/smoke-test/src/txn_broadcast.rs index a041e906eaf2d..018a939bcf1a7 100644 --- a/testsuite/smoke-test/src/txn_broadcast.rs +++ b/testsuite/smoke-test/src/txn_broadcast.rs @@ -39,7 +39,7 @@ async fn test_txn_broadcast() { ) .unwrap(); - for fullnode in swarm.full_nodes_mut() { + for fullnode in swarm.full_nodes() { fullnode .wait_until_healthy(Instant::now() + Duration::from_secs(MAX_HEALTHY_WAIT_SECS)) .await diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index 3a3753648ec16..473ae67719338 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -150,15 +150,21 @@ fn upgrade_and_gather_stats( let ctx = ctx_locker.deref_mut(); let emit_job_request = ctx.emit_job.clone(); let rng = SeedableRng::from_rng(ctx.core().rng()).unwrap(); - let (emitter, emit_job_request) = - match create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng) { - Ok(parts) => parts, - Err(err) => { - stats_result = Err(err); - return; - }, - }; - let source_account = ctx.swarm().chain_info().root_account; + let (emitter, emit_job_request) = match create_emitter_and_request( + ctx.swarm.clone(), + emit_job_request, + nodes, + rng, + ) + .await + { + Ok(parts) => parts, + Err(err) => { + stats_result = Err(err); + return; + }, + }; + let source_account = ctx.swarm.read().await.chain_info().root_account; (emitter, emit_job_request, source_account) // release lock on network context }; @@ -207,7 +213,15 @@ impl NetworkTest for SimpleValidatorUpgrade { // Get the different versions we're testing with let (old_version, new_version) = { - let mut versions = ctxa.ctx.lock().await.swarm().versions().collect::>(); + let mut versions = ctxa + .ctx + .lock() + .await + .swarm + .read() + .await + .versions() + .collect::>(); versions.sort(); if versions.len() != 2 { bail!("exactly two different versions needed to run compat test"); @@ -224,14 +238,26 @@ impl NetworkTest 
for SimpleValidatorUpgrade { ctxa.report_text(msg).await; // Split the swarm into 2 parts - if ctxa.ctx.lock().await.swarm().validators().count() < 4 { + if ctxa + .ctx + .lock() + .await + .swarm + .read() + .await + .validators() + .count() + < 4 + { bail!("compat test requires >= 4 validators"); } let all_validators = ctxa .ctx .lock() .await - .swarm() + .swarm + .read() + .await .validators() .map(|v| v.peer_id()) .collect::>(); @@ -331,7 +357,7 @@ impl NetworkTest for SimpleValidatorUpgrade { &txn_stat_half, ); - ctx.swarm().fork_check(epoch_duration)?; + ctx.swarm.read().await.fork_check(epoch_duration)?; // Update the second batch let msg = format!("4. upgrading second batch to new version: {}", new_version); @@ -369,7 +395,7 @@ impl NetworkTest for SimpleValidatorUpgrade { let msg = "5. check swarm health".to_string(); info!("{}", msg); ctx.report.report_text(msg); - ctx.swarm().fork_check(epoch_duration)?; + ctx.swarm.read().await.fork_check(epoch_duration)?; ctx.report.report_text(format!( "Compatibility test for {} ==> {} passed", old_version, new_version diff --git a/testsuite/testcases/src/consensus_reliability_tests.rs b/testsuite/testcases/src/consensus_reliability_tests.rs index 9397ce353609e..909e98e7c3826 100644 --- a/testsuite/testcases/src/consensus_reliability_tests.rs +++ b/testsuite/testcases/src/consensus_reliability_tests.rs @@ -13,8 +13,7 @@ use aptos_forge::{ use aptos_logger::{info, warn}; use async_trait::async_trait; use rand::Rng; -use std::{collections::HashSet, time::Duration}; -use tokio::runtime::Runtime; +use std::{collections::HashSet, sync::Arc, time::Duration}; pub struct ChangingWorkingQuorumTest { pub min_tps: usize, @@ -39,16 +38,20 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> Result { // because we are doing failure testing, we should be sending // traffic to nodes that are alive. 
- if ctx.swarm().full_nodes().count() > 0 { + let full_nodes_count = { ctx.swarm.read().await.full_nodes().count() }; + if full_nodes_count > 0 { Ok(LoadDestination::AllFullnodes) } else if self.always_healthy_nodes > 0 { - Ok(LoadDestination::Peers( - ctx.swarm() + let validator_peer_ids = { + ctx.swarm + .read() + .await .validators() .take(self.always_healthy_nodes) .map(|v| v.peer_id()) - .collect(), - )) + .collect() + }; + Ok(LoadDestination::Peers(validator_peer_ids)) } else { Ok(LoadDestination::AllValidators) } @@ -56,13 +59,11 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { async fn test( &self, - swarm: &mut dyn Swarm, + swarm: Arc>>, _report: &mut TestReport, duration: Duration, ) -> Result<()> { - let runtime = Runtime::new().unwrap(); - - let validators = swarm.get_validator_clients_with_names(); + let validators = { swarm.read().await.get_validator_clients_with_names() }; let num_validators = validators.len(); @@ -77,12 +78,22 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { ); // On every cycle, we will fail this many next nodes, and make this many previous nodes healthy again. let cycle_offset = max_fail_in_test / 4 + 1; - let num_destinations = if swarm.full_nodes().count() > 0 { - swarm.full_nodes().count() - } else if num_always_healthy > 0 { - num_always_healthy - } else { - swarm.validators().count() + let num_destinations = { + let swarm = swarm.read().await; + if swarm.full_nodes().count() > 0 { + swarm.full_nodes().count() + } else if num_always_healthy > 0 { + num_always_healthy + } else { + swarm.validators().count() + } + }; + let (validator_clients, public_info) = { + let swarm = swarm.read().await; + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) }; // Function that returns set of down nodes in a given cycle. 
let down_indices_f = move |cycle: usize| -> HashSet { @@ -110,34 +121,33 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { num_always_healthy, max_fail_in_test, num_validators, cycle_offset, self.num_large_validators); let slow_allowed_lagging = if self.add_execution_delay { - runtime.block_on(async { - let mut rng = rand::thread_rng(); - let mut slow_allowed_lagging = HashSet::new(); - for (index, (name, validator)) in - validators.iter().enumerate().skip(num_always_healthy) - { - let sleep_time = rng.gen_range(20, 500); - if sleep_time > 100 { - slow_allowed_lagging.insert(index); - } - let name = name.clone(); + let mut slow_allowed_lagging = HashSet::new(); + for (index, (name, validator)) in validators.iter().enumerate().skip(num_always_healthy) + { + let sleep_time = { + let mut rng = rand::thread_rng(); + rng.gen_range(20, 500) + }; + if sleep_time > 100 { + slow_allowed_lagging.insert(index); + } + let name = name.clone(); - validator - .set_failpoint( - "aptos_vm::execution::block_metadata".to_string(), - format!("sleep({})", sleep_time), + validator + .set_failpoint( + "aptos_vm::execution::block_metadata".to_string(), + format!("sleep({})", sleep_time), + ) + .await + .map_err(|e| { + anyhow!( + "set_failpoint to remove execution delay on {} failed, {:?}", + name, + e ) - .await - .map_err(|e| { - anyhow!( - "set_failpoint to remove execution delay on {} failed, {:?}", - name, - e - ) - })?; - } - Ok::, anyhow::Error>(slow_allowed_lagging) - })? 
+ })?; + } + slow_allowed_lagging } else { HashSet::new() }; @@ -145,32 +155,38 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { let min_tps = self.min_tps; let check_period_s = self.check_period_s; - runtime.block_on(test_consensus_fault_tolerance( - swarm, - duration.as_secs() as usize / self.check_period_s, - self.check_period_s as f32, - 1, - Box::new(FailPointFailureInjection::new(Box::new(move |cycle, part| { + let failure_injection = Box::new(FailPointFailureInjection::new(Box::new( + move |cycle, part| { if part == 0 { let down_indices = down_indices_f(cycle); info!("For cycle {} down nodes: {:?}", cycle, down_indices); // For all down nodes, we are going to drop all messages we receive. ( - down_indices.iter().flat_map(|i| { - [ - ( + down_indices + .iter() + .flat_map(|i| { + [( *i, "consensus::process::any".to_string(), "return".to_string(), - ), - ] - }).collect(), + )] + }) + .collect(), true, ) } else { (vec![], false) } - }))), + }, + ))); + + test_consensus_fault_tolerance( + validator_clients, + public_info, + duration.as_secs() as usize / self.check_period_s, + self.check_period_s as f32, + 1, + failure_injection, Box::new(move |cycle, _, _, _, cycle_end, cycle_start| { // we group nodes into 3 groups: // - active - nodes we expect to be making progress, and doing so together. we check wery strict rule of min(cycle_end) vs max(cycle_start) @@ -269,30 +285,27 @@ impl NetworkLoadTest for ChangingWorkingQuorumTest { }), false, true, - )).context("test_consensus_fault_tolerance failed")?; + ).await.context("test_consensus_fault_tolerance failed")?; // undo slowing down. 
if self.add_execution_delay { - runtime.block_on(async { - for (name, validator) in validators.iter().skip(num_always_healthy) { - let name = name.clone(); + for (name, validator) in validators.iter().skip(num_always_healthy) { + let name = name.clone(); - validator - .set_failpoint( - "aptos_vm::execution::block_metadata".to_string(), - "off".to_string(), + validator + .set_failpoint( + "aptos_vm::execution::block_metadata".to_string(), + "off".to_string(), + ) + .await + .map_err(|e| { + anyhow!( + "set_failpoint to remove execution delay on {} failed, {:?}", + name, + e ) - .await - .map_err(|e| { - anyhow!( - "set_failpoint to remove execution delay on {} failed, {:?}", - name, - e - ) - })?; - } - Ok::<(), anyhow::Error>(()) - })?; + })?; + } } Ok(()) } diff --git a/testsuite/testcases/src/dag_onchain_enable_test.rs b/testsuite/testcases/src/dag_onchain_enable_test.rs index 6a4ad2bc16069..d3b2e0f73e11c 100644 --- a/testsuite/testcases/src/dag_onchain_enable_test.rs +++ b/testsuite/testcases/src/dag_onchain_enable_test.rs @@ -14,7 +14,7 @@ use aptos_types::{ }, }; use async_trait::async_trait; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; const MAX_NODE_LAG_SECS: u64 = 360; @@ -30,14 +30,18 @@ impl Test for DagOnChainEnableTest { impl NetworkLoadTest for DagOnChainEnableTest { async fn test( &self, - swarm: &mut dyn aptos_forge::Swarm, + swarm: Arc>>, _report: &mut aptos_forge::TestReport, duration: std::time::Duration, ) -> anyhow::Result<()> { let faucet_endpoint: reqwest::Url = "http://localhost:8081".parse().unwrap(); - let rest_client = swarm.validators().next().unwrap().rest_client(); - - let rest_api_endpoint = swarm.validators().next().unwrap().rest_api_endpoint(); + let (rest_client, rest_api_endpoint) = { + let swarm = swarm.read().await; + let first_validator = swarm.validators().next().unwrap(); + let rest_client = first_validator.rest_client(); + let rest_api_endpoint = first_validator.rest_api_endpoint(); + (rest_client, 
rest_api_endpoint) + }; let mut cli = CliTestFramework::new( rest_api_endpoint, faucet_endpoint, @@ -48,7 +52,7 @@ impl NetworkLoadTest for DagOnChainEnableTest { tokio::time::sleep(duration / 3).await; let root_cli_index = { - let root_account = swarm.chain_info().root_account(); + let root_account = swarm.read().await.chain_info().root_account(); cli.add_account_with_address_to_cli( root_account.private_key().clone(), root_account.address(), @@ -99,7 +103,7 @@ impl NetworkLoadTest for DagOnChainEnableTest { tokio::time::sleep(duration / 3).await; let root_cli_index = { - let root_account = swarm.chain_info().root_account(); + let root_account = swarm.read().await.chain_info().root_account(); cli.add_account_with_address_to_cli( root_account.private_key().clone(), root_account.address(), @@ -152,7 +156,7 @@ impl NetworkLoadTest for DagOnChainEnableTest { tokio::time::sleep(duration / 3).await; let root_cli_index = { - let root_account = swarm.chain_info().root_account(); + let root_account = swarm.read().await.chain_info().root_account(); cli.add_account_with_address_to_cli( root_account.private_key().clone(), root_account.address(), @@ -198,6 +202,8 @@ impl NetworkLoadTest for DagOnChainEnableTest { // Wait for all nodes to synchronize and stabilize. 
info!("Waiting for the validators to be synchronized."); swarm + .read() + .await .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) .await?; diff --git a/testsuite/testcases/src/forge_setup_test.rs b/testsuite/testcases/src/forge_setup_test.rs index 9044164a08cea..aba938f235c13 100644 --- a/testsuite/testcases/src/forge_setup_test.rs +++ b/testsuite/testcases/src/forge_setup_test.rs @@ -13,7 +13,6 @@ use rand::{ Rng, SeedableRng, }; use std::{ops::DerefMut, thread, time::Duration}; -use tokio::runtime::Runtime; const STATE_SYNC_VERSION_COUNTER_NAME: &str = "aptos_state_sync_version"; @@ -29,53 +28,57 @@ impl Test for ForgeSetupTest { impl NetworkTest for ForgeSetupTest { async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { let mut rng = StdRng::from_seed(OsRng.gen()); - let runtime = Runtime::new().unwrap(); let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let swarm = ctx.swarm(); + // TODO: decrease lock shadow on swarm for this test + { + let swarm = ctx.swarm.read().await; - let all_fullnodes = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); - let fullnode_id = all_fullnodes.iter().choose(&mut rng).unwrap(); + let all_fullnodes = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); + let fullnode_id = all_fullnodes.iter().choose(&mut rng).unwrap(); - info!("Pick one fullnode to stop and wipe"); - let fullnode = swarm.full_node(*fullnode_id).unwrap(); - runtime.block_on(fullnode.clear_storage())?; - runtime.block_on(fullnode.start())?; + info!("Pick one fullnode to stop and wipe"); + let fullnode = swarm.full_node(*fullnode_id).unwrap(); + fullnode.clear_storage().await?; + fullnode.start().await?; - let fullnode = swarm.full_node(*fullnode_id).unwrap(); - let fullnode_name = fullnode.name(); + let fullnode = swarm.full_node(*fullnode_id).unwrap(); + let fullnode_name = fullnode.name(); - for _ in 0..10 { - let query = format!( - "{}{{instance=\"{}\",type=\"synced\"}}", - 
STATE_SYNC_VERSION_COUNTER_NAME, &fullnode_name - ); - info!("PromQL Query {}", query); - let r = runtime.block_on(swarm.query_metrics(&query, None, None))?; - let ivs = r.as_instant().unwrap(); - for iv in ivs { - info!( - "{}: {}", - STATE_SYNC_VERSION_COUNTER_NAME, - iv.sample().value() + for _ in 0..10 { + let query = format!( + "{}{{instance=\"{}\",type=\"synced\"}}", + STATE_SYNC_VERSION_COUNTER_NAME, &fullnode_name ); + info!("PromQL Query {}", query); + let r = swarm.query_metrics(&query, None, None).await?; + let ivs = r.as_instant().unwrap(); + for iv in ivs { + info!( + "{}: {}", + STATE_SYNC_VERSION_COUNTER_NAME, + iv.sample().value() + ); + } + thread::sleep(std::time::Duration::from_secs(5)); } - thread::sleep(std::time::Duration::from_secs(5)); } // add some PFNs and send load to them let mut pfns = Vec::new(); let num_pfns = 5; - for _ in 0..num_pfns { - let pfn_version = swarm.versions().max().unwrap(); - let pfn_node_config = - OverrideNodeConfig::new_with_default_base(swarm.get_default_pfn_node_config()); - let pfn_peer_id = - runtime.block_on(swarm.add_full_node(&pfn_version, pfn_node_config))?; + { + let mut swarm = ctx.swarm.write().await; + for _ in 0..num_pfns { + let pfn_version = swarm.versions().max().unwrap(); + let pfn_node_config = + OverrideNodeConfig::new_with_default_base(swarm.get_default_pfn_node_config()); + let pfn_peer_id = swarm.add_full_node(&pfn_version, pfn_node_config).await?; - let _pfn = swarm.full_node(pfn_peer_id).context("pfn not found")?; - pfns.push(pfn_peer_id); + let _pfn = swarm.full_node(pfn_peer_id).context("pfn not found")?; + pfns.push(pfn_peer_id); + } } let duration = Duration::from_secs(10 * num_pfns); diff --git a/testsuite/testcases/src/framework_upgrade.rs b/testsuite/testcases/src/framework_upgrade.rs index 1904e36fead12..f594ac6113358 100644 --- a/testsuite/testcases/src/framework_upgrade.rs +++ b/testsuite/testcases/src/framework_upgrade.rs @@ -14,7 +14,7 @@ use aptos_temppath::TempPath; use 
aptos_types::transaction::authenticator::AuthenticationKey; use async_trait::async_trait; use std::ops::DerefMut; -use tokio::{runtime::Runtime, time::Duration}; +use tokio::time::Duration; pub struct FrameworkUpgrade; @@ -33,13 +33,12 @@ impl NetworkTest for FrameworkUpgrade { async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let runtime = Runtime::new()?; let epoch_duration = Duration::from_secs(Self::EPOCH_DURATION_SECS); // Get the different versions we're testing with let (old_version, new_version) = { - let mut versions = ctx.swarm().versions().collect::>(); + let mut versions = ctx.swarm.read().await.versions().collect::>(); versions.sort(); if versions.len() != 2 { bail!("exactly two different versions needed to run compat test"); @@ -48,11 +47,14 @@ impl NetworkTest for FrameworkUpgrade { (versions[0].clone(), versions[1].clone()) }; - let all_validators = ctx - .swarm() - .validators() - .map(|v| v.peer_id()) - .collect::>(); + let all_validators = { + ctx.swarm + .read() + .await + .validators() + .map(|v| v.peer_id()) + .collect::>() + }; let msg = format!( "Compatibility test results for {} ==> {} (PR)", @@ -66,7 +68,7 @@ impl NetworkTest for FrameworkUpgrade { let msg = format!("Upgrade the nodes to version: {}", new_version); info!("{}", msg); ctx.report.report_text(msg); - runtime.block_on(batch_update(ctx, first_half, &new_version))?; + batch_update(ctx, first_half, &new_version).await?; // Generate some traffic let duration = Duration::from_secs(30); @@ -76,7 +78,9 @@ impl NetworkTest for FrameworkUpgrade { &txn_stat, ); - ctx.swarm().fork_check(epoch_duration)?; + { + ctx.swarm.read().await.fork_check(epoch_duration)?; + } // Apply the framework release bundle. 
let root_key_path = TempPath::new(); @@ -104,39 +108,43 @@ impl NetworkTest for FrameworkUpgrade { AuthenticationKey::ed25519(&validator_key.public_key()).account_address(); let network_info = aptos_release_builder::validate::NetworkConfig { - endpoint: ctx.swarm().validators().last().unwrap().rest_api_endpoint(), + endpoint: ctx + .swarm + .read() + .await + .validators() + .last() + .unwrap() + .rest_api_endpoint(), root_key_path: root_key_path.path().to_path_buf(), validator_account, validator_key, framework_git_rev: None, }; - runtime.block_on(network_info.mint_to_validator())?; + network_info.mint_to_validator().await?; let release_config = aptos_release_builder::current_release_config(); - runtime.block_on(aptos_release_builder::validate::validate_config( - release_config.clone(), - network_info, - ))?; + aptos_release_builder::validate::validate_config(release_config.clone(), network_info) + .await?; // Update the sequence number for the root account - let root_account = ctx.swarm().chain_info().root_account().address(); + let root_account = { ctx.swarm.read().await.chain_info().root_account().address() }; // Test the module publishing workflow - let sequence_number = runtime - .block_on( - ctx.swarm() - .chain_info() - .rest_client() - .get_account(root_account), - ) - .unwrap() - .inner() - .sequence_number; - ctx.swarm() - .chain_info() - .root_account() - .set_sequence_number(sequence_number); + { + let chain_info = ctx.swarm.read().await.chain_info(); + let sequence_number = chain_info + .rest_client() + .get_account(root_account) + .await + .unwrap() + .inner() + .sequence_number; + chain_info + .root_account() + .set_sequence_number(sequence_number); + } // Generate some traffic let duration = Duration::from_secs(30); @@ -146,12 +154,16 @@ impl NetworkTest for FrameworkUpgrade { &txn_stat, ); - ctx.swarm().fork_check(epoch_duration)?; + { + ctx.swarm.read().await.fork_check(epoch_duration)?; + } let msg = "5. 
check swarm health".to_string(); info!("{}", msg); ctx.report.report_text(msg); - ctx.swarm().fork_check(epoch_duration)?; + { + ctx.swarm.read().await.fork_check(epoch_duration)?; + } ctx.report.report_text(format!( "Compatibility test for {} ==> {} passed", old_version, new_version @@ -162,7 +174,7 @@ impl NetworkTest for FrameworkUpgrade { let msg = format!("Upgrade the remaining nodes to version: {}", new_version); info!("{}", msg); ctx.report.report_text(msg); - runtime.block_on(batch_update(ctx, second_half, &new_version))?; + batch_update(ctx, second_half, &new_version).await?; let duration = Duration::from_secs(30); let txn_stat = generate_traffic(ctx, &all_validators, duration).await?; @@ -171,7 +183,9 @@ impl NetworkTest for FrameworkUpgrade { &txn_stat, ); - ctx.swarm().fork_check(epoch_duration)?; + { + ctx.swarm.read().await.fork_check(epoch_duration)?; + } Ok(()) } diff --git a/testsuite/testcases/src/fullnode_reboot_stress_test.rs b/testsuite/testcases/src/fullnode_reboot_stress_test.rs index cd1f9f74f1dbd..8fdd177b5af9b 100644 --- a/testsuite/testcases/src/fullnode_reboot_stress_test.rs +++ b/testsuite/testcases/src/fullnode_reboot_stress_test.rs @@ -7,7 +7,7 @@ use aptos_forge::{ }; use async_trait::async_trait; use rand::{seq::SliceRandom, thread_rng}; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; use tokio::time::Instant; pub struct FullNodeRebootStressTest; @@ -26,23 +26,33 @@ impl NetworkLoadTest for FullNodeRebootStressTest { async fn test( &self, - swarm: &mut dyn Swarm, + swarm: Arc>>, _report: &mut TestReport, duration: Duration, ) -> Result<()> { let start = Instant::now(); - let all_fullnodes = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); + let all_fullnodes = { + swarm + .read() + .await + .full_nodes() + .map(|v| v.peer_id()) + .collect::>() + }; while start.elapsed() < duration { - let fullnode_to_reboot = { - let mut rng = thread_rng(); - swarm - .full_node(*all_fullnodes.choose(&mut rng).unwrap()) - 
.unwrap() - }; - fullnode_to_reboot.stop().await?; - fullnode_to_reboot.start().await?; + { + let swarm = swarm.read().await; + let fullnode_to_reboot = { + let mut rng = thread_rng(); + swarm + .full_node(*all_fullnodes.choose(&mut rng).unwrap()) + .unwrap() + }; + fullnode_to_reboot.stop().await?; + fullnode_to_reboot.start().await?; + } tokio::time::sleep(Duration::from_secs(10)).await; } diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index 161d48d9684b1..6b471fec068a3 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -41,6 +41,7 @@ use rand::{rngs::StdRng, SeedableRng}; use std::{ fmt::Write, ops::DerefMut, + sync::Arc, time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; use tokio::runtime::{Handle, Runtime}; @@ -54,13 +55,19 @@ async fn batch_update( version: &Version, ) -> Result<()> { for validator in validators_to_update { - ctx.swarm().upgrade_validator(*validator, version).await?; + ctx.swarm + .write() + .await + .upgrade_validator(*validator, version) + .await?; } - ctx.swarm().health_check().await?; + ctx.swarm.read().await.health_check().await?; let deadline = Instant::now() + Duration::from_secs(60); for validator in validators_to_update { - ctx.swarm() + ctx.swarm + .read() + .await .validator(*validator) .unwrap() .wait_until_healthy(deadline) @@ -81,19 +88,25 @@ async fn batch_update_gradually( // let mut swarm = ctx.swarm(); for validator in validators_to_update { info!("batch_update_gradually upgrade start: {}", validator); - ctxa.ctx - .lock() - .await - .swarm() - .upgrade_validator(*validator, version) - .await?; + { + ctxa.ctx + .lock() + .await + .swarm + .write() + .await + .upgrade_validator(*validator, version) + .await?; + } if wait_until_healthy { info!("batch_update_gradually upgrade waiting: {}", validator); let deadline = Instant::now() + max_wait; ctxa.ctx .lock() .await - .swarm() + .swarm + .read() + .await .validator(*validator) .unwrap() 
.wait_until_healthy(deadline) @@ -107,13 +120,20 @@ async fn batch_update_gradually( info!("batch_update_gradually upgrade done: {}", validator); } - ctxa.ctx.lock().await.swarm().health_check().await?; + ctxa.ctx + .lock() + .await + .swarm + .read() + .await + .health_check() + .await?; Ok(()) } -pub fn create_emitter_and_request( - swarm: &mut dyn Swarm, +pub async fn create_emitter_and_request( + swarm: Arc>>, mut emit_job_request: EmitJobRequest, nodes: &[PeerId], rng: StdRng, @@ -121,12 +141,16 @@ pub fn create_emitter_and_request( // as we are loading nodes, use higher client timeout let client_timeout = Duration::from_secs(30); - let chain_info = swarm.chain_info(); + let chain_info = swarm.read().await.chain_info(); let transaction_factory = TransactionFactory::new(chain_info.chain_id); let emitter = TxnEmitter::new(transaction_factory, rng); - emit_job_request = - emit_job_request.rest_clients(swarm.get_clients_for_peers(nodes, client_timeout)); + emit_job_request = emit_job_request.rest_clients( + swarm + .read() + .await + .get_clients_for_peers(nodes, client_timeout), + ); Ok((emitter, emit_job_request)) } @@ -143,11 +167,11 @@ pub async fn generate_traffic( let emit_job_request = ctx.emit_job.clone(); let rng = SeedableRng::from_rng(ctx.core().rng())?; let (emitter, emit_job_request) = - create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng)?; + create_emitter_and_request(ctx.swarm.clone(), emit_job_request, nodes, rng).await?; let stats = emitter .emit_txn_for( - ctx.swarm().chain_info().root_account, + ctx.swarm.read().await.chain_info().root_account, emit_job_request, duration, ) @@ -190,7 +214,11 @@ pub enum LoadDestination { } impl LoadDestination { - fn get_destination_nodes(self, swarm: &mut dyn Swarm) -> Vec { + async fn get_destination_nodes( + self, + swarm: Arc>>, + ) -> Vec { + let swarm = swarm.read().await; let all_validators = swarm.validators().map(|v| v.peer_id()).collect::>(); let all_fullnodes = 
swarm.full_nodes().map(|v| v.peer_id()).collect::>(); @@ -221,7 +249,7 @@ pub trait NetworkLoadTest: Test { // time to finish. How long this function takes will dictate how long the actual test lasts. async fn test( &self, - _swarm: &mut dyn Swarm, + _swarm: Arc>>, _report: &mut TestReport, duration: Duration, ) -> Result<()> { @@ -244,7 +272,9 @@ impl NetworkTest for dyn NetworkLoadTest { .expect("Time went backwards") .as_secs(); let (start_version, _) = ctx - .swarm() + .swarm + .read() + .await .get_client_with_newest_ledger_version() .await .context("no clients replied for start version")?; @@ -296,7 +326,9 @@ impl NetworkTest for dyn NetworkLoadTest { .expect("Time went backwards") .as_secs(); let (end_version, _) = ctx - .swarm() + .swarm + .read() + .await .get_client_with_newest_ledger_version() .await .context("no clients replied for end version")?; @@ -332,16 +364,23 @@ impl dyn NetworkLoadTest + '_ { rng: StdRng, ) -> Result> { let destination = self.setup(ctx).await.context("setup NetworkLoadTest")?; - let nodes_to_send_load_to = destination.get_destination_nodes(ctx.swarm()); + let nodes_to_send_load_to = destination.get_destination_nodes(ctx.swarm.clone()).await; // Generate some traffic - let (mut emitter, emit_job_request) = - create_emitter_and_request(ctx.swarm(), emit_job_request, &nodes_to_send_load_to, rng) - .context("create emitter")?; + let (mut emitter, emit_job_request) = create_emitter_and_request( + ctx.swarm.clone(), + emit_job_request, + &nodes_to_send_load_to, + rng, + ) + .await + .context("create emitter")?; let clients = ctx - .swarm() + .swarm + .read() + .await .get_clients_for_peers(&nodes_to_send_load_to, Duration::from_secs(10)); let mut stats_tracking_phases = emit_job_request.get_num_phases(); @@ -353,7 +392,7 @@ impl dyn NetworkLoadTest + '_ { info!("Starting emitting txns for {}s", duration.as_secs()); let mut job = emitter .start_job( - ctx.swarm().chain_info().root_account, + 
ctx.swarm.read().await.chain_info().root_account, emit_job_request, stats_tracking_phases, ) @@ -387,7 +426,7 @@ impl dyn NetworkLoadTest + '_ { let phase_start = PhaseTimingStart::now(); let join_stats = Handle::current().spawn(job.periodic_stat_forward(phase_duration, 60)); - self.test(ctx.swarm, ctx.report, phase_duration) + self.test(ctx.swarm.clone(), ctx.report, phase_duration) .await .context("test NetworkLoadTest")?; job = join_stats.await.context("join stats")?; @@ -436,7 +475,7 @@ impl dyn NetworkLoadTest + '_ { Some(cur.clone()) }; let latency_breakdown = fetch_latency_breakdown( - ctx.swarm(), + ctx.swarm.clone(), phase_timing[i].start_unixtime_s, phase_timing[i].end_unixtime_s, ) diff --git a/testsuite/testcases/src/load_vs_perf_benchmark.rs b/testsuite/testcases/src/load_vs_perf_benchmark.rs index 72c60ff0780c5..64123c184453f 100644 --- a/testsuite/testcases/src/load_vs_perf_benchmark.rs +++ b/testsuite/testcases/src/load_vs_perf_benchmark.rs @@ -14,7 +14,6 @@ use aptos_logger::info; use async_trait::async_trait; use rand::SeedableRng; use std::{fmt::Debug, ops::DerefMut, time::Duration}; -use tokio::runtime::Runtime; // add larger warmup, as when we are exceeding the max load, // it takes more time to fill mempool. 
@@ -230,26 +229,28 @@ impl NetworkTest for LoadVsPerfBenchmark { let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let rt = Runtime::new().unwrap(); let mut continous_job = if let Some(continuous_traffic) = &self.continuous_traffic { - let nodes_to_send_load_to = - LoadDestination::FullnodesOtherwiseValidators.get_destination_nodes(ctx.swarm()); + let nodes_to_send_load_to = LoadDestination::FullnodesOtherwiseValidators + .get_destination_nodes(ctx.swarm.clone()) + .await; let rng = SeedableRng::from_rng(ctx.core().rng())?; let (mut emitter, emit_job_request) = create_emitter_and_request( - ctx.swarm(), + ctx.swarm.clone(), continuous_traffic.traffic.clone(), &nodes_to_send_load_to, rng, ) + .await .context("create emitter")?; - let job = rt - .block_on(emitter.start_job( - ctx.swarm().chain_info().root_account, + let job = emitter + .start_job( + ctx.swarm.read().await.chain_info().root_account, emit_job_request, 1 + 2 * self.workloads.len(), - )) + ) + .await .context("start emitter job")?; Some(job) } else { @@ -301,26 +302,29 @@ impl NetworkTest for LoadVsPerfBenchmark { ctx.report.report_text(line); } - let continuous_results = continous_job.map(|job| { - let stats_by_phase = rt.block_on(job.stop_job()); - - let mut result = vec![]; - for (phase, phase_stats) in stats_by_phase.into_iter().enumerate() { - if phase % 2 != 0 { - result.push(( - format!("continuous with traffic {}", phase / 2), - phase_stats, - )); + let continuous_results = match continous_job { + Some(job) => { + let stats_by_phase = job.stop_job().await; + + let mut result = vec![]; + for (phase, phase_stats) in stats_by_phase.into_iter().enumerate() { + if phase % 2 != 0 { + result.push(( + format!("continuous with traffic {}", phase / 2), + phase_stats, + )); + } } - } - let table = to_table_continuous("continuous traffic".to_string(), &result); - for line in table { - ctx.report.report_text(line); - } + let table = to_table_continuous("continuous 
traffic".to_string(), &result); + for line in table { + ctx.report.report_text(line); + } - result - }); + Some(result) + }, + None => None, + }; for (index, result) in results.iter().enumerate() { // always take last phase for success criteria diff --git a/testsuite/testcases/src/modifiers.rs b/testsuite/testcases/src/modifiers.rs index 6eac71da34b5d..a881780b17d2f 100644 --- a/testsuite/testcases/src/modifiers.rs +++ b/testsuite/testcases/src/modifiers.rs @@ -10,67 +10,68 @@ use aptos_logger::info; use aptos_types::PeerId; use async_trait::async_trait; use rand::Rng; -use tokio::runtime::Runtime; - -fn add_execution_delay(swarm: &mut dyn Swarm, config: &ExecutionDelayConfig) -> anyhow::Result<()> { - let runtime = Runtime::new().unwrap(); - let validators = swarm.get_validator_clients_with_names(); - - runtime.block_on(async { - let mut rng = rand::thread_rng(); - for (name, validator) in validators { - let sleep_percentage = if rng.gen_bool(config.inject_delay_node_fraction) { +use std::sync::Arc; + +async fn add_execution_delay( + swarm: Arc>>, + config: &ExecutionDelayConfig, +) -> anyhow::Result<()> { + let validators = { swarm.read().await.get_validator_clients_with_names() }; + + for (name, validator) in validators { + let sleep_percentage = { + let mut rng = rand::thread_rng(); + if rng.gen_bool(config.inject_delay_node_fraction) { rng.gen_range(1_u32, config.inject_delay_max_transaction_percentage) } else { 0 - }; - info!( - "Validator {} adding {}% of transactions with {}ms execution delay", - name, sleep_percentage, config.inject_delay_per_transaction_ms - ); - validator - .set_failpoint( - "aptos_vm::execution::user_transaction".to_string(), - format!( - "{}%delay({})", - sleep_percentage, config.inject_delay_per_transaction_ms - ), + } + }; + info!( + "Validator {} adding {}% of transactions with {}ms execution delay", + name, sleep_percentage, config.inject_delay_per_transaction_ms + ); + validator + .set_failpoint( + 
"aptos_vm::execution::user_transaction".to_string(), + format!( + "{}%delay({})", + sleep_percentage, config.inject_delay_per_transaction_ms + ), + ) + .await + .map_err(|e| { + anyhow::anyhow!( + "set_failpoint to add execution delay on {} failed, {:?}", + name, + e ) - .await - .map_err(|e| { - anyhow::anyhow!( - "set_failpoint to add execution delay on {} failed, {:?}", - name, - e - ) - })?; - } - Ok(()) - }) + })?; + } + Ok(()) } -fn remove_execution_delay(swarm: &mut dyn Swarm) -> anyhow::Result<()> { - let runtime = Runtime::new().unwrap(); - let validators = swarm.get_validator_clients_with_names(); - - runtime.block_on(async { - for (name, validator) in validators { - validator - .set_failpoint( - "aptos_vm::execution::block_metadata".to_string(), - "off".to_string(), +async fn remove_execution_delay( + swarm: Arc>>, +) -> anyhow::Result<()> { + let validators = { swarm.read().await.get_validator_clients_with_names() }; + + for (name, validator) in validators { + validator + .set_failpoint( + "aptos_vm::execution::block_metadata".to_string(), + "off".to_string(), + ) + .await + .map_err(|e| { + anyhow::anyhow!( + "set_failpoint to remove execution delay on {} failed, {:?}", + name, + e ) - .await - .map_err(|e| { - anyhow::anyhow!( - "set_failpoint to remove execution delay on {} failed, {:?}", - name, - e - ) - })?; - } - Ok(()) - }) + })?; + } + Ok(()) } /// Config for adding variable processing overhead/delay into @@ -95,12 +96,12 @@ pub struct ExecutionDelayTest { #[async_trait] impl NetworkLoadTest for ExecutionDelayTest { async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { - add_execution_delay(ctx.swarm(), &self.add_execution_delay)?; + add_execution_delay(ctx.swarm.clone(), &self.add_execution_delay).await?; Ok(LoadDestination::FullnodesOtherwiseValidators) } async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { - remove_execution_delay(ctx.swarm()) + remove_execution_delay(ctx.swarm.clone()).await 
} } @@ -129,8 +130,7 @@ pub struct NetworkUnreliabilityTest { #[async_trait] impl NetworkLoadTest for NetworkUnreliabilityTest { async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { - let swarm = ctx.swarm(); - let validators = swarm.get_validator_clients_with_names(); + let validators = { ctx.swarm.read().await.get_validator_clients_with_names() }; for (name, validator) in validators { let drop_percentage = { @@ -168,7 +168,7 @@ impl NetworkLoadTest for NetworkUnreliabilityTest { } async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { - let validators = ctx.swarm().get_validator_clients_with_names(); + let validators = { ctx.swarm.read().await.get_validator_clients_with_names() }; for (name, validator) in validators { validator @@ -227,8 +227,16 @@ impl CpuChaosTest { /// Creates a new SwarmCpuStress to be injected via chaos. Note: /// CPU chaos is only done for the validators in the swarm (and /// not the fullnodes). - fn create_cpu_chaos(&self, swarm: &mut dyn Swarm) -> SwarmCpuStress { - let all_validators = swarm.validators().map(|v| v.peer_id()).collect::>(); + async fn create_cpu_chaos( + &self, + swarm: Arc>>, + ) -> SwarmCpuStress { + let all_validators = swarm + .read() + .await + .validators() + .map(|v| v.peer_id()) + .collect::>(); let cpu_chaos_config = self.cpu_chaos_config.clone(); create_swarm_cpu_stress(all_validators, Some(cpu_chaos_config)) } @@ -283,9 +291,11 @@ pub fn create_swarm_cpu_stress( #[async_trait] impl NetworkLoadTest for CpuChaosTest { async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { - let swarm_cpu_stress = self.create_cpu_chaos(ctx.swarm()); + let swarm_cpu_stress = self.create_cpu_chaos(ctx.swarm.clone()).await; ctx.swarm + .write() + .await .inject_chaos(SwarmChaos::CpuStress(swarm_cpu_stress)) .await?; @@ -293,9 +303,11 @@ impl NetworkLoadTest for CpuChaosTest { } async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { - let 
swarm_cpu_stress = self.create_cpu_chaos(ctx.swarm()); + let swarm_cpu_stress = self.create_cpu_chaos(ctx.swarm.clone()).await; ctx.swarm + .write() + .await .remove_chaos(SwarmChaos::CpuStress(swarm_cpu_stress)) .await } diff --git a/testsuite/testcases/src/multi_region_network_test.rs b/testsuite/testcases/src/multi_region_network_test.rs index 3969a72be6877..840d37699152e 100644 --- a/testsuite/testcases/src/multi_region_network_test.rs +++ b/testsuite/testcases/src/multi_region_network_test.rs @@ -10,7 +10,7 @@ use aptos_logger::info; use aptos_types::PeerId; use async_trait::async_trait; use itertools::{self, EitherOrBoth, Itertools}; -use std::collections::BTreeMap; +use std::{collections::BTreeMap, sync::Arc}; /// The link stats are obtained from https://github.com/doitintl/intercloud-throughput/blob/master/results_202202/results.csv /// The four regions were hand-picked from the dataset to simulate a multi-region setup @@ -255,9 +255,16 @@ impl MultiRegionNetworkEmulationTest { /// Creates a new SwarmNetEm to be injected via chaos. Note: network /// emulation is only done for the validators in the swarm (and not /// the fullnodes). 
- fn create_netem_chaos(&self, swarm: &mut dyn Swarm) -> SwarmNetEm { - let all_validators = swarm.validators().map(|v| v.peer_id()).collect::>(); - let all_vfns = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); + async fn create_netem_chaos( + &self, + swarm: Arc>>, + ) -> SwarmNetEm { + let (all_validators, all_vfns) = { + let swarm = swarm.read().await; + let all_validators = swarm.validators().map(|v| v.peer_id()).collect::>(); + let all_vfns = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); + (all_validators, all_vfns) + }; let all_pairs: Vec<_> = all_validators .iter() @@ -316,15 +323,23 @@ pub fn create_multi_region_swarm_network_chaos( #[async_trait] impl NetworkLoadTest for MultiRegionNetworkEmulationTest { async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { - let chaos = self.create_netem_chaos(ctx.swarm); - ctx.swarm.inject_chaos(SwarmChaos::NetEm(chaos)).await?; + let chaos = self.create_netem_chaos(ctx.swarm.clone()).await; + ctx.swarm + .write() + .await + .inject_chaos(SwarmChaos::NetEm(chaos)) + .await?; Ok(LoadDestination::FullnodesOtherwiseValidators) } async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { - let chaos = self.create_netem_chaos(ctx.swarm); - ctx.swarm.remove_chaos(SwarmChaos::NetEm(chaos)).await?; + let chaos = self.create_netem_chaos(ctx.swarm.clone()).await; + ctx.swarm + .write() + .await + .remove_chaos(SwarmChaos::NetEm(chaos)) + .await?; Ok(()) } } diff --git a/testsuite/testcases/src/network_bandwidth_test.rs b/testsuite/testcases/src/network_bandwidth_test.rs index d8514c630333e..30c7bf4bba4ea 100644 --- a/testsuite/testcases/src/network_bandwidth_test.rs +++ b/testsuite/testcases/src/network_bandwidth_test.rs @@ -29,6 +29,8 @@ impl Test for NetworkBandwidthTest { impl NetworkLoadTest for NetworkBandwidthTest { async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { ctx.swarm + .write() + .await 
.inject_chaos(SwarmChaos::Bandwidth(SwarmNetworkBandwidth { group_network_bandwidths: vec![GroupNetworkBandwidth { name: format!("forge-namespace-{}mbps-bandwidth", RATE_MBPS), @@ -51,6 +53,8 @@ impl NetworkLoadTest for NetworkBandwidthTest { async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { ctx.swarm + .write() + .await .remove_chaos(SwarmChaos::Bandwidth(SwarmNetworkBandwidth { group_network_bandwidths: vec![GroupNetworkBandwidth { name: format!("forge-namespace-{}mbps-bandwidth", RATE_MBPS), diff --git a/testsuite/testcases/src/network_loss_test.rs b/testsuite/testcases/src/network_loss_test.rs index c5e93a3e2d87b..925d04dfd82f2 100644 --- a/testsuite/testcases/src/network_loss_test.rs +++ b/testsuite/testcases/src/network_loss_test.rs @@ -24,6 +24,8 @@ impl Test for NetworkLossTest { impl NetworkLoadTest for NetworkLossTest { async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { ctx.swarm + .write() + .await .inject_chaos(SwarmChaos::Loss(SwarmNetworkLoss { loss_percentage: LOSS_PERCENTAGE, correlation_percentage: CORRELATION_PERCENTAGE, @@ -41,6 +43,8 @@ impl NetworkLoadTest for NetworkLossTest { async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { ctx.swarm + .write() + .await .remove_chaos(SwarmChaos::Loss(SwarmNetworkLoss { loss_percentage: LOSS_PERCENTAGE, correlation_percentage: CORRELATION_PERCENTAGE, diff --git a/testsuite/testcases/src/network_partition_test.rs b/testsuite/testcases/src/network_partition_test.rs index 00efe7a4e38e7..48f192f91c2be 100644 --- a/testsuite/testcases/src/network_partition_test.rs +++ b/testsuite/testcases/src/network_partition_test.rs @@ -24,6 +24,8 @@ impl Test for NetworkPartitionTest { impl NetworkLoadTest for NetworkPartitionTest { async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { ctx.swarm + .write() + .await .inject_chaos(SwarmChaos::Partition(SwarmNetworkPartition { partition_percentage: PARTITION_PERCENTAGE, 
})) @@ -38,6 +40,8 @@ impl NetworkLoadTest for NetworkPartitionTest { // Just send the load to last validator which is not included in the partition Ok(LoadDestination::Peers(vec![ctx .swarm + .read() + .await .validators() .last() .map(|v| v.peer_id()) @@ -46,6 +50,8 @@ impl NetworkLoadTest for NetworkPartitionTest { async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { ctx.swarm + .write() + .await .remove_chaos(SwarmChaos::Partition(SwarmNetworkPartition { partition_percentage: PARTITION_PERCENTAGE, })) diff --git a/testsuite/testcases/src/partial_nodes_down_test.rs b/testsuite/testcases/src/partial_nodes_down_test.rs index 23813ce0d5e48..84cb944df3882 100644 --- a/testsuite/testcases/src/partial_nodes_down_test.rs +++ b/testsuite/testcases/src/partial_nodes_down_test.rs @@ -24,14 +24,17 @@ impl NetworkTest for PartialNodesDown { let runtime = Runtime::new()?; let duration = Duration::from_secs(120); let all_validators = ctx - .swarm() + .swarm + .read() + .await .validators() .map(|v| v.peer_id()) .collect::>(); let mut down_nodes = all_validators.clone(); let up_nodes = down_nodes.split_off(all_validators.len() / 10); for n in &down_nodes { - let node = ctx.swarm().validator(*n).unwrap(); + let swarm = ctx.swarm.read().await; + let node = swarm.validator(*n).unwrap(); println!("Node {} is going to stop", node.name()); runtime.block_on(node.stop())?; } @@ -42,7 +45,8 @@ impl NetworkTest for PartialNodesDown { ctx.report .report_txn_stats(self.name().to_string(), &txn_stat); for n in &down_nodes { - let node = ctx.swarm().validator(*n).unwrap(); + let swarm = ctx.swarm.read().await; + let node = swarm.validator(*n).unwrap(); println!("Node {} is going to restart", node.name()); runtime.block_on(node.start())?; } diff --git a/testsuite/testcases/src/public_fullnode_performance.rs b/testsuite/testcases/src/public_fullnode_performance.rs index afa3148492b43..88b3fac41b72c 100644 --- a/testsuite/testcases/src/public_fullnode_performance.rs 
+++ b/testsuite/testcases/src/public_fullnode_performance.rs @@ -22,8 +22,7 @@ use rand::{ seq::SliceRandom, Rng, SeedableRng, }; -use std::iter::once; -use tokio::runtime::Runtime; +use std::{iter::once, sync::Arc}; /// A simple test that adds multiple public fullnodes (PFNs) to the swarm /// and submits transactions through them. Network emulation chaos can also @@ -58,9 +57,12 @@ impl PFNPerformance { /// Creates CPU chaos for the swarm. Note: CPU chaos is added /// to all validators, VFNs and PFNs in the swarm. - fn create_cpu_chaos(&self, swarm: &mut dyn Swarm) -> SwarmCpuStress { + async fn create_cpu_chaos( + &self, + swarm: Arc>>, + ) -> SwarmCpuStress { // Gather and shuffle all peers IDs (so that we get random CPU chaos) - let shuffled_peer_ids = self.gather_and_shuffle_peer_ids(swarm); + let shuffled_peer_ids = self.gather_and_shuffle_peer_ids(swarm).await; // Create CPU chaos for the swarm create_swarm_cpu_stress(shuffled_peer_ids, None) @@ -68,19 +70,31 @@ impl PFNPerformance { /// Creates network emulation chaos for the swarm. Note: network chaos /// is added to all validators, VFNs and PFNs in the swarm. 
- fn create_network_emulation_chaos(&self, swarm: &mut dyn Swarm) -> SwarmNetEm { + async fn create_network_emulation_chaos( + &self, + swarm: Arc>>, + ) -> SwarmNetEm { // Gather and shuffle all peers IDs (so that we get random network emulation) - let shuffled_peer_ids = self.gather_and_shuffle_peer_ids_with_colocation(swarm); + let shuffled_peer_ids = self + .gather_and_shuffle_peer_ids_with_colocation(swarm) + .await; // Create network emulation chaos for the swarm create_multi_region_swarm_network_chaos(shuffled_peer_ids, None) } /// Gathers and shuffles all peer IDs in the swarm - fn gather_and_shuffle_peer_ids(&self, swarm: &mut dyn Swarm) -> Vec { + async fn gather_and_shuffle_peer_ids( + &self, + swarm: Arc>>, + ) -> Vec { // Identify the validators and fullnodes in the swarm - let validator_peer_ids = swarm.validators().map(|v| v.peer_id()).collect::>(); - let fullnode_peer_ids = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); + let (validator_peer_ids, fullnode_peer_ids) = { + let swarm = swarm.read().await; + let validator_peer_ids = swarm.validators().map(|v| v.peer_id()).collect::>(); + let fullnode_peer_ids = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); + (validator_peer_ids, fullnode_peer_ids) + }; // Gather and shuffle all peers IDs let mut all_peer_ids = validator_peer_ids @@ -94,13 +108,17 @@ impl PFNPerformance { } /// Gathers and shuffles all peer IDs in the swarm, colocating VFNs with their validator - fn gather_and_shuffle_peer_ids_with_colocation( + async fn gather_and_shuffle_peer_ids_with_colocation( &self, - swarm: &mut dyn Swarm, + swarm: Arc>>, ) -> Vec> { // Identify the validators and fullnodes in the swarm - let validator_peer_ids = swarm.validators().map(|v| v.peer_id()).collect::>(); - let fullnode_peer_ids = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); + let (validator_peer_ids, fullnode_peer_ids) = { + let swarm = swarm.read().await; + let validator_peer_ids = swarm.validators().map(|v| 
v.peer_id()).collect::>(); + let fullnode_peer_ids = swarm.full_nodes().map(|v| v.peer_id()).collect::>(); + (validator_peer_ids, fullnode_peer_ids) + }; let (vfn_peer_ids, pfn_peer_ids) = fullnode_peer_ids.split_at(fullnode_peer_ids.len() - self.num_pfns as usize); let mut vfn_and_vn_ids: Vec<_> = validator_peer_ids @@ -139,20 +157,24 @@ impl NetworkLoadTest for PFNPerformance { async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> Result { // Add the PFNs to the swarm let pfn_peer_ids = - create_and_add_pfns(ctx, self.num_pfns, self.config_override_fn.clone())?; + create_and_add_pfns(ctx, self.num_pfns, self.config_override_fn.clone()).await?; // Add CPU chaos to the swarm if self.add_cpu_chaos { - let cpu_chaos = self.create_cpu_chaos(ctx.swarm); + let cpu_chaos = self.create_cpu_chaos(ctx.swarm.clone()).await; ctx.swarm + .write() + .await .inject_chaos(SwarmChaos::CpuStress(cpu_chaos)) .await?; } // Add network emulation to the swarm if self.add_network_emulation { - let network_chaos = self.create_network_emulation_chaos(ctx.swarm); + let network_chaos = self.create_network_emulation_chaos(ctx.swarm.clone()).await; ctx.swarm + .write() + .await .inject_chaos(SwarmChaos::NetEm(network_chaos)) .await?; } @@ -164,16 +186,20 @@ impl NetworkLoadTest for PFNPerformance { async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> Result<()> { // Remove CPU chaos from the swarm if self.add_cpu_chaos { - let cpu_chaos = self.create_cpu_chaos(ctx.swarm); + let cpu_chaos = self.create_cpu_chaos(ctx.swarm.clone()).await; ctx.swarm + .write() + .await .remove_chaos(SwarmChaos::CpuStress(cpu_chaos)) .await?; } // Remove network emulation from the swarm if self.add_network_emulation { - let network_chaos = self.create_network_emulation_chaos(ctx.swarm); + let network_chaos = self.create_network_emulation_chaos(ctx.swarm.clone()).await; ctx.swarm + .write() + .await .remove_chaos(SwarmChaos::NetEm(network_chaos)) .await?; } @@ -183,48 +209,49 @@ impl NetworkLoadTest 
for PFNPerformance { } /// Adds a number of PFNs to the network and returns the peer IDs -fn create_and_add_pfns( - ctx: &mut NetworkContext, +async fn create_and_add_pfns<'a>( + ctx: &mut NetworkContext<'a>, num_pfns: u64, config_override_fn: Option, ) -> Result, Error> { info!("Creating {} public fullnodes!", num_pfns); // Identify the version for the PFNs - let swarm = ctx.swarm(); - let pfn_version = swarm.versions().max().unwrap(); + let pfn_version = { ctx.swarm.read().await.versions().max().unwrap() }; // Create the PFN swarm - let runtime = Runtime::new().unwrap(); - let pfn_peer_ids: Vec = (0..num_pfns) - .map(|i| { - // Create a config for the PFN. Note: this needs to be done here - // because the config will generate a unique peer ID for the PFN. - let mut pfn_config = swarm.get_default_pfn_node_config(); - let mut base_config = NodeConfig::default(); - if let Some(f) = config_override_fn.as_ref() { - f(&mut pfn_config, &mut base_config); - } - let pfn_override_config = OverrideNodeConfig::new(pfn_config, base_config); - - // Add the PFN to the swarm - let peer_id = runtime - .block_on(swarm.add_full_node(&pfn_version, pfn_override_config)) - .unwrap(); - - // Verify the PFN was added - if swarm.full_node(peer_id).is_none() { - panic!( - "Failed to locate PFN {:?} in the swarm! Peer ID: {:?}", - i, peer_id - ); - } - - // Return the peer ID - info!("Created PFN {:?} with peer ID: {:?}", i, peer_id); - peer_id - }) - .collect(); + let mut pfn_peer_ids = Vec::with_capacity(num_pfns as usize); + for i in 0..num_pfns { + // Create a config for the PFN. Note: this needs to be done here + // because the config will generate a unique peer ID for the PFN. 
+ let mut pfn_config = ctx.swarm.read().await.get_default_pfn_node_config(); + let mut base_config = NodeConfig::default(); + if let Some(f) = config_override_fn.as_ref() { + f(&mut pfn_config, &mut base_config); + } + let pfn_override_config = OverrideNodeConfig::new(pfn_config, base_config); + + // Add the PFN to the swarm + let peer_id = ctx + .swarm + .write() + .await + .add_full_node(&pfn_version, pfn_override_config) + .await + .unwrap(); + + // Verify the PFN was added + if ctx.swarm.read().await.full_node(peer_id).is_none() { + panic!( + "Failed to locate PFN {:?} in the swarm! Peer ID: {:?}", + i, peer_id + ); + } + + // Return the peer ID + info!("Created PFN {:?} with peer ID: {:?}", i, peer_id); + pfn_peer_ids.push(peer_id); + } Ok(pfn_peer_ids) } diff --git a/testsuite/testcases/src/quorum_store_onchain_enable_test.rs b/testsuite/testcases/src/quorum_store_onchain_enable_test.rs index 6dfa25a1ffe04..873ae870e7a1b 100644 --- a/testsuite/testcases/src/quorum_store_onchain_enable_test.rs +++ b/testsuite/testcases/src/quorum_store_onchain_enable_test.rs @@ -12,7 +12,7 @@ use aptos_types::{ on_chain_config::{ConsensusConfigV1, OnChainConsensusConfig}, }; use async_trait::async_trait; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; const MAX_NODE_LAG_SECS: u64 = 360; @@ -28,14 +28,18 @@ impl Test for QuorumStoreOnChainEnableTest { impl NetworkLoadTest for QuorumStoreOnChainEnableTest { async fn test( &self, - swarm: &mut dyn aptos_forge::Swarm, + swarm: Arc>>, _report: &mut aptos_forge::TestReport, duration: std::time::Duration, ) -> anyhow::Result<()> { let faucet_endpoint: reqwest::Url = "http://localhost:8081".parse().unwrap(); - let rest_client = swarm.validators().next().unwrap().rest_client(); - - let rest_api_endpoint = swarm.validators().next().unwrap().rest_api_endpoint(); + let (rest_client, rest_api_endpoint) = { + let swarm = swarm.read().await; + let first_validator = swarm.validators().next().unwrap(); + let rest_client = 
first_validator.rest_client(); + let rest_api_endpoint = first_validator.rest_api_endpoint(); + (rest_client, rest_api_endpoint) + }; let mut cli = CliTestFramework::new( rest_api_endpoint, faucet_endpoint, @@ -46,7 +50,7 @@ impl NetworkLoadTest for QuorumStoreOnChainEnableTest { tokio::time::sleep(duration / 2).await; let root_cli_index = { - let root_account = swarm.chain_info().root_account(); + let root_account = swarm.read().await.chain_info().root_account(); cli.add_account_with_address_to_cli( root_account.private_key().clone(), root_account.address(), @@ -97,6 +101,8 @@ impl NetworkLoadTest for QuorumStoreOnChainEnableTest { // Wait for all nodes to synchronize and stabilize. info!("Waiting for the validators to be synchronized."); swarm + .read() + .await .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) .await?; diff --git a/testsuite/testcases/src/state_sync_performance.rs b/testsuite/testcases/src/state_sync_performance.rs index 24086f50e6a5a..2e0948ccc48bf 100644 --- a/testsuite/testcases/src/state_sync_performance.rs +++ b/testsuite/testcases/src/state_sync_performance.rs @@ -33,13 +33,13 @@ impl NetworkTest for StateSyncFullnodePerformance { async fn run<'a>(&self, ctx: NetworkContextSynchronizer<'a>) -> Result<()> { let mut ctx_locker = ctx.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name())?; + let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name()).await?; // Emit a lot of traffic and ensure the fullnodes can all sync - emit_traffic_and_ensure_bounded_sync(ctx, &all_fullnodes)?; + emit_traffic_and_ensure_bounded_sync(ctx, &all_fullnodes).await?; // Stop and reset the fullnodes so they start syncing from genesis - stop_and_reset_nodes(ctx, &all_fullnodes, &[])?; + stop_and_reset_nodes(ctx, &all_fullnodes, &[]).await?; // Wait for all nodes to catch up to the highest synced version // then calculate and display the throughput results. 
@@ -62,26 +62,27 @@ impl NetworkTest for StateSyncFullnodeFastSyncPerformance { async fn run<'a>(&self, ctxa: NetworkContextSynchronizer<'a>) -> Result<()> { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name())?; + let all_fullnodes = get_fullnodes_and_check_setup(ctx, self.name()).await?; // Emit a lot of traffic and ensure the fullnodes can all sync - emit_traffic_and_ensure_bounded_sync(ctx, &all_fullnodes)?; + emit_traffic_and_ensure_bounded_sync(ctx, &all_fullnodes).await?; // Wait for an epoch change to ensure fast sync can download all the latest states info!("Waiting for an epoch change."); - let runtime = Runtime::new().unwrap(); - runtime.block_on(async { - ctx.swarm() - .wait_for_all_nodes_to_change_epoch(Duration::from_secs(MAX_EPOCH_CHANGE_SECS)) + { + ctx.swarm + .read() .await - })?; + .wait_for_all_nodes_to_change_epoch(Duration::from_secs(MAX_EPOCH_CHANGE_SECS)) + .await?; + } // Get the highest known epoch in the chain - let highest_synced_epoch = runtime.block_on(async { - get_highest_synced_epoch(&ctx.swarm().get_all_nodes_clients_with_names()) + let highest_synced_epoch = { + get_highest_synced_epoch(&ctx.swarm.read().await.get_all_nodes_clients_with_names()) .await .unwrap_or(0) - }); + }; if highest_synced_epoch == 0 { return Err(anyhow::format_err!( "The swarm has synced 0 epochs! Something has gone wrong!" 
@@ -89,12 +90,19 @@ impl NetworkTest for StateSyncFullnodeFastSyncPerformance { } // Fetch the number of state values held on-chain - let fullnode_name = ctx.swarm().full_nodes().next().unwrap().name(); - let prom_query = format!( - "{}{{instance=\"{}\"}}", - NUM_STATE_VALUE_COUNTER_NAME, &fullnode_name - ); - let promql_result = runtime.block_on(ctx.swarm().query_metrics(&prom_query, None, None))?; + let prom_query = { + let swarm = ctx.swarm.read().await; + let fullnode_name = swarm.full_nodes().next().unwrap().name(); + format!( + "{}{{instance=\"{}\"}}", + NUM_STATE_VALUE_COUNTER_NAME, &fullnode_name + ) + }; + + let promql_result = { + let swarm = ctx.swarm.read().await; + swarm.query_metrics(&prom_query, None, None).await? + }; let number_of_state_values = match promql_result.as_instant().unwrap().first() { Some(instant_vector) => instant_vector.sample().value() as u64, None => { @@ -110,7 +118,7 @@ impl NetworkTest for StateSyncFullnodeFastSyncPerformance { ); // Stop and reset the fullnodes so they start syncing from genesis - stop_and_reset_nodes(ctx, &all_fullnodes, &[])?; + stop_and_reset_nodes(ctx, &all_fullnodes, &[]).await?; // Wait for all nodes to catch up to the highest synced epoch // then calculate and display the throughput results. @@ -143,11 +151,14 @@ impl NetworkTest for StateSyncValidatorPerformance { let ctx = ctx_locker.deref_mut(); // Verify we have at least 7 validators (i.e., 3f+1, where f is 2) // so we can kill 2 validators but still make progress. - let all_validators = ctx - .swarm() - .validators() - .map(|v| v.peer_id()) - .collect::>(); + let all_validators = { + ctx.swarm + .read() + .await + .validators() + .map(|v| v.peer_id()) + .collect::>() + }; let num_validators = all_validators.len(); if num_validators < 7 { return Err(anyhow::format_err!( @@ -165,12 +176,12 @@ impl NetworkTest for StateSyncValidatorPerformance { ); // Generate some traffic through the validators. 
- emit_traffic_and_ensure_bounded_sync(ctx, &all_validators)?; + emit_traffic_and_ensure_bounded_sync(ctx, &all_validators).await?; // Stop and reset two validators so they start syncing from genesis info!("Deleting data for two validators!"); let validators_to_reset = &all_validators[0..2]; - stop_and_reset_nodes(ctx, &[], validators_to_reset)?; + stop_and_reset_nodes(ctx, &[], validators_to_reset).await?; // Wait for all nodes to catch up to the highest synced version // then calculate and display the throughput results. @@ -180,16 +191,19 @@ impl NetworkTest for StateSyncValidatorPerformance { /// Verifies the setup for the given fullnode test and returns the /// set of fullnodes. -fn get_fullnodes_and_check_setup( - ctx: &mut NetworkContext, +async fn get_fullnodes_and_check_setup<'a>( + ctx: &mut NetworkContext<'a>, test_name: &'static str, ) -> Result> { // Verify we have at least 1 fullnode - let all_fullnodes = ctx - .swarm() - .full_nodes() - .map(|v| v.peer_id()) - .collect::>(); + let all_fullnodes = { + ctx.swarm + .read() + .await + .full_nodes() + .map(|v| v.peer_id()) + .collect::>() + }; if all_fullnodes.is_empty() { return Err(anyhow::format_err!( "Fullnode test {} requires at least 1 fullnode!", @@ -201,7 +215,7 @@ fn get_fullnodes_and_check_setup( info!( "Running state sync test {:?} with {:?} validators and {:?} fullnodes.", test_name, - ctx.swarm().validators().count(), + ctx.swarm.read().await.validators().count(), all_fullnodes.len() ); @@ -210,8 +224,8 @@ fn get_fullnodes_and_check_setup( /// Emits traffic through all specified nodes and ensures all nodes can /// sync within a reasonable time bound. -fn emit_traffic_and_ensure_bounded_sync( - ctx: &mut NetworkContext, +async fn emit_traffic_and_ensure_bounded_sync<'a>( + ctx: &mut NetworkContext<'a>, nodes_to_send_traffic: &[AccountAddress], ) -> Result<()> { // Generate some traffic through the specified nodes. 
@@ -221,57 +235,54 @@ fn emit_traffic_and_ensure_bounded_sync( "Generating the initial traffic for {:?} seconds.", emit_txn_duration.as_secs() ); - let handle = ctx.runtime.handle().clone(); - let _txn_stat = handle.block_on(generate_traffic( - ctx, - nodes_to_send_traffic, - emit_txn_duration, - ))?; + let _txn_stat = generate_traffic(ctx, nodes_to_send_traffic, emit_txn_duration).await?; // Wait for all nodes to synchronize. We time bound this to ensure // nodes don't fall too far behind. info!("Waiting for the validators and fullnodes to be synchronized."); - Runtime::new().unwrap().block_on(async { - ctx.swarm() - .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) - .await - })?; + ctx.swarm + .read() + .await + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await?; Ok(()) } /// Stops and resets all specified nodes -fn stop_and_reset_nodes( - ctx: &mut NetworkContext, +async fn stop_and_reset_nodes<'a>( + ctx: &mut NetworkContext<'a>, fullnodes_to_reset: &[AccountAddress], validators_to_reset: &[AccountAddress], ) -> Result<()> { - let runtime = Runtime::new().unwrap(); - // Stop and reset all fullnodes info!("Deleting all fullnode data!"); for fullnode_id in fullnodes_to_reset { - let fullnode = ctx.swarm().full_node(*fullnode_id).unwrap(); - runtime.block_on(async { fullnode.clear_storage().await })?; + let swarm = ctx.swarm.read().await; + let fullnode = swarm.full_node(*fullnode_id).unwrap(); + fullnode.clear_storage().await?; } // Stop and reset all validators info!("Deleting all validator data!"); for valdiator_id in validators_to_reset { - let validator = ctx.swarm().validator(*valdiator_id).unwrap(); - runtime.block_on(async { validator.clear_storage().await })?; + let swarm = ctx.swarm.read().await; + let validator = swarm.validator(*valdiator_id).unwrap(); + validator.clear_storage().await?; } // Restart the fullnodes so they start syncing from a fresh state for fullnode_id in fullnodes_to_reset { - let 
fullnode = ctx.swarm().full_node(*fullnode_id).unwrap(); - runtime.block_on(async { fullnode.start().await })?; + let swarm = ctx.swarm.read().await; + let fullnode = swarm.full_node(*fullnode_id).unwrap(); + fullnode.start().await?; } // Restart the validators so they start syncing from a fresh state for valdiator_id in validators_to_reset { - let validator = ctx.swarm().validator(*valdiator_id).unwrap(); - runtime.block_on(async { validator.start().await })?; + let swarm = ctx.swarm.read().await; + let validator = swarm.validator(*valdiator_id).unwrap(); + validator.start().await?; } Ok(()) @@ -293,7 +304,9 @@ fn display_state_sync_state_throughput( // We allow up to half the test time to do this. let node_sync_duration = ctx.global_duration.checked_div(2).unwrap(); runtime.block_on(async { - ctx.swarm() + ctx.swarm + .read() + .await .wait_for_all_nodes_to_catchup_to_epoch(highest_synced_epoch, node_sync_duration) .await })?; @@ -335,7 +348,7 @@ fn ensure_state_sync_transaction_throughput( // Get the highest synced version for the chain let runtime = Runtime::new().unwrap(); let highest_synced_version = runtime.block_on(async { - get_highest_synced_version(&ctx.swarm().get_all_nodes_clients_with_names()) + get_highest_synced_version(&ctx.swarm.read().await.get_all_nodes_clients_with_names()) .await .unwrap_or(0) }); @@ -349,7 +362,9 @@ fn ensure_state_sync_transaction_throughput( // We allow up to half the test time to do this. 
let node_sync_duration = ctx.global_duration.checked_div(2).unwrap(); runtime.block_on(async { - ctx.swarm() + ctx.swarm + .read() + .await .wait_for_all_nodes_to_catchup(node_sync_duration) .await })?; diff --git a/testsuite/testcases/src/three_region_simulation_test.rs b/testsuite/testcases/src/three_region_simulation_test.rs index f4718d383d6d7..3fb63ef4972e4 100644 --- a/testsuite/testcases/src/three_region_simulation_test.rs +++ b/testsuite/testcases/src/three_region_simulation_test.rs @@ -4,9 +4,10 @@ use crate::{LoadDestination, NetworkLoadTest}; use aptos_forge::{ GroupNetworkBandwidth, GroupNetworkDelay, NetworkContext, NetworkContextSynchronizer, - NetworkTest, Swarm, SwarmChaos, SwarmNetworkBandwidth, SwarmNetworkDelay, Test, + NetworkTest, SwarmChaos, SwarmNetworkBandwidth, SwarmNetworkDelay, Test, }; use aptos_logger::info; +use aptos_types::account_address::AccountAddress; use async_trait::async_trait; /// Represents a test that simulates a network with 3 regions, all in the same cloud. @@ -25,9 +26,9 @@ impl Test for ThreeRegionSameCloudSimulationTest { /// 4. Currently simulating a 50 percentile network delay between us-west <--> af-south <--> eu-north /// /// This is deprecated and flawed. 
Use [crate::multi_region_network_test::MultiRegionNetworkEmulationTest] instead -fn create_three_region_swarm_network_delay(swarm: &dyn Swarm) -> SwarmNetworkDelay { - let all_validators = swarm.validators().map(|v| v.peer_id()).collect::>(); - +fn create_three_region_swarm_network_delay( + all_validators: Vec, +) -> SwarmNetworkDelay { // each region has 1/3 of the validators let region_size = all_validators.len() / 3; let mut us_west = all_validators; @@ -87,20 +88,29 @@ fn create_bandwidth_limit() -> SwarmNetworkBandwidth { impl NetworkLoadTest for ThreeRegionSameCloudSimulationTest { async fn setup<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result { // inject network delay - let delay = create_three_region_swarm_network_delay(ctx.swarm()); + let all_validators = { + ctx.swarm + .read() + .await + .validators() + .map(|v| v.peer_id()) + .collect::>() + }; + let delay = create_three_region_swarm_network_delay(all_validators); + let mut swarm = ctx.swarm.write().await; let chaos = SwarmChaos::Delay(delay); - ctx.swarm.inject_chaos(chaos).await?; + swarm.inject_chaos(chaos).await?; // inject bandwidth limit let bandwidth = create_bandwidth_limit(); let chaos = SwarmChaos::Bandwidth(bandwidth); - ctx.swarm.inject_chaos(chaos).await?; + swarm.inject_chaos(chaos).await?; Ok(LoadDestination::FullnodesOtherwiseValidators) } async fn finish<'a>(&self, ctx: &mut NetworkContext<'a>) -> anyhow::Result<()> { - ctx.swarm.remove_all_chaos().await?; + ctx.swarm.write().await.remove_all_chaos().await?; Ok(()) } } diff --git a/testsuite/testcases/src/twin_validator_test.rs b/testsuite/testcases/src/twin_validator_test.rs index 1fdfc6c873510..0cd9a50ae9afd 100644 --- a/testsuite/testcases/src/twin_validator_test.rs +++ b/testsuite/testcases/src/twin_validator_test.rs @@ -10,7 +10,6 @@ use std::{ ops::DerefMut, time::{Duration, Instant}, }; -use tokio::runtime::Runtime; pub struct TwinValidatorTest; @@ -28,55 +27,55 @@ impl NetworkTest for TwinValidatorTest { { let mut 
ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); - let runtime = Runtime::new().unwrap(); - let all_validators_ids = ctx - .swarm() - .validators() - .map(|v| v.peer_id()) - .collect::>(); + let all_validators_ids = { + ctx.swarm + .read() + .await + .validators() + .map(|v| v.peer_id()) + .collect::>() + }; let validator_count = all_validators_ids.len(); let twin_count = 2; - runtime.block_on(async { - for i in 0..twin_count { - let main_id: AccountAddress = all_validators_ids[i]; - let twin_id = all_validators_ids[i + validator_count - twin_count]; - ctx.swarm() - .validator(twin_id) - .unwrap() - .clear_storage() - .await - .context(format!( - "Error while clearing storage and stopping {twin_id}" - ))?; - let main_identity = ctx - .swarm() - .validator(main_id) - .unwrap() - .get_identity() - .await - .context(format!("Error while getting identity for {main_id}"))?; - ctx.swarm() - .validator(twin_id) - .unwrap() - .set_identity(main_identity) - .await - .context(format!("Error while setting identity for {twin_id}"))?; - ctx.swarm() - .validator(twin_id) - .unwrap() - .start() - .await - .context(format!("Error while starting {twin_id}"))?; - ctx.swarm() - .validator(twin_id) - .unwrap() - .wait_until_healthy(Instant::now() + Duration::from_secs(300)) - .await - .context(format!("Error while waiting for {twin_id}"))?; - } - Ok::<(), anyhow::Error>(()) - })?; + + for i in 0..twin_count { + let main_id: AccountAddress = all_validators_ids[i]; + let twin_id = all_validators_ids[i + validator_count - twin_count]; + let swarm = ctx.swarm.read().await; + swarm + .validator(twin_id) + .unwrap() + .clear_storage() + .await + .context(format!( + "Error while clearing storage and stopping {twin_id}" + ))?; + let main_identity = swarm + .validator(main_id) + .unwrap() + .get_identity() + .await + .context(format!("Error while getting identity for {main_id}"))?; + swarm + .validator(twin_id) + .unwrap() + .set_identity(main_identity) + .await + 
.context(format!("Error while setting identity for {twin_id}"))?; + swarm + .validator(twin_id) + .unwrap() + .start() + .await + .context(format!("Error while starting {twin_id}"))?; + swarm + .validator(twin_id) + .unwrap() + .wait_until_healthy(Instant::now() + Duration::from_secs(300)) + .await + .context(format!("Error while waiting for {twin_id}"))?; + } } ::run(self, ctxa).await } diff --git a/testsuite/testcases/src/two_traffics_test.rs b/testsuite/testcases/src/two_traffics_test.rs index 6dc5febbdbc5d..931881f4e6956 100644 --- a/testsuite/testcases/src/two_traffics_test.rs +++ b/testsuite/testcases/src/two_traffics_test.rs @@ -9,7 +9,10 @@ use aptos_forge::{ use aptos_logger::info; use async_trait::async_trait; use rand::{rngs::OsRng, Rng, SeedableRng}; -use std::time::{Duration, Instant}; +use std::{ + sync::Arc, + time::{Duration, Instant}, +}; pub struct TwoTrafficsTest { pub inner_traffic: EmitJobRequest, @@ -26,7 +29,7 @@ impl Test for TwoTrafficsTest { impl NetworkLoadTest for TwoTrafficsTest { async fn test( &self, - swarm: &mut dyn Swarm, + swarm: Arc>>, report: &mut TestReport, duration: Duration, ) -> Result<()> { @@ -34,21 +37,27 @@ impl NetworkLoadTest for TwoTrafficsTest { "Running TwoTrafficsTest test for duration {}s", duration.as_secs_f32() ); - let nodes_to_send_load_to = - LoadDestination::FullnodesOtherwiseValidators.get_destination_nodes(swarm); + let nodes_to_send_load_to = LoadDestination::FullnodesOtherwiseValidators + .get_destination_nodes(swarm.clone()) + .await; let rng = ::rand::rngs::StdRng::from_seed(OsRng.gen()); let (emitter, emit_job_request) = create_emitter_and_request( - swarm, + swarm.clone(), self.inner_traffic.clone(), &nodes_to_send_load_to, rng, - )?; + ) + .await?; let test_start = Instant::now(); let stats = emitter - .emit_txn_for(swarm.chain_info().root_account, emit_job_request, duration) + .emit_txn_for( + swarm.read().await.chain_info().root_account, + emit_job_request, + duration, + ) .await?; let 
actual_test_duration = test_start.elapsed(); diff --git a/testsuite/testcases/src/validator_join_leave_test.rs b/testsuite/testcases/src/validator_join_leave_test.rs index f1b9bc5030b2f..f44cbf017b5fb 100644 --- a/testsuite/testcases/src/validator_join_leave_test.rs +++ b/testsuite/testcases/src/validator_join_leave_test.rs @@ -13,7 +13,7 @@ use aptos_logger::info; use aptos_sdk::crypto::{ed25519::Ed25519PrivateKey, PrivateKey}; use aptos_types::{account_address::AccountAddress, transaction::authenticator::AuthenticationKey}; use async_trait::async_trait; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; const MAX_NODE_LAG_SECS: u64 = 360; @@ -33,13 +33,13 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { async fn test( &self, - swarm: &mut dyn Swarm, + swarm: Arc>>, _report: &mut TestReport, duration: Duration, ) -> Result<()> { // Verify we have at least 7 validators (i.e., 3f+1, where f is 2) // so we can lose 2 validators but still make progress. - let num_validators = swarm.validators().count(); + let num_validators = { swarm.read().await.validators().count() }; if num_validators < 7 { return Err(anyhow::format_err!( "ValidatorSet leaving and rejoining test require at least 7 validators! 
Given: {:?}.", @@ -49,10 +49,15 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { let faucet_endpoint: reqwest::Url = "http://localhost:8081".parse().unwrap(); // Connect the operator tool to the node's JSON RPC API - let rest_client = swarm.validators().next().unwrap().rest_client(); - let transaction_factory = swarm.chain_info().transaction_factory(); - - let rest_api_endpoint = swarm.validators().next().unwrap().rest_api_endpoint(); + let transaction_factory = { swarm.read().await.chain_info().transaction_factory() }; + + let (rest_client, rest_api_endpoint) = { + let swarm = swarm.read().await; + let first_validator = swarm.validators().next().unwrap(); + let rest_client = first_validator.rest_client(); + let rest_api_endpoint = first_validator.rest_api_endpoint(); + (rest_client, rest_api_endpoint) + }; let mut cli = CliTestFramework::new( rest_api_endpoint, faucet_endpoint, @@ -60,7 +65,7 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { ) .await; - let mut public_info = swarm.chain_info().into_aptos_public_info(); + let mut public_info = { swarm.read().await.chain_info().into_aptos_public_info() }; let mut validator_cli_indices = Vec::new(); @@ -109,9 +114,13 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { // Wait for all nodes to synchronize and stabilize. info!("Waiting for the validators to be synchronized."); - swarm - .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) - .await?; + { + swarm + .read() + .await + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await?; + } // Wait for 1/3 of the test duration. 
tokio::time::sleep(duration / 3).await; @@ -123,12 +132,12 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { .await .unwrap(); - let root_account = swarm.chain_info().root_account(); + let root_account = swarm.read().await.chain_info().root_account(); reconfig(&rest_client, &transaction_factory, root_account).await; } { - let root_account = swarm.chain_info().root_account(); + let root_account = swarm.read().await.chain_info().root_account(); reconfig(&rest_client, &transaction_factory, root_account).await; } @@ -140,18 +149,20 @@ impl NetworkLoadTest for ValidatorJoinLeaveTest { for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { cli.join_validator_set(*operator_index, None).await.unwrap(); - let root_account = swarm.chain_info().root_account(); + let root_account = swarm.read().await.chain_info().root_account(); reconfig(&rest_client, &transaction_factory, root_account).await; } { - let root_account = swarm.chain_info().root_account(); + let root_account = swarm.read().await.chain_info().root_account(); reconfig(&rest_client, &transaction_factory, root_account).await; } // Wait for all nodes to synchronize and stabilize. 
info!("Waiting for the validators to be synchronized."); swarm + .read() + .await .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) .await?; diff --git a/testsuite/testcases/src/validator_reboot_stress_test.rs b/testsuite/testcases/src/validator_reboot_stress_test.rs index 8f4fe9b36d356..1a83dde6f908c 100644 --- a/testsuite/testcases/src/validator_reboot_stress_test.rs +++ b/testsuite/testcases/src/validator_reboot_stress_test.rs @@ -5,7 +5,7 @@ use crate::NetworkLoadTest; use aptos_forge::{NetworkContextSynchronizer, NetworkTest, Result, Swarm, Test, TestReport}; use async_trait::async_trait; use rand::{seq::SliceRandom, thread_rng}; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; use tokio::time::Instant; pub struct ValidatorRebootStressTest { @@ -24,13 +24,20 @@ impl Test for ValidatorRebootStressTest { impl NetworkLoadTest for ValidatorRebootStressTest { async fn test( &self, - swarm: &mut dyn Swarm, + swarm: Arc>>, _report: &mut TestReport, duration: Duration, ) -> Result<()> { let start = Instant::now(); - let all_validators = swarm.validators().map(|v| v.peer_id()).collect::>(); + let all_validators = { + swarm + .read() + .await + .validators() + .map(|v| v.peer_id()) + .collect::>() + }; while start.elapsed() < duration { let addresses: Vec<_> = { @@ -41,6 +48,7 @@ impl NetworkLoadTest for ValidatorRebootStressTest { .collect() }; for adr in &addresses { + let swarm = swarm.read().await; let validator_to_reboot = swarm.validator(*adr).unwrap(); validator_to_reboot.stop().await?; } @@ -49,6 +57,7 @@ impl NetworkLoadTest for ValidatorRebootStressTest { } for adr in &addresses { + let swarm = swarm.read().await; let validator_to_reboot = swarm.validator(*adr).unwrap(); validator_to_reboot.start().await?; } From 2a217e5b0875e4702cb8bbcbe3408ec32feb0b4d Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Mon, 17 Jun 2024 13:23:07 -0400 Subject: [PATCH 22/28] more async spread --- testsuite/forge/src/interface/swarm.rs | 43 
++++++++----------- testsuite/testcases/src/compatibility_test.rs | 4 +- testsuite/testcases/src/framework_upgrade.rs | 8 ++-- 3 files changed, 23 insertions(+), 32 deletions(-) diff --git a/testsuite/forge/src/interface/swarm.rs b/testsuite/forge/src/interface/swarm.rs index d0a6ea1c26b5a..b2c3f501ba5ec 100644 --- a/testsuite/forge/src/interface/swarm.rs +++ b/testsuite/forge/src/interface/swarm.rs @@ -16,7 +16,6 @@ use aptos_sdk::types::PeerId; use futures::future::{join_all, try_join_all}; use prometheus_http_query::response::{PromqlResult, Sample}; use std::time::{Duration, Instant}; -use tokio::runtime::Runtime; /// Trait used to represent a running network comprised of Validators and FullNodes #[async_trait::async_trait] @@ -190,12 +189,11 @@ pub trait SwarmExt: Swarm { } /// Perform a safety check, ensuring that no forks have occurred in the network. - fn fork_check(&self, epoch_duration: Duration) -> Result<()> { - let runtime = Runtime::new().unwrap(); - + async fn fork_check(&self, epoch_duration: Duration) -> Result<()> { // Lots of errors can actually occur after an epoch change so guarantee that we change epochs here // This can wait for 2x epoch to at least force the caller to be explicit about the epoch duration - runtime.block_on(self.wait_for_all_nodes_to_change_epoch(epoch_duration * 2))?; + self.wait_for_all_nodes_to_change_epoch(epoch_duration * 2) + .await?; let clients = self .validators() @@ -203,16 +201,16 @@ pub trait SwarmExt: Swarm { .chain(self.full_nodes().map(|node| node.rest_client())) .collect::>(); - let versions = runtime - .block_on(try_join_all( - clients - .iter() - .map(|node| node.get_ledger_information()) - .collect::>(), - ))? - .into_iter() - .map(|resp| resp.into_inner().version) - .collect::>(); + let versions = try_join_all( + clients + .iter() + .map(|node| node.get_ledger_information()) + .collect::>(), + ) + .await? 
+ .into_iter() + .map(|resp| resp.into_inner().version) + .collect::>(); let min_version = versions .iter() .min() @@ -224,21 +222,14 @@ pub trait SwarmExt: Swarm { .copied() .ok_or_else(|| anyhow!("Unable to query nodes for their latest version"))?; - if !runtime.block_on(Self::are_root_hashes_equal_at_version( - &clients, - min_version, - ))? { + if !Self::are_root_hashes_equal_at_version(&clients, min_version).await? { return Err(anyhow!("Fork check failed")); } - runtime.block_on( - self.wait_for_all_nodes_to_catchup_to_version(max_version, Duration::from_secs(10)), - )?; + self.wait_for_all_nodes_to_catchup_to_version(max_version, Duration::from_secs(10)) + .await?; - if !runtime.block_on(Self::are_root_hashes_equal_at_version( - &clients, - max_version, - ))? { + if !Self::are_root_hashes_equal_at_version(&clients, max_version).await? { return Err(anyhow!("Fork check failed")); } diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index 473ae67719338..7eb65854debff 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -357,7 +357,7 @@ impl NetworkTest for SimpleValidatorUpgrade { &txn_stat_half, ); - ctx.swarm.read().await.fork_check(epoch_duration)?; + ctx.swarm.read().await.fork_check(epoch_duration).await?; // Update the second batch let msg = format!("4. upgrading second batch to new version: {}", new_version); @@ -395,7 +395,7 @@ impl NetworkTest for SimpleValidatorUpgrade { let msg = "5. 
check swarm health".to_string(); info!("{}", msg); ctx.report.report_text(msg); - ctx.swarm.read().await.fork_check(epoch_duration)?; + ctx.swarm.read().await.fork_check(epoch_duration).await?; ctx.report.report_text(format!( "Compatibility test for {} ==> {} passed", old_version, new_version diff --git a/testsuite/testcases/src/framework_upgrade.rs b/testsuite/testcases/src/framework_upgrade.rs index f594ac6113358..974ab91894eb4 100644 --- a/testsuite/testcases/src/framework_upgrade.rs +++ b/testsuite/testcases/src/framework_upgrade.rs @@ -79,7 +79,7 @@ impl NetworkTest for FrameworkUpgrade { ); { - ctx.swarm.read().await.fork_check(epoch_duration)?; + ctx.swarm.read().await.fork_check(epoch_duration).await?; } // Apply the framework release bundle. @@ -155,14 +155,14 @@ impl NetworkTest for FrameworkUpgrade { ); { - ctx.swarm.read().await.fork_check(epoch_duration)?; + ctx.swarm.read().await.fork_check(epoch_duration).await?; } let msg = "5. check swarm health".to_string(); info!("{}", msg); ctx.report.report_text(msg); { - ctx.swarm.read().await.fork_check(epoch_duration)?; + ctx.swarm.read().await.fork_check(epoch_duration).await?; } ctx.report.report_text(format!( "Compatibility test for {} ==> {} passed", @@ -184,7 +184,7 @@ impl NetworkTest for FrameworkUpgrade { ); { - ctx.swarm.read().await.fork_check(epoch_duration)?; + ctx.swarm.read().await.fork_check(epoch_duration).await?; } Ok(()) From 45c36703737987ec20cb9e1e72a04dfa4410f3e4 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Mon, 17 Jun 2024 15:39:11 -0400 Subject: [PATCH 23/28] fix --- .../src/consensus/consensus_fault_tolerance.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs b/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs index 3f4f468727c95..56e6cea9fa2d5 100644 --- a/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs +++ 
b/testsuite/smoke-test/src/consensus/consensus_fault_tolerance.rs @@ -258,10 +258,16 @@ async fn test_ordered_only_cert() { async fn test_execution_retry() { let num_validators = 4; - let mut swarm = create_swarm(num_validators, 1).await; - + let swarm = create_swarm(num_validators, 1).await; + let (validator_clients, public_info) = { + ( + swarm.get_validator_clients_with_names(), + swarm.aptos_public_info(), + ) + }; test_consensus_fault_tolerance( - &mut swarm, + validator_clients, + public_info, 3, 5.0, 1, From b5b119e8653f4f39a90a497a67034f84c141f8d9 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Mon, 17 Jun 2024 16:00:44 -0400 Subject: [PATCH 24/28] cargo lint --- testsuite/testcases/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/testcases/Cargo.toml b/testsuite/testcases/Cargo.toml index 6541a240698e3..c7443b02da9c2 100644 --- a/testsuite/testcases/Cargo.toml +++ b/testsuite/testcases/Cargo.toml @@ -26,6 +26,7 @@ aptos-runtimes = { workspace = true } aptos-sdk = { workspace = true } aptos-temppath = { workspace = true } aptos-types = { workspace = true } +async-trait = { workspace = true } bcs = { workspace = true } csv = { workspace = true } futures = { workspace = true } @@ -35,7 +36,6 @@ rand = { workspace = true } reqwest = { workspace = true } tokio = { workspace = true } tokio-scoped = { workspace = true } -async-trait = { workspace = true } [dev-dependencies] assert_approx_eq = { workspace = true } From 5f80094b89083351bdbe3e42f9927c6252841e28 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Tue, 18 Jun 2024 13:04:31 -0400 Subject: [PATCH 25/28] clenup update_seq_num_and_get_num_expired() which got refactored away --- .../src/emitter/mod.rs | 76 ------------------- .../src/emitter/submission_worker.rs | 6 -- 2 files changed, 82 deletions(-) diff --git a/crates/transaction-emitter-lib/src/emitter/mod.rs b/crates/transaction-emitter-lib/src/emitter/mod.rs index 5bf604b211109..4b31491d81b42 100644 --- 
a/crates/transaction-emitter-lib/src/emitter/mod.rs +++ b/crates/transaction-emitter-lib/src/emitter/mod.rs @@ -982,82 +982,6 @@ async fn wait_for_accounts_sequence( (latest_fetched_counts, sum_of_completion_timestamps_millis) } -#[cfg(unused)] -fn update_seq_num_and_get_num_expired( - accounts: &mut [LocalAccount], - account: Arc>, - account_to_start_and_end_seq_num: HashMap, - latest_fetched_counts: HashMap, -) -> (usize, usize) { - accounts.iter_mut().for_each(|account| { - // let mut account_lock = account.lock().unwrap(); - // let account = account_lock.deref_mut(); - let (start_seq_num, end_seq_num) = - if let Some(pair) = account_to_start_and_end_seq_num.get(&account.address()) { - pair - } else { - return; - }; - assert!(account.sequence_number() == *end_seq_num); - - match latest_fetched_counts.get(&account.address()) { - Some(count) => { - if *count != account.sequence_number() { - assert!(account.sequence_number() > *count); - debug!( - "Stale sequence_number for {}, expected {}, setting to {}", - account.address(), - account.sequence_number(), - count - ); - account.set_sequence_number(*count); - } - }, - None => { - debug!( - "Couldn't fetch sequence_number for {}, expected {}, setting to {}", - account.address(), - account.sequence_number(), - start_seq_num - ); - account.set_sequence_number(*start_seq_num); - }, - } - }); - - account_to_start_and_end_seq_num - .iter() - .map( - |(address, (start_seq_num, end_seq_num))| match latest_fetched_counts.get(address) { - Some(count) => { - assert!( - *count <= *end_seq_num, - "{address} :: {count} > {end_seq_num}" - ); - if *count >= *start_seq_num { - ( - (*count - *start_seq_num) as usize, - (*end_seq_num - *count) as usize, - ) - } else { - debug!( - "Stale sequence_number fetched for {}, start_seq_num {}, fetched {}", - address, start_seq_num, *count - ); - (0, (*end_seq_num - *start_seq_num) as usize) - } - }, - None => (0, (end_seq_num - start_seq_num) as usize), - }, - ) - .fold( - (0, 0), - 
|(committed, expired), (cur_committed, cur_expired)| { - (committed + cur_committed, expired + cur_expired) - }, - ) -} - pub async fn query_sequence_number(client: &RestClient, address: AccountAddress) -> Result { Ok(query_sequence_numbers(client, [address].iter()).await?.0[0].1) } diff --git a/crates/transaction-emitter-lib/src/emitter/submission_worker.rs b/crates/transaction-emitter-lib/src/emitter/submission_worker.rs index 8c9c1095e869f..ac9de081ee507 100644 --- a/crates/transaction-emitter-lib/src/emitter/submission_worker.rs +++ b/crates/transaction-emitter-lib/src/emitter/submission_worker.rs @@ -248,7 +248,6 @@ impl SubmissionWorker { ) .await; - // self.accounts.iter().for_each(|account| {}) for account in self.accounts.iter_mut() { update_account_seq_num( Arc::get_mut(account).unwrap(), @@ -258,11 +257,6 @@ impl SubmissionWorker { } let (num_committed, num_expired) = count_committed_expired_stats(account_to_start_and_end_seq_num, latest_fetched_counts); - // let (num_committed, num_expired) = update_seq_num_and_get_num_expired( - // self.accounts.clone(), - // account_to_start_and_end_seq_num, - // latest_fetched_counts, - // ); if num_expired > 0 { loop_stats From fc46210337def25142a9b4ffdbee5263d0f6d25d Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Mon, 24 Jun 2024 14:40:03 -0400 Subject: [PATCH 26/28] PR cleanup --- .../src/accounts_pool_wrapper.rs | 15 --------------- testsuite/forge-cli/src/main.rs | 8 +------- testsuite/forge/src/interface/network.rs | 4 ---- testsuite/forge/src/test_utils/consensus_utils.rs | 5 ----- 4 files changed, 1 insertion(+), 31 deletions(-) diff --git a/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs b/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs index f7406fb845a6b..1dee749e5f019 100644 --- a/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs +++ b/crates/transaction-generator-lib/src/accounts_pool_wrapper.rs @@ -46,25 +46,10 @@ impl TransactionGenerator for 
AccountsPoolWrapperGenerator { if accounts_to_use.is_empty() { return Vec::new(); } - // Wrap LocalAccount in Arc+Mutex - // let account_arcs : Vec> = accounts_to_use.into_iter().map(Arc::new).collect(); - // get txns let txns = accounts_to_use .iter() .flat_map(|account| self.generator.generate_transactions(account, 1)) .collect(); - // let txns = accounts_to_use - // .iter_mut() - // .flat_map(|account| { - // - // self.generator.generate_transactions(account, 1) - // }) - // .collect(); - - // back to plain LocalAccount, add to accounts - // let accounts_to_use = account_arcs.into_iter().map(|account| { - // Arc::into_inner(account).unwrap() - // }).collect(); if let Some(destination_accounts_pool) = &self.destination_accounts_pool { destination_accounts_pool.add_to_pool(accounts_to_use); } diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index e619957651a0f..e25527f5af7bd 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -279,13 +279,7 @@ fn main() -> Result<()> { logger.build(); let args = Args::parse(); - let duration = if args.suite == "compat" { - // TODO: if this needs to be more perminent than hacking into this branch, edit - // .github/workflows/docker-build-test.yaml - Duration::from_secs(30 * 60) - } else { - Duration::from_secs(args.duration_secs as u64) - }; + let duration = Duration::from_secs(args.duration_secs as u64); let suite_name: &str = args.suite.as_ref(); let runtime = Runtime::new()?; diff --git a/testsuite/forge/src/interface/network.rs b/testsuite/forge/src/interface/network.rs index 0fbb4a2432165..26b622d58ef98 100644 --- a/testsuite/forge/src/interface/network.rs +++ b/testsuite/forge/src/interface/network.rs @@ -83,10 +83,6 @@ impl<'t> NetworkContext<'t> { } } - // pub fn swarm(&mut self) -> &mut dyn Swarm { - // self.swarm - // } - pub fn core(&mut self) -> &mut CoreContext { &mut self.core } diff --git a/testsuite/forge/src/test_utils/consensus_utils.rs 
b/testsuite/forge/src/test_utils/consensus_utils.rs index 0068de2634a2f..6115a5c2e60de 100644 --- a/testsuite/forge/src/test_utils/consensus_utils.rs +++ b/testsuite/forge/src/test_utils/consensus_utils.rs @@ -70,10 +70,6 @@ pub async fn test_consensus_fault_tolerance( // Can allow us to better see if state would've gotten resolved by itself, etc. raise_check_error_at_the_end: bool, ) -> Result<()> { - // let validator_clients = { - // swarm.read().await.get_validator_clients_with_names() - // }; - async fn get_all_states(validator_clients: &[(String, RestClient)]) -> Vec { join_all( validator_clients @@ -146,7 +142,6 @@ pub async fn test_consensus_fault_tolerance( } if new_epoch_on_cycle { - // swarm.read().await.aptos_public_info().reconfig().await; public_info.reconfig().await; } } From a6bc7087edc4a46ecb9ece45734ed46862c1a438 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Mon, 24 Jun 2024 14:54:35 -0400 Subject: [PATCH 27/28] cleanup --- testsuite/testcases/src/compatibility_test.rs | 73 ------------------- testsuite/testcases/src/lib.rs | 25 ------- .../src/validator_join_leave_test.rs | 1 - 3 files changed, 99 deletions(-) diff --git a/testsuite/testcases/src/compatibility_test.rs b/testsuite/testcases/src/compatibility_test.rs index 7eb65854debff..c14203af78ffe 100644 --- a/testsuite/testcases/src/compatibility_test.rs +++ b/testsuite/testcases/src/compatibility_test.rs @@ -9,7 +9,6 @@ use aptos_forge::{ TxnStats, Version, }; use aptos_logger::info; -// use aptos_sdk::transaction_builder::TransactionFactory; use aptos_sdk::types::{LocalAccount, PeerId}; use async_trait::async_trait; use rand::SeedableRng; @@ -34,29 +33,6 @@ impl Test for SimpleValidatorUpgrade { } } -#[cfg(unused)] -async fn upgrade_task( - // ctx: &mut NetworkContext<'_>, - ctxa: NetworkContextSynchronizer<'_>, - validators_to_update: &[PeerId], - version: &Version, - wait_until_healthy: bool, - delay: Duration, - max_wait: Duration, - done: Arc, -) -> Result<()> { - let result = 
batch_update_gradually( - ctxa, - validators_to_update, - version, - wait_until_healthy, - delay, - max_wait, - ) - .await; - done.store(true, Ordering::Relaxed); - result -} async fn stat_gather_task( emitter: TxnEmitter, emit_job_request: EmitJobRequest, @@ -82,50 +58,6 @@ async fn stat_gather_task( Ok(statsum) } -#[cfg(unused)] -fn traffic_task( - ctxa: NetworkContextSynchronizer, - nodes: &[PeerId], - upgrade_done: Arc, -) -> Result> { - let (emitter, emit_job_request, source_account) = { - let mut ctx_locker = ctxa.ctx.lock().unwrap(); - let mut ctx = ctx_locker.deref_mut(); - // spawn_generate_traffic_setup(ctx, nodes)? - let mut emit_job_request = ctx.emit_job.clone(); - let rng = SeedableRng::from_rng(ctx.core().rng()).unwrap(); - let swarm = ctx.swarm(); - let client_timeout = Duration::from_secs(30); - - let chain_info = swarm.chain_info(); - let transaction_factory = TransactionFactory::new(chain_info.chain_id); - let emitter = TxnEmitter::new(transaction_factory, rng); - - emit_job_request = - emit_job_request.rest_clients(swarm.get_clients_for_peers(nodes, client_timeout)); - let source_account = chain_info.root_account.clone(); - (emitter, emit_job_request, source_account) - }; - // match create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng) { - // Ok(parts) => parts, - // Err(err) => { - // stats_result = Err(err); - // return; - // } - // }; - // let source_account = ctx.swarm().chain_info().root_account; - let traffic_runtime = traffic_emitter_runtime()?; - // let upgrade_joiner = handle.spawn(upgrade_task(ctx, validators_to_update, version, wait_until_healthy, delay, max_wait, upgrade_done.clone())); - let upgrade_traffic_chunk_duration = Duration::from_secs(15); - traffic_runtime.block_on(stat_gather_task( - emitter, - emit_job_request, - source_account, - upgrade_traffic_chunk_duration, - upgrade_done.clone(), - )) -} - fn upgrade_and_gather_stats( ctxa: NetworkContextSynchronizer, // upgrade args @@ -261,8 +193,6 @@ impl 
NetworkTest for SimpleValidatorUpgrade { .validators() .map(|v| v.peer_id()) .collect::>(); - // TODO: this is the "compat" test. Expand and refine to properly validate network2. - // TODO: Ensure sustained TPS during upgrade. Slower upgrade rollout. let mut first_batch = all_validators.clone(); let second_batch = first_batch.split_off(first_batch.len() / 2); let first_node = first_batch.pop().unwrap(); @@ -291,7 +221,6 @@ impl NetworkTest for SimpleValidatorUpgrade { ); info!("{}", msg); ctxa.report_text(msg).await; - // runtime.block_on(batch_update_gradually(ctx.swarm(), &[first_node], &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; let upgrade_stats = upgrade_and_gather_stats( ctxa.clone(), &[first_node], @@ -345,7 +274,6 @@ impl NetworkTest for SimpleValidatorUpgrade { &upgrade2_stats_sum, ); } - // runtime.block_on(batch_update_gradually(ctxa.clone(), &first_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); @@ -380,7 +308,6 @@ impl NetworkTest for SimpleValidatorUpgrade { &upgrade3_stats_sum, ); } - // runtime.block_on(batch_update_gradually(ctxa.clone(), &second_batch, &new_version, upgrade_wait_for_healthy, upgrade_node_delay, upgrade_max_wait))?; { let mut ctx_locker = ctxa.ctx.lock().await; let ctx = ctx_locker.deref_mut(); diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index 6b471fec068a3..15ec3b1ffbf2a 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -85,7 +85,6 @@ async fn batch_update_gradually( delay: Duration, max_wait: Duration, ) -> Result<()> { - // let mut swarm = ctx.swarm(); for validator in validators_to_update { info!("batch_update_gradually upgrade start: {}", validator); { @@ -180,30 +179,6 @@ pub async fn generate_traffic( Ok(stats) } -#[cfg(unused)] -pub fn spawn_generate_traffic_setup<'a>( - ctx: &mut NetworkContext<'a>, - 
nodes: &[PeerId], -) -> Result<(TxnEmitter, EmitJobRequest, &'a mut LocalAccount)> { - let emit_job_request = ctx.emit_job.clone(); - let rng = SeedableRng::from_rng(ctx.core().rng())?; - let (emitter, emit_job_request) = - create_emitter_and_request(ctx.swarm(), emit_job_request, nodes, rng)?; - let root_account = ctx.swarm().chain_info().root_account; - return Ok((emitter, emit_job_request, root_account)); -} - -#[cfg(unused)] -pub fn spawn_generate_traffic( - emitter: TxnEmitter, - emit_job_request: EmitJobRequest, - root_account: &LocalAccount, - duration: Duration, - handle: Handle, -) -> JoinHandle> { - handle.spawn(emitter.emit_txn_for(root_account, emit_job_request, duration)) -} - pub enum LoadDestination { AllNodes, AllValidators, diff --git a/testsuite/testcases/src/validator_join_leave_test.rs b/testsuite/testcases/src/validator_join_leave_test.rs index f44cbf017b5fb..edf9c59fc62c5 100644 --- a/testsuite/testcases/src/validator_join_leave_test.rs +++ b/testsuite/testcases/src/validator_join_leave_test.rs @@ -1,7 +1,6 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -// use std::ops::DerefMut; use crate::{LoadDestination, NetworkLoadTest}; use aptos::{account::create::DEFAULT_FUNDED_COINS, test::CliTestFramework}; use aptos_forge::{ From e138cfb4234c663a7bfd76398ef1a0ecc4c7ed15 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Thu, 27 Jun 2024 07:02:36 -0400 Subject: [PATCH 28/28] fix --- testsuite/smoke-test/src/fullnode.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/smoke-test/src/fullnode.rs b/testsuite/smoke-test/src/fullnode.rs index c060a6699264b..35b22ec579ab1 100644 --- a/testsuite/smoke-test/src/fullnode.rs +++ b/testsuite/smoke-test/src/fullnode.rs @@ -146,7 +146,7 @@ async fn test_internal_indexer_with_fast_sync() { .wait_for_all_nodes_to_catchup(Duration::from_secs(60)) .await .unwrap(); - let node = swarm.full_node_mut(peer_id).unwrap(); + let node = swarm.full_node(peer_id).unwrap(); 
let node_config = node.config().to_owned(); node.stop().await.unwrap(); check_indexer_db(&node_config);