Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[smoke tests] local swarm build retries 3 times #4461

Merged
merged 3 commits into from
Sep 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 27 additions & 32 deletions testsuite/smoke-test/src/smoke_test_environment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ use forge::{Factory, LocalFactory, LocalSwarm};
use framework::ReleaseBundle;
use once_cell::sync::Lazy;
use rand::rngs::OsRng;
use std::pin::Pin;
use std::{num::NonZeroUsize, path::PathBuf, sync::Arc};
use tokio::task::JoinHandle;

const SWARM_BUILD_NUM_RETRIES: u8 = 3;

#[derive(Clone)]
pub struct SwarmBuilder {
local: bool,
num_validators: NonZeroUsize,
Expand Down Expand Up @@ -64,7 +66,7 @@ impl SwarmBuilder {
}

// Gas is not enabled with this setup, it's enabled via forge instance.
pub async fn build_wrapped(self) -> anyhow::Result<LocalSwarm> {
pub async fn build_inner(&mut self) -> anyhow::Result<LocalSwarm> {
::aptos_logger::Logger::new().init();
info!("Preparing to finish compiling");
// TODO change to return Swarm trait
Expand All @@ -79,16 +81,16 @@ impl SwarmBuilder {
static ACTIVE_NODES: Lazy<Arc<Mutex<usize>>> = Lazy::new(|| Arc::new(Mutex::new(0)));
let guard = ActiveNodesGuard::grab(slots, ACTIVE_NODES.clone()).await;

let init_genesis_config = self.init_genesis_config;

let builder = self.clone();
let init_genesis_config = builder.init_genesis_config;
FACTORY
.new_swarm_with_version(
OsRng,
self.num_validators,
self.num_fullnodes,
builder.num_validators,
builder.num_fullnodes,
&version,
self.genesis_framework,
self.init_config,
builder.genesis_framework,
builder.init_config,
Some(Arc::new(move |genesis_config| {
if let Some(init_genesis_config) = &init_genesis_config {
(init_genesis_config)(genesis_config);
Expand All @@ -100,12 +102,26 @@ impl SwarmBuilder {
}

// Gas is not enabled with this setup, it's enabled via forge instance.
pub async fn build(self) -> LocalSwarm {
self.build_wrapped().await.unwrap()
/// Builds the local swarm, retrying when an attempt fails.
///
/// Local swarm spin-up can fail due to port issues, so after the initial
/// attempt the build is retried up to `SWARM_BUILD_NUM_RETRIES` more times.
/// Each failed attempt is logged as a warning with its zero-based index.
///
/// # Panics
///
/// Panics once every attempt has returned an error.
pub async fn build(&mut self) -> LocalSwarm {
    let max_retries = SWARM_BUILD_NUM_RETRIES;
    let mut tries = 0;
    // `<=` on purpose: one initial attempt plus `max_retries` retries.
    while tries <= max_retries {
        match self.build_inner().await {
            Ok(swarm) => return swarm,
            Err(err) => warn!("Attempt {} / {} failed with: {}", tries, max_retries, err),
        }
        tries += 1;
    }
    panic!("Exhausted retries: {} / {}", tries, max_retries);
}

pub async fn build_with_cli(
self,
&mut self,
num_cli_accounts: usize,
) -> (LocalSwarm, CliTestFramework, JoinHandle<()>) {
let swarm = self.build().await;
Expand Down Expand Up @@ -136,27 +152,6 @@ impl SwarmBuilder {
}
}

/// Runs `build` until it yields a swarm, making at most `num_retries` attempts.
///
/// `build` is called afresh for every attempt and must return a pinned, boxed
/// future resolving to `anyhow::Result<LocalSwarm>`. Each failed attempt is
/// logged as a warning with its zero-based index.
///
/// # Panics
///
/// Panics when all `num_retries` attempts have failed (immediately when
/// `num_retries` is 0, without ever calling `build`).
pub async fn with_retry<F>(build: F, num_retries: u8) -> LocalSwarm
where
    F: Fn() -> Pin<Box<dyn std::future::Future<Output = anyhow::Result<LocalSwarm>>>>,
{
    for attempt in 0..num_retries {
        match build().await {
            Ok(swarm) => return swarm,
            Err(err) => warn!("Attempt {} / {} failed with: {}", attempt, num_retries, err),
        }
    }
    // Reaching this point means the attempt counter equals `num_retries`.
    panic!("Exhausted retries: {} / {}", num_retries, num_retries);
}

// Gas is not enabled with this setup, it's enabled via forge instance.
pub async fn new_local_swarm_with_aptos(num_validators: usize) -> LocalSwarm {
SwarmBuilder::new_local(num_validators)
Expand Down
8 changes: 2 additions & 6 deletions testsuite/smoke-test/src/storage.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (c) Aptos
// SPDX-License-Identifier: Apache-2.0

use crate::smoke_test_environment::{with_retry, SwarmBuilder};
use crate::smoke_test_environment::SwarmBuilder;
use crate::{
test_utils::{
assert_balance, create_and_fund_account, swarm_utils::insert_waypoint,
Expand Down Expand Up @@ -35,11 +35,7 @@ async fn test_db_restore() {
workspace_builder::get_bin("db-backup-verify");
info!("---------- 1. pre-building finished.");

let mut swarm = with_retry(
|| Box::pin(SwarmBuilder::new_local(4).with_aptos().build_wrapped()),
3,
)
.await;
let mut swarm = SwarmBuilder::new_local(4).with_aptos().build().await;
info!("---------- 1.1 swarm built, sending some transactions.");
let validator_peer_ids = swarm.validators().map(|v| v.peer_id()).collect::<Vec<_>>();
let client_1 = swarm
Expand Down
23 changes: 8 additions & 15 deletions testsuite/smoke-test/src/txn_broadcast.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (c) Aptos
// SPDX-License-Identifier: Apache-2.0

use crate::smoke_test_environment::{with_retry, SwarmBuilder};
use crate::smoke_test_environment::SwarmBuilder;
use crate::test_utils::{assert_balance, create_and_fund_account, transfer_coins};
use aptos_config::config::NodeConfig;
use forge::{NodeExt, Swarm, SwarmExt};
Expand All @@ -14,20 +14,13 @@ const MAX_WAIT_SECS: u64 = 60;
/// This behavior should be true with both mempool and quorum store.
#[tokio::test]
async fn test_txn_broadcast() {
let mut swarm = with_retry(
|| {
Box::pin(
SwarmBuilder::new_local(4)
.with_aptos()
.with_init_config(Arc::new(|_, conf, _| {
conf.api.failpoints_enabled = true;
}))
.build_wrapped(),
)
},
3,
)
.await;
let mut swarm = SwarmBuilder::new_local(4)
.with_aptos()
.with_init_config(Arc::new(|_, conf, _| {
conf.api.failpoints_enabled = true;
}))
.build()
.await;
let transaction_factory = swarm.chain_info().transaction_factory();
let version = swarm.versions().max().unwrap();
let validator_peer_ids = swarm.validators().map(|v| v.peer_id()).collect::<Vec<_>>();
Expand Down