Skip to content

Commit

Permalink
fix flaky test_db_restart (#15291)
Browse files (browse the repository at this point in the history)
  • Loading branch information
msmouse authored Nov 20, 2024
1 parent e2af7c4 commit 1a4fe24
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 34 deletions.
9 changes: 4 additions & 5 deletions consensus/src/transaction_shuffler/use_case_aware/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@
// SPDX-License-Identifier: Apache-2.0

use crate::transaction_shuffler::TransactionShuffler;
use aptos_types::transaction::{
use_case::{UseCaseAwareTransaction, UseCaseKey},
SignedTransaction,
};
use aptos_types::transaction::{use_case::UseCaseKey, SignedTransaction};
use iterator::ShuffledTransactionIterator;
use std::fmt::Debug;

Expand Down Expand Up @@ -45,7 +42,9 @@ pub struct UseCaseAwareShuffler {

#[cfg(any(test, feature = "fuzzing"))]
impl UseCaseAwareShuffler {
pub fn shuffle_generic<Txn: UseCaseAwareTransaction + Debug>(
pub fn shuffle_generic<
Txn: aptos_types::transaction::use_case::UseCaseAwareTransaction + Debug,
>(
&self,
txns: Vec<Txn>,
) -> Vec<Txn> {
Expand Down
2 changes: 1 addition & 1 deletion crates/aptos-rest-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -765,7 +765,7 @@ impl Client {
if let Some(state) = aptos_error_response.state {
if expiration_timestamp_secs <= state.timestamp_usecs / 1_000_000 {
if reached_mempool {
return Err(anyhow!("Transaction expired. It is guaranteed it will not be committed on chain.").into());
return Err(anyhow!("Used to be pending and now not found. Transaction expired. It is guaranteed it will not be committed on chain.").into());
} else {
// We want to know whether we ever got Pending state from the mempool,
// to warn in case we didn't.
Expand Down
18 changes: 5 additions & 13 deletions testsuite/forge/src/interface/aptos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,20 +332,12 @@ pub async fn reconfig(
transaction_factory: &TransactionFactory,
root_account: Arc<LocalAccount>,
) -> State {
let aptos_version = client.get_aptos_version().await.unwrap();
let current = aptos_version.into_inner();
let current_version = *current.major.inner();
let txns = {
vec![
root_account.sign_with_transaction_builder(transaction_factory.clone().payload(
aptos_stdlib::version_set_for_next_epoch(current_version + 1),
)),
root_account.sign_with_transaction_builder(
transaction_factory
.clone()
.payload(aptos_stdlib::aptos_governance_force_end_epoch_test_only()),
),
]
vec![root_account.sign_with_transaction_builder(
transaction_factory
.clone()
.payload(aptos_stdlib::aptos_governance_force_end_epoch_test_only()),
)]
};

submit_and_wait_reconfig(client, txns).await
Expand Down
40 changes: 25 additions & 15 deletions testsuite/smoke-test/src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -466,14 +466,16 @@ async fn do_transfer_or_reconfig(info: &mut AptosPublicInfo) -> Result<()> {
const LOTS_MONEY: u64 = 100_000_000;
let r = rand::random::<u64>() % 10;
if r < 3 {
// reconfig
info!(
"{LINE} background task: triggering reconfig. Root account seq_num: {}. Ledger info: {:?}",
info.root_account().sequence_number(),
info.client().get_ledger_information().await.unwrap(),
);
info.reconfig().await;
} else if r == 9 {
// drain backlog
let mut sender = info.create_and_fund_user_account(LOTS_MONEY).await?;
let receiver = info.create_and_fund_user_account(LOTS_MONEY).await?;
let pending_txn = info.transfer(&mut sender, &receiver, 1).await?;
info.client().wait_for_transaction(&pending_txn).await?;
info!(
"{LINE} background task: Reconfig done. Root account seq_num: {}",
info.root_account().sequence_number(),
);
} else {
let mut sender = info.create_and_fund_user_account(LOTS_MONEY).await?;
let receiver = info.create_and_fund_user_account(LOTS_MONEY).await?;
Expand Down Expand Up @@ -531,23 +533,30 @@ async fn test_db_restart() {
for round in 0..3 {
info!("{LINE} Restart round {round}");
for (v, vid) in restarting_validator_ids.iter().enumerate() {
info!("{LINE} Round {round}: Restarting validator {v}.");
info!(
"{LINE} ledger info: {:?}",
client.get_ledger_information().await.unwrap(),
);
let validator = swarm.validator_mut(*vid).unwrap();
// sometimes trigger reconfig right before the restart, to expose edge cases around
// epoch change
if rand::random::<usize>() % 3 == 0 {
info!("{LINE} Triggering reconfig right before restarting.");
info!(
"{LINE} Triggering reconfig right before restarting. Root account seq_num: {}. Ledger info: {:?}",
pub_chain_info.root_account().sequence_number(),
client.get_ledger_information().await.unwrap(),
);
reconfig(
&validator.rest_client(),
&client,
&pub_chain_info.transaction_factory(),
pub_chain_info.root_account(),
)
.await;
info!(
"{LINE} Reconfig done. Root account seq_num: {}",
pub_chain_info.root_account().sequence_number(),
)
}
info!(
"{LINE} Round {round}: Restarting validator {v}. ledger info: {:?}",
client.get_ledger_information().await.unwrap(),
);
validator.restart().await.unwrap();
swarm
.wait_for_all_nodes_to_catchup(Duration::from_secs(60))
Expand All @@ -558,11 +567,12 @@ async fn test_db_restart() {
}
}

info!("{LINE} Stopping background traffic, and check again that all validators are alive.");
info!("{LINE} Stopping background traffic, and make sure background task didn't panic.");
quit_flag.store(true, Ordering::Release);
// Make sure background thread didn't panic.
background_traffic.await.unwrap();

info!("{LINE} Check again that all validators are alive.");
swarm
.wait_for_all_nodes_to_catchup(Duration::from_secs(60))
.await
Expand Down

0 comments on commit 1a4fe24

Please sign in to comment.