
Commit 50b2d39

Add more logging and documentation to flaky optimistic confirmation tests (solana-labs#29418)

* Revert "add retry for flakey local cluster test (solana-labs#29228)"

This reverts commit 7a97121.

* Add logging for repair
AshwinSekar authored and gnapoli23 committed Jan 10, 2023
1 parent 0ff0861 commit 50b2d39
Showing 2 changed files with 21 additions and 46 deletions.
5 changes: 5 additions & 0 deletions core/src/serve_repair.rs
@@ -926,6 +926,11 @@ impl ServeRepair {
nonce,
identity_keypair,
)?;
debug!(
"Sending repair request from {} for {:#?}",
identity_keypair.pubkey(),
repair_request
);
Ok((addr, out))
}

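For readers tracing the new log line, here is a minimal standalone sketch of how the added debug! output can be reproduced; the bare main, the placeholder repair_request string, and the crate imports are assumptions for illustration, not the actual ServeRepair code path:

use log::debug;
use solana_sdk::signer::{keypair::Keypair, Signer};

fn main() {
    // Enable debug-level logging; the test below switches to the same "debug" filter.
    solana_logger::setup_with("debug");

    let identity_keypair = Keypair::new();
    // Placeholder standing in for the real repair request value formatted with {:#?}.
    let repair_request = "Shred { slot: 100, shred_index: 5 }";

    debug!(
        "Sending repair request from {} for {:#?}",
        identity_keypair.pubkey(),
        repair_request
    );
}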
62 changes: 16 additions & 46 deletions local-cluster/tests/local_cluster_flakey.rs
@@ -38,43 +38,18 @@ fn test_optimistic_confirmation_violation_without_tower() {
do_test_optimistic_confirmation_violation_with_or_without_tower(false);
}

enum RunResult {
Success,
FailNoViolation,
FailViolation,
}

fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: bool) {
let mut retry = 10;
while retry > 0 {
match do_test_optimistic_confirmation_violation_with_or_without_tower_inner(with_tower) {
RunResult::Success => {
return;
}
_ => {
retry -= 1;
}
}
}
panic!("optimistic confirmation violation with or without tower failed after 10 trial");
}

// A bit convoluted test case; but this roughly follows this test theoretical scenario:
// Validator A, B, C have 31, 36, 33 % of stake respectively. Leader schedule is split, first half
// of the test B is always leader, second half C is. Additionally we have a non voting validator D with 0
// stake to propagate gossip info.
//
// Step 1: You have validator A + B with 31% and 36% of the stake. Run only validator B:
//
// S0 -> S1 -> S2 -> S3 (B vote)
//
// Step 2: Turn off B, and truncate the ledger after slot `S3` (simulate votes not
// landing in next slot).
// Copy ledger fully to validator A and validator C
//
// Step 3: Turn on A, and have it vote up to S3. Truncate anything past slot `S3`.
// Step 1: Kill C, only A, B and D should be running
//
// S0 -> S1 -> S2 -> S3 (A & B vote, optimistically confirmed)
//
// Step 4:
// Start validator C with 33% of the stake with same ledger, but only up to slot S2.
// Step 2:
// Kill A and B once we verify that they have voted on S3 or beyond. Copy B's ledger to C but only
// up to slot S2
// Have `C` generate some blocks like:
//
// S0 -> S1 -> S2 -> S4
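The scenario comments above give A, B, and C 31%, 36%, and 33% of the stake, plus a non-voting D with none; the ordering matters because the diff later toggles validator_configs[2] and [3]. A minimal sketch of that layout (the values and the assertion are illustrative assumptions, not the test's real constants):

fn main() {
    // Index 0 = A (31%), 1 = B (36%), 2 = C (33%), 3 = D (0 stake, gossip only);
    // validator_configs[2] and validator_configs[3] below therefore refer to C and D.
    let node_stakes: Vec<u64> = vec![31, 36, 33, 0];
    assert_eq!(node_stakes.iter().sum::<u64>(), 100);
}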
@@ -99,10 +74,8 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
// With the persisted tower:
// `A` should not be able to generate a switching proof.
//
fn do_test_optimistic_confirmation_violation_with_or_without_tower_inner(
with_tower: bool,
) -> RunResult {
solana_logger::setup_with_default(RUST_LOG_FILTER);
fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: bool) {
solana_logger::setup_with("debug");

// First set up the cluster with 4 nodes
let slots_per_epoch = 2048;
@@ -150,7 +123,7 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower_inner(
// 2. Validator A doesn't vote past `next_slot_on_a` before we can kill it. This is essential
// because if validator A votes past `next_slot_on_a`, and then we copy over validator B's ledger
// below only for slots <= `next_slot_on_a`, validator A will not know how it's last vote chains
// to the otehr forks, and may violate switching proofs on restart.
// to the other forks, and may violate switching proofs on restart.
let mut default_config = ValidatorConfig::default_for_test();
// Split leader schedule 50-50 between validators B and C, don't give validator A any slots because
// it's going to be deleting its ledger, so may create versions of slots it's already created, but
@@ -172,8 +145,9 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower_inner(
let mut validator_configs =
make_identical_validator_configs(&default_config, node_stakes.len());

// Disable voting on validator C
// Disable voting on validators C, and D
validator_configs[2].voting_disabled = true;
validator_configs[3].voting_disabled = true;

let mut config = ClusterConfig {
cluster_lamports: DEFAULT_CLUSTER_LAMPORTS + node_stakes.iter().sum::<u64>(),
@@ -254,7 +228,7 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower_inner(
let validator_a_info = cluster.exit_node(&validator_a_pubkey);

// Step 2:
// Stop validator and truncate ledger, copy over B's ledger to C
// Truncate ledger, copy over B's ledger to C
info!("Create validator C's ledger");
{
// first copy from validator B's ledger
@@ -290,10 +264,10 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower_inner(
.unwrap()
.0;
assert!(last_vote_slot >= next_slot_on_a);
info!("Success, A voted on slot {}", last_vote_slot);

{
let blockstore = open_blockstore(&val_a_ledger_path);
purge_slots_with_count(&blockstore, next_slot_on_a + 1, truncated_slots);
if !with_tower {
info!("Removing tower!");
remove_tower(&val_a_ledger_path, &validator_a_pubkey);
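The remove_tower call above is what distinguishes the two runs of this test: with the tower file deleted, validator A restarts with no memory of its last votes. A hedged sketch of what such a helper can look like; the helper name and the tower-<pubkey>.bin naming are assumptions for illustration, not the test's real utility:

use std::fs;
use std::path::Path;

// Hypothetical helper: delete a validator's persisted tower from its ledger directory
// so the node restarts without its previous voting state.
fn remove_tower_file(ledger_path: &Path, pubkey: &str) {
    let tower_file = ledger_path.join(format!("tower-{pubkey}.bin"));
    if tower_file.exists() {
        fs::remove_file(&tower_file).expect("failed to remove tower file");
    }
}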
@@ -369,17 +343,13 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower_inner(
let expects_optimistic_confirmation_violation = !with_tower;
if bad_vote_detected != expects_optimistic_confirmation_violation {
if bad_vote_detected {
error!("No violation expected because of persisted tower!");
return RunResult::FailNoViolation;
panic!("No violation expected because of persisted tower!");
} else {
error!("Violation expected because of removed persisted tower!");
return RunResult::FailViolation;
panic!("Violation expected because of removed persisted tower!");
}
} else if bad_vote_detected {
info!("THIS TEST expected violations. And indeed, there was some, because of removed persisted tower.");
} else {
info!("THIS TEST expected no violation. And indeed, there was none, thanks to persisted tower.");
}

RunResult::Success
}
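With the retry wrapper and RunResult gone, the outcome check above is now a hard pass/fail. Restated as a standalone sketch (a hedged summary of the same branches, not code from the diff):

// With a persisted tower no violation is expected; with the tower removed, an
// optimistic confirmation violation is expected. Any other combination fails the test.
fn check_outcome(with_tower: bool, bad_vote_detected: bool) {
    let expects_violation = !with_tower;
    match (bad_vote_detected, expects_violation) {
        (true, true) => println!("expected violation occurred (tower was removed)"),
        (false, false) => println!("no violation, thanks to the persisted tower"),
        (true, false) => panic!("No violation expected because of persisted tower!"),
        (false, true) => panic!("Violation expected because of removed persisted tower!"),
    }
}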
