From 50b2d3928019809ee6b259331b5355ec764b275d Mon Sep 17 00:00:00 2001
From: Ashwin Sekar <ashwin@solana.com>
Date: Tue, 27 Dec 2022 09:47:45 -0800
Subject: [PATCH] Add more logging and documentation to flaky optimistic
 confirmation tests (#29418)

* Revert "add retry for flakey local cluster test (#29228)"

This reverts commit 7a97121747946bf14dbb97c855a064720520bf15.

* Add logging for repair
---
 core/src/serve_repair.rs                    |  5 ++
 local-cluster/tests/local_cluster_flakey.rs | 62 ++++++---------------
 2 files changed, 21 insertions(+), 46 deletions(-)

diff --git a/core/src/serve_repair.rs b/core/src/serve_repair.rs
index 998b1da253a0a0..65c60f62d741be 100644
--- a/core/src/serve_repair.rs
+++ b/core/src/serve_repair.rs
@@ -926,6 +926,11 @@ impl ServeRepair {
             nonce,
             identity_keypair,
         )?;
+        debug!(
+            "Sending repair request from {} for {:#?}",
+            identity_keypair.pubkey(),
+            repair_request
+        );
         Ok((addr, out))
     }
 
diff --git a/local-cluster/tests/local_cluster_flakey.rs b/local-cluster/tests/local_cluster_flakey.rs
index 4f3a98463a8fed..20a0272a71ce93 100644
--- a/local-cluster/tests/local_cluster_flakey.rs
+++ b/local-cluster/tests/local_cluster_flakey.rs
@@ -38,43 +38,18 @@ fn test_optimistic_confirmation_violation_without_tower() {
     do_test_optimistic_confirmation_violation_with_or_without_tower(false);
 }
 
-enum RunResult {
-    Success,
-    FailNoViolation,
-    FailViolation,
-}
-
-fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: bool) {
-    let mut retry = 10;
-    while retry > 0 {
-        match do_test_optimistic_confirmation_violation_with_or_without_tower_inner(with_tower) {
-            RunResult::Success => {
-                return;
-            }
-            _ => {
-                retry -= 1;
-            }
-        }
-    }
-    panic!("optimistic confirmation violation with or without tower failed after 10 trial");
-}
-
 // A bit convoluted test case; but this roughly follows this test theoretical scenario:
+// Validator A, B, C have 31, 36, 33 % of stake respectively. Leader schedule is split, first half
+// of the test B is always leader, second half C is. Additionally we have a non voting validator D with 0
+// stake to propagate gossip info.
 //
-// Step 1: You have validator A + B with 31% and 36% of the stake. Run only validator B:
-//
-//  S0 -> S1 -> S2 -> S3 (B vote)
-//
-// Step 2: Turn off B, and truncate the ledger after slot `S3` (simulate votes not
-// landing in next slot).
-// Copy ledger fully to validator A and validator C
-//
-// Step 3: Turn on A, and have it vote up to S3. Truncate anything past slot `S3`.
+// Step 1: Kill C, only A, B and D should be running
 //
 //  S0 -> S1 -> S2 -> S3 (A & B vote, optimistically confirmed)
 //
-// Step 4:
-// Start validator C with 33% of the stake with same ledger, but only up to slot S2.
+// Step 2:
+// Kill A and B once we verify that they have voted on S3 or beyond. Copy B's ledger to C but only
+// up to slot S2
 // Have `C` generate some blocks like:
 //
 // S0 -> S1 -> S2 -> S4
@@ -99,10 +74,8 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
 // With the persisted tower:
 //    `A` should not be able to generate a switching proof.
 //
-fn do_test_optimistic_confirmation_violation_with_or_without_tower_inner(
-    with_tower: bool,
-) -> RunResult {
-    solana_logger::setup_with_default(RUST_LOG_FILTER);
+fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: bool) {
+    solana_logger::setup_with("debug");
 
     // First set up the cluster with 4 nodes
     let slots_per_epoch = 2048;
@@ -150,7 +123,7 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower_inner(
     // 2. Validator A doesn't vote past `next_slot_on_a` before we can kill it. This is essential
     // because if validator A votes past `next_slot_on_a`, and then we copy over validator B's ledger
     // below only for slots <= `next_slot_on_a`, validator A will not know how it's last vote chains
-    // to the otehr forks, and may violate switching proofs on restart.
+    // to the other forks, and may violate switching proofs on restart.
     let mut default_config = ValidatorConfig::default_for_test();
     // Split leader schedule 50-50 between validators B and C, don't give validator A any slots because
     // it's going to be deleting its ledger, so may create versions of slots it's already created, but
@@ -172,8 +145,9 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower_inner(
     let mut validator_configs =
         make_identical_validator_configs(&default_config, node_stakes.len());
 
-    // Disable voting on validator C
+    // Disable voting on validators C, and D
     validator_configs[2].voting_disabled = true;
+    validator_configs[3].voting_disabled = true;
 
     let mut config = ClusterConfig {
         cluster_lamports: DEFAULT_CLUSTER_LAMPORTS + node_stakes.iter().sum::<u64>(),
@@ -254,7 +228,7 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower_inner(
     let validator_a_info = cluster.exit_node(&validator_a_pubkey);
 
     // Step 2:
-    // Stop validator and truncate ledger, copy over B's ledger to C
+    // Truncate ledger, copy over B's ledger to C
     info!("Create validator C's ledger");
     {
         // first copy from validator B's ledger
@@ -290,10 +264,10 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower_inner(
         .unwrap()
         .0;
     assert!(last_vote_slot >= next_slot_on_a);
+    info!("Success, A voted on slot {}", last_vote_slot);
 
     {
         let blockstore = open_blockstore(&val_a_ledger_path);
-        purge_slots_with_count(&blockstore, next_slot_on_a + 1, truncated_slots);
         if !with_tower {
             info!("Removing tower!");
             remove_tower(&val_a_ledger_path, &validator_a_pubkey);
@@ -369,17 +343,13 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower_inner(
     let expects_optimistic_confirmation_violation = !with_tower;
     if bad_vote_detected != expects_optimistic_confirmation_violation {
         if bad_vote_detected {
-            error!("No violation expected because of persisted tower!");
-            return RunResult::FailNoViolation;
+            panic!("No violation expected because of persisted tower!");
         } else {
-            error!("Violation expected because of removed persisted tower!");
-            return RunResult::FailViolation;
+            panic!("Violation expected because of removed persisted tower!");
         }
     } else if bad_vote_detected {
         info!("THIS TEST expected violations. And indeed, there was some, because of removed persisted tower.");
     } else {
         info!("THIS TEST expected no violation. And indeed, there was none, thanks to persisted tower.");
     }
-
-    RunResult::Success
 }