From 574fa48194ca42013b4b5171ec81ebfb5ea5ac14 Mon Sep 17 00:00:00 2001 From: igor-aptos <110557261+igor-aptos@users.noreply.github.com> Date: Sun, 15 Sep 2024 00:49:49 -0700 Subject: [PATCH 01/36] cleanup info logs (#14555) --- consensus/src/liveness/proposal_generator.rs | 4 ++-- consensus/src/pipeline/buffer_manager.rs | 2 +- consensus/src/round_manager.rs | 12 ++++++++++-- types/src/transaction/use_case.rs | 2 +- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/consensus/src/liveness/proposal_generator.rs b/consensus/src/liveness/proposal_generator.rs index 411d24c7ac2fa..334b0a76fbf4e 100644 --- a/consensus/src/liveness/proposal_generator.rs +++ b/consensus/src/liveness/proposal_generator.rs @@ -29,7 +29,7 @@ use aptos_consensus_types::{ }; use aptos_crypto::{hash::CryptoHash, HashValue}; use aptos_infallible::Mutex; -use aptos_logger::{error, info, sample, sample::SampleRate, warn}; +use aptos_logger::{error, sample, sample::SampleRate, warn}; use aptos_types::{on_chain_config::ValidatorTxnConfig, validator_txn::ValidatorTransaction}; use aptos_validator_transaction_pool as vtxn_pool; use futures::future::BoxFuture; @@ -203,7 +203,7 @@ impl PipelineBackpressureConfig { PROPOSER_ESTIMATED_CALIBRATED_BLOCK_TXNS.observe(calibrated_block_size as f64); // Check if calibrated block size is reduction in size, to turn on backpressure. if max_block_txns > calibrated_block_size { - info!( + warn!( block_execution_times = format!("{:?}", block_execution_times), estimated_calibrated_block_sizes = format!("{:?}", sizes), calibrated_block_size = calibrated_block_size, diff --git a/consensus/src/pipeline/buffer_manager.rs b/consensus/src/pipeline/buffer_manager.rs index b3ebe706f608c..603a246b228a0 100644 --- a/consensus/src/pipeline/buffer_manager.rs +++ b/consensus/src/pipeline/buffer_manager.rs @@ -708,7 +708,7 @@ impl BufferManager { // find the corresponding item let author = vote.author(); let commit_info = vote.commit_info().clone(); - info!("Receive commit vote {} from {}", commit_info, author); + trace!("Receive commit vote {} from {}", commit_info, author); let target_block_id = vote.commit_info().id(); let current_cursor = self .buffer diff --git a/consensus/src/round_manager.rs b/consensus/src/round_manager.rs index 748d01f29ad88..03893ac4e79c2 100644 --- a/consensus/src/round_manager.rs +++ b/consensus/src/round_manager.rs @@ -1109,11 +1109,19 @@ impl RoundManager { .await?; } else { ORDER_VOTE_VERY_OLD.inc(); - info!( + sample!( + SampleRate::Duration(Duration::from_secs(30)), + info!( + "[sampled] Received old order vote. Order vote round: {:?}, Highest ordered round: {:?}", + order_vote_msg.order_vote().ledger_info().round(), + self.block_store.sync_info().highest_ordered_round() + ) + ); + debug!( "Received old order vote. 
Order vote round: {:?}, Highest ordered round: {:?}", order_vote_msg.order_vote().ledger_info().round(), self.block_store.sync_info().highest_ordered_round() - ); + ) } } Ok(()) diff --git a/types/src/transaction/use_case.rs b/types/src/transaction/use_case.rs index ee72a61b5d964..d947b76874b44 100644 --- a/types/src/transaction/use_case.rs +++ b/types/src/transaction/use_case.rs @@ -18,7 +18,7 @@ impl std::fmt::Debug for UseCaseKey { match self { Platform => write!(f, "PP"), - ContractAddress(addr) => write!(f, "c{}", hex::encode_upper(&addr[31..])), + ContractAddress(addr) => write!(f, "c{}", hex::encode_upper(&addr[29..])), Others => write!(f, "OO"), } } From bb012f63ebc37cd08c3e35c8cf33e46632153a5f Mon Sep 17 00:00:00 2001 From: igor-aptos <110557261+igor-aptos@users.noreply.github.com> Date: Mon, 16 Sep 2024 09:24:07 -0700 Subject: [PATCH 02/36] cleanup warn logs (#14614) --- consensus/src/round_manager.rs | 26 ++++++++++++------- consensus/src/round_manager_test.rs | 5 +++- crates/reliable-broadcast/src/lib.rs | 4 +-- .../executor/src/components/chunk_output.rs | 15 ++++++----- .../framework/src/application/interface.rs | 2 +- network/framework/src/peer/mod.rs | 19 ++++++++------ network/framework/src/protocols/rpc/mod.rs | 17 +++++++----- .../storage-service/server/src/handler.rs | 2 +- 8 files changed, 54 insertions(+), 36 deletions(-) diff --git a/consensus/src/round_manager.rs b/consensus/src/round_manager.rs index 03893ac4e79c2..aed3118835b9e 100644 --- a/consensus/src/round_manager.rs +++ b/consensus/src/round_manager.rs @@ -555,20 +555,28 @@ impl RoundManager { block_parent_hash = proposal_msg.proposal().quorum_cert().certified_block().id(), ); - ensure!( - self.ensure_round_and_sync_up( + let in_correct_round = self + .ensure_round_and_sync_up( proposal_msg.proposal().round(), proposal_msg.sync_info(), proposal_msg.proposer(), ) .await - .context("[RoundManager] Process proposal")?, - "Stale proposal {}, current round {}", - proposal_msg.proposal(), - self.round_state.current_round() - ); - - self.process_proposal(proposal_msg.take_proposal()).await + .context("[RoundManager] Process proposal")?; + if in_correct_round { + self.process_proposal(proposal_msg.take_proposal()).await + } else { + sample!( + SampleRate::Duration(Duration::from_secs(30)), + warn!( + "[sampled] Stale proposal {}, current round {}", + proposal_msg.proposal(), + self.round_state.current_round() + ) + ); + counters::ERROR_COUNT.inc(); + Ok(()) + } } pub async fn process_delayed_proposal_msg(&mut self, proposal: Block) -> anyhow::Result<()> { diff --git a/consensus/src/round_manager_test.rs b/consensus/src/round_manager_test.rs index cf34840d95a15..a01fef7b06bab 100644 --- a/consensus/src/round_manager_test.rs +++ b/consensus/src/round_manager_test.rs @@ -4,6 +4,7 @@ use crate::{ block_storage::{pending_blocks::PendingBlocks, BlockReader, BlockStore}, + counters, liveness::{ proposal_generator::{ ChainHealthBackoffConfig, PipelineBackpressureConfig, ProposalGenerator, @@ -1147,11 +1148,13 @@ fn new_round_on_timeout_certificate() { None, ), ); + let before = counters::ERROR_COUNT.get(); assert!(node .round_manager .process_proposal_msg(old_good_proposal) .await - .is_err()); + .is_ok()); // we eat the error + assert_eq!(counters::ERROR_COUNT.get(), before + 1); // but increase the counter }); } diff --git a/crates/reliable-broadcast/src/lib.rs b/crates/reliable-broadcast/src/lib.rs index a46e806f9aca9..12647b7a1581a 100644 --- a/crates/reliable-broadcast/src/lib.rs +++ 
b/crates/reliable-broadcast/src/lib.rs @@ -210,8 +210,8 @@ where fn log_rpc_failure(error: anyhow::Error, receiver: Author) { // Log a sampled warning (to prevent spam) sample!( - SampleRate::Duration(Duration::from_secs(1)), - warn!(error = ?error, "rpc to {} failed, error {}", receiver, error) + SampleRate::Duration(Duration::from_secs(30)), + warn!(error = ?error, "[sampled] rpc to {} failed, error {}", receiver, error) ); // Log at the debug level (this is useful for debugging diff --git a/execution/executor/src/components/chunk_output.rs b/execution/executor/src/components/chunk_output.rs index 7de6d39417a0d..3e471f5dcf714 100644 --- a/execution/executor/src/components/chunk_output.rs +++ b/execution/executor/src/components/chunk_output.rs @@ -301,13 +301,6 @@ pub fn update_counters_for_processed_chunk( ), }, TransactionStatus::Discard(discard_status_code) => { - sample!( - SampleRate::Duration(Duration::from_secs(15)), - warn!( - "Txn being discarded is {:?} with status code {:?}", - txn, discard_status_code - ) - ); ( // Specialize duplicate txns for alerts if *discard_status_code == StatusCode::SEQUENCE_NUMBER_TOO_OLD { @@ -317,6 +310,14 @@ pub fn update_counters_for_processed_chunk( } else if *discard_status_code == StatusCode::TRANSACTION_EXPIRED { "discard_transaction_expired" } else { + // Only log if it is an interesting discard + sample!( + SampleRate::Duration(Duration::from_secs(15)), + warn!( + "[sampled] Txn being discarded is {:?} with status code {:?}", + txn, discard_status_code + ) + ); "discard" }, "error_code", diff --git a/network/framework/src/application/interface.rs b/network/framework/src/application/interface.rs index 6ccb2cf36354e..912e34c49e98b 100644 --- a/network/framework/src/application/interface.rs +++ b/network/framework/src/application/interface.rs @@ -177,7 +177,7 @@ impl NetworkClient { sample!( SampleRate::Duration(Duration::from_secs(10)), warn!( - "Unavailable peers (without a common network protocol): {:?}", + "[sampled] Unavailable peers (without a common network protocol): {:?}", peers_without_a_protocol ) ); diff --git a/network/framework/src/peer/mod.rs b/network/framework/src/peer/mod.rs index 094e3d70c0421..651d5fed0eece 100644 --- a/network/framework/src/peer/mod.rs +++ b/network/framework/src/peer/mod.rs @@ -639,14 +639,17 @@ where .outbound_rpcs .handle_outbound_request(request, write_reqs_tx) { - warn!( - NetworkSchema::new(&self.network_context) - .connection_metadata(&self.connection_metadata), - error = %e, - "Failed to send outbound rpc request for protocol {} to peer: {}. Error: {}", - protocol_id, - self.remote_peer_id().short_str(), - e, + sample!( + SampleRate::Duration(Duration::from_secs(10)), + warn!( + NetworkSchema::new(&self.network_context) + .connection_metadata(&self.connection_metadata), + error = %e, + "[sampled] Failed to send outbound rpc request for protocol {} to peer: {}. Error: {}", + protocol_id, + self.remote_peer_id().short_str(), + e, + ) ); } }, diff --git a/network/framework/src/protocols/rpc/mod.rs b/network/framework/src/protocols/rpc/mod.rs index b948226c4cd70..2be2a22a5f667 100644 --- a/network/framework/src/protocols/rpc/mod.rs +++ b/network/framework/src/protocols/rpc/mod.rs @@ -666,13 +666,16 @@ impl OutboundRpcs { FAILED_LABEL, ) .inc(); - warn!( - NetworkSchema::new(network_context).remote_peer(peer_id), - "{} Error making outbound RPC request to {} (request_id {}). 
Error: {}", - network_context, - peer_id.short_str(), - request_id, - error + sample!( + SampleRate::Duration(Duration::from_secs(10)), + warn!( + NetworkSchema::new(network_context).remote_peer(peer_id), + "[sampled] {} Error making outbound RPC request to {} (request_id {}). Error: {}", + network_context, + peer_id.short_str(), + request_id, + error + ) ); } }, diff --git a/state-sync/storage-service/server/src/handler.rs b/state-sync/storage-service/server/src/handler.rs index d1748ae8a72d9..fc642e212ded2 100644 --- a/state-sync/storage-service/server/src/handler.rs +++ b/state-sync/storage-service/server/src/handler.rs @@ -247,7 +247,7 @@ impl Handler { { sample!( SampleRate::Duration(Duration::from_secs(ERROR_LOG_FREQUENCY_SECS)), - warn!(LogSchema::new(LogEntry::OptimisticFetchRequest) + trace!(LogSchema::new(LogEntry::OptimisticFetchRequest) .error(&Error::InvalidRequest( "An active optimistic fetch was already found for the peer!".into() )) From f76560e216157ef094c0973d1e7c0bbf752bb422 Mon Sep 17 00:00:00 2001 From: igor-aptos <110557261+igor-aptos@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:30:27 -0700 Subject: [PATCH 03/36] remove backtrace from common warn logs (#14622) --- consensus/src/round_manager.rs | 4 ++-- crates/reliable-broadcast/src/lib.rs | 4 ++-- network/framework/src/protocols/health_checker/mod.rs | 6 ++---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/consensus/src/round_manager.rs b/consensus/src/round_manager.rs index aed3118835b9e..27382493cc2eb 100644 --- a/consensus/src/round_manager.rs +++ b/consensus/src/round_manager.rs @@ -1535,7 +1535,7 @@ impl RoundManager { Ok(_) => trace!(RoundStateLogSchema::new(round_state)), Err(e) => { counters::ERROR_COUNT.inc(); - warn!(error = ?e, kind = error_kind(&e), RoundStateLogSchema::new(round_state)); + warn!(kind = error_kind(&e), RoundStateLogSchema::new(round_state), "Error: {:#}", e); } } } @@ -1583,7 +1583,7 @@ impl RoundManager { Ok(_) => trace!(RoundStateLogSchema::new(round_state)), Err(e) => { counters::ERROR_COUNT.inc(); - warn!(error = ?e, kind = error_kind(&e), RoundStateLogSchema::new(round_state)); + warn!(kind = error_kind(&e), RoundStateLogSchema::new(round_state), "Error: {:#}", e); } } }, diff --git a/crates/reliable-broadcast/src/lib.rs b/crates/reliable-broadcast/src/lib.rs index 12647b7a1581a..7246f2b729a52 100644 --- a/crates/reliable-broadcast/src/lib.rs +++ b/crates/reliable-broadcast/src/lib.rs @@ -211,12 +211,12 @@ fn log_rpc_failure(error: anyhow::Error, receiver: Author) { // Log a sampled warning (to prevent spam) sample!( SampleRate::Duration(Duration::from_secs(30)), - warn!(error = ?error, "[sampled] rpc to {} failed, error {}", receiver, error) + warn!("[sampled] rpc to {} failed, error {:#}", receiver, error) ); // Log at the debug level (this is useful for debugging // and won't spam the logs in a production environment). 
- debug!(error = ?error, "rpc to {} failed, error {}", receiver, error); + debug!("rpc to {} failed, error {:#}", receiver, error); } pub struct DropGuard { diff --git a/network/framework/src/protocols/health_checker/mod.rs b/network/framework/src/protocols/health_checker/mod.rs index fea7da738dd95..c59bc8a4a3dde 100644 --- a/network/framework/src/protocols/health_checker/mod.rs +++ b/network/framework/src/protocols/health_checker/mod.rs @@ -342,11 +342,9 @@ impl + Unpin> HealthChec }, Err(err) => { warn!( - NetworkSchema::new(&self.network_context) - .remote_peer(&peer_id), - error = ?err, + NetworkSchema::new(&self.network_context).remote_peer(&peer_id), round = round, - "{} Ping failed for peer: {} round: {} with error: {:?}", + "{} Ping failed for peer: {} round: {} with error: {:#}", self.network_context, peer_id.short_str(), round, From 4dd94c673534d30d665d0ac855cdf918ad30acad Mon Sep 17 00:00:00 2001 From: Greg Nazario Date: Mon, 16 Sep 2024 18:17:47 -0700 Subject: [PATCH 04/36] [cli] Release 4.2.0 (#14653) --- Cargo.lock | 2 +- crates/aptos/CHANGELOG.md | 4 ++++ crates/aptos/Cargo.toml | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6e0f8ca54acc8..413ce73564963 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -262,7 +262,7 @@ dependencies = [ [[package]] name = "aptos" -version = "4.1.0" +version = "4.2.0" dependencies = [ "anyhow", "aptos-api-types", diff --git a/crates/aptos/CHANGELOG.md b/crates/aptos/CHANGELOG.md index 79547763c1bd9..516bace3b2591 100644 --- a/crates/aptos/CHANGELOG.md +++ b/crates/aptos/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to the Aptos CLI will be captured in this file. This project ## Unreleased +## [4.2.0] - 2024/09/16 +- Update latest VM and associated changes +- Update to latest compiler + ## [4.1.0] - 2024/08/30 - Marks Move 2 and compiler v2 as stable. - Adds new `--move-2` flag to work with Move 2 without need for multiple other flags. 
diff --git a/crates/aptos/Cargo.toml b/crates/aptos/Cargo.toml index 46e0ee8a6f92d..b1270bc827272 100644 --- a/crates/aptos/Cargo.toml +++ b/crates/aptos/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "aptos" description = "Aptos tool for management of nodes and interacting with the blockchain" -version = "4.1.0" +version = "4.2.0" # Workspace inherited keys authors = { workspace = true } From 637311bda2d070a4e32e7e813570b00bc8fd971e Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 16 Sep 2024 19:30:36 -0700 Subject: [PATCH 05/36] Addressing PR comments --- Cargo.lock | 1 + types/Cargo.toml | 1 + types/src/ledger_info.rs | 50 ++++++++++++++++++--------------- types/src/validator_verifier.rs | 16 +++++------ 4 files changed, 38 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6e0f8ca54acc8..551fdaee2bb63 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4225,6 +4225,7 @@ dependencies = [ "claims", "coset", "criterion", + "dashmap", "derivative", "fixed", "fxhash", diff --git a/types/Cargo.toml b/types/Cargo.toml index 79c5b17de97c1..d52bec17b259b 100644 --- a/types/Cargo.toml +++ b/types/Cargo.toml @@ -28,6 +28,7 @@ arr_macro = { workspace = true } base64 = { workspace = true } bcs = { workspace = true } bytes = { workspace = true } +dashmap = { workspace = true } fixed = { workspace = true } fxhash = { workspace = true } hashbrown = { workspace = true } diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs index 60e737a8cc214..d704defe627ad 100644 --- a/types/src/ledger_info.rs +++ b/types/src/ledger_info.rs @@ -475,9 +475,13 @@ impl LedgerInfoWithMixedSignatures { pub fn check_voting_power( &self, verifier: &ValidatorVerifier, + check_super_majority: bool, ) -> std::result::Result { let all_voters = self.all_voters(); - verifier.check_voting_power(all_voters.iter().collect_vec().into_iter(), true) + verifier.check_voting_power( + all_voters.iter().collect_vec().into_iter(), + check_super_majority, + ) } // Aggregates all the signatures, verifies the aggregate signature, and returns the aggregate signature. @@ -485,7 +489,7 @@ impl LedgerInfoWithMixedSignatures { &mut self, epoch_state: Arc, ) -> Result { - self.check_voting_power(&epoch_state.verifier)?; + self.check_voting_power(&epoch_state.verifier, true)?; let mut all_signatures = self.verified_signatures.clone(); for (author, signature) in self.unverified_signatures.signatures() { @@ -494,7 +498,7 @@ impl LedgerInfoWithMixedSignatures { let aggregated_sig = epoch_state.verifier.aggregate_signatures(&all_signatures)?; - let (verified_aggregate_signature, malicious_authors) = match epoch_state + match epoch_state .verifier .clone() .verify_multi_signatures(self.ledger_info(), &aggregated_sig) @@ -505,7 +509,10 @@ impl LedgerInfoWithMixedSignatures { .add_signature(*account_address, signature.clone()); } self.unverified_signatures = PartialSignatures::empty(); - (aggregated_sig, vec![]) + Ok(LedgerInfoWithSignatures::new( + self.ledger_info.clone(), + aggregated_sig, + )) }, Err(_) => { // Question: Should we assign min tasks per thread here for into_par_iter()? 
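// Sketch of the fallback strategy implemented in the hunk above and the one
// below: if the aggregate signature fails to verify, each unverified
// signature is re-checked individually (in parallel, via rayon's
// into_par_iter) and the authors of bad signatures are split out as
// malicious. `verify_one` here is a hypothetical stand-in for the
// per-signature check; it is not the actual ValidatorVerifier API.
use rayon::prelude::*;

fn partition_signers<A, S>(
    unverified: Vec<(A, S)>,
    verify_one: impl Fn(&A, &S) -> bool + Sync + Send,
) -> (Vec<(A, S)>, Vec<A>)
where
    A: Send,
    S: Send,
{
    let (good, bad): (Vec<(A, S)>, Vec<(A, S)>) = unverified
        .into_par_iter()
        .partition(|(author, sig)| verify_one(author, sig));
    // Keep the valid signatures; report only the authors of invalid ones.
    (good, bad.into_iter().map(|(author, _)| author).collect())
}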
@@ -537,22 +544,21 @@ impl LedgerInfoWithMixedSignatures { .collect(); self.unverified_signatures = PartialSignatures::empty(); - let aggregated_sig = epoch_state + epoch_state .verifier - .aggregate_signatures(&self.verified_signatures)?; - // epoch_state - // .read() - // .verifier - // .verify_multi_signatures(self.ledger_info(), &aggregated_sig)?; - (aggregated_sig, malicious_authors) + .add_malicious_authors(malicious_authors); + + match self.check_voting_power(&epoch_state.verifier, true) { + Ok(_) => Ok(LedgerInfoWithSignatures::new( + self.ledger_info.clone(), + epoch_state + .verifier + .aggregate_signatures(&self.verified_signatures)?, + )), + Err(e) => Err(e), + } }, - }; - epoch_state - .verifier - .add_malicious_authors(malicious_authors); - self.check_voting_power(&epoch_state.verifier).map(|_| { - LedgerInfoWithSignatures::new(self.ledger_info.clone(), verified_aggregate_signature) - }) + } } pub fn ledger_info(&self) -> &LedgerInfo { @@ -746,7 +752,7 @@ mod tests { 2 ); assert_eq!( - ledger_info_with_mixed_signatures.check_voting_power(&validator_verifier), + ledger_info_with_mixed_signatures.check_voting_power(&validator_verifier, true), Err(VerifyError::TooLittleVotingPower { voting_power: 4, expected_voting_power: 5 @@ -776,7 +782,7 @@ mod tests { ); assert_eq!( ledger_info_with_mixed_signatures - .check_voting_power(&validator_verifier) + .check_voting_power(&validator_verifier, true) .unwrap(), 5 ); @@ -831,7 +837,7 @@ mod tests { ); assert_eq!( ledger_info_with_mixed_signatures - .check_voting_power(&validator_verifier) + .check_voting_power(&validator_verifier, true) .unwrap(), 5 ); @@ -872,7 +878,7 @@ mod tests { assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 6); assert_eq!( ledger_info_with_mixed_signatures - .check_voting_power(&validator_verifier) + .check_voting_power(&validator_verifier, true) .unwrap(), 6 ); diff --git a/types/src/validator_verifier.rs b/types/src/validator_verifier.rs index 32ca1572a58a2..6ca856ae7b61f 100644 --- a/types/src/validator_verifier.rs +++ b/types/src/validator_verifier.rs @@ -17,13 +17,13 @@ use aptos_crypto::{ hash::CryptoHash, Signature, VerifyingKey, }; -use aptos_infallible::RwLock; +use dashmap::DashSet; use itertools::Itertools; #[cfg(any(test, feature = "fuzzing"))] use proptest_derive::Arbitrary; use serde::{Deserialize, Deserializer, Serialize}; use std::{ - collections::{BTreeMap, HashMap, HashSet}, + collections::{BTreeMap, HashMap}, fmt, sync::Arc, }; @@ -149,7 +149,7 @@ pub struct ValidatorVerifier { /// submitted bad votes that has resulted in having to verify each vote individually. Further votes by these validators /// will be verified individually bypassing the optimization. #[serde(skip)] - malicious_authors: Arc>>, + malicious_authors: Arc>, } // Implement Eq and PartialEq for ValidatorVerifier. Skip malicious_authors field in the comparison. 
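// Minimal demonstration of the DashSet swap made in this hunk (assuming
// only the dashmap crate): DashSet shards its storage internally, so
// concurrent inserts and lookups take &self and need no RwLock around the
// whole set, which is why the read()/write() calls disappear below.
use dashmap::DashSet;
use std::sync::Arc;

fn dashset_demo() {
    let malicious: Arc<DashSet<u64>> = Arc::new(DashSet::new());
    let handle = malicious.clone();
    handle.insert(7); // no explicit write guard
    assert!(malicious.contains(&7)); // contends only on the key's shard
}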
@@ -200,7 +200,7 @@ impl ValidatorVerifier { quorum_voting_power, total_voting_power, address_to_validator_index, - malicious_authors: Arc::new(RwLock::new(HashSet::new())), + malicious_authors: Arc::new(DashSet::new()), } } @@ -238,16 +238,16 @@ impl ValidatorVerifier { pub fn add_malicious_authors(&self, malicious_authors: Vec) { for author in malicious_authors { - self.malicious_authors.write().insert(author); + self.malicious_authors.insert(author); } } - pub fn malicious_authors(&self) -> HashSet { - self.malicious_authors.read().clone() + pub fn malicious_authors(&self) -> Arc> { + self.malicious_authors.clone() } pub fn is_malicious_author(&self, author: &AccountAddress) -> bool { - self.malicious_authors.read().contains(author) + self.malicious_authors.contains(author) } /// Helper method to initialize with a single author and public key with quorum voting power 1. From 8b4d7e2527c9c0d4c1a034d6bda37712804f668e Mon Sep 17 00:00:00 2001 From: Bo Wu Date: Fri, 13 Sep 2024 10:43:34 -0700 Subject: [PATCH 06/36] add a field for internal indexer version --- api/src/accounts.rs | 23 ++-------- api/src/context.rs | 86 ++++++++++++++++++++---------------- api/src/events.rs | 4 +- api/src/index.rs | 1 - api/src/transactions.rs | 2 +- api/types/src/ledger_info.rs | 20 +++++++++ 6 files changed, 74 insertions(+), 62 deletions(-) diff --git a/api/src/accounts.rs b/api/src/accounts.rs index 3ac9a13005e61..d94454f6b34e3 100644 --- a/api/src/accounts.rs +++ b/api/src/accounts.rs @@ -66,7 +66,7 @@ impl AccountsApi { let context = self.context.clone(); api_spawn_blocking(move || { - let account = Account::new(context, address.0, ledger_version.0, None, None, false)?; + let account = Account::new(context, address.0, ledger_version.0, None, None)?; account.account(&accept_type) }) .await @@ -118,7 +118,6 @@ impl AccountsApi { ledger_version.0, start.0.map(StateKey::from), limit.0, - true, )?; account.resources(&accept_type) }) @@ -171,7 +170,6 @@ impl AccountsApi { ledger_version.0, start.0.map(StateKey::from), limit.0, - true, )?; account.modules(&accept_type) }) @@ -201,24 +199,11 @@ impl Account { requested_ledger_version: Option, start: Option, limit: Option, - require_state_indices: bool, ) -> Result { - let sharding_enabled = context - .node_config - .storage - .rocksdb_configs - .enable_storage_sharding; - - let (latest_ledger_info, requested_version) = if sharding_enabled && require_state_indices { - context.get_latest_ledger_info_and_verify_internal_indexer_lookup_version( + let (latest_ledger_info, requested_version) = context + .get_latest_ledger_info_and_verify_lookup_version( requested_ledger_version.map(|inner| inner.0), - )? - } else { - // Use the latest ledger version, or the requested associated version - context.get_latest_ledger_info_and_verify_lookup_version( - requested_ledger_version.map(|inner| inner.0), - )? 
- }; + )?; Ok(Self { context, diff --git a/api/src/context.rs b/api/src/context.rs index aa9e59848544f..73b3c31b11d91 100644 --- a/api/src/context.rs +++ b/api/src/context.rs @@ -221,20 +221,26 @@ impl Context { .map_err(|e| e.into()) } - pub fn get_latest_ledger_info(&self) -> Result { + pub fn get_oldest_version_and_block_height( + &self, + ) -> Result<(Version, u64), E> { + self.db + .get_first_viable_block() + .context("Failed to retrieve oldest block information") + .map_err(|e| E::service_unavailable_with_code_no_info(e, AptosErrorCode::InternalError)) + } + + pub fn get_latest_storage_ledger_info( + &self, + ) -> Result { let ledger_info = self .get_latest_ledger_info_with_signatures() .context("Failed to retrieve latest ledger info") .map_err(|e| { E::service_unavailable_with_code_no_info(e, AptosErrorCode::InternalError) })?; - let (oldest_version, oldest_block_height) = self - .db - .get_first_viable_block() - .context("Failed to retrieve oldest block information") - .map_err(|e| { - E::service_unavailable_with_code_no_info(e, AptosErrorCode::InternalError) - })?; + + let (oldest_version, oldest_block_height) = self.get_oldest_version_and_block_height()?; let (_, _, newest_block_event) = self .db .get_block_info_by_version(ledger_info.ledger_info().version()) @@ -252,32 +258,12 @@ impl Context { )) } - pub fn get_latest_ledger_info_and_verify_internal_indexer_lookup_version( - &self, - requested_ledger_version: Option, - ) -> Result<(LedgerInfo, Version), E> { - if self.indexer_reader.is_none() { - return Err(E::internal_with_code_no_info( - "Indexer reader doesn't exist", - AptosErrorCode::InternalError, - )); - } - - let (latest_ledger_info, latest_internal_indexer_ledger_version) = - self.get_latest_internal_indexer_ledger_version_and_main_db_info()?; - if let Some(version) = requested_ledger_version { - let request_ledger_version = Version::from(version); - if latest_internal_indexer_ledger_version < request_ledger_version { - return Err(version_not_found( - request_ledger_version, - &latest_ledger_info, - )); - } else if request_ledger_version < latest_ledger_info.oldest_ledger_version.0 { - return Err(version_pruned(request_ledger_version, &latest_ledger_info)); - } - Ok((latest_ledger_info, request_ledger_version)) + pub fn get_latest_ledger_info(&self) -> Result { + if self.indexer_reader.is_some() { + let ledger_info = self.get_latest_internal_indexer_ledger_version_and_ledger_info()?; + Ok(ledger_info) } else { - Ok((latest_ledger_info, latest_internal_indexer_ledger_version)) + self.get_latest_storage_ledger_info() } } @@ -306,20 +292,42 @@ impl Context { Ok((latest_ledger_info, requested_ledger_version)) } - pub fn get_latest_internal_indexer_ledger_version_and_main_db_info( + pub fn get_latest_internal_indexer_ledger_version_and_ledger_info< + E: ServiceUnavailableError, + >( &self, - ) -> Result<(LedgerInfo, Version), E> { + ) -> Result { if let Some(indexer_reader) = self.indexer_reader.as_ref() { if let Some(latest_version) = indexer_reader .get_latest_internal_indexer_ledger_version() - .map_err(|err| E::internal_with_code_no_info(err, AptosErrorCode::InternalError))? + .map_err(|err| { + E::service_unavailable_with_code_no_info(err, AptosErrorCode::InternalError) + })? 
{ - let latest_ledger_info = self.get_latest_ledger_info()?; - return Ok((latest_ledger_info, latest_version)); + let (_, _, new_block_event) = self + .db + .get_block_info_by_version(latest_version) + .map_err(|_| { + E::service_unavailable_with_code_no_info( + "Failed to get block", + AptosErrorCode::InternalError, + ) + })?; + let (oldest_version, oldest_block_height) = + self.get_oldest_version_and_block_height()?; + return Ok(LedgerInfo::new_ledger_info( + &self.chain_id(), + new_block_event.epoch(), + latest_version, + oldest_version, + oldest_block_height, + new_block_event.height(), + new_block_event.proposed_time(), + )); } } - Err(E::internal_with_code_no_info( + Err(E::service_unavailable_with_code_no_info( "Indexer reader doesn't exist, or doesn't have data.", AptosErrorCode::InternalError, )) diff --git a/api/src/events.rs b/api/src/events.rs index 5c9266df373b8..49c4fad21ce9f 100644 --- a/api/src/events.rs +++ b/api/src/events.rs @@ -77,7 +77,7 @@ impl EventsApi { // Ensure that account exists let api = self.clone(); api_spawn_blocking(move || { - let account = Account::new(api.context.clone(), address.0, None, None, None, true)?; + let account = Account::new(api.context.clone(), address.0, None, None, None)?; account.verify_account_or_object_resource()?; api.list( account.latest_ledger_info, @@ -144,7 +144,7 @@ impl EventsApi { let api = self.clone(); api_spawn_blocking(move || { - let account = Account::new(api.context.clone(), address.0, None, None, None, true)?; + let account = Account::new(api.context.clone(), address.0, None, None, None)?; let key = account.find_event_key(event_handle.0, field_name.0.into())?; api.list(account.latest_ledger_info, accept_type, page, key) }) diff --git a/api/src/index.rs b/api/src/index.rs index 94b5289636413..ba91cbb34c342 100644 --- a/api/src/index.rs +++ b/api/src/index.rs @@ -33,7 +33,6 @@ impl IndexApi { self.context .check_api_output_enabled("Get ledger info", &accept_type)?; let ledger_info = self.context.get_latest_ledger_info()?; - let node_role = self.context.node_role(); api_spawn_blocking(move || match accept_type { diff --git a/api/src/transactions.rs b/api/src/transactions.rs index 86a16b8a356bd..1e1214361961b 100644 --- a/api/src/transactions.rs +++ b/api/src/transactions.rs @@ -986,7 +986,7 @@ impl TransactionsApi { address: Address, ) -> BasicResultWith404> { // Verify the account exists - let account = Account::new(self.context.clone(), address, None, None, None, true)?; + let account = Account::new(self.context.clone(), address, None, None, None)?; account.get_account_resource()?; let latest_ledger_info = account.latest_ledger_info; diff --git a/api/types/src/ledger_info.rs b/api/types/src/ledger_info.rs index ef912190c94c9..97438ae104013 100644 --- a/api/types/src/ledger_info.rs +++ b/api/types/src/ledger_info.rs @@ -40,6 +40,26 @@ impl LedgerInfo { } } + pub fn new_ledger_info( + chain_id: &ChainId, + epoch: u64, + ledger_version: u64, + oldest_ledger_version: u64, + oldest_block_height: u64, + block_height: u64, + ledger_timestamp: u64, + ) -> Self { + Self { + chain_id: chain_id.id(), + epoch: epoch.into(), + ledger_version: ledger_version.into(), + oldest_ledger_version: oldest_ledger_version.into(), + block_height: block_height.into(), + oldest_block_height: oldest_block_height.into(), + ledger_timestamp: ledger_timestamp.into(), + } + } + pub fn epoch(&self) -> u64 { self.epoch.into() } From 45f5d6570fa101c242beb02d13197a8c50b4aab7 Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Sun, 8 Sep 2024 06:33:23 -0400 
Subject: [PATCH 07/36] [Consensus Observer] Wrap block and payload stores in locks. --- .../observer/active_state.rs | 51 ++-- .../observer/consensus_observer.rs | 79 ++++-- .../observer/ordered_blocks.rs | 104 +++---- .../observer/payload_store.rs | 116 ++++---- .../observer/pending_blocks.rs | 263 ++++++++++-------- 5 files changed, 343 insertions(+), 270 deletions(-) diff --git a/consensus/src/consensus_observer/observer/active_state.rs b/consensus/src/consensus_observer/observer/active_state.rs index 73c03af670eee..fb5482bba3306 100644 --- a/consensus/src/consensus_observer/observer/active_state.rs +++ b/consensus/src/consensus_observer/observer/active_state.rs @@ -101,8 +101,8 @@ impl ActiveObserverState { /// root ledger info and remove the blocks from the given stores. pub fn create_commit_callback( &self, - pending_ordered_blocks: OrderedBlockStore, - block_payload_store: BlockPayloadStore, + pending_ordered_blocks: Arc>, + block_payload_store: Arc>, ) -> StateComputerCommitCallBackType { // Clone the root pointer let root = self.root.clone(); @@ -282,15 +282,17 @@ async fn extract_on_chain_configs( /// A simple helper function that handles the committed blocks /// (as part of the commit callback). fn handle_committed_blocks( - pending_ordered_blocks: OrderedBlockStore, - block_payload_store: BlockPayloadStore, + pending_ordered_blocks: Arc>, + block_payload_store: Arc>, root: Arc>, blocks: &[Arc], ledger_info: LedgerInfoWithSignatures, ) { // Remove the committed blocks from the payload and pending stores - block_payload_store.remove_committed_blocks(blocks); - pending_ordered_blocks.remove_blocks_for_commit(&ledger_info); + block_payload_store.lock().remove_committed_blocks(blocks); + pending_ordered_blocks + .lock() + .remove_blocks_for_commit(&ledger_info); // Verify the ledger info is for the same epoch let mut root = root.lock(); @@ -407,8 +409,12 @@ mod test { let root = Arc::new(Mutex::new(create_ledger_info(epoch, round))); // Create the ordered block store and block payload store - let ordered_block_store = OrderedBlockStore::new(node_config.consensus_observer); - let mut block_payload_store = BlockPayloadStore::new(node_config.consensus_observer); + let ordered_block_store = Arc::new(Mutex::new(OrderedBlockStore::new( + node_config.consensus_observer, + ))); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + node_config.consensus_observer, + ))); // Handle the committed blocks at the wrong epoch and verify the root is not updated handle_committed_blocks( @@ -432,12 +438,16 @@ mod test { // Add pending ordered blocks let num_ordered_blocks = 10; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, epoch, round); + let ordered_blocks = create_and_add_ordered_blocks( + ordered_block_store.clone(), + num_ordered_blocks, + epoch, + round, + ); // Add block payloads for the ordered blocks for ordered_block in &ordered_blocks { - create_and_add_payloads_for_ordered_block(&mut block_payload_store, ordered_block); + create_and_add_payloads_for_ordered_block(block_payload_store.clone(), ordered_block); } // Create the commit ledger info (for the second to last block) @@ -461,8 +471,11 @@ mod test { ); // Verify the committed blocks are removed from the stores - assert_eq!(ordered_block_store.get_all_ordered_blocks().len(), 1); - assert_eq!(block_payload_store.get_block_payloads().lock().len(), 1); + assert_eq!(ordered_block_store.lock().get_all_ordered_blocks().len(), 1); + assert_eq!( + 
block_payload_store.lock().get_block_payloads().lock().len(), + 1 + ); // Verify the root is updated assert_eq!(root.lock().clone(), committed_ledger_info); @@ -495,7 +508,7 @@ mod test { /// Creates and adds the specified number of ordered blocks to the ordered blocks fn create_and_add_ordered_blocks( - ordered_block_store: &OrderedBlockStore, + ordered_block_store: Arc>, num_ordered_blocks: usize, epoch: u64, starting_round: Round, @@ -532,7 +545,9 @@ mod test { let ordered_block = OrderedBlock::new(blocks, ordered_proof); // Insert the block into the ordered block store - ordered_block_store.insert_ordered_block(ordered_block.clone()); + ordered_block_store + .lock() + .insert_ordered_block(ordered_block.clone()); // Add the block to the ordered blocks ordered_blocks.push(ordered_block); @@ -543,13 +558,15 @@ mod test { /// Creates and adds payloads for the ordered block fn create_and_add_payloads_for_ordered_block( - block_payload_store: &mut BlockPayloadStore, + block_payload_store: Arc>, ordered_block: &OrderedBlock, ) { for block in ordered_block.blocks() { let block_payload = BlockPayload::new(block.block_info(), BlockTransactionPayload::empty()); - block_payload_store.insert_block_payload(block_payload, true); + block_payload_store + .lock() + .insert_block_payload(block_payload, true); } } diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs index 250b338d23344..9917adad0f74a 100644 --- a/consensus/src/consensus_observer/observer/consensus_observer.rs +++ b/consensus/src/consensus_observer/observer/consensus_observer.rs @@ -32,6 +32,7 @@ use aptos_config::config::{ConsensusObserverConfig, NodeConfig}; use aptos_consensus_types::{pipeline, pipelined_block::PipelinedBlock}; use aptos_crypto::{bls12381, Genesis}; use aptos_event_notifications::{DbBackedOnChainConfig, ReconfigNotificationListener}; +use aptos_infallible::Mutex; use aptos_logger::{debug, error, info, warn}; use aptos_network::{ application::interface::NetworkClient, protocols::wire::handshake::v1::ProtocolId, @@ -63,13 +64,13 @@ pub struct ConsensusObserver { active_observer_state: ActiveObserverState, // The block payload store (containing the block transaction payloads) - block_payload_store: BlockPayloadStore, + block_payload_store: Arc>, // The ordered block store (containing ordered blocks that are ready for execution) - ordered_block_store: OrderedBlockStore, + ordered_block_store: Arc>, // The pending block store (containing pending blocks that are without payloads) - pending_block_store: PendingBlockStore, + pending_block_store: Arc>, // The execution client to the buffer manager execution_client: Arc, @@ -116,12 +117,17 @@ impl ConsensusObserver { let active_observer_state = ActiveObserverState::new(node_config, db_reader, reconfig_events, consensus_publisher); + // Create the block and payload stores + let ordered_block_store = OrderedBlockStore::new(consensus_observer_config); + let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let pending_block_store = PendingBlockStore::new(consensus_observer_config); + // Create the consensus observer Self { active_observer_state, - ordered_block_store: OrderedBlockStore::new(consensus_observer_config), - block_payload_store: BlockPayloadStore::new(consensus_observer_config), - pending_block_store: PendingBlockStore::new(consensus_observer_config), + ordered_block_store: Arc::new(Mutex::new(ordered_block_store)), + block_payload_store: 
Arc::new(Mutex::new(block_payload_store)), + pending_block_store: Arc::new(Mutex::new(pending_block_store)), execution_client, sync_notification_sender, sync_handle: None, @@ -137,7 +143,7 @@ impl ConsensusObserver { } // Otherwise, check if all the payloads exist in the payload store - self.block_payload_store.all_payloads_exist(blocks) + self.block_payload_store.lock().all_payloads_exist(blocks) } /// Checks the progress of the consensus observer @@ -171,13 +177,13 @@ impl ConsensusObserver { /// subscriptions, where we want to wipe all state and restart). async fn clear_pending_block_state(&self) { // Clear the payload store - self.block_payload_store.clear_all_payloads(); + self.block_payload_store.lock().clear_all_payloads(); // Clear the pending blocks - self.pending_block_store.clear_missing_blocks(); + self.pending_block_store.lock().clear_missing_blocks(); // Clear the ordered blocks - self.ordered_block_store.clear_all_ordered_blocks(); + self.ordered_block_store.lock().clear_all_ordered_blocks(); // Reset the execution pipeline for the root let root = self.active_observer_state.root(); @@ -256,9 +262,9 @@ impl ConsensusObserver { self.active_observer_state.epoch_state() } - /// Returns the last known block - fn get_last_block(&self) -> BlockInfo { - if let Some(last_pending_block) = self.ordered_block_store.get_last_ordered_block() { + /// Returns the last ordered block + fn get_last_ordered_block(&self) -> BlockInfo { + if let Some(last_pending_block) = self.ordered_block_store.lock().get_last_ordered_block() { last_pending_block } else { // Return the root ledger info @@ -278,12 +284,16 @@ impl ConsensusObserver { /// Orders any ready pending blocks for the given epoch and round async fn order_ready_pending_block(&mut self, block_epoch: u64, block_round: Round) { - if let Some(ordered_block) = self.pending_block_store.remove_ready_block( + // Get any ready ordered block + let ready_ordered_block = self.pending_block_store.lock().remove_ready_block( block_epoch, block_round, - &self.block_payload_store, - ) { - self.process_ordered_block(ordered_block).await; + self.block_payload_store.clone(), + ); + + // Process the ready ordered block (if it exists) + if let Some(ready_ordered_block) = ready_ordered_block { + self.process_ordered_block(ready_ordered_block).await; } } @@ -332,6 +342,7 @@ impl ConsensusObserver { // Update the payload store with the payload self.block_payload_store + .lock() .insert_block_payload(block_payload, verified_payload); // Check if there are blocks that were missing payloads but are @@ -379,7 +390,7 @@ impl ConsensusObserver { // Otherwise, we failed to process the commit decision. If the commit // is for a future epoch or round, we need to state sync. 
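// For reference, a compact sketch of the sharing pattern this patch applies
// to all three stores (ordered, payload, pending). The codebase uses
// aptos_infallible::Mutex, whose lock() cannot poison; plain std types are
// used here so the sketch stands alone.
use std::collections::BTreeMap;
use std::sync::{Arc, Mutex};

struct Store {
    blocks: BTreeMap<u64, &'static str>,
}

fn share_store() {
    let store = Arc::new(Mutex::new(Store { blocks: BTreeMap::new() }));
    let for_callback = store.clone(); // cheap handle, e.g. for a commit callback
    // Lock, mutate, and release promptly so critical sections stay short.
    for_callback.lock().unwrap().blocks.insert(1, "block");
    assert_eq!(store.lock().unwrap().blocks.len(), 1);
}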
- let last_block = self.get_last_block(); + let last_block = self.get_last_ordered_block(); let commit_decision_round = commit_decision.round(); let epoch_changed = commit_decision_epoch > last_block.epoch(); if epoch_changed || commit_decision_round > last_block.round() { @@ -408,8 +419,10 @@ impl ConsensusObserver { self.active_observer_state .update_root(commit_decision.commit_proof().clone()); self.block_payload_store + .lock() .remove_blocks_for_epoch_round(commit_decision_epoch, commit_decision_round); self.ordered_block_store + .lock() .remove_blocks_for_commit(commit_decision.commit_proof()); // Start the state sync process @@ -431,6 +444,7 @@ impl ConsensusObserver { // Get the pending block for the commit decision let pending_block = self .ordered_block_store + .lock() .get_ordered_block(commit_decision.epoch(), commit_decision.round()); // Process the pending block @@ -444,6 +458,7 @@ impl ConsensusObserver { )) ); self.ordered_block_store + .lock() .update_commit_decision(commit_decision); // If we are not in sync mode, forward the commit decision to the execution pipeline @@ -553,7 +568,9 @@ impl ConsensusObserver { if self.all_payloads_exist(ordered_block.blocks()) { self.process_ordered_block(ordered_block).await; } else { - self.pending_block_store.insert_pending_block(ordered_block); + self.pending_block_store + .lock() + .insert_pending_block(ordered_block); } } @@ -587,6 +604,7 @@ impl ConsensusObserver { // Verify the block payloads against the ordered block if let Err(error) = self .block_payload_store + .lock() .verify_payloads_against_ordered_block(&ordered_block) { error!( @@ -601,9 +619,10 @@ impl ConsensusObserver { // The block was verified correctly. If the block is a child of our // last block, we can insert it into the ordered block store. 
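// Illustration of the chain-extension rule enforced just below, with
// hypothetical minimal types (the real code compares HashValue block ids):
// a verified ordered block is stored only when its first block's parent id
// equals the id of the last ordered block we already hold.
#[derive(Clone, Copy, PartialEq, Eq)]
struct BlockId(u64);

struct Header {
    id: BlockId,
    parent_id: BlockId,
}

fn extends_tip(tip: &Header, incoming_first: &Header) -> bool {
    tip.id == incoming_first.parent_id
}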
- if self.get_last_block().id() == ordered_block.first_block().parent_id() { + if self.get_last_ordered_block().id() == ordered_block.first_block().parent_id() { // Insert the ordered block into the pending blocks self.ordered_block_store + .lock() .insert_ordered_block(ordered_block.clone()); // If we're not in sync mode, finalize the ordered blocks @@ -655,6 +674,7 @@ impl ConsensusObserver { let new_epoch_state = self.get_epoch_state(); let verified_payload_rounds = self .block_payload_store + .lock() .verify_payload_signatures(&new_epoch_state); // Order all the pending blocks that are now ready (these were buffered during state sync) @@ -668,9 +688,8 @@ impl ConsensusObserver { self.sync_handle = None; // Process all the newly ordered blocks - for (_, (ordered_block, commit_decision)) in - self.ordered_block_store.get_all_ordered_blocks() - { + let all_ordered_blocks = self.ordered_block_store.lock().get_all_ordered_blocks(); + for (_, (ordered_block, commit_decision)) in all_ordered_blocks { // Finalize the ordered block self.finalize_ordered_block(ordered_block).await; @@ -684,19 +703,25 @@ impl ConsensusObserver { /// Updates the metrics for the processed blocks fn update_processed_blocks_metrics(&self) { // Update the payload store metrics - self.block_payload_store.update_payload_store_metrics(); + self.block_payload_store + .lock() + .update_payload_store_metrics(); // Update the pending block metrics - self.pending_block_store.update_pending_blocks_metrics(); + self.pending_block_store + .lock() + .update_pending_blocks_metrics(); // Update the pending block metrics - self.ordered_block_store.update_ordered_blocks_metrics(); + self.ordered_block_store + .lock() + .update_ordered_blocks_metrics(); } /// Waits for a new epoch to start async fn wait_for_epoch_start(&mut self) { // Wait for the active state epoch to update - let block_payloads = self.block_payload_store.get_block_payloads(); + let block_payloads = self.block_payload_store.lock().get_block_payloads(); let (payload_manager, consensus_config, execution_config, randomness_config) = self .active_observer_state .wait_for_epoch_start(block_payloads) diff --git a/consensus/src/consensus_observer/observer/ordered_blocks.rs b/consensus/src/consensus_observer/observer/ordered_blocks.rs index edfde50a4ed8f..7bb43fbc4108f 100644 --- a/consensus/src/consensus_observer/observer/ordered_blocks.rs +++ b/consensus/src/consensus_observer/observer/ordered_blocks.rs @@ -10,46 +10,43 @@ use crate::consensus_observer::{ }; use aptos_config::config::ConsensusObserverConfig; use aptos_consensus_types::common::Round; -use aptos_infallible::Mutex; use aptos_logger::{debug, warn}; use aptos_types::{block_info::BlockInfo, ledger_info::LedgerInfoWithSignatures}; -use std::{collections::BTreeMap, sync::Arc}; +use std::collections::BTreeMap; /// A simple struct to store ordered blocks -#[derive(Clone)] pub struct OrderedBlockStore { // The configuration of the consensus observer consensus_observer_config: ConsensusObserverConfig, // Ordered blocks. The key is the epoch and round of the last block in the // ordered block. Each entry contains the block and the commit decision (if any). 
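// The signature changes throughout this file follow from the field change
// just below: once the store lives behind one outer Mutex, fields become
// plain containers and mutating methods take &mut self, so exclusive access
// is enforced by the borrow checker instead of a per-field runtime lock.
// A minimal sketch of the resulting shape:
use std::collections::BTreeMap;

struct Inner {
    map: BTreeMap<u64, u64>,
}

impl Inner {
    fn clear(&mut self) {
        // No .lock() here: the caller already holds the outer guard, which
        // dereferences to &mut Inner.
        self.map.clear();
    }
}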
- ordered_blocks: Arc)>>>, + ordered_blocks: BTreeMap<(u64, Round), (OrderedBlock, Option)>, } impl OrderedBlockStore { pub fn new(consensus_observer_config: ConsensusObserverConfig) -> Self { Self { consensus_observer_config, - ordered_blocks: Arc::new(Mutex::new(BTreeMap::new())), + ordered_blocks: BTreeMap::new(), } } /// Clears all ordered blocks - pub fn clear_all_ordered_blocks(&self) { - self.ordered_blocks.lock().clear(); + pub fn clear_all_ordered_blocks(&mut self) { + self.ordered_blocks.clear(); } /// Returns a copy of the ordered blocks pub fn get_all_ordered_blocks( &self, ) -> BTreeMap<(u64, Round), (OrderedBlock, Option)> { - self.ordered_blocks.lock().clone() + self.ordered_blocks.clone() } /// Returns the last ordered block (if any) pub fn get_last_ordered_block(&self) -> Option { self.ordered_blocks - .lock() .last_key_value() .map(|(_, (ordered_block, _))| ordered_block.last_block().block_info()) } @@ -57,7 +54,6 @@ impl OrderedBlockStore { /// Returns the ordered block for the given epoch and round (if any) pub fn get_ordered_block(&self, epoch: u64, round: Round) -> Option { self.ordered_blocks - .lock() .get(&(epoch, round)) .map(|(ordered_block, _)| ordered_block.clone()) } @@ -65,10 +61,10 @@ impl OrderedBlockStore { /// Inserts the given ordered block into the ordered blocks. This function /// assumes the block has already been checked to extend the current ordered /// blocks, and that the ordered proof has been verified. - pub fn insert_ordered_block(&self, ordered_block: OrderedBlock) { + pub fn insert_ordered_block(&mut self, ordered_block: OrderedBlock) { // Verify that the number of ordered blocks doesn't exceed the maximum let max_num_ordered_blocks = self.consensus_observer_config.max_num_pending_blocks as usize; - if self.ordered_blocks.lock().len() >= max_num_ordered_blocks { + if self.ordered_blocks.len() >= max_num_ordered_blocks { warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Exceeded the maximum number of ordered blocks: {:?}. Dropping block: {:?}.", @@ -94,32 +90,32 @@ impl OrderedBlockStore { // Insert the ordered block self.ordered_blocks - .lock() .insert((last_block_epoch, last_block_round), (ordered_block, None)); } /// Removes the ordered blocks for the given commit ledger info. This will /// remove all blocks up to (and including) the epoch and round of the commit. 
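// Sketch of the pruning idiom used by remove_blocks_for_commit below:
// (epoch, round) tuple keys order lexicographically in a BTreeMap, and
// split_off(&k) keeps everything at or above k, so dropping all blocks up
// to and including the commit round is a single call at (epoch, round + 1).
use std::collections::BTreeMap;

fn prune_demo() {
    let mut blocks: BTreeMap<(u64, u64), &str> = BTreeMap::new();
    blocks.insert((1, 9), "already committed");
    blocks.insert((1, 10), "committed now");
    blocks.insert((1, 11), "still pending");
    let (commit_epoch, commit_round) = (1u64, 10u64);
    // Retain only entries strictly after the commit point.
    blocks = blocks.split_off(&(commit_epoch, commit_round + 1));
    assert_eq!(blocks.keys().next(), Some(&(1, 11)));
}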
- pub fn remove_blocks_for_commit(&self, commit_ledger_info: &LedgerInfoWithSignatures) { + pub fn remove_blocks_for_commit(&mut self, commit_ledger_info: &LedgerInfoWithSignatures) { // Determine the epoch and round to split off let split_off_epoch = commit_ledger_info.ledger_info().epoch(); let split_off_round = commit_ledger_info.commit_info().round().saturating_add(1); // Remove the blocks from the ordered blocks - let mut ordered_blocks = self.ordered_blocks.lock(); - *ordered_blocks = ordered_blocks.split_off(&(split_off_epoch, split_off_round)); + self.ordered_blocks = self + .ordered_blocks + .split_off(&(split_off_epoch, split_off_round)); } /// Updates the commit decision of the ordered block (if found) - pub fn update_commit_decision(&self, commit_decision: &CommitDecision) { + pub fn update_commit_decision(&mut self, commit_decision: &CommitDecision) { // Get the epoch and round of the commit decision let commit_decision_epoch = commit_decision.epoch(); let commit_decision_round = commit_decision.round(); // Update the commit decision for the ordered blocks - let mut ordered_blocks = self.ordered_blocks.lock(); - if let Some((_, existing_commit_decision)) = - ordered_blocks.get_mut(&(commit_decision_epoch, commit_decision_round)) + if let Some((_, existing_commit_decision)) = self + .ordered_blocks + .get_mut(&(commit_decision_epoch, commit_decision_round)) { *existing_commit_decision = Some(commit_decision.clone()); } @@ -128,8 +124,7 @@ impl OrderedBlockStore { /// Updates the metrics for the ordered blocks pub fn update_ordered_blocks_metrics(&self) { // Update the number of ordered block entries - let ordered_blocks = self.ordered_blocks.lock(); - let num_entries = ordered_blocks.len() as u64; + let num_entries = self.ordered_blocks.len() as u64; metrics::set_gauge_with_label( &metrics::OBSERVER_NUM_PROCESSED_BLOCKS, metrics::ORDERED_BLOCK_ENTRIES_LABEL, @@ -137,7 +132,8 @@ impl OrderedBlockStore { ); // Update the total number of ordered blocks - let num_ordered_blocks = ordered_blocks + let num_ordered_blocks = self + .ordered_blocks .values() .map(|(ordered_block, _)| ordered_block.blocks().len() as u64) .sum(); @@ -148,7 +144,8 @@ impl OrderedBlockStore { ); // Update the highest round for the ordered blocks - let highest_ordered_round = ordered_blocks + let highest_ordered_round = self + .ordered_blocks .last_key_value() .map(|(_, (ordered_block, _))| ordered_block.last_block().round()) .unwrap_or(0); @@ -173,28 +170,29 @@ mod test { use aptos_types::{ aggregate_signature::AggregateSignature, ledger_info::LedgerInfo, transaction::Version, }; + use std::sync::Arc; #[test] fn test_clear_all_ordered_blocks() { // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = 10; - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, current_epoch); // Clear all ordered blocks ordered_block_store.clear_all_ordered_blocks(); // Check that all the ordered blocks were removed - assert!(ordered_block_store.ordered_blocks.lock().is_empty()); + assert!(ordered_block_store.ordered_blocks.is_empty()); } #[test] fn test_get_last_ordered_block() { // Create a new ordered block store - let ordered_block_store = 
OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Verify that we have no last ordered block assert!(ordered_block_store.get_last_ordered_block().is_none()); @@ -202,8 +200,11 @@ mod test { // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = 50; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); // Verify the last ordered block is the block with the highest round let last_ordered_block = ordered_blocks.last().unwrap(); @@ -217,7 +218,7 @@ mod test { let next_epoch = current_epoch + 1; let num_ordered_blocks = 50; let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, next_epoch); + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, next_epoch); // Verify the last ordered block is the block with the highest epoch and round let last_ordered_block = ordered_blocks.last().unwrap(); @@ -231,13 +232,16 @@ mod test { #[test] fn test_get_ordered_block() { // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = 50; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); // Ensure the ordered blocks were all inserted let all_ordered_blocks = ordered_block_store.get_all_ordered_blocks(); @@ -272,12 +276,12 @@ mod test { }; // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(consensus_observer_config); + let mut ordered_block_store = OrderedBlockStore::new(consensus_observer_config); // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = max_num_pending_blocks * 2; // Insert more than the maximum - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, current_epoch); // Verify the ordered blocks were inserted up to the maximum let all_ordered_blocks = ordered_block_store.get_all_ordered_blocks(); @@ -287,7 +291,7 @@ mod test { let next_epoch = current_epoch + 1; let num_ordered_blocks = max_num_pending_blocks - 1; // Insert one less than the maximum let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, next_epoch); + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, next_epoch); // Verify the ordered blocks were not inserted (they should have just been dropped) for ordered_block in &ordered_blocks { @@ -305,19 +309,22 @@ mod test { #[test] fn test_remove_blocks_for_commit() { // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Insert several ordered blocks for the current epoch let current_epoch = 10; let num_ordered_blocks = 10; - let 
ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); // Insert several ordered blocks for the next epoch let next_epoch = current_epoch + 1; let num_ordered_blocks_next_epoch = 20; let ordered_blocks_next_epoch = create_and_add_ordered_blocks( - &ordered_block_store, + &mut ordered_block_store, num_ordered_blocks_next_epoch, next_epoch, ); @@ -326,7 +333,7 @@ mod test { let future_epoch = next_epoch + 1; let num_ordered_blocks_future_epoch = 30; create_and_add_ordered_blocks( - &ordered_block_store, + &mut ordered_block_store, num_ordered_blocks_future_epoch, future_epoch, ); @@ -399,19 +406,22 @@ mod test { #[test] fn test_update_commit_decision() { // Create a new ordered block store - let ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); // Insert several ordered blocks for the current epoch let current_epoch = 0; let num_ordered_blocks = 10; - let ordered_blocks = - create_and_add_ordered_blocks(&ordered_block_store, num_ordered_blocks, current_epoch); + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); // Insert several ordered blocks for the next epoch let next_epoch = current_epoch + 1; let num_ordered_blocks_next_epoch = 20; let ordered_blocks_next_epoch = create_and_add_ordered_blocks( - &ordered_block_store, + &mut ordered_block_store, num_ordered_blocks_next_epoch, next_epoch, ); @@ -499,7 +509,7 @@ mod test { /// Creates and adds the specified number of ordered blocks to the ordered blocks fn create_and_add_ordered_blocks( - ordered_block_store: &OrderedBlockStore, + ordered_block_store: &mut OrderedBlockStore, num_ordered_blocks: usize, epoch: u64, ) -> Vec { diff --git a/consensus/src/consensus_observer/observer/payload_store.rs b/consensus/src/consensus_observer/observer/payload_store.rs index bae1225c58118..8781595026194 100644 --- a/consensus/src/consensus_observer/observer/payload_store.rs +++ b/consensus/src/consensus_observer/observer/payload_store.rs @@ -26,12 +26,12 @@ pub enum BlockPayloadStatus { } /// A simple struct to store the block payloads of ordered and committed blocks -#[derive(Clone)] pub struct BlockPayloadStore { // The configuration of the consensus observer consensus_observer_config: ConsensusObserverConfig, - // Block transaction payloads (indexed by epoch and round) + // Block transaction payloads (indexed by epoch and round). + // This is directly accessed by the payload manager. 
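// Why the field just below keeps its own Arc<Mutex<..>> even though the
// store as a whole now sits behind a lock: the payload manager holds a
// clone of the inner map handle and reads payloads without going through
// (or locking) the enclosing BlockPayloadStore. A minimal sketch of that
// shape, with a simplified payload type standing in for BlockPayloadStatus:
use std::collections::BTreeMap;
use std::sync::{Arc, Mutex};

type PayloadMap = Arc<Mutex<BTreeMap<(u64, u64), Vec<u8>>>>;

struct PayloadStore {
    payloads: PayloadMap,
}

impl PayloadStore {
    fn handle(&self) -> PayloadMap {
        // Handed to the payload manager at epoch start; both sides lock the
        // same underlying map independently of the store's outer lock.
        self.payloads.clone()
    }
}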
block_payloads: Arc>>, } @@ -299,16 +299,12 @@ mod test { }; // Create a new block payload store - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some unverified blocks to the payload store let num_blocks_in_store = 100; - let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 1, - false, - ); + let unverified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 1, false); // Verify the payloads don't exist in the block payload store assert!(!block_payload_store.all_payloads_exist(&unverified_blocks)); @@ -320,12 +316,8 @@ mod test { // Add some verified blocks to the payload store let num_blocks_in_store = 100; - let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 0, - true, - ); + let verified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Check that all the payloads exist in the block payload store assert!(block_payload_store.all_payloads_exist(&verified_blocks)); @@ -355,22 +347,18 @@ mod test { fn test_all_payloads_exist_unverified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add several verified blocks to the payload store let num_blocks_in_store = 10; - let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 0, - true, - ); + let verified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Check that the payloads exists in the block payload store assert!(block_payload_store.all_payloads_exist(&verified_blocks)); // Mark the payload of the first block as unverified - mark_payload_as_unverified(block_payload_store.clone(), &verified_blocks[0]); + mark_payload_as_unverified(&block_payload_store, &verified_blocks[0]); // Check that the payload no longer exists in the block payload store assert!(!block_payload_store.all_payloads_exist(&verified_blocks)); @@ -383,19 +371,15 @@ mod test { fn test_clear_all_payloads() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some unverified blocks to the payload store let num_blocks_in_store = 30; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_in_store, 1, false); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 1, false); // Add some verified blocks to the payload store - let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), - num_blocks_in_store, - 0, - true, - ); + let verified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Check that the payloads exist in the block payload store assert!(block_payload_store.all_payloads_exist(&verified_blocks)); @@ -423,12 +407,8 @@ mod test { // Add some verified blocks to the payload store let num_blocks_in_store = 20; - let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), 
- num_blocks_in_store, - 0, - true, - ); + let verified_blocks = + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Check that the block payload store contains the new block payloads assert!(block_payload_store.all_payloads_exist(&verified_blocks)); @@ -438,7 +418,7 @@ mod test { check_num_verified_payloads(&block_payload_store, num_blocks_in_store); // Mark the payload of the first block as unverified - mark_payload_as_unverified(block_payload_store.clone(), &verified_blocks[0]); + mark_payload_as_unverified(&block_payload_store, &verified_blocks[0]); // Check that the payload no longer exists in the block payload store assert!(!block_payload_store.all_payloads_exist(&verified_blocks)); @@ -465,11 +445,11 @@ mod test { }; // Create a new block payload store - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add the maximum number of verified blocks to the payload store let num_blocks_in_store = max_num_pending_blocks as usize; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_in_store, 0, true); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, true); // Verify the number of blocks in the block payload store check_num_verified_payloads(&block_payload_store, num_blocks_in_store); @@ -477,7 +457,7 @@ mod test { // Add more blocks to the payload store let num_blocks_to_add = 5; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_to_add, 0, true); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_to_add, 0, true); // Verify the number of blocks in the block payload store check_num_verified_payloads(&block_payload_store, max_num_pending_blocks as usize); @@ -485,7 +465,7 @@ mod test { // Add a large number of blocks to the payload store let num_blocks_to_add = 100; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_to_add, 0, true); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_to_add, 0, true); // Verify the number of blocks in the block payload store check_num_verified_payloads(&block_payload_store, max_num_pending_blocks as usize); @@ -502,11 +482,11 @@ mod test { }; // Create a new block payload store - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add the maximum number of unverified blocks to the payload store let num_blocks_in_store = max_num_pending_blocks as usize; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_in_store, 0, false); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_in_store, 0, false); // Verify the number of blocks in the block payload store check_num_unverified_payloads(&block_payload_store, num_blocks_in_store); @@ -514,7 +494,7 @@ mod test { // Add more blocks to the payload store let num_blocks_to_add = 5; - create_and_add_blocks_to_store(block_payload_store.clone(), num_blocks_to_add, 0, false); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_to_add, 0, false); // Verify the number of blocks in the block payload store check_num_unverified_payloads(&block_payload_store, max_num_pending_blocks as usize); @@ -522,7 +502,7 @@ mod test { // Add a large number of blocks to the payload store let num_blocks_to_add = 100; - create_and_add_blocks_to_store(block_payload_store.clone(), 
num_blocks_to_add, 0, false); + create_and_add_blocks_to_store(&mut block_payload_store, num_blocks_to_add, 0, false); // Verify the number of blocks in the block payload store check_num_unverified_payloads(&block_payload_store, max_num_pending_blocks as usize); @@ -533,13 +513,13 @@ mod test { fn test_remove_blocks_for_epoch_round_verified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some verified blocks to the payload store for the current epoch let current_epoch = 0; let num_blocks_in_store = 100; let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, current_epoch, true, @@ -573,7 +553,7 @@ mod test { // Add some verified blocks to the payload store for the next epoch let next_epoch = current_epoch + 1; create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, next_epoch, true, @@ -591,13 +571,13 @@ mod test { fn test_remove_blocks_for_epoch_round_unverified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some unverified blocks to the payload store for the current epoch let current_epoch = 10; let num_blocks_in_store = 100; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, current_epoch, false, @@ -630,7 +610,7 @@ mod test { // Add some unverified blocks to the payload store for the next epoch let next_epoch = current_epoch + 1; create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, next_epoch, false, @@ -648,13 +628,13 @@ mod test { fn test_remove_committed_blocks_verified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some blocks to the payload store for the current epoch let current_epoch = 0; let num_blocks_in_store = 100; let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, current_epoch, true, @@ -700,7 +680,7 @@ mod test { // Add some blocks to the payload store for the next epoch let next_epoch = 1; let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, next_epoch, true, @@ -717,13 +697,13 @@ mod test { fn test_remove_committed_blocks_unverified() { // Create a new block payload store let consensus_observer_config = ConsensusObserverConfig::default(); - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); // Add some blocks to the payload store for the current epoch let current_epoch = 10; let num_blocks_in_store = 100; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, current_epoch, 
false, @@ -768,7 +748,7 @@ mod test { // Add some blocks to the payload store for the next epoch let next_epoch = 11; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_blocks_in_store, next_epoch, false, @@ -791,7 +771,7 @@ mod test { let current_epoch = 0; let num_verified_blocks = 10; create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_verified_blocks, current_epoch, true, @@ -801,7 +781,7 @@ mod test { let next_epoch = current_epoch + 1; let num_unverified_blocks = 20; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_unverified_blocks, next_epoch, false, @@ -811,7 +791,7 @@ mod test { let future_epoch = current_epoch + 30; let num_future_blocks = 30; let future_unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_future_blocks, future_epoch, false, @@ -877,7 +857,7 @@ mod test { let current_epoch = 0; let num_verified_blocks = 10; let verified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_verified_blocks, current_epoch, true, @@ -895,7 +875,7 @@ mod test { .unwrap(); // Mark the first block payload as unverified - mark_payload_as_unverified(block_payload_store.clone(), &verified_blocks[0]); + mark_payload_as_unverified(&block_payload_store, &verified_blocks[0]); // Verify the ordered block and ensure it fails (since the payloads are unverified) let error = block_payload_store @@ -923,7 +903,7 @@ mod test { let current_epoch = 10; let num_verified_blocks = 6; create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_verified_blocks, current_epoch, true, @@ -933,7 +913,7 @@ mod test { let next_epoch = current_epoch + 1; let num_unverified_blocks = 15; let unverified_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_unverified_blocks, next_epoch, false, @@ -943,7 +923,7 @@ mod test { let future_epoch = next_epoch + 1; let num_future_blocks = 10; let unverified_future_blocks = create_and_add_blocks_to_store( - block_payload_store.clone(), + &mut block_payload_store, num_future_blocks, future_epoch, false, @@ -986,7 +966,7 @@ mod test { /// Creates and adds the given number of blocks to the block payload store fn create_and_add_blocks_to_store( - mut block_payload_store: BlockPayloadStore, + block_payload_store: &mut BlockPayloadStore, num_blocks: usize, epoch: u64, verified_payload_signatures: bool, @@ -1110,7 +1090,7 @@ mod test { /// Marks the payload of the given block as unverified fn mark_payload_as_unverified( - block_payload_store: BlockPayloadStore, + block_payload_store: &BlockPayloadStore, block: &Arc, ) { // Get the payload entry for the given block diff --git a/consensus/src/consensus_observer/observer/pending_blocks.rs b/consensus/src/consensus_observer/observer/pending_blocks.rs index 46c0586f08130..d3ce297cd5fdb 100644 --- a/consensus/src/consensus_observer/observer/pending_blocks.rs +++ b/consensus/src/consensus_observer/observer/pending_blocks.rs @@ -19,41 +19,36 @@ use std::{ }; /// A simple struct to hold blocks that are waiting for payloads -#[derive(Clone)] pub struct PendingBlockStore { // The configuration of the consensus observer consensus_observer_config: ConsensusObserverConfig, - // A map of ordered blocks that are without payloads. 
The key is the - // (epoch, round) of the first block in the ordered block. - blocks_without_payloads: Arc>>, + // A map of ordered blocks that are without payloads. The key is + // the (epoch, round) of the first block in the ordered block. + blocks_without_payloads: BTreeMap<(u64, Round), OrderedBlock>, } impl PendingBlockStore { pub fn new(consensus_observer_config: ConsensusObserverConfig) -> Self { Self { consensus_observer_config, - blocks_without_payloads: Arc::new(Mutex::new(BTreeMap::new())), + blocks_without_payloads: BTreeMap::new(), } } /// Clears all missing blocks from the store - pub fn clear_missing_blocks(&self) { - self.blocks_without_payloads.lock().clear(); + pub fn clear_missing_blocks(&mut self) { + self.blocks_without_payloads.clear(); } /// Inserts a block (without payloads) into the store - pub fn insert_pending_block(&self, ordered_block: OrderedBlock) { + pub fn insert_pending_block(&mut self, ordered_block: OrderedBlock) { // Get the epoch and round of the first block let first_block = ordered_block.first_block(); let first_block_epoch_round = (first_block.epoch(), first_block.round()); // Insert the block into the store using the round of the first block - match self - .blocks_without_payloads - .lock() - .entry(first_block_epoch_round) - { + match self.blocks_without_payloads.entry(first_block_epoch_round) { Entry::Occupied(_) => { // The block is already in the store warn!( @@ -75,16 +70,15 @@ impl PendingBlockStore { /// Garbage collects the pending blocks store by removing /// the oldest blocks if the store is too large. - fn garbage_collect_pending_blocks(&self) { + fn garbage_collect_pending_blocks(&mut self) { // Calculate the number of blocks to remove - let mut blocks_without_payloads = self.blocks_without_payloads.lock(); - let num_pending_blocks = blocks_without_payloads.len() as u64; + let num_pending_blocks = self.blocks_without_payloads.len() as u64; let max_pending_blocks = self.consensus_observer_config.max_num_pending_blocks; let num_blocks_to_remove = num_pending_blocks.saturating_sub(max_pending_blocks); // Remove the oldest blocks if the store is too large for _ in 0..num_blocks_to_remove { - if let Some((oldest_epoch_round, _)) = blocks_without_payloads.pop_first() { + if let Some((oldest_epoch_round, _)) = self.blocks_without_payloads.pop_first() { warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "The pending block store is too large: {:?} blocks. Removing the block for the oldest epoch and round: {:?}", @@ -98,25 +92,28 @@ impl PendingBlockStore { /// Removes and returns the block from the store that is now ready /// to be processed (after the new payload has been received). pub fn remove_ready_block( - &self, + &mut self, received_payload_epoch: u64, received_payload_round: Round, - block_payload_store: &BlockPayloadStore, + block_payload_store: Arc>, ) -> Option { // Calculate the round at which to split the blocks let split_round = received_payload_round.saturating_add(1); // Split the blocks at the epoch and round - let mut blocks_without_payloads = self.blocks_without_payloads.lock(); - let mut blocks_at_higher_rounds = - blocks_without_payloads.split_off(&(received_payload_epoch, split_round)); + let mut blocks_at_higher_rounds = self + .blocks_without_payloads + .split_off(&(received_payload_epoch, split_round)); // Check if the last block is ready (this should be the only ready block). // Any earlier blocks are considered out-of-date and will be dropped. 
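        // (Note: `split_off(&key)` keeps the entries strictly below `key` and
        // returns those at or above it, so `pop_last()` below yields the
        // pending block whose first (epoch, round) is the highest one not
        // exceeding the received payload's epoch and round.)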
let mut ready_block = None; - if let Some((epoch_and_round, ordered_block)) = blocks_without_payloads.pop_last() { + if let Some((epoch_and_round, ordered_block)) = self.blocks_without_payloads.pop_last() { // If all payloads exist for the block, then the block is ready - if block_payload_store.all_payloads_exist(ordered_block.blocks()) { + if block_payload_store + .lock() + .all_payloads_exist(ordered_block.blocks()) + { ready_block = Some(ordered_block); } else { // Otherwise, check if we're still waiting for higher payloads for the block @@ -127,18 +124,18 @@ impl PendingBlockStore { } // Check if any out-of-date blocks were dropped - if !blocks_without_payloads.is_empty() { + if !self.blocks_without_payloads.is_empty() { info!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Dropped {:?} out-of-date pending blocks before epoch and round: {:?}", - blocks_without_payloads.len(), + self.blocks_without_payloads.len(), (received_payload_epoch, received_payload_round) )) ); } // Update the pending blocks to only include the blocks at higher rounds - *blocks_without_payloads = blocks_at_higher_rounds; + self.blocks_without_payloads = blocks_at_higher_rounds; // Return the ready block (if one exists) ready_block @@ -147,8 +144,7 @@ impl PendingBlockStore { /// Updates the metrics for the pending blocks pub fn update_pending_blocks_metrics(&self) { // Update the number of pending block entries - let blocks_without_payloads = self.blocks_without_payloads.lock(); - let num_entries = blocks_without_payloads.len() as u64; + let num_entries = self.blocks_without_payloads.len() as u64; metrics::set_gauge_with_label( &metrics::OBSERVER_NUM_PROCESSED_BLOCKS, metrics::PENDING_BLOCK_ENTRIES_LABEL, @@ -156,7 +152,8 @@ impl PendingBlockStore { ); // Update the total number of pending blocks - let num_pending_blocks = blocks_without_payloads + let num_pending_blocks = self + .blocks_without_payloads .values() .map(|block| block.blocks().len() as u64) .sum(); @@ -167,7 +164,8 @@ impl PendingBlockStore { ); // Update the highest round for the pending blocks - let highest_pending_round = blocks_without_payloads + let highest_pending_round = self + .blocks_without_payloads .last_key_value() .map(|(_, pending_block)| pending_block.last_block().round()) .unwrap_or(0); @@ -208,13 +206,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 0; let missing_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -223,17 +223,19 @@ mod test { // Verify that the store is not empty verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &missing_blocks, ); // Clear the missing blocks from the store - pending_block_store.clear_missing_blocks(); + pending_block_store.lock().clear_missing_blocks(); // Verify that the store is now empty - let blocks_without_payloads = pending_block_store.blocks_without_payloads.lock(); - assert!(blocks_without_payloads.is_empty()); + assert!(pending_block_store + .lock() + .blocks_without_payloads + .is_empty()); } #[test] @@ -244,13 +246,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks 
as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 0; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -259,7 +263,7 @@ mod test { // Verify that all blocks were inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &pending_blocks, ); @@ -267,7 +271,7 @@ mod test { // Insert the maximum number of blocks into the store again let starting_round = (max_num_pending_blocks * 100) as Round; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -276,7 +280,7 @@ mod test { // Verify that all blocks were inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &pending_blocks, ); @@ -284,12 +288,17 @@ mod test { // Insert one more block into the store (for the next epoch) let next_epoch = 1; let starting_round = 0; - let new_pending_block = - create_and_add_pending_blocks(&pending_block_store, 1, next_epoch, starting_round, 5); + let new_pending_block = create_and_add_pending_blocks( + pending_block_store.clone(), + 1, + next_epoch, + starting_round, + 5, + ); // Verify the new block was inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &new_pending_block, ); @@ -303,13 +312,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 200; let mut pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -318,7 +329,7 @@ mod test { // Verify that all blocks were inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &pending_blocks, ); @@ -329,7 +340,7 @@ mod test { // Insert one more block into the store let starting_round = ((max_num_pending_blocks * 10) + (i * 100)) as Round; let new_pending_block = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), 1, current_epoch, starting_round, @@ -338,7 +349,7 @@ mod test { // Verify the new block was inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &new_pending_block, ); @@ -348,7 +359,8 @@ mod test { let oldest_block_round = oldest_block.first_block().round(); // Verify that the oldest block was garbage collected - let blocks_without_payloads = pending_block_store.blocks_without_payloads.lock(); + let blocks_without_payloads = + pending_block_store.lock().blocks_without_payloads.clone(); assert!(!blocks_without_payloads.contains_key(&(current_epoch, oldest_block_round))); } @@ -359,7 +371,7 @@ mod test { // Insert one more block into the store let 
starting_round = i; let new_pending_block = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), 1, next_epoch, starting_round, @@ -368,7 +380,7 @@ mod test { // Verify the new block was inserted correctly verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &new_pending_block, ); @@ -378,7 +390,8 @@ mod test { let oldest_block_round = oldest_block.first_block().round(); // Verify that the oldest block was garbage collected - let blocks_without_payloads = pending_block_store.blocks_without_payloads.lock(); + let blocks_without_payloads = + pending_block_store.lock().blocks_without_payloads.clone(); assert!(!blocks_without_payloads.contains_key(&(current_epoch, oldest_block_round))); } } @@ -391,13 +404,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 0; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -405,43 +420,45 @@ mod test { ); // Create a new block payload store and insert payloads for the second block - let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); let second_block = pending_blocks[1].clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &second_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &second_block); // Remove the second block (which is now ready) let payload_round = second_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); assert_eq!(ready_block, Some(second_block)); // Verify that the first and second blocks were removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 2, &pending_blocks[2..].to_vec(), ); // Insert payloads for the last block let last_block = pending_blocks.last().unwrap().clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &last_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &last_block); // Remove the last block (which is now ready) let payload_round = last_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); // Verify that the last block was removed assert_eq!(ready_block, Some(last_block)); // Verify that the store is empty - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } #[test] @@ -452,13 +469,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + 
))); // Insert the maximum number of blocks into the store let current_epoch = 10; let starting_round = 100; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -466,7 +485,9 @@ mod test { ); // Create an empty block payload store - let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); // Incrementally insert and process each payload for the first block let first_block = pending_blocks.first().unwrap().clone(); @@ -474,14 +495,16 @@ mod test { // Insert the block let block_payload = BlockPayload::new(block.block_info(), BlockTransactionPayload::empty()); - block_payload_store.insert_block_payload(block_payload, true); + block_payload_store + .lock() + .insert_block_payload(block_payload, true); // Attempt to remove the block (which might not be ready) let payload_round = block.round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); // If the block is ready, verify that it was removed. @@ -492,7 +515,7 @@ mod test { // Verify that the block was removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 1, &pending_blocks[1..].to_vec(), ); @@ -502,7 +525,7 @@ mod test { // Verify that the block still remains verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, &pending_blocks, ); @@ -517,14 +540,16 @@ mod test { if payload_round != last_block.first_block().round() { let block_payload = BlockPayload::new(block.block_info(), BlockTransactionPayload::empty()); - block_payload_store.insert_block_payload(block_payload, true); + block_payload_store + .lock() + .insert_block_payload(block_payload, true); } // Attempt to remove the block (which might not be ready) - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); // The block should not be ready @@ -532,14 +557,14 @@ mod test { // Verify that the block still remains or has been removed on the last insert if payload_round == last_block.last_block().round() { - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } else { - verify_pending_blocks(&pending_block_store, 1, &vec![last_block.clone()]); + verify_pending_blocks(pending_block_store.clone(), 1, &vec![last_block.clone()]); } } // Verify that the store is now empty - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } #[test] @@ -550,13 +575,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 0; let starting_round = 0; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, 
starting_round, @@ -564,63 +591,65 @@ mod test { ); // Create a new block payload store and insert payloads for the first block - let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); let first_block = pending_blocks.first().unwrap().clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &first_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &first_block); // Remove the first block (which is now ready) let payload_round = first_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); assert_eq!(ready_block, Some(first_block)); // Verify that the first block was removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 1, &pending_blocks[1..].to_vec(), ); // Insert payloads for the second block let second_block = pending_blocks[1].clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &second_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &second_block); // Remove the second block (which is now ready) let payload_round = second_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); assert_eq!(ready_block, Some(second_block)); // Verify that the first and second blocks were removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 2, &pending_blocks[2..].to_vec(), ); // Insert payloads for the last block let last_block = pending_blocks.last().unwrap().clone(); - insert_payloads_for_ordered_block(&mut block_payload_store, &last_block); + insert_payloads_for_ordered_block(block_payload_store.clone(), &last_block); // Remove the last block (which is now ready) let payload_round = last_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, payload_round, - &block_payload_store, + block_payload_store.clone(), ); // Verify that the last block was removed assert_eq!(ready_block, Some(last_block)); // Verify that the store is empty - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } #[test] @@ -631,13 +660,15 @@ mod test { max_num_pending_blocks: max_num_pending_blocks as u64, ..ConsensusObserverConfig::default() }; - let pending_block_store = PendingBlockStore::new(consensus_observer_config); + let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new( + consensus_observer_config, + ))); // Insert the maximum number of blocks into the store let current_epoch = 10; let starting_round = 100; let pending_blocks = create_and_add_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks, current_epoch, starting_round, @@ -645,21 +676,23 @@ mod test { ); // Create an empty block payload store - let block_payload_store = BlockPayloadStore::new(consensus_observer_config); + let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new( + consensus_observer_config, + ))); // 
Remove the third block (which is not ready) let third_block = pending_blocks[2].clone(); let third_block_round = third_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, third_block_round, - &block_payload_store, + block_payload_store.clone(), ); assert!(ready_block.is_none()); // Verify that the first three blocks were removed verify_pending_blocks( - &pending_block_store, + pending_block_store.clone(), max_num_pending_blocks - 3, &pending_blocks[3..].to_vec(), ); @@ -667,20 +700,20 @@ mod test { // Remove the last block (which is not ready) let last_block = pending_blocks.last().unwrap().clone(); let last_block_round = last_block.first_block().round(); - let ready_block = pending_block_store.remove_ready_block( + let ready_block = pending_block_store.lock().remove_ready_block( current_epoch, last_block_round, - &block_payload_store, + block_payload_store.clone(), ); assert!(ready_block.is_none()); // Verify that the store is now empty - verify_pending_blocks(&pending_block_store, 0, &vec![]); + verify_pending_blocks(pending_block_store.clone(), 0, &vec![]); } /// Creates and adds the specified number of blocks to the pending block store fn create_and_add_pending_blocks( - pending_block_store: &PendingBlockStore, + pending_block_store: Arc>, num_pending_blocks: usize, epoch: u64, starting_round: Round, @@ -732,7 +765,9 @@ mod test { let ordered_block = OrderedBlock::new(pipelined_blocks, ordered_proof.clone()); // Insert the ordered block into the pending block store - pending_block_store.insert_pending_block(ordered_block.clone()); + pending_block_store + .lock() + .insert_pending_block(ordered_block.clone()); // Add the ordered block to the pending blocks pending_blocks.push(ordered_block); @@ -743,31 +778,37 @@ mod test { /// Inserts payloads into the payload store for the ordered block fn insert_payloads_for_ordered_block( - block_payload_store: &mut BlockPayloadStore, + block_payload_store: Arc>, ordered_block: &OrderedBlock, ) { for block in ordered_block.blocks() { let block_payload = BlockPayload::new(block.block_info(), BlockTransactionPayload::empty()); - block_payload_store.insert_block_payload(block_payload, true); + block_payload_store + .lock() + .insert_block_payload(block_payload, true); } } /// Verifies that the pending block store contains the expected blocks fn verify_pending_blocks( - pending_block_store: &PendingBlockStore, + pending_block_store: Arc>, num_expected_blocks: usize, pending_blocks: &Vec, ) { // Check the number of pending blocks - let blocks_without_payloads = pending_block_store.blocks_without_payloads.lock(); - assert_eq!(blocks_without_payloads.len(), num_expected_blocks); + assert_eq!( + pending_block_store.lock().blocks_without_payloads.len(), + num_expected_blocks + ); // Check that all pending blocks are in the store for pending_block in pending_blocks { let first_block = pending_block.first_block(); assert_eq!( - blocks_without_payloads + pending_block_store + .lock() + .blocks_without_payloads .get(&(first_block.epoch(), first_block.round())) .unwrap(), pending_block From 1fef8b8d46997e1937356e819ec5e2c7b09e6bdb Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Sun, 8 Sep 2024 06:33:48 -0400 Subject: [PATCH 08/36] [Consensus Observer] Handle duplicate commit messages. 
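Stale or duplicate commit decisions are now dropped before any signature
verification work: the observer tracks the highest committed (epoch, round)
and ignores any commit message at or below it, bumping a new dropped-message
counter instead. A minimal standalone sketch of the staleness check
(simplified names; the real observer falls back to the root block's
(epoch, round) when nothing has been committed yet):

    /// Returns true if `commit` is a duplicate of, or older than, `highest`.
    fn is_stale_commit(commit: (u64, u64), highest: (u64, u64)) -> bool {
        // Rust tuples compare lexicographically, so (epoch, round) pairs
        // order correctly across epoch boundaries.
        commit <= highest
    }

    fn main() {
        assert!(is_stale_commit((10, 5), (10, 5)));  // duplicate of the latest commit
        assert!(is_stale_commit((9, 99), (10, 5)));  // older epoch, any round
        assert!(!is_stale_commit((10, 6), (10, 5))); // genuinely new commit
    }
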
--- .../src/consensus_observer/common/metrics.rs | 11 + .../observer/consensus_observer.rs | 205 +++++++++++++----- .../observer/ordered_blocks.rs | 149 +++++++++++++ 3 files changed, 310 insertions(+), 55 deletions(-) diff --git a/consensus/src/consensus_observer/common/metrics.rs b/consensus/src/consensus_observer/common/metrics.rs index 8cf8144d25a86..e290d74640c70 100644 --- a/consensus/src/consensus_observer/common/metrics.rs +++ b/consensus/src/consensus_observer/common/metrics.rs @@ -13,6 +13,7 @@ use once_cell::sync::Lazy; // Useful metric labels pub const BLOCK_PAYLOAD_LABEL: &str = "block_payload"; pub const COMMIT_DECISION_LABEL: &str = "commit_decision"; +pub const COMMITTED_BLOCKS_LABEL: &str = "committed_blocks"; pub const CREATED_SUBSCRIPTION_LABEL: &str = "created_subscription"; pub const ORDERED_BLOCK_ENTRIES_LABEL: &str = "ordered_block_entries"; pub const ORDERED_BLOCKS_LABEL: &str = "ordered_blocks"; @@ -30,6 +31,16 @@ pub static OBSERVER_CREATED_SUBSCRIPTIONS: Lazy = Lazy::new(|| { .unwrap() }); +/// Counter for tracking dropped (direct send) messages by the consensus observer +pub static OBSERVER_DROPPED_MESSAGES: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "consensus_observer_dropped_messages", + "Counters related to dropped (direct send) messages by the consensus observer", + &["message_type", "network_id"] + ) + .unwrap() +}); + /// Gauge for tracking the number of active subscriptions for the consensus observer pub static OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS: Lazy = Lazy::new(|| { register_int_gauge_vec!( diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs index 9917adad0f74a..ce641e10457e6 100644 --- a/consensus/src/consensus_observer/observer/consensus_observer.rs +++ b/consensus/src/consensus_observer/observer/consensus_observer.rs @@ -28,7 +28,10 @@ use crate::{ pipeline::execution_client::TExecutionClient, }; use aptos_channels::{aptos_channel, aptos_channel::Receiver, message_queues::QueueStyle}; -use aptos_config::config::{ConsensusObserverConfig, NodeConfig}; +use aptos_config::{ + config::{ConsensusObserverConfig, NodeConfig}, + network_id::PeerNetworkId, +}; use aptos_consensus_types::{pipeline, pipelined_block::PipelinedBlock}; use aptos_crypto::{bls12381, Genesis}; use aptos_event_notifications::{DbBackedOnChainConfig, ReconfigNotificationListener}; @@ -262,10 +265,25 @@ impl ConsensusObserver { self.active_observer_state.epoch_state() } + /// Returns the highest committed block epoch and round + fn get_highest_committed_epoch_round(&self) -> (u64, Round) { + if let Some(epoch_round) = self + .ordered_block_store + .lock() + .get_highest_committed_epoch_round() + { + epoch_round + } else { + // Return the root epoch and round + let root_block_info = self.active_observer_state.root().commit_info().clone(); + (root_block_info.epoch(), root_block_info.round()) + } + } + /// Returns the last ordered block fn get_last_ordered_block(&self) -> BlockInfo { - if let Some(last_pending_block) = self.ordered_block_store.lock().get_last_ordered_block() { - last_pending_block + if let Some(last_ordered_block) = self.ordered_block_store.lock().get_last_ordered_block() { + last_ordered_block } else { // Return the root ledger info self.active_observer_state.root().commit_info().clone() @@ -298,18 +316,18 @@ impl ConsensusObserver { } /// Processes the block payload message - async fn process_block_payload_message(&mut self, block_payload: BlockPayload) { + async fn 
process_block_payload_message( + &mut self, + peer_network_id: PeerNetworkId, + block_payload: BlockPayload, + ) { + // Update the metrics for the received block payload + update_metrics_for_block_payload_message(peer_network_id, &block_payload); + // Get the epoch and round for the block let block_epoch = block_payload.block.epoch(); let block_round = block_payload.block.round(); - // Update the metrics for the received block payload - metrics::set_gauge_with_label( - &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, - metrics::BLOCK_PAYLOAD_LABEL, - block_round, - ); - // Verify the block payload digests if let Err(error) = block_payload.verify_payload_digests() { error!( @@ -355,18 +373,28 @@ impl ConsensusObserver { } /// Processes the commit decision message - fn process_commit_decision_message(&mut self, commit_decision: CommitDecision) { + fn process_commit_decision_message( + &mut self, + peer_network_id: PeerNetworkId, + commit_decision: CommitDecision, + ) { + // Get the commit decision epoch and round + let commit_epoch = commit_decision.epoch(); + let commit_round = commit_decision.round(); + + // If the commit message is behind our highest committed block, ignore it + if (commit_epoch, commit_round) <= self.get_highest_committed_epoch_round() { + // Update the metrics for the dropped commit decision + update_metrics_for_dropped_commit_decision_message(peer_network_id, &commit_decision); + return; + } + // Update the metrics for the received commit decision - metrics::set_gauge_with_label( - &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, - metrics::COMMIT_DECISION_LABEL, - commit_decision.round(), - ); + update_metrics_for_commit_decision_message(peer_network_id, &commit_decision); // If the commit decision is for the current epoch, verify and process it let epoch_state = self.get_epoch_state(); - let commit_decision_epoch = commit_decision.epoch(); - if commit_decision_epoch == epoch_state.epoch { + if commit_epoch == epoch_state.epoch { // Verify the commit decision if let Err(error) = commit_decision.verify_commit_proof(&epoch_state) { error!( @@ -391,9 +419,8 @@ impl ConsensusObserver { // Otherwise, we failed to process the commit decision. If the commit // is for a future epoch or round, we need to state sync. let last_block = self.get_last_ordered_block(); - let commit_decision_round = commit_decision.round(); - let epoch_changed = commit_decision_epoch > last_block.epoch(); - if epoch_changed || commit_decision_round > last_block.round() { + let epoch_changed = commit_epoch > last_block.epoch(); + if epoch_changed || commit_round > last_block.round() { // If we're waiting for state sync to transition into a new epoch, // we should just wait and not issue a new state sync request. 
if self.in_state_sync_epoch_change() { @@ -420,7 +447,7 @@ impl ConsensusObserver { .update_root(commit_decision.commit_proof().clone()); self.block_payload_store .lock() - .remove_blocks_for_epoch_round(commit_decision_epoch, commit_decision_round); + .remove_blocks_for_epoch_round(commit_epoch, commit_round); self.ordered_block_store .lock() .remove_blocks_for_commit(commit_decision.commit_proof()); @@ -428,8 +455,8 @@ impl ConsensusObserver { // Start the state sync process let abort_handle = sync_to_commit_decision( commit_decision, - commit_decision_epoch, - commit_decision_round, + commit_epoch, + commit_round, self.execution_client.clone(), self.sync_notification_sender.clone(), ); @@ -509,39 +536,15 @@ impl ConsensusObserver { // Process the message based on the type match message { ConsensusObserverDirectSend::OrderedBlock(ordered_block) => { - // Log the received ordered block message - let log_message = format!( - "Received ordered block: {}, from peer: {}!", - ordered_block.proof_block_info(), - peer_network_id - ); - log_received_message(log_message); - - // Process the ordered block message - self.process_ordered_block_message(ordered_block).await; + self.process_ordered_block_message(peer_network_id, ordered_block) + .await; }, ConsensusObserverDirectSend::CommitDecision(commit_decision) => { - // Log the received commit decision message - let log_message = format!( - "Received commit decision: {}, from peer: {}!", - commit_decision.proof_block_info(), - peer_network_id - ); - log_received_message(log_message); - - // Process the commit decision message - self.process_commit_decision_message(commit_decision); + self.process_commit_decision_message(peer_network_id, commit_decision); }, ConsensusObserverDirectSend::BlockPayload(block_payload) => { - // Log the received block payload message - let log_message = format!( - "Received block payload: {}, from peer: {}!", - block_payload.block, peer_network_id - ); - log_received_message(log_message); - - // Process the block payload message - self.process_block_payload_message(block_payload).await; + self.process_block_payload_message(peer_network_id, block_payload) + .await; }, } @@ -550,7 +553,14 @@ impl ConsensusObserver { } /// Processes the ordered block - async fn process_ordered_block_message(&mut self, ordered_block: OrderedBlock) { + async fn process_ordered_block_message( + &mut self, + peer_network_id: PeerNetworkId, + ordered_block: OrderedBlock, + ) { + // Update the metrics for the received ordered block + update_metrics_for_ordered_block_message(peer_network_id, &ordered_block); + // Verify the ordered blocks before processing if let Err(error) = ordered_block.verify_ordered_blocks() { error!( @@ -847,3 +857,88 @@ fn sync_to_commit_decision( )); abort_handle } + +/// Updates the metrics for the received block payload message +fn update_metrics_for_block_payload_message( + peer_network_id: PeerNetworkId, + block_payload: &BlockPayload, +) { + // Log the received block payload message + let log_message = format!( + "Received block payload: {}, from peer: {}!", + block_payload.block, peer_network_id + ); + log_received_message(log_message); + + // Update the metrics for the received block payload + metrics::set_gauge_with_label( + &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, + metrics::BLOCK_PAYLOAD_LABEL, + block_payload.block.round(), + ); +} + +/// Updates the metrics for the received commit decision message +fn update_metrics_for_commit_decision_message( + peer_network_id: PeerNetworkId, + commit_decision: 
&CommitDecision, +) { + // Log the received commit decision message + let log_message = format!( + "Received commit decision: {}, from peer: {}!", + commit_decision.proof_block_info(), + peer_network_id + ); + log_received_message(log_message); + + // Update the metrics for the received commit decision + metrics::set_gauge_with_label( + &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, + metrics::COMMIT_DECISION_LABEL, + commit_decision.round(), + ); +} + +/// Updates the metrics for the dropped commit decision message +fn update_metrics_for_dropped_commit_decision_message( + peer_network_id: PeerNetworkId, + commit_decision: &CommitDecision, +) { + // Increment the dropped message counter + metrics::increment_request_counter( + &metrics::OBSERVER_DROPPED_MESSAGES, + metrics::COMMITTED_BLOCKS_LABEL, + &peer_network_id, + ); + + // Log the dropped commit decision message + debug!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Ignoring commit decision message from peer: {:?}! Commit epoch and round: ({}, {})", + peer_network_id, + commit_decision.epoch(), + commit_decision.round() + )) + ); +} + +/// Updates the metrics for the received ordered block message +fn update_metrics_for_ordered_block_message( + peer_network_id: PeerNetworkId, + ordered_block: &OrderedBlock, +) { + // Log the received ordered block message + let log_message = format!( + "Received ordered block: {}, from peer: {}!", + ordered_block.proof_block_info(), + peer_network_id + ); + log_received_message(log_message); + + // Update the metrics for the received ordered block + metrics::set_gauge_with_label( + &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, + metrics::ORDERED_BLOCKS_LABEL, + ordered_block.proof_block_info().round(), + ); +} diff --git a/consensus/src/consensus_observer/observer/ordered_blocks.rs b/consensus/src/consensus_observer/observer/ordered_blocks.rs index 7bb43fbc4108f..60aa56cf48d85 100644 --- a/consensus/src/consensus_observer/observer/ordered_blocks.rs +++ b/consensus/src/consensus_observer/observer/ordered_blocks.rs @@ -19,6 +19,9 @@ pub struct OrderedBlockStore { // The configuration of the consensus observer consensus_observer_config: ConsensusObserverConfig, + // The highest committed block (epoch and round) + highest_committed_epoch_round: Option<(u64, Round)>, + // Ordered blocks. The key is the epoch and round of the last block in the // ordered block. Each entry contains the block and the commit decision (if any). 
ordered_blocks: BTreeMap<(u64, Round), (OrderedBlock, Option)>, @@ -28,6 +31,7 @@ impl OrderedBlockStore { pub fn new(consensus_observer_config: ConsensusObserverConfig) -> Self { Self { consensus_observer_config, + highest_committed_epoch_round: None, ordered_blocks: BTreeMap::new(), } } @@ -44,6 +48,11 @@ impl OrderedBlockStore { self.ordered_blocks.clone() } + /// Returns the highest committed epoch and round (if any) + pub fn get_highest_committed_epoch_round(&self) -> Option<(u64, Round)> { + self.highest_committed_epoch_round + } + /// Returns the last ordered block (if any) pub fn get_last_ordered_block(&self) -> Option { self.ordered_blocks @@ -104,6 +113,9 @@ impl OrderedBlockStore { self.ordered_blocks = self .ordered_blocks .split_off(&(split_off_epoch, split_off_round)); + + // Update the highest committed epoch and round + self.update_highest_committed_epoch_round(commit_ledger_info); } /// Updates the commit decision of the ordered block (if found) @@ -119,6 +131,32 @@ impl OrderedBlockStore { { *existing_commit_decision = Some(commit_decision.clone()); } + + // Update the highest committed epoch and round + self.update_highest_committed_epoch_round(commit_decision.commit_proof()); + } + + /// Updates the highest committed epoch and round based on the commit ledger info + fn update_highest_committed_epoch_round( + &mut self, + commit_ledger_info: &LedgerInfoWithSignatures, + ) { + // Get the epoch and round of the commit ledger info + let commit_epoch = commit_ledger_info.ledger_info().epoch(); + let commit_round = commit_ledger_info.commit_info().round(); + let commit_epoch_round = (commit_epoch, commit_round); + + // Update the highest committed epoch and round (if appropriate) + match self.highest_committed_epoch_round { + Some(highest_committed_epoch_round) => { + if commit_epoch_round > highest_committed_epoch_round { + self.highest_committed_epoch_round = Some(commit_epoch_round); + } + }, + None => { + self.highest_committed_epoch_round = Some(commit_epoch_round); + }, + } } /// Updates the metrics for the ordered blocks @@ -154,6 +192,17 @@ impl OrderedBlockStore { metrics::ORDERED_BLOCKS_LABEL, highest_ordered_round, ); + + // Update the highest round for the committed blocks + let highest_committed_round = self + .highest_committed_epoch_round + .map(|(_, round)| round) + .unwrap_or(0); + metrics::set_gauge_with_label( + &metrics::OBSERVER_PROCESSED_BLOCK_ROUNDS, + metrics::COMMITTED_BLOCKS_LABEL, + highest_committed_round, + ); } } @@ -189,6 +238,91 @@ mod test { assert!(ordered_block_store.ordered_blocks.is_empty()); } + #[test] + fn test_get_highest_committed_epoch_round() { + // Create a new ordered block store + let mut ordered_block_store = OrderedBlockStore::new(ConsensusObserverConfig::default()); + + // Verify that we have no highest committed epoch and round + assert!(ordered_block_store + .get_highest_committed_epoch_round() + .is_none()); + + // Insert several ordered blocks for the current epoch + let current_epoch = 10; + let num_ordered_blocks = 50; + let ordered_blocks = create_and_add_ordered_blocks( + &mut ordered_block_store, + num_ordered_blocks, + current_epoch, + ); + + // Create a commit decision for the first ordered block + let first_ordered_block = ordered_blocks.first().unwrap(); + let first_ordered_block_info = first_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(first_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + 
)); + + // Update the commit decision for the first ordered block + ordered_block_store.update_commit_decision(&commit_decision); + + // Verify the highest committed epoch and round is the first ordered block + verify_highest_committed_epoch_round(&ordered_block_store, &first_ordered_block_info); + + // Create a commit decision for the last ordered block + let last_ordered_block = ordered_blocks.last().unwrap(); + let last_ordered_block_info = last_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(last_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Update the commit decision for the last ordered block + ordered_block_store.update_commit_decision(&commit_decision); + + // Verify the highest committed epoch and round is the last ordered block + verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); + + // Insert several ordered blocks for the next epoch + let next_epoch = current_epoch + 1; + let num_ordered_blocks = 10; + let ordered_blocks = + create_and_add_ordered_blocks(&mut ordered_block_store, num_ordered_blocks, next_epoch); + + // Verify the highest committed epoch and round is still the last ordered block + verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); + + // Create a commit decision for the first ordered block (in the next epoch) + let first_ordered_block = ordered_blocks.first().unwrap(); + let first_ordered_block_info = first_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(first_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Update the commit decision for the first ordered block + ordered_block_store.update_commit_decision(&commit_decision); + + // Verify the highest committed epoch and round is the first ordered block (in the next epoch) + verify_highest_committed_epoch_round(&ordered_block_store, &first_ordered_block_info); + + // Create a commit decision for the last ordered block (in the next epoch) + let last_ordered_block = ordered_blocks.last().unwrap(); + let last_ordered_block_info = last_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(last_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Remove the ordered blocks for the commit decision + ordered_block_store.remove_blocks_for_commit(commit_decision.commit_proof()); + + // Verify the highest committed epoch and round is the last ordered block (in the next epoch) + verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); + } + #[test] fn test_get_last_ordered_block() { // Create a new ordered block store @@ -581,4 +715,19 @@ mod test { updated_commit_decision.as_ref().unwrap().clone() ); } + + /// Verifies the highest committed epoch and round matches the given block info + fn verify_highest_committed_epoch_round( + ordered_block_store: &OrderedBlockStore, + block_info: &BlockInfo, + ) { + // Verify the highest committed epoch and round is the block info + let highest_committed_epoch_round = ordered_block_store + .get_highest_committed_epoch_round() + .unwrap(); + assert_eq!( + highest_committed_epoch_round, + (block_info.epoch(), block_info.round()) + ); + } } From 6b06cad8e825f4ccfaeac38eb1808cf6e68d1c1c Mon Sep 17 00:00:00 2001 
From: Josh Lind Date: Sun, 8 Sep 2024 06:33:59 -0400 Subject: [PATCH 09/36] [Consensus Observer] Handle duplicate payload messages. --- .../observer/consensus_observer.rs | 45 ++++++++++++++-- .../observer/payload_store.rs | 51 +++++++++++++++++++ 2 files changed, 93 insertions(+), 3 deletions(-) diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs index ce641e10457e6..fdd2763fd5765 100644 --- a/consensus/src/consensus_observer/observer/consensus_observer.rs +++ b/consensus/src/consensus_observer/observer/consensus_observer.rs @@ -321,13 +321,29 @@ impl ConsensusObserver { peer_network_id: PeerNetworkId, block_payload: BlockPayload, ) { - // Update the metrics for the received block payload - update_metrics_for_block_payload_message(peer_network_id, &block_payload); - // Get the epoch and round for the block let block_epoch = block_payload.block.epoch(); let block_round = block_payload.block.round(); + // Determine if the payload is behind the last ordered block, or it already exists + let last_ordered_block = self.get_last_ordered_block(); + let payload_out_of_date = + (block_epoch, block_round) <= (last_ordered_block.epoch(), last_ordered_block.round()); + let payload_exists = self + .block_payload_store + .lock() + .existing_payload_entry(&block_payload); + + // If the payload already exists, or is behind the last ordered block, we should ignore it + if payload_exists || payload_out_of_date { + // Update the metrics for the dropped block payload + update_metrics_for_dropped_block_payload_message(peer_network_id, &block_payload); + return; + } + + // Update the metrics for the received block payload + update_metrics_for_block_payload_message(peer_network_id, &block_payload); + // Verify the block payload digests if let Err(error) = block_payload.verify_payload_digests() { error!( @@ -899,6 +915,29 @@ fn update_metrics_for_commit_decision_message( ); } +/// Updates the metrics for the dropped block payload message +fn update_metrics_for_dropped_block_payload_message( + peer_network_id: PeerNetworkId, + block_payload: &BlockPayload, +) { + // Increment the dropped message counter + metrics::increment_request_counter( + &metrics::OBSERVER_DROPPED_MESSAGES, + metrics::BLOCK_PAYLOAD_LABEL, + &peer_network_id, + ); + + // Log the dropped block payload message + debug!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Ignoring block payload message from peer: {:?}! 
Block epoch and round: ({}, {})",
+            peer_network_id,
+            block_payload.block.epoch(),
+            block_payload.block.round()
+        ))
+    );
+}
+
 /// Updates the metrics for the dropped commit decision message
 fn update_metrics_for_dropped_commit_decision_message(
     peer_network_id: PeerNetworkId,
diff --git a/consensus/src/consensus_observer/observer/payload_store.rs b/consensus/src/consensus_observer/observer/payload_store.rs
index 8781595026194..edea188be6f0e 100644
--- a/consensus/src/consensus_observer/observer/payload_store.rs
+++ b/consensus/src/consensus_observer/observer/payload_store.rs
@@ -61,6 +61,16 @@ impl BlockPayloadStore {
         self.block_payloads.lock().clear();
     }
 
+    /// Returns true iff we already have a payload entry for the given block
+    pub fn existing_payload_entry(&self, block_payload: &BlockPayload) -> bool {
+        // Get the epoch and round of the payload
+        let block_info = &block_payload.block;
+        let epoch_and_round = (block_info.epoch(), block_info.round());
+
+        // Check if a payload already exists in the store
+        self.block_payloads.lock().contains_key(&epoch_and_round)
+    }
+
     /// Returns a reference to the block payloads
     pub fn get_block_payloads(&self) -> Arc<Mutex<BTreeMap<(u64, Round), BlockPayloadStatus>>> {
         self.block_payloads.clone()
     }
@@ -399,6 +409,41 @@ mod test {
         check_num_verified_payloads(&block_payload_store, 0);
     }
 
+    #[test]
+    fn test_existing_payload_entry() {
+        // Create a new block payload store
+        let consensus_observer_config = ConsensusObserverConfig::default();
+        let mut block_payload_store = BlockPayloadStore::new(consensus_observer_config);
+
+        // Create a new block payload
+        let epoch = 10;
+        let round = 100;
+        let block_payload = create_block_payload(epoch, round);
+
+        // Check that the payload doesn't exist in the block payload store
+        assert!(!block_payload_store.existing_payload_entry(&block_payload));
+
+        // Insert the verified block payload into the block payload store
+        block_payload_store.insert_block_payload(block_payload.clone(), true);
+
+        // Check that the payload now exists in the block payload store
+        assert!(block_payload_store.existing_payload_entry(&block_payload));
+
+        // Create another block payload
+        let epoch = 5;
+        let round = 101;
+        let block_payload = create_block_payload(epoch, round);
+
+        // Check that the payload doesn't exist in the block payload store
+        assert!(!block_payload_store.existing_payload_entry(&block_payload));
+
+        // Insert the unverified block payload into the block payload store
+        block_payload_store.insert_block_payload(block_payload.clone(), false);
+
+        // Check that the payload now exists in the block payload store
+        assert!(block_payload_store.existing_payload_entry(&block_payload));
+    }
+
     #[test]
     fn test_insert_block_payload() {
         // Create a new block payload store
@@ -1040,6 +1085,12 @@ mod test {
         pipelined_blocks
     }
 
+    /// Creates a new block payload with the given epoch and round
+    fn create_block_payload(epoch: u64, round: Round) -> BlockPayload {
+        let block_info = BlockInfo::random_with_epoch(epoch, round);
+        BlockPayload::new(block_info, BlockTransactionPayload::empty())
+    }
+
     /// Checks the number of unverified payloads in the block payload store
     fn check_num_unverified_payloads(
         block_payload_store: &BlockPayloadStore,

From f39c6b137208b2c76aa38ba1cd2dfdea7ec564c4 Mon Sep 17 00:00:00 2001
From: Josh Lind
Date: Sun, 8 Sep 2024 06:34:07 -0400
Subject: [PATCH 10/36] [Consensus Observer] Handle duplicate ordered block
 messages.
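Both this change and the duplicate-payload handling in the previous patch gate incoming messages on a lexicographic comparison of (epoch, round) pairs against the last ordered block. A minimal standalone sketch of why tuple ordering gives the right staleness check (the `Round` alias here is an assumption standing in for the consensus round type; this is not the patch's own code):

type Round = u64;

/// Returns true iff the incoming (epoch, round) pair is at or below the
/// last ordered pair. Rust compares tuples element by element, so an
/// older epoch dominates regardless of the round number.
fn is_out_of_date(incoming: (u64, Round), last_ordered: (u64, Round)) -> bool {
    incoming <= last_ordered
}

fn main() {
    let last_ordered = (10, 100);
    assert!(is_out_of_date((10, 100), last_ordered)); // same pair: a duplicate
    assert!(is_out_of_date((10, 99), last_ordered)); // earlier round: stale
    assert!(is_out_of_date((9, 999), last_ordered)); // earlier epoch: stale, despite the higher round
    assert!(!is_out_of_date((11, 0), last_ordered)); // later epoch: never stale
}

Performing this cheap check (and the payload-store lookup) before digest and signature verification also avoids spending expensive verification work on data that would be discarded anyway.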
--- .../observer/consensus_observer.rs | 55 ++++++++++++-- .../observer/ordered_blocks.rs | 14 ++++ .../observer/pending_blocks.rs | 72 +++++++++++++++++++ 3 files changed, 135 insertions(+), 6 deletions(-) diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs index fdd2763fd5765..6939567dbb178 100644 --- a/consensus/src/consensus_observer/observer/consensus_observer.rs +++ b/consensus/src/consensus_observer/observer/consensus_observer.rs @@ -325,7 +325,7 @@ impl ConsensusObserver { let block_epoch = block_payload.block.epoch(); let block_round = block_payload.block.round(); - // Determine if the payload is behind the last ordered block, or it already exists + // Determine if the payload is behind the last ordered block, or if it already exists let last_ordered_block = self.get_last_ordered_block(); let payload_out_of_date = (block_epoch, block_round) <= (last_ordered_block.epoch(), last_ordered_block.round()); @@ -334,8 +334,8 @@ impl ConsensusObserver { .lock() .existing_payload_entry(&block_payload); - // If the payload already exists, or is behind the last ordered block, we should ignore it - if payload_exists || payload_out_of_date { + // If the payload is out of date or already exists, ignore it + if payload_out_of_date || payload_exists { // Update the metrics for the dropped block payload update_metrics_for_dropped_block_payload_message(peer_network_id, &block_payload); return; @@ -574,9 +574,6 @@ impl ConsensusObserver { peer_network_id: PeerNetworkId, ordered_block: OrderedBlock, ) { - // Update the metrics for the received ordered block - update_metrics_for_ordered_block_message(peer_network_id, &ordered_block); - // Verify the ordered blocks before processing if let Err(error) = ordered_block.verify_ordered_blocks() { error!( @@ -589,6 +586,29 @@ impl ConsensusObserver { return; }; + // Get the epoch and round of the first block + let first_block = ordered_block.first_block(); + let first_block_epoch_round = (first_block.epoch(), first_block.round()); + + // Determine if the block is behind the last ordered block, or if it is already pending + let last_ordered_block = self.get_last_ordered_block(); + let block_out_of_date = + first_block_epoch_round <= (last_ordered_block.epoch(), last_ordered_block.round()); + let block_pending = self + .pending_block_store + .lock() + .existing_pending_block(&ordered_block); + + // If the block is out of date or already pending, ignore it + if block_out_of_date || block_pending { + // Update the metrics for the dropped ordered block + update_metrics_for_dropped_ordered_block_message(peer_network_id, &ordered_block); + return; + } + + // Update the metrics for the received ordered block + update_metrics_for_ordered_block_message(peer_network_id, &ordered_block); + // If all payloads exist, process the block. Otherwise, store it // in the pending block store and wait for the payloads to arrive. 
if self.all_payloads_exist(ordered_block.blocks()) { @@ -961,6 +981,29 @@ fn update_metrics_for_dropped_commit_decision_message( ); } +/// Updates the metrics for the dropped ordered block message +fn update_metrics_for_dropped_ordered_block_message( + peer_network_id: PeerNetworkId, + ordered_block: &OrderedBlock, +) { + // Increment the dropped message counter + metrics::increment_request_counter( + &metrics::OBSERVER_DROPPED_MESSAGES, + metrics::ORDERED_BLOCKS_LABEL, + &peer_network_id, + ); + + // Log the dropped ordered block message + debug!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Ignoring ordered block message from peer: {:?}! Block epoch and round: ({}, {})", + peer_network_id, + ordered_block.proof_block_info().epoch(), + ordered_block.proof_block_info().round() + )) + ); +} + /// Updates the metrics for the received ordered block message fn update_metrics_for_ordered_block_message( peer_network_id: PeerNetworkId, diff --git a/consensus/src/consensus_observer/observer/ordered_blocks.rs b/consensus/src/consensus_observer/observer/ordered_blocks.rs index 60aa56cf48d85..36af25939232e 100644 --- a/consensus/src/consensus_observer/observer/ordered_blocks.rs +++ b/consensus/src/consensus_observer/observer/ordered_blocks.rs @@ -321,6 +321,20 @@ mod test { // Verify the highest committed epoch and round is the last ordered block (in the next epoch) verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); + + // Create a commit decision for an out-of-date ordered block + let out_of_date_ordered_block = ordered_blocks.first().unwrap(); + let out_of_date_ordered_block_info = out_of_date_ordered_block.last_block().block_info(); + let commit_decision = CommitDecision::new(LedgerInfoWithSignatures::new( + LedgerInfo::new(out_of_date_ordered_block_info.clone(), HashValue::random()), + AggregateSignature::empty(), + )); + + // Update the commit decision for the out-of-date ordered block + ordered_block_store.update_commit_decision(&commit_decision); + + // Verify the highest committed epoch and round is still the last ordered block (in the next epoch) + verify_highest_committed_epoch_round(&ordered_block_store, &last_ordered_block_info); } #[test] diff --git a/consensus/src/consensus_observer/observer/pending_blocks.rs b/consensus/src/consensus_observer/observer/pending_blocks.rs index d3ce297cd5fdb..2a7ebbde0519f 100644 --- a/consensus/src/consensus_observer/observer/pending_blocks.rs +++ b/consensus/src/consensus_observer/observer/pending_blocks.rs @@ -41,6 +41,17 @@ impl PendingBlockStore { self.blocks_without_payloads.clear(); } + /// Returns true iff the store contains an entry for the given ordered block + pub fn existing_pending_block(&self, ordered_block: &OrderedBlock) -> bool { + // Get the epoch and round of the first block + let first_block = ordered_block.first_block(); + let first_block_epoch_round = (first_block.epoch(), first_block.round()); + + // Check if the block is already in the store + self.blocks_without_payloads + .contains_key(&first_block_epoch_round) + } + /// Inserts a block (without payloads) into the store pub fn insert_pending_block(&mut self, ordered_block: OrderedBlock) { // Get the epoch and round of the first block @@ -238,6 +249,67 @@ mod test { .is_empty()); } + #[test] + fn test_existing_pending_block() { + // Create a new pending block store + let max_num_pending_blocks = 10; + let consensus_observer_config = ConsensusObserverConfig { + max_num_pending_blocks: max_num_pending_blocks as u64, + 
..ConsensusObserverConfig::default()
+        };
+        let pending_block_store = Arc::new(Mutex::new(PendingBlockStore::new(
+            consensus_observer_config,
+        )));
+
+        // Insert the maximum number of blocks into the store
+        let current_epoch = 10;
+        let starting_round = 100;
+        let pending_blocks = create_and_add_pending_blocks(
+            pending_block_store.clone(),
+            max_num_pending_blocks,
+            current_epoch,
+            starting_round,
+            5,
+        );
+
+        // Verify that all blocks were inserted correctly
+        for pending_block in &pending_blocks {
+            assert!(pending_block_store
+                .lock()
+                .existing_pending_block(pending_block));
+        }
+
+        // Create a new block payload store and insert payloads for the second block
+        let block_payload_store = Arc::new(Mutex::new(BlockPayloadStore::new(
+            consensus_observer_config,
+        )));
+        let second_block = pending_blocks[1].clone();
+        insert_payloads_for_ordered_block(block_payload_store.clone(), &second_block);
+
+        // Remove the second block (which is now ready)
+        let payload_round = second_block.first_block().round();
+        let ready_block = pending_block_store.lock().remove_ready_block(
+            current_epoch,
+            payload_round,
+            block_payload_store.clone(),
+        );
+        assert_eq!(ready_block, Some(second_block));
+
+        // Verify that the first and second blocks were removed
+        verify_pending_blocks(
+            pending_block_store.clone(),
+            max_num_pending_blocks - 2,
+            &pending_blocks[2..].to_vec(),
+        );
+
+        // Verify that the first and second blocks are no longer in the store
+        for pending_block in &pending_blocks[..2] {
+            assert!(!pending_block_store
+                .lock()
+                .existing_pending_block(pending_block));
+        }
+    }
+
     #[test]
     fn test_insert_pending_block() {
         // Create a new pending block store

From b015ee41dfa89ae4fc95e4264aad1ada61ee310b Mon Sep 17 00:00:00 2001
From: Josh Lind
Date: Sun, 8 Sep 2024 06:48:00 -0400
Subject: [PATCH 11/36] [Consensus Observer] Small renames and refactors.
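Beyond the counter and label renames, this patch makes the observer message structs' fields private and routes all access through read-only accessors (see the observer_message.rs hunks below). A minimal sketch of that encapsulation pattern, using a hypothetical BlockSummary type rather than the patch's own BlockPayload:

// The module boundary is what makes the privacy meaningful: code outside
// `payload` can only construct the type via `new` and read via accessors.
mod payload {
    #[derive(Clone, Debug)]
    pub struct BlockSummary {
        epoch: u64, // private: no longer reachable as `summary.epoch`
        round: u64,
    }

    impl BlockSummary {
        pub fn new(epoch: u64, round: u64) -> Self {
            Self { epoch, round }
        }

        // Read-only accessors replace direct field access at call sites.
        pub fn epoch(&self) -> u64 {
            self.epoch
        }

        pub fn round(&self) -> u64 {
            self.round
        }
    }
}

fn main() {
    let summary = payload::BlockSummary::new(10, 100);
    // Call sites migrate from `summary.epoch` to `summary.epoch()`.
    println!("epoch {}, round {}", summary.epoch(), summary.round());
}

The payoff is visible in the later hunks of this patch: changes to the internal representation no longer ripple into every caller, since callers only depend on the accessor signatures.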
---
 .../src/consensus_observer/common/metrics.rs  |  6 ++--
 .../network/observer_client.rs                | 12 +++----
 .../network/observer_message.rs               | 32 +++++++++++++++----
 .../observer/consensus_observer.rs            | 30 +++++++++--------
 .../observer/ordered_blocks.rs                |  4 +--
 .../observer/payload_store.rs                 | 12 +++----
 .../observer/subscription_manager.rs          |  4 +--
 .../publisher/consensus_publisher.rs          |  2 +-
 consensus/src/payload_manager.rs              |  2 +-
 9 files changed, 63 insertions(+), 41 deletions(-)

diff --git a/consensus/src/consensus_observer/common/metrics.rs b/consensus/src/consensus_observer/common/metrics.rs
index e290d74640c70..0e91e1d9af702 100644
--- a/consensus/src/consensus_observer/common/metrics.rs
+++ b/consensus/src/consensus_observer/common/metrics.rs
@@ -16,7 +16,7 @@
 pub const COMMIT_DECISION_LABEL: &str = "commit_decision";
 pub const COMMITTED_BLOCKS_LABEL: &str = "committed_blocks";
 pub const CREATED_SUBSCRIPTION_LABEL: &str = "created_subscription";
 pub const ORDERED_BLOCK_ENTRIES_LABEL: &str = "ordered_block_entries";
-pub const ORDERED_BLOCKS_LABEL: &str = "ordered_blocks";
+pub const ORDERED_BLOCK_LABEL: &str = "ordered_block";
 pub const PENDING_BLOCK_ENTRIES_LABEL: &str = "pending_block_entries";
 pub const PENDING_BLOCKS_LABEL: &str = "pending_blocks";
 pub const STORED_PAYLOADS_LABEL: &str = "stored_payloads";
@@ -191,8 +191,8 @@ pub static PUBLISHER_SENT_MESSAGES: Lazy<IntCounterVec> = Lazy::new(|| {
         .unwrap()
 });
 
-/// Increments the given request counter with the provided values
-pub fn increment_request_counter(
+/// Increments the given counter with the provided values
+pub fn increment_counter(
     counter: &Lazy<IntCounterVec>,
     label: &str,
diff --git a/consensus/src/consensus_observer/network/observer_client.rs b/consensus/src/consensus_observer/network/observer_client.rs
index a2f94ff44524f..33c4ce902af33 100644
--- a/consensus/src/consensus_observer/network/observer_client.rs
+++ b/consensus/src/consensus_observer/network/observer_client.rs
@@ -46,7 +46,7 @@ impl>
         message_label: &str,
     ) -> Result<(), Error> {
         // Increment the message counter
-        metrics::increment_request_counter(
+        metrics::increment_counter(
             &metrics::PUBLISHER_SENT_MESSAGES,
             message_label,
             peer_network_id,
@@ -74,7 +74,7 @@ impl>
             .message(&format!("Failed to send message: {:?}", error)));
 
         // Update the direct send error metrics
-        metrics::increment_request_counter(
+        metrics::increment_counter(
             &metrics::PUBLISHER_SENT_MESSAGE_ERRORS,
             error.get_label(),
             peer_network_id,
@@ -125,7 +125,7 @@ impl>
             .message(&format!("Failed to serialize message: {:?}", error)));
 
         // Update the direct send error metrics
-        metrics::increment_request_counter(
+        metrics::increment_counter(
             &metrics::PUBLISHER_SENT_MESSAGE_ERRORS,
             error.get_label(),
             peer_network_id,
@@ -147,7 +147,7 @@ impl>
         let request_id = rand::thread_rng().gen();
 
         // Increment the request counter
-        metrics::increment_request_counter(
+        metrics::increment_counter(
             &metrics::OBSERVER_SENT_REQUESTS,
             request.get_label(),
             peer_network_id,
@@ -174,7 +174,7 @@ impl>
         match result {
             Ok(consensus_observer_response) => {
                 // Update the RPC success metrics
-                metrics::increment_request_counter(
+                metrics::increment_counter(
                     &metrics::OBSERVER_RECEIVED_MESSAGE_RESPONSES,
                     request_label,
                     peer_network_id,
@@ -192,7 +192,7 @@ impl>
             .error(&error));
 
         // Update the RPC error metrics
-        metrics::increment_request_counter(
+        metrics::increment_counter(
             &metrics::OBSERVER_SENT_MESSAGE_ERRORS,
             error.get_label(),
             peer_network_id,
diff --git
a/consensus/src/consensus_observer/network/observer_message.rs b/consensus/src/consensus_observer/network/observer_message.rs
index 6c68384cda32e..6ecb14d7995de 100644
--- a/consensus/src/consensus_observer/network/observer_message.rs
+++ b/consensus/src/consensus_observer/network/observer_message.rs
@@ -312,8 +312,8 @@ impl CommitDecision {
 /// The transaction payload and proof of each block
 #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
 pub struct PayloadWithProof {
-    pub transactions: Vec<SignedTransaction>,
-    pub proofs: Vec<ProofOfStore>,
+    transactions: Vec<SignedTransaction>,
+    proofs: Vec<ProofOfStore>,
 }
 
 impl PayloadWithProof {
@@ -337,8 +337,8 @@ impl PayloadWithProof {
 /// The transaction payload and proof of each block with a transaction limit
 #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
 pub struct PayloadWithProofAndLimit {
-    pub payload_with_proof: PayloadWithProof,
-    pub transaction_limit: Option<u64>,
+    payload_with_proof: PayloadWithProof,
+    transaction_limit: Option<u64>,
 }
 
 impl PayloadWithProofAndLimit {
@@ -629,8 +629,8 @@ impl BlockTransactionPayload {
 /// Payload message contains the block and transaction payload
 #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
 pub struct BlockPayload {
-    pub block: BlockInfo,
-    pub transaction_payload: BlockTransactionPayload,
+    block: BlockInfo,
+    transaction_payload: BlockTransactionPayload,
 }
 
 impl BlockPayload {
@@ -641,6 +641,26 @@ impl BlockPayload {
         }
     }
 
+    /// Returns a reference to the block info
+    pub fn block(&self) -> &BlockInfo {
+        &self.block
+    }
+
+    /// Returns the epoch of the block info
+    pub fn epoch(&self) -> u64 {
+        self.block.epoch()
+    }
+
+    /// Returns the round of the block info
+    pub fn round(&self) -> Round {
+        self.block.round()
+    }
+
+    /// Returns a reference to the block transaction payload
+    pub fn transaction_payload(&self) -> &BlockTransactionPayload {
+        &self.transaction_payload
+    }
+
     /// Verifies the block payload digests and returns an error if the data is invalid
     pub fn verify_payload_digests(&self) -> Result<(), Error> {
         // Verify the proof of store digests against the transaction
diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs
index 6939567dbb178..e1468748a781c 100644
--- a/consensus/src/consensus_observer/observer/consensus_observer.rs
+++ b/consensus/src/consensus_observer/observer/consensus_observer.rs
@@ -322,8 +322,8 @@ impl ConsensusObserver {
         block_payload: BlockPayload,
     ) {
         // Get the epoch and round for the block
-        let block_epoch = block_payload.block.epoch();
-        let block_round = block_payload.block.round();
+        let block_epoch = block_payload.epoch();
+        let block_round = block_payload.round();
 
         // Determine if the payload is behind the last ordered block, or if it already exists
         let last_ordered_block = self.get_last_ordered_block();
@@ -349,7 +349,8 @@ impl ConsensusObserver {
             error!(
                 LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
                     "Failed to verify block payload digests! Ignoring block: {:?}. Error: {:?}",
-                    block_payload.block, error
+                    block_payload.block(),
+                    error
                 ))
             );
             return;
@@ -363,7 +364,7 @@ impl ConsensusObserver {
             error!(
                 LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
                     "Failed to verify block payload signatures! Ignoring block: {:?}.
Error: {:?}", - block_payload.block, error + block_payload.block(), error )) ); return; @@ -543,7 +544,7 @@ impl ConsensusObserver { } // Increment the received message counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_RECEIVED_MESSAGES, message.get_label(), &peer_network_id, @@ -902,7 +903,8 @@ fn update_metrics_for_block_payload_message( // Log the received block payload message let log_message = format!( "Received block payload: {}, from peer: {}!", - block_payload.block, peer_network_id + block_payload.block(), + peer_network_id ); log_received_message(log_message); @@ -910,7 +912,7 @@ fn update_metrics_for_block_payload_message( metrics::set_gauge_with_label( &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, metrics::BLOCK_PAYLOAD_LABEL, - block_payload.block.round(), + block_payload.round(), ); } @@ -941,7 +943,7 @@ fn update_metrics_for_dropped_block_payload_message( block_payload: &BlockPayload, ) { // Increment the dropped message counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_DROPPED_MESSAGES, metrics::BLOCK_PAYLOAD_LABEL, &peer_network_id, @@ -952,8 +954,8 @@ fn update_metrics_for_dropped_block_payload_message( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Ignoring block payload message from peer: {:?}! Block epoch and round: ({}, {})", peer_network_id, - block_payload.block.epoch(), - block_payload.block.round() + block_payload.epoch(), + block_payload.round() )) ); } @@ -964,7 +966,7 @@ fn update_metrics_for_dropped_commit_decision_message( commit_decision: &CommitDecision, ) { // Increment the dropped message counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_DROPPED_MESSAGES, metrics::COMMITTED_BLOCKS_LABEL, &peer_network_id, @@ -987,9 +989,9 @@ fn update_metrics_for_dropped_ordered_block_message( ordered_block: &OrderedBlock, ) { // Increment the dropped message counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_DROPPED_MESSAGES, - metrics::ORDERED_BLOCKS_LABEL, + metrics::ORDERED_BLOCK_LABEL, &peer_network_id, ); @@ -1020,7 +1022,7 @@ fn update_metrics_for_ordered_block_message( // Update the metrics for the received ordered block metrics::set_gauge_with_label( &metrics::OBSERVER_RECEIVED_MESSAGE_ROUNDS, - metrics::ORDERED_BLOCKS_LABEL, + metrics::ORDERED_BLOCK_LABEL, ordered_block.proof_block_info().round(), ); } diff --git a/consensus/src/consensus_observer/observer/ordered_blocks.rs b/consensus/src/consensus_observer/observer/ordered_blocks.rs index 36af25939232e..a2408b3a4b20d 100644 --- a/consensus/src/consensus_observer/observer/ordered_blocks.rs +++ b/consensus/src/consensus_observer/observer/ordered_blocks.rs @@ -177,7 +177,7 @@ impl OrderedBlockStore { .sum(); metrics::set_gauge_with_label( &metrics::OBSERVER_NUM_PROCESSED_BLOCKS, - metrics::ORDERED_BLOCKS_LABEL, + metrics::ORDERED_BLOCK_LABEL, num_ordered_blocks, ); @@ -189,7 +189,7 @@ impl OrderedBlockStore { .unwrap_or(0); metrics::set_gauge_with_label( &metrics::OBSERVER_PROCESSED_BLOCK_ROUNDS, - metrics::ORDERED_BLOCKS_LABEL, + metrics::ORDERED_BLOCK_LABEL, highest_ordered_round, ); diff --git a/consensus/src/consensus_observer/observer/payload_store.rs b/consensus/src/consensus_observer/observer/payload_store.rs index edea188be6f0e..59859ec0b82ea 100644 --- a/consensus/src/consensus_observer/observer/payload_store.rs +++ b/consensus/src/consensus_observer/observer/payload_store.rs @@ -64,8 +64,7 @@ impl 
BlockPayloadStore {
     /// Returns true iff we already have a payload entry for the given block
     pub fn existing_payload_entry(&self, block_payload: &BlockPayload) -> bool {
         // Get the epoch and round of the payload
-        let block_info = &block_payload.block;
-        let epoch_and_round = (block_info.epoch(), block_info.round());
+        let epoch_and_round = (block_payload.epoch(), block_payload.round());
 
         // Check if a payload already exists in the store
         self.block_payloads.lock().contains_key(&epoch_and_round)
@@ -88,14 +87,15 @@ impl BlockPayloadStore {
             warn!(
                 LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
                     "Exceeded the maximum number of payloads: {:?}. Dropping block: {:?}!",
-                    max_num_pending_blocks, block_payload.block,
+                    max_num_pending_blocks,
+                    block_payload.block(),
                 ))
             );
             return; // Drop the block if we've exceeded the maximum
         }
 
         // Create the new payload status
-        let epoch_and_round = (block_payload.block.epoch(), block_payload.block.round());
+        let epoch_and_round = (block_payload.epoch(), block_payload.round());
         let payload_status = if verified_payload_signatures {
             BlockPayloadStatus::AvailableAndVerified(block_payload)
         } else {
@@ -171,7 +171,7 @@ impl BlockPayloadStore {
         // Get the block transaction payload
         let transaction_payload = match entry.get() {
             BlockPayloadStatus::AvailableAndVerified(block_payload) => {
-                &block_payload.transaction_payload
+                block_payload.transaction_payload()
             },
             BlockPayloadStatus::AvailableAndUnverified(_) => {
                 // The payload should have already been verified
@@ -261,7 +261,7 @@ impl BlockPayloadStore {
         // Collect the rounds of all newly verified blocks
         let verified_payload_rounds: Vec<Round> = verified_payloads_to_update
             .iter()
-            .map(|block_payload| block_payload.block.round())
+            .map(|block_payload| block_payload.round())
             .collect();
 
         // Update the verified block payloads.
Note: this will cause diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs index 2f124e5841cd3..8f70fe21b9261 100644 --- a/consensus/src/consensus_observer/observer/subscription_manager.rs +++ b/consensus/src/consensus_observer/observer/subscription_manager.rs @@ -360,7 +360,7 @@ impl SubscriptionManager { ); // Update the number of created subscriptions - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_CREATED_SUBSCRIPTIONS, metrics::CREATED_SUBSCRIPTION_LABEL, &peer_network_id, @@ -381,7 +381,7 @@ impl SubscriptionManager { ); // Update the number of terminated subscriptions - metrics::increment_request_counter( + metrics::increment_counter( &metrics::OBSERVER_TERMINATED_SUBSCRIPTIONS, error.get_label(), &peer_network_id, diff --git a/consensus/src/consensus_observer/publisher/consensus_publisher.rs b/consensus/src/consensus_observer/publisher/consensus_publisher.rs index 11e2f63aa92de..1379c87131cc5 100644 --- a/consensus/src/consensus_observer/publisher/consensus_publisher.rs +++ b/consensus/src/consensus_observer/publisher/consensus_publisher.rs @@ -150,7 +150,7 @@ impl ConsensusPublisher { let (peer_network_id, message, response_sender) = network_message.into_parts(); // Update the RPC request counter - metrics::increment_request_counter( + metrics::increment_counter( &metrics::PUBLISHER_RECEIVED_REQUESTS, message.get_label(), &peer_network_id, diff --git a/consensus/src/payload_manager.rs b/consensus/src/payload_manager.rs index 4749efb10c643..c2e7c580fb9b3 100644 --- a/consensus/src/payload_manager.rs +++ b/consensus/src/payload_manager.rs @@ -471,7 +471,7 @@ async fn get_transactions_for_observer( }; // If the payload is valid, publish it to any downstream observers - let transaction_payload = block_payload.transaction_payload; + let transaction_payload = block_payload.transaction_payload(); if let Some(consensus_publisher) = consensus_publisher { let message = ConsensusObserverMessage::new_block_payload_message( block.gen_block_info(HashValue::zero(), 0, None), From 9e4556c330a8bf6a82a4a2c43ba23b4defefcb53 Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Sun, 8 Sep 2024 07:00:45 -0400 Subject: [PATCH 12/36] [Consensus Observer] Support multiple subscriptions. --- .../src/config/consensus_observer_config.rs | 5 +- .../src/consensus_observer/common/error.rs | 4 + .../src/consensus_observer/common/metrics.rs | 27 +- .../observer/consensus_observer.rs | 32 +- .../observer/subscription.rs | 426 ++++---- .../observer/subscription_manager.rs | 976 +++++++++++++----- 6 files changed, 1002 insertions(+), 468 deletions(-) diff --git a/config/src/config/consensus_observer_config.rs b/config/src/config/consensus_observer_config.rs index 8d930cf17c8d3..0ca55c31d50e9 100644 --- a/config/src/config/consensus_observer_config.rs +++ b/config/src/config/consensus_observer_config.rs @@ -30,6 +30,8 @@ pub struct ConsensusObserverConfig { /// Interval (in milliseconds) to garbage collect peer state pub garbage_collection_interval_ms: u64, + /// The maximum number of concurrent subscriptions + pub max_concurrent_subscriptions: u64, /// Maximum number of blocks to keep in memory (e.g., pending blocks, ordered blocks, etc.) 
pub max_num_pending_blocks: u64,
     /// Maximum timeout (in milliseconds) for active subscriptions
@@ -52,8 +54,9 @@ impl Default for ConsensusObserverConfig {
             publisher_enabled: false,
             max_network_channel_size: 1000,
             max_parallel_serialization_tasks: num_cpus::get(), // Default to the number of CPUs
-            network_request_timeout_ms: 10_000, // 10 seconds
+            network_request_timeout_ms: 5_000, // 5 seconds
             garbage_collection_interval_ms: 60_000, // 60 seconds
+            max_concurrent_subscriptions: 2, // 2 streams should be sufficient
             max_num_pending_blocks: 100, // 100 blocks
             max_subscription_timeout_ms: 30_000, // 30 seconds
             max_synced_version_timeout_ms: 60_000, // 60 seconds
diff --git a/consensus/src/consensus_observer/common/error.rs b/consensus/src/consensus_observer/common/error.rs
index 37a516d10115c..7fc6a78785a96 100644
--- a/consensus/src/consensus_observer/common/error.rs
+++ b/consensus/src/consensus_observer/common/error.rs
@@ -21,6 +21,9 @@ pub enum Error {
     #[error("Subscription progress stopped: {0}")]
     SubscriptionProgressStopped(String),
 
+    #[error("Subscriptions reset: {0}")]
+    SubscriptionsReset(String),
+
     #[error("Subscription suboptimal: {0}")]
     SubscriptionSuboptimal(String),
 
@@ -40,6 +43,7 @@ impl Error {
             Self::RpcError(_) => "rpc_error",
             Self::SubscriptionDisconnected(_) => "subscription_disconnected",
             Self::SubscriptionProgressStopped(_) => "subscription_progress_stopped",
+            Self::SubscriptionsReset(_) => "subscriptions_reset",
             Self::SubscriptionSuboptimal(_) => "subscription_suboptimal",
             Self::SubscriptionTimeout(_) => "subscription_timeout",
             Self::UnexpectedError(_) => "unexpected_error",
diff --git a/consensus/src/consensus_observer/common/metrics.rs b/consensus/src/consensus_observer/common/metrics.rs
index 0e91e1d9af702..5888bbfcaca26 100644
--- a/consensus/src/consensus_observer/common/metrics.rs
+++ b/consensus/src/consensus_observer/common/metrics.rs
@@ -5,8 +5,8 @@
 use aptos_config::network_id::{NetworkId, PeerNetworkId};
 use aptos_metrics_core::{
-    register_histogram_vec, register_int_counter_vec, register_int_gauge_vec, HistogramVec,
-    IntCounterVec, IntGaugeVec,
+    register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge_vec,
+    HistogramVec, IntCounter, IntCounterVec, IntGaugeVec,
 };
 use once_cell::sync::Lazy;
 
@@ -31,6 +31,14 @@ pub static OBSERVER_CREATED_SUBSCRIPTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
         .unwrap()
 });
 
+/// Counter for tracking the number of times the block state was cleared by the consensus observer
+pub static OBSERVER_CLEARED_BLOCK_STATE: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "consensus_observer_cleared_block_state",
+        "Counter for tracking the number of times the block state was cleared by the consensus observer",
+    ).unwrap()
+});
+
 /// Counter for tracking dropped (direct send) messages by the consensus observer
 pub static OBSERVER_DROPPED_MESSAGES: Lazy<IntCounterVec> = Lazy::new(|| {
     register_int_counter_vec!(
         "consensus_observer_dropped_messages",
         "Counters related to dropped (direct send) messages by the consensus observer",
         &["message_type", "network_id"]
     )
     .unwrap()
 });
 
+/// Counter for tracking rejected (direct send) messages by the consensus observer
+pub static OBSERVER_REJECTED_MESSAGES: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "consensus_observer_rejected_messages",
+        "Counters related to rejected (direct send) messages by the consensus observer",
+        &["message_type", "network_id"]
+    )
+    .unwrap()
+});
+
 /// Gauge for tracking the number of active subscriptions for the consensus observer
 pub static OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS: Lazy<IntGaugeVec> = Lazy::new(|| {
     register_int_gauge_vec!(
@@
-203,6 +221,11 @@ pub fn increment_counter(
         .inc();
 }
 
+/// Increments the given counter without labels
+pub fn increment_counter_without_labels(counter: &Lazy<IntCounter>) {
+    counter.inc();
+}
+
 /// Observes the value for the provided histogram and label
 pub fn observe_value_with_label(
     histogram: &Lazy<HistogramVec>,
diff --git a/consensus/src/consensus_observer/observer/consensus_observer.rs b/consensus/src/consensus_observer/observer/consensus_observer.rs
index e1468748a781c..032a3fa38f8bc 100644
--- a/consensus/src/consensus_observer/observer/consensus_observer.rs
+++ b/consensus/src/consensus_observer/observer/consensus_observer.rs
@@ -85,7 +85,7 @@ pub struct ConsensusObserver {
     // The flag indicates if we're waiting to transition to a new epoch.
     sync_handle: Option<(DropGuard, bool)>,
 
-    // The subscription manager
+    // The consensus observer subscription manager
     subscription_manager: SubscriptionManager,
 }
 
@@ -165,13 +165,15 @@ impl ConsensusObserver {
             return;
         }
 
-        // Otherwise, check the health of the active subscription
-        let new_subscription_created = self
+        // Otherwise, check the health of the active subscriptions
+        if let Err(error) = self
             .subscription_manager
             .check_and_manage_subscriptions()
-            .await;
-        if new_subscription_created {
-            // Clear the pending block state (a new subscription was created)
+            .await
+        {
+            // Log the failure and clear the pending block state
+            warn!(LogSchema::new(LogEntry::ConsensusObserver)
+                .message(&format!("Subscription checks failed! Error: {:?}", error)));
             self.clear_pending_block_state().await;
         }
     }
@@ -198,6 +200,9 @@ impl ConsensusObserver {
                 ))
             );
         }
+
+        // Increment the cleared block state counter
+        metrics::increment_counter_without_labels(&metrics::OBSERVER_CLEARED_BLOCK_STATE);
     }
 
     /// Finalizes the ordered block by sending it to the execution pipeline
@@ -528,18 +533,25 @@ impl ConsensusObserver {
         // Unpack the network message
         let (peer_network_id, message) = network_message.into_parts();
 
-        // Verify the message is from the peer we've subscribed to
+        // Verify the message is from the peers we've subscribed to
         if let Err(error) = self
             .subscription_manager
-            .verify_message_sender(peer_network_id)
+            .verify_message_for_subscription(peer_network_id)
         {
+            // Increment the rejected message counter
+            metrics::increment_counter(
+                &metrics::OBSERVER_REJECTED_MESSAGES,
+                message.get_label(),
+                &peer_network_id,
+            );
+
+            // Log the error and return
             warn!(
                 LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
-                    "Message failed subscription sender verification! Error: {:?}",
+                    "Received message that was not from an active subscription! Error: {:?}",
                     error,
                 ))
             );
-
             return;
         }
diff --git a/consensus/src/consensus_observer/observer/subscription.rs b/consensus/src/consensus_observer/observer/subscription.rs
index fe29aa6a5a577..d3023da292d00 100644
--- a/consensus/src/consensus_observer/observer/subscription.rs
+++ b/consensus/src/consensus_observer/observer/subscription.rs
@@ -31,7 +31,7 @@ pub struct ConsensusObserverSubscription {
     // The peer network id of the active subscription
     peer_network_id: PeerNetworkId,
 
-    // The timestamp of the last message received from the peer
+    // The timestamp of the last message received for the subscription
     last_message_receive_time: Instant,
 
     // The timestamp and connected peers for the last optimality check
@@ -71,7 +71,7 @@ impl ConsensusObserverSubscription {
     /// last check; or (ii) enough time has elapsed to force a refresh.
pub fn check_subscription_peer_optimality(
         &mut self,
-        peers_and_metadata: HashMap<PeerNetworkId, PeerMetadata>,
+        peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
     ) -> Result<(), Error> {
         // Get the last optimality check time and connected peers
         let (last_optimality_check_time, last_optimality_check_peers) =
@@ -106,16 +106,20 @@ impl ConsensusObserverSubscription {
         self.last_optimality_check_time_and_peers = (time_now, current_connected_peers);
 
         // Sort the peers by subscription optimality
-        let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata);
-
-        // Verify that we're subscribed to the most optimal peer
-        if let Some(optimal_peer) = sorted_peers.first() {
-            if *optimal_peer != self.peer_network_id {
-                return Err(Error::SubscriptionSuboptimal(format!(
-                    "Subscription to peer: {} is no longer optimal! New optimal peer: {}",
-                    self.peer_network_id, optimal_peer
-                )));
-            }
+        let sorted_peers = sort_peers_by_subscription_optimality(peers_and_metadata);
+
+        // Verify that this peer is one of the most optimal peers
+        let max_concurrent_subscriptions =
+            self.consensus_observer_config.max_concurrent_subscriptions as usize;
+        if !sorted_peers
+            .iter()
+            .take(max_concurrent_subscriptions)
+            .any(|peer| peer == &self.peer_network_id)
+        {
+            return Err(Error::SubscriptionSuboptimal(format!(
+                "Subscription to peer: {} is no longer optimal! New optimal peers: {:?}",
+                self.peer_network_id, sorted_peers
+            )));
         }
 
         Ok(())
@@ -180,25 +184,9 @@ impl ConsensusObserverSubscription {
         Ok(())
     }
 
-    /// Returns the peer network id of the subscription
-    pub fn get_peer_network_id(&self) -> PeerNetworkId {
-        self.peer_network_id
-    }
-
-    /// Verifies the given message is from the expected peer
-    pub fn verify_message_sender(&mut self, peer_network_id: &PeerNetworkId) -> Result<(), Error> {
-        // Verify the message is from the expected peer
-        if self.peer_network_id != *peer_network_id {
-            return Err(Error::UnexpectedError(format!(
-                "Received message from unexpected peer: {}!
Subscribed to: {}", - peer_network_id, self.peer_network_id - ))); - } - - // Update the last message receive time + /// Updates the last message receive time to the current time + pub fn update_last_message_receive_time(&mut self) { self.last_message_receive_time = self.time_service.now(); - - Ok(()) } } @@ -346,6 +334,7 @@ mod test { }; use aptos_storage_interface::Result; use aptos_types::{network_address::NetworkAddress, transaction::Version}; + use claims::assert_matches; use mockall::mock; // This is a simple mock of the DbReader (it generates a MockDatabaseReader) @@ -357,12 +346,12 @@ mod test { } #[test] - fn check_subscription_peer_optimality() { - // Create a consensus observer config and time service - let consensus_observer_config = ConsensusObserverConfig::default(); - let time_service = TimeService::mock(); + fn test_check_subscription_peer_optimality_single() { + // Create a consensus observer config with a maximum of 1 subscription + let consensus_observer_config = create_observer_config(1); // Create a new observer subscription + let time_service = TimeService::mock(); let peer_network_id = PeerNetworkId::random(); let mut subscription = ConsensusObserverSubscription::new( consensus_observer_config, @@ -372,46 +361,27 @@ mod test { ); // Verify the time and peers for the last optimality check - let (last_check_time, last_check_peers) = - subscription.last_optimality_check_time_and_peers.clone(); - assert_eq!(last_check_time, time_service.now()); - assert!(last_check_peers.is_empty()); + let mock_time_service = time_service.into_mock(); + verify_last_check_time_and_peers(&subscription, mock_time_service.now(), HashSet::new()); // Create a peers and metadata map for the subscription let mut peers_and_metadata = HashMap::new(); - peers_and_metadata.insert( - peer_network_id, - PeerMetadata::new_for_test( - create_connection_metadata(peer_network_id, true), - PeerMonitoringMetadata::new(None, None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); // Add a more optimal peer to the set of peers let new_optimal_peer = PeerNetworkId::random(); - peers_and_metadata.insert( - new_optimal_peer, - PeerMetadata::new_for_test( - create_connection_metadata(new_optimal_peer, true), - PeerMonitoringMetadata::new(Some(0.1), None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, new_optimal_peer, true, true); // Verify that the peer is optimal (not enough time has elapsed to check) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Elapse some amount of time (but not enough to check optimality) - let mock_time_service = time_service.into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.subscription_peer_change_interval_ms / 2, )); // Verify that the peer is still optimal (not enough time has elapsed to check) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Elapse enough time to check the peer optimality mock_time_service.advance(Duration::from_millis( @@ -419,17 +389,13 @@ mod test { )); // Verify that the peer is no longer optimal (a more optimal peer has been added) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_err()); + 
verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); // Verify the time of the last peer optimality check - let (last_check_time, last_check_peers) = - subscription.last_optimality_check_time_and_peers.clone(); - assert_eq!(last_check_time, mock_time_service.now()); - assert_eq!( - last_check_peers, - peers_and_metadata.keys().cloned().collect() + verify_last_check_time_and_peers( + &subscription, + mock_time_service.now(), + peers_and_metadata.keys().cloned().collect(), ); // Elapse enough time to check the peer optimality @@ -438,35 +404,29 @@ mod test { )); // Verify that the peer is now optimal (the peers haven't changed) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Remove the current peer from the list of peers peers_and_metadata.remove(&peer_network_id); // Verify that the peer is not optimal (the peers have changed) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_err()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); // Verify the time of the last peer optimality check - let (last_check_time, last_check_peers) = - subscription.last_optimality_check_time_and_peers.clone(); - assert_eq!(last_check_time, mock_time_service.now()); - assert_eq!( - last_check_peers, - peers_and_metadata.keys().cloned().collect() + verify_last_check_time_and_peers( + &subscription, + mock_time_service.now(), + peers_and_metadata.keys().cloned().collect(), ); } #[test] - fn check_subscription_peer_refresh() { - // Create a consensus observer config and time service - let consensus_observer_config = ConsensusObserverConfig::default(); - let time_service = TimeService::mock(); + fn test_check_subscription_peer_optimality_multiple() { + // Create a consensus observer config with a maximum of 2 subscriptions + let consensus_observer_config = create_observer_config(2); // Create a new observer subscription + let time_service = TimeService::mock(); let peer_network_id = PeerNetworkId::random(); let mut subscription = ConsensusObserverSubscription::new( consensus_observer_config, @@ -477,33 +437,73 @@ mod test { // Create a peers and metadata map for the subscription let mut peers_and_metadata = HashMap::new(); - peers_and_metadata.insert( + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); + + // Add a more optimal peer to the set of peers + let new_optimal_peer = PeerNetworkId::random(); + add_metadata_for_peer(&mut peers_and_metadata, new_optimal_peer, true, true); + + // Elapse enough time to check the peer optimality + let mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_peer_change_interval_ms + 1, + )); + + // Verify that the peer is optimal (it's in the top 2 most optimal peers) + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); + + // Add another more optimal peer to the set of peers + let another_optimal_peer = PeerNetworkId::random(); + add_metadata_for_peer(&mut peers_and_metadata, another_optimal_peer, true, true); + + // Elapse enough time to check the peer optimality + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_peer_change_interval_ms + 1, + )); + + // Verify that the peer is no longer optimal (it's not in the top 2 most optimal peers) + 
verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); + + // Remove the previous optimal peer from the list of peers + peers_and_metadata.remove(&new_optimal_peer); + + // Elapse enough time to check the peer optimality + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_peer_change_interval_ms + 1, + )); + + // Verify that the peer is optimal (it's in the top 2 most optimal peers) + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); + } + + #[test] + fn test_check_subscription_peer_refresh() { + // Create a consensus observer config with a maximum of 1 subscription + let consensus_observer_config = create_observer_config(1); + + // Create a new observer subscription + let time_service = TimeService::mock(); + let peer_network_id = PeerNetworkId::random(); + let mut subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(MockDatabaseReader::new()), peer_network_id, - PeerMetadata::new_for_test( - create_connection_metadata(peer_network_id, true), - PeerMonitoringMetadata::new(None, None, None, None, None), - ), + time_service.clone(), ); + // Create a peers and metadata map for the subscription + let mut peers_and_metadata = HashMap::new(); + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); + // Verify that the peer is optimal (not enough time has elapsed to refresh) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Add a more optimal peer to the set of peers let new_optimal_peer = PeerNetworkId::random(); - peers_and_metadata.insert( - new_optimal_peer, - PeerMetadata::new_for_test( - create_connection_metadata(new_optimal_peer, true), - PeerMonitoringMetadata::new(Some(0.1), None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, new_optimal_peer, true, true); // Verify that the peer is still optimal (not enough time has elapsed to refresh) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Elapse enough time to refresh optimality let mock_time_service = time_service.into_mock(); @@ -512,9 +512,7 @@ mod test { )); // Verify that the peer is no longer optimal - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_err()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); // Elapse some amount of time (but not enough to refresh) mock_time_service.advance(Duration::from_millis( @@ -522,9 +520,7 @@ mod test { )); // Verify that the peer is now optimal (not enough time has elapsed to refresh) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Remove the more optimal peer from the list of peers peers_and_metadata.remove(&new_optimal_peer); @@ -535,23 +531,23 @@ mod test { )); // Verify that the peer is optimal - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Verify the time of the last peer optimality check - let current_time = mock_time_service.now(); - let (last_check_time, _) = 
subscription.last_optimality_check_time_and_peers; - assert_eq!(last_check_time, current_time); + verify_last_check_time_and_peers( + &subscription, + mock_time_service.now(), + peers_and_metadata.keys().cloned().collect(), + ); } #[test] - fn check_subscription_peer_optimality_supported() { - // Create a consensus observer config and time service - let consensus_observer_config = ConsensusObserverConfig::default(); - let time_service = TimeService::mock(); + fn test_check_subscription_peer_optimality_supported() { + // Create a consensus observer config with a maximum of 1 subscription + let consensus_observer_config = create_observer_config(1); // Create a new observer subscription + let time_service = TimeService::mock(); let peer_network_id = PeerNetworkId::random(); let mut subscription = ConsensusObserverSubscription::new( consensus_observer_config, @@ -562,13 +558,7 @@ mod test { // Insert empty metadata for the subscription peer let mut peers_and_metadata = HashMap::new(); - peers_and_metadata.insert( - peer_network_id, - PeerMetadata::new_for_test( - create_connection_metadata(peer_network_id, true), - PeerMonitoringMetadata::new(None, None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); // Elapse enough time to check optimality let mock_time_service = time_service.into_mock(); @@ -577,19 +567,11 @@ mod test { )); // Verify that the peer is still optimal (there are no other peers) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Add a more optimal peer without consensus observer support let unsupported_peer = PeerNetworkId::random(); - peers_and_metadata.insert( - unsupported_peer, - PeerMetadata::new_for_test( - create_connection_metadata(unsupported_peer, false), - PeerMonitoringMetadata::new(Some(0.1), None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, unsupported_peer, false, false); // Elapse enough time to check optimality mock_time_service.advance(Duration::from_millis( @@ -597,19 +579,11 @@ mod test { )); // Verify that the peer is still optimal (the unsupported peer is ignored) - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_ok()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, true); // Add another more optimal peer with consensus observer support let supported_peer = PeerNetworkId::random(); - peers_and_metadata.insert( - supported_peer, - PeerMetadata::new_for_test( - create_connection_metadata(supported_peer, true), - PeerMonitoringMetadata::new(Some(0.01), None, None, None, None), - ), - ); + add_metadata_for_peer(&mut peers_and_metadata, supported_peer, true, true); // Elapse enough time to check optimality mock_time_service.advance(Duration::from_millis( @@ -617,9 +591,7 @@ mod test { )); // Verify that the peer is no longer optimal - assert!(subscription - .check_subscription_peer_optimality(peers_and_metadata.clone()) - .is_err()); + verify_subscription_peer_optimality(&mut subscription, &peers_and_metadata, false); } #[test] @@ -637,7 +609,7 @@ mod test { // Verify that the subscription has not timed out and that the last message time is updated let current_time = time_service.now(); - assert!(subscription.check_subscription_timeout().is_ok()); + verify_subscription_time_out(&subscription, false); assert_eq!(subscription.last_message_receive_time, 
current_time); // Elapse some amount of time (but not enough to timeout) @@ -647,17 +619,15 @@ mod test { )); // Verify that the subscription has not timed out - assert!(subscription.check_subscription_timeout().is_ok()); + verify_subscription_time_out(&subscription, false); - // Verify a new message is received successfully and that the last message time is updated + // Update the last message receive time let current_time = mock_time_service.now(); - subscription - .verify_message_sender(&peer_network_id) - .unwrap(); + subscription.update_last_message_receive_time(); assert_eq!(subscription.last_message_receive_time, current_time); // Verify that the subscription has not timed out - assert!(subscription.check_subscription_timeout().is_ok()); + verify_subscription_time_out(&subscription, false); // Elapse enough time to timeout the subscription mock_time_service.advance(Duration::from_millis( @@ -665,7 +635,7 @@ mod test { )); // Verify that the subscription has timed out - assert!(subscription.check_subscription_timeout().is_err()); + verify_subscription_time_out(&subscription, true); } #[test] @@ -694,25 +664,23 @@ mod test { ); // Verify that the DB is making sync progress and that the highest synced version is updated - let current_time = time_service.now(); - assert!(subscription.check_syncing_progress().is_ok()); - assert_eq!( - subscription.highest_synced_version_and_time, - (first_synced_version, current_time) + let mock_time_service = time_service.into_mock(); + verify_subscription_syncing_progress( + &mut subscription, + first_synced_version, + mock_time_service.now(), ); // Elapse some amount of time (not enough to timeout) - let mock_time_service = time_service.into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.max_synced_version_timeout_ms / 2, )); // Verify that the DB is still making sync progress - let current_time = mock_time_service.now(); - assert!(subscription.check_syncing_progress().is_ok()); - assert_eq!( - subscription.highest_synced_version_and_time, - (first_synced_version, current_time) + verify_subscription_syncing_progress( + &mut subscription, + first_synced_version, + mock_time_service.now(), ); // Elapse enough time to timeout the subscription @@ -721,11 +689,10 @@ mod test { )); // Verify that the DB is still making sync progress (the next version is higher) - let current_time = mock_time_service.now(); - assert!(subscription.check_syncing_progress().is_ok()); - assert_eq!( - subscription.highest_synced_version_and_time, - (second_synced_version, current_time) + verify_subscription_syncing_progress( + &mut subscription, + second_synced_version, + mock_time_service.now(), ); // Elapse enough time to timeout the subscription @@ -734,11 +701,14 @@ mod test { )); // Verify that the DB is not making sync progress and that the subscription has timed out - assert!(subscription.check_syncing_progress().is_err()); + assert_matches!( + subscription.check_syncing_progress(), + Err(Error::SubscriptionProgressStopped(_)) + ); } #[test] - fn test_verify_message_sender() { + fn test_update_last_message_receive_time() { // Create a new observer subscription let consensus_observer_config = ConsensusObserverConfig::default(); let peer_network_id = PeerNetworkId::random(); @@ -750,28 +720,18 @@ mod test { time_service.clone(), ); - // Verify that the message sender is valid - let current_time = time_service.now(); - assert!(subscription.verify_message_sender(&peer_network_id).is_ok()); - assert_eq!(subscription.last_message_receive_time, 
current_time);
+        // Verify the initial last message time
+        assert_eq!(subscription.last_message_receive_time, time_service.now());
 
         // Elapse some amount of time
         let mock_time_service = time_service.into_mock();
         mock_time_service.advance(Duration::from_secs(10));
 
-        // Verify that the message sender is not the expected peer
-        let other_peer_network_id = PeerNetworkId::random();
-        assert!(subscription
-            .verify_message_sender(&other_peer_network_id)
-            .is_err());
-        assert_eq!(subscription.last_message_receive_time, current_time);
-
-        // Elapse more time
-        mock_time_service.advance(Duration::from_secs(10));
-
-        // Verify that the message sender is the expected peer and that the last message time is updated
+        // Update the last message time
         let current_time = mock_time_service.now();
-        assert!(subscription.verify_message_sender(&peer_network_id).is_ok());
+        subscription.update_last_message_receive_time();
+
+        // Verify that the last message time is updated
         assert_eq!(subscription.last_message_receive_time, current_time);
     }
 
@@ -886,6 +846,26 @@ mod test {
         assert_eq!(sorted_peers, vec![*supported_peer]);
     }
 
+    /// Adds metadata for the specified peer to the map of peers and metadata
+    fn add_metadata_for_peer(
+        peers_and_metadata: &mut HashMap<PeerNetworkId, PeerMetadata>,
+        peer_network_id: PeerNetworkId,
+        support_consensus_observer: bool,
+        set_ping_latency: bool,
+    ) {
+        // Determine the ping latency to use for the peer
+        let average_ping_latency = if set_ping_latency { Some(0.1) } else { None };
+
+        // Add the peer and metadata to the map
+        peers_and_metadata.insert(
+            peer_network_id,
+            PeerMetadata::new_for_test(
+                create_connection_metadata(peer_network_id, support_consensus_observer),
+                PeerMonitoringMetadata::new(average_ping_latency, None, None, None, None),
+            ),
+        );
+    }
+
     /// Creates a new connection metadata for testing
     fn create_connection_metadata(
         peer_network_id: PeerNetworkId,
@@ -913,6 +893,14 @@ mod test {
         }
     }
 
+    /// Creates a consensus observer config with the given max concurrent subscriptions
+    fn create_observer_config(max_concurrent_subscriptions: u64) -> ConsensusObserverConfig {
+        ConsensusObserverConfig {
+            max_concurrent_subscriptions,
+            ..ConsensusObserverConfig::default()
+        }
+    }
+
     /// Creates a new peer and metadata for testing
     fn create_peer_and_metadata(
         latency: Option<f64>,
@@ -991,4 +979,62 @@ mod test {
             previous_distance = distance;
         }
     }
+
+    /// Verifies that the last check time and peers are as expected
+    fn verify_last_check_time_and_peers(
+        subscription: &ConsensusObserverSubscription,
+        expected_last_check_time: Instant,
+        expected_last_check_peers: HashSet<PeerNetworkId>,
+    ) {
+        // Get the last check time and peers from the subscription
+        let (last_check_time, last_check_peers) =
+            subscription.last_optimality_check_time_and_peers.clone();
+
+        // Verify the last check time and peers match the expected values
+        assert_eq!(last_check_time, expected_last_check_time);
+        assert_eq!(last_check_peers, expected_last_check_peers);
+    }
+
+    /// Verifies that the subscription time out matches the expected value
+    fn verify_subscription_time_out(subscription: &ConsensusObserverSubscription, timed_out: bool) {
+        // Check if the subscription has timed out
+        let result = subscription.check_subscription_timeout();
+
+        // Verify the result
+        if timed_out {
+            assert_matches!(result, Err(Error::SubscriptionTimeout(_)));
+        } else {
+            assert!(result.is_ok());
+        }
+    }
+
+    /// Verifies that the peer optimality matches the expected value
+    fn verify_subscription_peer_optimality(
+        subscription: &mut
ConsensusObserverSubscription, + peers_and_metadata: &HashMap, + is_optimal: bool, + ) { + // Check the subscription peer optimality + let result = subscription.check_subscription_peer_optimality(peers_and_metadata); + + // Verify the result + if is_optimal { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionSuboptimal(_))); + } + } + + /// Verifies that the syncing progress is as expected + fn verify_subscription_syncing_progress( + subscription: &mut ConsensusObserverSubscription, + first_synced_version: Version, + time: Instant, + ) { + assert!(subscription.check_syncing_progress().is_ok()); + assert_eq!( + subscription.highest_synced_version_and_time, + (first_synced_version, time) + ); + } } diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs index 8f70fe21b9261..e63fdfc68fa23 100644 --- a/consensus/src/consensus_observer/observer/subscription_manager.rs +++ b/consensus/src/consensus_observer/observer/subscription_manager.rs @@ -21,12 +21,13 @@ use aptos_logger::{error, info, warn}; use aptos_network::application::{interface::NetworkClient, metadata::PeerMetadata}; use aptos_storage_interface::DbReader; use aptos_time_service::TimeService; +use itertools::Itertools; use std::{collections::HashMap, sync::Arc}; /// The manager for consensus observer subscriptions pub struct SubscriptionManager { - // The currently active consensus observer subscription - active_observer_subscription: Option, + // The currently active set of consensus observer subscriptions + active_observer_subscriptions: HashMap, // The consensus observer client to send network messages consensus_observer_client: @@ -56,7 +57,7 @@ impl SubscriptionManager { time_service: TimeService, ) -> Self { Self { - active_observer_subscription: None, + active_observer_subscriptions: HashMap::new(), consensus_observer_client, consensus_observer_config, consensus_publisher, @@ -65,244 +66,356 @@ impl SubscriptionManager { } } - /// Checks if the active subscription is still healthy. If not, an error is returned. - fn check_active_subscription(&mut self) -> Result<(), Error> { - let active_observer_subscription = self.active_observer_subscription.take(); - if let Some(mut active_subscription) = active_observer_subscription { - // Check if the peer for the subscription is still connected - let peer_network_id = active_subscription.get_peer_network_id(); - let peer_still_connected = self - .get_connected_peers_and_metadata() - .map_or(false, |peers_and_metadata| { - peers_and_metadata.contains_key(&peer_network_id) - }); - - // Verify the peer is still connected - if !peer_still_connected { - return Err(Error::SubscriptionDisconnected( - "The peer is no longer connected!".to_string(), - )); - } + /// Checks if the subscription to the given peer is still healthy. + /// If not, an error explaining why it is unhealthy is returned. 
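Before the implementation that follows, it may help to see the check-and-short-circuit shape in isolation: each health check either passes or returns the error that becomes the subscription's termination reason. A minimal, self-contained sketch (all names below are illustrative stand-ins, not the actual aptos types):

```rust
use std::collections::{HashMap, HashSet};

#[derive(Debug)]
enum Error {
    SubscriptionDisconnected(String),
    SubscriptionTimeout(String),
}

struct Subscription {
    timed_out: bool,
}

impl Subscription {
    fn check_timeout(&self) -> Result<(), Error> {
        if self.timed_out {
            Err(Error::SubscriptionTimeout("no recent messages".into()))
        } else {
            Ok(())
        }
    }
}

fn check_health(
    connected_peers: &HashSet<u64>,
    subscriptions: &mut HashMap<u64, Subscription>,
    peer: u64,
) -> Result<(), Error> {
    // Look up the subscription (the real code returns an error for unknown peers)
    let subscription = subscriptions
        .get_mut(&peer)
        .ok_or_else(|| Error::SubscriptionDisconnected(format!("peer {peer} is unknown")))?;

    // Each check short-circuits with `?`, so the first failing check
    // becomes the termination reason reported to the caller
    if !connected_peers.contains(&peer) {
        return Err(Error::SubscriptionDisconnected(format!("peer {peer} dropped")));
    }
    subscription.check_timeout()?;
    // ... the real code also checks syncing progress and peer optimality here

    Ok(())
}

fn main() {
    let connected = HashSet::from([7]);
    let mut subscriptions = HashMap::from([(7, Subscription { timed_out: false })]);
    assert!(check_health(&connected, &mut subscriptions, 7).is_ok());

    subscriptions.get_mut(&7).unwrap().timed_out = true;
    assert!(check_health(&connected, &mut subscriptions, 7).is_err());
}
```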
+ fn check_subscription_health( + &mut self, + connected_peers_and_metadata: &HashMap, + peer_network_id: PeerNetworkId, + ) -> Result<(), Error> { + match self.active_observer_subscriptions.get_mut(&peer_network_id) { + Some(active_subscription) => { + // Verify the peer is still connected + if !connected_peers_and_metadata.contains_key(&peer_network_id) { + return Err(Error::SubscriptionDisconnected(format!( + "The peer: {:?} is no longer connected!", + peer_network_id + ))); + } - // Verify the subscription has not timed out - active_subscription.check_subscription_timeout()?; + // Verify the subscription has not timed out + active_subscription.check_subscription_timeout()?; - // Verify that the DB is continuing to sync and commit new data - active_subscription.check_syncing_progress()?; + // Verify that the DB is continuing to sync and commit new data + active_subscription.check_syncing_progress()?; - // Verify that the subscription peer is optimal - if let Some(peers_and_metadata) = self.get_connected_peers_and_metadata() { - active_subscription.check_subscription_peer_optimality(peers_and_metadata)?; - } + // Verify that the subscription peer is still optimal + active_subscription + .check_subscription_peer_optimality(connected_peers_and_metadata)?; - // The subscription seems healthy, we can keep it - self.active_observer_subscription = Some(active_subscription); + // The subscription seems healthy + Ok(()) + }, + None => Err(Error::UnexpectedError(format!( + "The subscription to peer: {:?} is not active!", + peer_network_id + ))), } - - Ok(()) } - /// Checks the health of the active subscription. If the subscription is - /// unhealthy, it will be terminated and a new subscription will be created. - /// This returns true iff a new subscription was created. - pub async fn check_and_manage_subscriptions(&mut self) -> bool { - // Get the peer ID of the currently active subscription (if any) - let active_subscription_peer = self - .active_observer_subscription - .as_ref() - .map(|subscription| subscription.get_peer_network_id()); - - // If we have an active subscription, verify that the subscription - // is still healthy. If not, the subscription should be terminated. - if let Some(active_subscription_peer) = active_subscription_peer { - if let Err(error) = self.check_active_subscription() { - // Log the subscription termination - warn!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Terminating subscription to peer: {:?}! Error: {:?}", - active_subscription_peer, error - )) - ); - - // Unsubscribe from the peer - self.unsubscribe_from_peer(active_subscription_peer); + /// Checks the health of the active subscriptions. If any subscription is + /// unhealthy, it will be terminated and new subscriptions will be created. + /// This returns an error iff all subscriptions were unhealthy and terminated. 
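To make the counting logic in the management pass concrete, here is a tiny, self-contained illustration with assumed toy values (three subscription slots, two active subscriptions that both fail their health checks); only the arithmetic mirrors the code below:

```rust
fn main() {
    // Toy values, not real configuration
    let max_concurrent_subscriptions: usize = 3;
    let initial_subscription_peers: usize = 2;
    let num_terminated_subscriptions: usize = 2;

    // All subscriptions were terminated iff we had some and lost every one
    let all_subscriptions_terminated = num_terminated_subscriptions > 0
        && num_terminated_subscriptions == initial_subscription_peers;

    // Top the active set back up to the configured maximum
    let num_active = initial_subscription_peers - num_terminated_subscriptions;
    let num_subscriptions_to_create =
        max_concurrent_subscriptions.saturating_sub(num_active);

    assert!(all_subscriptions_terminated);
    assert_eq!(num_subscriptions_to_create, 3);
}
```

Note that the "all terminated" case is reported as an error even when replacements are created, so the caller can react to a full subscription reset.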
+ pub async fn check_and_manage_subscriptions(&mut self) -> Result<(), Error> { + // Get the subscription and connected peers + let initial_subscription_peers = self.get_active_subscription_peers(); + let connected_peers_and_metadata = self.get_connected_peers_and_metadata(); + + // Terminate any unhealthy subscriptions + let terminated_subscriptions = + self.terminate_unhealthy_subscriptions(&connected_peers_and_metadata); + + // Check if all subscriptions were terminated + let num_terminated_subscriptions = terminated_subscriptions.len(); + let all_subscriptions_terminated = num_terminated_subscriptions > 0 + && num_terminated_subscriptions == initial_subscription_peers.len(); + + // Calculate the number of new subscriptions to create + let max_concurrent_subscriptions = + self.consensus_observer_config.max_concurrent_subscriptions as usize; + let num_subscriptions_to_create = + max_concurrent_subscriptions.saturating_sub(self.active_observer_subscriptions.len()); + + // Create the new subscriptions (if required) + let terminated_subscription_peers = terminated_subscriptions + .iter() + .map(|(peer, _)| *peer) + .collect(); + let new_subscription_peers = self + .create_new_subscriptions( + connected_peers_and_metadata, + num_subscriptions_to_create, + terminated_subscription_peers, + ) + .await; - // Update the subscription termination metrics - self.update_subscription_termination_metrics(active_subscription_peer, error); - } + // Log a warning if we failed to create as many subscriptions as requested + let num_subscriptions_created = new_subscription_peers.len(); + if num_subscriptions_created < num_subscriptions_to_create { + warn!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Failed to create the requested number of subscriptions! Number of subscriptions \ + requested: {:?}, number of subscriptions created: {:?}.", + num_subscriptions_to_create, + num_subscriptions_created + )) + ); } - // If we don't have a subscription, we should select a new peer to - // subscribe to. If we had a previous subscription (and it was - // terminated) it should be excluded from the selection process. - if self.active_observer_subscription.is_none() { - // Create a new observer subscription - self.create_new_observer_subscription(active_subscription_peer) - .await; - - // If we successfully created a new subscription, update the metrics - if let Some(active_subscription) = &self.active_observer_subscription { - // Update the subscription creation metrics - self.update_subscription_creation_metrics( - active_subscription.get_peer_network_id(), - ); + // Update the subscription metrics + self.update_subscription_metrics(&new_subscription_peers, terminated_subscriptions); - return true; // A new subscription was created - } + // Return an error if all subscriptions were terminated + if all_subscriptions_terminated { + Err(Error::SubscriptionsReset(format!( + "All subscriptions were unhealthy and terminated! Number of terminated \ + subscriptions: {:?}, number of new subscriptions created: {:?}.", + num_terminated_subscriptions, num_subscriptions_created, + ))) + } else { + Ok(()) } - - false // No new subscription was created } - /// Creates a new observer subscription by sending subscription requests to - /// appropriate peers and waiting for a successful response. If `previous_subscription_peer` - /// is provided, it will be excluded from the selection process. 
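The replacement selection logic introduced by this patch — try peers in sorted order, drop failed ones from future rounds, repeat until enough subscriptions exist or candidates run out — can be modeled in isolation. A minimal sketch, assuming stubbed integer peers and a fake `try_subscribe` in place of the real subscription RPC:

```rust
/// Stand-in for the subscription RPC: pretend even-numbered peers accept
fn try_subscribe(peer: u32) -> bool {
    peer % 2 == 0
}

/// Walks the sorted candidates, removing failed peers from future
/// rounds, until enough subscriptions are created or peers run out
fn create_subscriptions(mut candidates: Vec<u32>, num_to_create: usize) -> Vec<u32> {
    let mut created = Vec::new();
    while created.len() < num_to_create && !candidates.is_empty() {
        let mut accepted = None;
        let mut failed = Vec::new();
        for peer in &candidates {
            if try_subscribe(*peer) {
                accepted = Some(*peer);
                break;
            }
            failed.push(*peer);
        }

        // Drop the failed peers (and the accepted one) before the next round
        candidates.retain(|peer| !failed.contains(peer) && Some(*peer) != accepted);
        match accepted {
            Some(peer) => created.push(peer),
            None => break, // no remaining peer accepted the request
        }
    }
    created
}

fn main() {
    assert_eq!(create_subscriptions(vec![1, 2, 3, 4], 2), vec![2, 4]);
}
```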
- async fn create_new_observer_subscription( + /// Attempts to create the given number of new subscriptions + /// and returns the peer IDs of the newly created subscriptions. + /// Any `unhealthy_subscription_peers` are excluded from selection. + async fn create_new_subscriptions( &mut self, - previous_subscription_peer: Option, - ) { - // Get a set of sorted peers to service our subscription request - let sorted_peers = match self.sort_peers_for_subscription(previous_subscription_peer) { + connected_peers_and_metadata: HashMap, + num_subscriptions_to_create: usize, + unhealthy_subscription_peers: Vec, + ) -> Vec { + // Return early if we don't need to create any new subscriptions + if num_subscriptions_to_create == 0 { + return vec![]; + } + + // Sort the potential peers for subscription requests + let mut sorted_potential_peers = match self.sort_peers_for_subscriptions( + connected_peers_and_metadata, + unhealthy_subscription_peers, + ) { Some(sorted_peers) => sorted_peers, None => { error!(LogSchema::new(LogEntry::ConsensusObserver) .message("Failed to sort peers for subscription requests!")); - return; + return vec![]; }, }; - // Verify that we have potential peers - if sorted_peers.is_empty() { + // Verify that we have potential peers to subscribe to + if sorted_potential_peers.is_empty() { warn!(LogSchema::new(LogEntry::ConsensusObserver) - .message("There are no peers to subscribe to!")); - return; + .message("There are no potential peers to subscribe to!")); + return vec![]; } - // Go through the sorted peers and attempt to subscribe to a single peer. - // The first peer that responds successfully will be the selected peer. - for selected_peer in &sorted_peers { + // Go through the potential peers and attempt to create new subscriptions + let mut created_subscription_peers = vec![]; + for _ in 0..num_subscriptions_to_create { + // If there are no peers left to subscribe to, return early + if sorted_potential_peers.is_empty() { + info!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "There are no more potential peers to subscribe to! \ + Num created subscriptions: {:?}", + created_subscription_peers.len() + )) + ); + break; + } + + // Attempt to create a subscription + let (subscription_peer, failed_subscription_peers) = self + .create_single_subscription(sorted_potential_peers.clone()) + .await; + + // Remove the failed peers from the sorted list + sorted_potential_peers.retain(|peer| !failed_subscription_peers.contains(peer)); + + // Process a successful subscription creation + if let Some(subscription_peer) = subscription_peer { + // Add the peer to the list of created subscriptions + created_subscription_peers.push(subscription_peer); + + // Remove the peer from the sorted list (for the next selection) + sorted_potential_peers.retain(|peer| peer != &subscription_peer); + } + } + + // Return the list of created subscriptions + created_subscription_peers + } + + /// Attempts to create a new subscription to a single peer from + /// the sorted list of potential peers. If a new subscription is + /// successfully created, the peer is returned. Likewise, any + /// peers with failed subscription attempts are also returned. 
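Each individual attempt reduces to a three-way match on the RPC result: an ack means success, while any other response or a transport error marks the peer as a failed attempt. A self-contained sketch of that shape (the types are illustrative, not the real network messages):

```rust
#[derive(Debug)]
enum Response {
    SubscribeAck,
    UnsubscribeAck,
}

/// Maps the three possible RPC outcomes to success or a failed attempt
fn handle_subscription_response(response: Result<Response, String>) -> Result<(), String> {
    match response {
        Ok(Response::SubscribeAck) => Ok(()), // the subscription was established
        Ok(other) => Err(format!("unexpected response type: {:?}", other)),
        Err(error) => Err(format!("failed to send subscription request: {}", error)),
    }
}

fn main() {
    assert!(handle_subscription_response(Ok(Response::SubscribeAck)).is_ok());
    assert!(handle_subscription_response(Ok(Response::UnsubscribeAck)).is_err());
    assert!(handle_subscription_response(Err("request timed out".into())).is_err());
}
```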
+ async fn create_single_subscription( + &mut self, + sorted_potential_peers: Vec, + ) -> (Option, Vec) { + let mut peers_with_failed_attempts = vec![]; + for potential_peer in sorted_potential_peers { + // Log the subscription attempt info!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Attempting to subscribe to peer: {}!", - selected_peer + "Attempting to subscribe to potential peer: {}!", + potential_peer )) ); // Send a subscription request to the peer and wait for the response. - // Note: it is fine to block here because we assume only a single active subscription. + // TODO: we should make this non-blocking! let subscription_request = ConsensusObserverRequest::Subscribe; let request_timeout_ms = self.consensus_observer_config.network_request_timeout_ms; let response = self .consensus_observer_client - .send_rpc_request_to_peer(selected_peer, subscription_request, request_timeout_ms) + .send_rpc_request_to_peer(&potential_peer, subscription_request, request_timeout_ms) .await; // Process the response and update the active subscription match response { Ok(ConsensusObserverResponse::SubscribeAck) => { + // Log the successful subscription info!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Successfully subscribed to peer: {}!", - selected_peer + potential_peer )) ); - // Update the active subscription + // Create the new subscription let subscription = ConsensusObserverSubscription::new( self.consensus_observer_config, self.db_reader.clone(), - *selected_peer, + potential_peer, self.time_service.clone(), ); - self.active_observer_subscription = Some(subscription); - return; // Return after successfully subscribing + // Add the subscription to the active subscriptions + self.active_observer_subscriptions + .insert(potential_peer, subscription); + + // Return the successful subscription peer + return (Some(potential_peer), peers_with_failed_attempts); }, Ok(response) => { // We received an invalid response warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Got unexpected response type: {:?}", + "Got unexpected response type for subscription request: {:?}", response.get_label() )) ); + + // Add the peer to the list of failed attempts + peers_with_failed_attempts.push(potential_peer); }, Err(error) => { // We encountered an error while sending the request error!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to send subscription request to peer: {}! Error: {:?}", - selected_peer, error + potential_peer, error )) ); + + // Add the peer to the list of failed attempts + peers_with_failed_attempts.push(potential_peer); }, } } - // We failed to connect to any peers - warn!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Failed to subscribe to any peers! Num peers attempted: {:?}", - sorted_peers.len() - )) - ); + // We failed to create a new subscription + (None, peers_with_failed_attempts) } - /// Gets the connected peers and metadata. If an error occurred, - /// it is logged and None is returned. - fn get_connected_peers_and_metadata(&self) -> Option> { - match self - .consensus_observer_client + /// Returns the currently active subscription peers + fn get_active_subscription_peers(&self) -> Vec { + self.active_observer_subscriptions.keys().cloned().collect() + } + + /// Gets the connected peers and metadata. If an error + /// occurred, it is logged and an empty map is returned. 
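The degrade-to-empty behavior described above (log the error and return a neutral value rather than propagating it) is a small pattern worth seeing on its own. A stand-alone sketch with illustrative types:

```rust
use std::collections::HashMap;

/// Logs the error and falls back to an empty map, so callers
/// never have to handle a lookup failure themselves
fn connected_peers(result: Result<HashMap<u32, String>, String>) -> HashMap<u32, String> {
    result.unwrap_or_else(|error| {
        eprintln!("Failed to get connected peers and metadata! Error: {:?}", error);
        HashMap::new()
    })
}

fn main() {
    let ok = connected_peers(Ok(HashMap::from([(1, "validator".to_string())])));
    assert_eq!(ok.len(), 1);
    assert!(connected_peers(Err("lookup failed".into())).is_empty());
}
```

The trade-off is that callers can no longer distinguish "no peers" from "lookup failed", which is acceptable here because both lead to the same behavior: no candidates to subscribe to.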
+ fn get_connected_peers_and_metadata(&self) -> HashMap { + self.consensus_observer_client .get_peers_and_metadata() .get_connected_peers_and_metadata() - { - Ok(connected_peers_and_metadata) => Some(connected_peers_and_metadata), - Err(error) => { + .unwrap_or_else(|error| { + // Log the error error!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to get connected peers and metadata! Error: {:?}", error )) ); - None - }, - } + + // Return an empty map + HashMap::new() + }) } - /// Produces a list of sorted peers to service our subscription request. - /// Note: if `previous_subscription_peer` is provided, it will be excluded + /// Produces a list of sorted peers to service our subscription requests. + /// Note: if `unhealthy_subscription_peers` are provided, they will be excluded /// from the selection process. Likewise, all peers currently subscribed to us /// will be excluded from the selection process. - fn sort_peers_for_subscription( + fn sort_peers_for_subscriptions( &mut self, - previous_subscription_peer: Option, + mut connected_peers_and_metadata: HashMap, + unhealthy_subscription_peers: Vec, ) -> Option> { - if let Some(mut peers_and_metadata) = self.get_connected_peers_and_metadata() { - // Remove the previous subscription peer (if provided) - if let Some(previous_subscription_peer) = previous_subscription_peer { - let _ = peers_and_metadata.remove(&previous_subscription_peer); - } + // Remove any peers we're already subscribed to + for active_subscription_peer in self.get_active_subscription_peers() { + let _ = connected_peers_and_metadata.remove(&active_subscription_peer); + } - // Remove any peers that are currently subscribed to us - if let Some(consensus_publisher) = &self.consensus_publisher { - for peer_network_id in consensus_publisher.get_active_subscribers() { - let _ = peers_and_metadata.remove(&peer_network_id); - } + // Remove any unhealthy subscription peers + for unhealthy_peer in unhealthy_subscription_peers { + let _ = connected_peers_and_metadata.remove(&unhealthy_peer); + } + + // Remove any peers that are currently subscribed to us + if let Some(consensus_publisher) = &self.consensus_publisher { + for peer_network_id in consensus_publisher.get_active_subscribers() { + let _ = connected_peers_and_metadata.remove(&peer_network_id); } + } - // Sort the peers by subscription optimality - let sorted_peers = - subscription::sort_peers_by_subscription_optimality(&peers_and_metadata); + // Sort the peers by subscription optimality + let sorted_peers = + subscription::sort_peers_by_subscription_optimality(&connected_peers_and_metadata); - // Return the sorted peers - Some(sorted_peers) - } else { - None // No connected peers were found + // Return the sorted peers + Some(sorted_peers) + } + + /// Terminates any unhealthy subscriptions and returns the list of terminated subscriptions + fn terminate_unhealthy_subscriptions( + &mut self, + connected_peers_and_metadata: &HashMap, + ) -> Vec<(PeerNetworkId, Error)> { + let mut terminated_subscriptions = vec![]; + for subscription_peer in self.get_active_subscription_peers() { + // Check the health of the subscription and terminate it if needed + if let Err(error) = + self.check_subscription_health(connected_peers_and_metadata, subscription_peer) + { + // Log the subscription termination error + warn!( + LogSchema::new(LogEntry::ConsensusObserver).message(&format!( + "Terminating subscription to peer: {:?}! 
Termination reason: {:?}", + subscription_peer, error + )) + ); + + // Unsubscribe from the peer and remove the subscription + self.unsubscribe_from_peer(subscription_peer); + + // Add the peer to the list of terminated subscriptions + terminated_subscriptions.push((subscription_peer, error)); + } } + + terminated_subscriptions } /// Unsubscribes from the given peer by sending an unsubscribe request - fn unsubscribe_from_peer(&self, peer_network_id: PeerNetworkId) { + fn unsubscribe_from_peer(&mut self, peer_network_id: PeerNetworkId) { + // Remove the peer from the active subscriptions + self.active_observer_subscriptions.remove(&peer_network_id); + // Send an unsubscribe request to the peer and process the response. // Note: we execute this asynchronously, as we don't need to wait for the response. let consensus_observer_client = self.consensus_observer_client.clone(); @@ -350,63 +463,64 @@ impl SubscriptionManager { }); } - /// Updates the subscription creation metrics for the given peer - fn update_subscription_creation_metrics(&self, peer_network_id: PeerNetworkId) { - // Set the number of active subscriptions - metrics::set_gauge( - &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, - &peer_network_id.network_id(), - 1, - ); - - // Update the number of created subscriptions - metrics::increment_counter( - &metrics::OBSERVER_CREATED_SUBSCRIPTIONS, - metrics::CREATED_SUBSCRIPTION_LABEL, - &peer_network_id, - ); - } - - /// Updates the subscription termination metrics for the given peer - fn update_subscription_termination_metrics( + /// Updates the subscription creation and termination metrics + fn update_subscription_metrics( &self, - peer_network_id: PeerNetworkId, - error: Error, + new_subscription_peers: &[PeerNetworkId], + terminated_subscription_peers: Vec<(PeerNetworkId, Error)>, ) { - // Reset the number of active subscriptions - metrics::set_gauge( - &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, - &peer_network_id.network_id(), - 0, - ); + // Update the created subscriptions metrics + for peer_network_id in new_subscription_peers { + metrics::increment_counter( + &metrics::OBSERVER_CREATED_SUBSCRIPTIONS, + metrics::CREATED_SUBSCRIPTION_LABEL, + peer_network_id, + ); + } - // Update the number of terminated subscriptions - metrics::increment_counter( - &metrics::OBSERVER_TERMINATED_SUBSCRIPTIONS, - error.get_label(), - &peer_network_id, - ); - } + // Update the terminated subscriptions metrics + for (peer_network_id, termination_reason) in terminated_subscription_peers { + metrics::increment_counter( + &metrics::OBSERVER_TERMINATED_SUBSCRIPTIONS, + termination_reason.get_label(), + &peer_network_id, + ); + } - /// Verifies that the message sender is the currently subscribed peer. - /// If the sender is not the subscribed peer, an error is returned. 
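The new message-verification flow (further below) replaces per-peer sender checks with a simple lookup: a message is valid iff its sender has an active subscription, and a valid message refreshes that subscription's last-message time. A minimal model, with integer peer IDs and a plain counter standing in for the time service:

```rust
use std::collections::HashMap;

struct Manager {
    active: HashMap<u32, u64>, // peer id -> last message receive time
    now: u64,                  // stand-in for the time service
}

impl Manager {
    /// Accepts messages only from peers with an active subscription,
    /// refreshing the subscription's last-message time on success
    fn verify_message(&mut self, sender: u32) -> Result<(), String> {
        match self.active.get_mut(&sender) {
            Some(last_message_time) => {
                *last_message_time = self.now;
                Ok(())
            },
            None => {
                // The real code also (re-)sends an unsubscribe request here
                Err(format!("message from non-subscribed peer: {sender}"))
            },
        }
    }
}

fn main() {
    let mut manager = Manager {
        active: HashMap::from([(7, 0)]),
        now: 42,
    };
    assert!(manager.verify_message(7).is_ok());
    assert_eq!(manager.active[&7], 42);
    assert!(manager.verify_message(9).is_err());
}
```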
- pub fn verify_message_sender(&mut self, message_sender: PeerNetworkId) -> Result<(), Error> { - if let Some(active_subscription) = &mut self.active_observer_subscription { - active_subscription - .verify_message_sender(&message_sender) - .map_err(|error| { - // Send another unsubscription request to the peer (in case the previous was lost) - self.unsubscribe_from_peer(message_sender); - error - }) - } else { - // Send another unsubscription request to the peer (in case the previous was lost) - self.unsubscribe_from_peer(message_sender); + // Set the number of active subscriptions (grouped by network ID) + let active_subscription_peers = self.get_active_subscription_peers(); + for (network_id, active_subscription_peers) in &active_subscription_peers + .iter() + .chunk_by(|peer_network_id| peer_network_id.network_id()) + { + metrics::set_gauge( + &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, + &network_id, + active_subscription_peers.collect::>().len() as i64, + ); + } + } - Err(Error::UnexpectedError(format!( - "Received message from unexpected peer: {}! No active subscription found!", - message_sender - ))) + /// Verifies that the message is from an active subscription. + /// If not, an error is returned. + pub fn verify_message_for_subscription( + &mut self, + message_sender: PeerNetworkId, + ) -> Result<(), Error> { + match self.active_observer_subscriptions.get_mut(&message_sender) { + Some(active_subscription) => { + // The message is from an active subscription (update the last message time) + active_subscription.update_last_message_receive_time(); + Ok(()) + }, + None => { + // The message is not from an active subscription (send another unsubscribe request) + self.unsubscribe_from_peer(message_sender); + Err(Error::InvalidMessageError(format!( + "Received message from unexpected peer, and not an active subscription: {}!", + message_sender + ))) + }, } } } @@ -439,7 +553,7 @@ mod test { } #[tokio::test] - async fn test_check_active_subscription_connected() { + async fn test_check_subscription_health_connected() { // Create a consensus observer client let network_id = NetworkId::Public; let (peers_and_metadata, consensus_observer_client) = @@ -457,20 +571,23 @@ mod test { ); // Create a new subscription - let observer_subscription = ConsensusObserverSubscription::new( + let peer_network_id = PeerNetworkId::random(); + create_observer_subscription( + &mut subscription_manager, consensus_observer_config, db_reader.clone(), - PeerNetworkId::random(), + peer_network_id, TimeService::mock(), ); - subscription_manager.active_observer_subscription = Some(observer_subscription); - // Check the active subscription and verify that it is removed (the peer is not connected) - assert_matches!( - subscription_manager.check_active_subscription(), - Err(Error::SubscriptionDisconnected(_)) - ); - assert!(subscription_manager.active_observer_subscription.is_none()); + // Check the active subscription and verify that it unhealthy (the peer is not connected) + check_subscription_connection(&mut subscription_manager, peer_network_id, false); + + // Terminate the subscription + let terminated_subscriptions = + terminate_any_unhealthy_subscriptions(&mut subscription_manager); + assert_eq!(terminated_subscriptions.len(), 1); + assert_eq!(terminated_subscriptions.first().unwrap().0, peer_network_id); // Add a new connected peer let connected_peer = @@ -485,14 +602,17 @@ mod test { TimeService::mock(), ); - // Check the active subscription and verify that it is still active (the peer is connected) - 
assert!(subscription_manager.check_active_subscription().is_ok()); - let active_subscription = subscription_manager.active_observer_subscription.unwrap(); - assert_eq!(active_subscription.get_peer_network_id(), connected_peer); + // Check the active subscriptions is still healthy + check_subscription_connection(&mut subscription_manager, connected_peer, true); + + // Verify that the active subscription is still present + assert!(subscription_manager + .get_active_subscription_peers() + .contains(&connected_peer)); } #[tokio::test] - async fn test_check_active_subscription_progress_stopped() { + async fn test_check_subscription_health_progress_stopped() { // Create a consensus observer config let consensus_observer_config = ConsensusObserverConfig { max_subscription_timeout_ms: 100_000_000, // Use a large value so that we don't time out @@ -528,22 +648,32 @@ mod test { time_service.clone(), ); + // Check the active subscription and verify that it is healthy + check_subscription_progress(&mut subscription_manager, connected_peer, true); + // Elapse time to simulate a DB progress error let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.max_synced_version_timeout_ms + 1, )); - // Check the active subscription and verify that it is removed (the DB is not syncing) - assert_matches!( - subscription_manager.check_active_subscription(), - Err(Error::SubscriptionProgressStopped(_)) - ); - assert!(subscription_manager.active_observer_subscription.is_none()); + // Check the active subscription and verify that it is unhealthy (the DB is not syncing) + check_subscription_progress(&mut subscription_manager, connected_peer, false); + + // Terminate the subscription + let terminated_subscriptions = + terminate_any_unhealthy_subscriptions(&mut subscription_manager); + assert_eq!(terminated_subscriptions.len(), 1); + assert_eq!(terminated_subscriptions.first().unwrap().0, connected_peer); + + // Verify the active subscription is no longer present + assert!(subscription_manager + .get_active_subscription_peers() + .is_empty()); } #[tokio::test] - async fn test_check_active_subscription_timeout() { + async fn test_check_subscription_health_timeout() { // Create a consensus observer client let network_id = NetworkId::Public; let (peers_and_metadata, consensus_observer_client) = @@ -574,25 +704,36 @@ mod test { time_service.clone(), ); + // Check the active subscription and verify that it is healthy + check_subscription_timeout(&mut subscription_manager, connected_peer, true); + // Elapse time to simulate a timeout let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.max_subscription_timeout_ms + 1, )); - // Check the active subscription and verify that it is removed (the subscription timed out) - assert_matches!( - subscription_manager.check_active_subscription(), - Err(Error::SubscriptionTimeout(_)) - ); - assert!(subscription_manager.active_observer_subscription.is_none()); + // Check the active subscription and verify that it is unhealthy (the subscription timed out) + check_subscription_timeout(&mut subscription_manager, connected_peer, false); + + // Terminate the subscription + let terminated_subscriptions = + terminate_any_unhealthy_subscriptions(&mut subscription_manager); + assert_eq!(terminated_subscriptions.len(), 1); + assert_eq!(terminated_subscriptions.first().unwrap().0, connected_peer); + + // Verify the active subscription is no longer 
present + assert!(subscription_manager + .get_active_subscription_peers() + .is_empty()); } #[tokio::test] - async fn test_check_active_subscription_suboptimal() { + async fn test_check_subscription_health_suboptimal() { // Create a consensus observer config let consensus_observer_config = ConsensusObserverConfig { max_subscription_timeout_ms: 100_000_000, // Use a large value so that we don't time out + max_concurrent_subscriptions: 1, // Only allow one subscription max_synced_version_timeout_ms: 100_000_000, // Use a large value so that we don't get DB progress errors ..ConsensusObserverConfig::default() }; @@ -618,7 +759,7 @@ mod test { // Add a suboptimal validator peer let suboptimal_peer = - create_peer_and_connection(network_id, peers_and_metadata.clone(), 0, None, true); + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); // Create a new subscription to the suboptimal peer create_observer_subscription( @@ -629,22 +770,38 @@ mod test { time_service.clone(), ); + // Check the active subscription and verify that it is healthy + check_subscription_optimality(&mut subscription_manager, suboptimal_peer, true); + // Elapse enough time to trigger the peer optimality check let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( consensus_observer_config.subscription_peer_change_interval_ms + 1, )); - // Check the active subscription and verify that it is removed (the peer is suboptimal) - assert_matches!( - subscription_manager.check_active_subscription(), - Err(Error::SubscriptionSuboptimal(_)) - ); - assert!(subscription_manager.active_observer_subscription.is_none()); + // Check the active subscription and verify that it is unhealthy (the peer is suboptimal) + check_subscription_optimality(&mut subscription_manager, suboptimal_peer, false); + + // Elapse enough time to trigger the peer optimality check again + let mock_time_service = time_service.clone().into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_refresh_interval_ms + 1, + )); + + // Terminate the subscription + let terminated_subscriptions = + terminate_any_unhealthy_subscriptions(&mut subscription_manager); + assert_eq!(terminated_subscriptions.len(), 1); + assert_eq!(terminated_subscriptions.first().unwrap().0, suboptimal_peer); + + // Verify the active subscription is no longer present + assert!(subscription_manager + .get_active_subscription_peers() + .is_empty()); } #[tokio::test] - async fn test_sort_peers_for_subscription() { + async fn test_sort_peers_for_subscriptions() { // Create a consensus observer client let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; let (peers_and_metadata, consensus_observer_client) = @@ -661,10 +818,8 @@ mod test { TimeService::mock(), ); - // Sort the peers for a subscription and verify that no peers are returned - let sorted_peers = subscription_manager - .sort_peers_for_subscription(None) - .unwrap(); + // Sort the peers and verify that no peers are returned + let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); assert!(sorted_peers.is_empty()); // Add a connected validator peer, VFN peer and public peer @@ -683,28 +838,34 @@ mod test { ); } - // Sort the peers for a subscription and verify the ordering (according to distance) - let sorted_peers = subscription_manager - .sort_peers_for_subscription(None) - .unwrap(); + // Sort the peers and verify the ordering (according to distance) + let 
sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); assert_eq!(sorted_peers[0].network_id(), NetworkId::Validator); assert_eq!(sorted_peers[1].network_id(), NetworkId::Vfn); assert_eq!(sorted_peers[2].network_id(), NetworkId::Public); assert_eq!(sorted_peers.len(), 3); - // Sort the peers, but mark the validator as the last subscribed peer - let previous_subscription_peer = sorted_peers[0]; - let sorted_peer_subset = subscription_manager - .sort_peers_for_subscription(Some(previous_subscription_peer)) - .unwrap(); + // Sort the peers, but mark the validator as unhealthy (so it's ignored) + let sorted_peer_subset = + sort_subscription_peers(&mut subscription_manager, vec![sorted_peers[0]]); assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Vfn); assert_eq!(sorted_peer_subset[1].network_id(), NetworkId::Public); assert_eq!(sorted_peer_subset.len(), 2); - // Remove all the peers and verify that no peers are returned + // Sort the peers, but mark the VFN and validator as unhealthy (so they're ignored) + let sorted_peer_subset = sort_subscription_peers(&mut subscription_manager, vec![ + sorted_peers[0], + sorted_peers[1], + ]); + assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Public); + assert_eq!(sorted_peer_subset.len(), 1); + + // Remove all the peers and verify that no peers are returned upon sorting for peer_network_id in sorted_peers { remove_peer_and_connection(peers_and_metadata.clone(), peer_network_id); } + let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); + assert!(sorted_peers.is_empty()); // Add multiple validator peers, with different latencies let mut validator_peers = vec![]; @@ -719,16 +880,89 @@ mod test { validator_peers.push(validator_peer); } - // Sort the peers for a subscription and verify the ordering (according to latency) - let sorted_peers = subscription_manager - .sort_peers_for_subscription(None) - .unwrap(); + // Sort the peers and verify the ordering (according to latency) + let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); let expected_peers = validator_peers.into_iter().rev().collect::>(); assert_eq!(sorted_peers, expected_peers); } #[tokio::test] - async fn test_verify_message_sender() { + async fn test_terminate_unhealthy_subscriptions() { + // Create a consensus observer client + let network_id = NetworkId::Public; + let (peers_and_metadata, consensus_observer_client) = + create_consensus_observer_client(&[network_id]); + + // Create a new subscription manager + let consensus_observer_config = ConsensusObserverConfig::default(); + let db_reader = create_mock_db_reader(); + let time_service = TimeService::mock(); + let mut subscription_manager = SubscriptionManager::new( + consensus_observer_client, + consensus_observer_config, + None, + db_reader.clone(), + time_service.clone(), + ); + + // Create two new subscriptions + let subscription_peer_1 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); + let subscription_peer_2 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); + for peer in &[subscription_peer_1, subscription_peer_2] { + // Create the subscription + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + *peer, + time_service.clone(), + ); + } + + // Terminate any unhealthy subscriptions and verify that both subscriptions are still healthy + let terminated_subscriptions = + terminate_any_unhealthy_subscriptions(&mut 
subscription_manager); + assert!(terminated_subscriptions.is_empty()); + assert_eq!( + subscription_manager.get_active_subscription_peers().len(), + 2 + ); + + // Create another subscription + let subscription_peer_3 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + subscription_peer_3, + TimeService::mock(), // Use a different time service (to avoid timeouts) + ); + + // Elapse time to simulate a timeout (on the first two subscriptions) + let mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_subscription_timeout_ms + 1, + )); + + // Terminate the unhealthy subscriptions and verify the first two subscriptions were terminated + let terminated_subscriptions = + terminate_any_unhealthy_subscriptions(&mut subscription_manager); + assert_eq!(terminated_subscriptions.len(), 2); + assert_eq!(subscription_manager.get_active_subscription_peers(), vec![ + subscription_peer_3 + ]); + + // Verify that both subscriptions were terminated due to a timeout + for (_, error) in terminated_subscriptions { + assert_matches!(error, Error::SubscriptionTimeout(_)); + } + } + + #[tokio::test] + async fn test_unsubscribe_from_peer() { // Create a consensus observer client let network_id = NetworkId::Public; let (_, consensus_observer_client) = create_consensus_observer_client(&[network_id]); @@ -744,30 +978,212 @@ mod test { TimeService::mock(), ); - // Check that message verification fails (we have no active subscription) + // Verify that no subscriptions are active assert!(subscription_manager - .verify_message_sender(PeerNetworkId::random()) - .is_err()); + .get_active_subscription_peers() + .is_empty()); // Create a new subscription - let subscription_peer = PeerNetworkId::random(); + let subscription_peer_1 = PeerNetworkId::random(); create_observer_subscription( &mut subscription_manager, consensus_observer_config, db_reader.clone(), - subscription_peer, + subscription_peer_1, TimeService::mock(), ); - // Check that message verification fails if the peer doesn't match the subscription + // Verify the subscription is active assert!(subscription_manager - .verify_message_sender(PeerNetworkId::random()) - .is_err()); + .get_active_subscription_peers() + .contains(&subscription_peer_1)); - // Check that message verification passes if the peer matches the subscription + // Create another subscription + let subscription_peer_2 = PeerNetworkId::random(); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + subscription_peer_2, + TimeService::mock(), + ); + + // Verify the second subscription is active assert!(subscription_manager - .verify_message_sender(subscription_peer) - .is_ok()); + .get_active_subscription_peers() + .contains(&subscription_peer_2)); + + // Unsubscribe from the first peer + subscription_manager.unsubscribe_from_peer(subscription_peer_1); + + // Verify that the first subscription is no longer active + assert!(!subscription_manager + .get_active_subscription_peers() + .contains(&subscription_peer_1)); + + // Verify that only the second subscription is still active + assert!(subscription_manager + .get_active_subscription_peers() + .contains(&subscription_peer_2)); + assert_eq!( + subscription_manager.get_active_subscription_peers().len(), + 1 + ); + } + + #[tokio::test] + async fn 
test_verify_message_for_subscription() { + // Create a consensus observer client + let network_id = NetworkId::Public; + let (_, consensus_observer_client) = create_consensus_observer_client(&[network_id]); + + // Create a new subscription manager + let consensus_observer_config = ConsensusObserverConfig::default(); + let db_reader = Arc::new(MockDatabaseReader::new()); + let mut subscription_manager = SubscriptionManager::new( + consensus_observer_client, + consensus_observer_config, + None, + db_reader.clone(), + TimeService::mock(), + ); + + // Check that message verification fails (we have no active subscriptions) + check_message_verification_result( + &mut subscription_manager, + PeerNetworkId::random(), + false, + ); + + // Create a new subscription + let subscription_peer = PeerNetworkId::random(); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + subscription_peer, + TimeService::mock(), + ); + + // Check that message verification passes for the subscription + check_message_verification_result(&mut subscription_manager, subscription_peer, true); + + // Create another subscription + let second_subscription_peer = PeerNetworkId::random(); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + second_subscription_peer, + TimeService::mock(), + ); + + // Check that message verification passes for the second subscription + check_message_verification_result( + &mut subscription_manager, + second_subscription_peer, + true, + ); + + // Check that message verification fails if the peer doesn't match either subscription + check_message_verification_result( + &mut subscription_manager, + PeerNetworkId::random(), + false, + ); + } + + /// Checks the result of verifying a message from a given peer + fn check_message_verification_result( + subscription_manager: &mut SubscriptionManager, + peer_network_id: PeerNetworkId, + pass_verification: bool, + ) { + // Verify the message for the given peer + let result = subscription_manager.verify_message_for_subscription(peer_network_id); + + // Ensure the result matches the expected value + if pass_verification { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::InvalidMessageError(_))); + } + } + + /// Checks the health of a subscription and verifies the connection status + fn check_subscription_connection( + subscription_manager: &mut SubscriptionManager, + subscription_peer: PeerNetworkId, + expect_connected: bool, + ) { + // Check the health of the subscription + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + let result = subscription_manager + .check_subscription_health(&connected_peers_and_metadata, subscription_peer); + + // Check the result based on the expected connection status + if expect_connected { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionDisconnected(_))); + } + } + + /// Checks the health of a subscription and verifies the optimality status + fn check_subscription_optimality( + subscription_manager: &mut SubscriptionManager, + subscription_peer: PeerNetworkId, + expect_optimal: bool, + ) { + // Check the health of the subscription + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + let result = subscription_manager + .check_subscription_health(&connected_peers_and_metadata, subscription_peer); + + // Check the result based on the expected optimality status + if 
expect_optimal { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionSuboptimal(_))); + } + } + + /// Checks the health of a subscription and verifies the progress status + fn check_subscription_progress( + subscription_manager: &mut SubscriptionManager, + subscription_peer: PeerNetworkId, + expect_progress: bool, + ) { + // Check the health of the subscription + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + let result = subscription_manager + .check_subscription_health(&connected_peers_and_metadata, subscription_peer); + + // Check the result based on the expected progress status + if expect_progress { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionProgressStopped(_))); + } + } + + /// Checks the health of a subscription and verifies the timeout status + fn check_subscription_timeout( + subscription_manager: &mut SubscriptionManager, + subscription_peer: PeerNetworkId, + expect_timeout: bool, + ) { + // Check the health of the subscription + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + let result = subscription_manager + .check_subscription_health(&connected_peers_and_metadata, subscription_peer); + + // Check the result based on the expected timeout status + if expect_timeout { + assert!(result.is_ok()); + } else { + assert_matches!(result, Err(Error::SubscriptionTimeout(_))); + } } /// Creates a new consensus observer client and a peers and metadata container @@ -808,7 +1224,9 @@ mod test { subscription_peer, time_service, ); - subscription_manager.active_observer_subscription = Some(observer_subscription); + subscription_manager + .active_observer_subscriptions + .insert(subscription_peer, observer_subscription); } /// Creates a new peer with the specified connection metadata @@ -879,4 +1297,32 @@ mod test { .remove_peer_metadata(peer_network_id, connection_id) .unwrap(); } + + /// A simple helper method that sorts the given peers for a subscription + fn sort_subscription_peers( + subscription_manager: &mut SubscriptionManager, + unhealthy_subscription_peers: Vec, + ) -> Vec { + // Get the connected peers and metadata + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + + // Sort the peers for subscription requests + subscription_manager + .sort_peers_for_subscriptions( + connected_peers_and_metadata, + unhealthy_subscription_peers, + ) + .unwrap() + } + + /// A simple helper method that terminates any unhealthy subscriptions + fn terminate_any_unhealthy_subscriptions( + subscription_manager: &mut SubscriptionManager, + ) -> Vec<(PeerNetworkId, Error)> { + // Get the connected peers and metadata + let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); + + // Terminate any unhealthy subscriptions + subscription_manager.terminate_unhealthy_subscriptions(&connected_peers_and_metadata) + } } From 994bd3c1389b9c23c003798a7b304adf079e742a Mon Sep 17 00:00:00 2001 From: Greg Nazario Date: Tue, 17 Sep 2024 13:48:56 -0700 Subject: [PATCH 13/36] [cli] Add contribution guide (#14435) --- crates/aptos/CONTRIBUTING.md | 247 +++++++++++++++++++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 crates/aptos/CONTRIBUTING.md diff --git a/crates/aptos/CONTRIBUTING.md b/crates/aptos/CONTRIBUTING.md new file mode 100644 index 0000000000000..7bd0fe7d976ee --- /dev/null +++ b/crates/aptos/CONTRIBUTING.md @@ -0,0 +1,247 @@ +# Aptos CLI 
Development Guide
+
+This is a list of design decisions and guidelines for adding commands to the Aptos CLI.
+
+## Command Groups
+
+Commands should be grouped into the existing categories. The current categories are:
+
+- account
+- config
+- genesis
+- governance
+- key
+- move
+- multisig
+- node
+- stake
+- update
+
+All categories must have a doc comment that describes the command. It must also derive `Parser` and `Subcommand`. For
+example:
+
+```rust
+/// Tool for interacting with accounts
+///
+/// This tool is used to create accounts, get information about the
+/// account's resources, and transfer resources between accounts.
+#[derive(Debug, Subcommand)]
+pub enum AccountTool {
+    Create(create::CreateAccount),
+    CreateResourceAccount(create_resource_account::CreateResourceAccount),
+    DeriveResourceAccountAddress(derive_resource_account::DeriveResourceAccount),
+    FundWithFaucet(fund::FundWithFaucet),
+    Balance(balance::Balance),
+    List(list::ListAccount),
+    LookupAddress(key_rotation::LookupAddress),
+    RotateKey(key_rotation::RotateKey),
+    Transfer(transfer::TransferCoins),
+}
+```
+
+Then it must also be added to the top level command structure:
+
+```rust
+/// Command Line Interface (CLI) for developing and interacting with the Aptos blockchain
+#[derive(Parser)]
+#[clap(name = "aptos", author, version, propagate_version = true, styles = aptos_cli_common::aptos_cli_style())]
+pub enum Tool {
+    #[clap(subcommand)]
+    Account(account::AccountTool),
+    #[clap(subcommand)]
+    Config(config::ConfigTool),
+    #[clap(subcommand)]
+    Genesis(genesis::GenesisTool),
+    #[clap(subcommand)]
+    Governance(governance::GovernanceTool),
+    Info(InfoTool),
+    Init(common::init::InitTool),
+    #[clap(subcommand)]
+    Key(op::key::KeyTool),
+    #[clap(subcommand)]
+    Move(move_tool::MoveTool),
+    #[clap(subcommand)]
+    Multisig(account::MultisigAccountTool),
+    #[clap(subcommand)]
+    Node(node::NodeTool),
+    #[clap(subcommand)]
+    Stake(stake::StakeTool),
+    #[clap(subcommand)]
+    Update(update::UpdateTool),
+}
+```
+
+## Commands
+
+A command is a single top level command for the CLI. The CLI command must complete its action in a single command
+execution.
+
+### Command Names
+
+```rust
+/// Compiles a package and returns the associated ModuleIds
+#[derive(Parser)]
+pub struct CompilePackage {
+    /// Save the package metadata in the package's build directory
+    ///
+    /// If set, package metadata should be generated and stored in the package's build directory.
+    /// This metadata can be used to construct a transaction to publish a package.
+    #[clap(long)]
+    pub(crate) save_metadata: bool,
+
+    #[clap(flatten)]
+    pub(crate) included_artifacts_args: IncludedArtifactsArgs,
+    #[clap(flatten)]
+    pub(crate) move_options: MovePackageDir,
+}
+```
+
+Command names should be simple, identifiable, and easy to use. For example, compilation is grouped in `move` and uses
+the subcommand `compile`.
+
+```bash
+aptos move compile
+```
+
+Once the new command is created, it should have `#[derive(Parser)]` added above. Additionally, it will need to be added
+to the higher level tool:
+
+```rust
+#[derive(Subcommand)]
+pub enum MoveTool {
+    #[clap(alias = "build")]
+    Compile(CompilePackage),
+    #[clap(alias = "build-script")]
+    CompileScript(CompileScript),
+    Init(Init),
+    // ... 
+}
+
+impl MoveTool {
+    pub async fn execute(self) -> CliResult {
+        match self {
+            MoveTool::Compile(tool) => tool.execute_serialized().await,
+            MoveTool::CompileScript(tool) => tool.execute_serialized().await,
+            MoveTool::Init(tool) => tool.execute_serialized_success().await,
+        }
+    }
+}
+```
+
+Note that there are two types of executors here: `execute_serialized()` and `execute_serialized_success()`. If the
+command returns a value, it should call `execute_serialized()`, which serializes the returned value to JSON and writes
+it to `stdout`.
+
+Additionally, `alias` is allowed but discouraged for new commands; it exists mostly to provide backwards
+compatibility or to reduce confusion for new users.
+
+### Command flags
+
+```rust
+#[derive(Parser)]
+pub struct CompilePackage {
+    /// Save the package metadata in the package's build directory
+    ///
+    /// If set, package metadata should be generated and stored in the package's build directory.
+    /// This metadata can be used to construct a transaction to publish a package.
+    #[clap(long)]
+    pub(crate) save_metadata: bool,
+
+    // ...
+}
+```
+
+Command inputs should always be documented so that help text shows up in the CLI; see the documentation on
+`save_metadata` above for an example. Fields should be snake case, and each will show up as a kebab-case flag
+(e.g. `--save-metadata`). Do not use `short` options, as they can be confused across different commands.
+
+```bash
+aptos move compile --save-metadata
+```
+
+### Command flag groupings
+
+```rust
+/// Compiles a package and returns the associated ModuleIds
+#[derive(Parser)]
+pub struct CompilePackage {
+    // ...
+    #[clap(flatten)]
+    pub(crate) included_artifacts_args: IncludedArtifactsArgs,
+    #[clap(flatten)]
+    pub(crate) move_options: MovePackageDir,
+}
+```
+
+Command flags can be grouped into common structs to be used across multiple commands. These should be flattened by
+adding the associated struct and marking it with `#[clap(flatten)]`, as shown above. The flattened fields should not
+have a doc comment, as any doc comment there will not end up in the command. Instead, document the structs directly
+like so:
+
+```rust
+#[derive(Parser)]
+pub struct IncludedArtifactsArgs {
+    /// Artifacts to be generated when building the package
+    ///
+    /// Which artifacts to include in the package. This can be one of `none`, `sparse`, and
+    /// `all`. `none` is the most compact form and does not allow reconstructing a source
+    /// package from chain; `sparse` is the minimal set of artifacts needed to reconstruct
+    /// a source package; `all` includes all available artifacts. The choice of included
+    /// artifacts heavily influences the size and therefore gas cost of publishing: `none`
+    /// is the size of bytecode alone; `sparse` is roughly 2 times as much; and `all` 3-4
+    /// times as much. 
+    #[clap(long, default_value_t = IncludedArtifacts::Sparse)]
+    pub(crate) included_artifacts: IncludedArtifacts,
+}
+```
+
+### Command Implementation
+
+```rust
+#[async_trait]
+impl CliCommand<Vec<String>> for CompilePackage {
+    fn command_name(&self) -> &'static str {
+        "CompilePackage"
+    }
+
+    async fn execute(self) -> CliTypedResult<Vec<String>> {
+        let build_options = BuildOptions {
+            install_dir: self.move_options.output_dir.clone(),
+            ..self
+                .included_artifacts_args
+                .included_artifacts
+                .build_options(
+                    self.move_options.dev,
+                    self.move_options.skip_fetch_latest_git_deps,
+                    self.move_options.named_addresses(),
+                    self.move_options.override_std.clone(),
+                    self.move_options.bytecode_version,
+                    self.move_options.compiler_version,
+                    self.move_options.language_version,
+                    self.move_options.skip_attribute_checks,
+                    self.move_options.check_test_code,
+                )
+        };
+        let pack = BuiltPackage::build(self.move_options.get_package_path()?, build_options)
+            .map_err(|e| CliError::MoveCompilationError(format!("{:#}", e)))?;
+        if self.save_metadata {
+            pack.extract_metadata_and_save()?;
+        }
+        let ids = pack
+            .modules()
+            .map(|m| m.self_id().to_string())
+            .collect::<Vec<_>>();
+        // TODO: Also say how many scripts are compiled
+        Ok(ids)
+    }
+}
+```
+
+Commands should implement the `CliCommand<T>` trait. This allows the command to be called generically upstream,
+and `T` will automatically be serialized to JSON for the output. This allows for typed testing in unit tests, while
+still having the output converted for the full CLI.
+
+It's an anti-pattern to `panic!`; please avoid panicking, and instead return a `CliError` (or a type that converts
+into `CliError`).
+
+All logging and progress output from the CLI should use `eprintln!()` rather than `println!()`: `stdout` is reserved
+for the JSON output at the end of the command, while `stderr` is used for all other output.

From 1f35406cbbb136ce7216539e678235074d14f078 Mon Sep 17 00:00:00 2001
From: Josh Lind 
Date: Wed, 11 Sep 2024 20:30:26 -0400
Subject: [PATCH 14/36] [Consensus Observer] Make subscription creation asynchronous. 
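In spirit, this patch moves subscription creation off the observer's critical path: instead of blocking while each subscription RPC completes, the work runs in the background and results are collected as they arrive. A dependency-free sketch of the idea (a thread and a channel stand in for the async task the real change uses; the values are illustrative):

```rust
use std::{sync::mpsc, thread, time::Duration};

fn main() {
    let (tx, rx) = mpsc::channel();
    let candidate_peers = vec![1u32, 2, 3];

    // Spawn the creation work off the main loop
    // (the real code uses an async task, not a thread)
    thread::spawn(move || {
        for peer in candidate_peers {
            thread::sleep(Duration::from_millis(10)); // simulate the RPC round-trip
            let _ = tx.send(peer); // report each created subscription
        }
    });

    // The main loop stays responsive and drains results as they arrive
    let created: Vec<u32> = rx.iter().collect();
    assert_eq!(created, vec![1, 2, 3]);
}
```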
--- .../src/consensus_observer/observer/mod.rs | 1 + .../observer/subscription.rs | 347 +------- .../observer/subscription_manager.rs | 555 ++++-------- .../observer/subscription_utils.rs | 823 ++++++++++++++++++ .../publisher/consensus_publisher.rs | 20 + 5 files changed, 1024 insertions(+), 722 deletions(-) create mode 100644 consensus/src/consensus_observer/observer/subscription_utils.rs diff --git a/consensus/src/consensus_observer/observer/mod.rs b/consensus/src/consensus_observer/observer/mod.rs index 35dd0ea2ec72e..4a4e5d42881a3 100644 --- a/consensus/src/consensus_observer/observer/mod.rs +++ b/consensus/src/consensus_observer/observer/mod.rs @@ -8,3 +8,4 @@ pub mod payload_store; pub mod pending_blocks; pub mod subscription; pub mod subscription_manager; +pub mod subscription_utils; diff --git a/consensus/src/consensus_observer/observer/subscription.rs b/consensus/src/consensus_observer/observer/subscription.rs index d3023da292d00..7b368fe3417c6 100644 --- a/consensus/src/consensus_observer/observer/subscription.rs +++ b/consensus/src/consensus_observer/observer/subscription.rs @@ -1,25 +1,17 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use crate::consensus_observer::common::{ - error::Error, - logging::{LogEntry, LogSchema}, -}; +use crate::consensus_observer::{common::error::Error, observer::subscription_utils}; use aptos_config::{config::ConsensusObserverConfig, network_id::PeerNetworkId}; -use aptos_logger::{info, warn}; -use aptos_network::{application::metadata::PeerMetadata, ProtocolId}; +use aptos_network::application::metadata::PeerMetadata; use aptos_storage_interface::DbReader; use aptos_time_service::{TimeService, TimeServiceTrait}; -use ordered_float::OrderedFloat; use std::{ - collections::{BTreeMap, HashMap, HashSet}, + collections::{HashMap, HashSet}, sync::Arc, time::{Duration, Instant}, }; -// A useful constant for representing the maximum ping latency -const MAX_PING_LATENCY_SECS: f64 = 10_000.0; - /// A single consensus observer subscription pub struct ConsensusObserverSubscription { // The configuration of the consensus observer @@ -106,7 +98,8 @@ impl ConsensusObserverSubscription { self.last_optimality_check_time_and_peers = (time_now, current_connected_peers); // Sort the peers by subscription optimality - let sorted_peers = sort_peers_by_subscription_optimality(peers_and_metadata); + let sorted_peers = + subscription_utils::sort_peers_by_subscription_optimality(peers_and_metadata); // Verify that this peer is one of the most optimal peers let max_concurrent_subscriptions = @@ -184,142 +177,17 @@ impl ConsensusObserverSubscription { Ok(()) } + /// Returns the peer network id of the subscription + pub fn get_peer_network_id(&self) -> PeerNetworkId { + self.peer_network_id + } + /// Updates the last message receive time to the current time pub fn update_last_message_receive_time(&mut self) { self.last_message_receive_time = self.time_service.now(); } } -/// Gets the distance from the validators for the specified peer from the peer metadata -fn get_distance_for_peer( - peer_network_id: &PeerNetworkId, - peer_metadata: &PeerMetadata, -) -> Option { - // Get the distance for the peer - let peer_monitoring_metadata = peer_metadata.get_peer_monitoring_metadata(); - let distance = peer_monitoring_metadata - .latest_network_info_response - .as_ref() - .map(|response| response.distance_from_validators); - - // If the distance is missing, log a warning - if distance.is_none() { - warn!( - 
LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Unable to get distance for peer! Peer: {:?}", - peer_network_id - )) - ); - } - - distance -} - -/// Gets the latency for the specified peer from the peer metadata -fn get_latency_for_peer( - peer_network_id: &PeerNetworkId, - peer_metadata: &PeerMetadata, -) -> Option { - // Get the latency for the peer - let peer_monitoring_metadata = peer_metadata.get_peer_monitoring_metadata(); - let latency = peer_monitoring_metadata.average_ping_latency_secs; - - // If the latency is missing, log a warning - if latency.is_none() { - warn!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Unable to get latency for peer! Peer: {:?}", - peer_network_id - )) - ); - } - - latency -} - -/// Sorts the peers by subscription optimality (in descending order of -/// optimality). This requires: (i) sorting the peers by distance from the -/// validator set and ping latency (lower values are more optimal); and (ii) -/// filtering out peers that don't support consensus observer. -/// -/// Note: we prioritize distance over latency as we want to avoid close -/// but not up-to-date peers. If peers don't have sufficient metadata -/// for sorting, they are given a lower priority. -pub fn sort_peers_by_subscription_optimality( - peers_and_metadata: &HashMap, -) -> Vec { - // Group peers and latencies by validator distance, i.e., distance -> [(peer, latency)] - let mut unsupported_peers = Vec::new(); - let mut peers_and_latencies_by_distance = BTreeMap::new(); - for (peer_network_id, peer_metadata) in peers_and_metadata { - // Verify that the peer supports consensus observer - if !supports_consensus_observer(peer_metadata) { - unsupported_peers.push(*peer_network_id); - continue; // Skip the peer - } - - // Get the distance and latency for the peer - let distance = get_distance_for_peer(peer_network_id, peer_metadata); - let latency = get_latency_for_peer(peer_network_id, peer_metadata); - - // If the distance is not found, use the maximum distance - let distance = - distance.unwrap_or(aptos_peer_monitoring_service_types::MAX_DISTANCE_FROM_VALIDATORS); - - // If the latency is not found, use a large latency - let latency = latency.unwrap_or(MAX_PING_LATENCY_SECS); - - // Add the peer and latency to the distance group - peers_and_latencies_by_distance - .entry(distance) - .or_insert_with(Vec::new) - .push((*peer_network_id, OrderedFloat(latency))); - } - - // If there are peers that don't support consensus observer, log them - if !unsupported_peers.is_empty() { - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Found {} peers that don't support consensus observer! Peers: {:?}", - unsupported_peers.len(), - unsupported_peers - )) - ); - } - - // Sort the peers by distance and latency. Note: BTreeMaps are - // sorted by key, so the entries will be sorted by distance in ascending order. - let mut sorted_peers = Vec::new(); - for (_, mut peers_and_latencies) in peers_and_latencies_by_distance { - // Sort the peers by latency - peers_and_latencies.sort_by_key(|(_, latency)| *latency); - - // Add the peers to the sorted list (in sorted order) - sorted_peers.extend( - peers_and_latencies - .into_iter() - .map(|(peer_network_id, _)| peer_network_id), - ); - } - - // Log the sorted peers - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Sorted {} peers by subscription optimality! 
Peers: {:?}", - sorted_peers.len(), - sorted_peers - )) - ); - - sorted_peers -} - -/// Returns true iff the peer metadata indicates support for consensus observer -fn supports_consensus_observer(peer_metadata: &PeerMetadata) -> bool { - peer_metadata.supports_protocol(ProtocolId::ConsensusObserver) - && peer_metadata.supports_protocol(ProtocolId::ConsensusObserverRpc) -} - #[cfg(test)] mod test { use super::*; @@ -328,10 +196,9 @@ mod test { use aptos_network::{ protocols::wire::handshake::v1::{MessagingProtocolVersion, ProtocolIdSet}, transport::{ConnectionId, ConnectionMetadata}, + ProtocolId, }; - use aptos_peer_monitoring_service_types::{ - response::NetworkInformationResponse, PeerMonitoringMetadata, - }; + use aptos_peer_monitoring_service_types::PeerMonitoringMetadata; use aptos_storage_interface::Result; use aptos_types::{network_address::NetworkAddress, transaction::Version}; use claims::assert_matches; @@ -735,117 +602,6 @@ mod test { assert_eq!(subscription.last_message_receive_time, current_time); } - #[test] - fn test_sort_peers_by_distance_and_latency() { - // Sort an empty list of peers - let peers_and_metadata = HashMap::new(); - assert!(sort_peers_by_subscription_optimality(&peers_and_metadata).is_empty()); - - // Create a list of peers with empty metadata - let peers_and_metadata = create_peers_and_metadata(true, true, true, 10); - - // Sort the peers and verify the results - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 10); - - // Create a list of peers with valid metadata - let peers_and_metadata = create_peers_and_metadata(false, false, true, 10); - - // Sort the peers - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - - // Verify the order of the peers - verify_increasing_distance_latencies(&peers_and_metadata, &sorted_peers); - assert_eq!(sorted_peers.len(), 10); - - // Create a list of peers with and without metadata - let mut peers_and_metadata = create_peers_and_metadata(false, false, true, 10); - peers_and_metadata.extend(create_peers_and_metadata(true, false, true, 10)); - peers_and_metadata.extend(create_peers_and_metadata(false, true, true, 10)); - peers_and_metadata.extend(create_peers_and_metadata(true, true, true, 10)); - - // Sort the peers - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 40); - - // Verify the order of the first 20 peers - let (first_20_peers, sorted_peers) = sorted_peers.split_at(20); - verify_increasing_distance_latencies(&peers_and_metadata, first_20_peers); - - // Verify that the next 10 peers only have latency metadata - let (next_10_peers, sorted_peers) = sorted_peers.split_at(10); - for sorted_peer in next_10_peers { - let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); - assert!(get_distance_for_peer(sorted_peer, peer_metadata).is_none()); - assert!(get_latency_for_peer(sorted_peer, peer_metadata).is_some()); - } - - // Verify that the last 10 peers have no metadata - let (last_10_peers, remaining_peers) = sorted_peers.split_at(10); - for sorted_peer in last_10_peers { - let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); - assert!(get_distance_for_peer(sorted_peer, peer_metadata).is_none()); - assert!(get_latency_for_peer(sorted_peer, peer_metadata).is_none()); - } - assert!(remaining_peers.is_empty()); - } - - #[test] - fn test_sort_peers_by_distance_and_latency_filter() { - // Sort an empty list of peers - let peers_and_metadata = 
HashMap::new(); - assert!(sort_peers_by_subscription_optimality(&peers_and_metadata).is_empty()); - - // Create a list of peers with empty metadata (with consensus observer support) - let peers_and_metadata = create_peers_and_metadata(true, true, true, 10); - - // Sort the peers and verify the results - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 10); - - // Create a list of peers with empty metadata (without consensus observer support) - let peers_and_metadata = create_peers_and_metadata(true, true, false, 10); - - // Sort the peers and verify the results - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert!(sorted_peers.is_empty()); - - // Create a list of peers with valid metadata (without consensus observer support) - let peers_and_metadata = create_peers_and_metadata(false, false, false, 10); - - // Sort the peers and verify the results - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert!(sorted_peers.is_empty()); - - // Create a list of peers with empty metadata (with and without consensus observer support) - let mut peers_and_metadata = create_peers_and_metadata(true, true, true, 5); - peers_and_metadata.extend(create_peers_and_metadata(true, true, false, 50)); - - // Sort the peers and verify the results (only the supported peers are sorted) - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 5); - - // Create a list of peers with valid metadata (with and without consensus observer support) - let mut peers_and_metadata = create_peers_and_metadata(false, false, true, 50); - peers_and_metadata.extend(create_peers_and_metadata(false, false, false, 10)); - - // Sort the peers and verify the results (only the supported peers are sorted) - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers.len(), 50); - - // Create a list of peers with valid metadata (with and without consensus observer support) - let supported_peer_and_metadata = create_peers_and_metadata(false, false, true, 1); - let unsupported_peer_and_metadata = create_peers_and_metadata(false, false, false, 1); - let mut peers_and_metadata = HashMap::new(); - peers_and_metadata.extend(supported_peer_and_metadata.clone()); - peers_and_metadata.extend(unsupported_peer_and_metadata); - - // Sort the peers and verify the results (only the supported peer is sorted) - let supported_peer = supported_peer_and_metadata.keys().next().unwrap(); - let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); - assert_eq!(sorted_peers, vec![*supported_peer]); - } - /// Adds metadata for the specified peer to the map of peers and metadata fn add_metadata_for_peer( peers_and_metadata: &mut HashMap, @@ -901,85 +657,6 @@ mod test { } } - /// Creates a new peer and metadata for testing - fn create_peer_and_metadata( - latency: Option, - distance_from_validators: Option, - support_consensus_observer: bool, - ) -> (PeerNetworkId, PeerMetadata) { - // Create a random peer - let peer_network_id = PeerNetworkId::random(); - - // Create a new peer metadata with the given latency and distance - let connection_metadata = - create_connection_metadata(peer_network_id, support_consensus_observer); - let network_information_response = - distance_from_validators.map(|distance| NetworkInformationResponse { - connected_peers: BTreeMap::new(), - distance_from_validators: distance, - }); - let 
peer_monitoring_metadata =
-            PeerMonitoringMetadata::new(latency, None, network_information_response, None, None);
-        let peer_metadata =
-            PeerMetadata::new_for_test(connection_metadata, peer_monitoring_metadata);
-
-        (peer_network_id, peer_metadata)
-    }
-
-    /// Creates a list of peers and metadata for testing
-    fn create_peers_and_metadata(
-        empty_latency: bool,
-        empty_distance: bool,
-        support_consensus_observer: bool,
-        num_peers: u64,
-    ) -> HashMap<PeerNetworkId, PeerMetadata> {
-        let mut peers_and_metadata = HashMap::new();
-        for i in 1..num_peers + 1 {
-            // Determine the distance for the peer
-            let distance = if empty_distance { None } else { Some(i) };
-
-            // Determine the latency for the peer
-            let latency = if empty_latency { None } else { Some(i as f64) };
-
-            // Create a new peer and metadata
-            let (peer_network_id, peer_metadata) =
-                create_peer_and_metadata(latency, distance, support_consensus_observer);
-            peers_and_metadata.insert(peer_network_id, peer_metadata);
-        }
-        peers_and_metadata
-    }
-
-    /// Verifies that the distance and latencies for the peers are in
-    /// increasing order (with the distance taking precedence over the latency).
-    fn verify_increasing_distance_latencies(
-        peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
-        sorted_peers: &[PeerNetworkId],
-    ) {
-        let mut previous_latency = None;
-        let mut previous_distance = 0;
-        for sorted_peer in sorted_peers {
-            // Get the distance and latency for the peer
-            let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap();
-            let distance = get_distance_for_peer(sorted_peer, peer_metadata).unwrap();
-            let latency = get_latency_for_peer(sorted_peer, peer_metadata);
-
-            // Verify the order of the peers
-            if distance == previous_distance {
-                if let Some(latency) = latency {
-                    if let Some(previous_latency) = previous_latency {
-                        assert!(latency >= previous_latency);
-                    }
-                }
-            } else {
-                assert!(distance > previous_distance);
-            }
-
-            // Update the previous latency and distance
-            previous_latency = latency;
-            previous_distance = distance;
-        }
-    }
-
     /// Verifies that the last check time and peers are as expected
     fn verify_last_check_time_and_peers(
         subscription: &ConsensusObserverSubscription,
diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs
index e63fdfc68fa23..16cd756c176dd 100644
--- a/consensus/src/consensus_observer/observer/subscription_manager.rs
+++ b/consensus/src/consensus_observer/observer/subscription_manager.rs
@@ -13,21 +13,27 @@ use crate::consensus_observer::{
             ConsensusObserverMessage, ConsensusObserverRequest, ConsensusObserverResponse,
         },
     },
-    observer::{subscription, subscription::ConsensusObserverSubscription},
+    observer::{subscription::ConsensusObserverSubscription, subscription_utils},
     publisher::consensus_publisher::ConsensusPublisher,
 };
 use aptos_config::{config::ConsensusObserverConfig, network_id::PeerNetworkId};
+use aptos_infallible::Mutex;
 use aptos_logger::{error, info, warn};
 use aptos_network::application::{interface::NetworkClient, metadata::PeerMetadata};
 use aptos_storage_interface::DbReader;
 use aptos_time_service::TimeService;
 use itertools::Itertools;
 use std::{collections::HashMap, sync::Arc};
+use tokio::task::JoinHandle;
 
 /// The manager for consensus observer subscriptions
 pub struct SubscriptionManager {
     // The currently active set of consensus observer subscriptions
-    active_observer_subscriptions: HashMap<PeerNetworkId, ConsensusObserverSubscription>,
+    active_observer_subscriptions:
+        Arc<Mutex<HashMap<PeerNetworkId, ConsensusObserverSubscription>>>,
+
+    // The active subscription creation task (if one is currently running)
+    active_subscription_creation_task: Arc<Mutex<Option<JoinHandle<()>>>>,
+
     // The consensus observer client to send network messages
@@ -57,7 +63,8 @@ impl SubscriptionManager {
         time_service: TimeService,
     ) -> Self {
         Self {
-            active_observer_subscriptions: HashMap::new(),
+            active_observer_subscriptions: Arc::new(Mutex::new(HashMap::new())),
+            active_subscription_creation_task: Arc::new(Mutex::new(None)),
             consensus_observer_client,
             consensus_observer_config,
             consensus_publisher,
@@ -73,7 +80,12 @@ impl SubscriptionManager {
         connected_peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
         peer_network_id: PeerNetworkId,
     ) -> Result<(), Error> {
-        match self.active_observer_subscriptions.get_mut(&peer_network_id) {
+        // Get the active subscription for the peer
+        let mut active_observer_subscriptions = self.active_observer_subscriptions.lock();
+        let active_subscription = active_observer_subscriptions.get_mut(&peer_network_id);
+
+        // Check the health of the subscription
+        match active_subscription {
             Some(active_subscription) => {
                 // Verify the peer is still connected
                 if !connected_peers_and_metadata.contains_key(&peer_network_id) {
@@ -121,210 +133,39 @@ impl SubscriptionManager {
             && num_terminated_subscriptions == initial_subscription_peers.len();
 
         // Calculate the number of new subscriptions to create
+        let remaining_subscription_peers = self.get_active_subscription_peers();
         let max_concurrent_subscriptions =
             self.consensus_observer_config.max_concurrent_subscriptions as usize;
         let num_subscriptions_to_create =
-            max_concurrent_subscriptions.saturating_sub(self.active_observer_subscriptions.len());
-
-        // Create the new subscriptions (if required)
-        let terminated_subscription_peers = terminated_subscriptions
-            .iter()
-            .map(|(peer, _)| *peer)
-            .collect();
-        let new_subscription_peers = self
-            .create_new_subscriptions(
-                connected_peers_and_metadata,
-                num_subscriptions_to_create,
-                terminated_subscription_peers,
-            )
-            .await;
+            max_concurrent_subscriptions.saturating_sub(remaining_subscription_peers.len());
 
-        // Log a warning if we failed to create as many subscriptions as requested
-        let num_subscriptions_created = new_subscription_peers.len();
-        if num_subscriptions_created < num_subscriptions_to_create {
-            warn!(
-                LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
-                    "Failed to create the requested number of subscriptions! Number of subscriptions \
-                    requested: {:?}, number of subscriptions created: {:?}.",
-                    num_subscriptions_to_create,
-                    num_subscriptions_created
-                ))
-            );
-        }
+        // Update the total subscription metrics
+        update_total_subscription_metrics(&remaining_subscription_peers);
 
-        // Update the subscription metrics
-        self.update_subscription_metrics(&new_subscription_peers, terminated_subscriptions);
+        // Spawn a task to create the new subscriptions (asynchronously)
+        self.spawn_subscription_creation_task(
+            num_subscriptions_to_create,
+            remaining_subscription_peers,
+            terminated_subscriptions,
+            connected_peers_and_metadata,
+        )
+        .await;
 
         // Return an error if all subscriptions were terminated
         if all_subscriptions_terminated {
             Err(Error::SubscriptionsReset(format!(
-                "All subscriptions were unhealthy and terminated! 
Number of terminated \ - subscriptions: {:?}, number of new subscriptions created: {:?}.", - num_terminated_subscriptions, num_subscriptions_created, + "All {:?} subscriptions were unhealthy and terminated!", + num_terminated_subscriptions, ))) } else { Ok(()) } } - /// Attempts to create the given number of new subscriptions - /// and returns the peer IDs of the newly created subscriptions. - /// Any `unhealthy_subscription_peers` are excluded from selection. - async fn create_new_subscriptions( - &mut self, - connected_peers_and_metadata: HashMap, - num_subscriptions_to_create: usize, - unhealthy_subscription_peers: Vec, - ) -> Vec { - // Return early if we don't need to create any new subscriptions - if num_subscriptions_to_create == 0 { - return vec![]; - } - - // Sort the potential peers for subscription requests - let mut sorted_potential_peers = match self.sort_peers_for_subscriptions( - connected_peers_and_metadata, - unhealthy_subscription_peers, - ) { - Some(sorted_peers) => sorted_peers, - None => { - error!(LogSchema::new(LogEntry::ConsensusObserver) - .message("Failed to sort peers for subscription requests!")); - return vec![]; - }, - }; - - // Verify that we have potential peers to subscribe to - if sorted_potential_peers.is_empty() { - warn!(LogSchema::new(LogEntry::ConsensusObserver) - .message("There are no potential peers to subscribe to!")); - return vec![]; - } - - // Go through the potential peers and attempt to create new subscriptions - let mut created_subscription_peers = vec![]; - for _ in 0..num_subscriptions_to_create { - // If there are no peers left to subscribe to, return early - if sorted_potential_peers.is_empty() { - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "There are no more potential peers to subscribe to! \ - Num created subscriptions: {:?}", - created_subscription_peers.len() - )) - ); - break; - } - - // Attempt to create a subscription - let (subscription_peer, failed_subscription_peers) = self - .create_single_subscription(sorted_potential_peers.clone()) - .await; - - // Remove the failed peers from the sorted list - sorted_potential_peers.retain(|peer| !failed_subscription_peers.contains(peer)); - - // Process a successful subscription creation - if let Some(subscription_peer) = subscription_peer { - // Add the peer to the list of created subscriptions - created_subscription_peers.push(subscription_peer); - - // Remove the peer from the sorted list (for the next selection) - sorted_potential_peers.retain(|peer| peer != &subscription_peer); - } - } - - // Return the list of created subscriptions - created_subscription_peers - } - - /// Attempts to create a new subscription to a single peer from - /// the sorted list of potential peers. If a new subscription is - /// successfully created, the peer is returned. Likewise, any - /// peers with failed subscription attempts are also returned. - async fn create_single_subscription( - &mut self, - sorted_potential_peers: Vec, - ) -> (Option, Vec) { - let mut peers_with_failed_attempts = vec![]; - for potential_peer in sorted_potential_peers { - // Log the subscription attempt - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Attempting to subscribe to potential peer: {}!", - potential_peer - )) - ); - - // Send a subscription request to the peer and wait for the response. - // TODO: we should make this non-blocking! 
- let subscription_request = ConsensusObserverRequest::Subscribe; - let request_timeout_ms = self.consensus_observer_config.network_request_timeout_ms; - let response = self - .consensus_observer_client - .send_rpc_request_to_peer(&potential_peer, subscription_request, request_timeout_ms) - .await; - - // Process the response and update the active subscription - match response { - Ok(ConsensusObserverResponse::SubscribeAck) => { - // Log the successful subscription - info!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Successfully subscribed to peer: {}!", - potential_peer - )) - ); - - // Create the new subscription - let subscription = ConsensusObserverSubscription::new( - self.consensus_observer_config, - self.db_reader.clone(), - potential_peer, - self.time_service.clone(), - ); - - // Add the subscription to the active subscriptions - self.active_observer_subscriptions - .insert(potential_peer, subscription); - - // Return the successful subscription peer - return (Some(potential_peer), peers_with_failed_attempts); - }, - Ok(response) => { - // We received an invalid response - warn!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Got unexpected response type for subscription request: {:?}", - response.get_label() - )) - ); - - // Add the peer to the list of failed attempts - peers_with_failed_attempts.push(potential_peer); - }, - Err(error) => { - // We encountered an error while sending the request - error!( - LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Failed to send subscription request to peer: {}! Error: {:?}", - potential_peer, error - )) - ); - - // Add the peer to the list of failed attempts - peers_with_failed_attempts.push(potential_peer); - }, - } - } - - // We failed to create a new subscription - (None, peers_with_failed_attempts) - } - /// Returns the currently active subscription peers fn get_active_subscription_peers(&self) -> Vec { - self.active_observer_subscriptions.keys().cloned().collect() + let active_observer_subscriptions = self.active_observer_subscriptions.lock(); + active_observer_subscriptions.keys().cloned().collect() } /// Gets the connected peers and metadata. If an error @@ -347,38 +188,89 @@ impl SubscriptionManager { }) } - /// Produces a list of sorted peers to service our subscription requests. - /// Note: if `unhealthy_subscription_peers` are provided, they will be excluded - /// from the selection process. Likewise, all peers currently subscribed to us - /// will be excluded from the selection process. - fn sort_peers_for_subscriptions( + /// Spawns a new subscription creation task to create + /// the specified number of new subscriptions. 
+    async fn spawn_subscription_creation_task(
         &mut self,
-        mut connected_peers_and_metadata: HashMap<PeerNetworkId, PeerMetadata>,
-        unhealthy_subscription_peers: Vec<PeerNetworkId>,
-    ) -> Option<Vec<PeerNetworkId>> {
-        // Remove any peers we're already subscribed to
-        for active_subscription_peer in self.get_active_subscription_peers() {
-            let _ = connected_peers_and_metadata.remove(&active_subscription_peer);
+        num_subscriptions_to_create: usize,
+        active_subscription_peers: Vec<PeerNetworkId>,
+        terminated_subscriptions: Vec<(PeerNetworkId, Error)>,
+        connected_peers_and_metadata: HashMap<PeerNetworkId, PeerMetadata>,
+    ) {
+        // If there are no new subscriptions to create, return early
+        if num_subscriptions_to_create == 0 {
+            return;
         }
 
-        // Remove any unhealthy subscription peers
-        for unhealthy_peer in unhealthy_subscription_peers {
-            let _ = connected_peers_and_metadata.remove(&unhealthy_peer);
+        // If there is an active subscription creation task, return early
+        if let Some(subscription_creation_task) = &*self.active_subscription_creation_task.lock() {
+            if !subscription_creation_task.is_finished() {
+                return; // The task is still running
+            }
         }
 
-        // Remove any peers that are currently subscribed to us
-        if let Some(consensus_publisher) = &self.consensus_publisher {
-            for peer_network_id in consensus_publisher.get_active_subscribers() {
-                let _ = connected_peers_and_metadata.remove(&peer_network_id);
+        // Clone the shared state for the task
+        let active_observer_subscriptions = self.active_observer_subscriptions.clone();
+        let consensus_observer_config = self.consensus_observer_config;
+        let consensus_observer_client = self.consensus_observer_client.clone();
+        let consensus_publisher = self.consensus_publisher.clone();
+        let db_reader = self.db_reader.clone();
+        let time_service = self.time_service.clone();
+
+        // Otherwise, we should spawn a new subscription creation task
+        let subscription_creation_task = tokio::spawn(async move {
+            // Identify the terminated subscription peers
+            let terminated_subscription_peers = terminated_subscriptions
+                .iter()
+                .map(|(peer, _)| *peer)
+                .collect();
+
+            // Create the new subscriptions
+            let new_subscriptions = subscription_utils::create_new_subscriptions(
+                consensus_observer_config,
+                consensus_observer_client,
+                consensus_publisher,
+                db_reader,
+                time_service,
+                connected_peers_and_metadata,
+                num_subscriptions_to_create,
+                active_subscription_peers,
+                terminated_subscription_peers,
+            )
+            .await;
+
+            // Identify the new subscription peers
+            let new_subscription_peers = new_subscriptions
+                .iter()
+                .map(|subscription| subscription.get_peer_network_id())
+                .collect::<Vec<_>>();
+
+            // Add the new subscriptions to the list of active subscriptions
+            for subscription in new_subscriptions {
+                active_observer_subscriptions
+                    .lock()
+                    .insert(subscription.get_peer_network_id(), subscription);
             }
-        }
 
-        // Sort the peers by subscription optimality
-        let sorted_peers =
-            subscription::sort_peers_by_subscription_optimality(&connected_peers_and_metadata);
+            // Log a warning if we failed to create as many subscriptions as requested
+            let num_subscriptions_created = new_subscription_peers.len();
+            if num_subscriptions_created < num_subscriptions_to_create {
+                warn!(
+                    LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                        "Failed to create the requested number of subscriptions! 
Number of subscriptions \ + requested: {:?}, number of subscriptions created: {:?}.", + num_subscriptions_to_create, + num_subscriptions_created + )) + ); + } + + // Update the subscription change metrics + update_subscription_change_metrics(new_subscription_peers, terminated_subscriptions); + }); - // Return the sorted peers - Some(sorted_peers) + // Update the active subscription creation task + *self.active_subscription_creation_task.lock() = Some(subscription_creation_task); } /// Terminates any unhealthy subscriptions and returns the list of terminated subscriptions @@ -414,7 +306,9 @@ impl SubscriptionManager { /// Unsubscribes from the given peer by sending an unsubscribe request fn unsubscribe_from_peer(&mut self, peer_network_id: PeerNetworkId) { // Remove the peer from the active subscriptions - self.active_observer_subscriptions.remove(&peer_network_id); + self.active_observer_subscriptions + .lock() + .remove(&peer_network_id); // Send an unsubscribe request to the peer and process the response. // Note: we execute this asynchronously, as we don't need to wait for the response. @@ -463,65 +357,68 @@ impl SubscriptionManager { }); } - /// Updates the subscription creation and termination metrics - fn update_subscription_metrics( - &self, - new_subscription_peers: &[PeerNetworkId], - terminated_subscription_peers: Vec<(PeerNetworkId, Error)>, - ) { - // Update the created subscriptions metrics - for peer_network_id in new_subscription_peers { - metrics::increment_counter( - &metrics::OBSERVER_CREATED_SUBSCRIPTIONS, - metrics::CREATED_SUBSCRIPTION_LABEL, - peer_network_id, - ); - } - - // Update the terminated subscriptions metrics - for (peer_network_id, termination_reason) in terminated_subscription_peers { - metrics::increment_counter( - &metrics::OBSERVER_TERMINATED_SUBSCRIPTIONS, - termination_reason.get_label(), - &peer_network_id, - ); - } - - // Set the number of active subscriptions (grouped by network ID) - let active_subscription_peers = self.get_active_subscription_peers(); - for (network_id, active_subscription_peers) in &active_subscription_peers - .iter() - .chunk_by(|peer_network_id| peer_network_id.network_id()) - { - metrics::set_gauge( - &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, - &network_id, - active_subscription_peers.collect::>().len() as i64, - ); - } - } - - /// Verifies that the message is from an active subscription. - /// If not, an error is returned. + /// Verifies that the message is from an active + /// subscription. If not, an error is returned. 
pub fn verify_message_for_subscription( &mut self, message_sender: PeerNetworkId, ) -> Result<(), Error> { - match self.active_observer_subscriptions.get_mut(&message_sender) { - Some(active_subscription) => { - // The message is from an active subscription (update the last message time) - active_subscription.update_last_message_receive_time(); - Ok(()) - }, - None => { - // The message is not from an active subscription (send another unsubscribe request) - self.unsubscribe_from_peer(message_sender); - Err(Error::InvalidMessageError(format!( - "Received message from unexpected peer, and not an active subscription: {}!", - message_sender - ))) - }, + // Check if the message is from an active subscription + if let Some(active_subscription) = self + .active_observer_subscriptions + .lock() + .get_mut(&message_sender) + { + // Update the last message receive time and return early + active_subscription.update_last_message_receive_time(); + return Ok(()); } + + // Otherwise, the message is not from an active subscription. + // Send another unsubscribe request, and return an error. + self.unsubscribe_from_peer(message_sender); + Err(Error::InvalidMessageError(format!( + "Received message from unexpected peer, and not an active subscription: {}!", + message_sender + ))) + } +} + +/// Updates the subscription creation and termination metrics +fn update_subscription_change_metrics( + new_subscription_peers: Vec, + terminated_subscription_peers: Vec<(PeerNetworkId, Error)>, +) { + // Update the created subscriptions metrics + for peer_network_id in new_subscription_peers { + metrics::increment_counter( + &metrics::OBSERVER_CREATED_SUBSCRIPTIONS, + metrics::CREATED_SUBSCRIPTION_LABEL, + &peer_network_id, + ); + } + + // Update the terminated subscriptions metrics + for (peer_network_id, termination_reason) in terminated_subscription_peers { + metrics::increment_counter( + &metrics::OBSERVER_TERMINATED_SUBSCRIPTIONS, + termination_reason.get_label(), + &peer_network_id, + ); + } +} + +/// Updates the total subscription metrics (grouped by network ID) +fn update_total_subscription_metrics(active_subscription_peers: &[PeerNetworkId]) { + for (network_id, active_subscription_peers) in &active_subscription_peers + .iter() + .chunk_by(|peer_network_id| peer_network_id.network_id()) + { + metrics::set_gauge( + &metrics::OBSERVER_NUM_ACTIVE_SUBSCRIPTIONS, + &network_id, + active_subscription_peers.collect::>().len() as i64, + ); } } @@ -800,92 +697,6 @@ mod test { .is_empty()); } - #[tokio::test] - async fn test_sort_peers_for_subscriptions() { - // Create a consensus observer client - let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; - let (peers_and_metadata, consensus_observer_client) = - create_consensus_observer_client(network_ids); - - // Create a new subscription manager - let consensus_observer_config = ConsensusObserverConfig::default(); - let db_reader = create_mock_db_reader(); - let mut subscription_manager = SubscriptionManager::new( - consensus_observer_client, - consensus_observer_config, - None, - db_reader.clone(), - TimeService::mock(), - ); - - // Sort the peers and verify that no peers are returned - let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); - assert!(sorted_peers.is_empty()); - - // Add a connected validator peer, VFN peer and public peer - for network_id in network_ids { - let distance_from_validators = match network_id { - NetworkId::Validator => 0, - NetworkId::Vfn => 1, - NetworkId::Public => 2, - }; - 
create_peer_and_connection( - *network_id, - peers_and_metadata.clone(), - distance_from_validators, - None, - true, - ); - } - - // Sort the peers and verify the ordering (according to distance) - let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); - assert_eq!(sorted_peers[0].network_id(), NetworkId::Validator); - assert_eq!(sorted_peers[1].network_id(), NetworkId::Vfn); - assert_eq!(sorted_peers[2].network_id(), NetworkId::Public); - assert_eq!(sorted_peers.len(), 3); - - // Sort the peers, but mark the validator as unhealthy (so it's ignored) - let sorted_peer_subset = - sort_subscription_peers(&mut subscription_manager, vec![sorted_peers[0]]); - assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Vfn); - assert_eq!(sorted_peer_subset[1].network_id(), NetworkId::Public); - assert_eq!(sorted_peer_subset.len(), 2); - - // Sort the peers, but mark the VFN and validator as unhealthy (so they're ignored) - let sorted_peer_subset = sort_subscription_peers(&mut subscription_manager, vec![ - sorted_peers[0], - sorted_peers[1], - ]); - assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Public); - assert_eq!(sorted_peer_subset.len(), 1); - - // Remove all the peers and verify that no peers are returned upon sorting - for peer_network_id in sorted_peers { - remove_peer_and_connection(peers_and_metadata.clone(), peer_network_id); - } - let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); - assert!(sorted_peers.is_empty()); - - // Add multiple validator peers, with different latencies - let mut validator_peers = vec![]; - for ping_latency_secs in [0.9, 0.8, 0.5, 0.1, 0.05] { - let validator_peer = create_peer_and_connection( - NetworkId::Validator, - peers_and_metadata.clone(), - 0, - Some(ping_latency_secs), - true, - ); - validator_peers.push(validator_peer); - } - - // Sort the peers and verify the ordering (according to latency) - let sorted_peers = sort_subscription_peers(&mut subscription_manager, vec![]); - let expected_peers = validator_peers.into_iter().rev().collect::>(); - assert_eq!(sorted_peers, expected_peers); - } - #[tokio::test] async fn test_terminate_unhealthy_subscriptions() { // Create a consensus observer client @@ -1226,6 +1037,7 @@ mod test { ); subscription_manager .active_observer_subscriptions + .lock() .insert(subscription_peer, observer_subscription); } @@ -1284,37 +1096,6 @@ mod test { peer_network_id } - /// Removes the peer and connection metadata for the given peer - fn remove_peer_and_connection( - peers_and_metadata: Arc, - peer_network_id: PeerNetworkId, - ) { - let peer_metadata = peers_and_metadata - .get_metadata_for_peer(peer_network_id) - .unwrap(); - let connection_id = peer_metadata.get_connection_metadata().connection_id; - peers_and_metadata - .remove_peer_metadata(peer_network_id, connection_id) - .unwrap(); - } - - /// A simple helper method that sorts the given peers for a subscription - fn sort_subscription_peers( - subscription_manager: &mut SubscriptionManager, - unhealthy_subscription_peers: Vec, - ) -> Vec { - // Get the connected peers and metadata - let connected_peers_and_metadata = subscription_manager.get_connected_peers_and_metadata(); - - // Sort the peers for subscription requests - subscription_manager - .sort_peers_for_subscriptions( - connected_peers_and_metadata, - unhealthy_subscription_peers, - ) - .unwrap() - } - /// A simple helper method that terminates any unhealthy subscriptions fn terminate_any_unhealthy_subscriptions( subscription_manager: &mut 
SubscriptionManager,
diff --git a/consensus/src/consensus_observer/observer/subscription_utils.rs b/consensus/src/consensus_observer/observer/subscription_utils.rs
new file mode 100644
index 0000000000000..7dd5ffa9b2ace
--- /dev/null
+++ b/consensus/src/consensus_observer/observer/subscription_utils.rs
@@ -0,0 +1,823 @@
+// Copyright © Aptos Foundation
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::consensus_observer::{
+    common::logging::{LogEntry, LogSchema},
+    network::{
+        observer_client::ConsensusObserverClient,
+        observer_message::{
+            ConsensusObserverMessage, ConsensusObserverRequest, ConsensusObserverResponse,
+        },
+    },
+    observer::subscription::ConsensusObserverSubscription,
+    publisher::consensus_publisher::ConsensusPublisher,
+};
+use aptos_config::{config::ConsensusObserverConfig, network_id::PeerNetworkId};
+use aptos_logger::{error, info, warn};
+use aptos_network::{
+    application::{interface::NetworkClient, metadata::PeerMetadata},
+    ProtocolId,
+};
+use aptos_storage_interface::DbReader;
+use aptos_time_service::TimeService;
+use ordered_float::OrderedFloat;
+use std::{
+    collections::{BTreeMap, HashMap},
+    sync::Arc,
+};
+
+// A useful constant for representing the maximum ping latency
+const MAX_PING_LATENCY_SECS: f64 = 10_000.0;
+
+/// Attempts to create the given number of new subscriptions
+/// from the connected peers and metadata. Any active or unhealthy
+/// subscriptions are excluded from the selection process.
+pub async fn create_new_subscriptions(
+    consensus_observer_config: ConsensusObserverConfig,
+    consensus_observer_client: Arc<
+        ConsensusObserverClient<NetworkClient<ConsensusObserverMessage>>,
+    >,
+    consensus_publisher: Option<Arc<ConsensusPublisher>>,
+    db_reader: Arc<dyn DbReader>,
+    time_service: TimeService,
+    connected_peers_and_metadata: HashMap<PeerNetworkId, PeerMetadata>,
+    num_subscriptions_to_create: usize,
+    active_subscription_peers: Vec<PeerNetworkId>,
+    unhealthy_subscription_peers: Vec<PeerNetworkId>,
+) -> Vec<ConsensusObserverSubscription> {
+    // Sort the potential peers for subscription requests
+    let mut sorted_potential_peers = match sort_peers_for_subscriptions(
+        connected_peers_and_metadata,
+        active_subscription_peers,
+        unhealthy_subscription_peers,
+        consensus_publisher,
+    ) {
+        Some(sorted_peers) => sorted_peers,
+        None => {
+            error!(LogSchema::new(LogEntry::ConsensusObserver)
+                .message("Failed to sort peers for subscription requests!"));
+            return vec![];
+        },
+    };
+
+    // Verify that we have potential peers to subscribe to
+    if sorted_potential_peers.is_empty() {
+        warn!(LogSchema::new(LogEntry::ConsensusObserver)
+            .message("There are no potential peers to subscribe to!"));
+        return vec![];
+    }
+
+    // Go through the potential peers and attempt to create new subscriptions
+    let mut created_subscriptions = vec![];
+    for _ in 0..num_subscriptions_to_create {
+        // If there are no peers left to subscribe to, return early
+        if sorted_potential_peers.is_empty() {
+            info!(
+                LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                    "There are no more potential peers to subscribe to! \
+                    Num created subscriptions: {:?}",
+                    created_subscriptions.len()
+                ))
+            );
+            break;
+        }
+
+        // Attempt to create a new subscription
+        let (observer_subscription, failed_subscription_peers) = create_single_subscription(
+            consensus_observer_config,
+            consensus_observer_client.clone(),
+            db_reader.clone(),
+            sorted_potential_peers.clone(),
+            time_service.clone(),
+        )
+        .await;
+
+        // Remove the failed peers from the sorted list
+        sorted_potential_peers.retain(|peer| !failed_subscription_peers.contains(peer));
+
+        // Process a successful subscription creation
+        if let Some(observer_subscription) = observer_subscription {
+            // Remove the peer from the sorted list (for the next selection)
+            sorted_potential_peers
+                .retain(|peer| *peer != observer_subscription.get_peer_network_id());
+
+            // Add the newly created subscription to the subscription list
+            created_subscriptions.push(observer_subscription);
+        }
+    }
+
+    // Return the list of created subscriptions
+    created_subscriptions
+}
+
+/// Attempts to create a new subscription to a single peer from the
+/// sorted list of potential peers. If successful, the new subscription
+/// is returned, alongside any peers with failed attempts.
+async fn create_single_subscription(
+    consensus_observer_config: ConsensusObserverConfig,
+    consensus_observer_client: Arc<
+        ConsensusObserverClient<NetworkClient<ConsensusObserverMessage>>,
+    >,
+    db_reader: Arc<dyn DbReader>,
+    sorted_potential_peers: Vec<PeerNetworkId>,
+    time_service: TimeService,
+) -> (Option<ConsensusObserverSubscription>, Vec<PeerNetworkId>) {
+    let mut peers_with_failed_attempts = vec![];
+    for potential_peer in sorted_potential_peers {
+        // Log the subscription attempt
+        info!(
+            LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                "Attempting to subscribe to potential peer: {}!",
+                potential_peer
+            ))
+        );
+
+        // Send a subscription request to the peer and wait for the response
+        let subscription_request = ConsensusObserverRequest::Subscribe;
+        let request_timeout_ms = consensus_observer_config.network_request_timeout_ms;
+        let response = consensus_observer_client
+            .send_rpc_request_to_peer(&potential_peer, subscription_request, request_timeout_ms)
+            .await;
+
+        // Process the response and update the active subscription
+        match response {
+            Ok(ConsensusObserverResponse::SubscribeAck) => {
+                // Log the successful subscription
+                info!(
+                    LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                        "Successfully subscribed to peer: {}!",
+                        potential_peer
+                    ))
+                );
+
+                // Create the new subscription
+                let subscription = ConsensusObserverSubscription::new(
+                    consensus_observer_config,
+                    db_reader.clone(),
+                    potential_peer,
+                    time_service.clone(),
+                );
+
+                // Return the successful subscription
+                return (Some(subscription), peers_with_failed_attempts);
+            },
+            Ok(response) => {
+                // We received an invalid response
+                warn!(
+                    LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                        "Got unexpected response type for subscription request: {:?}",
+                        response.get_label()
+                    ))
+                );
+
+                // Add the peer to the list of failed attempts
+                peers_with_failed_attempts.push(potential_peer);
+            },
+            Err(error) => {
+                // We encountered an error while sending the request
+                error!(
+                    LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                        "Failed to send subscription request to peer: {}! Error: {:?}",
+                        potential_peer, error
+                    ))
+                );
+
+                // Add the peer to the list of failed attempts
+                peers_with_failed_attempts.push(potential_peer);
+            },
+        }
+    }
+
+    // We failed to create a new subscription
+    (None, peers_with_failed_attempts)
+}
+
+/// Gets the distance from the validators for the specified peer from the peer metadata
+fn get_distance_for_peer(
+    peer_network_id: &PeerNetworkId,
+    peer_metadata: &PeerMetadata,
+) -> Option<u64> {
+    // Get the distance for the peer
+    let peer_monitoring_metadata = peer_metadata.get_peer_monitoring_metadata();
+    let distance = peer_monitoring_metadata
+        .latest_network_info_response
+        .as_ref()
+        .map(|response| response.distance_from_validators);
+
+    // If the distance is missing, log a warning
+    if distance.is_none() {
+        warn!(
+            LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                "Unable to get distance for peer! Peer: {:?}",
+                peer_network_id
+            ))
+        );
+    }
+
+    distance
+}
+
+/// Gets the latency for the specified peer from the peer metadata
+fn get_latency_for_peer(
+    peer_network_id: &PeerNetworkId,
+    peer_metadata: &PeerMetadata,
+) -> Option<f64> {
+    // Get the latency for the peer
+    let peer_monitoring_metadata = peer_metadata.get_peer_monitoring_metadata();
+    let latency = peer_monitoring_metadata.average_ping_latency_secs;
+
+    // If the latency is missing, log a warning
+    if latency.is_none() {
+        warn!(
+            LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                "Unable to get latency for peer! Peer: {:?}",
+                peer_network_id
+            ))
+        );
+    }
+
+    latency
+}
+
+/// Produces a list of sorted peers to service the subscription requests.
+/// Any active or unhealthy subscriptions are excluded from the selection process.
+/// Likewise, any peers currently subscribed to us are also excluded.
+fn sort_peers_for_subscriptions(
+    mut connected_peers_and_metadata: HashMap<PeerNetworkId, PeerMetadata>,
+    active_subscription_peers: Vec<PeerNetworkId>,
+    unhealthy_subscription_peers: Vec<PeerNetworkId>,
+    consensus_publisher: Option<Arc<ConsensusPublisher>>,
+) -> Option<Vec<PeerNetworkId>> {
+    // Remove any peers we're already subscribed to
+    for active_subscription_peer in active_subscription_peers {
+        let _ = connected_peers_and_metadata.remove(&active_subscription_peer);
+    }
+
+    // Remove any unhealthy subscription peers
+    for unhealthy_peer in unhealthy_subscription_peers {
+        let _ = connected_peers_and_metadata.remove(&unhealthy_peer);
+    }
+
+    // Remove any peers that are currently subscribed to us
+    if let Some(consensus_publisher) = consensus_publisher {
+        for peer_network_id in consensus_publisher.get_active_subscribers() {
+            let _ = connected_peers_and_metadata.remove(&peer_network_id);
+        }
+    }
+
+    // Sort the peers by subscription optimality
+    let sorted_peers = sort_peers_by_subscription_optimality(&connected_peers_and_metadata);
+
+    // Return the sorted peers
+    Some(sorted_peers)
+}
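For intuition, the sorting step that the function below implements reduces to: group peers by
distance (ascending), then order each group by ping latency. A tiny, self-contained illustration
with made-up peer data (illustrative only; not part of this patch):

```rust
use ordered_float::OrderedFloat;
use std::collections::BTreeMap;

fn main() {
    // Hypothetical (name, distance, ping latency) tuples for five peers
    let peers: Vec<(&str, u64, f64)> = vec![
        ("a", 1, 0.9),
        ("b", 0, 0.5),
        ("c", 1, 0.1),
        ("d", 0, 0.7),
        ("e", 2, 0.05),
    ];

    // Group by distance; BTreeMap keeps distances in ascending order
    let mut by_distance: BTreeMap<u64, Vec<(&str, OrderedFloat<f64>)>> = BTreeMap::new();
    for (name, distance, latency) in peers {
        by_distance
            .entry(distance)
            .or_default()
            .push((name, OrderedFloat(latency)));
    }

    // Sort each distance bucket by latency, then flatten
    let mut sorted = Vec::new();
    for (_, mut bucket) in by_distance {
        bucket.sort_by_key(|(_, latency)| *latency);
        sorted.extend(bucket.into_iter().map(|(name, _)| name));
    }

    // Closest (and fastest within each distance) peers come first
    assert_eq!(sorted, vec!["b", "d", "c", "a", "e"]);
}
```

Because `BTreeMap` iterates its keys in ascending order, sorting each bucket by latency is all
that is needed to obtain the global ordering.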
+/// Sorts the peers by subscription optimality (in descending order of
+/// optimality). This requires: (i) sorting the peers by distance from the
+/// validator set and ping latency (lower values are more optimal); and (ii)
+/// filtering out peers that don't support consensus observer.
+///
+/// Note: we prioritize distance over latency as we want to avoid close
+/// but not up-to-date peers. If peers don't have sufficient metadata
+/// for sorting, they are given a lower priority.
+pub fn sort_peers_by_subscription_optimality(
+    peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
+) -> Vec<PeerNetworkId> {
+    // Group peers and latencies by validator distance, i.e., distance -> [(peer, latency)]
+    let mut unsupported_peers = Vec::new();
+    let mut peers_and_latencies_by_distance = BTreeMap::new();
+    for (peer_network_id, peer_metadata) in peers_and_metadata {
+        // Verify that the peer supports consensus observer
+        if !supports_consensus_observer(peer_metadata) {
+            unsupported_peers.push(*peer_network_id);
+            continue; // Skip the peer
+        }
+
+        // Get the distance and latency for the peer
+        let distance = get_distance_for_peer(peer_network_id, peer_metadata);
+        let latency = get_latency_for_peer(peer_network_id, peer_metadata);
+
+        // If the distance is not found, use the maximum distance
+        let distance =
+            distance.unwrap_or(aptos_peer_monitoring_service_types::MAX_DISTANCE_FROM_VALIDATORS);
+
+        // If the latency is not found, use a large latency
+        let latency = latency.unwrap_or(MAX_PING_LATENCY_SECS);
+
+        // Add the peer and latency to the distance group
+        peers_and_latencies_by_distance
+            .entry(distance)
+            .or_insert_with(Vec::new)
+            .push((*peer_network_id, OrderedFloat(latency)));
+    }
+
+    // If there are peers that don't support consensus observer, log them
+    if !unsupported_peers.is_empty() {
+        info!(
+            LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+                "Found {} peers that don't support consensus observer! Peers: {:?}",
+                unsupported_peers.len(),
+                unsupported_peers
+            ))
+        );
+    }
+
+    // Sort the peers by distance and latency. Note: BTreeMaps are
+    // sorted by key, so the entries will be sorted by distance in ascending order.
+    let mut sorted_peers = Vec::new();
+    for (_, mut peers_and_latencies) in peers_and_latencies_by_distance {
+        // Sort the peers by latency
+        peers_and_latencies.sort_by_key(|(_, latency)| *latency);
+
+        // Add the peers to the sorted list (in sorted order)
+        sorted_peers.extend(
+            peers_and_latencies
+                .into_iter()
+                .map(|(peer_network_id, _)| peer_network_id),
+        );
+    }
+
+    // Log the sorted peers
+    info!(
+        LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
+            "Sorted {} peers by subscription optimality! 
Peers: {:?}", + sorted_peers.len(), + sorted_peers + )) + ); + + sorted_peers +} + +/// Returns true iff the peer metadata indicates support for consensus observer +fn supports_consensus_observer(peer_metadata: &PeerMetadata) -> bool { + peer_metadata.supports_protocol(ProtocolId::ConsensusObserver) + && peer_metadata.supports_protocol(ProtocolId::ConsensusObserverRpc) +} + +#[cfg(test)] +mod tests { + use super::*; + use aptos_config::{config::PeerRole, network_id::NetworkId}; + use aptos_netcore::transport::ConnectionOrigin; + use aptos_network::{ + application::storage::PeersAndMetadata, + protocols::wire::handshake::v1::{MessagingProtocolVersion, ProtocolIdSet}, + transport::{ConnectionId, ConnectionMetadata}, + }; + use aptos_peer_monitoring_service_types::{ + response::NetworkInformationResponse, PeerMonitoringMetadata, + }; + use aptos_types::{network_address::NetworkAddress, PeerId}; + use maplit::hashmap; + use std::collections::HashSet; + + #[test] + fn test_sort_peers_by_distance_and_latency() { + // Sort an empty list of peers + let peers_and_metadata = HashMap::new(); + assert!(sort_peers_by_subscription_optimality(&peers_and_metadata).is_empty()); + + // Create a list of peers with empty metadata + let peers_and_metadata = create_peers_and_metadata(true, true, true, 10); + + // Sort the peers and verify the results + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 10); + + // Create a list of peers with valid metadata + let peers_and_metadata = create_peers_and_metadata(false, false, true, 10); + + // Sort the peers + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + + // Verify the order of the peers + verify_increasing_distance_latencies(&peers_and_metadata, &sorted_peers); + assert_eq!(sorted_peers.len(), 10); + + // Create a list of peers with and without metadata + let mut peers_and_metadata = create_peers_and_metadata(false, false, true, 10); + peers_and_metadata.extend(create_peers_and_metadata(true, false, true, 10)); + peers_and_metadata.extend(create_peers_and_metadata(false, true, true, 10)); + peers_and_metadata.extend(create_peers_and_metadata(true, true, true, 10)); + + // Sort the peers + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 40); + + // Verify the order of the first 20 peers + let (first_20_peers, sorted_peers) = sorted_peers.split_at(20); + verify_increasing_distance_latencies(&peers_and_metadata, first_20_peers); + + // Verify that the next 10 peers only have latency metadata + let (next_10_peers, sorted_peers) = sorted_peers.split_at(10); + for sorted_peer in next_10_peers { + let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); + assert!(get_distance_for_peer(sorted_peer, peer_metadata).is_none()); + assert!(get_latency_for_peer(sorted_peer, peer_metadata).is_some()); + } + + // Verify that the last 10 peers have no metadata + let (last_10_peers, remaining_peers) = sorted_peers.split_at(10); + for sorted_peer in last_10_peers { + let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap(); + assert!(get_distance_for_peer(sorted_peer, peer_metadata).is_none()); + assert!(get_latency_for_peer(sorted_peer, peer_metadata).is_none()); + } + assert!(remaining_peers.is_empty()); + } + + #[test] + fn test_sort_peers_by_distance_and_latency_filter() { + // Sort an empty list of peers + let peers_and_metadata = HashMap::new(); + 
assert!(sort_peers_by_subscription_optimality(&peers_and_metadata).is_empty()); + + // Create a list of peers with empty metadata (with consensus observer support) + let peers_and_metadata = create_peers_and_metadata(true, true, true, 10); + + // Sort the peers and verify the results + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 10); + + // Create a list of peers with empty metadata (without consensus observer support) + let peers_and_metadata = create_peers_and_metadata(true, true, false, 10); + + // Sort the peers and verify the results + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert!(sorted_peers.is_empty()); + + // Create a list of peers with valid metadata (without consensus observer support) + let peers_and_metadata = create_peers_and_metadata(false, false, false, 10); + + // Sort the peers and verify the results + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert!(sorted_peers.is_empty()); + + // Create a list of peers with empty metadata (with and without consensus observer support) + let mut peers_and_metadata = create_peers_and_metadata(true, true, true, 5); + peers_and_metadata.extend(create_peers_and_metadata(true, true, false, 50)); + + // Sort the peers and verify the results (only the supported peers are sorted) + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 5); + + // Create a list of peers with valid metadata (with and without consensus observer support) + let mut peers_and_metadata = create_peers_and_metadata(false, false, true, 50); + peers_and_metadata.extend(create_peers_and_metadata(false, false, false, 10)); + + // Sort the peers and verify the results (only the supported peers are sorted) + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers.len(), 50); + + // Create a list of peers with valid metadata (with and without consensus observer support) + let supported_peer_and_metadata = create_peers_and_metadata(false, false, true, 1); + let unsupported_peer_and_metadata = create_peers_and_metadata(false, false, false, 1); + let mut peers_and_metadata = HashMap::new(); + peers_and_metadata.extend(supported_peer_and_metadata.clone()); + peers_and_metadata.extend(unsupported_peer_and_metadata); + + // Sort the peers and verify the results (only the supported peer is sorted) + let supported_peer = supported_peer_and_metadata.keys().next().unwrap(); + let sorted_peers = sort_peers_by_subscription_optimality(&peers_and_metadata); + assert_eq!(sorted_peers, vec![*supported_peer]); + } + + #[tokio::test] + async fn test_sort_peers_for_subscriptions() { + // Create a consensus observer client + let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; + let (peers_and_metadata, consensus_observer_client) = + create_consensus_observer_client(network_ids); + + // Create a consensus publisher + let consensus_observer_config = ConsensusObserverConfig::default(); + let (consensus_publisher, _) = + ConsensusPublisher::new(consensus_observer_config, consensus_observer_client.clone()); + let consensus_publisher = Arc::new(consensus_publisher); + + // Sort the peers and verify that no peers are returned + let sorted_peers = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![], + vec![], + ); + assert!(sorted_peers.is_empty()); + + // Add a connected validator 
peer, VFN peer and public peer + for network_id in network_ids { + let distance_from_validators = match network_id { + NetworkId::Validator => 0, + NetworkId::Vfn => 1, + NetworkId::Public => 2, + }; + create_peer_and_connection( + *network_id, + peers_and_metadata.clone(), + distance_from_validators, + None, + true, + ); + } + + // Sort the peers and verify the ordering (according to distance) + let sorted_peers = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![], + vec![], + ); + assert_eq!(sorted_peers[0].network_id(), NetworkId::Validator); + assert_eq!(sorted_peers[1].network_id(), NetworkId::Vfn); + assert_eq!(sorted_peers[2].network_id(), NetworkId::Public); + assert_eq!(sorted_peers.len(), 3); + + // Sort the peers, but mark the validator as unhealthy (so it's ignored) + let sorted_peer_subset = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![], + vec![sorted_peers[0]], + ); + assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Vfn); + assert_eq!(sorted_peer_subset[1].network_id(), NetworkId::Public); + assert_eq!(sorted_peer_subset.len(), 2); + + // Sort the peers, but mark the VFN and validator as active subscriptions (so they're ignored) + let sorted_peer_subset = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![sorted_peers[0], sorted_peers[1]], + vec![], + ); + assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Public); + assert_eq!(sorted_peer_subset.len(), 1); + + // Create a consensus publisher with the PFN as an active subscriber + let consensus_publisher_with_subscribers = + Arc::new(ConsensusPublisher::new_with_active_subscribers( + consensus_observer_config, + consensus_observer_client.clone(), + HashSet::from_iter(vec![sorted_peers[2]]), + )); + + // Sort the peers, and verify the PFN is ignored (since it's an active subscriber) + let sorted_peer_subset = sort_subscription_peers( + consensus_publisher_with_subscribers, + peers_and_metadata.clone(), + vec![], + vec![], + ); + assert_eq!(sorted_peer_subset[0].network_id(), NetworkId::Validator); + assert_eq!(sorted_peer_subset[1].network_id(), NetworkId::Vfn); + assert_eq!(sorted_peer_subset.len(), 2); + + // Remove all the peers and verify that no peers are returned upon sorting + for peer_network_id in sorted_peers { + remove_peer_and_connection(peers_and_metadata.clone(), peer_network_id); + } + let sorted_peers = sort_subscription_peers( + consensus_publisher.clone(), + peers_and_metadata.clone(), + vec![], + vec![], + ); + assert!(sorted_peers.is_empty()); + + // Add multiple validator peers, with different latencies + let mut validator_peers = vec![]; + for ping_latency_secs in [0.9, 0.8, 0.5, 0.1, 0.05] { + let validator_peer = create_peer_and_connection( + NetworkId::Validator, + peers_and_metadata.clone(), + 0, + Some(ping_latency_secs), + true, + ); + validator_peers.push(validator_peer); + } + + // Sort the peers and verify the ordering (according to latency) + let sorted_peers = sort_subscription_peers( + consensus_publisher, + peers_and_metadata.clone(), + vec![], + vec![], + ); + let expected_peers = validator_peers.into_iter().rev().collect::>(); + assert_eq!(sorted_peers, expected_peers); + } + + /// Creates a new connection metadata for testing + fn create_connection_metadata( + peer_network_id: PeerNetworkId, + support_consensus_observer: bool, + ) -> ConnectionMetadata { + if support_consensus_observer { + // Create a protocol set that supports consensus 
observer
+ let protocol_set = ProtocolIdSet::from_iter(vec![
+ ProtocolId::ConsensusObserver,
+ ProtocolId::ConsensusObserverRpc,
+ ]);
+
+ // Create the connection metadata with the protocol set
+ ConnectionMetadata::new(
+ peer_network_id.peer_id(),
+ ConnectionId::default(),
+ NetworkAddress::mock(),
+ ConnectionOrigin::Inbound,
+ MessagingProtocolVersion::V1,
+ protocol_set,
+ PeerRole::PreferredUpstream,
+ )
+ } else {
+ ConnectionMetadata::mock(peer_network_id.peer_id())
+ }
+ }
+
+ /// Creates a new consensus observer client and a peers and metadata container
+ fn create_consensus_observer_client(
+ network_ids: &[NetworkId],
+ ) -> (
+ Arc<PeersAndMetadata>,
+ Arc<ConsensusObserverClient<NetworkClient<ConsensusObserverMessage>>>,
+ ) {
+ let peers_and_metadata = PeersAndMetadata::new(network_ids);
+ let network_client =
+ NetworkClient::new(vec![], vec![], hashmap![], peers_and_metadata.clone());
+ let consensus_observer_client = Arc::new(ConsensusObserverClient::new(network_client));
+
+ (peers_and_metadata, consensus_observer_client)
+ }
+
+ /// Creates a new peer with the specified connection metadata
+ fn create_peer_and_connection(
+ network_id: NetworkId,
+ peers_and_metadata: Arc<PeersAndMetadata>,
+ distance_from_validators: u64,
+ ping_latency_secs: Option<f64>,
+ support_consensus_observer: bool,
+ ) -> PeerNetworkId {
+ // Create the connection metadata
+ let peer_network_id = PeerNetworkId::new(network_id, PeerId::random());
+ let connection_metadata = if support_consensus_observer {
+ // Create a protocol set that supports consensus observer
+ let protocol_set = ProtocolIdSet::from_iter(vec![
+ ProtocolId::ConsensusObserver,
+ ProtocolId::ConsensusObserverRpc,
+ ]);
+
+ // Create the connection metadata with the protocol set
+ ConnectionMetadata::new(
+ peer_network_id.peer_id(),
+ ConnectionId::default(),
+ NetworkAddress::mock(),
+ ConnectionOrigin::Inbound,
+ MessagingProtocolVersion::V1,
+ protocol_set,
+ PeerRole::PreferredUpstream,
+ )
+ } else {
+ ConnectionMetadata::mock(peer_network_id.peer_id())
+ };
+
+ // Insert the connection into peers and metadata
+ peers_and_metadata
+ .insert_connection_metadata(peer_network_id, connection_metadata.clone())
+ .unwrap();
+
+ // Update the peer monitoring metadata
+ let latest_network_info_response = NetworkInformationResponse {
+ connected_peers: BTreeMap::new(),
+ distance_from_validators,
+ };
+ let monitoring_metadata = PeerMonitoringMetadata::new(
+ ping_latency_secs,
+ ping_latency_secs,
+ Some(latest_network_info_response),
+ None,
+ None,
+ );
+ peers_and_metadata
+ .update_peer_monitoring_metadata(peer_network_id, monitoring_metadata.clone())
+ .unwrap();
+
+ peer_network_id
+ }
+
+ /// Creates a new peer and metadata for testing
+ fn create_peer_and_metadata(
+ latency: Option<f64>,
+ distance_from_validators: Option<u64>,
+ support_consensus_observer: bool,
+ ) -> (PeerNetworkId, PeerMetadata) {
+ // Create a random peer
+ let peer_network_id = PeerNetworkId::random();
+
+ // Create a new peer metadata with the given latency and distance
+ let connection_metadata =
+ create_connection_metadata(peer_network_id, support_consensus_observer);
+ let network_information_response =
+ distance_from_validators.map(|distance| NetworkInformationResponse {
+ connected_peers: BTreeMap::new(),
+ distance_from_validators: distance,
+ });
+ let peer_monitoring_metadata =
+ PeerMonitoringMetadata::new(latency, None, network_information_response, None, None);
+ let peer_metadata =
+ PeerMetadata::new_for_test(connection_metadata, peer_monitoring_metadata);
+
+ (peer_network_id, peer_metadata)
+ }
+
+ /// Creates a list of peers and metadata 
for testing
+ fn create_peers_and_metadata(
+ empty_latency: bool,
+ empty_distance: bool,
+ support_consensus_observer: bool,
+ num_peers: u64,
+ ) -> HashMap<PeerNetworkId, PeerMetadata> {
+ let mut peers_and_metadata = HashMap::new();
+ for i in 1..num_peers + 1 {
+ // Determine the distance for the peer
+ let distance = if empty_distance { None } else { Some(i) };
+
+ // Determine the latency for the peer
+ let latency = if empty_latency { None } else { Some(i as f64) };
+
+ // Create a new peer and metadata
+ let (peer_network_id, peer_metadata) =
+ create_peer_and_metadata(latency, distance, support_consensus_observer);
+ peers_and_metadata.insert(peer_network_id, peer_metadata);
+ }
+ peers_and_metadata
+ }
+
+ /// Removes the peer and connection metadata for the given peer
+ fn remove_peer_and_connection(
+ peers_and_metadata: Arc<PeersAndMetadata>,
+ peer_network_id: PeerNetworkId,
+ ) {
+ let peer_metadata = peers_and_metadata
+ .get_metadata_for_peer(peer_network_id)
+ .unwrap();
+ let connection_id = peer_metadata.get_connection_metadata().connection_id;
+ peers_and_metadata
+ .remove_peer_metadata(peer_network_id, connection_id)
+ .unwrap();
+ }
+
+ /// A simple helper method that sorts the given peers for a subscription
+ fn sort_subscription_peers(
+ consensus_publisher: Arc<ConsensusPublisher>,
+ peers_and_metadata: Arc<PeersAndMetadata>,
+ active_subscription_peers: Vec<PeerNetworkId>,
+ unhealthy_subscription_peers: Vec<PeerNetworkId>,
+ ) -> Vec<PeerNetworkId> {
+ // Get the connected peers and metadata
+ let connected_peers_and_metadata = peers_and_metadata
+ .get_connected_peers_and_metadata()
+ .unwrap();
+
+ // Sort the peers for subscription requests
+ sort_peers_for_subscriptions(
+ connected_peers_and_metadata,
+ unhealthy_subscription_peers,
+ active_subscription_peers,
+ Some(consensus_publisher),
+ )
+ .unwrap()
+ }
+
+ /// Verifies that the distance and latencies for the peers are in
+ /// increasing order (with the distance taking precedence over the latency). 
+ fn verify_increasing_distance_latencies(
+ peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
+ sorted_peers: &[PeerNetworkId],
+ ) {
+ let mut previous_latency = None;
+ let mut previous_distance = 0;
+ for sorted_peer in sorted_peers {
+ // Get the distance and latency for the peer
+ let peer_metadata = peers_and_metadata.get(sorted_peer).unwrap();
+ let distance = get_distance_for_peer(sorted_peer, peer_metadata).unwrap();
+ let latency = get_latency_for_peer(sorted_peer, peer_metadata);
+
+ // Verify the order of the peers
+ if distance == previous_distance {
+ if let Some(latency) = latency {
+ if let Some(previous_latency) = previous_latency {
+ assert!(latency >= previous_latency);
+ }
+ }
+ } else {
+ assert!(distance > previous_distance);
+ }
+
+ // Update the previous latency and distance
+ previous_latency = latency;
+ previous_distance = distance;
+ }
+ }
+}
diff --git a/consensus/src/consensus_observer/publisher/consensus_publisher.rs b/consensus/src/consensus_observer/publisher/consensus_publisher.rs
index 1379c87131cc5..899901593f7ed 100644
--- a/consensus/src/consensus_observer/publisher/consensus_publisher.rs
+++ b/consensus/src/consensus_observer/publisher/consensus_publisher.rs
@@ -70,6 +70,26 @@ impl ConsensusPublisher {
(consensus_publisher, outbound_message_receiver)
}
+ #[cfg(test)]
+ /// Creates a new consensus publisher with the given active subscribers
+ pub fn new_with_active_subscribers(
+ consensus_observer_config: ConsensusObserverConfig,
+ consensus_observer_client: Arc<
+ ConsensusObserverClient<NetworkClient<ConsensusObserverMessage>>,
+ >,
+ active_subscribers: HashSet<PeerNetworkId>,
+ ) -> Self {
+ // Create the consensus publisher
+ let (consensus_publisher, _) =
+ ConsensusPublisher::new(consensus_observer_config, consensus_observer_client);
+
+ // Update the active subscribers
+ *consensus_publisher.active_subscribers.write() = active_subscribers;
+
+ // Return the publisher
+ consensus_publisher
+ }
+
/// Adds the given subscriber to the set of active subscribers
fn add_active_subscriber(&self, peer_network_id: PeerNetworkId) {
self.active_subscribers.write().insert(peer_network_id);
From d779590fb37a710232760c5bca5264ca4fed8181 Mon Sep 17 00:00:00 2001
From: Josh Lind
Date: Thu, 12 Sep 2024 09:05:10 -0400
Subject: [PATCH 15/36] [Consensus Observer] Move subscription health check to
 subscription.rs
---
 .../observer/subscription.rs | 208 +++++++++++++++++-
 .../observer/subscription_manager.rs | 23 +-
 2 files changed, 206 insertions(+), 25 deletions(-)
diff --git a/consensus/src/consensus_observer/observer/subscription.rs b/consensus/src/consensus_observer/observer/subscription.rs
index 7b368fe3417c6..5d9ae4d43def1 100644
--- a/consensus/src/consensus_observer/observer/subscription.rs
+++ b/consensus/src/consensus_observer/observer/subscription.rs
@@ -58,10 +58,38 @@ impl ConsensusObserverSubscription {
}
}
+ /// Checks if the subscription is still healthy. If not, an error
+ /// is returned indicating the reason for the subscription failure. 
+ pub fn check_subscription_health(
+ &mut self,
+ connected_peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
+ ) -> Result<(), Error> {
+ // Verify the subscription peer is still connected
+ let peer_network_id = self.get_peer_network_id();
+ if !connected_peers_and_metadata.contains_key(&peer_network_id) {
+ return Err(Error::SubscriptionDisconnected(format!(
+ "The peer: {:?} is no longer connected!",
+ peer_network_id
+ )));
+ }
+
+ // Verify the subscription has not timed out
+ self.check_subscription_timeout()?;
+
+ // Verify that the DB is continuing to sync and commit new data
+ self.check_syncing_progress()?;
+
+ // Verify that the subscription peer is still optimal
+ self.check_subscription_peer_optimality(connected_peers_and_metadata)?;
+
+ // The subscription seems healthy
+ Ok(())
+ }
+
 /// Verifies that the peer currently selected for the subscription is
 /// optimal. This is only done if: (i) the peers have changed since the
 /// last check; or (ii) enough time has elapsed to force a refresh.
- pub fn check_subscription_peer_optimality(
+ fn check_subscription_peer_optimality(
 &mut self,
 peers_and_metadata: &HashMap<PeerNetworkId, PeerMetadata>,
 ) -> Result<(), Error> {
@@ -120,7 +148,7 @@ impl ConsensusObserverSubscription {
 /// Verifies that the subscription has not timed out based
 /// on the last received message time.
- pub fn check_subscription_timeout(&self) -> Result<(), Error> {
+ fn check_subscription_timeout(&self) -> Result<(), Error> {
 // Calculate the duration since the last message
 let time_now = self.time_service.now();
 let duration_since_last_message = time_now.duration_since(self.last_message_receive_time);
@@ -139,7 +167,7 @@ impl ConsensusObserverSubscription {
 }
 /// Verifies that the DB is continuing to sync and commit new data
- pub fn check_syncing_progress(&mut self) -> Result<(), Error> {
+ fn check_syncing_progress(&mut self) -> Result<(), Error> {
 // Get the current synced version from storage
 let current_synced_version = self
 .db_reader
@@ -212,6 +240,161 @@ mod test {
 }
 }
+ #[test]
+ fn test_check_subscription_health_connected_and_timeout() {
+ // Create a consensus observer config
+ let consensus_observer_config = ConsensusObserverConfig {
+ max_synced_version_timeout_ms: 100_000_000, // Use a large value so that we don't get DB progress errors
+ ..ConsensusObserverConfig::default()
+ };
+
+ // Create a new observer subscription
+ let time_service = TimeService::mock();
+ let peer_network_id = PeerNetworkId::random();
+ let mut subscription = ConsensusObserverSubscription::new(
+ consensus_observer_config,
+ Arc::new(MockDatabaseReader::new()),
+ peer_network_id,
+ time_service.clone(),
+ );
+
+ // Verify that the subscription is unhealthy (the peer is not connected)
+ assert_matches!(
+ subscription.check_subscription_health(&HashMap::new()),
+ Err(Error::SubscriptionDisconnected(_))
+ );
+
+ // Create a peers and metadata map for the subscription
+ let mut peers_and_metadata = HashMap::new();
+ add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false);
+
+ // Elapse enough time to timeout the subscription
+ let mock_time_service = time_service.into_mock();
+ mock_time_service.advance(Duration::from_millis(
+ consensus_observer_config.max_subscription_timeout_ms + 1,
+ ));
+
+ // Verify that the subscription has timed out
+ assert_matches!(
+ subscription.check_subscription_health(&peers_and_metadata),
+ Err(Error::SubscriptionTimeout(_))
+ );
+ }
+
+ #[test]
+ fn test_check_subscription_health_progress() {
+ // Create a consensus observer config with a large timeout
+ let 
consensus_observer_config = ConsensusObserverConfig { + max_subscription_timeout_ms: 100_000_000, // Use a large value so that we don't time out + ..ConsensusObserverConfig::default() + }; + + // Create a mock DB reader with expectations + let first_synced_version = 1; + let second_synced_version = 2; + let mut mock_db_reader = MockDatabaseReader::new(); + mock_db_reader + .expect_get_latest_ledger_info_version() + .returning(move || Ok(first_synced_version)) + .times(1); // Only allow one call for the first version + mock_db_reader + .expect_get_latest_ledger_info_version() + .returning(move || Ok(second_synced_version)); // Allow multiple calls for the second version + + // Create a new observer subscription + let peer_network_id = PeerNetworkId::random(); + let time_service = TimeService::mock(); + let mut subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(mock_db_reader), + peer_network_id, + time_service.clone(), + ); + + // Verify that the DB is making sync progress and that the highest synced version is updated + let mock_time_service = time_service.into_mock(); + verify_subscription_syncing_progress( + &mut subscription, + first_synced_version, + mock_time_service.now(), + ); + + // Elapse enough time to timeout the subscription + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_synced_version_timeout_ms + 1, + )); + + // Verify that the DB is still making sync progress (the next version is higher) + verify_subscription_syncing_progress( + &mut subscription, + second_synced_version, + mock_time_service.now(), + ); + + // Elapse enough time to timeout the subscription + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_synced_version_timeout_ms + 1, + )); + + // Verify that the DB is not making sync progress and that the subscription has timed out + assert_matches!( + subscription.check_syncing_progress(), + Err(Error::SubscriptionProgressStopped(_)) + ); + } + + #[test] + fn test_check_subscription_health_optimality() { + // Create a consensus observer config with a single subscription and large timeouts + let consensus_observer_config = ConsensusObserverConfig { + max_concurrent_subscriptions: 1, + max_subscription_timeout_ms: 100_000_000, // Use a large value so that we don't time out + max_synced_version_timeout_ms: 100_000_000, // Use a large value so that we don't get DB progress errors + ..ConsensusObserverConfig::default() + }; + + // Create a mock DB reader with expectations + let mut mock_db_reader = MockDatabaseReader::new(); + mock_db_reader + .expect_get_latest_ledger_info_version() + .returning(move || Ok(1)); + + // Create a new observer subscription + let time_service = TimeService::mock(); + let peer_network_id = PeerNetworkId::random(); + let mut subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(mock_db_reader), + peer_network_id, + time_service.clone(), + ); + + // Create a peers and metadata map for the subscription + let mut peers_and_metadata = HashMap::new(); + add_metadata_for_peer(&mut peers_and_metadata, peer_network_id, true, false); + + // Verify that the subscription is healthy + assert!(subscription + .check_subscription_health(&peers_and_metadata) + .is_ok()); + + // Add a more optimal peer to the set of peers + let new_optimal_peer = PeerNetworkId::random(); + add_metadata_for_peer(&mut peers_and_metadata, new_optimal_peer, true, true); + + // Elapse enough time for a peer optimality check + let 
mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.subscription_peer_change_interval_ms + 1, + )); + + // Verify that the subscription is no longer optimal + assert_matches!( + subscription.check_subscription_health(&peers_and_metadata), + Err(Error::SubscriptionSuboptimal(_)) + ); + } + #[test] fn test_check_subscription_peer_optimality_single() { // Create a consensus observer config with a maximum of 1 subscription @@ -344,7 +527,7 @@ mod test { } #[test] - fn test_check_subscription_peer_refresh() { + fn test_check_subscription_peer_optimality_refresh() { // Create a consensus observer config with a maximum of 1 subscription let consensus_observer_config = create_observer_config(1); @@ -574,6 +757,23 @@ mod test { ); } + #[test] + fn test_get_peer_network_id() { + // Create a new observer subscription + let consensus_observer_config = ConsensusObserverConfig::default(); + let peer_network_id = PeerNetworkId::random(); + let time_service = TimeService::mock(); + let subscription = ConsensusObserverSubscription::new( + consensus_observer_config, + Arc::new(MockDatabaseReader::new()), + peer_network_id, + time_service.clone(), + ); + + // Verify that the peer network id matches the expected value + assert_eq!(subscription.get_peer_network_id(), peer_network_id); + } + #[test] fn test_update_last_message_receive_time() { // Create a new observer subscription diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs index 16cd756c176dd..deba8d52a98e6 100644 --- a/consensus/src/consensus_observer/observer/subscription_manager.rs +++ b/consensus/src/consensus_observer/observer/subscription_manager.rs @@ -87,26 +87,7 @@ impl SubscriptionManager { // Check the health of the subscription match active_subscription { Some(active_subscription) => { - // Verify the peer is still connected - if !connected_peers_and_metadata.contains_key(&peer_network_id) { - return Err(Error::SubscriptionDisconnected(format!( - "The peer: {:?} is no longer connected!", - peer_network_id - ))); - } - - // Verify the subscription has not timed out - active_subscription.check_subscription_timeout()?; - - // Verify that the DB is continuing to sync and commit new data - active_subscription.check_syncing_progress()?; - - // Verify that the subscription peer is still optimal - active_subscription - .check_subscription_peer_optimality(connected_peers_and_metadata)?; - - // The subscription seems healthy - Ok(()) + active_subscription.check_subscription_health(connected_peers_and_metadata) }, None => Err(Error::UnexpectedError(format!( "The subscription to peer: {:?} is not active!", @@ -217,7 +198,7 @@ impl SubscriptionManager { let db_reader = self.db_reader.clone(); let time_service = self.time_service.clone(); - // Otherwise, we should spawn a new subscription creation task + // Spawn a new subscription creation task let subscription_creation_task = tokio::spawn(async move { // Identify the terminated subscription peers let terminated_subscription_peers = terminated_subscriptions From 0e6ca9887234f67731b99f1f909885d33ffbce8a Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Thu, 12 Sep 2024 11:44:48 -0400 Subject: [PATCH 16/36] [Consensus Observer] Improve subscription manager unit tests. 
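The tests in this change replace repeated raw assertions on the subscription manager with small verification helpers. As a rough, self-contained sketch of that helper pattern (the types below are illustrative stand-ins, not the real SubscriptionManager or PeerNetworkId types):

// Illustrative stand-ins only: a hypothetical peer id and a manager that
// tracks which peers currently have active subscriptions.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Peer(u64);

struct Manager {
    active_peers: Vec<Peer>,
}

// One helper captures the repeated assertion pattern: every expected peer
// is active, and no unexpected peers remain.
fn verify_active_peers(manager: &Manager, expected_peers: Vec<Peer>) {
    for peer in &expected_peers {
        assert!(manager.active_peers.contains(peer));
    }
    assert_eq!(manager.active_peers.len(), expected_peers.len());
}

fn main() {
    let manager = Manager {
        active_peers: vec![Peer(1), Peer(2)],
    };
    verify_active_peers(&manager, vec![Peer(2), Peer(1)]);
}

Centralizing the assertions keeps each test focused on the scenario under test and makes failures easier to localize.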
--- .../observer/subscription_manager.rs | 337 ++++++++++++++---- 1 file changed, 260 insertions(+), 77 deletions(-) diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs index deba8d52a98e6..2d89163e1ae86 100644 --- a/consensus/src/consensus_observer/observer/subscription_manager.rs +++ b/consensus/src/consensus_observer/observer/subscription_manager.rs @@ -430,6 +430,95 @@ mod test { } } + #[tokio::test] + async fn test_check_and_manage_subscriptions() { + // Create a consensus observer client + let network_id = NetworkId::Public; + let (peers_and_metadata, consensus_observer_client) = + create_consensus_observer_client(&[network_id]); + + // Create a new subscription manager + let consensus_observer_config = ConsensusObserverConfig::default(); + let db_reader = create_mock_db_reader(); + let time_service = TimeService::mock(); + let mut subscription_manager = SubscriptionManager::new( + consensus_observer_client, + consensus_observer_config, + None, + db_reader.clone(), + time_service.clone(), + ); + + // Verify that no subscriptions are active + verify_active_subscription_peers(&subscription_manager, vec![]); + + // Check and manage the subscriptions + let result = subscription_manager.check_and_manage_subscriptions().await; + + // Verify that no subscriptions were terminated + assert!(result.is_ok()); + verify_active_subscription_peers(&subscription_manager, vec![]); + + // Add a new connected peer and subscription + let connected_peer_1 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 1, None, true); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + connected_peer_1, + time_service.clone(), + ); + + // Add another connected peer and subscription + let connected_peer_2 = + create_peer_and_connection(network_id, peers_and_metadata.clone(), 2, None, true); + create_observer_subscription( + &mut subscription_manager, + consensus_observer_config, + db_reader.clone(), + connected_peer_2, + TimeService::mock(), // Use a different time service (to avoid timeouts!) 
+ ); + + // Check and manage the subscriptions + subscription_manager + .check_and_manage_subscriptions() + .await + .unwrap(); + + // Verify that the subscriptions are still active + verify_active_subscription_peers(&subscription_manager, vec![ + connected_peer_1, + connected_peer_2, + ]); + + // Elapse time to simulate a timeout for peer 1 + let mock_time_service = time_service.into_mock(); + mock_time_service.advance(Duration::from_millis( + consensus_observer_config.max_subscription_timeout_ms + 1, + )); + + // Check and manage the subscriptions + subscription_manager + .check_and_manage_subscriptions() + .await + .unwrap(); + + // Verify that the first subscription was terminated + verify_active_subscription_peers(&subscription_manager, vec![connected_peer_2]); + + // Disconnect the second peer + remove_peer_and_connection(peers_and_metadata.clone(), connected_peer_2); + + // Check and manage the subscriptions + let result = subscription_manager.check_and_manage_subscriptions().await; + + // Verify that the second subscription was terminated and an error was returned + verify_active_subscription_peers(&subscription_manager, vec![]); + assert_matches!(result, Err(Error::SubscriptionsReset(_))); + } + #[tokio::test] async fn test_check_subscription_health_connected() { // Create a consensus observer client @@ -461,11 +550,8 @@ mod test { // Check the active subscription and verify that it unhealthy (the peer is not connected) check_subscription_connection(&mut subscription_manager, peer_network_id, false); - // Terminate the subscription - let terminated_subscriptions = - terminate_any_unhealthy_subscriptions(&mut subscription_manager); - assert_eq!(terminated_subscriptions.len(), 1); - assert_eq!(terminated_subscriptions.first().unwrap().0, peer_network_id); + // Terminate unhealthy subscriptions and verify the subscription was removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![peer_network_id]); // Add a new connected peer let connected_peer = @@ -480,13 +566,14 @@ mod test { TimeService::mock(), ); - // Check the active subscriptions is still healthy + // Check the active subscription is still healthy check_subscription_connection(&mut subscription_manager, connected_peer, true); + // Terminate unhealthy subscriptions and verify none are removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + // Verify that the active subscription is still present - assert!(subscription_manager - .get_active_subscription_peers() - .contains(&connected_peer)); + verify_active_subscription_peers(&subscription_manager, vec![connected_peer]); } #[tokio::test] @@ -529,6 +616,9 @@ mod test { // Check the active subscription and verify that it is healthy check_subscription_progress(&mut subscription_manager, connected_peer, true); + // Terminate unhealthy subscriptions and verify none are removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + // Elapse time to simulate a DB progress error let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( @@ -538,16 +628,11 @@ mod test { // Check the active subscription and verify that it is unhealthy (the DB is not syncing) check_subscription_progress(&mut subscription_manager, connected_peer, false); - // Terminate the subscription - let terminated_subscriptions = - terminate_any_unhealthy_subscriptions(&mut subscription_manager); - assert_eq!(terminated_subscriptions.len(), 1); - 
assert_eq!(terminated_subscriptions.first().unwrap().0, connected_peer); + // Terminate unhealthy subscriptions and verify the subscription was removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![connected_peer]); // Verify the active subscription is no longer present - assert!(subscription_manager - .get_active_subscription_peers() - .is_empty()); + verify_active_subscription_peers(&subscription_manager, vec![]); } #[tokio::test] @@ -585,6 +670,9 @@ mod test { // Check the active subscription and verify that it is healthy check_subscription_timeout(&mut subscription_manager, connected_peer, true); + // Terminate unhealthy subscriptions and verify none are removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + // Elapse time to simulate a timeout let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( @@ -594,16 +682,11 @@ mod test { // Check the active subscription and verify that it is unhealthy (the subscription timed out) check_subscription_timeout(&mut subscription_manager, connected_peer, false); - // Terminate the subscription - let terminated_subscriptions = - terminate_any_unhealthy_subscriptions(&mut subscription_manager); - assert_eq!(terminated_subscriptions.len(), 1); - assert_eq!(terminated_subscriptions.first().unwrap().0, connected_peer); + // Terminate unhealthy subscriptions and verify the subscription was removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![connected_peer]); // Verify the active subscription is no longer present - assert!(subscription_manager - .get_active_subscription_peers() - .is_empty()); + verify_active_subscription_peers(&subscription_manager, vec![]); } #[tokio::test] @@ -651,6 +734,9 @@ mod test { // Check the active subscription and verify that it is healthy check_subscription_optimality(&mut subscription_manager, suboptimal_peer, true); + // Terminate unhealthy subscriptions and verify none are removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); + // Elapse enough time to trigger the peer optimality check let mock_time_service = time_service.clone().into_mock(); mock_time_service.advance(Duration::from_millis( @@ -666,20 +752,89 @@ mod test { consensus_observer_config.subscription_refresh_interval_ms + 1, )); - // Terminate the subscription - let terminated_subscriptions = - terminate_any_unhealthy_subscriptions(&mut subscription_manager); - assert_eq!(terminated_subscriptions.len(), 1); - assert_eq!(terminated_subscriptions.first().unwrap().0, suboptimal_peer); + // Terminate any unhealthy subscriptions and verify the subscription was removed + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![suboptimal_peer]); // Verify the active subscription is no longer present - assert!(subscription_manager - .get_active_subscription_peers() - .is_empty()); + verify_active_subscription_peers(&subscription_manager, vec![]); } #[tokio::test] - async fn test_terminate_unhealthy_subscriptions() { + #[allow(clippy::await_holding_lock)] // Required to wait on the subscription creation task + async fn test_spawn_subscription_creation_task() { + // Create a consensus observer client + let network_id = NetworkId::Public; + let (_, consensus_observer_client) = create_consensus_observer_client(&[network_id]); + + // Create a new subscription manager + let consensus_observer_config = ConsensusObserverConfig::default(); + let db_reader = create_mock_db_reader(); 
+ let time_service = TimeService::mock(); + let mut subscription_manager = SubscriptionManager::new( + consensus_observer_client, + consensus_observer_config, + None, + db_reader.clone(), + time_service.clone(), + ); + + // Verify that the active subscription creation task is empty + verify_subscription_creation_task(&subscription_manager, false); + + // Spawn a subscription creation task with 0 subscriptions to create + subscription_manager + .spawn_subscription_creation_task(0, vec![], vec![], hashmap![]) + .await; + + // Verify that the active subscription creation task is still empty (no task was spawned) + verify_subscription_creation_task(&subscription_manager, false); + + // Spawn a subscription creation task with 1 subscription to create + subscription_manager + .spawn_subscription_creation_task(1, vec![], vec![], hashmap![]) + .await; + + // Verify that the active subscription creation task is now populated + verify_subscription_creation_task(&subscription_manager, true); + + // Wait for the active subscription creation task to finish + if let Some(active_task) = subscription_manager + .active_subscription_creation_task + .lock() + .as_mut() + { + active_task.await.unwrap(); + } + + // Verify that the active subscription creation task is still present + verify_subscription_creation_task(&subscription_manager, true); + + // Verify that the active subscription creation task is finished + if let Some(active_task) = subscription_manager + .active_subscription_creation_task + .lock() + .as_ref() + { + assert!(active_task.is_finished()); + } + + // Spawn a subscription creation task with 2 subscriptions to create + subscription_manager + .spawn_subscription_creation_task(2, vec![], vec![], hashmap![]) + .await; + + // Verify the new active subscription creation task is not finished + if let Some(active_task) = subscription_manager + .active_subscription_creation_task + .lock() + .as_ref() + { + assert!(!active_task.is_finished()); + }; + } + + #[tokio::test] + async fn test_terminate_unhealthy_subscriptions_multiple() { // Create a consensus observer client let network_id = NetworkId::Public; let (peers_and_metadata, consensus_observer_client) = @@ -713,14 +868,8 @@ mod test { ); } - // Terminate any unhealthy subscriptions and verify that both subscriptions are still healthy - let terminated_subscriptions = - terminate_any_unhealthy_subscriptions(&mut subscription_manager); - assert!(terminated_subscriptions.is_empty()); - assert_eq!( - subscription_manager.get_active_subscription_peers().len(), - 2 - ); + // Terminate unhealthy subscriptions and verify that both subscriptions are still healthy + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![]); // Create another subscription let subscription_peer_3 = @@ -739,18 +888,14 @@ mod test { consensus_observer_config.max_subscription_timeout_ms + 1, )); - // Terminate the unhealthy subscriptions and verify the first two subscriptions were terminated - let terminated_subscriptions = - terminate_any_unhealthy_subscriptions(&mut subscription_manager); - assert_eq!(terminated_subscriptions.len(), 2); - assert_eq!(subscription_manager.get_active_subscription_peers(), vec![ - subscription_peer_3 + // Terminate unhealthy subscriptions and verify the first two subscriptions were terminated + verify_terminated_unhealthy_subscriptions(&mut subscription_manager, vec![ + subscription_peer_1, + subscription_peer_2, ]); - // Verify that both subscriptions were terminated due to a timeout - for (_, error) in 
terminated_subscriptions {
- assert_matches!(error, Error::SubscriptionTimeout(_));
- }
+ // Verify the third subscription is still active
+ verify_active_subscription_peers(&subscription_manager, vec![subscription_peer_3]);
 }
 #[tokio::test]
@@ -771,9 +916,7 @@
 );
 // Verify that no subscriptions are active
- assert!(subscription_manager
- .get_active_subscription_peers()
- .is_empty());
+ verify_active_subscription_peers(&subscription_manager, vec![]);
 // Create a new subscription
 let subscription_peer_1 = PeerNetworkId::random();
@@ -786,9 +929,7 @@
 );
 // Verify the subscription is active
- assert!(subscription_manager
- .get_active_subscription_peers()
- .contains(&subscription_peer_1));
+ verify_active_subscription_peers(&subscription_manager, vec![subscription_peer_1]);
 // Create another subscription
 let subscription_peer_2 = PeerNetworkId::random();
@@ -801,26 +942,16 @@
 );
 // Verify the second subscription is active
- assert!(subscription_manager
- .get_active_subscription_peers()
- .contains(&subscription_peer_2));
+ verify_active_subscription_peers(&subscription_manager, vec![
+ subscription_peer_1,
+ subscription_peer_2,
+ ]);
 // Unsubscribe from the first peer
 subscription_manager.unsubscribe_from_peer(subscription_peer_1);
 // Verify that the first subscription is no longer active
- assert!(!subscription_manager
- .get_active_subscription_peers()
- .contains(&subscription_peer_1));
-
- // Verify that only the second subscription is still active
- assert!(subscription_manager
- .get_active_subscription_peers()
- .contains(&subscription_peer_2));
- assert_eq!(
- subscription_manager.get_active_subscription_peers().len(),
- 1
- );
+ verify_active_subscription_peers(&subscription_manager, vec![subscription_peer_2]);
 }
 #[tokio::test]
@@ -1077,14 +1208,66 @@
 peer_network_id
 }
- /// A simple helper method that terminates any unhealthy subscriptions
- fn terminate_any_unhealthy_subscriptions(
+ /// Removes the peer and connection metadata for the given peer
+ fn remove_peer_and_connection(
+ peers_and_metadata: Arc<PeersAndMetadata>,
+ peer_network_id: PeerNetworkId,
+ ) {
+ let peer_metadata = peers_and_metadata
+ .get_metadata_for_peer(peer_network_id)
+ .unwrap();
+ let connection_id = peer_metadata.get_connection_metadata().connection_id;
+ peers_and_metadata
+ .remove_peer_metadata(peer_network_id, connection_id)
+ .unwrap();
+ }
+
+ /// Verifies the active subscription peers
+ fn verify_active_subscription_peers(
+ subscription_manager: &SubscriptionManager,
+ expected_active_peers: Vec<PeerNetworkId>,
+ ) {
+ // Get the active subscription peers
+ let active_peers = subscription_manager.get_active_subscription_peers();
+
+ // Verify the active subscription peers
+ for peer in &expected_active_peers {
+ assert!(active_peers.contains(peer));
+ }
+ assert_eq!(active_peers.len(), expected_active_peers.len());
+ }
+
+ /// Verifies the status of the active subscription creation task
+ fn verify_subscription_creation_task(
+ subscription_manager: &SubscriptionManager,
+ expect_active_task: bool,
+ ) {
+ let current_active_task = subscription_manager
+ .active_subscription_creation_task
+ .lock()
+ .is_some();
+ assert_eq!(current_active_task, expect_active_task);
+ }
+
+ /// Verifies the list of terminated unhealthy subscriptions
+ fn verify_terminated_unhealthy_subscriptions(
 subscription_manager: &mut SubscriptionManager,
- ) -> Vec<(PeerNetworkId, Error)> {
+ expected_terminated_peers: Vec<PeerNetworkId>,
+ ) {
 // Get the connected peers and metadata
 let connected_peers_and_metadata = 
subscription_manager.get_connected_peers_and_metadata(); // Terminate any unhealthy subscriptions - subscription_manager.terminate_unhealthy_subscriptions(&connected_peers_and_metadata) + let terminated_subscriptions = + subscription_manager.terminate_unhealthy_subscriptions(&connected_peers_and_metadata); + + // Verify the terminated subscriptions + for (terminated_subscription_peer, _) in &terminated_subscriptions { + assert!(expected_terminated_peers.contains(terminated_subscription_peer)); + } + assert_eq!( + terminated_subscriptions.len(), + expected_terminated_peers.len() + ); } } From 4a87ad175d514dd1e1bd1cc766f6fffe5d065d43 Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Fri, 13 Sep 2024 14:03:58 -0400 Subject: [PATCH 17/36] [Consensus Observer] Improve subscription utility unit tests. --- .../observer/subscription_utils.rs | 391 +++++++++++++++++- 1 file changed, 377 insertions(+), 14 deletions(-) diff --git a/consensus/src/consensus_observer/observer/subscription_utils.rs b/consensus/src/consensus_observer/observer/subscription_utils.rs index 7dd5ffa9b2ace..d654af8aaf0d5 100644 --- a/consensus/src/consensus_observer/observer/subscription_utils.rs +++ b/consensus/src/consensus_observer/observer/subscription_utils.rs @@ -358,20 +358,227 @@ fn supports_consensus_observer(peer_metadata: &PeerMetadata) -> bool { #[cfg(test)] mod tests { use super::*; + use aptos_channels::{aptos_channel, message_queues::QueueStyle}; use aptos_config::{config::PeerRole, network_id::NetworkId}; use aptos_netcore::transport::ConnectionOrigin; use aptos_network::{ application::storage::PeersAndMetadata, - protocols::wire::handshake::v1::{MessagingProtocolVersion, ProtocolIdSet}, + peer_manager::{ConnectionRequestSender, PeerManagerRequest, PeerManagerRequestSender}, + protocols::{ + network::{NetworkSender, NewNetworkSender}, + wire::handshake::v1::{MessagingProtocolVersion, ProtocolIdSet}, + }, transport::{ConnectionId, ConnectionMetadata}, }; use aptos_peer_monitoring_service_types::{ response::NetworkInformationResponse, PeerMonitoringMetadata, }; - use aptos_types::{network_address::NetworkAddress, PeerId}; - use maplit::hashmap; + use aptos_storage_interface::Result; + use aptos_types::{network_address::NetworkAddress, transaction::Version, PeerId}; + use bytes::Bytes; + use futures::StreamExt; + use mockall::mock; use std::collections::HashSet; + // This is a simple mock of the DbReader (it generates a MockDatabaseReader) + mock! 
{
+ pub DatabaseReader {}
+ impl DbReader for DatabaseReader {
+ fn get_latest_ledger_info_version(&self) -> Result<Version>;
+ }
+ }
+
+ #[tokio::test(flavor = "multi_thread")]
+ async fn test_create_new_subscriptions() {
+ // Create a consensus observer config and client
+ let consensus_observer_config = ConsensusObserverConfig::default();
+ let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public];
+ let (peers_and_metadata, consensus_observer_client, mut peer_manager_request_receivers) =
+ create_consensus_observer_client(network_ids);
+
+ // Create a list of connected peers (one per network)
+ let mut connected_peers = vec![];
+ for network_id in &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public] {
+ // Create a new peer
+ let peer_network_id = create_peer_and_connection(
+ *network_id,
+ peers_and_metadata.clone(),
+ get_distance_from_validators(network_id),
+ None,
+ true,
+ );
+
+ // Add the peer to the list of sorted peers
+ connected_peers.push(peer_network_id);
+ }
+
+ // Get the connected peers and metadata
+ let connected_peers_and_metadata = peers_and_metadata
+ .get_connected_peers_and_metadata()
+ .unwrap();
+
+ // Spawn the subscription creation task to create 2 subscriptions
+ let num_subscriptions_to_create = 2;
+ let subscription_creation_handle = tokio::spawn(async move {
+ create_new_subscriptions(
+ consensus_observer_config,
+ consensus_observer_client.clone(),
+ None,
+ Arc::new(MockDatabaseReader::new()),
+ TimeService::mock(),
+ connected_peers_and_metadata,
+ num_subscriptions_to_create,
+ vec![],
+ vec![],
+ )
+ .await
+ });
+
+ // Handle the peer manager requests made by the subscription creation task.
+ // The VFN peer should fail the subscription request.
+ for connected_peer in &connected_peers {
+ let network_id = connected_peer.network_id();
+ handle_next_subscription_request(
+ network_id,
+ &mut peer_manager_request_receivers,
+ network_id != NetworkId::Vfn, // The VFN peer should fail the subscription request
+ )
+ .await;
+ }
+
+ // Wait for the subscription creation task to complete
+ let consensus_observer_subscriptions = subscription_creation_handle.await.unwrap();
+
+ // Verify the number of created subscriptions
+ assert_eq!(
+ consensus_observer_subscriptions.len(),
+ num_subscriptions_to_create
+ );
+
+ // Verify the created subscription peers
+ let first_peer = *connected_peers.first().unwrap();
+ let last_peer = *connected_peers.last().unwrap();
+ let expected_subscription_peers = [first_peer, last_peer];
+ for consensus_observer_subscription in consensus_observer_subscriptions {
+ let peer_network_id = consensus_observer_subscription.get_peer_network_id();
+ assert!(expected_subscription_peers.contains(&peer_network_id));
+ }
+ }
+
+ #[tokio::test(flavor = "multi_thread")]
+ async fn test_create_new_subscriptions_multiple() {
+ // Create a consensus observer config and client
+ let consensus_observer_config = ConsensusObserverConfig::default();
+ let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public];
+ let (peers_and_metadata, consensus_observer_client, mut peer_manager_request_receivers) =
+ create_consensus_observer_client(network_ids);
+
+ // Create a list of connected peers (one per network)
+ let mut connected_peers = vec![];
+ for network_id in &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public] {
+ // Create a new peer
+ let peer_network_id = create_peer_and_connection(
+ *network_id,
+ peers_and_metadata.clone(),
+ get_distance_from_validators(network_id),
+ None,
+ true,
+ );
+
+ // 
Add the peer to the list of sorted peers + connected_peers.push(peer_network_id); + } + + // Create multiple sets of subscriptions and verify the results + for num_subscriptions_to_create in [0, 1, 2, 3, 10] { + // Determine the expected subscription peers + let expected_subscription_peers = connected_peers + .iter() + .take(num_subscriptions_to_create) + .cloned() + .collect(); + + // Create the subscriptions and verify the result + create_and_verify_subscriptions( + consensus_observer_config, + peers_and_metadata.clone(), + consensus_observer_client.clone(), + &mut peer_manager_request_receivers, + num_subscriptions_to_create, + expected_subscription_peers, + ) + .await; + } + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_create_single_subscription() { + // Create a consensus observer config and client + let consensus_observer_config = ConsensusObserverConfig::default(); + let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public]; + let (peers_and_metadata, consensus_observer_client, mut peer_manager_request_receivers) = + create_consensus_observer_client(network_ids); + + // Create a list of connected peers (one per network) + let mut connected_peers = vec![]; + for network_id in &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public] { + // Create a new peer + let peer_network_id = + create_peer_and_connection(*network_id, peers_and_metadata.clone(), 0, None, true); + + // Add the peer to the list of sorted peers + connected_peers.push(peer_network_id); + } + + // Spawn the subscription creation task + let sorted_potential_peers = connected_peers.clone(); + let subscription_creation_handle = tokio::spawn(async move { + create_single_subscription( + consensus_observer_config, + consensus_observer_client.clone(), + Arc::new(MockDatabaseReader::new()), + sorted_potential_peers, + TimeService::mock(), + ) + .await + }); + + // Handle the peer manager requests made by the subscription creation task. + // We should only respond successfully to the peer on the public network. 
+ handle_next_subscription_request(
+ NetworkId::Validator,
+ &mut peer_manager_request_receivers,
+ false,
+ )
+ .await;
+ handle_next_subscription_request(
+ NetworkId::Vfn,
+ &mut peer_manager_request_receivers,
+ false,
+ )
+ .await;
+ handle_next_subscription_request(
+ NetworkId::Public,
+ &mut peer_manager_request_receivers,
+ true,
+ )
+ .await;
+
+ // Wait for the subscription creation task to complete
+ let (observer_subscription, failed_subscription_peers) =
+ subscription_creation_handle.await.unwrap();
+
+ // Verify that the public peer was successfully subscribed to
+ assert_eq!(
+ &observer_subscription.unwrap().get_peer_network_id(),
+ connected_peers.last().unwrap()
+ );
+
+ // Verify that the other peers failed our subscription attempts
+ let expected_failed_peers = connected_peers.iter().take(2).cloned().collect::<Vec<_>>();
+ assert_eq!(failed_subscription_peers, expected_failed_peers);
+ }
+
 #[test]
 fn test_sort_peers_by_distance_and_latency() {
 // Sort an empty list of peers
@@ -487,7 +694,7 @@
 async fn test_sort_peers_for_subscriptions() {
 // Create a consensus observer client
 let network_ids = &[NetworkId::Validator, NetworkId::Vfn, NetworkId::Public];
- let (peers_and_metadata, consensus_observer_client) =
+ let (peers_and_metadata, consensus_observer_client, _) =
 create_consensus_observer_client(network_ids);
 // Create a consensus publisher
@@ -507,15 +714,10 @@
 // Add a connected validator peer, VFN peer and public peer
 for network_id in network_ids {
- let distance_from_validators = match network_id {
- NetworkId::Validator => 0,
- NetworkId::Vfn => 1,
- NetworkId::Public => 2,
- };
 create_peer_and_connection(
 *network_id,
 peers_and_metadata.clone(),
- distance_from_validators,
+ get_distance_from_validators(network_id),
 None,
 true,
 );
 }
@@ -609,6 +811,64 @@
 assert_eq!(sorted_peers, expected_peers);
 }
+ /// Creates new subscriptions and verifies the results
+ async fn create_and_verify_subscriptions(
+ consensus_observer_config: ConsensusObserverConfig,
+ peers_and_metadata: Arc<PeersAndMetadata>,
+ consensus_observer_client: Arc<
+ ConsensusObserverClient<NetworkClient<ConsensusObserverMessage>>,
+ >,
+ peer_manager_request_receivers: &mut HashMap<
+ NetworkId,
+ aptos_channel::Receiver<(PeerId, ProtocolId), PeerManagerRequest>,
+ >,
+ num_subscriptions_to_create: usize,
+ expected_subscription_peers: Vec<PeerNetworkId>,
+ ) {
+ // Get the connected peers and metadata
+ let connected_peers_and_metadata = peers_and_metadata
+ .get_connected_peers_and_metadata()
+ .unwrap();
+
+ // Spawn the subscription creation task
+ let subscription_creation_handle = tokio::spawn(async move {
+ create_new_subscriptions(
+ consensus_observer_config,
+ consensus_observer_client.clone(),
+ None,
+ Arc::new(MockDatabaseReader::new()),
+ TimeService::mock(),
+ connected_peers_and_metadata,
+ num_subscriptions_to_create,
+ vec![],
+ vec![],
+ )
+ .await
+ });
+
+ // Handle the peer manager requests made by the subscription creation task
+ for expected_subscription_peer in &expected_subscription_peers {
+ handle_next_subscription_request(
+ expected_subscription_peer.network_id(),
+ peer_manager_request_receivers,
+ true,
+ )
+ .await;
+ }
+
+ // Wait for the subscription creation task to complete
+ let consensus_observer_subscriptions = subscription_creation_handle.await.unwrap();
+
+ // Verify the created subscriptions
+ assert_eq!(
+ consensus_observer_subscriptions.len(),
+ expected_subscription_peers.len()
+ );
+ for subscription in consensus_observer_subscriptions {
 
assert!(expected_subscription_peers.contains(&subscription.get_peer_network_id()));
+ }
+ }
+
 /// Creates a new connection metadata for testing
 fn create_connection_metadata(
 peer_network_id: PeerNetworkId,
@@ -636,19 +896,52 @@
 }
 }
- /// Creates a new consensus observer client and a peers and metadata container
+ /// Creates a new consensus observer client, along with the
+ /// associated network senders and peers and metadata.
 fn create_consensus_observer_client(
 network_ids: &[NetworkId],
 ) -> (
 Arc<PeersAndMetadata>,
 Arc<ConsensusObserverClient<NetworkClient<ConsensusObserverMessage>>>,
+ HashMap<NetworkId, aptos_channel::Receiver<(PeerId, ProtocolId), PeerManagerRequest>>,
 ) {
+ // Create the network senders and receivers for each network
+ let mut network_senders = HashMap::new();
+ let mut peer_manager_request_receivers = HashMap::new();
+ for network_id in network_ids {
+ // Create the request managers
+ let queue_cfg = aptos_channel::Config::new(10).queue_style(QueueStyle::FIFO);
+ let (peer_manager_request_sender, peer_manager_request_receiver) = queue_cfg.build();
+ let (connected_request_sender, _) = queue_cfg.build();
+
+ // Create the network sender
+ let network_sender = NetworkSender::new(
+ PeerManagerRequestSender::new(peer_manager_request_sender),
+ ConnectionRequestSender::new(connected_request_sender),
+ );
+
+ // Save the network sender and the request receiver
+ network_senders.insert(*network_id, network_sender);
+ peer_manager_request_receivers.insert(*network_id, peer_manager_request_receiver);
+ }
+
+ // Create the network client
 let peers_and_metadata = PeersAndMetadata::new(network_ids);
- let network_client =
- NetworkClient::new(vec![], vec![], hashmap![], peers_and_metadata.clone());
+ let network_client = NetworkClient::new(
+ vec![ProtocolId::ConsensusObserver],
+ vec![ProtocolId::ConsensusObserverRpc],
+ network_senders,
+ peers_and_metadata.clone(),
+ );
+
+ // Create the consensus observer client
 let consensus_observer_client = Arc::new(ConsensusObserverClient::new(network_client));
- (peers_and_metadata, consensus_observer_client)
+ (
+ peers_and_metadata,
+ consensus_observer_client,
+ peer_manager_request_receivers,
+ )
 }
 /// Creates a new peer with the specified connection metadata
@@ -754,6 +1047,76 @@
 peers_and_metadata
 }
+ /// Returns the distance from the validators for the specified network
+ fn get_distance_from_validators(network_id: &NetworkId) -> u64 {
+ match network_id {
+ NetworkId::Validator => 0,
+ NetworkId::Vfn => 1,
+ NetworkId::Public => 2,
+ }
+ }
+
+ /// Fetches and handles the next subscription request from the peer manager
+ async fn handle_next_subscription_request(
+ network_id: NetworkId,
+ peer_manager_request_receivers: &mut HashMap<
+ NetworkId,
+ aptos_channel::Receiver<(PeerId, ProtocolId), PeerManagerRequest>,
+ >,
+ return_successfully: bool,
+ ) {
+ // Get the request receiver for the given network
+ let peer_manager_request_receiver =
+ peer_manager_request_receivers.get_mut(&network_id).unwrap();
+
+ // Wait for the next subscription request
+ match peer_manager_request_receiver.next().await {
+ Some(PeerManagerRequest::SendRpc(_, network_request)) => {
+ // Parse the network request
+ let data = network_request.data;
+ let response_sender = network_request.res_tx;
+ let message: ConsensusObserverMessage = bcs::from_bytes(data.as_ref()).unwrap();
+
+ // Process the network message
+ match message {
+ ConsensusObserverMessage::Request(request) => {
+ // Verify the request is for a new subscription
+ match request {
+ ConsensusObserverRequest::Subscribe => (),
+ _ => panic!(
+ "Unexpected consensus observer request received: {:?}!",
+ request
+ ),
 
}
+
+ // Determine the response to send
+ let response = if return_successfully {
+ // Ack the subscription request
+ ConsensusObserverResponse::SubscribeAck
+ } else {
+ // Respond with the wrong message type
+ ConsensusObserverResponse::UnsubscribeAck
+ };
+ let response_message = ConsensusObserverMessage::Response(response);
+
+ // Send the response to the peer
+ let response_bytes =
+ bcs::to_bytes(&response_message).map(Bytes::from).unwrap();
+ let _ = response_sender.send(Ok(response_bytes));
+ },
+ _ => panic!(
+ "Unexpected consensus observer message type received: {:?}!",
+ message
+ ),
+ }
+ },
+ Some(PeerManagerRequest::SendDirectSend(_, _)) => {
+ panic!("Unexpected direct send message received!")
+ },
+ None => panic!("No subscription request received!"),
+ }
+ }
+
 /// Removes the peer and connection metadata for the given peer
 fn remove_peer_and_connection(
 peers_and_metadata: Arc<PeersAndMetadata>,
From f92c74969eae70e5e23dfef4b6f49db675849e64 Mon Sep 17 00:00:00 2001
From: Wolfgang Grieskamp
Date: Tue, 17 Sep 2024 15:17:15 -0700
Subject: [PATCH 18/36] [move-vm] Fixes to enum type implementation (#14657)
---
 aptos-move/framework/src/module_metadata.rs | 22 +++--
 .../move-binary-format/src/check_bounds.rs | 7 +-
 .../src/check_complexity.rs | 1 +
 .../src/proptest_types/types.rs | 17 ++--
 .../src/unit_tests/mod.rs | 1 +
 .../src/unit_tests/variant_name_test.rs | 81 +++++++++++++++++++
 .../src/check_duplication.rs | 67 ++++++++++++++-
 .../move/move-bytecode-verifier/src/limits.rs | 18 ++++-
 .../src/signature_v2.rs | 26 ++++--
 .../move-bytecode-verifier/src/verifier.rs | 13 ++-
 10 files changed, 228 insertions(+), 25 deletions(-)
 create mode 100644 third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/variant_name_test.rs
diff --git a/aptos-move/framework/src/module_metadata.rs b/aptos-move/framework/src/module_metadata.rs
index e0dc1d36b4fa6..18a6178e23bc6 100644
--- a/aptos-move/framework/src/module_metadata.rs
+++ b/aptos-move/framework/src/module_metadata.rs
@@ -624,11 +624,23 @@ fn check_module_complexity(module: &CompiledModule) -> Result<(), MetaDataValida
 check_ident_complexity(module, &mut meter, handle.name)?;
 }
 for def in module.struct_defs() {
- if let StructFieldInformation::Declared(fields) = &def.field_information {
- for field in fields {
- check_ident_complexity(module, &mut meter, field.name)?;
- check_sigtok_complexity(module, &mut meter, &field.signature.0)?
- }
+ match &def.field_information {
+ StructFieldInformation::Native => {},
+ StructFieldInformation::Declared(fields) => {
+ for field in fields {
+ check_ident_complexity(module, &mut meter, field.name)?;
+ check_sigtok_complexity(module, &mut meter, &field.signature.0)?
+ }
+ },
+ StructFieldInformation::DeclaredVariants(variants) => {
+ for variant in variants {
+ check_ident_complexity(module, &mut meter, variant.name)?;
+ for field in &variant.fields {
+ check_ident_complexity(module, &mut meter, field.name)?;
+ check_sigtok_complexity(module, &mut meter, &field.signature.0)? 
+ } + } + }, } } for def in module.function_defs() { diff --git a/third_party/move/move-binary-format/src/check_bounds.rs b/third_party/move/move-binary-format/src/check_bounds.rs index a3629a450af90..cee325ef32f87 100644 --- a/third_party/move/move-binary-format/src/check_bounds.rs +++ b/third_party/move/move-binary-format/src/check_bounds.rs @@ -385,8 +385,11 @@ impl<'a> BoundsChecker<'a> { } }, StructFieldInformation::DeclaredVariants(variants) => { - for field in variants.iter().flat_map(|v| v.fields.iter()) { - self.check_field_def(type_param_count, field)?; + for variant in variants { + check_bounds_impl(self.view.identifiers(), variant.name)?; + for field in &variant.fields { + self.check_field_def(type_param_count, field)?; + } } if variants.is_empty() { // Empty variants are not allowed diff --git a/third_party/move/move-binary-format/src/check_complexity.rs b/third_party/move/move-binary-format/src/check_complexity.rs index 79ccc6b48bfc3..232d530404cc9 100644 --- a/third_party/move/move-binary-format/src/check_complexity.rs +++ b/third_party/move/move-binary-format/src/check_complexity.rs @@ -244,6 +244,7 @@ impl<'a> BinaryComplexityMeter<'a> { }, StructFieldInformation::DeclaredVariants(variants) => { for variant in variants { + self.meter_identifier(variant.name)?; for field in &variant.fields { self.charge(field.signature.0.num_nodes() as u64)?; } diff --git a/third_party/move/move-binary-format/src/proptest_types/types.rs b/third_party/move/move-binary-format/src/proptest_types/types.rs index 03f5a4f7544c3..566d45809a735 100644 --- a/third_party/move/move-binary-format/src/proptest_types/types.rs +++ b/third_party/move/move-binary-format/src/proptest_types/types.rs @@ -230,15 +230,22 @@ impl StructDefinitionGen { for (i, fd) in fields.into_iter().enumerate() { variant_fields[i % self.variants.len()].push(fd) } + let mut seen_names = BTreeSet::new(); StructFieldInformation::DeclaredVariants( variant_fields .into_iter() .zip(self.variants.iter()) - .map(|(fields, name)| VariantDefinition { - name: IdentifierIndex( - name.index(state.identifiers_len) as TableIndex - ), - fields, + .filter_map(|(fields, name)| { + let variant_name = name.index(state.identifiers_len) as TableIndex; + // avoid duplicates + if seen_names.insert(variant_name) { + Some(VariantDefinition { + name: IdentifierIndex(variant_name), + fields, + }) + } else { + None + } }) .collect(), ) diff --git a/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/mod.rs b/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/mod.rs index 8f0d3704aa5ae..0540045fb8b43 100644 --- a/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/mod.rs +++ b/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/mod.rs @@ -22,4 +22,5 @@ pub mod negative_stack_size_tests; pub mod reference_safety_tests; pub mod signature_tests; pub mod struct_defs_tests; +pub mod variant_name_test; pub mod vec_pack_tests; diff --git a/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/variant_name_test.rs b/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/variant_name_test.rs new file mode 100644 index 0000000000000..fd936241cd1af --- /dev/null +++ b/third_party/move/move-bytecode-verifier/bytecode-verifier-tests/src/unit_tests/variant_name_test.rs @@ -0,0 +1,81 @@ +// Copyright (c) The Move Contributors +// SPDX-License-Identifier: Apache-2.0 + +use move_binary_format::{ + file_format::{ + 
AbilitySet, AddressIdentifierIndex, FieldDefinition, IdentifierIndex, ModuleHandle, + ModuleHandleIndex, Signature, SignatureToken, StructDefinition, StructFieldInformation, + StructHandle, StructHandleIndex, StructTypeParameter, TypeSignature, VariantDefinition, + }, + file_format_common::VERSION_7, + CompiledModule, +}; +use move_bytecode_verifier::{ + verifier::verify_module_with_config_for_test_with_version, VerifierConfig, +}; +use move_core_types::{identifier::Identifier, vm_status::StatusCode}; + +/// Tests whether the name of a variant is in bounds. (That is, the IdentifierIndex +/// is in bounds of the identifier table.) +#[test] +fn test_variant_name() { + // This is a POC produced during auditing + let ty = SignatureToken::Bool; + + let cm = CompiledModule { + version: 7, + self_module_handle_idx: ModuleHandleIndex(0), + module_handles: vec![ModuleHandle { + address: AddressIdentifierIndex(0), + name: IdentifierIndex(0), + }], + struct_handles: vec![StructHandle { + module: ModuleHandleIndex(0), + name: IdentifierIndex(0), + abilities: AbilitySet::ALL, + type_parameters: vec![StructTypeParameter { + constraints: AbilitySet::EMPTY, + is_phantom: true, + }], + }], + function_handles: vec![], + field_handles: vec![], + friend_decls: vec![], + struct_def_instantiations: vec![], + function_instantiations: vec![], + field_instantiations: vec![], + signatures: vec![Signature(vec![]), Signature(vec![ty])], + identifiers: vec![Identifier::new("M").unwrap()], + address_identifiers: vec![], + constant_pool: vec![], + metadata: vec![], + struct_defs: vec![StructDefinition { + struct_handle: StructHandleIndex(0), + field_information: StructFieldInformation::DeclaredVariants(vec![VariantDefinition { + fields: vec![FieldDefinition { + name: IdentifierIndex(0), + signature: TypeSignature(SignatureToken::Bool), + }], + // <---- out of bound + name: IdentifierIndex(1), + }]), + }], + function_defs: vec![], + struct_variant_handles: vec![], + struct_variant_instantiations: vec![], + variant_field_handles: vec![], + variant_field_instantiations: vec![], + }; + + let result = verify_module_with_config_for_test_with_version( + "test_variant_name", + &VerifierConfig::production(), + &cm, + Some(VERSION_7), + ); + + assert_eq!( + result.unwrap_err().major_status(), + StatusCode::INDEX_OUT_OF_BOUNDS, + ); +} diff --git a/third_party/move/move-bytecode-verifier/src/check_duplication.rs b/third_party/move/move-bytecode-verifier/src/check_duplication.rs index e79f279dc021d..ad6d317c7b579 100644 --- a/third_party/move/move-bytecode-verifier/src/check_duplication.rs +++ b/third_party/move/move-bytecode-verifier/src/check_duplication.rs @@ -15,7 +15,7 @@ use move_binary_format::{ file_format::{ CompiledModule, CompiledScript, Constant, FieldDefinition, FunctionHandle, FunctionHandleIndex, FunctionInstantiation, ModuleHandle, Signature, - StructFieldInformation, StructHandle, StructHandleIndex, TableIndex, + StructFieldInformation, StructHandle, StructHandleIndex, TableIndex, VariantDefinition, }, IndexKind, }; @@ -52,6 +52,10 @@ impl<'a> DuplicationChecker<'a> { let checker = Self { module }; checker.check_field_handles()?; checker.check_field_instantiations()?; + checker.check_variant_field_handles()?; + checker.check_variant_field_instantiations()?; + checker.check_struct_variant_handles()?; + checker.check_struct_variant_instantiations()?; checker.check_function_definitions()?; checker.check_struct_definitions()?; checker.check_struct_instantiations() @@ -201,6 +205,50 @@ impl<'a> DuplicationChecker<'a> { 
Ok(()) } + fn check_variant_field_handles(&self) -> PartialVMResult<()> { + match Self::first_duplicate_element(self.module.variant_field_handles()) { + Some(idx) => Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::VariantFieldHandle, + idx, + )), + None => Ok(()), + } + } + + fn check_variant_field_instantiations(&self) -> PartialVMResult<()> { + match Self::first_duplicate_element(self.module.variant_field_instantiations()) { + Some(idx) => Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::VariantFieldInstantiation, + idx, + )), + None => Ok(()), + } + } + + fn check_struct_variant_handles(&self) -> PartialVMResult<()> { + match Self::first_duplicate_element(self.module.struct_variant_handles()) { + Some(idx) => Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::StructVariantHandle, + idx, + )), + None => Ok(()), + } + } + + fn check_struct_variant_instantiations(&self) -> PartialVMResult<()> { + match Self::first_duplicate_element(self.module.struct_variant_instantiations()) { + Some(idx) => Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::StructVariantInstantiation, + idx, + )), + None => Ok(()), + } + } + fn check_struct_definitions(&self) -> PartialVMResult<()> { // StructDefinition - contained StructHandle defines uniqueness if let Some(idx) = @@ -212,7 +260,7 @@ impl<'a> DuplicationChecker<'a> { idx, )); } - // Field names in structs must be unique + // Field names in variants and structs must be unique for (struct_idx, struct_def) in self.module.struct_defs().iter().enumerate() { match &struct_def.field_information { StructFieldInformation::Native => continue, @@ -227,6 +275,7 @@ impl<'a> DuplicationChecker<'a> { Self::check_duplicate_fields(fields.iter())? }, StructFieldInformation::DeclaredVariants(variants) => { + Self::check_duplicate_variants(variants.iter())?; for variant in variants { Self::check_duplicate_fields(variant.fields.iter())? } @@ -278,6 +327,20 @@ impl<'a> DuplicationChecker<'a> { } } + fn check_duplicate_variants<'l>( + variants: impl Iterator, + ) -> PartialVMResult<()> { + if let Some(idx) = Self::first_duplicate_element(variants.map(|x| x.name)) { + Err(verification_error( + StatusCode::DUPLICATE_ELEMENT, + IndexKind::VariantDefinition, + idx, + )) + } else { + Ok(()) + } + } + fn check_function_definitions(&self) -> PartialVMResult<()> { // FunctionDefinition - contained FunctionHandle defines uniqueness if let Some(idx) = diff --git a/third_party/move/move-bytecode-verifier/src/limits.rs b/third_party/move/move-bytecode-verifier/src/limits.rs index 8d95b0b55aa13..1fcb2436be6f2 100644 --- a/third_party/move/move-bytecode-verifier/src/limits.rs +++ b/third_party/move/move-bytecode-verifier/src/limits.rs @@ -97,10 +97,20 @@ impl<'a> LimitsVerifier<'a> { } if let Some(sdefs) = self.resolver.struct_defs() { for sdef in sdefs { - if let StructFieldInformation::Declared(fdefs) = &sdef.field_information { - for fdef in fdefs { - self.verify_type_node(config, &fdef.signature.0)? - } + match &sdef.field_information { + StructFieldInformation::Native => {}, + StructFieldInformation::Declared(fdefs) => { + for fdef in fdefs { + self.verify_type_node(config, &fdef.signature.0)? + } + }, + StructFieldInformation::DeclaredVariants(variants) => { + for variant in variants { + for fdef in &variant.fields { + self.verify_type_node(config, &fdef.signature.0)? 
+ } + } + }, } } } diff --git a/third_party/move/move-bytecode-verifier/src/signature_v2.rs b/third_party/move/move-bytecode-verifier/src/signature_v2.rs index e618353a0725f..77388dec740ad 100644 --- a/third_party/move/move-bytecode-verifier/src/signature_v2.rs +++ b/third_party/move/move-bytecode-verifier/src/signature_v2.rs @@ -1151,14 +1151,28 @@ fn max_num_of_ty_params_or_args(resolver: BinaryIndexedView) -> usize { if let Some(struct_defs) = resolver.struct_defs() { for struct_def in struct_defs { - if let StructFieldInformation::Declared(fields) = &struct_def.field_information { - for field in fields { - for ty in field.signature.0.preorder_traversal() { - if let SignatureToken::TypeParameter(ty_param_idx) = ty { - n = n.max(*ty_param_idx as usize + 1) + match &struct_def.field_information { + StructFieldInformation::Native => {}, + StructFieldInformation::Declared(fields) => { + for field in fields { + for ty in field.signature.0.preorder_traversal() { + if let SignatureToken::TypeParameter(ty_param_idx) = ty { + n = n.max(*ty_param_idx as usize + 1) + } } } - } + }, + StructFieldInformation::DeclaredVariants(variants) => { + for variant in variants { + for field in &variant.fields { + for ty in field.signature.0.preorder_traversal() { + if let SignatureToken::TypeParameter(ty_param_idx) = ty { + n = n.max(*ty_param_idx as usize + 1) + } + } + } + } + }, } } } diff --git a/third_party/move/move-bytecode-verifier/src/verifier.rs b/third_party/move/move-bytecode-verifier/src/verifier.rs index 9783d8f33e9e3..506560dacc4cf 100644 --- a/third_party/move/move-bytecode-verifier/src/verifier.rs +++ b/third_party/move/move-bytecode-verifier/src/verifier.rs @@ -63,10 +63,21 @@ pub fn verify_module_with_config_for_test( name: &str, config: &VerifierConfig, module: &CompiledModule, +) -> VMResult<()> { + verify_module_with_config_for_test_with_version(name, config, module, None) +} + +pub fn verify_module_with_config_for_test_with_version( + name: &str, + config: &VerifierConfig, + module: &CompiledModule, + bytecode_version: Option, ) -> VMResult<()> { const MAX_MODULE_SIZE: usize = 65355; let mut bytes = vec![]; - module.serialize(&mut bytes).unwrap(); + module + .serialize_for_version(bytecode_version, &mut bytes) + .unwrap(); let now = Instant::now(); let result = verify_module_with_config(config, module); eprintln!( From a0193e637dd6862662f3535b59f54171a88e6d59 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 17 Sep 2024 15:48:43 -0700 Subject: [PATCH 19/36] Sync up QC in order vote message (#14637) --- .../src/wrapped_ledger_info.rs | 4 + consensus/src/counters.rs | 4 +- consensus/src/pending_order_votes.rs | 94 +++++++---- consensus/src/round_manager.rs | 156 +++++++++++------- 4 files changed, 170 insertions(+), 88 deletions(-) diff --git a/consensus/consensus-types/src/wrapped_ledger_info.rs b/consensus/consensus-types/src/wrapped_ledger_info.rs index 6125f85ca2c94..ee254af17304b 100644 --- a/consensus/consensus-types/src/wrapped_ledger_info.rs +++ b/consensus/consensus-types/src/wrapped_ledger_info.rs @@ -77,6 +77,10 @@ impl WrappedLedgerInfo { &self.signed_ledger_info } + pub fn epoch(&self) -> u64 { + self.ledger_info().ledger_info().epoch() + } + pub fn commit_info(&self) -> &BlockInfo { self.ledger_info().ledger_info().commit_info() } diff --git a/consensus/src/counters.rs b/consensus/src/counters.rs index 214506e6f92bc..1af6f4f8c6da1 100644 --- a/consensus/src/counters.rs +++ b/consensus/src/counters.rs @@ -662,9 +662,9 @@ pub static ORDER_VOTE_ADDED: Lazy = 
Lazy::new(|| {
     .unwrap()
 });
 
-pub static ORDER_VOTE_VERY_OLD: Lazy<IntCounter> = Lazy::new(|| {
+pub static ORDER_VOTE_NOT_IN_RANGE: Lazy<IntCounter> = Lazy::new(|| {
     register_int_counter!(
-        "aptos_consensus_order_vote_very_old",
+        "aptos_consensus_order_vote_not_in_range",
         "Count of the number of order votes that are very old"
     )
     .unwrap()
 });
diff --git a/consensus/src/pending_order_votes.rs b/consensus/src/pending_order_votes.rs
index 7420b565ce3c1..94b1ba6d15451 100644
--- a/consensus/src/pending_order_votes.rs
+++ b/consensus/src/pending_order_votes.rs
@@ -2,7 +2,7 @@
 // Parts of the project are originally copyright © Meta Platforms, Inc.
 // SPDX-License-Identifier: Apache-2.0
 
-use aptos_consensus_types::{common::Author, order_vote::OrderVote};
+use aptos_consensus_types::{common::Author, order_vote::OrderVote, quorum_cert::QuorumCert};
 use aptos_crypto::{hash::CryptoHash, HashValue};
 use aptos_logger::prelude::*;
 use aptos_types::{
@@ -10,7 +10,7 @@ use aptos_types::{
     ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures, LedgerInfoWithSignatures},
     validator_verifier::{ValidatorVerifier, VerifyError},
 };
-use std::collections::HashMap;
+use std::{collections::HashMap, sync::Arc};
 
 /// Result of the order vote processing. The failure case (Verification error) is returned
 /// as the Error part of the result.
@@ -20,7 +20,8 @@ pub enum OrderVoteReceptionResult {
     /// QC currently has.
     VoteAdded(u128),
     /// This block has just been certified after adding the vote.
-    NewLedgerInfoWithSignatures(LedgerInfoWithSignatures),
+    /// Returns the created order certificate and the QC on which the order certificate is based.
+    NewLedgerInfoWithSignatures((Arc<QuorumCert>, LedgerInfoWithSignatures)),
     /// There might be some issues adding a vote
     ErrorAddingVote(VerifyError),
     /// Error happens when aggregating signature
@@ -39,7 +40,9 @@ enum OrderVoteStatus {
 
 pub struct PendingOrderVotes {
     /// Maps LedgerInfo digest to associated signatures (contained in a partial LedgerInfoWithSignatures).
     /// Order vote status caches the information on whether the votes are enough to form a QC.
-    li_digest_to_votes: HashMap<HashValue, OrderVoteStatus>,
+    /// We also store the QC that the order votes certify.
+    li_digest_to_votes:
+        HashMap<HashValue, (QuorumCert, OrderVoteStatus)>,
 }
 
 impl PendingOrderVotes {
@@ -50,29 +53,42 @@ impl PendingOrderVotes {
         }
     }
 
+    pub fn exists(&self, li_digest: &HashValue) -> bool {
+        self.li_digest_to_votes.contains_key(li_digest)
+    }
+
     /// Add a vote to the pending votes
     // TODO: Should we add any counters here?
pub fn insert_order_vote( &mut self, order_vote: &OrderVote, validator_verifier: &ValidatorVerifier, + verified_quorum_cert: Option, ) -> OrderVoteReceptionResult { // derive data from order vote let li_digest = order_vote.ledger_info().hash(); // obtain the ledger info with signatures associated to the order vote's ledger info - let status = self.li_digest_to_votes.entry(li_digest).or_insert_with(|| { + let (quorum_cert, status) = self.li_digest_to_votes.entry(li_digest).or_insert_with(|| { // if the ledger info with signatures doesn't exist yet, create it - OrderVoteStatus::NotEnoughVotes(LedgerInfoWithPartialSignatures::new( - order_vote.ledger_info().clone(), - PartialSignatures::empty(), - )) + ( + verified_quorum_cert.expect( + "Quorum Cert is expected when creating a new entry in pending order votes", + ), + OrderVoteStatus::NotEnoughVotes(LedgerInfoWithPartialSignatures::new( + order_vote.ledger_info().clone(), + PartialSignatures::empty(), + )), + ) }); match status { OrderVoteStatus::EnoughVotes(li_with_sig) => { // we already have enough votes for this ledger info - OrderVoteReceptionResult::NewLedgerInfoWithSignatures(li_with_sig.clone()) + OrderVoteReceptionResult::NewLedgerInfoWithSignatures(( + Arc::new(quorum_cert.clone()), + li_with_sig.clone(), + )) }, OrderVoteStatus::NotEnoughVotes(li_with_sig) => { // we don't have enough votes for this ledger info yet @@ -107,9 +123,10 @@ impl PendingOrderVotes { Ok(ledger_info_with_sig) => { *status = OrderVoteStatus::EnoughVotes(ledger_info_with_sig.clone()); - OrderVoteReceptionResult::NewLedgerInfoWithSignatures( + OrderVoteReceptionResult::NewLedgerInfoWithSignatures(( + Arc::new(quorum_cert.clone()), ledger_info_with_sig, - ) + )) }, Err(e) => OrderVoteReceptionResult::ErrorAggregatingSignature(e), } @@ -135,19 +152,21 @@ impl PendingOrderVotes { // Removes votes older than highest_ordered_round pub fn garbage_collect(&mut self, highest_ordered_round: u64) { - self.li_digest_to_votes.retain(|_, status| match status { - OrderVoteStatus::EnoughVotes(li_with_sig) => { - li_with_sig.ledger_info().round() > highest_ordered_round - }, - OrderVoteStatus::NotEnoughVotes(li_with_sig) => { - li_with_sig.ledger_info().round() > highest_ordered_round - }, - }); + self.li_digest_to_votes + .retain(|_, (_, status)| match status { + OrderVoteStatus::EnoughVotes(li_with_sig) => { + li_with_sig.ledger_info().round() > highest_ordered_round + }, + OrderVoteStatus::NotEnoughVotes(li_with_sig) => { + li_with_sig.ledger_info().round() > highest_ordered_round + }, + }); } pub fn has_enough_order_votes(&self, ledger_info: &LedgerInfo) -> bool { let li_digest = ledger_info.hash(); - if let Some(OrderVoteStatus::EnoughVotes(_)) = self.li_digest_to_votes.get(&li_digest) { + if let Some((_, OrderVoteStatus::EnoughVotes(_))) = self.li_digest_to_votes.get(&li_digest) + { return true; } false @@ -157,7 +176,7 @@ impl PendingOrderVotes { #[cfg(test)] mod tests { use super::{OrderVoteReceptionResult, PendingOrderVotes}; - use aptos_consensus_types::order_vote::OrderVote; + use aptos_consensus_types::{order_vote::OrderVote, quorum_cert::QuorumCert}; use aptos_crypto::HashValue; use aptos_types::{ block_info::BlockInfo, ledger_info::LedgerInfo, @@ -182,6 +201,7 @@ mod tests { // create random vote from validator[0] let li1 = random_ledger_info(); + let qc = QuorumCert::dummy(); let order_vote_1_author_0 = OrderVote::new_with_signature( signers[0].author(), li1.clone(), @@ -190,13 +210,21 @@ mod tests { // first time a new order vote is added -> OrderVoteAdded 
assert_eq!( - pending_order_votes.insert_order_vote(&order_vote_1_author_0, &validator), - OrderVoteReceptionResult::VoteAdded(1) + pending_order_votes.insert_order_vote( + &order_vote_1_author_0, + &validator, + Some(qc.clone()) + ), + OrderVoteReceptionResult::VoteAdded(1), ); // same author voting for the same thing -> OrderVoteAdded assert_eq!( - pending_order_votes.insert_order_vote(&order_vote_1_author_0, &validator), + pending_order_votes.insert_order_vote( + &order_vote_1_author_0, + &validator, + Some(qc.clone()) + ), OrderVoteReceptionResult::VoteAdded(1) ); @@ -208,8 +236,12 @@ mod tests { signers[1].sign(&li2).expect("Unable to sign ledger info"), ); assert_eq!( - pending_order_votes.insert_order_vote(&order_vote_2_author_1, &validator), - OrderVoteReceptionResult::VoteAdded(1) + pending_order_votes.insert_order_vote( + &order_vote_2_author_1, + &validator, + Some(qc.clone()) + ), + OrderVoteReceptionResult::VoteAdded(1), ); assert!(!pending_order_votes.has_enough_order_votes(&li1)); @@ -220,8 +252,12 @@ mod tests { li2.clone(), signers[2].sign(&li2).expect("Unable to sign ledger info"), ); - match pending_order_votes.insert_order_vote(&order_vote_2_author_2, &validator) { - OrderVoteReceptionResult::NewLedgerInfoWithSignatures(li_with_sig) => { + match pending_order_votes.insert_order_vote( + &order_vote_2_author_2, + &validator, + Some(qc.clone()), + ) { + OrderVoteReceptionResult::NewLedgerInfoWithSignatures((_, li_with_sig)) => { assert!(li_with_sig.check_voting_power(&validator).is_ok()); }, _ => { diff --git a/consensus/src/round_manager.rs b/consensus/src/round_manager.rs index 27382493cc2eb..bd7be5172d775 100644 --- a/consensus/src/round_manager.rs +++ b/consensus/src/round_manager.rs @@ -9,8 +9,8 @@ use crate::{ }, counters::{ self, ORDER_CERT_CREATED_WITHOUT_BLOCK_IN_BLOCK_STORE, ORDER_VOTE_ADDED, - ORDER_VOTE_BROADCASTED, ORDER_VOTE_OTHER_ERRORS, ORDER_VOTE_VERY_OLD, PROPOSAL_VOTE_ADDED, - PROPOSAL_VOTE_BROADCASTED, PROPOSED_VTXN_BYTES, PROPOSED_VTXN_COUNT, + ORDER_VOTE_BROADCASTED, ORDER_VOTE_NOT_IN_RANGE, ORDER_VOTE_OTHER_ERRORS, + PROPOSAL_VOTE_ADDED, PROPOSAL_VOTE_BROADCASTED, PROPOSED_VTXN_BYTES, PROPOSED_VTXN_COUNT, QC_AGGREGATED_FROM_VOTES, SYNC_INFO_RECEIVED_WITH_NEWER_CERT, }, error::{error_kind, VerifyError}, @@ -51,7 +51,7 @@ use aptos_consensus_types::{ vote_msg::VoteMsg, wrapped_ledger_info::WrappedLedgerInfo, }; -use aptos_crypto::HashValue; +use aptos_crypto::{hash::CryptoHash, HashValue}; use aptos_infallible::{checked, Mutex}; use aptos_logger::prelude::*; #[cfg(test)] @@ -1090,8 +1090,6 @@ impl RoundManager { }); let order_vote = order_vote_msg.order_vote(); - self.new_qc_from_order_vote_msg(&order_vote_msg).await?; - debug!( self.new_log(LogEvent::ReceiveOrderVote) .remote_peer(order_vote.author()), @@ -1107,26 +1105,53 @@ impl RoundManager { return Ok(()); } - if order_vote_msg.order_vote().ledger_info().round() - > self.block_store.sync_info().highest_ordered_round() + let highest_ordered_round = self.block_store.sync_info().highest_ordered_round(); + let order_vote_round = order_vote_msg.order_vote().ledger_info().round(); + let li_digest = order_vote_msg.order_vote().ledger_info().hash(); + if order_vote_round > highest_ordered_round + && order_vote_round < highest_ordered_round + 100 { - let vote_reception_result = self - .pending_order_votes - .insert_order_vote(order_vote_msg.order_vote(), &self.epoch_state.verifier); - self.process_order_vote_reception_result(vote_reception_result) - .await?; + // If it is the first order vote received for 
the block, verify the QC and insert it along with the QC.
+            // For the subsequent order votes for the same block, we don't have to verify the QC. Just inserting the
+            // order vote is enough.
+            let vote_reception_result = if !self.pending_order_votes.exists(&li_digest) {
+                let start = Instant::now();
+                order_vote_msg
+                    .quorum_cert()
+                    .verify(&self.epoch_state().verifier)
+                    .context("[OrderVoteMsg] QuorumCert verification failed")?;
+                counters::VERIFY_MSG
+                    .with_label_values(&["order_vote_qc"])
+                    .observe(start.elapsed().as_secs_f64());
+                self.pending_order_votes.insert_order_vote(
+                    order_vote_msg.order_vote(),
+                    &self.epoch_state.verifier,
+                    Some(order_vote_msg.quorum_cert().clone()),
+                )
+            } else {
+                self.pending_order_votes.insert_order_vote(
+                    order_vote_msg.order_vote(),
+                    &self.epoch_state.verifier,
+                    None,
+                )
+            };
+            self.process_order_vote_reception_result(
+                vote_reception_result,
+                order_vote_msg.order_vote().author(),
+            )
+            .await?;
         } else {
-            ORDER_VOTE_VERY_OLD.inc();
+            ORDER_VOTE_NOT_IN_RANGE.inc();
             sample!(
-                SampleRate::Duration(Duration::from_secs(30)),
+                SampleRate::Duration(Duration::from_secs(1)),
                 info!(
-                    "[sampled] Received old order vote. Order vote round: {:?}, Highest ordered round: {:?}",
+                    "[sampled] Received an order vote not in the next 100 rounds. Order vote round: {:?}, Highest ordered round: {:?}",
                     order_vote_msg.order_vote().ledger_info().round(),
                     self.block_store.sync_info().highest_ordered_round()
                 )
             );
             debug!(
-                "Received old order vote. Order vote round: {:?}, Highest ordered round: {:?}",
+                "Received an order vote not in the next 100 rounds. Order vote round: {:?}, Highest ordered round: {:?}",
                 order_vote_msg.order_vote().ledger_info().round(),
                 self.block_store.sync_info().highest_ordered_round()
             )
@@ -1315,13 +1340,18 @@ impl RoundManager {
     async fn process_order_vote_reception_result(
         &mut self,
         result: OrderVoteReceptionResult,
+        preferred_peer: Author,
     ) -> anyhow::Result<()> {
         match result {
-            OrderVoteReceptionResult::NewLedgerInfoWithSignatures(ledger_info_with_signatures) => {
-                self.new_ordered_cert(WrappedLedgerInfo::new(
-                    VoteData::dummy(),
-                    ledger_info_with_signatures,
-                ))
+            OrderVoteReceptionResult::NewLedgerInfoWithSignatures((
+                verified_qc,
+                ledger_info_with_signatures,
+            )) => {
+                self.new_ordered_cert(
+                    WrappedLedgerInfo::new(VoteData::dummy(), ledger_info_with_signatures),
+                    verified_qc,
+                    preferred_peer,
+                )
                 .await
             },
             OrderVoteReceptionResult::VoteAdded(_) => {
@@ -1351,49 +1381,61 @@ impl RoundManager {
     async fn new_qc_from_order_vote_msg(
         &mut self,
-        order_vote_msg: &OrderVoteMsg,
+        verified_qc: Arc<QuorumCert>,
+        preferred_peer: Author,
     ) -> anyhow::Result<()> {
-        if let NeedFetchResult::QCAlreadyExist = self
+        match self
             .block_store
-            .need_fetch_for_quorum_cert(order_vote_msg.quorum_cert())
+            .need_fetch_for_quorum_cert(verified_qc.as_ref())
         {
-            return Ok(());
+            NeedFetchResult::QCAlreadyExist => Ok(()),
+            NeedFetchResult::QCBlockExist => {
+                // If the block is already in the block store, but QC isn't available in the block store, insert QC.
+                let result = self
+                    .block_store
+                    .insert_quorum_cert(
+                        verified_qc.as_ref(),
+                        &mut self.create_block_retriever(preferred_peer),
+                    )
+                    .await
+                    .context("[RoundManager] Failed to process the QC from order vote msg");
+                self.process_certificates().await?;
+                result
+            },
+            NeedFetchResult::NeedFetch => {
+                // If the block doesn't exist, we could ideally do sync up based on the qc.
+                // But this could trigger fetching a lot of past blocks in case the node is lagging behind.
+                // So, we just log here to avoid a long sequence of block fetches.
+                // One of the subsequent sync info messages will trigger the block fetch or state sync if required.
+                ORDER_CERT_CREATED_WITHOUT_BLOCK_IN_BLOCK_STORE.inc();
+                sample!(
+                    SampleRate::Duration(Duration::from_millis(200)),
+                    info!(
+                        "Ordered certificate created without block in block store: {:?}",
+                        verified_qc.certified_block()
+                    );
+                );
+                Err(anyhow::anyhow!(
+                    "Ordered certificate created without block in block store"
+                ))
+            },
+            NeedFetchResult::QCRoundBeforeRoot => {
+                Err(anyhow::anyhow!("Ordered certificate is old"))
+            },
         }
-
-        let start = Instant::now();
-        order_vote_msg
-            .quorum_cert()
-            .verify(&self.epoch_state().verifier)
-            .context("[OrderVoteMsg QuorumCert verification failed")?;
-        counters::VERIFY_MSG
-            .with_label_values(&["order_vote_qc"])
-            .observe(start.elapsed().as_secs_f64());
-
-        let result = self
-            .block_store
-            .insert_quorum_cert(
-                order_vote_msg.quorum_cert(),
-                &mut self.create_block_retriever(order_vote_msg.order_vote().author()),
-            )
-            .await
-            .context("[RoundManager] Failed to process the QC from order vote msg");
-        self.process_certificates().await?;
-        result
     }
 
     // Insert ordered certificate formed by aggregating order votes
-    async fn new_ordered_cert(&mut self, ordered_cert: WrappedLedgerInfo) -> anyhow::Result<()> {
-        if self
-            .block_store
-            .get_block(ordered_cert.commit_info().id())
-            .is_none()
-        {
-            ORDER_CERT_CREATED_WITHOUT_BLOCK_IN_BLOCK_STORE.inc();
-            error!(
-                "Ordered certificate created without block in block store: {:?}",
-                ordered_cert
-            );
-        }
+    async fn new_ordered_cert(
+        &mut self,
+        ordered_cert: WrappedLedgerInfo,
+        verified_qc: Arc<QuorumCert>,
+        preferred_peer: Author,
+    ) -> anyhow::Result<()> {
+        self.new_qc_from_order_vote_msg(verified_qc, preferred_peer)
+            .await?;
+
+        // If the block and QC now exist in the block store, insert the ordered cert
         let result = self
             .block_store
             .insert_ordered_cert(&ordered_cert)

From 3f920cba65f5146f68a75eb952dce848bad83222 Mon Sep 17 00:00:00 2001
From: Josh Lind
Date: Fri, 6 Sep 2024 19:42:43 -0400
Subject: [PATCH 20/36] [Consensus Observer] Downgrade unnecessary error logs.

---
 consensus/src/consensus_observer/observer/active_state.rs | 2 +-
 consensus/src/network.rs                                  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/consensus/src/consensus_observer/observer/active_state.rs b/consensus/src/consensus_observer/observer/active_state.rs
index fb5482bba3306..f162fab553e15 100644
--- a/consensus/src/consensus_observer/observer/active_state.rs
+++ b/consensus/src/consensus_observer/observer/active_state.rs
@@ -243,7 +243,7 @@ async fn extract_on_chain_configs(
     let onchain_randomness_config_seq_num: anyhow::Result =
         on_chain_configs.get();
     if let Err(error) = &onchain_randomness_config_seq_num {
-        error!(
+        warn!(
             LogSchema::new(LogEntry::ConsensusObserver).message(&format!(
Error: {:?}", error diff --git a/consensus/src/network.rs b/consensus/src/network.rs index 698e089638513..517c01fce472c 100644 --- a/consensus/src/network.rs +++ b/consensus/src/network.rs @@ -346,7 +346,7 @@ impl NetworkSender { if self.author == peer { let self_msg = Event::Message(self.author, msg.clone()); if let Err(err) = self_sender.send(self_msg).await { - error!(error = ?err, "Error delivering a self msg"); + warn!(error = ?err, "Error delivering a self msg"); } continue; } From ecfa13033b4e674bfdeafc39ade89c498ed683e7 Mon Sep 17 00:00:00 2001 From: Wolfgang Grieskamp Date: Tue, 17 Sep 2024 16:31:41 -0700 Subject: [PATCH 21/36] [compiler-v2] Disallow empty enum types (#14658) Previously, those where compiled to empty structs which in turn lead to ZERO_STRUCT_SIZE bytecode verifier errors. --- .../checking/variants/variants_empty.exp | 7 ++++++ .../checking/variants/variants_empty.move | 3 +++ .../move-model/src/builder/module_builder.rs | 23 ++++++++++++------- 3 files changed, 25 insertions(+), 8 deletions(-) create mode 100644 third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.exp create mode 100644 third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.move diff --git a/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.exp b/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.exp new file mode 100644 index 0000000000000..4ebc0cefaba81 --- /dev/null +++ b/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.exp @@ -0,0 +1,7 @@ + +Diagnostics: +error: enum type `T` must have at least one variant. + ┌─ tests/checking/variants/variants_empty.move:2:5 + │ +2 │ enum T{} + │ ^^^^^^^^ diff --git a/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.move b/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.move new file mode 100644 index 0000000000000..ceecab879f3de --- /dev/null +++ b/third_party/move/move-compiler-v2/tests/checking/variants/variants_empty.move @@ -0,0 +1,3 @@ +module 0x42::variants_empty { + enum T{} +} diff --git a/third_party/move/move-model/src/builder/module_builder.rs b/third_party/move/move-model/src/builder/module_builder.rs index c0ebb45a8c29a..7cf73ae32f664 100644 --- a/third_party/move/move-model/src/builder/module_builder.rs +++ b/third_party/move/move-model/src/builder/module_builder.rs @@ -1257,6 +1257,15 @@ impl<'env, 'translator> ModuleBuilder<'env, 'translator> { } }) .collect_vec(); + if variant_maps.is_empty() { + self.parent.error( + &self.parent.to_loc(&def.loc), + &format!( + "enum type `{}` must have at least one variant.", + qsym.symbol.display(self.parent.env.symbol_pool()) + ), + ) + } (StructLayout::Variants(variant_maps), false) }, EA::StructLayout::Native(_) => (StructLayout::None, false), @@ -3480,9 +3489,10 @@ impl<'env, 'translator> ModuleBuilder<'env, 'translator> { let spec = self.struct_specs.remove(&name.symbol).unwrap_or_default(); let mut field_data: BTreeMap = BTreeMap::new(); let mut variants: BTreeMap = BTreeMap::new(); - match &entry.layout { + let is_enum = match &entry.layout { StructLayout::Singleton(fields, _) => { field_data.extend(fields.values().map(|f| (FieldId::new(f.name), f.clone()))); + false }, StructLayout::Variants(entry_variants) => { for (order, variant) in entry_variants.iter().enumerate() { @@ -3501,9 +3511,10 @@ impl<'env, 'translator> ModuleBuilder<'env, 'translator> { field_data.insert(field_id, field); } } + true }, - StructLayout::None => {}, - } + 
StructLayout::None => false, + }; let data = StructData { name: name.symbol, loc: entry.loc.clone(), @@ -3513,11 +3524,7 @@ impl<'env, 'translator> ModuleBuilder<'env, 'translator> { abilities: entry.abilities, spec_var_opt: None, field_data, - variants: if variants.is_empty() { - None - } else { - Some(variants) - }, + variants: if is_enum { Some(variants) } else { None }, spec: RefCell::new(spec), is_native: entry.is_native, }; From 3b73588de37a99fc622d311310fad7729e33b884 Mon Sep 17 00:00:00 2001 From: Guoteng Rao <3603304+grao1991@users.noreply.github.com> Date: Tue, 17 Sep 2024 17:26:08 -0700 Subject: [PATCH 22/36] Remove an unnecessary clone. (#14659) --- third_party/move/tools/move-resource-viewer/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/move/tools/move-resource-viewer/src/lib.rs b/third_party/move/tools/move-resource-viewer/src/lib.rs index 6691bed783f3f..54ffa7f3c65ab 100644 --- a/third_party/move/tools/move-resource-viewer/src/lib.rs +++ b/third_party/move/tools/move-resource-viewer/src/lib.rs @@ -476,8 +476,8 @@ impl MoveValueAnnotator { values .iter() .zip(tys) - .zip(field_names.iter()) - .map(|((v, ty), n)| self.annotate_value(v, ty, limit).map(|v| (n.clone(), v))) + .zip(field_names) + .map(|((v, ty), n)| self.annotate_value(v, ty, limit).map(|v| (n, v))) .collect::>>() }; From e74444909d848e5511132e06fd0ea2cf20bcea50 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 17 Sep 2024 18:29:19 -0700 Subject: [PATCH 23/36] Addressing PR comments --- types/Cargo.toml | 1 + types/src/ledger_info.rs | 43 ++++++++++++------------------ types/src/validator_verifier.rs | 46 +++++++++++++++++---------------- 3 files changed, 42 insertions(+), 48 deletions(-) diff --git a/types/Cargo.toml b/types/Cargo.toml index d52bec17b259b..e4b52e7e8521c 100644 --- a/types/Cargo.toml +++ b/types/Cargo.toml @@ -29,6 +29,7 @@ base64 = { workspace = true } bcs = { workspace = true } bytes = { workspace = true } dashmap = { workspace = true } +derivative = { workspace = true } fixed = { workspace = true } fxhash = { workspace = true } hashbrown = { workspace = true } diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs index d704defe627ad..0a8fc80e35962 100644 --- a/types/src/ledger_info.rs +++ b/types/src/ledger_info.rs @@ -463,13 +463,11 @@ impl LedgerInfoWithMixedSignatures { } // Collecting all the authors from verified signatures, unverified signatures and the aggregated signature. - pub fn all_voters(&self) -> Vec { + pub fn all_voters(&self) -> impl Iterator { self.verified_signatures .signatures() .keys() .chain(self.unverified_signatures.signatures().keys()) - .cloned() - .collect() } pub fn check_voting_power( @@ -478,10 +476,7 @@ impl LedgerInfoWithMixedSignatures { check_super_majority: bool, ) -> std::result::Result { let all_voters = self.all_voters(); - verifier.check_voting_power( - all_voters.iter().collect_vec().into_iter(), - check_super_majority, - ) + verifier.check_voting_power(all_voters, check_super_majority) } // Aggregates all the signatures, verifies the aggregate signature, and returns the aggregate signature. @@ -500,7 +495,6 @@ impl LedgerInfoWithMixedSignatures { match epoch_state .verifier - .clone() .verify_multi_signatures(self.ledger_info(), &aggregated_sig) { Ok(_) => { @@ -515,7 +509,7 @@ impl LedgerInfoWithMixedSignatures { )) }, Err(_) => { - // Question: Should we assign min tasks per thread here for into_par_iter()? 
+ // Question: How to add counters to keep track of the total time spent in the parallel threads? let verified = self .unverified_signatures .signatures() @@ -536,17 +530,14 @@ impl LedgerInfoWithMixedSignatures { .add_signature(account_address, signature.clone()); self.unverified_signatures.remove_signature(account_address); } - let malicious_authors = self - .unverified_signatures - .signatures() - .keys() - .cloned() - .collect(); - self.unverified_signatures = PartialSignatures::empty(); + // For these authors, we will not use optimistic signature verification in the future. + let pessimistic_authors = self.unverified_signatures.signatures().keys().cloned(); epoch_state .verifier - .add_malicious_authors(malicious_authors); + .add_pessimistic_verify_set(pessimistic_authors); + + self.unverified_signatures = PartialSignatures::empty(); match self.check_voting_power(&epoch_state.verifier, true) { Ok(_) => Ok(LedgerInfoWithSignatures::new( @@ -736,7 +727,7 @@ mod tests { validator_signers[3].sign(&ledger_info).unwrap(), ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 4); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 4); assert_eq!( ledger_info_with_mixed_signatures .unverified_signatures @@ -765,7 +756,7 @@ mod tests { VerificationStatus::Unverified, ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 5); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 5); assert_eq!( ledger_info_with_mixed_signatures .unverified_signatures @@ -807,8 +798,8 @@ mod tests { .len(), 4 ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 4); - assert_eq!(epoch_state.verifier.malicious_authors().len(), 1); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 4); + assert_eq!(epoch_state.verifier.pessimistic_verify_set().len(), 1); ledger_info_with_mixed_signatures.add_signature( validator_signers[5].author(), @@ -820,7 +811,7 @@ mod tests { validator_signers[5].sign(&ledger_info).unwrap(), ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 5); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 5); assert_eq!( ledger_info_with_mixed_signatures .unverified_signatures @@ -867,7 +858,7 @@ mod tests { .len(), 5 ); - assert_eq!(epoch_state.verifier.malicious_authors().len(), 1); + assert_eq!(epoch_state.verifier.pessimistic_verify_set().len(), 1); ledger_info_with_mixed_signatures.add_signature( validator_signers[6].author(), @@ -875,7 +866,7 @@ mod tests { VerificationStatus::Unverified, ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 6); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 6); assert_eq!( ledger_info_with_mixed_signatures .check_voting_power(&validator_verifier, true) @@ -902,7 +893,7 @@ mod tests { .len(), 5 ); - assert_eq!(ledger_info_with_mixed_signatures.all_voters().len(), 5); - assert_eq!(epoch_state.verifier.malicious_authors().len(), 2); + assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 5); + assert_eq!(epoch_state.verifier.pessimistic_verify_set().len(), 2); } } diff --git a/types/src/validator_verifier.rs b/types/src/validator_verifier.rs index 6ca856ae7b61f..26005cf374a5a 100644 --- a/types/src/validator_verifier.rs +++ b/types/src/validator_verifier.rs @@ -18,6 +18,7 @@ use aptos_crypto::{ Signature, VerifyingKey, }; use dashmap::DashSet; +use derivative::Derivative; use itertools::Itertools; #[cfg(any(test, feature = "fuzzing"))] use proptest_derive::Arbitrary; @@ -130,7 +131,8 
@@ impl TryFrom for ValidatorConsensusInfo {
 /// Supports validation of signatures for known authors with individual voting powers. This struct
 /// can be used for all signature verification operations including block and network signature
 /// verification, respectively.
-#[derive(Clone, Debug, Serialize)]
+#[derive(Clone, Debug, Derivative, Serialize)]
+#[derivative(PartialEq, Eq)]
 pub struct ValidatorVerifier {
     /// A vector of each validator's on-chain account address to its pubkeys and voting power.
     validator_infos: Vec<ValidatorConsensusInfo>,
@@ -149,20 +151,21 @@ pub struct ValidatorVerifier {
     /// submitted bad votes that have resulted in having to verify each vote individually. Further votes by these validators
     /// will be verified individually, bypassing the optimization.
     #[serde(skip)]
-    malicious_authors: Arc<DashSet<AccountAddress>>,
+    #[derivative(PartialEq = "ignore")]
+    pessimistic_verify_set: Arc<DashSet<AccountAddress>>,
 }
 
-// Implement Eq and PartialEq for ValidatorVerifier. Skip malicious_authors field in the comparison.
-impl PartialEq for ValidatorVerifier {
-    fn eq(&self, other: &Self) -> bool {
-        self.validator_infos == other.validator_infos
-            && self.quorum_voting_power == other.quorum_voting_power
-            && self.total_voting_power == other.total_voting_power
-            && self.address_to_validator_index == other.address_to_validator_index
-    }
-}
+// // Implement Eq and PartialEq for ValidatorVerifier.
Skip pessimistic_verify_set field in the comparison. -// impl PartialEq for ValidatorVerifier { -// fn eq(&self, other: &Self) -> bool { -// self.validator_infos == other.validator_infos -// && self.quorum_voting_power == other.quorum_voting_power -// && self.total_voting_power == other.total_voting_power -// && self.address_to_validator_index == other.address_to_validator_index -// } -// } - -// impl Eq for ValidatorVerifier {} - /// Reconstruct fields from the raw data upon deserialization. impl<'de> Deserialize<'de> for ValidatorVerifier { fn deserialize(deserializer: D) -> Result From 09ce976ab373c834b1a3ef4de06a743456765404 Mon Sep 17 00:00:00 2001 From: 0xbe1 <0xbetrue@gmail.com> Date: Wed, 18 Sep 2024 16:28:03 +0800 Subject: [PATCH 25/36] fix `aptos move disassemble` help message (#14594) --- crates/aptos/src/move_tool/bytecode.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/aptos/src/move_tool/bytecode.rs b/crates/aptos/src/move_tool/bytecode.rs index 7a80844d5b55f..6a662300b20db 100644 --- a/crates/aptos/src/move_tool/bytecode.rs +++ b/crates/aptos/src/move_tool/bytecode.rs @@ -39,7 +39,7 @@ const DECOMPILER_EXTENSION: &str = "mv.move"; /// /// For example, if you want to disassemble an on-chain package `PackName` at account `0x42`: /// 1. Download the package with `aptos move download --account 0x42 --package PackName --bytecode` -/// 2. Disassemble the package bytecode with `aptos disassemble --package-path PackName/bytecode_modules` +/// 2. Disassemble the package bytecode with `aptos move disassemble --package-path PackName/bytecode_modules` #[derive(Debug, Parser)] pub struct Disassemble { #[clap(flatten)] From 94966aec081c554b115ab44fbda6eb240f2f1b42 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 18 Sep 2024 10:57:28 -0700 Subject: [PATCH 26/36] Cache commit votes received for future rounds in the buffer manager (#14570) --- config/src/config/consensus_config.rs | 3 + consensus/src/pipeline/buffer_item.rs | 3 +- consensus/src/pipeline/buffer_manager.rs | 61 +++++++++++++++++-- .../src/pipeline/decoupled_execution_utils.rs | 2 + consensus/src/pipeline/execution_client.rs | 2 + .../pipeline/tests/buffer_manager_tests.rs | 1 + 6 files changed, 67 insertions(+), 5 deletions(-) diff --git a/config/src/config/consensus_config.rs b/config/src/config/consensus_config.rs index bc45494f2de4c..90526afc77510 100644 --- a/config/src/config/consensus_config.rs +++ b/config/src/config/consensus_config.rs @@ -89,6 +89,8 @@ pub struct ConsensusConfig { pub rand_rb_config: ReliableBroadcastConfig, pub num_bounded_executor_tasks: u64, pub enable_pre_commit: bool, + + pub max_pending_rounds_in_commit_vote_cache: u64, } #[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] @@ -354,6 +356,7 @@ impl Default for ConsensusConfig { }, num_bounded_executor_tasks: 16, enable_pre_commit: true, + max_pending_rounds_in_commit_vote_cache: 100, } } } diff --git a/consensus/src/pipeline/buffer_item.rs b/consensus/src/pipeline/buffer_item.rs index f44cf291c04ae..46e92047d3d18 100644 --- a/consensus/src/pipeline/buffer_item.rs +++ b/consensus/src/pipeline/buffer_item.rs @@ -146,9 +146,10 @@ impl BufferItem { ordered_blocks: Vec, ordered_proof: LedgerInfoWithSignatures, callback: StateComputerCommitCallBackType, + unverified_signatures: PartialSignatures, ) -> Self { Self::Ordered(Box::new(OrderedItem { - unverified_signatures: PartialSignatures::empty(), + unverified_signatures, commit_proof: None, callback, ordered_blocks, diff --git 
a/consensus/src/pipeline/buffer_manager.rs b/consensus/src/pipeline/buffer_manager.rs
index 603a246b228a0..38d5aa8578893 100644
--- a/consensus/src/pipeline/buffer_manager.rs
+++ b/consensus/src/pipeline/buffer_manager.rs
@@ -28,6 +28,7 @@ use aptos_bounded_executor::BoundedExecutor;
 use aptos_config::config::ConsensusObserverConfig;
 use aptos_consensus_types::{
     common::{Author, Round},
+    pipeline::commit_vote::CommitVote,
     pipelined_block::PipelinedBlock,
 };
 use aptos_crypto::HashValue;
@@ -37,8 +38,8 @@ use aptos_network::protocols::{rpc::error::RpcError, wire::handshake::v1::Protoc
 use aptos_reliable_broadcast::{DropGuard, ReliableBroadcast};
 use aptos_time_service::TimeService;
 use aptos_types::{
-    account_address::AccountAddress, epoch_change::EpochChangeProof, epoch_state::EpochState,
-    ledger_info::LedgerInfoWithSignatures,
+    account_address::AccountAddress, aggregate_signature::PartialSignatures,
+    epoch_change::EpochChangeProof, epoch_state::EpochState, ledger_info::LedgerInfoWithSignatures,
 };
 use bytes::Bytes;
 use futures::{
@@ -51,7 +52,7 @@ use futures::{
 };
 use once_cell::sync::OnceCell;
 use std::{
-    collections::BTreeMap,
+    collections::{BTreeMap, HashMap},
     sync::{
         atomic::{AtomicBool, AtomicU64, Ordering},
         Arc,
@@ -164,6 +165,11 @@ pub struct BufferManager {
     consensus_publisher: Option<Arc<ConsensusPublisher>>,
 
     pending_commit_proofs: BTreeMap<Round, LedgerInfoWithSignatures>,
+
+    max_pending_rounds_in_commit_vote_cache: u64,
+    // If the buffer manager receives a commit vote for a block that is not in buffer items, then
+    // the vote will be cached. We can cache up to max_pending_rounds_in_commit_vote_cache (100) blocks.
+    pending_commit_votes: BTreeMap<Round, HashMap<AccountAddress, CommitVote>>,
 }
 
 impl BufferManager {
@@ -194,6 +200,7 @@ impl BufferManager {
         highest_committed_round: Round,
         consensus_observer_config: ConsensusObserverConfig,
         consensus_publisher: Option<Arc<ConsensusPublisher>>,
+        max_pending_rounds_in_commit_vote_cache: u64,
     ) -> Self {
         let buffer = Buffer::<BufferItem>::new();
@@ -257,6 +264,9 @@ impl BufferManager {
             consensus_publisher,
 
             pending_commit_proofs: BTreeMap::new(),
+
+            max_pending_rounds_in_commit_vote_cache,
+            pending_commit_votes: BTreeMap::new(),
         }
     }
@@ -333,6 +343,30 @@ impl BufferManager {
         }
     }
 
+    fn try_add_pending_commit_vote(&mut self, vote: CommitVote) -> bool {
+        let block_id = vote.commit_info().id();
+        let round = vote.commit_info().round();
+
+        // Store the commit vote only if it is for one of the next 100 rounds.
+        if round > self.highest_committed_round
+            && self.highest_committed_round + self.max_pending_rounds_in_commit_vote_cache > round
+        {
+            self.pending_commit_votes
+                .entry(round)
+                .or_default()
+                .insert(vote.author(), vote);
+            true
+        } else {
+            debug!(
+                round = round,
+                highest_committed_round = self.highest_committed_round,
+                block_id = block_id,
+                "Received a commit vote not in the next 100 rounds, ignored."
+ ); + false + } + } + fn drain_pending_commit_proof_till( &mut self, round: Round, @@ -381,7 +415,23 @@ impl BufferManager { .await .expect("Failed to send execution schedule request"); - let item = BufferItem::new_ordered(ordered_blocks, ordered_proof, callback); + let mut unverified_signatures = PartialSignatures::empty(); + if let Some(block) = ordered_blocks.last() { + if let Some(votes) = self.pending_commit_votes.remove(&block.round()) { + votes + .values() + .filter(|vote| vote.commit_info().id() == block.id()) + .for_each(|vote| { + unverified_signatures.add_signature(vote.author(), vote.signature().clone()) + }); + } + } + let item = BufferItem::new_ordered( + ordered_blocks, + ordered_proof, + callback, + unverified_signatures, + ); self.buffer.push_back(item); } @@ -741,6 +791,8 @@ impl BufferManager { } else { return None; } + } else if self.try_add_pending_commit_vote(vote) { + reply_ack(protocol, response_sender); } else { reply_nack(protocol, response_sender); // TODO: send_commit_vote() doesn't care about the response and this should be direct send not RPC } @@ -944,6 +996,7 @@ impl BufferManager { }, Some(Ok(round)) = self.persisting_phase_rx.next() => { // see where `need_backpressure()` is called. + self.pending_commit_votes.retain(|rnd, _| *rnd > round); self.highest_committed_round = round }, Some(rpc_request) = verified_commit_msg_rx.next() => { diff --git a/consensus/src/pipeline/decoupled_execution_utils.rs b/consensus/src/pipeline/decoupled_execution_utils.rs index 039834497bce9..8178d871e7efc 100644 --- a/consensus/src/pipeline/decoupled_execution_utils.rs +++ b/consensus/src/pipeline/decoupled_execution_utils.rs @@ -44,6 +44,7 @@ pub fn prepare_phases_and_buffer_manager( highest_committed_round: u64, consensus_observer_config: ConsensusObserverConfig, consensus_publisher: Option>, + max_pending_rounds_in_commit_vote_cache: u64, ) -> ( PipelinePhase, PipelinePhase, @@ -134,6 +135,7 @@ pub fn prepare_phases_and_buffer_manager( highest_committed_round, consensus_observer_config, consensus_publisher, + max_pending_rounds_in_commit_vote_cache, ), ) } diff --git a/consensus/src/pipeline/execution_client.rs b/consensus/src/pipeline/execution_client.rs index 9228c2dcaedc8..9d50fe08e4a3f 100644 --- a/consensus/src/pipeline/execution_client.rs +++ b/consensus/src/pipeline/execution_client.rs @@ -282,6 +282,8 @@ impl ExecutionProxyClient { highest_committed_round, consensus_observer_config, consensus_publisher, + self.consensus_config + .max_pending_rounds_in_commit_vote_cache, ); tokio::spawn(execution_schedule_phase.start()); diff --git a/consensus/src/pipeline/tests/buffer_manager_tests.rs b/consensus/src/pipeline/tests/buffer_manager_tests.rs index d8ca6523d1c66..9ef9ed94600cd 100644 --- a/consensus/src/pipeline/tests/buffer_manager_tests.rs +++ b/consensus/src/pipeline/tests/buffer_manager_tests.rs @@ -161,6 +161,7 @@ pub fn prepare_buffer_manager( 0, ConsensusObserverConfig::default(), None, + 100, ); ( From 0a9d6543943313cb373c5dc7bb239cacf24ce969 Mon Sep 17 00:00:00 2001 From: Greg Nazario Date: Wed, 28 Aug 2024 10:33:47 -0700 Subject: [PATCH 27/36] [framework] Disable object burn --- .../framework/aptos-framework/doc/object.md | 90 ++++++++++--------- .../aptos-framework/sources/object.move | 44 ++++++--- .../aptos-framework/sources/object.spec.move | 6 +- .../sources/primary_fungible_store.move | 4 +- .../simple_dispatchable_token_pfs_tests.move | 4 +- 5 files changed, 88 insertions(+), 60 deletions(-) diff --git 
a/aptos-move/framework/aptos-framework/doc/object.md b/aptos-move/framework/aptos-framework/doc/object.md index bba128592ff31..f3dae60d94a88 100644 --- a/aptos-move/framework/aptos-framework/doc/object.md +++ b/aptos-move/framework/aptos-framework/doc/object.md @@ -604,6 +604,16 @@ generate_unique_address uses this for domain separation within its native implem + + +Objects cannot be burnt + + +
const EBURN_NOT_ALLOWED: u64 = 10;
+
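A call that previously succeeded now fails fast with this error. A minimal hypothetical call site, for illustration only (owner and obj here are placeholders):

    // Aborts with 0x5000A, i.e. error::permission_denied(EBURN_NOT_ALLOWED),
    // as asserted by the new test_burn_should_fail test later in this patch.
    object::burn(owner, obj);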
+ + + The object does not allow for deletion @@ -2130,12 +2140,13 @@ objects may have cyclic dependencies. ## Function `burn` -Forcefully transfer an unwanted object to BURN_ADDRESS, ignoring whether ungated_transfer is allowed. -This only works for objects directly owned and for simplicity does not apply to indirectly owned objects. -Original owners can reclaim burnt objects any time in the future by calling unburn. +Previously allowed to burn objects, has now been disabled. Objects can still be unburnt. +Please use the test only [object::burn_object] for testing with previously burned objects. -
public entry fun burn<T: key>(owner: &signer, object: object::Object<T>)
+
+
#[deprecated]
+public entry fun burn<T: key>(_owner: &signer, _object: object::Object<T>)
 
@@ -2144,12 +2155,8 @@ Original owners can reclaim burnt objects any time in the future by calling unbu Implementation -
public entry fun burn<T: key>(owner: &signer, object: Object<T>) acquires ObjectCore {
-    let original_owner = signer::address_of(owner);
-    assert!(is_owner(object, original_owner), error::permission_denied(ENOT_OBJECT_OWNER));
-    let object_addr = object.inner;
-    move_to(&create_signer(object_addr), TombStone { original_owner });
-    transfer_raw_inner(object_addr, BURN_ADDRESS);
+
public entry fun burn<T: key>(_owner: &signer, _object: Object<T>) {
+    abort error::permission_denied(EBURN_NOT_ALLOWED)
 }
 
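For tests that exercised the old behavior, a minimal migration sketch, assuming the test-only helpers that this patch keeps or introduces in object.move (create_hero, burn_object, unburn, is_burnt); the test name is hypothetical:

    #[test(creator = @0x123)]
    fun test_burn_object_then_unburn(creator: &signer) acquires ObjectCore, TombStone {
        let (_, hero) = create_hero(creator);
        // The test-only burn_object keeps the old semantics: it tombstones the
        // object and force-transfers it to BURN_ADDRESS.
        burn_object(creator, hero);
        assert!(is_burnt(hero), 0);
        // Original owners can still reclaim burnt objects.
        unburn(creator, hero);
        assert!(!is_burnt(hero), 0);
    }

Note that unburn remains fully supported, so objects burned before this change can still be reclaimed.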
@@ -2441,6 +2448,33 @@ to determine the identity of the starting point of ownership. + + + + +
fun spec_create_object_address(source: address, seed: vector<u8>): address;
+
+ + + + + + + +
fun spec_create_user_derived_object_address(source: address, derive_from: address): address;
+
+ + + + + + + +
fun spec_create_guid_object_address(source: address, creation_num: u64): address;
+
+ + + ### Function `address_to_object` @@ -3245,17 +3279,14 @@ to determine the identity of the starting point of ownership. ### Function `burn` -
public entry fun burn<T: key>(owner: &signer, object: object::Object<T>)
+
#[deprecated]
+public entry fun burn<T: key>(_owner: &signer, _object: object::Object<T>)
 
-
pragma aborts_if_is_partial;
-let object_address = object.inner;
-aborts_if !exists<ObjectCore>(object_address);
-aborts_if owner(object) != signer::address_of(owner);
-aborts_if is_burnt(object);
+
aborts_if true;
 
@@ -3368,31 +3399,4 @@ to determine the identity of the starting point of ownership.
- - - - - -
fun spec_create_object_address(source: address, seed: vector<u8>): address;
-
- - - - - - - -
fun spec_create_user_derived_object_address(source: address, derive_from: address): address;
-
- - - - - - - -
fun spec_create_guid_object_address(source: address, creation_num: u64): address;
-
- - [move-book]: https://aptos.dev/move/book/SUMMARY diff --git a/aptos-move/framework/aptos-framework/sources/object.move b/aptos-move/framework/aptos-framework/sources/object.move index 6e809e87e8736..c03914fb7675c 100644 --- a/aptos-move/framework/aptos-framework/sources/object.move +++ b/aptos-move/framework/aptos-framework/sources/object.move @@ -50,6 +50,8 @@ module aptos_framework::object { const EOBJECT_NOT_BURNT: u64 = 8; /// Object is untransferable any operations that might result in a transfer are disallowed. const EOBJECT_NOT_TRANSFERRABLE: u64 = 9; + /// Objects cannot be burnt + const EBURN_NOT_ALLOWED: u64 = 10; /// Explicitly separate the GUID space between Object and Account to prevent accidental overlap. const INIT_GUID_CREATION_NUM: u64 = 0x4000000000000; @@ -610,15 +612,12 @@ module aptos_framework::object { }; } - /// Forcefully transfer an unwanted object to BURN_ADDRESS, ignoring whether ungated_transfer is allowed. - /// This only works for objects directly owned and for simplicity does not apply to indirectly owned objects. - /// Original owners can reclaim burnt objects any time in the future by calling unburn. - public entry fun burn(owner: &signer, object: Object) acquires ObjectCore { - let original_owner = signer::address_of(owner); - assert!(is_owner(object, original_owner), error::permission_denied(ENOT_OBJECT_OWNER)); - let object_addr = object.inner; - move_to(&create_signer(object_addr), TombStone { original_owner }); - transfer_raw_inner(object_addr, BURN_ADDRESS); + #[deprecated] + /// Previously allowed to burn objects, has now been disabled. Objects can still be unburnt. + /// + /// Please use the test only [`object::burn_object`] for testing with previously burned objects. + public entry fun burn(_owner: &signer, _object: Object) { + abort error::permission_denied(EBURN_NOT_ALLOWED) } /// Allow origin owners to reclaim any objects they previous burnt. @@ -705,6 +704,20 @@ module aptos_framework::object { #[test_only] const EWEAPON_DOES_NOT_EXIST: u64 = 0x101; + #[test_only] + /// For testing the previous behavior of `object::burn()` + /// + /// Forcefully transfer an unwanted object to BURN_ADDRESS, ignoring whether ungated_transfer is allowed. + /// This only works for objects directly owned and for simplicity does not apply to indirectly owned objects. + /// Original owners can reclaim burnt objects any time in the future by calling unburn. + public fun burn_object(owner: &signer, object: Object) acquires ObjectCore { + let original_owner = signer::address_of(owner); + assert!(is_owner(object, original_owner), error::permission_denied(ENOT_OBJECT_OWNER)); + let object_addr = object.inner; + move_to(&create_signer(object_addr), TombStone { original_owner }); + transfer_raw_inner(object_addr, BURN_ADDRESS); + } + #[test_only] struct HeroEquipEvent has drop, store { weapon_id: Option>, @@ -820,7 +833,7 @@ module aptos_framework::object { #[expected_failure(abort_code = 0x10008, location = Self)] fun test_cannot_unburn_after_transfer_with_ref(creator: &signer) acquires ObjectCore, TombStone { let (hero_constructor, hero) = create_hero(creator); - burn(creator, hero); + burn_object(creator, hero); let transfer_ref = generate_transfer_ref(&hero_constructor); transfer_with_ref(generate_linear_transfer_ref(&transfer_ref), @0x456); unburn(creator, hero); @@ -876,7 +889,7 @@ module aptos_framework::object { disable_ungated_transfer(&transfer_ref); // Owner should be able to burn, despite ungated transfer disallowed. 
- burn(creator, hero); + burn_object(creator, hero); assert!(owner(hero) == BURN_ADDRESS, 0); assert!(!ungated_transfer_allowed(hero), 0); @@ -897,7 +910,7 @@ module aptos_framework::object { // Owner should be not be able to burn weapon directly. assert!(owner(weapon) == object_address(&hero), 0); assert!(owns(weapon, signer::address_of(creator)), 0); - burn(creator, weapon); + burn_object(creator, weapon); } #[test(creator = @0x123)] @@ -907,6 +920,13 @@ module aptos_framework::object { unburn(creator, hero); } + #[test(creator = @0x123)] + #[expected_failure(abort_code = 0x5000A, location = Self)] + fun test_burn_should_fail(creator: &signer) acquires ObjectCore { + let (_, hero) = create_hero(creator); + burn(creator, hero); + } + #[test_only] fun create_simple_object(creator: &signer, seed: vector): Object { object_from_constructor_ref(&create_named_object(creator, seed)) diff --git a/aptos-move/framework/aptos-framework/sources/object.spec.move b/aptos-move/framework/aptos-framework/sources/object.spec.move index d2627d649fd61..51ae05b568368 100644 --- a/aptos-move/framework/aptos-framework/sources/object.spec.move +++ b/aptos-move/framework/aptos-framework/sources/object.spec.move @@ -475,7 +475,11 @@ spec aptos_framework::object { aborts_if !global(object_address).allow_ungated_transfer; } - spec burn(owner: &signer, object: Object) { + spec burn(_owner: &signer, _object: Object) { + aborts_if true; + } + + spec burn_object(owner: &signer, object: Object) { pragma aborts_if_is_partial; let object_address = object.inner; aborts_if !exists(object_address); diff --git a/aptos-move/framework/aptos-framework/sources/primary_fungible_store.move b/aptos-move/framework/aptos-framework/sources/primary_fungible_store.move index fc20e1cf311a6..9e39b97fa2854 100644 --- a/aptos-move/framework/aptos-framework/sources/primary_fungible_store.move +++ b/aptos-move/framework/aptos-framework/sources/primary_fungible_store.move @@ -372,7 +372,7 @@ module aptos_framework::primary_fungible_store { // User 2 burns their primary store but should still be able to transfer afterward. let user_2_primary_store = primary_store(user_2_address, metadata); - object::burn(user_2, user_2_primary_store); + object::burn_object(user_2, user_2_primary_store); assert!(object::is_burnt(user_2_primary_store), 0); // Balance still works assert!(balance(user_2_address, metadata) == 80, 0); @@ -396,7 +396,7 @@ module aptos_framework::primary_fungible_store { // User 2 burns their primary store but should still be able to withdraw afterward. let user_2_primary_store = primary_store(user_2_address, metadata); - object::burn(user_2, user_2_primary_store); + object::burn_object(user_2, user_2_primary_store); assert!(object::is_burnt(user_2_primary_store), 0); let coins = withdraw(user_2, metadata, 70); assert!(balance(user_2_address, metadata) == 10, 0); diff --git a/aptos-move/framework/aptos-framework/tests/simple_dispatchable_token_pfs_tests.move b/aptos-move/framework/aptos-framework/tests/simple_dispatchable_token_pfs_tests.move index 1b80c489024e5..d069923a5f8ef 100644 --- a/aptos-move/framework/aptos-framework/tests/simple_dispatchable_token_pfs_tests.move +++ b/aptos-move/framework/aptos-framework/tests/simple_dispatchable_token_pfs_tests.move @@ -28,7 +28,7 @@ module aptos_framework::simple_token_pfs_tests { // User 2 burns their primary store but should still be able to transfer afterward. 
let user_2_primary_store = primary_store(user_2_address, metadata); - object::burn(user_2, user_2_primary_store); + object::burn_object(user_2, user_2_primary_store); assert!(object::is_burnt(user_2_primary_store), 0); // Balance still works assert!(balance(user_2_address, metadata) == 80, 0); @@ -54,7 +54,7 @@ module aptos_framework::simple_token_pfs_tests { // User 2 burns their primary store but should still be able to withdraw afterward. let user_2_primary_store = primary_store(user_2_address, metadata); - object::burn(user_2, user_2_primary_store); + object::burn_object(user_2, user_2_primary_store); assert!(object::is_burnt(user_2_primary_store), 0); let coins = withdraw(user_2, metadata, 70); assert!(balance(user_2_address, metadata) == 10, 0); From 20335bba8659f1df6b708115aca45db6a40ea32f Mon Sep 17 00:00:00 2001 From: Victor Gao <10379359+vgao1996@users.noreply.github.com> Date: Wed, 18 Sep 2024 11:33:35 -0700 Subject: [PATCH 28/36] [gas] bump gas feature version to 1.20 (#14668) --- aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs | 8 ++++---- aptos-move/aptos-gas-schedule/src/ver.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs b/aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs index 26557162fb157..1e7f1a4860103 100644 --- a/aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs +++ b/aptos-move/aptos-gas-schedule/src/gas_schedule/instr.rs @@ -50,15 +50,15 @@ crate::gas_schedule::macros::define_gas_parameters!( [mut_borrow_variant_field: InternalGas, { RELEASE_V1_18.. => "mut_borrow_variant_field" }, 835], [imm_borrow_variant_field_generic: InternalGas, - { RELEASE_V1_18 => "imm_borrow_variant_field_generic" }, 835], + { RELEASE_V1_18.. => "imm_borrow_variant_field_generic" }, 835], [mut_borrow_variant_field_generic: InternalGas, - { RELEASE_V1_18 => "mut_borrow_variant_field_generic" }, 835], + { RELEASE_V1_18.. => "mut_borrow_variant_field_generic" }, 835], // variant testing [test_variant: InternalGas, - { RELEASE_V1_18 => "test_variant" }, 535], + { RELEASE_V1_18.. => "test_variant" }, 535], [test_variant_generic: InternalGas, - { RELEASE_V1_18 => "test_variant_generic" }, 535], + { RELEASE_V1_18.. => "test_variant_generic" }, 535], // locals [copy_loc_base: InternalGas, "copy_loc.base", 294], diff --git a/aptos-move/aptos-gas-schedule/src/ver.rs b/aptos-move/aptos-gas-schedule/src/ver.rs index f8b5d7617151c..2df67131a21f7 100644 --- a/aptos-move/aptos-gas-schedule/src/ver.rs +++ b/aptos-move/aptos-gas-schedule/src/ver.rs @@ -69,7 +69,7 @@ /// global operations. /// - V1 /// - TBA -pub const LATEST_GAS_FEATURE_VERSION: u64 = gas_feature_versions::RELEASE_V1_18; +pub const LATEST_GAS_FEATURE_VERSION: u64 = gas_feature_versions::RELEASE_V1_20; pub mod gas_feature_versions { pub const RELEASE_V1_8: u64 = 11; From f5fa2f8d5044c5811734c68fe25d37f79a46c105 Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Tue, 17 Sep 2024 18:00:03 -0400 Subject: [PATCH 29/36] [Consensus Observer] Enable CO for VFNs. 
--- config/src/config/consensus_observer_config.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/src/config/consensus_observer_config.rs b/config/src/config/consensus_observer_config.rs index 0ca55c31d50e9..02d8572134950 100644 --- a/config/src/config/consensus_observer_config.rs +++ b/config/src/config/consensus_observer_config.rs @@ -9,8 +9,8 @@ use serde::{Deserialize, Serialize}; use serde_yaml::Value; // Useful constants for enabling consensus observer on different node types -const ENABLE_ON_VALIDATORS: bool = false; -const ENABLE_ON_VALIDATOR_FULLNODES: bool = false; +const ENABLE_ON_VALIDATORS: bool = true; +const ENABLE_ON_VALIDATOR_FULLNODES: bool = true; const ENABLE_ON_PUBLIC_FULLNODES: bool = false; #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)] From 25a081116546670e62ca927ba90478de78557056 Mon Sep 17 00:00:00 2001 From: Josh Lind Date: Wed, 18 Sep 2024 11:00:23 -0400 Subject: [PATCH 30/36] [Consensus Observer] Improve error messages for payload verification. --- .../network/network_handler.rs | 2 +- .../network/observer_message.rs | 45 +++++++++++++++---- .../observer/subscription_manager.rs | 6 +-- .../observer/subscription_utils.rs | 2 +- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/consensus/src/consensus_observer/network/network_handler.rs b/consensus/src/consensus_observer/network/network_handler.rs index d8aa1447312f7..bbaeca0dc4843 100644 --- a/consensus/src/consensus_observer/network/network_handler.rs +++ b/consensus/src/consensus_observer/network/network_handler.rs @@ -208,7 +208,7 @@ impl ConsensusObserverNetworkHandler { None => { error!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( - "Missing response sender for RCP request: {:?}", + "Missing response sender for the RPC request: {:?}", request )) ); diff --git a/consensus/src/consensus_observer/network/observer_message.rs b/consensus/src/consensus_observer/network/observer_message.rs index 6ecb14d7995de..8b673f6335f56 100644 --- a/consensus/src/consensus_observer/network/observer_message.rs +++ b/consensus/src/consensus_observer/network/observer_message.rs @@ -663,23 +663,50 @@ impl BlockPayload { /// Verifies the block payload digests and returns an error if the data is invalid pub fn verify_payload_digests(&self) -> Result<(), Error> { - // Verify the proof of store digests against the transaction + // Get the transactions, payload proofs and inline batches let transactions = self.transaction_payload.transactions(); + let payload_proofs = self.transaction_payload.payload_proofs(); + let inline_batches = self.transaction_payload.inline_batches(); + + // Get the number of transactions, payload proofs and inline batches + let num_transactions = transactions.len(); + let num_payload_proofs = payload_proofs.len(); + let num_inline_batches = inline_batches.len(); + + // Verify the payload proof digests using the transactions let mut transactions_iter = transactions.iter(); - for proof_of_store in &self.transaction_payload.payload_proofs() { - reconstruct_and_verify_batch(&mut transactions_iter, proof_of_store.info())?; + for proof_of_store in &payload_proofs { + reconstruct_and_verify_batch(&mut transactions_iter, proof_of_store.info()).map_err( + |error| { + Error::InvalidMessageError(format!( + "Failed to verify payload proof digests! 
Num transactions: {:?}, \ + num batches: {:?}, num inline batches: {:?}, failed batch: {:?}, Error: {:?}", + num_transactions, num_payload_proofs, num_inline_batches, proof_of_store.info(), error + )) + }, + )?; } - // Verify the inline batch digests against the inline batches - for batch_info in self.transaction_payload.inline_batches() { - reconstruct_and_verify_batch(&mut transactions_iter, batch_info)?; + // Verify the inline batch digests using the transactions + for batch_info in inline_batches.into_iter() { + reconstruct_and_verify_batch(&mut transactions_iter, batch_info).map_err( + |error| { + Error::InvalidMessageError(format!( + "Failed to verify inline batch digests! Num transactions: {:?}, \ + num batches: {:?}, num inline batches: {:?}, failed batch: {:?}, Error: {:?}", + num_transactions, num_payload_proofs, num_inline_batches, batch_info, error + )) + }, + )?; } - // Verify that there are no transactions remaining + // Verify that there are no transactions remaining (all transactions should be consumed) let remaining_transactions = transactions_iter.as_slice(); if !remaining_transactions.is_empty() { return Err(Error::InvalidMessageError(format!( - "Failed to verify payload transactions! Transactions remaining: {:?}. Expected: 0", + "Failed to verify payload transactions! Num transactions: {:?}, \ + transactions remaining: {:?}. Expected: 0", + num_transactions, remaining_transactions.len() ))); } @@ -740,7 +767,7 @@ fn reconstruct_and_verify_batch( let expected_digest = expected_batch_info.digest(); if batch_digest != *expected_digest { return Err(Error::InvalidMessageError(format!( - "The reconstructed batch digest does not match the expected digest!\ + "The reconstructed batch digest does not match the expected digest! \ Batch: {:?}, Expected digest: {:?}, Reconstructed digest: {:?}", expected_batch_info, expected_digest, batch_digest ))); diff --git a/consensus/src/consensus_observer/observer/subscription_manager.rs b/consensus/src/consensus_observer/observer/subscription_manager.rs index 2d89163e1ae86..24ae1f7d321b4 100644 --- a/consensus/src/consensus_observer/observer/subscription_manager.rs +++ b/consensus/src/consensus_observer/observer/subscription_manager.rs @@ -18,7 +18,7 @@ use crate::consensus_observer::{ }; use aptos_config::{config::ConsensusObserverConfig, network_id::PeerNetworkId}; use aptos_infallible::Mutex; -use aptos_logger::{error, info, warn}; +use aptos_logger::{info, warn}; use aptos_network::application::{interface::NetworkClient, metadata::PeerMetadata}; use aptos_storage_interface::DbReader; use aptos_time_service::TimeService; @@ -157,7 +157,7 @@ impl SubscriptionManager { .get_connected_peers_and_metadata() .unwrap_or_else(|error| { // Log the error - error!( + warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to get connected peers and metadata! Error: {:?}", error @@ -327,7 +327,7 @@ impl SubscriptionManager { }, Err(error) => { // We encountered an error while sending the request - error!( + warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to send unsubscribe request to peer: {}! 
Error: {:?}", peer_network_id, error diff --git a/consensus/src/consensus_observer/observer/subscription_utils.rs b/consensus/src/consensus_observer/observer/subscription_utils.rs index d654af8aaf0d5..0bca7c61b007d 100644 --- a/consensus/src/consensus_observer/observer/subscription_utils.rs +++ b/consensus/src/consensus_observer/observer/subscription_utils.rs @@ -175,7 +175,7 @@ async fn create_single_subscription( }, Err(error) => { // We encountered an error while sending the request - error!( + warn!( LogSchema::new(LogEntry::ConsensusObserver).message(&format!( "Failed to send subscription request to peer: {}! Error: {:?}", potential_peer, error From 16370eb0265228e801e28492bb16786d54bb3b43 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 18 Sep 2024 12:29:53 -0700 Subject: [PATCH 31/36] Addressing PR comments --- types/src/ledger_info.rs | 35 ++++++++++++++++----------------- types/src/validator_verifier.rs | 9 ++------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs index 0a8fc80e35962..1e2d475833fdd 100644 --- a/types/src/ledger_info.rs +++ b/types/src/ledger_info.rs @@ -21,6 +21,7 @@ use serde::{Deserialize, Serialize}; use std::{ collections::BTreeMap, fmt::{Display, Formatter}, + mem, ops::{Deref, DerefMut}, sync::Arc, }; @@ -380,8 +381,12 @@ impl LedgerInfoWithPartialSignatures { } } -/// Contains the ledger info and partially aggregated signature from a set of validators, this data -/// is only used during the aggregating the votes from different validators and is not persisted in DB. +/// This data structure is used to support the optimistic signature verification feature. +/// Contains the ledger info and the signatures received on the ledger info from different validators. +/// Some of the signatures could be verified before inserting into this data structure. Some of the signatures +/// are not verified. Rather than verifying the signatures immediately, we aggregate all the signatures and +/// verify the aggregated signature at once. If the aggregated signature is invalid, then we verify each individual +/// unverified signature and remove the invalid signatures. #[derive(Clone, Debug, Eq, PartialEq)] pub struct LedgerInfoWithMixedSignatures { ledger_info: LedgerInfo, @@ -447,19 +452,11 @@ impl LedgerInfoWithMixedSignatures { } pub fn verified_voters(&self) -> Vec<&AccountAddress> { - self.verified_signatures - .signatures() - .keys() - .collect_vec() - .clone() + self.verified_signatures.signatures().keys().collect_vec() } pub fn unverified_voters(&self) -> Vec<&AccountAddress> { - self.unverified_signatures - .signatures() - .keys() - .collect_vec() - .clone() + self.unverified_signatures.signatures().keys().collect_vec() } // Collecting all the authors from verified signatures, unverified signatures and the aggregated signature. @@ -532,12 +529,14 @@ impl LedgerInfoWithMixedSignatures { } // For these authors, we will not use optimistic signature verification in the future. 
- let pessimistic_authors = self.unverified_signatures.signatures().keys().cloned(); - epoch_state - .verifier - .add_pessimistic_verify_set(pessimistic_authors); - - self.unverified_signatures = PartialSignatures::empty(); + for author in mem::replace( + &mut self.unverified_signatures.signatures(), + &BTreeMap::new(), + ) + .keys() + { + epoch_state.verifier.add_pessimistic_verify_set(*author); + } match self.check_voting_power(&epoch_state.verifier, true) { Ok(_) => Ok(LedgerInfoWithSignatures::new( diff --git a/types/src/validator_verifier.rs b/types/src/validator_verifier.rs index 763db6c23c30b..45470fc658352 100644 --- a/types/src/validator_verifier.rs +++ b/types/src/validator_verifier.rs @@ -227,13 +227,8 @@ impl ValidatorVerifier { )) } - pub fn add_pessimistic_verify_set( - &self, - pessimistic_authors: impl Iterator, - ) { - for author in pessimistic_authors { - self.pessimistic_verify_set.insert(author); - } + pub fn add_pessimistic_verify_set(&self, author: AccountAddress) { + self.pessimistic_verify_set.insert(author); } pub fn pessimistic_verify_set(&self) -> Arc> { From a7b5e82fa229fc8c1ebb536d903a07b4a22d5381 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 18 Sep 2024 12:59:49 -0700 Subject: [PATCH 32/36] Minor changes --- types/src/ledger_info.rs | 54 +++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs index 1e2d475833fdd..3e9557e62f0f7 100644 --- a/types/src/ledger_info.rs +++ b/types/src/ledger_info.rs @@ -381,6 +381,11 @@ impl LedgerInfoWithPartialSignatures { } } +pub enum SignatureWithStatus { + Verified(bls12381::Signature), + Unverified(bls12381::Signature), +} + /// This data structure is used to support the optimistic signature verification feature. /// Contains the ledger info and the signatures received on the ledger info from different validators. /// Some of the signatures could be verified before inserting into this data structure. Some of the signatures @@ -442,12 +447,15 @@ impl LedgerInfoWithMixedSignatures { pub fn add_signature( &mut self, validator: AccountAddress, - signature: bls12381::Signature, - verification_status: VerificationStatus, + signature_with_status: SignatureWithStatus, ) { - match verification_status { - VerificationStatus::Verified => self.add_verified_signature(validator, signature), - VerificationStatus::Unverified => self.add_unverified_signature(validator, signature), + match signature_with_status { + SignatureWithStatus::Verified(signature) => { + self.add_verified_signature(validator, signature) + }, + SignatureWithStatus::Unverified(signature) => { + self.add_unverified_signature(validator, signature) + }, }; } @@ -495,11 +503,13 @@ impl LedgerInfoWithMixedSignatures { .verify_multi_signatures(self.ledger_info(), &aggregated_sig) { Ok(_) => { - for (account_address, signature) in self.unverified_signatures.signatures() { + for (account_address, signature) in + mem::replace(&mut self.unverified_signatures, PartialSignatures::empty()) + .signatures() + { self.verified_signatures .add_signature(*account_address, signature.clone()); } - self.unverified_signatures = PartialSignatures::empty(); Ok(LedgerInfoWithSignatures::new( self.ledger_info.clone(), aggregated_sig, @@ -529,11 +539,10 @@ impl LedgerInfoWithMixedSignatures { } // For these authors, we will not use optimistic signature verification in the future. 
- for author in mem::replace( - &mut self.unverified_signatures.signatures(), - &BTreeMap::new(), - ) - .keys() + for author in + mem::replace(&mut self.unverified_signatures, PartialSignatures::empty()) + .signatures() + .keys() { epoch_state.verifier.add_pessimistic_verify_set(*author); } @@ -688,8 +697,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[0].author(), - validator_signers[0].sign(&ledger_info).unwrap(), - VerificationStatus::Verified, + SignatureWithStatus::Verified(validator_signers[0].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[0].author(), @@ -698,8 +706,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[1].author(), - validator_signers[1].sign(&ledger_info).unwrap(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(validator_signers[1].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[1].author(), @@ -708,8 +715,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[2].author(), - validator_signers[2].sign(&ledger_info).unwrap(), - VerificationStatus::Verified, + SignatureWithStatus::Verified(validator_signers[2].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[2].author(), @@ -718,8 +724,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[3].author(), - validator_signers[3].sign(&ledger_info).unwrap(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(validator_signers[3].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[3].author(), @@ -751,8 +756,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[4].author(), - bls12381::Signature::dummy_signature(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(bls12381::Signature::dummy_signature()), ); assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 5); @@ -802,8 +806,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[5].author(), - validator_signers[5].sign(&ledger_info).unwrap(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(validator_signers[5].sign(&ledger_info).unwrap()), ); partial_sig.add_signature( validator_signers[5].author(), @@ -861,8 +864,7 @@ mod tests { ledger_info_with_mixed_signatures.add_signature( validator_signers[6].author(), - bls12381::Signature::dummy_signature(), - VerificationStatus::Unverified, + SignatureWithStatus::Unverified(bls12381::Signature::dummy_signature()), ); assert_eq!(ledger_info_with_mixed_signatures.all_voters().count(), 6); From 2e46bb343ae6250026734a10d5c44099b4d8e752 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 18 Sep 2024 13:06:51 -0700 Subject: [PATCH 33/36] Minor change --- types/src/ledger_info.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs index 3e9557e62f0f7..18aff380a54f9 100644 --- a/types/src/ledger_info.rs +++ b/types/src/ledger_info.rs @@ -534,7 +534,7 @@ impl LedgerInfoWithMixedSignatures { .collect::>(); for (account_address, signature) in verified { self.verified_signatures - .add_signature(account_address, signature.clone()); + .add_signature(account_address, signature); self.unverified_signatures.remove_signature(account_address); } From 30f76bfee45d1c419f7ee3cd463ee8d904a97aab Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 18 Sep 2024 13:19:33 -0700 Subject: [PATCH 
34/36] Deprecate delayed QC aggregate msg (#14640) --- config/src/config/consensus_config.rs | 39 +--- .../consensus-types/src/delayed_qc_msg.rs | 32 ---- consensus/consensus-types/src/lib.rs | 1 - .../src/block_storage/block_store_test.rs | 10 +- consensus/src/epoch_manager.rs | 37 +--- consensus/src/lib.rs | 1 - consensus/src/liveness/round_state.rs | 35 +--- consensus/src/liveness/round_state_test.rs | 11 +- consensus/src/pending_votes.rs | 88 ++++----- consensus/src/qc_aggregator.rs | 181 ------------------ consensus/src/round_manager.rs | 39 +--- consensus/src/round_manager_fuzzing.rs | 15 +- consensus/src/round_manager_test.rs | 12 +- 13 files changed, 51 insertions(+), 450 deletions(-) delete mode 100644 consensus/consensus-types/src/delayed_qc_msg.rs delete mode 100644 consensus/src/qc_aggregator.rs diff --git a/config/src/config/consensus_config.rs b/config/src/config/consensus_config.rs index 90526afc77510..021edf0b365b4 100644 --- a/config/src/config/consensus_config.rs +++ b/config/src/config/consensus_config.rs @@ -93,48 +93,11 @@ pub struct ConsensusConfig { pub max_pending_rounds_in_commit_vote_cache: u64, } +/// Deprecated #[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] pub enum QcAggregatorType { #[default] NoDelay, - Delayed(DelayedQcAggregatorConfig), -} - -impl QcAggregatorType { - pub fn default_delayed() -> Self { - // TODO: Enable the delayed aggregation by default once we have tested it more. - Self::Delayed(DelayedQcAggregatorConfig::default()) - } -} - -#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] -pub struct DelayedQcAggregatorConfig { - // Maximum Delay for a QC to be aggregated after round start (in milliseconds). This assumes that - // we have enough voting power to form a QC. If we don't have enough voting power, we will wait - // until we have enough voting power to form a QC. - pub max_delay_after_round_start_ms: u64, - // Percentage of aggregated voting power to wait for before aggregating a QC. For example, if this - // is set to 95% then, a QC is formed as soon as we have 95% of the voting power aggregated without - // any additional waiting. - pub aggregated_voting_power_pct_to_wait: usize, - // This knob control what is the % of the time (as compared to time between round start and time when we - // have enough voting power to form a QC) we wait after we have enough voting power to form a QC. In a sense, - // this knobs controls how much slower we are willing to make consensus to wait for more votes. - pub pct_delay_after_qc_aggregated: usize, - // In summary, let's denote the time we have enough voting power (2f + 1) to form a QC as T1 and - // the time we have aggregated `aggregated_voting_power_pct_to_wait` as T2. Then, we wait for - // min((T1 + `pct_delay_after_qc_aggregated` * T1 / 100), `max_delay_after_round_start_ms`, T2) - // before forming a QC. -} - -impl Default for DelayedQcAggregatorConfig { - fn default() -> Self { - Self { - max_delay_after_round_start_ms: 700, - aggregated_voting_power_pct_to_wait: 90, - pct_delay_after_qc_aggregated: 30, - } - } } /// Execution backpressure which handles gas/s variance, diff --git a/consensus/consensus-types/src/delayed_qc_msg.rs b/consensus/consensus-types/src/delayed_qc_msg.rs deleted file mode 100644 index 75d9752c2ea5b..0000000000000 --- a/consensus/consensus-types/src/delayed_qc_msg.rs +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright © Aptos Foundation -// Parts of the project are originally copyright © Meta Platforms, Inc. 
-// SPDX-License-Identifier: Apache-2.0 - -use crate::vote::Vote; -use serde::{Deserialize, Serialize}; -use std::fmt::{Display, Formatter}; - -/// DelayedQCMsg is the struct that is sent by the proposer to self when it receives enough votes -/// for a QC but it still delays the creation of the QC to ensure that slow nodes are given enough -/// time to catch up to the chain and cast their votes. -#[derive(Deserialize, Serialize, Clone, Debug, PartialEq, Eq)] -pub struct DelayedQcMsg { - /// Vote data for the QC that is being delayed. - pub vote: Vote, -} - -impl Display for DelayedQcMsg { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "DelayedQcMsg: vote [{}]", self.vote,) - } -} - -impl DelayedQcMsg { - pub fn new(vote: Vote) -> Self { - Self { vote } - } - - pub fn vote(&self) -> &Vote { - &self.vote - } -} diff --git a/consensus/consensus-types/src/lib.rs b/consensus/consensus-types/src/lib.rs index c9e555da27a7d..bc70a1ad942f2 100644 --- a/consensus/consensus-types/src/lib.rs +++ b/consensus/consensus-types/src/lib.rs @@ -8,7 +8,6 @@ pub mod block; pub mod block_data; pub mod block_retrieval; pub mod common; -pub mod delayed_qc_msg; pub mod epoch_retrieval; pub mod order_vote; pub mod order_vote_msg; diff --git a/consensus/src/block_storage/block_store_test.rs b/consensus/src/block_storage/block_store_test.rs index 7328688f2f48e..41def8f1c322d 100644 --- a/consensus/src/block_storage/block_store_test.rs +++ b/consensus/src/block_storage/block_store_test.rs @@ -8,9 +8,7 @@ use crate::{ test_utils::{ build_empty_tree, build_simple_tree, consensus_runtime, timed_block_on, TreeInserter, }, - util::mock_time_service::SimulatedTimeService, }; -use aptos_config::config::QcAggregatorType; use aptos_consensus_types::{ block::{ block_test_utils::{ @@ -27,9 +25,8 @@ use aptos_crypto::{HashValue, PrivateKey}; use aptos_types::{ validator_signer::ValidatorSigner, validator_verifier::random_validator_verifier, }; -use futures_channel::mpsc::unbounded; use proptest::prelude::*; -use std::{cmp::min, collections::HashSet, sync::Arc}; +use std::{cmp::min, collections::HashSet}; #[tokio::test] async fn test_highest_block_and_quorum_cert() { @@ -284,11 +281,8 @@ async fn test_insert_vote() { let block = inserter .insert_block_with_qc(certificate_for_genesis(), &genesis, 1) .await; - let time_service = Arc::new(SimulatedTimeService::new()); - let (delayed_qc_tx, _) = unbounded(); - let mut pending_votes = - PendingVotes::new(time_service, delayed_qc_tx, QcAggregatorType::NoDelay); + let mut pending_votes = PendingVotes::new(); assert!(block_store.get_quorum_cert_for_block(block.id()).is_none()); for (i, voter) in signers.iter().enumerate().take(10).skip(1) { diff --git a/consensus/src/epoch_manager.rs b/consensus/src/epoch_manager.rs index da8c23ea509d8..a6c43221f9a0d 100644 --- a/consensus/src/epoch_manager.rs +++ b/consensus/src/epoch_manager.rs @@ -56,12 +56,9 @@ use crate::{ use anyhow::{anyhow, bail, ensure, Context}; use aptos_bounded_executor::BoundedExecutor; use aptos_channels::{aptos_channel, message_queues::QueueStyle}; -use aptos_config::config::{ - ConsensusConfig, DagConsensusConfig, ExecutionConfig, NodeConfig, QcAggregatorType, -}; +use aptos_config::config::{ConsensusConfig, DagConsensusConfig, ExecutionConfig, NodeConfig}; use aptos_consensus_types::{ common::{Author, Round}, - delayed_qc_msg::DelayedQcMsg, epoch_retrieval::EpochRetrievalRequest, proof_of_store::ProofCache, utils::PayloadTxnsSize, @@ -96,11 +93,7 @@ use aptos_types::{ use 
aptos_validator_transaction_pool::VTxnPoolState; use fail::fail_point; use futures::{ - channel::{ - mpsc, - mpsc::{unbounded, Sender, UnboundedSender}, - oneshot, - }, + channel::{mpsc, mpsc::Sender, oneshot}, SinkExt, StreamExt, }; use itertools::Itertools; @@ -265,21 +258,13 @@ impl<P: OnChainConfigProvider> EpochManager<P>
{ &self, time_service: Arc, timeout_sender: aptos_channels::Sender, - delayed_qc_tx: UnboundedSender, - qc_aggregator_type: QcAggregatorType, ) -> RoundState { let time_interval = Box::new(ExponentialTimeInterval::new( Duration::from_millis(self.config.round_initial_timeout_ms), self.config.round_timeout_backoff_exponent_base, self.config.round_timeout_backoff_max_exponent, )); - RoundState::new( - time_interval, - time_service, - timeout_sender, - delayed_qc_tx, - qc_aggregator_type, - ) + RoundState::new(time_interval, time_service, timeout_sender) } /// Create a proposer election handler based on proposers @@ -793,15 +778,10 @@ impl EpochManager
{ "Unable to initialize safety rules.", ); } - let (delayed_qc_tx, delayed_qc_rx) = unbounded(); info!(epoch = epoch, "Create RoundState"); - let round_state = self.create_round_state( - self.time_service.clone(), - self.timeout_sender.clone(), - delayed_qc_tx, - self.config.qc_aggregator_type.clone(), - ); + let round_state = + self.create_round_state(self.time_service.clone(), self.timeout_sender.clone()); info!(epoch = epoch, "Create ProposerElection"); let proposer_election = @@ -913,12 +893,7 @@ impl EpochManager
{ let (close_tx, close_rx) = oneshot::channel(); self.round_manager_close_tx = Some(close_tx); - tokio::spawn(round_manager.start( - round_manager_rx, - buffered_proposal_rx, - delayed_qc_rx, - close_rx, - )); + tokio::spawn(round_manager.start(round_manager_rx, buffered_proposal_rx, close_rx)); self.spawn_block_retrieval_task(epoch, block_store, max_blocks_allowed); } diff --git a/consensus/src/lib.rs b/consensus/src/lib.rs index 87eb81e0f40cc..f8545073966bd 100644 --- a/consensus/src/lib.rs +++ b/consensus/src/lib.rs @@ -58,7 +58,6 @@ mod execution_pipeline; /// AptosNet interface. pub mod network_interface; mod payload_manager; -mod qc_aggregator; mod transaction_deduper; mod transaction_filter; mod transaction_shuffler; diff --git a/consensus/src/liveness/round_state.rs b/consensus/src/liveness/round_state.rs index ea7e6e7f5b362..0dc03ab88d105 100644 --- a/consensus/src/liveness/round_state.rs +++ b/consensus/src/liveness/round_state.rs @@ -7,10 +7,9 @@ use crate::{ pending_votes::{PendingVotes, VoteReceptionResult}, util::time_service::{SendTask, TimeService}, }; -use aptos_config::config::QcAggregatorType; use aptos_consensus_types::{ - common::Round, delayed_qc_msg::DelayedQcMsg, sync_info::SyncInfo, - timeout_2chain::TwoChainTimeoutWithPartialSignatures, vote::Vote, + common::Round, sync_info::SyncInfo, timeout_2chain::TwoChainTimeoutWithPartialSignatures, + vote::Vote, }; use aptos_crypto::HashValue; use aptos_logger::{prelude::*, Schema}; @@ -18,7 +17,6 @@ use aptos_types::{ ledger_info::LedgerInfoWithPartialSignatures, validator_verifier::ValidatorVerifier, }; use futures::future::AbortHandle; -use futures_channel::mpsc::UnboundedSender; use serde::Serialize; use std::{fmt, sync::Arc, time::Duration}; @@ -163,9 +161,6 @@ pub struct RoundState { vote_sent: Option, // The handle to cancel previous timeout task when moving to next round. abort_handle: Option, - // Self sender to send delayed QC aggregation events to the round manager. - delayed_qc_tx: UnboundedSender, - qc_aggregator_type: QcAggregatorType, } #[derive(Default, Schema)] @@ -194,8 +189,6 @@ impl RoundState { time_interval: Box, time_service: Arc, timeout_sender: aptos_channels::Sender, - delayed_qc_tx: UnboundedSender, - qc_aggregator_type: QcAggregatorType, ) -> Self { // Our counters are initialized lazily, so they're not going to appear in // Prometheus if some conditions never happen. Invoking get() function enforces creation. @@ -203,11 +196,7 @@ impl RoundState { counters::TIMEOUT_ROUNDS_COUNT.get(); counters::TIMEOUT_COUNT.get(); - let pending_votes = PendingVotes::new( - time_service.clone(), - delayed_qc_tx.clone(), - qc_aggregator_type.clone(), - ); + let pending_votes = PendingVotes::new(); Self { time_interval, highest_ordered_round: 0, @@ -218,8 +207,6 @@ impl RoundState { pending_votes, vote_sent: None, abort_handle: None, - delayed_qc_tx, - qc_aggregator_type, } } @@ -262,11 +249,7 @@ impl RoundState { // Start a new round. 
self.current_round = new_round; - self.pending_votes = PendingVotes::new( - self.time_service.clone(), - self.delayed_qc_tx.clone(), - self.qc_aggregator_type.clone(), - ); + self.pending_votes = PendingVotes::new(); self.vote_sent = None; let timeout = self.setup_timeout(1); // The new round reason is QCReady in case both QC.round + 1 == new_round, otherwise @@ -310,16 +293,6 @@ impl RoundState { } } - pub fn process_delayed_qc_msg( - &mut self, - validator_verifier: &ValidatorVerifier, - msg: DelayedQcMsg, - ) -> VoteReceptionResult { - let DelayedQcMsg { vote } = msg; - self.pending_votes - .process_delayed_qc(validator_verifier, vote) - } - pub fn vote_sent(&self) -> Option { self.vote_sent.clone() } diff --git a/consensus/src/liveness/round_state_test.rs b/consensus/src/liveness/round_state_test.rs index 03f1d245359d1..ad2eec8809e53 100644 --- a/consensus/src/liveness/round_state_test.rs +++ b/consensus/src/liveness/round_state_test.rs @@ -8,7 +8,6 @@ use crate::{ }, util::mock_time_service::SimulatedTimeService, }; -use aptos_config::config::QcAggregatorType; use aptos_consensus_types::{ common::Round, quorum_cert::QuorumCert, @@ -23,7 +22,6 @@ use aptos_types::{ ledger_info::{LedgerInfo, LedgerInfoWithSignatures}, }; use futures::StreamExt; -use futures_channel::mpsc::unbounded; use std::{sync::Arc, time::Duration}; #[test] @@ -88,15 +86,8 @@ fn make_round_state() -> (RoundState, aptos_channels::Receiver) { let time_interval = Box::new(ExponentialTimeInterval::fixed(Duration::from_millis(2))); let simulated_time = SimulatedTimeService::auto_advance_until(Duration::from_millis(4)); let (timeout_tx, timeout_rx) = aptos_channels::new_test(1_024); - let (delayed_qc_tx, _) = unbounded(); ( - RoundState::new( - time_interval, - Arc::new(simulated_time), - timeout_tx, - delayed_qc_tx, - QcAggregatorType::NoDelay, - ), + RoundState::new(time_interval, Arc::new(simulated_time), timeout_tx), timeout_rx, ) } diff --git a/consensus/src/pending_votes.rs b/consensus/src/pending_votes.rs index ff8bc37a1ae70..05abc30dc63a2 100644 --- a/consensus/src/pending_votes.rs +++ b/consensus/src/pending_votes.rs @@ -8,15 +8,9 @@ //! when enough votes (or timeout votes) have been observed. //! Votes are automatically dropped when the structure goes out of scope. -use crate::{ - counters, - qc_aggregator::{create_qc_aggregator, QcAggregator}, - util::time_service::TimeService, -}; -use aptos_config::config::QcAggregatorType; +use crate::counters; use aptos_consensus_types::{ common::Author, - delayed_qc_msg::DelayedQcMsg, quorum_cert::QuorumCert, timeout_2chain::{TwoChainTimeoutCertificate, TwoChainTimeoutWithPartialSignatures}, vote::Vote, @@ -29,7 +23,6 @@ use aptos_types::{ ledger_info::LedgerInfoWithPartialSignatures, validator_verifier::{ValidatorVerifier, VerifyError}, }; -use futures_channel::mpsc::UnboundedSender; use std::{ collections::{BTreeMap, HashMap}, fmt, @@ -43,9 +36,6 @@ pub enum VoteReceptionResult { /// The vote has been added but QC has not been formed yet. Return the amount of voting power /// QC currently has. VoteAdded(u128), - /// The vote has been added and we have gather enough voting power to form the QC but we have - /// delayed the QC to aggregate as many signatures as possible. - VoteAddedQCDelayed(u128), /// The very same vote message has been processed in past. DuplicateVote, /// The very same author has already voted for another proposal in this round (equivocation). 
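With `VoteAddedQCDelayed` removed, vote handling is left with a single aggregation path: add the signature, count voting power, and form the QC the moment quorum is crossed (the rewritten vote-insertion logic below). A minimal sketch of that decision, with toy types standing in for `ValidatorVerifier` and the pending ledger info — the names and fields here are illustrative assumptions, not the real definitions, and BLS aggregation is elided:

    use std::collections::BTreeMap;

    // Toy stand-ins for the real consensus types (simplified by assumption).
    struct Verifier {
        voting_power: BTreeMap<&'static str, u128>,
        quorum_voting_power: u128,
    }

    enum VoteReception {
        VoteAdded(u128),            // quorum not yet reached
        NewQuorumCertificate(u128), // QC formed at this aggregated power
    }

    // Mirrors the post-patch flow: no timer, no VoteAddedQCDelayed. The QC is
    // aggregated as soon as the collected power crosses the quorum threshold.
    fn on_vote_added(
        verifier: &Verifier,
        signers: &BTreeMap<&'static str, Vec<u8>>,
    ) -> VoteReception {
        let power: u128 = signers
            .keys()
            .filter_map(|author| verifier.voting_power.get(author))
            .sum();
        if power >= verifier.quorum_voting_power {
            // The real code calls li_with_sig.aggregate_signatures(..) here
            // and wraps the result in QuorumCert::new(..).
            VoteReception::NewQuorumCertificate(power)
        } else {
            VoteReception::VoteAdded(power)
        }
    }

    fn main() {
        let verifier = Verifier {
            voting_power: BTreeMap::from([("a", 1), ("b", 1), ("c", 1), ("d", 1)]),
            quorum_voting_power: 3,
        };
        let mut signers = BTreeMap::new();
        for author in ["a", "b", "c"] {
            signers.insert(author, vec![0u8; 96]); // placeholder signature bytes
            match on_vote_added(&verifier, &signers) {
                VoteReception::VoteAdded(p) => println!("{author}: added, power {p}"),
                VoteReception::NewQuorumCertificate(p) => println!("{author}: QC at power {p}"),
            }
        }
    }

The deleted `DelayedQcAggregatorConfig` existed precisely to trade this immediacy for extra signatures; removing it makes the former `NoDelay` behavior the only behavior.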
@@ -79,23 +69,16 @@ pub struct PendingVotes { author_to_vote: HashMap, /// Whether we have echoed timeout for this round. echo_timeout: bool, - - qc_aggregator: Box, } impl PendingVotes { /// Creates an empty PendingVotes structure for a specific epoch and round - pub fn new( - time_service: Arc, - delayed_qc_tx: UnboundedSender, - qc_aggregator_type: QcAggregatorType, - ) -> Self { + pub fn new() -> Self { PendingVotes { li_digest_to_votes: HashMap::new(), maybe_partial_2chain_tc: None, author_to_vote: HashMap::new(), echo_timeout: false, - qc_aggregator: create_qc_aggregator(qc_aggregator_type, time_service, delayed_qc_tx), } } @@ -189,30 +172,37 @@ impl PendingVotes { li_with_sig.add_signature(vote.author(), vote.signature().clone()); // check if we have enough signatures to create a QC - let voting_power = - match validator_verifier.check_voting_power(li_with_sig.signatures().keys(), true) { - // a quorum of signature was reached, a new QC is formed - Ok(aggregated_voting_power) => { - return self.qc_aggregator.handle_aggregated_qc( - validator_verifier, - aggregated_voting_power, - vote, - li_with_sig, + let voting_power = match validator_verifier + .check_voting_power(li_with_sig.signatures().keys(), true) + { + // a quorum of signature was reached, a new QC is formed + Ok(aggregated_voting_power) => { + assert!( + aggregated_voting_power >= validator_verifier.quorum_voting_power(), + "QC aggregation should not be triggered if we don't have enough votes to form a QC" ); - }, + match li_with_sig.aggregate_signatures(validator_verifier) { + Ok(ledger_info_with_sig) => { + return VoteReceptionResult::NewQuorumCertificate(Arc::new( + QuorumCert::new(vote.vote_data().clone(), ledger_info_with_sig), + )) + }, + Err(e) => return VoteReceptionResult::ErrorAggregatingSignature(e), + } + }, - // not enough votes - Err(VerifyError::TooLittleVotingPower { voting_power, .. }) => voting_power, + // not enough votes + Err(VerifyError::TooLittleVotingPower { voting_power, .. }) => voting_power, - // error - Err(error) => { - error!( - "MUST_FIX: vote received could not be added: {}, vote: {}", - error, vote - ); - return VoteReceptionResult::ErrorAddingVote(error); - }, - }; + // error + Err(error) => { + error!( + "MUST_FIX: vote received could not be added: {}, vote: {}", + error, vote + ); + return VoteReceptionResult::ErrorAddingVote(error); + }, + }; // // 4. We couldn't form a QC, let's check if we can create a TC @@ -405,8 +395,6 @@ impl fmt::Display for PendingVotes { #[cfg(test)] mod tests { use super::{PendingVotes, VoteReceptionResult}; - use crate::util::mock_time_service::SimulatedTimeService; - use aptos_config::config::QcAggregatorType; use aptos_consensus_types::{ block::block_test_utils::certificate_for_genesis, vote::Vote, vote_data::VoteData, }; @@ -415,9 +403,7 @@ mod tests { block_info::BlockInfo, ledger_info::LedgerInfo, validator_verifier::random_validator_verifier, }; - use futures_channel::mpsc::unbounded; use itertools::Itertools; - use std::sync::Arc; /// Creates a random ledger info for epoch 1 and round 1. 
fn random_ledger_info() -> LedgerInfo { @@ -440,12 +426,7 @@ mod tests { // set up 4 validators let (signers, validator) = random_validator_verifier(4, Some(2), false); - let (delayed_qc_tx, _) = unbounded(); - let mut pending_votes = PendingVotes::new( - Arc::new(SimulatedTimeService::new()), - delayed_qc_tx, - QcAggregatorType::NoDelay, - ); + let mut pending_votes = PendingVotes::new(); // create random vote from validator[0] let li1 = random_ledger_info(); @@ -512,12 +493,7 @@ mod tests { // set up 4 validators let (signers, validator) = random_validator_verifier(4, None, false); - let (delayed_qc_tx, _) = unbounded(); - let mut pending_votes = PendingVotes::new( - Arc::new(SimulatedTimeService::new()), - delayed_qc_tx, - QcAggregatorType::NoDelay, - ); + let mut pending_votes = PendingVotes::new(); // submit a new vote from validator[0] -> VoteAdded let li0 = random_ledger_info(); diff --git a/consensus/src/qc_aggregator.rs b/consensus/src/qc_aggregator.rs deleted file mode 100644 index 2f695c651927b..0000000000000 --- a/consensus/src/qc_aggregator.rs +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright © Aptos Foundation -// Parts of the project are originally copyright © Meta Platforms, Inc. -// SPDX-License-Identifier: Apache-2.0 - -use crate::{ - pending_votes::{PendingVotes, VoteReceptionResult}, - util::time_service::TimeService, -}; -use aptos_config::config::{DelayedQcAggregatorConfig, QcAggregatorType}; -use aptos_consensus_types::{delayed_qc_msg::DelayedQcMsg, vote::Vote}; -use aptos_logger::{error, info}; -use aptos_types::{ - ledger_info::LedgerInfoWithPartialSignatures, validator_verifier::ValidatorVerifier, -}; -use futures::SinkExt; -use futures_channel::mpsc::UnboundedSender; -use std::{sync::Arc, time::Duration}; -use tokio::time::sleep; - -pub trait QcAggregator: Send + Sync { - fn handle_aggregated_qc( - &mut self, - validator_verifier: &ValidatorVerifier, - aggregated_voting_power: u128, - vote: &Vote, - li_with_sig: &LedgerInfoWithPartialSignatures, - ) -> VoteReceptionResult; -} - -struct NoDelayQcAggregator {} - -pub fn create_qc_aggregator( - qc_aggregator_type: QcAggregatorType, - time_service: Arc, - delayed_qc_tx: UnboundedSender, -) -> Box { - match qc_aggregator_type { - QcAggregatorType::NoDelay => Box::new(NoDelayQcAggregator {}), - QcAggregatorType::Delayed(delay_config) => { - let DelayedQcAggregatorConfig { - max_delay_after_round_start_ms, - aggregated_voting_power_pct_to_wait, - pct_delay_after_qc_aggregated, - } = delay_config; - Box::new(DelayedQcAggregator::new( - Duration::from_millis(max_delay_after_round_start_ms), - aggregated_voting_power_pct_to_wait, - pct_delay_after_qc_aggregated, - time_service, - delayed_qc_tx, - )) - }, - } -} - -impl QcAggregator for NoDelayQcAggregator { - fn handle_aggregated_qc( - &mut self, - validator_verifier: &ValidatorVerifier, - aggregated_voting_power: u128, - vote: &Vote, - li_with_sig: &LedgerInfoWithPartialSignatures, - ) -> VoteReceptionResult { - assert!( - aggregated_voting_power >= validator_verifier.quorum_voting_power(), - "QC aggregation should not be triggered if we don't have enough votes to form a QC" - ); - PendingVotes::aggregate_qc_now(validator_verifier, li_with_sig, vote.vote_data()) - } -} - -struct DelayedQcAggregator { - round_start_time: Duration, - max_delay_after_round_start: Duration, - aggregated_voting_power_pct_to_wait: usize, - pct_delay_after_qc_aggregated: usize, - time_service: Arc, - // True, if we already have enough vote to aggregate a QC, but we have trigged a delayed QC - // 
aggregation event to collect as many votes as possible. - qc_aggregation_delayed: bool, - // To send delayed QC aggregation events to the round manager. - delayed_qc_tx: UnboundedSender, -} - -impl DelayedQcAggregator { - pub fn new( - max_delay_after_round_start: Duration, - aggregated_voting_power_pct_to_wait: usize, - pct_delay_after_qc_aggregated: usize, - time_service: Arc, - delayed_qc_tx: UnboundedSender, - ) -> Self { - let round_start_time = time_service.get_current_timestamp(); - Self { - round_start_time, - max_delay_after_round_start, - aggregated_voting_power_pct_to_wait, - pct_delay_after_qc_aggregated, - time_service, - qc_aggregation_delayed: false, - delayed_qc_tx, - } - } -} - -impl QcAggregator for DelayedQcAggregator { - fn handle_aggregated_qc( - &mut self, - validator_verifier: &ValidatorVerifier, - aggregated_voting_power: u128, - vote: &Vote, - li_with_sig: &LedgerInfoWithPartialSignatures, - ) -> VoteReceptionResult { - assert!( - aggregated_voting_power >= validator_verifier.quorum_voting_power(), - "QC aggregation should not be triggered if we don't have enough votes to form a QC" - ); - let current_time = self.time_service.get_current_timestamp(); - - // If we have reached the aggregated voting power threshold, we should aggregate the QC now. - if aggregated_voting_power - >= self.aggregated_voting_power_pct_to_wait as u128 - * validator_verifier.total_voting_power() - / 100 - { - // Voting power is u128 so there is no overflow here. - info!( - "QC aggregation triggered by aggregated voting power: {}", - aggregated_voting_power - ); - return PendingVotes::aggregate_qc_now( - validator_verifier, - li_with_sig, - vote.vote_data(), - ); - } - - // If we have not reached the aggregated voting power threshold and have - // already triggered a delayed QC aggregation event, we should not trigger another - // one. 
- if self.qc_aggregation_delayed { - return VoteReceptionResult::VoteAddedQCDelayed(aggregated_voting_power); - } - - let time_since_round_start = current_time - self.round_start_time; - if time_since_round_start >= self.max_delay_after_round_start { - info!( - "QC aggregation triggered by time: {} ms", - time_since_round_start.as_millis() - ); - return PendingVotes::aggregate_qc_now( - validator_verifier, - li_with_sig, - vote.vote_data(), - ); - } - - let wait_time = (self.max_delay_after_round_start - time_since_round_start) - .min(time_since_round_start * self.pct_delay_after_qc_aggregated as u32 / 100); - - let delayed_qc_event = DelayedQcMsg::new(vote.clone()); - self.qc_aggregation_delayed = true; - - let mut delayed_qc_sender = self.delayed_qc_tx.clone(); - - info!( - "QC aggregation delayed by {} ms, wait time: {} ms", - time_since_round_start.as_millis(), - wait_time.as_millis() - ); - - tokio::spawn(async move { - sleep(wait_time).await; - if let Err(e) = delayed_qc_sender.send(delayed_qc_event).await { - error!("Failed to send event to round manager {:?}", e); - } - }); - - VoteReceptionResult::VoteAddedQCDelayed(aggregated_voting_power) - } -} diff --git a/consensus/src/round_manager.rs b/consensus/src/round_manager.rs index bd7be5172d775..f423d93d1e0ff 100644 --- a/consensus/src/round_manager.rs +++ b/consensus/src/round_manager.rs @@ -39,7 +39,6 @@ use aptos_consensus_types::{ block::Block, block_data::BlockType, common::{Author, Round}, - delayed_qc_msg::DelayedQcMsg, order_vote_msg::OrderVoteMsg, proof_of_store::{ProofCache, ProofOfStoreMsg, SignedBatchInfoMsg}, proposal_msg::ProposalMsg, @@ -70,7 +69,6 @@ use aptos_types::{ }; use fail::fail_point; use futures::{channel::oneshot, stream::FuturesUnordered, Future, FutureExt, StreamExt}; -use futures_channel::mpsc::UnboundedReceiver; use lru::LruCache; use serde::Serialize; use std::{mem::Discriminant, pin::Pin, sync::Arc, time::Duration}; @@ -591,25 +589,6 @@ impl RoundManager { self.process_verified_proposal(proposal).await } - pub async fn process_delayed_qc_msg(&mut self, msg: DelayedQcMsg) -> anyhow::Result<()> { - ensure!( - msg.vote.vote_data().proposed().round() == self.round_state.current_round(), - "Discarding stale delayed QC for round {}, current round {}", - msg.vote.vote_data().proposed().round(), - self.round_state.current_round() - ); - let vote = msg.vote().clone(); - let vote_reception_result = self - .round_state - .process_delayed_qc_msg(&self.epoch_state.verifier, msg); - trace!( - "Received delayed QC message and vote reception result is {:?}", - vote_reception_result - ); - self.process_vote_reception_result(&vote, vote_reception_result) - .await - } - /// Sync to the sync info sending from peer if it has newer certificates. 
async fn sync_up(&mut self, sync_info: &SyncInfo, author: Author) -> anyhow::Result<()> { let local_sync_info = self.block_store.sync_info(); @@ -1330,9 +1309,7 @@ impl RoundManager { PROPOSAL_VOTE_ADDED.inc(); Ok(()) }, - VoteReceptionResult::VoteAddedQCDelayed(_) - | VoteReceptionResult::EchoTimeout(_) - | VoteReceptionResult::DuplicateVote => Ok(()), + VoteReceptionResult::EchoTimeout(_) | VoteReceptionResult::DuplicateVote => Ok(()), e => Err(anyhow::anyhow!("{:?}", e)), } } @@ -1511,7 +1488,6 @@ impl RoundManager { (Author, VerifiedEvent), >, mut buffered_proposal_rx: aptos_channel::Receiver, - mut delayed_qc_rx: UnboundedReceiver, close_rx: oneshot::Receiver>, ) { info!(epoch = self.epoch_state().epoch, "RoundManager started"); @@ -1524,19 +1500,6 @@ impl RoundManager { ack_sender.send(()).expect("[RoundManager] Fail to ack shutdown"); } break; - } - delayed_qc_msg = delayed_qc_rx.select_next_some() => { - let result = monitor!( - "process_delayed_qc", - self.process_delayed_qc_msg(delayed_qc_msg).await - ); - match result { - Ok(_) => trace!(RoundStateLogSchema::new(self.round_state())), - Err(e) => { - counters::ERROR_COUNT.inc(); - warn!(error = ?e, kind = error_kind(&e), RoundStateLogSchema::new(self.round_state())); - } - } }, proposal = buffered_proposal_rx.select_next_some() => { let mut proposals = vec![proposal]; diff --git a/consensus/src/round_manager_fuzzing.rs b/consensus/src/round_manager_fuzzing.rs index ab7a14740624f..2eefa70b0752d 100644 --- a/consensus/src/round_manager_fuzzing.rs +++ b/consensus/src/round_manager_fuzzing.rs @@ -24,10 +24,7 @@ use crate::{ util::{mock_time_service::SimulatedTimeService, time_service::TimeService}, }; use aptos_channels::{self, aptos_channel, message_queues::QueueStyle}; -use aptos_config::{ - config::{ConsensusConfig, QcAggregatorType}, - network_id::NetworkId, -}; +use aptos_config::{config::ConsensusConfig, network_id::NetworkId}; use aptos_consensus_types::{proposal_msg::ProposalMsg, utils::PayloadTxnsSize}; use aptos_infallible::Mutex; use aptos_network::{ @@ -50,7 +47,6 @@ use aptos_types::{ validator_verifier::ValidatorVerifier, }; use futures::{channel::mpsc, executor::block_on}; -use futures_channel::mpsc::unbounded; use maplit::hashmap; use once_cell::sync::Lazy; use std::{sync::Arc, time::Duration}; @@ -113,16 +109,9 @@ fn create_round_state() -> RoundState { let base_timeout = std::time::Duration::new(60, 0); let time_interval = Box::new(ExponentialTimeInterval::fixed(base_timeout)); let (round_timeout_sender, _) = aptos_channels::new_test(1_024); - let (delayed_qc_tx, _) = unbounded(); let time_service = Arc::new(SimulatedTimeService::new()); - RoundState::new( - time_interval, - time_service, - round_timeout_sender, - delayed_qc_tx, - QcAggregatorType::NoDelay, - ) + RoundState::new(time_interval, time_service, round_timeout_sender) } // Creates an RoundManager for fuzzing diff --git a/consensus/src/round_manager_test.rs b/consensus/src/round_manager_test.rs index a01fef7b06bab..c12e476a7f56c 100644 --- a/consensus/src/round_manager_test.rs +++ b/consensus/src/round_manager_test.rs @@ -30,7 +30,7 @@ use crate::{ }; use aptos_channels::{self, aptos_channel, message_queues::QueueStyle}; use aptos_config::{ - config::{ConsensusConfig, QcAggregatorType}, + config::ConsensusConfig, network_id::{NetworkId, PeerNetworkId}, }; use aptos_consensus_types::{ @@ -83,7 +83,6 @@ use futures::{ stream::select, FutureExt, Stream, StreamExt, }; -use futures_channel::mpsc::unbounded; use maplit::hashmap; use std::{ iter::FromIterator, @@ 
-124,14 +123,7 @@ impl NodeSetup { let base_timeout = Duration::new(60, 0); let time_interval = Box::new(ExponentialTimeInterval::fixed(base_timeout)); let (round_timeout_sender, _) = aptos_channels::new_test(1_024); - let (delayed_qc_tx, _) = unbounded(); - RoundState::new( - time_interval, - time_service, - round_timeout_sender, - delayed_qc_tx, - QcAggregatorType::NoDelay, - ) + RoundState::new(time_interval, time_service, round_timeout_sender) } fn create_proposer_election(proposers: Vec) -> Arc { From 065d7602cf402188c8395ec105265c640d8e7c50 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 18 Sep 2024 16:42:30 -0700 Subject: [PATCH 35/36] Changing names --- consensus/consensus-types/src/block_test.rs | 6 +++--- consensus/consensus-types/src/timeout_2chain.rs | 4 ++-- consensus/safety-rules/src/test_utils.rs | 4 ++-- consensus/src/liveness/round_state.rs | 4 ++-- consensus/src/pending_order_votes.rs | 6 +++--- consensus/src/pending_votes.rs | 10 +++++----- consensus/src/pipeline/buffer_item.rs | 8 ++++---- types/src/ledger_info.rs | 14 +++++++------- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/consensus/consensus-types/src/block_test.rs b/consensus/consensus-types/src/block_test.rs index bc33ddec8bc5a..54ece0539e2c8 100644 --- a/consensus/consensus-types/src/block_test.rs +++ b/consensus/consensus-types/src/block_test.rs @@ -17,7 +17,7 @@ use aptos_types::{ account_address::AccountAddress, aggregate_signature::PartialSignatures, block_info::{BlockInfo, Round}, - ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures}, + ledger_info::{LedgerInfo, LedgerInfoWithVerifiedSignatures}, on_chain_config::ValidatorSet, validator_signer::ValidatorSigner, validator_verifier::{random_validator_verifier, ValidatorVerifier}, @@ -131,7 +131,7 @@ fn test_same_qc_different_authors() { .unwrap(); let signature = signer.sign(genesis_qc.ledger_info().ledger_info()).unwrap(); - let mut ledger_info_altered = LedgerInfoWithPartialSignatures::new( + let mut ledger_info_altered = LedgerInfoWithVerifiedSignatures::new( genesis_qc.ledger_info().ledger_info().clone(), PartialSignatures::empty(), ); @@ -201,7 +201,7 @@ fn test_block_metadata_bitvec() { ); let mut ledger_info_1 = - LedgerInfoWithPartialSignatures::new(ledger_info.clone(), PartialSignatures::empty()); + LedgerInfoWithVerifiedSignatures::new(ledger_info.clone(), PartialSignatures::empty()); let votes_1 = vec![true, false, true, true]; votes_1 .iter() diff --git a/consensus/consensus-types/src/timeout_2chain.rs b/consensus/consensus-types/src/timeout_2chain.rs index c0d62edc6ffb3..87d35bc99bd92 100644 --- a/consensus/consensus-types/src/timeout_2chain.rs +++ b/consensus/consensus-types/src/timeout_2chain.rs @@ -406,7 +406,7 @@ mod tests { use aptos_types::{ aggregate_signature::PartialSignatures, block_info::BlockInfo, - ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures}, + ledger_info::{LedgerInfo, LedgerInfoWithVerifiedSignatures}, validator_verifier::random_validator_verifier, }; @@ -415,7 +415,7 @@ mod tests { let quorum_size = validators.quorum_voting_power() as usize; let generate_quorum = |round, num_of_signature| { let vote_data = VoteData::new(BlockInfo::random(round), BlockInfo::random(0)); - let mut ledger_info = LedgerInfoWithPartialSignatures::new( + let mut ledger_info = LedgerInfoWithVerifiedSignatures::new( LedgerInfo::new(BlockInfo::empty(), vote_data.hash()), PartialSignatures::empty(), ); diff --git a/consensus/safety-rules/src/test_utils.rs b/consensus/safety-rules/src/test_utils.rs index 
07b9159c66a45..ce161c0a5fb14 100644 --- a/consensus/safety-rules/src/test_utils.rs +++ b/consensus/safety-rules/src/test_utils.rs @@ -24,7 +24,7 @@ use aptos_types::{ block_info::BlockInfo, epoch_change::EpochChangeProof, epoch_state::EpochState, - ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures, LedgerInfoWithSignatures}, + ledger_info::{LedgerInfo, LedgerInfoWithSignatures, LedgerInfoWithVerifiedSignatures}, on_chain_config::ValidatorSet, proof::AccumulatorExtensionProof, validator_info::ValidatorInfo, @@ -168,7 +168,7 @@ pub fn make_proposal_with_parent_and_overrides( ) .unwrap(); - let mut ledger_info_with_signatures = LedgerInfoWithPartialSignatures::new( + let mut ledger_info_with_signatures = LedgerInfoWithVerifiedSignatures::new( vote.ledger_info().clone(), PartialSignatures::empty(), ); diff --git a/consensus/src/liveness/round_state.rs b/consensus/src/liveness/round_state.rs index 0dc03ab88d105..74e78e9c9f024 100644 --- a/consensus/src/liveness/round_state.rs +++ b/consensus/src/liveness/round_state.rs @@ -14,7 +14,7 @@ use aptos_consensus_types::{ use aptos_crypto::HashValue; use aptos_logger::{prelude::*, Schema}; use aptos_types::{ - ledger_info::LedgerInfoWithPartialSignatures, validator_verifier::ValidatorVerifier, + ledger_info::LedgerInfoWithVerifiedSignatures, validator_verifier::ValidatorVerifier, }; use futures::future::AbortHandle; use serde::Serialize; @@ -45,7 +45,7 @@ pub struct NewRoundEvent { pub round: Round, pub reason: NewRoundReason, pub timeout: Duration, - pub prev_round_votes: Vec<(HashValue, LedgerInfoWithPartialSignatures)>, + pub prev_round_votes: Vec<(HashValue, LedgerInfoWithVerifiedSignatures)>, pub prev_round_timeout_votes: Option, } diff --git a/consensus/src/pending_order_votes.rs b/consensus/src/pending_order_votes.rs index 94b1ba6d15451..46cf23cfe2b90 100644 --- a/consensus/src/pending_order_votes.rs +++ b/consensus/src/pending_order_votes.rs @@ -7,7 +7,7 @@ use aptos_crypto::{hash::CryptoHash, HashValue}; use aptos_logger::prelude::*; use aptos_types::{ aggregate_signature::PartialSignatures, - ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures, LedgerInfoWithSignatures}, + ledger_info::{LedgerInfo, LedgerInfoWithSignatures, LedgerInfoWithVerifiedSignatures}, validator_verifier::{ValidatorVerifier, VerifyError}, }; use std::{collections::HashMap, sync::Arc}; @@ -33,7 +33,7 @@ pub enum OrderVoteReceptionResult { #[derive(Debug, PartialEq, Eq)] enum OrderVoteStatus { EnoughVotes(LedgerInfoWithSignatures), - NotEnoughVotes(LedgerInfoWithPartialSignatures), + NotEnoughVotes(LedgerInfoWithVerifiedSignatures), } /// A PendingVotes structure keep track of order votes for the last few rounds @@ -75,7 +75,7 @@ impl PendingOrderVotes { verified_quorum_cert.expect( "Quorum Cert is expected when creating a new entry in pending order votes", ), - OrderVoteStatus::NotEnoughVotes(LedgerInfoWithPartialSignatures::new( + OrderVoteStatus::NotEnoughVotes(LedgerInfoWithVerifiedSignatures::new( order_vote.ledger_info().clone(), PartialSignatures::empty(), )), diff --git a/consensus/src/pending_votes.rs b/consensus/src/pending_votes.rs index 05abc30dc63a2..b2177d2c5889a 100644 --- a/consensus/src/pending_votes.rs +++ b/consensus/src/pending_votes.rs @@ -20,7 +20,7 @@ use aptos_crypto::{hash::CryptoHash, HashValue}; use aptos_logger::prelude::*; use aptos_types::{ aggregate_signature::PartialSignatures, - ledger_info::LedgerInfoWithPartialSignatures, + ledger_info::LedgerInfoWithVerifiedSignatures, validator_verifier::{ValidatorVerifier, VerifyError}, 
diff --git a/consensus/src/pending_votes.rs b/consensus/src/pending_votes.rs
index 05abc30dc63a2..b2177d2c5889a 100644
--- a/consensus/src/pending_votes.rs
+++ b/consensus/src/pending_votes.rs
@@ -20,7 +20,7 @@ use aptos_crypto::{hash::CryptoHash, HashValue};
 use aptos_logger::prelude::*;
 use aptos_types::{
     aggregate_signature::PartialSignatures,
-    ledger_info::LedgerInfoWithPartialSignatures,
+    ledger_info::LedgerInfoWithVerifiedSignatures,
     validator_verifier::{ValidatorVerifier, VerifyError},
 };
 use std::{
@@ -62,7 +62,7 @@ pub struct PendingVotes {
     /// This might keep multiple LedgerInfos for the current round: either due to different proposals (byzantine behavior)
     /// or due to different NIL proposals (clients can have a different view of what block to extend).
     li_digest_to_votes:
-        HashMap<HashValue /* LedgerInfo digest */, (usize, LedgerInfoWithPartialSignatures)>,
+        HashMap<HashValue /* LedgerInfo digest */, (usize, LedgerInfoWithVerifiedSignatures)>,
     /// Tracks all the signatures of the 2-chain timeout for the given round.
     maybe_partial_2chain_tc: Option<TwoChainTimeoutWithPartialSignatures>,
     /// Map of Author to (vote, li_digest). This is useful to discard multiple votes.
@@ -138,7 +138,7 @@ impl PendingVotes {
                 // if the ledger info with signatures doesn't exist yet, create it
                 (
                     len,
-                    LedgerInfoWithPartialSignatures::new(
+                    LedgerInfoWithVerifiedSignatures::new(
                         vote.ledger_info().clone(),
                         PartialSignatures::empty(),
                     ),
@@ -264,7 +264,7 @@ impl PendingVotes {
 
     pub fn aggregate_qc_now(
         validator_verifier: &ValidatorVerifier,
-        li_with_sig: &LedgerInfoWithPartialSignatures,
+        li_with_sig: &LedgerInfoWithVerifiedSignatures,
         vote_data: &VoteData,
     ) -> VoteReceptionResult {
         match li_with_sig.aggregate_signatures(validator_verifier) {
@@ -317,7 +317,7 @@ impl PendingVotes {
     pub fn drain_votes(
         &mut self,
     ) -> (
-        Vec<(HashValue, LedgerInfoWithPartialSignatures)>,
+        Vec<(HashValue, LedgerInfoWithVerifiedSignatures)>,
        Option<TwoChainTimeoutWithPartialSignatures>,
    ) {
        for (hash_index, _) in self.li_digest_to_votes.values() {
diff --git a/consensus/src/pipeline/buffer_item.rs b/consensus/src/pipeline/buffer_item.rs
index 46e92047d3d18..3d06658cd4323 100644
--- a/consensus/src/pipeline/buffer_item.rs
+++ b/consensus/src/pipeline/buffer_item.rs
@@ -16,7 +16,7 @@ use aptos_reliable_broadcast::DropGuard;
 use aptos_types::{
     aggregate_signature::PartialSignatures,
     block_info::BlockInfo,
-    ledger_info::{LedgerInfo, LedgerInfoWithPartialSignatures, LedgerInfoWithSignatures},
+    ledger_info::{LedgerInfo, LedgerInfoWithSignatures, LedgerInfoWithVerifiedSignatures},
     validator_verifier::ValidatorVerifier,
 };
 use futures::future::BoxFuture;
@@ -68,7 +68,7 @@ fn generate_executed_item_from_ordered(
     order_vote_enabled: bool,
 ) -> BufferItem {
     debug!("{} advance to executed from ordered", commit_info);
-    let partial_commit_proof = LedgerInfoWithPartialSignatures::new(
+    let partial_commit_proof = LedgerInfoWithVerifiedSignatures::new(
         generate_commit_ledger_info(&commit_info, &ordered_proof, order_vote_enabled),
         verified_signatures,
     );
@@ -106,7 +106,7 @@ pub struct OrderedItem {
 
 pub struct ExecutedItem {
     pub executed_blocks: Vec<Arc<PipelinedBlock>>,
-    pub partial_commit_proof: LedgerInfoWithPartialSignatures,
+    pub partial_commit_proof: LedgerInfoWithVerifiedSignatures,
     pub callback: StateComputerCommitCallBackType,
     pub commit_info: BlockInfo,
     pub ordered_proof: LedgerInfoWithSignatures,
@@ -114,7 +114,7 @@ pub struct ExecutedItem {
 
 pub struct SignedItem {
     pub executed_blocks: Vec<Arc<PipelinedBlock>>,
-    pub partial_commit_proof: LedgerInfoWithPartialSignatures,
+    pub partial_commit_proof: LedgerInfoWithVerifiedSignatures,
     pub callback: StateComputerCommitCallBackType,
     pub commit_vote: CommitVote,
     pub rb_handle: Option<(Instant, DropGuard)>,
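The buffer-item structs touched above are stages of the commit pipeline: an item advances from ordered to executed to signed as commit votes accumulate in partial_commit_proof, and becomes aggregated once quorum signs. A toy version of that progression (simplified fields and threshold; not the buffer_item.rs definitions):

/// Simplified commit-proof lifecycle; each stage carries stronger evidence.
#[allow(dead_code)]
enum BufferItem {
    Ordered { ordered_proof: String },    // quorum ordered the block
    Executed { commit_sigs: Vec<u64> },   // executed; collecting commit votes
    Signed { commit_sigs: Vec<u64> },     // own vote broadcast, still collecting
    Aggregated { commit_proof: String },  // quorum of commit votes reached
}

impl BufferItem {
    /// Advance to Aggregated once enough commit votes have arrived.
    fn try_advance(self, quorum: usize) -> Self {
        match self {
            BufferItem::Signed { commit_sigs } if commit_sigs.len() >= quorum => {
                BufferItem::Aggregated {
                    commit_proof: format!("aggregated {} commit votes", commit_sigs.len()),
                }
            },
            other => other,
        }
    }
}

fn main() {
    let item = BufferItem::Signed { commit_sigs: vec![1, 2, 3] };
    assert!(matches!(item.try_advance(3), BufferItem::Aggregated { .. }));
}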
diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs
index 18aff380a54f9..7e0da229864e0 100644
--- a/types/src/ledger_info.rs
+++ b/types/src/ledger_info.rs
@@ -326,18 +326,18 @@ pub enum VerificationStatus {
 /// Contains the ledger info and partially aggregated signature from a set of validators, this data
 /// is only used during the aggregating the votes from different validators and is not persisted in DB.
 #[derive(Clone, Debug, Eq, PartialEq)]
-pub struct LedgerInfoWithPartialSignatures {
+pub struct LedgerInfoWithVerifiedSignatures {
     ledger_info: LedgerInfo,
     partial_sigs: PartialSignatures,
 }
 
-impl Display for LedgerInfoWithPartialSignatures {
+impl Display for LedgerInfoWithVerifiedSignatures {
     fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
         write!(f, "{}", self.ledger_info)
     }
 }
 
-impl LedgerInfoWithPartialSignatures {
+impl LedgerInfoWithVerifiedSignatures {
     pub fn new(ledger_info: LedgerInfo, signatures: PartialSignatures) -> Self {
         Self {
             ledger_info,
@@ -393,7 +393,7 @@ pub enum SignatureWithStatus {
 /// verify the aggregated signature at once. If the aggregated signature is invalid, then we verify each individual
 /// unverified signature and remove the invalid signatures.
 #[derive(Clone, Debug, Eq, PartialEq)]
-pub struct LedgerInfoWithMixedSignatures {
+pub struct LedgerInfoWithUnverifiedSignatures {
     ledger_info: LedgerInfo,
     // These signatures are not yet verified. For efficiency, once enough unverified signatures are collected,
     // they will be aggregated and verified.
@@ -401,13 +401,13 @@ pub struct LedgerInfoWithMixedSignatures {
     verified_signatures: PartialSignatures,
 }
 
-impl Display for LedgerInfoWithMixedSignatures {
+impl Display for LedgerInfoWithUnverifiedSignatures {
     fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
         write!(f, "{}", self.ledger_info)
     }
 }
 
-impl LedgerInfoWithMixedSignatures {
+impl LedgerInfoWithUnverifiedSignatures {
     pub fn new(ledger_info: LedgerInfo) -> Self {
         Self {
             ledger_info,
@@ -691,7 +691,7 @@ mod tests {
         let epoch_state = Arc::new(EpochState::new(10, validator_verifier.clone()));
 
         let mut ledger_info_with_mixed_signatures =
-            LedgerInfoWithMixedSignatures::new(ledger_info.clone());
+            LedgerInfoWithUnverifiedSignatures::new(ledger_info.clone());
 
         let mut partial_sig = PartialSignatures::empty();

From 5ed8ad557de8026aa8592aed38ffafd69466e891 Mon Sep 17 00:00:00 2001
From: Satya Vusirikala
Date: Wed, 18 Sep 2024 23:31:45 -0700
Subject: [PATCH 36/36] Addressing PR comments

---
 types/src/aggregate_signature.rs |  6 +++---
 types/src/ledger_info.rs         | 21 ++++++++-------------
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/types/src/aggregate_signature.rs b/types/src/aggregate_signature.rs
index 3202583b6252d..24ac789da671e 100644
--- a/types/src/aggregate_signature.rs
+++ b/types/src/aggregate_signature.rs
@@ -86,12 +86,12 @@ impl PartialSignatures {
         self.signatures.is_empty()
     }
 
-    pub fn remove_signature(&mut self, validator: AccountAddress) {
-        self.signatures.remove(&validator);
+    pub fn remove_signature(&mut self, validator: AccountAddress) -> Option<bls12381::Signature> {
+        self.signatures.remove(&validator)
     }
 
     pub fn add_signature(&mut self, validator: AccountAddress, signature: bls12381::Signature) {
-        self.signatures.entry(validator).or_insert(signature);
+        self.signatures.insert(validator, signature);
     }
 
     pub fn signatures(&self) -> &BTreeMap<AccountAddress, bls12381::Signature> {
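Two small semantic changes hide in this hunk: add_signature now overwrites an existing entry via insert instead of keeping the first signature with entry().or_insert, and remove_signature returns the removed value. The returned Option is what lets the caller in ledger_info.rs below move a signature from the unverified map to the verified one without cloning and without a separate contains check; the idiom in miniature (a plain BTreeMap standing in for PartialSignatures):

use std::collections::BTreeMap;

fn main() {
    let mut unverified: BTreeMap<u64, Vec<u8>> = BTreeMap::new();
    let mut verified: BTreeMap<u64, Vec<u8>> = BTreeMap::new();
    unverified.insert(7, vec![0x01, 0x02]);

    // remove() hands back the owned signature, so it moves rather than clones,
    // and an absent key simply skips the block instead of panicking.
    if let Some(sig) = unverified.remove(&7) {
        verified.insert(7, sig);
    }
    assert!(unverified.is_empty());
    assert!(verified.contains_key(&7));
}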
diff --git a/types/src/ledger_info.rs b/types/src/ledger_info.rs
index 7e0da229864e0..9f3aead311f81 100644
--- a/types/src/ledger_info.rs
+++ b/types/src/ledger_info.rs
@@ -318,11 +318,6 @@ impl LedgerInfoWithV0 {
     }
 }
 
-pub enum VerificationStatus {
-    Verified,
-    Unverified,
-}
-
 /// Contains the ledger info and partially aggregated signature from a set of validators, this data
 /// is only used during the aggregating the votes from different validators and is not persisted in DB.
 #[derive(Clone, Debug, Eq, PartialEq)]
@@ -437,9 +432,6 @@ impl LedgerInfoWithUnverifiedSignatures {
         if self.verified_signatures.contains_voter(&validator) {
             return;
         }
-        if self.unverified_signatures.contains_voter(&validator) {
-            self.unverified_signatures.remove_signature(validator);
-        }
         self.unverified_signatures
             .add_signature(validator, signature);
     }
@@ -527,15 +519,18 @@ impl LedgerInfoWithUnverifiedSignatures {
                     .verify(*account_address, self.ledger_info(), signature)
                     .is_ok()
                 {
-                    return Some((*account_address, signature.clone()));
+                    return Some(*account_address);
                 }
                 None
             })
             .collect::<Vec<_>>();
-        for (account_address, signature) in verified {
-            self.verified_signatures
-                .add_signature(account_address, signature);
-            self.unverified_signatures.remove_signature(account_address);
+        for account_address in verified {
+            if let Some(signature) =
+                self.unverified_signatures.remove_signature(account_address)
+            {
+                self.verified_signatures
+                    .add_signature(account_address, signature);
+            }
         }
 
         // For these authors, we will not use optimistic signature verification in the future.
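The rewritten loop is the fallback half of the optimistic verification described in the struct's comments: try one aggregate check over everything unverified, and only if that fails verify signatures individually, promote the valid ones, and remember the invalid authors so their future votes skip the optimistic path. A condensed sketch of that control flow (verify_aggregate and verify_one are illustrative stand-ins, not the aptos_types API):

use std::collections::BTreeMap;

type Author = u64;
type Sig = u8;

// Illustrative stand-ins: a signature is "valid" here iff it is even.
fn verify_aggregate(sigs: &BTreeMap<Author, Sig>) -> bool {
    sigs.values().all(|s| s % 2 == 0)
}
fn verify_one(_author: Author, sig: Sig) -> bool {
    sig % 2 == 0
}

/// Optimistic path: one cheap aggregate check; on failure, verify individually,
/// keep the valid signatures, and report the invalid authors.
fn promote(unverified: &mut BTreeMap<Author, Sig>, verified: &mut BTreeMap<Author, Sig>) -> Vec<Author> {
    if verify_aggregate(unverified) {
        verified.append(unverified); // all good: move everything at once
        return Vec::new();
    }
    let authors: Vec<Author> = unverified.keys().copied().collect();
    let mut invalid = Vec::new();
    for a in authors {
        if let Some(sig) = unverified.remove(&a) {
            if verify_one(a, sig) {
                verified.insert(a, sig);
            } else {
                invalid.push(a); // no optimistic verification for them later
            }
        }
    }
    invalid
}

fn main() {
    let mut unverified = BTreeMap::from([(1, 2), (2, 3), (3, 4)]);
    let mut verified = BTreeMap::new();
    let bad = promote(&mut unverified, &mut verified);
    assert_eq!(bad, vec![2]);
    assert_eq!(verified.len(), 2);
}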