From 60e8b8747f284c29fa303515e6a048abae7d9b91 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Sun, 2 Jun 2024 19:14:55 -0700 Subject: [PATCH 01/67] Use proof queue asynchronously --- consensus/src/quorum_store/mod.rs | 1 + consensus/src/quorum_store/proof_manager.rs | 207 ++++++---- consensus/src/quorum_store/proof_queue.rs | 360 ++++++++++++++++++ .../src/quorum_store/quorum_store_builder.rs | 12 +- .../quorum_store/tests/proof_manager_test.rs | 88 +++-- consensus/src/quorum_store/tests/utils.rs | 2 +- consensus/src/quorum_store/utils.rs | 290 +------------- 7 files changed, 562 insertions(+), 398 deletions(-) create mode 100644 consensus/src/quorum_store/proof_queue.rs diff --git a/consensus/src/quorum_store/mod.rs b/consensus/src/quorum_store/mod.rs index 888b62b0122c2..64525643334f6 100644 --- a/consensus/src/quorum_store/mod.rs +++ b/consensus/src/quorum_store/mod.rs @@ -12,6 +12,7 @@ pub(crate) mod batch_store; pub(crate) mod network_listener; pub(crate) mod proof_coordinator; pub(crate) mod proof_manager; +pub(crate) mod proof_queue; pub(crate) mod quorum_store_builder; pub(crate) mod quorum_store_coordinator; pub mod quorum_store_db; diff --git a/consensus/src/quorum_store/proof_manager.rs b/consensus/src/quorum_store/proof_manager.rs index 3cdac9a768ad3..9392bc60279b4 100644 --- a/consensus/src/quorum_store/proof_manager.rs +++ b/consensus/src/quorum_store/proof_manager.rs @@ -1,13 +1,12 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use super::batch_store::BatchStore; +use super::proof_queue::BatchSortKey; use crate::{ monitor, quorum_store::{ - batch_generator::BackPressure, - counters, - utils::{BatchSortKey, ProofQueue}, + batch_generator::BackPressure, batch_store::BatchStore, counters, + proof_queue::ProofQueueCommand, }, }; use aptos_consensus_types::{ @@ -18,7 +17,7 @@ use aptos_consensus_types::{ use aptos_logger::prelude::*; use aptos_types::{transaction::SignedTransaction, PeerId}; use futures::StreamExt; -use 
futures_channel::mpsc::Receiver; +use futures_channel::{mpsc::Receiver, oneshot}; use rand::{seq::SliceRandom, thread_rng}; use std::{ cmp::min, @@ -129,41 +128,57 @@ impl BatchQueue { } pub struct ProofManager { - proofs_for_consensus: ProofQueue, batch_queue: BatchQueue, back_pressure_total_txn_limit: u64, remaining_total_txn_num: u64, back_pressure_total_proof_limit: u64, remaining_total_proof_num: u64, allow_batches_without_pos_in_proposal: bool, + proof_queue_tx: Arc>, } impl ProofManager { pub fn new( - my_peer_id: PeerId, back_pressure_total_txn_limit: u64, back_pressure_total_proof_limit: u64, batch_store: Arc, allow_batches_without_pos_in_proposal: bool, + proof_queue_tx: Arc>, ) -> Self { Self { - proofs_for_consensus: ProofQueue::new(my_peer_id), batch_queue: BatchQueue::new(batch_store), back_pressure_total_txn_limit, remaining_total_txn_num: 0, back_pressure_total_proof_limit, remaining_total_proof_num: 0, allow_batches_without_pos_in_proposal, + proof_queue_tx, } } - pub(crate) fn receive_proofs(&mut self, proofs: Vec) { + pub(crate) async fn receive_proofs(&mut self, proofs: Vec) { + if !proofs.is_empty() { + let (response_tx, response_rx) = oneshot::channel(); + if self + .proof_queue_tx + .send(ProofQueueCommand::AddProofs(proofs.clone(), response_tx)) + .await + .is_ok() + { + if let Ok((remaining_total_txn_num, remaining_total_proof_num)) = response_rx.await + { + self.remaining_total_txn_num = remaining_total_txn_num; + self.remaining_total_proof_num = remaining_total_proof_num; + } else { + warn!("Failed to get response from proof queue after adding proofs"); + } + } else { + warn!("Failed to add proofs to proof queue"); + } + } for proof in proofs.into_iter() { self.batch_queue.remove_batch(proof.info()); - self.proofs_for_consensus.push(proof); } - (self.remaining_total_txn_num, self.remaining_total_proof_num) = - self.proofs_for_consensus.remaining_txns_and_proofs(); } pub(crate) fn receive_batches(&mut self, batches: Vec) { @@ -172,7 +187,7 
@@ impl ProofManager { } } - pub(crate) fn handle_commit_notification( + pub(crate) async fn handle_commit_notification( &mut self, block_timestamp: u64, batches: Vec, @@ -185,14 +200,30 @@ impl ProofManager { for batch in &batches { self.batch_queue.remove_batch(batch); } - self.proofs_for_consensus.mark_committed(batches); - self.proofs_for_consensus - .handle_updated_block_timestamp(block_timestamp); - (self.remaining_total_txn_num, self.remaining_total_proof_num) = - self.proofs_for_consensus.remaining_txns_and_proofs(); + + let (response_tx, response_rx) = oneshot::channel(); + if self + .proof_queue_tx + .send(ProofQueueCommand::MarkCommitted( + batches, + block_timestamp, + response_tx, + )) + .await + .is_ok() + { + if let Ok((remaining_total_txn_num, remaining_total_proof_num)) = response_rx.await { + self.remaining_total_txn_num = remaining_total_txn_num; + self.remaining_total_proof_num = remaining_total_proof_num; + } else { + warn!("Failed to get response from proof queue after marking proofs as committed"); + } + } else { + warn!("Failed to mark proofs as committed in proof queue"); + } } - pub(crate) fn handle_proposal_request(&mut self, msg: GetPayloadCommand) { + pub(crate) async fn handle_proposal_request(&mut self, msg: GetPayloadCommand) { match msg { GetPayloadCommand::GetPayloadRequest( max_txns, @@ -211,63 +242,87 @@ impl ProofManager { PayloadFilter::InQuorumStore(proofs) => proofs, }; - let (proof_block, proof_queue_fully_utilized) = self - .proofs_for_consensus - .pull_proofs(&excluded_batches, max_txns, max_bytes, return_non_full); - - counters::NUM_BATCHES_WITHOUT_PROOF_OF_STORE.observe(self.batch_queue.len() as f64); - counters::PROOF_QUEUE_FULLY_UTILIZED - .observe(if proof_queue_fully_utilized { 1.0 } else { 0.0 }); + let (response_tx, response_rx) = oneshot::channel(); + if self + .proof_queue_tx + .send(ProofQueueCommand::PullProofs { + excluded_batches: excluded_batches.clone(), + max_txns, + max_bytes, + return_non_full, + 
response_sender: response_tx, + }) + .await + .is_ok() + { + match response_rx.await { + Ok((proof_block, proof_queue_fully_utilized)) => { + counters::NUM_BATCHES_WITHOUT_PROOF_OF_STORE + .observe(self.batch_queue.len() as f64); + counters::PROOF_QUEUE_FULLY_UTILIZED + .observe(if proof_queue_fully_utilized { 1.0 } else { 0.0 }); - let mut inline_block: Vec<(BatchInfo, Vec)> = vec![]; - let cur_txns: u64 = proof_block.iter().map(|p| p.num_txns()).sum(); - let cur_bytes: u64 = proof_block.iter().map(|p| p.num_bytes()).sum(); + let mut inline_block: Vec<(BatchInfo, Vec)> = vec![]; + let cur_txns: u64 = proof_block.iter().map(|p| p.num_txns()).sum(); + let cur_bytes: u64 = proof_block.iter().map(|p| p.num_bytes()).sum(); - if self.allow_batches_without_pos_in_proposal && proof_queue_fully_utilized { - inline_block = self.batch_queue.pull_batches( - min(max_txns - cur_txns, max_inline_txns), - min(max_bytes - cur_bytes, max_inline_bytes), - excluded_batches - .iter() - .cloned() - .chain(proof_block.iter().map(|proof| proof.info().clone())) - .collect(), - ); - } - let inline_txns = inline_block - .iter() - .map(|(_, txns)| txns.len()) - .sum::(); - counters::NUM_INLINE_BATCHES.observe(inline_block.len() as f64); - counters::NUM_INLINE_TXNS.observe(inline_txns as f64); + if self.allow_batches_without_pos_in_proposal + && proof_queue_fully_utilized + { + inline_block = self.batch_queue.pull_batches( + min(max_txns - cur_txns, max_inline_txns), + min(max_bytes - cur_bytes, max_inline_bytes), + excluded_batches + .iter() + .cloned() + .chain(proof_block.iter().map(|proof| proof.info().clone())) + .collect(), + ); + } + let inline_txns = inline_block + .iter() + .map(|(_, txns)| txns.len()) + .sum::(); + counters::NUM_INLINE_BATCHES.observe(inline_block.len() as f64); + counters::NUM_INLINE_TXNS.observe(inline_txns as f64); - let res = GetPayloadResponse::GetPayloadResponse( - if proof_block.is_empty() && inline_block.is_empty() { - Payload::empty(true, 
self.allow_batches_without_pos_in_proposal) - } else if inline_block.is_empty() { - trace!( - "QS: GetBlockRequest excluded len {}, block len {}", - excluded_batches.len(), - proof_block.len() - ); - Payload::InQuorumStore(ProofWithData::new(proof_block)) - } else { - trace!( - "QS: GetBlockRequest excluded len {}, block len {}, inline len {}", - excluded_batches.len(), - proof_block.len(), - inline_block.len() - ); - Payload::QuorumStoreInlineHybrid( - inline_block, - ProofWithData::new(proof_block), - None, - ) - }, - ); - match callback.send(Ok(res)) { - Ok(_) => (), - Err(err) => debug!("BlockResponse receiver not available! error {:?}", err), + let res = GetPayloadResponse::GetPayloadResponse( + if proof_block.is_empty() && inline_block.is_empty() { + Payload::empty(true, self.allow_batches_without_pos_in_proposal) + } else if inline_block.is_empty() { + trace!( + "QS: GetBlockRequest excluded len {}, block len {}", + excluded_batches.len(), + proof_block.len() + ); + Payload::InQuorumStore(ProofWithData::new(proof_block)) + } else { + trace!( + "QS: GetBlockRequest excluded len {}, block len {}, inline len {}", + excluded_batches.len(), + proof_block.len(), + inline_block.len() + ); + Payload::QuorumStoreInlineHybrid( + inline_block, + ProofWithData::new(proof_block), + None, + ) + }, + ); + match callback.send(Ok(res)) { + Ok(_) => (), + Err(err) => { + debug!("BlockResponse receiver not available! error {:?}", err) + }, + } + }, + Err(e) => { + warn!("Failed to get response from ProofQueue after sending PullProofs command. {:?}", e); + }, + } + } else { + warn!("Failed to get remaining total num from proof queue"); } }, } @@ -297,7 +352,7 @@ impl ProofManager { tokio::select! 
{ Some(msg) = proposal_rx.next() => monitor!("proof_manager_handle_proposal", { - self.handle_proposal_request(msg); + self.handle_proposal_request(msg).await; let updated_back_pressure = self.qs_back_pressure(); if updated_back_pressure != back_pressure { @@ -317,7 +372,7 @@ impl ProofManager { break; }, ProofManagerCommand::ReceiveProofs(proofs) => { - self.receive_proofs(proofs.take()); + self.receive_proofs(proofs.take()).await; }, ProofManagerCommand::ReceiveBatches(batches) => { self.receive_batches(batches); @@ -326,7 +381,7 @@ impl ProofManager { self.handle_commit_notification( block_timestamp, batches, - ); + ).await; }, } let updated_back_pressure = self.qs_back_pressure(); diff --git a/consensus/src/quorum_store/proof_queue.rs b/consensus/src/quorum_store/proof_queue.rs new file mode 100644 index 0000000000000..449eaf1301e45 --- /dev/null +++ b/consensus/src/quorum_store/proof_queue.rs @@ -0,0 +1,360 @@ +// Copyright © Aptos Foundation +// SPDX-License-Identifier: Apache-2.0 + +use super::utils::TimeExpirations; +use crate::quorum_store::counters; +use aptos_consensus_types::proof_of_store::{BatchId, BatchInfo, ProofOfStore}; +use aptos_logger::prelude::*; +use aptos_types::PeerId; +use futures::channel::oneshot; +use move_core_types::account_address::AccountAddress; +use rand::{seq::SliceRandom, thread_rng}; +use std::{ + cmp::{Ordering, Reverse}, + collections::{BTreeMap, HashMap, HashSet}, + time::Instant, +}; + +#[derive(PartialEq, Eq, Hash, Clone)] +pub struct BatchKey { + author: PeerId, + batch_id: BatchId, +} + +impl BatchKey { + pub fn from_info(info: &BatchInfo) -> Self { + Self { + author: info.author(), + batch_id: info.batch_id(), + } + } +} + +#[derive(PartialEq, Eq, Clone, Hash)] +pub struct BatchSortKey { + batch_key: BatchKey, + gas_bucket_start: u64, +} + +impl BatchSortKey { + pub fn from_info(info: &BatchInfo) -> Self { + Self { + batch_key: BatchKey::from_info(info), + gas_bucket_start: info.gas_bucket_start(), + } + } + + pub fn 
author(&self) -> PeerId { + self.batch_key.author + } +} + +impl PartialOrd for BatchSortKey { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for BatchSortKey { + fn cmp(&self, other: &Self) -> Ordering { + // ascending + match self.gas_bucket_start.cmp(&other.gas_bucket_start) { + Ordering::Equal => {}, + ordering => return ordering, + } + // descending + other.batch_key.batch_id.cmp(&self.batch_key.batch_id) + } +} + +#[derive(Debug)] +pub enum ProofQueueCommand { + // Proof manager sends this command to add the proofs to the proof queue + // We send back (remaining_txns, remaining_proofs) to the proof manager + AddProofs(Vec, oneshot::Sender<(u64, u64)>), + // Batch coordinator sends this command to add the received batches to the proof queue + // AddBatches(Vec), + // Proof manager sends this command to pull proofs from the proof queue to + // include in the block proposal. + PullProofs { + excluded_batches: HashSet, + max_txns: u64, + max_bytes: u64, + return_non_full: bool, + response_sender: oneshot::Sender<(Vec, bool)>, + }, + // Proof manager sends this command to mark these batches as committed and + // update the block timestamp. + // We send back the (remaining_txns, remaining_proofs) to the proof manager + MarkCommitted(Vec, u64, oneshot::Sender<(u64, u64)>), +} + +pub struct ProofQueue { + my_peer_id: PeerId, + // Queue per peer to ensure fairness between peers and priority within peer + author_to_batches: HashMap>, + // ProofOfStore and insertion_time. 
None if committed + batch_to_proof: HashMap>, + // Expiration index + expirations: TimeExpirations, + latest_block_timestamp: u64, + remaining_txns: u64, + remaining_proofs: u64, + remaining_local_txns: u64, + remaining_local_proofs: u64, +} + +impl ProofQueue { + pub(crate) fn new(my_peer_id: PeerId) -> Self { + Self { + my_peer_id, + author_to_batches: HashMap::new(), + batch_to_proof: HashMap::new(), + expirations: TimeExpirations::new(), + latest_block_timestamp: 0, + remaining_txns: 0, + remaining_proofs: 0, + remaining_local_txns: 0, + remaining_local_proofs: 0, + } + } + + #[inline] + fn inc_remaining(&mut self, author: &AccountAddress, num_txns: u64) { + self.remaining_txns += num_txns; + self.remaining_proofs += 1; + if *author == self.my_peer_id { + self.remaining_local_txns += num_txns; + self.remaining_local_proofs += 1; + } + } + + #[inline] + fn dec_remaining(&mut self, author: &AccountAddress, num_txns: u64) { + self.remaining_txns -= num_txns; + self.remaining_proofs -= 1; + if *author == self.my_peer_id { + self.remaining_local_txns -= num_txns; + self.remaining_local_proofs -= 1; + } + } + + pub(crate) fn push(&mut self, proof: ProofOfStore) { + if proof.expiration() < self.latest_block_timestamp { + counters::inc_rejected_pos_count(counters::POS_EXPIRED_LABEL); + return; + } + let batch_key = BatchKey::from_info(proof.info()); + if self.batch_to_proof.get(&batch_key).is_some() { + counters::inc_rejected_pos_count(counters::POS_DUPLICATE_LABEL); + return; + } + + let author = proof.author(); + let bucket = proof.gas_bucket_start(); + let num_txns = proof.num_txns(); + let expiration = proof.expiration(); + + let batch_sort_key = BatchSortKey::from_info(proof.info()); + let queue = self.author_to_batches.entry(author).or_default(); + queue.insert(batch_sort_key.clone(), proof.info().clone()); + self.expirations.add_item(batch_sort_key, expiration); + self.batch_to_proof + .insert(batch_key, Some((proof, Instant::now()))); + + if author == 
self.my_peer_id { + counters::inc_local_pos_count(bucket); + } else { + counters::inc_remote_pos_count(bucket); + } + + self.inc_remaining(&author, num_txns); + } + + // gets excluded and iterates over the vector returning non excluded or expired entries. + // return the vector of pulled PoS, and the size of the remaining PoS + // The flag in the second return argument is true iff the entire proof queue is fully utilized + // when pulling the proofs. If any proof from proof queue cannot be included due to size limits, + // this flag is set false. + pub(crate) fn pull_proofs( + &mut self, + excluded_batches: &HashSet, + max_txns: u64, + max_bytes: u64, + return_non_full: bool, + ) -> (Vec, bool) { + let mut ret = vec![]; + let mut cur_bytes = 0; + let mut cur_txns = 0; + let mut excluded_txns = 0; + let mut full = false; + + let mut iters = vec![]; + for (_, batches) in self.author_to_batches.iter() { + iters.push(batches.iter().rev()); + } + + while !iters.is_empty() { + iters.shuffle(&mut thread_rng()); + iters.retain_mut(|iter| { + if full { + return false; + } + if let Some((sort_key, batch)) = iter.next() { + if excluded_batches.contains(batch) { + excluded_txns += batch.num_txns(); + } else if let Some(Some((proof, insertion_time))) = + self.batch_to_proof.get(&sort_key.batch_key) + { + cur_bytes += batch.num_bytes(); + cur_txns += batch.num_txns(); + if cur_bytes > max_bytes || cur_txns > max_txns { + // Exceeded the limit for requested bytes or number of transactions. + full = true; + return false; + } + let bucket = proof.gas_bucket_start(); + ret.push(proof.clone()); + counters::pos_to_pull(bucket, insertion_time.elapsed().as_secs_f64()); + if cur_bytes == max_bytes || cur_txns == max_txns { + // Exactly the limit for requested bytes or number of transactions. 
+ full = true; + return false; + } + } + true + } else { + false + } + }) + } + info!( + // before non full check + byte_size = cur_bytes, + block_size = cur_txns, + batch_count = ret.len(), + full = full, + return_non_full = return_non_full, + "Pull payloads from QuorumStore: internal" + ); + + if full || return_non_full { + counters::BLOCK_SIZE_WHEN_PULL.observe(cur_txns as f64); + counters::BLOCK_BYTES_WHEN_PULL.observe(cur_bytes as f64); + counters::PROOF_SIZE_WHEN_PULL.observe(ret.len() as f64); + counters::EXCLUDED_TXNS_WHEN_PULL.observe(excluded_txns as f64); + // Stable sort, so the order of proofs within an author will not change. + ret.sort_by_key(|proof| Reverse(proof.gas_bucket_start())); + (ret, !full) + } else { + (Vec::new(), !full) + } + } + + pub(crate) fn handle_updated_block_timestamp(&mut self, block_timestamp: u64) { + assert!( + self.latest_block_timestamp <= block_timestamp, + "Decreasing block timestamp" + ); + self.latest_block_timestamp = block_timestamp; + + let expired = self.expirations.expire(block_timestamp); + let mut num_expired_but_not_committed = 0; + for key in &expired { + if let Some(mut queue) = self.author_to_batches.remove(&key.author()) { + if let Some(batch) = queue.remove(key) { + if self + .batch_to_proof + .get(&key.batch_key) + .expect("Entry for unexpired batch must exist") + .is_some() + { + // non-committed proof that is expired + num_expired_but_not_committed += 1; + counters::GAP_BETWEEN_BATCH_EXPIRATION_AND_CURRENT_TIME_WHEN_COMMIT + .observe((block_timestamp - batch.expiration()) as f64); + self.dec_remaining(&batch.author(), batch.num_txns()); + } + claims::assert_some!(self.batch_to_proof.remove(&key.batch_key)); + } + if !queue.is_empty() { + self.author_to_batches.insert(key.author(), queue); + } + } + } + counters::NUM_PROOFS_EXPIRED_WHEN_COMMIT.inc_by(num_expired_but_not_committed); + } + + pub(crate) fn remaining_txns_and_proofs(&self) -> (u64, u64) { + 
counters::NUM_TOTAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_txns as f64); + counters::NUM_TOTAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_proofs as f64); + counters::NUM_LOCAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_local_txns as f64); + counters::NUM_LOCAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_local_proofs as f64); + + (self.remaining_txns, self.remaining_proofs) + } + + // Mark in the hashmap committed PoS, but keep them until they expire + pub(crate) fn mark_committed(&mut self, batches: Vec) { + for batch in batches { + let batch_key = BatchKey::from_info(&batch); + if let Some(Some((proof, insertion_time))) = self.batch_to_proof.get(&batch_key) { + counters::pos_to_commit( + proof.gas_bucket_start(), + insertion_time.elapsed().as_secs_f64(), + ); + self.dec_remaining(&batch.author(), batch.num_txns()); + } + self.batch_to_proof.insert(batch_key, None); + } + } + + pub async fn start(mut self, mut command_rx: tokio::sync::mpsc::Receiver) { + loop { + let _timer = counters::PROOF_MANAGER_MAIN_LOOP.start_timer(); + if let Some(msg) = command_rx.recv().await { + match msg { + ProofQueueCommand::AddProofs(proofs, response_sender) => { + for proof in proofs { + self.push(proof); + } + if let Err(e) = response_sender.send(self.remaining_txns_and_proofs()) { + warn!("Failed to send response to AddProofs: {:?}", e); + } + }, + ProofQueueCommand::PullProofs { + excluded_batches, + max_txns, + max_bytes, + return_non_full, + response_sender, + } => { + let (proofs, full) = self.pull_proofs( + &excluded_batches, + max_txns, + max_bytes, + return_non_full, + ); + if let Err(e) = response_sender.send((proofs, full)) { + warn!("Failed to send response to PullProofs: {:?}", e); + } + }, + ProofQueueCommand::MarkCommitted(batches, block_timestamp, response_sender) => { + self.mark_committed(batches); + self.handle_updated_block_timestamp(block_timestamp); + if let Err(e) = response_sender.send(self.remaining_txns_and_proofs()) { + error!("Failed to send 
response to MarkCommitted: {:?}", e); + } + }, // ProofQueueCommand::AddBatches(batches) => { + // for batch in batches { + // let batch_key = BatchKey::from_info(&batch); + // if self.batch_to_proof.get(&batch_key).is_none() { + // self.inc_remaining(&batch.author(), batch.num_txns()); + // } + // } + // } + } + } + } + } +} diff --git a/consensus/src/quorum_store/quorum_store_builder.rs b/consensus/src/quorum_store/quorum_store_builder.rs index 957c33eddfd2b..c21ab16a9b7b3 100644 --- a/consensus/src/quorum_store/quorum_store_builder.rs +++ b/consensus/src/quorum_store/quorum_store_builder.rs @@ -18,6 +18,7 @@ use crate::{ network_listener::NetworkListener, proof_coordinator::{ProofCoordinator, ProofCoordinatorCommand}, proof_manager::{ProofManager, ProofManagerCommand}, + proof_queue::{ProofQueue, ProofQueueCommand}, quorum_store_coordinator::{CoordinatorCommand, QuorumStoreCoordinator}, types::{Batch, BatchResponse}, }, @@ -136,6 +137,8 @@ pub struct InnerBuilder { proof_coordinator_cmd_rx: Option>, proof_manager_cmd_tx: tokio::sync::mpsc::Sender, proof_manager_cmd_rx: Option>, + proof_queue_cmd_tx: Arc>, + proof_queue_cmd_rx: Option>, back_pressure_tx: tokio::sync::mpsc::Sender, back_pressure_rx: Option>, quorum_store_storage: Arc, @@ -179,6 +182,7 @@ impl InnerBuilder { config.channel_size, None, ); + let (proof_queue_tx, proof_queue_rx) = tokio::sync::mpsc::channel(config.channel_size); let mut remote_batch_coordinator_cmd_tx = Vec::new(); let mut remote_batch_coordinator_cmd_rx = Vec::new(); for _ in 0..config.num_workers_for_remote_batches { @@ -209,6 +213,8 @@ impl InnerBuilder { proof_coordinator_cmd_rx: Some(proof_coordinator_cmd_rx), proof_manager_cmd_tx, proof_manager_cmd_rx: Some(proof_manager_cmd_rx), + proof_queue_cmd_tx: Arc::new(proof_queue_tx), + proof_queue_cmd_rx: Some(proof_queue_rx), back_pressure_tx, back_pressure_rx: Some(back_pressure_rx), quorum_store_storage, @@ -312,6 +318,10 @@ impl InnerBuilder { ) ); + let proof_queue = 
ProofQueue::new(self.author); + let proof_queue_cmd_rx = self.proof_queue_cmd_rx.take().unwrap(); + spawn_named!("proof_queue", proof_queue.start(proof_queue_cmd_rx)); + for (i, remote_batch_coordinator_cmd_rx) in self.remote_batch_coordinator_cmd_rx.into_iter().enumerate() { @@ -354,7 +364,6 @@ impl InnerBuilder { let proof_manager_cmd_rx = self.proof_manager_cmd_rx.take().unwrap(); let proof_manager = ProofManager::new( - self.author, self.config.back_pressure.backlog_txn_limit_count, self.config .back_pressure @@ -362,6 +371,7 @@ impl InnerBuilder { * self.num_validators, self.batch_store.clone().unwrap(), self.config.allow_batches_without_pos_in_proposal, + self.proof_queue_cmd_tx.clone(), ); spawn_named!( "proof_manager", diff --git a/consensus/src/quorum_store/tests/proof_manager_test.rs b/consensus/src/quorum_store/tests/proof_manager_test.rs index 812a854f62d9c..bff1087c94162 100644 --- a/consensus/src/quorum_store/tests/proof_manager_test.rs +++ b/consensus/src/quorum_store/tests/proof_manager_test.rs @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 use crate::quorum_store::{ - proof_manager::ProofManager, tests::batch_store_test::batch_store_for_test, + proof_manager::ProofManager, proof_queue::ProofQueue, + tests::batch_store_test::batch_store_for_test, }; use aptos_consensus_types::{ common::{Payload, PayloadFilter}, @@ -12,11 +13,14 @@ use aptos_consensus_types::{ use aptos_crypto::HashValue; use aptos_types::{aggregate_signature::AggregateSignature, PeerId}; use futures::channel::oneshot; -use std::collections::HashSet; +use std::{collections::HashSet, sync::Arc}; -fn create_proof_manager() -> ProofManager { +async fn create_proof_manager() -> ProofManager { + let (proof_cmd_tx, proof_cmd_rx) = tokio::sync::mpsc::channel(100); + let proof_queue = ProofQueue::new(PeerId::random()); + tokio::spawn(proof_queue.start(proof_cmd_rx)); let batch_store = batch_store_for_test(5 * 1024 * 1024); - ProofManager::new(PeerId::random(), 10, 10, batch_store, 
true) + ProofManager::new(10, 10, batch_store, true, Arc::new(proof_cmd_tx)) } fn create_proof(author: PeerId, expiration: u64, batch_sequence: u64) -> ProofOfStore { @@ -62,7 +66,7 @@ async fn get_proposal( PayloadFilter::InQuorumStore(filter_set), callback_tx, ); - proof_manager.handle_proposal_request(req); + proof_manager.handle_proposal_request(req).await; let GetPayloadResponse::GetPayloadResponse(payload) = callback_rx.await.unwrap().unwrap(); payload } @@ -113,20 +117,20 @@ async fn get_proposal_and_assert( #[tokio::test] async fn test_block_request() { - let mut proof_manager = create_proof_manager(); + let mut proof_manager = create_proof_manager().await; let proof = create_proof(PeerId::random(), 10, 1); - proof_manager.receive_proofs(vec![proof.clone()]); + proof_manager.receive_proofs(vec![proof.clone()]).await; get_proposal_and_assert(&mut proof_manager, 100, &[], &vec![proof]).await; } #[tokio::test] async fn test_max_txns_from_block_to_execute() { - let mut proof_manager = create_proof_manager(); + let mut proof_manager = create_proof_manager().await; let proof = create_proof(PeerId::random(), 10, 1); - proof_manager.receive_proofs(vec![proof.clone()]); + proof_manager.receive_proofs(vec![proof.clone()]).await; let payload = get_proposal(&mut proof_manager, 100, &[]).await; // convert payload to v2 format and assert @@ -140,45 +144,53 @@ async fn test_max_txns_from_block_to_execute() { #[tokio::test] async fn test_block_timestamp_expiration() { - let mut proof_manager = create_proof_manager(); + let mut proof_manager = create_proof_manager().await; let proof = create_proof(PeerId::random(), 10, 1); - proof_manager.receive_proofs(vec![proof.clone()]); + proof_manager.receive_proofs(vec![proof.clone()]).await; - proof_manager.handle_commit_notification(1, vec![]); + proof_manager.handle_commit_notification(1, vec![]).await; get_proposal_and_assert(&mut proof_manager, 100, &[], &vec![proof]).await; - proof_manager.handle_commit_notification(20, 
vec![]); + proof_manager.handle_commit_notification(20, vec![]).await; get_proposal_and_assert(&mut proof_manager, 100, &[], &[]).await; } #[tokio::test] async fn test_batch_commit() { - let mut proof_manager = create_proof_manager(); + let mut proof_manager = create_proof_manager().await; let proof0 = create_proof(PeerId::random(), 10, 1); - proof_manager.receive_proofs(vec![proof0.clone()]); + proof_manager.receive_proofs(vec![proof0.clone()]).await; let proof1 = create_proof(PeerId::random(), 11, 2); - proof_manager.receive_proofs(vec![proof1.clone()]); + proof_manager.receive_proofs(vec![proof1.clone()]).await; - proof_manager.handle_commit_notification(1, vec![proof1.info().clone()]); + proof_manager + .handle_commit_notification(1, vec![proof1.info().clone()]) + .await; get_proposal_and_assert(&mut proof_manager, 100, &[], &vec![proof0]).await; } #[tokio::test] async fn test_proposal_priority() { - let mut proof_manager = create_proof_manager(); + let mut proof_manager = create_proof_manager().await; let peer0 = PeerId::random(); let peer0_proof0 = create_proof_with_gas(peer0, 10, 2, 1000); let peer0_proof1 = create_proof_with_gas(peer0, 10, 1, 0); - proof_manager.receive_proofs(vec![peer0_proof1.clone(), peer0_proof0.clone()]); + proof_manager + .receive_proofs(vec![peer0_proof1.clone(), peer0_proof0.clone()]) + .await; let peer0_proof2 = create_proof_with_gas(peer0, 10, 4, 500); - proof_manager.receive_proofs(vec![peer0_proof2.clone()]); + proof_manager + .receive_proofs(vec![peer0_proof2.clone()]) + .await; let peer0_proof3 = create_proof_with_gas(peer0, 10, 3, 500); - proof_manager.receive_proofs(vec![peer0_proof3.clone()]); + proof_manager + .receive_proofs(vec![peer0_proof3.clone()]) + .await; // Gas bucket is the most significant prioritization let expected = vec![peer0_proof0.clone()]; @@ -197,19 +209,21 @@ async fn test_proposal_priority() { #[tokio::test] async fn test_proposal_fairness() { - let mut proof_manager = create_proof_manager(); + let mut 
proof_manager = create_proof_manager().await; let peer0 = PeerId::random(); let peer1 = PeerId::random(); let mut peer0_proofs = vec![]; for i in 0..4 { let proof = create_proof(peer0, 10 + i, 1 + i); - proof_manager.receive_proofs(vec![proof.clone()]); + proof_manager.receive_proofs(vec![proof.clone()]).await; peer0_proofs.push(proof); } let peer1_proof_0 = create_proof(peer1, 7, 1); - proof_manager.receive_proofs(vec![peer1_proof_0.clone()]); + proof_manager + .receive_proofs(vec![peer1_proof_0.clone()]) + .await; // Without filter, and large max size, all proofs are retrieved let mut expected = peer0_proofs.clone(); @@ -237,7 +251,7 @@ async fn test_proposal_fairness() { #[tokio::test] async fn test_duplicate_batches_on_commit() { - let mut proof_manager = create_proof_manager(); + let mut proof_manager = create_proof_manager().await; let author = PeerId::random(); let digest = HashValue::random(); @@ -247,30 +261,32 @@ async fn test_duplicate_batches_on_commit() { let proof1 = ProofOfStore::new(batch.clone(), AggregateSignature::empty()); let proof2 = ProofOfStore::new(batch.clone(), AggregateSignature::empty()); - proof_manager.receive_proofs(vec![proof0.clone()]); - proof_manager.receive_proofs(vec![proof1.clone()]); + proof_manager.receive_proofs(vec![proof0.clone()]).await; + proof_manager.receive_proofs(vec![proof1.clone()]).await; // Only one copy of the batch exists get_proposal_and_assert(&mut proof_manager, 10, &[], &vec![proof0.clone()]).await; // Nothing goes wrong on commits - proof_manager.handle_commit_notification(4, vec![batch.clone()]); + proof_manager + .handle_commit_notification(4, vec![batch.clone()]) + .await; get_proposal_and_assert(&mut proof_manager, 10, &[], &[]).await; // Before expiration, still marked as committed - proof_manager.receive_proofs(vec![proof2.clone()]); + proof_manager.receive_proofs(vec![proof2.clone()]).await; get_proposal_and_assert(&mut proof_manager, 10, &[], &[]).await; // Nothing goes wrong on expiration - 
proof_manager.handle_commit_notification(5, vec![]); + proof_manager.handle_commit_notification(5, vec![]).await; get_proposal_and_assert(&mut proof_manager, 10, &[], &[]).await; - proof_manager.handle_commit_notification(12, vec![]); + proof_manager.handle_commit_notification(12, vec![]).await; get_proposal_and_assert(&mut proof_manager, 10, &[], &[]).await; } #[tokio::test] async fn test_duplicate_batches_on_expiration() { - let mut proof_manager = create_proof_manager(); + let mut proof_manager = create_proof_manager().await; let author = PeerId::random(); let digest = HashValue::random(); @@ -279,15 +295,15 @@ async fn test_duplicate_batches_on_expiration() { let proof0 = ProofOfStore::new(batch.clone(), AggregateSignature::empty()); let proof1 = ProofOfStore::new(batch.clone(), AggregateSignature::empty()); - proof_manager.receive_proofs(vec![proof0.clone()]); - proof_manager.receive_proofs(vec![proof1.clone()]); + proof_manager.receive_proofs(vec![proof0.clone()]).await; + proof_manager.receive_proofs(vec![proof1.clone()]).await; // Only one copy of the batch exists get_proposal_and_assert(&mut proof_manager, 10, &[], &vec![proof0.clone()]).await; // Nothing goes wrong on expiration - proof_manager.handle_commit_notification(5, vec![]); + proof_manager.handle_commit_notification(5, vec![]).await; get_proposal_and_assert(&mut proof_manager, 10, &[], &vec![proof0.clone()]).await; - proof_manager.handle_commit_notification(12, vec![]); + proof_manager.handle_commit_notification(12, vec![]).await; get_proposal_and_assert(&mut proof_manager, 10, &[], &[]).await; } diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index 922ae1d67a3af..fd7796da610b0 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ b/consensus/src/quorum_store/tests/utils.rs @@ -1,7 +1,7 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use crate::quorum_store::utils::ProofQueue; +use 
crate::quorum_store::proof_queue::ProofQueue; use aptos_consensus_types::proof_of_store::{BatchId, BatchInfo, ProofOfStore}; use aptos_crypto::HashValue; use aptos_types::{aggregate_signature::AggregateSignature, PeerId}; diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 7ee291da35e6c..95a721c6d2bd8 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -1,23 +1,18 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use crate::{monitor, quorum_store::counters}; -use aptos_consensus_types::{ - common::{TransactionInProgress, TransactionSummary}, - proof_of_store::{BatchId, BatchInfo, ProofOfStore}, -}; +use crate::monitor; +use aptos_consensus_types::common::{TransactionInProgress, TransactionSummary}; use aptos_logger::prelude::*; use aptos_mempool::{QuorumStoreRequest, QuorumStoreResponse}; -use aptos_types::{transaction::SignedTransaction, PeerId}; +use aptos_types::transaction::SignedTransaction; use chrono::Utc; use futures::channel::{mpsc::Sender, oneshot}; -use move_core_types::account_address::AccountAddress; -use rand::{seq::SliceRandom, thread_rng}; use std::{ - cmp::{Ordering, Reverse}, - collections::{BTreeMap, BinaryHeap, HashMap, HashSet, VecDeque}, + cmp::Reverse, + collections::{BTreeMap, BinaryHeap, HashSet, VecDeque}, hash::Hash, - time::{Duration, Instant}, + time::Duration, }; use tokio::time::timeout; @@ -139,276 +134,3 @@ impl MempoolProxy { } } } - -#[derive(PartialEq, Eq, Hash, Clone)] -pub struct BatchKey { - author: PeerId, - batch_id: BatchId, -} - -impl BatchKey { - pub fn from_info(info: &BatchInfo) -> Self { - Self { - author: info.author(), - batch_id: info.batch_id(), - } - } -} - -#[derive(PartialEq, Eq, Clone, Hash)] -pub struct BatchSortKey { - batch_key: BatchKey, - gas_bucket_start: u64, -} - -impl BatchSortKey { - pub fn from_info(info: &BatchInfo) -> Self { - Self { - batch_key: BatchKey::from_info(info), - gas_bucket_start: 
info.gas_bucket_start(), - } - } - - pub fn author(&self) -> PeerId { - self.batch_key.author - } -} - -impl PartialOrd for BatchSortKey { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for BatchSortKey { - fn cmp(&self, other: &Self) -> Ordering { - // ascending - match self.gas_bucket_start.cmp(&other.gas_bucket_start) { - Ordering::Equal => {}, - ordering => return ordering, - } - // descending - other.batch_key.batch_id.cmp(&self.batch_key.batch_id) - } -} - -pub struct ProofQueue { - my_peer_id: PeerId, - // Queue per peer to ensure fairness between peers and priority within peer - author_to_batches: HashMap>, - // ProofOfStore and insertion_time. None if committed - batch_to_proof: HashMap>, - // Expiration index - expirations: TimeExpirations, - latest_block_timestamp: u64, - remaining_txns: u64, - remaining_proofs: u64, - remaining_local_txns: u64, - remaining_local_proofs: u64, -} - -impl ProofQueue { - pub(crate) fn new(my_peer_id: PeerId) -> Self { - Self { - my_peer_id, - author_to_batches: HashMap::new(), - batch_to_proof: HashMap::new(), - expirations: TimeExpirations::new(), - latest_block_timestamp: 0, - remaining_txns: 0, - remaining_proofs: 0, - remaining_local_txns: 0, - remaining_local_proofs: 0, - } - } - - #[inline] - fn inc_remaining(&mut self, author: &AccountAddress, num_txns: u64) { - self.remaining_txns += num_txns; - self.remaining_proofs += 1; - if *author == self.my_peer_id { - self.remaining_local_txns += num_txns; - self.remaining_local_proofs += 1; - } - } - - #[inline] - fn dec_remaining(&mut self, author: &AccountAddress, num_txns: u64) { - self.remaining_txns -= num_txns; - self.remaining_proofs -= 1; - if *author == self.my_peer_id { - self.remaining_local_txns -= num_txns; - self.remaining_local_proofs -= 1; - } - } - - pub(crate) fn push(&mut self, proof: ProofOfStore) { - if proof.expiration() < self.latest_block_timestamp { - 
counters::inc_rejected_pos_count(counters::POS_EXPIRED_LABEL); - return; - } - let batch_key = BatchKey::from_info(proof.info()); - if self.batch_to_proof.get(&batch_key).is_some() { - counters::inc_rejected_pos_count(counters::POS_DUPLICATE_LABEL); - return; - } - - let author = proof.author(); - let bucket = proof.gas_bucket_start(); - let num_txns = proof.num_txns(); - let expiration = proof.expiration(); - - let batch_sort_key = BatchSortKey::from_info(proof.info()); - let queue = self.author_to_batches.entry(author).or_default(); - queue.insert(batch_sort_key.clone(), proof.info().clone()); - self.expirations.add_item(batch_sort_key, expiration); - self.batch_to_proof - .insert(batch_key, Some((proof, Instant::now()))); - - if author == self.my_peer_id { - counters::inc_local_pos_count(bucket); - } else { - counters::inc_remote_pos_count(bucket); - } - - self.inc_remaining(&author, num_txns); - } - - // gets excluded and iterates over the vector returning non excluded or expired entries. - // return the vector of pulled PoS, and the size of the remaining PoS - // The flag in the second return argument is true iff the entire proof queue is fully utilized - // when pulling the proofs. If any proof from proof queue cannot be included due to size limits, - // this flag is set false. 
- pub(crate) fn pull_proofs( - &mut self, - excluded_batches: &HashSet, - max_txns: u64, - max_bytes: u64, - return_non_full: bool, - ) -> (Vec, bool) { - let mut ret = vec![]; - let mut cur_bytes = 0; - let mut cur_txns = 0; - let mut excluded_txns = 0; - let mut full = false; - - let mut iters = vec![]; - for (_, batches) in self.author_to_batches.iter() { - iters.push(batches.iter().rev()); - } - - while !iters.is_empty() { - iters.shuffle(&mut thread_rng()); - iters.retain_mut(|iter| { - if full { - return false; - } - if let Some((sort_key, batch)) = iter.next() { - if excluded_batches.contains(batch) { - excluded_txns += batch.num_txns(); - } else if let Some(Some((proof, insertion_time))) = - self.batch_to_proof.get(&sort_key.batch_key) - { - cur_bytes += batch.num_bytes(); - cur_txns += batch.num_txns(); - if cur_bytes > max_bytes || cur_txns > max_txns { - // Exceeded the limit for requested bytes or number of transactions. - full = true; - return false; - } - let bucket = proof.gas_bucket_start(); - ret.push(proof.clone()); - counters::pos_to_pull(bucket, insertion_time.elapsed().as_secs_f64()); - if cur_bytes == max_bytes || cur_txns == max_txns { - // Exactly the limit for requested bytes or number of transactions. - full = true; - return false; - } - } - true - } else { - false - } - }) - } - info!( - // before non full check - byte_size = cur_bytes, - block_size = cur_txns, - batch_count = ret.len(), - full = full, - return_non_full = return_non_full, - "Pull payloads from QuorumStore: internal" - ); - - if full || return_non_full { - counters::BLOCK_SIZE_WHEN_PULL.observe(cur_txns as f64); - counters::BLOCK_BYTES_WHEN_PULL.observe(cur_bytes as f64); - counters::PROOF_SIZE_WHEN_PULL.observe(ret.len() as f64); - counters::EXCLUDED_TXNS_WHEN_PULL.observe(excluded_txns as f64); - // Stable sort, so the order of proofs within an author will not change. 
- ret.sort_by_key(|proof| Reverse(proof.gas_bucket_start())); - (ret, !full) - } else { - (Vec::new(), !full) - } - } - - pub(crate) fn handle_updated_block_timestamp(&mut self, block_timestamp: u64) { - assert!( - self.latest_block_timestamp <= block_timestamp, - "Decreasing block timestamp" - ); - self.latest_block_timestamp = block_timestamp; - - let expired = self.expirations.expire(block_timestamp); - let mut num_expired_but_not_committed = 0; - for key in &expired { - if let Some(mut queue) = self.author_to_batches.remove(&key.author()) { - if let Some(batch) = queue.remove(key) { - if self - .batch_to_proof - .get(&key.batch_key) - .expect("Entry for unexpired batch must exist") - .is_some() - { - // non-committed proof that is expired - num_expired_but_not_committed += 1; - counters::GAP_BETWEEN_BATCH_EXPIRATION_AND_CURRENT_TIME_WHEN_COMMIT - .observe((block_timestamp - batch.expiration()) as f64); - self.dec_remaining(&batch.author(), batch.num_txns()); - } - claims::assert_some!(self.batch_to_proof.remove(&key.batch_key)); - } - if !queue.is_empty() { - self.author_to_batches.insert(key.author(), queue); - } - } - } - counters::NUM_PROOFS_EXPIRED_WHEN_COMMIT.inc_by(num_expired_but_not_committed); - } - - pub(crate) fn remaining_txns_and_proofs(&self) -> (u64, u64) { - counters::NUM_TOTAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_txns as f64); - counters::NUM_TOTAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_proofs as f64); - counters::NUM_LOCAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_local_txns as f64); - counters::NUM_LOCAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_local_proofs as f64); - - (self.remaining_txns, self.remaining_proofs) - } - - // Mark in the hashmap committed PoS, but keep them until they expire - pub(crate) fn mark_committed(&mut self, batches: Vec) { - for batch in batches { - let batch_key = BatchKey::from_info(&batch); - if let Some(Some((proof, insertion_time))) = self.batch_to_proof.get(&batch_key) { - 
counters::pos_to_commit( - proof.gas_bucket_start(), - insertion_time.elapsed().as_secs_f64(), - ); - self.dec_remaining(&batch.author(), batch.num_txns()); - } - self.batch_to_proof.insert(batch_key, None); - } - } -} From b56cc333ea141fa1980ab43d86b69bd78ccfadeb Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 3 Jun 2024 10:22:28 -0700 Subject: [PATCH 02/67] Committing what I have --- consensus/src/quorum_store/batch_coordinator.rs | 6 ++++++ consensus/src/quorum_store/quorum_store_builder.rs | 1 + consensus/src/quorum_store/types.rs | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/consensus/src/quorum_store/batch_coordinator.rs b/consensus/src/quorum_store/batch_coordinator.rs index 177e8d102c6e9..79364b7bc1e1e 100644 --- a/consensus/src/quorum_store/batch_coordinator.rs +++ b/consensus/src/quorum_store/batch_coordinator.rs @@ -20,6 +20,8 @@ use tokio::sync::{ oneshot, }; +use super::proof_queue::ProofQueueCommand; + #[derive(Debug)] pub enum BatchCoordinatorCommand { Shutdown(oneshot::Sender<()>), @@ -32,6 +34,7 @@ pub struct BatchCoordinator { network_sender: Arc, sender_to_proof_manager: Arc>, sender_to_batch_generator: Arc>, + sender_to_proof_queue: Arc>, batch_store: Arc, max_batch_txns: u64, max_batch_bytes: u64, @@ -45,6 +48,7 @@ impl BatchCoordinator { network_sender: NetworkSender, sender_to_proof_manager: Sender, sender_to_batch_generator: Sender, + sender_to_proof_queue: Arc>, batch_store: Arc, max_batch_txns: u64, max_batch_bytes: u64, @@ -56,6 +60,7 @@ impl BatchCoordinator { network_sender: Arc::new(network_sender), sender_to_proof_manager: Arc::new(sender_to_proof_manager), sender_to_batch_generator: Arc::new(sender_to_batch_generator), + sender_to_proof_queue, batch_store, max_batch_txns, max_batch_bytes, @@ -134,6 +139,7 @@ impl BatchCoordinator { } let mut persist_requests = vec![]; + let batches_summary = batches.iter().map(|batch| (batch.batch_info(), batch.summary())).collect(); for batch in batches.into_iter() { // TODO: 
maybe don't message batch generator if the persist is unsuccessful? if let Err(e) = self diff --git a/consensus/src/quorum_store/quorum_store_builder.rs b/consensus/src/quorum_store/quorum_store_builder.rs index c21ab16a9b7b3..fe8e8c4c6ce66 100644 --- a/consensus/src/quorum_store/quorum_store_builder.rs +++ b/consensus/src/quorum_store/quorum_store_builder.rs @@ -330,6 +330,7 @@ impl InnerBuilder { self.network_sender.clone(), self.proof_manager_cmd_tx.clone(), self.batch_generator_cmd_tx.clone(), + self.proof_queue_cmd_tx.clone(), self.batch_store.clone().unwrap(), self.config.receiver_max_batch_txns as u64, self.config.receiver_max_batch_bytes as u64, diff --git a/consensus/src/quorum_store/types.rs b/consensus/src/quorum_store/types.rs index 15c6d67211f9a..91e49749c1f26 100644 --- a/consensus/src/quorum_store/types.rs +++ b/consensus/src/quorum_store/types.rs @@ -172,6 +172,10 @@ impl Batch { self.payload.into_transactions() } + pub fn summary(&self) -> Vec<(PeerId, BatchId)> { + self.payload.txns().iter().map(|txn| (txn.sender(), txn.sequence_number())).collect() + } + pub fn batch_info(&self) -> &BatchInfo { &self.batch_info } From a369c280efedf076504cd968e77f93fa77a41821 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 3 Jun 2024 15:02:43 -0700 Subject: [PATCH 03/67] Sending AddBatches message --- .../src/quorum_store/batch_coordinator.rs | 12 ++++++-- consensus/src/quorum_store/proof_queue.rs | 29 ++++++++++++------- consensus/src/quorum_store/types.rs | 8 +++-- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/consensus/src/quorum_store/batch_coordinator.rs b/consensus/src/quorum_store/batch_coordinator.rs index 79364b7bc1e1e..3a79f35d9cac0 100644 --- a/consensus/src/quorum_store/batch_coordinator.rs +++ b/consensus/src/quorum_store/batch_coordinator.rs @@ -1,6 +1,7 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 +use super::proof_queue::ProofQueueCommand; use crate::{ network::{NetworkSender, 
QuorumStoreSender}, quorum_store::{ @@ -20,8 +21,6 @@ use tokio::sync::{ oneshot, }; -use super::proof_queue::ProofQueueCommand; - #[derive(Debug)] pub enum BatchCoordinatorCommand { Shutdown(oneshot::Sender<()>), @@ -139,7 +138,10 @@ impl BatchCoordinator { } let mut persist_requests = vec![]; - let batches_summary = batches.iter().map(|batch| (batch.batch_info(), batch.summary())).collect(); + let batches_summary = batches + .iter() + .map(|batch| (batch.batch_info().clone(), batch.summary())) + .collect(); for batch in batches.into_iter() { // TODO: maybe don't message batch generator if the persist is unsuccessful? if let Err(e) = self @@ -151,6 +153,10 @@ impl BatchCoordinator { } persist_requests.push(batch.into()); } + self.sender_to_proof_queue + .send(ProofQueueCommand::AddBatches(batches_summary)) + .await + .expect("Failed to send NewBatches to ProofQueue"); counters::RECEIVED_BATCH_COUNT.inc_by(persist_requests.len() as u64); if author != self.my_peer_id { counters::RECEIVED_REMOTE_BATCH_COUNT.inc_by(persist_requests.len() as u64); diff --git a/consensus/src/quorum_store/proof_queue.rs b/consensus/src/quorum_store/proof_queue.rs index 449eaf1301e45..0ecf76d17fcb9 100644 --- a/consensus/src/quorum_store/proof_queue.rs +++ b/consensus/src/quorum_store/proof_queue.rs @@ -72,8 +72,9 @@ pub enum ProofQueueCommand { // Proof manager sends this command to add the proofs to the proof queue // We send back (remaining_txns, remaining_proofs) to the proof manager AddProofs(Vec, oneshot::Sender<(u64, u64)>), - // Batch coordinator sends this command to add the received batches to the proof queue - // AddBatches(Vec), + // Batch coordinator sends this command to add the received batches to the proof queue. + // For each transaction, the proof queue stores the list of batches containing the transaction. + AddBatches(Vec<(BatchInfo, Vec<(PeerId, u64)>)>), // Proof manager sends this command to pull proofs from the proof queue to // include in the block proposal. 
PullProofs { @@ -95,6 +96,9 @@ pub struct ProofQueue { author_to_batches: HashMap>, // ProofOfStore and insertion_time. None if committed batch_to_proof: HashMap>, + // Map of txn_summary = (sender, sequence number) to all the batches that contain + // the transaction. This helps in counting the number of unique transactions in the pipeline. + txn_summary_to_batches: HashMap<(PeerId, u64), HashSet>, // Expiration index expirations: TimeExpirations, latest_block_timestamp: u64, @@ -110,6 +114,7 @@ impl ProofQueue { my_peer_id, author_to_batches: HashMap::new(), batch_to_proof: HashMap::new(), + txn_summary_to_batches: HashMap::new(), expirations: TimeExpirations::new(), latest_block_timestamp: 0, remaining_txns: 0, @@ -345,14 +350,18 @@ impl ProofQueue { if let Err(e) = response_sender.send(self.remaining_txns_and_proofs()) { error!("Failed to send response to MarkCommitted: {:?}", e); } - }, // ProofQueueCommand::AddBatches(batches) => { - // for batch in batches { - // let batch_key = BatchKey::from_info(&batch); - // if self.batch_to_proof.get(&batch_key).is_none() { - // self.inc_remaining(&batch.author(), batch.num_txns()); - // } - // } - // } + }, + ProofQueueCommand::AddBatches(batch_summaries) => { + for (batch_info, txn_summaries) in batch_summaries { + let batch_key = BatchKey::from_info(&batch_info); + for txn_summary in txn_summaries { + self.txn_summary_to_batches + .entry(txn_summary) + .or_default() + .insert(batch_key.clone()); + } + } + }, } } } diff --git a/consensus/src/quorum_store/types.rs b/consensus/src/quorum_store/types.rs index 91e49749c1f26..a207febfa6a3a 100644 --- a/consensus/src/quorum_store/types.rs +++ b/consensus/src/quorum_store/types.rs @@ -172,8 +172,12 @@ impl Batch { self.payload.into_transactions() } - pub fn summary(&self) -> Vec<(PeerId, BatchId)> { - self.payload.txns().iter().map(|txn| (txn.sender(), txn.sequence_number())).collect() + pub fn summary(&self) -> Vec<(PeerId, u64)> { + self.payload + .txns() + .iter() + 
.map(|txn| (txn.sender(), txn.sequence_number())) + .collect() } pub fn batch_info(&self) -> &BatchInfo { From 80a5a349bef0856bef64b33107c3cf7a4c3fc5f9 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 3 Jun 2024 16:22:58 -0700 Subject: [PATCH 04/67] Calcuating the remaining txns --- consensus/src/quorum_store/proof_queue.rs | 44 +++++++++++++++++++---- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/consensus/src/quorum_store/proof_queue.rs b/consensus/src/quorum_store/proof_queue.rs index 0ecf76d17fcb9..c696242bcd3e3 100644 --- a/consensus/src/quorum_store/proof_queue.rs +++ b/consensus/src/quorum_store/proof_queue.rs @@ -99,6 +99,8 @@ pub struct ProofQueue { // Map of txn_summary = (sender, sequence number) to all the batches that contain // the transaction. This helps in counting the number of unique transactions in the pipeline. txn_summary_to_batches: HashMap<(PeerId, u64), HashSet>, + // List of batches for which we received txn summaries from the batch coordinator + batches_with_txn_summary: HashSet, // Expiration index expirations: TimeExpirations, latest_block_timestamp: u64, @@ -115,6 +117,7 @@ impl ProofQueue { author_to_batches: HashMap::new(), batch_to_proof: HashMap::new(), txn_summary_to_batches: HashMap::new(), + batches_with_txn_summary: HashSet::new(), expirations: TimeExpirations::new(), latest_block_timestamp: 0, remaining_txns: 0, @@ -144,15 +147,43 @@ impl ProofQueue { } } - pub(crate) fn push(&mut self, proof: ProofOfStore) { + fn remaining_txns(&self) -> u64 { + // All the bath keys for which batch_to_proof is not None + let batch_keys = self + .batch_to_proof + .iter() + .filter_map(|(batch_key, proof)| proof.as_ref().map(|_| batch_key)) + .collect::>(); + let mut remaining_txns = self + .txn_summary_to_batches + .iter() + .filter(|(_, batches)| batches.iter().any(|batch_key| batch_keys.contains(batch_key))) + .count() as u64; + // If a batch_key is not in batches_with_txn_summary, then add the number of txns in the 
batch to remaining_txns + remaining_txns += self + .batch_to_proof + .iter() + .filter_map(|(batch_key, proof)| { + if proof.is_some() && !self.batches_with_txn_summary.contains(batch_key) { + Some(proof.as_ref().unwrap().0.num_txns()) + } else { + None + } + }) + .sum::(); + remaining_txns + } + + /// Add the ProofOfStore to proof queue. Return true if the proof is added successfully. + fn push(&mut self, proof: ProofOfStore) -> bool { if proof.expiration() < self.latest_block_timestamp { counters::inc_rejected_pos_count(counters::POS_EXPIRED_LABEL); - return; + return false; } let batch_key = BatchKey::from_info(proof.info()); if self.batch_to_proof.get(&batch_key).is_some() { counters::inc_rejected_pos_count(counters::POS_DUPLICATE_LABEL); - return; + return false; } let author = proof.author(); @@ -174,6 +205,7 @@ impl ProofQueue { } self.inc_remaining(&author, num_txns); + return true; } // gets excluded and iterates over the vector returning non excluded or expired entries. @@ -181,7 +213,7 @@ impl ProofQueue { // The flag in the second return argument is true iff the entire proof queue is fully utilized // when pulling the proofs. If any proof from proof queue cannot be included due to size limits, // this flag is set false. 
- pub(crate) fn pull_proofs( + fn pull_proofs( &mut self, excluded_batches: &HashSet, max_txns: u64, @@ -256,7 +288,7 @@ impl ProofQueue { } } - pub(crate) fn handle_updated_block_timestamp(&mut self, block_timestamp: u64) { + fn handle_updated_block_timestamp(&mut self, block_timestamp: u64) { assert!( self.latest_block_timestamp <= block_timestamp, "Decreasing block timestamp" @@ -300,7 +332,7 @@ impl ProofQueue { } // Mark in the hashmap committed PoS, but keep them until they expire - pub(crate) fn mark_committed(&mut self, batches: Vec) { + fn mark_committed(&mut self, batches: Vec) { for batch in batches { let batch_key = BatchKey::from_info(&batch); if let Some(Some((proof, insertion_time))) = self.batch_to_proof.get(&batch_key) { From 4e5babe5cf3da610beb598f8ada480f7c404dcdc Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 3 Jun 2024 17:10:12 -0700 Subject: [PATCH 05/67] Calculate proof queue size correctly --- consensus/src/quorum_store/proof_queue.rs | 33 ++++++++++++++++------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/consensus/src/quorum_store/proof_queue.rs b/consensus/src/quorum_store/proof_queue.rs index c696242bcd3e3..3470420a29d65 100644 --- a/consensus/src/quorum_store/proof_queue.rs +++ b/consensus/src/quorum_store/proof_queue.rs @@ -148,7 +148,7 @@ impl ProofQueue { } fn remaining_txns(&self) -> u64 { - // All the bath keys for which batch_to_proof is not None + // All the bath keys for which batch_to_proof is not None. This is the set of unexpired and uncommitted proofs. 
let batch_keys = self .batch_to_proof .iter() @@ -157,9 +157,14 @@ impl ProofQueue { let mut remaining_txns = self .txn_summary_to_batches .iter() - .filter(|(_, batches)| batches.iter().any(|batch_key| batch_keys.contains(batch_key))) + .filter(|(_, batches)| { + batches + .iter() + .any(|batch_key| batch_keys.contains(batch_key)) + }) .count() as u64; - // If a batch_key is not in batches_with_txn_summary, then add the number of txns in the batch to remaining_txns + // If a batch_key is not in batches_with_txn_summary, it means we've received the proof but haven't receive the + // transaction summary of the batch from batch coordinator. Add the number of txns in the batch to remaining_txns. remaining_txns += self .batch_to_proof .iter() @@ -174,16 +179,14 @@ impl ProofQueue { remaining_txns } - /// Add the ProofOfStore to proof queue. Return true if the proof is added successfully. - fn push(&mut self, proof: ProofOfStore) -> bool { + /// Add the ProofOfStore to proof queue. + pub(crate) fn push(&mut self, proof: ProofOfStore) { if proof.expiration() < self.latest_block_timestamp { counters::inc_rejected_pos_count(counters::POS_EXPIRED_LABEL); - return false; } let batch_key = BatchKey::from_info(proof.info()); if self.batch_to_proof.get(&batch_key).is_some() { counters::inc_rejected_pos_count(counters::POS_DUPLICATE_LABEL); - return false; } let author = proof.author(); @@ -205,7 +208,6 @@ impl ProofQueue { } self.inc_remaining(&author, num_txns); - return true; } // gets excluded and iterates over the vector returning non excluded or expired entries. @@ -213,7 +215,7 @@ impl ProofQueue { // The flag in the second return argument is true iff the entire proof queue is fully utilized // when pulling the proofs. If any proof from proof queue cannot be included due to size limits, // this flag is set false. 
- fn pull_proofs( + pub(crate) fn pull_proofs( &mut self, excluded_batches: &HashSet, max_txns: u64, @@ -310,6 +312,11 @@ impl ProofQueue { num_expired_but_not_committed += 1; counters::GAP_BETWEEN_BATCH_EXPIRATION_AND_CURRENT_TIME_WHEN_COMMIT .observe((block_timestamp - batch.expiration()) as f64); + self.txn_summary_to_batches.retain(|_, batches| { + batches.remove(&key.batch_key); + !batches.is_empty() + }); + self.batches_with_txn_summary.remove(&key.batch_key); self.dec_remaining(&batch.author(), batch.num_txns()); } claims::assert_some!(self.batch_to_proof.remove(&key.batch_key)); @@ -342,7 +349,12 @@ impl ProofQueue { ); self.dec_remaining(&batch.author(), batch.num_txns()); } - self.batch_to_proof.insert(batch_key, None); + self.batch_to_proof.insert(batch_key.clone(), None); + self.batches_with_txn_summary.remove(&batch_key); + self.txn_summary_to_batches.retain(|_, batches| { + batches.remove(&batch_key); + !batches.is_empty() + }); } } @@ -392,6 +404,7 @@ impl ProofQueue { .or_default() .insert(batch_key.clone()); } + self.batches_with_txn_summary.insert(batch_key); } }, } From 1653475c3a3ab022fa38ee3c60a646248dac198a Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 3 Jun 2024 17:14:47 -0700 Subject: [PATCH 06/67] Add a counter --- consensus/src/quorum_store/counters.rs | 7 +++++++ consensus/src/quorum_store/proof_queue.rs | 6 ++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 74f179e1fb3d6..8fc54bff198c4 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -312,6 +312,13 @@ pub static NUM_TOTAL_TXNS_LEFT_ON_UPDATE: Lazy = Lazy::new(|| { ) }); +pub static NUM_TOTAL_TXNS_LEFT_ON_UPDATE_WITHOUT_DUPLICATES: Lazy = Lazy::new(|| { + register_avg_counter( + "quorum_store_num_total_txns_left_on_update_without_duplicates", + "Histogram for the number of total txns left after adding or cleaning batches, 
without duplicates.", + ) +}); + /// Histogram for the number of total batches/PoS left after adding or cleaning batches. pub static NUM_TOTAL_PROOFS_LEFT_ON_UPDATE: Lazy = Lazy::new(|| { register_avg_counter( diff --git a/consensus/src/quorum_store/proof_queue.rs b/consensus/src/quorum_store/proof_queue.rs index 3470420a29d65..6ce7e6e2a9d66 100644 --- a/consensus/src/quorum_store/proof_queue.rs +++ b/consensus/src/quorum_store/proof_queue.rs @@ -334,8 +334,10 @@ impl ProofQueue { counters::NUM_TOTAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_proofs as f64); counters::NUM_LOCAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_local_txns as f64); counters::NUM_LOCAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_local_proofs as f64); - - (self.remaining_txns, self.remaining_proofs) + let remaining_txns_without_duplicates = self.remaining_txns(); + counters::NUM_TOTAL_TXNS_LEFT_ON_UPDATE_WITHOUT_DUPLICATES + .observe(remaining_txns_without_duplicates as f64); + (remaining_txns_without_duplicates, self.remaining_proofs) } // Mark in the hashmap committed PoS, but keep them until they expire From 844e4aea62283a05f6ee0d37fd2d0b42a333fb2e Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 4 Jun 2024 10:26:36 -0700 Subject: [PATCH 07/67] Update pfn_const_tps test --- testsuite/forge-cli/src/main.rs | 57 +++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index 74c23f0fc7780..340207a9d4b21 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -2439,10 +2439,35 @@ fn pfn_const_tps( 60 * 60 * 2 // 2 hours; avoid epoch changes which can introduce noise }; - ForgeConfig::default() - .with_initial_validator_count(NonZeroUsize::new(7).unwrap()) - .with_initial_fullnode_count(7) - .with_emit_job(EmitJobRequest::default().mode(EmitJobMode::ConstTps { tps: 100 })) + // Increase the concurrency level + const USE_CRAZY_MACHINES: bool = false; 
+ + let mut forge_config = ForgeConfig::default() + .with_initial_validator_count(NonZeroUsize::new(100).unwrap()) + .with_initial_fullnode_count(2) + .with_validator_override_node_config_fn(Arc::new(|config, _| { + // Increase the state sync chunk sizes (consensus blocks are much larger than 1k) + optimize_state_sync_for_throughput(config); + + config.consensus_observer.publisher_enabled = true; + + // Increase the concurrency level + if USE_CRAZY_MACHINES { + config.execution.concurrency_level = 58; + } + })) + .with_fullnode_override_node_config_fn(Arc::new(|config, _| { + // Increase the state sync chunk sizes (consensus blocks are much larger than 1k) + optimize_state_sync_for_throughput(config); + + config.consensus_observer.observer_enabled = true; + + // Increase the concurrency level + if USE_CRAZY_MACHINES { + config.execution.concurrency_level = 58; + } + })) + .with_emit_job(EmitJobRequest::default().mode(EmitJobMode::ConstTps { tps: 5000 })) .add_network_test(PFNPerformance::new( 7, add_cpu_chaos, @@ -2453,14 +2478,14 @@ fn pfn_const_tps( helm_values["chain"]["epoch_duration_secs"] = epoch_duration_secs.into(); })) .with_success_criteria( - SuccessCriteria::new(95) + SuccessCriteria::new(12000) .add_no_restarts() .add_max_expired_tps(0) .add_max_failed_submission_tps(0) // Percentile thresholds are set to +1 second of non-PFN tests. Should be revisited. 
- .add_latency_threshold(2.5, LatencyType::P50) - .add_latency_threshold(4., LatencyType::P90) - .add_latency_threshold(5., LatencyType::P99) + .add_latency_threshold(5., LatencyType::P50) + .add_latency_threshold(6., LatencyType::P90) + .add_latency_threshold(7., LatencyType::P99) .add_wait_for_catchup_s( // Give at least 60s for catchup and at most 10% of the run (duration.as_secs() / 10).max(60), @@ -2469,7 +2494,21 @@ fn pfn_const_tps( max_no_progress_secs: 10.0, max_round_gap: 4, }), - ) + ); + + if USE_CRAZY_MACHINES { + forge_config = forge_config + .with_validator_resource_override(NodeResourceOverride { + cpu_cores: Some(58), + memory_gib: Some(200), + }) + .with_fullnode_resource_override(NodeResourceOverride { + cpu_cores: Some(58), + memory_gib: Some(200), + }) + } + + forge_config } /// This test runs a performance benchmark where the network includes From be89d17785fe67ad9ea66eda92c1a3f646c1d7b9 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 4 Jun 2024 14:14:10 -0700 Subject: [PATCH 08/67] Minor changes --- consensus/src/quorum_store/tests/proof_manager_test.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/consensus/src/quorum_store/tests/proof_manager_test.rs b/consensus/src/quorum_store/tests/proof_manager_test.rs index bff1087c94162..052f7aff6af96 100644 --- a/consensus/src/quorum_store/tests/proof_manager_test.rs +++ b/consensus/src/quorum_store/tests/proof_manager_test.rs @@ -16,11 +16,11 @@ use futures::channel::oneshot; use std::{collections::HashSet, sync::Arc}; async fn create_proof_manager() -> ProofManager { - let (proof_cmd_tx, proof_cmd_rx) = tokio::sync::mpsc::channel(100); + let (proof_queue_tx, proof_queue_rx) = tokio::sync::mpsc::channel(100); let proof_queue = ProofQueue::new(PeerId::random()); - tokio::spawn(proof_queue.start(proof_cmd_rx)); + tokio::spawn(proof_queue.start(proof_queue_rx)); let batch_store = batch_store_for_test(5 * 1024 * 1024); - ProofManager::new(10, 10, batch_store, true, 
Arc::new(proof_cmd_tx)) + ProofManager::new(10, 10, batch_store, true, Arc::new(proof_queue_tx)) } fn create_proof(author: PeerId, expiration: u64, batch_sequence: u64) -> ProofOfStore { From 8d77c78ebf44c4857b8fb374206713898c017884 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 4 Jun 2024 17:28:24 -0700 Subject: [PATCH 09/67] Minor change --- consensus/src/quorum_store/proof_queue.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/consensus/src/quorum_store/proof_queue.rs b/consensus/src/quorum_store/proof_queue.rs index 6ce7e6e2a9d66..ce016e5448793 100644 --- a/consensus/src/quorum_store/proof_queue.rs +++ b/consensus/src/quorum_store/proof_queue.rs @@ -183,12 +183,13 @@ impl ProofQueue { pub(crate) fn push(&mut self, proof: ProofOfStore) { if proof.expiration() < self.latest_block_timestamp { counters::inc_rejected_pos_count(counters::POS_EXPIRED_LABEL); + return; } let batch_key = BatchKey::from_info(proof.info()); if self.batch_to_proof.get(&batch_key).is_some() { counters::inc_rejected_pos_count(counters::POS_DUPLICATE_LABEL); + return; } - let author = proof.author(); let bucket = proof.gas_bucket_start(); let num_txns = proof.num_txns(); @@ -206,7 +207,6 @@ impl ProofQueue { } else { counters::inc_remote_pos_count(bucket); } - self.inc_remaining(&author, num_txns); } @@ -342,8 +342,8 @@ impl ProofQueue { // Mark in the hashmap committed PoS, but keep them until they expire fn mark_committed(&mut self, batches: Vec) { - for batch in batches { - let batch_key = BatchKey::from_info(&batch); + for batch in &batches { + let batch_key = BatchKey::from_info(batch); if let Some(Some((proof, insertion_time))) = self.batch_to_proof.get(&batch_key) { counters::pos_to_commit( proof.gas_bucket_start(), From e064652331965edecf571e44aef974eec19a325e Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 4 Jun 2024 18:34:27 -0700 Subject: [PATCH 10/67] Add some coutners --- consensus/src/quorum_store/counters.rs | 36 
++++++++++++++++++++++- consensus/src/quorum_store/proof_queue.rs | 26 ++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 8fc54bff198c4..1dee7c8d3b744 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -4,7 +4,7 @@ use aptos_metrics_core::{ exponential_buckets, op_counters::DurationHistogram, register_avg_counter, register_histogram, register_histogram_vec, register_int_counter, register_int_counter_vec, Histogram, - HistogramVec, IntCounter, IntCounterVec, + HistogramVec, IntCounter, IntCounterVec, IntGauge, }; use once_cell::sync::Lazy; use std::time::Duration; @@ -93,6 +93,40 @@ pub static BATCH_GENERATOR_MAIN_LOOP: Lazy = Lazy::new(|| { ) }); +pub static PROOFS_WITHOUT_BATCH_DATA: Lazy = Lazy::new(|| { + register_int_gauge!( + "quorum_store_proofs_without_batch_data", + "Number of proofs received without batch data" + ) + .unwrap() +}); + +pub static TXNS_WITH_DUPLICATE_BATCHES: Lazy = Lazy::new(|| { + register_int_gauge!( + "quorum_store_txns_with_duplicate_batches", + "Number of transactions received with duplicate batches" + ) + .unwrap() +}); + +pub static TXNS_IN_PROOF_QUEUE: Lazy = Lazy::new(|| { + register_int_gauge!( + "quorum_store_txns_in_proof_queue", + "Number of transactions in the proof queue" + ) + .unwrap() +}); + +pub static PROOFS_IN_PROOF_QUEUE: Lazy = Lazy::new(|| { + register_int_gauge!( + "quorum_store_proofs_in_proof_queue", + "Number of proofs in the proof queue" + ) + .unwrap() +}); + + + ////////////////////// // NEW QUORUM STORE ////////////////////// diff --git a/consensus/src/quorum_store/proof_queue.rs b/consensus/src/quorum_store/proof_queue.rs index ce016e5448793..8b8eb77f97f31 100644 --- a/consensus/src/quorum_store/proof_queue.rs +++ b/consensus/src/quorum_store/proof_queue.rs @@ -163,6 +163,24 @@ impl ProofQueue { .any(|batch_key| batch_keys.contains(batch_key)) }) 
.count() as u64; + + // count the number of batches with proofs but without txn summaries + counters::PROOFS_WITHOUT_BATCH_DATA.set(self.batch_to_proof.iter().map(|(batch_key, proof)| { + if proof.is_some() && !self.batches_with_txn_summary.contains(batch_key) { + 1 + } else { + 0 + } + }).sum() as i64); + + counters::PROOFS_IN_PROOF_QUEUE.set(self.batch_to_proof.iter().map(|(batch_key, proof)| { + if proof.is_some() { + 1 + } else { + 0 + } + }).sum::()); + // If a batch_key is not in batches_with_txn_summary, it means we've received the proof but haven't receive the // transaction summary of the batch from batch coordinator. Add the number of txns in the batch to remaining_txns. remaining_txns += self @@ -176,6 +194,14 @@ impl ProofQueue { } }) .sum::(); + + //count the number of transactions with more than one batches + counters::TXNS_WITH_DUPLICATE_BATCHES.set(self.txn_summary_to_batches.iter().filter(|(_, batches)| { + batches.len() > 1 + }).count() as i64); + + counters::TXNS_IN_PROOF_QUEUE.set(self.txn_summary_to_batches.len() as i64); + remaining_txns } From 204d823014444cb6ed9c88752d0bd3e8eb6e9c2a Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 4 Jun 2024 20:49:59 -0700 Subject: [PATCH 11/67] Rust lint --- consensus/src/quorum_store/counters.rs | 4 +--- consensus/src/quorum_store/proof_queue.rs | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 1dee7c8d3b744..ccdbe06f7cc73 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -2,9 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use aptos_metrics_core::{ - exponential_buckets, op_counters::DurationHistogram, register_avg_counter, register_histogram, - register_histogram_vec, register_int_counter, register_int_counter_vec, Histogram, - HistogramVec, IntCounter, IntCounterVec, IntGauge, + exponential_buckets, op_counters::DurationHistogram, 
register_avg_counter, register_histogram, register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge }; use once_cell::sync::Lazy; use std::time::Duration; diff --git a/consensus/src/quorum_store/proof_queue.rs b/consensus/src/quorum_store/proof_queue.rs index 8b8eb77f97f31..d6c78c42734d3 100644 --- a/consensus/src/quorum_store/proof_queue.rs +++ b/consensus/src/quorum_store/proof_queue.rs @@ -171,9 +171,9 @@ impl ProofQueue { } else { 0 } - }).sum() as i64); + }).sum::()); - counters::PROOFS_IN_PROOF_QUEUE.set(self.batch_to_proof.iter().map(|(batch_key, proof)| { + counters::PROOFS_IN_PROOF_QUEUE.set(self.batch_to_proof.iter().map(|(_, proof)| { if proof.is_some() { 1 } else { From 32eb5a4ed18f955f40902df10ed87cd2924da84c Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 4 Jun 2024 23:10:49 -0700 Subject: [PATCH 12/67] Increasing quorum store backpressure limits --- config/src/config/quorum_store_config.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/src/config/quorum_store_config.rs b/config/src/config/quorum_store_config.rs index 8d99d131529ca..7f8aef84b8ee4 100644 --- a/config/src/config/quorum_store_config.rs +++ b/config/src/config/quorum_store_config.rs @@ -29,14 +29,14 @@ impl Default for QuorumStoreBackPressureConfig { QuorumStoreBackPressureConfig { // QS will be backpressured if the remaining total txns is more than this number // Roughly, target TPS * commit latency seconds - backlog_txn_limit_count: 12_000, + backlog_txn_limit_count: 15_000, // QS will create batches at the max rate until this number is reached backlog_per_validator_batch_limit_count: 4, decrease_duration_ms: 1000, increase_duration_ms: 1000, decrease_fraction: 0.5, - dynamic_min_txn_per_s: 160, - dynamic_max_txn_per_s: 4000, + dynamic_min_txn_per_s: 500, + dynamic_max_txn_per_s: 6000, } } } From 7f634001062ae21a7f9cf20afa5722172b7bb350 Mon Sep 17 
00:00:00 2001 From: Satya Vusirikala Date: Tue, 4 Jun 2024 23:21:06 -0700 Subject: [PATCH 13/67] setting dynamic_min_txns_per_sec to 160 --- config/src/config/quorum_store_config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/src/config/quorum_store_config.rs b/config/src/config/quorum_store_config.rs index 7f8aef84b8ee4..d80f99ea13da0 100644 --- a/config/src/config/quorum_store_config.rs +++ b/config/src/config/quorum_store_config.rs @@ -35,7 +35,7 @@ impl Default for QuorumStoreBackPressureConfig { decrease_duration_ms: 1000, increase_duration_ms: 1000, decrease_fraction: 0.5, - dynamic_min_txn_per_s: 500, + dynamic_min_txn_per_s: 160, dynamic_max_txn_per_s: 6000, } } From 76f932670d58ed2055f62b7550b6688026b3b990 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Thu, 6 Jun 2024 15:53:41 -0700 Subject: [PATCH 14/67] Fixing the calculation --- consensus/src/quorum_store/counters.rs | 8 ++-- consensus/src/quorum_store/proof_queue.rs | 54 +++++++++++++---------- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index ccdbe06f7cc73..a1abad50eb9f3 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -2,7 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 use aptos_metrics_core::{ - exponential_buckets, op_counters::DurationHistogram, register_avg_counter, register_histogram, register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge + exponential_buckets, op_counters::DurationHistogram, register_avg_counter, register_histogram, + register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge, + Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, }; use once_cell::sync::Lazy; use std::time::Duration; @@ -115,7 +117,7 @@ pub static TXNS_IN_PROOF_QUEUE: Lazy = Lazy::new(|| { 
.unwrap() }); -pub static PROOFS_IN_PROOF_QUEUE: Lazy = Lazy::new(|| { +pub static PROOFS_IN_PROOF_QUEUE: Lazy = Lazy::new(|| { register_int_gauge!( "quorum_store_proofs_in_proof_queue", "Number of proofs in the proof queue" @@ -123,8 +125,6 @@ pub static PROOFS_IN_PROOF_QUEUE: Lazy = Lazy::new(|| { .unwrap() }); - - ////////////////////// // NEW QUORUM STORE ////////////////////// diff --git a/consensus/src/quorum_store/proof_queue.rs b/consensus/src/quorum_store/proof_queue.rs index d6c78c42734d3..8f551cbff3e02 100644 --- a/consensus/src/quorum_store/proof_queue.rs +++ b/consensus/src/quorum_store/proof_queue.rs @@ -149,10 +149,11 @@ impl ProofQueue { fn remaining_txns(&self) -> u64 { // All the bath keys for which batch_to_proof is not None. This is the set of unexpired and uncommitted proofs. - let batch_keys = self + let unexpired_batch_keys = self .batch_to_proof .iter() - .filter_map(|(batch_key, proof)| proof.as_ref().map(|_| batch_key)) + .filter(|(_, proof)| proof.is_some()) + .map(|(batch_key, _)| batch_key) .collect::>(); let mut remaining_txns = self .txn_summary_to_batches @@ -160,27 +161,10 @@ impl ProofQueue { .filter(|(_, batches)| { batches .iter() - .any(|batch_key| batch_keys.contains(batch_key)) + .any(|batch_key| unexpired_batch_keys.contains(batch_key)) }) .count() as u64; - // count the number of batches with proofs but without txn summaries - counters::PROOFS_WITHOUT_BATCH_DATA.set(self.batch_to_proof.iter().map(|(batch_key, proof)| { - if proof.is_some() && !self.batches_with_txn_summary.contains(batch_key) { - 1 - } else { - 0 - } - }).sum::()); - - counters::PROOFS_IN_PROOF_QUEUE.set(self.batch_to_proof.iter().map(|(_, proof)| { - if proof.is_some() { - 1 - } else { - 0 - } - }).sum::()); - // If a batch_key is not in batches_with_txn_summary, it means we've received the proof but haven't receive the // transaction summary of the batch from batch coordinator. Add the number of txns in the batch to remaining_txns. 
remaining_txns += self @@ -196,12 +180,36 @@ impl ProofQueue { .sum::(); //count the number of transactions with more than one batches - counters::TXNS_WITH_DUPLICATE_BATCHES.set(self.txn_summary_to_batches.iter().filter(|(_, batches)| { - batches.len() > 1 - }).count() as i64); + counters::TXNS_WITH_DUPLICATE_BATCHES.set( + self.txn_summary_to_batches + .iter() + .filter(|(_, batches)| batches.len() > 1) + .count() as i64, + ); counters::TXNS_IN_PROOF_QUEUE.set(self.txn_summary_to_batches.len() as i64); + // count the number of batches with proofs but without txn summaries + counters::PROOFS_WITHOUT_BATCH_DATA.set( + self.batch_to_proof + .iter() + .map(|(batch_key, proof)| { + if proof.is_some() && !self.batches_with_txn_summary.contains(batch_key) { + 1 + } else { + 0 + } + }) + .sum::(), + ); + + counters::PROOFS_IN_PROOF_QUEUE.set( + self.batch_to_proof + .values() + .map(|proof| if proof.is_some() { 1 } else { 0 }) + .sum::(), + ); + remaining_txns } From 4c7af58e8dd2c6fd200fc39c502cfca138897f5b Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Fri, 7 Jun 2024 11:43:03 -0700 Subject: [PATCH 15/67] increase vfns to 7 --- testsuite/forge-cli/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index 59b9fde24e5a8..f6f64f044fce2 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -2447,7 +2447,7 @@ fn pfn_const_tps( let mut forge_config = ForgeConfig::default() .with_initial_validator_count(NonZeroUsize::new(100).unwrap()) - .with_initial_fullnode_count(2) + .with_initial_fullnode_count(7) .with_validator_override_node_config_fn(Arc::new(|config, _| { // Increase the state sync chunk sizes (consensus blocks are much larger than 1k) optimize_state_sync_for_throughput(config); From f71d2d0e51474bf8d9ac3bf4420bc71280295a1b Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Fri, 7 Jun 2024 11:44:05 -0700 Subject: [PATCH 16/67] Fixing 
the typo in batch generator --- consensus/src/quorum_store/batch_generator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consensus/src/quorum_store/batch_generator.rs b/consensus/src/quorum_store/batch_generator.rs index 97157ae9c6720..ae59c83166fb5 100644 --- a/consensus/src/quorum_store/batch_generator.rs +++ b/consensus/src/quorum_store/batch_generator.rs @@ -336,7 +336,7 @@ impl BatchGenerator { .mempool_proxy .pull_internal( max_count, - self.config.sender_max_batch_bytes as u64, + self.config.sender_max_total_bytes as u64, self.txns_in_progress_sorted.clone(), ) .await From 29951a933dc609572b40e49fbdace8500031a298 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Thu, 13 Jun 2024 10:35:18 -0700 Subject: [PATCH 17/67] Add increase fraction --- config/src/config/quorum_store_config.rs | 2 ++ consensus/src/quorum_store/batch_generator.rs | 6 ++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/src/config/quorum_store_config.rs b/config/src/config/quorum_store_config.rs index 20b7f890a630e..5f4c783f3fb85 100644 --- a/config/src/config/quorum_store_config.rs +++ b/config/src/config/quorum_store_config.rs @@ -19,6 +19,7 @@ pub struct QuorumStoreBackPressureConfig { pub backlog_per_validator_batch_limit_count: u64, pub decrease_duration_ms: u64, pub increase_duration_ms: u64, + pub increase_fraction: f64, pub decrease_fraction: f64, pub dynamic_min_txn_per_s: u64, pub dynamic_max_txn_per_s: u64, @@ -34,6 +35,7 @@ impl Default for QuorumStoreBackPressureConfig { backlog_per_validator_batch_limit_count: 4, decrease_duration_ms: 1000, increase_duration_ms: 1000, + increase_fraction: 1.5, decrease_fraction: 0.5, dynamic_min_txn_per_s: 160, dynamic_max_txn_per_s: 4000, diff --git a/consensus/src/quorum_store/batch_generator.rs b/consensus/src/quorum_store/batch_generator.rs index ae59c83166fb5..d3a55353edd14 100644 --- a/consensus/src/quorum_store/batch_generator.rs +++ b/consensus/src/quorum_store/batch_generator.rs @@ 
-399,9 +399,7 @@ impl BatchGenerator { Duration::from_millis(self.config.back_pressure.increase_duration_ms); let mut back_pressure_decrease_latest = start; let mut back_pressure_increase_latest = start; - let mut dynamic_pull_txn_per_s = (self.config.back_pressure.dynamic_min_txn_per_s - + self.config.back_pressure.dynamic_max_txn_per_s) - / 2; + let mut dynamic_pull_txn_per_s = self.config.back_pressure.dynamic_max_txn_per_s; loop { let _timer = counters::BATCH_GENERATOR_MAIN_LOOP.start_timer(); @@ -431,7 +429,7 @@ impl BatchGenerator { if back_pressure_increase_latest.elapsed() >= back_pressure_increase_duration { back_pressure_increase_latest = tick_start; dynamic_pull_txn_per_s = std::cmp::min( - dynamic_pull_txn_per_s + self.config.back_pressure.dynamic_min_txn_per_s, + (dynamic_pull_txn_per_s as f64 * self.config.back_pressure.increase_fraction) as u64, self.config.back_pressure.dynamic_max_txn_per_s, ); trace!("QS: dynamic_max_pull_txn_per_s: {}", dynamic_pull_txn_per_s); From cc3d7a2f96d9d1e0de0f17f0785cf754c2670387 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Thu, 13 Jun 2024 10:40:34 -0700 Subject: [PATCH 18/67] Removing skipped transactions after inserting them --- mempool/src/core_mempool/mempool.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/mempool/src/core_mempool/mempool.rs b/mempool/src/core_mempool/mempool.rs index 5a7fc6d289f81..92703193ace04 100644 --- a/mempool/src/core_mempool/mempool.rs +++ b/mempool/src/core_mempool/mempool.rs @@ -355,6 +355,7 @@ impl Mempool { while skipped.contains(&skipped_txn) { inserted.insert(skipped_txn); result.push(skipped_txn); + skipped.remove(&skipped_txn); if (result.len() as u64) == max_txns { break 'main; } From 5319f79c402aa747f91e5a6013feea9ada1134f0 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Thu, 13 Jun 2024 11:30:52 -0700 Subject: [PATCH 19/67] Add some counters --- consensus/src/counters.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git 
a/consensus/src/counters.rs b/consensus/src/counters.rs index 33dab16c9e12c..31c7231fe722f 100644 --- a/consensus/src/counters.rs +++ b/consensus/src/counters.rs @@ -745,6 +745,25 @@ pub static NUM_TXNS_PER_BLOCK: Lazy = Lazy::new(|| { .unwrap() }); +/// Histogram for the number of txns per (committed) blocks. +pub static NUM_INPUT_TXNS_PER_BLOCK: Lazy = Lazy::new(|| { + register_histogram!( + "aptos_consensus_num_input_txns_per_block", + "Histogram for the number of input txns per (committed) blocks.", + NUM_CONSENSUS_TRANSACTIONS_BUCKETS.to_vec() + ) + .unwrap() +}); + +pub static NUM_BYTES_PER_BLOCK: Lazy = Lazy::new(|| { + register_histogram!( + "aptos_consensus_num_bytes_per_block", + "Histogram for the number of bytes per (committed) blocks.", + exponential_buckets(/*start=*/ 500.0, /*factor=*/ 1.4, /*count=*/ 32).unwrap() + ) + .unwrap() +}); + // Histogram buckets that expand DEFAULT_BUCKETS with more granularity: // * 0.3 to 2.0: step 0.1 // * 2.0 to 4.0: step 0.2 @@ -1046,6 +1065,10 @@ pub fn update_counters_for_committed_blocks(blocks_to_commit: &[Arc Date: Thu, 13 Jun 2024 11:58:04 -0700 Subject: [PATCH 20/67] Update consensus pending duration counter --- consensus/src/block_storage/block_store.rs | 2 +- consensus/src/counters.rs | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/consensus/src/block_storage/block_store.rs b/consensus/src/block_storage/block_store.rs index 7651076b15d8e..5a9e9b29714da 100644 --- a/consensus/src/block_storage/block_store.rs +++ b/consensus/src/block_storage/block_store.rs @@ -538,7 +538,7 @@ impl BlockStore { counters::CONSENSUS_PROPOSAL_PENDING_ROUNDS.observe(pending_rounds as f64); counters::CONSENSUS_PROPOSAL_PENDING_DURATION - .observe(oldest_not_committed_spent_in_pipeline.as_secs_f64()); + .observe_duration(oldest_not_committed_spent_in_pipeline); if pending_rounds > 1 { // TODO cleanup diff --git a/consensus/src/counters.rs b/consensus/src/counters.rs index 31c7231fe722f..967760eecd581 
100644 --- a/consensus/src/counters.rs +++ b/consensus/src/counters.rs @@ -301,10 +301,13 @@ pub static CONSENSUS_PROPOSAL_PENDING_ROUNDS: Lazy = Lazy::new(|| { }); /// duration pending when creating proposal -pub static CONSENSUS_PROPOSAL_PENDING_DURATION: Lazy = Lazy::new(|| { - register_avg_counter( - "aptos_consensus_proposal_pending_duration", - "duration pending when creating proposal", +pub static CONSENSUS_PROPOSAL_PENDING_DURATION: Lazy = Lazy::new(|| { + DurationHistogram::new( + register_histogram!( + "aptos_consensus_proposal_pending_duration", + "duration pending when creating proposal", + ) + .unwrap(), ) }); From eabe96c1d8adf22f93c1ee2ef6ddb9387b5e2ddf Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Thu, 13 Jun 2024 14:08:57 -0700 Subject: [PATCH 21/67] Add more counters --- config/src/config/mempool_config.rs | 2 +- consensus/src/quorum_store/counters.rs | 30 +++++++++++++++++++++++ consensus/src/quorum_store/proof_queue.rs | 24 ++++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/config/src/config/mempool_config.rs b/config/src/config/mempool_config.rs index 459998b3f5b86..09448b335d174 100644 --- a/config/src/config/mempool_config.rs +++ b/config/src/config/mempool_config.rs @@ -81,7 +81,7 @@ impl Default for MempoolConfig { enable_intelligent_peer_prioritization: true, shared_mempool_peer_update_interval_ms: 1_000, shared_mempool_priority_update_interval_secs: 600, // 10 minutes (frequent reprioritization is expensive) - shared_mempool_failover_delay_ms: 500, + shared_mempool_failover_delay_ms: 2000, system_transaction_timeout_secs: 600, system_transaction_gc_interval_ms: 60_000, broadcast_buckets: DEFAULT_BUCKETS.to_vec(), diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index a1abad50eb9f3..6e04be81f0f72 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -28,6 +28,14 @@ static TRANSACTION_COUNT_BUCKETS: Lazy> = 
Lazy::new(|| { .unwrap() }); +static PROOF_COUNT_BUCKETS: Lazy> = Lazy::new(|| { + [ + 1.0, 3.0, 5.0, 7.0, 10.0, 12.0, 15.0, 20.0, 25.0, 30.0, 40.0, 50.0, 60.0, 75.0, 100.0, + 125.0, 150.0, 200.0, 250.0, 300.0, 500.0, + ] + .to_vec() +}); + static BYTE_BUCKETS: Lazy> = Lazy::new(|| { exponential_buckets( /*start=*/ 500.0, /*factor=*/ 1.5, /*count=*/ 25, @@ -233,6 +241,28 @@ pub static EXCLUDED_TXNS_WHEN_PULL: Lazy = Lazy::new(|| { .unwrap() }); +pub static NUM_PROOFS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION: Lazy = Lazy::new( + || { + register_histogram!( + "quorum_store_num_proofs_left_in_proof_queue_after_proposal_generation", + "Histogram for the number of proofs left in the proof queue after block proposal generation.", + PROOF_COUNT_BUCKETS.clone(), + ) + .unwrap() + }, +); + +pub static NUM_TXNS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION: Lazy = Lazy::new( + || { + register_histogram!( + "quorum_store_num_txns_left_in_proof_queue_after_proposal_generation", + "Histogram for the number of transactions left in the proof queue after block proposal generation.", + TRANSACTION_COUNT_BUCKETS.clone(), + ) + .unwrap() + }, +); + pub static BATCH_IN_PROGRESS_COMMITTED: Lazy = Lazy::new(|| { register_int_counter!( "quorum_store_batch_in_progress_committed", diff --git a/consensus/src/quorum_store/proof_queue.rs b/consensus/src/quorum_store/proof_queue.rs index 8f551cbff3e02..49ede95a39be0 100644 --- a/consensus/src/quorum_store/proof_queue.rs +++ b/consensus/src/quorum_store/proof_queue.rs @@ -316,6 +316,30 @@ impl ProofQueue { counters::BLOCK_BYTES_WHEN_PULL.observe(cur_bytes as f64); counters::PROOF_SIZE_WHEN_PULL.observe(ret.len() as f64); counters::EXCLUDED_TXNS_WHEN_PULL.observe(excluded_txns as f64); + + // Number of proofs remaining in proof queue after the pull + let mut num_proofs_remaining_after_pull = 0; + let mut num_txns_remaining_after_pull = 0; + let excluded_batch_keys = excluded_batches + .iter() + .map(BatchKey::from_info) + .collect::>(); + 
for (batch_key, proof) in &self.batch_to_proof { + if proof.is_some() + && !ret + .iter() + .any(|p| BatchKey::from_info(p.info()) == *batch_key) + && !excluded_batch_keys.contains(batch_key) + { + num_proofs_remaining_after_pull += 1; + num_txns_remaining_after_pull += proof.as_ref().unwrap().0.num_txns(); + } + } + counters::NUM_PROOFS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION + .observe(num_proofs_remaining_after_pull as f64); + counters::NUM_TXNS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION + .observe(num_txns_remaining_after_pull as f64); + // Stable sort, so the order of proofs within an author will not change. ret.sort_by_key(|proof| Reverse(proof.gas_bucket_start())); (ret, !full) From 0c413fdc8e758649e0f49f1281faa2e36b75524a Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Thu, 13 Jun 2024 14:54:05 -0700 Subject: [PATCH 22/67] Increasing block size to 2500 --- config/src/config/consensus_config.rs | 4 ++-- config/src/config/mempool_config.rs | 2 +- types/src/on_chain_config/execution_config.rs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/config/src/config/consensus_config.rs b/config/src/config/consensus_config.rs index 501a80c73f852..bc74baa41ae89 100644 --- a/config/src/config/consensus_config.rs +++ b/config/src/config/consensus_config.rs @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize}; use std::path::PathBuf; // NOTE: when changing, make sure to update QuorumStoreBackPressureConfig::backlog_txn_limit_count as well. 
-const MAX_SENDING_BLOCK_TXNS: u64 = 1900; +const MAX_SENDING_BLOCK_TXNS: u64 = 2500; pub(crate) static MAX_RECEIVING_BLOCK_TXNS: Lazy = Lazy::new(|| 10000.max(2 * MAX_SENDING_BLOCK_TXNS)); // stop reducing size at this point, so 1MB transactions can still go through @@ -153,7 +153,7 @@ impl Default for ConsensusConfig { ConsensusConfig { max_network_channel_size: 1024, max_sending_block_txns: MAX_SENDING_BLOCK_TXNS, - max_sending_block_bytes: 3 * 1024 * 1024, // 3MB + max_sending_block_bytes: 5 * 1024 * 1024, // 3MB max_receiving_block_txns: *MAX_RECEIVING_BLOCK_TXNS, max_sending_inline_txns: 100, max_sending_inline_bytes: 200 * 1024, // 200 KB diff --git a/config/src/config/mempool_config.rs b/config/src/config/mempool_config.rs index 09448b335d174..459998b3f5b86 100644 --- a/config/src/config/mempool_config.rs +++ b/config/src/config/mempool_config.rs @@ -81,7 +81,7 @@ impl Default for MempoolConfig { enable_intelligent_peer_prioritization: true, shared_mempool_peer_update_interval_ms: 1_000, shared_mempool_priority_update_interval_secs: 600, // 10 minutes (frequent reprioritization is expensive) - shared_mempool_failover_delay_ms: 2000, + shared_mempool_failover_delay_ms: 500, system_transaction_timeout_secs: 600, system_transaction_gc_interval_ms: 60_000, broadcast_buckets: DEFAULT_BUCKETS.to_vec(), diff --git a/types/src/on_chain_config/execution_config.rs b/types/src/on_chain_config/execution_config.rs index 69f8ea20cad5f..e8757f58369e1 100644 --- a/types/src/on_chain_config/execution_config.rs +++ b/types/src/on_chain_config/execution_config.rs @@ -90,13 +90,13 @@ impl OnChainExecutionConfig { impl BlockGasLimitType { pub fn default_for_genesis() -> Self { BlockGasLimitType::ComplexLimitV1 { - effective_block_gas_limit: 30000, + effective_block_gas_limit: 40000, execution_gas_effective_multiplier: 1, io_gas_effective_multiplier: 1, conflict_penalty_window: 9, use_granular_resource_group_conflicts: false, use_module_publishing_block_conflict: true, - 
block_output_limit: Some(5 * 1024 * 1024), + block_output_limit: Some(7 * 1024 * 1024), include_user_txn_size_in_block_output: true, add_block_limit_outcome_onchain: true, } From 78b559b2a012afad15b5e2eb81d2b16f33e78428 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Thu, 13 Jun 2024 15:35:45 -0700 Subject: [PATCH 23/67] Update a counter --- consensus/src/counters.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consensus/src/counters.rs b/consensus/src/counters.rs index 967760eecd581..9a45588b95013 100644 --- a/consensus/src/counters.rs +++ b/consensus/src/counters.rs @@ -1071,7 +1071,7 @@ pub fn update_counters_for_committed_blocks(blocks_to_commit: &[Arc Date: Thu, 13 Jun 2024 15:57:24 -0700 Subject: [PATCH 24/67] Increase block size limit --- config/src/config/consensus_config.rs | 6 +++--- types/src/on_chain_config/execution_config.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/config/src/config/consensus_config.rs b/config/src/config/consensus_config.rs index bc74baa41ae89..d6ce558761f62 100644 --- a/config/src/config/consensus_config.rs +++ b/config/src/config/consensus_config.rs @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize}; use std::path::PathBuf; // NOTE: when changing, make sure to update QuorumStoreBackPressureConfig::backlog_txn_limit_count as well. 
-const MAX_SENDING_BLOCK_TXNS: u64 = 2500; +const MAX_SENDING_BLOCK_TXNS: u64 = 3000; pub(crate) static MAX_RECEIVING_BLOCK_TXNS: Lazy = Lazy::new(|| 10000.max(2 * MAX_SENDING_BLOCK_TXNS)); // stop reducing size at this point, so 1MB transactions can still go through @@ -153,11 +153,11 @@ impl Default for ConsensusConfig { ConsensusConfig { max_network_channel_size: 1024, max_sending_block_txns: MAX_SENDING_BLOCK_TXNS, - max_sending_block_bytes: 5 * 1024 * 1024, // 3MB + max_sending_block_bytes: 6 * 1024 * 1024, // 3MB max_receiving_block_txns: *MAX_RECEIVING_BLOCK_TXNS, max_sending_inline_txns: 100, max_sending_inline_bytes: 200 * 1024, // 200 KB - max_receiving_block_bytes: 6 * 1024 * 1024, // 6MB + max_receiving_block_bytes: 7 * 1024 * 1024, // 6MB max_pruned_blocks_in_mem: 100, mempool_executed_txn_timeout_ms: 1000, mempool_txn_pull_timeout_ms: 1000, diff --git a/types/src/on_chain_config/execution_config.rs b/types/src/on_chain_config/execution_config.rs index e8757f58369e1..941db49ad0189 100644 --- a/types/src/on_chain_config/execution_config.rs +++ b/types/src/on_chain_config/execution_config.rs @@ -90,13 +90,13 @@ impl OnChainExecutionConfig { impl BlockGasLimitType { pub fn default_for_genesis() -> Self { BlockGasLimitType::ComplexLimitV1 { - effective_block_gas_limit: 40000, + effective_block_gas_limit: 60000, execution_gas_effective_multiplier: 1, io_gas_effective_multiplier: 1, conflict_penalty_window: 9, use_granular_resource_group_conflicts: false, use_module_publishing_block_conflict: true, - block_output_limit: Some(7 * 1024 * 1024), + block_output_limit: Some(10 * 1024 * 1024), include_user_txn_size_in_block_output: true, add_block_limit_outcome_onchain: true, } From 6e54abbaed40ca1f6fab603054b515ea8b57e1b2 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Thu, 13 Jun 2024 17:02:11 -0700 Subject: [PATCH 25/67] Resetting execution config params --- config/src/config/consensus_config.rs | 6 +- consensus/src/quorum_store/proof_queue.rs | 75 
+++++++++---------- types/src/on_chain_config/execution_config.rs | 4 +- 3 files changed, 42 insertions(+), 43 deletions(-) diff --git a/config/src/config/consensus_config.rs b/config/src/config/consensus_config.rs index d6ce558761f62..501a80c73f852 100644 --- a/config/src/config/consensus_config.rs +++ b/config/src/config/consensus_config.rs @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize}; use std::path::PathBuf; // NOTE: when changing, make sure to update QuorumStoreBackPressureConfig::backlog_txn_limit_count as well. -const MAX_SENDING_BLOCK_TXNS: u64 = 3000; +const MAX_SENDING_BLOCK_TXNS: u64 = 1900; pub(crate) static MAX_RECEIVING_BLOCK_TXNS: Lazy = Lazy::new(|| 10000.max(2 * MAX_SENDING_BLOCK_TXNS)); // stop reducing size at this point, so 1MB transactions can still go through @@ -153,11 +153,11 @@ impl Default for ConsensusConfig { ConsensusConfig { max_network_channel_size: 1024, max_sending_block_txns: MAX_SENDING_BLOCK_TXNS, - max_sending_block_bytes: 6 * 1024 * 1024, // 3MB + max_sending_block_bytes: 3 * 1024 * 1024, // 3MB max_receiving_block_txns: *MAX_RECEIVING_BLOCK_TXNS, max_sending_inline_txns: 100, max_sending_inline_bytes: 200 * 1024, // 200 KB - max_receiving_block_bytes: 7 * 1024 * 1024, // 6MB + max_receiving_block_bytes: 6 * 1024 * 1024, // 6MB max_pruned_blocks_in_mem: 100, mempool_executed_txn_timeout_ms: 1000, mempool_txn_pull_timeout_ms: 1000, diff --git a/consensus/src/quorum_store/proof_queue.rs b/consensus/src/quorum_store/proof_queue.rs index 49ede95a39be0..9c5c8f4019e4a 100644 --- a/consensus/src/quorum_store/proof_queue.rs +++ b/consensus/src/quorum_store/proof_queue.rs @@ -104,7 +104,7 @@ pub struct ProofQueue { // Expiration index expirations: TimeExpirations, latest_block_timestamp: u64, - remaining_txns: u64, + remaining_txns_with_duplicates: u64, remaining_proofs: u64, remaining_local_txns: u64, remaining_local_proofs: u64, @@ -120,7 +120,7 @@ impl ProofQueue { batches_with_txn_summary: HashSet::new(), expirations: 
TimeExpirations::new(), latest_block_timestamp: 0, - remaining_txns: 0, + remaining_txns_with_duplicates: 0, remaining_proofs: 0, remaining_local_txns: 0, remaining_local_proofs: 0, @@ -129,7 +129,7 @@ impl ProofQueue { #[inline] fn inc_remaining(&mut self, author: &AccountAddress, num_txns: u64) { - self.remaining_txns += num_txns; + self.remaining_txns_with_duplicates += num_txns; self.remaining_proofs += 1; if *author == self.my_peer_id { self.remaining_local_txns += num_txns; @@ -139,7 +139,7 @@ impl ProofQueue { #[inline] fn dec_remaining(&mut self, author: &AccountAddress, num_txns: u64) { - self.remaining_txns -= num_txns; + self.remaining_txns_with_duplicates -= num_txns; self.remaining_proofs -= 1; if *author == self.my_peer_id { self.remaining_local_txns -= num_txns; @@ -147,7 +147,7 @@ impl ProofQueue { } } - fn remaining_txns(&self) -> u64 { + fn remaining_txns_without_duplicates(&self) -> u64 { // All the bath keys for which batch_to_proof is not None. This is the set of unexpired and uncommitted proofs. 
let unexpired_batch_keys = self .batch_to_proof @@ -179,37 +179,6 @@ impl ProofQueue { }) .sum::(); - //count the number of transactions with more than one batches - counters::TXNS_WITH_DUPLICATE_BATCHES.set( - self.txn_summary_to_batches - .iter() - .filter(|(_, batches)| batches.len() > 1) - .count() as i64, - ); - - counters::TXNS_IN_PROOF_QUEUE.set(self.txn_summary_to_batches.len() as i64); - - // count the number of batches with proofs but without txn summaries - counters::PROOFS_WITHOUT_BATCH_DATA.set( - self.batch_to_proof - .iter() - .map(|(batch_key, proof)| { - if proof.is_some() && !self.batches_with_txn_summary.contains(batch_key) { - 1 - } else { - 0 - } - }) - .sum::(), - ); - - counters::PROOFS_IN_PROOF_QUEUE.set( - self.batch_to_proof - .values() - .map(|proof| if proof.is_some() { 1 } else { 0 }) - .sum::(), - ); - remaining_txns } @@ -388,13 +357,43 @@ impl ProofQueue { } pub(crate) fn remaining_txns_and_proofs(&self) -> (u64, u64) { - counters::NUM_TOTAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_txns as f64); + counters::NUM_TOTAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_txns_with_duplicates as f64); counters::NUM_TOTAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_proofs as f64); counters::NUM_LOCAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_local_txns as f64); counters::NUM_LOCAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_local_proofs as f64); - let remaining_txns_without_duplicates = self.remaining_txns(); + let remaining_txns_without_duplicates = self.remaining_txns_without_duplicates(); counters::NUM_TOTAL_TXNS_LEFT_ON_UPDATE_WITHOUT_DUPLICATES .observe(remaining_txns_without_duplicates as f64); + //count the number of transactions with more than one batches + counters::TXNS_WITH_DUPLICATE_BATCHES.set( + self.txn_summary_to_batches + .iter() + .filter(|(_, batches)| batches.len() > 1) + .count() as i64, + ); + + counters::TXNS_IN_PROOF_QUEUE.set(self.txn_summary_to_batches.len() as i64); + + // count the number of batches with proofs but 
without txn summaries + counters::PROOFS_WITHOUT_BATCH_DATA.set( + self.batch_to_proof + .iter() + .map(|(batch_key, proof)| { + if proof.is_some() && !self.batches_with_txn_summary.contains(batch_key) { + 1 + } else { + 0 + } + }) + .sum::(), + ); + + counters::PROOFS_IN_PROOF_QUEUE.set( + self.batch_to_proof + .values() + .map(|proof| if proof.is_some() { 1 } else { 0 }) + .sum::(), + ); (remaining_txns_without_duplicates, self.remaining_proofs) } diff --git a/types/src/on_chain_config/execution_config.rs b/types/src/on_chain_config/execution_config.rs index 941db49ad0189..69f8ea20cad5f 100644 --- a/types/src/on_chain_config/execution_config.rs +++ b/types/src/on_chain_config/execution_config.rs @@ -90,13 +90,13 @@ impl OnChainExecutionConfig { impl BlockGasLimitType { pub fn default_for_genesis() -> Self { BlockGasLimitType::ComplexLimitV1 { - effective_block_gas_limit: 60000, + effective_block_gas_limit: 30000, execution_gas_effective_multiplier: 1, io_gas_effective_multiplier: 1, conflict_penalty_window: 9, use_granular_resource_group_conflicts: false, use_module_publishing_block_conflict: true, - block_output_limit: Some(10 * 1024 * 1024), + block_output_limit: Some(5 * 1024 * 1024), include_user_txn_size_in_block_output: true, add_block_limit_outcome_onchain: true, } From 685325ab59a4eeb2ae94cdab2bbe48b9e4dc5147 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Fri, 14 Jun 2024 13:25:38 -0700 Subject: [PATCH 26/67] Moving proof queue to utils.rs --- .../src/quorum_store/batch_coordinator.rs | 2 +- consensus/src/quorum_store/mod.rs | 1 - consensus/src/quorum_store/proof_manager.rs | 7 +- consensus/src/quorum_store/proof_queue.rs | 473 ----------------- .../src/quorum_store/quorum_store_builder.rs | 2 +- .../quorum_store/tests/proof_manager_test.rs | 3 +- consensus/src/quorum_store/tests/utils.rs | 2 +- consensus/src/quorum_store/utils.rs | 474 +++++++++++++++++- 8 files changed, 476 insertions(+), 488 deletions(-) delete mode 100644 
consensus/src/quorum_store/proof_queue.rs diff --git a/consensus/src/quorum_store/batch_coordinator.rs b/consensus/src/quorum_store/batch_coordinator.rs index 3a79f35d9cac0..012863432d70f 100644 --- a/consensus/src/quorum_store/batch_coordinator.rs +++ b/consensus/src/quorum_store/batch_coordinator.rs @@ -1,7 +1,7 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use super::proof_queue::ProofQueueCommand; +use super::utils::ProofQueueCommand; use crate::{ network::{NetworkSender, QuorumStoreSender}, quorum_store::{ diff --git a/consensus/src/quorum_store/mod.rs b/consensus/src/quorum_store/mod.rs index 64525643334f6..888b62b0122c2 100644 --- a/consensus/src/quorum_store/mod.rs +++ b/consensus/src/quorum_store/mod.rs @@ -12,7 +12,6 @@ pub(crate) mod batch_store; pub(crate) mod network_listener; pub(crate) mod proof_coordinator; pub(crate) mod proof_manager; -pub(crate) mod proof_queue; pub(crate) mod quorum_store_builder; pub(crate) mod quorum_store_coordinator; pub mod quorum_store_db; diff --git a/consensus/src/quorum_store/proof_manager.rs b/consensus/src/quorum_store/proof_manager.rs index 9392bc60279b4..675a066637ba5 100644 --- a/consensus/src/quorum_store/proof_manager.rs +++ b/consensus/src/quorum_store/proof_manager.rs @@ -1,12 +1,13 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use super::proof_queue::BatchSortKey; use crate::{ monitor, quorum_store::{ - batch_generator::BackPressure, batch_store::BatchStore, counters, - proof_queue::ProofQueueCommand, + batch_generator::BackPressure, + batch_store::BatchStore, + counters, + utils::{BatchSortKey, ProofQueueCommand}, }, }; use aptos_consensus_types::{ diff --git a/consensus/src/quorum_store/proof_queue.rs b/consensus/src/quorum_store/proof_queue.rs deleted file mode 100644 index 9c5c8f4019e4a..0000000000000 --- a/consensus/src/quorum_store/proof_queue.rs +++ /dev/null @@ -1,473 +0,0 @@ -// Copyright © Aptos Foundation -// SPDX-License-Identifier: 
Apache-2.0 - -use super::utils::TimeExpirations; -use crate::quorum_store::counters; -use aptos_consensus_types::proof_of_store::{BatchId, BatchInfo, ProofOfStore}; -use aptos_logger::prelude::*; -use aptos_types::PeerId; -use futures::channel::oneshot; -use move_core_types::account_address::AccountAddress; -use rand::{seq::SliceRandom, thread_rng}; -use std::{ - cmp::{Ordering, Reverse}, - collections::{BTreeMap, HashMap, HashSet}, - time::Instant, -}; - -#[derive(PartialEq, Eq, Hash, Clone)] -pub struct BatchKey { - author: PeerId, - batch_id: BatchId, -} - -impl BatchKey { - pub fn from_info(info: &BatchInfo) -> Self { - Self { - author: info.author(), - batch_id: info.batch_id(), - } - } -} - -#[derive(PartialEq, Eq, Clone, Hash)] -pub struct BatchSortKey { - batch_key: BatchKey, - gas_bucket_start: u64, -} - -impl BatchSortKey { - pub fn from_info(info: &BatchInfo) -> Self { - Self { - batch_key: BatchKey::from_info(info), - gas_bucket_start: info.gas_bucket_start(), - } - } - - pub fn author(&self) -> PeerId { - self.batch_key.author - } -} - -impl PartialOrd for BatchSortKey { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for BatchSortKey { - fn cmp(&self, other: &Self) -> Ordering { - // ascending - match self.gas_bucket_start.cmp(&other.gas_bucket_start) { - Ordering::Equal => {}, - ordering => return ordering, - } - // descending - other.batch_key.batch_id.cmp(&self.batch_key.batch_id) - } -} - -#[derive(Debug)] -pub enum ProofQueueCommand { - // Proof manager sends this command to add the proofs to the proof queue - // We send back (remaining_txns, remaining_proofs) to the proof manager - AddProofs(Vec, oneshot::Sender<(u64, u64)>), - // Batch coordinator sends this command to add the received batches to the proof queue. - // For each transaction, the proof queue stores the list of batches containing the transaction. 
- AddBatches(Vec<(BatchInfo, Vec<(PeerId, u64)>)>), - // Proof manager sends this command to pull proofs from the proof queue to - // include in the block proposal. - PullProofs { - excluded_batches: HashSet, - max_txns: u64, - max_bytes: u64, - return_non_full: bool, - response_sender: oneshot::Sender<(Vec, bool)>, - }, - // Proof manager sends this command to mark these batches as committed and - // update the block timestamp. - // We send back the (remaining_txns, remaining_proofs) to the proof manager - MarkCommitted(Vec, u64, oneshot::Sender<(u64, u64)>), -} - -pub struct ProofQueue { - my_peer_id: PeerId, - // Queue per peer to ensure fairness between peers and priority within peer - author_to_batches: HashMap>, - // ProofOfStore and insertion_time. None if committed - batch_to_proof: HashMap>, - // Map of txn_summary = (sender, sequence number) to all the batches that contain - // the transaction. This helps in counting the number of unique transactions in the pipeline. - txn_summary_to_batches: HashMap<(PeerId, u64), HashSet>, - // List of batches for which we received txn summaries from the batch coordinator - batches_with_txn_summary: HashSet, - // Expiration index - expirations: TimeExpirations, - latest_block_timestamp: u64, - remaining_txns_with_duplicates: u64, - remaining_proofs: u64, - remaining_local_txns: u64, - remaining_local_proofs: u64, -} - -impl ProofQueue { - pub(crate) fn new(my_peer_id: PeerId) -> Self { - Self { - my_peer_id, - author_to_batches: HashMap::new(), - batch_to_proof: HashMap::new(), - txn_summary_to_batches: HashMap::new(), - batches_with_txn_summary: HashSet::new(), - expirations: TimeExpirations::new(), - latest_block_timestamp: 0, - remaining_txns_with_duplicates: 0, - remaining_proofs: 0, - remaining_local_txns: 0, - remaining_local_proofs: 0, - } - } - - #[inline] - fn inc_remaining(&mut self, author: &AccountAddress, num_txns: u64) { - self.remaining_txns_with_duplicates += num_txns; - self.remaining_proofs += 1; - if 
*author == self.my_peer_id { - self.remaining_local_txns += num_txns; - self.remaining_local_proofs += 1; - } - } - - #[inline] - fn dec_remaining(&mut self, author: &AccountAddress, num_txns: u64) { - self.remaining_txns_with_duplicates -= num_txns; - self.remaining_proofs -= 1; - if *author == self.my_peer_id { - self.remaining_local_txns -= num_txns; - self.remaining_local_proofs -= 1; - } - } - - fn remaining_txns_without_duplicates(&self) -> u64 { - // All the bath keys for which batch_to_proof is not None. This is the set of unexpired and uncommitted proofs. - let unexpired_batch_keys = self - .batch_to_proof - .iter() - .filter(|(_, proof)| proof.is_some()) - .map(|(batch_key, _)| batch_key) - .collect::>(); - let mut remaining_txns = self - .txn_summary_to_batches - .iter() - .filter(|(_, batches)| { - batches - .iter() - .any(|batch_key| unexpired_batch_keys.contains(batch_key)) - }) - .count() as u64; - - // If a batch_key is not in batches_with_txn_summary, it means we've received the proof but haven't receive the - // transaction summary of the batch from batch coordinator. Add the number of txns in the batch to remaining_txns. - remaining_txns += self - .batch_to_proof - .iter() - .filter_map(|(batch_key, proof)| { - if proof.is_some() && !self.batches_with_txn_summary.contains(batch_key) { - Some(proof.as_ref().unwrap().0.num_txns()) - } else { - None - } - }) - .sum::(); - - remaining_txns - } - - /// Add the ProofOfStore to proof queue. 
- pub(crate) fn push(&mut self, proof: ProofOfStore) { - if proof.expiration() < self.latest_block_timestamp { - counters::inc_rejected_pos_count(counters::POS_EXPIRED_LABEL); - return; - } - let batch_key = BatchKey::from_info(proof.info()); - if self.batch_to_proof.get(&batch_key).is_some() { - counters::inc_rejected_pos_count(counters::POS_DUPLICATE_LABEL); - return; - } - let author = proof.author(); - let bucket = proof.gas_bucket_start(); - let num_txns = proof.num_txns(); - let expiration = proof.expiration(); - - let batch_sort_key = BatchSortKey::from_info(proof.info()); - let queue = self.author_to_batches.entry(author).or_default(); - queue.insert(batch_sort_key.clone(), proof.info().clone()); - self.expirations.add_item(batch_sort_key, expiration); - self.batch_to_proof - .insert(batch_key, Some((proof, Instant::now()))); - - if author == self.my_peer_id { - counters::inc_local_pos_count(bucket); - } else { - counters::inc_remote_pos_count(bucket); - } - self.inc_remaining(&author, num_txns); - } - - // gets excluded and iterates over the vector returning non excluded or expired entries. - // return the vector of pulled PoS, and the size of the remaining PoS - // The flag in the second return argument is true iff the entire proof queue is fully utilized - // when pulling the proofs. If any proof from proof queue cannot be included due to size limits, - // this flag is set false. 
- pub(crate) fn pull_proofs( - &mut self, - excluded_batches: &HashSet, - max_txns: u64, - max_bytes: u64, - return_non_full: bool, - ) -> (Vec, bool) { - let mut ret = vec![]; - let mut cur_bytes = 0; - let mut cur_txns = 0; - let mut excluded_txns = 0; - let mut full = false; - - let mut iters = vec![]; - for (_, batches) in self.author_to_batches.iter() { - iters.push(batches.iter().rev()); - } - - while !iters.is_empty() { - iters.shuffle(&mut thread_rng()); - iters.retain_mut(|iter| { - if full { - return false; - } - if let Some((sort_key, batch)) = iter.next() { - if excluded_batches.contains(batch) { - excluded_txns += batch.num_txns(); - } else if let Some(Some((proof, insertion_time))) = - self.batch_to_proof.get(&sort_key.batch_key) - { - cur_bytes += batch.num_bytes(); - cur_txns += batch.num_txns(); - if cur_bytes > max_bytes || cur_txns > max_txns { - // Exceeded the limit for requested bytes or number of transactions. - full = true; - return false; - } - let bucket = proof.gas_bucket_start(); - ret.push(proof.clone()); - counters::pos_to_pull(bucket, insertion_time.elapsed().as_secs_f64()); - if cur_bytes == max_bytes || cur_txns == max_txns { - // Exactly the limit for requested bytes or number of transactions. 
- full = true; - return false; - } - } - true - } else { - false - } - }) - } - info!( - // before non full check - byte_size = cur_bytes, - block_size = cur_txns, - batch_count = ret.len(), - full = full, - return_non_full = return_non_full, - "Pull payloads from QuorumStore: internal" - ); - - if full || return_non_full { - counters::BLOCK_SIZE_WHEN_PULL.observe(cur_txns as f64); - counters::BLOCK_BYTES_WHEN_PULL.observe(cur_bytes as f64); - counters::PROOF_SIZE_WHEN_PULL.observe(ret.len() as f64); - counters::EXCLUDED_TXNS_WHEN_PULL.observe(excluded_txns as f64); - - // Number of proofs remaining in proof queue after the pull - let mut num_proofs_remaining_after_pull = 0; - let mut num_txns_remaining_after_pull = 0; - let excluded_batch_keys = excluded_batches - .iter() - .map(BatchKey::from_info) - .collect::>(); - for (batch_key, proof) in &self.batch_to_proof { - if proof.is_some() - && !ret - .iter() - .any(|p| BatchKey::from_info(p.info()) == *batch_key) - && !excluded_batch_keys.contains(batch_key) - { - num_proofs_remaining_after_pull += 1; - num_txns_remaining_after_pull += proof.as_ref().unwrap().0.num_txns(); - } - } - counters::NUM_PROOFS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION - .observe(num_proofs_remaining_after_pull as f64); - counters::NUM_TXNS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION - .observe(num_txns_remaining_after_pull as f64); - - // Stable sort, so the order of proofs within an author will not change. 
- ret.sort_by_key(|proof| Reverse(proof.gas_bucket_start())); - (ret, !full) - } else { - (Vec::new(), !full) - } - } - - fn handle_updated_block_timestamp(&mut self, block_timestamp: u64) { - assert!( - self.latest_block_timestamp <= block_timestamp, - "Decreasing block timestamp" - ); - self.latest_block_timestamp = block_timestamp; - - let expired = self.expirations.expire(block_timestamp); - let mut num_expired_but_not_committed = 0; - for key in &expired { - if let Some(mut queue) = self.author_to_batches.remove(&key.author()) { - if let Some(batch) = queue.remove(key) { - if self - .batch_to_proof - .get(&key.batch_key) - .expect("Entry for unexpired batch must exist") - .is_some() - { - // non-committed proof that is expired - num_expired_but_not_committed += 1; - counters::GAP_BETWEEN_BATCH_EXPIRATION_AND_CURRENT_TIME_WHEN_COMMIT - .observe((block_timestamp - batch.expiration()) as f64); - self.txn_summary_to_batches.retain(|_, batches| { - batches.remove(&key.batch_key); - !batches.is_empty() - }); - self.batches_with_txn_summary.remove(&key.batch_key); - self.dec_remaining(&batch.author(), batch.num_txns()); - } - claims::assert_some!(self.batch_to_proof.remove(&key.batch_key)); - } - if !queue.is_empty() { - self.author_to_batches.insert(key.author(), queue); - } - } - } - counters::NUM_PROOFS_EXPIRED_WHEN_COMMIT.inc_by(num_expired_but_not_committed); - } - - pub(crate) fn remaining_txns_and_proofs(&self) -> (u64, u64) { - counters::NUM_TOTAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_txns_with_duplicates as f64); - counters::NUM_TOTAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_proofs as f64); - counters::NUM_LOCAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_local_txns as f64); - counters::NUM_LOCAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_local_proofs as f64); - let remaining_txns_without_duplicates = self.remaining_txns_without_duplicates(); - counters::NUM_TOTAL_TXNS_LEFT_ON_UPDATE_WITHOUT_DUPLICATES - .observe(remaining_txns_without_duplicates 
as f64); - //count the number of transactions with more than one batches - counters::TXNS_WITH_DUPLICATE_BATCHES.set( - self.txn_summary_to_batches - .iter() - .filter(|(_, batches)| batches.len() > 1) - .count() as i64, - ); - - counters::TXNS_IN_PROOF_QUEUE.set(self.txn_summary_to_batches.len() as i64); - - // count the number of batches with proofs but without txn summaries - counters::PROOFS_WITHOUT_BATCH_DATA.set( - self.batch_to_proof - .iter() - .map(|(batch_key, proof)| { - if proof.is_some() && !self.batches_with_txn_summary.contains(batch_key) { - 1 - } else { - 0 - } - }) - .sum::(), - ); - - counters::PROOFS_IN_PROOF_QUEUE.set( - self.batch_to_proof - .values() - .map(|proof| if proof.is_some() { 1 } else { 0 }) - .sum::(), - ); - (remaining_txns_without_duplicates, self.remaining_proofs) - } - - // Mark in the hashmap committed PoS, but keep them until they expire - fn mark_committed(&mut self, batches: Vec) { - for batch in &batches { - let batch_key = BatchKey::from_info(batch); - if let Some(Some((proof, insertion_time))) = self.batch_to_proof.get(&batch_key) { - counters::pos_to_commit( - proof.gas_bucket_start(), - insertion_time.elapsed().as_secs_f64(), - ); - self.dec_remaining(&batch.author(), batch.num_txns()); - } - self.batch_to_proof.insert(batch_key.clone(), None); - self.batches_with_txn_summary.remove(&batch_key); - self.txn_summary_to_batches.retain(|_, batches| { - batches.remove(&batch_key); - !batches.is_empty() - }); - } - } - - pub async fn start(mut self, mut command_rx: tokio::sync::mpsc::Receiver) { - loop { - let _timer = counters::PROOF_MANAGER_MAIN_LOOP.start_timer(); - if let Some(msg) = command_rx.recv().await { - match msg { - ProofQueueCommand::AddProofs(proofs, response_sender) => { - for proof in proofs { - self.push(proof); - } - if let Err(e) = response_sender.send(self.remaining_txns_and_proofs()) { - warn!("Failed to send response to AddProofs: {:?}", e); - } - }, - ProofQueueCommand::PullProofs { - 
excluded_batches, - max_txns, - max_bytes, - return_non_full, - response_sender, - } => { - let (proofs, full) = self.pull_proofs( - &excluded_batches, - max_txns, - max_bytes, - return_non_full, - ); - if let Err(e) = response_sender.send((proofs, full)) { - warn!("Failed to send response to PullProofs: {:?}", e); - } - }, - ProofQueueCommand::MarkCommitted(batches, block_timestamp, response_sender) => { - self.mark_committed(batches); - self.handle_updated_block_timestamp(block_timestamp); - if let Err(e) = response_sender.send(self.remaining_txns_and_proofs()) { - error!("Failed to send response to MarkCommitted: {:?}", e); - } - }, - ProofQueueCommand::AddBatches(batch_summaries) => { - for (batch_info, txn_summaries) in batch_summaries { - let batch_key = BatchKey::from_info(&batch_info); - for txn_summary in txn_summaries { - self.txn_summary_to_batches - .entry(txn_summary) - .or_default() - .insert(batch_key.clone()); - } - self.batches_with_txn_summary.insert(batch_key); - } - }, - } - } - } - } -} diff --git a/consensus/src/quorum_store/quorum_store_builder.rs b/consensus/src/quorum_store/quorum_store_builder.rs index b435459b6c4fe..112655356560a 100644 --- a/consensus/src/quorum_store/quorum_store_builder.rs +++ b/consensus/src/quorum_store/quorum_store_builder.rs @@ -18,9 +18,9 @@ use crate::{ network_listener::NetworkListener, proof_coordinator::{ProofCoordinator, ProofCoordinatorCommand}, proof_manager::{ProofManager, ProofManagerCommand}, - proof_queue::{ProofQueue, ProofQueueCommand}, quorum_store_coordinator::{CoordinatorCommand, QuorumStoreCoordinator}, types::{Batch, BatchResponse}, + utils::{ProofQueue, ProofQueueCommand}, }, round_manager::VerifiedEvent, }; diff --git a/consensus/src/quorum_store/tests/proof_manager_test.rs b/consensus/src/quorum_store/tests/proof_manager_test.rs index 052f7aff6af96..39822c04334e0 100644 --- a/consensus/src/quorum_store/tests/proof_manager_test.rs +++ b/consensus/src/quorum_store/tests/proof_manager_test.rs @@ 
-2,8 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::quorum_store::{ - proof_manager::ProofManager, proof_queue::ProofQueue, - tests::batch_store_test::batch_store_for_test, + proof_manager::ProofManager, tests::batch_store_test::batch_store_for_test, utils::ProofQueue, }; use aptos_consensus_types::{ common::{Payload, PayloadFilter}, diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index fd7796da610b0..922ae1d67a3af 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ b/consensus/src/quorum_store/tests/utils.rs @@ -1,7 +1,7 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use crate::quorum_store::proof_queue::ProofQueue; +use crate::quorum_store::utils::ProofQueue; use aptos_consensus_types::proof_of_store::{BatchId, BatchInfo, ProofOfStore}; use aptos_crypto::HashValue; use aptos_types::{aggregate_signature::AggregateSignature, PeerId}; diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 95a721c6d2bd8..67f2fcc9c8bd1 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -1,18 +1,23 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use crate::monitor; -use aptos_consensus_types::common::{TransactionInProgress, TransactionSummary}; +use crate::{monitor, quorum_store::counters}; +use aptos_consensus_types::{ + common::{TransactionInProgress, TransactionSummary}, + proof_of_store::{BatchId, BatchInfo, ProofOfStore}, +}; use aptos_logger::prelude::*; use aptos_mempool::{QuorumStoreRequest, QuorumStoreResponse}; -use aptos_types::transaction::SignedTransaction; +use aptos_types::{transaction::SignedTransaction, PeerId}; use chrono::Utc; use futures::channel::{mpsc::Sender, oneshot}; +use move_core_types::account_address::AccountAddress; +use rand::{seq::SliceRandom, thread_rng}; use std::{ - cmp::Reverse, - collections::{BTreeMap, BinaryHeap, HashSet, VecDeque}, + 
cmp::{Ordering, Reverse}, + collections::{BTreeMap, BinaryHeap, HashMap, HashSet, VecDeque}, hash::Hash, - time::Duration, + time::{Duration, Instant}, }; use tokio::time::timeout; @@ -134,3 +139,460 @@ impl MempoolProxy { } } } + +#[derive(PartialEq, Eq, Hash, Clone)] +pub struct BatchKey { + author: PeerId, + batch_id: BatchId, +} + +impl BatchKey { + pub fn from_info(info: &BatchInfo) -> Self { + Self { + author: info.author(), + batch_id: info.batch_id(), + } + } +} + +#[derive(PartialEq, Eq, Clone, Hash)] +pub struct BatchSortKey { + batch_key: BatchKey, + gas_bucket_start: u64, +} + +impl BatchSortKey { + pub fn from_info(info: &BatchInfo) -> Self { + Self { + batch_key: BatchKey::from_info(info), + gas_bucket_start: info.gas_bucket_start(), + } + } + + pub fn author(&self) -> PeerId { + self.batch_key.author + } +} + +impl PartialOrd for BatchSortKey { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for BatchSortKey { + fn cmp(&self, other: &Self) -> Ordering { + // ascending + match self.gas_bucket_start.cmp(&other.gas_bucket_start) { + Ordering::Equal => {}, + ordering => return ordering, + } + // descending + other.batch_key.batch_id.cmp(&self.batch_key.batch_id) + } +} + +#[derive(Debug)] +pub enum ProofQueueCommand { + // Proof manager sends this command to add the proofs to the proof queue + // We send back (remaining_txns, remaining_proofs) to the proof manager + AddProofs(Vec, oneshot::Sender<(u64, u64)>), + // Batch coordinator sends this command to add the received batches to the proof queue. + // For each transaction, the proof queue stores the list of batches containing the transaction. + AddBatches(Vec<(BatchInfo, Vec<(PeerId, u64)>)>), + // Proof manager sends this command to pull proofs from the proof queue to + // include in the block proposal. 
+ PullProofs { + excluded_batches: HashSet, + max_txns: u64, + max_bytes: u64, + return_non_full: bool, + response_sender: oneshot::Sender<(Vec, bool)>, + }, + // Proof manager sends this command to mark these batches as committed and + // update the block timestamp. + // We send back the (remaining_txns, remaining_proofs) to the proof manager + MarkCommitted(Vec, u64, oneshot::Sender<(u64, u64)>), +} + +pub struct ProofQueue { + my_peer_id: PeerId, + // Queue per peer to ensure fairness between peers and priority within peer + author_to_batches: HashMap>, + // ProofOfStore and insertion_time. None if committed + batch_to_proof: HashMap>, + // Map of txn_summary = (sender, sequence number) to all the batches that contain + // the transaction. This helps in counting the number of unique transactions in the pipeline. + txn_summary_to_batches: HashMap<(PeerId, u64), HashSet>, + // List of batches for which we received txn summaries from the batch coordinator + batches_with_txn_summary: HashSet, + // Expiration index + expirations: TimeExpirations, + latest_block_timestamp: u64, + remaining_txns_with_duplicates: u64, + remaining_proofs: u64, + remaining_local_txns: u64, + remaining_local_proofs: u64, +} + +impl ProofQueue { + pub(crate) fn new(my_peer_id: PeerId) -> Self { + Self { + my_peer_id, + author_to_batches: HashMap::new(), + batch_to_proof: HashMap::new(), + txn_summary_to_batches: HashMap::new(), + batches_with_txn_summary: HashSet::new(), + expirations: TimeExpirations::new(), + latest_block_timestamp: 0, + remaining_txns_with_duplicates: 0, + remaining_proofs: 0, + remaining_local_txns: 0, + remaining_local_proofs: 0, + } + } + + #[inline] + fn inc_remaining(&mut self, author: &AccountAddress, num_txns: u64) { + self.remaining_txns_with_duplicates += num_txns; + self.remaining_proofs += 1; + if *author == self.my_peer_id { + self.remaining_local_txns += num_txns; + self.remaining_local_proofs += 1; + } + } + + #[inline] + fn dec_remaining(&mut self, author: 
&AccountAddress, num_txns: u64) { + self.remaining_txns_with_duplicates -= num_txns; + self.remaining_proofs -= 1; + if *author == self.my_peer_id { + self.remaining_local_txns -= num_txns; + self.remaining_local_proofs -= 1; + } + } + + fn remaining_txns_without_duplicates(&self) -> u64 { + // All the bath keys for which batch_to_proof is not None. This is the set of unexpired and uncommitted proofs. + let unexpired_batch_keys = self + .batch_to_proof + .iter() + .filter(|(_, proof)| proof.is_some()) + .map(|(batch_key, _)| batch_key) + .collect::>(); + let mut remaining_txns = self + .txn_summary_to_batches + .iter() + .filter(|(_, batches)| { + batches + .iter() + .any(|batch_key| unexpired_batch_keys.contains(batch_key)) + }) + .count() as u64; + + // If a batch_key is not in batches_with_txn_summary, it means we've received the proof but haven't receive the + // transaction summary of the batch from batch coordinator. Add the number of txns in the batch to remaining_txns. + remaining_txns += self + .batch_to_proof + .iter() + .filter_map(|(batch_key, proof)| { + if proof.is_some() && !self.batches_with_txn_summary.contains(batch_key) { + Some(proof.as_ref().unwrap().0.num_txns()) + } else { + None + } + }) + .sum::(); + + remaining_txns + } + + /// Add the ProofOfStore to proof queue. 
+ pub(crate) fn push(&mut self, proof: ProofOfStore) { + if proof.expiration() < self.latest_block_timestamp { + counters::inc_rejected_pos_count(counters::POS_EXPIRED_LABEL); + return; + } + let batch_key = BatchKey::from_info(proof.info()); + if self.batch_to_proof.get(&batch_key).is_some() { + counters::inc_rejected_pos_count(counters::POS_DUPLICATE_LABEL); + return; + } + let author = proof.author(); + let bucket = proof.gas_bucket_start(); + let num_txns = proof.num_txns(); + let expiration = proof.expiration(); + + let batch_sort_key = BatchSortKey::from_info(proof.info()); + let queue = self.author_to_batches.entry(author).or_default(); + queue.insert(batch_sort_key.clone(), proof.info().clone()); + self.expirations.add_item(batch_sort_key, expiration); + self.batch_to_proof + .insert(batch_key, Some((proof, Instant::now()))); + + if author == self.my_peer_id { + counters::inc_local_pos_count(bucket); + } else { + counters::inc_remote_pos_count(bucket); + } + self.inc_remaining(&author, num_txns); + } + + // gets excluded and iterates over the vector returning non excluded or expired entries. + // return the vector of pulled PoS, and the size of the remaining PoS + // The flag in the second return argument is true iff the entire proof queue is fully utilized + // when pulling the proofs. If any proof from proof queue cannot be included due to size limits, + // this flag is set false. 
+ pub(crate) fn pull_proofs( + &mut self, + excluded_batches: &HashSet, + max_txns: u64, + max_bytes: u64, + return_non_full: bool, + ) -> (Vec, bool) { + let mut ret = vec![]; + let mut cur_bytes = 0; + let mut cur_txns = 0; + let mut excluded_txns = 0; + let mut full = false; + + let mut iters = vec![]; + for (_, batches) in self.author_to_batches.iter() { + iters.push(batches.iter().rev()); + } + + while !iters.is_empty() { + iters.shuffle(&mut thread_rng()); + iters.retain_mut(|iter| { + if full { + return false; + } + if let Some((sort_key, batch)) = iter.next() { + if excluded_batches.contains(batch) { + excluded_txns += batch.num_txns(); + } else if let Some(Some((proof, insertion_time))) = + self.batch_to_proof.get(&sort_key.batch_key) + { + cur_bytes += batch.num_bytes(); + cur_txns += batch.num_txns(); + if cur_bytes > max_bytes || cur_txns > max_txns { + // Exceeded the limit for requested bytes or number of transactions. + full = true; + return false; + } + let bucket = proof.gas_bucket_start(); + ret.push(proof.clone()); + counters::pos_to_pull(bucket, insertion_time.elapsed().as_secs_f64()); + if cur_bytes == max_bytes || cur_txns == max_txns { + // Exactly the limit for requested bytes or number of transactions. 
+ full = true; + return false; + } + } + true + } else { + false + } + }) + } + info!( + // before non full check + byte_size = cur_bytes, + block_size = cur_txns, + batch_count = ret.len(), + full = full, + return_non_full = return_non_full, + "Pull payloads from QuorumStore: internal" + ); + + if full || return_non_full { + counters::BLOCK_SIZE_WHEN_PULL.observe(cur_txns as f64); + counters::BLOCK_BYTES_WHEN_PULL.observe(cur_bytes as f64); + counters::PROOF_SIZE_WHEN_PULL.observe(ret.len() as f64); + counters::EXCLUDED_TXNS_WHEN_PULL.observe(excluded_txns as f64); + + // Number of proofs remaining in proof queue after the pull + let mut num_proofs_remaining_after_pull = 0; + let mut num_txns_remaining_after_pull = 0; + let excluded_batch_keys = excluded_batches + .iter() + .map(BatchKey::from_info) + .collect::>(); + for (batch_key, proof) in &self.batch_to_proof { + if proof.is_some() + && !ret + .iter() + .any(|p| BatchKey::from_info(p.info()) == *batch_key) + && !excluded_batch_keys.contains(batch_key) + { + num_proofs_remaining_after_pull += 1; + num_txns_remaining_after_pull += proof.as_ref().unwrap().0.num_txns(); + } + } + counters::NUM_PROOFS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION + .observe(num_proofs_remaining_after_pull as f64); + counters::NUM_TXNS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION + .observe(num_txns_remaining_after_pull as f64); + + // Stable sort, so the order of proofs within an author will not change. 
+ ret.sort_by_key(|proof| Reverse(proof.gas_bucket_start())); + (ret, !full) + } else { + (Vec::new(), !full) + } + } + + fn handle_updated_block_timestamp(&mut self, block_timestamp: u64) { + assert!( + self.latest_block_timestamp <= block_timestamp, + "Decreasing block timestamp" + ); + self.latest_block_timestamp = block_timestamp; + + let expired = self.expirations.expire(block_timestamp); + let mut num_expired_but_not_committed = 0; + for key in &expired { + if let Some(mut queue) = self.author_to_batches.remove(&key.author()) { + if let Some(batch) = queue.remove(key) { + if self + .batch_to_proof + .get(&key.batch_key) + .expect("Entry for unexpired batch must exist") + .is_some() + { + // non-committed proof that is expired + num_expired_but_not_committed += 1; + counters::GAP_BETWEEN_BATCH_EXPIRATION_AND_CURRENT_TIME_WHEN_COMMIT + .observe((block_timestamp - batch.expiration()) as f64); + self.txn_summary_to_batches.retain(|_, batches| { + batches.remove(&key.batch_key); + !batches.is_empty() + }); + self.batches_with_txn_summary.remove(&key.batch_key); + self.dec_remaining(&batch.author(), batch.num_txns()); + } + claims::assert_some!(self.batch_to_proof.remove(&key.batch_key)); + } + if !queue.is_empty() { + self.author_to_batches.insert(key.author(), queue); + } + } + } + counters::NUM_PROOFS_EXPIRED_WHEN_COMMIT.inc_by(num_expired_but_not_committed); + } + + pub(crate) fn remaining_txns_and_proofs(&self) -> (u64, u64) { + counters::NUM_TOTAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_txns_with_duplicates as f64); + counters::NUM_TOTAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_proofs as f64); + counters::NUM_LOCAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_local_txns as f64); + counters::NUM_LOCAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_local_proofs as f64); + let remaining_txns_without_duplicates = self.remaining_txns_without_duplicates(); + counters::NUM_TOTAL_TXNS_LEFT_ON_UPDATE_WITHOUT_DUPLICATES + .observe(remaining_txns_without_duplicates 
as f64); + //count the number of transactions with more than one batches + counters::TXNS_WITH_DUPLICATE_BATCHES.set( + self.txn_summary_to_batches + .iter() + .filter(|(_, batches)| batches.len() > 1) + .count() as i64, + ); + + counters::TXNS_IN_PROOF_QUEUE.set(self.txn_summary_to_batches.len() as i64); + + // count the number of batches with proofs but without txn summaries + counters::PROOFS_WITHOUT_BATCH_DATA.set( + self.batch_to_proof + .iter() + .map(|(batch_key, proof)| { + if proof.is_some() && !self.batches_with_txn_summary.contains(batch_key) { + 1 + } else { + 0 + } + }) + .sum::(), + ); + + counters::PROOFS_IN_PROOF_QUEUE.set( + self.batch_to_proof + .values() + .map(|proof| if proof.is_some() { 1 } else { 0 }) + .sum::(), + ); + (remaining_txns_without_duplicates, self.remaining_proofs) + } + + // Mark in the hashmap committed PoS, but keep them until they expire + fn mark_committed(&mut self, batches: Vec) { + for batch in &batches { + let batch_key = BatchKey::from_info(batch); + if let Some(Some((proof, insertion_time))) = self.batch_to_proof.get(&batch_key) { + counters::pos_to_commit( + proof.gas_bucket_start(), + insertion_time.elapsed().as_secs_f64(), + ); + self.dec_remaining(&batch.author(), batch.num_txns()); + } + self.batch_to_proof.insert(batch_key.clone(), None); + self.batches_with_txn_summary.remove(&batch_key); + self.txn_summary_to_batches.retain(|_, batches| { + batches.remove(&batch_key); + !batches.is_empty() + }); + } + } + + pub async fn start(mut self, mut command_rx: tokio::sync::mpsc::Receiver) { + loop { + let _timer = counters::PROOF_MANAGER_MAIN_LOOP.start_timer(); + if let Some(msg) = command_rx.recv().await { + match msg { + ProofQueueCommand::AddProofs(proofs, response_sender) => { + for proof in proofs { + self.push(proof); + } + if let Err(e) = response_sender.send(self.remaining_txns_and_proofs()) { + warn!("Failed to send response to AddProofs: {:?}", e); + } + }, + ProofQueueCommand::PullProofs { + 
excluded_batches, + max_txns, + max_bytes, + return_non_full, + response_sender, + } => { + let (proofs, full) = self.pull_proofs( + &excluded_batches, + max_txns, + max_bytes, + return_non_full, + ); + if let Err(e) = response_sender.send((proofs, full)) { + warn!("Failed to send response to PullProofs: {:?}", e); + } + }, + ProofQueueCommand::MarkCommitted(batches, block_timestamp, response_sender) => { + self.mark_committed(batches); + self.handle_updated_block_timestamp(block_timestamp); + if let Err(e) = response_sender.send(self.remaining_txns_and_proofs()) { + error!("Failed to send response to MarkCommitted: {:?}", e); + } + }, + ProofQueueCommand::AddBatches(batch_summaries) => { + for (batch_info, txn_summaries) in batch_summaries { + let batch_key = BatchKey::from_info(&batch_info); + for txn_summary in txn_summaries { + self.txn_summary_to_batches + .entry(txn_summary) + .or_default() + .insert(batch_key.clone()); + } + self.batches_with_txn_summary.insert(batch_key); + } + }, + } + } + } + } +} From 4a4d8842c0126228c7081947d2c41491d042ed87 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Fri, 14 Jun 2024 13:35:04 -0700 Subject: [PATCH 27/67] Moving counters --- consensus/src/counters.rs | 3 +- consensus/src/quorum_store/counters.rs | 112 +++++++++++++------------ 2 files changed, 60 insertions(+), 55 deletions(-) diff --git a/consensus/src/counters.rs b/consensus/src/counters.rs index 9a45588b95013..e6a2bee65f973 100644 --- a/consensus/src/counters.rs +++ b/consensus/src/counters.rs @@ -748,7 +748,7 @@ pub static NUM_TXNS_PER_BLOCK: Lazy = Lazy::new(|| { .unwrap() }); -/// Histogram for the number of txns per (committed) blocks. +/// Histogram for the number of input txns in the committed blocks. 
pub static NUM_INPUT_TXNS_PER_BLOCK: Lazy = Lazy::new(|| { register_histogram!( "aptos_consensus_num_input_txns_per_block", @@ -758,6 +758,7 @@ pub static NUM_INPUT_TXNS_PER_BLOCK: Lazy = Lazy::new(|| { .unwrap() }); +/// Histogram for the number of bytes in the committed blocks. pub static NUM_BYTES_PER_BLOCK: Lazy = Lazy::new(|| { register_histogram!( "aptos_consensus_num_bytes_per_block", diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 6e04be81f0f72..2d56cacdbc7cf 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -101,38 +101,6 @@ pub static BATCH_GENERATOR_MAIN_LOOP: Lazy = Lazy::new(|| { ) }); -pub static PROOFS_WITHOUT_BATCH_DATA: Lazy = Lazy::new(|| { - register_int_gauge!( - "quorum_store_proofs_without_batch_data", - "Number of proofs received without batch data" - ) - .unwrap() -}); - -pub static TXNS_WITH_DUPLICATE_BATCHES: Lazy = Lazy::new(|| { - register_int_gauge!( - "quorum_store_txns_with_duplicate_batches", - "Number of transactions received with duplicate batches" - ) - .unwrap() -}); - -pub static TXNS_IN_PROOF_QUEUE: Lazy = Lazy::new(|| { - register_int_gauge!( - "quorum_store_txns_in_proof_queue", - "Number of transactions in the proof queue" - ) - .unwrap() -}); - -pub static PROOFS_IN_PROOF_QUEUE: Lazy = Lazy::new(|| { - register_int_gauge!( - "quorum_store_proofs_in_proof_queue", - "Number of proofs in the proof queue" - ) - .unwrap() -}); - ////////////////////// // NEW QUORUM STORE ////////////////////// @@ -241,28 +209,6 @@ pub static EXCLUDED_TXNS_WHEN_PULL: Lazy = Lazy::new(|| { .unwrap() }); -pub static NUM_PROOFS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION: Lazy = Lazy::new( - || { - register_histogram!( - "quorum_store_num_proofs_left_in_proof_queue_after_proposal_generation", - "Histogram for the number of proofs left in the proof queue after block proposal generation.", - PROOF_COUNT_BUCKETS.clone(), - ) - .unwrap() - }, -); - 
-pub static NUM_TXNS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION: Lazy = Lazy::new( - || { - register_histogram!( - "quorum_store_num_txns_left_in_proof_queue_after_proposal_generation", - "Histogram for the number of transactions left in the proof queue after block proposal generation.", - TRANSACTION_COUNT_BUCKETS.clone(), - ) - .unwrap() - }, -); - pub static BATCH_IN_PROGRESS_COMMITTED: Lazy = Lazy::new(|| { register_int_counter!( "quorum_store_batch_in_progress_committed", @@ -366,6 +312,64 @@ pub fn pos_to_commit(bucket: u64, secs: f64) { .observe(secs); } +////////////////////// +// Proof Queue +////////////////////// + +pub static PROOFS_WITHOUT_BATCH_DATA: Lazy = Lazy::new(|| { + register_int_gauge!( + "quorum_store_proofs_without_batch_data", + "Number of proofs received without batch data" + ) + .unwrap() +}); + +pub static TXNS_WITH_DUPLICATE_BATCHES: Lazy = Lazy::new(|| { + register_int_gauge!( + "quorum_store_txns_with_duplicate_batches", + "Number of transactions received with duplicate batches" + ) + .unwrap() +}); + +pub static TXNS_IN_PROOF_QUEUE: Lazy = Lazy::new(|| { + register_int_gauge!( + "quorum_store_txns_in_proof_queue", + "Number of transactions in the proof queue" + ) + .unwrap() +}); + +pub static PROOFS_IN_PROOF_QUEUE: Lazy = Lazy::new(|| { + register_int_gauge!( + "quorum_store_proofs_in_proof_queue", + "Number of proofs in the proof queue" + ) + .unwrap() +}); + +pub static NUM_PROOFS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION: Lazy = Lazy::new( + || { + register_histogram!( + "quorum_store_num_proofs_left_in_proof_queue_after_proposal_generation", + "Histogram for the number of proofs left in the proof queue after block proposal generation.", + PROOF_COUNT_BUCKETS.clone(), + ) + .unwrap() + }, +); + +pub static NUM_TXNS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION: Lazy = Lazy::new( + || { + register_histogram!( + "quorum_store_num_txns_left_in_proof_queue_after_proposal_generation", + "Histogram for the number of transactions 
left in the proof queue after block proposal generation.", + TRANSACTION_COUNT_BUCKETS.clone(), + ) + .unwrap() + }, +); + /// Histogram for the number of total txns left after adding or cleaning batches. pub static NUM_TOTAL_TXNS_LEFT_ON_UPDATE: Lazy = Lazy::new(|| { register_avg_counter( From a9871d8c60918bf2fb8a1e13671681906328a040 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Fri, 14 Jun 2024 14:08:22 -0700 Subject: [PATCH 28/67] Use transaction summary --- consensus/src/quorum_store/types.rs | 8 +++++--- consensus/src/quorum_store/utils.rs | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/consensus/src/quorum_store/types.rs b/consensus/src/quorum_store/types.rs index a207febfa6a3a..41875a6d70097 100644 --- a/consensus/src/quorum_store/types.rs +++ b/consensus/src/quorum_store/types.rs @@ -3,7 +3,7 @@ use anyhow::ensure; use aptos_consensus_types::{ - common::BatchPayload, + common::{BatchPayload, TransactionSummary}, proof_of_store::{BatchId, BatchInfo}, }; use aptos_crypto::{hash::CryptoHash, HashValue}; @@ -172,11 +172,13 @@ impl Batch { self.payload.into_transactions() } - pub fn summary(&self) -> Vec<(PeerId, u64)> { + pub fn summary(&self) -> Vec { self.payload .txns() .iter() - .map(|txn| (txn.sender(), txn.sequence_number())) + .map(|txn| { + TransactionSummary::new(txn.sender(), txn.sequence_number(), txn.committed_hash()) + }) .collect() } diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 67f2fcc9c8bd1..f0cb018178245 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -199,7 +199,7 @@ pub enum ProofQueueCommand { AddProofs(Vec, oneshot::Sender<(u64, u64)>), // Batch coordinator sends this command to add the received batches to the proof queue. // For each transaction, the proof queue stores the list of batches containing the transaction. 
- AddBatches(Vec<(BatchInfo, Vec<(PeerId, u64)>)>), + AddBatches(Vec<(BatchInfo, Vec)>), // Proof manager sends this command to pull proofs from the proof queue to // include in the block proposal. PullProofs { @@ -221,9 +221,9 @@ pub struct ProofQueue { author_to_batches: HashMap>, // ProofOfStore and insertion_time. None if committed batch_to_proof: HashMap>, - // Map of txn_summary = (sender, sequence number) to all the batches that contain + // Map of txn_summary = (sender, sequence number, hash) to all the batches that contain // the transaction. This helps in counting the number of unique transactions in the pipeline. - txn_summary_to_batches: HashMap<(PeerId, u64), HashSet>, + txn_summary_to_batches: HashMap>, // List of batches for which we received txn summaries from the batch coordinator batches_with_txn_summary: HashSet, // Expiration index From 13669caa35ee439a778523baf5670d27845bb451 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Fri, 14 Jun 2024 15:27:49 -0700 Subject: [PATCH 29/67] intelligent pull proofs --- consensus/src/quorum_store/counters.rs | 18 +++++++++++ consensus/src/quorum_store/utils.rs | 43 ++++++++++++++++++++------ testsuite/forge-cli/src/main.rs | 2 +- 3 files changed, 53 insertions(+), 10 deletions(-) diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 2d56cacdbc7cf..173b8deaa54aa 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -144,6 +144,24 @@ pub static BLOCK_SIZE_WHEN_PULL: Lazy = Lazy::new(|| { .unwrap() }); +pub static TOTAL_BLOCK_SIZE_WHEN_PULL: Lazy = Lazy::new(|| { + register_histogram!( + "quorum_store_total_block_size_when_pull", + "Histogram for the total size of transactions per block when pulled for consensus.", + BYTE_BUCKETS.clone(), + ) + .unwrap() +}); + +pub static EXTRA_TXNS_WHEN_PULL: Lazy = Lazy::new(|| { + register_histogram!( + "quorum_store_extra_txns_when_pull", + "Histogram for the number of extra 
transactions in a block when pulled for consensus.", + TRANSACTION_COUNT_BUCKETS.clone(), + ) + .unwrap() +}); + pub static NUM_INLINE_BATCHES: Lazy = Lazy::new(|| { register_histogram!( "num_inline_batches_in_block_proposal", diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index f0cb018178245..b1ba781104585 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -224,8 +224,8 @@ pub struct ProofQueue { // Map of txn_summary = (sender, sequence number, hash) to all the batches that contain // the transaction. This helps in counting the number of unique transactions in the pipeline. txn_summary_to_batches: HashMap>, - // List of batches for which we received txn summaries from the batch coordinator - batches_with_txn_summary: HashSet, + // List of transaction summaries for each batch + batch_to_txn_summaries: HashMap>, // Expiration index expirations: TimeExpirations, latest_block_timestamp: u64, @@ -242,7 +242,7 @@ impl ProofQueue { author_to_batches: HashMap::new(), batch_to_proof: HashMap::new(), txn_summary_to_batches: HashMap::new(), - batches_with_txn_summary: HashSet::new(), + batch_to_txn_summaries: HashMap::new(), expirations: TimeExpirations::new(), latest_block_timestamp: 0, remaining_txns_with_duplicates: 0, @@ -296,7 +296,7 @@ impl ProofQueue { .batch_to_proof .iter() .filter_map(|(batch_key, proof)| { - if proof.is_some() && !self.batches_with_txn_summary.contains(batch_key) { + if proof.is_some() && !self.batch_to_txn_summaries.contains_key(batch_key) { Some(proof.as_ref().unwrap().0.num_txns()) } else { None @@ -353,8 +353,18 @@ impl ProofQueue { let mut ret = vec![]; let mut cur_bytes = 0; let mut cur_txns = 0; + let mut total_txns = 0; let mut excluded_txns = 0; let mut full = false; + let mut included_and_excluded_txns = HashSet::new(); + for batch_info in excluded_batches { + let batch_key = BatchKey::from_info(batch_info); + if let Some(txn_summaries) = 
self.batch_to_txn_summaries.get(&batch_key) { + for txn_summary in txn_summaries { + included_and_excluded_txns.insert(*txn_summary); + } + } + } let mut iters = vec![]; for (_, batches) in self.author_to_batches.iter() { @@ -373,8 +383,20 @@ impl ProofQueue { } else if let Some(Some((proof, insertion_time))) = self.batch_to_proof.get(&sort_key.batch_key) { + if let Some(txn_summaries) = + self.batch_to_txn_summaries.get(&sort_key.batch_key) + { + for txn_summary in txn_summaries { + if !included_and_excluded_txns.contains(txn_summary) { + included_and_excluded_txns.insert(*txn_summary); + cur_txns += 1; + } + } + } else { + cur_txns += batch.num_txns(); + } cur_bytes += batch.num_bytes(); - cur_txns += batch.num_txns(); + total_txns += batch.num_txns(); if cur_bytes > max_bytes || cur_txns > max_txns { // Exceeded the limit for requested bytes or number of transactions. full = true; @@ -407,6 +429,8 @@ impl ProofQueue { if full || return_non_full { counters::BLOCK_SIZE_WHEN_PULL.observe(cur_txns as f64); + counters::TOTAL_BLOCK_SIZE_WHEN_PULL.observe(total_txns as f64); + counters::EXTRA_TXNS_WHEN_PULL.observe((total_txns - cur_txns) as f64); counters::BLOCK_BYTES_WHEN_PULL.observe(cur_bytes as f64); counters::PROOF_SIZE_WHEN_PULL.observe(ret.len() as f64); counters::EXCLUDED_TXNS_WHEN_PULL.observe(excluded_txns as f64); @@ -468,7 +492,7 @@ impl ProofQueue { batches.remove(&key.batch_key); !batches.is_empty() }); - self.batches_with_txn_summary.remove(&key.batch_key); + self.batch_to_txn_summaries.remove(&key.batch_key); self.dec_remaining(&batch.author(), batch.num_txns()); } claims::assert_some!(self.batch_to_proof.remove(&key.batch_key)); @@ -504,7 +528,7 @@ impl ProofQueue { self.batch_to_proof .iter() .map(|(batch_key, proof)| { - if proof.is_some() && !self.batches_with_txn_summary.contains(batch_key) { + if proof.is_some() && !self.batch_to_txn_summaries.contains_key(batch_key) { 1 } else { 0 @@ -534,7 +558,7 @@ impl ProofQueue { 
self.dec_remaining(&batch.author(), batch.num_txns()); } self.batch_to_proof.insert(batch_key.clone(), None); - self.batches_with_txn_summary.remove(&batch_key); + self.batch_to_txn_summaries.remove(&batch_key); self.txn_summary_to_batches.retain(|_, batches| { batches.remove(&batch_key); !batches.is_empty() @@ -582,13 +606,14 @@ impl ProofQueue { ProofQueueCommand::AddBatches(batch_summaries) => { for (batch_info, txn_summaries) in batch_summaries { let batch_key = BatchKey::from_info(&batch_info); + self.batch_to_txn_summaries + .insert(batch_key.clone(), txn_summaries.clone()); for txn_summary in txn_summaries { self.txn_summary_to_batches .entry(txn_summary) .or_default() .insert(batch_key.clone()); } - self.batches_with_txn_summary.insert(batch_key); } }, } diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index 7cc337f1f806a..b70a68b14b9b1 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -2481,7 +2481,7 @@ fn pfn_const_tps( helm_values["chain"]["epoch_duration_secs"] = epoch_duration_secs.into(); })) .with_success_criteria( - SuccessCriteria::new(12000) + SuccessCriteria::new(5000) .add_no_restarts() .add_max_expired_tps(0) .add_max_failed_submission_tps(0) From 1264dad9e483fc8ecc443cb05fc3b6b13706fb31 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Fri, 14 Jun 2024 17:11:48 -0700 Subject: [PATCH 30/67] Fix a bug in pull proofs --- consensus/src/quorum_store/utils.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index f0cb018178245..e52f4801132c7 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -373,13 +373,15 @@ impl ProofQueue { } else if let Some(Some((proof, insertion_time))) = self.batch_to_proof.get(&sort_key.batch_key) { - cur_bytes += batch.num_bytes(); - cur_txns += batch.num_txns(); - if cur_bytes > max_bytes || cur_txns > max_txns { 
+ if cur_bytes + batch.num_bytes() > max_bytes + || cur_txns + batch.num_txns() > max_txns + { // Exceeded the limit for requested bytes or number of transactions. full = true; return false; } + cur_bytes += batch.num_bytes(); + cur_txns += batch.num_txns(); let bucket = proof.gas_bucket_start(); ret.push(proof.clone()); counters::pos_to_pull(bucket, insertion_time.elapsed().as_secs_f64()); From 244720cadd526c5bdef8f8bd164db565569508f4 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Fri, 14 Jun 2024 17:47:36 -0700 Subject: [PATCH 31/67] Fix the bug --- consensus/src/quorum_store/utils.rs | 36 ++++++++++++++++++----------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index b1ba781104585..15c6cc5b15f7f 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -352,7 +352,7 @@ impl ProofQueue { ) -> (Vec, bool) { let mut ret = vec![]; let mut cur_bytes = 0; - let mut cur_txns = 0; + let mut cur_txns: u64 = 0; let mut total_txns = 0; let mut excluded_txns = 0; let mut full = false; @@ -383,25 +383,35 @@ impl ProofQueue { } else if let Some(Some((proof, insertion_time))) = self.batch_to_proof.get(&sort_key.batch_key) { - if let Some(txn_summaries) = + let temp_txns = if let Some(txn_summaries) = self.batch_to_txn_summaries.get(&sort_key.batch_key) { - for txn_summary in txn_summaries { - if !included_and_excluded_txns.contains(txn_summary) { - included_and_excluded_txns.insert(*txn_summary); - cur_txns += 1; - } - } + cur_txns + + txn_summaries + .iter() + .filter(|txn_summary| { + !included_and_excluded_txns.contains(txn_summary) + }) + .count() as u64 } else { - cur_txns += batch.num_txns(); - } - cur_bytes += batch.num_bytes(); - total_txns += batch.num_txns(); - if cur_bytes > max_bytes || cur_txns > max_txns { + cur_txns + batch.num_txns() + }; + if cur_bytes + batch.num_bytes() > max_bytes || temp_txns > max_txns { // Exceeded the 
limit for requested bytes or number of transactions. full = true; return false; } + cur_bytes += batch.num_bytes(); + total_txns += batch.num_txns(); + cur_txns += self.batch_to_txn_summaries.get(&sort_key.batch_key).map_or( + batch.num_txns(), + |summaries| { + summaries + .iter() + .filter(|summary| included_and_excluded_txns.insert(**summary)) + .count() as u64 + }, + ); let bucket = proof.gas_bucket_start(); ret.push(proof.clone()); counters::pos_to_pull(bucket, insertion_time.elapsed().as_secs_f64()); From 822746a0a7fe621c6fc9c0fa87bf923c99e2f042 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 17 Jun 2024 11:34:39 -0700 Subject: [PATCH 32/67] Rest to full to false in every iteration --- consensus/src/quorum_store/counters.rs | 18 ++++++++++++++++++ consensus/src/quorum_store/utils.rs | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 2d56cacdbc7cf..7f7968d4ad99b 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -316,6 +316,24 @@ pub fn pos_to_commit(bucket: u64, secs: f64) { // Proof Queue ////////////////////// +pub static PULL_PROOFS_MAX_TXNS: Lazy = Lazy::new(|| { + register_histogram!( + "quorum_store_pull_proofs_max_txns", + "Histogram for the number of transactions pulled when pulling proofs", + TRANSACTION_COUNT_BUCKETS.clone(), + ) + .unwrap() +}); + +pub static PULL_PROOFS_MAX_BYTES: Lazy = Lazy::new(|| { + register_histogram!( + "quorum_store_pull_proofs_max_bytes", + "Histogram for the number of bytes pulled when pulling proofs", + BYTE_BUCKETS.clone(), + ) + .unwrap() +}); + pub static PROOFS_WITHOUT_BATCH_DATA: Lazy = Lazy::new(|| { register_int_gauge!( "quorum_store_proofs_without_batch_data", diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index e52f4801132c7..e534fe211c064 100644 --- a/consensus/src/quorum_store/utils.rs +++ 
b/consensus/src/quorum_store/utils.rs @@ -355,6 +355,9 @@ impl ProofQueue { let mut cur_txns = 0; let mut excluded_txns = 0; let mut full = false; + + counters::PULL_PROOFS_MAX_TXNS.observe(max_txns as f64); + counters::PULL_PROOFS_MAX_BYTES.observe(max_bytes as f64); let mut iters = vec![]; for (_, batches) in self.author_to_batches.iter() { @@ -363,6 +366,7 @@ impl ProofQueue { while !iters.is_empty() { iters.shuffle(&mut thread_rng()); + full = false; iters.retain_mut(|iter| { if full { return false; From 9982f971b91d4fb30a35d41ed5d6a46dc206fa7c Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 17 Jun 2024 12:47:18 -0700 Subject: [PATCH 33/67] Addressing PR comments --- consensus/src/quorum_store/proof_manager.rs | 15 ++++++++------- consensus/src/quorum_store/utils.rs | 4 ++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/consensus/src/quorum_store/proof_manager.rs b/consensus/src/quorum_store/proof_manager.rs index 675a066637ba5..842052e05ec69 100644 --- a/consensus/src/quorum_store/proof_manager.rs +++ b/consensus/src/quorum_store/proof_manager.rs @@ -25,6 +25,7 @@ use std::{ collections::{BTreeMap, HashMap, HashSet}, sync::Arc, }; +use tokio::sync::mpsc::Sender; #[derive(Debug)] pub enum ProofManagerCommand { @@ -135,7 +136,7 @@ pub struct ProofManager { back_pressure_total_proof_limit: u64, remaining_total_proof_num: u64, allow_batches_without_pos_in_proposal: bool, - proof_queue_tx: Arc>, + proof_queue_tx: Arc>, } impl ProofManager { @@ -144,7 +145,7 @@ impl ProofManager { back_pressure_total_proof_limit: u64, batch_store: Arc, allow_batches_without_pos_in_proposal: bool, - proof_queue_tx: Arc>, + proof_queue_tx: Arc>, ) -> Self { Self { batch_queue: BatchQueue::new(batch_store), @@ -158,11 +159,14 @@ impl ProofManager { } pub(crate) async fn receive_proofs(&mut self, proofs: Vec) { + for proof in &proofs { + self.batch_queue.remove_batch(proof.info()); + } if !proofs.is_empty() { let (response_tx, response_rx) = 
oneshot::channel(); if self .proof_queue_tx - .send(ProofQueueCommand::AddProofs(proofs.clone(), response_tx)) + .send(ProofQueueCommand::AddProofs(proofs, response_tx)) .await .is_ok() { @@ -177,9 +181,6 @@ impl ProofManager { warn!("Failed to add proofs to proof queue"); } } - for proof in proofs.into_iter() { - self.batch_queue.remove_batch(proof.info()); - } } pub(crate) fn receive_batches(&mut self, batches: Vec) { @@ -339,7 +340,7 @@ impl ProofManager { pub async fn start( mut self, - back_pressure_tx: tokio::sync::mpsc::Sender, + back_pressure_tx: Sender, mut proposal_rx: Receiver, mut proof_rx: tokio::sync::mpsc::Receiver, ) { diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index e534fe211c064..3ecfca0d7e6f2 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -273,7 +273,7 @@ impl ProofQueue { } fn remaining_txns_without_duplicates(&self) -> u64 { - // All the bath keys for which batch_to_proof is not None. This is the set of unexpired and uncommitted proofs. + // All the batch keys for which batch_to_proof is not None. This is the set of unexpired and uncommitted proofs. 
let unexpired_batch_keys = self .batch_to_proof .iter() @@ -355,7 +355,7 @@ impl ProofQueue { let mut cur_txns = 0; let mut excluded_txns = 0; let mut full = false; - + counters::PULL_PROOFS_MAX_TXNS.observe(max_txns as f64); counters::PULL_PROOFS_MAX_BYTES.observe(max_bytes as f64); From 3a234140c75c08b2900e6cd37ae64d0415789d58 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 17 Jun 2024 13:28:24 -0700 Subject: [PATCH 34/67] Move backpressure_tx to proof queue --- consensus/src/quorum_store/proof_manager.rs | 78 +++---------------- .../src/quorum_store/quorum_store_builder.rs | 20 +++-- .../quorum_store/tests/proof_manager_test.rs | 7 +- consensus/src/quorum_store/tests/utils.rs | 2 +- consensus/src/quorum_store/utils.rs | 57 +++++++++++--- 5 files changed, 73 insertions(+), 91 deletions(-) diff --git a/consensus/src/quorum_store/proof_manager.rs b/consensus/src/quorum_store/proof_manager.rs index 842052e05ec69..ce31f6404424b 100644 --- a/consensus/src/quorum_store/proof_manager.rs +++ b/consensus/src/quorum_store/proof_manager.rs @@ -4,7 +4,6 @@ use crate::{ monitor, quorum_store::{ - batch_generator::BackPressure, batch_store::BatchStore, counters, utils::{BatchSortKey, ProofQueueCommand}, @@ -131,28 +130,18 @@ impl BatchQueue { pub struct ProofManager { batch_queue: BatchQueue, - back_pressure_total_txn_limit: u64, - remaining_total_txn_num: u64, - back_pressure_total_proof_limit: u64, - remaining_total_proof_num: u64, allow_batches_without_pos_in_proposal: bool, proof_queue_tx: Arc>, } impl ProofManager { pub fn new( - back_pressure_total_txn_limit: u64, - back_pressure_total_proof_limit: u64, batch_store: Arc, allow_batches_without_pos_in_proposal: bool, proof_queue_tx: Arc>, ) -> Self { Self { batch_queue: BatchQueue::new(batch_store), - back_pressure_total_txn_limit, - remaining_total_txn_num: 0, - back_pressure_total_proof_limit, - remaining_total_proof_num: 0, allow_batches_without_pos_in_proposal, proof_queue_tx, } @@ -163,22 +152,12 @@ impl 
ProofManager { self.batch_queue.remove_batch(proof.info()); } if !proofs.is_empty() { - let (response_tx, response_rx) = oneshot::channel(); - if self + if let Err(e) = self .proof_queue_tx - .send(ProofQueueCommand::AddProofs(proofs, response_tx)) + .send(ProofQueueCommand::AddProofs(proofs)) .await - .is_ok() { - if let Ok((remaining_total_txn_num, remaining_total_proof_num)) = response_rx.await - { - self.remaining_total_txn_num = remaining_total_txn_num; - self.remaining_total_proof_num = remaining_total_proof_num; - } else { - warn!("Failed to get response from proof queue after adding proofs"); - } - } else { - warn!("Failed to add proofs to proof queue"); + warn!("Failed to add proofs to proof queue with error: {:?}", e); } } } @@ -203,25 +182,15 @@ impl ProofManager { self.batch_queue.remove_batch(batch); } - let (response_tx, response_rx) = oneshot::channel(); - if self + if let Err(e) = self .proof_queue_tx - .send(ProofQueueCommand::MarkCommitted( - batches, - block_timestamp, - response_tx, - )) + .send(ProofQueueCommand::MarkCommitted(batches, block_timestamp)) .await - .is_ok() { - if let Ok((remaining_total_txn_num, remaining_total_proof_num)) = response_rx.await { - self.remaining_total_txn_num = remaining_total_txn_num; - self.remaining_total_proof_num = remaining_total_proof_num; - } else { - warn!("Failed to get response from proof queue after marking proofs as committed"); - } - } else { - warn!("Failed to mark proofs as committed in proof queue"); + warn!( + "Failed to mark proofs as committed in proof queue with error: {:?}", + e + ); } } @@ -330,39 +299,17 @@ impl ProofManager { } } - /// return true when quorum store is back pressured - pub(crate) fn qs_back_pressure(&self) -> BackPressure { - BackPressure { - txn_count: self.remaining_total_txn_num > self.back_pressure_total_txn_limit, - proof_count: self.remaining_total_proof_num > self.back_pressure_total_proof_limit, - } - } - pub async fn start( mut self, - back_pressure_tx: Sender, mut 
proposal_rx: Receiver, mut proof_rx: tokio::sync::mpsc::Receiver, ) { - let mut back_pressure = BackPressure { - txn_count: false, - proof_count: false, - }; - loop { let _timer = counters::PROOF_MANAGER_MAIN_LOOP.start_timer(); tokio::select! { Some(msg) = proposal_rx.next() => monitor!("proof_manager_handle_proposal", { self.handle_proposal_request(msg).await; - - let updated_back_pressure = self.qs_back_pressure(); - if updated_back_pressure != back_pressure { - back_pressure = updated_back_pressure; - if back_pressure_tx.send(back_pressure).await.is_err() { - debug!("Failed to send back_pressure for proposal"); - } - } }), Some(msg) = proof_rx.recv() => { monitor!("proof_manager_handle_command", { @@ -386,13 +333,6 @@ impl ProofManager { ).await; }, } - let updated_back_pressure = self.qs_back_pressure(); - if updated_back_pressure != back_pressure { - back_pressure = updated_back_pressure; - if back_pressure_tx.send(back_pressure).await.is_err() { - debug!("Failed to send back_pressure for commit notification"); - } - } }) } } diff --git a/consensus/src/quorum_store/quorum_store_builder.rs b/consensus/src/quorum_store/quorum_store_builder.rs index 112655356560a..7891194df99f6 100644 --- a/consensus/src/quorum_store/quorum_store_builder.rs +++ b/consensus/src/quorum_store/quorum_store_builder.rs @@ -320,9 +320,19 @@ impl InnerBuilder { ) ); - let proof_queue = ProofQueue::new(self.author); + let proof_queue = ProofQueue::new( + self.author, + self.config.back_pressure.backlog_txn_limit_count, + self.config + .back_pressure + .backlog_per_validator_batch_limit_count + * self.num_validators, + ); let proof_queue_cmd_rx = self.proof_queue_cmd_rx.take().unwrap(); - spawn_named!("proof_queue", proof_queue.start(proof_queue_cmd_rx)); + spawn_named!( + "proof_queue", + proof_queue.start(self.back_pressure_tx.clone(), proof_queue_cmd_rx) + ); for (i, remote_batch_coordinator_cmd_rx) in self.remote_batch_coordinator_cmd_rx.into_iter().enumerate() @@ -367,11 +377,6 @@ 
impl InnerBuilder { let proof_manager_cmd_rx = self.proof_manager_cmd_rx.take().unwrap(); let proof_manager = ProofManager::new( - self.config.back_pressure.backlog_txn_limit_count, - self.config - .back_pressure - .backlog_per_validator_batch_limit_count - * self.num_validators, self.batch_store.clone().unwrap(), self.config.allow_batches_without_pos_in_proposal, self.proof_queue_cmd_tx.clone(), @@ -379,7 +384,6 @@ impl InnerBuilder { spawn_named!( "proof_manager", proof_manager.start( - self.back_pressure_tx.clone(), self.consensus_to_quorum_store_receiver, proof_manager_cmd_rx, ) diff --git a/consensus/src/quorum_store/tests/proof_manager_test.rs b/consensus/src/quorum_store/tests/proof_manager_test.rs index 39822c04334e0..25595e8a21853 100644 --- a/consensus/src/quorum_store/tests/proof_manager_test.rs +++ b/consensus/src/quorum_store/tests/proof_manager_test.rs @@ -16,10 +16,11 @@ use std::{collections::HashSet, sync::Arc}; async fn create_proof_manager() -> ProofManager { let (proof_queue_tx, proof_queue_rx) = tokio::sync::mpsc::channel(100); - let proof_queue = ProofQueue::new(PeerId::random()); - tokio::spawn(proof_queue.start(proof_queue_rx)); + let proof_queue = ProofQueue::new(PeerId::random(), 10, 10); + let (backpressure_tx, _) = tokio::sync::mpsc::channel(10); + tokio::spawn(proof_queue.start(backpressure_tx, proof_queue_rx)); let batch_store = batch_store_for_test(5 * 1024 * 1024); - ProofManager::new(10, 10, batch_store, true, Arc::new(proof_queue_tx)) + ProofManager::new(batch_store, true, Arc::new(proof_queue_tx)) } fn create_proof(author: PeerId, expiration: u64, batch_sequence: u64) -> ProofOfStore { diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index 922ae1d67a3af..722e2a70b4cf4 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ b/consensus/src/quorum_store/tests/utils.rs @@ -27,7 +27,7 @@ fn proof_of_store(author: PeerId, batch_id: BatchId, gas_bucket_start: u64) -> P #[test] fn 
test_proof_queue_sorting() { let my_peer_id = PeerId::random(); - let mut proof_queue = ProofQueue::new(my_peer_id); + let mut proof_queue = ProofQueue::new(my_peer_id, 10, 10); let author_0 = PeerId::random(); let author_1 = PeerId::random(); diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 3ecfca0d7e6f2..136f57f4d9e08 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -1,6 +1,7 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 +use super::batch_generator::BackPressure; use crate::{monitor, quorum_store::counters}; use aptos_consensus_types::{ common::{TransactionInProgress, TransactionSummary}, @@ -196,7 +197,7 @@ impl Ord for BatchSortKey { pub enum ProofQueueCommand { // Proof manager sends this command to add the proofs to the proof queue // We send back (remaining_txns, remaining_proofs) to the proof manager - AddProofs(Vec, oneshot::Sender<(u64, u64)>), + AddProofs(Vec), // Batch coordinator sends this command to add the received batches to the proof queue. // For each transaction, the proof queue stores the list of batches containing the transaction. AddBatches(Vec<(BatchInfo, Vec)>), @@ -212,7 +213,7 @@ pub enum ProofQueueCommand { // Proof manager sends this command to mark these batches as committed and // update the block timestamp. 
// We send back the (remaining_txns, remaining_proofs) to the proof manager - MarkCommitted(Vec, u64, oneshot::Sender<(u64, u64)>), + MarkCommitted(Vec, u64), } pub struct ProofQueue { @@ -229,6 +230,8 @@ pub struct ProofQueue { // Expiration index expirations: TimeExpirations, latest_block_timestamp: u64, + back_pressure_total_txn_limit: u64, + back_pressure_total_proof_limit: u64, remaining_txns_with_duplicates: u64, remaining_proofs: u64, remaining_local_txns: u64, @@ -236,7 +239,11 @@ pub struct ProofQueue { } impl ProofQueue { - pub(crate) fn new(my_peer_id: PeerId) -> Self { + pub(crate) fn new( + my_peer_id: PeerId, + back_pressure_total_txn_limit: u64, + back_pressure_total_proof_limit: u64, + ) -> Self { Self { my_peer_id, author_to_batches: HashMap::new(), @@ -245,6 +252,8 @@ impl ProofQueue { batches_with_txn_summary: HashSet::new(), expirations: TimeExpirations::new(), latest_block_timestamp: 0, + back_pressure_total_txn_limit, + back_pressure_total_proof_limit, remaining_txns_with_duplicates: 0, remaining_proofs: 0, remaining_local_txns: 0, @@ -338,6 +347,15 @@ impl ProofQueue { self.inc_remaining(&author, num_txns); } + /// return true when quorum store is back pressured + pub(crate) fn qs_back_pressure(&self) -> BackPressure { + let (remaining_total_txn_num, remaining_total_proof_num) = self.remaining_txns_and_proofs(); + BackPressure { + txn_count: remaining_total_txn_num > self.back_pressure_total_txn_limit, + proof_count: remaining_total_proof_num > self.back_pressure_total_proof_limit, + } + } + // gets excluded and iterates over the vector returning non excluded or expired entries. 
// return the vector of pulled PoS, and the size of the remaining PoS // The flag in the second return argument is true iff the entire proof queue is fully utilized @@ -548,17 +566,31 @@ impl ProofQueue { } } - pub async fn start(mut self, mut command_rx: tokio::sync::mpsc::Receiver) { + pub async fn start( + mut self, + back_pressure_tx: tokio::sync::mpsc::Sender, + mut command_rx: tokio::sync::mpsc::Receiver, + ) { + let mut back_pressure = BackPressure { + txn_count: false, + proof_count: false, + }; + loop { let _timer = counters::PROOF_MANAGER_MAIN_LOOP.start_timer(); if let Some(msg) = command_rx.recv().await { match msg { - ProofQueueCommand::AddProofs(proofs, response_sender) => { + ProofQueueCommand::AddProofs(proofs) => { for proof in proofs { self.push(proof); } - if let Err(e) = response_sender.send(self.remaining_txns_and_proofs()) { - warn!("Failed to send response to AddProofs: {:?}", e); + + let updated_back_pressure = self.qs_back_pressure(); + if updated_back_pressure != back_pressure { + back_pressure = updated_back_pressure; + if back_pressure_tx.send(back_pressure).await.is_err() { + debug!("Failed to send back_pressure for proposal"); + } } }, ProofQueueCommand::PullProofs { @@ -578,11 +610,16 @@ impl ProofQueue { warn!("Failed to send response to PullProofs: {:?}", e); } }, - ProofQueueCommand::MarkCommitted(batches, block_timestamp, response_sender) => { + ProofQueueCommand::MarkCommitted(batches, block_timestamp) => { self.mark_committed(batches); self.handle_updated_block_timestamp(block_timestamp); - if let Err(e) = response_sender.send(self.remaining_txns_and_proofs()) { - error!("Failed to send response to MarkCommitted: {:?}", e); + + let updated_back_pressure = self.qs_back_pressure(); + if updated_back_pressure != back_pressure { + back_pressure = updated_back_pressure; + if back_pressure_tx.send(back_pressure).await.is_err() { + debug!("Failed to send back_pressure for proposal"); + } } }, 
ProofQueueCommand::AddBatches(batch_summaries) => { From 8c107be205d4658961e313d84225e35f4e8e4423 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 17 Jun 2024 15:04:42 -0700 Subject: [PATCH 35/67] Add info statement --- consensus/src/quorum_store/utils.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 136f57f4d9e08..f83e21dc6ca24 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -384,11 +384,7 @@ impl ProofQueue { while !iters.is_empty() { iters.shuffle(&mut thread_rng()); - full = false; iters.retain_mut(|iter| { - if full { - return false; - } if let Some((sort_key, batch)) = iter.next() { if excluded_batches.contains(batch) { excluded_txns += batch.num_txns(); @@ -398,7 +394,6 @@ impl ProofQueue { if cur_bytes + batch.num_bytes() > max_bytes || cur_txns + batch.num_txns() > max_txns { - // Exceeded the limit for requested bytes or number of transactions. full = true; return false; } @@ -408,7 +403,6 @@ impl ProofQueue { ret.push(proof.clone()); counters::pos_to_pull(bucket, insertion_time.elapsed().as_secs_f64()); if cur_bytes == max_bytes || cur_txns == max_txns { - // Exactly the limit for requested bytes or number of transactions. 
full = true; return false; } @@ -442,6 +436,7 @@ impl ProofQueue { .iter() .map(BatchKey::from_info) .collect::>(); + let mut remaining_proofs = vec![]; for (batch_key, proof) in &self.batch_to_proof { if proof.is_some() && !ret @@ -451,8 +446,13 @@ impl ProofQueue { { num_proofs_remaining_after_pull += 1; num_txns_remaining_after_pull += proof.as_ref().unwrap().0.num_txns(); + remaining_proofs.push(proof.as_ref().unwrap().0.clone()); } } + info!( + "cur_txns: {}, remaining_proofs: {:?}", + cur_txns, remaining_proofs + ); counters::NUM_PROOFS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION .observe(num_proofs_remaining_after_pull as f64); counters::NUM_TXNS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION From 75fabfeaee34c86973ebbcd3d8beed20f5602aea Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 17 Jun 2024 17:05:50 -0700 Subject: [PATCH 36/67] Change buckets --- consensus/src/counters.rs | 6 +++--- consensus/src/quorum_store/counters.rs | 30 +++++++++++++++----------- consensus/src/quorum_store/utils.rs | 6 ++++++ 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/consensus/src/counters.rs b/consensus/src/counters.rs index e6a2bee65f973..02f854587825f 100644 --- a/consensus/src/counters.rs +++ b/consensus/src/counters.rs @@ -733,9 +733,9 @@ pub static NUM_BLOCKS_IN_PIPELINE: Lazy = Lazy::new(|| { // .unwrap() // }); -const NUM_CONSENSUS_TRANSACTIONS_BUCKETS: [f64; 24] = [ - 5.0, 10.0, 20.0, 40.0, 75.0, 100.0, 200.0, 400.0, 800.0, 1200.0, 1800.0, 2500.0, 3300.0, - 4000.0, 5000.0, 6500.0, 8000.0, 10000.0, 12500.0, 15000.0, 18000.0, 21000.0, 25000.0, 30000.0, +const NUM_CONSENSUS_TRANSACTIONS_BUCKETS: [f64; 21] = [ + 5.0, 10.0, 20.0, 40.0, 75.0, 100.0, 200.0, 400.0, 800.0, 1200.0, 1400.0, 1500.0, 1600.0, + 1700.0, 1800.0, 1900.0, 2500.0, 3300.0, 4000.0, 5000.0, 6500.0, ]; /// Histogram for the number of txns per (committed) blocks. 
diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 7f7968d4ad99b..d271d0fd056eb 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -23,7 +23,7 @@ pub const POS_DUPLICATE_LABEL: &str = "duplicate"; static TRANSACTION_COUNT_BUCKETS: Lazy> = Lazy::new(|| { exponential_buckets( - /*start=*/ 1.5, /*factor=*/ 1.5, /*count=*/ 25, + /*start=*/ 10.0, /*factor=*/ 1.5, /*count=*/ 25, ) .unwrap() }); @@ -372,8 +372,7 @@ pub static NUM_PROOFS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION: Lazy = Lazy::new(|| { - register_avg_counter( + register_histogram!( "quorum_store_num_total_txns_left_on_update", "Histogram for the number of total txns left after adding or cleaning batches.", + TRANSACTION_COUNT_BUCKETS.clone() ) + .unwrap() }); pub static NUM_TOTAL_TXNS_LEFT_ON_UPDATE_WITHOUT_DUPLICATES: Lazy = Lazy::new(|| { - register_avg_counter( + register_histogram!( "quorum_store_num_total_txns_left_on_update_without_duplicates", "Histogram for the number of total txns left after adding or cleaning batches, without duplicates.", - ) + TRANSACTION_COUNT_BUCKETS.clone() + ).unwrap() }); /// Histogram for the number of total batches/PoS left after adding or cleaning batches. pub static NUM_TOTAL_PROOFS_LEFT_ON_UPDATE: Lazy = Lazy::new(|| { - register_avg_counter( + register_histogram!( "quorum_store_num_total_proofs_left_on_update", "Histogram for the number of total batches/PoS left after adding or cleaning batches.", + PROOF_COUNT_BUCKETS.clone() ) + .unwrap() }); /// Histogram for the number of local txns left after adding or cleaning batches. 
pub static NUM_LOCAL_TXNS_LEFT_ON_UPDATE: Lazy = Lazy::new(|| { - register_avg_counter( + register_histogram!( "quorum_store_num_local_txns_left_on_update", "Histogram for the number of locally created txns left after adding or cleaning batches.", + TRANSACTION_COUNT_BUCKETS.clone() ) + .unwrap() }); /// Histogram for the number of local batches/PoS left after adding or cleaning batches. pub static NUM_LOCAL_PROOFS_LEFT_ON_UPDATE: Lazy = Lazy::new(|| { - register_avg_counter( + register_histogram!( "quorum_store_num_local_proofs_left_on_update", "Histogram for the number of locally created batches/PoS left after adding or cleaning batches.", - ) + PROOF_COUNT_BUCKETS.clone() + ).unwrap() }); /// Counters diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index f83e21dc6ca24..f4146678cbf5f 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -350,6 +350,12 @@ impl ProofQueue { /// return true when quorum store is back pressured pub(crate) fn qs_back_pressure(&self) -> BackPressure { let (remaining_total_txn_num, remaining_total_proof_num) = self.remaining_txns_and_proofs(); + if remaining_total_txn_num > self.back_pressure_total_txn_limit { + info!( + "QuorumStore back pressured: txn_count: {}, proof_count: {}", + remaining_total_txn_num, remaining_total_proof_num + ); + } BackPressure { txn_count: remaining_total_txn_num > self.back_pressure_total_txn_limit, proof_count: remaining_total_proof_num > self.back_pressure_total_proof_limit, From f20dd13a187da56fdcefef144a3e480386b8dfd3 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 17 Jun 2024 17:12:20 -0700 Subject: [PATCH 37/67] Add some info statements --- consensus/src/quorum_store/utils.rs | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index f4146678cbf5f..57c9286615c3d 100644 --- 
a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -348,11 +348,26 @@ impl ProofQueue { } /// return true when quorum store is back pressured - pub(crate) fn qs_back_pressure(&self) -> BackPressure { + pub(crate) fn qs_back_pressure_1(&self) -> BackPressure { let (remaining_total_txn_num, remaining_total_proof_num) = self.remaining_txns_and_proofs(); if remaining_total_txn_num > self.back_pressure_total_txn_limit { info!( - "QuorumStore back pressured: txn_count: {}, proof_count: {}", + "QuorumStore back pressured Adding Proofs: txn_count: {}, proof_count: {}", + remaining_total_txn_num, remaining_total_proof_num + ); + } + BackPressure { + txn_count: remaining_total_txn_num > self.back_pressure_total_txn_limit, + proof_count: remaining_total_proof_num > self.back_pressure_total_proof_limit, + } + } + + /// return true when quorum store is back pressured + pub(crate) fn qs_back_pressure_2(&self) -> BackPressure { + let (remaining_total_txn_num, remaining_total_proof_num) = self.remaining_txns_and_proofs(); + if remaining_total_txn_num > self.back_pressure_total_txn_limit { + info!( + "QuorumStore back pressured Committed: txn_count: {}, proof_count: {}", remaining_total_txn_num, remaining_total_proof_num ); } @@ -591,7 +606,7 @@ impl ProofQueue { self.push(proof); } - let updated_back_pressure = self.qs_back_pressure(); + let updated_back_pressure = self.qs_back_pressure_1(); if updated_back_pressure != back_pressure { back_pressure = updated_back_pressure; if back_pressure_tx.send(back_pressure).await.is_err() { @@ -620,7 +635,7 @@ impl ProofQueue { self.mark_committed(batches); self.handle_updated_block_timestamp(block_timestamp); - let updated_back_pressure = self.qs_back_pressure(); + let updated_back_pressure = self.qs_back_pressure_2(); if updated_back_pressure != back_pressure { back_pressure = updated_back_pressure; if back_pressure_tx.send(back_pressure).await.is_err() { From 51ccb9dfbc8f4be724df13469683d34105b0b8ec Mon Sep 
17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 18 Jun 2024 18:02:20 -0700 Subject: [PATCH 38/67] Cleanup --- config/src/config/quorum_store_config.rs | 2 - consensus/src/counters.rs | 12 ---- consensus/src/quorum_store/batch_generator.rs | 6 +- consensus/src/quorum_store/counters.rs | 42 +++--------- consensus/src/quorum_store/tests/utils.rs | 68 ++++++++++++++++++- consensus/src/quorum_store/utils.rs | 53 +++++---------- testsuite/forge-cli/src/main.rs | 55 +++------------ 7 files changed, 106 insertions(+), 132 deletions(-) diff --git a/config/src/config/quorum_store_config.rs b/config/src/config/quorum_store_config.rs index 5f4c783f3fb85..20b7f890a630e 100644 --- a/config/src/config/quorum_store_config.rs +++ b/config/src/config/quorum_store_config.rs @@ -19,7 +19,6 @@ pub struct QuorumStoreBackPressureConfig { pub backlog_per_validator_batch_limit_count: u64, pub decrease_duration_ms: u64, pub increase_duration_ms: u64, - pub increase_fraction: f64, pub decrease_fraction: f64, pub dynamic_min_txn_per_s: u64, pub dynamic_max_txn_per_s: u64, @@ -35,7 +34,6 @@ impl Default for QuorumStoreBackPressureConfig { backlog_per_validator_batch_limit_count: 4, decrease_duration_ms: 1000, increase_duration_ms: 1000, - increase_fraction: 1.5, decrease_fraction: 0.5, dynamic_min_txn_per_s: 160, dynamic_max_txn_per_s: 4000, diff --git a/consensus/src/counters.rs b/consensus/src/counters.rs index 02f854587825f..a8032e450bc05 100644 --- a/consensus/src/counters.rs +++ b/consensus/src/counters.rs @@ -748,16 +748,6 @@ pub static NUM_TXNS_PER_BLOCK: Lazy = Lazy::new(|| { .unwrap() }); -/// Histogram for the number of input txns in the committed blocks. -pub static NUM_INPUT_TXNS_PER_BLOCK: Lazy = Lazy::new(|| { - register_histogram!( - "aptos_consensus_num_input_txns_per_block", - "Histogram for the number of input txns per (committed) blocks.", - NUM_CONSENSUS_TRANSACTIONS_BUCKETS.to_vec() - ) - .unwrap() -}); - /// Histogram for the number of bytes in the committed blocks. 
pub static NUM_BYTES_PER_BLOCK: Lazy = Lazy::new(|| { register_histogram!( @@ -1069,8 +1059,6 @@ pub fn update_counters_for_committed_blocks(blocks_to_commit: &[Arc= back_pressure_increase_duration { back_pressure_increase_latest = tick_start; dynamic_pull_txn_per_s = std::cmp::min( - (dynamic_pull_txn_per_s as f64 * self.config.back_pressure.increase_fraction) as u64, + dynamic_pull_txn_per_s + self.config.back_pressure.dynamic_min_txn_per_s, self.config.back_pressure.dynamic_max_txn_per_s, ); trace!("QS: dynamic_max_pull_txn_per_s: {}", dynamic_pull_txn_per_s); diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index d271d0fd056eb..8918aeed1c8ca 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -23,7 +23,7 @@ pub const POS_DUPLICATE_LABEL: &str = "duplicate"; static TRANSACTION_COUNT_BUCKETS: Lazy> = Lazy::new(|| { exponential_buckets( - /*start=*/ 10.0, /*factor=*/ 1.5, /*count=*/ 25, + /*start=*/ 1.5, /*factor=*/ 1.5, /*count=*/ 25, ) .unwrap() }); @@ -36,6 +36,9 @@ static PROOF_COUNT_BUCKETS: Lazy> = Lazy::new(|| { .to_vec() }); +static BATCH_COUNT_BUCKETS: Lazy> = + Lazy::new(|| [1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 20.0, 25.0].to_vec()); + static BYTE_BUCKETS: Lazy> = Lazy::new(|| { exponential_buckets( /*start=*/ 500.0, /*factor=*/ 1.5, /*count=*/ 25, @@ -112,7 +115,7 @@ pub static NUM_BATCH_PER_BLOCK: Lazy = Lazy::new(|| { register_histogram!( "quorum_store_num_batch_per_block", "Histogram for the number of batches per (committed) blocks.", - TRANSACTION_COUNT_BUCKETS.clone(), + BATCH_COUNT_BUCKETS.clone(), ) .unwrap() }); @@ -316,24 +319,6 @@ pub fn pos_to_commit(bucket: u64, secs: f64) { // Proof Queue ////////////////////// -pub static PULL_PROOFS_MAX_TXNS: Lazy = Lazy::new(|| { - register_histogram!( - "quorum_store_pull_proofs_max_txns", - "Histogram for the number of transactions pulled when pulling proofs", - TRANSACTION_COUNT_BUCKETS.clone(), - ) 
- .unwrap() -}); - -pub static PULL_PROOFS_MAX_BYTES: Lazy = Lazy::new(|| { - register_histogram!( - "quorum_store_pull_proofs_max_bytes", - "Histogram for the number of bytes pulled when pulling proofs", - BYTE_BUCKETS.clone(), - ) - .unwrap() -}); - pub static PROOFS_WITHOUT_BATCH_DATA: Lazy = Lazy::new(|| { register_int_gauge!( "quorum_store_proofs_without_batch_data", @@ -388,12 +373,10 @@ pub static NUM_TXNS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION: Lazy = Lazy::new(|| { - register_histogram!( + register_avg_counter( "quorum_store_num_total_txns_left_on_update", "Histogram for the number of total txns left after adding or cleaning batches.", - TRANSACTION_COUNT_BUCKETS.clone() ) - .unwrap() }); pub static NUM_TOTAL_TXNS_LEFT_ON_UPDATE_WITHOUT_DUPLICATES: Lazy = Lazy::new(|| { @@ -406,31 +389,26 @@ pub static NUM_TOTAL_TXNS_LEFT_ON_UPDATE_WITHOUT_DUPLICATES: Lazy = L /// Histogram for the number of total batches/PoS left after adding or cleaning batches. pub static NUM_TOTAL_PROOFS_LEFT_ON_UPDATE: Lazy = Lazy::new(|| { - register_histogram!( + register_avg_counter( "quorum_store_num_total_proofs_left_on_update", "Histogram for the number of total batches/PoS left after adding or cleaning batches.", - PROOF_COUNT_BUCKETS.clone() ) - .unwrap() }); /// Histogram for the number of local txns left after adding or cleaning batches. pub static NUM_LOCAL_TXNS_LEFT_ON_UPDATE: Lazy = Lazy::new(|| { - register_histogram!( + register_avg_counter( "quorum_store_num_local_txns_left_on_update", "Histogram for the number of locally created txns left after adding or cleaning batches.", - TRANSACTION_COUNT_BUCKETS.clone() ) - .unwrap() }); /// Histogram for the number of local batches/PoS left after adding or cleaning batches. 
pub static NUM_LOCAL_PROOFS_LEFT_ON_UPDATE: Lazy = Lazy::new(|| { - register_histogram!( + register_avg_counter( "quorum_store_num_local_proofs_left_on_update", "Histogram for the number of locally created batches/PoS left after adding or cleaning batches.", - PROOF_COUNT_BUCKETS.clone() - ).unwrap() + ) }); /// Counters diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index 722e2a70b4cf4..5034171f28e1b 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ b/consensus/src/quorum_store/tests/utils.rs @@ -2,7 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 use crate::quorum_store::utils::ProofQueue; -use aptos_consensus_types::proof_of_store::{BatchId, BatchInfo, ProofOfStore}; +use aptos_consensus_types::{ + common::TransactionSummary, + proof_of_store::{BatchId, BatchInfo, ProofOfStore}, +}; use aptos_crypto::HashValue; use aptos_types::{aggregate_signature::AggregateSignature, PeerId}; use maplit::hashset; @@ -93,3 +96,66 @@ fn test_proof_queue_sorting() { assert_eq!(count_author_0, 2); assert_eq!(count_author_1, 2); } + +#[test] +fn test_proof_calculate_remaining_txns_and_proofs() { + let my_peer_id = PeerId::random(); + let mut proof_queue = ProofQueue::new(my_peer_id, 10, 10); + + let author_0 = PeerId::random(); + let author_1 = PeerId::random(); + + let author_0_batches = vec![ + proof_of_store(author_0, BatchId::new_for_test(0), 100), + proof_of_store(author_0, BatchId::new_for_test(1), 200), + proof_of_store(author_0, BatchId::new_for_test(2), 50), + proof_of_store(author_0, BatchId::new_for_test(3), 300), + ]; + let info_1 = author_0_batches[0].info().clone(); + let info_2 = author_0_batches[3].info().clone(); + proof_queue.add_batch_summaries(vec![(info_1, vec![TransactionSummary::new( + PeerId::ONE, + 1, + HashValue::zero(), + )])]); + for batch in author_0_batches { + proof_queue.push(batch); + } + + let author_1_batches = vec![ + proof_of_store(author_1, BatchId::new_for_test(4), 500), + 
proof_of_store(author_1, BatchId::new_for_test(5), 400), + proof_of_store(author_1, BatchId::new_for_test(6), 600), + proof_of_store(author_1, BatchId::new_for_test(7), 50), + ]; + let info_3 = author_1_batches[1].info().clone(); + let info_4 = author_1_batches[3].info().clone(); + for batch in author_1_batches { + proof_queue.push(batch); + } + assert_eq!(proof_queue.remaining_txns_and_proofs(), (8, 8)); + + proof_queue.add_batch_summaries(vec![(info_3, vec![TransactionSummary::new( + PeerId::ONE, + 1, + HashValue::zero(), + )])]); + + assert_eq!(proof_queue.remaining_txns_and_proofs(), (7, 8)); + + proof_queue.add_batch_summaries(vec![(info_2, vec![TransactionSummary::new( + PeerId::ONE, + 2, + HashValue::zero(), + )])]); + + assert_eq!(proof_queue.remaining_txns_and_proofs(), (7, 8)); + + proof_queue.add_batch_summaries(vec![(info_4, vec![TransactionSummary::new( + PeerId::ONE, + 2, + HashValue::zero(), + )])]); + + assert_eq!(proof_queue.remaining_txns_and_proofs(), (6, 8)); +} diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 300e305fe7852..03ca983fc66b7 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -196,7 +196,6 @@ impl Ord for BatchSortKey { #[derive(Debug)] pub enum ProofQueueCommand { // Proof manager sends this command to add the proofs to the proof queue - // We send back (remaining_txns, remaining_proofs) to the proof manager AddProofs(Vec), // Batch coordinator sends this command to add the received batches to the proof queue. // For each transaction, the proof queue stores the list of batches containing the transaction. @@ -212,7 +211,6 @@ pub enum ProofQueueCommand { }, // Proof manager sends this command to mark these batches as committed and // update the block timestamp. 
- // We send back the (remaining_txns, remaining_proofs) to the proof manager MarkCommitted(Vec, u64), } @@ -347,30 +345,25 @@ impl ProofQueue { self.inc_remaining(&author, num_txns); } - /// return true when quorum store is back pressured - pub(crate) fn qs_back_pressure_1(&self) -> BackPressure { - let (remaining_total_txn_num, remaining_total_proof_num) = self.remaining_txns_and_proofs(); - if remaining_total_txn_num > self.back_pressure_total_txn_limit { - info!( - "QuorumStore back pressured Adding Proofs: txn_count: {}, proof_count: {}", - remaining_total_txn_num, remaining_total_proof_num - ); - } - BackPressure { - txn_count: remaining_total_txn_num > self.back_pressure_total_txn_limit, - proof_count: remaining_total_proof_num > self.back_pressure_total_proof_limit, + pub(crate) fn add_batch_summaries( + &mut self, + batch_summaries: Vec<(BatchInfo, Vec)>, + ) { + for (batch_info, txn_summaries) in batch_summaries { + let batch_key = BatchKey::from_info(&batch_info); + for txn_summary in txn_summaries { + self.txn_summary_to_batches + .entry(txn_summary) + .or_default() + .insert(batch_key.clone()); + } + self.batches_with_txn_summary.insert(batch_key); } } /// return true when quorum store is back pressured - pub(crate) fn qs_back_pressure_2(&self) -> BackPressure { + pub(crate) fn qs_back_pressure(&self) -> BackPressure { let (remaining_total_txn_num, remaining_total_proof_num) = self.remaining_txns_and_proofs(); - if remaining_total_txn_num > self.back_pressure_total_txn_limit { - info!( - "QuorumStore back pressured Committed: txn_count: {}, proof_count: {}", - remaining_total_txn_num, remaining_total_proof_num - ); - } BackPressure { txn_count: remaining_total_txn_num > self.back_pressure_total_txn_limit, proof_count: remaining_total_proof_num > self.back_pressure_total_proof_limit, @@ -395,9 +388,6 @@ impl ProofQueue { let mut excluded_txns = 0; let mut full = false; - counters::PULL_PROOFS_MAX_TXNS.observe(max_txns as f64); - 
counters::PULL_PROOFS_MAX_BYTES.observe(max_bytes as f64); - let mut iters = vec![]; for (_, batches) in self.author_to_batches.iter() { iters.push(batches.iter().rev()); @@ -606,7 +596,7 @@ impl ProofQueue { self.push(proof); } - let updated_back_pressure = self.qs_back_pressure_1(); + let updated_back_pressure = self.qs_back_pressure(); if updated_back_pressure != back_pressure { back_pressure = updated_back_pressure; if back_pressure_tx.send(back_pressure).await.is_err() { @@ -635,7 +625,7 @@ impl ProofQueue { self.mark_committed(batches); self.handle_updated_block_timestamp(block_timestamp); - let updated_back_pressure = self.qs_back_pressure_2(); + let updated_back_pressure = self.qs_back_pressure(); if updated_back_pressure != back_pressure { back_pressure = updated_back_pressure; if back_pressure_tx.send(back_pressure).await.is_err() { @@ -644,16 +634,7 @@ impl ProofQueue { } }, ProofQueueCommand::AddBatches(batch_summaries) => { - for (batch_info, txn_summaries) in batch_summaries { - let batch_key = BatchKey::from_info(&batch_info); - for txn_summary in txn_summaries { - self.txn_summary_to_batches - .entry(txn_summary) - .or_default() - .insert(batch_key.clone()); - } - self.batches_with_txn_summary.insert(batch_key); - } + self.add_batch_summaries(batch_summaries); }, } } diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index 7cc337f1f806a..c4131fb3acc97 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -2442,35 +2442,10 @@ fn pfn_const_tps( 60 * 60 * 2 // 2 hours; avoid epoch changes which can introduce noise }; - // Increase the concurrency level - const USE_CRAZY_MACHINES: bool = false; - - let mut forge_config = ForgeConfig::default() - .with_initial_validator_count(NonZeroUsize::new(100).unwrap()) + ForgeConfig::default() + .with_initial_validator_count(NonZeroUsize::new(7).unwrap()) .with_initial_fullnode_count(7) - .with_validator_override_node_config_fn(Arc::new(|config, _| { - // 
Increase the state sync chunk sizes (consensus blocks are much larger than 1k) - optimize_state_sync_for_throughput(config); - - config.consensus_observer.publisher_enabled = true; - - // Increase the concurrency level - if USE_CRAZY_MACHINES { - config.execution.concurrency_level = 58; - } - })) - .with_fullnode_override_node_config_fn(Arc::new(|config, _| { - // Increase the state sync chunk sizes (consensus blocks are much larger than 1k) - optimize_state_sync_for_throughput(config); - - config.consensus_observer.observer_enabled = true; - - // Increase the concurrency level - if USE_CRAZY_MACHINES { - config.execution.concurrency_level = 58; - } - })) - .with_emit_job(EmitJobRequest::default().mode(EmitJobMode::ConstTps { tps: 5000 })) + .with_emit_job(EmitJobRequest::default().mode(EmitJobMode::ConstTps { tps: 100 })) .add_network_test(PFNPerformance::new( 7, add_cpu_chaos, @@ -2481,14 +2456,14 @@ fn pfn_const_tps( helm_values["chain"]["epoch_duration_secs"] = epoch_duration_secs.into(); })) .with_success_criteria( - SuccessCriteria::new(12000) + SuccessCriteria::new(95) .add_no_restarts() .add_max_expired_tps(0) .add_max_failed_submission_tps(0) // Percentile thresholds are set to +1 second of non-PFN tests. Should be revisited. 
- .add_latency_threshold(5., LatencyType::P50) - .add_latency_threshold(6., LatencyType::P90) - .add_latency_threshold(7., LatencyType::P99) + .add_latency_threshold(2.5, LatencyType::P50) + .add_latency_threshold(4., LatencyType::P90) + .add_latency_threshold(5., LatencyType::P99) .add_wait_for_catchup_s( // Give at least 60s for catchup and at most 10% of the run (duration.as_secs() / 10).max(60), @@ -2497,21 +2472,7 @@ fn pfn_const_tps( max_no_progress_secs: 10.0, max_round_gap: 4, }), - ); - - if USE_CRAZY_MACHINES { - forge_config = forge_config - .with_validator_resource_override(NodeResourceOverride { - cpu_cores: Some(58), - memory_gib: Some(200), - }) - .with_fullnode_resource_override(NodeResourceOverride { - cpu_cores: Some(58), - memory_gib: Some(200), - }) - } - - forge_config + ) } /// This test runs a performance benchmark where the network includes From 9c76004fc214ace62b0c6efba339683b19f8c610 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 18 Jun 2024 18:50:41 -0700 Subject: [PATCH 39/67] Remove an unrelated change --- mempool/src/core_mempool/mempool.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/mempool/src/core_mempool/mempool.rs b/mempool/src/core_mempool/mempool.rs index 08aaba6f5d7b6..c39b6f1bebc06 100644 --- a/mempool/src/core_mempool/mempool.rs +++ b/mempool/src/core_mempool/mempool.rs @@ -355,7 +355,6 @@ impl Mempool { while skipped.contains(&skipped_txn) { inserted.insert(skipped_txn); result.push(skipped_txn); - skipped.remove(&skipped_txn); if (result.len() as u64) == max_txns { break 'main; } From 21bb0ad9beea2b53f626c71b80a733ac36793150 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 24 Jun 2024 10:57:28 -0700 Subject: [PATCH 40/67] Addressing PR comments --- consensus/src/counters.rs | 6 +++--- consensus/src/quorum_store/counters.rs | 26 +++++++++++--------------- consensus/src/quorum_store/utils.rs | 6 +++--- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/consensus/src/counters.rs 
b/consensus/src/counters.rs index a8032e450bc05..b08f89c579d51 100644 --- a/consensus/src/counters.rs +++ b/consensus/src/counters.rs @@ -733,9 +733,9 @@ pub static NUM_BLOCKS_IN_PIPELINE: Lazy = Lazy::new(|| { // .unwrap() // }); -const NUM_CONSENSUS_TRANSACTIONS_BUCKETS: [f64; 21] = [ - 5.0, 10.0, 20.0, 40.0, 75.0, 100.0, 200.0, 400.0, 800.0, 1200.0, 1400.0, 1500.0, 1600.0, - 1700.0, 1800.0, 1900.0, 2500.0, 3300.0, 4000.0, 5000.0, 6500.0, +const NUM_CONSENSUS_TRANSACTIONS_BUCKETS: [f64; 24] = [ + 5.0, 10.0, 20.0, 40.0, 75.0, 100.0, 200.0, 400.0, 800.0, 1200.0, 1800.0, 2500.0, 3300.0, + 4000.0, 5000.0, 6500.0, 8000.0, 10000.0, 12500.0, 15000.0, 18000.0, 21000.0, 25000.0, 30000.0, ]; /// Histogram for the number of txns per (committed) blocks. diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 8918aeed1c8ca..17417ddffe4af 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -115,7 +115,7 @@ pub static NUM_BATCH_PER_BLOCK: Lazy = Lazy::new(|| { register_histogram!( "quorum_store_num_batch_per_block", "Histogram for the number of batches per (committed) blocks.", - BATCH_COUNT_BUCKETS.clone(), + TRANSACTION_COUNT_BUCKETS.clone(), ) .unwrap() }); @@ -351,25 +351,21 @@ pub static PROOFS_IN_PROOF_QUEUE: Lazy = Lazy::new(|| { .unwrap() }); -pub static NUM_PROOFS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION: Lazy = Lazy::new( - || { - register_histogram!( - "quorum_store_num_proofs_left_in_proof_queue_after_proposal_generation", +pub static NUM_PROOFS_IN_PROOF_QUEUE_AFTER_PULL: Lazy = Lazy::new(|| { + register_histogram!( + "quorum_store_num_proofs_left_in_proof_queue_after_pull", "Histogram for the number of proofs left in the proof queue after block proposal generation.", PROOF_COUNT_BUCKETS.clone(), ).unwrap() - }, -); +}); -pub static NUM_TXNS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION: Lazy = Lazy::new( - || { - register_histogram!( - 
"quorum_store_num_txns_left_in_proof_queue_after_proposal_generation", +pub static NUM_TXNS_IN_PROOF_QUEUE_AFTER_PULL: Lazy = Lazy::new(|| { + register_histogram!( + "quorum_store_num_txns_left_in_proof_queue_after_pull", "Histogram for the number of transactions left in the proof queue after block proposal generation.", TRANSACTION_COUNT_BUCKETS.clone(), ).unwrap() - }, -); +}); /// Histogram for the number of total txns left after adding or cleaning batches. pub static NUM_TOTAL_TXNS_LEFT_ON_UPDATE: Lazy = Lazy::new(|| { @@ -379,9 +375,9 @@ pub static NUM_TOTAL_TXNS_LEFT_ON_UPDATE: Lazy = Lazy::new(|| { ) }); -pub static NUM_TOTAL_TXNS_LEFT_ON_UPDATE_WITHOUT_DUPLICATES: Lazy = Lazy::new(|| { +pub static NUM_UNIQUE_TOTAL_TXNS_LEFT_ON_UPDATE: Lazy = Lazy::new(|| { register_histogram!( - "quorum_store_num_total_txns_left_on_update_without_duplicates", + "quorum_store_num_unique_total_txns_left_on_update", "Histogram for the number of total txns left after adding or cleaning batches, without duplicates.", TRANSACTION_COUNT_BUCKETS.clone() ).unwrap() diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 03ca983fc66b7..a69038742433e 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -464,9 +464,9 @@ impl ProofQueue { "cur_txns: {}, remaining_proofs: {:?}", cur_txns, remaining_proofs ); - counters::NUM_PROOFS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION + counters::NUM_PROOFS_IN_PROOF_QUEUE_AFTER_PULL .observe(num_proofs_remaining_after_pull as f64); - counters::NUM_TXNS_LEFT_IN_PROOF_QUEUE_AFTER_PROPOSAL_GENERATION + counters::NUM_TXNS_IN_PROOF_QUEUE_AFTER_PULL .observe(num_txns_remaining_after_pull as f64); // Stable sort, so the order of proofs within an author will not change. 
@@ -522,7 +522,7 @@ impl ProofQueue { counters::NUM_LOCAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_local_txns as f64); counters::NUM_LOCAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_local_proofs as f64); let remaining_txns_without_duplicates = self.remaining_txns_without_duplicates(); - counters::NUM_TOTAL_TXNS_LEFT_ON_UPDATE_WITHOUT_DUPLICATES + counters::NUM_UNIQUE_TOTAL_TXNS_LEFT_ON_UPDATE .observe(remaining_txns_without_duplicates as f64); //count the number of transactions with more than one batches counters::TXNS_WITH_DUPLICATE_BATCHES.set( From 6eaafb2500f0636dfe0d773672a0f305965c927e Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 25 Jun 2024 10:40:14 -0700 Subject: [PATCH 41/67] Addressing PR comments --- consensus/src/quorum_store/counters.rs | 14 +++++++++++--- consensus/src/quorum_store/utils.rs | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 17417ddffe4af..8e445bfae0721 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -36,9 +36,6 @@ static PROOF_COUNT_BUCKETS: Lazy> = Lazy::new(|| { .to_vec() }); -static BATCH_COUNT_BUCKETS: Lazy> = - Lazy::new(|| [1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 20.0, 25.0].to_vec()); - static BYTE_BUCKETS: Lazy> = Lazy::new(|| { exponential_buckets( /*start=*/ 500.0, /*factor=*/ 1.5, /*count=*/ 25, @@ -93,6 +90,17 @@ pub static PROOF_MANAGER_MAIN_LOOP: Lazy = Lazy::new(|| { ) }); +/// Duration of each run of the event loop. +pub static PROOF_QUEUE_MAIN_LOOP: Lazy = Lazy::new(|| { + DurationHistogram::new( + register_histogram!( + "quorum_store_proof_queue_main_loop", + "Duration of the each run of the proof queue event loop" + ) + .unwrap(), + ) +}); + /// Duration of each run of the event loop. 
pub static BATCH_GENERATOR_MAIN_LOOP: Lazy = Lazy::new(|| { DurationHistogram::new( diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index a69038742433e..892fc721c7ec5 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -588,7 +588,7 @@ impl ProofQueue { }; loop { - let _timer = counters::PROOF_MANAGER_MAIN_LOOP.start_timer(); + let _timer = counters::PROOF_QUEUE_MAIN_LOOP.start_timer(); if let Some(msg) = command_rx.recv().await { match msg { ProofQueueCommand::AddProofs(proofs) => { From 46f18f35b7c4df890a1bd9c79fd2793ab39dbbd1 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 25 Jun 2024 11:21:23 -0700 Subject: [PATCH 42/67] Add some timer counters --- consensus/src/quorum_store/counters.rs | 40 ++++++++++++++++++++++++++ consensus/src/quorum_store/utils.rs | 9 ++++++ 2 files changed, 49 insertions(+) diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 8e445bfae0721..20d1a6b4c52b5 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -101,6 +101,46 @@ pub static PROOF_QUEUE_MAIN_LOOP: Lazy = Lazy::new(|| { ) }); +pub static PROOF_QUEUE_ADD_BATCH_SUMMARIES_DURATION: Lazy = Lazy::new(|| { + DurationHistogram::new( + register_histogram!( + "quorum_store_proof_queue_add_batch_summaries_duration", + "Duration of adding batch summaries to proof queue" + ) + .unwrap(), + ) +}); + +pub static PROOF_QUEUE_ADD_PROOFS_DURATION: Lazy = Lazy::new(|| { + DurationHistogram::new( + register_histogram!( + "quorum_store_proof_queue_add_proofs_duration", + "Duration of adding proofs to proof queue" + ) + .unwrap(), + ) +}); + +pub static PROOF_QUEUE_COMMIT_DURATION: Lazy = Lazy::new(|| { + DurationHistogram::new( + register_histogram!( + "quorum_store_proof_queue_commit_duration", + "Duration of committing proofs from proof queue" + ) + .unwrap(), + ) +}); + +pub static 
PROOF_QUEUE_REMAINING_TXNS_DURATION: Lazy = Lazy::new(|| { + DurationHistogram::new( + register_histogram!( + "quorum_store_proof_queue_remaining_txns_duration", + "Duration of calculating remaining txns in proof queue" + ) + .unwrap(), + ) +}); + /// Duration of each run of the event loop. pub static BATCH_GENERATOR_MAIN_LOOP: Lazy = Lazy::new(|| { DurationHistogram::new( diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 892fc721c7ec5..a305d7be39b2b 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -517,6 +517,7 @@ impl ProofQueue { } pub(crate) fn remaining_txns_and_proofs(&self) -> (u64, u64) { + let start = Instant::now(); counters::NUM_TOTAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_txns_with_duplicates as f64); counters::NUM_TOTAL_PROOFS_LEFT_ON_UPDATE.observe(self.remaining_proofs as f64); counters::NUM_LOCAL_TXNS_LEFT_ON_UPDATE.observe(self.remaining_local_txns as f64); @@ -554,6 +555,7 @@ impl ProofQueue { .map(|proof| if proof.is_some() { 1 } else { 0 }) .sum::(), ); + counters::PROOF_QUEUE_REMAINING_TXNS_DURATION.observe_duration(start.elapsed()); (remaining_txns_without_duplicates, self.remaining_proofs) } @@ -592,9 +594,11 @@ impl ProofQueue { if let Some(msg) = command_rx.recv().await { match msg { ProofQueueCommand::AddProofs(proofs) => { + let start = Instant::now(); for proof in proofs { self.push(proof); } + counters::PROOF_QUEUE_ADD_PROOFS_DURATION.observe_duration(start.elapsed()); let updated_back_pressure = self.qs_back_pressure(); if updated_back_pressure != back_pressure { @@ -622,8 +626,10 @@ impl ProofQueue { } }, ProofQueueCommand::MarkCommitted(batches, block_timestamp) => { + let start = Instant::now(); self.mark_committed(batches); self.handle_updated_block_timestamp(block_timestamp); + counters::PROOF_QUEUE_COMMIT_DURATION.observe_duration(start.elapsed()); let updated_back_pressure = self.qs_back_pressure(); if updated_back_pressure != 
back_pressure { @@ -634,7 +640,10 @@ impl ProofQueue { } }, ProofQueueCommand::AddBatches(batch_summaries) => { + let start = Instant::now(); self.add_batch_summaries(batch_summaries); + counters::PROOF_QUEUE_ADD_BATCH_SUMMARIES_DURATION + .observe_duration(start.elapsed()); }, } } From b0f73fdfbe46a48d824cf98448fa1951b1b4063a Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 25 Jun 2024 13:02:05 -0700 Subject: [PATCH 43/67] Add more timer counters --- consensus/src/quorum_store/counters.rs | 10 ++++++++++ consensus/src/quorum_store/utils.rs | 6 +++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 20d1a6b4c52b5..05074e14245e4 100644 --- a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -131,6 +131,16 @@ pub static PROOF_QUEUE_COMMIT_DURATION: Lazy = Lazy::new(|| { ) }); +pub static PROOF_QUEUE_UPDATE_TIMESTAMP_DURATION: Lazy = Lazy::new(|| { + DurationHistogram::new( + register_histogram!( + "quorum_store_proof_queue_update_block_timestamp_duration", + "Duration of updating block timestamp in proof queue" + ) + .unwrap(), + ) +}); + pub static PROOF_QUEUE_REMAINING_TXNS_DURATION: Lazy = Lazy::new(|| { DurationHistogram::new( register_histogram!( diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index a305d7be39b2b..83bfa7402a67a 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -628,9 +628,13 @@ impl ProofQueue { ProofQueueCommand::MarkCommitted(batches, block_timestamp) => { let start = Instant::now(); self.mark_committed(batches); - self.handle_updated_block_timestamp(block_timestamp); counters::PROOF_QUEUE_COMMIT_DURATION.observe_duration(start.elapsed()); + let start = Instant::now(); + self.handle_updated_block_timestamp(block_timestamp); + counters::PROOF_QUEUE_UPDATE_TIMESTAMP_DURATION + .observe_duration(start.elapsed()); + let 
updated_back_pressure = self.qs_back_pressure(); if updated_back_pressure != back_pressure { back_pressure = updated_back_pressure; From 2bd92e29787b3e0fd7641fa3b812063b3a06cf76 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 25 Jun 2024 13:21:13 -0700 Subject: [PATCH 44/67] Minor optimization --- consensus/src/quorum_store/utils.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 83bfa7402a67a..1f87c6946c266 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -572,11 +572,17 @@ impl ProofQueue { } self.batch_to_proof.insert(batch_key.clone(), None); self.batches_with_txn_summary.remove(&batch_key); - self.txn_summary_to_batches.retain(|_, batches| { - batches.remove(&batch_key); - !batches.is_empty() - }); } + let batch_keys = batches + .iter() + .map(BatchKey::from_info) + .collect::>(); + self.txn_summary_to_batches.retain(|_, batches| { + for batch_key in &batch_keys { + batches.remove(batch_key); + } + !batches.is_empty() + }); } pub async fn start( From 51dc66b78b6454c28bc06f1a596c8893948b7235 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 25 Jun 2024 16:08:48 -0700 Subject: [PATCH 45/67] Proof queue to be part of proof manager --- .../src/quorum_store/batch_coordinator.rs | 19 +- consensus/src/quorum_store/proof_manager.rs | 242 +++++++++--------- .../src/quorum_store/quorum_store_builder.rs | 29 +-- .../quorum_store/tests/proof_manager_test.rs | 88 +++---- consensus/src/quorum_store/tests/utils.rs | 4 +- consensus/src/quorum_store/types.rs | 26 +- consensus/src/quorum_store/utils.rs | 120 +-------- 7 files changed, 197 insertions(+), 331 deletions(-) diff --git a/consensus/src/quorum_store/batch_coordinator.rs b/consensus/src/quorum_store/batch_coordinator.rs index 012863432d70f..6bc8f2fc15eb5 100644 --- a/consensus/src/quorum_store/batch_coordinator.rs +++ 
b/consensus/src/quorum_store/batch_coordinator.rs @@ -1,7 +1,6 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use super::utils::ProofQueueCommand; use crate::{ network::{NetworkSender, QuorumStoreSender}, quorum_store::{ @@ -33,7 +32,6 @@ pub struct BatchCoordinator { network_sender: Arc, sender_to_proof_manager: Arc>, sender_to_batch_generator: Arc>, - sender_to_proof_queue: Arc>, batch_store: Arc, max_batch_txns: u64, max_batch_bytes: u64, @@ -47,7 +45,6 @@ impl BatchCoordinator { network_sender: NetworkSender, sender_to_proof_manager: Sender, sender_to_batch_generator: Sender, - sender_to_proof_queue: Arc>, batch_store: Arc, max_batch_txns: u64, max_batch_bytes: u64, @@ -59,7 +56,6 @@ impl BatchCoordinator { network_sender: Arc::new(network_sender), sender_to_proof_manager: Arc::new(sender_to_proof_manager), sender_to_batch_generator: Arc::new(sender_to_batch_generator), - sender_to_proof_queue, batch_store, max_batch_txns, max_batch_bytes, @@ -80,7 +76,12 @@ impl BatchCoordinator { let peer_id = persist_requests[0].author(); let batches = persist_requests .iter() - .map(|persisted_value| persisted_value.batch_info().clone()) + .map(|persisted_value| { + ( + persisted_value.batch_info().clone(), + persisted_value.summary(), + ) + }) .collect(); let signed_batch_infos = batch_store.persist(persist_requests); if !signed_batch_infos.is_empty() { @@ -138,10 +139,6 @@ impl BatchCoordinator { } let mut persist_requests = vec![]; - let batches_summary = batches - .iter() - .map(|batch| (batch.batch_info().clone(), batch.summary())) - .collect(); for batch in batches.into_iter() { // TODO: maybe don't message batch generator if the persist is unsuccessful? 
if let Err(e) = self @@ -153,10 +150,6 @@ impl BatchCoordinator { } persist_requests.push(batch.into()); } - self.sender_to_proof_queue - .send(ProofQueueCommand::AddBatches(batches_summary)) - .await - .expect("Failed to send NewBatches to ProofQueue"); counters::RECEIVED_BATCH_COUNT.inc_by(persist_requests.len() as u64); if author != self.my_peer_id { counters::RECEIVED_REMOTE_BATCH_COUNT.inc_by(persist_requests.len() as u64); diff --git a/consensus/src/quorum_store/proof_manager.rs b/consensus/src/quorum_store/proof_manager.rs index ce31f6404424b..92db27fc43c05 100644 --- a/consensus/src/quorum_store/proof_manager.rs +++ b/consensus/src/quorum_store/proof_manager.rs @@ -1,35 +1,35 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 +use super::batch_store::BatchStore; use crate::{ monitor, quorum_store::{ - batch_store::BatchStore, + batch_generator::BackPressure, counters, - utils::{BatchSortKey, ProofQueueCommand}, + utils::{BatchSortKey, ProofQueue}, }, }; use aptos_consensus_types::{ - common::{Payload, PayloadFilter, ProofWithData}, + common::{Payload, PayloadFilter, ProofWithData, TransactionSummary}, proof_of_store::{BatchInfo, ProofOfStore, ProofOfStoreMsg}, request_response::{GetPayloadCommand, GetPayloadResponse}, }; use aptos_logger::prelude::*; use aptos_types::{transaction::SignedTransaction, PeerId}; use futures::StreamExt; -use futures_channel::{mpsc::Receiver, oneshot}; +use futures_channel::mpsc::Receiver; use rand::{seq::SliceRandom, thread_rng}; use std::{ cmp::min, collections::{BTreeMap, HashMap, HashSet}, sync::Arc, }; -use tokio::sync::mpsc::Sender; #[derive(Debug)] pub enum ProofManagerCommand { ReceiveProofs(ProofOfStoreMsg), - ReceiveBatches(Vec), + ReceiveBatches(Vec<(BatchInfo, Vec)>), CommitNotification(u64, Vec), Shutdown(tokio::sync::oneshot::Sender<()>), } @@ -129,46 +129,59 @@ impl BatchQueue { } pub struct ProofManager { + proofs_for_consensus: ProofQueue, batch_queue: BatchQueue, + 
back_pressure_total_txn_limit: u64, + remaining_total_txn_num: u64, + back_pressure_total_proof_limit: u64, + remaining_total_proof_num: u64, allow_batches_without_pos_in_proposal: bool, - proof_queue_tx: Arc>, } impl ProofManager { pub fn new( + my_peer_id: PeerId, + back_pressure_total_txn_limit: u64, + back_pressure_total_proof_limit: u64, batch_store: Arc, allow_batches_without_pos_in_proposal: bool, - proof_queue_tx: Arc>, ) -> Self { Self { + proofs_for_consensus: ProofQueue::new(my_peer_id), batch_queue: BatchQueue::new(batch_store), + back_pressure_total_txn_limit, + remaining_total_txn_num: 0, + back_pressure_total_proof_limit, + remaining_total_proof_num: 0, allow_batches_without_pos_in_proposal, - proof_queue_tx, } } - pub(crate) async fn receive_proofs(&mut self, proofs: Vec) { - for proof in &proofs { + pub(crate) fn receive_proofs(&mut self, proofs: Vec) { + for proof in proofs.into_iter() { self.batch_queue.remove_batch(proof.info()); + self.proofs_for_consensus.push(proof); } - if !proofs.is_empty() { - if let Err(e) = self - .proof_queue_tx - .send(ProofQueueCommand::AddProofs(proofs)) - .await - { - warn!("Failed to add proofs to proof queue with error: {:?}", e); - } - } + (self.remaining_total_txn_num, self.remaining_total_proof_num) = + self.proofs_for_consensus.remaining_txns_and_proofs(); } - pub(crate) fn receive_batches(&mut self, batches: Vec) { + pub(crate) fn receive_batches( + &mut self, + batch_summaries: Vec<(BatchInfo, Vec)>, + ) { if self.allow_batches_without_pos_in_proposal { + let batches = batch_summaries + .iter() + .map(|(batch_info, _)| batch_info.clone()) + .collect(); self.batch_queue.add_batches(batches); } + self.proofs_for_consensus + .add_batch_summaries(batch_summaries); } - pub(crate) async fn handle_commit_notification( + pub(crate) fn handle_commit_notification( &mut self, block_timestamp: u64, batches: Vec, @@ -181,20 +194,14 @@ impl ProofManager { for batch in &batches { self.batch_queue.remove_batch(batch); } - - 
if let Err(e) = self - .proof_queue_tx - .send(ProofQueueCommand::MarkCommitted(batches, block_timestamp)) - .await - { - warn!( - "Failed to mark proofs as committed in proof queue with error: {:?}", - e - ); - } + self.proofs_for_consensus.mark_committed(batches); + self.proofs_for_consensus + .handle_updated_block_timestamp(block_timestamp); + (self.remaining_total_txn_num, self.remaining_total_proof_num) = + self.proofs_for_consensus.remaining_txns_and_proofs(); } - pub(crate) async fn handle_proposal_request(&mut self, msg: GetPayloadCommand) { + pub(crate) fn handle_proposal_request(&mut self, msg: GetPayloadCommand) { match msg { GetPayloadCommand::GetPayloadRequest( max_txns, @@ -213,103 +220,101 @@ impl ProofManager { PayloadFilter::InQuorumStore(proofs) => proofs, }; - let (response_tx, response_rx) = oneshot::channel(); - if self - .proof_queue_tx - .send(ProofQueueCommand::PullProofs { - excluded_batches: excluded_batches.clone(), - max_txns, - max_bytes, - return_non_full, - response_sender: response_tx, - }) - .await - .is_ok() - { - match response_rx.await { - Ok((proof_block, proof_queue_fully_utilized)) => { - counters::NUM_BATCHES_WITHOUT_PROOF_OF_STORE - .observe(self.batch_queue.len() as f64); - counters::PROOF_QUEUE_FULLY_UTILIZED - .observe(if proof_queue_fully_utilized { 1.0 } else { 0.0 }); + let (proof_block, proof_queue_fully_utilized) = self + .proofs_for_consensus + .pull_proofs(&excluded_batches, max_txns, max_bytes, return_non_full); - let mut inline_block: Vec<(BatchInfo, Vec)> = vec![]; - let cur_txns: u64 = proof_block.iter().map(|p| p.num_txns()).sum(); - let cur_bytes: u64 = proof_block.iter().map(|p| p.num_bytes()).sum(); + counters::NUM_BATCHES_WITHOUT_PROOF_OF_STORE.observe(self.batch_queue.len() as f64); + counters::PROOF_QUEUE_FULLY_UTILIZED + .observe(if proof_queue_fully_utilized { 1.0 } else { 0.0 }); - if self.allow_batches_without_pos_in_proposal - && proof_queue_fully_utilized - { - inline_block = 
self.batch_queue.pull_batches( - min(max_txns - cur_txns, max_inline_txns), - min(max_bytes - cur_bytes, max_inline_bytes), - excluded_batches - .iter() - .cloned() - .chain(proof_block.iter().map(|proof| proof.info().clone())) - .collect(), - ); - } - let inline_txns = inline_block - .iter() - .map(|(_, txns)| txns.len()) - .sum::(); - counters::NUM_INLINE_BATCHES.observe(inline_block.len() as f64); - counters::NUM_INLINE_TXNS.observe(inline_txns as f64); + let mut inline_block: Vec<(BatchInfo, Vec)> = vec![]; + let cur_txns: u64 = proof_block.iter().map(|p| p.num_txns()).sum(); + let cur_bytes: u64 = proof_block.iter().map(|p| p.num_bytes()).sum(); - let res = GetPayloadResponse::GetPayloadResponse( - if proof_block.is_empty() && inline_block.is_empty() { - Payload::empty(true, self.allow_batches_without_pos_in_proposal) - } else if inline_block.is_empty() { - trace!( - "QS: GetBlockRequest excluded len {}, block len {}", - excluded_batches.len(), - proof_block.len() - ); - Payload::InQuorumStore(ProofWithData::new(proof_block)) - } else { - trace!( - "QS: GetBlockRequest excluded len {}, block len {}, inline len {}", - excluded_batches.len(), - proof_block.len(), - inline_block.len() - ); - Payload::QuorumStoreInlineHybrid( - inline_block, - ProofWithData::new(proof_block), - None, - ) - }, - ); - match callback.send(Ok(res)) { - Ok(_) => (), - Err(err) => { - debug!("BlockResponse receiver not available! error {:?}", err) - }, - } - }, - Err(e) => { - warn!("Failed to get response from ProofQueue after sending PullProofs command. 
{:?}", e); - }, - } - } else { - warn!("Failed to get remaining total num from proof queue"); + if self.allow_batches_without_pos_in_proposal && proof_queue_fully_utilized { + inline_block = self.batch_queue.pull_batches( + min(max_txns - cur_txns, max_inline_txns), + min(max_bytes - cur_bytes, max_inline_bytes), + excluded_batches + .iter() + .cloned() + .chain(proof_block.iter().map(|proof| proof.info().clone())) + .collect(), + ); + } + let inline_txns = inline_block + .iter() + .map(|(_, txns)| txns.len()) + .sum::(); + counters::NUM_INLINE_BATCHES.observe(inline_block.len() as f64); + counters::NUM_INLINE_TXNS.observe(inline_txns as f64); + + let res = GetPayloadResponse::GetPayloadResponse( + if proof_block.is_empty() && inline_block.is_empty() { + Payload::empty(true, self.allow_batches_without_pos_in_proposal) + } else if inline_block.is_empty() { + trace!( + "QS: GetBlockRequest excluded len {}, block len {}", + excluded_batches.len(), + proof_block.len() + ); + Payload::InQuorumStore(ProofWithData::new(proof_block)) + } else { + trace!( + "QS: GetBlockRequest excluded len {}, block len {}, inline len {}", + excluded_batches.len(), + proof_block.len(), + inline_block.len() + ); + Payload::QuorumStoreInlineHybrid( + inline_block, + ProofWithData::new(proof_block), + None, + ) + }, + ); + match callback.send(Ok(res)) { + Ok(_) => (), + Err(err) => debug!("BlockResponse receiver not available! 
error {:?}", err), } }, } } + /// return true when quorum store is back pressured + pub(crate) fn qs_back_pressure(&self) -> BackPressure { + BackPressure { + txn_count: self.remaining_total_txn_num > self.back_pressure_total_txn_limit, + proof_count: self.remaining_total_proof_num > self.back_pressure_total_proof_limit, + } + } + pub async fn start( mut self, + back_pressure_tx: tokio::sync::mpsc::Sender, mut proposal_rx: Receiver, mut proof_rx: tokio::sync::mpsc::Receiver, ) { + let mut back_pressure = BackPressure { + txn_count: false, + proof_count: false, + }; + loop { let _timer = counters::PROOF_MANAGER_MAIN_LOOP.start_timer(); tokio::select! { Some(msg) = proposal_rx.next() => monitor!("proof_manager_handle_proposal", { - self.handle_proposal_request(msg).await; + self.handle_proposal_request(msg); + + let updated_back_pressure = self.qs_back_pressure(); + if updated_back_pressure != back_pressure { + back_pressure = updated_back_pressure; + if back_pressure_tx.send(back_pressure).await.is_err() { + debug!("Failed to send back_pressure for proposal"); + } + } }), Some(msg) = proof_rx.recv() => { monitor!("proof_manager_handle_command", { @@ -321,7 +326,7 @@ impl ProofManager { break; }, ProofManagerCommand::ReceiveProofs(proofs) => { - self.receive_proofs(proofs.take()).await; + self.receive_proofs(proofs.take()); }, ProofManagerCommand::ReceiveBatches(batches) => { self.receive_batches(batches); @@ -330,9 +335,16 @@ impl ProofManager { self.handle_commit_notification( block_timestamp, batches, - ).await; + ); }, } + let updated_back_pressure = self.qs_back_pressure(); + if updated_back_pressure != back_pressure { + back_pressure = updated_back_pressure; + if back_pressure_tx.send(back_pressure).await.is_err() { + debug!("Failed to send back_pressure for commit notification"); + } + } }) } } diff --git a/consensus/src/quorum_store/quorum_store_builder.rs b/consensus/src/quorum_store/quorum_store_builder.rs index 7891194df99f6..1dca357991396 100644 --- 
a/consensus/src/quorum_store/quorum_store_builder.rs +++ b/consensus/src/quorum_store/quorum_store_builder.rs @@ -20,7 +20,6 @@ use crate::{ proof_manager::{ProofManager, ProofManagerCommand}, quorum_store_coordinator::{CoordinatorCommand, QuorumStoreCoordinator}, types::{Batch, BatchResponse}, - utils::{ProofQueue, ProofQueueCommand}, }, round_manager::VerifiedEvent, }; @@ -139,8 +138,6 @@ pub struct InnerBuilder { proof_coordinator_cmd_rx: Option>, proof_manager_cmd_tx: tokio::sync::mpsc::Sender, proof_manager_cmd_rx: Option>, - proof_queue_cmd_tx: Arc>, - proof_queue_cmd_rx: Option>, back_pressure_tx: tokio::sync::mpsc::Sender, back_pressure_rx: Option>, quorum_store_storage: Arc, @@ -184,7 +181,6 @@ impl InnerBuilder { config.channel_size, None, ); - let (proof_queue_tx, proof_queue_rx) = tokio::sync::mpsc::channel(config.channel_size); let mut remote_batch_coordinator_cmd_tx = Vec::new(); let mut remote_batch_coordinator_cmd_rx = Vec::new(); for _ in 0..config.num_workers_for_remote_batches { @@ -215,8 +211,6 @@ impl InnerBuilder { proof_coordinator_cmd_rx: Some(proof_coordinator_cmd_rx), proof_manager_cmd_tx, proof_manager_cmd_rx: Some(proof_manager_cmd_rx), - proof_queue_cmd_tx: Arc::new(proof_queue_tx), - proof_queue_cmd_rx: Some(proof_queue_rx), back_pressure_tx, back_pressure_rx: Some(back_pressure_rx), quorum_store_storage, @@ -320,20 +314,6 @@ impl InnerBuilder { ) ); - let proof_queue = ProofQueue::new( - self.author, - self.config.back_pressure.backlog_txn_limit_count, - self.config - .back_pressure - .backlog_per_validator_batch_limit_count - * self.num_validators, - ); - let proof_queue_cmd_rx = self.proof_queue_cmd_rx.take().unwrap(); - spawn_named!( - "proof_queue", - proof_queue.start(self.back_pressure_tx.clone(), proof_queue_cmd_rx) - ); - for (i, remote_batch_coordinator_cmd_rx) in self.remote_batch_coordinator_cmd_rx.into_iter().enumerate() { @@ -342,7 +322,6 @@ impl InnerBuilder { self.network_sender.clone(), 
self.proof_manager_cmd_tx.clone(), self.batch_generator_cmd_tx.clone(), - self.proof_queue_cmd_tx.clone(), self.batch_store.clone().unwrap(), self.config.receiver_max_batch_txns as u64, self.config.receiver_max_batch_bytes as u64, @@ -377,13 +356,19 @@ impl InnerBuilder { let proof_manager_cmd_rx = self.proof_manager_cmd_rx.take().unwrap(); let proof_manager = ProofManager::new( + self.author, + self.config.back_pressure.backlog_txn_limit_count, + self.config + .back_pressure + .backlog_per_validator_batch_limit_count + * self.num_validators, self.batch_store.clone().unwrap(), self.config.allow_batches_without_pos_in_proposal, - self.proof_queue_cmd_tx.clone(), ); spawn_named!( "proof_manager", proof_manager.start( + self.back_pressure_tx.clone(), self.consensus_to_quorum_store_receiver, proof_manager_cmd_rx, ) diff --git a/consensus/src/quorum_store/tests/proof_manager_test.rs b/consensus/src/quorum_store/tests/proof_manager_test.rs index 25595e8a21853..812a854f62d9c 100644 --- a/consensus/src/quorum_store/tests/proof_manager_test.rs +++ b/consensus/src/quorum_store/tests/proof_manager_test.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::quorum_store::{ - proof_manager::ProofManager, tests::batch_store_test::batch_store_for_test, utils::ProofQueue, + proof_manager::ProofManager, tests::batch_store_test::batch_store_for_test, }; use aptos_consensus_types::{ common::{Payload, PayloadFilter}, @@ -12,15 +12,11 @@ use aptos_consensus_types::{ use aptos_crypto::HashValue; use aptos_types::{aggregate_signature::AggregateSignature, PeerId}; use futures::channel::oneshot; -use std::{collections::HashSet, sync::Arc}; +use std::collections::HashSet; -async fn create_proof_manager() -> ProofManager { - let (proof_queue_tx, proof_queue_rx) = tokio::sync::mpsc::channel(100); - let proof_queue = ProofQueue::new(PeerId::random(), 10, 10); - let (backpressure_tx, _) = tokio::sync::mpsc::channel(10); - tokio::spawn(proof_queue.start(backpressure_tx, 
proof_queue_rx)); +fn create_proof_manager() -> ProofManager { let batch_store = batch_store_for_test(5 * 1024 * 1024); - ProofManager::new(batch_store, true, Arc::new(proof_queue_tx)) + ProofManager::new(PeerId::random(), 10, 10, batch_store, true) } fn create_proof(author: PeerId, expiration: u64, batch_sequence: u64) -> ProofOfStore { @@ -66,7 +62,7 @@ async fn get_proposal( PayloadFilter::InQuorumStore(filter_set), callback_tx, ); - proof_manager.handle_proposal_request(req).await; + proof_manager.handle_proposal_request(req); let GetPayloadResponse::GetPayloadResponse(payload) = callback_rx.await.unwrap().unwrap(); payload } @@ -117,20 +113,20 @@ async fn get_proposal_and_assert( #[tokio::test] async fn test_block_request() { - let mut proof_manager = create_proof_manager().await; + let mut proof_manager = create_proof_manager(); let proof = create_proof(PeerId::random(), 10, 1); - proof_manager.receive_proofs(vec![proof.clone()]).await; + proof_manager.receive_proofs(vec![proof.clone()]); get_proposal_and_assert(&mut proof_manager, 100, &[], &vec![proof]).await; } #[tokio::test] async fn test_max_txns_from_block_to_execute() { - let mut proof_manager = create_proof_manager().await; + let mut proof_manager = create_proof_manager(); let proof = create_proof(PeerId::random(), 10, 1); - proof_manager.receive_proofs(vec![proof.clone()]).await; + proof_manager.receive_proofs(vec![proof.clone()]); let payload = get_proposal(&mut proof_manager, 100, &[]).await; // convert payload to v2 format and assert @@ -144,53 +140,45 @@ async fn test_max_txns_from_block_to_execute() { #[tokio::test] async fn test_block_timestamp_expiration() { - let mut proof_manager = create_proof_manager().await; + let mut proof_manager = create_proof_manager(); let proof = create_proof(PeerId::random(), 10, 1); - proof_manager.receive_proofs(vec![proof.clone()]).await; + proof_manager.receive_proofs(vec![proof.clone()]); - proof_manager.handle_commit_notification(1, vec![]).await; + 
proof_manager.handle_commit_notification(1, vec![]); get_proposal_and_assert(&mut proof_manager, 100, &[], &vec![proof]).await; - proof_manager.handle_commit_notification(20, vec![]).await; + proof_manager.handle_commit_notification(20, vec![]); get_proposal_and_assert(&mut proof_manager, 100, &[], &[]).await; } #[tokio::test] async fn test_batch_commit() { - let mut proof_manager = create_proof_manager().await; + let mut proof_manager = create_proof_manager(); let proof0 = create_proof(PeerId::random(), 10, 1); - proof_manager.receive_proofs(vec![proof0.clone()]).await; + proof_manager.receive_proofs(vec![proof0.clone()]); let proof1 = create_proof(PeerId::random(), 11, 2); - proof_manager.receive_proofs(vec![proof1.clone()]).await; + proof_manager.receive_proofs(vec![proof1.clone()]); - proof_manager - .handle_commit_notification(1, vec![proof1.info().clone()]) - .await; + proof_manager.handle_commit_notification(1, vec![proof1.info().clone()]); get_proposal_and_assert(&mut proof_manager, 100, &[], &vec![proof0]).await; } #[tokio::test] async fn test_proposal_priority() { - let mut proof_manager = create_proof_manager().await; + let mut proof_manager = create_proof_manager(); let peer0 = PeerId::random(); let peer0_proof0 = create_proof_with_gas(peer0, 10, 2, 1000); let peer0_proof1 = create_proof_with_gas(peer0, 10, 1, 0); - proof_manager - .receive_proofs(vec![peer0_proof1.clone(), peer0_proof0.clone()]) - .await; + proof_manager.receive_proofs(vec![peer0_proof1.clone(), peer0_proof0.clone()]); let peer0_proof2 = create_proof_with_gas(peer0, 10, 4, 500); - proof_manager - .receive_proofs(vec![peer0_proof2.clone()]) - .await; + proof_manager.receive_proofs(vec![peer0_proof2.clone()]); let peer0_proof3 = create_proof_with_gas(peer0, 10, 3, 500); - proof_manager - .receive_proofs(vec![peer0_proof3.clone()]) - .await; + proof_manager.receive_proofs(vec![peer0_proof3.clone()]); // Gas bucket is the most significant prioritization let expected = 
vec![peer0_proof0.clone()]; @@ -209,21 +197,19 @@ async fn test_proposal_priority() { #[tokio::test] async fn test_proposal_fairness() { - let mut proof_manager = create_proof_manager().await; + let mut proof_manager = create_proof_manager(); let peer0 = PeerId::random(); let peer1 = PeerId::random(); let mut peer0_proofs = vec![]; for i in 0..4 { let proof = create_proof(peer0, 10 + i, 1 + i); - proof_manager.receive_proofs(vec![proof.clone()]).await; + proof_manager.receive_proofs(vec![proof.clone()]); peer0_proofs.push(proof); } let peer1_proof_0 = create_proof(peer1, 7, 1); - proof_manager - .receive_proofs(vec![peer1_proof_0.clone()]) - .await; + proof_manager.receive_proofs(vec![peer1_proof_0.clone()]); // Without filter, and large max size, all proofs are retrieved let mut expected = peer0_proofs.clone(); @@ -251,7 +237,7 @@ async fn test_proposal_fairness() { #[tokio::test] async fn test_duplicate_batches_on_commit() { - let mut proof_manager = create_proof_manager().await; + let mut proof_manager = create_proof_manager(); let author = PeerId::random(); let digest = HashValue::random(); @@ -261,32 +247,30 @@ async fn test_duplicate_batches_on_commit() { let proof1 = ProofOfStore::new(batch.clone(), AggregateSignature::empty()); let proof2 = ProofOfStore::new(batch.clone(), AggregateSignature::empty()); - proof_manager.receive_proofs(vec![proof0.clone()]).await; - proof_manager.receive_proofs(vec![proof1.clone()]).await; + proof_manager.receive_proofs(vec![proof0.clone()]); + proof_manager.receive_proofs(vec![proof1.clone()]); // Only one copy of the batch exists get_proposal_and_assert(&mut proof_manager, 10, &[], &vec![proof0.clone()]).await; // Nothing goes wrong on commits - proof_manager - .handle_commit_notification(4, vec![batch.clone()]) - .await; + proof_manager.handle_commit_notification(4, vec![batch.clone()]); get_proposal_and_assert(&mut proof_manager, 10, &[], &[]).await; // Before expiration, still marked as committed - 
proof_manager.receive_proofs(vec![proof2.clone()]).await; + proof_manager.receive_proofs(vec![proof2.clone()]); get_proposal_and_assert(&mut proof_manager, 10, &[], &[]).await; // Nothing goes wrong on expiration - proof_manager.handle_commit_notification(5, vec![]).await; + proof_manager.handle_commit_notification(5, vec![]); get_proposal_and_assert(&mut proof_manager, 10, &[], &[]).await; - proof_manager.handle_commit_notification(12, vec![]).await; + proof_manager.handle_commit_notification(12, vec![]); get_proposal_and_assert(&mut proof_manager, 10, &[], &[]).await; } #[tokio::test] async fn test_duplicate_batches_on_expiration() { - let mut proof_manager = create_proof_manager().await; + let mut proof_manager = create_proof_manager(); let author = PeerId::random(); let digest = HashValue::random(); @@ -295,15 +279,15 @@ async fn test_duplicate_batches_on_expiration() { let proof0 = ProofOfStore::new(batch.clone(), AggregateSignature::empty()); let proof1 = ProofOfStore::new(batch.clone(), AggregateSignature::empty()); - proof_manager.receive_proofs(vec![proof0.clone()]).await; - proof_manager.receive_proofs(vec![proof1.clone()]).await; + proof_manager.receive_proofs(vec![proof0.clone()]); + proof_manager.receive_proofs(vec![proof1.clone()]); // Only one copy of the batch exists get_proposal_and_assert(&mut proof_manager, 10, &[], &vec![proof0.clone()]).await; // Nothing goes wrong on expiration - proof_manager.handle_commit_notification(5, vec![]).await; + proof_manager.handle_commit_notification(5, vec![]); get_proposal_and_assert(&mut proof_manager, 10, &[], &vec![proof0.clone()]).await; - proof_manager.handle_commit_notification(12, vec![]).await; + proof_manager.handle_commit_notification(12, vec![]); get_proposal_and_assert(&mut proof_manager, 10, &[], &[]).await; } diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index 5034171f28e1b..882a40dfce0c9 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ 
b/consensus/src/quorum_store/tests/utils.rs @@ -30,7 +30,7 @@ fn proof_of_store(author: PeerId, batch_id: BatchId, gas_bucket_start: u64) -> P #[test] fn test_proof_queue_sorting() { let my_peer_id = PeerId::random(); - let mut proof_queue = ProofQueue::new(my_peer_id, 10, 10); + let mut proof_queue = ProofQueue::new(my_peer_id); let author_0 = PeerId::random(); let author_1 = PeerId::random(); @@ -100,7 +100,7 @@ fn test_proof_queue_sorting() { #[test] fn test_proof_calculate_remaining_txns_and_proofs() { let my_peer_id = PeerId::random(); - let mut proof_queue = ProofQueue::new(my_peer_id, 10, 10); + let mut proof_queue = ProofQueue::new(my_peer_id); let author_0 = PeerId::random(); let author_1 = PeerId::random(); diff --git a/consensus/src/quorum_store/types.rs b/consensus/src/quorum_store/types.rs index 41875a6d70097..65c01b839e424 100644 --- a/consensus/src/quorum_store/types.rs +++ b/consensus/src/quorum_store/types.rs @@ -57,6 +57,22 @@ impl PersistedValue { pub fn payload(&self) -> &Option> { &self.maybe_payload } + + pub fn summary(&self) -> Vec { + if let Some(payload) = &self.maybe_payload { + return payload + .iter() + .map(|txn| { + TransactionSummary::new( + txn.sender(), + txn.sequence_number(), + txn.committed_hash(), + ) + }) + .collect(); + } + vec![] + } } impl Deref for PersistedValue { @@ -172,16 +188,6 @@ impl Batch { self.payload.into_transactions() } - pub fn summary(&self) -> Vec { - self.payload - .txns() - .iter() - .map(|txn| { - TransactionSummary::new(txn.sender(), txn.sequence_number(), txn.committed_hash()) - }) - .collect() - } - pub fn batch_info(&self) -> &BatchInfo { &self.batch_info } diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 1f87c6946c266..cb7f358947d10 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -1,7 +1,6 @@ // Copyright © Aptos Foundation // SPDX-License-Identifier: Apache-2.0 -use super::batch_generator::BackPressure; use 
crate::{monitor, quorum_store::counters}; use aptos_consensus_types::{ common::{TransactionInProgress, TransactionSummary}, @@ -193,27 +192,6 @@ impl Ord for BatchSortKey { } } -#[derive(Debug)] -pub enum ProofQueueCommand { - // Proof manager sends this command to add the proofs to the proof queue - AddProofs(Vec), - // Batch coordinator sends this command to add the received batches to the proof queue. - // For each transaction, the proof queue stores the list of batches containing the transaction. - AddBatches(Vec<(BatchInfo, Vec)>), - // Proof manager sends this command to pull proofs from the proof queue to - // include in the block proposal. - PullProofs { - excluded_batches: HashSet, - max_txns: u64, - max_bytes: u64, - return_non_full: bool, - response_sender: oneshot::Sender<(Vec, bool)>, - }, - // Proof manager sends this command to mark these batches as committed and - // update the block timestamp. - MarkCommitted(Vec, u64), -} - pub struct ProofQueue { my_peer_id: PeerId, // Queue per peer to ensure fairness between peers and priority within peer @@ -228,8 +206,6 @@ pub struct ProofQueue { // Expiration index expirations: TimeExpirations, latest_block_timestamp: u64, - back_pressure_total_txn_limit: u64, - back_pressure_total_proof_limit: u64, remaining_txns_with_duplicates: u64, remaining_proofs: u64, remaining_local_txns: u64, @@ -237,11 +213,7 @@ pub struct ProofQueue { } impl ProofQueue { - pub(crate) fn new( - my_peer_id: PeerId, - back_pressure_total_txn_limit: u64, - back_pressure_total_proof_limit: u64, - ) -> Self { + pub(crate) fn new(my_peer_id: PeerId) -> Self { Self { my_peer_id, author_to_batches: HashMap::new(), @@ -250,8 +222,6 @@ impl ProofQueue { batches_with_txn_summary: HashSet::new(), expirations: TimeExpirations::new(), latest_block_timestamp: 0, - back_pressure_total_txn_limit, - back_pressure_total_proof_limit, remaining_txns_with_duplicates: 0, remaining_proofs: 0, remaining_local_txns: 0, @@ -361,15 +331,6 @@ impl ProofQueue { 
} } - /// return true when quorum store is back pressured - pub(crate) fn qs_back_pressure(&self) -> BackPressure { - let (remaining_total_txn_num, remaining_total_proof_num) = self.remaining_txns_and_proofs(); - BackPressure { - txn_count: remaining_total_txn_num > self.back_pressure_total_txn_limit, - proof_count: remaining_total_proof_num > self.back_pressure_total_proof_limit, - } - } - // gets excluded and iterates over the vector returning non excluded or expired entries. // return the vector of pulled PoS, and the size of the remaining PoS // The flag in the second return argument is true iff the entire proof queue is fully utilized @@ -477,7 +438,7 @@ impl ProofQueue { } } - fn handle_updated_block_timestamp(&mut self, block_timestamp: u64) { + pub(crate) fn handle_updated_block_timestamp(&mut self, block_timestamp: u64) { assert!( self.latest_block_timestamp <= block_timestamp, "Decreasing block timestamp" @@ -560,7 +521,7 @@ impl ProofQueue { } // Mark in the hashmap committed PoS, but keep them until they expire - fn mark_committed(&mut self, batches: Vec) { + pub(crate) fn mark_committed(&mut self, batches: Vec) { for batch in &batches { let batch_key = BatchKey::from_info(batch); if let Some(Some((proof, insertion_time))) = self.batch_to_proof.get(&batch_key) { @@ -584,79 +545,4 @@ impl ProofQueue { !batches.is_empty() }); } - - pub async fn start( - mut self, - back_pressure_tx: tokio::sync::mpsc::Sender, - mut command_rx: tokio::sync::mpsc::Receiver, - ) { - let mut back_pressure = BackPressure { - txn_count: false, - proof_count: false, - }; - - loop { - let _timer = counters::PROOF_QUEUE_MAIN_LOOP.start_timer(); - if let Some(msg) = command_rx.recv().await { - match msg { - ProofQueueCommand::AddProofs(proofs) => { - let start = Instant::now(); - for proof in proofs { - self.push(proof); - } - counters::PROOF_QUEUE_ADD_PROOFS_DURATION.observe_duration(start.elapsed()); - - let updated_back_pressure = self.qs_back_pressure(); - if 
updated_back_pressure != back_pressure { - back_pressure = updated_back_pressure; - if back_pressure_tx.send(back_pressure).await.is_err() { - debug!("Failed to send back_pressure for proposal"); - } - } - }, - ProofQueueCommand::PullProofs { - excluded_batches, - max_txns, - max_bytes, - return_non_full, - response_sender, - } => { - let (proofs, full) = self.pull_proofs( - &excluded_batches, - max_txns, - max_bytes, - return_non_full, - ); - if let Err(e) = response_sender.send((proofs, full)) { - warn!("Failed to send response to PullProofs: {:?}", e); - } - }, - ProofQueueCommand::MarkCommitted(batches, block_timestamp) => { - let start = Instant::now(); - self.mark_committed(batches); - counters::PROOF_QUEUE_COMMIT_DURATION.observe_duration(start.elapsed()); - - let start = Instant::now(); - self.handle_updated_block_timestamp(block_timestamp); - counters::PROOF_QUEUE_UPDATE_TIMESTAMP_DURATION - .observe_duration(start.elapsed()); - - let updated_back_pressure = self.qs_back_pressure(); - if updated_back_pressure != back_pressure { - back_pressure = updated_back_pressure; - if back_pressure_tx.send(back_pressure).await.is_err() { - debug!("Failed to send back_pressure for proposal"); - } - } - }, - ProofQueueCommand::AddBatches(batch_summaries) => { - let start = Instant::now(); - self.add_batch_summaries(batch_summaries); - counters::PROOF_QUEUE_ADD_BATCH_SUMMARIES_DURATION - .observe_duration(start.elapsed()); - }, - } - } - } - } } From 4a104c228441a461b38d28fc7d366183f5124b5d Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 25 Jun 2024 16:26:34 -0700 Subject: [PATCH 46/67] Move some code to a function --- consensus/src/quorum_store/counters.rs | 21 -------- consensus/src/quorum_store/utils.rs | 72 +++++++++++++++----------- 2 files changed, 43 insertions(+), 50 deletions(-) diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 05074e14245e4..29d893c4f43f8 100644 --- 
a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -90,17 +90,6 @@ pub static PROOF_MANAGER_MAIN_LOOP: Lazy = Lazy::new(|| { ) }); -/// Duration of each run of the event loop. -pub static PROOF_QUEUE_MAIN_LOOP: Lazy = Lazy::new(|| { - DurationHistogram::new( - register_histogram!( - "quorum_store_proof_queue_main_loop", - "Duration of the each run of the proof queue event loop" - ) - .unwrap(), - ) -}); - pub static PROOF_QUEUE_ADD_BATCH_SUMMARIES_DURATION: Lazy = Lazy::new(|| { DurationHistogram::new( register_histogram!( @@ -111,16 +100,6 @@ pub static PROOF_QUEUE_ADD_BATCH_SUMMARIES_DURATION: Lazy = L ) }); -pub static PROOF_QUEUE_ADD_PROOFS_DURATION: Lazy = Lazy::new(|| { - DurationHistogram::new( - register_histogram!( - "quorum_store_proof_queue_add_proofs_duration", - "Duration of adding proofs to proof queue" - ) - .unwrap(), - ) -}); - pub static PROOF_QUEUE_COMMIT_DURATION: Lazy = Lazy::new(|| { DurationHistogram::new( register_histogram!( diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 8365b553bc2c3..e4ee5357dde9d 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -319,6 +319,7 @@ impl ProofQueue { &mut self, batch_summaries: Vec<(BatchInfo, Vec)>, ) { + let start = Instant::now(); for (batch_info, txn_summaries) in batch_summaries { let batch_key = BatchKey::from_info(&batch_info); for txn_summary in txn_summaries { @@ -329,6 +330,43 @@ impl ProofQueue { } self.batches_with_txn_summary.insert(batch_key); } + counters::PROOF_QUEUE_ADD_BATCH_SUMMARIES_DURATION.observe_duration(start.elapsed()); + } + + fn log_remaining_data_after_pull( + &self, + excluded_batches: &HashSet, + pulled_proofs: &[ProofOfStore], + ) { + let mut num_proofs_remaining_after_pull = 0; + let mut num_txns_remaining_after_pull = 0; + let excluded_batch_keys = excluded_batches + .iter() + .map(BatchKey::from_info) + .collect::>(); + let mut remaining_proofs = 
vec![]; + for (batch_key, proof) in &self.batch_to_proof { + if proof.is_some() + && !pulled_proofs + .iter() + .any(|p| BatchKey::from_info(p.info()) == *batch_key) + && !excluded_batch_keys.contains(batch_key) + { + num_proofs_remaining_after_pull += 1; + num_txns_remaining_after_pull += proof.as_ref().unwrap().0.num_txns(); + remaining_proofs.push(proof.as_ref().unwrap().0.clone()); + } + } + let pulled_txns = pulled_proofs.iter().map(|p| p.num_txns()).sum::(); + info!( + "pulled_proofs: {}, pulled_txns: {}, remaining_proofs: {:?}", + pulled_proofs.len(), + pulled_txns, + remaining_proofs + ); + counters::NUM_PROOFS_IN_PROOF_QUEUE_AFTER_PULL + .observe(num_proofs_remaining_after_pull as f64); + counters::NUM_TXNS_IN_PROOF_QUEUE_AFTER_PULL.observe(num_txns_remaining_after_pull as f64); } // gets excluded and iterates over the vector returning non excluded or expired entries. @@ -401,36 +439,8 @@ impl ProofQueue { counters::BLOCK_BYTES_WHEN_PULL.observe(cur_bytes as f64); counters::PROOF_SIZE_WHEN_PULL.observe(ret.len() as f64); counters::EXCLUDED_TXNS_WHEN_PULL.observe(excluded_txns as f64); - // Number of proofs remaining in proof queue after the pull - let mut num_proofs_remaining_after_pull = 0; - let mut num_txns_remaining_after_pull = 0; - let excluded_batch_keys = excluded_batches - .iter() - .map(BatchKey::from_info) - .collect::>(); - let mut remaining_proofs = vec![]; - for (batch_key, proof) in &self.batch_to_proof { - if proof.is_some() - && !ret - .iter() - .any(|p| BatchKey::from_info(p.info()) == *batch_key) - && !excluded_batch_keys.contains(batch_key) - { - num_proofs_remaining_after_pull += 1; - num_txns_remaining_after_pull += proof.as_ref().unwrap().0.num_txns(); - remaining_proofs.push(proof.as_ref().unwrap().0.clone()); - } - } - info!( - "cur_txns: {}, remaining_proofs: {:?}", - cur_txns, remaining_proofs - ); - counters::NUM_PROOFS_IN_PROOF_QUEUE_AFTER_PULL - .observe(num_proofs_remaining_after_pull as f64); - 
counters::NUM_TXNS_IN_PROOF_QUEUE_AFTER_PULL - .observe(num_txns_remaining_after_pull as f64); - + self.log_remaining_data_after_pull(excluded_batches, &ret); // Stable sort, so the order of proofs within an author will not change. ret.sort_by_key(|proof| Reverse(proof.gas_bucket_start())); (ret, !full) @@ -440,6 +450,7 @@ impl ProofQueue { } pub(crate) fn handle_updated_block_timestamp(&mut self, block_timestamp: u64) { + let start = Instant::now(); assert!( self.latest_block_timestamp <= block_timestamp, "Decreasing block timestamp" @@ -475,6 +486,7 @@ impl ProofQueue { } } } + counters::PROOF_QUEUE_UPDATE_TIMESTAMP_DURATION.observe_duration(start.elapsed()); counters::NUM_PROOFS_EXPIRED_WHEN_COMMIT.inc_by(num_expired_but_not_committed); } @@ -523,6 +535,7 @@ impl ProofQueue { // Mark in the hashmap committed PoS, but keep them until they expire pub(crate) fn mark_committed(&mut self, batches: Vec) { + let start = Instant::now(); for batch in &batches { let batch_key = BatchKey::from_info(batch); if let Some(Some((proof, insertion_time))) = self.batch_to_proof.get(&batch_key) { @@ -545,5 +558,6 @@ impl ProofQueue { } !batches.is_empty() }); + counters::PROOF_QUEUE_COMMIT_DURATION.observe_duration(start.elapsed()); } } From 529e12a022171b8c209d9fdd434d01f1842cf3f0 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 25 Jun 2024 17:00:35 -0700 Subject: [PATCH 47/67] Minor fixes --- consensus/src/quorum_store/utils.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index e8b273021e8c7..bc985e815610e 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -322,13 +322,13 @@ impl ProofQueue { let start = Instant::now(); for (batch_info, txn_summaries) in batch_summaries { let batch_key = BatchKey::from_info(&batch_info); - for txn_summary in txn_summaries { + for txn_summary in &txn_summaries { 
self.txn_summary_to_batches - .entry(txn_summary) + .entry(*txn_summary) .or_default() .insert(batch_key.clone()); } - self.batches_with_txn_summary.insert(batch_key); + self.batch_to_txn_summaries.insert(batch_key, txn_summaries); } counters::PROOF_QUEUE_ADD_BATCH_SUMMARIES_DURATION.observe_duration(start.elapsed()); } @@ -383,10 +383,10 @@ impl ProofQueue { ) -> (Vec, bool) { let mut ret = vec![]; let mut cur_bytes = 0; - let mut cur_txns: u64 = 0; - let mut total_txns = 0; + let mut cur_txns = 0; let mut excluded_txns = 0; let mut full = false; + // Set of all the excluded transactions and all the transactions included in the result let mut included_and_excluded_txns = HashSet::new(); for batch_info in excluded_batches { let batch_key = BatchKey::from_info(batch_info); @@ -411,6 +411,7 @@ impl ProofQueue { } else if let Some(Some((proof, insertion_time))) = self.batch_to_proof.get(&sort_key.batch_key) { + // Calculate the number of unique transactions if this batch is included in the result let temp_txns = if let Some(txn_summaries) = self.batch_to_txn_summaries.get(&sort_key.batch_key) { @@ -430,7 +431,8 @@ impl ProofQueue { return false; } cur_bytes += batch.num_bytes(); - total_txns += batch.num_txns(); + // Add this batch to included_and_excluded_txns and calculate the number of + // unique transactions added in the result so far. 
cur_txns += self.batch_to_txn_summaries.get(&sort_key.batch_key).map_or( batch.num_txns(), |summaries| { @@ -465,6 +467,7 @@ impl ProofQueue { ); if full || return_non_full { + let total_txns = ret.iter().map(|p| p.num_txns()).sum::(); counters::BLOCK_SIZE_WHEN_PULL.observe(cur_txns as f64); counters::TOTAL_BLOCK_SIZE_WHEN_PULL.observe(total_txns as f64); counters::EXTRA_TXNS_WHEN_PULL.observe((total_txns - cur_txns) as f64); @@ -578,7 +581,7 @@ impl ProofQueue { self.dec_remaining(&batch.author(), batch.num_txns()); } self.batch_to_proof.insert(batch_key.clone(), None); - self.batches_with_txn_summary.remove(&batch_key); + self.batch_to_txn_summaries.remove(&batch_key); } let batch_keys = batches .iter() From 80aa8bff7448a2c68812b2f76d7d5924befd508a Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 26 Jun 2024 12:54:01 -0700 Subject: [PATCH 48/67] Add max_unique_txns parameter --- config/src/config/api_config.rs | 2 +- config/src/config/consensus_config.rs | 20 ++++++--- .../consensus-types/src/request_response.rs | 9 ++-- consensus/src/dag/dag_driver.rs | 1 + consensus/src/epoch_manager.rs | 1 + consensus/src/liveness/proposal_generator.rs | 21 +++++++--- .../src/liveness/proposal_generator_test.rs | 4 ++ consensus/src/payload_client/mixed.rs | 8 ++++ consensus/src/payload_client/mod.rs | 1 + consensus/src/payload_client/user/mod.rs | 4 ++ .../user/quorum_store_client.rs | 5 +++ .../direct_mempool_quorum_store.rs | 5 ++- consensus/src/quorum_store/proof_manager.rs | 14 +++++-- .../tests/direct_mempool_quorum_store_test.rs | 1 + .../quorum_store/tests/proof_manager_test.rs | 1 + consensus/src/quorum_store/tests/utils.rs | 4 +- consensus/src/quorum_store/utils.rs | 41 +++++++++++-------- consensus/src/round_manager_fuzzing.rs | 1 + consensus/src/round_manager_test.rs | 1 + .../src/test_utils/mock_payload_manager.rs | 1 + 20 files changed, 107 insertions(+), 38 deletions(-) diff --git a/config/src/config/api_config.rs b/config/src/config/api_config.rs index 
1758d8c227994..939c4bf603ab7 100644 --- a/config/src/config/api_config.rs +++ b/config/src/config/api_config.rs @@ -125,7 +125,7 @@ impl Default for ApiConfig { transaction_submission_enabled: default_enabled(), transaction_simulation_enabled: default_enabled(), max_submit_transaction_batch_size: DEFAULT_MAX_SUBMIT_TRANSACTION_BATCH_SIZE, - max_block_transactions_page_size: *MAX_RECEIVING_BLOCK_TXNS as u16, + max_block_transactions_page_size: MAX_RECEIVING_BLOCK_TXNS as u16, max_transactions_page_size: DEFAULT_MAX_PAGE_SIZE, max_events_page_size: DEFAULT_MAX_PAGE_SIZE, max_account_resources_page_size: DEFAULT_MAX_ACCOUNT_RESOURCES_PAGE_SIZE, diff --git a/config/src/config/consensus_config.rs b/config/src/config/consensus_config.rs index 501a80c73f852..d9dfdc3b15d7f 100644 --- a/config/src/config/consensus_config.rs +++ b/config/src/config/consensus_config.rs @@ -6,16 +6,24 @@ use crate::config::{ config_sanitizer::ConfigSanitizer, node_config_loader::NodeType, Error, NodeConfig, QuorumStoreConfig, ReliableBroadcastConfig, SafetyRulesConfig, BATCH_PADDING_BYTES, }; -use aptos_crypto::_once_cell::sync::Lazy; use aptos_types::chain_id::ChainId; use cfg_if::cfg_if; use serde::{Deserialize, Serialize}; use std::path::PathBuf; // NOTE: when changing, make sure to update QuorumStoreBackPressureConfig::backlog_txn_limit_count as well. 
-const MAX_SENDING_BLOCK_TXNS: u64 = 1900; -pub(crate) static MAX_RECEIVING_BLOCK_TXNS: Lazy = - Lazy::new(|| 10000.max(2 * MAX_SENDING_BLOCK_TXNS)); +const MAX_SENDING_BLOCK_UNIQUE_TXNS: u64 = 1900; +pub const MAX_RECEIVING_BLOCK_TXNS: u64 = if (2 * MAX_SENDING_BLOCK_UNIQUE_TXNS) > 10000 { + 2 * MAX_SENDING_BLOCK_UNIQUE_TXNS +} else { + 10000 +}; +const MAX_SENDING_BLOCK_TXNS: u64 = + if MAX_SENDING_BLOCK_UNIQUE_TXNS > MAX_RECEIVING_BLOCK_TXNS.saturating_sub(2000) { + MAX_SENDING_BLOCK_UNIQUE_TXNS + } else { + MAX_RECEIVING_BLOCK_TXNS.saturating_sub(2000) + }; // stop reducing size at this point, so 1MB transactions can still go through const MIN_BLOCK_BYTES_OVERRIDE: u64 = 1024 * 1024 + BATCH_PADDING_BYTES as u64; @@ -25,6 +33,7 @@ pub struct ConsensusConfig { // length of inbound queue of messages pub max_network_channel_size: usize, pub max_sending_block_txns: u64, + pub max_sending_block_unique_txns: u64, pub max_sending_block_bytes: u64, pub max_sending_inline_txns: u64, pub max_sending_inline_bytes: u64, @@ -153,8 +162,9 @@ impl Default for ConsensusConfig { ConsensusConfig { max_network_channel_size: 1024, max_sending_block_txns: MAX_SENDING_BLOCK_TXNS, + max_sending_block_unique_txns: MAX_SENDING_BLOCK_UNIQUE_TXNS, max_sending_block_bytes: 3 * 1024 * 1024, // 3MB - max_receiving_block_txns: *MAX_RECEIVING_BLOCK_TXNS, + max_receiving_block_txns: MAX_RECEIVING_BLOCK_TXNS, max_sending_inline_txns: 100, max_sending_inline_bytes: 200 * 1024, // 200 KB max_receiving_block_bytes: 6 * 1024 * 1024, // 6MB diff --git a/consensus/consensus-types/src/request_response.rs b/consensus/consensus-types/src/request_response.rs index a28082b6c0c7f..a65273b19626c 100644 --- a/consensus/consensus-types/src/request_response.rs +++ b/consensus/consensus-types/src/request_response.rs @@ -9,7 +9,9 @@ use std::{fmt, fmt::Formatter}; pub enum GetPayloadCommand { /// Request to pull block to submit to consensus. 
GetPayloadRequest( - // max block size + // max number of transactions in the block + u64, + // max number of unique transactions in the block u64, // max byte size u64, @@ -31,6 +33,7 @@ impl fmt::Display for GetPayloadCommand { match self { GetPayloadCommand::GetPayloadRequest( max_txns, + max_unique_txns, max_bytes, max_inline_txns, max_inline_bytes, @@ -40,8 +43,8 @@ impl fmt::Display for GetPayloadCommand { ) => { write!( f, - "GetPayloadRequest [max_txns: {}, max_bytes: {}, max_inline_txns: {}, max_inline_bytes:{}, return_non_full: {}, excluded: {}]", - max_txns, max_bytes, max_inline_txns, max_inline_bytes, return_non_full, excluded + "GetPayloadRequest [max_txns: {}, max_unique_txns: {}, max_bytes: {}, max_inline_txns: {}, max_inline_bytes:{}, return_non_full: {}, excluded: {}]", + max_txns, max_unique_txns, max_bytes, max_inline_txns, max_inline_bytes, return_non_full, excluded ) }, } diff --git a/consensus/src/dag/dag_driver.rs b/consensus/src/dag/dag_driver.rs index 2c787f5b1664b..f2fe23ea42524 100644 --- a/consensus/src/dag/dag_driver.rs +++ b/consensus/src/dag/dag_driver.rs @@ -257,6 +257,7 @@ impl DagDriver { .pull_payload( Duration::from_millis(self.payload_config.payload_pull_max_poll_time_ms), max_txns, + max_txns, max_size_bytes, // TODO: Set max_inline_items and max_inline_bytes correctly 100, diff --git a/consensus/src/epoch_manager.rs b/consensus/src/epoch_manager.rs index b9957ef1d2d75..0de227d9fafde 100644 --- a/consensus/src/epoch_manager.rs +++ b/consensus/src/epoch_manager.rs @@ -839,6 +839,7 @@ impl EpochManager

{ self.time_service.clone(), Duration::from_millis(self.config.quorum_store_poll_time_ms), self.config.max_sending_block_txns, + self.config.max_sending_block_unique_txns, self.config.max_sending_block_bytes, self.config.max_sending_inline_txns, self.config.max_sending_inline_bytes, diff --git a/consensus/src/liveness/proposal_generator.rs b/consensus/src/liveness/proposal_generator.rs index ea2145a62d93a..ebd8ba691528a 100644 --- a/consensus/src/liveness/proposal_generator.rs +++ b/consensus/src/liveness/proposal_generator.rs @@ -164,6 +164,8 @@ pub struct ProposalGenerator { quorum_store_poll_time: Duration, // Max number of transactions to be added to a proposed block. max_block_txns: u64, + // Max number of unique transactions to be added to a proposed block. + max_block_unique_txns: u64, // Max number of bytes to be added to a proposed block. max_block_bytes: u64, // Max number of inline transactions to be added to a proposed block. @@ -193,6 +195,7 @@ impl ProposalGenerator { time_service: Arc, quorum_store_poll_time: Duration, max_block_txns: u64, + max_block_unique_txns: u64, max_block_bytes: u64, max_inline_txns: u64, max_inline_bytes: u64, @@ -210,6 +213,7 @@ impl ProposalGenerator { time_service, quorum_store_poll_time, max_block_txns, + max_block_unique_txns, max_block_bytes, max_inline_txns, max_inline_bytes, @@ -312,9 +316,14 @@ impl ProposalGenerator { let voting_power_ratio = proposer_election.get_voting_power_participation_ratio(round); - let (max_block_txns, max_block_bytes, max_txns_from_block_to_execute, proposal_delay) = - self.calculate_max_block_sizes(voting_power_ratio, timestamp, round) - .await; + let ( + max_block_unique_txns, + max_block_bytes, + max_txns_from_block_to_execute, + proposal_delay, + ) = self + .calculate_max_block_sizes(voting_power_ratio, timestamp, round) + .await; PROPOSER_DELAY_PROPOSAL.set(proposal_delay.as_secs_f64()); if !proposal_delay.is_zero() { @@ -332,7 +341,8 @@ impl ProposalGenerator { .max() .unwrap_or(0); 
// Use non-backpressure reduced values for computing fill_fraction - let max_fill_fraction = (max_pending_block_len as f32 / self.max_block_txns as f32) + let max_fill_fraction = (max_pending_block_len as f32 + / self.max_block_unique_txns as f32) .max(max_pending_block_bytes as f32 / self.max_block_bytes as f32); PROPOSER_PENDING_BLOCKS_COUNT.set(pending_blocks.len() as i64); PROPOSER_PENDING_BLOCKS_FILL_FRACTION.set(max_fill_fraction as f64); @@ -349,7 +359,8 @@ impl ProposalGenerator { .payload_client .pull_payload( self.quorum_store_poll_time.saturating_sub(proposal_delay), - max_block_txns, + self.max_block_txns, + max_block_unique_txns, max_block_bytes, // TODO: Set max_inline_txns and max_inline_bytes correctly self.max_inline_txns, diff --git a/consensus/src/liveness/proposal_generator_test.rs b/consensus/src/liveness/proposal_generator_test.rs index ab4102b7a39f3..1ee89e2e6b756 100644 --- a/consensus/src/liveness/proposal_generator_test.rs +++ b/consensus/src/liveness/proposal_generator_test.rs @@ -37,6 +37,7 @@ async fn test_proposal_generation_empty_tree() { Arc::new(SimulatedTimeService::new()), Duration::ZERO, 1, + 1, 10, 1, 10, @@ -81,6 +82,7 @@ async fn test_proposal_generation_parent() { Arc::new(SimulatedTimeService::new()), Duration::ZERO, 1, + 1, 1000, 1, 500, @@ -157,6 +159,7 @@ async fn test_old_proposal_generation() { Arc::new(SimulatedTimeService::new()), Duration::ZERO, 1, + 1, 1000, 1, 500, @@ -198,6 +201,7 @@ async fn test_correct_failed_authors() { Arc::new(SimulatedTimeService::new()), Duration::ZERO, 1, + 1, 1000, 1, 500, diff --git a/consensus/src/payload_client/mixed.rs b/consensus/src/payload_client/mixed.rs index 63cc7f00a62c3..4ccf31ce46a92 100644 --- a/consensus/src/payload_client/mixed.rs +++ b/consensus/src/payload_client/mixed.rs @@ -68,6 +68,7 @@ impl PayloadClient for MixedPayloadClient { &self, mut max_poll_time: Duration, mut max_items: u64, + mut max_unique_items: u64, mut max_bytes: u64, max_inline_items: u64, 
max_inline_bytes: u64, @@ -101,6 +102,7 @@ impl PayloadClient for MixedPayloadClient { debug!("num_validator_txns={}", validator_txns.len()); // Update constraints with validator txn pull results. max_items -= validator_txns.len() as u64; + max_unique_items -= validator_txns.len() as u64; max_bytes -= validator_txns .iter() .map(|txn| txn.size_in_bytes()) @@ -113,6 +115,7 @@ impl PayloadClient for MixedPayloadClient { .pull( max_poll_time, max_items, + max_unique_items, max_bytes, max_inline_items, max_inline_bytes, @@ -152,6 +155,7 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { .pull_payload( Duration::from_secs(1), // max_poll_time 99, // max_items + 120, // max_unique_items 1048576, // size limit: 1MB 50, 500000, // inline limit: 500KB @@ -175,6 +179,7 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { .pull_payload( Duration::from_micros(500), // max_poll_time 99, // max_items + 120, // max_unique_items 1048576, // size limit: 1MB 50, 500000, // inline limit: 500KB @@ -198,6 +203,7 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { .pull_payload( Duration::from_secs(1), // max_poll_time 1, // max_items + 2, // max_unique_items 1048576, // size limit: 1MB 0, 0, // inline limit: 0 @@ -221,6 +227,7 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { .pull_payload( Duration::from_secs(1), // max_poll_time 99, // max_items + 120, // max_unique_items all_validator_txns[0].size_in_bytes() as u64, 50, all_validator_txns[0].size_in_bytes() as u64, @@ -262,6 +269,7 @@ async fn mixed_payload_client_should_respect_validator_txn_feature_flag() { .pull_payload( Duration::from_millis(50), // max_poll_time 99, // max_items + 120, // max_unique_items 1048576, // size limit: 1MB 50, 500000, // inline limit: 500KB diff --git a/consensus/src/payload_client/mod.rs b/consensus/src/payload_client/mod.rs index d37cbfbbdb5ac..61cc98a5ba036 100644 --- a/consensus/src/payload_client/mod.rs +++ 
b/consensus/src/payload_client/mod.rs @@ -18,6 +18,7 @@ pub trait PayloadClient: Send + Sync { &self, max_poll_time: Duration, max_items: u64, + max_unique_items: u64, max_bytes: u64, max_inline_items: u64, max_inline_bytes: u64, diff --git a/consensus/src/payload_client/user/mod.rs b/consensus/src/payload_client/user/mod.rs index 6a5cc6db745ff..a19f77e3dfc11 100644 --- a/consensus/src/payload_client/user/mod.rs +++ b/consensus/src/payload_client/user/mod.rs @@ -18,6 +18,7 @@ pub trait UserPayloadClient: Send + Sync { &self, max_poll_time: Duration, max_items: u64, + max_unique_items: u64, max_bytes: u64, max_inline_items: u64, max_inline_bytes: u64, @@ -49,6 +50,7 @@ impl UserPayloadClient for DummyClient { &self, max_poll_time: Duration, mut max_items: u64, + mut max_unique_items: u64, mut max_bytes: u64, _max_inline_items: u64, _max_inline_bytes: u64, @@ -63,6 +65,7 @@ impl UserPayloadClient for DummyClient { let mut txns = vec![]; while timer.elapsed() < max_poll_time && max_items >= 1 + && max_unique_items >= 1 && max_bytes >= 1 && nxt_txn_idx < self.txns.len() { @@ -73,6 +76,7 @@ impl UserPayloadClient for DummyClient { break; } max_items -= 1; + max_unique_items -= 1; max_bytes -= txn_size; nxt_txn_idx += 1; txns.push(txn); diff --git a/consensus/src/payload_client/user/quorum_store_client.rs b/consensus/src/payload_client/user/quorum_store_client.rs index c6f49a2f1e6c9..1cd3caefd50aa 100644 --- a/consensus/src/payload_client/user/quorum_store_client.rs +++ b/consensus/src/payload_client/user/quorum_store_client.rs @@ -46,6 +46,7 @@ impl QuorumStoreClient { async fn pull_internal( &self, max_items: u64, + max_unique_items: u64, max_bytes: u64, max_inline_items: u64, max_inline_bytes: u64, @@ -55,6 +56,7 @@ impl QuorumStoreClient { let (callback, callback_rcv) = oneshot::channel(); let req = GetPayloadCommand::GetPayloadRequest( max_items, + max_unique_items, max_bytes, max_inline_items, max_inline_bytes, @@ -88,6 +90,7 @@ impl UserPayloadClient for 
QuorumStoreClient { &self, max_poll_time: Duration, max_items: u64, + max_unique_items: u64, max_bytes: u64, max_inline_items: u64, max_inline_bytes: u64, @@ -117,6 +120,7 @@ impl UserPayloadClient for QuorumStoreClient { let payload = self .pull_internal( max_items, + max_unique_items, max_bytes, max_inline_items, max_inline_bytes, @@ -138,6 +142,7 @@ impl UserPayloadClient for QuorumStoreClient { max_poll_time_ms = max_poll_time.as_millis() as u64, payload_len = payload.len(), max_items = max_items, + max_unique_items = max_unique_items, max_bytes = max_bytes, max_inline_items = max_inline_items, max_inline_bytes = max_inline_bytes, diff --git a/consensus/src/quorum_store/direct_mempool_quorum_store.rs b/consensus/src/quorum_store/direct_mempool_quorum_store.rs index 661ff46a1d4dd..8a6a61b10c066 100644 --- a/consensus/src/quorum_store/direct_mempool_quorum_store.rs +++ b/consensus/src/quorum_store/direct_mempool_quorum_store.rs @@ -138,7 +138,8 @@ impl DirectMempoolQuorumStore { async fn handle_consensus_request(&self, req: GetPayloadCommand) { match req { GetPayloadCommand::GetPayloadRequest( - max_txns, + _max_txns, + max_unique_txns, max_bytes, _max_inline_txns, _max_inline_bytes, @@ -147,7 +148,7 @@ impl DirectMempoolQuorumStore { callback, ) => { self.handle_block_request( - max_txns, + max_unique_txns, max_bytes, return_non_full, payload_filter, diff --git a/consensus/src/quorum_store/proof_manager.rs b/consensus/src/quorum_store/proof_manager.rs index 92db27fc43c05..cfa9760a2a888 100644 --- a/consensus/src/quorum_store/proof_manager.rs +++ b/consensus/src/quorum_store/proof_manager.rs @@ -205,6 +205,7 @@ impl ProofManager { match msg { GetPayloadCommand::GetPayloadRequest( max_txns, + max_unique_txns, max_bytes, max_inline_txns, max_inline_bytes, @@ -220,9 +221,14 @@ impl ProofManager { PayloadFilter::InQuorumStore(proofs) => proofs, }; - let (proof_block, proof_queue_fully_utilized) = self - .proofs_for_consensus - .pull_proofs(&excluded_batches, 
max_txns, max_bytes, return_non_full); + let (proof_block, proof_queue_fully_utilized) = + self.proofs_for_consensus.pull_proofs( + &excluded_batches, + max_txns, + max_unique_txns, + max_bytes, + return_non_full, + ); counters::NUM_BATCHES_WITHOUT_PROOF_OF_STORE.observe(self.batch_queue.len() as f64); counters::PROOF_QUEUE_FULLY_UTILIZED @@ -234,7 +240,7 @@ impl ProofManager { if self.allow_batches_without_pos_in_proposal && proof_queue_fully_utilized { inline_block = self.batch_queue.pull_batches( - min(max_txns - cur_txns, max_inline_txns), + min(max_unique_txns - cur_txns, max_inline_txns), min(max_bytes - cur_bytes, max_inline_bytes), excluded_batches .iter() diff --git a/consensus/src/quorum_store/tests/direct_mempool_quorum_store_test.rs b/consensus/src/quorum_store/tests/direct_mempool_quorum_store_test.rs index 03a5e903a554c..35fdc3c7ed97a 100644 --- a/consensus/src/quorum_store/tests/direct_mempool_quorum_store_test.rs +++ b/consensus/src/quorum_store/tests/direct_mempool_quorum_store_test.rs @@ -30,6 +30,7 @@ async fn test_block_request_no_txns() { let (consensus_callback, consensus_callback_rcv) = oneshot::channel(); consensus_to_quorum_store_sender .try_send(GetPayloadCommand::GetPayloadRequest( + 100, 100, 1000, 50, diff --git a/consensus/src/quorum_store/tests/proof_manager_test.rs b/consensus/src/quorum_store/tests/proof_manager_test.rs index 812a854f62d9c..c9065737fdb15 100644 --- a/consensus/src/quorum_store/tests/proof_manager_test.rs +++ b/consensus/src/quorum_store/tests/proof_manager_test.rs @@ -54,6 +54,7 @@ async fn get_proposal( let (callback_tx, callback_rx) = oneshot::channel(); let filter_set = HashSet::from_iter(filter.iter().cloned()); let req = GetPayloadCommand::GetPayloadRequest( + max_txns, max_txns, 1000000, max_txns / 2, diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index 882a40dfce0c9..47a26a78c8dbf 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ 
b/consensus/src/quorum_store/tests/utils.rs @@ -55,7 +55,7 @@ fn test_proof_queue_sorting() { } // Expect: [600, 300] - let (pulled, _) = proof_queue.pull_proofs(&hashset![], 2, 2, true); + let (pulled, _) = proof_queue.pull_proofs(&hashset![], 4, 2, 2, true); let mut count_author_0 = 0; let mut count_author_1 = 0; let mut prev: Option<&ProofOfStore> = None; @@ -76,7 +76,7 @@ fn test_proof_queue_sorting() { assert_eq!(count_author_1, 1); // Expect: [600, 500, 300, 100] - let (pulled, _) = proof_queue.pull_proofs(&hashset![], 4, 4, true); + let (pulled, _) = proof_queue.pull_proofs(&hashset![], 6, 4, 4, true); let mut count_author_0 = 0; let mut count_author_1 = 0; let mut prev: Option<&ProofOfStore> = None; diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index bc985e815610e..5b52e97a9ee68 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -378,12 +378,14 @@ impl ProofQueue { &mut self, excluded_batches: &HashSet, max_txns: u64, + max_unique_txns: u64, max_bytes: u64, return_non_full: bool, ) -> (Vec, bool) { let mut ret = vec![]; let mut cur_bytes = 0; - let mut cur_txns = 0; + let mut cur_unique_txns = 0; + let mut cur_all_txns = 0; let mut excluded_txns = 0; let mut full = false; // Set of all the excluded transactions and all the transactions included in the result @@ -412,10 +414,10 @@ impl ProofQueue { self.batch_to_proof.get(&sort_key.batch_key) { // Calculate the number of unique transactions if this batch is included in the result - let temp_txns = if let Some(txn_summaries) = + let temp_unique_txns = if let Some(txn_summaries) = self.batch_to_txn_summaries.get(&sort_key.batch_key) { - cur_txns + cur_unique_txns + txn_summaries .iter() .filter(|txn_summary| { @@ -423,29 +425,36 @@ impl ProofQueue { }) .count() as u64 } else { - cur_txns + batch.num_txns() + cur_unique_txns + batch.num_txns() }; - if cur_bytes + batch.num_bytes() > max_bytes || temp_txns > max_txns { + if 
cur_bytes + batch.num_bytes() > max_bytes + || temp_unique_txns > max_unique_txns + || cur_all_txns + batch.num_txns() > max_txns + { // Exceeded the limit for requested bytes or number of transactions. full = true; return false; } cur_bytes += batch.num_bytes(); + cur_all_txns += batch.num_txns(); // Add this batch to included_and_excluded_txns and calculate the number of // unique transactions added in the result so far. - cur_txns += self.batch_to_txn_summaries.get(&sort_key.batch_key).map_or( - batch.num_txns(), - |summaries| { + cur_unique_txns += self + .batch_to_txn_summaries + .get(&sort_key.batch_key) + .map_or(batch.num_txns(), |summaries| { summaries .iter() .filter(|summary| included_and_excluded_txns.insert(**summary)) .count() as u64 - }, - ); + }); let bucket = proof.gas_bucket_start(); ret.push(proof.clone()); counters::pos_to_pull(bucket, insertion_time.elapsed().as_secs_f64()); - if cur_bytes == max_bytes || cur_txns == max_txns { + if cur_bytes == max_bytes + || cur_all_txns == max_txns + || cur_unique_txns == max_unique_txns + { full = true; return false; } @@ -459,7 +468,8 @@ impl ProofQueue { info!( // before non full check byte_size = cur_bytes, - block_size = cur_txns, + block_total_txns = cur_all_txns, + block_unique_txns = cur_unique_txns, batch_count = ret.len(), full = full, return_non_full = return_non_full, @@ -467,10 +477,9 @@ impl ProofQueue { ); if full || return_non_full { - let total_txns = ret.iter().map(|p| p.num_txns()).sum::(); - counters::BLOCK_SIZE_WHEN_PULL.observe(cur_txns as f64); - counters::TOTAL_BLOCK_SIZE_WHEN_PULL.observe(total_txns as f64); - counters::EXTRA_TXNS_WHEN_PULL.observe((total_txns - cur_txns) as f64); + counters::BLOCK_SIZE_WHEN_PULL.observe(cur_unique_txns as f64); + counters::TOTAL_BLOCK_SIZE_WHEN_PULL.observe(cur_all_txns as f64); + counters::EXTRA_TXNS_WHEN_PULL.observe((cur_all_txns - cur_unique_txns) as f64); counters::BLOCK_BYTES_WHEN_PULL.observe(cur_bytes as f64); 
counters::PROOF_SIZE_WHEN_PULL.observe(ret.len() as f64); counters::EXCLUDED_TXNS_WHEN_PULL.observe(excluded_txns as f64); diff --git a/consensus/src/round_manager_fuzzing.rs b/consensus/src/round_manager_fuzzing.rs index 8c58935500ffc..a41b966a48ff9 100644 --- a/consensus/src/round_manager_fuzzing.rs +++ b/consensus/src/round_manager_fuzzing.rs @@ -183,6 +183,7 @@ fn create_node_for_fuzzing() -> RoundManager { time_service, Duration::ZERO, 1, + 1, 1024, 1, 1024, diff --git a/consensus/src/round_manager_test.rs b/consensus/src/round_manager_test.rs index c90e8d3387a73..8c9ab505e0bc2 100644 --- a/consensus/src/round_manager_test.rs +++ b/consensus/src/round_manager_test.rs @@ -304,6 +304,7 @@ impl NodeSetup { time_service.clone(), Duration::ZERO, 10, + 20, 1000, 5, 500, diff --git a/consensus/src/test_utils/mock_payload_manager.rs b/consensus/src/test_utils/mock_payload_manager.rs index d28337e51ebfe..e855347ec4ebb 100644 --- a/consensus/src/test_utils/mock_payload_manager.rs +++ b/consensus/src/test_utils/mock_payload_manager.rs @@ -58,6 +58,7 @@ impl PayloadClient for MockPayloadManager { &self, _max_poll_time: Duration, _max_size: u64, + _max_unique_size: u64, _max_bytes: u64, _max_inline_size: u64, _max_inline_bytes: u64, From 5fce8e641f2407ba27b029546207cd1c83152c66 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 26 Jun 2024 13:07:53 -0700 Subject: [PATCH 49/67] Use Lazy --- config/src/config/api_config.rs | 2 +- config/src/config/consensus_config.rs | 36 ++++++++++++++++----------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/config/src/config/api_config.rs b/config/src/config/api_config.rs index 939c4bf603ab7..1758d8c227994 100644 --- a/config/src/config/api_config.rs +++ b/config/src/config/api_config.rs @@ -125,7 +125,7 @@ impl Default for ApiConfig { transaction_submission_enabled: default_enabled(), transaction_simulation_enabled: default_enabled(), max_submit_transaction_batch_size: DEFAULT_MAX_SUBMIT_TRANSACTION_BATCH_SIZE, 
- max_block_transactions_page_size: MAX_RECEIVING_BLOCK_TXNS as u16, + max_block_transactions_page_size: *MAX_RECEIVING_BLOCK_TXNS as u16, max_transactions_page_size: DEFAULT_MAX_PAGE_SIZE, max_events_page_size: DEFAULT_MAX_PAGE_SIZE, max_account_resources_page_size: DEFAULT_MAX_ACCOUNT_RESOURCES_PAGE_SIZE, diff --git a/config/src/config/consensus_config.rs b/config/src/config/consensus_config.rs index d9dfdc3b15d7f..597e7b4021fff 100644 --- a/config/src/config/consensus_config.rs +++ b/config/src/config/consensus_config.rs @@ -6,6 +6,7 @@ use crate::config::{ config_sanitizer::ConfigSanitizer, node_config_loader::NodeType, Error, NodeConfig, QuorumStoreConfig, ReliableBroadcastConfig, SafetyRulesConfig, BATCH_PADDING_BYTES, }; +use aptos_crypto::_once_cell::sync::Lazy; use aptos_types::chain_id::ChainId; use cfg_if::cfg_if; use serde::{Deserialize, Serialize}; @@ -13,17 +14,22 @@ use std::path::PathBuf; // NOTE: when changing, make sure to update QuorumStoreBackPressureConfig::backlog_txn_limit_count as well. 
const MAX_SENDING_BLOCK_UNIQUE_TXNS: u64 = 1900; -pub const MAX_RECEIVING_BLOCK_TXNS: u64 = if (2 * MAX_SENDING_BLOCK_UNIQUE_TXNS) > 10000 { - 2 * MAX_SENDING_BLOCK_UNIQUE_TXNS -} else { - 10000 -}; -const MAX_SENDING_BLOCK_TXNS: u64 = - if MAX_SENDING_BLOCK_UNIQUE_TXNS > MAX_RECEIVING_BLOCK_TXNS.saturating_sub(2000) { - MAX_SENDING_BLOCK_UNIQUE_TXNS - } else { - MAX_RECEIVING_BLOCK_TXNS.saturating_sub(2000) - }; +pub(crate) static MAX_RECEIVING_BLOCK_TXNS: Lazy = + Lazy::new(|| 10000.max(2 * MAX_SENDING_BLOCK_UNIQUE_TXNS)); +static MAX_SENDING_BLOCK_TXNS: Lazy = + Lazy::new(|| MAX_SENDING_BLOCK_UNIQUE_TXNS.max(MAX_RECEIVING_BLOCK_TXNS.saturating_sub(2000))); + +// pub const MAX_RECEIVING_BLOCK_TXNS: u64 = if (2 * MAX_SENDING_BLOCK_UNIQUE_TXNS) > 10000 { +// 2 * MAX_SENDING_BLOCK_UNIQUE_TXNS +// } else { +// 10000 +// }; +// const MAX_SENDING_BLOCK_TXNS: u64 = +// if MAX_SENDING_BLOCK_UNIQUE_TXNS > MAX_RECEIVING_BLOCK_TXNS.saturating_sub(2000) { +// MAX_SENDING_BLOCK_UNIQUE_TXNS +// } else { +// MAX_RECEIVING_BLOCK_TXNS.saturating_sub(2000) +// }; // stop reducing size at this point, so 1MB transactions can still go through const MIN_BLOCK_BYTES_OVERRIDE: u64 = 1024 * 1024 + BATCH_PADDING_BYTES as u64; @@ -161,10 +167,10 @@ impl Default for ConsensusConfig { fn default() -> ConsensusConfig { ConsensusConfig { max_network_channel_size: 1024, - max_sending_block_txns: MAX_SENDING_BLOCK_TXNS, + max_sending_block_txns: *MAX_SENDING_BLOCK_TXNS, max_sending_block_unique_txns: MAX_SENDING_BLOCK_UNIQUE_TXNS, max_sending_block_bytes: 3 * 1024 * 1024, // 3MB - max_receiving_block_txns: MAX_RECEIVING_BLOCK_TXNS, + max_receiving_block_txns: *MAX_RECEIVING_BLOCK_TXNS, max_sending_inline_txns: 100, max_sending_inline_bytes: 200 * 1024, // 200 KB max_receiving_block_bytes: 6 * 1024 * 1024, // 6MB @@ -202,14 +208,14 @@ impl Default for ConsensusConfig { // pipeline once quorum on execution result among validators has been reached // (so-(badly)-called "commit certificate"), 
meaning 2f+1 validators have finished execution. back_pressure_pipeline_latency_limit_ms: 800, - max_sending_block_txns_override: MAX_SENDING_BLOCK_TXNS, + max_sending_block_txns_override: *MAX_SENDING_BLOCK_TXNS, max_sending_block_bytes_override: 5 * 1024 * 1024, backpressure_proposal_delay_ms: 100, max_txns_from_block_to_execute: None, }, PipelineBackpressureValues { back_pressure_pipeline_latency_limit_ms: 1100, - max_sending_block_txns_override: MAX_SENDING_BLOCK_TXNS, + max_sending_block_txns_override: *MAX_SENDING_BLOCK_TXNS, max_sending_block_bytes_override: 5 * 1024 * 1024, backpressure_proposal_delay_ms: 200, max_txns_from_block_to_execute: None, From adbb6fc2873df403ed47782844b8b6bcf9f26f58 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 26 Jun 2024 13:08:20 -0700 Subject: [PATCH 50/67] Removing comments --- config/src/config/consensus_config.rs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/config/src/config/consensus_config.rs b/config/src/config/consensus_config.rs index 597e7b4021fff..4892453895d4f 100644 --- a/config/src/config/consensus_config.rs +++ b/config/src/config/consensus_config.rs @@ -19,17 +19,6 @@ pub(crate) static MAX_RECEIVING_BLOCK_TXNS: Lazy = static MAX_SENDING_BLOCK_TXNS: Lazy = Lazy::new(|| MAX_SENDING_BLOCK_UNIQUE_TXNS.max(MAX_RECEIVING_BLOCK_TXNS.saturating_sub(2000))); -// pub const MAX_RECEIVING_BLOCK_TXNS: u64 = if (2 * MAX_SENDING_BLOCK_UNIQUE_TXNS) > 10000 { -// 2 * MAX_SENDING_BLOCK_UNIQUE_TXNS -// } else { -// 10000 -// }; -// const MAX_SENDING_BLOCK_TXNS: u64 = -// if MAX_SENDING_BLOCK_UNIQUE_TXNS > MAX_RECEIVING_BLOCK_TXNS.saturating_sub(2000) { -// MAX_SENDING_BLOCK_UNIQUE_TXNS -// } else { -// MAX_RECEIVING_BLOCK_TXNS.saturating_sub(2000) -// }; // stop reducing size at this point, so 1MB transactions can still go through const MIN_BLOCK_BYTES_OVERRIDE: u64 = 1024 * 1024 + BATCH_PADDING_BYTES as u64; From 9964b60b7c6dd8e0483cf8789973caf3fd405763 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala 
Date: Wed, 26 Jun 2024 13:24:29 -0700 Subject: [PATCH 51/67] Minor change --- consensus/src/liveness/proposal_generator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consensus/src/liveness/proposal_generator.rs b/consensus/src/liveness/proposal_generator.rs index ebd8ba691528a..381797eccf088 100644 --- a/consensus/src/liveness/proposal_generator.rs +++ b/consensus/src/liveness/proposal_generator.rs @@ -342,7 +342,7 @@ impl ProposalGenerator { .unwrap_or(0); // Use non-backpressure reduced values for computing fill_fraction let max_fill_fraction = (max_pending_block_len as f32 - / self.max_block_unique_txns as f32) + / self.max_block_txns as f32) .max(max_pending_block_bytes as f32 / self.max_block_bytes as f32); PROPOSER_PENDING_BLOCKS_COUNT.set(pending_blocks.len() as i64); PROPOSER_PENDING_BLOCKS_FILL_FRACTION.set(max_fill_fraction as f64); From 0b170d3f13b1d573f0bdb2d88bdf2248bc62205a Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 26 Jun 2024 13:28:18 -0700 Subject: [PATCH 52/67] Minor change --- consensus/src/payload_client/mixed.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/consensus/src/payload_client/mixed.rs b/consensus/src/payload_client/mixed.rs index 4ccf31ce46a92..e28a3efdb79c7 100644 --- a/consensus/src/payload_client/mixed.rs +++ b/consensus/src/payload_client/mixed.rs @@ -154,8 +154,8 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { let (pulled_validator_txns, Payload::DirectMempool(pulled_user_txns)) = client .pull_payload( Duration::from_secs(1), // max_poll_time - 99, // max_items - 120, // max_unique_items + 120, // max_items + 99, // max_unique_items 1048576, // size limit: 1MB 50, 500000, // inline limit: 500KB @@ -178,8 +178,8 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { let (pulled_validator_txns, Payload::DirectMempool(pulled_user_txns)) = client .pull_payload( Duration::from_micros(500), // max_poll_time - 99, // 
max_items - 120, // max_unique_items + 120, // max_items + 99, // max_unique_items 1048576, // size limit: 1MB 50, 500000, // inline limit: 500KB @@ -202,8 +202,8 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { let (pulled_validator_txns, Payload::DirectMempool(pulled_user_txns)) = client .pull_payload( Duration::from_secs(1), // max_poll_time - 1, // max_items - 2, // max_unique_items + 2, // max_items + 1, // max_unique_items 1048576, // size limit: 1MB 0, 0, // inline limit: 0 @@ -226,8 +226,8 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { let (pulled_validator_txns, Payload::DirectMempool(pulled_user_txns)) = client .pull_payload( Duration::from_secs(1), // max_poll_time - 99, // max_items - 120, // max_unique_items + 120, // max_items + 99, // max_unique_items all_validator_txns[0].size_in_bytes() as u64, 50, all_validator_txns[0].size_in_bytes() as u64, @@ -268,8 +268,8 @@ async fn mixed_payload_client_should_respect_validator_txn_feature_flag() { let (pulled_validator_txns, Payload::DirectMempool(pulled_user_txns)) = client .pull_payload( Duration::from_millis(50), // max_poll_time - 99, // max_items - 120, // max_unique_items + 120, // max_items + 99, // max_unique_items 1048576, // size limit: 1MB 50, 500000, // inline limit: 500KB From d2f0fbae755b9d3a9c096f224ec30f574aa534c5 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 26 Jun 2024 13:32:24 -0700 Subject: [PATCH 53/67] Minor fix --- consensus/src/round_manager_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consensus/src/round_manager_test.rs b/consensus/src/round_manager_test.rs index 8c9ab505e0bc2..2c996ed087de5 100644 --- a/consensus/src/round_manager_test.rs +++ b/consensus/src/round_manager_test.rs @@ -303,8 +303,8 @@ impl NodeSetup { Arc::new(MockPayloadManager::new(None)), time_service.clone(), Duration::ZERO, - 10, 20, + 10, 1000, 5, 500, From 13ec31bc1a9bc9b995073f06945658e6254e93e9 Mon Sep 17 00:00:00 2001 From: 
Satya Vusirikala Date: Wed, 26 Jun 2024 14:40:49 -0700 Subject: [PATCH 54/67] Add unit test and address PR comments --- config/src/config/consensus_config.rs | 2 + consensus/src/liveness/proposal_generator.rs | 3 +- consensus/src/payload_client/mixed.rs | 6 +- consensus/src/quorum_store/counters.rs | 6 +- consensus/src/quorum_store/tests/utils.rs | 75 ++++++++++++++++++++ consensus/src/quorum_store/utils.rs | 15 ++-- 6 files changed, 91 insertions(+), 16 deletions(-) diff --git a/config/src/config/consensus_config.rs b/config/src/config/consensus_config.rs index 4892453895d4f..31ec95921e32f 100644 --- a/config/src/config/consensus_config.rs +++ b/config/src/config/consensus_config.rs @@ -16,6 +16,8 @@ use std::path::PathBuf; const MAX_SENDING_BLOCK_UNIQUE_TXNS: u64 = 1900; pub(crate) static MAX_RECEIVING_BLOCK_TXNS: Lazy = Lazy::new(|| 10000.max(2 * MAX_SENDING_BLOCK_UNIQUE_TXNS)); +// The receiving validator can accept upto 2k more transactions in the block than the max sending limit. +// The extra cushion of 2k transactions is added just in case we need to increase the max sending limit in the future. 
static MAX_SENDING_BLOCK_TXNS: Lazy = Lazy::new(|| MAX_SENDING_BLOCK_UNIQUE_TXNS.max(MAX_RECEIVING_BLOCK_TXNS.saturating_sub(2000))); diff --git a/consensus/src/liveness/proposal_generator.rs b/consensus/src/liveness/proposal_generator.rs index 381797eccf088..c412278506712 100644 --- a/consensus/src/liveness/proposal_generator.rs +++ b/consensus/src/liveness/proposal_generator.rs @@ -341,8 +341,7 @@ impl ProposalGenerator { .max() .unwrap_or(0); // Use non-backpressure reduced values for computing fill_fraction - let max_fill_fraction = (max_pending_block_len as f32 - / self.max_block_txns as f32) + let max_fill_fraction = (max_pending_block_len as f32 / self.max_block_txns as f32) .max(max_pending_block_bytes as f32 / self.max_block_bytes as f32); PROPOSER_PENDING_BLOCKS_COUNT.set(pending_blocks.len() as i64); PROPOSER_PENDING_BLOCKS_FILL_FRACTION.set(max_fill_fraction as f64); diff --git a/consensus/src/payload_client/mixed.rs b/consensus/src/payload_client/mixed.rs index e28a3efdb79c7..551af1d3d5abf 100644 --- a/consensus/src/payload_client/mixed.rs +++ b/consensus/src/payload_client/mixed.rs @@ -226,8 +226,8 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { let (pulled_validator_txns, Payload::DirectMempool(pulled_user_txns)) = client .pull_payload( Duration::from_secs(1), // max_poll_time - 120, // max_items - 99, // max_unique_items + 120, // max_items + 99, // max_unique_items all_validator_txns[0].size_in_bytes() as u64, 50, all_validator_txns[0].size_in_bytes() as u64, @@ -268,7 +268,7 @@ async fn mixed_payload_client_should_respect_validator_txn_feature_flag() { let (pulled_validator_txns, Payload::DirectMempool(pulled_user_txns)) = client .pull_payload( Duration::from_millis(50), // max_poll_time - 120, // max_items + 120, // max_items 99, // max_unique_items 1048576, // size limit: 1MB 50, diff --git a/consensus/src/quorum_store/counters.rs b/consensus/src/quorum_store/counters.rs index 25372babd1c20..6de4c5496fd2f 100644 --- 
a/consensus/src/quorum_store/counters.rs +++ b/consensus/src/quorum_store/counters.rs @@ -193,10 +193,10 @@ pub static TOTAL_BLOCK_SIZE_WHEN_PULL: Lazy = Lazy::new(|| { .unwrap() }); -pub static EXTRA_TXNS_WHEN_PULL: Lazy = Lazy::new(|| { +pub static KNOWN_DUPLICATE_TXNS_WHEN_PULL: Lazy = Lazy::new(|| { register_histogram!( - "quorum_store_extra_txns_when_pull", - "Histogram for the number of extra transactions in a block when pulled for consensus.", + "quorum_store_known_duplicate_txns_when_pull", + "Histogram for the number of known duplicate transactions in a block when pulled for consensus.", TRANSACTION_COUNT_BUCKETS.clone(), ) .unwrap() diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index 47a26a78c8dbf..c24247efd377d 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ b/consensus/src/quorum_store/tests/utils.rs @@ -9,6 +9,7 @@ use aptos_consensus_types::{ use aptos_crypto::HashValue; use aptos_types::{aggregate_signature::AggregateSignature, PeerId}; use maplit::hashset; +use std::collections::HashSet; /// Return a ProofOfStore with minimal fields used by ProofQueue tests. 
fn proof_of_store(author: PeerId, batch_id: BatchId, gas_bucket_start: u64) -> ProofOfStore { @@ -159,3 +160,77 @@ fn test_proof_calculate_remaining_txns_and_proofs() { assert_eq!(proof_queue.remaining_txns_and_proofs(), (6, 8)); } + +#[test] +fn test_proof_pull_proofs_with_duplicates() { + let my_peer_id = PeerId::random(); + let mut proof_queue = ProofQueue::new(my_peer_id); + + let txns = vec![ + TransactionSummary::new(PeerId::ONE, 0, HashValue::zero()), + TransactionSummary::new(PeerId::ONE, 1, HashValue::zero()), + TransactionSummary::new(PeerId::ONE, 2, HashValue::zero()), + TransactionSummary::new(PeerId::ONE, 3, HashValue::zero()), + ]; + + let author_0 = PeerId::random(); + let author_1 = PeerId::random(); + + let author_0_batches = vec![ + proof_of_store(author_0, BatchId::new_for_test(0), 100), + proof_of_store(author_0, BatchId::new_for_test(1), 200), + proof_of_store(author_0, BatchId::new_for_test(2), 50), + proof_of_store(author_0, BatchId::new_for_test(3), 300), + ]; + let info_0 = author_0_batches[0].info().clone(); + proof_queue.add_batch_summaries(vec![(author_0_batches[0].info().clone(), vec![txns[0]])]); + proof_queue.add_batch_summaries(vec![(author_0_batches[1].info().clone(), vec![txns[1]])]); + proof_queue.add_batch_summaries(vec![(author_0_batches[2].info().clone(), vec![txns[2]])]); + proof_queue.add_batch_summaries(vec![(author_0_batches[3].info().clone(), vec![txns[0]])]); + + for batch in author_0_batches { + proof_queue.push(batch); + } + + let author_1_batches = vec![ + proof_of_store(author_1, BatchId::new_for_test(4), 500), + proof_of_store(author_1, BatchId::new_for_test(5), 400), + proof_of_store(author_1, BatchId::new_for_test(6), 600), + proof_of_store(author_1, BatchId::new_for_test(7), 50), + ]; + proof_queue.add_batch_summaries(vec![(author_1_batches[0].info().clone(), vec![txns[1]])]); + proof_queue.add_batch_summaries(vec![(author_1_batches[1].info().clone(), vec![txns[2]])]); + 
proof_queue.add_batch_summaries(vec![(author_1_batches[2].info().clone(), vec![txns[3]])]); + proof_queue.add_batch_summaries(vec![(author_1_batches[3].info().clone(), vec![txns[0]])]); + + for batch in author_1_batches { + proof_queue.push(batch); + } + assert_eq!(proof_queue.remaining_txns_and_proofs(), (4, 8)); + + let result = proof_queue.pull_proofs(&hashset![], 8, 4, 3000, true); + assert!(result.0.len() >= 4); + assert!(result.0.len() <= 8); + let mut pulled_txns = HashSet::new(); + for proof in result.0 { + match proof.batch_id() { + BatchId { id: 0, nonce: 0 } => pulled_txns.insert(0), + BatchId { id: 1, nonce: 0 } => pulled_txns.insert(1), + BatchId { id: 2, nonce: 0 } => pulled_txns.insert(2), + BatchId { id: 3, nonce: 0 } => pulled_txns.insert(0), + BatchId { id: 4, nonce: 0 } => pulled_txns.insert(1), + BatchId { id: 5, nonce: 0 } => pulled_txns.insert(2), + BatchId { id: 6, nonce: 0 } => pulled_txns.insert(3), + BatchId { id: 7, nonce: 0 } => pulled_txns.insert(0), + _ => panic!("Unexpected batch id"), + }; + } + assert!(pulled_txns.len() == 4); + assert!( + proof_queue + .pull_proofs(&hashset![info_0], 8, 4, 400, true) + .0 + .len() + == 7 + ); +} diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 5b52e97a9ee68..ba349f552d82d 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -389,12 +389,12 @@ impl ProofQueue { let mut excluded_txns = 0; let mut full = false; // Set of all the excluded transactions and all the transactions included in the result - let mut included_and_excluded_txns = HashSet::new(); + let mut filtered_txns = HashSet::new(); for batch_info in excluded_batches { let batch_key = BatchKey::from_info(batch_info); if let Some(txn_summaries) = self.batch_to_txn_summaries.get(&batch_key) { for txn_summary in txn_summaries { - included_and_excluded_txns.insert(*txn_summary); + filtered_txns.insert(*txn_summary); } } } @@ -420,9 +420,7 @@ impl ProofQueue { 
cur_unique_txns + txn_summaries .iter() - .filter(|txn_summary| { - !included_and_excluded_txns.contains(txn_summary) - }) + .filter(|txn_summary| !filtered_txns.contains(txn_summary)) .count() as u64 } else { cur_unique_txns + batch.num_txns() @@ -437,7 +435,7 @@ impl ProofQueue { } cur_bytes += batch.num_bytes(); cur_all_txns += batch.num_txns(); - // Add this batch to included_and_excluded_txns and calculate the number of + // Add this batch to filtered_txns and calculate the number of // unique transactions added in the result so far. cur_unique_txns += self .batch_to_txn_summaries @@ -445,7 +443,7 @@ impl ProofQueue { .map_or(batch.num_txns(), |summaries| { summaries .iter() - .filter(|summary| included_and_excluded_txns.insert(**summary)) + .filter(|summary| filtered_txns.insert(**summary)) .count() as u64 }); let bucket = proof.gas_bucket_start(); @@ -479,7 +477,8 @@ impl ProofQueue { if full || return_non_full { counters::BLOCK_SIZE_WHEN_PULL.observe(cur_unique_txns as f64); counters::TOTAL_BLOCK_SIZE_WHEN_PULL.observe(cur_all_txns as f64); - counters::EXTRA_TXNS_WHEN_PULL.observe((cur_all_txns - cur_unique_txns) as f64); + counters::KNOWN_DUPLICATE_TXNS_WHEN_PULL + .observe((cur_all_txns - cur_unique_txns) as f64); counters::BLOCK_BYTES_WHEN_PULL.observe(cur_bytes as f64); counters::PROOF_SIZE_WHEN_PULL.observe(ret.len() as f64); counters::EXCLUDED_TXNS_WHEN_PULL.observe(excluded_txns as f64); From c428b1cc169f0a85b0dfb62580f7cd7843ded2bc Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Thu, 27 Jun 2024 10:33:31 -0700 Subject: [PATCH 55/67] Minor fix in proof manager --- consensus/src/quorum_store/proof_manager.rs | 9 ++++++--- consensus/src/quorum_store/tests/utils.rs | 7 +++++-- consensus/src/quorum_store/utils.rs | 8 +++++--- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/consensus/src/quorum_store/proof_manager.rs b/consensus/src/quorum_store/proof_manager.rs index cfa9760a2a888..d78eaae987d9f 100644 --- 
a/consensus/src/quorum_store/proof_manager.rs +++ b/consensus/src/quorum_store/proof_manager.rs @@ -221,7 +221,7 @@ impl ProofManager { PayloadFilter::InQuorumStore(proofs) => proofs, }; - let (proof_block, proof_queue_fully_utilized) = + let (proof_block, cur_unique_txns, proof_queue_fully_utilized) = self.proofs_for_consensus.pull_proofs( &excluded_batches, max_txns, @@ -235,12 +235,15 @@ impl ProofManager { .observe(if proof_queue_fully_utilized { 1.0 } else { 0.0 }); let mut inline_block: Vec<(BatchInfo, Vec)> = vec![]; - let cur_txns: u64 = proof_block.iter().map(|p| p.num_txns()).sum(); + let cur_all_txns: u64 = proof_block.iter().map(|p| p.num_txns()).sum(); let cur_bytes: u64 = proof_block.iter().map(|p| p.num_bytes()).sum(); if self.allow_batches_without_pos_in_proposal && proof_queue_fully_utilized { inline_block = self.batch_queue.pull_batches( - min(max_unique_txns - cur_txns, max_inline_txns), + min( + min(max_txns - cur_all_txns, max_unique_txns - cur_unique_txns), + max_inline_txns, + ), min(max_bytes - cur_bytes, max_inline_bytes), excluded_batches .iter() diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index c24247efd377d..e88fcb80f9707 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ b/consensus/src/quorum_store/tests/utils.rs @@ -56,7 +56,7 @@ fn test_proof_queue_sorting() { } // Expect: [600, 300] - let (pulled, _) = proof_queue.pull_proofs(&hashset![], 4, 2, 2, true); + let (pulled, num_unique_txns, _) = proof_queue.pull_proofs(&hashset![], 4, 2, 2, true); let mut count_author_0 = 0; let mut count_author_1 = 0; let mut prev: Option<&ProofOfStore> = None; @@ -75,9 +75,10 @@ fn test_proof_queue_sorting() { } assert_eq!(count_author_0, 1); assert_eq!(count_author_1, 1); + assert_eq!(num_unique_txns, 2); // Expect: [600, 500, 300, 100] - let (pulled, _) = proof_queue.pull_proofs(&hashset![], 6, 4, 4, true); + let (pulled, num_unique_txns, _) = proof_queue.pull_proofs(&hashset![], 6, 4, 4, 
true); let mut count_author_0 = 0; let mut count_author_1 = 0; let mut prev: Option<&ProofOfStore> = None; @@ -94,6 +95,7 @@ fn test_proof_queue_sorting() { } prev = Some(batch); } + assert_eq!(num_unique_txns, 4); assert_eq!(count_author_0, 2); assert_eq!(count_author_1, 2); } @@ -226,6 +228,7 @@ fn test_proof_pull_proofs_with_duplicates() { }; } assert!(pulled_txns.len() == 4); + assert!(result.1 == 4); assert!( proof_queue .pull_proofs(&hashset![info_0], 8, 4, 400, true) diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index ba349f552d82d..a8db34268f6a0 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -374,6 +374,8 @@ impl ProofQueue { // The flag in the second return argument is true iff the entire proof queue is fully utilized // when pulling the proofs. If any proof from proof queue cannot be included due to size limits, // this flag is set false. + // Returns the proofs, the number of unique transactions in the proofs, and a flag indicating + // whether the proof queue is fully utilized. pub(crate) fn pull_proofs( &mut self, excluded_batches: &HashSet, @@ -381,7 +383,7 @@ impl ProofQueue { max_unique_txns: u64, max_bytes: u64, return_non_full: bool, - ) -> (Vec, bool) { + ) -> (Vec, u64, bool) { let mut ret = vec![]; let mut cur_bytes = 0; let mut cur_unique_txns = 0; @@ -486,9 +488,9 @@ impl ProofQueue { self.log_remaining_data_after_pull(excluded_batches, &ret); // Stable sort, so the order of proofs within an author will not change. 
ret.sort_by_key(|proof| Reverse(proof.gas_bucket_start())); - (ret, !full) + (ret, cur_unique_txns, !full) } else { - (Vec::new(), !full) + (Vec::new(), 0, !full) } } From 2712880ddb3d9f313636ce6edc463811bd1ead2d Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Thu, 27 Jun 2024 13:33:09 -0700 Subject: [PATCH 56/67] Use saturating_sub --- consensus/src/quorum_store/proof_manager.rs | 5 ++++- consensus/src/quorum_store/utils.rs | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/consensus/src/quorum_store/proof_manager.rs b/consensus/src/quorum_store/proof_manager.rs index d78eaae987d9f..89203f7e0a167 100644 --- a/consensus/src/quorum_store/proof_manager.rs +++ b/consensus/src/quorum_store/proof_manager.rs @@ -241,7 +241,10 @@ impl ProofManager { if self.allow_batches_without_pos_in_proposal && proof_queue_fully_utilized { inline_block = self.batch_queue.pull_batches( min( - min(max_txns - cur_all_txns, max_unique_txns - cur_unique_txns), + min( + max_txns.saturating_sub(cur_all_txns), + max_unique_txns.saturating_sub(cur_unique_txns), + ), max_inline_txns, ), min(max_bytes - cur_bytes, max_inline_bytes), diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index a8db34268f6a0..0f772c78ccf13 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -480,7 +480,7 @@ impl ProofQueue { counters::BLOCK_SIZE_WHEN_PULL.observe(cur_unique_txns as f64); counters::TOTAL_BLOCK_SIZE_WHEN_PULL.observe(cur_all_txns as f64); counters::KNOWN_DUPLICATE_TXNS_WHEN_PULL - .observe((cur_all_txns - cur_unique_txns) as f64); + .observe((cur_all_txns.saturating_sub(cur_unique_txns)) as f64); counters::BLOCK_BYTES_WHEN_PULL.observe(cur_bytes as f64); counters::PROOF_SIZE_WHEN_PULL.observe(ret.len() as f64); counters::EXCLUDED_TXNS_WHEN_PULL.observe(excluded_txns as f64); From 0a50de409777a33a1122073806d689106afbd74d Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Thu, 27 Jun 2024 14:28:15 -0700 
Subject: [PATCH 57/67] Exclude expired transactions when counting block size --- consensus/consensus-types/src/common.rs | 30 +++++++++++++++++++ .../src/quorum_store/batch_coordinator.rs | 2 +- consensus/src/quorum_store/proof_manager.rs | 6 ++-- consensus/src/quorum_store/tests/utils.rs | 22 ++++++++------ consensus/src/quorum_store/types.rs | 7 +++-- consensus/src/quorum_store/utils.rs | 16 ++++++---- 6 files changed, 61 insertions(+), 22 deletions(-) diff --git a/consensus/consensus-types/src/common.rs b/consensus/consensus-types/src/common.rs index 4d0be0f31de79..ebb1d6e054ff7 100644 --- a/consensus/consensus-types/src/common.rs +++ b/consensus/consensus-types/src/common.rs @@ -51,6 +51,36 @@ impl fmt::Display for TransactionSummary { } } +#[derive(Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize, Hash, Ord, PartialOrd)] +pub struct TransactionSynopsis { + pub sender: AccountAddress, + pub sequence_number: u64, + pub expiration_timestamp_secs: u64, + pub hash: HashValue, +} + +impl TransactionSynopsis { + pub fn new( + sender: AccountAddress, + sequence_number: u64, + expiration_timestamp_secs: u64, + hash: HashValue, + ) -> Self { + Self { + sender, + sequence_number, + expiration_timestamp_secs, + hash, + } + } +} + +impl fmt::Display for TransactionSynopsis { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}", self.sender, self.sequence_number,) + } +} + #[derive(Clone)] pub struct TransactionInProgress { pub gas_unit_price: u64, diff --git a/consensus/src/quorum_store/batch_coordinator.rs b/consensus/src/quorum_store/batch_coordinator.rs index 6bc8f2fc15eb5..aa6905b43041f 100644 --- a/consensus/src/quorum_store/batch_coordinator.rs +++ b/consensus/src/quorum_store/batch_coordinator.rs @@ -79,7 +79,7 @@ impl BatchCoordinator { .map(|persisted_value| { ( persisted_value.batch_info().clone(), - persisted_value.summary(), + persisted_value.synopsis(), ) }) .collect(); diff --git 
a/consensus/src/quorum_store/proof_manager.rs b/consensus/src/quorum_store/proof_manager.rs index 89203f7e0a167..b2a73dc4550e9 100644 --- a/consensus/src/quorum_store/proof_manager.rs +++ b/consensus/src/quorum_store/proof_manager.rs @@ -11,7 +11,7 @@ use crate::{ }, }; use aptos_consensus_types::{ - common::{Payload, PayloadFilter, ProofWithData, TransactionSummary}, + common::{Payload, PayloadFilter, ProofWithData, TransactionSynopsis}, proof_of_store::{BatchInfo, ProofOfStore, ProofOfStoreMsg}, request_response::{GetPayloadCommand, GetPayloadResponse}, }; @@ -29,7 +29,7 @@ use std::{ #[derive(Debug)] pub enum ProofManagerCommand { ReceiveProofs(ProofOfStoreMsg), - ReceiveBatches(Vec<(BatchInfo, Vec)>), + ReceiveBatches(Vec<(BatchInfo, Vec)>), CommitNotification(u64, Vec), Shutdown(tokio::sync::oneshot::Sender<()>), } @@ -168,7 +168,7 @@ impl ProofManager { pub(crate) fn receive_batches( &mut self, - batch_summaries: Vec<(BatchInfo, Vec)>, + batch_summaries: Vec<(BatchInfo, Vec)>, ) { if self.allow_batches_without_pos_in_proposal { let batches = batch_summaries diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index e88fcb80f9707..638a87d0152e4 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ b/consensus/src/quorum_store/tests/utils.rs @@ -3,7 +3,7 @@ use crate::quorum_store::utils::ProofQueue; use aptos_consensus_types::{ - common::TransactionSummary, + common::TransactionSynopsis, proof_of_store::{BatchId, BatchInfo, ProofOfStore}, }; use aptos_crypto::HashValue; @@ -116,9 +116,10 @@ fn test_proof_calculate_remaining_txns_and_proofs() { ]; let info_1 = author_0_batches[0].info().clone(); let info_2 = author_0_batches[3].info().clone(); - proof_queue.add_batch_summaries(vec![(info_1, vec![TransactionSummary::new( + proof_queue.add_batch_summaries(vec![(info_1, vec![TransactionSynopsis::new( PeerId::ONE, 1, + 10, HashValue::zero(), )])]); for batch in author_0_batches { @@ -138,25 +139,28 @@ fn 
test_proof_calculate_remaining_txns_and_proofs() { } assert_eq!(proof_queue.remaining_txns_and_proofs(), (8, 8)); - proof_queue.add_batch_summaries(vec![(info_3, vec![TransactionSummary::new( + proof_queue.add_batch_summaries(vec![(info_3, vec![TransactionSynopsis::new( PeerId::ONE, 1, + 10, HashValue::zero(), )])]); assert_eq!(proof_queue.remaining_txns_and_proofs(), (7, 8)); - proof_queue.add_batch_summaries(vec![(info_2, vec![TransactionSummary::new( + proof_queue.add_batch_summaries(vec![(info_2, vec![TransactionSynopsis::new( PeerId::ONE, 2, + 10, HashValue::zero(), )])]); assert_eq!(proof_queue.remaining_txns_and_proofs(), (7, 8)); - proof_queue.add_batch_summaries(vec![(info_4, vec![TransactionSummary::new( + proof_queue.add_batch_summaries(vec![(info_4, vec![TransactionSynopsis::new( PeerId::ONE, 2, + 10, HashValue::zero(), )])]); @@ -169,10 +173,10 @@ fn test_proof_pull_proofs_with_duplicates() { let mut proof_queue = ProofQueue::new(my_peer_id); let txns = vec![ - TransactionSummary::new(PeerId::ONE, 0, HashValue::zero()), - TransactionSummary::new(PeerId::ONE, 1, HashValue::zero()), - TransactionSummary::new(PeerId::ONE, 2, HashValue::zero()), - TransactionSummary::new(PeerId::ONE, 3, HashValue::zero()), + TransactionSynopsis::new(PeerId::ONE, 0, 10, HashValue::zero()), + TransactionSynopsis::new(PeerId::ONE, 1, 10, HashValue::zero()), + TransactionSynopsis::new(PeerId::ONE, 2, 10, HashValue::zero()), + TransactionSynopsis::new(PeerId::ONE, 3, 10, HashValue::zero()), ]; let author_0 = PeerId::random(); diff --git a/consensus/src/quorum_store/types.rs b/consensus/src/quorum_store/types.rs index 65c01b839e424..26b0ebafb2c92 100644 --- a/consensus/src/quorum_store/types.rs +++ b/consensus/src/quorum_store/types.rs @@ -3,7 +3,7 @@ use anyhow::ensure; use aptos_consensus_types::{ - common::{BatchPayload, TransactionSummary}, + common::{BatchPayload, TransactionSynopsis}, proof_of_store::{BatchId, BatchInfo}, }; use aptos_crypto::{hash::CryptoHash, HashValue}; 
@@ -58,14 +58,15 @@ impl PersistedValue { &self.maybe_payload } - pub fn summary(&self) -> Vec { + pub fn synopsis(&self) -> Vec { if let Some(payload) = &self.maybe_payload { return payload .iter() .map(|txn| { - TransactionSummary::new( + TransactionSynopsis::new( txn.sender(), txn.sequence_number(), + txn.expiration_timestamp_secs(), txn.committed_hash(), ) }) diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 0f772c78ccf13..bdc65fdc9800e 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -3,7 +3,7 @@ use crate::{monitor, quorum_store::counters}; use aptos_consensus_types::{ - common::{TransactionInProgress, TransactionSummary}, + common::{TransactionInProgress, TransactionSummary, TransactionSynopsis}, proof_of_store::{BatchId, BatchInfo, ProofOfStore}, }; use aptos_logger::prelude::*; @@ -198,11 +198,11 @@ pub struct ProofQueue { author_to_batches: HashMap>, // ProofOfStore and insertion_time. None if committed batch_to_proof: HashMap>, - // Map of txn_summary = (sender, sequence number, hash) to all the batches that contain + // Map of txn_summary = (sender, sequence number, expiration_timestamp_secs, hash) to all the batches that contain // the transaction. This helps in counting the number of unique transactions in the pipeline. 
- txn_summary_to_batches: HashMap>, + txn_summary_to_batches: HashMap>, // List of transaction summaries for each batch - batch_to_txn_summaries: HashMap>, + batch_to_txn_summaries: HashMap>, // Expiration index expirations: TimeExpirations, latest_block_timestamp: u64, @@ -317,7 +317,7 @@ impl ProofQueue { pub(crate) fn add_batch_summaries( &mut self, - batch_summaries: Vec<(BatchInfo, Vec)>, + batch_summaries: Vec<(BatchInfo, Vec)>, ) { let start = Instant::now(); for (batch_info, txn_summaries) in batch_summaries { @@ -422,7 +422,11 @@ impl ProofQueue { cur_unique_txns + txn_summaries .iter() - .filter(|txn_summary| !filtered_txns.contains(txn_summary)) + .filter(|txn_summary| { + !filtered_txns.contains(txn_summary) + && txn_summary.expiration_timestamp_secs + > self.latest_block_timestamp + }) .count() as u64 } else { cur_unique_txns + batch.num_txns() From a2a1c3865e859d2874956f84eb82e49a35796434 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 1 Jul 2024 14:17:38 -0700 Subject: [PATCH 58/67] Minor fix --- consensus/src/quorum_store/utils.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index d6dbc97116168..6feb044e57406 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -198,8 +198,8 @@ pub struct ProofQueue { author_to_batches: HashMap>, // ProofOfStore and insertion_time. 
None if committed batch_to_proof: HashMap>, - // Number of batches in which the txn_summary = (sender, sequence number, hash) has been included - txn_summary_num_occurrences: HashMap, + // Number of batches in which the txn_synopsis = (sender, sequence number, hash, expiration) has been included + txn_summary_num_occurrences: HashMap, // List of transaction summaries for each batch batch_to_txn_summaries: HashMap>, // Expiration index From 12ec7b0c0317b27d1b19ed248a688901ee83f618 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Mon, 1 Jul 2024 14:34:37 -0700 Subject: [PATCH 59/67] Addressing PR comments --- consensus/consensus-types/src/common.rs | 6 +++--- consensus/src/quorum_store/proof_manager.rs | 6 +++--- consensus/src/quorum_store/tests/utils.rs | 18 +++++++++--------- consensus/src/quorum_store/types.rs | 6 +++--- consensus/src/quorum_store/utils.rs | 8 ++++---- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/consensus/consensus-types/src/common.rs b/consensus/consensus-types/src/common.rs index ebb1d6e054ff7..69f0f16bd4379 100644 --- a/consensus/consensus-types/src/common.rs +++ b/consensus/consensus-types/src/common.rs @@ -52,14 +52,14 @@ impl fmt::Display for TransactionSummary { } #[derive(Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize, Hash, Ord, PartialOrd)] -pub struct TransactionSynopsis { +pub struct TxnSummaryWithExpiration { pub sender: AccountAddress, pub sequence_number: u64, pub expiration_timestamp_secs: u64, pub hash: HashValue, } -impl TransactionSynopsis { +impl TxnSummaryWithExpiration { pub fn new( sender: AccountAddress, sequence_number: u64, @@ -75,7 +75,7 @@ impl TransactionSynopsis { } } -impl fmt::Display for TransactionSynopsis { +impl fmt::Display for TxnSummaryWithExpiration { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}:{}", self.sender, self.sequence_number,) } diff --git a/consensus/src/quorum_store/proof_manager.rs b/consensus/src/quorum_store/proof_manager.rs index 
b2a73dc4550e9..adbde9d855aaa 100644 --- a/consensus/src/quorum_store/proof_manager.rs +++ b/consensus/src/quorum_store/proof_manager.rs @@ -11,7 +11,7 @@ use crate::{ }, }; use aptos_consensus_types::{ - common::{Payload, PayloadFilter, ProofWithData, TransactionSynopsis}, + common::{Payload, PayloadFilter, ProofWithData, TxnSummaryWithExpiration}, proof_of_store::{BatchInfo, ProofOfStore, ProofOfStoreMsg}, request_response::{GetPayloadCommand, GetPayloadResponse}, }; @@ -29,7 +29,7 @@ use std::{ #[derive(Debug)] pub enum ProofManagerCommand { ReceiveProofs(ProofOfStoreMsg), - ReceiveBatches(Vec<(BatchInfo, Vec)>), + ReceiveBatches(Vec<(BatchInfo, Vec)>), CommitNotification(u64, Vec), Shutdown(tokio::sync::oneshot::Sender<()>), } @@ -168,7 +168,7 @@ impl ProofManager { pub(crate) fn receive_batches( &mut self, - batch_summaries: Vec<(BatchInfo, Vec)>, + batch_summaries: Vec<(BatchInfo, Vec)>, ) { if self.allow_batches_without_pos_in_proposal { let batches = batch_summaries diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index 638a87d0152e4..0a7dfe2fa4cbd 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ b/consensus/src/quorum_store/tests/utils.rs @@ -3,7 +3,7 @@ use crate::quorum_store::utils::ProofQueue; use aptos_consensus_types::{ - common::TransactionSynopsis, + common::TxnSummaryWithExpiration, proof_of_store::{BatchId, BatchInfo, ProofOfStore}, }; use aptos_crypto::HashValue; @@ -116,7 +116,7 @@ fn test_proof_calculate_remaining_txns_and_proofs() { ]; let info_1 = author_0_batches[0].info().clone(); let info_2 = author_0_batches[3].info().clone(); - proof_queue.add_batch_summaries(vec![(info_1, vec![TransactionSynopsis::new( + proof_queue.add_batch_summaries(vec![(info_1, vec![TxnSummaryWithExpiration::new( PeerId::ONE, 1, 10, @@ -139,7 +139,7 @@ fn test_proof_calculate_remaining_txns_and_proofs() { } assert_eq!(proof_queue.remaining_txns_and_proofs(), (8, 8)); - 
proof_queue.add_batch_summaries(vec![(info_3, vec![TransactionSynopsis::new( + proof_queue.add_batch_summaries(vec![(info_3, vec![TxnSummaryWithExpiration::new( PeerId::ONE, 1, 10, @@ -148,7 +148,7 @@ fn test_proof_calculate_remaining_txns_and_proofs() { assert_eq!(proof_queue.remaining_txns_and_proofs(), (7, 8)); - proof_queue.add_batch_summaries(vec![(info_2, vec![TransactionSynopsis::new( + proof_queue.add_batch_summaries(vec![(info_2, vec![TxnSummaryWithExpiration::new( PeerId::ONE, 2, 10, @@ -157,7 +157,7 @@ fn test_proof_calculate_remaining_txns_and_proofs() { assert_eq!(proof_queue.remaining_txns_and_proofs(), (7, 8)); - proof_queue.add_batch_summaries(vec![(info_4, vec![TransactionSynopsis::new( + proof_queue.add_batch_summaries(vec![(info_4, vec![TxnSummaryWithExpiration::new( PeerId::ONE, 2, 10, @@ -173,10 +173,10 @@ fn test_proof_pull_proofs_with_duplicates() { let mut proof_queue = ProofQueue::new(my_peer_id); let txns = vec![ - TransactionSynopsis::new(PeerId::ONE, 0, 10, HashValue::zero()), - TransactionSynopsis::new(PeerId::ONE, 1, 10, HashValue::zero()), - TransactionSynopsis::new(PeerId::ONE, 2, 10, HashValue::zero()), - TransactionSynopsis::new(PeerId::ONE, 3, 10, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 0, 10, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 1, 10, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 2, 10, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 3, 10, HashValue::zero()), ]; let author_0 = PeerId::random(); diff --git a/consensus/src/quorum_store/types.rs b/consensus/src/quorum_store/types.rs index 26b0ebafb2c92..6eafe54707eaa 100644 --- a/consensus/src/quorum_store/types.rs +++ b/consensus/src/quorum_store/types.rs @@ -3,7 +3,7 @@ use anyhow::ensure; use aptos_consensus_types::{ - common::{BatchPayload, TransactionSynopsis}, + common::{BatchPayload, TxnSummaryWithExpiration}, proof_of_store::{BatchId, BatchInfo}, }; use aptos_crypto::{hash::CryptoHash, 
HashValue}; @@ -58,12 +58,12 @@ impl PersistedValue { &self.maybe_payload } - pub fn synopsis(&self) -> Vec { + pub fn synopsis(&self) -> Vec { if let Some(payload) = &self.maybe_payload { return payload .iter() .map(|txn| { - TransactionSynopsis::new( + TxnSummaryWithExpiration::new( txn.sender(), txn.sequence_number(), txn.expiration_timestamp_secs(), diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 6feb044e57406..db2e70eda91d5 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -3,7 +3,7 @@ use crate::{monitor, quorum_store::counters}; use aptos_consensus_types::{ - common::{TransactionInProgress, TransactionSummary, TransactionSynopsis}, + common::{TransactionInProgress, TransactionSummary, TxnSummaryWithExpiration}, proof_of_store::{BatchId, BatchInfo, ProofOfStore}, }; use aptos_logger::prelude::*; @@ -199,9 +199,9 @@ pub struct ProofQueue { // ProofOfStore and insertion_time. None if committed batch_to_proof: HashMap>, // Number of batches in which the txn_synopsis = (sender, sequence number, hash, expiration) has been included - txn_summary_num_occurrences: HashMap, + txn_summary_num_occurrences: HashMap, // List of transaction summaries for each batch - batch_to_txn_summaries: HashMap>, + batch_to_txn_summaries: HashMap>, // Expiration index expirations: TimeExpirations, latest_block_timestamp: u64, @@ -301,7 +301,7 @@ impl ProofQueue { pub(crate) fn add_batch_summaries( &mut self, - batch_summaries: Vec<(BatchInfo, Vec)>, + batch_summaries: Vec<(BatchInfo, Vec)>, ) { let start = Instant::now(); for (batch_info, txn_summaries) in batch_summaries { From fe057ff1a3d540756656cedefd1bdb4d773f49c8 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 2 Jul 2024 01:28:23 -0700 Subject: [PATCH 60/67] Minor fix --- consensus/src/quorum_store/utils.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/consensus/src/quorum_store/utils.rs 
b/consensus/src/quorum_store/utils.rs index b35a2ea8a2fb5..249b933b4f3ab 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -439,7 +439,11 @@ impl ProofQueue { .map_or(batch.num_txns(), |summaries| { summaries .iter() - .filter(|summary| filtered_txns.insert(**summary)) + .filter(|summary| { + filtered_txns.insert(**summary) + && summary.expiration_timestamp_secs + > self.latest_block_timestamp + }) .count() as u64 }); let bucket = proof.gas_bucket_start(); From 1d6ace568211272c5115a770e685c1ffb065e21c Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 9 Jul 2024 16:11:19 -0700 Subject: [PATCH 61/67] Change the expiration units --- consensus/src/quorum_store/utils.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 249b933b4f3ab..96a2c39786b11 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -414,7 +414,9 @@ impl ProofQueue { .iter() .filter(|txn_summary| { !filtered_txns.contains(txn_summary) - && txn_summary.expiration_timestamp_secs + // latest_block_timestamp is microseonds since UNIX epoch + // expiration_timestamp_secs is seconds since UNIX epoch + && (txn_summary.expiration_timestamp_secs * 1_000_000) > self.latest_block_timestamp }) .count() as u64 @@ -441,7 +443,7 @@ impl ProofQueue { .iter() .filter(|summary| { filtered_txns.insert(**summary) - && summary.expiration_timestamp_secs + && (summary.expiration_timestamp_secs * 1_000_000) > self.latest_block_timestamp }) .count() as u64 From fcbf70c50be0de44dfac8a28a432d319357f8f98 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 9 Jul 2024 17:41:38 -0700 Subject: [PATCH 62/67] Fixing unit tests --- consensus/src/quorum_store/tests/utils.rs | 28 ++++++++++++----------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/consensus/src/quorum_store/tests/utils.rs 
b/consensus/src/quorum_store/tests/utils.rs index 7a4f36cc12cd1..f6670c15bb31e 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ b/consensus/src/quorum_store/tests/utils.rs @@ -110,13 +110,14 @@ fn test_proof_calculate_remaining_txns_and_proofs() { let my_peer_id = PeerId::random(); let mut proof_queue = ProofQueue::new(my_peer_id); let now = aptos_infallible::duration_since_epoch().as_micros() as u64; + let now_in_secs = aptos_infallible::duration_since_epoch().as_secs() as u64; let author_0 = PeerId::random(); let author_1 = PeerId::random(); let txns = vec![ - TransactionSummary::new(PeerId::ONE, 0, HashValue::zero()), - TransactionSummary::new(PeerId::ONE, 1, HashValue::zero()), - TransactionSummary::new(PeerId::ONE, 2, HashValue::zero()), - TransactionSummary::new(PeerId::ONE, 3, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 0, now_in_secs + 1, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 1, now_in_secs + 1, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 2, now_in_secs + 1, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 3, now_in_secs + 1, HashValue::zero()), ]; let author_0_batches = vec![ @@ -312,22 +313,22 @@ fn test_proof_pull_proofs_with_duplicates() { let my_peer_id = PeerId::random(); let mut proof_queue = ProofQueue::new(my_peer_id); let now = aptos_infallible::duration_since_epoch().as_micros() as u64; - + let now_in_secs = aptos_infallible::duration_since_epoch().as_secs() as u64; let txns = vec![ - TxnSummaryWithExpiration::new(PeerId::ONE, 0, 10, HashValue::zero()), - TxnSummaryWithExpiration::new(PeerId::ONE, 1, 10, HashValue::zero()), - TxnSummaryWithExpiration::new(PeerId::ONE, 2, 10, HashValue::zero()), - TxnSummaryWithExpiration::new(PeerId::ONE, 3, 10, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 0, now_in_secs + 1, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 1, now_in_secs + 1, HashValue::zero()), + 
TxnSummaryWithExpiration::new(PeerId::ONE, 2, now_in_secs + 1, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 3, now_in_secs + 1, HashValue::zero()), ]; let author_0 = PeerId::random(); let author_1 = PeerId::random(); let author_0_batches = vec![ - proof_of_store(author_0, BatchId::new_for_test(0), 100, now + 5000), - proof_of_store(author_0, BatchId::new_for_test(1), 200, now + 5000), - proof_of_store(author_0, BatchId::new_for_test(2), 50, now + 5000), - proof_of_store(author_0, BatchId::new_for_test(3), 300, now + 5000), + proof_of_store(author_0, BatchId::new_for_test(0), 100, now + 1_000_000), + proof_of_store(author_0, BatchId::new_for_test(1), 200, now + 2_000_000), + proof_of_store(author_0, BatchId::new_for_test(2), 50, now + 3_000_000), + proof_of_store(author_0, BatchId::new_for_test(3), 300, now + 2_000_000), ]; let info_0 = author_0_batches[0].info().clone(); proof_queue.add_batch_summaries(vec![(author_0_batches[0].info().clone(), vec![txns[0]])]); @@ -335,6 +336,7 @@ fn test_proof_pull_proofs_with_duplicates() { proof_queue.add_batch_summaries(vec![(author_0_batches[2].info().clone(), vec![txns[2]])]); proof_queue.add_batch_summaries(vec![(author_0_batches[3].info().clone(), vec![txns[0]])]); + proof_queue.push(batch); for batch in author_0_batches { proof_queue.push(batch); } From cfad57f567efea0a8012a062b26879054f267b9b Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 9 Jul 2024 22:10:05 -0700 Subject: [PATCH 63/67] Update unit tests --- consensus/src/quorum_store/tests/utils.rs | 239 +++++++++++++++++----- consensus/src/quorum_store/utils.rs | 10 +- 2 files changed, 201 insertions(+), 48 deletions(-) diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index f6670c15bb31e..5a11e84e63f1f 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ b/consensus/src/quorum_store/tests/utils.rs @@ -109,8 +109,8 @@ fn test_proof_queue_sorting() { fn 
test_proof_calculate_remaining_txns_and_proofs() { let my_peer_id = PeerId::random(); let mut proof_queue = ProofQueue::new(my_peer_id); - let now = aptos_infallible::duration_since_epoch().as_micros() as u64; let now_in_secs = aptos_infallible::duration_since_epoch().as_secs() as u64; + let now_in_usecs = aptos_infallible::duration_since_epoch().as_micros() as u64; let author_0 = PeerId::random(); let author_1 = PeerId::random(); let txns = vec![ @@ -121,17 +121,47 @@ fn test_proof_calculate_remaining_txns_and_proofs() { ]; let author_0_batches = vec![ - proof_of_store(author_0, BatchId::new_for_test(0), 100, now + 50000), - proof_of_store(author_0, BatchId::new_for_test(1), 200, now + 70000), - proof_of_store(author_0, BatchId::new_for_test(2), 50, now + 20000), - proof_of_store(author_0, BatchId::new_for_test(3), 300, now + 10000), + proof_of_store( + author_0, + BatchId::new_for_test(0), + 100, + now_in_usecs + 50000, + ), + proof_of_store( + author_0, + BatchId::new_for_test(1), + 200, + now_in_usecs + 70000, + ), + proof_of_store(author_0, BatchId::new_for_test(2), 50, now_in_usecs + 20000), + proof_of_store( + author_0, + BatchId::new_for_test(3), + 300, + now_in_usecs + 10000, + ), ]; let author_1_batches = vec![ - proof_of_store(author_1, BatchId::new_for_test(4), 500, now + 20000), - proof_of_store(author_1, BatchId::new_for_test(5), 400, now + 30000), - proof_of_store(author_1, BatchId::new_for_test(6), 600, now + 50000), - proof_of_store(author_1, BatchId::new_for_test(7), 50, now + 60000), + proof_of_store( + author_1, + BatchId::new_for_test(4), + 500, + now_in_usecs + 20000, + ), + proof_of_store( + author_1, + BatchId::new_for_test(5), + 400, + now_in_usecs + 30000, + ), + proof_of_store( + author_1, + BatchId::new_for_test(6), + 600, + now_in_usecs + 50000, + ), + proof_of_store(author_1, BatchId::new_for_test(7), 50, now_in_usecs + 60000), ]; let info_1 = author_0_batches[0].info().clone(); @@ -146,30 +176,35 @@ fn 
test_proof_calculate_remaining_txns_and_proofs() { proof_queue.add_batch_summaries(vec![(info_1.clone(), vec![txns[0]])]); // batch_summaries: [1 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (0, 0)); + assert_eq!(proof_queue.batch_summaries_len(), 1); proof_queue.push(author_0_batches[0].clone()); // txns: [txn_0] // proofs: [1] // batch_summaries: [1 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (1, 1)); + assert_eq!(proof_queue.batch_summaries_len(), 1); proof_queue.push(author_0_batches[1].clone()); // txns: [txn_0] + txns(proof_2) // proofs: [1, 2] // batch_summaries: [1 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 2)); + assert_eq!(proof_queue.batch_summaries_len(), 1); proof_queue.add_batch_summaries(vec![(info_2, vec![txns[1]])]); // txns: [txn_0, txn_1] // proofs: [1, 2] // batch_summaries: [1 -> txn_0, 2 -> txn_1] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 2)); + assert_eq!(proof_queue.batch_summaries_len(), 2); proof_queue.add_batch_summaries(vec![(info_3.clone(), vec![txns[0]])]); // txns: [txn_0, txn_1] // proofs: [1, 2] // batch_summaries: [1 -> txn_0, 2 -> txn_1, 3 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 2)); + assert_eq!(proof_queue.batch_summaries_len(), 3); // Adding the batch again shouldn't have an effect proof_queue.add_batch_summaries(vec![(info_3.clone(), vec![txns[0]])]); @@ -177,12 +212,14 @@ fn test_proof_calculate_remaining_txns_and_proofs() { // proofs: [1, 2] // batch_summaries: [1 -> txn_0, 2 -> txn_1, 3 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 2)); + assert_eq!(proof_queue.batch_summaries_len(), 3); proof_queue.push(author_0_batches[2].clone()); // txns: [txn_0, txn_1] // proofs: [1, 2, 3] // batch_summaries: [1 -> txn_0, 2 -> txn_1, 3 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 3)); + assert_eq!(proof_queue.batch_summaries_len(), 3); // Adding the batch again shouldn't have an effect 
proof_queue.add_batch_summaries(vec![(info_3.clone(), vec![txns[0]])]); @@ -190,49 +227,57 @@ fn test_proof_calculate_remaining_txns_and_proofs() { // proofs: [1, 2, 3] // batch_summaries: [1 -> txn_0, 2 -> txn_1, 3 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 3)); + assert_eq!(proof_queue.batch_summaries_len(), 3); proof_queue.push(author_1_batches[0].clone()); // txns: [txn_0, txn_1] + txns(proof_5) // proofs: [1, 2, 3, 5] // batch_summaries: [1 -> txn_0, 2 -> txn_1, 3 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (3, 4)); + assert_eq!(proof_queue.batch_summaries_len(), 3); proof_queue.add_batch_summaries(vec![(info_5, vec![txns[1]])]); // txns: [txn_0, txn_1] // proofs: [1, 2, 3, 5] // batch_summaries: [1 -> txn_0, 2 -> txn_1, 3 -> txn_0, 5 -> txn_1] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 4)); + assert_eq!(proof_queue.batch_summaries_len(), 4); proof_queue.add_batch_summaries(vec![(info_4, vec![txns[2]])]); // txns: [txn_0, txn_1] // proofs: [1, 2, 3, 5] // batch_summaries: [1 -> txn_0, 2 -> txn_1, 3 -> txn_0, 4 -> txn_2, 5 -> txn_1] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 4)); + assert_eq!(proof_queue.batch_summaries_len(), 5); proof_queue.push(author_0_batches[3].clone()); // txns: [txn_0, txn_1, txn_2] // proofs: [1, 2, 3, 4, 5] // batch_summaries: [1 -> txn_0, 2 -> txn_1, 3 -> txn_0, 4 -> txn_2, 5 -> txn_1] assert_eq!(proof_queue.remaining_txns_and_proofs(), (3, 5)); + assert_eq!(proof_queue.batch_summaries_len(), 5); proof_queue.mark_committed(vec![info_1.clone()]); // txns: [txn_0, txn_1, txn_2] // proofs: [2, 3, 4, 5] // batch_summaries: [2 -> txn_1, 3 -> txn_0, 4 -> txn_2, 5 -> txn_1] assert_eq!(proof_queue.remaining_txns_and_proofs(), (3, 4)); + assert_eq!(proof_queue.batch_summaries_len(), 4); proof_queue.push(author_1_batches[1].clone()); // txns: [txn_0, txn_1, txn_2] + txns(proof_6) // proofs: [2, 3, 4, 5, 6] // batch_summaries: [2 -> txn_1, 3 -> txn_0, 4 -> txn_2, 5 -> txn_1] 
assert_eq!(proof_queue.remaining_txns_and_proofs(), (4, 5)); + assert_eq!(proof_queue.batch_summaries_len(), 4); - proof_queue.handle_updated_block_timestamp(now + 20000); + proof_queue.handle_updated_block_timestamp(now_in_usecs + 20000); // Expires info_3, info_4, info_5 // txns: [txn_1] + txns(proof_6) // proofs: [2, 6] // batch_summaries: [2 -> txn_1] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 2)); + assert_eq!(proof_queue.batch_summaries_len(), 1); // Adding an expired batch again proof_queue.add_batch_summaries(vec![(info_3, vec![txns[0]])]); @@ -240,6 +285,7 @@ fn test_proof_calculate_remaining_txns_and_proofs() { // proofs: [2, 6] // batch_summaries: [2 -> txn_1, 3 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 2)); + assert_eq!(proof_queue.batch_summaries_len(), 2); // Adding an expired proof again. Should have no effect proof_queue.push(author_0_batches[2].clone()); @@ -247,106 +293,159 @@ fn test_proof_calculate_remaining_txns_and_proofs() { // proofs: [2, 6] // batch_summaries: [2 -> txn_1, 3 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 2)); + assert_eq!(proof_queue.batch_summaries_len(), 2); proof_queue.add_batch_summaries(vec![(info_7, vec![txns[3]])]); // txns: [txn_1] + txns(proof_6) // proofs: [2, 6] // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 2)); + assert_eq!(proof_queue.batch_summaries_len(), 3); - proof_queue.handle_updated_block_timestamp(now + 30000); + proof_queue.handle_updated_block_timestamp(now_in_usecs + 30000); // Expires info_6 // txns: [txn_1] // proofs: [2] // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (1, 1)); + assert_eq!(proof_queue.batch_summaries_len(), 3); proof_queue.add_batch_summaries(vec![(info_6, vec![txns[0]])]); // Expired batch not added to batch summaries // txns: [txn_1] // proofs: [2] - // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 
-> txn_0] + // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0, 6 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (1, 1)); + assert_eq!(proof_queue.batch_summaries_len(), 4); proof_queue.push(author_1_batches[2].clone()); // txns: [txn_1, txn_3] // proofs: [2, 7] - // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0] + // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0, 6 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 2)); + assert_eq!(proof_queue.batch_summaries_len(), 4); proof_queue.push(author_1_batches[3].clone()); // txns: [txn_1, txn_3] + txns(proof_8) // proofs: [2, 7, 8] - // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0] + // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0, 6 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (3, 3)); + assert_eq!(proof_queue.batch_summaries_len(), 4); proof_queue.mark_committed(vec![info_8.clone()]); // txns: [txn_1, txn_3] // proofs: [2, 7] - // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0] + // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0, 6 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 2)); + assert_eq!(proof_queue.batch_summaries_len(), 4); proof_queue.add_batch_summaries(vec![(info_8, vec![txns[0]])]); // Committed batch not added to batch summaries // txns: [txn_1, txn_3] // proofs: [2, 7] - // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0] + // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0, 6 -> txn_0, 8 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 2)); + assert_eq!(proof_queue.batch_summaries_len(), 5); proof_queue.push(author_1_batches[3].clone()); // Committed proof added again. 
Should have no effect // txns: [txn_1, txn_3] // proofs: [2, 7, 8] - // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0] + // batch_summaries: [2 -> txn_1, 7 -> txn_3, 3 -> txn_0, 6 -> txn_0, 8 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (2, 2)); + assert_eq!(proof_queue.batch_summaries_len(), 5); - proof_queue.handle_updated_block_timestamp(now + 70000); + proof_queue.handle_updated_block_timestamp(now_in_usecs + 70000); // Expires info_2, info_7 // txns: [] // proofs: [] - // batch_summaries: [3 -> txn_0] + // batch_summaries: [3 -> txn_0, 6 -> txn_0, 8 -> txn_0] assert_eq!(proof_queue.remaining_txns_and_proofs(), (0, 0)); + assert_eq!(proof_queue.batch_summaries_len(), 3); } #[test] fn test_proof_pull_proofs_with_duplicates() { let my_peer_id = PeerId::random(); let mut proof_queue = ProofQueue::new(my_peer_id); - let now = aptos_infallible::duration_since_epoch().as_micros() as u64; let now_in_secs = aptos_infallible::duration_since_epoch().as_secs() as u64; + let now_in_usecs = now_in_secs * 1_000_000; let txns = vec![ - TxnSummaryWithExpiration::new(PeerId::ONE, 0, now_in_secs + 1, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 0, now_in_secs + 2, HashValue::zero()), TxnSummaryWithExpiration::new(PeerId::ONE, 1, now_in_secs + 1, HashValue::zero()), - TxnSummaryWithExpiration::new(PeerId::ONE, 2, now_in_secs + 1, HashValue::zero()), - TxnSummaryWithExpiration::new(PeerId::ONE, 3, now_in_secs + 1, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 2, now_in_secs + 3, HashValue::zero()), + TxnSummaryWithExpiration::new(PeerId::ONE, 3, now_in_secs + 4, HashValue::zero()), ]; let author_0 = PeerId::random(); let author_1 = PeerId::random(); let author_0_batches = vec![ - proof_of_store(author_0, BatchId::new_for_test(0), 100, now + 1_000_000), - proof_of_store(author_0, BatchId::new_for_test(1), 200, now + 2_000_000), - proof_of_store(author_0, BatchId::new_for_test(2), 50, now + 3_000_000), - 
proof_of_store(author_0, BatchId::new_for_test(3), 300, now + 2_000_000), + proof_of_store( + author_0, + BatchId::new_for_test(0), + 100, + now_in_usecs + 1_100_000, + ), + proof_of_store( + author_0, + BatchId::new_for_test(1), + 200, + now_in_usecs + 3_000_000, + ), + proof_of_store( + author_0, + BatchId::new_for_test(2), + 50, + now_in_usecs + 5_000_000, + ), + proof_of_store( + author_0, + BatchId::new_for_test(3), + 300, + now_in_usecs + 4_000_000, + ), + ]; + + let author_1_batches = vec![ + proof_of_store( + author_1, + BatchId::new_for_test(4), + 500, + now_in_usecs + 4_000_000, + ), + proof_of_store( + author_1, + BatchId::new_for_test(5), + 400, + now_in_usecs + 2_500_000, + ), + proof_of_store( + author_1, + BatchId::new_for_test(6), + 600, + now_in_usecs + 3_500_000, + ), + proof_of_store( + author_1, + BatchId::new_for_test(7), + 50, + now_in_usecs + 4_500_000, + ), ]; + let info_0 = author_0_batches[0].info().clone(); + let info_7 = author_1_batches[2].info().clone(); + proof_queue.add_batch_summaries(vec![(author_0_batches[0].info().clone(), vec![txns[0]])]); proof_queue.add_batch_summaries(vec![(author_0_batches[1].info().clone(), vec![txns[1]])]); proof_queue.add_batch_summaries(vec![(author_0_batches[2].info().clone(), vec![txns[2]])]); proof_queue.add_batch_summaries(vec![(author_0_batches[3].info().clone(), vec![txns[0]])]); - proof_queue.push(batch); for batch in author_0_batches { proof_queue.push(batch); } - let author_1_batches = vec![ - proof_of_store(author_1, BatchId::new_for_test(4), 500, now + 5000), - proof_of_store(author_1, BatchId::new_for_test(5), 400, now + 5000), - proof_of_store(author_1, BatchId::new_for_test(6), 600, now + 5000), - proof_of_store(author_1, BatchId::new_for_test(7), 50, now + 5000), - ]; proof_queue.add_batch_summaries(vec![(author_1_batches[0].info().clone(), vec![txns[1]])]); proof_queue.add_batch_summaries(vec![(author_1_batches[1].info().clone(), vec![txns[2]])]); 
proof_queue.add_batch_summaries(vec![(author_1_batches[2].info().clone(), vec![txns[3]])]); @@ -357,9 +456,9 @@ fn test_proof_pull_proofs_with_duplicates() { } assert_eq!(proof_queue.remaining_txns_and_proofs(), (4, 8)); - let result = proof_queue.pull_proofs(&hashset![], 8, 4, 3000, true); - assert!(result.0.len() >= 4); - assert!(result.0.len() <= 8); + let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + assert_eq!(result.1, 4); + let mut pulled_txns = HashSet::new(); for proof in result.0 { match proof.batch_id() { @@ -374,13 +473,61 @@ fn test_proof_pull_proofs_with_duplicates() { _ => panic!("Unexpected batch id"), }; } - assert!(pulled_txns.len() == 4); - assert!(result.1 == 4); - assert!( - proof_queue - .pull_proofs(&hashset![info_0], 8, 4, 400, true) - .0 - .len() - == 7 - ); + assert_eq!(pulled_txns.len(), 4); + + let result = proof_queue.pull_proofs(&hashset![info_0.clone()], 8, 4, 400, true); + assert_eq!(result.0.len(), 7); + // filtered_txns: txn_0 (included in excluded batches) + assert_eq!(result.1, 3); + + proof_queue.handle_updated_block_timestamp(now_in_usecs + 1_000_000); + // Nothing changes + let result = proof_queue.pull_proofs(&hashset![], 8, 5, 400, true); + assert_eq!(result.0.len(), 8); + assert_eq!(result.1, 4); + + proof_queue.handle_updated_block_timestamp(now_in_usecs + 1_200_000); + // author_0_batches[0] is removed. No expired txns. + let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + assert_eq!(result.1, 4); + + let result = proof_queue.pull_proofs(&hashset![], 8, 5, 400, true); + assert_eq!(result.0.len(), 7); + assert_eq!(result.1, 4); + + proof_queue.handle_updated_block_timestamp(now_in_usecs + 2_000_000); + // author_0_batches[0] is removed. txn_1 is expired. 
+ let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + assert_eq!(result.0.len(), 7); + assert_eq!(result.1, 3); + + proof_queue.handle_updated_block_timestamp(now_in_usecs + 2_500_000); + // author_0_batches[0], author_1_batches[1] is removed. txn_1 is expired. + let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + assert_eq!(result.0.len(), 6); + assert_eq!(result.1, 3); + + let result = proof_queue.pull_proofs(&hashset![info_7], 8, 4, 400, true); + // author_0_batches[0], author_1_batches[1] is removed. author_1_batches[2] is excluded. txn_3 is expired. + assert_eq!(result.0.len(), 5); + assert_eq!(result.1, 2); + + proof_queue.handle_updated_block_timestamp(now_in_usecs + 3_000_000); + let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + // author_0_batches[0], author_0_batches[1], author_1_batches[1] are removed. txn_0, txn_1 are expired. + assert_eq!(result.0.len(), 5); + assert_eq!(result.1, 2); + + proof_queue.handle_updated_block_timestamp(now_in_usecs + 3_500_000); + let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + // author_0_batches[0], author_0_batches[1], author_1_batches[1], author_1_batches[2] are removed. txn_0, txn_1 are expired. + assert_eq!(result.0.len(), 4); + assert_eq!(result.1, 1); + + proof_queue.handle_updated_block_timestamp(now_in_usecs + 4_000_000); + let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + // author_0_batches[0], author_0_batches[1], author_0_batches[3], author_1_batches[0], author_1_batches[1], author_1_batches[2] are removed. + // txn_0, txn_1, txn_2 are expired. 
+ assert_eq!(result.0.len(), 2); + assert_eq!(result.1, 0); } diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 719cbf62e6f16..50d465f9faa10 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -21,6 +21,7 @@ use std::{ }; use tokio::time::timeout; +const MICRO_SEC_PER_SEC: u64 = 1_000_000; pub(crate) struct Timeouts { timeouts: VecDeque<(i64, T)>, } @@ -252,6 +253,10 @@ impl ProofQueue { } } + pub(crate) fn batch_summaries_len(&self) -> usize { + self.batch_summaries.len() + } + fn remaining_txns_without_duplicates(&self) -> u64 { // txn_summary_num_occurrences counts all the unexpired and uncommitted proofs that have txn summaries // in batch_summaries. @@ -498,7 +503,8 @@ impl ProofQueue { !filtered_txns.contains(txn_summary) // latest_block_timestamp is microseonds since UNIX epoch // expiration_timestamp_secs is seconds since UNIX epoch - && (txn_summary.expiration_timestamp_secs * 1_000_000) + // giving a second buffer for expiration as the expiration time is rounded off to seconds + && ((txn_summary.expiration_timestamp_secs + 1) * MICRO_SEC_PER_SEC) > self.latest_block_timestamp }) .count() as u64 @@ -524,7 +530,7 @@ impl ProofQueue { .iter() .filter(|summary| { filtered_txns.insert(**summary) - && (summary.expiration_timestamp_secs * 1_000_000) + && ((summary.expiration_timestamp_secs + 1)* MICRO_SEC_PER_SEC) > self.latest_block_timestamp }) .count() as u64 From 5646d7e11ac091342a86e78619df7ee6e94c1f95 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Tue, 9 Jul 2024 22:11:22 -0700 Subject: [PATCH 64/67] renaming --- consensus/src/quorum_store/batch_coordinator.rs | 2 +- consensus/src/quorum_store/types.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/consensus/src/quorum_store/batch_coordinator.rs b/consensus/src/quorum_store/batch_coordinator.rs index aa6905b43041f..6bc8f2fc15eb5 100644 --- a/consensus/src/quorum_store/batch_coordinator.rs +++ 
b/consensus/src/quorum_store/batch_coordinator.rs @@ -79,7 +79,7 @@ impl BatchCoordinator { .map(|persisted_value| { ( persisted_value.batch_info().clone(), - persisted_value.synopsis(), + persisted_value.summary(), ) }) .collect(); diff --git a/consensus/src/quorum_store/types.rs b/consensus/src/quorum_store/types.rs index 6eafe54707eaa..91d07487ba404 100644 --- a/consensus/src/quorum_store/types.rs +++ b/consensus/src/quorum_store/types.rs @@ -58,7 +58,7 @@ impl PersistedValue { &self.maybe_payload } - pub fn synopsis(&self) -> Vec { + pub fn summary(&self) -> Vec { if let Some(payload) = &self.maybe_payload { return payload .iter() From 8ab96f445736ad6fbf9f2c07a630cd90b503023b Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 10 Jul 2024 13:18:58 -0700 Subject: [PATCH 65/67] Add block_timestamp as input to pull_proofs --- .../consensus-types/src/request_response.rs | 9 +- consensus/src/dag/dag_driver.rs | 1 + consensus/src/liveness/proposal_generator.rs | 1 + consensus/src/payload_client/mixed.rs | 7 ++ consensus/src/payload_client/mod.rs | 1 + consensus/src/payload_client/user/mod.rs | 2 + .../user/quorum_store_client.rs | 4 + .../direct_mempool_quorum_store.rs | 1 + consensus/src/quorum_store/proof_manager.rs | 2 + .../tests/direct_mempool_quorum_store_test.rs | 1 + .../quorum_store/tests/proof_manager_test.rs | 1 + consensus/src/quorum_store/tests/utils.rs | 119 +++++++++++++++--- consensus/src/quorum_store/utils.rs | 9 +- .../src/test_utils/mock_payload_manager.rs | 1 + 14 files changed, 136 insertions(+), 23 deletions(-) diff --git a/consensus/consensus-types/src/request_response.rs b/consensus/consensus-types/src/request_response.rs index a65273b19626c..36985e7c446c4 100644 --- a/consensus/consensus-types/src/request_response.rs +++ b/consensus/consensus-types/src/request_response.rs @@ -4,7 +4,7 @@ use crate::common::{Payload, PayloadFilter}; use anyhow::Result; use futures::channel::oneshot; -use std::{fmt,
fmt::Formatter, time::Duration}; pub enum GetPayloadCommand { /// Request to pull block to submit to consensus. @@ -25,6 +25,8 @@ pub enum GetPayloadCommand { PayloadFilter, // callback to respond to oneshot::Sender>, + // block timestamp + Duration, ), } @@ -40,11 +42,12 @@ impl fmt::Display for GetPayloadCommand { return_non_full, excluded, _, + block_timestamp, ) => { write!( f, - "GetPayloadRequest [max_txns: {}, max_unique_txns: {}, max_bytes: {}, max_inline_txns: {}, max_inline_bytes:{}, return_non_full: {}, excluded: {}]", - max_txns, max_unique_txns, max_bytes, max_inline_txns, max_inline_bytes, return_non_full, excluded + "GetPayloadRequest [max_txns: {}, max_unique_txns: {}, max_bytes: {}, max_inline_txns: {}, max_inline_bytes:{}, return_non_full: {}, excluded: {}, block_timestamp: {:?}]", + max_txns, max_unique_txns, max_bytes, max_inline_txns, max_inline_bytes, return_non_full, excluded, block_timestamp ) }, } diff --git a/consensus/src/dag/dag_driver.rs b/consensus/src/dag/dag_driver.rs index f2fe23ea42524..2a81472d7cf14 100644 --- a/consensus/src/dag/dag_driver.rs +++ b/consensus/src/dag/dag_driver.rs @@ -268,6 +268,7 @@ impl DagDriver { false, 0, 0.0, + self.time_service.now_unix_time(), ) .await { diff --git a/consensus/src/liveness/proposal_generator.rs b/consensus/src/liveness/proposal_generator.rs index 2ff6b058a99de..9d353bcf6d698 100644 --- a/consensus/src/liveness/proposal_generator.rs +++ b/consensus/src/liveness/proposal_generator.rs @@ -370,6 +370,7 @@ impl ProposalGenerator { pending_ordering, pending_blocks.len(), max_fill_fraction, + timestamp, ) .await .context("Fail to retrieve payload")?; diff --git a/consensus/src/payload_client/mixed.rs b/consensus/src/payload_client/mixed.rs index 72a458c5481d2..3b7ede4d813f0 100644 --- a/consensus/src/payload_client/mixed.rs +++ b/consensus/src/payload_client/mixed.rs @@ -78,6 +78,7 @@ impl PayloadClient for MixedPayloadClient { pending_ordering: bool, pending_uncommitted_blocks: usize, 
recent_max_fill_fraction: f32, + block_timestamp: Duration, ) -> anyhow::Result<(Vec, Payload), QuorumStoreError> { // Pull validator txns first. let validator_txn_pull_timer = Instant::now(); @@ -124,6 +125,7 @@ impl PayloadClient for MixedPayloadClient { pending_ordering, pending_uncommitted_blocks, recent_max_fill_fraction, + block_timestamp, ) .await?; @@ -165,6 +167,7 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { false, 0, 0., + aptos_infallible::duration_since_epoch(), ) .await .unwrap() @@ -189,6 +192,7 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { false, 0, 0., + aptos_infallible::duration_since_epoch(), ) .await .unwrap() @@ -213,6 +217,7 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { false, 0, 0., + aptos_infallible::duration_since_epoch(), ) .await .unwrap() @@ -237,6 +242,7 @@ async fn mixed_payload_client_should_prioritize_validator_txns() { false, 0, 0., + aptos_infallible::duration_since_epoch(), ) .await .unwrap() @@ -279,6 +285,7 @@ async fn mixed_payload_client_should_respect_validator_txn_feature_flag() { false, 0, 0., + aptos_infallible::duration_since_epoch(), ) .await .unwrap() diff --git a/consensus/src/payload_client/mod.rs b/consensus/src/payload_client/mod.rs index 61cc98a5ba036..cef8293d7500f 100644 --- a/consensus/src/payload_client/mod.rs +++ b/consensus/src/payload_client/mod.rs @@ -28,6 +28,7 @@ pub trait PayloadClient: Send + Sync { pending_ordering: bool, pending_uncommitted_blocks: usize, recent_max_fill_fraction: f32, + block_timestamp: Duration, ) -> anyhow::Result<(Vec, Payload), QuorumStoreError>; fn trace_payloads(&self) {} diff --git a/consensus/src/payload_client/user/mod.rs b/consensus/src/payload_client/user/mod.rs index a19f77e3dfc11..3f5c4ffde1a47 100644 --- a/consensus/src/payload_client/user/mod.rs +++ b/consensus/src/payload_client/user/mod.rs @@ -27,6 +27,7 @@ pub trait UserPayloadClient: Send + Sync { pending_ordering: bool, 
pending_uncommitted_blocks: usize, recent_max_fill_fraction: f32, + block_timestamp: Duration, ) -> anyhow::Result; } @@ -59,6 +60,7 @@ impl UserPayloadClient for DummyClient { _pending_ordering: bool, _pending_uncommitted_blocks: usize, _recent_max_fill_fraction: f32, + _block_timestamp: Duration, ) -> anyhow::Result { let timer = Instant::now(); let mut nxt_txn_idx = 0; diff --git a/consensus/src/payload_client/user/quorum_store_client.rs b/consensus/src/payload_client/user/quorum_store_client.rs index 1cd3caefd50aa..576dfea4c7259 100644 --- a/consensus/src/payload_client/user/quorum_store_client.rs +++ b/consensus/src/payload_client/user/quorum_store_client.rs @@ -52,6 +52,7 @@ impl QuorumStoreClient { max_inline_bytes: u64, return_non_full: bool, exclude_payloads: PayloadFilter, + block_timestamp: Duration, ) -> anyhow::Result { let (callback, callback_rcv) = oneshot::channel(); let req = GetPayloadCommand::GetPayloadRequest( @@ -63,6 +64,7 @@ impl QuorumStoreClient { return_non_full, exclude_payloads.clone(), callback, + block_timestamp, ); // send to shared mempool self.consensus_to_quorum_store_sender @@ -99,6 +101,7 @@ impl UserPayloadClient for QuorumStoreClient { pending_ordering: bool, pending_uncommitted_blocks: usize, recent_max_fill_fraction: f32, + block_timestamp: Duration, ) -> anyhow::Result { let return_non_full = recent_max_fill_fraction < self.wait_for_full_blocks_above_recent_fill_threshold @@ -126,6 +129,7 @@ impl UserPayloadClient for QuorumStoreClient { max_inline_bytes, return_non_full || return_empty || done, exclude.clone(), + block_timestamp, ) .await?; if payload.is_empty() && !return_empty && !done { diff --git a/consensus/src/quorum_store/direct_mempool_quorum_store.rs b/consensus/src/quorum_store/direct_mempool_quorum_store.rs index 8a6a61b10c066..61efd14550c71 100644 --- a/consensus/src/quorum_store/direct_mempool_quorum_store.rs +++ b/consensus/src/quorum_store/direct_mempool_quorum_store.rs @@ -146,6 +146,7 @@ impl 
DirectMempoolQuorumStore { return_non_full, payload_filter, callback, + _block_timestamp, ) => { self.handle_block_request( max_unique_txns, diff --git a/consensus/src/quorum_store/proof_manager.rs b/consensus/src/quorum_store/proof_manager.rs index adbde9d855aaa..6b7613b6f3532 100644 --- a/consensus/src/quorum_store/proof_manager.rs +++ b/consensus/src/quorum_store/proof_manager.rs @@ -212,6 +212,7 @@ impl ProofManager { return_non_full, filter, callback, + block_timestamp, ) => { let excluded_batches: HashSet<_> = match filter { PayloadFilter::Empty => HashSet::new(), @@ -228,6 +229,7 @@ impl ProofManager { max_unique_txns, max_bytes, return_non_full, + block_timestamp, ); counters::NUM_BATCHES_WITHOUT_PROOF_OF_STORE.observe(self.batch_queue.len() as f64); diff --git a/consensus/src/quorum_store/tests/direct_mempool_quorum_store_test.rs b/consensus/src/quorum_store/tests/direct_mempool_quorum_store_test.rs index 35fdc3c7ed97a..03d9aa45cae37 100644 --- a/consensus/src/quorum_store/tests/direct_mempool_quorum_store_test.rs +++ b/consensus/src/quorum_store/tests/direct_mempool_quorum_store_test.rs @@ -38,6 +38,7 @@ async fn test_block_request_no_txns() { true, PayloadFilter::DirectMempool(vec![]), consensus_callback, + aptos_infallible::duration_since_epoch(), )) .unwrap(); diff --git a/consensus/src/quorum_store/tests/proof_manager_test.rs b/consensus/src/quorum_store/tests/proof_manager_test.rs index c9065737fdb15..0a78de6727cd7 100644 --- a/consensus/src/quorum_store/tests/proof_manager_test.rs +++ b/consensus/src/quorum_store/tests/proof_manager_test.rs @@ -62,6 +62,7 @@ async fn get_proposal( true, PayloadFilter::InQuorumStore(filter_set), callback_tx, + aptos_infallible::duration_since_epoch(), ); proof_manager.handle_proposal_request(req); let GetPayloadResponse::GetPayloadResponse(payload) = callback_rx.await.unwrap().unwrap(); diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index 
5a11e84e63f1f..f18cdb5b2101c 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ b/consensus/src/quorum_store/tests/utils.rs @@ -9,7 +9,7 @@ use aptos_consensus_types::{ use aptos_crypto::HashValue; use aptos_types::{aggregate_signature::AggregateSignature, PeerId}; use maplit::hashset; -use std::collections::HashSet; +use std::{collections::HashSet, time::Duration}; /// Return a ProofOfStore with minimal fields used by ProofQueue tests. fn proof_of_store( @@ -61,7 +61,14 @@ fn test_proof_queue_sorting() { } // Expect: [600, 300] - let (pulled, num_unique_txns, _) = proof_queue.pull_proofs(&hashset![], 4, 2, 2, true); + let (pulled, num_unique_txns, _) = proof_queue.pull_proofs( + &hashset![], + 4, + 2, + 2, + true, + aptos_infallible::duration_since_epoch(), + ); let mut count_author_0 = 0; let mut count_author_1 = 0; let mut prev: Option<&ProofOfStore> = None; @@ -83,7 +90,14 @@ fn test_proof_queue_sorting() { assert_eq!(num_unique_txns, 2); // Expect: [600, 500, 300, 100] - let (pulled, num_unique_txns, _) = proof_queue.pull_proofs(&hashset![], 6, 4, 4, true); + let (pulled, num_unique_txns, _) = proof_queue.pull_proofs( + &hashset![], + 6, + 4, + 4, + true, + aptos_infallible::duration_since_epoch(), + ); let mut count_author_0 = 0; let mut count_author_1 = 0; let mut prev: Option<&ProofOfStore> = None; @@ -456,7 +470,14 @@ fn test_proof_pull_proofs_with_duplicates() { } assert_eq!(proof_queue.remaining_txns_and_proofs(), (4, 8)); - let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + let result = proof_queue.pull_proofs( + &hashset![], + 8, + 4, + 400, + true, + Duration::from_micros(now_in_usecs), + ); assert_eq!(result.1, 4); let mut pulled_txns = HashSet::new(); @@ -475,57 +496,127 @@ fn test_proof_pull_proofs_with_duplicates() { } assert_eq!(pulled_txns.len(), 4); - let result = proof_queue.pull_proofs(&hashset![info_0.clone()], 8, 4, 400, true); + let result = proof_queue.pull_proofs( + &hashset![info_0.clone()], + 8, + 4, + 400, + 
true, + Duration::from_micros(now_in_usecs), + ); assert_eq!(result.0.len(), 7); // filtered_txns: txn_0 (included in excluded batches) assert_eq!(result.1, 3); proof_queue.handle_updated_block_timestamp(now_in_usecs + 1_000_000); // Nothing changes - let result = proof_queue.pull_proofs(&hashset![], 8, 5, 400, true); + let result = proof_queue.pull_proofs( + &hashset![], + 8, + 5, + 400, + true, + Duration::from_micros(now_in_usecs + 1_000_100), + ); assert_eq!(result.0.len(), 8); assert_eq!(result.1, 4); proof_queue.handle_updated_block_timestamp(now_in_usecs + 1_200_000); // author_0_batches[0] is removed. No expired txns. - let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + let result = proof_queue.pull_proofs( + &hashset![], + 8, + 4, + 400, + true, + Duration::from_micros(now_in_usecs + 1_200_100), + ); assert_eq!(result.1, 4); - let result = proof_queue.pull_proofs(&hashset![], 8, 5, 400, true); + let result = proof_queue.pull_proofs( + &hashset![], + 8, + 5, + 400, + true, + Duration::from_micros(now_in_usecs + 1_200_100), + ); assert_eq!(result.0.len(), 7); assert_eq!(result.1, 4); proof_queue.handle_updated_block_timestamp(now_in_usecs + 2_000_000); // author_0_batches[0] is removed. txn_1 is expired. - let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + let result = proof_queue.pull_proofs( + &hashset![], + 8, + 4, + 400, + true, + Duration::from_micros(now_in_usecs + 2_000_100), + ); assert_eq!(result.0.len(), 7); assert_eq!(result.1, 3); proof_queue.handle_updated_block_timestamp(now_in_usecs + 2_500_000); // author_0_batches[0], author_1_batches[1] is removed. txn_1 is expired. 
- let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + let result = proof_queue.pull_proofs( + &hashset![], + 8, + 4, + 400, + true, + Duration::from_micros(now_in_usecs + 2_500_100), + ); assert_eq!(result.0.len(), 6); assert_eq!(result.1, 3); - let result = proof_queue.pull_proofs(&hashset![info_7], 8, 4, 400, true); + let result = proof_queue.pull_proofs( + &hashset![info_7], + 8, + 4, + 400, + true, + Duration::from_micros(now_in_usecs + 2_500_100), + ); // author_0_batches[0], author_1_batches[1] is removed. author_1_batches[2] is excluded. txn_3 is expired. assert_eq!(result.0.len(), 5); assert_eq!(result.1, 2); proof_queue.handle_updated_block_timestamp(now_in_usecs + 3_000_000); - let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + let result = proof_queue.pull_proofs( + &hashset![], + 8, + 4, + 400, + true, + Duration::from_micros(now_in_usecs + 3_000_100), + ); // author_0_batches[0], author_0_batches[1], author_1_batches[1] are removed. txn_0, txn_1 are expired. assert_eq!(result.0.len(), 5); assert_eq!(result.1, 2); proof_queue.handle_updated_block_timestamp(now_in_usecs + 3_500_000); - let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + let result = proof_queue.pull_proofs( + &hashset![], + 8, + 4, + 400, + true, + Duration::from_micros(now_in_usecs + 3_500_100), + ); // author_0_batches[0], author_0_batches[1], author_1_batches[1], author_1_batches[2] are removed. txn_0, txn_1 are expired. assert_eq!(result.0.len(), 4); assert_eq!(result.1, 1); proof_queue.handle_updated_block_timestamp(now_in_usecs + 4_000_000); - let result = proof_queue.pull_proofs(&hashset![], 8, 4, 400, true); + let result = proof_queue.pull_proofs( + &hashset![], + 8, + 4, + 400, + true, + Duration::from_micros(now_in_usecs + 4_000_100), + ); // author_0_batches[0], author_0_batches[1], author_0_batches[3], author_1_batches[0], author_1_batches[1], author_1_batches[2] are removed. // txn_0, txn_1, txn_2 are expired. 
assert_eq!(result.0.len(), 2); diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 50d465f9faa10..56ec33a021a93 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -21,7 +21,6 @@ use std::{ }; use tokio::time::timeout; -const MICRO_SEC_PER_SEC: u64 = 1_000_000; pub(crate) struct Timeouts { timeouts: VecDeque<(i64, T)>, } @@ -460,6 +459,7 @@ impl ProofQueue { max_unique_txns: u64, max_bytes: u64, return_non_full: bool, + block_timestamp: Duration, ) -> (Vec, u64, bool) { let mut ret = vec![]; let mut cur_bytes = 0; @@ -501,10 +501,7 @@ impl ProofQueue { .iter() .filter(|txn_summary| { !filtered_txns.contains(txn_summary) - // latest_block_timestamp is microseonds since UNIX epoch - // expiration_timestamp_secs is seconds since UNIX epoch - // giving a second buffer for expiration as the expiration time is rounded off to seconds - && ((txn_summary.expiration_timestamp_secs + 1) * MICRO_SEC_PER_SEC) + && block_timestamp.as_secs() > self.latest_block_timestamp }) .count() as u64 @@ -530,7 +527,7 @@ impl ProofQueue { .iter() .filter(|summary| { filtered_txns.insert(**summary) - && ((summary.expiration_timestamp_secs + 1)* MICRO_SEC_PER_SEC) + && block_timestamp.as_secs() > self.latest_block_timestamp }) .count() as u64 diff --git a/consensus/src/test_utils/mock_payload_manager.rs b/consensus/src/test_utils/mock_payload_manager.rs index e855347ec4ebb..fec52c63599b2 100644 --- a/consensus/src/test_utils/mock_payload_manager.rs +++ b/consensus/src/test_utils/mock_payload_manager.rs @@ -68,6 +68,7 @@ impl PayloadClient for MockPayloadManager { _pending_ordering: bool, _pending_uncommitted_blocks: usize, _recent_fill_fraction: f32, + _block_timestamp: Duration, ) -> Result<(Vec, Payload), QuorumStoreError> { // generate 1k txn is too slow with coverage instrumentation Ok(( From 0a933748a9dcf585c80da0eaaf373a6465c117f1 Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 10 Jul 2024 
13:41:50 -0700 Subject: [PATCH 66/67] Fix test --- consensus/src/quorum_store/tests/utils.rs | 42 ++++++++++++----------- consensus/src/quorum_store/utils.rs | 4 +-- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/consensus/src/quorum_store/tests/utils.rs b/consensus/src/quorum_store/tests/utils.rs index f18cdb5b2101c..6214bb7db6603 100644 --- a/consensus/src/quorum_store/tests/utils.rs +++ b/consensus/src/quorum_store/tests/utils.rs @@ -508,7 +508,7 @@ fn test_proof_pull_proofs_with_duplicates() { // filtered_txns: txn_0 (included in excluded batches) assert_eq!(result.1, 3); - proof_queue.handle_updated_block_timestamp(now_in_usecs + 1_000_000); + proof_queue.handle_updated_block_timestamp(now_in_usecs + 500_000); // Nothing changes let result = proof_queue.pull_proofs( &hashset![], @@ -516,36 +516,38 @@ fn test_proof_pull_proofs_with_duplicates() { 5, 400, true, - Duration::from_micros(now_in_usecs + 1_000_100), + Duration::from_micros(now_in_usecs + 500_100), ); - assert_eq!(result.0.len(), 8); assert_eq!(result.1, 4); - proof_queue.handle_updated_block_timestamp(now_in_usecs + 1_200_000); - // author_0_batches[0] is removed. No expired txns. + proof_queue.handle_updated_block_timestamp(now_in_usecs + 1_000_000); + // txn_1 expired let result = proof_queue.pull_proofs( &hashset![], 8, - 4, + 5, 400, true, - Duration::from_micros(now_in_usecs + 1_200_100), + Duration::from_micros(now_in_usecs + 1_000_100), ); - assert_eq!(result.1, 4); + assert_eq!(result.0.len(), 8); + assert_eq!(result.1, 3); + proof_queue.handle_updated_block_timestamp(now_in_usecs + 1_200_000); + // author_0_batches[0] is removed. txn_1 expired. let result = proof_queue.pull_proofs( &hashset![], 8, - 5, + 4, 400, true, Duration::from_micros(now_in_usecs + 1_200_100), ); assert_eq!(result.0.len(), 7); - assert_eq!(result.1, 4); + assert_eq!(result.1, 3); proof_queue.handle_updated_block_timestamp(now_in_usecs + 2_000_000); - // author_0_batches[0] is removed. txn_1 is expired. 
+ // author_0_batches[0] is removed. txn_0, txn_1 are expired. let result = proof_queue.pull_proofs( &hashset![], 8, @@ -555,10 +557,10 @@ fn test_proof_pull_proofs_with_duplicates() { Duration::from_micros(now_in_usecs + 2_000_100), ); assert_eq!(result.0.len(), 7); - assert_eq!(result.1, 3); + assert_eq!(result.1, 2); proof_queue.handle_updated_block_timestamp(now_in_usecs + 2_500_000); - // author_0_batches[0], author_1_batches[1] is removed. txn_1 is expired. + // author_0_batches[0], author_1_batches[1] is removed. txn_0, txn_1 is expired. let result = proof_queue.pull_proofs( &hashset![], 8, @@ -568,7 +570,7 @@ fn test_proof_pull_proofs_with_duplicates() { Duration::from_micros(now_in_usecs + 2_500_100), ); assert_eq!(result.0.len(), 6); - assert_eq!(result.1, 3); + assert_eq!(result.1, 2); let result = proof_queue.pull_proofs( &hashset![info_7], @@ -578,9 +580,9 @@ fn test_proof_pull_proofs_with_duplicates() { true, Duration::from_micros(now_in_usecs + 2_500_100), ); - // author_0_batches[0], author_1_batches[1] is removed. author_1_batches[2] is excluded. txn_3 is expired. + // author_0_batches[0], author_1_batches[1] is removed. author_1_batches[2] is excluded. txn_0, txn_1 are expired. assert_eq!(result.0.len(), 5); - assert_eq!(result.1, 2); + assert_eq!(result.1, 1); proof_queue.handle_updated_block_timestamp(now_in_usecs + 3_000_000); let result = proof_queue.pull_proofs( @@ -591,9 +593,9 @@ fn test_proof_pull_proofs_with_duplicates() { true, Duration::from_micros(now_in_usecs + 3_000_100), ); - // author_0_batches[0], author_0_batches[1], author_1_batches[1] are removed. txn_0, txn_1 are expired. + // author_0_batches[0], author_0_batches[1], author_1_batches[1] are removed. txn_0, txn_1, txn_2 are expired. 
assert_eq!(result.0.len(), 5); - assert_eq!(result.1, 2); + assert_eq!(result.1, 1); proof_queue.handle_updated_block_timestamp(now_in_usecs + 3_500_000); let result = proof_queue.pull_proofs( @@ -604,9 +606,9 @@ fn test_proof_pull_proofs_with_duplicates() { true, Duration::from_micros(now_in_usecs + 3_500_100), ); - // author_0_batches[0], author_0_batches[1], author_1_batches[1], author_1_batches[2] are removed. txn_0, txn_1 are expired. + // author_0_batches[0], author_0_batches[1], author_1_batches[1], author_1_batches[2] are removed. txn_0, txn_1, txn_2 are expired. assert_eq!(result.0.len(), 4); - assert_eq!(result.1, 1); + assert_eq!(result.1, 0); proof_queue.handle_updated_block_timestamp(now_in_usecs + 4_000_000); let result = proof_queue.pull_proofs( diff --git a/consensus/src/quorum_store/utils.rs b/consensus/src/quorum_store/utils.rs index 56ec33a021a93..faaf18caef8ef 100644 --- a/consensus/src/quorum_store/utils.rs +++ b/consensus/src/quorum_store/utils.rs @@ -502,7 +502,7 @@ impl ProofQueue { .filter(|txn_summary| { !filtered_txns.contains(txn_summary) && block_timestamp.as_secs() - > self.latest_block_timestamp + < txn_summary.expiration_timestamp_secs }) .count() as u64 } else { @@ -528,7 +528,7 @@ impl ProofQueue { .filter(|summary| { filtered_txns.insert(**summary) && block_timestamp.as_secs() - > self.latest_block_timestamp + < summary.expiration_timestamp_secs }) .count() as u64 }, From 6bb88b7780ba238488dbb01c53a50e79333aaebd Mon Sep 17 00:00:00 2001 From: Satya Vusirikala Date: Wed, 10 Jul 2024 14:49:31 -0700 Subject: [PATCH 67/67] Typo --- consensus/consensus-types/src/request_response.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/consensus/consensus-types/src/request_response.rs b/consensus/consensus-types/src/request_response.rs index a0cae1afa7fbd..ef9e9485a6a6b 100644 --- a/consensus/consensus-types/src/request_response.rs +++ b/consensus/consensus-types/src/request_response.rs @@ -46,8 +46,8 @@ impl
fmt::Display for GetPayloadCommand { ) => { write!( f, - "GetPayloadRequest [max_txns: {}, max_unique_txns: {}, max_bytes: {}, max_inline_txns: {}, max_inline_bytes:{}, return_non_full: {}, excluded: {}, block_timestamp: {:?}]", - max_txns, max_unique_txns, max_bytes, max_inline_txns, max_inline_bytes, return_non_full, excluded, block_timestamp + "GetPayloadRequest [max_txns: {}, max_txns_after_filtering: {}, max_bytes: {}, max_inline_txns: {}, max_inline_bytes:{}, return_non_full: {}, excluded: {}, block_timestamp: {:?}]", + max_txns, max_txns_after_filtering, max_bytes, max_inline_txns, max_inline_bytes, return_non_full, excluded, block_timestamp ) }, }