Skip to content

Commit

Permalink
[forge] Add MempoolToBlockCreation, rm QsBatchToPos, QsPosToProposal (#15036)
Browse files Browse the repository at this point in the history

* [forge] Add MempoolToBlockCreation, rm QsBatchToPos, QsPosToProposal

* [forge] Remove QsBatchToPos, QsPosToProposal

* [forge] Rm LatencyBreakdownSlice variants QsBatchToPos, QsPosToProposal

* [forge] Change mempool_to_block_creation_query

* [forge] Add comment for quorum store backpressure

* [forge] bump p90 latency for 10 TPS from 0.9s to 1s

* [forge] Bump realistic_env_workload_sweep CoinTransfer ordered_to_commit

* [forge] realistic_env_workload_sweep ConsensusOrderedToCommit threshold for TokenV2AmbassadorMint

* [forge] Bump P90 for 10TPS latency criteria
  • Loading branch information
hariria authored Oct 22, 2024
1 parent 26286cd commit 28c112a
Showing 3 changed files with 31 additions and 38 deletions.
22 changes: 11 additions & 11 deletions testsuite/forge-cli/src/suites/realistic_environment.rs
Original file line number Diff line number Diff line change
@@ -124,29 +124,30 @@ pub(crate) fn realistic_env_workload_sweep_test() -> ForgeConfig {
]),
// Investigate/improve to make latency more predictable on different workloads
criteria: [
(7000, 100, 0.3, 0.5, 0.5, 0.4),
(8500, 100, 0.3, 0.5, 0.5, 0.4),
(2000, 300, 0.3, 1.0, 0.6, 1.0),
(3200, 500, 0.3, 1.0, 0.7, 0.6),
(7000, 100, 0.3 + 0.5, 0.5, 0.5),
(8500, 100, 0.3 + 0.5, 0.5, 0.4),
(2000, 300, 0.3 + 1.0, 0.6, 1.0),
(3200, 500, 0.3 + 1.0, 0.7, 0.8),
// TODO - pos-to-proposal is set high, until it is calibrated/understood.
(28, 5, 0.3, 5.0, 0.7, 1.0),
(28, 5, 0.3 + 5.0, 0.7, 1.0),
]
.into_iter()
.map(
|(
min_tps,
max_expired,
batch_to_pos,
pos_to_proposal,
mempool_to_block_creation,
proposal_to_ordered,
ordered_to_commit,
)| {
SuccessCriteria::new(min_tps)
.add_max_expired_tps(max_expired as f64)
.add_max_failed_submission_tps(200.0)
.add_latency_breakdown_threshold(LatencyBreakdownThreshold::new_strict(vec![
(LatencyBreakdownSlice::QsBatchToPos, batch_to_pos),
(LatencyBreakdownSlice::QsPosToProposal, pos_to_proposal),
(
LatencyBreakdownSlice::MempoolToBlockCreation,
mempool_to_block_creation,
),
(
LatencyBreakdownSlice::ConsensusProposalToOrdered,
proposal_to_ordered,
@@ -323,9 +324,8 @@ pub(crate) fn realistic_env_max_load_test(
success_criteria = success_criteria.add_latency_breakdown_threshold(
LatencyBreakdownThreshold::new_with_breach_pct(
vec![
(LatencyBreakdownSlice::QsBatchToPos, 0.35),
// quorum store backpressure is relaxed, so queueing happens here
(LatencyBreakdownSlice::QsPosToProposal, 2.5),
(LatencyBreakdownSlice::MempoolToBlockCreation, 0.35 + 2.5),
// can be adjusted down if less backpressure
(LatencyBreakdownSlice::ConsensusProposalToOrdered, 0.85),
// can be adjusted down if less backpressure
37 changes: 16 additions & 21 deletions testsuite/forge/src/interface/prometheus_metrics.rs
Original file line number Diff line number Diff line change
@@ -106,8 +106,7 @@ pub async fn fetch_system_metrics(

#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)]
pub enum LatencyBreakdownSlice {
QsBatchToPos,
QsPosToProposal,
MempoolToBlockCreation,
ConsensusProposalToOrdered,
ConsensusOrderedToCommit,
ConsensusProposalToCommit,
@@ -155,8 +154,17 @@ pub async fn fetch_latency_breakdown(
let consensus_proposal_to_ordered_query = r#"quantile(0.67, rate(aptos_consensus_block_tracing_sum{role=~"validator", stage="ordered"}[1m]) / rate(aptos_consensus_block_tracing_count{role=~"validator", stage="ordered"}[1m]))"#;
let consensus_proposal_to_commit_query = r#"quantile(0.67, rate(aptos_consensus_block_tracing_sum{role=~"validator", stage="committed"}[1m]) / rate(aptos_consensus_block_tracing_count{role=~"validator", stage="committed"}[1m]))"#;

let qs_batch_to_pos_query = r#"sum(rate(quorum_store_batch_to_PoS_duration_sum{role=~"validator"}[1m])) / sum(rate(quorum_store_batch_to_PoS_duration_count{role=~"validator"}[1m]))"#;
let qs_pos_to_proposal_query = r#"sum(rate(quorum_store_pos_to_pull_sum{role=~"validator"}[1m])) / sum(rate(quorum_store_pos_to_pull_count{role=~"validator"}[1m]))"#;
let mempool_to_block_creation_query = r#"sum(
rate(aptos_core_mempool_txn_commit_latency_sum{
role=~"validator",
stage="commit_accepted_block"
}[1m])
) / sum(
rate(aptos_core_mempool_txn_commit_latency_count{
role=~"validator",
stage="commit_accepted_block"
}[1m])
)"#;

let swarm = swarm.read().await;
let consensus_proposal_to_ordered_samples = swarm
@@ -189,18 +197,9 @@ pub async fn fetch_latency_breakdown(
)
.await?;

let qs_batch_to_pos_samples = swarm
let mempool_to_block_creation_samples = swarm
.query_range_metrics(
qs_batch_to_pos_query,
start_time_adjusted as i64,
end_time as i64,
None,
)
.await?;

let qs_pos_to_proposal_samples = swarm
.query_range_metrics(
qs_pos_to_proposal_query,
mempool_to_block_creation_query,
start_time_adjusted as i64,
end_time as i64,
None,
@@ -209,12 +208,8 @@ pub async fn fetch_latency_breakdown(

let mut samples = BTreeMap::new();
samples.insert(
LatencyBreakdownSlice::QsBatchToPos,
MetricSamples::new(qs_batch_to_pos_samples),
);
samples.insert(
LatencyBreakdownSlice::QsPosToProposal,
MetricSamples::new(qs_pos_to_proposal_samples),
LatencyBreakdownSlice::MempoolToBlockCreation,
MetricSamples::new(mempool_to_block_creation_samples),
);
samples.insert(
LatencyBreakdownSlice::ConsensusProposalToOrdered,
10 changes: 4 additions & 6 deletions testsuite/testcases/src/load_vs_perf_benchmark.rs
Original file line number Diff line number Diff line change
@@ -471,7 +471,7 @@ fn to_table(type_name: String, results: &[Vec<SingleRunStats>]) -> Vec<String> {

let mut table = Vec::new();
table.push(format!(
"{: <name_width$} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12}",
"{: <name_width$} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <14} | {: <12} | {: <13} | {: <12} | {: <12} | {: <12} | {: <12}",
type_name,
"submitted/s",
"committed/s",
@@ -482,8 +482,7 @@ fn to_table(type_name: String, results: &[Vec<SingleRunStats>]) -> Vec<String> {
"p50 lat",
"p90 lat",
"p99 lat",
"batch->pos",
"pos->prop",
"mempool->block",
"prop->order",
"order->commit",
"actual dur",
@@ -497,7 +496,7 @@ fn to_table(type_name: String, results: &[Vec<SingleRunStats>]) -> Vec<String> {
for result in run_results {
let rate = result.stats.rate();
table.push(format!(
"{: <name_width$} | {: <12.2} | {: <12.2} | {: <12.2} | {: <12.2} | {: <12.2} | {: <12.3} | {: <12.3} | {: <12.3} | {: <12.3} | {: <12.3} | {: <12.3} | {: <12.3} | {: <12.3} | {: <12} | {: <12.3} | {: <12.3} | {: <12.3}",
"{: <name_width$} | {: <12.2} | {: <12.2} | {: <12.2} | {: <12.2} | {: <12.2} | {: <12.3} | {: <12.3} | {: <12.3} | {: <12.3} | {: <14.3} | {: <12.3} | {: <13.3} | {: <12.3} | {: <12} | {: <12.3} | {: <12.3}",
result.name,
rate.submitted,
rate.committed,
@@ -508,8 +507,7 @@ fn to_table(type_name: String, results: &[Vec<SingleRunStats>]) -> Vec<String> {
rate.p50_latency as f64 / 1000.0,
rate.p90_latency as f64 / 1000.0,
rate.p99_latency as f64 / 1000.0,
result.latency_breakdown.get_samples(&LatencyBreakdownSlice::QsBatchToPos).unwrap_or(&MetricSamples::default()).max_sample(),
result.latency_breakdown.get_samples(&LatencyBreakdownSlice::QsPosToProposal).unwrap_or(&MetricSamples::default()).max_sample(),
result.latency_breakdown.get_samples(&LatencyBreakdownSlice::MempoolToBlockCreation).unwrap_or(&MetricSamples::default()).max_sample(),
result.latency_breakdown.get_samples(&LatencyBreakdownSlice::ConsensusProposalToOrdered).unwrap_or(&MetricSamples::default()).max_sample(),
result.latency_breakdown.get_samples(&LatencyBreakdownSlice::ConsensusOrderedToCommit).unwrap_or(&MetricSamples::default()).max_sample(),
result.actual_duration.as_secs(),

0 comments on commit 28c112a

Please sign in to comment.