diff --git a/testsuite/forge-cli/src/suites/realistic_environment.rs b/testsuite/forge-cli/src/suites/realistic_environment.rs
index dba85bbd4d9e7..2783517edbf72 100644
--- a/testsuite/forge-cli/src/suites/realistic_environment.rs
+++ b/testsuite/forge-cli/src/suites/realistic_environment.rs
@@ -124,20 +124,19 @@ pub(crate) fn realistic_env_workload_sweep_test() -> ForgeConfig {
         ]),
         // Investigate/improve to make latency more predictable on different workloads
         criteria: [
-            (7000, 100, 0.3, 0.5, 0.5, 0.4),
-            (8500, 100, 0.3, 0.5, 0.5, 0.4),
-            (2000, 300, 0.3, 1.0, 0.6, 1.0),
-            (3200, 500, 0.3, 1.0, 0.7, 0.6),
+            (7000, 100, 0.3 + 0.5, 0.5, 0.5),
+            (8500, 100, 0.3 + 0.5, 0.5, 0.4),
+            (2000, 300, 0.3 + 1.0, 0.6, 1.0),
+            (3200, 500, 0.3 + 1.0, 0.7, 0.8),
             // TODO - pos-to-proposal is set to high, until it is calibrated/understood.
-            (28, 5, 0.3, 5.0, 0.7, 1.0),
+            (28, 5, 0.3 + 5.0, 0.7, 1.0),
         ]
         .into_iter()
         .map(
             |(
                 min_tps,
                 max_expired,
-                batch_to_pos,
-                pos_to_proposal,
+                mempool_to_block_creation,
                 proposal_to_ordered,
                 ordered_to_commit,
             )| {
@@ -145,8 +144,10 @@ pub(crate) fn realistic_env_workload_sweep_test() -> ForgeConfig {
                     .add_max_expired_tps(max_expired as f64)
                     .add_max_failed_submission_tps(200.0)
                     .add_latency_breakdown_threshold(LatencyBreakdownThreshold::new_strict(vec![
-                        (LatencyBreakdownSlice::QsBatchToPos, batch_to_pos),
-                        (LatencyBreakdownSlice::QsPosToProposal, pos_to_proposal),
+                        (
+                            LatencyBreakdownSlice::MempoolToBlockCreation,
+                            mempool_to_block_creation,
+                        ),
                         (
                             LatencyBreakdownSlice::ConsensusProposalToOrdered,
                             proposal_to_ordered,
@@ -323,9 +324,8 @@ pub(crate) fn realistic_env_max_load_test(
 
         success_criteria = success_criteria.add_latency_breakdown_threshold(
             LatencyBreakdownThreshold::new_with_breach_pct(
                 vec![
-                    (LatencyBreakdownSlice::QsBatchToPos, 0.35), // quorum store backpressure is relaxed, so queueing happens here
-                    (LatencyBreakdownSlice::QsPosToProposal, 2.5),
+                    (LatencyBreakdownSlice::MempoolToBlockCreation, 0.35 + 2.5),
                     // can be adjusted down if less backpressure
                     (LatencyBreakdownSlice::ConsensusProposalToOrdered, 0.85),
                     // can be adjusted down if less backpressure
diff --git a/testsuite/forge/src/interface/prometheus_metrics.rs b/testsuite/forge/src/interface/prometheus_metrics.rs
index 30351a819a35a..399b52d078fce 100644
--- a/testsuite/forge/src/interface/prometheus_metrics.rs
+++ b/testsuite/forge/src/interface/prometheus_metrics.rs
@@ -106,8 +106,7 @@ pub async fn fetch_system_metrics(
 
 #[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)]
 pub enum LatencyBreakdownSlice {
-    QsBatchToPos,
-    QsPosToProposal,
+    MempoolToBlockCreation,
     ConsensusProposalToOrdered,
     ConsensusOrderedToCommit,
     ConsensusProposalToCommit,
@@ -155,8 +154,17 @@ pub async fn fetch_latency_breakdown(
 
     let consensus_proposal_to_ordered_query = r#"quantile(0.67, rate(aptos_consensus_block_tracing_sum{role=~"validator", stage="ordered"}[1m]) / rate(aptos_consensus_block_tracing_count{role=~"validator", stage="ordered"}[1m]))"#;
     let consensus_proposal_to_commit_query = r#"quantile(0.67, rate(aptos_consensus_block_tracing_sum{role=~"validator", stage="committed"}[1m]) / rate(aptos_consensus_block_tracing_count{role=~"validator", stage="committed"}[1m]))"#;
-    let qs_batch_to_pos_query = r#"sum(rate(quorum_store_batch_to_PoS_duration_sum{role=~"validator"}[1m])) / sum(rate(quorum_store_batch_to_PoS_duration_count{role=~"validator"}[1m]))"#;
-    let qs_pos_to_proposal_query = r#"sum(rate(quorum_store_pos_to_pull_sum{role=~"validator"}[1m])) / sum(rate(quorum_store_pos_to_pull_count{role=~"validator"}[1m]))"#;
+    let mempool_to_block_creation_query = r#"sum(
+        rate(aptos_core_mempool_txn_commit_latency_sum{
+            role=~"validator",
+            stage="commit_accepted_block"
+        }[1m])
+    ) / sum(
+        rate(aptos_core_mempool_txn_commit_latency_count{
+            role=~"validator",
+            stage="commit_accepted_block"
+        }[1m])
+    )"#;
 
     let swarm = swarm.read().await;
     let consensus_proposal_to_ordered_samples = swarm
@@ -189,18 +197,9 @@ pub async fn fetch_latency_breakdown(
         )
         .await?;
 
-    let qs_batch_to_pos_samples = swarm
+    let mempool_to_block_creation_samples = swarm
         .query_range_metrics(
-            qs_batch_to_pos_query,
-            start_time_adjusted as i64,
-            end_time as i64,
-            None,
-        )
-        .await?;
-
-    let qs_pos_to_proposal_samples = swarm
-        .query_range_metrics(
-            qs_pos_to_proposal_query,
+            mempool_to_block_creation_query,
             start_time_adjusted as i64,
             end_time as i64,
             None,
@@ -209,12 +208,8 @@ pub async fn fetch_latency_breakdown(
 
     let mut samples = BTreeMap::new();
     samples.insert(
-        LatencyBreakdownSlice::QsBatchToPos,
-        MetricSamples::new(qs_batch_to_pos_samples),
-    );
-    samples.insert(
-        LatencyBreakdownSlice::QsPosToProposal,
-        MetricSamples::new(qs_pos_to_proposal_samples),
+        LatencyBreakdownSlice::MempoolToBlockCreation,
+        MetricSamples::new(mempool_to_block_creation_samples),
     );
     samples.insert(
         LatencyBreakdownSlice::ConsensusProposalToOrdered,
diff --git a/testsuite/testcases/src/load_vs_perf_benchmark.rs b/testsuite/testcases/src/load_vs_perf_benchmark.rs
index 634fda24f2ccb..51e23edf9a777 100644
--- a/testsuite/testcases/src/load_vs_perf_benchmark.rs
+++ b/testsuite/testcases/src/load_vs_perf_benchmark.rs
@@ -471,7 +471,7 @@ fn to_table(type_name: String, results: &[Vec<SingleRunStats>]) -> Vec<String> {
     let mut table = Vec::new();
 
     table.push(format!(
-        "{: <40} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12}",
+        "{: <40} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12}",
         type_name,
         "submitted/s",
         "committed/s",
@@ -482,8 +482,7 @@ fn to_table(type_name: String, results: &[Vec<SingleRunStats>]) -> Vec<String> {
         "p50 lat",
         "p90 lat",
         "p99 lat",
-        "batch->pos",
-        "pos->prop",
+        "mempool->block",
         "prop->order",
         "order->commit",
         "actual dur",
@@ -497,7 +496,7 @@ fn to_table(type_name: String, results: &[Vec<SingleRunStats>]) -> Vec<String> {
         for result in run_results {
             let rate = result.stats.rate();
             table.push(format!(
-                "{: <40} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12}",
+                "{: <40} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12} | {: <12}",
                 result.name,
                 rate.submitted,
                 rate.committed,
@@ -508,8 +507,7 @@ fn to_table(type_name: String, results: &[Vec<SingleRunStats>]) -> Vec<String> {
                 rate.p50_latency as f64 / 1000.0,
                 rate.p90_latency as f64 / 1000.0,
                 rate.p99_latency as f64 / 1000.0,
-                result.latency_breakdown.get_samples(&LatencyBreakdownSlice::QsBatchToPos).unwrap_or(&MetricSamples::default()).max_sample(),
-                result.latency_breakdown.get_samples(&LatencyBreakdownSlice::QsPosToProposal).unwrap_or(&MetricSamples::default()).max_sample(),
+                result.latency_breakdown.get_samples(&LatencyBreakdownSlice::MempoolToBlockCreation).unwrap_or(&MetricSamples::default()).max_sample(),
                 result.latency_breakdown.get_samples(&LatencyBreakdownSlice::ConsensusProposalToOrdered).unwrap_or(&MetricSamples::default()).max_sample(),
                 result.latency_breakdown.get_samples(&LatencyBreakdownSlice::ConsensusOrderedToCommit).unwrap_or(&MetricSamples::default()).max_sample(),
                 result.actual_duration.as_secs(),