[forge] Add latency for different workloads (#9415)

aptos-labs · Aug 10, 2023 · f2e91d5 · f2e91d5
1 parent 6a634d6
commit f2e91d5
Show file tree

Hide file tree

Showing 4 changed files with 228 additions and 88 deletions.
diff --git a/.github/workflows/forge-stable.yaml b/.github/workflows/forge-stable.yaml
@@ -127,11 +127,23 @@ jobs:
       FORGE_TEST_SUITE: realistic_env_load_sweep
       POST_TO_SLACK: true
 
-  run-forge-realistic-env-graceful-overload:
+  run-forge-realistic-env-workload-sweep:
     if: ${{ github.event_name != 'pull_request' && always() }}
     needs: [determine-test-metadata, run-forge-realistic-env-load-sweep] # Only run after the previous job completes
     uses: aptos-labs/aptos-core/.github/workflows/workflow-run-forge.yaml@main
     secrets: inherit
+    with:
+      IMAGE_TAG: ${{ needs.determine-test-metadata.outputs.IMAGE_TAG }}
+      FORGE_NAMESPACE: forge-realistic-env-workload-sweep-${{ needs.determine-test-metadata.outputs.IMAGE_TAG }}
+      FORGE_RUNNER_DURATION_SECS: 1600 # Run for ~27 minutes (4 tests, each for 400 seconds)
+      FORGE_TEST_SUITE: realistic_env_workload_sweep
+      POST_TO_SLACK: true
+
+  run-forge-realistic-env-graceful-overload:
+    if: ${{ github.event_name != 'pull_request' && always() }}
+    needs: [determine-test-metadata, run-forge-realistic-env-workload-sweep] # Only run after the previous job completes
+    uses: aptos-labs/aptos-core/.github/workflows/workflow-run-forge.yaml@main
+    secrets: inherit
     with:
       IMAGE_TAG: ${{ needs.determine-test-metadata.outputs.IMAGE_TAG }}
       FORGE_NAMESPACE: forge-realistic-env-graceful-overload-${{ needs.determine-test-metadata.outputs.IMAGE_TAG }}

diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs
@@ -527,6 +527,7 @@ fn single_test_suite(
         // Rest of the tests:
         "realistic_env_max_load_large" => realistic_env_max_load_test(duration, test_cmd, 20, 10),
         "realistic_env_load_sweep" => realistic_env_load_sweep_test(),
+        "realistic_env_workload_sweep" => realistic_env_workload_sweep_test(),
         "realistic_env_graceful_overload" => realistic_env_graceful_overload(),
         "realistic_network_tuned_for_throughput" => realistic_network_tuned_for_throughput_test(),
         "epoch_changer_performance" => epoch_changer_performance(),
@@ -797,31 +798,19 @@ fn consensus_stress_test() -> ForgeConfig {
     })
 }
 
-fn realistic_env_load_sweep_test() -> ForgeConfig {
+fn realistic_env_sweep_wrap(
+    num_validators: usize,
+    num_fullnodes: usize,
+    test: LoadVsPerfBenchmark,
+) -> ForgeConfig {
     ForgeConfig::default()
-        .with_initial_validator_count(NonZeroUsize::new(20).unwrap())
-        .with_initial_fullnode_count(10)
-        .add_network_test(wrap_with_realistic_env(LoadVsPerfBenchmark {
-            test: Box::new(PerformanceBenchmark),
-            workloads: Workloads::TPS(&[10, 100, 1000, 3000, 5000]),
-            criteria: [
-                (9, 1.5, 3., 4.),
-                (95, 1.5, 3., 4.),
-                (950, 2., 3., 4.),
-                (2750, 2.5, 3.5, 4.5),
-                (4600, 3., 4., 5.),
-            ]
-            .into_iter()
-            .map(|(min_tps, max_lat_p50, max_lat_p90, max_lat_p99)| {
-                SuccessCriteria::new(min_tps)
-                    .add_max_expired_tps(0)
-                    .add_max_failed_submission_tps(0)
-                    .add_latency_threshold(max_lat_p50, LatencyType::P50)
-                    .add_latency_threshold(max_lat_p90, LatencyType::P90)
-                    .add_latency_threshold(max_lat_p99, LatencyType::P99)
-            })
-            .collect(),
+        .with_initial_validator_count(NonZeroUsize::new(num_validators).unwrap())
+        .with_initial_fullnode_count(num_fullnodes)
+        .with_node_helm_config_fn(Arc::new(move |helm_values| {
+            helm_values["validator"]["config"]["execution"]
+                ["processed_transactions_detailed_counters"] = true.into();
         }))
+        .add_network_test(wrap_with_realistic_env(test))
         // Test inherits the main EmitJobRequest, so update here for more precise latency measurements
         .with_emit_job(
             EmitJobRequest::default().latency_polling_interval(Duration::from_millis(100)),
@@ -841,6 +830,98 @@ fn realistic_env_load_sweep_test() -> ForgeConfig {
         )
 }
 
+fn realistic_env_load_sweep_test() -> ForgeConfig {
+    realistic_env_sweep_wrap(20, 10, LoadVsPerfBenchmark {
+        test: Box::new(PerformanceBenchmark),
+        workloads: Workloads::TPS(&[10, 100, 1000, 3000, 5000]),
+        criteria: [
+            (9, 1.5, 3., 4.),
+            (95, 1.5, 3., 4.),
+            (950, 2., 3., 4.),
+            (2750, 2.5, 3.5, 4.5),
+            (4600, 3., 4., 5.),
+        ]
+        .into_iter()
+        .map(|(min_tps, max_lat_p50, max_lat_p90, max_lat_p99)| {
+            SuccessCriteria::new(min_tps)
+                .add_max_expired_tps(0)
+                .add_max_failed_submission_tps(0)
+                .add_latency_threshold(max_lat_p50, LatencyType::P50)
+                .add_latency_threshold(max_lat_p90, LatencyType::P90)
+                .add_latency_threshold(max_lat_p99, LatencyType::P99)
+        })
+        .collect(),
+    })
+}
+
+fn realistic_env_workload_sweep_test() -> ForgeConfig {
+    realistic_env_sweep_wrap(7, 3, LoadVsPerfBenchmark {
+        test: Box::new(PerformanceBenchmark),
+        workloads: Workloads::TRANSACTIONS(&[
+            TransactionWorkload {
+                transaction_type: TransactionTypeArg::CoinTransfer,
+                num_modules: 1,
+                unique_senders: false,
+                mempool_backlog: 20000,
+            },
+            TransactionWorkload {
+                transaction_type: TransactionTypeArg::NoOp,
+                num_modules: 100,
+                unique_senders: false,
+                mempool_backlog: 20000,
+            },
+            TransactionWorkload {
+                transaction_type: TransactionTypeArg::ModifyGlobalResource,
+                num_modules: 1,
+                unique_senders: true,
+                mempool_backlog: 20000,
+            },
+            TransactionWorkload {
+                transaction_type: TransactionTypeArg::TokenV2AmbassadorMint,
+                num_modules: 1,
+                unique_senders: true,
+                mempool_backlog: 10000,
+            },
+            // TODO: PublishPackage transactions currently get rejected; re-enable this workload once fixed.
+            // TransactionWorkload {
+            //     transaction_type: TransactionTypeArg::PublishPackage,
+            //     num_modules: 1,
+            //     unique_senders: true,
+            //     mempool_backlog: 1000,
+            // },
+        ]),
+        // TODO: investigate and improve so that latency is more predictable across different workloads.
+        criteria: [
+            (3700, 0.35, 0.5, 0.8, 0.65),
+            (2800, 0.35, 0.5, 1.2, 1.2),
+            (1800, 0.35, 0.5, 1.5, 2.7),
+            (950, 0.35, 0.65, 1.5, 2.7),
+            // (150, 0.5, 1.0, 1.5, 0.65),
+        ]
+        .into_iter()
+        .map(
+            |(min_tps, batch_to_pos, pos_to_proposal, proposal_to_ordered, ordered_to_commit)| {
+                SuccessCriteria::new(min_tps)
+                    .add_max_expired_tps(200)
+                    .add_max_failed_submission_tps(200)
+                    .add_latency_breakdown_threshold(LatencyBreakdownThreshold::new_strict(vec![
+                        (LatencyBreakdownSlice::QsBatchToPos, batch_to_pos),
+                        (LatencyBreakdownSlice::QsPosToProposal, pos_to_proposal),
+                        (
+                            LatencyBreakdownSlice::ConsensusProposalToOrdered,
+                            proposal_to_ordered,
+                        ),
+                        (
+                            LatencyBreakdownSlice::ConsensusOrderedToCommit,
+                            ordered_to_commit,
+                        ),
+                    ]))
+            },
+        )
+        .collect(),
+    })
+}
+
 fn load_vs_perf_benchmark() -> ForgeConfig {
     ForgeConfig::default()
         .with_initial_validator_count(NonZeroUsize::new(20).unwrap())
@@ -875,51 +956,56 @@ fn workload_vs_perf_benchmark() -> ForgeConfig {
             helm_values["validator"]["config"]["execution"]
                 ["processed_transactions_detailed_counters"] = true.into();
         }))
-        // .with_emit_job(EmitJobRequest::default().mode(EmitJobMode::MaxLoad {
-        //     mempool_backlog: 10000,
-        // }))
         .add_network_test(LoadVsPerfBenchmark {
             test: Box::new(PerformanceBenchmark),
             workloads: Workloads::TRANSACTIONS(&[
                 TransactionWorkload {
                     transaction_type: TransactionTypeArg::NoOp,
                     num_modules: 1,
                     unique_senders: false,
+                    mempool_backlog: 20000,
                 },
                 TransactionWorkload {
                     transaction_type: TransactionTypeArg::NoOp,
                     num_modules: 1,
                     unique_senders: true,
+                    mempool_backlog: 20000,
                 },
                 TransactionWorkload {
                     transaction_type: TransactionTypeArg::NoOp,
                     num_modules: 1000,
                     unique_senders: false,
+                    mempool_backlog: 20000,
                 },
                 TransactionWorkload {
                     transaction_type: TransactionTypeArg::CoinTransfer,
                     num_modules: 1,
                     unique_senders: true,
+                    mempool_backlog: 20000,
                 },
                 TransactionWorkload {
                     transaction_type: TransactionTypeArg::CoinTransfer,
                     num_modules: 1,
                     unique_senders: true,
+                    mempool_backlog: 20000,
                 },
                 TransactionWorkload {
                     transaction_type: TransactionTypeArg::AccountResource32B,
                     num_modules: 1,
                     unique_senders: true,
+                    mempool_backlog: 20000,
                 },
                 TransactionWorkload {
                     transaction_type: TransactionTypeArg::AccountResource1KB,
                     num_modules: 1,
                     unique_senders: true,
+                    mempool_backlog: 20000,
                 },
                 TransactionWorkload {
                     transaction_type: TransactionTypeArg::PublishPackage,
                     num_modules: 1,
                     unique_senders: true,
+                    mempool_backlog: 20000,
                 },
             ]),
             criteria: Vec::new(),

diff --git a/testsuite/forge/src/success_criteria.rs b/testsuite/forge/src/success_criteria.rs
@@ -114,10 +114,17 @@ impl LatencyBreakdownThreshold {
         }
     }
 
-    pub fn ensure_threshold(&self, metrics: &LatencyBreakdown) -> anyhow::Result<()> {
+    pub fn ensure_threshold(
+        &self,
+        metrics: &LatencyBreakdown,
+        traffic_name_addition: &String,
+    ) -> anyhow::Result<()> {
         for (slice, threshold) in &self.thresholds {
             let samples = metrics.get_samples(slice);
-            threshold.ensure_metrics_threshold(&format!("{:?}", slice), samples.get())?;
+            threshold.ensure_metrics_threshold(
+                &format!("{:?}{}", slice, traffic_name_addition),
+                samples.get(),
+            )?;
         }
         Ok(())
     }
@@ -220,7 +227,8 @@ impl SuccessCriteriaChecker {
             &traffic_name_addition,
         )?;
         if let Some(latency_breakdown_thresholds) = &success_criteria.latency_breakdown_thresholds {
-            latency_breakdown_thresholds.ensure_threshold(latency_breakdown.unwrap())?;
+            latency_breakdown_thresholds
+                .ensure_threshold(latency_breakdown.unwrap(), &traffic_name_addition)?;
         }
         Ok(())
     }
@@ -244,22 +252,24 @@ impl SuccessCriteriaChecker {
         );
         let stats_rate = stats.rate();
 
+        let no_traffic_name_addition = "".to_string();
         Self::check_throughput(
             success_criteria.min_avg_tps,
             success_criteria.max_expired_tps,
             success_criteria.max_failed_submission_tps,
             &stats_rate,
-            &"".to_string(),
+            &no_traffic_name_addition,
         )?;
 
         Self::check_latency(
             &success_criteria.latency_thresholds,
             &stats_rate,
-            &"".to_string(),
+            &no_traffic_name_addition,
         )?;
 
         if let Some(latency_breakdown_thresholds) = &success_criteria.latency_breakdown_thresholds {
-            latency_breakdown_thresholds.ensure_threshold(latency_breakdown)?;
+            latency_breakdown_thresholds
+                .ensure_threshold(latency_breakdown, &no_traffic_name_addition)?;
         }
 
         if let Some(timeout) = success_criteria.wait_for_all_nodes_to_catchup {