From 1a4732334e2913d97c64579802bb731b64773436 Mon Sep 17 00:00:00 2001
From: "Brian (Sunghoon) Cho" <brian@aptoslabs.com>
Date: Fri, 7 Jun 2024 01:06:32 -0700
Subject: [PATCH] [quorum store] reduce backpressure significantly for more TPS
 (#13558)

## Description

As Quorum Store batches are bucketed, and we are looking to increase block limits, now is the time to reduce Quorum Store backpressure.

We now allow 36K transactions outstanding. At 12K TPS, this is approximately 3 seconds' worth of batches.

In forge tests, much of the queueing shifts from mempool to the Quorum Store PoS-to-Proposal stage, so the forge latency thresholds need to be adjusted accordingly.
---
 config/src/config/quorum_store_config.rs |  6 +++---
 testsuite/forge-cli/src/main.rs          | 16 ++++++++--------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/config/src/config/quorum_store_config.rs b/config/src/config/quorum_store_config.rs
index 8d99d131529ca..7181ce1141196 100644
--- a/config/src/config/quorum_store_config.rs
+++ b/config/src/config/quorum_store_config.rs
@@ -29,14 +29,14 @@ impl Default for QuorumStoreBackPressureConfig {
         QuorumStoreBackPressureConfig {
             // QS will be backpressured if the remaining total txns is more than this number
             // Roughly, target TPS * commit latency seconds
-            backlog_txn_limit_count: 12_000,
+            backlog_txn_limit_count: 36_000,
             // QS will create batches at the max rate until this number is reached
-            backlog_per_validator_batch_limit_count: 4,
+            backlog_per_validator_batch_limit_count: 20,
             decrease_duration_ms: 1000,
             increase_duration_ms: 1000,
             decrease_fraction: 0.5,
             dynamic_min_txn_per_s: 160,
-            dynamic_max_txn_per_s: 4000,
+            dynamic_max_txn_per_s: 12000,
         }
     }
 }
diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs
index f0706af80692f..4b1fab6b1ca40 100644
--- a/testsuite/forge-cli/src/main.rs
+++ b/testsuite/forge-cli/src/main.rs
@@ -1112,10 +1112,10 @@ fn realistic_env_workload_sweep_test() -> ForgeConfig {
         ]),
         // Investigate/improve to make latency more predictable on different workloads
         criteria: [
-            (7700, 100, 0.3, 0.3, 0.5, 0.5),
-            (7000, 100, 0.3, 0.3, 0.5, 0.5),
-            (2000, 300, 0.3, 0.8, 0.6, 0.7),
-            (3200, 500, 0.3, 0.4, 0.7, 1.0),
+            (7700, 100, 0.3, 0.5, 0.5, 0.5),
+            (7000, 100, 0.3, 0.5, 0.5, 0.5),
+            (2000, 300, 0.3, 1.0, 0.6, 1.0),
+            (3200, 500, 0.3, 1.5, 0.7, 0.7),
             // (150, 0.5, 1.0, 1.5, 0.65),
         ]
         .into_iter()
@@ -1950,9 +1950,9 @@ fn realistic_env_max_load_test(
         .add_system_metrics_threshold(SystemMetricsThreshold::new(
             // Check that we don't use more than 18 CPU cores for 10% of the time.
             MetricsThreshold::new(18.0, 10),
-            // Memory starts around 3GB, and grows around 1.2GB/hr in this test.
+            // Memory starts around 3.5GB, and grows around 1.4GB/hr in this test.
             // Check that we don't use more than final expected memory for more than 10% of the time.
-            MetricsThreshold::new_gb(3.3 + 1.4 * (duration_secs as f64 / 3600.0), 10),
+            MetricsThreshold::new_gb(3.5 + 1.4 * (duration_secs as f64 / 3600.0), 10),
         ))
         .add_no_restarts()
         .add_wait_for_catchup_s(
@@ -1970,8 +1970,8 @@ fn realistic_env_max_load_test(
             LatencyBreakdownThreshold::new_with_breach_pct(
                 vec![
                     (LatencyBreakdownSlice::QsBatchToPos, 0.35),
-                    // only reaches close to threshold during epoch change
-                    (LatencyBreakdownSlice::QsPosToProposal, 0.6),
+                    // quorum store backpressure is relaxed, so queueing happens here
+                    (LatencyBreakdownSlice::QsPosToProposal, 2.5),
                     // can be adjusted down if less backpressure
                     (LatencyBreakdownSlice::ConsensusProposalToOrdered, 0.85),
                     // can be adjusted down if less backpressure