From 350052bb4dea877dbdc1fdeaf29c0ed0ebfe8bf6 Mon Sep 17 00:00:00 2001 From: "Brian (Sunghoon) Cho" Date: Thu, 28 Sep 2023 09:54:14 -0700 Subject: [PATCH] [Forge][Chaos] remove jitter, make inter-region BW 300 Mbps (#10277) ### Description The changes are based on observations while measuring network performance and reading more into the dataset used. * Jitter: My understanding is that re-ordering of packets should be pretty rare in the real world, while the previous jitter configs would introduce re-ordering quite frequently. Unless we have a strong belief that jitter is present in our networks, we shouldn't mess with this. * Inter-region bitrate: The numbers this was based on are iperf with a single TCP stream. The results correlate strongly with RTT, which suggests that RTT is the limiting factor, so there's no real reason to constrain BW itself. For now, 300 Mbps is as fast as our network stack will go for 100+ ms RTT. --- .../src/data/four_region_link_stats.csv | 24 +++++++++---------- .../src/data/two_region_link_stats.csv | 4 ++-- .../src/multi_region_network_test.rs | 9 ++++--- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/testsuite/testcases/src/data/four_region_link_stats.csv b/testsuite/testcases/src/data/four_region_link_stats.csv index 926dbb6cbf93c4..64175bc2a8ecad 100644 --- a/testsuite/testcases/src/data/four_region_link_stats.csv +++ b/testsuite/testcases/src/data/four_region_link_stats.csv @@ -1,13 +1,13 @@ sending_region,receiving_region,bitrate_bps,avgrtt -gcp--us-central1,aws--eu-west-1,86900736,103.435 -gcp--us-central1,aws--ap-northeast-1,65798144,133.996 -gcp--us-central1,aws--sa-east-1,57933824,145.483 -aws--sa-east-1,gcp--us-central1,200146944,145.703 -aws--sa-east-1,aws--eu-west-1,48365568,176.894 -aws--sa-east-1,aws--ap-northeast-1,32505856,255.289 -aws--eu-west-1,gcp--us-central1,275513344,104.169 -aws--eu-west-1,aws--sa-east-1,46530560,176.813 -aws--eu-west-1,aws--ap-northeast-1,40632320,198.555 -aws--ap-northeast-1,gcp--us-central1,214827008,128.999 -aws--ap-northeast-1,aws--eu-west-1,41287680,198.539 -aws--ap-northeast-1,aws--sa-east-1,32243712,255.323 \ No newline at end of file +gcp--us-central1,aws--eu-west-1,300000000,103.435 +gcp--us-central1,aws--ap-northeast-1,300000000,133.996 +gcp--us-central1,aws--sa-east-1,300000000,145.483 +aws--sa-east-1,gcp--us-central1,300000000,145.703 +aws--sa-east-1,aws--eu-west-1,300000000,176.894 +aws--sa-east-1,aws--ap-northeast-1,300000000,255.289 +aws--eu-west-1,gcp--us-central1,300000000,104.169 +aws--eu-west-1,aws--sa-east-1,300000000,176.813 +aws--eu-west-1,aws--ap-northeast-1,300000000,198.555 +aws--ap-northeast-1,gcp--us-central1,300000000,128.999 +aws--ap-northeast-1,aws--eu-west-1,300000000,198.539 +aws--ap-northeast-1,aws--sa-east-1,300000000,255.323 \ No newline at end of file diff --git a/testsuite/testcases/src/data/two_region_link_stats.csv b/testsuite/testcases/src/data/two_region_link_stats.csv index de738dfab91b23..cc22397cf27f15 100644 --- a/testsuite/testcases/src/data/two_region_link_stats.csv +++ b/testsuite/testcases/src/data/two_region_link_stats.csv @@ -1,3 +1,3 @@ sending_region,receiving_region,bitrate_bps,avgrtt -aws--sa-east-1,aws--ap-northeast-1,32505856,255.289 -aws--ap-northeast-1,aws--sa-east-1,32243712,255.323 \ No newline at end of file +aws--sa-east-1,aws--ap-northeast-1,300000000,255.289 +aws--ap-northeast-1,aws--sa-east-1,300000000,255.323 \ No newline at end of file diff --git a/testsuite/testcases/src/multi_region_network_test.rs b/testsuite/testcases/src/multi_region_network_test.rs index 422a38de4b9a32..05ac7c624eb0c0 100644 --- a/testsuite/testcases/src/multi_region_network_test.rs +++ b/testsuite/testcases/src/multi_region_network_test.rs @@ -10,7 +10,10 @@ use std::collections::BTreeMap; /// The link stats are obtained from https://github.com/doitintl/intercloud-throughput/blob/master/results_202202/results.csv /// The four regions were hand-picked from the dataset to simulate a multi-region setup -/// with high latencies and low bandwidth. +/// with high latencies. +/// Note, we restrict bandwidth to 300 Mbps between all regions. The reasoning is that the dataset +/// is measuring TCP bandwidth only which is primarily affected by RTT, and not the actual bandwidth +/// across the regions, which would vary according to competing traffic, etc. const FOUR_REGION_LINK_STATS: &[u8] = include_bytes!("data/four_region_link_stats.csv"); /// The two regions were chosen as the most distant regions among the four regions set. const TWO_REGION_LINK_STATS: &[u8] = include_bytes!("data/two_region_link_stats.csv"); @@ -103,7 +106,7 @@ pub struct InterRegionNetEmConfig { impl Default for InterRegionNetEmConfig { fn default() -> Self { Self { - delay_jitter_ms: 20, + delay_jitter_ms: 0, delay_correlation_percentage: 50, loss_percentage: 3, loss_correlation_percentage: 50, @@ -158,7 +161,7 @@ impl Default for IntraRegionNetEmConfig { Self { bandwidth_rate_mbps: 10 * 1000, // 10 Gbps delay_latency_ms: 50, - delay_jitter_ms: 5, + delay_jitter_ms: 0, delay_correlation_percentage: 50, loss_percentage: 1, loss_correlation_percentage: 50,