From 3b691d8a1d7419145196f5c9cee504602c375d9d Mon Sep 17 00:00:00 2001 From: Will Chandler <10663805+wfchandler@users.noreply.github.com> Date: Tue, 1 Oct 2024 16:33:07 -0400 Subject: [PATCH] Enlarge TCP recv_buf to improve throughput (#6690) Image uploads performed via the [web console are 3-4x slower than uploads performed via the Oxide CLI](https://github.com/oxidecomputer/console/issues/2096). We found that the CLI creates 8 separate TCP connections to upload the image chunks, while the console uses HTTP/2 to multiplex a single TCP connection six ways. The default TCP `recv_buf` size on Helios is 128 KB, which limits window size and therefore the number of packets that can be sent in parallel. By increasing this value to 1 MB, we can increase single-connection throughput by ~3x, bringing console performance to rough parity with the CLI. This does increase the amount of memory a potential DoS attack could consume, but 1 MB is still quite small relative to the total resources available on a compute sled. While we're at it, also update the TCP congestion control algorithm to `cubic` from its default value of `sunreno`, which may also help improve throughput. Closes https://github.com/oxidecomputer/omicron/issues/6601 --- illumos-utils/src/ipadm.rs | 42 +++++++++++++++++++++++++++++++- zone-setup/src/bin/zone-setup.rs | 11 +++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/illumos-utils/src/ipadm.rs b/illumos-utils/src/ipadm.rs index d5e0053ba9d..ff99cd7a26e 100644 --- a/illumos-utils/src/ipadm.rs +++ b/illumos-utils/src/ipadm.rs @@ -141,7 +141,7 @@ impl Ipadm { Ok(Self::addrobj_addr(addrobj)?.is_some()) } - // Set MTU to 9000 on both IPv4 and IPv6 + /// Set MTU to 9000 on both IPv4 and IPv6 pub fn set_interface_mtu(datalink: &str) -> Result<(), ExecutionError> { let mut cmd = std::process::Command::new(PFEXEC); let cmd = cmd.args(&[ @@ -197,4 +197,44 @@ impl Ipadm { Self::ensure_ip_addrobj_exists(&addrobj, AddrObjType::DHCP)?; Ok(()) } + + /// Set TCP recv_buf to 1 MB. + pub fn set_tcp_recv_buf() -> Result<(), ExecutionError> { + let mut cmd = std::process::Command::new(PFEXEC); + + // This is to improve single-connection throughput on large uploads + // from clients, e.g., images. Modern browsers will almost always use + // HTTP/2, which will multiplex concurrent writes to the same host over + // a single TCP connection. The small default receive window size is a + // major bottleneck, see + // https://github.com/oxidecomputer/console/issues/2096 for further + // details. + let cmd = cmd.args(&[ + IPADM, + "set-prop", + "-t", + "-p", + "recv_buf=1000000", + "tcp", + ]); + execute(cmd)?; + + Ok(()) + } + + /// Set TCP congestion control algorithm to `cubic`. + pub fn set_tcp_congestion_control() -> Result<(), ExecutionError> { + let mut cmd = std::process::Command::new(PFEXEC); + let cmd = cmd.args(&[ + IPADM, + "set-prop", + "-t", + "-p", + "congestion_control=cubic", + "tcp", + ]); + execute(cmd)?; + + Ok(()) + } } diff --git a/zone-setup/src/bin/zone-setup.rs b/zone-setup/src/bin/zone-setup.rs index 97bb2af60dd..4441431605c 100644 --- a/zone-setup/src/bin/zone-setup.rs +++ b/zone-setup/src/bin/zone-setup.rs @@ -563,6 +563,17 @@ async fn common_nw_set_up( Ipadm::set_interface_mtu(&datalink) .with_context(|| format!("failed to set MTU on datalink {datalink}"))?; + info!( + log, "Setting TCP recv_buf size to 1 MB"; + ); + Ipadm::set_tcp_recv_buf().context("failed to set TCP recv_buf")?; + + info!( + log, "Setting TCP congestion control algorithm to cubic"; + ); + Ipadm::set_tcp_congestion_control() + .context("failed to set TCP congestion_control")?; + if static_addrs.is_empty() { info!( log,