From 246657ac4d3e1afdf5afe17bc76877959fca3888 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Fri, 20 Sep 2024 15:07:11 +0200 Subject: [PATCH] dns: limit to 3 resolvers and use better timeout for them The default timeout was 5s and as 5s is the default for most clients we may try more upstream servers but at that point the client already considered this a timeout and no longer accepts answers. As such if we want to try more reolvers in case the first one is offline/not working then we need a lower timeout. Let's only try 3 as glibc does and then just divide 5s through the number of resolvers, i.e. 1, 2, 3. This should make the fall back work. Fixes #482 Signed-off-by: Paul Holzinger --- src/dns/coredns.rs | 29 +++++++++++++++++++---------- src/server/serve.rs | 16 ++++++++++++++++ test/100-basic-name-resolution.bats | 18 ++++++++++++++++++ 3 files changed, 53 insertions(+), 10 deletions(-) diff --git a/src/dns/coredns.rs b/src/dns/coredns.rs index 863657a2..52ec8fe1 100644 --- a/src/dns/coredns.rs +++ b/src/dns/coredns.rs @@ -24,6 +24,8 @@ use std::time::Duration; use tokio::net::TcpListener; use tokio::net::UdpSocket; +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(5); + pub struct CoreDns { rx: flume::Receiver<()>, // kill switch receiver inner: CoreDnsData, @@ -254,12 +256,17 @@ impl CoreDns { req: Message, proto: Protocol, ) { + let mut timeout = DEFAULT_TIMEOUT; + // Remember do not divide by 0. + if !nameservers.is_empty() { + timeout = Duration::from_secs(5) / nameservers.len() as u32 + } // forward dns request to hosts's /etc/resolv.conf for nameserver in &nameservers { let addr = SocketAddr::new(*nameserver, 53); let (client, handle) = match proto { Protocol::Udp => { - let stream = UdpClientStream::::new(addr); + let stream = UdpClientStream::::with_timeout(addr, timeout); let (cl, bg) = match AsyncClient::connect(stream).await { Ok(a) => a, Err(e) => { @@ -271,15 +278,17 @@ impl CoreDns { (cl, handle) } Protocol::Tcp => { - let (stream, sender) = - TcpClientStream::>::new(addr); - let (cl, bg) = match AsyncClient::new(stream, sender, None).await { - Ok(a) => a, - Err(e) => { - debug!("Failed to connect to {addr}: {e}"); - continue; - } - }; + let (stream, sender) = TcpClientStream::< + AsyncIoTokioAsStd, + >::with_timeout(addr, timeout); + let (cl, bg) = + match AsyncClient::with_timeout(stream, sender, timeout, None).await { + Ok(a) => a, + Err(e) => { + debug!("Failed to connect to {addr}: {e}"); + continue; + } + }; let handle = tokio::spawn(bg); (cl, handle) } diff --git a/src/server/serve.rs b/src/server/serve.rs index c657cf95..3ee8fcf0 100644 --- a/src/server/serve.rs +++ b/src/server/serve.rs @@ -406,6 +406,9 @@ fn parse_resolv_conf(content: &str) -> AardvarkResult> { None => continue, } } + + // we do not have time to try many nameservers anyway so only use the first three + nameservers.truncate(3); Ok(nameservers) } @@ -472,6 +475,19 @@ nameserver 1.1.1.3", assert_eq!(res, vec![IP_1_1_1_1, IP_1_1_1_2, IP_1_1_1_3]); } + #[test] + fn test_parse_resolv_conf_truncate_to_three() { + let res = parse_resolv_conf( + "nameserver 1.1.1.1 +nameserver 1.1.1.2 +nameserver 1.1.1.3 +nameserver 1.1.1.4 +nameserver 1.2.3.4", + ) + .expect("failed to parse"); + assert_eq!(res, vec![IP_1_1_1_1, IP_1_1_1_2, IP_1_1_1_3]); + } + #[test] fn test_parse_resolv_conf_with_invalid_ip() { parse_resolv_conf("nameserver abc").expect_err("invalid ip must error"); diff --git a/test/100-basic-name-resolution.bats b/test/100-basic-name-resolution.bats index 23167f2b..435956e6 100644 --- a/test/100-basic-name-resolution.bats +++ b/test/100-basic-name-resolution.bats @@ -56,6 +56,24 @@ load helpers assert "$output" !~ "WARNING: recursion requested but not available" } +@test "basic container - dns itself (bad and good should fall back)" { + setup_slirp4netns + + subnet_a=$(random_subnet 5) + create_config network_name="podman1" container_id=$(random_string 64) container_name="aone" subnet="$subnet_a" custom_dns_server='"192.168.0.0", "10.0.2.3"' aliases='"a1", "1a"' + config_a1=$config + ip_a1=$(echo "$config_a1" | jq -r .networks.podman1.static_ips[0]) + gw=$(echo "$config_a1" | jq -r .network_info.podman1.subnets[0].gateway) + create_container "$config_a1" + a1_pid=$CONTAINER_NS_PID + + # first custom server is wrong but second server should work + run_in_container_netns "$a1_pid" "dig" "google.com" "@$gw" + assert "$output" =~ "Query time: [23][0-9]{3} msec" "timeout should be 2.5s so request should then work shortly after (udp)" + run_in_container_netns "$a1_pid" "dig" +tcp "google.com" "@$gw" + assert "$output" =~ "Query time: [23][0-9]{3} msec" "timeout should be 2.5s so request should then work shortly after (tcp)" +} + @test "basic container - dns itself custom" { setup_slirp4netns