Skip to content

Commit

Permalink
dns: limit to 3 resolvers and use better timeout for them
Browse files Browse the repository at this point in the history
The default timeout was 5s and as 5s is the default for most clients we
may try more upstream servers but at that point the client already
considered this a timeout and no longer accepts answers. As such if we
want to try more reolvers in case the first one is offline/not working
then we need a lower timeout. Let's only try 3 as glibc does and then
just divide 5s through the number of resolvers, i.e. 1, 2, 3. This
should make the fall back work.

Fixes #482

Signed-off-by: Paul Holzinger <[email protected]>
  • Loading branch information
Luap99 committed Oct 7, 2024
1 parent 51cb505 commit 246657a
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 10 deletions.
29 changes: 19 additions & 10 deletions src/dns/coredns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ use std::time::Duration;
use tokio::net::TcpListener;
use tokio::net::UdpSocket;

const DEFAULT_TIMEOUT: Duration = Duration::from_secs(5);

pub struct CoreDns {
rx: flume::Receiver<()>, // kill switch receiver
inner: CoreDnsData,
Expand Down Expand Up @@ -254,12 +256,17 @@ impl CoreDns {
req: Message,
proto: Protocol,
) {
let mut timeout = DEFAULT_TIMEOUT;
// Remember do not divide by 0.
if !nameservers.is_empty() {
timeout = Duration::from_secs(5) / nameservers.len() as u32
}
// forward dns request to hosts's /etc/resolv.conf
for nameserver in &nameservers {
let addr = SocketAddr::new(*nameserver, 53);
let (client, handle) = match proto {
Protocol::Udp => {
let stream = UdpClientStream::<UdpSocket>::new(addr);
let stream = UdpClientStream::<UdpSocket>::with_timeout(addr, timeout);
let (cl, bg) = match AsyncClient::connect(stream).await {
Ok(a) => a,
Err(e) => {
Expand All @@ -271,15 +278,17 @@ impl CoreDns {
(cl, handle)
}
Protocol::Tcp => {
let (stream, sender) =
TcpClientStream::<AsyncIoTokioAsStd<tokio::net::TcpStream>>::new(addr);
let (cl, bg) = match AsyncClient::new(stream, sender, None).await {
Ok(a) => a,
Err(e) => {
debug!("Failed to connect to {addr}: {e}");
continue;
}
};
let (stream, sender) = TcpClientStream::<
AsyncIoTokioAsStd<tokio::net::TcpStream>,
>::with_timeout(addr, timeout);
let (cl, bg) =
match AsyncClient::with_timeout(stream, sender, timeout, None).await {
Ok(a) => a,
Err(e) => {
debug!("Failed to connect to {addr}: {e}");
continue;
}
};
let handle = tokio::spawn(bg);
(cl, handle)
}
Expand Down
16 changes: 16 additions & 0 deletions src/server/serve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,9 @@ fn parse_resolv_conf(content: &str) -> AardvarkResult<Vec<IpAddr>> {
None => continue,
}
}

// we do not have time to try many nameservers anyway so only use the first three
nameservers.truncate(3);
Ok(nameservers)
}

Expand Down Expand Up @@ -472,6 +475,19 @@ nameserver 1.1.1.3",
assert_eq!(res, vec![IP_1_1_1_1, IP_1_1_1_2, IP_1_1_1_3]);
}

#[test]
fn test_parse_resolv_conf_truncate_to_three() {
let res = parse_resolv_conf(
"nameserver 1.1.1.1
nameserver 1.1.1.2
nameserver 1.1.1.3
nameserver 1.1.1.4
nameserver 1.2.3.4",
)
.expect("failed to parse");
assert_eq!(res, vec![IP_1_1_1_1, IP_1_1_1_2, IP_1_1_1_3]);
}

#[test]
fn test_parse_resolv_conf_with_invalid_ip() {
parse_resolv_conf("nameserver abc").expect_err("invalid ip must error");
Expand Down
18 changes: 18 additions & 0 deletions test/100-basic-name-resolution.bats
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,24 @@ load helpers
assert "$output" !~ "WARNING: recursion requested but not available"
}

@test "basic container - dns itself (bad and good should fall back)" {
setup_slirp4netns

subnet_a=$(random_subnet 5)
create_config network_name="podman1" container_id=$(random_string 64) container_name="aone" subnet="$subnet_a" custom_dns_server='"192.168.0.0", "10.0.2.3"' aliases='"a1", "1a"'
config_a1=$config
ip_a1=$(echo "$config_a1" | jq -r .networks.podman1.static_ips[0])
gw=$(echo "$config_a1" | jq -r .network_info.podman1.subnets[0].gateway)
create_container "$config_a1"
a1_pid=$CONTAINER_NS_PID

# first custom server is wrong but second server should work
run_in_container_netns "$a1_pid" "dig" "google.com" "@$gw"
assert "$output" =~ "Query time: [23][0-9]{3} msec" "timeout should be 2.5s so request should then work shortly after (udp)"
run_in_container_netns "$a1_pid" "dig" +tcp "google.com" "@$gw"
assert "$output" =~ "Query time: [23][0-9]{3} msec" "timeout should be 2.5s so request should then work shortly after (tcp)"
}

@test "basic container - dns itself custom" {
setup_slirp4netns

Expand Down

0 comments on commit 246657a

Please sign in to comment.