From c3b47435970b7108760d9df8fe936da24d9c8cca Mon Sep 17 00:00:00 2001 From: David Xia Date: Wed, 6 Apr 2016 14:58:09 -0400 Subject: [PATCH] [solo] Force TCP mode on skydns TL;DR When two DNS servers don't work, add one more! When running some integration tests with HeliosSoloDeployment on Docker hosts that use a local unbound instance as their DNS resolver (i.e. specified in `/etc/resolv.conf` on the Docker host), we saw test failures due to failed SRV queries to skydns. Skydns is running in the solo container and forwards DNS queries it doesn't know about to the unbound instance via logic in `start.sh`. The skydns error output from the helios solo container spawned by HeliosSoloDeployment looked like: ``` skydns: failure to forward request "dns: failed to unpack truncated message" ``` Our guess is that large UDP responses from the upstream unbound have the "Message Truncated" DNS flag set. When this type of response reaches skydns, skydns blows up and doesn't tell the client about the error. The client times out without retrying in TCP mode. The client would've retried if it had received an error message from skydns. Running `dig` against skydns works. We think this is because `dig` adds an OPT record to its query that sets "udp payload size: 4096". Here are some outstanding issues in skydns that seem related: * https://github.com/skynetservices/skydns/issues/242 * https://github.com/skynetservices/skydns/issues/45 Solution: We start an unbound instance in the solo container and have it forward DNS queries via UDP to the upstream skydns in the same container. Unbound will add the OPT section that makes everything work. Things are fixed. :) We admit this is super funky... And this might only work for UDP packets up to 4096 bytes, the default set by unbound in OPT. 
--- solo/base/Dockerfile | 4 +++- solo/base/unbound-skydns.conf | 14 ++++++++++++++ solo/base/version.txt | 2 +- solo/docker/Dockerfile | 2 +- solo/docker/start.sh | 3 ++- 5 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 solo/base/unbound-skydns.conf diff --git a/solo/base/Dockerfile b/solo/base/Dockerfile index 94146e2b7a..3062d5cf22 100644 --- a/solo/base/Dockerfile +++ b/solo/base/Dockerfile @@ -2,9 +2,11 @@ FROM ubuntu:trusty ENV DEBIAN_FRONTEND noninteractive RUN apt-get update \ - && apt-get install --no-install-recommends -y curl dnsutils zookeeper git mercurial \ + && apt-get install --no-install-recommends -y curl dnsutils zookeeper git mercurial unbound \ && apt-get clean && rm -rf /var/lib/apt/lists/* +ADD unbound-skydns.conf /etc/unbound/unbound.conf + # Install helios-skydns plugin ENV SKYDNS_PLUGIN_VERSION 0.1 ENV SKYDNS_PLUGIN_DEB helios-skydns_${SKYDNS_PLUGIN_VERSION}_all.deb diff --git a/solo/base/unbound-skydns.conf b/solo/base/unbound-skydns.conf new file mode 100644 index 0000000000..9f3f2418b1 --- /dev/null +++ b/solo/base/unbound-skydns.conf @@ -0,0 +1,14 @@ +server: + interface: 0.0.0.0 + interface: ::0 + port: 53 + tcp-upstream: no + num-threads: 1 + incoming-num-tcp: 256 + outgoing-num-tcp: 256 + access-control: 0.0.0.0/0 allow + do-not-query-localhost: no + +forward-zone: + name: "." 
+ forward-addr: "127.0.0.1@5353" diff --git a/solo/base/version.txt b/solo/base/version.txt index 2eb3c4fe4e..5a2a5806df 100644 --- a/solo/base/version.txt +++ b/solo/base/version.txt @@ -1 +1 @@ -0.5 +0.6 diff --git a/solo/docker/Dockerfile b/solo/docker/Dockerfile index 46e137bf65..29d5ac9ed5 100644 --- a/solo/docker/Dockerfile +++ b/solo/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM spotify/helios-solo-base:0.5 +FROM spotify/helios-solo-base:0.6 EXPOSE 5801 diff --git a/solo/docker/start.sh b/solo/docker/start.sh index e2153db824..0b9db0e1ab 100755 --- a/solo/docker/start.sh +++ b/solo/docker/start.sh @@ -13,7 +13,7 @@ SKYDNS_PATH=$(echo $HELIOS_NAME|python -c "import sys;h=sys.stdin.read().strip() # Write skydns configuration and retry for 30 seconds until successful for i in {1..30}; do if curl --retry 30 -XPUT http://127.0.0.1:4001/v2/keys/skydns/config \ - -d value="{\"dns_addr\":\"0.0.0.0:53\", \"ttl\":3600, \"nameservers\": $NAMESERVERS, \"domain\":\"local.\"}"; then + -d value="{\"dns_addr\":\"0.0.0.0:5353\", \"ttl\":3600, \"nameservers\": $NAMESERVERS, \"domain\":\"local.\"}"; then break fi sleep 1 @@ -24,6 +24,7 @@ curl -XPUT http://127.0.0.1:4001/v2/keys/skydns/${SKYDNS_PATH} \ -d value="{\"host\":\"$HOST_ADDRESS\"}" skydns $SKYDNS_OPTS -verbose & +unbound /usr/share/zookeeper/bin/zkServer.sh start