Skip to content

Commit

Permalink
dns: destroy/reinitialize c-ares channel on ARES_ECONNREFUSED (#9899)
Browse files Browse the repository at this point in the history
Description: this PR adds logic to the DnsResolverImpl to destroy and re-initialize its c-ares channel under certain circumstances. A better option would require work in c-ares c-ares/c-ares#301.
Risk Level: med changes in low-level DNS resolution.
Testing: unit tests

Fixes #4543

Signed-off-by: Jose Nino <[email protected]>
  • Loading branch information
junr03 authored Feb 6, 2020
1 parent aedd8e3 commit a8a43cb
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 18 deletions.
48 changes: 38 additions & 10 deletions source/common/network/dns_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,11 @@ DnsResolverImpl::DnsResolverImpl(
const std::vector<Network::Address::InstanceConstSharedPtr>& resolvers,
const bool use_tcp_for_dns_lookups)
: dispatcher_(dispatcher),
timer_(dispatcher.createTimer([this] { onEventCallback(ARES_SOCKET_BAD, 0); })) {
ares_options options{};
int optmask = 0;
timer_(dispatcher.createTimer([this] { onEventCallback(ARES_SOCKET_BAD, 0); })),
use_tcp_for_dns_lookups_(use_tcp_for_dns_lookups) {

if (use_tcp_for_dns_lookups) {
optmask |= ARES_OPT_FLAGS;
options.flags |= ARES_FLAG_USEVC;
}

initializeChannel(&options, optmask);
AresOptions options = defaultAresOptions();
initializeChannel(&options.options_, options.optmask_);

if (!resolvers.empty()) {
std::vector<std::string> resolver_addrs;
Expand Down Expand Up @@ -65,6 +60,17 @@ DnsResolverImpl::~DnsResolverImpl() {
ares_destroy(channel_);
}

DnsResolverImpl::AresOptions DnsResolverImpl::defaultAresOptions() {
AresOptions options{};

if (use_tcp_for_dns_lookups_) {
options.optmask_ |= ARES_OPT_FLAGS;
options.options_.flags |= ARES_FLAG_USEVC;
}

return options;
}

void DnsResolverImpl::initializeChannel(ares_options* options, int optmask) {
options->sock_state_cb = [](void* arg, int fd, int read, int write) {
static_cast<DnsResolverImpl*>(arg)->onAresSocketStateChange(fd, read, write);
Expand All @@ -83,6 +89,19 @@ void DnsResolverImpl::PendingResolution::onAresGetAddrInfoCallback(int status, i
}
if (!fallback_if_failed_) {
completed_ = true;

// If c-ares returns ARES_ECONNREFUSED and there is no fallback we assume that the channel_ is
// broken. Mark the channel dirty so that it is destroyed and reinitialized on a subsequent call
// to DnsResolver::resolve(). The optimal solution would be for c-ares to reinitialize the
// channel, and not have Envoy track side effects.
// context: https://github.com/envoyproxy/envoy/issues/4543 and
// https://github.com/c-ares/c-ares/issues/301.
//
// The channel cannot be destroyed and reinitialized here because that leads to a c-ares
// segfault.
if (status == ARES_ECONNREFUSED) {
parent_.dirty_channel_ = true;
}
}

std::list<DnsResponse> address_list;
Expand Down Expand Up @@ -203,8 +222,17 @@ ActiveDnsQuery* DnsResolverImpl::resolve(const std::string& dns_name,
// TODO(hennna): Add DNS caching which will allow testing the edge case of a
// failed initial call to getHostByName followed by a synchronous IPv4
// resolution.

// @see DnsResolverImpl::PendingResolution::onAresGetAddrInfoCallback for why this is done.
if (dirty_channel_) {
dirty_channel_ = false;
ares_destroy(channel_);

AresOptions options = defaultAresOptions();
initializeChannel(&options.options_, options.optmask_);
}
std::unique_ptr<PendingResolution> pending_resolution(
new PendingResolution(callback, dispatcher_, channel_, dns_name));
new PendingResolution(*this, callback, dispatcher_, channel_, dns_name));
if (dns_lookup_family == DnsLookupFamily::Auto) {
pending_resolution->fallback_if_failed_ = true;
}
Expand Down
19 changes: 15 additions & 4 deletions source/common/network/dns_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,10 @@ class DnsResolverImpl : public DnsResolver, protected Logger::Loggable<Logger::I
friend class DnsResolverImplPeer;
struct PendingResolution : public ActiveDnsQuery {
// Network::ActiveDnsQuery
PendingResolution(ResolveCb callback, Event::Dispatcher& dispatcher, ares_channel channel,
const std::string& dns_name)
: callback_(callback), dispatcher_(dispatcher), channel_(channel), dns_name_(dns_name) {}
PendingResolution(DnsResolverImpl& parent, ResolveCb callback, Event::Dispatcher& dispatcher,
ares_channel channel, const std::string& dns_name)
: parent_(parent), callback_(callback), dispatcher_(dispatcher), channel_(channel),
dns_name_(dns_name) {}

void cancel() override {
// c-ares only supports channel-wide cancellation, so we just allow the
Expand All @@ -62,6 +63,7 @@ class DnsResolverImpl : public DnsResolver, protected Logger::Loggable<Logger::I
*/
void getAddrInfo(int family);

DnsResolverImpl& parent_;
// Caller supplied callback to invoke on query completion or error.
const ResolveCb callback_;
// Dispatcher to post any callback_ exceptions to.
Expand All @@ -80,19 +82,28 @@ class DnsResolverImpl : public DnsResolver, protected Logger::Loggable<Logger::I
const std::string dns_name_;
};

struct AresOptions {
ares_options options_;
int optmask_;
};

// Callback for events on sockets tracked in events_.
void onEventCallback(int fd, uint32_t events);
// c-ares callback when a socket state changes, indicating that libevent
// should listen for read/write events.
void onAresSocketStateChange(int fd, int read, int write);
// Initialize the channel with given ares_init_options().
// Initialize the channel.
void initializeChannel(ares_options* options, int optmask);
// Update timer for c-ares timeouts.
void updateAresTimer();
// Return default AresOptions.
AresOptions defaultAresOptions();

Event::Dispatcher& dispatcher_;
Event::TimerPtr timer_;
ares_channel channel_;
bool dirty_channel_{};
const bool use_tcp_for_dns_lookups_;
std::unordered_map<int, Event::FileEventPtr> events_;
};

Expand Down
74 changes: 70 additions & 4 deletions test/common/network/dns_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ enum class RecordType { A, AAAA };
class TestDnsServerQuery {
public:
TestDnsServerQuery(ConnectionPtr connection, const HostMap& hosts_a, const HostMap& hosts_aaaa,
const CNameMap& cnames, const std::chrono::seconds& record_ttl)
const CNameMap& cnames, const std::chrono::seconds& record_ttl, bool refused)
: connection_(std::move(connection)), hosts_a_(hosts_a), hosts_aaaa_(hosts_aaaa),
cnames_(cnames), record_ttl_(record_ttl) {
cnames_(cnames), record_ttl_(record_ttl), refused_(refused) {
connection_->addReadFilter(Network::ReadFilterSharedPtr{new ReadFilter(*this)});
}

Expand Down Expand Up @@ -171,7 +171,11 @@ class TestDnsServerQuery {
memcpy(response_base, request, response_base_len);
DNS_HEADER_SET_QR(response_base, 1);
DNS_HEADER_SET_AA(response_base, 0);
DNS_HEADER_SET_RCODE(response_base, answer_size > 0 ? NOERROR : NXDOMAIN);
if (parent_.refused_) {
DNS_HEADER_SET_RCODE(response_base, REFUSED);
} else {
DNS_HEADER_SET_RCODE(response_base, answer_size > 0 ? NOERROR : NXDOMAIN);
}
DNS_HEADER_SET_ANCOUNT(response_base, answer_size);
DNS_HEADER_SET_NSCOUNT(response_base, 0);
DNS_HEADER_SET_ARCOUNT(response_base, 0);
Expand Down Expand Up @@ -253,6 +257,7 @@ class TestDnsServerQuery {
const HostMap& hosts_aaaa_;
const CNameMap& cnames_;
const std::chrono::seconds& record_ttl_;
bool refused_{};
};

class TestDnsServer : public ListenerCallbacks {
Expand All @@ -263,7 +268,7 @@ class TestDnsServer : public ListenerCallbacks {
Network::ConnectionPtr new_connection = dispatcher_.createServerConnection(
std::move(socket), Network::Test::createRawBufferSocket());
TestDnsServerQuery* query = new TestDnsServerQuery(std::move(new_connection), hosts_a_,
hosts_aaaa_, cnames_, record_ttl_);
hosts_aaaa_, cnames_, record_ttl_, refused_);
queries_.emplace_back(query);
}

Expand All @@ -280,6 +285,7 @@ class TestDnsServer : public ListenerCallbacks {
}

void setRecordTtl(const std::chrono::seconds& ttl) { record_ttl_ = ttl; }
void setRefused(bool refused) { refused_ = refused; }

private:
Event::Dispatcher& dispatcher_;
Expand All @@ -288,6 +294,7 @@ class TestDnsServer : public ListenerCallbacks {
HostMap hosts_aaaa_;
CNameMap cnames_;
std::chrono::seconds record_ttl_;
bool refused_{};
// All queries are tracked so we can do resource reclamation when the test is
// over.
std::vector<std::unique_ptr<TestDnsServerQuery>> queries_;
Expand All @@ -300,6 +307,7 @@ class DnsResolverImplPeer {
DnsResolverImplPeer(DnsResolverImpl* resolver) : resolver_(resolver) {}

ares_channel channel() const { return resolver_->channel_; }
bool isChannelDirty() const { return resolver_->dirty_channel_; }
const std::unordered_map<int, Event::FileEventPtr>& events() { return resolver_->events_; }
// Reset the channel state for a DnsResolverImpl such that it will only use
// TCP and optionally has a zero timeout (for validating timeout behavior).
Expand Down Expand Up @@ -582,6 +590,64 @@ TEST_P(DnsImplTest, CallbackException) {
"unknown");
}

// Validate that the c-ares channel is destroyed and re-initialized when c-ares returns
// ARES_ECONNREFUSED as its callback status.
TEST_P(DnsImplTest, DestroyChannelOnRefused) {
ASSERT_FALSE(peer_->isChannelDirty());
server_->addHosts("some.good.domain", {"201.134.56.7"}, RecordType::A);
server_->setRefused(true);

std::list<Address::InstanceConstSharedPtr> address_list;
EXPECT_NE(nullptr, resolver_->resolve("", DnsLookupFamily::V4Only,
[&](std::list<DnsResponse>&& results) -> void {
address_list = getAddressList(results);
dispatcher_->exit();
}));

dispatcher_->run(Event::Dispatcher::RunType::Block);
// The c-ares channel should be dirty because the TestDnsServer replied with return code REFUSED;
// This test, and the way the TestDnsServerQuery is setup, relies on the fact that Envoy's
// c-ares channel is configured **without** the ARES_FLAG_NOCHECKRESP flag. This causes c-ares to
// discard packets with REFUSED, and thus Envoy receives ARES_ECONNREFUSED due to the code here:
// https://github.com/c-ares/c-ares/blob/d7e070e7283f822b1d2787903cce3615536c5610/ares_process.c#L654
// If that flag needs to be set, or c-ares changes its handling this test will need to be updated
// to create another condition where c-ares invokes onAresGetAddrInfoCallback with status ==
// ARES_ECONNREFUSED.
EXPECT_TRUE(peer_->isChannelDirty());
EXPECT_TRUE(address_list.empty());

server_->setRefused(false);

// Resolve will destroy the original channel and create a new one.
EXPECT_NE(nullptr, resolver_->resolve("some.good.domain", DnsLookupFamily::V4Only,
[&](std::list<DnsResponse>&& results) -> void {
address_list = getAddressList(results);
dispatcher_->exit();
}));

dispatcher_->run(Event::Dispatcher::RunType::Block);
// However, the fresh channel initialized by production code does not point to the TestDnsServer.
// This means that resolution will return ARES_ENOTFOUND. This should not dirty the channel.
EXPECT_FALSE(peer_->isChannelDirty());
EXPECT_TRUE(address_list.empty());

// Reset the channel to point to the TestDnsServer, and make sure resolution is healthy.
if (tcp_only()) {
peer_->resetChannelTcpOnly(zero_timeout());
}
ares_set_servers_ports_csv(peer_->channel(), socket_->localAddress()->asString().c_str());

EXPECT_NE(nullptr, resolver_->resolve("some.good.domain", DnsLookupFamily::Auto,
[&](std::list<DnsResponse>&& results) -> void {
address_list = getAddressList(results);
dispatcher_->exit();
}));

dispatcher_->run(Event::Dispatcher::RunType::Block);
EXPECT_FALSE(peer_->isChannelDirty());
EXPECT_TRUE(hasAddress(address_list, "201.134.56.7"));
}

TEST_P(DnsImplTest, DnsIpAddressVersion) {
std::list<Address::InstanceConstSharedPtr> address_list;
server_->addHosts("some.good.domain", {"1.2.3.4"}, RecordType::A);
Expand Down
2 changes: 2 additions & 0 deletions tools/spelling_dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ EDESTRUCTION
EDF
EINVAL
ELB
ENOTFOUND
ENOTSUP
ENV
EOF
Expand Down Expand Up @@ -181,6 +182,7 @@ NDEBUG
NEXTHDR
NGHTTP
NOAUTH
NOCHECKRESP
NODELAY
NOLINT
NOLINTNEXTLINE
Expand Down

0 comments on commit a8a43cb

Please sign in to comment.