From 1d34172bd058f31956741ec5f3346339fc624fd0 Mon Sep 17 00:00:00 2001 From: htuch Date: Fri, 31 Aug 2018 10:41:27 -0400 Subject: [PATCH] dns: fix exception unsafe behavior in c-ares callbacks. (#4307) Previously, we were taking an exception due to the late validation of update_merge_window duration in ClusterManagerImpl::scheduleUpdate, which happened under a c-ares strict DNS host resolution callback. There are several related issues here: 1. c-ares is exception unsafe, see https://github.com/c-ares/c-ares/issues/219. 2. We should be validating Durations with PGV, see https://github.com/lyft/protoc-gen-validate/issues/97. 3. We should defer the c-ares resolution callbacks to be outside the c-ares callback context for exception safety. This PR addresses (3) by moving callbacks, even when they are "immediate", to a dispatcher post, so that we never take an exception under a c-ares callback. A workaround for (2) is provided, in lieu of https://github.com/lyft/protoc-gen-validate/issues/97, which is blocked on our ability to bump PGV version in Envoy, see https://github.com/lyft/protoc-gen-star/issues/28. Fixes oss-fuzz issue https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=9868. Risk level: Medium (DNS clusters will have some timing changes). Testing: Updated DNS implementation unit tests, server fuzz corpus entry added. Signed-off-by: Harvey Tuch --- include/envoy/network/dns.h | 3 +- source/common/network/dns_impl.cc | 5 +- source/common/network/dns_impl.h | 7 +- source/common/upstream/logical_dns_cluster.cc | 4 +- source/common/upstream/upstream_impl.cc | 7 +- test/common/network/dns_impl_test.cc | 265 +++++++++--------- ...testcase-server_fuzz_test-6288786894880768 | 39 +++ test/server/server_fuzz_test.cc | 8 +- 8 files changed, 199 insertions(+), 139 deletions(-) create mode 100644 test/server/server_corpus/clusterfuzz-testcase-server_fuzz_test-6288786894880768 diff --git a/include/envoy/network/dns.h b/include/envoy/network/dns.h index f89cac1ef461..64d79bd8d7d8 100644 --- a/include/envoy/network/dns.h +++ b/include/envoy/network/dns.h @@ -38,7 +38,8 @@ class DnsResolver { * @param address_list supplies the list of resolved IP addresses. The list will be empty if * the resolution failed. */ - typedef std::function&& address_list)> ResolveCb; + typedef std::function&& address_list)> + ResolveCb; /** * Initiate an async DNS resolution. diff --git a/source/common/network/dns_impl.cc b/source/common/network/dns_impl.cc index 33b721e59155..bb317ae397f0 100644 --- a/source/common/network/dns_impl.cc +++ b/source/common/network/dns_impl.cc @@ -120,7 +120,8 @@ void DnsResolverImpl::PendingResolution::onAresHostCallback(int status, int time if (completed_) { if (!cancelled_) { - callback_(std::move(address_list)); + dispatcher_.post( + [callback = callback_, al = std::move(address_list)] { callback(std::move(al)); }); } if (owned_) { delete this; @@ -185,7 +186,7 @@ ActiveDnsQuery* DnsResolverImpl::resolve(const std::string& dns_name, // failed intial call to getHostbyName followed by a synchronous IPv4 // resolution. std::unique_ptr pending_resolution( - new PendingResolution(callback, channel_, dns_name)); + new PendingResolution(callback, dispatcher_, channel_, dns_name)); if (dns_lookup_family == DnsLookupFamily::Auto) { pending_resolution->fallback_if_failed_ = true; } diff --git a/source/common/network/dns_impl.h b/source/common/network/dns_impl.h index b10fd067d8d2..2f79190e8e18 100644 --- a/source/common/network/dns_impl.h +++ b/source/common/network/dns_impl.h @@ -39,8 +39,9 @@ class DnsResolverImpl : public DnsResolver, protected Logger::Loggableresolve( dns_address, dns_lookup_family_, - [this, - dns_address](std::list&& address_list) -> void { + [this, dns_address]( + const std::list&& address_list) -> void { active_dns_query_ = nullptr; ENVOY_LOG(debug, "async DNS resolution complete for {}", dns_address); info_->stats().update_success_.inc(); diff --git a/source/common/upstream/upstream_impl.cc b/source/common/upstream/upstream_impl.cc index 5e3e4fa91c22..81687dbc9ac2 100644 --- a/source/common/upstream/upstream_impl.cc +++ b/source/common/upstream/upstream_impl.cc @@ -373,6 +373,11 @@ ClusterInfoImpl::ClusterInfoImpl(const envoy::api::v2::Cluster& config, idle_timeout_ = std::chrono::milliseconds( DurationUtil::durationToMilliseconds(config.common_http_protocol_options().idle_timeout())); } + + // TODO(htuch): Remove this temporary workaround when we have + // https://github.com/lyft/protoc-gen-validate/issues/97 resolved. This just provides early + // validation of sanity of fields that we should catch at config ingestion. + DurationUtil::durationToMilliseconds(common_lb_config_.update_merge_window()); } ProtocolOptionsConfigConstSharedPtr @@ -1137,7 +1142,7 @@ void StrictDnsClusterImpl::ResolveTarget::startResolve() { active_query_ = parent_.dns_resolver_->resolve( dns_address_, parent_.dns_lookup_family_, - [this](std::list&& address_list) -> void { + [this](const std::list&& address_list) -> void { active_query_ = nullptr; ENVOY_LOG(debug, "async DNS resolution complete for {}", dns_address_); parent_.info_->stats().update_success_.inc(); diff --git a/test/common/network/dns_impl_test.cc b/test/common/network/dns_impl_test.cc index 136c686a6348..d90029cd576b 100644 --- a/test/common/network/dns_impl_test.cc +++ b/test/common/network/dns_impl_test.cc @@ -459,12 +459,12 @@ INSTANTIATE_TEST_CASE_P(IpVersions, DnsImplTest, // development, where segfaults were encountered due to callback invocations on // destruction. TEST_P(DnsImplTest, DestructPending) { - EXPECT_NE(nullptr, - resolver_->resolve("", DnsLookupFamily::V4Only, - [&](std::list&& results) -> void { - FAIL(); - UNREFERENCED_PARAMETER(results); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "", DnsLookupFamily::V4Only, + [&](const std::list&& results) -> void { + FAIL(); + UNREFERENCED_PARAMETER(results); + })); // Also validate that pending events are around to exercise the resource // reclamation path. EXPECT_GT(peer_->events().size(), 0U); @@ -475,22 +475,24 @@ TEST_P(DnsImplTest, DestructPending) { // asynchronous behavior or network events. TEST_P(DnsImplTest, LocalLookup) { std::list address_list; - EXPECT_NE(nullptr, - resolver_->resolve("", DnsLookupFamily::V4Only, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "", DnsLookupFamily::V4Only, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(address_list.empty()); if (GetParam() == Address::IpVersion::v4) { - EXPECT_EQ(nullptr, - resolver_->resolve("localhost", DnsLookupFamily::V4Only, - [&](std::list&& results) -> void { - address_list = results; - })); + // EXPECT_CALL(dispatcher_, post(_)); + EXPECT_EQ(nullptr, resolver_->resolve( + "localhost", DnsLookupFamily::V4Only, + [&](const std::list&& results) -> void { + address_list = results; + })); + dispatcher_.run(Event::Dispatcher::RunType::NonBlock); EXPECT_TRUE(hasAddress(address_list, "127.0.0.1")); EXPECT_FALSE(hasAddress(address_list, "::1")); } @@ -499,21 +501,23 @@ TEST_P(DnsImplTest, LocalLookup) { const std::string error_msg = "Synchronous DNS IPv6 localhost resolution failed. Please verify localhost resolves to ::1 " "in /etc/hosts, since this misconfiguration is a common cause of these failures."; - EXPECT_EQ(nullptr, - resolver_->resolve("localhost", DnsLookupFamily::V6Only, - [&](std::list&& results) -> void { - address_list = results; - })) + EXPECT_EQ(nullptr, resolver_->resolve( + "localhost", DnsLookupFamily::V6Only, + [&](const std::list&& results) -> void { + address_list = results; + })) << error_msg; + dispatcher_.run(Event::Dispatcher::RunType::NonBlock); EXPECT_TRUE(hasAddress(address_list, "::1")) << error_msg; EXPECT_FALSE(hasAddress(address_list, "127.0.0.1")); - EXPECT_EQ(nullptr, - resolver_->resolve("localhost", DnsLookupFamily::Auto, - [&](std::list&& results) -> void { - address_list = results; - })) + EXPECT_EQ(nullptr, resolver_->resolve( + "localhost", DnsLookupFamily::Auto, + [&](const std::list&& results) -> void { + address_list = results; + })) << error_msg; + dispatcher_.run(Event::Dispatcher::RunType::NonBlock); EXPECT_FALSE(hasAddress(address_list, "127.0.0.1")); EXPECT_TRUE(hasAddress(address_list, "::1")) << error_msg; } @@ -522,32 +526,32 @@ TEST_P(DnsImplTest, LocalLookup) { TEST_P(DnsImplTest, DnsIpAddressVersionV6) { std::list address_list; server_->addHosts("some.good.domain", {"1::2"}, AAAA); - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::Auto, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::Auto, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(hasAddress(address_list, "1::2")); - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::V4Only, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::V4Only, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_FALSE(hasAddress(address_list, "1::2")); - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::V6Only, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::V6Only, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(hasAddress(address_list, "1::2")); @@ -556,32 +560,32 @@ TEST_P(DnsImplTest, DnsIpAddressVersionV6) { TEST_P(DnsImplTest, DnsIpAddressVersion) { std::list address_list; server_->addHosts("some.good.domain", {"1.2.3.4"}, A); - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::Auto, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::Auto, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(hasAddress(address_list, "1.2.3.4")); - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::V4Only, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::V4Only, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(hasAddress(address_list, "1.2.3.4")); - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::V6Only, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::V6Only, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_FALSE(hasAddress(address_list, "1.2.3.4")); @@ -592,22 +596,22 @@ TEST_P(DnsImplTest, DnsIpAddressVersion) { TEST_P(DnsImplTest, RemoteAsyncLookup) { server_->addHosts("some.good.domain", {"201.134.56.7"}, A); std::list address_list; - EXPECT_NE(nullptr, - resolver_->resolve("some.bad.domain", DnsLookupFamily::Auto, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.bad.domain", DnsLookupFamily::Auto, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(address_list.empty()); - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::Auto, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::Auto, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(hasAddress(address_list, "201.134.56.7")); @@ -617,12 +621,12 @@ TEST_P(DnsImplTest, RemoteAsyncLookup) { TEST_P(DnsImplTest, MultiARecordLookup) { server_->addHosts("some.good.domain", {"201.134.56.7", "123.4.5.6", "6.5.4.3"}, A); std::list address_list; - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::V4Only, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::V4Only, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(hasAddress(address_list, "201.134.56.7")); @@ -634,12 +638,12 @@ TEST_P(DnsImplTest, CNameARecordLookupV4) { server_->addCName("root.cnam.domain", "result.cname.domain"); server_->addHosts("result.cname.domain", {"201.134.56.7"}, A); std::list address_list; - EXPECT_NE(nullptr, - resolver_->resolve("root.cnam.domain", DnsLookupFamily::V4Only, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "root.cnam.domain", DnsLookupFamily::V4Only, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(hasAddress(address_list, "201.134.56.7")); @@ -649,12 +653,12 @@ TEST_P(DnsImplTest, CNameARecordLookupWithV6) { server_->addCName("root.cnam.domain", "result.cname.domain"); server_->addHosts("result.cname.domain", {"201.134.56.7"}, A); std::list address_list; - EXPECT_NE(nullptr, - resolver_->resolve("root.cnam.domain", DnsLookupFamily::Auto, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "root.cnam.domain", DnsLookupFamily::Auto, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(hasAddress(address_list, "201.134.56.7")); @@ -664,36 +668,36 @@ TEST_P(DnsImplTest, MultiARecordLookupWithV6) { server_->addHosts("some.good.domain", {"201.134.56.7", "123.4.5.6", "6.5.4.3"}, A); server_->addHosts("some.good.domain", {"1::2", "1::2:3", "1::2:3:4"}, AAAA); std::list address_list; - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::V4Only, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::V4Only, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(hasAddress(address_list, "201.134.56.7")); EXPECT_TRUE(hasAddress(address_list, "123.4.5.6")); EXPECT_TRUE(hasAddress(address_list, "6.5.4.3")); - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::Auto, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::Auto, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(hasAddress(address_list, "1::2")); EXPECT_TRUE(hasAddress(address_list, "1::2:3")); EXPECT_TRUE(hasAddress(address_list, "1::2:3:4")); - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::V6Only, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::V6Only, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(hasAddress(address_list, "1::2")); @@ -705,17 +709,17 @@ TEST_P(DnsImplTest, MultiARecordLookupWithV6) { TEST_P(DnsImplTest, Cancel) { server_->addHosts("some.good.domain", {"201.134.56.7"}, A); - ActiveDnsQuery* query = - resolver_->resolve("some.domain", DnsLookupFamily::Auto, - [](std::list &&) -> void { FAIL(); }); + ActiveDnsQuery* query = resolver_->resolve( + "some.domain", DnsLookupFamily::Auto, + [](const std::list &&) -> void { FAIL(); }); std::list address_list; - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::Auto, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::Auto, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); ASSERT_NE(nullptr, query); query->cancel(); @@ -738,12 +742,12 @@ INSTANTIATE_TEST_CASE_P(IpVersions, DnsImplZeroTimeoutTest, TEST_P(DnsImplZeroTimeoutTest, Timeout) { server_->addHosts("some.good.domain", {"201.134.56.7"}, A); std::list address_list; - EXPECT_NE(nullptr, - resolver_->resolve("some.good.domain", DnsLookupFamily::V4Only, - [&](std::list&& results) -> void { - address_list = results; - dispatcher_.exit(); - })); + EXPECT_NE(nullptr, resolver_->resolve( + "some.good.domain", DnsLookupFamily::V4Only, + [&](const std::list&& results) -> void { + address_list = results; + dispatcher_.exit(); + })); dispatcher_.run(Event::Dispatcher::RunType::Block); EXPECT_TRUE(address_list.empty()); @@ -760,10 +764,11 @@ TEST(DnsImplUnitTest, PendingTimerEnable) { Event::FileEvent* file_event = new NiceMock(); EXPECT_CALL(dispatcher, createFileEvent_(_, _, _, _)).WillOnce(Return(file_event)); EXPECT_CALL(*timer, enableTimer(_)); - EXPECT_NE(nullptr, resolver.resolve("some.bad.domain.invalid", DnsLookupFamily::V4Only, - [&](std::list&& results) { - UNREFERENCED_PARAMETER(results); - })); + EXPECT_NE(nullptr, + resolver.resolve("some.bad.domain.invalid", DnsLookupFamily::V4Only, + [&](const std::list&& results) { + UNREFERENCED_PARAMETER(results); + })); } } // namespace Network diff --git a/test/server/server_corpus/clusterfuzz-testcase-server_fuzz_test-6288786894880768 b/test/server/server_corpus/clusterfuzz-testcase-server_fuzz_test-6288786894880768 new file mode 100644 index 000000000000..14c7c52760c7 --- /dev/null +++ b/test/server/server_corpus/clusterfuzz-testcase-server_fuzz_test-6288786894880768 @@ -0,0 +1,39 @@ +node { + locality { + } +} +static_resources { + clusters { + name: "x" + type: STRICT_DNS + connect_timeout { + nanos: 250000000 + } + lb_policy: RING_HASH + hosts { + socket_address { + address: "123.1.0.1" + named_port: "x" + } + } + hosts { + socket_address { + address: "127.0.0.2" + named_port: "3" + } + } + common_lb_config { + update_merge_window { + seconds: 281474976710656 + } + } + } +} +admin { + access_log_path: "@-" + address { + pipe { + path: " " + } + } +} diff --git a/test/server/server_fuzz_test.cc b/test/server/server_fuzz_test.cc index 40426d1ea0b5..af38634b42cf 100644 --- a/test/server/server_fuzz_test.cc +++ b/test/server/server_fuzz_test.cc @@ -74,15 +74,21 @@ DEFINE_PROTO_FUZZER(const envoy::config::bootstrap::v2::Bootstrap& input) { options.log_level_ = Fuzz::Runner::logLevel(); } + std::unique_ptr server; try { - auto server = std::make_unique( + server = std::make_unique( options, test_time.timeSource(), std::make_shared("127.0.0.1"), hooks, restart, stats_store, fakelock, component_factory, std::make_unique(), thread_local_instance); } catch (const EnvoyException& ex) { ENVOY_LOG_MISC(debug, "Controlled EnvoyException exit: {}", ex.what()); + return; } + // If we were successful, run any pending events on the main thread's dispatcher loop. These might + // be, for example, pending DNS resolution callbacks. If they generate exceptions, we want to + // explode and fail the test, hence we do this outside of the try-catch above. + server->dispatcher().run(Event::Dispatcher::RunType::NonBlock); } } // namespace Server