From d336e5a76d18f5d9135a6676170dfadc3cd5b332 Mon Sep 17 00:00:00 2001 From: Suhas Dantkale Date: Mon, 4 May 2020 14:22:46 +0200 Subject: [PATCH] ZOOKEEPER-3723: Zookeeper Client should not fail with ZSYSTEMERROR if DNS does not resolve one of the servers in the zk ensemble The change is backported from Jeelani Mohamed Abdul Khader 's https://github.com/apache/zookeeper/pull/579 change done in "ZOOKEEPER-3095: Connect string fix for non-existent hosts" Code changes to not fail the session initiation if the DNS is not able to resolve the hostname of one of the servers in the Zookeeper ensemble. Some Background: The Zookeeper client resolves all the hostnames in the ensemble while establishing the session. In Kubernetes environment with coreDNS, the hostname entry gets removed from coreDNS during the POD restarts. Though we can manipulate the coreDNS settings to delay the removal of the hostname entry from DNS, we don't want to leave any race where Zookeeper client is trying to establish a session and it fails because the DNS temporarily is not able to resolve the hostname. So as long as one of the servers in the ensemble is able to be DNS resolvable, should we not fail the session establishment with hard error and instead try to establish the connection with one of the other servers Author: Suhas Dantkale Reviewers: Enrico Olivelli , Mate Szalay-Beko , Damien Diederen Closes #1259 from suhasdantkale/suhas/ZOOKEEPER-3723 --- .../zookeeper-client-c/src/zookeeper.c | 18 +++++++++++------- .../zookeeper-client-c/tests/TestClient.cc | 11 +++++++++++ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/zookeeper-client/zookeeper-client-c/src/zookeeper.c b/zookeeper-client/zookeeper-client-c/src/zookeeper.c index a43ebf251eb..5c2fad6d201 100644 --- a/zookeeper-client/zookeeper-client-c/src/zookeeper.c +++ b/zookeeper-client/zookeeper-client-c/src/zookeeper.c @@ -746,7 +746,7 @@ static int resolve_hosts(const zhandle_t *zh, const char *hosts_in, addrvec_t *a LOG_ERROR(LOGCALLBACK(zh), "could not resolve %s", host); errno=ENOENT; rc=ZBADARGUMENTS; - goto fail; + goto next_host; } // Setup the address array @@ -786,6 +786,7 @@ static int resolve_hosts(const zhandle_t *zh, const char *hosts_in, addrvec_t *a addr->ss_family, hosts_in); } } +next_host: host = strtok_r(0, ",", &strtok_last); } #else @@ -826,14 +827,13 @@ static int resolve_hosts(const zhandle_t *zh, const char *hosts_in, addrvec_t *a if (rc != 0) { errno = getaddrinfo_errno(rc); #ifdef _WIN32 - LOG_ERROR(LOGCALLBACK(zh), "Win32 message: %s\n", gai_strerror(rc)); + LOG_ERROR(LOGCALLBACK(zh), "%s: Win32 message: %s\n", host, gai_strerror(rc)); #elif __linux__ && __GNUC__ - LOG_ERROR(LOGCALLBACK(zh), "getaddrinfo: %s\n", gai_strerror(rc)); + LOG_ERROR(LOGCALLBACK(zh), "%s: getaddrinfo(): %s\n", host, gai_strerror(rc)); #else - LOG_ERROR(LOGCALLBACK(zh), "getaddrinfo: %s\n", strerror(errno)); + LOG_ERROR(LOGCALLBACK(zh), "%s: getaddrinfo(): %s\n", host, strerror(errno)); #endif - rc=ZSYSTEMERROR; - goto fail; + goto next_host; } } @@ -865,11 +865,15 @@ static int resolve_hosts(const zhandle_t *zh, const char *hosts_in, addrvec_t *a } freeaddrinfo(res0); - +next_host: host = strtok_r(0, ",", &strtok_last); } #endif } + if (avec->count == 0) { + rc = ZSYSTEMERROR; // not a single host resolved + goto fail; + } free(hosts); if(!disable_conn_permute){ diff --git a/zookeeper-client/zookeeper-client-c/tests/TestClient.cc b/zookeeper-client/zookeeper-client-c/tests/TestClient.cc index ec8dbb4ce6b..a4a1c64240e 100644 --- a/zookeeper-client/zookeeper-client-c/tests/TestClient.cc +++ b/zookeeper-client/zookeeper-client-c/tests/TestClient.cc @@ -204,6 +204,7 @@ class Zookeeper_simpleSystem : public CPPUNIT_NS::TestFixture CPPUNIT_TEST(testAsyncWatcherAutoReset); CPPUNIT_TEST(testDeserializeString); CPPUNIT_TEST(testFirstServerDown); + CPPUNIT_TEST(testNonexistentHost); #ifdef THREADED CPPUNIT_TEST(testNullData); #ifdef ZOO_IPV6_ENABLED @@ -327,6 +328,16 @@ class Zookeeper_simpleSystem : public CPPUNIT_NS::TestFixture CPPUNIT_ASSERT(zk != 0); CPPUNIT_ASSERT(ctx.waitForConnected(zk)); } + + /** Checks that a non-existent host will not block the connection **/ + void testNonexistentHost() { + char hosts[] = "jimmy:5555,127.0.0.1:22181,someinvalid.host.com:1234"; + watchctx_t ctx; + zoo_deterministic_conn_order(true /* disable permute */); + zhandle_t *zh = createClient(hosts, &ctx); + CPPUNIT_ASSERT(ctx.waitForConnected(zh)); + zoo_deterministic_conn_order(false /* enable permute */); + } /** this checks for a deadlock in calling zookeeper_close and calls from a default watcher that might get triggered just when zookeeper_close() is in progress **/ void testHangingClient() {