From a908001be9641d78040b1954acb0cd3a8e9e42c2 Mon Sep 17 00:00:00 2001
From: Damien Diederen
Date: Sun, 17 May 2020 15:15:54 +0200
Subject: [PATCH] ZOOKEEPER-1998: Allow C client to throttle host name
resolutions
Some environments experience high DNS load because of the name resolutions introduced by [ZOOKEEPER-1355](https://issues.apache.org/jira/browse/ZOOKEEPER-1355).
This patch allows clients to set a minimum delay to observe between "routine" resolutions using a `zoo_set_servers_resolution_delay` API function.
An application can influence the rate of polling via its `delay_ms` parameter: when set to a value greater than zero, the client skips most "routine" resolutions which would have happened in a window of that many milliseconds since the last successful one.
Setting `delay_ms` to `0` disables the new logic, reverting to the default behavior. Setting it to `-1` disables network resolutions during normal operation (but not, e.g., on connection loss).
Author: Damien Diederen
Reviewers: Enrico Olivelli , Mate Szalay-Beko , Suhas Dantkale
Closes #1068 from ztzg/ZOOKEEPER-1998-throttle-dns-resolutions
---
.../zookeeper-client-c/include/zookeeper.h | 26 ++++++
.../zookeeper-client-c/src/zk_adaptor.h | 3 +
.../zookeeper-client-c/src/zookeeper.c | 84 +++++++++++++++----
.../zookeeper-client-c/tests/TestClient.cc | 74 +++++++++++++++-
4 files changed, 171 insertions(+), 16 deletions(-)
diff --git a/zookeeper-client/zookeeper-client-c/include/zookeeper.h b/zookeeper-client/zookeeper-client-c/include/zookeeper.h
index d33a44695a7..95959af56fe 100644
--- a/zookeeper-client/zookeeper-client-c/include/zookeeper.h
+++ b/zookeeper-client/zookeeper-client-c/include/zookeeper.h
@@ -699,6 +699,32 @@ ZOOAPI sasl_callback_t *zoo_sasl_make_basic_callbacks(const char *user,
*/
ZOOAPI int zoo_set_servers(zhandle_t *zh, const char *hosts);
+/**
+ * \brief sets a minimum delay to observe between "routine" host name
+ * resolutions.
+ *
+ * The client performs regular resolutions of the list of servers
+ * passed to \ref zookeeper_init or set with \ref zoo_set_servers in
+ * order to detect changes at the DNS level.
+ *
+ * By default, it does so every time it checks for socket readiness.
+ * This results in low latency in the detection of changes, but can
+ * lead to heavy DNS traffic when the local cache is not effective.
+ *
+ * This method allows an application to influence the rate of polling.
+ * When delay_ms is set to a value greater than zero, the client skips
+ * most "routine" resolutions which would have happened in a window of
+ * that many milliseconds since the last succesful one.
+ *
+ * Setting delay_ms to 0 disables this logic, reverting to the default
+ * behavior. Setting it to -1 disables network resolutions during
+ * normal operation (but not, e.g., on connection loss).
+ *
+ * \param delay_ms 0, -1, or the window size in milliseconds
+ * \return ZOK on success or ZBADARGUMENTS for invalid input parameters
+ */
+ZOOAPI int zoo_set_servers_resolution_delay(zhandle_t *zh, int delay_ms);
+
/**
* \brief cycle to the next server on the next connection attempt.
*
diff --git a/zookeeper-client/zookeeper-client-c/src/zk_adaptor.h b/zookeeper-client/zookeeper-client-c/src/zk_adaptor.h
index 57696d4b203..46a7e7ed6e3 100644
--- a/zookeeper-client/zookeeper-client-c/src/zk_adaptor.h
+++ b/zookeeper-client/zookeeper-client-c/src/zk_adaptor.h
@@ -200,6 +200,9 @@ struct _zhandle {
addrvec_t addrs_old; // old list of addresses that we are no longer connected to
addrvec_t addrs_new; // new list of addresses to connect to if we're reconfiguring
+ struct timeval last_resolve; // time of last hostname resolution
+ int resolve_delay_ms; // see zoo_set_servers_resolution_delay
+
int reconfig; // Are we in the process of reconfiguring cluster's ensemble
double pOld, pNew; // Probability for selecting between 'addrs_old' and 'addrs_new'
int delay;
diff --git a/zookeeper-client/zookeeper-client-c/src/zookeeper.c b/zookeeper-client/zookeeper-client-c/src/zookeeper.c
index a273225ff69..8df4b29dcc5 100644
--- a/zookeeper-client/zookeeper-client-c/src/zookeeper.c
+++ b/zookeeper-client/zookeeper-client-c/src/zookeeper.c
@@ -244,6 +244,8 @@ typedef struct _completion_list {
} completion_list_t;
const char*err2string(int err);
+static inline int calculate_interval(const struct timeval *start,
+ const struct timeval *end);
static int queue_session_event(zhandle_t *zh, int state);
static const char* format_endpoint_info(const struct sockaddr_storage* ep);
@@ -984,10 +986,14 @@ static int resolve_hosts(const zhandle_t *zh, const char *hosts_in, addrvec_t *a
*
* See zoo_cycle_next_server for the selection logic.
*
+ * \param ref_time an optional "reference time," used to determine if
+ * resolution can be skipped in accordance to the delay set by \ref
+ * zoo_set_servers_resolution_delay. Passing NULL prevents skipping.
+ *
* See {@link https://issues.apache.org/jira/browse/ZOOKEEPER-1355} for the
* protocol and its evaluation,
*/
-int update_addrs(zhandle_t *zh)
+int update_addrs(zhandle_t *zh, const struct timeval *ref_time)
{
int rc = ZOK;
char *hosts = NULL;
@@ -1008,26 +1014,54 @@ int update_addrs(zhandle_t *zh)
return ZSYSTEMERROR;
}
- // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new}
+ // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new, last_resolve, resolve_delay_ms}
lock_reconfig(zh);
+ // Check if we are due for a host name resolution. (See
+ // zoo_set_servers_resolution_delay. The answer is always "yes"
+ // if no reference is provided or the file descriptor is invalid.)
+ if (ref_time && zh->fd->sock != -1) {
+ int do_resolve;
+
+ if (zh->resolve_delay_ms <= 0) {
+ // -1 disables, 0 means unconditional. Fail safe.
+ do_resolve = zh->resolve_delay_ms != -1;
+ } else {
+ int elapsed_ms = calculate_interval(&zh->last_resolve, ref_time);
+ // Include < 0 in case of overflow, or if we are not
+ // backed by a monotonic clock.
+ do_resolve = elapsed_ms > zh->resolve_delay_ms || elapsed_ms < 0;
+ }
+
+ if (!do_resolve) {
+ goto finish;
+ }
+ }
+
// Copy zh->hostname for local use
hosts = strdup(zh->hostname);
if (hosts == NULL) {
rc = ZSYSTEMERROR;
- goto fail;
+ goto finish;
}
rc = resolve_hosts(zh, hosts, &resolved);
if (rc != ZOK)
{
- goto fail;
+ goto finish;
+ }
+
+ // Unconditionally note last resolution time.
+ if (ref_time) {
+ zh->last_resolve = *ref_time;
+ } else {
+ get_system_time(&zh->last_resolve);
}
// If the addrvec list is identical to last time we ran don't do anything
if (addrvec_eq(&zh->addrs, &resolved))
{
- goto fail;
+ goto finish;
}
// Is the server we're connected to in the new resolved list?
@@ -1047,14 +1081,14 @@ int update_addrs(zhandle_t *zh)
rc = addrvec_append(&zh->addrs_old, resolved_address);
if (rc != ZOK)
{
- goto fail;
+ goto finish;
}
}
else {
rc = addrvec_append(&zh->addrs_new, resolved_address);
if (rc != ZOK)
{
- goto fail;
+ goto finish;
}
}
}
@@ -1107,13 +1141,13 @@ int update_addrs(zhandle_t *zh)
zh->state = ZOO_NOTCONNECTED_STATE;
}
-fail:
+finish:
unlock_reconfig(zh);
// If we short-circuited out and never assigned resolved to zh->addrs then we
// need to free resolved to avoid a memleak.
- if (zh->addrs.data != resolved.data)
+ if (resolved.data && zh->addrs.data != resolved.data)
{
addrvec_free(&resolved);
}
@@ -1310,7 +1344,7 @@ static zhandle_t *zookeeper_init_internal(const char *host, watcher_fn watcher,
if (zh->hostname == 0) {
goto abort;
}
- if(update_addrs(zh) != 0) {
+ if(update_addrs(zh, NULL) != 0) {
goto abort;
}
@@ -1404,7 +1438,7 @@ int zoo_set_servers(zhandle_t *zh, const char *hosts)
return ZBADARGUMENTS;
}
- // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new}
+ // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new, last_resolve, resolve_delay_ms}
lock_reconfig(zh);
// Reset hostname to new set of hosts to connect to
@@ -1416,7 +1450,27 @@ int zoo_set_servers(zhandle_t *zh, const char *hosts)
unlock_reconfig(zh);
- return update_addrs(zh);
+ return update_addrs(zh, NULL);
+}
+
+/*
+ * Sets a minimum delay to observe between "routine" host name
+ * resolutions. See prototype for full documentation.
+ */
+int zoo_set_servers_resolution_delay(zhandle_t *zh, int delay_ms) {
+ if (delay_ms < -1) {
+ LOG_ERROR(LOGCALLBACK(zh), "Resolution delay cannot be %d", delay_ms);
+ return ZBADARGUMENTS;
+ }
+
+ // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new, last_resolve, resolve_delay_ms}
+ lock_reconfig(zh);
+
+ zh->resolve_delay_ms = delay_ms;
+
+ unlock_reconfig(zh);
+
+ return ZOK;
}
/**
@@ -1481,7 +1535,7 @@ static int get_next_server_in_reconfig(zhandle_t *zh)
*/
void zoo_cycle_next_server(zhandle_t *zh)
{
- // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new}
+ // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new, last_resolve, resolve_delay_ms}
lock_reconfig(zh);
memset(&zh->addr_cur, 0, sizeof(zh->addr_cur));
@@ -1513,7 +1567,7 @@ const char* zoo_get_current_server(zhandle_t* zh)
{
const char *endpoint_info = NULL;
- // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new}
+ // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new, last_resolve, resolve_delay_ms}
// Need the lock here as it is changed in update_addrs()
lock_reconfig(zh);
@@ -2415,7 +2469,7 @@ int zookeeper_interest(zhandle_t *zh, socket_t *fd, int *interest,
}
api_prolog(zh);
- rc = update_addrs(zh);
+ rc = update_addrs(zh, &now);
if (rc != ZOK) {
return api_epilog(zh, rc);
}
diff --git a/zookeeper-client/zookeeper-client-c/tests/TestClient.cc b/zookeeper-client/zookeeper-client-c/tests/TestClient.cc
index 2a6e9925470..80e4c928a51 100644
--- a/zookeeper-client/zookeeper-client-c/tests/TestClient.cc
+++ b/zookeeper-client/zookeeper-client-c/tests/TestClient.cc
@@ -227,6 +227,7 @@ class Zookeeper_simpleSystem : public CPPUNIT_NS::TestFixture
CPPUNIT_TEST(testWatcherAutoResetWithLocal);
CPPUNIT_TEST(testGetChildren2);
CPPUNIT_TEST(testLastZxid);
+ CPPUNIT_TEST(testServersResolutionDelay);
CPPUNIT_TEST(testRemoveWatchers);
#endif
CPPUNIT_TEST_SUITE_END();
@@ -386,7 +387,78 @@ class Zookeeper_simpleSystem : public CPPUNIT_NS::TestFixture
CPPUNIT_ASSERT(zh->io_count < 2);
zookeeper_close(zh);
}
-
+
+ /* Checks the zoo_set_servers_resolution_delay default and operation */
+ void testServersResolutionDelay() {
+ watchctx_t ctx;
+ zhandle_t *zk = createClient(&ctx);
+ int rc;
+ struct timeval tv;
+ struct Stat stat;
+
+ CPPUNIT_ASSERT(zk);
+ CPPUNIT_ASSERT(zk->resolve_delay_ms == 0);
+
+ // a) Default/0 case: resolve at each request.
+
+ tv = zk->last_resolve;
+ usleep(10000); // 10ms
+
+ rc = zoo_exists(zk, "/", 0, &stat);
+ CPPUNIT_ASSERT_EQUAL((int)ZOK, rc);
+
+ // Must have changed because of the request.
+ CPPUNIT_ASSERT(zk->last_resolve.tv_sec != tv.tv_sec ||
+ zk->last_resolve.tv_usec != tv.tv_usec);
+
+ // b) Disabled/-1 case: never perform "routine" resolutions.
+
+ rc = zoo_set_servers_resolution_delay(zk, -1); // Disabled
+ CPPUNIT_ASSERT_EQUAL((int)ZOK, rc);
+
+ tv = zk->last_resolve;
+ usleep(10000); // 10ms
+
+ rc = zoo_exists(zk, "/", 0, &stat);
+ CPPUNIT_ASSERT_EQUAL((int)ZOK, rc);
+
+ // Must not have changed as auto-resolution is disabled.
+ CPPUNIT_ASSERT(zk->last_resolve.tv_sec == tv.tv_sec &&
+ zk->last_resolve.tv_usec == tv.tv_usec);
+
+ // c) Invalid delay is rejected.
+
+ rc = zoo_set_servers_resolution_delay(zk, -1000); // Bad
+ CPPUNIT_ASSERT_EQUAL((int)ZBADARGUMENTS, rc);
+
+ // d) Valid delay, no resolution within window.
+
+ rc = zoo_set_servers_resolution_delay(zk, 500); // 0.5s
+ CPPUNIT_ASSERT_EQUAL((int)ZOK, rc);
+
+ tv = zk->last_resolve;
+ usleep(10000); // 10ms
+
+ rc = zoo_exists(zk, "/", 0, &stat);
+ CPPUNIT_ASSERT_EQUAL((int)ZOK, rc);
+
+ // Must not have changed because the request (hopefully!)
+ // executed in less than 0.5s.
+ CPPUNIT_ASSERT(zk->last_resolve.tv_sec == tv.tv_sec &&
+ zk->last_resolve.tv_usec == tv.tv_usec);
+
+ // e) Valid delay, at least one resolution after delay.
+
+ usleep(500 * 1000); // 0.5s
+
+ rc = zoo_exists(zk, "/", 0, &stat);
+ CPPUNIT_ASSERT_EQUAL((int)ZOK, rc);
+
+ // Must have changed because we waited 0.5s between the
+ // capture and the last request.
+ CPPUNIT_ASSERT(zk->last_resolve.tv_sec != tv.tv_sec ||
+ zk->last_resolve.tv_usec != tv.tv_usec);
+ }
void testPing()
{