diff --git a/zookeeper-client/zookeeper-client-c/include/zookeeper.h b/zookeeper-client/zookeeper-client-c/include/zookeeper.h index 304dafe2f69..b277a959acc 100644 --- a/zookeeper-client/zookeeper-client-c/include/zookeeper.h +++ b/zookeeper-client/zookeeper-client-c/include/zookeeper.h @@ -567,6 +567,32 @@ ZOOAPI zhandle_t *zookeeper_init2(const char *host, watcher_fn fn, */ ZOOAPI int zoo_set_servers(zhandle_t *zh, const char *hosts); +/** + * \brief sets a minimum delay to observe between "routine" host name + * resolutions. + * + * The client performs regular resolutions of the list of servers + * passed to \ref zookeeper_init or set with \ref zoo_set_servers in + * order to detect changes at the DNS level. + * + * By default, it does so every time it checks for socket readiness. + * This results in low latency in the detection of changes, but can + * lead to heavy DNS traffic when the local cache is not effective. + * + * This method allows an application to influence the rate of polling. + * When delay_ms is set to a value greater than zero, the client skips + * most "routine" resolutions which would have happened in a window of + * that many milliseconds since the last succesful one. + * + * Setting delay_ms to 0 disables this logic, reverting to the default + * behavior. Setting it to -1 disables network resolutions during + * normal operation (but not, e.g., on connection loss). + * + * \param delay_ms 0, -1, or the window size in milliseconds + * \return ZOK on success or ZBADARGUMENTS for invalid input parameters + */ +ZOOAPI int zoo_set_servers_resolution_delay(zhandle_t *zh, int delay_ms); + /** * \brief cycle to the next server on the next connection attempt. * diff --git a/zookeeper-client/zookeeper-client-c/src/zk_adaptor.h b/zookeeper-client/zookeeper-client-c/src/zk_adaptor.h index 97995e36ace..4faa0782c1a 100644 --- a/zookeeper-client/zookeeper-client-c/src/zk_adaptor.h +++ b/zookeeper-client/zookeeper-client-c/src/zk_adaptor.h @@ -200,6 +200,9 @@ struct _zhandle { addrvec_t addrs_old; // old list of addresses that we are no longer connected to addrvec_t addrs_new; // new list of addresses to connect to if we're reconfiguring + struct timeval last_resolve; // time of last hostname resolution + int resolve_delay_ms; // see zoo_set_servers_resolution_delay + int reconfig; // Are we in the process of reconfiguring cluster's ensemble double pOld, pNew; // Probability for selecting between 'addrs_old' and 'addrs_new' int delay; diff --git a/zookeeper-client/zookeeper-client-c/src/zookeeper.c b/zookeeper-client/zookeeper-client-c/src/zookeeper.c index 70762c29b35..c7f39ed1ae3 100644 --- a/zookeeper-client/zookeeper-client-c/src/zookeeper.c +++ b/zookeeper-client/zookeeper-client-c/src/zookeeper.c @@ -231,6 +231,8 @@ typedef struct _completion_list { } completion_list_t; const char*err2string(int err); +static inline int calculate_interval(const struct timeval *start, + const struct timeval *end); static int queue_session_event(zhandle_t *zh, int state); static const char* format_endpoint_info(const struct sockaddr_storage* ep); @@ -913,10 +915,14 @@ static int resolve_hosts(const zhandle_t *zh, const char *hosts_in, addrvec_t *a * * See zoo_cycle_next_server for the selection logic. * + * \param ref_time an optional "reference time," used to determine if + * resolution can be skipped in accordance to the delay set by \ref + * zoo_set_servers_resolution_delay. Passing NULL prevents skipping. + * * See {@link https://issues.apache.org/jira/browse/ZOOKEEPER-1355} for the * protocol and its evaluation, */ -int update_addrs(zhandle_t *zh) +int update_addrs(zhandle_t *zh, const struct timeval *ref_time) { int rc = ZOK; char *hosts = NULL; @@ -937,26 +943,54 @@ int update_addrs(zhandle_t *zh) return ZSYSTEMERROR; } - // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new} + // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new, last_resolve, resolve_delay_ms} lock_reconfig(zh); + // Check if we are due for a host name resolution. (See + // zoo_set_servers_resolution_delay. The answer is always "yes" + // if no reference is provided or the file descriptor is invalid.) + if (ref_time && zh->fd != -1) { + int do_resolve; + + if (zh->resolve_delay_ms <= 0) { + // -1 disables, 0 means unconditional. Fail safe. + do_resolve = zh->resolve_delay_ms != -1; + } else { + int elapsed_ms = calculate_interval(&zh->last_resolve, ref_time); + // Include < 0 in case of overflow, or if we are not + // backed by a monotonic clock. + do_resolve = elapsed_ms > zh->resolve_delay_ms || elapsed_ms < 0; + } + + if (!do_resolve) { + goto done; + } + } + // Copy zh->hostname for local use hosts = strdup(zh->hostname); if (hosts == NULL) { rc = ZSYSTEMERROR; - goto fail; + goto done; } rc = resolve_hosts(zh, hosts, &resolved); if (rc != ZOK) { - goto fail; + goto done; + } + + // Unconditionally note last resolution time. + if (ref_time) { + zh->last_resolve = *ref_time; + } else { + get_system_time(&zh->last_resolve); } // If the addrvec list is identical to last time we ran don't do anything if (addrvec_eq(&zh->addrs, &resolved)) { - goto fail; + goto done; } // Is the server we're connected to in the new resolved list? @@ -976,14 +1010,14 @@ int update_addrs(zhandle_t *zh) rc = addrvec_append(&zh->addrs_old, resolved_address); if (rc != ZOK) { - goto fail; + goto done; } } else { rc = addrvec_append(&zh->addrs_new, resolved_address); if (rc != ZOK) { - goto fail; + goto done; } } } @@ -1037,13 +1071,13 @@ int update_addrs(zhandle_t *zh) zh->state = ZOO_NOTCONNECTED_STATE; } -fail: +done: unlock_reconfig(zh); // If we short-circuited out and never assigned resolved to zh->addrs then we // need to free resolved to avoid a memleak. - if (zh->addrs.data != resolved.data) + if (resolved.data && zh->addrs.data != resolved.data) { addrvec_free(&resolved); } @@ -1234,7 +1268,7 @@ static zhandle_t *zookeeper_init_internal(const char *host, watcher_fn watcher, if (zh->hostname == 0) { goto abort; } - if(update_addrs(zh) != 0) { + if(update_addrs(zh, NULL) != 0) { goto abort; } @@ -1294,7 +1328,7 @@ int zoo_set_servers(zhandle_t *zh, const char *hosts) return ZBADARGUMENTS; } - // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new} + // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new, last_resolve, resolve_delay_ms} lock_reconfig(zh); // Reset hostname to new set of hosts to connect to @@ -1306,7 +1340,27 @@ int zoo_set_servers(zhandle_t *zh, const char *hosts) unlock_reconfig(zh); - return update_addrs(zh); + return update_addrs(zh, NULL); +} + +/* + * Sets a minimum delay to observe between "routine" host name + * resolutions. See prototype for full documentation. + */ +int zoo_set_servers_resolution_delay(zhandle_t *zh, int delay_ms) { + if (delay_ms < -1) { + LOG_ERROR(LOGCALLBACK(zh), "Resolution delay cannot be %d", delay_ms); + return ZBADARGUMENTS; + } + + // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new, last_resolve, resolve_delay_ms} + lock_reconfig(zh); + + zh->resolve_delay_ms = delay_ms; + + unlock_reconfig(zh); + + return ZOK; } /** @@ -1371,7 +1425,7 @@ static int get_next_server_in_reconfig(zhandle_t *zh) */ void zoo_cycle_next_server(zhandle_t *zh) { - // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new} + // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new, last_resolve, resolve_delay_ms} lock_reconfig(zh); memset(&zh->addr_cur, 0, sizeof(zh->addr_cur)); @@ -1403,7 +1457,7 @@ const char* zoo_get_current_server(zhandle_t* zh) { const char *endpoint_info = NULL; - // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new} + // NOTE: guard access to {hostname, addr_cur, addrs, addrs_old, addrs_new, last_resolve, resolve_delay_ms} // Need the lock here as it is changed in update_addrs() lock_reconfig(zh); @@ -2287,7 +2341,7 @@ int zookeeper_interest(zhandle_t *zh, socket_t *fd, int *interest, } api_prolog(zh); - rc = update_addrs(zh); + rc = update_addrs(zh, &now); if (rc != ZOK) { return api_epilog(zh, rc); } diff --git a/zookeeper-client/zookeeper-client-c/tests/TestClient.cc b/zookeeper-client/zookeeper-client-c/tests/TestClient.cc index 6bcfe148672..f58bfb620bb 100644 --- a/zookeeper-client/zookeeper-client-c/tests/TestClient.cc +++ b/zookeeper-client/zookeeper-client-c/tests/TestClient.cc @@ -224,6 +224,7 @@ class Zookeeper_simpleSystem : public CPPUNIT_NS::TestFixture CPPUNIT_TEST(testWatcherAutoResetWithLocal); CPPUNIT_TEST(testGetChildren2); CPPUNIT_TEST(testLastZxid); + CPPUNIT_TEST(testServersResolutionDelay); CPPUNIT_TEST(testRemoveWatchers); #endif CPPUNIT_TEST_SUITE_END(); @@ -369,7 +370,78 @@ class Zookeeper_simpleSystem : public CPPUNIT_NS::TestFixture CPPUNIT_ASSERT(zh->io_count < 2); zookeeper_close(zh); } - + + /* Checks the zoo_set_servers_resolution_delay default and operation */ + void testServersResolutionDelay() { + watchctx_t ctx; + zhandle_t *zk = createClient(&ctx); + int rc; + struct timeval tv; + struct Stat stat; + + CPPUNIT_ASSERT(zk); + CPPUNIT_ASSERT(zk->resolve_delay_ms == 0); + + // a) Default/0 case: resolve at each request. + + tv = zk->last_resolve; + usleep(10000); // 10ms + + rc = zoo_exists(zk, "/", 0, &stat); + CPPUNIT_ASSERT_EQUAL((int)ZOK, rc); + + // Must have changed because of the request. + CPPUNIT_ASSERT(zk->last_resolve.tv_sec != tv.tv_sec || + zk->last_resolve.tv_usec != tv.tv_usec); + + // b) Disabled/-1 case: never perform "routine" resolutions. + + rc = zoo_set_servers_resolution_delay(zk, -1); // Disabled + CPPUNIT_ASSERT_EQUAL((int)ZOK, rc); + + tv = zk->last_resolve; + usleep(10000); // 10ms + + rc = zoo_exists(zk, "/", 0, &stat); + CPPUNIT_ASSERT_EQUAL((int)ZOK, rc); + + // Must not have changed as auto-resolution is disabled. + CPPUNIT_ASSERT(zk->last_resolve.tv_sec == tv.tv_sec && + zk->last_resolve.tv_usec == tv.tv_usec); + + // c) Invalid delay is rejected. + + rc = zoo_set_servers_resolution_delay(zk, -1000); // Bad + CPPUNIT_ASSERT_EQUAL((int)ZBADARGUMENTS, rc); + + // d) Valid delay, no resolution within window. + + rc = zoo_set_servers_resolution_delay(zk, 500); // 0.5s + CPPUNIT_ASSERT_EQUAL((int)ZOK, rc); + + tv = zk->last_resolve; + usleep(10000); // 10ms + + rc = zoo_exists(zk, "/", 0, &stat); + CPPUNIT_ASSERT_EQUAL((int)ZOK, rc); + + // Must not have changed because the request (hopefully!) + // executed in less than 0.5s. + CPPUNIT_ASSERT(zk->last_resolve.tv_sec == tv.tv_sec && + zk->last_resolve.tv_usec == tv.tv_usec); + + // e) Valid delay, at least one resolution after delay. + + usleep(500 * 1000); // 0.5s + + rc = zoo_exists(zk, "/", 0, &stat); + CPPUNIT_ASSERT_EQUAL((int)ZOK, rc); + + // Must have changed because we waited 0.5s between the + // capture and the last request. + CPPUNIT_ASSERT(zk->last_resolve.tv_sec != tv.tv_sec || + zk->last_resolve.tv_usec != tv.tv_usec); + } void testPing() {