From cc033e0a50d1cf85db4323561b7c8cf0acbaaa92 Mon Sep 17 00:00:00 2001 From: heyuchen Date: Thu, 25 Mar 2021 10:43:38 +0800 Subject: [PATCH 1/3] feat(split-client): c++ client split related implementation --- include/dsn/utility/error_code.h | 1 + src/client/partition_resolver.cpp | 10 ++--- src/client/partition_resolver_simple.cpp | 55 ++++++++++++++++-------- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/include/dsn/utility/error_code.h b/include/dsn/utility/error_code.h index 940e0636c2..1e1c94d178 100644 --- a/include/dsn/utility/error_code.h +++ b/include/dsn/utility/error_code.h @@ -128,4 +128,5 @@ DEFINE_ERR_CODE(ERR_SASL_INCOMPLETE) DEFINE_ERR_CODE(ERR_ACL_DENY) DEFINE_ERR_CODE(ERR_SPLITTING) DEFINE_ERR_CODE(ERR_PARENT_PARTITION_MISUSED) +DEFINE_ERR_CODE(ERR_CHILD_NOT_READY) } // namespace dsn diff --git a/src/client/partition_resolver.cpp b/src/client/partition_resolver.cpp index 4d9b133fa9..1740808cac 100644 --- a/src/client/partition_resolver.cpp +++ b/src/client/partition_resolver.cpp @@ -105,13 +105,13 @@ void partition_resolver::call_task(const rpc_response_task_ptr &t) // update gpid when necessary auto &hdr = *(t->get_request()->header); if (hdr.gpid.value() != result.pid.value()) { - dassert(hdr.gpid.value() == 0, "inconsistent gpid"); - hdr.gpid = result.pid; - - // update thread hash if not assigned by applications - if (hdr.client.thread_hash == 0) { + if (hdr.client.thread_hash == 0 // thread_hash is not assigned by applications + || + hdr.gpid.value() != 0 // requests set to child redirect to parent + ) { hdr.client.thread_hash = result.pid.thread_hash(); } + hdr.gpid = result.pid; } dsn_rpc_call(result.address, t.get()); }, diff --git a/src/client/partition_resolver_simple.cpp b/src/client/partition_resolver_simple.cpp index e24e6ceff7..05f9e56035 100644 --- a/src/client/partition_resolver_simple.cpp +++ b/src/client/partition_resolver_simple.cpp @@ -48,7 +48,13 @@ void partition_resolver_simple::resolve(uint64_t partition_hash, if (_app_partition_count != -1) { idx = get_partition_index(_app_partition_count, partition_hash); rpc_address target; - if (ERR_OK == get_address(idx, target)) { + auto err = get_address(idx, target); + if (err == ERR_CHILD_NOT_READY) { + // child partition is not ready, its requests should be sent to parent partition + idx -= _app_partition_count / 2; + err = get_address(idx, target); + } + if (err == ERR_OK) { callback(resolve_result{ERR_OK, target, {_app_id, idx}}); return; } @@ -68,27 +74,36 @@ void partition_resolver_simple::resolve(uint64_t partition_hash, void partition_resolver_simple::on_access_failure(int partition_index, error_code err) { - if (-1 != partition_index && - err != ERR_CAPACITY_EXCEEDED // no need for reconfiguration on primary - && - err != ERR_NOT_ENOUGH_MEMBER // primary won't change and we only r/w on primary in this + if (-1 == partition_index || + err == ERR_CAPACITY_EXCEEDED // no need for reconfiguration on primary + || + err == ERR_NOT_ENOUGH_MEMBER // primary won't change and we only r/w on primary in this // provider - && - err != ERR_OPERATION_DISABLED // operation disabled - && - err != ERR_BUSY // busy (rpc busy or throttling busy) + || + err == ERR_OPERATION_DISABLED // operation disabled + || + err == ERR_BUSY // busy (rpc busy or throttling busy) + || + err == ERR_SPLITTING // partition is splitting, reject read and write ) { + return; + } + + zauto_write_lock l(_config_lock); + if (err == ERR_PARENT_PARTITION_MISUSED) { + ddebug("clear all partition configuration cache due to access failure %s at %d.%d", + err.to_string(), + _app_id, + partition_index); + _app_partition_count = -1; + } else { ddebug("clear partition configuration cache %d.%d due to access failure %s", _app_id, partition_index, err.to_string()); - - { - zauto_write_lock l(_config_lock); - auto it = _config_cache.find(partition_index); - if (it != _config_cache.end()) { - _config_cache.erase(it); - } + auto it = _config_cache.find(partition_index); + if (it != _config_cache.end()) { + _config_cache.erase(it); } } } @@ -272,7 +287,9 @@ void partition_resolver_simple::query_config_reply(error_code err, _app_id, resp.app_id); } - if (_app_partition_count != -1 && _app_partition_count != resp.partition_count) { + if (_app_partition_count != -1 && _app_partition_count != resp.partition_count && + _app_partition_count * 2 != resp.partition_count && + _app_partition_count != resp.partition_count * 2) { dassert(false, "partition count is changed (mostly the app was removed and created with " "the same name), local Vs remote: %u vs %u ", @@ -429,6 +446,10 @@ error_code partition_resolver_simple::get_address(int partition_index, /*out*/ r auto it = _config_cache.find(partition_index); if (it != _config_cache.end()) { // config = it->second->config; + if (it->second->config.ballot < 0) { + // client query config for splitting app, child partition is not ready + return ERR_CHILD_NOT_READY; + } addr = get_address(it->second->config); if (addr.is_invalid()) { return ERR_IO_PENDING; From 80dc15986c64504e6cb40c77fbc033981ede333e Mon Sep 17 00:00:00 2001 From: heyuchen Date: Sat, 8 May 2021 17:56:22 +0800 Subject: [PATCH 2/3] update by cr --- src/client/partition_resolver_simple.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/client/partition_resolver_simple.cpp b/src/client/partition_resolver_simple.cpp index 05f9e56035..77a5e43998 100644 --- a/src/client/partition_resolver_simple.cpp +++ b/src/client/partition_resolver_simple.cpp @@ -24,6 +24,7 @@ * THE SOFTWARE. */ +#include #include #include #include @@ -49,12 +50,12 @@ void partition_resolver_simple::resolve(uint64_t partition_hash, idx = get_partition_index(_app_partition_count, partition_hash); rpc_address target; auto err = get_address(idx, target); - if (err == ERR_CHILD_NOT_READY) { + if (dsn_unlikely(err == ERR_CHILD_NOT_READY)) { // child partition is not ready, its requests should be sent to parent partition idx -= _app_partition_count / 2; err = get_address(idx, target); } - if (err == ERR_OK) { + if (dsn_likely(err == ERR_OK)) { callback(resolve_result{ERR_OK, target, {_app_id, idx}}); return; } @@ -91,16 +92,16 @@ void partition_resolver_simple::on_access_failure(int partition_index, error_cod zauto_write_lock l(_config_lock); if (err == ERR_PARENT_PARTITION_MISUSED) { - ddebug("clear all partition configuration cache due to access failure %s at %d.%d", - err.to_string(), - _app_id, - partition_index); + ddebug_f("clear all partition configuration cache due to access failure {} at {}.{}", + err, + _app_id, + partition_index); _app_partition_count = -1; } else { - ddebug("clear partition configuration cache %d.%d due to access failure %s", - _app_id, - partition_index, - err.to_string()); + ddebug_f("clear partition configuration cache {}.{} due to access failure {}", + _app_id, + partition_index, + err); auto it = _config_cache.find(partition_index); if (it != _config_cache.end()) { _config_cache.erase(it); From 00cce1eb8bfc2bd0926d493a72916e0dcaf0d90b Mon Sep 17 00:00:00 2001 From: heyuchen Date: Mon, 17 May 2021 09:39:46 +0800 Subject: [PATCH 3/3] update by cr --- src/client/partition_resolver_simple.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/client/partition_resolver_simple.cpp b/src/client/partition_resolver_simple.cpp index 77a5e43998..f3ef360b05 100644 --- a/src/client/partition_resolver_simple.cpp +++ b/src/client/partition_resolver_simple.cpp @@ -102,10 +102,7 @@ void partition_resolver_simple::on_access_failure(int partition_index, error_cod _app_id, partition_index, err); - auto it = _config_cache.find(partition_index); - if (it != _config_cache.end()) { - _config_cache.erase(it); - } + _config_cache.erase(partition_index); } }