diff --git a/src/daemons/MetaDaemon.cpp b/src/daemons/MetaDaemon.cpp index 5ccedd0f1ff..0fa5f7ebafc 100644 --- a/src/daemons/MetaDaemon.cpp +++ b/src/daemons/MetaDaemon.cpp @@ -7,6 +7,7 @@ #include #include "MetaDaemonInit.h" +#include "clients/meta/MetaClient.h" #include "common/base/Base.h" #include "common/base/SignalHandler.h" #include "common/fs/FileUtils.h" @@ -25,7 +26,6 @@ #include "meta/KVBasedClusterIdMan.h" #include "meta/MetaServiceHandler.h" #include "meta/MetaVersionMan.h" -#include "meta/RootUserMan.h" #include "meta/http/MetaHttpReplaceHostHandler.h" #include "meta/processors/job/JobManager.h" #include "meta/stats/MetaStats.h" @@ -167,29 +167,10 @@ int main(int argc, char* argv[]) { } } - { - /** - * Only leader part needed. - */ - auto ret = gKVStore->partLeader(nebula::kDefaultSpaceId, nebula::kDefaultPartId); - if (!nebula::ok(ret)) { - LOG(ERROR) << "Part leader get failed"; - return EXIT_FAILURE; - } - if (nebula::value(ret) == localhost) { - LOG(INFO) << "Check and init root user"; - auto checkRet = nebula::meta::RootUserMan::isGodExists(gKVStore.get()); - if (!nebula::ok(checkRet)) { - auto retCode = nebula::error(checkRet); - LOG(ERROR) << "Parser God Role error:" << apache::thrift::util::enumNameSafe(retCode); - return EXIT_FAILURE; - } - auto existGod = nebula::value(checkRet); - if (!existGod && !nebula::meta::RootUserMan::initRootUser(gKVStore.get())) { - LOG(ERROR) << "Init root user failed"; - return EXIT_FAILURE; - } - } + auto godInit = initGodUser(gKVStore.get(), localhost); + if (godInit != nebula::cpp2::ErrorCode::SUCCEEDED) { + LOG(ERROR) << "Init god user failed"; + return EXIT_FAILURE; } auto metaServer = std::make_unique(); diff --git a/src/daemons/MetaDaemonInit.cpp b/src/daemons/MetaDaemonInit.cpp index ef587ec6bda..fe76db5f26b 100644 --- a/src/daemons/MetaDaemonInit.cpp +++ b/src/daemons/MetaDaemonInit.cpp @@ -10,6 +10,7 @@ #include "common/base/Base.h" #include "common/base/SignalHandler.h" +#include "common/datatypes/HostAddr.h" #include "common/fs/FileUtils.h" #include "common/hdfs/HdfsCommandHelper.h" #include "common/hdfs/HdfsHelper.h" @@ -17,12 +18,14 @@ #include "common/ssl/SSLConfig.h" #include "common/thread/GenericThreadPool.h" #include "common/utils/MetaKeyUtils.h" +#include "kvstore/KVStore.h" #include "kvstore/NebulaStore.h" #include "kvstore/PartManager.h" #include "meta/ActiveHostsMan.h" #include "meta/KVBasedClusterIdMan.h" #include "meta/MetaServiceHandler.h" #include "meta/MetaVersionMan.h" +#include "meta/RootUserMan.h" #include "meta/http/MetaHttpReplaceHostHandler.h" #include "meta/processors/job/JobManager.h" #include "meta/stats/MetaStats.h" @@ -46,6 +49,8 @@ DECLARE_string(meta_server_addrs); // use define from grap flags. DEFINE_int32(ws_meta_http_port, 11000, "Port to listen on Meta with HTTP protocol"); #endif +DECLARE_uint32(raft_heartbeat_interval_secs); + using nebula::web::PathParams; namespace nebula::meta { @@ -158,6 +163,57 @@ std::unique_ptr initKV(std::vector p return kvstore; } +nebula::cpp2::ErrorCode initGodUser(nebula::kvstore::KVStore* kvstore, + const nebula::HostAddr& localhost) { + const int kMaxRetryTime = FLAGS_raft_heartbeat_interval_secs * 3; + constexpr int kRetryInterval = 1; + int retryTime = 0; + // Both leader & follower need to wait all reading ok. + // leader init need to retry writing if leader changed. + while (true) { + retryTime += kRetryInterval; + if (retryTime > kMaxRetryTime) { + LOG(ERROR) << "Retry too many times"; + return nebula::cpp2::ErrorCode::E_RETRY_EXHAUSTED; + } + auto ret = kvstore->partLeader(nebula::kDefaultSpaceId, nebula::kDefaultPartId); + if (!nebula::ok(ret)) { + LOG(ERROR) << "Part leader get failed"; + return nebula::error(ret); + } + LOG(INFO) << "Check root user"; // follower need to wait reading all ok, too. + auto checkRet = nebula::meta::RootUserMan::isGodExists(kvstore); + if (!nebula::ok(checkRet)) { + auto retCode = nebula::error(checkRet); + if (retCode == nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { + LOG(INFO) << "Leader changed, retry"; + sleep(kRetryInterval); + continue; + } + LOG(ERROR) << "Parser God Role error:" << apache::thrift::util::enumNameSafe(retCode); + return nebula::error(checkRet); + } + if (nebula::value(ret) == localhost) { + auto existGod = nebula::value(checkRet); + if (!existGod) { + auto initGod = nebula::meta::RootUserMan::initRootUser(kvstore); + if (initGod != nebula::cpp2::ErrorCode::SUCCEEDED) { + if (initGod != nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { + LOG(ERROR) << "Init God Role error:" << apache::thrift::util::enumNameSafe(initGod); + return initGod; + } else { + LOG(INFO) << "Leader changed, retry"; + sleep(kRetryInterval); + continue; + } + } + } + } + break; + } + return nebula::cpp2::ErrorCode::SUCCEEDED; +} + nebula::Status initWebService(nebula::WebService* svc, nebula::kvstore::KVStore* kvstore) { LOG(INFO) << "Starting Meta HTTP Service"; auto& router = svc->router(); diff --git a/src/daemons/MetaDaemonInit.h b/src/daemons/MetaDaemonInit.h index 0a94ae4bbd4..dfcd8077e06 100644 --- a/src/daemons/MetaDaemonInit.h +++ b/src/daemons/MetaDaemonInit.h @@ -9,6 +9,7 @@ #include "common/base/Status.h" #include "common/hdfs/HdfsCommandHelper.h" +#include "interface/gen-cpp2/common_types.h" #include "kvstore/KVStore.h" #include "webservice/WebService.h" @@ -18,4 +19,7 @@ std::unique_ptr initKV(std::vector p nebula::HostAddr localhost); nebula::Status initWebService(nebula::WebService* svc, nebula::kvstore::KVStore* kvstore); + +nebula::cpp2::ErrorCode initGodUser(nebula::kvstore::KVStore* kvstore, + const nebula::HostAddr& localhost); #endif diff --git a/src/daemons/StandAloneDaemon.cpp b/src/daemons/StandAloneDaemon.cpp index 5b519f726f5..f85b9f4fddc 100644 --- a/src/daemons/StandAloneDaemon.cpp +++ b/src/daemons/StandAloneDaemon.cpp @@ -223,29 +223,10 @@ int main(int argc, char *argv[]) { } } - { - /** - * Only leader part needed. - */ - auto ret = gMetaKVStore->partLeader(nebula::kDefaultSpaceId, nebula::kDefaultPartId); - if (!nebula::ok(ret)) { - LOG(ERROR) << "Part leader get failed"; - return; - } - if (nebula::value(ret) == metaLocalhost) { - LOG(INFO) << "Check and init root user"; - auto checkRet = nebula::meta::RootUserMan::isGodExists(gMetaKVStore.get()); - if (!nebula::ok(checkRet)) { - auto retCode = nebula::error(checkRet); - LOG(ERROR) << "Parser God Role error:" << apache::thrift::util::enumNameSafe(retCode); - return; - } - auto existGod = nebula::value(checkRet); - if (!existGod && !nebula::meta::RootUserMan::initRootUser(gMetaKVStore.get())) { - LOG(ERROR) << "Init root user failed"; - return; - } - } + auto godInit = initGodUser(gMetaKVStore.get(), metaLocalhost); + if (godInit != nebula::cpp2::ErrorCode::SUCCEEDED) { + LOG(ERROR) << "Init god user failed"; + return; } auto handler = diff --git a/src/meta/RootUserMan.h b/src/meta/RootUserMan.h index c8bde933203..af3e454f312 100644 --- a/src/meta/RootUserMan.h +++ b/src/meta/RootUserMan.h @@ -10,6 +10,7 @@ #include "common/base/Base.h" #include "common/utils/MetaKeyUtils.h" +#include "interface/gen-cpp2/common_types.h" #include "kvstore/KVStore.h" namespace nebula { @@ -53,7 +54,7 @@ class RootUserMan { } } - static bool initRootUser(kvstore::KVStore* kv) { + static nebula::cpp2::ErrorCode initRootUser(kvstore::KVStore* kv) { LOG(INFO) << "Init root user"; auto encodedPwd = proxygen::md5Encode(folly::StringPiece("nebula")); auto userKey = MetaKeyUtils::userKey("root"); @@ -64,18 +65,15 @@ class RootUserMan { std::vector data; data.emplace_back(std::move(userKey), std::move(userVal)); data.emplace_back(std::move(roleKey), std::move(roleVal)); - bool ret = true; + nebula::cpp2::ErrorCode ec; folly::Baton baton; kv->asyncMultiPut( kDefaultSpaceId, kDefaultPartId, std::move(data), [&](nebula::cpp2::ErrorCode code) { - if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { - LOG(INFO) << "Put failed, error " << static_cast(code); - ret = false; - } + ec = code; baton.post(); }); baton.wait(); - return ret; + return ec; } };