diff --git a/src/common/encryption/LicenseManagerConnector.cpp b/src/common/encryption/LicenseManagerConnector.cpp index 9b0742c52d1..ac675ba5d17 100644 --- a/src/common/encryption/LicenseManagerConnector.cpp +++ b/src/common/encryption/LicenseManagerConnector.cpp @@ -133,7 +133,10 @@ void LicenseManagerConnector::threadFunc() { auto rse = getLMId(); if (!rse.ok()) { LOG(ERROR) << "[License Manager] failed to get license manager ID, error: " << rse.toString(); - nextCheckPeriod = kRetryPeriodInSec + folly::Random::rand32(5 * 60); + // nextCheckPeriod = kRetryPeriodInSec + folly::Random::rand32(5 * 60); + // TODO(Aiee) for test only + nextCheckPeriod = 5; + DLOG(INFO) << "[License Manager] next check period: " << nextCheckPeriod; setRetryFlag(); } else { auto lmStatus = validateLicense(); @@ -175,7 +178,10 @@ void LicenseManagerConnector::threadFunc() { dropAllHosts_ = true; } setRetryFlag(); - nextCheckPeriod = kRetryPeriodInSec + folly::Random::rand32(5 * 60); + // nextCheckPeriod = kRetryPeriodInSec + folly::Random::rand32(5 * 60); + // TODO(Aiee) for test only + nextCheckPeriod = 10; + DLOG(INFO) << "[License Manager] next check period: " << nextCheckPeriod; } } } @@ -186,9 +192,6 @@ void LicenseManagerConnector::threadFunc() { LOG(ERROR) << "[License Manager] Failed to validate license with license manager, retry " "timeout, all hosts will be shut down"; } - - // TODO(Aiee) for test only, should be removed before official v3.5 release - nextCheckPeriod = 10; } StatusOr LicenseManagerConnector::buildValidateRequest(const std::string& lmId, @@ -225,14 +228,15 @@ ErrorOr LicenseManagerConnector::buildRawR if (!nebula::ok(res)) { return nebula::error(res); } - auto graphNode = nebula::value(res).size(); + auto queryNode = nebula::value(res).size(); - // Get total graph cpu - auto graphCPU = 0; + // Get total query CPU + auto queryCPU = 0; for (auto& host : nebula::value(res)) { - graphCPU += host.cpuNum_; + queryCPU += host.cpuNum_; } - VLOG(2) << "Total graph node number: " << graphNode << ", graph CPU cores: " << graphCPU; + VLOG(2) << "[License Manager] Total query node number: " << queryNode + << ", query CPU cores: " << queryCPU; // Get active storage hosts res = meta::ActiveHostsMan::getHostInfoByRole(kvstore, meta::cpp2::HostRole::STORAGE); @@ -246,7 +250,8 @@ ErrorOr LicenseManagerConnector::buildRawR for (auto& host : nebula::value(res)) { storageCPU += host.cpuNum_; } - VLOG(2) << "Total storage node number: " << storageNode << ", storage CPU cores: " << storageCPU; + VLOG(2) << "[License Manager] Total storage node number: " << storageNode + << ", storage CPU cores: " << storageCPU; folly::dynamic request = folly::dynamic::object(); auto timestamp = time::WallClock::fastNowInSec(); @@ -254,9 +259,9 @@ ErrorOr LicenseManagerConnector::buildRawR request["timestamp"] = timestamp; folly::dynamic apply = folly::dynamic::object(); - apply["graphCPU"] = graphCPU; + apply["queryCPU"] = queryCPU; apply["storageCPU"] = storageCPU; - apply["graphNode"] = graphNode; + apply["queryNode"] = queryNode; apply["storageNode"] = storageNode; request["apply"] = apply; DLOG(INFO) << "Request body in string: \n" << folly::toJson(request); @@ -496,10 +501,9 @@ LMStatus LicenseManagerConnector::checkRawResponse(const std::string& rawResp) { LOG(INFO) << "[License Manager] License validation request at " << localTimeStamp; if (std::abs(timestamp - localTimeStamp) > 21600) { - LOG(ERROR) << "[License Manager] Invalid response from license manager, the timestamps from " - "the License manager " - "is is " - << timestamp << ", NebulaGraph timestamp is " << localTimeStamp; + LOG(ERROR) << "[License Manager] Expired response from license manager, the timestamps from " + "the License manager is " + << timestamp << ", query timestamp is " << localTimeStamp; return LMStatus::ErrRequestExpired; } @@ -510,8 +514,8 @@ LMStatus LicenseManagerConnector::checkRawResponse(const std::string& rawResp) { resourceUsage_.storageQuota = rawInfo["quota"]["storage"].asInt(); } - VLOG(2) << "Quota resource type: " << resourceUsage_.type << "\n" - << "max graph quota: " << resourceUsage_.graphQuota << "\n" + VLOG(2) << "[License Manager] Quota resource type: " << resourceUsage_.type << "\n" + << "max query quota: " << resourceUsage_.graphQuota << "\n" << "max storage quota: " << resourceUsage_.storageQuota; auto overflowFlag = rawInfo["overflow"].asBool(); auto lmStatus = handleResponseStatus(status, overflowFlag); @@ -539,12 +543,12 @@ LMStatus LicenseManagerConnector::handleResponseStatus(const std::string& messag LOG(WARNING) << "[License Manager] Resource usage has exceeded the license limit"; return LMStatus::Overflow; } - LOG(ERROR) << "[License Manager] License has expired, all graph and storage services will be " + LOG(ERROR) << "[License Manager] License has expired, all query and storage services will be " "terminated soon, " "please contact your administrator to renew the license"; return LMStatus::Expired; } else if (message == "Terminated") { - LOG(ERROR) << "[License Manager] The license has expired, all graph and storage services are " + LOG(ERROR) << "[License Manager] The license has expired, all query and storage services are " "terminated, " "please contact your administrator to renew the license"; return LMStatus::Terminated; diff --git a/src/common/encryption/LicenseManagerConnector.h b/src/common/encryption/LicenseManagerConnector.h index 3664619a8b9..7aeb7455952 100644 --- a/src/common/encryption/LicenseManagerConnector.h +++ b/src/common/encryption/LicenseManagerConnector.h @@ -96,9 +96,9 @@ class LicenseManagerConnector final { // { // timestamp: 111111111111, // apply: { - // graphCPU: 100, + // queryCPU: 100, // storageCPU: 100, - // graphNode: 10, + // queryNode: 10, // storageNode: 10 // } // } diff --git a/src/meta/ActiveHostsMan.h b/src/meta/ActiveHostsMan.h index 4b432f0efa3..fb4cb9dccfe 100644 --- a/src/meta/ActiveHostsMan.h +++ b/src/meta/ActiveHostsMan.h @@ -270,6 +270,8 @@ class ActiveHostsMan final { /** * @brief Get all alive host info by given host role + * This is used in enterprise version to fetch the current cluster status, and + * the ttl is set to 1 heartbeat interval to * * @param kv From where to get * @param hostRole diff --git a/src/meta/processors/admin/HBProcessor.cpp b/src/meta/processors/admin/HBProcessor.cpp index f7f0c9c5a5e..777db0ee7ad 100644 --- a/src/meta/processors/admin/HBProcessor.cpp +++ b/src/meta/processors/admin/HBProcessor.cpp @@ -178,6 +178,14 @@ nebula::cpp2::ErrorCode HBProcessor::checkNodeNumber(const cpp2::HostRole role, } if (!pass) { + // Remove the host from active hosts + auto removeHostRet = removeHost(host); + if (removeHostRet != nebula::cpp2::ErrorCode::SUCCEEDED) { + LOG(ERROR) << fmt::format("Remove host {} failed, error code: {}", + host.toString(), + apache::thrift::util::enumNameSafe(removeHostRet)); + } + LOG(ERROR) << fmt::format( "The number of {} node has reached the maximum, the max number of {} node in the cluster " "is {}, heartbeat from {} is rejected", @@ -237,6 +245,14 @@ nebula::cpp2::ErrorCode HBProcessor::checkNodeCpu(const cpp2::HostRole role, } if (!pass) { + // Remove the host from active hosts + auto removeHostRet = removeHost(host); + if (removeHostRet != nebula::cpp2::ErrorCode::SUCCEEDED) { + LOG(ERROR) << fmt::format("Remove host {} failed, error code: {}", + host.toString(), + apache::thrift::util::enumNameSafe(removeHostRet)); + } + LOG(ERROR) << fmt::format( "[License Manager] The number of {} node CPU cores has reached the maximum, CPU core " "maximum: {}, heartbeat from {} is rejected", @@ -294,7 +310,7 @@ nebula::cpp2::ErrorCode HBProcessor::checkResourceUsage(const cpp2::HostRole rol auto resourceType = LMCIns->resourceUsage_.type; DLOG(INFO) << "[License Manager] Resource type: " << resourceType - << ", graph resource cap: " << LMCIns->resourceUsage_.graphQuota + << ", query resource cap: " << LMCIns->resourceUsage_.graphQuota << ", storage resource cap: " << LMCIns->resourceUsage_.storageQuota; if (resourceType == "CPU") { @@ -309,6 +325,22 @@ nebula::cpp2::ErrorCode HBProcessor::checkResourceUsage(const cpp2::HostRole rol return nebula::cpp2::ErrorCode::SUCCEEDED; } +nebula::cpp2::ErrorCode HBProcessor::removeHost(const HostAddr& host) { + auto hostKey = MetaKeyUtils::hostKey(host.host, host.port); + folly::Baton baton; + nebula::cpp2::ErrorCode errorCode; + kvstore_->asyncRemove(kDefaultSpaceId, + kDefaultPartId, + hostKey, + [this, &baton, &errorCode](nebula::cpp2::ErrorCode code) { + this->handleErrorCode(code); + errorCode = code; + baton.post(); + }); + baton.wait(); + return errorCode; +} + void HBProcessor::setLeaderInfo() { auto leaderRet = kvstore_->partLeader(kDefaultSpaceId, kDefaultPartId); if (ok(leaderRet)) { diff --git a/src/meta/processors/admin/HBProcessor.h b/src/meta/processors/admin/HBProcessor.h index d8e0f33665f..0cea0923760 100644 --- a/src/meta/processors/admin/HBProcessor.h +++ b/src/meta/processors/admin/HBProcessor.h @@ -78,6 +78,10 @@ class HBProcessor : public BaseProcessor { void setLeaderInfo(); + // enterprise only + // Delete the host key from meta data to remove the host from active hosts + nebula::cpp2::ErrorCode removeHost(const HostAddr& host); + ClusterID clusterId_{0}; const HBCounters* counters_{nullptr}; static std::atomic metaVersion_;