From cd3831365fe2b846dfe47d07d85a48d0d87858e2 Mon Sep 17 00:00:00 2001 From: Yichen Wang <18348405+Aiee@users.noreply.github.com> Date: Fri, 11 Mar 2022 13:54:14 +0800 Subject: [PATCH 1/4] Fix service crash caused by connecting using a pre-v2.6.0 client (#3942) * Reject clients with a version lower than 2.6.0 * Add TTL for clientAddr_ * Fix tests * Use client_idle_timeout_secs as the clientAddrTimeout * Change the param of authCheckFromCache() --- src/clients/meta/MetaClient.cpp | 22 ++++++++++++++ src/clients/meta/MetaClient.h | 21 +++++++++++++ src/graph/service/GraphService.cpp | 37 +++++++++++++++++++++-- src/graph/service/GraphService.h | 2 +- src/graph/service/PasswordAuthenticator.h | 1 + 5 files changed, 79 insertions(+), 4 deletions(-) diff --git a/src/clients/meta/MetaClient.cpp b/src/clients/meta/MetaClient.cpp index 4c6ff5f0cb3..ae9186c3021 100644 --- a/src/clients/meta/MetaClient.cpp +++ b/src/clients/meta/MetaClient.cpp @@ -2298,6 +2298,7 @@ Status MetaClient::authCheckFromCache(const std::string& account, const std::str if (!ready_) { return Status::Error("Meta Service not ready"); } + folly::rcu_reader guard; const auto& metadata = *metadata_.load(); auto iter = metadata.userPasswordMap_.find(account); @@ -2468,6 +2469,11 @@ folly::Future> MetaClient::heartbeat() { } } + // TTL for clientAddrMap + // If multiple connections are created but do not authenticate, the clientAddrMap_ will keep + // growing. This is to clear the clientAddrMap_ regularly. + clearClientAddrMap(); + // info used in the agent, only set once // TOOD(spw): if we could add data path(disk) dynamicly in the future, it should be // reported every time it changes @@ -3619,5 +3625,21 @@ Status MetaClient::verifyVersion() { } return Status::OK(); } + +void MetaClient::clearClientAddrMap() { + if (clientAddrMap_.size() == 0) { + return; + } + + auto curTimestamp = time::WallClock::fastNowInSec(); + for (auto it = clientAddrMap_.cbegin(); it != clientAddrMap_.cend();) { + // The clientAddr is expired + if (it->second < curTimestamp) { + it = clientAddrMap_.erase(it); + } else { + ++it; + } + } +} } // namespace meta } // namespace nebula diff --git a/src/clients/meta/MetaClient.h b/src/clients/meta/MetaClient.h index 01584a974f7..b3959c899c4 100644 --- a/src/clients/meta/MetaClient.h +++ b/src/clients/meta/MetaClient.h @@ -154,6 +154,8 @@ using MetaConfigMap = using FTIndexMap = std::unordered_map; using SessionMap = std::unordered_map; + +using clientAddrMap = folly::ConcurrentHashMap; class MetaChangedListener { public: virtual ~MetaChangedListener() = default; @@ -641,6 +643,10 @@ class MetaClient { return options_.localHost_.toString(); } + clientAddrMap& getClientAddrMap() { + return clientAddrMap_; + } + protected: // Return true if load succeeded. bool loadData(); @@ -727,6 +733,9 @@ class MetaClient { Status verifyVersion(); + // Removes expired keys in the clientAddrMap_ + void clearClientAddrMap(); + private: std::shared_ptr ioThreadPool_; std::shared_ptr> clientsMan_; @@ -807,6 +816,18 @@ class MetaClient { NameIndexMap tagNameIndexMap_; NameIndexMap edgeNameIndexMap_; + // TODO(Aiee) This is a walkaround to address the problem that using a lower version(< v2.6.0) + // client to connect with higher version(>= v3.0.0) Nebula service will cause a crash. + // + // The key here is the host of the client that sends the request, and the value indicates the + // expiration of the key because we don't want to keep the key forever. + // + // The assumption here is that there is ONLY ONE VERSION of the client in the host. + // + // This map will be updated when verifyVersion() is called. Only the clients since v2.6.0 will + // call verifyVersion(), thus we could determine whether the client version is lower than v2.6.0 + clientAddrMap clientAddrMap_; + // Global service client ServiceClientsList serviceClientList_; FTIndexMap fulltextIndexMap_; diff --git a/src/graph/service/GraphService.cpp b/src/graph/service/GraphService.cpp index 4deaafc372b..97b83ea6d6f 100644 --- a/src/graph/service/GraphService.cpp +++ b/src/graph/service/GraphService.cpp @@ -24,6 +24,9 @@ namespace nebula { namespace graph { +// The default value is 28800 seconds +const int64_t clientAddrTimeout = FLAGS_client_idle_timeout_secs; + Status GraphService::init(std::shared_ptr ioExecutor, const HostAddr& hostAddr) { auto addrs = network::NetworkUtils::toHosts(FLAGS_meta_server_addrs); @@ -69,8 +72,10 @@ folly::Future GraphService::future_authenticate(const std::string& auto ctx = std::make_unique>(); auto future = ctx->future(); - // check username and password failed - auto authResult = auth(username, password); + // Check username and password failed + // Check whether the client has called verifyClientVersion() + auto clientAddr = HostAddr(peer->getAddressStr(), peer->getPort()); + auto authResult = auth(username, password, clientAddr); if (!authResult.ok()) { ctx->resp().errorCode = ErrorCode::E_BAD_USERNAME_PASSWORD; ctx->resp().errorMsg.reset(new std::string(authResult.toString())); @@ -202,12 +207,29 @@ folly::Future GraphService::future_executeJsonWithParameter( }); } -Status GraphService::auth(const std::string& username, const std::string& password) { +Status GraphService::auth(const std::string& username, + const std::string& password, + const HostAddr& clientIp) { if (!FLAGS_enable_authorize) { return Status::OK(); } if (FLAGS_auth_type == "password") { + auto metaClient = queryEngine_->metaClient(); + // TODO(Aiee) This is a walkaround to address the problem that using a lower version(< v2.6.0) + // client to connect with higher version(>= v3.0.0) Nebula service will cause a crash. + // + // Only the clients since v2.6.0 will call verifyVersion(), thus we could determine whether the + // client version is lower than v2.6.0 + auto clientAddrIt = metaClient->getClientAddrMap().find(clientIp); + if (clientAddrIt == metaClient->getClientAddrMap().end()) { + return Status::Error( + folly::sformat("The version of the client sending request from {} is lower than v2.6.0, " + "please update the client.", + clientIp.toString())); + } + + // Auth with PasswordAuthenticator auto authenticator = std::make_unique(queryEngine_->metaClient()); return authenticator->auth(username, proxygen::md5Encode(folly::StringPiece(password))); } else if (FLAGS_auth_type == "cloud") { @@ -230,6 +252,7 @@ folly::Future GraphService::future_verifyClientVe folly::splitTo( ":", FLAGS_client_white_list, std::inserter(whiteList, whiteList.begin())); cpp2::VerifyClientVersionResp resp; + if (FLAGS_enable_client_white_list && whiteList.find(req.get_version()) == whiteList.end()) { resp.error_code_ref() = nebula::cpp2::ErrorCode::E_CLIENT_SERVER_INCOMPATIBLE; resp.error_msg_ref() = folly::stringPrintf( @@ -239,6 +262,14 @@ folly::Future GraphService::future_verifyClientVe } else { resp.error_code_ref() = nebula::cpp2::ErrorCode::SUCCEEDED; } + + // The client sent request has a version >= v2.6.0, mark the address as valid + auto* peer = getRequestContext()->getPeerAddress(); + auto clientAddr = HostAddr(peer->getAddressStr(), peer->getPort()); + + auto ttlTimestamp = time::WallClock::fastNowInSec() + clientAddrTimeout; + auto clientAddrMap = &metaClient_->getClientAddrMap(); + clientAddrMap->insert_or_assign(clientAddr, ttlTimestamp); return folly::makeFuture(std::move(resp)); } } // namespace graph diff --git a/src/graph/service/GraphService.h b/src/graph/service/GraphService.h index 85ed21a6ce4..d96d6385004 100644 --- a/src/graph/service/GraphService.h +++ b/src/graph/service/GraphService.h @@ -54,7 +54,7 @@ class GraphService final : public cpp2::GraphServiceSvIf { std::unique_ptr metaClient_; private: - Status auth(const std::string& username, const std::string& password); + Status auth(const std::string& username, const std::string& password, const HostAddr& clientIp); std::unique_ptr sessionManager_; std::unique_ptr queryEngine_; diff --git a/src/graph/service/PasswordAuthenticator.h b/src/graph/service/PasswordAuthenticator.h index 7bb3e792251..98996271655 100644 --- a/src/graph/service/PasswordAuthenticator.h +++ b/src/graph/service/PasswordAuthenticator.h @@ -16,6 +16,7 @@ class PasswordAuthenticator final : public Authenticator { public: explicit PasswordAuthenticator(meta::MetaClient* client); + // Authenticates the user by checking the user/password cache in the meta Status auth(const std::string& user, const std::string& password) override; private: From 2f3259de4673ff3d5c6f2281a6c606375b0afebe Mon Sep 17 00:00:00 2001 From: jakevin <30525741+jackwener@users.noreply.github.com> Date: Sat, 12 Mar 2022 08:02:33 +0800 Subject: [PATCH 2/4] Add annotation (#4012) --- .../test/AggregateExpressionBenchmark.cpp | 2 -- src/common/fs/FileUtils.cpp | 4 ++-- src/common/memory/MemoryUtils.cpp | 7 ++---- src/common/network/NetworkUtils.cpp | 14 +++++------ src/common/time/test/WallClockBenchmark.cpp | 4 ++-- src/graph/service/GraphFlags.h | 2 +- src/graph/service/GraphServer.cpp | 2 ++ src/graph/service/GraphServer.h | 2 +- src/graph/service/GraphService.cpp | 2 +- src/graph/service/PermissionCheck.cpp | 13 +++++----- src/graph/service/PermissionManager.cpp | 24 ++++++++----------- src/graph/service/PermissionManager.h | 1 + src/graph/service/QueryEngine.cpp | 2 ++ src/graph/service/QueryInstance.cpp | 10 +++++++- src/graph/service/QueryInstance.h | 3 ++- src/graph/service/RequestContext.h | 4 ++-- 16 files changed, 50 insertions(+), 46 deletions(-) diff --git a/src/common/expression/test/AggregateExpressionBenchmark.cpp b/src/common/expression/test/AggregateExpressionBenchmark.cpp index 2e7f32fa7cb..f79cc2fc106 100644 --- a/src/common/expression/test/AggregateExpressionBenchmark.cpp +++ b/src/common/expression/test/AggregateExpressionBenchmark.cpp @@ -5,8 +5,6 @@ #include -#include - #include "common/base/ObjectPool.h" #include "common/expression/AggregateExpression.h" #include "common/expression/ConstantExpression.h" diff --git a/src/common/fs/FileUtils.cpp b/src/common/fs/FileUtils.cpp index f5293de8303..b1183d25055 100644 --- a/src/common/fs/FileUtils.cpp +++ b/src/common/fs/FileUtils.cpp @@ -92,8 +92,8 @@ StatusOr FileUtils::readLink(const char* path) { } StatusOr FileUtils::realPath(const char* path) { - char* buffer = ::realpath(path, NULL); - if (buffer == NULL) { + char* buffer = ::realpath(path, nullptr); + if (buffer == nullptr) { return Status::Error("realpath %s: %s", path, ::strerror(errno)); } std::string truePath(buffer); diff --git a/src/common/memory/MemoryUtils.cpp b/src/common/memory/MemoryUtils.cpp index 7e0bb7db7cc..ede9599c230 100644 --- a/src/common/memory/MemoryUtils.cpp +++ b/src/common/memory/MemoryUtils.cpp @@ -5,13 +5,10 @@ #include "common/memory/MemoryUtils.h" -#include #include #include -#include #include -#include #include "common/fs/FileUtils.h" @@ -42,7 +39,7 @@ StatusOr MemoryUtils::hitsHighWatermark() { uint64_t cacheSize = 0; for (; iter.valid(); ++iter) { auto& sm = iter.matched(); - cacheSize += std::stoul(sm[2].str(), NULL); + cacheSize += std::stoul(sm[2].str(), nullptr); } std::string limitPath = @@ -64,7 +61,7 @@ StatusOr MemoryUtils::hitsHighWatermark() { std::vector memorySize; for (; iter.valid(); ++iter) { auto& sm = iter.matched(); - memorySize.emplace_back(std::stoul(sm[2].str(), NULL) << 10); + memorySize.emplace_back(std::stoul(sm[2].str(), nullptr) << 10); } std::sort(memorySize.begin(), memorySize.end()); if (memorySize.size() >= 2u) { diff --git a/src/common/network/NetworkUtils.cpp b/src/common/network/NetworkUtils.cpp index b0571c0e240..c6e13d3439f 100644 --- a/src/common/network/NetworkUtils.cpp +++ b/src/common/network/NetworkUtils.cpp @@ -112,7 +112,7 @@ std::unordered_set NetworkUtils::getPortsInUse() { fs::FileUtils::FileLineIterator iter("/proc/net/tcp", ®ex); while (iter.valid()) { auto& sm = iter.matched(); - inUse.emplace(std::stoul(sm[1].str(), NULL, 16)); + inUse.emplace(std::stoul(sm[1].str(), nullptr, 16)); ++iter; } } @@ -120,7 +120,7 @@ std::unordered_set NetworkUtils::getPortsInUse() { fs::FileUtils::FileLineIterator iter("/proc/net/tcp6", ®ex); while (iter.valid()) { auto& sm = iter.matched(); - inUse.emplace(std::stoul(sm[1].str(), NULL, 16)); + inUse.emplace(std::stoul(sm[1].str(), nullptr, 16)); ++iter; } } @@ -128,7 +128,7 @@ std::unordered_set NetworkUtils::getPortsInUse() { fs::FileUtils::FileLineIterator iter("/proc/net/udp", ®ex); while (iter.valid()) { auto& sm = iter.matched(); - inUse.emplace(std::stoul(sm[1].str(), NULL, 16)); + inUse.emplace(std::stoul(sm[1].str(), nullptr, 16)); ++iter; } } @@ -136,7 +136,7 @@ std::unordered_set NetworkUtils::getPortsInUse() { fs::FileUtils::FileLineIterator iter("/proc/net/udp6", ®ex); while (iter.valid()) { auto& sm = iter.matched(); - inUse.emplace(std::stoul(sm[1].str(), NULL, 16)); + inUse.emplace(std::stoul(sm[1].str(), nullptr, 16)); ++iter; } } @@ -144,7 +144,7 @@ std::unordered_set NetworkUtils::getPortsInUse() { fs::FileUtils::FileLineIterator iter("/proc/net/raw", ®ex); while (iter.valid()) { auto& sm = iter.matched(); - inUse.emplace(std::stoul(sm[1].str(), NULL, 16)); + inUse.emplace(std::stoul(sm[1].str(), nullptr, 16)); ++iter; } } @@ -152,7 +152,7 @@ std::unordered_set NetworkUtils::getPortsInUse() { fs::FileUtils::FileLineIterator iter("/proc/net/raw6", ®ex); while (iter.valid()) { auto& sm = iter.matched(); - inUse.emplace(std::stoul(sm[1].str(), NULL, 16)); + inUse.emplace(std::stoul(sm[1].str(), nullptr, 16)); ++iter; } } @@ -209,7 +209,7 @@ StatusOr> NetworkUtils::resolveHost(const std::string& hos continue; } - auto address = ((struct sockaddr_in*)rp->ai_addr)->sin_addr.s_addr; + auto address = (reinterpret_cast(rp->ai_addr))->sin_addr.s_addr; // We need to match the integer byte order generated by ipv4ToInt, // so we need to convert here. addrs.emplace_back(intToIPv4(htonl(std::move(address))), port); diff --git a/src/common/time/test/WallClockBenchmark.cpp b/src/common/time/test/WallClockBenchmark.cpp index 87c1b0b5f04..9bae3a0c371 100644 --- a/src/common/time/test/WallClockBenchmark.cpp +++ b/src/common/time/test/WallClockBenchmark.cpp @@ -16,7 +16,7 @@ using nebula::time::WallClock; BENCHMARK(gettimeofday_get_msec, iters) { for (uint32_t i = 0; i < iters; i++) { struct timeval tp; - gettimeofday(&tp, NULL); + gettimeofday(&tp, nullptr); auto ts = tp.tv_sec * 1000 + tp.tv_usec / 1000; folly::doNotOptimizeAway(ts); } @@ -46,7 +46,7 @@ BENCHMARK_DRAW_LINE(); BENCHMARK(gettimeofday_get_sec, iters) { for (uint32_t i = 0; i < iters; i++) { struct timeval tp; - gettimeofday(&tp, NULL); + gettimeofday(&tp, nullptr); auto ts = tp.tv_sec; folly::doNotOptimizeAway(ts); } diff --git a/src/graph/service/GraphFlags.h b/src/graph/service/GraphFlags.h index fe3ecaec3ba..22d8e433ddd 100644 --- a/src/graph/service/GraphFlags.h +++ b/src/graph/service/GraphFlags.h @@ -45,7 +45,7 @@ DECLARE_uint32(failed_login_attempts); // The deault value is 0. A value of 0 disables the option. DECLARE_uint32(password_lock_time_in_secs); -// optimizer +// Optimizer DECLARE_bool(enable_optimizer); DECLARE_int64(max_allowed_connections); diff --git a/src/graph/service/GraphServer.cpp b/src/graph/service/GraphServer.cpp index c9254d35295..897133a905e 100644 --- a/src/graph/service/GraphServer.cpp +++ b/src/graph/service/GraphServer.cpp @@ -40,6 +40,7 @@ bool GraphServer::start() { return false; } + // Init worker id for snowflake generating unique id nebula::Snowflake::initWorkerId(interface->metaClient_.get()); graphThread_ = std::make_unique([&] { @@ -90,6 +91,7 @@ void GraphServer::notifyStop() { } } +// Stop the server. void GraphServer::stop() { if (serverStatus_.load() == ServiceStatus::STATUS_STOPPED) { LOG(INFO) << "The graph server has been stopped"; diff --git a/src/graph/service/GraphServer.h b/src/graph/service/GraphServer.h index 671c0720605..6c2b2696dde 100644 --- a/src/graph/service/GraphServer.h +++ b/src/graph/service/GraphServer.h @@ -24,7 +24,7 @@ class GraphServer { void stop(); - // used for signal handler to set an internal stop flag + // Used for signal handler to set an internal stop flag void notifyStop(); void waitUntilStop(); diff --git a/src/graph/service/GraphService.cpp b/src/graph/service/GraphService.cpp index 97b83ea6d6f..030cbf72e1b 100644 --- a/src/graph/service/GraphService.cpp +++ b/src/graph/service/GraphService.cpp @@ -44,7 +44,7 @@ Status GraphService::init(std::shared_ptr ioExecuto metaClient_ = std::make_unique(ioExecutor, std::move(addrs.value()), options); - // load data try 3 time + // Load data try 3 time bool loadDataOk = metaClient_->waitForMetadReady(3); if (!loadDataOk) { // Resort to retrying in the background diff --git a/src/graph/service/PermissionCheck.cpp b/src/graph/service/PermissionCheck.cpp index 3acacf1846b..f0abcc00b5e 100644 --- a/src/graph/service/PermissionCheck.cpp +++ b/src/graph/service/PermissionCheck.cpp @@ -28,11 +28,10 @@ namespace graph { * Special operation : kShow, kChangePassword */ -// static -Status PermissionCheck::permissionCheck(ClientSession *session, - Sentence *sentence, - ValidateContext *vctx, - GraphSpaceID targetSpace) { +/* static */ Status PermissionCheck::permissionCheck(ClientSession *session, + Sentence *sentence, + ValidateContext *vctx, + GraphSpaceID targetSpace) { if (!FLAGS_enable_authorize) { return Status::OK(); } @@ -165,7 +164,7 @@ Status PermissionCheck::permissionCheck(ClientSession *session, case Sentence::Kind::kShowMetaLeader: case Sentence::Kind::kShowHosts: { /** - * all roles can be show for above operations. + * All roles can be show for above operations. */ return Status::OK(); } @@ -206,7 +205,7 @@ Status PermissionCheck::permissionCheck(ClientSession *session, return Status::OK(); } case Sentence::Kind::kExplain: - // everyone could explain + // Everyone could explain return Status::OK(); case Sentence::Kind::kSequential: { // No permission checking for sequential sentence. diff --git a/src/graph/service/PermissionManager.cpp b/src/graph/service/PermissionManager.cpp index 80ce3153a53..328463c2e75 100644 --- a/src/graph/service/PermissionManager.cpp +++ b/src/graph/service/PermissionManager.cpp @@ -10,8 +10,7 @@ namespace nebula { namespace graph { -// static -Status PermissionManager::canReadSpace(ClientSession *session, GraphSpaceID spaceId) { +/* static */ Status PermissionManager::canReadSpace(ClientSession *session, GraphSpaceID spaceId) { if (!FLAGS_enable_authorize) { return Status::OK(); } @@ -35,8 +34,8 @@ Status PermissionManager::canReadSpace(ClientSession *session, GraphSpaceID spac return Status::PermissionError("No permission to read space."); } -// static -Status PermissionManager::canReadSchemaOrData(ClientSession *session, ValidateContext *vctx) { +/* static */ Status PermissionManager::canReadSchemaOrData(ClientSession *session, + ValidateContext *vctx) { if (!FLAGS_enable_authorize) { return Status::OK(); } @@ -60,8 +59,7 @@ Status PermissionManager::canReadSchemaOrData(ClientSession *session, ValidateCo return Status::PermissionError("No permission to read schema/data."); } -// static -Status PermissionManager::canWriteSpace(ClientSession *session) { +/* static */ Status PermissionManager::canWriteSpace(ClientSession *session) { if (!FLAGS_enable_authorize) { return Status::OK(); } @@ -71,8 +69,8 @@ Status PermissionManager::canWriteSpace(ClientSession *session) { return Status::PermissionError("No permission to write space."); } -// static -Status PermissionManager::canWriteSchema(ClientSession *session, ValidateContext *vctx) { +/* static */ Status PermissionManager::canWriteSchema(ClientSession *session, + ValidateContext *vctx) { if (!FLAGS_enable_authorize) { return Status::OK(); } @@ -97,8 +95,7 @@ Status PermissionManager::canWriteSchema(ClientSession *session, ValidateContext return Status::PermissionError("No permission to write schema."); } -// static -Status PermissionManager::canWriteUser(ClientSession *session) { +/* static */ Status PermissionManager::canWriteUser(ClientSession *session) { if (!FLAGS_enable_authorize) { return Status::OK(); } @@ -113,8 +110,8 @@ Status PermissionManager::canWriteUser(ClientSession *session) { } } -// static -Status PermissionManager::canReadUser(ClientSession *session, const std::string &targetUser) { +/* static */ Status PermissionManager::canReadUser(ClientSession *session, + const std::string &targetUser) { if (!FLAGS_enable_authorize) { return Status::OK(); } @@ -177,8 +174,7 @@ Status PermissionManager::canWriteRole(ClientSession *session, targetUser.c_str()); } -// static -Status PermissionManager::canWriteData(ClientSession *session, ValidateContext *vctx) { +/* static */ Status PermissionManager::canWriteData(ClientSession *session, ValidateContext *vctx) { if (!FLAGS_enable_authorize) { return Status::OK(); } diff --git a/src/graph/service/PermissionManager.h b/src/graph/service/PermissionManager.h index 655b8dd420f..06737c439e1 100644 --- a/src/graph/service/PermissionManager.h +++ b/src/graph/service/PermissionManager.h @@ -16,6 +16,7 @@ namespace nebula { namespace graph { +// This module is responsible for checking the permission of the user class PermissionManager final { public: PermissionManager() = delete; diff --git a/src/graph/service/QueryEngine.cpp b/src/graph/service/QueryEngine.cpp index d3f4f54e609..477ad18a5f1 100644 --- a/src/graph/service/QueryEngine.cpp +++ b/src/graph/service/QueryEngine.cpp @@ -34,6 +34,7 @@ Status QueryEngine::init(std::shared_ptr ioExecutor PlannersRegister::registerPlanners(); + // Set default optimizer rules std::vector rulesets{&opt::RuleSet::DefaultRules()}; if (FLAGS_enable_optimizer) { rulesets.emplace_back(&opt::RuleSet::QueryRules()); @@ -43,6 +44,7 @@ Status QueryEngine::init(std::shared_ptr ioExecutor return setupMemoryMonitorThread(); } +// Create query context and query instance and execute it void QueryEngine::execute(RequestContextPtr rctx) { auto qctx = std::make_unique(std::move(rctx), schemaManager_.get(), diff --git a/src/graph/service/QueryInstance.cpp b/src/graph/service/QueryInstance.cpp index aa56c9e58d5..4425b86e3d6 100644 --- a/src/graph/service/QueryInstance.cpp +++ b/src/graph/service/QueryInstance.cpp @@ -43,11 +43,14 @@ void QueryInstance::execute() { return; } + // Sentence is explain query, finish if (!explainOrContinue()) { onFinish(); return; } + // The execution engine converts the physical execution plan generated by the Planner into a + // series of Executors through the Scheduler to drive the execution of the Executors. scheduler_->schedule() .thenValue([this](Status s) { if (s.ok()) { @@ -66,6 +69,7 @@ Status QueryInstance::validateAndOptimize() { auto *rctx = qctx()->rctx(); auto &spaceName = rctx->session()->space().name; VLOG(1) << "Parsing query: " << rctx->query(); + // Result of parsing, get the parsing tree auto result = GQLParser(qctx()).parse(rctx->query()); NG_RETURN_IF_ERROR(result); sentence_ = std::move(result).value(); @@ -84,7 +88,9 @@ Status QueryInstance::validateAndOptimize() { } } + // Validate the query, if failed, return NG_RETURN_IF_ERROR(Validator::validate(sentence_.get(), qctx())); + // Optimize the query, and get the execution plan NG_RETURN_IF_ERROR(findBestPlan()); stats::StatsManager::addValue(kOptimizerLatencyUs, *(qctx_->plan()->optimizeTimeInUs())); if (FLAGS_enable_space_level_metrics && spaceName != "") { @@ -209,6 +215,7 @@ void QueryInstance::addSlowQueryStats(uint64_t latency, const std::string &space } } +// Get result from query context and fill the response void QueryInstance::fillRespData(ExecutionResponse *resp) { auto ectx = DCHECK_NOTNULL(qctx_->ectx()); auto plan = DCHECK_NOTNULL(qctx_->plan()); @@ -218,7 +225,7 @@ void QueryInstance::fillRespData(ExecutionResponse *resp) { auto &&value = ectx->moveValue(name); if (!value.isDataSet()) return; - // fill dataset + // Fill dataset auto result = value.moveDataSet(); if (!result.colNames.empty()) { resp->data = std::make_unique(std::move(result)); @@ -229,6 +236,7 @@ void QueryInstance::fillRespData(ExecutionResponse *resp) { } } +// The entry point of the optimizer Status QueryInstance::findBestPlan() { auto plan = qctx_->plan(); SCOPED_TIMER(plan->optimizeTimeInUs()); diff --git a/src/graph/service/QueryInstance.h b/src/graph/service/QueryInstance.h index a702b41b7d9..5588999a4bb 100644 --- a/src/graph/service/QueryInstance.h +++ b/src/graph/service/QueryInstance.h @@ -29,6 +29,7 @@ class QueryInstance final : public boost::noncopyable, public cpp::NonMovable { QueryInstance(std::unique_ptr qctx, opt::Optimizer* optimizer); ~QueryInstance() = default; + // Entrance of the Validate, Optimize, Schedule, Execute process void execute(); QueryContext* qctx() const { @@ -51,7 +52,7 @@ class QueryInstance final : public boost::noncopyable, public cpp::NonMovable { void onError(Status); Status validateAndOptimize(); - // return true if continue to execute + // Return true if continue to execute bool explainOrContinue(); void addSlowQueryStats(uint64_t latency, const std::string& spaceName) const; void fillRespData(ExecutionResponse* resp); diff --git a/src/graph/service/RequestContext.h b/src/graph/service/RequestContext.h index 6bf3cd4fc0d..80348480680 100644 --- a/src/graph/service/RequestContext.h +++ b/src/graph/service/RequestContext.h @@ -33,7 +33,7 @@ class RequestContext final : public boost::noncopyable, public cpp::NonMovable { RequestContext() = default; ~RequestContext() { if (session_ != nullptr) { - // keep the session active + // Keep the session active session_->charge(); } } @@ -57,7 +57,7 @@ class RequestContext final : public boost::noncopyable, public cpp::NonMovable { void setSession(std::shared_ptr session) { session_ = std::move(session); if (session_ != nullptr) { - // keep the session active + // Keep the session active session_->charge(); } } From 19958f60fb172da6bf191369d5615e606e337c48 Mon Sep 17 00:00:00 2001 From: "hs.zhang" <22708345+cangfengzhs@users.noreply.github.com> Date: Sun, 13 Mar 2022 21:35:40 +0800 Subject: [PATCH 3/4] Comment (#3644) * finish comment IndexNode.h * finish comment IndexProjectionNode.h * finish comment IndexScanNode.h * address some comment Co-authored-by: Yee <2520865+yixinglu@users.noreply.github.com> --- src/storage/exec/IndexNode.h | 234 ++++++++++--- src/storage/exec/IndexProjectionNode.h | 51 ++- src/storage/exec/IndexScanNode.h | 446 +++++++++++++++++-------- 3 files changed, 520 insertions(+), 211 deletions(-) diff --git a/src/storage/exec/IndexNode.h b/src/storage/exec/IndexNode.h index 3172a981e64..82d9e75ebed 100644 --- a/src/storage/exec/IndexNode.h +++ b/src/storage/exec/IndexNode.h @@ -13,71 +13,84 @@ #include "storage/CommonUtils.h" namespace nebula { namespace storage { -/** - * IndexNode - * - * Indexnode is the base class for each node that makes up the plan tree. - * - * Member: - * `children_` : all children of the node. - * `context_` : runtime context of plan. - * `name_` : node name which should be set in derive node. - * `duration_` : used to record execution time(exclude children node's time). - * `profileDetail_` : whether record execution time or not. - * - * Function: - * The functions is divided into three parts. - * - * First part is used to build node. This part include constructor/destructor, and - * `IndexNode(const IndexNode& node)` is used to cooperate with `copy` to realize - * the deep copy of node.`copy` function needs to be implemented by the derived - * class itself. - * In fact, the build process is divided into two stages. First, the user needs to - * make various derived classes and nd organize them into a plan tree(by - * `children_`).After that, the root node of plan tree needs to call the init - * function and recursively call the init function of all child nodes, `Initcontext` - * will pass parameters between nodes to determine the data format or other - * information to be returned between nodes during execution.Note that `init` needs - * to be executed before `copy`. - * - * Second part is used to access data. - * `execute` is used to initialize some variables at the beginning of each part(e.g - * dedup set, kv iterator, etc.) - * `next` is used to iterate data. Row format has been determined during `init`. - * Batch processing and loop unrolling can be used to optimize performance if - * necessary, but there are no serious performance problems at present. - * `end` and `finish` are used to release resources at the end of execute or plan - * (e.g, external sort free disk,release schema lease if support Online DDL, commit - * write, etc.). - * However, there are no relevant requirements, so it will not be implemented for - * the time being. - * `xxx` is the interface function.It will recursive call child node's `xxx`. `doXxx` - * is the actual execution logic, and the derived class needs to override this - * function - * - * The third part is used to assist in obtaining some detailed information - */ - using ErrorCode = ::nebula::cpp2::ErrorCode; template using Map = folly::F14FastMap; template using Set = folly::F14FastSet; + +/** + * @brief Context during IndexNode init + * + * For better extensibility, IndexNode::next() will return a row with fix format every time. For + * example, a IndexNode will return a row with format "a,b,a+b" to its parent node. In this way, + * each IndexNode in the execution plan can be completely decoupled. Each node only needs to know + * two things: The first is the column of the row returned by its child nodes in the attribute or + * expression calculation result it needs. The second is that it needs to know what content and the + * corresponding location of the row it returns. + * + * But these contents are not known when IndexNode is constructed, so it needs an Init() process, + * and in this process, the content required by the parent node is notified to the child node, and + * the child node needs to inform the specific location of the parent node content.So we need + * InitContext to pass these messages. + * + * @see IndexNode, IndexNode::init(), IndexNode::next() + */ struct InitContext { - // Column required by parent node + /** + * @brief column required by partent node + * @todo The parent node may not only need the attributes of a certain column, but may also be the + * calculation result of an expression. For example, ProjectionNode/SelectionNode may need the + * calculation result of max(a) + */ Set requiredColumns; - // The format of the row returned to the parent node + /** + * @brief The format of the row returned to the parent node + * @todo the same as requiredColumns + */ std::vector returnColumns; - // The index of name in `returncolumns` + /** + * @brief The index of name in `returncolumns` + * @todo the same as requiredColumns + */ Map retColMap; // The columns in statColumns // TODO(nivras) need refactor this, put statColumns in returnColumns Set statColumns; }; +/** + * @brief IndexNode is the base class for each node that makes up the plan tree. + * + * The functions of IndexNode is divided into three parts. + * + * First part is used to build node. This part contains two stages. First, the user needs to make + * various derived classes and nd organize them into a plan tree(by `children_`).After that, the + * root node of plan tree needs to call the init function and recursively call the init function of + * all child nodes, `Initcontext` will pass parameters between nodes to determine the data format or + * other information to be returned between nodes during execution.Note that `init` needs to be + * executed before `copy`. + * + * Second part is used to access data. `execute()` initlializes some variables at the beginning of + * each part.Then, `next()` iterates data. At the end, `end()` and `finish()` are called if + * necessary. + * + * The third part contains some auxiliary functions for debugging. + * + * @see InitContext + */ + class IndexNode { public: - /* Iterate result*/ + /** + * @class Result + * @brief Iterate result of IndexNode::next() + * + * There are three options for Result: + * - not succeeded. Some error occured during iterating. + * - succeeded but there isn't any remain Row. + * - succeeded and there is a Row. + */ class Result { public: Result() : code_(ErrorCode::SUCCEEDED), empty_(true) {} @@ -92,9 +105,17 @@ class IndexNode { this->empty_ = result.empty_; return *this; } + /** + * @return true if successful + * @return false if not successful + */ inline bool success() { return code_ == ErrorCode::SUCCEEDED; } + /** + * @return true if successful and has data + * @return false if not successful or doesn't have data + */ inline bool hasData() { return success() && empty_ == false; } @@ -113,32 +134,123 @@ class IndexNode { Row row_; bool empty_{true}; }; - /* build */ + + /** + * @brief shallow copy a new IndexNode + * + * @attention children_ will be empty and must copy after init.Under normal circumstances, there + * is no need to call this function. + * + * @param node + */ + IndexNode(const IndexNode& node); + + /** + * @brief Construct a new Index Node object + * + * @param context + * @param name name of the node + */ IndexNode(RuntimeContext* context, const std::string& name); + virtual ~IndexNode() = default; + + /** + * @brief deep copy the node without children + * + * Every derived class should implement copy() by itself. + * + * @return std::unique_ptr + */ virtual std::unique_ptr copy() = 0; + + /** + * @brief add a child node + * + * @param child + */ void addChild(std::unique_ptr child) { children_.emplace_back(std::move(child)); } + + /** + * @brief return all children + * + * @return const std::vector>& + */ const std::vector>& children() { return children_; } + + /** + * @brief Initialize execute plan tree. + * + * @param initCtx + * @return ::nebula::cpp2::ErrorCode + * @attention init() should be called before copy() and execute() + * @see InitContext + */ virtual ::nebula::cpp2::ErrorCode init(InitContext& initCtx) { DCHECK_EQ(children_.size(), 1); return children_[0]->init(initCtx); } - /* execution */ + /** + * @brief `execute()` is used to initialize some variables at the beginning of each part(e.g + * dedup set, kv iterator, etc. + * + * To support add some hooks before execute() or after execute(), execute() contain three + * functions: beforeExecute(),doExecute(),afterExecute(). And doExecute() is a pure virtual + * function, derived class only need override doExecute() function. + * + * @param partId + * @return nebula::cpp2::ErrorCode + * + * @note + */ inline nebula::cpp2::ErrorCode execute(PartitionID partId); + + /** + * @brief `next()` is used to iterate data. + * + * Row format has been determined during `init()`. Batch processing and loop unrolling can be used + * to optimize performance if necessary, but there are no serious performance problems at present. + * + * Like exeucte(), next() also contains beforeNext(), doNext(), afterNext() + * + * @note + * @return Result + */ inline Result next(); + // inline nebula::cpp2::ErrorCode finish(); + // inline nebula::cpp2::ErrorCode end(); - /* assist */ + /** + * @brief return IndexNode name + * + * @return const std::string& + */ const std::string& name() { return name_; } void enableProfileDetail(); + + /** + * @brief return identify of node + * + * identity will contain node name and execute args. This is very useful for viewing the execution + * plan. + * + * @return std::string + */ + virtual std::string identify() = 0; + /** + * @brief All the time spent by next() + * + * @return const time::Duration& + */ inline const time::Duration& duration(); protected: @@ -149,11 +261,29 @@ class IndexNode { void beforeExecute(); void afterExecute(); + /** + * @brief runtime context of plan + */ RuntimeContext* context_; + /** + * @brief graph space id of plan + */ GraphSpaceID spaceId_; + /** + * @brief all children of the node + */ std::vector> children_; + /** + * @brief node name which should be set in derive node. + */ std::string name_; + /** + * @brief used to record execution time(exclude children node's time). + */ time::Duration duration_; + /** + * @brief whether record execution time or not. + */ bool profileDetail_{false}; }; diff --git a/src/storage/exec/IndexProjectionNode.h b/src/storage/exec/IndexProjectionNode.h index 633fd46283f..644951294f3 100644 --- a/src/storage/exec/IndexProjectionNode.h +++ b/src/storage/exec/IndexProjectionNode.h @@ -9,29 +9,30 @@ #include "storage/exec/IndexNode.h" namespace nebula { namespace storage { + /** + * @brief IndexProjectionNode is the class which is used to reformat the row to ensure that the + * format of the returned row meets the requirements of RPC request. * - * IndexProjectionNode - * - * reference: IndexNode - * - * `IndexProjectionNode` is the class which is used to reformat the row to ensure that the format of - * the returned row meets the requirements of RPC request. - * ┌───────────┐ - * │ IndexNode │ - * └─────┬─────┘ - * │ - * ┌──────────┴──────────┐ - * │ IndexProjectionNode │ - * └─────────────────────┘ - * - * Member: - * `requiredColumns_` : Row format required by parent node - * `colPos_` : each column position in child node return row + * @implements IndexNode + * @see IndexNode, InitContext + * @todo requiredColumns_ support expression */ class IndexProjectionNode : public IndexNode { public: + /** + * @brief shallow copy + * @param node + * @see IndexNode::IndexNode(const IndexNode& node) + */ IndexProjectionNode(const IndexProjectionNode& node); + + /** + * @brief Construct a new Index Projection Node object + * + * @param context + * @param requiredColumns the format next() will return + */ IndexProjectionNode(RuntimeContext* context, const std::vector& requiredColumns); nebula::cpp2::ErrorCode init(InitContext& ctx) override; std::unique_ptr copy() override; @@ -39,8 +40,24 @@ class IndexProjectionNode : public IndexNode { private: Result doNext() override; + + /** + * @brief according to required columns, adjust column order or calculate expression + * + * @param row + * @return Row + */ Row project(Row&& row); + + /** + * @brief Row format required by parent node + * + */ std::vector requiredColumns_; + + /** + * @brief each column position in child node return row + */ Map colPos_; }; } // namespace storage diff --git a/src/storage/exec/IndexScanNode.h b/src/storage/exec/IndexScanNode.h index 00cb9f12057..bc8774c78b2 100644 --- a/src/storage/exec/IndexScanNode.h +++ b/src/storage/exec/IndexScanNode.h @@ -19,114 +19,19 @@ namespace nebula { namespace storage { +class Path; +class QualifiedStrategySet; + /** - * - * IndexScanNode - * - * reference: IndexNode, IndexVertexScanNode, IndexEdgeScanNode - * - * `IndexScanNode` is the base class of the node which need to access disk. It has two derive - * class `IndexVertexScanNode` and `IndexEdgeScanNode` - * - * ┌───────────┐ - * │ IndexNode │ - * └─────┬─────┘ - * │ - * ┌───────┴───────┐ - * │ IndexScanNode │ - * └───────┬───────┘ - * ┌───────────┴────────────┐ - * ┌──────────┴──────────┐ ┌───────────┴─────────┐ - * │ IndexVertexScanNode │ │ IndexEdgeScanNode │ - * └─────────────────────┘ └─────────────────────┘ + * @brief `IndexScanNode` is the base class of the node which need to access disk. It has two derive + * class `IndexVertexScanNode` and `IndexEdgeScanNode`. * * `IndexScanNode` will access index data, and then access base data if necessary. * - * Member: - * `indexId_` : index_ in this Node to access - * `partId_` : part to access.It will be modify while `doExecute` - * `index_` : index definition - * `indexNullable_` : if index contain nullable field or not - * `columnHints_` : - * `path_` : - * `iter_` : current kvstore iterator.It while be reset `doExecute` and iterated - * during `doNext` - * `kvstore_` : server kvstore - * `requiredColumns_` : row format that `doNext` needs to return - * `requiredAndHintColumns_`: columns that `decodeFromBase` needs to decode - * `ttlProps` : ttl properties `needAccessBase_` : if need - * `fatalOnBaseNotFound_` : for debug - * - * Function: - * `decodePropFromIndex` : decode properties from Index key.It will be called by - * `decodeFromIndex` - * `decodeFromIndex` : decode all column in `requiredColumns_` by index - * key-value. - * `getBaseData` : get key-value of base data `decodeFromBase` : get - * all values that `requiredAndHintColumns_` required - * `checkTTL` : check data is - * expired or not - * ------------------------------------------------------------- - * - * Path - * - * `Path` is the most important part of `IndexScanNode`. By analyzing `ColumnHint`, it obtains - * the mode(Prefix or Range) and range(key of Prefix or [start,end) of Range) of keys that - * `IndexScanNode` need to query in kvstore. - * - * `Path` not only generate the key to access, but also `qualified` whether the key complies with - * the columnhint constraint or not.For example, if there is a truncated string index, we cannot - * simply compare bytes to determine whether the current key complies with the columnhints - * constraint, the result of `qualified(bytes)` should be `UNCERTAIN` and `IndexScanNode` will - * access base data then `Path` reconfirm `ColumnHint` constraint by `qualified(RowData)`. In - * addition to the above examples, there are other cases to deal with.`Path` and it's derive class - * will dynamic different strategy by `ColumnHint`,`IndexItem`,and `Schema`.All strategy will be - * added to `QFList_`(QualifiedFunctionList) during `buildKey`, and executed during `qualified`. - * - * `Path` will be reset when `IndexScanNode` execute on a new part. - * - * It should be noted that the range generated by `rangepath` is a certain left included and right - * excluded interval,like [startKey_, endKey_), although `ColumnHint` may have many different - * constraint ranges(e.g., (x, y],(INF,y),(x,INF)). Because the length of index key is fixed, the - * way to obtain **the smallest key greater than 'x'** is to append several '\xFF' after until the - * length of 'x' is greater than the length of the indexkey. - * - * - * Member: - * `QFList_` : all Qualified strategy need to executed during qualified - * `nullable_` : if `index_` contain nullable field, `nullable_[i]` is equal to - * `index_->fields[i].nullable`,else `nullable_` is empty - * `index_nullable_offset_` : Participate in the index key encode diagram - * `totalKeyLength_` : Participate in the index key encode diagram - * `suffixLength_` : Participate in the index key encode diagram - * `serializeString_` : a string express path - * - * Index Key Encode: - * ┌──────┬─────────────┬────────────────┬──────────┬─────────────────────────────────────────┐ - * │ type | PartitionID | Indexed Values | nullable | suffix({vid} or {srcId,rank,dstId}) | - * │ 1byte| 3 bytes | n bytes | 0/2 bytes| vid.length or vid.length*2+sizeof(rank) | - * └──────┴─────────────┴────────────────┴──────────┴─────────────────────────────────────────┘ - * │ └───────────────────┬─────────────────────┘ - * index_nullable_offset_ suffixLength_ - * └──────────────────────────────────┬───────────────────────────────────────────────────────┘ - * totalKeyLength_ - * - * Function: - * `make` : construct `PrefixPath` or `RangePath` according to `hints` - * `qualified(StringPiece)` : qualified key by bytes - * `qualified(Map)` : qualified row by value - * `resetPart` : reset current partitionID and reset `iter_` - * `encodeValue` : encode a Value to bytes - * - * - * ------------------------------------------------------------- - * - * + * @implements IndexNode + * @see IndexNode, IndexVertexScanNode, IndexEdgeScanNode * */ - -class Path; -class QualifiedStrategySet; class IndexScanNode : public IndexNode { FRIEND_TEST(IndexScanTest, Base); FRIEND_TEST(IndexScanTest, Vertex); @@ -135,7 +40,22 @@ class IndexScanNode : public IndexNode { friend class IndexScanTestHelper; public: + /** + * @brief shallow copy. + * @attention This constructor will create a new Path + * @see IndexNode::IndexNode(const IndexNode& node) + */ IndexScanNode(const IndexScanNode& node); + + /** + * @brief Construct a new Index Scan Node object + * + * @param context + * @param name + * @param indexId + * @param columnHints + * @param kvstore + */ IndexScanNode(RuntimeContext* context, const std::string& name, IndexID indexId, @@ -148,27 +68,102 @@ class IndexScanNode : public IndexNode { protected: nebula::cpp2::ErrorCode doExecute(PartitionID partId) final; Result doNext() final; + + /** + * @brief decode values from index key + * + * @param key index key + * @param colPosMap needed columns and these position(order) in values + * @param values result.Its order must meet the requirements of colPosMap. + */ void decodePropFromIndex(folly::StringPiece key, const Map& colPosMap, std::vector& values); + /** + * @brief decode all props from index key + * + * decodePropFromIndex() will decode props who are defined in tag/edge properties.And, + * IndexScanNode sometime needs not only those but alse vid,edge_type,tag_id. decodeFromIndex() + * should be override by derived class and decode these special prop and then call + * decodePropFromIndex() to decode general props. + * + * @param key index key + * @return Row + * @see decodePropFromIndex + */ virtual Row decodeFromIndex(folly::StringPiece key) = 0; + + /** + * @brief get the base data key-value according to index key + * + * @param key index key + * @param kv base data key-value + * @return nebula::cpp2::ErrorCode + */ virtual nebula::cpp2::ErrorCode getBaseData(folly::StringPiece key, std::pair& kv) = 0; + + /** + * @brief decode all props from base data key-value. + * + * @param key base data key + * @param value base data value + * @return Map + */ virtual Map decodeFromBase(const std::string& key, const std::string& value) = 0; virtual const std::vector>& getSchema() = 0; + + /** + * @brief Check whether the indexkey has expired + * + * @return true + * @return false + */ bool checkTTL(); + + /** + * @brief start query a new part + * + * @param partId + * @return nebula::cpp2::ErrorCode + * @see Path + */ nebula::cpp2::ErrorCode resetIter(PartitionID partId); PartitionID partId_; + /** + * @brief index_ in this Node to access + */ const IndexID indexId_; + /** + * @brief index definition + */ std::shared_ptr index_; + /** + * @brief if index contain nullable field or not + */ bool indexNullable_ = false; const std::vector& columnHints_; + /** + * @see Path + */ std::unique_ptr path_; + /** + * @brief current kvstore iterator.It while be reset `doExecute` and iterated during `doNext` + */ std::unique_ptr iter_; nebula::kvstore::KVStore* kvstore_; + /** + * @brief row format that `doNext` needs to return + */ std::vector requiredColumns_; + /** + * @brief columns that `decodeFromBase` needs to decode + */ Set requiredAndHintColumns_; + /** + * @brief ttl properties `needAccessBase_ + */ std::pair> ttlProps_; bool needAccessBase_{false}; bool fatalOnBaseNotFound_{false}; @@ -176,9 +171,16 @@ class IndexScanNode : public IndexNode { }; class QualifiedStrategy { public: + /** + * @brief qualified result + * - INCOMPATIBLE: Meets strategy constraints + * - UNCERTAIN: uncertain + * - COMPATIBLE: Does not meet the strategy constraints + */ enum Result { INCOMPATIBLE = 0, UNCERTAIN = 1, COMPATIBLE = 2 }; + /** - * checkNull + * @brief check target column is Null or not * * There are two overload `checkNull` functions: * 1. First one which is with template arg `targetIsNull`, checks `columnIndex` at `nullable` @@ -186,16 +188,13 @@ class QualifiedStrategy { * 2. The other one which is without template, filters key whose `columnIndex` at `nullable` is * true * - * Args: - * `columnIndex` : Index of column. **NOTE** , however, that the order in nullable bytes is - * reversed - * `keyOffset` : Reference `Index Key Encode` -> `index_nullable_offset_` - * - * Return: - * For convenience, we define a variable x.When the value at `columnIndex` is null, x is true, - * Otherwise x is false. - * 1.With template.Return COMPATIBLE if `x`==`targetIsNull`,else INCOMPATIBLE - * 2.Without template.Return COMPATIBLE if `x`==false, else INCOMPATIBLE + * @param columnIndex Index of column. **NOTE** , however, that the order in nullable bytes is + * reversed + * @param keyOffset Reference `Index Key Encode` -> `index_nullable_offset_` + * @return For convenience, we define a variable x.When the value at `columnIndex` is null, x is + * true, Otherwise x is false. + * - With template.Return COMPATIBLE if `x`==`targetIsNull`,else INCOMPATIBLE + * - Without template.Return COMPATIBLE if `x`==false, else INCOMPATIBLE */ template static QualifiedStrategy checkNull(size_t columnIndex, size_t keyOffset) { @@ -220,16 +219,12 @@ class QualifiedStrategy { return q; } /** - * checkNaN - * + * @brief check float number is NaN or not * Only for double. Check the value at `keyOffset` in indexKey is NaN or not. The logic here needs * to be coordinated with the encoding logic of double numbers. * - * Args: - * `keyOffset` : value offset at indexKey - * - * Return: - * Return INCOMPATIBLE if v==Nan else COMPATIBLE; + * @param keyOffset value offset at indexKey + * @return Return INCOMPATIBLE if v==Nan else COMPATIBLE */ static QualifiedStrategy checkNaN(size_t keyOffset) { const char* chr = "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"; // '\xFF' * 8 @@ -240,19 +235,18 @@ class QualifiedStrategy { }; return q; } + /** - * dedupGeoIndex + * @brief dedup geo index data * * Because a `GEOGRAPHY` type data will generate multiple index keys pointing to the same base * data,the base data pointed to by the indexkey should be de duplicated. * - * Args: - * `dedupSuffixLength` : If indexed schema is a tag, `dedupSuffixLength` should be vid.len; - * If the indexed schema is an edge, `dedupSuffixLength` should be - * srcId.len+sizeof(rank)+dstId.len - * Return: - * When suffix first appears, the function returns `COMPATIBLE`; otherwise, the function returns - * `INCOMPATIBLE` + * @param dedupSuffixLength If indexed schema is a tag, `dedupSuffixLength` should be vid.len; If + * the indexed schema is an edge, `dedupSuffixLength` should be srcId.len+sizeof(rank)+dstId.len + * + * @return When suffix first appears, the function returns `COMPATIBLE`; otherwise, the function + * returns `INCOMPATIBLE` */ static QualifiedStrategy dedupGeoIndex(size_t dedupSuffixLength) { QualifiedStrategy q; @@ -264,10 +258,12 @@ class QualifiedStrategy { }; return q; } + /** - * constant + * @brief always return a constant * - * Always return `result` + * @tparam result to return + * @return return */ template static QualifiedStrategy constant() { @@ -276,7 +272,7 @@ class QualifiedStrategy { return q; } /** - * compareTruncated + * @brief compare truncated string * * For a `String` type index, `val` may be truncated, and it is not enough to determine whether * the indexkey complies with the constraint of columnhint only through the interval limit of @@ -286,13 +282,10 @@ class QualifiedStrategy { * (ab)c means that string is "abc" but index val has been truncated to "ab". (ab)c > ab is * `UNCERTAIN`, and (ab)c > aa is COMPATIBLE. * - * Args: - * `LEorGE` : It's an assist arg. true means LE and false means GE. - * `val` : Truncated `String` index value,whose length has been define in `IndexItem`. - * `keyStartPos` : The position in indexKey where start compare with `val` - * - * Return: - * Return `COMPATIBLE` if `val` is `LEorGE` than indexKey.Otherwise, return `UNCERTAIN`. + * @tparam LEorGE It's an assist arg. true means LE and false means GE. + * @param val Truncated `String` index value,whose length has been define in `IndexItem`. + * @param keyStartPos The position in indexKey where start compare with `val` + * @return Return `COMPATIBLE` if `val` is `LEorGE` than indexKey.Otherwise, return `UNCERTAIN`. */ template static QualifiedStrategy compareTruncated(const std::string& val, size_t keyStartPos) { @@ -308,7 +301,7 @@ class QualifiedStrategy { }; return q; } - // call + inline Result operator()(const folly::StringPiece& key); private: @@ -323,10 +316,35 @@ class QualifiedStrategySet { std::vector strategyList_; }; +/** + * @brief `Path` is the range and constraints generated according to `ColumnHints`. + * + * `Path` is the most important part of `IndexScanNode`. By analyzing `ColumnHint`, it obtains + * the mode(`Prefix` or `Range`) and range(key of `Prefix` or [start,end) of `Range`) of keys that + * `IndexScanNode` need to query in kvstore. + * + * `Path` not only generate the key to access, but also `qualified` whether the key complies with + * the columnhint constraint or not.For example, if there is a truncated string index, we cannot + * simply compare bytes to determine whether the current key complies with the columnhints + * constraint, the result of `qualified(bytes)` should be `UNCERTAIN` and `IndexScanNode` will + * access base data then `Path` reconfirm `ColumnHint` constraint by `qualified(RowData)`. In + * addition to the above examples, there are other cases to deal with.`Path` and it's derive class + * will dynamic different strategy by `ColumnHint`,`IndexItem`,and `Schema`.All strategy will be + * added to `strategySet_`(QualifiedStrategySet) during `buildKey`, and executed during `qualified`. + * + * `Path` will be reset when `IndexScanNode` execute on a new part. + * + * It should be noted that the range generated by `rangepath` is a certain left included and right + * excluded interval,like [startKey_, endKey_), although `ColumnHint` may have many different + * constraint ranges(e.g., (x, y],(INF,y),(x,INF)). Because the length of index key is fixed, the + * way to obtain **the smallest key greater than 'x'** is to append several '\xFF' after until the + * length of 'x' is greater than the length of the indexkey. + * + * @see QualifiedStrategySet + * @todo + */ class Path { public: - // enum class Qualified : int16_t { INCOMPATIBLE = 0, UNCERTAIN = 1, COMPATIBLE = 2 }; - // using QualifiedFunction = std::function; using ColumnTypeDef = ::nebula::meta::cpp2::ColumnTypeDef; Path(nebula::meta::cpp2::IndexItem* index, const meta::SchemaProviderIf* schema, @@ -334,73 +352,200 @@ class Path { int64_t vidLen); virtual ~Path() = default; + /** + * @brief analyze hints and build a PrefixPath or RangePath + * + * @param index index of path + * @param schema indexed tag or edge + * @param hints property constraints.Like a=1, 1 + */ static std::unique_ptr make(::nebula::meta::cpp2::IndexItem* index, const meta::SchemaProviderIf* schema, const std::vector& hints, int64_t vidLen); + + /** + * @brief apply all qulified strategy on indexKey. + * + * @param key indexKey + * @return QualifiedStrategy::Result + * @see QualifiedStrategy::Result + */ QualifiedStrategy::Result qualified(const folly::StringPiece& key); + virtual bool isRange() { return false; } + /** + * @brief apply all qulified strategy on base data with format map. + * + * @param rowData map who contain all base data + * @return QualifiedStrategy::Result + * @see QualifiedStrategy::Result + */ virtual QualifiedStrategy::Result qualified(const Map& rowData) = 0; + + /** + * @brief reset the current part + * + * @param partId + * @see IndexScanNode::execute(PartitionID partId) + */ virtual void resetPart(PartitionID partId) = 0; + + /** + * @brief Seralize Path to string + * + * @return const std::string& + */ const std::string& toString(); protected: + /** + * @brief encoding value to bytes who make up indexKey + * + * If there is truncated string or null type data, additional qualified strategies need to be + * added. + * + * @param value the value to be encoded + * @param colDef value's column definition + * @param index position of colDef in IndexItem.Needed by checking nullable. + * @param key bytes after encode need to be append to key + * @return std::string + */ std::string encodeValue(const Value& value, const ColumnTypeDef& colDef, size_t index, std::string& key); + /** + * @brief strategy set of current path + * @see QualifiedStrategySet + */ QualifiedStrategySet strategySet_; + /** + * @brief index of current path + */ ::nebula::meta::cpp2::IndexItem* index_; + /** + * @brief tag/edge schema of current path + */ const meta::SchemaProviderIf* schema_; + /** + * @brief IndexColumnHints of current path + */ const std::vector hints_; + /** + * @brief if `index_` contain nullable field, `nullable_[i]` is equal to + * `index_->fields[i].nullable`,else `nullable_` is empty + * + * Index Key Encode: + * ┌──────┬─────────────┬────────────────┬──────────┬─────────────────────────────────────────┐ + * │ type | PartitionID | Indexed Values | nullable | suffix({vid} or {srcId,rank,dstId}) | + * │ 1byte| 3 bytes | n bytes | 0/2 bytes| vid.length or vid.length*2+sizeof(rank) | + * └──────┴─────────────┴────────────────┴──────────┴─────────────────────────────────────────┘ + * │ └───────────────────┬─────────────────────┘ + * index_nullable_offset_ suffixLength_ + * └──────────────────────────────────┬───────────────────────────────────────────────────────┘ + * totalKeyLength_ + */ std::vector nullable_; + + /** + * @brief Participate in the index key encode diagram + */ int64_t index_nullable_offset_{8}; + + /** + * @brief Participate in the index key encode diagram + */ int64_t totalKeyLength_{8}; + + /** + * @brief Participate in the index key encode diagram + */ int64_t suffixLength_; std::string serializeString_; }; + +/** + * @brief A derived class of Path who has a prefix query, like a=1 and b=2 + */ class PrefixPath : public Path { public: + /** + * @brief Construct a new Prefix Path object + * @see Path + */ PrefixPath(nebula::meta::cpp2::IndexItem* index, const meta::SchemaProviderIf* schema, const std::vector& hints, int64_t vidLen); - // Override + + /** + * @brief override + * @param rowData + * @return QualifiedStrategy::Result + */ QualifiedStrategy::Result qualified(const Map& rowData) override; + void resetPart(PartitionID partId) override; + /** + * @brief get prefix key + * + * @return const std::string& + */ const std::string& getPrefixKey() { return prefix_; } private: + /** + * @brief the bytes who is used to query in kvstore + */ std::string prefix_; + + /** + * @brief build prefix_ + */ void buildKey(); }; +/** + * @brief A derived class of Path who has a range query, like a=1 and b<2 + */ class RangePath : public Path { public: + /** + * @brief Construct a new Range Path object + * @see Path + */ RangePath(nebula::meta::cpp2::IndexItem* index, const meta::SchemaProviderIf* schema, const std::vector& hints, int64_t vidLen); QualifiedStrategy::Result qualified(const Map& rowData) override; + void resetPart(PartitionID partId) override; inline bool includeStart() { return includeStart_; } + inline bool includeEnd() { return includeEnd_; } + inline const std::string& getStartKey() { return startKey_; } + inline const std::string& getEndKey() { return endKey_; } + bool isRange() override { return true; } @@ -410,18 +555,35 @@ class RangePath : public Path { bool includeStart_ = true; bool includeEnd_ = false; + /** + * @brief build startKey_ and endKey_ + */ void buildKey(); + + /** + * @brief encode hint into interval [start,end),and start and end are bytes. + * + * @param hint + * @param colTypeDef + * @param colIndex + * @param offset + * @return std::tuple + */ std::tuple encodeRange( const cpp2::IndexColumnHint& hint, const nebula::meta::cpp2::ColumnTypeDef& colTypeDef, size_t colIndex, size_t offset); + inline std::string encodeString(const Value& value, size_t len, bool& truncated); + inline std::string encodeFloat(const Value& value, bool& isNaN); + std::string encodeBeginValue(const Value& value, const ColumnTypeDef& colDef, std::string& key, size_t offset); + std::string encodeEndValue(const Value& value, const ColumnTypeDef& colDef, std::string& key, From 25997c432f9a7b29b87405132b9275157040b427 Mon Sep 17 00:00:00 2001 From: Yichen Wang <18348405+Aiee@users.noreply.github.com> Date: Sun, 13 Mar 2022 22:06:59 +0800 Subject: [PATCH 4/4] Fix crash when use `executeJson()` with profile (#3998) * Fix profile json response * Add UT Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- src/common/graph/Response.h | 49 +++++++++++-------- .../graph/tests/ResponseEncodeDecodeTest.cpp | 20 ++++++++ 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/src/common/graph/Response.h b/src/common/graph/Response.h index ee7c0165bdd..7c4679ef35f 100644 --- a/src/common/graph/Response.h +++ b/src/common/graph/Response.h @@ -185,7 +185,7 @@ namespace nebula { -#define X(EnumName, EnumNumber) EnumName = EnumNumber, +#define X(EnumName, EnumNumber) EnumName = (EnumNumber), enum class ErrorCode { ErrorCodeEnums }; @@ -295,7 +295,9 @@ struct ProfilingStats { ProfilingStatsObj.insert("rows", rows); ProfilingStatsObj.insert("execDurationInUs", execDurationInUs); ProfilingStatsObj.insert("totalDurationInUs", totalDurationInUs); - ProfilingStatsObj.insert("otherStats", folly::toDynamic(*otherStats)); + if (otherStats) { + ProfilingStatsObj.insert("otherStats", folly::toDynamic(*otherStats)); + } return ProfilingStatsObj; } @@ -323,7 +325,7 @@ struct PlanNodeBranchInfo { } // True if loop body or then branch of select - bool isDoBranch{0}; + bool isDoBranch{false}; // select/loop node id int64_t conditionNodeId{-1}; @@ -407,22 +409,29 @@ struct PlanNodeDescription { planNodeDescObj.insert("id", id); planNodeDescObj.insert("outputVar", outputVar); - auto descriptionObj = folly::dynamic::array(); - descriptionObj.resize(description->size()); - std::transform( - description->begin(), description->end(), descriptionObj.begin(), [](const auto &ele) { - return ele.toJson(); - }); - planNodeDescObj.insert("description", descriptionObj); - - auto profilesObj = folly::dynamic::array(); - profilesObj.resize(profiles->size()); - std::transform(profiles->begin(), profiles->end(), profilesObj.begin(), [](const auto &ele) { - return ele.toJson(); - }); - planNodeDescObj.insert("profiles", profilesObj); - planNodeDescObj.insert("branchInfo", branchInfo->toJson()); - planNodeDescObj.insert("dependencies", folly::toDynamic(*dependencies)); + if (description) { + auto descriptionObj = folly::dynamic::array(); + descriptionObj.resize(description->size()); + std::transform( + description->begin(), description->end(), descriptionObj.begin(), [](const auto &ele) { + return ele.toJson(); + }); + planNodeDescObj.insert("description", descriptionObj); + } + if (profiles) { + auto profilesObj = folly::dynamic::array(); + profilesObj.resize(profiles->size()); + std::transform(profiles->begin(), profiles->end(), profilesObj.begin(), [](const auto &ele) { + return ele.toJson(); + }); + planNodeDescObj.insert("profiles", profilesObj); + } + if (branchInfo) { + planNodeDescObj.insert("branchInfo", branchInfo->toJson()); + } + if (dependencies) { + planNodeDescObj.insert("dependencies", folly::toDynamic(*dependencies)); + } return planNodeDescObj; } @@ -536,7 +545,7 @@ struct ExecutionResponse { std::unique_ptr planDesc{nullptr}; std::unique_ptr comment{nullptr}; - // Return the response as a JSON string + // Returns the response as a JSON string // only errorCode and latencyInUs are required fields, the rest are optional // if the dataset contains a value of TIME or DATETIME, it will be returned in UTC. // diff --git a/src/common/graph/tests/ResponseEncodeDecodeTest.cpp b/src/common/graph/tests/ResponseEncodeDecodeTest.cpp index de61bc32de8..1365f2b7f4d 100644 --- a/src/common/graph/tests/ResponseEncodeDecodeTest.cpp +++ b/src/common/graph/tests/ResponseEncodeDecodeTest.cpp @@ -89,6 +89,26 @@ TEST(ResponseEncodeDecodeTest, Basic) { } TEST(ResponseEncodeDecodeTest, ToJson) { + // PlanNodeDescription + { + // Dummy data + PlanNodeDescription pnd; + pnd.name = "name"; + pnd.id = 100; + pnd.outputVar = "outputVar"; + pnd.description = nullptr; + pnd.profiles = nullptr; + pnd.branchInfo = nullptr; + pnd.dependencies = nullptr; + + folly::dynamic jsonObj = pnd.toJson(); + folly::dynamic expect = folly::dynamic::object(); + expect.insert("name", "name"); + expect.insert("id", 100); + expect.insert("outputVar", "outputVar"); + + ASSERT_EQ(jsonObj, expect); + } // plan description { std::vector pds;