From c41b1a29079c42c05a25f7181b8a65eb11b5d04d Mon Sep 17 00:00:00 2001 From: "lionel.liu@vesoft.com" <52276794+liuyu85cn@users.noreply.github.com> Date: Wed, 26 Jan 2022 19:05:51 +0800 Subject: [PATCH 1/5] add comments & accumulate bug fix for TOSS (#3643) * 1. add comments & adjust some log level 2. accumulate bug fix for TOSS 2.1 one space may be affected by another space when recovering prime edges. 2.2 when calling kvstore->get(), if leaderLeaseInvalid, it will report leader_change; change it to leader_lease_failed. 2.3 print the full execution trace in ChainProcessor::finish(), making it much more grep-friendly. 2.4 some functions of the memory lock are hard to understand (forceLock, forceUnlock); change them to "setAutoUnlock(bool)" modify update fix compile error adjust finish() adjust finish() add rcPrepare_ = succeeded adjust delete local processor adjust log use rcPrepare, rcRemote rcCommit fix resume add edges adjust UT disable ioThreadPoolForMeta_ make int64 vid printable adjust some log to vlog 2 only print prepare succeeded result only print readable in finish() of resume processor adjust some log some log not print term desc class name in uuid debug add some more log looks like factory may fail? 
add leader lease error code add check when get lock report leader lease do some clean change table prefix to NebulaKeyUtil Style change some print disable UT for short clean compile error disable pool monitor * unify add/delete finish() as update * not stable will add it back later * fix a clang warning * fix a clang10 warning 2 Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- src/clients/storage/InternalStorageClient.cpp | 6 +- src/common/utils/MemoryLockWrapper.h | 14 +- src/interface/common.thrift | 1 + src/kvstore/NebulaStore.cpp | 3 +- src/meta/upgrade/v2/meta.thrift | 4 +- src/mock/MockCluster.cpp | 1 + src/storage/CMakeLists.txt | 9 +- src/storage/InternalStorageServiceHandler.h | 7 +- src/storage/StorageServer.cpp | 1 + src/storage/index/LookupProcessor.h | 3 + src/storage/kv/GetProcessor.h | 3 + src/storage/kv/PutProcessor.h | 4 +- src/storage/kv/RemoveProcessor.h | 3 + src/storage/test/CMakeLists.txt | 15 - src/storage/test/ChainAddEdgesTest.cpp | 48 +--- src/storage/test/ChainDeleteEdgesTest.cpp | 73 ++--- src/storage/test/ChainResumeEdgeTest.cpp | 174 ++++++------ src/storage/test/ChainTestUtils.h | 143 +++++++--- src/storage/test/ChainUpdateEdgeTest.cpp | 74 ++--- .../ChainAddEdgesLocalProcessor.cpp | 263 +++++++----------- .../transaction/ChainAddEdgesLocalProcessor.h | 37 ++- .../ChainAddEdgesRemoteProcessor.cpp | 4 +- src/storage/transaction/ChainBaseProcessor.h | 14 +- .../ChainDeleteEdgesLocalProcessor.cpp | 154 +++++----- .../ChainDeleteEdgesLocalProcessor.h | 3 +- .../ChainDeleteEdgesResumeProcessor.cpp | 22 +- .../ChainDeleteEdgesResumeRemoteProcessor.cpp | 67 +++-- .../ChainDeleteEdgesResumeRemoteProcessor.h | 4 +- .../transaction/ChainProcessorFactory.cpp | 50 +++- .../transaction/ChainProcessorFactory.h | 10 +- .../ChainResumeAddDoublePrimeProcessor.cpp | 78 ++++++ ...h => ChainResumeAddDoublePrimeProcessor.h} | 12 +- .../ChainResumeAddPrimeProcessor.cpp | 76 +++++ ...essor.h => ChainResumeAddPrimeProcessor.h} | 12 
+- .../transaction/ChainResumeProcessor.cpp | 68 ----- .../transaction/ChainResumeProcessor.h | 31 --- .../ChainResumeUpdateDoublePrimeProcessor.cpp | 63 +++++ ...> ChainResumeUpdateDoublePrimeProcessor.h} | 10 +- ...pp => ChainResumeUpdatePrimeProcessor.cpp} | 35 ++- ...or.h => ChainResumeUpdatePrimeProcessor.h} | 12 +- .../ChainUpdateEdgeLocalProcessor.cpp | 180 +++++++----- .../ChainUpdateEdgeLocalProcessor.h | 11 +- src/storage/transaction/ConsistUtil.cpp | 79 +++--- src/storage/transaction/ConsistUtil.h | 14 +- .../transaction/ResumeAddEdgeProcessor.cpp | 70 ----- .../ResumeAddEdgeRemoteProcessor.cpp | 69 ----- .../ResumeUpdateRemoteProcessor.cpp | 61 ---- .../transaction/TransactionManager.cpp | 261 +++++++++-------- src/storage/transaction/TransactionManager.h | 115 +++++--- 49 files changed, 1263 insertions(+), 1208 deletions(-) create mode 100644 src/storage/transaction/ChainResumeAddDoublePrimeProcessor.cpp rename src/storage/transaction/{ResumeAddEdgeProcessor.h => ChainResumeAddDoublePrimeProcessor.h} (59%) create mode 100644 src/storage/transaction/ChainResumeAddPrimeProcessor.cpp rename src/storage/transaction/{ResumeAddEdgeRemoteProcessor.h => ChainResumeAddPrimeProcessor.h} (67%) delete mode 100644 src/storage/transaction/ChainResumeProcessor.cpp delete mode 100644 src/storage/transaction/ChainResumeProcessor.h create mode 100644 src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.cpp rename src/storage/transaction/{ResumeUpdateRemoteProcessor.h => ChainResumeUpdateDoublePrimeProcessor.h} (67%) rename src/storage/transaction/{ResumeUpdateProcessor.cpp => ChainResumeUpdatePrimeProcessor.cpp} (55%) rename src/storage/transaction/{ResumeUpdateProcessor.h => ChainResumeUpdatePrimeProcessor.h} (67%) delete mode 100644 src/storage/transaction/ResumeAddEdgeProcessor.cpp delete mode 100644 src/storage/transaction/ResumeAddEdgeRemoteProcessor.cpp delete mode 100644 src/storage/transaction/ResumeUpdateRemoteProcessor.cpp diff --git 
a/src/clients/storage/InternalStorageClient.cpp b/src/clients/storage/InternalStorageClient.cpp index ad2a7c58960..67e635c318b 100644 --- a/src/clients/storage/InternalStorageClient.cpp +++ b/src/clients/storage/InternalStorageClient.cpp @@ -22,6 +22,8 @@ ::nebula::cpp2::ErrorCode getErrorCode(T& tryResp) { switch (stResp.status().code()) { case Status::Code::kLeaderChanged: return nebula::cpp2::ErrorCode::E_LEADER_CHANGED; + case Status::Code::kError: + return nebula::cpp2::ErrorCode::E_RPC_FAILURE; default: LOG(ERROR) << "not impl error transform: code=" << static_cast(stResp.status().code()); @@ -69,8 +71,8 @@ void InternalStorageClient::chainUpdateEdge(cpp2::UpdateEdgeRequest& reversedReq std::move(resp).thenTry([=, p = std::move(p)](auto&& t) mutable { auto code = getErrorCode(t); + VLOG(1) << "chainUpdateEdge rpc: " << apache::thrift::util::enumNameSafe(code); if (code == ::nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { - std::this_thread::sleep_for(std::chrono::milliseconds(500)); chainUpdateEdge(reversedRequest, termOfSrc, optVersion, std::move(p)); } else { p.setValue(code); @@ -108,7 +110,6 @@ void InternalStorageClient::chainAddEdges(cpp2::AddEdgesRequest& directReq, std::move(resp).thenTry([=, p = std::move(p)](auto&& t) mutable { auto code = getErrorCode(t); if (code == nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { - std::this_thread::sleep_for(std::chrono::milliseconds(500)); chainAddEdges(directReq, termId, optVersion, std::move(p)); } else { p.setValue(code); @@ -165,7 +166,6 @@ void InternalStorageClient::chainDeleteEdges(cpp2::DeleteEdgesRequest& req, std::move(resp).thenTry([=, p = std::move(p)](auto&& t) mutable { auto code = getErrorCode(t); if (code == nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { - std::this_thread::sleep_for(std::chrono::milliseconds(500)); chainDeleteEdges(req, txnId, termId, std::move(p)); } else { p.setValue(code); diff --git a/src/common/utils/MemoryLockWrapper.h b/src/common/utils/MemoryLockWrapper.h index 
cf0db2d807b..5378a6d0a0b 100644 --- a/src/common/utils/MemoryLockWrapper.h +++ b/src/common/utils/MemoryLockWrapper.h @@ -51,7 +51,7 @@ class MemoryLockGuard { } ~MemoryLockGuard() { - if (locked_) { + if (locked_ && autoUnlock_) { lock_->unlockBatch(keys_); } } @@ -71,15 +71,8 @@ class MemoryLockGuard { return *iter_; } - // this will manual set the lock to unlocked state - // which mean will not release all locks automatically - // please make sure you really know the side effect - void forceLock() { - locked_ = true; - } - - void forceUnlock() { - locked_ = false; + void setAutoUnlock(bool autoUnlock) { + autoUnlock_ = autoUnlock; } protected: @@ -87,6 +80,7 @@ class MemoryLockGuard { std::vector keys_; typename std::vector::iterator iter_; bool locked_{false}; + bool autoUnlock_{true}; }; } // namespace nebula diff --git a/src/interface/common.thrift b/src/interface/common.thrift index 84e70effdcc..695bb4e2f04 100644 --- a/src/interface/common.thrift +++ b/src/interface/common.thrift @@ -497,6 +497,7 @@ enum ErrorCode { E_RAFT_WRITE_BLOCKED = -3528, E_RAFT_BUFFER_OVERFLOW = -3529, E_RAFT_ATOMIC_OP_FAILED = -3530, + E_LEADER_LEASE_FAILED = -3531, E_UNKNOWN = -8000, } (cpp.enum_strict) diff --git a/src/kvstore/NebulaStore.cpp b/src/kvstore/NebulaStore.cpp index 43623e22ef0..18cab45c9ed 100644 --- a/src/kvstore/NebulaStore.cpp +++ b/src/kvstore/NebulaStore.cpp @@ -584,7 +584,8 @@ nebula::cpp2::ErrorCode NebulaStore::get(GraphSpaceID spaceId, } auto part = nebula::value(ret); if (!checkLeader(part, canReadFromFollower)) { - return nebula::cpp2::ErrorCode::E_LEADER_CHANGED; + return part->isLeader() ? 
nebula::cpp2::ErrorCode::E_LEADER_LEASE_FAILED + : nebula::cpp2::ErrorCode::E_LEADER_CHANGED; } return part->engine()->get(key, value); } diff --git a/src/meta/upgrade/v2/meta.thrift b/src/meta/upgrade/v2/meta.thrift index d46922478b8..8dbe888140f 100644 --- a/src/meta/upgrade/v2/meta.thrift +++ b/src/meta/upgrade/v2/meta.thrift @@ -26,7 +26,7 @@ struct SpaceDesc { 3: i32 replica_factor = 0, 4: binary charset_name, 5: binary collate_name, - 6: ColumnTypeDef vid_type = {"type": PropertyType.FIXED_STRING, "type_length": 8}, + 6: ColumnTypeDef vid_type = {"type": "PropertyType.FIXED_STRING", "type_length": 8}, 7: optional binary group_name, 8: optional IsolationLevel isolation_level, 9: optional binary comment, @@ -78,4 +78,4 @@ struct ColumnTypeDef { enum IsolationLevel { DEFAULT = 0x00, // allow add half edge(either in or out edge succeeded) TOSS = 0x01, // add in and out edge atomic -} (cpp.enum_strict) \ No newline at end of file +} (cpp.enum_strict) diff --git a/src/mock/MockCluster.cpp b/src/mock/MockCluster.cpp index 0eb5407fa6f..c9d8fbaae62 100644 --- a/src/mock/MockCluster.cpp +++ b/src/mock/MockCluster.cpp @@ -213,6 +213,7 @@ void MockCluster::initStorageKV(const char* dataPath, txnMan_ = std::make_unique(storageEnv_.get()); storageEnv_->txnMan_ = txnMan_.get(); + txnMan_->start(); } void MockCluster::startStorage(HostAddr addr, diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 6a5ade90293..b1227f4bcd8 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -75,14 +75,13 @@ nebula_add_library( transaction/ConsistUtil.cpp transaction/ChainUpdateEdgeLocalProcessor.cpp transaction/ChainUpdateEdgeRemoteProcessor.cpp - transaction/ChainResumeProcessor.cpp transaction/ChainAddEdgesGroupProcessor.cpp transaction/ChainAddEdgesLocalProcessor.cpp transaction/ChainAddEdgesRemoteProcessor.cpp - transaction/ResumeAddEdgeProcessor.cpp - transaction/ResumeAddEdgeRemoteProcessor.cpp - transaction/ResumeUpdateProcessor.cpp - 
transaction/ResumeUpdateRemoteProcessor.cpp + transaction/ChainResumeAddPrimeProcessor.cpp + transaction/ChainResumeAddDoublePrimeProcessor.cpp + transaction/ChainResumeUpdatePrimeProcessor.cpp + transaction/ChainResumeUpdateDoublePrimeProcessor.cpp transaction/ChainProcessorFactory.cpp transaction/ChainDeleteEdgesGroupProcessor.cpp transaction/ChainDeleteEdgesLocalProcessor.cpp diff --git a/src/storage/InternalStorageServiceHandler.h b/src/storage/InternalStorageServiceHandler.h index 01407c3b204..10bb052ed05 100644 --- a/src/storage/InternalStorageServiceHandler.h +++ b/src/storage/InternalStorageServiceHandler.h @@ -22,13 +22,14 @@ class InternalStorageServiceHandler final : public cpp2::InternalStorageServiceS public: explicit InternalStorageServiceHandler(StorageEnv* env); - folly::Future future_chainAddEdges(const cpp2::ChainAddEdgesRequest& p_req); + folly::Future future_chainAddEdges( + const cpp2::ChainAddEdgesRequest& p_req) override; folly::Future future_chainUpdateEdge( - const cpp2::ChainUpdateEdgeRequest& p_req); + const cpp2::ChainUpdateEdgeRequest& p_req) override; folly::Future future_chainDeleteEdges( - const cpp2::ChainDeleteEdgesRequest& p_req); + const cpp2::ChainDeleteEdgesRequest& p_req) override; private: StorageEnv* env_{nullptr}; diff --git a/src/storage/StorageServer.cpp b/src/storage/StorageServer.cpp index eec3e5fff4a..b12632b4111 100644 --- a/src/storage/StorageServer.cpp +++ b/src/storage/StorageServer.cpp @@ -397,6 +397,7 @@ void StorageServer::stop() { if (txnMan_) { txnMan_->stop(); + txnMan_->join(); } if (taskMgr_) { taskMgr_->shutdown(); diff --git a/src/storage/index/LookupProcessor.h b/src/storage/index/LookupProcessor.h index 00d6f8f55fa..05012f5f560 100644 --- a/src/storage/index/LookupProcessor.h +++ b/src/storage/index/LookupProcessor.h @@ -43,6 +43,9 @@ class LookupProcessor : public BaseProcessor { folly::Executor* executor_{nullptr}; std::unique_ptr planContext_; std::unique_ptr context_; + /** + * @brief the final 
output + */ nebula::DataSet resultDataSet_; nebula::DataSet statsDataSet_; std::vector partResults_; diff --git a/src/storage/kv/GetProcessor.h b/src/storage/kv/GetProcessor.h index 7caa28d237e..a4fa6907223 100644 --- a/src/storage/kv/GetProcessor.h +++ b/src/storage/kv/GetProcessor.h @@ -14,6 +14,9 @@ namespace storage { extern ProcessorCounters kGetCounters; +/** + * @brief this is a simple get() interface when storage run in KV mode. + */ class GetProcessor : public BaseProcessor { public: static GetProcessor* instance(StorageEnv* env, diff --git a/src/storage/kv/PutProcessor.h b/src/storage/kv/PutProcessor.h index 101cc183097..7888abd64dd 100644 --- a/src/storage/kv/PutProcessor.h +++ b/src/storage/kv/PutProcessor.h @@ -13,7 +13,9 @@ namespace nebula { namespace storage { extern ProcessorCounters kPutCounters; - +/** + * @brief this is a simple put() interface when storage run in KV mode. + */ class PutProcessor : public BaseProcessor { public: static PutProcessor* instance(StorageEnv* env, diff --git a/src/storage/kv/RemoveProcessor.h b/src/storage/kv/RemoveProcessor.h index 59bab864e87..11dfc5febe2 100644 --- a/src/storage/kv/RemoveProcessor.h +++ b/src/storage/kv/RemoveProcessor.h @@ -14,6 +14,9 @@ namespace storage { extern ProcessorCounters kRemoveCounters; +/** + * @brief this is a simple remove() interface when storage run in KV mode. 
+ */ class RemoveProcessor : public BaseProcessor { public: static RemoveProcessor* instance(StorageEnv* env, diff --git a/src/storage/test/CMakeLists.txt b/src/storage/test/CMakeLists.txt index b880fd232dd..855ca4295b4 100644 --- a/src/storage/test/CMakeLists.txt +++ b/src/storage/test/CMakeLists.txt @@ -753,21 +753,6 @@ nebula_add_executable( gtest ) -nebula_add_executable( - NAME - chain_resume_edge_test - SOURCES - ChainResumeEdgeTest.cpp - OBJECTS - ${storage_test_deps} - LIBRARIES - ${ROCKSDB_LIBRARIES} - ${THRIFT_LIBRARIES} - ${PROXYGEN_LIBRARIES} - wangle - gtest -) - nebula_add_executable( NAME storage_index_write_bm diff --git a/src/storage/test/ChainAddEdgesTest.cpp b/src/storage/test/ChainAddEdgesTest.cpp index 3881e0cc671..a8d0d7cbb26 100644 --- a/src/storage/test/ChainAddEdgesTest.cpp +++ b/src/storage/test/ChainAddEdgesTest.cpp @@ -27,6 +27,7 @@ namespace storage { constexpr int32_t mockSpaceId = 1; constexpr int32_t mockPartNum = 1; constexpr int32_t fackTerm = 1; +constexpr auto suc = nebula::cpp2::ErrorCode::SUCCEEDED; // make sure test class works well TEST(ChainAddEdgesTest, TestUtilsTest) { @@ -38,23 +39,23 @@ TEST(ChainAddEdgesTest, TestUtilsTest) { env->metaClient_ = mClient.get(); MetaClientTestUpdater::addPartTerm(env->metaClient_, mockSpaceId, mockPartNum, fackTerm); - auto* processor = new FakeChainAddEdgesLocalProcessor(env); + auto* proc = new FakeChainAddEdgesLocalProcessor(env); - processor->rcPrepareLocal = nebula::cpp2::ErrorCode::SUCCEEDED; - processor->rcProcessRemote = nebula::cpp2::ErrorCode::SUCCEEDED; - processor->rcProcessLocal = nebula::cpp2::ErrorCode::SUCCEEDED; + proc->setPrepareCode(suc); + proc->setRemoteCode(suc); + proc->setCommitCode(suc); LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, 1); LOG(INFO) << "Test AddEdgesProcessor..."; - auto fut = processor->getFuture(); - processor->process(req); + auto fut = proc->getFuture(); + proc->process(req); auto resp 
= std::move(fut).get(); - EXPECT_EQ(0, resp.result.failed_parts.size()); LOG(INFO) << "Check data in kv store..."; // The number of data in serve is 334 + EXPECT_EQ(0, resp.result.failed_parts.size()); checkAddEdgesData(req, env, 0, 0); } @@ -68,7 +69,7 @@ TEST(ChainAddEdgesTest, prepareLocalSucceedTest) { MetaClientTestUpdater::addPartTerm(env->metaClient_, mockSpaceId, mockPartNum, fackTerm); auto* proc = new FakeChainAddEdgesLocalProcessor(env); - proc->rcProcessRemote = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + proc->setRemoteCode(nebula::cpp2::ErrorCode::E_RPC_FAILURE); LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, 1); @@ -127,7 +128,7 @@ TEST(ChainAddEdgesTest, processRemoteFailedTest) { MetaClientTestUpdater::addPartTerm(env->metaClient_, mockSpaceId, mockPartNum, fackTerm); auto* proc = new FakeChainAddEdgesLocalProcessor(env); - proc->rcProcessRemote = nebula::cpp2::ErrorCode::E_OUTDATED_TERM; + proc->setRemoteCode(nebula::cpp2::ErrorCode::E_OUTDATED_TERM); LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, 1); @@ -136,7 +137,7 @@ TEST(ChainAddEdgesTest, processRemoteFailedTest) { auto fut = proc->getFuture(); proc->process(req); auto resp = std::move(fut).get(); - EXPECT_EQ(1, resp.result.failed_parts.size()); + EXPECT_EQ(0, resp.result.failed_parts.size()); ChainTestUtils util; // none of really edge key should be inserted @@ -144,8 +145,6 @@ TEST(ChainAddEdgesTest, processRemoteFailedTest) { // prime key should be deleted EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(0, numOfKey(req, util.genDoublePrime, env)); - - // env->txnMan_->stop(); } TEST(ChainAddEdgesTest, processRemoteUnknownTest) { @@ -159,7 +158,7 @@ TEST(ChainAddEdgesTest, processRemoteUnknownTest) { auto* proc = new FakeChainAddEdgesLocalProcessor(env); - proc->rcProcessRemote = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + 
proc->setRemoteCode(nebula::cpp2::ErrorCode::E_RPC_FAILURE); LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, 1); @@ -168,39 +167,22 @@ TEST(ChainAddEdgesTest, processRemoteUnknownTest) { auto fut = proc->getFuture(); proc->process(req); auto resp = std::move(fut).get(); - EXPECT_EQ(0, resp.result.failed_parts.size()); ChainTestUtils util; // none of really edge key should be inserted + EXPECT_EQ(0, resp.result.failed_parts.size()); EXPECT_EQ(334, numOfKey(req, util.genKey, env)); // prime key should be deleted EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(334, numOfKey(req, util.genDoublePrime, env)); } -// make a reversed request, make sure it can be added successfully -TEST(ChainAddEdgesTest, processRemoteTest) { - fs::TempDir rootPath("/tmp/AddEdgesTest.XXXXXX"); - mock::MockCluster cluster; - cluster.initStorageKV(rootPath.path()); - auto* env = cluster.storageEnv_.get(); - auto mClient = MetaClientTestUpdater::makeDefault(); - - env->metaClient_ = mClient.get(); - MetaClientTestUpdater::addPartTerm(env->metaClient_, mockSpaceId, mockPartNum, fackTerm); - - auto* proc = new FakeChainAddEdgesLocalProcessor(env); - LOG(INFO) << "Build AddEdgesRequest..."; - cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, 1); - - auto reversedRequest = proc->reverseRequestForward(req); - delete proc; -} - } // namespace storage } // namespace nebula int main(int argc, char** argv) { + FLAGS_trace_toss = true; + FLAGS_v = 1; testing::InitGoogleTest(&argc, argv); folly::init(&argc, &argv, false); google::SetStderrLogging(google::INFO); diff --git a/src/storage/test/ChainDeleteEdgesTest.cpp b/src/storage/test/ChainDeleteEdgesTest.cpp index 932c895210d..ee4316d380d 100644 --- a/src/storage/test/ChainDeleteEdgesTest.cpp +++ b/src/storage/test/ChainDeleteEdgesTest.cpp @@ -222,6 +222,9 @@ TEST(ChainDeleteEdgesTest, DISABLED_Test5) { delProc->rcProcessRemote = nebula::cpp2::ErrorCode::SUCCEEDED; 
delProc->rcProcessLocal = nebula::cpp2::ErrorCode::SUCCEEDED; + UPCLT iClient(FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Run DeleteEdgesReq..."; auto futDel = delProc->getFuture(); delProc->process(delReq); @@ -231,16 +234,13 @@ TEST(ChainDeleteEdgesTest, DISABLED_Test5) { LOG(INFO) << "after del(), edge num = " << num; EXPECT_EQ(num, 167); - env->txnMan_->scanAll(); - auto* iClient = FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); - // std::this_thread::sleep_for(std::chrono::milliseconds()); + for (PartitionID i = 1; i <= partNum; ++i) { + env->txnMan_->scanPrimes(mockSpaceId, i, 1); + } + env->txnMan_->stop(); + env->txnMan_->join(); num = util.checkNumOfKey(env, mockSpaceId, edgeKeys); EXPECT_EQ(num, 0); - - delete iClient; } // add some edges, then delete all of them, not execute local commit @@ -277,6 +277,9 @@ TEST(ChainDeleteEdgesTest, Test6) { delProc->rcProcessRemote = nebula::cpp2::ErrorCode::SUCCEEDED; delProc->rcProcessLocal = nebula::cpp2::ErrorCode::SUCCEEDED; + UPCLT iClient(FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Run DeleteEdgesReq..."; auto futDel = delProc->getFuture(); delProc->process(delReq); @@ -286,16 +289,18 @@ TEST(ChainDeleteEdgesTest, Test6) { LOG(INFO) << "after del(), edge num = " << num; EXPECT_EQ(num, 167); - env->txnMan_->scanAll(); - auto* iClient = FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + for (PartitionID i = 1; i <= partNum; ++i) { + 
env->txnMan_->scanPrimes(mockSpaceId, i); + } + // ChainResumeProcessor resumeProc(env); + // resumeProc.process(); + std::this_thread::sleep_for(std::chrono::seconds(2)); + sleep(1); + env->txnMan_->stop(); + env->txnMan_->join(); + num = util.checkNumOfKey(env, mockSpaceId, edgeKeys); EXPECT_EQ(num, 0); - std::this_thread::sleep_for(std::chrono::milliseconds(300)); - - delete iClient; } // add some edges, delete one of them, rpc failure @@ -332,6 +337,9 @@ TEST(ChainDeleteEdgesTest, Test7) { auto delReq = delProc->makeDelRequest(addReq, limit); delProc->rcProcessRemote = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + UPCLT iClient(FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Run DeleteEdgesReq..."; auto futDel = delProc->getFuture(); delProc->process(delReq); @@ -341,20 +349,16 @@ TEST(ChainDeleteEdgesTest, Test7) { LOG(INFO) << "after del(), edge num = " << num; EXPECT_EQ(num, 166); - env->txnMan_->scanAll(); - auto* iClient = FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); + LOG(INFO) << "after recover()"; + num = util.checkNumOfKey(env, mockSpaceId, edgeKeys); EXPECT_EQ(num, 166); - std::this_thread::sleep_for(std::chrono::milliseconds(300)); - - delete iClient; } -// add some edges, then one all of them, rpc failure +// add some edges, delete all, rpc failure TEST(ChainDeleteEdgesTest, Test8) { fs::TempDir rootPath("/tmp/DeleteEdgesTest.XXXXXX"); mock::MockCluster cluster; @@ -397,16 +401,18 @@ TEST(ChainDeleteEdgesTest, Test8) { LOG(INFO) << "after del(), edge num = " << num; EXPECT_EQ(num, 0); - env->txnMan_->scanAll(); - auto* iClient = FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED); - 
FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + // for (PartitionID i = 1; i <= partNum; ++i) { + // env->txnMan_->scanPrimes(mockSpaceId, i); + // } + UPCLT iClient(FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + // ChainResumeProcessor resumeProc(env); + // resumeProc.process(); + + env->txnMan_->stop(); + env->txnMan_->join(); num = util.checkNumOfKey(env, mockSpaceId, edgeKeys); EXPECT_EQ(num, 0); - std::this_thread::sleep_for(std::chrono::milliseconds(300)); - - delete iClient; } } // namespace storage @@ -414,6 +420,7 @@ TEST(ChainDeleteEdgesTest, Test8) { int main(int argc, char** argv) { FLAGS_trace_toss = true; + FLAGS_v = 1; testing::InitGoogleTest(&argc, argv); folly::init(&argc, &argv, false); diff --git a/src/storage/test/ChainResumeEdgeTest.cpp b/src/storage/test/ChainResumeEdgeTest.cpp index 9c985a8462d..716b8263672 100644 --- a/src/storage/test/ChainResumeEdgeTest.cpp +++ b/src/storage/test/ChainResumeEdgeTest.cpp @@ -20,7 +20,6 @@ #include "storage/test/TestUtils.h" #include "storage/transaction/ChainAddEdgesGroupProcessor.h" #include "storage/transaction/ChainAddEdgesLocalProcessor.h" -#include "storage/transaction/ChainResumeProcessor.h" #include "storage/transaction/ConsistUtil.h" namespace nebula { @@ -58,6 +57,9 @@ TEST(ChainResumeEdgesTest, resumeTest1) { LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, mockPartNum); + UPCLT iClient(FakeInternalStorageClient::instance(env)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + auto fut = proc->getFuture(); proc->process(req); auto resp = std::move(fut).get(); @@ -70,16 +72,12 @@ TEST(ChainResumeEdgesTest, resumeTest1) { env->txnMan_->scanPrimes(1, i); } - auto* iClient = FakeInternalStorageClient::instance(env); - 
FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_EQ(334, numOfKey(req, gTestUtil.genKey, env)); EXPECT_EQ(0, numOfKey(req, gTestUtil.genPrime, env)); EXPECT_EQ(0, numOfKey(req, gTestUtil.genDoublePrime, env)); - - delete iClient; } /** @@ -107,6 +105,9 @@ TEST(ChainResumeEdgesTest, resumeTest2) { LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, mockPartNum); + UPCLT iClient(FakeInternalStorageClient::instance(env, Code::E_UNKNOWN)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Test AddEdgesProcessor..."; auto fut = proc->getFuture(); proc->process(req); @@ -118,16 +119,16 @@ TEST(ChainResumeEdgesTest, resumeTest2) { EXPECT_EQ(334, numOfKey(req, util.genPrime, env)); EXPECT_EQ(0, numOfKey(req, util.genDoublePrime, env)); - auto* iClient = FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::E_UNKNOWN); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + for (int32_t i = 1; i <= mockPartNum; ++i) { + env->txnMan_->scanPrimes(1, i); + } + + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_EQ(0, numOfKey(req, util.genKey, env)); EXPECT_EQ(334, numOfKey(req, util.genPrime, env)); EXPECT_EQ(0, numOfKey(req, util.genDoublePrime, env)); - - delete iClient; } /** @@ -161,22 +162,21 @@ TEST(ChainResumeEdgesTest, resumeTest3) { EXPECT_EQ(0, numOfKey(req, util.genDoublePrime, env)); auto error = nebula::cpp2::ErrorCode::E_RPC_FAILURE; - auto* iClient = FakeInternalStorageClient::instance(env, error); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); + + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); for (auto i = 1; i <= 
mockPartNum; ++i) { env->txnMan_->scanPrimes(1, i); } - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); // none of really edge key should be inserted EXPECT_EQ(334, numOfKey(req, util.genKey, env)); EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(334, numOfKey(req, util.genDoublePrime, env)); - - delete iClient; } /** @@ -204,28 +204,26 @@ TEST(ChainResumeEdgesTest, resumeTest4) { int partNum = 1; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, partNum); + auto error = nebula::cpp2::ErrorCode::E_UNKNOWN; + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Test AddEdgesProcessor..."; auto fut = proc->getFuture(); proc->process(req); auto resp = std::move(fut).get(); EXPECT_EQ(0, resp.result.failed_parts.size()); - // ChainTestUtils util; EXPECT_EQ(334, numOfKey(req, gTestUtil.genKey, env)); EXPECT_EQ(0, numOfKey(req, gTestUtil.genPrime, env)); EXPECT_EQ(334, numOfKey(req, gTestUtil.genDoublePrime, env)); - auto error = nebula::cpp2::ErrorCode::E_UNKNOWN; - auto* iClient = FakeInternalStorageClient::instance(env, error); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_EQ(334, numOfKey(req, gTestUtil.genKey, env)); EXPECT_EQ(0, numOfKey(req, gTestUtil.genPrime, env)); EXPECT_EQ(334, numOfKey(req, gTestUtil.genDoublePrime, env)); - - delete iClient; } /** @@ -243,6 +241,10 @@ TEST(ChainResumeEdgesTest, resumeTest5) { proc->rcProcessRemote = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + auto error = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest 
req = mock::MockData::mockAddEdgesReq(false, 1); @@ -252,22 +254,17 @@ TEST(ChainResumeEdgesTest, resumeTest5) { auto resp = std::move(fut).get(); EXPECT_EQ(0, resp.result.failed_parts.size()); + env->txnMan_->stop(); + env->txnMan_->join(); + ChainTestUtils util; EXPECT_EQ(334, numOfKey(req, util.genKey, env)); EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(334, numOfKey(req, util.genDoublePrime, env)); - auto error = nebula::cpp2::ErrorCode::E_RPC_FAILURE; - auto* iClient = FakeInternalStorageClient::instance(env, error); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); - EXPECT_EQ(334, numOfKey(req, util.genKey, env)); EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(334, numOfKey(req, util.genDoublePrime, env)); - - delete iClient; } /** @@ -288,6 +285,9 @@ TEST(ChainResumeEdgesTest, resumeTest6) { LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, 1); + UPCLT iClient(FakeInternalStorageClient::instance(env)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Test AddEdgesProcessor..."; auto fut = proc->getFuture(); proc->process(req); @@ -299,21 +299,16 @@ TEST(ChainResumeEdgesTest, resumeTest6) { EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(334, numOfKey(req, util.genDoublePrime, env)); - auto* iClient = FakeInternalStorageClient::instance(env); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - for (auto i = 1; i <= mockPartNum; ++i) { env->txnMan_->scanPrimes(1, i); } - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_EQ(334, numOfKey(req, util.genKey, env)); EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(0, numOfKey(req, util.genDoublePrime, env)); - - delete iClient; } // resume an update left prime, check resume succeeded @@ -342,27 
+337,24 @@ TEST(ChainUpdateEdgeTest, resumeTest7) { LOG(INFO) << "addUnfinishedEdge()"; proc->wrapAddUnfinishedEdge(ResumeType::RESUME_CHAIN); auto resp = std::move(f).get(); + UPCLT iClient(FakeInternalStorageClient::instance(env)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); EXPECT_FALSE(helper.checkRequestUpdated(env, req)); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_TRUE(helper.primeExist(env, req)); EXPECT_FALSE(helper.doublePrimeExist(env, req)); - auto* iClient = FakeInternalStorageClient::instance(env); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - for (auto i = 1; i <= mockPartNum; ++i) { env->txnMan_->scanPrimes(1, i); } - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_FALSE(helper.doublePrimeExist(env, req)); - - delete iClient; } // resume an update left prime, resume failed @@ -389,23 +381,25 @@ TEST(ChainUpdateEdgeTest, resumeTest8) { proc->process(req); auto resp = std::move(f).get(); - // EXPECT_TRUE(helper.checkResp(req, resp)); + auto error = nebula::cpp2::ErrorCode::E_UNKNOWN; + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + + for (auto i = 1; i <= mockPartNum; ++i) { + env->txnMan_->scanPrimes(1, i); + } + + env->txnMan_->stop(); + env->txnMan_->join(); + EXPECT_FALSE(helper.checkRequestUpdated(env, req)); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_TRUE(helper.primeExist(env, req)); EXPECT_FALSE(helper.doublePrimeExist(env, req)); - auto* iClient = FakeInternalStorageClient::instance(env); - iClient->setErrorCode(Code::E_UNKNOWN); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); - EXPECT_TRUE(helper.edgeExist(env, req)); 
EXPECT_TRUE(helper.primeExist(env, req)); EXPECT_FALSE(helper.doublePrimeExist(env, req)); - - delete iClient; } // resume an update left prime, resume outdated @@ -433,23 +427,20 @@ TEST(ChainUpdateEdgeTest, resumeTest9) { proc->wrapAddUnfinishedEdge(ResumeType::RESUME_CHAIN); auto resp = std::move(f).get(); - // EXPECT_TRUE(helper.checkResp(req, resp)); - EXPECT_FALSE(helper.checkRequestUpdated(env, req)); - EXPECT_TRUE(helper.edgeExist(env, req)); - EXPECT_TRUE(helper.primeExist(env, req)); - EXPECT_FALSE(helper.doublePrimeExist(env, req)); + auto error = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + + for (auto i = 1; i <= mockPartNum; ++i) { + env->txnMan_->scanPrimes(1, i); + } - auto* iClient = FakeInternalStorageClient::instance(env); - iClient->setErrorCode(Code::E_RPC_FAILURE); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_TRUE(helper.doublePrimeExist(env, req)); - - delete iClient; } // resume an update left prime, check resume succeeded @@ -461,18 +452,19 @@ TEST(ChainUpdateEdgeTest, resumeTest10) { auto mClient = MetaClientTestUpdater::makeDefault(); env->metaClient_ = mClient.get(); - // auto parts = cluster.getTotalParts(); auto parts = mockPartNum; EXPECT_TRUE(QueryTestUtils::mockEdgeData(env, parts, mockSpaceVidLen)); LOG(INFO) << "Test UpdateEdgeRequest..."; auto req = helper.makeDefaultRequest(); + UPCLT iClient(FakeInternalStorageClient::instance(env)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Fake Prime..."; auto* proc = new FakeChainUpdateProcessor(env); auto f = proc->getFuture(); proc->rcProcessRemote = Code::E_RPC_FAILURE; - // 
proc->rcProcessLocal = Code::SUCCEEDED; proc->process(req); auto resp = std::move(f).get(); @@ -481,16 +473,12 @@ TEST(ChainUpdateEdgeTest, resumeTest10) { EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_TRUE(helper.doublePrimeExist(env, req)); - auto* iClient = FakeInternalStorageClient::instance(env); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_FALSE(helper.doublePrimeExist(env, req)); - - delete iClient; } // resume an update left prime, resume failed @@ -509,11 +497,14 @@ TEST(ChainUpdateEdgeTest, resumeTest11) { LOG(INFO) << "Test UpdateEdgeRequest..."; auto req = helper.makeDefaultRequest(); + auto error = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Fake Prime..."; auto* proc = new FakeChainUpdateProcessor(env); auto f = proc->getFuture(); proc->rcProcessRemote = Code::E_RPC_FAILURE; - // proc->rcProcessLocal = Code::SUCCEEDED; proc->process(req); auto resp = std::move(f).get(); @@ -522,17 +513,12 @@ TEST(ChainUpdateEdgeTest, resumeTest11) { EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_TRUE(helper.doublePrimeExist(env, req)); - auto* iClient = FakeInternalStorageClient::instance(env); - iClient->setErrorCode(Code::E_UNKNOWN); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_TRUE(helper.doublePrimeExist(env, req)); - - delete iClient; } // resume an update left prime, resume outdated @@ -551,11 +537,14 @@ TEST(ChainUpdateEdgeTest, resumeTest12) { LOG(INFO) 
<< "Test UpdateEdgeRequest..."; auto req = helper.makeDefaultRequest(); + auto error = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Fake Prime..."; auto* proc = new FakeChainUpdateProcessor(env); auto f = proc->getFuture(); proc->rcProcessRemote = Code::E_RPC_FAILURE; - // proc->rcProcessLocal = Code::SUCCEEDED; proc->process(req); auto resp = std::move(f).get(); @@ -564,22 +553,19 @@ TEST(ChainUpdateEdgeTest, resumeTest12) { EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_TRUE(helper.doublePrimeExist(env, req)); - auto* iClient = FakeInternalStorageClient::instance(env); - iClient->setErrorCode(Code::E_RPC_FAILURE); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_TRUE(helper.doublePrimeExist(env, req)); - - delete iClient; } } // namespace storage } // namespace nebula int main(int argc, char** argv) { + FLAGS_trace_toss = true; + FLAGS_v = 1; testing::InitGoogleTest(&argc, argv); folly::init(&argc, &argv, false); google::SetStderrLogging(google::INFO); diff --git a/src/storage/test/ChainTestUtils.h b/src/storage/test/ChainTestUtils.h index 0fd04ca00ee..05bc100debd 100644 --- a/src/storage/test/ChainTestUtils.h +++ b/src/storage/test/ChainTestUtils.h @@ -6,7 +6,7 @@ #pragma once #include "storage/CommonUtils.h" -#include "storage/transaction/ChainResumeProcessor.h" +#include "storage/transaction/ChainAddEdgesLocalProcessor.h" #include "storage/transaction/ChainUpdateEdgeLocalProcessor.h" #include "storage/transaction/ChainUpdateEdgeRemoteProcessor.h" @@ -19,6 +19,27 @@ extern const int32_t mockSpaceVidLen; using KeyGenerator = std::function; +class TransactionManagerTester { + public: + 
explicit TransactionManagerTester(TransactionManager* p) : man_(p) {} + + void stop() { + man_->stop(); + int32_t numCheckIdle = 0; + while (numCheckIdle < 3) { + auto stats = man_->exec_->getPoolStats(); + if (stats.threadCount == stats.idleThreadCount) { + ++numCheckIdle; + } else { + numCheckIdle = 0; + } + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + } + } + + TransactionManager* man_{nullptr}; +}; + class ChainTestUtils { public: ChainTestUtils() { @@ -125,6 +146,8 @@ class FakeChainAddEdgesLocalProcessor : public ChainAddEdgesLocalProcessor { public: explicit FakeChainAddEdgesLocalProcessor(StorageEnv* env) : ChainAddEdgesLocalProcessor(env) { spaceVidLen_ = 32; + rcRemote_ = Code::SUCCEEDED; + rcCommit_ = Code::SUCCEEDED; } folly::SemiFuture prepareLocal() override { @@ -168,6 +191,28 @@ class FakeChainAddEdgesLocalProcessor : public ChainAddEdgesLocalProcessor { folly::Optional rcProcessRemote; folly::Optional rcProcessLocal; + + void setPrepareCode(Code code, Code rc = Code::SUCCEEDED) { + rcPrepareLocal = code; + rcPrepare_ = rc; + } + + void setRemoteCode(Code code) { + rcProcessRemote = code; + rcRemote_ = code; + } + + void setCommitCode(Code code, Code rc = Code::SUCCEEDED) { + rcProcessLocal = code; + rcCommit_ = rc; + } + + void finish() override { + auto rc = (rcPrepare_ == Code::SUCCEEDED) ? 
rcCommit_ : rcPrepare_; + pushResultCode(rc, localPartId_); + finished_.setValue(rc); + onFinished(); + } }; class FakeChainUpdateProcessor : public ChainUpdateEdgeLocalProcessor { @@ -187,7 +232,7 @@ class FakeChainUpdateProcessor : public ChainUpdateEdgeLocalProcessor { } folly::SemiFuture processRemote(Code code) override { - LOG(INFO) << "FakeChainUpdateEdgeProcessorA::" << __func__ << "()"; + LOG(INFO) << "FakeChainUpdateEdgeProcessor::" << __func__ << "()"; if (rcProcessRemote) { LOG(INFO) << "processRemote() fake return " << apache::thrift::util::enumNameSafe(*rcProcessRemote); @@ -199,7 +244,7 @@ class FakeChainUpdateProcessor : public ChainUpdateEdgeLocalProcessor { } folly::SemiFuture processLocal(Code code) override { - LOG(INFO) << "FakeChainUpdateEdgeProcessorA::" << __func__ << "()"; + LOG(INFO) << "FakeChainUpdateEdgeProcessor::" << __func__ << "()"; if (rcProcessLocal) { LOG(INFO) << "processLocal() fake return " << apache::thrift::util::enumNameSafe(*rcProcessLocal); @@ -210,13 +255,61 @@ class FakeChainUpdateProcessor : public ChainUpdateEdgeLocalProcessor { } void wrapAddUnfinishedEdge(ResumeType type) { - addUnfinishedEdge(type); + reportFailed(type); + } + void setPrepareCode(Code code, Code rc = Code::SUCCEEDED) { + rcPrepareLocal = code; + rcPrepare_ = rc; + } + + void setRemoteCode(Code code) { + rcProcessRemote = code; + rcRemote_ = code; + } + + void setCommitCode(Code code, Code rc = Code::SUCCEEDED) { + rcProcessLocal = code; + rcCommit_ = rc; + } + + void setDoRecover(bool doRecover) { + doRecover_ = doRecover; + } + + void finish() override { + if (doRecover_) { + LOG(INFO) << "do real finish()"; + ChainUpdateEdgeLocalProcessor::finish(); + } else { + auto rc = Code::SUCCEEDED; + do { + if (rcPrepare_ != Code::SUCCEEDED) { + rc = rcPrepare_; + break; + } + + if (rcCommit_ != Code::SUCCEEDED) { + rc = rcCommit_; + break; + } + + if (rcRemote_ != Code::E_RPC_FAILURE) { + rc = rcRemote_; + break; + } + } while (0); + + pushResultCode(rc, 
localPartId_); + finished_.setValue(rc); + onFinished(); + } } public: folly::Optional rcPrepareLocal; folly::Optional rcProcessRemote; folly::Optional rcProcessLocal; + bool doRecover_{false}; }; class MetaClientTestUpdater { @@ -255,17 +348,20 @@ class MetaClientTestUpdater { meta::MetaClientOptions options; auto mClient = std::make_unique(exec, addrs, options); - mClient->localCache_[mockSpaceId] = std::make_shared(); + auto spSpaceInfoCache = std::make_shared(); + addLocalCache(*mClient, mockSpaceId, spSpaceInfoCache); + auto* pCache = getLocalCache(mClient.get(), mockSpaceId); + for (int i = 0; i != mockPartNum; ++i) { - mClient->localCache_[mockSpaceId]->termOfPartition_[i] = i; - auto ignoreItem = mClient->localCache_[mockSpaceId]->partsAlloc_[i]; + pCache->termOfPartition_[i] = i; + auto ignoreItem = pCache->partsAlloc_[i]; UNUSED(ignoreItem); } meta::cpp2::ColumnTypeDef type; type.type_ref() = nebula::cpp2::PropertyType::FIXED_STRING; type.type_length_ref() = 32; - mClient->localCache_[mockSpaceId]->spaceDesc_.vid_type_ref() = std::move(type); + pCache->spaceDesc_.vid_type_ref() = std::move(type); mClient->ready_ = true; return mClient; } @@ -328,12 +424,10 @@ class FakeInternalStorageClient : public InternalStorageClient { static FakeInternalStorageClient* instance(StorageEnv* env, Code fakeCode = Code::SUCCEEDED) { auto pool = std::make_shared(3); return new FakeInternalStorageClient(env, pool, fakeCode); - // static FakeInternalStorageClient client(env, pool, fakeCode); - // return &client; } static void hookInternalStorageClient(StorageEnv* env, InternalStorageClient* client) { - env->txnMan_->iClient_ = client; + env->interClient_ = client; } private: @@ -341,6 +435,8 @@ class FakeInternalStorageClient : public InternalStorageClient { Code code_{Code::SUCCEEDED}; }; +using UPCLT = std::unique_ptr; + struct ChainUpdateEdgeTestHelper { ChainUpdateEdgeTestHelper() { sEdgeType = std::to_string(std::abs(edgeType_)); @@ -425,8 +521,8 @@ struct 
ChainUpdateEdgeTestHelper { return req; } - bool checkResp2(cpp2::UpdateResponse& resp) { - LOG(INFO) << "checkResp2(cpp2::UpdateResponse& resp)"; + bool checkResp(cpp2::UpdateResponse& resp) { + LOG(INFO) << "checkResp(cpp2::UpdateResponse& resp)"; if (!resp.props_ref()) { LOG(INFO) << "!resp.props_ref()"; return false; @@ -506,7 +602,6 @@ struct ChainUpdateEdgeTestHelper { auto val1 = cexpr->value(); auto val2 = edgeReader->getValueByName(prop.get_name()); - // EXPECT_EQ(val1, val2); if (val1 != val2) { ret = false; } @@ -524,25 +619,5 @@ struct ChainUpdateEdgeTestHelper { std::string sEdgeType; }; -// class ChainResumeProcessorTestHelper { -// public: -// explicit ChainResumeProcessorTestHelper(ChainResumeProcessor* proc) : proc_(proc) {} - -// void setAddEdgeProc(ChainAddEdgesLocalProcessor* proc) { -// proc_->addProc = proc; -// } - -// // setUpdProc -// void setUpdProc(ChainUpdateEdgeLocalProcessor* proc) { -// proc_->updProc = proc; -// } - -// std::string getTxnId() { -// return proc_->addProc->txnId_; -// } -// public: -// ChainResumeProcessor* proc_{nullptr}; -// }; - } // namespace storage } // namespace nebula diff --git a/src/storage/test/ChainUpdateEdgeTest.cpp b/src/storage/test/ChainUpdateEdgeTest.cpp index 6249dac0bdf..ec8e219f883 100644 --- a/src/storage/test/ChainUpdateEdgeTest.cpp +++ b/src/storage/test/ChainUpdateEdgeTest.cpp @@ -21,22 +21,26 @@ #include "storage/test/TestUtils.h" #include "storage/transaction/ChainAddEdgesGroupProcessor.h" #include "storage/transaction/ChainAddEdgesLocalProcessor.h" -#include "storage/transaction/ChainResumeProcessor.h" #include "storage/transaction/ChainUpdateEdgeRemoteProcessor.h" #include "storage/transaction/ConsistUtil.h" namespace nebula { namespace storage { -// using Code = ::nebula::cpp2::ErrorCode; - constexpr int32_t mockSpaceId = 1; constexpr int32_t mockPartNum = 6; +constexpr int32_t fackTerm = 1; constexpr int32_t mockSpaceVidLen = 32; ChainTestUtils gTestUtil; - ChainUpdateEdgeTestHelper 
helper; + +/** + * @brief a normal update will succeed + * 1. prepare environment + * 2. do a normal update (without any error) + * 3. check edge request updated + */ TEST(ChainUpdateEdgeTest, updateTest1) { fs::TempDir rootPath("/tmp/UpdateEdgeTest.XXXXXX"); mock::MockCluster cluster; @@ -44,23 +48,28 @@ TEST(ChainUpdateEdgeTest, updateTest1) { auto* env = cluster.storageEnv_.get(); auto mClient = MetaClientTestUpdater::makeDefault(); env->metaClient_ = mClient.get(); + MetaClientTestUpdater::addPartTerm(env->metaClient_, mockSpaceId, mockPartNum, fackTerm); + auto stPartsNum = env->metaClient_->partsNum(mockSpaceId); + if (stPartsNum.ok()) { + LOG(INFO) << "stPartsNum.value()=" << stPartsNum.value(); + } auto parts = cluster.getTotalParts(); + LOG(INFO) << "parts: " << parts; EXPECT_TRUE(QueryTestUtils::mockEdgeData(env, parts, mockSpaceVidLen)); LOG(INFO) << "Test updateTest1..."; auto req = helper.makeDefaultRequest(); - env->txnMan_->iClient_ = FakeInternalStorageClient::instance(env); + env->interClient_ = FakeInternalStorageClient::instance(env); auto reversedRequest = helper.reverseRequest(env, req); auto* proc = new FakeChainUpdateProcessor(env); - LOG(INFO) << "proc: " << proc; auto f = proc->getFuture(); proc->process(req); auto resp = std::move(f).get(); - EXPECT_TRUE(helper.checkResp2(resp)); + EXPECT_TRUE(helper.checkResp(resp)); EXPECT_TRUE(helper.checkRequestUpdated(env, req)); EXPECT_TRUE(helper.checkRequestUpdated(env, reversedRequest)); EXPECT_TRUE(helper.edgeExist(env, req)); @@ -68,6 +77,16 @@ TEST(ChainUpdateEdgeTest, updateTest1) { EXPECT_FALSE(helper.doublePrimeExist(env, req)); } +/** + * @brief updateTest2 (update non-exist edge will fail) + * 1. prepare environment + * 2. do a failed update + * 3. 
check result + * 3.1 edge not updated + * 3.2 prime not exist + * 3.3 double prime not exist + */ + TEST(ChainUpdateEdgeTest, updateTest2) { fs::TempDir rootPath("/tmp/UpdateEdgeTest.XXXXXX"); mock::MockCluster cluster; @@ -75,6 +94,7 @@ TEST(ChainUpdateEdgeTest, updateTest2) { auto* env = cluster.storageEnv_.get(); auto mClient = MetaClientTestUpdater::makeDefault(); env->metaClient_ = mClient.get(); + MetaClientTestUpdater::addPartTerm(env->metaClient_, mockSpaceId, mockPartNum, fackTerm); auto parts = cluster.getTotalParts(); EXPECT_TRUE(QueryTestUtils::mockEdgeData(env, parts, mockSpaceVidLen)); @@ -89,12 +109,12 @@ TEST(ChainUpdateEdgeTest, updateTest2) { auto* proc = new FakeChainUpdateProcessor(env); auto f = proc->getFuture(); - proc->rcProcessRemote = Code::E_KEY_NOT_FOUND; + proc->setRemoteCode(Code::E_KEY_NOT_FOUND); proc->process(badRequest); auto resp = std::move(f).get(); EXPECT_EQ(1, (*resp.result_ref()).failed_parts.size()); - EXPECT_FALSE(helper.checkResp2(resp)); + EXPECT_FALSE(helper.checkResp(resp)); EXPECT_FALSE(helper.edgeExist(env, badRequest)); EXPECT_FALSE(helper.primeExist(env, badRequest)); EXPECT_FALSE(helper.doublePrimeExist(env, badRequest)); @@ -119,8 +139,9 @@ TEST(ChainUpdateEdgeTest, updateTest3) { auto* proc = new FakeChainUpdateProcessor(env); auto f = proc->getFuture(); - proc->rcProcessRemote = Code::SUCCEEDED; - proc->rcProcessLocal = Code::SUCCEEDED; + proc->setRemoteCode(Code::SUCCEEDED); + proc->setCommitCode(Code::SUCCEEDED); + proc->process(goodRequest); auto resp = std::move(f).get(); @@ -146,9 +167,12 @@ TEST(ChainUpdateEdgeTest, updateTest4) { EXPECT_FALSE(helper.primeExist(env, goodRequest)); EXPECT_FALSE(helper.doublePrimeExist(env, goodRequest)); + UPCLT iClient(FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + auto* proc = new FakeChainUpdateProcessor(env); auto f = proc->getFuture(); - proc->rcProcessRemote = 
Code::E_RPC_FAILURE; + proc->setRemoteCode(Code::E_RPC_FAILURE); proc->process(goodRequest); auto resp = std::move(f).get(); @@ -161,6 +185,8 @@ TEST(ChainUpdateEdgeTest, updateTest4) { } // namespace nebula int main(int argc, char** argv) { + FLAGS_trace_toss = true; + FLAGS_v = 1; testing::InitGoogleTest(&argc, argv); folly::init(&argc, &argv, false); google::SetStderrLogging(google::INFO); @@ -168,30 +194,6 @@ int main(int argc, char** argv) { } // ***** Test Plan ***** -/** - * @brief updateTest1 (update a normal edge will succeed) - * previous update - * prepareLocal succeed succeed - * processRemote succeed succeed - * processLocal succeed succeed - * expect: edge true - * edge prime false - * double prime false - * prop changed true - */ - -/** - * @brief updateTest2 (update non-exist edge will fail) - * previous update - * prepareLocal failed succeed - * processRemote skip succeed - * processLocal failed succeed - * expect: edge false - * edge prime false - * double prime false - * prop changed true - */ - /** * @brief updateTest3 (remote update failed will not change anything) * previous update diff --git a/src/storage/transaction/ChainAddEdgesLocalProcessor.cpp b/src/storage/transaction/ChainAddEdgesLocalProcessor.cpp index 1e58236c370..c1a8ac0b9f7 100644 --- a/src/storage/transaction/ChainAddEdgesLocalProcessor.cpp +++ b/src/storage/transaction/ChainAddEdgesLocalProcessor.cpp @@ -22,61 +22,42 @@ void ChainAddEdgesLocalProcessor::process(const cpp2::AddEdgesRequest& req) { finish(); return; } + + uuid_ = ConsistUtil::strUUID(); + execDesc_ = ", AddEdges, "; env_->txnMan_->addChainTask(this); } -/** - * @brief - * 1. check term - * 2. set mem lock - * 3. 
write edge prime(key = edge prime, val = ) - */ folly::SemiFuture ChainAddEdgesLocalProcessor::prepareLocal() { - if (FLAGS_trace_toss) { - uuid_ = ConsistUtil::strUUID(); - readableEdgeDesc_ = makeReadableEdge(req_); - if (!readableEdgeDesc_.empty()) { - uuid_.append(" ").append(readableEdgeDesc_); - } + VLOG(2) << uuid_ << __func__ << "()"; + std::tie(term_, rcPrepare_) = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + if (rcPrepare_ != Code::SUCCEEDED) { + finish(); + return rcPrepare_; } if (!lockEdges(req_)) { + rcPrepare_ = Code::E_WRITE_WRITE_CONFLICT; return Code::E_WRITE_WRITE_CONFLICT; } + replaceNullWithDefaultValue(req_); auto [pro, fut] = folly::makePromiseContract(); auto primes = makePrime(); - std::vector debugPrimes; - if (FLAGS_trace_toss) { - debugPrimes = primes; - } erasePrime(); env_->kvstore_->asyncMultiPut( - spaceId_, - localPartId_, - std::move(primes), - [p = std::move(pro), debugPrimes, this](auto rc) mutable { - if (rc == nebula::cpp2::ErrorCode::SUCCEEDED) { - primeInserted_ = true; - if (FLAGS_trace_toss) { - for (auto& kv : debugPrimes) { - VLOG(1) << uuid_ << " put prime " << folly::hexlify(kv.first); - } - } - } else { - LOG(WARNING) << uuid_ << "kvstore err: " << apache::thrift::util::enumNameSafe(rc); - } - + spaceId_, localPartId_, std::move(primes), [p = std::move(pro), this](auto rc) mutable { + rcPrepare_ = rc; p.setValue(rc); }); return std::move(fut); } folly::SemiFuture ChainAddEdgesLocalProcessor::processRemote(Code code) { - VLOG(1) << uuid_ << " prepareLocal(), code = " << apache::thrift::util::enumNameSafe(code); - if (code != Code::SUCCEEDED) { - return code; + VLOG(2) << uuid_ << " prepareLocal() " << apache::thrift::util::enumNameSafe(code); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; } CHECK_EQ(req_.get_parts().size(), 1); auto reversedRequest = reverseRequest(req_); @@ -86,52 +67,39 @@ folly::SemiFuture ChainAddEdgesLocalProcessor::processRemote(Code code) { return std::move(fut); } 
-folly::SemiFuture ChainAddEdgesLocalProcessor::processLocal(Code code) { - if (FLAGS_trace_toss) { - VLOG(1) << uuid_ << " processRemote(), code = " << apache::thrift::util::enumNameSafe(code); - } - - bool remoteFailed{true}; - - if (code == Code::SUCCEEDED) { - // do nothing - remoteFailed = false; - } else if (code == Code::E_RPC_FAILURE) { - code_ = Code::SUCCEEDED; - remoteFailed = false; - } else { - code_ = code; +folly::SemiFuture ChainAddEdgesLocalProcessor::processLocal(Code) { + VLOG(2) << uuid_ << " processRemote(), code = " << apache::thrift::util::enumNameSafe(rcRemote_); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; } - auto currTerm = env_->txnMan_->getTerm(spaceId_, localPartId_); + auto currTerm = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); if (currTerm.first != term_) { - LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; + rcCommit_ = Code::E_LEADER_CHANGED; + return rcCommit_; } - if (code == Code::E_RPC_FAILURE) { + if (rcRemote_ == Code::E_RPC_FAILURE) { kvAppend_ = makeDoublePrime(); - addUnfinishedEdge(ResumeType::RESUME_REMOTE); } - if (code_ == Code::SUCCEEDED) { - return forwardToDelegateProcessor(); - } else { - if (primeInserted_ && remoteFailed) { - return abort(); - } + if (rcRemote_ != Code::SUCCEEDED && rcRemote_ != Code::E_RPC_FAILURE) { + // prepare succeed and remote failed + return abort(); } - return code_; + return commit(); } -void ChainAddEdgesLocalProcessor::addUnfinishedEdge(ResumeType type) { +void ChainAddEdgesLocalProcessor::reportFailed(ResumeType type) { if (lk_ != nullptr) { - lk_->forceUnlock(); + lk_->setAutoUnlock(false); } + execDesc_ += ", reportFailed"; auto keys = toStrKeys(req_); for (auto& key : keys) { - env_->txnMan_->addPrime(spaceId_, key, type); + VLOG(1) << uuid_ << " term=" << term_ << ", reportFailed(), " << folly::hexlify(key); + env_->txnMan_->addPrime(spaceId_, localPartId_, term_, key, type); } } @@ -139,34 +107,11 
@@ bool ChainAddEdgesLocalProcessor::prepareRequest(const cpp2::AddEdgesRequest& re CHECK_EQ(req.get_parts().size(), 1); req_ = req; spaceId_ = req_.get_space_id(); - auto vidType = env_->metaClient_->getSpaceVidType(spaceId_); - if (!vidType.ok()) { - LOG(WARNING) << "can't get vidType"; - return false; - } else { - spaceVidType_ = vidType.value(); - } localPartId_ = req.get_parts().begin()->first; - replaceNullWithDefaultValue(req_); - - std::tie(term_, code_) = env_->txnMan_->getTerm(spaceId_, localPartId_); - if (code_ != Code::SUCCEEDED) { - LOG(INFO) << "get term failed"; - return false; - } - - auto vidLen = env_->schemaMan_->getSpaceVidLen(spaceId_); - if (!vidLen.ok()) { - LOG(ERROR) << "getSpaceVidLen failed, spaceId_: " << spaceId_ - << ", status: " << vidLen.status(); - setErrorCode(Code::E_INVALID_SPACEVIDLEN); - return false; - } - spaceVidLen_ = vidLen.value(); - return true; + return getSpaceVidLen(spaceId_) == Code::SUCCEEDED; } -folly::SemiFuture ChainAddEdgesLocalProcessor::forwardToDelegateProcessor() { +folly::SemiFuture ChainAddEdgesLocalProcessor::commit() { auto* proc = AddEdgesProcessor::instance(env_, nullptr); proc->consistOp_ = [&](kvstore::BatchHolder& a, std::vector* b) { callbackOfChainOp(a, b); @@ -174,26 +119,15 @@ folly::SemiFuture ChainAddEdgesLocalProcessor::forwardToDelegateProcessor( auto futProc = proc->getFuture(); auto [pro, fut] = folly::makePromiseContract(); std::move(futProc).thenTry([&, p = std::move(pro)](auto&& t) mutable { - auto rc = Code::SUCCEEDED; + execDesc_ += ", commit(), "; if (t.hasException()) { LOG(INFO) << "catch ex: " << t.exception().what(); - rc = Code::E_UNKNOWN; + rcCommit_ = Code::E_UNKNOWN; } else { auto& resp = t.value(); - rc = extractRpcError(resp); - if (rc == Code::SUCCEEDED) { - if (FLAGS_trace_toss) { - for (auto& k : kvErased_) { - VLOG(1) << uuid_ << " erase prime " << folly::hexlify(k); - } - } - } else { - VLOG(1) << uuid_ << " forwardToDelegateProcessor(), code = " - << 
apache::thrift::util::enumNameSafe(rc); - addUnfinishedEdge(ResumeType::RESUME_CHAIN); - } + rcCommit_ = extractRpcError(resp); } - p.setValue(rc); + p.setValue(rcCommit_); }); proc->process(req_); return std::move(fut); @@ -215,22 +149,22 @@ void ChainAddEdgesLocalProcessor::doRpc(folly::Promise&& promise, promise.setValue(Code::E_LEADER_CHANGED); return; } - auto* iClient = env_->txnMan_->getInternalClient(); + auto* iClient = env_->interClient_; folly::Promise p; auto f = p.getFuture(); iClient->chainAddEdges(req, term_, edgeVer_, std::move(p)); std::move(f).thenTry([=, p = std::move(promise)](auto&& t) mutable { - auto code = t.hasValue() ? t.value() : Code::E_RPC_FAILURE; - switch (code) { + rcRemote_ = t.hasValue() ? t.value() : Code::E_RPC_FAILURE; + switch (rcRemote_) { case Code::E_LEADER_CHANGED: doRpc(std::move(p), std::move(req), ++retry); break; default: - p.setValue(code); + p.setValue(rcRemote_); break; } - return code; + return rcRemote_; }); } @@ -260,23 +194,14 @@ folly::SemiFuture ChainAddEdgesLocalProcessor::abort() { } auto [pro, fut] = folly::makePromiseContract(); - env_->kvstore_->asyncMultiRemove( - req_.get_space_id(), - localPartId_, - std::move(kvErased_), - [p = std::move(pro), debugErased, this](auto rc) mutable { - VLOG(1) << uuid_ << " abort()=" << apache::thrift::util::enumNameSafe(rc); - if (rc == Code::SUCCEEDED) { - if (FLAGS_trace_toss) { - for (auto& k : debugErased) { - VLOG(1) << uuid_ << "erase prime " << folly::hexlify(k); - } - } - } else { - addUnfinishedEdge(ResumeType::RESUME_CHAIN); - } - p.setValue(rc); - }); + env_->kvstore_->asyncMultiRemove(req_.get_space_id(), + localPartId_, + std::move(kvErased_), + [p = std::move(pro), debugErased, this](auto rc) mutable { + execDesc_ += ", abort(), "; + this->rcCommit_ = rc; + p.setValue(rc); + }); return std::move(fut); } @@ -322,8 +247,8 @@ void ChainAddEdgesLocalProcessor::erasePrime() { bool ChainAddEdgesLocalProcessor::lockEdges(const cpp2::AddEdgesRequest& req) { auto 
partId = req.get_parts().begin()->first; - auto* lockCore = env_->txnMan_->getLockCore(req.get_space_id(), partId); - if (!lockCore) { + lkCore_ = env_->txnMan_->getLockCore(req.get_space_id(), partId, term_); + if (!lkCore_) { return false; } @@ -331,7 +256,7 @@ bool ChainAddEdgesLocalProcessor::lockEdges(const cpp2::AddEdgesRequest& req) { for (auto& edge : req.get_parts().begin()->second) { keys.emplace_back(ConsistUtil::edgeKey(spaceVidLen_, partId, edge.get_key())); } - lk_ = std::make_unique(lockCore, keys); + lk_ = std::make_unique(lkCore_.get(), keys); return lk_->isLocked(); } @@ -363,9 +288,50 @@ cpp2::AddEdgesRequest ChainAddEdgesLocalProcessor::reverseRequest( } void ChainAddEdgesLocalProcessor::finish() { - VLOG(1) << uuid_ << " commitLocal(), code_ = " << apache::thrift::util::enumNameSafe(code_); - pushResultCode(code_, localPartId_); - finished_.setValue(code_); + if (rcPrepare_ == Code::SUCCEEDED) { + VLOG(1) << uuid_ << execDesc_ << makeReadableEdge(req_) + << ", rcPrepare_=" << apache::thrift::util::enumNameSafe(rcPrepare_) + << ", rcRemote_=" << apache::thrift::util::enumNameSafe(rcRemote_) + << ", rcCommit_=" << apache::thrift::util::enumNameSafe(rcCommit_); + } + do { + if (rcPrepare_ != Code::SUCCEEDED) { + break; // nothing written, no need to recover. + } + + if (rcCommit_ != Code::SUCCEEDED) { + reportFailed(ResumeType::RESUME_CHAIN); + break; + } + + if (rcRemote_ == Code::E_RPC_FAILURE) { + reportFailed(ResumeType::RESUME_REMOTE); + break; + } + } while (0); + + auto rc = Code::SUCCEEDED; + do { + if (rcPrepare_ != Code::SUCCEEDED) { + rc = rcPrepare_; + break; + } + + if (rcCommit_ != Code::SUCCEEDED) { + rc = rcCommit_; + break; + } + + // rcCommit_ may be set SUCCEEDED in abort(). + // in which case we should return the error code of remote. 
+ if (rcRemote_ != Code::E_RPC_FAILURE) { + rc = rcRemote_; + break; + } + } while (0); + + pushResultCode(rc, localPartId_); + finished_.setValue(rc); onFinished(); } @@ -383,36 +349,15 @@ cpp2::AddEdgesRequest ChainAddEdgesLocalProcessor::makeSingleEdgeRequest( return req; } -int64_t ChainAddEdgesLocalProcessor::toInt(const ::nebula::Value& val) { - if (spaceVidType_ == nebula::cpp2::PropertyType::FIXED_STRING) { - auto str = val.toString(); - if (str.size() < 3) { - return 0; - } - auto str2 = str.substr(1, str.size() - 2); - return atoll(str2.c_str()); - } else if (spaceVidType_ == nebula::cpp2::PropertyType::INT64) { - return *reinterpret_cast(const_cast(val.toString().c_str() + 1)); - } - return 0; -} - std::string ChainAddEdgesLocalProcessor::makeReadableEdge(const cpp2::AddEdgesRequest& req) { - if (req.get_parts().size() != 1) { - LOG(INFO) << req.get_parts().size(); - return ""; - } - if (req.get_parts().begin()->second.size() != 1) { - LOG(INFO) << req.get_parts().begin()->second.size(); - return ""; + std::stringstream oss; + oss << "term=" << term_ << ", "; + auto rawKeyVec = toStrKeys(req); + for (auto& rawKey : rawKeyVec) { + oss << ConsistUtil::readableKey(spaceVidLen_, isIntId_, rawKey) << ", "; } - auto& edge = req.get_parts().begin()->second.back(); - int64_t isrc = toInt(edge.get_key().get_src()); - int64_t idst = toInt(edge.get_key().get_dst()); - - std::stringstream oss; - oss << isrc << "->" << idst << ", val: "; + auto& edge = req.get_parts().begin()->second.back(); for (auto& val : edge.get_props()) { oss << val.toString() << " "; } diff --git a/src/storage/transaction/ChainAddEdgesLocalProcessor.h b/src/storage/transaction/ChainAddEdgesLocalProcessor.h index 06695e29677..d9f397dc91d 100644 --- a/src/storage/transaction/ChainAddEdgesLocalProcessor.h +++ b/src/storage/transaction/ChainAddEdgesLocalProcessor.h @@ -97,13 +97,18 @@ class ChainAddEdgesLocalProcessor : public BaseProcessor, void eraseDoublePrime(); - folly::SemiFuture 
forwardToDelegateProcessor(); + /** + * @brief will call normal AddEdgesProcess to do real insert. + * + * @return folly::SemiFuture + */ + folly::SemiFuture commit(); /// if any operation failed or can not determined(RPC error) /// call this to leave a record in transaction manager /// the record can be scanned by the background resume thread /// then will do fail over logic - void addUnfinishedEdge(ResumeType type); + void reportFailed(ResumeType type); /*** consider the following case: * @@ -119,36 +124,42 @@ class ChainAddEdgesLocalProcessor : public BaseProcessor, * */ void replaceNullWithDefaultValue(cpp2::AddEdgesRequest& req); - std::string makeReadableEdge(const cpp2::AddEdgesRequest& req); + /** + * @brief check whether an error code belongs to kv store + * we can do retry / recover if we meet a kv store error + * but if we meet a logical error (retry will always fail) + * we should return error directly. + * @param code + * @return true + * @return false + */ + bool isKVStoreError(Code code); - int64_t toInt(const ::nebula::Value& val); + std::string makeReadableEdge(const cpp2::AddEdgesRequest& req); protected: GraphSpaceID spaceId_; PartitionID localPartId_; PartitionID remotePartId_; cpp2::AddEdgesRequest req_; + TransactionManager::SPtrLock lkCore_; std::unique_ptr lk_{nullptr}; int retryLimit_{10}; - // term at prepareLocal, not allowed to change during execution - TermID term_{-1}; - - // set to true when prime insert succeed - // in processLocal(), we check this to determine if need to do abort() - bool primeInserted_{false}; std::vector kvErased_; std::vector kvAppend_; folly::Optional edgeVer_{folly::none}; - int64_t resumedEdgeVer_{-1}; - // for debug / trace purpose + // for trace purpose std::string uuid_; + // as we print all description in finish(), + // we can log execution clue in this + std::string execDesc_; + // for debug, edge "100"->"101" will print like 2231303022->2231303122 // which is hard to recognize. 
Transform to human readable format std::string readableEdgeDesc_; - nebula::cpp2::PropertyType spaceVidType_{nebula::cpp2::PropertyType::UNKNOWN}; }; } // namespace storage diff --git a/src/storage/transaction/ChainAddEdgesRemoteProcessor.cpp b/src/storage/transaction/ChainAddEdgesRemoteProcessor.cpp index 94dfce48417..01f648ff143 100644 --- a/src/storage/transaction/ChainAddEdgesRemoteProcessor.cpp +++ b/src/storage/transaction/ChainAddEdgesRemoteProcessor.cpp @@ -42,7 +42,7 @@ void ChainAddEdgesRemoteProcessor::process(const cpp2::ChainAddEdgesRequest& req // need to do this after set spaceVidLen_ auto keys = getStrEdgeKeys(req); for (auto& key : keys) { - LOG(INFO) << uuid_ << ", key = " << folly::hexlify(key); + VLOG(2) << uuid_ << ", key = " << folly::hexlify(key); } } commit(req); @@ -61,7 +61,7 @@ void ChainAddEdgesRemoteProcessor::commit(const cpp2::ChainAddEdgesRequest& req) rc = part.code; handleErrorCode(part.code, spaceId, part.get_part_id()); } - VLOG(1) << uuid_ << " " << apache::thrift::util::enumNameSafe(rc); + VLOG(2) << uuid_ << " " << apache::thrift::util::enumNameSafe(rc); this->result_ = resp.get_result(); this->onFinished(); }); diff --git a/src/storage/transaction/ChainBaseProcessor.h b/src/storage/transaction/ChainBaseProcessor.h index c20f7dc6e84..e9f2e3e128c 100644 --- a/src/storage/transaction/ChainBaseProcessor.h +++ b/src/storage/transaction/ChainBaseProcessor.h @@ -20,6 +20,8 @@ using Code = ::nebula::cpp2::ErrorCode; * */ class ChainBaseProcessor { + friend class ChainProcessorFactory; + public: virtual ~ChainBaseProcessor() = default; @@ -42,14 +44,10 @@ class ChainBaseProcessor { virtual void finish() = 0; protected: - void setErrorCode(Code code) { - if (code_ == Code::SUCCEEDED) { - code_ = code; - } - } - - protected: - Code code_ = Code::SUCCEEDED; + Code rcPrepare_ = Code::SUCCEEDED; + Code rcRemote_ = Code::E_UNKNOWN; + Code rcCommit_ = Code::E_UNKNOWN; + TermID term_; folly::Promise finished_; }; diff --git 
a/src/storage/transaction/ChainDeleteEdgesLocalProcessor.cpp b/src/storage/transaction/ChainDeleteEdgesLocalProcessor.cpp index f9c9e1951b8..fe2731ef48a 100644 --- a/src/storage/transaction/ChainDeleteEdgesLocalProcessor.cpp +++ b/src/storage/transaction/ChainDeleteEdgesLocalProcessor.cpp @@ -29,10 +29,10 @@ void ChainDeleteEdgesLocalProcessor::process(const cpp2::DeleteEdgesRequest& req folly::SemiFuture ChainDeleteEdgesLocalProcessor::prepareLocal() { txnId_ = ConsistUtil::strUUID(); - VLOG(1) << txnId_ << " prepareLocal(): " << DeleteEdgesRequestHelper::explain(req_); if (!lockEdges(req_)) { - return Code::E_WRITE_WRITE_CONFLICT; + rcPrepare_ = Code::E_WRITE_WRITE_CONFLICT; + return rcPrepare_; } primes_ = makePrime(req_); @@ -42,12 +42,7 @@ folly::SemiFuture ChainDeleteEdgesLocalProcessor::prepareLocal() { auto [pro, fut] = folly::makePromiseContract(); env_->kvstore_->asyncMultiPut( spaceId_, localPartId_, std::move(primes), [p = std::move(pro), this](auto rc) mutable { - if (rc == nebula::cpp2::ErrorCode::SUCCEEDED) { - setPrime_ = true; - } else { - LOG(WARNING) << txnId_ << "kvstore err: " << apache::thrift::util::enumNameSafe(rc); - } - + rcPrepare_ = rc; p.setValue(rc); }); return std::move(fut); @@ -55,8 +50,8 @@ folly::SemiFuture ChainDeleteEdgesLocalProcessor::prepareLocal() { folly::SemiFuture ChainDeleteEdgesLocalProcessor::processRemote(Code code) { VLOG(1) << txnId_ << " prepareLocal(), code = " << apache::thrift::util::enumNameSafe(code); - if (code != Code::SUCCEEDED) { - return code; + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; } DCHECK_EQ(req_.get_parts().size(), 1); auto reversedRequest = reverseRequest(req_); @@ -68,53 +63,45 @@ folly::SemiFuture ChainDeleteEdgesLocalProcessor::processRemote(Code code) folly::SemiFuture ChainDeleteEdgesLocalProcessor::processLocal(Code code) { VLOG(1) << txnId_ << " processRemote(), code = " << apache::thrift::util::enumNameSafe(code); - - bool remoteFailed{false}; - if (code == 
Code::SUCCEEDED) { - // do nothing - } else if (code == Code::E_RPC_FAILURE) { - code_ = Code::SUCCEEDED; - } else { - code_ = code; - remoteFailed = true; + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; } - auto [currTerm, suc] = env_->txnMan_->getTerm(spaceId_, localPartId_); + auto [currTerm, suc] = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); if (currTerm != term_) { LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; + rcCommit_ = Code::E_LEADER_CHANGED; + return rcCommit_; } - if (code == Code::E_RPC_FAILURE) { + if (rcRemote_ == Code::E_RPC_FAILURE) { + auto keyPrefix = ConsistUtil::doublePrimeTable(localPartId_); + setDoublePrime_ = true; for (auto& kv : primes_) { - auto key = - ConsistUtil::doublePrimeTable().append(kv.first.substr(ConsistUtil::primeTable().size())); - setDoublePrime_ = true; + auto key = keyPrefix + kv.first.substr(sizeof(PartitionID)); doublePrimes_.emplace_back(key, kv.second); } - reportFailed(ResumeType::RESUME_REMOTE); } - if (code_ == Code::SUCCEEDED) { + if (rcRemote_ == Code::SUCCEEDED || rcRemote_ == Code::E_RPC_FAILURE) { return commitLocal(); } else { - if (setPrime_ && remoteFailed) { - return abort(); - } + return abort(); } - return code_; + // actually, should return either commit() or abort() + return rcRemote_; } void ChainDeleteEdgesLocalProcessor::reportFailed(ResumeType type) { if (lk_ != nullptr) { - lk_->forceUnlock(); + lk_->setAutoUnlock(false); } for (auto& edgesOfPart : req_.get_parts()) { auto partId = edgesOfPart.first; for (auto& key : edgesOfPart.second) { auto strKey = ConsistUtil::edgeKey(spaceVidLen_, partId, key); - env_->txnMan_->addPrime(spaceId_, strKey, type); + env_->txnMan_->addPrime(spaceId_, localPartId_, term_, strKey, type); } } } @@ -142,7 +129,7 @@ std::vector ChainDeleteEdgesLocalProcessor::makePrime( val += ConsistUtil::deleteIdentifier(); auto partId = singleReq.get_parts().begin()->first; auto& edgeKey = 
singleReq.get_parts().begin()->second.back(); - auto key = ConsistUtil::primeTable(); + auto key = ConsistUtil::primeTable(partId); key += ConsistUtil::edgeKey(spaceVidLen_, partId, edgeKey); ret.emplace_back(std::make_pair(key, val)); } @@ -154,15 +141,13 @@ Code ChainDeleteEdgesLocalProcessor::checkRequest(const cpp2::DeleteEdgesRequest req_ = req; DCHECK(!req_.get_parts().empty()); spaceId_ = req_.get_space_id(); + localPartId_ = req.get_parts().begin()->first; - auto vidType = env_->metaClient_->getSpaceVidType(spaceId_); - if (!vidType.ok()) { - LOG(WARNING) << "can't get vidType, spaceId_ = " << spaceId_; - return Code::E_SPACE_NOT_FOUND; - } else { - spaceVidType_ = vidType.value(); + auto rc = getSpaceVidLen(spaceId_); + if (rc != Code::SUCCEEDED) { + return rc; } - localPartId_ = req.get_parts().begin()->first; + auto part = env_->kvstore_->part(spaceId_, localPartId_); if (!nebula::ok(part)) { pushResultCode(nebula::error(part), localPartId_); @@ -180,13 +165,6 @@ Code ChainDeleteEdgesLocalProcessor::checkRequest(const cpp2::DeleteEdgesRequest term_ = (nebula::value(part))->termId(); - auto vidLen = env_->schemaMan_->getSpaceVidLen(spaceId_); - if (!vidLen.ok()) { - LOG(ERROR) << "getSpaceVidLen failed, spaceId_: " << spaceId_ - << ", status: " << vidLen.status(); - return Code::E_INVALID_SPACEVIDLEN; - } - spaceVidLen_ = vidLen.value(); return Code::SUCCEEDED; } @@ -199,12 +177,7 @@ folly::SemiFuture ChainDeleteEdgesLocalProcessor::commitLocal() { auto [pro, fut] = folly::makePromiseContract(); std::move(futProc).thenValue([&, p = std::move(pro)](auto&& resp) mutable { auto rc = ConsistUtil::getErrorCode(resp); - VLOG(1) << txnId_ << " commitLocal() " << apache::thrift::util::enumNameSafe(rc); - if (rc == Code::SUCCEEDED) { - // do nothing - } else { - reportFailed(ResumeType::RESUME_CHAIN); - } + rcCommit_ = rc; p.setValue(rc); }); proc->process(req_); @@ -218,22 +191,22 @@ void ChainDeleteEdgesLocalProcessor::doRpc(folly::Promise&& promise, 
promise.setValue(Code::E_LEADER_CHANGED); return; } - auto* iClient = env_->txnMan_->getInternalClient(); + auto* iClient = env_->interClient_; folly::Promise p; auto f = p.getFuture(); iClient->chainDeleteEdges(req, txnId_, term_, std::move(p)); std::move(f).thenTry([=, p = std::move(promise)](auto&& t) mutable { - auto code = t.hasValue() ? t.value() : Code::E_RPC_FAILURE; - switch (code) { + rcRemote_ = t.hasValue() ? t.value() : Code::E_RPC_FAILURE; + switch (rcRemote_) { case Code::E_LEADER_CHANGED: doRpc(std::move(p), std::move(req), ++retry); break; default: - p.setValue(code); + p.setValue(rcRemote_); break; } - return code; + return rcRemote_; }); } @@ -305,19 +278,15 @@ folly::SemiFuture ChainDeleteEdgesLocalProcessor::abort() { std::move(keyRemoved), [p = std::move(pro), this](auto rc) mutable { VLOG(1) << txnId_ << " abort()=" << apache::thrift::util::enumNameSafe(rc); - if (rc == Code::SUCCEEDED) { - // do nothing - } else { - reportFailed(ResumeType::RESUME_CHAIN); - } + rcCommit_ = rc; p.setValue(rc); }); return std::move(fut); } bool ChainDeleteEdgesLocalProcessor::lockEdges(const cpp2::DeleteEdgesRequest& req) { - auto* lockCore = env_->txnMan_->getLockCore(req.get_space_id(), localPartId_); - if (!lockCore) { + lkCore_ = env_->txnMan_->getLockCore(req.get_space_id(), localPartId_, term_); + if (!lkCore_) { VLOG(1) << txnId_ << "get lock failed."; return false; } @@ -328,9 +297,10 @@ bool ChainDeleteEdgesLocalProcessor::lockEdges(const cpp2::DeleteEdgesRequest& r keys.emplace_back(std::move(eKey)); } bool dedup = true; - lk_ = std::make_unique(lockCore, keys, dedup); + lk_ = std::make_unique(lkCore_.get(), keys, dedup); if (!lk_->isLocked()) { - VLOG(1) << txnId_ << " conflict " << ConsistUtil::readableKey(spaceVidLen_, lk_->conflictKey()); + VLOG(1) << txnId_ << "term=" << term_ << ", conflict key = " + << ConsistUtil::readableKey(spaceVidLen_, isIntId_, lk_->conflictKey()); } return lk_->isLocked(); } @@ -350,9 +320,51 @@ cpp2::DeleteEdgesRequest 
ChainDeleteEdgesLocalProcessor::reverseRequest( } void ChainDeleteEdgesLocalProcessor::finish() { - VLOG(1) << txnId_ << " commitLocal(), code_ = " << apache::thrift::util::enumNameSafe(code_); - pushResultCode(code_, localPartId_); - finished_.setValue(code_); + VLOG(1) << txnId_ << " commitLocal() = " << apache::thrift::util::enumNameSafe(rcCommit_); + TermID currTerm = 0; + std::tie(currTerm, std::ignore) = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + do { + if (term_ != currTerm) { + // transaction manager will do the clean. + break; + } + + if (rcPrepare_ != Code::SUCCEEDED) { + break; // nothing written, no need to recover. + } + + if (rcCommit_ != Code::SUCCEEDED) { + reportFailed(ResumeType::RESUME_CHAIN); + break; + } + + if (rcRemote_ == Code::E_RPC_FAILURE) { + reportFailed(ResumeType::RESUME_REMOTE); + break; + } + } while (0); + + auto rc = Code::SUCCEEDED; + do { + if (rcPrepare_ != Code::SUCCEEDED) { + rc = rcPrepare_; + break; + } + + if (rcCommit_ != Code::SUCCEEDED) { + rc = rcCommit_; + break; + } + + // rcCommit_ may be set SUCCEEDED in abort(). + // which we should return the error code or remote. 
+ if (rcRemote_ != Code::E_RPC_FAILURE) { + rc = rcRemote_; + break; + } + } while (0); + pushResultCode(rc, localPartId_); + finished_.setValue(rc); onFinished(); } diff --git a/src/storage/transaction/ChainDeleteEdgesLocalProcessor.h b/src/storage/transaction/ChainDeleteEdgesLocalProcessor.h index 2c4f467b3d3..5a356b36e0d 100644 --- a/src/storage/transaction/ChainDeleteEdgesLocalProcessor.h +++ b/src/storage/transaction/ChainDeleteEdgesLocalProcessor.h @@ -86,6 +86,7 @@ class ChainDeleteEdgesLocalProcessor : public BaseProcessor, PartitionID localPartId_; PartitionID remotePartId_; cpp2::DeleteEdgesRequest req_; + TransactionManager::SPtrLock lkCore_; std::unique_ptr lk_{nullptr}; int retryLimit_{10}; /** @@ -107,8 +108,6 @@ class ChainDeleteEdgesLocalProcessor : public BaseProcessor, std::string txnId_; - ::nebula::cpp2::PropertyType spaceVidType_{::nebula::cpp2::PropertyType::UNKNOWN}; - // for debug, edge "100"->"101" will print like 2231303022->2231303122 // which is hard to recognize. 
Transform to human readable format std::string readableEdgeDesc_; diff --git a/src/storage/transaction/ChainDeleteEdgesResumeProcessor.cpp b/src/storage/transaction/ChainDeleteEdgesResumeProcessor.cpp index 19698798cff..3b3da57339d 100644 --- a/src/storage/transaction/ChainDeleteEdgesResumeProcessor.cpp +++ b/src/storage/transaction/ChainDeleteEdgesResumeProcessor.cpp @@ -17,15 +17,14 @@ ChainDeleteEdgesResumeProcessor::ChainDeleteEdgesResumeProcessor(StorageEnv* env const std::string& val) : ChainDeleteEdgesLocalProcessor(env) { req_ = DeleteEdgesRequestHelper::parseDeleteEdgesRequest(val); - - VLOG(1) << "explain req_: " << DeleteEdgesRequestHelper::explain(req_); } folly::SemiFuture ChainDeleteEdgesResumeProcessor::prepareLocal() { - code_ = checkRequest(req_); + rcPrepare_ = checkRequest(req_); primes_ = makePrime(req_); setPrime_ = true; - return code_; + rcPrepare_ = Code::SUCCEEDED; + return rcPrepare_; } folly::SemiFuture ChainDeleteEdgesResumeProcessor::processRemote(Code code) { @@ -35,24 +34,25 @@ folly::SemiFuture ChainDeleteEdgesResumeProcessor::processRemote(Code code folly::SemiFuture ChainDeleteEdgesResumeProcessor::processLocal(Code code) { VLOG(1) << txnId_ << " processRemote() " << apache::thrift::util::enumNameSafe(code); - setErrorCode(code); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; + } if (code == Code::E_RPC_FAILURE) { for (auto& kv : primes_) { - auto key = - ConsistUtil::doublePrimeTable().append(kv.first.substr(ConsistUtil::primeTable().size())); + auto key = ConsistUtil::doublePrimeTable(localPartId_) + .append(kv.first.substr(ConsistUtil::primeTable(localPartId_).size())); doublePrimes_.emplace_back(key, kv.second); } } - if (code == Code::E_RPC_FAILURE || code == Code::SUCCEEDED) { + if (rcRemote_ == Code::E_RPC_FAILURE || rcRemote_ == Code::SUCCEEDED) { // if there are something wrong other than rpc failure // we need to keep the resume retry(by not remove those prime key) - code_ = commitLocal().get(); - return 
code_; + return commitLocal(); } - return code_; + return rcRemote_; } } // namespace storage diff --git a/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.cpp b/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.cpp index a0e0cdbc84f..cbb970a21fc 100644 --- a/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.cpp +++ b/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.cpp @@ -17,48 +17,59 @@ ChainDeleteEdgesResumeRemoteProcessor::ChainDeleteEdgesResumeRemoteProcessor(Sto } folly::SemiFuture ChainDeleteEdgesResumeRemoteProcessor::prepareLocal() { - code_ = checkRequest(req_); - return code_; + rcPrepare_ = checkRequest(req_); + return rcPrepare_; } folly::SemiFuture ChainDeleteEdgesResumeRemoteProcessor::processRemote(Code code) { VLOG(1) << txnId_ << " prepareLocal() " << apache::thrift::util::enumNameSafe(code); - return ChainDeleteEdgesLocalProcessor::processRemote(code); } folly::SemiFuture ChainDeleteEdgesResumeRemoteProcessor::processLocal(Code code) { VLOG(1) << txnId_ << " processRemote() " << apache::thrift::util::enumNameSafe(code); - - setErrorCode(code); - - if (code == Code::E_RPC_FAILURE) { - return code_; + if (code != Code::SUCCEEDED) { + return code; } - if (code == Code::SUCCEEDED) { - // if there are something wrong other than rpc failure - // we need to keep the resume retry(by not remove double prime key) - std::vector doublePrimeKeys; - for (auto& partOfKeys : req_.get_parts()) { - std::string key; - for (auto& edgeKey : partOfKeys.second) { - doublePrimeKeys.emplace_back(); - doublePrimeKeys.back() = ConsistUtil::doublePrimeTable().append( - ConsistUtil::edgeKey(spaceVidLen_, localPartId_, edgeKey)); - } + // if there are something wrong other than rpc failure + // we need to keep the resume retry(by not remove double prime key) + std::vector doublePrimeKeys; + for (auto& partOfKeys : req_.get_parts()) { + std::string key; + for (auto& edgeKey : partOfKeys.second) { + 
doublePrimeKeys.emplace_back(); + doublePrimeKeys.back() = + ConsistUtil::doublePrimeTable(localPartId_) + .append(ConsistUtil::edgeKey(spaceVidLen_, localPartId_, edgeKey)); } - - folly::Baton baton; - env_->kvstore_->asyncMultiRemove( - spaceId_, localPartId_, std::move(doublePrimeKeys), [this, &baton](auto&& rc) { - this->code_ = rc; - baton.post(); - }); - baton.wait(); } - return code_; + auto [pro, fut] = folly::makePromiseContract(); + env_->kvstore_->asyncMultiRemove(spaceId_, + localPartId_, + std::move(doublePrimeKeys), + [this, p = std::move(pro)](auto&& rc) mutable { + rcCommit_ = rc; + p.setValue(rc); + }); + return std::move(fut); +} + +void ChainDeleteEdgesResumeRemoteProcessor::finish() { + VLOG(1) << " commitLocal() = " << apache::thrift::util::enumNameSafe(rcCommit_); + TermID currTerm = 0; + std::tie(currTerm, std::ignore) = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + if (term_ == currTerm) { + if (rcCommit_ != Code::SUCCEEDED || rcRemote_ != Code::SUCCEEDED) { + reportFailed(ResumeType::RESUME_REMOTE); + } + } else { + // transaction manager will do the clean. 
+ } + pushResultCode(rcCommit_, localPartId_); + finished_.setValue(rcCommit_); + onFinished(); } } // namespace storage diff --git a/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.h b/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.h index 31c091f5962..3ae19e2dd5c 100644 --- a/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.h +++ b/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.h @@ -16,13 +16,15 @@ class ChainDeleteEdgesResumeRemoteProcessor : public ChainDeleteEdgesLocalProces return new ChainDeleteEdgesResumeRemoteProcessor(env, val); } + virtual ~ChainDeleteEdgesResumeRemoteProcessor() = default; + folly::SemiFuture prepareLocal() override; folly::SemiFuture processRemote(nebula::cpp2::ErrorCode code) override; folly::SemiFuture processLocal(nebula::cpp2::ErrorCode code) override; - virtual ~ChainDeleteEdgesResumeRemoteProcessor() = default; + void finish() override; protected: ChainDeleteEdgesResumeRemoteProcessor(StorageEnv* env, const std::string& val); diff --git a/src/storage/transaction/ChainProcessorFactory.cpp b/src/storage/transaction/ChainProcessorFactory.cpp index b8a60e60e4d..2f25402c683 100644 --- a/src/storage/transaction/ChainProcessorFactory.cpp +++ b/src/storage/transaction/ChainProcessorFactory.cpp @@ -7,16 +7,43 @@ #include "storage/transaction/ChainDeleteEdgesResumeProcessor.h" #include "storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.h" +#include "storage/transaction/ChainResumeAddDoublePrimeProcessor.h" +#include "storage/transaction/ChainResumeAddPrimeProcessor.h" +#include "storage/transaction/ChainResumeUpdateDoublePrimeProcessor.h" +#include "storage/transaction/ChainResumeUpdatePrimeProcessor.h" #include "storage/transaction/ConsistUtil.h" -#include "storage/transaction/ResumeAddEdgeProcessor.h" -#include "storage/transaction/ResumeAddEdgeRemoteProcessor.h" -#include "storage/transaction/ResumeUpdateProcessor.h" -#include 
"storage/transaction/ResumeUpdateRemoteProcessor.h" namespace nebula { namespace storage { +ChainBaseProcessor* ChainProcessorFactory::make(StorageEnv* env, + GraphSpaceID spaceId, + TermID termId, + const std::string& edgeKey, + ResumeType type) { + auto partId = NebulaKeyUtils::getPart(edgeKey); + auto prefix = (type == ResumeType::RESUME_CHAIN) ? ConsistUtil::primeTable(partId) + : ConsistUtil::doublePrimeTable(partId); + auto key = prefix + edgeKey; + std::string val; + auto rc = Code::SUCCEEDED; + do { + rc = env->kvstore_->get(spaceId, partId, key, &val); + } while (rc == Code::E_LEADER_LEASE_FAILED); + + if (rc != Code::SUCCEEDED) { + VLOG(1) << "resume edge space=" << spaceId << ", part=" << partId + << ", hex = " << folly::hexlify(edgeKey) + << ", rc = " << apache::thrift::util::enumNameSafe(rc); + return nullptr; + } + + ResumeOptions opt(type, val); + return makeProcessor(env, termId, opt); +} + ChainBaseProcessor* ChainProcessorFactory::makeProcessor(StorageEnv* env, + TermID termId, const ResumeOptions& options) { ChainBaseProcessor* ret = nullptr; auto requestType = ConsistUtil::parseType(options.primeValue); @@ -24,11 +51,13 @@ ChainBaseProcessor* ChainProcessorFactory::makeProcessor(StorageEnv* env, case RequestType::INSERT: { switch (options.resumeType) { case ResumeType::RESUME_CHAIN: { - ret = ResumeAddEdgeProcessor::instance(env, options.primeValue); + VLOG(2) << "make ChainResumeAddPrimeProcessor"; + ret = ChainResumeAddPrimeProcessor::instance(env, options.primeValue); break; } case ResumeType::RESUME_REMOTE: { - ret = ResumeAddEdgeRemoteProcessor::instance(env, options.primeValue); + VLOG(2) << "make ChainResumeAddDoublePrimeProcessor"; + ret = ChainResumeAddDoublePrimeProcessor::instance(env, options.primeValue); break; } case ResumeType::UNKNOWN: { @@ -40,11 +69,13 @@ ChainBaseProcessor* ChainProcessorFactory::makeProcessor(StorageEnv* env, case RequestType::UPDATE: { switch (options.resumeType) { case ResumeType::RESUME_CHAIN: { - ret = 
ResumeUpdateProcessor::instance(env, options.primeValue); + VLOG(2) << "make ChainResumeUpdatePrimeProcessor"; + ret = ChainResumeUpdatePrimeProcessor::instance(env, options.primeValue); break; } case ResumeType::RESUME_REMOTE: { - ret = ResumeUpdateRemoteProcessor::instance(env, options.primeValue); + VLOG(2) << "make ChainResumeUpdateDoublePrimeProcessor"; + ret = ChainResumeUpdateDoublePrimeProcessor::instance(env, options.primeValue); break; } case ResumeType::UNKNOWN: { @@ -56,10 +87,12 @@ ChainBaseProcessor* ChainProcessorFactory::makeProcessor(StorageEnv* env, case RequestType::DELETE: { switch (options.resumeType) { case ResumeType::RESUME_CHAIN: { + VLOG(1) << "make ChainDeleteEdgesResumeProcessor"; ret = ChainDeleteEdgesResumeProcessor::instance(env, options.primeValue); break; } case ResumeType::RESUME_REMOTE: { + VLOG(2) << "make ChainDeleteEdgesResumeRemoteProcessor"; ret = ChainDeleteEdgesResumeRemoteProcessor::instance(env, options.primeValue); break; } @@ -73,6 +106,7 @@ ChainBaseProcessor* ChainProcessorFactory::makeProcessor(StorageEnv* env, LOG(FATAL) << "RequestType::UNKNOWN: not supposed run here"; } } + ret->term_ = termId; return ret; } diff --git a/src/storage/transaction/ChainProcessorFactory.h b/src/storage/transaction/ChainProcessorFactory.h index 6c1518199d1..05a062c0fda 100644 --- a/src/storage/transaction/ChainProcessorFactory.h +++ b/src/storage/transaction/ChainProcessorFactory.h @@ -14,7 +14,15 @@ namespace storage { class ChainProcessorFactory { public: - static ChainBaseProcessor* makeProcessor(StorageEnv* env, const ResumeOptions& options); + static ChainBaseProcessor* makeProcessor(StorageEnv* env, + TermID termId, + const ResumeOptions& options); + + static ChainBaseProcessor* make(StorageEnv* env, + GraphSpaceID spaceId, + TermID termId, + const std::string& edgeKey, + ResumeType type); }; } // namespace storage diff --git a/src/storage/transaction/ChainResumeAddDoublePrimeProcessor.cpp 
b/src/storage/transaction/ChainResumeAddDoublePrimeProcessor.cpp new file mode 100644 index 00000000000..e23c69293b1 --- /dev/null +++ b/src/storage/transaction/ChainResumeAddDoublePrimeProcessor.cpp @@ -0,0 +1,78 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "storage/transaction/ChainResumeAddDoublePrimeProcessor.h" + +namespace nebula { +namespace storage { + +ChainResumeAddDoublePrimeProcessor::ChainResumeAddDoublePrimeProcessor(StorageEnv* env, + const std::string& val) + : ChainAddEdgesLocalProcessor(env) { + req_ = ConsistUtil::parseAddRequest(val); + + uuid_ = ConsistUtil::strUUID() + " ResumeDoublePrime, "; +} + +folly::SemiFuture ChainResumeAddDoublePrimeProcessor::prepareLocal() { + ChainAddEdgesLocalProcessor::prepareRequest(req_); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; + } + + auto spaceId = req_.get_space_id(); + auto numOfPart = env_->metaClient_->partsNum(spaceId); + if (!numOfPart.ok()) { + rcPrepare_ = Code::E_SPACE_NOT_FOUND; + return Code::E_SPACE_NOT_FOUND; + } + auto& parts = req_.get_parts(); + auto& dstId = parts.begin()->second.back().get_key().get_dst().getStr(); + remotePartId_ = env_->metaClient_->partId(numOfPart.value(), dstId); + + return Code::SUCCEEDED; +} + +folly::SemiFuture ChainResumeAddDoublePrimeProcessor::processRemote(Code code) { + return ChainAddEdgesLocalProcessor::processRemote(code); +} + +folly::SemiFuture ChainResumeAddDoublePrimeProcessor::processLocal(Code code) { + VLOG(2) << uuid_ << " commitLocal() = " << apache::thrift::util::enumNameSafe(code); + auto currTerm = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + if (currTerm.first != term_) { + rcCommit_ = Code::E_LEADER_CHANGED; + return rcCommit_; + } + + if (code == Code::SUCCEEDED) { + // if there are something wrong other than rpc failure + // we need to keep the resume retry(by not remove those prime key) + eraseDoublePrime(); + 
return abort(); + } + + return code; +} + +void ChainResumeAddDoublePrimeProcessor::finish() { + if (rcPrepare_ == Code::SUCCEEDED) { + VLOG(1) << uuid_ << ", " << makeReadableEdge(req_) + << ", rcPrepare_ = " << apache::thrift::util::enumNameSafe(rcPrepare_) + << ", rcRemote_ = " << apache::thrift::util::enumNameSafe(rcRemote_) + << ", rcCommit_ = " << apache::thrift::util::enumNameSafe(rcCommit_); + } + if (rcCommit_ != Code::SUCCEEDED || rcRemote_ != Code::SUCCEEDED) { + reportFailed(ResumeType::RESUME_REMOTE); + } else { + // nothing todo + } + pushResultCode(rcCommit_, localPartId_); + finished_.setValue(rcCommit_); + onFinished(); +} + +} // namespace storage +} // namespace nebula diff --git a/src/storage/transaction/ResumeAddEdgeProcessor.h b/src/storage/transaction/ChainResumeAddDoublePrimeProcessor.h similarity index 59% rename from src/storage/transaction/ResumeAddEdgeProcessor.h rename to src/storage/transaction/ChainResumeAddDoublePrimeProcessor.h index 797bf7979aa..4eb918e2c99 100644 --- a/src/storage/transaction/ResumeAddEdgeProcessor.h +++ b/src/storage/transaction/ChainResumeAddDoublePrimeProcessor.h @@ -10,22 +10,24 @@ namespace nebula { namespace storage { -class ResumeAddEdgeProcessor : public ChainAddEdgesLocalProcessor { +class ChainResumeAddDoublePrimeProcessor : public ChainAddEdgesLocalProcessor { public: - static ResumeAddEdgeProcessor* instance(StorageEnv* env, const std::string& val) { - return new ResumeAddEdgeProcessor(env, val); + static ChainResumeAddDoublePrimeProcessor* instance(StorageEnv* env, const std::string& val) { + return new ChainResumeAddDoublePrimeProcessor(env, val); } + virtual ~ChainResumeAddDoublePrimeProcessor() = default; + folly::SemiFuture prepareLocal() override; folly::SemiFuture processRemote(nebula::cpp2::ErrorCode code) override; folly::SemiFuture processLocal(nebula::cpp2::ErrorCode code) override; - virtual ~ResumeAddEdgeProcessor() = default; + void finish() override; protected: - 
ResumeAddEdgeProcessor(StorageEnv* env, const std::string& val); + ChainResumeAddDoublePrimeProcessor(StorageEnv* env, const std::string& val); }; } // namespace storage diff --git a/src/storage/transaction/ChainResumeAddPrimeProcessor.cpp b/src/storage/transaction/ChainResumeAddPrimeProcessor.cpp new file mode 100644 index 00000000000..17675b6e307 --- /dev/null +++ b/src/storage/transaction/ChainResumeAddPrimeProcessor.cpp @@ -0,0 +1,76 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "storage/transaction/ChainResumeAddPrimeProcessor.h" + +namespace nebula { +namespace storage { + +ChainResumeAddPrimeProcessor::ChainResumeAddPrimeProcessor(StorageEnv* env, const std::string& val) + : ChainAddEdgesLocalProcessor(env) { + req_ = ConsistUtil::parseAddRequest(val); + + uuid_ = ConsistUtil::strUUID(); + execDesc_ = ", ResumePrime. "; +} + +folly::SemiFuture ChainResumeAddPrimeProcessor::prepareLocal() { + VLOG(2) << uuid_ << " resume prime " << readableEdgeDesc_; + prepareRequest(req_); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; + } + auto spaceId = req_.get_space_id(); + auto numOfPart = env_->metaClient_->partsNum(spaceId); + if (!numOfPart.ok()) { + rcPrepare_ = Code::E_SPACE_NOT_FOUND; + return rcPrepare_; + } + auto& parts = req_.get_parts(); + auto& srcId = parts.begin()->second.back().get_key().get_src().getStr(); + auto& dstId = parts.begin()->second.back().get_key().get_dst().getStr(); + localPartId_ = env_->metaClient_->partId(numOfPart.value(), srcId); + remotePartId_ = env_->metaClient_->partId(numOfPart.value(), dstId); + + return rcPrepare_; +} + +folly::SemiFuture ChainResumeAddPrimeProcessor::processRemote(Code code) { + VLOG(2) << uuid_ << " prepareLocal() " << apache::thrift::util::enumNameSafe(code); + return ChainAddEdgesLocalProcessor::processRemote(code); +} + +/** + * @brief the most important difference to ChainAddEdgesLocalProcessor is + * 
we can not abort, (delete an exist prime) + * @return folly::SemiFuture + */ +folly::SemiFuture ChainResumeAddPrimeProcessor::processLocal(Code) { + VLOG(2) << uuid_ << " processRemote() " << apache::thrift::util::enumNameSafe(rcRemote_); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; + } + + auto currTerm = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + if (currTerm.first != term_) { + rcCommit_ = Code::E_LEADER_CHANGED; + } + + if (rcRemote_ == Code::E_RPC_FAILURE) { + kvAppend_ = ChainAddEdgesLocalProcessor::makeDoublePrime(); + } + + if (rcRemote_ == Code::E_RPC_FAILURE || rcRemote_ == Code::SUCCEEDED) { + // if there are something wrong other than rpc failure + // we need to keep the resume retry(by not remove those prime key) + erasePrime(); + return commit(); + } + + return rcRemote_; +} + +} // namespace storage +} // namespace nebula diff --git a/src/storage/transaction/ResumeAddEdgeRemoteProcessor.h b/src/storage/transaction/ChainResumeAddPrimeProcessor.h similarity index 67% rename from src/storage/transaction/ResumeAddEdgeRemoteProcessor.h rename to src/storage/transaction/ChainResumeAddPrimeProcessor.h index a9046814064..4f6251c3fb1 100644 --- a/src/storage/transaction/ResumeAddEdgeRemoteProcessor.h +++ b/src/storage/transaction/ChainResumeAddPrimeProcessor.h @@ -10,22 +10,22 @@ namespace nebula { namespace storage { -class ResumeAddEdgeRemoteProcessor : public ChainAddEdgesLocalProcessor { +class ChainResumeAddPrimeProcessor : public ChainAddEdgesLocalProcessor { public: - static ResumeAddEdgeRemoteProcessor* instance(StorageEnv* env, const std::string& val) { - return new ResumeAddEdgeRemoteProcessor(env, val); + static ChainResumeAddPrimeProcessor* instance(StorageEnv* env, const std::string& val) { + return new ChainResumeAddPrimeProcessor(env, val); } + virtual ~ChainResumeAddPrimeProcessor() = default; + folly::SemiFuture prepareLocal() override; folly::SemiFuture processRemote(nebula::cpp2::ErrorCode code) override; 
folly::SemiFuture processLocal(nebula::cpp2::ErrorCode code) override; - virtual ~ResumeAddEdgeRemoteProcessor() = default; - protected: - ResumeAddEdgeRemoteProcessor(StorageEnv* env, const std::string& val); + ChainResumeAddPrimeProcessor(StorageEnv* env, const std::string& val); }; } // namespace storage diff --git a/src/storage/transaction/ChainResumeProcessor.cpp b/src/storage/transaction/ChainResumeProcessor.cpp deleted file mode 100644 index 4fad8f13749..00000000000 --- a/src/storage/transaction/ChainResumeProcessor.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2021 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#include "storage/transaction/ChainResumeProcessor.h" - -#include "storage/transaction/ChainAddEdgesLocalProcessor.h" -#include "storage/transaction/ChainProcessorFactory.h" -#include "storage/transaction/ChainUpdateEdgeLocalProcessor.h" -#include "storage/transaction/ConsistUtil.h" -#include "storage/transaction/TransactionManager.h" - -namespace nebula { -namespace storage { - -void ChainResumeProcessor::process() { - auto* table = env_->txnMan_->getDangleEdges(); - std::unique_ptr iter; - for (auto it = table->begin(); it != table->end(); ++it) { - auto spaceId = *reinterpret_cast(const_cast(it->first.c_str())); - auto edgeKey = std::string(it->first.c_str() + sizeof(GraphSpaceID), - it->first.size() - sizeof(GraphSpaceID)); - auto partId = NebulaKeyUtils::getPart(edgeKey); - auto prefix = (it->second == ResumeType::RESUME_CHAIN) ? 
ConsistUtil::primeTable() - : ConsistUtil::doublePrimeTable(); - auto key = prefix + edgeKey; - std::string val; - auto rc = env_->kvstore_->get(spaceId, partId, key, &val); - VLOG(1) << "resume edge space=" << spaceId << ", part=" << partId - << ", hex = " << folly::hexlify(edgeKey) - << ", rc = " << apache::thrift::util::enumNameSafe(rc); - if (rc == nebula::cpp2::ErrorCode::SUCCEEDED) { - // do nothing - } else if (rc == nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { - VLOG(1) << "kvstore->get() leader changed"; - auto getPart = env_->kvstore_->part(spaceId, partId); - if (nebula::ok(getPart) && !nebula::value(getPart)->isLeader()) { - // not leader any more, stop trying resume - env_->txnMan_->delPrime(spaceId, edgeKey); - } - continue; - } else if (rc == nebula::cpp2::ErrorCode::E_KEY_NOT_FOUND) { - // raft may rollback want we scanned. - env_->txnMan_->delPrime(spaceId, edgeKey); - } else { - LOG(WARNING) << "kvstore->get() failed, " << apache::thrift::util::enumNameSafe(rc); - continue; - } - - ResumeOptions opt(it->second, val); - auto* proc = ChainProcessorFactory::makeProcessor(env_, opt); - auto fut = proc->getFinished(); - env_->txnMan_->addChainTask(proc); - std::move(fut) - .thenValue([=](auto&& code) { - if (code == Code::SUCCEEDED) { - env_->txnMan_->delPrime(spaceId, edgeKey); - } else { - VLOG(1) << "recover failed: " << apache::thrift::util::enumNameSafe(rc); - } - }) - .get(); - } -} - -} // namespace storage -} // namespace nebula diff --git a/src/storage/transaction/ChainResumeProcessor.h b/src/storage/transaction/ChainResumeProcessor.h deleted file mode 100644 index ac3572e319f..00000000000 --- a/src/storage/transaction/ChainResumeProcessor.h +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright (c) 2021 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. 
- */ - -#pragma once - -#include "clients/storage/InternalStorageClient.h" -#include "common/utils/NebulaKeyUtils.h" -#include "storage/transaction/ChainAddEdgesLocalProcessor.h" -#include "storage/transaction/ChainBaseProcessor.h" -#include "storage/transaction/ChainUpdateEdgeLocalProcessor.h" -#include "storage/transaction/TransactionManager.h" - -namespace nebula { -namespace storage { - -class ChainResumeProcessor { - friend class ChainResumeProcessorTestHelper; - - public: - explicit ChainResumeProcessor(StorageEnv* env) : env_(env) {} - - void process(); - - private: - StorageEnv* env_{nullptr}; -}; - -} // namespace storage -} // namespace nebula diff --git a/src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.cpp b/src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.cpp new file mode 100644 index 00000000000..0b9aede2e0e --- /dev/null +++ b/src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.cpp @@ -0,0 +1,63 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +#include "storage/transaction/ChainResumeUpdateDoublePrimeProcessor.h" + +#include + +namespace nebula { +namespace storage { + +ChainResumeUpdateDoublePrimeProcessor::ChainResumeUpdateDoublePrimeProcessor(StorageEnv* env, + const std::string& val) + : ChainUpdateEdgeLocalProcessor(env) { + req_ = ConsistUtil::parseUpdateRequest(val); + ChainUpdateEdgeLocalProcessor::prepareRequest(req_); +} + +folly::SemiFuture ChainResumeUpdateDoublePrimeProcessor::prepareLocal() { + VLOG(1) << " prepareLocal()"; + std::tie(term_, rcPrepare_) = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + return rcPrepare_; +} + +folly::SemiFuture ChainResumeUpdateDoublePrimeProcessor::processRemote(Code code) { + VLOG(1) << " prepareLocal(), code = " << apache::thrift::util::enumNameSafe(code); + return ChainUpdateEdgeLocalProcessor::processRemote(code); +} + +folly::SemiFuture ChainResumeUpdateDoublePrimeProcessor::processLocal(Code code) { + VLOG(1) << " processRemote(), code = " << apache::thrift::util::enumNameSafe(code); + + if (code != Code::SUCCEEDED) { + // if there are something wrong other than rpc failure + // we need to keep the resume retry(by not remove those prime key) + return code; + } + + auto currTerm = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + if (currTerm.first != term_) { + rcCommit_ = Code::E_LEADER_CHANGED; + return rcCommit_; + } + + auto key = ConsistUtil::doublePrime(spaceVidLen_, localPartId_, req_.get_edge_key()); + kvErased_.emplace_back(std::move(key)); + return abort(); +} + +void ChainResumeUpdateDoublePrimeProcessor::finish() { + VLOG(1) << " commitLocal()=" << apache::thrift::util::enumNameSafe(rcCommit_); + if (isKVStoreError(rcCommit_) || rcRemote_ == Code::E_RPC_FAILURE) { + reportFailed(ResumeType::RESUME_REMOTE); + } + + pushResultCode(rcCommit_, req_.get_part_id()); + finished_.setValue(rcCommit_); + onFinished(); +} + +} // namespace storage +} // namespace nebula diff --git 
a/src/storage/transaction/ResumeUpdateRemoteProcessor.h b/src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.h similarity index 67% rename from src/storage/transaction/ResumeUpdateRemoteProcessor.h rename to src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.h index bb3171d061b..df4a6ba8bdf 100644 --- a/src/storage/transaction/ResumeUpdateRemoteProcessor.h +++ b/src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.h @@ -15,10 +15,10 @@ namespace storage { * if the TxnManager background resume thread found a prime key * it will create this processor to resume the complete update process */ -class ResumeUpdateRemoteProcessor : public ChainUpdateEdgeLocalProcessor { +class ChainResumeUpdateDoublePrimeProcessor : public ChainUpdateEdgeLocalProcessor { public: - static ResumeUpdateRemoteProcessor* instance(StorageEnv* env, const std::string& val) { - return new ResumeUpdateRemoteProcessor(env, val); + static ChainResumeUpdateDoublePrimeProcessor* instance(StorageEnv* env, const std::string& val) { + return new ChainResumeUpdateDoublePrimeProcessor(env, val); } folly::SemiFuture prepareLocal() override; @@ -29,10 +29,10 @@ class ResumeUpdateRemoteProcessor : public ChainUpdateEdgeLocalProcessor { void finish() override; - virtual ~ResumeUpdateRemoteProcessor() = default; + virtual ~ChainResumeUpdateDoublePrimeProcessor() = default; protected: - ResumeUpdateRemoteProcessor(StorageEnv* env, const std::string& val); + ChainResumeUpdateDoublePrimeProcessor(StorageEnv* env, const std::string& val); bool lockEdge(const cpp2::UpdateEdgeRequest& req); }; diff --git a/src/storage/transaction/ResumeUpdateProcessor.cpp b/src/storage/transaction/ChainResumeUpdatePrimeProcessor.cpp similarity index 55% rename from src/storage/transaction/ResumeUpdateProcessor.cpp rename to src/storage/transaction/ChainResumeUpdatePrimeProcessor.cpp index 075d0c10a2d..321d5cb3a9b 100644 --- a/src/storage/transaction/ResumeUpdateProcessor.cpp +++ 
b/src/storage/transaction/ChainResumeUpdatePrimeProcessor.cpp @@ -3,7 +3,7 @@ * This source code is licensed under Apache 2.0 License. */ -#include "storage/transaction/ResumeUpdateProcessor.h" +#include "storage/transaction/ChainResumeUpdatePrimeProcessor.h" #include @@ -12,30 +12,34 @@ namespace nebula { namespace storage { -ResumeUpdateProcessor::ResumeUpdateProcessor(StorageEnv* env, const std::string& val) +ChainResumeUpdatePrimeProcessor::ChainResumeUpdatePrimeProcessor(StorageEnv* env, + const std::string& val) : ChainUpdateEdgeLocalProcessor(env) { req_ = ConsistUtil::parseUpdateRequest(val); ChainUpdateEdgeLocalProcessor::prepareRequest(req_); } -folly::SemiFuture ResumeUpdateProcessor::prepareLocal() { - std::tie(term_, code_) = env_->txnMan_->getTerm(spaceId_, localPartId_); - return code_; +folly::SemiFuture ChainResumeUpdatePrimeProcessor::prepareLocal() { + VLOG(1) << " prepareLocal()"; + std::tie(term_, rcPrepare_) = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + return rcPrepare_; } -folly::SemiFuture ResumeUpdateProcessor::processRemote(Code code) { +folly::SemiFuture ChainResumeUpdatePrimeProcessor::processRemote(Code code) { VLOG(1) << "prepareLocal()=" << apache::thrift::util::enumNameSafe(code); return ChainUpdateEdgeLocalProcessor::processRemote(code); } -folly::SemiFuture ResumeUpdateProcessor::processLocal(Code code) { +folly::SemiFuture ChainResumeUpdatePrimeProcessor::processLocal(Code code) { VLOG(1) << "processRemote()=" << apache::thrift::util::enumNameSafe(code); - setErrorCode(code); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; + } - auto currTerm = env_->txnMan_->getTerm(spaceId_, localPartId_); + auto currTerm = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); if (currTerm.first != term_) { - LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; + rcCommit_ = Code::E_LEADER_CHANGED; + return rcCommit_; } if (code == Code::E_RPC_FAILURE) { @@ 
-47,18 +51,11 @@ folly::SemiFuture ResumeUpdateProcessor::processLocal(Code code) { // we need to keep the resume retry(by not remove those prime key) auto key = ConsistUtil::primeKey(spaceVidLen_, localPartId_, req_.get_edge_key()); kvErased_.emplace_back(std::move(key)); - forwardToDelegateProcessor(); - return code_; + return commit(); } return code; } -void ResumeUpdateProcessor::finish() { - VLOG(1) << "commitLocal()=" << apache::thrift::util::enumNameSafe(code_); - finished_.setValue(code_); - onFinished(); -} - } // namespace storage } // namespace nebula diff --git a/src/storage/transaction/ResumeUpdateProcessor.h b/src/storage/transaction/ChainResumeUpdatePrimeProcessor.h similarity index 67% rename from src/storage/transaction/ResumeUpdateProcessor.h rename to src/storage/transaction/ChainResumeUpdatePrimeProcessor.h index 557e351b4ed..bf13906f5bc 100644 --- a/src/storage/transaction/ResumeUpdateProcessor.h +++ b/src/storage/transaction/ChainResumeUpdatePrimeProcessor.h @@ -15,10 +15,10 @@ namespace storage { * if the TxnManager background resume thread found a prime key * it will create this processor to resume the complete update process */ -class ResumeUpdateProcessor : public ChainUpdateEdgeLocalProcessor { +class ChainResumeUpdatePrimeProcessor : public ChainUpdateEdgeLocalProcessor { public: - static ResumeUpdateProcessor* instance(StorageEnv* env, const std::string& val) { - return new ResumeUpdateProcessor(env, val); + static ChainResumeUpdatePrimeProcessor* instance(StorageEnv* env, const std::string& val) { + return new ChainResumeUpdatePrimeProcessor(env, val); } folly::SemiFuture prepareLocal() override; @@ -27,12 +27,10 @@ class ResumeUpdateProcessor : public ChainUpdateEdgeLocalProcessor { folly::SemiFuture processLocal(nebula::cpp2::ErrorCode code) override; - void finish() override; - - virtual ~ResumeUpdateProcessor() = default; + virtual ~ChainResumeUpdatePrimeProcessor() = default; protected: - ResumeUpdateProcessor(StorageEnv* env, 
const std::string& val); + ChainResumeUpdatePrimeProcessor(StorageEnv* env, const std::string& val); bool lockEdge(); }; diff --git a/src/storage/transaction/ChainUpdateEdgeLocalProcessor.cpp b/src/storage/transaction/ChainUpdateEdgeLocalProcessor.cpp index d2246ecb002..36d4822a02b 100644 --- a/src/storage/transaction/ChainUpdateEdgeLocalProcessor.cpp +++ b/src/storage/transaction/ChainUpdateEdgeLocalProcessor.cpp @@ -17,6 +17,7 @@ namespace storage { void ChainUpdateEdgeLocalProcessor::process(const cpp2::UpdateEdgeRequest& req) { if (!prepareRequest(req)) { + pushResultCode(rcPrepare_, localPartId_); onFinished(); } @@ -28,26 +29,21 @@ bool ChainUpdateEdgeLocalProcessor::prepareRequest(const cpp2::UpdateEdgeRequest spaceId_ = req.get_space_id(); localPartId_ = req_.get_part_id(); - auto rc = getSpaceVidLen(spaceId_); - if (rc != nebula::cpp2::ErrorCode::SUCCEEDED) { - pushResultCode(rc, localPartId_); + rcPrepare_ = getSpaceVidLen(spaceId_); + if (rcPrepare_ != nebula::cpp2::ErrorCode::SUCCEEDED) { return false; } - std::tie(term_, code_) = env_->txnMan_->getTerm(spaceId_, localPartId_); - if (code_ != Code::SUCCEEDED) { + std::tie(term_, rcPrepare_) = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + if (rcPrepare_ != Code::SUCCEEDED) { return false; } return true; } -/** - * 1. set mem lock - * 2. 
set edge prime - * */ folly::SemiFuture ChainUpdateEdgeLocalProcessor::prepareLocal() { if (!setLock()) { - LOG(INFO) << "set lock failed, return E_WRITE_WRITE_CONFLICT"; + rcPrepare_ = Code::E_WRITE_WRITE_CONFLICT; return Code::E_WRITE_WRITE_CONFLICT; } @@ -61,18 +57,14 @@ folly::SemiFuture ChainUpdateEdgeLocalProcessor::prepareLocal() { auto c = folly::makePromiseContract(); env_->kvstore_->asyncMultiPut( spaceId_, localPartId_, std::move(data), [p = std::move(c.first), this](auto rc) mutable { - if (rc == nebula::cpp2::ErrorCode::SUCCEEDED) { - primeInserted_ = true; - } else { - VLOG(1) << "kvstore err: " << apache::thrift::util::enumNameSafe(rc); - } + rcPrepare_ = rc; p.setValue(rc); }); return std::move(c.second); } folly::SemiFuture ChainUpdateEdgeLocalProcessor::processRemote(Code code) { - LOG(INFO) << "prepareLocal()=" << apache::thrift::util::enumNameSafe(code); + VLOG(1) << " prepareLocal(): " << apache::thrift::util::enumNameSafe(code); if (code != Code::SUCCEEDED) { return code; } @@ -82,41 +74,78 @@ folly::SemiFuture ChainUpdateEdgeLocalProcessor::processRemote(Code code) } folly::SemiFuture ChainUpdateEdgeLocalProcessor::processLocal(Code code) { - LOG(INFO) << "processRemote(), code = " << apache::thrift::util::enumNameSafe(code); - if (code != Code::SUCCEEDED && code_ == Code::SUCCEEDED) { - code_ = code; + VLOG(1) << " processRemote(): " << apache::thrift::util::enumNameSafe(code); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; } - auto currTerm = env_->txnMan_->getTerm(spaceId_, localPartId_); + auto currTerm = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); if (currTerm.first != term_) { - LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; + rcCommit_ = Code::E_LEADER_CHANGED; + return rcCommit_; } - if (code == Code::E_RPC_FAILURE) { + if (rcRemote_ == Code::E_RPC_FAILURE) { appendDoublePrime(); - addUnfinishedEdge(ResumeType::RESUME_REMOTE); } - if (code == 
Code::SUCCEEDED || code == Code::E_RPC_FAILURE) { - erasePrime(); - forwardToDelegateProcessor(); - } else { - if (primeInserted_) { - abort(); - } + erasePrime(); + + if (rcRemote_ != Code::SUCCEEDED && rcRemote_ != Code::E_RPC_FAILURE) { + // prepare succeed and remote failed + return abort(); } - return code_; + return commit(); +} + +void ChainUpdateEdgeLocalProcessor::finish() { + VLOG(1) << " commitLocal()=" << apache::thrift::util::enumNameSafe(rcCommit_); + do { + if (rcPrepare_ != Code::SUCCEEDED) { + break; + } + if (isKVStoreError(rcCommit_)) { + reportFailed(ResumeType::RESUME_CHAIN); + break; + } + if (rcRemote_ == Code::E_RPC_FAILURE) { + reportFailed(ResumeType::RESUME_REMOTE); + break; + } + } while (0); + + auto rc = Code::SUCCEEDED; + do { + if (rcPrepare_ != Code::SUCCEEDED) { + rc = rcPrepare_; + break; + } + + if (rcCommit_ != Code::SUCCEEDED) { + rc = rcCommit_; + break; + } + + if (rcRemote_ != Code::E_RPC_FAILURE) { + rc = rcRemote_; + break; + } + } while (0); + + pushResultCode(rc, req_.get_part_id()); + finished_.setValue(rc); + onFinished(); } void ChainUpdateEdgeLocalProcessor::doRpc(folly::Promise&& promise, int retry) noexcept { try { if (retry > retryLimit_) { - promise.setValue(Code::E_LEADER_CHANGED); + rcRemote_ = Code::E_LEADER_CHANGED; + promise.setValue(rcRemote_); return; } - auto* iClient = env_->txnMan_->getInternalClient(); + auto* iClient = env_->interClient_; folly::Promise p; auto reversedReq = reverseRequest(req_); @@ -124,17 +153,16 @@ void ChainUpdateEdgeLocalProcessor::doRpc(folly::Promise&& promise, int re iClient->chainUpdateEdge(reversedReq, term_, ver_, std::move(p)); std::move(f) .thenTry([=, p = std::move(promise)](auto&& t) mutable { - auto code = t.hasValue() ? t.value() : Code::E_RPC_FAILURE; - VLOG(1) << "code = " << apache::thrift::util::enumNameSafe(code); - switch (code) { + rcRemote_ = t.hasValue() ? 
t.value() : Code::E_RPC_FAILURE; + switch (rcRemote_) { case Code::E_LEADER_CHANGED: doRpc(std::move(p), ++retry); break; default: - p.setValue(code); + p.setValue(rcRemote_); break; } - return code; + return rcRemote_; }) .get(); } catch (std::exception& ex) { @@ -155,23 +183,27 @@ void ChainUpdateEdgeLocalProcessor::appendDoublePrime() { kvAppend_.emplace_back(std::make_pair(std::move(key), std::move(val))); } -void ChainUpdateEdgeLocalProcessor::forwardToDelegateProcessor() { - kUpdateEdgeCounters.init("update_edge"); +folly::SemiFuture ChainUpdateEdgeLocalProcessor::commit() { + VLOG(1) << __func__ << "()"; UpdateEdgeProcessor::ContextAdjuster fn = [=](EdgeContext& ctx) { ctx.kvAppend = std::move(kvAppend_); ctx.kvErased = std::move(kvErased_); }; + auto [pro, fut] = folly::makePromiseContract(); auto* proc = UpdateEdgeProcessor::instance(env_); proc->adjustContext(std::move(fn)); auto f = proc->getFuture(); + std::move(f).thenTry([&, p = std::move(pro)](auto&& t) mutable { + if (t.hasValue()) { + resp_ = std::move(t.value()); + rcCommit_ = getErrorCode(resp_); + } + p.setValue(rcCommit_); + }); + proc->process(req_); - auto resp = std::move(f).get(); - code_ = getErrorCode(resp); - if (code_ != Code::SUCCEEDED) { - addUnfinishedEdge(ResumeType::RESUME_CHAIN); - } - std::swap(resp_, resp); + return std::move(fut); } Code ChainUpdateEdgeLocalProcessor::checkAndBuildContexts(const cpp2::UpdateEdgeRequest&) { @@ -182,26 +214,21 @@ std::string ChainUpdateEdgeLocalProcessor::sEdgeKey(const cpp2::UpdateEdgeReques return ConsistUtil::edgeKey(spaceVidLen_, req.get_part_id(), req.get_edge_key()); } -void ChainUpdateEdgeLocalProcessor::finish() { - LOG(INFO) << "ChainUpdateEdgeLocalProcessor::finish()"; - pushResultCode(code_, req_.get_part_id()); - onFinished(); -} - -void ChainUpdateEdgeLocalProcessor::abort() { - auto key = ConsistUtil::primeKey(spaceVidLen_, localPartId_, req_.get_edge_key()); - kvErased_.emplace_back(std::move(key)); +folly::SemiFuture 
ChainUpdateEdgeLocalProcessor::abort() { + VLOG(1) << __func__ << "()"; + if (kvErased_.empty()) { + return Code::SUCCEEDED; + } - folly::Baton baton; - env_->kvstore_->asyncMultiRemove( - req_.get_space_id(), req_.get_part_id(), std::move(kvErased_), [&](auto rc) mutable { - LOG(INFO) << " abort()=" << apache::thrift::util::enumNameSafe(rc); - if (rc != Code::SUCCEEDED) { - addUnfinishedEdge(ResumeType::RESUME_CHAIN); - } - baton.post(); - }); - baton.wait(); + auto [pro, fut] = folly::makePromiseContract(); + env_->kvstore_->asyncMultiRemove(req_.get_space_id(), + req_.get_part_id(), + std::move(kvErased_), + [&, p = std::move(pro)](auto rc) mutable { + rcCommit_ = rc; + p.setValue(rc); + }); + return std::move(fut); } cpp2::UpdateEdgeRequest ChainUpdateEdgeLocalProcessor::reverseRequest( @@ -221,12 +248,12 @@ cpp2::UpdateEdgeRequest ChainUpdateEdgeLocalProcessor::reverseRequest( bool ChainUpdateEdgeLocalProcessor::setLock() { auto spaceId = req_.get_space_id(); - auto* lockCore = env_->txnMan_->getLockCore(spaceId, req_.get_part_id()); - if (lockCore == nullptr) { + lkCore_ = env_->txnMan_->getLockCore(spaceId, req_.get_part_id(), term_); + if (lkCore_ == nullptr) { return false; } auto key = ConsistUtil::edgeKey(spaceVidLen_, req_.get_part_id(), req_.get_edge_key()); - lk_ = std::make_unique>(lockCore, key); + lk_ = std::make_unique>(lkCore_.get(), key); return lk_->isLocked(); } @@ -240,13 +267,20 @@ nebula::cpp2::ErrorCode ChainUpdateEdgeLocalProcessor::getErrorCode( return parts.front().get_code(); } -void ChainUpdateEdgeLocalProcessor::addUnfinishedEdge(ResumeType type) { - LOG(INFO) << "addUnfinishedEdge()"; +void ChainUpdateEdgeLocalProcessor::reportFailed(ResumeType type) { + VLOG(1) << __func__ << "()"; if (lk_ != nullptr) { - lk_->forceUnlock(); + lk_->setAutoUnlock(false); } auto key = ConsistUtil::edgeKey(spaceVidLen_, req_.get_part_id(), req_.get_edge_key()); - env_->txnMan_->addPrime(spaceId_, key, type); + env_->txnMan_->addPrime(spaceId_, 
localPartId_, term_, key, type); +} + +bool ChainUpdateEdgeLocalProcessor::isKVStoreError(nebula::cpp2::ErrorCode code) { + auto iCode = static_cast(code); + auto kvStoreErrorCodeBegin = static_cast(nebula::cpp2::ErrorCode::E_RAFT_UNKNOWN_PART); + auto kvStoreErrorCodeEnd = static_cast(nebula::cpp2::ErrorCode::E_RAFT_ATOMIC_OP_FAILED); + return iCode >= kvStoreErrorCodeBegin && iCode <= kvStoreErrorCodeEnd; } } // namespace storage diff --git a/src/storage/transaction/ChainUpdateEdgeLocalProcessor.h b/src/storage/transaction/ChainUpdateEdgeLocalProcessor.h index 2f84f343a83..d4b1a9af0f9 100644 --- a/src/storage/transaction/ChainUpdateEdgeLocalProcessor.h +++ b/src/storage/transaction/ChainUpdateEdgeLocalProcessor.h @@ -49,9 +49,9 @@ class ChainUpdateEdgeLocalProcessor void doRpc(folly::Promise&& promise, int retry = 0) noexcept; - folly::SemiFuture processNormalLocal(Code code); + folly::SemiFuture commit(); - void abort(); + folly::SemiFuture abort(); bool prepareRequest(const cpp2::UpdateEdgeRequest& req); @@ -59,15 +59,13 @@ class ChainUpdateEdgeLocalProcessor void appendDoublePrime(); - void forwardToDelegateProcessor(); - std::string sEdgeKey(const cpp2::UpdateEdgeRequest& req); cpp2::UpdateEdgeRequest reverseRequest(const cpp2::UpdateEdgeRequest& req); bool setLock(); - void addUnfinishedEdge(ResumeType type); + void reportFailed(ResumeType type); int64_t getVersion(const cpp2::UpdateEdgeRequest& req); @@ -75,8 +73,11 @@ class ChainUpdateEdgeLocalProcessor Code checkAndBuildContexts(const cpp2::UpdateEdgeRequest& req) override; + bool isKVStoreError(nebula::cpp2::ErrorCode code); + protected: cpp2::UpdateEdgeRequest req_; + TransactionManager::SPtrLock lkCore_; std::unique_ptr lk_; PartitionID localPartId_; int retryLimit_{10}; diff --git a/src/storage/transaction/ConsistUtil.cpp b/src/storage/transaction/ConsistUtil.cpp index d80d288b2f7..a0923d84a96 100644 --- a/src/storage/transaction/ConsistUtil.cpp +++ b/src/storage/transaction/ConsistUtil.cpp @@ -12,50 
+12,54 @@ #include "common/utils/NebulaKeyUtils.h" namespace nebula { namespace storage { - -static const std::string kPrimeTable{"__prime__"}; // NOLINT -static const std::string kDoublePrimeTable{"__prime_prime__"}; // NOLINT - -std::string ConsistUtil::primeTable() { - return kPrimeTable; +std::string ConsistUtil::primeTable(PartitionID partId) { + auto item = (partId << kPartitionOffset) | static_cast(NebulaKeyType::kPrime); + std::string key; + key.reserve(sizeof(PartitionID)); + key.append(reinterpret_cast(&item), sizeof(PartitionID)); + return key; } -std::string ConsistUtil::doublePrimeTable() { - return kDoublePrimeTable; +std::string ConsistUtil::doublePrimeTable(PartitionID partId) { + auto item = (partId << kPartitionOffset) | static_cast(NebulaKeyType::kDoublePrime); + std::string key; + key.reserve(sizeof(PartitionID)); + key.append(reinterpret_cast(&item), sizeof(PartitionID)); + return key; } std::string ConsistUtil::primePrefix(PartitionID partId) { - return kPrimeTable + NebulaKeyUtils::edgePrefix(partId); + return primeTable(partId) + NebulaKeyUtils::edgePrefix(partId); } std::string ConsistUtil::doublePrimePrefix(PartitionID partId) { - return kDoublePrimeTable + NebulaKeyUtils::edgePrefix(partId); + return doublePrimeTable(partId) + NebulaKeyUtils::edgePrefix(partId); } std::string ConsistUtil::primeKey(size_t vIdLen, PartitionID partId, const cpp2::EdgeKey& edgeKey) { - return kPrimeTable + NebulaKeyUtils::edgeKey(vIdLen, - partId, - edgeKey.get_src().getStr(), - edgeKey.get_edge_type(), - edgeKey.get_ranking(), - edgeKey.get_dst().getStr()); + return primeTable(partId) + NebulaKeyUtils::edgeKey(vIdLen, + partId, + edgeKey.get_src().getStr(), + edgeKey.get_edge_type(), + edgeKey.get_ranking(), + edgeKey.get_dst().getStr()); } folly::StringPiece ConsistUtil::edgeKeyFromPrime(const folly::StringPiece& key) { - return folly::StringPiece(key.begin() + kPrimeTable.size(), key.end()); + return folly::StringPiece(key.begin() + sizeof(PartitionID), 
key.end()); } folly::StringPiece ConsistUtil::edgeKeyFromDoublePrime(const folly::StringPiece& key) { - return folly::StringPiece(key.begin() + kDoublePrimeTable.size(), key.end()); + return folly::StringPiece(key.begin() + sizeof(PartitionID), key.end()); } std::string ConsistUtil::doublePrime(size_t vIdLen, PartitionID partId, const cpp2::EdgeKey& key) { - return kDoublePrimeTable + NebulaKeyUtils::edgeKey(vIdLen, - partId, - key.get_src().getStr(), - key.get_edge_type(), - key.get_ranking(), - key.get_dst().getStr()); + return doublePrimeTable(partId) + NebulaKeyUtils::edgeKey(vIdLen, + partId, + key.get_src().getStr(), + key.get_edge_type(), + key.get_ranking(), + key.get_dst().getStr()); } RequestType ConsistUtil::parseType(folly::StringPiece val) { @@ -123,7 +127,6 @@ void ConsistUtil::reverseEdgeKeyInplace(cpp2::EdgeKey& edgeKey) { } int64_t ConsistUtil::toInt(const ::nebula::Value& val) { - // return ConsistUtil::toInt2(val.toString()); auto str = val.toString(); if (str.size() < 3) { return 0; @@ -131,19 +134,19 @@ int64_t ConsistUtil::toInt(const ::nebula::Value& val) { return *reinterpret_cast(const_cast(str.data() + 1)); } -int64_t ConsistUtil::toInt2(const std::string& str) { - if (str.size() < 8) { - return 0; - } - return *reinterpret_cast(const_cast(str.data())); -} - -std::string ConsistUtil::readableKey(size_t vidLen, const std::string& rawKey) { +std::string ConsistUtil::readableKey(size_t vidLen, bool isIntVid, const std::string& rawKey) { auto src = NebulaKeyUtils::getSrcId(vidLen, rawKey); auto dst = NebulaKeyUtils::getDstId(vidLen, rawKey); auto rank = NebulaKeyUtils::getRank(vidLen, rawKey); std::stringstream ss; - ss << ConsistUtil::toInt2(src.str()) << "->" << ConsistUtil::toInt2(dst.str()) << "@" << rank; + ss << std::boolalpha << "isIntVid=" << isIntVid << ", "; + if (isIntVid) { + ss << *reinterpret_cast(const_cast(src.begin())) << "--" + << *reinterpret_cast(const_cast(dst.begin())); + } else { + ss << src.str() << "--" << dst.str(); + 
} + ss << "@" << rank; return ss.str(); } @@ -181,12 +184,14 @@ cpp2::DeleteEdgesRequest DeleteEdgesRequestHelper::parseDeleteEdgesRequest(const return req; } -std::string DeleteEdgesRequestHelper::explain(const cpp2::DeleteEdgesRequest& req) { +std::string DeleteEdgesRequestHelper::explain(const cpp2::DeleteEdgesRequest& req, bool isIntVid) { std::stringstream oss; for (auto& partOfKeys : req.get_parts()) { for (auto& key : partOfKeys.second) { - oss << ConsistUtil::toInt(key.get_src()) << "->" << ConsistUtil::toInt(key.get_dst()) << "@" - << key.get_ranking() << ", "; + if (isIntVid) { + oss << ConsistUtil::toInt(key.get_src()) << "->" << ConsistUtil::toInt(key.get_dst()) << "@" + << key.get_ranking() << ", "; + } } } return oss.str(); diff --git a/src/storage/transaction/ConsistUtil.h b/src/storage/transaction/ConsistUtil.h index 0ca2fc918d5..48f9aae1fb6 100644 --- a/src/storage/transaction/ConsistUtil.h +++ b/src/storage/transaction/ConsistUtil.h @@ -17,13 +17,9 @@ namespace nebula { namespace storage { class ConsistUtil final { public: - static std::string primeTable(); + static std::string primeTable(PartitionID partId); - static std::string doublePrimeTable(); - - static std::string deletePrimeTable(); - - static std::string deleteDoublePrimeTable(); + static std::string doublePrimeTable(PartitionID partId); static std::string edgeKey(size_t vIdLen, PartitionID partId, const cpp2::EdgeKey& key); @@ -87,9 +83,7 @@ class ConsistUtil final { */ static int64_t toInt(const ::nebula::Value& val); - static int64_t toInt2(const std::string& val); - - static std::string readableKey(size_t vidLen, const std::string& rawKey); + static std::string readableKey(size_t vidLen, bool isIntId, const std::string& rawKey); static std::vector toStrKeys(const cpp2::DeleteEdgesRequest& req, int vidLen); @@ -104,7 +98,7 @@ struct DeleteEdgesRequestHelper final { static cpp2::DeleteEdgesRequest parseDeleteEdgesRequest(const std::string& val); - static std::string explain(const 
cpp2::DeleteEdgesRequest& req); + static std::string explain(const cpp2::DeleteEdgesRequest& req, bool isIntVid); }; } // namespace storage diff --git a/src/storage/transaction/ResumeAddEdgeProcessor.cpp b/src/storage/transaction/ResumeAddEdgeProcessor.cpp deleted file mode 100644 index 3ca1bfb18c5..00000000000 --- a/src/storage/transaction/ResumeAddEdgeProcessor.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2021 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#include "storage/transaction/ResumeAddEdgeProcessor.h" - -namespace nebula { -namespace storage { - -ResumeAddEdgeProcessor::ResumeAddEdgeProcessor(StorageEnv* env, const std::string& val) - : ChainAddEdgesLocalProcessor(env) { - req_ = ConsistUtil::parseAddRequest(val); - - uuid_ = ConsistUtil::strUUID(); - readableEdgeDesc_ = makeReadableEdge(req_); - VLOG(1) << uuid_ << " resume prime " << readableEdgeDesc_; - ChainAddEdgesLocalProcessor::prepareRequest(req_); -} - -folly::SemiFuture ResumeAddEdgeProcessor::prepareLocal() { - if (code_ != Code::SUCCEEDED) { - return code_; - } - auto spaceId = req_.get_space_id(); - auto numOfPart = env_->metaClient_->partsNum(spaceId); - if (!numOfPart.ok()) { - return Code::E_SPACE_NOT_FOUND; - } - auto& parts = req_.get_parts(); - auto& srcId = parts.begin()->second.back().get_key().get_src().getStr(); - auto& dstId = parts.begin()->second.back().get_key().get_dst().getStr(); - localPartId_ = env_->metaClient_->partId(numOfPart.value(), srcId); - remotePartId_ = env_->metaClient_->partId(numOfPart.value(), dstId); - - return code_; -} - -folly::SemiFuture ResumeAddEdgeProcessor::processRemote(Code code) { - VLOG(1) << uuid_ << " prepareLocal() " << apache::thrift::util::enumNameSafe(code); - return ChainAddEdgesLocalProcessor::processRemote(code); -} - -folly::SemiFuture ResumeAddEdgeProcessor::processLocal(Code code) { - VLOG(1) << uuid_ << " processRemote() " << apache::thrift::util::enumNameSafe(code); - 
setErrorCode(code); - - auto currTerm = env_->txnMan_->getTerm(spaceId_, localPartId_); - if (currTerm.first != term_) { - LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; - } - - if (code == Code::E_RPC_FAILURE) { - kvAppend_ = ChainAddEdgesLocalProcessor::makeDoublePrime(); - } - - if (code == Code::E_RPC_FAILURE || code == Code::SUCCEEDED) { - // if there are something wrong other than rpc failure - // we need to keep the resume retry(by not remove those prime key) - erasePrime(); - code_ = forwardToDelegateProcessor().get(); - return code_; - } - - return code; -} - -} // namespace storage -} // namespace nebula diff --git a/src/storage/transaction/ResumeAddEdgeRemoteProcessor.cpp b/src/storage/transaction/ResumeAddEdgeRemoteProcessor.cpp deleted file mode 100644 index 21259f74afa..00000000000 --- a/src/storage/transaction/ResumeAddEdgeRemoteProcessor.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2021 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. 
- */ - -#include "storage/transaction/ResumeAddEdgeRemoteProcessor.h" - -namespace nebula { -namespace storage { - -ResumeAddEdgeRemoteProcessor::ResumeAddEdgeRemoteProcessor(StorageEnv* env, const std::string& val) - : ChainAddEdgesLocalProcessor(env) { - req_ = ConsistUtil::parseAddRequest(val); - ChainAddEdgesLocalProcessor::prepareRequest(req_); -} - -folly::SemiFuture ResumeAddEdgeRemoteProcessor::prepareLocal() { - std::tie(term_, code_) = env_->txnMan_->getTerm(spaceId_, localPartId_); - if (code_ != Code::SUCCEEDED) { - return code_; - } - - auto spaceId = req_.get_space_id(); - auto numOfPart = env_->metaClient_->partsNum(spaceId); - if (!numOfPart.ok()) { - return Code::E_SPACE_NOT_FOUND; - } - auto& parts = req_.get_parts(); - auto& dstId = parts.begin()->second.back().get_key().get_dst().getStr(); - remotePartId_ = env_->metaClient_->partId(numOfPart.value(), dstId); - - return Code::SUCCEEDED; -} - -folly::SemiFuture ResumeAddEdgeRemoteProcessor::processRemote(Code code) { - return ChainAddEdgesLocalProcessor::processRemote(code); -} - -folly::SemiFuture ResumeAddEdgeRemoteProcessor::processLocal(Code code) { - auto currTerm = env_->txnMan_->getTerm(spaceId_, localPartId_); - if (currTerm.first != term_) { - LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; - } - - if (code == Code::E_OUTDATED_TERM) { - // E_OUTDATED_TERM indicate this host is no longer the leader of curr part - // any following kv operation will fail - // due to not allowed to write from follower - return code; - } - - if (code == Code::E_RPC_FAILURE) { - // nothing to do, as we are already an rpc failure - } - - if (code == Code::SUCCEEDED) { - // if there are something wrong other than rpc failure - // we need to keep the resume retry(by not remove those prime key) - ChainAddEdgesLocalProcessor::eraseDoublePrime(); - code_ = forwardToDelegateProcessor().get(); - return code_; - } - - return code; -} - -} // namespace storage -} // 
namespace nebula diff --git a/src/storage/transaction/ResumeUpdateRemoteProcessor.cpp b/src/storage/transaction/ResumeUpdateRemoteProcessor.cpp deleted file mode 100644 index 5bfa6ed2a65..00000000000 --- a/src/storage/transaction/ResumeUpdateRemoteProcessor.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (c) 2021 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#include "storage/transaction/ResumeUpdateRemoteProcessor.h" - -#include - -namespace nebula { -namespace storage { - -ResumeUpdateRemoteProcessor::ResumeUpdateRemoteProcessor(StorageEnv* env, const std::string& val) - : ChainUpdateEdgeLocalProcessor(env) { - req_ = ConsistUtil::parseUpdateRequest(val); - ChainUpdateEdgeLocalProcessor::prepareRequest(req_); -} - -folly::SemiFuture ResumeUpdateRemoteProcessor::prepareLocal() { - std::tie(term_, code_) = env_->txnMan_->getTerm(spaceId_, localPartId_); - return code_; -} - -folly::SemiFuture ResumeUpdateRemoteProcessor::processRemote(Code code) { - return ChainUpdateEdgeLocalProcessor::processRemote(code); -} - -folly::SemiFuture ResumeUpdateRemoteProcessor::processLocal(Code code) { - setErrorCode(code); - - auto currTerm = env_->txnMan_->getTerm(spaceId_, localPartId_); - if (currTerm.first != term_) { - LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; - } - - if (code == Code::SUCCEEDED) { - // if there are something wrong other than rpc failure - // we need to keep the resume retry(by not remove those prime key) - auto key = ConsistUtil::doublePrime(spaceVidLen_, localPartId_, req_.get_edge_key()); - kvErased_.emplace_back(std::move(key)); - forwardToDelegateProcessor(); - return code; - } else { - // we can't decide if the double prime should be deleted. 
- // so do nothing - } - - return code; -} - -void ResumeUpdateRemoteProcessor::finish() { - if (FLAGS_trace_toss) { - VLOG(1) << "commitLocal()=" << apache::thrift::util::enumNameSafe(code_); - } - finished_.setValue(code_); - onFinished(); -} - -} // namespace storage -} // namespace nebula diff --git a/src/storage/transaction/TransactionManager.cpp b/src/storage/transaction/TransactionManager.cpp index 2c91b20b8de..c14fbe0c0a2 100644 --- a/src/storage/transaction/TransactionManager.cpp +++ b/src/storage/transaction/TransactionManager.cpp @@ -12,47 +12,106 @@ #include "kvstore/NebulaStore.h" #include "storage/CommonUtils.h" #include "storage/StorageFlags.h" -#include "storage/transaction/ChainResumeProcessor.h" +#include "storage/transaction/ChainProcessorFactory.h" namespace nebula { namespace storage { DEFINE_int32(resume_interval_secs, 10, "Resume interval"); - -ProcessorCounters kForwardTranxCounters; +DEFINE_int32(toss_worker_num, 16, "Number of worker threads for TOSS"); TransactionManager::TransactionManager(StorageEnv* env) : env_(env) { LOG(INFO) << "TransactionManager ctor()"; - exec_ = std::make_shared(10); - iClient_ = env_->interClient_; - resumeThread_ = std::make_unique(); + exec_ = std::make_shared(FLAGS_toss_worker_num); +} + +bool TransactionManager::start() { std::vector> existParts; auto fn = std::bind(&TransactionManager::onNewPartAdded, this, std::placeholders::_1); static_cast<::nebula::kvstore::NebulaStore*>(env_->kvstore_) ->registerOnNewPartAdded("TransactionManager", fn, existParts); - for (auto& partOfSpace : existParts) { - scanPrimes(partOfSpace.first, partOfSpace.second); + for (auto&& [spaceId, partId] : existParts) { + auto [termId, rc] = getTermFromKVStore(spaceId, partId); + if (rc != Code::SUCCEEDED) { + continue; + } + scanPrimes(spaceId, partId, termId); } + return true; +} + +void TransactionManager::monitorPoolStat(folly::ThreadPoolExecutor* pool, const std::string& msg) { + monPoolStats_.emplace_back(std::make_pair(pool, msg)); }
-TransactionManager::LockCore* TransactionManager::getLockCore(GraphSpaceID spaceId, - GraphSpaceID partId, - bool checkWhiteList) { +void TransactionManager::bgPrintPoolStat() { + while (!stop_) { + for (auto&& [pool, msg] : monPoolStats_) { + VLOG(1) << dumpPoolStat(pool, msg); + } + std::this_thread::sleep_for(std::chrono::seconds(20)); + } +} + +std::string TransactionManager::dumpPoolStat(folly::ThreadPoolExecutor* exec, + const std::string& msg) { + auto stats = exec->getPoolStats(); + std::stringstream oss; + oss << "\npoolStats: " << msg << "\n\t threadCount = " << stats.threadCount + << "\n\t idleThreadCount = " << stats.idleThreadCount + << "\n\t activeThreadCount = " << stats.activeThreadCount + << "\n\t pendingTaskCount = " << stats.pendingTaskCount + << "\n\t totalTaskCount = " << stats.totalTaskCount << "\n"; + return oss.str(); +} + +void TransactionManager::stop() { + LOG(INFO) << "TransactionManager stop()"; + stop_ = true; +} + +void TransactionManager::join() { + LOG(INFO) << "TransactionManager join()"; + exec_->stop(); +} + +void TransactionManager::addChainTask(ChainBaseProcessor* proc) { + if (stop_) { + return; + } + folly::via(exec_.get()) + .thenValue([=](auto&&) { return proc->prepareLocal(); }) + .thenValue([=](auto&& code) { return proc->processRemote(code); }) + .thenValue([=](auto&& code) { return proc->processLocal(code); }) + .ensure([=]() { proc->finish(); }); +} + +TransactionManager::SPtrLock TransactionManager::getLockCore(GraphSpaceID spaceId, + GraphSpaceID partId, + TermID termId, + bool checkWhiteList) { if (checkWhiteList) { - if (scannedParts_.find(std::make_pair(spaceId, partId)) == scannedParts_.end()) { + auto currTermKey = std::make_pair(spaceId, partId); + auto it = currTerm_.find(currTermKey); + if (it == currTerm_.end()) { + return nullptr; + } + if (it->second != termId) { return nullptr; } } - auto it = memLocks_.find(spaceId); + MemLockKey key = std::make_tuple(spaceId, partId, termId); + auto it = 
memLocks_.find(key); if (it != memLocks_.end()) { - return it->second.get(); + return it->second; } - auto item = memLocks_.insert(spaceId, std::make_unique()); - return item.first->second.get(); + auto item = memLocks_.insert(key, std::make_shared()); + return item.first->second; } -std::pair TransactionManager::getTerm(GraphSpaceID spaceId, PartitionID partId) { +std::pair TransactionManager::getTermFromKVStore(GraphSpaceID spaceId, + PartitionID partId) { TermID termId = -1; auto rc = Code::SUCCEEDED; auto part = env_->kvstore_->part(spaceId, partId); @@ -67,13 +126,13 @@ std::pair TransactionManager::getTerm(GraphSpaceID spaceId, Partit bool TransactionManager::checkTermFromCache(GraphSpaceID spaceId, PartitionID partId, TermID termId) { - auto termOfMeta = env_->metaClient_->getTermFromCache(spaceId, partId); - if (termOfMeta.ok()) { - if (termId < termOfMeta.value()) { + auto termFromMeta = env_->metaClient_->getTermFromCache(spaceId, partId); + if (termFromMeta.ok()) { + if (termId < termFromMeta.value()) { LOG(WARNING) << "checkTerm() failed: " << "spaceId=" << spaceId << ", partId=" << partId << ", in-coming term=" << termId - << ", term in meta cache=" << termOfMeta.value(); + << ", term in meta cache=" << termFromMeta.value(); return false; } } @@ -89,75 +148,33 @@ bool TransactionManager::checkTermFromCache(GraphSpaceID spaceId, return true; } -void TransactionManager::resumeThread() { - SCOPE_EXIT { - resumeThread_->addDelayTask( - FLAGS_resume_interval_secs * 1000, &TransactionManager::resumeThread, this); - }; - ChainResumeProcessor proc(env_); - proc.process(); -} - -bool TransactionManager::start() { - if (!resumeThread_->start()) { - LOG(ERROR) << "resume thread start failed"; - return false; - } - resumeThread_->addDelayTask( - FLAGS_resume_interval_secs * 1000, &TransactionManager::resumeThread, this); - return true; -} - -void TransactionManager::stop() { - exec_->stop(); - resumeThread_->stop(); - resumeThread_->wait(); -} - -std::string 
TransactionManager::makeLockKey(GraphSpaceID spaceId, const std::string& edge) { - std::string lockKey; - lockKey.append(reinterpret_cast(&spaceId), sizeof(GraphSpaceID)).append(edge); - return lockKey; -} - -std::string TransactionManager::getEdgeKey(const std::string& lockKey) { - std::string edgeKey(lockKey.c_str() + sizeof(GraphSpaceID)); - return edgeKey; -} - -void TransactionManager::addPrime(GraphSpaceID spaceId, const std::string& edge, ResumeType type) { - VLOG(1) << "addPrime() space=" << spaceId << ", hex=" << folly::hexlify(edge) +void TransactionManager::addPrime(GraphSpaceID spaceId, + PartitionID partId, + TermID termId, + const std::string& egKey, + ResumeType type) { + VLOG(2) << "addPrime() space=" << spaceId << ", hex=" << folly::hexlify(egKey) << ", ResumeType=" << static_cast(type); - auto key = makeLockKey(spaceId, edge); - dangleEdges_.insert(std::make_pair(key, type)); -} - -void TransactionManager::delPrime(GraphSpaceID spaceId, const std::string& edge) { - VLOG(1) << "delPrime() space=" << spaceId << ", hex=" << folly::hexlify(edge) << ", readable " - << ConsistUtil::readableKey(8, edge); - auto key = makeLockKey(spaceId, edge); - dangleEdges_.erase(key); - - auto partId = NebulaKeyUtils::getPart(edge); - auto* lk = getLockCore(spaceId, partId, false); - lk->unlock(edge); -} - -void TransactionManager::scanAll() { - LOG(INFO) << "scanAll()"; - std::unordered_map> leaders; - if (env_->kvstore_->allLeader(leaders) == 0) { - LOG(INFO) << "no leader found, skip any resume process"; + auto* proc = ChainProcessorFactory::make(env_, spaceId, termId, egKey, type); + if (proc == nullptr) { + VLOG(1) << "delPrime() space=" << spaceId << ", hex=" << folly::hexlify(egKey); + auto lk = getLockCore(spaceId, partId, termId, false); + if (lk) { + lk->unlock(egKey); + } return; } - for (auto& leader : leaders) { - auto spaceId = leader.first; - for (auto& partInfo : leader.second) { - auto partId = partInfo.get_part_id(); - scanPrimes(spaceId, partId); + 
auto fut = proc->getFinished(); + std::move(fut).thenValue([=](auto&& code) { + if (code == Code::SUCCEEDED) { + VLOG(2) << "delPrime() space=" << spaceId << ", hex=" << folly::hexlify(egKey); + auto lk = getLockCore(spaceId, partId, termId, false); + if (lk) { + lk->unlock(egKey); + } } - } - LOG(INFO) << "finish scanAll()"; + }); + addChainTask(proc); } void TransactionManager::onNewPartAdded(std::shared_ptr& part) { @@ -175,75 +192,73 @@ void TransactionManager::onLeaderLostWrapper(const ::nebula::kvstore::Part::Call opt.spaceId, opt.partId, opt.term); - scannedParts_.erase(std::make_pair(opt.spaceId, opt.partId)); - dangleEdges_.clear(); + auto currTermKey = std::make_pair(opt.spaceId, opt.partId); + auto currTermIter = currTerm_.find(currTermKey); + if (currTermIter == currTerm_.end()) { + return; + } + auto memLockKey = std::make_tuple(opt.spaceId, opt.partId, currTermIter->second); + memLocks_.erase(memLockKey); } void TransactionManager::onLeaderElectedWrapper( const ::nebula::kvstore::Part::CallbackOptions& opt) { LOG(INFO) << folly::sformat( "leader get do scanPrimes space={}, part={}, term={}", opt.spaceId, opt.partId, opt.term); - scanPrimes(opt.spaceId, opt.partId); + scanPrimes(opt.spaceId, opt.partId, opt.term); } -void TransactionManager::scanPrimes(GraphSpaceID spaceId, PartitionID partId) { - LOG(INFO) << folly::sformat("{}(), spaceId={}, partId={}", __func__, spaceId, partId); +void TransactionManager::scanPrimes(GraphSpaceID spaceId, PartitionID partId, TermID termId) { + LOG(INFO) << folly::sformat( + "{}(), space={}, part={}, term={}", __func__, spaceId, partId, termId); std::unique_ptr iter; auto prefix = ConsistUtil::primePrefix(partId); auto rc = env_->kvstore_->prefix(spaceId, partId, prefix, &iter); if (rc == nebula::cpp2::ErrorCode::SUCCEEDED) { for (; iter->valid(); iter->next()) { - auto edgeKey = ConsistUtil::edgeKeyFromPrime(iter->key()); - VLOG(1) << "scanned edgekey: " << folly::hexlify(edgeKey) - << ", readable: " << 
ConsistUtil::readableKey(8, edgeKey.str()); - auto lockKey = makeLockKey(spaceId, edgeKey.str()); - auto insSucceed = dangleEdges_.insert(std::make_pair(lockKey, ResumeType::RESUME_CHAIN)); - if (!insSucceed.second) { - LOG(ERROR) << "not supposed to insert fail: " << folly::hexlify(edgeKey); - } - auto* lk = getLockCore(spaceId, partId, false); - auto succeed = lk->try_lock(edgeKey.str()); + auto edgeKey = ConsistUtil::edgeKeyFromPrime(iter->key()).str(); + VLOG(1) << "scanned prime edge: " << folly::hexlify(edgeKey); + auto lk = getLockCore(spaceId, partId, termId, false); + auto succeed = lk->try_lock(edgeKey); if (!succeed) { - LOG(ERROR) << "not supposed to lock fail: " << folly::hexlify(edgeKey); + LOG(ERROR) << "not supposed to lock fail: " + << ", spaceId " << spaceId << ", partId " << partId << ", termId " << termId + << folly::hexlify(edgeKey); } + addPrime(spaceId, partId, termId, edgeKey, ResumeType::RESUME_CHAIN); } } else { VLOG(1) << "primePrefix() " << apache::thrift::util::enumNameSafe(rc); - if (rc == nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { - return; - } } prefix = ConsistUtil::doublePrimePrefix(partId); rc = env_->kvstore_->prefix(spaceId, partId, prefix, &iter); if (rc == nebula::cpp2::ErrorCode::SUCCEEDED) { for (; iter->valid(); iter->next()) { - auto edgeKey = ConsistUtil::edgeKeyFromDoublePrime(iter->key()); - auto lockKey = makeLockKey(spaceId, edgeKey.str()); - auto insSucceed = dangleEdges_.insert(std::make_pair(lockKey, ResumeType::RESUME_REMOTE)); - if (!insSucceed.second) { - LOG(ERROR) << "not supposed to insert fail: " << folly::hexlify(edgeKey); - } - auto* lk = getLockCore(spaceId, partId, false); - auto succeed = lk->try_lock(edgeKey.str()); + auto edgeKey = ConsistUtil::edgeKeyFromDoublePrime(iter->key()).str(); + VLOG(1) << "scanned double prime edge: " << folly::hexlify(edgeKey); + auto lk = getLockCore(spaceId, partId, termId, false); + auto succeed = lk->try_lock(edgeKey); if (!succeed) { - LOG(ERROR) << "not supposed to 
lock fail: " << folly::hexlify(edgeKey); + LOG(ERROR) << "not supposed to lock fail: " + << ", space " << spaceId << ", partId " << partId << ", termId " << termId + << folly::hexlify(edgeKey); } + addPrime(spaceId, partId, termId, edgeKey, ResumeType::RESUME_REMOTE); } } else { VLOG(1) << "doublePrimePrefix() " << apache::thrift::util::enumNameSafe(rc); - if (rc == nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { - return; - } } - auto partOfSpace = std::make_pair(spaceId, partId); - auto insRet = scannedParts_.insert(std::make_pair(partOfSpace, 0)); - LOG(INFO) << "insert space=" << spaceId << ", part=" << partId - << ", into white list suc=" << std::boolalpha << insRet.second; + + auto currTermKey = std::make_pair(spaceId, partId); + currTerm_.insert_or_assign(currTermKey, termId); + + LOG(INFO) << "set curr term spaceId = " << spaceId << ", partId = " << partId + << ", termId = " << termId; } -folly::ConcurrentHashMap* TransactionManager::getDangleEdges() { - return &dangleEdges_; +folly::EventBase* TransactionManager::getEventBase() { + return exec_->getEventBase(); } } // namespace storage diff --git a/src/storage/transaction/TransactionManager.h b/src/storage/transaction/TransactionManager.h index acfc2517506..f94156b92e6 100644 --- a/src/storage/transaction/TransactionManager.h +++ b/src/storage/transaction/TransactionManager.h @@ -26,100 +26,125 @@ class TransactionManager { public: FRIEND_TEST(ChainUpdateEdgeTest, updateTest1); friend class FakeInternalStorageClient; + friend class TransactionManagerTester; using LockGuard = MemoryLockGuard; using LockCore = MemoryLockCore; using UPtrLock = std::unique_ptr; + using SPtrLock = std::shared_ptr; public: explicit TransactionManager(storage::StorageEnv* env); ~TransactionManager() { stop(); - } - - void addChainTask(ChainBaseProcessor* proc) { - folly::async([=] { - proc->prepareLocal() - .via(exec_.get()) - .thenValue([=](auto&& code) { return proc->processRemote(code); }) - .thenValue([=](auto&& code) { return 
proc->processLocal(code); }) - .ensure([=]() { proc->finish(); }); - }); - } - - folly::Executor* getExecutor() { - return exec_.get(); + join(); } bool start(); void stop(); - LockCore* getLockCore(GraphSpaceID spaceId, PartitionID partId, bool checkWhiteList = true); + /** + * @brief wait until stop + */ + void join(); - InternalStorageClient* getInternalClient() { - return iClient_; - } + /** + * @brief add a new processor to do "chain" work, + * using the internal executor of transaction manager. + * + * @param proc + */ + void addChainTask(ChainBaseProcessor* proc); + + /** + * @brief Get the Lock Core object to set a memory lock for a key. + * + * @param spaceId + * @param partId + * @param termId + * @param checkWhiteList caller outside TransactionManager have to set this true. + * @return nullptr if failed. + */ + SPtrLock getLockCore(GraphSpaceID spaceId, + PartitionID partId, + TermID termId, + bool checkWhiteList = true); // get term of part from kvstore, may fail if this part is not exist - std::pair getTerm(GraphSpaceID spaceId, PartitionID partId); + std::pair getTermFromKVStore(GraphSpaceID spaceId, + PartitionID partId); // check get term from local term cache // this is used by Chain...RemoteProcessor, // to avoid an old leader request overrider a newer leader's bool checkTermFromCache(GraphSpaceID spaceId, PartitionID partId, TermID termId); - void reportFailed(); - // leave a record for (double)prime edge, to let resume processor there is one dangling edge - void addPrime(GraphSpaceID spaceId, const std::string& edgeKey, ResumeType type); + void addPrime(GraphSpaceID spaceId, + PartitionID partId, + TermID termId, + const std::string& edgeKey, + ResumeType type); + + // delete a prime record when recover succeeded. 
+ void delPrime(GraphSpaceID spaceId, + PartitionID partId, + TermID termId, + const std::string& edgeKey); - void delPrime(GraphSpaceID spaceId, const std::string& edgeKey); - - bool checkUnfinishedEdge(GraphSpaceID spaceId, const folly::StringPiece& key); + /** + * @brief need to do a scan to let all prime(double prime) set a memory lock, + * before a partition starts to serve. + * otherwise, if a new request comes, it will overwrite the existing lock. + * @param spaceId + * @param partId + */ + void scanPrimes(GraphSpaceID spaceId, PartitionID partId, TermID termId); - folly::ConcurrentHashMap* getDangleEdges(); + /** + * @brief Get an EventBase object from its internal executor + * + * @return folly::EventBase* + */ + folly::EventBase* getEventBase(); - void scanPrimes(GraphSpaceID spaceId, PartitionID partId); + /** + * @brief stat thread, used for debug + */ + void monitorPoolStat(folly::ThreadPoolExecutor* pool, const std::string& msg); + void bgPrintPoolStat(); + std::string dumpPoolStat(folly::ThreadPoolExecutor* pool, const std::string& msg); - void scanAll(); + bool stop_{false}; + std::vector> monPoolStats_; protected: - void resumeThread(); - - std::string makeLockKey(GraphSpaceID spaceId, const std::string& edge); - - std::string getEdgeKey(const std::string& lockKey); - // this is a callback register to NebulaStore on new part added.
void onNewPartAdded(std::shared_ptr& part); // this is a callback register to Part::onElected void onLeaderElectedWrapper(const ::nebula::kvstore::Part::CallbackOptions& options); + // this is a callback register to Part::onLostLeadership void onLeaderLostWrapper(const ::nebula::kvstore::Part::CallbackOptions& options); protected: - using PartUUID = std::pair; + using SpacePart = std::pair; StorageEnv* env_{nullptr}; std::shared_ptr exec_; - InternalStorageClient* iClient_; - folly::ConcurrentHashMap memLocks_; - folly::ConcurrentHashMap cachedTerms_; - std::unique_ptr resumeThread_; - /** - * edges need to recover will put into this, - * resume processor will get edge from this then do resume. - * */ - folly::ConcurrentHashMap dangleEdges_; + folly::ConcurrentHashMap cachedTerms_; + + using MemLockKey = std::tuple; + folly::ConcurrentHashMap memLocks_; /** * @brief every raft part need to do a scan, * only scanned part allowed to insert edges */ - folly::ConcurrentHashMap, int> scannedParts_; + folly::ConcurrentHashMap, TermID> currTerm_; }; } // namespace storage From 195960d605f1f8fb05f08922d70b996eeaaac2fb Mon Sep 17 00:00:00 2001 From: yaphet <4414314+darionyaphet@users.noreply.github.com> Date: Wed, 26 Jan 2022 20:02:02 +0800 Subject: [PATCH 2/5] disable add hosts into zone (#3825) Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- src/graph/validator/MaintainValidator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graph/validator/MaintainValidator.cpp b/src/graph/validator/MaintainValidator.cpp index 80706a19c28..75b06660b16 100644 --- a/src/graph/validator/MaintainValidator.cpp +++ b/src/graph/validator/MaintainValidator.cpp @@ -570,7 +570,7 @@ Status ShowZonesValidator::toPlan() { } Status AddHostsIntoZoneValidator::validateImpl() { - return Status::OK(); + return Status::SemanticError("Add hosts into zone is unsupported"); } Status AddHostsIntoZoneValidator::toPlan() { From 
6203fa332bbe0c5587b2256917e441ba521ffe8a Mon Sep 17 00:00:00 2001 From: yaphet <4414314+darionyaphet@users.noreply.github.com> Date: Wed, 26 Jan 2022 20:36:27 +0800 Subject: [PATCH 3/5] Check active zone before create space (#3822) Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- src/meta/ActiveHostsMan.cpp | 5 +- .../processors/parts/CreateSpaceProcessor.cpp | 12 +++- src/meta/test/ProcessorTest.cpp | 65 +++++++++++++++---- 3 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/meta/ActiveHostsMan.cpp b/src/meta/ActiveHostsMan.cpp index 4ab0eaa581f..240dfeb161a 100644 --- a/src/meta/ActiveHostsMan.cpp +++ b/src/meta/ActiveHostsMan.cpp @@ -61,10 +61,7 @@ nebula::cpp2::ErrorCode ActiveHostsMan::updateHostInfo(kvstore::KVStore* kv, } } // indicate whether any leader info is updated - bool hasUpdate = false; - if (!data.empty()) { - hasUpdate = true; - } + bool hasUpdate = !data.empty(); data.emplace_back(MetaKeyUtils::hostKey(hostAddr.host, hostAddr.port), HostInfo::encodeV2(info)); folly::SharedMutex::WriteHolder wHolder(LockUtils::spaceLock()); diff --git a/src/meta/processors/parts/CreateSpaceProcessor.cpp b/src/meta/processors/parts/CreateSpaceProcessor.cpp index a6d2db6b57d..75c985b0a91 100644 --- a/src/meta/processors/parts/CreateSpaceProcessor.cpp +++ b/src/meta/processors/parts/CreateSpaceProcessor.cpp @@ -175,6 +175,7 @@ void CreateSpaceProcessor::process(const cpp2::CreateSpaceReq& req) { return; } + int32_t activeZoneSize = 0; std::unordered_map zoneHosts; for (auto& zone : zones) { auto zoneKey = MetaKeyUtils::zoneKey(zone); @@ -194,14 +195,14 @@ void CreateSpaceProcessor::process(const cpp2::CreateSpaceReq& req) { auto key = MetaKeyUtils::hostKey(host.host, host.port); auto ret = doGet(key); if (!nebula::ok(ret)) { - code = nebula::error(ret); LOG(ERROR) << "Get host " << host << " failed."; - break; + continue; } HostInfo info = HostInfo::decode(nebula::value(ret)); if (now - info.lastHBTimeInMilliSec_ < 
FLAGS_heartbeat_interval_secs * FLAGS_expired_time_factor * 1000) { + activeZoneSize += 1; auto hostIter = hostLoading_.find(host); if (hostIter == hostLoading_.end()) { hostLoading_[host] = 0; @@ -218,6 +219,13 @@ void CreateSpaceProcessor::process(const cpp2::CreateSpaceReq& req) { zoneHosts[zone] = std::move(hosts); } + if (replicaFactor > activeZoneSize) { + LOG(ERROR) << "Replication number should be less than or equal to active zone number."; + handleErrorCode(nebula::cpp2::ErrorCode::E_ZONE_NOT_ENOUGH); + onFinished(); + return; + } + if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << "Create space failed"; handleErrorCode(code); diff --git a/src/meta/test/ProcessorTest.cpp b/src/meta/test/ProcessorTest.cpp index 61d09ecdec6..cd8b26d099d 100644 --- a/src/meta/test/ProcessorTest.cpp +++ b/src/meta/test/ProcessorTest.cpp @@ -319,8 +319,33 @@ TEST(ProcessorTest, SpaceTest) { ASSERT_EQ(nebula::cpp2::ErrorCode::SUCCEEDED, resp.get_code()); } { - std::vector hosts = {{"0", 0}, {"1", 1}, {"2", 2}, {"3", 3}}; - TestUtils::registerHB(kv.get(), hosts); + // Attempt to register heartbeat + const ClusterID kClusterId = 10; + for (auto i = 0; i < 4; i++) { + cpp2::HBReq req; + req.role_ref() = cpp2::HostRole::STORAGE; + req.host_ref() = HostAddr(std::to_string(i), i); + req.cluster_id_ref() = kClusterId; + auto* processor = HBProcessor::instance(kv.get(), nullptr, kClusterId); + auto f = processor->getFuture(); + processor->process(req); + auto resp = std::move(f).get(); + ASSERT_EQ(nebula::cpp2::ErrorCode::SUCCEEDED, resp.get_code()); + } + } + { + cpp2::ListZonesReq req; + auto* processor = ListZonesProcessor::instance(kv.get()); + auto f = processor->getFuture(); + processor->process(req); + auto resp = std::move(f).get(); + ASSERT_EQ(nebula::cpp2::ErrorCode::SUCCEEDED, resp.get_code()); + auto zones = resp.get_zones(); + ASSERT_EQ(4, zones.size()); + ASSERT_EQ("default_zone_0_0", zones[0].get_zone_name()); + ASSERT_EQ("default_zone_1_1", 
zones[1].get_zone_name()); + ASSERT_EQ("default_zone_2_2", zones[2].get_zone_name()); + ASSERT_EQ("default_zone_3_3", zones[3].get_zone_name()); } int32_t hostsNum = 4; { @@ -473,6 +498,31 @@ TEST(ProcessorTest, SpaceTest) { auto dresp = std::move(df).get(); ASSERT_EQ(nebula::cpp2::ErrorCode::SUCCEEDED, dresp.get_code()); } + { + cpp2::AddHostsReq req; + std::vector hosts = {{"4", 4}}; + req.hosts_ref() = std::move(hosts); + auto* processor = AddHostsProcessor::instance(kv.get()); + auto f = processor->getFuture(); + processor->process(req); + auto resp = std::move(f).get(); + ASSERT_EQ(nebula::cpp2::ErrorCode::SUCCEEDED, resp.get_code()); + } + { + cpp2::SpaceDesc properties; + properties.space_name_ref() = "default_space"; + properties.partition_num_ref() = 8; + properties.replica_factor_ref() = 5; + properties.charset_name_ref() = "utf8"; + properties.collate_name_ref() = "utf8_bin"; + cpp2::CreateSpaceReq req; + req.properties_ref() = std::move(properties); + auto* processor = CreateSpaceProcessor::instance(kv.get()); + auto f = processor->getFuture(); + processor->process(req); + auto resp = std::move(f).get(); + ASSERT_EQ(nebula::cpp2::ErrorCode::E_ZONE_NOT_ENOUGH, resp.get_code()); + } } TEST(ProcessorTest, CreateTagTest) { @@ -2569,7 +2619,6 @@ TEST(ProcessorTest, HostsTest) { const ClusterID kClusterId = 10; for (auto i = 8987; i < 8990; i++) { cpp2::HBReq req; - req.role_ref() = cpp2::HostRole::STORAGE; req.host_ref() = HostAddr("127.0.0.1", i); req.cluster_id_ref() = kClusterId; req.role_ref() = cpp2::HostRole::STORAGE; @@ -2610,7 +2659,6 @@ TEST(ProcessorTest, HostsTest) { const ClusterID kClusterId = 10; for (auto i = 8987; i < 8990; i++) { cpp2::HBReq req; - req.role_ref() = cpp2::HostRole::STORAGE; req.host_ref() = HostAddr("127.0.0.1", i); req.cluster_id_ref() = kClusterId; req.role_ref() = cpp2::HostRole::STORAGE; @@ -2749,7 +2797,6 @@ TEST(ProcessorTest, HostsTest) { const ClusterID kClusterId = 10; for (auto i = 8987; i < 8990; i++) { cpp2::HBReq 
req; - req.role_ref() = cpp2::HostRole::STORAGE; req.host_ref() = HostAddr("127.0.0.1", i); req.cluster_id_ref() = kClusterId; req.role_ref() = cpp2::HostRole::STORAGE; @@ -2770,7 +2817,6 @@ TEST(ProcessorTest, AddHostsIntoNewZoneTest) { const ClusterID kClusterId = 10; for (auto i = 8987; i < 8990; i++) { cpp2::HBReq req; - req.role_ref() = cpp2::HostRole::STORAGE; req.host_ref() = HostAddr("127.0.0.1", i); req.cluster_id_ref() = kClusterId; req.role_ref() = cpp2::HostRole::STORAGE; @@ -2824,7 +2870,6 @@ TEST(ProcessorTest, AddHostsIntoNewZoneTest) { const ClusterID kClusterId = 10; for (auto i = 8987; i < 8990; i++) { cpp2::HBReq req; - req.role_ref() = cpp2::HostRole::STORAGE; req.host_ref() = HostAddr("127.0.0.1", i); req.cluster_id_ref() = kClusterId; req.role_ref() = cpp2::HostRole::STORAGE; @@ -2871,7 +2916,6 @@ TEST(ProcessorTest, AddHostsIntoZoneTest) { const ClusterID kClusterId = 10; for (auto i = 8987; i < 8990; i++) { cpp2::HBReq req; - req.role_ref() = cpp2::HostRole::STORAGE; req.host_ref() = HostAddr("127.0.0.1", i); req.cluster_id_ref() = kClusterId; req.role_ref() = cpp2::HostRole::STORAGE; @@ -3028,7 +3072,6 @@ TEST(ProcessorTest, AddHostsIntoZoneTest) { const ClusterID kClusterId = 10; for (auto i = 8987; i < 8990; i++) { cpp2::HBReq req; - req.role_ref() = cpp2::HostRole::STORAGE; req.host_ref() = HostAddr("127.0.0.1", i); req.cluster_id_ref() = kClusterId; req.role_ref() = cpp2::HostRole::STORAGE; @@ -3049,7 +3092,6 @@ TEST(ProcessorTest, DropHostsTest) { const ClusterID kClusterId = 10; for (auto i = 8987; i < 8990; i++) { cpp2::HBReq req; - req.role_ref() = cpp2::HostRole::STORAGE; req.host_ref() = HostAddr("127.0.0.1", i); req.cluster_id_ref() = kClusterId; req.role_ref() = cpp2::HostRole::STORAGE; @@ -3080,7 +3122,6 @@ TEST(ProcessorTest, DropHostsTest) { const ClusterID kClusterId = 10; for (auto i = 8987; i < 8990; i++) { cpp2::HBReq req; - req.role_ref() = cpp2::HostRole::STORAGE; req.host_ref() = HostAddr("127.0.0.1", i); 
req.cluster_id_ref() = kClusterId; req.role_ref() = cpp2::HostRole::STORAGE; @@ -3446,7 +3487,6 @@ TEST(ProcessorTest, RenameZoneTest) { const ClusterID kClusterId = 10; for (auto i = 8987; i < 8990; i++) { cpp2::HBReq req; - req.role_ref() = cpp2::HostRole::STORAGE; req.host_ref() = HostAddr("127.0.0.1", i); req.cluster_id_ref() = kClusterId; req.role_ref() = cpp2::HostRole::STORAGE; @@ -3568,7 +3608,6 @@ TEST(ProcessorTest, MergeZoneTest) { const ClusterID kClusterId = 10; for (auto i = 8986; i < 8990; i++) { cpp2::HBReq req; - req.role_ref() = cpp2::HostRole::STORAGE; req.host_ref() = HostAddr("127.0.0.1", i); req.cluster_id_ref() = kClusterId; req.role_ref() = cpp2::HostRole::STORAGE; From 67fc5549104cb487d2336410e479049c3d628410 Mon Sep 17 00:00:00 2001 From: "hs.zhang" <22708345+cangfengzhs@users.noreply.github.com> Date: Wed, 26 Jan 2022 21:24:10 +0800 Subject: [PATCH 4/5] abandon upgrader for old version (#3818) Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- src/tools/db-upgrade/DbUpgraderTool.cpp | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/tools/db-upgrade/DbUpgraderTool.cpp b/src/tools/db-upgrade/DbUpgraderTool.cpp index f36ba943546..b4e05cd143b 100644 --- a/src/tools/db-upgrade/DbUpgraderTool.cpp +++ b/src/tools/db-upgrade/DbUpgraderTool.cpp @@ -10,39 +10,37 @@ void printHelp() { fprintf( stderr, - R"( ./db_upgrade --src_db_path= --dst_db_path= --upgrade_meta_server= --upgrade_version=<1|2> + R"( ./db_upgrade --src_db_path= --dst_db_path= --upgrade_meta_server= --upgrade_version=2:3 desc: - This tool is used to upgrade data from nebula 1.x or the previous versions of nebula 2.0 RC - to nebula 2.0 GA version. + This tool is used to upgrade data from nebula 2.0GA to 3.0 required: --src_db_path= - Source data path(data_path in storage 1.x conf) to the rocksdb data directory. + Source data path to the rocksdb data directory. 
This is an absolute path, multi paths should be split by comma. - If nebula 1.x was installed in /usr/local/nebula, + If old nebula was installed in /usr/local/nebula, the db_path would be /usr/local/nebula/data/storage Default: "" --dst_db_path= - Destination data path(data_path in storage 2.0 conf) to the rocksdb data directory. + Destination data path to the rocksdb data directory. This is an absolute path, multi paths should be split by comma. - If nebula 2.0 was installed in /usr/local/nebulav2, - the db_path would be /usr/local/nebulav2/data/storage + If new nebula was installed in /usr/local/nebula_new, + the db_path would be /usr/local/nebula_new/data/storage Default: "" note: The number of paths in src_db_path is equal to the number of paths in dst_db_path, and src_db_path and dst_db_path must be different. + For 2.0GA to 3.0, dst_db_path is useless. --upgrade_meta_server= A list of meta severs' ip:port separated by comma. Default: 127.0.0.1:45500 - --upgrade_version=<1:2|2RC:2|2:3> - This tool can only upgrade 1.x data, 2.0 RC, or 2.0 GA data. - 1:2 upgrade the data from 1.x to 2.0GA - 2RC:2 upgrade the data from 2.0RC to 2.0GA + --upgrade_version=<2:3> + This tool can only upgrade 2.0GA.
2:3 upgrade the data from 2.0GA to 3.0 Default: "" @@ -165,7 +163,7 @@ int main(int argc, char* argv[]) { CHECK_NOTNULL(schemaMan); CHECK_NOTNULL(indexMan); - std::vector versions = {"1:2", "2RC:2", "2:3"}; + std::vector versions = {"2:3"}; if (std::find(versions.begin(), versions.end(), FLAGS_upgrade_version) == versions.end()) { LOG(ERROR) << "Flag upgrade_version : " << FLAGS_upgrade_version; return EXIT_FAILURE; From 1e75ef85b32fc85c6348f757601c293c0c29190b Mon Sep 17 00:00:00 2001 From: "hs.zhang" <22708345+cangfengzhs@users.noreply.github.com> Date: Wed, 26 Jan 2022 21:55:39 +0800 Subject: [PATCH 5/5] put dataVersionKey on create space for storage (#3817) * put dataVersionKey on create space for storage * address some comments Co-authored-by: Doodle <13706157+critical27@users.noreply.github.com> Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- src/common/utils/NebulaKeyUtils.cpp | 4 ++++ src/common/utils/NebulaKeyUtils.h | 2 ++ src/kvstore/RocksEngine.cpp | 12 ++++++++++++ 3 files changed, 18 insertions(+) diff --git a/src/common/utils/NebulaKeyUtils.cpp b/src/common/utils/NebulaKeyUtils.cpp index 0a91dd7b622..8c81230ec48 100644 --- a/src/common/utils/NebulaKeyUtils.cpp +++ b/src/common/utils/NebulaKeyUtils.cpp @@ -265,4 +265,8 @@ std::string NebulaKeyUtils::dataVersionKey() { return "\xFF\xFF\xFF\xFF"; } +std::string NebulaKeyUtils::dataVersionValue() { + return "3.0"; +} + } // namespace nebula diff --git a/src/common/utils/NebulaKeyUtils.h b/src/common/utils/NebulaKeyUtils.h index 5b62dde095f..ce04c56dd1c 100644 --- a/src/common/utils/NebulaKeyUtils.h +++ b/src/common/utils/NebulaKeyUtils.h @@ -274,6 +274,8 @@ class NebulaKeyUtils final { static std::string dataVersionKey(); + static std::string dataVersionValue(); + static_assert(sizeof(NebulaKeyType) == sizeof(PartitionID)); private: diff --git a/src/kvstore/RocksEngine.cpp b/src/kvstore/RocksEngine.cpp index 6a5ef04baf6..6cd542c00a7 100644 --- a/src/kvstore/RocksEngine.cpp +++ 
b/src/kvstore/RocksEngine.cpp @@ -10,6 +10,7 @@ #include "common/base/Base.h" #include "common/fs/FileUtils.h" +#include "common/utils/MetaKeyUtils.h" #include "common/utils/NebulaKeyUtils.h" #include "kvstore/KVStore.h" @@ -124,6 +125,17 @@ RocksEngine::RocksEngine(GraphSpaceID spaceId, status = rocksdb::DB::Open(options, path, &db); } CHECK(status.ok()) << status.ToString(); + if (!readonly && spaceId_ != kDefaultSpaceId /* only for storage*/) { + rocksdb::ReadOptions readOptions; + std::string dataVersionValue = ""; + status = db->Get(readOptions, NebulaKeyUtils::dataVersionKey(), &dataVersionValue); + if (status.IsNotFound()) { + rocksdb::WriteOptions writeOptions; + status = db->Put( + writeOptions, NebulaKeyUtils::dataVersionKey(), NebulaKeyUtils::dataVersionValue()); + } + CHECK(status.ok()) << status.ToString(); + } db_.reset(db); extractorLen_ = sizeof(PartitionID) + vIdLen; partsNum_ = allParts().size();