From 039044e4caf3af32e2f55d49fc699128c8906f0f Mon Sep 17 00:00:00 2001 From: Zhigao Tong Date: Tue, 15 Feb 2022 09:35:19 +0800 Subject: [PATCH 1/9] hack for test Signed-off-by: Zhigao Tong --- release-centos7-llvm/scripts/build-tiflash-ci.sh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/release-centos7-llvm/scripts/build-tiflash-ci.sh b/release-centos7-llvm/scripts/build-tiflash-ci.sh index 8f15f61772e..acdea9c8c7d 100755 --- a/release-centos7-llvm/scripts/build-tiflash-ci.sh +++ b/release-centos7-llvm/scripts/build-tiflash-ci.sh @@ -102,19 +102,16 @@ mkdir -p ${SRCPATH}/contrib/tiflash-proxy/target/release cd ${SRCPATH}/contrib/tiflash-proxy proxy_git_hash=$(git log -1 --format="%H") +PROXY_CI_CACHE_PATH="tiflash/ci-cache/tmp/pr-build" curl -o "${SRCPATH}/contrib/tiflash-proxy/target/release/libtiflash_proxy.so" \ - http://fileserver.pingcap.net/download/builds/pingcap/tiflash-proxy/${proxy_git_hash}-llvm/libtiflash_proxy.so + http://fileserver.pingcap.net/download/builds/pingcap/${PROXY_CI_CACHE_PATH}/${proxy_git_hash}/libtiflash_proxy.so proxy_size=$(ls -l "${SRCPATH}/contrib/tiflash-proxy/target/release/libtiflash_proxy.so" | awk '{print $5}') min_size=$((102400)) BUILD_TIFLASH_PROXY=false if [[ ${proxy_size} -lt ${min_size} ]]; then - BUILD_TIFLASH_PROXY=true - CMAKE_PREBUILT_LIBS_ROOT_ARG="" - echo "need to build libtiflash_proxy.so" - export PATH=$PATH:$HOME/.cargo/bin - rm -f target/release/libtiflash_proxy.so + exit -1 else CMAKE_PREBUILT_LIBS_ROOT_ARG=-DPREBUILT_LIBS_ROOT="${SRCPATH}/contrib/tiflash-proxy" chmod 0731 "${SRCPATH}/contrib/tiflash-proxy/target/release/libtiflash_proxy.so" From 99707dc0d238be4c245fa725d29a5c88dd395750 Mon Sep 17 00:00:00 2001 From: Zhigao Tong Date: Tue, 15 Feb 2022 09:35:34 +0800 Subject: [PATCH 2/9] add interfaces to support async read index task Signed-off-by: Zhigao Tong --- contrib/tiflash-proxy | 2 +- dbms/src/Debug/DBGInvoker.cpp | 3 + dbms/src/Debug/MockRaftStoreProxy.cpp | 270 +++++ dbms/src/Debug/MockRaftStoreProxy.h | 117 ++ dbms/src/Debug/ReadIndexStressTest.cpp | 195 ++++ dbms/src/Debug/ReadIndexStressTest.h | 51 + dbms/src/Server/Server.cpp | 10 +- .../Storages/Transaction/ApplySnapshot.cpp | 6 +- dbms/src/Storages/Transaction/KVStore.cpp | 15 +- dbms/src/Storages/Transaction/KVStore.h | 29 + dbms/src/Storages/Transaction/LearnerRead.cpp | 6 +- dbms/src/Storages/Transaction/ProxyFFI.cpp | 53 +- dbms/src/Storages/Transaction/ProxyFFI.h | 46 +- .../Storages/Transaction/ReadIndexWorker.cpp | 1037 +++++++++++++++++ .../Storages/Transaction/ReadIndexWorker.h | 347 ++++++ dbms/src/Storages/Transaction/Region.cpp | 10 +- dbms/src/Storages/Transaction/Region.h | 2 +- dbms/src/Storages/Transaction/RegionMeta.cpp | 13 - dbms/src/Storages/Transaction/RegionMeta.h | 2 - dbms/src/Storages/Transaction/TMTContext.cpp | 12 +- dbms/src/Storages/Transaction/TMTContext.h | 2 + dbms/src/Storages/Transaction/Utils.h | 3 +- .../Transaction/tests/gtest_kvstore.cpp | 194 ++- .../tests/gtest_read_index_worker.cpp | 642 ++++++++++ etc/config-template.toml | 5 + metrics/grafana/tiflash_proxy_details.json | 6 +- 26 files changed, 3034 insertions(+), 44 deletions(-) create mode 100644 dbms/src/Debug/MockRaftStoreProxy.cpp create mode 100644 dbms/src/Debug/MockRaftStoreProxy.h create mode 100644 dbms/src/Debug/ReadIndexStressTest.cpp create mode 100644 dbms/src/Debug/ReadIndexStressTest.h create mode 100644 dbms/src/Storages/Transaction/ReadIndexWorker.cpp create mode 100644 dbms/src/Storages/Transaction/ReadIndexWorker.h create mode 
100644 dbms/src/Storages/Transaction/tests/gtest_read_index_worker.cpp diff --git a/contrib/tiflash-proxy b/contrib/tiflash-proxy index 30c2ffff1d5..7a0ba5e2c85 160000 --- a/contrib/tiflash-proxy +++ b/contrib/tiflash-proxy @@ -1 +1 @@ -Subproject commit 30c2ffff1d5beadfc9bc96f82c040691b8804546 +Subproject commit 7a0ba5e2c85e9d89ab7f639d9e3b96246192657d diff --git a/dbms/src/Debug/DBGInvoker.cpp b/dbms/src/Debug/DBGInvoker.cpp index 08eec58bccd..77e1d3709a8 100644 --- a/dbms/src/Debug/DBGInvoker.cpp +++ b/dbms/src/Debug/DBGInvoker.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -104,6 +105,8 @@ DBGInvoker::DBGInvoker() regSchemalessFunc("search_log_for_key", dbgFuncSearchLogForKey); regSchemalessFunc("tidb_dag", dbgFuncTiDBQueryFromNaturalDag); + + regSchemalessFunc("read_index_stress_test", ReadIndexStressTest::dbgFuncStressTest); } void replaceSubstr(std::string & str, const std::string & target, const std::string & replacement) diff --git a/dbms/src/Debug/MockRaftStoreProxy.cpp b/dbms/src/Debug/MockRaftStoreProxy.cpp new file mode 100644 index 00000000000..d93fc2caea1 --- /dev/null +++ b/dbms/src/Debug/MockRaftStoreProxy.cpp @@ -0,0 +1,270 @@ +#include +#include + +namespace DB +{ +kvrpcpb::ReadIndexRequest make_read_index_reqs(uint64_t region_id, uint64_t start_ts) +{ + kvrpcpb::ReadIndexRequest req; + req.set_start_ts(start_ts); + req.mutable_context()->set_region_id(region_id); + return req; +} + +MockRaftStoreProxy & as_ref(RaftStoreProxyPtr ptr) +{ + return *reinterpret_cast(reinterpret_cast(ptr.inner)); +} + +extern void mock_set_rust_gc_helper(void (*)(RawVoidPtr, RawRustPtrType)); + +RawRustPtr fn_make_read_index_task(RaftStoreProxyPtr ptr, BaseBuffView view) +{ + auto & x = as_ref(ptr); + kvrpcpb::ReadIndexRequest req; + req.ParseFromArray(view.data, view.len); + auto * task = x.makeReadIndexTask(req); + if (task) + GCMonitor::instance().add(RawObjType::MockReadIndexTask, 1); + return RawRustPtr{task, static_cast(RawObjType::MockReadIndexTask)}; +} + +RawRustPtr fn_make_async_waker(void (*wake_fn)(RawVoidPtr), + RawCppPtr data) +{ + auto * p = new MockAsyncWaker{std::make_shared()}; + p->data->data = data; + p->data->wake_fn = wake_fn; + GCMonitor::instance().add(RawObjType::MockAsyncWaker, 1); + return RawRustPtr{p, static_cast(RawObjType::MockAsyncWaker)}; +} + +uint8_t fn_poll_read_index_task(RaftStoreProxyPtr, RawVoidPtr task, RawVoidPtr resp, RawVoidPtr waker) +{ + auto & read_index_task = *reinterpret_cast(task); + auto * async_waker = reinterpret_cast(waker); + auto res = read_index_task.data->poll(async_waker ? 
async_waker->data : nullptr); + if (res) + { + auto * s = reinterpret_cast(resp); + *s = *res; + return 1; + } + else + { + return 0; + } +} + +void fn_gc_rust_ptr(RawVoidPtr ptr, RawRustPtrType type_) +{ + if (!ptr) + return; + auto type = static_cast(type_); + GCMonitor::instance().add(type, -1); + switch (type) + { + case RawObjType::None: + break; + case RawObjType::MockReadIndexTask: + delete reinterpret_cast(ptr); + break; + case RawObjType::MockAsyncWaker: + delete reinterpret_cast(ptr); + break; + } +} + +void fn_handle_batch_read_index(RaftStoreProxyPtr, CppStrVecView, RawVoidPtr, uint64_t) +{ + throw Exception("`fn_handle_batch_read_index` is deprecated"); +} + +TiFlashRaftProxyHelper MockRaftStoreProxy::SetRaftStoreProxyFFIHelper(RaftStoreProxyPtr proxy_ptr) +{ + TiFlashRaftProxyHelper res{}; + res.proxy_ptr = proxy_ptr; + res.fn_make_read_index_task = fn_make_read_index_task; + res.fn_poll_read_index_task = fn_poll_read_index_task; + res.fn_make_async_waker = fn_make_async_waker; + res.fn_handle_batch_read_index = fn_handle_batch_read_index; + { + // make sure such function pointer will be set at most once. + static std::once_flag flag; + std::call_once(flag, []() { MockSetFFI::MockSetRustGcHelper(fn_gc_rust_ptr); }); + } + + return res; +} + +raft_serverpb::RegionLocalState MockProxyRegion::getState() +{ + auto _ = genLockGuard(); + return state; +} + +raft_serverpb::RaftApplyState MockProxyRegion::getApply() +{ + auto _ = genLockGuard(); + return apply; +} + +void MockProxyRegion::updateCommitIndex(uint64_t index) +{ + auto _ = genLockGuard(); + this->apply.set_commit_index(index); +} + +MockProxyRegion::MockProxyRegion() +{ + apply.set_commit_index(5); +} + +std::optional RawMockReadIndexTask::poll(std::shared_ptr waker) +{ + auto _ = genLockGuard(); + + if (!finished) + { + if (waker != this->waker) + { + this->waker = waker; + } + return {}; + } + if (has_lock) + { + resp.mutable_locked(); + return resp; + } + if (has_region_error) + { + resp.mutable_region_error()->mutable_data_is_not_ready(); + return resp; + } + resp.set_read_index(region->getApply().commit_index()); + return resp; +} + +void RawMockReadIndexTask::update(bool lock, bool region_error) +{ + { + auto _ = genLockGuard(); + if (finished) + return; + finished = true; + has_lock = lock; + has_region_error = region_error; + } + if (waker) + waker->wake(); +} + +MockProxyRegionPtr MockRaftStoreProxy::getRegion(uint64_t id) +{ + auto _ = genLockGuard(); + return doGetRegion(id); +} + +MockProxyRegionPtr MockRaftStoreProxy::doGetRegion(uint64_t id) +{ + if (auto it = regions.find(id); it != regions.end()) + { + return it->second; + } + return nullptr; +} + +MockReadIndexTask * MockRaftStoreProxy::makeReadIndexTask(kvrpcpb::ReadIndexRequest req) +{ + auto _ = genLockGuard(); + + wake(); + + auto region = doGetRegion(req.context().region_id()); + if (region) + { + auto * r = new MockReadIndexTask{}; + r->data = std::make_shared(); + r->data->req = std::move(req); + r->data->region = region; + tasks.push_back(r->data); + return r; + } + return nullptr; +} + +void MockRaftStoreProxy::init(size_t region_num) +{ + auto _ = genLockGuard(); + for (size_t i = 0; i < region_num; ++i) + { + regions.emplace(i, std::make_shared()); + } +} + +size_t MockRaftStoreProxy::size() const +{ + auto _ = genLockGuard(); + return regions.size(); +} + +void MockRaftStoreProxy::wake() +{ + notifier.wake(); +} + +void MockRaftStoreProxy::testRunNormal(const std::atomic_bool & over) +{ + while (!over) + { + runOneRound(); + 
notifier.blockedWaitFor(std::chrono::seconds(1)); + } +} + +void MockRaftStoreProxy::runOneRound() +{ + auto _ = genLockGuard(); + while (!tasks.empty()) + { + auto & t = *tasks.front(); + if (region_id_to_drop.find(t.req.context().region_id()) != region_id_to_drop.end()) + { + } + else + t.update(); + tasks.pop_front(); + } +} + +void MockRaftStoreProxy::unsafeInvokeForTest(std::function && cb) +{ + auto _ = genLockGuard(); + cb(*this); +} + +void GCMonitor::add(RawObjType type, int64_t diff) +{ + auto _ = genLockGuard(); + data[type] += diff; +} + +bool GCMonitor::checkClean() +{ + auto _ = genLockGuard(); + for (auto && d : data) + { + if (d.second) + return false; + } + return true; +} + +bool GCMonitor::empty() +{ + auto _ = genLockGuard(); + return data.empty(); +} + +} // namespace DB diff --git a/dbms/src/Debug/MockRaftStoreProxy.h b/dbms/src/Debug/MockRaftStoreProxy.h new file mode 100644 index 00000000000..766fb9a2fe2 --- /dev/null +++ b/dbms/src/Debug/MockRaftStoreProxy.h @@ -0,0 +1,117 @@ +#pragma once + +#include +#include + +#include + +namespace DB +{ +kvrpcpb::ReadIndexRequest make_read_index_reqs(uint64_t region_id, uint64_t start_ts); + +struct MockProxyRegion : MutexLockWrap +{ + raft_serverpb::RegionLocalState getState(); + + raft_serverpb::RaftApplyState getApply(); + + void updateCommitIndex(uint64_t index); + + MockProxyRegion(); + + raft_serverpb::RegionLocalState state; + raft_serverpb::RaftApplyState apply; +}; + +using MockProxyRegionPtr = std::shared_ptr; + +struct MockAsyncNotifier +{ + RawCppPtr data; // notifier + void (*wake_fn)(RawVoidPtr); + void wake() const + { + wake_fn(data.ptr); + } + ~MockAsyncNotifier() + { + GcRawCppPtr(data.ptr, data.type); + } +}; + +struct MockAsyncWaker +{ + std::shared_ptr data; +}; + +struct RawMockReadIndexTask : MutexLockWrap +{ + std::optional poll(std::shared_ptr waker); + + void update(bool lock = false, bool region_error = false); + + kvrpcpb::ReadIndexRequest req; + kvrpcpb::ReadIndexResponse resp; + + MockProxyRegionPtr region; + std::shared_ptr waker; + bool has_lock{false}; + bool has_region_error{false}; + bool finished{false}; +}; + +struct MockReadIndexTask +{ + std::shared_ptr data; +}; + +struct MockRaftStoreProxy : MutexLockWrap +{ + MockProxyRegionPtr getRegion(uint64_t id); + + MockProxyRegionPtr doGetRegion(uint64_t id); + + MockReadIndexTask * makeReadIndexTask(kvrpcpb::ReadIndexRequest req); + + void init(size_t region_num); + + size_t size() const; + + void wake(); + + void testRunNormal(const std::atomic_bool & over); + + void runOneRound(); + + void unsafeInvokeForTest(std::function && cb); + + static TiFlashRaftProxyHelper SetRaftStoreProxyFFIHelper(RaftStoreProxyPtr); + + std::unordered_set region_id_to_drop; + std::map regions; + std::list> tasks; + AsyncWaker::Notifier notifier; +}; + +enum class RawObjType : uint32_t +{ + None, + MockReadIndexTask, + MockAsyncWaker, +}; + +struct GCMonitor : MutexLockWrap + , public ext::Singleton +{ + void add(RawObjType type, int64_t diff); + + bool checkClean(); + + bool empty(); + + std::unordered_map data; + + static GCMonitor global_gc_monitor; +}; + +} // namespace DB diff --git a/dbms/src/Debug/ReadIndexStressTest.cpp b/dbms/src/Debug/ReadIndexStressTest.cpp new file mode 100644 index 00000000000..bb58afe722b --- /dev/null +++ b/dbms/src/Debug/ReadIndexStressTest.cpp @@ -0,0 +1,195 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +const std::map 
ReadIndexStressTest::TestTypeName = { + {ReadIndexStressTest::TestType::V1, "batch-read-index-v1"}, + {ReadIndexStressTest::TestType::Async, "async-read-index"}}; + +static const std::map TestName2Type = { + {"batch-read-index-v1", ReadIndexStressTest::TestType::V1}, + {"async-read-index", ReadIndexStressTest::TestType::Async}}; + +ReadIndexStressTest::ReadIndexStressTest( + const TMTContext & tmt_) + : tmt(tmt_) +{ + MockStressTestCfg::enable = true; + LOG_FMT_WARNING(logger, "enable MockStressTest"); +} + +ReadIndexStressTest::~ReadIndexStressTest() +{ + MockStressTestCfg::enable = false; + LOG_FMT_WARNING(logger, "disable MockStressTest"); +} + +void ReadIndexStressTest::dbgFuncStressTest(Context & context, const ASTs & args, DBGInvoker::Printer printer) +{ + if (args.size() < 4) + throw Exception("Args not matched, should be: min_region_cnt, loop_cnt, type(batch-read-index-v1, async-read-index), concurrency", ErrorCodes::BAD_ARGUMENTS); + size_t min_region_cnt = safeGet(typeid_cast(*args[0]).value); + size_t loop_cnt = safeGet(typeid_cast(*args[1]).value); + TestType ver = TestName2Type.at(typeid_cast(*args[2]).name); + size_t concurrency = safeGet(typeid_cast(*args[3]).value); + { + if (min_region_cnt < 1 || loop_cnt < 1 || concurrency < 1) + throw Exception("Invalid args: `min_region_cnt < 1 or loop_cnt < 1 or concurrency < 1` ", ErrorCodes::LOGICAL_ERROR); + } + ReadIndexStressTest cxt{context.getTMTContext()}; + size_t req_cnt{}; + std::vector tests_time_cost; + { + std::vector> tmp_tests_time_cost; + cxt.runConcurrency(min_region_cnt, loop_cnt, ver, concurrency, req_cnt, tmp_tests_time_cost); + + for (const auto & a : tmp_tests_time_cost) + { + for (const auto & b : a) + { + tests_time_cost.emplace_back(b); + } + } + std::sort(tests_time_cost.begin(), tests_time_cost.end()); + } + size_t cnt = tests_time_cost.size(); + TimeCost min_tc = TimeCost ::max(), max_tc = TimeCost::min(), avg{}, median = tests_time_cost[cnt / 2]; + { + for (const auto & b : tests_time_cost) + { + avg += b; + min_tc = std::min(min_tc, b); + max_tc = std::max(max_tc, b); + } + if (cnt % 2 == 0) + { + median = tests_time_cost[cnt / 2]; + median += tests_time_cost[(cnt / 2) - 1]; + median /= 2; + } + avg /= cnt; + } + std::string addition_info = max_tc >= std::chrono::seconds{10} ? 
"Error: meet timeout" : ""; + printer(fmt::format( + "region count each round {}, loop count {}, concurrency {}, type `{}`, time cost min {}, max {}, avg {}, median {}, {}", + req_cnt, + loop_cnt, + concurrency, + TestTypeName.at(ver), + min_tc, + max_tc, + avg, + median, + addition_info)); +} + +void ReadIndexStressTest::runConcurrency( + size_t min_region_cnt, + size_t loop_cnt, + TestType ver, + size_t concurrency, + size_t & req_cnt, + std::vector> & tests_time_cost) +{ + std::list threads; + tests_time_cost.resize(concurrency); + for (size_t cur_id = 0; cur_id < concurrency; ++cur_id) + { + threads.emplace_back(std::thread{[&, cur_id]() { + std::string name = fmt::format("RdIdxStress-{}", cur_id); + setThreadName(name.data()); + const auto & kvstore = *tmt.getKVStore(); + std::vector reqs; + reqs.reserve(min_region_cnt); + if (kvstore.regionSize() == 0) + { + throw Exception("no exist region", ErrorCodes::LOGICAL_ERROR); + } + { + size_t times = min_region_cnt / kvstore.regionSize(); + kvstore.traverseRegions([&](RegionID, const RegionPtr & region) { + for (size_t i = 0; i < times; ++i) + reqs.emplace_back(GenRegionReadIndexReq(*region, 1)); + }); + kvstore.traverseRegions([&](RegionID, const RegionPtr & region) { + if (reqs.size() < min_region_cnt) + reqs.emplace_back(GenRegionReadIndexReq(*region, 1)); + }); + } + req_cnt = reqs.size(); + for (size_t j = 0; j < loop_cnt; ++j) + { + auto ts = tmt.getPDClient()->getTS(); + for (auto & r : reqs) + r.set_start_ts(ts); + auto time_cost = run(reqs, ver); + tests_time_cost[cur_id].emplace_back(time_cost); + } + }}); + } + for (auto & t : threads) + t.join(); +} + +ReadIndexStressTest::TimeCost ReadIndexStressTest::run( + std::vector reqs, + TestType ver) +{ + const auto & kvstore = *tmt.getKVStore(); + size_t timeout_ms = 10 * 1000; + const auto wrap_time_cost = [&](std::function && f) { + auto start_time = Clock::now(); + f(); + auto end_time = Clock ::now(); + auto time_cost = std::chrono::duration_cast(end_time - start_time); + LOG_FMT_INFO(logger, "time cost {}", time_cost); + return time_cost; + }; + switch (ver) + { + case TestType::V1: + return wrap_time_cost( + [&]() { + LOG_FMT_INFO(logger, "begin to run `{}`: req size {}, ", TestTypeName.at(ver), reqs.size()); + kvstore.getProxyHelper()->batchReadIndex_v1(reqs, timeout_ms); + }); + case TestType::Async: + return wrap_time_cost( + [&]() { + if (!kvstore.read_index_worker_manager) + { + LOG_FMT_ERROR(logger, "no read_index_worker_manager"); + return; + } + LOG_FMT_INFO(logger, "begin to run `{}`: req size {}", TestTypeName.at(ver), reqs.size()); + for (size_t i = 0; i < reqs.size(); ++i) + { + auto & req = reqs[i]; + req.mutable_context()->set_region_id(req.context().region_id() + MockStressTestCfg::RegionIdPrefix * (i + 1)); + } + LOG_FMT_INFO(logger, "add prefix {} to each region id", MockStressTestCfg::RegionIdPrefix); + auto resps = kvstore.batchReadIndex(reqs, timeout_ms); + for (const auto & resp : resps) + { + if (resp.first.read_index() == 0) + throw Exception("meet region error", ErrorCodes::LOGICAL_ERROR); + } + }); + default: + throw Exception("unknown type", ErrorCodes::LOGICAL_ERROR); + } +} + +} // namespace DB diff --git a/dbms/src/Debug/ReadIndexStressTest.h b/dbms/src/Debug/ReadIndexStressTest.h new file mode 100644 index 00000000000..452cecad05c --- /dev/null +++ b/dbms/src/Debug/ReadIndexStressTest.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include + +namespace kvrpcpb +{ +class ReadIndexRequest; +} + +namespace DB +{ +class TMTContext; +class Context; + 
+class ReadIndexStressTest +{ +public: + enum class TestType + { + V1, + Async, + }; + using TimeCost = std::chrono::milliseconds; + + static const std::map TestTypeName; + + // min_region_cnt, loop_cnt, type(batch-read-index-v1, async-read-index), concurrency + static void dbgFuncStressTest(Context & context, const ASTs & args, DBGInvoker::Printer); + + TimeCost run( + std::vector reqs, + TestType ver); + + void runConcurrency(size_t min_region_cnt, + size_t loop_cnt, + TestType ver, + size_t concurrency, + size_t & req_cnt, + std::vector> & tests_time_cost); + + explicit ReadIndexStressTest(const TMTContext &); + ~ReadIndexStressTest(); + +private: + const TMTContext & tmt; + Poco::Logger * logger{&Poco::Logger::get("ReadIndexStressTest")}; +}; + + +} // namespace DB diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index c493bd361f7..77dae0c4b11 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -1306,7 +1306,14 @@ int Server::main(const std::vector & /*args*/) // proxy update store-id before status set `RaftProxyStatus::Running` assert(tiflash_instance_wrap.proxy_helper->getProxyStatus() == RaftProxyStatus::Running); LOG_FMT_INFO(log, "store {}, tiflash proxy is ready to serve, try to wake up all regions' leader", tmt_context.getKVStore()->getStoreID(std::memory_order_seq_cst)); - + size_t runner_cnt = config().getUInt("flash.read_index_runner_count", 1); // if set 0, DO NOT enable read-index worker + tmt_context.getKVStore()->initReadIndexWorkers( + [&]() { + // get from tmt context + return std::chrono::milliseconds(tmt_context.readIndexWorkerTick()); + }, + /*running thread count*/ runner_cnt); + tmt_context.getKVStore()->asyncRunReadIndexWorkers(); WaitCheckRegionReady(tmt_context, terminate_signals_counter); } SCOPE_EXIT({ @@ -1323,6 +1330,7 @@ int Server::main(const std::vector & /*args*/) std::this_thread::sleep_for(std::chrono::milliseconds(200)); } tmt_context.setStatusTerminated(); + tmt_context.getKVStore()->stopReadIndexWorkers(); LOG_FMT_INFO(log, "Set store context status Terminated"); { // update status and let proxy stop all services except encryption. 
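(Sketch, not part of the patch: the Server.cpp hunk above is the production call site of the
new worker lifecycle. Assuming the `kvstore` and `tmt_context` objects Server.cpp already
holds, the intended call order is:)

    size_t runner_cnt = config().getUInt("flash.read_index_runner_count", 1); // 0 disables the workers
    kvstore->initReadIndexWorkers(
        [&]() { return std::chrono::milliseconds(tmt_context.readIndexWorkerTick()); }, // tick interval per round
        runner_cnt); // with the default worker_coefficient of 64, this creates runner_cnt * 64 workers
    kvstore->asyncRunReadIndexWorkers(); // spawns runner_cnt "ReadIndexWkr-*" threads
    // ... serve queries; KVStore::batchReadIndex now routes through the workers ...
    kvstore->stopReadIndexWorkers();     // joins the runner threads during shutdown
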
diff --git a/dbms/src/Storages/Transaction/ApplySnapshot.cpp b/dbms/src/Storages/Transaction/ApplySnapshot.cpp
index 019244d437a..bf66e6864f6 100644
--- a/dbms/src/Storages/Transaction/ApplySnapshot.cpp
+++ b/dbms/src/Storages/Transaction/ApplySnapshot.cpp
@@ -50,10 +50,10 @@ void KVStore::checkAndApplySnapshot(const RegionPtrWrap & new_region, TMTContext
         old_applied_index = old_region->appliedIndex();
         if (auto new_index = new_region->appliedIndex(); old_applied_index > new_index)
         {
-            auto s = fmt::format("{}: [region {}] already has newer apply-index {}, should not happen",
-                                 __PRETTY_FUNCTION__,
+            auto s = fmt::format("[region {}] already has newer apply-index {} than {}, should not happen",
                                  region_id,
-                                 old_applied_index);
+                                 old_applied_index,
+                                 new_index);
             throw Exception(s, ErrorCodes::LOGICAL_ERROR);
         }
         else if (old_applied_index == new_index)
diff --git a/dbms/src/Storages/Transaction/KVStore.cpp b/dbms/src/Storages/Transaction/KVStore.cpp
index 5c2a7df85ee..fcf7d8e62a3 100644
--- a/dbms/src/Storages/Transaction/KVStore.cpp
+++ b/dbms/src/Storages/Transaction/KVStore.cpp
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include <Storages/Transaction/ReadIndexWorker.h>
 #include
 #include
 #include
@@ -188,6 +189,13 @@ void KVStore::removeRegion(RegionID region_id, bool remove_data, RegionTable & r
         manage_lock.index.remove(it->second->makeRaftCommandDelegate(task_lock).getRange().comparableKeys(), region_id); // remove index
         manage_lock.regions.erase(it);
     }
+    {
+        if (read_index_worker_manager) // the std::atomic_thread_fence in initReadIndexWorkers protects this read
+        {
+            // remove cached data & pending read-index tasks
+            read_index_worker_manager->getWorkerByRegion(region_id).removeRegion(region_id);
+        }
+    }
     region_persister.drop(region_id, region_lock);
     LOG_FMT_INFO(log, "Persisted [region {}] deleted", region_id);
@@ -600,7 +608,7 @@ void WaitCheckRegionReady(const TMTContext & tmt, const std::atomic_size_t & ter
                 it = remain_regions.erase(it);
             }
         }
-        auto read_index_res = tmt.getKVStore()->getProxyHelper()->batchReadIndex(batch_read_index_req, tmt.batchReadIndexTimeout());
+        auto read_index_res = tmt.getKVStore()->batchReadIndex(batch_read_index_req, tmt.batchReadIndexTimeout());
         for (auto && [resp, region_id] : read_index_res)
         {
             bool need_retry = resp.read_index() == 0;
@@ -728,4 +736,9 @@ void KVStore::StoreMeta::update(Base && base_)
     store_id = base.id();
 }
+KVStore::~KVStore()
+{
+    releaseReadIndexWorkers();
+}
+
 } // namespace DB
diff --git a/dbms/src/Storages/Transaction/KVStore.h b/dbms/src/Storages/Transaction/KVStore.h
index 1af8dbaf9fc..9ccc7630533 100644
--- a/dbms/src/Storages/Transaction/KVStore.h
+++ b/dbms/src/Storages/Transaction/KVStore.h
@@ -54,6 +54,7 @@ enum class EngineStoreApplyRes : uint32_t;
 struct TiFlashRaftProxyHelper;
 struct RegionPreDecodeBlockData;
 using RegionPreDecodeBlockDataPtr = std::unique_ptr<RegionPreDecodeBlockData>;
+class ReadIndexWorkerManager;
 using BatchReadIndexRes = std::vector<std::pair<kvrpcpb::ReadIndexResponse, uint64_t>>;
 class ReadIndexStressTest;
@@ -125,6 +126,28 @@ class KVStore final : private boost::noncopyable
     // May return 0 if uninitialized
     uint64_t getStoreID(std::memory_order = std::memory_order_relaxed) const;
+    BatchReadIndexRes batchReadIndex(const std::vector<kvrpcpb::ReadIndexRequest> & req, uint64_t timeout_ms) const;
+
+    /// Initialize the read-index worker context. It can only be invoked once.
+    /// `worker_coefficient` means `worker_coefficient * runner_cnt` workers will be created.
+    /// `runner_cnt` is the number of runners that drive the workers' behavior.
+    void initReadIndexWorkers(
+        std::function<std::chrono::milliseconds()> && fn_min_dur_handle_region,
+        size_t runner_cnt,
+        size_t worker_coefficient = 64);
+
+    /// Create `runner_cnt` threads to run the ReadIndexWorkers asynchronously and automatically.
+    /// If another runtime framework is in charge of scheduling, DO NOT invoke it.
+    void asyncRunReadIndexWorkers();
+
+    /// Stop the workers once there are no more read-index tasks.
+    void stopReadIndexWorkers();
+
+    /// TODO: if supported by the runtime framework, run one round for the specific runner identified by `runner_id`.
+    void runOneRoundOfReadIndexRunner(size_t runner_id);
+
+    ~KVStore();
+
 private:
     friend class MockTiDB;
     friend struct MockTiDBTable;
@@ -136,6 +159,7 @@ class KVStore final : private boost::noncopyable
     friend void dbgFuncRemoveRegion(Context &, const ASTs &, DBGInvokerPrinter);
     friend void dbgFuncPutRegion(Context &, const ASTs &, DBGInvokerPrinter);
     friend class tests::RegionKVStoreTest;
+    friend class ReadIndexStressTest;
     struct StoreMeta
     {
         using Base = metapb::Store;
@@ -187,6 +211,7 @@ class KVStore final : private boost::noncopyable
     void persistRegion(const Region & region, const RegionTaskLock & region_task_lock, const char * caller);
     /// Get and call back all regions whose range overlaps with the start/end key.
     void handleRegionsByRangeOverlap(const RegionRange & range, std::function && callback) const;
+    void releaseReadIndexWorkers();

 private:
     RegionManager region_manager;
@@ -213,6 +238,10 @@ class KVStore final : private boost::noncopyable
     const TiFlashRaftProxyHelper * proxy_helper{nullptr};
+    // It should be initialized after `proxy_helper` is set.
+    // It should only be visited from outside once the proxy status is `Running`.
+    ReadIndexWorkerManager * read_index_worker_manager{nullptr};
+    std::atomic_int64_t read_index_event_flag{0};
     StoreMeta store;
diff --git a/dbms/src/Storages/Transaction/LearnerRead.cpp b/dbms/src/Storages/Transaction/LearnerRead.cpp
index 9b826002e65..b227c83eb66 100644
--- a/dbms/src/Storages/Transaction/LearnerRead.cpp
+++ b/dbms/src/Storages/Transaction/LearnerRead.cpp
@@ -240,9 +240,9 @@ LearnerReadSnapshot doLearnerRead(
     /// Blocking learner read. Note that learner read must be performed ahead of data read,
     /// otherwise the desired index will be blocked by the lock of data read.
-    if (const auto * proxy_helper = kvstore->getProxyHelper(); proxy_helper)
+    if (kvstore->getProxyHelper())
     {
-        auto res = proxy_helper->batchReadIndex(batch_read_index_req, tmt.batchReadIndexTimeout());
+        auto res = kvstore->batchReadIndex(batch_read_index_req, tmt.batchReadIndexTimeout());
         for (auto && [resp, region_id] : res)
         {
             batch_read_index_result.emplace(region_id, std::move(resp));
@@ -320,7 +320,7 @@ LearnerReadSnapshot doLearnerRead(
         {
             // Wait-index timeout is disabled, or it is enabled but has not happened yet; wait index for
             // a specific Region.
- auto [wait_res, time_cost] = region->waitIndex(index_to_wait, tmt); + auto [wait_res, time_cost] = region->waitIndex(index_to_wait, tmt.waitIndexTimeout(), [&tmt]() { return tmt.checkRunning(); }); if (wait_res != WaitIndexResult::Finished) { handle_wait_timeout_region(region_to_query.region_id); diff --git a/dbms/src/Storages/Transaction/ProxyFFI.cpp b/dbms/src/Storages/Transaction/ProxyFFI.cpp index 7af153b7eeb..580e31e4207 100644 --- a/dbms/src/Storages/Transaction/ProxyFFI.cpp +++ b/dbms/src/Storages/Transaction/ProxyFFI.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -104,8 +105,31 @@ EngineStoreApplyRes HandleAdminRaftCmd( static_assert(sizeof(RaftStoreProxyFFIHelper) == sizeof(TiFlashRaftProxyHelper)); static_assert(alignof(RaftStoreProxyFFIHelper) == alignof(TiFlashRaftProxyHelper)); +struct RustGcHelper : public ext::Singleton +{ + void gcRustPtr(RawVoidPtr ptr, RawRustPtrType type) const + { + fn_gc_rust_ptr(ptr, type); + } + + RustGcHelper() = default; + + void setRustPtrGcFn(void (*fn_gc_rust_ptr)(RawVoidPtr, RawRustPtrType)) + { + this->fn_gc_rust_ptr = fn_gc_rust_ptr; + } + +private: + void (*fn_gc_rust_ptr)(RawVoidPtr, RawRustPtrType); +}; + void AtomicUpdateProxy(DB::EngineStoreServerWrap * server, RaftStoreProxyFFIHelper * proxy) { + // any usage towards proxy helper must happen after this function. + { + // init global rust gc function pointer here. + RustGcHelper::instance().setRustPtrGcFn(proxy->fn_gc_rust_ptr); + } server->proxy_helper = static_cast(proxy); std::atomic_thread_fence(std::memory_order::memory_order_seq_cst); } @@ -221,7 +245,7 @@ void CppStrVec::updateView() } } -BatchReadIndexRes TiFlashRaftProxyHelper::batchReadIndex(const std::vector & req, uint64_t timeout_ms) const +BatchReadIndexRes TiFlashRaftProxyHelper::batchReadIndex_v1(const std::vector & req, uint64_t timeout_ms) const { std::vector req_strs; req_strs.reserve(req.size()); @@ -237,6 +261,24 @@ BatchReadIndexRes TiFlashRaftProxyHelper::batchReadIndex(const std::vector(ptr); break; + case RawCppPtrTypeImpl::WakerNotifier: + delete reinterpret_cast(ptr); + break; default: LOG_FMT_ERROR(&Poco::Logger::get(__FUNCTION__), "unknown type {}", type); exit(-1); @@ -436,4 +481,10 @@ void SetStore(EngineStoreServerWrap * server, BaseBuffView buff) server->tmt->getKVStore()->setStore(std::move(store)); } +void MockSetFFI::MockSetRustGcHelper(void (*fn_gc_rust_ptr)(RawVoidPtr, RawRustPtrType)) +{ + LOG_FMT_WARNING(&Poco::Logger::get(__FUNCTION__), "Set mock rust ptr gc function"); + RustGcHelper::instance().setRustPtrGcFn(fn_gc_rust_ptr); +} + } // namespace DB diff --git a/dbms/src/Storages/Transaction/ProxyFFI.h b/dbms/src/Storages/Transaction/ProxyFFI.h index 0ef48b443cb..4100633e12b 100644 --- a/dbms/src/Storages/Transaction/ProxyFFI.h +++ b/dbms/src/Storages/Transaction/ProxyFFI.h @@ -7,6 +7,7 @@ #include #include +#include #include namespace kvrpcpb @@ -38,10 +39,44 @@ enum class RawCppPtrTypeImpl : RawCppPtrType String, PreHandledSnapshotWithBlock, PreHandledSnapshotWithFiles, + WakerNotifier, }; RawCppPtr GenRawCppPtr(RawVoidPtr ptr_ = nullptr, RawCppPtrTypeImpl type_ = RawCppPtrTypeImpl::None); +struct ReadIndexTask; +struct RawRustPtrWrap; + +struct RawRustPtrWrap : RawRustPtr +{ + RawRustPtrWrap(const RawRustPtrWrap &) = delete; + RawRustPtrWrap & operator=(const RawRustPtrWrap &) = delete; + + explicit RawRustPtrWrap(RawRustPtr inner); + ~RawRustPtrWrap(); + RawRustPtrWrap(RawRustPtrWrap &&); +}; + +struct ReadIndexTask : RawRustPtrWrap +{ + explicit 
ReadIndexTask(RawRustPtr inner_) + : RawRustPtrWrap(inner_) + {} +}; + +struct TimerTask : RawRustPtrWrap +{ + explicit TimerTask(RawRustPtr inner_) + : RawRustPtrWrap(inner_) + {} +}; + +class MockSetFFI +{ + friend struct MockRaftStoreProxy; + static void MockSetRustGcHelper(void (*)(RawVoidPtr, RawRustPtrType)); +}; + struct TiFlashRaftProxyHelper : RaftStoreProxyFFIHelper { RaftProxyStatus getProxyStatus() const; @@ -51,8 +86,15 @@ struct TiFlashRaftProxyHelper : RaftStoreProxyFFIHelper FileEncryptionInfo newFile(const std::string &) const; FileEncryptionInfo deleteFile(const std::string &) const; FileEncryptionInfo linkFile(const std::string &, const std::string &) const; - kvrpcpb::ReadIndexResponse readIndex(const kvrpcpb::ReadIndexRequest &) const; + BatchReadIndexRes batchReadIndex_v1(const std::vector &, uint64_t) const; BatchReadIndexRes batchReadIndex(const std::vector &, uint64_t) const; + BatchReadIndexRes batchReadIndex_v2(const std::vector &, uint64_t) const; + // return null if meet error `Full` or `Disconnected` + std::optional makeReadIndexTask(const kvrpcpb::ReadIndexRequest & req) const; + bool pollReadIndexTask(ReadIndexTask & task, kvrpcpb::ReadIndexResponse & resp, RawVoidPtr waker = nullptr) const; + RawRustPtr makeAsyncWaker(void (*wake_fn)(RawVoidPtr), RawCppPtr data) const; + TimerTask makeTimerTask(uint64_t time_ms) const; + bool pollTimerTask(TimerTask & task, RawVoidPtr waker = nullptr) const; }; extern "C" { @@ -82,6 +124,7 @@ HttpRequestRes HandleHttpRequest(EngineStoreServerWrap *, BaseBuffView path, Bas uint8_t CheckHttpUriAvailable(BaseBuffView); void GcRawCppPtr(void * ptr, RawCppPtrType type); void InsertBatchReadIndexResp(RawVoidPtr, BaseBuffView, uint64_t); +void SetReadIndexResp(RawVoidPtr, BaseBuffView); void SetServerInfoResp(BaseBuffView, RawVoidPtr); BaseBuffView strIntoView(const std::string * str_ptr); CppStrWithView GetConfig(EngineStoreServerWrap *, uint8_t full); @@ -110,6 +153,7 @@ inline EngineStoreServerHelper GetEngineStoreServerHelper( .fn_check_http_uri_available = CheckHttpUriAvailable, .fn_gc_raw_cpp_ptr = GcRawCppPtr, .fn_insert_batch_read_index_resp = InsertBatchReadIndexResp, + .fn_set_read_index_resp = SetReadIndexResp, .fn_set_server_info_resp = SetServerInfoResp, .fn_get_config = GetConfig, .fn_set_store = SetStore, diff --git a/dbms/src/Storages/Transaction/ReadIndexWorker.cpp b/dbms/src/Storages/Transaction/ReadIndexWorker.cpp new file mode 100644 index 00000000000..f11028ca786 --- /dev/null +++ b/dbms/src/Storages/Transaction/ReadIndexWorker.cpp @@ -0,0 +1,1037 @@ +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +//#define ADD_TEST_DEBUG_LOG_FMT + +#ifdef ADD_TEST_DEBUG_LOG_FMT + +static Poco::Logger * global_logger_for_test = nullptr; +static std::mutex global_logger_mutex; +#define STDOUT_TEST_LOG_FMT(...) \ + do \ + { \ + std::string formatted_message = LogFmtDetails::numArgs(__VA_ARGS__) > 1 \ + ? LogFmtDetails::toCheckedFmtStr( \ + FMT_STRING(LOG_GET_FIRST_ARG(__VA_ARGS__)), \ + __VA_ARGS__) \ + : LogFmtDetails::firstArg(__VA_ARGS__); \ + { \ + auto _ = std::lock_guard(global_logger_mutex); \ + std::cout << fmt::format( \ + "[{}][{}:{}][{}]", \ + Clock::now(), \ + &__FILE__[LogFmtDetails::getFileNameOffset(__FILE__)], \ + __LINE__, \ + formatted_message) \ + << std::endl; \ + } \ + } while (false) + +//#define TEST_LOG_FMT(...) LOG_FMT_ERROR(global_logger_for_test, __VA_ARGS__) + +#define TEST_LOG_FMT(...) 
STDOUT_TEST_LOG_FMT(__VA_ARGS__) + +void F_TEST_LOG_FMT(const std::string & s) +{ + auto _ = std::lock_guard(global_logger_mutex); + std::cout << s << std::endl; +} +#else +#define TEST_LOG_FMT(...) +void F_TEST_LOG_FMT(const std::string &) +{} +#endif +} // namespace DB + +namespace DB +{ +AsyncNotifier::Status AsyncWaker::Notifier::blockedWaitFor(std::chrono::milliseconds timeout) +{ + // if flag from false to false, wait for notification. + // if flag from true to false, do nothing. + auto res = AsyncNotifier::Status::Normal; + if (!wait_flag.exchange(false, std::memory_order_acq_rel)) + { + { + auto lock = genUniqueLock(); + if (!wait_flag.load(std::memory_order_acquire)) + { + if (cv.wait_for(lock, timeout) == std::cv_status::timeout) + res = AsyncNotifier::Status::Timeout; + } + } + wait_flag.store(false, std::memory_order_release); + } + return res; +} + +void AsyncWaker::Notifier::wake() +{ + // if flag from false -> true, then wake up. + // if flag from true -> true, do nothing. + if (!wait_flag.exchange(true, std::memory_order_acq_rel)) + { + // wake up notifier + auto _ = genLockGuard(); + cv.notify_one(); + } +} + +void AsyncWaker::wake(RawVoidPtr notifier_) +{ + auto & notifier = *reinterpret_cast(notifier_); + notifier.wake(); +} + +AsyncWaker::AsyncWaker(const TiFlashRaftProxyHelper & helper_) + : AsyncWaker(helper_, new AsyncWaker::Notifier{}) +{ +} + +AsyncWaker::AsyncWaker(const TiFlashRaftProxyHelper & helper_, AsyncNotifier * notifier_) + : inner(helper_.makeAsyncWaker(AsyncWaker::wake, GenRawCppPtr(notifier_, RawCppPtrTypeImpl::WakerNotifier))) + , notifier(*notifier_) +{ +} + +AsyncNotifier::Status AsyncWaker::waitFor(std::chrono::milliseconds timeout) +{ + return notifier.blockedWaitFor(timeout); +} + +RawVoidPtr AsyncWaker::getRaw() const +{ + return inner.ptr; +} + +struct BlockedReadIndexHelperTrait +{ + explicit BlockedReadIndexHelperTrait(uint64_t timeout_ms_) + : timeout_ms(timeout_ms_) + {} + virtual AsyncNotifier::Status blockedWaitFor(std::chrono::milliseconds) = 0; + + // block current runtime and wait. 
+ virtual AsyncNotifier::Status blockedWait() + { + auto time_cost_ms = check_watch.elapsedMilliseconds(); + + if (time_cost_ms >= timeout_ms) + { + return AsyncNotifier::Status::Timeout; + } + + auto remain = std::chrono::milliseconds(timeout_ms - time_cost_ms); + + // TODO: use async process if supported by framework + return blockedWaitFor(remain); + } + virtual ~BlockedReadIndexHelperTrait() = default; + +protected: + Stopwatch check_watch; + uint64_t timeout_ms; +}; + +struct BlockedReadIndexHelper : BlockedReadIndexHelperTrait +{ +public: + BlockedReadIndexHelper(uint64_t timeout_ms_, AsyncWaker & waker_) + : BlockedReadIndexHelperTrait(timeout_ms_) + , waker(waker_) + { + } + + const AsyncWaker & getWaker() const + { + return waker; + } + + AsyncNotifier::Status blockedWaitFor(std::chrono::milliseconds tm) override + { + return waker.waitFor(tm); + } + + virtual ~BlockedReadIndexHelper() = default; + +private: + AsyncWaker & waker; +}; + +struct BlockedReadIndexHelperV3 : BlockedReadIndexHelperTrait +{ + BlockedReadIndexHelperV3(uint64_t timeout_ms_, AsyncWaker::Notifier & notifier_) + : BlockedReadIndexHelperTrait(timeout_ms_) + , notifier(notifier_) + { + } + + AsyncNotifier::Status blockedWaitFor(std::chrono::milliseconds tm) override + { + return notifier.blockedWaitFor(tm); + } + + virtual ~BlockedReadIndexHelperV3() = default; + +private: + AsyncWaker::Notifier & notifier; +}; + +BatchReadIndexRes TiFlashRaftProxyHelper::batchReadIndex(const std::vector & req, uint64_t timeout_ms) const +{ + return batchReadIndex_v2(req, timeout_ms); +} + +BatchReadIndexRes TiFlashRaftProxyHelper::batchReadIndex_v2(const std::vector & req, uint64_t timeout_ms) const +{ + AsyncWaker waker(*this); + BlockedReadIndexHelper helper{timeout_ms, waker}; + + std::queue> tasks; + BatchReadIndexRes resps; + resps.reserve(req.size()); + + for (const auto & r : req) + { + if (auto task = makeReadIndexTask(r); !task) + { + kvrpcpb::ReadIndexResponse res; + res.mutable_region_error(); + resps.emplace_back(std::move(res), r.context().region_id()); + } + else + { + tasks.emplace(r.context().region_id(), std::move(*task)); + } + } + + { // block wait for all tasks are ready or timeout. 
+ kvrpcpb::ReadIndexResponse tmp; + while (!tasks.empty()) + { + auto & it = tasks.front(); + if (pollReadIndexTask(it.second, tmp, helper.getWaker().getRaw())) + { + resps.emplace_back(std::move(tmp), it.first); + tmp.Clear(); + tasks.pop(); + } + else + { + if (helper.blockedWait() == AsyncNotifier::Status::Timeout) + break; + } + } + } + { // if meet timeout, which means part of regions can not get response from leader, try to poll rest tasks + while (!tasks.empty()) + { + auto & it = tasks.front(); + kvrpcpb::ReadIndexResponse tmp; + if (pollReadIndexTask(it.second, tmp)) + { + resps.emplace_back(std::move(tmp), it.first); + } + else + { + tmp.mutable_region_error()->mutable_region_not_found(); + resps.emplace_back(std::move(tmp), it.first); + } + tasks.pop(); + } + } + + return resps; +} + +RawRustPtr TiFlashRaftProxyHelper::makeAsyncWaker(void (*wake_fn)(RawVoidPtr), RawCppPtr data) const +{ + return fn_make_async_waker(wake_fn, data); +} + +std::optional TiFlashRaftProxyHelper::makeReadIndexTask(const kvrpcpb::ReadIndexRequest & req) const +{ + thread_local std::string buff_cache; + req.SerializePartialToString(&buff_cache); + auto req_view = strIntoView(&buff_cache); + if (RawRustPtr ptr = fn_make_read_index_task(proxy_ptr, req_view); ptr.ptr) + { + return ReadIndexTask{ptr}; + } + else + { + return {}; + } +} + +bool TiFlashRaftProxyHelper::pollReadIndexTask(ReadIndexTask & task, kvrpcpb::ReadIndexResponse & resp, RawVoidPtr waker) const +{ + return fn_poll_read_index_task(proxy_ptr, task.ptr, &resp, waker); +} + +TimerTask TiFlashRaftProxyHelper::makeTimerTask(uint64_t time_ms) const +{ + return TimerTask{fn_make_timer_task(time_ms)}; +} + +bool TiFlashRaftProxyHelper::pollTimerTask(TimerTask & task, RawVoidPtr waker) const +{ + return fn_poll_timer_task(task.ptr, waker); +} + +void SetReadIndexResp(RawVoidPtr resp, BaseBuffView view) +{ + auto * res = reinterpret_cast(resp); + res->ParseFromArray(view.data, view.len); +} + + +struct ReadIndexNotifyCtrl : MutexLockWrap +{ + using Data = std::deque>; + + bool empty() const + { + auto _ = genLockGuard(); + return data.empty(); + } + void add(RegionID id, Timestamp ts) + { + auto _ = genLockGuard(); + data.emplace_back(id, ts); + } + Data popAll() + { + auto _ = genLockGuard(); + return std::move(data); + } + + void wake() const + { + notifier->wake(); + } + + explicit ReadIndexNotifyCtrl(AsyncWaker::NotifierPtr notifier_) + : notifier(notifier_) + {} + + Data data; + AsyncWaker::NotifierPtr notifier; +}; + +struct RegionReadIndexNotifier : AsyncNotifier +{ + void wake() override + { + notify->add(region_id, ts); + notify->wake(); + } + + virtual ~RegionReadIndexNotifier() = default; + + RegionReadIndexNotifier( + RegionID region_id_, + Timestamp ts_, + const ReadIndexNotifyCtrlPtr & notify_) + : region_id(region_id_) + , ts(ts_) + , notify(notify_) + {} + + RegionID region_id; + Timestamp ts; + ReadIndexNotifyCtrlPtr notify; +}; + +std::atomic ReadIndexWorker::max_read_index_task_timeout = std::chrono::milliseconds{8 * 1000}; +//std::atomic ReadIndexWorker::max_read_index_history{8}; + +void ReadIndexFuture::update(kvrpcpb::ReadIndexResponse resp) +{ + auto _ = genLockGuard(); + if (finished) + return; + + TEST_LOG_FMT("set ReadIndexFuture resp for req {}, resp {}", req.ShortDebugString(), resp.ShortDebugString()); + + finished = true; + this->resp = std::move(resp); + + if (notifier) + { + TEST_LOG_FMT("wake notifier for region {}", req.context().region_id()); + notifier->wake(); + } +} + +std::optional ReadIndexFuture::poll(const 
std::shared_ptr & notifier_) const +{ + auto _ = genLockGuard(); + if (!finished) + { + if (notifier_ != notifier) + { + TEST_LOG_FMT("set notifier for region {}", req.context().region_id()); + notifier = notifier_; + } + return {}; + } + return resp; +} + +void ReadIndexDataNode::ReadIndexElement::doTriggerCallbacks() +{ + TEST_LOG_FMT("start to triggerCallbacks for region {} callbacks size {}", region_id, callbacks.size()); + + if (resp.has_locked()) + { + for (auto && cb : callbacks) + { + kvrpcpb::ReadIndexResponse res{}; + if (cb->req.start_ts() == start_ts) + res = resp; + else + res.mutable_region_error()->mutable_epoch_not_match(); + cb->update(std::move(res)); + } + } + else + { + for (auto && cb : callbacks) + { + cb->update(resp); + } + } + callbacks.clear(); +} + +void ReadIndexDataNode::ReadIndexElement::doForceSet(const kvrpcpb::ReadIndexResponse & f) +{ + TEST_LOG_FMT("force set response {}, start-ts {}", f.ShortDebugString(), start_ts); + + task_pair.reset(); + resp = f; + doTriggerCallbacks(); +} + +void ReadIndexDataNode::ReadIndexElement::doPoll(const TiFlashRaftProxyHelper & helper, std::chrono::milliseconds timeout) +{ + bool can_trigger = false; + { + if (task_pair) + { + auto && [task, waker] = *task_pair; + + auto * raw_waker = waker.getRaw(); + bool clean_task = false; + if (helper.pollReadIndexTask(task, resp, raw_waker)) + { + TEST_LOG_FMT("poll ReadIndexElement success for region {}, resp {}", region_id, resp.ShortDebugString()); + + clean_task = true; + } + else if (Clock::now() > timeout + start_time) + { + TEST_LOG_FMT("poll ReadIndexElement timeout for region {}", region_id); + + clean_task = true; + resp.mutable_region_error()->mutable_server_is_busy(); // set region_error `server_is_busy` for task timeout + } + else + { + TEST_LOG_FMT( + "poll ReadIndexElement failed for region {}, time cost {}, timeout {}, start time {}", + region_id, + Clock::now() - start_time, + timeout, + start_time); + } + + if (clean_task) + { + can_trigger = true; + task_pair.reset(); + } + } + else + can_trigger = true; + } + if (can_trigger) + { + doTriggerCallbacks(); + } +} + +void ReadIndexDataNode::doAddHistoryTasks(Timestamp ts, kvrpcpb::ReadIndexResponse && resp) +{ + for (auto it = running_tasks.begin(); it != running_tasks.end();) + { + if (it->first <= ts) + { + it->second.doForceSet(resp); // copy resp + it = running_tasks.erase(it); + } + else + { + break; + } + } + { + history_success_tasks.emplace(ts, std::move(resp)); // move resp + } +} + +void ReadIndexDataNode::doConsume(const TiFlashRaftProxyHelper & helper, RunningTasks::iterator it) +{ + it->second.doPoll(helper, ReadIndexWorker::getMaxReadIndexTaskTimeout()); + if (!it->second.task_pair) + { + auto start_ts = it->first; + auto resp = std::move(it->second.resp); + + running_tasks.erase(it); + + // If `start ts` is NOT initialized or response has `region error` | `lock`, just ignore + if (start_ts && !resp.has_locked() && !resp.has_region_error()) + { + doAddHistoryTasks(start_ts, std::move(resp)); + } + } +} + +void ReadIndexDataNode::consume(const TiFlashRaftProxyHelper & helper, Timestamp ts) +{ + auto _ = genLockGuard(); + + if (auto it = running_tasks.find(ts); it != running_tasks.end()) + { + doConsume(helper, it); + } +} + +const uint64_t MockStressTestCfg::RegionIdPrefix = 1ull << 40; +bool MockStressTestCfg::enable = false; + +std::optional makeReadIndexTask(const TiFlashRaftProxyHelper & helper, kvrpcpb::ReadIndexRequest & req) +{ + if (likely(!MockStressTestCfg::enable)) + return 
helper.makeReadIndexTask(req); + else + { + auto ori_id = req.context().region_id(); + req.mutable_context()->set_region_id(ori_id % MockStressTestCfg::RegionIdPrefix); // set region id to original one. + TEST_LOG_FMT("hacked ReadIndexTask to req {}", req.ShortDebugString()); + auto res = helper.makeReadIndexTask(req); + req.mutable_context()->set_region_id(ori_id); + return res; + } +} + +void ReadIndexDataNode::runOneRound(const TiFlashRaftProxyHelper & helper, const ReadIndexNotifyCtrlPtr & notify) +{ + auto opt_waiting_tasks = this->waiting_tasks.popAll(); + if (!opt_waiting_tasks) + return; + auto & waiting_tasks = *opt_waiting_tasks; + + auto _ = genLockGuard(); + + { + Timestamp max_ts = 0; + ReadIndexFuturePtr max_ts_task = nullptr; + { + const ReadIndexFuturePtr * x = nullptr; + for (auto & e : waiting_tasks) + { + if (e.first >= max_ts) + { + max_ts = e.first; + x = &e.second; + } + } + max_ts_task = *x; // NOLINT + } + + auto region_id = max_ts_task->req.context().region_id(); + + TEST_LOG_FMT( + "try to use max_ts {}, from request for region {}, waiting_tasks size {}, running_tasks {}", + max_ts, + region_id, + waiting_tasks.size(), + running_tasks.size()); + + if (history_success_tasks && history_success_tasks->first >= max_ts) + { + TEST_LOG_FMT("find history_tasks resp {}", history_success_tasks->second.ShortDebugString()); + + for (const auto & e : waiting_tasks) + { + e.second->update(history_success_tasks->second); + } + + cnt_use_history_tasks += waiting_tasks.size(); + } + else + { + auto run_it = running_tasks.lower_bound(max_ts); + if (run_it == running_tasks.end()) + { + TEST_LOG_FMT("no exist running_tasks for ts {}", max_ts); + + if (auto t = makeReadIndexTask(helper, max_ts_task->req); t) + { + TEST_LOG_FMT("successfully make ReadIndexTask for region {} ts {}", region_id, max_ts); + AsyncWaker waker{helper, new RegionReadIndexNotifier(region_id, max_ts, notify)}; + run_it = running_tasks.try_emplace(max_ts, region_id, max_ts).first; + run_it->second.task_pair.emplace(std::move(*t), std::move(waker)); + } + else + { + TEST_LOG_FMT("failed to make ReadIndexTask for region {} ts {}", region_id, max_ts); + run_it = running_tasks.try_emplace(max_ts, region_id, max_ts).first; + run_it->second.resp.mutable_region_error(); + } + } + + for (auto && e : waiting_tasks) + { + run_it->second.callbacks.emplace_back(std::move(e.second)); + } + + doConsume(helper, run_it); + } + } +} + +ReadIndexDataNode::~ReadIndexDataNode() +{ + auto _ = genLockGuard(); + + kvrpcpb::ReadIndexResponse resp; + resp.mutable_region_error()->mutable_region_not_found(); + + TEST_LOG_FMT("~ReadIndexDataNode region {}: waiting_tasks {}, running_tasks {} ", region_id, waiting_tasks.size(), running_tasks.size()); + + if (auto waiting_tasks = this->waiting_tasks.popAll(); waiting_tasks) + { + for (const auto & e : *waiting_tasks) + { + e.second->update(resp); + } + } + + for (auto & e : running_tasks) + { + e.second.doForceSet(resp); + } + running_tasks.clear(); +} + +ReadIndexFuturePtr ReadIndexDataNode::insertTask(const kvrpcpb::ReadIndexRequest & req) +{ + auto task = std::make_shared(); + task->req = req; + + waiting_tasks.add(req.start_ts(), task); + + return task; +} + +ReadIndexDataNodePtr ReadIndexWorker::DataMap::upsertDataNode(RegionID region_id) const +{ + auto _ = genWriteLockGuard(); + + TEST_LOG_FMT("upsertDataNode for region {}", region_id); + + auto [it, ok] = region_map.try_emplace(region_id); + if (ok) + it->second = std::make_shared(region_id); + return it->second; +} + 
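+// Note on the locking scheme (description, not new behavior): getDataNode below
+// first tries tryGetDataNode, which only takes the shared (read) lock, so the
+// common hit path never blocks other readers; only on a miss does it fall back
+// to upsertDataNode above, which takes the exclusive (write) lock and relies on
+// try_emplace to stay correct when two readers miss the same region concurrently.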
+ReadIndexDataNodePtr ReadIndexWorker::DataMap::tryGetDataNode(RegionID region_id) const +{ + auto _ = genReadLockGuard(); + if (auto it = region_map.find(region_id); it != region_map.end()) + { + return it->second; + } + return nullptr; +} + +ReadIndexDataNodePtr ReadIndexWorker::DataMap::getDataNode(RegionID region_id) const +{ + if (auto ptr = tryGetDataNode(region_id); ptr != nullptr) + return ptr; + return upsertDataNode(region_id); +} + +void ReadIndexWorker::DataMap::invoke(std::function &)> && cb) +{ + auto _ = genWriteLockGuard(); + cb(region_map); +} + +void ReadIndexWorker::DataMap::removeRegion(RegionID region_id) +{ + auto _ = genWriteLockGuard(); + region_map.erase(region_id); +} + +void ReadIndexWorker::consumeReadIndexNotifyCtrl() +{ + for (auto && [region_id, ts] : read_index_notify_ctrl->popAll()) + { + auto node = data_map.getDataNode(region_id); + TEST_LOG_FMT("consume region {}, ts {}", region_id, ts); + node->consume(proxy_helper, ts); + } +} + +void ReadIndexWorker::consumeRegionNotifies(Duration min_dur) +{ + if (!lastRunTimeout(min_dur)) + { + TEST_LOG_FMT("worker {} failed to check last run timeout {}", getID(), min_dur); + return; + } + + for (auto && region_id : region_notify_map.popAll()) + { + auto node = data_map.getDataNode(region_id); + node->runOneRound(proxy_helper, read_index_notify_ctrl); + } + + TEST_LOG_FMT("worker {} set last run time {}", getID(), Clock::now()); + last_run_time.store(Clock::now(), std::memory_order_release); +} + +ReadIndexFuturePtr ReadIndexWorker::genReadIndexFuture(const kvrpcpb::ReadIndexRequest & req) +{ + auto data = data_map.getDataNode(req.context().region_id()); + auto res = data->insertTask(req); + region_notify_map.add(req.context().region_id()); + return res; +} + +ReadIndexFuturePtr ReadIndexWorkerManager::genReadIndexFuture(const kvrpcpb::ReadIndexRequest & req) +{ + return getWorkerByRegion(req.context().region_id()).genReadIndexFuture(req); +} + +void ReadIndexWorker::runOneRound(Duration min_dur) +{ + if (!read_index_notify_ctrl->empty()) + { + consumeReadIndexNotifyCtrl(); + } + if (!region_notify_map.empty()) + { + consumeRegionNotifies(min_dur); + } +} + +ReadIndexWorker::ReadIndexWorker( + const TiFlashRaftProxyHelper & proxy_helper_, + size_t id_, + AsyncWaker::NotifierPtr notifier_) + : proxy_helper(proxy_helper_) + , id(id_) + , read_index_notify_ctrl(std::make_shared(notifier_)) +{ +} + +bool ReadIndexWorker::lastRunTimeout(Duration timeout) const +{ + TEST_LOG_FMT("worker {}, last run time {}, timeout {}", getID(), last_run_time.load(std::memory_order_relaxed), timeout); + return last_run_time.load(std::memory_order_relaxed) + timeout < Clock::now(); +} + +ReadIndexWorker & ReadIndexWorkerManager::getWorkerByRegion(RegionID region_id) +{ + return *workers[region_id % workers.size()]; +} + +ReadIndexWorkerManager::ReadIndexWorkerManager( + const TiFlashRaftProxyHelper & proxy_helper_, + size_t workers_cnt, + ReadIndexWorkerManager::FnGetTickTime && fn_min_dur_handle_region, + size_t runner_cnt) + : proxy_helper(proxy_helper_) + , logger(&Poco::Logger::get("ReadIndexWorkers")) +{ + for (size_t i = 0; i < runner_cnt; ++i) + runners.emplace_back(std::make_unique( + i, + runner_cnt, + workers, + logger, + fn_min_dur_handle_region, + std::make_shared())); + + { + workers.reserve(workers_cnt); + for (size_t i = 0; i < workers_cnt; ++i) + workers.emplace_back(nullptr); + + for (size_t rid = 0; rid < runner_cnt; ++rid) + { + for (size_t wid = rid; wid < workers_cnt; wid += runner_cnt) + { + workers[wid] = 
std::make_unique<ReadIndexWorker>(
+                    proxy_helper,
+                    wid,
+                    runners[rid]->global_notifier);
+            }
+        }
+    }
+}
+
+void ReadIndexWorkerManager::wakeAll()
+{
+    for (auto & runner : runners)
+        runner->wake();
+}
+
+void ReadIndexWorkerManager::asyncRun()
+{
+    for (auto & runner : runners)
+        runner->asyncRun();
+}
+
+void ReadIndexWorkerManager::stop()
+{
+    for (auto & runner : runners)
+        runner->stop();
+}
+
+ReadIndexWorkerManager::~ReadIndexWorkerManager()
+{
+    stop();
+}
+
+void ReadIndexWorkerManager::runOneRoundAll(Duration min_dur)
+{
+    for (size_t id = 0; id < runners.size(); ++id)
+        runOneRound(min_dur, id);
+}
+
+void ReadIndexWorkerManager::runOneRound(Duration min_dur, size_t id)
+{
+    runners[id]->runOneRound(min_dur);
+}
+
+BatchReadIndexRes ReadIndexWorkerManager::batchReadIndex(
+    const std::vector<kvrpcpb::ReadIndexRequest> & reqs,
+    uint64_t timeout_ms)
+{
+    TEST_LOG_FMT("reqs size {}, timeout {}ms", reqs.size(), timeout_ms);
+
+    auto notifier = std::make_shared<AsyncWaker::Notifier>();
+    BlockedReadIndexHelperV3 helper{timeout_ms, *notifier};
+    std::queue<std::pair<RegionID, ReadIndexFuturePtr>> tasks;
+    BatchReadIndexRes resps;
+    resps.reserve(reqs.size());
+
+    for (const auto & req : reqs)
+    {
+        auto region_id = req.context().region_id();
+        auto & wk = this->getWorkerByRegion(region_id);
+        auto future = wk.genReadIndexFuture(req);
+        tasks.emplace(region_id, future);
+    }
+    this->wakeAll();
+
+    TEST_LOG_FMT("wake read_index_worker");
+
+    while (!tasks.empty())
+    {
+        auto & it = tasks.front();
+        if (auto res = it.second->poll(notifier); res)
+        {
+            resps.emplace_back(std::move(*res), it.first);
+            tasks.pop();
+        }
+        else
+        {
+            TEST_LOG_FMT("got resp {}, remain {}", resps.size(), tasks.size());
+            if (helper.blockedWait() == AsyncNotifier::Status::Timeout)
+            {
+                break;
+            }
+        }
+    }
+    { // if the timeout was hit, some regions could not get a response from the leader; poll the remaining tasks once more
+        TEST_LOG_FMT("rest {}, poll rest tasks once", tasks.size());
+
+        while (!tasks.empty())
+        {
+            auto & it = tasks.front();
+            if (auto res = it.second->poll(); res)
+            {
+                resps.emplace_back(std::move(*res), it.first);
+            }
+            else
+            {
+                kvrpcpb::ReadIndexResponse tmp;
+                tmp.mutable_region_error()->mutable_region_not_found();
+                resps.emplace_back(std::move(tmp), it.first);
+            }
+            tasks.pop();
+        }
+    }
+    return resps;
+}
+
+void ReadIndexWorker::removeRegion(uint64_t region_id)
+{
+    TEST_LOG_FMT("remove region {}", region_id);
+    data_map.removeRegion(region_id);
+}
+
+BatchReadIndexRes KVStore::batchReadIndex(const std::vector<kvrpcpb::ReadIndexRequest> & reqs, uint64_t timeout_ms) const
+{
+    assert(this->proxy_helper);
+    if (read_index_worker_manager)
+    {
+        return this->read_index_worker_manager->batchReadIndex(reqs, timeout_ms);
+    }
+    else
+    {
+        return proxy_helper->batchReadIndex_v1(reqs, timeout_ms);
+    }
+}
+
+std::unique_ptr<ReadIndexWorkerManager> ReadIndexWorkerManager::newReadIndexWorkerManager(
+    const TiFlashRaftProxyHelper & proxy_helper,
+    size_t cap,
+    ReadIndexWorkerManager::FnGetTickTime && fn_min_dur_handle_region,
+    size_t runner_cnt)
+{
+#ifdef ADD_TEST_DEBUG_LOG_FMT
+    global_logger_for_test = &Poco::Logger::get("TestReadIndexWork");
+#endif
+    return std::make_unique<ReadIndexWorkerManager>(proxy_helper, cap, std::move(fn_min_dur_handle_region), runner_cnt);
+}
+
+void KVStore::initReadIndexWorkers(ReadIndexWorkerManager::FnGetTickTime && fn_min_dur_handle_region, size_t runner_cnt, size_t worker_coefficient)
+{
+    if (!runner_cnt)
+    {
+        LOG_FMT_WARNING(log, "Run without read-index workers");
+        return;
+    }
+    auto worker_cnt = worker_coefficient * runner_cnt;
+    LOG_FMT_INFO(log, "Start to initialize read-index workers: worker count {}, runner count {}", worker_cnt,
runner_cnt); + auto * ptr = ReadIndexWorkerManager::newReadIndexWorkerManager( + *proxy_helper, + worker_cnt, + std::move(fn_min_dur_handle_region), + runner_cnt) + .release(); + std::atomic_thread_fence(std::memory_order_seq_cst); + read_index_worker_manager = ptr; +} + +void KVStore::asyncRunReadIndexWorkers() +{ + if (!read_index_worker_manager) + return; + + assert(this->proxy_helper); + read_index_worker_manager->asyncRun(); +} + +void KVStore::stopReadIndexWorkers() +{ + if (!read_index_worker_manager) + return; + + assert(this->proxy_helper); + + read_index_worker_manager->stop(); +} + +void KVStore::releaseReadIndexWorkers() +{ + if (read_index_worker_manager) + { + delete read_index_worker_manager; + read_index_worker_manager = nullptr; + } +} + +void ReadIndexWorkerManager::ReadIndexRunner::wake() const +{ + global_notifier->wake(); +} + +void ReadIndexWorkerManager::ReadIndexRunner::stop() +{ + auto tmp = State::Running; + state.compare_exchange_strong(tmp, State::Stopping, std::memory_order_acq_rel); + global_notifier->wake(); + if (work_thread) + { + work_thread->join(); + work_thread.reset(); + LOG_FMT_INFO(logger, "Thread of read-index runner {} has joined", id); + } + state.store(State::Terminated); +} + +void ReadIndexWorkerManager::ReadIndexRunner::blockedWaitFor(std::chrono::milliseconds timeout) const +{ + global_notifier->blockedWaitFor(timeout); +} + +void ReadIndexWorkerManager::ReadIndexRunner::runOneRound(Duration min_dur) +{ + for (size_t i = id; i < workers.size(); i += runner_cnt) + workers[i]->runOneRound(min_dur); +} + +void ReadIndexWorkerManager::ReadIndexRunner::asyncRun() +{ + state = State::Running; + work_thread = std::make_unique([this]() { + std::string name = fmt::format("ReadIndexWkr-{}", id); + setThreadName(name.data()); + LOG_FMT_INFO(logger, "Start read-index runner {}", id); + while (true) + { + auto base_tick_timeout = fn_min_dur_handle_region(); + blockedWaitFor(base_tick_timeout); + if (state.load(std::memory_order_acquire) != State::Running) + break; + runOneRound(base_tick_timeout); + } + LOG_FMT_INFO(logger, "Start to stop read-index runner {}", id); + }); +} + +ReadIndexWorkerManager::ReadIndexRunner::ReadIndexRunner( + size_t id_, + size_t runner_cnt_, + ReadIndexWorkers & workers_, + Poco::Logger * logger_, + FnGetTickTime fn_min_dur_handle_region_, + AsyncWaker::NotifierPtr global_notifier_) + : id(id_) + , runner_cnt(runner_cnt_) + , workers(workers_) + , logger(logger_) + , fn_min_dur_handle_region(std::move(fn_min_dur_handle_region_)) + , global_notifier(std::move(global_notifier_)) +{} + +} // namespace DB diff --git a/dbms/src/Storages/Transaction/ReadIndexWorker.h b/dbms/src/Storages/Transaction/ReadIndexWorker.h new file mode 100644 index 00000000000..e5d08055640 --- /dev/null +++ b/dbms/src/Storages/Transaction/ReadIndexWorker.h @@ -0,0 +1,347 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace DB +{ +namespace tests +{ +class ReadIndexTest; +} // namespace tests + +struct AsyncWaker +{ + struct Notifier : AsyncNotifier + , MutexLockWrap + { + mutable std::condition_variable cv; + // multi notifiers single receiver model. use another flag to avoid waiting endlessly. 
+struct ReadIndexWorker;
+using ReadIndexWorkers = std::vector>;
+struct ReadIndexFuture;
+using ReadIndexFuturePtr = std::shared_ptr;
+
+class ReadIndexWorkerManager : boost::noncopyable
+{
+public:
+    using FnGetTickTime = std::function;
+
+    ReadIndexWorker & getWorkerByRegion(RegionID region_id);
+
+    explicit ReadIndexWorkerManager(
+        const TiFlashRaftProxyHelper & proxy_helper_,
+        size_t workers_cnt,
+        FnGetTickTime && fn_min_dur_handle_region_,
+        size_t runner_cnt);
+
+    void wakeAll(); // Wake all runners to handle tasks.
+    void asyncRun();
+    void runOneRound(Duration min_dur, size_t id);
+    void stop();
+    ~ReadIndexWorkerManager();
+    BatchReadIndexRes batchReadIndex(
+        const std::vector & reqs,
+        uint64_t timeout_ms = 10 * 1000);
+
+    static std::unique_ptr newReadIndexWorkerManager(
+        const TiFlashRaftProxyHelper & proxy_helper,
+        size_t cap,
+        FnGetTickTime && fn_min_dur_handle_region,
+        size_t runner_cnt = 1);
+
+    ReadIndexFuturePtr genReadIndexFuture(const kvrpcpb::ReadIndexRequest & req);
+
+private:
+    void runOneRoundAll(Duration min_dur = std::chrono::milliseconds{0});
+
+    enum class State : uint8_t
+    {
+        Running,
+        Stopping,
+        Terminated,
+    };
+
+    friend class tests::ReadIndexTest;
+
+    struct ReadIndexRunner : boost::noncopyable
+    {
+        void wake() const;
+
+        void stop();
+
+        void blockedWaitFor(std::chrono::milliseconds timeout) const;
+
+        /// Traverse its workers and try to execute tasks.
+        void runOneRound(Duration min_dur);
+
+        /// Create one thread to run asynchronously.
+        void asyncRun();
+
+        ReadIndexRunner(
+            size_t id_,
+            size_t runner_cnt_,
+            ReadIndexWorkers & workers_,
+            Poco::Logger * logger_,
+            FnGetTickTime fn_min_dur_handle_region_,
+            AsyncWaker::NotifierPtr global_notifier_);
+
+        const size_t id;
+        const size_t runner_cnt;
+        ReadIndexWorkers & workers;
+        Poco::Logger * logger;
+        const FnGetTickTime fn_min_dur_handle_region;
+        /// The workers belonging to a runner share the same notifier.
+        AsyncWaker::NotifierPtr global_notifier;
+        std::unique_ptr work_thread;
+        std::atomic state{State::Running};
+    };
+
+private:
+    const TiFlashRaftProxyHelper & proxy_helper;
+    /// Each runner is mapped to a subset of the workers (worker_id % runner_cnt == runner_id).
+    std::vector> runners;
+    /// Each worker controls the read-index process of regions (region_id % worker_cnt == worker_id).
+ ReadIndexWorkers workers; + Poco::Logger * logger; +}; + +struct ReadIndexNotifyCtrl; +using ReadIndexNotifyCtrlPtr = std::shared_ptr; + +struct RegionNotifyMap : MutexLockWrap +{ + using Data = std::unordered_set; + + bool empty() const + { + auto _ = genLockGuard(); + return data.empty(); + } + void add(RegionID id) + { + auto _ = genLockGuard(); + data.emplace(id); + } + Data popAll() + { + auto _ = genLockGuard(); + return std::move(data); + } + + Data data; +}; + +struct ReadIndexFuture : MutexLockWrap +{ + void update(kvrpcpb::ReadIndexResponse resp); + + std::optional poll(const std::shared_ptr & notifier_ = nullptr) const; + + kvrpcpb::ReadIndexRequest req; + kvrpcpb::ReadIndexResponse resp; + bool finished{false}; + mutable std::shared_ptr notifier; +}; + +struct ReadIndexDataNode : MutexLockWrap +{ + struct ReadIndexElement + { + using Task = std::optional>; + + explicit ReadIndexElement(const RegionID region_id_, const Timestamp start_ts_) + : region_id(region_id_) + , start_ts(start_ts_) + {} + + ReadIndexElement(const ReadIndexElement &) = delete; + + void doTriggerCallbacks(); + + void doForceSet(const kvrpcpb::ReadIndexResponse & f); + + void doPoll(const TiFlashRaftProxyHelper & helper, std::chrono::milliseconds timeout); + + const RegionID region_id; + const Timestamp start_ts; + Task task_pair; + kvrpcpb::ReadIndexResponse resp; + std::deque callbacks; + Timepoint start_time = Clock::now(); + }; + + struct WaitingTasks : MutexLockWrap + { + using Data = std::deque>; + + void add(Timestamp ts, ReadIndexFuturePtr f) + { + auto _ = genLockGuard(); + waiting_tasks.emplace_back(ts, std::move(f)); + } + + std::optional popAll() + { + auto _ = genLockGuard(); + if (waiting_tasks.empty()) + return {}; + return std::move(waiting_tasks); + } + + size_t size() const + { + auto _ = genLockGuard(); + return waiting_tasks.size(); + } + + Data waiting_tasks; + }; + + using RunningTasks = std::map; + using HistorySuccessTasks = std::optional>; + + void doAddHistoryTasks(Timestamp ts, kvrpcpb::ReadIndexResponse && resp); + + void doConsume(const TiFlashRaftProxyHelper & helper, RunningTasks::iterator it); + + void consume(const TiFlashRaftProxyHelper & helper, Timestamp ts); + + void runOneRound(const TiFlashRaftProxyHelper & helper, const ReadIndexNotifyCtrlPtr & notify); + + ReadIndexFuturePtr insertTask(const kvrpcpb::ReadIndexRequest & req); + + ~ReadIndexDataNode(); + + explicit ReadIndexDataNode(const RegionID region_id_) + : region_id(region_id_) + {} + + const RegionID region_id; + + WaitingTasks waiting_tasks; + RunningTasks running_tasks; + HistorySuccessTasks history_success_tasks; + + size_t cnt_use_history_tasks{}; +}; + +using ReadIndexDataNodePtr = std::shared_ptr; + +struct ReadIndexWorker +{ + struct DataMap : SharedMutexLockWrap + { + ReadIndexDataNodePtr upsertDataNode(RegionID region_id) const; + + ReadIndexDataNodePtr tryGetDataNode(RegionID region_id) const; + + ReadIndexDataNodePtr getDataNode(RegionID region_id) const; + + void invoke(std::function &)> &&); + + void removeRegion(RegionID); + + mutable std::unordered_map region_map; + }; + + void consumeReadIndexNotifyCtrl(); + + void consumeRegionNotifies(Duration min_dur); + + ReadIndexFuturePtr genReadIndexFuture(const kvrpcpb::ReadIndexRequest & req); + + // try to consume read-index response notifications & region waiting list + void runOneRound(Duration min_dur); + + explicit ReadIndexWorker( + const TiFlashRaftProxyHelper & proxy_helper_, + size_t id_, + AsyncWaker::NotifierPtr notifier_); + + size_t 
getID() const { return id; } + + static std::chrono::milliseconds getMaxReadIndexTaskTimeout() + { + return max_read_index_task_timeout.load(std::memory_order_relaxed); + } + static void setMaxReadIndexTaskTimeout(std::chrono::milliseconds t) + { + max_read_index_task_timeout = t; + } + // static size_t getMaxReadIndexHistory() + // { + // return max_read_index_history.load(std::memory_order_relaxed); + // } + // static void setMaxReadIndexHistory(size_t x) + // { + // x = x == 0 ? 1 : x; + // max_read_index_history = x; + // } + bool lastRunTimeout(Duration timeout) const; + + void removeRegion(uint64_t); + + static std::atomic max_read_index_task_timeout; + // static std::atomic max_read_index_history; + + const TiFlashRaftProxyHelper & proxy_helper; + const size_t id; + DataMap data_map; + + // proxy can add read-index response notifications and wake runner through it. + ReadIndexNotifyCtrlPtr read_index_notify_ctrl; + + // insert region read-index request to waiting list, then add notifications. + // worker will generate related task and send to proxy. + RegionNotifyMap region_notify_map; + + // no need to be protected + std::atomic last_run_time{Timepoint::min()}; +}; + +struct MockStressTestCfg +{ + /// Usually, there may be a small number of region in one store. Using prefix can multiply the number. + static const uint64_t RegionIdPrefix; + /// If enabled, `ReadIndexStressTest` will modify region id in `kvrpcpb::ReadIndexRequest` to add prefix. + static bool enable; +}; + +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/Transaction/Region.cpp b/dbms/src/Storages/Transaction/Region.cpp index 91ffd560ea2..f8211203e0f 100644 --- a/dbms/src/Storages/Transaction/Region.cpp +++ b/dbms/src/Storages/Transaction/Region.cpp @@ -495,8 +495,9 @@ kvrpcpb::ReadIndexRequest GenRegionReadIndexReq(const Region & region, UInt64 st { request.set_start_ts(start_ts); auto * key_range = request.add_ranges(); - key_range->set_start_key(*meta_snap.range->rawKeys().first); - key_range->set_end_key(*meta_snap.range->rawKeys().second); + // use original tikv key + key_range->set_start_key(meta_snap.range->comparableKeys().first.key); + key_range->set_end_key(meta_snap.range->comparableKeys().second.key); } } return request; @@ -507,7 +508,7 @@ bool Region::checkIndex(UInt64 index) const return meta.checkIndex(index); } -std::tuple Region::waitIndex(UInt64 index, const TMTContext & tmt) +std::tuple Region::waitIndex(UInt64 index, const UInt64 timeout_ms, std::function && check_running) { if (proxy_helper != nullptr) { @@ -518,8 +519,7 @@ std::tuple Region::waitIndex(UInt64 index, const TMTCon "{} need to wait learner index {}", toString(), index); - auto timeout_ms = tmt.waitIndexTimeout(); - auto wait_idx_res = meta.waitIndex(index, timeout_ms, [&tmt]() { return tmt.checkRunning(); }); + auto wait_idx_res = meta.waitIndex(index, timeout_ms, std::move(check_running)); auto elapsed_secs = wait_index_watch.elapsedSeconds(); switch (wait_idx_res) { diff --git a/dbms/src/Storages/Transaction/Region.h b/dbms/src/Storages/Transaction/Region.h index 815708e2f7b..8fd81158927 100644 --- a/dbms/src/Storages/Transaction/Region.h +++ b/dbms/src/Storages/Transaction/Region.h @@ -145,7 +145,7 @@ class Region : public std::enable_shared_from_this bool checkIndex(UInt64 index) const; // Return for wait-index. 
- std::tuple waitIndex(UInt64 index, const TMTContext & tmt); + std::tuple waitIndex(UInt64 index, const UInt64 timeout_ms, std::function && check_running); UInt64 appliedIndex() const; diff --git a/dbms/src/Storages/Transaction/RegionMeta.cpp b/dbms/src/Storages/Transaction/RegionMeta.cpp index b4d64300f09..38c3fc34339 100644 --- a/dbms/src/Storages/Transaction/RegionMeta.cpp +++ b/dbms/src/Storages/Transaction/RegionMeta.cpp @@ -56,13 +56,6 @@ metapb::Peer RegionMeta::getPeer() const return peer; } -pingcap::kv::RegionVerID RegionMeta::getRegionVerID() const -{ - std::lock_guard lock(mutex); - - return pingcap::kv::RegionVerID{regionId(), region_state.getConfVersion(), region_state.getVersion()}; -} - raft_serverpb::RaftApplyState RegionMeta::getApplyState() const { std::lock_guard lock(mutex); @@ -100,12 +93,6 @@ UInt64 RegionMeta::appliedIndex() const return apply_state.applied_index(); } -UInt64 RegionMeta::appliedTerm() const -{ - std::lock_guard lock(mutex); - return applied_term; -} - RegionMeta::RegionMeta(RegionMeta && rhs) : region_id(rhs.regionId()) { diff --git a/dbms/src/Storages/Transaction/RegionMeta.h b/dbms/src/Storages/Transaction/RegionMeta.h index 0326bb505aa..7a113801892 100644 --- a/dbms/src/Storages/Transaction/RegionMeta.h +++ b/dbms/src/Storages/Transaction/RegionMeta.h @@ -51,12 +51,10 @@ class RegionMeta UInt64 storeId() const; UInt64 appliedIndex() const; - UInt64 appliedTerm() const; ImutRegionRangePtr getRange() const; metapb::Peer getPeer() const; - pingcap::kv::RegionVerID getRegionVerID() const; UInt64 version() const; diff --git a/dbms/src/Storages/Transaction/TMTContext.cpp b/dbms/src/Storages/Transaction/TMTContext.cpp index c588d72ac1a..53dd079f2a2 100644 --- a/dbms/src/Storages/Transaction/TMTContext.cpp +++ b/dbms/src/Storages/Transaction/TMTContext.cpp @@ -160,6 +160,7 @@ void TMTContext::reloadConfig(const Poco::Util::AbstractConfiguration & config) static constexpr const char * BATCH_READ_INDEX_TIMEOUT_MS = "flash.batch_read_index_timeout_ms"; static constexpr const char * WAIT_INDEX_TIMEOUT_MS = "flash.wait_index_timeout_ms"; static constexpr const char * WAIT_REGION_READY_TIMEOUT_SEC = "flash.wait_region_ready_timeout_sec"; + static constexpr const char * READ_INDEX_WORKER_TICK_MS = "flash.read_index_worker_tick_ms"; // default config about compact-log: period 120s, rows 40k, bytes 32MB. getKVStore()->setRegionCompactLogConfig(std::max(config.getUInt64(COMPACT_LOG_MIN_PERIOD, 120), 1), @@ -174,15 +175,17 @@ void TMTContext::reloadConfig(const Poco::Util::AbstractConfiguration & config) t = t >= 0 ? 
t : std::numeric_limits::max(); // set -1 to wait infinitely t; }); + read_index_worker_tick_ms = config.getUInt64(READ_INDEX_WORKER_TICK_MS, 10 /*10ms*/); } { LOG_FMT_INFO( &Poco::Logger::root(), - "read-index max thread num: {}, timeout: {}ms; wait-index timeout: {}ms; wait-region-ready timeout: {}s; ", + "read-index max thread num: {}, timeout: {}ms; wait-index timeout: {}ms; wait-region-ready timeout: {}s; read-index-worker-tick: {}ms", replicaReadMaxThread(), batchReadIndexTimeout(), waitIndexTimeout(), - waitRegionReadyTimeout()); + waitRegionReadyTimeout(), + readIndexWorkerTick()); } } @@ -227,6 +230,11 @@ Int64 TMTContext::waitRegionReadyTimeout() const { return wait_region_ready_timeout_sec.load(std::memory_order_relaxed); } +uint64_t TMTContext::readIndexWorkerTick() const +{ + return read_index_worker_tick_ms.load(std::memory_order_relaxed); +} + const std::string & IntoStoreStatusName(TMTContext::StoreStatus status) { static const std::string StoreStatusName[] = { diff --git a/dbms/src/Storages/Transaction/TMTContext.h b/dbms/src/Storages/Transaction/TMTContext.h index 3b10591ca90..913350185ff 100644 --- a/dbms/src/Storages/Transaction/TMTContext.h +++ b/dbms/src/Storages/Transaction/TMTContext.h @@ -98,6 +98,7 @@ class TMTContext : private boost::noncopyable // timeout for wait index (ms). "0" means wait infinitely UInt64 waitIndexTimeout() const; Int64 waitRegionReadyTimeout() const; + uint64_t readIndexWorkerTick() const; private: Context & context; @@ -124,6 +125,7 @@ class TMTContext : private boost::noncopyable std::atomic_uint64_t replica_read_max_thread; std::atomic_uint64_t batch_read_index_timeout_ms; std::atomic_uint64_t wait_index_timeout_ms; + std::atomic_uint64_t read_index_worker_tick_ms; std::atomic_int64_t wait_region_ready_timeout_sec; }; diff --git a/dbms/src/Storages/Transaction/Utils.h b/dbms/src/Storages/Transaction/Utils.h index ffed02b0cc4..f9f47c7d431 100644 --- a/dbms/src/Storages/Transaction/Utils.h +++ b/dbms/src/Storages/Transaction/Utils.h @@ -21,7 +21,6 @@ struct always_false : std::false_type namespace DB { - class MutexLockWrap { public: @@ -72,4 +71,4 @@ struct AsyncNotifier virtual void wake() = 0; virtual ~AsyncNotifier() = default; }; -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp index 6479111dc26..e9cf93e8fa1 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -20,10 +21,11 @@ extern void RemoveRegionCommitCache(const RegionPtr & region, const RegionDataRe namespace tests { -RegionPtr makeRegion(UInt64 id, const std::string start_key, const std::string end_key) +RegionPtr makeRegion(UInt64 id, const std::string start_key, const std::string end_key, const TiFlashRaftProxyHelper * proxy_helper = nullptr) { return std::make_shared( - RegionMeta(createPeer(2, true), createRegionInfo(id, std::move(start_key), std::move(end_key)), initialApplyState())); + RegionMeta(createPeer(2, true), createRegionInfo(id, std::move(start_key), std::move(end_key)), initialApplyState()), + proxy_helper); } class RegionKVStoreTest : public ::testing::Test @@ -36,6 +38,7 @@ class RegionKVStoreTest : public ::testing::Test static void testBasic(); static void testKVStore(); static void testRegion(); + static void testReadIndex(); private: static void testRaftSplit(KVStore & kvs, TMTContext & 
tmt); @@ -44,6 +47,147 @@ class RegionKVStoreTest : public ::testing::Test static void testRaftMergeRollback(KVStore & kvs, TMTContext & tmt); }; +void RegionKVStoreTest::testReadIndex() +{ + std::string path = TiFlashTestEnv::getTemporaryPath("/region_kvs_tmp") + "/basic"; + + Poco::File file(path); + if (file.exists()) + file.remove(true); + file.createDirectories(); + + auto ctx = TiFlashTestEnv::getContext( + DB::Settings(), + Strings{ + path, + }); + MockRaftStoreProxy proxy_instance; + TiFlashRaftProxyHelper proxy_helper; + { + proxy_helper = MockRaftStoreProxy::SetRaftStoreProxyFFIHelper(RaftStoreProxyPtr{&proxy_instance}); + proxy_instance.init(10); + } + std::atomic_bool over{false}; + + // start mock proxy in other thread + auto proxy_runner = std::thread([&]() { + proxy_instance.testRunNormal(over); + }); + KVStore & kvs = *ctx.getTMTContext().getKVStore(); + kvs.restore(&proxy_helper); + ASSERT_EQ(kvs.getProxyHelper(), &proxy_helper); + { + ASSERT_EQ(kvs.getRegion(0), nullptr); + auto task_lock = kvs.genTaskLock(); + auto lock = kvs.genRegionWriteLock(task_lock); + { + auto region = makeRegion(1, RecordKVFormat::genKey(1, 0), RecordKVFormat::genKey(1, 10), kvs.getProxyHelper()); + lock.regions.emplace(1, region); + lock.index.add(region); + } + { + auto region = makeRegion(2, RecordKVFormat::genKey(1, 10), RecordKVFormat::genKey(1, 20), kvs.getProxyHelper()); + lock.regions.emplace(2, region); + lock.index.add(region); + } + { + auto region = makeRegion(3, RecordKVFormat::genKey(1, 30), RecordKVFormat::genKey(1, 40), kvs.getProxyHelper()); + lock.regions.emplace(3, region); + lock.index.add(region); + } + } + { + ASSERT_EQ(kvs.read_index_worker_manager, nullptr); + { + auto region = kvs.getRegion(1); + auto req = GenRegionReadIndexReq(*region, 8); + try + { + auto resp = kvs.batchReadIndex({req}, 100); + ASSERT_TRUE(false); + } + catch (Exception & e) + { + ASSERT_EQ(e.message(), "`fn_handle_batch_read_index` is deprecated"); + } + } + kvs.initReadIndexWorkers( + []() { + return std::chrono::milliseconds(10); + }, + 1); + ASSERT_NE(kvs.read_index_worker_manager, nullptr); + + kvs.asyncRunReadIndexWorkers(); + { + const std::atomic_size_t terminate_signals_counter{}; + WaitCheckRegionReady(ctx.getTMTContext(), terminate_signals_counter, 2, 20); + } + { + // test read index + auto region = kvs.getRegion(1); + auto req = GenRegionReadIndexReq(*region, 8); + auto resp = kvs.batchReadIndex({req}, 100); + ASSERT_EQ(resp[0].first.read_index(), 5); + { + auto r = region->waitIndex(5, 0, []() { return true; }); + ASSERT_EQ(std::get<0>(r), WaitIndexResult::Finished); + } + { + auto r = region->waitIndex(8, 1, []() { return false; }); + ASSERT_EQ(std::get<0>(r), WaitIndexResult::Terminated); + } + } + for (auto & r : proxy_instance.regions) + { + r.second->updateCommitIndex(667); + } + { + auto region = kvs.getRegion(1); + auto req = GenRegionReadIndexReq(*region, 8); + auto resp = kvs.batchReadIndex({req}, 100); + ASSERT_EQ(resp[0].first.read_index(), 5); // history + } + { + auto region = kvs.getRegion(1); + auto req = GenRegionReadIndexReq(*region, 10); + auto resp = kvs.batchReadIndex({req}, 100); + ASSERT_EQ(resp[0].first.read_index(), 667); + } + { + auto region = kvs.getRegion(2); + auto req = GenRegionReadIndexReq(*region, 5); + auto resp = proxy_helper.batchReadIndex({req}, 100); // v2 + ASSERT_EQ(resp[0].first.read_index(), 667); // got latest + { + auto r = region->waitIndex(667 + 1, 2, []() { return true; }); + ASSERT_EQ(std::get<0>(r), WaitIndexResult::Timeout); + } + { + 
AsyncWaker::Notifier notifier; + std::thread t([&]() { + notifier.wake(); + auto r = region->waitIndex(667 + 1, 100000, []() { return true; }); + ASSERT_EQ(std::get<0>(r), WaitIndexResult::Finished); + }); + SCOPE_EXIT({ + t.join(); + }); + notifier.blockedWaitFor(std::chrono::milliseconds(1000 * 3600)); + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + region->handleWriteRaftCmd({}, 667 + 1, 6, ctx.getTMTContext()); + } + } + } + kvs.stopReadIndexWorkers(); + kvs.releaseReadIndexWorkers(); + over = true; + proxy_instance.wake(); + proxy_runner.join(); + ASSERT(GCMonitor::instance().checkClean()); + ASSERT(!GCMonitor::instance().empty()); +} + void RegionKVStoreTest::testRaftMergeRollback(KVStore & kvs, TMTContext & tmt) { uint64_t region_id = 7; @@ -363,6 +507,10 @@ void RegionKVStoreTest::testRaftMerge(KVStore & kvs, TMTContext & tmt) void RegionKVStoreTest::testRegion() { TableID table_id = 100; + { + auto meta = RegionMeta(createPeer(2, true), createRegionInfo(666, RecordKVFormat::genKey(0, 0), RecordKVFormat::genKey(0, 1000)), initialApplyState()); + ASSERT_EQ(meta.peerId(), 2); + } auto region = makeRegion(1, RecordKVFormat::genKey(table_id, 0), RecordKVFormat::genKey(table_id, 1000)); { ASSERT_TRUE(region->checkIndex(5)); @@ -374,8 +522,8 @@ void RegionKVStoreTest::testRegion() ASSERT_EQ(req.start_ts(), start_ts); ASSERT_EQ(region->getMetaRegion().region_epoch().DebugString(), req.context().region_epoch().DebugString()); - ASSERT_EQ(*region->getRange()->rawKeys().first, req.ranges()[0].start_key()); - ASSERT_EQ(*region->getRange()->rawKeys().second, req.ranges()[0].end_key()); + ASSERT_EQ(region->getRange()->comparableKeys().first.key, req.ranges()[0].start_key()); + ASSERT_EQ(region->getRange()->comparableKeys().second.key, req.ranges()[0].end_key()); } { region->insert("lock", RecordKVFormat::genKey(table_id, 3), RecordKVFormat::encodeLockCfValue(RecordKVFormat::CFModifyFlag::PutFlag, "PK", 3, 20)); @@ -469,6 +617,17 @@ void RegionKVStoreTest::testKVStore() }); KVStore & kvs = *ctx.getTMTContext().getKVStore(); kvs.restore(nullptr); + { + kvs.initReadIndexWorkers( + []() { + return std::chrono::milliseconds(10); + }, + 0); + ASSERT_EQ(kvs.read_index_worker_manager, nullptr); + kvs.asyncRunReadIndexWorkers(); + kvs.stopReadIndexWorkers(); + kvs.releaseReadIndexWorkers(); + } { auto store = metapb::Store{}; store.set_id(1234); @@ -679,6 +838,30 @@ void RegionKVStoreTest::testKVStore() 5, ctx.getTMTContext()); ASSERT_EQ(kvs.getRegion(19)->dataInfo(), "[default 2 ]"); + try + { + kvs.handleApplySnapshot( + region->getMetaRegion(), + 2, + {}, // empty + 6, // smaller index + 5, + ctx.getTMTContext()); + ASSERT_TRUE(false); + } + catch (Exception & e) + { + ASSERT_EQ(e.message(), "[region 19] already has newer apply-index 8 than 6, should not happen"); + ASSERT_EQ(kvs.getRegion(19)->dataInfo(), "[default 2 ]"); // apply-snapshot do not work + } + kvs.handleApplySnapshot( + region->getMetaRegion(), + 2, + {}, // empty + 8, // same index + 5, + ctx.getTMTContext()); + ASSERT_EQ(kvs.getRegion(19)->dataInfo(), "[default 2 ]"); // apply-snapshot do not work region = makeRegion(19, RecordKVFormat::genKey(1, 50), RecordKVFormat::genKey(1, 60)); region->handleWriteRaftCmd({}, 10, 10, ctx.getTMTContext()); kvs.checkAndApplySnapshot(region, ctx.getTMTContext()); @@ -904,8 +1087,9 @@ try testBasic(); testKVStore(); testRegion(); + testReadIndex(); } CATCH } // namespace tests -} // namespace DB \ No newline at end of file +} // namespace DB diff --git 
a/dbms/src/Storages/Transaction/tests/gtest_read_index_worker.cpp b/dbms/src/Storages/Transaction/tests/gtest_read_index_worker.cpp
new file mode 100644
index 00000000000..cb56794168a
--- /dev/null
+++ b/dbms/src/Storages/Transaction/tests/gtest_read_index_worker.cpp
@@ -0,0 +1,642 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+namespace FailPoints
+{
+} // namespace FailPoints
+
+namespace tests
+{
+class ReadIndexTest : public ::testing::Test
+{
+public:
+    ReadIndexTest()
+        = default;
+
+    void SetUp() override
+    {}
+
+    static size_t computeCntUseHistoryTasks(ReadIndexWorkerManager & manager);
+    static void testBasic();
+    static void testBatch();
+    static void testNormal();
+    static void testError();
+};
+
+void ReadIndexTest::testError()
+{
+    // test error
+
+    MockRaftStoreProxy proxy_instance;
+    TiFlashRaftProxyHelper proxy_helper;
+    {
+        proxy_helper = MockRaftStoreProxy::SetRaftStoreProxyFFIHelper(RaftStoreProxyPtr{&proxy_instance});
+        proxy_instance.init(10);
+    }
+    auto manager = ReadIndexWorkerManager::newReadIndexWorkerManager(
+        proxy_helper,
+        5,
+        [&]() {
+            return std::chrono::milliseconds(10);
+        });
+    {
+        std::vector reqs;
+        std::vector resps;
+        std::list futures;
+        {
+            reqs = {
+                make_read_index_reqs(2, 10),
+                make_read_index_reqs(2, 12),
+                make_read_index_reqs(2, 13)};
+            for (const auto & req : reqs)
+            {
+                auto future = manager->genReadIndexFuture(req);
+                auto resp = future->poll();
+                ASSERT(!resp);
+                futures.push_back(future);
+            }
+            manager->runOneRoundAll();
+            for (auto & future : futures)
+            {
+                auto resp = future->poll();
+                ASSERT(!resp);
+            }
+            ASSERT_EQ(proxy_instance.tasks.size(), 1);
+
+            // Force the response to be the region error `data_is_not_ready`.
+            proxy_instance.tasks.front()->update(false, true);
+
+            for (auto & future : futures)
+            {
+                auto resp = future->poll();
+                ASSERT(!resp);
+            }
+            manager->runOneRoundAll();
+            for (auto & future : futures)
+            {
+                auto resp = future->poll();
+                ASSERT(resp);
+                ASSERT(resp->region_error().has_data_is_not_ready());
+            }
+            {
+                // poll one more time
+                auto resp = futures.front()->poll();
+                ASSERT(resp);
+                // still the old value
+                ASSERT(resp->region_error().has_data_is_not_ready());
+            }
+            futures.clear();
+            proxy_instance.runOneRound();
+            ASSERT_FALSE(manager->getWorkerByRegion(2).data_map.getDataNode(2)->history_success_tasks);
+        }
+
+        {
+            reqs = {
+                make_read_index_reqs(2, 10),
+                make_read_index_reqs(2, 12),
+                make_read_index_reqs(2, 13)};
+            for (const auto & req : reqs)
+            {
+                auto future = manager->genReadIndexFuture(req);
+                auto resp = future->poll();
+                ASSERT(!resp);
+                futures.push_back(future);
+            }
+            manager->runOneRoundAll();
+            for (auto & future : futures)
+            {
+                auto resp = future->poll();
+                ASSERT(!resp);
+            }
+            ASSERT_EQ(proxy_instance.tasks.size(), 1);
+
+            // Force the response to carry a lock.
+            proxy_instance.tasks.front()->update(true, false);
+
+            proxy_instance.runOneRound();
+            for (auto & future : futures)
+            {
+                auto resp = future->poll();
+                ASSERT(!resp);
+            }
+            manager->runOneRoundAll();
+            for (auto & future : futures)
+            {
+                auto resp = future->poll();
+                ASSERT(resp);
+                resps.emplace_back(std::move(*resp));
+            }
+
+            // Only the request with the same ts will get the lock set;
+            // the others will get a region error and let the upper layer retry.
+            ASSERT(resps[0].region_error().has_epoch_not_match());
+            ASSERT(resps[1].region_error().has_epoch_not_match());
+            ASSERT(resps[2].has_locked());
+
+            // history_success_tasks is not updated.
+ ASSERT_FALSE(manager->getWorkerByRegion(2).data_map.getDataNode(2)->history_success_tasks); + } + { + // test drop region when there are related tasks. + std::vector reqs; + std::vector resps; + std::list futures; + reqs = { + make_read_index_reqs(2, 12), + make_read_index_reqs(2, 13), + }; + futures.clear(); + for (const auto & req : reqs) + { + auto future = manager->genReadIndexFuture(req); + auto resp = future->poll(); + ASSERT(!resp); + futures.push_back(future); + } + manager->runOneRoundAll(); + proxy_instance.runOneRound(); + auto future = manager->genReadIndexFuture(make_read_index_reqs(2, 15)); + + // drop region 2 + manager->getWorkerByRegion(2).removeRegion(2); + + for (auto & future : futures) + { + auto resp = future->poll(); + ASSERT(resp); + resps.emplace_back(std::move(*resp)); + } + ASSERT(resps[0].region_error().has_region_not_found()); + ASSERT(resps[1].region_error().has_region_not_found()); + { + auto resp = future->poll(); + ASSERT(resp); + ASSERT(resp->region_error().has_region_not_found()); + } + ASSERT_FALSE(manager->getWorkerByRegion(2).data_map.getDataNode(2)->history_success_tasks); + } + } + ASSERT(GCMonitor::instance().checkClean()); + ASSERT(!GCMonitor::instance().empty()); +} +size_t ReadIndexTest::computeCntUseHistoryTasks(ReadIndexWorkerManager & manager) +{ + size_t cnt_use_history_tasks = 0; + for (auto & worker : manager.workers) + { + worker->data_map.invoke([&](std::unordered_map & d) { + for (auto & x : d) + { + cnt_use_history_tasks += x.second->cnt_use_history_tasks; + } + }); + } + return cnt_use_history_tasks; +} + +void ReadIndexTest::testBasic() +{ + { + // codec + kvrpcpb::ReadIndexResponse resp; + std::string str = resp.SerializeAsString(); + { + resp.mutable_locked(); + resp.mutable_region_error(); + resp.set_read_index(12345); + } + kvrpcpb::ReadIndexResponse resp2; + SetReadIndexResp(&resp2, BaseBuffView{str.data(), str.size()}); + std::string str2 = resp2.SerializeAsString(); + + ASSERT_EQ(str2, str); + } + { + // lock wrap + struct TestMutexLockWrap : MutexLockWrap + { + void test() + { + { + auto lock = genLockGuard(); + auto ulock = tryToLock(); + ASSERT_FALSE(ulock.operator bool()); + } + { + auto ulock = genUniqueLock(); + ASSERT(ulock.operator bool()); + } + } + } test; + test.test(); + } + { + // future only can be updated once + ReadIndexFuture f; + kvrpcpb::ReadIndexResponse resp; + resp.set_read_index(888); + f.update(std::move(resp)); + ASSERT_EQ(f.resp.read_index(), 888); + resp.set_read_index(999); + f.update(std::move(resp)); + ASSERT_EQ(f.resp.read_index(), 888); + } +} + +void ReadIndexTest::testNormal() +{ + // test normal + + MockRaftStoreProxy proxy_instance; + TiFlashRaftProxyHelper proxy_helper; + { + proxy_helper = MockRaftStoreProxy::SetRaftStoreProxyFFIHelper(RaftStoreProxyPtr{&proxy_instance}); + proxy_instance.init(10); + } + auto manager = ReadIndexWorkerManager::newReadIndexWorkerManager( + proxy_helper, + 5, + [&]() { + return std::chrono::milliseconds(10); + }, + 3); + + for (size_t id = 0; id < manager->workers.size(); ++id) + { + ASSERT_EQ(id, manager->workers[id]->getID()); + } + + // start async loop in other threads + manager->asyncRun(); + + std::atomic_bool over{false}; + + // start mock proxy in other thread + auto proxy_runner = std::thread([&]() { + setThreadName("proxy-runner"); + proxy_instance.testRunNormal(over); + }); + + { + std::vector reqs; + { + reqs.reserve(proxy_instance.size()); + for (size_t i = 0; i < proxy_instance.size(); ++i) + { + reqs.emplace_back(make_read_index_reqs(i, 10)); 
+ } + } + { + auto resps = manager->batchReadIndex(reqs); + ASSERT_EQ(resps.size(), reqs.size()); + for (size_t i = 0; i < reqs.size(); ++i) + { + auto & req = reqs[i]; + auto && [resp, id] = resps[i]; + ASSERT_EQ(req.context().region_id(), id); + ASSERT_EQ(resp.read_index(), 5); + } + } + { + // update region commit index + for (auto & r : proxy_instance.regions) + { + r.second->updateCommitIndex(668); + } + } + + size_t expect_cnt_use_history_tasks = proxy_instance.size(); + { + // smaller ts, use history success record + for (auto & r : reqs) + { + r.set_start_ts(9); + } + auto resps = manager->batchReadIndex(reqs); + ASSERT_EQ(resps.size(), reqs.size()); + for (size_t i = 0; i < reqs.size(); ++i) + { + auto & req = reqs[i]; + auto && [resp, id] = resps[i]; + ASSERT_EQ(req.context().region_id(), id); + ASSERT_EQ(resp.read_index(), 5); + } + ASSERT_EQ(computeCntUseHistoryTasks(*manager), expect_cnt_use_history_tasks); + } + { + // bigger ts, fetch latest commit index + reqs = {make_read_index_reqs(0, 11)}; + auto resps = manager->batchReadIndex(reqs); + ASSERT_EQ(resps[0].first.read_index(), 668); + ASSERT_EQ(computeCntUseHistoryTasks(*manager), expect_cnt_use_history_tasks); + } + { + for (auto & r : proxy_instance.regions) + { + r.second->updateCommitIndex(669); + } + } + { + // smaller ts, use history success record. + expect_cnt_use_history_tasks++; + reqs = {make_read_index_reqs(0, 9)}; + auto resps = manager->batchReadIndex(reqs); + ASSERT_EQ(resps[0].first.read_index(), 668); // history record has been updated + ASSERT_EQ(computeCntUseHistoryTasks(*manager), expect_cnt_use_history_tasks); + } + { + // set region id to let mock proxy drop all related tasks. + proxy_instance.unsafeInvokeForTest([](MockRaftStoreProxy & proxy) { + proxy.region_id_to_drop.emplace(1); + }); + std::vector reqs; + + reqs = {make_read_index_reqs(5, 12), make_read_index_reqs(1, 12), make_read_index_reqs(2, 12)}; + Timepoint start = Clock::now(); + auto resps = manager->batchReadIndex(reqs, 20); + auto time_cost = Clock::now() - start; + ASSERT_GE(time_cost, std::chrono::milliseconds{20}); // meet timeout + ASSERT_EQ(resps[0].first.read_index(), 669); + ASSERT_EQ(resps[1].first.region_error().has_region_not_found(), true); // timeout to region error not found + ASSERT_EQ(resps[2].first.read_index(), 669); + ASSERT(!GCMonitor::instance().checkClean()); + { + // test timeout 0ms + proxy_instance.unsafeInvokeForTest([](MockRaftStoreProxy & proxy) { + proxy.region_id_to_drop.emplace(9); + }); + auto resps = manager->batchReadIndex({make_read_index_reqs(9, 12)}, 0); + ASSERT_EQ(resps[0].first.region_error().has_region_not_found(), true); // timeout to region error not found + } + + // disable drop region task + proxy_instance.unsafeInvokeForTest([](MockRaftStoreProxy & proxy) { + proxy.region_id_to_drop.clear(); + }); + + ReadIndexWorker::setMaxReadIndexTaskTimeout(std::chrono::milliseconds{10}); // set max task timeout in worker + resps = manager->batchReadIndex(reqs, 500); // the old task is still in worker, poll from mock proxy failed, check timeout and set region error `server_is_busy + ASSERT_EQ(resps[1].first.region_error().has_server_is_busy(), true); // meet region error `server_is_busy` for task timeout + + ReadIndexWorker::setMaxReadIndexTaskTimeout(std::chrono::milliseconds{8 * 1000}); // set max task timeout in worker to previous. + resps = manager->batchReadIndex(reqs, 500); + ASSERT_EQ(resps[1].first.read_index(), 669); + } + { + // test `batchReadIndex_v2`. + // batchReadIndex_v2 is almost like v1. 
+        // v1 runs the whole job inside the proxy; v2 splits it into multiple steps and runs them on the TiFlash side.
+        reqs = {make_read_index_reqs(5, 12), make_read_index_reqs(1, 12), make_read_index_reqs(2, 12)};
+        auto resps = proxy_helper.batchReadIndex_v2(reqs, 200);
+        for (size_t i = 0; i < reqs.size(); ++i)
+        {
+            ASSERT_EQ(resps[i].first.read_index(), 669);
+        }
+
+        // Set the region id to let the mock proxy drop all related tasks.
+        proxy_instance.unsafeInvokeForTest([](MockRaftStoreProxy & proxy) {
+            proxy.region_id_to_drop.emplace(1);
+        });
+
+        resps = proxy_helper.batchReadIndex_v2(reqs, 50);
+
+        ASSERT_EQ(resps[0].first.read_index(), 669);
+        ASSERT_EQ(resps[1].first.region_error().has_region_not_found(), true); // timeout leads to region error `region_not_found`
+        ASSERT_EQ(resps[2].first.read_index(), 669);
+
+        proxy_instance.unsafeInvokeForTest([](MockRaftStoreProxy & proxy) {
+            proxy.region_id_to_drop.clear();
+        });
+    }
+    {
+        // test a region that does not exist
+        reqs = {make_read_index_reqs(8192, 5)};
+        auto resps = proxy_helper.batchReadIndex_v2(reqs, 20);
+        ASSERT(resps[0].first.has_region_error());
+    }
+    }
+    over = true;
+    proxy_instance.wake();
+    proxy_runner.join();
+    manager.reset();
+    ASSERT(GCMonitor::instance().checkClean());
+    ASSERT(!GCMonitor::instance().empty());
+}
+void ReadIndexTest::testBatch()
+{
+    // test batch
+    MockRaftStoreProxy proxy_instance;
+    TiFlashRaftProxyHelper proxy_helper;
+    {
+        proxy_helper = MockRaftStoreProxy::SetRaftStoreProxyFFIHelper(RaftStoreProxyPtr{&proxy_instance});
+        proxy_instance.init(10);
+    }
+    auto manager = ReadIndexWorkerManager::newReadIndexWorkerManager(
+        proxy_helper,
+        5,
+        [&]() {
+            return std::chrono::milliseconds(10);
+        });
+    // DO NOT run manager and mock proxy in other threads.
+    {
+        {
+            // run with empty `waiting_tasks`
+            manager->getWorkerByRegion(0).data_map.getDataNode(0)->runOneRound(proxy_helper, manager->getWorkerByRegion(0).read_index_notify_ctrl);
+        }
+        {
+            // test upsert region
+            manager->workers[0]->data_map.upsertDataNode(0);
+            auto ori_s = manager->workers[0]->data_map.region_map.size();
+            manager->workers[0]->data_map.upsertDataNode(0);
+            ASSERT_EQ(manager->workers[0]->data_map.region_map.size(), ori_s);
+        }
+
+        std::vector reqs;
+        std::deque futures;
+        reqs = {
+            make_read_index_reqs(0, 1),
+            make_read_index_reqs(0, 2),
+            make_read_index_reqs(0, 3),
+            make_read_index_reqs(1, 2),
+            make_read_index_reqs(1, 3)};
+        // no waiting task
+        ASSERT_EQ(
+            manager->getWorkerByRegion(0).data_map.getDataNode(0)->waiting_tasks.size(),
+            0);
+        // poll failed
+        for (const auto & req : reqs)
+        {
+            auto future = manager->genReadIndexFuture(req);
+            ASSERT_FALSE(future->poll());
+            futures.emplace_back(future);
+        }
+        // worker 0 has 3 waiting tasks.
+        ASSERT_EQ(
+            manager->getWorkerByRegion(0).data_map.getDataNode(0)->waiting_tasks.size(),
+            3);
+        // run worker, clean waiting tasks.
+        manager->runOneRoundAll();
+        ASSERT_EQ(
+            manager->getWorkerByRegion(0).data_map.getDataNode(0)->waiting_tasks.size(),
+            0);
+
+        ASSERT_EQ(1, manager->getWorkerByRegion(0).data_map.getDataNode(0)->running_tasks.size()); // worker 0 has 1 running task.
+        ASSERT_EQ(1, manager->getWorkerByRegion(1).data_map.getDataNode(1)->running_tasks.size()); // worker 1 has 1 running task.
+        {
+            for (auto & r : proxy_instance.regions)
+            {
+                r.second->updateCommitIndex(667);
+            }
+        }
+        {
+            auto req = make_read_index_reqs(0, 5);
+            auto future = manager->genReadIndexFuture(req);
+            ASSERT_FALSE(future->poll());
+            futures.emplace_back(future);
+        }
+        {
+            // When running the same worker continuously, the elapsed time must be bigger than the configured duration.
+ ASSERT_FALSE(manager->getWorkerByRegion(0).lastRunTimeout(std::chrono::milliseconds(500))); // failed to check last run timeout + } + manager->runOneRoundAll(); + + // bigger ts and make new running task + ASSERT_EQ(2, manager->getWorkerByRegion(0).data_map.getDataNode(0)->running_tasks.size()); + + // mock proxy update all read index requests. + proxy_instance.runOneRound(); + manager->runOneRoundAll(); + std::vector resps; + for (auto & future : futures) + { + auto resp = future->poll(); + ASSERT(resp); + resps.emplace_back(std::move(*resp)); + } + ASSERT_EQ(resps.size(), 6); + for (auto & resp : resps) + ASSERT_EQ(resp.read_index(), 667); + } + { + auto req = make_read_index_reqs(8192, 5); // not exist + + auto future = manager->genReadIndexFuture(req); + + manager->runOneRoundAll(); // failed to execute `fn_make_read_index_task`. + + ASSERT_EQ(0, manager->getWorkerByRegion(8192).data_map.getDataNode(8192)->running_tasks.size()); // no running task + + auto resp = future->poll(); + ASSERT(resp); + ASSERT(resp->has_region_error()); + proxy_instance.runOneRound(); + } + { + // test history + for (auto & r : proxy_instance.regions) + { + r.second->updateCommitIndex(670); + } + std::vector reqs; + std::list futures; + std::vector resps; + + reqs = { + make_read_index_reqs(0, 5), + make_read_index_reqs(0, 6)}; + for (const auto & req : reqs) + { + auto future = manager->genReadIndexFuture(req); + futures.push_back(future); + } + manager->runOneRoundAll(); + + ASSERT_EQ(1, manager->getWorkerByRegion(0).data_map.getDataNode(0)->running_tasks.size()); + + { + auto future = manager->genReadIndexFuture(make_read_index_reqs(0, 10)); + futures.push_back(future); + } + manager->runOneRoundAll(); + { + auto & t = proxy_instance.tasks.back(); + ASSERT_EQ(t->req.start_ts(), 10); + t->update(); // only response ts `10` + } + + ASSERT_EQ(2, manager->getWorkerByRegion(0).data_map.getDataNode(0)->running_tasks.size()); + + manager->runOneRoundAll(); // history trigger all callback tasks. 
+
+        ASSERT_EQ(0, manager->getWorkerByRegion(0).data_map.getDataNode(0)->running_tasks.size());
+
+        for (auto & future : futures)
+        {
+            auto resp = future->poll();
+            ASSERT(resp);
+            resps.emplace_back(std::move(*resp));
+        }
+        {
+            ASSERT_EQ(resps[0].read_index(), 670);
+            ASSERT_EQ(resps[1].read_index(), 670);
+            ASSERT_EQ(resps[2].read_index(), 670);
+        }
+        proxy_instance.runOneRound();
+        manager->runOneRoundAll();
+
+        futures.clear();
+        reqs = {
+            make_read_index_reqs(0, 12),
+        };
+        for (const auto & req : reqs)
+        {
+            auto future = manager->genReadIndexFuture(req);
+            futures.push_back(future);
+        }
+        manager->runOneRoundAll();
+        proxy_instance.runOneRound();
+        manager->runOneRoundAll();
+        for (auto & future : futures)
+        {
+            auto resp = future->poll();
+            ASSERT_EQ(resp->read_index(), 670);
+        }
+
+        ASSERT_EQ(manager->getWorkerByRegion(0).data_map.getDataNode(0)->history_success_tasks->second.read_index(), 670);
+    }
+    {
+        MockStressTestCfg::enable = true;
+        auto region_id = 1 + MockStressTestCfg::RegionIdPrefix * (1 + 1);
+        auto f = manager->genReadIndexFuture(make_read_index_reqs(region_id, 15));
+        manager->runOneRoundAll();
+        proxy_instance.regions[1]->updateCommitIndex(677);
+        proxy_instance.runOneRound();
+        manager->runOneRoundAll();
+        auto resp = f->poll();
+        ASSERT_EQ(resp->read_index(), 677);
+        MockStressTestCfg::enable = false;
+    }
+    ASSERT(GCMonitor::instance().checkClean());
+    ASSERT(!GCMonitor::instance().empty());
+}
+
+
+TEST_F(ReadIndexTest, workers)
+try
+{
+    testBasic();
+    testNormal();
+    testBatch();
+    testError();
+}
+CATCH
+
+} // namespace tests
+} // namespace DB
diff --git a/etc/config-template.toml b/etc/config-template.toml
index 9c690e7ae4f..a864fff2a59 100644
--- a/etc/config-template.toml
+++ b/etc/config-template.toml
@@ -89,6 +89,11 @@
 # batch_read_index_timeout_ms = 10000
 # wait_index_timeout_ms = 300000
 # wait_region_ready_timeout_sec = 1200
+## The number of runners that drive the async read-index workers. Set 0 to disable the async read-index workers.
+# read_index_runner_count = 1
+## The minimum duration to handle read-index tasks in each worker.
+# read_index_worker_tick_ms = 10 + [flash.proxy] # addr = "0.0.0.0:20170" # advertise-addr = "tiflash0:20170" diff --git a/metrics/grafana/tiflash_proxy_details.json b/metrics/grafana/tiflash_proxy_details.json index d81f93a43ae..8d2a74c8c43 100644 --- a/metrics/grafana/tiflash_proxy_details.json +++ b/metrics/grafana/tiflash_proxy_details.json @@ -4699,7 +4699,7 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": " \tThe CPU utilization of split check", + "description": " \tThe CPU utilization of read index worker", "editable": true, "error": false, "fill": 0, @@ -4739,7 +4739,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tiflash_proxy_thread_cpu_seconds_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"split_check\"}[1m])) by (instance)", + "expr": "sum(rate(tiflash_proxy_thread_cpu_seconds_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"ReadIndexWkr_.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -4752,7 +4752,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Split check CPU", + "title": "Read Index Worker CPU", "tooltip": { "msResolution": false, "shared": true, From 292a698b12b170fafa5b52e45b0562aa418c6995 Mon Sep 17 00:00:00 2001 From: Zhigao Tong Date: Tue, 15 Feb 2022 10:35:24 +0800 Subject: [PATCH 3/9] refine tests Signed-off-by: Zhigao Tong --- dbms/src/Debug/MockRaftStoreProxy.cpp | 7 +++- dbms/src/Debug/MockRaftStoreProxy.h | 2 ++ dbms/src/Storages/Transaction/KVStore.cpp | 6 ++-- .../Storages/Transaction/ReadIndexWorker.cpp | 2 +- .../Transaction/tests/gtest_kvstore.cpp | 34 +++++++++++++++++-- 5 files changed, 43 insertions(+), 8 deletions(-) diff --git a/dbms/src/Debug/MockRaftStoreProxy.cpp b/dbms/src/Debug/MockRaftStoreProxy.cpp index d93fc2caea1..f4b9586bb34 100644 --- a/dbms/src/Debug/MockRaftStoreProxy.cpp +++ b/dbms/src/Debug/MockRaftStoreProxy.cpp @@ -109,6 +109,11 @@ raft_serverpb::RaftApplyState MockProxyRegion::getApply() return apply; } +uint64_t MockProxyRegion::getLatestCommitIndex() +{ + return this->getApply().commit_index(); +} + void MockProxyRegion::updateCommitIndex(uint64_t index) { auto _ = genLockGuard(); @@ -142,7 +147,7 @@ std::optional RawMockReadIndexTask::poll(std::shared resp.mutable_region_error()->mutable_data_is_not_ready(); return resp; } - resp.set_read_index(region->getApply().commit_index()); + resp.set_read_index(region->getLatestCommitIndex()); return resp; } diff --git a/dbms/src/Debug/MockRaftStoreProxy.h b/dbms/src/Debug/MockRaftStoreProxy.h index 766fb9a2fe2..c9d1db7da3b 100644 --- a/dbms/src/Debug/MockRaftStoreProxy.h +++ b/dbms/src/Debug/MockRaftStoreProxy.h @@ -15,6 +15,8 @@ struct MockProxyRegion : MutexLockWrap raft_serverpb::RaftApplyState getApply(); + uint64_t getLatestCommitIndex(); + void updateCommitIndex(uint64_t index); MockProxyRegion(); diff --git a/dbms/src/Storages/Transaction/KVStore.cpp b/dbms/src/Storages/Transaction/KVStore.cpp index fcf7d8e62a3..414279d66bb 100644 --- a/dbms/src/Storages/Transaction/KVStore.cpp +++ b/dbms/src/Storages/Transaction/KVStore.cpp @@ -633,7 +633,7 @@ void WaitCheckRegionReady(const TMTContext & tmt, const std::atomic_size_t & ter break; LOG_FMT_INFO(log, - "{} regions need to fetch latest commit-index in next round, sleep for {:.2f}s", + "{} regions need to fetch latest commit-index in next round, sleep for {:.3f}s", remain_regions.size(), wait_tick_time); 
std::this_thread::sleep_for(std::chrono::milliseconds(int64_t(wait_tick_time * 1000))); @@ -677,7 +677,7 @@ void WaitCheckRegionReady(const TMTContext & tmt, const std::atomic_size_t & ter break; LOG_FMT_INFO(log, - "{} regions need to apply to latest index, sleep for {:.2f}s", + "{} regions need to apply to latest index, sleep for {:.3f}s", regions_to_check.size(), wait_tick_time); std::this_thread::sleep_for(std::chrono::milliseconds(int64_t(wait_tick_time * 1000))); @@ -695,7 +695,7 @@ void WaitCheckRegionReady(const TMTContext & tmt, const std::atomic_size_t & ter } LOG_FMT_INFO(log, - "finish to check {} regions, time cost {:.2f}s", + "finish to check {} regions, time cost {:.3f}s", total_regions_cnt, region_check_watch.elapsedSeconds()); } diff --git a/dbms/src/Storages/Transaction/ReadIndexWorker.cpp b/dbms/src/Storages/Transaction/ReadIndexWorker.cpp index f11028ca786..2397d44d242 100644 --- a/dbms/src/Storages/Transaction/ReadIndexWorker.cpp +++ b/dbms/src/Storages/Transaction/ReadIndexWorker.cpp @@ -1011,9 +1011,9 @@ void ReadIndexWorkerManager::ReadIndexRunner::asyncRun() { auto base_tick_timeout = fn_min_dur_handle_region(); blockedWaitFor(base_tick_timeout); + runOneRound(base_tick_timeout); if (state.load(std::memory_order_acquire) != State::Running) break; - runOneRound(base_tick_timeout); } LOG_FMT_INFO(logger, "Start to stop read-index runner {}", id); }); diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp index e9cf93e8fa1..85e6014879a 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp @@ -118,11 +118,39 @@ void RegionKVStoreTest::testReadIndex() 1); ASSERT_NE(kvs.read_index_worker_manager, nullptr); - kvs.asyncRunReadIndexWorkers(); { + kvs.asyncRunReadIndexWorkers(); + auto tar_region_id = 9; + { + auto task_lock = kvs.genTaskLock(); + auto lock = kvs.genRegionWriteLock(task_lock); + + auto region = makeRegion(tar_region_id, RecordKVFormat::genKey(2, 0), RecordKVFormat::genKey(2, 10)); + lock.regions.emplace(region->id(), region); + } + { + ASSERT_EQ(proxy_instance.regions.at(tar_region_id)->getLatestCommitIndex(), 5); + proxy_instance.regions.at(tar_region_id)->updateCommitIndex(66); + } + + AsyncWaker::Notifier notifier; const std::atomic_size_t terminate_signals_counter{}; - WaitCheckRegionReady(ctx.getTMTContext(), terminate_signals_counter, 2, 20); + std::thread t([&]() { + notifier.wake(); + WaitCheckRegionReady(ctx.getTMTContext(), terminate_signals_counter, 1 / 1000.0, 20); + }); + SCOPE_EXIT({ + t.join(); + }); + ASSERT_EQ(notifier.blockedWaitFor(std::chrono::milliseconds(1000 * 3600)), AsyncNotifier::Status::Normal); + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + auto tar = kvs.getRegion(tar_region_id); + ASSERT_EQ( + tar->handleWriteRaftCmd({}, 66, 6, ctx.getTMTContext()), + EngineStoreApplyRes::None); + kvs.stopReadIndexWorkers(); } + kvs.asyncRunReadIndexWorkers(); { // test read index auto region = kvs.getRegion(1); @@ -173,7 +201,7 @@ void RegionKVStoreTest::testReadIndex() SCOPE_EXIT({ t.join(); }); - notifier.blockedWaitFor(std::chrono::milliseconds(1000 * 3600)); + ASSERT_EQ(notifier.blockedWaitFor(std::chrono::milliseconds(1000 * 3600)), AsyncNotifier::Status::Normal); std::this_thread::sleep_for(std::chrono::milliseconds(2)); region->handleWriteRaftCmd({}, 667 + 1, 6, ctx.getTMTContext()); } From b84def6681e234317e24dd29100cc9e4b3934bbd Mon Sep 17 00:00:00 2001 From: Zhigao Tong Date: Tue, 
15 Feb 2022 11:00:52 +0800 Subject: [PATCH 4/9] refine log Signed-off-by: Zhigao Tong --- dbms/src/Storages/Transaction/ApplySnapshot.cpp | 8 ++++---- dbms/src/Storages/Transaction/Region.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/Transaction/ApplySnapshot.cpp b/dbms/src/Storages/Transaction/ApplySnapshot.cpp index bf66e6864f6..90af824dfcc 100644 --- a/dbms/src/Storages/Transaction/ApplySnapshot.cpp +++ b/dbms/src/Storages/Transaction/ApplySnapshot.cpp @@ -58,9 +58,9 @@ void KVStore::checkAndApplySnapshot(const RegionPtrWrap & new_region, TMTContext } else if (old_applied_index == new_index) { - LOG_WARNING(log, - old_region->toString(false) << " already has same applied index, just ignore next process. " - << "Please check log whether server crashed after successfully applied snapshot."); + LOG_FMT_WARNING(log, + "{} already has same applied index, just ignore next process. Please check log whether server crashed after successfully applied snapshot.", + old_region->getDebugString()); return; } @@ -188,7 +188,7 @@ void KVStore::onSnapshot(const RegionPtrWrap & new_region_wrap, RegionPtr old_re if (old_region != nullptr) { - LOG_DEBUG(log, __FUNCTION__ << ": previous " << old_region->toString(true) << " ; new " << new_region->toString(true)); + LOG_FMT_DEBUG(log, "{}: previous {}, new {}", __FUNCTION__, old_region->getDebugString(), new_region->getDebugString()); { // remove index first const auto & range = old_region->makeRaftCommandDelegate(task_lock).getRange().comparableKeys(); diff --git a/dbms/src/Storages/Transaction/Region.cpp b/dbms/src/Storages/Transaction/Region.cpp index f8211203e0f..39a5594cce0 100644 --- a/dbms/src/Storages/Transaction/Region.cpp +++ b/dbms/src/Storages/Transaction/Region.cpp @@ -538,7 +538,7 @@ std::tuple Region::waitIndex(UInt64 index, const UInt64 case WaitIndexResult::Timeout: { ProfileEvents::increment(ProfileEvents::RaftWaitIndexTimeout); - LOG_WARNING(log, toString(false) << " wait learner index " << index << " timeout"); + LOG_FMT_WARNING(log, "{} wait learner index {} timeout", toString(false), index); return {wait_idx_res, elapsed_secs}; } } From e9829bcf2b5d7e18dacf70b6ba4be454d34de3e1 Mon Sep 17 00:00:00 2001 From: Zhigao Tong Date: Tue, 15 Feb 2022 16:09:32 +0800 Subject: [PATCH 5/9] opt log fmt & more test cases Signed-off-by: Zhigao Tong --- dbms/src/Debug/MockRaftStoreProxy.cpp | 8 ++- dbms/src/Debug/MockRaftStoreProxy.h | 1 + dbms/src/Storages/Transaction/KVStore.cpp | 65 +++++++++++++------ dbms/src/Storages/Transaction/KVStore.h | 2 +- .../Transaction/tests/gtest_kvstore.cpp | 33 +++++++++- 5 files changed, 83 insertions(+), 26 deletions(-) diff --git a/dbms/src/Debug/MockRaftStoreProxy.cpp b/dbms/src/Debug/MockRaftStoreProxy.cpp index f4b9586bb34..bace630f51d 100644 --- a/dbms/src/Debug/MockRaftStoreProxy.cpp +++ b/dbms/src/Debug/MockRaftStoreProxy.cpp @@ -234,11 +234,13 @@ void MockRaftStoreProxy::runOneRound() while (!tasks.empty()) { auto & t = *tasks.front(); - if (region_id_to_drop.find(t.req.context().region_id()) != region_id_to_drop.end()) + if (!region_id_to_drop.count(t.req.context().region_id())) { + if (region_id_to_error.count(t.req.context().region_id())) + t.update(false, true); + else + t.update(false, false); } - else - t.update(); tasks.pop_front(); } } diff --git a/dbms/src/Debug/MockRaftStoreProxy.h b/dbms/src/Debug/MockRaftStoreProxy.h index c9d1db7da3b..6862bf29365 100644 --- a/dbms/src/Debug/MockRaftStoreProxy.h +++ b/dbms/src/Debug/MockRaftStoreProxy.h @@ -90,6 +90,7 @@ 
struct MockRaftStoreProxy : MutexLockWrap static TiFlashRaftProxyHelper SetRaftStoreProxyFFIHelper(RaftStoreProxyPtr); std::unordered_set region_id_to_drop; + std::unordered_set region_id_to_error; std::map regions; std::list> tasks; AsyncWaker::Notifier notifier; diff --git a/dbms/src/Storages/Transaction/KVStore.cpp b/dbms/src/Storages/Transaction/KVStore.cpp index 414279d66bb..c8c43e0a1b7 100644 --- a/dbms/src/Storages/Transaction/KVStore.cpp +++ b/dbms/src/Storages/Transaction/KVStore.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -576,12 +577,21 @@ EngineStoreApplyRes KVStore::handleAdminRaftCmd(raft_cmdpb::AdminRequest && requ } } -void WaitCheckRegionReady(const TMTContext & tmt, const std::atomic_size_t & terminate_signals_counter, double wait_tick_time, double max_wait_tick_time) +void WaitCheckRegionReady( + const TMTContext & tmt, + const std::atomic_size_t & terminate_signals_counter, + double wait_tick_time, + double max_wait_tick_time, + double get_wait_region_ready_timeout_sec) { constexpr double batch_read_index_time_rate = 0.2; // part of time for waiting shall be assigned to batch-read-index Poco::Logger * log = &Poco::Logger::get(__FUNCTION__); - LOG_FMT_INFO(log, "start to check regions ready, min-wait-tick {}s, max-wait-tick {}s", wait_tick_time, max_wait_tick_time); + LOG_FMT_INFO(log, + "start to check regions ready, min-wait-tick {}s, max-wait-tick {}s, wait-region-ready-timeout {:.3f}s", + wait_tick_time, + max_wait_tick_time, + get_wait_region_ready_timeout_sec); std::unordered_set remain_regions; std::unordered_map regions_to_check; @@ -591,7 +601,7 @@ void WaitCheckRegionReady(const TMTContext & tmt, const std::atomic_size_t & ter tmt.getKVStore()->traverseRegions([&remain_regions](RegionID region_id, const RegionPtr &) { remain_regions.emplace(region_id); }); total_regions_cnt = remain_regions.size(); } - while (region_check_watch.elapsedSeconds() < tmt.waitRegionReadyTimeout() * batch_read_index_time_rate + while (region_check_watch.elapsedSeconds() < get_wait_region_ready_timeout_sec * batch_read_index_time_rate && terminate_signals_counter.load(std::memory_order_relaxed) == 0) { std::vector batch_read_index_req; @@ -642,16 +652,19 @@ void WaitCheckRegionReady(const TMTContext & tmt, const std::atomic_size_t & ter if (!remain_regions.empty()) { - LOG_WARNING( - log, remain_regions.size() << " regions CANNOT fetch latest commit-index from TiKV, (region-id): "; do { - for (auto && r : remain_regions) - { - oss_internal_rare << "(" << r << ") "; - } - } while (0)); + FmtBuffer buffer; + buffer.joinStr( + remain_regions.begin(), + remain_regions.end(), + [&](const auto & e, FmtBuffer & b) { b.fmtAppend("{}", e); }, + " "); + LOG_FMT_WARNING( + log, + "{} regions CANNOT fetch latest commit-index from TiKV, (region-id): {}", + remain_regions.size(), + buffer.toString()); } - while (region_check_watch.elapsedSeconds() < static_cast(tmt.waitRegionReadyTimeout()) - && terminate_signals_counter.load(std::memory_order_relaxed) == 0) + do { for (auto it = regions_to_check.begin(); it != regions_to_check.end();) { @@ -682,16 +695,27 @@ void WaitCheckRegionReady(const TMTContext & tmt, const std::atomic_size_t & ter wait_tick_time); std::this_thread::sleep_for(std::chrono::milliseconds(int64_t(wait_tick_time * 1000))); wait_tick_time = std::min(max_wait_tick_time, wait_tick_time * 2); - } + } while (region_check_watch.elapsedSeconds() < get_wait_region_ready_timeout_sec + && terminate_signals_counter.load(std::memory_order_relaxed) == 0); + if 
(!regions_to_check.empty()) { - LOG_WARNING( - log, regions_to_check.size() << " regions CANNOT catch up with latest index, (region-id,latest-index): "; do { - for (auto && [region_id, latest_index] : regions_to_check) + FmtBuffer buffer; + buffer.joinStr( + regions_to_check.begin(), + regions_to_check.end(), + [&](const auto & e, FmtBuffer & b) { + if (auto r = tmt.getKVStore()->getRegion(e.first); r) + { + b.fmtAppend("{},{},{}", e.first, e.second, r->appliedIndex()); + } + else { - oss_internal_rare << "(" << region_id << "," << latest_index << ") "; + b.fmtAppend("{},{},none", e.first, e.second); } - } while (0)); + }, + " "); + LOG_FMT_WARNING(log, "{} regions CANNOT catch up with latest index, (region-id,latest-index,apply-index): {}", regions_to_check.size(), buffer.toString()); } LOG_FMT_INFO(log, @@ -704,9 +728,10 @@ void WaitCheckRegionReady(const TMTContext & tmt, const std::atomic_size_t & ter { // wait interval to check region ready, not recommended to modify only if for tesing auto wait_region_ready_tick = tmt.getContext().getConfigRef().getUInt64("flash.wait_region_ready_tick", 0); - const double max_wait_tick_time = 0 == wait_region_ready_tick ? 20.0 : static_cast(tmt.waitRegionReadyTimeout()); + auto wait_region_ready_timeout_sec = static_cast(tmt.waitRegionReadyTimeout()); + const double max_wait_tick_time = 0 == wait_region_ready_tick ? 20.0 : wait_region_ready_timeout_sec; double min_wait_tick_time = 0 == wait_region_ready_tick ? 2.5 : static_cast(wait_region_ready_tick); // default tick in TiKV is about 2s (without hibernate-region) - return WaitCheckRegionReady(tmt, terminate_signals_counter, min_wait_tick_time, max_wait_tick_time); + return WaitCheckRegionReady(tmt, terminate_signals_counter, min_wait_tick_time, max_wait_tick_time, wait_region_ready_timeout_sec); } void KVStore::setStore(metapb::Store store_) diff --git a/dbms/src/Storages/Transaction/KVStore.h b/dbms/src/Storages/Transaction/KVStore.h index 9ccc7630533..dd86e1208e1 100644 --- a/dbms/src/Storages/Transaction/KVStore.h +++ b/dbms/src/Storages/Transaction/KVStore.h @@ -258,6 +258,6 @@ class KVStoreTaskLock : private boost::noncopyable }; void WaitCheckRegionReady(const TMTContext &, const std::atomic_size_t & terminate_signals_counter); -void WaitCheckRegionReady(const TMTContext &, const std::atomic_size_t &, double, double); +void WaitCheckRegionReady(const TMTContext &, const std::atomic_size_t &, double, double, double); } // namespace DB diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp index 85e6014879a..5f673a9c915 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp @@ -120,6 +120,10 @@ void RegionKVStoreTest::testReadIndex() { kvs.asyncRunReadIndexWorkers(); + SCOPE_EXIT({ + kvs.stopReadIndexWorkers(); + }); + auto tar_region_id = 9; { auto task_lock = kvs.genTaskLock(); @@ -137,7 +141,7 @@ void RegionKVStoreTest::testReadIndex() const std::atomic_size_t terminate_signals_counter{}; std::thread t([&]() { notifier.wake(); - WaitCheckRegionReady(ctx.getTMTContext(), terminate_signals_counter, 1 / 1000.0, 20); + WaitCheckRegionReady(ctx.getTMTContext(), terminate_signals_counter, 1 / 1000.0, 20, 20 * 60); }); SCOPE_EXIT({ t.join(); @@ -148,7 +152,32 @@ void RegionKVStoreTest::testReadIndex() ASSERT_EQ( tar->handleWriteRaftCmd({}, 66, 6, ctx.getTMTContext()), EngineStoreApplyRes::None); - kvs.stopReadIndexWorkers(); + } + { + 
+        kvs.asyncRunReadIndexWorkers();
+        SCOPE_EXIT({
+            kvs.stopReadIndexWorkers();
+        });
+
+        auto tar_region_id = 9;
+        {
+            ASSERT_EQ(proxy_instance.regions.at(tar_region_id)->getLatestCommitIndex(), 66);
+            proxy_instance.unsafeInvokeForTest([&](MockRaftStoreProxy & p) {
+                p.region_id_to_error.emplace(tar_region_id);
+                p.regions.at(2)->updateCommitIndex(6);
+            });
+        }
+
+        AsyncWaker::Notifier notifier;
+        const std::atomic_size_t terminate_signals_counter{};
+        std::thread t([&]() {
+            notifier.wake();
+            WaitCheckRegionReady(ctx.getTMTContext(), terminate_signals_counter, 1 / 1000.0, 2 / 1000.0, 5 / 1000.0);
+        });
+        SCOPE_EXIT({
+            t.join();
+        });
+        ASSERT_EQ(notifier.blockedWaitFor(std::chrono::milliseconds(1000 * 3600)), AsyncNotifier::Status::Normal);
     }
     kvs.asyncRunReadIndexWorkers();
     {

From b6c828e8b73cd3d376a8f22b3d0c05890ab3afdc Mon Sep 17 00:00:00 2001
From: Zhigao Tong
Date: Wed, 16 Feb 2022 13:47:30 +0800
Subject: [PATCH 6/9] clean code

Signed-off-by: Zhigao Tong
---
 dbms/src/Storages/Transaction/KVStore.cpp        |  4 +---
 dbms/src/Storages/Transaction/Region.cpp         | 12 ------------
 .../Storages/Transaction/RegionExecutionResult.h | 14 ++------------
 .../Storages/Transaction/tests/gtest_kvstore.cpp | 15 +++++++++++++++
 4 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/dbms/src/Storages/Transaction/KVStore.cpp b/dbms/src/Storages/Transaction/KVStore.cpp
index c8c43e0a1b7..954036894d1 100644
--- a/dbms/src/Storages/Transaction/KVStore.cpp
+++ b/dbms/src/Storages/Transaction/KVStore.cpp
@@ -256,7 +256,7 @@ EngineStoreApplyRes KVStore::handleWriteRaftCmd(
             cmd_cf.push_back(NameToCF(req.delete_().cf()));
             break;
         default:
-            break;
+            throw Exception(fmt::format("Unsupported raft cmd {}", raft_cmdpb::CmdType_Name(type)), ErrorCodes::LOGICAL_ERROR);
         }
     }
     return handleWriteRaftCmd(
@@ -569,8 +569,6 @@ EngineStoreApplyRes KVStore::handleAdminRaftCmd(raft_cmdpb::AdminRequest && requ
     case RaftCommandResult::Type::CommitMerge:
         handle_commit_merge(result.source_region_id);
         break;
-    default:
-        throw Exception("Unsupported RaftCommandResult", ErrorCodes::LOGICAL_ERROR);
     }
 
     return EngineStoreApplyRes::Persist;
diff --git a/dbms/src/Storages/Transaction/Region.cpp b/dbms/src/Storages/Transaction/Region.cpp
index 39a5594cce0..e941f26fabf 100644
--- a/dbms/src/Storages/Transaction/Region.cpp
+++ b/dbms/src/Storages/Transaction/Region.cpp
@@ -317,7 +317,6 @@ void RegionRaftCommandDelegate::handleAdminRaftCmd(const raft_cmdpb::AdminReques
         break;
     default:
         throw Exception(fmt::format("unsupported admin command type {}", raft_cmdpb::AdminCmdType_Name(type)), ErrorCodes::LOGICAL_ERROR);
-        break;
     }
 
     switch (type)
@@ -474,12 +473,6 @@ ImutRegionRangePtr Region::getRange() const
     return meta.getRange();
 }
 
-ReadIndexResult::ReadIndexResult(RegionException::RegionReadStatus status_, UInt64 read_index_, kvrpcpb::LockInfo * lock_info_)
-    : status(status_)
-    , read_index(read_index_)
-    , lock_info(lock_info_)
-{}
-
 kvrpcpb::ReadIndexRequest GenRegionReadIndexReq(const Region & region, UInt64 start_ts)
 {
     auto meta_snap = region.dumpRegionMetaSnapshot();
@@ -542,7 +535,6 @@ std::tuple<WaitIndexResult, double> Region::waitIndex(UInt64 index, const UInt64
                 return {wait_idx_res, elapsed_secs};
             }
         }
-        throw Exception("Unknown result of wait index:" + DB::toString(static_cast(wait_idx_res)));
     }
     return {WaitIndexResult::Finished, 0};
@@ -667,10 +659,6 @@ EngineStoreApplyRes Region::handleWriteRaftCmd(const WriteCmdsView & cmds, UInt6
             }
             break;
         }
-        default:
-            throw Exception(
-                std::string(__PRETTY_FUNCTION__) + ": unsupported command type " + std::to_string(static_cast(type)),
-                ErrorCodes::LOGICAL_ERROR);
         }
     };
diff --git a/dbms/src/Storages/Transaction/RegionExecutionResult.h b/dbms/src/Storages/Transaction/RegionExecutionResult.h
index 6557624bba4..dadadcbf7d0 100644
--- a/dbms/src/Storages/Transaction/RegionExecutionResult.h
+++ b/dbms/src/Storages/Transaction/RegionExecutionResult.h
@@ -1,9 +1,9 @@
 #pragma once
 
-#include
 #include
 #include
+#include
 #include
 
 namespace kvrpcpb
 {
@@ -28,7 +28,7 @@ struct RaftCommandResult : private boost::noncopyable
         IndexError,
         BatchSplit,
         ChangePeer,
-        CommitMerge
+        CommitMerge,
     };
 
     bool sync_log;
@@ -45,14 +45,4 @@ struct RegionMergeResult
     UInt64 version;
 };
 
-struct ReadIndexResult
-{
-    RegionException::RegionReadStatus status{RegionException::RegionReadStatus::OK};
-    UInt64 read_index{0};
-    std::unique_ptr<kvrpcpb::LockInfo> lock_info{nullptr};
-
-    ReadIndexResult(RegionException::RegionReadStatus status_, UInt64 read_index_ = 0, kvrpcpb::LockInfo * lock_info_ = nullptr);
-    ReadIndexResult() = default;
-};
-
 } // namespace DB
diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
index 5f673a9c915..5d372163b6f 100644
--- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
+++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
@@ -789,6 +789,18 @@ void RegionKVStoreTest::testKVStore()
         {
             ASSERT_EQ(e.message(), "Unexpected eof");
         }
+        try
+        {
+            raft_cmdpb::RaftCmdRequest request;
+            request.add_requests()->set_cmd_type(::raft_cmdpb::CmdType::Invalid);
+            ASSERT_EQ(kvs.handleWriteRaftCmd(std::move(request), 1, 10, 6, ctx.getTMTContext()),
+                      EngineStoreApplyRes::None);
+            ASSERT_TRUE(false);
+        }
+        catch (Exception & e)
+        {
+            ASSERT_EQ(e.message(), "Unsupported raft cmd Invalid");
+        }
     }
     ASSERT_EQ(kvs.getRegion(1)->dataInfo(), "[lock 1 ]");
     {
@@ -835,6 +847,9 @@ void RegionKVStoreTest::testKVStore()
         testRaftSplit(kvs, ctx.getTMTContext());
         ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{}, raft_cmdpb::AdminResponse{}, 8192, 5, 6, ctx.getTMTContext()), EngineStoreApplyRes::NotFound);
     }
+    {
+        ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{}, raft_cmdpb::AdminResponse{}, 8192, 5, 6, ctx.getTMTContext()), EngineStoreApplyRes::NotFound);
+    }
     {
         raft_cmdpb::AdminRequest request;
         raft_cmdpb::AdminResponse response;

From 635a25c9d3d074bbca79ca7b75d3b0b8af1705f2 Mon Sep 17 00:00:00 2001
From: Zhigao Tong
Date: Wed, 16 Feb 2022 16:06:44 +0800
Subject: [PATCH 7/9] more tests

Signed-off-by: Zhigao Tong
---
 dbms/src/Storages/Transaction/Region.h       |   8 +
 dbms/src/Storages/Transaction/RegionData.cpp |   4 -
 dbms/src/Storages/Transaction/RegionMeta.cpp |  56 ++++---
 dbms/src/Storages/Transaction/RegionMeta.h   |   7 +
 .../Transaction/tests/gtest_kvstore.cpp      | 141 ++++++++++++++++++
 5 files changed, 181 insertions(+), 35 deletions(-)

diff --git a/dbms/src/Storages/Transaction/Region.h b/dbms/src/Storages/Transaction/Region.h
index 8fd81158927..b25a578ae2f 100644
--- a/dbms/src/Storages/Transaction/Region.h
+++ b/dbms/src/Storages/Transaction/Region.h
@@ -15,6 +15,11 @@ class ReadIndexRequest;
 
 namespace DB
 {
+namespace tests
+{
+class RegionKVStoreTest;
+}
+
 class Region;
 using RegionPtr = std::shared_ptr<Region>;
 using Regions = std::vector<RegionPtr>;
@@ -185,6 +190,7 @@ class Region : public std::enable_shared_from_this<Region>
     Region() = delete;
     friend class RegionRaftCommandDelegate;
     friend class RegionMockTest;
+    friend class tests::RegionKVStoreTest;
 
     // Private methods no need to lock mutex, normally
 
@@ -227,6 +233,8 @@ class RegionRaftCommandDelegate : public Region
     UInt64 appliedIndex();
 
 private:
+    friend class tests::RegionKVStoreTest;
+
     RegionRaftCommandDelegate() = delete;
 
     Regions execBatchSplit(
diff --git a/dbms/src/Storages/Transaction/RegionData.cpp b/dbms/src/Storages/Transaction/RegionData.cpp
index 0aa3de5722e..6ba3a094e0c 100644
--- a/dbms/src/Storages/Transaction/RegionData.cpp
+++ b/dbms/src/Storages/Transaction/RegionData.cpp
@@ -37,8 +37,6 @@ void RegionData::insert(ColumnFamilyType cf, TiKVKey && key, TiKVValue && value)
         lock_cf.insert(std::move(key), std::move(value));
         return;
     }
-    default:
-        throw Exception(std::string(__PRETTY_FUNCTION__) + " with undefined CF, should not happen", ErrorCodes::LOGICAL_ERROR);
     }
 }
 
@@ -69,8 +67,6 @@ void RegionData::remove(ColumnFamilyType cf, const TiKVKey & key)
         lock_cf.remove(RegionLockCFDataTrait::Key{nullptr, std::string_view(key.data(), key.dataSize())}, true);
         return;
     }
-    default:
-        throw Exception(std::string(__PRETTY_FUNCTION__) + " with undefined CF, should not happen", ErrorCodes::LOGICAL_ERROR);
     }
 }
diff --git a/dbms/src/Storages/Transaction/RegionMeta.cpp b/dbms/src/Storages/Transaction/RegionMeta.cpp
index 38c3fc34339..3c11f3b4891 100644
--- a/dbms/src/Storages/Transaction/RegionMeta.cpp
+++ b/dbms/src/Storages/Transaction/RegionMeta.cpp
@@ -1,5 +1,7 @@
 #include
 #include
+#include
+
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
@@ -64,9 +66,6 @@ raft_serverpb::RaftApplyState RegionMeta::getApplyState() const
 
 void RegionMeta::doSetRegion(const metapb::Region & region)
 {
-    if (regionId() != region.id())
-        throw Exception(std::string(__PRETTY_FUNCTION__) + ": region id is not equal, should not happen", ErrorCodes::LOGICAL_ERROR);
-
     region_state.setRegion(region);
 }
@@ -205,9 +204,6 @@ void RegionMeta::assignRegionMeta(RegionMeta && rhs)
 {
     std::lock_guard lock(mutex);
 
-    if (regionId() != rhs.regionId())
-        throw Exception(std::string(__PRETTY_FUNCTION__) + ": region_id not equal, should not happen", ErrorCodes::LOGICAL_ERROR);
-
     peer = std::move(rhs.peer);
     apply_state = std::move(rhs.apply_state);
     applied_term = rhs.applied_term;
@@ -265,27 +261,23 @@ RegionMergeResult MetaRaftCommandDelegate::checkBeforeCommitMerge(
     case raft_serverpb::PeerState::Normal:
         break;
     default:
-    {
-        throw Exception(std::string(__PRETTY_FUNCTION__) + ": " + toString(false)
-                            + " unexpected state of source region: " + raft_serverpb::PeerState_Name(state),
-                        ErrorCodes::LOGICAL_ERROR);
-    }
+        throw Exception(fmt::format("{}: unexpected state {} of source {}", __FUNCTION__, raft_serverpb::PeerState_Name(state), regionId()), ErrorCodes::LOGICAL_ERROR);
     }
 
     if (!(source_region == source_meta.region_state.getRegion()))
-        throw Exception(std::string(__PRETTY_FUNCTION__) + ": source_region not match exist region", ErrorCodes::LOGICAL_ERROR);
+        throw Exception(fmt::format("{}: source region does not match existing region meta", __FUNCTION__), ErrorCodes::LOGICAL_ERROR);
 
     return computeRegionMergeResult(source_region, region_state.getRegion());
 }
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-static void CheckRegionForMergeCmd(const raft_cmdpb::AdminResponse & response, const RegionState & region_state)
+void CheckRegionForMergeCmd(const raft_cmdpb::AdminResponse & response, const RegionState & region_state)
 {
     if (response.has_split() && !(response.split().left() == region_state.getRegion()))
-        throw Exception(std::string(__PRETTY_FUNCTION__) + ": current region:\n" + region_state.getRegion().ShortDebugString()
-                            + "\nexpect:\n" + response.split().left().ShortDebugString() + "\nshould not happen",
-                        ErrorCodes::LOGICAL_ERROR);
+        throw Exception(
+            fmt::format("{}: current region meta: {}, expect: {}", __FUNCTION__, region_state.getRegion().ShortDebugString(), response.split().left().ShortDebugString()),
+            ErrorCodes::LOGICAL_ERROR);
 }
 #pragma GCC diagnostic pop
 
@@ -298,12 +290,13 @@ void MetaRaftCommandDelegate::execRollbackMerge(
     const auto & rollback_request = request.rollback_merge();
 
     if (region_state.getState() != raft_serverpb::PeerState::Merging)
-        throw Exception(std::string(__PRETTY_FUNCTION__) + ": region state is " + raft_serverpb::PeerState_Name(region_state.getState()),
-                        ErrorCodes::LOGICAL_ERROR);
+        throw Exception(
+            fmt::format("{}: region state is {}, expect {}", __FUNCTION__, raft_serverpb::PeerState_Name(region_state.getState()), raft_serverpb::PeerState_Name(raft_serverpb::PeerState::Merging)),
+            ErrorCodes::LOGICAL_ERROR);
     if (region_state.getMergeState().commit() != rollback_request.commit())
-        throw Exception(std::string(__PRETTY_FUNCTION__) + ": merge commit index " + DB::toString(region_state.getMergeState().commit())
-                            + " != " + DB::toString(rollback_request.commit()),
-                        ErrorCodes::LOGICAL_ERROR);
+        throw Exception(
+            fmt::format("{}: merge commit index is {}, expect {}", __FUNCTION__, region_state.getMergeState().commit(), rollback_request.commit()),
+            ErrorCodes::LOGICAL_ERROR);
 
     std::lock_guard lock(mutex);
     const auto version = region_state.getVersion() + 1;
@@ -315,6 +308,14 @@ void MetaRaftCommandDelegate::execRollbackMerge(
     CheckRegionForMergeCmd(response, region_state);
 }
 
+void ChangeRegionStateRange(RegionState & region_state, bool source_at_left, const RegionState & source_region_state)
+{
+    if (source_at_left)
+        region_state.setStartKey(source_region_state.getRegion().start_key());
+    else
+        region_state.setEndKey(source_region_state.getRegion().end_key());
+}
+
 void MetaRaftCommandDelegate::execCommitMerge(
     const RegionMergeResult & res,
     UInt64 index,
@@ -324,10 +325,8 @@ void MetaRaftCommandDelegate::execCommitMerge(
     {
         std::lock_guard lock(mutex);
         region_state.setVersion(res.version);
-        if (res.source_at_left)
-            region_state.setStartKey(source_meta.region_state.getRegion().start_key());
-        else
-            region_state.setEndKey(source_meta.region_state.getRegion().end_key());
+
+        ChangeRegionStateRange(region_state, res.source_at_left, source_meta.region_state);
 
         region_state.setState(raft_serverpb::PeerState::Normal);
         region_state.clearMergeState();
@@ -366,8 +365,7 @@ void MetaRaftCommandDelegate::execPrepareMerge(
 
 bool RegionMeta::doCheckPeerRemoved() const
 {
-    if (region_state.getRegion().peers().empty())
-        throw Exception(std::string(__PRETTY_FUNCTION__) + ": got empty peers, should not happen", ErrorCodes::LOGICAL_ERROR);
+    assert(!region_state.getRegion().peers().empty());
 
     for (const auto & region_peer : region_state.getRegion().peers())
     {
@@ -398,10 +396,6 @@ MetaRaftCommandDelegate & RegionMeta::makeRaftCommandDelegate()
     return static_cast<MetaRaftCommandDelegate &>(*this);
 }
 
-const metapb::Peer & MetaRaftCommandDelegate::getPeer() const
-{
-    return peer;
-}
 const raft_serverpb::RaftApplyState & MetaRaftCommandDelegate::applyState() const
 {
     return apply_state;
diff --git a/dbms/src/Storages/Transaction/RegionMeta.h b/dbms/src/Storages/Transaction/RegionMeta.h
index 7a113801892..959efd553a2 100644
--- a/dbms/src/Storages/Transaction/RegionMeta.h
+++ b/dbms/src/Storages/Transaction/RegionMeta.h
@@ -11,6 +11,11 @@ struct RegionVerID;
 
 namespace DB
 {
+namespace tests
+{
+class RegionKVStoreTest;
+}
+
 struct RegionMergeResult;
 class Region;
 class MetaRaftCommandDelegate;
@@ -94,6 +99,7 @@ class RegionMeta
 private:
     RegionMeta() = delete;
     friend class MetaRaftCommandDelegate;
+    friend class tests::RegionKVStoreTest;
 
     void doSetRegion(const metapb::Region & region);
     void doSetApplied(UInt64 index, UInt64 term);
@@ -135,6 +141,7 @@ class MetaRaftCommandDelegate
     , private boost::noncopyable
 {
     friend class RegionRaftCommandDelegate;
+    friend class tests::RegionKVStoreTest;
 
     MetaRaftCommandDelegate() = delete;
 
diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
index 5d372163b6f..52d6efaccc3 100644
--- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
+++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
@@ -18,6 +18,8 @@ extern void setupDelRequest(raft_cmdpb::Request *, const std::string &, const Ti
 
 extern std::optional<RegionDataReadInfoList> ReadRegionCommitCache(const RegionPtr & region, bool lock_region = true);
 extern void RemoveRegionCommitCache(const RegionPtr & region, const RegionDataReadInfoList & data_list_read, bool lock_region = true);
+extern void CheckRegionForMergeCmd(const raft_cmdpb::AdminResponse & response, const RegionState & region_state);
+extern void ChangeRegionStateRange(RegionState & region_state, bool source_at_left, const RegionState & source_region_state);
 
 namespace tests
 {
@@ -276,6 +278,55 @@ void RegionKVStoreTest::testRaftMergeRollback(KVStore & kvs, TMTContext & tmt)
     {
         auto region = kvs.getRegion(region_id);
 
+        raft_cmdpb::AdminRequest request;
+        raft_cmdpb::AdminResponse response;
+        {
+            request.set_cmd_type(raft_cmdpb::AdminCmdType::RollbackMerge);
+
+            auto * rollback_merge = request.mutable_rollback_merge();
+            {
+                auto merge_state = region->getMergeState();
+                rollback_merge->set_commit(merge_state.commit());
+            }
+        }
+        region->setStateApplying();
+        try
+        {
+            kvs.handleAdminRaftCmd(std::move(request),
+                                   std::move(response),
+                                   region_id,
+                                   32,
+                                   6,
+                                   tmt);
+            ASSERT_TRUE(false);
+        }
+        catch (Exception & e)
+        {
+            ASSERT_EQ(e.message(), "execRollbackMerge: region state is Applying, expect Merging");
+        }
+        ASSERT_EQ(region->peerState(), raft_serverpb::PeerState::Applying);
+        region->setPeerState(raft_serverpb::PeerState::Merging);
+
+        region->meta.region_state.getMutMergeState().set_commit(1234);
+        try
+        {
+            kvs.handleAdminRaftCmd(std::move(request),
+                                   std::move(response),
+                                   region_id,
+                                   32,
+                                   6,
+                                   tmt);
+            ASSERT_TRUE(false);
+        }
+        catch (Exception & e)
+        {
+            ASSERT_EQ(e.message(), "execRollbackMerge: merge commit index is 1234, expect 31");
+        }
+        region->meta.region_state.getMutMergeState().set_commit(31);
+    }
+    {
+        auto region = kvs.getRegion(region_id);
+
         raft_cmdpb::AdminRequest request;
         raft_cmdpb::AdminResponse response;
         {
@@ -499,6 +550,45 @@ void RegionKVStoreTest::testRaftMerge(KVStore & kvs, TMTContext & tmt)
         ASSERT_EQ(source_region->peerState(), raft_serverpb::PeerState::Merging);
     }
 
+    {
+        auto source_id = 7, target_id = 1;
+        auto source_region = kvs.getRegion(source_id);
+        raft_cmdpb::AdminRequest request;
+        {
+            request.set_cmd_type(raft_cmdpb::AdminCmdType::CommitMerge);
+            auto * commit_merge = request.mutable_commit_merge();
+            {
+                commit_merge->set_commit(source_region->appliedIndex());
+                *commit_merge->mutable_source() = source_region->getMetaRegion();
+            }
+        }
+        source_region->setStateApplying();
+        source_region->makeRaftCommandDelegate(kvs.genTaskLock());
+        const auto & source_region_meta_delegate = source_region->meta.makeRaftCommandDelegate();
+        try
+        {
+            kvs.getRegion(target_id)->meta.makeRaftCommandDelegate().checkBeforeCommitMerge(request, source_region_meta_delegate);
+            ASSERT_TRUE(false);
+        }
+        catch (Exception & e)
+        {
+            ASSERT_EQ(e.message(), "checkBeforeCommitMerge: unexpected state Applying of source 1");
+        }
+        source_region->setPeerState(raft_serverpb::PeerState::Normal);
+        {
+            request.mutable_commit_merge()->mutable_source()->mutable_start_key()->clear();
+        }
+        try
+        {
+            kvs.getRegion(target_id)->meta.makeRaftCommandDelegate().checkBeforeCommitMerge(request, source_region_meta_delegate);
+            ASSERT_TRUE(false);
+        }
+        catch (Exception & e)
+        {
+            ASSERT_EQ(e.message(), "checkBeforeCommitMerge: source region does not match existing region meta");
+        }
+    }
+
     {
         auto source_id = 7, target_id = 1;
         auto source_region = kvs.getRegion(source_id);
@@ -1003,6 +1093,43 @@ void test_mergeresult()
     ASSERT_EQ(MetaRaftCommandDelegate::computeRegionMergeResult(createRegionInfo(1, "", "x"), createRegionInfo(1000, "x", "")).source_at_left, true);
     ASSERT_EQ(MetaRaftCommandDelegate::computeRegionMergeResult(createRegionInfo(1, "x", "y"), createRegionInfo(1000, "y", "z")).source_at_left, true);
     ASSERT_EQ(MetaRaftCommandDelegate::computeRegionMergeResult(createRegionInfo(1, "y", "z"), createRegionInfo(1000, "x", "y")).source_at_left, false);
+
+    {
+        RegionState region_state;
+        bool source_at_left;
+        RegionState source_region_state;
+
+        region_state.setStartKey(RecordKVFormat::genKey(1, 0));
+        region_state.setEndKey(RecordKVFormat::genKey(1, 10));
+
+        source_region_state.setStartKey(RecordKVFormat::genKey(1, 10));
+        source_region_state.setEndKey(RecordKVFormat::genKey(1, 20));
+
+        source_at_left = false;
+
+        ChangeRegionStateRange(region_state, source_at_left, source_region_state);
+
+        ASSERT_EQ(region_state.getRange()->comparableKeys().first.key, RecordKVFormat::genKey(1, 0));
+        ASSERT_EQ(region_state.getRange()->comparableKeys().second.key, RecordKVFormat::genKey(1, 20));
+    }
+    {
+        RegionState region_state;
+        bool source_at_left;
+        RegionState source_region_state;
+
+        region_state.setStartKey(RecordKVFormat::genKey(2, 5));
+        region_state.setEndKey(RecordKVFormat::genKey(2, 10));
+
+        source_region_state.setStartKey(RecordKVFormat::genKey(2, 0));
+        source_region_state.setEndKey(RecordKVFormat::genKey(2, 5));
+
+        source_at_left = true;
+
+        ChangeRegionStateRange(region_state, source_at_left, source_region_state);
+
+        ASSERT_EQ(region_state.getRange()->comparableKeys().first.key, RecordKVFormat::genKey(2, 0));
+        ASSERT_EQ(region_state.getRange()->comparableKeys().second.key, RecordKVFormat::genKey(2, 10));
+    }
 }
 
 void RegionKVStoreTest::testBasic()
@@ -1151,6 +1278,20 @@ void RegionKVStoreTest::testBasic()
     {
         test_mergeresult();
     }
+    {
+        raft_cmdpb::AdminResponse response;
+        response.mutable_split()->mutable_left()->add_peers()->set_id(123);
+        RegionState region_state;
+        region_state.getMutRegion().add_peers()->set_id(456);
+        try
+        {
+            CheckRegionForMergeCmd(response, region_state);
+        }
+        catch (Exception & e)
+        {
+            ASSERT_EQ(e.message(), "CheckRegionForMergeCmd: current region meta: peers { id: 456 }, expect: peers { id: 123 }");
+        }
+    }
 }
 
 TEST_F(RegionKVStoreTest, run)

From 0d772458b3e6f2686949744d56a9f05fc6765931 Mon Sep 17 00:00:00 2001
From: Zhigao Tong
Date: Thu, 17 Feb 2022 14:30:47 +0800
Subject: [PATCH 8/9] update proxy

Signed-off-by: Zhigao Tong
---
 contrib/tiflash-proxy                            | 2 +-
 dbms/src/Server/Server.cpp                       | 3 +++
 release-centos7-llvm/scripts/build-tiflash-ci.sh | 9 ++++++---
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/contrib/tiflash-proxy b/contrib/tiflash-proxy
index 7a0ba5e2c85..4d6a94040d2 160000
--- a/contrib/tiflash-proxy
+++ b/contrib/tiflash-proxy
@@ -1 +1 @@
-Subproject commit 7a0ba5e2c85e9d89ab7f639d9e3b96246192657d
+Subproject commit 4d6a94040d25f146a7fde3042b03df923846e161
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 77dae0c4b11..852ab0b8950 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -219,6 +219,8 @@ struct TiFlashProxyConfig
     const String engine_store_address = "engine-addr";
     const String engine_store_advertise_address = "advertise-engine-addr";
     const String pd_endpoints = "pd-endpoints";
+    const String engine_label = "engine-label";
+    const String engine_label_value = "tiflash";
 
     explicit TiFlashProxyConfig(Poco::Util::LayeredConfiguration & config)
     {
@@ -241,6 +243,7 @@ struct TiFlashProxyConfig
             args_map[engine_store_address] = config.getString("flash.service_addr");
         else
             args_map[engine_store_advertise_address] = args_map[engine_store_address];
+        args_map[engine_label] = engine_label_value;
 
         for (auto && [k, v] : args_map)
         {
diff --git a/release-centos7-llvm/scripts/build-tiflash-ci.sh b/release-centos7-llvm/scripts/build-tiflash-ci.sh
index acdea9c8c7d..8f15f61772e 100755
--- a/release-centos7-llvm/scripts/build-tiflash-ci.sh
+++ b/release-centos7-llvm/scripts/build-tiflash-ci.sh
@@ -102,16 +102,19 @@ mkdir -p ${SRCPATH}/contrib/tiflash-proxy/target/release
 cd ${SRCPATH}/contrib/tiflash-proxy
 proxy_git_hash=$(git log -1 --format="%H")
 
-PROXY_CI_CACHE_PATH="tiflash/ci-cache/tmp/pr-build"
 curl -o "${SRCPATH}/contrib/tiflash-proxy/target/release/libtiflash_proxy.so" \
-    http://fileserver.pingcap.net/download/builds/pingcap/${PROXY_CI_CACHE_PATH}/${proxy_git_hash}/libtiflash_proxy.so
+    http://fileserver.pingcap.net/download/builds/pingcap/tiflash-proxy/${proxy_git_hash}-llvm/libtiflash_proxy.so
 
 proxy_size=$(ls -l "${SRCPATH}/contrib/tiflash-proxy/target/release/libtiflash_proxy.so" | awk '{print $5}')
 min_size=$((102400))
 BUILD_TIFLASH_PROXY=false
 if [[ ${proxy_size} -lt ${min_size} ]]; then
-    exit -1
+    BUILD_TIFLASH_PROXY=true
+    CMAKE_PREBUILT_LIBS_ROOT_ARG=""
+    echo "need to build libtiflash_proxy.so"
+    export PATH=$PATH:$HOME/.cargo/bin
+    rm -f target/release/libtiflash_proxy.so
 else
     CMAKE_PREBUILT_LIBS_ROOT_ARG=-DPREBUILT_LIBS_ROOT="${SRCPATH}/contrib/tiflash-proxy"
     chmod 0731 "${SRCPATH}/contrib/tiflash-proxy/target/release/libtiflash_proxy.so"

From 7f75e86b9b32e455f731811d2db69f72d78751c5 Mon Sep 17 00:00:00 2001
From: Zhigao Tong
Date: Thu, 17 Feb 2022 15:10:59 +0800
Subject: [PATCH 9/9] add more tests

Signed-off-by: Zhigao Tong
---
 dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
index 52d6efaccc3..582cfc95e77 100644
--- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
+++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
@@ -133,6 +133,7 @@ void RegionKVStoreTest::testReadIndex()
 
         auto region = makeRegion(tar_region_id, RecordKVFormat::genKey(2, 0), RecordKVFormat::genKey(2, 10));
         lock.regions.emplace(region->id(), region);
+        lock.index.add(region);
     }
     {
         ASSERT_EQ(proxy_instance.regions.at(tar_region_id)->getLatestCommitIndex(), 5);
@@ -147,6 +148,7 @@ void RegionKVStoreTest::testReadIndex()
         });
         SCOPE_EXIT({
             t.join();
+            kvs.handleDestroy(tar_region_id, ctx.getTMTContext());
         });
         ASSERT_EQ(notifier.blockedWaitFor(std::chrono::milliseconds(1000 * 3600)), AsyncNotifier::Status::Normal);
 
         std::this_thread::sleep_for(std::chrono::milliseconds(2));
@@ -181,7 +183,12 @@ void RegionKVStoreTest::testReadIndex()
         });
         ASSERT_EQ(notifier.blockedWaitFor(std::chrono::milliseconds(1000 * 3600)), AsyncNotifier::Status::Normal);
     }
+    kvs.asyncRunReadIndexWorkers();
+    SCOPE_EXIT({
+        kvs.stopReadIndexWorkers();
+    });
+
     {
         // test read index
         auto region = kvs.getRegion(1);
@@ -765,6 +772,8 @@ void RegionKVStoreTest::testKVStore()
     KVStore & kvs = *ctx.getTMTContext().getKVStore();
     kvs.restore(nullptr);
     {
+        // Run without read-index workers
+
         kvs.initReadIndexWorkers(
             []() { return std::chrono::milliseconds(10);