diff --git a/dbms/src/Storages/Transaction/RaftLogManager.cpp b/dbms/src/Storages/Transaction/RaftLogManager.cpp index 9b0bdab65fe..000c47e31d6 100644 --- a/dbms/src/Storages/Transaction/RaftLogManager.cpp +++ b/dbms/src/Storages/Transaction/RaftLogManager.cpp @@ -37,7 +37,8 @@ bool RaftLogEagerGcTasks::updateHint( UInt64 applied_index, UInt64 threshold) { - if (applied_index < eager_truncated_index || applied_index - eager_truncated_index < threshold) + if (threshold == 0 // + || applied_index < eager_truncated_index || applied_index - eager_truncated_index < threshold) return false; // Try to register a task for eager remove RaftLog to reduce the memory overhead of UniPS diff --git a/dbms/src/Storages/Transaction/RaftLogManager.h b/dbms/src/Storages/Transaction/RaftLogManager.h index fa5b1652510..8b96b28eb5e 100644 --- a/dbms/src/Storages/Transaction/RaftLogManager.h +++ b/dbms/src/Storages/Transaction/RaftLogManager.h @@ -43,6 +43,6 @@ class RaftLogEagerGcTasks // RegionID -> truncated index using RaftLogGcTasksRes = std::unordered_map; -RaftLogGcTasksRes executeRaftLogGcTasks(Context & global_ctx, RaftLogEagerGcTasks::Hints && hints); +[[nodiscard]] RaftLogGcTasksRes executeRaftLogGcTasks(Context & global_ctx, RaftLogEagerGcTasks::Hints && hints); } // namespace DB diff --git a/dbms/src/Storages/Transaction/tests/gtest_raft_log_manager.cpp b/dbms/src/Storages/Transaction/tests/gtest_raft_log_manager.cpp new file mode 100644 index 00000000000..90f814758b6 --- /dev/null +++ b/dbms/src/Storages/Transaction/tests/gtest_raft_log_manager.cpp @@ -0,0 +1,87 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB::tests +{ + +TEST(RaftLogEagerGCTasksTest, Basic) +try +{ + RaftLogEagerGcTasks tasks; + + RegionID region_id = 1000; + // threshold == 0 always return false + ASSERT_FALSE(tasks.updateHint(region_id, /*eager_truncated_index=*/10, /*applied_index=*/10000, /*threshold=*/0)); + ASSERT_FALSE(tasks.updateHint(region_id, /*eager_truncated_index=*/10000, /*applied_index=*/10, /*threshold=*/0)); + + ASSERT_FALSE(tasks.updateHint(region_id, /*eager_truncated_index=*/10000, /*applied_index=*/10, /*threshold=*/512)); + + { + // create new hints + ASSERT_TRUE( + tasks.updateHint(region_id, /*eager_truncated_index=*/10, /*applied_index=*/10000, /*threshold=*/512)); + // the applied index advance, but not merged into the hints + ASSERT_FALSE( + tasks.updateHint(region_id, /*eager_truncated_index=*/10, /*applied_index=*/10000 + 10, /*threshold=*/512)); + auto hints = tasks.getAndClearHints(); + ASSERT_EQ(hints.size(), 1); + ASSERT_TRUE(hints.contains(region_id)); + ASSERT_EQ(hints[region_id].applied_index, 10000); + ASSERT_EQ(hints[region_id].eager_truncate_index, 10); + } + { + auto hints = tasks.getAndClearHints(); + ASSERT_TRUE(hints.empty()); + } + + { + // create new hints + ASSERT_TRUE( + tasks.updateHint(region_id, /*eager_truncated_index=*/10, /*applied_index=*/10000, /*threshold=*/512)); + // the applied index advance, and merged into the hints + ASSERT_TRUE( + tasks + .updateHint(region_id, /*eager_truncated_index=*/10, /*applied_index=*/10000 + 523, /*threshold=*/512)); + // applied index rollback, just ignore + ASSERT_FALSE( + tasks + .updateHint(region_id, /*eager_truncated_index=*/10, /*applied_index=*/10000 + 500, /*threshold=*/512)); + auto hints = tasks.getAndClearHints(); + ASSERT_EQ(hints.size(), 1); + ASSERT_TRUE(hints.contains(region_id)); + ASSERT_EQ(hints[region_id].applied_index, 10000 + 523); + ASSERT_EQ(hints[region_id].eager_truncate_index, 10); + } + + { + // create new hints + ASSERT_TRUE( + tasks.updateHint(region_id, /*eager_truncated_index=*/10, /*applied_index=*/10000, /*threshold=*/512)); + // the applied index and truncated index advance, and merged into the hints + ASSERT_TRUE( + tasks + .updateHint(region_id, /*eager_truncated_index=*/30, /*applied_index=*/10000 + 523, /*threshold=*/512)); + auto hints = tasks.getAndClearHints(); + ASSERT_EQ(hints.size(), 1); + ASSERT_TRUE(hints.contains(region_id)); + ASSERT_EQ(hints[region_id].applied_index, 10000 + 523); + ASSERT_EQ(hints[region_id].eager_truncate_index, 10); + } +} +CATCH + +} // namespace DB::tests diff --git a/dbms/src/TiDB/Etcd/Client.cpp b/dbms/src/TiDB/Etcd/Client.cpp index c160a9efacf..d9ea6e3749e 100644 --- a/dbms/src/TiDB/Etcd/Client.cpp +++ b/dbms/src/TiDB/Etcd/Client.cpp @@ -186,6 +186,7 @@ grpc::Status Client::leaseRevoke(LeaseID lease_id) } std::tuple Client::campaign( + grpc::ClientContext * grpc_context, const String & name, const String & value, LeaseID lease_id) @@ -195,12 +196,11 @@ std::tuple Client::campaign( req.set_value(value); req.set_lease(lease_id); - grpc::ClientContext context; // usually use `campaign` blocks until become leader or error happens, // don't set timeout. v3electionpb::CampaignResponse resp; - auto status = leaderClient()->election_stub->Campaign(&context, req, &resp); + auto status = leaderClient()->election_stub->Campaign(grpc_context, req, &resp); return {resp.leader(), status}; } diff --git a/dbms/src/TiDB/Etcd/Client.h b/dbms/src/TiDB/Etcd/Client.h index fa96901df49..ffa299fa064 100644 --- a/dbms/src/TiDB/Etcd/Client.h +++ b/dbms/src/TiDB/Etcd/Client.h @@ -74,6 +74,7 @@ class Client grpc::Status leaseRevoke(LeaseID lease_id); std::tuple campaign( + grpc::ClientContext * grpc_context, const String & name, const String & value, LeaseID lease_id); diff --git a/dbms/src/TiDB/OwnerManager.cpp b/dbms/src/TiDB/OwnerManager.cpp index f3b9a9945cf..8719dc23aa7 100644 --- a/dbms/src/TiDB/OwnerManager.cpp +++ b/dbms/src/TiDB/OwnerManager.cpp @@ -118,6 +118,11 @@ void EtcdOwnerManager::cancelImpl() } if (th_camaign.joinable()) { + { + std::unique_lock lock(mtx_camaign); + if (campaing_ctx) + campaing_ctx->TryCancel(); + } th_camaign.join(); } if (th_watch_owner.joinable()) @@ -248,7 +253,11 @@ void EtcdOwnerManager::camaignLoop(Etcd::SessionPtr session) const auto lease_id = session->leaseID(); LOG_DEBUG(log, "new campaign loop with lease_id={:x}", lease_id); // Let this thread blocks until becone owner or error occurs - auto && [new_leader, status] = client->campaign(campaign_name, id, lease_id); + { + std::unique_lock lock(mtx_camaign); + campaing_ctx = std::make_unique(); + } + auto && [new_leader, status] = client->campaign(campaing_ctx.get(), campaign_name, id, lease_id); if (!status.ok()) { // if error, continue next campaign diff --git a/dbms/src/TiDB/OwnerManager.h b/dbms/src/TiDB/OwnerManager.h index 5029b689adf..aa2d73c21a5 100644 --- a/dbms/src/TiDB/OwnerManager.h +++ b/dbms/src/TiDB/OwnerManager.h @@ -181,6 +181,7 @@ class EtcdOwnerManager : public OwnerManager std::mutex mtx_camaign; State state = State::Init; std::condition_variable cv_camaign; + std::unique_ptr campaing_ctx; // A thread for running camaign logic std::thread th_camaign; diff --git a/dbms/src/TiDB/tests/gtest_owner_manager.cpp b/dbms/src/TiDB/tests/gtest_owner_manager.cpp index 0c38f49aa9e..95e0e034dbc 100644 --- a/dbms/src/TiDB/tests/gtest_owner_manager.cpp +++ b/dbms/src/TiDB/tests/gtest_owner_manager.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -370,7 +371,6 @@ try } CATCH - TEST_F(OwnerManagerTest, CreateEtcdSessionFail) try { @@ -397,7 +397,7 @@ try auto owner0 = std::static_pointer_cast(OwnerManager::createS3GCOwner(*ctx, id, etcd_client, test_ttl)); auto owner_info = owner0->getOwnerID(); - EXPECT_EQ(owner_info.status, OwnerType::NotOwner) << magic_enum::enum_name(owner_info.status); + EXPECT_EQ(owner_info.status, OwnerType::NoLeader) << magic_enum::enum_name(owner_info.status); FailPointHelper::enableFailPoint(FailPoints::force_fail_to_create_etcd_session); @@ -408,4 +408,76 @@ try } CATCH +TEST_F(OwnerManagerTest, CancelNonOwner) +try +{ + auto etcd_endpoint = Poco::Environment::get("ETCD_ENDPOINT", ""); + if (etcd_endpoint.empty()) + { + const auto * t = ::testing::UnitTest::GetInstance()->current_test_info(); + LOG_INFO( + log, + "{}.{} is skipped because env ETCD_ENDPOINT not set. " + "Run it with an etcd cluster using `ETCD_ENDPOINT=127.0.0.1:2379 ./dbms/gtests_dbms ...`", + t->test_case_name(), + t->name()); + return; + } + + using namespace std::chrono_literals; + + auto ctx = TiFlashTestEnv::getContext(); + pingcap::ClusterConfig config; + pingcap::pd::ClientPtr pd_client = std::make_shared(Strings{etcd_endpoint}, config); + auto etcd_client = DB::Etcd::Client::create(pd_client, config); + + std::atomic owner0_elected = false; + std::shared_ptr owner0; + std::shared_ptr owner1; + auto th_owner = std::async([&]() { + const String id = "owner_0"; + owner0 = std::static_pointer_cast( + OwnerManager::createS3GCOwner(*ctx, id, etcd_client, test_ttl)); + auto owner_info = owner0->getOwnerID(); + EXPECT_EQ(owner_info.status, OwnerType::NoLeader) << magic_enum::enum_name(owner_info.status); + + owner0->setBeOwnerHook([&] { owner0_elected = true; }); + owner0->campaignOwner(); + + while (!owner0_elected) + ; + + owner_info = owner0->getOwnerID(); + EXPECT_EQ(owner_info.status, OwnerType::IsOwner) << magic_enum::enum_name(owner_info.status); + }); + + auto th_non_owner = std::async([&] { + const String id = "owner_1"; + + LOG_INFO(log, "waiting for owner0 elected"); + while (!owner0_elected) + ; + + owner1 = std::static_pointer_cast( + OwnerManager::createS3GCOwner(*ctx, id, etcd_client, test_ttl)); + owner1->campaignOwner(); // this will block + }); + + auto th_cancel_non_owner = std::async([&] { + while (!owner0_elected) + ; + + LOG_INFO(log, "waiting for owner1 start campaign"); + std::this_thread::sleep_for(3s); + LOG_INFO(log, "cancel owner1 start"); + owner1->cancel(); // cancel should finished th_non_owner + LOG_INFO(log, "cancel owner1 done"); + }); + + th_cancel_non_owner.wait(); + th_non_owner.wait(); + th_owner.wait(); +} +CATCH + } // namespace DB::tests diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index 45a2e2a9407..9cf08047c1a 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -52,7 +52,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1694098045746, + "iteration": 1694151861724, "links": [], "panels": [ { @@ -1088,7 +1088,7 @@ "h": 7, "w": 12, "x": 0, - "y": 34 + "y": 2 }, "hiddenSeries": false, "id": 141, @@ -1200,7 +1200,7 @@ "h": 7, "w": 12, "x": 12, - "y": 34 + "y": 2 }, "hiddenSeries": false, "id": 154, @@ -1330,7 +1330,7 @@ "h": 7, "w": 12, "x": 0, - "y": 41 + "y": 9 }, "hiddenSeries": false, "id": 145, @@ -1460,7 +1460,7 @@ "h": 7, "w": 12, "x": 12, - "y": 41 + "y": 9 }, "hiddenSeries": false, "id": 147, @@ -1590,7 +1590,7 @@ "h": 7, "w": 12, "x": 0, - "y": 48 + "y": 16 }, "hiddenSeries": false, "id": 155, @@ -1720,7 +1720,7 @@ "h": 7, "w": 12, "x": 12, - "y": 48 + "y": 16 }, "hiddenSeries": false, "id": 153, @@ -1850,7 +1850,7 @@ "h": 7, "w": 12, "x": 0, - "y": 55 + "y": 23 }, "hiddenSeries": false, "id": 151, @@ -1980,7 +1980,7 @@ "h": 7, "w": 12, "x": 12, - "y": 55 + "y": 23 }, "hiddenSeries": false, "id": 156, @@ -2110,7 +2110,7 @@ "h": 7, "w": 12, "x": 0, - "y": 62 + "y": 30 }, "hiddenSeries": false, "id": 149, @@ -2240,7 +2240,7 @@ "h": 7, "w": 12, "x": 12, - "y": 62 + "y": 30 }, "hiddenSeries": false, "id": 159, @@ -2370,7 +2370,7 @@ "h": 7, "w": 12, "x": 0, - "y": 69 + "y": 37 }, "hiddenSeries": false, "id": 161, @@ -4167,7 +4167,7 @@ "h": 8, "w": 12, "x": 0, - "y": 36 + "y": 4 }, "hiddenSeries": false, "id": 107, @@ -4269,7 +4269,7 @@ "h": 8, "w": 12, "x": 12, - "y": 36 + "y": 4 }, "hiddenSeries": false, "id": 109, @@ -4389,7 +4389,7 @@ "h": 8, "w": 12, "x": 0, - "y": 44 + "y": 12 }, "hiddenSeries": false, "id": 111, @@ -4500,7 +4500,7 @@ "h": 8, "w": 12, "x": 12, - "y": 44 + "y": 12 }, "hiddenSeries": false, "id": 113, @@ -4611,7 +4611,7 @@ "h": 8, "w": 12, "x": 0, - "y": 52 + "y": 20 }, "hiddenSeries": false, "id": 117, @@ -4712,7 +4712,7 @@ "h": 8, "w": 12, "x": 12, - "y": 52 + "y": 20 }, "hiddenSeries": false, "id": 115, @@ -4846,7 +4846,7 @@ "h": 7, "w": 12, "x": 0, - "y": 37 + "y": 5 }, "hiddenSeries": false, "id": 19, @@ -4968,7 +4968,7 @@ "h": 7, "w": 12, "x": 12, - "y": 37 + "y": 5 }, "hiddenSeries": false, "id": 18, @@ -5066,7 +5066,7 @@ "h": 7, "w": 12, "x": 0, - "y": 44 + "y": 12 }, "hiddenSeries": false, "id": 20, @@ -5216,7 +5216,7 @@ "h": 8, "w": 12, "x": 0, - "y": 38 + "y": 6 }, "hiddenSeries": false, "id": 41, @@ -5329,7 +5329,7 @@ "h": 8, "w": 12, "x": 12, - "y": 38 + "y": 6 }, "hiddenSeries": false, "id": 38, @@ -5487,7 +5487,7 @@ "h": 8, "w": 24, "x": 0, - "y": 46 + "y": 14 }, "hiddenSeries": false, "id": 40, @@ -5587,7 +5587,7 @@ "h": 5, "w": 12, "x": 0, - "y": 54 + "y": 22 }, "hiddenSeries": false, "id": 39, @@ -5690,7 +5690,7 @@ "h": 5, "w": 12, "x": 12, - "y": 54 + "y": 22 }, "hiddenSeries": false, "id": 42, @@ -5794,7 +5794,7 @@ "h": 5, "w": 12, "x": 0, - "y": 59 + "y": 27 }, "hiddenSeries": false, "id": 130, @@ -5897,7 +5897,7 @@ "h": 5, "w": 12, "x": 12, - "y": 59 + "y": 27 }, "hiddenSeries": false, "id": 131, @@ -6001,7 +6001,7 @@ "h": 7, "w": 8, "x": 0, - "y": 64 + "y": 32 }, "hiddenSeries": false, "id": 50, @@ -6135,7 +6135,7 @@ "h": 7, "w": 8, "x": 8, - "y": 64 + "y": 32 }, "hiddenSeries": false, "id": 22, @@ -6249,7 +6249,7 @@ "h": 7, "w": 8, "x": 16, - "y": 64 + "y": 32 }, "hiddenSeries": false, "id": 52, @@ -6366,7 +6366,7 @@ "h": 7, "w": 12, "x": 0, - "y": 71 + "y": 39 }, "hiddenSeries": false, "id": 46, @@ -6489,7 +6489,7 @@ "h": 7, "w": 12, "x": 12, - "y": 71 + "y": 39 }, "hiddenSeries": false, "id": 47, @@ -6613,7 +6613,7 @@ "h": 8, "w": 12, "x": 0, - "y": 78 + "y": 46 }, "height": "", "hiddenSeries": false, @@ -6743,7 +6743,7 @@ "h": 8, "w": 12, "x": 12, - "y": 78 + "y": 46 }, "height": "", "hiddenSeries": false, @@ -6857,29 +6857,30 @@ { "aliasColors": {}, "bars": false, + "cacheTimeout": null, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The storage I/O limiter metrics.", + "description": "The current processing number of segments' background management", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 86 + "y": 54 }, "hiddenSeries": false, - "id": 84, + "id": 67, "legend": { "alignAsTable": true, "avg": false, "current": true, - "max": true, + "max": false, "min": false, "rightSide": true, "show": true, @@ -6895,7 +6896,7 @@ }, "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -6904,19 +6905,33 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tiflash_storage_io_limiter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "expr": "avg(tiflash_system_current_metric_DT_DeltaMerge{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", - "instant": false, - "intervalFactor": 2, - "legendFormat": "{{type}}", + "hide": false, + "intervalFactor": 1, + "legendFormat": "delta_merge-{{instance}}", "refId": "A" + }, + { + "expr": "avg(tiflash_system_current_metric_DT_SegmentSplit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "seg_split-{{instance}}", + "refId": "B" + }, + { + "expr": "avg(tiflash_system_current_metric_DT_SegmentMerge{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "seg_merge-{{instance}}", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "I/O Limiter", + "title": "Current Data Management Tasks", "tooltip": { "shared": true, "sort": 0, @@ -6933,7 +6948,7 @@ "yaxes": [ { "decimals": 0, - "format": "binBps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -6941,7 +6956,7 @@ "show": true }, { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -6960,116 +6975,65 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "I/O Limiter pending tasks.", + "description": "Errors of DeltaIndex", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 86 + "y": 54 }, "hiddenSeries": false, - "id": 86, + "id": 237, "legend": { "alignAsTable": true, "avg": false, "current": false, - "max": false, + "max": true, "min": false, "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/%/", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tiflash_system_current_metric_RateLimiterPendingWriteRequest{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_DTDeltaIndexError{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", "format": "time_series", + "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "other-{{instance}}", + "legendFormat": "DeltaIndexError-{{instance}}", "refId": "A" - }, - { - "exemplar": true, - "expr": "avg(tiflash_system_current_metric_IOLimiterPendingBgWriteReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "hide": false, - "interval": "", - "legendFormat": "bgwrite-{{instance}}", - "refId": "B" - }, - { - "exemplar": true, - "expr": "avg(tiflash_system_current_metric_IOLimiterPendingFgWriteReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "hide": false, - "interval": "", - "legendFormat": "fgwrite-{{instance}}", - "refId": "C" - }, - { - "exemplar": true, - "expr": "avg(tiflash_system_current_metric_IOLimiterPendingBgReadReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "hide": false, - "interval": "", - "legendFormat": "bgread-{{instance}}", - "refId": "D" - }, - { - "exemplar": true, - "expr": "avg(tiflash_system_current_metric_IOLimiterPendingFgReadReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "hide": false, - "interval": "", - "legendFormat": "fgread-{{instance}}", - "refId": "E" - }, - { - "exemplar": true, - "expr": "histogram_quantile(1.00, sum(rate(tiflash_storage_io_limiter_pending_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", - "hide": false, - "interval": "", - "legendFormat": "100%-{{type}}", - "refId": "F" - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_io_limiter_pending_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", - "hide": false, - "interval": "", - "legendFormat": "99%-{{type}}", - "refId": "G" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "I/O Limiter Pending Tasks", + "title": "DeltaIndexError", "tooltip": { "shared": true, "sort": 0, @@ -7085,8 +7049,8 @@ }, "yaxes": [ { - "decimals": 0, - "format": "short", + "decimals": null, + "format": "cps", "label": null, "logBase": 1, "max": null, @@ -7094,12 +7058,12 @@ "show": true }, { - "format": "s", + "format": "opm", "label": null, "logBase": 1, "max": null, - "min": null, - "show": true + "min": "0", + "show": false } ], "yaxis": { @@ -7113,31 +7077,31 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The information of read thread scheduling and data sharing cache hit ratio. Data sharing cache is purpose-built for OLAP workload that can reduce repeated data reads of concurrent table scanning.", + "description": "The storage I/O limiter metrics.", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 94 + "y": 62 }, "hiddenSeries": false, - "id": 132, + "id": 84, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -7148,60 +7112,28 @@ }, "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "cache_hit_ratio", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"get_cache_miss|get_cache_hit|get_cache_part|get_cache_copy|sche_no_segment\"}[1m])) by (type)", + "expr": "sum(rate(tiflash_storage_io_limiter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", - "hide": false, - "interval": "", + "instant": false, "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_hit|get_cache_copy\"}[1m]))", - "hide": false, - "interval": "", - "legendFormat": "get_cache_hit", - "refId": "C" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_miss|get_cache_hit|get_cache_part|get_cache_copy\"}[1m]))", - "hide": false, - "interval": "", - "legendFormat": "get_cache_total", - "refId": "B" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_hit|get_cache_copy\"}[1m]))/sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_miss|get_cache_hit|get_cache_part|get_cache_copy\"}[1m]))", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "cache_hit_ratio", - "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Read Thread and Data Sharing", + "title": "I/O Limiter", "tooltip": { "shared": true, "sort": 0, @@ -7217,8 +7149,8 @@ }, "yaxes": [ { - "decimals": null, - "format": "ops", + "decimals": 0, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -7226,11 +7158,11 @@ "show": true }, { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -7242,81 +7174,119 @@ { "aliasColors": {}, "bars": false, - "cacheTimeout": null, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The current processing number of segments' background management", + "description": "I/O Limiter pending tasks.", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 94 + "y": 62 }, "hiddenSeries": false, - "id": 67, + "id": 86, "legend": { "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/%/", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tiflash_system_current_metric_DT_DeltaMerge{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", + "expr": "avg(tiflash_system_current_metric_RateLimiterPendingWriteRequest{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "other-{{instance}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "avg(tiflash_system_current_metric_IOLimiterPendingBgWriteReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "bgwrite-{{instance}}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "avg(tiflash_system_current_metric_IOLimiterPendingFgWriteReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "fgwrite-{{instance}}", + "refId": "C" + }, + { + "exemplar": true, + "expr": "avg(tiflash_system_current_metric_IOLimiterPendingBgReadReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "hide": false, - "intervalFactor": 1, - "legendFormat": "delta_merge-{{instance}}", - "refId": "A" + "interval": "", + "legendFormat": "bgread-{{instance}}", + "refId": "D" }, { - "expr": "avg(tiflash_system_current_metric_DT_SegmentSplit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "seg_split-{{instance}}", - "refId": "B" + "exemplar": true, + "expr": "avg(tiflash_system_current_metric_IOLimiterPendingFgReadReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "fgread-{{instance}}", + "refId": "E" }, { - "expr": "avg(tiflash_system_current_metric_DT_SegmentMerge{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "seg_merge-{{instance}}", - "refId": "C" + "exemplar": true, + "expr": "histogram_quantile(1.00, sum(rate(tiflash_storage_io_limiter_pending_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "hide": false, + "interval": "", + "legendFormat": "100%-{{type}}", + "refId": "F" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_io_limiter_pending_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "hide": false, + "interval": "", + "legendFormat": "99%-{{type}}", + "refId": "G" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Current Data Management Tasks", + "title": "I/O Limiter Pending Tasks", "tooltip": { "shared": true, "sort": 0, @@ -7341,7 +7311,7 @@ "show": true }, { - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -7360,118 +7330,100 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "cache misses or cache hits of mark_cache.\nBased on this infactor, we can check whether mark_cache is large enough", + "description": "The information of read thread scheduling and data sharing cache hit ratio. Data sharing cache is purpose-built for OLAP workload that can reduce repeated data reads of concurrent table scanning.", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 0, - "y": 102 + "y": 70 }, "hiddenSeries": false, - "id": 169, + "id": 132, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "cache_hit_ratio", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "max(tiflash_system_profile_event_MarkCacheMisses{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"get_cache_miss|get_cache_hit|get_cache_part|get_cache_copy|sche_no_segment\"}[1m])) by (type)", + "format": "time_series", + "hide": false, "interval": "", - "legendFormat": "mark cache misses", - "queryType": "randomWalk", + "intervalFactor": 2, + "legendFormat": "{{type}}", "refId": "A" }, { "exemplar": true, - "expr": "max(tiflash_system_profile_event_MarkCacheHits{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_hit|get_cache_copy\"}[1m]))", "hide": false, "interval": "", - "legendFormat": "mark cache hits", + "legendFormat": "get_cache_hit", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_miss|get_cache_hit|get_cache_part|get_cache_copy\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "get_cache_total", "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_hit|get_cache_copy\"}[1m]))/sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_miss|get_cache_hit|get_cache_part|get_cache_copy\"}[1m]))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "cache_hit_ratio", + "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Effectiveness of Mark Cache", + "title": "Read Thread and Data Sharing", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "transformations": [ - { - "id": "calculateField", - "options": { - "alias": "mark cache count total", - "binary": { - "left": "mark cache misses", - "operator": "+", - "reducer": "sum", - "right": "mark cache hits" - }, - "mode": "binary", - "reduce": { - "reducer": "sum" - } - } - }, - { - "id": "calculateField", - "options": { - "alias": "mark cache effectiveness", - "binary": { - "left": "mark cache hits", - "operator": "/", - "reducer": "sum", - "right": "mark cache count total" - }, - "mode": "binary", - "reduce": { - "reducer": "sum" - } - } - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "Time", - "mark cache effectiveness" - ] - } - } - } - ], "type": "graph", "xaxis": { "buckets": null, @@ -7482,20 +7434,21 @@ }, "yaxes": [ { - "format": "percentunit", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "percent", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": "0", + "show": true } ], "yaxis": { @@ -7517,10 +7470,10 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 12, - "y": 102 + "y": 70 }, "hiddenSeries": false, "id": 88, @@ -7708,42 +7661,39 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Errors of DeltaIndex", + "description": "cache misses or cache hits of mark_cache.\nBased on this infactor, we can check whether mark_cache is large enough", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 77 + "y": 78 }, "hiddenSeries": false, - "id": 237, + "id": 169, "legend": { - "alignAsTable": true, "avg": false, "current": false, - "max": true, + "max": false, "min": false, - "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -7753,25 +7703,76 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_system_profile_event_DTDeltaIndexError{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", - "format": "time_series", - "hide": false, + "expr": "max(tiflash_system_profile_event_MarkCacheMisses{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "interval": "", - "intervalFactor": 1, - "legendFormat": "DeltaIndexError-{{instance}}", + "legendFormat": "mark cache misses", + "queryType": "randomWalk", "refId": "A" + }, + { + "exemplar": true, + "expr": "max(tiflash_system_profile_event_MarkCacheHits{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "mark cache hits", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "DeltaIndexError", + "title": "Effectiveness of Mark Cache", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "mark cache count total", + "binary": { + "left": "mark cache misses", + "operator": "+", + "reducer": "sum", + "right": "mark cache hits" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "mark cache effectiveness", + "binary": { + "left": "mark cache hits", + "operator": "/", + "reducer": "sum", + "right": "mark cache count total" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "Time", + "mark cache effectiveness" + ] + } + } + } + ], "type": "graph", "xaxis": { "buckets": null, @@ -7782,20 +7783,19 @@ }, "yaxes": [ { - "decimals": null, - "format": "cps", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "opm", + "format": "percent", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": false } ], @@ -7821,7 +7821,7 @@ "h": 8, "w": 12, "x": 12, - "y": 109 + "y": 78 }, "hiddenSeries": false, "id": 168, @@ -7941,7 +7941,7 @@ "h": 8, "w": 12, "x": 0, - "y": 117 + "y": 86 }, "hiddenSeries": false, "id": 238, @@ -11473,7 +11473,7 @@ "y": 72 }, "hiddenSeries": false, - "id": 238, + "id": 239, "legend": { "alignAsTable": true, "avg": false, @@ -12542,7 +12542,7 @@ "h": 8, "w": 24, "x": 0, - "y": 176 + "y": 11 }, "hiddenSeries": false, "id": 173, @@ -12643,7 +12643,7 @@ "h": 8, "w": 12, "x": 0, - "y": 184 + "y": 19 }, "hiddenSeries": false, "id": 187, @@ -12771,7 +12771,7 @@ "h": 8, "w": 12, "x": 12, - "y": 184 + "y": 19 }, "height": "", "hiddenSeries": false, @@ -12890,7 +12890,7 @@ "h": 8, "w": 12, "x": 0, - "y": 192 + "y": 27 }, "height": "", "hiddenSeries": false, @@ -13000,7 +13000,7 @@ "h": 8, "w": 12, "x": 12, - "y": 192 + "y": 27 }, "height": "", "hiddenSeries": false, @@ -13113,7 +13113,7 @@ "h": 8, "w": 12, "x": 0, - "y": 200 + "y": 35 }, "hiddenSeries": false, "id": 176, @@ -13221,7 +13221,7 @@ "h": 8, "w": 12, "x": 12, - "y": 200 + "y": 35 }, "hiddenSeries": false, "id": 175, @@ -13348,7 +13348,7 @@ "h": 8, "w": 12, "x": 0, - "y": 208 + "y": 43 }, "hiddenSeries": false, "id": 189, @@ -13450,7 +13450,7 @@ "h": 8, "w": 12, "x": 12, - "y": 208 + "y": 43 }, "hiddenSeries": false, "id": 191, @@ -13550,7 +13550,7 @@ "h": 8, "w": 12, "x": 0, - "y": 216 + "y": 51 }, "hiddenSeries": false, "id": 193, @@ -13676,7 +13676,7 @@ "h": 8, "w": 12, "x": 12, - "y": 216 + "y": 51 }, "hiddenSeries": false, "id": 195, @@ -13787,7 +13787,7 @@ "h": 8, "w": 12, "x": 0, - "y": 224 + "y": 59 }, "hiddenSeries": false, "id": 201, @@ -13923,7 +13923,7 @@ "h": 8, "w": 12, "x": 12, - "y": 224 + "y": 59 }, "hiddenSeries": false, "id": 233, @@ -14039,7 +14039,7 @@ "h": 8, "w": 12, "x": 0, - "y": 232 + "y": 67 }, "hiddenSeries": false, "id": 236, @@ -14169,7 +14169,7 @@ "h": 8, "w": 12, "x": 0, - "y": 177 + "y": 12 }, "hiddenSeries": false, "id": 178, @@ -14287,7 +14287,7 @@ "h": 8, "w": 12, "x": 12, - "y": 177 + "y": 12 }, "hiddenSeries": false, "id": 179, @@ -14461,7 +14461,7 @@ "h": 8, "w": 12, "x": 0, - "y": 185 + "y": 20 }, "hiddenSeries": false, "id": 182, @@ -14579,7 +14579,7 @@ "h": 8, "w": 12, "x": 12, - "y": 185 + "y": 20 }, "hiddenSeries": false, "id": 180, @@ -14706,7 +14706,7 @@ "h": 8, "w": 12, "x": 0, - "y": 193 + "y": 28 }, "hiddenSeries": false, "id": 185, @@ -14833,7 +14833,7 @@ "h": 8, "w": 12, "x": 12, - "y": 193 + "y": 28 }, "hiddenSeries": false, "id": 186, @@ -14935,7 +14935,7 @@ "h": 8, "w": 12, "x": 0, - "y": 201 + "y": 36 }, "hiddenSeries": false, "id": 188, @@ -16210,4 +16210,4 @@ "title": "Test-Cluster-TiFlash-Summary", "uid": "SVbh2xUWk", "version": 1 -} \ No newline at end of file +}