raftstore: check stale peer on leader missing (tikv#16038)
Stale peers can block a TiKV store's resolved ts from advancing and lengthen
the RTO of essential features such as stale read, flashback, and EBS backup:
the default 2-hour stale peer check interval is too long for them.

To mitigate this, we speed up the stale peer check by letting TiKV check for
stale peers every 10 minutes when it detects a missing leader.

close tikv#11847, close tikv#15520, close pingcap/tidb#39130

Signed-off-by: Neil Shen <[email protected]>

Co-authored-by: tonyxuqqi <[email protected]>
overvenus and tonyxuqqi committed Nov 27, 2023
1 parent 13f40e9 commit 6439dd2
Showing 6 changed files with 106 additions and 18 deletions.
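
As a rough, self-contained sketch of the behavior described above (simplified logic, not the actual raftstore code; the two thresholds correspond to the abnormal_leader_missing_duration and max_leader_missing_duration settings that appear in the diff below):

use std::time::Duration;

/// What a peer does once it notices its leader has been missing for `elapsed`:
/// after the short "abnormal" threshold (10 minutes by default) it eagerly
/// broadcasts check-stale-peer messages; only after the long "max" threshold
/// (2 hours by default) does it fall back to validating itself with PD.
#[derive(Debug, PartialEq)]
enum Action {
    Nothing,
    CheckStalePeerEagerly,
    ValidateWithPd,
}

fn on_leader_missing(elapsed: Duration, abnormal: Duration, max: Duration) -> Action {
    if elapsed >= max {
        Action::ValidateWithPd
    } else if elapsed >= abnormal {
        Action::CheckStalePeerEagerly
    } else {
        Action::Nothing
    }
}

fn main() {
    let (abnormal, max) = (Duration::from_secs(10 * 60), Duration::from_secs(2 * 3600));
    // 15 minutes without a leader: previously the peer only logged the
    // leader-missing state until the 2-hour mark; with this change it also
    // checks for staleness right away.
    assert_eq!(
        on_leader_missing(Duration::from_secs(15 * 60), abnormal, max),
        Action::CheckStalePeerEagerly
    );
}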
33 changes: 20 additions & 13 deletions components/raftstore/src/store/fsm/peer.rs
@@ -6213,19 +6213,26 @@ where
fail_point!("peer_check_stale_state", state != StaleState::Valid, |_| {});
match state {
StaleState::Valid => (),
StaleState::LeaderMissing => {
warn!(
"leader missing longer than abnormal_leader_missing_duration";
"region_id" => self.fsm.region_id(),
"peer_id" => self.fsm.peer_id(),
"expect" => %self.ctx.cfg.abnormal_leader_missing_duration,
);
self.ctx
.raft_metrics
.leader_missing
.lock()
.unwrap()
.insert(self.region_id());
StaleState::LeaderMissing | StaleState::MaybeLeaderMissing => {
if state == StaleState::LeaderMissing {
warn!(
"leader missing longer than abnormal_leader_missing_duration";
"region_id" => self.fsm.region_id(),
"peer_id" => self.fsm.peer_id(),
"expect" => %self.ctx.cfg.abnormal_leader_missing_duration,
);
self.ctx
.raft_metrics
.leader_missing
.lock()
.unwrap()
.insert(self.region_id());
}

// It's very likely that this is a stale peer. To prevent
// resolved ts from being blocked for too long, we check stale
// peer eagerly.
self.fsm.peer.bcast_check_stale_peer_message(self.ctx);
}
StaleState::ToValidate => {
// for peer B in case 1 above
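
A hypothetical, heavily simplified illustration of what the eager bcast_check_stale_peer_message call buys us (the types and handshake below are made up for illustration; the real exchange in raftstore uses extra raft messages and differs in detail): the suspect peer sends out the conf version of its region epoch, and any peer whose newer view of the region no longer contains the sender can tell it that it is stale, letting it destroy itself long before the 2-hour fallback.

#[derive(Clone)]
struct RegionView {
    conf_ver: u64,
    peer_ids: Vec<u64>,
}

/// Returns true if, from `my_view`'s perspective, the sender is a stale peer:
/// our epoch is at least as new as the sender's, and the sender is no longer
/// a member of the region.
fn sender_is_stale(my_view: &RegionView, sender_id: u64, sender_conf_ver: u64) -> bool {
    my_view.conf_ver >= sender_conf_ver && !my_view.peer_ids.contains(&sender_id)
}

fn main() {
    // Peer 4 was removed at conf_ver 7 but never learned about it; its own
    // view is still at conf_ver 6.
    let up_to_date = RegionView { conf_ver: 7, peer_ids: vec![1, 2, 3] };
    assert!(sender_is_stale(&up_to_date, 4, 6));
    // A peer that is still a member must not be told it is stale.
    assert!(!sender_is_stale(&up_to_date, 2, 6));
}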
5 changes: 4 additions & 1 deletion components/raftstore/src/store/local_metrics.rs
@@ -4,7 +4,7 @@
use std::sync::{Arc, Mutex};

use collections::HashSet;
use prometheus::local::LocalHistogram;
use prometheus::local::{LocalHistogram, LocalIntCounter};
use raft::eraftpb::MessageType;
use tikv_util::time::{Duration, Instant};
use tracker::{Tracker, TrackerToken, GLOBAL_TRACKERS};
@@ -97,6 +97,7 @@ pub struct RaftMetrics {
pub wf_commit_log: LocalHistogram,
pub wf_commit_not_persist_log: LocalHistogram,

pub check_stale_peer: LocalIntCounter,
pub leader_missing: Arc<Mutex<HashSet<u64>>>,

last_flush_time: Instant,
@@ -132,6 +133,7 @@ impl RaftMetrics {
wf_persist_log: STORE_WF_PERSIST_LOG_DURATION_HISTOGRAM.local(),
wf_commit_log: STORE_WF_COMMIT_LOG_DURATION_HISTOGRAM.local(),
wf_commit_not_persist_log: STORE_WF_COMMIT_NOT_PERSIST_LOG_DURATION_HISTOGRAM.local(),
check_stale_peer: CHECK_STALE_PEER_COUNTER.local(),
leader_missing: Arc::default(),
last_flush_time: Instant::now_coarse(),
}
@@ -170,6 +172,7 @@ impl RaftMetrics {
self.wf_commit_not_persist_log.flush();
}

self.check_stale_peer.flush();
let mut missing = self.leader_missing.lock().unwrap();
LEADER_MISSING.set(missing.len() as i64);
missing.clear();
5 changes: 5 additions & 0 deletions components/raftstore/src/store/metrics.rs
@@ -647,6 +647,11 @@ lazy_static! {
"Total number of leader missed region."
).unwrap();

pub static ref CHECK_STALE_PEER_COUNTER: IntCounter = register_int_counter!(
"tikv_raftstore_check_stale_peer",
"Total number of checking stale peers."
).unwrap();

pub static ref INGEST_SST_DURATION_SECONDS: Histogram =
register_histogram!(
"tikv_snapshot_ingest_sst_duration_seconds",
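
The counter itself follows the local-metrics pattern already used in raftstore: a process-wide IntCounter is registered once (metrics.rs above), each poller keeps a cheap LocalIntCounter handle (local_metrics.rs), increments it locally on every call, and flushes the accumulated delta on the periodic metrics tick. A minimal standalone sketch of that pattern, assuming only the prometheus and lazy_static crates (the demo names are hypothetical):

use lazy_static::lazy_static;
use prometheus::{register_int_counter, IntCounter};

lazy_static! {
    // Registered once per process, mirroring CHECK_STALE_PEER_COUNTER above.
    static ref DEMO_CHECK_STALE_PEER: IntCounter = register_int_counter!(
        "demo_check_stale_peer",
        "Total number of stale peer checks."
    )
    .unwrap();
}

fn main() {
    // Each worker thread keeps a local handle and increments it cheaply...
    let mut local = DEMO_CHECK_STALE_PEER.local();
    for _ in 0..3 {
        local.inc();
    }
    // ...and flushes the accumulated count into the shared counter on its
    // metrics tick, just like RaftMetrics::flush does for check_stale_peer.
    local.flush();
    assert_eq!(DEMO_CHECK_STALE_PEER.get(), 3);
}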
14 changes: 10 additions & 4 deletions components/raftstore/src/store/peer.rs
@@ -126,6 +126,7 @@ pub enum StaleState {
Valid,
ToValidate,
LeaderMissing,
MaybeLeaderMissing,
}

#[derive(Debug)]
@@ -2267,7 +2268,6 @@ where
self.leader_missing_time = None;
return StaleState::Valid;
}
let naive_peer = !self.is_initialized() || !self.raft_group.raft.promotable();
// Updates the `leader_missing_time` according to the current state.
//
// If we are checking this it means we suspect the leader might be missing.
@@ -2287,13 +2287,18 @@
StaleState::ToValidate
}
Some(instant)
if instant.saturating_elapsed() >= ctx.cfg.abnormal_leader_missing_duration.0
&& !naive_peer =>
if instant.saturating_elapsed() >= ctx.cfg.abnormal_leader_missing_duration.0 =>
{
// A peer is considered to be in the leader-missing state
// if it's initialized but is isolated from its leader, or
// if something goes wrong and the raft group cannot elect a leader.
StaleState::LeaderMissing
if self.is_initialized() && self.raft_group.raft.promotable() {
StaleState::LeaderMissing
} else {
// Uninitialized peers and learners may not have leader info,
// even if there is a valid leader.
StaleState::MaybeLeaderMissing
}
}
_ => StaleState::Valid,
}
@@ -5510,6 +5515,7 @@ where
&mut self,
ctx: &mut PollContext<EK, ER, T>,
) {
ctx.raft_metrics.check_stale_peer.inc();
if self.check_stale_conf_ver < self.region().get_region_epoch().get_conf_ver()
|| self.region().get_region_epoch().get_conf_ver() == 0
{
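
Restating the new classification as a self-contained sketch (hypothetical flags stand in for Peer::is_initialized() and raft promotability; this is not the real Peer API): only an initialized, promotable peer that has gone quiet for abnormal_leader_missing_duration is reported as LeaderMissing, while an uninitialized peer or a learner, which may lack leader info even when a healthy leader exists, becomes MaybeLeaderMissing. Both variants trigger the eager stale-peer check shown earlier, but only LeaderMissing is logged and counted in the leader-missing gauge.

#[derive(Debug, PartialEq, Clone, Copy)]
enum StaleState {
    Valid,
    ToValidate,
    LeaderMissing,
    MaybeLeaderMissing,
}

fn classify(
    secs_without_leader: u64,
    abnormal_secs: u64,
    max_secs: u64,
    initialized: bool,
    promotable: bool,
) -> StaleState {
    if secs_without_leader >= max_secs {
        // Old, slow path: validate the peer's existence with PD.
        StaleState::ToValidate
    } else if secs_without_leader >= abnormal_secs {
        if initialized && promotable {
            StaleState::LeaderMissing
        } else {
            // Uninitialized peers and learners may not have leader info
            // even if there is a valid leader.
            StaleState::MaybeLeaderMissing
        }
    } else {
        StaleState::Valid
    }
}

fn main() {
    // A learner silent for 15 minutes is only *maybe* missing its leader;
    // a voter in the same situation definitely is.
    assert_eq!(classify(900, 600, 7200, true, false), StaleState::MaybeLeaderMissing);
    assert_eq!(classify(900, 600, 7200, true, true), StaleState::LeaderMissing);
}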
9 changes: 9 additions & 0 deletions metrics/grafana/tikv_details.json
@@ -38331,6 +38331,15 @@
"legendFormat": "{{instance}}-{{reason}}",
"refId": "A",
"step": 10
},
{
"expr": "sum(delta(tikv_raftstore_check_stale_peer{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)",
"format": "time_series",
"hide": false,
"intervalFactor": 2,
"legendFormat": "{{instance}}-stale-peer",
"refId": "B",
"step": 10
}
],
"thresholds": [],
58 changes: 58 additions & 0 deletions tests/integrations/raftstore/test_merge.rs
@@ -382,6 +382,64 @@ fn test_node_gc_uninitialized_peer_after_merge() {
cluster.must_region_not_exist(left.get_id(), 4);
}

/// Tests that a missing leader triggers check stale peer requests.
#[test]
fn test_node_gc_uninitialized_peer_after_merge_on_leader_missing() {
let mut cluster = new_node_cluster(0, 4);
configure_for_merge(&mut cluster);
ignore_merge_target_integrity(&mut cluster);
cluster.cfg.raft_store.raft_election_timeout_ticks = 5;
cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40);
cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(100);
cluster.cfg.raft_store.abnormal_leader_missing_duration = ReadableDuration::millis(100);
// Set a large max_leader_missing_duration so that check stale peer will
// only be triggered by leader missing.
cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration::hours(1);

let pd_client = Arc::clone(&cluster.pd_client);
pd_client.disable_default_operator();

cluster.run_conf_change();

cluster.must_put(b"k1", b"v1");
cluster.must_put(b"k3", b"v3");

// test if an uninitialized stale peer before conf removal is destroyed
// automatically
let region = pd_client.get_region(b"k1").unwrap();
pd_client.must_add_peer(region.get_id(), new_peer(2, 2));
pd_client.must_add_peer(region.get_id(), new_peer(3, 3));

cluster.must_split(&region, b"k2");
let left = pd_client.get_region(b"k1").unwrap();
let right = pd_client.get_region(b"k2").unwrap();

// Block snapshot messages, so that new peers will never be initialized.
cluster.add_send_filter(CloneFilterFactory(
RegionPacketFilter::new(left.get_id(), 4)
.msg_type(MessageType::MsgSnapshot)
.direction(Direction::Recv),
));
// Add peer (4,4), remove peer (4,4) and then merge regions.
// Peer (4,4) will be an uninitialized stale peer.
pd_client.must_add_peer(left.get_id(), new_peer(4, 4));
cluster.must_region_exist(left.get_id(), 4);
cluster.add_send_filter(IsolationFilterFactory::new(4));
pd_client.must_remove_peer(left.get_id(), new_peer(4, 4));
pd_client.must_merge(left.get_id(), right.get_id());
cluster.clear_send_filters();

// Wait for the peer (4,4) to be destroyed.
sleep_ms(
3 * cluster
.cfg
.raft_store
.abnormal_leader_missing_duration
.as_millis(),
);
cluster.must_region_not_exist(left.get_id(), 4);
}

#[test]
fn test_node_merge_slow_split_right() {
test_node_merge_slow_split(true);
