diff --git a/test/src/node.rs b/test/src/node.rs
index e7f906a289..ad25d7fe89 100644
--- a/test/src/node.rs
+++ b/test/src/node.rs
@@ -38,6 +38,16 @@ pub(crate) struct ProcessGuard {
     pub killed: bool,
 }
 
+impl ProcessGuard {
+    pub(crate) fn is_alive(&mut self) -> bool {
+        let try_wait = self.child.try_wait();
+        match try_wait {
+            Ok(status_op) => status_op.is_none(),
+            Err(_err) => false,
+        }
+    }
+}
+
 impl Drop for ProcessGuard {
     fn drop(&mut self) {
         if !self.killed {
@@ -738,6 +748,15 @@ impl Node {
         g.take()
     }
 
+    pub(crate) fn is_alive(&mut self) -> bool {
+        let mut g = self.inner.guard.write().unwrap();
+        if let Some(guard) = g.as_mut() {
+            guard.is_alive()
+        } else {
+            false
+        }
+    }
+
     pub fn stop(&mut self) {
         drop(self.take_guard());
     }
diff --git a/test/src/rpc.rs b/test/src/rpc.rs
index 2f91be7e83..bb4b9a81dc 100644
--- a/test/src/rpc.rs
+++ b/test/src/rpc.rs
@@ -174,13 +174,24 @@ impl RpcClient {
     }
 
     pub fn wait_rpc_ready(&self) {
+        self.wait_rpc_ready_internal(|| {
+            panic!("wait rpc ready timeout");
+        });
+    }
+
+    pub fn wait_rpc_ready_internal<F>(&self, fail: F) -> bool
+    where
+        F: Fn(),
+    {
         let now = std::time::Instant::now();
         while self.inner.get_tip_block_number().is_err() {
             std::thread::sleep(std::time::Duration::from_millis(100));
             if now.elapsed() > std::time::Duration::from_secs(60) {
-                panic!("wait rpc ready timeout");
+                fail();
+                return false;
             }
         }
+        true
     }
 
     pub fn get_block_template(
diff --git a/test/src/specs/fault_injection/randomly_kill.rs b/test/src/specs/fault_injection/randomly_kill.rs
index 92c6101805..e3f42a67d0 100644
--- a/test/src/specs/fault_injection/randomly_kill.rs
+++ b/test/src/specs/fault_injection/randomly_kill.rs
@@ -11,8 +11,23 @@ impl Spec for RandomlyKill {
     fn run(&self, nodes: &mut Vec<Node>) {
         let mut rng = thread_rng();
         let node = &mut nodes[0];
-        for _ in 0..rng.gen_range(10..20) {
-            node.rpc_client().wait_rpc_ready();
+        let max_restart_times = rng.gen_range(10..20);
+
+        let mut node_crash_times = 0;
+
+        let mut randomly_kill_times = 0;
+        while randomly_kill_times < max_restart_times {
+            node.rpc_client().wait_rpc_ready_internal(|| {});
+
+            if !node.is_alive() {
+                node.start();
+                node_crash_times += 1;
+
+                if node_crash_times > 3 {
+                    panic!("Node crash too many times");
+                }
+            }
+
             let n = rng.gen_range(0..10);
             // TODO: the kill of child process and mining are actually sequential here
             // We need to find some way to so these two things in parallel.
@@ -25,6 +40,7 @@ impl Spec for RandomlyKill {
             }
             info!("Stop the node");
             node.stop_gracefully();
+            randomly_kill_times += 1;
             info!("Start the node");
             node.start();
         }