From e95a70a96fb0dc17f7470686e923087a75e24c38 Mon Sep 17 00:00:00 2001
From: Tom Scogland
Date: Fri, 13 Mar 2020 11:10:21 -0700
Subject: [PATCH] sharness: add a per-test global timeout option

After recent frustration with a variety of hangs, this provides a new
environment variable `FLUX_TEST_TIMEOUT` that provides each individual
top-level sharness test (test_expect_success or similar) with an
individual timeout of that value in seconds. After that many seconds
the sharness script receives a signal, prints an error, kills the
current command, and proceeds to subsequent tests. The mechanism for
this is a little bit awkward because of the way that sharness uses eval
to execute tests, but has proven reliable in my tests so far.
---
 t/sharness.sh | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/t/sharness.sh b/t/sharness.sh
index c9f2bcac40fe..157adcb474a0 100644
--- a/t/sharness.sh
+++ b/t/sharness.sh
@@ -325,7 +325,27 @@ test_pause() {
 	fi
 }
 
+die_on_alarm() {
+	kill -9 $! > /dev/null 2>&1 # kill currently executing command
+	echo "Top-level test timed out"
+}
+
 test_eval_() {
+	( # start a subshell in the background to provide a timeout
+		set -e
+		parent_pid=$$
+		i=0
+		while kill -0 $parent_pid ; do
+			sleep 1
+			if test "$i" -gt ${FLUX_TEST_TIMEOUT:-120} ; then
+				break
+			fi
+			i=$(($i+1))
+		done
+		kill -ALRM $$ # send ALRM to parent
+	) &
+	ALRM=$!
+	trap die_on_alarm ALRM
 	# This is a separate function because some tests use
 	# "return" to end a test_expect_success block early.
 	case ",$test_prereq," in
@@ -336,6 +356,10 @@ test_eval_() {
 		eval </dev/null >&3 2>&4 "$*"
 		;;
 	esac
+	ret=$?
+	trap - ALRM
+	kill $ALRM >/dev/null 2>&1
+	return $ret
 }
 
 test_run_() {