From 29063a7d88b846b664ce5e38d18e259a1211ad7d Mon Sep 17 00:00:00 2001 From: Tom Scogland Date: Thu, 2 May 2024 19:33:22 -0700 Subject: [PATCH] test/unreservable: add test for blocked jobs overmatching problem: we didn't have a test to reproduce the issue with blocked jobs being constantly reconsidered. solution: with the new failed stats support, after the fix there should be no more than 10 failures to match; 14 is the (somewhat deterministic) count this test produces if the issue comes back. --- t/t4013-unreservable.sh | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/t/t4013-unreservable.sh b/t/t4013-unreservable.sh index c83e0c6a5..4a082580b 100755 --- a/t/t4013-unreservable.sh +++ b/t/t4013-unreservable.sh @@ -58,12 +58,42 @@ test_expect_success 'create 3 jobs with the same constraint, so two are unreserv --progress --jps \ --flags=waitable \ --setattr=exec.test.run_duration=0.01s \ - sleep 0.25 + sleep 0.5 ' test_expect_success 'ensure all three succeeded' ' flux job wait -av ' +test_expect_success 'drain a few nodes' ' + flux resource drain 1-5 test with drained nodes +' +test_expect_success 'create a set of 2 inactive jobs' ' + flux submit --cc=1-2 --quiet \ + -N 1 --exclusive \ + --flags=waitable \ + --requires="host:fake[4]" \ + --progress --jps \ + --setattr=exec.test.run_duration=0.01s \ + hostname +' +test_expect_success 'create a set of 2 running jobs' ' + flux submit --progress --jps --quiet --cc=1-2 --wait-event=start -N1 \ + --flags=waitable \ + --requires=compute \ + --setattr=exec.test.run_duration=0.01s \ + hostname +' +test_expect_success 'undrain nodes' ' + flux resource undrain 1-5 +' +test_expect_success 'ensure all four succeeded' ' + flux job wait -av +' +test_expect_success 'failed match requests should be 10, but 14 is bad' ' + NJOBS_FAILED="$(rpc sched-fluxion-resource.stats-get | jq ".match.failed.njobs")" && + echo njobs failed $NJOBS_FAILED && + test $NJOBS_FAILED = 10 +' test_expect_success 'unload 
fluxion' ' flux module remove sched-fluxion-qmanager && flux module remove sched-fluxion-resource &&