Skip to content

Commit

Permalink
test: add unreservable job test
Browse files Browse the repository at this point in the history
problem: ensure we don't end up with unreservable jobs treated as
unsatisfiable again

solution: add a test that runs three jobs that are only allowed to run
on the same resource, each of which takes all of the remaining time.
This forces jobs two and three to be unreservable until one completes.
It's technically sensitive to timing, but in this constrained case I
would be surprised if it proves to be flaky.  We should still set
something up in sharness for creating jobs that wait on a trigger on a
domain socket, so that this kind of test can easily be made fully
deterministic.
  • Loading branch information
trws committed May 6, 2024
1 parent ca65c6e commit a9808c3
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 0 deletions.
1 change: 1 addition & 0 deletions t/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ set(ALL_TESTS
t4010-match-conf.t
t4011-match-duration.t
t4012-set-status.t
t4013-unreservable.sh
t5000-valgrind.t
t5100-issues-test-driver.t
t6000-graph-size.t
Expand Down
72 changes: 72 additions & 0 deletions t/t4013-unreservable.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env bash
#
# Regression test: three jobs are constrained to the same single node, so the
# second and third cannot be reserved until an earlier one completes.  All
# three must still run to completion.

test_description='Test that unreservable jobs are not treated as unsatisfiable'

# Quote the path so the include still works when the test directory
# contains whitespace.
. "$(dirname "$0")/sharness.sh"

# NOTE(review): presumably read by test_under_flux (defined outside this
# file), so they are exported before it is called -- confirm.
export TEST_UNDER_FLUX_QUORUM=1
export TEST_UNDER_FLUX_START_MODE=leader
# Call the Flux RPC whose topic string is given as $1 (no other arguments
# are passed to rpc()) and print the response string on stdout.
# The topic is interpolated directly into the Python snippet, so it must not
# contain double quotes or backslashes.
rpc() {
	local topic=$1
	local snippet="import flux, json; print(flux.Flux().rpc(\"${topic}\").get_str())"

	flux python -c "${snippet}"
}

# Hand control to test_under_flux, which is defined outside this file.
# NOTE(review): the arguments look like an instance size (16384) and a
# personality name (system) -- confirm against the sharness helpers.
test_under_flux 16384 system

# Force-remove sched-simple; the fluxion modules are loaded further down and
# sched-simple is loaded again by the final test.
test_expect_success 'unload sched-simple' '
	flux module remove -f sched-simple
'

# Load the configuration used by the rest of the test:
#   - hosts fake0..fake10, each with cores 0-63 and gpus 0-3
#   - the "compute" property on all of those hosts
#   - qmanager queue policy "easy"
#   - fluxion resource match policy, prune filters and match format
#
# The heredoc delimiter is written as \EOF rather than 'EOF'.  This body is
# itself a single-quoted string, so inner single quotes would just end and
# reopen that string and leave the delimiter unquoted when the body is
# evaluated.  The backslash form survives and keeps the heredoc literal.
test_expect_success 'update configuration' '
flux config load <<-\EOF
[[resource.config]]
hosts = "fake[0-10]"
cores = "0-63"
gpus = "0-3"
[[resource.config]]
hosts = "fake[0-10]"
properties = ["compute"]
[sched-fluxion-qmanager]
queue-policy = "easy"
[sched-fluxion-resource]
match-policy = "firstnodex"
prune-filters = "ALL:core,ALL:gpu,cluster:node,rack:node"
match-format = "rv1_nosched"
EOF
'

# Force-reload the resource module with the noverify and monitor-force-up
# options.
# NOTE(review): presumably this makes the fake hosts from the loaded
# configuration appear up without real brokers behind them -- confirm.
test_expect_success 'reload resource with monitor-force-up' '
	flux module reload -f resource noverify monitor-force-up
'

# Load both fluxion modules: sched-fluxion-resource first, then
# sched-fluxion-qmanager.
test_expect_success 'load fluxion modules' '
	flux module load sched-fluxion-resource &&
		flux module load sched-fluxion-qmanager
'
# Send a sched.resource-status RPC and print the reply; the test only passes
# once that RPC returns successfully.  This uses the rpc() helper defined
# near the top of this script instead of repeating its Python one-liner.
test_expect_success 'wait for fluxion to be ready' '
	rpc sched.resource-status
'
# Submit three copies (--cc=1-3) of an exclusive, single-node job that is
# constrained to host fake5.  Only one copy can hold the node at a time, so
# the other two have to wait for it.  The jobs are flagged waitable so the
# next test can collect their results.
# NOTE(review): exec.test.run_duration presumably sets a simulated run time
# for the test execution path instead of really running sleep -- confirm.
test_expect_success 'create 3 jobs with the same constraint, so two are unreservable' '
	flux submit --cc=1-3 --quiet \
		-N 1 --exclusive \
		--requires="host:fake[5]" \
		--progress --jps \
		--flags=waitable \
		--setattr=exec.test.run_duration=0.01s \
		sleep 0.25
'

# Wait on all waitable jobs (-a) with verbose output (-v); the test fails if
# the wait command reports a failure.
test_expect_success 'ensure all three succeeded' '
	flux job wait -av
'
'
# Tear down in the reverse of the load order: remove qmanager, then the
# fluxion resource module, and finally load sched-simple again.
test_expect_success 'unload fluxion' '
	flux module remove sched-fluxion-qmanager &&
		flux module remove sched-fluxion-resource &&
		flux module load sched-simple
'

test_done

0 comments on commit a9808c3

Please sign in to comment.