forked from flux-framework/flux-sched
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
testsuite: add test for module reload with running jobs
Problem: There is no reproducer for issue flux-framework#1035: fluxion can't restart with queues enabled. Add a new test driver for issue reproducers: t5100-issues-test-driver.t. Then add a reproducer script for flux-framework#1035 to the t/issues subdirectory.
- Loading branch information
Showing
3 changed files
with
104 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#!/bin/bash | ||
# | ||
# Ensure fluxion modules can recover running jobs with rv1 match format. | ||
# | ||
# log MESSAGE...
# Write all arguments, joined by single spaces and prefixed with the issue
# tag, as one line on stderr.
# The message is passed as a printf argument instead of being spliced into
# the format string, so extra arguments are not dropped and a literal '%'
# in the message is printed verbatim.
log() { printf 'issue#1035: %s\n' "$*" >&2; }
# die MESSAGE...
# Report MESSAGE through log, then terminate the script with exit status 1.
die() {
    log "$@"
    exit 1
}
# run_timeout ARG...
# Run scripts/run_timeout.py from SHARNESS_TEST_SRCDIR with the interpreter
# named by PYTHON (python3 when PYTHON is unset or empty), forwarding every
# argument unchanged. The exit status is that of the interpreter.
# NOTE(review): callers in this script pass a number followed by a command
# line, presumably a timeout in seconds — confirm against run_timeout.py.
run_timeout() {
    local interp=${PYTHON:-python3}
    local helper=${SHARNESS_TEST_SRCDIR}/scripts/run_timeout.py
    "$interp" "$helper" "$@"
}
|
||
# On first entry, re-exec this script inside a `flux start -s 4` instance.
# ISSUE_1035_TEST_ACTIVE is exported first so the relaunched copy skips this
# branch instead of recursing. "$0" is quoted so a script path containing
# whitespace survives the relaunch.
if test -z "$ISSUE_1035_TEST_ACTIVE"; then
    export ISSUE_1035_TEST_ACTIVE=t
    log "relaunching under test instance of size 4..."
    exec flux start -s 4 "$0" "$@"
fi
test "$(flux resource list -no '{nnodes}')" -eq 4 || die "test requires 4 nodes"

# The schedulers and the resource module are removed before resource.R is
# rewritten in the KVS below; they are loaded again afterwards.
log "Unloading modules..."
flux module remove sched-fluxion-qmanager
flux module remove sched-fluxion-resource
flux module remove resource

# Rewrite resource.R in place: read it, tag 0-1 with "batch" and 2-3 with
# "debug", and store the result back under the same key.
log "Amending instance resource set with properties: batch, debug..."
flux kvs get resource.R \
    | flux R set-property batch:0-1 debug:2-3 \
    | flux kvs put -r resource.R=-
#flux kvs get resource.R | jq

# Define one queue per property and select the rv1 match format, then read
# the config back to confirm the match-format setting took effect.
log "Loading config with queues and match-format=\"rv1\"..."
flux config load <<EOF
[queues.debug]
requires = ["debug"]
[queues.batch]
requires = ["batch"]
[sched-fluxion-resource]
match-format = "rv1"
EOF
flux config get \
    | jq -e '."sched-fluxion-resource"."match-format" == "rv1"' \
    || die "failed to set sched-fluxion-resource.match-format = rv1"

# NOTE(review): "noverify" presumably stops the resource module from checking
# the amended R against the real hardware — confirm in flux-core docs.
log "Reloading modules..."
flux module load resource noverify
flux module load sched-fluxion-resource
flux module load sched-fluxion-qmanager
flux dmesg -HL | grep version | tail -2

log "Starting all queues..."
flux queue start --all --quiet
flux queue status
flux resource list -s free

# One 2-node job per queue; each submit waits for the job's "start" event.
log "Submitting two sleep jobs..."
run_timeout 10 flux submit -N2 --wait-event=start --queue=debug sleep inf
run_timeout 10 flux submit -N2 --wait-event=start --queue=batch sleep inf

# The step under test: reload the fluxion modules while both jobs exist.
log "Reloading fluxion..."
flux module unload sched-fluxion-qmanager
flux module reload sched-fluxion-resource
flux module load sched-fluxion-qmanager

# Pass only if `flux jobs` still lists exactly two jobs after the reload.
log "Checking that running jobs were recovered..."
flux jobs -ano "{id.f58:>12} {status_abbrev:>2} {name}"
test "$(flux jobs -no '{id}' | wc -l)" -eq 2 \
    || die "Expected 2 jobs still running"
flux cancel --all
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#!/bin/sh
#
# Driver for issue reproducers: submits every file matching
# ${SHARNESS_TEST_SRCDIR}/issues/${T5100_ISSUES_GLOB} as a job, each wrapped
# in its own `flux start`, then registers one sharness test per job whose
# body is `flux job attach <id>`.
#
# Environment:
#   T5100_ISSUES_GLOB  glob selecting which reproducers to run (default "*")
#
test_description='Verify that fixed issues remain fixed'

# Locate sharness.sh next to this script; quoted so a path containing
# whitespace still resolves.
. "$(dirname "$0")/sharness.sh"

if test_have_prereq ASAN; then
    skip_all='skipping issues tests under AddressSanitizer'
    test_done
fi
skip_all_unless_have jq

SIZE=2
test_under_flux "${SIZE}"
echo "# $0: flux session size will be ${SIZE}"

if test -z "$T5100_ISSUES_GLOB"; then
    T5100_ISSUES_GLOB="*"
fi

# The directory prefix is quoted; the glob itself is deliberately left
# unquoted so the shell expands it to the list of reproducer scripts.
# NOTE(review): {./%} presumably names each job after the script's basename
# without extension — confirm against flux-bulksubmit(1).
flux bulksubmit -n1 -o pty --job-name={./%} -t 10m \
    --flags=waitable \
    --quiet --watch \
    flux start {} \
    ::: "${SHARNESS_TEST_SRCDIR}"/issues/${T5100_ISSUES_GLOB}

# One test per submitted job. The job name is quoted so it always arrives as
# exactly one argument (the test title), even if it is empty or has spaces.
for id in $(flux jobs -ano {id}); do
    test_expect_success "$(flux jobs -no {name} "$id")" "flux job attach $id"
done

test_done