-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
testsuite: Add job instance restart tests
Add initial tests to see that jobs can survive instance restarts using the job-exec testexec execution plugin.
- Loading branch information
Showing
2 changed files
with
94 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
#!/bin/sh | ||
|
||
test_description='Test instance restart and still running jobs with testexec' | ||
|
||
# Append --logfile option if FLUX_TESTS_LOGFILE is set in environment: | ||
test -n "$FLUX_TESTS_LOGFILE" && set -- "$@" --logfile | ||
# Source the sharness test library that lives next to this script; it is | ||
# sourced after the positional parameters were (possibly) extended above. | ||
. `dirname $0`/sharness.sh | ||
|
||
# NOTE(review): exported before test_under_flux is called; the consumer of | ||
# this variable is not visible in this file -- presumably the test harness | ||
# or the Flux instance reads it to allow restart behavior.  Confirm. | ||
export FLUX_INSTANCE_RESTART=t | ||
|
||
# Helper defined outside this file (loaded via sharness.sh above). | ||
# NOTE(review): presumably runs the remaining tests inside a Flux test | ||
# instance of size 1 with the "job" module set -- confirm against the helper. | ||
test_under_flux 1 job | ||
|
||
# Start a nested Flux instance with content.backing-path set to | ||
# $(pwd)/content.sqlite and submit one job from inside it, requesting | ||
# system.exec.test.run_duration=100s.  The output of "flux mini submit" | ||
# is saved to id1.out; later tests read it back with $(cat id1.out) and | ||
# use it as the job id. | ||
test_expect_success 'run a testexec job in persistent instance (long run)' ' | ||
flux start -o,--setattr=content.backing-path=$(pwd)/content.sqlite \ | ||
flux mini submit \ | ||
--flags=debug \ | ||
--setattr=system.exec.test.run_duration=100s \ | ||
hostname >id1.out | ||
' | ||
|
||
# Start a new instance on the same content.sqlite file and, inside it: | ||
#   1. save the eventlog of the job from id1.out to eventlog_long1.out, | ||
#   2. save the output of "flux jobs -n" to jobs_long1.out, | ||
#   3. cancel the job. | ||
# The three commands are joined with ";" inside sh -c, so only the exit | ||
# status of the final "flux job cancel" decides whether sh -c succeeds. | ||
# Afterwards the test requires "reattach-start" and "reattach-finish" in | ||
# the saved eventlog and the job id in the saved job listing. | ||
test_expect_success 'restart instance, reattach to running job, cancel it (long run)' ' | ||
flux start -o,--setattr=content.backing-path=$(pwd)/content.sqlite \ | ||
sh -c "flux job eventlog $(cat id1.out) > eventlog_long1.out; \ | ||
flux jobs -n > jobs_long1.out; \ | ||
flux job cancel $(cat id1.out)" && | ||
grep "reattach-start" eventlog_long1.out && | ||
grep "reattach-finish" eventlog_long1.out && | ||
grep $(cat id1.out) jobs_long1.out | ||
' | ||
|
||
# Start the instance once more on the same content.sqlite file, after the | ||
# previous test cancelled the job.  Saves the job eventlog and the output | ||
# of "flux jobs -n" again, then requires: | ||
#   - an eventlog line matching both "finish" and "status", and | ||
#   - that the job id no longer appears in the "flux jobs -n" output. | ||
test_expect_success 'restart instance, job completed (long run)' ' | ||
flux start -o,--setattr=content.backing-path=$(pwd)/content.sqlite \ | ||
sh -c "flux job eventlog $(cat id1.out) > eventlog_long2.out; \ | ||
flux jobs -n > jobs_long2.out" && | ||
grep "finish" eventlog_long2.out | grep status && | ||
test_must_fail grep $(cat id1.out) jobs_long2.out | ||
' | ||
|
||
# reattach_finish will indicate to testexec that the job finished | ||
# right after reattach, emulating a job that finished before the | ||
# instance restarted. | ||
# | ||
# Same submission as the "long run" case, plus | ||
# system.exec.test.reattach_finish=1; the output of "flux mini submit" | ||
# is saved to id2.out for the follow-up test. | ||
test_expect_success 'run a testexec job in persistent instance (exit run)' ' | ||
flux start -o,--setattr=content.backing-path=$(pwd)/content.sqlite \ | ||
flux mini submit \ | ||
--flags=debug \ | ||
--setattr=system.exec.test.reattach_finish=1 \ | ||
--setattr=system.exec.test.run_duration=100s \ | ||
hostname >id2.out | ||
' | ||
|
||
# Start a new instance on the same content.sqlite file and save the | ||
# eventlog of the job from id2.out to eventlog_exit1.out.  The test then | ||
# requires "reattach-start" and "reattach-finish" in that eventlog, plus a | ||
# line matching both "finish" and "status".  No cancel is issued here, | ||
# unlike the "long run" restart test. | ||
test_expect_success 'restart instance, reattach to running job, its finished (exit run)' ' | ||
flux start -o,--setattr=content.backing-path=$(pwd)/content.sqlite \ | ||
sh -c "flux job eventlog $(cat id2.out) > eventlog_exit1.out" && | ||
grep "reattach-start" eventlog_exit1.out && | ||
grep "reattach-finish" eventlog_exit1.out && | ||
grep "finish" eventlog_exit1.out | grep status | ||
' | ||
|
||
# Submit a job directly (no nested "flux start") with | ||
# system.exec.test.run_duration=100s and save the output of | ||
# "flux mini submit" to id3.out.  The following tests unload and reload | ||
# the job modules and then look this job up again. | ||
test_expect_success 'start job under flux instance' ' | ||
flux mini submit \ | ||
--flags=debug \ | ||
--setattr=system.exec.test.run_duration=100s \ | ||
hostname >id3.out | ||
' | ||
|
||
# Unload the six job-related modules one after another, chained with && | ||
# so the test fails at the first removal that fails.  No KVS module is | ||
# removed here, as the test title states. | ||
test_expect_success 'remove all job related modules but not KVS' ' | ||
flux module remove job-exec && | ||
flux module remove sched-simple && | ||
flux module remove job-list && | ||
flux module remove job-info && | ||
flux module remove job-manager && | ||
flux module remove job-ingest | ||
' | ||
|
||
# Load the same six modules again, starting with job-manager and ending | ||
# with job-exec and sched-simple. | ||
# NOTE(review): the load order presumably reflects dependencies between | ||
# the modules -- confirm before reordering. | ||
test_expect_success 're-load all job related modules' ' | ||
flux module load job-manager && | ||
flux module load job-info && | ||
flux module load job-list && | ||
flux module load job-ingest && | ||
flux module load job-exec && | ||
flux module load sched-simple | ||
' | ||
|
||
# After the module reload, save the eventlog of the job from id3.out and | ||
# the output of "flux jobs -n", then require "reattach-start" and | ||
# "reattach-finish" in the eventlog and the job id in the job listing. | ||
test_expect_success 'job reattach with KVS namespace still existing' ' | ||
flux job eventlog $(cat id3.out) > eventlog_kvsexists1.out && | ||
flux jobs -n > jobs_kvsexists1.out && | ||
grep "reattach-start" eventlog_kvsexists1.out && | ||
grep "reattach-finish" eventlog_kvsexists1.out && | ||
grep $(cat id3.out) jobs_kvsexists1.out | ||
' | ||
|
||
# Final statement of the script: sharness helper (defined outside this | ||
# file) that marks the end of the test sequence. | ||
test_done |