Skip to content

Commit

Permalink
testsuite: Add job instance restart tests
Browse files Browse the repository at this point in the history
Add initial tests to see that jobs can survive instance restarts
using the job-exec testexec execution plugin.
  • Loading branch information
chu11 committed Nov 17, 2021
1 parent 5436ded commit 1af7735
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 1 deletion.
3 changes: 2 additions & 1 deletion t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ clean-local:
LONGTESTSCRIPTS = \
t5000-valgrind.t \
t3100-flux-in-flux.t \
t3200-instance-restart.t
t3200-instance-restart.t \
t3202-instance-restart-testexec.t

# This list is included in both TESTS and dist_check_SCRIPTS.
TESTSCRIPTS = \
Expand Down
93 changes: 93 additions & 0 deletions t/t3202-instance-restart-testexec.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/bin/sh
#
# Checks that jobs run through the job-exec "testexec" execution plugin are
# still tracked after the enclosing Flux instance is restarted, and after the
# job related modules are removed and re-loaded.

test_description='Test instance restart and still running jobs with testexec'

# Append --logfile option if FLUX_TESTS_LOGFILE is set in environment:
test -n "$FLUX_TESTS_LOGFILE" && set -- "$@" --logfile

# Source the sharness test library that lives next to this script.  $0 and the
# command substitution are quoted so the lookup still works when the test
# directory path contains whitespace or glob characters.
. "$(dirname "$0")/sharness.sh"

# Exported so that every process started below inherits it.
# NOTE(review): presumably consumed by the test harness or broker to enable
# instance restart behavior - confirm against sharness.d / broker sources.
export FLUX_INSTANCE_RESTART=t

# Run the remainder of this script under a test Flux instance; the module
# reload tests near the end of the file issue flux commands against it.
# NOTE(review): the arguments are presumably instance size (1) and
# personality ("job") - confirm against the test_under_flux definition.
test_under_flux 1 job

# Start an instance whose content store is backed by ./content.sqlite and use
# it only to submit one testexec job with a 100s run duration.  The stdout of
# the whole command (the submitted job ID) is saved in id1.out for the
# following "(long run)" tests.
# The backing path is quoted so a working directory containing whitespace or
# glob characters is still passed as a single, unmodified option.
test_expect_success 'run a testexec job in persistent instance (long run)' '
	flux start -o,--setattr=content.backing-path="$(pwd)/content.sqlite" \
		flux mini submit \
			--flags=debug \
			--setattr=system.exec.test.run_duration=100s \
			hostname >id1.out
'

# Restart on the same content.sqlite.  Inside the new instance: capture the
# eventlog of the job from id1.out, capture the "flux jobs -n" listing, then
# cancel the job.  The three commands are joined with ";", so only the exit
# status of the final "flux job cancel" is propagated to the && chain.
# Afterwards the captured eventlog must contain "flux-reattach" and
# "reattached" but not "finish", and the job ID must appear in the listing.
# $(pwd) and the grep pattern are quoted so neither is word-split or globbed;
# in particular an empty id1.out can no longer turn the file name into the
# grep pattern.
test_expect_success 'restart instance, reattach to running job, cancel it (long run)' '
	flux start -o,--setattr=content.backing-path="$(pwd)/content.sqlite" \
		sh -c "flux job eventlog $(cat id1.out) > eventlog_long1.out; \
			flux jobs -n > jobs_long1.out; \
			flux job cancel $(cat id1.out)" &&
	grep "flux-reattach" eventlog_long1.out &&
	grep "reattached" eventlog_long1.out &&
	test_must_fail grep "finish" eventlog_long1.out &&
	grep "$(cat id1.out)" jobs_long1.out
'

# Restart once more on the same content.sqlite, after the job was canceled by
# the previous test.  The eventlog captured now must contain "finish" and the
# job ID must no longer show up in the "flux jobs -n" listing.
# The commands inside sh -c are joined with ";", so only the status of
# "flux jobs -n" is propagated.
# The grep pattern is quoted: unquoted, an empty id1.out made grep take
# "jobs_long2.out" as the pattern and read stdin instead of the file, which
# could let the negative check pass without inspecting the listing.
test_expect_success 'restart instance, job completed (long run)' '
	flux start -o,--setattr=content.backing-path="$(pwd)/content.sqlite" \
		sh -c "flux job eventlog $(cat id1.out) > eventlog_long2.out; \
			flux jobs -n > jobs_long2.out" &&
	grep "finish" eventlog_long2.out &&
	test_must_fail grep "$(cat id1.out)" jobs_long2.out
'

# reattach_finish will indicate to testexec that the job finished
# right after reattach, emulating a job that finished before the
# instance restarted.
#
# As in the "(long run)" case, the instance is backed by ./content.sqlite and
# the submitted job ID is saved, this time in id2.out.  The backing path is
# quoted so a working directory containing whitespace or glob characters is
# passed through as one unmodified option.
test_expect_success 'run a testexec job in persistent instance (exit run)' '
	flux start -o,--setattr=content.backing-path="$(pwd)/content.sqlite" \
		flux mini submit \
			--flags=debug \
			--setattr=system.exec.test.reattach_finish=1 \
			--setattr=system.exec.test.run_duration=100s \
			hostname >id2.out
'

# Restart on the same content.sqlite and capture the eventlog of the job from
# id2.out.  Because that job was submitted with reattach_finish=1, the
# eventlog is expected to contain "flux-reattach", "reattached" and "finish".
# The backing path is quoted so it is never word-split or globbed.
test_expect_success 'restart instance, reattach to running job, its finished (exit run)' '
	flux start -o,--setattr=content.backing-path="$(pwd)/content.sqlite" \
		sh -c "flux job eventlog $(cat id2.out) > eventlog_exit1.out" &&
	grep "flux-reattach" eventlog_exit1.out &&
	grep "reattached" eventlog_exit1.out &&
	grep "finish" eventlog_exit1.out
'

# Unlike the tests above, no nested "flux start" is used here: the job is
# submitted straight to the instance this script is running under.  Its ID is
# written to id3.out for the module reload checks that follow.
test_expect_success 'start job under flux instance' '
	flux mini submit --flags=debug \
		--setattr=system.exec.test.run_duration=100s hostname \
		>id3.out
'

# Unload the job related modules one at a time, in the listed order, stopping
# at the first failure.  The KVS is intentionally not touched (see test title).
# "return 1" assumes sharness evaluates the test body inside a function, which
# is the usual sharness idiom for aborting a test from within a loop.
test_expect_success 'remove all job related modules but not KVS' '
	for module in job-exec sched-simple job-list job-info job-manager job-ingest
	do
		flux module remove "$module" || return 1
	done
'

# Load the job related modules again, one at a time and in the listed order,
# stopping at the first failure.
# "return 1" assumes sharness evaluates the test body inside a function, which
# is the usual sharness idiom for aborting a test from within a loop.
test_expect_success 're-load all job related modules' '
	for module in job-manager job-info job-list job-ingest job-exec sched-simple
	do
		flux module load "$module" || return 1
	done
'

# After the job modules were removed and re-loaded, the job recorded in
# id3.out must show "flux-reattach" and "reattached" in its eventlog and must
# still be listed by "flux jobs -n".
# The job ID substitutions are quoted so they are passed as exactly one
# argument; in particular an empty id3.out can no longer turn the file name
# into the grep pattern.
test_expect_success 'job reattached with KVS namespace still existing' '
	flux job eventlog "$(cat id3.out)" > eventlog_kvsexists1.out &&
	flux jobs -n > jobs_kvsexists1.out &&
	grep "flux-reattach" eventlog_kvsexists1.out &&
	grep "reattached" eventlog_kvsexists1.out &&
	grep "$(cat id3.out)" jobs_kvsexists1.out
'

# Tell sharness that all tests have run.
test_done

0 comments on commit 1af7735

Please sign in to comment.