Skip to content

Commit

Permalink
testsuite: Add job instance restart tests
Browse files Browse the repository at this point in the history
Add initial tests to see that jobs can survive instance restarts
using the job-exec testexec execution plugin.
  • Loading branch information
chu11 committed Nov 16, 2021
1 parent b4cfd7a commit 0cb4678
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 1 deletion.
3 changes: 2 additions & 1 deletion t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ clean-local:
LONGTESTSCRIPTS = \
t5000-valgrind.t \
t3100-flux-in-flux.t \
t3200-instance-restart.t
t3200-instance-restart.t \
t3202-instance-restart-testexec.t

# This list is included in both TESTS and dist_check_SCRIPTS.
TESTSCRIPTS = \
Expand Down
58 changes: 58 additions & 0 deletions t/t3202-instance-restart-testexec.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/sh
#
# Sharness test script: restarts a Flux instance on the same content
# backing store and checks the state of testexec jobs afterwards.

test_description='Test instance restart and still running jobs with testexec'

# Append --logfile option if FLUX_TESTS_LOGFILE is set in environment:
test -n "$FLUX_TESTS_LOGFILE" && set -- "$@" --logfile

# Load the sharness harness located next to this script.  The path is
# quoted so that a checkout directory containing whitespace still works.
. "$(dirname "$0")/sharness.sh"

# NOTE(review): presumably read by the flux test harness and/or the
# broker to enable restart behavior -- confirm against its consumer.
export FLUX_INSTANCE_RESTART=t

# Start an instance whose content store is content.sqlite in the current
# directory and submit one job with a 100s testexec run_duration.  Stdout
# is captured in id1.out; later tests pass its contents as the job id.
# The backing path is quoted so a working directory with whitespace is
# not split into several arguments.
test_expect_success 'run a testexec job in persistent instance (long run)' '
	flux start -o,--setattr=content.backing-path="$(pwd)/content.sqlite" \
		flux mini submit \
		--flags=debug \
		--setattr=system.exec.test.run_duration=100s \
		hostname >id1.out
'

# Restart on the same content.sqlite.  Inside the instance: dump the job
# eventlog, list jobs, then cancel the job.  The inner commands are joined
# with ";", so only the status of the final "flux job cancel" becomes the
# exit status of sh; the greps below validate the captured output.
# Expectations: the eventlog shows the reattach events and no "finish",
# and the job id still appears in the "flux jobs" listing.
# The job id pattern is quoted so an empty id1.out cannot turn the file
# name into the grep pattern.
test_expect_success 'restart instance, reattach to running job, cancel it (long run)' '
	flux start -o,--setattr=content.backing-path="$(pwd)/content.sqlite" \
		sh -c "flux job eventlog $(cat id1.out) > eventlog_long1.out; \
			flux jobs -n > jobs_long1.out; \
			flux job cancel $(cat id1.out)" &&
	grep "flux-reattach" eventlog_long1.out &&
	grep "reattached" eventlog_long1.out &&
	test_must_fail grep "finish" eventlog_long1.out &&
	grep "$(cat id1.out)" jobs_long1.out
'

# Restart once more after the cancel issued by the previous test.  The
# eventlog must now contain "finish" and the job id must no longer show
# up in the "flux jobs" listing.
# The job id pattern is quoted: unquoted, an empty id1.out would make grep
# take the file name as its pattern and fail, letting test_must_fail pass
# without checking anything.
test_expect_success 'restart instance, job completed (long run)' '
	flux start -o,--setattr=content.backing-path="$(pwd)/content.sqlite" \
		sh -c "flux job eventlog $(cat id1.out) > eventlog_long2.out; \
			flux jobs -n > jobs_long2.out" &&
	grep "finish" eventlog_long2.out &&
	test_must_fail grep "$(cat id1.out)" jobs_long2.out
'

# reattach_finish will indicate to testexec that the job finished
# right after reattach, emulating a job that finished before the
# instance restarted
# Submit a second job on the same content.sqlite, this time with the
# testexec reattach_finish attribute set in addition to the 100s
# run_duration.  Stdout is captured in id2.out and used as the job id by
# the next test.  The backing path is quoted so a working directory with
# whitespace is not split into several arguments.
test_expect_success 'run a testexec job in persistent instance (exit run)' '
	flux start -o,--setattr=content.backing-path="$(pwd)/content.sqlite" \
		flux mini submit \
		--flags=debug \
		--setattr=system.exec.test.reattach_finish=1 \
		--setattr=system.exec.test.run_duration=100s \
		hostname >id2.out
'

# Restart on the same content.sqlite and dump the eventlog of the job
# recorded in id2.out.  The eventlog is expected to contain both reattach
# events and a "finish" event.  The backing path is quoted so a working
# directory with whitespace is not split into several arguments.
test_expect_success 'restart instance, reattach to running job, its finished (exit run)' '
	flux start -o,--setattr=content.backing-path="$(pwd)/content.sqlite" \
		sh -c "flux job eventlog $(cat id2.out) > eventlog_exit1.out" &&
	grep "flux-reattach" eventlog_exit1.out &&
	grep "reattached" eventlog_exit1.out &&
	grep "finish" eventlog_exit1.out
'

test_done

0 comments on commit 0cb4678

Please sign in to comment.