Skip to content

Commit

Permalink
testsuite: cover offline KVS garbage collection
Browse files Browse the repository at this point in the history
Problem: there is no test coverage for offline KVS garbage
collection.

Add a sharness script that exercises this functionality.
Augment the shutdown-cmd sharness script to cover new shutdown options.
  • Loading branch information
garlick committed May 1, 2022
1 parent e06683f commit e4c886f
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 0 deletions.
1 change: 1 addition & 0 deletions t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ TESTSCRIPTS = \
t2807-dump-cmd.t \
t2808-shutdown-cmd.t \
t2809-job-purge.t \
t2810-kvs-garbage-collect.t \
t2900-job-timelimits.t \
t3000-mpi-basic.t \
t3001-mpi-personalities.t \
Expand Down
50 changes: 50 additions & 0 deletions t/t2808-shutdown-cmd.t
Original file line number Diff line number Diff line change
Expand Up @@ -136,4 +136,54 @@ test_expect_success 'flux-shutdown as initial program does not hang' '
test_expect_code 129 run_timeout 30 flux start flux shutdown
'

# Submit a 30 minute, single-slot batch job.  Its script runs one job,
# creates the marker file job6-has-started and then sleeps, which keeps the
# batch instance alive for the tests that follow.
# NOTE(review): $waitfile is defined outside the part of the file shown here;
# it is left unquoted in case it expands to a command plus arguments.
test_expect_success 'submit batch script and wait for it to start' '
	cat <<-\EOT >batch6.sh &&
	#!/bin/sh
	flux mini run /bin/true
	touch job6-has-started
	sleep 300
	EOT
	chmod +x batch6.sh &&
	rm -f job6-has-started &&
	flux mini batch -t30m -n1 batch6.sh >jobid6 &&
	$waitfile job6-has-started
'

# Point FLUX_URI at the batch instance recorded in jobid6, list the ids of
# all of its jobs, and require exactly one line of output.
test_expect_success 'one job has run in the batch job' '
	(
		FLUX_URI=$(flux uri --local $(cat jobid6)) \
			flux jobs -n -a -o {id}
	) >job6_list &&
	test $(wc -l <job6_list) -eq 1
'

# Shut down the batch instance recorded in jobid6, passing --dump=dump.tgz.
test_expect_success 'shutdown batch script with --dump' '
	(
		FLUX_URI=$(flux uri --local $(cat jobid6)) \
			flux shutdown --dump=dump.tgz
	)
'
# Listing the archive only succeeds if dump.tgz exists and tar can read it.
test_expect_success 'dump file was created' '
	tar -t -v -f dump.tgz
'
# Submit the same batch script again, this time handing the broker
# content.restore=dump.tgz, and wait for the marker file to reappear.
test_expect_success 'restart batch script from dump and wait for it to start' '
	rm -f job6-has-started &&
	flux mini batch -t30m -n1 --broker-opts=-Scontent.restore=dump.tgz \
		batch6.sh >jobid6_try2 &&
	$waitfile job6-has-started
'
# The restarted batch instance is expected to list two jobs: the one run
# before the dump and the one run by the restarted batch script.
test_expect_success 'two jobs have been run in batch job' '
	(FLUX_URI=$(flux uri --local $(cat jobid6_try2)) \
		flux jobs -n -a -o {id}) >job6_list_try2 &&
	test $(wc -l <job6_list_try2) -eq 2
'
# The id captured before the restart must appear in the new listing.
# - test -s rejects an empty job6_list, which would otherwise make the
#   search meaningless (an empty pattern matches every line).
# - The pattern is quoted so it cannot be word-split or dropped, -F treats
#   the id as a fixed string, and -- protects against a leading dash.
test_expect_success 'job id from before restart is in job listing' '
	test -s job6_list &&
	grep -F -- "$(cat job6_list)" job6_list_try2
'

# Shut down the restarted batch instance, this time passing --gc.
test_expect_success 'shutdown batch script with --gc' '
	(
		FLUX_URI=$(flux uri --local $(cat jobid6_try2)) \
			flux shutdown --gc
	)
'
# tar must be able to list the archive reached through dump/RESTORE.
test_expect_success 'dump file was created with RESTORE link' '
	tar -t -v -f dump/RESTORE
'

test_done
92 changes: 92 additions & 0 deletions t/t2810-kvs-garbage-collect.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/bin/sh
#
# Exercises offline KVS garbage collection: an instance dumps its content
# on shutdown, a later instance restores from that dump, and the number of
# stored objects is compared before and after.

test_description='Test offline KVS garbage collection'

# Quote both expansions so the test still loads sharness when the path to
# this script contains whitespace or glob characters.
. "$(dirname "$0")/sharness.sh"

# Generate runjobs.sh, the initial program used by the dump-producing runs.
# It ignores SIGHUP, submits ten /bin/true jobs, drains the queue, and then
# prints the object_count stat of whichever backing module is loaded.
# The heredoc delimiter is quoted, so the script text is written verbatim.
test_expect_success 'create test script' '
	cat <<-\EOT >runjobs.sh &&
	#!/bin/bash -e
	trap "" SIGHUP
	flux mini submit --cc=1-10 /bin/true >/dev/null
	flux queue drain
	backingmod=$(flux getattr content.backing-module)
	flux module stats --type int --parse object_count $backingmod
	EOT
	chmod +x runjobs.sh
'
# First run: statedir=state and content.dump=auto.  The object count
# printed by runjobs.sh is saved in object_count for later comparison.
test_expect_success 'run instance that leaves an auto dump' '
	mkdir -p state &&
	flux start \
		-o,-Sstatedir=state \
		-o,-Scontent.dump=auto \
		-o,-Slog-filename=dmesg.log \
		./runjobs.sh >object_count
'
# The broker log of that run must mention the dump.
test_expect_success 'broker logs report dump activity' '
	grep "dumping content to" dmesg.log
'
# state/dump/RESTORE must be a symlink that resolves to a regular file.
# The resolved path is saved in the file named archive for later tests.
# The command substitution is quoted: unquoted, an empty archive file would
# reduce the check to "test -f", which is true, and the test would pass
# without verifying anything.
test_expect_success 'dump exists and RESTORE symlink is valid' '
	test -h state/dump/RESTORE &&
	readlink -f state/dump/RESTORE >archive &&
	test -f "$(cat archive)"
'
# Second run on the same statedir with content.restore=auto.  The initial
# program records the content-sqlite object count in object_count2.
test_expect_success 'restart instance with auto restore' '
	flux start \
		-o,-Sstatedir=state \
		-o,-Scontent.restore=auto \
		-o,-Slog-filename=dmesg2.log \
		flux module stats --type int --parse object_count \
			content-sqlite >object_count2
'
# The broker log of the second run must mention the restore.
test_expect_success 'broker logs report restore activity' '
	grep "restoring content from" dmesg2.log
'
# The object count after the restore must be strictly lower than before.
# Both operands are quoted: unquoted, two empty count files would reduce
# the comparison to "test -gt", which is true, and the test would pass
# without comparing anything.  Quoted, an empty value is an error.
test_expect_success 'number of stored objects was reduced by GC' '
	before=$(cat object_count) &&
	after=$(cat object_count2) &&
	test "$before" -gt "$after"
'
# After the restore run the RESTORE symlink must no longer exist.
test_expect_success 'RESTORE symlink is gone' '
	test_must_fail test -h state/dump/RESTORE
'
# The archive path recorded earlier in the file named archive must still
# be a regular file.  The substitution is quoted so that an empty or
# missing path fails instead of degenerating to "test -f", which is true.
test_expect_success 'archive file remains' '
	test -f "$(cat archive)"
'

#
# Second scenario.  Same sequence as above, but with:
# - the content-files backing module
# - no statedir
# - an explicitly named dump file (foo.tgz) instead of auto
#
test_expect_success 'run instance that leaves a named dump' '
	flux start \
		-o,-Slog-filename=dmesg3.log \
		-o,-Scontent.dump=foo.tgz \
		-o,-Scontent.backing-module=content-files \
		./runjobs.sh >object_count3
'
# The broker log of this run must mention the dump.
test_expect_success 'broker logs report dump activity' '
	grep "dumping content to" dmesg3.log
'
# The named dump must exist as a regular file in the working directory.
test_expect_success 'dump exists in current directory' '
	test -f foo.tgz
'
# With an explicit dump path, no dump/RESTORE symlink is expected.
test_expect_success 'no RESTORE link was created because path is explicit' '
	test_must_fail test -h dump/RESTORE
'
# Start a fresh content-files instance with content.restore=foo.tgz and
# record its object count in object_count4.
test_expect_success 'restart instance and restore' '
	flux start \
		-o,-Slog-filename=dmesg4.log \
		-o,-Scontent.restore=foo.tgz \
		-o,-Scontent.backing-module=content-files \
		flux module stats --type int --parse object_count \
			content-files >object_count4
'
# The broker log of this run must mention the restore.
test_expect_success 'broker logs report restore activity' '
	grep "restoring content from" dmesg4.log
'
# The object count after the restore must be strictly lower than before.
# Both operands are quoted: unquoted, two empty count files would reduce
# the comparison to "test -gt", which is true, and the test would pass
# without comparing anything.  Quoted, an empty value is an error.
test_expect_success 'number of stored objects was reduced by GC' '
	before=$(cat object_count3) &&
	after=$(cat object_count4) &&
	test "$before" -gt "$after"
'

test_done

0 comments on commit e4c886f

Please sign in to comment.