From e4c886fa09cae5baf0aa3e54793c69139868fa3b Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Tue, 26 Apr 2022 13:42:58 -0700 Subject: [PATCH] testsuite: cover offline KVS garbage collection Problem: there is no test coverage for offline KVS garbage collection. Add a sharness script that exercises this functionality. Augment the shutdown-cmd sharness script to cover new shutdown options. --- t/Makefile.am | 1 + t/t2808-shutdown-cmd.t | 50 +++++++++++++++++++ t/t2810-kvs-garbage-collect.t | 92 +++++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+) create mode 100755 t/t2810-kvs-garbage-collect.t diff --git a/t/Makefile.am b/t/Makefile.am index e1924b87b771..bfd143047c64 100644 --- a/t/Makefile.am +++ b/t/Makefile.am @@ -187,6 +187,7 @@ TESTSCRIPTS = \ t2807-dump-cmd.t \ t2808-shutdown-cmd.t \ t2809-job-purge.t \ + t2810-kvs-garbage-collect.t \ t2900-job-timelimits.t \ t3000-mpi-basic.t \ t3001-mpi-personalities.t \ diff --git a/t/t2808-shutdown-cmd.t b/t/t2808-shutdown-cmd.t index 789a21c7e287..fb6f1682cd38 100755 --- a/t/t2808-shutdown-cmd.t +++ b/t/t2808-shutdown-cmd.t @@ -136,4 +136,54 @@ test_expect_success 'flux-shutdown as initial program does not hang' ' test_expect_code 129 run_timeout 30 flux start flux shutdown ' +test_expect_success 'submit batch script and wait for it to start' ' + rm -f job6-has-started && + cat >batch6.sh <<-EOT && + #!/bin/sh + flux mini run /bin/true + touch job6-has-started + sleep 300 + EOT + chmod +x batch6.sh && + flux mini batch -t30m -n1 batch6.sh >jobid6 && + $waitfile job6-has-started +' + +test_expect_success 'one job has run in the batch job' ' + (FLUX_URI=$(flux uri --local $(cat jobid6)) \ + flux jobs -n -a -o {id}) >job6_list && + test $(wc -l jobid6_try2 && + $waitfile job6-has-started +' +test_expect_success 'two jobs have been run in batch job' ' + (FLUX_URI=$(flux uri --local $(cat jobid6_try2)) \ + flux jobs -n -a -o {id}) >job6_list_try2 && + test $(wc -l runjobs.sh <<-EOT && + #!/bin/bash -e + trap "" SIGHUP + flux mini submit --cc=1-10 /bin/true >/dev/null + flux queue drain + backingmod=\$(flux getattr content.backing-module) + flux module stats --type int --parse object_count \$backingmod + EOT + chmod +x runjobs.sh +' +test_expect_success 'run instance that leaves an auto dump' ' + mkdir -p state && + flux start -o,-Sstatedir=state \ + -o,-Scontent.dump=auto \ + -o,-Slog-filename=dmesg.log \ + ./runjobs.sh >object_count +' +test_expect_success 'broker logs report dump activity' ' + grep "dumping content to" dmesg.log +' +test_expect_success 'dump exists and RESTORE symlink is valid' ' + test -h state/dump/RESTORE && + readlink -f state/dump/RESTORE >archive && + test -f $(cat archive) +' +test_expect_success 'restart instance with auto restore' ' + flux start -o,-Sstatedir=state \ + -o,-Scontent.restore=auto \ + -o,-Slog-filename=dmesg2.log \ + flux module stats \ + --type int --parse object_count content-sqlite >object_count2 +' +test_expect_success 'broker logs report restore activity' ' + grep "restoring content from" dmesg2.log +' +test_expect_success 'number of stored objects was reduced by GC' ' + before=$(cat object_count) && + after=$(cat object_count2) && + test $before -gt $after +' +test_expect_success 'RESTORE symlink is gone' ' + test_must_fail test -h state/dump/RESTORE +' +test_expect_success 'archive file remains' ' + test -f $(cat archive) +' + +# +# Now repeat the above test with +# - content-files backend +# - no statedir +# - explicitly named dump file (not auto) +# +test_expect_success 'run instance that leaves a named dump' ' + flux start -o,-Slog-filename=dmesg3.log \ + -o,-Scontent.dump=foo.tgz \ + -o,-Scontent.backing-module=content-files \ + ./runjobs.sh >object_count3 +' +test_expect_success 'broker logs report dump activity' ' + grep "dumping content to" dmesg3.log +' +test_expect_success 'dump exists in current directory' ' + test -f foo.tgz +' +test_expect_success 'no RESTORE link was created because path is explicit' ' + test_must_fail test -h dump/RESTORE +' +test_expect_success 'restart instance and restore' ' + flux start -o,-Slog-filename=dmesg4.log \ + -o,-Scontent.restore=foo.tgz \ + -o,-Scontent.backing-module=content-files \ + flux module stats \ + --type int --parse object_count content-files >object_count4 +' +test_expect_success 'broker logs report restore activity' ' + grep "restoring content from" dmesg4.log +' +test_expect_success 'number of stored objects was reduced by GC' ' + before=$(cat object_count3) && + after=$(cat object_count4) && + test $before -gt $after +' + +test_done