diff --git a/etc/Makefile.am b/etc/Makefile.am index d35ba1a4c796..c63e446c7584 100644 --- a/etc/Makefile.am +++ b/etc/Makefile.am @@ -2,6 +2,10 @@ systemdsystemunit_DATA = flux.service #endif +tmpfilesdir = $(prefix)/lib/tmpfiles.d + +tmpfiles_DATA = flux.conf + dist_fluxrc_SCRIPTS = \ rc1 \ rc3 @@ -43,6 +47,7 @@ noinst_SCRIPTS = \ EXTRA_DIST = \ gen-cmdhelp.py \ + flux.conf \ $(noinst_SCRIPTS) completions/flux: $(srcdir)/completions/flux.pre diff --git a/etc/flux.conf b/etc/flux.conf new file mode 100644 index 000000000000..d6feb54bbbfe --- /dev/null +++ b/etc/flux.conf @@ -0,0 +1,4 @@ +# See tmpfiles.d(5) +# remove Flux dump files older than 30 days + +e /var/lib/flux/dump - - - 30d diff --git a/etc/flux.service.in b/etc/flux.service.in index cddb415bedea..2514e7d13cef 100644 --- a/etc/flux.service.in +++ b/etc/flux.service.in @@ -21,6 +21,7 @@ ExecStart=/bin/bash -c '\ -Sbroker.quorum=0 \ -Sbroker.quorum-timeout=none \ -Sbroker.exit-norestart=42 \ + -Scontent.restore=auto \ ' SyslogIdentifier=flux ExecReload=@X_BINDIR@/flux config reload diff --git a/etc/rc1 b/etc/rc1 index 76b692fafa5b..5b2f0b985e96 100755 --- a/etc/rc1 +++ b/etc/rc1 @@ -3,10 +3,6 @@ # Allow connector-local more time to start listening on socket RANK=$(FLUX_LOCAL_CONNECTOR_RETRY_COUNT=30 flux getattr rank) -if ! content_backing=$(flux getattr content.backing-module 2>/dev/null); then - content_backing=content-sqlite -fi - # Usage: modload {all|} modname [args ...] modload() { local where=$1; shift @@ -16,7 +12,34 @@ modload() { } modload all barrier -modload 0 ${content_backing} + +if test $RANK -eq 0; then + backingmod=$(flux getattr content.backing-module 2>/dev/null) || : + backingmod=${backingmod:-content-sqlite} + dumpfile=$(flux getattr content.restore 2>/dev/null) || : + if test -n "${dumpfile}"; then + if test "${dumpfile}" = "auto"; then + statedir=$(flux getattr statedir 2>/dev/null) || : + dumplink="${statedir:-.}/dump/RESTORE" + if test -h "${dumplink}"; then + dumpfile=$(readlink -f ${dumplink}) || : + else + dumpfile="" + dumplink="" + fi + fi + fi + if test -n "${dumpfile}"; then + flux module load ${backingmod} truncate + echo "restoring content from ${dumpfile}" + flux restore --quiet --checkpoint ${dumpfile} + if test -n "${dumplink}"; then + rm -f ${dumplink} + fi + else + flux module load ${backingmod} + fi +fi modload all kvs modload all kvs-watch diff --git a/etc/rc3 b/etc/rc3 index af72f5bd607c..97bc61e2f6cb 100755 --- a/etc/rc3 +++ b/etc/rc3 @@ -45,7 +45,25 @@ modrm all kvs flux content flush || exit_rc=1 -backingmod=$(flux getattr content.backing-module 2>/dev/null) -modrm 0 ${backingmod:-content-sqlite} +if test $RANK -eq 0; then + backingmod=$(flux getattr content.backing-module 2>/dev/null) + backingmod=${backingmod:-content-sqlite} + dumpfile=$(flux getattr content.dump 2>/dev/null) + if test $exit_rc -eq 0 -a -n "${dumpfile}"; then + if test "${dumpfile}" = "auto"; then + statedir=$(flux getattr statedir 2>/dev/null) + mkdir -p "${statedir:-.}/dump" + dumpfile="${statedir:-.}/dump/$(date +%Y%m%d_%H%M%S).tgz" + dumplink="${statedir:-.}/dump/RESTORE" + fi + echo "dumping content to ${dumpfile}" + if flux dump --quiet --checkpoint ${dumpfile}; then + test -n "$dumplink" && ln -s $(basename ${dumpfile}) ${dumplink} + else + exit_rc=1 + fi + fi + flux module remove ${backingmod} || exit_rc=1 +fi exit $exit_rc diff --git a/src/cmd/builtin/shutdown.c b/src/cmd/builtin/shutdown.c index b2cc411ce76b..006d8f4f94db 100644 --- a/src/cmd/builtin/shutdown.c +++ b/src/cmd/builtin/shutdown.c @@ -68,6 +68,15 @@ static int subcmd (optparse_t *p, int ac, char *av[]) if (optparse_hasopt (p, "background")) flags &= ~FLUX_RPC_STREAMING; + if (optparse_hasopt (p, "gc") || optparse_hasopt (p, "dump")) { + const char *val = optparse_get_str (p, "dump", "auto"); + + if (flux_attr_set (h, "content.dump", val) < 0) + log_err_exit ("error setting content.dump attribute"); + + log_msg ("shutdown will dump KVS (this may take some time)"); + } + /* N.B. set nodeid=FLUX_NODEID_ANY so we get immediate error from * broker if run on rank > 0. */ @@ -90,6 +99,12 @@ static int subcmd (optparse_t *p, int ac, char *av[]) } static struct optparse_option opts[] = { + { .name = "gc", .has_arg = 0, + .usage = "Garbage collect KVS (short for --dump=auto)", + }, + { .name = "dump", .has_arg = 1, .arginfo = "PATH", + .usage = "Dump KVS content to specified archive file using flux-dump(1)." + }, { .name = "background", .has_arg = 0, .usage = "Exit the command immediately after initiating shutdown", }, diff --git a/src/modules/content-files/content-files.c b/src/modules/content-files/content-files.c index 9e55e37b3fd8..043272efe01f 100644 --- a/src/modules/content-files/content-files.c +++ b/src/modules/content-files/content-files.c @@ -56,6 +56,8 @@ #include "src/common/libutil/blobref.h" #include "src/common/libutil/log.h" +#include "src/common/libutil/dirwalk.h" +#include "src/common/libutil/unlink_recursive.h" #include "src/common/libcontent/content-util.h" @@ -69,6 +71,43 @@ struct content_files { int hash_size; }; +static int file_count_cb (dirwalk_t *d, void *arg) +{ + int *count = arg; + + if (!dirwalk_isdir (d)) + (*count)++; + return 0; +} + +static int get_object_count (const char *path) +{ + int count = 0; + if (dirwalk (path, 0, file_count_cb, &count) < 0) + return -1; + return count; +} + +static void stats_get_cb (flux_t *h, + flux_msg_handler_t *mh, + const flux_msg_t *msg, + void *arg) +{ + struct content_files *ctx = arg; + int count; + + if ((count = get_object_count (ctx->dbpath)) < 0) + goto error; + + if (flux_respond_pack (h, msg, "{s:i}", "object_count", count) < 0) + flux_log_error (h, "error responding to stats.get request"); + return; +error: + if (flux_respond_error (h, msg, errno, NULL) < 0) + flux_log_error (h, "error responding to stats.get request"); +} + + /* Handle a content-backing.load request from the rank 0 broker's * content-cache service. The raw request payload is a hash digest. * The raw response payload is the blob content. @@ -259,12 +298,13 @@ static const struct flux_msg_handler_spec htab[] = { { FLUX_MSGTYPE_REQUEST, "content-backing.store", store_cb, 0 }, { FLUX_MSGTYPE_REQUEST, "kvs-checkpoint.get", checkpoint_get_cb, 0 }, { FLUX_MSGTYPE_REQUEST, "kvs-checkpoint.put", checkpoint_put_cb, 0 }, + { FLUX_MSGTYPE_REQUEST, "content-files.stats.get", stats_get_cb, 0 }, FLUX_MSGHANDLER_TABLE_END, }; /* Create module context and perform some initialization. */ -static struct content_files *content_files_create (flux_t *h) +static struct content_files *content_files_create (flux_t *h, bool truncate) { struct content_files *ctx; const char *dbdir; @@ -295,6 +335,8 @@ static struct content_files *content_files_create (flux_t *h) } if (asprintf (&ctx->dbpath, "%s/content.files", dbdir) < 0) goto error; + if (truncate) + (void)unlink_recursive (ctx->dbpath); if (mkdir (ctx->dbpath, 0700) < 0 && errno != EEXIST) { flux_log_error (h, "could not create %s", ctx->dbpath); goto error; @@ -307,15 +349,20 @@ static struct content_files *content_files_create (flux_t *h) return NULL; } -static int parse_args (flux_t *h, int argc, char **argv, bool *testing) +static int parse_args (flux_t *h, + int argc, + char **argv, + bool *testing, + bool *truncate) { int i; for (i = 0; i < argc; i++) { if (!strcmp (argv[i], "testing")) *testing = true; + else if (!strcmp (argv[i], "truncate")) + *truncate = true; else { - errno = EINVAL; - flux_log_error (h, "%s", argv[i]); + flux_log (h, LOG_ERR, "Unknown module option: %s", argv[i]); return -1; } } @@ -338,11 +385,12 @@ int mod_main (flux_t *h, int argc, char **argv) { struct content_files *ctx; bool testing = false; + bool truncate = false; int rc = -1; - if (parse_args (h, argc, argv, &testing) < 0) + if (parse_args (h, argc, argv, &testing, &truncate) < 0) return -1; - if (!(ctx = content_files_create (h))) { + if (!(ctx = content_files_create (h, truncate))) { flux_log_error (h, "content_files_create failed"); return -1; } diff --git a/src/modules/content-s3/content-s3.c b/src/modules/content-s3/content-s3.c index 551164e84da4..17caf12ed26b 100644 --- a/src/modules/content-s3/content-s3.c +++ b/src/modules/content-s3/content-s3.c @@ -469,11 +469,31 @@ static struct content_s3 *content_s3_create (flux_t *h) return NULL; } +static int parse_args (flux_t *h, int argc, char **argv) +{ + for (int i = 0; i < argc; i++) { + if (!strcmp (argv[i], "truncate")) { + flux_log (h, + LOG_ERR, + "truncate is not implemented. Use S3 console" + " or other external mechanism to empty bucket."); + return -1; + } + else { + flux_log (h, LOG_ERR, "Unknown module option: %s", argv[i]); + return -1; + } + } + return 0; +} + int mod_main (flux_t *h, int argc, char **argv) { struct content_s3 *ctx; int rc = -1; + if (parse_args (h, argc, argv) < 0) + return -1; if (!(ctx = content_s3_create (h))) { flux_log_error (h, "content_s3_create failed"); return -1; diff --git a/src/modules/content-sqlite/content-sqlite.c b/src/modules/content-sqlite/content-sqlite.c index e3a44dbf5611..3e8e860504cf 100644 --- a/src/modules/content-sqlite/content-sqlite.c +++ b/src/modules/content-sqlite/content-sqlite.c @@ -76,6 +76,7 @@ struct content_sqlite { struct content_stats stats; const char *journal_mode; const char *synchronous; + bool truncate; }; static void log_sqlite_error (struct content_sqlite *ctx, const char *fmt, ...) @@ -600,12 +601,15 @@ void stats_get_cb (flux_t *h, /* Open the database file ctx->dbfile and set up the database. */ -static int content_sqlite_opendb (struct content_sqlite *ctx) +static int content_sqlite_opendb (struct content_sqlite *ctx, bool truncate) { int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE; char s[128]; int count; + if (truncate) + (void)unlink (ctx->dbfile); + if (sqlite3_open_v2 (ctx->dbfile, &ctx->db, flags, NULL) != SQLITE_OK) { log_sqlite_error (ctx, "opening %s", ctx->dbfile); goto error; @@ -786,7 +790,10 @@ static struct content_sqlite *content_sqlite_create (flux_t *h) return NULL; } -static int process_args (struct content_sqlite *ctx, int argc, char **argv) +static int process_args (struct content_sqlite *ctx, + int argc, + char **argv, + bool *truncate) { int i; for (i = 0; i < argc; i++) { @@ -796,8 +803,11 @@ static int process_args (struct content_sqlite *ctx, int argc, char **argv) else if (strncmp ("synchronous=", argv[i], 12) == 0) { ctx->synchronous = argv[i] + 12; } + else if (strcmp ("truncate", argv[i]) == 0) { + *truncate = true; + } else { - flux_log_error (ctx->h, "Unknown module option: '%s'", argv[i]); + flux_log (ctx->h, LOG_ERR, "Unknown module option: '%s'", argv[i]); return -1; } } @@ -807,15 +817,17 @@ static int process_args (struct content_sqlite *ctx, int argc, char **argv) int mod_main (flux_t *h, int argc, char **argv) { struct content_sqlite *ctx; + bool truncate = false; int rc = -1; if (!(ctx = content_sqlite_create (h))) { flux_log_error (h, "content_sqlite_create failed"); return -1; } - if (process_args (ctx, argc, argv) < 0) // override pragmas set above + // override pragmas set above + if (process_args (ctx, argc, argv, &truncate) < 0) goto done; - if (content_sqlite_opendb (ctx) < 0) + if (content_sqlite_opendb (ctx, truncate) < 0) goto done; if (content_register_backing_store (h, "content-sqlite") < 0) goto done; diff --git a/t/Makefile.am b/t/Makefile.am index e1924b87b771..bfd143047c64 100644 --- a/t/Makefile.am +++ b/t/Makefile.am @@ -187,6 +187,7 @@ TESTSCRIPTS = \ t2807-dump-cmd.t \ t2808-shutdown-cmd.t \ t2809-job-purge.t \ + t2810-kvs-garbage-collect.t \ t2900-job-timelimits.t \ t3000-mpi-basic.t \ t3001-mpi-personalities.t \ diff --git a/t/sharness.d/flux-sharness.sh b/t/sharness.d/flux-sharness.sh index a95147ad5951..3f2106913ba4 100644 --- a/t/sharness.d/flux-sharness.sh +++ b/t/sharness.d/flux-sharness.sh @@ -98,6 +98,7 @@ make_bootstrap_config() { local full="0-$(($size-1))" mkdir $workdir/conf.d + mkdir $workdir/state flux keygen $workdir/cert cat >$workdir/conf.d/bootstrap.toml <<-EOT [bootstrap] @@ -121,6 +122,7 @@ make_bootstrap_config() { echo "--test-start-mode=${TEST_UNDER_FLUX_START_MODE:-all}" echo "-o,-Stbon.fanout=${TEST_UNDER_FLUX_FANOUT:-$size}" echo "-o,-Stbon.zmqdebug=1" + echo "-o,-Sstatedir=$workdir/state" } # diff --git a/t/t0012-content-sqlite.t b/t/t0012-content-sqlite.t index 902a686b1928..89d17f940018 100755 --- a/t/t0012-content-sqlite.t +++ b/t/t0012-content-sqlite.t @@ -207,11 +207,13 @@ test_expect_success 'remove read permission from content.sqlite file' ' chmod u-w $(flux getattr rundir)/content.sqlite && test_must_fail flux module load content-sqlite ' +test_expect_success 'restore read permission on content.sqlite file' ' + chmod u+w $(flux getattr rundir)/content.sqlite +' # Clean slate for a few more tests -test_expect_success 'remove content.sqlite file' ' - rm $(flux getattr rundir)/content.sqlite && - flux module load content-sqlite +test_expect_success 'load content-sqlite with truncate option' ' + flux module load content-sqlite truncate ' test_expect_success 'content-sqlite and content-cache are empty' ' test $(flux module stats \ diff --git a/t/t0018-content-files.t b/t/t0018-content-files.t index 15a5aaf1f7cb..e8cc88bb518c 100755 --- a/t/t0018-content-files.t +++ b/t/t0018-content-files.t @@ -73,6 +73,10 @@ kvs_checkpoint_get() { # Tests of the module by itself (no content cache) ## +test_expect_success 'content-files module load fails with unknown option' ' + test_must_fail flux module load content-files notoption +' + test_expect_success 'load content-files module' ' flux module load content-files testing ' @@ -213,6 +217,18 @@ test_expect_success LONGTEST 'reload/verify various size large blobs through cac test $err -eq 0 ' +test_expect_success 'flux module stats reports nonzero object count' ' + test $(flux module stats \ + --type int --parse object_count content-files) -gt 0 +' +test_expect_success 'reload content-files with truncate option' ' + flux module reload content-files truncate +' +test_expect_success 'flux module stats reports zero object count' ' + test $(flux module stats \ + --type int --parse object_count content-files) -eq 0 +' + test_expect_success 'remove content-files module' ' flux module remove content-files ' diff --git a/t/t0024-content-s3.t b/t/t0024-content-s3.t index 025328bcaa0f..73bbf4a9ea74 100755 --- a/t/t0024-content-s3.t +++ b/t/t0024-content-s3.t @@ -78,6 +78,13 @@ kvs_checkpoint_get() { # Tests of the module by itself (no content cache) ## +test_expect_success 'content-s3 module load fails with unknown option' ' + test_must_fail flux module load content-s3 notoption +' +test_expect_success 'content-s3 module load fails with truncate option' ' + test_must_fail flux module load content-s3 truncate +' + test_expect_success 'create creds.toml from env' ' mkdir -p creds && cat >creds/creds.toml <<-CREDS diff --git a/t/t2808-shutdown-cmd.t b/t/t2808-shutdown-cmd.t index 789a21c7e287..fb6f1682cd38 100755 --- a/t/t2808-shutdown-cmd.t +++ b/t/t2808-shutdown-cmd.t @@ -136,4 +136,54 @@ test_expect_success 'flux-shutdown as initial program does not hang' ' test_expect_code 129 run_timeout 30 flux start flux shutdown ' +test_expect_success 'submit batch script and wait for it to start' ' + rm -f job6-has-started && + cat >batch6.sh <<-EOT && + #!/bin/sh + flux mini run /bin/true + touch job6-has-started + sleep 300 + EOT + chmod +x batch6.sh && + flux mini batch -t30m -n1 batch6.sh >jobid6 && + $waitfile job6-has-started +' + +test_expect_success 'one job has run in the batch job' ' + (FLUX_URI=$(flux uri --local $(cat jobid6)) \ + flux jobs -n -a -o {id}) >job6_list && + test $(wc -l jobid6_try2 && + $waitfile job6-has-started +' +test_expect_success 'two jobs have been run in batch job' ' + (FLUX_URI=$(flux uri --local $(cat jobid6_try2)) \ + flux jobs -n -a -o {id}) >job6_list_try2 && + test $(wc -l runjobs.sh <<-EOT && + #!/bin/bash -e + trap "" SIGHUP + flux mini submit --cc=1-10 /bin/true >/dev/null + flux queue drain + backingmod=\$(flux getattr content.backing-module) + flux module stats --type int --parse object_count \$backingmod + EOT + chmod +x runjobs.sh +' +test_expect_success 'run instance that leaves an auto dump' ' + mkdir -p state && + flux start -o,-Sstatedir=state \ + -o,-Scontent.dump=auto \ + -o,-Slog-filename=dmesg.log \ + ./runjobs.sh >object_count +' +test_expect_success 'broker logs report dump activity' ' + grep "dumping content to" dmesg.log +' +test_expect_success 'dump exists and RESTORE symlink is valid' ' + test -h state/dump/RESTORE && + readlink -f state/dump/RESTORE >archive && + test -f $(cat archive) +' +test_expect_success 'restart instance with auto restore' ' + flux start -o,-Sstatedir=state \ + -o,-Scontent.restore=auto \ + -o,-Slog-filename=dmesg2.log \ + flux module stats \ + --type int --parse object_count content-sqlite >object_count2 +' +test_expect_success 'broker logs report restore activity' ' + grep "restoring content from" dmesg2.log +' +test_expect_success 'number of stored objects was reduced by GC' ' + before=$(cat object_count) && + after=$(cat object_count2) && + test $before -gt $after +' +test_expect_success 'RESTORE symlink is gone' ' + test_must_fail test -h state/dump/RESTORE +' +test_expect_success 'archive file remains' ' + test -f $(cat archive) +' + +# +# Now repeat the above test with +# - content-files backend +# - no statedir +# - explicitly named dump file (not auto) +# +test_expect_success 'run instance that leaves a named dump' ' + flux start -o,-Slog-filename=dmesg3.log \ + -o,-Scontent.dump=foo.tgz \ + -o,-Scontent.backing-module=content-files \ + ./runjobs.sh >object_count3 +' +test_expect_success 'broker logs report dump activity' ' + grep "dumping content to" dmesg3.log +' +test_expect_success 'dump exists in current directory' ' + test -f foo.tgz +' +test_expect_success 'no RESTORE link was created because path is explicit' ' + test_must_fail test -h dump/RESTORE +' +test_expect_success 'restart instance and restore' ' + flux start -o,-Slog-filename=dmesg4.log \ + -o,-Scontent.restore=foo.tgz \ + -o,-Scontent.backing-module=content-files \ + flux module stats \ + --type int --parse object_count content-files >object_count4 +' +test_expect_success 'broker logs report restore activity' ' + grep "restoring content from" dmesg4.log +' +test_expect_success 'number of stored objects was reduced by GC' ' + before=$(cat object_count3) && + after=$(cat object_count4) && + test $before -gt $after +' + +test_done