diff --git a/doc/man1/flux-shutdown.rst b/doc/man1/flux-shutdown.rst index 0c1fad8f6294..2488abaf6a24 100644 --- a/doc/man1/flux-shutdown.rst +++ b/doc/man1/flux-shutdown.rst @@ -76,6 +76,18 @@ OPTIONS the dump, and the link is removed. :linux:man8:`systemd-tmpfiles` automatically cleans up dump files in ``/var/lib/flux/dump`` after 30 days. +**-y, --yes** + If ``--dump`` and ``--gc`` are not specified, ``flux shutdown`` may ask the user + whether to garbage collect when ``kvs.gc-threshold`` has been exceeded. + Specify this option to automatically answer yes to garbage collect. This option + should be used when scripting with ``flux shutdown``. + +**-n, --no** + If ``--dump`` and ``--gc`` are not specified, ``flux shutdown`` may ask the user + whether to garbage collect when ``kvs.gc-threshold`` has been exceeded. + Specify this option to automatically answer no to garbage collect. This option + should be used when scripting with ``flux shutdown``. + RESOURCES ========= @@ -87,4 +99,4 @@ SEE ALSO ======== :man1:`flux-start`, :man1:`flux-uptime`, :man1:`flux-uri`, :man1:`flux-dump`, -:linux:man8:`systemd-tmpfiles` +:man5:`flux-config-kvs`, :linux:man8:`systemd-tmpfiles` diff --git a/doc/man5/flux-config-kvs.rst b/doc/man5/flux-config-kvs.rst index 19f75ac99343..4b8d87c15108 100644 --- a/doc/man5/flux-config-kvs.rst +++ b/doc/man5/flux-config-kvs.rst @@ -22,6 +22,13 @@ checkpoint-period primary namespace. The checkpoint is used to protect against data loss in the event of a Flux broker crash. +gc-threshold + (optional) Sets the number of KVS commits (distinct root snapshots) + after which offline garbage collection is performed by + :man1:`flux-shutdown`. A value of 100000 may be a good starting + point. (Default: garbage collection must be manually requested with + ``flux shutdown --gc``).
+

 EXAMPLE
 =======
@@ -30,7 +37,7 @@ EXAMPLE

    [kvs]
    checkpoint-period = "30m"
-
+   gc-threshold = 100000

 RESOURCES
 =========
@@ -43,4 +50,4 @@ RFC 23: Flux Standard Duration: https://flux-framework.readthedocs.io/projects/f
 SEE ALSO
 ========

-:man5:`flux-config`
+:man1:`flux-shutdown`, :man5:`flux-config`
diff --git a/src/cmd/builtin/shutdown.c b/src/cmd/builtin/shutdown.c
index 4fef6ee63ace..71645d893f78 100644
--- a/src/cmd/builtin/shutdown.c
+++ b/src/cmd/builtin/shutdown.c
@@ -12,13 +12,110 @@
 # include <config.h>
 #endif
 #include <unistd.h>
+#include <jansson.h>
 #include <flux/core.h>

 #include "src/broker/state_machine.h"
+#include "src/common/libkvs/kvs_checkpoint.h"
 #include "src/common/libutil/uri.h"

 #include "builtin.h"

+/* Store the KVS version (namespace argument NULL) in *version.
+ * *version starts at 0; an ENOSYS failure is tolerated, any other
+ * failure is fatal.
+ */
+static void get_kvs_version (flux_t *h, int *version)
+{
+    (*version) = 0;
+    if (flux_kvs_get_version (h, NULL, version) < 0
+        && errno != ENOSYS)
+        log_err_exit ("Error fetching KVS version");
+}
+
+/* Fetch the config object with a config.get RPC and store the integer
+ * kvs.gc-threshold in *gc_threshold.  The json_unpack() result is
+ * deliberately ignored, so a missing key is not an error and leaves the
+ * initial 0 in place.  An RPC failure is fatal.
+ */
+static void get_gc_threshold (flux_t *h, int *gc_threshold)
+{
+    flux_future_t *f;
+    json_t *o;
+    (*gc_threshold) = 0;
+    if (!(f = flux_rpc (h, "config.get", NULL, FLUX_NODEID_ANY, 0))
+        || flux_rpc_get_unpack (f, "o", &o) < 0)
+        log_msg_exit ("Error fetching flux config: %s",
+                      future_strerror (f, errno));
+    (void)json_unpack (o, "{s:{s:i}}", "kvs", "gc-threshold", gc_threshold);
+    /* "o" unpacks a borrowed reference, so release f only after using o */
+    flux_future_destroy (f);
+}
+
+/* Print 'prompt' with a [Y/n] or [y/N] hint on stdout and read a reply
+ * from stdin.  A reply starting with y/Y or n/N sets *result; an empty
+ * line selects default_value; anything else re-prompts.
+ * Returns 0 on success, -1 if fgets() returns NULL (EOF or read error).
+ */
+int askyn (char *prompt, bool default_value, bool *result)
+{
+    while (1) {
+        char buf[16];
+        printf ("%s [%s]? ", prompt, default_value ? "Y/n" : "y/N");
+        fflush (stdout);
+        if (fgets (buf, sizeof (buf), stdin) == NULL)
+            return -1;
+        if (buf[0] == '\n')
+            break;
+        if (buf[0] == 'y' || buf[0] == 'Y') {
+            (*result) = true;
+            return 0;
+        }
+        if (buf[0] == 'n' || buf[0] == 'N') {
+            (*result) = false;
+            return 0;
+        }
+        printf ("Please answer y or n\n");
+    }
+    (*result) = default_value;
+    return 0;
+}
+
+/* Return true if garbage collection should be performed because the KVS
+ * version exceeds a positive kvs.gc-threshold.  --yes or --no answers
+ * without prompting (--yes wins if both are given).  Otherwise stdin must
+ * be a tty (fatal error if not) and the user is asked, defaulting to yes.
+ * Returns false when the threshold is unset or has not been exceeded.
+ */
+static bool gc_threshold_check (flux_t *h, optparse_t *p)
+{
+    int gc_threshold, version;
+    bool rc = false;
+
+    get_kvs_version (h, &version);
+    get_gc_threshold (h, &gc_threshold);
+
+    if (gc_threshold > 0 && version > gc_threshold) {
+        if (optparse_hasopt (p, "yes") || optparse_hasopt (p, "no")) {
+            if (optparse_hasopt (p, "yes"))
+                rc = true;
+            else
+                rc = false;
+            return rc;
+        }
+
+        if (!isatty (STDIN_FILENO))
+            log_msg_exit ("gc threshold exceeded, specify -y or -n");
+
+        if (askyn ("gc threshold exceeded, "
+                   "do you want to perform garbage collection",
+                   true,
+                   &rc) < 0)
+            log_msg_exit ("error retrieving user input");
+    }
+    return rc;
+}
+
 static void process_updates (flux_future_t *f)
 {
     const char *s;
@@ -68,7 +165,9 @@ static int subcmd (optparse_t *p, int ac, char *av[])
     if (optparse_hasopt (p, "background"))
         flags &= ~FLUX_RPC_STREAMING;

-    if (optparse_hasopt (p, "gc") || optparse_hasopt (p, "dump")) {
+    if (optparse_hasopt (p, "gc")
+        || optparse_hasopt (p, "dump")
+        || gc_threshold_check (h, p)) {
         const char *val = optparse_get_str (p, "dump", "auto");

         if (flux_attr_set (h, "content.dump", val) < 0)
@@ -116,6 +215,14 @@ static struct optparse_option opts[] = {
       " 0=show log messages <= LOG_INFO level (default),"
       " 1=show all log messages",
     },
+    { .name = "yes", .key = 'y', .has_arg = 0,
+      .usage = "If garbage collection threshold exceeded, "
+               "perform garbage collection",
+    },
+    { .name = "no", .key = 'n', .has_arg = 0,
+      .usage = "If garbage collection threshold exceeded, "
+               "do not perform garbage collection",
+    },
     OPTPARSE_TABLE_END
 };
