From 20fcadcef4ba862fb9b2f230699b5a10f88e0c55 Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Mon, 29 Aug 2022 12:29:01 -0700 Subject: [PATCH] kvs: support gc-threshold config Problem: KVS garbage collection is only done when an administrator runs flux-shutdown and chooses to garbage collect via the --dump or --gc options. Solution: Support a kvs gc-threshold configuration option. This configuration will take an integer count of KVS changes (the KVS version number or sequence number). Once the threshold has been crossed, flux-shutdown will ask the user if they wish to garbage collect. This offers an easy way for administrators to be reminded of garbage collection on a regular basis. Fixes #4311 --- doc/man5/flux-config-kvs.rst | 11 ++++- src/cmd/builtin/shutdown.c | 87 +++++++++++++++++++++++++++++++++++- 2 files changed, 95 insertions(+), 3 deletions(-) diff --git a/doc/man5/flux-config-kvs.rst b/doc/man5/flux-config-kvs.rst index 19f75ac99343..4b8d87c15108 100644 --- a/doc/man5/flux-config-kvs.rst +++ b/doc/man5/flux-config-kvs.rst @@ -22,6 +22,13 @@ checkpoint-period primary namespace. The checkpoint is used to protect against data loss in the event of a Flux broker crash. +gc-threshold + (optional) Sets the number of KVS commits (distinct root snapshots) + after which offline garbage collection is performed by + :man1:`flux-shutdown`. A value of 100000 may be a good starting + point. (Default: garbage collection must be manually requested with + ``flux-shutdown --gc``). 
+
 EXAMPLE
 =======
 
@@ -30,7 +37,7 @@ EXAMPLE
 
    [kvs]
    checkpoint-period = "30m"
-
+   gc-threshold = 100000
 
 RESOURCES
 =========
@@ -43,4 +50,4 @@ RFC 23: Flux Standard Duration: https://flux-framework.readthedocs.io/projects/f
 SEE ALSO
 ========
 
-:man5:`flux-config`
+:man1:`flux-shutdown`, :man5:`flux-config`
diff --git a/src/cmd/builtin/shutdown.c b/src/cmd/builtin/shutdown.c
index 4fef6ee63ace..71645d893f78 100644
--- a/src/cmd/builtin/shutdown.c
+++ b/src/cmd/builtin/shutdown.c
@@ -12,13 +12,105 @@
 # include
 #endif
 #include
+#include
 #include
 
 #include "src/broker/state_machine.h"
+#include "src/common/libkvs/kvs_checkpoint.h"
 #include "src/common/libutil/uri.h"
 
 #include "builtin.h"
 
+/* Fetch the current KVS version into *version.
+ * *version is preset to 0; an ENOSYS failure from flux_kvs_get_version()
+ * is tolerated, any other failure is fatal.
+ */
+static void get_kvs_version (flux_t *h, int *version)
+{
+    (*version) = 0;
+    if (flux_kvs_get_version (h, NULL, version) < 0
+        && errno != ENOSYS)
+        log_err_exit ("Error fetching KVS version");
+}
+
+/* Read kvs.gc-threshold from the broker config into *gc_threshold.
+ * *gc_threshold is preset to 0; the json_unpack() result is deliberately
+ * ignored so a missing or non-integer key is not an error.
+ * Failure of the config.get RPC is fatal.
+ */
+static void get_gc_threshold (flux_t *h, int *gc_threshold)
+{
+    flux_future_t *f;
+    json_t *o;
+    (*gc_threshold) = 0;
+    if (!(f = flux_rpc (h, "config.get", NULL, FLUX_NODEID_ANY, 0))
+        || flux_rpc_get_unpack (f, "o", &o) < 0)
+        log_msg_exit ("Error fetching flux config: %s",
+                      future_strerror (f, errno));
+    (void)json_unpack (o, "{s:{s:i}}", "kvs", "gc-threshold", gc_threshold);
+    /* o belongs to f, so release f only after unpacking */
+    flux_future_destroy (f);
+}
+
+/* Prompt on stdout and read a yes/no answer from stdin.
+ * An empty line selects default_value; otherwise only the first character
+ * of the reply is examined, and anything but y/Y/n/N prompts again.
+ * Returns 0 with *result set, or -1 if fgets() returns NULL.
+ */
+int askyn (char *prompt, bool default_value, bool *result)
+{
+    while (1) {
+        char buf[16];
+        printf ("%s [%s]? ", prompt, default_value ? "Y/n" : "y/N");
+        fflush (stdout);
+        if (fgets (buf, sizeof (buf), stdin) == NULL)
+            return -1;
+        if (buf[0] == '\n')
+            break;
+        if (buf[0] == 'y' || buf[0] == 'Y') {
+            (*result) = true;
+            return 0;
+        }
+        if (buf[0] == 'n' || buf[0] == 'N') {
+            (*result) = false;
+            return 0;
+        }
+        printf ("Please answer y or n\n");
+    }
+    (*result) = default_value;
+    return 0;
+}
+
+/* Decide whether this shutdown should garbage collect because the KVS
+ * version exceeds a configured (> 0) kvs.gc-threshold.
+ * --yes or --no answers without prompting (--yes wins if both are given);
+ * otherwise stdin must be a tty and the user is asked, defaulting to yes.
+ * Returns false if the threshold is unset or has not been exceeded.
+ */
+static bool gc_threshold_check (flux_t *h, optparse_t *p)
+{
+    int gc_threshold, version;
+    bool rc = false;
+
+    get_kvs_version (h, &version);
+    get_gc_threshold (h, &gc_threshold);
+
+    if (gc_threshold > 0 && version > gc_threshold) {
+        if (optparse_hasopt (p, "yes") || optparse_hasopt (p, "no"))
+            return optparse_hasopt (p, "yes");
+
+        if (!isatty (STDIN_FILENO))
+            log_msg_exit ("gc threshold exceeded, specify -y or -n");
+
+        if (askyn ("gc threshold exceeded, "
+                   "do you want to perform garbage collection",
+                   true,
+                   &rc) < 0)
+            log_msg_exit ("error retrieving user input");
+    }
+    return rc;
+}
+
 static void process_updates (flux_future_t *f)
 {
     const char *s;
@@ -68,7 +160,9 @@ static int subcmd (optparse_t *p, int ac, char *av[])
     if (optparse_hasopt (p, "background"))
         flags &= ~FLUX_RPC_STREAMING;
 
-    if (optparse_hasopt (p, "gc") || optparse_hasopt (p, "dump")) {
+    if (optparse_hasopt (p, "gc")
+        || optparse_hasopt (p, "dump")
+        || gc_threshold_check (h, p)) {
         const char *val = optparse_get_str (p, "dump", "auto");
 
         if (flux_attr_set (h, "content.dump", val) < 0)
@@ -116,6 +210,14 @@ static struct optparse_option opts[] = {
                " 0=show log messages <= LOG_INFO level (default),"
                " 1=show all log messages",
     },
+    { .name = "yes", .key = 'y', .has_arg = 0,
+      .usage = "If garbage collection threshold exceeded, "
+               "perform garbage collection",
+    },
+    { .name = "no", .key = 'n', .has_arg = 0,
+      .usage = "If garbage collection threshold exceeded, "
+               "do not perform garbage collection",
+    },
     OPTPARSE_TABLE_END
 };
 