Skip to content

Commit

Permalink
kvs: support gc-threshold config
Browse files Browse the repository at this point in the history
Problem: KVS garbage collection is only done when an
administrator runs flux-shutdown and chooses to
garbage collect via the --dump or --gc options.

Solution: Support a kvs gc-threshold configuration option.
This configuration will take an integer count of KVS changes
(the KVS version number or sequence number).  Once the threshold
has been crossed, flux-shutdown will ask the user if they wish to
garbage collect.  Additional options are added to specify yes/no
if the user is scripting with flux-shutdown.

This offers an easy way for administrators to be reminded of garbage
collection on a regular basis.

Fixes flux-framework#4311
  • Loading branch information
chu11 committed Aug 31, 2022
1 parent 5178d0d commit e374b57
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 4 deletions.
14 changes: 13 additions & 1 deletion doc/man1/flux-shutdown.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,18 @@ OPTIONS
the dump, and the link is removed. :linux:man8:`systemd-tmpfiles`
automatically cleans up dump files in ``/var/lib/flux/dump`` after 30 days.

**-y, --yes**
If neither ``--dump`` nor ``--gc`` is specified and ``kvs.gc-threshold`` has
been crossed, ``flux-shutdown`` may ask the user whether to garbage collect.
Specify this option to automatically answer yes to garbage collect. This option
should be used when scripting with ``flux shutdown``.

**-n, --no**
If neither ``--dump`` nor ``--gc`` is specified and ``kvs.gc-threshold`` has
been crossed, ``flux-shutdown`` may ask the user whether to garbage collect.
Specify this option to automatically answer no to garbage collect. This option
should be used when scripting with ``flux shutdown``.


RESOURCES
=========
Expand All @@ -87,4 +99,4 @@ SEE ALSO
========

:man1:`flux-start`, :man1:`flux-uptime`, :man1:`flux-uri`, :man1:`flux-dump`,
:linux:man8:`systemd-tmpfiles`
:man5:`flux-config-kvs`, :linux:man8:`systemd-tmpfiles`
11 changes: 9 additions & 2 deletions doc/man5/flux-config-kvs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ checkpoint-period
primary namespace. The checkpoint is used to protect against data
loss in the event of a Flux broker crash.

gc-threshold
(optional) Sets the number of KVS commits (distinct root snapshots)
after which :man1:`flux-shutdown` offers to perform offline garbage
collection (it prompts the user unless ``--yes`` or ``--no`` is given).
A value of 100000 may be a good starting
point. (Default: garbage collection must be manually requested with
``flux-shutdown --gc``).


EXAMPLE
=======
Expand All @@ -30,7 +37,7 @@ EXAMPLE

[kvs]
checkpoint-period = "30m"

gc-threshold = 100000

RESOURCES
=========
Expand All @@ -43,4 +50,4 @@ RFC 23: Flux Standard Duration: https://flux-framework.readthedocs.io/projects/f
SEE ALSO
========

:man5:`flux-config`
:man1:`flux-shutdown`, :man5:`flux-config`
87 changes: 86 additions & 1 deletion src/cmd/builtin/shutdown.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,88 @@
# include <config.h>
#endif
#include <unistd.h>
#include <jansson.h>
#include <flux/core.h>

#include "src/broker/state_machine.h"
#include "src/common/libkvs/kvs_checkpoint.h"
#include "src/common/libutil/uri.h"

#include "builtin.h"

/* Fetch the current KVS version into *version.
 *
 * *version is set to 0 before the query.  A failure with errno == ENOSYS
 * is tolerated and is not reported (presumably the KVS service is not
 * available - confirm against flux_kvs_get_version() docs).  Any other
 * failure is fatal: an error is logged and the program exits.
 */
static void get_kvs_version (flux_t *h, int *version)
{
    *version = 0;

    if (flux_kvs_get_version (h, NULL, version) >= 0)
        return;
    if (errno == ENOSYS)
        return;
    log_err_exit ("Error fetching KVS version");
}

/* Fetch the kvs.gc-threshold value from the broker configuration.
 *
 * *gc_threshold is set to 0 first and is only expected to change if the
 * config object contains an integer at kvs.gc-threshold, so 0 means
 * "not configured".  Failure to fetch the config object at all is fatal:
 * a message is logged and the program exits.
 */
static void get_gc_threshold (flux_t *h, int *gc_threshold)
{
    flux_future_t *f;
    json_t *o;

    (*gc_threshold) = 0;
    if (!(f = flux_rpc (h, "config.get", NULL, FLUX_NODEID_ANY, 0))
        || flux_rpc_get_unpack (f, "o", &o) < 0)
        log_msg_exit ("Error fetching flux config: %s",
                      future_strerror (f, errno));
    /* The key is optional, so an unpack failure is deliberately ignored
     * and the threshold stays at 0.
     */
    (void)json_unpack (o, "{s:{s:i}}", "kvs", "gc-threshold", gc_threshold);
    /* The threshold has been copied out as a plain int, so the future
     * (which is assumed to own 'o') can be released here rather than
     * leaked.
     */
    flux_future_destroy (f);
}

/* Ask a yes/no question on stdout and read the answer from stdin.
 *
 * prompt        - question text; " [Y/n]? " or " [y/N]? " is appended
 *                 depending on default_value
 * default_value - answer used when the user enters an empty line
 * result        - set to the answer on success
 *
 * An answer is judged by its first character only: 'y'/'Y' means yes,
 * 'n'/'N' means no.  Anything else re-prompts.
 *
 * Returns 0 on success, -1 if stdin hits EOF or a read error before a
 * valid answer is obtained (*result is not modified in that case).
 */
int askyn (char *prompt, bool default_value, bool *result)
{
    while (1) {
        char buf[16];
        bool whole_line = false;
        int i;

        printf ("%s [%s]? ", prompt, default_value ? "Y/n" : "y/N");
        fflush (stdout);
        if (fgets (buf, sizeof (buf), stdin) == NULL)
            return -1;

        /* If the line did not fit in buf, throw away the remainder.
         * Otherwise the leftover text would be consumed as the answer
         * to the next prompt (e.g. a long junk line ending in "no"
         * would be silently interpreted as "no").
         */
        for (i = 0; buf[i] != '\0'; i++) {
            if (buf[i] == '\n') {
                whole_line = true;
                break;
            }
        }
        if (!whole_line) {
            int c;
            while ((c = fgetc (stdin)) != EOF && c != '\n')
                ;
        }

        if (buf[0] == '\n') {
            (*result) = default_value;
            return 0;
        }
        if (buf[0] == 'y' || buf[0] == 'Y') {
            (*result) = true;
            return 0;
        }
        if (buf[0] == 'n' || buf[0] == 'N') {
            (*result) = false;
            return 0;
        }
        printf ("Please answer y or n\n");
    }
}

/* Decide whether garbage collection should be performed at shutdown
 * based on the configured kvs.gc-threshold.
 *
 * Returns false if no positive threshold is configured or the current
 * KVS version has not exceeded it.  Otherwise:
 *  - if --yes was given, returns true (--yes takes precedence when both
 *    --yes and --no are present);
 *  - if --no was given, returns false;
 *  - if stdin is not a tty, exits with a message asking for -y or -n;
 *  - else prompts the user (default answer: yes) and returns the answer.
 * Exits with an error if the user's answer cannot be read.
 */
static bool gc_threshold_check (flux_t *h, optparse_t *p)
{
    int gc_threshold;
    int version;
    bool rc = false;

    get_kvs_version (h, &version);
    get_gc_threshold (h, &gc_threshold);

    /* Threshold unset (0) or not yet crossed: nothing to ask. */
    if (gc_threshold <= 0 || version <= gc_threshold)
        return false;

    if (optparse_hasopt (p, "yes"))
        return true;
    if (optparse_hasopt (p, "no"))
        return false;

    /* Cannot prompt without a terminal; require an explicit choice.
     * No trailing newline, matching the other log_msg_exit() calls
     * in this file.
     */
    if (!isatty (STDIN_FILENO))
        log_msg_exit ("gc threshold exceeded, specify -y or -n");

    if (askyn ("gc threshold exceeded, "
               "do you want to perform garbage collection",
               true,
               &rc) < 0)
        log_msg_exit ("error retrieving user input");
    return rc;
}

static void process_updates (flux_future_t *f)
{
const char *s;
Expand Down Expand Up @@ -68,7 +143,9 @@ static int subcmd (optparse_t *p, int ac, char *av[])
if (optparse_hasopt (p, "background"))
flags &= ~FLUX_RPC_STREAMING;

if (optparse_hasopt (p, "gc") || optparse_hasopt (p, "dump")) {
if (optparse_hasopt (p, "gc")
|| optparse_hasopt (p, "dump")
|| gc_threshold_check (h, p)) {
const char *val = optparse_get_str (p, "dump", "auto");

if (flux_attr_set (h, "content.dump", val) < 0)
Expand Down Expand Up @@ -116,6 +193,14 @@ static struct optparse_option opts[] = {
" 0=show log messages <= LOG_INFO level (default),"
" 1=show all log messages",
},
{ .name = "yes", .key = 'y', .has_arg = 0,
.usage = "If garbage collection threshold exceeded, "
"perform garbage collection",
},
{ .name = "no", .key = 'n', .has_arg = 0,
.usage = "If garbage collection threshold exceeded, "
"do not perform garbage collection",
},
OPTPARSE_TABLE_END
};

Expand Down

0 comments on commit e374b57

Please sign in to comment.