Skip to content

Commit

Permalink
kvs: add date to kvs-primary checkpoint
Browse files Browse the repository at this point in the history
Problem: It would be convenient to know the date on which the
kvs-primary checkpoint was written.

Solution: When checkpointing the primary KVS, store a JSON
object containing both the rootref and a timestamp, instead of
just the rootref string.  On retrieval, parse the object and
extract the timestamp so it can be reported in the logs.

Fixes flux-framework#3580
  • Loading branch information
chu11 committed Feb 15, 2022
1 parent db2c75e commit 80ebe02
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 13 deletions.
81 changes: 68 additions & 13 deletions src/modules/kvs/kvs.c
Original file line number Diff line number Diff line change
Expand Up @@ -2711,10 +2711,15 @@ static void process_args (struct kvs_ctx *ctx, int ac, char **av)
* Copy value to buf with '\0' termination.
* Return 0 on success, -1 on failure.
*/
static int checkpoint_get (flux_t *h, const char *key, char *buf, size_t len)
static int checkpoint_get (flux_t *h, const char *key,
char *buf, size_t len,
double *timestamp)
{
flux_future_t *f;
const char *value;
const char *value = NULL;
json_t *o = NULL;
const char *rootref;
int rv = -1;

if (!(f = flux_rpc_pack (h,
"kvs-checkpoint.get",
Expand All @@ -2726,24 +2731,61 @@ static int checkpoint_get (flux_t *h, const char *key, char *buf, size_t len)
return -1;
if (flux_rpc_get_unpack (f, "{s:s}", "value", &value) < 0)
goto error;
if (strlen (value) >= len) {

if (!(o = json_loads (value, 0, NULL))) {
errno = EINVAL;
goto error;
}
strcpy (buf, value);
flux_future_destroy (f);
return 0;
if (json_unpack (o, "{s:s s:f}",
"rootref", &rootref,
"timestamp", timestamp) < 0) {
errno = EINVAL;
goto error;
}
if (strlen (rootref) >= len) {
errno = EINVAL;
goto error;
}
strcpy (buf, rootref);
rv = 0;
error:
flux_future_destroy (f);
return -1;
json_decref (o);
return rv;
}

/* Read CLOCK_REALTIME and express it as floating-point seconds:
 * the whole seconds plus the nanosecond remainder scaled by 1E-9.
 * On success, store the result in *timestamp and return 0.
 * If clock_gettime() fails, return -1 and leave *timestamp untouched.
 */
static int get_timestamp_now (double *timestamp)
{
    struct timespec now;
    int rc = clock_gettime (CLOCK_REALTIME, &now);

    if (rc >= 0) {
        double fraction = 1E-9 * now.tv_nsec;
        *timestamp = fraction + now.tv_sec;
        return 0;
    }
    return -1;
}

/* Synchronously store key-value pair to checkpoint service.
* Returns 0 on success, -1 on failure.
*/
static int checkpoint_put (flux_t *h, const char *key, const char *value)
static int checkpoint_put (flux_t *h, const char *key, const char *rootref)
{
flux_future_t *f;
flux_future_t *f = NULL;
double timestamp;
json_t *o = NULL;
char *value = NULL;
int rv = -1;

if (get_timestamp_now (&timestamp) < 0)
return -1;
if (!(o = json_pack ("{s:s s:f}",
"rootref", rootref,
"timestamp", timestamp))) {
errno = ENOMEM;
goto error;
}
if (!(value = json_dumps (o, JSON_COMPACT))) {
errno = ENOMEM;
goto error;
}

if (!(f = flux_rpc_pack (h,
"kvs-checkpoint.put",
Expand All @@ -2757,10 +2799,14 @@ static int checkpoint_put (flux_t *h, const char *key, const char *value)
return -1;
if (flux_rpc_get (f, NULL) < 0) {
flux_future_destroy (f);
return -1;
goto error;
}
rv = 0;
error:
flux_future_destroy (f);
return 0;
json_decref (o);
free (value);
return rv;
}

/* Store initial root in local cache, and flush to content cache
Expand Down Expand Up @@ -2850,13 +2896,22 @@ int mod_main (flux_t *h, int argc, char **argv)
if (ctx->rank == 0) {
struct kvsroot *root;
char rootref[BLOBREF_MAX_STRING_SIZE];
double timestamp;
uint32_t owner = getuid ();

/* Look for a checkpoint and use it if found.
* Otherwise start the primary root namespace with an empty directory.
*/
if (checkpoint_get (h, "kvs-primary", rootref, sizeof (rootref)) == 0)
flux_log (h, LOG_INFO, "restored kvs-primary from checkpoint");
if (checkpoint_get (h, "kvs-primary",
rootref, sizeof (rootref), &timestamp) == 0) {
char datestr[128];
time_t sec = timestamp;
struct tm tm;
gmtime_r (&sec, &tm);
strftime (datestr, sizeof (datestr), "%FT%T", &tm);
flux_log (h, LOG_INFO,
"restored kvs-primary from checkpoint on %s", datestr);
}
else {
if (store_initial_rootdir (ctx, rootref, sizeof (rootref)) < 0) {
flux_log_error (h, "storing initial root object");
Expand Down
11 changes: 11 additions & 0 deletions t/t2010-kvs-snapshot-restore.t
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,15 @@ test_expect_success 'content from previous instance survived' '
test_cmp get.exp get.out
'

# Start an instance with content.backing-path pointing at the content.sqlite
# file in the current directory, and save the output of `flux dmesg` to
# dmesg.out so a later test can inspect it.
test_expect_success 're-run instance, verify checkpoint date saved' '
flux start -o,--setattr=content.backing-path=$(pwd)/content.sqlite \
flux dmesg >dmesg.out
'

# Check only for today's date, not the time, since the exact time of the
# checkpoint cannot be predicted.  The kvs module formats the checkpoint
# timestamp with gmtime_r(), so compare against the UTC date; using the
# local date would fail near midnight in non-UTC timezones.
test_expect_success 'verify date in flux logs' '
	today=$(date -u +%Y-%m-%d) &&
	grep checkpoint dmesg.out | grep ${today}
'

test_done

0 comments on commit 80ebe02

Please sign in to comment.