From 61f6ba0164e5f8421aa8c61bc750c26ed4eaed8d Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Tue, 15 Feb 2022 09:50:55 -0800 Subject: [PATCH] kvs: add date to kvs-primary checkpoint Problem: It'd be convenient if we knew the date when the kvs primary checkpoint was checkpointed. Solution: When checkpointing the primary KVS, store a json object with both the rootref and timestamp, instead of just the rootref string. On retrieval, parse appropriately and retrieve timestamp for output in logs. Fixes #3580 --- src/modules/kvs/kvs.c | 81 ++++++++++++++++++++++++++++------ t/t2010-kvs-snapshot-restore.t | 11 +++++ 2 files changed, 79 insertions(+), 13 deletions(-) diff --git a/src/modules/kvs/kvs.c b/src/modules/kvs/kvs.c index e79536c6eae5..bb6bce98f8cc 100644 --- a/src/modules/kvs/kvs.c +++ b/src/modules/kvs/kvs.c @@ -2711,10 +2711,15 @@ static void process_args (struct kvs_ctx *ctx, int ac, char **av) * Copy value to buf with '\0' termination. * Return 0 on success, -1 on failure, */ -static int checkpoint_get (flux_t *h, const char *key, char *buf, size_t len) +static int checkpoint_get (flux_t *h, const char *key, + char *buf, size_t len, + double *timestamp) { flux_future_t *f; - const char *value; + const char *value = NULL; + json_t *o = NULL; + const char *rootref; + int rv = -1; if (!(f = flux_rpc_pack (h, "kvs-checkpoint.get", @@ -2726,24 +2731,61 @@ static int checkpoint_get (flux_t *h, const char *key, char *buf, size_t len) return -1; if (flux_rpc_get_unpack (f, "{s:s}", "value", &value) < 0) goto error; - if (strlen (value) >= len) { + + if (!(o = json_loads (value, 0, NULL))) { errno = EINVAL; goto error; } - strcpy (buf, value); - flux_future_destroy (f); - return 0; + if (json_unpack (o, "{s:s s:f}", + "rootref", &rootref, + "timestamp", timestamp) < 0) { + errno = EINVAL; + goto error; + } + if (strlen (rootref) >= len) { + errno = EINVAL; + goto error; + } + strcpy (buf, rootref); + rv = 0; error: flux_future_destroy (f); - return -1; + json_decref 
(o); + return rv; +} + +static int get_timestamp_now (double *timestamp) +{ + struct timespec ts; + if (clock_gettime (CLOCK_REALTIME, &ts) < 0) + return -1; + *timestamp = (1E-9 * ts.tv_nsec) + ts.tv_sec; + return 0; } /* Synchronously store key-value pair to checkpoint service. * Returns 0 on success, -1 on failure. */ -static int checkpoint_put (flux_t *h, const char *key, const char *value) +static int checkpoint_put (flux_t *h, const char *key, const char *rootref) { - flux_future_t *f; + flux_future_t *f = NULL; + double timestamp; + json_t *o = NULL; + char *value = NULL; + int rv = -1; + + if (get_timestamp_now (&timestamp) < 0) + return -1; + if (!(o = json_pack ("{s:s s:f}", + "rootref", rootref, + "timestamp", timestamp))) { + errno = ENOMEM; + goto error; + } + if (!(value = json_dumps (o, JSON_COMPACT))) { + errno = ENOMEM; + goto error; + } if (!(f = flux_rpc_pack (h, "kvs-checkpoint.put", @@ -2757,10 +2799,14 @@ static int checkpoint_put (flux_t *h, const char *key, const char *value) return -1; if (flux_rpc_get (f, NULL) < 0) { flux_future_destroy (f); - return -1; + goto error; } + rv = 0; +error: flux_future_destroy (f); - return 0; + json_decref (o); + free (value); + return rv; } /* Store initial root in local cache, and flush to content cache @@ -2850,13 +2896,22 @@ int mod_main (flux_t *h, int argc, char **argv) if (ctx->rank == 0) { struct kvsroot *root; char rootref[BLOBREF_MAX_STRING_SIZE]; + double timestamp; uint32_t owner = getuid (); /* Look for a checkpoint and use it if found. * Otherwise start the primary root namespace with an empty directory. 
*/ - if (checkpoint_get (h, "kvs-primary", rootref, sizeof (rootref)) == 0) - flux_log (h, LOG_INFO, "restored kvs-primary from checkpoint"); + if (checkpoint_get (h, "kvs-primary", + rootref, sizeof (rootref), &timestamp) == 0) { + char datestr[128]; + time_t sec = timestamp; + struct tm tm; + gmtime_r (&sec, &tm); + strftime (datestr, sizeof (datestr), "%FT%T", &tm); + flux_log (h, LOG_INFO, + "restored kvs-primary from checkpoint on %s", datestr); + } else { if (store_initial_rootdir (ctx, rootref, sizeof (rootref)) < 0) { flux_log_error (h, "storing initial root object"); diff --git a/t/t2010-kvs-snapshot-restore.t b/t/t2010-kvs-snapshot-restore.t index 6f0959a32891..4e4503c9a3f7 100755 --- a/t/t2010-kvs-snapshot-restore.t +++ b/t/t2010-kvs-snapshot-restore.t @@ -59,4 +59,15 @@ test_expect_success 'content from previous instance survived' ' test_cmp get.exp get.out ' +test_expect_success 're-run instance, verify checkpoint date saved' ' + flux start -o,--setattr=content.backing-path=$(pwd)/content.sqlite \ + flux dmesg >dmesg.out +' + +# just check for todays date, not time for obvious reasons +test_expect_success 'verify date in flux logs' ' + today=`date --iso-8601` && + grep checkpoint dmesg.out | grep ${today} +' + test_done