Skip to content

Commit

Permalink
Merge pull request #2783 from garlick/checkpoint_restart
Browse files Browse the repository at this point in the history
add KVS checkpoints
  • Loading branch information
mergify[bot] authored Mar 7, 2020
2 parents a360528 + 558d46f commit 02ef011
Show file tree
Hide file tree
Showing 10 changed files with 402 additions and 207 deletions.
19 changes: 7 additions & 12 deletions doc/man7/flux-broker-attributes.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,12 @@ the session. By default broker.rundir is set to "${rundir}/${rank}",
which guarantees a unique directory per rank. It is not advisable
to override this attribute on the command line. Use rundir instead.

persist-directory::
A persistent directory available for storage on rank 0 only.
If persist-directory is not defined, persistence is unavailable
and users should fall back to broker.rundir, with cleanup.

persist-filesystem::
If defined, and persist-directory is not defined, the rank
0 broker chooses a unique name for persist-directory within
persist-filesystem and creates it automatically.

content.backing-path::
The path to the content backing store file(s). If this is set on the
broker command line, the backing store uses this path instead of
a temporary one, and content is preserved on instance exit.
If the file exists, its content is imported into the instance.
If it doesn't exist, it is created.

TOPOLOGY ATTRIBUTES
-------------------
Expand Down Expand Up @@ -115,8 +111,7 @@ to stderr on the logging rank, for capture by the enclosing instance.
log-filename::
(rank zero only) If set, session log entries, as filtered by log-forward-level,
are directed to this file. If unset, but persist-directory is set, log
entries are directed to persist-directory/log.
are directed to this file.
log-stderr-level::
(rank zero only) Session log entries at syslog(3) level at or below this
Expand Down
11 changes: 2 additions & 9 deletions etc/rc3
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ flux module remove -f job-exec
flux module remove -f job-manager
flux exec -r all flux module remove -f job-ingest

if PERSISTDIR=$(flux getattr persist-directory 2>/dev/null); then
/bin/true; # XXX: nothing to persist?
fi

flux module remove -f userdb

flux module remove -f cron
Expand All @@ -34,10 +30,7 @@ flux exec -r all flux module remove -f barrier
flux exec -r all flux module remove -f job-info
flux exec -r all flux module remove -f kvs-watch
flux exec -r all -x 0 flux module remove -f kvs
if test -n "$PERSISTDIR"; then
flux kvs getroot >${PERSISTDIR}/kvsroot.final
flux content flush
fi

flux module remove -f kvs
flux content flush
flux module remove -f content-sqlite

85 changes: 0 additions & 85 deletions src/broker/broker.c
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ static void runlevel_cb (runlevel_t *r, int level, int rc, double elapsed,
static void runlevel_io_cb (runlevel_t *r, const char *name,
const char *msg, void *arg);

static int create_persistdir (attr_t *attrs, uint32_t rank);
static int create_rundir (attr_t *attrs);
static int create_broker_rundir (overlay_t *ov, void *arg);
static int create_dummyattrs (flux_t *h, uint32_t rank, uint32_t size);
Expand Down Expand Up @@ -499,14 +498,6 @@ int main (int argc, char *argv[])
// Setup profiling
setup_profiling (argv[0], rank);

/* If persist-filesystem or persist-directory are set, initialize those,
* but only on rank 0.
*/
if (create_persistdir (ctx.attrs, rank) < 0) {
log_err ("create_persistdir");
goto cleanup;
}

/* Initialize logging.
* OK to call flux_log*() after this.
*/
Expand Down Expand Up @@ -1107,82 +1098,6 @@ static int create_broker_rundir (overlay_t *ov, void *arg)
return rv;
}

/* Check that 'path' names a directory on which the owner has read, write,
 * and search permission (all of S_IRWXU).
 * Returns 0 on success.  Returns -1 on failure with errno set by stat(2),
 * or set to ENOTDIR / EPERM by the checks below.
 */
static int persist_check_dir (const char *path)
{
    struct stat sb;

    if (stat (path, &sb) < 0)
        return -1;
    if (!S_ISDIR (sb.st_mode)) {
        errno = ENOTDIR;
        return -1;
    }
    if ((sb.st_mode & S_IRWXU) != S_IRWXU) {
        errno = EPERM;
        return -1;
    }
    return 0;
}

/* Initialize the 'persist-directory' and 'persist-filesystem' attributes.
 *
 * rank > 0: both attributes are deleted (errors ignored).
 * rank 0:
 * - If 'persist-directory' is set, validate it and make it immutable.
 * - Otherwise, if 'persist-filesystem' is set, validate it, make it
 *   immutable, create a unique "fluxP-<pid>-XXXXXX" directory inside it
 *   with mkdtemp(3), and add that path as an immutable 'persist-directory'.
 *   The "fluxP" prefix avoids name collisions with other flux tmpdirs used
 *   in testing, e.g. "flux-<pid>-XXXXXX".
 *
 * On every successful path, whichever of the two attributes is still unset
 * is added with a NULL value and the immutable flag.
 *
 * Returns 0 on success, -1 on failure (errno is set by the failing call or
 * by the directory validation).
 */
static int create_persistdir (attr_t *attrs, uint32_t rank)
{
    const char *attr = "persist-directory";
    const char *persist_dir, *persist_fs;
    char *tmpl = NULL;
    int rc = -1;

    if (rank > 0) {
        (void) attr_delete (attrs, "persist-filesystem", true);
        (void) attr_delete (attrs, attr, true);
        goto done_success;
    }
    if (attr_get (attrs, attr, &persist_dir, NULL) == 0) {
        if (persist_check_dir (persist_dir) < 0)
            goto done;
        if (attr_set_flags (attrs, attr, FLUX_ATTRFLAG_IMMUTABLE) < 0)
            goto done;
    }
    else if (attr_get (attrs, "persist-filesystem", &persist_fs, NULL) == 0) {
        if (persist_check_dir (persist_fs) < 0)
            goto done;
        if (attr_set_flags (attrs, "persist-filesystem",
                            FLUX_ATTRFLAG_IMMUTABLE) < 0)
            goto done;
        if (asprintf (&tmpl,
                      "%s/fluxP-%d-XXXXXX",
                      persist_fs,
                      (int)getpid()) < 0) {
            /* The pointer is indeterminate after a failed asprintf();
             * reset it so the cleanup below does not free garbage.
             */
            tmpl = NULL;
            goto done;
        }
        /* On success mkdtemp() rewrites tmpl in place and returns it. */
        if (!mkdtemp (tmpl))
            goto done;
        /* NOTE(review): tmpl is freed below, so attr_add() is presumed to
         * copy the value -- confirm against the attr implementation.
         */
        if (attr_add (attrs, attr, tmpl, FLUX_ATTRFLAG_IMMUTABLE) < 0) {
            /* Do not leave an orphaned directory behind; keep the errno
             * from attr_add() for the caller's error report.
             */
            int saved_errno = errno;
            (void) rmdir (tmpl);
            errno = saved_errno;
            goto done;
        }
    }
    /* Neither attribute set on rank 0: nothing to validate or create. */
done_success:
    if (attr_get (attrs, "persist-filesystem", NULL, NULL) < 0) {
        if (attr_add (attrs, "persist-filesystem", NULL,
                      FLUX_ATTRFLAG_IMMUTABLE) < 0)
            goto done;
    }
    if (attr_get (attrs, attr, NULL, NULL) < 0) {
        if (attr_add (attrs, attr, NULL, FLUX_ATTRFLAG_IMMUTABLE) < 0)
            goto done;
    }
    rc = 0;
done:
    free (tmpl);
    return rc;
}

static bool nodeset_member (const char *s, uint32_t rank)
{
struct idset *ns = NULL;
Expand Down
14 changes: 0 additions & 14 deletions src/broker/log.c
Original file line number Diff line number Diff line change
Expand Up @@ -411,26 +411,12 @@ static int attr_set_log (const char *name, const char *val, void *arg)

static int logbuf_register_attrs (logbuf_t *logbuf, attr_t *attrs)
{
char s[PATH_MAX];
const char *val;
int rc = -1;

/* log-filename
* Only allowed to be set on rank 0 (ignore initial value on rank > 0).
* If unset, and persist-directory is set, make it ${persist-directory}/log
*/
if (logbuf->rank == 0) {
if (attr_get (attrs, "log-filename", NULL, NULL) < 0
&& attr_get (attrs, "persist-directory", &val, NULL) == 0 && val) {
if (snprintf (s, sizeof (s), "%s/log", val) >= sizeof (s)) {
log_err ("log-filename truncated");
goto done;
}
if (attr_add (attrs, "log-filename", s, 0) < 0) {
log_err ("could not initialize log-filename");
goto done;
}
}
if (attr_add_active (attrs, "log-filename", 0,
attr_get_log, attr_set_log, logbuf) < 0)
goto done;
Expand Down
Loading

0 comments on commit 02ef011

Please sign in to comment.