Skip to content

Commit

Permalink
Merge pull request #4303 from garlick/content_truncate
Browse files Browse the repository at this point in the history
flux-shutdown: add --gc garbage collection option
  • Loading branch information
mergify[bot] authored May 2, 2022
2 parents f8473ee + 2ae2799 commit d9c64e7
Show file tree
Hide file tree
Showing 16 changed files with 337 additions and 21 deletions.
5 changes: 5 additions & 0 deletions etc/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
systemdsystemunit_DATA = flux.service
#endif

tmpfilesdir = $(prefix)/lib/tmpfiles.d

tmpfiles_DATA = flux.conf

dist_fluxrc_SCRIPTS = \
rc1 \
rc3
Expand Down Expand Up @@ -43,6 +47,7 @@ noinst_SCRIPTS = \

EXTRA_DIST = \
gen-cmdhelp.py \
flux.conf \
$(noinst_SCRIPTS)

completions/flux: $(srcdir)/completions/flux.pre
Expand Down
4 changes: 4 additions & 0 deletions etc/flux.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# See tmpfiles.d(5)
# remove Flux dump files older than 30 days

e /var/lib/flux/dump - - - 30d
1 change: 1 addition & 0 deletions etc/flux.service.in
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ ExecStart=/bin/bash -c '\
-Sbroker.quorum=0 \
-Sbroker.quorum-timeout=none \
-Sbroker.exit-norestart=42 \
-Scontent.restore=auto \
'
SyslogIdentifier=flux
ExecReload=@X_BINDIR@/flux config reload
Expand Down
33 changes: 28 additions & 5 deletions etc/rc1
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@
# Allow connector-local more time to start listening on socket
RANK=$(FLUX_LOCAL_CONNECTOR_RETRY_COUNT=30 flux getattr rank)

if ! content_backing=$(flux getattr content.backing-module 2>/dev/null); then
content_backing=content-sqlite
fi

# Usage: modload {all|<rank>} modname [args ...]
modload() {
local where=$1; shift
Expand All @@ -16,7 +12,34 @@ modload() {
}

modload all barrier
modload 0 ${content_backing}

if test $RANK -eq 0; then
backingmod=$(flux getattr content.backing-module 2>/dev/null) || :
backingmod=${backingmod:-content-sqlite}
dumpfile=$(flux getattr content.restore 2>/dev/null) || :
if test -n "${dumpfile}"; then
if test "${dumpfile}" = "auto"; then
statedir=$(flux getattr statedir 2>/dev/null) || :
dumplink="${statedir:-.}/dump/RESTORE"
if test -h "${dumplink}"; then
dumpfile=$(readlink -f ${dumplink}) || :
else
dumpfile=""
dumplink=""
fi
fi
fi
if test -n "${dumpfile}"; then
flux module load ${backingmod} truncate
echo "restoring content from ${dumpfile}"
flux restore --quiet --checkpoint ${dumpfile}
if test -n "${dumplink}"; then
rm -f ${dumplink}
fi
else
flux module load ${backingmod}
fi
fi

modload all kvs
modload all kvs-watch
Expand Down
22 changes: 20 additions & 2 deletions etc/rc3
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,25 @@ modrm all kvs

flux content flush || exit_rc=1

backingmod=$(flux getattr content.backing-module 2>/dev/null)
modrm 0 ${backingmod:-content-sqlite}
if test $RANK -eq 0; then
backingmod=$(flux getattr content.backing-module 2>/dev/null)
backingmod=${backingmod:-content-sqlite}
dumpfile=$(flux getattr content.dump 2>/dev/null)
if test $exit_rc -eq 0 -a -n "${dumpfile}"; then
if test "${dumpfile}" = "auto"; then
statedir=$(flux getattr statedir 2>/dev/null)
mkdir -p "${statedir:-.}/dump"
dumpfile="${statedir:-.}/dump/$(date +%Y%m%d_%H%M%S).tgz"
dumplink="${statedir:-.}/dump/RESTORE"
fi
echo "dumping content to ${dumpfile}"
if flux dump --quiet --checkpoint ${dumpfile}; then
test -n "$dumplink" && ln -s $(basename ${dumpfile}) ${dumplink}
else
exit_rc=1
fi
fi
flux module remove ${backingmod} || exit_rc=1
fi

exit $exit_rc
15 changes: 15 additions & 0 deletions src/cmd/builtin/shutdown.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,15 @@ static int subcmd (optparse_t *p, int ac, char *av[])
if (optparse_hasopt (p, "background"))
flags &= ~FLUX_RPC_STREAMING;

if (optparse_hasopt (p, "gc") || optparse_hasopt (p, "dump")) {
const char *val = optparse_get_str (p, "dump", "auto");

if (flux_attr_set (h, "content.dump", val) < 0)
log_err_exit ("error setting content.dump attribute");

log_msg ("shutdown will dump KVS (this may take some time)");
}

/* N.B. set nodeid=FLUX_NODEID_ANY so we get immediate error from
* broker if run on rank > 0.
*/
Expand All @@ -90,6 +99,12 @@ static int subcmd (optparse_t *p, int ac, char *av[])
}

static struct optparse_option opts[] = {
{ .name = "gc", .has_arg = 0,
.usage = "Garbage collect KVS (short for --dump=auto)",
},
{ .name = "dump", .has_arg = 1, .arginfo = "PATH",
.usage = "Dump KVS content to specified archive file using flux-dump(1)."
},
{ .name = "background", .has_arg = 0,
.usage = "Exit the command immediately after initiating shutdown",
},
Expand Down
60 changes: 54 additions & 6 deletions src/modules/content-files/content-files.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@

#include "src/common/libutil/blobref.h"
#include "src/common/libutil/log.h"
#include "src/common/libutil/dirwalk.h"
#include "src/common/libutil/unlink_recursive.h"

#include "src/common/libcontent/content-util.h"

Expand All @@ -69,6 +71,43 @@ struct content_files {
int hash_size;
};

static int file_count_cb (dirwalk_t *d, void *arg)
{
int *count = arg;

if (!dirwalk_isdir (d))
(*count)++;
return 0;
}

static int get_object_count (const char *path)
{
int count = 0;
if (dirwalk (path, 0, file_count_cb, &count) < 0)
return -1;
return count;
}

static void stats_get_cb (flux_t *h,
flux_msg_handler_t *mh,
const flux_msg_t *msg,
void *arg)
{
struct content_files *ctx = arg;
int count;

if ((count = get_object_count (ctx->dbpath)) < 0)
goto error;

if (flux_respond_pack (h, msg, "{s:i}", "object_count", count) < 0)
flux_log_error (h, "error responding to stats.get request");
return;
error:
if (flux_respond_error (h, msg, errno, NULL) < 0)
flux_log_error (h, "error responding to stats.get request");
}


/* Handle a content-backing.load request from the rank 0 broker's
* content-cache service. The raw request payload is a hash digest.
* The raw response payload is the blob content.
Expand Down Expand Up @@ -259,12 +298,13 @@ static const struct flux_msg_handler_spec htab[] = {
{ FLUX_MSGTYPE_REQUEST, "content-backing.store", store_cb, 0 },
{ FLUX_MSGTYPE_REQUEST, "kvs-checkpoint.get", checkpoint_get_cb, 0 },
{ FLUX_MSGTYPE_REQUEST, "kvs-checkpoint.put", checkpoint_put_cb, 0 },
{ FLUX_MSGTYPE_REQUEST, "content-files.stats.get", stats_get_cb, 0 },
FLUX_MSGHANDLER_TABLE_END,
};

/* Create module context and perform some initialization.
*/
static struct content_files *content_files_create (flux_t *h)
static struct content_files *content_files_create (flux_t *h, bool truncate)
{
struct content_files *ctx;
const char *dbdir;
Expand Down Expand Up @@ -295,6 +335,8 @@ static struct content_files *content_files_create (flux_t *h)
}
if (asprintf (&ctx->dbpath, "%s/content.files", dbdir) < 0)
goto error;
if (truncate)
(void)unlink_recursive (ctx->dbpath);
if (mkdir (ctx->dbpath, 0700) < 0 && errno != EEXIST) {
flux_log_error (h, "could not create %s", ctx->dbpath);
goto error;
Expand All @@ -307,15 +349,20 @@ static struct content_files *content_files_create (flux_t *h)
return NULL;
}

static int parse_args (flux_t *h, int argc, char **argv, bool *testing)
static int parse_args (flux_t *h,
int argc,
char **argv,
bool *testing,
bool *truncate)
{
int i;
for (i = 0; i < argc; i++) {
if (!strcmp (argv[i], "testing"))
*testing = true;
else if (!strcmp (argv[i], "truncate"))
*truncate = true;
else {
errno = EINVAL;
flux_log_error (h, "%s", argv[i]);
flux_log (h, LOG_ERR, "Unknown module option: %s", argv[i]);
return -1;
}
}
Expand All @@ -338,11 +385,12 @@ int mod_main (flux_t *h, int argc, char **argv)
{
struct content_files *ctx;
bool testing = false;
bool truncate = false;
int rc = -1;

if (parse_args (h, argc, argv, &testing) < 0)
if (parse_args (h, argc, argv, &testing, &truncate) < 0)
return -1;
if (!(ctx = content_files_create (h))) {
if (!(ctx = content_files_create (h, truncate))) {
flux_log_error (h, "content_files_create failed");
return -1;
}
Expand Down
20 changes: 20 additions & 0 deletions src/modules/content-s3/content-s3.c
Original file line number Diff line number Diff line change
Expand Up @@ -469,11 +469,31 @@ static struct content_s3 *content_s3_create (flux_t *h)
return NULL;
}

static int parse_args (flux_t *h, int argc, char **argv)
{
for (int i = 0; i < argc; i++) {
if (!strcmp (argv[i], "truncate")) {
flux_log (h,
LOG_ERR,
"truncate is not implemented. Use S3 console"
" or other external mechanism to empty bucket.");
return -1;
}
else {
flux_log (h, LOG_ERR, "Unknown module option: %s", argv[i]);
return -1;
}
}
return 0;
}

int mod_main (flux_t *h, int argc, char **argv)
{
struct content_s3 *ctx;
int rc = -1;

if (parse_args (h, argc, argv) < 0)
return -1;
if (!(ctx = content_s3_create (h))) {
flux_log_error (h, "content_s3_create failed");
return -1;
Expand Down
22 changes: 17 additions & 5 deletions src/modules/content-sqlite/content-sqlite.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ struct content_sqlite {
struct content_stats stats;
const char *journal_mode;
const char *synchronous;
bool truncate;
};

static void log_sqlite_error (struct content_sqlite *ctx, const char *fmt, ...)
Expand Down Expand Up @@ -600,12 +601,15 @@ void stats_get_cb (flux_t *h,

/* Open the database file ctx->dbfile and set up the database.
*/
static int content_sqlite_opendb (struct content_sqlite *ctx)
static int content_sqlite_opendb (struct content_sqlite *ctx, bool truncate)
{
int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE;
char s[128];
int count;

if (truncate)
(void)unlink (ctx->dbfile);

if (sqlite3_open_v2 (ctx->dbfile, &ctx->db, flags, NULL) != SQLITE_OK) {
log_sqlite_error (ctx, "opening %s", ctx->dbfile);
goto error;
Expand Down Expand Up @@ -786,7 +790,10 @@ static struct content_sqlite *content_sqlite_create (flux_t *h)
return NULL;
}

static int process_args (struct content_sqlite *ctx, int argc, char **argv)
static int process_args (struct content_sqlite *ctx,
int argc,
char **argv,
bool *truncate)
{
int i;
for (i = 0; i < argc; i++) {
Expand All @@ -796,8 +803,11 @@ static int process_args (struct content_sqlite *ctx, int argc, char **argv)
else if (strncmp ("synchronous=", argv[i], 12) == 0) {
ctx->synchronous = argv[i] + 12;
}
else if (strcmp ("truncate", argv[i]) == 0) {
*truncate = true;
}
else {
flux_log_error (ctx->h, "Unknown module option: '%s'", argv[i]);
flux_log (ctx->h, LOG_ERR, "Unknown module option: '%s'", argv[i]);
return -1;
}
}
Expand All @@ -807,15 +817,17 @@ static int process_args (struct content_sqlite *ctx, int argc, char **argv)
int mod_main (flux_t *h, int argc, char **argv)
{
struct content_sqlite *ctx;
bool truncate = false;
int rc = -1;

if (!(ctx = content_sqlite_create (h))) {
flux_log_error (h, "content_sqlite_create failed");
return -1;
}
if (process_args (ctx, argc, argv) < 0) // override pragmas set above
// override pragmas set above
if (process_args (ctx, argc, argv, &truncate) < 0)
goto done;
if (content_sqlite_opendb (ctx) < 0)
if (content_sqlite_opendb (ctx, truncate) < 0)
goto done;
if (content_register_backing_store (h, "content-sqlite") < 0)
goto done;
Expand Down
1 change: 1 addition & 0 deletions t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ TESTSCRIPTS = \
t2807-dump-cmd.t \
t2808-shutdown-cmd.t \
t2809-job-purge.t \
t2810-kvs-garbage-collect.t \
t2900-job-timelimits.t \
t3000-mpi-basic.t \
t3001-mpi-personalities.t \
Expand Down
2 changes: 2 additions & 0 deletions t/sharness.d/flux-sharness.sh
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ make_bootstrap_config() {
local full="0-$(($size-1))"

mkdir $workdir/conf.d
mkdir $workdir/state
flux keygen $workdir/cert
cat >$workdir/conf.d/bootstrap.toml <<-EOT
[bootstrap]
Expand All @@ -121,6 +122,7 @@ make_bootstrap_config() {
echo "--test-start-mode=${TEST_UNDER_FLUX_START_MODE:-all}"
echo "-o,-Stbon.fanout=${TEST_UNDER_FLUX_FANOUT:-$size}"
echo "-o,-Stbon.zmqdebug=1"
echo "-o,-Sstatedir=$workdir/state"
}

#
Expand Down
8 changes: 5 additions & 3 deletions t/t0012-content-sqlite.t
Original file line number Diff line number Diff line change
Expand Up @@ -207,11 +207,13 @@ test_expect_success 'remove read permission from content.sqlite file' '
chmod u-w $(flux getattr rundir)/content.sqlite &&
test_must_fail flux module load content-sqlite
'
test_expect_success 'restore read permission on content.sqlite file' '
chmod u+w $(flux getattr rundir)/content.sqlite
'

# Clean slate for a few more tests
test_expect_success 'remove content.sqlite file' '
rm $(flux getattr rundir)/content.sqlite &&
flux module load content-sqlite
test_expect_success 'load content-sqlite with truncate option' '
flux module load content-sqlite truncate
'
test_expect_success 'content-sqlite and content-cache are empty' '
test $(flux module stats \
Expand Down
Loading

0 comments on commit d9c64e7

Please sign in to comment.