From 756ce494f635297a7dacd71f699bd18c7e7212fa Mon Sep 17 00:00:00 2001
From: Jim Garlick
Date: Sun, 24 Apr 2022 18:50:38 -0700
Subject: [PATCH] rc: manage dump/restore of backing store content

Problem: we need a way to tell rc scripts to restore content on
startup, and dump content on shutdown, for offline KVS garbage
collection of a system instance or user checkpoint/restart.

Add some logic to rc1 and rc3:

rc1: If the content.restore broker attribute is set to a file path,
then load the content backing store module with the 'truncate' option,
and restore content from the file before loading the KVS.

rc3: If the content.dump broker attribute is set to a file path,
then dump content to the file after unloading the KVS.

Additionally, if content.restore=auto, then rc1 looks for a symlink
named RESTORE in the broker's current working directory or ${statedir}
if defined. If the symlink exists, then restore content from the file
it points to and remove the symlink on success.

If content.dump=auto, then rc3 dumps content to an automatically
generated file name containing the date in the current working
directory or ${statedir} if defined, and creates the RESTORE symlink
pointing to it.

Use case #1 - system instance:
The systemd unit file sets content.restore=auto. Normally, the system
instance just reuses the backing store as it does now. But if
content.dump=auto is set while the instance is running, a dump is
created at shutdown, and the backing store is recreated from the dump
when the instance starts again, accomplishing offline garbage
collection. The flux-shutdown(1) command may set content.dump based on
an option or a "backing store needs GC" heuristic. Tying the dump logic
to flux-shutdown(1) is helpful because then the shutdown can take longer
than the systemd TimeoutStopSec (90s) without getting killed.

Use case #2 - user checkpoint/restart:
A user may choose to checkpoint an instance by running:
  flux setattr content.dump=restart.tgz
and restart with
  flux start -o,-Scontent.restore=restart.tgz
Presumably a flux-shutdown(1) option would just work here as well.
---
 etc/rc1 | 38 +++++++++++++++++++++++++++++++++-----
 etc/rc3 | 28 ++++++++++++++++++++++++++--
 2 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/etc/rc1 b/etc/rc1
index 76b692fafa5b..5acd67884834 100755
--- a/etc/rc1
+++ b/etc/rc1
@@ -3,10 +3,6 @@
 # Allow connector-local more time to start listening on socket
 RANK=$(FLUX_LOCAL_CONNECTOR_RETRY_COUNT=30 flux getattr rank)

-if ! content_backing=$(flux getattr content.backing-module 2>/dev/null); then
-    content_backing=content-sqlite
-fi
-
 # Usage: modload {all|} modname [args ...]
 modload() {
     local where=$1; shift
@@ -16,7 +12,39 @@ modload() {
 }

 modload all barrier
-modload 0 ${content_backing}
+
+# Only rank 0 loads the content backing store module.  If content.restore
+# names a dump file (or is "auto" and a RESTORE symlink exists), load the
+# module with 'truncate' and restore the dump before the KVS is loaded.
+if test "$RANK" -eq 0; then
+    backingmod=$(flux getattr content.backing-module 2>/dev/null) || :
+    backingmod=${backingmod:-content-sqlite}
+    dumpfile=$(flux getattr content.restore 2>/dev/null) || :
+    # Start empty so a stray 'dumplink' in the environment is never removed
+    dumplink=""
+    if test -n "${dumpfile}"; then
+        if test "${dumpfile}" = "auto"; then
+            statedir=$(flux getattr statedir 2>/dev/null) || :
+            dumplink="${statedir:-.}/RESTORE"
+            if test -h "${dumplink}"; then
+                dumpfile=$(readlink -f "${dumplink}") || :
+            else
+                dumpfile=""
+                dumplink=""
+            fi
+        fi
+    fi
+    if test -n "${dumpfile}"; then
+        flux module load "${backingmod}" truncate
+        echo "restoring content from ${dumpfile}"
+        flux restore --quiet --checkpoint "${dumpfile}"
+        if test -n "${dumplink}"; then
+            rm -f "${dumplink}"
+        fi
+    else
+        flux module load "${backingmod}"
+    fi
+fi

 modload all kvs
 modload all kvs-watch
diff --git a/etc/rc3 b/etc/rc3
index af72f5bd607c..52fd78daac34 100755
--- a/etc/rc3
+++ b/etc/rc3
@@ -45,7 +45,31 @@ modrm all kvs

 flux content flush || exit_rc=1

-backingmod=$(flux getattr content.backing-module 2>/dev/null)
-modrm 0 ${backingmod:-content-sqlite}
+# The backing store module is removed on rank 0 only.  If content.dump is
+# set and nothing has failed so far, dump content to a file first.
+if test "$RANK" -eq 0; then
+    backingmod=$(flux getattr content.backing-module 2>/dev/null)
+    backingmod=${backingmod:-content-sqlite}
+    dumpfile=$(flux getattr content.dump 2>/dev/null)
+    # Start empty so a stray 'dumplink' in the environment is ignored
+    dumplink=""
+    if test "${exit_rc}" -eq 0 && test -n "${dumpfile}"; then
+        if test "${dumpfile}" = "auto"; then
+            statedir=$(flux getattr statedir 2>/dev/null)
+            dumpfile="${statedir:-.}/dump-$(date +%Y%m%d_%H%M%S).tgz"
+            dumplink="${statedir:-.}/RESTORE"
+        fi
+        echo "dumping content to ${dumpfile}"
+        if flux dump --quiet --checkpoint "${dumpfile}"; then
+            # Replace any stale RESTORE link so it names the newest dump
+            if test -n "${dumplink}"; then
+                ln -sf "$(basename "${dumpfile}")" "${dumplink}" || exit_rc=1
+            fi
+        else
+            exit_rc=1
+        fi
+    fi
+    flux module remove "${backingmod}" || exit_rc=1
+fi

 exit $exit_rc