Skip to content

Commit

Permalink
Safeguard suibase-daemon from being started without file lock.
Browse files Browse the repository at this point in the history
  • Loading branch information
mario4tier committed Jul 26, 2024
1 parent 007547c commit 3633722
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 20 deletions.
10 changes: 5 additions & 5 deletions rust/suibase/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion rust/suibase/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ members = ["crates/suibase-daemon",
[workspace.package]
# Bump 'version' for the daemon to self-restart after an update.
# (this is not the Suibase package version, it is specifically for the Rust crates).
version = "0.0.14"
version = "0.0.15"
edition = "2021"

[workspace.dependencies]
Expand Down
30 changes: 30 additions & 0 deletions rust/suibase/crates/suibase-daemon/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,36 @@ impl Command {
pub async fn execute(self, globals: Globals) -> Result<(), anyhow::Error> {
match self {
Command::Run {} => {
// Verify that this process is the one running under file lock ~/tmp/.suibase-daemon.lock
// If not, exit with an exit code of 13 (which is detected as "do not restart" by Suibase run-daemon.sh)
// This is a safeguard against a user trying to start suibase-daemon without the proper locking.

// Get the pid of this process.
let my_pid = std::process::id();

// Locate the bash script "~/suibase/scripts/common/verify-suibase-daemon-lock.sh"
let home_dir = home::home_dir().expect("Failed to get home directory");
let script_path = home_dir.join("suibase/scripts/common/verify-suibase-daemon-lock.sh");

// Call the bash script "~/suibase/scripts/common/verify-suibase-daemon-lock.sh $my_pid"
// Returns OK when the process is the one running under file lock ~/tmp/.suibase-daemon.lock
let output = std::process::Command::new("/bin/bash")
.arg("-c")
.arg(format!("{} {}", script_path.display(), my_pid))
.output();

let mut force_exit = true;

if let Ok(output) = output {
if output.status.success() && output.stdout.starts_with(b"OK") {
force_exit = false;
}
}

if force_exit {
std::process::exit(13);
}

// Create mpsc channels (internal messaging between threads).
//
// The AdminController handles events about configuration changes
Expand Down
45 changes: 31 additions & 14 deletions scripts/common/run-daemon.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ esac


force_stop_all_services() {
# This is called in rare cases.
#
# It will force stop all processes for localnet, but not touch
# This will force stop all processes for localnet, but not touch
# the "user_request" to preserve the user intent.
#
# It will create a "force_stop" key-value in each .state workdir
Expand All @@ -55,7 +53,6 @@ force_stop_all_services() {
# It is assumed that the daemon will delete the force_stop,
# and resume the services as configured by "user_request".


update_SUI_FAUCET_PROCESS_PID_var
if [ -n "$SUI_FAUCET_PROCESS_PID" ]; then
set_key_value "localnet" "force_stop" "true"
Expand All @@ -67,6 +64,22 @@ force_stop_all_services() {
set_key_value "localnet" "force_stop" "true"
stop_sui_process
fi

# Wait until all processes are confirmed stopped (or timeout).
count=0
while [ $count -lt 10 ]; do
if is_suibase_daemon_running; then
# Already running? then do nothing.
exit 0
fi
update_SUI_FAUCET_PROCESS_PID_var
update_SUI_PROCESS_PID_var
if [ -z "$SUI_FAUCET_PROCESS_PID" ] && [ -z "$SUI_PROCESS_PID" ]; then
break
fi
sleep 1
count=$((count + 1))
done
}
export -f force_stop_all_services

Expand Down Expand Up @@ -132,33 +145,37 @@ main() {
# The suibase-daemon is responsible to properly re-start the child processes/services.
if [ "$PARAM_NAME" = "suibase" ]; then
if [ -f "$_LOCKFILE" ]; then
local _BLOCKED=true
for i in 1 2 3; do
if is_suibase_daemon_running; then
_BLOCKED=false
break
# Already running? then do nothing.
exit 0
fi

if ! [ -f "$_LOCKFILE" ]; then
_BLOCKED=false
break
fi

sleep 1
done

if [ $_BLOCKED = true ]; then
# This is to recover when the lockfile exists, but the suibase-daemon
# is NOT running (e.g. was killed). Killing only the daemon is problematic
# when its child processes are left running. This is why all potential
# child services are stopped here.
force_stop_all_services
fi
done
fi
fi

# shellcheck disable=SC2086,SC2016
try_locked_command "$_LOCKFILE" /bin/sh -uec 'while true; do "$@" > $0 2>&1; echo "Restarting process" > $0 2>&1; sleep 1; done' "$_LOG" $_CMD_LINE
try_locked_command "$_LOCKFILE" /bin/sh -uec '
while true; do
"$@" > $0 2>&1
exit_status=$?
if [ $exit_status -eq 13 ]; then
echo "Process exited with status 13. Exiting loop." > $0 2>&1
break
fi
echo "Restarting process" > $0 2>&1
sleep 1
done' "$_LOG" $_CMD_LINE
fi

}
Expand Down
40 changes: 40 additions & 0 deletions scripts/common/verify-suibase-daemon-lock.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash

# Usage: verify-suibase-daemon-lock.sh <pid>
#
# Prints "OK" and exits 0 only when <pid> equals the PID that
# update_SUIBASE_DAEMON_PID_var reports in SUIBASE_DAEMON_PID.
# In every other case an "ERROR: ..." line is printed to stdout and
# the exit code is 1.

# Variables set ahead of sourcing '__globals.sh' further below.
SUIBASE_DIR="$HOME/suibase"
SCRIPT_COMMON_CALLER="$(readlink -f "$0")"
WORKDIR="none"

# Argument checks are done first, before any project script is sourced
# (and therefore before the EXIT trap is installed).

# The PID argument is mandatory...
[[ -n "$1" ]] || {
  echo "ERROR: Missing PID to check"
  exit 1
}

# ...and must consist of decimal digits only.
[[ "$1" =~ ^[0-9]+$ ]] || {
  echo "ERROR: Invalid PID: $1"
  exit 1
}

# shellcheck source=SCRIPTDIR/__globals.sh
source "$SUIBASE_DIR/scripts/common/__globals.sh" "$SCRIPT_COMMON_CALLER" "$WORKDIR"
trap cleanup EXIT

# shellcheck source=SCRIPTDIR/__suibase-daemon.sh
source "$SUIBASE_DIR/scripts/common/__suibase-daemon.sh"

# Refresh SUIBASE_DAEMON_PID (the PID of suibase under the lock file).
update_SUIBASE_DAEMON_PID_var

if [[ -z "$SUIBASE_DAEMON_PID" ]]; then
  # No PID was reported at all.
  echo "ERROR: suibase daemon not running under lock"
  exit 1
elif [[ "$1" != "$SUIBASE_DAEMON_PID" ]]; then
  # A PID was reported, but it is not the one being verified.
  echo "ERROR: PID $1 is not the suibase-daemon PID under proper lock ($SUIBASE_DAEMON_PID)"
  exit 1
fi

# The caller's PID matches the reported daemon PID.
echo "OK"
exit 0

0 comments on commit 3633722

Please sign in to comment.