-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
review Snakemake restart process/rule #86
Comments
We can leverage snakemake's built-in
First iteration of the revised script:
#!/bin/bash
# Exit on the first unhandled command failure. Commands tested in a condition
# (if / while / && / ||) are exempt, which is what allows a failing snakemake
# run to be handled by the script instead of aborting it.
set -e

# Configuration. Every value may be overridden from the environment; the
# default after ":-" applies when the variable is unset or empty.

# --jobs value used when running Snakefile_2.
APSIM_JOBS="${APSIM_JOBS:-100}"
# File that accumulates "<snakefile>:COMPLETED" lines for finished steps.
WORKFLOW_STATE_FILE="${WORKFLOW_STATE_FILE:-.workflow_state}"
# Maximum number of attempts made for each Snakefile.
MAX_RETRIES="${MAX_RETRIES:-3}"
# Write a message to stdout prefixed with the current local time, formatted
# as "[YYYY-MM-DD HH:MM:SS] <message>".
# Arguments:
#   $1 - message text
log_message() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}
# Run one Snakefile through snakemake, retrying when the run fails.
#
# Globals:
#   MAX_RETRIES          (read) maximum number of attempts
#   WORKFLOW_STATE_FILE  (read) file that gets "<snakefile>:COMPLETED"
#                        appended after a successful run
# Arguments:
#   $1 - Snakefile path, passed to `snakemake -s`
#   $2 - value for snakemake's --jobs option
# Outputs:
#   Progress messages through log_message.
# Returns:
#   0 once an attempt succeeds (non-zero if recording the completion in the
#   state file fails); 1 after MAX_RETRIES failed attempts.
run_snakefile() {
  local snakefile=$1
  local jobs=$2
  local attempt=1

  while (( attempt <= MAX_RETRIES )); do
    if (( attempt > 1 )); then
      log_message "Retrying $snakefile (Attempt $attempt of $MAX_RETRIES)"
    else
      log_message "Running $snakefile"
    fi

    if snakemake -s "$snakefile" --profile slurm --jobs "$jobs" --rerun-incomplete; then
      log_message "$snakefile completed successfully"
      echo "$snakefile:COMPLETED" >> "$WORKFLOW_STATE_FILE"
      # Bare return: hand back the status of the state-file write above.
      return
    fi

    if (( attempt >= MAX_RETRIES )); then
      log_message "Failed to run $snakefile after $MAX_RETRIES attempts"
      return 1
    fi

    log_message "Failed to run $snakefile. Waiting 60 seconds before retry..."
    sleep 60
    # Plain assignment instead of (( attempt++ )): a post-increment evaluates
    # to the old value, and an arithmetic command whose value is 0 returns
    # status 1, which aborts the script under `set -e` when this function is
    # called outside of a condition.
    attempt=$((attempt + 1))
  done
}
# Delete intermediate files from the current directory, but only when the
# FAILED_CONFIG directory lists nothing and FAILED_DB lists at most one entry.
# A directory that is missing or unreadable counts as empty because the
# errors from ls are discarded.
#
# Globals:
#   WORKFLOW_STATE_FILE  (read) removed together with the other marker files
# Outputs:
#   Progress messages through log_message.
cleanup() {
  log_message "Checking cleanup conditions..."

  # Guard clause: anything left in FAILED_CONFIG, or more than one entry in
  # FAILED_DB, means the intermediate files are kept. The command
  # substitutions stay inside the condition so that a failing ls cannot
  # abort the script under `set -e`.
  if [[ -n "$(ls -A FAILED_CONFIG 2>/dev/null)" ]] \
    || (( $(ls -1 FAILED_DB 2>/dev/null | wc -l) > 1 )); then
    log_message "FAILED_CONFIG is not empty or FAILED_DB has more than one file. Skipping cleanup."
    return
  fi

  log_message "FAILED_CONFIG is empty and FAILED_DB has at most one file. Cleaning up files..."
  # Only regular files directly in the current directory are removed.
  find . -maxdepth 1 -type f \
    \( -name "*.processed" -o -name "*.apsimx" -o -name "*.txt" -o -name "*.met" \) \
    -delete
  rm -f txt_files_processed db_files_sorted "$WORKFLOW_STATE_FILE"
  log_message "Cleanup completed"
}
# Report whether a Snakefile is recorded as completed in the state file.
#
# Globals:
#   WORKFLOW_STATE_FILE  (read) file holding "<snakefile>:COMPLETED" lines
# Arguments:
#   $1 - Snakefile name to look up
# Returns:
#   0 if the state file exists and contains the exact line
#   "<snakefile>:COMPLETED"; 1 otherwise.
check_workflow_state() {
  local snakefile=$1

  [[ -f "$WORKFLOW_STATE_FILE" ]] || return 1

  # -F: treat the name as a literal string, so characters such as "." in a
  #     Snakefile name are not interpreted as regex metacharacters.
  # -x: the whole line must match, so a name that is only a prefix or suffix
  #     of a recorded entry does not count.
  # --: a name beginning with "-" is not mistaken for an option.
  if grep -qxF -- "${snakefile}:COMPLETED" "$WORKFLOW_STATE_FILE"; then
    return 0
  fi
  return 1
}
# Main workflow: run Snakefile_1 and then Snakefile_2, skipping any step that
# the state file already records as completed, and finish with cleanup.
#
# Globals:
#   WORKFLOW_STATE_FILE  (read/truncated) record of completed steps
#   APSIM_JOBS           (read) --jobs value for Snakefile_2
# Exits:
#   With status 1 as soon as either step fails after its retries.
main() {
  # A state file that already lists Snakefile_2 (the last step) as completed
  # is left over from a finished run: empty it so this invocation starts from
  # the beginning. A state file without that entry belongs to a partial run
  # and is kept, so the steps that did finish are skipped below. The earlier
  # form of this check had the two cases swapped: it wiped the record of a
  # partial run and kept the record of a finished one.
  if check_workflow_state "Snakefile_2"; then
    : > "$WORKFLOW_STATE_FILE"
  fi

  # Process Snakefile_1 if not already completed
  if check_workflow_state "Snakefile_1"; then
    log_message "Skipping Snakefile_1 (already completed)"
  elif ! run_snakefile "Snakefile_1" 1; then
    log_message "Error: Text file processing failed"
    exit 1
  fi

  # Process Snakefile_2 if not already completed
  if check_workflow_state "Snakefile_2"; then
    log_message "Skipping Snakefile_2 (already completed)"
  elif ! run_snakefile "Snakefile_2" "$APSIM_JOBS"; then
    log_message "Error: APSIM file processing failed"
    exit 1
  fi

  log_message "All processing completed successfully"
  cleanup
}
# On SIGINT (Ctrl-C) or SIGTERM, log the interruption and stop with status 1.
trap 'log_message "Workflow interrupted"; exit 1' INT TERM
# Run main workflow, forwarding any command-line arguments.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
No description provided.
The text was updated successfully, but these errors were encountered: