From 9e36bb48eeb79c9f1355e3ef039757fc97b6e519 Mon Sep 17 00:00:00 2001 From: McKenzie Paige Hagen Date: Thu, 5 Sep 2024 12:24:35 -0700 Subject: [PATCH 1/6] Add updated sbatch script --- docs/source/resources/mriqc.sbatch | 57 ++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 docs/source/resources/mriqc.sbatch diff --git a/docs/source/resources/mriqc.sbatch b/docs/source/resources/mriqc.sbatch new file mode 100644 index 000000000..683b27092 --- /dev/null +++ b/docs/source/resources/mriqc.sbatch @@ -0,0 +1,57 @@ +#!/bin/bash + +#NOTE: These should work with Slurm HPC systems, + # but these specific paramters have only been tested on + # Stanford's Sherlock. Some parameters may need to be + # adjusted for other HPCs, specifically --partition. + +#SBATCH --job-name mriqc +#SBATCH --partition normal #TODO: update for your HPC + +#NOTE: The --array parameter allows multiple jobs to be launched at once, + # and is generally recommended to efficiently run several hundred jobs + # at once. + +# TODO: adjust the range for your dataset; 1-n%j where n is the number of + # participants and j is the maximum number of concurrent jobs you'd like + # to run. +#SBATCH --array=1-5 +#SBATCH --time=1:00:00 #NOTE: likely much longer than generally needed +#SBATCH --ntasks 1 +#SBATCH --cpus-per-task=16 +#SBATCH --mem-per-cpu=4G +# Outputs ---------------------------------- +#SBATCH --output log/%x-%A-%a.out +#SBATCH --error log/%x-%A-%a.err +#SBATCH --mail-user=%u@stanford.edu #TODO: sub @ for your own inst +#SBATCH --mail-type=ALL +# ------------------------------------------ + +unset PYTHONPATH + +MRIQC_VERSION="24.0.0" #TODO: update if using a different version +BIDS_DIR="ds002785" #TODO: replace with path to your dataset +OUTPUT_DIR="${BIDS_DIR}/derivatives/mriqc-${MRIQC_VERSION}" + +SINGULARITY_CMD="singularity run -e mriqc-${MRIQC_VERSION}.simg" + +#NOTE: The first clause in this line selects a row in participants.tsv using the system generated + #array index variable SLURM_ARRAY_TASK_ID. This is piped to grep to isolate the subject id. +subject=$( sed -n ${SLURM_ARRAY_TASK_ID}p ${BIDS_DIR}/participants.tsv | grep -oP "sub-[0-9]*" ) + +echo Subject $subject + +cmd="${SINGULARITY_CMD} ${BIDS_DIR} ${OUTPUT_DIR} participant \ + --participant-label $subject \ + -w $PWD/work/ \ + --omp-nthreads 8 --mem 10 \ + --verbose-reports" + +echo Running task ${SLURM_ARRAY_TASK_ID} +echo Commandline: $cmd +eval $cmd +exitcode=$? +echo "sub-$subject ${SLURM_ARRAY_TASK_ID} $exitcode" \ + >> ${SLURM_ARRAY_JOB_ID}.tsv +echo Finished tasks ${SLURM_ARRAY_TASK_ID} with exit code $exitcode +exit $exitcode From a77ce270af274f9a8e9ead3261b45e27f9ff0341 Mon Sep 17 00:00:00 2001 From: McKenzie Paige Hagen Date: Fri, 6 Sep 2024 10:22:51 -0700 Subject: [PATCH 2/6] update regex for alphanumerics --- docs/source/resources/mriqc.sbatch | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/docs/source/resources/mriqc.sbatch b/docs/source/resources/mriqc.sbatch index 683b27092..9327b00da 100644 --- a/docs/source/resources/mriqc.sbatch +++ b/docs/source/resources/mriqc.sbatch @@ -15,15 +15,16 @@ # TODO: adjust the range for your dataset; 1-n%j where n is the number of # participants and j is the maximum number of concurrent jobs you'd like # to run. -#SBATCH --array=1-5 -#SBATCH --time=1:00:00 #NOTE: likely much longer than generally needed + +#SBATCH --array=1-216%50 +#SBATCH --time=1:00:00 #NOTE: likely longer than generally needed #SBATCH --ntasks 1 #SBATCH --cpus-per-task=16 #SBATCH --mem-per-cpu=4G # Outputs ---------------------------------- #SBATCH --output log/%x-%A-%a.out #SBATCH --error log/%x-%A-%a.err -#SBATCH --mail-user=%u@stanford.edu #TODO: sub @ for your own inst +#SBATCH --mail-user=%u@stanford.edu #TODO: update for your email domain #SBATCH --mail-type=ALL # ------------------------------------------ @@ -35,9 +36,13 @@ OUTPUT_DIR="${BIDS_DIR}/derivatives/mriqc-${MRIQC_VERSION}" SINGULARITY_CMD="singularity run -e mriqc-${MRIQC_VERSION}.simg" -#NOTE: The first clause in this line selects a row in participants.tsv using the system generated - #array index variable SLURM_ARRAY_TASK_ID. This is piped to grep to isolate the subject id. -subject=$( sed -n ${SLURM_ARRAY_TASK_ID}p ${BIDS_DIR}/participants.tsv | grep -oP "sub-[0-9]*" ) +#NOTE: The first clause in this line selects a row in participants.tsv + # using the system generated array index variable SLURM_ARRAY_TASK_ID. + # This is piped to grep to isolate the subject id. The regex should + # work for most subject naming conventions, but may need to be modified. + +subject=$( sed -n ${SLURM_ARRAY_TASK_ID}p ${BIDS_DIR}/participants.tsv \ + | grep -oP "sub-[A-Za-z0-9_]*" ) echo Subject $subject From c359938776b9c6da138d15502abaed72097836e6 Mon Sep 17 00:00:00 2001 From: McKenzie Paige Hagen <35019015+mckenziephagen@users.noreply.github.com> Date: Tue, 10 Sep 2024 10:32:53 -0700 Subject: [PATCH 3/6] Update mriqc.sbatch fix misspelling in comment for codespell --- docs/source/resources/mriqc.sbatch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/resources/mriqc.sbatch b/docs/source/resources/mriqc.sbatch index 9327b00da..4a8ad105d 100644 --- a/docs/source/resources/mriqc.sbatch +++ b/docs/source/resources/mriqc.sbatch @@ -1,7 +1,7 @@ #!/bin/bash #NOTE: These should work with Slurm HPC systems, - # but these specific paramters have only been tested on + # but these specific parameters have only been tested on # Stanford's Sherlock. Some parameters may need to be # adjusted for other HPCs, specifically --partition. From 991c38558e0e0c040c8383b1bf22de2ff366f6b2 Mon Sep 17 00:00:00 2001 From: McKenzie Paige Hagen <35019015+mckenziephagen@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:47:34 -0700 Subject: [PATCH 4/6] Apply suggestions from code review Co-authored-by: Oscar Esteban --- docs/source/resources/mriqc.sbatch | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/source/resources/mriqc.sbatch b/docs/source/resources/mriqc.sbatch index 4a8ad105d..88beccc33 100644 --- a/docs/source/resources/mriqc.sbatch +++ b/docs/source/resources/mriqc.sbatch @@ -31,10 +31,10 @@ unset PYTHONPATH MRIQC_VERSION="24.0.0" #TODO: update if using a different version -BIDS_DIR="ds002785" #TODO: replace with path to your dataset +BIDS_DIR="${STUDY}/ds002785" # TODO: replace with path to your dataset OUTPUT_DIR="${BIDS_DIR}/derivatives/mriqc-${MRIQC_VERSION}" -SINGULARITY_CMD="singularity run -e mriqc-${MRIQC_VERSION}.simg" +SINGULARITY_CMD="apptainer run -e mriqc_${MRIQC_VERSION}.sif" #NOTE: The first clause in this line selects a row in participants.tsv # using the system generated array index variable SLURM_ARRAY_TASK_ID. @@ -49,8 +49,9 @@ echo Subject $subject cmd="${SINGULARITY_CMD} ${BIDS_DIR} ${OUTPUT_DIR} participant \ --participant-label $subject \ -w $PWD/work/ \ - --omp-nthreads 8 --mem 10 \ - --verbose-reports" + --omp-nthreads 10 --nprocs 12 \ # For nodes with at least 32GB RAM + --verbose-reports \ + --bids-database-dir ${BIDS_DIR}/.bids-index/" echo Running task ${SLURM_ARRAY_TASK_ID} echo Commandline: $cmd From 2e6d03b0b7c591b8a83d4a962e207110ccc0e19c Mon Sep 17 00:00:00 2001 From: McKenzie Paige Hagen Date: Mon, 14 Oct 2024 20:08:42 -0700 Subject: [PATCH 5/6] switch singularity to apptainer --- docs/source/resources/mriqc.sbatch | 34 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) mode change 100644 => 100755 docs/source/resources/mriqc.sbatch diff --git a/docs/source/resources/mriqc.sbatch b/docs/source/resources/mriqc.sbatch old mode 100644 new mode 100755 index 9327b00da..2a6e86b7f --- a/docs/source/resources/mriqc.sbatch +++ b/docs/source/resources/mriqc.sbatch @@ -1,21 +1,16 @@ #!/bin/bash - #NOTE: These should work with Slurm HPC systems, - # but these specific paramters have only been tested on + # but these specific parameters have only been tested on # Stanford's Sherlock. Some parameters may need to be # adjusted for other HPCs, specifically --partition. - #SBATCH --job-name mriqc #SBATCH --partition normal #TODO: update for your HPC - #NOTE: The --array parameter allows multiple jobs to be launched at once, # and is generally recommended to efficiently run several hundred jobs # at once. - # TODO: adjust the range for your dataset; 1-n%j where n is the number of # participants and j is the maximum number of concurrent jobs you'd like # to run. - #SBATCH --array=1-216%50 #SBATCH --time=1:00:00 #NOTE: likely longer than generally needed #SBATCH --ntasks 1 @@ -26,31 +21,36 @@ #SBATCH --error log/%x-%A-%a.err #SBATCH --mail-user=%u@stanford.edu #TODO: update for your email domain #SBATCH --mail-type=ALL + # ------------------------------------------ -unset PYTHONPATH +STUDY="/scratch/users/mphagen/mriqc-protocol" #TODO: replace with your path -MRIQC_VERSION="24.0.0" #TODO: update if using a different version -BIDS_DIR="ds002785" #TODO: replace with path to your dataset + +MRIQC_VERSION="24.0.2" #TODO: update if using a different version +BIDS_DIR="${STUDY}/ds002785" # TODO: replace with path to your dataset OUTPUT_DIR="${BIDS_DIR}/derivatives/mriqc-${MRIQC_VERSION}" -SINGULARITY_CMD="singularity run -e mriqc-${MRIQC_VERSION}.simg" +APPTAINER_CMD="apptainer run -e mriqc_${MRIQC_VERSION}.sif" -#NOTE: The first clause in this line selects a row in participants.tsv +# Offset subject index by 1 because of header in participants.tsv +subject_idx=$(( ${SLURM_ARRAY_TASK_ID} + 1 )) + +##NOTE: The first clause in this line selects a row in participants.tsv # using the system generated array index variable SLURM_ARRAY_TASK_ID. # This is piped to grep to isolate the subject id. The regex should - # work for most subject naming conventions, but may need to be modified. - -subject=$( sed -n ${SLURM_ARRAY_TASK_ID}p ${BIDS_DIR}/participants.tsv \ + # work for most subject naming conventions, but may need to be modified. + +subject=$( sed -n ${subject_idx}p ${BIDS_DIR}/participants.tsv \ | grep -oP "sub-[A-Za-z0-9_]*" ) echo Subject $subject -cmd="${SINGULARITY_CMD} ${BIDS_DIR} ${OUTPUT_DIR} participant \ +cmd="${APPTAINER_CMD} ${BIDS_DIR} ${OUTPUT_DIR} participant \ --participant-label $subject \ -w $PWD/work/ \ - --omp-nthreads 8 --mem 10 \ - --verbose-reports" + --omp-nthreads 10 --nprocs 12 \ + --verbose-reports" echo Running task ${SLURM_ARRAY_TASK_ID} echo Commandline: $cmd From 2c257fd2fd60a316afa5b7eb5d321ef1d591138c Mon Sep 17 00:00:00 2001 From: McKenzie Paige Hagen <35019015+mckenziephagen@users.noreply.github.com> Date: Tue, 15 Oct 2024 11:25:21 -0700 Subject: [PATCH 6/6] Update docs/source/resources/mriqc.sbatch Co-authored-by: Oscar Esteban --- docs/source/resources/mriqc.sbatch | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/source/resources/mriqc.sbatch b/docs/source/resources/mriqc.sbatch index 5f8322e25..c4776c9d0 100755 --- a/docs/source/resources/mriqc.sbatch +++ b/docs/source/resources/mriqc.sbatch @@ -49,9 +49,7 @@ echo Subject $subject cmd="${APPTAINER_CMD} ${BIDS_DIR} ${OUTPUT_DIR} participant \ --participant-label $subject \ -w $PWD/work/ \ - --omp-nthreads 10 --nprocs 12 \ - # For nodes with at least 32GB RAM - --verbose-reports" + --omp-nthreads 10 --nprocs 12" # For nodes with at least 32GB RAM echo Running task ${SLURM_ARRAY_TASK_ID} echo Commandline: $cmd