From 877d21fa7406281761796397f99af1eb37b78491 Mon Sep 17 00:00:00 2001
From: Thomas Sell
Date: Fri, 24 May 2024 15:13:53 +0200
Subject: [PATCH] create new config bih for use with the BIH HPC cluster

Co-authored-by: Shuba Alampalli <47524561+shubavarshini@users.noreply.github.com>
---
 .github/workflows/main.yml |  1 +
 README.md                  |  1 +
 conf/bih.config            | 30 +++++++++++++++++++++
 docs/bih.md                | 54 ++++++++++++++++++++++++++++++++++++++
 nfcore_custom.config       |  1 +
 5 files changed, 87 insertions(+)
 create mode 100644 conf/bih.config
 create mode 100644 docs/bih.md

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 36a75f292..ed36be828 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -44,6 +44,7 @@ jobs:
           - "azurebatch"
           - "bi"
           - "bigpurple"
+          - "bih"
           - "binac"
           - "biohpc_gen"
           - "biowulf"
diff --git a/README.md b/README.md
index 1a02e998c..2bee57a93 100644
--- a/README.md
+++ b/README.md
@@ -97,6 +97,7 @@ Currently documentation is available for the following systems:
 - [AZUREBATCH](docs/azurebatch.md)
 - [BIGPURPLE](docs/bigpurple.md)
 - [BI](docs/bi.md)
+- [BIH](docs/bih.md)
 - [BINAC](docs/binac.md)
 - [BIOHPC_GEN](docs/biohpc_gen.md)
 - [BIOWULF](docs/biowulf.md)
diff --git a/conf/bih.config b/conf/bih.config
new file mode 100644
index 000000000..62564a480
--- /dev/null
+++ b/conf/bih.config
@@ -0,0 +1,30 @@
+params {
+    config_profile_name = 'bih'
+    config_profile_description = 'Berlin Institute of Health HPC cluster profile provided by nf-core/configs.'
+    config_profile_contact = 'BIH-HPC IT Team '
+    config_profile_url = 'https://www.hpc.bihealth.org/'
+    max_memory = 340.GB
+    max_cpus = 64
+    max_time = 7.d
+}
+
+process {
+    executor = 'slurm'
+    maxRetries = 2
+    clusterOptions = "--export=ALL"
+}
+
+apptainer {
+    enabled = true
+    autoMounts = true
+    cacheDir = "$params.scratch/apptainer_img_$USER"
+}
+
+cleanup = true
+workDir = "$params.scratch/work_$USER"
+
+profiles {
+    debug {
+        cleanup = false
+    }
+}
diff --git a/docs/bih.md b/docs/bih.md
new file mode 100644
index 000000000..a22a096c7
--- /dev/null
+++ b/docs/bih.md
@@ -0,0 +1,54 @@
+# nf-core/configs: BIH HPC Configuration
+
+This configuration enables the use of nf-core pipelines on the [BIH HPC cluster at the Berlin Institute of Health operated by CUBI](https://www.hpc.bihealth.org/).
+To use, run a pipeline with `-profile bih`.
+This will download and launch the [`bih.config`](../conf/bih.config) which has been pre-configured with a setup suitable for the BIH HPC cluster.
+It will use slurm as a scheduler for the compute cluster, defines max resources, and specifies cache locations for apptainer.
+Pipeline specific parameters still need to be configured manually.
+
+### Install Nextflow and nf-core
+
+The latest version of Nextflow is not installed by default on the BIH HPC cluster.
+You can install it via conda following the [official documentation](https://nf-co.re/docs/usage/getting_started/installation#bioconda-installation):
+
+```
+# Install Bioconda according to the documentation, notably setting up channels and disabling auto-activation of the base environment.
+conda config --add channels defaults
+conda config --add channels bioconda
+conda config --add channels conda-forge
+conda config --set auto_activate_base false
+
+# Create the environment env_nf, and install the tool nextflow as well as nf-core.
+conda create --name env_nf nextflow nf-core
+```
+
+### Run Nextflow
+
+Here is an example of an sbatch script with the nf-core pipeline rnaseq ([read documentation here](https://nf-co.re/rnaseq/3.14.0)).
+The user has to include a scratch path and the parameters of the pipeline.
+
+```
+#!/bin/bash
+# Initiating SLURM options
+#SBATCH --job-name=rnaseq_nf
+#SBATCH --mem-per-cpu=10G
+#SBATCH --ntasks=1
+#SBATCH -n 1
+#SBATCH --output=%x_%A_%a.log
+
+# Launch conda and nextflow/nf-core
+source /etc/profile.d/conda.sh
+conda activate env_nf
+
+# Nextflow run
+nextflow run nf-core/rnaseq -r 3.14.0 -profile bih,test,apptainer \
+--scratch "/path/to/scratch" \
+--outdir "test_run_rnaseq"
+```
+
+All of the intermediate files required to run the pipeline will be stored in the `/path/to/scratch/work_$USER/` directory and the docker/apptainer images in the `/path/to/scratch/apptainer_img_$USER/`.
+Therefore, we recommend the `--scratch` option to point to a user/group/project's scratch directory.
+If the pipeline runs successfully, files in the work directory are deleted automatically.
+If the pipeline exits with an error, the work directory is not deleted and pipeline execution can be continued with the `-resume` option.
+Main output files created by the pipeline will be saved in the `--outdir` directory.
+
diff --git a/nfcore_custom.config b/nfcore_custom.config
index d7a68868e..e96a0ab40 100644
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@@ -20,6 +20,7 @@ profiles {
     azurebatch { includeConfig "${params.custom_config_base}/conf/azurebatch.config" }
     bi { includeConfig "${params.custom_config_base}/conf/bi.config" }
     bigpurple { includeConfig "${params.custom_config_base}/conf/bigpurple.config" }
+    bih { includeConfig "${params.custom_config_base}/conf/bih.config" }
     binac { includeConfig "${params.custom_config_base}/conf/binac.config" }
     biohpc_gen { includeConfig "${params.custom_config_base}/conf/biohpc_gen.config" }
     biowulf { includeConfig "${params.custom_config_base}/conf/biowulf.config" }