From 877d21fa7406281761796397f99af1eb37b78491 Mon Sep 17 00:00:00 2001
From: Thomas Sell <thomas.sell@bih-charite.de>
Date: Fri, 24 May 2024 15:13:53 +0200
Subject: [PATCH] create new config bih for use with the BIH HPC cluster

Co-authored-by: Shuba Alampalli <47524561+shubavarshini@users.noreply.github.com>
---
 .github/workflows/main.yml |  1 +
 README.md                  |  1 +
 conf/bih.config            | 30 +++++++++++++++++++++
 docs/bih.md                | 54 ++++++++++++++++++++++++++++++++++++++
 nfcore_custom.config       |  1 +
 5 files changed, 87 insertions(+)
 create mode 100644 conf/bih.config
 create mode 100644 docs/bih.md

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 36a75f292..ed36be828 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -44,6 +44,7 @@ jobs:
           - "azurebatch"
           - "bi"
           - "bigpurple"
+          - "bih"
           - "binac"
           - "biohpc_gen"
           - "biowulf"
diff --git a/README.md b/README.md
index 1a02e998c..2bee57a93 100644
--- a/README.md
+++ b/README.md
@@ -97,6 +97,7 @@ Currently documentation is available for the following systems:
 - [AZUREBATCH](docs/azurebatch.md)
 - [BIGPURPLE](docs/bigpurple.md)
 - [BI](docs/bi.md)
+- [BIH](docs/bih.md)
 - [BINAC](docs/binac.md)
 - [BIOHPC_GEN](docs/biohpc_gen.md)
 - [BIOWULF](docs/biowulf.md)
diff --git a/conf/bih.config b/conf/bih.config
new file mode 100644
index 000000000..62564a480
--- /dev/null
+++ b/conf/bih.config
@@ -0,0 +1,30 @@
+params {
+    config_profile_name        = 'bih'
+    config_profile_description = 'Berlin Institute of Health HPC cluster profile provided by nf-core/configs.'
+    config_profile_contact     = 'BIH-HPC IT Team <hpc-helpdesk@bih-charite.de>'
+    config_profile_url         = 'https://www.hpc.bihealth.org/'
+    max_memory                 = 340.GB
+    max_cpus                   = 64
+    max_time                   = 7.d
+}
+
+process {
+    executor       = 'slurm' // run tasks through the SLURM scheduler
+    maxRetries     = 2
+    clusterOptions = '--export=ALL'
+}
+
+apptainer {
+    enabled    = true
+    autoMounts = true
+    cacheDir   = "${params.scratch}/apptainer_img_${USER}" // per-user image cache under the --scratch path
+}
+
+cleanup = true // switched off again by the 'debug' profile
+workDir = "${params.scratch}/work_${USER}" // per-user work directory under the --scratch path
+
+profiles {
+    debug {
+        cleanup = false
+    }
+}
diff --git a/docs/bih.md b/docs/bih.md
new file mode 100644
index 000000000..a22a096c7
--- /dev/null
+++ b/docs/bih.md
@@ -0,0 +1,54 @@
+# nf-core/configs: BIH HPC Configuration
+
+This configuration enables the use of nf-core pipelines on the [BIH HPC cluster at the Berlin Institute of Health operated by CUBI](https://www.hpc.bihealth.org/).
+To use, run a pipeline with `-profile bih`.
+This will download and launch the [`bih.config`](../conf/bih.config) which has been pre-configured with a setup suitable for the BIH HPC cluster.
+It uses Slurm as the scheduler for the compute cluster, defines maximum resources, and specifies cache locations for Apptainer.
+Pipeline specific parameters still need to be configured manually.
+
+### Install Nextflow and nf-core
+
+The latest version of Nextflow is not installed by default on the BIH HPC cluster.
+You can install it via conda following the [official documentation](https://nf-co.re/docs/usage/getting_started/installation#bioconda-installation):
+
+```
+# Install Bioconda according to the documentation, notably setting up channels and disabling auto-activation of the base environment.
+conda config --add channels defaults
+conda config --add channels bioconda
+conda config --add channels conda-forge
+conda config --set auto_activate_base false
+
+# Create the environment env_nf, and install the tool nextflow as well as nf-core.
+conda create --name env_nf nextflow nf-core
+```
+
+### Run Nextflow
+
+Here is an example of an sbatch script with the nf-core pipeline rnaseq ([read documentation here](https://nf-co.re/rnaseq/3.14.0)).
+The user has to include a scratch path and the parameters of the pipeline.
+
+```
+#!/bin/bash
+# Initiating SLURM options
+#SBATCH --job-name=rnaseq_nf
+#SBATCH --mem-per-cpu=10G
+#SBATCH --ntasks=1
+#SBATCH -n 1
+#SBATCH --output=%x_%A_%a.log
+
+# Launch conda and nextflow/nf-core
+source <path to your conda installation>/etc/profile.d/conda.sh
+conda activate env_nf
+
+# Nextflow run
+nextflow run nf-core/rnaseq -r 3.14.0 -profile bih,test,apptainer \
+--scratch "<path to your scratch folder>" \
+--outdir "test_run_rnaseq"
+```
+
+All of the intermediate files required to run the pipeline will be stored in the `<path to your scratch folder>/work_$USER/` directory and the Apptainer images in the `<path to your scratch folder>/apptainer_img_$USER/` directory.
+Therefore, we recommend pointing the `--scratch` option to a user/group/project's scratch directory.
+If the pipeline runs successfully, files in the work directory are deleted automatically.
+If the pipeline exits with an error, the work directory is not deleted and pipeline execution can be continued with the `-resume` option.
+Main output files created by the pipeline will be saved in the `--outdir` directory.
+
diff --git a/nfcore_custom.config b/nfcore_custom.config
index d7a68868e..e96a0ab40 100644
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@@ -20,6 +20,7 @@ profiles {
     azurebatch         { includeConfig "${params.custom_config_base}/conf/azurebatch.config" }
     bi                 { includeConfig "${params.custom_config_base}/conf/bi.config" }
     bigpurple          { includeConfig "${params.custom_config_base}/conf/bigpurple.config" }
+    bih                { includeConfig "${params.custom_config_base}/conf/bih.config" }
     binac              { includeConfig "${params.custom_config_base}/conf/binac.config" }
     biohpc_gen         { includeConfig "${params.custom_config_base}/conf/biohpc_gen.config" }
     biowulf            { includeConfig "${params.custom_config_base}/conf/biowulf.config" }