
Template update #475

Merged
Changes from all commits
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report.yml
Original file line number Diff line number Diff line change
@@ -36,7 +36,7 @@ body:
attributes:
label: Relevant files
description: |
Please upload (drag and drop) and relevant files. Make into a `.zip` file if the extension is not allowed.
Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed.
Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_, as are any custom Nextflow configuration files.

- type: textarea
8 changes: 4 additions & 4 deletions .github/workflows/awsfulltest.yml
@@ -14,14 +14,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Launch workflow via tower
uses: nf-core/tower-action@master
uses: nf-core/tower-action@v2
# TODO nf-core: You can customise AWS full pipeline tests as required
# Add full size test data (but still relatively small datasets for few samples)
# on the `test_full.config` test runs with only one set of parameters

with:
workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
bearer_token: ${{ secrets.TOWER_BEARER_TOKEN }}
access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
pipeline: ${{ github.repository }}
revision: ${{ github.sha }}
@@ -30,5 +30,5 @@ jobs:
{
"outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/sarek/results-${{ github.sha }}"
}
profiles: '[ "test_full", "aws_tower" ]'

profiles: test_full,aws_tower
pre_run_script: 'export NXF_VER=21.10.3'
10 changes: 5 additions & 5 deletions .github/workflows/awstest.yml
@@ -11,18 +11,18 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Launch workflow via tower
uses: nf-core/tower-action@master
uses: nf-core/tower-action@v2

with:
workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
bearer_token: ${{ secrets.TOWER_BEARER_TOKEN }}
access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
pipeline: ${{ github.repository }}
revision: ${{ github.sha }}
workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/sarek/work-${{ github.sha }}
parameters: |
{
"outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/sarek/results-${{ github.sha }}"
"outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/sarek/results-test-${{ github.sha }}"
}
profiles: '[ "test", "aws_tower" ]'

profiles: test,aws_tower
pre_run_script: 'export NXF_VER=21.10.3'
7 changes: 4 additions & 3 deletions .github/workflows/ci.yml
@@ -8,14 +8,16 @@ on:
release:
types: [published]

env:
NXF_ANSI_LOG: false
CAPSULE_LOG: none

jobs:
test:
name: Run workflow tests
# Only run on push if this is the nf-core dev branch (merged PRs)
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/sarek') }}
runs-on: ubuntu-latest
env:
NXF_ANSI_LOG: false
strategy:
matrix:
# Nextflow versions
@@ -43,7 +45,6 @@ jobs:

- name: Install Nextflow
env:
CAPSULE_LOG: none
NXF_VER: ${{ matrix.NXF_VER }}
# Uncomment only if the edge release is more recent than the latest stable release
# See https://github.com/nextflow-io/nextflow/issues/2467
7 changes: 5 additions & 2 deletions README.md
@@ -51,11 +51,14 @@ By default, the pipeline currently performs the following:
3. Download the pipeline and test it on a minimal dataset with a single command:

```console
nextflow run nf-core/sarek -profile test,<docker/singularity/podman/shifter/charliecloud/conda/institute>
nextflow run nf-core/sarek -profile test,YOURPROFILE
```

Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.

> * The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.
> * Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
> * If you are using `singularity` then the pipeline will auto-detect this and attempt to download the Singularity images directly as opposed to performing a conversion from Docker images. If you are persistently observing issues downloading Singularity images directly due to timeout or network issues then please use the `--singularity_pull_docker_container` parameter to pull and convert the Docker image instead. Alternatively, it is highly recommended to use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to pre-download all of the required containers before running the pipeline and to set the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options to be able to store and re-use the images from a central location for future pipeline runs.
> * If you are using `singularity` and are persistently observing issues downloading Singularity images directly due to timeout or network issues, then you can use the `--singularity_pull_docker_container` parameter to pull and convert the Docker image instead. Alternatively, you can use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
> * If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.
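
The Singularity and Conda cache settings mentioned above can also be pinned in a small custom config passed with `-c` (a sketch — the paths are placeholders, not recommended locations):

```nextflow
// custom.config — pass with: nextflow run nf-core/sarek -profile test,singularity -c custom.config
singularity {
    cacheDir = '/path/to/singularity-cache' // placeholder: central image store
}
conda {
    cacheDir = '/path/to/conda-cache'       // placeholder: central env store
}
```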

4. Start running your own analysis!
2 changes: 1 addition & 1 deletion assets/multiqc_config.yaml
@@ -5,7 +5,7 @@ custom_logo_title: 'nf-core/sarek'
report_comment: >
This report has been generated by the <a href="https://github.com/nf-core/sarek" target="_blank">nf-core/sarek</a>
analysis pipeline. For information about how to interpret these results, please see the
<a href="https://github.com/nf-core/sarek" target="_blank">documentation</a>.
<a href="https://nf-co.re/sarek" target="_blank">documentation</a>.
report_section_order:
software_versions:
order: -1000
36 changes: 0 additions & 36 deletions bin/scrape_software_versions.py

This file was deleted.

3 changes: 3 additions & 0 deletions conf/base.config
@@ -63,4 +63,7 @@ process {
withName:MULTIQC {
errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'}
}
withName:CUSTOM_DUMPSOFTWAREVERSIONS {
cache = false
}
}
57 changes: 2 additions & 55 deletions conf/genomes.config
@@ -5,54 +5,12 @@
* Defines reference genomes, without using iGenome paths
* Can be used by any config that customises the base
* path using $params.genomes_base / --genomes_base
*
* CAREFUL: Some o the files might be reuiqred in the CI tests not yet implemented. They should be gradually moved to the test.config. Until then lets keep this file.

Comment (Member): "Some o" and "reuiqred" -> "Some of" and "required". I don't know if this is directly implemented from the template, but it might be nice to change.

Reply (Contributor, Author): No, this is not from the template, but a reminder for us to remove this file once all CI tests are activated.
*/

params {
genomes {
'GRCh37' {
ac_loci = "${params.genomes_base}/1000G_phase3_20130502_SNP_maf0.3.loci"
ac_loci_gc = "${params.genomes_base}/1000G_phase3_20130502_SNP_maf0.3.loci.gc"
bwa = "${params.genomes_base}/human_g1k_v37_decoy.fasta.{amb,ann,bwt,pac,sa}"
chr_dir = "${params.genomes_base}/Chromosomes"
chr_length = "${params.genomes_base}/human_g1k_v37_decoy.len"
dbsnp = "${params.genomes_base}/dbsnp_138.b37.vcf"
dbsnp_tbi = "${params.genomes_base}/dbsnp_138.b37.vcf.idx"
dict = "${params.genomes_base}/human_g1k_v37_decoy.dict"
fasta = "${params.genomes_base}/human_g1k_v37_decoy.fasta"
fasta_fai = "${params.genomes_base}/human_g1k_v37_decoy.fasta.fai"
germline_resource = "${params.genomes_base}/gnomAD.r2.1.1.GRCh37.PASS.AC.AF.only.vcf.gz"
germline_resource_tbi = "${params.genomes_base}/gnomAD.r2.1.1.GRCh37.PASS.AC.AF.only.vcf.gz.tbi"
intervals = "${params.genomes_base}/wgs_calling_regions_Sarek.list"
known_indels = "${params.genomes_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf"
known_indels_tbi = "${params.genomes_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx"
mappability = "${params.genomes_base}/out100m2_hg19.gem"
snpeff_db = 'GRCh37.75'
vep_genome = 'GRCh37'
vep_species = 'homo_sapiens'
vep_cache_version = '104'
}
'GRCh38' {
ac_loci = "${params.genomes_base}/1000G_phase3_GRCh38_maf0.3.loci"
ac_loci_gc = "${params.genomes_base}/1000G_phase3_GRCh38_maf0.3.loci.gc"
bwa = "${params.genomes_base}/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}"
chr_dir = "${params.genomes_base}/Chromosomes"
chr_length = "${params.genomes_base}/Homo_sapiens_assembly38.len"
dbsnp = "${params.genomes_base}/dbsnp_146.hg38.vcf.gz"
dbsnp_tbi = "${params.genomes_base}/dbsnp_146.hg38.vcf.gz.tbi"
dict = "${params.genomes_base}/Homo_sapiens_assembly38.dict"
fasta = "${params.genomes_base}/Homo_sapiens_assembly38.fasta"
fasta_fai = "${params.genomes_base}/Homo_sapiens_assembly38.fasta.fai"
germline_resource = "${params.genomes_base}/gnomAD.r2.1.1.GRCh38.PASS.AC.AF.only.vcf.gz"
germline_resource_tbi = "${params.genomes_base}/gnomAD.r2.1.1.GRCh38.PASS.AC.AF.only.vcf.gz.tbi"
intervals = "${params.genomes_base}/wgs_calling_regions.hg38.bed"
known_indels = "${params.genomes_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz"
known_indels_tbi = "${params.genomes_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
mappability = "${params.genomes_base}/out100m2_hg38.gem"
snpeff_db = 'GRCh38.99'
vep_genome = 'GRCh38'
vep_species = 'homo_sapiens'
vep_cache_version = '104'
}
'minimalGRCh37' {
fasta = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta"
}
@@ -83,16 +41,5 @@ params {
'custom' {
fasta = null
}
'small_hg38' {
dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta"
germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.interval_list"
known_indels = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz"
snpeff_db = 'GRCh38.99'
vep_genome = 'GRCh38'
vep_species = 'homo_sapiens'
vep_cache_version = '104'
}
}
}
23 changes: 15 additions & 8 deletions conf/modules.config
@@ -7,11 +7,10 @@
ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
ext.suffix = File name ext.suffix output files. Not available for nf-core modules
ext.prefix = File name ext.prefix output files.
ext.prefix = File name prefix for output files.
----------------------------------------------------------------------------------------
*/

// Generic process options for all workflows
process {

publishDir = [
@@ -20,9 +19,10 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' {
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
mode: 'copy',
pattern: '*_versions.yml'
]
}
@@ -126,7 +126,7 @@ process {
withName: "BWA.*MEM" {
ext.args = { meta.status == 1 ? '-K 100000000 -M -B 3' : '-K 100000000 -M' }
publishDir = [ enabled: false ]
ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat(reads.get(0).name.findAll(/part_([0-9]+)?/).last().concat('.')) : "${meta.id}" }
ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.findAll(/part_([0-9]+)?/).last()) : "" }

}

@@ -138,12 +138,18 @@ process {
]
}

withName: 'SAMTOOLS_MERGE' {
publishDir = [ enabled: false ]
withName: "SEQKIT_SPLIT2" {
ext.args = { "--by-size ${params.split_fastq}" }
publishDir = [
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: 'copy',
enabled: params.save_split_fastqs,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'SEQKIT_SPLIT2' {
ext.args = { "--by-size ${params.split_fastq}" }

withName: 'SAMTOOLS_MERGE' {
publishDir = [ enabled: false ]
}
}
@@ -378,6 +384,7 @@ if ((params.tools) && (params.tools.contains('merge'))) {
ext.prefix = {"${meta.id}_snpEff_VEP.ann.vcf"}
}
}

}

// QC_TRIM
24 changes: 14 additions & 10 deletions conf/test.config
@@ -11,13 +11,14 @@
*/

params {

config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = 6.GB
max_time = 6.h
max_memory = '6.GB'
max_time = '6.h'

// Input data
input = "${baseDir}/tests/csv/3.0/fastq_single.csv"
@@ -26,6 +27,13 @@ params {
igenomes_ignore = true
genome = 'small_hg38'
genomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules'

dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta"
germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.interval_list"
known_indels = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz"

snpeff_db = 'WBcel235.99'
vep_species = 'caenorhabditis_elegans'
vep_cache_version = '104'
@@ -55,7 +63,9 @@ profiles {
params.skip_markduplicates = true
}
split_fastq {
params.split_fastq = 1500
params.split_fastq = 150000
params.save_split_fastqs = true

}
targeted {
params.target_bed = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/target.bed'
@@ -97,18 +107,12 @@ profiles {
}
}

//This is apparently useless as it won't overwrite things in the modules.config
Comment (Member): Yes, this is because test.config is loaded before modules.config. If you want it to overwrite modules.config, it needs to be passed as a parameter there, e.g. a new param called --single_fork that is set by the test profile; if that one is set, then modules.config sets maxForks to 1 for SNPEFF and ENSEMBLVEP.

Reply (Contributor, Author): Yes, figured it out with Mahesh's help :D I was pretty confused for about an hour there 😆
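
The `--single_fork` pattern suggested in this thread could be sketched roughly like this (the parameter name is the reviewer's example, not an implemented option):

```nextflow
// conf/test.config — the test profile only sets the flag
params.single_fork = true

// conf/modules.config — loaded after test.config, so it can react to the flag
if (params.single_fork) {
    process {
        withName: 'SNPEFF' {
            maxForks = 1
        }
        withName: 'ENSEMBLVEP' {
            maxForks = 1
        }
    }
}
```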

process {
withName:SNPEFF {
maxForks = 1
}
withName:ENSEMBLVEP {
maxForks = 1
}
withName:SEQKIT_SPLIT2{
publishDir = [
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: 'copy',
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}
3 changes: 2 additions & 1 deletion docs/output.md
@@ -764,7 +764,8 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ

- `pipeline_info/`
- Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
- Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.tsv`.
- Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
- Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.

</details>
