diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 589d7118..a9060419 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,23 +31,13 @@ jobs: uses: actions/checkout@v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" - - name: Download the NCBI taxdump database - run: | - mkdir ncbi_taxdump - curl -L https://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar -C ncbi_taxdump -xzf - - - - name: Download the BUSCO lineage database - run: | - mkdir busco_database - curl -L https://tolit.cog.sanger.ac.uk/test-data/resources/busco/blobtoolkit.GCA_922984935.2.2023-08-03.lineages.tar.gz | tar -C busco_database -xzf - - - name: Run pipeline with test data # You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --taxdump $PWD/ncbi_taxdump --busco $PWD/busco_database --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8d12b98a..ca21ecfe 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -19,10 +19,10 @@ jobs: - uses: actions/setup-node@v4 - name: Install editorconfig-checker - run: npm install -g editorconfig-checker + run: npm install -g editorconfig-checker@3.0.2 - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') + run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile\|.sqlite3') Prettier: runs-on: ubuntu-latest diff --git a/.github/workflows/sanger_test_full.yml b/.github/workflows/sanger_test_full.yml index b44c29f4..eb2d574a 100644 --- a/.github/workflows/sanger_test_full.yml +++ b/.github/workflows/sanger_test_full.yml @@ -26,7 +26,7 @@ jobs: with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV_LARGE }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} revision: ${{ env.REVISION }} workdir: ${{ secrets.TOWER_WORKDIR_PARENT }}/work/${{ github.repository }}/work-${{ env.REVISION }} parameters: | diff --git a/.nf-core.yml b/.nf-core.yml index 85e18745..0ad948cc 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -12,6 +12,9 @@ lint: - LICENCE - lib/NfcoreTemplate.groovy - CODE_OF_CONDUCT.md + - assets/sendmail_template.txt + - assets/email_template.html + - assets/email_template.txt - assets/nf-core-blobtoolkit_logo_light.png - docs/images/nf-core-blobtoolkit_logo_light.png - docs/images/nf-core-blobtoolkit_logo_dark.png @@ -19,6 +22,8 @@ lint: - .github/PULL_REQUEST_TEMPLATE.md - .github/workflows/linting.yml - .github/workflows/branch.yml + - .github/CONTRIBUTING.md + - .github/workflows/linting_comment.yml multiqc_config: - report_comment nextflow_config: diff --git a/CHANGELOG.md b/CHANGELOG.md index 050399ed..0557ad5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,30 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[0.7.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.7.0)] – Psyduck – [2024-11-20] + +The pipeline is now considered to be a complete and suitable replacement for the Snakemake version. + +- Fetch information about the chromosomes of the assemblies. Used to power + "grid plots". +- Fill in accurate read information in the blobDir. Users are now reqiured + to indicate in the samplesheet whether the reads are paired or single. +- Updated the Blastn settings to allow 7 days runtime at most, since that + covers 99.7% of the jobs. +- Allow database inputs to be optionally compressed (`.tar.gz`) + +### Software dependencies + +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. Only `Docker` or `Singularity` containers are supported, `conda` is not supported. + +| Dependency | Old version | New version | +| ----------- | ----------------- | --------------- | +| blast | 2.14.1 and 2.15.0 | only 2.15.0 | +| blobtoolkit | 4.3.9 | 4.4.0 | +| busco | 5.5.0 | 5.7.1 | +| multiqc | 1.20 and 1.21 | 1.20 and 1.25.1 | +| samtools | 1.18 and 1.19.2 | 1.20 and 1.21 | + ## [[0.6.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.6.0)] – Bellsprout – [2024-09-13] The pipeline has now been validated for draft (unpublished) assemblies. @@ -87,13 +111,13 @@ The pipeline has now been validated on dozens of genomes, up to 11 Gbp. Note, since the pipeline is using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. Only `Docker` or `Singularity` containers are supported, `conda` is not supported. -| Dependency | Old version | New version | -| ----------- | ------------- | ------------- | -| blobtoolkit | 4.3.3 | 4.3.9 | -| blast | 2.14.0 | 2.15.0 | -| multiqc | 1.17 and 1.18 | 1.20 and 1.21 | -| samtools | 1.18 | 1.19.2 | -| seqtk | 1.3 | 1.4 | +| Dependency | Old version | New version | +| ----------- | ------------- | ----------------- | +| blobtoolkit | 4.3.3 | 4.3.9 | +| blast | 2.14.0 | 2.15.0 and 2.14.1 | +| multiqc | 1.17 and 1.18 | 1.20 and 1.21 | +| samtools | 1.18 | 1.18 and 1.19.2 | +| seqtk | 1.3 | 1.4 | > **NB:** Dependency has been **updated** if both old and new version information is present.
**NB:** Dependency has been **added** if just the new version information is present.
**NB:** Dependency has been **removed** if version information isn't present. diff --git a/CITATIONS.md b/CITATIONS.md index 8b960779..1e18be72 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,6 +12,10 @@ ## Pipeline tools +- [BLAST+](https://blast.ncbi.nlm.nih.gov/doc/blast-help/downloadblastdata.html) + + > Camacho, Chritiam, et al. “BLAST+: architecture and applications.” BMC Bioinformatics, vol. 10, no. 412, Dec. 2009, https://doi.org/10.1186/1471-2105-10-421 + - [BlobToolKit](https://github.com/blobtoolkit/blobtoolkit) > Challis, Richard, et al. “BlobToolKit – Interactive Quality Assessment of Genome Assemblies.” G3 Genes|Genomes|Genetics, vol. 10, no. 4, Apr. 2020, pp. 1361–74, https://doi.org/10.1534/g3.119.400908. @@ -26,9 +30,7 @@ - [Fasta_windows](https://github.com/tolkit/fasta_windows) -- [GoaT](https://goat.genomehubs.org) - - > Challis, Richard, et al. “Genomes on a Tree (GoaT): A versatile, scalable search engine for genomic and sequencing project metadata across the eukaryotic tree of life.” Wellcome Open Research, vol. 8, no. 24, 2023, https://doi.org/10.12688/wellcomeopenres.18658.1. + > Brown, Max, et al. "Fasta_windows v0.2.3". GitHub, 2021. https://github.com/tolkit/fasta_windows - [Minimap2](https://github.com/lh3/minimap2) @@ -42,11 +44,15 @@ > Danecek, Petr, et al. “Twelve Years of SAMtools and BCFtools.” GigaScience, vol. 10, no. 2, Jan. 2021, https://doi.org/10.1093/gigascience/giab008. +- [SeqTK](https://github.com/lh3/seqtk) + + > Li, Heng. "SeqTK v1.4" GitHub, 2023, https://github.com/lh3/seqtk + ## Software packaging/containerisation tools -- [Anaconda](https://anaconda.com) +- [Conda](https://conda.org/) - > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + > conda contributors. conda: A system-level, binary package and environment manager running on all major operating systems and platforms. Computer software. https://github.com/conda/conda - [Bioconda](https://bioconda.github.io) diff --git a/README.md b/README.md index b55ab353..11e394fa 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![GitHub Actions Linting Status](https://github.com/sanger-tol/blobtoolkit/workflows/nf-core%20linting/badge.svg)](https://github.com/sanger-tol/blobtoolkit/actions?query=workflow%3A%22nf-core+linting%22)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7949058-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7949058) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=conda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/sanger-tol/blobtoolkit) @@ -20,8 +20,8 @@ It takes a samplesheet of BAM/CRAM/FASTQ/FASTA files as input, calculates genome 4. Run BUSCO ([`busco`](https://busco.ezlab.org/)) 5. Extract BUSCO genes ([`blobtoolkit/extractbuscos`](https://github.com/blobtoolkit/blobtoolkit)) 6. Run Diamond BLASTp against extracted BUSCO genes ([`diamond/blastp`](https://github.com/bbuchfink/diamond)) -7. Run BLASTx against sequences with no hit ([`blast/blastn`](https://www.ncbi.nlm.nih.gov/books/NBK131777/)) -8. Run BLASTn against sequences still with not hit ([`blast/blastx`](https://www.ncbi.nlm.nih.gov/books/NBK131777/)) +7. Run BLASTx against sequences with no hit ([`diamond/blastx`](https://github.com/bbuchfink/diamond)) +8. Run BLASTn against sequences still with not hit ([`blast/blastn`](https://www.ncbi.nlm.nih.gov/books/NBK131777/)) 9. Count BUSCO genes ([`blobtoolkit/countbuscos`](https://github.com/blobtoolkit/blobtoolkit)) 10. Generate combined sequence stats across various window sizes ([`blobtoolkit/windowstats`](https://github.com/blobtoolkit/blobtoolkit)) 11. Imports analysis results into a BlobDir dataset ([`blobtoolkit/blobdir`](https://github.com/blobtoolkit/blobtoolkit)) @@ -37,13 +37,17 @@ First, prepare a samplesheet with your input data that looks as follows: `samplesheet.csv`: ```csv -sample,datatype,datafile -mMelMel3,hic,GCA_922984935.2.hic.mMelMel3.cram -mMelMel1,illumina,GCA_922984935.2.illumina.mMelMel1.cram -mMelMel3,ont,GCA_922984935.2.ont.mMelMel3.cram +sample,datatype,datafile,library_layout +mMelMel3,hic,GCA_922984935.2.hic.mMelMel3.cram,PAIRED +mMelMel1,illumina,GCA_922984935.2.illumina.mMelMel1.cram,PAIRED +mMelMel3,ont,GCA_922984935.2.ont.mMelMel3.cram,SINGLE ``` -Each row represents an aligned file. Rows with the same sample identifier are considered technical replicates. The datatype refers to the sequencing technology used to generate the underlying raw data and follows a controlled vocabulary (`ont`, `hic`, `pacbio`, `pacbio_clr`, `illumina`). The aligned read files can be generated using the [sanger-tol/readmapping](https://github.com/sanger-tol/readmapping) pipeline. +Each row represents an aligned file. +Rows with the same sample identifier are considered technical replicates. +The datatype refers to the sequencing technology used to generate the underlying raw data and follows a controlled vocabulary (`ont`, `hic`, `pacbio`, `pacbio_clr`, `illumina`). +The library layout indicates whether the reads are paired or single. +The aligned read files can be generated using the [sanger-tol/readmapping](https://github.com/sanger-tol/readmapping) pipeline. Now, you can run the pipeline using: @@ -77,9 +81,8 @@ sanger-tol/blobtoolkit was written in Nextflow by [Alexander Ramos Diaz](https:/ We thank the following people for their assistance in the development of this pipeline: - - - [Guoying Qi](https://github.com/gq1) +- [Bethan Yates](https://github.com/BethYates) ## Contributions and Support @@ -89,8 +92,6 @@ If you would like to contribute to this pipeline, please see the [contributing g If you use sanger-tol/blobtoolkit for your analysis, please cite it using the following doi: [10.5281/zenodo.7949058](https://doi.org/10.5281/zenodo.7949058) - - An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE). diff --git a/assets/schema_input.json b/assets/schema_input.json index 26ed41cb..812541d8 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -23,6 +23,11 @@ "type": "string", "pattern": "^\\S+\\.(bam|cram|fa|fa.gz|fasta|fasta.gz|fq|fq.gz|fastq|fastq.gz)$", "errorMessage": "Data file for reads cannot contain spaces and must be BAM/CRAM/FASTQ/FASTA" + }, + "library_layout": { + "type": "string", + "pattern": "^(SINGLE|PAIRED)$", + "errorMessage": "The only valid layouts are SINGLE and PAIRED" } }, "required": ["datafile", "datatype", "sample"] diff --git a/assets/test/mMelMel3.1.buscogenes.dmnd b/assets/test/mMelMel3.1.buscogenes.dmnd deleted file mode 100644 index 391345ba..00000000 Binary files a/assets/test/mMelMel3.1.buscogenes.dmnd and /dev/null differ diff --git a/assets/test/mMelMel3.1.buscoregions.dmnd b/assets/test/mMelMel3.1.buscoregions.dmnd deleted file mode 100644 index 91fa6042..00000000 Binary files a/assets/test/mMelMel3.1.buscoregions.dmnd and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ndb b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ndb deleted file mode 100644 index 18062436..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ndb and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nhr b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nhr deleted file mode 100644 index 0b5d4906..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nhr and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nin b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nin deleted file mode 100644 index bebd568b..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nin and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nog b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nog deleted file mode 100644 index e6ef79c7..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nog and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nos b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nos deleted file mode 100644 index 99700566..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nos and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.not b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.not deleted file mode 100644 index 047e8d38..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.not and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nsq b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nsq deleted file mode 100644 index 48497573..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nsq and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ntf b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ntf deleted file mode 100644 index 3be5ea5b..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ntf and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nto b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nto deleted file mode 100644 index 6d4a41c7..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nto and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/taxonomy4blast.sqlite3 b/assets/test/nt_mMelMel3.1/taxonomy4blast.sqlite3 deleted file mode 100644 index dc933c1f..00000000 Binary files a/assets/test/nt_mMelMel3.1/taxonomy4blast.sqlite3 and /dev/null differ diff --git a/assets/test/samplesheet.csv b/assets/test/samplesheet.csv index 5f8d4463..c8d555be 100644 --- a/assets/test/samplesheet.csv +++ b/assets/test/samplesheet.csv @@ -1,5 +1,5 @@ -sample,datatype,datafile -mMelMel3,hic,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/hic/GCA_922984935.2.subset.unmasked.hic.mMelMel3.cram -mMelMel1,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel1.cram -mMelMel2,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel2.cram -mMelMel3,ont,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/ont/GCA_922984935.2.subset.unmasked.ont.mMelMel3.cram +sample,datatype,datafile,library_layout +mMelMel3,hic,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/hic/GCA_922984935.2.subset.unmasked.hic.mMelMel3.cram,PAIRED +mMelMel1,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel1.cram,PAIRED +mMelMel2,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel2.cram,PAIRED +mMelMel3,ont,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/ont/GCA_922984935.2.subset.unmasked.ont.mMelMel3.cram,SINGLE diff --git a/assets/test/samplesheet_raw.csv b/assets/test/samplesheet_raw.csv index 830753a7..53a5a42e 100644 --- a/assets/test/samplesheet_raw.csv +++ b/assets/test/samplesheet_raw.csv @@ -1,4 +1,4 @@ -sample,datatype,datafile -mMelMel1,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/genomic_data/mMelMel1/illumina/31231_3#1_subset.cram -mMelMel2,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/genomic_data/mMelMel2/illumina/31231_4#1_subset.cram -mMelMel3,hic,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2#1_subset.cram +sample,datatype,datafile,library_layout +mMelMel1,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/genomic_data/mMelMel1/illumina/31231_3#1_subset.cram,PAIRED +mMelMel2,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/genomic_data/mMelMel2/illumina/31231_4#1_subset.cram,PAIRED +mMelMel3,hic,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2#1_subset.cram,PAIRED diff --git a/assets/test/samplesheet_s3.csv b/assets/test/samplesheet_s3.csv index dbb34181..532f584e 100644 --- a/assets/test/samplesheet_s3.csv +++ b/assets/test/samplesheet_s3.csv @@ -1,5 +1,5 @@ -sample,datatype,datafile -mMelMel3,hic,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/hic/GCA_922984935.2.subset.unmasked.hic.mMelMel3.cram -mMelMel1,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel1.cram -mMelMel2,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel2.cram -mMelMel3,ont,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/ont/GCA_922984935.2.subset.unmasked.ont.mMelMel3.cram +sample,datatype,datafile,library_layout +mMelMel3,hic,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/hic/GCA_922984935.2.subset.unmasked.hic.mMelMel3.cram,PAIRED +mMelMel1,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel1.cram,PAIRED +mMelMel2,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel2.cram,PAIRED +mMelMel3,ont,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/ont/GCA_922984935.2.subset.unmasked.ont.mMelMel3.cram,SINGLE diff --git a/assets/test_full/full_samplesheet.csv b/assets/test_full/full_samplesheet.csv index 6a3ba69d..52f029e2 100644 --- a/assets/test_full/full_samplesheet.csv +++ b/assets/test_full/full_samplesheet.csv @@ -1,3 +1,3 @@ -sample,datatype,datafile -gfLaeSulp1,hic,/lustre/scratch123/tol/resources/nextflow/test-data/Laetiporus_sulphureus/analysis/gfLaeSulp1.1/read_mapping/hic/GCA_927399515.1.unmasked.hic.gfLaeSulp1.cram -gfLaeSulp1,pacbio,/lustre/scratch123/tol/resources/nextflow/test-data/Laetiporus_sulphureus/analysis/gfLaeSulp1.1/read_mapping/pacbio/GCA_927399515.1.unmasked.pacbio.gfLaeSulp1.cram +sample,datatype,datafile,library_layout +gfLaeSulp1,hic,/lustre/scratch123/tol/resources/nextflow/test-data/Laetiporus_sulphureus/analysis/gfLaeSulp1.1/read_mapping/hic/GCA_927399515.1.unmasked.hic.gfLaeSulp1.cram,PAIRED +gfLaeSulp1,pacbio,/lustre/scratch123/tol/resources/nextflow/test-data/Laetiporus_sulphureus/analysis/gfLaeSulp1.1/read_mapping/pacbio/GCA_927399515.1.unmasked.pacbio.gfLaeSulp1.cram,SINGLE diff --git a/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd b/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd deleted file mode 100644 index a0d0e1d2..00000000 Binary files a/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd and /dev/null differ diff --git a/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd b/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd deleted file mode 100644 index 3f2a1a54..00000000 Binary files a/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ndb b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ndb deleted file mode 100644 index 0905629a..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ndb and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nhr b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nhr deleted file mode 100644 index 1fa3521a..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nhr and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nin b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nin deleted file mode 100644 index 0503c4c7..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nin and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nog b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nog deleted file mode 100644 index 7dcd60eb..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nog and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nos b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nos deleted file mode 100644 index 6bd1dcdf..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nos and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.not b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.not deleted file mode 100644 index 8bacddec..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.not and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nsq b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nsq deleted file mode 100644 index 6afe38e9..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nsq and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ntf b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ntf deleted file mode 100644 index efd34086..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ntf and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nto b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nto deleted file mode 100644 index 4b140ec3..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nto and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/taxonomy4blast.sqlite3 b/assets/test_full/nt_gfLaeSulp1.1/taxonomy4blast.sqlite3 deleted file mode 100644 index 2a56a82f..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/taxonomy4blast.sqlite3 and /dev/null differ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 6b5392bf..bc468469 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -45,11 +45,17 @@ class RowChecker: "ont", ) + VALID_LAYOUTS = ( + "SINGLE", + "PAIRED", + ) + def __init__( self, sample_col="sample", type_col="datatype", file_col="datafile", + layout_col="library_layout", **kwargs, ): """ @@ -62,11 +68,14 @@ def __init__( the read data (default "datatype"). file_col (str): The name of the column that contains the file path for the read data (default "datafile"). + layout_col(str): The name of the column that contains the layout of the + library (i.e. "PAIRED" or "SINGLE"). """ super().__init__(**kwargs) self._sample_col = sample_col self._type_col = type_col self._file_col = file_col + self._layout_col = layout_col self._seen = set() self.modified = [] @@ -82,6 +91,7 @@ def validate_and_transform(self, row): self._validate_sample(row) self._validate_type(row) self._validate_file(row) + self._validate_layout(row) self._seen.add((row[self._sample_col], row[self._file_col])) self.modified.append(row) @@ -94,7 +104,7 @@ def _validate_sample(self, row): def _validate_type(self, row): """Assert that the data type matches expected values.""" - if not any(row[self._type_col] for datatype in self.VALID_DATATYPES): + if row[self._type_col] not in self.VALID_DATATYPES: raise AssertionError( f"The datatype is unrecognized: {row[self._type_col]}\n" f"It should be one of: {', '.join(self.VALID_DATATYPES)}" @@ -114,6 +124,14 @@ def _validate_data_format(self, filename): f"It should be one of: {', '.join(self.VALID_FORMATS)}" ) + def _validate_layout(self, row): + """Assert that the library layout matches expected values.""" + if not row[self._layout_col] in self.VALID_LAYOUTS: + raise AssertionError( + f"The library layout is unrecognized: {row[self._layout_col]}\n" + f"It should be one of: {', '.join(self.VALID_LAYOUTS)}" + ) + def validate_unique_samples(self): """ Assert that the combination of sample name and aligned filename is unique. @@ -178,7 +196,7 @@ def check_samplesheet(file_in, file_out): This function checks that the samplesheet follows the following structure, see also the `blobtoolkit samplesheet`_:: - sample,datatype,datafile + sample,datatype,datafile,library_layout sample1,hic,/path/to/file1.cram sample1,pacbio,/path/to/file2.cram sample1,ont,/path/to/file3.cram @@ -187,7 +205,7 @@ def check_samplesheet(file_in, file_out): https://raw.githubusercontent.com/sanger-tol/blobtoolkit/main/assets/test/samplesheet.csv """ - required_columns = {"sample", "datatype", "datafile"} + required_columns = {"sample", "datatype", "datafile", "library_layout"} # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. with file_in.open(newline="") as in_handle: reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) diff --git a/bin/generate_config.py b/bin/generate_config.py index e3d00dfa..8bdec323 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -13,6 +13,7 @@ GOAT_LOOKUP_API = "https://goat.genomehubs.org/api/v2/lookup?searchTerm=%s&result=taxon&size=10&taxonomy=ncbi" GOAT_RECORD_API = "https://goat.genomehubs.org/api/v2/record?recordId=%s&result=taxon&size=10&taxonomy=ncbi" NCBI_DATASETS_API = "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/%s/dataset_report?filters.assembly_version=all_assemblies" +NCBI_SEQUENCE_API = "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/%s/sequence_reports" RANKS = [ "genus", @@ -34,12 +35,19 @@ def parse_args(args=None): parser.add_argument("--fasta", required=True, help="Path to the Fasta file of the assembly.") parser.add_argument("--taxon_query", required=True, help="Query string/integer for this taxon.") parser.add_argument("--lineage_tax_ids", required=True, help="Mapping between BUSCO lineages and taxon IDs.") - parser.add_argument("--yml_out", required=True, help="Output YML file.") - parser.add_argument("--csv_out", required=True, help="Output CSV file.") + parser.add_argument("--output_prefix", required=True, help="Prefix to name the output files.") parser.add_argument("--accession", help="Accession number of the assembly (optional).", default=None) parser.add_argument("--busco", help="Requested BUSCO lineages.", default=None) - parser.add_argument("--blastn", required=True, help="Path to the NCBI Taxonomy database") - parser.add_argument("--version", action="version", version="%(prog)s 1.4") + parser.add_argument("--nt", required=True, help="Path to the NT database") + parser.add_argument("--read_id", action="append", help="ID of a read set") + parser.add_argument("--read_type", action="append", help="Type of a read set") + parser.add_argument("--read_layout", action="append", help="Layout of a read set") + parser.add_argument("--read_path", action="append", help="Path of a read set") + parser.add_argument("--blastp", help="Path to the blastp database", required=True) + parser.add_argument("--blastx", help="Path to the blastx database", required=True) + parser.add_argument("--blastn", help="Path to the blastn database", required=True) + parser.add_argument("--taxdump", help="Path to the taxonomy database", required=True) + parser.add_argument("--version", action="version", version="%(prog)s 2.0") return parser.parse_args(args) @@ -165,8 +173,23 @@ def get_assembly_info(accession: str) -> typing.Dict[str, typing.Union[str, int] return d -def adjust_taxon_id(blastn: str, taxon_info: TaxonInfo) -> int: - con = sqlite3.connect(os.path.join(blastn, "taxonomy4blast.sqlite3")) +def get_sequence_report(accession: str): + response = requests.get(NCBI_SEQUENCE_API % accession).json() + if not response["reports"]: + print(f"Assembly not found: {accession}", file=sys.stderr) + sys.exit(1) + sequence_report = response["reports"] + for rec in sequence_report: + if rec["role"] == "assembled-molecule": + rec["assembly_level"] = rec["assigned_molecule_location_type"] + else: + rec["assembly_level"] = "na" + rec["chr_name"] = "na" + return sequence_report + + +def adjust_taxon_id(nt: str, taxon_info: TaxonInfo) -> int: + con = sqlite3.connect(os.path.join(nt, "taxonomy4blast.sqlite3")) cur = con.cursor() for taxon_id in [taxon_info.taxon_id] + [anc_taxon_info.taxon_id for anc_taxon_info in taxon_info.lineage]: res = cur.execute("SELECT * FROM TaxidInfo WHERE taxid = ?", (taxon_id,)) @@ -174,49 +197,60 @@ def adjust_taxon_id(blastn: str, taxon_info: TaxonInfo) -> int: return taxon_id +def datatype_to_platform(s): + if s == "ont": + return "OXFORD_NANOPORE" + elif s.startswith("pacbio"): + return "PACBIO_SMRT" + elif s in ["hic", "illumina"]: + return "ILLUMINA" + else: + return "OTHER" + + def print_yaml( file_out, assembly_info: typing.Dict[str, typing.Union[str, int]], taxon_info: TaxonInfo, classification: typing.Dict[str, str], - odb_arr: typing.List[str], + reads, + blastp, + blastx, + blastn, + taxdump, ): data = { "assembly": assembly_info, - "busco": { - "basal_lineages": BUSCO_BASAL_LINEAGES, - # "download_dir": - "lineages": odb_arr + [lin for lin in BUSCO_BASAL_LINEAGES if lin not in odb_arr], + "reads": { + "paired": [], + "single": [], }, - # "reads": {}, "revision": 1, "settings": { - # Only settings.stats_windows is mandatory, everything else is superfluous "blast_chunk": 100000, "blast_max_chunks": 10, "blast_min_length": 1000, "blast_overlap": 0, "stats_chunk": 1000, "stats_windows": [0.1, 0.01, 100000, 1000000], - # "taxdump": , + "taxdump": taxdump, "tmp": "/tmp", }, "similarity": { - # Only the presence similarity.diamond_blastx seems mandatory, everything else is superfluous "blastn": { "name": "nt", - # "path": , + "path": blastn, }, "defaults": {"evalue": 1e-10, "import_evalue": 1e-25, "max_target_seqs": 10, "taxrule": "buscogenes"}, "diamond_blastp": { "import_max_target_seqs": 100000, "name": "reference_proteomes", - # "path": , + "path": blastp, "taxrule": "blastp=buscogenes", }, "diamond_blastx": { "name": "reference_proteomes", - # "path": , + "path": blastx, }, }, "taxon": { @@ -226,6 +260,16 @@ def print_yaml( }, "version": 2, } + + for id, datatype, layout, path in reads: + data["reads"][layout.lower()].append( + { + "prefix": id, + "file": path, + "plaform": datatype_to_platform(datatype), + } + ) + out_dir = os.path.dirname(file_out) make_dir(out_dir) @@ -243,23 +287,66 @@ def print_csv(file_out, taxon_id: int, odb_arr: typing.List[str]): print("busco_lineage", odb_val, sep=",", file=fout) +def print_tsvs(output_prefix, sequence_report): + categories_tsv = f"{output_prefix}.categories.tsv" + synonyms_tsv = f"{output_prefix}.synonyms.tsv" + with open(categories_tsv, "w") as fhc: + with open(synonyms_tsv, "w") as fhs: + print("identifier", "assembly_role", "assembly_level", "assembly_unit", sep="\t", file=fhc) + print("identifier", "name", "assigned_name", "refseq_accession", sep="\t", file=fhs) + for rec in sequence_report: + print( + rec["genbank_accession"], + rec["role"], + rec["assembly_level"], + rec["assembly_unit"], + sep="\t", + file=fhc, + ) + print( + rec["genbank_accession"], + rec["sequence_name"], + rec["chr_name"], + rec.get("refseq_accession", "na"), + sep="\t", + file=fhs, + ) + + def main(args=None): args = parse_args(args) assembly_info: typing.Dict[str, typing.Union[str, int]] if args.accession: assembly_info = get_assembly_info(args.accession) + sequence_report = get_sequence_report(args.accession) else: assembly_info = {"level": "scaffold"} + sequence_report = None assembly_info["file"] = args.fasta taxon_info = fetch_taxon_info(args.taxon_query) classification = get_classification(taxon_info) odb_arr = get_odb(taxon_info, args.lineage_tax_ids, args.busco) - taxon_id = adjust_taxon_id(args.blastn, taxon_info) - - print_yaml(args.yml_out, assembly_info, taxon_info, classification, odb_arr) - print_csv(args.csv_out, taxon_id, odb_arr) + taxon_id = adjust_taxon_id(args.nt, taxon_info) + + if sequence_report: + print_tsvs(args.output_prefix, sequence_report) + + reads = zip(args.read_id, args.read_type, args.read_layout, args.read_path) + + print_yaml( + f"{args.output_prefix}.yaml", + assembly_info, + taxon_info, + classification, + reads, + args.blastp, + args.blastx, + args.blastn, + args.taxdump, + ) + print_csv(f"{args.output_prefix}.csv", taxon_id, odb_arr) if __name__ == "__main__": diff --git a/bin/update_versions.py b/bin/update_versions.py index 1d3491a3..21f379a5 100755 --- a/bin/update_versions.py +++ b/bin/update_versions.py @@ -15,28 +15,10 @@ def parse_args(args=None): parser.add_argument("--meta_in", help="Input JSON file.", required=True) parser.add_argument("--meta_out", help="Output JSON file.", required=True) parser.add_argument("--software", help="Input YAML file.", required=True) - parser.add_argument("--read_id", action="append", help="ID of a read set") - parser.add_argument("--read_type", action="append", help="Type of a read set") - parser.add_argument("--read_path", action="append", help="Path of a read set") - parser.add_argument("--blastp", help="Path to the blastp database", required=True) - parser.add_argument("--blastx", help="Path to the blastx database", required=True) - parser.add_argument("--blastn", help="Path to the blastn database", required=True) - parser.add_argument("--taxdump", help="Path to the taxonomy database", required=True) - parser.add_argument("--version", action="version", version="%(prog)s 1.2.0") + parser.add_argument("--version", action="version", version="%(prog)s 1.3.0") return parser.parse_args(args) -def datatype_to_platform(s): - if s == "ont": - return "OXFORD_NANOPORE" - elif s.startswith("pacbio"): - return "PACBIO_SMRT" - elif s in ["hic", "illumina"]: - return "ILLUMINA" - else: - return "OTHER" - - def update_meta(args): with open(args.meta_in) as fh: infile = json.load(fh) @@ -54,20 +36,6 @@ def update_meta(args): del new_dict["sanger-tol/blobtoolkit"] infile["settings"]["software_versions"] = new_dict - infile["settings"]["taxdump"] = args.taxdump - for k in ["blastn", "diamond_blastp", "diamond_blastx"]: - infile["similarity"].setdefault(k, {}) - infile["similarity"]["blastn"]["path"] = args.blastn - infile["similarity"]["diamond_blastp"]["path"] = args.blastp - infile["similarity"]["diamond_blastx"]["path"] = args.blastx - - infile["reads"] = {} - for id, datatype, path in zip(args.read_id, args.read_type, args.read_path): - infile["reads"][id] = { - "file": path, - "plaform": datatype_to_platform(datatype), - } - return infile diff --git a/conf/base.config b/conf/base.config index 75fa0d06..25648852 100644 --- a/conf/base.config +++ b/conf/base.config @@ -66,11 +66,11 @@ process { time = { check_max( 1.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } } - // Temporarily the same settings as CCS + // Baased on a handful of runs withName: '.*:MINIMAP2_ALIGNMENT:MINIMAP2_(HIC|ILMN)' { cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 14.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } - time = { check_max( 3.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } + time = { check_max( 1.h * Math.ceil( meta.read_count / 75000000 ) * task.attempt, 'time' ) } } withName: 'WINDOWSTATS_INPUT' { @@ -106,14 +106,15 @@ process { withName: "BLAST_BLASTN" { - // There are blast failures we don't know how to fix. Just ignore for now - errorStrategy = { task.exitStatus in ((130..145) + 104) ? (task.attempt == process.maxRetries ? 'ignore' : 'retry') : 'finish' } + // There are blast failures we don't know how to fix. We just give up after 3 attempts + errorStrategy = { task.exitStatus in ((130..145) + 104) ? (task.attempt == 3 ? 'ignore' : 'retry') : 'finish' } + // Most jobs complete quickly but some need a lot longer. For those outliers, - // the CPU usage remains usually low, often nearing a single CPU - cpus = { check_max( 6 - (task.attempt-1), 'cpus' ) } - memory = { check_max( 1.GB * Math.pow(4, task.attempt-1), 'memory' ) } - time = { check_max( 10.h * Math.pow(4, task.attempt-1), 'time' ) } + // the CPU usage remains usually low, averaging a single CPU + cpus = { check_max( task.attempt == 1 ? 4 : 1, 'cpus' ) } + memory = { check_max( 2.GB, 'memory' ) } + time = { check_max( task.attempt == 1 ? 4.h : ( task.attempt == 2 ? 47.h : 167.h ), 'time' ) } } withName:CUSTOM_DUMPSOFTWAREVERSIONS { diff --git a/conf/modules.config b/conf/modules.config index 1193cf5f..9f8b7aec 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,10 +22,20 @@ process { withName: "WINDOWMASKER_MKCOUNTS" { ext.args = "-infmt fasta -sformat obinary" + publishDir = [ + path: { "${params.outdir}/repeats/windowmasker" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename.substring(0, filename.length() - 3) + "obinary" } + ] } withName: "WINDOWMASKER_USTAT" { ext.args = "-infmt fasta -dust T -outfmt fasta" + publishDir = [ + path: { "${params.outdir}/repeats/windowmasker" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] } withName: "MINIMAP2_HIC" { @@ -86,8 +96,8 @@ process { ext.args = { 'test' in workflow.profile.tokenize(',') ? // Additional configuration to speed processes up during testing. // Note: BUSCO *must* see the double-quotes around the parameters - '--force --metaeuk_parameters \'"-s=2"\' --metaeuk_rerun_parameters \'"-s=2"\'' - : '--force' } + '--force --metaeuk --metaeuk_parameters \'"-s=2"\' --metaeuk_rerun_parameters \'"-s=2"\'' + : '--force --metaeuk' } } withName: "RESTRUCTUREBUSCODIR" { diff --git a/conf/test.config b/conf/test.config index 8ad70cae..20331442 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,16 +25,16 @@ params { input = "${projectDir}/assets/test/samplesheet_s3.csv" // Fasta references - fasta = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz" + fasta = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.phiXspike.fasta.gz" accession = "GCA_922984935.2" taxon = "Meles meles" // Databases - taxdump = "/lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump" - busco = "/lustre/scratch123/tol/resources/nextflow/busco/blobtoolkit.GCA_922984935.2.2023-08-03" - blastp = "${projectDir}/assets/test/mMelMel3.1.buscogenes.dmnd" - blastx = "${projectDir}/assets/test/mMelMel3.1.buscoregions.dmnd" - blastn = "${projectDir}/assets/test/nt_mMelMel3.1" + taxdump = "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz" + busco = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/blobtoolkit.GCA_922984935.2.2023-08-03.tar.gz" + blastp = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscogenes.dmnd.tar.gz" + blastx = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscoregions.dmnd.tar.gz" + blastn = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/nt_mMelMel3.1.tar.gz" // Need to be set to avoid overfilling /tmp use_work_dir_as_temp = true diff --git a/conf/test_full.config b/conf/test_full.config index ca78130e..a86e0050 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -25,11 +25,11 @@ params { taxon = "Laetiporus sulphureus" // Databases - taxdump = "/lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump" + taxdump = "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz" busco = "/lustre/scratch123/tol/resources/busco/latest" - blastp = "${projectDir}/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd" - blastx = "${projectDir}/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd" - blastn = "${projectDir}/assets/test_full/nt_gfLaeSulp1.1" + blastp = "https://tolit.cog.sanger.ac.uk/test-data/Laetiporus_sulphureus/resources/gfLaeSulp1.1.buscogenes.dmnd.tar.gz" + blastx = "https://tolit.cog.sanger.ac.uk/test-data/Laetiporus_sulphureus/resources/gfLaeSulp1.1.buscoregions.dmnd.tar.gz" + blastn = "https://tolit.cog.sanger.ac.uk/test-data/Laetiporus_sulphureus/resources/nt_gfLaeSulp1.1.tar.gz" // Need to be set to avoid overfilling /tmp use_work_dir_as_temp = true diff --git a/conf/test_raw.config b/conf/test_raw.config index 0cf1d16f..7af9bd2e 100644 --- a/conf/test_raw.config +++ b/conf/test_raw.config @@ -31,11 +31,11 @@ params { taxon = "Meles meles" // Databases - taxdump = "/lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump" - busco = "/lustre/scratch123/tol/resources/nextflow/busco/blobtoolkit.GCA_922984935.2.2023-08-03" - blastp = "${projectDir}/assets/test/mMelMel3.1.buscogenes.dmnd" - blastx = "${projectDir}/assets/test/mMelMel3.1.buscoregions.dmnd" - blastn = "${projectDir}/assets/test/nt_mMelMel3.1/" + taxdump = "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz" + busco = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/blobtoolkit.GCA_922984935.2.2023-08-03.tar.gz" + blastp = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscogenes.dmnd.tar.gz" + blastx = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscoregions.dmnd.tar.gz" + blastn = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/nt_mMelMel3.1.tar.gz" // Need to be set to avoid overfilling /tmp use_work_dir_as_temp = true diff --git a/docs/output.md b/docs/output.md index e3204a1d..8af0fd40 100644 --- a/docs/output.md +++ b/docs/output.md @@ -2,7 +2,7 @@ ## Introduction -This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. +This document describes the output produced by the pipeline. The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. @@ -15,6 +15,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [BlobDir](#blobdir) - Output files viewable on a [BlobToolKit viewer](https://github.com/blobtoolkit/blobtoolkit) - [Static plots](#static-plots) - Static versions of the BlobToolKit plots - [BUSCO](#busco) - BUSCO results +- [Repeat masking](#repeat-masking) - Masked repeats (optional) - [Read alignments](#read-alignments) - Aligned reads (optional) - [Read coverage](#read-coverage) - Read coverage tracks - [Base content](#base-content) - _k_-mer statistics (for k ≤ 4) @@ -68,6 +69,20 @@ BUSCO results generated by the pipeline (all BUSCO lineages that match the claas +### Repeat masking + +Reults from the repeat-masker step -- only if the pipeline is run with `--mask`. + +
+Output files + +- `repeats/` + - `windowmasker/` + - `.fasta`: masked assembly in Fasta format. + - `.obinary`: frequency counts of repeats, in windowmasker's own binary format. + +
+ ### Read alignments Read alignments in BAM format -- only if the pipeline is run with `--align`. diff --git a/docs/usage.md b/docs/usage.md index 4d0ad33e..0fb6c0cb 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,7 +10,7 @@ ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row as shown in the examples below. ```bash --input '[path to samplesheet file]' @@ -21,30 +21,31 @@ You will need to create a samplesheet with information about the samples you wou The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: ```console -sample,datatype,datafile -sample1,hic,hic.cram -sample2,illumina,illumina.cram -sample2,illumina,illumina.cram +sample,datatype,datafile,library_layout +sample1,hic,hic.cram,PAIRED +sample2,illumina,illumina.cram,PAIRED +sample2,illumina,illumina.cram,PAIRED ``` ### Full samplesheet -The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 4 columns to match those defined in the table below. A final samplesheet file may look something like the one below. ```console -sample,datatype,datafile -sample1,hic,hic.cram -sample2,illumina,illumina.cram -sample3,ont,ont.cram +sample,datatype,datafile,library_layout +sample1,hic,hic.cram,PAIRED +sample2,illumina,illumina.cram,PAIRED +sample3,ont,ont.cram,SINGLE ``` -| Column | Description | -| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (\_). | -| `datatype` | Type of sequencing data. Must be one of `hic`, `illumina`, `pacbio`, `pacbio_clr` or `ont`. | -| `datafile` | Full path to read data file. | +| Column | Description | +| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (\_). | +| `datatype` | Type of sequencing data. Must be one of `hic`, `illumina`, `pacbio`, `pacbio_clr` or `ont`. | +| `datafile` | Full path to read data file. | +| `library_layout` | Layout of the library. Must be one of `SINGLE`, `PAIRED`. | An [example samplesheet](assets/test/samplesheet.csv) has been provided with the pipeline. @@ -53,7 +54,20 @@ An [example samplesheet](assets/test/samplesheet.csv) has been provided with the The pipeline can also accept a samplesheet generated by the [nf-core/fetchngs](https://nf-co.re/fetchngs) pipeline (tested with version 1.11.0). The pipeline then needs the `--fetchngs_samplesheet true` option _and_ `--align true`, since the data files would all be unaligned. -## Getting databases ready for the pipeline +## Database parameters + +Configure access to your local databases with the `--busco`, `--blastp`, `--blastx`, `--blastn`, and `--taxdump` parameters. + +Note that `--busco` refers to the download path of _all_ lineages. +Then, when explicitly selecting the lineages to run the pipeline on, +provide the names of these lineages _with_ their `_odb10` suffix as a comma-separated string. +For instance: + +```bash +--busco path-to-databases/busco/ --busco_lineages vertebrata_odb10,bacteria_odb10,fungi_odb10 +``` + +### Getting databases ready for the pipeline The BlobToolKit pipeline can be run in many different ways. The default way requires access to several databases: @@ -64,84 +78,108 @@ The BlobToolKit pipeline can be run in many different ways. The default way requ It is a good idea to put a date suffix for each database location so you know at a glance whether you are using the latest version. We are using the `YYYY_MM` format as we do not expect the databases to be updated more frequently than once a month. However, feel free to use `DATE=YYYY_MM_DD` or a different format if you prefer. -### 1. NCBI taxdump database - -Create the database directory and move into the directory: +Note that all input databases may be optionally passed directly to the pipeline compressed as `.tar.gz`, and the pipeline will handle decompression. +The instructions below show how to build each input database in _two_ forms: decompressed _and_ compressed. You may not need to do both. Select the one that is most appropriate for how you want to use the pipeline. -```bash -DATE=2023_03 -TAXDUMP=/path/to/databases/taxdump_${DATE} -mkdir -p $TAXDUMP -cd $TAXDUMP -``` +#### 1. NCBI taxdump database -Retrieve and decompress the NCBI taxdump: +Create the database directory, retrieve and decompress the NCBI taxonomy: ```bash -curl -L ftp://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar xzf - +DATE=2024_10 +TAXDUMP=/path/to/databases/taxdump_${DATE} +TAXDUMP_TAR=/path/to/databases/taxdump_${DATE}.tar.gz +mkdir -p "$TAXDUMP" +curl -L ftp://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz -o $TAXDUMP_TAR +tar -xzf $TAXDUMP_TAR -C "$TAXDUMP" ``` -### 2. NCBI nucleotide BLAST database +#### 2. NCBI nucleotide BLAST database Create the database directory and move into the directory: ```bash -DATE=2023_03 +DATE=2024_10 NT=/path/to/databases/nt_${DATE} +NT_TAR=/path/to/databases/nt_${DATE}.tar.gz mkdir -p $NT cd $NT ``` -Retrieve the NCBI blast nt database (version 5) files and tar gunzip them. We are using the `&&` syntax to ensure that each command completes without error before the next one is run: +Retrieve the NCBI blast nt database (version 5) files and tar gunzip them. +`wget` and the use of the FTP protocol are necessary to resolve the wildcard `nt.???.tar.gz`. +We are using the `&&` syntax to ensure that each command completes without error before the next one is run: ```bash wget "ftp://ftp.ncbi.nlm.nih.gov/blast/db/v5/nt.???.tar.gz" -P $NT/ && for file in $NT/*.tar.gz; do tar xf $file -C $NT && rm $file; done + +wget "https://ftp.ncbi.nlm.nih.gov/blast/db/v5/taxdb.tar.gz" && +tar xf taxdb.tar.gz -C $NT && +rm taxdb.tar.gz + +# Compress and cleanup +cd .. +tar -cvzf $NT_TAR $NT +rm -r $NT ``` -### 3. UniProt reference proteomes database +#### 3. UniProt reference proteomes database -You need [diamond blast](https://github.com/bbuchfink/diamond) installed for this step. The easiest way is probably using [conda](https://anaconda.org/bioconda/diamond). Make sure you have the latest version of Diamond (>2.x.x) otherwise the `--taxonnames` argument may not work. +You need [diamond blast](https://github.com/bbuchfink/diamond) installed for this step. +The easiest way is probably to install their [pre-compiled release](https://github.com/bbuchfink/diamond/releases). +Make sure you have the latest version of Diamond (>2.x.x) otherwise the `--taxonnames` argument may not work. Create the database directory and move into the directory: ```bash -DATE=2023_03 +DATE=2024_10 UNIPROT=/path/to/databases/uniprot_${DATE} +UNIPROT_TAR=/path/to/databases/uniprot_${DATE}.tar.gz mkdir -p $UNIPROT cd $UNIPROT ``` -The UniProt `Refseq_Proteomes_YYYY_MM.tar.gz` file is very large (>160 GB) and will take a long time to download. The command below looks complex because it needs to get around the problem of using wildcards with wget and curl. +The UniProt `Refseq_Proteomes_YYYY_MM.tar.gz` file is very large (close to 200 GB) and will take a long time to download. +The command below looks complex because it needs to get around the problem of using wildcards with wget and curl. ```bash -wget -q -O $UNIPROT/reference_proteomes.tar.gz \ - ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/$(curl \ - -vs ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/ 2>&1 | \ - awk '/tar.gz/ {print $9}') -tar xf reference_proteomes.tar.gz +EBI_URL=ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/ +mkdir extract +curl -L $EBI_URL/$(curl -vs $EBI_URL 2>&1 | awk '/tar.gz/ {print $9}') | \ + tar -xzf - -C extract # Create a single fasta file with all the fasta files from each subdirectory: -touch reference_proteomes.fasta.gz -find . -mindepth 2 | grep "fasta.gz" | grep -v 'DNA' | grep -v 'additional' | xargs cat >> reference_proteomes.fasta.gz +find extract -type f -name '*.fasta.gz' ! -name '*_DNA.fasta.gz' ! -name '*_additional.fasta.gz' -exec cat '{}' '+' > reference_proteomes.fasta.gz # create the accession-to-taxid map for all reference proteome sequences: -printf "accession\taccession.version\ttaxid\tgi\n" > reference_proteomes.taxid_map -zcat */*/*.idmapping.gz | grep "NCBI_TaxID" | awk '{print $1 "\t" $1 "\t" $3 "\t" 0}' >> reference_proteomes.taxid_map +find extract -type f -name '*.idmapping.gz' -exec zcat {} + | \ + awk 'BEGIN {OFS="\t"; print "accession", "accession.version", "taxid", "gi"} $2=="NCBI_TaxID" {print $1, $1, $3, 0}' > reference_proteomes.taxid_map # create the taxon aware diamond blast database diamond makedb -p 16 --in reference_proteomes.fasta.gz --taxonmap reference_proteomes.taxid_map --taxonnodes $TAXDUMP/nodes.dmp --taxonnames $TAXDUMP/names.dmp -d reference_proteomes.dmnd + +# clean up +mv extract/{README,STATS} . +rm -r extract +rm -r $TAXDUMP + +# Compress final database and cleanup +cd .. +tar -cvzf $UNIPROT_TAR $UNIPROT +rm -r $UNIPROT ``` -### 4. BUSCO databases +#### 4. BUSCO databases Create the database directory and move into the directory: ```bash -DATE=2023_03 +DATE=2024_10 BUSCO=/path/to/databases/busco_${DATE} +BUSCO_TAR=/path/to/databases/busco_${DATE}.tar.gz mkdir -p $BUSCO cd $BUSCO ``` @@ -162,6 +200,13 @@ If you have [GNU parallel](https://www.gnu.org/software/parallel/) installed, yo find v5/data -name "*.tar.gz" | parallel "cd {//}; tar -xzf {/}" ``` +Finally re-compress and cleanup the files: + +```bash +tar -cvzf $BUSCO_TAR $BUSCO +rm -r $BUSCO +``` + ## Changes from Snakemake to Nextflow ### Commands @@ -227,8 +272,8 @@ List of tools for any given dataset can be fetched from the API, for example htt | Dependency | Snakemake | Nextflow | | ----------------- | --------- | -------- | -| blobtoolkit | 4.3.2 | 4.3.9 | -| blast | 2.12.0 | 2.15.0 | +| blobtoolkit | 4.3.2 | 4.4.0 | +| blast | 2.12.0 | 2.14.1 | | blobtk | 0.5.0 | 0.5.1 | | busco | 5.3.2 | 5.5.0 | | diamond | 2.0.15 | 2.1.8 | @@ -269,9 +314,8 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. -:::warning -Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). -::: +> [!WARNING] +> Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). The above pipeline run specified with a params file in yaml format: @@ -308,15 +352,11 @@ This version number will be logged in reports when you run the pipeline, so that To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -:::tip If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. -::: ## Core Nextflow arguments -:::note -These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). -::: +> These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). ### `-profile` @@ -324,9 +364,7 @@ Use this parameter to choose a configuration profile. Profiles can give configur Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -:::info -We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. -::: +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). diff --git a/lib/Utils.groovy b/lib/Utils.groovy index 8d030f4e..13cb02df 100755 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -22,7 +22,7 @@ class Utils { // Check that all channels are present // This channel list is ordered by required channel priority. - def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def required_channels_in_order = ['conda-forge', 'bioconda'] def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order diff --git a/modules.json b/modules.json index 3ce2a831..4e41c5ef 100644 --- a/modules.json +++ b/modules.json @@ -7,100 +7,100 @@ "nf-core": { "blast/blastn": { "branch": "master", - "git_sha": "209e5a3e2753c5e628736a662c877c20f341ee15", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/blast/blastn/blast-blastn.diff" }, - "busco": { + "busco/busco": { "branch": "master", - "git_sha": "e3126f437c336c826f242842fe51769cfce0ec2d", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], - "patch": "modules/nf-core/busco/busco.diff" + "patch": "modules/nf-core/busco/busco/busco-busco.diff" }, "cat/cat": { "branch": "master", - "git_sha": "9437e6053dccf4aafa022bfd6e7e9de67e625af8", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "diamond/blastp": { "branch": "master", - "git_sha": "b29f6beb86d1d24d680277fb1a3f4de7b8b8a92c", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/diamond/blastp/diamond-blastp.diff" }, "diamond/blastx": { "branch": "master", - "git_sha": "b29f6beb86d1d24d680277fb1a3f4de7b8b8a92c", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/diamond/blastx/diamond-blastx.diff" }, "fastawindows": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/fastawindows/fastawindows.diff" }, "gunzip": { "branch": "master", - "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", - "git_sha": "2c2d1cf80866dbd6dd0ea5d61ddd59533a72d41e", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", - "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", + "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] }, "pigz/compress": { "branch": "master", - "git_sha": "0eab94fc1e48703c1b0a8704bd665f554905c39d", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, - "samtools/fasta": { - "branch": "master", - "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"], - "patch": "modules/nf-core/samtools/fasta/samtools-fasta.diff" - }, "samtools/flagstat": { "branch": "master", - "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", - "installed_by": ["modules"] + "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/view/samtools-view.diff" }, "seqtk/subseq": { "branch": "master", - "git_sha": "7f88aae93c69586c0789322b77743ee0ef469502", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, + "untar": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "windowmasker/mkcounts": { "branch": "master", - "git_sha": "32cac29d4a92220965dace68a1fb0bb2e3547cac", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "windowmasker/ustat": { "branch": "master", - "git_sha": "32cac29d4a92220965dace68a1fb0bb2e3547cac", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] } } diff --git a/modules/local/blobtoolkit/chunk.nf b/modules/local/blobtoolkit/chunk.nf index 7dad9182..80deca41 100644 --- a/modules/local/blobtoolkit/chunk.nf +++ b/modules/local/blobtoolkit/chunk.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_CHUNK { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_CHUNK module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.0" input: tuple val(meta) , path(fasta) diff --git a/modules/local/blobtoolkit/countbuscos.nf b/modules/local/blobtoolkit/countbuscos.nf index 1b415504..5cc9e37f 100644 --- a/modules/local/blobtoolkit/countbuscos.nf +++ b/modules/local/blobtoolkit/countbuscos.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_COUNTBUSCOS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_COUNTBUSCOS module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.0" input: tuple val(meta), path(table, stageAs: 'dir??/*') diff --git a/modules/local/blobtoolkit/createblobdir.nf b/modules/local/blobtoolkit/createblobdir.nf index dfaddb7d..487dfee3 100644 --- a/modules/local/blobtoolkit/createblobdir.nf +++ b/modules/local/blobtoolkit/createblobdir.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_CREATEBLOBDIR { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_BLOBDIR module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.0" input: tuple val(meta), path(window, stageAs: 'windowstats/*') diff --git a/modules/local/blobtoolkit/extractbuscos.nf b/modules/local/blobtoolkit/extractbuscos.nf index 1e4440cb..c8346211 100644 --- a/modules/local/blobtoolkit/extractbuscos.nf +++ b/modules/local/blobtoolkit/extractbuscos.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_EXTRACTBUSCOS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_EXTRACTBUSCOS module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.0" input: tuple val(meta), path(fasta) diff --git a/modules/local/blobtoolkit/metadata.nf b/modules/local/blobtoolkit/metadata.nf deleted file mode 100644 index ffae2a8c..00000000 --- a/modules/local/blobtoolkit/metadata.nf +++ /dev/null @@ -1,33 +0,0 @@ -process BLOBTOOLKIT_METADATA { - tag "$meta.id" - label 'process_single' - - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "BLOBTOOLKIT_METADATA module does not support Conda. Please use Docker / Singularity / Podman instead." - } - container "docker.io/genomehubs/blobtoolkit:4.3.9" - - input: - tuple val(meta), path(yaml) - - output: - tuple val(meta), path("*.metadata.yaml"), emit: yaml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - btk pipeline add-summary-to-metadata \\ - --config ${yaml} \\ - --out ${prefix}.metadata.yaml - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - blobtoolkit: \$(btk --version | cut -d' ' -f2 | sed 's/v//') - END_VERSIONS - """ -} diff --git a/modules/local/blobtoolkit/summary.nf b/modules/local/blobtoolkit/summary.nf index 9b1a262f..e1736d67 100644 --- a/modules/local/blobtoolkit/summary.nf +++ b/modules/local/blobtoolkit/summary.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_SUMMARY { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_SUMMARY module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.0" input: tuple val(meta), path(blobdir) diff --git a/modules/local/blobtoolkit/unchunk.nf b/modules/local/blobtoolkit/unchunk.nf index 5285b0dc..99060a6b 100644 --- a/modules/local/blobtoolkit/unchunk.nf +++ b/modules/local/blobtoolkit/unchunk.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_UNCHUNK { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_UNCHUNK module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.0" input: tuple val(meta), path(blast_table) diff --git a/modules/local/blobtoolkit/updateblobdir.nf b/modules/local/blobtoolkit/updateblobdir.nf index 50167f8b..f6bafae7 100644 --- a/modules/local/blobtoolkit/updateblobdir.nf +++ b/modules/local/blobtoolkit/updateblobdir.nf @@ -5,12 +5,14 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_BLOBDIR module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.0" input: tuple val(meta), path(input, stageAs: "input_blobdir") - tuple val(meta1), path(blastx, stageAs: "blastx.txt") - tuple val(meta2), path(blastn, stageAs: "blastn.txt") + tuple val(meta2), path(synonyms_tsv) + tuple val(meta3), path(categories_tsv) + tuple val(meta4), path(blastx, stageAs: "blastx.txt") + tuple val(meta5), path(blastn, stageAs: "blastn.txt") path(taxdump) output: @@ -25,6 +27,9 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { prefix = task.ext.prefix ?: "${meta.id}" def hits_blastx = blastx ? "--hits ${blastx}" : "" def hits_blastn = blastn ? "--hits ${blastn}" : "" + def syn = synonyms_tsv ? "--synonyms ${synonyms_tsv}" : "" + def cat = categories_tsv ? "--text ${categories_tsv}" : "" + def head = (synonyms_tsv || categories_tsv) ? "--text-header" : "" """ # In-place modifications are not great in Nextflow, so work on a copy of ${input} mkdir ${prefix} @@ -34,6 +39,7 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { --taxrule bestdistorder=buscoregions \\ ${hits_blastx} \\ ${hits_blastn} \\ + ${syn} ${cat} ${head} \\ --threads ${task.cpus} \\ $args \\ ${prefix} diff --git a/modules/local/blobtoolkit/updatemeta.nf b/modules/local/blobtoolkit/updatemeta.nf index 0c3aaaaf..de664149 100644 --- a/modules/local/blobtoolkit/updatemeta.nf +++ b/modules/local/blobtoolkit/updatemeta.nf @@ -5,17 +5,11 @@ process BLOBTOOLKIT_UPDATEMETA { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_UPDATEMETA module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.0" input: tuple val(meta), path(input) - val reads path versions - // The following are passed as "val" because we just want to know the full paths. No staging necessary - val blastp - val blastx - val blastn - val taxdump output: tuple val(meta), path("*.json"), emit: json @@ -27,17 +21,11 @@ process BLOBTOOLKIT_UPDATEMETA { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def input_reads = reads.collect{"--read_id ${it[0].id} --read_type ${it[0].datatype} --read_path ${it[1]}"}.join(' ') """ update_versions.py \\ ${args} \\ --meta_in ${input}/meta.json \\ --software ${versions} \\ - --blastp ${blastp} \\ - --blastx ${blastx} \\ - --blastn ${blastn} \\ - --taxdump ${taxdump} \\ - $input_reads \\ --meta_out ${prefix}.meta.json cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/blobtoolkit/windowstats.nf b/modules/local/blobtoolkit/windowstats.nf index d432a8ff..e6445f6b 100644 --- a/modules/local/blobtoolkit/windowstats.nf +++ b/modules/local/blobtoolkit/windowstats.nf @@ -4,7 +4,7 @@ process BLOBTOOLKIT_WINDOWSTATS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_WINDOWSTATS module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.0" input: tuple val(meta), path(tsv) diff --git a/modules/local/generate_config.nf b/modules/local/generate_config.nf index 3aab7ee9..d4200641 100644 --- a/modules/local/generate_config.nf +++ b/modules/local/generate_config.nf @@ -3,19 +3,25 @@ process GENERATE_CONFIG { label 'process_single' conda "conda-forge::requests=2.28.1 conda-forge::pyyaml=6.0" - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.0" input: tuple val(meta), val(fasta) val taxon_query val busco_lin path lineage_tax_ids - tuple val(meta2), path(blastn) + val reads + tuple val(meta2), path(blastp, stageAs: 'blastp/*') + tuple val(meta3), path(blastx, stageAs: 'blastx/*') + tuple val(meta4), path(blastn, stageAs: 'blastn/*') + tuple val(meta5), path(taxdump) output: - tuple val(meta), path("*.yaml"), emit: yaml - tuple val(meta), path("*.csv") , emit: csv - path "versions.yml" , emit: versions + tuple val(meta), path("*.yaml") , emit: yaml + tuple val(meta), path("*.csv") , emit: csv + tuple val(meta), path("*.synonyms.tsv") , emit: synonyms_tsv, optional: true + tuple val(meta), path("*.categories.tsv"), emit: categories_tsv, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,6 +31,7 @@ process GENERATE_CONFIG { def prefix = task.ext.prefix ?: "${meta.id}" def busco_param = busco_lin ? "--busco '${busco_lin}'" : "" def accession_params = params.accession ? "--accession ${params.accession}" : "" + def input_reads = reads.collect{"--read_id ${it[0].id} --read_type ${it[0].datatype} --read_layout ${it[0].layout} --read_path ${it[1]}"}.join(' ') """ generate_config.py \\ --fasta $fasta \\ @@ -32,9 +39,13 @@ process GENERATE_CONFIG { --lineage_tax_ids $lineage_tax_ids \\ $busco_param \\ $accession_params \\ - --blastn $blastn \\ - --yml_out ${prefix}.yaml \\ - --csv_out ${prefix}.csv + --nt $blastn \\ + $input_reads \\ + --blastp ${blastp} \\ + --blastx ${blastx} \\ + --blastn ${blastn} \\ + --taxdump ${taxdump} \\ + --output_prefix ${prefix} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/blast/blastn/blast-blastn.diff b/modules/nf-core/blast/blastn/blast-blastn.diff index e01e07cb..d4a5a366 100644 --- a/modules/nf-core/blast/blastn/blast-blastn.diff +++ b/modules/nf-core/blast/blastn/blast-blastn.diff @@ -25,7 +25,22 @@ Changes in module 'nf-core/blast/blastn' """ if [ "${is_compressed}" == "true" ]; then -@@ -39,8 +41,15 @@ +@@ -35,12 +37,30 @@ + fi + echo Using \$DB + ++ if [ -n "${taxid}" ]; then ++ # Symlink the tax* files (needed for -taxid options to work) ++ for file in taxdb.btd taxdb.bti taxonomy4blast.sqlite3; do ++ if [ ! -f ${db}/\$file ]; then ++ echo "Error: \$file not found in ${db}" ++ exit 1 ++ fi ++ ln -s ${db}/\$file . ++ done ++ fi ++ + blastn \\ -num_threads ${task.cpus} \\ -db \$DB \\ -query ${fasta_name} \\ diff --git a/modules/nf-core/blast/blastn/environment.yml b/modules/nf-core/blast/blastn/environment.yml index cb9b15dd..777e097e 100644 --- a/modules/nf-core/blast/blastn/environment.yml +++ b/modules/nf-core/blast/blastn/environment.yml @@ -1,7 +1,5 @@ -name: blast_blastn channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::blast=2.14.1 + - bioconda::blast=2.15.0 diff --git a/modules/nf-core/blast/blastn/main.nf b/modules/nf-core/blast/blastn/main.nf index d674989a..84756771 100644 --- a/modules/nf-core/blast/blastn/main.nf +++ b/modules/nf-core/blast/blastn/main.nf @@ -3,8 +3,8 @@ process BLAST_BLASTN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/blast:2.14.1--pl5321h6f7f691_0': - 'biocontainers/blast:2.14.1--pl5321h6f7f691_0' }" + 'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1': + 'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }" input: tuple val(meta) , path(fasta) @@ -37,6 +37,17 @@ process BLAST_BLASTN { fi echo Using \$DB + if [ -n "${taxid}" ]; then + # Symlink the tax* files (needed for -taxid options to work) + for file in taxdb.btd taxdb.bti taxonomy4blast.sqlite3; do + if [ ! -f ${db}/\$file ]; then + echo "Error: \$file not found in ${db}" + exit 1 + fi + ln -s ${db}/\$file . + done + fi + blastn \\ -num_threads ${task.cpus} \\ -db \$DB \\ diff --git a/modules/nf-core/blast/blastn/meta.yml b/modules/nf-core/blast/blastn/meta.yml index a0d64dd6..0f5e41bb 100644 --- a/modules/nf-core/blast/blastn/meta.yml +++ b/modules/nf-core/blast/blastn/meta.yml @@ -13,39 +13,42 @@ tools: documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs doi: 10.1016/S0022-2836(05)80360-2 licence: ["US-Government-Work"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input fasta file containing queries sequences - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" - - meta2: - type: map - description: | - Groovy Map containing db information - e.g. [ id:'test2', single_end:false ] - - db: - type: directory - description: Directory containing the blast database - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file containing queries sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing db information + e.g. [ id:'test2', single_end:false ] + - db: + type: directory + description: Directory containing the blast database + pattern: "*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - txt: - type: file - description: File containing blastn hits - pattern: "*.txt" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: File containing blastn hits + pattern: "*.txt" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test b/modules/nf-core/blast/blastn/tests/main.nf.test index 02ecfab5..aacc93c5 100644 --- a/modules/nf-core/blast/blastn/tests/main.nf.test +++ b/modules/nf-core/blast/blastn/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_process { script "../../makeblastdb/main.nf" process { """ - input[0] = [ [id:'test2'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } } @@ -29,7 +29,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[1] = BLAST_MAKEBLASTDB.out.db """ } @@ -53,7 +53,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ] input[1] = BLAST_MAKEBLASTDB.out.db """ } diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test.snap b/modules/nf-core/blast/blastn/tests/main.nf.test.snap index d1b5f3f2..dd8b7751 100644 --- a/modules/nf-core/blast/blastn/tests/main.nf.test.snap +++ b/modules/nf-core/blast/blastn/tests/main.nf.test.snap @@ -2,7 +2,7 @@ "versions": { "content": [ [ - "versions.yml:md5,2d5ffadc7035672f6a9e00b01d1751ea" + "versions.yml:md5,faf2471d836ebbf24d96d3e1f8720b17" ] ], "timestamp": "2023-12-11T07:20:03.54997013" @@ -10,7 +10,7 @@ "versions_zipped": { "content": [ [ - "versions.yml:md5,2d5ffadc7035672f6a9e00b01d1751ea" + "versions.yml:md5,faf2471d836ebbf24d96d3e1f8720b17" ] ], "timestamp": "2023-12-11T07:20:12.925782708" diff --git a/modules/nf-core/busco/busco.diff b/modules/nf-core/busco/busco/busco-busco.diff similarity index 87% rename from modules/nf-core/busco/busco.diff rename to modules/nf-core/busco/busco/busco-busco.diff index 775788fb..1317574a 100644 --- a/modules/nf-core/busco/busco.diff +++ b/modules/nf-core/busco/busco/busco-busco.diff @@ -1,8 +1,8 @@ -Changes in module 'nf-core/busco' ---- modules/nf-core/busco/main.nf -+++ modules/nf-core/busco/main.nf +Changes in module 'nf-core/busco/busco' +--- modules/nf-core/busco/busco/main.nf ++++ modules/nf-core/busco/busco/main.nf @@ -1,6 +1,5 @@ - process BUSCO { + process BUSCO_BUSCO { - tag "$meta.id" - label 'process_medium' + tag "${meta.id}_${lineage}" diff --git a/modules/nf-core/busco/environment.yml b/modules/nf-core/busco/busco/environment.yml similarity index 50% rename from modules/nf-core/busco/environment.yml rename to modules/nf-core/busco/busco/environment.yml index f872d057..5b918b45 100644 --- a/modules/nf-core/busco/environment.yml +++ b/modules/nf-core/busco/busco/environment.yml @@ -1,7 +1,5 @@ -name: busco channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::busco=5.5.0 + - bioconda::busco=5.7.1 diff --git a/modules/nf-core/busco/main.nf b/modules/nf-core/busco/busco/main.nf similarity index 80% rename from modules/nf-core/busco/main.nf rename to modules/nf-core/busco/busco/main.nf index 83d8eacd..319f7235 100644 --- a/modules/nf-core/busco/main.nf +++ b/modules/nf-core/busco/busco/main.nf @@ -1,13 +1,13 @@ -process BUSCO { +process BUSCO_BUSCO { tag "${meta.id}_${lineage}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/busco:5.5.0--pyhdfd78af_0': - 'biocontainers/busco:5.5.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/busco:5.7.1--pyhdfd78af_0': + 'biocontainers/busco:5.7.1--pyhdfd78af_0' }" input: - tuple val(meta), path('tmp_input/*') + tuple val(meta), path(fasta, stageAs:'tmp_input/*') val mode // Required: One of genome, proteins, or transcriptome val lineage // Required: lineage to check against, "auto" enables --auto-lineage instead path busco_lineages_path // Recommended: path to busco lineages - downloads if not set @@ -15,13 +15,13 @@ process BUSCO { output: tuple val(meta), path("*-busco.batch_summary.txt") , emit: batch_summary - tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt, optional: true - tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json, optional: true - tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table, optional: true - tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list, optional: true - tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins, optional: true + tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt , optional: true + tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json , optional: true + tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table , optional: true + tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list , optional: true + tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins , optional: true tuple val(meta), path("*-busco/*/run_*/busco_sequences") , emit: seq_dir - tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir, optional: true + tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir , optional: true tuple val(meta), path("*-busco") , emit: busco_dir path "versions.yml" , emit: versions @@ -90,4 +90,17 @@ process BUSCO { busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" + def fasta_name = files(fasta).first().name - '.gz' + """ + touch ${prefix}-busco.batch_summary.txt + mkdir -p ${prefix}-busco/$fasta_name/run_${lineage}/busco_sequences + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/busco/busco/meta.yml b/modules/nf-core/busco/busco/meta.yml new file mode 100644 index 00000000..7cb6d69c --- /dev/null +++ b/modules/nf-core/busco/busco/meta.yml @@ -0,0 +1,152 @@ +name: busco_busco +description: Benchmarking Universal Single Copy Orthologs +keywords: + - quality control + - genome + - transcriptome + - proteome +tools: + - busco: + description: BUSCO provides measures for quantitative assessment of genome assembly, + gene set, and transcriptome completeness based on evolutionarily informed expectations + of gene content from near-universal single-copy orthologs selected from OrthoDB. + homepage: https://busco.ezlab.org/ + documentation: https://busco.ezlab.org/busco_userguide.html + tool_dev_url: https://gitlab.com/ezlab/busco + doi: "10.1007/978-1-4939-9173-0_14" + licence: ["MIT"] + identifier: biotools:busco +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Nucleic or amino acid sequence file in FASTA format. + pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" + - - mode: + type: string + description: The mode to run Busco in. One of genome, proteins, or transcriptome + pattern: "{genome,proteins,transcriptome}" + - - lineage: + type: string + description: The BUSCO lineage to use, or "auto" to automatically select lineage + - - busco_lineages_path: + type: directory + description: Path to local BUSCO lineages directory. + - - config_file: + type: file + description: Path to BUSCO config file. +output: + - batch_summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco.batch_summary.txt": + type: file + description: Summary of all sequence files analyzed + pattern: "*-busco.batch_summary.txt" + - short_summaries_txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - short_summary.*.txt: + type: file + description: Short Busco summary in plain text format + pattern: "short_summary.*.txt" + - short_summaries_json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - short_summary.*.json: + type: file + description: Short Busco summary in JSON format + pattern: "short_summary.*.json" + - full_table: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/full_table.tsv": + type: file + description: Full BUSCO results table + pattern: "full_table.tsv" + - missing_busco_list: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/missing_busco_list.tsv": + type: file + description: List of missing BUSCOs + pattern: "missing_busco_list.tsv" + - single_copy_proteins: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/single_copy_proteins.faa": + type: file + description: Fasta file of single copy proteins (transcriptome mode) + pattern: "single_copy_proteins.faa" + - seq_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/busco_sequences": + type: directory + description: BUSCO sequence directory + pattern: "busco_sequences" + - translated_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/translated_proteins": + type: directory + description: Six frame translations of each transcript made by the transcriptome + mode + pattern: "translated_dir" + - busco_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco": + type: directory + description: BUSCO lineage specific output + pattern: "*-busco" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@priyanka-surana" + - "@charles-plessy" + - "@mahesh-panchal" + - "@muffato" + - "@jvhagey" + - "@gallvp" +maintainers: + - "@priyanka-surana" + - "@charles-plessy" + - "@mahesh-panchal" + - "@muffato" + - "@jvhagey" + - "@gallvp" diff --git a/modules/nf-core/busco/busco/tests/main.nf.test b/modules/nf-core/busco/busco/tests/main.nf.test new file mode 100644 index 00000000..bb7b49a9 --- /dev/null +++ b/modules/nf-core/busco/busco/tests/main.nf.test @@ -0,0 +1,415 @@ +nextflow_process { + + name "Test Process BUSCO_BUSCO" + script "../main.nf" + process "BUSCO_BUSCO" + + tag "modules" + tag "modules_nfcore" + tag "busco" + tag "busco/busco" + + test("test_busco_genome_single_fasta") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues + input[3] = [] // Download busco lineage + input[4] = [] // No config + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + } + + test("test_busco_genome_multi_fasta") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta', checkIfExists: true) + ] + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1][0]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_txt[0][1][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1][0]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + with(path(process.out.short_summaries_json[0][1][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1][0]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(file(process.out.seq_dir[0][1][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_eukaryote_metaeuk") { + + config './nextflow.metaeuk.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'eukaryota_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_eukaryote_augustus") { + + config './nextflow.augustus.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'eukaryota_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + assert snapshot( + process.out.batch_summary[0][1], + process.out.versions[0] + ).match() + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Augustus did not recognize any genes') + + } + + assert process.out.short_summaries_json == [] + assert process.out.short_summaries_txt == [] + assert process.out.missing_busco_list == [] + assert process.out.full_table == [] + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_protein") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true) + ] + input[1] = 'proteins' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_transcriptome") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true) + ] + input[1] = 'transcriptome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.translated_dir[0][1], + process.out.single_copy_proteins[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + } + + } + + test("minimal-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/busco/busco/tests/main.nf.test.snap b/modules/nf-core/busco/busco/tests/main.nf.test.snap new file mode 100644 index 00000000..1b6411bc --- /dev/null +++ b/modules/nf-core/busco/busco/tests/main.nf.test.snap @@ -0,0 +1,230 @@ +{ + "minimal-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test" + }, + [ + [ + [ + [ + + ] + ] + ] + ] + ] + ], + "9": [ + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "batch_summary": [ + [ + { + "id": "test" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "busco_dir": [ + [ + { + "id": "test" + }, + [ + [ + [ + [ + + ] + ] + ] + ] + ] + ], + "full_table": [ + + ], + "missing_busco_list": [ + + ], + "seq_dir": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "short_summaries_json": [ + + ], + "short_summaries_txt": [ + + ], + "single_copy_proteins": [ + + ], + "translated_dir": [ + + ], + "versions": [ + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:28:04.451297" + }, + "test_busco_eukaryote_augustus": { + "content": [ + "test-eukaryota_odb10-busco.batch_summary.txt:md5,3ea3bdc423a461dae514d816bdc61c89", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:26:36.974986" + }, + "test_busco_genome_single_fasta": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,21b3fb771cf36be917cc451540d999be", + "full_table.tsv:md5,638fe7590f442c57361554dae330eca1", + "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:22:45.07816" + }, + "test_busco_genome_multi_fasta": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,fcd3c208913e8abda3d6742c43fec5fa", + [ + "full_table.tsv:md5,c657edcc7d0de0175869717551df6e83", + "full_table.tsv:md5,638fe7590f442c57361554dae330eca1" + ], + [ + "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112", + "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a" + ], + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:23:50.255602" + }, + "test_busco_eukaryote_metaeuk": { + "content": [ + "test-eukaryota_odb10-busco.batch_summary.txt:md5,ff6d8277e452a83ce9456bbee666feb6", + "full_table.tsv:md5,92b1b1d5cb5ea0e2093d16f00187e8c7", + "missing_busco_list.tsv:md5,0352e563de290bf804c708323c35a9e3", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:25:38.159041" + }, + "test_busco_transcriptome": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,8734b3f379c4c0928e5dd4ea1873dc64", + "full_table.tsv:md5,1b2ce808fdafa744c56b5f781551272d", + "missing_busco_list.tsv:md5,a6931b6470262b997b8b99ea0f1d14a4", + [ + "1024388at2.faa:md5,797d603d262a6595a112e25b73e878b0", + "1054741at2.faa:md5,cd4b928cba6b19b4437746ba507e7195", + "1093223at2.faa:md5,df9549708e5ffcfaee6a74dd70a0e5dc", + "1151822at2.faa:md5,12726afc1cdc40c13392e1596e93df3a", + "143460at2.faa:md5,d887431fd988a5556a523440f02d9594", + "1491686at2.faa:md5,d03362d19979b27306c192f1c74a84e5", + "1504821at2.faa:md5,4f5f6e5c57bac0092c1d85ded73d7e67", + "1574817at2.faa:md5,1153e55998c2929eacad2aed7d08d248", + "1592033at2.faa:md5,bb7a59e5f3a57ba12d10dabf4c77ab57", + "1623045at2.faa:md5,8fe38155feb1802beb97ef7714837bf5", + "1661836at2.faa:md5,6c6d592c2fbb0d7a4e5e1f47a15644f0", + "1674344at2.faa:md5,bb41b44e53565a54cadf0b780532fe08", + "1698718at2.faa:md5,f233860000028eb00329aa85236c71e5", + "1990650at2.faa:md5,34a2d29c5f8b6253159ddb7a43fa1829", + "223233at2.faa:md5,dec6705c7846c989296e73942f953cbc", + "402899at2.faa:md5,acc0f271f9a586d2ce1ee41669b22999", + "505485at2.faa:md5,aa0391f8fa5d9bd19b30d844d5a99845", + "665824at2.faa:md5,47f8ad43b6a6078206feb48c2e552793", + "776861at2.faa:md5,f8b90c13f7c6be828dea3bb920195e3d", + "874197at2.faa:md5,8d22a35a768debe6f376fc695d233a69", + "932854at2.faa:md5,2eff2de1ab83b22f3234a529a44e22bb", + "95696at2.faa:md5,247bfd1aef432f7b5456307768e9149c" + ], + "single_copy_proteins.faa:md5,73e2c5d6a9b0f01f2deea3cc5f21b764", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:27:53.992893" + }, + "test_busco_protein": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,f5a782378f9f94a748aa907381fdef91", + "full_table.tsv:md5,812ab6a0496fccab774643cf40c4f2a8", + "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:27:12.724862" + } +} \ No newline at end of file diff --git a/modules/nf-core/busco/busco/tests/nextflow.augustus.config b/modules/nf-core/busco/busco/tests/nextflow.augustus.config new file mode 100644 index 00000000..84daa69d --- /dev/null +++ b/modules/nf-core/busco/busco/tests/nextflow.augustus.config @@ -0,0 +1,5 @@ +process { + withName: 'BUSCO_BUSCO' { + ext.args = '--tar --augustus' + } +} diff --git a/modules/nf-core/busco/busco/tests/nextflow.config b/modules/nf-core/busco/busco/tests/nextflow.config new file mode 100644 index 00000000..1ec3fec0 --- /dev/null +++ b/modules/nf-core/busco/busco/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'BUSCO_BUSCO' { + ext.args = '--tar' + } +} diff --git a/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config b/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config new file mode 100644 index 00000000..c1418445 --- /dev/null +++ b/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config @@ -0,0 +1,5 @@ +process { + withName: 'BUSCO_BUSCO' { + ext.args = '--tar --metaeuk' + } +} diff --git a/modules/nf-core/busco/busco/tests/old_test.yml b/modules/nf-core/busco/busco/tests/old_test.yml new file mode 100644 index 00000000..75177f5d --- /dev/null +++ b/modules/nf-core/busco/busco/tests/old_test.yml @@ -0,0 +1,624 @@ +- name: busco test_busco_genome_single_fasta + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_single_fasta -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log + md5sum: 9caf1a1434414c78562eb0bbb9c0e53f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint + contains: + - "Tool: prodigal" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv + md5sum: c56edab1dc1522e993c25ae2b730799f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv + md5sum: b533ef30270f27160acce85a22d01bf5 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "lineage_dataset" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_genome_multi_fasta + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 8c64c1a28b086ef2ee444f99cbed5f7d + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_out.log + md5sum: 8f047bdb33264d22a83920bc2c63f29a + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_err.log + md5sum: c1fdc6977332f53dfe7f632733bb4585 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_out.log + md5sum: 50752acb1c5a20be886bfdfc06635bcb + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/.checkpoint + contains: + - "Tool: prodigal" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.faa + md5sum: 8166471fc5f08c82fd5643ab42327f9d + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.fna + md5sum: ddc508a18f60e7f3314534df50cdf8ca + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa + md5sum: 8166471fc5f08c82fd5643ab42327f9d + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna + md5sum: ddc508a18f60e7f3314534df50cdf8ca + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log + md5sum: c1fdc6977332f53dfe7f632733bb4585 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log + md5sum: 50752acb1c5a20be886bfdfc06635bcb + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.faa + md5sum: e56fd59c38248dc21ac94355dca98121 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.fna + md5sum: b365f84bf99c68357952e0b98ed7ce42 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_err.log + md5sum: e5f14d7925ba14a0f9850542f3739894 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_out.log + md5sum: d41971bfc1b621d4ffd2633bc47017ea + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/full_table.tsv + md5sum: c9651b88b10871abc260ee655898e828 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/missing_busco_list.tsv + md5sum: 9939309df2da5419de88c32d1435c779 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log + md5sum: 9caf1a1434414c78562eb0bbb9c0e53f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint + contains: + - "Tool: prodigal" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv + md5sum: c56edab1dc1522e993c25ae2b730799f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv + md5sum: b533ef30270f27160acce85a22d01bf5 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_eukaryote_metaeuk + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_metaeuk -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt + md5sum: ff6d8277e452a83ce9456bbee666feb6 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log + md5sum: e63debaa653f18f7405d936050abc093 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv + md5sum: bd880e90b9e5620a58943a3e0f9ff16b + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint + contains: + - "Tool: metaeuk" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa + md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv + md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_eukaryote_augustus + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_augustus -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt + md5sum: ff6d8277e452a83ce9456bbee666feb6 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log + md5sum: e63debaa653f18f7405d936050abc093 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log + contains: + - "metaeuk" + - "easy-predict" + - "Compute score and coverage" + - "Time for processing:" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log + contains: + - "metaeuk" + - "easy-predict" + - "Compute score and coverage" + - "Time for processing:" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv + md5sum: bd880e90b9e5620a58943a3e0f9ff16b + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint + contains: + - "Tool: metaeuk" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa + md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv + md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_protein + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_protein -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 7a65e6cbb6c56a2ea4e739ae0aa3297d + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/full_table.tsv + md5sum: 0e34f1011cd83ea1d5d5103ec62b8922 + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/missing_busco_list.tsv + md5sum: 9939309df2da5419de88c32d1435c779 + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/versions.yml + +- name: busco test_busco_transcriptome + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_transcriptome -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 46118ecf60d1b87d22b96d80f4f03632 + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/.checkpoint + contains: + - "Tool: makeblastdb" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ndb + md5sum: 3788c017fe5e6f0f58224e9cdd21822b + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nhr + md5sum: 8ecd2ce392bb5e25ddbe1d85f879582e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nin + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.njs + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.not + md5sum: 0c340e376c7e85d19f82ec1a833e6a6e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nsq + md5sum: 532d5c0a7ea00fe95ca3c97cb3be6198 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ntf + md5sum: de1250813f0c7affc6d12dac9d0fb6bb + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nto + md5sum: ff74bd41f9cc9b011c63a32c4f7693bf + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_err.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_out.log + contains: + - "Building a new DB" + - "Adding sequences from FASTA" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_err.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_out.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/.checkpoint + contains: + - "Tool: tblastn" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/coordinates.tsv + md5sum: cc30eed321944af293452bdbcfc24292 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_101.temp + md5sum: 73e9c65fc83fedc58f57f09b08f08238 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_119.temp + md5sum: 7fa4cc7955ec0cc36330a221c579b975 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_129.temp + md5sum: 6f1601c875d019e3f6f1f98ed8e988d4 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_138.temp + md5sum: 3f8e034686cd240c2330650d791bcae2 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_143.temp + md5sum: df3dfa8e9ba30ed70cf75b5e7abf2179 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_172.temp + md5sum: 7d463e0e6cf7169bc9077d8dc776dda1 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_178.temp + md5sum: 2288edf7fa4f88f51b4cf4d94086f77e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_188.temp + md5sum: 029906abbad6d87fc57830dd548cac24 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_195.temp + md5sum: 4937f3b348774a31b1160a00297c29cc + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_210.temp + md5sum: afcb20ba4c466479d6b91c8c62251e1f + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_232.temp + md5sum: 2e1e823ce017345bd998191a39fa9924 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_268.temp + md5sum: 08c2d82c34ecffbe1c638b410349412e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_29.temp + md5sum: cd9b63cf93524284781535c888313764 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_44.temp + md5sum: d1929b742b24ebe379bf4801ca882dca + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_58.temp + md5sum: 69215765b010c05336538cb322c900b3 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_72.temp + md5sum: 6feaa1cc3b0899a147ea9d466878f3e3 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_80.temp + md5sum: 13625eae14e860a96ce17cd4e37e9d01 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_81.temp + md5sum: e14b2484649b0dbc8926815c207b806d + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_93.temp + md5sum: 6902c93691df00e690faea914c71839e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_97.temp + md5sum: 0a0d9d38a83acbd5ad43c29cdf429988 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/tblastn.tsv + contains: + - "TBLASTN" + - "BLAST processed" + - "queries" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/full_table.tsv + md5sum: 24df25199e13c88bd892fc3e7b541ca0 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/missing_busco_list.tsv + md5sum: e7232e2b8cca4fdfdd9e363b39ebbc81 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/single_copy_proteins.faa + md5sum: e04b9465733577ae6e4bccb7aa01e720 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1024388at2.faa + md5sum: 7333c39a20258f20c7019ea0cd83157c + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1054741at2.faa + md5sum: ebb481e77a824685fbe04d8a2f3a0d7d + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1093223at2.faa + md5sum: 34621c7d499034e8f8e6b92fd4020a93 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1151822at2.faa + md5sum: aa89ca381c1c70c9c4e1380351ca7c2a + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/143460at2.faa + md5sum: f2e91d78b8dd3722840378789f29e8c8 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1491686at2.faa + md5sum: 73c25aef5c9cba7f4151804941b146ea + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1504821at2.faa + md5sum: cda556018d1f84ebe517e89f6fc107d0 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1574817at2.faa + md5sum: a9096c9fb8b25c78a72871ab0463acdc + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1592033at2.faa + md5sum: e463d25ce186c0cebfd749474f3a4c64 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1623045at2.faa + md5sum: f2cfd241590c6d8377286d6135480937 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1661836at2.faa + md5sum: 586569546fb9861502468e3d9ba2775c + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1674344at2.faa + md5sum: 24c658bee14ad84b062d81ad96642eb8 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1698718at2.faa + md5sum: 0b8e26ddf5149bbd8805be7af125208d + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1990650at2.faa + md5sum: 159320712ee01fb2ccb31a25df44eead + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/223233at2.faa + md5sum: 812629c0b06ac3d18661c2ca78de0c08 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/402899at2.faa + md5sum: f7ff4e1591342d30b77392a2e84b57d9 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/505485at2.faa + md5sum: 7b34a24fc49c540d46fcf96ff5129564 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/665824at2.faa + md5sum: 4cff2df64f6bcaff8bc19c234c8bcccd + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/776861at2.faa + md5sum: 613af7a3fea30ea2bece66f603b9284a + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/874197at2.faa + md5sum: a7cd1b13c9ef91c7ef4e31614166f197 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/932854at2.faa + md5sum: fe313ffd5efdb0fed887a04fba352552 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/95696at2.faa + md5sum: 4e1f30a2fea4dfbf9bb7fae2700622a0 + - path: output/busco/versions.yml diff --git a/modules/nf-core/busco/busco/tests/tags.yml b/modules/nf-core/busco/busco/tests/tags.yml new file mode 100644 index 00000000..7c4d2835 --- /dev/null +++ b/modules/nf-core/busco/busco/tests/tags.yml @@ -0,0 +1,2 @@ +busco/busco: + - "modules/nf-core/busco/busco/**" diff --git a/modules/nf-core/busco/meta.yml b/modules/nf-core/busco/meta.yml deleted file mode 100644 index 90b30d4d..00000000 --- a/modules/nf-core/busco/meta.yml +++ /dev/null @@ -1,96 +0,0 @@ -name: busco -description: Benchmarking Universal Single Copy Orthologs -keywords: - - quality control - - genome - - transcriptome - - proteome -tools: - - busco: - description: BUSCO provides measures for quantitative assessment of genome assembly, gene set, and transcriptome completeness based on evolutionarily informed expectations of gene content from near-universal single-copy orthologs selected from OrthoDB. - homepage: https://busco.ezlab.org/ - documentation: https://busco.ezlab.org/busco_userguide.html - tool_dev_url: https://gitlab.com/ezlab/busco - doi: "10.1007/978-1-4939-9173-0_14" - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Nucleic or amino acid sequence file in FASTA format. - pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" - - mode: - type: string - description: The mode to run Busco in. One of genome, proteins, or transcriptome - pattern: "{genome,proteins,transcriptome}" - - lineage: - type: string - description: The BUSCO lineage to use, or "auto" to automatically select lineage - - busco_lineages_path: - type: directory - description: Path to local BUSCO lineages directory. - - config_file: - type: file - description: Path to BUSCO config file. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - batch_summary: - type: file - description: Summary of all sequence files analyzed - pattern: "*-busco.batch_summary.txt" - - short_summaries_txt: - type: file - description: Short Busco summary in plain text format - pattern: "short_summary.*.txt" - - short_summaries_json: - type: file - description: Short Busco summary in JSON format - pattern: "short_summary.*.json" - - busco_dir: - type: directory - description: BUSCO lineage specific output - pattern: "*-busco" - - full_table: - type: file - description: Full BUSCO results table - pattern: "full_table.tsv" - - missing_busco_list: - type: file - description: List of missing BUSCOs - pattern: "missing_busco_list.tsv" - - single_copy_proteins: - type: file - description: Fasta file of single copy proteins (transcriptome mode) - pattern: "single_copy_proteins.faa" - - seq_dir: - type: directory - description: BUSCO sequence directory - pattern: "busco_sequences" - - translated_proteins: - type: directory - description: Six frame translations of each transcript made by the transcriptome mode - pattern: "translated_proteins" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@priyanka-surana" - - "@charles-plessy" - - "@mahesh-panchal" - - "@muffato" - - "@jvhagey" -maintainers: - - "@priyanka-surana" - - "@charles-plessy" - - "@mahesh-panchal" - - "@muffato" - - "@jvhagey" diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml index 17a04ef2..9b01c865 100644 --- a/modules/nf-core/cat/cat/environment.yml +++ b/modules/nf-core/cat/cat/environment.yml @@ -1,7 +1,5 @@ -name: cat_cat channels: - conda-forge - bioconda - - defaults dependencies: - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf index adbdbd7b..2862c64c 100644 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ -76,4 +76,3 @@ def getFileSuffix(filename) { def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) } - diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml index 00a8db0b..81778a06 100644 --- a/modules/nf-core/cat/cat/meta.yml +++ b/modules/nf-core/cat/cat/meta.yml @@ -9,25 +9,32 @@ tools: description: Just concatenation documentation: https://man7.org/linux/man-pages/man1/cat.1.html licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - files_in: - type: file - description: List of compressed / uncompressed files - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - file_out: - type: file - description: Concatenated file. Will be gzipped if file_out ends with ".gz" - pattern: "${file_out}" + - meta: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - ${prefix}: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test index fcee2d19..9cb16178 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -29,7 +29,8 @@ nextflow_process { then { assertAll( { assert !process.success }, - { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") } + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -83,8 +84,12 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } ) } } @@ -142,8 +147,12 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } ) } } @@ -170,8 +179,12 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } ) } } diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap index 423571ba..b7623ee6 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test.snap +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -1,10 +1,4 @@ { - "test_cat_unzipped_zipped_size": { - "content": [ - 375 - ], - "timestamp": "2023-10-16T14:33:08.049445686" - }, "test_cat_unzipped_unzipped": { "content": [ { @@ -34,6 +28,10 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, "timestamp": "2023-10-16T14:32:18.500464399" }, "test_cat_zipped_unzipped": { @@ -65,9 +63,13 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, "timestamp": "2023-10-16T14:32:49.642741302" }, - "test_cat_zipped_zipped_lines": { + "test_cat_zipped_zipped": { "content": [ [ "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", @@ -76,11 +78,31 @@ "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ], + 78, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:46.802978" + }, + "test_cat_name_conflict": { + "content": [ + [ + ] ], - "timestamp": "2023-10-16T14:32:33.629048645" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:29.45394" }, - "test_cat_unzipped_zipped_lines": { + "test_cat_one_file_unzipped_zipped": { "content": [ [ ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", @@ -89,11 +111,19 @@ "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 374, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" ] ], - "timestamp": "2023-10-16T14:33:08.038830506" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:52:02.774016" }, - "test_cat_one_file_unzipped_zipped_lines": { + "test_cat_unzipped_zipped": { "content": [ [ ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", @@ -102,20 +132,16 @@ "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 375, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" ] ], - "timestamp": "2023-10-16T14:33:21.39642399" - }, - "test_cat_zipped_zipped_size": { - "content": [ - 78 - ], - "timestamp": "2023-10-16T14:32:33.641869244" - }, - "test_cat_one_file_unzipped_zipped_size": { - "content": [ - 374 - ], - "timestamp": "2023-10-16T14:33:21.4094373" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:57.581523" } } \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml index b48ced26..9d79af93 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -1,7 +1,5 @@ -name: custom_dumpsoftwareversions channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::multiqc=1.20 diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index 5f15a5fd..dc1e412f 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,34 +1,40 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template +description: Custom module used to dump software versions within the nf-core pipeline + template keywords: - custom - dump - version tools: - custom: - description: Custom module used to dump software versions within the nf-core pipeline template + description: Custom module used to dump software versions within the nf-core pipeline + template homepage: https://github.com/nf-core/tools documentation: https://github.com/nf-core/tools licence: ["MIT"] + identifier: "" input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" + - - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" output: - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" + - software_versions.yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" + - software_versions_mqc.yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index da033408..b83b32c4 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -3,11 +3,11 @@ """Provide functions to merge multiple versions.yml files.""" - -import yaml import platform from textwrap import dedent +import yaml + def _make_versions_html(versions): """Generate a tabular HTML output of all versions for MultiQC.""" diff --git a/modules/nf-core/diamond/blastp/environment.yml b/modules/nf-core/diamond/blastp/environment.yml index 922ea7ed..950c3c5c 100644 --- a/modules/nf-core/diamond/blastp/environment.yml +++ b/modules/nf-core/diamond/blastp/environment.yml @@ -1,7 +1,5 @@ -name: diamond_blastp channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::diamond=2.1.8 diff --git a/modules/nf-core/diamond/blastp/meta.yml b/modules/nf-core/diamond/blastp/meta.yml index bab6801e..fbddfbd0 100644 --- a/modules/nf-core/diamond/blastp/meta.yml +++ b/modules/nf-core/diamond/blastp/meta.yml @@ -13,77 +13,116 @@ tools: tool_dev_url: https://github.com/bbuchfink/diamond doi: "10.1038/s41592-021-01101-x" licence: ["GPL v3.0"] + identifier: biotools:diamond input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input fasta file containing query sequences - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" - - meta2: - type: map - description: | - Groovy Map containing db information - e.g. [ id:'test2', single_end:false ] - - db: - type: file - description: File of the indexed DIAMOND database - pattern: "*.dmnd" - - out_ext: - type: string - description: | - Specify the type of output file to be generated. `blast` corresponds to - BLAST pairwise format. `xml` corresponds to BLAST xml format. - `txt` corresponds to to BLAST tabular format. `tsv` corresponds to - taxonomic classification format. - pattern: "blast|xml|txt|daa|sam|tsv|paf" - - blast_columns: - type: string - description: | - Optional space separated list of DIAMOND tabular BLAST output keywords - used for in conjunction with the 'txt' out_ext option (--outfmt 6). Options: - qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file containing query sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing db information + e.g. [ id:'test2', single_end:false ] + - db: + type: file + description: File of the indexed DIAMOND database + pattern: "*.dmnd" + - - out_ext: + type: string + description: | + Specify the type of output file to be generated. `blast` corresponds to + BLAST pairwise format. `xml` corresponds to BLAST xml format. + `txt` corresponds to to BLAST tabular format. `tsv` corresponds to + taxonomic classification format. + pattern: "blast|xml|txt|daa|sam|tsv|paf" + - - blast_columns: + type: string + description: | + Optional space separated list of DIAMOND tabular BLAST output keywords + used for in conjunction with the 'txt' out_ext option (--outfmt 6). Options: + qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - blast: - type: file - description: File containing blastp hits - pattern: "*.{blast}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.blast": + type: file + description: File containing blastp hits + pattern: "*.{blast}" - xml: - type: file - description: File containing blastp hits - pattern: "*.{xml}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.xml": + type: file + description: File containing blastp hits + pattern: "*.{xml}" - txt: - type: file - description: File containing hits in tabular BLAST format. - pattern: "*.{txt}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: File containing hits in tabular BLAST format. + pattern: "*.{txt}" - daa: - type: file - description: File containing hits DAA format - pattern: "*.{daa}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.daa": + type: file + description: File containing hits DAA format + pattern: "*.{daa}" - sam: - type: file - description: File containing aligned reads in SAM format - pattern: "*.{sam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sam": + type: file + description: File containing aligned reads in SAM format + pattern: "*.{sam}" - tsv: - type: file - description: Tab separated file containing taxonomic classification of hits - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tsv": + type: file + description: Tab separated file containing taxonomic classification of hits + pattern: "*.{tsv}" - paf: - type: file - description: File containing aligned reads in pairwise mapping format format - pattern: "*.{paf}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.paf": + type: file + description: File containing aligned reads in pairwise mapping format format + pattern: "*.{paf}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@spficklin" - "@jfy133" diff --git a/modules/nf-core/diamond/blastp/tests/main.nf.test b/modules/nf-core/diamond/blastp/tests/main.nf.test index 672bf050..f21e926d 100644 --- a/modules/nf-core/diamond/blastp/tests/main.nf.test +++ b/modules/nf-core/diamond/blastp/tests/main.nf.test @@ -6,6 +6,7 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "diamond" + tag "diamond/makedb" tag "diamond/blastp" setup { @@ -13,7 +14,7 @@ nextflow_process { script "../../makedb/main.nf" process { """ - input[0] = [ [id:'test2'], [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] ] + input[0] = [ [id:'test2'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] input[1] = [] input[2] = [] input[3] = [] @@ -30,7 +31,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db input[2] = 'txt' input[3] = 'qseqid qlen' @@ -41,8 +42,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.txt).match("txt") }, - { assert process.out.versions } + { assert snapshot( + process.out.txt, + process.out.versions + ).match() + } ) } @@ -56,7 +60,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['proteome_fasta_gz'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db input[2] = 'txt' input[3] = 'qseqid qlen' @@ -67,8 +71,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.txt).match("gz_txt") }, - { assert process.out.versions } + { assert snapshot( + process.out.txt, + process.out.versions + ).match("gz_txt") + } ) } @@ -82,7 +89,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db input[2] = 'daa' input[3] = [] @@ -94,7 +101,7 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.daa }, - { assert process.out.versions } + { assert snapshot(process.out.versions).match() } ) } diff --git a/modules/nf-core/diamond/blastp/tests/main.nf.test.snap b/modules/nf-core/diamond/blastp/tests/main.nf.test.snap index 83575bc1..e323c8b8 100644 --- a/modules/nf-core/diamond/blastp/tests/main.nf.test.snap +++ b/modules/nf-core/diamond/blastp/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "txt": { + "Should search for protein hits against a DIAMOND db and return a tab separated output file of hits": { "content": [ [ [ @@ -8,9 +8,16 @@ }, "test.txt:md5,8131b1afd717f3d5f2f2417c5b562e6e" ] + ], + [ + "versions.yml:md5,57a0ebeb0a8a732c941ae0102639a9d0" ] ], - "timestamp": "2023-11-07T10:27:02.453987512" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:40:23.906848" }, "gz_txt": { "content": [ @@ -21,8 +28,27 @@ }, "test.txt:md5,8131b1afd717f3d5f2f2417c5b562e6e" ] + ], + [ + "versions.yml:md5,57a0ebeb0a8a732c941ae0102639a9d0" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:40:29.865487" + }, + "Should search for protein hits against a DIAMOND db and return a daa format file of hits": { + "content": [ + [ + "versions.yml:md5,57a0ebeb0a8a732c941ae0102639a9d0" ] ], - "timestamp": "2023-11-07T09:41:13.934994026" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:40:35.362027" } } \ No newline at end of file diff --git a/modules/nf-core/diamond/blastx/environment.yml b/modules/nf-core/diamond/blastx/environment.yml index 70d6857d..950c3c5c 100644 --- a/modules/nf-core/diamond/blastx/environment.yml +++ b/modules/nf-core/diamond/blastx/environment.yml @@ -1,7 +1,5 @@ -name: diamond_blastx channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::diamond=2.1.8 diff --git a/modules/nf-core/diamond/blastx/meta.yml b/modules/nf-core/diamond/blastx/meta.yml index 17106548..a5c05875 100644 --- a/modules/nf-core/diamond/blastx/meta.yml +++ b/modules/nf-core/diamond/blastx/meta.yml @@ -13,81 +13,126 @@ tools: tool_dev_url: https://github.com/bbuchfink/diamond doi: "10.1038/s41592-021-01101-x" licence: ["GPL v3.0"] + identifier: biotools:diamond input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input fasta file containing query sequences - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" - - meta2: - type: map - description: | - Groovy Map containing db information - e.g. [ id:'test2', single_end:false ] - - db: - type: file - description: File of the indexed DIAMOND database - pattern: "*.dmnd" - - out_ext: - type: string - description: | - Specify the type of output file to be generated. `blast` corresponds to - BLAST pairwise format. `xml` corresponds to BLAST xml format. - `txt` corresponds to to BLAST tabular format. `tsv` corresponds to - taxonomic classification format. - pattern: "blast|xml|txt|daa|sam|tsv|paf" - - blast_columns: - type: string - description: | - Optional space separated list of DIAMOND tabular BLAST output keywords - used for in conjunction with the 'txt' out_ext option (--outfmt 6). Options: - qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file containing query sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing db information + e.g. [ id:'test2', single_end:false ] + - db: + type: file + description: File of the indexed DIAMOND database + pattern: "*.dmnd" + - - out_ext: + type: string + description: | + Specify the type of output file to be generated. `blast` corresponds to + BLAST pairwise format. `xml` corresponds to BLAST xml format. + `txt` corresponds to to BLAST tabular format. `tsv` corresponds to + taxonomic classification format. + pattern: "blast|xml|txt|daa|sam|tsv|paf" + - - blast_columns: + type: string + description: | + Optional space separated list of DIAMOND tabular BLAST output keywords + used for in conjunction with the 'txt' out_ext option (--outfmt 6). Options: + qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - blast: - type: file - description: File containing blastp hits - pattern: "*.{blast}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.blast": + type: file + description: File containing blastp hits + pattern: "*.{blast}" - xml: - type: file - description: File containing blastp hits - pattern: "*.{xml}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.xml": + type: file + description: File containing blastp hits + pattern: "*.{xml}" - txt: - type: file - description: File containing hits in tabular BLAST format. - pattern: "*.{txt}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: File containing hits in tabular BLAST format. + pattern: "*.{txt}" - daa: - type: file - description: File containing hits DAA format - pattern: "*.{daa}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.daa": + type: file + description: File containing hits DAA format + pattern: "*.{daa}" - sam: - type: file - description: File containing aligned reads in SAM format - pattern: "*.{sam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sam": + type: file + description: File containing aligned reads in SAM format + pattern: "*.{sam}" - tsv: - type: file - description: Tab separated file containing taxonomic classification of hits - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tsv": + type: file + description: Tab separated file containing taxonomic classification of hits + pattern: "*.{tsv}" - paf: - type: file - description: File containing aligned reads in pairwise mapping format format - pattern: "*.{paf}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.paf": + type: file + description: File containing aligned reads in pairwise mapping format format + pattern: "*.{paf}" - log: - type: file - description: Log file containing stdout information - pattern: "*.{log}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: Log file containing stdout information + pattern: "*.{log}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@spficklin" - "@jfy133" diff --git a/modules/nf-core/diamond/blastx/tests/main.nf.test b/modules/nf-core/diamond/blastx/tests/main.nf.test index a367f883..b8757403 100644 --- a/modules/nf-core/diamond/blastx/tests/main.nf.test +++ b/modules/nf-core/diamond/blastx/tests/main.nf.test @@ -6,6 +6,7 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "diamond" + tag "diamond/makedb" tag "diamond/blastx" setup { @@ -13,7 +14,7 @@ nextflow_process { script "../../makedb/main.nf" process { """ - input[0] = [ [id:'test2'], [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] ] + input[0] = [ [id:'test2'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] input[1] = [] input[2] = [] input[3] = [] @@ -30,7 +31,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/transcriptome.fasta', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db input[2] = 'tfdfdt' // Nonsense file extension to check default case. input[3] = 'qseqid qlen' @@ -41,9 +42,12 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.txt).match("txt") }, { assert path(process.out.log.get(0).get(1)).readLines().contains("11 queries aligned.") }, - { assert process.out.versions } + { assert snapshot( + process.out.txt, + process.out.versions + ).match() + } ) } @@ -57,7 +61,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/transcriptome.fasta', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db input[2] = 'daa' input[3] = [] @@ -70,7 +74,7 @@ nextflow_process { { assert process.success }, { assert process.out.daa }, { assert path(process.out.log.get(0).get(1)).readLines().contains("11 queries aligned.") }, - { assert process.out.versions } + { assert snapshot(process.out.versions).match() } ) } diff --git a/modules/nf-core/diamond/blastx/tests/main.nf.test.snap b/modules/nf-core/diamond/blastx/tests/main.nf.test.snap index 27fb0a31..8a128c99 100644 --- a/modules/nf-core/diamond/blastx/tests/main.nf.test.snap +++ b/modules/nf-core/diamond/blastx/tests/main.nf.test.snap @@ -1,5 +1,17 @@ { - "txt": { + "Should search for transcriptome hits against a DIAMOND db and return the daa format output file of hits": { + "content": [ + [ + "versions.yml:md5,733d944fb173eaf1d1cdf280cd1b3b9c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:49:26.843747" + }, + "Should search for transcriptome hits against a DIAMOND db and return the default tab separated output file of hits": { "content": [ [ [ @@ -8,8 +20,15 @@ }, "test.txt:md5,33dc682dabfa44c7089abbc8fe8b84e4" ] + ], + [ + "versions.yml:md5,733d944fb173eaf1d1cdf280cd1b3b9c" ] ], - "timestamp": "2023-11-07T09:42:36.646074348" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:49:20.769093" } } \ No newline at end of file diff --git a/modules/nf-core/fastawindows/environment.yml b/modules/nf-core/fastawindows/environment.yml index ce557158..6775fb2e 100644 --- a/modules/nf-core/fastawindows/environment.yml +++ b/modules/nf-core/fastawindows/environment.yml @@ -1,7 +1,5 @@ -name: fastawindows channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::fasta_windows=0.2.4 diff --git a/modules/nf-core/fastawindows/main.nf b/modules/nf-core/fastawindows/main.nf index 40b28436..dcbf696a 100644 --- a/modules/nf-core/fastawindows/main.nf +++ b/modules/nf-core/fastawindows/main.nf @@ -31,6 +31,25 @@ process FASTAWINDOWS { --fasta $fasta \\ --output ${prefix} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fasta_windows: \$(fasta_windows --version | cut -d' ' -f3) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p fw_out + + touch fw_out/${prefix}_freq_windows.tsv + touch fw_out/${prefix}_mononuc_windows.tsv + touch fw_out/${prefix}_dinuc_windows.tsv + touch fw_out/${prefix}_trinuc_windows.tsv + touch fw_out/${prefix}_tetranuc_windows.tsv + + cat <<-END_VERSIONS > versions.yml "${task.process}": fasta_windows: \$(fasta_windows --version | cut -d' ' -f3) diff --git a/modules/nf-core/fastawindows/meta.yml b/modules/nf-core/fastawindows/meta.yml index 494cc1b6..f8ee0043 100644 --- a/modules/nf-core/fastawindows/meta.yml +++ b/modules/nf-core/fastawindows/meta.yml @@ -7,49 +7,78 @@ keywords: - bed tools: - "fastawindows": - description: "fasta_windows is a tool written for Darwin Tree of Life chromosomal level genome assemblies. The executable takes a fasta formatted file and calculates some statistics of interest in windows" + description: "fasta_windows is a tool written for Darwin Tree of Life chromosomal + level genome assemblies. The executable takes a fasta formatted file and calculates + some statistics of interest in windows" homepage: "https://github.com/tolkit/fasta_windows" - licence: "['MIT']" + licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: FASTA file - pattern: "*.{fa,fasta,fna}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta,fna}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - freq: - type: file - description: TSV file with frequencies and statistics - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fw_out/*_freq_windows.tsv: + type: file + description: TSV file with frequencies and statistics + pattern: "*.{tsv}" - mononuc: - type: file - description: TSV file with mononucleotide counts - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fw_out/*_mononuc_windows.tsv: + type: file + description: TSV file with mononucleotide counts + pattern: "*.{tsv}" - dinuc: - type: file - description: TSV file with dinucleotide counts - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fw_out/*_dinuc_windows.tsv: + type: file + description: TSV file with dinucleotide counts + pattern: "*.{tsv}" - trinuc: - type: file - description: TSV file with trinucleotide counts - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fw_out/*_trinuc_windows.tsv: + type: file + description: TSV file with trinucleotide counts + pattern: "*.{tsv}" - tetranuc: - type: file - description: TSV file with tetranucleotide counts - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fw_out/*_tetranuc_windows.tsv: + type: file + description: TSV file with tetranucleotide counts + pattern: "*.{tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@muffato" maintainers: diff --git a/modules/nf-core/fastawindows/tests/main.nf.test b/modules/nf-core/fastawindows/tests/main.nf.test new file mode 100644 index 00000000..f4bea49f --- /dev/null +++ b/modules/nf-core/fastawindows/tests/main.nf.test @@ -0,0 +1,57 @@ + +nextflow_process { + + name "Test Process FASTAWINDOWS" + script "../main.nf" + process "FASTAWINDOWS" + + tag "modules" + tag "modules_nfcore" + tag "fastawindows" + + test("test-fastawindows") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-fastawindows-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/fastawindows/tests/main.nf.test.snap b/modules/nf-core/fastawindows/tests/main.nf.test.snap new file mode 100644 index 00000000..f3474d3e --- /dev/null +++ b/modules/nf-core/fastawindows/tests/main.nf.test.snap @@ -0,0 +1,196 @@ +{ + "test-fastawindows-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_freq_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_mononuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test_dinuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test_trinuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test_tetranuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + "versions.yml:md5,4e46686969aa8883f8092693ebcf7dab" + ], + "dinuc": [ + [ + { + "id": "test" + }, + "test_dinuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "freq": [ + [ + { + "id": "test" + }, + "test_freq_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "mononuc": [ + [ + { + "id": "test" + }, + "test_mononuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tetranuc": [ + [ + { + "id": "test" + }, + "test_tetranuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trinuc": [ + [ + { + "id": "test" + }, + "test_trinuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,4e46686969aa8883f8092693ebcf7dab" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-30T09:58:57.148076" + }, + "test-fastawindows": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_freq_windows.tsv:md5,237d50ac5ec2bef3142020d569fa5765" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_mononuc_windows.tsv:md5,a1b4437d0c71d9cfd676de6bda2633f0" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test_dinuc_windows.tsv:md5,696a9f2a4b2114dfbd6b414694f56a11" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test_trinuc_windows.tsv:md5,dfb05b758f0474e937e2d6ba6fe46dae" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test_tetranuc_windows.tsv:md5,e621537175ee8019360f8b6e8f4330b7" + ] + ], + "5": [ + "versions.yml:md5,4e46686969aa8883f8092693ebcf7dab" + ], + "dinuc": [ + [ + { + "id": "test" + }, + "test_dinuc_windows.tsv:md5,696a9f2a4b2114dfbd6b414694f56a11" + ] + ], + "freq": [ + [ + { + "id": "test" + }, + "test_freq_windows.tsv:md5,237d50ac5ec2bef3142020d569fa5765" + ] + ], + "mononuc": [ + [ + { + "id": "test" + }, + "test_mononuc_windows.tsv:md5,a1b4437d0c71d9cfd676de6bda2633f0" + ] + ], + "tetranuc": [ + [ + { + "id": "test" + }, + "test_tetranuc_windows.tsv:md5,e621537175ee8019360f8b6e8f4330b7" + ] + ], + "trinuc": [ + [ + { + "id": "test" + }, + "test_trinuc_windows.tsv:md5,dfb05b758f0474e937e2d6ba6fe46dae" + ] + ], + "versions": [ + "versions.yml:md5,4e46686969aa8883f8092693ebcf7dab" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T21:22:58.322943" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml index 25910b34..c7794856 100644 --- a/modules/nf-core/gunzip/environment.yml +++ b/modules/nf-core/gunzip/environment.yml @@ -1,7 +1,7 @@ -name: gunzip channels: - conda-forge - bioconda - - defaults dependencies: - - conda-forge::sed=4.7 + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index 468a6f28..5e67e3b9 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -4,8 +4,8 @@ process GUNZIP { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" input: tuple val(meta), path(archive) @@ -18,8 +18,11 @@ process GUNZIP { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ # Not calling gunzip itself because it creates files # with the original group ownership rather than the @@ -37,7 +40,11 @@ process GUNZIP { """ stub: - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ touch $gunzip cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 231034f2..9066c035 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -10,25 +10,32 @@ tools: gzip is a file format and a software application used for file compression and decompression. documentation: https://www.gnu.org/software/gzip/manual/gzip.html licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. [ id:'test', single_end:false ] - - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" output: - gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" + - meta: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - $gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" @@ -37,3 +44,4 @@ maintainers: - "@joseespinosa" - "@drpatelh" - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test index 6406008e..776211ad 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -33,4 +33,89 @@ nextflow_process { } + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + } diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap index 720fd9ff..069967e7 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test.snap +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -1,4 +1,70 @@ { + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, "Should run without failures": { "content": [ { @@ -26,6 +92,43 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" } } \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 00000000..dec77642 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml index cf6e775f..dc6476b7 100644 --- a/modules/nf-core/minimap2/align/environment.yml +++ b/modules/nf-core/minimap2/align/environment.yml @@ -1,9 +1,8 @@ -name: minimap2_align channels: - conda-forge - bioconda - - defaults + dependencies: - - bioconda::minimap2=2.24 - - bioconda::samtools=1.18 - - bioconda::htslib=1.18 + - bioconda::htslib=1.20 + - bioconda::minimap2=2.28 + - bioconda::samtools=1.20 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 7030554d..b0dc23bf 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -4,20 +4,22 @@ process MINIMAP2_ALIGN { // Note: the versions here need to match the versions used in the mulled container below and minimap2/index conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:365b17b986c1a60c1b82c6066a9345f38317b763-0' : - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:365b17b986c1a60c1b82c6066a9345f38317b763-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' : + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" input: tuple val(meta), path(reads) tuple val(meta2), path(reference) val bam_format + val bam_index_extension val cigar_paf_format val cigar_bam output: - tuple val(meta), path("*.paf"), optional: true, emit: paf - tuple val(meta), path("*.bam"), optional: true, emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.paf") , optional: true, emit: paf + tuple val(meta), path("*.bam") , optional: true, emit: bam + tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,16 +27,25 @@ process MINIMAP2_ALIGN { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus} -o ${prefix}.bam ${args2}" : "-o ${prefix}.paf" + def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" + def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' + def query = bam_input ? "-" : reads + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + """ + $samtools_reset_fastq \\ minimap2 \\ $args \\ -t $task.cpus \\ - ${reference ?: reads} \\ - $reads \\ + $target \\ + $query \\ $cigar_paf \\ $set_cigar_bam \\ $bam_output @@ -43,14 +54,20 @@ process MINIMAP2_ALIGN { cat <<-END_VERSIONS > versions.yml "${task.process}": minimap2: \$(minimap2 --version 2>&1) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : "" + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + """ touch $output_file + ${bam_index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml index 408522d5..a4cfc891 100644 --- a/modules/nf-core/minimap2/align/meta.yml +++ b/modules/nf-core/minimap2/align/meta.yml @@ -14,62 +14,86 @@ tools: homepage: https://github.com/lh3/minimap2 documentation: https://github.com/lh3/minimap2#uguide licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FASTA or FASTQ files of size 1 and 2 for single-end - and paired-end data, respectively. - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test_ref'] - - reference: - type: file - description: | - Reference database in FASTA format. - - bam_format: - type: boolean - description: Specify that output should be in BAM format - - cigar_paf_format: - type: boolean - description: Specify that output CIGAR should be in PAF format - - cigar_bam: - type: boolean - description: | - Write CIGAR with >65535 ops at the CG tag. This is recommended when - doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_ref'] + - reference: + type: file + description: | + Reference database in FASTA format. + - - bam_format: + type: boolean + description: Specify that output should be in BAM format + - - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. "bai") + - - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - paf: - type: file - description: Alignment in PAF format - pattern: "*.paf" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.paf": + type: file + description: Alignment in PAF format + pattern: "*.paf" - bam: - type: file - description: Alignment in BAM format - pattern: "*.bam" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Alignment in BAM format + pattern: "*.bam" + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam.${bam_index_extension}": + type: file + description: BAM alignment index + pattern: "*.bam.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@heuermh" - "@sofstam" - "@sateeshperi" - "@jfy133" + - "@fellen31" maintainers: - "@heuermh" - "@sofstam" - "@sateeshperi" - "@jfy133" + - "@fellen31" diff --git a/modules/nf-core/minimap2/align/minimap2-align.diff b/modules/nf-core/minimap2/align/minimap2-align.diff index 479818b3..9dd291e7 100644 --- a/modules/nf-core/minimap2/align/minimap2-align.diff +++ b/modules/nf-core/minimap2/align/minimap2-align.diff @@ -4,7 +4,7 @@ Changes in module 'nf-core/minimap2/align' @@ -1,6 +1,5 @@ process MINIMAP2_ALIGN { tag "$meta.id" -- label 'process_medium' +- label 'process_high' // Note: the versions here need to match the versions used in the mulled container below and minimap2/index conda "${moduleDir}/environment.yml" diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test index 4d77e0d9..4072c171 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -9,22 +9,23 @@ nextflow_process { tag "minimap2" tag "minimap2/align" - test("sarscov2 - fastq, fasta, true, false, false") { + test("sarscov2 - fastq, fasta, true, [], false, false") { when { process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] input[1] = [ [ id:'test_ref' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[2] = true - input[3] = false + input[3] = [] input[4] = false + input[5] = false """ } } @@ -33,7 +34,43 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, process.out.versions ).match() } ) @@ -49,17 +86,18 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] input[1] = [ [ id:'test_ref' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[2] = true - input[3] = false + input[3] = [] input[4] = false + input[5] = false """ } } @@ -68,7 +106,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), process.out.versions ).match() } ) @@ -83,15 +122,16 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), ] input[1] = [ [ id:'test_ref' ], // meta map [] ] input[2] = true - input[3] = false + input[3] = [] input[4] = false + input[5] = false """ } } @@ -100,7 +140,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), process.out.versions ).match() } ) @@ -108,24 +149,57 @@ nextflow_process { } - test("sarscov2 - fastq, fasta, true, false, false - stub") { + test("sarscov2 - bam, fasta, true, [], false, false") { - options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false") { when { process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) ] input[1] = [ [ id:'test_ref' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[2] = true - input[3] = false + input[3] = 'bai' input[4] = false + input[5] = false """ } } @@ -134,7 +208,9 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, process.out.versions ).match() } ) @@ -142,7 +218,36 @@ nextflow_process { } - test("sarscov2 - fastq, fasta, false, false, false - stub") { + test("sarscov2 - bam, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, [], false, false - stub") { options "-stub" @@ -151,15 +256,80 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] input[1] = [ [ id:'test_ref' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, false, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[2] = false - input[3] = false + input[3] = [] input[4] = false + input[5] = false """ } } @@ -167,13 +337,105 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.paf[0][1]).name, - process.out.versions - ).match() } + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } ) } } -} +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap index ec99d13e..12264a85 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test.snap +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -1,67 +1,476 @@ { - "sarscov2 - fastq, fasta, true, false, false": { + "sarscov2 - bam, fasta, true, 'bai', false, false": { "content": [ - "test.bam", [ - "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:03:00.827260362" + }, + "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:37.92353539" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:29:44.669021368" + }, + "sarscov2 - fastq, fasta, false, [], false, false - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + + ], + "index": [ + + ], + "paf": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:52.738781039" + }, + "sarscov2 - fastq, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:23.033808223" + }, + "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "1bc392244f228bf52cf0b5a8f6a654c9", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nextflow": "24.04.2" }, - "timestamp": "2023-12-04T12:07:06.01315354" + "timestamp": "2024-07-23T11:18:18.964586894" }, - "sarscov2 - fastq, fasta, true, false, false - stub": { + "sarscov2 - fastq, fasta, true, [], false, false": { "content": [ - "test.bam", [ - "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "f194745c0ccfcb2a9c0aee094a08750", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nextflow": "24.04.2" }, - "timestamp": "2023-12-04T12:07:24.487175659" + "timestamp": "2024-07-23T11:17:48.667488325" }, - "sarscov2 - fastq, fasta, false, false, false - stub": { + "sarscov2 - fastq, fasta, true, 'bai', false, false": { "content": [ - "test.paf", [ - "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "f194745c0ccfcb2a9c0aee094a08750", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nextflow": "24.04.2" }, - "timestamp": "2024-03-01T11:06:54.090105" + "timestamp": "2024-07-23T11:18:02.517416733" }, - "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "sarscov2 - bam, fasta, true, [], false, false": { "content": [ - "test.bam", [ - "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nextflow": "24.04.2" }, - "timestamp": "2023-12-04T12:07:12.50816279" + "timestamp": "2024-07-25T09:02:49.64829488" + }, + "sarscov2 - bam, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:22.162291795" }, "sarscov2 - fastq, [], true, false, false": { "content": [ - "test.bam", [ - "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:ERR5069949.2151832\tLN:150", + "@SQ\tSN:ERR5069949.576388\tLN:77", + "@SQ\tSN:ERR5069949.501486\tLN:146", + "@SQ\tSN:ERR5069949.1331889\tLN:132", + "@SQ\tSN:ERR5069949.2161340\tLN:80", + "@SQ\tSN:ERR5069949.973930\tLN:79", + "@SQ\tSN:ERR5069949.2417063\tLN:150", + "@SQ\tSN:ERR5069949.376959\tLN:151", + "@SQ\tSN:ERR5069949.1088785\tLN:149", + "@SQ\tSN:ERR5069949.1066259\tLN:147", + "@SQ\tSN:ERR5069949.2832676\tLN:139", + "@SQ\tSN:ERR5069949.2953930\tLN:151", + "@SQ\tSN:ERR5069949.324865\tLN:151", + "@SQ\tSN:ERR5069949.2185111\tLN:150", + "@SQ\tSN:ERR5069949.937422\tLN:151", + "@SQ\tSN:ERR5069949.2431709\tLN:150", + "@SQ\tSN:ERR5069949.1246538\tLN:148", + "@SQ\tSN:ERR5069949.1189252\tLN:98", + "@SQ\tSN:ERR5069949.2216307\tLN:147", + "@SQ\tSN:ERR5069949.3273002\tLN:148", + "@SQ\tSN:ERR5069949.3277445\tLN:151", + "@SQ\tSN:ERR5069949.3022231\tLN:147", + "@SQ\tSN:ERR5069949.184542\tLN:151", + "@SQ\tSN:ERR5069949.540529\tLN:149", + "@SQ\tSN:ERR5069949.686090\tLN:150", + "@SQ\tSN:ERR5069949.2787556\tLN:106", + "@SQ\tSN:ERR5069949.2650879\tLN:150", + "@SQ\tSN:ERR5069949.2064910\tLN:149", + "@SQ\tSN:ERR5069949.2328704\tLN:150", + "@SQ\tSN:ERR5069949.1067032\tLN:150", + "@SQ\tSN:ERR5069949.3338256\tLN:151", + "@SQ\tSN:ERR5069949.1412839\tLN:147", + "@SQ\tSN:ERR5069949.1538968\tLN:150", + "@SQ\tSN:ERR5069949.147998\tLN:94", + "@SQ\tSN:ERR5069949.366975\tLN:106", + "@SQ\tSN:ERR5069949.1372331\tLN:151", + "@SQ\tSN:ERR5069949.1709367\tLN:129", + "@SQ\tSN:ERR5069949.2388984\tLN:150", + "@SQ\tSN:ERR5069949.1132353\tLN:150", + "@SQ\tSN:ERR5069949.1151736\tLN:151", + "@SQ\tSN:ERR5069949.479807\tLN:150", + "@SQ\tSN:ERR5069949.2176303\tLN:151", + "@SQ\tSN:ERR5069949.2772897\tLN:151", + "@SQ\tSN:ERR5069949.1020777\tLN:122", + "@SQ\tSN:ERR5069949.465452\tLN:151", + "@SQ\tSN:ERR5069949.1704586\tLN:149", + "@SQ\tSN:ERR5069949.1258508\tLN:151", + "@SQ\tSN:ERR5069949.986441\tLN:119", + "@SQ\tSN:ERR5069949.2674295\tLN:148", + "@SQ\tSN:ERR5069949.885966\tLN:79", + "@SQ\tSN:ERR5069949.2342766\tLN:151", + "@SQ\tSN:ERR5069949.3122970\tLN:127", + "@SQ\tSN:ERR5069949.3279513\tLN:72", + "@SQ\tSN:ERR5069949.309410\tLN:151", + "@SQ\tSN:ERR5069949.532979\tLN:149", + "@SQ\tSN:ERR5069949.2888794\tLN:151", + "@SQ\tSN:ERR5069949.2205229\tLN:150", + "@SQ\tSN:ERR5069949.786562\tLN:151", + "@SQ\tSN:ERR5069949.919671\tLN:151", + "@SQ\tSN:ERR5069949.1328186\tLN:151", + "@SQ\tSN:ERR5069949.870926\tLN:149", + "@SQ\tSN:ERR5069949.2257580\tLN:151", + "@SQ\tSN:ERR5069949.3249622\tLN:77", + "@SQ\tSN:ERR5069949.611123\tLN:125", + "@SQ\tSN:ERR5069949.651338\tLN:142", + "@SQ\tSN:ERR5069949.169513\tLN:92", + "@SQ\tSN:ERR5069949.155944\tLN:150", + "@SQ\tSN:ERR5069949.2033605\tLN:150", + "@SQ\tSN:ERR5069949.2730382\tLN:142", + "@SQ\tSN:ERR5069949.2125592\tLN:150", + "@SQ\tSN:ERR5069949.1062611\tLN:151", + "@SQ\tSN:ERR5069949.1778133\tLN:151", + "@SQ\tSN:ERR5069949.3057020\tLN:95", + "@SQ\tSN:ERR5069949.2972968\tLN:141", + "@SQ\tSN:ERR5069949.2734474\tLN:149", + "@SQ\tSN:ERR5069949.856527\tLN:151", + "@SQ\tSN:ERR5069949.2098070\tLN:151", + "@SQ\tSN:ERR5069949.1552198\tLN:150", + "@SQ\tSN:ERR5069949.2385514\tLN:150", + "@SQ\tSN:ERR5069949.2270078\tLN:151", + "@SQ\tSN:ERR5069949.114870\tLN:150", + "@SQ\tSN:ERR5069949.2668880\tLN:147", + "@SQ\tSN:ERR5069949.257821\tLN:139", + "@SQ\tSN:ERR5069949.2243023\tLN:150", + "@SQ\tSN:ERR5069949.2605155\tLN:146", + "@SQ\tSN:ERR5069949.1340552\tLN:151", + "@SQ\tSN:ERR5069949.1561137\tLN:150", + "@SQ\tSN:ERR5069949.2361683\tLN:149", + "@SQ\tSN:ERR5069949.2521353\tLN:150", + "@SQ\tSN:ERR5069949.1261808\tLN:149", + "@SQ\tSN:ERR5069949.2734873\tLN:98", + "@SQ\tSN:ERR5069949.3017828\tLN:107", + "@SQ\tSN:ERR5069949.573706\tLN:150", + "@SQ\tSN:ERR5069949.1980512\tLN:151", + "@SQ\tSN:ERR5069949.1014693\tLN:150", + "@SQ\tSN:ERR5069949.3184655\tLN:150", + "@SQ\tSN:ERR5069949.29668\tLN:89", + "@SQ\tSN:ERR5069949.3258358\tLN:151", + "@SQ\tSN:ERR5069949.1476386\tLN:151", + "@SQ\tSN:ERR5069949.2415814\tLN:150", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "16c1c651f8ec67383bcdee3c55aed94f", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nextflow": "24.04.2" }, - "timestamp": "2023-12-04T12:07:18.414974788" + "timestamp": "2024-07-23T11:18:34.246998277" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index ca39fb67..6f5b867b 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,7 +1,5 @@ -name: multiqc channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::multiqc=1.21 + - bioconda::multiqc=1.25.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 47ac352f..cc0643e1 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,14 +3,16 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : - 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" path(multiqc_config) path(extra_multiqc_config) path(multiqc_logo) + path(replace_names) + path(sample_names) output: path "*multiqc_report.html", emit: report @@ -23,16 +25,22 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' - def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ $args \\ $config \\ + $prefix \\ $extra_config \\ $logo \\ + $replace \\ + $samples \\ . cat <<-END_VERSIONS > versions.yml @@ -44,7 +52,7 @@ process MULTIQC { stub: """ mkdir multiqc_data - touch multiqc_plots + mkdir multiqc_plots touch multiqc_report.html cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 45a9bc35..b16c1879 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,6 @@ name: multiqc -description: Aggregate results from bioinformatics analyses across many samples into a single report +description: Aggregate results from bioinformatics analyses across many samples into + a single report keywords: - QC - bioinformatics tools @@ -12,40 +13,59 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. - pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. + pattern: "*.{tsv}" output: - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" - data: - type: directory - description: MultiQC data dir - pattern: "multiqc_data" + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_data" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@abhi18av" - "@bunop" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index f1c4242e..33316a7d 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -8,6 +8,8 @@ nextflow_process { tag "modules_nfcore" tag "multiqc" + config "./nextflow.config" + test("sarscov2 single-end [fastqc]") { when { @@ -17,6 +19,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -41,6 +45,8 @@ nextflow_process { input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -66,6 +72,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index bfebd802..2fcbb5ff 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-29T08:48:55.657331" + "timestamp": "2024-10-02T17:51:46.317523" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-29T08:49:49.071937" + "timestamp": "2024-10-02T17:52:20.680978" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-29T08:49:25.457567" + "timestamp": "2024-10-02T17:52:09.185842" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 00000000..c537a6a3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = null + } +} diff --git a/modules/nf-core/pigz/compress/environment.yml b/modules/nf-core/pigz/compress/environment.yml index 7551d187..5016d226 100644 --- a/modules/nf-core/pigz/compress/environment.yml +++ b/modules/nf-core/pigz/compress/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "pigz_compress" channels: - conda-forge - bioconda - - defaults dependencies: - "pigz=2.8" diff --git a/modules/nf-core/pigz/compress/meta.yml b/modules/nf-core/pigz/compress/meta.yml index 42efd735..0966e651 100644 --- a/modules/nf-core/pigz/compress/meta.yml +++ b/modules/nf-core/pigz/compress/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "pigz_compress" description: Compresses files with pigz. @@ -12,35 +11,33 @@ tools: homepage: "https://zlib.net/pigz/" documentation: "https://zlib.net/pigz/pigz.pdf" + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - - raw_file: - type: file - description: File to be compressed - pattern: "*.*" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - raw_file: + type: file + description: File to be compressed + pattern: "*.*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - archive: - type: file - description: The compressed file - pattern: "*.gz" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - $archive: + type: file + description: The compressed file + pattern: "*.gz" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@leoisl" maintainers: diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test b/modules/nf-core/pigz/compress/tests/main.nf.test index 248d40fb..b3cb25e3 100644 --- a/modules/nf-core/pigz/compress/tests/main.nf.test +++ b/modules/nf-core/pigz/compress/tests/main.nf.test @@ -14,7 +14,7 @@ nextflow_process { """ input[0] = [ [ id:'test'], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -34,7 +34,7 @@ nextflow_process { """ input[0] = [ [ id:'test'], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -42,7 +42,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.archive[0][1]).name).match() } + { assert snapshot( + file(process.out.archive[0][1]).name, + process.out.versions + ).match() + } ) } } diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test.snap b/modules/nf-core/pigz/compress/tests/main.nf.test.snap index 6e50456f..4d8df9f1 100644 --- a/modules/nf-core/pigz/compress/tests/main.nf.test.snap +++ b/modules/nf-core/pigz/compress/tests/main.nf.test.snap @@ -26,12 +26,23 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, "timestamp": "2023-12-11T22:39:53.350546" }, "sarscov2 - genome - fasta - stub": { "content": [ - "genome.fasta.gz" + "genome.fasta.gz", + [ + "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad" + ] ], - "timestamp": "2023-12-11T22:52:24.309192" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T12:18:32.339508" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/fasta/environment.yml b/modules/nf-core/samtools/fasta/environment.yml deleted file mode 100644 index 14585013..00000000 --- a/modules/nf-core/samtools/fasta/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: samtools_fasta -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::samtools=1.19.2 - - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/fasta/main.nf b/modules/nf-core/samtools/fasta/main.nf deleted file mode 100644 index 9aa03430..00000000 --- a/modules/nf-core/samtools/fasta/main.nf +++ /dev/null @@ -1,43 +0,0 @@ -process SAMTOOLS_FASTA { - tag "$meta.id" - label 'process_low' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : - 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" - - input: - tuple val(meta), path(input) - val(interleave) - - output: - tuple val(meta), path("*_{1,2}.fasta.gz") , optional:true, emit: fasta - tuple val(meta), path("*_interleaved.fasta.gz"), optional:true, emit: interleaved - tuple val(meta), path("*_singleton.fasta.gz") , optional:true, emit: singleton - tuple val(meta), path("*_other.fasta.gz") , optional:true, emit: other - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def output = ( interleave && ! meta.single_end ) ? "| gzip > ${prefix}_interleaved.fasta.gz" : - meta.single_end ? "-1 ${prefix}_1.fasta.gz -s ${prefix}_singleton.fasta.gz" : - "-1 ${prefix}_1.fasta.gz -2 ${prefix}_2.fasta.gz -s ${prefix}_singleton.fasta.gz" - """ - samtools \\ - fasta \\ - $args \\ - --threads ${task.cpus-1} \\ - $input \\ - $output - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/samtools/fasta/meta.yml b/modules/nf-core/samtools/fasta/meta.yml deleted file mode 100644 index eae26f01..00000000 --- a/modules/nf-core/samtools/fasta/meta.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: "samtools_fasta" -description: Converts a SAM/BAM/CRAM file to FASTA -keywords: - - bam - - sam - - cram - - fasta -tools: - - "samtools": - description: "Tools for dealing with SAM, BAM and CRAM files" - homepage: "http://www.htslib.org" - documentation: "https://www.htslib.org/doc/samtools-fasta.html" - tool_dev_url: "https://github.com/samtools/samtools" - doi: "10.1093/bioinformatics/btp352" - licence: ["MIT"] -input: - # Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - interleave: - type: boolean - description: Set true for interleaved fasta files -output: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - fasta: - type: file - description: Compressed FASTA file(s) with reads with either the READ1 or READ2 flag set in separate files. - pattern: "*_{1,2}.fasta.gz" - - interleaved: - type: file - description: Compressed FASTA file with reads with either the READ1 or READ2 flag set in a combined file. Needs collated input file. - pattern: "*_interleaved.fasta.gz" - - singleton: - type: file - description: Compressed FASTA file with singleton reads - pattern: "*_singleton.fasta.gz" - - other: - type: file - description: Compressed FASTA file with reads with either both READ1 and READ2 flags set or unset - pattern: "*_other.fasta.gz" -authors: - - "@priyanka-surana" -maintainers: - - "@priyanka-surana" diff --git a/modules/nf-core/samtools/fasta/samtools-fasta.diff b/modules/nf-core/samtools/fasta/samtools-fasta.diff deleted file mode 100644 index e2374ed9..00000000 --- a/modules/nf-core/samtools/fasta/samtools-fasta.diff +++ /dev/null @@ -1,13 +0,0 @@ -Changes in module 'nf-core/samtools/fasta' ---- modules/nf-core/samtools/fasta/main.nf -+++ modules/nf-core/samtools/fasta/main.nf -@@ -32,7 +32,6 @@ - fasta \\ - $args \\ - --threads ${task.cpus-1} \\ -- -0 ${prefix}_other.fasta.gz \\ - $input \\ - $output - - -************************************************************ diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml index bd57cb54..62054fc9 100644 --- a/modules/nf-core/samtools/flagstat/environment.yml +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -1,8 +1,8 @@ -name: samtools_flagstat +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.19.2 - - bioconda::htslib=1.19.1 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index eb5f5252..c23f3a5c 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_FLAGSTAT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : - 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(bam), path(bai) @@ -18,7 +18,6 @@ process SAMTOOLS_FLAGSTAT { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ samtools \\ diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml index 97991358..cdc4c254 100644 --- a/modules/nf-core/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -1,5 +1,6 @@ name: samtools_flagstat -description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG + type keywords: - stats - mapping @@ -17,34 +18,37 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - flagstat: - type: file - description: File containing samtools flagstat output - pattern: "*.{flagstat}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.flagstat": + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" maintainers: diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test index 24c3c04b..3b648a37 100644 --- a/modules/nf-core/samtools/flagstat/tests/main.nf.test +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -11,9 +11,30 @@ nextflow_process { test("BAM") { when { - params { - outdir = "$outputDir" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("BAM - stub") { + + options "-stub" + + when { process { """ input[0] = Channel.of([ @@ -28,8 +49,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.flagstat).match("flagstat") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap index a76fc27e..04c3852b 100644 --- a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -1,32 +1,72 @@ { - "flagstat": { + "BAM - stub": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" ] - ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-12T18:31:37.783927" + "timestamp": "2024-09-16T08:02:58.866491759" }, - "versions": { + "BAM": { "content": [ - [ - "versions.yml:md5,fd0030ce49ab3a92091ad80260226452" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "1": [ + "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "versions": [ + "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:11:44.299617452" + "timestamp": "2024-09-16T08:02:47.383332837" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index a5e50649..62054fc9 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -1,8 +1,8 @@ -name: samtools_index +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.19.2 - - bioconda::htslib=1.19.1 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index dc14f98d..31175610 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : - 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input) @@ -35,10 +35,11 @@ process SAMTOOLS_INDEX { """ stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" """ - touch ${input}.bai - touch ${input}.crai - touch ${input}.csi + touch ${input}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index 01a4ee03..db8df0d5 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -15,38 +15,52 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - crai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" - csi: - type: file - description: CSI index file - pattern: "*.{csi}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test index bb7756d1..ca34fb5c 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -9,11 +9,7 @@ nextflow_process { tag "samtools/index" test("bai") { - when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -27,18 +23,13 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.bai).match("bai") }, - { assert snapshot(process.out.versions).match("bai_versions") } + { assert snapshot(process.out).match() } ) } } test("crai") { - when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -52,20 +43,83 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.crai).match("crai") }, - { assert snapshot(process.out.versions).match("crai_versions") } + { assert snapshot(process.out).match() } ) } } test("csi") { - config "./csi.nextflow.config" when { - params { - outdir = "$outputDir" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { process { """ input[0] = Channel.of([ @@ -79,8 +133,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert path(process.out.csi.get(0).get(1)).exists() }, - { assert snapshot(process.out.versions).match("csi_versions") } + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap index 3dc8e7de..72d65e81 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -1,74 +1,250 @@ { - "crai_versions": { + "csi - stub": { "content": [ - [ - "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" - ] + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:12:00.324667957" + "timestamp": "2024-09-16T08:21:25.261127166" }, - "csi_versions": { + "crai - stub": { "content": [ - [ - "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" - ] + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:12:07.885103162" + "timestamp": "2024-09-16T08:21:12.653194876" }, - "crai": { + "bai - stub": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] - ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-12T18:41:38.446424" + "timestamp": "2024-09-16T08:21:01.854932651" }, - "bai": { + "csi": { "content": [ + "test.paired_end.sorted.bam.csi", [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" - ] + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-12T18:40:46.579747" + "timestamp": "2024-09-16T08:20:51.485364222" }, - "bai_versions": { + "crai": { "content": [ - [ - "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" - ] + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:40.518873972" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:11:51.641425452" + "timestamp": "2024-09-16T08:20:21.184050361" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml index b0676f33..02cda6e6 100644 --- a/modules/nf-core/samtools/view/environment.yml +++ b/modules/nf-core/samtools/view/environment.yml @@ -1,8 +1,10 @@ -name: samtools_view +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.19.2 - - bioconda::htslib=1.19.1 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index 5a8989d6..4f69a94b 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_VIEW { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : - 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input), path(index) @@ -13,13 +13,15 @@ process SAMTOOLS_VIEW { path qname output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.sam"), emit: sam, optional: true - tuple val(meta), path("*.bai"), emit: bai, optional: true - tuple val(meta), path("*.csi"), emit: csi, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - path "versions.yml", emit: versions + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{bai,csi,crsi}"), emit: unselected_index, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -27,13 +29,13 @@ process SAMTOOLS_VIEW { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" - def readnames = qname ? "--qname-file ${qname}": "" - def file_type = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt bam") ? "bam" : - args.contains("--output-fmt cram") ? "cram" : - input.getExtension() + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ samtools \\ @@ -54,14 +56,14 @@ process SAMTOOLS_VIEW { stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def file_type = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt bam") ? "bam" : - args.contains("--output-fmt cram") ? "cram" : - input.getExtension() + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - def index = args.contains("--write-index") ? "touch ${prefix}.csi" : "" + index = args.contains("--write-index") ? "touch ${prefix}.${file_type}.csi" : "" """ touch ${prefix}.${file_type} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 3dadafae..caa7b015 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -15,68 +15,120 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - index: - type: file - description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) - pattern: "*.{.bai,.csi,.crai}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta: - type: file - description: Reference file the CRAM was created with (optional) - pattern: "*.{fasta,fa}" - - qname: - type: file - description: Optional file with read names to output only select alignments - pattern: "*.{txt,list}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{.bai,.csi,.crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bam: - type: file - description: optional filtered/converted BAM file - pattern: "*.{bam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" - cram: - type: file - description: optional filtered/converted CRAM file - pattern: "*.{cram}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" - sam: - type: file - description: optional filtered/converted SAM file - pattern: "*.{sam}" - # bai, csi, and crai are created with `--write-index` + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" - bai: - type: file - description: optional BAM file index - pattern: "*.{bai}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" - csi: - type: file - description: optional tabix BAM file index - pattern: "*.{csi}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" - crai: - type: file - description: optional CRAM file index - pattern: "*.{crai}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - unselected: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + - unselected_index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}.{bai,csi,crsi}: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{bai,csi,crai}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@joseespinosa" diff --git a/modules/nf-core/samtools/view/samtools-view.diff b/modules/nf-core/samtools/view/samtools-view.diff new file mode 100644 index 00000000..fb47ba9a --- /dev/null +++ b/modules/nf-core/samtools/view/samtools-view.diff @@ -0,0 +1,16 @@ +Changes in module 'nf-core/samtools/view' +--- modules/nf-core/samtools/view/main.nf ++++ modules/nf-core/samtools/view/main.nf +@@ -4,8 +4,8 @@ + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9e/9edc2564215d5cd137a8b25ca8a311600987186d406b092022444adf3c4447f7/data' : +- 'community.wave.seqera.io/library/htslib_samtools:1.21--6cb89bfd40cbaabf' }" ++ 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : ++ 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(index) + +************************************************************ diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test index 45a0defb..37b81a91 100644 --- a/modules/nf-core/samtools/view/tests/main.nf.test +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -172,6 +172,8 @@ nextflow_process { { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") }, + { assert snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") }, { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } ) } diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap index f55943a7..63849b03 100644 --- a/modules/nf-core/samtools/view/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -56,14 +56,14 @@ "bam_stub_versions": { "content": [ [ - "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:13:09.713353823" + "timestamp": "2024-09-16T09:26:24.461775464" }, "cram_to_bam_index_cram": { "content": [ @@ -169,6 +169,16 @@ }, "timestamp": "2024-02-12T19:37:56.490286" }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, "bam_csi": { "content": [ [ @@ -208,14 +218,14 @@ "cram_to_bam_index_qname_versions": { "content": [ [ - "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:13:03.935041046" + "timestamp": "2024-09-16T09:25:51.953436682" }, "cram_to_bam_bam": { "content": [ @@ -240,14 +250,14 @@ "cram_to_bam_index_versions": { "content": [ [ - "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:12:55.910685496" + "timestamp": "2024-09-16T09:25:14.475388399" }, "cram_to_bam_bai": { "content": [ @@ -264,14 +274,14 @@ "cram_to_bam_versions": { "content": [ [ - "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:12:47.715221169" + "timestamp": "2024-09-16T09:24:49.673441798" }, "cram_bam": { "content": [ @@ -355,17 +365,37 @@ }, "timestamp": "2024-02-12T19:38:23.322874" }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, "bam_versions": { "content": [ [ - "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:12:31.692607421" + "timestamp": "2024-09-16T09:23:27.151650338" }, "cram_to_bam_index_qname_cram": { "content": [ @@ -430,14 +460,24 @@ "cram_versions": { "content": [ [ - "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" ] ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:24:12.95416913" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nextflow": "23.04.3" }, - "timestamp": "2024-02-13T16:12:39.913411036" + "timestamp": "2024-02-12T19:38:23.322874" }, "bam_sam": { "content": [ @@ -477,7 +517,7 @@ }, "bam_stub_csi": { "content": [ - "test.csi" + "test.bam.csi" ], "meta": { "nf-test": "0.8.4", diff --git a/modules/nf-core/seqtk/subseq/environment.yml b/modules/nf-core/seqtk/subseq/environment.yml index 7abe3644..693aa5c1 100644 --- a/modules/nf-core/seqtk/subseq/environment.yml +++ b/modules/nf-core/seqtk/subseq/environment.yml @@ -1,7 +1,5 @@ -name: seqtk_subseq channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/subseq/meta.yml b/modules/nf-core/seqtk/subseq/meta.yml index 4e8ee19f..2667f40d 100644 --- a/modules/nf-core/seqtk/subseq/meta.yml +++ b/modules/nf-core/seqtk/subseq/meta.yml @@ -6,29 +6,42 @@ keywords: - fastx tools: - seqtk: - description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format + description: Seqtk is a fast and lightweight tool for processing sequences in + the FASTA or FASTQ format homepage: https://github.com/lh3/seqtk documentation: https://docs.csc.fi/apps/seqtk/ tool_dev_url: https://github.com/lh3/seqtk licence: ["MIT"] + identifier: biotools:seqtk input: - - sequences: - type: file - description: FASTQ/FASTA file - pattern: "*.{fq,fq.gz,fa,fa.gz}" - - filter_list: - type: file - description: BED file or a text file with a list of sequence names - pattern: "*.{bed,lst}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - sequences: + type: file + description: FASTQ/FASTA file + pattern: "*.{fq,fq.gz,fa,fa.gz}" + - - filter_list: + type: file + description: BED file or a text file with a list of sequence names + pattern: "*.{bed,lst}" output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - sequences: - type: file - description: FASTQ/FASTA file - pattern: "*.{fq.gz,fa.gz}" + - meta: + type: file + description: FASTQ/FASTA file + pattern: "*.{fq.gz,fa.gz}" + - "*.gz": + type: file + description: FASTQ/FASTA file + pattern: "*.{fq.gz,fa.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@sidorov-si" maintainers: diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test b/modules/nf-core/seqtk/subseq/tests/main.nf.test index be5602e3..fa8fad69 100644 --- a/modules/nf-core/seqtk/subseq/tests/main.nf.test +++ b/modules/nf-core/seqtk/subseq/tests/main.nf.test @@ -17,9 +17,9 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) """ } } @@ -40,9 +40,9 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) """ } } diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 00000000..c7794856 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 00000000..9bd8f554 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,84 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir ${prefix} + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch \${i} + else + mkdir -p \${i} + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 00000000..290346b3 --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,49 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - untar: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - $prefix: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 00000000..c957517a --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,85 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 00000000..ceb91b79 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,158 @@ +{ + "test_untar_onlyfiles": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:28.231047" + }, + "test_untar_onlyfiles - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:45.773103" + }, + "test_untar - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:36.777441" + }, + "test_untar": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:19.377674" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 00000000..feb6f15c --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/modules/nf-core/windowmasker/mkcounts/environment.yml b/modules/nf-core/windowmasker/mkcounts/environment.yml index e4d72108..777e097e 100644 --- a/modules/nf-core/windowmasker/mkcounts/environment.yml +++ b/modules/nf-core/windowmasker/mkcounts/environment.yml @@ -1,7 +1,5 @@ -name: windowmasker_mkcounts channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::blast=2.15.0 diff --git a/modules/nf-core/windowmasker/mkcounts/meta.yml b/modules/nf-core/windowmasker/mkcounts/meta.yml index 436ed7a5..825a0674 100644 --- a/modules/nf-core/windowmasker/mkcounts/meta.yml +++ b/modules/nf-core/windowmasker/mkcounts/meta.yml @@ -11,31 +11,32 @@ tools: homepage: https://github.com/ncbi/ncbi-cxx-toolkit-public documentation: https://ncbi.github.io/cxx-toolkit/ licence: ["MIT"] + identifier: biotools:windowmasker input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - ref: - type: file - description: An input nucleotide fasta file. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ref: + type: file + description: An input nucleotide fasta file. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - intervals: - type: file - description: | - An output file containing genomic locations of low - complexity and highly repetitive regions - pattern: "${prefix}.txt" + - counts: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: A file containing frequency counts of repetitive units. + pattern: "*.txt" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@DLBPointon" maintainers: diff --git a/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test index 18c4977c..bf53d7fa 100644 --- a/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test +++ b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test @@ -20,7 +20,7 @@ nextflow_process { """ input[0] = [ [id: "test" ], - [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] ] """ } @@ -43,7 +43,7 @@ nextflow_process { """ input[0] = [ [id: "test" ], - [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] ] """ } diff --git a/modules/nf-core/windowmasker/ustat/environment.yml b/modules/nf-core/windowmasker/ustat/environment.yml index b83d82e5..777e097e 100644 --- a/modules/nf-core/windowmasker/ustat/environment.yml +++ b/modules/nf-core/windowmasker/ustat/environment.yml @@ -1,7 +1,5 @@ -name: windowmasker_ustat channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::blast=2.15.0 diff --git a/modules/nf-core/windowmasker/ustat/meta.yml b/modules/nf-core/windowmasker/ustat/meta.yml index 6a07c935..bc51a934 100644 --- a/modules/nf-core/windowmasker/ustat/meta.yml +++ b/modules/nf-core/windowmasker/ustat/meta.yml @@ -1,5 +1,6 @@ name: windowmasker_ustat -description: A program to take a counts file and creates a file of genomic co-ordinates to be masked. +description: A program to take a counts file and creates a file of genomic co-ordinates + to be masked. keywords: - fasta - interval @@ -11,39 +12,39 @@ tools: homepage: https://github.com/ncbi/ncbi-cxx-toolkit-public documentation: https://ncbi.github.io/cxx-toolkit/ licence: ["MIT"] + identifier: biotools:windowmasker input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - counts: - type: file - description: Contains count data of repetitive regions. - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - ref: - type: file - description: An input nucleotide fasta file. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - counts: + type: file + description: Contains count data of repetitive regions. + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ref: + type: file + description: An input nucleotide fasta file. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - wm_intervals: - type: file - description: | - An output file containing genomic locations of low - complexity and highly repetitive regions - pattern: "${output}" + - intervals: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${output}: + type: file + description: intervals - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@DLBPointon" maintainers: diff --git a/modules/nf-core/windowmasker/ustat/tests/main.nf.test b/modules/nf-core/windowmasker/ustat/tests/main.nf.test index 58d91b13..6e02c9c1 100644 --- a/modules/nf-core/windowmasker/ustat/tests/main.nf.test +++ b/modules/nf-core/windowmasker/ustat/tests/main.nf.test @@ -19,7 +19,7 @@ nextflow_process { """ input[0] = [ [id: "test" ], - [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] ] """ } @@ -33,7 +33,7 @@ nextflow_process { input[0] = WINDOWMASKER_MKCOUNTS.out.counts input[1] = [ [id: "test" ], - [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] ] """ } @@ -51,7 +51,7 @@ nextflow_process { input[0] = WINDOWMASKER_MKCOUNTS.out.counts input[1] = [ [id: "test" ], - [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] ] """ } diff --git a/nextflow.config b/nextflow.config index 18994df9..5255f4fe 100644 --- a/nextflow.config +++ b/nextflow.config @@ -182,6 +182,13 @@ profiles { shifter.enabled = false charliecloud.enabled = false } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } gitpod { executor.name = 'local' executor.cpus = 4 @@ -249,7 +256,7 @@ manifest { description = """Quality assessment of genome assemblies""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.6.0' + version = '0.7.0' doi = '10.5281/zenodo.7949058' } diff --git a/nextflow_schema.json b/nextflow_schema.json index e722369d..3c75ab58 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -52,7 +52,8 @@ "type": "string", "enum": ["png", "svg"], "description": "Select the format of the output images.", - "fa_icon": "fas fa-image" + "fa_icon": "fas fa-image", + "default": "png" }, "outdir": { "type": "string", @@ -109,40 +110,40 @@ "properties": { "busco": { "type": "string", - "format": "directory-path", + "format": "path", "description": "Local directory where clade-specific BUSCO lineage datasets are stored", "fa_icon": "fas fa-folder-open" }, "lineage_tax_ids": { "type": "string", - "format": "file-path", + "format": "path", "description": "Local file that holds a mapping between BUSCO lineages and taxon IDs.", "help_text": "Initialised from https://busco-data.ezlab.org/v5/data/placement_files/mapping_taxids-busco_dataset_name.*.2019-12-16.txt.tar.gz", "fa_icon": "fas fa-file-code" }, "blastp": { "type": "string", - "format": "file-path", - "pattern": "^\\S+\\.dmnd$", + "format": "path", + "pattern": "^\\S+\\.dmnd.*$", "description": "Path to the Diamond species-specific buscogenes database", "fa_icon": "fas fa-file-archive" }, "blastx": { "type": "string", - "format": "file-path", - "pattern": "^\\S+\\.dmnd$", + "format": "path", + "pattern": "^\\S+\\.dmnd.*$", "description": "Path to the Diamond species-specific buscoregions database", "fa_icon": "fas fa-file-archive" }, "blastn": { "type": "string", - "format": "directory-path", + "format": "path", "description": "Path to the nucleotide BLAST database", "fa_icon": "fas fa-file-archive" }, "taxdump": { "type": "string", - "format": "directory-path", + "format": "path", "description": "Path to the new NCBI tax dump database", "fa_icon": "fas fa-folder-open" } diff --git a/subworkflows/local/blobtools.nf b/subworkflows/local/blobtools.nf index 747bc9fa..eea7f432 100644 --- a/subworkflows/local/blobtools.nf +++ b/subworkflows/local/blobtools.nf @@ -2,13 +2,14 @@ // Create BlobTools dataset // -include { BLOBTOOLKIT_METADATA } from '../../modules/local/blobtoolkit/metadata' include { BLOBTOOLKIT_CREATEBLOBDIR } from '../../modules/local/blobtoolkit/createblobdir' include { BLOBTOOLKIT_UPDATEBLOBDIR } from '../../modules/local/blobtoolkit/updateblobdir' workflow BLOBTOOLS { take: config // channel: [ val(meta), path(config) ] + syn_tsv // channel: [ val(meta), [path(tsv)] ] + cat_tsv // channel: [ val(meta), [path(tsv)] ] windowstats // channel: [ val(meta), path(window_stats_tsvs) ] busco // channel: [ val(meta), path(full_table) ] blastp // channel: [ val(meta), path(txt) ] @@ -21,29 +22,21 @@ workflow BLOBTOOLS { ch_versions = Channel.empty() - // - // Create metadata summary file - // - BLOBTOOLKIT_METADATA ( config ) - ch_versions = ch_versions.mix ( BLOBTOOLKIT_METADATA.out.versions.first() ) - - // // Create Blobtools dataset files // - BLOBTOOLKIT_CREATEBLOBDIR ( windowstats, busco, blastp, BLOBTOOLKIT_METADATA.out.yaml, taxdump ) + BLOBTOOLKIT_CREATEBLOBDIR ( windowstats, busco, blastp, config, taxdump ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_CREATEBLOBDIR.out.versions.first() ) // // Update Blobtools dataset files // - BLOBTOOLKIT_UPDATEBLOBDIR ( BLOBTOOLKIT_CREATEBLOBDIR.out.blobdir, blastx, blastn, taxdump ) + BLOBTOOLKIT_UPDATEBLOBDIR ( BLOBTOOLKIT_CREATEBLOBDIR.out.blobdir, syn_tsv, cat_tsv, blastx, blastn, taxdump ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_UPDATEBLOBDIR.out.versions.first() ) emit: - metadata = BLOBTOOLKIT_METADATA.out.yaml // channel: [ val(meta), path(yaml) ] blobdir = BLOBTOOLKIT_UPDATEBLOBDIR.out.blobdir // channel: [ val(meta), path(dir) ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/busco_diamond_blastp.nf b/subworkflows/local/busco_diamond_blastp.nf index 4b07723e..69a65b24 100644 --- a/subworkflows/local/busco_diamond_blastp.nf +++ b/subworkflows/local/busco_diamond_blastp.nf @@ -2,7 +2,7 @@ // Run BUSCO for a genome and runs diamond_blastp // -include { BUSCO } from '../../modules/nf-core/busco/main' +include { BUSCO_BUSCO as BUSCO } from '../../modules/nf-core/busco/busco/main' include { BLOBTOOLKIT_EXTRACTBUSCOS } from '../../modules/local/blobtoolkit/extractbuscos' include { DIAMOND_BLASTP } from '../../modules/nf-core/diamond/blastp/main' include { RESTRUCTUREBUSCODIR } from '../../modules/local/restructurebuscodir' @@ -47,12 +47,11 @@ workflow BUSCO_DIAMOND { ch_fasta_with_lineage, "genome", ch_fasta_with_lineage.map { it[0].lineage_name }, - busco_db, + busco_db.first(), [], ) ch_versions = ch_versions.mix ( BUSCO.out.versions.first() ) - // // Tidy up the BUSCO output directories before publication // diff --git a/subworkflows/local/finalise_blobdir.nf b/subworkflows/local/finalise_blobdir.nf index 352cb01b..04dfe8d2 100644 --- a/subworkflows/local/finalise_blobdir.nf +++ b/subworkflows/local/finalise_blobdir.nf @@ -8,7 +8,6 @@ include { COMPRESSBLOBDIR } from '../../modules/local/compressblobdir' workflow FINALISE_BLOBDIR { take: blobdir // channel: [ val(meta), path(blobdir) ] - reads // channel: [ [meta, reads] ] software // channel: [ val(meta), path(software_yml) ] summary // channel: [ val(meta), path(summary_json) ] @@ -17,17 +16,9 @@ workflow FINALISE_BLOBDIR { ch_versions = Channel.empty() // - // MODULE: Update meta json file + // MODULE: Update the software listed in the meta json file // - BLOBTOOLKIT_UPDATEMETA ( - blobdir, - reads, - software, - params.blastp, - params.blastx, - params.blastn, - params.taxdump, - ) + BLOBTOOLKIT_UPDATEMETA ( blobdir, software ) // // MODULE: Compress all the json files diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 7a8fd112..40b4e721 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -2,6 +2,7 @@ // Check input samplesheet and get aligned read channels // +include { UNTAR } from '../../modules/nf-core/untar/main' include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' @@ -15,11 +16,56 @@ workflow INPUT_CHECK { taxon // channel: val(taxon) busco_lin // channel: val([busco_lin]) lineage_tax_ids // channel: /path/to/lineage_tax_ids - blastn // channel: [ val(meta), path(blastn_db) ] + blastn // channel: [ path(blastn_db) ] + blastp // channel: [ path(blastp_db) ] + blastx // channel: [ path(blastx_db) ] + busco_db // channel: [ path(busco_db) ] + taxdump // channel: [ path(taxdump) ] main: ch_versions = Channel.empty() + // + // SUBWORKFLOW: Decompress databases if needed + // + + // Join into single databases channel + databases = blastn.concat(blastp, blastx, busco_db, taxdump) + + // Check which need to be decompressed + ch_dbs_for_untar = databases + .branch { db_meta, db_path -> + untar: db_path.name.endsWith( ".tar.gz" ) + skip: true + } + + // Untar the databases + UNTAR ( ch_dbs_for_untar.untar ) + ch_versions = ch_versions.mix( UNTAR.out.versions.first() ) + + // Join and format dbs + // NOTE: The conditional for blastp/x is needed because nf-core/untar puts the database in a directory + ch_databases = UNTAR.out.untar.concat( ch_dbs_for_untar.skip ) + .map { meta, db -> [ meta + [id: db.baseName], db] } + .map { db_meta, db_path -> + if (db_meta.type in ["blastp", "blastx"] && db_path.isDirectory()) { + [db_meta, file(db_path.toString() + "/${db_path.name}", checkIfExists: true)] + } else { + [db_meta, db_path] + } + } + .branch { db_meta, db_path -> + blastn: db_meta.type == "blastn" + blastp: db_meta.type == "blastp" + blastx: db_meta.type == "blastx" + busco: db_meta.type == "busco" + taxdump: db_meta.type == "taxdump" + } + + + // + // SUBWORKFLOW: Process samplesheet + // if ( params.fetchngs_samplesheet ) { FETCHNGSSAMPLESHEET_CHECK ( samplesheet ) .csv @@ -66,7 +112,11 @@ workflow INPUT_CHECK { taxon, busco_lin, lineage_tax_ids, - blastn, + reads.collect(flat: false).ifEmpty([]), + ch_databases.blastp, + ch_databases.blastx, + ch_databases.blastn, + ch_databases.taxdump, ) ch_versions = ch_versions.mix(GENERATE_CONFIG.out.versions.first()) @@ -106,8 +156,15 @@ workflow INPUT_CHECK { emit: reads // channel: [ val(meta), path(datafile) ] config = GENERATE_CONFIG.out.yaml // channel: [ val(meta), path(yaml) ] + synonyms_tsv = GENERATE_CONFIG.out.synonyms_tsv // channel: [ val(meta), path(tsv) ] + categories_tsv = GENERATE_CONFIG.out.categories_tsv // channel: [ val(meta), path(tsv) ] taxon_id = ch_taxon_id // channel: val(taxon_id) busco_lineages = ch_busco_lineages // channel: val([busco_lin]) + blastn = ch_databases.blastn // channel: [ val(meta), path(blastn_db) ] + blastp = ch_databases.blastp // channel: [ val(meta), path(blastp_db) ] + blastx = ch_databases.blastx // channel: [ val(meta), path(blastx_db) ] + busco_db = ch_databases.busco.map { _, db_path -> db_path } // channel: [ path(busco_db) ] + taxdump = ch_databases.taxdump.map { _, db_path -> db_path } // channel: [ path(taxdump) ] versions = ch_versions // channel: [ versions.yml ] } @@ -117,7 +174,7 @@ def create_data_channels(LinkedHashMap row) { def meta = [:] meta.id = row.sample meta.datatype = row.datatype - + meta.layout = row.library_layout // add path(s) of the read file(s) to the meta map def data_meta = [] @@ -140,6 +197,7 @@ def create_data_channels_from_fetchngs(LinkedHashMap row) { // create meta map def meta = [:] meta.id = row.run_accession + meta.layout = row.library_layout // Same as https://github.com/blobtoolkit/blobtoolkit/blob/4.3.3/src/blobtoolkit-pipeline/src/lib/functions.py#L30-L39 // with the addition of "hic" diff --git a/subworkflows/local/minimap_alignment.nf b/subworkflows/local/minimap_alignment.nf index 0c25f4c7..33d632c7 100644 --- a/subworkflows/local/minimap_alignment.nf +++ b/subworkflows/local/minimap_alignment.nf @@ -2,7 +2,6 @@ // Optional alignment subworkflow using Minimap2 // -include { SAMTOOLS_FASTA } from '../../modules/nf-core/samtools/fasta/main' include { MINIMAP2_ALIGN as MINIMAP2_HIC } from '../../modules/nf-core/minimap2/align/main' include { MINIMAP2_ALIGN as MINIMAP2_ILMN } from '../../modules/nf-core/minimap2/align/main' include { MINIMAP2_ALIGN as MINIMAP2_CCS } from '../../modules/nf-core/minimap2/align/main' @@ -20,24 +19,8 @@ workflow MINIMAP2_ALIGNMENT { ch_versions = Channel.empty() - // Convert BAM/CRAM reads to FASTA - input - | branch { - meta, reads -> - fasta: reads.toString().endsWith(".fasta") || reads.toString().endsWith(".fasta.gz") || reads.toString().endsWith(".fa") || reads.toString().endsWith(".fa.gz") - fastq: reads.toString().endsWith(".fastq") || reads.toString().endsWith(".fastq.gz") || reads.toString().endsWith(".fq") || reads.toString().endsWith(".fq.gz") - bamcram: true - } - | set { ch_reads_by_type } - - SAMTOOLS_FASTA ( ch_reads_by_type.bamcram, true ) - ch_versions = ch_versions.mix(SAMTOOLS_FASTA.out.versions.first()) - - // Branch input by sequencing type - SAMTOOLS_FASTA.out.interleaved - | mix ( ch_reads_by_type.fasta ) - | mix ( ch_reads_by_type.fastq ) + input | branch { meta, reads -> hic: meta.datatype == "hic" diff --git a/subworkflows/local/run_blastx.nf b/subworkflows/local/run_blastx.nf index 715e5ae2..cfcb0219 100644 --- a/subworkflows/local/run_blastx.nf +++ b/subworkflows/local/run_blastx.nf @@ -19,7 +19,7 @@ workflow RUN_BLASTX { // - // Create metadata summary file + // Split the sequences // BLOBTOOLKIT_CHUNK ( fasta, table ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_CHUNK.out.versions.first() ) diff --git a/workflows/blobtoolkit.nf b/workflows/blobtoolkit.nf index cd97fd99..56690ef9 100644 --- a/workflows/blobtoolkit.nf +++ b/workflows/blobtoolkit.nf @@ -24,17 +24,21 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.fasta) { ch_fasta = Channel.value([ [ 'id': params.accession ?: file(params.fasta.replace(".gz", "")).baseName ], file(params.fasta) ]) } else { exit 1, 'Genome fasta file must be specified!' } if (params.taxon) { ch_taxon = Channel.value(params.taxon) } else { exit 1, 'NCBI Taxon ID not specified!' } -if (params.blastp) { ch_blastp = Channel.value([ [ 'id': file(params.blastp).baseName ], params.blastp ]) } else { exit 1, 'Diamond BLASTp database must be specified!' } -if (params.blastx) { ch_blastx = Channel.value([ [ 'id': file(params.blastx).baseName ], params.blastx ]) } else { exit 1, 'Diamond BLASTx database must be specified!' } -if (params.blastn) { ch_blastn = Channel.value([ [ 'id': file(params.blastn).baseName ], params.blastn ]) } else { exit 1, 'BLASTn database not specified!' } -if (params.taxdump) { ch_taxdump = file(params.taxdump) } else { exit 1, 'NCBI Taxonomy database not specified!' } +if (params.blastp) { ch_blastp = Channel.fromPath(params.blastp).map { tuple(["type": "blastp"], it) } } else { exit 1, 'Diamond BLASTp database must be specified!' } +if (params.blastx) { ch_blastx = Channel.fromPath(params.blastx).map { tuple(["type": "blastx"], it) } } else { exit 1, 'Diamond BLASTx database must be specified!' } +if (params.blastn) { ch_blastn = Channel.fromPath(params.blastn).map { tuple(["type": "blastn"], it) } } else { exit 1, 'BLASTn database not specified!' } +if (params.taxdump) { ch_taxdump = Channel.fromPath(params.taxdump).map { tuple(["type": "taxdump"], it) } } else { exit 1, 'NCBI Taxonomy database not specified!' } if (params.fetchngs_samplesheet && !params.align) { exit 1, '--align not specified, even though the input samplesheet is a nf-core/fetchngs one - i.e has fastq files!' } if (params.lineage_tax_ids) { ch_lineage_tax_ids = Channel.fromPath(params.lineage_tax_ids).first() } else { exit 1, 'Mapping BUSCO lineage <-> taxon_ids not specified' } // Create channel for optional parameters if (params.busco_lineages) { ch_busco_lin = Channel.value(params.busco_lineages) } else { ch_busco_lin = Channel.value([]) } -if (params.busco) { ch_busco_db = Channel.fromPath(params.busco).first() } else { ch_busco_db = Channel.value([]) } +if (params.busco) { + ch_busco_db = Channel.fromPath(params.busco).first().map { tuple([ "type": "busco"], it ) } +} else { + ch_busco_db = Channel.value([]) +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -109,6 +113,10 @@ workflow BLOBTOOLKIT { ch_busco_lin, ch_lineage_tax_ids, ch_blastn, + ch_blastx, + ch_blastp, + ch_busco_db, + ch_taxdump, ) ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions ) @@ -135,8 +143,8 @@ workflow BLOBTOOLKIT { BUSCO_DIAMOND ( PREPARE_GENOME.out.genome, INPUT_CHECK.out.busco_lineages, - ch_busco_db, - ch_blastp, + INPUT_CHECK.out.busco_db, + INPUT_CHECK.out.blastp, INPUT_CHECK.out.taxon_id, ) ch_versions = ch_versions.mix ( BUSCO_DIAMOND.out.versions ) @@ -147,7 +155,7 @@ workflow BLOBTOOLKIT { RUN_BLASTX ( PREPARE_GENOME.out.genome, BUSCO_DIAMOND.out.first_table, - ch_blastx, + INPUT_CHECK.out.blastx, INPUT_CHECK.out.taxon_id, ) ch_versions = ch_versions.mix ( RUN_BLASTX.out.versions ) @@ -159,7 +167,7 @@ workflow BLOBTOOLKIT { RUN_BLASTN ( RUN_BLASTX.out.blastx_out, PREPARE_GENOME.out.genome, - ch_blastn, + INPUT_CHECK.out.blastn, INPUT_CHECK.out.taxon_id, ) @@ -180,12 +188,14 @@ workflow BLOBTOOLKIT { // BLOBTOOLS ( INPUT_CHECK.out.config, + INPUT_CHECK.out.synonyms_tsv.ifEmpty([[],[]]), + INPUT_CHECK.out.categories_tsv.ifEmpty([[],[]]), COLLATE_STATS.out.window_tsv, BUSCO_DIAMOND.out.all_tables, BUSCO_DIAMOND.out.blastp_txt.ifEmpty([[],[]]), RUN_BLASTX.out.blastx_out.ifEmpty([[],[]]), RUN_BLASTN.out.blastn_out.ifEmpty([[],[]]), - ch_taxdump + INPUT_CHECK.out.taxdump ) ch_versions = ch_versions.mix ( BLOBTOOLS.out.versions ) @@ -207,7 +217,6 @@ workflow BLOBTOOLKIT { // FINALISE_BLOBDIR ( BLOBTOOLS.out.blobdir, - INPUT_CHECK.out.reads.collect(flat: false).ifEmpty([]), CUSTOM_DUMPSOFTWAREVERSIONS.out.yml, VIEW.out.summary ) @@ -233,7 +242,9 @@ workflow BLOBTOOLKIT { ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() + ch_multiqc_logo.toList(), + [], + [] ) multiqc_report = MULTIQC.out.report.toList() }