diff --git a/CHANGELOG.md b/CHANGELOG.md index 941a344..ef0e042 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [v1.0.0](https://github.com/nf-core/reportho/releases/tag/1.0.0) - Magnificent Mainsail - [2024-06-06] +## [v1.0.0](https://github.com/nf-core/reportho/releases/tag/1.0.0) - Magnificent Mainsail - [2024-06-07] Although its location and design may vary greatly, the mainsail is always a key source of propulsion for a ship. @@ -30,10 +30,6 @@ The pipeline was created. In particular, it has the following features: - basic downstream analysis of the obtained ortholog list - generation of a human-readable report -### `Fixed` - -Nothing yet. - ### `Dependencies` The pipeline has the following notable dependencies: @@ -60,7 +56,3 @@ At release date, the following database versions were current and used for testi | PANTHER | 18 | | OrthoInspector | Eukaryota2023 | | EggNOG | 5.0 | - -### `Deprecated` - -Nothing. diff --git a/conf/test_fasta.config b/conf/test_fasta.config index c6623e4..caccf38 100644 --- a/conf/test_fasta.config +++ b/conf/test_fasta.config @@ -11,8 +11,8 @@ */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile with FASTA input' + config_profile_description = 'Minimal test dataset to check pipeline function with FASTA input' // Limit resources so that this can run on GitHub Actions max_cpus = 2 diff --git a/conf/test_offline.config b/conf/test_offline.config index 4432cc3..f09bba1 100644 --- a/conf/test_offline.config +++ b/conf/test_offline.config @@ -11,8 +11,8 @@ */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile with offline databases' + config_profile_description = 'Minimal test dataset to check pipeline function with offline databases' // Limit resources so that this can run on GitHub Actions max_cpus = 2 diff --git a/docs/images/nf-core-reportho_tube_map_beta.png b/docs/images/nf-core-reportho_tube_map_beta.png deleted file mode 100644 index 2bc4166..0000000 Binary files a/docs/images/nf-core-reportho_tube_map_beta.png and /dev/null differ diff --git a/docs/images/reportho_tube_map.svg b/docs/images/reportho_tube_map.svg index 946f3fa..dc5d338 100644 --- a/docs/images/reportho_tube_map.svg +++ b/docs/images/reportho_tube_map.svg @@ -1,4 +1,4 @@ -Filter hitsnf-core/reporthoFastaSequence queryIDUniprot ID queryIdentify sequenceOMAIdentify taxonOMAOMAPANTHEROrthoInspectorEggNOGOnlineLocalOnlineLocalOnlineLocalcsvcsvcsvcsvSingle predictionsQuery informationtxtMake score tablecsvmergePythonFetch ortholog predictions csvScore tablePythonPlot orthologsggplot listFiltered orthologsComparison plotsFastqFastqpngFetch sequencesOMA/UniprotFetch structuresAlphaFoldDB3D-COFFEE fastaOrtholog sequencesT-COFFEE alnMSAIQ-TREEFastME nwkTreePythonOrtholog statisticsCalculate statisticsDump parameterscatGenerate reportReact listVersion devCore subworkflowOptional downstream analysisOptional report generationCore data flowReport data flow htmlReportChoose oneCreate alignmentMake phylogenySubworkflow \ No newline at end of file +Filter hitsnf-core/reporthoFastaSequence queryIDUniprot ID queryIdentify sequenceOMAIdentify taxonOMAOMAPANTHEROrthoInspectorEggNOGOnlineLocalOnlineLocalOnlineLocalcsvcsvcsvcsvSingle predictionsQuery informationtxtMake score tablecsvmergePythonFetch ortholog predictions csvScore tablePythonPlot orthologsggplot listFiltered orthologsComparison plotsFastqFastqpngFetch sequencesOMA/UniprotFetch structuresAlphaFoldDB3D-COFFEE fastaOrtholog sequencesT-COFFEE alnMSAIQ-TREEFastME nwkTreePythonOrtholog statisticsCalculate statisticsDump parameterscatGenerate reportsReact listVersion 1.0.0Magnificent MainsailCore subworkflowOptional downstream analysisOptional report generationCore data flowReport data flow htmlPer-query reportsChoose oneCreate alignmentMake phylogenySubworkflow htmlSummary reportGenerate summaryMultiQC diff --git a/modules/local/dump_params.nf b/modules/local/dump_params.nf index b3d2f50..e0934f6 100644 --- a/modules/local/dump_params.nf +++ b/modules/local/dump_params.nf @@ -2,6 +2,7 @@ process DUMP_PARAMS { tag "$meta.id" label 'process_single' + conda "conda-forge::coreutils=9.5" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/local/fetch_panther_group_online.nf b/modules/local/fetch_panther_group_online.nf index ad0e249..11d9f36 100644 --- a/modules/local/fetch_panther_group_online.nf +++ b/modules/local/fetch_panther_group_online.nf @@ -11,7 +11,7 @@ process FETCH_PANTHER_GROUP_ONLINE { tuple val(meta), path(uniprot_id), path(taxid), path(exact) output: - tuple val(meta), path("*_panther_group.csv"), emit:panther_group + tuple val(meta), path("*_panther_group.csv"), emit: panther_group path "versions.yml" , emit: versions when: diff --git a/modules/local/plot_orthologs.nf b/modules/local/plot_orthologs.nf index 879eeca..94c0e12 100644 --- a/modules/local/plot_orthologs.nf +++ b/modules/local/plot_orthologs.nf @@ -14,7 +14,7 @@ process PLOT_ORTHOLOGS { tuple val(meta), path("*_supports_light.png"), path("*_supports_dark.png"), emit: supports tuple val(meta), path("*_venn_light.png"), path("*_venn_dark.png") , emit: venn tuple val(meta), path("*_jaccard_light.png"), path("*_jaccard_dark.png") , emit: jaccard - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/plot_tree.nf b/modules/local/plot_tree.nf index c6e98bb..cc20f93 100644 --- a/modules/local/plot_tree.nf +++ b/modules/local/plot_tree.nf @@ -13,7 +13,7 @@ process PLOT_TREE { output: tuple val(meta), path("*_light.png"), path("*_dark.png") , emit: plot - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/nextflow_schema.json b/nextflow_schema.json index 4022919..7607f05 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -65,7 +65,7 @@ "local_databases": { "type": "boolean", "default": "false", - "description": "Use local databases for the analysis. If use_all is set to `true`, online databases might still be used.", + "description": "Use local databases for the analysis.", "help_text": "If set to `true`, the pipeline will use local databases for the analysis.", "fa_icon": "fas fa-database" }, diff --git a/subworkflows/local/fetch_sequences.nf b/subworkflows/local/fetch_sequences.nf deleted file mode 100644 index 0c441dd..0000000 --- a/subworkflows/local/fetch_sequences.nf +++ /dev/null @@ -1,22 +0,0 @@ -include { FETCH_SEQUENCES_ONLINE } from "../../modules/local/fetch_sequences_online" - -workflow FETCH_SEQUENCES { - take: - ch_id_list - ch_query - - main: - ch_id_list - .join(ch_query) - .set { ch_input } - - FETCH_SEQUENCES_ONLINE ( - ch_input - ) - - emit: - sequences = FETCH_SEQUENCES_ONLINE.out.fasta - hits = FETCH_SEQUENCES_ONLINE.out.hits - misses = FETCH_SEQUENCES_ONLINE.out.misses - versions = FETCH_SEQUENCES_ONLINE.out.versions -} diff --git a/subworkflows/local/fetch_structures.nf b/subworkflows/local/fetch_structures.nf deleted file mode 100644 index 188e5b4..0000000 --- a/subworkflows/local/fetch_structures.nf +++ /dev/null @@ -1,19 +0,0 @@ -include { FETCH_AFDB_STRUCTURES } from "../../modules/local/fetch_afdb_structures" - -workflow FETCH_STRUCTURES { - take: - ch_idlist - - main: - - FETCH_AFDB_STRUCTURES( - ch_idlist - ) - - emit: - structures = FETCH_AFDB_STRUCTURES.out.pdb - hits = FETCH_AFDB_STRUCTURES.out.hits - misses = FETCH_AFDB_STRUCTURES.out.misses - af_versions = FETCH_AFDB_STRUCTURES.out.af_versions - versions = FETCH_AFDB_STRUCTURES.out.versions -} diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index 124b0c3..4b8a2ed 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -23,19 +23,18 @@ workflow GET_ORTHOLOGS { take: ch_samplesheet_query ch_samplesheet_fasta + ch_oma_groups + ch_oma_uniprot + ch_oma_ensembl + ch_oma_refseq + ch_panther + ch_eggnog + ch_eggnog_idmap main: ch_versions = Channel.empty() ch_orthogroups = Channel.empty() - ch_oma_groups = params.oma_path ? Channel.value(file(params.oma_path)) : Channel.empty() - ch_oma_uniprot = params.oma_uniprot_path ? Channel.value(file(params.oma_uniprot_path)) : Channel.empty() - ch_oma_ensembl = params.oma_ensembl_path ? Channel.value(file(params.oma_ensembl_path)) : Channel.empty() - ch_oma_refseq = params.oma_refseq_path ? Channel.value(file(params.oma_refseq_path)) : Channel.empty() - ch_panther = params.panther_path ? Channel.value(file(params.panther_path)) : Channel.empty() - ch_eggnog = params.eggnog_path ? Channel.value(file(params.eggnog_path)) : Channel.empty() - ch_eggnog_idmap = params.eggnog_idmap_path ? Channel.value(file(params.eggnog_idmap_path)) : Channel.empty() - ch_samplesheet_fasta.map { if (params.offline_run) { error "Tried to use FASTA input in an offline run. Aborting pipeline for user safety." @@ -53,7 +52,6 @@ workflow GET_ORTHOLOGS { ch_fasta ) - ch_query = IDENTIFY_SEQ_ONLINE.out.seqinfo ch_versions = ch_versions.mix(IDENTIFY_SEQ_ONLINE.out.versions) WRITE_SEQINFO ( diff --git a/workflows/reportho.nf b/workflows/reportho.nf index 0aaf31d..350ab8c 100644 --- a/workflows/reportho.nf +++ b/workflows/reportho.nf @@ -11,12 +11,13 @@ include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pi include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_reportho_pipeline' include { GET_ORTHOLOGS } from '../subworkflows/local/get_orthologs' -include { FETCH_SEQUENCES } from '../subworkflows/local/fetch_sequences' -include { FETCH_STRUCTURES } from '../subworkflows/local/fetch_structures' include { ALIGN } from '../subworkflows/local/align' include { MAKE_TREES } from '../subworkflows/local/make_trees' include { REPORT } from '../subworkflows/local/report' +include { FETCH_SEQUENCES_ONLINE } from '../modules/local/fetch_sequences_online' +include { FETCH_AFDB_STRUCTURES } from '../modules/local/fetch_afdb_structures' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -35,9 +36,24 @@ workflow REPORTHO { ch_multiqc_files = Channel.empty() ch_fasta_query = ch_samplesheet_query.map { [it[0], []] }.mix(ch_samplesheet_fasta.map { [it[0], file(it[1])] }) + ch_oma_groups = params.oma_path ? Channel.value(file(params.oma_path)) : Channel.empty() + ch_oma_uniprot = params.oma_uniprot_path ? Channel.value(file(params.oma_uniprot_path)) : Channel.empty() + ch_oma_ensembl = params.oma_ensembl_path ? Channel.value(file(params.oma_ensembl_path)) : Channel.empty() + ch_oma_refseq = params.oma_refseq_path ? Channel.value(file(params.oma_refseq_path)) : Channel.empty() + ch_panther = params.panther_path ? Channel.value(file(params.panther_path)) : Channel.empty() + ch_eggnog = params.eggnog_path ? Channel.value(file(params.eggnog_path)) : Channel.empty() + ch_eggnog_idmap = params.eggnog_idmap_path ? Channel.value(file(params.eggnog_idmap_path)) : Channel.empty() + GET_ORTHOLOGS ( ch_samplesheet_query, - ch_samplesheet_fasta + ch_samplesheet_fasta, + ch_oma_groups, + ch_oma_uniprot, + ch_oma_ensembl, + ch_oma_refseq, + ch_panther, + ch_eggnog, + ch_eggnog_idmap ) ch_versions = ch_versions.mix(GET_ORTHOLOGS.out.versions) @@ -55,33 +71,34 @@ workflow REPORTHO { ch_fastme = ch_samplesheet.map { [it[0], []] } if (!params.skip_downstream) { - FETCH_SEQUENCES ( - GET_ORTHOLOGS.out.orthologs, - ch_fasta_query + ch_sequences_input = GET_ORTHOLOGS.out.orthologs.join(ch_fasta_query) + + FETCH_SEQUENCES_ONLINE ( + ch_sequences_input ) - ch_seqhits = FETCH_SEQUENCES.out.hits + ch_seqhits = FETCH_SEQUENCES_ONLINE.out.hits - ch_seqmisses = FETCH_SEQUENCES.out.misses + ch_seqmisses = FETCH_SEQUENCES_ONLINE.out.misses - ch_versions = ch_versions.mix(FETCH_SEQUENCES.out.versions) + ch_versions = ch_versions.mix(FETCH_SEQUENCES_ONLINE.out.versions) if (params.use_structures) { - FETCH_STRUCTURES ( + FETCH_AFDB_STRUCTURES ( GET_ORTHOLOGS.out.orthologs ) - ch_strhits = FETCH_STRUCTURES.out.hits + ch_strhits = FETCH_AFDB_STRUCTURES.out.hits - ch_strmisses = FETCH_STRUCTURES.out.misses + ch_strmisses = FETCH_AFDB_STRUCTURES.out.misses - ch_versions = ch_versions.mix(FETCH_STRUCTURES.out.versions) + ch_versions = ch_versions.mix(FETCH_AFDB_STRUCTURES.out.versions) } - ch_structures = params.use_structures ? FETCH_STRUCTURES.out.structures : Channel.empty() + ch_structures = params.use_structures ? FETCH_AFDB_STRUCTURES.out.structures : Channel.empty() ALIGN ( - FETCH_SEQUENCES.out.sequences, + FETCH_SEQUENCES_ONLINE.out.fasta, ch_structures )