diff --git a/CHANGELOG.md b/CHANGELOG.md index 941a344..ef0e042 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [v1.0.0](https://github.com/nf-core/reportho/releases/tag/1.0.0) - Magnificent Mainsail - [2024-06-06] +## [v1.0.0](https://github.com/nf-core/reportho/releases/tag/1.0.0) - Magnificent Mainsail - [2024-06-07] Although its location and design may vary greatly, the mainsail is always a key source of propulsion for a ship. @@ -30,10 +30,6 @@ The pipeline was created. In particular, it has the following features: - basic downstream analysis of the obtained ortholog list - generation of a human-readable report -### `Fixed` - -Nothing yet. - ### `Dependencies` The pipeline has the following notable dependencies: @@ -60,7 +56,3 @@ At release date, the following database versions were current and used for testi | PANTHER | 18 | | OrthoInspector | Eukaryota2023 | | EggNOG | 5.0 | - -### `Deprecated` - -Nothing. 
diff --git a/conf/test_fasta.config b/conf/test_fasta.config index c6623e4..caccf38 100644 --- a/conf/test_fasta.config +++ b/conf/test_fasta.config @@ -11,8 +11,8 @@ */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile with FASTA input' + config_profile_description = 'Minimal test dataset to check pipeline function with FASTA input' // Limit resources so that this can run on GitHub Actions max_cpus = 2 diff --git a/conf/test_offline.config b/conf/test_offline.config index 4432cc3..f09bba1 100644 --- a/conf/test_offline.config +++ b/conf/test_offline.config @@ -11,8 +11,8 @@ */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile with offline databases' + config_profile_description = 'Minimal test dataset to check pipeline function with offline databases' // Limit resources so that this can run on GitHub Actions max_cpus = 2 diff --git a/docs/images/nf-core-reportho_tube_map_beta.png b/docs/images/nf-core-reportho_tube_map_beta.png deleted file mode 100644 index 2bc4166..0000000 Binary files a/docs/images/nf-core-reportho_tube_map_beta.png and /dev/null differ diff --git a/docs/images/reportho_tube_map.svg b/docs/images/reportho_tube_map.svg index 946f3fa..dc5d338 100644 --- a/docs/images/reportho_tube_map.svg +++ b/docs/images/reportho_tube_map.svg @@ -1,4 +1,4 @@ -
Filter hits
nf-core/
reportho
Fasta
Sequence query
ID
Uniprot ID query
Identify sequence
OMA
Identify taxon
OMA
OMA
PANTHER
OrthoInspector
EggNOG
Online
Local
Online
Local
Online
Local
csv
csv
csv
csv
Single predictions
Query information
txt
Make score table
csvmerge
Python
Fetch ortholog predictions
 
csv
Score table
Python
Plot orthologs
ggplot
 
list
Filtered orthologs
Comparison plots
Fastq
Fastq
png
Fetch sequences
OMA/Uniprot
Fetch structures
AlphaFoldDB
3D-COFFEE
 
fasta
Ortholog sequences
T-COFFEE
 
aln
MSA
IQ-TREE
FastME
 
nwk
Tree
Python
Ortholog statistics
Calculate statistics
Dump parameters
cat
Generate report
React
 
list
Version dev
Core subworkflow
Optional downstream analysis
Optional report generation
Core data flow
Report data flow
 
html
Report
Choose one
Create alignment
Make phylogeny
Subworkflow
\ No newline at end of file +
Filter hits
nf-core/
reportho
Fasta
Sequence query
ID
Uniprot ID query
Identify sequence
OMA
Identify taxon
OMA
OMA
PANTHER
OrthoInspector
EggNOG
Online
Local
Online
Local
Online
Local
csv
csv
csv
csv
Single predictions
Query information
txt
Make score table
csvmerge
Python
Fetch ortholog predictions
 
csv
Score table
Python
Plot orthologs
ggplot
 
list
Filtered orthologs
Comparison plots
Fastq
Fastq
png
Fetch sequences
OMA/Uniprot
Fetch structures
AlphaFoldDB
3D-COFFEE
 
fasta
Ortholog sequences
T-COFFEE
 
aln
MSA
IQ-TREE
FastME
 
nwk
Tree
Python
Ortholog statistics
Calculate statistics
Dump parameters
cat
Generate reports
React
 
list
Version 1.0.0
Magnificent Mainsail
Core subworkflow
Optional downstream analysis
Optional report generation
Core data flow
Report data flow
 
html
Per-query reports
Choose one
Create alignment
Make phylogeny
Subworkflow
 
html
Summary report
Generate summary
MultiQC
diff --git a/modules/local/dump_params.nf b/modules/local/dump_params.nf index b3d2f50..e0934f6 100644 --- a/modules/local/dump_params.nf +++ b/modules/local/dump_params.nf @@ -2,6 +2,7 @@ process DUMP_PARAMS { tag "$meta.id" label 'process_single' + conda "conda-forge::coreutils=9.5" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/local/fetch_panther_group_online.nf b/modules/local/fetch_panther_group_online.nf index ad0e249..11d9f36 100644 --- a/modules/local/fetch_panther_group_online.nf +++ b/modules/local/fetch_panther_group_online.nf @@ -11,7 +11,7 @@ process FETCH_PANTHER_GROUP_ONLINE { tuple val(meta), path(uniprot_id), path(taxid), path(exact) output: - tuple val(meta), path("*_panther_group.csv"), emit:panther_group + tuple val(meta), path("*_panther_group.csv"), emit: panther_group path "versions.yml" , emit: versions when: diff --git a/modules/local/plot_orthologs.nf b/modules/local/plot_orthologs.nf index 879eeca..94c0e12 100644 --- a/modules/local/plot_orthologs.nf +++ b/modules/local/plot_orthologs.nf @@ -14,7 +14,7 @@ process PLOT_ORTHOLOGS { tuple val(meta), path("*_supports_light.png"), path("*_supports_dark.png"), emit: supports tuple val(meta), path("*_venn_light.png"), path("*_venn_dark.png") , emit: venn tuple val(meta), path("*_jaccard_light.png"), path("*_jaccard_dark.png") , emit: jaccard - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/plot_tree.nf b/modules/local/plot_tree.nf index c6e98bb..cc20f93 100644 --- a/modules/local/plot_tree.nf +++ b/modules/local/plot_tree.nf @@ -13,7 +13,7 @@ process PLOT_TREE { output: tuple val(meta), path("*_light.png"), path("*_dark.png") , emit: plot - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: 
task.ext.when == null || task.ext.when diff --git a/nextflow_schema.json b/nextflow_schema.json index 4022919..7607f05 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -65,7 +65,7 @@ "local_databases": { "type": "boolean", "default": "false", - "description": "Use local databases for the analysis. If use_all is set to `true`, online databases might still be used.", + "description": "Use local databases for the analysis.", "help_text": "If set to `true`, the pipeline will use local databases for the analysis.", "fa_icon": "fas fa-database" }, diff --git a/subworkflows/local/fetch_sequences.nf b/subworkflows/local/fetch_sequences.nf deleted file mode 100644 index 0c441dd..0000000 --- a/subworkflows/local/fetch_sequences.nf +++ /dev/null @@ -1,22 +0,0 @@ -include { FETCH_SEQUENCES_ONLINE } from "../../modules/local/fetch_sequences_online" - -workflow FETCH_SEQUENCES { - take: - ch_id_list - ch_query - - main: - ch_id_list - .join(ch_query) - .set { ch_input } - - FETCH_SEQUENCES_ONLINE ( - ch_input - ) - - emit: - sequences = FETCH_SEQUENCES_ONLINE.out.fasta - hits = FETCH_SEQUENCES_ONLINE.out.hits - misses = FETCH_SEQUENCES_ONLINE.out.misses - versions = FETCH_SEQUENCES_ONLINE.out.versions -} diff --git a/subworkflows/local/fetch_structures.nf b/subworkflows/local/fetch_structures.nf deleted file mode 100644 index 188e5b4..0000000 --- a/subworkflows/local/fetch_structures.nf +++ /dev/null @@ -1,19 +0,0 @@ -include { FETCH_AFDB_STRUCTURES } from "../../modules/local/fetch_afdb_structures" - -workflow FETCH_STRUCTURES { - take: - ch_idlist - - main: - - FETCH_AFDB_STRUCTURES( - ch_idlist - ) - - emit: - structures = FETCH_AFDB_STRUCTURES.out.pdb - hits = FETCH_AFDB_STRUCTURES.out.hits - misses = FETCH_AFDB_STRUCTURES.out.misses - af_versions = FETCH_AFDB_STRUCTURES.out.af_versions - versions = FETCH_AFDB_STRUCTURES.out.versions -} diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index 124b0c3..4b8a2ed 100644 --- 
a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -23,19 +23,18 @@ workflow GET_ORTHOLOGS { take: ch_samplesheet_query ch_samplesheet_fasta + ch_oma_groups + ch_oma_uniprot + ch_oma_ensembl + ch_oma_refseq + ch_panther + ch_eggnog + ch_eggnog_idmap main: ch_versions = Channel.empty() ch_orthogroups = Channel.empty() - ch_oma_groups = params.oma_path ? Channel.value(file(params.oma_path)) : Channel.empty() - ch_oma_uniprot = params.oma_uniprot_path ? Channel.value(file(params.oma_uniprot_path)) : Channel.empty() - ch_oma_ensembl = params.oma_ensembl_path ? Channel.value(file(params.oma_ensembl_path)) : Channel.empty() - ch_oma_refseq = params.oma_refseq_path ? Channel.value(file(params.oma_refseq_path)) : Channel.empty() - ch_panther = params.panther_path ? Channel.value(file(params.panther_path)) : Channel.empty() - ch_eggnog = params.eggnog_path ? Channel.value(file(params.eggnog_path)) : Channel.empty() - ch_eggnog_idmap = params.eggnog_idmap_path ? Channel.value(file(params.eggnog_idmap_path)) : Channel.empty() - ch_samplesheet_fasta.map { if (params.offline_run) { error "Tried to use FASTA input in an offline run. Aborting pipeline for user safety." 
@@ -53,7 +52,6 @@ workflow GET_ORTHOLOGS { ch_fasta ) - ch_query = IDENTIFY_SEQ_ONLINE.out.seqinfo ch_versions = ch_versions.mix(IDENTIFY_SEQ_ONLINE.out.versions) WRITE_SEQINFO ( diff --git a/workflows/reportho.nf b/workflows/reportho.nf index 0aaf31d..350ab8c 100644 --- a/workflows/reportho.nf +++ b/workflows/reportho.nf @@ -11,12 +11,13 @@ include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pi include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_reportho_pipeline' include { GET_ORTHOLOGS } from '../subworkflows/local/get_orthologs' -include { FETCH_SEQUENCES } from '../subworkflows/local/fetch_sequences' -include { FETCH_STRUCTURES } from '../subworkflows/local/fetch_structures' include { ALIGN } from '../subworkflows/local/align' include { MAKE_TREES } from '../subworkflows/local/make_trees' include { REPORT } from '../subworkflows/local/report' +include { FETCH_SEQUENCES_ONLINE } from '../modules/local/fetch_sequences_online' +include { FETCH_AFDB_STRUCTURES } from '../modules/local/fetch_afdb_structures' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -35,9 +36,24 @@ workflow REPORTHO { ch_multiqc_files = Channel.empty() ch_fasta_query = ch_samplesheet_query.map { [it[0], []] }.mix(ch_samplesheet_fasta.map { [it[0], file(it[1])] }) + ch_oma_groups = params.oma_path ? Channel.value(file(params.oma_path)) : Channel.empty() + ch_oma_uniprot = params.oma_uniprot_path ? Channel.value(file(params.oma_uniprot_path)) : Channel.empty() + ch_oma_ensembl = params.oma_ensembl_path ? Channel.value(file(params.oma_ensembl_path)) : Channel.empty() + ch_oma_refseq = params.oma_refseq_path ? Channel.value(file(params.oma_refseq_path)) : Channel.empty() + ch_panther = params.panther_path ? Channel.value(file(params.panther_path)) : Channel.empty() + ch_eggnog = params.eggnog_path ? 
Channel.value(file(params.eggnog_path)) : Channel.empty() + ch_eggnog_idmap = params.eggnog_idmap_path ? Channel.value(file(params.eggnog_idmap_path)) : Channel.empty() + GET_ORTHOLOGS ( ch_samplesheet_query, - ch_samplesheet_fasta + ch_samplesheet_fasta, + ch_oma_groups, + ch_oma_uniprot, + ch_oma_ensembl, + ch_oma_refseq, + ch_panther, + ch_eggnog, + ch_eggnog_idmap ) ch_versions = ch_versions.mix(GET_ORTHOLOGS.out.versions) @@ -55,33 +71,34 @@ workflow REPORTHO { ch_fastme = ch_samplesheet.map { [it[0], []] } if (!params.skip_downstream) { - FETCH_SEQUENCES ( - GET_ORTHOLOGS.out.orthologs, - ch_fasta_query + ch_sequences_input = GET_ORTHOLOGS.out.orthologs.join(ch_fasta_query) + + FETCH_SEQUENCES_ONLINE ( + ch_sequences_input ) - ch_seqhits = FETCH_SEQUENCES.out.hits + ch_seqhits = FETCH_SEQUENCES_ONLINE.out.hits - ch_seqmisses = FETCH_SEQUENCES.out.misses + ch_seqmisses = FETCH_SEQUENCES_ONLINE.out.misses - ch_versions = ch_versions.mix(FETCH_SEQUENCES.out.versions) + ch_versions = ch_versions.mix(FETCH_SEQUENCES_ONLINE.out.versions) if (params.use_structures) { - FETCH_STRUCTURES ( + FETCH_AFDB_STRUCTURES ( GET_ORTHOLOGS.out.orthologs ) - ch_strhits = FETCH_STRUCTURES.out.hits + ch_strhits = FETCH_AFDB_STRUCTURES.out.hits - ch_strmisses = FETCH_STRUCTURES.out.misses + ch_strmisses = FETCH_AFDB_STRUCTURES.out.misses - ch_versions = ch_versions.mix(FETCH_STRUCTURES.out.versions) + ch_versions = ch_versions.mix(FETCH_AFDB_STRUCTURES.out.versions) } - ch_structures = params.use_structures ? FETCH_STRUCTURES.out.structures : Channel.empty() + ch_structures = params.use_structures ? FETCH_AFDB_STRUCTURES.out.pdb : Channel.empty() ALIGN ( - FETCH_SEQUENCES.out.sequences, + FETCH_SEQUENCES_ONLINE.out.fasta, ch_structures )