From a376dc2abb8a0606c6bf92a62d309b7b6d492e94 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 22 Apr 2024 19:19:45 -0400 Subject: [PATCH 1/3] Deprecate igblast_base and imgtdb_base params --- CHANGELOG.md | 2 ++ README.md | 2 +- conf/test.config | 4 +-- conf/test_assembled_hs.config | 4 +-- ...est_assembled_immcantation_devel_hs.config | 4 +-- ...est_assembled_immcantation_devel_mm.config | 4 +-- conf/test_assembled_mm.config | 4 +-- conf/test_clontech_umi.config | 4 +-- conf/test_full.config | 4 +-- conf/test_nebnext_umi.config | 4 +-- conf/test_no_umi.config | 4 +-- conf/test_nocluster.config | 4 +-- conf/test_raw_immcantation_devel.config | 4 +-- conf/test_tcr.config | 4 +-- docs/output.md | 4 +-- .../local/changeo/changeo_creategermlines.nf | 4 +-- modules/local/changeo/changeo_makedb.nf | 5 ++- modules/local/enchantr/define_clones.nf | 4 +-- modules/local/enchantr/remove_chimeric.nf | 2 +- modules/local/fetch_databases.nf | 2 +- nextflow.config | 4 +-- nextflow_schema.json | 10 +++--- subworkflows/local/bulk_qc_and_filter.nf | 6 ++-- subworkflows/local/clonal_analysis.nf | 6 ++-- subworkflows/local/databases.nf | 36 +++++++++---------- subworkflows/local/vdj_annotation.nf | 8 ++--- workflows/airrflow.nf | 6 ++-- 27 files changed, 75 insertions(+), 74 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 627c2c0e..42b92beb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - `--skip_lineage_trees` is now deprecated in favor of `--lineage_trees`. Lineage trees are skipped by default. - `--igphyml` parameter is deprecated in favor of `--lineage_tree_exec`. All lineage tree building software part of Dowser are now supported. +- `--igblast_base` is deprecated in favor of `--reference_igblast`. +- `--imgtdb_base` is deprecated in favor of `--reference_fasta`. 
## [3.3.0] - 2024-03-31 Confringo diff --git a/README.md b/README.md index bb8a9302..ef1fd759 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ nf-core/airrflow allows the end-to-end processing of BCR and TCR bulk and single 2. V(D)J annotation and filtering (bulk and single-cell) -- Assign gene segments with `IgBlast` using the IMGT database (`Change-O AssignGenes`). +- Assign gene segments with `IgBlast` using a germline reference (`Change-O AssignGenes`). - Annotate alignments in AIRR format (`Change-O MakeDB`) - Filter by alignment quality (locus matching v_call chain, min 200 informative positions, max 10% N nucleotides) - Filter productive sequences (`Change-O ParseDB split`) diff --git a/conf/test.config b/conf/test.config index 71a424d6..4d6b1c40 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,8 +23,8 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test_airr.tsv' cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/C_primers.fasta' vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/V_primers.fasta' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' mode = 'fastq' diff --git a/conf/test_assembled_hs.config b/conf/test_assembled_hs.config index dd1b6c52..bb6caa19 100644 --- a/conf/test_assembled_hs.config +++ b/conf/test_assembled_hs.config @@ -19,8 +19,8 @@ params { // Input data mode = 'assembled' input = 
'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_assembled_metadata_hs.tsv' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' reassign = true productive_only = true diff --git a/conf/test_assembled_immcantation_devel_hs.config b/conf/test_assembled_immcantation_devel_hs.config index f924f66c..da5c8d56 100644 --- a/conf/test_assembled_immcantation_devel_hs.config +++ b/conf/test_assembled_immcantation_devel_hs.config @@ -19,8 +19,8 @@ params { // Input data mode = 'assembled' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_assembled_metadata_hs.tsv' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' reassign = true productive_only = true diff --git a/conf/test_assembled_immcantation_devel_mm.config b/conf/test_assembled_immcantation_devel_mm.config index f7fa1e33..33fd5bcb 100644 --- a/conf/test_assembled_immcantation_devel_mm.config +++ b/conf/test_assembled_immcantation_devel_mm.config @@ -19,8 +19,8 @@ params { // Input data mode = 'assembled' input = 
'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_assembled_metadata_mm.tsv' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' reassign = true productive_only = true diff --git a/conf/test_assembled_mm.config b/conf/test_assembled_mm.config index fe6fd57b..69ad5052 100644 --- a/conf/test_assembled_mm.config +++ b/conf/test_assembled_mm.config @@ -19,8 +19,8 @@ params { // Input data mode = 'assembled' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_assembled_metadata_mm.tsv' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' reassign = true productive_only = true diff --git a/conf/test_clontech_umi.config b/conf/test_clontech_umi.config index 552a7434..1d64ad1c 100644 --- a/conf/test_clontech_umi.config +++ b/conf/test_clontech_umi.config @@ -23,8 +23,8 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-clontech/samplesheet.tsv' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 
'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' clonal_threshold = 0.1 diff --git a/conf/test_full.config b/conf/test_full.config index 67d37c0b..7e3c131a 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -18,8 +18,8 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/metadata_pcr_umi_airr_300.tsv' cprimers = 's3://ngi-igenomes/test-data/airrflow/pcr_umi/cprimers.fasta' vprimers = 's3://ngi-igenomes/test-data/airrflow/pcr_umi/vprimers.fasta' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' lineage_trees = true diff --git a/conf/test_nebnext_umi.config b/conf/test_nebnext_umi.config index c96b16b3..76c9bbea 100644 --- a/conf/test_nebnext_umi.config +++ b/conf/test_nebnext_umi.config @@ -24,8 +24,8 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-neb/samplesheet.tsv' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 
'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' clonal_threshold = 0.1 diff --git a/conf/test_no_umi.config b/conf/test_no_umi.config index e17a6526..8800b20c 100644 --- a/conf/test_no_umi.config +++ b/conf/test_no_umi.config @@ -30,8 +30,8 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-no-umi/Metadata_test-no-umi_airr.tsv' cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-no-umi/Greiff2014_CPrimers.fasta' vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-no-umi/Greiff2014_VPrimers.fasta' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' } diff --git a/conf/test_nocluster.config b/conf/test_nocluster.config index 469de7b8..aabccb9b 100644 --- a/conf/test_nocluster.config +++ b/conf/test_nocluster.config @@ -23,8 +23,8 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test_airr.tsv' cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/C_primers.fasta' vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/V_primers.fasta' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 
'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' mode = 'fastq' diff --git a/conf/test_raw_immcantation_devel.config b/conf/test_raw_immcantation_devel.config index 7f362bdd..11b8ff69 100644 --- a/conf/test_raw_immcantation_devel.config +++ b/conf/test_raw_immcantation_devel.config @@ -24,8 +24,8 @@ params { cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/C_primers.fasta' vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/V_primers.fasta' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' mode = 'fastq' diff --git a/conf/test_tcr.config b/conf/test_tcr.config index 5010c010..5af84ee7 100644 --- a/conf/test_tcr.config +++ b/conf/test_tcr.config @@ -31,8 +31,8 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-tcr/TCR_metadata_airr.tsv' cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-tcr/cprimers.fasta' race_linker = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-tcr/linker.fasta' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 
'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' } diff --git a/docs/output.md b/docs/output.md index 1dedb05c..532bdf80 100644 --- a/docs/output.md +++ b/docs/output.md @@ -245,7 +245,7 @@ generate a `.fasta` file from the rearrangement table. -Assign genes with Igblast, using the IMGT database is performed by the [AssignGenes](https://changeo.readthedocs.io/en/stable/examples/igblast.html#running-igblast) command of the Change-O tool from the Immcantation Framework. +Assigning genes with Igblast using a germline reference is performed by the [AssignGenes](https://changeo.readthedocs.io/en/stable/examples/igblast.html#running-igblast) command of the Change-O tool from the Immcantation Framework. ### Make database from assigned genes @@ -482,7 +482,7 @@ Parsing the logs from the previous processes. Summary of the number of sequences Copy of the downloaded IMGT database by the process `fetch_databases`, used for the gene assignment step. -If databases are provided with `--imgtdb_base` and `--igblast_base` this folder will not be present. +If databases are provided with `--reference_fasta` and `--reference_igblast` this folder will not be present. 
## MultiQC diff --git a/modules/local/changeo/changeo_creategermlines.nf b/modules/local/changeo/changeo_creategermlines.nf index d424377a..35cf76d4 100644 --- a/modules/local/changeo/changeo_creategermlines.nf +++ b/modules/local/changeo/changeo_creategermlines.nf @@ -11,7 +11,7 @@ process CHANGEO_CREATEGERMLINES { input: tuple val(meta), path(tab) // sequence tsv table in AIRR format - path(imgt_base) // imgt db + path(reference_fasta) // reference fasta output: tuple val(meta), path("*germ-pass.tsv"), emit: tab @@ -22,7 +22,7 @@ process CHANGEO_CREATEGERMLINES { def args = task.ext.args ?: '' """ CreateGermlines.py -d ${tab} \\ - -r ${imgt_base}/${meta.species}/vdj/ \\ + -r ${reference_fasta}/${meta.species}/vdj/ \\ -g dmask --format airr \\ --log ${meta.id}.log --outname ${meta.id} $args > ${meta.id}_create-germlines_command_log.txt ParseLog.py -l ${meta.id}.log -f ID V_CALL D_CALL J_CALL diff --git a/modules/local/changeo/changeo_makedb.nf b/modules/local/changeo/changeo_makedb.nf index a71d6282..4862ba86 100644 --- a/modules/local/changeo/changeo_makedb.nf +++ b/modules/local/changeo/changeo_makedb.nf @@ -6,14 +6,13 @@ process CHANGEO_MAKEDB { conda "bioconda::changeo=1.3.0 bioconda::igblast=1.22.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- //TODO: update mulled containers when available 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' : 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' }" input: tuple val(meta), path(reads) // reads in fasta format path(igblast) // igblast fasta from ch_igblast_db_for_process_igblast.mix(ch_igblast_db_for_process_igblast_mix).collect() - path(imgt_base) + path(reference_fasta) output: tuple val(meta), path("*db-pass.tsv"), emit: tab //sequence table in AIRR format @@ -24,7 +23,7 @@ process CHANGEO_MAKEDB { def args = task.ext.args ?: '' """ MakeDb.py igblast -i $igblast -s $reads -r \\ - ${imgt_base}/${meta.species.toLowerCase()}/vdj/ \\ + ${reference_fasta}/${meta.species.toLowerCase()}/vdj/ \\ $args \\ --outname ${meta.id} > ${meta.id}_makedb_command_log.txt diff --git a/modules/local/enchantr/define_clones.nf b/modules/local/enchantr/define_clones.nf index 055c212a..64b8e7df 100644 --- a/modules/local/enchantr/define_clones.nf +++ b/modules/local/enchantr/define_clones.nf @@ -31,7 +31,7 @@ process DEFINE_CLONES { input: tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format val threshold - path imgt_base + path reference_fasta path repertoires_samplesheet output: @@ -53,7 +53,7 @@ process DEFINE_CLONES { """ Rscript -e "enchantr::enchantr_report('define_clones', \\ report_params=list('input'='${input}', \\ - 'imgt_db'='${imgt_base}', \\ + 'imgt_db'='${reference_fasta}', \\ 'species'='auto', \\ 'cloneby'='${params.cloneby}', \\ 'outputby'='${params.cloneby}', \\ diff --git a/modules/local/enchantr/remove_chimeric.nf b/modules/local/enchantr/remove_chimeric.nf index e6c73651..94805169 100644 --- a/modules/local/enchantr/remove_chimeric.nf +++ b/modules/local/enchantr/remove_chimeric.nf @@ -15,7 +15,7 @@ process REMOVE_CHIMERIC { input: tuple val(meta), path(tab) // sequence tsv in AIRR format - 
path(imgt_base) + path(reference_fasta) output: tuple val(meta), path("*chimera-pass.tsv"), emit: tab // sequence tsv in AIRR format diff --git a/modules/local/fetch_databases.nf b/modules/local/fetch_databases.nf index 2deb3cb4..07853277 100644 --- a/modules/local/fetch_databases.nf +++ b/modules/local/fetch_databases.nf @@ -10,7 +10,7 @@ process FETCH_DATABASES { output: path("igblast_base"), emit: igblast - path("imgtdb_base"), emit: imgt + path("imgtdb_base"), emit: reference_fasta path "versions.yml" , emit: versions path("igblast_base/database/imgt_human_ig_v.ndb"), emit: igblast_human_ig_v path("igblast_base/database/imgt_human_ig_d.ndb"), emit: igblast_human_ig_d diff --git a/nextflow.config b/nextflow.config index a5917cfa..00c53278 100644 --- a/nextflow.config +++ b/nextflow.config @@ -81,8 +81,8 @@ params { // ----------------------- productive_only = true reassign = true - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + reference_igblast = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + reference_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' fetch_imgt = false save_databases = true isotype_column = 'c_call' diff --git a/nextflow_schema.json b/nextflow_schema.json index dc75590a..cc33f8ff 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -338,16 +338,16 @@ "description": "Save databases so you can use the cache in future runs.", "fa_icon": "fas fa-file-download" }, - "imgtdb_base": { + "reference_fasta": { "type": "string", - "description": "Path to the cached IMGT database.", - "help_text": "By default, we provide a pre-downloaded version of the IMGT database. It is also possible to provide a custom IMGT reference database. 
To fetch a fresh version of IMGT, set the `--fetch_imgt` parameter instead.", + "description": "Path to the germline reference fasta.", + "help_text": "By default, we provide a pre-downloaded version of the IMGT database. It is also possible to provide a custom reference fasta database. To fetch a fresh version of IMGT, set the `--fetch_imgt` parameter instead.", "fa_icon": "fas fa-database" }, - "igblast_base": { + "reference_igblast": { "type": "string", "description": "Path to the cached igblast database.", - "help_text": "By default, we provide a pre-downloaded version of the IMGT database. It is also possible to provide a custom IMGT reference database. To fetch a fresh version of IMGT, set the `--fetch_imgt` parameter instead.", + "help_text": "By default, we provide a pre-downloaded version of the igblast database built from IMGT. It is also possible to provide a custom igblast reference database. To fetch a fresh version of IMGT, set the `--fetch_imgt` parameter instead.", "fa_icon": "fas fa-database" }, "fetch_imgt": { diff --git a/subworkflows/local/bulk_qc_and_filter.nf b/subworkflows/local/bulk_qc_and_filter.nf index 34b082d1..326fbe34 100644 --- a/subworkflows/local/bulk_qc_and_filter.nf +++ b/subworkflows/local/bulk_qc_and_filter.nf @@ -7,7 +7,7 @@ workflow BULK_QC_AND_FILTER { take: ch_repertoire // tuple [meta, repertoire_tab] - ch_imgt + ch_reference_fasta main: @@ -20,7 +20,7 @@ workflow BULK_QC_AND_FILTER { // Create germlines (not --cloned) CHANGEO_CREATEGERMLINES( ch_repertoire, - ch_imgt.collect() + ch_reference_fasta.collect() ) ch_logs = ch_logs.mix(CHANGEO_CREATEGERMLINES.out.logs) ch_versions = ch_versions.mix(CHANGEO_CREATEGERMLINES.out.versions) @@ -28,7 +28,7 @@ workflow BULK_QC_AND_FILTER { // Remove chimera REMOVE_CHIMERIC( CHANGEO_CREATEGERMLINES.out.tab, - ch_imgt.collect() + ch_reference_fasta.collect() ) ch_logs = ch_logs.mix(REMOVE_CHIMERIC.out.logs) ch_versions = ch_versions.mix(REMOVE_CHIMERIC.out.versions) diff --git 
a/subworkflows/local/clonal_analysis.nf b/subworkflows/local/clonal_analysis.nf index 176783da..68237551 100644 --- a/subworkflows/local/clonal_analysis.nf +++ b/subworkflows/local/clonal_analysis.nf @@ -7,7 +7,7 @@ include { DOWSER_LINEAGES } from '../../modules/local/enchantr/dowser_lineages' workflow CLONAL_ANALYSIS { take: ch_repertoire - ch_imgt + ch_reference_fasta ch_logo main: @@ -76,7 +76,7 @@ workflow CLONAL_ANALYSIS { DEFINE_CLONES_COMPUTE( ch_define_clones, clone_threshold.collect(), - ch_imgt.collect(), + ch_reference_fasta.collect(), [] ) @@ -102,7 +102,7 @@ workflow CLONAL_ANALYSIS { DEFINE_CLONES_REPORT( ch_all_repertoires_cloned, clone_threshold.collect(), - ch_imgt.collect(), + ch_reference_fasta.collect(), ch_all_repertoires_cloned_samplesheet ) ch_versions = DEFINE_CLONES_REPORT.out.versions diff --git a/subworkflows/local/databases.nf b/subworkflows/local/databases.nf index 594b340e..f5788e6e 100644 --- a/subworkflows/local/databases.nf +++ b/subworkflows/local/databases.nf @@ -1,6 +1,6 @@ include { FETCH_DATABASES } from '../../modules/local/fetch_databases' include { UNZIP_DB as UNZIP_IGBLAST } from '../../modules/local/unzip_db' -include { UNZIP_DB as UNZIP_IMGT } from '../../modules/local/unzip_db' +include { UNZIP_DB as UNZIP_FASTA } from '../../modules/local/unzip_db' workflow DATABASES { @@ -11,44 +11,44 @@ workflow DATABASES { // FETCH DATABASES if( !params.fetch_imgt ){ - if (params.igblast_base.endsWith(".zip")) { - Channel.fromPath("${params.igblast_base}") - .ifEmpty{ error "IGBLAST DB not found: ${params.igblast_base}" } + if (params.reference_igblast.endsWith(".zip")) { + Channel.fromPath("${params.reference_igblast}") + .ifEmpty{ error "IGBLAST DB not found: ${params.reference_igblast}" } .set { ch_igblast_zipped } UNZIP_IGBLAST( ch_igblast_zipped.collect() ) ch_igblast = UNZIP_IGBLAST.out.unzipped ch_versions = ch_versions.mix(UNZIP_IGBLAST.out.versions) } else { - Channel.fromPath("${params.igblast_base}") - .ifEmpty { error 
"IGBLAST DB not found: ${params.igblast_base}" } + Channel.fromPath("${params.reference_igblast}") + .ifEmpty { error "IGBLAST DB not found: ${params.reference_igblast}" } .set { ch_igblast } } } if( !params.fetch_imgt ){ - if (params.imgtdb_base.endsWith(".zip")) { - Channel.fromPath("${params.imgtdb_base}") - .ifEmpty{ error "IMGTDB not found: ${params.imgtdb_base}" } - .set { ch_imgt_zipped } - UNZIP_IMGT( ch_imgt_zipped.collect() ) - ch_imgt = UNZIP_IMGT.out.unzipped - ch_versions = ch_versions.mix(UNZIP_IMGT.out.versions) + if (params.reference_fasta.endsWith(".zip")) { + Channel.fromPath("${params.reference_fasta}") + .ifEmpty{ error "IMGTDB not found: ${params.reference_fasta}" } + .set { ch_reference_fasta_zipped } + UNZIP_FASTA( ch_reference_fasta_zipped.collect() ) + ch_reference_fasta = UNZIP_FASTA.out.unzipped + ch_versions = ch_versions.mix(UNZIP_FASTA.out.versions) } else { - Channel.fromPath("${params.imgtdb_base}") - .ifEmpty { error "IMGT DB not found: ${params.imgtdb_base}" } - .set { ch_imgt } + Channel.fromPath("${params.reference_fasta}") + .ifEmpty { error "IMGT DB not found: ${params.reference_fasta}" } + .set { ch_reference_fasta } } } if (params.fetch_imgt) { FETCH_DATABASES() ch_igblast = FETCH_DATABASES.out.igblast - ch_imgt = FETCH_DATABASES.out.imgt + ch_reference_fasta = FETCH_DATABASES.out.reference_fasta ch_versions = ch_versions.mix(FETCH_DATABASES.out.versions) } emit: versions = ch_versions - imgt = ch_imgt + reference_fasta = ch_reference_fasta igblast = ch_igblast } diff --git a/subworkflows/local/vdj_annotation.nf b/subworkflows/local/vdj_annotation.nf index 4ac2b9df..692320ec 100644 --- a/subworkflows/local/vdj_annotation.nf +++ b/subworkflows/local/vdj_annotation.nf @@ -13,7 +13,7 @@ workflow VDJ_ANNOTATION { ch_fasta // [meta, fasta] ch_validated_samplesheet ch_igblast - ch_imgt + ch_reference_fasta main: ch_versions = Channel.empty() @@ -30,7 +30,7 @@ workflow VDJ_ANNOTATION { CHANGEO_MAKEDB ( CHANGEO_ASSIGNGENES.out.fasta, 
CHANGEO_ASSIGNGENES.out.blast, - ch_imgt.collect() + ch_reference_fasta.collect() ) ch_logs = ch_logs.mix(CHANGEO_MAKEDB.out.logs) ch_versions = ch_versions.mix(CHANGEO_MAKEDB.out.versions) @@ -78,8 +78,8 @@ workflow VDJ_ANNOTATION { emit: versions = ch_versions repertoire = ADD_META_TO_TAB.out.tab - imgt = ch_imgt - igblast = ch_igblast + reference_fasta = ch_reference_fasta + reference_igblast = ch_igblast changeo_makedb_logs = ch_assignment_logs logs = ch_logs diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index 6b2eb589..bc6b7924 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -175,7 +175,7 @@ workflow AIRRFLOW { ch_fasta, ch_validated_samplesheet.collect(), DATABASES.out.igblast.collect(), - DATABASES.out.imgt.collect() + DATABASES.out.reference_fasta.collect() ) ch_versions = ch_versions.mix( VDJ_ANNOTATION.out.versions ) @@ -192,7 +192,7 @@ workflow AIRRFLOW { BULK_QC_AND_FILTER( ch_repertoire_by_processing.bulk, - VDJ_ANNOTATION.out.imgt.collect() + VDJ_ANNOTATION.out.reference_fasta.collect() ) ch_versions = ch_versions.mix( BULK_QC_AND_FILTER.out.versions ) @@ -215,7 +215,7 @@ workflow AIRRFLOW { // Clonal analysis CLONAL_ANALYSIS( ch_repertoires_for_clones, - VDJ_ANNOTATION.out.imgt.collect(), + VDJ_ANNOTATION.out.reference_fasta.collect(), ch_report_logo_img.collect().ifEmpty([]) ) ch_versions = ch_versions.mix( CLONAL_ANALYSIS.out.versions) From e598df31688cf58b8007b24d0e742b2e5d81cf89 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 22 Apr 2024 20:54:57 -0400 Subject: [PATCH 2/3] fix full size test --- conf/test_full.config | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/conf/test_full.config b/conf/test_full.config index 7e3c131a..0ac79d53 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -29,6 +29,7 @@ params { umi_length = 15 umi_start = 0 umi_position = 'R1' + isotype_column = 'c_primer' } process { @@ -41,13 +42,13 @@ process { withName:DEFINE_CLONES_COMPUTE{ ext.args = 
['outname':'', 'model':'hierarchical', 'method':'nt', 'linkage':'single', - 'min_n':30, 'isotype_column':'c_primer'] + 'min_n':30] } withName:DEFINE_CLONES_REPORT{ ext.args = ['outname':'', 'model':'hierarchical', 'method':'nt', 'linkage':'single', - 'min_n':30, 'isotype_column':'c_primer'] + 'min_n':30] } } From 553f317ee148e78372858f5ca9c0efbf9d8d0960 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Tue, 23 Apr 2024 09:07:35 -0400 Subject: [PATCH 3/3] update unzip fasta name --- subworkflows/local/databases.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/databases.nf b/subworkflows/local/databases.nf index f5788e6e..08e59108 100644 --- a/subworkflows/local/databases.nf +++ b/subworkflows/local/databases.nf @@ -1,6 +1,6 @@ include { FETCH_DATABASES } from '../../modules/local/fetch_databases' include { UNZIP_DB as UNZIP_IGBLAST } from '../../modules/local/unzip_db' -include { UNZIP_DB as UNZIP_FASTA } from '../../modules/local/unzip_db' +include { UNZIP_DB as UNZIP_REFERENCE_FASTA } from '../../modules/local/unzip_db' workflow DATABASES { @@ -30,9 +30,9 @@ workflow DATABASES { Channel.fromPath("${params.reference_fasta}") .ifEmpty{ error "IMGTDB not found: ${params.reference_fasta}" } .set { ch_reference_fasta_zipped } - UNZIP_FASTA( ch_reference_fasta_zipped.collect() ) - ch_reference_fasta = UNZIP_FASTA.out.unzipped - ch_versions = ch_versions.mix(UNZIP_FASTA.out.versions) + UNZIP_REFERENCE_FASTA( ch_reference_fasta_zipped.collect() ) + ch_reference_fasta = UNZIP_REFERENCE_FASTA.out.unzipped + ch_versions = ch_versions.mix(UNZIP_REFERENCE_FASTA.out.versions) } else { Channel.fromPath("${params.reference_fasta}") .ifEmpty { error "IMGT DB not found: ${params.reference_fasta}" }