From 6aed50284f6b208fd8eff1ec1dae4b25bf03c432 Mon Sep 17 00:00:00 2001 From: Louis LE NEZET <58640615+LouisLeNezet@users.noreply.github.com> Date: Wed, 4 Dec 2024 05:23:21 +0100 Subject: [PATCH] Change GLIMPSE2_concordance non mandatory values (#7146) * Update glimpse2 * Fix argument name * Fix nf-test * Improve glimpse2 concordance --------- Co-authored-by: Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com> --- modules/nf-core/glimpse2/concordance/main.nf | 30 ++-- modules/nf-core/glimpse2/concordance/meta.yml | 4 +- .../glimpse2/concordance/tests/main.nf.test | 155 ++++-------------- .../concordance/tests/main.nf.test.snap | 45 +---- .../concordance/tests/nextflow.config | 4 +- .../concordance/tests/nextflow_R2.config | 7 - 6 files changed, 65 insertions(+), 180 deletions(-) delete mode 100644 modules/nf-core/glimpse2/concordance/tests/nextflow_R2.config diff --git a/modules/nf-core/glimpse2/concordance/main.nf b/modules/nf-core/glimpse2/concordance/main.nf index 06eb139b238..f96f5c3899b 100644 --- a/modules/nf-core/glimpse2/concordance/main.nf +++ b/modules/nf-core/glimpse2/concordance/main.nf @@ -9,9 +9,7 @@ process GLIMPSE2_CONCORDANCE { input: tuple val(meta), path(estimate), path(estimate_index), path(truth), path(truth_index), path(freq), path(freq_index), path(samples), val(region) - tuple val(meta2), path(groups), val(bins), val(ac_bins), val(allele_counts) - val(min_val_gl) - val(min_val_dp) + tuple val(meta2), path(groups), val(bins), val(ac_bins), val(allele_counts), val(min_val_gl), val(min_val_dp) output: tuple val(meta), path("*.error.cal.txt.gz") , emit: errors_cal @@ -26,17 +24,23 @@ process GLIMPSE2_CONCORDANCE { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def samples_cmd = samples ? "--samples ${samples}" : "" - def groups_cmd = groups ? "--groups ${groups}" : "" - def bins_cmd = bins ? "--bins ${bins}" : "" - def ac_bins_cmd = ac_bins ? "--ac-bins ${ac_bins}" : "" - def ale_ct_cmd = allele_counts ? "--allele-counts ${allele_counts}" : "" - def region_str = region instanceof List ? region.join('\\n') : region + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samples_cmd = samples ? "--samples ${samples}" : "" + def groups_cmd = groups ? "--groups ${groups}" : "" + def bins_cmd = bins ? "--bins ${bins}" : "" + def ac_bins_cmd = ac_bins ? "--ac-bins ${ac_bins}" : "" + def ale_ct_cmd = allele_counts ? "--allele-counts ${allele_counts}" : "" + def min_val_gl_cmd = min_val_gl ? "--min-val-gl ${min_val_gl}" : "" + def min_val_dp_cmd = min_val_dp ? "--min-val-dp ${min_val_dp}" : "" + def region_str = region instanceof List ? region.join('\\n') : region if (((groups ? 1:0) + (bins ? 1:0) + (ac_bins ? 1:0) + (allele_counts ? 1:0)) != 1) error "One and only one argument should be selected between groups, bins, ac_bins, allele_counts" + if (args.contains("--gt-val")) { + assert !(min_val_gl || min_val_dp) : "If --gt-val is set, --min-val-gl nor --min-val-dp must be set" + } + """ printf '$region_str' > regions.txt sed 's/\$/ $freq $truth $estimate/' regions.txt > input.txt @@ -47,8 +51,8 @@ process GLIMPSE2_CONCORDANCE { $bins_cmd \\ $ac_bins_cmd \\ $ale_ct_cmd \\ - --min-val-gl $min_val_gl \\ - --min-val-dp $min_val_dp \\ + $min_val_gl_cmd \\ + $min_val_dp_cmd \\ --input input.txt \\ --thread $task.cpus \\ --output ${prefix} diff --git a/modules/nf-core/glimpse2/concordance/meta.yml b/modules/nf-core/glimpse2/concordance/meta.yml index f286f6e929d..a9e27c907ac 100644 --- a/modules/nf-core/glimpse2/concordance/meta.yml +++ b/modules/nf-core/glimpse2/concordance/meta.yml @@ -79,12 +79,12 @@ input: description: | Default allele count bins used for rsquared computations. AN field must be defined in the frequency file. - - - min_val_gl: + - min_val_gl: type: float description: | Minimum genotype likelihood probability P(G|R) in validation data. Set to zero to have no filter of if using –gt-validation - - - min_val_dp: + - min_val_dp: type: integer description: | Minimum coverage in validation data. diff --git a/modules/nf-core/glimpse2/concordance/tests/main.nf.test b/modules/nf-core/glimpse2/concordance/tests/main.nf.test index 9c5625207ac..d1c68bc208d 100644 --- a/modules/nf-core/glimpse2/concordance/tests/main.nf.test +++ b/modules/nf-core/glimpse2/concordance/tests/main.nf.test @@ -10,78 +10,36 @@ nextflow_process { tag "modules_nfcore" tag "modules" - setup { - run("GLIMPSE2_PHASE") { - script "../../phase/main.nf" - process { - """ - input_vcf = Channel.of([ - [ id:'input'], // meta map - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true), - [] - ]) - - samples_infos = Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt') - region = Channel.of(["chr21:16600000-16800000","chr21:16650000-16750000"]) - - ch_ref_panel = Channel.of([ - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) - ]) - - ch_map = Channel.of([ - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), - ]) - - // [meta, vcf, index, sample_infos, regionin, regionout, regionindex, ref, ref_index, map] - input[0] = input_vcf - .combine(samples_infos) - .combine(region) - .combine( ch_ref_panel ) - .combine( ch_map ) - input[1]= Channel.of([[],[],[]]) - """ - } - } - - run("BCFTOOLS_INDEX") { - script "../../../bcftools/index/main.nf" - process { - """ - input[0] = GLIMPSE2_PHASE.output.phased_variants - """ - } - } - } - - - test("test_glimpse2_concordance") { + test("test glimpse2 concordance") { config "./nextflow.config" when { + params { + glimpse2_concordance_args = "--gt-val --af-tag AF" + } process { """ - allele_freq = Channel.of([ - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz",checkIfExists:true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz.csi",checkIfExists:true) + target = Channel.of([ + [id: "input"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz",checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi",checkIfExists:true) ]) - truth = Channel.of([ - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf",checkIfExists:true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf.csi",checkIfExists:true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz",checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi",checkIfExists:true) ]) - list_inputs = GLIMPSE2_PHASE.output.phased_variants - .join( BCFTOOLS_INDEX.out.csi ) + allele_freq = Channel.of([ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz",checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz.csi",checkIfExists:true) + ]) + list_inputs = target .combine( truth ) .combine( allele_freq ) .combine( Channel.of([[]]) ) - .combine( Channel.of(["chr21"]) ) + .combine( Channel.of(["chr22"]) ) input[0] = list_inputs - input[1] = Channel.of([[id:"params"],[],"0 0.01 0.05 0.1 0.2 0.5",[],[]]) - input[2] = 0.9999 - input[3] = 8 + input[1] = Channel.of([[id:"params"], [],"0 0.01 0.05 0.1 0.2 0.5", [], [], [], []]) """ } } @@ -102,78 +60,37 @@ nextflow_process { } - test("test_list_region") { + test("test list of region and rsquare per site") { config "./nextflow.config" when { + params { + glimpse2_concordance_args = "--gt-val --af-tag AF --out-r2-per-site" + } process { """ - allele_freq = Channel.of([ - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz",checkIfExists:true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz.csi",checkIfExists:true) + target = Channel.of([ + [id: "input"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz",checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi",checkIfExists:true) ]) - truth = Channel.of([ - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf",checkIfExists:true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf.csi",checkIfExists:true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz",checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi",checkIfExists:true) ]) - list_inputs = GLIMPSE2_PHASE.output.phased_variants - .join( BCFTOOLS_INDEX.out.csi ) - .combine( truth ) - .combine( allele_freq ) - .combine( Channel.of([[]]) ) - .combine( Channel.of(["chr21", "chr21"]) ) - - input[0] = list_inputs - input[1] = Channel.of([[id:"params"],[],"0 0.01 0.05 0.1 0.2 0.5",[],[]]) - input[2] = 0.9999 - input[3] = 8 - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.errors_cal.collect{ file(it[1]).name }, - process.out.errors_grp.collect{ file(it[1]).name }, - process.out.errors_spl.collect{ file(it[1]).name }, - process.out.rsquare_grp.collect{ file(it[1]).name }, - process.out.rsquare_spl.collect{ file(it[1]).name }, - process.out.versions - ).match() - } - ) - } - - } - - test("test_r2_per_site") { - config "./nextflow_R2.config" - - when { - process { - """ allele_freq = Channel.of([ - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz",checkIfExists:true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz.csi",checkIfExists:true) - ]) - - truth = Channel.of([ - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf",checkIfExists:true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf.csi",checkIfExists:true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz",checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz.csi",checkIfExists:true) ]) - list_inputs = GLIMPSE2_PHASE.output.phased_variants - .join( BCFTOOLS_INDEX.out.csi ) + list_inputs = target .combine( truth ) .combine( allele_freq ) .combine( Channel.of([[]]) ) - .combine( Channel.of(["chr21"]) ) + .combine( Channel.of([["chr22", "chr22"]]) ) + .view() + input[0] = list_inputs - input[1] = Channel.of([[id:"params"],[],"0 0.01 0.05 0.1 0.2 0.5",[],[]]) - input[2] = 0.9999 - input[3] = 8 + input[1] = Channel.of([[id:"params"], [],"0 0.01 0.05 0.1 0.2 0.5", [], [], [], []]) """ } } @@ -189,11 +106,9 @@ nextflow_process { process.out.rsquare_spl.collect{ file(it[1]).name }, process.out.rsquare_per_site.collect{ file(it[1]).name }, process.out.versions - ).match() - } + ).match() } ) } } - } diff --git a/modules/nf-core/glimpse2/concordance/tests/main.nf.test.snap b/modules/nf-core/glimpse2/concordance/tests/main.nf.test.snap index 12bded93619..5e81d44e2f0 100644 --- a/modules/nf-core/glimpse2/concordance/tests/main.nf.test.snap +++ b/modules/nf-core/glimpse2/concordance/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "test_r2_per_site": { + "test glimpse2 concordance": { "content": [ [ "input.error.cal.txt.gz" @@ -16,20 +16,17 @@ [ "input.rsquare.spl.txt.gz" ], - [ - "input_r2_sites.txt.gz" - ], [ "versions.yml:md5,ba729289bab6b9fbb8c36a620c86bb82" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.1", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-22T16:23:00.623182365" + "timestamp": "2024-12-03T16:22:35.440086384" }, - "test_glimpse2_concordance": { + "test list of region and rsquare per site": { "content": [ [ "input.error.cal.txt.gz" @@ -47,40 +44,16 @@ "input.rsquare.spl.txt.gz" ], [ - "versions.yml:md5,ba729289bab6b9fbb8c36a620c86bb82" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" - }, - "timestamp": "2024-10-22T16:22:26.823581573" - }, - "test_list_region": { - "content": [ - [ - "input.error.cal.txt.gz" - ], - [ - "input.error.grp.txt.gz" - ], - [ - "input.error.spl.txt.gz" - ], - [ - "input.rsquare.grp.txt.gz" - ], - [ - "input.rsquare.spl.txt.gz" + "input_r2_sites.txt.gz" ], [ "versions.yml:md5,ba729289bab6b9fbb8c36a620c86bb82" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.1", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-22T16:22:43.352729014" + "timestamp": "2024-12-03T16:26:35.009071185" } } \ No newline at end of file diff --git a/modules/nf-core/glimpse2/concordance/tests/nextflow.config b/modules/nf-core/glimpse2/concordance/tests/nextflow.config index e5721995778..f9242b4b0e7 100644 --- a/modules/nf-core/glimpse2/concordance/tests/nextflow.config +++ b/modules/nf-core/glimpse2/concordance/tests/nextflow.config @@ -1,6 +1,6 @@ process { - withName: GLIMPSE2_CHUNK { + withName: GLIMPSE2_CONCORDANCE { + ext.args = { params.glimpse2_concordance_args} ext.prefix = { "${meta.id}" } } - publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } } \ No newline at end of file diff --git a/modules/nf-core/glimpse2/concordance/tests/nextflow_R2.config b/modules/nf-core/glimpse2/concordance/tests/nextflow_R2.config deleted file mode 100644 index 88174ccd131..00000000000 --- a/modules/nf-core/glimpse2/concordance/tests/nextflow_R2.config +++ /dev/null @@ -1,7 +0,0 @@ -process { - publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - - withName:GLIMPSE2_CONCORDANCE { - ext.args = "--out-r2-per-site" - } -}