Skip to content

Commit

Permalink
Change GLIMPSE2_concordance non mandatory values (nf-core#7146)
Browse files Browse the repository at this point in the history
* Update glimpse2

* Fix argument name

* Fix nf-test

* Improve glimpse2 concordance

---------

Co-authored-by: Sateesh_Peri <[email protected]>
  • Loading branch information
LouisLeNezet and sateeshperi authored Dec 4, 2024
1 parent 23600f6 commit 6aed502
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 180 deletions.
30 changes: 17 additions & 13 deletions modules/nf-core/glimpse2/concordance/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ process GLIMPSE2_CONCORDANCE {

input:
tuple val(meta), path(estimate), path(estimate_index), path(truth), path(truth_index), path(freq), path(freq_index), path(samples), val(region)
tuple val(meta2), path(groups), val(bins), val(ac_bins), val(allele_counts)
val(min_val_gl)
val(min_val_dp)
tuple val(meta2), path(groups), val(bins), val(ac_bins), val(allele_counts), val(min_val_gl), val(min_val_dp)

output:
tuple val(meta), path("*.error.cal.txt.gz") , emit: errors_cal
Expand All @@ -26,17 +24,23 @@ process GLIMPSE2_CONCORDANCE {
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def samples_cmd = samples ? "--samples ${samples}" : ""
def groups_cmd = groups ? "--groups ${groups}" : ""
def bins_cmd = bins ? "--bins ${bins}" : ""
def ac_bins_cmd = ac_bins ? "--ac-bins ${ac_bins}" : ""
def ale_ct_cmd = allele_counts ? "--allele-counts ${allele_counts}" : ""
def region_str = region instanceof List ? region.join('\\n') : region
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def samples_cmd = samples ? "--samples ${samples}" : ""
def groups_cmd = groups ? "--groups ${groups}" : ""
def bins_cmd = bins ? "--bins ${bins}" : ""
def ac_bins_cmd = ac_bins ? "--ac-bins ${ac_bins}" : ""
def ale_ct_cmd = allele_counts ? "--allele-counts ${allele_counts}" : ""
def min_val_gl_cmd = min_val_gl ? "--min-val-gl ${min_val_gl}" : ""
def min_val_dp_cmd = min_val_dp ? "--min-val-dp ${min_val_dp}" : ""
def region_str = region instanceof List ? region.join('\\n') : region

if (((groups ? 1:0) + (bins ? 1:0) + (ac_bins ? 1:0) + (allele_counts ? 1:0)) != 1) error "One and only one argument should be selected between groups, bins, ac_bins, allele_counts"

if (args.contains("--gt-val")) {
assert !(min_val_gl || min_val_dp) : "If --gt-val is set, --min-val-gl nor --min-val-dp must be set"
}

"""
printf '$region_str' > regions.txt
sed 's/\$/ $freq $truth $estimate/' regions.txt > input.txt
Expand All @@ -47,8 +51,8 @@ process GLIMPSE2_CONCORDANCE {
$bins_cmd \\
$ac_bins_cmd \\
$ale_ct_cmd \\
--min-val-gl $min_val_gl \\
--min-val-dp $min_val_dp \\
$min_val_gl_cmd \\
$min_val_dp_cmd \\
--input input.txt \\
--thread $task.cpus \\
--output ${prefix}
Expand Down
4 changes: 2 additions & 2 deletions modules/nf-core/glimpse2/concordance/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,12 @@ input:
description: |
Default allele count bins used for rsquared computations.
AN field must be defined in the frequency file.
- - min_val_gl:
- min_val_gl:
type: float
description: |
Minimum genotype likelihood probability P(G|R) in validation data.
Set to zero to have no filter or if using --gt-val
- - min_val_dp:
- min_val_dp:
type: integer
description: |
Minimum coverage in validation data.
Expand Down
155 changes: 35 additions & 120 deletions modules/nf-core/glimpse2/concordance/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -10,78 +10,36 @@ nextflow_process {
tag "modules_nfcore"
tag "modules"

setup {
run("GLIMPSE2_PHASE") {
script "../../phase/main.nf"
process {
"""
input_vcf = Channel.of([
[ id:'input'], // meta map
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true),
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true),
[]
])
samples_infos = Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt')
region = Channel.of(["chr21:16600000-16800000","chr21:16650000-16750000"])
ch_ref_panel = Channel.of([
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true),
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true)
])
ch_map = Channel.of([
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true),
])
// [meta, vcf, index, sample_infos, regionin, regionout, regionindex, ref, ref_index, map]
input[0] = input_vcf
.combine(samples_infos)
.combine(region)
.combine( ch_ref_panel )
.combine( ch_map )
input[1]= Channel.of([[],[],[]])
"""
}
}

run("BCFTOOLS_INDEX") {
script "../../../bcftools/index/main.nf"
process {
"""
input[0] = GLIMPSE2_PHASE.output.phased_variants
"""
}
}
}


test("test_glimpse2_concordance") {
test("test glimpse2 concordance") {
config "./nextflow.config"

when {
params {
glimpse2_concordance_args = "--gt-val --af-tag AF"
}
process {
"""
allele_freq = Channel.of([
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz",checkIfExists:true),
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz.csi",checkIfExists:true)
target = Channel.of([
[id: "input"],
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz",checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi",checkIfExists:true)
])
truth = Channel.of([
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf",checkIfExists:true),
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf.csi",checkIfExists:true)
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz",checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi",checkIfExists:true)
])
list_inputs = GLIMPSE2_PHASE.output.phased_variants
.join( BCFTOOLS_INDEX.out.csi )
allele_freq = Channel.of([
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz",checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz.csi",checkIfExists:true)
])
list_inputs = target
.combine( truth )
.combine( allele_freq )
.combine( Channel.of([[]]) )
.combine( Channel.of(["chr21"]) )
.combine( Channel.of(["chr22"]) )
input[0] = list_inputs
input[1] = Channel.of([[id:"params"],[],"0 0.01 0.05 0.1 0.2 0.5",[],[]])
input[2] = 0.9999
input[3] = 8
input[1] = Channel.of([[id:"params"], [],"0 0.01 0.05 0.1 0.2 0.5", [], [], [], []])
"""
}
}
Expand All @@ -102,78 +60,37 @@ nextflow_process {

}

test("test_list_region") {
test("test list of region and rsquare per site") {
config "./nextflow.config"

when {
params {
glimpse2_concordance_args = "--gt-val --af-tag AF --out-r2-per-site"
}
process {
"""
allele_freq = Channel.of([
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz",checkIfExists:true),
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz.csi",checkIfExists:true)
target = Channel.of([
[id: "input"],
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz",checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi",checkIfExists:true)
])
truth = Channel.of([
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf",checkIfExists:true),
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf.csi",checkIfExists:true)
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz",checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi",checkIfExists:true)
])
list_inputs = GLIMPSE2_PHASE.output.phased_variants
.join( BCFTOOLS_INDEX.out.csi )
.combine( truth )
.combine( allele_freq )
.combine( Channel.of([[]]) )
.combine( Channel.of(["chr21", "chr21"]) )
input[0] = list_inputs
input[1] = Channel.of([[id:"params"],[],"0 0.01 0.05 0.1 0.2 0.5",[],[]])
input[2] = 0.9999
input[3] = 8
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.errors_cal.collect{ file(it[1]).name },
process.out.errors_grp.collect{ file(it[1]).name },
process.out.errors_spl.collect{ file(it[1]).name },
process.out.rsquare_grp.collect{ file(it[1]).name },
process.out.rsquare_spl.collect{ file(it[1]).name },
process.out.versions
).match()
}
)
}

}

test("test_r2_per_site") {
config "./nextflow_R2.config"

when {
process {
"""
allele_freq = Channel.of([
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz",checkIfExists:true),
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz.csi",checkIfExists:true)
])
truth = Channel.of([
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf",checkIfExists:true),
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf.csi",checkIfExists:true)
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz",checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz.csi",checkIfExists:true)
])
list_inputs = GLIMPSE2_PHASE.output.phased_variants
.join( BCFTOOLS_INDEX.out.csi )
list_inputs = target
.combine( truth )
.combine( allele_freq )
.combine( Channel.of([[]]) )
.combine( Channel.of(["chr21"]) )
.combine( Channel.of([["chr22", "chr22"]]) )
.view()
input[0] = list_inputs
input[1] = Channel.of([[id:"params"],[],"0 0.01 0.05 0.1 0.2 0.5",[],[]])
input[2] = 0.9999
input[3] = 8
input[1] = Channel.of([[id:"params"], [],"0 0.01 0.05 0.1 0.2 0.5", [], [], [], []])
"""
}
}
Expand All @@ -189,11 +106,9 @@ nextflow_process {
process.out.rsquare_spl.collect{ file(it[1]).name },
process.out.rsquare_per_site.collect{ file(it[1]).name },
process.out.versions
).match()
}
).match() }
)
}

}

}
45 changes: 9 additions & 36 deletions modules/nf-core/glimpse2/concordance/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"test_r2_per_site": {
"test glimpse2 concordance": {
"content": [
[
"input.error.cal.txt.gz"
Expand All @@ -16,20 +16,17 @@
[
"input.rsquare.spl.txt.gz"
],
[
"input_r2_sites.txt.gz"
],
[
"versions.yml:md5,ba729289bab6b9fbb8c36a620c86bb82"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.3"
"nf-test": "0.9.1",
"nextflow": "24.10.1"
},
"timestamp": "2024-10-22T16:23:00.623182365"
"timestamp": "2024-12-03T16:22:35.440086384"
},
"test_glimpse2_concordance": {
"test list of region and rsquare per site": {
"content": [
[
"input.error.cal.txt.gz"
Expand All @@ -47,40 +44,16 @@
"input.rsquare.spl.txt.gz"
],
[
"versions.yml:md5,ba729289bab6b9fbb8c36a620c86bb82"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
"timestamp": "2024-10-22T16:22:26.823581573"
},
"test_list_region": {
"content": [
[
"input.error.cal.txt.gz"
],
[
"input.error.grp.txt.gz"
],
[
"input.error.spl.txt.gz"
],
[
"input.rsquare.grp.txt.gz"
],
[
"input.rsquare.spl.txt.gz"
"input_r2_sites.txt.gz"
],
[
"versions.yml:md5,ba729289bab6b9fbb8c36a620c86bb82"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.3"
"nf-test": "0.9.1",
"nextflow": "24.10.1"
},
"timestamp": "2024-10-22T16:22:43.352729014"
"timestamp": "2024-12-03T16:26:35.009071185"
}
}
4 changes: 2 additions & 2 deletions modules/nf-core/glimpse2/concordance/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process {
withName: GLIMPSE2_CHUNK {
withName: GLIMPSE2_CONCORDANCE {
ext.args = { params.glimpse2_concordance_args}
ext.prefix = { "${meta.id}" }
}
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}
7 changes: 0 additions & 7 deletions modules/nf-core/glimpse2/concordance/tests/nextflow_R2.config

This file was deleted.

0 comments on commit 6aed502

Please sign in to comment.