From 4d5154fc10cc429d69dcc93cebab89cc6be1b336 Mon Sep 17 00:00:00 2001 From: Pontus Hojer Date: Thu, 28 Nov 2019 09:42:00 +0100 Subject: [PATCH 1/2] Changed naming of VCF file to separate between called variants and those supplied from a reference. --- src/blr/Snakefile | 32 ++++++++++++++++---------------- src/blr/rules/phasing.smk | 8 +++++--- tests/test_cli.py | 26 +++++++++++++++++--------- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/blr/Snakefile b/src/blr/Snakefile index fc01433..284b0ef 100644 --- a/src/blr/Snakefile +++ b/src/blr/Snakefile @@ -176,19 +176,19 @@ rule bam_to_fastq: " SECOND_END_FASTQ={output.r2_fastq} 2>> {log}" -if config["reference_variants"]: - rule link: - output: "reference.vcf" - shell: - "ln -s {config[reference_variants]} {output}" -else: - rule call_variants_freebayes: - output: - vcf = "reference.vcf" - input: - bam = "mapped.sorted.tag.mkdup.bcmerge.mol.filt.bam" - log: "call_variants_freebayes.log" - shell: - "freebayes" - " -f {config[genome_reference]}" - " {input.bam} 1> {output.vcf} 2> {log}" +rule symlink_reference_variants: + output: "variants.reference.vcf" + shell: + "ln -s {config[reference_variants]} {output}" + + +rule call_variants_freebayes: + output: + vcf = "variants.called.vcf" + input: + bam = "mapped.sorted.tag.mkdup.bcmerge.mol.filt.bam" + log: "call_variants_freebayes.log" + shell: + "freebayes" + " -f {config[genome_reference]}" + " {input.bam} 1> {output.vcf} 2> {log}" diff --git a/src/blr/rules/phasing.smk b/src/blr/rules/phasing.smk index 210ec5a..be1d834 100644 --- a/src/blr/rules/phasing.smk +++ b/src/blr/rules/phasing.smk @@ -1,10 +1,12 @@ +variants = "variants.reference.vcf" if config["reference_variants"] else "variants.called.vcf" + rule hapcut2_extracthairs: output: unlinked = "mapped.sorted.tag.mkdup.bcmerge.mol.filt.unlinked.txt" input: bam = "mapped.sorted.tag.mkdup.bcmerge.mol.filt.bam", - vcf = "reference.vcf" + vcf = variants log: "hapcut2_extracthairs.log" shell: "extractHAIRS" @@ -19,7 +21,7 @@ rule hapcut2_linkfragments: linked = "mapped.sorted.tag.mkdup.bcmerge.mol.filt.linked.txt" input: bam = "mapped.sorted.tag.mkdup.bcmerge.mol.filt.bam", - vcf = "reference.vcf", + vcf = variants, unlinked = "mapped.sorted.tag.mkdup.bcmerge.mol.filt.unlinked.txt" log: "hapcut2_linkfragments.log" shell: @@ -36,7 +38,7 @@ rule hapcut2_phasing: phased_vcf = "mapped.sorted.tag.mkdup.bcmerge.mol.filt.phase.phased.VCF" input: linked = "mapped.sorted.tag.mkdup.bcmerge.mol.filt.linked.txt", - vcf = "reference.vcf" + vcf = variants log: "hapcut2_phasing.log" shell: "hapcut2" diff --git a/tests/test_cli.py b/tests/test_cli.py index 4a633c7..7b1afe5 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -78,17 +78,25 @@ def test_final_compressed_reads_exist(tmpdir): assert Path(workdir / filename).exists() -@pytest.mark.parametrize("reference_variants", [REFERENCE_VARIANTS, "null"]) -def test_reference_variants(tmpdir, reference_variants): +def test_link_reference_variants(tmpdir): workdir = tmpdir / "analysis" init(workdir, TESTDATA_READS) change_config( workdir / DEFAULT_CONFIG, - [("genome_reference", REFERENCE_GENOME), ("reference_variants", reference_variants)] + [("genome_reference", REFERENCE_GENOME), ("reference_variants", REFERENCE_VARIANTS)] ) - target = "reference.vcf" - run(workdir=workdir, targets=["reference.vcf"]) - if reference_variants != "null": - assert Path(workdir / target).is_symlink() - else: - assert Path(workdir / target).is_file() + target = "variants.reference.vcf" + run(workdir=workdir, targets=[target]) + assert Path(workdir / target).is_symlink() + + +def test_call_variants(tmpdir): + workdir = tmpdir / "analysis" + init(workdir, TESTDATA_READS) + change_config( + workdir / DEFAULT_CONFIG, + [("genome_reference", REFERENCE_GENOME), ("reference_variants", "null")] + ) + target = "variants.called.vcf" + run(workdir=workdir, targets=[target]) + assert Path(workdir / target).exists() From ad9b0c619a072cf6cb63cdaa1e4862f1450c8756 Mon Sep 17 00:00:00 2001 From: pontushojer Date: Thu, 28 Nov 2019 17:14:32 +0100 Subject: [PATCH 2/2] Changed test for called variants --- tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 7b1afe5..c0b66db 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -99,4 +99,4 @@ def test_call_variants(tmpdir): ) target = "variants.called.vcf" run(workdir=workdir, targets=[target]) - assert Path(workdir / target).exists() + assert Path(workdir / target).is_file()