From 181275ee1bf9fb7ca0454ac2e82ddce5ed44fd21 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Tue, 13 Sep 2022 14:41:21 +0200 Subject: [PATCH] Adding explicit support for SV/CNV calling tools (#68) Closes: #68 Closes: #60 Related-Issue: #68 Projected-Results-Impact: none --- README.md | 67 +++-- .../Case_1_index.delly2.gts.tsv-expected | 2 +- .../annotate_svs/AnnotateSvsVcf.java | 7 +- .../AnnotateDellyVcf37Chr1Test.java | 36 +-- .../AnnotateDellyVcf37Chr22Test.java | 2 +- .../AnnotateDellyVcf38Chr1Test.java | 31 ++- .../NA-12878WGS_dragen.cnv.gts.tsv | 4 +- .../NA-12878WGS_dragen.sv.gts.tsv | 4 +- ...a.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv | 4 +- .../bwa.gcnv.NA12878-N1-DNA1-WGS1.gts.tsv | 4 +- .../annotate_svs/CallerSupport.java | 89 +++++++ .../annotate_svs/CallerSupportDelly2.java | 60 +++++ .../annotate_svs/CallerSupportDragenCnv.java | 67 +++++ .../annotate_svs/CallerSupportDragenSv.java | 58 +++++ .../annotate_svs/CallerSupportFactory.java | 28 +++ .../annotate_svs/CallerSupportGatkGcnv.java | 35 +++ .../annotate_svs/CallerSupportGeneric.java | 36 +++ .../annotate_svs/CallerSupportManta.java | 53 ++++ .../annotate_svs/CallerSupportXhmm.java | 35 +++ .../annotate_svs/GtRecordBuilder.java | 237 +++++++++--------- .../annotate_svs/SampleGenotype.java | 207 +++++++++++++++ .../annotate_svs/SampleGenotypeBuilder.java | 164 ++++++++++++ .../annotate_svs/SvCaller.java | 12 + .../varfish_annotator/utils/HtsjdkUtils.java | 18 ++ .../varfish_annotator/ResourceUtils.java | 53 ++++ .../CallerSupportDellySvTest.java | 60 +++++ .../CallerSupportDragenCnvTest.java | 61 +++++ .../CallerSupportDragenSvTest.java | 60 +++++ .../CallerSupportFactoryTest.java | 35 +++ .../CallerSupportGatkGcnvTest.java | 60 +++++ .../CallerSupportGenericTest.java | 50 ++++ .../annotate_svs/CallerSupportMantaTest.java | 60 +++++ .../annotate_svs/CallerSupportXhmmTest.java | 60 +++++ .../SampleGenotypeBuilderTest.java | 41 +++ .../annotate_svs/SampleGenotypeTest.java | 62 +++++ 
.../test/resources/callers-sv/delly2-head.vcf | 132 ++++++++++ .../resources/callers-sv/dragen-cnv-head.vcf | 48 ++++ .../resources/callers-sv/dragen-sv-head.vcf | 133 ++++++++++ .../test/resources/callers-sv/gcnv-head.vcf | 109 ++++++++ .../resources/callers-sv/generic-head.vcf | 14 ++ .../test/resources/callers-sv/manta-head.vcf | 128 ++++++++++ .../test/resources/callers-sv/xhmm-head.vcf | 59 +++++ 42 files changed, 2306 insertions(+), 179 deletions(-) create mode 100644 varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupport.java create mode 100644 varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDelly2.java create mode 100644 varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenCnv.java create mode 100644 varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenSv.java create mode 100644 varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportFactory.java create mode 100644 varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGatkGcnv.java create mode 100644 varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGeneric.java create mode 100644 varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportManta.java create mode 100644 varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportXhmm.java create mode 100644 varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotype.java create mode 100644 varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotypeBuilder.java create mode 100644 
varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SvCaller.java create mode 100644 varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/utils/HtsjdkUtils.java create mode 100644 varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/ResourceUtils.java create mode 100644 varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDellySvTest.java create mode 100644 varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenCnvTest.java create mode 100644 varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenSvTest.java create mode 100644 varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportFactoryTest.java create mode 100644 varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGatkGcnvTest.java create mode 100644 varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGenericTest.java create mode 100644 varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportMantaTest.java create mode 100644 varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportXhmmTest.java create mode 100644 varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotypeBuilderTest.java create mode 100644 varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotypeTest.java create mode 100644 varfish-annotator-core/src/test/resources/callers-sv/delly2-head.vcf create mode 100644 varfish-annotator-core/src/test/resources/callers-sv/dragen-cnv-head.vcf create mode 100644 varfish-annotator-core/src/test/resources/callers-sv/dragen-sv-head.vcf create mode 100644 
varfish-annotator-core/src/test/resources/callers-sv/gcnv-head.vcf create mode 100644 varfish-annotator-core/src/test/resources/callers-sv/generic-head.vcf create mode 100644 varfish-annotator-core/src/test/resources/callers-sv/manta-head.vcf create mode 100644 varfish-annotator-core/src/test/resources/callers-sv/xhmm-head.vcf diff --git a/README.md b/README.md index bdf67a9..82f15ec 100644 --- a/README.md +++ b/README.md @@ -43,9 +43,26 @@ The following fields are considered: ### Structural Variants / Copy Number Variants -Note that if the `INFO/SVMETHOD` field is missing then you should define `--default-sv-method` as you would otherwise get a problem downstream. +**Supported Callers and Caller Annotation** + +The following variant callers are explicitly supported. + +- Delly 2 (SVs) +- Dragen CNV caller +- Dragen SV caller +- Manta +- GATK gCNV +- XHMM (deprecated) + +In the other cases, VarFish annotator will fall back to a "generic" import where only the per-sample fields `GT`, `FT`, and `GQ` are interpreted. +Your caller should also write out `INFO/END`, `INFO/SVTYPE`, and `INFO/SVLEN` as defined by VCF4.2. + +VarFish Annotator will look at the field `INFO/SVMETHOD` to annotate calls with the caller where the call originated from. +If this field is empty then you should define `--default-sv-method` so you get appropriately labeled output. If you have any problem with your data then please tell us by opening a GitHub issue. +**Interpretation of top-level and INFO VCF fields** + The following fields are considered: - `CHROM` @@ -70,28 +87,32 @@ The following fields are considered: Confidence interval around the end point of the SV. - `INFO/SVMETHOD` The name of the caller that was used. 
-- `FORMAT` and per `SAMPLE` - - Common - - `GT` Genotype - - `FT` Per-genotype filter values - - `GQ` Phred-scaled genotype quality - - For Delly2 - - `DR` Reference pairs - - `DV` Variant pairs - - `RR` Reference junction count - - `RV` Variant junction count - - For XHMM - - `DQ` Diploid Quality - - `NDQ` Non-diploid Quality - - `RD` Mean normalized read depth over region - - `PL` Genotype likelihoods for [diploid, deletion, duplication] - - For GATK gCNV - - `CN` Copy number - - `NP` Number of points in segment - - `QA` Phred-scale quality of all points agreeing - - `QS` Phred-scaled quality of least one point agreeing - - `QSS` Phred-scaled quality of start breakpoint - - `QSE` Phred-scaled quality of end breakpoint + +**Interpretation of `FORMAT` and per sample fields** + +- Common + - `GT` Genotype, written as `gt` + - `FT` Per-genotype filter values, written as `ft` + - `GQ` Phred-scaled genotype quality, written as `gq` +- Delly2 + - `DR` Reference pairs, written as `pec = DR + DV` + - `DV` Variant pairs, written as `pev` + - `RR` Reference junction count, written as `src = RR + RV` + - `RV` Variant junction count, written as `srv` + - `RDCN` Copy number estimate, written as `cn` +- Dragen CNV + - `SM` Average normalized coverage, written as `anc` + - `BC` Bucket count, written as point count `pc` + - `PE` Discordant read count at start/end, written as `pev = PE[0] + PE[1]` +- Dragen SV + - `PR` Paired read of reference and variant, written as `pec = PR[0] + PR[1]` and `pev = PR[1]` + - `SR` Split read of reference and variant, written as `src = SR[0] + SR[1]` and `srv = SR[1]` +- For GATK gCNV + - `CN` Integer copy number, written as `cn` + - `NP` Number of points in segment, written as `np` +- Manta (equivalent to Dragen SV) +- For XHMM + - `RD` Average normalized coverage, written as `an` ## Example diff --git a/tests/hg19-chr22/Case_1_index.delly2.gts.tsv-expected b/tests/hg19-chr22/Case_1_index.delly2.gts.tsv-expected index f483509..2625866 100644 --- 
a/tests/hg19-chr22/Case_1_index.delly2.gts.tsv-expected +++ b/tests/hg19-chr22/Case_1_index.delly2.gts.tsv-expected @@ -1,2 +1,2 @@ release chromosome chromosome_no bin chromosome2 chromosome_no2 bin2 pe_orientation start end start_ci_left start_ci_right end_ci_left end_ci_right case_id set_id sv_uuid caller sv_type sv_sub_type info num_hom_alt num_hom_ref num_het num_hemi_alt num_hemi_ref genotype -GRCh37 22 22 89 22 22 89 3to5 17400000 17700000 -29 29 -29 29 . . UUID EMBL.DELLYv1.1.3 DEL DEL {"""backgroundCarriers""":0,"""affectedCarriers""":0,"""unaffectedCarriers""":0} 0 2 1 0 0 {"""Case_1_father-N1-DNA1-WGS1""":{"""gt""":"""0/1""","""gq""":14,"""pec""":0,"""pev""":0,"""src""":34,"""srv""":4},"""Case_1_index-N1-DNA1-WGS1""":{"""gt""":"""0/1""","""gq""":14,"""pec""":0,"""pev""":0,"""src""":34,"""srv""":4,"""gt""":"""0/0""","""gq""":35,"""pec""":0,"""pev""":0,"""src""":29,"""srv""":2},"""Case_1_mother-N1-DNA1-WGS1""":{"""gt""":"""0/1""","""gq""":14,"""pec""":0,"""pev""":0,"""src""":34,"""srv""":4,"""gt""":"""0/0""","""gq""":35,"""pec""":0,"""pev""":0,"""src""":29,"""srv""":2,"""gt""":"""0/0""","""gq""":67,"""pec""":0,"""pev""":0,"""src""":32,"""srv""":1}} +GRCh37 22 22 89 22 22 89 3to5 17400000 17700000 -29 29 -29 29 . . 
UUID EMBL.DELLYv1.1.3 DEL DEL {"""backgroundCarriers""":0,"""affectedCarriers""":0,"""unaffectedCarriers""":0} 0 2 1 0 0 {"""Case_1_father-N1-DNA1-WGS1""":{"""gt""":"""0/1""","""ft""":{"""LowQual"""},"""gq""":14,"""pec""":0,"""pev""":0,"""src""":34,"""srv""":4,"""cn""":2},"""Case_1_index-N1-DNA1-WGS1""":{"""gt""":"""0/0""","""gq""":35,"""pec""":0,"""pev""":0,"""src""":29,"""srv""":2,"""cn""":2},"""Case_1_mother-N1-DNA1-WGS1""":{"""gt""":"""0/0""","""gq""":67,"""pec""":0,"""pev""":0,"""src""":32,"""srv""":1,"""cn""":2}} diff --git a/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsVcf.java b/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsVcf.java index 2684f5d..c5c88a6 100644 --- a/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsVcf.java +++ b/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsVcf.java @@ -67,10 +67,14 @@ public final class AnnotateSvsVcf { /** Pedigree to use for annotation. */ private Pedigree pedigree; + /** Helper to use for creating genotypes and feature effects files. */ + private CallerSupport callerSupport; + /** Construct with the given configuration. */ public AnnotateSvsVcf(AnnotateSvsArgs args) { this.args = args; this.pedigree = null; + this.callerSupport = CallerSupportFactory.getFor(new File(args.getInputVcf())); } /** UUID counter for sequential UUID generation. 
*/ @@ -236,7 +240,8 @@ private void annotateSvVcf( args.getOptOutFeatures(), args.getCaseId(), args.getSetId(), - pedigree); + pedigree, + callerSupport); final FeatureEffectsRecordBuilder feRecordBuilder = new FeatureEffectsRecordBuilder(args.getCaseId(), args.getSetId()); diff --git a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr1Test.java b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr1Test.java index e6d980c..0bcb82c 100644 --- a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr1Test.java +++ b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr1Test.java @@ -138,9 +138,9 @@ void testWithSingleton(boolean gzipOutput) throws IOException { + "GRCh37\tthousand_genomes\tv5b.20130502\n"; final String expectedGts = "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" - + "GRCh37\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + 
"GRCh37\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + "GRCh37\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n" + + "GRCh37\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + "GRCh37\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + 
"GRCh37\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\", \"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n" + "GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n" + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"; final String expectedFeatureEffects = @@ -172,11 +172,11 @@ void testWithTrio(boolean gzipOutput) throws IOException { + "GRCh37\tthousand_genomes\tv5b.20130502\n"; final String expectedGts = "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" - + 
"GRCh37\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":127,\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":127,\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":57,\"\"\"pec\"\"\":19,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + 
"GRCh37\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t2\t1\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":24,\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":24,\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":39,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + 
"GRCh37\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t3\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":43,\"\"\"srv\"\"\":8},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":43,\"\"\"srv\"\"\":8,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":157,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":56,\"\"\"srv\"\"\":8}}\n" - + 
"GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":93,\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":93,\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n" - + 
"GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t3\t0\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":93,\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":93,\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n"; + + "GRCh37\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\"},\"\"\"gq\"\"\":127,\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":57,\"\"\"pec\"\"\":19,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + 
"GRCh37\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t2\t1\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":24,\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":39,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + "GRCh37\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t3\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\", \"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":43,\"\"\"srv\"\"\":8},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":157,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":56,\"\"\"srv\"\"\":8}}\n" + + 
"GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":93,\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\", \"\"\"MAX_PE_COUNT\"\"\", \"\"\"MAX_SR_AAF\"\"\", \"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n" + + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t3\t0\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":93,\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\", \"\"\"MAX_PE_COUNT\"\"\", \"\"\"MAX_SR_AAF\"\"\", \"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n"; final String expectedFeatureEffects = 
"case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n"; runTest( @@ -226,8 +226,8 @@ void testAnnotateHemiMale() throws Exception { + "GRCh37\tthousand_genomes\tv5b.20130502\n"; final String expectedGts = "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" - + "GRCh37\tX\t23\t592\tX\t23\t592\t3to5\t1000000\t1001000\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t1\t0\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"1/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + "GRCh37\tX\t23\t607\tX\t23\t607\t5to3\t3000000\t3001000\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t0\t1\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"1/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"; + + "GRCh37\tX\t23\t592\tX\t23\t592\t3to5\t1000000\t1001000\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t1\t0\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"1/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + 
"GRCh37\tX\t23\t607\tX\t23\t607\t5to3\t3000000\t3001000\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t0\t1\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"1/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"; final String expectedFeatureEffects = "case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n"; runTest( @@ -256,8 +256,8 @@ void testAnnotateHemiFamily() throws Exception { + "GRCh37\tthousand_genomes\tv5b.20130502\n"; final String expectedGts = "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" - + 
"GRCh37\tX\t23\t592\tX\t23\t592\t3to5\t1000000\t1001000\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t1\t1\t1\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"1/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"1/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":127,\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"1/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":127,\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":57,\"\"\"pec\"\"\":19,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + 
"GRCh37\tX\t23\t607\tX\t23\t607\t5to3\t3000000\t3001000\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t1\t0\t0\t0\t2\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"1/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"1/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":24,\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"1/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":24,\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":39,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"; + + "GRCh37\tX\t23\t592\tX\t23\t592\t3to5\t1000000\t1001000\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t1\t1\t1\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"1/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\"},\"\"\"gq\"\"\":127,\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":57,\"\"\"pec\"\"\":19,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + 
"GRCh37\tX\t23\t607\tX\t23\t607\t5to3\t3000000\t3001000\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t1\t0\t0\t0\t2\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"1/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":24,\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":39,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"; final String expectedFeatureEffects = "case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n"; runTest( @@ -286,9 +286,9 @@ void testWithSingletonOptOutChrom2() throws IOException { + "GRCh37\tthousand_genomes\tv5b.20130502\n"; final String expectedGts = "release\tchromosome\tchromosome_no\tbin\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" - + "GRCh37\t1\t1\t599\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + 
"GRCh37\t1\t1\t604\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + "GRCh37\t1\t1\t780\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n" + + "GRCh37\t1\t1\t599\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + "GRCh37\t1\t1\t604\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + 
"GRCh37\t1\t1\t780\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\", \"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n" + "GRCh37\t14\t14\t1299\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n" + "GRCh37\t1\t1\t654\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"; final String expectedFeatureEffects = @@ -319,9 +319,9 @@ void testWithSingletonOptOutDbCounts() throws IOException { + "GRCh37\tthousand_genomes\tv5b.20130502\n"; final String expectedGts = "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tgenotype\n" - + 
"GRCh37\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + "GRCh37\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + "GRCh37\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n" + + "GRCh37\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + 
"GRCh37\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + "GRCh37\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\", \"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n" + "GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n" + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"; final String expectedFeatureEffects = diff --git 
a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr22Test.java b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr22Test.java index 4e96a9d..5e9fd6d 100644 --- a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr22Test.java +++ b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr22Test.java @@ -138,7 +138,7 @@ void testWithTrio(boolean gzipOutput) throws IOException { + "GRCh37\tvarfish-annotator-db\tfor-testing\n"; final String expectedGts = "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" - + "GRCh37\t22\t22\t89\t22\t22\t89\t3to5\t17400000\t17700000\t-29\t29\t-29\t29\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv1.1.3\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t2\t1\t0\t0\t{\"\"\"Case_1_father-N1-DNA1-WGS1\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":14,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":34,\"\"\"srv\"\"\":4},\"\"\"Case_1_index-N1-DNA1-WGS1\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":14,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":34,\"\"\"srv\"\"\":4,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":35,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":29,\"\"\"srv\"\"\":2},\"\"\"Case_1_mother-N1-DNA1-WGS1\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":14,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":34,\"\"\"srv\"\"\":4,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":35,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":29,\"\"\"srv\"\"\":2,\"\"\"gt\"\"\":\"\"
\"0/0\"\"\",\"\"\"gq\"\"\":67,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":32,\"\"\"srv\"\"\":1}}\n"; + + "GRCh37\t22\t22\t89\t22\t22\t89\t3to5\t17400000\t17700000\t-29\t29\t-29\t29\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv1.1.3\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t2\t1\t0\t0\t{\"\"\"Case_1_father-N1-DNA1-WGS1\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"LowQual\"\"\"},\"\"\"gq\"\"\":14,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":34,\"\"\"srv\"\"\":4,\"\"\"cn\"\"\":2},\"\"\"Case_1_index-N1-DNA1-WGS1\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":35,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":29,\"\"\"srv\"\"\":2,\"\"\"cn\"\"\":2},\"\"\"Case_1_mother-N1-DNA1-WGS1\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":67,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":32,\"\"\"srv\"\"\":1,\"\"\"cn\"\"\":2}}\n"; final String expectedFeatureEffects = "case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n" + ".\t.\t00000000-0000-0000-0000-000000000000\t51816\tNM_017424.2\tTRUE\t{\"transcript_ablation\",\"coding_transcript_variant\"}\tENSG00000093072\tENST00000262607.3\tTRUE\t{\"transcript_ablation\",\"coding_transcript_variant\"}\n" diff --git a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf38Chr1Test.java b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf38Chr1Test.java index a3590e4..6e5348d 100644 --- a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf38Chr1Test.java +++ b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf38Chr1Test.java @@ -86,7 +86,7 @@ void runTest( 
VarfishAnnotatorCli.main(argsArr); if (gzipOutput) { - final File paths[] = {outputDbInfoPath, outputGtsPath, outputFeatureEffects}; + final File[] paths = {outputDbInfoPath, outputGtsPath, outputFeatureEffects}; for (File path : paths) { try (FileInputStream fin = new FileInputStream(path)) { Assertions.assertTrue(GzipUtil.isGZipped(fin)); @@ -113,9 +113,9 @@ void testWithSingleton(boolean gzipOutput) throws IOException { + "GRCh38\thgmd_public\tensembl_r104\n"; final String expectedGts = "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" - + "GRCh38\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + "GRCh38\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + 
"GRCh38\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n" + + "GRCh38\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + "GRCh38\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + "GRCh38\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\", \"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n" + 
"GRCh38\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n" + "GRCh38\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"; final String expectedFeatureEffects = @@ -142,11 +142,11 @@ void testWithTrio(boolean gzipOutput) throws IOException { + "GRCh38\thgmd_public\tensembl_r104\n"; final String expectedGts = "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" - + 
"GRCh38\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":127,\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":127,\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":57,\"\"\"pec\"\"\":19,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + 
"GRCh38\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t2\t1\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":24,\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":24,\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":39,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + 
"GRCh38\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t3\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":43,\"\"\"srv\"\"\":8},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":43,\"\"\"srv\"\"\":8,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":157,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":56,\"\"\"srv\"\"\":8}}\n" - + 
"GRCh38\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":93,\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":93,\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n" - + 
"GRCh38\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t3\t0\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":93,\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":93,\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n"; + + "GRCh38\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\"},\"\"\"gq\"\"\":127,\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":57,\"\"\"pec\"\"\":19,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + 
"GRCh38\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t2\t1\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":24,\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":39,\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + "GRCh38\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t3\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\", \"\"\"MIN_PE_COUNT\"\"\", \"\"\"MIN_SR_AAF\"\"\", \"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":43,\"\"\"srv\"\"\":8},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":157,\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":56,\"\"\"srv\"\"\":8}}\n" + + 
"GRCh38\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":93,\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\", \"\"\"MAX_PE_COUNT\"\"\", \"\"\"MAX_SR_AAF\"\"\", \"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n" + + "GRCh38\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"backgroundCarriers\"\"\":0,\"\"\"affectedCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t3\t0\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"gq\"\"\":93,\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\", \"\"\"MAX_PE_COUNT\"\"\", \"\"\"MAX_SR_AAF\"\"\", \"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n"; final String expectedFeatureEffects = 
"case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n"; runTest( @@ -164,10 +164,15 @@ void testWithTrio(boolean gzipOutput) throws IOException { void testSelfTestFails() throws Exception { final String text = SystemLambda.tapSystemErr( - () -> { - runTest( - "bwa.delly2.NA12878.vcf.gz", "input/grch38-chr1", null, null, null, false, false); - }); + () -> + runTest( + "bwa.delly2.NA12878.vcf.gz", + "input/grch38-chr1", + null, + null, + null, + false, + false)); Assertions.assertTrue(text.contains("Problem with database self-test:")); } } diff --git a/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.cnv.gts.tsv b/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.cnv.gts.tsv index 9a608d2..6334f68 100644 --- a/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.cnv.gts.tsv +++ b/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.cnv.gts.tsv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32605be3e0db7fbfa673a4249606121adef9e5fb155e46c221dbee7d95f90402 -size 138233 +oid sha256:31f50efc3eb4bc13b1f5e40bde68561bf91b86e26971d3832ef9e59ea72c9951 +size 162863 diff --git a/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.sv.gts.tsv b/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.sv.gts.tsv index 185a28f..fdd87f5 100644 --- a/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.sv.gts.tsv +++ b/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.sv.gts.tsv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:788e67c89f36cdb77a22b7d60c11d2a31334429713601694b8b197ff6bd3030a -size 2199132 +oid sha256:714dc98ae59d15dabf01366f4242c5410e5b432905c333bc2949109f1360f643 +size 
2621930 diff --git a/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv b/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv index 13f8603..7f9f728 100644 --- a/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv +++ b/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b4c6cf8c846166c3119c1e0883f2a0914c17f7df11d47d591b473a33fbfcb90 -size 26053056 +oid sha256:22b6ba1df1e083c28e7f8168157ba998864eb03149b2178900b4aa227bf595d8 +size 19498392 diff --git a/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.gcnv.NA12878-N1-DNA1-WGS1.gts.tsv b/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.gcnv.NA12878-N1-DNA1-WGS1.gts.tsv index a28454d..c05f0c7 100644 --- a/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.gcnv.NA12878-N1-DNA1-WGS1.gts.tsv +++ b/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.gcnv.NA12878-N1-DNA1-WGS1.gts.tsv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52c867f389dbee2d732d52b2dfa71a0cdff2194f512a55c50c49d1c71c292772 -size 188896 +oid sha256:96311b8f376ab6ae66a53aedb6381cde5be1e39c1d41d5e7bcbce82b6f865708 +size 160855 diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupport.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupport.java new file mode 100644 index 0000000..e69f401 --- /dev/null +++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupport.java @@ -0,0 +1,89 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import static com.github.bihealth.varfish_annotator.utils.StringUtils.tripleQuote; + +import 
com.google.common.base.Joiner; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFHeader; +import java.util.*; +import java.util.stream.Collectors; + +/** Base class for SV caller specific support; it fills the sample genotype fields shared by all callers and leaves the remaining ones to subclasses. */ public abstract class CallerSupport { + /** @return Name of the used SV caller. */ + public abstract SvCaller getSvCaller(); + + /** + * Extract version from the given VCF file. + * + *

This may read a record from the VCF file, so make sure to reset the reader if necessary. It + * assumes that all records in the VCF file are from the same caller. + * + * @param vcfReader VCF file reader to extract the version from + * @return Version string of the used SV caller. + */ + public abstract String getVersion(VCFFileReader vcfReader); + + /** + * @param vcfHeader VCF header to consider + * @return Whether the SV caller is compatible with the given VCF header. + */ + public abstract boolean isCompatible(VCFHeader vcfHeader); + + /** Build the SampleGenotype of one sample: sets sample name, genotype string, filters and genotype quality, then hands the builder to buildSampleGenotypeImpl before building the result. */ public SampleGenotype buildSampleGenotype(VariantContext ctx, int alleleNo, String sample) { + SampleGenotypeBuilder builder = new SampleGenotypeBuilder(); + builder.setSampleName(sample); + builder.setGenotype(buildGenotype(ctx, alleleNo, sample)); + builder.setFilters(buildFilters(ctx, sample)); + builder.setGenotypeQuality(getGenotypeQuality(ctx, alleleNo, sample)); + buildSampleGenotypeImpl(builder, ctx, alleleNo, sample); + return builder.build(); + } + + /** Hook invoked by buildSampleGenotype after the shared fields were set; implementations can set further values on the builder. */ protected abstract void buildSampleGenotypeImpl( + SampleGenotypeBuilder builder, VariantContext ctx, int alleleNo, String sample); + + /** Render the genotype of the sample with respect to allele alleleNo: no-call alleles become a dot, alleles whose index equals alleleNo become 1 and all others 0; phased genotypes are joined with a pipe in the order returned by getAlleles, unphased ones are sorted and joined with a slash. */ private String buildGenotype(VariantContext ctx, int alleleNo, String sample) { + final Genotype genotype = ctx.getGenotype(sample); + final List gtList = new ArrayList<>(); + for (Allele allele : genotype.getAlleles()) { + if (allele.isNoCall()) { + gtList.add("."); + } else if (ctx.getAlleleIndex(allele) == alleleNo) { + gtList.add("1"); + } else { + gtList.add("0"); + } + } + if (genotype.isPhased()) { + return Joiner.on("|").join(gtList); + } else { + gtList.sort(Comparator.naturalOrder()); + return Joiner.on("/").join(gtList); + } + } + + /** Return the genotype filters of the sample, split at semicolons and each passed through tripleQuote; an empty list when the filter string is null or empty. */ private List buildFilters(VariantContext ctx, String sample) { + final Genotype genotype = ctx.getGenotype(sample); + if (genotype.getFilters() != null && !genotype.getFilters().equals("")) { + final List fts = + Arrays.stream(genotype.getFilters().split(";")) + .map(s -> tripleQuote(s)) + 
.collect(Collectors.toList()); + return fts; + } else { + return new ArrayList<>(); + } + } + + /** Return the GQ of the sample genotype, or null when the genotype has none; alleleNo is not used here. */ private Integer getGenotypeQuality(VariantContext ctx, int alleleNo, String sample) { + final Genotype genotype = ctx.getGenotype(sample); + if (genotype.hasGQ()) { + return genotype.getGQ(); + } else { + return null; + } + } +} diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDelly2.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDelly2.java new file mode 100644 index 0000000..fe703e4 --- /dev/null +++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDelly2.java @@ -0,0 +1,60 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFFilterHeaderLine; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFInfoHeaderLine; + +/** Import SV caller support for Delly2.
*/ +public class CallerSupportDelly2 extends CallerSupport { + + public SvCaller getSvCaller() { + return SvCaller.DELLY2_SV; + } + + @Override + public String getVersion(VCFFileReader vcfReader) { + final VariantContext vc = vcfReader.iterator().next(); + if (!vc.hasAttribute("SVMETHOD")) { + return null; + } else { + final String svMethod = vc.getAttributeAsString("SVMETHOD", ""); + if (!svMethod.startsWith("EMBL.DELLYv")) { + return null; + } else { + return svMethod.substring("EMBL.DELLYv".length()); + } + } + } + + public boolean isCompatible(VCFHeader vcfHeader) { + final VCFFilterHeaderLine lowQualFilter = vcfHeader.getFilterHeaderLine("LowQual"); + boolean seenLowQualFilter = (lowQualFilter != null); + final VCFInfoHeaderLine ctInfo = vcfHeader.getInfoHeaderLine("CT"); + boolean seenCtInfo = (ctInfo != null); + final VCFInfoHeaderLine impreciseInfo = vcfHeader.getInfoHeaderLine("IMPRECISE"); + boolean seenImpreciseInfo = (impreciseInfo != null); + + return seenLowQualFilter && seenCtInfo && seenImpreciseInfo; + } + + @Override + protected void buildSampleGenotypeImpl( + SampleGenotypeBuilder builder, VariantContext ctx, int alleleNo, String sample) { + final Genotype genotype = ctx.getGenotype(sample); + final int dr = Integer.parseInt(genotype.getExtendedAttribute("DR", "0").toString()); + final int dv = Integer.parseInt(genotype.getExtendedAttribute("DV", "0").toString()); + builder.setPairedEndCoverage(dr + dv); + builder.setPairedEndVariantSupport(dv); + final int rr = Integer.parseInt(genotype.getExtendedAttribute("RR", "0").toString()); + final int rv = Integer.parseInt(genotype.getExtendedAttribute("RV", "0").toString()); + builder.setSplitReadCoverage(rr + rv); + builder.setSplitReadVariantSupport(rv); + if (genotype.hasExtendedAttribute("RDCN")) { + final int cn = Integer.parseInt(genotype.getExtendedAttribute("RDCN", "0").toString()); + builder.setCopyNumber(cn); + } + } +} diff --git 
a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenCnv.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenCnv.java new file mode 100644 index 0000000..c1a0401 --- /dev/null +++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenCnv.java @@ -0,0 +1,67 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFFilterHeaderLine; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** Import SV caller support for Dragen CNV. */ +public class CallerSupportDragenCnv extends CallerSupport { + + public SvCaller getSvCaller() { + return SvCaller.DRAGEN_CNV; + } + + private static final Pattern p = Pattern.compile("Version=\"(.*?)\""); + + @Override + public String getVersion(VCFFileReader vcfReader) { + for (VCFHeaderLine headerLine : vcfReader.getHeader().getOtherHeaderLines()) { + if (headerLine.getKey().equals("DRAGENVersion")) { + final Matcher m = p.matcher(headerLine.getValue()); + if (m.find()) { + return m.group(1); + } + } + } + return null; + } + + public boolean isCompatible(VCFHeader vcfHeader) { + boolean seenDragenVersionHeaderLine = false; + boolean seenDragenCommandLineHeaderLine = false; + for (VCFHeaderLine headerLine : vcfHeader.getOtherHeaderLines()) { + if (headerLine.getKey().equals("DRAGENVersion")) { + seenDragenVersionHeaderLine = true; + } else if (headerLine.getKey().equals("DRAGENCommandLine")) { + seenDragenCommandLineHeaderLine = true; + } + } + + final VCFFilterHeaderLine cnvBinSupportRatio = + vcfHeader.getFilterHeaderLine("cnvBinSupportRatio"); + boolean seenCnvBinSupportRatioFilter = 
(cnvBinSupportRatio != null); + + return seenDragenVersionHeaderLine + && seenDragenCommandLineHeaderLine + && seenCnvBinSupportRatioFilter; + } + + @Override + protected void buildSampleGenotypeImpl( + SampleGenotypeBuilder builder, VariantContext ctx, int alleleNo, String sample) { + final Genotype genotype = ctx.getGenotype(sample); + builder.setAverageNormalizedCoverage( + Double.parseDouble(genotype.getExtendedAttribute("SM", "0.0").toString())); + builder.setPointCount(Integer.parseInt(genotype.getExtendedAttribute("BC", "0").toString())); + final String pe = genotype.getExtendedAttribute("PE", "0,0").toString(); + final String[] pes = pe.split(","); + final int pe0 = Integer.parseInt(pes[0]); + final int pe1 = Integer.parseInt(pes[1]); + builder.setPairedEndVariantSupport(pe0 + pe1); + } +} diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenSv.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenSv.java new file mode 100644 index 0000000..ebe71b8 --- /dev/null +++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenSv.java @@ -0,0 +1,58 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import com.github.bihealth.varfish_annotator.utils.HtsjdkUtils; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.*; + +/** Import SV caller support for Dragen SV. 
*/ +public class CallerSupportDragenSv extends CallerSupport { + + public SvCaller getSvCaller() { + return SvCaller.DRAGEN_SV; + } + + @Override + public String getVersion(VCFFileReader vcfReader) { + for (VCFHeaderLine headerLine : HtsjdkUtils.getSourceHeaderLines(vcfReader.getHeader())) { + final String value = headerLine.getValue(); + if (value.startsWith("DRAGEN") && value.contains(" ")) { + return value.split(" ", 2)[1]; + } + } + return null; + } + + public boolean isCompatible(VCFHeader vcfHeader) { + boolean seenSourceDragen = false; + for (VCFHeaderLine headerLine : HtsjdkUtils.getSourceHeaderLines(vcfHeader)) { + final String value = headerLine.getValue(); + if (value.startsWith("DRAGEN")) { + seenSourceDragen = true; + } + } + + final VCFFilterHeaderLine minQualFilter = vcfHeader.getFilterHeaderLine("MinQUAL"); + boolean seenMinQualFilter = (minQualFilter != null); + final VCFInfoHeaderLine mateIdInfo = vcfHeader.getInfoHeaderLine("MATEID"); + boolean seenMateIdInfo = (mateIdInfo != null); + + return seenSourceDragen && seenMinQualFilter && seenMateIdInfo; + } + + @Override + protected void buildSampleGenotypeImpl( + SampleGenotypeBuilder builder, VariantContext ctx, int alleleNo, String sample) { + final Genotype genotype = ctx.getGenotype(sample); + final String[] prs = genotype.getExtendedAttribute("PR", "0,0").toString().split(","); + final int pr0 = Integer.parseInt(prs[0]); + final int pr1 = Integer.parseInt(prs[1]); + final String[] srs = genotype.getExtendedAttribute("SR", "0,0").toString().split(","); + final int sr0 = Integer.parseInt(srs[0]); + final int sr1 = Integer.parseInt(srs[1]); + builder.setPairedEndCoverage(pr0 + pr1); + builder.setPairedEndVariantSupport(pr1); + builder.setSplitReadCoverage(sr0 + sr1); + builder.setSplitReadVariantSupport(sr1); + } +} diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportFactory.java 
b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportFactory.java new file mode 100644 index 0000000..fb7a41c --- /dev/null +++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportFactory.java @@ -0,0 +1,28 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import com.google.common.collect.ImmutableList; +import htsjdk.variant.vcf.VCFFileReader; +import java.io.File; + +public class CallerSupportFactory { + private static ImmutableList CALLER_SUPPORTS = + ImmutableList.of( + new CallerSupportManta(), + new CallerSupportDelly2(), + new CallerSupportDragenCnv(), + new CallerSupportDragenSv(), + new CallerSupportGatkGcnv(), + new CallerSupportXhmm()); + private static CallerSupport GENERIC_CALLER = new CallerSupportGeneric(); + + public static CallerSupport getFor(File vcfFile) { + try (VCFFileReader reader = new VCFFileReader(vcfFile, false)) { + for (CallerSupport callerSupport : CALLER_SUPPORTS) { + if (callerSupport.isCompatible(reader.getHeader())) { + return callerSupport; + } + } + } + return GENERIC_CALLER; + } +} diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGatkGcnv.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGatkGcnv.java new file mode 100644 index 0000000..e936d3a --- /dev/null +++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGatkGcnv.java @@ -0,0 +1,35 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.*; + +/** Import SV caller support for Delly2. 
*/ +public class CallerSupportGatkGcnv extends CallerSupport { + + public SvCaller getSvCaller() { + return SvCaller.GATK_GCNV; + } + + @Override + public String getVersion(VCFFileReader vcfReader) { + return "UNKNOWN"; + } + + public boolean isCompatible(VCFHeader vcfHeader) { + final VCFFormatHeaderLine qseFormat = vcfHeader.getFormatHeaderLine("QSE"); + boolean seenQseFormat = (qseFormat != null); + final VCFFormatHeaderLine qssFormat = vcfHeader.getFormatHeaderLine("QSS"); + boolean seenQssFormat = (qssFormat != null); + + return seenQseFormat && seenQssFormat; + } + + @Override + protected void buildSampleGenotypeImpl( + SampleGenotypeBuilder builder, VariantContext ctx, int alleleNo, String sample) { + final Genotype genotype = ctx.getGenotype(sample); + builder.setCopyNumber(Integer.parseInt(genotype.getExtendedAttribute("CN", "0").toString())); + builder.setPointCount(Integer.parseInt(genotype.getExtendedAttribute("NP", "0").toString())); + } +} diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGeneric.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGeneric.java new file mode 100644 index 0000000..1bf4d67 --- /dev/null +++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGeneric.java @@ -0,0 +1,36 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFInfoHeaderLine; + +/** Import SV caller support for generic SV callers. 
*/ +public class CallerSupportGeneric extends CallerSupport { + + public SvCaller getSvCaller() { + return SvCaller.GENERIC; + } + + @Override + public String getVersion(VCFFileReader vcfReader) { + return "UNKNOWN"; + } + + public boolean isCompatible(VCFHeader vcfHeader) { + final VCFInfoHeaderLine svTypeInfo = vcfHeader.getInfoHeaderLine("SVTYPE"); + boolean seenSvTypeInfo = (svTypeInfo != null); + final VCFInfoHeaderLine svEndInfo = vcfHeader.getInfoHeaderLine("END"); + boolean seenSvTypeEnd = (svEndInfo != null); + final VCFInfoHeaderLine svLenInfo = vcfHeader.getInfoHeaderLine("SVLEN"); + boolean seenSvLenEnd = (svLenInfo != null); + + return seenSvTypeInfo && seenSvTypeEnd && seenSvLenEnd; + } + + @Override + protected void buildSampleGenotypeImpl( + SampleGenotypeBuilder builder, VariantContext ctx, int alleleNo, String sample) { + // no generic implementation possible + } +} diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportManta.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportManta.java new file mode 100644 index 0000000..6dda6a8 --- /dev/null +++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportManta.java @@ -0,0 +1,53 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import com.github.bihealth.varfish_annotator.utils.HtsjdkUtils; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; + +/** Import SV caller support for Manta. 
*/ +public class CallerSupportManta extends CallerSupport { + + public SvCaller getSvCaller() { + return SvCaller.MANTA; + } + + @Override + public String getVersion(VCFFileReader vcfReader) { + for (VCFHeaderLine headerLine : HtsjdkUtils.getSourceHeaderLines(vcfReader.getHeader())) { + final String value = headerLine.getValue(); + if (value.startsWith("GenerateSVCandidates") && value.contains(" ")) { + return value.split(" ", 2)[1]; + } + } + return null; + } + + public boolean isCompatible(VCFHeader vcfHeader) { + for (VCFHeaderLine headerLine : HtsjdkUtils.getSourceHeaderLines(vcfHeader)) { + final String value = headerLine.getValue(); + if (value.startsWith("GenerateSVCandidates")) { + return true; + } + } + return false; + } + + @Override + protected void buildSampleGenotypeImpl( + SampleGenotypeBuilder builder, VariantContext ctx, int alleleNo, String sample) { + final Genotype genotype = ctx.getGenotype(sample); + final String[] prs = genotype.getExtendedAttribute("PR", "0,0").toString().split(","); + final int pr0 = Integer.parseInt(prs[0]); + final int pr1 = Integer.parseInt(prs[1]); + final String[] srs = genotype.getExtendedAttribute("SR", "0,0").toString().split(","); + final int sr0 = Integer.parseInt(srs[0]); + final int sr1 = Integer.parseInt(srs[1]); + builder.setPairedEndCoverage(pr0 + pr1); + builder.setPairedEndVariantSupport(pr1); + builder.setSplitReadCoverage(sr0 + sr1); + builder.setSplitReadVariantSupport(sr1); + } +} diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportXhmm.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportXhmm.java new file mode 100644 index 0000000..50d3be5 --- /dev/null +++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportXhmm.java @@ -0,0 +1,35 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import 
htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.*; + +/** Import SV caller support for XHMM. */ +public class CallerSupportXhmm extends CallerSupport { + + public SvCaller getSvCaller() { + return SvCaller.XHMM; + } + + @Override + public String getVersion(VCFFileReader vcfReader) { + return "2016_01_04.cc14e52"; // latest release + } + + public boolean isCompatible(VCFHeader vcfHeader) { + final VCFFormatHeaderLine ndqFormat = vcfHeader.getFormatHeaderLine("NDQ"); + boolean seenNdqFormat = (ndqFormat != null); + final VCFFormatHeaderLine dscvrFormat = vcfHeader.getFormatHeaderLine("DSCVR"); + boolean seenDscvrFormat = (dscvrFormat != null); + + return seenNdqFormat && seenDscvrFormat; + } + + @Override + protected void buildSampleGenotypeImpl( + SampleGenotypeBuilder builder, VariantContext ctx, int alleleNo, String sample) { + final Genotype genotype = ctx.getGenotype(sample); + builder.setAverageNormalizedCoverage( + Double.parseDouble(genotype.getExtendedAttribute("RD", "0").toString())); + } +} diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/GtRecordBuilder.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/GtRecordBuilder.java index 513fcda..b69e1a0 100644 --- a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/GtRecordBuilder.java +++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/GtRecordBuilder.java @@ -53,12 +53,13 @@ public class GtRecordBuilder { /** Header fields for the SV and genotype file (part 5). 
*/ private static final ImmutableList HEADERS_GT_PART_5 = ImmutableList.of("genotype"); - String release; - String defaultSvMethod; - String optOutFeatures; - String caseId; - String setId; - Pedigree pedigree; + private final String release; + private final String defaultSvMethod; + private final String optOutFeatures; + private final String caseId; + private final String setId; + private final Pedigree pedigree; + private final CallerSupport callerSupport; public GtRecordBuilder( String release, @@ -66,13 +67,15 @@ public GtRecordBuilder( String optOutFeatures, String caseId, String setId, - Pedigree pedigree) { + Pedigree pedigree, + CallerSupport callerSupport) { this.release = release; this.defaultSvMethod = defaultSvMethod; this.optOutFeatures = optOutFeatures; this.caseId = caseId; this.setId = setId; this.pedigree = pedigree; + this.callerSupport = callerSupport; } public List getHeaders() { @@ -221,8 +224,7 @@ private static String buildInfoValue( return "{" + Joiner.on(",").join(mappings) + "}"; } - private static String buildGenotypeValue(VariantContext ctx, int alleleNo) { - final ArrayList attrs = new ArrayList<>(); + private String buildGenotypeValue(VariantContext ctx, int alleleNo) { // Add "GT" field. 
final List mappings = new ArrayList<>(); @@ -230,120 +232,127 @@ private static String buildGenotypeValue(VariantContext ctx, int alleleNo) { final List sortedSampleNames = Lists.newArrayList(ctx.getSampleNames()); sortedSampleNames.sort(c); for (String sample : sortedSampleNames) { - final Genotype genotype = ctx.getGenotype(sample); - final Map gts = new TreeMap<>(); - final List gtList = new ArrayList<>(); - for (Allele allele : genotype.getAlleles()) { - if (allele.isNoCall()) { - gtList.add("."); - } else if (ctx.getAlleleIndex(allele) == alleleNo) { - gtList.add("1"); - } else { - gtList.add("0"); - } - } - if (genotype.isPhased()) { - gts.put(sample, Joiner.on("|").join(gtList)); - } else { - gtList.sort(Comparator.naturalOrder()); - gts.put(sample, Joiner.on("/").join(gtList)); - } - attrs.add(Joiner.on("").join(tripleQuote("gt"), ":", tripleQuote(gts.get(sample)))); + mappings.add(callerSupport.buildSampleGenotype(ctx, alleleNo, sample).buildStringFragment()); + } - // FT -- genotype filters - if (genotype.hasExtendedAttribute("FT") - && genotype.getFilters() != null - && !genotype.getFilters().equals("")) { - final List fts = - Arrays.stream(genotype.getFilters().split(",")) - .map(s -> tripleQuote(s)) - .collect(Collectors.toList()); - attrs.add(Joiner.on("").join(tripleQuote("ft"), ":{", Joiner.on(",").join(fts), "}")); - } + return "{" + Joiner.on(",").join(mappings) + "}"; + } - // GQ -- genotype quality - if (genotype.hasGQ()) { - attrs.add(Joiner.on("").join(tripleQuote("gq"), ":", genotype.getGQ())); + private static List buildSampleGenotypeValue( + VariantContext ctx, int alleleNo, String sample) { + final ArrayList attrs = new ArrayList<>(); + + final Genotype genotype = ctx.getGenotype(sample); + final Map gts = new TreeMap<>(); + final List gtList = new ArrayList<>(); + for (Allele allele : genotype.getAlleles()) { + if (allele.isNoCall()) { + gtList.add("."); + } else if (ctx.getAlleleIndex(allele) == alleleNo) { + gtList.add("1"); + } else { 
+ gtList.add("0"); } + } + if (genotype.isPhased()) { + gts.put(sample, Joiner.on("|").join(gtList)); + } else { + gtList.sort(Comparator.naturalOrder()); + gts.put(sample, Joiner.on("/").join(gtList)); + } + attrs.add(Joiner.on("").join(tripleQuote("gt"), ":", tripleQuote(gts.get(sample)))); - // Additional integer attributes, currently Delly only. - boolean looksLikeDelly = ctx.getAttributeAsString("SVMETHOD", "").contains("DELLY"); - boolean looksLikeXHMM = ctx.getAttributeAsString("SVMETHOD", "").contains("XHMM"); - boolean looksLikeGcnv = ctx.getAttributeAsString("SVMETHOD", "").contains("gcnvkernel"); - if (looksLikeDelly) { - // * DR -- reference pairs - // * DV -- variant pairs - // * RR -- reference junction - // * RV -- variant junction - final int dr = Integer.parseInt(genotype.getExtendedAttribute("DR", "0").toString()); - final int dv = Integer.parseInt(genotype.getExtendedAttribute("DV", "0").toString()); - final int rr = Integer.parseInt(genotype.getExtendedAttribute("RR", "0").toString()); - final int rv = Integer.parseInt(genotype.getExtendedAttribute("RV", "0").toString()); + // FT -- genotype filters + if (genotype.hasExtendedAttribute("FT") + && genotype.getFilters() != null + && !genotype.getFilters().equals("")) { + final List fts = + Arrays.stream(genotype.getFilters().split(",")) + .map(s -> tripleQuote(s)) + .collect(Collectors.toList()); + attrs.add(Joiner.on("").join(tripleQuote("ft"), ":{", Joiner.on(",").join(fts), "}")); + } - // Attributes to write out. 
- // - // * pec - paired end coverage - // * pev - paired end variant support - // * src - split read coverage - // * srv - split read end variant support - attrs.add(Joiner.on("").join(tripleQuote("pec"), ":", String.valueOf(dr + dv))); - attrs.add(Joiner.on("").join(tripleQuote("pev"), ":", String.valueOf(dv))); - attrs.add(Joiner.on("").join(tripleQuote("src"), ":", String.valueOf(rr + rv))); - attrs.add(Joiner.on("").join(tripleQuote("srv"), ":", String.valueOf(rv))); - } else if (looksLikeXHMM) { - // * DQ -- diploid quality - // * NDQ -- non-diploid quality - // * RD -- mean normalized read depth over region - // * PL -- genotype likelihoods, for [diploid, deletion, duplication] - final float dq = Float.parseFloat(genotype.getExtendedAttribute("DQ", "0.0").toString()); - final float ndq = Float.parseFloat(genotype.getExtendedAttribute("NDQ", "0.0").toString()); - final float rd = Float.parseFloat(genotype.getExtendedAttribute("RD", "0.0").toString()); - final int pl[] = genotype.getPL(); + // GQ -- genotype quality + if (genotype.hasGQ()) { + attrs.add(Joiner.on("").join(tripleQuote("gq"), ":", genotype.getGQ())); + } - // Attributes to write out. 
- // - // * dq -- diploid quality - // * ndq -- non-diploid quality - // * rd -- mean normalized read depth over region - // * pl -- genotype likelihoods, for [diploid, deletion, duplication] - attrs.add(Joiner.on("").join(tripleQuote("dq"), ":", String.valueOf(dq))); - attrs.add(Joiner.on("").join(tripleQuote("ndq"), ":", String.valueOf(ndq))); - attrs.add(Joiner.on("").join(tripleQuote("rd"), ":", String.valueOf(rd))); - attrs.add( - Joiner.on("").join(tripleQuote("pl"), ":[", Joiner.on(',').join(Ints.asList(pl)), "]")); - } else if (looksLikeGcnv) { - // * CN -- copy number - // * NP -- number of points in segment - // * QA -- phred-scaled quality of all points agreeing - // * QS -- phred-scaled quality of at least one point agreeing - // * QSS -- phred-scaled quality of start breakpoint - // * QSE -- phred-scaled quality of end breakpoint - final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN", "0").toString()); - final int np = Integer.parseInt(genotype.getExtendedAttribute("NP", "0").toString()); - final int qa = Integer.parseInt(genotype.getExtendedAttribute("QA", "0").toString()); - final int qs = Integer.parseInt(genotype.getExtendedAttribute("QS", "0").toString()); - final int qss = Integer.parseInt(genotype.getExtendedAttribute("QSS", "0").toString()); - final int qse = Integer.parseInt(genotype.getExtendedAttribute("QSE", "0").toString()); + // Additional integer attributes, currently Delly only. 
+ boolean looksLikeDelly = ctx.getAttributeAsString("SVMETHOD", "").contains("DELLY"); + boolean looksLikeXHMM = ctx.getAttributeAsString("SVMETHOD", "").contains("XHMM"); + boolean looksLikeGcnv = ctx.getAttributeAsString("SVMETHOD", "").contains("gcnvkernel"); + if (looksLikeDelly) { + // * DR -- reference pairs + // * DV -- variant pairs + // * RR -- reference junction + // * RV -- variant junction + final int dr = Integer.parseInt(genotype.getExtendedAttribute("DR", "0").toString()); + final int dv = Integer.parseInt(genotype.getExtendedAttribute("DV", "0").toString()); + final int rr = Integer.parseInt(genotype.getExtendedAttribute("RR", "0").toString()); + final int rv = Integer.parseInt(genotype.getExtendedAttribute("RV", "0").toString()); - // Attributes to write out. - // - // * cn -- copy number - // * np -- number of points in segment - // * qa -- phred-scaled quality of all points agreeing - // * qs -- phred-scaled quality of at least one point agreeing - // * qss -- phred-scaled quality of start breakpoint - // * qse -- phred-scaled quality of end breakpoint - attrs.add(Joiner.on("").join(tripleQuote("cn"), ":", String.valueOf(cn))); - attrs.add(Joiner.on("").join(tripleQuote("np"), ":", String.valueOf(np))); - attrs.add(Joiner.on("").join(tripleQuote("qa"), ":", String.valueOf(qa))); - attrs.add(Joiner.on("").join(tripleQuote("qs"), ":", String.valueOf(qs))); - attrs.add(Joiner.on("").join(tripleQuote("qss"), ":", String.valueOf(qss))); - attrs.add(Joiner.on("").join(tripleQuote("qse"), ":", String.valueOf(qse))); - } + // Attributes to write out. 
+ // + // * pec - paired end coverage + // * pev - paired end variant support + // * src - split read coverage + // * srv - split read end variant support + attrs.add(Joiner.on("").join(tripleQuote("pec"), ":", String.valueOf(dr + dv))); + attrs.add(Joiner.on("").join(tripleQuote("pev"), ":", String.valueOf(dv))); + attrs.add(Joiner.on("").join(tripleQuote("src"), ":", String.valueOf(rr + rv))); + attrs.add(Joiner.on("").join(tripleQuote("srv"), ":", String.valueOf(rv))); + } else if (looksLikeXHMM) { + // * DQ -- diploid quality + // * NDQ -- non-diploid quality + // * RD -- mean normalized read depth over region + // * PL -- genotype likelihoods, for [diploid, deletion, duplication] + final float dq = Float.parseFloat(genotype.getExtendedAttribute("DQ", "0.0").toString()); + final float ndq = Float.parseFloat(genotype.getExtendedAttribute("NDQ", "0.0").toString()); + final float rd = Float.parseFloat(genotype.getExtendedAttribute("RD", "0.0").toString()); + final int pl[] = genotype.getPL(); - mappings.add(Joiner.on("").join(tripleQuote(sample), ":{", Joiner.on(",").join(attrs), "}")); + // Attributes to write out. 
+ // + // * dq -- diploid quality + // * ndq -- non-diploid quality + // * rd -- mean normalized read depth over region + // * pl -- genotype likelihoods, for [diploid, deletion, duplication] + attrs.add(Joiner.on("").join(tripleQuote("dq"), ":", String.valueOf(dq))); + attrs.add(Joiner.on("").join(tripleQuote("ndq"), ":", String.valueOf(ndq))); + attrs.add(Joiner.on("").join(tripleQuote("rd"), ":", String.valueOf(rd))); + attrs.add( + Joiner.on("").join(tripleQuote("pl"), ":[", Joiner.on(',').join(Ints.asList(pl)), "]")); + } else if (looksLikeGcnv) { + // * CN -- copy number + // * NP -- number of points in segment + // * QA -- phred-scaled quality of all points agreeing + // * QS -- phred-scaled quality of at least one point agreeing + // * QSS -- phred-scaled quality of start breakpoint + // * QSE -- phred-scaled quality of end breakpoint + final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN", "0").toString()); + final int np = Integer.parseInt(genotype.getExtendedAttribute("NP", "0").toString()); + final int qa = Integer.parseInt(genotype.getExtendedAttribute("QA", "0").toString()); + final int qs = Integer.parseInt(genotype.getExtendedAttribute("QS", "0").toString()); + final int qss = Integer.parseInt(genotype.getExtendedAttribute("QSS", "0").toString()); + final int qse = Integer.parseInt(genotype.getExtendedAttribute("QSE", "0").toString()); + + // Attributes to write out. 
+      //
+      // * cn -- copy number
+      // * np -- number of points in segment
+      // * qa -- phred-scaled quality of all points agreeing
+      // * qs -- phred-scaled quality of at least one point agreeing
+      // * qss -- phred-scaled quality of start breakpoint
+      // * qse -- phred-scaled quality of end breakpoint
+      attrs.add(Joiner.on("").join(tripleQuote("cn"), ":", String.valueOf(cn)));
+      attrs.add(Joiner.on("").join(tripleQuote("np"), ":", String.valueOf(np)));
+      attrs.add(Joiner.on("").join(tripleQuote("qa"), ":", String.valueOf(qa)));
+      attrs.add(Joiner.on("").join(tripleQuote("qs"), ":", String.valueOf(qs)));
+      attrs.add(Joiner.on("").join(tripleQuote("qss"), ":", String.valueOf(qss)));
+      attrs.add(Joiner.on("").join(tripleQuote("qse"), ":", String.valueOf(qse)));
     }
-    return "{" + Joiner.on(",").join(mappings) + "}";
+    return attrs;
   }
 }
diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotype.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotype.java
new file mode 100644
index 0000000..f65906f
--- /dev/null
+++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotype.java
@@ -0,0 +1,219 @@
+package com.github.bihealth.varfish_annotator.annotate_svs;
+
+import static com.github.bihealth.varfish_annotator.utils.StringUtils.tripleQuote;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Objects;
+import com.google.common.collect.ImmutableList;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Immutable set of per-sample genotype attributes for one variant call.
+ *
+ * <p>All attributes other than the sample name and genotype are optional: a {@code null} value
+ * is omitted by {@link #buildStringFragment()}.
+ */
+public final class SampleGenotype {
+
+  private final String sampleName;
+  private final String genotype;
+  private final ImmutableList<String> filters;
+  private final Integer genotypeQuality;
+  private final Integer pairedEndCoverage;
+  private final Integer pairedEndVariantSupport;
+  private final Integer splitReadCoverage;
+  private final Integer splitReadVariantSupport;
+  private final Integer averageMappingQuality;
+  private final Integer copyNumber;
+  private final Double averageNormalizedCoverage;
+  private final Integer pointCount;
+
+  /**
+   * Create a genotype from its attribute values.
+   *
+   * @param filters filter entries of the call; copied on construction, {@code null} is stored as
+   *     an empty list
+   */
+  public SampleGenotype(
+      String sampleName,
+      String genotype,
+      List<String> filters,
+      Integer genotypeQuality,
+      Integer pairedEndCoverage,
+      Integer pairedEndVariantSupport,
+      Integer splitReadCoverage,
+      Integer splitReadVariantSupport,
+      Integer averageMappingQuality,
+      Integer copyNumber,
+      Double averageNormalizedCoverage,
+      Integer pointCount) {
+    this.sampleName = sampleName;
+    this.genotype = genotype;
+    // ImmutableList.copyOf() rejects null, so map a missing list to the empty list instead.
+    this.filters = filters == null ? ImmutableList.of() : ImmutableList.copyOf(filters);
+    this.genotypeQuality = genotypeQuality;
+    this.pairedEndCoverage = pairedEndCoverage;
+    this.pairedEndVariantSupport = pairedEndVariantSupport;
+    this.splitReadCoverage = splitReadCoverage;
+    this.splitReadVariantSupport = splitReadVariantSupport;
+    this.averageMappingQuality = averageMappingQuality;
+    this.copyNumber = copyNumber;
+    this.averageNormalizedCoverage = averageNormalizedCoverage;
+    this.pointCount = pointCount;
+  }
+
+  /**
+   * Render the sample name followed by a brace-enclosed, comma-separated list of {@code
+   * key:value} entries.
+   *
+   * <p>The {@code gt} entry is always written, {@code ft} only when there are filters, and every
+   * other entry only when its value is not {@code null}. Keys and string values are passed
+   * through {@code tripleQuote}.
+   *
+   * @return the fragment for this sample
+   */
+  public String buildStringFragment() {
+    final List<String> attrs = new ArrayList<>();
+    attrs.add(Joiner.on("").join(tripleQuote("gt"), ":", tripleQuote(genotype)));
+    if (!filters.isEmpty()) {
+      final String fts =
+          filters.stream().map(s -> tripleQuote(s)).collect(Collectors.joining(", "));
+      attrs.add(Joiner.on("").join(tripleQuote("ft"), ":{", fts, "}"));
+    }
+    addIfPresent(attrs, "gq", genotypeQuality);
+    addIfPresent(attrs, "pec", pairedEndCoverage);
+    addIfPresent(attrs, "pev", pairedEndVariantSupport);
+    addIfPresent(attrs, "src", splitReadCoverage);
+    addIfPresent(attrs, "srv", splitReadVariantSupport);
+    addIfPresent(attrs, "amq", averageMappingQuality);
+    addIfPresent(attrs, "cn", copyNumber);
+    addIfPresent(attrs, "anc", averageNormalizedCoverage);
+    addIfPresent(attrs, "pc", pointCount);
+    return Joiner.on("").join(tripleQuote(sampleName), ":{", Joiner.on(",").join(attrs), "}");
+  }
+
+  /** Append a {@code key:value} entry to {@code attrs} unless {@code value} is {@code null}. */
+  private static void addIfPresent(List<String> attrs, String key, Object value) {
+    if (value != null) {
+      attrs.add(Joiner.on("").join(tripleQuote(key), ":", value));
+    }
+  }
+
+  public String getSampleName() {
+    return sampleName;
+  }
+
+  public String getGenotype() {
+    return genotype;
+  }
+
+  public List<String> getFilters() {
+    return filters;
+  }
+
+  public Integer getGenotypeQuality() {
+    return genotypeQuality;
+  }
+
+  public Integer getPairedEndCoverage() {
+    return pairedEndCoverage;
+  }
+
+  public Integer getPairedEndVariantSupport() {
+    return pairedEndVariantSupport;
+  }
+
+  public Integer getSplitReadCoverage() {
+    return splitReadCoverage;
+  }
+
+  public Integer getSplitReadVariantSupport() {
+    return splitReadVariantSupport;
+  }
+
+  public Integer getAverageMappingQuality() {
+    return averageMappingQuality;
+  }
+
+  public Integer getCopyNumber() {
+    return copyNumber;
+  }
+
+  public Double getAverageNormalizedCoverage() {
+    return averageNormalizedCoverage;
+  }
+
+  public Integer getPointCount() {
+    return pointCount;
+  }
+
+  @Override
+  public String toString() {
+    return "SampleGenotype{"
+        + "sampleName='"
+        + sampleName
+        + '\''
+        + ", genotype='"
+        + genotype
+        + '\''
+        + ", filters="
+        + filters
+        + ", genotypeQuality="
+        + genotypeQuality
+        + ", pairedEndCoverage="
+        + pairedEndCoverage
+        + ", pairedEndVariantSupport="
+        + pairedEndVariantSupport
+        + ", splitReadCoverage="
+        + splitReadCoverage
+        + ", splitReadVariantSupport="
+        + splitReadVariantSupport
+        + ", averageMappingQuality="
+        + averageMappingQuality
+        + ", copyNumber="
+        + copyNumber
+        + ", averageNormalizedCoverage="
+        + averageNormalizedCoverage
+        + ", pointCount="
+        + pointCount
+        + '}';
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (o == null || getClass() != o.getClass()) return false;
+    SampleGenotype that = (SampleGenotype) o;
+    return Objects.equal(getSampleName(), that.getSampleName())
+        && Objects.equal(getGenotype(), that.getGenotype())
+        && Objects.equal(getFilters(), that.getFilters())
+        && Objects.equal(getGenotypeQuality(), that.getGenotypeQuality())
+        && Objects.equal(getPairedEndCoverage(), that.getPairedEndCoverage())
+        && Objects.equal(getPairedEndVariantSupport(), that.getPairedEndVariantSupport())
+        && Objects.equal(getSplitReadCoverage(), that.getSplitReadCoverage())
+        && Objects.equal(getSplitReadVariantSupport(), that.getSplitReadVariantSupport())
+        && Objects.equal(getAverageMappingQuality(), that.getAverageMappingQuality())
+        && Objects.equal(getCopyNumber(), that.getCopyNumber())
+        && Objects.equal(getAverageNormalizedCoverage(), that.getAverageNormalizedCoverage())
+        && Objects.equal(getPointCount(), that.getPointCount());
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hashCode(
+        getSampleName(),
+        getGenotype(),
+        getFilters(),
+        getGenotypeQuality(),
+        getPairedEndCoverage(),
+        getPairedEndVariantSupport(),
+        getSplitReadCoverage(),
+        getSplitReadVariantSupport(),
+        getAverageMappingQuality(),
+        getCopyNumber(),
+        getAverageNormalizedCoverage(),
+        getPointCount());
+  }
+}
diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotypeBuilder.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotypeBuilder.java
new file mode 100644
index 0000000..0d69823
--- /dev/null
+++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotypeBuilder.java
@@ -0,0 +1,164
@@
+package com.github.bihealth.varfish_annotator.annotate_svs;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Mutable builder that collects the attribute values for a {@link SampleGenotype}. */
+public final class SampleGenotypeBuilder {
+  private String sampleName;
+  private String genotype;
+  private List<String> filters = new ArrayList<>();
+  private Integer genotypeQuality;
+  private Integer pairedEndCoverage;
+  private Integer pairedEndVariantSupport;
+  private Integer splitReadCoverage;
+  private Integer splitReadVariantSupport;
+  private Integer averageMappingQuality;
+  private Integer copyNumber;
+  private Double averageNormalizedCoverage;
+  private Integer pointCount;
+
+  public SampleGenotype build() {
+    return new SampleGenotype(
+        sampleName,
+        genotype,
+        filters,
+        genotypeQuality,
+        pairedEndCoverage,
+        pairedEndVariantSupport,
+        splitReadCoverage,
+        splitReadVariantSupport,
+        averageMappingQuality,
+        copyNumber,
+        averageNormalizedCoverage,
+        pointCount);
+  }
+
+  public String getSampleName() {
+    return sampleName;
+  }
+
+  public void setSampleName(String sampleName) {
+    this.sampleName = sampleName;
+  }
+
+  public String getGenotype() {
+    return genotype;
+  }
+
+  public void setGenotype(String genotype) {
+    this.genotype = genotype;
+  }
+
+  public List<String> getFilters() {
+    return filters;
+  }
+
+  public void setFilters(List<String> filters) {
+    this.filters = new ArrayList<>(filters);
+  }
+
+  public Integer getGenotypeQuality() {
+    return genotypeQuality;
+  }
+
+  public void setGenotypeQuality(Integer genotypeQuality) {
+    this.genotypeQuality = genotypeQuality;
+  }
+
+  public Integer getPairedEndCoverage() {
+    return pairedEndCoverage;
+  }
+
+  public void setPairedEndCoverage(Integer pairedEndCoverage) {
+    this.pairedEndCoverage = pairedEndCoverage;
+  }
+
+  public Integer getPairedEndVariantSupport() {
+    return pairedEndVariantSupport;
+  }
+
+  public void setPairedEndVariantSupport(Integer pairedEndVariantSupport) {
+    this.pairedEndVariantSupport = pairedEndVariantSupport;
+  }
+
+  public Integer getSplitReadCoverage() {
+    return splitReadCoverage;
+  }
+
+  public void setSplitReadCoverage(Integer splitReadCoverage) {
+    this.splitReadCoverage = splitReadCoverage;
+  }
+
+  public Integer getSplitReadVariantSupport() {
+    return splitReadVariantSupport;
+  }
+
+  public void setSplitReadVariantSupport(Integer splitReadVariantSupport) {
+    this.splitReadVariantSupport = splitReadVariantSupport;
+  }
+
+  public Integer getAverageMappingQuality() {
+    return averageMappingQuality;
+  }
+
+  public void setAverageMappingQuality(Integer averageMappingQuality) {
+    this.averageMappingQuality = averageMappingQuality;
+  }
+
+  public Integer getCopyNumber() {
+    return copyNumber;
+  }
+
+  public void setCopyNumber(Integer copyNumber) {
+    this.copyNumber = copyNumber;
+  }
+
+  public Double getAverageNormalizedCoverage() {
+    return averageNormalizedCoverage;
+  }
+
+  public void setAverageNormalizedCoverage(Double averageNormalizedCoverage) {
+    this.averageNormalizedCoverage = averageNormalizedCoverage;
+  }
+
+  public Integer getPointCount() {
+    return pointCount;
+  }
+
+  public void setPointCount(Integer pointCount) {
+    this.pointCount = pointCount;
+  }
+
+  @Override
+  public String toString() {
+    return "SampleGenotypeBuilder{"
+        + "sampleName='"
+        + sampleName
+        + '\''
+        + ", genotype='"
+        + genotype
+        + '\''
+        + ", filters="
+        + filters
+        + ", genotypeQuality="
+        + genotypeQuality
+        + ", pairedEndCoverage="
+        + pairedEndCoverage
+        + ", pairedEndVariantSupport="
+        + pairedEndVariantSupport
+        + ", splitReadCoverage="
+        + splitReadCoverage
+        + ", splitReadVariantSupport="
+        + splitReadVariantSupport
+        + ", averageMappingQuality="
+        + averageMappingQuality
+        + ", copyNumber="
+        + copyNumber
+        + ", averageNormalizedCoverage="
+        + averageNormalizedCoverage
+        + ", pointCount="
+        + pointCount
+        + '}';
+  }
+}
diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SvCaller.java
b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SvCaller.java
new file mode 100644
index 0000000..7682e7b
--- /dev/null
+++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SvCaller.java
@@ -0,0 +1,12 @@
+package com.github.bihealth.varfish_annotator.annotate_svs;
+
+/** Enum with explicitly supported SV callers and generic support. */
+public enum SvCaller {
+  DELLY2_SV,
+  DRAGEN_CNV,
+  DRAGEN_SV,
+  GATK_GCNV,
+  GENERIC,
+  MANTA,
+  XHMM,
+}
diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/utils/HtsjdkUtils.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/utils/HtsjdkUtils.java
new file mode 100644
index 0000000..dc5c751
--- /dev/null
+++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/utils/HtsjdkUtils.java
@@ -0,0 +1,26 @@
+package com.github.bihealth.varfish_annotator.utils;
+
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Static helpers for querying htsjdk VCF headers. */
+public class HtsjdkUtils {
+  /**
+   * Collect the header lines whose key is {@code source}.
+   *
+   * @param vcfHeader header whose {@link VCFHeader#getOtherHeaderLines()} are scanned
+   * @return matching lines in iteration order; empty (never {@code null}) if none match
+   */
+  public static List<VCFHeaderLine> getSourceHeaderLines(VCFHeader vcfHeader) {
+    final ArrayList<VCFHeaderLine> result = new ArrayList<>();
+    for (VCFHeaderLine headerLine : vcfHeader.getOtherHeaderLines()) {
+      // Constant-first comparison also tolerates a header line without a key.
+      if ("source".equals(headerLine.getKey())) {
+        result.add(headerLine);
+      }
+    }
+    return result;
+  }
+}
diff --git a/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/ResourceUtils.java b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/ResourceUtils.java
new file mode 100644
index 0000000..7faf8b4
--- /dev/null
+++ b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/ResourceUtils.java
@@ -0,0 +1,80 @@
+package com.github.bihealth.varfish_annotator;
+
+import java.io.*;
+import java.util.zip.GZIPInputStream;
+import org.apache.commons.io.IOUtils;
+
+/**
+ * Helper class with static methods for handling resources in tests
+ *
+ * @author Manuel Holtgrewe
+ */
+public class ResourceUtils {
+
+  /**
+   * Read the class path resource at {@code path} into memory, decoding it as UTF-8.
+   *
+   * @throws IllegalArgumentException if no resource exists at {@code path}
+   * @throws RuntimeException if reading fails
+   */
+  public static String readResource(String path) {
+    StringWriter writer = new StringWriter();
+    try (InputStream input = openResource(path)) {
+      IOUtils.copy(input, writer, "UTF-8");
+    } catch (IOException e) {
+      throw new RuntimeException("Problem reading resource " + path, e);
+    }
+    return writer.toString();
+  }
+
+  /**
+   * Copy resource at the given path to the given output {@link File}.
+   *
+   * @throws IllegalArgumentException if no resource exists at {@code path}
+   * @throws RuntimeException if reading or writing fails
+   */
+  public static void copyResourceToFile(String path, File outFile) {
+    try (InputStream input = openResource(path)) {
+      copyToFile(input, outFile);
+    } catch (IOException e) {
+      throw new RuntimeException("Problem with copying resource to file", e);
+    }
+  }
+
+  /**
+   * gunzip resource at the given path to the given output {@link File}.
+   *
+   * @throws IllegalArgumentException if no resource exists at {@code path}
+   * @throws RuntimeException if reading, decompressing or writing fails
+   */
+  public static void gunzipResourceToFile(String path, File outFile) {
+    try (InputStream inputRaw = openResource(path);
+        GZIPInputStream input = new GZIPInputStream(inputRaw)) {
+      copyToFile(input, outFile);
+    } catch (IOException e) {
+      throw new RuntimeException("Problem with copying resource to file", e);
+    }
+  }
+
+  /** Open the resource at {@code path}, failing with a clear message when it is missing. */
+  private static InputStream openResource(String path) {
+    final InputStream input = ResourceUtils.class.getResourceAsStream(path);
+    if (input == null) {
+      // getResourceAsStream() reports a missing resource by returning null.
+      throw new IllegalArgumentException("Resource not found: " + path);
+    }
+    return input;
+  }
+
+  /** Write everything remaining in {@code input} to {@code outFile}. */
+  private static void copyToFile(InputStream input, File outFile) throws IOException {
+    try (OutputStream os = new FileOutputStream(outFile)) {
+      final byte[] buffer = new byte[1024];
+      int length;
+      // read() returns -1 at end of stream; test for that rather than for "> 0".
+      while ((length = input.read(buffer)) != -1) {
+        os.write(buffer, 0, length);
+      }
+    }
+  }
+}
diff --git a/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDellySvTest.java b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDellySvTest.java
new file mode 100644
index 0000000..048d0f0
--- /dev/null
+++ b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDellySvTest.java
@@ -0,0 +1,61 @@
+package com.github.bihealth.varfish_annotator.annotate_svs;
+
+import com.github.bihealth.varfish_annotator.ResourceUtils;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFFileReader;
+import htsjdk.variant.vcf.VCFHeader;
+import java.io.File;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+public class CallerSupportDellySvTest {
+
+  @TempDir public File tmpFolder;
+  File vcfFile;
+  File otherVcfFile;
+  CallerSupportDelly2 callerSupport;
+
+  @BeforeEach
+  void initEach() {
+    vcfFile = new File(tmpFolder + "/vcf-header.vcf");
+    ResourceUtils.copyResourceToFile("/callers-sv/delly2-head.vcf", vcfFile);
+    otherVcfFile = new File(tmpFolder + "/incompatible.vcf");
+    ResourceUtils.copyResourceToFile("/callers-sv/manta-head.vcf", otherVcfFile);
+    callerSupport = new CallerSupportDelly2();
+  }
+
+  @Test
+  void testGetSvCaller() {
+    Assertions.assertEquals(SvCaller.DELLY2_SV, callerSupport.getSvCaller());
+  }
+
+  @Test
+  void testIsCompatiblePositive() {
+    final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
+    final VCFHeader vcfHeader = vcfReader.getHeader();
+
+    Assertions.assertTrue(callerSupport.isCompatible(vcfHeader));
+    Assertions.assertEquals("1.1.3", callerSupport.getVersion(vcfReader));
+  }
+
+  @Test
+  void testIsCompatibleNegative() {
+    // The header of the other caller's VCF prepared in initEach() must be rejected.
+    final VCFFileReader vcfReader = new VCFFileReader(otherVcfFile, false);
+    final VCFHeader vcfHeader = vcfReader.getHeader();
+
+    Assertions.assertFalse(callerSupport.isCompatible(vcfHeader));
+  }
+
+  @Test
+  void testBuildSampleGenotype() {
+    final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
+    final VariantContext vc = vcfReader.iterator().next();
+    final SampleGenotype sampleGenotype = callerSupport.buildSampleGenotype(vc, 1, "SAMPLE");
+    final String expected =
+        "SampleGenotype{sampleName='SAMPLE', genotype='0/1', filters=[], genotypeQuality=59, pairedEndCoverage=0, pairedEndVariantSupport=0, splitReadCoverage=11, splitReadVariantSupport=4, averageMappingQuality=null, copyNumber=2, averageNormalizedCoverage=null, pointCount=null}";
+    Assertions.assertEquals(expected, sampleGenotype.toString());
+  }
+}
diff --git a/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenCnvTest.java b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenCnvTest.java
new file mode 100644
index 0000000..cdf13f8
--- /dev/null
+++ b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenCnvTest.java
@@ -0,0 +1,62 @@
+package com.github.bihealth.varfish_annotator.annotate_svs;
+
+import com.github.bihealth.varfish_annotator.ResourceUtils;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFFileReader;
+import htsjdk.variant.vcf.VCFHeader;
+import java.io.File;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+public class CallerSupportDragenCnvTest {
+
+  @TempDir public File tmpFolder;
+  File vcfFile;
+  File otherVcfFile;
+  CallerSupportDragenCnv callerSupport;
+
+  @BeforeEach
+  void initEach() {
+    vcfFile = new File(tmpFolder + "/vcf-header.vcf");
+    ResourceUtils.copyResourceToFile("/callers-sv/dragen-cnv-head.vcf", vcfFile);
+    otherVcfFile = new File(tmpFolder + "/incompatible.vcf");
+    ResourceUtils.copyResourceToFile("/callers-sv/delly2-head.vcf", otherVcfFile);
+    callerSupport = new CallerSupportDragenCnv();
+  }
+
+  @Test
+  void testGetSvCaller() {
+    Assertions.assertEquals(SvCaller.DRAGEN_CNV, callerSupport.getSvCaller());
+  }
+
+  @Test
+  void testIsCompatiblePositive() {
+    final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
+    final VCFHeader vcfHeader = vcfReader.getHeader();
+
+    Assertions.assertTrue(callerSupport.isCompatible(vcfHeader));
+    Assertions.assertEquals(
+        "SW: 07.021.624.3.10.4, HW: 07.021.624", callerSupport.getVersion(vcfReader));
+  }
+
+  @Test
+  void testIsCompatibleNegative() {
+    // The header of the other caller's VCF prepared in initEach() must be rejected.
+    final VCFFileReader vcfReader = new VCFFileReader(otherVcfFile, false);
+    final VCFHeader vcfHeader = vcfReader.getHeader();
+
+    Assertions.assertFalse(callerSupport.isCompatible(vcfHeader));
+  }
+
+  @Test
+  void testBuildSampleGenotype() {
+    final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
+    final VariantContext vc = vcfReader.iterator().next();
+    final SampleGenotype sampleGenotype = callerSupport.buildSampleGenotype(vc, 1, "SAMPLE");
+    final String expected =
+        "SampleGenotype{sampleName='SAMPLE', genotype='0/1', filters=[], genotypeQuality=null, pairedEndCoverage=null, pairedEndVariantSupport=2, splitReadCoverage=null, splitReadVariantSupport=null, averageMappingQuality=null, copyNumber=null, averageNormalizedCoverage=0.321909, pointCount=1}";
+    Assertions.assertEquals(expected, sampleGenotype.toString());
+  }
+}
diff
--git a/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenSvTest.java b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenSvTest.java new file mode 100644 index 0000000..ebe5b9f --- /dev/null +++ b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportDragenSvTest.java @@ -0,0 +1,60 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import com.github.bihealth.varfish_annotator.ResourceUtils; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFHeader; +import java.io.File; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class CallerSupportDragenSvTest { + + @TempDir public File tmpFolder; + File vcfFile; + File otherVcfFile; + CallerSupportDragenSv callerSupport; + + @BeforeEach + void initEach() { + vcfFile = new File(tmpFolder + "/vcf-header.vcf"); + ResourceUtils.copyResourceToFile("/callers-sv/dragen-sv-head.vcf", vcfFile); + otherVcfFile = new File(tmpFolder + "/incompatible.vcf"); + ResourceUtils.copyResourceToFile("/callers-sv/delly2-head.vcf", otherVcfFile); + callerSupport = new CallerSupportDragenSv(); + } + + @Test + void testGetSvCaller() { + Assertions.assertEquals(SvCaller.DRAGEN_SV, callerSupport.getSvCaller()); + } + + @Test + void testIsCompatiblePositive() { + final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false); + final VCFHeader vcfHeader = vcfReader.getHeader(); + + Assertions.assertTrue(callerSupport.isCompatible(vcfHeader)); + Assertions.assertEquals(callerSupport.getVersion(vcfReader), "07.021.624.3.10.4"); + } + + @Test + void testIsCompatibleNegative() { + final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false); + final VCFHeader vcfHeader = 
vcfReader.getHeader(); + + Assertions.assertTrue(callerSupport.isCompatible(vcfHeader)); + } + + @Test + void testBuildSampleGenotype() { + final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false); + final VariantContext vc = vcfReader.iterator().next(); + final SampleGenotype sampleGenotype = callerSupport.buildSampleGenotype(vc, 1, "SAMPLE"); + final String expected = + "SampleGenotype{sampleName='SAMPLE', genotype='1/1', filters=[], genotypeQuality=53, pairedEndCoverage=2, pairedEndVariantSupport=2, splitReadCoverage=20, splitReadVariantSupport=20, averageMappingQuality=null, copyNumber=null, averageNormalizedCoverage=null, pointCount=null}"; + Assertions.assertEquals(expected, sampleGenotype.toString()); + } +} diff --git a/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportFactoryTest.java b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportFactoryTest.java new file mode 100644 index 0000000..ac0d2a5 --- /dev/null +++ b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportFactoryTest.java @@ -0,0 +1,35 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import com.github.bihealth.varfish_annotator.ResourceUtils; +import java.io.File; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class CallerSupportFactoryTest { + + @TempDir public File tmpFolder; + File vcfFileManta; + File vcfFileGeneric; + + @BeforeEach + void initEach() { + vcfFileManta = new File(tmpFolder + "/manta-head.vcf"); + ResourceUtils.copyResourceToFile("/callers-sv/manta-head.vcf", vcfFileManta); + vcfFileGeneric = new File(tmpFolder + "/generic-head.vcf"); + ResourceUtils.copyResourceToFile("/callers-sv/generic-head.vcf", vcfFileGeneric); + } + + @Test + void testGetForManta() { + CallerSupport 
callerSupport = CallerSupportFactory.getFor(vcfFileManta); + Assertions.assertTrue(callerSupport instanceof CallerSupportManta); + } + + @Test + void testGetForGeneric() { + CallerSupport callerSupport = CallerSupportFactory.getFor(vcfFileGeneric); + Assertions.assertTrue(callerSupport instanceof CallerSupportGeneric); + } +} diff --git a/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGatkGcnvTest.java b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGatkGcnvTest.java new file mode 100644 index 0000000..c2b1bf4 --- /dev/null +++ b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGatkGcnvTest.java @@ -0,0 +1,60 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import com.github.bihealth.varfish_annotator.ResourceUtils; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFHeader; +import java.io.File; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class CallerSupportGatkGcnvTest { + + @TempDir public File tmpFolder; + File vcfFile; + File otherVcfFile; + CallerSupportGatkGcnv callerSupport; + + @BeforeEach + void initEach() { + vcfFile = new File(tmpFolder + "/vcf-header.vcf"); + ResourceUtils.copyResourceToFile("/callers-sv/gcnv-head.vcf", vcfFile); + otherVcfFile = new File(tmpFolder + "/incompatible.vcf"); + ResourceUtils.copyResourceToFile("/callers-sv/manta-head.vcf", otherVcfFile); + callerSupport = new CallerSupportGatkGcnv(); + } + + @Test + void testGetSvCaller() { + Assertions.assertEquals(SvCaller.GATK_GCNV, callerSupport.getSvCaller()); + } + + @Test + void testIsCompatiblePositive() { + final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false); + final VCFHeader vcfHeader = 
vcfReader.getHeader(); + + Assertions.assertTrue(callerSupport.isCompatible(vcfHeader)); + Assertions.assertEquals(callerSupport.getVersion(vcfReader), "UNKNOWN"); + } + + @Test + void testIsCompatibleNegative() { + final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false); + final VCFHeader vcfHeader = vcfReader.getHeader(); + + Assertions.assertTrue(callerSupport.isCompatible(vcfHeader)); + } + + @Test + void testBuildSampleGenotype() { + final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false); + final VariantContext vc = vcfReader.iterator().next(); + final SampleGenotype sampleGenotype = callerSupport.buildSampleGenotype(vc, 1, "SAMPLE"); + final String expected = + "SampleGenotype{sampleName='SAMPLE', genotype='1', filters=[], genotypeQuality=null, pairedEndCoverage=null, pairedEndVariantSupport=null, splitReadCoverage=null, splitReadVariantSupport=null, averageMappingQuality=null, copyNumber=1, averageNormalizedCoverage=null, pointCount=1}"; + Assertions.assertEquals(expected, sampleGenotype.toString()); + } +} diff --git a/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGenericTest.java b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGenericTest.java new file mode 100644 index 0000000..e25bacc --- /dev/null +++ b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportGenericTest.java @@ -0,0 +1,50 @@ +package com.github.bihealth.varfish_annotator.annotate_svs; + +import com.github.bihealth.varfish_annotator.ResourceUtils; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFHeader; +import java.io.File; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class CallerSupportGenericTest { + + @TempDir 
public File tmpFolder;
+  File vcfFile; // copy of the generic fixture, created in initEach()
+  File otherVcfFile; // never assigned or read in this class
+  CallerSupportGeneric callerSupport;
+
+  @BeforeEach
+  void initEach() { // copies the generic VCF fixture into tmpFolder
+    vcfFile = new File(tmpFolder + "/generic-head.vcf");
+    ResourceUtils.copyResourceToFile("/callers-sv/generic-head.vcf", vcfFile);
+    callerSupport = new CallerSupportGeneric();
+  }
+
+  @Test
+  void testGetSvCaller() {
+    Assertions.assertEquals(SvCaller.GENERIC, callerSupport.getSvCaller());
+  }
+
+  @Test
+  void testIsCompatiblePositive() {
+    final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
+    final VCFHeader vcfHeader = vcfReader.getHeader();
+
+    Assertions.assertTrue(callerSupport.isCompatible(vcfHeader));
+    Assertions.assertEquals("UNKNOWN", callerSupport.getVersion(vcfReader)); // expected first
+  }
+
+  @Test
+  void testBuildSampleGenotype() { // builds the genotype from the first record of the fixture
+    final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
+    final VariantContext vc = vcfReader.iterator().next();
+    final SampleGenotype sampleGenotype = callerSupport.buildSampleGenotype(vc, 1, "SAMPLE");
+    final String expected =
+        "SampleGenotype{sampleName='SAMPLE', genotype='0/1', filters=[\"\"\"LowQual\"\"\"], genotypeQuality=59, pairedEndCoverage=null, pairedEndVariantSupport=null, splitReadCoverage=null, splitReadVariantSupport=null, averageMappingQuality=null, copyNumber=null, averageNormalizedCoverage=null, pointCount=null}";
+    Assertions.assertEquals(expected, sampleGenotype.toString());
+  }
+}
diff --git a/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportMantaTest.java b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportMantaTest.java
new file mode 100644
index 0000000..900750c
--- /dev/null
+++ b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportMantaTest.java
@@ -0,0 +1,60 @@
+package com.github.bihealth.varfish_annotator.annotate_svs;
+
+import com.github.bihealth.varfish_annotator.ResourceUtils;
+import
htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFFileReader;
+import htsjdk.variant.vcf.VCFHeader;
+import java.io.File;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+public class CallerSupportMantaTest {
+
+  @TempDir public File tmpFolder;
+  File vcfFile; // copy of the Manta fixture
+  File otherVcfFile; // copy of the Delly2 fixture, used as the incompatible input
+  CallerSupportManta callerSupport;
+
+  @BeforeEach
+  void initEach() { // copies both fixtures into tmpFolder
+    vcfFile = new File(tmpFolder + "/vcf-header.vcf");
+    ResourceUtils.copyResourceToFile("/callers-sv/manta-head.vcf", vcfFile);
+    otherVcfFile = new File(tmpFolder + "/incompatible.vcf");
+    ResourceUtils.copyResourceToFile("/callers-sv/delly2-head.vcf", otherVcfFile);
+    callerSupport = new CallerSupportManta();
+  }
+
+  @Test
+  void testGetSvCaller() {
+    Assertions.assertEquals(SvCaller.MANTA, callerSupport.getSvCaller());
+  }
+
+  @Test
+  void testIsCompatiblePositive() {
+    final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
+    final VCFHeader vcfHeader = vcfReader.getHeader();
+
+    Assertions.assertTrue(callerSupport.isCompatible(vcfHeader));
+    Assertions.assertEquals("1.6.0", callerSupport.getVersion(vcfReader)); // expected first
+  }
+
+  @Test
+  void testIsCompatibleNegative() { // a Delly2 header must not be accepted as Manta
+    final VCFFileReader vcfReader = new VCFFileReader(otherVcfFile, false);
+    final VCFHeader vcfHeader = vcfReader.getHeader();
+
+    Assertions.assertFalse(callerSupport.isCompatible(vcfHeader));
+  }
+
+  @Test
+  void testBuildSampleGenotype() { // builds the genotype from the first record of the fixture
+    final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
+    final VariantContext vc = vcfReader.iterator().next();
+    final SampleGenotype sampleGenotype = callerSupport.buildSampleGenotype(vc, 1, "SAMPLE");
+    final String expected =
+        "SampleGenotype{sampleName='SAMPLE', genotype='1/1', filters=[], genotypeQuality=53, pairedEndCoverage=2, pairedEndVariantSupport=2, splitReadCoverage=20, splitReadVariantSupport=20, averageMappingQuality=null, copyNumber=null,
averageNormalizedCoverage=null, pointCount=null}";
+    Assertions.assertEquals(expected, sampleGenotype.toString());
+  }
+}
diff --git a/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportXhmmTest.java b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportXhmmTest.java
new file mode 100644
index 0000000..c32db0d
--- /dev/null
+++ b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/CallerSupportXhmmTest.java
@@ -0,0 +1,60 @@
+package com.github.bihealth.varfish_annotator.annotate_svs;
+
+import com.github.bihealth.varfish_annotator.ResourceUtils;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFFileReader;
+import htsjdk.variant.vcf.VCFHeader;
+import java.io.File;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+public class CallerSupportXhmmTest {
+
+  @TempDir public File tmpFolder;
+  File vcfFile; // copy of the XHMM fixture
+  File otherVcfFile; // copy of the Manta fixture
+  CallerSupportXhmm callerSupport;
+
+  @BeforeEach
+  void initEach() { // copies both fixtures into tmpFolder
+    vcfFile = new File(tmpFolder + "/vcf-header.vcf");
+    ResourceUtils.copyResourceToFile("/callers-sv/xhmm-head.vcf", vcfFile);
+    otherVcfFile = new File(tmpFolder + "/incompatible.vcf");
+    ResourceUtils.copyResourceToFile("/callers-sv/manta-head.vcf", otherVcfFile);
+    callerSupport = new CallerSupportXhmm();
+  }
+
+  @Test
+  void testGetSvCaller() {
+    Assertions.assertEquals(SvCaller.XHMM, callerSupport.getSvCaller());
+  }
+
+  @Test
+  void testIsCompatiblePositive() { // JUnit argument order is (expected, actual)
+    final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
+    final VCFHeader vcfHeader = vcfReader.getHeader();
+
+    Assertions.assertTrue(callerSupport.isCompatible(vcfHeader));
+    Assertions.assertEquals("2016_01_04.cc14e52", callerSupport.getVersion(vcfReader));
+  }
+
+  @Test
+  void testIsCompatibleNegative() {
+    final
VCFFileReader vcfReader = new VCFFileReader(otherVcfFile, false);
+    final VCFHeader vcfHeader = vcfReader.getHeader();
+
+    Assertions.assertFalse(callerSupport.isCompatible(vcfHeader)); // Manta header
+  }
+
+  @Test
+  void testBuildSampleGenotype() { // builds the genotype from the first record of the fixture
+    final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
+    final VariantContext vc = vcfReader.iterator().next();
+    final SampleGenotype sampleGenotype = callerSupport.buildSampleGenotype(vc, 1, "SAMPLE");
+    final String expected =
+        "SampleGenotype{sampleName='SAMPLE', genotype='0', filters=[], genotypeQuality=null, pairedEndCoverage=null, pairedEndVariantSupport=null, splitReadCoverage=null, splitReadVariantSupport=null, averageMappingQuality=null, copyNumber=null, averageNormalizedCoverage=3.47, pointCount=null}";
+    Assertions.assertEquals(expected, sampleGenotype.toString());
+  }
+}
diff --git a/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotypeBuilderTest.java b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotypeBuilderTest.java
new file mode 100644
index 0000000..8b21ab1
--- /dev/null
+++ b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotypeBuilderTest.java
@@ -0,0 +1,41 @@
+package com.github.bihealth.varfish_annotator.annotate_svs;
+
+import com.google.common.collect.ImmutableList;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class SampleGenotypeBuilderTest {
+  @Test
+  public void testAll() {
+    final SampleGenotypeBuilder builder = new SampleGenotypeBuilder();
+    builder.setSampleName("SAMPLE");
+    builder.setGenotype("0/1");
+    builder.setFilters(ImmutableList.of("PASS"));
+    builder.setGenotypeQuality(1);
+    builder.setPairedEndCoverage(5);
+    builder.setPairedEndVariantSupport(2);
+    builder.setSplitReadCoverage(7);
+    builder.setSplitReadVariantSupport(3);
+    builder.setAverageMappingQuality(10);
+    builder.setCopyNumber(3);
+
builder.setAverageNormalizedCoverage(3.3);
+    builder.setPointCount(100);
+
+    Assertions.assertEquals( // toString() is compared against the values set above
+        "SampleGenotypeBuilder{sampleName='SAMPLE', genotype='0/1', filters=[PASS], genotypeQuality=1, pairedEndCoverage=5, pairedEndVariantSupport=2, splitReadCoverage=7, splitReadVariantSupport=3, averageMappingQuality=10, copyNumber=3, averageNormalizedCoverage=3.3, pointCount=100}",
+        builder.toString());
+
+    Assertions.assertEquals("SAMPLE", builder.getSampleName()); // getters echo the setters
+    Assertions.assertEquals("0/1", builder.getGenotype());
+    Assertions.assertEquals(ImmutableList.of("PASS"), builder.getFilters());
+    Assertions.assertEquals(1, builder.getGenotypeQuality());
+    Assertions.assertEquals(5, builder.getPairedEndCoverage());
+    Assertions.assertEquals(2, builder.getPairedEndVariantSupport());
+    Assertions.assertEquals(7, builder.getSplitReadCoverage());
+    Assertions.assertEquals(3, builder.getSplitReadVariantSupport());
+    Assertions.assertEquals(10, builder.getAverageMappingQuality());
+    Assertions.assertEquals(3, builder.getCopyNumber());
+    Assertions.assertEquals(3.3, builder.getAverageNormalizedCoverage());
+    Assertions.assertEquals(100, builder.getPointCount());
+  }
+}
diff --git a/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotypeTest.java b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotypeTest.java
new file mode 100644
index 0000000..66abe51
--- /dev/null
+++ b/varfish-annotator-core/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotypeTest.java
@@ -0,0 +1,62 @@
+package com.github.bihealth.varfish_annotator.annotate_svs;
+
+import com.google.common.collect.ImmutableList;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class SampleGenotypeTest {
+  @Test
+  public void testConstruction() { // numeric constructor arguments are all distinct
+    final SampleGenotype obj =
+        new SampleGenotype("SAMPLE", "0/1", ImmutableList.of("PASS"), 1, 2, 3, 4, 5, 6, 7,
8.0, 9);
+
+    Assertions.assertEquals(
+        "SampleGenotype{sampleName='SAMPLE', genotype='0/1', filters=[PASS], genotypeQuality=1, pairedEndCoverage=2, pairedEndVariantSupport=3, splitReadCoverage=4, splitReadVariantSupport=5, averageMappingQuality=6, copyNumber=7, averageNormalizedCoverage=8.0, pointCount=9}",
+        obj.toString());
+    Assertions.assertEquals(obj, obj);
+    Assertions.assertEquals(-716760314, obj.hashCode()); // expected value first
+
+    Assertions.assertEquals("SAMPLE", obj.getSampleName());
+    Assertions.assertEquals("0/1", obj.getGenotype());
+    Assertions.assertEquals(ImmutableList.of("PASS"), obj.getFilters());
+    Assertions.assertEquals(1, obj.getGenotypeQuality());
+    Assertions.assertEquals(2, obj.getPairedEndCoverage());
+    Assertions.assertEquals(3, obj.getPairedEndVariantSupport());
+    Assertions.assertEquals(4, obj.getSplitReadCoverage());
+    Assertions.assertEquals(5, obj.getSplitReadVariantSupport());
+    Assertions.assertEquals(6, obj.getAverageMappingQuality());
+    Assertions.assertEquals(7, obj.getCopyNumber());
+    Assertions.assertEquals(8.0, obj.getAverageNormalizedCoverage());
+    Assertions.assertEquals(9, obj.getPointCount());
+  }
+
+  @Test
+  public void testBuildFragmentsPart() { // only name, genotype and filters are non-null
+    final SampleGenotype obj =
+        new SampleGenotype(
+            "SAMPLE",
+            "0/1",
+            ImmutableList.of("PASS"),
+            null,
+            null,
+            null,
+            null,
+            null,
+            null,
+            null,
+            null,
+            null);
+    Assertions.assertEquals(
+        "\"\"\"SAMPLE\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"PASS\"\"\"}}",
+        obj.buildStringFragment());
+  }
+
+  @Test
+  public void testBuildFragmentsFull() { // every field is set, so every key is expected
+    final SampleGenotype obj =
+        new SampleGenotype("SAMPLE", "0/1", ImmutableList.of("PASS"), 1, 2, 3, 4, 5, 6, 7, 8.0, 9);
+    Assertions.assertEquals(
+        "\"\"\"SAMPLE\"\"\":{\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"ft\"\"\":{\"\"\"PASS\"\"\"},\"\"\"gq\"\"\":1,\"\"\"pec\"\"\":2,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":4,\"\"\"srv\"\"\":5,\"\"\"amq\"\"\":6,\"\"\"cn\"\"\":7,\"\"\"anc\"\"\":8.0,\"\"\"pc\"\"\":9}",
+        obj.buildStringFragment());
+ } +} diff --git a/varfish-annotator-core/src/test/resources/callers-sv/delly2-head.vcf b/varfish-annotator-core/src/test/resources/callers-sv/delly2-head.vcf new file mode 100644 index 0000000..b7d7b24 --- /dev/null +++ b/varfish-annotator-core/src/test/resources/callers-sv/delly2-head.vcf @@ -0,0 +1,132 @@ +##fileformat=VCFv4.2 +##FILTER= +##fileDate=20220829 +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##FILTER= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##reference=/fast/projects/cubit/20.05/static_data/reference/GRCh37/hs37d5/hs37d5.fa +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##bcftools_viewVersion=1.15.1+htslib-1.15.1 +##bcftools_viewCommand=view -O z -o work/bwa.delly2.SAMPLE-N1-DNA1-WGS1/out/bwa.delly2.SAMPLE-N1-DNA1-WGS1.vcf.gz 
/data/gpfs-1/users/holtgrem_c/scratch/tmp/hpc-cpu-164/20220829/tmp.jo3WnHhrQp/cwd/1.bcf; Date=Mon Aug 29 16:04:36 2022 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 586412 DEL00000004 C 92 PASS PRECISE;SVTYPE=DEL;SVMETHOD=EMBL.DELLYv1.1.3;END=586439;PE=0;MAPQ=0;CT=3to5;CIPOS=-20,20;CIEND=-20,20;SRMAPQ=23;INSLEN=0;HOMLEN=20;SR=4;SRQ=1;CONSENSUS=CTCAGGGTGTTCGGGATAAAGAAGACTCAGGAAGACAAGTATGAAGCATAATCTGTGACATTATTGATATCTTCCTGAAGAACATAATTCCTGCCTACCATCAACAAGCATCAATACTTTCTACCAGCTATTCTCAACCCTCATCATCGGAAGAGACAGACACTGACTGTGTCAAA;CE=1.96018;AC=3;AN=6 GT:GL:GQ:FT:RCL:RC:RCR:RDCN:DR:DV:RR:RV 0/1:-5.90527,0,-14.2974:59:PASS:203:373:203:2:0:0:7:4 0/1:-2.23535,0,-11.3961:22:PASS:188:321:130:2:0:0:6:2 0/1:-1.0313,0,-19.791:10:LowQual:327:515:247:2:0:0:11:2 diff --git a/varfish-annotator-core/src/test/resources/callers-sv/dragen-cnv-head.vcf b/varfish-annotator-core/src/test/resources/callers-sv/dragen-cnv-head.vcf new file mode 100644 index 0000000..046597a --- /dev/null +++ b/varfish-annotator-core/src/test/resources/callers-sv/dragen-cnv-head.vcf @@ -0,0 +1,48 @@ +##fileformat=VCFv4.2 +##DRAGENVersion= +##DRAGENCommandLine= +##reference=file:///staging/human/reference/hs37d5/hs37d5.fa.k_21.f_16.m_149 +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##ALT= +##ALT= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##CoverageUniformity=0.333610 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 1283844 DRAGEN:LOSS:1:1283845-1284844 N 20 cnvLength SVLEN=-1000;SVTYPE=CNV;END=1284844;REFLEN=1000 GT:SM:CN:BC:PE 0/1:0.321909:1:1:1,1 diff --git a/varfish-annotator-core/src/test/resources/callers-sv/dragen-sv-head.vcf 
b/varfish-annotator-core/src/test/resources/callers-sv/dragen-sv-head.vcf new file mode 100644 index 0000000..43d239f --- /dev/null +++ b/varfish-annotator-core/src/test/resources/callers-sv/dragen-sv-head.vcf @@ -0,0 +1,133 @@ +##fileformat=VCFv4.1 +##fileDate=20220321 +##source=DRAGEN 07.021.624.3.10.4 +##reference=file:///staging/human/reference/hs37d5/hs37d5.fa.k_21.f_16.m_149 +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT=0.999"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##ALT= +##ALT= +##ALT= +##cmdline=dragen --ref-dir /staging/human/reference/hs37d5/hs37d5.fa.k_21.f_16.m_149 --fastq-file1 /mnt/smb01-hum/NGSRawData/220318_A01077_0174_AH7JGVDMXY/Data/Intensities/BaseCalls/NA-12878WGS-Genom-size_S1_R1_001.fastq.gz --fastq-file2 
/mnt/smb01-hum/NGSRawData/220318_A01077_0174_AH7JGVDMXY/Data/Intensities/BaseCalls/NA-12878WGS-Genom-size_S1_R2_001.fastq.gz --output-directory /staging/output/220318_A01077_0174_AH7JGVDMXY/NA-12878WGSWGS/ --output-file-prefix NA-12878WGSWGS_dragen --RGID WGS --RGSM NA-12878WGSWGS --num-threads 46 --enable-map-align true --enable-map-align-output true --enable-duplicate-marking true --enable-variant-caller true --qc-cross-cont-vcf /opt/edico/config/sample_cross_contamination_resource_GRCh37.vcf.gz --enable-cnv true --cnv-enable-self-normalization true --enable-sv true --qc-coverage-region-1 /staging/human/bed/CDS-v19-ROIs_v2.bed --qc-coverage-reports-1 cov_report full_res --qc-coverage-region-2 /staging/human/bed/Regions_Exomev8.bed --qc-coverage-reports-2 cov_report full_res --qc-coverage-region-3 /staging/human/bed/Padded_Exomev8.bed --qc-coverage-reports-3 cov_report full_res +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 1598413 MantaDEL:19042:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 762 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:53:815,56,0:0,2:0,20 diff --git a/varfish-annotator-core/src/test/resources/callers-sv/gcnv-head.vcf b/varfish-annotator-core/src/test/resources/callers-sv/gcnv-head.vcf new file mode 100644 index 0000000..14d3429 --- /dev/null +++ b/varfish-annotator-core/src/test/resources/callers-sv/gcnv-head.vcf @@ -0,0 +1,109 @@ +##fileformat=VCFv4.2 +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= 
+##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##source=PostprocessGermlineCNVCalls +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##bcftools_viewVersion=1.15.1+htslib-1.15.1 +##bcftools_viewCommand=view --force-samples --samples-file /data/gpfs-1/users/barbosae_c/scratch/tmp/hpc-cpu-67/20220902/tmp.QNvIpOGpWs/samples.txt --output-type u work/bwa.gcnv_merge_cohort_vcfs.default/out/bwa.gcnv_merge_cohort_vcfs.default.vcf.gz; Date=Fri Sep 2 03:01:57 2022 +##bcftools_viewCommand=view --output-file work/bwa.gcnv.NA12878-N1-DNA1-WGS1/out/bwa.gcnv.NA12878-N1-DNA1-WGS1.vcf.gz --output-type z --include '(GT == "alt")'; Date=Fri Sep 2 03:01:57 2022 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 4124001 . N . . END=4125000;SVTYPE=DEL;SVLEN=-999;AC=1;AN=1 GT:CN:NP:QA:QS:QSE:QSS 1:1:1:107:107:107:107 diff --git a/varfish-annotator-core/src/test/resources/callers-sv/generic-head.vcf b/varfish-annotator-core/src/test/resources/callers-sv/generic-head.vcf new file mode 100644 index 0000000..b72fb6d --- /dev/null +++ b/varfish-annotator-core/src/test/resources/callers-sv/generic-head.vcf @@ -0,0 +1,14 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +##FILTER= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 586412 . C 92 PASS . 
GT:GQ:FT 0/1:59:LowQual diff --git a/varfish-annotator-core/src/test/resources/callers-sv/manta-head.vcf b/varfish-annotator-core/src/test/resources/callers-sv/manta-head.vcf new file mode 100644 index 0000000..bcc9fde --- /dev/null +++ b/varfish-annotator-core/src/test/resources/callers-sv/manta-head.vcf @@ -0,0 +1,128 @@ +##fileformat=VCFv4.1 +##fileDate=20220907 +##source=GenerateSVCandidates 1.6.0 +##reference=file:///fast/projects/cubit/20.05/static_data/reference/GRCh37/hs37d5/hs37d5.fa +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT=0.999"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##ALT= +##ALT= +##ALT= +##cmdline=configManta.py --referenceFasta /fast/projects/cubit/20.05/static_data/reference/GRCh37/hs37d5/hs37d5.fa --runDir work/bwa.manta.SAMPLE-N1-DNA1-WGS1/work --bam 
ngs_mapping/output/bwa.SAMPLE-N1-DNA1-WGS1/out/bwa.SAMPLE-N1-DNA1-WGS1.bam +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 1598413 MantaDEL:19042:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 762 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:53:815,56,0:0,2:0,20 diff --git a/varfish-annotator-core/src/test/resources/callers-sv/xhmm-head.vcf b/varfish-annotator-core/src/test/resources/callers-sv/xhmm-head.vcf new file mode 100644 index 0000000..59cacb6 --- /dev/null +++ b/varfish-annotator-core/src/test/resources/callers-sv/xhmm-head.vcf @@ -0,0 +1,59 @@ +##fileformat=VCFv4.1 +##FILTER= +##ALT= +##ALT= +##ALT= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 586412 1:586412-69696597 N , . . AC=0,1;AF=0,0.01;AN=1;END=69696597;IMPRECISE;SVLEN=14860;SVTYPE=CNV;TPOS=586412;TEND=1598580;NUMT=6;GQT=26;PREVTARGSTART=586412;PREVTARGEND=1598580;POSTTARGSTART=586412;POSTTARGEND=1598580 GT:NDQ:DQ:EQ:SQ:NQ:LQ:RQ:PL:RD:ORD:DSCVR 2:68:0:0,26:0,68:97,0:0,25:0,31:68,255,0:3.47:130.54:Y