diff --git a/repo_utils/test_files/answer_key/bench12/fn.vcf b/repo_utils/test_files/answer_key/bench12/fn.vcf index 0e520aab..e9b28d1e 100644 --- a/repo_utils/test_files/answer_key/bench12/fn.vcf +++ b/repo_utils/test_files/answer_key/bench12/fn.vcf @@ -47,11 +47,11 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA24385 -chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGAGGGAG 60 . QNAME=HG002-S9-H2-000001F;QSTART=232485;QSTRAND=+;SVTYPE=INS;SVLEN=184;PctSeqSimilarity=0.662277;PctSizeSimilarity=0.338858;PctRecOverlap=0;SizeDiff=-359;StartDistance=-139;EndDistance=-139;TruScore=41;MatchId=3.0.0;Multi GT:PL:DP 0/1:7,9,6:23,9 -chr20 279062 . G GGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGTGGGAGGGAGGGAGGGAT 60 . QNAME=HG002-S9-H1-000001F;QSTART=258112;QSTRAND=+;SVTYPE=INS;SVLEN=139;PctSeqSimilarity=0.419771;PctSizeSimilarity=0.255985;PctRecOverlap=0;SizeDiff=-404;StartDistance=-7;EndDistance=-7;TruScore=27;MatchId=3.1.0;Multi GT:PL:DP 1/0:5,1,4:27,38 +chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGAGGGAG 60 . QNAME=HG002-S9-H2-000001F;QSTART=232485;QSTRAND=+;SVTYPE=INS;SVLEN=184;PctSeqSimilarity=0.662277;PctSizeSimilarity=0.338858;PctRecOverlap=0.3407;SizeDiff=-359;StartDistance=-139;EndDistance=-139;TruScore=44;MatchId=3.0.0;Multi GT:PL:DP 0/1:7,9,6:23,9 +chr20 279062 . G GGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGTGGGAGGGAGGGAGGGAT 60 . QNAME=HG002-S9-H1-000001F;QSTART=258112;QSTRAND=+;SVTYPE=INS;SVLEN=139;PctSeqSimilarity=0.419771;PctSizeSimilarity=0.255985;PctRecOverlap=0.255985;SizeDiff=-404;StartDistance=-7;EndDistance=-7;TruScore=31;MatchId=3.1.0;Multi GT:PL:DP 1/0:5,1,4:27,38 chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCCATCCCCCGTCCGCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 61 . QNAME=HG002-S9-H2-000001F;QSTART=374905;QSTRAND=+;SVTYPE=INS;SVLEN=227;PctSeqSimilarity=0.995604;PctSizeSimilarity=0.995595;PctRecOverlap=1;SizeDiff=1;StartDistance=0;EndDistance=0;TruScore=99;MatchId=6.0.0;Multi GT:PL 0/1:. -chr20 642300 . G GCCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGC 60 . QNAME=HG002-S9-H1-000001F;QSTART=621812;QSTRAND=+;SVTYPE=INS;SVLEN=408;PctSeqSimilarity=0.81157;PctSizeSimilarity=0.64557;PctRecOverlap=0;SizeDiff=-224;StartDistance=70;EndDistance=70;GTMatch;TruScore=56;MatchId=8.2.1 GT:PL:DP 1/0:5,10,5:44,36 -chr20 641913 . G GGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGA 60 . QNAME=HG002-S9-H1-000001F;QSTART=621365;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.611797;PctSizeSimilarity=0.10443;PctRecOverlap=0;SizeDiff=-566;StartDistance=-317;EndDistance=-317;GTMatch;TruScore=33;MatchId=8.0.1;Multi GT:PL:DP 1/0:7,5,7:34,13 -chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.431452;PctSizeSimilarity=0.104762;PctRecOverlap=0;SizeDiff=-564;StartDistance=123;EndDistance=123;GTMatch;TruScore=24;MatchId=8.3.0;Multi GT:PL:DP 0/1:6,1,9:20,25 -chr20 709830 . G GACACTGCTCGGTCCTCCGCTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCGCTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGAACACTGCTCGGTCCTCCACTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGA 60 . QNAME=HG002-S9-H1-000001F;QSTART=689726;QSTRAND=+;SVTYPE=INS;SVLEN=465;PctSeqSimilarity=0.0637584;PctSizeSimilarity=0.2;PctRecOverlap=0.0106383;SizeDiff=372;StartDistance=71;EndDistance=-22;GTMatch;TruScore=8;MatchId=9.1.0;Multi GT:PL:DP 1/0:4,8,7:50,48 +chr20 642300 . G GCCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGC 60 . QNAME=HG002-S9-H1-000001F;QSTART=621812;QSTRAND=+;SVTYPE=INS;SVLEN=408;PctSeqSimilarity=0.81157;PctSizeSimilarity=0.64557;PctRecOverlap=0.64613;SizeDiff=-224;StartDistance=70;EndDistance=70;GTMatch;TruScore=70;MatchId=8.2.1 GT:PL:DP 1/0:5,10,5:44,36 +chr20 641913 . G GGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGA 60 . QNAME=HG002-S9-H1-000001F;QSTART=621365;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.597718;PctSizeSimilarity=0.104762;PctRecOverlap=0.0871632;SizeDiff=-564;StartDistance=-294;EndDistance=-294;TruScore=26;MatchId=8.0.0;Multi GT:PL:DP 1/0:7,5,7:34,13 +chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.431452;PctSizeSimilarity=0.104762;PctRecOverlap=0.106181;SizeDiff=-564;StartDistance=123;EndDistance=123;GTMatch;TruScore=21;MatchId=8.3.0;Multi GT:PL:DP 0/1:6,1,9:20,25 +chr20 709830 . G GACACTGCTCGGTCCTCCGCTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCGCTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGAACACTGCTCGGTCCTCCACTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGA 60 . QNAME=HG002-S9-H1-000001F;QSTART=689726;QSTRAND=+;SVTYPE=INS;SVLEN=465;PctSeqSimilarity=0.0637584;PctSizeSimilarity=0.2;PctRecOverlap=0.202151;SizeDiff=372;StartDistance=71;EndDistance=-22;GTMatch;TruScore=15;MatchId=9.1.0;Multi GT:PL:DP 1/0:4,8,7:50,48 chr20 949516 . CTCTGAAGAGGTAACATTTGAACTGAGATGTAGAGGGAGAGAAGGAACTAGCCATGTGGAGATCTGGGGGAGGAGCATTCCAGGCAGAGGGGATAGCAAGTGCA C 60 . QNAME=HG002-S9-H2-000001F;QSTART=905663;QSTRAND=+;SVTYPE=DEL;SVLEN=-103;PctSeqSimilarity=.;PctSizeSimilarity=.;PctRecOverlap=.;SizeDiff=.;StartDistance=.;EndDistance=.;TruScore=.;MatchId=12.0._ GT:PL:DP 0/1:6,6,1:34,8 diff --git a/repo_utils/test_files/answer_key/bench12/fp.vcf b/repo_utils/test_files/answer_key/bench12/fp.vcf index a3766db6..93ec5e29 100644 --- a/repo_utils/test_files/answer_key/bench12/fp.vcf +++ b/repo_utils/test_files/answer_key/bench12/fp.vcf @@ -48,5 +48,5 @@ ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 chr20 380878 . T TGATTAGCTCCGGTTTGCATCACCCGGACCGGGGGATTAGCTCCGGTTTGCATCACCCGGACCGGGG 60 . QNAME=cluster19_000000F;QSTART=25381150;QSTRAND=-;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=.;PctSizeSimilarity=.;PctRecOverlap=.;SizeDiff=.;StartDistance=.;EndDistance=.;TruScore=.;MatchId=5._.0 GT:PL:AD 1/0:8,3,9:38,14 -chr20 642230 . T TGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTG 60 . QNAME=cluster19_000000F;QSTART=25119065;QSTRAND=-;SVTYPE=INS;SVLEN=632;PctSeqSimilarity=0.81157;PctSizeSimilarity=0.64557;PctRecOverlap=0;SizeDiff=-224;StartDistance=70;EndDistance=70;GTMatch;TruScore=56;MatchId=8.2.1 GT:PL:AD 1/0:7,5,6:16,7 -chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=24996803;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.80203;PctSizeSimilarity=0.666667;PctRecOverlap=1;SizeDiff=39;StartDistance=0;EndDistance=0;TruScore=81;MatchId=11.0.0;Multi GT:PL:AD 1/0:5,4,9:13,8 +chr20 642230 . T TGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTG 60 . QNAME=cluster19_000000F;QSTART=25119065;QSTRAND=-;SVTYPE=INS;SVLEN=632;PctSeqSimilarity=0.81157;PctSizeSimilarity=0.64557;PctRecOverlap=0.64613;SizeDiff=-224;StartDistance=70;EndDistance=70;GTMatch;TruScore=70;MatchId=8.2.1 GT:PL:AD 1/0:7,5,6:16,7 +chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=24996803;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.80203;PctSizeSimilarity=0.666667;PctRecOverlap=0.675214;SizeDiff=39;StartDistance=0;EndDistance=0;TruScore=71;MatchId=11.0.0;Multi GT:PL:AD 1/0:5,4,9:13,8 diff --git a/repo_utils/test_files/answer_key/bench12/log.txt b/repo_utils/test_files/answer_key/bench12/log.txt index bfaa6890..50a1da57 100644 --- a/repo_utils/test_files/answer_key/bench12/log.txt +++ b/repo_utils/test_files/answer_key/bench12/log.txt @@ -1,5 +1,5 @@ -2021-10-17 23:27:02,980 [INFO] Running /data/truvari/__main__.py bench -b repo_utils/test_files/input1.vcf.gz -c repo_utils/test_files/input2.vcf.gz -f repo_utils/test_files/reference.fa -o test_results/bench12/ -2021-10-17 23:27:02,981 [INFO] Params: +2021-12-21 13:57:48,338 [INFO] Running /data/truvari/__main__.py bench -b repo_utils/test_files/input1.vcf.gz -c repo_utils/test_files/input2.vcf.gz -f repo_utils/test_files/reference.fa -o test_results/bench12/ +2021-12-21 13:57:48,340 [INFO] Params: { "base": "repo_utils/test_files/input1.vcf.gz", "comp": "repo_utils/test_files/input2.vcf.gz", @@ -27,11 +27,11 @@ "includebed": null, "multimatch": false } -2021-10-17 23:27:02,982 [INFO] Truvari version: 3.1.0-dev -2021-10-17 23:27:03,310 [INFO] Zipped 3934 variants. Counter({'base': 2151, 'comp': 1783}) -2021-10-17 23:27:03,312 [INFO] 13 chunks of 30 variants. Counter({'base': 16, 'comp': 14}) -2021-10-17 23:27:03,316 [INFO] Results peek: 8 TP-base 8 FN 50.00% Recall -2021-10-17 23:27:03,318 [INFO] Stats: { +2021-12-21 13:57:48,340 [INFO] Truvari version: 3.1.0-dev +2021-12-21 13:57:48,448 [INFO] Zipped 3934 variants. Counter({'base': 2151, 'comp': 1783}) +2021-12-21 13:57:48,450 [INFO] 13 chunks of 3934 variants. Counter({'__filtered': 3904, 'base': 16, 'comp': 14}) +2021-12-21 13:57:48,455 [INFO] Results peek: 8 TP-base 8 FN 50.00% Recall +2021-12-21 13:57:48,456 [INFO] Stats: { "TP-base": 8, "TP-call": 8, "FP": 3, @@ -47,4 +47,4 @@ "TP-base_FP-gt": 5, "gt_concordance": 0.375 } -2021-10-17 23:27:03,323 [INFO] Finished bench +2021-12-21 13:57:48,460 [INFO] Finished bench diff --git a/repo_utils/test_files/answer_key/bench12/tp-base.vcf b/repo_utils/test_files/answer_key/bench12/tp-base.vcf index b58186be..2fd6673d 100644 --- a/repo_utils/test_files/answer_key/bench12/tp-base.vcf +++ b/repo_utils/test_files/answer_key/bench12/tp-base.vcf @@ -47,11 +47,11 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA24385 -chr20 149013 . A ACCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGAC 60 . QNAME=HG002-S9-H1-000001F;QSTART=127907;QSTRAND=+;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-82;EndDistance=-82;GTMatch;TruScore=74;MatchId=1.0.0 GT:PL:DP 1/1:8,8,5:15,29 -chr20 279069 . A AGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGCGGGACGGAGGGAGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=232808;QSTRAND=+;SVTYPE=INS;SVLEN=438;PctSeqSimilarity=0.873856;PctSizeSimilarity=0.80663;PctRecOverlap=1;SizeDiff=-105;StartDistance=0;EndDistance=0;TruScore=88;MatchId=3.2.0 GT:PL:DP 0/1:7,10,7:12,48 +chr20 149013 . A ACCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGAC 60 . QNAME=HG002-S9-H1-000001F;QSTART=127907;QSTRAND=+;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-82;EndDistance=-82;GTMatch;TruScore=66;MatchId=1.0.0 GT:PL:DP 1/1:8,8,5:15,29 +chr20 279069 . A AGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGCGGGACGGAGGGAGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=232808;QSTRAND=+;SVTYPE=INS;SVLEN=438;PctSeqSimilarity=0.873856;PctSizeSimilarity=0.80663;PctRecOverlap=0.808471;SizeDiff=-105;StartDistance=0;EndDistance=0;TruScore=82;MatchId=3.2.0 GT:PL:DP 0/1:7,10,7:12,48 chr20 306268 . A ACCAGGCTGGAGTGCAGTGGCTCACTGCGTGGCTCGCTACAGCCTACAACTCCTGGGCTCCAGCAATCCTGCTGCCCCAGCCTCCTGTGTAACTGAGACTACAGGCACGCACCACCACACCCAGCTAATTTTTTCTTTCTTTTTTTTTTTTTTGAGATGAACTCTCACTCTGTTGC 60 . QNAME=HG002-S9-H1-000001F;QSTART=285475;QSTRAND=+;SVTYPE=INS;SVLEN=175;PctSeqSimilarity=0.997159;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;GTMatch;TruScore=99;MatchId=4.0.0 GT:PL:DP 1/1:2,7,2:17,31 chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCATCCCCCGTCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 60 . QNAME=HG002-S9-H1-000001F;QSTART=400044;QSTRAND=+;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=6.1.0 GT:PL:DP 1/0:4,8,6:32,9 chr20 613783 . TGTGTGCTGAGTCCAGCTCAAGTCCCTTGGTTCCCACTGCTGCTAAGCATGCACG T 60 . QNAME=HG002-S9-H1-000001F;QSTART=593293;QSTRAND=+;SVTYPE=DEL;SVLEN=-54;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=7.0.0 GT:PL:DP 1/1:6,8,8:40,41 -chr20 642207 . T TGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGG 60 . QNAME=HG002-S9-H2-000001F;QSTART=596630;QSTRAND=+;SVTYPE=INS;SVLEN=501;PctSeqSimilarity=0.88173;PctSizeSimilarity=0.795238;PctRecOverlap=1;SizeDiff=-129;StartDistance=0;EndDistance=0;GTMatch;TruScore=88;MatchId=8.1.0 GT:PL:DP 0/1:5,9,1:7,43 +chr20 642207 . T TGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGG 60 . QNAME=HG002-S9-H2-000001F;QSTART=596630;QSTRAND=+;SVTYPE=INS;SVLEN=501;PctSeqSimilarity=0.88173;PctSizeSimilarity=0.795238;PctRecOverlap=0.793978;SizeDiff=-129;StartDistance=0;EndDistance=0;GTMatch;TruScore=82;MatchId=8.1.0 GT:PL:DP 0/1:5,9,1:7,43 chr20 709759 . AGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCC A 60 . QNAME=HG002-S9-H2-000001F;QSTART=664783;QSTRAND=+;SVTYPE=DEL;SVLEN=-93;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=9.0.0 GT:PL:DP 0/1:9,6,6:13,31 chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=HG002-S9-H1-000001F;QSTART=744872;QSTRAND=+;SVTYPE=INS;SVLEN=117;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=11.0.1 GT:PL:DP 1/1:2,1,5:40,30 diff --git a/repo_utils/test_files/answer_key/bench12/tp-call.vcf b/repo_utils/test_files/answer_key/bench12/tp-call.vcf index a75d7606..fc81c915 100644 --- a/repo_utils/test_files/answer_key/bench12/tp-call.vcf +++ b/repo_utils/test_files/answer_key/bench12/tp-call.vcf @@ -47,11 +47,11 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -chr20 149095 . G GGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGACCCATATTTGGGAA 60 . QNAME=cluster19_000000F;QSTART=25613718;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-82;EndDistance=-82;GTMatch;TruScore=74;MatchId=1.0.0 GT:PL:AD 1/1:10,5,10:30,43 -chr20 279069 . A AGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGC 60 . QNAME=cluster19_000000F;QSTART=25483168;QSTRAND=-;SVTYPE=INS;SVLEN=543;PctSeqSimilarity=0.873856;PctSizeSimilarity=0.80663;PctRecOverlap=1;SizeDiff=-105;StartDistance=0;EndDistance=0;TruScore=88;MatchId=3.2.0 GT:PL:AD 1/1:8,1,10:40,42 +chr20 149095 . G GGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGACCCATATTTGGGAA 60 . QNAME=cluster19_000000F;QSTART=25613718;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-82;EndDistance=-82;GTMatch;TruScore=66;MatchId=1.0.0 GT:PL:AD 1/1:10,5,10:30,43 +chr20 279069 . A AGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGC 60 . QNAME=cluster19_000000F;QSTART=25483168;QSTRAND=-;SVTYPE=INS;SVLEN=543;PctSeqSimilarity=0.873856;PctSizeSimilarity=0.80663;PctRecOverlap=0.808471;SizeDiff=-105;StartDistance=0;EndDistance=0;TruScore=82;MatchId=3.2.0 GT:PL:AD 1/1:8,1,10:40,42 chr20 306268 . A ACCAGGCTGGAGTGCAGTGGCTCACTGCGTGGCTCGCTACAGCCTACAACTCCTGGGCTCCAGCAATCCTGCTGCCCCAGCCTCCTGTGTAACTGAGACTACAGGCACGCACCACCACACCCAGCTAATGTTTTCTTTCTTTTTTTTTTTTTTGAGATGAACTCTCACTCTGTTGC 60 . QNAME=cluster19_000000F;QSTART=25455794;QSTRAND=-;SVTYPE=INS;SVLEN=175;PctSeqSimilarity=0.997159;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;GTMatch;TruScore=99;MatchId=4.0.0 GT:PL:AD 1/1:4,10,2:26,18 chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCATCCCCCGTCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 60 . QNAME=cluster19_000000F;QSTART=25341211;QSTRAND=-;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=6.1.0 GT:PL:AD 1/1:7,10,5:44,42 chr20 613783 . TGTGTGCTGAGTCCAGCTCAAGTCCCTTGGTTCCCACTGCTGCTAAGCATGCACG T 60 . QNAME=cluster19_000000F;QSTART=25148089;QSTRAND=-;SVTYPE=DEL;SVLEN=-54;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=7.0.0 GT:PL:AD 1/0:3,1,8:9,32 -chr20 642207 . T TGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGG 60 . QNAME=cluster19_000000F;QSTART=602404;QSTRAND=+;SVTYPE=INS;SVLEN=630;PctSeqSimilarity=0.88173;PctSizeSimilarity=0.795238;PctRecOverlap=1;SizeDiff=-129;StartDistance=0;EndDistance=0;GTMatch;TruScore=88;MatchId=8.1.0 GT:PL:AD 0/1:2,1,9:9,12 +chr20 642207 . T TGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGG 60 . QNAME=cluster19_000000F;QSTART=602404;QSTRAND=+;SVTYPE=INS;SVLEN=630;PctSeqSimilarity=0.88173;PctSizeSimilarity=0.795238;PctRecOverlap=0.793978;SizeDiff=-129;StartDistance=0;EndDistance=0;GTMatch;TruScore=82;MatchId=8.1.0 GT:PL:AD 0/1:2,1,9:9,12 chr20 709759 . AGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCC A 60 . QNAME=cluster19_000000F;QSTART=25051525;QSTRAND=-;SVTYPE=DEL;SVLEN=-93;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=9.0.0 GT:PL:AD 1/0:8,10,9:32,27 chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=725358;QSTRAND=+;SVTYPE=INS;SVLEN=117;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=11.0.1 GT:PL:AD 0/1:7,8,4:13,5 diff --git a/repo_utils/test_files/answer_key/bench13/fn.vcf b/repo_utils/test_files/answer_key/bench13/fn.vcf index 922e0ac5..1c926871 100644 --- a/repo_utils/test_files/answer_key/bench13/fn.vcf +++ b/repo_utils/test_files/answer_key/bench13/fn.vcf @@ -48,8 +48,8 @@ ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA24385 chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCATCCCCCGTCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 60 . QNAME=HG002-S9-H1-000001F;QSTART=400044;QSTRAND=+;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=0.995604;PctSizeSimilarity=0.995595;PctRecOverlap=1;SizeDiff=-1;StartDistance=0;EndDistance=0;TruScore=99;MatchId=8.1.0;Multi GT:PL:DP 1/0:4,8,6:32,9 -chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.898182;PctSizeSimilarity=0.511628;PctRecOverlap=0;SizeDiff=-63;StartDistance=262;EndDistance=262;GTMatch;TruScore=57;MatchId=10.3.1 GT:PL:DP 0/1:6,1,9:20,25 -chr20 641913 . G GGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGA 60 . QNAME=HG002-S9-H1-000001F;QSTART=621365;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.866873;PctSizeSimilarity=0.292035;PctRecOverlap=0;SizeDiff=-160;StartDistance=-417;EndDistance=-417;TruScore=50;MatchId=10.0.3 GT:PL:DP 1/0:7,5,7:34,13 -chr20 642207 . T TGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGG 60 . QNAME=HG002-S9-H2-000001F;QSTART=596630;QSTRAND=+;SVTYPE=INS;SVLEN=501;PctSeqSimilarity=0.730263;PctSizeSimilarity=0.512974;PctRecOverlap=0;SizeDiff=244;StartDistance=-64;EndDistance=-64;TruScore=49;MatchId=10.1.2 GT:PL:DP 0/1:5,9,1:7,43 -chr20 709830 . G GACACTGCTCGGTCCTCCGCTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCGCTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGAACACTGCTCGGTCCTCCACTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGA 60 . QNAME=HG002-S9-H1-000001F;QSTART=689726;QSTRAND=+;SVTYPE=INS;SVLEN=465;PctSeqSimilarity=0.0637584;PctSizeSimilarity=0.2;PctRecOverlap=0.0106383;SizeDiff=372;StartDistance=71;EndDistance=-22;TruScore=8;MatchId=11.1.0;Multi GT:PL:DP 1/0:4,8,7:50,48 -chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=HG002-S9-H1-000001F;QSTART=744872;QSTRAND=+;SVTYPE=INS;SVLEN=117;PctSeqSimilarity=0.907801;PctSizeSimilarity=0.666667;PctRecOverlap=0;SizeDiff=39;StartDistance=95;EndDistance=95;TruScore=62;MatchId=13.0.0 GT:PL:DP 1/1:2,1,5:40,30 +chr20 642207 . T TGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGG 60 . QNAME=HG002-S9-H2-000001F;QSTART=596630;QSTRAND=+;SVTYPE=INS;SVLEN=501;PctSeqSimilarity=0.730263;PctSizeSimilarity=0.512974;PctRecOverlap=0.512974;SizeDiff=244;StartDistance=-64;EndDistance=-64;TruScore=58;MatchId=10.1.2 GT:PL:DP 0/1:5,9,1:7,43 +chr20 641913 . G GGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGA 60 . QNAME=HG002-S9-H1-000001F;QSTART=621365;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.604082;PctSizeSimilarity=0.404908;PctRecOverlap=0.411043;SizeDiff=-97;StartDistance=7;EndDistance=7;TruScore=47;MatchId=10.0.0 GT:PL:DP 1/0:7,5,7:34,13 +chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.898182;PctSizeSimilarity=0.511628;PctRecOverlap=0;SizeDiff=-63;StartDistance=262;EndDistance=262;GTMatch;TruScore=46;MatchId=10.3.1 GT:PL:DP 0/1:6,1,9:20,25 +chr20 709830 . G GACACTGCTCGGTCCTCCGCTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCGCTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGAACACTGCTCGGTCCTCCACTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCCGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGA 60 . QNAME=HG002-S9-H1-000001F;QSTART=689726;QSTRAND=+;SVTYPE=INS;SVLEN=465;PctSeqSimilarity=0.0637584;PctSizeSimilarity=0.2;PctRecOverlap=0.202151;SizeDiff=372;StartDistance=71;EndDistance=-22;TruScore=15;MatchId=11.1.0;Multi GT:PL:DP 1/0:4,8,7:50,48 +chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=HG002-S9-H1-000001F;QSTART=744872;QSTRAND=+;SVTYPE=INS;SVLEN=117;PctSeqSimilarity=0.907801;PctSizeSimilarity=0.666667;PctRecOverlap=0.025641;SizeDiff=39;StartDistance=95;EndDistance=95;TruScore=53;MatchId=13.0.0 GT:PL:DP 1/1:2,1,5:40,30 diff --git a/repo_utils/test_files/answer_key/bench13/fp.vcf b/repo_utils/test_files/answer_key/bench13/fp.vcf index 21cbba91..05933c7d 100644 --- a/repo_utils/test_files/answer_key/bench13/fp.vcf +++ b/repo_utils/test_files/answer_key/bench13/fp.vcf @@ -46,10 +46,10 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00733 -chr20 149073 . G GAATCCTGACCCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCA 60 . QNAME=cluster23_scaffold_2;QSTART=25547471;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.996479;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-60;EndDistance=-60;TruScore=74;MatchId=1.0.1;Multi GT 0/1 +chr20 149073 . G GAATCCTGACCCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCA 60 . QNAME=cluster23_scaffold_2;QSTART=25547471;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.996479;PctSizeSimilarity=1;PctRecOverlap=0.130435;SizeDiff=0;StartDistance=-60;EndDistance=-60;TruScore=70;MatchId=1.0.1;Multi GT 0/1 chr20 280211 . CAACAACAACAATTGTACTTCCCTAAGGTTACACCCAGCAGGTGCATAAAACCTACAGTAACAAT C 60 . QNAME=cluster23_scaffold_3;QSTART=25431380;QSTRAND=-;SVTYPE=DEL;SVLEN=-64;PctSeqSimilarity=.;PctSizeSimilarity=.;PctRecOverlap=.;SizeDiff=.;StartDistance=.;EndDistance=.;TruScore=.;MatchId=5._.0 GT 1/0 -chr20 642068 . G GTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCC 60 . QNAME=cluster23_scaffold_2;QSTART=25053097;QSTRAND=-;SVTYPE=INS;SVLEN=129;PctSeqSimilarity=0.898182;PctSizeSimilarity=0.511628;PctRecOverlap=0;SizeDiff=-63;StartDistance=262;EndDistance=262;GTMatch;TruScore=57;MatchId=10.3.1 GT 0/1 -chr20 642330 . G GCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGGGC 60 . QNAME=cluster23_scaffold_2;QSTART=25052609;QSTRAND=-;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=0.866873;PctSizeSimilarity=0.292035;PctRecOverlap=0;SizeDiff=-160;StartDistance=-417;EndDistance=-417;TruScore=50;MatchId=10.0.3 GT 0/1 -chr20 642271 . G GGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGC 60 . QNAME=cluster23_scaffold_3;QSTART=25068731;QSTRAND=-;SVTYPE=INS;SVLEN=257;PctSeqSimilarity=0.730263;PctSizeSimilarity=0.512974;PctRecOverlap=0;SizeDiff=244;StartDistance=-64;EndDistance=-64;TruScore=49;MatchId=10.1.2 GT 1/0 -chr20 641906 . G GGCGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCT 60 . QNAME=cluster23_scaffold_2;QSTART=25053389;QSTRAND=-;SVTYPE=INS;SVLEN=163;PctSeqSimilarity=0.753602;PctSizeSimilarity=0.325349;PctRecOverlap=0;SizeDiff=338;StartDistance=301;EndDistance=301;GTMatch;TruScore=45;MatchId=10.1.0;Multi GT 0/1 -chr20 764442 . A AGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGG 60 . QNAME=cluster23_scaffold_3;QSTART=24946242;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.907801;PctSizeSimilarity=0.666667;PctRecOverlap=0;SizeDiff=39;StartDistance=95;EndDistance=95;TruScore=62;MatchId=13.0.0 GT 1/0 +chr20 642271 . G GGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGC 60 . QNAME=cluster23_scaffold_3;QSTART=25068731;QSTRAND=-;SVTYPE=INS;SVLEN=257;PctSeqSimilarity=0.730263;PctSizeSimilarity=0.512974;PctRecOverlap=0.512974;SizeDiff=244;StartDistance=-64;EndDistance=-64;TruScore=58;MatchId=10.1.2 GT 1/0 +chr20 641906 . G GGCGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCT 60 . QNAME=cluster23_scaffold_2;QSTART=25053389;QSTRAND=-;SVTYPE=INS;SVLEN=163;PctSeqSimilarity=0.604082;PctSizeSimilarity=0.404908;PctRecOverlap=0.411043;SizeDiff=-97;StartDistance=7;EndDistance=7;TruScore=47;MatchId=10.0.0 GT 0/1 +chr20 642068 . G GTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCC 60 . QNAME=cluster23_scaffold_2;QSTART=25053097;QSTRAND=-;SVTYPE=INS;SVLEN=129;PctSeqSimilarity=0.898182;PctSizeSimilarity=0.511628;PctRecOverlap=0;SizeDiff=-63;StartDistance=262;EndDistance=262;GTMatch;TruScore=46;MatchId=10.3.1 GT 0/1 +chr20 642330 . G GCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGGGC 60 . QNAME=cluster23_scaffold_2;QSTART=25052609;QSTRAND=-;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=0.866873;PctSizeSimilarity=0.292035;PctRecOverlap=0;SizeDiff=-160;StartDistance=-417;EndDistance=-417;TruScore=38;MatchId=10.0.3;Multi GT 0/1 +chr20 764442 . A AGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGG 60 . QNAME=cluster23_scaffold_3;QSTART=24946242;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.907801;PctSizeSimilarity=0.666667;PctRecOverlap=0.025641;SizeDiff=39;StartDistance=95;EndDistance=95;TruScore=53;MatchId=13.0.0 GT 1/0 diff --git a/repo_utils/test_files/answer_key/bench13/log.txt b/repo_utils/test_files/answer_key/bench13/log.txt index f6543ccb..90ad6d01 100644 --- a/repo_utils/test_files/answer_key/bench13/log.txt +++ b/repo_utils/test_files/answer_key/bench13/log.txt @@ -1,5 +1,5 @@ -2021-10-17 23:27:04,999 [INFO] Running /data/truvari/__main__.py bench -b repo_utils/test_files/input1.vcf.gz -c repo_utils/test_files/input3.vcf.gz -f repo_utils/test_files/reference.fa -o test_results/bench13/ -2021-10-17 23:27:05,001 [INFO] Params: +2021-12-21 13:57:50,309 [INFO] Running /data/truvari/__main__.py bench -b repo_utils/test_files/input1.vcf.gz -c repo_utils/test_files/input3.vcf.gz -f repo_utils/test_files/reference.fa -o test_results/bench13/ +2021-12-21 13:57:50,310 [INFO] Params: { "base": "repo_utils/test_files/input1.vcf.gz", "comp": "repo_utils/test_files/input3.vcf.gz", @@ -27,11 +27,11 @@ "includebed": null, "multimatch": false } -2021-10-17 23:27:05,002 [INFO] Truvari version: 3.1.0-dev -2021-10-17 23:27:05,295 [INFO] Zipped 4216 variants. Counter({'base': 2151, 'comp': 2065}) -2021-10-17 23:27:05,296 [INFO] 15 chunks of 37 variants. Counter({'comp': 21, 'base': 16}) -2021-10-17 23:27:05,300 [INFO] Results peek: 10 TP-base 6 FN 62.50% Recall -2021-10-17 23:27:05,301 [INFO] Stats: { +2021-12-21 13:57:50,311 [INFO] Truvari version: 3.1.0-dev +2021-12-21 13:57:50,433 [INFO] Zipped 4216 variants. Counter({'base': 2151, 'comp': 2065}) +2021-12-21 13:57:50,434 [INFO] 15 chunks of 4216 variants. Counter({'__filtered': 4179, 'comp': 21, 'base': 16}) +2021-12-21 13:57:50,439 [INFO] Results peek: 10 TP-base 6 FN 62.50% Recall +2021-12-21 13:57:50,440 [INFO] Stats: { "TP-base": 10, "TP-call": 10, "FP": 7, @@ -47,4 +47,4 @@ "TP-base_FP-gt": 4, "gt_concordance": 0.6 } -2021-10-17 23:27:05,305 [INFO] Finished bench +2021-12-21 13:57:50,444 [INFO] Finished bench diff --git a/repo_utils/test_files/answer_key/bench13/tp-base.vcf b/repo_utils/test_files/answer_key/bench13/tp-base.vcf index 4e843404..c2debecb 100644 --- a/repo_utils/test_files/answer_key/bench13/tp-base.vcf +++ b/repo_utils/test_files/answer_key/bench13/tp-base.vcf @@ -48,12 +48,12 @@ ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA24385 chr20 149013 . A ACCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGAC 60 . QNAME=HG002-S9-H1-000001F;QSTART=127907;QSTRAND=+;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=1.0.0 GT:PL:DP 1/1:8,8,5:15,29 -chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGAGGGAG 60 . QNAME=HG002-S9-H2-000001F;QSTART=232485;QSTRAND=+;SVTYPE=INS;SVLEN=184;PctSeqSimilarity=0.986631;PctSizeSimilarity=0.978723;PctRecOverlap=1;SizeDiff=-4;StartDistance=0;EndDistance=0;GTMatch;TruScore=98;MatchId=4.0.0 GT:PL:DP 0/1:7,9,6:23,9 -chr20 279069 . A AGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGCGGGACGGAGGGAGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=232808;QSTRAND=+;SVTYPE=INS;SVLEN=438;PctSeqSimilarity=0.920335;PctSizeSimilarity=0.85214;PctRecOverlap=1;SizeDiff=-76;StartDistance=0;EndDistance=0;GTMatch;TruScore=92;MatchId=4.2.1 GT:PL:DP 0/1:7,10,7:12,48 -chr20 279062 . G GGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGTGGGAGGGAGGGAGGGAT 60 . QNAME=HG002-S9-H1-000001F;QSTART=258112;QSTRAND=+;SVTYPE=INS;SVLEN=139;PctSeqSimilarity=0.883663;PctSizeSimilarity=0.776536;PctRecOverlap=0;SizeDiff=-40;StartDistance=-36;EndDistance=-36;GTMatch;TruScore=63;MatchId=4.1.2 GT:PL:DP 1/0:5,1,4:27,38 +chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGAGGGAG 60 . QNAME=HG002-S9-H2-000001F;QSTART=232485;QSTRAND=+;SVTYPE=INS;SVLEN=184;PctSeqSimilarity=0.986631;PctSizeSimilarity=0.978723;PctRecOverlap=0.978836;SizeDiff=-4;StartDistance=0;EndDistance=0;GTMatch;TruScore=98;MatchId=4.0.0 GT:PL:DP 0/1:7,9,6:23,9 +chr20 279069 . A AGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGCGGGACGGAGGGAGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=232808;QSTRAND=+;SVTYPE=INS;SVLEN=438;PctSeqSimilarity=0.920335;PctSizeSimilarity=0.85214;PctRecOverlap=0.852427;SizeDiff=-76;StartDistance=0;EndDistance=0;GTMatch;TruScore=87;MatchId=4.2.1 GT:PL:DP 0/1:7,10,7:12,48 +chr20 279062 . G GGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGTGGGAGGGAGGGAGGGAT 60 . QNAME=HG002-S9-H1-000001F;QSTART=258112;QSTRAND=+;SVTYPE=INS;SVLEN=139;PctSeqSimilarity=0.883663;PctSizeSimilarity=0.776536;PctRecOverlap=0.687151;SizeDiff=-40;StartDistance=-36;EndDistance=-36;GTMatch;TruScore=78;MatchId=4.1.2 GT:PL:DP 1/0:5,1,4:27,38 chr20 306268 . A ACCAGGCTGGAGTGCAGTGGCTCACTGCGTGGCTCGCTACAGCCTACAACTCCTGGGCTCCAGCAATCCTGCTGCCCCAGCCTCCTGTGTAACTGAGACTACAGGCACGCACCACCACACCCAGCTAATTTTTTCTTTCTTTTTTTTTTTTTTGAGATGAACTCTCACTCTGTTGC 60 . QNAME=HG002-S9-H1-000001F;QSTART=285475;QSTRAND=+;SVTYPE=INS;SVLEN=175;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;GTMatch;TruScore=100;MatchId=6.0.0 GT:PL:DP 1/1:2,7,2:17,31 chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCCATCCCCCGTCCGCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 61 . QNAME=HG002-S9-H2-000001F;QSTART=374905;QSTRAND=+;SVTYPE=INS;SVLEN=227;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=8.0.0 GT:PL 0/1:. chr20 613783 . TGTGTGCTGAGTCCAGCTCAAGTCCCTTGGTTCCCACTGCTGCTAAGCATGCACG T 60 . QNAME=HG002-S9-H1-000001F;QSTART=593293;QSTRAND=+;SVTYPE=DEL;SVLEN=-54;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;GTMatch;TruScore=100;MatchId=9.0.0 GT:PL:DP 1/1:6,8,8:40,41 -chr20 642300 . G GCCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGC 60 . QNAME=HG002-S9-H1-000001F;QSTART=621812;QSTRAND=+;SVTYPE=INS;SVLEN=408;PctSeqSimilarity=0.894737;PctSizeSimilarity=0.789216;PctRecOverlap=0;SizeDiff=86;StartDistance=-91;EndDistance=-91;GTMatch;TruScore=64;MatchId=10.2.4 GT:PL:DP 1/0:5,10,5:44,36 +chr20 642300 . G GCCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGC 60 . QNAME=HG002-S9-H1-000001F;QSTART=621812;QSTRAND=+;SVTYPE=INS;SVLEN=408;PctSeqSimilarity=0.894737;PctSizeSimilarity=0.789216;PctRecOverlap=0.672372;SizeDiff=86;StartDistance=-91;EndDistance=-91;GTMatch;TruScore=78;MatchId=10.2.4 GT:PL:DP 1/0:5,10,5:44,36 chr20 709759 . AGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCC A 60 . QNAME=HG002-S9-H2-000001F;QSTART=664783;QSTRAND=+;SVTYPE=DEL;SVLEN=-93;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=11.0.0 GT:PL:DP 0/1:9,6,6:13,31 chr20 949516 . CTCTGAAGAGGTAACATTTGAACTGAGATGTAGAGGGAGAGAAGGAACTAGCCATGTGGAGATCTGGGGGAGGAGCATTCCAGGCAGAGGGGATAGCAAGTGCA C 60 . QNAME=HG002-S9-H2-000001F;QSTART=905663;QSTRAND=+;SVTYPE=DEL;SVLEN=-103;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=14.0.0 GT:PL:DP 0/1:6,6,1:34,8 diff --git a/repo_utils/test_files/answer_key/bench13/tp-call.vcf b/repo_utils/test_files/answer_key/bench13/tp-call.vcf index 8b15ae72..e71df388 100644 --- a/repo_utils/test_files/answer_key/bench13/tp-call.vcf +++ b/repo_utils/test_files/answer_key/bench13/tp-call.vcf @@ -47,12 +47,12 @@ ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00733 chr20 149013 . A ACCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGAC 60 . QNAME=cluster23_scaffold_3;QSTART=25562871;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=1.0.0 GT 1/0 -chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGCGGGAG 60 . QNAME=cluster23_scaffold_2;QSTART=25417363;QSTRAND=-;SVTYPE=INS;SVLEN=188;PctSeqSimilarity=0.986631;PctSizeSimilarity=0.978723;PctRecOverlap=1;SizeDiff=-4;StartDistance=0;EndDistance=0;GTMatch;TruScore=98;MatchId=4.0.0 GT 0/1 -chr20 279069 . A AGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGAGGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGCGGGACGGAGGGAGGGC 60 . QNAME=cluster23_scaffold_2;QSTART=25416710;QSTRAND=-;SVTYPE=INS;SVLEN=514;PctSeqSimilarity=0.920335;PctSizeSimilarity=0.85214;PctRecOverlap=1;SizeDiff=-76;StartDistance=0;EndDistance=0;GTMatch;TruScore=92;MatchId=4.2.1 GT 0/1 -chr20 279098 . G GGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGCGGGACGGAGGGAGGGACGGAGGGCGGGACGGAGGGAC 60 . QNAME=cluster23_scaffold_3;QSTART=25432493;QSTRAND=-;SVTYPE=INS;SVLEN=179;PctSeqSimilarity=0.883663;PctSizeSimilarity=0.776536;PctRecOverlap=0;SizeDiff=-40;StartDistance=-36;EndDistance=-36;GTMatch;TruScore=63;MatchId=4.1.2 GT 1/0 +chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGCGGGAG 60 . QNAME=cluster23_scaffold_2;QSTART=25417363;QSTRAND=-;SVTYPE=INS;SVLEN=188;PctSeqSimilarity=0.986631;PctSizeSimilarity=0.978723;PctRecOverlap=0.978836;SizeDiff=-4;StartDistance=0;EndDistance=0;GTMatch;TruScore=98;MatchId=4.0.0 GT 0/1 +chr20 279069 . A AGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGAGGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGCGGGACGGAGGGAGGGC 60 . QNAME=cluster23_scaffold_2;QSTART=25416710;QSTRAND=-;SVTYPE=INS;SVLEN=514;PctSeqSimilarity=0.920335;PctSizeSimilarity=0.85214;PctRecOverlap=0.852427;SizeDiff=-76;StartDistance=0;EndDistance=0;GTMatch;TruScore=87;MatchId=4.2.1 GT 0/1 +chr20 279098 . G GGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGCGGGACGGAGGGAGGGACGGAGGGCGGGACGGAGGGAC 60 . QNAME=cluster23_scaffold_3;QSTART=25432493;QSTRAND=-;SVTYPE=INS;SVLEN=179;PctSeqSimilarity=0.883663;PctSizeSimilarity=0.776536;PctRecOverlap=0.687151;SizeDiff=-40;StartDistance=-36;EndDistance=-36;GTMatch;TruScore=78;MatchId=4.1.2 GT 1/0 chr20 306268 . A ACCAGGCTGGAGTGCAGTGGCTCACTGCGTGGCTCGCTACAGCCTACAACTCCTGGGCTCCAGCAATCCTGCTGCCCCAGCCTCCTGTGTAACTGAGACTACAGGCACGCACCACCACACCCAGCTAATTTTTTCTTTCTTTTTTTTTTTTTTGAGATGAACTCTCACTCTGTTGC 60 . QNAME=cluster23_scaffold_3;QSTART=25405192;QSTRAND=-;SVTYPE=INS;SVLEN=175;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;GTMatch;TruScore=100;MatchId=6.0.0 GT 1/1 chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCCATCCCCCGTCCGCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 60 . QNAME=cluster23_scaffold_3;QSTART=25290548;QSTRAND=-;SVTYPE=INS;SVLEN=227;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=8.0.0 GT 1/1 chr20 613783 . TGTGTGCTGAGTCCAGCTCAAGTCCCTTGGTTCCCACTGCTGCTAAGCATGCACG T 60 . QNAME=cluster23_scaffold_3;QSTART=25097419;QSTRAND=-;SVTYPE=DEL;SVLEN=-54;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;GTMatch;TruScore=100;MatchId=9.0.0 GT 1/1 -chr20 642391 . G GGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGGC 60 . QNAME=cluster23_scaffold_3;QSTART=25068286;QSTRAND=-;SVTYPE=INS;SVLEN=322;PctSeqSimilarity=0.894737;PctSizeSimilarity=0.789216;PctRecOverlap=0;SizeDiff=86;StartDistance=-91;EndDistance=-91;GTMatch;TruScore=64;MatchId=10.2.4 GT 1/0 +chr20 642391 . G GGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGGC 60 . QNAME=cluster23_scaffold_3;QSTART=25068286;QSTRAND=-;SVTYPE=INS;SVLEN=322;PctSeqSimilarity=0.894737;PctSizeSimilarity=0.789216;PctRecOverlap=0.672372;SizeDiff=86;StartDistance=-91;EndDistance=-91;GTMatch;TruScore=78;MatchId=10.2.4 GT 1/0 chr20 709759 . AGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCC A 60 . QNAME=cluster23_scaffold_3;QSTART=25000888;QSTRAND=-;SVTYPE=DEL;SVLEN=-93;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=11.0.0 GT 1/1 chr20 949516 . CTCTGAAGAGGTAACATTTGAACTGAGATGTAGAGGGAGAGAAGGAACTAGCCATGTGGAGATCTGGGGGAGGAGCATTCCAGGCAGAGGGGATAGCAAGTGCA C 60 . QNAME=cluster23_scaffold_3;QSTART=24759935;QSTRAND=-;SVTYPE=DEL;SVLEN=-103;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=14.0.0 GT 1/0 diff --git a/repo_utils/test_files/answer_key/bench23/fn.vcf b/repo_utils/test_files/answer_key/bench23/fn.vcf index 5056d1bb..fafd5872 100644 --- a/repo_utils/test_files/answer_key/bench23/fn.vcf +++ b/repo_utils/test_files/answer_key/bench23/fn.vcf @@ -47,7 +47,7 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -chr20 380878 . T TGATTAGCTCCGGTTTGCATCACCCGGACCGGGGGATTAGCTCCGGTTTGCATCACCCGGACCGGGG 60 . QNAME=cluster19_000000F;QSTART=25381150;QSTRAND=-;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.123894;PctSizeSimilarity=0.5;PctRecOverlap=0.0294118;SizeDiff=33;StartDistance=0;EndDistance=-33;GTMatch;TruScore=19;MatchId=7.0.0 GT:PL:AD 1/0:8,3,9:38,14 -chr20 642207 . T TGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGG 60 . QNAME=cluster19_000000F;QSTART=602404;QSTRAND=+;SVTYPE=INS;SVLEN=630;PctSeqSimilarity=0.776184;PctSizeSimilarity=0.511111;PctRecOverlap=0;SizeDiff=308;StartDistance=-184;EndDistance=-184;TruScore=51;MatchId=10.0.4 GT:PL:AD 0/1:2,1,9:9,12 -chr20 642230 . T TGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTG 60 . QNAME=cluster19_000000F;QSTART=25119065;QSTRAND=-;SVTYPE=INS;SVLEN=632;PctSeqSimilarity=0.766021;PctSizeSimilarity=0.509494;PctRecOverlap=0;SizeDiff=310;StartDistance=-161;EndDistance=-161;GTMatch;TruScore=51;MatchId=10.1.4 GT:PL:AD 1/0:7,5,6:16,7 -chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=725358;QSTRAND=+;SVTYPE=INS;SVLEN=117;PctSeqSimilarity=0.907801;PctSizeSimilarity=0.666667;PctRecOverlap=0;SizeDiff=39;StartDistance=95;EndDistance=95;TruScore=62;MatchId=13.1.0 GT:PL:AD 0/1:7,8,4:13,5 +chr20 380878 . T TGATTAGCTCCGGTTTGCATCACCCGGACCGGGGGATTAGCTCCGGTTTGCATCACCCGGACCGGGG 60 . QNAME=cluster19_000000F;QSTART=25381150;QSTRAND=-;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.123894;PctSizeSimilarity=0.5;PctRecOverlap=0.507463;SizeDiff=33;StartDistance=0;EndDistance=-33;GTMatch;TruScore=37;MatchId=7.0.0 GT:PL:AD 1/0:8,3,9:38,14 +chr20 642207 . T TGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGG 60 . QNAME=cluster19_000000F;QSTART=602404;QSTRAND=+;SVTYPE=INS;SVLEN=630;PctSeqSimilarity=0.776184;PctSizeSimilarity=0.511111;PctRecOverlap=0.464342;SizeDiff=308;StartDistance=-184;EndDistance=-184;TruScore=58;MatchId=10.0.4 GT:PL:AD 0/1:2,1,9:9,12 +chr20 642230 . T TGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTG 60 . QNAME=cluster19_000000F;QSTART=25119065;QSTRAND=-;SVTYPE=INS;SVLEN=632;PctSeqSimilarity=0.766021;PctSizeSimilarity=0.509494;PctRecOverlap=0.50079;SizeDiff=310;StartDistance=-161;EndDistance=-161;GTMatch;TruScore=59;MatchId=10.1.4 GT:PL:AD 1/0:7,5,6:16,7 +chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=725358;QSTRAND=+;SVTYPE=INS;SVLEN=117;PctSeqSimilarity=0.907801;PctSizeSimilarity=0.666667;PctRecOverlap=0.025641;SizeDiff=39;StartDistance=95;EndDistance=95;TruScore=53;MatchId=13.1.0 GT:PL:AD 0/1:7,8,4:13,5 diff --git a/repo_utils/test_files/answer_key/bench23/fp.vcf b/repo_utils/test_files/answer_key/bench23/fp.vcf index 3635e373..a38d5356 100644 --- a/repo_utils/test_files/answer_key/bench23/fp.vcf +++ b/repo_utils/test_files/answer_key/bench23/fp.vcf @@ -46,12 +46,12 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00733 -chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGCGGGAG 60 . QNAME=cluster23_scaffold_2;QSTART=25417363;QSTRAND=-;SVTYPE=INS;SVLEN=188;PctSeqSimilarity=0.667291;PctSizeSimilarity=0.346225;PctRecOverlap=0;SizeDiff=355;StartDistance=139;EndDistance=139;TruScore=42;MatchId=4.0.0 GT 0/1 -chr20 279098 . G GGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGCGGGACGGAGGGAGGGACGGAGGGCGGGACGGAGGGAC 60 . QNAME=cluster23_scaffold_3;QSTART=25432493;QSTRAND=-;SVTYPE=INS;SVLEN=179;PctSeqSimilarity=0.540302;PctSizeSimilarity=0.32965;PctRecOverlap=0;SizeDiff=364;StartDistance=-29;EndDistance=-29;TruScore=35;MatchId=4.0.2 GT 1/0 +chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGCGGGAG 60 . QNAME=cluster23_scaffold_2;QSTART=25417363;QSTRAND=-;SVTYPE=INS;SVLEN=188;PctSeqSimilarity=0.667291;PctSizeSimilarity=0.346225;PctRecOverlap=0.348066;SizeDiff=355;StartDistance=139;EndDistance=139;TruScore=45;MatchId=4.0.0 GT 0/1 +chr20 279098 . G GGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGCGGGACGGAGGGAGGGACGGAGGGCGGGACGGAGGGAC 60 . QNAME=cluster23_scaffold_3;QSTART=25432493;QSTRAND=-;SVTYPE=INS;SVLEN=179;PctSeqSimilarity=0.540302;PctSizeSimilarity=0.32965;PctRecOverlap=0.32965;SizeDiff=364;StartDistance=-29;EndDistance=-29;TruScore=39;MatchId=4.0.2 GT 1/0 chr20 280211 . CAACAACAACAATTGTACTTCCCTAAGGTTACACCCAGCAGGTGCATAAAACCTACAGTAACAAT C 60 . QNAME=cluster23_scaffold_3;QSTART=25431380;QSTRAND=-;SVTYPE=DEL;SVLEN=-64;PctSeqSimilarity=.;PctSizeSimilarity=.;PctRecOverlap=.;SizeDiff=.;StartDistance=.;EndDistance=.;TruScore=.;MatchId=5._.0 GT 1/0 -chr20 641906 . G GGCGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCT 60 . QNAME=cluster23_scaffold_2;QSTART=25053389;QSTRAND=-;SVTYPE=INS;SVLEN=163;PctSeqSimilarity=0.701844;PctSizeSimilarity=0.257911;PctRecOverlap=0;SizeDiff=469;StartDistance=324;EndDistance=324;TruScore=41;MatchId=10.1.0 GT 0/1 -chr20 642068 . G GTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCC 60 . QNAME=cluster23_scaffold_2;QSTART=25053097;QSTRAND=-;SVTYPE=INS;SVLEN=129;PctSeqSimilarity=0.562989;PctSizeSimilarity=0.204114;PctRecOverlap=0;SizeDiff=503;StartDistance=162;EndDistance=162;TruScore=33;MatchId=10.1.1 GT 0/1 -chr20 642271 . G GGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGC 60 . QNAME=cluster23_scaffold_3;QSTART=25068731;QSTRAND=-;SVTYPE=INS;SVLEN=257;PctSeqSimilarity=0.641691;PctSizeSimilarity=0.407937;PctRecOverlap=0;SizeDiff=373;StartDistance=-64;EndDistance=-64;TruScore=42;MatchId=10.0.2 GT 1/0 -chr20 642330 . G GCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGGGC 60 . QNAME=cluster23_scaffold_2;QSTART=25052609;QSTRAND=-;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=0.649306;PctSizeSimilarity=0.35873;PctRecOverlap=0;SizeDiff=404;StartDistance=-123;EndDistance=-123;GTMatch;TruScore=41;MatchId=10.0.3 GT 0/1 -chr20 642391 . G GGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGGC 60 . QNAME=cluster23_scaffold_3;QSTART=25068286;QSTRAND=-;SVTYPE=INS;SVLEN=322;PctSeqSimilarity=0.776184;PctSizeSimilarity=0.511111;PctRecOverlap=0;SizeDiff=308;StartDistance=-184;EndDistance=-184;TruScore=51;MatchId=10.0.4 GT 1/0 +chr20 641906 . G GGCGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCT 60 . QNAME=cluster23_scaffold_2;QSTART=25053389;QSTRAND=-;SVTYPE=INS;SVLEN=163;PctSeqSimilarity=0.692156;PctSizeSimilarity=0.25873;PctRecOverlap=0.152139;SizeDiff=467;StartDistance=301;EndDistance=301;GTMatch;TruScore=36;MatchId=10.0.0 GT 0/1 +chr20 642068 . G GTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCC 60 . QNAME=cluster23_scaffold_2;QSTART=25053097;QSTRAND=-;SVTYPE=INS;SVLEN=129;PctSeqSimilarity=0.562989;PctSizeSimilarity=0.204114;PctRecOverlap=0.203791;SizeDiff=503;StartDistance=162;EndDistance=162;TruScore=32;MatchId=10.1.1 GT 0/1 +chr20 642271 . G GGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGC 60 . QNAME=cluster23_scaffold_3;QSTART=25068731;QSTRAND=-;SVTYPE=INS;SVLEN=257;PctSeqSimilarity=0.641691;PctSizeSimilarity=0.407937;PctRecOverlap=0.40729;SizeDiff=373;StartDistance=-64;EndDistance=-64;TruScore=48;MatchId=10.0.2 GT 1/0 +chr20 642330 . G GCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGGGC 60 . QNAME=cluster23_scaffold_2;QSTART=25052609;QSTRAND=-;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=0.649306;PctSizeSimilarity=0.35873;PctRecOverlap=0.359746;SizeDiff=404;StartDistance=-123;EndDistance=-123;GTMatch;TruScore=45;MatchId=10.0.3 GT 0/1 +chr20 642391 . G GGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGGC 60 . QNAME=cluster23_scaffold_3;QSTART=25068286;QSTRAND=-;SVTYPE=INS;SVLEN=322;PctSeqSimilarity=0.766021;PctSizeSimilarity=0.509494;PctRecOverlap=0.50079;SizeDiff=310;StartDistance=-161;EndDistance=-161;GTMatch;TruScore=59;MatchId=10.1.4 GT 1/0 chr20 949516 . CTCTGAAGAGGTAACATTTGAACTGAGATGTAGAGGGAGAGAAGGAACTAGCCATGTGGAGATCTGGGGGAGGAGCATTCCAGGCAGAGGGGATAGCAAGTGCA C 60 . QNAME=cluster23_scaffold_3;QSTART=24759935;QSTRAND=-;SVTYPE=DEL;SVLEN=-103;PctSeqSimilarity=.;PctSizeSimilarity=.;PctRecOverlap=.;SizeDiff=.;StartDistance=.;EndDistance=.;TruScore=.;MatchId=14._.0 GT 1/0 diff --git a/repo_utils/test_files/answer_key/bench23/log.txt b/repo_utils/test_files/answer_key/bench23/log.txt index b9154957..7b86d0d2 100644 --- a/repo_utils/test_files/answer_key/bench23/log.txt +++ b/repo_utils/test_files/answer_key/bench23/log.txt @@ -1,5 +1,5 @@ -2021-10-17 23:27:06,881 [INFO] Running /data/truvari/__main__.py bench -b repo_utils/test_files/input2.vcf.gz -c repo_utils/test_files/input3.vcf.gz -f repo_utils/test_files/reference.fa -o test_results/bench23/ --multimatch -2021-10-17 23:27:06,882 [INFO] Params: +2021-12-21 13:57:52,218 [INFO] Running /data/truvari/__main__.py bench -b repo_utils/test_files/input2.vcf.gz -c repo_utils/test_files/input3.vcf.gz -f repo_utils/test_files/reference.fa -o test_results/bench23/ --multimatch +2021-12-21 13:57:52,219 [INFO] Params: { "base": "repo_utils/test_files/input2.vcf.gz", "comp": "repo_utils/test_files/input3.vcf.gz", @@ -27,11 +27,11 @@ "includebed": null, "multimatch": true } -2021-10-17 23:27:06,883 [INFO] Truvari version: 3.1.0-dev -2021-10-17 23:27:07,147 [INFO] Zipped 3848 variants. Counter({'comp': 2065, 'base': 1783}) -2021-10-17 23:27:07,148 [INFO] 15 chunks of 32 variants. Counter({'comp': 21, 'base': 11}) -2021-10-17 23:27:07,153 [INFO] Results peek: 7 TP-base 4 FN 63.64% Recall -2021-10-17 23:27:07,154 [INFO] Stats: { +2021-12-21 13:57:52,219 [INFO] Truvari version: 3.1.0-dev +2021-12-21 13:57:52,329 [INFO] Zipped 3848 variants. Counter({'comp': 2065, 'base': 1783}) +2021-12-21 13:57:52,330 [INFO] 15 chunks of 3848 variants. Counter({'__filtered': 3816, 'comp': 21, 'base': 11}) +2021-12-21 13:57:52,333 [INFO] Results peek: 7 TP-base 4 FN 63.64% Recall +2021-12-21 13:57:52,334 [INFO] Stats: { "TP-base": 7, "TP-call": 8, "FP": 9, @@ -47,4 +47,4 @@ "TP-base_FP-gt": 4, "gt_concordance": 0.375 } -2021-10-17 23:27:07,159 [INFO] Finished bench +2021-12-21 13:57:52,338 [INFO] Finished bench diff --git a/repo_utils/test_files/answer_key/bench23/tp-base.vcf b/repo_utils/test_files/answer_key/bench23/tp-base.vcf index 1feff82c..68c83f07 100644 --- a/repo_utils/test_files/answer_key/bench23/tp-base.vcf +++ b/repo_utils/test_files/answer_key/bench23/tp-base.vcf @@ -47,10 +47,10 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -chr20 149095 . G GGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGACCCATATTTGGGAA 60 . QNAME=cluster19_000000F;QSTART=25613718;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=82;EndDistance=82;TruScore=74;MatchId=1.0.0 GT:PL:AD 1/1:10,5,10:30,43 -chr20 279069 . A AGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGC 60 . QNAME=cluster19_000000F;QSTART=25483168;QSTRAND=-;SVTYPE=INS;SVLEN=543;PctSeqSimilarity=0.932011;PctSizeSimilarity=0.946593;PctRecOverlap=1;SizeDiff=29;StartDistance=0;EndDistance=0;TruScore=95;MatchId=4.0.1 GT:PL:AD 1/1:8,1,10:40,42 +chr20 149095 . G GGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGACCCATATTTGGGAA 60 . QNAME=cluster19_000000F;QSTART=25613718;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.989583;PctSizeSimilarity=1;PctRecOverlap=0.681159;SizeDiff=0;StartDistance=22;EndDistance=22;TruScore=89;MatchId=1.0.1 GT:PL:AD 1/1:10,5,10:30,43 +chr20 279069 . A AGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGC 60 . QNAME=cluster19_000000F;QSTART=25483168;QSTRAND=-;SVTYPE=INS;SVLEN=543;PctSeqSimilarity=0.932011;PctSizeSimilarity=0.946593;PctRecOverlap=0.948435;SizeDiff=29;StartDistance=0;EndDistance=0;TruScore=94;MatchId=4.0.1 GT:PL:AD 1/1:8,1,10:40,42 chr20 306268 . A ACCAGGCTGGAGTGCAGTGGCTCACTGCGTGGCTCGCTACAGCCTACAACTCCTGGGCTCCAGCAATCCTGCTGCCCCAGCCTCCTGTGTAACTGAGACTACAGGCACGCACCACCACACCCAGCTAATGTTTTCTTTCTTTTTTTTTTTTTTGAGATGAACTCTCACTCTGTTGC 60 . QNAME=cluster19_000000F;QSTART=25455794;QSTRAND=-;SVTYPE=INS;SVLEN=175;PctSeqSimilarity=0.997159;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;GTMatch;TruScore=99;MatchId=6.0.0 GT:PL:AD 1/1:4,10,2:26,18 chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCATCCCCCGTCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 60 . QNAME=cluster19_000000F;QSTART=25341211;QSTRAND=-;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=0.995604;PctSizeSimilarity=0.995595;PctRecOverlap=1;SizeDiff=-1;StartDistance=0;EndDistance=0;GTMatch;TruScore=99;MatchId=8.0.0 GT:PL:AD 1/1:7,10,5:44,42 chr20 613783 . TGTGTGCTGAGTCCAGCTCAAGTCCCTTGGTTCCCACTGCTGCTAAGCATGCACG T 60 . QNAME=cluster19_000000F;QSTART=25148089;QSTRAND=-;SVTYPE=DEL;SVLEN=-54;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=9.0.0 GT:PL:AD 1/0:3,1,8:9,32 chr20 709759 . AGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCC A 60 . QNAME=cluster19_000000F;QSTART=25051525;QSTRAND=-;SVTYPE=DEL;SVLEN=-93;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=11.0.0 GT:PL:AD 1/0:8,10,9:32,27 -chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=24996803;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.997396;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=95;EndDistance=95;GTMatch;TruScore=74;MatchId=13.0.0 GT:PL:AD 1/0:5,4,9:13,8 +chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=24996803;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.997396;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=95;EndDistance=95;GTMatch;TruScore=66;MatchId=13.0.0 GT:PL:AD 1/0:5,4,9:13,8 diff --git a/repo_utils/test_files/answer_key/bench23/tp-call.vcf b/repo_utils/test_files/answer_key/bench23/tp-call.vcf index 6013aa43..d2fd4fa5 100644 --- a/repo_utils/test_files/answer_key/bench23/tp-call.vcf +++ b/repo_utils/test_files/answer_key/bench23/tp-call.vcf @@ -46,11 +46,11 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00733 -chr20 149013 . A ACCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGAC 60 . QNAME=cluster23_scaffold_3;QSTART=25562871;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=82;EndDistance=82;TruScore=74;MatchId=1.0.0 GT 1/0 -chr20 149073 . G GAATCCTGACCCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCA 60 . QNAME=cluster23_scaffold_2;QSTART=25547471;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.989583;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=22;EndDistance=22;TruScore=74;MatchId=1.0.1 GT 0/1 -chr20 279069 . A AGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGAGGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGCGGGACGGAGGGAGGGC 60 . QNAME=cluster23_scaffold_2;QSTART=25416710;QSTRAND=-;SVTYPE=INS;SVLEN=514;PctSeqSimilarity=0.932011;PctSizeSimilarity=0.946593;PctRecOverlap=1;SizeDiff=29;StartDistance=0;EndDistance=0;TruScore=95;MatchId=4.0.1 GT 0/1 +chr20 149013 . A ACCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGAC 60 . QNAME=cluster23_scaffold_3;QSTART=25562871;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=82;EndDistance=82;TruScore=66;MatchId=1.0.0 GT 1/0 +chr20 149073 . G GAATCCTGACCCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCA 60 . QNAME=cluster23_scaffold_2;QSTART=25547471;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.989583;PctSizeSimilarity=1;PctRecOverlap=0.681159;SizeDiff=0;StartDistance=22;EndDistance=22;TruScore=89;MatchId=1.0.1 GT 0/1 +chr20 279069 . A AGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGACGGAGGGAGGGAGGGAGGGAGGGACGGAGGGAGGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGAGGGAGGGAGGGCGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGAGGGATGGAGGGAGGGAGGGCGGGACGGAGGGAGGGC 60 . QNAME=cluster23_scaffold_2;QSTART=25416710;QSTRAND=-;SVTYPE=INS;SVLEN=514;PctSeqSimilarity=0.932011;PctSizeSimilarity=0.946593;PctRecOverlap=0.948435;SizeDiff=29;StartDistance=0;EndDistance=0;TruScore=94;MatchId=4.0.1 GT 0/1 chr20 306268 . A ACCAGGCTGGAGTGCAGTGGCTCACTGCGTGGCTCGCTACAGCCTACAACTCCTGGGCTCCAGCAATCCTGCTGCCCCAGCCTCCTGTGTAACTGAGACTACAGGCACGCACCACCACACCCAGCTAATTTTTTCTTTCTTTTTTTTTTTTTTGAGATGAACTCTCACTCTGTTGC 60 . QNAME=cluster23_scaffold_3;QSTART=25405192;QSTRAND=-;SVTYPE=INS;SVLEN=175;PctSeqSimilarity=0.997159;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;GTMatch;TruScore=99;MatchId=6.0.0 GT 1/1 chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCCATCCCCCGTCCGCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 60 . QNAME=cluster23_scaffold_3;QSTART=25290548;QSTRAND=-;SVTYPE=INS;SVLEN=227;PctSeqSimilarity=0.995604;PctSizeSimilarity=0.995595;PctRecOverlap=1;SizeDiff=-1;StartDistance=0;EndDistance=0;GTMatch;TruScore=99;MatchId=8.0.0 GT 1/1 chr20 613783 . TGTGTGCTGAGTCCAGCTCAAGTCCCTTGGTTCCCACTGCTGCTAAGCATGCACG T 60 . QNAME=cluster23_scaffold_3;QSTART=25097419;QSTRAND=-;SVTYPE=DEL;SVLEN=-54;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=9.0.0 GT 1/1 chr20 709759 . AGTGTGCCCTGTGACCCCCTCATCCTGCCTTTGGATCACGAGCTGGGGTCGGGGGAGGGGATGGTGGGGGGGACACTGCTCGGTCCTCCACTCC A 60 . QNAME=cluster23_scaffold_3;QSTART=25000888;QSTRAND=-;SVTYPE=DEL;SVLEN=-93;PctSeqSimilarity=1;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=100;MatchId=11.0.0 GT 1/1 -chr20 764442 . A AGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGG 60 . QNAME=cluster23_scaffold_3;QSTART=24946242;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.997396;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=95;EndDistance=95;GTMatch;TruScore=74;MatchId=13.0.0 GT 1/0 +chr20 764442 . A AGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGG 60 . QNAME=cluster23_scaffold_3;QSTART=24946242;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.997396;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=95;EndDistance=95;GTMatch;TruScore=66;MatchId=13.0.0 GT 1/0 diff --git a/repo_utils/test_files/answer_key/input1_removed.vcf b/repo_utils/test_files/answer_key/input1_removed.vcf index 44b8ff19..07787e3c 100644 --- a/repo_utils/test_files/answer_key/input1_removed.vcf +++ b/repo_utils/test_files/answer_key/input1_removed.vcf @@ -48,4 +48,4 @@ ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA24385 chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCATCCCCCGTCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 60 . QNAME=HG002-S9-H1-000001F;QSTART=400044;QSTRAND=+;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=0.995604;PctSizeSimilarity=0.995595;PctRecOverlap=1;SizeDiff=1;StartDistance=0;EndDistance=0;TruScore=99;MatchId=4.0 GT:PL:DP 1/0:4,8,6:32,9 -chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.963781;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-417;EndDistance=-417;TruScore=73;MatchId=6.0 GT:PL:DP 0/1:6,1,9:20,25 +chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.963781;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-417;EndDistance=-417;TruScore=65;MatchId=6.0 GT:PL:DP 0/1:6,1,9:20,25 diff --git a/repo_utils/test_files/answer_key/input2_removed.vcf b/repo_utils/test_files/answer_key/input2_removed.vcf index 892674c6..c1b7e3c9 100644 --- a/repo_utils/test_files/answer_key/input2_removed.vcf +++ b/repo_utils/test_files/answer_key/input2_removed.vcf @@ -47,4 +47,4 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -chr20 642230 . T TGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTG 60 . QNAME=cluster19_000000F;QSTART=25119065;QSTRAND=-;SVTYPE=INS;SVLEN=632;PctSeqSimilarity=0.998483;PctSizeSimilarity=0.996835;PctRecOverlap=0;SizeDiff=-2;StartDistance=-23;EndDistance=-23;TruScore=74;MatchId=7.0 GT:PL:AD 1/0:7,5,6:16,7 +chr20 642230 . T TGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTG 60 . QNAME=cluster19_000000F;QSTART=25119065;QSTRAND=-;SVTYPE=INS;SVLEN=632;PctSeqSimilarity=0.998483;PctSizeSimilarity=0.996835;PctRecOverlap=0.962085;SizeDiff=-2;StartDistance=-23;EndDistance=-23;TruScore=98;MatchId=7.0 GT:PL:AD 1/0:7,5,6:16,7 diff --git a/repo_utils/test_files/answer_key/input3_removed.vcf b/repo_utils/test_files/answer_key/input3_removed.vcf index 8673afc8..5bac5ab5 100644 --- a/repo_utils/test_files/answer_key/input3_removed.vcf +++ b/repo_utils/test_files/answer_key/input3_removed.vcf @@ -46,4 +46,4 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00733 -chr20 149073 . G GAATCCTGACCCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCA 60 . QNAME=cluster23_scaffold_2;QSTART=25547471;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.996479;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-60;EndDistance=-60;TruScore=74;MatchId=1.0 GT 0/1 +chr20 149073 . G GAATCCTGACCCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCA 60 . QNAME=cluster23_scaffold_2;QSTART=25547471;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.996479;PctSizeSimilarity=1;PctRecOverlap=0.130435;SizeDiff=0;StartDistance=-60;EndDistance=-60;TruScore=70;MatchId=1.0 GT 0/1 diff --git a/repo_utils/test_files/answer_key/multi_removed_common.vcf b/repo_utils/test_files/answer_key/multi_removed_common.vcf index 8291c4d4..dfc61768 100644 --- a/repo_utils/test_files/answer_key/multi_removed_common.vcf +++ b/repo_utils/test_files/answer_key/multi_removed_common.vcf @@ -47,11 +47,11 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA24385 NA12878 HG00733 -chr20 149095 . G GGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGACCCATATTTGGGAA 60 . QNAME=cluster19_000000F;QSTART=25613718;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-82;EndDistance=-82;TruScore=74;MatchId=1.0 GT ./. 1/1 ./. -chr20 149073 . G GAATCCTGACCCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCA 60 . QNAME=cluster23_scaffold_2;QSTART=25547471;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.996479;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-60;EndDistance=-60;TruScore=74;MatchId=1.0 GT ./. ./. 0/1 -chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGCGGGAG 60 . QNAME=cluster23_scaffold_2;QSTART=25417363;QSTRAND=-;SVTYPE=INS;SVLEN=188;PctSeqSimilarity=0.986631;PctSizeSimilarity=0.978723;PctRecOverlap=1;SizeDiff=-4;StartDistance=0;EndDistance=0;TruScore=98;MatchId=2.0 GT ./. ./. 0/1 +chr20 149073 . G GAATCCTGACCCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCA 60 . QNAME=cluster23_scaffold_2;QSTART=25547471;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.996479;PctSizeSimilarity=1;PctRecOverlap=0.130435;SizeDiff=0;StartDistance=-60;EndDistance=-60;TruScore=70;MatchId=1.0 GT ./. ./. 0/1 +chr20 149095 . G GGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGACCCATATTTGGGAA 60 . QNAME=cluster19_000000F;QSTART=25613718;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-82;EndDistance=-82;TruScore=66;MatchId=1.0 GT ./. 1/1 ./. +chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGCGGGAG 60 . QNAME=cluster23_scaffold_2;QSTART=25417363;QSTRAND=-;SVTYPE=INS;SVLEN=188;PctSeqSimilarity=0.986631;PctSizeSimilarity=0.978723;PctRecOverlap=0.978836;SizeDiff=-4;StartDistance=0;EndDistance=0;TruScore=98;MatchId=2.0 GT ./. ./. 0/1 chr20 306268 . A ACCAGGCTGGAGTGCAGTGGCTCACTGCGTGGCTCGCTACAGCCTACAACTCCTGGGCTCCAGCAATCCTGCTGCCCCAGCCTCCTGTGTAACTGAGACTACAGGCACGCACCACCACACCCAGCTAATGTTTTCTTTCTTTTTTTTTTTTTTGAGATGAACTCTCACTCTGTTGC 60 . QNAME=cluster19_000000F;QSTART=25455794;QSTRAND=-;SVTYPE=INS;SVLEN=175;PctSeqSimilarity=0.997159;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=99;MatchId=4.0 GT ./. 1/1 ./. chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCCATCCCCCGTCCGCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 61 . QNAME=HG002-S9-H2-000001F;QSTART=374905;QSTRAND=+;SVTYPE=INS;SVLEN=227;PctSeqSimilarity=0.995604;PctSizeSimilarity=0.995595;PctRecOverlap=1;SizeDiff=-1;StartDistance=0;EndDistance=0;TruScore=99;MatchId=6.0 GT 0/1 ./. 1/1 -chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.963781;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-417;EndDistance=-417;TruScore=73;MatchId=8.0 GT 0/1 ./. ./. -chr20 642230 . T TGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTG 60 . QNAME=cluster19_000000F;QSTART=25119065;QSTRAND=-;SVTYPE=INS;SVLEN=632;PctSeqSimilarity=0.998483;PctSizeSimilarity=0.996835;PctRecOverlap=0;SizeDiff=-2;StartDistance=-23;EndDistance=-23;GTMatch;TruScore=74;MatchId=8.0 GT ./. 1/0 ./. -chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=24996803;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.997396;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-95;EndDistance=-95;GTMatch;TruScore=74;MatchId=10.0 GT ./. 1/0 ./. +chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.963781;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-417;EndDistance=-417;TruScore=65;MatchId=8.0 GT 0/1 ./. ./. +chr20 642230 . T TGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTG 60 . QNAME=cluster19_000000F;QSTART=25119065;QSTRAND=-;SVTYPE=INS;SVLEN=632;PctSeqSimilarity=0.998483;PctSizeSimilarity=0.996835;PctRecOverlap=0.962085;SizeDiff=-2;StartDistance=-23;EndDistance=-23;GTMatch;TruScore=98;MatchId=8.0 GT ./. 1/0 ./. +chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=24996803;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.997396;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-95;EndDistance=-95;GTMatch;TruScore=66;MatchId=10.0 GT ./. 1/0 ./. diff --git a/repo_utils/test_files/answer_key/multi_removed_first.vcf b/repo_utils/test_files/answer_key/multi_removed_first.vcf index 8291c4d4..dfc61768 100644 --- a/repo_utils/test_files/answer_key/multi_removed_first.vcf +++ b/repo_utils/test_files/answer_key/multi_removed_first.vcf @@ -47,11 +47,11 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA24385 NA12878 HG00733 -chr20 149095 . G GGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGACCCATATTTGGGAA 60 . QNAME=cluster19_000000F;QSTART=25613718;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-82;EndDistance=-82;TruScore=74;MatchId=1.0 GT ./. 1/1 ./. -chr20 149073 . G GAATCCTGACCCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCA 60 . QNAME=cluster23_scaffold_2;QSTART=25547471;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.996479;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-60;EndDistance=-60;TruScore=74;MatchId=1.0 GT ./. ./. 0/1 -chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGCGGGAG 60 . QNAME=cluster23_scaffold_2;QSTART=25417363;QSTRAND=-;SVTYPE=INS;SVLEN=188;PctSeqSimilarity=0.986631;PctSizeSimilarity=0.978723;PctRecOverlap=1;SizeDiff=-4;StartDistance=0;EndDistance=0;TruScore=98;MatchId=2.0 GT ./. ./. 0/1 +chr20 149073 . G GAATCCTGACCCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCA 60 . QNAME=cluster23_scaffold_2;QSTART=25547471;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.996479;PctSizeSimilarity=1;PctRecOverlap=0.130435;SizeDiff=0;StartDistance=-60;EndDistance=-60;TruScore=70;MatchId=1.0 GT ./. ./. 0/1 +chr20 149095 . G GGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGACCCATATTTGGGAA 60 . QNAME=cluster19_000000F;QSTART=25613718;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-82;EndDistance=-82;TruScore=66;MatchId=1.0 GT ./. 1/1 ./. +chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGCGGGAG 60 . QNAME=cluster23_scaffold_2;QSTART=25417363;QSTRAND=-;SVTYPE=INS;SVLEN=188;PctSeqSimilarity=0.986631;PctSizeSimilarity=0.978723;PctRecOverlap=0.978836;SizeDiff=-4;StartDistance=0;EndDistance=0;TruScore=98;MatchId=2.0 GT ./. ./. 0/1 chr20 306268 . A ACCAGGCTGGAGTGCAGTGGCTCACTGCGTGGCTCGCTACAGCCTACAACTCCTGGGCTCCAGCAATCCTGCTGCCCCAGCCTCCTGTGTAACTGAGACTACAGGCACGCACCACCACACCCAGCTAATGTTTTCTTTCTTTTTTTTTTTTTTGAGATGAACTCTCACTCTGTTGC 60 . QNAME=cluster19_000000F;QSTART=25455794;QSTRAND=-;SVTYPE=INS;SVLEN=175;PctSeqSimilarity=0.997159;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=99;MatchId=4.0 GT ./. 1/1 ./. chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCCATCCCCCGTCCGCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 61 . QNAME=HG002-S9-H2-000001F;QSTART=374905;QSTRAND=+;SVTYPE=INS;SVLEN=227;PctSeqSimilarity=0.995604;PctSizeSimilarity=0.995595;PctRecOverlap=1;SizeDiff=-1;StartDistance=0;EndDistance=0;TruScore=99;MatchId=6.0 GT 0/1 ./. 1/1 -chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.963781;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-417;EndDistance=-417;TruScore=73;MatchId=8.0 GT 0/1 ./. ./. -chr20 642230 . T TGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTG 60 . QNAME=cluster19_000000F;QSTART=25119065;QSTRAND=-;SVTYPE=INS;SVLEN=632;PctSeqSimilarity=0.998483;PctSizeSimilarity=0.996835;PctRecOverlap=0;SizeDiff=-2;StartDistance=-23;EndDistance=-23;GTMatch;TruScore=74;MatchId=8.0 GT ./. 1/0 ./. -chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=24996803;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.997396;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-95;EndDistance=-95;GTMatch;TruScore=74;MatchId=10.0 GT ./. 1/0 ./. +chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.963781;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-417;EndDistance=-417;TruScore=65;MatchId=8.0 GT 0/1 ./. ./. +chr20 642230 . T TGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTG 60 . QNAME=cluster19_000000F;QSTART=25119065;QSTRAND=-;SVTYPE=INS;SVLEN=632;PctSeqSimilarity=0.998483;PctSizeSimilarity=0.996835;PctRecOverlap=0.962085;SizeDiff=-2;StartDistance=-23;EndDistance=-23;GTMatch;TruScore=98;MatchId=8.0 GT ./. 1/0 ./. +chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=24996803;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.997396;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-95;EndDistance=-95;GTMatch;TruScore=66;MatchId=10.0 GT ./. 1/0 ./. diff --git a/repo_utils/test_files/answer_key/multi_removed_maxqual.vcf b/repo_utils/test_files/answer_key/multi_removed_maxqual.vcf index 8291c4d4..dfc61768 100644 --- a/repo_utils/test_files/answer_key/multi_removed_maxqual.vcf +++ b/repo_utils/test_files/answer_key/multi_removed_maxqual.vcf @@ -47,11 +47,11 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA24385 NA12878 HG00733 -chr20 149095 . G GGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGACCCATATTTGGGAA 60 . QNAME=cluster19_000000F;QSTART=25613718;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-82;EndDistance=-82;TruScore=74;MatchId=1.0 GT ./. 1/1 ./. -chr20 149073 . G GAATCCTGACCCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCA 60 . QNAME=cluster23_scaffold_2;QSTART=25547471;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.996479;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-60;EndDistance=-60;TruScore=74;MatchId=1.0 GT ./. ./. 0/1 -chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGCGGGAG 60 . QNAME=cluster23_scaffold_2;QSTART=25417363;QSTRAND=-;SVTYPE=INS;SVLEN=188;PctSeqSimilarity=0.986631;PctSizeSimilarity=0.978723;PctRecOverlap=1;SizeDiff=-4;StartDistance=0;EndDistance=0;TruScore=98;MatchId=2.0 GT ./. ./. 0/1 +chr20 149073 . G GAATCCTGACCCATATTTGGGAGGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCA 60 . QNAME=cluster23_scaffold_2;QSTART=25547471;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.996479;PctSizeSimilarity=1;PctRecOverlap=0.130435;SizeDiff=0;StartDistance=-60;EndDistance=-60;TruScore=70;MatchId=1.0 GT ./. ./. 0/1 +chr20 149095 . G GGCAATTTTACCTGTTCTCAAGGCCGCATCTCTACCCCATCTCATGCGAATCCTGACCCATATTTGGGAA 60 . QNAME=cluster19_000000F;QSTART=25613718;QSTRAND=-;SVTYPE=INS;SVLEN=69;PctSeqSimilarity=0.997024;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-82;EndDistance=-82;TruScore=66;MatchId=1.0 GT ./. 1/1 ./. +chr20 278930 . C CGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGCGGGACGGAGGGAGGGAGGGAGGGACGGAGGGCGGGACGGCGGGAGGGCGGGACGGAGGGACGGAGGGAGGGCGGGACGGAGGGCGGGAGGGCGGGACGGAGGGCGGGAG 60 . QNAME=cluster23_scaffold_2;QSTART=25417363;QSTRAND=-;SVTYPE=INS;SVLEN=188;PctSeqSimilarity=0.986631;PctSizeSimilarity=0.978723;PctRecOverlap=0.978836;SizeDiff=-4;StartDistance=0;EndDistance=0;TruScore=98;MatchId=2.0 GT ./. ./. 0/1 chr20 306268 . A ACCAGGCTGGAGTGCAGTGGCTCACTGCGTGGCTCGCTACAGCCTACAACTCCTGGGCTCCAGCAATCCTGCTGCCCCAGCCTCCTGTGTAACTGAGACTACAGGCACGCACCACCACACCCAGCTAATGTTTTCTTTCTTTTTTTTTTTTTTGAGATGAACTCTCACTCTGTTGC 60 . QNAME=cluster19_000000F;QSTART=25455794;QSTRAND=-;SVTYPE=INS;SVLEN=175;PctSeqSimilarity=0.997159;PctSizeSimilarity=1;PctRecOverlap=1;SizeDiff=0;StartDistance=0;EndDistance=0;TruScore=99;MatchId=4.0 GT ./. 1/1 ./. chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCCATCCCCCGTCCGCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 61 . QNAME=HG002-S9-H2-000001F;QSTART=374905;QSTRAND=+;SVTYPE=INS;SVLEN=227;PctSeqSimilarity=0.995604;PctSizeSimilarity=0.995595;PctRecOverlap=1;SizeDiff=-1;StartDistance=0;EndDistance=0;TruScore=99;MatchId=6.0 GT 0/1 ./. 1/1 -chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.963781;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-417;EndDistance=-417;TruScore=73;MatchId=8.0 GT 0/1 ./. ./. -chr20 642230 . T TGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTG 60 . QNAME=cluster19_000000F;QSTART=25119065;QSTRAND=-;SVTYPE=INS;SVLEN=632;PctSeqSimilarity=0.998483;PctSizeSimilarity=0.996835;PctRecOverlap=0;SizeDiff=-2;StartDistance=-23;EndDistance=-23;GTMatch;TruScore=74;MatchId=8.0 GT ./. 1/0 ./. -chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=24996803;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.997396;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-95;EndDistance=-95;GTMatch;TruScore=74;MatchId=10.0 GT ./. 1/0 ./. +chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.963781;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-417;EndDistance=-417;TruScore=65;MatchId=8.0 GT 0/1 ./. ./. +chr20 642230 . T TGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGTGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGAGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGATGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGGGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGGGCCCAGCAGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTG 60 . QNAME=cluster19_000000F;QSTART=25119065;QSTRAND=-;SVTYPE=INS;SVLEN=632;PctSeqSimilarity=0.998483;PctSizeSimilarity=0.996835;PctRecOverlap=0.962085;SizeDiff=-2;StartDistance=-23;EndDistance=-23;GTMatch;TruScore=98;MatchId=8.0 GT ./. 1/0 ./. +chr20 764537 . A AGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAAGGGGAGAATCCCCCACCTAAGGGGAGAGCCCCCACCTAT 60 . QNAME=cluster19_000000F;QSTART=24996803;QSTRAND=-;SVTYPE=INS;SVLEN=78;PctSeqSimilarity=0.997396;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-95;EndDistance=-95;GTMatch;TruScore=66;MatchId=10.0 GT ./. 1/0 ./. diff --git a/repo_utils/test_files/answer_key/truv2df.jl b/repo_utils/test_files/answer_key/truv2df.jl index e598a89c..c0c9c66f 100644 Binary files a/repo_utils/test_files/answer_key/truv2df.jl and b/repo_utils/test_files/answer_key/truv2df.jl differ diff --git a/truvari/bench.py b/truvari/bench.py index 9598fa7c..b2e4a9e9 100644 --- a/truvari/bench.py +++ b/truvari/bench.py @@ -60,9 +60,6 @@ def __lt__(self, other): def __eq__(self, other): return self.state == other.state and self.score == other.score - def __repr__(self): - return f'<{self.score} {self.state} {self.base.chrom}:{self.base.pos}->{self.comp.chrom}:{self.comp.pos}>' - def __str__(self): return f'{self.state} {self.score} ->\n {self.base} {self.comp}' diff --git a/truvari/comparisons.py b/truvari/comparisons.py index 0f9c3d7d..da8a8aeb 100644 --- a/truvari/comparisons.py +++ b/truvari/comparisons.py @@ -21,7 +21,7 @@ def entry_is_present(entry, sample=None): :type `sample`: string, optional :return: True if variant is present in the sample - :rtype: boolean + :rtype: bool Example >>> import truvari @@ -126,7 +126,7 @@ def entry_gt_comp(entryA, entryB, sampleA=None, sampleB=None): :type `sampleB`: string, optional :return: True if the genotypes are concordant - :rtype: boolean + :rtype: bool Example >>> import truvari @@ -185,7 +185,7 @@ def entry_create_haplotype(entryA, entryB, ref, use_ref_seq=False, buf_len=0): :param `ref`: Reference genome :type `ref`: :class:`pysam.FastaFile` :param `use_ref_seq`: If True, use the reference genome to get the sequence instead of the vcf entries - :type `use_ref_seq`: boolean, optional + :type `use_ref_seq`: bool, optional :param `buf_len`: Percent of selected region's range length to buffer :type `buf_len`: float, optional @@ -199,7 +199,6 @@ def get_props(entry): if use_ref_seq and (entry.alts[0] == "" or len(entry.alts[0]) < len(entry.ref)): return entry.chrom, entry.start, entry.stop, ref.fetch(entry.chrom, entry.start, entry.stop) return entry.chrom, entry.start, entry.stop, entry.alts[0] - a1 = get_props(entryA) a2 = get_props(entryB) return create_pos_haplotype(a1, a2, ref, buf_len=buf_len) @@ -240,7 +239,7 @@ def entry_pctsim(entryA, entryB, ref, buf_len=0, use_lev=True): :param `buf_len`: Percent of selected region's range length to buffer :type `buf_len`: float, optional :param `use_lev`: Use levenshtein distance by default. Set to False to use the faster edlib - :type `use_lev`: boolean, optional + :type `use_lev`: bool, optional :return: sequence similarity :rtype: float @@ -255,13 +254,7 @@ def entry_pctsim(entryA, entryB, ref, buf_len=0, use_lev=True): return seqsim(allele1, allele2, use_lev) # Handling of breakends should be here - try: - allele1, allele2 = entry_create_haplotype( - entryA, entryB, ref, buf_len=buf_len) - except Exception as e: # pylint: disable=broad-except - logging.critical('Unable to compare sequence similarity\n%s\n%s\n%s', str( - entryA), str(entryB), str(e)) - return 0 + allele1, allele2 = entry_create_haplotype(entryA, entryB, ref, buf_len=buf_len) return seqsim(allele1, allele2, use_lev) @@ -274,7 +267,7 @@ def seqsim(allele1, allele2, use_lev=False): :param `allele2`: second entry :type `allele2`: :class:`pysam.VariantRecord` :param `use_lev`: Use levenshtein distance by default. Set to False to use the faster edlib - :type `use_lev`: boolean, optional + :type `use_lev`: bool, optional :return: sequence similarity :rtype: float @@ -300,7 +293,7 @@ def overlaps(s1, e1, s2, e2): :type `e2`: int :return: True if ranges overlap - :rtype: boolean + :rtype: bool """ s_cand = max(s1, s2) e_cand = min(e1, e2) @@ -365,25 +358,31 @@ def entry_same_variant_type(entryA, entryB): :type `entryB`: :class:`pysam.VariantRecord` :return: True if entry SVTYPEs match - :rtype: boolean + :rtype: bool """ a_type = entry_variant_type(entryA) b_type = entry_variant_type(entryB) return a_type == b_type -def entry_boundaries(entry): +def entry_boundaries(entry, ins_inflate=False): """ Get the start/end of an entry and order (start < end) :param `entry`: entry to get bounds :type `entry`: :class:`pysam.VariantRecord` + :param `ins_inflate`: inflate INS boundaries by sv length + :type `ins_inflate`: bool, optional :return: the entry's start/end boundaries :rtype: tuple (int, int) """ start = entry.start end = entry.stop + if ins_inflate and entry_variant_type(entry) == 'INS': + size = entry_size(entry) + start -= size // 2 + end += size // 2 return start, end @@ -436,9 +435,9 @@ def weighted_score(sim, size, ovl): :return: The score :rtype: float """ - score = (2 * sim + 1 * size + 1 * ovl) / 3.0 - new_score = score / 1.333333 * 100 - return new_score + score = (sim + size + ovl) / 3.0 * 100 + #new_score = score / 1.333333 * 100 + return score def reciprocal_overlap(astart, aend, bstart, bend): @@ -488,8 +487,8 @@ def entry_reciprocal_overlap(entry1, entry2): >>> truvari.entry_reciprocal_overlap(a, b) 0 """ - astart, aend = entry_boundaries(entry1) - bstart, bend = entry_boundaries(entry2) + astart, aend = entry_boundaries(entry1, True) + bstart, bend = entry_boundaries(entry2, True) return reciprocal_overlap(astart, aend, bstart, bend)