From d22fe7ea1836cde86ce57b277ff7d51ca9c4c979 Mon Sep 17 00:00:00 2001 From: MartonKN Date: Thu, 29 Nov 2018 13:15:02 -0500 Subject: [PATCH 1/4] Changed SelectVariants so that it can handle multiple rsIDs separated by ';' in a VCF file. --- .../engine/filters/VariantIDsVariantFilter.java | 12 +++++++++++- .../variantutils/SelectVariants/complexExample1.vcf | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/filters/VariantIDsVariantFilter.java b/src/main/java/org/broadinstitute/hellbender/engine/filters/VariantIDsVariantFilter.java index 32d07e82d70..db6e3ba8387 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/filters/VariantIDsVariantFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/filters/VariantIDsVariantFilter.java @@ -22,6 +22,16 @@ public VariantIDsVariantFilter(Set keepIDs) { @Override public boolean test(final VariantContext vc) { - return includeIDs.contains(vc.getID()); + if (vc.getID().indexOf(';') > 0) { + String[] vc_ids = vc.getID().split(";"); + for (String vc_id : vc_ids) { + if (includeIDs.contains(vc_id)) { + return true; + } + } + return false; + } else { + return includeIDs.contains(vc.getID()); + } } } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/complexExample1.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/complexExample1.vcf index 6e8deacc026..7fd5f353b99 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/complexExample1.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/complexExample1.vcf @@ -29,5 +29,6 @@ 1 10044557 . C T 62.22 . AF=0.50;AlleleBalance=0.68;DoC=31;HomopolymerRun=16;MAPQ0=0;NS=1;RMSMAPQ=85.34;SB=-0.00;SpanningDeletions=0 GT:DP:GQ ./. ./. 0/1:31:15 1 10045603 . 
AAAA A 40.26 PASS AF=0.50 GT:DP:GQ 0/1:15:8 0/0:15:8 1/1:15:8 1 10045604 . A ACAT 41.26 PASS AF=0.50 GT:DP:GQ 1/1:14:7 0/0:15:8 1/0:15:8 +1 10046982 testid0;testid1 C T 32.01 PASS AF=0.40;AlleleBalance=0.37;DoC=21;HomopolymerRun=0;MAPQ0=8;NS=1;RMSMAPQ=12.31;SB=-4.2;SpanningDeletions=0 GT:DP:GQ 1/0:3:49 ./. ./. 1 10048142 . A G 126.81 foo AF=1.00;DoC=36;HomopolymerRun=4;MAPQ0=0;NS=1;RMSMAPQ=82.11;SB=-85.45;SpanningDeletions=0 GT:DP:GQ 0/1:15:8 0/0:15:8 1/1:15:8 1 10048580 . T A 72.22 bar;baz AF=0.50;AlleleBalance=0.69;DoC=33;HomopolymerRun=6;MAPQ0=0;NS=1;RMSMAPQ=84.45;SB=-27.41;SpanningDeletions=0 GT:DP:GQ 0/1:15:8 0/0:15:8 1/1:15:8 From 203b73960b06bc10b673c2689e26467ba40a465e Mon Sep 17 00:00:00 2001 From: MartonKN Date: Thu, 29 Nov 2018 13:53:31 -0500 Subject: [PATCH 2/4] The tests testKeepSelectionIDLiteral and testKeepSelectionIDFromFile broke due to the change in the test file complexExample1.vcf. I modified the file testSelectVariants_KeepSelectionID.vcf appropriately so that the tests pass as they should. 
--- .../expected/testSelectVariants_KeepSelectionID.vcf | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_KeepSelectionID.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_KeepSelectionID.vcf index 9d41d7d9f6a..ee1dbfa312a 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_KeepSelectionID.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_KeepSelectionID.vcf @@ -23,3 +23,4 @@ ##source=ArbitrarySource #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 1 10001292 testid1 G A 12.22 PASS AF=0.50;AlleleBalance=0.57;DoC=23;HomopolymerRun=0;MAPQ0=8;NS=1;RMSMAPQ=46.31;SB=-24.92;SpanningDeletions=0 GT:DP:GQ 1/0:23:19 ./. ./. +1 10046982 testid0;testid1 C T 32.01 PASS AF=0.40;AlleleBalance=0.37;DoC=21;HomopolymerRun=0;MAPQ0=8;NS=1;RMSMAPQ=12.31;SB=-4.2;SpanningDeletions=0 GT:DP:GQ 1/0:3:49 ./. ./. From 5083418a2828094730304a5e8cbc3c83c96aa519 Mon Sep 17 00:00:00 2001 From: MartonKN Date: Fri, 30 Nov 2018 08:21:09 -0500 Subject: [PATCH 3/4] Changes due to review by David Benjamin and Phil Shapiro. 
--- .../engine/filters/VariantIDsVariantFilter.java | 13 ++++--------- .../haplotypecaller/HaplotypeCallerEngine.java | 4 +++- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/filters/VariantIDsVariantFilter.java b/src/main/java/org/broadinstitute/hellbender/engine/filters/VariantIDsVariantFilter.java index db6e3ba8387..d271a976020 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/filters/VariantIDsVariantFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/filters/VariantIDsVariantFilter.java @@ -5,6 +5,8 @@ import java.util.Set; import java.util.LinkedHashSet; +import java.util.*; +import java.util.stream.*; /** * Keep only variants with any of these IDs. @@ -23,15 +25,8 @@ public VariantIDsVariantFilter(Set keepIDs) { @Override public boolean test(final VariantContext vc) { if (vc.getID().indexOf(';') > 0) { - String[] vc_ids = vc.getID().split(";"); - for (String vc_id : vc_ids) { - if (includeIDs.contains(vc_id)) { - return true; - } - } - return false; - } else { - return includeIDs.contains(vc.getID()); + return Arrays.stream(vc.getID().split(";")).anyMatch(includeIDs::contains); } + return includeIDs.contains(vc.getID()); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index ee248b90c5e..4d386cb784b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -138,6 +138,8 @@ public final class HaplotypeCallerEngine implements AssemblyRegionEvaluator { private static final Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. 
Should never appear anywhere in a VCF file private static final Allele FAKE_ALT_ALLELE = Allele.create("", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file + public static final int MINIMUM_READ_LENGTH = 10; + /** * Create and initialize a new HaplotypeCallerEngine given a collection of HaplotypeCaller arguments, a reads header, * and a reference file @@ -523,7 +525,7 @@ public List callRegion(final AssemblyRegion region, final Featur final List givenAlleles = new ArrayList<>(); if ( hcArgs.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) { - features.getValues(hcArgs.alleles).stream().filter(vc -> hcArgs.genotypeFilteredAlleles || vc.isNotFiltered()).forEach(givenAlleles::add); + features.getValues(hcArgs.alleles).stream().filter(vc -> (hcArgs.genotypeFilteredAlleles || vc.isNotFiltered()) && (vc.getEnd() - vc.getStart() + 1 >= MINIMUM_READ_LENGTH)).forEach(givenAlleles::add); // No alleles found in this region so nothing to do! if ( givenAlleles.isEmpty() ) { From eb246a213afd4dc4953c06a23617e53b1833bbd1 Mon Sep 17 00:00:00 2001 From: MartonKN Date: Fri, 30 Nov 2018 11:38:32 -0500 Subject: [PATCH 4/4] Made the code in VariantIDsVariantFilter's test function more concise. 
--- .../hellbender/engine/filters/VariantIDsVariantFilter.java | 5 +---- .../tools/walkers/haplotypecaller/HaplotypeCallerEngine.java | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/filters/VariantIDsVariantFilter.java b/src/main/java/org/broadinstitute/hellbender/engine/filters/VariantIDsVariantFilter.java index d271a976020..1298c8b87e5 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/filters/VariantIDsVariantFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/filters/VariantIDsVariantFilter.java @@ -24,9 +24,6 @@ public VariantIDsVariantFilter(Set keepIDs) { @Override public boolean test(final VariantContext vc) { - if (vc.getID().indexOf(';') > 0) { - return Arrays.stream(vc.getID().split(";")).anyMatch(includeIDs::contains); - } - return includeIDs.contains(vc.getID()); + return Arrays.stream(vc.getID().split(";")).anyMatch(includeIDs::contains); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index 4d386cb784b..ee248b90c5e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -138,8 +138,6 @@ public final class HaplotypeCallerEngine implements AssemblyRegionEvaluator { private static final Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file private static final Allele FAKE_ALT_ALLELE = Allele.create("", false); // used in isActive function to call into UG Engine. 
Should never appear anywhere in a VCF file - public static final int MINIMUM_READ_LENGTH = 10; - /** * Create and initialize a new HaplotypeCallerEngine given a collection of HaplotypeCaller arguments, a reads header, * and a reference file @@ -525,7 +523,7 @@ public List callRegion(final AssemblyRegion region, final Featur final List givenAlleles = new ArrayList<>(); if ( hcArgs.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) { - features.getValues(hcArgs.alleles).stream().filter(vc -> (hcArgs.genotypeFilteredAlleles || vc.isNotFiltered()) && (vc.getEnd() - vc.getStart() + 1 >= MINIMUM_READ_LENGTH)).forEach(givenAlleles::add); + features.getValues(hcArgs.alleles).stream().filter(vc -> hcArgs.genotypeFilteredAlleles || vc.isNotFiltered()).forEach(givenAlleles::add); // No alleles found in this region so nothing to do! if ( givenAlleles.isEmpty() ) {