From 544ce97045e3b2308ffb64a5c8a8589f5a9d5cd4 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Tue, 2 Oct 2018 13:39:08 -0400 Subject: [PATCH 01/11] adding option to drop annotations in selectvariants --- .../hellbender/engine/VariantWalker.java | 2 + .../walkers/variantutils/SelectVariants.java | 63 +++++++++++++++++-- 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java index 443480c692d..d5c31eca433 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java @@ -3,10 +3,12 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.exceptions.GATKException; +import java.util.Set; import java.util.Spliterator; /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java index cf262d6d16d..f88c15de578 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java @@ -10,11 +10,7 @@ import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.variantcontext.VariantContextUtils; import htsjdk.variant.variantcontext.writer.VariantContextWriter; -import htsjdk.variant.vcf.VCFHeader; -import htsjdk.variant.vcf.VCFConstants; -import htsjdk.variant.vcf.VCFHeaderLine; -import htsjdk.variant.vcf.VCFStandardHeaderLines; -import htsjdk.variant.vcf.VCFUtils; +import htsjdk.variant.vcf.*; import java.nio.file.Path; import org.broadinstitute.barclay.argparser.Argument; @@ -404,6 +400,18 @@ public final class SelectVariants extends VariantWalker { @Argument(fullName="set-filtered-gt-to-nocall", optional=true, doc="Set filtered genotypes to no-call") private boolean setFilteredGenotypesToNocall = false; + /** + * Info annotation fields to be dropped + */ + @Argument(fullName="drop-annotation",shortName = "DA",optional = true, doc="Set info fields to drop from output vcf") + private List infoFieldsToDrop=new ArrayList<>(); + + /** + * Genotype annotation fields to be dropped + */ + @Argument(fullName="drop-genotype-annotation",shortName = "DGA",optional = true, doc="Set genotype annotations to drop from output vcf") + private List genotypeAnnotationsToDrop=new ArrayList<>(); + @Hidden @Argument(fullName="allow-nonoverlapping-command-line-samples", optional=true, doc="Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.") @@ -508,6 +516,11 @@ public void onTraversalStart() { actualLines = headerLines; } } + if(!infoFieldsToDrop.isEmpty()) { + for(String infoField : infoFieldsToDrop) { + logger.info("Will drop info field: "+infoField); + } + } vcfWriter = createVCFWriter(outFile); vcfWriter.writeHeader(new VCFHeader(actualLines, samples)); @@ -569,6 +582,28 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext } final VariantContext filteredGenotypeToNocall = setFilteredGenotypesToNocall ? builder.make(): sub; + final VariantContextBuilder rmAnnotationsBuilder=new VariantContextBuilder(filteredGenotypeToNocall); + + for (String infoField : infoFieldsToDrop) { + rmAnnotationsBuilder.rmAttribute(infoField); + } + + ArrayList genotypesToWrite=new ArrayList<>(); + for (Genotype genotype : filteredGenotypeToNocall.getGenotypes()) { + final GenotypeBuilder genotypeBuilder=new GenotypeBuilder(genotype).noAttributes(); + Map attributes=genotype.getExtendedAttributes(); + for(String genotypeAnnotation : genotypeAnnotationsToDrop) { + if (attributes.containsKey(genotypeAnnotation)) { + attributes.remove(genotypeAnnotation); + } + } + genotypeBuilder.attributes(attributes); + genotypesToWrite.add(genotypeBuilder.make()); + } + rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite)); + final VariantContext variantContextToWrite=rmAnnotationsBuilder.make(); + + // Not excluding non-variants OR (subsetted polymorphic variants AND not spanning deletion) AND (including filtered loci OR subsetted variant) is not filtered // If exclude non-variants argument is not called, filtering will NOT occur. // If exclude non-variants is called, and a spanning deletion exists, the spanning deletion will be filtered @@ -596,7 +631,7 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext if (!failedJexlMatch && (!selectRandomFraction || Utils.getRandomGenerator().nextDouble() < fractionRandom)) { - vcfWriter.add(filteredGenotypeToNocall); + vcfWriter.add(variantContextToWrite); } } } @@ -780,6 +815,22 @@ private Set createVCFHeaderLineList(Map vcfHea headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions)); headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); + //remove header lines for info field and genotype annotations being dropped + List headerLinesToRemove=new ArrayList<>(); + List infoHeaderLines = headerLines.stream().filter(l -> l instanceof VCFInfoHeaderLine).map(l->(VCFInfoHeaderLine)l).collect(Collectors.toList()); + for (VCFInfoHeaderLine infoHeaderLine : infoHeaderLines) { + if (infoFieldsToDrop.contains(infoHeaderLine.getID())) { + headerLinesToRemove.add(infoHeaderLine); + } + } + List formatHeaderLines = headerLines.stream().filter(l -> l instanceof VCFFormatHeaderLine).map(l->(VCFFormatHeaderLine)l).collect(Collectors.toList()); + for (VCFFormatHeaderLine formatHeaderLine : formatHeaderLines) { + if (genotypeAnnotationsToDrop.contains(formatHeaderLine.getID())) { + headerLinesToRemove.add(formatHeaderLine); + } + } + headerLines.removeAll(headerLinesToRemove); + return headerLines; } From 04d5078c27f17f85342819441ffcd140126d78f1 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Wed, 3 Oct 2018 12:26:06 -0400 Subject: [PATCH 02/11] adding tests --- .../walkers/variantutils/SelectVariants.java | 2 +- .../SelectVariantsIntegrationTest.java | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java index f88c15de578..2f3fd948595 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java @@ -591,7 +591,7 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext ArrayList genotypesToWrite=new ArrayList<>(); for (Genotype genotype : filteredGenotypeToNocall.getGenotypes()) { final GenotypeBuilder genotypeBuilder=new GenotypeBuilder(genotype).noAttributes(); - Map attributes=genotype.getExtendedAttributes(); + Map attributes=new HashMap<>(genotype.getExtendedAttributes()); for(String genotypeAnnotation : genotypeAnnotationsToDrop) { if (attributes.containsKey(genotypeAnnotation)) { attributes.remove(genotypeAnnotation); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java index 0539645dac7..b3c361fcf02 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -877,4 +877,27 @@ public void testSetFilteredGtoNocallUpdateInfo() throws IOException { spec.executeTest("testSetFilteredGtoNocallUpdateInfo--" + testFile, this); } + + @DataProvider(name="dropAnnotationsDataProvider") + Object[][] dropAnnotationsDataProvider() { + return new Object[][] { + {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894","testSelectVariants_DropAnnotations.vcf","standard"}, + {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -DA NotAnAnnotation -DGA AlsoNotAnAnnotation","testSelectVariants_DropAnnotations.vcf","unused_annotations"}, + {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'FisherStrand > 10.0'","testSelectVariants_DropAnnotationsSelectFisherStrand.vcf","select_on_dropped_annotation"}, + {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'RMSMAPQ > 175.0'","testSelectVariants_DropAnnotationsSelectRMSMAPQ.vcf","select_on_kept_annotation"}, + {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'vc.getGenotype(\"NA11894\").getExtendedAttribute(\"RD\")>6'","testSelectVariants_DropAnnotationsSelectRD.vcf","select_on_dropped_genotype_annotation"}, + {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'vc.getGenotype(\"NA11894\").getGQ()==1'","testSelectVariants_DropAnnotationsSelectGQ.vcf","select_on_kept_genotype_annotation"} + }; + } + + @Test(dataProvider = "dropAnnotationsDataProvider") + public void testDropAnnotations(String args, String expectedFile,String name) throws IOException { + final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; + + final IntegrationTestSpec spec = new IntegrationTestSpec( + baseTestString(args, testFile), + Collections.singletonList(getToolTestDataDir() + "expected/" + expectedFile) + ); + spec.executeTest("testDropAnnotations--"+name, this); + } } From 4b24b513b82eb9292f3330fceac134255d64d298 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Wed, 3 Oct 2018 12:30:04 -0400 Subject: [PATCH 03/11] adding expected vcfs for tests --- .../testSelectVariants_DropAnnotations.vcf | 33 +++++++++++++++++++ ...ants_DropAnnotationsSelectFisherStrand.vcf | 26 +++++++++++++++ ...SelectVariants_DropAnnotationsSelectGQ.vcf | 25 ++++++++++++++ ...SelectVariants_DropAnnotationsSelectRD.vcf | 25 ++++++++++++++ ...tVariants_DropAnnotationsSelectRMSMAPQ.vcf | 25 ++++++++++++++ 5 files changed, 134 insertions(+) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotations.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectFisherStrand.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectGQ.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectRD.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectRMSMAPQ.vcf diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotations.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotations.vcf new file mode 100644 index 00000000000..0335ab6d3e7 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotations.vcf @@ -0,0 +1,33 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##reference=human_b36_both.fasta +##source=UnifiedGenotyper +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA11894 +1 10020400 . C T 30.66 . AC=1;AF=0.500;AFrange=0.01-0.09,95%;AN=2;AlleleBalance=0.72;DoC=193;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=177.45;SB=-0.02;SpanningDeletions=0 GT:GQ 0/1:1 +1 10020408 . C A 57.15 . AC=1;AF=0.500;AFrange=0.01-0.11,95%;AN=2;AlleleBalance=0.73;DoC=179;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=174.11;SB=-0.02;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020416 . G A,T 40.12 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.10,95%;AN=2;AlleleBalance=0.73;DoC=166;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=176.69;SB=-0.01;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020436 . A T 64.57 . AC=1;AF=0.500;AFrange=0.01-0.12,95%;AN=2;AlleleBalance=0.73;DoC=168;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=170.66;SB=-1.53;SpanningDeletions=0 GT:GQ 0/1:2 +1 10020439 . G A,T 57.80 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.13,95%;AN=2;AlleleBalance=0.73;DoC=156;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=167.02;SB=-0.33;SpanningDeletions=0 GT:GQ 1/0:2 +1 10020447 . C T 68.03 . AC=1;AF=0.500;AFrange=0.01-0.14,95%;AN=2;AlleleBalance=0.72;DoC=140;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=166.27;SB=-0.62;SpanningDeletions=0 GT:GQ 0/1:2 +1 10020452 . T C 32.71 . AC=1;AF=0.500;AFrange=0.01-0.12,95%;AN=2;AlleleBalance=0.70;DoC=138;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=167.20;SB=-0.03;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020453 . G A,T 48.53 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.12,95%;AN=2;AlleleBalance=0.70;DoC=133;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ 1/0:2 +1 10020464 . G T 74.83 . AC=1;AF=0.500;AFrange=0.01-0.13,95%;AN=2;AlleleBalance=0.74;DoC=152;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ 0/1:3 +1 10020470 . A G,T 91.66 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.13,95%;AN=2;AlleleBalance=0.70;DoC=182;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ 0/1:2 +1 10020484 . A C,T 55.89 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.09,95%;AN=2;AlleleBalance=0.76;DoC=239;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ 0/1:2 +1 10020485 . G A,T 32.66 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.08,95%;AN=2;AlleleBalance=0.75;DoC=237;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020492 . T A,G 44.35 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.09,95%;AN=2;AlleleBalance=0.68;DoC=284;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020615 . C T,A 162.10 . AC=0,0;AF=0.00,0.00;AFrange=0.01-0.10,95%;AN=2;AlleleBalance=0.72;DoC=285;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ 0/0:2 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectFisherStrand.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectFisherStrand.vcf new file mode 100644 index 00000000000..d9b7cac368c --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectFisherStrand.vcf @@ -0,0 +1,26 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##reference=human_b36_both.fasta +##source=UnifiedGenotyper +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA11894 +1 10020408 . C A 57.15 . AC=1;AF=0.500;AFrange=0.01-0.11,95%;AN=2;AlleleBalance=0.73;DoC=179;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=174.11;SB=-0.02;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020416 . G A,T 40.12 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.10,95%;AN=2;AlleleBalance=0.73;DoC=166;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=176.69;SB=-0.01;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020453 . G A,T 48.53 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.12,95%;AN=2;AlleleBalance=0.70;DoC=133;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=168.40;SB=-0.02;SpanningDeletions=0 GT:GQ 1/0:2 +1 10020464 . G T 74.83 . AC=1;AF=0.500;AFrange=0.01-0.13,95%;AN=2;AlleleBalance=0.74;DoC=152;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=170.06;SB=-0.04;SpanningDeletions=0 GT:GQ 0/1:3 +1 10020470 . A G,T 91.66 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.13,95%;AN=2;AlleleBalance=0.70;DoC=182;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=174.37;SB=-0.25;SpanningDeletions=0 GT:GQ 0/1:2 +1 10020484 . A C,T 55.89 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.09,95%;AN=2;AlleleBalance=0.76;DoC=239;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ 0/1:2 +1 10020485 . G A,T 32.66 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.08,95%;AN=2;AlleleBalance=0.75;DoC=237;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ 1/0:1 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectGQ.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectGQ.vcf new file mode 100644 index 00000000000..15f165109e5 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectGQ.vcf @@ -0,0 +1,25 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##reference=human_b36_both.fasta +##source=UnifiedGenotyper +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA11894 +1 10020400 . C T 30.66 . AC=1;AF=0.500;AFrange=0.01-0.09,95%;AN=2;AlleleBalance=0.72;DoC=193;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=177.45;SB=-0.02;SpanningDeletions=0 GT:GQ 0/1:1 +1 10020408 . C A 57.15 . AC=1;AF=0.500;AFrange=0.01-0.11,95%;AN=2;AlleleBalance=0.73;DoC=179;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=174.11;SB=-0.02;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020416 . G A,T 40.12 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.10,95%;AN=2;AlleleBalance=0.73;DoC=166;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=176.69;SB=-0.01;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020452 . T C 32.71 . AC=1;AF=0.500;AFrange=0.01-0.12,95%;AN=2;AlleleBalance=0.70;DoC=138;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=167.20;SB=-0.03;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020485 . G A,T 32.66 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.08,95%;AN=2;AlleleBalance=0.75;DoC=237;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020492 . T A,G 44.35 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.09,95%;AN=2;AlleleBalance=0.68;DoC=284;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ 1/0:1 \ No newline at end of file diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectRD.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectRD.vcf new file mode 100644 index 00000000000..30269d60ac6 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectRD.vcf @@ -0,0 +1,25 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##reference=human_b36_both.fasta +##source=UnifiedGenotyper +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA11894 +1 10020408 . C A 57.15 . AC=1;AF=0.500;AFrange=0.01-0.11,95%;AN=2;AlleleBalance=0.73;DoC=179;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=174.11;SB=-0.02;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020416 . G A,T 40.12 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.10,95%;AN=2;AlleleBalance=0.73;DoC=166;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=176.69;SB=-0.01;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020436 . A T 64.57 . AC=1;AF=0.500;AFrange=0.01-0.12,95%;AN=2;AlleleBalance=0.73;DoC=168;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=170.66;SB=-1.53;SpanningDeletions=0 GT:GQ 0/1:2 +1 10020484 . A C,T 55.89 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.09,95%;AN=2;AlleleBalance=0.76;DoC=239;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ 0/1:2 +1 10020485 . G A,T 32.66 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.08,95%;AN=2;AlleleBalance=0.75;DoC=237;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020492 . T A,G 44.35 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.09,95%;AN=2;AlleleBalance=0.68;DoC=284;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ 1/0:1 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectRMSMAPQ.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectRMSMAPQ.vcf new file mode 100644 index 00000000000..28f8f835b54 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_DropAnnotationsSelectRMSMAPQ.vcf @@ -0,0 +1,25 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##reference=human_b36_both.fasta +##source=UnifiedGenotyper +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA11894 +1 10020400 . C T 30.66 . AC=1;AF=0.500;AFrange=0.01-0.09,95%;AN=2;AlleleBalance=0.72;DoC=193;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=177.45;SB=-0.02;SpanningDeletions=0 GT:GQ 0/1:1 +1 10020416 . G A,T 40.12 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.10,95%;AN=2;AlleleBalance=0.73;DoC=166;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=176.69;SB=-0.01;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020484 . A C,T 55.89 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.09,95%;AN=2;AlleleBalance=0.76;DoC=239;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=181.59;SB=-0.03;SpanningDeletions=0 GT:GQ 0/1:2 +1 10020485 . G A,T 32.66 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.08,95%;AN=2;AlleleBalance=0.75;DoC=237;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=180.16;SB=-0.02;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020492 . T A,G 44.35 . AC=1,0;AF=0.500,0.00;AFrange=0.01-0.09,95%;AN=2;AlleleBalance=0.68;DoC=284;HomopolymerRun=1;MAPQ0=0;NS=60;RMSMAPQ=184.59;SB=-0.04;SpanningDeletions=0 GT:GQ 1/0:1 +1 10020615 . C T,A 162.10 . AC=0,0;AF=0.00,0.00;AFrange=0.01-0.10,95%;AN=2;AlleleBalance=0.72;DoC=285;HomopolymerRun=0;MAPQ0=0;NS=60;RMSMAPQ=195.54;SB=-67.56;SpanningDeletions=0 GT:GQ 0/0:2 From e118a0d6bcbe7516f6e4b74e51c0547417e3453d Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Wed, 3 Oct 2018 14:08:06 -0400 Subject: [PATCH 04/11] removing unneeded added imports from VariantWalker --- .../org/broadinstitute/hellbender/engine/VariantWalker.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java index d5c31eca433..443480c692d 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java @@ -3,12 +3,10 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; -import htsjdk.variant.vcf.VCFHeaderLine; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.exceptions.GATKException; -import java.util.Set; import java.util.Spliterator; /** From 31771d3d474d50a2e0b4fd71b0c6d3c4f43dc539 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Wed, 3 Oct 2018 14:12:24 -0400 Subject: [PATCH 05/11] aesthetics --- .../walkers/variantutils/SelectVariants.java | 34 +++++++++---------- .../SelectVariantsIntegrationTest.java | 20 +++++------ 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java index 2f3fd948595..9e33b452df5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java @@ -403,14 +403,14 @@ public final class SelectVariants extends VariantWalker { /** * Info annotation fields to be dropped */ - @Argument(fullName="drop-annotation",shortName = "DA",optional = true, doc="Set info fields to drop from output vcf") - private List infoFieldsToDrop=new ArrayList<>(); + @Argument(fullName = "drop-annotation", shortName = "DA", optional = true, doc = "Set info fields to drop from output vcf") + private List infoFieldsToDrop = new ArrayList<>(); /** * Genotype annotation fields to be dropped */ - @Argument(fullName="drop-genotype-annotation",shortName = "DGA",optional = true, doc="Set genotype annotations to drop from output vcf") - private List genotypeAnnotationsToDrop=new ArrayList<>(); + @Argument(fullName = "drop-genotype-annotation", shortName = "DGA", optional = true, doc = "Set genotype annotations to drop from output vcf") + private List genotypeAnnotationsToDrop = new ArrayList<>(); @Hidden @Argument(fullName="allow-nonoverlapping-command-line-samples", optional=true, @@ -516,9 +516,9 @@ public void onTraversalStart() { actualLines = headerLines; } } - if(!infoFieldsToDrop.isEmpty()) { - for(String infoField : infoFieldsToDrop) { - logger.info("Will drop info field: "+infoField); + if (!infoFieldsToDrop.isEmpty()) { + for (String infoField : infoFieldsToDrop) { + logger.info("Will drop info field: " + infoField); } } @@ -582,17 +582,16 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext } final VariantContext filteredGenotypeToNocall = setFilteredGenotypesToNocall ? builder.make(): sub; - final VariantContextBuilder rmAnnotationsBuilder=new VariantContextBuilder(filteredGenotypeToNocall); - + final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(filteredGenotypeToNocall); for (String infoField : infoFieldsToDrop) { rmAnnotationsBuilder.rmAttribute(infoField); } - ArrayList genotypesToWrite=new ArrayList<>(); + ArrayList genotypesToWrite = new ArrayList<>(); for (Genotype genotype : filteredGenotypeToNocall.getGenotypes()) { - final GenotypeBuilder genotypeBuilder=new GenotypeBuilder(genotype).noAttributes(); - Map attributes=new HashMap<>(genotype.getExtendedAttributes()); - for(String genotypeAnnotation : genotypeAnnotationsToDrop) { + final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes(); + Map attributes = new HashMap<>(genotype.getExtendedAttributes()); + for (String genotypeAnnotation : genotypeAnnotationsToDrop) { if (attributes.containsKey(genotypeAnnotation)) { attributes.remove(genotypeAnnotation); } @@ -601,8 +600,7 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext genotypesToWrite.add(genotypeBuilder.make()); } rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite)); - final VariantContext variantContextToWrite=rmAnnotationsBuilder.make(); - + final VariantContext variantContextToWrite = rmAnnotationsBuilder.make(); // Not excluding non-variants OR (subsetted polymorphic variants AND not spanning deletion) AND (including filtered loci OR subsetted variant) is not filtered // If exclude non-variants argument is not called, filtering will NOT occur. @@ -816,14 +814,14 @@ private Set createVCFHeaderLineList(Map vcfHea headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); //remove header lines for info field and genotype annotations being dropped - List headerLinesToRemove=new ArrayList<>(); - List infoHeaderLines = headerLines.stream().filter(l -> l instanceof VCFInfoHeaderLine).map(l->(VCFInfoHeaderLine)l).collect(Collectors.toList()); + List headerLinesToRemove = new ArrayList<>(); + List infoHeaderLines = headerLines.stream().filter(l -> l instanceof VCFInfoHeaderLine).map(l -> (VCFInfoHeaderLine) l).collect(Collectors.toList()); for (VCFInfoHeaderLine infoHeaderLine : infoHeaderLines) { if (infoFieldsToDrop.contains(infoHeaderLine.getID())) { headerLinesToRemove.add(infoHeaderLine); } } - List formatHeaderLines = headerLines.stream().filter(l -> l instanceof VCFFormatHeaderLine).map(l->(VCFFormatHeaderLine)l).collect(Collectors.toList()); + List formatHeaderLines = headerLines.stream().filter(l -> l instanceof VCFFormatHeaderLine).map(l -> (VCFFormatHeaderLine) l).collect(Collectors.toList()); for (VCFFormatHeaderLine formatHeaderLine : formatHeaderLines) { if (genotypeAnnotationsToDrop.contains(formatHeaderLine.getID())) { headerLinesToRemove.add(formatHeaderLine); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java index b3c361fcf02..8502e3f1af2 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -878,26 +878,26 @@ public void testSetFilteredGtoNocallUpdateInfo() throws IOException { spec.executeTest("testSetFilteredGtoNocallUpdateInfo--" + testFile, this); } - @DataProvider(name="dropAnnotationsDataProvider") + @DataProvider(name = "dropAnnotationsDataProvider") Object[][] dropAnnotationsDataProvider() { - return new Object[][] { - {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894","testSelectVariants_DropAnnotations.vcf","standard"}, - {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -DA NotAnAnnotation -DGA AlsoNotAnAnnotation","testSelectVariants_DropAnnotations.vcf","unused_annotations"}, - {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'FisherStrand > 10.0'","testSelectVariants_DropAnnotationsSelectFisherStrand.vcf","select_on_dropped_annotation"}, - {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'RMSMAPQ > 175.0'","testSelectVariants_DropAnnotationsSelectRMSMAPQ.vcf","select_on_kept_annotation"}, - {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'vc.getGenotype(\"NA11894\").getExtendedAttribute(\"RD\")>6'","testSelectVariants_DropAnnotationsSelectRD.vcf","select_on_dropped_genotype_annotation"}, - {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'vc.getGenotype(\"NA11894\").getGQ()==1'","testSelectVariants_DropAnnotationsSelectGQ.vcf","select_on_kept_genotype_annotation"} + return new Object[][]{ + {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894", "testSelectVariants_DropAnnotations.vcf", "standard"}, + {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -DA NotAnAnnotation -DGA AlsoNotAnAnnotation", "testSelectVariants_DropAnnotations.vcf", "unused_annotations"}, + {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'FisherStrand > 10.0'", "testSelectVariants_DropAnnotationsSelectFisherStrand.vcf", "select_on_dropped_annotation"}, + {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'RMSMAPQ > 175.0'", "testSelectVariants_DropAnnotationsSelectRMSMAPQ.vcf", "select_on_kept_annotation"}, + {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'vc.getGenotype(\"NA11894\").getExtendedAttribute(\"RD\")>6'", "testSelectVariants_DropAnnotationsSelectRD.vcf", "select_on_dropped_genotype_annotation"}, + {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'vc.getGenotype(\"NA11894\").getGQ()==1'", "testSelectVariants_DropAnnotationsSelectGQ.vcf", "select_on_kept_genotype_annotation"} }; } @Test(dataProvider = "dropAnnotationsDataProvider") - public void testDropAnnotations(String args, String expectedFile,String name) throws IOException { + public void testDropAnnotations(String args, String expectedFile, String name) throws IOException { final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; final IntegrationTestSpec spec = new IntegrationTestSpec( baseTestString(args, testFile), Collections.singletonList(getToolTestDataDir() + "expected/" + expectedFile) ); - spec.executeTest("testDropAnnotations--"+name, this); + spec.executeTest("testDropAnnotations--" + name, this); } } From 349cbd76024331d427ceaddbcbbf72bb7d41b449 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Wed, 3 Oct 2018 14:20:46 -0400 Subject: [PATCH 06/11] adding message of genotype annotations which will be dropped --- .../tools/walkers/variantutils/SelectVariants.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java index 9e33b452df5..e3fce8b05ad 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java @@ -518,7 +518,12 @@ public void onTraversalStart() { } if (!infoFieldsToDrop.isEmpty()) { for (String infoField : infoFieldsToDrop) { - logger.info("Will drop info field: " + infoField); + logger.info("Will drop info annotation: " + infoField); + } + } + if (!genotypeAnnotationsToDrop.isEmpty()) { + for (String genotypeAnnotation : genotypeAnnotationsToDrop) { + logger.info("Will drop genotype annotation: " + genotypeAnnotation); } } From 707968314bf165019ed0d6234437b1fbc8b7211f Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Wed, 3 Oct 2018 14:31:47 -0400 Subject: [PATCH 07/11] don't need to check for key before removing from hashmap --- .../hellbender/tools/walkers/variantutils/SelectVariants.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java index e3fce8b05ad..0121b6498e3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java @@ -597,9 +597,7 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes(); Map attributes = new HashMap<>(genotype.getExtendedAttributes()); for (String genotypeAnnotation : genotypeAnnotationsToDrop) { - if (attributes.containsKey(genotypeAnnotation)) { attributes.remove(genotypeAnnotation); - } } genotypeBuilder.attributes(attributes); genotypesToWrite.add(genotypeBuilder.make()); From 31492398ba3f4989d77c7b7584bdddb4b42b0f35 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Thu, 4 Oct 2018 10:27:31 -0400 Subject: [PATCH 08/11] requested changes --- .../walkers/variantutils/SelectVariants.java | 68 ++++++++----------- .../SelectVariantsIntegrationTest.java | 4 +- 2 files changed, 30 insertions(+), 42 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java index 0121b6498e3..ec67d8518b4 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java @@ -403,13 +403,13 @@ public final class SelectVariants extends VariantWalker { /** * Info annotation fields to be dropped */ - @Argument(fullName = "drop-annotation", shortName = "DA", optional = true, doc = "Set info fields to drop from output vcf") - private List infoFieldsToDrop = new ArrayList<>(); + @Argument(fullName = "drop-info-annotation", shortName = "DA", optional = true, doc = "Info annotations to drop from output vcf") + private List infoAnnotationsToDrop = new ArrayList<>(); /** * Genotype annotation fields to be dropped */ - @Argument(fullName = "drop-genotype-annotation", shortName = "DGA", optional = true, doc = "Set genotype annotations to drop from output vcf") + @Argument(fullName = "drop-genotype-annotation", shortName = "DGA", optional = true, doc = "Genotype annotations to drop from output vcf") private List genotypeAnnotationsToDrop = new ArrayList<>(); @Hidden @@ -516,14 +516,14 @@ public void onTraversalStart() { actualLines = headerLines; } } - if (!infoFieldsToDrop.isEmpty()) { - for (String infoField : infoFieldsToDrop) { - logger.info("Will drop info annotation: " + infoField); + if (!infoAnnotationsToDrop.isEmpty()) { + for (final String infoField : infoAnnotationsToDrop) { + logger.info(String.format("Will drop info annotation: %s",infoField)); } } if (!genotypeAnnotationsToDrop.isEmpty()) { - for (String genotypeAnnotation : genotypeAnnotationsToDrop) { - logger.info("Will drop genotype annotation: " + genotypeAnnotation); + for (final String genotypeAnnotation : genotypeAnnotationsToDrop) { + logger.info(String.format("Will drop genotype annotation: %s",genotypeAnnotation)); } } @@ -587,24 +587,6 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext } final VariantContext filteredGenotypeToNocall = setFilteredGenotypesToNocall ? builder.make(): sub; - final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(filteredGenotypeToNocall); - for (String infoField : infoFieldsToDrop) { - rmAnnotationsBuilder.rmAttribute(infoField); - } - - ArrayList genotypesToWrite = new ArrayList<>(); - for (Genotype genotype : filteredGenotypeToNocall.getGenotypes()) { - final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes(); - Map attributes = new HashMap<>(genotype.getExtendedAttributes()); - for (String genotypeAnnotation : genotypeAnnotationsToDrop) { - attributes.remove(genotypeAnnotation); - } - genotypeBuilder.attributes(attributes); - genotypesToWrite.add(genotypeBuilder.make()); - } - rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite)); - final VariantContext variantContextToWrite = rmAnnotationsBuilder.make(); - // Not excluding non-variants OR (subsetted polymorphic variants AND not spanning deletion) AND (including filtered loci OR subsetted variant) is not filtered // If exclude non-variants argument is not called, filtering will NOT occur. // If exclude non-variants is called, and a spanning deletion exists, the spanning deletion will be filtered @@ -632,6 +614,24 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext if (!failedJexlMatch && (!selectRandomFraction || Utils.getRandomGenerator().nextDouble() < fractionRandom)) { + //remove annotations being dropped and write variantcontext + final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(filteredGenotypeToNocall); + for (String infoField : infoAnnotationsToDrop) { + rmAnnotationsBuilder.rmAttribute(infoField); + } + + ArrayList genotypesToWrite = new ArrayList<>(); + for (Genotype genotype : filteredGenotypeToNocall.getGenotypes()) { + final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes(); + Map attributes = new HashMap<>(genotype.getExtendedAttributes()); + for (String genotypeAnnotation : genotypeAnnotationsToDrop) { + attributes.remove(genotypeAnnotation); + } + genotypeBuilder.attributes(attributes); + genotypesToWrite.add(genotypeBuilder.make()); + } + rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite)); + final VariantContext variantContextToWrite = rmAnnotationsBuilder.make(); vcfWriter.add(variantContextToWrite); } } @@ -817,20 +817,8 @@ private Set createVCFHeaderLineList(Map vcfHea headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); //remove header lines for info field and genotype annotations being dropped - List headerLinesToRemove = new ArrayList<>(); - List infoHeaderLines = headerLines.stream().filter(l -> l instanceof VCFInfoHeaderLine).map(l -> (VCFInfoHeaderLine) l).collect(Collectors.toList()); - for (VCFInfoHeaderLine infoHeaderLine : infoHeaderLines) { - if (infoFieldsToDrop.contains(infoHeaderLine.getID())) { - headerLinesToRemove.add(infoHeaderLine); - } - } - List formatHeaderLines = headerLines.stream().filter(l -> l instanceof VCFFormatHeaderLine).map(l -> (VCFFormatHeaderLine) l).collect(Collectors.toList()); - for (VCFFormatHeaderLine formatHeaderLine : formatHeaderLines) { - if (genotypeAnnotationsToDrop.contains(formatHeaderLine.getID())) { - headerLinesToRemove.add(formatHeaderLine); - } - } - headerLines.removeAll(headerLinesToRemove); + headerLines.removeIf(l->l instanceof VCFInfoHeaderLine && infoAnnotationsToDrop.contains(((VCFInfoHeaderLine)l).getID())); + headerLines.removeIf(l->l instanceof VCFFormatHeaderLine && genotypeAnnotationsToDrop.contains(((VCFFormatHeaderLine)l).getID())); return headerLines; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java index 8502e3f1af2..19946f12a56 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -891,13 +891,13 @@ Object[][] dropAnnotationsDataProvider() { } @Test(dataProvider = "dropAnnotationsDataProvider") - public void testDropAnnotations(String args, String expectedFile, String name) throws IOException { + public void testDropAnnotations(String args, String expectedFile, String testName) throws IOException { final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; final IntegrationTestSpec spec = new IntegrationTestSpec( baseTestString(args, testFile), Collections.singletonList(getToolTestDataDir() + "expected/" + expectedFile) ); - spec.executeTest("testDropAnnotations--" + name, this); + spec.executeTest("testDropAnnotations--" + testName, this); } } From c68a4766432ef8cd624b769f9902ac01657a8194 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Thu, 4 Oct 2018 10:30:16 -0400 Subject: [PATCH 09/11] a few missing finals --- .../hellbender/tools/walkers/variantutils/SelectVariants.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java index ec67d8518b4..81b6e5ca096 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java @@ -620,10 +620,10 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext rmAnnotationsBuilder.rmAttribute(infoField); } - ArrayList genotypesToWrite = new ArrayList<>(); + final ArrayList genotypesToWrite = new ArrayList<>(); for (Genotype genotype : filteredGenotypeToNocall.getGenotypes()) { final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes(); - Map attributes = new HashMap<>(genotype.getExtendedAttributes()); + final Map attributes = new HashMap<>(genotype.getExtendedAttributes()); for (String genotypeAnnotation : genotypeAnnotationsToDrop) { attributes.remove(genotypeAnnotation); } From 9da7c072a090c3453b541d2fd5661c47aa4ea0cf Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Fri, 12 Oct 2018 15:34:57 -0400 Subject: [PATCH 10/11] requested performance change --- .../walkers/variantutils/SelectVariants.java | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java index 81b6e5ca096..17042442545 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java @@ -615,26 +615,34 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext if (!failedJexlMatch && (!selectRandomFraction || Utils.getRandomGenerator().nextDouble() < fractionRandom)) { //remove annotations being dropped and write variantcontext - final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(filteredGenotypeToNocall); - for (String infoField : infoAnnotationsToDrop) { - rmAnnotationsBuilder.rmAttribute(infoField); - } + final VariantContext variantContextToWrite = buildVariantContextWithDroppedAnnotationsRemoved(filteredGenotypeToNocall); + vcfWriter.add(variantContextToWrite); + } + } + } - final ArrayList genotypesToWrite = new ArrayList<>(); - for (Genotype genotype : filteredGenotypeToNocall.getGenotypes()) { - final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes(); - final Map attributes = new HashMap<>(genotype.getExtendedAttributes()); - for (String genotypeAnnotation : genotypeAnnotationsToDrop) { - attributes.remove(genotypeAnnotation); - } - genotypeBuilder.attributes(attributes); - genotypesToWrite.add(genotypeBuilder.make()); + private VariantContext buildVariantContextWithDroppedAnnotationsRemoved(final VariantContext vc) { + if (infoAnnotationsToDrop.isEmpty() && genotypeAnnotationsToDrop.isEmpty()) { + return vc; + } + final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(vc); + for (String infoField : infoAnnotationsToDrop) { + rmAnnotationsBuilder.rmAttribute(infoField); + } + if (!genotypeAnnotationsToDrop.isEmpty()) { + final ArrayList genotypesToWrite = new ArrayList<>(); + for (Genotype genotype : vc.getGenotypes()) { + final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes(); + final Map attributes = new HashMap<>(genotype.getExtendedAttributes()); + for (String genotypeAnnotation : genotypeAnnotationsToDrop) { + attributes.remove(genotypeAnnotation); } - rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite)); - final VariantContext variantContextToWrite = rmAnnotationsBuilder.make(); - vcfWriter.add(variantContextToWrite); + genotypeBuilder.attributes(attributes); + genotypesToWrite.add(genotypeBuilder.make()); } + rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite)); } + return rmAnnotationsBuilder.make(); } private boolean checkOnlySpanDel(VariantContext vc){ From 4ecf3f5db12098814ad88a38fee07a55859d2595 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Tue, 16 Oct 2018 11:44:03 -0400 Subject: [PATCH 11/11] doc improvements requested by Laura --- .../tools/walkers/variantutils/SelectVariants.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java index 17042442545..bd23811dfb9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants.java @@ -401,15 +401,15 @@ public final class SelectVariants extends VariantWalker { private boolean setFilteredGenotypesToNocall = false; /** - * Info annotation fields to be dropped + * Info annotation fields to be dropped (specified by key) */ - @Argument(fullName = "drop-info-annotation", shortName = "DA", optional = true, doc = "Info annotations to drop from output vcf") + @Argument(fullName = "drop-info-annotation", shortName = "DA", optional = true, doc = "Info annotations to drop from output vcf. Annotations to be dropped are specified by their key.") private List infoAnnotationsToDrop = new ArrayList<>(); /** - * Genotype annotation fields to be dropped + * Genotype annotation fields to be dropped (specified by key) */ - @Argument(fullName = "drop-genotype-annotation", shortName = "DGA", optional = true, doc = "Genotype annotations to drop from output vcf") + @Argument(fullName = "drop-genotype-annotation", shortName = "DGA", optional = true, doc = "Genotype annotations to drop from output vcf. Annotations to be dropped are specified by their key.") private List genotypeAnnotationsToDrop = new ArrayList<>(); @Hidden