Skip to content

Commit

Permalink
review edits and a rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin committed Nov 24, 2019
1 parent 54baa9a commit 5ffdb83
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,6 @@
* </pre>
*
* <h3>Caveat</h3>
* <p>This tool will not output every annotation as many cannot be made without computing the per-read AlleleLikelihoods,
* which is generated in either the HaplotypeCaller or Mutect2. </p>
* <p>This tool outputs no annotations by default, all annotations/groups must be specified explicitly. </p>
*
* <h3>Special note on RankSumTestAnnotations</h3>
Expand Down Expand Up @@ -167,7 +165,7 @@ public class VariantAnnotator extends VariantWalker {
protected Boolean expressionAlleleConcordance = false;

/**
* Bases with a quality below this threshold will not be used for calling.
* Bases with a quality below this threshold will not be used for annotation.
*/
@Argument(fullName = AssemblyBasedCallerArgumentCollection.MIN_BASE_QUALITY_SCORE_LONG_NAME, doc = "Minimum base quality required to confidently assign a read to an allele", optional = true)
public byte minBaseQualityScore = 10;
Expand Down Expand Up @@ -235,9 +233,7 @@ public void apply(final VariantContext vc, final ReadsContext readsContext, fina
}

private AlleleLikelihoods<GATKRead, Allele> makeLikelihoods(final VariantContext vc, final ReadsContext readsContext) {
//TODO remove this filter and update the tests, this implementation filters out reads that start in a spanning deleting according to variant context in order to match gatk3,
//TODO this will cause the reads to be assigned and annotated in a different manner than the haplotype caller.
final List<GATKRead> reads = Utils.stream(readsContext).filter(r -> r.getStart() <= vc.getStart()).collect(Collectors.toList());
final List<GATKRead> reads = Utils.stream(readsContext).collect(Collectors.toList());
final AlleleLikelihoods<GATKRead, Allele> result = new AlleleLikelihoods<>(variantSamples,
new IndexedAlleleList<>(vc.getAlleles()), AssemblyBasedCallerUtils.splitReadsBySample(variantSamples, getHeaderForReads(), reads));

Expand All @@ -248,7 +244,7 @@ private AlleleLikelihoods<GATKRead, Allele> makeLikelihoods(final VariantContext
final List<Allele> altAlleles = vc.getAlternateAlleles();
final int numAlleles = vc.getNAlleles();

// manually fill each sample's likelihoods one read at a time, assigning probability 1 to the matching allele, if present
// manually fill each sample's likelihoods one read at a time, assigning probability 1 (0 in log space) to the matching allele, if present
for (final Map.Entry<String, ReadPileup> samplePileups : pileupsBySample.entrySet()) {
final LikelihoodMatrix<GATKRead, Allele> sampleMatrix = result.sampleMatrix(result.indexOfSample(samplePileups.getKey()));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,32 @@
import htsjdk.variant.utils.VCFHeaderReader;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.*;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.commons.math3.util.MathArrays;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.Main;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.VariantContextTestUtils;
import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2;
import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.FilterMutectCalls;
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.VariantContextTestUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.codehaus.plexus.util.cli.Arg;
import org.reflections.Reflections;
import org.testng.Assert;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.function.BiConsumer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

Expand Down Expand Up @@ -96,7 +97,7 @@ public void testAgainstMutect2() throws Exception {
final Set<VariantAnnotation> representativeInfoStringAnnotations = ImmutableSet.of(new ReferenceBases());
final Set<VariantAnnotation> representativeInfoBooleanAnnotations = ImmutableSet.of(new TandemRepeat());

final Set<GenotypeAnnotation> representativeGenotypeAnnotations = ImmutableSet.of(new DepthPerAlleleBySample(), new DepthPerSampleHC(), new OrientationBiasReadCounts());
final Set<GenotypeAnnotation> representativeGenotypeAnnotations = ImmutableSet.of(new DepthPerAlleleBySample(), new DepthPerSampleHC(), new OrientationBiasReadCounts(), new AlleleFraction());

// check the header
final Set<String> infoHeaderKeys = getHeaderFromFile(reannotatedVcf).getInfoHeaderLines().stream().map(VCFInfoHeaderLine::getID).collect(Collectors.toSet());
Expand Down Expand Up @@ -155,6 +156,9 @@ public void testAgainstMutect2() throws Exception {
});
}

// VariantAnnotator can't hope to match M2 and HC perfectly since it doesn't go throught the whole process of assembly and realignment.
// The best we can expect is that annotations usually match. Performance as of this writing (GATK 4.1.5.0) is that less than 1 in 40 INFO
// and less than 1 in 15 FORMAT annotations are off
Assert.assertTrue(matchingInfoFieldValues.intValue() > 40 * nonMatchingInfoFieldValues.intValue());
Assert.assertTrue(matchingFormatFieldValues.intValue() > 15 * nonMatchingFormatFieldValues.intValue());
}
Expand Down Expand Up @@ -182,7 +186,7 @@ public void testComp() {
Assert.assertEquals(outputVC.hasAttribute(FOO), unfilteredCompVariants.contains(keyForVariant(outputVC)));
}

// add the input as a comp -- every site shold get this annotation
// add the input as a comp -- every site should get this annotation
final ArgumentsBuilder argsForTwoComps = new ArgumentsBuilder(argsForOneComp.getArgsArray())
.addFileArgument(StandardArgumentDefinitions.COMPARISON_LONG_NAME + ":" + FOO2, inputVCF);

Expand Down Expand Up @@ -309,6 +313,7 @@ public void testNoReads() {
final ArgumentsBuilder args = new ArgumentsBuilder()
.addVCF(inputVCF)
.addOutput(outputVCF)
.addBooleanArgument(StandardArgumentDefinitions.ENABLE_ALL_ANNOTATIONS, true)
.addInterval(new SimpleInterval("20", 1, 1));

runCommandLine(args.getArgsList());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
##fileformat=VCFv4.1
##INFO=<ID=AC,Number=A,Type=Integer,Description="Alt allele counts">
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Read Depth (only filtered reads used for calling)">
##source=UnifiedGenotyper
#CHROM POS ID REF ALT QUAL FILTER INFO
20 10002458 . G GTT,GTTT . filter1 AC=2,4
Expand Down
Binary file not shown.

0 comments on commit 5ffdb83

Please sign in to comment.