From a08e467340e6ca0c6aa0c37d1c1a205692b3d6f8 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Wed, 21 Nov 2018 11:46:40 -0500 Subject: [PATCH 1/2] fixed M2 bug for germline resources with AF=. --- .../tools/walkers/SplitIntervals.java | 2 +- .../mutect/GermlineProbabilityCalculator.java | 8 +++---- .../tools/walkers/mutect/Mutect2Engine.java | 21 +++++++++++++++++-- .../mutect/Mutect2IntegrationTest.java | 18 ++++++++++++++++ 4 files changed, 42 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/SplitIntervals.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/SplitIntervals.java index ba7d24057d9..16a49a76286 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/SplitIntervals.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/SplitIntervals.java @@ -9,11 +9,11 @@ import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection; -import picard.cmdline.programgroups.IntervalsManipulationProgramGroup; import org.broadinstitute.hellbender.engine.GATKTool; import org.broadinstitute.hellbender.utils.IntervalUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.param.ParamUtils; +import picard.cmdline.programgroups.IntervalsManipulationProgramGroup; import picard.util.IntervalList.IntervalListScatterMode; import picard.util.IntervalList.IntervalListScatterer; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/GermlineProbabilityCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/GermlineProbabilityCalculator.java index ad447d868b2..cd807389903 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/GermlineProbabilityCalculator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/GermlineProbabilityCalculator.java @@ -45,17 +45,17 @@ public static double[] calculateGermlineProbabilities(final double[] populationA @VisibleForTesting static double[] getGermlineAltAlleleFrequencies(final List altAlleles, final Optional germlineVC, final double afOfAllelesNotInGermlineResource) { - if (germlineVC.isPresent() && germlineVC.get().hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) { - final List germlineAltAFs = germlineVC.get().getAttributeAsDoubleList(VCFConstants.ALLELE_FREQUENCY_KEY, afOfAllelesNotInGermlineResource); + if (germlineVC.isPresent()) { + final List germlineAltAFs = Mutect2Engine.getAttributeAsDoubleList(germlineVC.get(), VCFConstants.ALLELE_FREQUENCY_KEY, afOfAllelesNotInGermlineResource); return altAlleles.stream() .mapToDouble(allele -> { final OptionalInt germlineAltIndex = findAltAlleleIndex(germlineVC.get(), allele); return germlineAltIndex.isPresent() ? germlineAltAFs.get(germlineAltIndex.getAsInt()) : afOfAllelesNotInGermlineResource; }).toArray(); - } else { - return Doubles.toArray(Collections.nCopies(altAlleles.size(), afOfAllelesNotInGermlineResource)); } + + return Doubles.toArray(Collections.nCopies(altAlleles.size(), afOfAllelesNotInGermlineResource)); } /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index 0f59e4fa3c6..c552344b722 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -288,8 +288,9 @@ public ActivityProfileState isActive(final AlignmentContext context, final Refer } else if (!MTAC.genotypeGermlineSites) { final List germline = featureContext.getValues(MTAC.germlineResource, refInterval); if (!germline.isEmpty()){ - final List germlineAlleleFrequencies = germline.get(0).getAttributeAsDoubleList(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0); - if (! germlineAlleleFrequencies.isEmpty() && germlineAlleleFrequencies.get(0) > MTAC.maxPopulationAlleleFrequency) { + final VariantContext germlineVariant = germline.get(0); + final List germlineAlleleFrequencies = getAttributeAsDoubleList(germlineVariant, VCFConstants.ALLELE_FREQUENCY_KEY, 0.0); + if (!germlineAlleleFrequencies.isEmpty() && germlineAlleleFrequencies.get(0) > MTAC.maxPopulationAlleleFrequency) { return new ActivityProfileState(refInterval, 0.0); } } @@ -302,6 +303,22 @@ public ActivityProfileState isActive(final AlignmentContext context, final Refer return new ActivityProfileState( refInterval, 1.0, ActivityProfileState.Type.NONE, null); } + // NOTE: this is a hack to get around an htsjdk bug: https://github.com/samtools/htsjdk/issues/1228 + // htsjdk doesn't correctly detect the missing value string '.', so we have copied and fixed the htsjdk code + public static List getAttributeAsDoubleList(final VariantContext vc, final String key, final double defaultValue) { + return vc.getCommonInfo().getAttributeAsList(key).stream() + .map(x -> { + if (x == null) { + return defaultValue; + } else if (x instanceof Number) { + return ((Number) x).doubleValue(); + } else { + String string = (String) x; + return string.equals(VCFConstants.MISSING_VALUE_v4) ? defaultValue : Double.valueOf(string); // throws an exception if this isn't a string + } + }).collect(Collectors.toList()); + } + private static int getCurrentOrFollowingIndelLength(final PileupElement pe) { return pe.isDeletion() ? pe.getCurrentCigarElement().getLength() : pe.getLengthOfImmediatelyFollowingIndel(); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index d5c26219342..85257ac0b8f 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -58,6 +58,8 @@ public class Mutect2IntegrationTest extends CommandLineProgramTest { private static final File DEEP_MITO_BAM = new File(largeFileTestDir, "mutect/highDPMTsnippet.bam"); private static final String DEEP_MITO_SAMPLE_NAME = "mixture"; + private static final File GNOMAD_WITHOUT_AF_SNIPPET = new File(toolsTestDir, "mutect/gnomad-without-af.vcf"); + /** * Several DREAM challenge bams with synthetic truth data. In order to keep file sizes manageable, bams are restricted * to chromosome 20, leaving ~100-200 variants, and then further restricted to 400-bp intervals centered around @@ -351,6 +353,22 @@ public void testGivenAllelesZeroCoverage() throws Exception { runCommandLine(args); } + // make sure we have fixed a bug where germline resources with AF=. throw errors + @Test + public void testMissingAF() { + final File bam = new File(DREAM_BAMS_DIR, "tumor_4.bam"); + final String sample = "synthetic.challenge.set4.tumour"; + final File unfilteredVcf = createTempFile("unfiltered", ".vcf"); + final List args = Arrays.asList("-I", bam.getAbsolutePath(), + "-" + M2ArgumentCollection.TUMOR_SAMPLE_SHORT_NAME, sample, + "-R", b37_reference_20_21, + "--" + M2ArgumentCollection.GERMLINE_RESOURCE_LONG_NAME, GNOMAD_WITHOUT_AF_SNIPPET.getAbsolutePath(), + "-L", "20:10086110", + "-L", "20:10837425-10837426", + "-O", unfilteredVcf.getAbsolutePath()); + runCommandLine(args); + } + @Test public void testContaminationFilter() throws Exception { Utils.resetRandomGenerator(); From bf39362a9c2962d3192f7d91410e4bc2867687c6 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Wed, 21 Nov 2018 12:57:02 -0500 Subject: [PATCH 2/2] whoops, git add --- .../hellbender/tools/mutect/gnomad-without-af.vcf | 6 ++++++ .../tools/mutect/gnomad-without-af.vcf.idx | Bin 0 -> 321 bytes 2 files changed, 6 insertions(+) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/mutect/gnomad-without-af.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/mutect/gnomad-without-af.vcf.idx diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/gnomad-without-af.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/gnomad-without-af.vcf new file mode 100644 index 00000000000..0e30a79b310 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/gnomad-without-af.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +20 10086110 rs592026 G A 166.95 PASS AC=32788;AF=. +20 10837425 . TC T 305.46 PASS AC=1;AF=. diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/gnomad-without-af.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/gnomad-without-af.vcf.idx new file mode 100644 index 0000000000000000000000000000000000000000..2f99f9fe610145c8df8a5617a27ee030a57ef203 GIT binary patch literal 321 zcmWIXbctYOU|?Vd;Fk`|TgFGGmoLzwm5oS31hx&yu j7%_nCH8NmeSisM~4P-I_F$jQYeW;tLMJGYcgZUc(`Vcki literal 0 HcmV?d00001