diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index 7f3e16862c6..62262c91a77 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -458,6 +458,11 @@ private AssemblyResult createGraph(final Iterable reads, // actually build the read threading graph rtgraph.buildGraphIfNecessary(); + // It's important to prune before recovering dangling ends so that we don't waste time recovering bad ends. + // It's also important to prune before checking for cycles so that sequencing errors don't create false cycles + // and unnecessarily abort assembly + chainPruner.pruneLowWeightChains(rtgraph); + // sanity check: make sure there are no cycles in the graph if ( rtgraph.hasCycles() ) { if ( debug ) { @@ -480,9 +485,6 @@ private AssemblyResult createGraph(final Iterable reads, private AssemblyResult getAssemblyResult(final Haplotype refHaplotype, final int kmerSize, final ReadThreadingGraph rtgraph, final SmithWatermanAligner aligner) { printDebugGraphTransform(rtgraph, refHaplotype.getLocation() + "-sequenceGraph." + kmerSize + ".0.0.raw_readthreading_graph.dot"); - // Prune before recovering dangling ends because the latter is expensive - chainPruner.pruneLowWeightChains(rtgraph); - // look at all chains in the graph that terminate in a non-ref node (dangling sources and sinks) and see if // we can recover them by merging some N bases from the chain back into the reference if ( recoverDanglingBranches ) { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index d966eb393e5..e6042176622 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -14,6 +14,7 @@ import org.broadinstitute.barclay.argparser.CommandLineException; import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection; import org.broadinstitute.hellbender.engine.AssemblyRegionWalker; import org.broadinstitute.hellbender.engine.FeatureDataSource; import org.broadinstitute.hellbender.engine.ReadsDataSource; @@ -1016,6 +1017,28 @@ public void test100PercentContaminationNoCallsInGVCFMode() throws Exception { } } + // check that a single, errorful, read that induces a cycle does not cause an assembly region to lose a real variant + @Test + public void testPrunedCycle() throws Exception { + final File output = createTempFile("output", ".vcf"); + + final String[] args = { + "-I", TEST_FILES_DIR + "pruned_cycle.bam", + "-R", b37Reference, + "-L", "1:169510380", + "--" + IntervalArgumentCollection.INTERVAL_PADDING_LONG_NAME, "100", + "-O", output.getAbsolutePath() + }; + runCommandLine(args); + + final Optional het = VariantContextTestUtils.streamVcf(output) + .filter(vc -> vc.getStart() == 169510380) + .findFirst(); + + Assert.assertTrue(het.isPresent()); + Assert.assertTrue(het.get().getGenotype(0).getAD()[1] > 100); + } + @DataProvider public Object[][] getMaxAlternateAllelesData() { return new Object[][] { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 28b07023c7a..d5ed6889367 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -634,7 +634,7 @@ public void testMitochondria() throws Exception { "chrM:750-750 A*, [G]"); Assert.assertTrue(expectedKeys.stream().allMatch(variantKeys::contains)); - Assert.assertEquals(variants.get(0).getAttributeAsInt(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, 0), 1513); + Assert.assertEquals(variants.get(0).getAttributeAsInt(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, 0), 1517); Assert.assertEquals(variants.get(0).getGenotype("NA12878").getAnyAttribute(GATKVCFConstants.POTENTIAL_POLYMORPHIC_NUMT_KEY), "true"); } @@ -693,7 +693,7 @@ public void testMitochondrialRefConf() throws Exception { final List variantKeys = new ArrayList<>(variantMap.keySet()); final List expectedKeys = Arrays.asList( - "chrM:152-152 T*, [, C]", + "chrM:152-153 TA*, [, CA, CC]", "chrM:263-263 A*, [, G]", "chrM:297-297 A*, [, AC, C]", //alt alleles get sorted when converted to keys //"chrM:301-301 A*, [, AC, ACC]", diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/pruned_cycle.bam b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/pruned_cycle.bam new file mode 100644 index 00000000000..a596b3809a3 Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/pruned_cycle.bam differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/pruned_cycle.bam.bai b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/pruned_cycle.bam.bai new file mode 100644 index 00000000000..39fa15f8a25 Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/pruned_cycle.bam.bai differ