Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prune assembly graph before checking for cycles #5562

Merged
merged 1 commit into from
Feb 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,11 @@ private AssemblyResult createGraph(final Iterable<GATKRead> reads,
// actually build the read threading graph
rtgraph.buildGraphIfNecessary();

// It's important to prune before recovering dangling ends so that we don't waste time recovering bad ends.
// It's also important to prune before checking for cycles so that sequencing errors don't create false cycles
// and unnecessarily abort assembly
chainPruner.pruneLowWeightChains(rtgraph);

// sanity check: make sure there are no cycles in the graph
if ( rtgraph.hasCycles() ) {
if ( debug ) {
Expand All @@ -480,9 +485,6 @@ private AssemblyResult createGraph(final Iterable<GATKRead> reads,
private AssemblyResult getAssemblyResult(final Haplotype refHaplotype, final int kmerSize, final ReadThreadingGraph rtgraph, final SmithWatermanAligner aligner) {
printDebugGraphTransform(rtgraph, refHaplotype.getLocation() + "-sequenceGraph." + kmerSize + ".0.0.raw_readthreading_graph.dot");

// Prune before recovering dangling ends because the latter is expensive
chainPruner.pruneLowWeightChains(rtgraph);

// look at all chains in the graph that terminate in a non-ref node (dangling sources and sinks) and see if
// we can recover them by merging some N bases from the chain back into the reference
if ( recoverDanglingBranches ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection;
import org.broadinstitute.hellbender.engine.AssemblyRegionWalker;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.engine.ReadsDataSource;
Expand Down Expand Up @@ -1016,6 +1017,28 @@ public void test100PercentContaminationNoCallsInGVCFMode() throws Exception {
}
}

// check that a single, errorful, read that induces a cycle does not cause an assembly region to lose a real variant
@Test
public void testPrunedCycle() throws Exception {
final File output = createTempFile("output", ".vcf");

final String[] args = {
"-I", TEST_FILES_DIR + "pruned_cycle.bam",
"-R", b37Reference,
"-L", "1:169510380",
"--" + IntervalArgumentCollection.INTERVAL_PADDING_LONG_NAME, "100",
"-O", output.getAbsolutePath()
};
runCommandLine(args);

final Optional<VariantContext> het = VariantContextTestUtils.streamVcf(output)
.filter(vc -> vc.getStart() == 169510380)
.findFirst();

Assert.assertTrue(het.isPresent());
Assert.assertTrue(het.get().getGenotype(0).getAD()[1] > 100);
}

@DataProvider
public Object[][] getMaxAlternateAllelesData() {
return new Object[][] {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ public void testMitochondria() throws Exception {
"chrM:750-750 A*, [G]");
Assert.assertTrue(expectedKeys.stream().allMatch(variantKeys::contains));

Assert.assertEquals(variants.get(0).getAttributeAsInt(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, 0), 1513);
Assert.assertEquals(variants.get(0).getAttributeAsInt(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, 0), 1517);
Assert.assertEquals(variants.get(0).getGenotype("NA12878").getAnyAttribute(GATKVCFConstants.POTENTIAL_POLYMORPHIC_NUMT_KEY), "true");
}

Expand Down Expand Up @@ -693,7 +693,7 @@ public void testMitochondrialRefConf() throws Exception {
final List<String> variantKeys = new ArrayList<>(variantMap.keySet());

final List<String> expectedKeys = Arrays.asList(
"chrM:152-152 T*, [<NON_REF>, C]",
"chrM:152-153 TA*, [<NON_REF>, CA, CC]",
"chrM:263-263 A*, [<NON_REF>, G]",
"chrM:297-297 A*, [<NON_REF>, AC, C]", //alt alleles get sorted when converted to keys
//"chrM:301-301 A*, [<NON_REF>, AC, ACC]",
Expand Down
Binary file not shown.
Binary file not shown.