Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mutect2 warns but does not fail when three or more reads have the same name #6240

Merged
merged 1 commit into from
Nov 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public CalledHaplotypes callMutations(
if (MTAC.likelihoodArgs.phredScaledGlobalReadMismappingRate > 0) {
logReadLikelihoods.normalizeLikelihoods(NaturalLogUtils.qualToLogErrorProb(MTAC.likelihoodArgs.phredScaledGlobalReadMismappingRate));
}
final AlleleLikelihoods<Fragment, Haplotype> logFragmentLikelihoods = logReadLikelihoods.groupEvidence(MTAC.independentMates ? read -> read : GATKRead::getName, Fragment::create);
final AlleleLikelihoods<Fragment, Haplotype> logFragmentLikelihoods = logReadLikelihoods.groupEvidence(MTAC.independentMates ? read -> read : GATKRead::getName, Fragment::createAndAvoidFailure);

for( final int loc : startPosKeySet ) {
final List<VariantContext> eventsAtThisLoc = AssemblyBasedCallerUtils.getVariantContextsFromActiveHaplotypes(loc, haplotypes, false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
import htsjdk.samtools.util.Locatable;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

/**
* All available evidence coming from a single biological fragment. Either one read or a read pair.
Expand All @@ -19,6 +22,9 @@ public class Fragment implements Locatable {

private final List<GATKRead> reads;

private static final Logger logger = LogManager.getLogger(Fragment.class);



public Fragment(final GATKRead read) {
reads = Collections.singletonList(read);
Expand All @@ -38,6 +44,21 @@ public static Fragment create(final List<GATKRead> reads) {
return reads.size() == 1 ? new Fragment(reads.get(0)) : new Fragment(ImmutablePair.of(reads.get(0), reads.get(1)));
}

public static Fragment createAndAvoidFailure(final List<GATKRead> reads) {
if (reads.size() <= 2) {
return create(reads);
} else {
final List<GATKRead> nonSupplementaryReads = reads.stream()
.filter(read -> !(read.isDuplicate() || read.isSecondaryAlignment() || read.isSupplementaryAlignment()))
.collect(Collectors.toList());
if (nonSupplementaryReads.size() > 2) {
logger.warn("More than two reads with the same name found. Using two reads randomly to combine as a fragment.");
return create(nonSupplementaryReads.subList(0,2));
}
return create(nonSupplementaryReads);
}
}

public List<GATKRead> getReads() {
return reads;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,7 @@ public void testBamWithRepeatedReads() {
final File bam = new File(toolsTestDir + "mutect/repeated_reads.bam");
final File outputVcf = createTempFile("output", ".vcf");

runMutect2(bam, outputVcf, "20:10018000-10020000", b37Reference, Optional.empty(),
args -> args.addBooleanArgument(M2ArgumentCollection.INDEPENDENT_MATES_LONG_NAME, true));
runMutect2(bam, outputVcf, "20:10018000-10020000", b37Reference, Optional.empty());
}

// basic test on a small chunk of NA12878 mitochondria. This is not a validation, but rather a sanity check
Expand Down