From 056c6f9293bfe16eb0a3e02ee979a3c549561bdc Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Thu, 31 Oct 2019 14:39:18 -0400 Subject: [PATCH] Mutect2 warns but does not fail when three or more reads have the same name --- .../mutect/SomaticGenotypingEngine.java | 2 +- .../hellbender/utils/read/Fragment.java | 21 +++++++++++++++++++ .../mutect/Mutect2IntegrationTest.java | 3 +-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java index 624abd633e2..859cbd90bb4 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java @@ -90,7 +90,7 @@ public CalledHaplotypes callMutations( if (MTAC.likelihoodArgs.phredScaledGlobalReadMismappingRate > 0) { logReadLikelihoods.normalizeLikelihoods(NaturalLogUtils.qualToLogErrorProb(MTAC.likelihoodArgs.phredScaledGlobalReadMismappingRate)); } - final AlleleLikelihoods logFragmentLikelihoods = logReadLikelihoods.groupEvidence(MTAC.independentMates ? read -> read : GATKRead::getName, Fragment::create); + final AlleleLikelihoods logFragmentLikelihoods = logReadLikelihoods.groupEvidence(MTAC.independentMates ? read -> read : GATKRead::getName, Fragment::createAndAvoidFailure); for( final int loc : startPosKeySet ) { final List eventsAtThisLoc = AssemblyBasedCallerUtils.getVariantContextsFromActiveHaplotypes(loc, haplotypes, false); diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/Fragment.java b/src/main/java/org/broadinstitute/hellbender/utils/read/Fragment.java index 919848b6ccb..127f2849fb1 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/Fragment.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/Fragment.java @@ -3,12 +3,15 @@ import htsjdk.samtools.util.Locatable; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; /** * All available evidence coming from a single biological fragment. Either one read or a read pair. @@ -19,6 +22,9 @@ public class Fragment implements Locatable { private final List reads; + private static final Logger logger = LogManager.getLogger(Fragment.class); + + public Fragment(final GATKRead read) { reads = Collections.singletonList(read); @@ -38,6 +44,21 @@ public static Fragment create(final List reads) { return reads.size() == 1 ? new Fragment(reads.get(0)) : new Fragment(ImmutablePair.of(reads.get(0), reads.get(1))); } + public static Fragment createAndAvoidFailure(final List reads) { + if (reads.size() <= 2) { + return create(reads); + } else { + final List nonSupplementaryReads = reads.stream() + .filter(read -> !(read.isDuplicate() || read.isSecondaryAlignment() || read.isSupplementaryAlignment())) + .collect(Collectors.toList()); + if (nonSupplementaryReads.size() > 2) { + logger.warn("More than two reads with the same name found. Using two reads randomly to combine as a fragment."); + return create(nonSupplementaryReads.subList(0,2)); + } + return create(nonSupplementaryReads); + } + } + public List getReads() { return reads; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 5aa1cf7cfc4..750484b768d 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -464,8 +464,7 @@ public void testBamWithRepeatedReads() { final File bam = new File(toolsTestDir + "mutect/repeated_reads.bam"); final File outputVcf = createTempFile("output", ".vcf"); - runMutect2(bam, outputVcf, "20:10018000-10020000", b37Reference, Optional.empty(), - args -> args.addBooleanArgument(M2ArgumentCollection.INDEPENDENT_MATES_LONG_NAME, true)); + runMutect2(bam, outputVcf, "20:10018000-10020000", b37Reference, Optional.empty()); } // basic test on a small chunk of NA12878 mitochondria. This is not a validation, but rather a sanity check