From cfc744bc6459bc85541c4ebb697177b4e35d54b9 Mon Sep 17 00:00:00 2001
From: tedsharpe <tedsharpe@users.noreply.github.com>
Date: Mon, 20 May 2019 11:58:54 -0400
Subject: [PATCH] arbitrarily choose 1 read for disjoint pairs, dump rejected
 reads (#5926)

---
 .../tools/AnalyzeSaturationMutagenesis.java   | 380 ++++++++++--------
 .../AnalyzeSaturationMutagenesisUnitTest.java |  29 +-
 2 files changed, 235 insertions(+), 174 deletions(-)
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesis.java b/src/main/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesis.java
index 1ee8d7a0bd6..1f4c02ca5f4 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesis.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesis.java
@@ -19,8 +19,10 @@
 import org.broadinstitute.hellbender.utils.SimpleInterval;
 import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
 import org.broadinstitute.hellbender.utils.read.GATKRead;
+import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter;
 
 import java.io.BufferedWriter;
+import java.io.File;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.text.DecimalFormat;
@@ -99,7 +101,7 @@ public final class AnalyzeSaturationMutagenesis extends GATKTool {
     @VisibleForTesting static int minLength = 15;
 
     @Argument(doc = "minimum number of wt calls flanking variant", fullName = "min-flanking-length")
-    @VisibleForTesting static int minFlankingLength = 18;
+    @VisibleForTesting static int minFlankingLength = 2;
 
     @Argument(doc = "reference interval(s) of the ORF (1-based, inclusive), for example, '134-180,214-238' (no spaces)",
             fullName = "orf")
@@ -118,17 +120,22 @@ public final class AnalyzeSaturationMutagenesis extends GATKTool {
     @Argument(doc = "paired mode evaluation of variants (combine mates, when possible)", fullName = "paired-mode")
     private static boolean pairedMode = true;
 
+    @Argument(doc = "write BAM of rejected reads", fullName = "write-rejected-reads")
+    private static boolean writeRejectedReads = false;
+
     @VisibleForTesting static Reference reference;
     @VisibleForTesting static CodonTracker codonTracker; // device for turning SNVs into CodonVariations
+    @VisibleForTesting static SAMFileGATKReadWriter rejectedReadsBAMWriter;
 
     // a map of SNV sets onto number of observations of that set of variations
     private static final HopscotchMap<SNVCollectionCount, Long, SNVCollectionCount> variationCounts =
             new HopscotchMap<>(10000000);
 
-    private static final ReadCounts readCounts = new ReadCounts();
-    private static final MoleculeCounts moleculeCountsUnpaired = new MoleculeCounts();
-    private static final MoleculeCounts moleculeCountsDisjointPair = new MoleculeCounts();
-    private static final MoleculeCounts moleculeCountsOverlappingPair = new MoleculeCounts();
+    private static long totalBaseCalls;
+    private static final ReportTypeCounts readCounts = new ReportTypeCounts();
+    private static final ReportTypeCounts unpairedCounts = new ReportTypeCounts();
+    private static final ReportTypeCounts disjointPairCounts = new ReportTypeCounts();
+    private static final ReportTypeCounts overlappingPairCounts = new ReportTypeCounts();
 
     // a place to stash the first read of a pair during pairwise processing of the read stream
     private GATKRead read1 = null;
@@ -165,6 +172,9 @@ public void onTraversalStart() {
         }
         reference = new Reference(ReferenceDataSource.of(referenceArguments.getReferencePath()));
         codonTracker = new CodonTracker(orfCoords, reference.getRefSeq(), logger);
+        if ( writeRejectedReads ) {
+            rejectedReadsBAMWriter = createSAMWriter(new File(outputFilePrefix + ".rejected.bam"), false);
+        }
     }
 
     @Override
@@ -173,55 +183,48 @@ public void traverse() {
         // ignore non-primary alignments
         final Stream<GATKRead> reads = getTransformedReadStream(ReadFilterLibrary.PRIMARY_LINE);
 
-        if ( !pairedMode ) {
-            reads.forEach(read -> {
-                try {
-                    final ReadReport report = getReadReport(read);
-                    report.updateCounts(moleculeCountsUnpaired, codonTracker, variationCounts, reference);
-                } catch ( final Exception e ) {
-                    final String readName = read.getName();
-                    throw new GATKException("Caught unexpected exception on read " +
-                            readCounts.getNReads() + ": " + readName, e);
-                }
-            });
-        } else {
-            read1 = null;
-            reads.forEach(read -> {
-                try {
+        try {
+            if ( !pairedMode ) {
+                reads.forEach(read -> {
+                    read1 = read;
+                    processReport(read, getReadReport(read), unpairedCounts);
+                });
+            } else {
+                read1 = null;
+                reads.forEach(read -> {
                     if ( !read.isPaired() ) {
-                        final ReadReport report = getReadReport(read);
-                        report.updateCounts(moleculeCountsUnpaired, codonTracker, variationCounts, reference);
+                        if ( read1 != null ) {
+                            processRead1();
+                        }
+                        read1 = read;
+                        processReport(read, getReadReport(read), unpairedCounts);
+                        read1 = null;
                     } else if ( read1 == null ) {
                         read1 = read;
                     } else if ( !read1.getName().equals(read.getName()) ) {
-                        logger.warn("Read " + read1.getName() + " has no mate.");
-                        final ReadReport report = getReadReport(read1);
-                        report.updateCounts(moleculeCountsUnpaired, codonTracker, variationCounts, reference);
+                        processRead1();
                         read1 = read;
                     } else {
-                        updateCountsForPair(getReadReport(read1), getReadReport(read));
+                        updateCountsForPair(read1, getReadReport(read1), read, getReadReport(read));
                         read1 = null;
                     }
-                } catch ( final Exception e ) {
-                    final String readName = read.getName();
-                    throw new GATKException("Caught unexpected exception on read " +
-                            readCounts.getNReads() + ": " + readName, e);
-                }
-            });
-            if ( read1 != null ) {
-                logger.warn("Read " + read1.getName() + " has no mate.");
-                try {
-                    final ReadReport report = getReadReport(read1);
-                    report.updateCounts(moleculeCountsUnpaired, codonTracker, variationCounts, reference);
-                } catch ( final Exception e ) {
-                    final String readName = read1.getName();
-                    throw new GATKException("Caught unexpected exception on read " +
-                            readCounts.getNReads() + ": " + readName, e);
+                });
+                if ( read1 != null ) {
+                    processRead1();
                 }
             }
+        } catch ( final Exception exception ) {
+            throw new GATKException(
+                    "Caught unexpected exception on read " + readCounts.totalCounts() + ": " + read1.getName(),
+                    exception);
         }
     }
 
+    private void processRead1() {
+        logger.warn("Read " + read1.getName() + " has no mate.");
+        processReport(read1, getReadReport(read1), unpairedCounts);
+    }
+
     @Override
     public Object onTraversalSuccess() {
         writeVariationCounts(getVariationEntries());
@@ -232,6 +235,7 @@ public Object onTraversalSuccess() {
         writeAAFractions();
         writeReadCounts();
         writeCoverageSizeHistogram();
+        if ( rejectedReadsBAMWriter != null ) rejectedReadsBAMWriter.close();
         return super.onTraversalSuccess();
     }
 
@@ -481,38 +485,46 @@ private static void writeReadCounts() {
                       new BufferedWriter(new OutputStreamWriter(BucketUtils.createFile(readCountsFile))) ) {
             final DecimalFormat df = new DecimalFormat("0.000");
 
-            final long totalReads = readCounts.getNReads();
+            final long totalReads = readCounts.totalCounts();
             writer.write("Total Reads:\t" + totalReads + "\t100.000%");
             writer.newLine();
 
-            final long nUnmappedReads = readCounts.getNReadsUnmapped();
+            final long nUnmappedReads = readCounts.getCount(ReportType.UNMAPPED);
             writer.write(">Unmapped Reads:\t" + nUnmappedReads + "\t" +
                     df.format(100. * nUnmappedReads / totalReads) + "%");
             writer.newLine();
 
-            final long nLowQualityReads = readCounts.getNReadsLowQuality();
+            final long nLowQualityReads = readCounts.getCount(ReportType.LOW_QUALITY);
             writer.write(">LowQ Reads:\t" + nLowQualityReads + "\t" +
                     df.format(100. * nLowQualityReads / totalReads) + "%");
             writer.newLine();
 
-            final long nEvaluableReads = readCounts.getNReadsEvaluable();
+            final long nEvaluableReads = readCounts.getCount(ReportType.EVALUABLE);
             writer.write(">Evaluable Reads:\t" + nEvaluableReads + "\t" +
                     df.format(100. * nEvaluableReads / totalReads) + "%");
             writer.newLine();
 
-            final long nDisjointReads = moleculeCountsDisjointPair.getTotal();
+            final long nUnpairedReads = unpairedCounts.totalCounts();
+            if ( nUnpairedReads > 0 ) {
+                writer.write(">>Unpaired reads:\t" +  nUnpairedReads + "\t" +
+                        df.format(100. * nUnpairedReads / nEvaluableReads) + "%");
+                writer.newLine();
+                writeReportTypeCounts(unpairedCounts, df, writer);
+            }
+
+            final long nDisjointReads = disjointPairCounts.totalCounts();
             writer.write(">>Reads in disjoint pairs evaluated separately:\t" +  nDisjointReads + "\t" +
                     df.format(100. * nDisjointReads / nEvaluableReads) + "%");
             writer.newLine();
-            writeMoleculeCounts(moleculeCountsDisjointPair, df, writer);
+            writeReportTypeCounts(disjointPairCounts, df, writer);
 
-            final long nOverlappingReads = 2 * moleculeCountsOverlappingPair.getTotal();
+            final long nOverlappingReads = 2 * overlappingPairCounts.totalCounts();
             writer.write(">>Reads in overlapping pairs evaluated together:\t" + nOverlappingReads + "\t" +
                     df.format(100. * nOverlappingReads / nEvaluableReads) + "%");
             writer.newLine();
-            writeMoleculeCounts(moleculeCountsOverlappingPair, df, writer);
+            writeReportTypeCounts(overlappingPairCounts, df, writer);
 
-            final long totalBases = readCounts.getNTotalBaseCalls();
+            final long totalBases = totalBaseCalls;
             writer.write("Total base calls:\t" + totalBases + "\t100.000%");
             writer.newLine();
             final long totalCoverage = reference.getTotalCoverage();
@@ -528,31 +540,18 @@ private static void writeReadCounts() {
         }
     }
 
-    private static void writeMoleculeCounts( final MoleculeCounts moleculeCounts,
-                                             final DecimalFormat df,
-                                             final BufferedWriter writer )
-            throws IOException {
-        final long nInconsistentPairs = moleculeCounts.getNInconsistentPairs();
-        final long nInsufficientFlankMolecules = moleculeCounts.getNInsufficientFlank();
-        final long nWildTypeMolecules = moleculeCounts.getNWildType();
-        final long nLowQualityVariantMolecules = moleculeCounts.getNLowQualityVariant();
-        final long nCalledVariantMolecules = moleculeCounts.getCalledVariant();
-        final long totalMolecules = moleculeCounts.getTotal();
-        writer.write(">>>Number of inconsistent pair molecules:\t" + nInconsistentPairs + "\t" +
-                df.format(100. * nInconsistentPairs / totalMolecules) + "%");
-        writer.newLine();
-        writer.write(">>>Number of wild type molecules:\t" + nWildTypeMolecules + "\t" +
-                df.format(100. * nWildTypeMolecules / totalMolecules) + "%");
-        writer.newLine();
-        writer.write(">>>Number of insufficient flank molecules:\t" + nInsufficientFlankMolecules + "\t" +
-                df.format(100. * nInsufficientFlankMolecules / totalMolecules) + "%");
-        writer.newLine();
-        writer.write(">>>Number of low quality variation molecules:\t" + nLowQualityVariantMolecules + "\t" +
-                df.format(100. * nLowQualityVariantMolecules / totalMolecules) + "%");
-        writer.newLine();
-        writer.write(">>>Number of called variant molecules:\t" + nCalledVariantMolecules + "\t" +
-                df.format(100. * nCalledVariantMolecules / totalMolecules) + "%");
-        writer.newLine();
+    private static void writeReportTypeCounts( final ReportTypeCounts counts,
+                                               final DecimalFormat df,
+                                               final BufferedWriter writer ) throws IOException {
+        final long totalCounts = counts.totalCounts();
+        for ( final ReportType reportType : ReportType.values() ) {
+            final long count = counts.getCount(reportType);
+            if ( count != 0 ) {
+                writer.write(">>>" + reportType.label + ":\t" + count + "\t" +
+                        df.format(100. * count / totalCounts) + "%");
+                writer.newLine();
+            }
+        }
     }
 
     private static void writeCoverageSizeHistogram() {
@@ -578,6 +577,42 @@ private static void writeCoverageSizeHistogram() {
         }
     }
 
+    enum ReportType {
+        UNMAPPED("unmapped", "Unmapped Reads"),
+        LOW_QUALITY("lowQ", "LowQ Reads"),
+        EVALUABLE(null, "Evaluable Reads"),
+        WILD_TYPE(null, "Wild type"),
+        CALLED_VARIANT(null, "Called variants"),
+        INCONSISTENT("inconsistent", "Inconsistent pair"),
+        IGNORED_MATE("ignoredMate", "Mate ignored"),
+        LOW_Q_VAR("lowQVar", "Low quality variation"),
+        NO_FLANK("noFlank", "Insufficient flank");
+
+        public final String attributeValue; // value for tagging rejected reads.  when null, don't tag the read.
+        public final String label;
+
+        private ReportType( final String attributeValue, final String label ) {
+            this.attributeValue = attributeValue;
+            this.label = label;
+        }
+
+        public final static String REPORT_TYPE_ATTRIBUTE_KEY = "XX";
+    }
+
+    public final static class ReportTypeCounts {
+        private long[] counts = new long[ReportType.values().length];
+
+        public void bumpCount( final ReportType reportType ) {
+            counts[reportType.ordinal()] += 1;
+        }
+        public long getCount( final ReportType reportType ) {
+            return counts[reportType.ordinal()];
+        }
+        public long totalCounts() {
+            return Arrays.stream(counts).sum();
+        }
+    }
+
     @VisibleForTesting final static class Reference {
         // the amplicon -- all bytes are converted to upper-case 'A', 'C', 'G', or 'T', no nonsense
         private final byte[] refSeq;
@@ -649,52 +684,6 @@ private int updateCoverage( final List<Interval> refCoverageList ) {
         }
     }
 
-    @VisibleForTesting final static class ReadCounts {
-        private long nReadsTotal = 0; // total number of reads in input bam
-        private long nReadsUnmapped = 0; // number of unmapped reads in bam
-        private long nReadsLowQuality = 0; // number of reads where trimming made the read disappear
-        private long nReadsEvaluable = 0; // number of reads that can be evaluated for variants
-        private long nTotalBaseCalls = 0; // number of base calls over all reads in bam (mapped or not, call trimmed or not)
-
-        public void bumpNReads() { nReadsTotal += 1; }
-        public long getNReads() { return nReadsTotal; }
-
-        public void bumpNReadsUnmapped() { nReadsUnmapped += 1; }
-        public long getNReadsUnmapped() { return nReadsUnmapped; }
-
-        public void bumpNLowQualityReads() { nReadsLowQuality += 1; }
-        public long getNReadsLowQuality() { return nReadsLowQuality; }
-
-        public void bumpNReadsEvaluable() { nReadsEvaluable += 1; }
-        public long getNReadsEvaluable() { return nReadsEvaluable; }
-
-        public void addBaseCalls( final long nBases ) { nTotalBaseCalls += nBases; }
-        public long getNTotalBaseCalls() { return nTotalBaseCalls; }
-    }
-
-    // a bunch of mutually exclusive counts of molecules
-    @VisibleForTesting final static class MoleculeCounts {
-        private long nWildType = 0; // number of molecules in which no variation from reference was detected
-        private long nInconsistentPairs = 0; // number of molecules where mates with conflicting variants in overlap region
-        private long nInsufficientFlank = 0; // number of molecules where variation was too close to end of region
-        private long nLowQualityVariant = 0; // number of molecules where a variation was called with low quality
-        private long nCalledVariant = 0; // number of molecules with a least one variant
-
-        public void bumpNWildType() { nWildType += 1; }
-        public long getNWildType() { return nWildType; }
-        public void bumpInconsistentPairs() { nInconsistentPairs += 1; }
-        public long getNInconsistentPairs() { return nInconsistentPairs; }
-        public void bumpInsufficientFlank() { nInsufficientFlank += 1; }
-        public long getNInsufficientFlank() { return nInsufficientFlank; }
-        public void bumpNLowQualityVariant() { nLowQualityVariant += 1; }
-        public long getNLowQualityVariant() { return nLowQualityVariant; }
-        public void bumpCalledVariant() { nCalledVariant += 1; }
-        public long getCalledVariant() { return nCalledVariant; }
-        public long getTotal() {
-            return nWildType + nInconsistentPairs + nInsufficientFlank + nLowQualityVariant + nCalledVariant;
-        }
-    }
-
     // describes an interval on some sequence as a pair of offsets (0-based, half-open).
     @VisibleForTesting final static class Interval {
         private final int start;
@@ -1401,25 +1390,22 @@ public boolean hasCleanRightFlank( final int minFlankingLength, final int refLen
         }
 
         // apply the report: update reference coverage, translate SNVs to codon values, and count 'em all up
-        public void updateCounts( final MoleculeCounts moleculeCounts,
-                                  final CodonTracker codonTracker,
-                                  final HopscotchMap<SNVCollectionCount, Long, SNVCollectionCount> variationCounts,
-                                  final Reference reference ) {
+        public ReportType updateCounts( final CodonTracker codonTracker,
+                                        final HopscotchMap<SNVCollectionCount, Long, SNVCollectionCount> variationCounts,
+                                        final Reference reference ) {
             final List<Interval> refCoverage = getRefCoverage();
-            if ( refCoverage.isEmpty() ) return;
-
+            if ( refCoverage.isEmpty() ) {
+                return ReportType.LOW_QUALITY;
+            }
             if ( snvList == null ) {
-                moleculeCounts.bumpInconsistentPairs();
-                return;
+                return ReportType.INCONSISTENT;
             }
             if ( snvList.stream().anyMatch(
                     snv -> snv.getQuality() < minQ || "-ACGT".indexOf(snv.getVariantCall()) == -1) ) {
-                moleculeCounts.bumpNLowQualityVariant();
-                return;
+                return ReportType.LOW_Q_VAR;
             }
             if ( !hasCleanFlanks(minFlankingLength, reference.getRefSeqLength()) ) {
-                moleculeCounts.bumpInsufficientFlank();
-                return;
+                return ReportType.NO_FLANK;
             }
 
             final int coverage = reference.updateCoverage(refCoverage);
@@ -1427,19 +1413,19 @@ public void updateCounts( final MoleculeCounts moleculeCounts,
             reference.updateSpan(totalCoverage);
 
             if ( snvList.isEmpty() ) {
-                moleculeCounts.bumpNWildType();
                 codonTracker.reportWildCodonCounts(totalCoverage);
+                return ReportType.WILD_TYPE;
+            }
+
+            codonTracker.reportVariantCodonCounts(totalCoverage, codonTracker.encodeSNVsAsCodons(snvList));
+            final SNVCollectionCount newVal = new SNVCollectionCount(snvList, coverage);
+            final SNVCollectionCount oldVal = variationCounts.find(newVal);
+            if ( oldVal != null ) {
+                oldVal.bumpCount(coverage);
             } else {
-                moleculeCounts.bumpCalledVariant();
-                codonTracker.reportVariantCodonCounts(totalCoverage, codonTracker.encodeSNVsAsCodons(snvList));
-                final SNVCollectionCount newVal = new SNVCollectionCount(snvList, coverage);
-                final SNVCollectionCount oldVal = variationCounts.find(newVal);
-                if ( oldVal != null ) {
-                    oldVal.bumpCount(coverage);
-                } else {
-                    variationCounts.add(newVal);
-                }
+                variationCounts.add(newVal);
             }
+            return ReportType.CALLED_VARIANT;
         }
 
         private static List<Interval> combineCoverage( final ReadReport report1, final ReadReport report2 ) {
@@ -1558,30 +1544,41 @@ private static List<SNV> combineVariations( final ReadReport report1, final Read
     }
 
     @VisibleForTesting static ReadReport getReadReport( final GATKRead read ) {
-        readCounts.bumpNReads();
-        readCounts.addBaseCalls(read.getLength());
+        totalBaseCalls += read.getLength();
 
         if ( read.isUnmapped() || read.isDuplicate() || read.failsVendorQualityCheck() ) {
-            readCounts.bumpNReadsUnmapped();
-            return ReadReport.NULL_REPORT;
+            return rejectRead(read, ReportType.UNMAPPED);
         }
 
-        final Interval trim = calculateTrim(read.getBaseQualitiesNoCopy());
-        if ( trim.size() == 0 ) {
-            readCounts.bumpNLowQualityReads();
-            return ReadReport.NULL_REPORT;
+        Interval trim = calculateTrim(read);
+        if ( trim.size() < minLength ) {
+            return rejectRead(read, ReportType.LOW_QUALITY);
         }
 
         final ReadReport readReport = new ReadReport(read, trim, reference.getRefSeq());
         if ( readReport.getRefCoverage().isEmpty() ) {
-            readCounts.bumpNLowQualityReads();
-        } else {
-            readCounts.bumpNReadsEvaluable();
+            return rejectRead(read, ReportType.LOW_QUALITY);
         }
+
+        readCounts.bumpCount(ReportType.EVALUABLE);
         return readReport;
     }
 
-    @VisibleForTesting static Interval calculateTrim( final byte[] quals ) {
+    private static ReadReport rejectRead( final GATKRead read, final ReportType reportType ) {
+        readCounts.bumpCount(reportType);
+        if ( rejectedReadsBAMWriter != null ) {
+            read.setAttribute(ReportType.REPORT_TYPE_ATTRIBUTE_KEY, reportType.attributeValue);
+            rejectedReadsBAMWriter.addRead(read);
+        }
+        return ReadReport.NULL_REPORT;
+    }
+
+    private static Interval calculateTrim( final GATKRead read ) {
+        return calculateShortFragmentTrim(read, calculateQualityTrim(read.getBaseQualitiesNoCopy()));
+    }
+
+    //* find first and last minLength consecutive bases with quality scores at least as big as minQ */
+    @VisibleForTesting static Interval calculateQualityTrim( final byte[] quals ) {
         // find initial end-trim
         int readStart = 0;
         int hiQCount = 0;
@@ -1614,21 +1611,80 @@ private static List<SNV> combineVariations( final ReadReport report1, final Read
         return new Interval(readStart, readEnd);
     }
 
-    @VisibleForTesting static void updateCountsForPair( final ReadReport report1, final ReadReport report2 ) {
+    /** if properly paired, and fragment length is less than read length, trim read ends to fragment length */
+    private static Interval calculateShortFragmentTrim( final GATKRead read, final Interval qualityTrim ) {
+        if ( qualityTrim.size() < minLength ) return qualityTrim;
+
+        // don't process past end of fragment when fragment length < read length
+        if ( read.isProperlyPaired() ) {
+            final int fragmentLength = Math.abs(read.getFragmentLength());
+            if ( read.isReverseStrand() ) { // the read has been RC'd: trim its beginning if necessary
+                final int minStart = read.getLength() - fragmentLength;
+                if ( qualityTrim.getStart() < minStart ) {
+                    if ( qualityTrim.getEnd() - minStart < minLength ) { // if there are too few calls left
+                        return Interval.NULL_INTERVAL;
+                    }
+                    return new Interval(minStart, qualityTrim.getEnd());
+                }
+            } else if ( fragmentLength < qualityTrim.getEnd() ) { // trim the end of the read, if necessary
+                if ( fragmentLength - qualityTrim.getStart() < minLength ) { // if there are too few calls left
+                    return Interval.NULL_INTERVAL;
+                }
+                return new Interval(qualityTrim.getStart(), fragmentLength);
+            }
+        }
+        return qualityTrim;
+    }
+
+    @VisibleForTesting static void updateCountsForPair( final GATKRead read1, final ReadReport report1,
+                                                        final GATKRead read2, final ReadReport report2 ) {
         if ( report1.getRefCoverage().isEmpty() ) {
-            report2.updateCounts(moleculeCountsDisjointPair, codonTracker, variationCounts, reference);
+            if ( !report2.getRefCoverage().isEmpty() ) {
+                processReport(read2, report2, disjointPairCounts);
+            }
         } else if ( report2.getRefCoverage().isEmpty() ) {
-            report1.updateCounts(moleculeCountsDisjointPair, codonTracker, variationCounts, reference);
+            processReport(read1, report1, disjointPairCounts);
         } else {
             final int overlapStart = Math.max(report1.getFirstRefIndex(), report2.getFirstRefIndex());
             final int overlapEnd = Math.min(report1.getLastRefIndex(), report2.getLastRefIndex());
             if ( overlapStart <= overlapEnd ) { // if mates overlap, or are adjacent
                 final ReadReport combinedReport = new ReadReport(report1, report2);
-                combinedReport.updateCounts(moleculeCountsOverlappingPair, codonTracker, variationCounts, reference);
-            } else {
-                report1.updateCounts(moleculeCountsDisjointPair, codonTracker, variationCounts, reference);
-                report2.updateCounts(moleculeCountsDisjointPair, codonTracker, variationCounts, reference);
+                final ReportType reportType = combinedReport.updateCounts(codonTracker, variationCounts, reference);
+                overlappingPairCounts.bumpCount(reportType);
+                if ( reportType.attributeValue != null && rejectedReadsBAMWriter != null ) {
+                    read1.setAttribute(ReportType.REPORT_TYPE_ATTRIBUTE_KEY, reportType.attributeValue);
+                    rejectedReadsBAMWriter.addRead(read1);
+                    read2.setAttribute(ReportType.REPORT_TYPE_ATTRIBUTE_KEY, reportType.attributeValue);
+                    rejectedReadsBAMWriter.addRead(read2);
+                }
+            } else { // mates are disjoint
+                final ReportType ignoredMate = ReportType.IGNORED_MATE;
+                if ( read1.isFirstOfPair() ) {
+                    processReport(read1, report1, disjointPairCounts);
+                    if ( rejectedReadsBAMWriter != null ) {
+                        read2.setAttribute(ReportType.REPORT_TYPE_ATTRIBUTE_KEY, ignoredMate.attributeValue);
+                        rejectedReadsBAMWriter.addRead(read2);
+                    }
+                } else {
+                    processReport(read2, report2, disjointPairCounts);
+                    if ( rejectedReadsBAMWriter != null ) {
+                        read1.setAttribute(ReportType.REPORT_TYPE_ATTRIBUTE_KEY, ignoredMate.attributeValue);
+                        rejectedReadsBAMWriter.addRead(read1);
+                    }
+                }
+                disjointPairCounts.bumpCount(ignoredMate);
             }
         }
     }
+
+    private static void processReport( final GATKRead read,
+                                       final ReadReport readReport,
+                                       final ReportTypeCounts counts ) {
+        final ReportType reportType = readReport.updateCounts(codonTracker, variationCounts, reference);
+        counts.bumpCount(reportType);
+        if ( reportType.attributeValue != null && rejectedReadsBAMWriter != null ) {
+            read.setAttribute(ReportType.REPORT_TYPE_ATTRIBUTE_KEY, reportType.attributeValue);
+            rejectedReadsBAMWriter.addRead(read);
+        }
+    }
 }
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesisUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesisUnitTest.java
index 946bcedfd3c..f8f6cccd8ca 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesisUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesisUnitTest.java
@@ -487,29 +487,34 @@ public void testPairedApplication() {
                 new SNV(45, CALL_G, CALL_A, QUAL_30),
                 new SNV(55, CALL_C, CALL_A, QUAL_30));
 
+        final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
+        final List<GATKRead> reads =
+            ArtificialReadUtils.createPair(header, "blahBlah", 150, 1, 151, true, false);
+        final GATKRead read1 = reads.get(0);
+        final GATKRead read2 = reads.get(1);
         final ReadReport report1 = new ReadReport(Collections.singletonList(new Interval(0, 32)), list1);
         final ReadReport report2 = new ReadReport(Collections.singletonList(new Interval(33, 60)), list2);
-        updateCountsForPair(report1, report2);
+        updateCountsForPair(read1, report1, read2, report2);
 
         // shouldn't be applied -- fails flanking bases test
         Assert.assertEquals(reference.countSpanners(0, 32), 0);
         Assert.assertEquals(reference.countSpanners(32, 33), 0);
         Assert.assertEquals(reference.countSpanners(33, 60), 0);
 
-        // should be applied separately
+        // only read1 should be applied
         minFlankingLength = 1;
-        updateCountsForPair(report1, report2);
+        updateCountsForPair(read1, report1, read2, report2);
         Assert.assertEquals(reference.countSpanners(0, 32), 1);
         Assert.assertEquals(reference.countSpanners(32, 33), 0);
-        Assert.assertEquals(reference.countSpanners(33, 60), 1);
+        Assert.assertEquals(reference.countSpanners(33, 60), 0);
 
         // should be applied as one
         final ReadReport report3 = new ReadReport(Collections.singletonList(new Interval(0, 35)), list1);
         final ReadReport report4 = new ReadReport(Collections.singletonList(new Interval(33, 60)), list2);
-        updateCountsForPair(report3, report4);
+        updateCountsForPair(read1, report3, read2, report4);
         Assert.assertEquals(reference.countSpanners(0, 32), 2);
         Assert.assertEquals(reference.countSpanners(32, 33), 1);
-        Assert.assertEquals(reference.countSpanners(33, 60), 2);
+        Assert.assertEquals(reference.countSpanners(33, 60), 1);
     }
 
     @Test
@@ -518,20 +523,20 @@ public void testCalculateTrim() {
         final byte GOOD_Q = (byte)minQ;
         final byte[] quals = new byte[100];
         Arrays.fill(quals, BAD_Q);
-        Assert.assertEquals(calculateTrim(quals).size(), 0);
+        Assert.assertEquals(calculateQualityTrim(quals).size(), 0);
         Arrays.fill(quals, GOOD_Q);
-        Assert.assertEquals(calculateTrim(quals), new Interval(0, 100));
+        Assert.assertEquals(calculateQualityTrim(quals), new Interval(0, 100));
         quals[minLength] = BAD_Q;
-        Assert.assertEquals(calculateTrim(quals), new Interval(0, 100));
+        Assert.assertEquals(calculateQualityTrim(quals), new Interval(0, 100));
         quals[minLength - 1] = BAD_Q;
-        Assert.assertEquals(calculateTrim(quals), new Interval(minLength + 1, 100));
+        Assert.assertEquals(calculateQualityTrim(quals), new Interval(minLength + 1, 100));
         quals[quals.length - minLength] = BAD_Q;
-        Assert.assertEquals(calculateTrim(quals), new Interval(minLength + 1, quals.length - minLength));
+        Assert.assertEquals(calculateQualityTrim(quals), new Interval(minLength + 1, quals.length - minLength));
         Arrays.fill(quals, GOOD_Q);
         for ( int idx = minLength - 1; idx < quals.length; idx += minLength ) {
             quals[idx] = BAD_Q;
         }
-        Assert.assertEquals(calculateTrim(quals).size(), 0);
+        Assert.assertEquals(calculateQualityTrim(quals).size(), 0);
     }
 
     @Test