From cfc744bc6459bc85541c4ebb697177b4e35d54b9 Mon Sep 17 00:00:00 2001 From: tedsharpe Date: Mon, 20 May 2019 11:58:54 -0400 Subject: [PATCH] arbitrarily choose 1 read for disjoint pairs, dump rejected reads (#5926) --- .../tools/AnalyzeSaturationMutagenesis.java | 380 ++++++++++-------- .../AnalyzeSaturationMutagenesisUnitTest.java | 29 +- 2 files changed, 235 insertions(+), 174 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesis.java b/src/main/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesis.java index 1ee8d7a0bd6..1f4c02ca5f4 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesis.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesis.java @@ -19,8 +19,10 @@ import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.gcs.BucketUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; +import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter; import java.io.BufferedWriter; +import java.io.File; import java.io.IOException; import java.io.OutputStreamWriter; import java.text.DecimalFormat; @@ -99,7 +101,7 @@ public final class AnalyzeSaturationMutagenesis extends GATKTool { @VisibleForTesting static int minLength = 15; @Argument(doc = "minimum number of wt calls flanking variant", fullName = "min-flanking-length") - @VisibleForTesting static int minFlankingLength = 18; + @VisibleForTesting static int minFlankingLength = 2; @Argument(doc = "reference interval(s) of the ORF (1-based, inclusive), for example, '134-180,214-238' (no spaces)", fullName = "orf") @@ -118,17 +120,22 @@ public final class AnalyzeSaturationMutagenesis extends GATKTool { @Argument(doc = "paired mode evaluation of variants (combine mates, when possible)", fullName = "paired-mode") private static boolean pairedMode = true; + @Argument(doc = "write BAM of rejected reads", fullName = "write-rejected-reads") + private static boolean writeRejectedReads = false; + @VisibleForTesting static Reference reference; @VisibleForTesting static CodonTracker codonTracker; // device for turning SNVs into CodonVariations + @VisibleForTesting static SAMFileGATKReadWriter rejectedReadsBAMWriter; // a map of SNV sets onto number of observations of that set of variations private static final HopscotchMap variationCounts = new HopscotchMap<>(10000000); - private static final ReadCounts readCounts = new ReadCounts(); - private static final MoleculeCounts moleculeCountsUnpaired = new MoleculeCounts(); - private static final MoleculeCounts moleculeCountsDisjointPair = new MoleculeCounts(); - private static final MoleculeCounts moleculeCountsOverlappingPair = new MoleculeCounts(); + private static long totalBaseCalls; + private static final ReportTypeCounts readCounts = new ReportTypeCounts(); + private static final ReportTypeCounts unpairedCounts = new ReportTypeCounts(); + private static final ReportTypeCounts disjointPairCounts = new ReportTypeCounts(); + private static final ReportTypeCounts overlappingPairCounts = new ReportTypeCounts(); // a place to stash the first read of a pair during pairwise processing of the read stream private GATKRead read1 = null; @@ -165,6 +172,9 @@ public void onTraversalStart() { } reference = new Reference(ReferenceDataSource.of(referenceArguments.getReferencePath())); codonTracker = new CodonTracker(orfCoords, reference.getRefSeq(), logger); + if ( writeRejectedReads ) { + rejectedReadsBAMWriter = createSAMWriter(new File(outputFilePrefix + ".rejected.bam"), false); + } } @Override @@ -173,55 +183,48 @@ public void traverse() { // ignore non-primary alignments final Stream reads = getTransformedReadStream(ReadFilterLibrary.PRIMARY_LINE); - if ( !pairedMode ) { - reads.forEach(read -> { - try { - final ReadReport report = getReadReport(read); - report.updateCounts(moleculeCountsUnpaired, codonTracker, variationCounts, reference); - } catch ( final Exception e ) { - final String readName = read.getName(); - throw new GATKException("Caught unexpected exception on read " + - readCounts.getNReads() + ": " + readName, e); - } - }); - } else { - read1 = null; - reads.forEach(read -> { - try { + try { + if ( !pairedMode ) { + reads.forEach(read -> { + read1 = read; + processReport(read, getReadReport(read), unpairedCounts); + }); + } else { + read1 = null; + reads.forEach(read -> { if ( !read.isPaired() ) { - final ReadReport report = getReadReport(read); - report.updateCounts(moleculeCountsUnpaired, codonTracker, variationCounts, reference); + if ( read1 != null ) { + processRead1(); + } + read1 = read; + processReport(read, getReadReport(read), unpairedCounts); + read1 = null; } else if ( read1 == null ) { read1 = read; } else if ( !read1.getName().equals(read.getName()) ) { - logger.warn("Read " + read1.getName() + " has no mate."); - final ReadReport report = getReadReport(read1); - report.updateCounts(moleculeCountsUnpaired, codonTracker, variationCounts, reference); + processRead1(); read1 = read; } else { - updateCountsForPair(getReadReport(read1), getReadReport(read)); + updateCountsForPair(read1, getReadReport(read1), read, getReadReport(read)); read1 = null; } - } catch ( final Exception e ) { - final String readName = read.getName(); - throw new GATKException("Caught unexpected exception on read " + - readCounts.getNReads() + ": " + readName, e); - } - }); - if ( read1 != null ) { - logger.warn("Read " + read1.getName() + " has no mate."); - try { - final ReadReport report = getReadReport(read1); - report.updateCounts(moleculeCountsUnpaired, codonTracker, variationCounts, reference); - } catch ( final Exception e ) { - final String readName = read1.getName(); - throw new GATKException("Caught unexpected exception on read " + - readCounts.getNReads() + ": " + readName, e); + }); + if ( read1 != null ) { + processRead1(); } } + } catch ( final Exception exception ) { + throw new GATKException( + "Caught unexpected exception on read " + readCounts.totalCounts() + ": " + read1.getName(), + exception); } } + private void processRead1() { + logger.warn("Read " + read1.getName() + " has no mate."); + processReport(read1, getReadReport(read1), unpairedCounts); + } + @Override public Object onTraversalSuccess() { writeVariationCounts(getVariationEntries()); @@ -232,6 +235,7 @@ public Object onTraversalSuccess() { writeAAFractions(); writeReadCounts(); writeCoverageSizeHistogram(); + if ( rejectedReadsBAMWriter != null ) rejectedReadsBAMWriter.close(); return super.onTraversalSuccess(); } @@ -481,38 +485,46 @@ private static void writeReadCounts() { new BufferedWriter(new OutputStreamWriter(BucketUtils.createFile(readCountsFile))) ) { final DecimalFormat df = new DecimalFormat("0.000"); - final long totalReads = readCounts.getNReads(); + final long totalReads = readCounts.totalCounts(); writer.write("Total Reads:\t" + totalReads + "\t100.000%"); writer.newLine(); - final long nUnmappedReads = readCounts.getNReadsUnmapped(); + final long nUnmappedReads = readCounts.getCount(ReportType.UNMAPPED); writer.write(">Unmapped Reads:\t" + nUnmappedReads + "\t" + df.format(100. * nUnmappedReads / totalReads) + "%"); writer.newLine(); - final long nLowQualityReads = readCounts.getNReadsLowQuality(); + final long nLowQualityReads = readCounts.getCount(ReportType.LOW_QUALITY); writer.write(">LowQ Reads:\t" + nLowQualityReads + "\t" + df.format(100. * nLowQualityReads / totalReads) + "%"); writer.newLine(); - final long nEvaluableReads = readCounts.getNReadsEvaluable(); + final long nEvaluableReads = readCounts.getCount(ReportType.EVALUABLE); writer.write(">Evaluable Reads:\t" + nEvaluableReads + "\t" + df.format(100. * nEvaluableReads / totalReads) + "%"); writer.newLine(); - final long nDisjointReads = moleculeCountsDisjointPair.getTotal(); + final long nUnpairedReads = unpairedCounts.totalCounts(); + if ( nUnpairedReads > 0 ) { + writer.write(">>Unpaired reads:\t" + nUnpairedReads + "\t" + + df.format(100. * nUnpairedReads / nEvaluableReads) + "%"); + writer.newLine(); + writeReportTypeCounts(unpairedCounts, df, writer); + } + + final long nDisjointReads = disjointPairCounts.totalCounts(); writer.write(">>Reads in disjoint pairs evaluated separately:\t" + nDisjointReads + "\t" + df.format(100. * nDisjointReads / nEvaluableReads) + "%"); writer.newLine(); - writeMoleculeCounts(moleculeCountsDisjointPair, df, writer); + writeReportTypeCounts(disjointPairCounts, df, writer); - final long nOverlappingReads = 2 * moleculeCountsOverlappingPair.getTotal(); + final long nOverlappingReads = 2 * overlappingPairCounts.totalCounts(); writer.write(">>Reads in overlapping pairs evaluated together:\t" + nOverlappingReads + "\t" + df.format(100. * nOverlappingReads / nEvaluableReads) + "%"); writer.newLine(); - writeMoleculeCounts(moleculeCountsOverlappingPair, df, writer); + writeReportTypeCounts(overlappingPairCounts, df, writer); - final long totalBases = readCounts.getNTotalBaseCalls(); + final long totalBases = totalBaseCalls; writer.write("Total base calls:\t" + totalBases + "\t100.000%"); writer.newLine(); final long totalCoverage = reference.getTotalCoverage(); @@ -528,31 +540,18 @@ private static void writeReadCounts() { } } - private static void writeMoleculeCounts( final MoleculeCounts moleculeCounts, - final DecimalFormat df, - final BufferedWriter writer ) - throws IOException { - final long nInconsistentPairs = moleculeCounts.getNInconsistentPairs(); - final long nInsufficientFlankMolecules = moleculeCounts.getNInsufficientFlank(); - final long nWildTypeMolecules = moleculeCounts.getNWildType(); - final long nLowQualityVariantMolecules = moleculeCounts.getNLowQualityVariant(); - final long nCalledVariantMolecules = moleculeCounts.getCalledVariant(); - final long totalMolecules = moleculeCounts.getTotal(); - writer.write(">>>Number of inconsistent pair molecules:\t" + nInconsistentPairs + "\t" + - df.format(100. * nInconsistentPairs / totalMolecules) + "%"); - writer.newLine(); - writer.write(">>>Number of wild type molecules:\t" + nWildTypeMolecules + "\t" + - df.format(100. * nWildTypeMolecules / totalMolecules) + "%"); - writer.newLine(); - writer.write(">>>Number of insufficient flank molecules:\t" + nInsufficientFlankMolecules + "\t" + - df.format(100. * nInsufficientFlankMolecules / totalMolecules) + "%"); - writer.newLine(); - writer.write(">>>Number of low quality variation molecules:\t" + nLowQualityVariantMolecules + "\t" + - df.format(100. * nLowQualityVariantMolecules / totalMolecules) + "%"); - writer.newLine(); - writer.write(">>>Number of called variant molecules:\t" + nCalledVariantMolecules + "\t" + - df.format(100. * nCalledVariantMolecules / totalMolecules) + "%"); - writer.newLine(); + private static void writeReportTypeCounts( final ReportTypeCounts counts, + final DecimalFormat df, + final BufferedWriter writer ) throws IOException { + final long totalCounts = counts.totalCounts(); + for ( final ReportType reportType : ReportType.values() ) { + final long count = counts.getCount(reportType); + if ( count != 0 ) { + writer.write(">>>" + reportType.label + ":\t" + count + "\t" + + df.format(100. * count / totalCounts) + "%"); + writer.newLine(); + } + } } private static void writeCoverageSizeHistogram() { @@ -578,6 +577,42 @@ private static void writeCoverageSizeHistogram() { } } + enum ReportType { + UNMAPPED("unmapped", "Unmapped Reads"), + LOW_QUALITY("lowQ", "LowQ Reads"), + EVALUABLE(null, "Evaluable Reads"), + WILD_TYPE(null, "Wild type"), + CALLED_VARIANT(null, "Called variants"), + INCONSISTENT("inconsistent", "Inconsistent pair"), + IGNORED_MATE("ignoredMate", "Mate ignored"), + LOW_Q_VAR("lowQVar", "Low quality variation"), + NO_FLANK("noFlank", "Insufficient flank"); + + public final String attributeValue; // value for tagging rejected reads. when null, don't tag the read. + public final String label; + + private ReportType( final String attributeValue, final String label ) { + this.attributeValue = attributeValue; + this.label = label; + } + + public final static String REPORT_TYPE_ATTRIBUTE_KEY = "XX"; + } + + public final static class ReportTypeCounts { + private long[] counts = new long[ReportType.values().length]; + + public void bumpCount( final ReportType reportType ) { + counts[reportType.ordinal()] += 1; + } + public long getCount( final ReportType reportType ) { + return counts[reportType.ordinal()]; + } + public long totalCounts() { + return Arrays.stream(counts).sum(); + } + } + @VisibleForTesting final static class Reference { // the amplicon -- all bytes are converted to upper-case 'A', 'C', 'G', or 'T', no nonsense private final byte[] refSeq; @@ -649,52 +684,6 @@ private int updateCoverage( final List refCoverageList ) { } } - @VisibleForTesting final static class ReadCounts { - private long nReadsTotal = 0; // total number of reads in input bam - private long nReadsUnmapped = 0; // number of unmapped reads in bam - private long nReadsLowQuality = 0; // number of reads where trimming made the read disappear - private long nReadsEvaluable = 0; // number of reads that can be evaluated for variants - private long nTotalBaseCalls = 0; // number of base calls over all reads in bam (mapped or not, call trimmed or not) - - public void bumpNReads() { nReadsTotal += 1; } - public long getNReads() { return nReadsTotal; } - - public void bumpNReadsUnmapped() { nReadsUnmapped += 1; } - public long getNReadsUnmapped() { return nReadsUnmapped; } - - public void bumpNLowQualityReads() { nReadsLowQuality += 1; } - public long getNReadsLowQuality() { return nReadsLowQuality; } - - public void bumpNReadsEvaluable() { nReadsEvaluable += 1; } - public long getNReadsEvaluable() { return nReadsEvaluable; } - - public void addBaseCalls( final long nBases ) { nTotalBaseCalls += nBases; } - public long getNTotalBaseCalls() { return nTotalBaseCalls; } - } - - // a bunch of mutually exclusive counts of molecules - @VisibleForTesting final static class MoleculeCounts { - private long nWildType = 0; // number of molecules in which no variation from reference was detected - private long nInconsistentPairs = 0; // number of molecules where mates with conflicting variants in overlap region - private long nInsufficientFlank = 0; // number of molecules where variation was too close to end of region - private long nLowQualityVariant = 0; // number of molecules where a variation was called with low quality - private long nCalledVariant = 0; // number of molecules with a least one variant - - public void bumpNWildType() { nWildType += 1; } - public long getNWildType() { return nWildType; } - public void bumpInconsistentPairs() { nInconsistentPairs += 1; } - public long getNInconsistentPairs() { return nInconsistentPairs; } - public void bumpInsufficientFlank() { nInsufficientFlank += 1; } - public long getNInsufficientFlank() { return nInsufficientFlank; } - public void bumpNLowQualityVariant() { nLowQualityVariant += 1; } - public long getNLowQualityVariant() { return nLowQualityVariant; } - public void bumpCalledVariant() { nCalledVariant += 1; } - public long getCalledVariant() { return nCalledVariant; } - public long getTotal() { - return nWildType + nInconsistentPairs + nInsufficientFlank + nLowQualityVariant + nCalledVariant; - } - } - // describes an interval on some sequence as a pair of offsets (0-based, half-open). @VisibleForTesting final static class Interval { private final int start; @@ -1401,25 +1390,22 @@ public boolean hasCleanRightFlank( final int minFlankingLength, final int refLen } // apply the report: update reference coverage, translate SNVs to codon values, and count 'em all up - public void updateCounts( final MoleculeCounts moleculeCounts, - final CodonTracker codonTracker, - final HopscotchMap variationCounts, - final Reference reference ) { + public ReportType updateCounts( final CodonTracker codonTracker, + final HopscotchMap variationCounts, + final Reference reference ) { final List refCoverage = getRefCoverage(); - if ( refCoverage.isEmpty() ) return; - + if ( refCoverage.isEmpty() ) { + return ReportType.LOW_QUALITY; + } if ( snvList == null ) { - moleculeCounts.bumpInconsistentPairs(); - return; + return ReportType.INCONSISTENT; } if ( snvList.stream().anyMatch( snv -> snv.getQuality() < minQ || "-ACGT".indexOf(snv.getVariantCall()) == -1) ) { - moleculeCounts.bumpNLowQualityVariant(); - return; + return ReportType.LOW_Q_VAR; } if ( !hasCleanFlanks(minFlankingLength, reference.getRefSeqLength()) ) { - moleculeCounts.bumpInsufficientFlank(); - return; + return ReportType.NO_FLANK; } final int coverage = reference.updateCoverage(refCoverage); @@ -1427,19 +1413,19 @@ public void updateCounts( final MoleculeCounts moleculeCounts, reference.updateSpan(totalCoverage); if ( snvList.isEmpty() ) { - moleculeCounts.bumpNWildType(); codonTracker.reportWildCodonCounts(totalCoverage); + return ReportType.WILD_TYPE; + } + + codonTracker.reportVariantCodonCounts(totalCoverage, codonTracker.encodeSNVsAsCodons(snvList)); + final SNVCollectionCount newVal = new SNVCollectionCount(snvList, coverage); + final SNVCollectionCount oldVal = variationCounts.find(newVal); + if ( oldVal != null ) { + oldVal.bumpCount(coverage); } else { - moleculeCounts.bumpCalledVariant(); - codonTracker.reportVariantCodonCounts(totalCoverage, codonTracker.encodeSNVsAsCodons(snvList)); - final SNVCollectionCount newVal = new SNVCollectionCount(snvList, coverage); - final SNVCollectionCount oldVal = variationCounts.find(newVal); - if ( oldVal != null ) { - oldVal.bumpCount(coverage); - } else { - variationCounts.add(newVal); - } + variationCounts.add(newVal); } + return ReportType.CALLED_VARIANT; } private static List combineCoverage( final ReadReport report1, final ReadReport report2 ) { @@ -1558,30 +1544,41 @@ private static List combineVariations( final ReadReport report1, final Read } @VisibleForTesting static ReadReport getReadReport( final GATKRead read ) { - readCounts.bumpNReads(); - readCounts.addBaseCalls(read.getLength()); + totalBaseCalls += read.getLength(); if ( read.isUnmapped() || read.isDuplicate() || read.failsVendorQualityCheck() ) { - readCounts.bumpNReadsUnmapped(); - return ReadReport.NULL_REPORT; + return rejectRead(read, ReportType.UNMAPPED); } - final Interval trim = calculateTrim(read.getBaseQualitiesNoCopy()); - if ( trim.size() == 0 ) { - readCounts.bumpNLowQualityReads(); - return ReadReport.NULL_REPORT; + Interval trim = calculateTrim(read); + if ( trim.size() < minLength ) { + return rejectRead(read, ReportType.LOW_QUALITY); } final ReadReport readReport = new ReadReport(read, trim, reference.getRefSeq()); if ( readReport.getRefCoverage().isEmpty() ) { - readCounts.bumpNLowQualityReads(); - } else { - readCounts.bumpNReadsEvaluable(); + return rejectRead(read, ReportType.LOW_QUALITY); } + + readCounts.bumpCount(ReportType.EVALUABLE); return readReport; } - @VisibleForTesting static Interval calculateTrim( final byte[] quals ) { + private static ReadReport rejectRead( final GATKRead read, final ReportType reportType ) { + readCounts.bumpCount(reportType); + if ( rejectedReadsBAMWriter != null ) { + read.setAttribute(ReportType.REPORT_TYPE_ATTRIBUTE_KEY, reportType.attributeValue); + rejectedReadsBAMWriter.addRead(read); + } + return ReadReport.NULL_REPORT; + } + + private static Interval calculateTrim( final GATKRead read ) { + return calculateShortFragmentTrim(read, calculateQualityTrim(read.getBaseQualitiesNoCopy())); + } + + //* find first and last minLength consecutive bases with quality scores at least as big as minQ */ + @VisibleForTesting static Interval calculateQualityTrim( final byte[] quals ) { // find initial end-trim int readStart = 0; int hiQCount = 0; @@ -1614,21 +1611,80 @@ private static List combineVariations( final ReadReport report1, final Read return new Interval(readStart, readEnd); } - @VisibleForTesting static void updateCountsForPair( final ReadReport report1, final ReadReport report2 ) { + /** if properly paired, and fragment length is less than read length, trim read ends to fragment length */ + private static Interval calculateShortFragmentTrim( final GATKRead read, final Interval qualityTrim ) { + if ( qualityTrim.size() < minLength ) return qualityTrim; + + // don't process past end of fragment when fragment length < read length + if ( read.isProperlyPaired() ) { + final int fragmentLength = Math.abs(read.getFragmentLength()); + if ( read.isReverseStrand() ) { // the read has been RC'd: trim its beginning if necessary + final int minStart = read.getLength() - fragmentLength; + if ( qualityTrim.getStart() < minStart ) { + if ( qualityTrim.getEnd() - minStart < minLength ) { // if there are too few calls left + return Interval.NULL_INTERVAL; + } + return new Interval(minStart, qualityTrim.getEnd()); + } + } else if ( fragmentLength < qualityTrim.getEnd() ) { // trim the end of the read, if necessary + if ( fragmentLength - qualityTrim.getStart() < minLength ) { // if there are too few calls left + return Interval.NULL_INTERVAL; + } + return new Interval(qualityTrim.getStart(), fragmentLength); + } + } + return qualityTrim; + } + + @VisibleForTesting static void updateCountsForPair( final GATKRead read1, final ReadReport report1, + final GATKRead read2, final ReadReport report2 ) { if ( report1.getRefCoverage().isEmpty() ) { - report2.updateCounts(moleculeCountsDisjointPair, codonTracker, variationCounts, reference); + if ( !report2.getRefCoverage().isEmpty() ) { + processReport(read2, report2, disjointPairCounts); + } } else if ( report2.getRefCoverage().isEmpty() ) { - report1.updateCounts(moleculeCountsDisjointPair, codonTracker, variationCounts, reference); + processReport(read1, report1, disjointPairCounts); } else { final int overlapStart = Math.max(report1.getFirstRefIndex(), report2.getFirstRefIndex()); final int overlapEnd = Math.min(report1.getLastRefIndex(), report2.getLastRefIndex()); if ( overlapStart <= overlapEnd ) { // if mates overlap, or are adjacent final ReadReport combinedReport = new ReadReport(report1, report2); - combinedReport.updateCounts(moleculeCountsOverlappingPair, codonTracker, variationCounts, reference); - } else { - report1.updateCounts(moleculeCountsDisjointPair, codonTracker, variationCounts, reference); - report2.updateCounts(moleculeCountsDisjointPair, codonTracker, variationCounts, reference); + final ReportType reportType = combinedReport.updateCounts(codonTracker, variationCounts, reference); + overlappingPairCounts.bumpCount(reportType); + if ( reportType.attributeValue != null && rejectedReadsBAMWriter != null ) { + read1.setAttribute(ReportType.REPORT_TYPE_ATTRIBUTE_KEY, reportType.attributeValue); + rejectedReadsBAMWriter.addRead(read1); + read2.setAttribute(ReportType.REPORT_TYPE_ATTRIBUTE_KEY, reportType.attributeValue); + rejectedReadsBAMWriter.addRead(read2); + } + } else { // mates are disjoint + final ReportType ignoredMate = ReportType.IGNORED_MATE; + if ( read1.isFirstOfPair() ) { + processReport(read1, report1, disjointPairCounts); + if ( rejectedReadsBAMWriter != null ) { + read2.setAttribute(ReportType.REPORT_TYPE_ATTRIBUTE_KEY, ignoredMate.attributeValue); + rejectedReadsBAMWriter.addRead(read2); + } + } else { + processReport(read2, report2, disjointPairCounts); + if ( rejectedReadsBAMWriter != null ) { + read1.setAttribute(ReportType.REPORT_TYPE_ATTRIBUTE_KEY, ignoredMate.attributeValue); + rejectedReadsBAMWriter.addRead(read1); + } + } + disjointPairCounts.bumpCount(ignoredMate); } } } + + private static void processReport( final GATKRead read, + final ReadReport readReport, + final ReportTypeCounts counts ) { + final ReportType reportType = readReport.updateCounts(codonTracker, variationCounts, reference); + counts.bumpCount(reportType); + if ( reportType.attributeValue != null && rejectedReadsBAMWriter != null ) { + read.setAttribute(ReportType.REPORT_TYPE_ATTRIBUTE_KEY, reportType.attributeValue); + rejectedReadsBAMWriter.addRead(read); + } + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesisUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesisUnitTest.java index 946bcedfd3c..f8f6cccd8ca 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesisUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/AnalyzeSaturationMutagenesisUnitTest.java @@ -487,29 +487,34 @@ public void testPairedApplication() { new SNV(45, CALL_G, CALL_A, QUAL_30), new SNV(55, CALL_C, CALL_A, QUAL_30)); + final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(); + final List reads = + ArtificialReadUtils.createPair(header, "blahBlah", 150, 1, 151, true, false); + final GATKRead read1 = reads.get(0); + final GATKRead read2 = reads.get(1); final ReadReport report1 = new ReadReport(Collections.singletonList(new Interval(0, 32)), list1); final ReadReport report2 = new ReadReport(Collections.singletonList(new Interval(33, 60)), list2); - updateCountsForPair(report1, report2); + updateCountsForPair(read1, report1, read2, report2); // shouldn't be applied -- fails flanking bases test Assert.assertEquals(reference.countSpanners(0, 32), 0); Assert.assertEquals(reference.countSpanners(32, 33), 0); Assert.assertEquals(reference.countSpanners(33, 60), 0); - // should be applied separately + // only read1 should be applied minFlankingLength = 1; - updateCountsForPair(report1, report2); + updateCountsForPair(read1, report1, read2, report2); Assert.assertEquals(reference.countSpanners(0, 32), 1); Assert.assertEquals(reference.countSpanners(32, 33), 0); - Assert.assertEquals(reference.countSpanners(33, 60), 1); + Assert.assertEquals(reference.countSpanners(33, 60), 0); // should be applied as one final ReadReport report3 = new ReadReport(Collections.singletonList(new Interval(0, 35)), list1); final ReadReport report4 = new ReadReport(Collections.singletonList(new Interval(33, 60)), list2); - updateCountsForPair(report3, report4); + updateCountsForPair(read1, report3, read2, report4); Assert.assertEquals(reference.countSpanners(0, 32), 2); Assert.assertEquals(reference.countSpanners(32, 33), 1); - Assert.assertEquals(reference.countSpanners(33, 60), 2); + Assert.assertEquals(reference.countSpanners(33, 60), 1); } @Test @@ -518,20 +523,20 @@ public void testCalculateTrim() { final byte GOOD_Q = (byte)minQ; final byte[] quals = new byte[100]; Arrays.fill(quals, BAD_Q); - Assert.assertEquals(calculateTrim(quals).size(), 0); + Assert.assertEquals(calculateQualityTrim(quals).size(), 0); Arrays.fill(quals, GOOD_Q); - Assert.assertEquals(calculateTrim(quals), new Interval(0, 100)); + Assert.assertEquals(calculateQualityTrim(quals), new Interval(0, 100)); quals[minLength] = BAD_Q; - Assert.assertEquals(calculateTrim(quals), new Interval(0, 100)); + Assert.assertEquals(calculateQualityTrim(quals), new Interval(0, 100)); quals[minLength - 1] = BAD_Q; - Assert.assertEquals(calculateTrim(quals), new Interval(minLength + 1, 100)); + Assert.assertEquals(calculateQualityTrim(quals), new Interval(minLength + 1, 100)); quals[quals.length - minLength] = BAD_Q; - Assert.assertEquals(calculateTrim(quals), new Interval(minLength + 1, quals.length - minLength)); + Assert.assertEquals(calculateQualityTrim(quals), new Interval(minLength + 1, quals.length - minLength)); Arrays.fill(quals, GOOD_Q); for ( int idx = minLength - 1; idx < quals.length; idx += minLength ) { quals[idx] = BAD_Q; } - Assert.assertEquals(calculateTrim(quals).size(), 0); + Assert.assertEquals(calculateQualityTrim(quals).size(), 0); } @Test