From 9fe90428a680db552d288c2609732205f58d646e Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 12 Dec 2024 10:50:22 -0500 Subject: [PATCH] SamUtils PairOrientation is confusingly ambiguous about paired reads needing to map to the same contig (#1709) * - Improved documentation to clarify that PairOrientation (in SamUtils) pertains only to reads on the same contig. - Now getPairOrientation will throw an exception if thrown when the two reads are mapped to different contigs - Added tests to verify that the exception is thrown in the 4 cases specified. --- .../java/htsjdk/samtools/SamPairUtil.java | 37 +++++++++---- .../java/htsjdk/samtools/SamPairUtilTest.java | 55 +++++++++++++++++++ 2 files changed, 80 insertions(+), 12 deletions(-) diff --git a/src/main/java/htsjdk/samtools/SamPairUtil.java b/src/main/java/htsjdk/samtools/SamPairUtil.java index 525684c28d..24d7d62609 100644 --- a/src/main/java/htsjdk/samtools/SamPairUtil.java +++ b/src/main/java/htsjdk/samtools/SamPairUtil.java @@ -45,6 +45,8 @@ public class SamPairUtil { * FR means the read that's mapped to the forward strand comes before the * read mapped to the reverse strand when their 5'-end coordinates are * compared. + * + * PairOrientation only makes sense for a pair of reads that are both mapped to the same contig/chromosome */ public static enum PairOrientation { @@ -57,31 +59,42 @@ public static enum PairOrientation /** * Computes the pair orientation of the given SAMRecord. - * @param r + * @param record the record for which the pair orientation is requested * @return PairOrientation of the given SAMRecord. * @throws IllegalArgumentException If the record is not a paired read, or - * one or both reads are unmapped. + * one or both reads are unmapped, or if the two records are not mapped to the + * same reference. + * + * NOTA BENE: This is NOT the orientation byte as used in Picard's MarkDuplicates. For that please look + * in ReadEnds (in Picard). */ - public static PairOrientation getPairOrientation(final SAMRecord r) + public static PairOrientation getPairOrientation(final SAMRecord record) { - final boolean readIsOnReverseStrand = r.getReadNegativeStrandFlag(); + final boolean readIsOnReverseStrand = record.getReadNegativeStrandFlag(); + + if(record.getReadUnmappedFlag() || !record.getReadPairedFlag() || record.getMateUnmappedFlag()) { + throw new IllegalArgumentException("Invalid SAMRecord: " + record.getReadName() + + ". This method only works for SAMRecords that are paired reads with both reads aligned."); + } - if(r.getReadUnmappedFlag() || !r.getReadPairedFlag() || r.getMateUnmappedFlag()) { - throw new IllegalArgumentException("Invalid SAMRecord: " + r.getReadName() + ". This method only works for SAMRecords " + - "that are paired reads with both reads aligned."); + if (!record.getReferenceIndex().equals(record.getMateReferenceIndex())) { + throw new IllegalArgumentException("Invalid SAMRecord: " + record.getReadName() + + ". This method only works for SAMRecords that are paired reads with both reads aligned to the" + + " same reference. Found difference references:" + record.getReferenceName() + " and " + + record.getMateReferenceName() + "."); } - if(readIsOnReverseStrand == r.getMateNegativeStrandFlag() ) { + if(readIsOnReverseStrand == record.getMateNegativeStrandFlag() ) { return PairOrientation.TANDEM; } final long positiveStrandFivePrimePos = ( readIsOnReverseStrand - ? r.getMateAlignmentStart() //mate's 5' position ( x---> ) - : r.getAlignmentStart() ); //read's 5' position ( x---> ) + ? record.getMateAlignmentStart() //mate's 5' position ( x---> ) + : record.getAlignmentStart() ); //read's 5' position ( x---> ) final long negativeStrandFivePrimePos = ( readIsOnReverseStrand - ? r.getAlignmentEnd() //read's 5' position ( <---x ) - : r.getAlignmentStart() + r.getInferredInsertSize() ); //mate's 5' position ( <---x ) + ? record.getAlignmentEnd() //read's 5' position ( <---x ) + : record.getAlignmentStart() + record.getInferredInsertSize() ); //mate's 5' position ( <---x ) return ( positiveStrandFivePrimePos < negativeStrandFivePrimePos ? PairOrientation.FR diff --git a/src/test/java/htsjdk/samtools/SamPairUtilTest.java b/src/test/java/htsjdk/samtools/SamPairUtilTest.java index f5c288a544..0d5ae5a8af 100644 --- a/src/test/java/htsjdk/samtools/SamPairUtilTest.java +++ b/src/test/java/htsjdk/samtools/SamPairUtilTest.java @@ -30,6 +30,7 @@ import org.testng.annotations.Test; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; @@ -49,6 +50,12 @@ public void testGetPairOrientation(final String testName, Assert.assertEquals(SamPairUtil.getPairOrientation(rec2), expectedOrientation, testName + " second end"); } + @Test(dataProvider = "testGetPairOrientationFail", expectedExceptions = IllegalArgumentException.class) + public void testGetPairOrientationFail(final SAMRecord record) { + SamPairUtil.getPairOrientation(record); + } + + @Test(dataProvider = "testSetMateInfoMateCigar") public void testSetMateInfoMateCigar(final String testName, final int read1Start, final boolean read1Reverse, final String read1Cigar, @@ -203,6 +210,54 @@ public Object[][] testGetPairOrientationDataProvider() { }; } + @DataProvider(name = "testGetPairOrientationFail") + public Iterator testGetPairOrientationFailDataProvider() { + /** + * @param testName + * @param read1Start + * @param read1Length + * @param read1Reverse + * @param read2Start + * @param read2Length + * @param read2Reverse + */ + + List tests=new ArrayList<>(); + final SAMFileHeader header = new SAMFileHeader(); + header.addSequence(new SAMSequenceRecord("chr1", 100000000)); + header.addSequence(new SAMSequenceRecord("chr2", 100000000)); + { + final SAMRecord rec = makeSamRecord(header, 50, 50, false, true); + rec.setReadName("Unpaired"); + rec.setReadPairedFlag(false); + tests.add(new Object[]{rec}); + } + { + final SAMRecord rec = makeSamRecord(header, 50, 50, false, true); + rec.setReadName("Unmapped"); + rec.setReadPairedFlag(true); + rec.setReadUnmappedFlag(true); + tests.add(new Object[]{rec}); + } + { + final SAMRecord rec = makeSamRecord(header, 50, 50, false, true); + rec.setReadName("Unmapped mate"); + rec.setReadPairedFlag(true); + rec.setReferenceIndex(0); + rec.setMateUnmappedFlag(true); + tests.add(new Object[]{rec}); + } + { + final SAMRecord rec = makeSamRecord(header, 50, 50, false, true); + rec.setReadName("mate on difference reference"); + rec.setReadPairedFlag(true); + rec.setReferenceIndex(0); + rec.setMateReferenceIndex(1); + tests.add(new Object[]{rec}); + } + return tests.iterator(); + } + @DataProvider(name = "testSetMateInfoMateCigar") public Object[][] testSetMateInfoMateCigarDataProvider() { /**