Skip to content

Commit

Permalink
Added setIsUnplaced() to the GATKRead api to distinguish reads with n…
Browse files Browse the repository at this point in the history
…o mapping information (#5320)

* added new isUnplaced()/setIsUnplaced methods to GATKRead to allow querying and removing the assigned positions of unmapped reads
  • Loading branch information
jamesemery authored and lbergelson committed Nov 20, 2018
1 parent b7c0560 commit 9acddab
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,21 @@ default int numCigarElements(){
*/
void setIsUnmapped();

/**
* Does the read have a position assigned to it for sorting purposes.
* @return `true iff this read has no assigned position or contig.
*/
boolean isUnplaced();

/**
* Mark the read as unmapped, and also removes mapping information from the read (i.e. sets contig to {@link ReadConstants#UNSET_CONTIG} and position to {@link ReadConstants#UNSET_POSITION}).
*
* NOTE: this does not remove the cigar string from the read, use {@link #setCigar(Cigar)}
*
* To mark a read as mapped, use {@link #setPosition}
*/
void setIsUnplaced();

/**
* @return True if this read's mate is unmapped (this includes mates that have a position but are explicitly marked as unmapped,
* as well as mates that lack a fully-defined position but are not explicitly marked as unmapped). Otherwise false.
Expand All @@ -477,7 +492,7 @@ default int numCigarElements(){
boolean mateIsUnmapped();

/**
* Mark the read's mate as unmapped (lacking a defined position on the genome).
* Mark the read's mate as unmapped (lacking a defined position on the genome). (i.e. sets mate contig to {@link ReadConstants#UNSET_CONTIG} and position to {@link ReadConstants#UNSET_POSITION}).
*
* To mark the read's mate as mapped, use {@link #setMatePosition}
*
Expand All @@ -486,6 +501,23 @@ default int numCigarElements(){
*/
void setMateIsUnmapped();

/**
* Does the reads mate have a position assigned to it for sorting purposes..
* @return `true iff this reads mate has no assigned position or contig.
*/
boolean mateIsUnplaced();

/**
* Mark the read's mate as unmapped (lacking a defined position on the genome). In contrast with {@link #setMateIsUnmapped},
* this method will revert the mapping information for the mate (i.e. sets the mate's contig to "*" and position to 0).
*
* To mark the read's mate as mapped, use {@link #setMatePosition}
*
* Calling this method has the additional effect of marking the read as paired, as if {@link #setIsPaired}
* were invoked with true.
*/
void setMateIsUnplaced();

/**
* @return True if this read is on the reverse strand as opposed to the forward strand, otherwise false.
* @throws GATKException.MissingReadField if this information is not available
Expand Down Expand Up @@ -715,6 +747,12 @@ default int numCigarElements(){
*/
String getSAMString();

/**
* Modify this read by reverse complementing its bases and reversing its quality scores. Implementations may
* also update tags that are known to need updating after the reverse complement operation.
*/
public void reverseComplement();

/**
* A human-digestable representation of the read.
* NOTE: java will not let us have a default method override toString so we need this dance. Subclasses should override toString
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -423,13 +423,30 @@ public boolean isUnmapped() {
samRecord.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START;
}

@Override
public boolean isUnplaced() {
return samRecord.getReferenceName() == null || samRecord.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME) ||
samRecord.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START;
}

@Override
public void setIsUnmapped() {
clearCachedValues();

samRecord.setReadUnmappedFlag(true);
}


@Override
public void setIsUnplaced() {
clearCachedValues();

samRecord.setReadUnmappedFlag(true);
samRecord.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
samRecord.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
samRecord.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
}

@Override
public boolean mateIsUnmapped() {
Utils.validate(isPaired(), "Cannot get mate information for an unpaired read");
Expand All @@ -439,6 +456,14 @@ public boolean mateIsUnmapped() {
samRecord.getMateAlignmentStart() == SAMRecord.NO_ALIGNMENT_START;
}

@Override
public boolean mateIsUnplaced() {
Utils.validate(isPaired(), "Cannot get mate information for an unpaired read");

return samRecord.getMateReferenceName() == null || samRecord.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME) ||
samRecord.getMateAlignmentStart() == SAMRecord.NO_ALIGNMENT_START;
}

@Override
public void setMateIsUnmapped() {
clearCachedValues();
Expand All @@ -449,6 +474,19 @@ public void setMateIsUnmapped() {
samRecord.setMateUnmappedFlag(true);
}

@Override
public void setMateIsUnplaced() {
clearCachedValues();

// Calling this method has the side effect of marking the read as paired.
setIsPaired(true);

samRecord.setMateUnmappedFlag(true);
samRecord.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
samRecord.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
}


@Override
public boolean isReverseStrand() {
return samRecord.getReadNegativeStrandFlag();
Expand Down Expand Up @@ -677,6 +715,13 @@ public String getSAMString() {
return samRecord.getSAMString();
}

@Override
public void reverseComplement() {
clearCachedValues();

samRecord.reverseComplement(true);
}

@Override
public SAMRecord convertToSAMRecord( final SAMFileHeader header ) {
samRecord.setHeaderStrict(header);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1294,4 +1294,91 @@ public void testDeepCopyPosition(final GATKRead read) {

Assert.assertEquals(read, read.deepCopy());
}

@DataProvider
public Object[][] unmappedReadDataProvider() {
List<Object[]> testCases = new ArrayList<>();

SAMRecord unmappedUnplacedRead = new SAMRecord(hg19Header);
SAMRecord unmappedPlacedRead = new SAMRecord(hg19Header);
SAMRecord mappedUnplacedRead = new SAMRecord(hg19Header);
SAMRecord mappedPlacedRead = new SAMRecord(hg19Header);

unmappedUnplacedRead.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
unmappedUnplacedRead.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
unmappedUnplacedRead.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
unmappedUnplacedRead.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
unmappedUnplacedRead.setReadUnmappedFlag(true);
unmappedUnplacedRead.setMateUnmappedFlag(true);
unmappedUnplacedRead.setReadPairedFlag(true);
testCases.add(new Object[]{new SAMRecordToGATKReadAdapter(unmappedUnplacedRead), true, true});


unmappedPlacedRead.setReferenceIndex(0);
unmappedPlacedRead.setMateReferenceIndex(0);
unmappedPlacedRead.setAlignmentStart(100);
unmappedPlacedRead.setMateAlignmentStart(100);
unmappedPlacedRead.setReadUnmappedFlag(true);
unmappedPlacedRead.setMateUnmappedFlag(true);
unmappedPlacedRead.setReadPairedFlag(true);
testCases.add(new Object[]{new SAMRecordToGATKReadAdapter(unmappedPlacedRead), true, false});


mappedUnplacedRead.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
mappedUnplacedRead.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
mappedUnplacedRead.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
mappedUnplacedRead.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
mappedUnplacedRead.setReadUnmappedFlag(false);
mappedUnplacedRead.setMateUnmappedFlag(false);
mappedUnplacedRead.setReadPairedFlag(true);
testCases.add(new Object[]{new SAMRecordToGATKReadAdapter(mappedUnplacedRead), true, true}); // Unmapped is true here because it tests alignment info second


mappedPlacedRead.setReferenceIndex(0);
mappedPlacedRead.setMateReferenceIndex(0);
mappedPlacedRead.setAlignmentStart(100);
mappedPlacedRead.setMateAlignmentStart(100);
mappedPlacedRead.setReadUnmappedFlag(false);
mappedPlacedRead.setMateUnmappedFlag(false);
mappedPlacedRead.setReadPairedFlag(true);
testCases.add(new Object[]{new SAMRecordToGATKReadAdapter(mappedPlacedRead), false, false});

return testCases.toArray(new Object[][]{});
}

@Test(dataProvider="unmappedReadDataProvider")
public void testUnmappedUnpairedDistinction(final GATKRead read, final boolean unmapped, final boolean unplaced) {
Assert.assertEquals(read.isUnmapped(), unmapped);
Assert.assertEquals(read.isUnplaced(), unplaced);

read.setIsUnmapped();

Assert.assertEquals(read.isUnmapped(), true);
Assert.assertEquals(read.isUnplaced(), unplaced);

read.setIsUnplaced();
Assert.assertEquals(read.isUnmapped(), true);
Assert.assertEquals(read.isUnplaced(), true);

read.convertToSAMRecord(hg19Header).setReadUnmappedFlag(false); // have to unset unmapped to allow SAMRecord to show its underlying data
Assert.assertEquals(read.convertToSAMRecord(hg19Header).getStart(), SAMRecord.NO_ALIGNMENT_START);
Assert.assertEquals(read.convertToSAMRecord(hg19Header).getMappingQuality(), SAMRecord.NO_MAPPING_QUALITY);
}

@Test(dataProvider="unmappedReadDataProvider")
public void testUnmappedUnpairedDistinctionmates(final GATKRead read, final boolean unmapped, final boolean unplaced) {
Assert.assertEquals(read.mateIsUnmapped(), unmapped);
Assert.assertEquals(read.mateIsUnplaced(), unplaced);

read.setMateIsUnmapped();

Assert.assertEquals(read.mateIsUnmapped(), true);
Assert.assertEquals(read.mateIsUnplaced(), unplaced);

read.setMateIsUnplaced();
Assert.assertEquals(read.mateIsUnmapped(), true);
Assert.assertEquals(read.mateIsUnplaced(), true);

read.convertToSAMRecord(hg19Header).setReadUnmappedFlag(false); // have to unset unmapped to allow SAMRecord to show its underlying data
Assert.assertEquals(read.convertToSAMRecord(hg19Header).getMateAlignmentStart(), SAMRecord.NO_ALIGNMENT_START); }
}

0 comments on commit 9acddab

Please sign in to comment.