Skip to content

Commit

Permalink
Fix bug in merge_rels preventing a match in one read from compensatin…
Browse files Browse the repository at this point in the history
…g for ambiguity in the other; add unit tests
  • Loading branch information
matthewfallan committed Feb 28, 2024
1 parent 17655f8 commit c18df35
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 35 deletions.
2 changes: 1 addition & 1 deletion src/seismicrna/core/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

logger = getLogger(__name__)

__version__ = "0.14.0"
__version__ = "0.14.1"

VERSION_DELIM = "."

Expand Down
56 changes: 35 additions & 21 deletions src/seismicrna/relate/py/relate.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ def __repr__(self):


class SamRead(object):
""" One read in a SAM file. """

# Define __slots__ to improve speed and memory performance.
__slots__ = "qname", "flag", "rname", "pos", "mapq", "cigar", "seq", "qual"

Expand All @@ -74,10 +76,10 @@ def __init__(self, line: str):

def __str__(self):
attrs = {attr: getattr(self, attr) for attr in self.__slots__[1:]}
return f"Read '{self.qname}' {attrs}"
return f"Read {repr(self.qname)} {attrs}"


def find_rels_read(read: SamRead, refseq: DNA, min_qual: str, ambrel: bool):
def _find_rels_read(read: SamRead, refseq: DNA, min_qual: str, ambrel: bool):
"""
Find the relationships between a read and a reference.
Expand Down Expand Up @@ -223,7 +225,7 @@ def find_rels_read(read: SamRead, refseq: DNA, min_qual: str, ambrel: bool):
return read.pos, ref_pos, dict(rels)


def validate_read(read: SamRead, ref: str, min_mapq: int):
def _validate_read(read: SamRead, ref: str, min_mapq: int):
if read.rname != ref:
raise ValueError(f"Read {repr(read.qname)} mapped to a reference named "
f"{repr(read.rname)} but is in an alignment map file "
Expand All @@ -233,36 +235,49 @@ def validate_read(read: SamRead, ref: str, min_mapq: int):
f"{read.mapq}, less than the minimum of {min_mapq}")


def validate_pair(read1: SamRead, read2: SamRead):
def _validate_pair(read1: SamRead, read2: SamRead):
""" Ensure that reads 1 and 2 are compatible mates. """
if not read1.flag.paired:
raise RelateValueError(f"Read 1 ({read1.qname}) was not paired, "
f"but read 2 ('{read2.qname}') was given")
f"but read 2 ({read2.qname}) was given")
if not read2.flag.paired:
raise RelateValueError(f"Read 2 ({read2.qname}) was not paired, "
f"but read 1 ({read1.qname}) was given")
if read1.qname != read2.qname:
raise RelateValueError(f"Reads 1 ({read1.qname}) and 2 "
f"({read2.qname}) had different names")
raise RelateValueError(f"Got different names for reads "
f"1 ({read1.qname}) and 2 ({read2.qname})")
if not (read1.flag.first and read2.flag.second):
raise RelateValueError(f"Read '{read1.qname}' had mate 1 "
raise RelateValueError(f"Read {repr(read1.qname)} had mate 1 "
f"labeled {2 - read1.flag.first} and mate 2 "
f"labeled {1 + read2.flag.second}")
if read1.flag.rev == read2.flag.rev:
raise RelateValueError(f"Read '{read1.qname}' had "
raise RelateValueError(f"Read {repr(read1.qname)} had "
"mates 1 and 2 facing the same way")


def merge_ends(end51: int, end31: int, end52: int, end32: int):
def _merge_ends(end51: int, end31: int, end52: int, end32: int):
return (min(end51, end52),
max(end51, end52),
min(end31, end32),
max(end31, end32))


def merge_rels(rels1: dict[int, int], rels2: dict[int, int]):
return {ref_idx: rels1.get(ref_idx, NOCOV) & rels2.get(ref_idx, NOCOV)
for ref_idx in rels1 | rels2}
def _merge_rels(end51: int,
end31: int,
rels1: dict[int, int],
end52: int,
end32: int,
rels2: dict[int, int]):
merged = dict()
for pos in rels1 | rels2:
rel1 = rels1.get(pos, MATCH if end51 <= pos <= end31 else NOCOV)
rel2 = rels2.get(pos, MATCH if end52 <= pos <= end32 else NOCOV)
rel = rel1 & rel2
if rel != MATCH:
if rel == NOCOV:
raise ValueError(f"Cannot merge two blanks at position {pos}")
merged[pos] = rel
return merged


def find_rels_line(line1: str,
Expand All @@ -273,20 +288,19 @@ def find_rels_line(line1: str,
qmin: str,
ambrel: bool):
read1 = SamRead(line1)
validate_read(read1, ref, min_mapq)
end5, end3, rels = find_rels_read(read1, refseq, qmin, ambrel)
_validate_read(read1, ref, min_mapq)
end5, end3, rels = _find_rels_read(read1, refseq, qmin, ambrel)
if line2:
read2 = SamRead(line2)
validate_read(read2, ref, min_mapq)
validate_pair(read1, read2)
end52, end32, rels2 = find_rels_read(read2, refseq, qmin, ambrel)
end5, mid5, mid3, end3 = merge_ends(end5, end3, end52, end32)
rels = merge_rels(rels, rels2)
_validate_read(read2, ref, min_mapq)
_validate_pair(read1, read2)
end52, end32, rels2 = _find_rels_read(read2, refseq, qmin, ambrel)
rels = _merge_rels(end5, end3, rels, end52, end32, rels2)
end5, mid5, mid3, end3 = _merge_ends(end5, end3, end52, end32)
else:
mid5, mid3 = end5, end3
return read1.qname, end5, mid5, mid3, end3, rels


########################################################################
# #
# © Copyright 2024, the Rouskin Lab. #
Expand Down
134 changes: 121 additions & 13 deletions src/seismicrna/relate/py/tests/relate_test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import unittest as ut

from ..relate import find_rels_line
from ...aux.iterread import iter_alignments
from ....align.sim import as_sam
from ....core.arg import opt_min_mapq
from ....core.ngs import OK_QUAL
from ....core.seq import DNA
from seismicrna.core.rel import IRREC, MATCH, NOCOV
from seismicrna.relate.py.relate import find_rels_line, _merge_rels
from seismicrna.relate.aux.iterread import iter_alignments
from seismicrna.align.sim import as_sam
from seismicrna.core.arg import opt_min_mapq
from seismicrna.core.ngs import OK_QUAL
from seismicrna.core.seq import DNA


class TestRelateRelateLineAmbrel(ut.TestCase):
Expand Down Expand Up @@ -48,19 +49,19 @@ def iter_cases(self, refseq: DNA, max_ins: int = 2):
max_ins,
max_ins,
max_ins):
name, end5_, mid5, mid3, end3_, rels_ = self.relate("ref",
refseq,
read,
qual,
self.MAPQ,
cigar,
end5)
with self.subTest(refseq=refseq,
read=read,
qual=qual,
end5=end5,
cigar=cigar,
rels=rels):
name, end5_, mid5, mid3, end3_, rels_ = self.relate("ref",
refseq,
read,
qual,
self.MAPQ,
cigar,
end5)
self.assertEqual(end5_, end5)
self.assertEqual(mid5, end5)
self.assertEqual(mid3, end3)
Expand All @@ -85,6 +86,113 @@ def test_acgt_1ins(self):
""" Test all possible reads with ≤ 1 insertion from ACGT. """
self.iter_cases(DNA("ACGT"), 1)


class TestMergeRels(ut.TestCase):

def test_empty(self):
result = _merge_rels(1, 10, {}, 1, 10, {})
expect = {}
self.assertEqual(result, expect)

def test_read1(self):
end51 = 1
end31 = 20
end52 = 11
end32 = 30
for pos in range(end51, end31 + 1):
for rel in range(MATCH + 1, NOCOV):
result = _merge_rels(end51, end31, {pos: rel}, end52, end32, {})
if end52 <= pos <= end32:
# The relationship can be compensated by read 2.
if rel & MATCH:
# The match in read 2 compensated.
expect = {}
else:
# The match in read 2 is irreconcilable.
expect = {pos: IRREC}
else:
# Read 2 cannot compensate.
expect = {pos: rel}
self.assertEqual(result, expect)

def test_read2(self):
end51 = 1
end31 = 20
end52 = 11
end32 = 30
for pos in range(end52, end32 + 1):
for rel in range(MATCH + 1, NOCOV):
result = _merge_rels(end51, end31, {}, end52, end32, {pos: rel})
if end51 <= pos <= end31:
# The relationship can be compensated by read 1.
if rel & MATCH:
# The match in read 1 compensated.
expect = {}
else:
# The match in read 1 is irreconcilable.
expect = {pos: IRREC}
else:
# Read 1 cannot compensate.
expect = {pos: rel}
self.assertEqual(result, expect)

def test_both_reads(self):
end51 = 1
end31 = 2
end52 = 2
end32 = 3
for pos1 in range(end51, end31 + 1):
for rel1 in range(MATCH + 1, NOCOV):
rels1 = {pos1: rel1}
for pos2 in range(end52, end32 + 1):
for rel2 in range(MATCH + 1, NOCOV):
rels2 = {pos2: rel2}
with self.subTest(pos1=pos1, rel1=rel1,
pos2=pos2, rel2=rel2):
result = _merge_rels(end51, end31, rels1,
end52, end32, rels2)
if pos1 == pos2:
merged = rel1 & rel2
if merged == MATCH:
expect = {}
else:
expect = {pos1: merged}
else:
expect = dict()
merged1 = rel1 & MATCH if end52 <= pos1 <= end32 else rel1
if merged1 != MATCH:
expect[pos1] = merged1
merged2 = rel2 & MATCH if end51 <= pos2 <= end31 else rel2
if merged2 != MATCH:
expect[pos2] = merged2
self.assertEqual(result, expect)

def test_both_blank(self):
end51 = 1
end31 = 2
end52 = 2
end32 = 3
for pos1 in range(end51, end31 + 1):
rels1 = {pos1: NOCOV}
for pos2 in range(end52, end32 + 1):
rels2 = {pos2: NOCOV}
with self.subTest(pos1=pos1, pos2=pos2):
if end52 <= pos1 <= end32:
error = pos2
else:
error = pos1
self.assertRaisesRegex(
ValueError,
f"Cannot merge two blanks at position {error}",
_merge_rels,
end51, end31, rels1,
end52, end32, rels2,
)


if __name__ == "__main__":
ut.main()

########################################################################
# #
# © Copyright 2024, the Rouskin Lab. #
Expand Down

0 comments on commit c18df35

Please sign in to comment.