Skip to content

Commit

Permalink
also handle iupac codes in ReverseComplement()
Browse files Browse the repository at this point in the history
  • Loading branch information
aryarm authored Nov 2, 2024
1 parent 8952dcf commit c1f78ee
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 13 deletions.
12 changes: 12 additions & 0 deletions trtools/utils/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,13 +128,25 @@ def test_GetCanonicalOneStrand():
assert(utils.GetCanonicalOneStrand("TTGTT")=="GTTTT")
assert(utils.GetCanonicalOneStrand("")=="")
assert(utils.GetCanonicalOneStrand("at")=="AT")
# Additional tests with IUPAC codes
assert(utils.GetCanonicalOneStrand("RY")=="RY")
assert(utils.GetCanonicalOneStrand("YR")=="RY")
assert(utils.GetCanonicalOneStrand("SW")=="SW")
assert(utils.GetCanonicalOneStrand("WS")=="SW")
assert(utils.GetCanonicalOneStrand("KM")=="KM")
assert(utils.GetCanonicalOneStrand("MK")=="KM")

# ReverseComplement
def test_ReverseComplement():
    """Check ReverseComplement against a table of (input, expected) pairs."""
    cases = [
        # plain bases, empty input, an ambiguous base and lowercase input
        ("CGAT", "ATCG"),
        ("", ""),
        ("CGNT", "ANCG"),
        ("ccga", "TCGG"),
        # two-base IUPAC ambiguity codes mixed with a plain base
        ("RYASWKM", "KMWSTRY"),
        # "BDHV" is expected to come back unchanged as a whole sequence,
        # as is a lone "N"
        ("BDHV", "BDHV"),
        ("N", "N"),
    ]
    for sequence, expected in cases:
        assert utils.ReverseComplement(sequence) == expected

# InferRepeatSequence
def test_InferRepeatSequence():
Expand Down
24 changes: 11 additions & 13 deletions trtools/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ def GetCanonicalOneStrand(repseq):
def ReverseComplement(seq):
    r"""Get reverse complement of a sequence.

    Converts everything to uppercase and handles IUPAC codes.

    Parameters
    ----------
    seq : str
        Input sequence. May be upper or lower case and may contain
        IUPAC ambiguity codes (R, Y, S, W, K, M, B, D, H, V, N).

    Returns
    -------
    revcompseq : str
        Reverse complement of the input, in uppercase. Any character
        that is not a recognized nucleotide or IUPAC code is
        replaced by 'N'.

    Examples
    --------
    >>> ReverseComplement("AGGCT")
    'AGCCT'
    >>> ReverseComplement("AGGCTRY")
    'RYAGCCT'
    """
    # Complement of each base / ambiguity code. S, W and N are their own
    # complements; every other code pairs with a distinct partner.
    iupac_complement = {
        'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A',
        'R': 'Y', 'Y': 'R', 'S': 'S', 'W': 'W',
        'K': 'M', 'M': 'K', 'B': 'V', 'D': 'H',
        'H': 'D', 'V': 'B', 'N': 'N'
    }
    # Walk the uppercased sequence back to front, complementing each
    # character; unknown characters fall back to 'N'.
    return "".join(
        iupac_complement.get(char, 'N') for char in reversed(seq.upper())
    )

def InferRepeatSequence(seq, period):
Expand Down

0 comments on commit c1f78ee

Please sign in to comment.