From af86c4b8e4d50936d35190fab8de8aab896c1e3c Mon Sep 17 00:00:00 2001
From: Arya Massarat <23412689+aryarm@users.noreply.github.com>
Date: Tue, 22 Oct 2024 16:23:52 -0700
Subject: [PATCH 1/4] remove nucToNumber in GetCanonicalMotif and GetCanonicalOneStrand

---
 trtools/utils/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/trtools/utils/utils.py b/trtools/utils/utils.py
index f7328f81..f5a02284 100644
--- a/trtools/utils/utils.py
+++ b/trtools/utils/utils.py
@@ -387,9 +387,9 @@ def GetCanonicalMotif(repseq):
     repseq_r = GetCanonicalOneStrand(ReverseComplement(repseq))
     # choose first seq alphabetically
     for i in range(len(repseq_f)):
-        if nucToNumber[repseq_f[i]] < nucToNumber[repseq_r[i]]:
+        if repseq_f[i] < repseq_r[i]:
             return repseq_f
-        if nucToNumber[repseq_r[i]] < nucToNumber[repseq_f[i]]:
+        if repseq_r[i] < repseq_f[i]:
             return repseq_r
     return repseq_f
 
@@ -420,9 +420,9 @@ def GetCanonicalOneStrand(repseq):
     for i in range(size):
         newseq = repseq[size-i:]+repseq[0:size-i]
         for j in range(size):
-            if nucToNumber[newseq[j]] < nucToNumber[canonical[j]]:
+            if newseq[j] < canonical[j]:
                 canonical = newseq
-            elif nucToNumber[newseq[j]] > nucToNumber[canonical[j]]:
+            elif newseq[j] > canonical[j]:
                 break
     return canonical
 

From 66bf81f8271da2eced2a088f59b2c750eae1ebd2 Mon Sep 17 00:00:00 2001
From: Arya Massarat <23412689+aryarm@users.noreply.github.com>
Date: Tue, 22 Oct 2024 23:39:57 +0000
Subject: [PATCH 2/4] remove nucToNumber and add tests for IUPAC codes

---
 trtools/utils/tests/test_utils.py | 2 ++
 trtools/utils/utils.py            | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/trtools/utils/tests/test_utils.py b/trtools/utils/tests/test_utils.py
index 12097042..ec0c4d37 100644
--- a/trtools/utils/tests/test_utils.py
+++ b/trtools/utils/tests/test_utils.py
@@ -115,6 +115,8 @@ def test_GetCanonicalMotif():
     assert(utils.GetCanonicalMotif("TTGTT")=="AAAAC")
     assert(utils.GetCanonicalMotif("")=="")
     assert(utils.GetCanonicalMotif("cag")=="AGC")
+    assert(utils.GetCanonicalMotif("AARRG")=="AARRG")
+    assert(utils.GetCanonicalMotif("YARRG")=="ARRGY")
 
 # GetCanonicalOneStrand
 def test_GetCanonicalOneStrand():
diff --git a/trtools/utils/utils.py b/trtools/utils/utils.py
index f5a02284..602845f3 100644
--- a/trtools/utils/utils.py
+++ b/trtools/utils/utils.py
@@ -14,7 +14,6 @@
 
 import trtools.utils.common as common # pragma: no cover
 
-nucToNumber={"A":0,"C":1,"G":2,"T":3}
 
 def LoadSingleReader(
         vcf_loc: str,

From c1f78eeb73393a9642398911b9c585496b173af4 Mon Sep 17 00:00:00 2001
From: Arya Massarat <23412689+aryarm@users.noreply.github.com>
Date: Sat, 2 Nov 2024 05:23:01 +0000
Subject: [PATCH 3/4] also handle iupac codes in ReverseComplement()

---
 trtools/utils/tests/test_utils.py | 12 ++++++++++++
 trtools/utils/utils.py            | 24 +++++++++++-------------
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/trtools/utils/tests/test_utils.py b/trtools/utils/tests/test_utils.py
index ec0c4d37..9ae4f39a 100644
--- a/trtools/utils/tests/test_utils.py
+++ b/trtools/utils/tests/test_utils.py
@@ -128,6 +128,13 @@ def test_GetCanonicalOneStrand():
     assert(utils.GetCanonicalOneStrand("TTGTT")=="GTTTT")
     assert(utils.GetCanonicalOneStrand("")=="")
     assert(utils.GetCanonicalOneStrand("at")=="AT")
+    # Additional tests with IUPAC codes
+    assert(utils.GetCanonicalOneStrand("RY")=="RY")
+    assert(utils.GetCanonicalOneStrand("YR")=="RY")
+    assert(utils.GetCanonicalOneStrand("SW")=="SW")
+    assert(utils.GetCanonicalOneStrand("WS")=="SW")
+    assert(utils.GetCanonicalOneStrand("KM")=="KM")
+    assert(utils.GetCanonicalOneStrand("MK")=="KM")
 
 # ReverseComplement
 def test_ReverseComplement():
@@ -135,6 +142,11 @@ def test_ReverseComplement():
     assert(utils.ReverseComplement("")=="")
     assert(utils.ReverseComplement("CGNT")=="ANCG")
     assert(utils.ReverseComplement("ccga")=="TCGG")
+    # additional tests with IUPAC codes
+    assert(utils.ReverseComplement("RYASWKM")=="KMWSTRY")
+    # also test sequences that equal their own reverse complement (B<->V, D<->H, N<->N)
+    assert(utils.ReverseComplement("BDHV")=="BDHV")
+    assert(utils.ReverseComplement("N")=="N")
 
 # InferRepeatSequence
 def test_InferRepeatSequence():
diff --git a/trtools/utils/utils.py b/trtools/utils/utils.py
index 602845f3..71ef86af 100644
--- a/trtools/utils/utils.py
+++ b/trtools/utils/utils.py
@@ -428,7 +428,7 @@ def GetCanonicalOneStrand(repseq):
 def ReverseComplement(seq):
     r"""Get reverse complement of a sequence.
 
-    Converts everything to uppsercase.
+    Converts everything to uppercase and handles IUPAC codes.
 
     Parameters
     ----------
@@ -444,21 +444,19 @@ def ReverseComplement(seq):
     --------
     >>> ReverseComplement("AGGCT")
     'AGCCT'
+    >>> ReverseComplement("AGGCTRY")
+    'RAGCCT'
     """
+    iupac_complement = {
+        'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A',
+        'R': 'Y', 'Y': 'R', 'S': 'S', 'W': 'W',
+        'K': 'M', 'M': 'K', 'B': 'V', 'D': 'H',
+        'H': 'D', 'V': 'B', 'N': 'N'
+    }
     seq = seq.upper()
     newseq = ""
-    size = len(seq)
-    for i in range(len(seq)):
-        char = seq[len(seq)-i-1]
-        if char == "A":
-            newseq += "T"
-        elif char == "G":
-            newseq += "C"
-        elif char == "C":
-            newseq += "G"
-        elif char == "T":
-            newseq += "A"
-        else: newseq += "N"
+    for char in reversed(seq):
+        newseq += iupac_complement.get(char, 'N')
     return newseq
 
 def InferRepeatSequence(seq, period):

From 916bdc0737b9b04b611ded32dea05bc444bf36c8 Mon Sep 17 00:00:00 2001
From: Arya Massarat <23412689+aryarm@users.noreply.github.com>
Date: Fri, 1 Nov 2024 22:32:59 -0700
Subject: [PATCH 4/4] update test case in docstring of ReverseComplement

---
 trtools/utils/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/trtools/utils/utils.py b/trtools/utils/utils.py
index 71ef86af..2ec249d4 100644
--- a/trtools/utils/utils.py
+++ b/trtools/utils/utils.py
@@ -445,7 +445,7 @@ def ReverseComplement(seq):
     >>> ReverseComplement("AGGCT")
     'AGCCT'
     >>> ReverseComplement("AGGCTRY")
-    'RAGCCT'
+    'RYAGCCT'
     """
     iupac_complement = {
         'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A',