From c1f78eeb73393a9642398911b9c585496b173af4 Mon Sep 17 00:00:00 2001 From: Arya Massarat <23412689+aryarm@users.noreply.github.com> Date: Sat, 2 Nov 2024 05:23:01 +0000 Subject: [PATCH] also handle iupac codes in ReverseComplement() --- trtools/utils/tests/test_utils.py | 12 ++++++++++++ trtools/utils/utils.py | 24 +++++++++++------------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/trtools/utils/tests/test_utils.py b/trtools/utils/tests/test_utils.py index ec0c4d37..9ae4f39a 100644 --- a/trtools/utils/tests/test_utils.py +++ b/trtools/utils/tests/test_utils.py @@ -128,6 +128,13 @@ def test_GetCanonicalOneStrand(): assert(utils.GetCanonicalOneStrand("TTGTT")=="GTTTT") assert(utils.GetCanonicalOneStrand("")=="") assert(utils.GetCanonicalOneStrand("at")=="AT") + # Additional tests with IUPAC codes + assert(utils.GetCanonicalOneStrand("RY")=="RY") + assert(utils.GetCanonicalOneStrand("YR")=="RY") + assert(utils.GetCanonicalOneStrand("SW")=="SW") + assert(utils.GetCanonicalOneStrand("WS")=="SW") + assert(utils.GetCanonicalOneStrand("KM")=="KM") + assert(utils.GetCanonicalOneStrand("MK")=="KM") # ReverseComplement def test_ReverseComplement(): @@ -135,6 +142,11 @@ def test_ReverseComplement(): assert(utils.ReverseComplement("")=="") assert(utils.ReverseComplement("CGNT")=="ANCG") assert(utils.ReverseComplement("ccga")=="TCGG") + # additional tests with IUPAC codes + assert(utils.ReverseComplement("RYASWKM")=="KMWSTRY") + # also test sequences that equal their own reverse complement + assert(utils.ReverseComplement("BDHV")=="BDHV") + assert(utils.ReverseComplement("N")=="N") # InferRepeatSequence def test_InferRepeatSequence(): diff --git a/trtools/utils/utils.py b/trtools/utils/utils.py index 602845f3..71ef86af 100644 --- a/trtools/utils/utils.py +++ b/trtools/utils/utils.py @@ -428,7 +428,7 @@ def GetCanonicalOneStrand(repseq): def ReverseComplement(seq): r"""Get reverse complement of a sequence. - Converts everything to uppsercase. 
+ Converts everything to uppercase and handles IUPAC codes. Parameters ---------- @@ -444,21 +444,19 @@ def ReverseComplement(seq): -------- >>> ReverseComplement("AGGCT") 'AGCCT' + >>> ReverseComplement("AGGCTRY") + 'RYAGCCT' """ + iupac_complement = { + 'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', + 'R': 'Y', 'Y': 'R', 'S': 'S', 'W': 'W', + 'K': 'M', 'M': 'K', 'B': 'V', 'D': 'H', + 'H': 'D', 'V': 'B', 'N': 'N' + } seq = seq.upper() newseq = "" - size = len(seq) - for i in range(len(seq)): - char = seq[len(seq)-i-1] - if char == "A": - newseq += "T" - elif char == "G": - newseq += "C" - elif char == "C": - newseq += "G" - elif char == "T": - newseq += "A" - else: newseq += "N" + for char in reversed(seq): + newseq += iupac_complement.get(char, 'N') return newseq def InferRepeatSequence(seq, period):