Skip to content

Commit

Permalink
0-based positions
Browse files Browse the repository at this point in the history
  • Loading branch information
nvta1209 committed Apr 26, 2024
1 parent c4ae399 commit 78bb6c1
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 26 deletions.
65 changes: 40 additions & 25 deletions src/uta/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import itertools
import logging
import time
from typing import Any
from typing import Any, Dict, List

from biocommons.seqrepo import SeqRepo
from bioutils.coordinates import strand_pm_to_int, MINUS_STRAND
Expand Down Expand Up @@ -685,32 +685,13 @@ def _fetch_origin_by_name(name):
if ti.transl_except:
# if transl_except exists, it looks like this:
# (pos:333..335,aa:Sec);(pos:1017,aa:TERM)
for te in ti.transl_except.split(';'):
# remove parens
te = te.replace('(','').replace(')','')

# extract positions
pos_str, aa_str = te.split(',')
pos_str = pos_str.removeprefix('pos:')
if '..' in pos_str:
start_position, _, end_position = pos_str.partition('..')
else:
start_position = end_position = pos_str

# extract amino acid
amino_acid = aa_str.removeprefix('aa:')

u_te = usam.TranslationException(
tx_ac=ti.ac,
start_position=int(start_position),
end_position=int(end_position),
amino_acid=amino_acid,
)
session.add(u_te)
transl_except_list = ti.transl_except.split(';')
te_list = _create_translation_exceptions(transcript=ti.ac, transl_except_list=transl_except_list)
for te in te_list:
session.add(usam.TranslationException(**te))

if u_tx.gene_id != ti.gene_id:
raise Exception("{ti.ac}: GeneID changed from {u_tx.gene_id} to {ti.gene_id}".format(
u_tx=u_tx, ti=ti))
raise Exception("{ti.ac}: GeneID changed from {u_tx.gene_id} to {ti.gene_id}".format(u_tx=u_tx, ti=ti))

# state: transcript now exists, either existing or freshly-created

Expand All @@ -736,6 +717,40 @@ def _fetch_origin_by_name(name):
p=(i_ti + 1) / n_rows * 100))


def _create_translation_exceptions(transcript: str, transl_except_list: List[str]) -> List[Dict]:
"""
Create TranslationException object data where start and end positions are 0-based, from transl_except data that is 1-based.
For example, [(pos:333..335,aa:Sec), (pos:1017,aa:TERM)] should result in start and end positions [(332, 335), (1016, 1017)]
"""
result = []

for te in transl_except_list:
# remove parens
te = te.replace('(','').replace(')','')

# extract positions
pos_str, aa_str = te.split(',')
pos_str = pos_str.removeprefix('pos:')
if '..' in pos_str:
start_position, _, end_position = pos_str.partition('..')
else:
start_position = end_position = pos_str

# extract amino acid
amino_acid = aa_str.removeprefix('aa:')

result.append(
{
'tx_ac': transcript,
'start_position': int(start_position) - 1,
'end_position': int(end_position),
'amino_acid': amino_acid,
}
)

return result


def refresh_matviews(session, opts, cf):
session.execute(text("set role {admin_role};".format(
admin_role=cf.get("uta", "admin_role"))))
Expand Down
22 changes: 21 additions & 1 deletion tests/test_uta_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,28 @@ def test_load_txinfo(self):
},
{
'tx_ac': 'NM_080430.4',
'start_position': 205,
'start_position': 204,
'end_position': 207,
'amino_acid': 'Sec',
},
)


class TestUtaLoadingFunctions(unittest.TestCase):
    """Tests for helper functions reached through the ``ul`` module alias."""

    def test__create_translation_exceptions(self):
        """One ranged entry and one single-position entry yield the expected dicts."""
        raw_entries = ['(pos:333..335,aa:Sec)', '(pos:1017,aa:TERM)']
        expected = [
            {
                'tx_ac': 'dummy_tx',
                'start_position': 332,
                'end_position': 335,
                'amino_acid': 'Sec',
            },
            {
                'tx_ac': 'dummy_tx',
                'start_position': 1016,
                'end_position': 1017,
                'amino_acid': 'TERM',
            },
        ]

        actual = ul._create_translation_exceptions(
            transcript='dummy_tx',
            transl_except_list=raw_entries,
        )

        self.assertEqual(actual, expected)

0 comments on commit 78bb6c1

Please sign in to comment.