Skip to content

Commit

Permalink
Merge pull request #185 from replikation/fix_nextclade_insertion_offset
Browse files Browse the repository at this point in the history
The Nextclade insertion position is now the nucleotide immediately before the insertion
  • Loading branch information
hoelzer authored Dec 19, 2021
2 parents c53af98 + 1a8a435 commit 0e9aa6f
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions bin/convert_insertions_nt2aa.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,10 @@ def nt2aa(nucleotides, allow_uracil=True, allow_stop_codon=False):

def insertions_nt_to_aa(nt_ins):
'''convert Nextclade nucleotide insertion calls to gene/codon number aa insertion format
e.g. '22205:GAGCCAGAA' -> 'S:R214REPE'
e.g. '22204:GAGCCAGAA' -> 'S:R214REPE'
(As of 2021-12-19, Nextclade reports the position of the last nucleotide
of the preceding amino acid (R) rather than the first nucleotide of the first inserted amino acid (E);
the same insertion was therefore reported at 22205 before and is reported at 22204 now.)
will skip conversion as a fallback if something goes wrong
'''
nt_ins_list = nt_ins.split(',')
Expand All @@ -559,9 +562,12 @@ def insertions_nt_to_aa(nt_ins):
aa_ins_list.append(ins)
else:
# notation includes the amino acid directly before, e.g. 'S:R214REPE'
nt_before = get_wuhan_seq_from_pos(pos-3, pos-1)
aa_before = nt2aa(nt_before)
aa_ins_list.append(gene + ':' + aa_before + str(codon-1) + aa_before + aminos)
nt_before = get_wuhan_seq_from_pos(pos-2, pos)
if nt_before is None:
aa_before = '*'
else:
aa_before = nt2aa(nt_before)
aa_ins_list.append(gene + ':' + aa_before + str(codon) + aa_before + aminos)

return ','.join(aa_ins_list)

Expand Down

0 comments on commit 0e9aa6f

Please sign in to comment.