From 2642c8b0d0e48b1f1522e241d0864f7d03f73913 Mon Sep 17 00:00:00 2001 From: Sebastian Krautwurst Date: Sat, 18 Dec 2021 19:03:33 +0100 Subject: [PATCH 1/2] nextclade pos is now the nucleotide before the insertion --- bin/convert_insertions_nt2aa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/convert_insertions_nt2aa.py b/bin/convert_insertions_nt2aa.py index 85f6921..415076b 100755 --- a/bin/convert_insertions_nt2aa.py +++ b/bin/convert_insertions_nt2aa.py @@ -559,9 +559,9 @@ def insertions_nt_to_aa(nt_ins): aa_ins_list.append(ins) else: # notation includes the amino acid directly before, e.g. 'S:R214REPE' - nt_before = get_wuhan_seq_from_pos(pos-3, pos-1) + nt_before = get_wuhan_seq_from_pos(pos, pos+2) aa_before = nt2aa(nt_before) - aa_ins_list.append(gene + ':' + aa_before + str(codon-1) + aa_before + aminos) + aa_ins_list.append(gene + ':' + aa_before + str(codon) + aa_before + aminos) return ','.join(aa_ins_list) From 1a8a435170d9b99f611fe97b845d264acc42a0ca Mon Sep 17 00:00:00 2001 From: Sebastian Krautwurst Date: Sun, 19 Dec 2021 21:50:58 +0100 Subject: [PATCH 2/2] new nextclade info: seq from [pos-2:pos] --- bin/convert_insertions_nt2aa.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bin/convert_insertions_nt2aa.py b/bin/convert_insertions_nt2aa.py index 415076b..d05fbc7 100755 --- a/bin/convert_insertions_nt2aa.py +++ b/bin/convert_insertions_nt2aa.py @@ -543,7 +543,10 @@ def nt2aa(nucleotides, allow_uracil=True, allow_stop_codon=False): def insertions_nt_to_aa(nt_ins): '''convert Nextclade nucleotide insertion calls to gene/codon number aa insertion format - e.g. '22205:GAGCCAGAA' -> 'S:R214REPE' + e.g. 
'22204:GAGCCAGAA' -> 'S:R214REPE' + (2021-12-19 Nextclade changed the position, it is now the last nucleotide + of the preceding amino acid (R) instead of the first nucleotide of the first inserted aa (E), + so it was 22205 before, and is 22204 now) will skip conversion as a fallback if something goes wrong ''' nt_ins_list = nt_ins.split(',') @@ -559,8 +562,11 @@ def insertions_nt_to_aa(nt_ins): aa_ins_list.append(ins) else: # notation includes the amino acid directly before, e.g. 'S:R214REPE' - nt_before = get_wuhan_seq_from_pos(pos, pos+2) - aa_before = nt2aa(nt_before) + nt_before = get_wuhan_seq_from_pos(pos-2, pos) + if nt_before is None: + aa_before = '*' + else: + aa_before = nt2aa(nt_before) aa_ins_list.append(gene + ':' + aa_before + str(codon) + aa_before + aminos) return ','.join(aa_ins_list)