From d0320ce160f82936b1a1604ddc1297ee901b1460 Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Tue, 7 Dec 2021 10:23:53 +0000 Subject: [PATCH 1/5] add files for proposed sublineages - still testing --- constellations/definitions/cB.1.1.529.json | 52 ++++++++---------- .../definitions/cB.1.1.529_probable.json | 38 ++++++------- constellations/definitions/cBA.1.json | 47 ++++++++++++++++ constellations/definitions/cBA.2.json | 54 +++++++++++++++++++ 4 files changed, 139 insertions(+), 52 deletions(-) create mode 100644 constellations/definitions/cBA.1.json create mode 100644 constellations/definitions/cBA.2.json diff --git a/constellations/definitions/cB.1.1.529.json b/constellations/definitions/cB.1.1.529.json index f17b0b9..7fe3ae6 100644 --- a/constellations/definitions/cB.1.1.529.json +++ b/constellations/definitions/cB.1.1.529.json @@ -10,68 +10,60 @@ ], "WHO_label": "Omicron", "mrca_lineage": "B.1.1.529", - "representative_genome": "" + "representative_genome": "", + "lineage_name": "B.1.1.529", + "incompatible_lineage_calls": [ + "BA.1", + "BA.2" + ] }, "tags": [ "B.1.1.529" ], "sites": [ - "orf1a:K856R", - "nuc:C3037T", - "nuc:T5386G", - "orf1a:A2710T", - "orf1a:T3255I", - "orf1a:P3395H", - "orf1a:I3758V", + "nuc:C3037T # B.1 defining", + "orf1ab:T3255I", + "orf1ab:P3395H", + "del:11288:9 # orf1a:SGF3675-", "nuc:T13195C", - "orf1b:P314L", - "nuc:C15240T", - "orf1b:I1566V", - "spike:A67V", - "spike:T95I", + "orf1ab:P4715L # B.1 defining", + "orf1ab:I5967V", + "spike:G142D", "spike:G339D", - "spike:S371L", "spike:S373P", + "spike:S375F", "spike:K417N", "spike:N440K", - "spike:G446S", "spike:S477N", "spike:T478K", "spike:E484A", "spike:Q493R", - "spike:G496S", "spike:Q498R", "spike:N501Y", "spike:Y505H", - "spike:T547K", - "spike:D614G", + "spike:D614G # B.1 defining", "spike:H655Y", "spike:N679K", "spike:P681H", "spike:N764K", "spike:D796Y", - "spike:N856K", "spike:Q954H", "spike:N969K", "nuc:C25000T", + "nuc:C25584T", "e:T9I", - "m:D3G", "m:Q19E", 
"m:A63T", "nuc:A27259C", "nuc:C27807T", - "n:RG203KR" + "nuc:A28271T", + "n:P13L", + "del:28362:9 # n:ERS31-", + "n:RG203KR # B.1.1 defining" ], - "note": "We have included spike mutations S477N T478K E484A Q493R G496S Q498R N501Y, but these drop out in some early sequences. Additional rules have been added to compensate.", + "note": "Common mutations to the sublineages found", "rules": { - "min_alt": 30, + "min_alt": 31, "max_ref": 5, - "spike:S477N": "not ref", - "spike:T478K": "not ref", - "spike:E484A": "not ref", - "spike:Q493R": "not ref", - "spike:G496S": "not ref", - "spike:Q498R": "not ref", - "spike:N501Y": "not ref" } } diff --git a/constellations/definitions/cB.1.1.529_probable.json b/constellations/definitions/cB.1.1.529_probable.json index bf31bd6..2596e41 100644 --- a/constellations/definitions/cB.1.1.529_probable.json +++ b/constellations/definitions/cB.1.1.529_probable.json @@ -10,61 +10,55 @@ ], "WHO_label": "Omicron", "mrca_lineage": "B.1.1.529", - "representative_genome": "" + "representative_genome": "", }, "tags": [ "B.1.1.529" ], "sites": [ - "orf1a:K856R", - "nuc:C3037T", - "nuc:T5386G", - "orf1a:A2710T", - "orf1a:T3255I", - "orf1a:P3395H", - "orf1a:I3758V", + "nuc:C3037T # B.1 defining", + "orf1ab:T3255I", + "orf1ab:P3395H", + "del:11288:9 # orf1a:SGF3675-", "nuc:T13195C", - "orf1b:P314L", - "nuc:C15240T", - "orf1b:I1566V", - "spike:A67V", - "spike:T95I", + "orf1ab:P4715L # B.1 defining", + "orf1ab:I5967V", + "spike:G142D", "spike:G339D", - "spike:S371L", "spike:S373P", + "spike:S375F", "spike:K417N", "spike:N440K", - "spike:G446S", "spike:S477N", "spike:T478K", "spike:E484A", "spike:Q493R", - "spike:G496S", "spike:Q498R", "spike:N501Y", "spike:Y505H", - "spike:T547K", - "spike:D614G", + "spike:D614G # B.1 defining", "spike:H655Y", "spike:N679K", "spike:P681H", "spike:N764K", "spike:D796Y", - "spike:N856K", "spike:Q954H", "spike:N969K", "nuc:C25000T", + "nuc:C25584T", "e:T9I", - "m:D3G", "m:Q19E", "m:A63T", "nuc:A27259C", "nuc:C27807T", - 
"n:RG203KR" + "nuc:A28271T", + "n:P13L", + "del:28362:9 # n:ERS31-", + "n:RG203KR # B.1.1 defining" ], "note": "This file has been added to allow probable calls to be made, even when the incoming consensus pipeline calls for ref in lower quality regions", "rules": { "min_alt": 20, - "max_ref": 15 + "max_ref": 15, } } diff --git a/constellations/definitions/cBA.1.json b/constellations/definitions/cBA.1.json new file mode 100644 index 0000000..fb298ee --- /dev/null +++ b/constellations/definitions/cBA.1.json @@ -0,0 +1,47 @@ +{ + "label": "Omicron (BA.1-like)", + "description": "BA.1 lineage defining mutations", + "sources": [ + ], + "type": "variant", + "variant": { + "Pango_lineages": [ + "BA.1" + ], + "WHO_label": "Omicron", + "mrca_lineage": "BA.1", + "lineage_name": "BA.1", + "parent_lineage": "B.1.1.529", + "representative_genome": "" + }, + "tags": [ + "BA.1" + ], + "sites": [ + "orf1ab:K856R", + "del:6513:3 # orf1a:SL2083I ", + "nuc:T5386G", + "orf1ab:A2710T", + "orf1b:L3674F # aligners often place the parent del at 11283 rather than 11288", + "orf1ab:I3758V", + "nuc:C15240T", + "spike:A67V", + "del:21765:6 # spike:HV69-", + "spike:T95I", + "del:21987:9 # spike:VYY143-", + "del:22194:3 # spike:NL211I", + "ins:22205+GAGCCAGAA # spike:-215EPE", + "spike:S371L", + "spike:G446S", + "spike:G496S", + "spike:T547K", + "spike:N856K", + "spike:L981F", + "m:D3G" + ], + "note": "Unique mutations for sublineage", + "rules": { + "min_alt": 11, + "max_ref": 3 + } +} diff --git a/constellations/definitions/cBA.2.json b/constellations/definitions/cBA.2.json new file mode 100644 index 0000000..6eb6397 --- /dev/null +++ b/constellations/definitions/cBA.2.json @@ -0,0 +1,54 @@ +{ + "label": "Omicron (BA.2-like)", + "description": "BA.2 lineage defining mutations", + "sources": [ + ], + "type": "variant", + "variant": { + "Pango_lineages": [ + "BA.2" + ], + "WHO_label": "Omicron", + "mrca_lineage": "BA.2", + "lineage_name": "BA.2", + "parent_lineage": "B.1.1.529", + 
"representative_genome": "" + }, + "tags": [ + "BA.2" + ], + "sites": [ + "orf1ab:S135R", + "orf1ab:T842I ", + "orf1ab:G1307S", + "nuc:C4321T", + "orf1ab:L3027F", + "nuc:A9424G", + "orf1ab:T3090I", + "orf1ab:L3201F", + "nuc:C10198T", + "nuc:G10447A", + "nuc:C15714T", + "nuc:C12880T", + "nuc:C15714T", + "orf1ab:R5716C", + "orf1ab:T6564I", + "nuc:A20055G", + "spike:T19I", + "del:21633:9 # spike:LPPA24S", + "spike:V231G", + "spike:S371F", + "spike:T376A", + "spike:D405N", + "spike:R408S", + "orf3a:T233I", + "nuc:C26858T", + "orf6:D61L # GAT->CTC", + "n:S413R" + ], + "note": "Unique mutations for sublineage", + "rules": { + "min_alt": 19, + "max_ref": 3 + } +} From cbf707a2e6aba04438d7faab9d1a83157d6ee6c8 Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Tue, 7 Dec 2021 12:20:53 +0000 Subject: [PATCH 2/5] updates - extend probable sites so real calls not assigned probable and fix typos and deletion issue --- constellations/definitions/cB.1.1.529.json | 4 +- .../definitions/cB.1.1.529_probable.json | 54 +++++++++++++++++-- constellations/definitions/cBA.1.json | 6 +-- constellations/definitions/cBA.2.json | 2 +- 4 files changed, 56 insertions(+), 10 deletions(-) diff --git a/constellations/definitions/cB.1.1.529.json b/constellations/definitions/cB.1.1.529.json index 7fe3ae6..9e0af83 100644 --- a/constellations/definitions/cB.1.1.529.json +++ b/constellations/definitions/cB.1.1.529.json @@ -24,7 +24,7 @@ "nuc:C3037T # B.1 defining", "orf1ab:T3255I", "orf1ab:P3395H", - "del:11288:9 # orf1a:SGF3675-", + "orf1ab:SGF3675- # del:11288:9", "nuc:T13195C", "orf1ab:P4715L # B.1 defining", "orf1ab:I5967V", @@ -64,6 +64,6 @@ "note": "Common mutations to the sublineages found", "rules": { "min_alt": 31, - "max_ref": 5, + "max_ref": 5 } } diff --git a/constellations/definitions/cB.1.1.529_probable.json b/constellations/definitions/cB.1.1.529_probable.json index 2596e41..3aca44f 100644 --- a/constellations/definitions/cB.1.1.529_probable.json +++
b/constellations/definitions/cB.1.1.529_probable.json @@ -10,7 +10,7 @@ ], "WHO_label": "Omicron", "mrca_lineage": "B.1.1.529", - "representative_genome": "", + "representative_genome": "" }, "tags": [ "B.1.1.529" @@ -19,7 +19,7 @@ "nuc:C3037T # B.1 defining", "orf1ab:T3255I", "orf1ab:P3395H", - "del:11288:9 # orf1a:SGF3675-", + "orf1ab:SGF3675- # del:11288:9", "nuc:T13195C", "orf1ab:P4715L # B.1 defining", "orf1ab:I5967V", @@ -54,11 +54,57 @@ "nuc:A28271T", "n:P13L", "del:28362:9 # n:ERS31-", - "n:RG203KR # B.1.1 defining" + "n:RG203KR # B.1.1 defining", + "orf1ab:K856R # BA.1 defining", + "del:6513:3 # orf1a:SL2083I # BA.1 defining", + "nuc:T5386G # BA.1 defining", + "orf1ab:A2710T # BA.1 defining", + "orf1ab:I3758V # BA.1 defining", + "nuc:C15240T # BA.1 defining", + "spike:A67V # BA.1 defining", + "del:21765:6 # spike:HV69- # BA.1 defining", + "spike:T95I # BA.1 defining", + "del:21987:9 # spike:VYY143- # BA.1 defining", + "del:22194:3 # spike:NL211I # BA.1 defining", + "nuc:22205+GAGCCAGAA # spike:-215EPE # BA.1 defining", + "spike:S371L # BA.1 defining", + "spike:G446S # BA.1 defining", + "spike:G496S # BA.1 defining", + "spike:T547K # BA.1 defining", + "spike:N856K # BA.1 defining", + "spike:L981F # BA.1 defining", + "m:D3G # BA.1 defining", + "orf1ab:S135R # BA.2 defining", + "orf1ab:T842I # BA.2 defining", + "orf1ab:G1307S # BA.2 defining", + "nuc:C4321T # BA.2 defining", + "orf1ab:L3027F # BA.2 defining", + "nuc:A9424G # BA.2 defining", + "orf1ab:T3090I # BA.2 defining", + "orf1ab:L3201F # BA.2 defining", + "nuc:C10198T # BA.2 defining", + "nuc:G10447A # BA.2 defining", + "nuc:C15714T # BA.2 defining", + "nuc:C12880T # BA.2 defining", + "nuc:C15714T # BA.2 defining", + "orf1ab:R5716C # BA.2 defining", + "orf1ab:T6564I # BA.2 defining", + "nuc:A20055G # BA.2 defining", + "spike:T19I # BA.2 defining", + "del:21633:9 # spike:LPPA24S # BA.2 defining", + "spike:V231G # BA.2 defining", + "spike:S371F # BA.2 defining", + "spike:T376A # BA.2 defining", + 
"spike:D405N # BA.2 defining", + "spike:R408S # BA.2 defining", + "orf3a:T233I # BA.2 defining", + "nuc:C26858T # BA.2 defining", + "orf6:D61L # GAT->CTC # BA.2 defining", + "n:S413R # BA.2 defining" ], "note": "This file has been added to allow probable calls to be made, even when the incoming consensus pipeline calls for ref in lower quality regions", "rules": { "min_alt": 20, - "max_ref": 15, + "max_ref": 15 } } diff --git a/constellations/definitions/cBA.1.json b/constellations/definitions/cBA.1.json index fb298ee..7a7fabf 100644 --- a/constellations/definitions/cBA.1.json +++ b/constellations/definitions/cBA.1.json @@ -22,7 +22,7 @@ "del:6513:3 # orf1a:SL2083I ", "nuc:T5386G", "orf1ab:A2710T", - "orf1b:L3674F # aligners often place the parent del at 11283 rather than 11288", + "# nuc:G11287T # orf1ab:L3674F # aligners often place the parent del at 11283 rather than 11288", "orf1ab:I3758V", "nuc:C15240T", "spike:A67V", @@ -30,7 +30,7 @@ "spike:T95I", "del:21987:9 # spike:VYY143-", "del:22194:3 # spike:NL211I", - "ins:22205+GAGCCAGAA # spike:-215EPE", + "nuc:22205+GAGCCAGAA # spike:-215EPE", "spike:S371L", "spike:G446S", "spike:G496S", @@ -41,7 +41,7 @@ ], "note": "Unique mutations for sublineage", "rules": { - "min_alt": 11, + "min_alt": 10, "max_ref": 3 } } diff --git a/constellations/definitions/cBA.2.json b/constellations/definitions/cBA.2.json index 6eb6397..17b5a4a 100644 --- a/constellations/definitions/cBA.2.json +++ b/constellations/definitions/cBA.2.json @@ -19,7 +19,7 @@ ], "sites": [ "orf1ab:S135R", - "orf1ab:T842I ", + "orf1ab:T842I", "orf1ab:G1307S", "nuc:C4321T", "orf1ab:L3027F", From 8ec490b4b3030740aad0fd4f88110fdf309735e8 Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Tue, 7 Dec 2021 12:39:28 +0000 Subject: [PATCH 3/5] update probable definition to capture existing probables --- constellations/__init__.py | 2 +- constellations/definitions/cB.1.1.529_probable.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/constellations/__init__.py b/constellations/__init__.py index d6ca430..df6df80 100644 --- a/constellations/__init__.py +++ b/constellations/__init__.py @@ -1,3 +1,3 @@ _program = "constellations" -__version__ = "v0.0.27" +__version__ = "v0.0.28" diff --git a/constellations/definitions/cB.1.1.529_probable.json b/constellations/definitions/cB.1.1.529_probable.json index 3aca44f..23e64c8 100644 --- a/constellations/definitions/cB.1.1.529_probable.json +++ b/constellations/definitions/cB.1.1.529_probable.json @@ -104,7 +104,7 @@ ], "note": "This file has been added to allow probable calls to be made, even when the incoming consensus pipeline calls for ref in lower quality regions", "rules": { - "min_alt": 20, - "max_ref": 15 + "min_alt": 25, + "max_ref": 45 } } From fe8bb856ad4ea02162ea12e9a11926cb8b398f2c Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Tue, 7 Dec 2021 20:29:50 +0000 Subject: [PATCH 4/5] update so BA.1==omicron --- constellations/definitions/cB.1.1.529.json | 3 +- constellations/definitions/cBA.1.json | 4 +- ....529_probable.json => cBA.1_probable.json} | 41 ++++--------------- constellations/definitions/cBA.2.json | 7 ++-- 4 files changed, 14 insertions(+), 41 deletions(-) rename constellations/definitions/{cB.1.1.529_probable.json => cBA.1_probable.json} (61%) diff --git a/constellations/definitions/cB.1.1.529.json b/constellations/definitions/cB.1.1.529.json index 9e0af83..baaf6d6 100644 --- a/constellations/definitions/cB.1.1.529.json +++ b/constellations/definitions/cB.1.1.529.json @@ -1,5 +1,5 @@ { - "label": "Omicron (B.1.1.529-like)", + "label": "B.1.1.529-like", "description": "B.1.1.529 lineage defining mutations", "sources": [ ], @@ -8,7 +8,6 @@ "Pango_lineages": [ "B.1.1.529" ], - "WHO_label": "Omicron", "mrca_lineage": "B.1.1.529", "representative_genome": "", "lineage_name": "B.1.1.529", diff --git a/constellations/definitions/cBA.1.json b/constellations/definitions/cBA.1.json index 7a7fabf..303ecc9 100644 --- 
a/constellations/definitions/cBA.1.json +++ b/constellations/definitions/cBA.1.json @@ -40,8 +40,10 @@ "m:D3G" ], "note": "Unique mutations for sublineage", + "note2": "Requires ancestral mutation so scorpio tie breaks correctly", "rules": { "min_alt": 10, - "max_ref": 3 + "max_ref": 3, + "spike:D614G": "not ref" } } diff --git a/constellations/definitions/cB.1.1.529_probable.json b/constellations/definitions/cBA.1_probable.json similarity index 61% rename from constellations/definitions/cB.1.1.529_probable.json rename to constellations/definitions/cBA.1_probable.json index 23e64c8..60c1aa8 100644 --- a/constellations/definitions/cB.1.1.529_probable.json +++ b/constellations/definitions/cBA.1_probable.json @@ -1,19 +1,19 @@ { - "label": "Probable Omicron (B.1.1.529-like)", - "description": "B.1.1.529 lineage defining mutations with lower thresholds to handle lower quality consensus sequences", + "label": "Probable Omicron (BA.1-like)", + "description": "BA.1 lineage defining mutations with lower thresholds to handle lower quality consensus sequences", "sources": [ ], "type": "variant", "variant": { "Pango_lineages": [ - "B.1.1.529" + "BA.1" ], "WHO_label": "Omicron", - "mrca_lineage": "B.1.1.529", + "mrca_lineage": "BA.1", "representative_genome": "" }, "tags": [ - "B.1.1.529" + "BA.1" ], "sites": [ "nuc:C3037T # B.1 defining", @@ -73,38 +73,11 @@ "spike:T547K # BA.1 defining", "spike:N856K # BA.1 defining", "spike:L981F # BA.1 defining", - "m:D3G # BA.1 defining", - "orf1ab:S135R # BA.2 defining", - "orf1ab:T842I # BA.2 defining", - "orf1ab:G1307S # BA.2 defining", - "nuc:C4321T # BA.2 defining", - "orf1ab:L3027F # BA.2 defining", - "nuc:A9424G # BA.2 defining", - "orf1ab:T3090I # BA.2 defining", - "orf1ab:L3201F # BA.2 defining", - "nuc:C10198T # BA.2 defining", - "nuc:G10447A # BA.2 defining", - "nuc:C15714T # BA.2 defining", - "nuc:C12880T # BA.2 defining", - "nuc:C15714T # BA.2 defining", - "orf1ab:R5716C # BA.2 defining", - "orf1ab:T6564I # BA.2 defining", - 
"nuc:A20055G # BA.2 defining", - "spike:T19I # BA.2 defining", - "del:21633:9 # spike:LPPA24S # BA.2 defining", - "spike:V231G # BA.2 defining", - "spike:S371F # BA.2 defining", - "spike:T376A # BA.2 defining", - "spike:D405N # BA.2 defining", - "spike:R408S # BA.2 defining", - "orf3a:T233I # BA.2 defining", - "nuc:C26858T # BA.2 defining", - "orf6:D61L # GAT->CTC # BA.2 defining", - "n:S413R # BA.2 defining" + "m:D3G # BA.1 defining" ], "note": "This file has been added to allow probable calls to be made, even when the incoming consensus pipeline calls for ref in lower quality regions", "rules": { "min_alt": 25, - "max_ref": 45 + "max_ref": 18 } } diff --git a/constellations/definitions/cBA.2.json b/constellations/definitions/cBA.2.json index 17b5a4a..25c908d 100644 --- a/constellations/definitions/cBA.2.json +++ b/constellations/definitions/cBA.2.json @@ -1,5 +1,5 @@ { - "label": "Omicron (BA.2-like)", + "label": "BA.2-like", "description": "BA.2 lineage defining mutations", "sources": [ ], @@ -8,7 +8,6 @@ "Pango_lineages": [ "BA.2" ], - "WHO_label": "Omicron", "mrca_lineage": "BA.2", "lineage_name": "BA.2", "parent_lineage": "B.1.1.529", @@ -36,12 +35,12 @@ "nuc:A20055G", "spike:T19I", "del:21633:9 # spike:LPPA24S", - "spike:V231G", + "nuc:T22200G # spike:V231G # but this seems to be wrong aa ref allele", "spike:S371F", "spike:T376A", "spike:D405N", "spike:R408S", - "orf3a:T233I", + "nuc:C26060T # orf3a:T233I # but this seems to be wrong aa ref allele", "nuc:C26858T", "orf6:D61L # GAT->CTC", "n:S413R" From 65ed35007cc022153567a62fc22ea9e37e43d4b7 Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Wed, 8 Dec 2021 11:52:09 +0000 Subject: [PATCH 5/5] make parent possible omicron to handle small number of ref-called pipelines --- constellations/definitions/cB.1.1.529.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/constellations/definitions/cB.1.1.529.json b/constellations/definitions/cB.1.1.529.json index baaf6d6..ee7d331 100644 --- 
a/constellations/definitions/cB.1.1.529.json +++ b/constellations/definitions/cB.1.1.529.json @@ -1,5 +1,5 @@ { - "label": "B.1.1.529-like", + "label": "Possible Omicron (B.1.1.529-like)", "description": "B.1.1.529 lineage defining mutations", "sources": [ ],