From 83ffa8e32b3b2d76cdb915c33d990d8370d5e7fb Mon Sep 17 00:00:00 2001
From: Manuel Holtgrewe
Date: Wed, 3 Jan 2024 12:38:27 +0100
Subject: [PATCH] feat: integrate PanelApp for gene-phenotype links (#225) (#377)

---
 protos/annonars/genes/base.proto              | 116 +++++++
 src/genes/cli/data.rs                         | 310 ++++++++++++++++++
 src/genes/cli/import.rs                       |  58 +++-
 ...a__tests__deserialize_panelapp_record.snap | 184 +++++++++++
 tests/genes/panelapp/panelapp.jsonl           |   5 +
 5 files changed, 671 insertions(+), 2 deletions(-)
 create mode 100644 src/genes/cli/snapshots/annonars__genes__cli__data__tests__deserialize_panelapp_record.snap
 create mode 100644 tests/genes/panelapp/panelapp.jsonl

diff --git a/protos/annonars/genes/base.proto b/protos/annonars/genes/base.proto
index 373f5bdd..7dc30c38 100644
--- a/protos/annonars/genes/base.proto
+++ b/protos/annonars/genes/base.proto
@@ -814,6 +814,120 @@ message GtexRecord {
     repeated GtexTissueRecord records = 4;
 }
 
+// Entry in PanelApp.
+message PanelAppRecord {
+    // Gene identity information.
+    message GeneData {
+        // HGNC ID.
+        optional string hgnc_id = 1;
+        // HGNC gene symbol.
+        optional string hgnc_symbol = 2;
+        // Gene symbol.
+        optional string gene_symbol = 3;
+    }
+
+    // Enumeration for entity types.
+    enum EntityType {
+        // Unknown
+        ENTITY_TYPE_UNKNOWN = 0;
+        // Gene
+        ENTITY_TYPE_GENE = 1;
+        // Short Tandem Repeat
+        ENTITY_TYPE_STR = 2;
+        // Region
+        ENTITY_TYPE_REGION = 3;
+    }
+
+    // Enumeration for confidence levels.
+    enum ConfidenceLevel {
+        // Unknown
+        CONFIDENCE_LEVEL_UNKNOWN = 0;
+        // None
+        CONFIDENCE_LEVEL_NONE = 1;
+        // Red
+        CONFIDENCE_LEVEL_RED = 2;
+        // Amber
+        CONFIDENCE_LEVEL_AMBER = 3;
+        // Green
+        CONFIDENCE_LEVEL_GREEN = 4;
+    }
+
+    // Enumeration for penetrance.
+    enum Penetrance {
+        // Unknown
+        PENETRANCE_UNKNOWN = 0;
+        // Complete
+        PENETRANCE_COMPLETE = 1;
+        // Incomplete
+        PENETRANCE_INCOMPLETE = 2;
+    }
+
+    // Message for panel statistics.
+    message PanelStats {
+        // Number of genes.
+        uint32 number_of_genes = 1;
+        // Number of STRs.
+        uint32 number_of_strs = 2;
+        // Number of regions.
+        uint32 number_of_regions = 3;
+    }
+
+    // Message for panel types.
+    message PanelType {
+        // Type name.
+        string name = 1;
+        // Slug.
+        string slug = 2;
+        // Description.
+        string description = 3;
+    }
+
+    // Message for panel information.
+    message Panel {
+        // Panel ID.
+        uint32 id = 1;
+        // Panel hash ID.
+        optional string hash_id = 2;
+        // Panel name.
+        string name = 3;
+        // Disease group.
+        string disease_group = 4;
+        // Disease subgroup.
+        string disease_sub_group = 5;
+        // Version.
+        string version = 6;
+        // Creation date of version.
+        string version_created = 7;
+        // Relevant disorders.
+        repeated string relevant_disorders = 8;
+        // Stats.
+        PanelStats stats = 9;
+        // Panel types.
+        repeated PanelType types = 10;
+    }
+
+    // Gene identity information.
+    GeneData gene_data = 1;
+    // Entity type.
+    EntityType entity_type = 2;
+    // Entity name.
+    string entity_name = 3;
+    // Confidence level.
+    ConfidenceLevel confidence_level = 4;
+    // Penetrance.
+    Penetrance penetrance = 5;
+    // Publications.
+    repeated string publications = 6;
+    // Evidence.
+    repeated string evidence = 7;
+    // Phenotypes.
+    repeated string phenotypes = 8;
+    // Mode of inheritance.
+    string mode_of_inheritance = 9;
+    // Panel.
+    Panel panel = 10;
+}
+
 // Entry in the genes RocksDB database.
 message Record {
     // Information from the ACMG secondary finding list.
@@ -842,4 +956,6 @@ message Record {
     DominoRecord domino = 12;
     // DECIPHER HI score.
     DecipherHiRecord decipher_hi = 13;
+    // Genomics England PanelApp gene information.
+    repeated PanelAppRecord panelapp = 14;
 }
diff --git a/src/genes/cli/data.rs b/src/genes/cli/data.rs
index ae2817c3..c8ec3994 100644
--- a/src/genes/cli/data.rs
+++ b/src/genes/cli/data.rs
@@ -26,6 +26,8 @@ pub struct Record {
     pub omim: Option<omim::Record>,
     /// Information about ORPHA diseases for a gene.
     pub orpha: Option<orpha::Record>,
+    /// Information about PanelApp entries for a gene.
+    pub panelapp: Vec<panelapp::Record>,
     /// Information from rCNV (Collins et al., 2022).
     pub rcnv: Option<rcnv::Record>,
     /// Information from sHet (Weghorn et al., 2019).
@@ -1579,6 +1581,293 @@ pub mod omim {
     }
 }
 
+/// Code for reading relevant parts of the PanelApp gene data.
+pub mod panelapp {
+    use serde::{Deserialize, Serialize};
+
+    use crate::pbs::genes::base::panel_app_record;
+
+    /// Gene identity information.
+    ///
+    /// We only keep the minimal information as we already have everything in HGNC.
+    ///
+    /// Note that the HGNC ID/symbol can be empty for genes but then gene_symbol is set.
+    ///
+    /// For regions, all can be null.
+    #[derive(Debug, Clone, Serialize, Deserialize)]
+    pub struct GeneData {
+        /// HGNC gene ID.
+        pub hgnc_id: Option<String>,
+        /// HGNC gene symbol.
+        pub hgnc_symbol: Option<String>,
+        /// Gene symbol.
+        pub gene_symbol: Option<String>,
+    }
+
+    impl From<GeneData> for panel_app_record::GeneData {
+        fn from(val: GeneData) -> Self {
+            let GeneData {
+                hgnc_id,
+                hgnc_symbol,
+                gene_symbol,
+            } = val;
+            panel_app_record::GeneData {
+                hgnc_id,
+                hgnc_symbol,
+                gene_symbol,
+            }
+        }
+    }
+
+    /// Enumeration for entity types.
+    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
+    pub enum EntityType {
+        /// Gene
+        #[serde(rename = "gene")]
+        Gene,
+        /// Short Tandem Repeat
+        #[serde(rename = "str")]
+        Str,
+        /// Region
+        #[serde(rename = "region")]
+        Region,
+    }
+
+    impl From<EntityType> for panel_app_record::EntityType {
+        fn from(val: EntityType) -> Self {
+            match val {
+                EntityType::Gene => panel_app_record::EntityType::Gene,
+                EntityType::Str => panel_app_record::EntityType::Str,
+                EntityType::Region => panel_app_record::EntityType::Region,
+            }
+        }
+    }
+
+    /// Enumeration for confidence levels.
+    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
+    pub enum ConfidenceLevel {
+        /// 0 - lowest level, when expert review was removed
+        #[serde(rename = "0")]
+        None,
+        /// 1 - red, low evidence
+        #[serde(rename = "1")]
+        Red,
+        /// 2 - amber, moderate evidence
+        #[serde(rename = "2")]
+        Amber,
+        /// 3 - green, high evidence
+        #[serde(rename = "3")]
+        Green,
+    }
+
+    impl From<ConfidenceLevel> for panel_app_record::ConfidenceLevel {
+        fn from(val: ConfidenceLevel) -> Self {
+            match val {
+                ConfidenceLevel::None => panel_app_record::ConfidenceLevel::None,
+                ConfidenceLevel::Red => panel_app_record::ConfidenceLevel::Red,
+                ConfidenceLevel::Amber => panel_app_record::ConfidenceLevel::Amber,
+                ConfidenceLevel::Green => panel_app_record::ConfidenceLevel::Green,
+            }
+        }
+    }
+
+    /// Enumeration for penetrance.
+    #[derive(
+        Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize,
+    )]
+    pub enum Penetrance {
+        /// Penetrance is unknown.
+        #[default]
+        #[serde(rename = "unknown")]
+        Unknown,
+        /// Complete penetrance.
+        #[serde(rename = "Complete")]
+        Complete,
+        /// Incomplete penetrance.
+        #[serde(rename = "Incomplete")]
+        Incomplete,
+    }
+
+    impl From<Penetrance> for panel_app_record::Penetrance {
+        fn from(val: Penetrance) -> Self {
+            match val {
+                Penetrance::Unknown => panel_app_record::Penetrance::Unknown,
+                Penetrance::Complete => panel_app_record::Penetrance::Complete,
+                Penetrance::Incomplete => panel_app_record::Penetrance::Incomplete,
+            }
+        }
+    }
+
+    /// Panel statistics.
+    #[derive(Debug, Clone, Serialize, Deserialize)]
+    pub struct PanelStats {
+        /// Number of genes.
+        pub number_of_genes: u32,
+        /// Number of STRs.
+        pub number_of_strs: u32,
+        /// Number of regions.
+        pub number_of_regions: u32,
+    }
+
+    impl From<PanelStats> for panel_app_record::PanelStats {
+        fn from(val: PanelStats) -> Self {
+            panel_app_record::PanelStats {
+                number_of_genes: val.number_of_genes,
+                number_of_strs: val.number_of_strs,
+                number_of_regions: val.number_of_regions,
+            }
+        }
+    }
+
+    /// Panel type.
+    #[derive(Debug, Clone, Serialize, Deserialize)]
+    pub struct PanelType {
+        /// Panel type name.
+        pub name: String,
+        /// Panel type slug.
+        pub slug: String,
+        /// Panel type description.
+        pub description: String,
+    }
+
+    impl From<PanelType> for panel_app_record::PanelType {
+        fn from(val: PanelType) -> Self {
+            let PanelType {
+                name,
+                slug,
+                description,
+            } = val;
+            panel_app_record::PanelType {
+                name,
+                slug,
+                description,
+            }
+        }
+    }
+
+    /// Representation of a panel.
+    #[derive(Debug, Clone, Serialize, Deserialize)]
+    pub struct Panel {
+        /// Panel ID.
+        pub id: u32,
+        /// Panel hash ID.
+        pub hash_id: Option<String>,
+        /// The panel name.
+        pub name: String,
+        /// The disease group.
+        pub disease_group: String,
+        /// The disease sub group.
+        pub disease_sub_group: String,
+        /// The panel version.
+        pub version: String,
+        /// Timestamp at which this panel version was created.
+        pub version_created: String,
+        /// Disorders relevant to the panel.
+        pub relevant_disorders: Vec<String>,
+        /// The panel stats.
+        pub stats: PanelStats,
+        /// The panel types.
+        pub types: Vec<PanelType>,
+    }
+
+    impl From<Panel> for panel_app_record::Panel {
+        fn from(val: Panel) -> Self {
+            let Panel {
+                id,
+                hash_id,
+                name,
+                disease_group,
+                disease_sub_group,
+                version,
+                version_created,
+                relevant_disorders,
+                stats,
+                types,
+            } = val;
+            panel_app_record::Panel {
+                id,
+                hash_id,
+                name,
+                disease_group,
+                disease_sub_group,
+                version,
+                version_created,
+                relevant_disorders,
+                stats: Some(Into::<panel_app_record::PanelStats>::into(stats)),
+                types: types
+                    .into_iter()
+                    .map(Into::<panel_app_record::PanelType>::into)
+                    .collect(),
+            }
+        }
+    }
+
+    /// Representation of one gene record.
+    #[derive(Debug, Clone, Serialize, Deserialize)]
+    pub struct Record {
+        /// Gene identity information.
+        pub gene_data: Option<GeneData>,
+        /// Entity type.
+        pub entity_type: EntityType,
+        /// Entity name.
+        pub entity_name: String,
+        /// Confidence level.
+        pub confidence_level: ConfidenceLevel,
+        /// Penetrance; a JSON `null` deserializes to [`Penetrance::Unknown`].
+        #[serde(deserialize_with = "deserialize_null_default")]
+        pub penetrance: Penetrance,
+        /// Publications.
+        pub publications: Vec<String>,
+        /// Evidence.
+        pub evidence: Vec<String>,
+        /// Phenotypes.
+        pub phenotypes: Vec<String>,
+        /// Mode of inheritance.
+        pub mode_of_inheritance: String,
+        /// Information about the panel of this assessment.
+        pub panel: Panel,
+    }
+
+    impl From<Record> for crate::pbs::genes::base::PanelAppRecord {
+        fn from(val: Record) -> Self {
+            let Record {
+                gene_data,
+                entity_type,
+                entity_name,
+                confidence_level,
+                penetrance,
+                publications,
+                evidence,
+                phenotypes,
+                mode_of_inheritance,
+                panel,
+            } = val;
+            crate::pbs::genes::base::PanelAppRecord {
+                gene_data: gene_data.map(Into::<panel_app_record::GeneData>::into),
+                entity_type: Into::<panel_app_record::EntityType>::into(entity_type) as i32,
+                entity_name,
+                confidence_level: Into::<panel_app_record::ConfidenceLevel>::into(confidence_level)
+                    as i32,
+                penetrance: Into::<panel_app_record::Penetrance>::into(penetrance) as i32,
+                publications,
+                evidence,
+                phenotypes,
+                mode_of_inheritance,
+                panel: Some(Into::<panel_app_record::Panel>::into(panel)),
+            }
+        }
+    }
+
+    fn deserialize_null_default<'de, D, T>(deserializer: D) -> Result<T, D::Error>
+    where
+        T: Default + Deserialize<'de>,
+        D: serde::Deserializer<'de>,
+    {
+        let opt = Option::deserialize(deserializer)?;
+        Ok(opt.unwrap_or_default())
+    }
+}
+
 /// Code for reading gene to ORPHA disease associations.
pub mod orpha { use serde::{Deserialize, Serialize}; @@ -2200,6 +2489,27 @@ mod tests { Ok(()) } + #[test] + fn deserialize_panelapp_record() -> Result<(), anyhow::Error> { + let path_jsonl = "tests/genes/panelapp/panelapp.jsonl"; + let str_jsonl = std::fs::read_to_string(path_jsonl)?; + let records = str_jsonl + .lines() + .map(|s| { + serde_json::from_str::(s) + .map_err(|e| { + println!("{}", &s); + e + }) + .unwrap() + }) + .collect::>(); + + insta::assert_yaml_snapshot!(records); + + Ok(()) + } + #[test] fn deserialize_ncbi_record() -> Result<(), anyhow::Error> { let path_tsv = "tests/genes/acmg/acmg.tsv"; diff --git a/src/genes/cli/import.rs b/src/genes/cli/import.rs index fb94c2c0..bbc76375 100644 --- a/src/genes/cli/import.rs +++ b/src/genes/cli/import.rs @@ -13,12 +13,12 @@ use tracing::info; use crate::{ common::{self, version}, - pbs, + pbs::{self, genes::base::PanelAppRecord}, }; use super::data::{ self, acmg_sf, clingen_gene, dbnsfp_gene, decipher_hi, domino, gnomad_constraints, gtex, hgnc, - ncbi, omim, orpha, rcnv, shet, + ncbi, omim, orpha, panelapp, rcnv, shet, }; /// Command line arguments for `genes import` sub command. @@ -52,6 +52,9 @@ pub struct Args { /// Path to the TSV file with ORPHA disease information. #[arg(long, required = true)] pub path_in_orpha: String, + /// Path to the JSONL file with PanelApp disease information. + #[arg(long, required = true)] + pub path_in_panelapp: String, /// Path to the TSV file with rCNV information. #[arg(long, required = true)] pub path_in_rcnv: String, @@ -322,6 +325,42 @@ fn load_orpha(path: &str) -> Result, anyhow::Erro Ok(result) } +/// Load PanelApp gene mapping. +/// +/// # Result +/// +/// A map from HGNC ID to PanelApp gene record. +fn load_panelapp( + path: &str, + hgnc: &HashMap, +) -> Result>, anyhow::Error> { + // Build map from HGNC gene symbol to HGNC id. 
+ let hgnc_symbol_to_id = hgnc + .iter() + .map(|(hgnc_id, record)| (record.symbol.clone(), hgnc_id)) + .collect::>(); + + info!(" loading PanelApp information from {}", path); + let mut result: HashMap> = HashMap::new(); + + let reader = std::fs::File::open(path).map(std::io::BufReader::new)?; + for line in reader.lines() { + let line = line?; + let record = serde_json::from_str::(&line)?; + if let Some(gene_data) = record.gene_data.as_ref() { + if let Some(hgnc_id) = gene_data.hgnc_id.as_ref() { + result.entry(hgnc_id.clone()).or_default().push(record); + } else if let Some(gene_symbol) = gene_data.gene_symbol.as_ref() { + if let Some(hgnc_id) = hgnc_symbol_to_id.get(gene_symbol) { + result.entry((*hgnc_id).clone()).or_default().push(record); + } + } + } + } + + Ok(result) +} + /// Load rCNV (Collins et al., 2022) information. /// /// # Result @@ -419,6 +458,7 @@ fn convert_record(record: data::Record) -> pbs::genes::base::Record { ncbi, omim, orpha, + panelapp, rcnv, shet, gtex, @@ -936,6 +976,11 @@ fn convert_record(record: data::Record) -> pbs::genes::base::Record { } }); + let panelapp = panelapp + .into_iter() + .map(Into::::into) + .collect::>(); + let rcnv = rcnv.map(|rcnv| { let rcnv::Record { hgnc_id, @@ -1004,6 +1049,7 @@ fn convert_record(record: data::Record) -> pbs::genes::base::Record { shet, gtex, domino, + panelapp, decipher_hi, } } @@ -1020,6 +1066,7 @@ fn write_rocksdb( ncbi_by_ncbi_id: HashMap, omim_by_hgnc_id: HashMap, orpha_by_hgnc_id: HashMap, + panelapp_by_hgnc_id: HashMap>, rcnv_by_hgnc_id: HashMap, shet_by_hgnc_id: HashMap, gtex_by_hgnc_id: HashMap, @@ -1064,6 +1111,10 @@ fn write_rocksdb( hgnc: hgnc_record.clone(), omim: omim_by_hgnc_id.get(&hgnc_id).cloned(), orpha: orpha_by_hgnc_id.get(&hgnc_id).cloned(), + panelapp: panelapp_by_hgnc_id + .get(&hgnc_id) + .cloned() + .unwrap_or_default(), ncbi: hgnc_record .entrez_id .as_ref() @@ -1103,6 +1154,7 @@ pub fn run(common_args: &common::cli::Args, args: &Args) -> Result<(), anyhow::E let 
ncbi_by_ncbi_id = load_ncbi(&args.path_in_ncbi)?; let omim_by_hgnc_id = load_omim(&args.path_in_omim)?; let orpha_by_hgnc_id = load_orpha(&args.path_in_orpha)?; + let panelapp_by_hgnc_id = load_panelapp(&args.path_in_panelapp, &hgnc)?; let rcnv_by_hgnc_id = load_rcnv(&args.path_in_rcnv)?; let shet_by_hgnc_id = load_shet(&args.path_in_shet)?; let gtex_by_hgnc_id = load_gtex(&args.path_in_gtex)?; @@ -1125,6 +1177,7 @@ pub fn run(common_args: &common::cli::Args, args: &Args) -> Result<(), anyhow::E ncbi_by_ncbi_id, omim_by_hgnc_id, orpha_by_hgnc_id, + panelapp_by_hgnc_id, rcnv_by_hgnc_id, shet_by_hgnc_id, gtex_by_hgnc_id, @@ -1172,6 +1225,7 @@ pub mod test { path_in_ncbi: String::from("tests/genes/ncbi/gene_info.jsonl"), path_in_omim: String::from("tests/genes/omim/omim_diseases.tsv"), path_in_orpha: String::from("tests/genes/orphanet/orphanet_diseases.tsv"), + path_in_panelapp: String::from("tests/genes/panelapp/panelapp.jsonl"), path_in_rcnv: String::from("tests/genes/rcnv/rcnv.tsv"), path_in_shet: String::from("tests/genes/shet/shet.tsv"), path_in_gtex: String::from("tests/genes/gtex/genes_tpm.jsonl"), diff --git a/src/genes/cli/snapshots/annonars__genes__cli__data__tests__deserialize_panelapp_record.snap b/src/genes/cli/snapshots/annonars__genes__cli__data__tests__deserialize_panelapp_record.snap new file mode 100644 index 00000000..eed78841 --- /dev/null +++ b/src/genes/cli/snapshots/annonars__genes__cli__data__tests__deserialize_panelapp_record.snap @@ -0,0 +1,184 @@ +--- +source: src/genes/cli/data.rs +expression: records +--- +- gene_data: + hgnc_id: "HGNC:13164" + hgnc_symbol: CNBP + gene_symbol: CNBP + entity_type: str + entity_name: CNBP_CCTG + confidence_level: "1" + penetrance: unknown + publications: [] + evidence: + - NHS GMS + - Expert Review Red + - Expert list + phenotypes: + - "Myotonic dystrophy 2, OMIM:602668" + - "Myotonic dystrophy type 2, MONDO:0011266" + mode_of_inheritance: "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted" + panel: + 
id: 229 + hash_id: 55c4a3ed22c1fc0fe5e416e9 + name: Skeletal Muscle Channelopathies + disease_group: Neurology and neurodevelopmental disorders + disease_sub_group: Channelopathies + version: "1.45" + version_created: "2022-03-10T17:10:13.493246Z" + relevant_disorders: [] + stats: + number_of_genes: 22 + number_of_strs: 2 + number_of_regions: 0 + types: + - name: Rare Disease 100K + slug: rare-disease-100k + description: Rare Disease 100K +- gene_data: + hgnc_id: "HGNC:2933" + hgnc_symbol: DMPK + gene_symbol: DMPK + entity_type: str + entity_name: DMPK_CTG + confidence_level: "3" + penetrance: unknown + publications: [] + evidence: + - Expert Review Green + - NHS GMS + - Expert list + phenotypes: + - "Myotonic dystrophy 1, OMIM:160900" + mode_of_inheritance: "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted" + panel: + id: 229 + hash_id: 55c4a3ed22c1fc0fe5e416e9 + name: Skeletal Muscle Channelopathies + disease_group: Neurology and neurodevelopmental disorders + disease_sub_group: Channelopathies + version: "1.45" + version_created: "2022-03-10T17:10:13.493246Z" + relevant_disorders: [] + stats: + number_of_genes: 22 + number_of_strs: 2 + number_of_regions: 0 + types: + - name: Rare Disease 100K + slug: rare-disease-100k + description: Rare Disease 100K +- gene_data: + hgnc_id: "HGNC:2933" + hgnc_symbol: DMPK + gene_symbol: DMPK + entity_type: str + entity_name: DMPK_CTG + confidence_level: "1" + penetrance: unknown + publications: [] + evidence: + - NHS GMS + - Expert Review Red + - Expert list + phenotypes: + - "Myotonic dystrophy 1, OMIM:160900" + mode_of_inheritance: "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted" + panel: + id: 144 + hash_id: 5763f4868f620350a199604f + name: Fetal hydrops + disease_group: Dysmorphic and congenital abnormality syndromes + disease_sub_group: Fetal disorders + version: "1.61" + version_created: "2023-07-26T10:19:26.703341Z" + relevant_disorders: [] + stats: + number_of_genes: 91 + number_of_strs: 1 + 
number_of_regions: 0 + types: + - name: Rare Disease 100K + slug: rare-disease-100k + description: Rare Disease 100K +- gene_data: + hgnc_id: "HGNC:644" + hgnc_symbol: AR + gene_symbol: AR + entity_type: str + entity_name: AR_CAG + confidence_level: "3" + penetrance: unknown + publications: [] + evidence: + - Expert Review Green + - NHS GMS + - Expert list + phenotypes: + - "Spinal and bulbar muscular atrophy of Kennedy, OMIM:313200" + mode_of_inheritance: "X-LINKED: hemizygous mutation in males, monoallelic mutations in females may cause disease (may be less severe, later onset than males)" + panel: + id: 235 + hash_id: 55b7a0bb22c1fc05fd2345d1 + name: Distal myopathies + disease_group: Neurology and neurodevelopmental disorders + disease_sub_group: Neuromuscular disorders + version: "3.16" + version_created: "2023-10-25T21:25:56.162797Z" + relevant_disorders: [] + stats: + number_of_genes: 31 + number_of_strs: 2 + number_of_regions: 0 + types: + - name: Rare Disease 100K + slug: rare-disease-100k + description: Rare Disease 100K + - name: Component Of Super Panel + slug: component-of-super-panel + description: This panel is a component of a Super Panel + - name: GMS signed-off + slug: gms-signed-off + description: This panel has undergone review by a NHSE GMS disease specialist group and processes to be signed-off for use within the GMS. 
+- gene_data: + hgnc_id: "HGNC:13164" + hgnc_symbol: CNBP + gene_symbol: CNBP + entity_type: str + entity_name: CNBP_CCTG + confidence_level: "1" + penetrance: unknown + publications: [] + evidence: + - NHS GMS + - Expert Review Red + - Expert list + phenotypes: + - "Myotonic dystrophy 2, OMIM:602668" + - "Myotonic dystrophy type 2, MONDO:0011266" + mode_of_inheritance: "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted" + panel: + id: 235 + hash_id: 55b7a0bb22c1fc05fd2345d1 + name: Distal myopathies + disease_group: Neurology and neurodevelopmental disorders + disease_sub_group: Neuromuscular disorders + version: "3.16" + version_created: "2023-10-25T21:25:56.162797Z" + relevant_disorders: [] + stats: + number_of_genes: 31 + number_of_strs: 2 + number_of_regions: 0 + types: + - name: Rare Disease 100K + slug: rare-disease-100k + description: Rare Disease 100K + - name: Component Of Super Panel + slug: component-of-super-panel + description: This panel is a component of a Super Panel + - name: GMS signed-off + slug: gms-signed-off + description: This panel has undergone review by a NHSE GMS disease specialist group and processes to be signed-off for use within the GMS. 
+ diff --git a/tests/genes/panelapp/panelapp.jsonl b/tests/genes/panelapp/panelapp.jsonl new file mode 100644 index 00000000..d0570b51 --- /dev/null +++ b/tests/genes/panelapp/panelapp.jsonl @@ -0,0 +1,5 @@ +{"gene_data": {"alias": ["RNF163", "ZCCHC22", "CNBP1"], "biotype": "protein_coding", "hgnc_id": "HGNC:13164", "gene_name": "CCHC-type zinc finger nucleic acid binding protein", "omim_gene": ["116955"], "alias_name": null, "gene_symbol": "CNBP", "hgnc_symbol": "CNBP", "hgnc_release": "2017-11-03", "ensembl_genes": {"GRch37": {"82": {"location": "3:128888327-128902765", "ensembl_id": "ENSG00000169714"}}, "GRch38": {"90": {"location": "3:129169484-129183922", "ensembl_id": "ENSG00000169714"}}}, "hgnc_date_symbol_changed": "2006-06-29"}, "entity_type": "str", "entity_name": "CNBP_CCTG", "confidence_level": "1", "penetrance": null, "publications": [], "evidence": ["NHS GMS", "Expert Review Red", "Expert list"], "phenotypes": ["Myotonic dystrophy 2, OMIM:602668", "Myotonic dystrophy type 2, MONDO:0011266"], "mode_of_inheritance": "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted", "repeated_sequence": "CAGG", "chromosome": "3", "grch37_coordinates": [128891420, 128891499], "grch38_coordinates": [129172577, 129172656], "normal_repeats": 27, "pathogenic_repeats": 75, "tags": ["STR", "NGS Not Validated"], "panel": {"id": 229, "hash_id": "55c4a3ed22c1fc0fe5e416e9", "name": "Skeletal Muscle Channelopathies", "disease_group": "Neurology and neurodevelopmental disorders", "disease_sub_group": "Channelopathies", "status": "public", "version": "1.45", "version_created": "2022-03-10T17:10:13.493246Z", "relevant_disorders": [], "stats": {"number_of_genes": 22, "number_of_strs": 2, "number_of_regions": 0}, "types": [{"name": "Rare Disease 100K", "slug": "rare-disease-100k", "description": "Rare Disease 100K"}]}} +{"gene_data": {"alias": ["DMK", "DM1PK", "MDPK", "MT-PK"], "biotype": "protein_coding", "hgnc_id": "HGNC:2933", "gene_name": "DM1 protein kinase", "omim_gene": 
["605377"], "alias_name": ["dystrophia myotonica 1", "DM protein kinase", "myotonin protein kinase A", "myotonic dystrophy associated protein kinase", "thymopoietin homolog"], "gene_symbol": "DMPK", "hgnc_symbol": "DMPK", "hgnc_release": "2017-11-03", "ensembl_genes": {"GRch37": {"82": {"location": "19:46272975-46285810", "ensembl_id": "ENSG00000104936"}}, "GRch38": {"90": {"location": "19:45769717-45782552", "ensembl_id": "ENSG00000104936"}}}, "hgnc_date_symbol_changed": "1997-10-10"}, "entity_type": "str", "entity_name": "DMPK_CTG", "confidence_level": "3", "penetrance": null, "publications": [], "evidence": ["Expert Review Green", "NHS GMS", "Expert list"], "phenotypes": ["Myotonic dystrophy 1, OMIM:160900"], "mode_of_inheritance": "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted", "repeated_sequence": "CTG", "chromosome": "19", "grch37_coordinates": [46273463, 46273522], "grch38_coordinates": [45770205, 45770264], "normal_repeats": 35, "pathogenic_repeats": 50, "tags": ["STR"], "panel": {"id": 229, "hash_id": "55c4a3ed22c1fc0fe5e416e9", "name": "Skeletal Muscle Channelopathies", "disease_group": "Neurology and neurodevelopmental disorders", "disease_sub_group": "Channelopathies", "status": "public", "version": "1.45", "version_created": "2022-03-10T17:10:13.493246Z", "relevant_disorders": [], "stats": {"number_of_genes": 22, "number_of_strs": 2, "number_of_regions": 0}, "types": [{"name": "Rare Disease 100K", "slug": "rare-disease-100k", "description": "Rare Disease 100K"}]}} +{"gene_data": {"alias": ["DMK", "DM1PK", "MDPK", "MT-PK"], "biotype": "protein_coding", "hgnc_id": "HGNC:2933", "gene_name": "DM1 protein kinase", "omim_gene": ["605377"], "alias_name": ["dystrophia myotonica 1", "DM protein kinase", "myotonin protein kinase A", "myotonic dystrophy associated protein kinase", "thymopoietin homolog"], "gene_symbol": "DMPK", "hgnc_symbol": "DMPK", "hgnc_release": "2017-11-03", "ensembl_genes": {"GRch37": {"82": {"location": "19:46272975-46285810", 
"ensembl_id": "ENSG00000104936"}}, "GRch38": {"90": {"location": "19:45769717-45782552", "ensembl_id": "ENSG00000104936"}}}, "hgnc_date_symbol_changed": "1997-10-10"}, "entity_type": "str", "entity_name": "DMPK_CTG", "confidence_level": "1", "penetrance": null, "publications": [], "evidence": ["NHS GMS", "Expert Review Red", "Expert list"], "phenotypes": ["Myotonic dystrophy 1, OMIM:160900"], "mode_of_inheritance": "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted", "repeated_sequence": "CTG", "chromosome": "19", "grch37_coordinates": [46273463, 46273522], "grch38_coordinates": [45770205, 45770264], "normal_repeats": 35, "pathogenic_repeats": 50, "tags": ["STR"], "panel": {"id": 144, "hash_id": "5763f4868f620350a199604f", "name": "Fetal hydrops", "disease_group": "Dysmorphic and congenital abnormality syndromes", "disease_sub_group": "Fetal disorders", "status": "public", "version": "1.61", "version_created": "2023-07-26T10:19:26.703341Z", "relevant_disorders": [], "stats": {"number_of_genes": 91, "number_of_strs": 1, "number_of_regions": 0}, "types": [{"name": "Rare Disease 100K", "slug": "rare-disease-100k", "description": "Rare Disease 100K"}]}} +{"gene_data": {"alias": ["AIS", "NR3C4", "SMAX1", "HUMARA"], "biotype": "protein_coding", "hgnc_id": "HGNC:644", "gene_name": "androgen receptor", "omim_gene": ["313700"], "alias_name": ["testicular feminization", "Kennedy disease"], "gene_symbol": "AR", "hgnc_symbol": "AR", "hgnc_release": "2017-11-03", "ensembl_genes": {"GRch37": {"82": {"location": "X:66764465-66950461", "ensembl_id": "ENSG00000169083"}}, "GRch38": {"90": {"location": "X:67544032-67730619", "ensembl_id": "ENSG00000169083"}}}, "hgnc_date_symbol_changed": "1986-01-01"}, "entity_type": "str", "entity_name": "AR_CAG", "confidence_level": "3", "penetrance": null, "publications": [], "evidence": ["Expert Review Green", "NHS GMS", "Expert list"], "phenotypes": ["Spinal and bulbar muscular atrophy of Kennedy, OMIM:313200"], "mode_of_inheritance": 
"X-LINKED: hemizygous mutation in males, monoallelic mutations in females may cause disease (may be less severe, later onset than males)", "repeated_sequence": "CAG", "chromosome": "X", "grch37_coordinates": [66765160, 66765225], "grch38_coordinates": [67545316, 67545383], "normal_repeats": 35, "pathogenic_repeats": 38, "tags": ["STR"], "panel": {"id": 235, "hash_id": "55b7a0bb22c1fc05fd2345d1", "name": "Distal myopathies", "disease_group": "Neurology and neurodevelopmental disorders", "disease_sub_group": "Neuromuscular disorders", "status": "public", "version": "3.16", "version_created": "2023-10-25T21:25:56.162797Z", "relevant_disorders": [], "stats": {"number_of_genes": 31, "number_of_strs": 2, "number_of_regions": 0}, "types": [{"name": "Rare Disease 100K", "slug": "rare-disease-100k", "description": "Rare Disease 100K"}, {"name": "Component Of Super Panel", "slug": "component-of-super-panel", "description": "This panel is a component of a Super Panel"}, {"name": "GMS signed-off", "slug": "gms-signed-off", "description": "This panel has undergone review by a NHSE GMS disease specialist group and processes to be signed-off for use within the GMS."}]}} +{"gene_data": {"alias": ["RNF163", "ZCCHC22", "CNBP1"], "biotype": "protein_coding", "hgnc_id": "HGNC:13164", "gene_name": "CCHC-type zinc finger nucleic acid binding protein", "omim_gene": ["116955"], "alias_name": null, "gene_symbol": "CNBP", "hgnc_symbol": "CNBP", "hgnc_release": "2017-11-03", "ensembl_genes": {"GRch37": {"82": {"location": "3:128888327-128902765", "ensembl_id": "ENSG00000169714"}}, "GRch38": {"90": {"location": "3:129169484-129183922", "ensembl_id": "ENSG00000169714"}}}, "hgnc_date_symbol_changed": "2006-06-29"}, "entity_type": "str", "entity_name": "CNBP_CCTG", "confidence_level": "1", "penetrance": null, "publications": [], "evidence": ["NHS GMS", "Expert Review Red", "Expert list"], "phenotypes": ["Myotonic dystrophy 2, OMIM:602668", "Myotonic dystrophy type 2, MONDO:0011266"], 
"mode_of_inheritance": "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted", "repeated_sequence": "CAGG", "chromosome": "3", "grch37_coordinates": [128891420, 128891499], "grch38_coordinates": [129172577, 129172656], "normal_repeats": 27, "pathogenic_repeats": 75, "tags": ["STR", "NGS Not Validated"], "panel": {"id": 235, "hash_id": "55b7a0bb22c1fc05fd2345d1", "name": "Distal myopathies", "disease_group": "Neurology and neurodevelopmental disorders", "disease_sub_group": "Neuromuscular disorders", "status": "public", "version": "3.16", "version_created": "2023-10-25T21:25:56.162797Z", "relevant_disorders": [], "stats": {"number_of_genes": 31, "number_of_strs": 2, "number_of_regions": 0}, "types": [{"name": "Rare Disease 100K", "slug": "rare-disease-100k", "description": "Rare Disease 100K"}, {"name": "Component Of Super Panel", "slug": "component-of-super-panel", "description": "This panel is a component of a Super Panel"}, {"name": "GMS signed-off", "slug": "gms-signed-off", "description": "This panel has undergone review by a NHSE GMS disease specialist group and processes to be signed-off for use within the GMS."}]}}