Skip to content

Commit

Permalink
Fixed merge request coming from parser refactoring on multiple branches
Browse files Browse the repository at this point in the history
  • Loading branch information
anergictcell committed Jun 11, 2024
1 parent 9619a32 commit e1b193e
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 124 deletions.
11 changes: 5 additions & 6 deletions src/annotations/omim_disease.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ use std::hash::Hash;
use crate::annotations::disease::DiseaseIterator;
use crate::annotations::{AnnotationId, Disease};
use crate::term::HpoGroup;
use crate::HpoError;
use crate::HpoTermId;
use crate::{HpoError, HpoSet, HpoTermId, Ontology};

/// A set of OMIM diseases
///
Expand Down Expand Up @@ -114,7 +113,7 @@ impl Disease for OmimDisease {
/// # Examples
///
/// ```
/// use hpo::annotations::OmimDisease;
/// use hpo::annotations::{Disease, OmimDisease};
///
/// let mut disease = OmimDisease::new(123.into(), "FooBar");
/// let bytes = disease.as_bytes();
Expand All @@ -123,7 +122,7 @@ impl Disease for OmimDisease {
/// assert_eq!(bytes[4..8], [0u8, 0u8, 0u8, 123u8]); // ID of disease => 123
/// assert_eq!(bytes[8..12], [0u8, 0u8, 0u8, 6u8]); // Length of Name => 6
/// ```
pub fn as_bytes(&self) -> Vec<u8> {
fn as_bytes(&self) -> Vec<u8> {
/// Converts a `usize` length into a `u32`.
///
/// # Panics
///
/// Panics if `n` does not fit into a `u32`. The panic message contains
/// the offending value.
fn usize_to_u32(n: usize) -> u32 {
    // `expect` takes a plain string, so a `{n}` placeholder inside it is
    // never interpolated. Format the value explicitly instead.
    n.try_into()
        .unwrap_or_else(|_| panic!("unable to convert {} to u32", n))
}
Expand Down Expand Up @@ -157,7 +156,7 @@ impl Disease for OmimDisease {
}

/// Returns an [`HpoSet`] from the `OmimDisease`
pub fn to_hpo_set<'a>(&self, ontology: &'a Ontology) -> HpoSet<'a> {
fn to_hpo_set<'a>(&self, ontology: &'a Ontology) -> HpoSet<'a> {
HpoSet::new(ontology, self.hpos.clone())
}

Expand All @@ -167,7 +166,7 @@ impl Disease for OmimDisease {
///
/// This method does **not** add the [`OmimDisease`] to the [HPO term](`crate::HpoTerm`).
/// Clients should not use this method, unless they are creating their own Ontology.
pub fn add_term<I: Into<HpoTermId>>(&mut self, term_id: I) -> bool {
fn add_term<I: Into<HpoTermId>>(&mut self, term_id: I) -> bool {
self.hpos.insert(term_id)
}
}
Expand Down
109 changes: 13 additions & 96 deletions src/ontology.rs
Original file line number Diff line number Diff line change
Expand Up @@ -611,87 +611,6 @@ impl Ontology {
}
}

/// Returns a binary representation of the Ontology
///
/// The binary data is separated into sections, written in this order:
///
/// - Metadata (HPO and Bindat Version) (see `Ontology::metadata_as_bytes`)
/// - Terms (Names + IDs) (see `HpoTermInternal::as_bytes`)
/// - Term - Parent connection (Child ID - Parent ID)
///   (see `HpoTermInternal::parents_as_byte`)
/// - Genes (Names + IDs + Connected HPO Terms) ([`Gene::as_bytes`])
/// - OMIM Diseases (Names + IDs + Connected HPO Terms)
///   ([`OmimDisease::as_bytes`])
/// - ORPHA Diseases
///
/// The metadata is written exactly as returned by
/// `Ontology::metadata_as_bytes`. Every following section starts with
/// 4 bytes to indicate its size (big-endian encoded `u32`)
///
/// This method is only useful if you are modifying the ontology
/// and want to save data for later re-use.
///
/// # Panics
///
/// Panics when the buffer length of any subsegment is larger than `u32::MAX`
///
/// # Examples
///
/// ```
/// use hpo::Ontology;
/// let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
/// let bytes = ontology.as_bytes();
/// ```
pub fn as_bytes(&self) -> Vec<u8> {
    fn usize_to_u32(n: usize) -> u32 {
        // `expect` does not interpolate `{n}`; format the value explicitly
        // so the panic message names the offending length.
        n.try_into()
            .unwrap_or_else(|_| panic!("unable to convert {} to u32", n))
    }

    // Writes one section: a big-endian `u32` length prefix, then the payload.
    fn push_section(res: &mut Vec<u8>, section: &[u8]) {
        res.extend_from_slice(&usize_to_u32(section.len()).to_be_bytes());
        res.extend_from_slice(section);
    }

    let mut res = Vec::new();

    // Add metadata, version info
    res.append(&mut self.metadata_as_bytes());

    // One scratch buffer is reused for all sections
    let mut buffer = Vec::new();

    // All HPO Terms
    for term in self.hpo_terms.values() {
        buffer.append(&mut term.as_bytes());
    }
    push_section(&mut res, &buffer);

    // All Term - Parent connections
    buffer.clear();
    for term in self.hpo_terms.values() {
        buffer.append(&mut term.parents_as_byte());
    }
    push_section(&mut res, &buffer);

    // Genes and Gene-Term connections
    buffer.clear();
    for gene in self.genes.values() {
        buffer.append(&mut gene.as_bytes());
    }
    push_section(&mut res, &buffer);

    // OMIM Disease and Disease-Term connections
    buffer.clear();
    for omim_disease in self.omim_diseases.values() {
        buffer.append(&mut omim_disease.as_bytes());
    }
    push_section(&mut res, &buffer);

    // ORPHA Disease and Disease-Term connections
    buffer.clear();
    for orpha_disease in self.orpha_diseases.values() {
        buffer.append(&mut orpha_disease.as_bytes());
    }
    push_section(&mut res, &buffer);

    res
}

/// Returns the number of HPO-Terms in the Ontology
///
/// # Examples
Expand All @@ -718,16 +637,6 @@ impl Ontology {
self.len() == 0
}

/// Returns the Jax-Ontology release version as a string
///
/// The three version components are joined with `-`; the first is
/// left-padded with zeros to 4 characters, the other two to 2 characters,
/// e.g. `2023-03-13`
pub fn hpo_version(&self) -> String {
    let (year, month, day) = (
        &self.hpo_version.0,
        &self.hpo_version.1,
        &self.hpo_version.2,
    );
    format!("{:0>4}-{:0>2}-{:0>2}", year, month, day)
}

/// Returns the [`HpoTerm`] of the provided [`HpoTermId`]
///
/// If no such term is present in the Ontology, `None` is returned
Expand Down Expand Up @@ -1087,6 +996,14 @@ impl Ontology {
res.append(&mut usize_to_u32(buffer.len()).to_be_bytes().to_vec());
res.append(&mut buffer);

// ORPHA Disease and Disease-Term connections
buffer.clear();
for orpha_disease in self.orpha_diseases.values() {
buffer.append(&mut orpha_disease.as_bytes());
}
res.append(&mut usize_to_u32(buffer.len()).to_be_bytes().to_vec());
res.append(&mut buffer);

res
}

Expand Down Expand Up @@ -1165,16 +1082,16 @@ impl Ontology {
if (gene.hpo_terms() & &phenotype_ids).is_empty() {
continue;
}
let gene_id = ont.add_gene(
ont.add_gene(
self.gene(gene.id()).ok_or(HpoError::DoesNotExist)?.name(),
&gene.id().as_u32().to_string()
)?;
*gene.id(),
);

// Link the gene to every term in the new ontology
// --> also modifier terms
for term in &(gene.hpo_terms() & &ids) {
ont.link_gene_term(term, gene_id)?;
ont.gene_mut(&gene_id)
ont.link_gene_term(term, *gene.id())?;
ont.gene_mut(gene.id())
.ok_or(HpoError::DoesNotExist)?
.add_term(term);
}
Expand Down
64 changes: 42 additions & 22 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -366,35 +366,44 @@ pub(crate) mod disease_to_hpo {

fn parse_line(line: &str) -> HpoResult<Option<DiseaseKind<'_>>> {
if line.starts_with("OMIM") {
parse_disease_components(line).map(DiseaseKind::Omim)
Ok(parse_disease_components(line)?.map(DiseaseKind::Omim))
} else if line.starts_with("ORPHA") {
parse_disease_components(line).map(DiseaseKind::Orpha)
Ok(parse_disease_components(line)?.map(DiseaseKind::Orpha))
} else {
Ok(None)
}
}

fn parse_disease_components(line: &str) -> HpoResul<Option<DiseaseComponents<'_>>> {
let cols: Vec<&str> = line.trim().split('\t').collect();
if cols[2] == "NOT" {
return Ok(None);
}
fn parse_disease_components(line: &str) -> HpoResult<Option<DiseaseComponents<'_>>> {
let mut cols = line.trim().splitn(5, '\t');

let Some(id_col) = cols.next() else {
return Err(HpoError::InvalidInput(line.to_string()));
};

let Some((_, omim_id)) = cols[0].split_once(':') else {
error!("cannot parse Disease ID from {}", cols[0]);
let Some((_, disease_id)) = id_col.split_once(':') else {
return Err(HpoError::InvalidInput(line.to_string()));
};

let Ok(hpo_id) = HpoTermId::try_from(cols[3]) else {
error!("invalid HPO ID: {}", cols[3]);

let Some(disease_name) = cols.next() else {
return Err(HpoError::InvalidInput(line.to_string()));
};


if let Some("NOT") = cols.next() {
return Ok(None);
};

let hpo_id = if let Some(id) = cols.next() {
HpoTermId::try_from(id)?
} else {
return Err(HpoError::InvalidInput(line.to_string()));
};

Ok(Some(DiseaseComponents {
id: omim_id,
name: omim_name,
id: disease_id,
name: disease_name,
hpo_id,
})))
}))
}

/// Quick and dirty parser for development and debugging
Expand Down Expand Up @@ -456,11 +465,18 @@ pub(crate) mod disease_to_hpo {
}

#[test]
fn test_skip_orpha() {
fn test_correct_orpha() {
let s = "ORPHA:600171\tGonadal agenesis\t\tHP:0000055\tOMIM:600171\tTAS\tP\tHPO:skoehler[2014-11-27]";
assert!(parse_line(s)
let orpha = parse_line(s)
.expect("This line has the correct format")
.is_none());
.expect("Line describes an Omim disease");
if let DiseaseKind::Orpha(orpha) = orpha {
assert_eq!(orpha.name, "Gonadal agenesis");
assert_eq!(orpha.id, "600171");
assert_eq!(orpha.hpo_id, "HP:0000055");
} else {
panic!("Orpha line should be parsed as Orpha correctly");
}
}

#[test]
Expand All @@ -477,9 +493,13 @@ pub(crate) mod disease_to_hpo {
let omim = parse_line(s)
.expect("This line has the correct format")
.expect("Line describes an Omim disease");
assert_eq!(omim.name, "Gonadal agenesis");
assert_eq!(omim.id, "600171");
assert_eq!(omim.hpo_id, "HP:0000055");
if let DiseaseKind::Omim(omim) = omim {
assert_eq!(omim.name, "Gonadal agenesis");
assert_eq!(omim.id, "600171");
assert_eq!(omim.hpo_id, "HP:0000055");
} else {
panic!("Omim line should be parsed as Omim correctly");
}
}

#[test]
Expand Down

0 comments on commit e1b193e

Please sign in to comment.