Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change Gene - Term associations #54

Merged
merged 3 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 30 additions & 13 deletions examples/compare_ontologies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@ fn ontology(path_arg: &str) -> Ontology {
}
}

fn ontology2(path_arg: &str) -> Ontology {
let path = Path::new(path_arg);

match path.is_file() {
true => Ontology::from_binary(path).unwrap(),
false => Ontology::from_standard_transitive(&path.to_string_lossy()).unwrap(),
}
}

/// Prints some basic stats about the differences
/// between two Ontologies
fn overview(diffs: &Comparison) {
Expand Down Expand Up @@ -89,15 +98,19 @@ fn changed_terms(diffs: &Comparison) {

/// Prints info about Gene-specific changes
fn changed_genes(diffs: &Comparison) {
println!("#Gene Delta\tID\tOld Name:New Name\tAdded Parents\tRemoved Parents");
for term in diffs.changed_genes() {
print!("Delta\t{}", term.id());
if let Some(names) = term.changed_name() {
println!(
"#Gene Delta\tID\tOld Name:New Name\tn Terms Old\tn Terms New\tAdded Terms\tRemoved Terms"
);
for gene in diffs.changed_genes() {
print!("Delta\t{}", gene.id());
if let Some(names) = gene.changed_name() {
print!("\t{}:{}", names.0, names.1);
} else {
print!("\t.");
}
if let Some(added) = term.added_terms() {
let (n_l, n_r) = gene.n_terms();
print!("\t{n_l}\t{n_r}");
if let Some(added) = gene.added_terms() {
print!(
"\t{}",
added
Expand All @@ -109,7 +122,7 @@ fn changed_genes(diffs: &Comparison) {
} else {
print!("\t.");
}
if let Some(removed) = term.removed_terms() {
if let Some(removed) = gene.removed_terms() {
print!(
"\t{}",
removed
Expand All @@ -127,15 +140,19 @@ fn changed_genes(diffs: &Comparison) {

/// Prints info about Gene-specific changes
fn changed_diseases(diffs: &Comparison) {
println!("#Disease Delta\tID\tOld Name:New Name\tAdded Parents\tRemoved Parents");
for term in diffs.changed_omim_diseases() {
print!("Delta\t{}", term.id());
if let Some(names) = term.changed_name() {
println!("#Disease Delta\tID\tOld Name:New Name\tn Terms Old\tn Terms New\tAdded Terms\tRemoved Terms");
for disease in diffs.changed_omim_diseases() {
print!("Delta\t{}", disease.id());
if let Some(names) = disease.changed_name() {
print!("\t{}:{}", names.0, names.1);
} else {
print!("\t.");
}
if let Some(added) = term.added_terms() {

let (n_l, n_r) = disease.n_terms();
print!("\t{n_l}\t{n_r}");

if let Some(added) = disease.added_terms() {
print!(
"\t{}",
added
Expand All @@ -147,7 +164,7 @@ fn changed_diseases(diffs: &Comparison) {
} else {
print!("\t.");
}
if let Some(removed) = term.removed_terms() {
if let Some(removed) = disease.removed_terms() {
print!(
"\t{}",
removed
Expand Down Expand Up @@ -206,7 +223,7 @@ fn main() {
let arg_new = args.next().unwrap();

let lhs = ontology(&arg_old);
let rhs = ontology(&arg_new);
let rhs = ontology2(&arg_new);

let diffs = lhs.compare(&rhs);

Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const MAX_HPO_ID_INTEGER: usize = 10_000_000;

const OBO_FILENAME: &str = "hp.obo";
const GENE_FILENAME: &str = "phenotype_to_genes.txt";
const GENE_TO_PHENO_FILENAME: &str = "genes_to_phenotype.txt";
const DISEASE_FILENAME: &str = "phenotype.hpoa";

/// The `HpoTermId` of `HP:0000118 | Phenotypic abnormality`
Expand Down
81 changes: 78 additions & 3 deletions src/ontology.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ use termarena::Arena;
/// Then use [`Ontology::from_standard`] to load the data.
/// You need the following files:
/// - `phenotype.hpoa` (Required to connect [`OmimDisease`]s to [`HpoTerm`]s)
/// - `phenotype_to_genes.txt` (Required to connect [`Gene`]s to [`HpoTerm`]s)
/// - `genes_to_phenotype.txt` (Required to connect [`Gene`]s to [`HpoTerm`]s)
/// - alternatively: `phenotype_to_genes.txt` (use [`Ontology::from_standard_transitive`])
/// - `hp.obo` (Required for [`HpoTerm`]s and their connection to each other)
/// 2. Load the ontology from a binary build using [`Ontology::from_binary`].
///
Expand Down Expand Up @@ -112,6 +113,15 @@ use termarena::Arena;
/// [`Ontology`] does not contain a direct relationship between genes and diseases. This relation
/// is only present indirectly via the connected [`HpoTerm`]s.
///
/// # Transivity of relations
///
/// **New in 0.9.0**
///
/// During the construction of the Ontology, every [`HpoTerm`] inherits all gene and disease
/// association of its child terms.
/// But [`Gene`]s and [`OmimDisease`]s will only contain links to *direct* [`HpoTerm`]s. The annotations
/// are not transitiv.
///
/// ```mermaid
/// erDiagram
/// ONTOLOGY ||--|{ HPOTERM : contains
Expand Down Expand Up @@ -368,13 +378,22 @@ impl Ontology {
/// - [`Ontology::add_gene`] failed
/// - [`Ontology::add_omim_disease`] failed
///
///
/// # Note
///
/// Since version `0.9.0` this method will not load genes transitively. That means
/// only directly linked [`HpoTerm`]s are connected to each gene. However, every
/// [`HpoTerm`] will still inherit all gene and disease associations from its children.
/// See [this discussion](https://github.com/anergictcell/hpo/issues/44) for a more detailed
/// explanation
///
/// # Examples
///
/// ```no_run
/// use hpo::Ontology;
/// use hpo::HpoTermId;
///
/// let ontology = Ontology::from_binary("/path/to/jax_hpo_data/").unwrap();
/// let ontology = Ontology::from_standard("/path/to/jax_hpo_data/").unwrap();
///
/// assert!(ontology.len() == 26);
///
Expand All @@ -387,12 +406,68 @@ impl Ontology {
/// ```
///
pub fn from_standard(folder: &str) -> HpoResult<Self> {
let mut ont = Ontology::default();
let path = Path::new(folder);
let obo = path.join(crate::OBO_FILENAME);
let gene = path.join(crate::GENE_TO_PHENO_FILENAME);
let disease = path.join(crate::DISEASE_FILENAME);
parser::load_from_jax_files(&obo, &gene, &disease, &mut ont)?;
ont.calculate_information_content()?;
ont.set_default_categories()?;
ont.set_default_modifier()?;
Ok(ont)
}

/// Initialize the [`Ontology`] from data provided by [Jax HPO](https://hpo.jax.org/)
///
/// You must download:
///
/// - Actual OBO data: [`hp.obo`](https://hpo.jax.org/app/data/ontology)
/// - Links between HPO and OMIM diseases: [`phenotype.hpoa`](https://hpo.jax.org/app/data/annotations)
/// - Links between HPO and Genes: [`genes_to_phenotypes.txt`](http://purl.obolibrary.org/obo/hp/hpoa/genes_to_phenotype.txt)
///
/// and then specify the folder where the data is stored.
///
/// # Errors
///
/// This method can fail for various reasons:
///
/// - obo file not present or available: [`HpoError::CannotOpenFile`]
/// - [`Ontology::add_gene`] failed
/// - [`Ontology::add_omim_disease`] failed
///
/// # Note
///
/// This method has one difference to [`Ontology::from_standard`] in that every [`Gene`]
/// contains directly linked [`HpoTerm`] and all their ancestor terms.
/// See [this discussion](https://github.com/anergictcell/hpo/issues/44) for a more detailed
/// explanation
///
/// # Examples
///
/// ```no_run
/// use hpo::Ontology;
/// use hpo::HpoTermId;
///
/// let ontology = Ontology::from_standard_transitive("/path/to/jax_hpo_data/").unwrap();
///
/// assert!(ontology.len() == 26);
///
/// let absent_term = HpoTermId::try_from("HP:9999999").unwrap();
/// assert!(ontology.hpo(absent_term).is_none());
///
/// let present_term = HpoTermId::try_from("HP:0000001").unwrap();
/// let root_term = ontology.hpo(present_term).unwrap();
/// assert_eq!(root_term.name(), "All");
/// ```
///
pub fn from_standard_transitive(folder: &str) -> HpoResult<Self> {
let mut ont = Ontology::default();
let path = Path::new(folder);
let obo = path.join(crate::OBO_FILENAME);
let gene = path.join(crate::GENE_FILENAME);
let disease = path.join(crate::DISEASE_FILENAME);
parser::load_from_standard_files(&obo, &gene, &disease, &mut ont)?;
parser::load_from_jax_files_with_transivitve_genes(&obo, &gene, &disease, &mut ont)?;
ont.calculate_information_content()?;
ont.set_default_categories()?;
ont.set_default_modifier()?;
Expand Down
7 changes: 7 additions & 0 deletions src/ontology/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ impl HpoTermDelta {
pub struct AnnotationDelta {
id: String,
names: (String, String),
n_terms: (usize, usize),
added_terms: Vec<HpoTermId>,
removed_terms: Vec<HpoTermId>,
}
Expand Down Expand Up @@ -337,6 +338,7 @@ impl<'a> AnnotationDelta {
Some(Self {
id,
names,
n_terms: (lhs_terms.len(), rhs_terms.len()),
added_terms,
removed_terms,
})
Expand Down Expand Up @@ -384,4 +386,9 @@ impl<'a> AnnotationDelta {
pub fn id(&self) -> &str {
&self.id
}

/// Returns the number of terms linked to `old` and `new`
pub fn n_terms(&self) -> (usize, usize) {
self.n_terms
}
}
49 changes: 48 additions & 1 deletion src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,41 @@ pub(crate) mod binary;
/// Module to parse `hp.obo` file
pub(crate) mod hp_obo;

/// Module to parse HPO - `Gene` associations from `genes_to_phenotype.txt` file
pub(crate) mod genes_to_phenotype {
use crate::parser::Path;
use crate::HpoResult;
use std::fs::File;
use std::io::BufRead;
use std::io::BufReader;

use crate::HpoTermId;
use crate::Ontology;

/// Quick and dirty parser for development and debugging
pub fn parse<P: AsRef<Path>>(file: P, ontology: &mut Ontology) -> HpoResult<()> {
let file = File::open(file).unwrap();
let reader = BufReader::new(file);
for line in reader.lines() {
let line = line.unwrap();
// TODO: Check for the header outside of the `lines` iterator
if line.starts_with('#') || line.starts_with("ncbi_gene_id") {
continue;
}
let cols: Vec<&str> = line.trim().split('\t').collect();
let gene_id = ontology.add_gene(cols[1], cols[0])?;
let term_id = HpoTermId::try_from(cols[2])?;
ontology.link_gene_term(term_id, gene_id)?;

ontology
.gene_mut(&gene_id)
.expect("Cannot find gene {gene_id}")
.add_term(term_id);
}
Ok(())
}
}

/// Module to parse HPO - `Gene` associations from `phenotype_to_genes.txt` file
pub(crate) mod phenotype_to_genes {
use crate::parser::Path;
Expand Down Expand Up @@ -126,7 +161,7 @@ pub(crate) mod phenotype_hpoa {
}
}

pub(crate) fn load_from_standard_files<P: AsRef<Path>>(
pub(crate) fn load_from_jax_files_with_transivitve_genes<P: AsRef<Path>>(
obo_file: P,
gene_file: P,
disease_file: P,
Expand All @@ -137,3 +172,15 @@ pub(crate) fn load_from_standard_files<P: AsRef<Path>>(
phenotype_hpoa::parse(disease_file, ontology)?;
Ok(())
}

pub(crate) fn load_from_jax_files<P: AsRef<Path>>(
obo_file: P,
gene_file: P,
disease_file: P,
ontology: &mut Ontology,
) -> HpoResult<()> {
hp_obo::read_obo_file(obo_file, ontology)?;
genes_to_phenotype::parse(gene_file, ontology)?;
phenotype_hpoa::parse(disease_file, ontology)?;
Ok(())
}
Binary file modified tests/ontology.hpo
Binary file not shown.
Loading