Skip to content

Commit

Permalink
feat: implementing ClinGen region dosage annotation (#282) (#319)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Nov 21, 2023
1 parent fe855a6 commit 57e1408
Show file tree
Hide file tree
Showing 31 changed files with 1,935 additions and 35 deletions.
1 change: 1 addition & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ fn main() -> Result<(), anyhow::Error> {
"annonars/gnomad/vep_gnomad2.proto",
"annonars/gnomad/vep_gnomad3.proto",
"annonars/helixmtdb/base.proto",
"annonars/regions/clingen.proto",
]
.iter()
.map(|f| root.join(f))
Expand Down
25 changes: 25 additions & 0 deletions protos/annonars/regions/clingen.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Genomic region annotation with ClinGen information.

syntax = "proto3";

package annonars.regions.clingen;

import "annonars/genes/base.proto";

// Dosage pathogenicity region annotation.
//
// One record per ClinGen region; the dosage scores reuse the
// `ClingenDosageScore` enum from `annonars/genes/base.proto`.
message Region {
// ISCA ID
string isca_id = 1;
// ISCA Region Name
string isca_region_name = 2;
// Genomic location.
string genomic_location = 3;
// Haploinsufficiency score.
annonars.genes.base.ClingenDosageScore haploinsufficiency_score = 4;
// Triplosensitivity score.
annonars.genes.base.ClingenDosageScore triplosensitivity_score = 5;
// Haploinsufficiency Disease ID.
optional string haploinsufficiency_disease_id = 6;
// Triplosensitivity Disease ID.
optional string triplosensitivity_disease_id = 7;
}
6 changes: 3 additions & 3 deletions src/genes/cli/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ pub mod clingen_gene {
}
}

impl From<Score> for crate::pbs::genes::ClingenDosageScore {
impl From<Score> for crate::pbs::genes::base::ClingenDosageScore {
fn from(val: Score) -> Self {
use crate::pbs::genes::ClingenDosageScore::*;
use crate::pbs::genes::base::ClingenDosageScore::*;
match val {
Score::SufficientEvidence => SufficientEvidenceAvailable,
Score::SomeEvidence => SomeEvidenceAvailable,
Expand Down Expand Up @@ -1616,7 +1616,7 @@ pub mod shet {
pub mod gtex {
use serde::{Deserialize, Serialize};

use crate::pbs::genes::{GtexTissue, GtexTissueDetailed};
use crate::pbs::genes::base::{GtexTissue, GtexTissueDetailed};

/// GTEx V8 tissue types.
#[derive(Debug, Clone, Serialize, Deserialize)]
Expand Down
42 changes: 21 additions & 21 deletions src/genes/cli/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ fn load_domino(path: &str) -> Result<HashMap<String, domino::Record>, anyhow::Er
}

/// Convert from `data::*` records to protobuf records.
fn convert_record(record: data::Record) -> pbs::genes::Record {
fn convert_record(record: data::Record) -> pbs::genes::base::Record {
let data::Record {
acmg_sf,
clingen,
Expand Down Expand Up @@ -402,7 +402,7 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {
variants_to_report,
} = acmg_sf;

pbs::genes::AcmgSecondaryFindingRecord {
pbs::genes::base::AcmgSecondaryFindingRecord {
hgnc_id,
ensembl_gene_id,
ncbi_gene_id,
Expand All @@ -428,15 +428,15 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {
triplosensitivity_disease_id,
} = clingen;

pbs::genes::ClingenDosageRecord {
pbs::genes::base::ClingenDosageRecord {
gene_symbol,
ncbi_gene_id,
genomic_location,
haploinsufficiency_score: Into::<pbs::genes::ClingenDosageScore>::into(
haploinsufficiency_score: Into::<pbs::genes::base::ClingenDosageScore>::into(
clingen_gene::Score::try_from(haploinsufficiency_score)
.expect("invalid haploinsufficiency score"),
) as i32,
triplosensitivity_score: Into::<pbs::genes::ClingenDosageScore>::into(
triplosensitivity_score: Into::<pbs::genes::base::ClingenDosageScore>::into(
clingen_gene::Score::try_from(triplosensitivity_score)
.expect("invalid triplosensitivity score"),
) as i32,
Expand Down Expand Up @@ -548,7 +548,7 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {
zfin_zebrafish_phenotype_tag,
} = dbnsfp;

pbs::genes::DbnsfpRecord {
pbs::genes::base::DbnsfpRecord {
gene_name,
ensembl_gene,
chr,
Expand Down Expand Up @@ -680,7 +680,7 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {
exac_oe_lof,
} = gnomad_constraints;

pbs::genes::GnomadConstraintsRecord {
pbs::genes::base::GnomadConstraintsRecord {
ensembl_gene_id,
entrez_id,
gene_symbol,
Expand Down Expand Up @@ -762,7 +762,7 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {
mane_select,
} = hgnc;

Some(pbs::genes::HgncRecord {
Some(pbs::genes::base::HgncRecord {
hgnc_id,
symbol,
name,
Expand Down Expand Up @@ -796,7 +796,7 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {
lsdb: lsdb
.map(|lsdb| {
lsdb.iter()
.map(|lsdb| pbs::genes::HgncLsdb {
.map(|lsdb| pbs::genes::base::HgncLsdb {
name: lsdb.name.clone(),
url: lsdb.url.clone(),
})
Expand Down Expand Up @@ -831,14 +831,14 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {
summary,
rif_entries,
} = ncbi;
pbs::genes::NcbiRecord {
pbs::genes::base::NcbiRecord {
gene_id,
summary,
rif_entries: rif_entries
.map(|rif_entries| {
rif_entries
.into_iter()
.map(|rif_entry| pbs::genes::RifEntry {
.map(|rif_entry| pbs::genes::base::RifEntry {
pmids: rif_entry.pmids.unwrap_or_default(),
text: rif_entry.text,
})
Expand All @@ -850,11 +850,11 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {

let omim = omim.map(|omim| {
let omim::Record { hgnc_id, diseases } = omim;
pbs::genes::OmimRecord {
pbs::genes::base::OmimRecord {
hgnc_id,
omim_diseases: diseases
.into_iter()
.map(|disease| pbs::genes::OmimTerm {
.map(|disease| pbs::genes::base::OmimTerm {
omim_id: disease.omim_id,
label: disease.label,
})
Expand All @@ -864,11 +864,11 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {

let orpha = orpha.map(|orpha| {
let orpha::Record { hgnc_id, diseases } = orpha;
pbs::genes::OrphaRecord {
pbs::genes::base::OrphaRecord {
hgnc_id,
orpha_diseases: diseases
.into_iter()
.map(|disease| pbs::genes::OrphaTerm {
.map(|disease| pbs::genes::base::OrphaTerm {
orpha_id: disease.orpha_id,
label: disease.label,
})
Expand All @@ -882,7 +882,7 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {
p_haplo,
p_triplo,
} = rcnv;
pbs::genes::RcnvRecord {
pbs::genes::base::RcnvRecord {
hgnc_id,
p_haplo,
p_triplo,
Expand All @@ -891,7 +891,7 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {

let shet = shet.map(|shet| {
let shet::Record { hgnc_id, s_het } = shet;
pbs::genes::ShetRecord { hgnc_id, s_het }
pbs::genes::base::ShetRecord { hgnc_id, s_het }
});

let gtex = gtex.map(|gtex| {
Expand All @@ -909,14 +909,14 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {
tissue_detailed,
tpms,
} = record;
pbs::genes::GtexTissueRecord {
pbs::genes::base::GtexTissueRecord {
tissue: tissue as i32,
tissue_detailed: tissue_detailed as i32,
tpms,
}
})
.collect::<Vec<_>>();
pbs::genes::GtexRecord {
pbs::genes::base::GtexRecord {
hgnc_id,
ensembl_gene_id,
ensembl_gene_version,
Expand All @@ -926,10 +926,10 @@ fn convert_record(record: data::Record) -> pbs::genes::Record {

let domino = domino.map(|domino| {
let domino::Record { gene_symbol, score } = domino;
pbs::genes::DominoRecord { gene_symbol, score }
pbs::genes::base::DominoRecord { gene_symbol, score }
});

pbs::genes::Record {
pbs::genes::base::Record {
acmg_sf,
clingen,
dbnsfp,
Expand Down
6 changes: 3 additions & 3 deletions src/genes/cli/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ pub fn open_rocksdb_from_args(
fn print_record(
out_writer: &mut Box<dyn std::io::Write>,
output_format: common::cli::OutputFormat,
value: &genes::Record,
value: &genes::base::Record,
) -> Result<(), anyhow::Error> {
match output_format {
common::cli::OutputFormat::Jsonl => {
Expand All @@ -79,13 +79,13 @@ pub fn query_for_gene(
hgnc_id: &str,
db: &rocksdb::DBWithThreadMode<rocksdb::MultiThreaded>,
cf_data: &Arc<rocksdb::BoundColumnFamily>,
) -> Result<Option<genes::Record>, anyhow::Error> {
) -> Result<Option<genes::base::Record>, anyhow::Error> {
let raw_value = db
.get_cf(cf_data, hgnc_id.as_bytes())
.map_err(|e| anyhow::anyhow!("error while querying for HGNC ID {}: {}", hgnc_id, e))?;
raw_value
.map(|raw_value| {
genes::Record::decode(&mut std::io::Cursor::new(&raw_value)).map_err(|e| {
genes::base::Record::decode(&mut std::io::Cursor::new(&raw_value)).map_err(|e| {
anyhow::anyhow!(
"error while decoding gene record for HGNC ID {}: {}",
hgnc_id,
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pub mod gnomad_nuclear;
pub mod gnomad_sv;
pub mod helixmtdb;
pub mod pbs;
pub mod regions;
pub mod server;
pub mod tsv;

Expand Down
25 changes: 24 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use annonars::{
clinvar_genes, clinvar_minimal, clinvar_sv, common, cons, db_utils, dbsnp, freqs, functional,
genes, gnomad_mtdna, gnomad_nuclear, gnomad_sv, helixmtdb, server, tsv,
genes, gnomad_mtdna, gnomad_nuclear, gnomad_sv, helixmtdb, regions, server, tsv,
};
use anyhow::Error;
use clap::{command, Args, Parser, Subcommand};
Expand Down Expand Up @@ -52,6 +52,8 @@ enum Commands {
GnomadNuclear(GnomadNuclear),
/// "gnomad-sv" sub commands
GnomadSv(GnomadSv),
/// "regions" sub commands
Regions(Regions),
/// "db-utils" sub commands
DbUtils(DbUtils),
/// "run-server" command.
Expand Down Expand Up @@ -279,6 +281,23 @@ enum GnomadSvCommands {
Query(gnomad_sv::cli::query::Args),
}

/// Parsing of "regions" subcommands.
#[derive(Debug, Args, Clone)]
struct Regions {
/// The sub command to run
// `main()` matches on this field to dispatch to the import or query implementation.
#[command(subcommand)]
command: RegionsCommands,
}

/// Enum supporting the parsing of "regions *" subcommands.
#[derive(Debug, Subcommand, Clone)]
enum RegionsCommands {
/// "import" sub command
// Handled by `regions::cli::import::run` in `main()`.
Import(regions::cli::import::Args),
/// "query" sub command
// Handled by `regions::cli::query::run` in `main()`.
Query(regions::cli::query::Args),
}

/// Parsing of "db-utils" subcommands.
#[derive(Debug, Args, Clone)]
struct DbUtils {
Expand Down Expand Up @@ -387,6 +406,10 @@ pub fn main() -> Result<(), anyhow::Error> {
GnomadSvCommands::Import(args) => gnomad_sv::cli::import::run(&cli.common, args)?,
GnomadSvCommands::Query(args) => gnomad_sv::cli::query::run(&cli.common, args)?,
},
Commands::Regions(args) => match &args.command {
RegionsCommands::Import(args) => regions::cli::import::run(&cli.common, args)?,
RegionsCommands::Query(args) => regions::cli::query::run(&cli.common, args)?,
},
Commands::DbUtils(args) => match &args.command {
DbUtilsCommands::Copy(args) => db_utils::cli::copy::run(&cli.common, args)?,
DbUtilsCommands::DumpMeta(args) => {
Expand Down
7 changes: 5 additions & 2 deletions src/pbs/genes.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
//! Code generated for protobufs by `prost-build`.
include!(concat!(env!("OUT_DIR"), "/annonars.genes.base.rs"));
include!(concat!(env!("OUT_DIR"), "/annonars.genes.base.serde.rs"));
/// Code generated for protobufs by `prost-build`.
pub mod base {
include!(concat!(env!("OUT_DIR"), "/annonars.genes.base.rs"));
include!(concat!(env!("OUT_DIR"), "/annonars.genes.base.serde.rs"));
}
1 change: 1 addition & 0 deletions src/pbs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ pub mod functional;
pub mod genes;
pub mod gnomad;
pub mod helixmtdb;
pub mod regions;
10 changes: 10 additions & 0 deletions src/pbs/regions.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
//! Code generated for protobufs by `prost-build`.
/// Code generated for protobufs by `prost-build`.
pub mod clingen {
// Rust code for the `annonars.regions.clingen` protobuf package, read from `OUT_DIR`.
include!(concat!(env!("OUT_DIR"), "/annonars.regions.clingen.rs"));
// Companion `.serde.rs` file from `OUT_DIR`; presumably the generated serde
// implementations for the types above -- TODO confirm against `build.rs`.
include!(concat!(
env!("OUT_DIR"),
"/annonars.regions.clingen.serde.rs"
));
}
Loading

0 comments on commit 57e1408

Please sign in to comment.