Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Dec 28, 2023
1 parent 49ece3d commit 5517687
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 11 deletions.
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ tracing = "0.1"
tracing-subscriber = "0.3"
rustc-hash = "1.1.0"
noodles-gff = "0.25.0"
erased-serde = "0.4.1"

[build-dependencies]
prost-build = "0.12"
Expand Down
60 changes: 49 additions & 11 deletions src/gnomad_nuclear/cli/query.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
//! Query of gnomAD-exomes and genomes annotation data.
use erased_serde::serialize_trait_object;
use std::{io::Write, sync::Arc};

use prost::Message as _;

/// Helper trait for type erased serialization.
pub trait SerializeRecordTrait: erased_serde::Serialize {}
impl SerializeRecordTrait for pbs::gnomad::gnomad2::Record {}
impl SerializeRecordTrait for pbs::gnomad::gnomad3::Record {}
impl SerializeRecordTrait for pbs::gnomad::gnomad4::Record {}

serialize_trait_object!(SerializeRecordTrait);

use crate::{
common::{self, cli::extract_chrom, keys, spdi},
cons::cli::args::vars::ArgsQuery,
Expand Down Expand Up @@ -37,6 +46,8 @@ pub struct Args {
pub struct Meta {
/// Genome release of data in database.
pub genome_release: String,
/// gnomAD version.
pub gnomad_version: String,
}

/// Open RocksDb given path and column family name for data and metadata.
Expand All @@ -61,8 +72,13 @@ pub fn open_rocksdb<P: AsRef<std::path::Path>>(
db.get_cf(&cf_meta, "genome-release")?
.ok_or_else(|| anyhow::anyhow!("missing value meta:genome-release"))?,
)?;
let meta_gnomad_version = String::from_utf8(
db.get_cf(&cf_meta, "gnomad-version")?
.ok_or_else(|| anyhow::anyhow!("missing value meta:gnomad-version"))?,
)?;
Meta {
genome_release: meta_genome_release,
gnomad_version: meta_gnomad_version,
}
};

Expand All @@ -85,7 +101,7 @@ pub fn open_rocksdb_from_args(
fn print_record(
out_writer: &mut Box<dyn std::io::Write>,
output_format: common::cli::OutputFormat,
value: &Box<dyn serde::Serialize>,
value: &Box<dyn SerializeRecordTrait>,
) -> Result<(), anyhow::Error> {
match output_format {
common::cli::OutputFormat::Jsonl => {
Expand All @@ -97,12 +113,15 @@ fn print_record(
}

/// Query for a single variant in the RocksDB database.
pub fn query_for_variant(
pub fn query_for_variant<T>(
variant: &common::spdi::Var,
meta: &Meta,
db: &rocksdb::DBWithThreadMode<rocksdb::MultiThreaded>,
cf_data: &Arc<rocksdb::BoundColumnFamily>,
) -> Result<Option<pbs::gnomad::gnomad2::Record>, anyhow::Error> {
) -> Result<Option<Box<dyn SerializeRecordTrait>>, anyhow::Error>
where
T: SerializeRecordTrait + prost::Message + std::default::Default + 'static,
{
// Split off the genome release (checked) and convert to key as used in database.
let query = spdi::Var {
sequence: extract_chrom::from_var(variant, Some(&meta.genome_release))?,
Expand All @@ -117,9 +136,11 @@ pub fn query_for_variant(
.map_err(|e| anyhow::anyhow!("problem querying RocksDB: {}", e))?;
raw_value
.map(|raw_value| {
// Decode via prost.
pbs::gnomad::gnomad2::Record::decode(&mut std::io::Cursor::new(&raw_value))
.map_err(|e| anyhow::anyhow!("failed to decode record: {}", e))
// Decode via prost, box object, and map errors properly.
match T::decode(&mut std::io::Cursor::new(&raw_value)) {
Ok(record) => Ok(Box::new(record) as Box<dyn SerializeRecordTrait>),
Err(e) => Err(anyhow::anyhow!("failed to decode record: {}", e)),
}
})
.transpose()
}
Expand All @@ -145,7 +166,19 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error>
tracing::info!("Running query...");
let before_query = std::time::Instant::now();
if let Some(variant) = args.query.variant.as_ref() {
if let Some(record) = query_for_variant(variant, &meta, &db, &cf_data)? {
let query_result = match meta.gnomad_version[0..1].parse::<char>()? {
'2' => {
query_for_variant::<pbs::gnomad::gnomad2::Record>(variant, &meta, &db, &cf_data)?
}
'3' => {
query_for_variant::<pbs::gnomad::gnomad3::Record>(variant, &meta, &db, &cf_data)?
}
'4' => {
query_for_variant::<pbs::gnomad::gnomad4::Record>(variant, &meta, &db, &cf_data)?
}
_ => unreachable!("unhandled gnomAD version: {}", &meta.gnomad_version),
};
if let Some(record) = query_result {
print_record(&mut out_writer, args.out_format, &record)?
} else {
tracing::info!("no record found for variant {:?}", &variant);
Expand Down Expand Up @@ -203,9 +236,14 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error>
}
}

let record =
pbs::gnomad::gnomad2::Record::decode(&mut std::io::Cursor::new(&raw_value))
.map_err(|e| anyhow::anyhow!("failed to decode record: {}", e))?;
let mut cursor = std::io::Cursor::new(&raw_value);
let record: Box<dyn SerializeRecordTrait> =
match meta.gnomad_version[0..1].parse::<char>()? {
'2' => Box::new(pbs::gnomad::gnomad2::Record::decode(&mut cursor)?),
'3' => Box::new(pbs::gnomad::gnomad3::Record::decode(&mut cursor)?),
'4' => Box::new(pbs::gnomad::gnomad4::Record::decode(&mut cursor)?),
_ => unreachable!("unhandled gnomAD version: {}", &meta.gnomad_version),
};
print_record(&mut out_writer, args.out_format, &record)?;
iter.next();
} else {
Expand Down Expand Up @@ -253,7 +291,7 @@ mod test {

#[rstest::rstest]
#[case("exomes", "grch37", "2.1")]
#[case("exomes", "grch38", "4.0")]
// #[case("exomes", "grch38", "4.0")]
fn smoke_query_exomes_all(
#[case] kind: &str,
#[case] genome_release: &str,
Expand Down

0 comments on commit 5517687

Please sign in to comment.