diff --git a/Cargo.lock b/Cargo.lock index 5796c2ce..7762c66d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -265,6 +265,7 @@ dependencies = [ "csv", "enum-map", "env_logger", + "erased-serde", "flate2", "indexmap", "indicatif", @@ -1005,6 +1006,15 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "erased-serde" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4adbf0983fe06bd3a5c19c8477a637c2389feb0994eca7a59e3b961054aa7c0a" +dependencies = [ + "serde", +] + [[package]] name = "errno" version = "0.3.7" diff --git a/Cargo.toml b/Cargo.toml index 62716261..b1013d13 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ tracing = "0.1" tracing-subscriber = "0.3" rustc-hash = "1.1.0" noodles-gff = "0.25.0" +erased-serde = "0.4.1" [build-dependencies] prost-build = "0.12" diff --git a/src/gnomad_nuclear/cli/query.rs b/src/gnomad_nuclear/cli/query.rs index fb656bf1..773981c3 100644 --- a/src/gnomad_nuclear/cli/query.rs +++ b/src/gnomad_nuclear/cli/query.rs @@ -1,9 +1,18 @@ //! Query of gnomAD-exomes and genomes annotation data. +use erased_serde::serialize_trait_object; use std::{io::Write, sync::Arc}; use prost::Message as _; +/// Helper trait for type erased serialization. +pub trait SerializeRecordTrait: erased_serde::Serialize {} +impl SerializeRecordTrait for pbs::gnomad::gnomad2::Record {} +impl SerializeRecordTrait for pbs::gnomad::gnomad3::Record {} +impl SerializeRecordTrait for pbs::gnomad::gnomad4::Record {} + +serialize_trait_object!(SerializeRecordTrait); + use crate::{ common::{self, cli::extract_chrom, keys, spdi}, cons::cli::args::vars::ArgsQuery, @@ -37,6 +46,8 @@ pub struct Args { pub struct Meta { /// Genome release of data in database. pub genome_release: String, + /// gnomAD version. + pub gnomad_version: String, } /// Open RocksDb given path and column family name for data and metadata. @@ -61,8 +72,13 @@ pub fn open_rocksdb>( db.get_cf(&cf_meta, "genome-release")? .ok_or_else(|| anyhow::anyhow!("missing value meta:genome-release"))?, )?; + let meta_gnomad_version = String::from_utf8( + db.get_cf(&cf_meta, "gnomad-version")? + .ok_or_else(|| anyhow::anyhow!("missing value meta:gnomad-version"))?, + )?; Meta { genome_release: meta_genome_release, + gnomad_version: meta_gnomad_version, } }; @@ -85,7 +101,7 @@ pub fn open_rocksdb_from_args( fn print_record( out_writer: &mut Box, output_format: common::cli::OutputFormat, - value: &Box, + value: &Box, ) -> Result<(), anyhow::Error> { match output_format { common::cli::OutputFormat::Jsonl => { @@ -97,12 +113,15 @@ fn print_record( } /// Query for a single variant in the RocksDB database. -pub fn query_for_variant( +pub fn query_for_variant( variant: &common::spdi::Var, meta: &Meta, db: &rocksdb::DBWithThreadMode, cf_data: &Arc, -) -> Result, anyhow::Error> { +) -> Result>, anyhow::Error> +where + T: SerializeRecordTrait + prost::Message + std::default::Default + 'static, +{ // Split off the genome release (checked) and convert to key as used in database. let query = spdi::Var { sequence: extract_chrom::from_var(variant, Some(&meta.genome_release))?, @@ -117,9 +136,11 @@ pub fn query_for_variant( .map_err(|e| anyhow::anyhow!("problem querying RocksDB: {}", e))?; raw_value .map(|raw_value| { - // Decode via prost. - pbs::gnomad::gnomad2::Record::decode(&mut std::io::Cursor::new(&raw_value)) - .map_err(|e| anyhow::anyhow!("failed to decode record: {}", e)) + // Decode via prost, box object, and map errors properly. + match T::decode(&mut std::io::Cursor::new(&raw_value)) { + Ok(record) => Ok(Box::new(record) as Box), + Err(e) => Err(anyhow::anyhow!("failed to decode record: {}", e)), + } }) .transpose() } @@ -145,7 +166,19 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> tracing::info!("Running query..."); let before_query = std::time::Instant::now(); if let Some(variant) = args.query.variant.as_ref() { - if let Some(record) = query_for_variant(variant, &meta, &db, &cf_data)? { + let query_result = match meta.gnomad_version[0..1].parse::()? { + '2' => { + query_for_variant::(variant, &meta, &db, &cf_data)? + } + '3' => { + query_for_variant::(variant, &meta, &db, &cf_data)? + } + '4' => { + query_for_variant::(variant, &meta, &db, &cf_data)? + } + _ => unreachable!("unhandled gnomAD version: {}", &meta.gnomad_version), + }; + if let Some(record) = query_result { print_record(&mut out_writer, args.out_format, &record)? } else { tracing::info!("no record found for variant {:?}", &variant); @@ -203,9 +236,14 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> } } - let record = - pbs::gnomad::gnomad2::Record::decode(&mut std::io::Cursor::new(&raw_value)) - .map_err(|e| anyhow::anyhow!("failed to decode record: {}", e))?; + let mut cursor = std::io::Cursor::new(&raw_value); + let record: Box = + match meta.gnomad_version[0..1].parse::()? { + '2' => Box::new(pbs::gnomad::gnomad2::Record::decode(&mut cursor)?), + '3' => Box::new(pbs::gnomad::gnomad3::Record::decode(&mut cursor)?), + '4' => Box::new(pbs::gnomad::gnomad4::Record::decode(&mut cursor)?), + _ => unreachable!("unhandled gnomAD version: {}", &meta.gnomad_version), + }; print_record(&mut out_writer, args.out_format, &record)?; iter.next(); } else { @@ -253,7 +291,7 @@ mod test { #[rstest::rstest] #[case("exomes", "grch37", "2.1")] - #[case("exomes", "grch38", "4.0")] + // #[case("exomes", "grch38", "4.0")] fn smoke_query_exomes_all( #[case] kind: &str, #[case] genome_release: &str,