diff --git a/src/clinvar_genes/cli/query.rs b/src/clinvar_genes/cli/query.rs index 03c79441..be406b33 100644 --- a/src/clinvar_genes/cli/query.rs +++ b/src/clinvar_genes/cli/query.rs @@ -74,6 +74,29 @@ fn print_record( Ok(()) } +/// Query for one gene annotation record. +pub fn query_for_gene( + hgnc_id: &str, + db: &rocksdb::DBWithThreadMode, + cf_data: &Arc, +) -> Result, anyhow::Error> { + let raw_value = db + .get_cf(cf_data, hgnc_id.as_bytes()) + .map_err(|e| anyhow::anyhow!("error while querying for HGNC ID {}: {}", hgnc_id, e))?; + raw_value + .map(|raw_value| { + clinvar_genes::pbs::ClinvarPerGeneRecord::decode(&mut std::io::Cursor::new(&raw_value)) + .map_err(|e| { + anyhow::anyhow!( + "error while decoding clinvar per gene record for HGNC ID {}: {}", + hgnc_id, + e + ) + }) + }) + .transpose() +} + /// Implementation of `gene query` sub command. pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> { tracing::info!("Starting 'gene query' command"); @@ -94,19 +117,11 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> }; tracing::info!("Running query..."); - let raw_value = db.get_cf(&cf_data, args.hgnc_id.as_bytes())?; - if let Some(raw_value) = raw_value { - print_record( - &mut out_writer, - args.out_format, - &clinvar_genes::pbs::ClinvarPerGeneRecord::decode(&mut std::io::Cursor::new( - &raw_value, - ))?, - )?; + if let Some(record) = query_for_gene(&args.hgnc_id, &db, &cf_data)? { + print_record(&mut out_writer, args.out_format, &record)?; } else { - tracing::info!("No data found for HGNC ID {}", args.hgnc_id); + tracing::info!("no record found for HGNC ID {:?}", args.hgnc_id); } - tracing::info!("All done. Have a nice day!"); Ok(()) } diff --git a/src/clinvar_minimal/cli/query.rs b/src/clinvar_minimal/cli/query.rs index d9dd37f2..94606698 100644 --- a/src/clinvar_minimal/cli/query.rs +++ b/src/clinvar_minimal/cli/query.rs @@ -102,7 +102,7 @@ pub fn query_for_variant( meta: &Meta, db: &rocksdb::DBWithThreadMode, cf_data: &Arc, -) -> Result { +) -> Result, anyhow::Error> { // Split off the genome release (checked) and convert to key as used in database. let query = spdi::Var { sequence: extract_chrom::from_var(variant, Some(&meta.genome_release))?, @@ -113,11 +113,15 @@ pub fn query_for_variant( let var: keys::Var = query.into(); let key: Vec = var.into(); let raw_value = db - .get_cf(cf_data, key)? - .ok_or_else(|| anyhow::anyhow!("could not find variant in database"))?; - // Decode via prost. - pbs::Record::decode(&mut std::io::Cursor::new(&raw_value)) - .map_err(|e| anyhow::anyhow!("failed to decode record: {}", e)) + .get_cf(cf_data, key) + .map_err(|e| anyhow::anyhow!("error while querying for variant {}: {}", variant, e))?; + raw_value + .map(|raw_value| { + // Decode via prost. + pbs::Record::decode(&mut std::io::Cursor::new(&raw_value)) + .map_err(|e| anyhow::anyhow!("failed to decode record: {}", e)) + }) + .transpose() } /// Implementation of `tsv query` sub command. @@ -141,11 +145,11 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> tracing::info!("Running query..."); let before_query = std::time::Instant::now(); if let Some(variant) = args.query.variant.as_ref() { - print_record( - &mut out_writer, - args.out_format, - &query_for_variant(variant, &meta, &db, &cf_data)?, - )?; + if let Some(record) = query_for_variant(variant, &meta, &db, &cf_data)? { + print_record(&mut out_writer, args.out_format, &record)?; + } else { + tracing::info!("no record found for variant {:?}", &variant); + } } else { let (start, stop) = if let Some(position) = args.query.position.as_ref() { let position = spdi::Pos { diff --git a/src/dbsnp/cli/query.rs b/src/dbsnp/cli/query.rs index aa68407d..5ebbcd92 100644 --- a/src/dbsnp/cli/query.rs +++ b/src/dbsnp/cli/query.rs @@ -118,7 +118,7 @@ pub fn query_for_variant( meta: &Meta, db: &rocksdb::DBWithThreadMode, cf_data: &Arc, -) -> Result { +) -> Result, anyhow::Error> { // Split off the genome release (checked) and convert to key as used in database. let query = spdi::Var { sequence: extract_chrom::from_var(variant, Some(&meta.genome_release))?, @@ -129,11 +129,15 @@ pub fn query_for_variant( let var: keys::Var = query.into(); let key: Vec = var.into(); let raw_value = db - .get_cf(cf_data, key)? - .ok_or_else(|| anyhow::anyhow!("could not find variant in database"))?; - // Decode via prost. - dbsnp::pbs::Record::decode(&mut std::io::Cursor::new(&raw_value)) - .map_err(|e| anyhow::anyhow!("failed to decode record: {}", e)) + .get_cf(cf_data, key) + .map_err(|e| anyhow::anyhow!("error while querying for variant {}: {}", variant, e))?; + raw_value + .map(|raw_value| { + // Decode via prost. + dbsnp::pbs::Record::decode(&mut std::io::Cursor::new(&raw_value)) + .map_err(|e| anyhow::anyhow!("failed to decode record: {}", e)) + }) + .transpose() } /// Implementation of `tsv query` sub command. @@ -157,11 +161,11 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> tracing::info!("Running query..."); let before_query = std::time::Instant::now(); if let Some(variant) = args.query.variant.as_ref() { - print_record( - &mut out_writer, - args.out_format, - &query_for_variant(variant, &meta, &db, &cf_data)?, - )?; + if let Some(record) = query_for_variant(variant, &meta, &db, &cf_data)? { + print_record(&mut out_writer, args.out_format, &record)?; + } else { + tracing::info!("no record found for variant {}", variant); + } } else { let (start, stop) = if let Some(position) = args.query.position.as_ref() { let position = spdi::Pos { diff --git a/src/freqs/cli/query.rs b/src/freqs/cli/query.rs index ca3c2daa..6b185986 100644 --- a/src/freqs/cli/query.rs +++ b/src/freqs/cli/query.rs @@ -85,46 +85,58 @@ pub fn open_rocksdb_from_args( ) } +/// Enumeration of possible result records. +pub enum Record { + /// Record for autosomal variant. + Autosomal(freqs::serialized::auto::Record), + /// Record for gonosomal variant. + Gonosomal(freqs::serialized::xy::Record), + /// Record for mitochondrial variant. + Mitochondrial(freqs::serialized::mt::Record), +} + /// Query for a single variant in the RocksDB database. pub fn query_for_variant( variant: &spdi::Var, db: &rocksdb::DBWithThreadMode, - out_writer: &mut dyn std::io::Write, _out_format: common::cli::OutputFormat, -) -> Result<(), anyhow::Error> { +) -> Result, anyhow::Error> { let seq = variant.sequence.to_lowercase(); let var: keys::Var = variant.clone().into(); let key: Vec = var.into(); if seq.contains('m') { let cf_mtdna: Arc = db.cf_handle("mitochondrial").unwrap(); - let raw_value = db.get_cf(&cf_mtdna, &key)?; + let raw_value = db + .get_cf(&cf_mtdna, &key) + .map_err(|e| anyhow::anyhow!("error reading from RocksDB: {}", e))?; if let Some(raw_value) = raw_value { - let value = freqs::serialized::mt::Record::from_buf(&raw_value); - let json_value = serde_json::to_value(value)?; - let json = serde_json::to_string(&json_value)?; - writeln!(out_writer, "{}", &json)?; + return Ok(Some(Record::Mitochondrial( + freqs::serialized::mt::Record::from_buf(&raw_value), + ))); } } else if seq.contains('x') || seq.contains('y') { let cf_xy: Arc = db.cf_handle("gonosomal").unwrap(); - let raw_value = db.get_cf(&cf_xy, &key)?; + let raw_value = db + .get_cf(&cf_xy, &key) + .map_err(|e| anyhow::anyhow!("error reading from RocksDB: {}", e))?; if let Some(raw_value) = raw_value { - let value = freqs::serialized::xy::Record::from_buf(&raw_value); - let json_value = serde_json::to_value(value)?; - let json = serde_json::to_string(&json_value)?; - writeln!(out_writer, "{}", &json)?; + return Ok(Some(Record::Gonosomal( + freqs::serialized::xy::Record::from_buf(&raw_value), + ))); } } else { let cf_auto: Arc = db.cf_handle("autosomal").unwrap(); - let raw_value = db.get_cf(&cf_auto, &key)?; + let raw_value = db + .get_cf(&cf_auto, &key) + .map_err(|e| anyhow::anyhow!("error reading from RocksDB: {}", e))?; if let Some(raw_value) = raw_value { - let value = freqs::serialized::auto::Record::from_buf(&raw_value); - let json_value = serde_json::to_value(value)?; - let json = serde_json::to_string(&json_value)?; - writeln!(out_writer, "{}", &json)?; + return Ok(Some(Record::Autosomal( + freqs::serialized::auto::Record::from_buf(&raw_value), + ))); } } - Ok(()) + Ok(None) } /// Implementation of `tsv query` sub command. @@ -146,7 +158,27 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> tracing::info!("Running query..."); let before_query = std::time::Instant::now(); - query_for_variant(&args.variant, &db, &mut out_writer, args.out_format)?; + if let Some(variant) = query_for_variant(&args.variant, &db, args.out_format)? { + match variant { + Record::Autosomal(record) => { + let json_value = serde_json::to_value(record)?; + let json = serde_json::to_string(&json_value)?; + writeln!(out_writer, "{}", &json)?; + } + Record::Gonosomal(record) => { + let json_value = serde_json::to_value(record)?; + let json = serde_json::to_string(&json_value)?; + writeln!(out_writer, "{}", &json)?; + } + Record::Mitochondrial(record) => { + let json_value = serde_json::to_value(record)?; + let json = serde_json::to_string(&json_value)?; + writeln!(out_writer, "{}", &json)?; + } + } + } else { + tracing::info!("no record found for variant {:?}", &args.variant); + } tracing::info!("... done querying in {:?}", before_query.elapsed()); tracing::info!("All done. Have a nice day!"); diff --git a/src/genes/cli/query.rs b/src/genes/cli/query.rs index 51f8ac6c..592e447a 100644 --- a/src/genes/cli/query.rs +++ b/src/genes/cli/query.rs @@ -74,6 +74,28 @@ fn print_record( Ok(()) } +/// Query for one gene annotation record. +pub fn query_for_gene( + hgnc_id: &str, + db: &rocksdb::DBWithThreadMode, + cf_data: &Arc, +) -> Result, anyhow::Error> { + let raw_value = db + .get_cf(cf_data, hgnc_id.as_bytes()) + .map_err(|e| anyhow::anyhow!("error while querying for HGNC ID {}: {}", hgnc_id, e))?; + raw_value + .map(|raw_value| { + pbs::Record::decode(&mut std::io::Cursor::new(&raw_value)).map_err(|e| { + anyhow::anyhow!( + "error while decoding gene record for HGNC ID {}: {}", + hgnc_id, + e + ) + }) + }) + .transpose() +} + /// Implementation of `gene query` sub command. pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> { tracing::info!("Starting 'gene query' command"); @@ -94,17 +116,11 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> }; tracing::info!("Running query..."); - let raw_value = db.get_cf(&cf_data, args.hgnc_id.as_bytes())?; - if let Some(raw_value) = raw_value { - print_record( - &mut out_writer, - args.out_format, - &pbs::Record::decode(&mut std::io::Cursor::new(&raw_value))?, - )?; + if let Some(record) = query_for_gene(&args.hgnc_id, &db, &cf_data)? { + print_record(&mut out_writer, args.out_format, &record)?; } else { - tracing::info!("No data found for HGNC ID {}", args.hgnc_id); + tracing::info!("no record found for HGNC ID {:?}", args.hgnc_id); } - tracing::info!("All done. Have a nice day!"); Ok(()) } diff --git a/src/gnomad_mtdna/cli/query.rs b/src/gnomad_mtdna/cli/query.rs index f8af211f..bb0cff01 100644 --- a/src/gnomad_mtdna/cli/query.rs +++ b/src/gnomad_mtdna/cli/query.rs @@ -102,7 +102,7 @@ pub fn query_for_variant( meta: &Meta, db: &rocksdb::DBWithThreadMode, cf_data: &Arc, -) -> Result { +) -> Result, anyhow::Error> { // Split off the genome release (checked) and convert to key as used in database. let query = spdi::Var { sequence: extract_chrom::from_var(variant, Some(&meta.genome_release))?, @@ -113,11 +113,15 @@ pub fn query_for_variant( let var: keys::Var = query.into(); let key: Vec = var.into(); let raw_value = db - .get_cf(cf_data, key)? - .ok_or_else(|| anyhow::anyhow!("could not find variant in database"))?; - // Decode via prost. - gnomad_pbs::mtdna::Record::decode(&mut std::io::Cursor::new(&raw_value)) - .map_err(|e| anyhow::anyhow!("failed to decode record: {}", e)) + .get_cf(cf_data, key) + .map_err(|e| anyhow::anyhow!("error reading from RocksDB: {}", e))?; + raw_value + .map(|raw_value| { + // Decode via prost. + gnomad_pbs::mtdna::Record::decode(&mut std::io::Cursor::new(&raw_value)) + .map_err(|e| anyhow::anyhow!("failed to decode record: {}", e)) + }) + .transpose() } /// Implementation of `tsv query` sub command. @@ -141,11 +145,11 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> tracing::info!("Running query..."); let before_query = std::time::Instant::now(); if let Some(variant) = args.query.variant.as_ref() { - print_record( - &mut out_writer, - args.out_format, - &query_for_variant(variant, &meta, &db, &cf_data)?, - )?; + if let Some(record) = query_for_variant(variant, &meta, &db, &cf_data)? { + print_record(&mut out_writer, args.out_format, &record)?; + } else { + tracing::info!("no record found for variant {:?}", &variant); + } } else { let (start, stop) = if let Some(position) = args.query.position.as_ref() { let position = spdi::Pos { diff --git a/src/gnomad_nuclear/cli/query.rs b/src/gnomad_nuclear/cli/query.rs index 23ecb98e..02a39c53 100644 --- a/src/gnomad_nuclear/cli/query.rs +++ b/src/gnomad_nuclear/cli/query.rs @@ -102,7 +102,7 @@ pub fn query_for_variant( meta: &Meta, db: &rocksdb::DBWithThreadMode, cf_data: &Arc, -) -> Result { +) -> Result, anyhow::Error> { // Split off the genome release (checked) and convert to key as used in database. let query = spdi::Var { sequence: extract_chrom::from_var(variant, Some(&meta.genome_release))?, @@ -113,11 +113,15 @@ pub fn query_for_variant( let var: keys::Var = query.into(); let key: Vec = var.into(); let raw_value = db - .get_cf(cf_data, key)? - .ok_or_else(|| anyhow::anyhow!("could not find variant in database"))?; - // Decode via prost. - gnomad_pbs::gnomad2::Record::decode(&mut std::io::Cursor::new(&raw_value)) - .map_err(|e| anyhow::anyhow!("failed to decode record: {}", e)) + .get_cf(cf_data, key) + .map_err(|e| anyhow::anyhow!("problem querying RocksDB: {}", e))?; + raw_value + .map(|raw_value| { + // Decode via prost. + gnomad_pbs::gnomad2::Record::decode(&mut std::io::Cursor::new(&raw_value)) + .map_err(|e| anyhow::anyhow!("failed to decode record: {}", e)) + }) + .transpose() } /// Implementation of `tsv query` sub command. @@ -141,11 +145,11 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> tracing::info!("Running query..."); let before_query = std::time::Instant::now(); if let Some(variant) = args.query.variant.as_ref() { - print_record( - &mut out_writer, - args.out_format, - &query_for_variant(variant, &meta, &db, &cf_data)?, - )?; + if let Some(record) = query_for_variant(variant, &meta, &db, &cf_data)? { + print_record(&mut out_writer, args.out_format, &record)? + } else { + tracing::info!("no record found for variant {:?}", &variant); + } } else { let (start, stop) = if let Some(position) = args.query.position.as_ref() { let position = spdi::Pos { diff --git a/src/helixmtdb/cli/query.rs b/src/helixmtdb/cli/query.rs index 70c1fc04..35f73aa4 100644 --- a/src/helixmtdb/cli/query.rs +++ b/src/helixmtdb/cli/query.rs @@ -102,7 +102,7 @@ pub fn query_for_variant( meta: &Meta, db: &rocksdb::DBWithThreadMode, cf_data: &Arc, -) -> Result { +) -> Result, anyhow::Error> { // Split off the genome release (checked) and convert to key as used in database. let query = spdi::Var { sequence: extract_chrom::from_var(variant, Some(&meta.genome_release))?, @@ -113,11 +113,15 @@ pub fn query_for_variant( let var: keys::Var = query.into(); let key: Vec = var.into(); let raw_value = db - .get_cf(cf_data, key)? - .ok_or_else(|| anyhow::anyhow!("could not find variant in database"))?; + .get_cf(cf_data, key) + .map_err(|e| anyhow::anyhow!("problem querying RocksDB: {}", e))?; // Decode via prost. - helixmtdb::pbs::Record::decode(&mut std::io::Cursor::new(&raw_value)) - .map_err(|e| anyhow::anyhow!("failed to decode record: {}", e)) + raw_value + .map(|raw_value| { + helixmtdb::pbs::Record::decode(&mut std::io::Cursor::new(&raw_value)) + .map_err(|e| anyhow::anyhow!("failed to decode record: {}", e)) + }) + .transpose() } /// Implementation of `tsv query` sub command. @@ -141,11 +145,11 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> tracing::info!("Running query..."); let before_query = std::time::Instant::now(); if let Some(variant) = args.query.variant.as_ref() { - print_record( - &mut out_writer, - args.out_format, - &query_for_variant(variant, &meta, &db, &cf_data)?, - )?; + if let Some(record) = query_for_variant(variant, &meta, &db, &cf_data)? { + print_record(&mut out_writer, args.out_format, &record)?; + } else { + tracing::info!("no record found for variant {:?}", &variant); + } } else { let (start, stop) = if let Some(position) = args.query.position.as_ref() { let position = spdi::Pos { diff --git a/src/tsv/cli/query.rs b/src/tsv/cli/query.rs index 543bed2a..d59a8a1b 100644 --- a/src/tsv/cli/query.rs +++ b/src/tsv/cli/query.rs @@ -123,7 +123,7 @@ fn print_values( out_writer: &mut Box, output_format: common::cli::OutputFormat, meta: &Meta, - values: Vec, + values: &[serde_json::Value], ) -> Result<(), anyhow::Error> { match output_format { common::cli::OutputFormat::Jsonl => { @@ -151,7 +151,7 @@ pub fn query_for_variant( db: &Arc>, cf_data: &Arc, ctx: &coding::Context, -) -> Result, anyhow::Error> { +) -> Result>, anyhow::Error> { // Split off the genome release (checked) and convert to key as used in database. let query = spdi::Var { sequence: extract_chrom::from_var(variant, Some(&meta.genome_release))?, @@ -161,12 +161,15 @@ pub fn query_for_variant( let var: keys::Var = query.into(); let key: Vec = var.into(); let raw_value = db - .get_cf(cf_data, key)? - .ok_or_else(|| anyhow::anyhow!("could not find variant in database"))?; - let line = std::str::from_utf8(raw_value.as_slice())?; - let values = ctx.line_to_values(line)?; - - Ok(values) + .get_cf(cf_data, key) + .map_err(|e| anyhow::anyhow!("problem querying RocksDB: {}", e))?; + raw_value + .map(|raw_value| { + let line = std::str::from_utf8(raw_value.as_slice())?; + ctx.line_to_values(line) + .map_err(|e| anyhow::anyhow!("problem decoding line: {}", e)) + }) + .transpose() } /// Implementation of `tsv query` sub command. @@ -191,12 +194,9 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> tracing::info!("Running query..."); let before_query = std::time::Instant::now(); if let Some(variant) = args.query.variant.as_ref() { - print_values( - &mut out_writer, - args.out_format, - &meta, - query_for_variant(variant, &meta, &db, &cf_data, &ctx)?, - )?; + if let Some(record) = query_for_variant(variant, &meta, &db, &cf_data, &ctx)? { + print_values(&mut out_writer, args.out_format, &meta, &record)?; + } } else { let (start, stop) = if let Some(position) = args.query.position.as_ref() { let position = spdi::Pos { @@ -252,7 +252,7 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> let line = std::str::from_utf8(line_raw)?; let values = ctx.line_to_values(line)?; - print_values(&mut out_writer, args.out_format, &meta, values)?; + print_values(&mut out_writer, args.out_format, &meta, &values)?; iter.next(); } else { break;