From 9a0abd5faeacb3968db3da716d5df99d587c3395 Mon Sep 17 00:00:00 2001
From: Manuel Holtgrewe
Date: Thu, 1 Jun 2023 08:59:59 +0200
Subject: [PATCH] feat: adding "db-utils dump-meta" command (#56)

---
 src/db_utils/cli/dump_meta.rs | 94 +++++++++++++++++++++++++++++++++++
 src/db_utils/cli/mod.rs       |  1 +
 src/main.rs                   |  5 +++
 3 files changed, 100 insertions(+)
 create mode 100644 src/db_utils/cli/dump_meta.rs

diff --git a/src/db_utils/cli/dump_meta.rs b/src/db_utils/cli/dump_meta.rs
new file mode 100644
index 00000000..7c5a4568
--- /dev/null
+++ b/src/db_utils/cli/dump_meta.rs
@@ -0,0 +1,94 @@
+//! Implementation of `db-utils dump-meta` sub command.
+
+use clap::Parser;
+
+use crate::common;
+
+/// Command line arguments for `db-utils dump-meta` sub command.
+#[derive(Parser, Debug, Clone)]
+#[command(about = "Dump the metadata columns", long_about = None)]
+pub struct Args {
+    /// Path to input directory.
+    #[arg(long)]
+    pub path_in: String,
+}
+
+/// Main entry point for `db-utils dump-meta` sub command.
+///
+/// Opens the RocksDB database at `args.path_in` read-only and prints every
+/// entry of its `meta` column family to stdout as tab-separated `key`/`value`
+/// lines, framed by a `#key\tvalue` header and a `#rows\t<count>` footer.
+///
+/// # Errors
+///
+/// Returns an error if the database cannot be listed or opened, if it has no
+/// `meta` column family, if a key or value is not valid UTF-8, or if the
+/// iterator stops because of a read error.
+pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> {
+    tracing::info!("Starting 'db-utils dump-meta' command");
+    tracing::info!("common = {:#?}", &common);
+    tracing::info!("args = {:#?}", &args);
+
+    tracing::info!("Opening input database");
+    // List all column families in database and check that the meta column exists.
+    let cf_names = rocksdb::DB::list_cf(&rocksdb::Options::default(), &args.path_in)?;
+    if !cf_names.iter().any(|s| s == "meta") {
+        anyhow::bail!("input database does not contain a column family named 'meta'");
+    }
+    // Open database for reading.
+    let db_read = rocksdb::DB::open_cf_for_read_only(
+        &rocksdb::Options::default(),
+        &args.path_in,
+        ["meta"],
+        false,
+    )?;
+
+    // Iterate over all values in the "meta" column family.
+    println!("#key\tvalue");
+    let mut count = 0_usize;
+    // Report a missing handle as an error rather than panicking.
+    let cf_read = db_read
+        .cf_handle("meta")
+        .ok_or_else(|| anyhow::anyhow!("could not get handle for column family 'meta'"))?;
+    let mut iter = db_read.raw_iterator_cf(&cf_read);
+    iter.seek_to_first();
+    while iter.valid() {
+        if let (Some(iter_key), Some(iter_value)) = (iter.key(), iter.value()) {
+            // Validate the borrowed bytes in place; no per-row copy is needed.
+            println!(
+                "{}\t{}",
+                std::str::from_utf8(iter_key)?,
+                std::str::from_utf8(iter_value)?
+            );
+            iter.next();
+            count += 1;
+        } else {
+            break;
+        }
+    }
+    // `valid()` turns false at the end of the data and on a read error; check
+    // the status so that a truncated dump is not reported as complete.
+    iter.status()?;
+    println!("#rows\t{}", count);
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    use clap_verbosity_flag::Verbosity;
+
+    #[test]
+    fn smoke_test_dump() -> Result<(), anyhow::Error> {
+        let common = common::cli::Args {
+            verbose: Verbosity::new(1, 0),
+        };
+        let args = Args {
+            path_in: String::from("tests/dbsnp/example/dbsnp.brca1.vcf.bgz.db"),
+        };
+
+        run(&common, &args)
+    }
+}
diff --git a/src/db_utils/cli/mod.rs b/src/db_utils/cli/mod.rs
index 9f3dc652..8247c304 100644
--- a/src/db_utils/cli/mod.rs
+++ b/src/db_utils/cli/mod.rs
@@ -1,3 +1,4 @@
 //! CLI for the database utilities.
 
 pub mod copy;
+pub mod dump_meta;
diff --git a/src/main.rs b/src/main.rs
index fbbfe30e..91e9f7f3 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -154,6 +154,8 @@ struct DbUtils {
 enum DbUtilsCommands {
     /// "copy" sub command
     Copy(db_utils::cli::copy::Args),
+    /// "dump-meta" sub command
+    DumpMeta(db_utils::cli::dump_meta::Args),
 }
 
 pub fn main() -> Result<(), anyhow::Error> {
@@ -211,6 +213,9 @@ pub fn main() -> Result<(), anyhow::Error> {
             },
             Commands::DbUtils(args) => match &args.command {
                 DbUtilsCommands::Copy(args) => db_utils::cli::copy::run(&cli.common, args)?,
+                DbUtilsCommands::DumpMeta(args) => {
+                    db_utils::cli::dump_meta::run(&cli.common, args)?
+                }
             },
         }
 