Skip to content

Commit

Permalink
feat: adding support for gnomAD-SV (#291) (#297)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Nov 17, 2023
1 parent 807abaf commit 8195101
Show file tree
Hide file tree
Showing 101 changed files with 8,084 additions and 69 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
tests/**/*.db/** filter=lfs diff=lfs merge=lfs -text
tests/**/rocksdb/** filter=lfs diff=lfs merge=lfs -text
src/gnomad_sv/cli/snapshots/*.snap filter=lfs diff=lfs merge=lfs -text
12 changes: 10 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ fn main() {
println!("cargo:rerun-if-changed=src/proto/annonars/cons/v1/base.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/dbsnp/v1/base.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gene/v1/base.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/mtdna.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/exac_cnv.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad2.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad3.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad_cnv4.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad_sv2.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad_sv4.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/mtdna.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/vep_common.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/vep_gnomad2.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/vep_gnomad3.proto");
Expand Down Expand Up @@ -43,9 +47,13 @@ fn main() {
"annonars/cons/v1/base.proto",
"annonars/dbsnp/v1/base.proto",
"annonars/gene/v1/base.proto",
"annonars/gnomad/v1/mtdna.proto",
"annonars/gnomad/v1/exac_cnv.proto",
"annonars/gnomad/v1/gnomad2.proto",
"annonars/gnomad/v1/gnomad3.proto",
"annonars/gnomad/v1/gnomad_cnv4.proto",
"annonars/gnomad/v1/gnomad_sv2.proto",
"annonars/gnomad/v1/gnomad_sv4.proto",
"annonars/gnomad/v1/mtdna.proto",
"annonars/gnomad/v1/vep_common.proto",
"annonars/gnomad/v1/vep_gnomad2.proto",
"annonars/gnomad/v1/vep_gnomad3.proto",
Expand Down
6 changes: 3 additions & 3 deletions src/clinvar_sv/cli/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::common::{self, cli::extract_chrom, spdi};
#[derive(clap::Args, Debug, Clone, Default)]
#[group(required = true, multiple = false)]
pub struct ArgsQuery {
/// Specify range to query for.
/// Specify accession to query for.
#[arg(long, group = "query")]
pub accession: Option<String>,
/// Query for all variants.
Expand All @@ -22,9 +22,9 @@ pub struct ArgsQuery {
pub range: Option<spdi::Range>,
}

/// Command line arguments for `tsv query` sub command.
/// Command line arguments for `clinvar-sv query` sub command.
#[derive(clap::Parser, Debug, Clone)]
#[command(about = "query gnomAD-mtDNA data stored in RocksDB", long_about = None)]
#[command(about = "query ClinVar SV data stored in RocksDB", long_about = None)]
pub struct Args {
/// Path to RocksDB directory with data.
#[arg(long)]
Expand Down
7 changes: 6 additions & 1 deletion src/common/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,21 @@ pub struct Args {
}

/// Output format to write.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, clap::ValueEnum, strum::Display)]
#[derive(
Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default, clap::ValueEnum, strum::Display,
)]
#[strum(serialize_all = "lowercase")]
pub enum OutputFormat {
/// JSONL format.
#[default]
Jsonl,
}

/// Local genome release for command line arguments.
#[derive(
Copy,
Clone,
Default,
PartialEq,
Eq,
PartialOrd,
Expand All @@ -34,6 +38,7 @@ pub enum OutputFormat {
pub enum GenomeRelease {
/// GRCh37 genome release.
#[strum(serialize = "grch37")]
#[default]
Grch37,
/// GRCh38 genome release.
#[strum(serialize = "grch38")]
Expand Down
6 changes: 3 additions & 3 deletions src/common/spdi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::{fmt::Display, str::FromStr};
///
/// # Example
///
/// ```
/// ```ignore
/// use std::str::FromStr;
/// use annonars::common::spdi::Var;
///
Expand Down Expand Up @@ -84,7 +84,7 @@ impl Display for Var {
///
/// # Example
///
/// ```
/// ```ignore
/// use std::str::FromStr;
/// use annonars::common::spdi::Pos;
///
Expand Down Expand Up @@ -133,7 +133,7 @@ impl Display for Pos {
///
/// # Example
///
/// ```
/// ```ignore
/// use std::str::FromStr;
/// use annonars::common::spdi::Range;
///
Expand Down
43 changes: 33 additions & 10 deletions src/gnomad_nuclear/cli/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,47 @@ use crate::{
};

/// Select the type of gnomAD data to import.
#[derive(strum::Display, clap::ValueEnum, Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[derive(strum::Display, clap::ValueEnum, Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum GnomadKind {
/// gnomAD exomes
/// gnomAD exomes / CNV (or: ExAC CNVs)
#[strum(serialize = "exomes")]
#[default]
Exomes,
/// gnomAD genomes
/// gnomAD genomes / SVs
#[strum(serialize = "genomes")]
Genomes,
}

/// Select the gnomAD major version (1.x through 4.x; important for the field names).
#[derive(strum::Display, clap::ValueEnum, Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GnomadVersion {
/// Version 1.x (ExAC)
One,
/// Version 2.x
Two,
/// Version 3.x
Three,
/// Version 4.x
Four,
}

impl FromStr for GnomadVersion {
    type Err = anyhow::Error;

    /// Derive the gnomAD major version from a dotted version string.
    ///
    /// Only the text in front of the first `.` is inspected; it must be
    /// exactly `1`, `2`, `3`, or `4`.
    ///
    /// # Errors
    ///
    /// Fails with `invalid GnomadVersion: <input>` when the input contains no
    /// `.` or when the part before the first `.` is not one of the four
    /// supported major versions.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // `split_once` yields the prefix before the first dot, so a prefix of
        // "2" here is the same condition as the input starting with "2.".
        let major = s.split_once('.').map(|(leading, _rest)| leading);
        match major {
            Some("1") => Ok(GnomadVersion::One),
            Some("2") => Ok(GnomadVersion::Two),
            Some("3") => Ok(GnomadVersion::Three),
            Some("4") => Ok(GnomadVersion::Four),
            _ => anyhow::bail!("invalid GnomadVersion: {}", s),
        }
    }
}

/// Command line arguments for `gnomad_nuclear import` sub command.
Expand Down Expand Up @@ -199,6 +223,7 @@ fn process_window(
)?
.encode_to_vec()
}
_ => anyhow::bail!("gnomAD version must be either 2 or 3"),
};
db.put_cf(&cf_gnomad, &key_buf, &record_buf)?;
}
Expand All @@ -210,13 +235,10 @@ fn process_window(

/// Implementation of `gnomad_nuclear import` sub command.
pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> {
let gnomad_version = if args.gnomad_version.starts_with("2.") {
GnomadVersion::Two
} else if args.gnomad_version.starts_with("3.") {
GnomadVersion::Three
} else {
anyhow::bail!("gnomAD version must be either 2 or 3")
};
let gnomad_version: GnomadVersion = args.gnomad_version.parse()?;
if !matches!(gnomad_version, GnomadVersion::Two | GnomadVersion::Three) {
anyhow::bail!("gnomAD version must be either 2 or 3");
}

// Put defaults for fields to serialize into args.
let args = match gnomad_version {
Expand All @@ -242,6 +264,7 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error>
.transpose()?,
..args.clone()
},
_ => anyhow::bail!("gnomAD version must be either 2 or 3"),
};

tracing::info!("Starting 'gnomad-nuclear import' command");
Expand Down
3 changes: 3 additions & 0 deletions src/gnomad_pbs/exac_cnv.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//! Code for working with the ExAC r1 CNV data.
include!(concat!(env!("OUT_DIR"), "/annonars.gnomad.v1.exac_cnv.rs"));
2 changes: 1 addition & 1 deletion src/gnomad_pbs/gnomad2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ impl Record {
by_sex: Some(AlleleCountsBySex {
overall: Self::extract_allele_counts(record, &prefix, "")?,
xx: Self::extract_allele_counts(record, &prefix, "_female")?,
xy: Self::extract_allele_counts(record, &prefix, "male")?,
xy: Self::extract_allele_counts(record, &prefix, "_male")?,
}),
raw: Self::extract_allele_counts(record, &prefix, "_raw")?,
popmax: common::noodles::get_string(record, &format!("{}_popmax", cohort)).ok(),
Expand Down
6 changes: 6 additions & 0 deletions src/gnomad_pbs/gnomad_cnv4.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//! Code for working with the gnomAD CNV v4.0 data.
include!(concat!(
env!("OUT_DIR"),
"/annonars.gnomad.v1.gnomad_cnv4.rs"
));
6 changes: 6 additions & 0 deletions src/gnomad_pbs/gnomad_sv2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//! Code for working with the gnomAD SV v2.1 data.
include!(concat!(
env!("OUT_DIR"),
"/annonars.gnomad.v1.gnomad_sv2.rs"
));
6 changes: 6 additions & 0 deletions src/gnomad_pbs/gnomad_sv4.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//! Code for working with the gnomAD SV v4.0 data.
include!(concat!(
env!("OUT_DIR"),
"/annonars.gnomad.v1.gnomad_sv4.rs"
));
4 changes: 4 additions & 0 deletions src/gnomad_pbs/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
//! Protocol Buffers related code for gnomAD data (mtDNA, nuclear, and CNV/SV) and ExAC CNV.
pub mod exac_cnv;
pub mod gnomad2;
pub mod gnomad3;
pub mod gnomad_cnv4;
pub mod gnomad_sv2;
pub mod gnomad_sv4;
pub mod mtdna;
pub mod vep_common;
pub mod vep_gnomad2;
Expand Down
Loading

0 comments on commit 8195101

Please sign in to comment.