Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding support for gnomAD-SV (#291) #297

Merged
merged 1 commit into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
tests/**/*.db/** filter=lfs diff=lfs merge=lfs -text
tests/**/rocksdb/** filter=lfs diff=lfs merge=lfs -text
src/gnomad_sv/cli/snapshots/*.snap filter=lfs diff=lfs merge=lfs -text
12 changes: 10 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ fn main() {
println!("cargo:rerun-if-changed=src/proto/annonars/cons/v1/base.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/dbsnp/v1/base.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gene/v1/base.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/mtdna.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/exac_cnv.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad2.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad3.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad_cnv4.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad_sv2.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad_sv4.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/mtdna.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/vep_common.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/vep_gnomad2.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/vep_gnomad3.proto");
Expand Down Expand Up @@ -43,9 +47,13 @@ fn main() {
"annonars/cons/v1/base.proto",
"annonars/dbsnp/v1/base.proto",
"annonars/gene/v1/base.proto",
"annonars/gnomad/v1/mtdna.proto",
"annonars/gnomad/v1/exac_cnv.proto",
"annonars/gnomad/v1/gnomad2.proto",
"annonars/gnomad/v1/gnomad3.proto",
"annonars/gnomad/v1/gnomad_cnv4.proto",
"annonars/gnomad/v1/gnomad_sv2.proto",
"annonars/gnomad/v1/gnomad_sv4.proto",
"annonars/gnomad/v1/mtdna.proto",
"annonars/gnomad/v1/vep_common.proto",
"annonars/gnomad/v1/vep_gnomad2.proto",
"annonars/gnomad/v1/vep_gnomad3.proto",
Expand Down
6 changes: 3 additions & 3 deletions src/clinvar_sv/cli/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::common::{self, cli::extract_chrom, spdi};
#[derive(clap::Args, Debug, Clone, Default)]
#[group(required = true, multiple = false)]
pub struct ArgsQuery {
/// Specify range to query for.
/// Specify accession to query for.
#[arg(long, group = "query")]
pub accession: Option<String>,
/// Query for all variants.
Expand All @@ -22,9 +22,9 @@ pub struct ArgsQuery {
pub range: Option<spdi::Range>,
}

/// Command line arguments for `tsv query` sub command.
/// Command line arguments for `clinvar-sv query` sub command.
#[derive(clap::Parser, Debug, Clone)]
#[command(about = "query gnomAD-mtDNA data stored in RocksDB", long_about = None)]
#[command(about = "query ClinVar SV data stored in RocksDB", long_about = None)]
pub struct Args {
/// Path to RocksDB directory with data.
#[arg(long)]
Expand Down
7 changes: 6 additions & 1 deletion src/common/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,21 @@ pub struct Args {
}

/// Output format to write.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, clap::ValueEnum, strum::Display)]
#[derive(
Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default, clap::ValueEnum, strum::Display,
)]
#[strum(serialize_all = "lowercase")]
pub enum OutputFormat {
/// JSONL format.
#[default]
Jsonl,
}

/// Local genome release for command line arguments.
#[derive(
Copy,
Clone,
Default,
PartialEq,
Eq,
PartialOrd,
Expand All @@ -34,6 +38,7 @@ pub enum OutputFormat {
pub enum GenomeRelease {
/// GRCh37 genome release.
#[strum(serialize = "grch37")]
#[default]
Grch37,
/// GRCh38 genome release.
#[strum(serialize = "grch38")]
Expand Down
6 changes: 3 additions & 3 deletions src/common/spdi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::{fmt::Display, str::FromStr};
///
/// # Example
///
/// ```
/// ```ignore
/// use std::str::FromStr;
/// use annonars::common::spdi::Var;
///
Expand Down Expand Up @@ -84,7 +84,7 @@ impl Display for Var {
///
/// # Example
///
/// ```
/// ```ignore
/// use std::str::FromStr;
/// use annonars::common::spdi::Pos;
///
Expand Down Expand Up @@ -133,7 +133,7 @@ impl Display for Pos {
///
/// # Example
///
/// ```
/// ```ignore
/// use std::str::FromStr;
/// use annonars::common::spdi::Range;
///
Expand Down
43 changes: 33 additions & 10 deletions src/gnomad_nuclear/cli/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,47 @@
};

/// Select the type of gnomAD data to import.
#[derive(strum::Display, clap::ValueEnum, Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[derive(strum::Display, clap::ValueEnum, Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum GnomadKind {
/// gnomAD exomes
/// gnomAD exomes / CNV (or: ExAC CNVs)
#[strum(serialize = "exomes")]
#[default]
Exomes,
/// gnomAD genomes
/// gnomAD genomes / SVs
#[strum(serialize = "genomes")]
Genomes,
}

/// Select the gnomAD major version (important for the field names).
///
/// Besides the gnomAD 2.x, 3.x, and 4.x releases, the `One` variant covers
/// the 1.x (ExAC) release.
#[derive(strum::Display, clap::ValueEnum, Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GnomadVersion {
/// Version 1.x (ExAC)
One,
/// Version 2.x
Two,
/// Version 3.x
Three,
/// Version 4.x
Four,
}

/// Conversion from a version string (e.g., `"2.1"`) to a [`GnomadVersion`].
///
/// Only the leading major-version prefix, including the dot, is inspected.
impl FromStr for GnomadVersion {
/// Parsing failures are reported as `anyhow::Error`.
type Err = anyhow::Error;

/// Parse out the gnomAD version from the leading number.
///
/// Strings starting with `"1."`, `"2."`, `"3."`, or `"4."` map to `One`,
/// `Two`, `Three`, and `Four`, respectively.
///
/// # Errors
///
/// Returns an error with message `invalid GnomadVersion: <s>` if `s` starts
/// with none of these prefixes; this includes a bare major number such as
/// `"2"` without a trailing dot.
fn from_str(s: &str) -> Result<Self, Self::Err> {
// The prefix checks are tried in order; the first match wins.
Ok(if s.starts_with("1.") {
GnomadVersion::One
} else if s.starts_with("2.") {
GnomadVersion::Two
} else if s.starts_with("3.") {
GnomadVersion::Three
} else if s.starts_with("4.") {
GnomadVersion::Four
} else {
// `bail!` returns early from `from_str`, so `Ok(..)` is never built here.
anyhow::bail!("invalid GnomadVersion: {}", s)

Check warning on line 55 in src/gnomad_nuclear/cli/import.rs

View check run for this annotation

Codecov / codecov/patch

src/gnomad_nuclear/cli/import.rs#L55

Added line #L55 was not covered by tests
})
}
}

/// Command line arguments for `gnomad_nuclear import` sub command.
Expand Down Expand Up @@ -199,6 +223,7 @@
)?
.encode_to_vec()
}
_ => anyhow::bail!("gnomAD version must be either 2 or 3"),

Check warning on line 226 in src/gnomad_nuclear/cli/import.rs

View check run for this annotation

Codecov / codecov/patch

src/gnomad_nuclear/cli/import.rs#L226

Added line #L226 was not covered by tests
};
db.put_cf(&cf_gnomad, &key_buf, &record_buf)?;
}
Expand All @@ -210,13 +235,10 @@

/// Implementation of `gnomad_nuclear import` sub command.
pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> {
let gnomad_version = if args.gnomad_version.starts_with("2.") {
GnomadVersion::Two
} else if args.gnomad_version.starts_with("3.") {
GnomadVersion::Three
} else {
anyhow::bail!("gnomAD version must be either 2 or 3")
};
let gnomad_version: GnomadVersion = args.gnomad_version.parse()?;
if !matches!(gnomad_version, GnomadVersion::Two | GnomadVersion::Three) {
anyhow::bail!("gnomAD version must be either 2 or 3");

Check warning on line 240 in src/gnomad_nuclear/cli/import.rs

View check run for this annotation

Codecov / codecov/patch

src/gnomad_nuclear/cli/import.rs#L240

Added line #L240 was not covered by tests
}

// Put defaults for fields to serialize into args.
let args = match gnomad_version {
Expand All @@ -242,6 +264,7 @@
.transpose()?,
..args.clone()
},
_ => anyhow::bail!("gnomAD version must be either 2 or 3"),

Check warning on line 267 in src/gnomad_nuclear/cli/import.rs

View check run for this annotation

Codecov / codecov/patch

src/gnomad_nuclear/cli/import.rs#L267

Added line #L267 was not covered by tests
};

tracing::info!("Starting 'gnomad-nuclear import' command");
Expand Down Expand Up @@ -314,13 +337,13 @@
)?;
db.put_cf(&cf_meta, "gnomad-version", &args.gnomad_version)?;
if let Some(vep_version) = vep_version {
db.put_cf(&cf_meta, "gnomad-vep-version", &vep_version)?;

Check warning on line 340 in src/gnomad_nuclear/cli/import.rs

View workflow job for this annotation

GitHub Actions / clippy

the borrowed expression implements the required traits

warning: the borrowed expression implements the required traits --> src/gnomad_nuclear/cli/import.rs:340:51 | 340 | db.put_cf(&cf_meta, "gnomad-vep-version", &vep_version)?; | ^^^^^^^^^^^^ help: change this to: `vep_version` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_borrows_for_generic_args = note: `#[warn(clippy::needless_borrows_for_generic_args)]` on by default
}
if let Some(dbsnp_version) = dbsnp_version {
db.put_cf(&cf_meta, "gnomad-dbsnp-version", &dbsnp_version)?;

Check warning on line 343 in src/gnomad_nuclear/cli/import.rs

View workflow job for this annotation

GitHub Actions / clippy

the borrowed expression implements the required traits

warning: the borrowed expression implements the required traits --> src/gnomad_nuclear/cli/import.rs:343:53 | 343 | db.put_cf(&cf_meta, "gnomad-dbsnp-version", &dbsnp_version)?; | ^^^^^^^^^^^^^^ help: change this to: `dbsnp_version` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_borrows_for_generic_args
}
if let Some(age_distributions) = age_distributions {
db.put_cf(&cf_meta, "gnomad-age-distributions", &age_distributions)?;

Check warning on line 346 in src/gnomad_nuclear/cli/import.rs

View workflow job for this annotation

GitHub Actions / clippy

the borrowed expression implements the required traits

warning: the borrowed expression implements the required traits --> src/gnomad_nuclear/cli/import.rs:346:57 | 346 | db.put_cf(&cf_meta, "gnomad-age-distributions", &age_distributions)?; | ^^^^^^^^^^^^^^^^^^ help: change this to: `age_distributions` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_borrows_for_generic_args
}
tracing::info!(
"... done opening RocksDB for writing in {:?}",
Expand Down
3 changes: 3 additions & 0 deletions src/gnomad_pbs/exac_cnv.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//! Code for working with the ExAC r1 CNV data.

// Textually include `annonars.gnomad.v1.exac_cnv.rs` from the build output
// directory (`OUT_DIR`, resolved at compile time through `env!`).
// NOTE(review): presumably generated by `build.rs` from
// `annonars/gnomad/v1/exac_cnv.proto` -- confirm against the build script.
include!(concat!(env!("OUT_DIR"), "/annonars.gnomad.v1.exac_cnv.rs"));
2 changes: 1 addition & 1 deletion src/gnomad_pbs/gnomad2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ impl Record {
by_sex: Some(AlleleCountsBySex {
overall: Self::extract_allele_counts(record, &prefix, "")?,
xx: Self::extract_allele_counts(record, &prefix, "_female")?,
xy: Self::extract_allele_counts(record, &prefix, "male")?,
xy: Self::extract_allele_counts(record, &prefix, "_male")?,
}),
raw: Self::extract_allele_counts(record, &prefix, "_raw")?,
popmax: common::noodles::get_string(record, &format!("{}_popmax", cohort)).ok(),
Expand Down
6 changes: 6 additions & 0 deletions src/gnomad_pbs/gnomad_cnv4.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//! Code for working with the gnomAD CNV v4.0 data.

// Textually include `annonars.gnomad.v1.gnomad_cnv4.rs` from the build output
// directory (`OUT_DIR`, resolved at compile time through `env!`).
// NOTE(review): presumably generated by `build.rs` from
// `annonars/gnomad/v1/gnomad_cnv4.proto` -- confirm against the build script.
include!(concat!(
env!("OUT_DIR"),
"/annonars.gnomad.v1.gnomad_cnv4.rs"
));
6 changes: 6 additions & 0 deletions src/gnomad_pbs/gnomad_sv2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//! Code for working with the gnomAD SV v2.1 data.

// Textually include `annonars.gnomad.v1.gnomad_sv2.rs` from the build output
// directory (`OUT_DIR`, resolved at compile time through `env!`).
// NOTE(review): presumably generated by `build.rs` from
// `annonars/gnomad/v1/gnomad_sv2.proto` -- confirm against the build script.
include!(concat!(
env!("OUT_DIR"),
"/annonars.gnomad.v1.gnomad_sv2.rs"
));
6 changes: 6 additions & 0 deletions src/gnomad_pbs/gnomad_sv4.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//! Code for working with the gnomAD SV v4.0 data.

// Textually include `annonars.gnomad.v1.gnomad_sv4.rs` from the build output
// directory (`OUT_DIR`, resolved at compile time through `env!`).
// NOTE(review): presumably generated by `build.rs` from
// `annonars/gnomad/v1/gnomad_sv4.proto` -- confirm against the build script.
include!(concat!(
env!("OUT_DIR"),
"/annonars.gnomad.v1.gnomad_sv4.rs"
));
4 changes: 4 additions & 0 deletions src/gnomad_pbs/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
//! Protocol Buffers related code for gnomAD (mtDNA, nuclear, SV, CNV) and ExAC CNV data.

pub mod exac_cnv;
pub mod gnomad2;
pub mod gnomad3;
pub mod gnomad_cnv4;
pub mod gnomad_sv2;
pub mod gnomad_sv4;
pub mod mtdna;
pub mod vep_common;
pub mod vep_gnomad2;
Expand Down
Loading
Loading