Skip to content

Commit

Permalink
adding gnomad v4 example files
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Feb 27, 2024
1 parent 61ab476 commit 6dba78b
Show file tree
Hide file tree
Showing 13 changed files with 14,736 additions and 31 deletions.
66 changes: 60 additions & 6 deletions src/strucvars/txt_to_bin/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,12 @@ pub enum InputType {
StrucvarExacCnv,
/// Convert Thousand Genomes to binary.
StrucvarG1k,
/// Convert gnomAD SV to binary.
StrucvarGnomadSv,
/// Convert gnomAD SV v2 to binary.
StrucvarGnomadSv2,
/// Convert gnomAD CNV v4 to binary.
StrucvarGnomadCnv4,
/// Convert gnomAD SV v4 to binary.
StrucvarGnomadSv4,
/// Convert masked region to binary.
MaskedRegion,
/// Convert cross-link to binary.
Expand All @@ -100,7 +104,7 @@ pub struct Args {

/// Main entry point for the `strucvars txt-to-bin` command.
pub fn run(common_args: &crate::common::Args, args: &Args) -> Result<(), anyhow::Error> {
tracing::info!("Starting `db to-bin`");
tracing::info!("Starting `strucvars txt-to-bin`");
tracing::info!(" common_args = {:?}", &common_args);
tracing::info!(" args = {:?}", &args);

Expand Down Expand Up @@ -135,11 +139,21 @@ pub fn run(common_args: &crate::common::Args, args: &Args) -> Result<(), anyhow:
InputType::StrucvarG1k => {
vardbs::convert_to_bin(&args.path_input, &args.path_output, InputFileType::G1k)?
}
InputType::StrucvarGnomadSv => vardbs::convert_to_bin(
InputType::StrucvarGnomadSv2 => vardbs::convert_to_bin(
&args.path_input,
&args.path_output,
InputFileType::GnomadSv2,
)?,
InputType::StrucvarGnomadCnv4 => vardbs::convert_to_bin(
&args.path_input,
&args.path_output,
InputFileType::GnomadCnv4,
)?,
InputType::StrucvarGnomadSv4 => vardbs::convert_to_bin(
&args.path_input,
&args.path_output,
InputFileType::GnomadSv4,
)?,
InputType::MaskedRegion => masked::convert_to_bin(&args.path_input, &args.path_output)?,
InputType::Xlink => xlink::convert_to_bin(&args.path_input, &args.path_output)?,
}
Expand Down Expand Up @@ -303,14 +317,14 @@ mod test {
}

#[test]
fn run_strucvar_gnomad_smoke() -> Result<(), anyhow::Error> {
fn run_strucvar_gnomad_sv2_smoke() -> Result<(), anyhow::Error> {
let tmp_dir = temp_testdir::TempDir::default();
let common_args = common::Args {
verbose: Verbosity::new(0, 0),
};
let args = Args {
assembly: None,
input_type: InputType::StrucvarGnomadSv,
input_type: InputType::StrucvarGnomadSv2,
path_input: String::from(
"tests/db/to-bin/varfish-db-downloader/vardbs/grch37/strucvar/gnomad_sv.bed.gz",
),
Expand All @@ -322,6 +336,46 @@ mod test {
Ok(())
}

/// Smoke test: run the `StrucvarGnomadCnv4` conversion on the bundled example
/// file, writing the result into a temporary directory.
///
/// Only checks that `run` returns `Ok`; the written output is not inspected.
#[test]
fn run_strucvar_gnomad_cnv4_smoke() -> Result<(), anyhow::Error> {
    // Keep the temporary directory binding alive for the whole test.
    let tmp_dir = temp_testdir::TempDir::default();

    let path_input =
        "tests/db/to-bin/varfish-db-downloader/vardbs/grch38/strucvar/gnomad-cnv.bed.gz".to_owned();
    let path_output = tmp_dir.join("gnomad-cnv.bin");

    let verbose = Verbosity::new(0, 0);
    let common_args = common::Args { verbose };
    let args = Args {
        assembly: None,
        input_type: InputType::StrucvarGnomadCnv4,
        path_input,
        path_output,
    };

    super::run(&common_args, &args)
}

/// Smoke test: run the `StrucvarGnomadSv4` conversion on the bundled example
/// file, writing the result into a temporary directory.
///
/// Only checks that `run` returns `Ok`; the written output is not inspected.
#[test]
fn run_strucvar_gnomad_sv4_smoke() -> Result<(), anyhow::Error> {
    // Keep the temporary directory binding alive for the whole test.
    let tmp_dir = temp_testdir::TempDir::default();

    let path_input =
        "tests/db/to-bin/varfish-db-downloader/vardbs/grch38/strucvar/gnomad-sv.bed.gz".to_owned();
    let path_output = tmp_dir.join("gnomad-sv.bin");

    let verbose = Verbosity::new(0, 0);
    let common_args = common::Args { verbose };
    let args = Args {
        assembly: None,
        input_type: InputType::StrucvarGnomadSv4,
        path_input,
        path_output,
    };

    super::run(&common_args, &args)
}

#[test]
fn run_masked_region_smoke() -> Result<(), anyhow::Error> {
let tmp_dir = temp_testdir::TempDir::default();
Expand Down
73 changes: 63 additions & 10 deletions src/strucvars/txt_to_bin/vardbs/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,26 @@ pub struct GnomadSv4Record {
pub end: i32,
/// The structural variant type
pub svtype: String,
/// Number of homozygous alternative carriers
pub n_homalt: u32,
/// Number of heterozygous carriers
pub n_het: u32,
/// Number of male homozygous reference allele carriers.
pub male_n_homref: u32,
/// Number of male heterozygous alternate allele carriers.
pub male_n_het: u32,
/// Number of male homozygous alternate allele carriers.
pub male_n_homalt: u32,
/// Number of male hemizygous reference allele carriers.
pub male_n_hemiref: u32,
/// Number of male hemizygous alternate allele carriers.
pub male_n_hemialt: u32,
/// Number of female homozygous reference allele carriers.
pub female_n_homref: u32,
/// Number of female heterozygous alternate allele carriers.
pub female_n_het: u32,
/// Number of female homozygous alternate allele carriers.
pub female_n_homalt: u32,
/// Number of samples at this site (CNV only).
pub cnv_n_total: u32,
/// Number of samples with a CNV at this site (CNV only).
pub cnv_n_var: u32,
}

/// gnomAD CNV v4 database record as read from TSV file.
Expand All @@ -130,10 +146,10 @@ pub struct GnomadCnv4Record {
pub end: i32,
/// The structural variant type
pub svtype: String,
/// Number of homozygous alternative carriers
pub n_homalt: u32,
/// Number of heterozygous carriers
pub n_het: u32,
/// Number of samples at this site (passing QC).
pub n_total: u32,
/// Number of samples with a CNV at this site (passing QC).
pub n_var: u32,
}

/// Common type to convert input data to.
Expand Down Expand Up @@ -317,15 +333,52 @@ impl TryInto<Option<InputRecord>> for GnomadCnv4Record {
type Error = &'static str;

fn try_into(self) -> Result<Option<InputRecord>, Self::Error> {
todo!()
Ok(Some(InputRecord {
chromosome: self.chromosome.clone(),
chromosome2: self.chromosome,
begin: self.begin,
end: self.end,
sv_type: match self.svtype.as_str() {
"DEL" => SvType::Del,
"DUP" => SvType::Dup,
_ => {
error!("sv_type = {}", &self.svtype);
return Err("unknown SV type");
}
},
count: self.n_var,
}))
}
}

impl TryInto<Option<InputRecord>> for GnomadSv4Record {
    type Error = &'static str;

    /// Convert a gnomAD SV v4 record into the common `InputRecord`.
    ///
    /// Both `chromosome` and `chromosome2` of the result are set to this
    /// record's chromosome. `count` is the sum of `male_n_het`,
    /// `male_n_homalt`, `male_n_hemialt`, `female_n_het`, `female_n_homalt`
    /// and `cnv_n_var`.
    ///
    /// # Errors
    ///
    /// Returns `Err("unknown SV type")` (after logging the offending value)
    /// when `svtype` is not one of `"BND"`, `"CNV"`, `"DEL"`, `"DUP"`,
    /// `"INS"` or `"INV"`.
    fn try_into(self) -> Result<Option<InputRecord>, Self::Error> {
        // Resolve the SV type first so that the error path returns before any
        // field is cloned or moved.
        let sv_type = match self.svtype.as_str() {
            "BND" => SvType::Bnd,
            "CNV" => SvType::Cnv,
            "DEL" => SvType::Del,
            "DUP" => SvType::Dup,
            "INS" => SvType::Ins,
            "INV" => SvType::Inv,
            _ => {
                error!("sv_type = {}", &self.svtype);
                return Err("unknown SV type");
            }
        };

        // Total number of carriers across the per-sex counters plus the
        // CNV-specific counter.
        let count = self.male_n_het
            + self.male_n_homalt
            + self.male_n_hemialt
            + self.female_n_het
            + self.female_n_homalt
            + self.cnv_n_var;

        Ok(Some(InputRecord {
            chromosome: self.chromosome.clone(),
            chromosome2: self.chromosome,
            begin: self.begin,
            end: self.end,
            sv_type,
            count,
        }))
    }
}

Expand Down
30 changes: 15 additions & 15 deletions src/strucvars/txt_to_bin/vardbs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,39 +145,39 @@ mod test {
use super::InputFileType;

#[rstest::rstest]
#[case(
#[case::dbvar(
InputFileType::Dbvar,
"tests/db/to-bin/varfish-db-downloader/vardbs/grch37/strucvar/dbvar.bed.gz"
)]
#[case(
#[case::dgv(
InputFileType::Dgv,
"tests/db/to-bin/varfish-db-downloader/vardbs/grch37/strucvar/dgv.bed.gz"
)]
#[case(
#[case::dgv_gs(
InputFileType::DgvGs,
"tests/db/to-bin/varfish-db-downloader/vardbs/grch37/strucvar/dgv_gs.bed.gz"
)]
#[case(
#[case::exac(
InputFileType::Exac,
"tests/db/to-bin/varfish-db-downloader/vardbs/grch37/strucvar/exac.bed.gz"
)]
#[case(
#[case::g1k(
InputFileType::G1k,
"tests/db/to-bin/varfish-db-downloader/vardbs/grch37/strucvar/g1k.bed.gz"
)]
#[case(
#[case::gnomad_sv2(
InputFileType::GnomadSv2,
"tests/db/to-bin/varfish-db-downloader/vardbs/grch37/strucvar/gnomad_sv.bed.gz"
)]
// #[case(
// InputFileType::GnomadCnv4,
// "tests/db/to-bin/varfish-db-downloader/vardbs/grch37/strucvar/gnomad_cnv4.bed.gz",
// )]
// #[case(
// InputFileType::GnomadSv4,
// "tests/db/to-bin/varfish-db-downloader/vardbs/grch37/strucvar/gnomad_sv4.bed.gz",
// )]
#[case(
#[case::gnomad_cnv4(
InputFileType::GnomadCnv4,
"tests/db/to-bin/varfish-db-downloader/vardbs/grch38/strucvar/gnomad-cnv.bed.gz"
)]
#[case::gnomad_sv4(
InputFileType::GnomadSv4,
"tests/db/to-bin/varfish-db-downloader/vardbs/grch38/strucvar/gnomad-sv.bed.gz"
)]
#[case::inhouse_db(
InputFileType::InhouseDb,
"tests/db/to-bin/varfish-db-downloader/vardbs/grch37/strucvar/inhouse.tsv"
)]
Expand Down
Loading

0 comments on commit 6dba78b

Please sign in to comment.