Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Oct 5, 2023
1 parent 83ee391 commit 417a64f
Show file tree
Hide file tree
Showing 10 changed files with 556 additions and 28 deletions.
360 changes: 337 additions & 23 deletions src/seqvars/ingest/header.rs

Large diffs are not rendered by default.

12 changes: 11 additions & 1 deletion src/seqvars/ingest/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ pub struct Args {
pub path_out: String,
}

/// Return the version string reported for the `varfish-server-worker` crate.
///
/// Test builds always yield the fixed placeholder `x.y.z` (presumably so that output
/// embedding the version, such as snapshot files, does not change with every release);
/// all other builds yield the crate version taken from `CARGO_PKG_VERSION` at compile
/// time.
fn worker_version() -> &'static str {
    // Exactly one of the two constants exists in any given build configuration.
    #[cfg(test)]
    const VERSION: &str = "x.y.z";
    #[cfg(not(test))]
    const VERSION: &str = env!("CARGO_PKG_VERSION");

    VERSION
}

/// Main entry point for `seqvars ingest` sub command.
pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow::Error> {
let before_anything = std::time::Instant::now();
Expand All @@ -42,7 +51,8 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow:
let input_header = input_reader
.read_header()
.map_err(|e| anyhow::anyhow!("problem reading VCF header: {}", e))?;
let output_header = header::build_output_header(&input_header, args.genomebuild)?;
let output_header =
header::build_output_header(&input_header, args.genomebuild, worker_version())?;

let mut output_writer = {
let writer = std::fs::File::create(&args.path_out).map_err(|e| {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
---
source: src/seqvars/ingest/header.rs
expression: "std::fs::read_to_string(out_path_str)?"
---
##fileformat=VCFv4.4
##INFO=<ID=gnomad_exomes_an,Number=1,Type=Integer,Description="Number of samples in gnomAD exomes">
##INFO=<ID=gnomad_exomes_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in gnomAD exomes">
##INFO=<ID=gnomad_exomes_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in gnomAD exomes">
##INFO=<ID=gnomad_exomes_hemi,Number=1,Type=Integer,Description="Number of hemi. alt. carriers in gnomAD exomes">
##INFO=<ID=gnomad_genomes_an,Number=1,Type=Integer,Description="Number of samples in gnomAD genomes">
##INFO=<ID=gnomad_genomes_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in gnomAD genomes">
##INFO=<ID=gnomad_genomes_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in gnomAD genomes">
##INFO=<ID=gnomad_genomes_hemi,Number=1,Type=Integer,Description="Number of hemi. alt. carriers in gnomAD genomes">
##INFO=<ID=helix_an,Number=1,Type=Integer,Description="Number of samples in HelixMtDb">
##INFO=<ID=helix_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in HelixMtDb">
##INFO=<ID=helix_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in HelixMtDb">
##INFO=<ID=ANN,Number=.,Type=String,Description="Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'">
##FILTER=<ID=PASS,Description="All filters passed">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Read depth for each allele">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Conditional genotype quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group">
##contig=<ID=chr1,length=248956422,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr2,length=242193529,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr3,length=198295559,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr4,length=190214555,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr5,length=181538259,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr6,length=170805979,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr7,length=159345973,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr8,length=145138636,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr9,length=138394717,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr10,length=133797422,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr11,length=135086622,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr12,length=133275309,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr13,length=114364328,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr14,length=107043718,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr15,length=101991189,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr16,length=90338345,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr17,length=83257441,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr18,length=80373285,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr19,length=58617616,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr20,length=64444167,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr21,length=46709983,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr22,length=50818468,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chrX,length=156040895,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chrY,length=57227415,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chrM,length=16569,assembly="GRCh37",species="Homo sapiens">
##x-varfish-version=<ID=varfish-server-worker,Version="x.y.z">
##x-varfish-version=<ID=orig-caller,Name="Dragen",Version="SW: 07.021.624.3.10.4, HW: 07.021.624">
#CHROM POS ID REF ALT QUAL FILTER INFO

Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
---
source: src/seqvars/ingest/header.rs
expression: "std::fs::read_to_string(out_path_str)?"
---
##fileformat=VCFv4.4
##INFO=<ID=gnomad_exomes_an,Number=1,Type=Integer,Description="Number of samples in gnomAD exomes">
##INFO=<ID=gnomad_exomes_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in gnomAD exomes">
##INFO=<ID=gnomad_exomes_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in gnomAD exomes">
##INFO=<ID=gnomad_exomes_hemi,Number=1,Type=Integer,Description="Number of hemi. alt. carriers in gnomAD exomes">
##INFO=<ID=gnomad_genomes_an,Number=1,Type=Integer,Description="Number of samples in gnomAD genomes">
##INFO=<ID=gnomad_genomes_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in gnomAD genomes">
##INFO=<ID=gnomad_genomes_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in gnomAD genomes">
##INFO=<ID=gnomad_genomes_hemi,Number=1,Type=Integer,Description="Number of hemi. alt. carriers in gnomAD genomes">
##INFO=<ID=helix_an,Number=1,Type=Integer,Description="Number of samples in HelixMtDb">
##INFO=<ID=helix_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in HelixMtDb">
##INFO=<ID=helix_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in HelixMtDb">
##INFO=<ID=ANN,Number=.,Type=String,Description="Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'">
##FILTER=<ID=PASS,Description="All filters passed">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Read depth for each allele">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Conditional genotype quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group">
##contig=<ID=chr1,length=248956422,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr2,length=242193529,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr3,length=198295559,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr4,length=190214555,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr5,length=181538259,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr6,length=170805979,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr7,length=159345973,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr8,length=145138636,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr9,length=138394717,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr10,length=133797422,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr11,length=135086622,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr12,length=133275309,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr13,length=114364328,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr14,length=107043718,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr15,length=101991189,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr16,length=90338345,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr17,length=83257441,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr18,length=80373285,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr19,length=58617616,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr20,length=64444167,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr21,length=46709983,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr22,length=50818468,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chrX,length=156040895,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chrY,length=57227415,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chrM,length=16569,assembly="GRCh37",species="Homo sapiens">
##x-varfish-version=<ID=varfish-server-worker,Version="x.y.z">
##x-varfish-version=<ID=orig-caller,Name="Dragen",Version="SW: 07.021.624.3.10.9, HW: 07.021.624">
#CHROM POS ID REF ALT QUAL FILTER INFO

Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
---
source: src/seqvars/ingest/header.rs
expression: "std::fs::read_to_string(out_path_str)?"
---
##fileformat=VCFv4.4
##INFO=<ID=gnomad_exomes_an,Number=1,Type=Integer,Description="Number of samples in gnomAD exomes">
##INFO=<ID=gnomad_exomes_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in gnomAD exomes">
##INFO=<ID=gnomad_exomes_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in gnomAD exomes">
##INFO=<ID=gnomad_exomes_hemi,Number=1,Type=Integer,Description="Number of hemi. alt. carriers in gnomAD exomes">
##INFO=<ID=gnomad_genomes_an,Number=1,Type=Integer,Description="Number of samples in gnomAD genomes">
##INFO=<ID=gnomad_genomes_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in gnomAD genomes">
##INFO=<ID=gnomad_genomes_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in gnomAD genomes">
##INFO=<ID=gnomad_genomes_hemi,Number=1,Type=Integer,Description="Number of hemi. alt. carriers in gnomAD genomes">
##INFO=<ID=helix_an,Number=1,Type=Integer,Description="Number of samples in HelixMtDb">
##INFO=<ID=helix_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in HelixMtDb">
##INFO=<ID=helix_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in HelixMtDb">
##INFO=<ID=ANN,Number=.,Type=String,Description="Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'">
##FILTER=<ID=PASS,Description="All filters passed">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Read depth for each allele">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Conditional genotype quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group">
##contig=<ID=chr1,length=248956422,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr2,length=242193529,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr3,length=198295559,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr4,length=190214555,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr5,length=181538259,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr6,length=170805979,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr7,length=159345973,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr8,length=145138636,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr9,length=138394717,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr10,length=133797422,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr11,length=135086622,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr12,length=133275309,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr13,length=114364328,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr14,length=107043718,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr15,length=101991189,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr16,length=90338345,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr17,length=83257441,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr18,length=80373285,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr19,length=58617616,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr20,length=64444167,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr21,length=46709983,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr22,length=50818468,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chrX,length=156040895,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chrY,length=57227415,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chrM,length=16569,assembly="GRCh37",species="Homo sapiens">
##x-varfish-version=<ID=varfish-server-worker,Version="x.y.z">
##x-varfish-version=<ID=orig-caller,Name="GatkHaplotypeCaller",Version="3.7-0-gcfedb67">
#CHROM POS ID REF ALT QUAL FILTER INFO

Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
---
source: src/seqvars/ingest/header.rs
expression: "std::fs::read_to_string(out_path_str)?"
---
##fileformat=VCFv4.4
##INFO=<ID=gnomad_exomes_an,Number=1,Type=Integer,Description="Number of samples in gnomAD exomes">
##INFO=<ID=gnomad_exomes_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in gnomAD exomes">
##INFO=<ID=gnomad_exomes_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in gnomAD exomes">
##INFO=<ID=gnomad_exomes_hemi,Number=1,Type=Integer,Description="Number of hemi. alt. carriers in gnomAD exomes">
##INFO=<ID=gnomad_genomes_an,Number=1,Type=Integer,Description="Number of samples in gnomAD genomes">
##INFO=<ID=gnomad_genomes_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in gnomAD genomes">
##INFO=<ID=gnomad_genomes_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in gnomAD genomes">
##INFO=<ID=gnomad_genomes_hemi,Number=1,Type=Integer,Description="Number of hemi. alt. carriers in gnomAD genomes">
##INFO=<ID=helix_an,Number=1,Type=Integer,Description="Number of samples in HelixMtDb">
##INFO=<ID=helix_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in HelixMtDb">
##INFO=<ID=helix_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in HelixMtDb">
##INFO=<ID=ANN,Number=.,Type=String,Description="Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'">
##FILTER=<ID=PASS,Description="All filters passed">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Read depth for each allele">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Conditional genotype quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group">
##contig=<ID=chr1,length=248956422,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr2,length=242193529,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr3,length=198295559,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr4,length=190214555,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr5,length=181538259,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr6,length=170805979,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr7,length=159345973,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr8,length=145138636,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr9,length=138394717,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr10,length=133797422,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr11,length=135086622,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr12,length=133275309,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr13,length=114364328,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr14,length=107043718,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr15,length=101991189,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr16,length=90338345,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr17,length=83257441,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr18,length=80373285,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr19,length=58617616,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr20,length=64444167,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr21,length=46709983,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chr22,length=50818468,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chrX,length=156040895,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chrY,length=57227415,assembly="GRCh37",species="Homo sapiens">
##contig=<ID=chrM,length=16569,assembly="GRCh37",species="Homo sapiens">
##x-varfish-version=<ID=varfish-server-worker,Version="x.y.z">
##x-varfish-version=<ID=orig-caller,Name="GatkHaplotypeCaller",Version="4.4.0.0">
#CHROM POS ID REF ALT QUAL FILTER INFO

Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
---
source: src/seqvars/ingest/header.rs
assertion_line: 82
expression: "VariantCaller::guess(&vcf_header)"
---
Dragen:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
---
source: src/seqvars/ingest/header.rs
assertion_line: 82
expression: "VariantCaller::guess(&vcf_header)"
---
Dragen:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
---
source: src/seqvars/ingest/header.rs
assertion_line: 82
expression: "VariantCaller::guess(&vcf_header)"
---
GatkHaplotypeCaller:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
---
source: src/seqvars/ingest/header.rs
assertion_line: 82
expression: "VariantCaller::guess(&vcf_header)"
---
GatkHaplotypeCaller:
Expand Down

0 comments on commit 417a64f

Please sign in to comment.