-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
379 additions
and
124 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
//! Implementation of `seqvars ingest` subcommand. | ||
use crate::common::{self, GenomeRelease}; | ||
|
||
/// Command line arguments for `seqvars ingest` subcommand. | ||
#[derive(Debug, clap::Parser)] | ||
#[command(author, version, about = "ingest sequence variant VCF", long_about = None)] | ||
pub struct Args { | ||
/// The assumed genome build. | ||
#[clap(long)] | ||
pub genomebuild: GenomeRelease, | ||
/// Path to input file. | ||
#[clap(long)] | ||
pub path_in: String, | ||
/// Path to output file. | ||
#[clap(long)] | ||
pub path_out: String, | ||
} | ||
|
||
/// Main entry point for `seqvars ingest` sub command. | ||
pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow::Error> { | ||
let before_anything = std::time::Instant::now(); | ||
tracing::info!("args_common = {:#?}", &args_common); | ||
tracing::info!("args = {:#?}", &args); | ||
|
||
common::trace_rss_now(); | ||
|
||
tracing::info!( | ||
"All of `seqvars ingest` completed in {:?}", | ||
before_anything.elapsed() | ||
); | ||
Ok(()) | ||
} | ||
|
||
#[cfg(test)]
mod test {
    use rstest::rstest;

    use crate::common::GenomeRelease;

    /// Clone the current insta settings, set the snapshot suffix from the
    /// given `format!` arguments, and bind the settings to the calling scope.
    ///
    /// The guard is bound with `let` at statement level, so it stays alive
    /// until the end of the block in which the macro is invoked.
    macro_rules! set_snapshot_suffix {
        ($($arg:expr),*) => {
            let mut snapshot_settings = insta::Settings::clone_current();
            snapshot_settings.set_snapshot_suffix(format!($($arg,)*));
            let _guard = snapshot_settings.bind_to_scope();
        }
    }

    /// Smoke test: `run` must succeed for each of the example VCF files.
    #[rstest]
    #[case("tests/seqvars/ingest/example_dragen.07.021.624.3.10.4.vcf")]
    #[case("tests/seqvars/ingest/example_dragen.07.021.624.3.10.9.vcf")]
    #[case("tests/seqvars/ingest/example_gatk_hc.3.7-0.vcf")]
    #[case("tests/seqvars/ingest/example_gatk_hc.4.4.0.0.vcf")]
    fn smoke_test_run(#[case] path: &str) {
        // The last path component, with dots replaced by underscores, is the
        // per-case snapshot suffix.
        let file_name = path.rsplit('/').next().unwrap();
        set_snapshot_suffix!("{:?}", file_name.replace('.', "_"));

        let tmpdir = temp_testdir::TempDir::default();
        let path_out: String = tmpdir
            .join("out.vcf")
            .to_str()
            .expect("invalid path")
            .into();

        let args_common = Default::default();
        let args = super::Args {
            genomebuild: GenomeRelease::Grch37,
            path_in: path.into(),
            path_out,
        };

        super::run(&args_common, &args).unwrap();
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pub mod ingest; |
120 changes: 120 additions & 0 deletions
120
tests/seqvars/ingest/example_dragen.07.021.624.3.10.4.vcf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
##fileformat=VCFv4.2 | ||
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths (counting only informative reads out of the total reads) for the ref and alt alleles in the order listed"> | ||
##FORMAT=<ID=AF,Number=A,Type=Float,Description="Allele fractions for alt alleles in the order listed"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> | ||
##FORMAT=<ID=F1R2,Number=R,Type=Integer,Description="Count of reads in F1R2 pair orientation supporting each allele"> | ||
##FORMAT=<ID=F2R1,Number=R,Type=Integer,Description="Count of reads in F2R1 pair orientation supporting each allele"> | ||
##FORMAT=<ID=GP,Number=G,Type=Float,Description="Phred-scaled posterior probabilities for genotypes as defined in the VCF specification"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=MB,Number=4,Type=Integer,Description="Per-sample component statistics to detect mate bias"> | ||
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> | ||
##FORMAT=<ID=PRI,Number=G,Type=Float,Description="Phred-scaled prior probabilities for genotypes"> | ||
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group"> | ||
##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias"> | ||
##FORMAT=<ID=SQ,Number=A,Type=Float,Description="Somatic quality"> | ||
##DRAGENCommandLine=<ID=HashTableBuild,Version="SW: 01.003.044.3.10.4, HashTableVersion: 8",CommandLineOptions="dragen --build-hash-table true --ht-reference /staging/human/reference/hs37d5/hs37d5.fa --output-dir /staging/human/reference/hs37d5/hs37d5.fa.k_21.f_16.m_149 --ht-num-threads 32 --ht-seed-len 21 --enable-cnv true"> | ||
##DRAGENCommandLine=<ID=dragen,Version="SW: 07.021.624.3.10.4, HW: 07.021.624",Date="Mon Mar 21 19:12:18 CET 2022",CommandLineOptions="--ref-dir /staging/human/reference/hs37d5/hs37d5.fa.k_21.f_16.m_149 --fastq-file1 /mnt/smb01-hum/NGSRawData/220318_A01077_0174_AH7JGVDMXY/Data/Intensities/BaseCalls/NA-12878WGS-Genom-size_S1_R1_001.fastq.gz --fastq-file2 /mnt/smb01-hum/NGSRawData/220318_A01077_0174_AH7JGVDMXY/Data/Intensities/BaseCalls/NA-12878WGS-Genom-size_S1_R2_001.fastq.gz --output-directory /staging/output/220318_A01077_0174_AH7JGVDMXY/NA-12878WGSWGS/ --output-file-prefix NA-12878WGSWGS_dragen --RGID WGS --RGSM NA-12878WGSWGS --num-threads 46 --enable-map-align true --enable-map-align-output true --enable-duplicate-marking true --enable-variant-caller true --qc-cross-cont-vcf /opt/edico/config/sample_cross_contamination_resource_GRCh37.vcf.gz --enable-cnv true --cnv-enable-self-normalization true --enable-sv true --qc-coverage-region-1 /staging/human/bed/CDS-v19-ROIs_v2.bed --qc-coverage-reports-1 cov_report full_res --qc-coverage-region-2 /staging/human/bed/Regions_Exomev8.bed --qc-coverage-reports-2 cov_report full_res --qc-coverage-region-3 /staging/human/bed/Padded_Exomev8.bed --qc-coverage-reports-3 cov_report full_res"> | ||
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> | ||
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> | ||
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> | ||
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership"> | ||
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> | ||
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> | ||
##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias"> | ||
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (informative and non-informative); some reads may have been filtered based on mapq etc."> | ||
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval"> | ||
##INFO=<ID=FractionInformativeReads,Number=1,Type=Float,Description="The fraction of informative reads out of the total reads"> | ||
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> | ||
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> | ||
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"> | ||
##contig=<ID=1,length=249250621> | ||
##contig=<ID=2,length=243199373> | ||
##contig=<ID=3,length=198022430> | ||
##contig=<ID=4,length=191154276> | ||
##contig=<ID=5,length=180915260> | ||
##contig=<ID=6,length=171115067> | ||
##contig=<ID=7,length=159138663> | ||
##contig=<ID=8,length=146364022> | ||
##contig=<ID=9,length=141213431> | ||
##contig=<ID=10,length=135534747> | ||
##contig=<ID=11,length=135006516> | ||
##contig=<ID=12,length=133851895> | ||
##contig=<ID=13,length=115169878> | ||
##contig=<ID=14,length=107349540> | ||
##contig=<ID=15,length=102531392> | ||
##contig=<ID=16,length=90354753> | ||
##contig=<ID=17,length=81195210> | ||
##contig=<ID=18,length=78077248> | ||
##contig=<ID=19,length=59128983> | ||
##contig=<ID=20,length=63025520> | ||
##contig=<ID=21,length=48129895> | ||
##contig=<ID=22,length=51304566> | ||
##contig=<ID=X,length=155270560> | ||
##contig=<ID=Y,length=59373566> | ||
##contig=<ID=MT,length=16569> | ||
##contig=<ID=GL000207.1,length=4262> | ||
##contig=<ID=GL000226.1,length=15008> | ||
##contig=<ID=GL000229.1,length=19913> | ||
##contig=<ID=GL000231.1,length=27386> | ||
##contig=<ID=GL000210.1,length=27682> | ||
##contig=<ID=GL000239.1,length=33824> | ||
##contig=<ID=GL000235.1,length=34474> | ||
##contig=<ID=GL000201.1,length=36148> | ||
##contig=<ID=GL000247.1,length=36422> | ||
##contig=<ID=GL000245.1,length=36651> | ||
##contig=<ID=GL000197.1,length=37175> | ||
##contig=<ID=GL000203.1,length=37498> | ||
##contig=<ID=GL000246.1,length=38154> | ||
##contig=<ID=GL000249.1,length=38502> | ||
##contig=<ID=GL000196.1,length=38914> | ||
##contig=<ID=GL000248.1,length=39786> | ||
##contig=<ID=GL000244.1,length=39929> | ||
##contig=<ID=GL000238.1,length=39939> | ||
##contig=<ID=GL000202.1,length=40103> | ||
##contig=<ID=GL000234.1,length=40531> | ||
##contig=<ID=GL000232.1,length=40652> | ||
##contig=<ID=GL000206.1,length=41001> | ||
##contig=<ID=GL000240.1,length=41933> | ||
##contig=<ID=GL000236.1,length=41934> | ||
##contig=<ID=GL000241.1,length=42152> | ||
##contig=<ID=GL000243.1,length=43341> | ||
##contig=<ID=GL000242.1,length=43523> | ||
##contig=<ID=GL000230.1,length=43691> | ||
##contig=<ID=GL000237.1,length=45867> | ||
##contig=<ID=GL000233.1,length=45941> | ||
##contig=<ID=GL000204.1,length=81310> | ||
##contig=<ID=GL000198.1,length=90085> | ||
##contig=<ID=GL000208.1,length=92689> | ||
##contig=<ID=GL000191.1,length=106433> | ||
##contig=<ID=GL000227.1,length=128374> | ||
##contig=<ID=GL000228.1,length=129120> | ||
##contig=<ID=GL000214.1,length=137718> | ||
##contig=<ID=GL000221.1,length=155397> | ||
##contig=<ID=GL000209.1,length=159169> | ||
##contig=<ID=GL000218.1,length=161147> | ||
##contig=<ID=GL000220.1,length=161802> | ||
##contig=<ID=GL000213.1,length=164239> | ||
##contig=<ID=GL000211.1,length=166566> | ||
##contig=<ID=GL000199.1,length=169874> | ||
##contig=<ID=GL000217.1,length=172149> | ||
##contig=<ID=GL000216.1,length=172294> | ||
##contig=<ID=GL000215.1,length=172545> | ||
##contig=<ID=GL000205.1,length=174588> | ||
##contig=<ID=GL000219.1,length=179198> | ||
##contig=<ID=GL000224.1,length=179693> | ||
##contig=<ID=GL000223.1,length=180455> | ||
##contig=<ID=GL000195.1,length=182896> | ||
##contig=<ID=GL000212.1,length=186858> | ||
##contig=<ID=GL000222.1,length=186861> | ||
##contig=<ID=GL000200.1,length=187035> | ||
##contig=<ID=GL000193.1,length=189789> | ||
##contig=<ID=GL000194.1,length=191469> | ||
##contig=<ID=GL000225.1,length=211173> | ||
##contig=<ID=GL000192.1,length=547496> | ||
##contig=<ID=NC_007605,length=171823> | ||
##contig=<ID=hs37d5,length=35477943> | ||
##reference=file:///staging/human/reference/hs37d5/hs37d5.fa.k_21.f_16.m_149/reference.bin | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 | ||
17 41256074 . CA C 44.34 . AC=1;AF=0.500;AN=2;DP=18;FS=0.000;MQ=243.16;MQRankSum=3.266;QD=2.46;ReadPosRankSum=2.663;SOR=1.112;FractionInformativeReads=0.944 GT:AD:AF:DP:F1R2:F2R1:GQ:PL:GP:PRI:SB:MB:PS 0|1:11,6:0.353:17:8,3:3,3:43:55,0,47:4.4342e+01,2.0325e-04,5.0000e+01:0.00,11.00,14.01:5,6,2,4:4,7,5,1:41256074 | ||
MT 750 . A G . . DP=7835;MQ=167.68;FractionInformativeReads=0.998 GT:SQ:AD:AF:F1R2:F2R1:DP:SB:MB 1/1:98.13:1,7818:1.000:1,3815:0,4003:7819:1,0,4126,3692:1,0,3979,3839 |
Oops, something went wrong.