-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #135 from fellen31/fix-chr
Ensure chrX and X are treated equally
- Loading branch information
Showing
6 changed files
with
106 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
##fileformat=VCFv4.1 | ||
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> | ||
##INFO=<ID=Annotation,Number=.,Type=String,Description="Annotates what feature(s) this variant belongs to."> | ||
##contig=<ID=1,length=249250621,assembly=b37> | ||
##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT father mother proband father_2 mother_2 proband_2 | ||
chr1 879537 . T C 100 PASS MQ=1;Annotation=SAMD11,NOC2L GT:AD:GQ 0/1:10,10:60 0/1:10,10:60 1/1:10,10:60 0/0:10,10:60 0/1:10,10:60 1/1:10,10:60 | ||
chr1 879541 . G A 100 PASS MQ=1;Annotation=SAMD11,NOC2L GT:AD:GQ ./. 0/1:10,10:60 1/1:10,10:60 ./. 0/1:10,10:60 0/1:10,10:60 | ||
chr1 879595 . C T 100 PASS MQ=1;Annotation=SAMD11,NOC2L GT:AD:GQ 0/1:10,10:60 0/0:10,10:60 1/1:10,10:60 0/1:10,10:60 0/0:10,10:60 0/1:10,10:60 | ||
chr1 879676 . G A 100 PASS MQ=1;Annotation=SAMD11,NOC2L GT:AD:GQ 0/1:10,10:60 1/1:10,10:60 1/1:10,10:60 0/1:10,10:60 0/1:10,10:60 0/1:10,10:60 | ||
chr1 879911 . G A 100 PASS MQ=1;Annotation=SAMD11,NOC2L GT:AD:GQ 0/1:10,10:60 0/0:10,10:60 0/1:10,10:60 0/1:10,10:60 0/0:10,10:60 0/1:10,10:60 | ||
chr1 880012 . A G 100 PASS MQ=1;Annotation=NOC2L GT:AD:GQ 0/0:10,10:60 0/1:10,10:60 0/1:10,10:60 0/0:10,10:60 0/1:10,10:60 0/1:10,10:60 | ||
chr1 880086 . T C 100 PASS MQ=1;Annotation=NOC2L GT:AD:GQ 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60 | ||
chr1 880199 . G A 100 PASS MQ=1;Annotation=NOC2L GT:AD:GQ 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60 | ||
chr1 880217 . T G 100 PASS MQ=1;Annotation=NOC2L GT:AD:GQ 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60 | ||
chr10 76154051 . A G 100 PASS MQ=1;Annotation=ADK GT:AD:GQ 0/0:10,10:60 0/1:10,10:60 0/1:10,10:60 0/0:10,10:60 0/1:10,10:60 0/1:10,10:60 | ||
chr10 76154073 . T G 100 PASS MQ=1;Annotation=ADK GT:AD:GQ 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60 | ||
chr10 76154074 . C G 100 PASS MQ=1;Annotation=ADK GT:AD:GQ ./. 0/1:10,10:60 0/1:10,10:60 0/1:10,10:60 0/1:10,10:60 0/1:10,10:60 | ||
chr10 76154076 . G C 100 PASS MQ=1;Annotation=ADK GT:AD:GQ ./. 0/0:10,10:60 0/1:10,10:60 ./. 0/0:10,10:60 0/1:10,10:60 | ||
chrX 302253 . CCCTCCTGCCCCT C 100 PASS MQ=1;Annotation=PPP2R3B GT:AD:GQ 0/0:10,10:60 0/1:10,10:60 1/1:10,10:60 0/0:10,10:60 1/1:10,10:60 1/1:10,10:60 | ||
chrM 302253 . CCCTCCTGCCCCT C 100 PASS MQ=1 GT:AD:GQ 0/0:10,10:60 0/1:10,10:60 1/1:10,10:60 0/0:10,10:60 1/1:10,10:60 1/1:10,10:60 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
43 changes: 2 additions & 41 deletions
43
tests/functionality/test_score_variants_ranks_score_is_float.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from typing import Dict, Iterator, Union

from genmod.vcf_tools import HeaderParser, get_variant_dict, get_info_dict
|
||
def parse_variant_file(file_path: str) -> HeaderParser:
    """
    Parse VCF header fields.

    Lines are read from the top of the file. Lines starting with ``##`` are
    passed to ``HeaderParser.parse_meta_data``; other lines starting with
    ``#`` are passed to ``HeaderParser.parse_header_line``. Reading stops at
    the first line that does not start with ``#``.

    :param file_path: VCF to be read
    :return: the HeaderParser fed with the header lines found
    :raises ValueError: in case file is empty
    """
    with open(file_path, 'r') as variant_file:
        head = HeaderParser()
        # Track emptiness explicitly: a loop index from enumerate() is never
        # bound when the file has no lines, and equals 0 for a one-line file,
        # so it cannot distinguish "empty" from "has content".
        saw_any_line = False
        for line in variant_file:
            saw_any_line = True
            line = line.rstrip()
            if not line.startswith('#'):
                # First non-header line: the header section is over.
                break
            if line.startswith('##'):
                head.parse_meta_data(line)
            else:
                head.parse_header_line(line)
        if not saw_any_line:
            raise ValueError('Expected contents in file, got none')
    return head
|
||
def generate_variants_from_file(file_path: str) -> Iterator[Dict[str, Union[str, int, float]]]:
    """
    Yield variants from VCF file.

    The header is parsed first via ``parse_variant_file``; the file is then
    read again and every line not starting with ``#`` is converted with
    ``get_variant_dict``. Each yielded variant additionally carries an
    ``'info_dict'`` entry built by ``get_info_dict`` from its ``'INFO'`` value.

    :param file_path: VCF to be read
    :return: iterator over one variant dict per non-header line
    :raises ValueError: propagated from ``parse_variant_file`` for an empty file
    """
    # NOTE(review): the element type is kept as originally declared; the
    # 'info_dict' value is whatever get_info_dict returns -- confirm and widen
    # the annotation if needed.
    header = parse_variant_file(file_path=file_path)
    with open(file_path, 'r') as variant_file:
        for line in variant_file:
            if line.startswith('#'):
                # Header lines were already consumed by parse_variant_file.
                continue
            variant = get_variant_dict(line, header.header)
            variant['info_dict'] = get_info_dict(variant['INFO'])
            yield variant
|