-
Notifications
You must be signed in to change notification settings - Fork 0
/
classify_major_minor.py
40 lines (32 loc) · 1.23 KB
/
classify_major_minor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 1 17:01:50 2018
@author: YudongCai
@Email: [email protected]
"""
import os
import gzip
import click
from collections import Counter
@click.command()
@click.option('--varfile', help='input vcf(.gz)/bcf file')
@click.option('--querysamples', help='query sample list')
@click.option('--outfile', help='outfile is gzipped')
def main(varfile, querysamples, outfile):
cmd = f"bcftools query -S {querysamples} -f '%CHROM\t%POS\t%REF\t%ALT\t[%TGT\t]\n' {varfile}"
with gzip.open(outfile, 'wb') as f:
header = 'chrom\tpos\tmajor\tminor\n'.encode()
f.write(header)
for line in os.popen(cmd):
tline = line.strip().split()
chrom, pos, ref, alt = tline[:4]
gts = [x[0] for x in tline[4:]] + [x[-1] for x in tline[4:]]
count = Counter(gts)
if count.most_common()[0][0] == ref:
f.write(f'{chrom}\t{pos}\t{ref}\t{alt}\n'.encode())
elif count.most_common()[0][0] == alt:
f.write(f'{chrom}\t{pos}\t{alt}\t{ref}\n'.encode())
else:
print(f'{chrom}:{pos} seems not biallelic')
if __name__ == '__main__':
main()