forked from joyceyiyiwang/Portability_Questions
-
Notifications
You must be signed in to change notification settings - Fork 0
/
04d_combine_glm_threshold_4.py
33 lines (27 loc) · 1.17 KB
/
04d_combine_glm_threshold_4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import argparse
import csv
def process(paths, keep_path, output_path):
    """Combine Plink .glm.linear files, keeping only rows for selected IDs.

    Every input file is read as a tab-delimited table with a header row.
    Rows whose ``ID`` value appears in the keep file are appended, in input
    order, to a single tab-delimited output file with a fixed header.

    Args:
        paths: Iterable of paths to the tab-delimited input files.
        keep_path: Path to a text file listing one ID per line. Surrounding
            whitespace on each line is ignored and blank lines are skipped.
        output_path: Path of the combined file to create (opened in 'w'
            mode, so an existing file is overwritten).

    Raises:
        KeyError: If an input file with data rows has no ``ID`` column.
        ValueError: If a kept row has a column that is not part of the
            output header (``csv.DictWriter`` default behaviour).
    """
    with open(keep_path, 'r') as keep_file:
        # Strip each line individually so trailing spaces do not make an ID
        # silently fail to match.
        keep_ids = {line.strip() for line in keep_file}
    # Blank lines (or an empty keep file) must not select rows whose ID
    # field happens to be empty.
    keep_ids.discard('')

    fieldnames = [
        '#CHROM', 'POS', 'ID', 'REF', 'ALT', 'A1', 'TEST', 'OBS_CT', 'BETA',
        'SE', 'T_STAT', 'P', 'ERRCODE']

    # newline='' is what the csv module asks for on files it reads/writes;
    # the writer keeps its default line terminator.
    with open(output_path, 'w', newline='') as output_file:
        output_writer = csv.DictWriter(
            output_file, fieldnames=fieldnames, delimiter='\t')
        output_writer.writeheader()
        for path in paths:
            # 'with' guarantees the input is closed even if a row fails.
            with open(path, 'r', newline='') as input_file:
                reader = csv.DictReader(input_file, delimiter='\t')
                output_writer.writerows(
                    row for row in reader if row['ID'] in keep_ids)
if __name__ == "__main__":
    # Command-line entry point: parse the input paths, the keep-list file
    # and the output path, then hand them to process().
    parser = argparse.ArgumentParser(description=(
        'Combine Plink .glm.linear files, keeping only certain loci.'))
    parser.add_argument('paths', help=('paths to .glm.linear files'), nargs='+')
    # Both options are needed by process(); marking them required makes
    # argparse report a usage error instead of open(None) raising TypeError.
    parser.add_argument('-k', '--keep', required=True,
                        help='file of SNP IDs to keep')
    parser.add_argument('-o', '--output', required=True,
                        help=('path to the combined .glm.linear file to be created'))
    args = parser.parse_args()
    process(args.paths, args.keep, args.output)