-
Notifications
You must be signed in to change notification settings - Fork 1
/
parse_chrom
executable file
·71 lines (64 loc) · 2.54 KB
/
parse_chrom
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/python
# coding=utf-8
'''
Created on May 16, 2015
@author: Allis Tauri <[email protected]>
'''
import os
import sys
import argparse
import csv
import re
# Decimal comma between digits ("12,5"). It is rewritten to a dot so that the
# remaining commas can safely be used as column separators.
_DECIMAL_COMMA = re.compile(r'(\d+),(\d+)')

# First word of a report line that describes a component.
_COMPONENT_KEYWORD = u'Компонент'

# Encoding the report files are decoded with.
_REPORT_ENCODING = 'cp1251'

_PY2 = sys.version_info[0] < 3
_TEXT_TYPE = type(u'')  # unicode on Python 2, str on Python 3


def _warn(message):
    '''Write a diagnostic to stderr so it never mixes with CSV sent to stdout.'''
    sys.stderr.write(message + '\n')


def parse_component_line(line):
    '''
    Extract a component measurement from one decoded report line.

    The line is split into comma-separated columns after decimal commas have
    been turned into dots. A component line has at least three columns, its
    first column starts with the component keyword, the third word of the
    first column is the component name and the second word of the third
    column is the concentration.

    :param line: one line of the report as a text (unicode) string.
    :returns: ``(name, concentration)`` or ``None`` if the line does not
        describe a component.
    :raises ValueError: if the line starts like a component line but the name
        or the concentration cannot be extracted.
    '''
    #replace comma with dot in numbers, then split into columns
    cols = [c.strip() for c in _DECIMAL_COMMA.sub(r'\1.\2', line).split(',')]
    if len(cols) < 3:
        return None
    #split the first column into words
    words = [w.strip(':') for w in cols[0].split()]
    if not words or words[0] != _COMPONENT_KEYWORD:
        return None
    if len(words) < 3:
        raise ValueError('Invalid format of the component string')
    #parse component concentration
    fields = cols[2].split()
    if len(fields) < 2:
        raise ValueError('Invalid format of the component concentration')
    try:
        value = float(fields[1])
    except ValueError:
        raise ValueError('Invalid value of the component concentration')
    return words[2], value


def parse_report(lines, source='<report>'):
    '''
    Collect component measurements from the decoded lines of one report.

    Malformed component lines are reported on stderr and skipped instead of
    aborting the whole run.

    :param lines: iterable of text (unicode) lines.
    :param source: name used to prefix diagnostics, normally the file name.
    :returns: list of ``(name, concentration)`` in the order they appear.
    '''
    found = []
    for line in lines:
        try:
            item = parse_component_line(line)
        except ValueError as err:
            _warn('%s: %s' % (source, err))
            continue
        if item is not None:
            found.append(item)
    return found


def build_rows(samples):
    '''
    Build the CSV table from per-sample measurements.

    The header starts with ``'sample'`` followed by every component name in
    order of first appearance across all samples. Every data row has the same
    width as the header; a component missing from a sample is written as 0.0,
    and if a component occurs several times in one sample the last value wins.

    :param samples: iterable of ``(sample_name, [(component, value), ...])``.
    :returns: list of rows, the first one being the header.
    '''
    header = ['sample']
    known = set()
    per_sample = []
    for name, measurements in samples:
        values = {}
        for component, value in measurements:
            if component not in known:
                known.add(component)
                header.append(component)
            values[component] = value
        per_sample.append((name, values))
    # Rows are produced only after the header is complete, so samples read
    # before a component was first seen still get a cell for it.
    rows = [header]
    for name, values in per_sample:
        rows.append([name] + [values.get(c, 0.0) for c in header[1:]])
    return rows


def _write_csv(rows, output):
    '''
    Write rows as CSV to the file named by output, or to stdout if it is '-'.

    Files are written UTF-8 encoded. On Python 2 the csv module works on byte
    strings, so text cells are encoded before being handed to the writer.
    '''
    if output == '-':
        outf = sys.stdout
    elif _PY2:
        outf = open(output, 'wb')
    else:
        outf = open(output, 'w', newline='', encoding='utf-8')
    try:
        writer = csv.writer(outf)
        for row in rows:
            if _PY2:
                row = [c.encode('utf-8') if isinstance(c, _TEXT_TYPE) else c
                       for c in row]
            writer.writerow(row)
    finally:
        if outf is not sys.stdout:
            outf.close()


def main(argv=None):
    '''
    Parse the given report files and write a sample/component CSV table.

    :param argv: command line arguments without the program name;
        ``sys.argv[1:]`` is used when omitted.
    :returns: process exit code.
    '''
    parser = argparse.ArgumentParser(description='Parses Phoenix chromatogram report files.')
    parser.add_argument('reports', metavar='filename.txt',
                        type=str, nargs='+',
                        help='File(s) with chromatogram reports.')
    # No nargs here: nargs=1 would deliver the file name wrapped in a list.
    parser.add_argument('-o', '--output', metavar='filename.csv',
                        type=str, default='-',
                        help='Name of the output csv file. Output to stdout if not provided.')
    args = parser.parse_args(argv)
    samples = []
    for report_file in args.reports:
        if not os.path.isfile(report_file):
            _warn('No such file: %s' % report_file)
            continue
        # Binary mode plus explicit decoding behaves the same on Python 2 and 3.
        with open(report_file, 'rb') as report:
            decoded = (raw.decode(_REPORT_ENCODING) for raw in report)
            measurements = parse_report(decoded, report_file)
        samples.append((os.path.splitext(report_file)[0], measurements))
    #write results
    _write_csv(build_rows(samples), args.output)
    return 0


if __name__ == '__main__':
    sys.exit(main())
#end main