-
Notifications
You must be signed in to change notification settings - Fork 0
/
SEperformance.py
142 lines (126 loc) · 4.87 KB
/
SEperformance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python
__author__ = 'walzer'
import sys
import argparse
import pyopenms as oms
import pandas as pd
from os import path
VERSION = "0.2"
def __main__():
parser = argparse.ArgumentParser(version=VERSION)
parser.add_argument('-in', dest="inf", help='<Required> full path to the input idXML', required=True)
parser.add_argument('-out', dest="out", help="<Required> full path to the csv to be written", required=True)
parser.add_argument('-doi', '--dept_of_immuno-filenames', dest='doi', action='store_true', help="If filename conforms with the filename scheme of the dept. of immunology... (in doubt, it probably doesn'T)")
parser.set_defaults(doi=False)
parser.add_argument('-qcML', dest="qcml", action='store_true', help="If instead of writing a csv embedding in a qcml file - prerequisite: -out must be a existing qcml file")
parser.set_defaults(qcml=False)
options = parser.parse_args()
if len(sys.argv) <= 1:
parser.print_help()
sys.exit(1)
if not (options.inf or options.out):
parser.print_help()
sys.exit(1)
pros = list()
peps = list()
f = oms.IdXMLFile()
f.load(options.inf, pros, peps)
if options.qcml:
try:
qf = oms.QcMLFile()
qf.load(oms.String(options.out))
except:
print "no usable qcml file given"
sys.exit(1)
SE = "NA"
SE = pros[0].getSearchEngine()
sesmv = "NA"
if SE == 'MS-GF+':
sesmv = 'MS:1002053'
elif SE == 'XTandem':
sesmv = 'E-Value'
elif SE == 'Comet':
sesmv = 'MS:1002257'
elif SE == 'Mascot':
sesmv = 'EValue'
if not sesmv:
print "no adequate search engine score found"
#sys.exit(1)
organ, repli, patient = None, None, None
if options.doi:
organ = path.basename(options.inf).split('_')[3]
repli = path.basename(options.inf).split('_')[6].split('#')[-1]
patient = path.basename(options.inf).split('_')[2]
if not organ or not repli or not patient:
options.doi = False
sepe = list()
for pep in peps:
pep.sort()
hits = pep.getHits()
st = pep.getScoreType()
if st != 'Posterior Error Probability':
print "Warning, no PEP as score type found!"
if pep.metaValueExists('spectrum_reference'):
sn_ref = pep.getMetaValue('spectrum_reference')
sn = sn_ref.split('=')[-1]
else:
sn = '?'
sn_ref = '?'
for i, h in enumerate(hits):
row = dict()
row['file'] = path.basename(options.inf)
row['sequence'] = h.getSequence().toUnmodifiedString()
row['modified'] = h.getSequence().isModified()
row['rank'] = i + 1
row['spectrum'] = sn
row['spectrum_reference'] = sn_ref # match with spectrum_native_id von featureXML and spectrum_reference from mapped PeptideIdentifications
if pep.getScoreType() == 'Posterior Error Probability':
row['PEP'] = h.getScore()
else:
row['PEP'] = "NA"
row['SE'] = SE
if h.metaValueExists(sesmv):
row['SEscore'] = h.getMetaValue(sesmv)
if st == 'q-value':
row['qvalue'] = h.getScore()
else:
if h.metaValueExists('q-value_score'):
row['qvalue'] = h.getMetaValue('q-value_score')
if h.metaValueExists('binder'):
if h.metaValueExists('weak'):
row['binder'] = 'weak'
elif h.metaValueExists('strong'):
row['binder'] = 'strong'
else:
row['binder'] = 'yes'
row['allele'] = h.getMetaValue("binder")
else:
row['binder'] = 'no'
if "decoy" not in h.getMetaValue("target_decoy"):
row['target'] = 'target'
else:
row['target'] = 'decoy'
if options.doi:
row['organ'] = organ
row['replicate'] = repli
row['patient'] = patient
sepe.append(row)
outframe = pd.DataFrame(sepe)
if options.qcml:
idxa = oms.Attachment()
idxa.name = "generic table" #"extended id tab"
idxa.cvRef = "qcML"
idxa.cvAcc = "QC:0000049"
idxa.qualityRef = "QC:0000025"
idxa.colTypes = list(outframe.columns)
rows = [[str(row[f]) for f in idxa.colTypes] for ind, row in outframe.iterrows()] #writes nan if cell not filled
idxa.tableRows = rows
ls = list()
qf.getRunNames(ls)
qf.addRunAttachment(ls[0], idxa)
#qf.store(oms.String(options.out.replace('%', 'perc')))
qf.store(oms.String(options.out))
else:
outframe.to_csv(options.out, sep='\t', index=False)
if __name__ == '__main__':
__main__()