-
Notifications
You must be signed in to change notification settings - Fork 2
/
gen_negatives.py
45 lines (39 loc) · 1.26 KB
/
gen_negatives.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import click as ck
import pandas as pd
import numpy as np
import os
@ck.command()
@ck.option(
    '--data-frame', '-df', required=True,
    help='Output of uni2pandas.py script')
def main(data_frame):
    """Add a 'neg_annotations' column to a pickled DataFrame.

    Reads the DataFrame pickled at DATA_FRAME, derives a set of negative
    annotations for every row and writes the result next to the input
    file as '<name>_negs<ext>'.
    """
    # Mapping between the two members of each 'pos'/'neg' pair, in both
    # directions (built by load_regulations()).
    regulations = load_regulations()

    # NOTE(security): read_pickle unpickles arbitrary objects; only run
    # this on files you produced yourself or otherwise trust.
    df = pd.read_pickle(data_frame)

    root, ext = os.path.splitext(data_frame)
    output = root + '_negs' + ext

    neg_annotations = []
    for row in df.itertuples():
        # For each annotation of the row that has a counterpart in the
        # mapping, record that counterpart as a negative annotation.
        # Assumes 'prop_annotations' holds an iterable of ids per row.
        annots = {
            regulations[go_id]
            for go_id in row.prop_annotations
            if go_id in regulations
        }
        neg_annotations.append(annots)
        # Progress output: number of negatives found for this row.
        print(len(annots))

    df['neg_annotations'] = neg_annotations
    df.to_pickle(output)
    print(df)
def load_regulations(path='data-sim/regulations.txt'):
    """Load the pos/neg counterpart mapping from a tab-separated file.

    Each non-blank line must have at least three tab-separated fields:
    a class id, a group id and a tag. Underscores in the two ids are
    replaced by colons (e.g. 'GO_0000001' -> 'GO:0000001'). Lines are
    grouped by the group id; within a group the tag selects the slot the
    class id is stored in, and a later line with the same group and tag
    overwrites an earlier one.

    Args:
        path: Location of the regulations file. Defaults to the path the
            script has always used.

    Returns:
        A dict that, for every group having both a 'pos' and a 'neg'
        entry, maps the 'pos' class id to the 'neg' class id and the
        'neg' class id to the 'pos' class id. Groups with only one of
        the two tags contribute nothing.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
    """
    groups = {}
    with open(path) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline at the end of the
                # file) carry no data; skip them instead of failing.
                continue
            fields = stripped.split('\t')
            cl = fields[0].replace('_', ':')
            reg = fields[1].replace('_', ':')
            pn = fields[2]
            groups.setdefault(reg, {})[pn] = cl

    regulations = {}
    for vals in groups.values():
        if 'pos' in vals and 'neg' in vals:
            # Link the two counterparts symmetrically.
            regulations[vals['pos']] = vals['neg']
            regulations[vals['neg']] = vals['pos']
    return regulations
# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()