-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathoptiso
executable file
·139 lines (115 loc) · 3.63 KB
/
optiso
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env python3
import argparse
import json
import os
import random
import isoform
CFG = None
INTRONS = None
PROGRAM = None
def get_fitness(guy):
base = [PROGRAM]
for opt, val in CFG['cli'].items():
if opt == '--icost': val = int(val * 100) # icost is 0-100
base.append(f'{opt} {val}')
for opt, val in guy['genotype'].items():
base.append(f'{opt} {val}')
sum = 0
tmpfile = f'tmp.{os.getpid()}.gff'
for d in CFG['data']:
cmd = [' '.join(base)]
cmd.append(d['fasta'])
if CFG['gff_introns']: cmd.append(f'--introns {d["gff"]}')
cmd.append(f'> {tmpfile}')
cli = ' '.join(cmd)
os.system(cli)
found = isoform.get_introns(tmpfile)
dist, details = isoform.expdiff(found, INTRONS[d['name']])
sum += dist
os.system(f'rm {tmpfile}')
return sum
def random_guy():
guy = {
'genotype': {
'--wdpwm': random.random(),
'--wapwm': random.random(),
'--wemm': random.random(),
'--wimm': random.random(),
'--welen': random.random(),
'--wilen': random.random(),
'--icost': random.random(),
},
'fitness': None
}
return guy
def read_cfg(filename):
cfg = None
with open(filename) as fp:
s = fp.read()
cfg = json.loads(s)
return cfg
def mate(p1, p2, mut):
child = {
'genotype': {},
'fitness': None
}
weight = ('--wdpwm', '--wapwm', '--wemm', '--wimm', '--welen', '--wilen',
'--icost')
for k in weight:
if random.random() < 0.5: child['genotype'][k] = p1['genotype'][k]
else: child['genotype'][k] = p2['genotype'][k]
if random.random() < mut: child['genotype'][k] = random.random();
return child
# CLI
parser = argparse.ArgumentParser(
description='Parameter optimization program')
parser.add_argument('config', type=str, metavar='<json>',
help='configuration file')
parser.add_argument('--program', required=False, type=str, default='isoformer',
metavar='<exec>', help='path to program [%(default)s]')
parser.add_argument('--pop', required=False, type=int, default=100,
metavar='<int>', help='population size [%(default)i]')
parser.add_argument('--gen', required=False, type=int, default=100,
metavar='<int>', help='generations [%(default)i]')
parser.add_argument('--die', required=False, type=float, default=0.5,
metavar='<int>', help='fraction that die each gen [%(default).2f]')
parser.add_argument('--mut', required=False, type=float, default=0.1,
metavar='<int>', help='mutation frequency [%(default).2f]')
parser.add_argument('--seed', required=False, type=int,
metavar='<int>', help='random seed')
parser.add_argument('--name', required=False, type=str, default='',
metavar='<int>', help='name the output')
parser.add_argument('--verbose', action='store_true', help='show progress')
arg = parser.parse_args()
# GLOBALS
if arg.seed: random.seed(arg.seed)
CFG = read_cfg(arg.config)
INTRONS = {}
for d in CFG['data']:
INTRONS[d['name']] = isoform.get_introns(d['gff'])
PROGRAM = arg.program
# Initialize
pop = []
for i in range(arg.pop): pop.append(random_guy())
for guy in pop: guy['fitness'] = get_fitness(guy)
# Evolve population
half = int(len(pop) * arg.die)
for g in range(arg.gen):
pop = sorted(pop, key=lambda item: item['fitness'])
if arg.verbose: print(f'generation: {g}, fitness: {pop[0]["fitness"]}')
# mate
children = []
for i in range(half, len(pop)):
p1 = random.randint(0, half)
p2 = random.randint(0, half)
pop[i] = mate(pop[p1], pop[p2], arg.mut)
children.append(pop[i])
# fitness
for child in children: child['fitness'] = get_fitness(child)
# Final report
pop = sorted(pop, key=lambda item: item['fitness'])
best = pop[0]
print(f'{best["fitness"]:.4f}', end='\t')
for prop, val in best['genotype'].items():
print(f'{val:.4f}', end='\t')
print(arg.name)