-
Notifications
You must be signed in to change notification settings - Fork 1
/
mix.py
95 lines (70 loc) · 3.08 KB
/
mix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import sys, itertools, re
'''
Created on Sep 3, 2017
@author: Abbas Ghaddar
'''
# Entity type labels tried for every entity span; the functions below feed
# this tuple to itertools.product, so its order fixes the enumeration order.
tagIdx = ('LOC', 'MISC','ORG','PER')
# Entity-count threshold / chunk size: getCombination enumerates all label
# combinations jointly only when a sentence has fewer than `rate` entities;
# otherwise getBestForLarge walks the entities `rate` at a time.
rate = 4
def getCombination(sent, entity_indices, model):
    '''
    Tag the entity spans of `sent` with the best-scoring label combination,
    then rank the alternative labels of every entity.

    With fewer than `rate` entities, every combination of labels from
    `tagIdx` is written into `sent` ('<B-X>' at a span's first index, '<L-X>'
    at its second), the space-joined tokens are scored with `model.score`,
    and the highest-scoring combination is kept.  Otherwise the search is
    delegated to getBestForLarge.  `sent` is modified in place.

    sent           -- mutable list of token strings.
    entity_indices -- sequence of spans; span[0] and span[1] index into sent.
    model          -- object whose score(text) returns a comparable number.

    Returns whatever getTopCandidates returns for the tagged sentence.
    '''
    if len(entity_indices) < rate:
        best = None
        # float('-inf') instead of -sys.maxint: sys.maxint no longer exists
        # on Python 3, and -inf is a true lower bound on both versions.
        maxscore = float('-inf')
        for p in itertools.product(tagIdx, repeat=len(entity_indices)):
            for span, tag in zip(entity_indices, p):
                sent[span[0]] = '<B-' + tag + '>'
                sent[span[1]] = '<L-' + tag + '>'
            score = model.score(' '.join(sent))
            # Always accept the first combination so `best` is bound even if
            # the model returns -inf or NaN; ties keep the earliest one.
            if best is None or score > maxscore:
                best = p
                maxscore = score
        # The loop leaves the last combination in `sent`; write the winner back.
        for span, tag in zip(entity_indices, best):
            sent[span[0]] = '<B-' + tag + '>'
            sent[span[1]] = '<L-' + tag + '>'
    else:
        sent, best, maxscore = getBestForLarge(entity_indices, sent, model)
    return getTopCandidates(entity_indices, sent, maxscore, model)
def getBestForLarge(entity_indices, sent, model):
    '''
    Greedy, chunk-by-chunk label search for sentences with many entities.

    The entities are processed in consecutive chunks of at most `rate`
    spans.  For each chunk, every combination of labels from `tagIdx` is
    written into `sent` and scored with `model.score`; the best combination
    is fixed before moving on.  While a chunk is being searched, the spans of
    all later chunks are replaced by ' ' and runs of whitespace are collapsed
    before scoring, so the model sees only already-decided entities plus the
    current chunk.  `sent` is modified in place.

    entity_indices -- sequence of spans; span[0] and span[1] index into sent.
    sent           -- mutable list of token strings.
    model          -- object whose score(text) returns a comparable number.

    Returns (sent, best, score): the tagged token list, the list of chosen
    labels in entity order, and the model score of the final sentence.
    '''
    total = len(entity_indices)
    best = []
    for k in range(0, total, rate):
        # Size of this chunk (the last one may be shorter).  A separate local
        # is required: assigning to `rate` inside this function would make it
        # a local name and the range() call above would raise
        # UnboundLocalError.
        size = min(rate, total - k)
        stop = k + size
        # Hide every entity belonging to a later chunk, including the one at
        # index `stop` itself.  This does not depend on the combination being
        # tried, so it is done once per chunk.
        for i in range(stop, total):
            sent[entity_indices[i][0]] = ' '
            sent[entity_indices[i][1]] = ' '
        chunk_best = None
        chunk_score = float('-inf')
        for p in itertools.product(tagIdx, repeat=size):
            for offset, tag in enumerate(p):
                span = entity_indices[k + offset]
                sent[span[0]] = '<B-' + tag + '>'
                sent[span[1]] = '<L-' + tag + '>'
            score = model.score(re.sub(r'\s+', ' ', ' '.join(sent)))
            # First combination is always accepted; ties keep the earliest.
            if chunk_best is None or score > chunk_score:
                chunk_best = p
                chunk_score = score
        best.extend(chunk_best)
        # Freeze the winning labels of this chunk before the next one.
        for offset, tag in enumerate(chunk_best):
            span = entity_indices[k + offset]
            sent[span[0]] = '<B-' + tag + '>'
            sent[span[1]] = '<L-' + tag + '>'
    score = model.score(re.sub(r'\s+', ' ', ' '.join(sent)))
    return sent, best, score
def getTopCandidates(entity_indices, sent, maxscore, model):
    '''
    Rank all labels of `tagIdx` for each entity, keeping the others fixed.

    For every span, the label currently written in `sent` is paired with
    `maxscore`; each other label is written into the span, the space-joined
    sentence is scored with `model.score`, and the span is then restored to
    its current label.  `sent` is modified during the call but holds its
    original labels again on return.

    entity_indices -- sequence of spans; span[0] and span[1] index into sent.
    sent           -- mutable list of token strings, already tagged.
    maxscore       -- score attributed to the current labelling.
    model          -- object whose score(text) returns a comparable number.

    Returns one list per entity of (label, score) pairs sorted by descending
    score; an empty list when there are no entities.
    '''
    ranked = []
    for span in entity_indices:
        begin, end = span[0], span[1]
        # Strip either marker: when a span's two indices coincide the '<L-'
        # token overwrote the '<B-' one, and stripping only '<B-' would
        # yield a bogus label such as '<L-PER'.
        current = (sent[begin].replace('<B-', '')
                   .replace('<L-', '').replace('>', ''))
        scored = [(current, maxscore)]
        for tag in tagIdx:
            if tag == current:
                continue
            sent[begin] = '<B-' + tag + '>'
            sent[end] = '<L-' + tag + '>'
            scored.append((tag, model.score(' '.join(sent))))
        # Put the current label back before looking at the next entity.
        sent[begin] = '<B-' + current + '>'
        sent[end] = '<L-' + current + '>'
        ranked.append(sorted(scored, key=lambda x: x[1], reverse=True))
    return ranked