#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import collections
import numpy
from utils import stemmer, tokenizer, normalize_word
from scipy import spatial
import six
###################################################
### Pre-Processing
###################################################
def get_all_content_words(sentences, N, stem):
    # Flatten a list of sentence strings into one list of normalized
    # (optionally stemmed) tokens. `N` is unused here and is kept only for
    # signature compatibility with the callers below.
    all_words = []
    for s in sentences:
        if stem:
            all_words.extend([stemmer.stem(r) for r in tokenizer.tokenize(s)])
        else:
            all_words.extend(tokenizer.tokenize(s))
    # Materialize the map so the result supports len() and repeated
    # iteration under Python 3.
    normalized_content_words = list(map(normalize_word, all_words))
    return normalized_content_words
def pre_process_summary(summary, N, stem=True):
    # A summary is a list of sentence strings; the result is a flat list of
    # normalized words (n-grams are built later by _ngram_counts).
    summary_words = get_all_content_words(summary, N, stem)
    return summary_words
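# Sliding-window n-gram extraction: the deque holds at most n tokens and a
# tuple is emitted each time the window is full, so a sequence of L tokens
# yields max(L - n + 1, 0) n-grams (the count computed by _ngram_count below).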
def _ngrams(words, n):
    queue = collections.deque(maxlen=n)
    for w in words:
        queue.append(w)
        if len(queue) == n:
            yield tuple(queue)
def _ngram_counts(words, n):
    return collections.Counter(_ngrams(words, n))
def _ngram_count(words, n):
    return max(len(words) - n + 1, 0)
def _counter_overlap(counter1, counter2):
    # Clipped overlap: each n-gram contributes at most its count in either
    # counter, as in standard ROUGE matching.
    result = 0
    for k, v in six.iteritems(counter1):
        result += min(v, counter2[k])
    return result
def _safe_divide(numerator, denominator):
    if denominator > 0:
        return numerator / denominator
    else:
        return 0
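# _safe_f1 below computes ROUGE's weighted harmonic mean:
#   F = P * R / ((1 - alpha) * P + alpha * R)
# so alpha = 0 recovers recall and alpha = 1 recovers precision.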
def _safe_f1(matches, recall_total, precision_total, alpha):
    recall_score = _safe_divide(matches, recall_total)
    precision_score = _safe_divide(matches, precision_total)
    denom = (1.0 - alpha) * precision_score + alpha * recall_score
    if denom > 0.0:
        return (precision_score * recall_score) / denom
    else:
        return 0.0
def rouge_n(peer, models, n, alpha):
    """
    Compute the ROUGE-N score of a peer with respect to one or more models,
    for a given value of `n`.
    """
    peer = pre_process_summary(peer, n)
    models = [pre_process_summary(model, n) for model in models]
    matches = 0
    recall_total = 0
    peer_counter = _ngram_counts(peer, n)
    for model in models:
        model_counter = _ngram_counts(model, n)
        matches += _counter_overlap(peer_counter, model_counter)
        recall_total += _ngram_count(model, n)
    precision_total = len(models) * _ngram_count(peer, n)
    return _safe_f1(matches, recall_total, precision_total, alpha)
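# Illustrative usage (hypothetical inputs): `peer` and each model are lists of
# sentence strings, e.g.
#   rouge_n(["the cat sat on the mat"],
#           [["a cat was sitting on the mat"]], n=2, alpha=0.5)
# See also the runnable smoke test at the bottom of this file.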
def _has_embedding(ngram, embs):
    for w in ngram:
        if w not in embs:
            return False
    return True
def _get_embedding(ngram, embs):
    # Represent an n-gram as the element-wise sum of its word vectors.
    res = []
    for w in ngram:
        res.append(embs[w])
    return numpy.sum(numpy.array(res), 0)
def _find_closest(ngram, counter, embs):
    ## If there is nothing to match, nothing is matched
    if len(counter) == 0:
        return "", 0, 0
    ## If we do not have an embedding for the n-gram, fall back to lexical matching
    if not _has_embedding(ngram, embs):
        if ngram in counter:
            return ngram, counter[ngram], 1
        else:
            return "", 0, 0
    ranking_list = []
    ngram_emb = _get_embedding(ngram, embs)
    for k, v in six.iteritems(counter):
        ## First check if there is an exact match
        if k == ngram:
            ranking_list.append((k, v, 1.))
            continue
        ## If there is no exact match and no embedding: no match
        if not _has_embedding(k, embs):
            ranking_list.append((k, v, 0.))
            continue
        ## Soft matching based on embedding similarity
        k_emb = _get_embedding(k, embs)
        ranking_list.append((k, v, 1 - spatial.distance.cosine(k_emb, ngram_emb)))
    ## Sort the ranking list by similarity and return the top item:
    ## (closest n-gram, its count in the counter, similarity)
    ranked_list = sorted(ranking_list, key=lambda tup: tup[2], reverse=True)
    return ranked_list[0]
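# Greedy soft matching: each peer n-gram is aligned to its most similar model
# n-gram (cosine similarity of summed word vectors) and counted only when the
# similarity reaches THRESHOLD; matched model counts are consumed so the same
# reference n-gram cannot be credited twice.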
def _soft_overlap(peer_counter, model_counter, embs):
    THRESHOLD = 0.8  # minimum cosine similarity for a soft match
    result = 0
    for k, v in six.iteritems(peer_counter):
        closest, count, sim = _find_closest(k, model_counter, embs)
        if sim < THRESHOLD:
            continue
        if count <= v:
            # The model n-gram is fully consumed by this peer n-gram.
            del model_counter[closest]
            result += count
        else:
            # Only part of the model count is consumed.
            model_counter[closest] -= v
            result += v
    return result
def rouge_n_we(peer, models, embs, n, alpha):
    """
    Compute the ROUGE-N-WE score of a peer with respect to one or more models,
    for a given value of `n`.
    """
    peer = pre_process_summary(peer, n, False)
    models = [pre_process_summary(model, n, False) for model in models]
    matches = 0
    recall_total = 0
    peer_counter = _ngram_counts(peer, n)
    for model in models:
        model_counter = _ngram_counts(model, n)
        matches += _soft_overlap(peer_counter, model_counter, embs)
        recall_total += _ngram_count(model, n)
    precision_total = len(models) * _ngram_count(peer, n)
    return _safe_f1(matches, recall_total, precision_total, alpha)
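if __name__ == "__main__":
    # Minimal smoke test (illustrative only, assuming `utils` provides a
    # working tokenizer/stemmer). A summary is passed in as a list of
    # sentence strings. The toy embeddings below are hypothetical 3-d
    # vectors standing in for real word embeddings (e.g. word2vec) keyed
    # by normalized token; any dict-like word -> numpy array mapping works.
    peer = ["the cat sat on the mat"]
    models = [["a cat was sitting on the mat"]]
    print("ROUGE-1:", rouge_n(peer, models, 1, 0.5))
    print("ROUGE-2:", rouge_n(peer, models, 2, 0.5))
    toy_embs = {
        "cat": numpy.array([1.0, 0.1, 0.0]),
        "sat": numpy.array([0.2, 1.0, 0.1]),
        "sitting": numpy.array([0.25, 0.95, 0.1]),
        "mat": numpy.array([0.0, 0.2, 1.0]),
    }
    print("ROUGE-1-WE:", rouge_n_we(peer, models, toy_embs, 1, 0.5))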