'''
Evaluation metrics functions.
'''
import collections.abc

import numpy as np
from scipy.stats import rankdata
from sklearn.preprocessing import label_binarize

import Constants


def _retype(y_prob, y):
    # Wrap scalar inputs so the metrics below can always iterate over them.
    if not isinstance(y, (collections.abc.Sequence, np.ndarray)):
        y_prob = [y_prob]
        y = [y]
    y_prob = np.array(y_prob)
    y = np.array(y)
    return y_prob, y


def _binarize(y, n_classes=None):
    # One-hot encode integer labels into an (n_samples, n_classes) matrix.
    return label_binarize(y, classes=list(range(n_classes)))


def apk(actual, predicted, k=10):
    """
    Computes the average precision at k.

    This function computes the average precision at k between two lists of
    items.

    Parameters
    ----------
    actual : list
        A list of elements that are to be predicted (order doesn't matter)
    predicted : list
        A list of predicted elements (order does matter)
    k : int, optional
        The maximum number of predicted elements

    Returns
    -------
    score : double
        The average precision at k over the input lists
    """
    if len(predicted) > k:
        predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0
    for i, p in enumerate(predicted):
        # Count each relevant item once, at the first rank it appears.
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)

    if not actual:
        return 0.0
    return score / min(len(actual), k)
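
# Worked example (illustrative): with actual = [1, 3] and
# predicted = [1, 2, 3], the hits land at ranks 1 and 3, so
# apk([1, 3], [1, 2, 3], k=3) == (1/1 + 2/3) / 2 ≈ 0.833.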


def mapk(y_prob, y, k=10):
    """
    Computes the mean average precision at k.

    This function computes the mean average precision at k over a batch of
    predicted class-probability vectors.

    Parameters
    ----------
    y_prob : array-like, shape (n_samples, n_classes)
        Predicted class probabilities; the k highest-scoring classes of each
        row are used as the ranked predictions.
    y : array-like, shape (n_samples,)
        The true class label of each sample.
    k : int, optional
        The maximum number of predicted elements

    Returns
    -------
    score : double
        The mean average precision at k over the input lists
    """
    # Take the k highest-scoring classes per sample, best first.
    predicted = [np.argsort(p_)[-k:][::-1] for p_ in y_prob]
    actual = [[y_] for y_ in y]
    return np.mean([apk(a, p, k) for a, p in zip(actual, predicted)])
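
# Worked example (illustrative): for y_prob = [[0.1, 0.7, 0.2]] and y = [1],
# the top-2 ranking is [1, 2], so mapk(y_prob, y, k=2) reduces to
# apk([1], [1, 2], 2) == 1.0.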


def mean_rank(y_prob, y):
    # Average zero-based rank of the true class when classes are sorted by
    # predicted score (0 means the true class received the highest score).
    ranks = []
    n_classes = y_prob.shape[1]
    for p_, y_ in zip(y_prob, y):
        ranks += [n_classes - rankdata(p_, method='max')[y_]]
    return sum(ranks) / float(len(ranks))


def hits_k(y_prob, y, k=10):
    # Fraction of samples whose true class appears among the k
    # highest-scoring classes (top-k accuracy).
    acc = []
    for p_, y_ in zip(y_prob, y):
        top_k = p_.argsort()[-k:][::-1]
        acc += [1. if y_ in top_k else 0.]
    return sum(acc) / len(acc)


def portfolio(pred, gold, k_list=(1, 5, 10, 20)):
    # Filter out padding positions, then score the remaining predictions
    # with hits@k and map@k for each cutoff in k_list.
    scores_len = 0
    y_prob = []
    y = []
    for i in range(gold.shape[0]):  # predict counts
        if gold[i] != Constants.PAD:
            scores_len += 1.0
            y_prob.append(pred[i])
            y.append(gold[i])

    scores = {}
    for k in k_list:
        scores['hits@' + str(k)] = hits_k(y_prob, y, k=k)
        scores['map@' + str(k)] = mapk(y_prob, y, k=k)
    return scores, scores_len
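

# Minimal smoke test (an illustrative sketch; the probabilities and labels
# below are made up). hits@2 counts samples whose true class is among the two
# highest scores; map@2 and mean_rank follow the functions above.
if __name__ == '__main__':
    y_prob = np.array([[0.10, 0.20, 0.30, 0.40],
                       [0.70, 0.10, 0.10, 0.10],
                       [0.25, 0.25, 0.40, 0.10]])
    y = np.array([3, 0, 3])
    print('hits@2:', hits_k(y_prob, y, k=2))   # 2/3: sample 2 misses the top two
    print('map@2:', mapk(y_prob, y, k=2))      # 2/3: both hits are at rank 1
    print('mean rank:', mean_rank(y_prob, y))  # (0 + 0 + 3) / 3 = 1.0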