-
Notifications
You must be signed in to change notification settings - Fork 1
/
eval.py
executable file
·72 lines (60 loc) · 2.77 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import nltk
import pandas as pd
import numpy as np
path_to_reference = 'Dataset/Validation_Captions.txt' # df -> image_id:str caption:str len(5000)
path_to_model = 'model/Decoder/Generated_Captions.txt'
with open(path_to_model) as f:
model_data = f.readlines()
model_filenames=[caps.split('\t')[0] for caps in model_data]
model_captions = [caps.replace('\n', '').split('\t')[1] for caps in model_data]
with open(path_to_reference, 'r') as f:
ref_data = f.readlines()
reference_filenames = [caps.split('\t')[0].split('#')[0] for caps in ref_data]
reference_captions = [caps.replace('\n', '').split('\t')[1] for caps in ref_data]
df = pd.DataFrame()
df['image'] = reference_filenames
df['caption'] = reference_captions
df.caption = df.caption.str.decode('utf').str.split()
df = pd.DataFrame(data = {'image':list(df.image.unique()),'caption':list(df.groupby('image')['caption'].apply(list))})[:len(model_captions)]
bleu1_scores = []
bleu2_scores = []
bleu3_scores = []
bleu4_scores = []
index1=None
index2=None
for i, row in df.iterrows():
model = model_captions[i].split()
reference = row.caption
try:
score1 = nltk.translate.bleu_score.sentence_bleu(reference, model, weights=[1.0])
score2 = nltk.translate.bleu_score.sentence_bleu(reference, model, weights=[0.5,0.5])
score3 = nltk.translate.bleu_score.sentence_bleu(reference, model, weights=[1.0/3,1.0/3,1-2*(1.0/3)])
score4 = nltk.translate.bleu_score.sentence_bleu(reference, model)
bleu1_scores.append(score1)
bleu2_scores.append(score2)
bleu3_scores.append(score3)
bleu4_scores.append(score4)
if i%10000 == 0 and i!=0:
print (float(i)/df.shape[0])*100,"%"," done"
except:
index1=df.index[i]
index2=i
print "Invalid Caption Generated for: ", model_filenames[i]
print "\nMean Sentence-Level BLEU-1 score: ", np.mean(bleu1_scores)
print "Mean Sentence-Level BLEU-2 score: ", np.mean(bleu2_scores)
print "Mean Sentence-Level BLEU-3 score: ", np.mean(bleu3_scores)
print "Mean Sentence-Level BLEU-4 score: ", np.mean(bleu4_scores)
if index1 and index2:
df=df.drop([index1])
df=df.reset_index(drop=True)
del model_captions[index2]
references=df.caption
model_captions = [caption.split() for caption in model_captions]
score1 = nltk.translate.bleu_score.corpus_bleu(references,model_captions, weights=[1.0])
print "\n\nCorpus-Level BLEU-1 score: ", score1
score2 = nltk.translate.bleu_score.corpus_bleu(references,model_captions, weights=[0.5,0.5])
print "Corpus-Level BLEU-2 score: ", score2
score3 = nltk.translate.bleu_score.corpus_bleu(references,model_captions, weights=[1.0/3,1.0/3,1-2*(1.0/3)])
print "Corpus-Level BLEU-3 score: ", score3
score4 = nltk.translate.bleu_score.corpus_bleu(references,model_captions)
print "Corpus-Level BLEU-4 score: ", score4