# evaluate_model.py
from pickle import load
from numpy import argmax
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
from nltk.translate.bleu_score import corpus_bleu
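
# Evaluate a trained German-to-English translation model: greedily decode each
# encoded German sentence and report corpus BLEU-1 to BLEU-4 against the
# English references. The pickled datasets and 'model.h5' are assumed to come
# from companion data-preparation and training scripts.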
# load a clean dataset
def load_dataset(filename):
    with open(filename, 'rb') as f:
        return load(f)
# fit a tokenizer
def create_tokenizer(lines):
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(lines)
    return tokenizer
# max sentence length
def max_length(lines):
    return max(len(line.split()) for line in lines)
# encode and pad sequences
def encode_sequences(tokenizer, length, lines):
    # integer encode sequences
    X = tokenizer.texts_to_sequences(lines)
    # pad sequences with 0 values
    X = pad_sequences(X, maxlen=length, padding='post')
    return X
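
# for illustration only (actual indices depend on the fitted tokenizer):
# with length=4, texts_to_sequences(['ich bin gut']) might give [[5, 9, 17]],
# which pads to [[5, 9, 17, 0]].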
# map an integer back to a word
def map_int_to_word(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None
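
# note: the linear scan above costs O(vocab) per lookup; a faster variant
# (a sketch, not used below) builds the reverse index once:
#   index_to_word = {index: word for word, index in tokenizer.word_index.items()}
#   word = index_to_word.get(integer)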
# greedily decode the target sequence for one encoded source sequence
def predict_sequence(model, tokenizer, source):
    pred = model.predict(source, verbose=0)[0]
    # take the most probable word index at each output timestep
    integers = [argmax(vector) for vector in pred]
    target = list()
    for i in integers:
        word = map_int_to_word(i, tokenizer)
        # index 0 is the padding value and maps to no word, so stop there
        if word is None:
            break
        target.append(word)
    return ' '.join(target)
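
# note: the decoder above is greedy, keeping only the single most probable
# word at each timestep; a beam search would likely score a little higher,
# but greedy decoding is enough for this evaluation.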
# evaluate the model on a dataset and print BLEU scores
def evaluate_model(model, tokenizer, sources, raw_dataset):
    predicted, actual = list(), list()
    for i, source in enumerate(sources):
        # translate one encoded source sentence at a time
        source = source.reshape((1, source.shape[0]))
        translation = predict_sequence(model, tokenizer, source)
        raw_target, raw_source = raw_dataset[i]
        if i < 10:
            print('src=[%s], target=[%s], predicted=[%s]' % (raw_source, raw_target, translation))
        # corpus_bleu expects a list of reference token lists per sentence
        actual.append([raw_target.split()])
        predicted.append(translation.split())
    # cumulative 1- to 4-gram BLEU scores
    print('BLEU-1: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
    print('BLEU-2: %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))
    print('BLEU-3: %f' % corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0)))
    print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))
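
# note: short references often have no higher-order n-gram matches, so NLTK may
# warn about the BLEU-3/BLEU-4 scores; if that matters, corpus_bleu accepts a
# smoothing function, e.g. (a sketch):
#   from nltk.translate.bleu_score import SmoothingFunction
#   corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25),
#               smoothing_function=SmoothingFunction().method1)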
# load the datasets
dataset = load_dataset('english-german-both.pkl')
train = load_dataset('english-german-train.pkl')
test = load_dataset('english-german-test.pkl')
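# each .pkl file is assumed to hold an array of [english, german] sentence
# pairs produced by earlier cleaning and train/test-splitting steps.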
# prepare the English tokenizer
eng_tokenizer = create_tokenizer(dataset[:, 0])
eng_vocab_size = len(eng_tokenizer.word_index) + 1
eng_length = max_length(dataset[:, 0])
# prepare the German tokenizer
ger_tokenizer = create_tokenizer(dataset[:, 1])
ger_vocab_size = len(ger_tokenizer.word_index) + 1
ger_length = max_length(dataset[:, 1])
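# both tokenizers are fitted on the combined dataset so that word indices and
# padded lengths match what the model saw during training; fitting on the train
# or test split alone could shift the indices.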
# prepare the encoded source data (German sentences)
trainX = encode_sequences(ger_tokenizer, ger_length, train[:, 1])
testX = encode_sequences(ger_tokenizer, ger_length, test[:, 1])
# load the trained model
model = load_model('model.h5')
print('Evaluating on training examples')
evaluate_model(model, eng_tokenizer, trainX, train)
print('Evaluating on test examples')
evaluate_model(model, eng_tokenizer, testX, test)