forked from lingyongyan/Neural-Machine-Translation
-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathlanguage.py
25 lines (22 loc) · 772 Bytes
/
language.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
class Language:
    """Vocabulary for one language: word <-> integer id maps plus word counts.

    Ids 0-3 are reserved for the special tokens ``<SOS>``, ``<EOS>``,
    ``<PAD>`` and ``<UNK>``; every newly seen word receives the next free
    id, in order of first appearance.
    """

    # Reserved ids; they mirror the entries pre-seeded in ``__init__``.
    sos_token = 0
    eos_token = 1
    pad_token = 2
    unk_token = 3

    def __init__(self, name: str) -> None:
        """Create a vocabulary called *name* holding only the special tokens."""
        self.name = name
        # word -> id
        self.word2index = {'<SOS>': 0, '<EOS>': 1, '<PAD>': 2, '<UNK>': 3}
        # word -> number of times it was passed to ``index_word``
        self.word2count = {}
        # id -> word (inverse of ``word2index``)
        self.index2word = {0: '<SOS>', 1: '<EOS>', 2: '<PAD>', 3: '<UNK>'}
        # Next free id; starts right after the reserved tokens.
        self.n_words = len(self.index2word)

    def index_words(self, sentence: str) -> None:
        """Register every token of *sentence* in the vocabulary.

        Tokens are obtained by splitting on single space characters only,
        so consecutive spaces (or an empty sentence) produce empty-string
        tokens, which are indexed like any other word.
        """
        for word in sentence.split(' '):
            self.index_word(word)

    def index_word(self, word: str) -> None:
        """Assign *word* an id if it is new and increment its count.

        The special tokens already have ids but no count entry, so the
        count is read with a default of 0 instead of assuming the key
        exists; indexing e.g. ``'<UNK>'`` therefore counts it rather than
        raising ``KeyError``.
        """
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.index2word[self.n_words] = word
            self.n_words += 1
        self.word2count[word] = self.word2count.get(word, 0) + 1