forked from tensorlayer/TensorLayer
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtest_nlp.py
81 lines (62 loc) · 2.32 KB
/
test_nlp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import unittest
import nltk
import tensorflow as tf
from tensorflow.python.platform import gfile
import tensorlayer as tl
from tests.utils import CustomTestCase
# Suppress TensorFlow C++ logging (3 = errors only).
# NOTE(review): tensorflow was already imported above, so this assignment may be
# too late to silence TF's import-time log output — confirm and consider moving
# it before the `import tensorflow` line.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Fetch the NLTK 'punkt' tokenizer models needed by the tokenization helpers
# exercised below (downloads on first run, no-op if already cached).
nltk.download('punkt')
class Test_Leaky_ReLUs(CustomTestCase):
    """Smoke tests for ``tl.nlp`` text/vocabulary helper functions.

    NOTE(review): the class name looks copy-pasted from an activation-function
    test module — every case here exercises ``tl.nlp``, not leaky ReLUs. The
    name is kept unchanged so unittest discovery and any external references
    keep working; renaming to e.g. ``Test_NLP`` should be done file-wide.
    """

    @classmethod
    def setUpClass(cls):
        # No shared fixtures required for these smoke tests.
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def test_as_bytes(self):
        """``as_bytes`` should accept both str and bytes input."""
        origin_str = "str"
        origin_bytes = b'bytes'
        converted_str = tl.nlp.as_bytes(origin_str)
        converted_bytes = tl.nlp.as_bytes(origin_bytes)
        print('str after using as_bytes:', converted_str)
        print('bytes after using as_bytes:', converted_bytes)

    def test_as_text(self):
        """``as_text`` should accept both str and bytes input."""
        origin_str = "str"
        origin_bytes = b'bytes'
        converted_str = tl.nlp.as_text(origin_str)
        converted_bytes = tl.nlp.as_text(origin_bytes)
        print('str after using as_text:', converted_str)
        print('bytes after using as_text:', converted_bytes)

    def test_save_vocab(self):
        """Build a vocabulary from the text8 corpus and save its counts to disk."""
        words = tl.files.load_matt_mahoney_text8_dataset()
        vocabulary_size = 50000
        # Only the word-count list is needed here; discard the other outputs.
        _, count, _, _ = tl.nlp.build_words_dataset(words, vocabulary_size, True)
        tl.nlp.save_vocab(count, name='vocab_text8.txt')

    def test_basic_tokenizer(self):
        """``basic_tokenizer`` should split a sentence into tokens without raising."""
        c = "how are you?"
        tokens = tl.nlp.basic_tokenizer(c)
        print(tokens)

    def test_generate_skip_gram_batch(self):
        """Generate one skip-gram (batch, labels) pair from a toy id sequence."""
        data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        batch, labels, data_index = tl.nlp.generate_skip_gram_batch(
            data=data, batch_size=8, num_skips=2, skip_window=1, data_index=0
        )
        print(batch)
        print(labels)

    def test_process_sentence(self):
        """``process_sentence`` should normalize a raw sentence without raising."""
        c = "how are you?"
        c = tl.nlp.process_sentence(c)
        print(c)

    def test_words_to_word_id(self):
        """Round-trip words -> ids -> words through the built vocabulary."""
        words = tl.files.load_matt_mahoney_text8_dataset()
        vocabulary_size = 50000
        # Only the forward and reverse dictionaries are needed for the round trip.
        _, _, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size, True)
        ids = tl.nlp.words_to_word_ids(words, dictionary)
        context = tl.nlp.word_ids_to_words(ids, reverse_dictionary)
        # print(ids)
        # print(context)
# Allow running this test module directly: `python test_nlp.py`.
if __name__ == '__main__':
    unittest.main()