-
Notifications
You must be signed in to change notification settings - Fork 18
/
tests.py
75 lines (60 loc) · 1.51 KB
/
tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from tempfile import NamedTemporaryFile
import numpy as np
import pytest
from navec import Navec
from navec.meta import Meta
from navec.vocab import Vocab
from navec.pq import PQ
@pytest.fixture
def emb():
    """Build a tiny Navec embedding: 3 words, 6 dimensions, qdim of 2."""
    # Per-vector centroid choice for each quantized chunk (vectors x qdim).
    centroid_ids = np.array(
        [
            [0, 1],
            [1, 0],
            [2, 2],
        ],
        dtype=np.uint8,
    )
    # Centroid table (qdim x centroids x chunk).
    codebook = np.array(
        [
            [[1, 0, 0], [0, 1, 1], [0, 0, 0]],
            [[0, 0, 0], [1, 0, 0], [0, 1, 0]],
        ],
        dtype=np.float32,
    )

    meta = Meta(id='test_1B_3k_6d_2q')
    # Picking rows of the centroid table by the ids above gives, per word
    # (presumably what PQ decodes to; test_get checks the first row):
    #   a: 1 0 0 | 1 0 0
    #   b: 0 1 1 | 0 0 0
    #   c: 0 0 0 | 0 1 0
    pq = PQ(
        vectors=3,
        dim=6,
        qdim=2,
        centroids=3,
        indexes=centroid_ids,
        codes=codebook,
    )
    vocab = Vocab(words=['a', 'b', 'c'], counts=[1, 2, 3])
    return Navec(meta, vocab, pq)
def test_dump_load(emb):
    """Dump the fixture embedding to disk, load it back, and compare a lookup.

    The archive goes to a fresh path inside a temporary directory instead of
    an open ``NamedTemporaryFile``: re-opening such a file by name while it is
    still open works on Unix but not on Windows.
    """
    # Local imports keep this block self-contained; the module only imports
    # NamedTemporaryFile at the top.
    import os
    from tempfile import TemporaryDirectory

    with TemporaryDirectory() as directory:
        path = os.path.join(directory, 'emb.tar')
        emb.dump(path)
        loaded = Navec.load(path)
        # Checked inside the `with` in case the loaded object still reads
        # from the file on disk.
        assert np.array_equal(loaded.get('a'), emb.get('a'))
def test_get(emb):
    """``get`` returns the vector for a known word and None for an unknown one."""
    expected = np.array([1., 0., 0., 1., 0., 0.])
    vector = emb.get('a')
    assert np.array_equal(vector, expected)

    # 'd' is not among the fixture's words.
    missing = emb.get('d')
    assert missing is None
def test_sim(emb):
    """``sim`` of 'a' and 'b' in the fixture is exactly zero."""
    similarity = emb.sim('a', 'b')
    assert similarity == 0.
def test_gensim(emb):
    """``as_gensim.most_similar('a')`` lists 'b' then 'c', both at 0."""
    expected = [
        ('b', 0.),
        ('c', 0.),
    ]
    neighbours = emb.as_gensim.most_similar('a')
    assert neighbours == expected
def test_top(emb):
    """Sampling by ``vocab.top(2)`` keeps two rows, preserves sim, drops 'a'."""
    kept_words = emb.vocab.top(2)
    subset = emb.sampled(kept_words)

    row_count = len(subset.pq.indexes)
    assert row_count == 2

    # Similarity between surviving words must match the full embedding.
    subset_sim = subset.sim('b', 'c')
    full_sim = emb.sim('b', 'c')
    assert subset_sim == full_sim

    dropped = subset.vocab.get('a')
    assert dropped is None