-
Notifications
You must be signed in to change notification settings - Fork 2
/
w2v.py
31 lines (22 loc) · 987 Bytes
/
w2v.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#import word2vec
import numpy as np
import cPickle
from collections import defaultdict
import sys, re
import pandas as pd
from gensim.models.word2vec import Word2Vec
# Disabled earlier experiment that trained and inspected vectors with the
# standalone `word2vec` package. It is kept inside a bare string literal
# (a no-op expression statement), followed by a few commented-out dump calls.
'''word2vec.word2phrase('/home/kumud/Desktop/TheanoPrep/11', '/home/kumud/Desktop/TheanoPrep/text-phrases', verbose=True)
word2vec.word2vec('/home/kumud/Desktop/TheanoPrep/11', '/home/kumud/Desktop/TheanoPrep/text.bin', size=100, verbose=True)
word2vec.word2clusters('/home/kumud/Desktop/TheanoPrep/11', '/home/kumud/Desktop/TheanoPreps/text-clusters.txt', 100, verbose=True)
model = word2vec.load('/home/kumud/Desktop/TheanoPrep/text.bin')
model.vocab
model.vectors.shape
#print model'''
#print model.vectors
#cPickle.dump([model.vectors], open("vec.p", "wb"))
#np.savetxt('vvv.txt', model.vectors,delimiter=" ", fmt="%s")
#np.savetxt('mv.txt', model.vocab,delimiter=" ", fmt="%s")
#model['ke'].shape

# Load the vectors stored in word2vec's binary format from 'text.bin'
# (resolved relative to the current working directory).
# gensim 1.0 moved `load_word2vec_format` from Word2Vec to KeyedVectors and
# later releases reject the Word2Vec classmethod, so prefer KeyedVectors and
# fall back to Word2Vec only on gensim installs that predate KeyedVectors.
try:
    from gensim.models import KeyedVectors as _VectorLoader
except ImportError:
    _VectorLoader = Word2Vec
model = _VectorLoader.load_word2vec_format('text.bin', binary=True)
#model.most_similar(['eka'])

# Print the vocabulary mapping. gensim 4 removed the `vocab` attribute
# (accessing it raises AttributeError) in favour of `key_to_index`; use
# `vocab` when it exists so output on older gensim is unchanged.
_vocab = getattr(model, 'vocab', None)
if _vocab is None:
    _vocab = model.key_to_index
# Parenthesised single-argument form prints identically on Python 2 and 3.
print(_vocab)