Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix gensim build (docs & pyemd issues) #2318

Merged
merged 25 commits into from
Jan 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
name: Build documentation
command: |
source venv/bin/activate
tox -e docs -vv
tox -e compile,docs -vv

- store_artifacts:
path: docs/src/_build
Expand Down
2 changes: 1 addition & 1 deletion docs/src/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.append(os.path.abspath('.'))
sys.path.insert(0, os.path.abspath('../..'))

# -- General configuration -----------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion gensim/models/deprecated/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
try:
from pyemd import emd
PYEMD_EXT = True
except ImportError:
except (ImportError, ValueError):
PYEMD_EXT = False

from numpy import dot, zeros, dtype, float32 as REAL,\
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@
try:
from pyemd import emd
PYEMD_EXT = True
except ImportError:
except (ImportError, ValueError):
PYEMD_EXT = False

from numpy import dot, float32 as REAL, empty, memmap as np_memmap, \
Expand Down
8 changes: 8 additions & 0 deletions gensim/test/test_fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
from gensim.models.keyedvectors import Word2VecKeyedVectors
from gensim.test.utils import datapath, get_tmpfile, temporary_file, common_texts as sentences


try:
from pyemd import emd # noqa:F401
PYEMD_EXT = True
except (ImportError, ValueError):
PYEMD_EXT = False

logger = logging.getLogger(__name__)

IS_WIN32 = (os.name == "nt") and (struct.calcsize('P') * 8 == 32)
Expand Down Expand Up @@ -357,6 +364,7 @@ def test_contains(self):
self.assertFalse('nights' in self.test_model.wv.vocab)
self.assertTrue('nights' in self.test_model.wv)

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def test_wm_distance(self):
doc = ['night', 'payment']
oov_doc = ['nights', 'forests', 'payments']
Expand Down
9 changes: 9 additions & 0 deletions gensim/test/test_fasttext_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@
from gensim.models import keyedvectors
from gensim.test.utils import datapath, get_tmpfile


try:
from pyemd import emd # noqa:F401
PYEMD_EXT = True
except (ImportError, ValueError):
PYEMD_EXT = False


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -311,6 +319,7 @@ def testContains(self):
self.assertFalse('a!@' in self.test_model.wv.vocab)
self.assertFalse('a!@' in self.test_model)

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testWmdistance(self):
"""Tests wmdistance for docs with in-vocab and out-of-vocab words"""
doc = ['night', 'payment']
Expand Down
36 changes: 15 additions & 21 deletions gensim/test/test_similarities.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
try:
from pyemd import emd # noqa:F401
PYEMD_EXT = True
except ImportError:
except (ImportError, ValueError):
PYEMD_EXT = False

sentences = [doc2vec.TaggedDocument(words, [i]) for i, words in enumerate(texts)]
Expand Down Expand Up @@ -78,9 +78,8 @@ def testFull(self, num_best=None, shardsize=100):
index.destroy()

def testNumBest(self):

if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

for num_best in [None, 0, 1, 9, 1000]:
self.testFull(num_best=num_best)
Expand Down Expand Up @@ -110,6 +109,9 @@ def test_scipy2scipy_clipped(self):

def testEmptyQuery(self):
index = self.factoryMethod()
if isinstance(index, similarities.WmdSimilarity) and not PYEMD_EXT:
self.skipTest("pyemd not installed or have some issues")

query = []
try:
sims = index[query]
Expand Down Expand Up @@ -166,7 +168,7 @@ def testIter(self):

def testPersistency(self):
if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

fname = get_tmpfile('gensim_similarities.tst.pkl')
index = self.factoryMethod()
Expand All @@ -186,7 +188,7 @@ def testPersistency(self):

def testPersistencyCompressed(self):
if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

fname = get_tmpfile('gensim_similarities.tst.pkl.gz')
index = self.factoryMethod()
Expand All @@ -206,7 +208,7 @@ def testPersistencyCompressed(self):

def testLarge(self):
if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

fname = get_tmpfile('gensim_similarities.tst.pkl')
index = self.factoryMethod()
Expand All @@ -228,7 +230,7 @@ def testLarge(self):

def testLargeCompressed(self):
if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

fname = get_tmpfile('gensim_similarities.tst.pkl.gz')
index = self.factoryMethod()
Expand All @@ -250,7 +252,7 @@ def testLargeCompressed(self):

def testMmap(self):
if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

fname = get_tmpfile('gensim_similarities.tst.pkl')
index = self.factoryMethod()
Expand All @@ -273,7 +275,7 @@ def testMmap(self):

def testMmapCompressed(self):
if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

fname = get_tmpfile('gensim_similarities.tst.pkl.gz')
index = self.factoryMethod()
Expand All @@ -298,12 +300,10 @@ def factoryMethod(self):
# Override factoryMethod.
return self.cls(texts, self.w2v_model)

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testFull(self, num_best=None):
# Override testFull.

if not PYEMD_EXT:
return

index = self.cls(texts, self.w2v_model)
index.num_best = num_best
query = texts[0]
Expand All @@ -319,15 +319,13 @@ def testFull(self, num_best=None):
self.assertTrue(numpy.alltrue(sims[1:] > 0.0))
self.assertTrue(numpy.alltrue(sims[1:] < 1.0))

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testNonIncreasing(self):
''' Check that similarities are non-increasing when `num_best` is not
`None`.'''
# NOTE: this could be implemented for other similarities as well (i.e.
# in _TestSimilarityABC).

if not PYEMD_EXT:
return

index = self.cls(texts, self.w2v_model, num_best=3)
query = texts[0]
sims = index[query]
Expand All @@ -337,12 +335,10 @@ def testNonIncreasing(self):
cond = sum(numpy.diff(sims2) < 0) == len(sims2) - 1
self.assertTrue(cond)

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testChunking(self):
# Override testChunking.

if not PYEMD_EXT:
return

index = self.cls(texts, self.w2v_model)
query = texts[:3]
sims = index[query]
Expand All @@ -358,12 +354,10 @@ def testChunking(self):
self.assertTrue(numpy.alltrue(sim > 0.0))
self.assertTrue(numpy.alltrue(sim <= 1.0))

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testIter(self):
# Override testIter.

if not PYEMD_EXT:
return

index = self.cls(texts, self.w2v_model)
for sims in index:
self.assertTrue(numpy.alltrue(sims >= 0.0))
Expand Down
15 changes: 5 additions & 10 deletions gensim/test/test_word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
try:
from pyemd import emd # noqa:F401
PYEMD_EXT = True
except ImportError:
except (ImportError, ValueError):
PYEMD_EXT = False


Expand Down Expand Up @@ -1023,12 +1023,11 @@ def test_compute_training_loss(self):
# endclass TestWord2VecModel

class TestWMD(unittest.TestCase):

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testNonzero(self):
'''Test basic functionality with a test sentence.'''

if not PYEMD_EXT:
return

model = word2vec.Word2Vec(sentences, min_count=2, seed=42, workers=1)
sentence1 = ['human', 'interface', 'computer']
sentence2 = ['survey', 'user', 'computer', 'system', 'response', 'time']
Expand All @@ -1037,25 +1036,21 @@ def testNonzero(self):
# Check that distance is non-zero.
self.assertFalse(distance == 0.0)

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testSymmetry(self):
'''Check that distance is symmetric.'''

if not PYEMD_EXT:
return

model = word2vec.Word2Vec(sentences, min_count=2, seed=42, workers=1)
sentence1 = ['human', 'interface', 'computer']
sentence2 = ['survey', 'user', 'computer', 'system', 'response', 'time']
distance1 = model.wv.wmdistance(sentence1, sentence2)
distance2 = model.wv.wmdistance(sentence2, sentence1)
self.assertTrue(np.allclose(distance1, distance2))

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testIdenticalSentences(self):
'''Check that the distance from a sentence to itself is zero.'''

if not PYEMD_EXT:
return

model = word2vec.Word2Vec(sentences, min_count=1)
sentence = ['survey', 'user', 'computer', 'system', 'response', 'time']
distance = model.wv.wmdistance(sentence, sentence)
Expand Down
8 changes: 8 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ deps = flake8-rst == 0.4.3
commands = flake8-rst gensim/ docs/ {posargs}


[testenv:compile]
basepython = python2
recreate = True

deps = numpy == 1.11.3
commands = python setup.py build_ext --inplace


[testenv:docs]
basepython = python2
recreate = True
Expand Down