Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for issue #1009 #1018

Merged
merged 9 commits into from
Nov 16, 2016
2 changes: 1 addition & 1 deletion gensim/corpora/wikicorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ class WikiCorpus(TextCorpus):
>>> MmCorpus.serialize('wiki_en_vocab200k.mm', wiki) # another 8h, creates a file in MatrixMarket format plus file with id->word

"""
def __init__(self, fname, processes=None, lemmatize=utils.has_pattern(), dictionary=None, filter_namespaces=('0',)):
def __init__(self, fname, processes=None, lemmatize=True, dictionary=None, filter_namespaces=('0',)):
"""
Initialize the corpus. Unless a dictionary is provided, this scans the
corpus once, to determine its vocabulary.
Expand Down
22 changes: 4 additions & 18 deletions gensim/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ def smart_open(fname, mode='rb'):
RE_HTML_ENTITY = re.compile(r'&(#?)([xX]?)(\w{1,8});', re.UNICODE)



def synchronous(tlockname):
"""
A decorator to place an instance-based lock around a method.
Expand Down Expand Up @@ -1003,19 +1002,6 @@ def pyro_daemon(name, obj, random_suffix=False, ip=None, port=None, ns_conf={}):
daemon.requestLoop()


def has_pattern():
"""
Function to check if there is installed pattern library
"""
pattern = False
try:
from pattern.en import parse
pattern = True
except ImportError:
warnings.warn("Pattern library is not installed, lemmatization won't be available.")
return pattern


def lemmatize(content, allowed_tags=re.compile('(NN|VB|JJ|RB)'), light=False,
stopwords=frozenset(), min_length=2, max_length=15):
"""
Expand All @@ -1037,10 +1023,10 @@ def lemmatize(content, allowed_tags=re.compile('(NN|VB|JJ|RB)'), light=False,
['rank/NN', 'study/VB', 'hard/RB']

"""
if not has_pattern():
raise ImportError("Pattern library is not installed. Pattern library is needed in order \
to use lemmatize function")
from pattern.en import parse
try:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please only check import in one place

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Please review.

from pattern.en import parse
except ImportError:
warnings.warn("Pattern library is not installed, lemmatization won't be available.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it cause an unhandled exception later on if parse not imported?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, it will create an unhandled expection later in the method where parse is used, since only a warning is created. I'll change that.


if light:
import warnings
Expand Down