Skip to content

Commit

Permalink
Update duplicate copies of howtos and tutorials
Browse files (browse the repository at this point in the history)
  • Loading branch information
pabs3 committed Mar 19, 2023
1 parent 57cb684 commit 0c480a3
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 5 deletions.
11 changes: 9 additions & 2 deletions docs/src/auto_examples/howtos/run_doc2vec_imdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,20 @@ def download_dataset(url='http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v
return fname

# Download the file to local storage first.
with smart_open.open(url, "rb", ignore_ext=True) as fin:
with smart_open.open(fname, 'wb', ignore_ext=True) as fout:
try:
kwargs = { 'compression': smart_open.compression.NO_COMPRESSION }
fin = smart_open.open(url, "rb", **kwargs)
except (AttributeError, TypeError):
kwargs = { 'ignore_ext': True }
fin = smart_open.open(url, "rb", **kwargs)
if fin:
with smart_open.open(fname, 'wb', **kwargs) as fout:
while True:
buf = fin.read(io.DEFAULT_BUFFER_SIZE)
if not buf:
break
fout.write(buf)
fin.close()

return fname

Expand Down
2 changes: 1 addition & 1 deletion docs/src/auto_examples/howtos/run_doc2vec_imdb.py.md5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
4f33a3697f7efc9f1db2eaa1c62f6999
962095cc3cced8ff530c5cc9dc365248
3 changes: 3 additions & 0 deletions docs/src/auto_examples/tutorials/run_ensemblelda.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
from gensim.corpora import Dictionary
from nltk.stem.wordnet import WordNetLemmatizer

from nltk import download
download('wordnet')

lemmatizer = WordNetLemmatizer()
docs = api.load('text8')

Expand Down
2 changes: 1 addition & 1 deletion docs/src/auto_examples/tutorials/run_ensemblelda.py.md5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
be0c32b18644ebb1a7826764b37ebc01
9f666b02b1eeac820f2a2200e6d14f6e
4 changes: 4 additions & 0 deletions docs/src/auto_examples/tutorials/run_lda.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ def extract_documents(url='https://cs.nyu.edu/~roweis/data/nips12raw_str602.tgz'
# easy to read is very desirable in topic modelling.
#

# Download the WordNet data
from nltk import download
download('wordnet')

# Lemmatize the documents.
from nltk.stem.wordnet import WordNetLemmatizer

Expand Down
2 changes: 1 addition & 1 deletion docs/src/auto_examples/tutorials/run_lda.py.md5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
6733157cebb44ef13ae98ec8f4a533f1
fe4ed74f00222e38cabeb67437f8d0f3

0 comments on commit 0c480a3

Please sign in to comment.