Update duplicate copies of howtos and tutorials

piskvorky · Mar 19, 2023 · 0c480a3 · 0c480a3
1 parent 57cb684
commit 0c480a3
Show file tree

Hide file tree

Showing 6 changed files with 19 additions and 5 deletions.
diff --git a/docs/src/auto_examples/howtos/run_doc2vec_imdb.py b/docs/src/auto_examples/howtos/run_doc2vec_imdb.py
@@ -100,13 +100,20 @@ def download_dataset(url='http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v
        return fname
 
     # Download the file to local storage first.
-    with smart_open.open(url, "rb", ignore_ext=True) as fin:
-        with smart_open.open(fname, 'wb', ignore_ext=True) as fout:
+    try:
+        kwargs = { 'compression': smart_open.compression.NO_COMPRESSION }
+        fin = smart_open.open(url, "rb", **kwargs)
+    except (AttributeError, TypeError):
+        kwargs = { 'ignore_ext': True }
+        fin = smart_open.open(url, "rb", **kwargs)
+    if fin:
+        with smart_open.open(fname, 'wb', **kwargs) as fout:
             while True:
                 buf = fin.read(io.DEFAULT_BUFFER_SIZE)
                 if not buf:
                     break
                 fout.write(buf)
+        fin.close()
 
     return fname
 

diff --git a/docs/src/auto_examples/howtos/run_doc2vec_imdb.py.md5 b/docs/src/auto_examples/howtos/run_doc2vec_imdb.py.md5
@@ -1 +1 @@
-4f33a3697f7efc9f1db2eaa1c62f6999
+962095cc3cced8ff530c5cc9dc365248
diff --git a/docs/src/auto_examples/tutorials/run_ensemblelda.py b/docs/src/auto_examples/tutorials/run_ensemblelda.py
@@ -29,6 +29,9 @@
 from gensim.corpora import Dictionary
 from nltk.stem.wordnet import WordNetLemmatizer
 
+from nltk import download
+download('wordnet')
+
 lemmatizer = WordNetLemmatizer()
 docs = api.load('text8')
 

diff --git a/docs/src/auto_examples/tutorials/run_ensemblelda.py.md5 b/docs/src/auto_examples/tutorials/run_ensemblelda.py.md5
@@ -1 +1 @@
-be0c32b18644ebb1a7826764b37ebc01
+9f666b02b1eeac820f2a2200e6d14f6e
diff --git a/docs/src/auto_examples/tutorials/run_lda.py b/docs/src/auto_examples/tutorials/run_lda.py
@@ -126,6 +126,10 @@ def extract_documents(url='https://cs.nyu.edu/~roweis/data/nips12raw_str602.tgz'
 # easy to read is very desirable in topic modelling.
 #
 
+# Download the WordNet data
+from nltk import download
+download('wordnet')
+
 # Lemmatize the documents.
 from nltk.stem.wordnet import WordNetLemmatizer
 

diff --git a/docs/src/auto_examples/tutorials/run_lda.py.md5 b/docs/src/auto_examples/tutorials/run_lda.py.md5
@@ -1 +1 @@
-6733157cebb44ef13ae98ec8f4a533f1
+fe4ed74f00222e38cabeb67437f8d0f3
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
1		-4f33a3697f7efc9f1db2eaa1c62f6999
	1	+962095cc3cced8ff530c5cc9dc365248
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
1		-be0c32b18644ebb1a7826764b37ebc01
	1	+9f666b02b1eeac820f2a2200e6d14f6e
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
1		-6733157cebb44ef13ae98ec8f4a533f1
	1	+fe4ed74f00222e38cabeb67437f8d0f3