Update duplicate copies of howtos and tutorials

piskvorky · Apr 29, 2023 · 1788ffd · 1788ffd
1 parent efc6f32
commit 1788ffd
Show file tree

Hide file tree

Showing 6 changed files with 19 additions and 5 deletions.
diff --git a/docs/src/auto_examples/howtos/run_doc2vec_imdb.py b/docs/src/auto_examples/howtos/run_doc2vec_imdb.py
@@ -100,13 +100,20 @@ def download_dataset(url='http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v
        return fname
 
     # Download the file to local storage first.
-    with smart_open.open(url, "rb", ignore_ext=True) as fin:
-        with smart_open.open(fname, 'wb', ignore_ext=True) as fout:
+    try:
+        kwargs = { 'compression': smart_open.compression.NO_COMPRESSION }
+        fin = smart_open.open(url, "rb", **kwargs)
+    except (AttributeError, TypeError):
+        kwargs = { 'ignore_ext': True }
+        fin = smart_open.open(url, "rb", **kwargs)
+    if fin:
+        with smart_open.open(fname, 'wb', **kwargs) as fout:
             while True:
                 buf = fin.read(io.DEFAULT_BUFFER_SIZE)
                 if not buf:
                     break
                 fout.write(buf)
+        fin.close()
 
     return fname
 

diff --git a/docs/src/auto_examples/howtos/run_doc2vec_imdb.py.md5 b/docs/src/auto_examples/howtos/run_doc2vec_imdb.py.md5
@@ -1 +1 @@
-7020ef8545a05962fe2d7146b4b95f11
+507b6c07ce76db341761559a96daa17d
diff --git a/docs/src/auto_examples/tutorials/run_ensemblelda.py b/docs/src/auto_examples/tutorials/run_ensemblelda.py
@@ -29,6 +29,9 @@
 from gensim.corpora import Dictionary
 from nltk.stem.wordnet import WordNetLemmatizer
 
+from nltk import download
+download('wordnet')
+
 lemmatizer = WordNetLemmatizer()
 docs = api.load('text8')
 

diff --git a/docs/src/auto_examples/tutorials/run_ensemblelda.py.md5 b/docs/src/auto_examples/tutorials/run_ensemblelda.py.md5
@@ -1 +1 @@
-be0c32b18644ebb1a7826764b37ebc01
+9f666b02b1eeac820f2a2200e6d14f6e
diff --git a/docs/src/auto_examples/tutorials/run_lda.py b/docs/src/auto_examples/tutorials/run_lda.py
@@ -126,6 +126,10 @@ def extract_documents(url='https://cs.nyu.edu/~roweis/data/nips12raw_str602.tgz'
 # easy to read is very desirable in topic modelling.
 #
 
+# Download the WordNet data
+from nltk import download
+download('wordnet')
+
 # Lemmatize the documents.
 from nltk.stem.wordnet import WordNetLemmatizer
 

diff --git a/docs/src/auto_examples/tutorials/run_lda.py.md5 b/docs/src/auto_examples/tutorials/run_lda.py.md5
@@ -1 +1 @@
-0995a15406049093d95974700d471876
+802d286d0c620260af50bf5ef0e08253
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		7020ef8545a05962fe2d7146b4b95f11
		507b6c07ce76db341761559a96daa17d
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		be0c32b18644ebb1a7826764b37ebc01
		9f666b02b1eeac820f2a2200e6d14f6e
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		0995a15406049093d95974700d471876
		802d286d0c620260af50bf5ef0e08253