From 0c480a33e56755c2d1abd8a0aa2a07daa50baad0 Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Sat, 18 Mar 2023 15:15:05 +0800 Subject: [PATCH] Update duplicate copies of howtos and tutorials --- docs/src/auto_examples/howtos/run_doc2vec_imdb.py | 11 +++++++++-- docs/src/auto_examples/howtos/run_doc2vec_imdb.py.md5 | 2 +- docs/src/auto_examples/tutorials/run_ensemblelda.py | 3 +++ .../auto_examples/tutorials/run_ensemblelda.py.md5 | 2 +- docs/src/auto_examples/tutorials/run_lda.py | 4 ++++ docs/src/auto_examples/tutorials/run_lda.py.md5 | 2 +- 6 files changed, 19 insertions(+), 5 deletions(-) diff --git a/docs/src/auto_examples/howtos/run_doc2vec_imdb.py b/docs/src/auto_examples/howtos/run_doc2vec_imdb.py index 68c55ee9e4..6731bbc72d 100644 --- a/docs/src/auto_examples/howtos/run_doc2vec_imdb.py +++ b/docs/src/auto_examples/howtos/run_doc2vec_imdb.py @@ -100,13 +100,20 @@ def download_dataset(url='http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v return fname # Download the file to local storage first. - with smart_open.open(url, "rb", ignore_ext=True) as fin: - with smart_open.open(fname, 'wb', ignore_ext=True) as fout: + try: + kwargs = { 'compression': smart_open.compression.NO_COMPRESSION } + fin = smart_open.open(url, "rb", **kwargs) + except (AttributeError, TypeError): + kwargs = { 'ignore_ext': True } + fin = smart_open.open(url, "rb", **kwargs) + if fin: + with smart_open.open(fname, 'wb', **kwargs) as fout: while True: buf = fin.read(io.DEFAULT_BUFFER_SIZE) if not buf: break fout.write(buf) + fin.close() return fname diff --git a/docs/src/auto_examples/howtos/run_doc2vec_imdb.py.md5 b/docs/src/auto_examples/howtos/run_doc2vec_imdb.py.md5 index 4da245b7e2..67a6d6dc5d 100644 --- a/docs/src/auto_examples/howtos/run_doc2vec_imdb.py.md5 +++ b/docs/src/auto_examples/howtos/run_doc2vec_imdb.py.md5 @@ -1 +1 @@ -4f33a3697f7efc9f1db2eaa1c62f6999 \ No newline at end of file +962095cc3cced8ff530c5cc9dc365248 \ No newline at end of file diff --git a/docs/src/auto_examples/tutorials/run_ensemblelda.py b/docs/src/auto_examples/tutorials/run_ensemblelda.py index aa87d0ecd3..1cb34ec17e 100644 --- a/docs/src/auto_examples/tutorials/run_ensemblelda.py +++ b/docs/src/auto_examples/tutorials/run_ensemblelda.py @@ -29,6 +29,9 @@ from gensim.corpora import Dictionary from nltk.stem.wordnet import WordNetLemmatizer +from nltk import download +download('wordnet') + lemmatizer = WordNetLemmatizer() docs = api.load('text8') diff --git a/docs/src/auto_examples/tutorials/run_ensemblelda.py.md5 b/docs/src/auto_examples/tutorials/run_ensemblelda.py.md5 index f09f123fba..620d90e6ee 100644 --- a/docs/src/auto_examples/tutorials/run_ensemblelda.py.md5 +++ b/docs/src/auto_examples/tutorials/run_ensemblelda.py.md5 @@ -1 +1 @@ -be0c32b18644ebb1a7826764b37ebc01 \ No newline at end of file +9f666b02b1eeac820f2a2200e6d14f6e \ No newline at end of file diff --git a/docs/src/auto_examples/tutorials/run_lda.py b/docs/src/auto_examples/tutorials/run_lda.py index 7ee6b07cd2..7bcdc1e4ba 100644 --- a/docs/src/auto_examples/tutorials/run_lda.py +++ b/docs/src/auto_examples/tutorials/run_lda.py @@ -126,6 +126,10 @@ def extract_documents(url='https://cs.nyu.edu/~roweis/data/nips12raw_str602.tgz' # easy to read is very desirable in topic modelling. # +# Download the WordNet data +from nltk import download +download('wordnet') + # Lemmatize the documents. from nltk.stem.wordnet import WordNetLemmatizer diff --git a/docs/src/auto_examples/tutorials/run_lda.py.md5 b/docs/src/auto_examples/tutorials/run_lda.py.md5 index 6ce0e72960..9e3e6f3186 100644 --- a/docs/src/auto_examples/tutorials/run_lda.py.md5 +++ b/docs/src/auto_examples/tutorials/run_lda.py.md5 @@ -1 +1 @@ -6733157cebb44ef13ae98ec8f4a533f1 \ No newline at end of file +fe4ed74f00222e38cabeb67437f8d0f3 \ No newline at end of file