From 32604bcab05947f0a1b6ce95717ecfa83e6d449b Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 12:59:11 +0800 Subject: [PATCH 01/14] Update links to the GNU LGPL v2.1 license The license was superseded by the LGPL v3 and consequently the URL of the LGPL v2.1 has changed. Also update existing correct links to https. Fixes: commit beb04ea1f8f9c438b0a40aa4cbbd955ea065f84f --- continuous_integration/check_wheels.py | 2 +- docs/src/_index.rst.unused | 2 +- docs/src/_license.rst.unused | 2 +- docs/src/intro.rst | 2 +- gensim/corpora/bleicorpus.py | 2 +- gensim/corpora/csvcorpus.py | 2 +- gensim/corpora/dictionary.py | 2 +- gensim/corpora/hashdictionary.py | 2 +- gensim/corpora/indexedcorpus.py | 2 +- gensim/corpora/lowcorpus.py | 2 +- gensim/corpora/malletcorpus.py | 2 +- gensim/corpora/mmcorpus.py | 2 +- gensim/corpora/opinosiscorpus.py | 2 +- gensim/corpora/sharded_corpus.py | 2 +- gensim/corpora/svmlightcorpus.py | 2 +- gensim/corpora/textcorpus.py | 2 +- gensim/corpora/ucicorpus.py | 2 +- gensim/corpora/wikicorpus.py | 2 +- gensim/examples/dmlcz/dmlcorpus.py | 2 +- gensim/examples/dmlcz/gensim_build.py | 2 +- gensim/examples/dmlcz/gensim_genmodel.py | 2 +- gensim/examples/dmlcz/gensim_xml.py | 2 +- gensim/examples/dmlcz/sources.py | 2 +- gensim/interfaces.py | 2 +- gensim/matutils.py | 2 +- gensim/models/_fasttext_bin.py | 2 +- gensim/models/atmodel.py | 2 +- gensim/models/callbacks.py | 2 +- gensim/models/coherencemodel.py | 2 +- gensim/models/doc2vec.py | 2 +- gensim/models/doc2vec_corpusfile.pyx | 2 +- gensim/models/doc2vec_inner.pxd | 2 +- gensim/models/doc2vec_inner.pyx | 2 +- gensim/models/ensemblelda.py | 2 +- gensim/models/fasttext.py | 2 +- gensim/models/fasttext_corpusfile.pyx | 2 +- gensim/models/hdpmodel.py | 2 +- gensim/models/keyedvectors.py | 2 +- gensim/models/lda_dispatcher.py | 2 +- gensim/models/lda_worker.py | 2 +- gensim/models/ldamodel.py | 2 +- gensim/models/ldamulticore.py | 2 +- gensim/models/ldaseqmodel.py | 2 +- 
gensim/models/logentropy_model.py | 2 +- gensim/models/lsi_dispatcher.py | 2 +- gensim/models/lsi_worker.py | 2 +- gensim/models/lsimodel.py | 2 +- gensim/models/normmodel.py | 2 +- gensim/models/phrases.py | 2 +- gensim/models/poincare.py | 2 +- gensim/models/rpmodel.py | 2 +- gensim/models/tfidfmodel.py | 2 +- gensim/models/word2vec.py | 2 +- gensim/models/word2vec_corpusfile.pxd | 2 +- gensim/models/word2vec_corpusfile.pyx | 2 +- gensim/models/word2vec_inner.pxd | 2 +- gensim/models/word2vec_inner.pyx | 2 +- gensim/parsing/preprocessing.py | 2 +- gensim/scripts/glove2word2vec.py | 2 +- gensim/scripts/make_wiki_online.py | 2 +- gensim/scripts/make_wiki_online_nodebug.py | 2 +- gensim/scripts/make_wikicorpus.py | 2 +- gensim/scripts/word2vec_standalone.py | 2 +- gensim/similarities/annoy.py | 2 +- gensim/similarities/docsim.py | 2 +- gensim/similarities/fastss.pyx | 2 +- gensim/similarities/levenshtein.py | 2 +- gensim/similarities/nmslib.py | 2 +- gensim/similarities/termsim.py | 2 +- gensim/test/basetmtests.py | 2 +- gensim/test/simspeed.py | 2 +- gensim/test/simspeed2.py | 2 +- gensim/test/test_aggregation.py | 2 +- gensim/test/test_atmodel.py | 2 +- gensim/test/test_big.py | 2 +- gensim/test/test_coherencemodel.py | 2 +- gensim/test/test_corpora.py | 2 +- gensim/test/test_corpora_dictionary.py | 2 +- gensim/test/test_corpora_hashdictionary.py | 2 +- gensim/test/test_datatype.py | 2 +- gensim/test/test_direct_confirmation.py | 2 +- gensim/test/test_doc2vec.py | 2 +- gensim/test/test_glove2word2vec.py | 2 +- gensim/test/test_hdpmodel.py | 2 +- gensim/test/test_indirect_confirmation.py | 2 +- gensim/test/test_keyedvectors.py | 2 +- gensim/test/test_lda_callback.py | 2 +- gensim/test/test_ldamodel.py | 2 +- gensim/test/test_lee.py | 2 +- gensim/test/test_logentropy_model.py | 2 +- gensim/test/test_lsimodel.py | 2 +- gensim/test/test_matutils.py | 2 +- gensim/test/test_miislita.py | 2 +- gensim/test/test_nmf.py | 2 +- gensim/test/test_normmodel.py | 2 +- 
gensim/test/test_phrases.py | 2 +- gensim/test/test_poincare.py | 2 +- gensim/test/test_probability_estimation.py | 2 +- gensim/test/test_rpmodel.py | 2 +- gensim/test/test_scripts.py | 2 +- gensim/test/test_segmentation.py | 2 +- gensim/test/test_similarities.py | 2 +- gensim/test/test_similarity_metrics.py | 2 +- gensim/test/test_tfidfmodel.py | 2 +- gensim/test/test_tmdiff.py | 2 +- gensim/test/test_utils.py | 2 +- gensim/test/test_word2vec.py | 2 +- gensim/topic_coherence/aggregation.py | 2 +- gensim/topic_coherence/direct_confirmation_measure.py | 2 +- gensim/topic_coherence/indirect_confirmation_measure.py | 2 +- gensim/topic_coherence/probability_estimation.py | 2 +- gensim/topic_coherence/segmentation.py | 2 +- gensim/topic_coherence/text_analysis.py | 2 +- gensim/utils.py | 2 +- release/check_wheels.py | 2 +- release/generate_changelog.py | 2 +- setup.py | 4 ++-- 117 files changed, 118 insertions(+), 118 deletions(-) diff --git a/continuous_integration/check_wheels.py b/continuous_integration/check_wheels.py index e66d5d69f3..f5183fca70 100644 --- a/continuous_integration/check_wheels.py +++ b/continuous_integration/check_wheels.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2019 RaRe Technologies s.r.o. -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Print available wheels for a particular Python package.""" import re import sys diff --git a/docs/src/_index.rst.unused b/docs/src/_index.rst.unused index 71390c1060..b9789f54a5 100644 --- a/docs/src/_index.rst.unused +++ b/docs/src/_index.rst.unused @@ -57,7 +57,7 @@ The **principal design objectives** behind Gensim are: Availability ------------ -Gensim is licensed under the OSI-approved `GNU LGPLv2.1 license `_ and can be downloaded either from its `github repository `_ or from the `Python Package Index `_. 
+Gensim is licensed under the OSI-approved `GNU LGPLv2.1 license `_ and can be downloaded either from its `github repository `_ or from the `Python Package Index `_. .. seealso:: diff --git a/docs/src/_license.rst.unused b/docs/src/_license.rst.unused index d85983aa44..c99bfa9e7c 100644 --- a/docs/src/_license.rst.unused +++ b/docs/src/_license.rst.unused @@ -5,7 +5,7 @@ Licensing --------- -Gensim is licensed under the OSI-approved `GNU LGPLv2.1 license `_. +Gensim is licensed under the OSI-approved `GNU LGPLv2.1 license `_. This means that it's free for both personal and commercial use, but if you make any modification to Gensim that you distribute to other people, you have to disclose diff --git a/docs/src/intro.rst b/docs/src/intro.rst index 9c079711ea..aac9966c5e 100644 --- a/docs/src/intro.rst +++ b/docs/src/intro.rst @@ -63,7 +63,7 @@ After installation, learn how to use Gensim from its :ref:`sphx_glr_auto_example Licensing ---------- -Gensim is licensed under the OSI-approved `GNU LGPLv2.1 license `_. +Gensim is licensed under the OSI-approved `GNU LGPLv2.1 license `_. This means that it's free for both personal and commercial use, but if you make any modification to Gensim that you distribute to other people, you have to disclose the source code of these modifications. 
diff --git a/gensim/corpora/bleicorpus.py b/gensim/corpora/bleicorpus.py index 15d79aeffd..65796ebab8 100644 --- a/gensim/corpora/bleicorpus.py +++ b/gensim/corpora/bleicorpus.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Сorpus in Blei's LDA-C format.""" diff --git a/gensim/corpora/csvcorpus.py b/gensim/corpora/csvcorpus.py index 505d7c9be1..43e7c50f64 100644 --- a/gensim/corpora/csvcorpus.py +++ b/gensim/corpora/csvcorpus.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2013 Zygmunt Zając -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Corpus in CSV format.""" diff --git a/gensim/corpora/dictionary.py b/gensim/corpora/dictionary.py index 51ec35038f..fbfa67fb6f 100644 --- a/gensim/corpora/dictionary.py +++ b/gensim/corpora/dictionary.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """This module implements the concept of a Dictionary -- a mapping between words and their integer ids.""" diff --git a/gensim/corpora/hashdictionary.py b/gensim/corpora/hashdictionary.py index 8eb6d87dd1..5241fd1725 100644 --- a/gensim/corpora/hashdictionary.py +++ b/gensim/corpora/hashdictionary.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2012 Homer Strong, Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Implements the `"hashing trick" `_ -- a mapping between words and their integer 
ids using a fixed, static mapping (hash function). diff --git a/gensim/corpora/indexedcorpus.py b/gensim/corpora/indexedcorpus.py index 8624c54fbf..f521657923 100644 --- a/gensim/corpora/indexedcorpus.py +++ b/gensim/corpora/indexedcorpus.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Base Indexed Corpus class.""" diff --git a/gensim/corpora/lowcorpus.py b/gensim/corpora/lowcorpus.py index 01b1043a9c..25e9515004 100644 --- a/gensim/corpora/lowcorpus.py +++ b/gensim/corpora/lowcorpus.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Corpus in `GibbsLda++ format `_.""" diff --git a/gensim/corpora/malletcorpus.py b/gensim/corpora/malletcorpus.py index 2b83a90bb1..d24402584c 100644 --- a/gensim/corpora/malletcorpus.py +++ b/gensim/corpora/malletcorpus.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Corpus in `Mallet format `_.""" diff --git a/gensim/corpora/mmcorpus.py b/gensim/corpora/mmcorpus.py index 72f2587d95..0ace957afc 100644 --- a/gensim/corpora/mmcorpus.py +++ b/gensim/corpora/mmcorpus.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Corpus in the `Matrix Market format `_.""" diff --git a/gensim/corpora/opinosiscorpus.py b/gensim/corpora/opinosiscorpus.py 
index b4e25731ce..5c083a494e 100644 --- a/gensim/corpora/opinosiscorpus.py +++ b/gensim/corpora/opinosiscorpus.py @@ -3,7 +3,7 @@ # # Author: Tobias B # Copyright (C) 2021 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Creates a corpus and dictionary from the Opinosis dataset. diff --git a/gensim/corpora/sharded_corpus.py b/gensim/corpora/sharded_corpus.py index b8858f93c3..c82d711137 100644 --- a/gensim/corpora/sharded_corpus.py +++ b/gensim/corpora/sharded_corpus.py @@ -3,7 +3,7 @@ # # Original author: Jan Hajic jr. # Copyright (C) 2015 Radim Rehurek and gensim team. -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ This module implements a corpus class that stores its data in separate files called diff --git a/gensim/corpora/svmlightcorpus.py b/gensim/corpora/svmlightcorpus.py index 5f0b049b07..54516e082e 100644 --- a/gensim/corpora/svmlightcorpus.py +++ b/gensim/corpora/svmlightcorpus.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Corpus in SVMlight format.""" diff --git a/gensim/corpora/textcorpus.py b/gensim/corpora/textcorpus.py index b4406c248a..ef2880beca 100644 --- a/gensim/corpora/textcorpus.py +++ b/gensim/corpora/textcorpus.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Module provides some code scaffolding to simplify use of built dictionary for constructing BoW vectors. 
diff --git a/gensim/corpora/ucicorpus.py b/gensim/corpora/ucicorpus.py index fd81f3ef2a..5231cf28d0 100644 --- a/gensim/corpora/ucicorpus.py +++ b/gensim/corpora/ucicorpus.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2012 Jonathan Esterhazy -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Corpus in `UCI format `_.""" diff --git a/gensim/corpora/wikicorpus.py b/gensim/corpora/wikicorpus.py index ee8c4ef281..9696f843d9 100644 --- a/gensim/corpora/wikicorpus.py +++ b/gensim/corpora/wikicorpus.py @@ -4,7 +4,7 @@ # Copyright (C) 2010 Radim Rehurek # Copyright (C) 2012 Lars Buitinck # Copyright (C) 2018 Emmanouil Stergiadis -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Construct a corpus from a Wikipedia (or other MediaWiki-based) database dump. 
diff --git a/gensim/examples/dmlcz/dmlcorpus.py b/gensim/examples/dmlcz/dmlcorpus.py index 24aca6cb65..35c2e113b5 100644 --- a/gensim/examples/dmlcz/dmlcorpus.py +++ b/gensim/examples/dmlcz/dmlcorpus.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ diff --git a/gensim/examples/dmlcz/gensim_build.py b/gensim/examples/dmlcz/gensim_build.py index 873c7915ab..efdbced4ca 100755 --- a/gensim/examples/dmlcz/gensim_build.py +++ b/gensim/examples/dmlcz/gensim_build.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ USAGE: %(program)s LANGUAGE diff --git a/gensim/examples/dmlcz/gensim_genmodel.py b/gensim/examples/dmlcz/gensim_genmodel.py index a2f2b792e7..eed275ed2c 100755 --- a/gensim/examples/dmlcz/gensim_genmodel.py +++ b/gensim/examples/dmlcz/gensim_genmodel.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ USAGE: %(program)s LANGUAGE METHOD diff --git a/gensim/examples/dmlcz/gensim_xml.py b/gensim/examples/dmlcz/gensim_xml.py index 9fbbc1d92f..eac7f8d21b 100755 --- a/gensim/examples/dmlcz/gensim_xml.py +++ b/gensim/examples/dmlcz/gensim_xml.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ USAGE: %(program)s LANGUAGE METHOD diff --git 
a/gensim/examples/dmlcz/sources.py b/gensim/examples/dmlcz/sources.py index 4c6eb8a048..53ec1cc8d1 100644 --- a/gensim/examples/dmlcz/sources.py +++ b/gensim/examples/dmlcz/sources.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ This module contains implementations (= different classes) which encapsulate the diff --git a/gensim/interfaces.py b/gensim/interfaces.py index c685602e57..b3f8fd89df 100644 --- a/gensim/interfaces.py +++ b/gensim/interfaces.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Basic interfaces used across the whole Gensim package. diff --git a/gensim/matutils.py b/gensim/matutils.py index fdd1a6b592..2bddf07bf0 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2011 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Math helper functions.""" diff --git a/gensim/models/_fasttext_bin.py b/gensim/models/_fasttext_bin.py index 5fbce5d926..80eedfa628 100644 --- a/gensim/models/_fasttext_bin.py +++ b/gensim/models/_fasttext_bin.py @@ -3,7 +3,7 @@ # # Authors: Michael Penkov # Copyright (C) 2019 RaRe Technologies s.r.o. -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Load models from the native binary format released by Facebook. 
diff --git a/gensim/models/atmodel.py b/gensim/models/atmodel.py index 75893c5ac0..3467937c0d 100755 --- a/gensim/models/atmodel.py +++ b/gensim/models/atmodel.py @@ -3,7 +3,7 @@ # # Copyright (C) 2016 Radim Rehurek # Copyright (C) 2016 Olavur Mortensen -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Author-topic model. diff --git a/gensim/models/callbacks.py b/gensim/models/callbacks.py index c5560441af..006917b600 100644 --- a/gensim/models/callbacks.py +++ b/gensim/models/callbacks.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2018 RARE Technologies -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Callbacks can be used to observe the training process. diff --git a/gensim/models/coherencemodel.py b/gensim/models/coherencemodel.py index d6df976153..ee3a828376 100644 --- a/gensim/models/coherencemodel.py +++ b/gensim/models/coherencemodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Calculate topic coherence for topic models. This is the implementation of the four stage topic coherence pipeline from the paper `Michael Roeder, Andreas Both and Alexander Hinneburg: "Exploring the space of topic coherence measures" diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index d76be81f9b..9601be1b78 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -3,7 +3,7 @@ # # Author: Gensim Contributors # Copyright (C) 2018 RaRe Technologies s.r.o. 
-# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Introduction diff --git a/gensim/models/doc2vec_corpusfile.pyx b/gensim/models/doc2vec_corpusfile.pyx index 50a07fc3ab..da5b230b9f 100644 --- a/gensim/models/doc2vec_corpusfile.pyx +++ b/gensim/models/doc2vec_corpusfile.pyx @@ -7,7 +7,7 @@ # coding: utf-8 # # Copyright (C) 2018 Dmitry Persiyanov -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Optimized cython functions for file-based training :class:`~gensim.models.doc2vec.Doc2Vec` model.""" diff --git a/gensim/models/doc2vec_inner.pxd b/gensim/models/doc2vec_inner.pxd index 41635b47a0..525d20c6b6 100644 --- a/gensim/models/doc2vec_inner.pxd +++ b/gensim/models/doc2vec_inner.pxd @@ -11,7 +11,7 @@ # used from doc2vec_corpusfile # # Copyright (C) 2018 Dmitry Persiyanov -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html import numpy as np cimport numpy as np diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index 804a3ac28d..21964b79b6 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -7,7 +7,7 @@ # coding: utf-8 # # Copyright (C) 2013 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Optimized cython functions for training :class:`~gensim.models.doc2vec.Doc2Vec` model.""" diff --git a/gensim/models/ensemblelda.py b/gensim/models/ensemblelda.py index 16b5c1c1bc..6ba3ae6fcc 100644 --- a/gensim/models/ensemblelda.py +++ b/gensim/models/ensemblelda.py @@ -4,7 +4,7 @@ # Authors: Tobias Brigl , Alex 
Salles , # Alex Loosley , Data Reply Munich # Copyright (C) 2021 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Ensemble Latent Dirichlet Allocation (eLDA), an algorithm for extracting reliable topics. diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 7c0ec8501b..e36cba771a 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -3,7 +3,7 @@ # # Authors: Gensim Contributors # Copyright (C) 2018 RaRe Technologies s.r.o. -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Introduction diff --git a/gensim/models/fasttext_corpusfile.pyx b/gensim/models/fasttext_corpusfile.pyx index 2b5344e2d5..1f67785bf7 100644 --- a/gensim/models/fasttext_corpusfile.pyx +++ b/gensim/models/fasttext_corpusfile.pyx @@ -8,7 +8,7 @@ # coding: utf-8 # # Copyright (C) 2018 Dmitry Persiyanov -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Optimized cython functions for file-based training :class:`~gensim.models.fasttext.FastText` model.""" diff --git a/gensim/models/hdpmodel.py b/gensim/models/hdpmodel.py index 5e0fbfe3e2..e83561b2e7 100755 --- a/gensim/models/hdpmodel.py +++ b/gensim/models/hdpmodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2012 Jonathan Esterhazy -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html # # HDP inference code is adapted from the onlinehdp.py script by # Chong Wang (chongw at cs.princeton.edu). 
diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index b09f440f92..821e977d4d 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -3,7 +3,7 @@ # # Author: Gensim Contributors # Copyright (C) 2018 RaRe Technologies s.r.o. -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """This module implements word vectors, and more generally sets of vectors keyed by lookup tokens/ints, and various similarity look-ups. diff --git a/gensim/models/lda_dispatcher.py b/gensim/models/lda_dispatcher.py index 41dc3e632b..ab36e4f2cb 100755 --- a/gensim/models/lda_dispatcher.py +++ b/gensim/models/lda_dispatcher.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Dispatcher process which orchestrates distributed Latent Dirichlet Allocation (LDA, :class:`~gensim.models.ldamodel.LdaModel`) computations. diff --git a/gensim/models/lda_worker.py b/gensim/models/lda_worker.py index 25d787738e..2d357abacc 100755 --- a/gensim/models/lda_worker.py +++ b/gensim/models/lda_worker.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2011 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Worker ("slave") process used in computing distributed Latent Dirichlet Allocation (LDA, :class:`~gensim.models.ldamodel.LdaModel`). 
diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index b5f8017f07..945ff04599 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2011 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Optimized `Latent Dirichlet Allocation (LDA) `_ in Python. diff --git a/gensim/models/ldamulticore.py b/gensim/models/ldamulticore.py index fdb5ce70a9..7c1e1b50fa 100644 --- a/gensim/models/ldamulticore.py +++ b/gensim/models/ldamulticore.py @@ -3,7 +3,7 @@ # # Author: Jan Zikes, Radim Rehurek # Copyright (C) 2014 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Online Latent Dirichlet Allocation (LDA) in Python, using all CPU cores to parallelize and speed up model training. 
diff --git a/gensim/models/ldaseqmodel.py b/gensim/models/ldaseqmodel.py index 8ffcb5eee6..8926480abe 100644 --- a/gensim/models/ldaseqmodel.py +++ b/gensim/models/ldaseqmodel.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html # Based on Copyright (C) 2016 Radim Rehurek """Lda Sequence model, inspired by diff --git a/gensim/models/logentropy_model.py b/gensim/models/logentropy_model.py index 16fbace8d2..156ea36581 100644 --- a/gensim/models/logentropy_model.py +++ b/gensim/models/logentropy_model.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """This module allows simple Bag of Words (BoW) represented corpus to be transformed into log entropy space. It implements Log Entropy Model that produces entropy-weighted logarithmic term frequency representation. diff --git a/gensim/models/lsi_dispatcher.py b/gensim/models/lsi_dispatcher.py index 2265dc7811..ca3bc78b35 100755 --- a/gensim/models/lsi_dispatcher.py +++ b/gensim/models/lsi_dispatcher.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Dispatcher process which orchestrates distributed :class:`~gensim.models.lsimodel.LsiModel` computations. Run this script only once, on any node in your cluster. 
diff --git a/gensim/models/lsi_worker.py b/gensim/models/lsi_worker.py index a3b5845f19..69247b73ea 100755 --- a/gensim/models/lsi_worker.py +++ b/gensim/models/lsi_worker.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Worker ("slave") process used in computing distributed Latent Semantic Indexing (LSI, :class:`~gensim.models.lsimodel.LsiModel`) models. diff --git a/gensim/models/lsimodel.py b/gensim/models/lsimodel.py index 0bdb9f9bf9..5506faa5db 100644 --- a/gensim/models/lsimodel.py +++ b/gensim/models/lsimodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Module for `Latent Semantic Analysis (aka Latent Semantic Indexing) diff --git a/gensim/models/normmodel.py b/gensim/models/normmodel.py index 62cbfc8fef..26aac40d3f 100644 --- a/gensim/models/normmodel.py +++ b/gensim/models/normmodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2012 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html import logging diff --git a/gensim/models/phrases.py b/gensim/models/phrases.py index c95682fa5e..8bef57bf53 100644 --- a/gensim/models/phrases.py +++ b/gensim/models/phrases.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2011 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automatically detect common phrases -- aka multi-word expressions, word 
n-gram collocations -- from diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py index 136fd6b6d5..0adbc82d4e 100644 --- a/gensim/models/poincare.py +++ b/gensim/models/poincare.py @@ -3,7 +3,7 @@ # # Author: Jayant Jain # Copyright (C) 2017 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Python implementation of Poincaré Embeddings. diff --git a/gensim/models/rpmodel.py b/gensim/models/rpmodel.py index cbdaf4cb55..38487a4b92 100644 --- a/gensim/models/rpmodel.py +++ b/gensim/models/rpmodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Random Projections (also known as Random Indexing). diff --git a/gensim/models/tfidfmodel.py b/gensim/models/tfidfmodel.py index cf2c3d3e1a..336023bc1e 100644 --- a/gensim/models/tfidfmodel.py +++ b/gensim/models/tfidfmodel.py @@ -3,7 +3,7 @@ # # Copyright (C) 2012 Radim Rehurek # Copyright (C) 2017 Mohit Rathore -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """This module implements functionality related to the `Term Frequency - Inverse Document Frequency `_ class of bag-of-words vector space models. diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index a3fe865b7a..d5c6fe24e6 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -3,7 +3,7 @@ # # Author: Gensim Contributors # Copyright (C) 2018 RaRe Technologies s.r.o. 
-# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Introduction diff --git a/gensim/models/word2vec_corpusfile.pxd b/gensim/models/word2vec_corpusfile.pxd index 56e8cb64ee..2490c2ca37 100644 --- a/gensim/models/word2vec_corpusfile.pxd +++ b/gensim/models/word2vec_corpusfile.pxd @@ -10,7 +10,7 @@ # also used from fasttext_corpusfile and doc2vec_corpusfile # # Copyright (C) 2018 Dmitry Persiyanov -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html from libcpp.string cimport string from libcpp.vector cimport vector diff --git a/gensim/models/word2vec_corpusfile.pyx b/gensim/models/word2vec_corpusfile.pyx index a2b962aed6..89012cfd81 100644 --- a/gensim/models/word2vec_corpusfile.pyx +++ b/gensim/models/word2vec_corpusfile.pyx @@ -8,7 +8,7 @@ # coding: utf-8 # # Copyright (C) 2018 Dmitry Persiyanov -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Optimized cython functions for file-based training :class:`~gensim.models.word2vec.Word2Vec` model.""" diff --git a/gensim/models/word2vec_inner.pxd b/gensim/models/word2vec_inner.pxd index 4b4523dc55..8a77a17041 100644 --- a/gensim/models/word2vec_inner.pxd +++ b/gensim/models/word2vec_inner.pxd @@ -9,7 +9,7 @@ # used by both word2vec_inner.pyx (automatically) and doc2vec_inner.pyx (by explicit cimport) # # Copyright (C) 2013 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html cimport numpy as np diff --git a/gensim/models/word2vec_inner.pyx b/gensim/models/word2vec_inner.pyx index 1c0807ee0f..965e80c1ce 100755 --- 
a/gensim/models/word2vec_inner.pyx +++ b/gensim/models/word2vec_inner.pyx @@ -7,7 +7,7 @@ # coding: utf-8 # # Copyright (C) 2013 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Optimized cython functions for training :class:`~gensim.models.word2vec.Word2Vec` model.""" diff --git a/gensim/parsing/preprocessing.py b/gensim/parsing/preprocessing.py index bb96b1ec2f..d165a3224a 100644 --- a/gensim/parsing/preprocessing.py +++ b/gensim/parsing/preprocessing.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """This module contains methods for parsing and preprocessing strings. diff --git a/gensim/scripts/glove2word2vec.py b/gensim/scripts/glove2word2vec.py index 16de58743d..5d4ba0c785 100644 --- a/gensim/scripts/glove2word2vec.py +++ b/gensim/scripts/glove2word2vec.py @@ -3,7 +3,7 @@ # # Copyright (C) 2016 Radim Rehurek # Copyright (C) 2016 Manas Ranjan Kar -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """This script allows to convert GloVe vectors into the word2vec. 
Both files are diff --git a/gensim/scripts/make_wiki_online.py b/gensim/scripts/make_wiki_online.py index e5ee11283a..99b3ae2967 100755 --- a/gensim/scripts/make_wiki_online.py +++ b/gensim/scripts/make_wiki_online.py @@ -3,7 +3,7 @@ # # Copyright (C) 2010 Radim Rehurek # Copyright (C) 2012 Lars Buitinck -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ diff --git a/gensim/scripts/make_wiki_online_nodebug.py b/gensim/scripts/make_wiki_online_nodebug.py index 0ec9704724..c8d3a4802a 100755 --- a/gensim/scripts/make_wiki_online_nodebug.py +++ b/gensim/scripts/make_wiki_online_nodebug.py @@ -3,7 +3,7 @@ # # Copyright (C) 2010 Radim Rehurek # Copyright (C) 2012 Lars Buitinck -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ diff --git a/gensim/scripts/make_wikicorpus.py b/gensim/scripts/make_wikicorpus.py index b76c8c2bd5..188762b545 100755 --- a/gensim/scripts/make_wikicorpus.py +++ b/gensim/scripts/make_wikicorpus.py @@ -3,7 +3,7 @@ # # Copyright (C) 2010 Radim Rehurek # Copyright (C) 2012 Lars Buitinck -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ diff --git a/gensim/scripts/word2vec_standalone.py b/gensim/scripts/word2vec_standalone.py index 22be887cd1..73eee8e7c2 100644 --- a/gensim/scripts/word2vec_standalone.py +++ b/gensim/scripts/word2vec_standalone.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ diff --git a/gensim/similarities/annoy.py b/gensim/similarities/annoy.py index 
688985ca51..a59e06bded 100644 --- a/gensim/similarities/annoy.py +++ b/gensim/similarities/annoy.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2013 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ This module integrates Spotify's `Annoy `_ (Approximate Nearest Neighbors Oh Yeah) diff --git a/gensim/similarities/docsim.py b/gensim/similarities/docsim.py index 2e46479f87..2fa5945e43 100644 --- a/gensim/similarities/docsim.py +++ b/gensim/similarities/docsim.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2013 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Compute similarities across a collection of documents in the Vector Space Model. diff --git a/gensim/similarities/fastss.pyx b/gensim/similarities/fastss.pyx index fe0366bb04..14304c72f2 100644 --- a/gensim/similarities/fastss.pyx +++ b/gensim/similarities/fastss.pyx @@ -5,7 +5,7 @@ # coding: utf-8 # # Copyright (C) 2021 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html # Code adapted from TinyFastSS (public domain), https://github.com/fujimotos/TinyFastSS """Fast approximate string similarity search using the FastSS algorithm.""" diff --git a/gensim/similarities/levenshtein.py b/gensim/similarities/levenshtein.py index 768429a62a..11011ed1f2 100644 --- a/gensim/similarities/levenshtein.py +++ b/gensim/similarities/levenshtein.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2018 Vit Novotny -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html 
""" This module allows fast fuzzy search between strings, using kNN queries with Levenshtein similarity. diff --git a/gensim/similarities/nmslib.py b/gensim/similarities/nmslib.py index 752976862b..0e175f4079 100644 --- a/gensim/similarities/nmslib.py +++ b/gensim/similarities/nmslib.py @@ -2,7 +2,7 @@ # # Copyright (C) 2019 Radim Rehurek # Copyright (C) 2019 Masahiro Kazama -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ This module integrates `NMSLIB `_ fast similarity diff --git a/gensim/similarities/termsim.py b/gensim/similarities/termsim.py index f97801ca66..8c04dec078 100644 --- a/gensim/similarities/termsim.py +++ b/gensim/similarities/termsim.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2018 Vit Novotny -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ This module provides classes that deal with term similarities. diff --git a/gensim/test/basetmtests.py b/gensim/test/basetmtests.py index 56de810691..9b61c66c20 100644 --- a/gensim/test/basetmtests.py +++ b/gensim/test/basetmtests.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). 
diff --git a/gensim/test/simspeed.py b/gensim/test/simspeed.py index 27f52fd276..3547663706 100755 --- a/gensim/test/simspeed.py +++ b/gensim/test/simspeed.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2011 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ USAGE: %(program)s CORPUS_DENSE.mm CORPUS_SPARSE.mm [NUMDOCS] diff --git a/gensim/test/simspeed2.py b/gensim/test/simspeed2.py index 931caef950..595f0616a7 100755 --- a/gensim/test/simspeed2.py +++ b/gensim/test/simspeed2.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2011 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ USAGE: %(program)s CORPUS_DENSE.mm CORPUS_SPARSE.mm [NUMDOCS] diff --git a/gensim/test/test_aggregation.py b/gensim/test/test_aggregation.py index cce4bc73fc..f7cdd60f5d 100644 --- a/gensim/test/test_aggregation.py +++ b/gensim/test/test_aggregation.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). diff --git a/gensim/test/test_atmodel.py b/gensim/test/test_atmodel.py index e55ffe97ba..bea73862e0 100644 --- a/gensim/test/test_atmodel.py +++ b/gensim/test/test_atmodel.py @@ -3,7 +3,7 @@ # # Copyright (C) 2016 Radim Rehurek # Copyright (C) 2016 Olavur Mortensen -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for the author-topic model (AuthorTopicModel class). 
These tests diff --git a/gensim/test/test_big.py b/gensim/test/test_big.py index a2444e87e4..cef0988821 100644 --- a/gensim/test/test_big.py +++ b/gensim/test/test_big.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2014 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking processing/storing large inputs. diff --git a/gensim/test/test_coherencemodel.py b/gensim/test/test_coherencemodel.py index 9927851a93..de26bc8646 100644 --- a/gensim/test/test_coherencemodel.py +++ b/gensim/test/test_coherencemodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). diff --git a/gensim/test/test_corpora.py b/gensim/test/test_corpora.py index 431b07c0ce..2c33873113 100644 --- a/gensim/test/test_corpora.py +++ b/gensim/test/test_corpora.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking corpus I/O formats (the corpora package). 
diff --git a/gensim/test/test_corpora_dictionary.py b/gensim/test/test_corpora_dictionary.py index b1f4d4f33f..78e0b19186 100644 --- a/gensim/test/test_corpora_dictionary.py +++ b/gensim/test/test_corpora_dictionary.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Unit tests for the `corpora.Dictionary` class. diff --git a/gensim/test/test_corpora_hashdictionary.py b/gensim/test/test_corpora_hashdictionary.py index 48764cb91c..d1842556d5 100644 --- a/gensim/test/test_corpora_hashdictionary.py +++ b/gensim/test/test_corpora_hashdictionary.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Unit tests for the `corpora.HashDictionary` class. diff --git a/gensim/test/test_datatype.py b/gensim/test/test_datatype.py index 22b278f7b5..38ed438889 100644 --- a/gensim/test/test_datatype.py +++ b/gensim/test/test_datatype.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking various matutils functions. 
diff --git a/gensim/test/test_direct_confirmation.py b/gensim/test/test_direct_confirmation.py index c04864ee50..313f686e6b 100644 --- a/gensim/test/test_direct_confirmation.py +++ b/gensim/test/test_direct_confirmation.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2011 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for direct confirmation measures in the direct_confirmation_measure module. diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index a7e1fa58df..60720b0444 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). 
diff --git a/gensim/test/test_glove2word2vec.py b/gensim/test/test_glove2word2vec.py index 004d8baee6..8300b9d250 100644 --- a/gensim/test/test_glove2word2vec.py +++ b/gensim/test/test_glove2word2vec.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2016 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Test for gensim.scripts.glove2word2vec.py.""" diff --git a/gensim/test/test_hdpmodel.py b/gensim/test/test_hdpmodel.py index 307d664e89..7c6eb6222a 100644 --- a/gensim/test/test_hdpmodel.py +++ b/gensim/test/test_hdpmodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). diff --git a/gensim/test/test_indirect_confirmation.py b/gensim/test/test_indirect_confirmation.py index 25ade2f55a..fd720916ae 100644 --- a/gensim/test/test_indirect_confirmation.py +++ b/gensim/test/test_indirect_confirmation.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2011 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for indirect confirmation measures in the indirect_confirmation_measure module. 
diff --git a/gensim/test/test_keyedvectors.py b/gensim/test/test_keyedvectors.py index cc70577842..c94458648a 100644 --- a/gensim/test/test_keyedvectors.py +++ b/gensim/test/test_keyedvectors.py @@ -3,7 +3,7 @@ # # Author: Jayant Jain # Copyright (C) 2017 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking the poincare module from the models package. diff --git a/gensim/test/test_lda_callback.py b/gensim/test/test_lda_callback.py index 21c4e5a2be..11f49538be 100644 --- a/gensim/test/test_lda_callback.py +++ b/gensim/test/test_lda_callback.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2018 Allenyl -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking visdom API diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py index 7ce675e337..1be0124576 100644 --- a/gensim/test/test_ldamodel.py +++ b/gensim/test/test_ldamodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). diff --git a/gensim/test/test_lee.py b/gensim/test/test_lee.py index c8a592d539..e9749079a0 100644 --- a/gensim/test/test_lee.py +++ b/gensim/test/test_lee.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # encoding: utf-8 # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated test to reproduce the results of Lee et al. 
(2005) diff --git a/gensim/test/test_logentropy_model.py b/gensim/test/test_logentropy_model.py index 457153cd7c..1a3a9ab789 100644 --- a/gensim/test/test_logentropy_model.py +++ b/gensim/test/test_logentropy_model.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). diff --git a/gensim/test/test_lsimodel.py b/gensim/test/test_lsimodel.py index 381fb941f5..39b55332bd 100644 --- a/gensim/test/test_lsimodel.py +++ b/gensim/test/test_lsimodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). 
diff --git a/gensim/test/test_matutils.py b/gensim/test/test_matutils.py index 5c5f14398e..1401815bb8 100644 --- a/gensim/test/test_matutils.py +++ b/gensim/test/test_matutils.py @@ -3,7 +3,7 @@ # # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html import logging import unittest import numpy as np diff --git a/gensim/test/test_miislita.py b/gensim/test/test_miislita.py index d3c4384f95..58da1b579f 100644 --- a/gensim/test/test_miislita.py +++ b/gensim/test/test_miislita.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ This module replicates the miislita vector spaces from diff --git a/gensim/test/test_nmf.py b/gensim/test/test_nmf.py index b06e83761e..bc875079c6 100644 --- a/gensim/test/test_nmf.py +++ b/gensim/test/test_nmf.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2018 Timofey Yefimov -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). diff --git a/gensim/test/test_normmodel.py b/gensim/test/test_normmodel.py index 475ace0aab..fd903fe524 100644 --- a/gensim/test/test_normmodel.py +++ b/gensim/test/test_normmodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). 
diff --git a/gensim/test/test_phrases.py b/gensim/test/test_phrases.py index e8d9567b20..68d229a47e 100644 --- a/gensim/test/test_phrases.py +++ b/gensim/test/test_phrases.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for the phrase detection module. diff --git a/gensim/test/test_poincare.py b/gensim/test/test_poincare.py index 98970525a2..ae99335b69 100644 --- a/gensim/test/test_poincare.py +++ b/gensim/test/test_poincare.py @@ -3,7 +3,7 @@ # # Author: Jayant Jain # Copyright (C) 2017 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking the poincare module from the models package. diff --git a/gensim/test/test_probability_estimation.py b/gensim/test/test_probability_estimation.py index f5ff9e753d..f0c6715bff 100644 --- a/gensim/test/test_probability_estimation.py +++ b/gensim/test/test_probability_estimation.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2011 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for probability estimation algorithms in the probability_estimation module. 
diff --git a/gensim/test/test_rpmodel.py b/gensim/test/test_rpmodel.py index 4ae27b557f..bee5b0cdd7 100644 --- a/gensim/test/test_rpmodel.py +++ b/gensim/test/test_rpmodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). diff --git a/gensim/test/test_scripts.py b/gensim/test/test_scripts.py index caa0b84f93..2b782f3c30 100644 --- a/gensim/test/test_scripts.py +++ b/gensim/test/test_scripts.py @@ -3,7 +3,7 @@ # # Copyright (C) 2018 Vimig Socrates heavily influenced from @AakaashRao # Copyright (C) 2018 Manos Stergiadis -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking the output of gensim.scripts. diff --git a/gensim/test/test_segmentation.py b/gensim/test/test_segmentation.py index b3a852a5ff..b591d4d69b 100644 --- a/gensim/test/test_segmentation.py +++ b/gensim/test/test_segmentation.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2011 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for segmentation algorithms in the segmentation module. 
diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index a7fdfdf7bc..314336dadc 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2011 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for similarity algorithms (the similarities package). diff --git a/gensim/test/test_similarity_metrics.py b/gensim/test/test_similarity_metrics.py index cc9ab2aae9..45a09b9e6f 100644 --- a/gensim/test/test_similarity_metrics.py +++ b/gensim/test/test_similarity_metrics.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # encoding: utf-8 # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated test to check similarity functions and isbow function. diff --git a/gensim/test/test_tfidfmodel.py b/gensim/test/test_tfidfmodel.py index 6290b7a07a..d3f8136142 100644 --- a/gensim/test/test_tfidfmodel.py +++ b/gensim/test/test_tfidfmodel.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). 
diff --git a/gensim/test/test_tmdiff.py b/gensim/test/test_tmdiff.py index cb5b8ba984..7e7db83002 100644 --- a/gensim/test/test_tmdiff.py +++ b/gensim/test/test_tmdiff.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2016 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html import logging import unittest diff --git a/gensim/test/test_utils.py b/gensim/test/test_utils.py index 18436bf655..11dcb87833 100644 --- a/gensim/test/test_utils.py +++ b/gensim/test/test_utils.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking various utils functions. diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py index 74639af865..8fc67f606f 100644 --- a/gensim/test/test_word2vec.py +++ b/gensim/test/test_word2vec.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Automated tests for checking transformation algorithms (the models package). 
diff --git a/gensim/topic_coherence/aggregation.py b/gensim/topic_coherence/aggregation.py index 79ecee793f..be8eccb30a 100644 --- a/gensim/topic_coherence/aggregation.py +++ b/gensim/topic_coherence/aggregation.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2013 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """This module contains functions to perform aggregation on a list of values obtained from the confirmation measure.""" diff --git a/gensim/topic_coherence/direct_confirmation_measure.py b/gensim/topic_coherence/direct_confirmation_measure.py index 8e46fbf91a..ce3b961f8d 100644 --- a/gensim/topic_coherence/direct_confirmation_measure.py +++ b/gensim/topic_coherence/direct_confirmation_measure.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2013 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """This module contains functions to compute direct confirmation on a pair of words or word subsets.""" diff --git a/gensim/topic_coherence/indirect_confirmation_measure.py b/gensim/topic_coherence/indirect_confirmation_measure.py index 3e96c8fbc2..57293aadb1 100644 --- a/gensim/topic_coherence/indirect_confirmation_measure.py +++ b/gensim/topic_coherence/indirect_confirmation_measure.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2013 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html r"""This module contains functions to compute confirmation on a pair of words or word subsets. 
diff --git a/gensim/topic_coherence/probability_estimation.py b/gensim/topic_coherence/probability_estimation.py index 6296437a94..a9f65d6664 100644 --- a/gensim/topic_coherence/probability_estimation.py +++ b/gensim/topic_coherence/probability_estimation.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """This module contains functions to perform segmentation on a list of topics.""" diff --git a/gensim/topic_coherence/segmentation.py b/gensim/topic_coherence/segmentation.py index 94924c8a60..e7bd3c5b7f 100644 --- a/gensim/topic_coherence/segmentation.py +++ b/gensim/topic_coherence/segmentation.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2013 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """This module contains functions to perform segmentation on a list of topics.""" diff --git a/gensim/topic_coherence/text_analysis.py b/gensim/topic_coherence/text_analysis.py index 58bdc2c35f..292c307453 100644 --- a/gensim/topic_coherence/text_analysis.py +++ b/gensim/topic_coherence/text_analysis.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2013 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """This module contains classes for analyzing the texts of a corpus to accumulate statistical information about word occurrences.""" diff --git a/gensim/utils.py b/gensim/utils.py index 0619296888..92b0913d7c 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - 
http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Various general utility functions.""" diff --git a/release/check_wheels.py b/release/check_wheels.py index eeca5b4853..4cd37de757 100644 --- a/release/check_wheels.py +++ b/release/check_wheels.py @@ -2,7 +2,7 @@ # # Authors: Michael Penkov # Copyright (C) 2019 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Check that our wheels are all there.""" import os import os.path diff --git a/release/generate_changelog.py b/release/generate_changelog.py index 1f930d1f38..30f8925f99 100644 --- a/release/generate_changelog.py +++ b/release/generate_changelog.py @@ -3,7 +3,7 @@ # # Author: Gensim Contributors # Copyright (C) 2020 RaRe Technologies s.r.o. -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """Generate changelog entries for all PRs merged since the last release.""" import re diff --git a/setup.py b/setup.py index e0dc4e8881..42f5287bca 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2014 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html +# Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html """ Run with:: @@ -254,7 +254,7 @@ def run(self): ---------------- -Gensim is open source software released under the `GNU LGPLv2.1 license `_. +Gensim is open source software released under the `GNU LGPLv2.1 license `_. 
Copyright (c) 2009-now Radim Rehurek |Analytics|_ From 0ee958f3ff81173c038a5ef9cf123e263bfb6c54 Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 14:05:22 +0800 Subject: [PATCH 02/14] Fix links that point to the website about page This page was removed from the website for Gensim 4. Link instead to the right subsections of the intro page. Also update the links to https. --- README.md | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b4bd542f65..5a0809dc36 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,7 @@ BibTeX entry: [citing gensim in academic papers and theses]: https://scholar.google.com/citations?view_op=view_citation&hl=en&user=9vG_kV0AAAAJ&citation_for_view=9vG_kV0AAAAJ:NaGl4SEjCO4C - [design goals]: http://radimrehurek.com/gensim/about.html + [design goals]: https://radimrehurek.com/gensim/intro.html#design-principles [RaRe Technologies]: http://rare-technologies.com/wp-content/uploads/2016/02/rare_image_only.png%20=10x20 [rare\_tech]: //rare-technologies.com [Talentpair]: https://avatars3.githubusercontent.com/u/8418395?v=3&s=100 diff --git a/setup.py b/setup.py index 42f5287bca..caf2e2a2f2 100644 --- a/setup.py +++ b/setup.py @@ -171,7 +171,7 @@ def run(self): .. _GA: https://github.com/RaRe-Technologies/gensim/actions .. _Downloads: https://pypi.python.org/pypi/gensim -.. _License: http://radimrehurek.com/gensim/about.html +.. _License: https://radimrehurek.com/gensim/intro.html#licensing .. _Wheel: https://pypi.python.org/pypi/gensim Gensim is a Python library for *topic modelling*, *document indexing* and *similarity retrieval* with large corpora. @@ -224,7 +224,7 @@ def run(self): Many scientific algorithms can be expressed in terms of large matrix operations (see the BLAS note above). Gensim taps into these low-level BLAS libraries, by means of its dependency on NumPy. 
So while gensim-the-top-level-code is pure Python, it actually executes highly optimized Fortran/C under the hood, including multithreading (if your BLAS is so configured). -Memory-wise, gensim makes heavy use of Python's built-in generators and iterators for streamed data processing. Memory efficiency was one of gensim's `design goals `_, and is a central feature of gensim, rather than something bolted on as an afterthought. +Memory-wise, gensim makes heavy use of Python's built-in generators and iterators for streamed data processing. Memory efficiency was one of gensim's `design goals `_, and is a central feature of gensim, rather than something bolted on as an afterthought. Documentation ------------- From 3c1490f9a497948bd10b91ad372d12c482248f34 Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 14:16:03 +0800 Subject: [PATCH 03/14] Update link to gensim install instructions The install page now redirects to a section of the index page. Also update the link to https. Fixes: commit e17bcf2bd14be4f99e5efca03caaa6b17d70fa03 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a0809dc36..2ce753b429 100644 --- a/README.md +++ b/README.md @@ -175,4 +175,4 @@ BibTeX entry: [ATLAS]: http://math-atlas.sourceforge.net/ [OpenBLAS]: http://xianyi.github.io/OpenBLAS/ [source tar.gz]: http://pypi.python.org/pypi/gensim - [documentation]: http://radimrehurek.com/gensim/install.html + [documentation]: https://radimrehurek.com/gensim/#install From 5397b0c9b92f194f8cc7a8dc0f63bc25af694abe Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 14:18:40 +0800 Subject: [PATCH 04/14] Add trailing slash to links to the website front page Avoids a redirect to the URL that includes the trailing slash. Also update URLs to https.
--- CHANGELOG.md | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b78ff30f78..a6f51d63ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -290,7 +290,7 @@ Gensim 4.0 is a major release with lots of performance & robustness improvements * Dropped Python 2. Gensim 4.0 is Py3.6+. Read our [Python version support policy](https://github.com/RaRe-Technologies/gensim/wiki/Gensim-And-Compatibility). - If you still need Python 2 for some reason, stay at [Gensim 3.8.3](https://github.com/RaRe-Technologies/gensim/releases/tag/3.8.3). -* A new [Gensim website](https://radimrehurek.com/gensim) – finally! 🙃 +* A new [Gensim website](https://radimrehurek.com/gensim/) – finally! 🙃 So, a major clean-up release overall. We're happy with this **tighter, leaner and faster Gensim**. diff --git a/setup.py b/setup.py index caf2e2a2f2..ce44a1ff62 100644 --- a/setup.py +++ b/setup.py @@ -365,7 +365,7 @@ def run(self): author=u'Radim Rehurek', author_email='me@radimrehurek.com', - url='http://radimrehurek.com/gensim', + url='https://radimrehurek.com/gensim/', project_urls={ 'Source': 'https://github.com/RaRe-Technologies/gensim', }, From 8e1faa11c4e191896b8b7245afe0d382fb26b6f8 Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 14:20:12 +0800 Subject: [PATCH 05/14] Update links to the website to use https This avoids a redirect and protects the content on the network. 
--- README.md | 6 +++--- docs/notebooks/Varembed.ipynb | 4 ++-- docs/notebooks/WordRank_wrapper_quickstart.ipynb | 2 +- docs/notebooks/atmodel_tutorial.ipynb | 2 +- docs/notebooks/deepir.ipynb | 2 +- docs/notebooks/distributed.md | 8 ++++---- docs/src/gallery/core/run_corpora_and_vector_spaces.py | 2 +- docs/src/gallery/tutorials/run_word2vec.py | 6 +++--- docs/src/intro.rst | 4 ++-- gensim/models/ldamulticore.py | 2 +- setup.py | 6 +++--- 11 files changed, 22 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 2ce753b429..accbe90091 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ package: For alternative modes of installation, see the [documentation]. -Gensim is being [continuously tested](http://radimrehurek.com/gensim/#testing) under all +Gensim is being [continuously tested](https://radimrehurek.com/gensim/#testing) under all [supported Python versions](https://github.com/RaRe-Technologies/gensim/wiki/Gensim-And-Compatibility). Support for Python 2.7 was dropped in gensim 4.0.0 – install gensim 3.8.3 if you must use Python 2.7. @@ -101,8 +101,8 @@ Documentation [QuickStart]: https://radimrehurek.com/gensim/auto_examples/core/run_core_concepts.html [Tutorials]: https://radimrehurek.com/gensim/auto_examples/ - [Official Documentation and Walkthrough]: http://radimrehurek.com/gensim/ - [Official API Documentation]: http://radimrehurek.com/gensim/apiref.html + [Official Documentation and Walkthrough]: https://radimrehurek.com/gensim/ + [Official API Documentation]: https://radimrehurek.com/gensim/apiref.html Support ------- diff --git a/docs/notebooks/Varembed.ipynb b/docs/notebooks/Varembed.ipynb index 300f73aeda..4aacf67a99 100644 --- a/docs/notebooks/Varembed.ipynb +++ b/docs/notebooks/Varembed.ipynb @@ -8,7 +8,7 @@ "\n", "Varembed is a word embedding model incorporating morphological information, capturing shared sub-word features. 
Unlike previous work that constructs word embeddings directly from morphemes, varembed combines morphological and distributional information in a unified probabilistic framework. Varembed thus yields improvements on intrinsic word similarity evaluations. Check out the original paper, [arXiv:1608.01056](https://arxiv.org/abs/1608.01056) accepted in [EMNLP 2016](http://www.emnlp2016.net/accepted-papers.html).\n", "\n", - "Varembed is now integrated into [Gensim](http://radimrehurek.com/gensim/) providing ability to load already trained varembed models into gensim with additional functionalities over word vectors already present in gensim.\n", + "Varembed is now integrated into [Gensim](https://radimrehurek.com/gensim/) providing ability to load already trained varembed models into gensim with additional functionalities over word vectors already present in gensim.\n", "\n", "# This Tutorial\n", "\n", @@ -118,7 +118,7 @@ "# Resources\n", "\n", "* [Varembed Source Code](https://github.com/rguthrie3/MorphologicalPriorsForWordEmbeddings)\n", - "* [Gensim](http://radimrehurek.com/gensim/)\n", + "* [Gensim](https://radimrehurek.com/gensim/)\n", "* [Lee Corpus](https://github.com/RaRe-Technologies/gensim/blob/develop/gensim/test/test_data/lee.cor)\n" ] } diff --git a/docs/notebooks/WordRank_wrapper_quickstart.ipynb b/docs/notebooks/WordRank_wrapper_quickstart.ipynb index d027600429..6fcc0ba923 100644 --- a/docs/notebooks/WordRank_wrapper_quickstart.ipynb +++ b/docs/notebooks/WordRank_wrapper_quickstart.ipynb @@ -176,7 +176,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "These methods take an [optional parameter](http://radimrehurek.com/gensim/models/word2vec.html#gensim.models.word2vec.Word2Vec.accuracy) restrict_vocab which limits which test examples are to be considered.\n", + "These methods take an [optional parameter](https://radimrehurek.com/gensim/models/word2vec.html#gensim.models.word2vec.Word2Vec.accuracy) restrict_vocab which limits which test examples 
are to be considered.\n", "\n", "The results here don't look good because the training corpus is very small. To get meaningful results one needs to train on 500k+ words.\n", "\n", diff --git a/docs/notebooks/atmodel_tutorial.ipynb b/docs/notebooks/atmodel_tutorial.ipynb index 54daece7da..6f8d7fa66a 100644 --- a/docs/notebooks/atmodel_tutorial.ipynb +++ b/docs/notebooks/atmodel_tutorial.ipynb @@ -15,7 +15,7 @@ "Naturally, familiarity with topic modelling, LDA and Gensim is assumed in this tutorial. If you are not familiar with either LDA, or its Gensim implementation, I would recommend starting there. Consider some of these resources:\n", "* Gentle introduction to the LDA model: http://blog.echen.me/2011/08/22/introduction-to-latent-dirichlet-allocation/\n", "* Gensim's LDA API documentation: https://radimrehurek.com/gensim/models/ldamodel.html\n", - "* Topic modelling in Gensim: http://radimrehurek.com/topic_modeling_tutorial/2%20-%20Topic%20Modeling.html\n", + "* Topic modelling in Gensim: https://radimrehurek.com/topic_modeling_tutorial/2%20-%20Topic%20Modeling.html\n", "* [Pre-processing and training LDA](lda_training_tips.ipynb)\n", "\n", "\n", diff --git a/docs/notebooks/deepir.ipynb b/docs/notebooks/deepir.ipynb index 1bdcff61eb..4ffacc1fc2 100644 --- a/docs/notebooks/deepir.ipynb +++ b/docs/notebooks/deepir.ipynb @@ -6,7 +6,7 @@ "source": [ "## Deep Inverse Regression with Yelp reviews\n", "\n", - "In this note we'll use [gensim](http://radimrehurek.com/gensim/) to turn the Word2Vec machinery into a document classifier, as in [Document Classification by Inversion of Distributed Language Representations](http://arxiv.org/pdf/1504.07295v3) from ACL 2015." + "In this note we'll use [gensim](https://radimrehurek.com/gensim/) to turn the Word2Vec machinery into a document classifier, as in [Document Classification by Inversion of Distributed Language Representations](http://arxiv.org/pdf/1504.07295v3) from ACL 2015." 
] }, { diff --git a/docs/notebooks/distributed.md b/docs/notebooks/distributed.md index 2dff54eaf3..ff8ffa87e6 100644 --- a/docs/notebooks/distributed.md +++ b/docs/notebooks/distributed.md @@ -53,8 +53,8 @@ Available distributed algorithms [1]: http://en.wikipedia.org/wiki/Distributed_computing [2]: http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms [3]: http://pypi.python.org/pypi/Pyro4 -[4]: http://radimrehurek.com/gensim/intro.html#design -[5]: http://radimrehurek.com/gensim/distributed.html#term-worker +[4]: https://radimrehurek.com/gensim/intro.html#design +[5]: https://radimrehurek.com/gensim/distributed.html#term-worker [6]: http://en.wikipedia.org/wiki/Broadcast_domain -[7]: http://radimrehurek.com/gensim/dist_lsi.html -[8]: http://radimrehurek.com/gensim/dist_lda.html +[7]: https://radimrehurek.com/gensim/dist_lsi.html +[8]: https://radimrehurek.com/gensim/dist_lda.html diff --git a/docs/src/gallery/core/run_corpora_and_vector_spaces.py b/docs/src/gallery/core/run_corpora_and_vector_spaces.py index d02e7d3418..0facecb5b8 100644 --- a/docs/src/gallery/core/run_corpora_and_vector_spaces.py +++ b/docs/src/gallery/core/run_corpora_and_vector_spaces.py @@ -270,7 +270,7 @@ def __iter__(self): # Compatibility with NumPy and SciPy # ---------------------------------- # -# Gensim also contains `efficient utility functions `_ +# Gensim also contains `efficient utility functions `_ # to help converting from/to numpy matrices import gensim diff --git a/docs/src/gallery/tutorials/run_word2vec.py b/docs/src/gallery/tutorials/run_word2vec.py index c5ef323bb2..80813775a1 100644 --- a/docs/src/gallery/tutorials/run_word2vec.py +++ b/docs/src/gallery/tutorials/run_word2vec.py @@ -126,7 +126,7 @@ # below. # # You may also check out an `online word2vec demo -# `_ where you can try +# `_ where you can try # this vector algebra for yourself. That demo runs ``word2vec`` on the # **entire** Google News dataset, of **about 100 billion words**. 
# @@ -309,7 +309,7 @@ def __iter__(self): # ------- # # ``workers`` , the last of the major parameters (full list `here -# `_) +# `_) # is for training parallelization, to speed up training: # @@ -372,7 +372,7 @@ def __iter__(self): ############################################################################### # # This ``evaluate_word_analogies`` method takes an `optional parameter -# `_ +# `_ # ``restrict_vocab`` which limits which test examples are to be considered. # diff --git a/docs/src/intro.rst b/docs/src/intro.rst index aac9966c5e..846189b256 100644 --- a/docs/src/intro.rst +++ b/docs/src/intro.rst @@ -104,9 +104,9 @@ Historically, Gensim started off as a collection of Python scripts for the Czech I (Radim) also wanted to try these fancy "Latent Semantic Methods", but the libraries that realized the necessary computation were `not much fun to work with `_. -Naturally, I set out to reinvent the wheel. Our `2010 LREC publication `_ describes the initial design decisions behind Gensim: **clarity, efficiency and scalability**. It is fairly representative of how Gensim works even today. +Naturally, I set out to reinvent the wheel. Our `2010 LREC publication `_ describes the initial design decisions behind Gensim: **clarity, efficiency and scalability**. It is fairly representative of how Gensim works even today. -Later versions of Gensim improved this efficiency and scalability tremendously. In fact, I made algorithmic scalability of distributional semantics the topic of my `PhD thesis `_. +Later versions of Gensim improved this efficiency and scalability tremendously. In fact, I made algorithmic scalability of distributional semantics the topic of my `PhD thesis `_. By now, Gensim is---to my knowledge---the most robust, efficient and hassle-free piece of software to realize unsupervised semantic modelling from plain text. 
It stands diff --git a/gensim/models/ldamulticore.py b/gensim/models/ldamulticore.py index 7c1e1b50fa..a25252783f 100644 --- a/gensim/models/ldamulticore.py +++ b/gensim/models/ldamulticore.py @@ -17,7 +17,7 @@ * runs in **constant memory** w.r.t. the number of documents: size of the training corpus does not affect memory footprint, can process corpora larger than RAM -Wall-clock `performance on the English Wikipedia `_ (2G corpus positions, +Wall-clock `performance on the English Wikipedia `_ (2G corpus positions, 3.5M documents, 100K features, 0.54G non-zero entries in the final bag-of-words matrix), requesting 100 topics: diff --git a/setup.py b/setup.py index ce44a1ff62..c11bcc378a 100644 --- a/setup.py +++ b/setup.py @@ -213,9 +213,9 @@ def run(self): python setup.py install -For alternative modes of installation, see the `documentation `_. +For alternative modes of installation, see the `documentation `_. -Gensim is being `continuously tested `_ under all `supported Python versions `_. +Gensim is being `continuously tested `_ under all `supported Python versions `_. Support for Python 2.7 was dropped in gensim 4.0.0 – install gensim 3.8.3 if you must use Python 2.7. @@ -261,7 +261,7 @@ def run(self): .. |Analytics| image:: https://ga-beacon.appspot.com/UA-24066335-5/your-repo/page-name .. _Analytics: https://github.com/igrigorik/ga-beacon -.. _Official Documentation and Walkthrough: http://radimrehurek.com/gensim/ +.. _Official Documentation and Walkthrough: https://radimrehurek.com/gensim/ .. _Tutorials: https://github.com/RaRe-Technologies/gensim/blob/develop/tutorials.md#tutorials .. _Tutorial Videos: https://github.com/RaRe-Technologies/gensim/blob/develop/tutorials.md#videos .. 
_QuickStart: https://radimrehurek.com/gensim/gensim_numfocus/auto_examples/core/run_core_concepts.html From 84150621e8870fea3d2510b8f3a04bcd8b46dd4e Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 14:25:55 +0800 Subject: [PATCH 06/14] Update PyPI module links to pypi.org and https Avoids redirects and protects the network data. --- README.md | 2 +- docs/notebooks/distributed.md | 2 +- docs/src/_index.rst.unused | 2 +- docs/src/distributed.rst | 2 +- docs/src/intro.rst | 2 +- gensim/models/lsimodel.py | 2 +- gensim/nosy.py | 2 +- setup.py | 8 ++++---- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index accbe90091..75e374c8b7 100644 --- a/README.md +++ b/README.md @@ -174,5 +174,5 @@ BibTeX entry: [NumPy and Scipy]: http://www.scipy.org/Download [ATLAS]: http://math-atlas.sourceforge.net/ [OpenBLAS]: http://xianyi.github.io/OpenBLAS/ - [source tar.gz]: http://pypi.python.org/pypi/gensim + [source tar.gz]: https://pypi.org/project/gensim/ [documentation]: https://radimrehurek.com/gensim/#install diff --git a/docs/notebooks/distributed.md b/docs/notebooks/distributed.md index ff8ffa87e6..316fb44a58 100644 --- a/docs/notebooks/distributed.md +++ b/docs/notebooks/distributed.md @@ -52,7 +52,7 @@ Available distributed algorithms [1]: http://en.wikipedia.org/wiki/Distributed_computing [2]: http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms -[3]: http://pypi.python.org/pypi/Pyro4 +[3]: https://pypi.org/project/Pyro4/ [4]: https://radimrehurek.com/gensim/intro.html#design [5]: https://radimrehurek.com/gensim/distributed.html#term-worker [6]: http://en.wikipedia.org/wiki/Broadcast_domain diff --git a/docs/src/_index.rst.unused b/docs/src/_index.rst.unused index b9789f54a5..bd1ae4225a 100644 --- a/docs/src/_index.rst.unused +++ b/docs/src/_index.rst.unused @@ -57,7 +57,7 @@ The **principal design objectives** behind Gensim are: Availability ------------ -Gensim is licensed under the OSI-approved `GNU LGPLv2.1 
license `_ and can be downloaded either from its `github repository `_ or from the `Python Package Index `_. +Gensim is licensed under the OSI-approved `GNU LGPLv2.1 license `_ and can be downloaded either from its `github repository `_ or from the `Python Package Index `_. .. seealso:: diff --git a/docs/src/distributed.rst b/docs/src/distributed.rst index b79e097e08..eb2fc79b57 100644 --- a/docs/src/distributed.rst +++ b/docs/src/distributed.rst @@ -38,7 +38,7 @@ Prerequisites ----------------- For communication between nodes, `gensim` uses `Pyro (PYthon Remote Objects) -`_, version >= 4.27. This is a library for low-level socket communication +`_, version >= 4.27. This is a library for low-level socket communication and remote procedure calls (RPC) in Python. `Pyro4` is a pure-Python library, so its installation is quite painless and only involves copying its `*.py` files somewhere onto your Python's import path:: diff --git a/docs/src/intro.rst b/docs/src/intro.rst index 846189b256..738f8b38a0 100644 --- a/docs/src/intro.rst +++ b/docs/src/intro.rst @@ -53,7 +53,7 @@ To install gensim, simply run:: pip install --upgrade gensim Alternatively, you can download the source code from `Github `__ -or the `Python Package Index `_. +or the `Python Package Index `_. After installation, learn how to use Gensim from its :ref:`sphx_glr_auto_examples_core_run_core_concepts.py` tutorials. diff --git a/gensim/models/lsimodel.py b/gensim/models/lsimodel.py index 5506faa5db..cce19b0b6b 100644 --- a/gensim/models/lsimodel.py +++ b/gensim/models/lsimodel.py @@ -179,7 +179,7 @@ def __init__( docs : {iterable of list of (int, float), scipy.sparse.csc} Corpus in BoW format or as sparse matrix. use_svdlibc : bool, optional - If True - will use `sparsesvd library `_, + If True - will use `sparsesvd library `_, otherwise - our own version will be used. power_iters: int, optional Number of power iteration steps to be used. Tune to improve accuracy. 
diff --git a/gensim/nosy.py b/gensim/nosy.py index 0606166449..10e4f120d6 100644 --- a/gensim/nosy.py +++ b/gensim/nosy.py @@ -8,7 +8,7 @@ arguments 'DEFAULTARGS'. The --with-color option needs the "rudolf" nose plugin. See: -http://pypi.python.org/pypi/rudolf/ +https://pypi.org/project/rudolf/ Originally by Jeff Winkler, http://jeffwinkler.net Forked from wkral http://github.com/wkral/Nosy diff --git a/setup.py b/setup.py index c11bcc378a..8a05ebc1cd 100644 --- a/setup.py +++ b/setup.py @@ -170,9 +170,9 @@ def run(self): .. |Wheel| image:: https://img.shields.io/pypi/wheel/gensim.svg .. _GA: https://github.com/RaRe-Technologies/gensim/actions -.. _Downloads: https://pypi.python.org/pypi/gensim +.. _Downloads: https://pypi.org/project/gensim/ .. _License: https://radimrehurek.com/gensim/intro.html#licensing -.. _Wheel: https://pypi.python.org/pypi/gensim +.. _Wheel: https://pypi.org/project/gensim/ Gensim is a Python library for *topic modelling*, *document indexing* and *similarity retrieval* with large corpora. Target audience is the *natural language processing* (NLP) and *information retrieval* (IR) community. 
@@ -208,7 +208,7 @@ def run(self): pip install --upgrade gensim -Or, if you have instead downloaded and unzipped the `source tar.gz `_ package:: +Or, if you have instead downloaded and unzipped the `source tar.gz `_ package:: python setup.py install @@ -369,7 +369,7 @@ def run(self): project_urls={ 'Source': 'https://github.com/RaRe-Technologies/gensim', }, - download_url='http://pypi.python.org/pypi/gensim', + download_url='https://pypi.org/project/gensim/', license='LGPL-2.1-only', From 0581678f4fc7da0718ae5d4028eaf1b697358bda Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 14:26:24 +0800 Subject: [PATCH 07/14] Update PyPI classifiers link to pypi.org, https and HTML --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8a05ebc1cd..f70876776c 100644 --- a/setup.py +++ b/setup.py @@ -382,7 +382,7 @@ def run(self): zip_safe=False, - classifiers=[ # from http://pypi.python.org/pypi?%3Aaction=list_classifiers + classifiers=[ # from https://pypi.org/classifiers/ 'Development Status :: 5 - Production/Stable', 'Environment :: Console', 'Intended Audience :: Science/Research', From b898dd05601b437711687945925b8879e109b1e1 Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 14:36:27 +0800 Subject: [PATCH 08/14] Update links to RaRe Technologies website to https Avoids a redirect and protects the content. 
--- README.md | 4 ++-- docs/notebooks/WMD_tutorial.ipynb | 4 ++-- docs/notebooks/Word2Vec_FastText_Comparison.ipynb | 2 +- docs/notebooks/ldaseqmodel.ipynb | 4 ++-- docs/notebooks/soft_cosine_tutorial.ipynb | 2 +- docs/src/gallery/other/README.txt | 2 +- docs/src/gallery/tutorials/run_lda.py | 6 +++--- docs/src/gallery/tutorials/run_wmd.py | 2 +- docs/src/gallery/tutorials/run_word2vec.py | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 75e374c8b7..b8e73f92d1 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ Adopters | Company | Logo | Industry | Use of Gensim | |---------|------|----------|---------------| -| [RARE Technologies](http://rare-technologies.com) | ![rare](docs/src/readme_images/rare.png) | ML & NLP consulting | Creators of Gensim – this is us! | +| [RARE Technologies](https://rare-technologies.com/) | ![rare](docs/src/readme_images/rare.png) | ML & NLP consulting | Creators of Gensim – this is us! | | [Amazon](http://www.amazon.com/) | ![amazon](docs/src/readme_images/amazon.png) | Retail | Document similarity. | | [National Institutes of Health](https://github.com/NIHOPA/pipeline_word2vec) | ![nih](docs/src/readme_images/nih.png) | Health | Processing grants and publications with word2vec. | | [Cisco Security](http://www.cisco.com/c/en/us/products/security/index.html) | ![cisco](docs/src/readme_images/cisco.png) | Security | Large-scale fraud detection. 
| @@ -163,7 +163,7 @@ BibTeX entry: [citing gensim in academic papers and theses]: https://scholar.google.com/citations?view_op=view_citation&hl=en&user=9vG_kV0AAAAJ&citation_for_view=9vG_kV0AAAAJ:NaGl4SEjCO4C [design goals]: https://radimrehurek.com/gensim/intro.html#design-principles - [RaRe Technologies]: http://rare-technologies.com/wp-content/uploads/2016/02/rare_image_only.png%20=10x20 + [RaRe Technologies]: https://rare-technologies.com/wp-content/uploads/2016/02/rare_image_only.png%20=10x20 [rare\_tech]: //rare-technologies.com [Talentpair]: https://avatars3.githubusercontent.com/u/8418395?v=3&s=100 [citing gensim in academic papers and theses]: https://scholar.google.cz/citations?view_op=view_citation&hl=en&user=9vG_kV0AAAAJ&citation_for_view=9vG_kV0AAAAJ:u-x6o8ySG0sC diff --git a/docs/notebooks/WMD_tutorial.ipynb b/docs/notebooks/WMD_tutorial.ipynb index 9b051104d5..13ec7fa9ae 100644 --- a/docs/notebooks/WMD_tutorial.ipynb +++ b/docs/notebooks/WMD_tutorial.ipynb @@ -12,7 +12,7 @@ "\n", "## Word Mover's Distance basics\n", "\n", - "WMD is a method that allows us to assess the \"distance\" between two documents in a meaningful way, even when they have no words in common. It uses [word2vec](http://rare-technologies.com/word2vec-tutorial/) [4] vector embeddings of words. It been shown to outperform many of the state-of-the-art methods in *k*-nearest neighbors classification [3].\n", + "WMD is a method that allows us to assess the \"distance\" between two documents in a meaningful way, even when they have no words in common. It uses [word2vec](https://rare-technologies.com/word2vec-tutorial/) [4] vector embeddings of words. It been shown to outperform many of the state-of-the-art methods in *k*-nearest neighbors classification [3].\n", "\n", "WMD is illustrated below for two very similar sentences (illustration taken from [Vlad Niculae's blog](http://vene.ro/blog/word-movers-distance-in-python.html)). 
The sentences have no words in common, but by matching the relevant words, WMD is able to accurately measure the (dis)similarity between the two sentences. The method also uses the bag-of-words representation of the documents (simply put, the word's frequencies in the documents), noted as $d$ in the figure below. The intuition behind the method is that we find the minimum \"traveling distance\" between documents, in other words the most efficient way to \"move\" the distribution of document 1 to the distribution of document 2.\n", "\n", @@ -36,7 +36,7 @@ "\n", "## Part 1: Computing the Word Mover's Distance\n", "\n", - "To use WMD, we need some word embeddings first of all. You could train a word2vec (see tutorial [here](http://rare-technologies.com/word2vec-tutorial/)) model on some corpus, but we will start by downloading some pre-trained word2vec embeddings. Download the GoogleNews-vectors-negative300.bin.gz embeddings [here](https://code.google.com/archive/p/word2vec/) (warning: 1.5 GB, file is not needed for part 2). Training your own embeddings can be beneficial, but to simplify this tutorial, we will be using pre-trained embeddings at first.\n", + "To use WMD, we need some word embeddings first of all. You could train a word2vec (see tutorial [here](https://rare-technologies.com/word2vec-tutorial/)) model on some corpus, but we will start by downloading some pre-trained word2vec embeddings. Download the GoogleNews-vectors-negative300.bin.gz embeddings [here](https://code.google.com/archive/p/word2vec/) (warning: 1.5 GB, file is not needed for part 2). Training your own embeddings can be beneficial, but to simplify this tutorial, we will be using pre-trained embeddings at first.\n", "\n", "Let's take some sentences to compute the distance between." 
] diff --git a/docs/notebooks/Word2Vec_FastText_Comparison.ipynb b/docs/notebooks/Word2Vec_FastText_Comparison.ipynb index 012441d913..bbb8f0c813 100644 --- a/docs/notebooks/Word2Vec_FastText_Comparison.ipynb +++ b/docs/notebooks/Word2Vec_FastText_Comparison.ipynb @@ -537,7 +537,7 @@ "3. In general, the performance of the models seems to get closer with the increasing corpus size. However, this might possibly be due to the size of the model staying constant at 100, and a larger model size for large corpora might result in higher performance gains.\n", "4. The semantic accuracy for all models increases significantly with the increase in corpus size.\n", "5. However, the increase in syntactic accuracy from the increase in corpus size for the n-gram FastText model is lower (in both relative and absolute terms). This could possibly indicate that advantages gained by incorporating morphological information could be less significant in case of larger corpus sizes (the corpuses used in the original paper seem to indicate this too)\n", - "6. Training times for gensim are slightly lower than the fastText no-ngram model, and significantly lower than the n-gram variant. This is quite impressive considering fastText is implemented in C++ and Gensim in Python (with calls to low-level BLAS routines for much of the heavy lifting). You could read [this post](http://rare-technologies.com/word2vec-in-python-part-two-optimizing/) for more details regarding word2vec optimisation in Gensim. Note that these times include importing any dependencies and serializing the models to disk, and not just the training times." + "6. Training times for gensim are slightly lower than the fastText no-ngram model, and significantly lower than the n-gram variant. This is quite impressive considering fastText is implemented in C++ and Gensim in Python (with calls to low-level BLAS routines for much of the heavy lifting). 
You could read [this post](https://rare-technologies.com/word2vec-in-python-part-two-optimizing/) for more details regarding word2vec optimisation in Gensim. Note that these times include importing any dependencies and serializing the models to disk, and not just the training times." ] }, { diff --git a/docs/notebooks/ldaseqmodel.ipynb b/docs/notebooks/ldaseqmodel.ipynb index acf5667e19..7cb902f479 100644 --- a/docs/notebooks/ldaseqmodel.ipynb +++ b/docs/notebooks/ldaseqmodel.ipynb @@ -45,7 +45,7 @@ "\n", "While most traditional topic mining algorithms do not expect time-tagged data or take into account any prior ordering, Dynamic Topic Models (DTM) leverages the knowledge of different documents belonging to a different time-slice in an attempt to map how the words in a topic change over time.\n", "\n", - "[This](http://rare-technologies.com/understanding-and-coding-dynamic-topic-models/) blog post is also useful in breaking down the ideas and theory behind DTM.\n", + "[This](https://rare-technologies.com/understanding-and-coding-dynamic-topic-models/) blog post is also useful in breaking down the ideas and theory behind DTM.\n", "\n" ] }, @@ -65,7 +65,7 @@ "\n", "There is some clarity on how they built their code now - Variational Inference using Kalman Filters, as described in section 3 of the paper. The mathematical basis for the code is well described in the appendix of the paper. If the documentation is lacking or not clear, comments via Issues or PRs via the gensim repo would be useful in improving the quality.\n", "\n", - "This project was part of the Google Summer of Code 2016 program: I have been regularly blogging about my progress with implementing this, which you can find [here](http://rare-technologies.com/author/bhargav/)." + "This project was part of the Google Summer of Code 2016 program: I have been regularly blogging about my progress with implementing this, which you can find [here](https://rare-technologies.com/author/bhargav/)." 
] }, { diff --git a/docs/notebooks/soft_cosine_tutorial.ipynb b/docs/notebooks/soft_cosine_tutorial.ipynb index a8d1c41555..b109c27a51 100644 --- a/docs/notebooks/soft_cosine_tutorial.ipynb +++ b/docs/notebooks/soft_cosine_tutorial.ipynb @@ -53,7 +53,7 @@ "source": [ "## Part 1: Computing the Soft Cosine Measure\n", "\n", - "To use SCM, we need some word embeddings first of all. You could train a [word2vec][] (see tutorial [here](http://rare-technologies.com/word2vec-tutorial/)) model on some corpus, but we will use pre-trained word2vec embeddings.\n", + "To use SCM, we need some word embeddings first of all. You could train a [word2vec][] (see tutorial [here](https://rare-technologies.com/word2vec-tutorial/)) model on some corpus, but we will use pre-trained word2vec embeddings.\n", "\n", "[word2vec]: https://radimrehurek.com/gensim/models/word2vec.html\n", "\n", diff --git a/docs/src/gallery/other/README.txt b/docs/src/gallery/other/README.txt index 5b23b3c20f..91f24f8b11 100644 --- a/docs/src/gallery/other/README.txt +++ b/docs/src/gallery/other/README.txt @@ -6,7 +6,7 @@ Blog posts, tutorial videos, hackathons and other useful Gensim resources, from - *Use FastText or Word2Vec?* Comparison of embedding quality and performance. `Jupyter Notebook `__ - Multiword phrases extracted from *How I Met Your Mother*. `Blog post by Mark Needham `__ - *Using Gensim LDA for hierarchical document clustering*. `Jupyter notebook by Brandon Rose `__ -- *Evolution of Voldemort topic through the 7 Harry Potter books*. `Blog post `__ +- *Evolution of Voldemort topic through the 7 Harry Potter books*. `Blog post `__ - *Movie plots by genre*: Document classification using various techniques: TF-IDF, word2vec averaging, Deep IR, Word Movers Distance and doc2vec. `Github repo `__ - *Word2vec: Faster than Google? Optimization lessons in Python*, talk by Radim Řehůřek at PyData Berlin 2014. `Youtube video `__ - *Word2vec & friends*, talk by Radim Řehůřek at MLMU.cz 7.1.2015. 
`Youtube video `__ diff --git a/docs/src/gallery/tutorials/run_lda.py b/docs/src/gallery/tutorials/run_lda.py index 7ee6b07cd2..74956619a9 100644 --- a/docs/src/gallery/tutorials/run_lda.py +++ b/docs/src/gallery/tutorials/run_lda.py @@ -268,14 +268,14 @@ def extract_documents(url='https://cs.nyu.edu/~roweis/data/nips12raw_str602.tgz' # Note that we use the "Umass" topic coherence measure here (see # :py:func:`gensim.models.ldamodel.LdaModel.top_topics`), Gensim has recently # obtained an implementation of the "AKSW" topic coherence measure (see -# accompanying blog post, http://rare-technologies.com/what-is-topic-coherence/). +# accompanying blog post, https://rare-technologies.com/what-is-topic-coherence/). # # If you are familiar with the subject of the articles in this dataset, you can # see that the topics below make a lot of sense. However, they are not without # flaws. We can see that there is substantial overlap between some topics, # others are hard to interpret, and most of them have at least some terms that # seem out of place. If you were able to do better, feel free to share your -# methods on the blog at http://rare-technologies.com/lda-training-tips/ ! +# methods on the blog at https://rare-technologies.com/lda-training-tips/ ! # top_topics = model.top_topics(corpus) @@ -299,7 +299,7 @@ def extract_documents(url='https://cs.nyu.edu/~roweis/data/nips12raw_str602.tgz' # Where to go from here # --------------------- # -# * Check out a RaRe blog post on the AKSW topic coherence measure (http://rare-technologies.com/what-is-topic-coherence/). +# * Check out a RaRe blog post on the AKSW topic coherence measure (https://rare-technologies.com/what-is-topic-coherence/). # * pyLDAvis (https://pyldavis.readthedocs.io/en/latest/index.html). # * Read some more Gensim tutorials (https://github.com/RaRe-Technologies/gensim/blob/develop/tutorials.md#tutorials). # * If you haven't already, read [1] and [2] (see references). 
diff --git a/docs/src/gallery/tutorials/run_wmd.py b/docs/src/gallery/tutorials/run_wmd.py index c037ef697b..3d64b0d2ea 100644 --- a/docs/src/gallery/tutorials/run_wmd.py +++ b/docs/src/gallery/tutorials/run_wmd.py @@ -17,7 +17,7 @@ # # WMD enables us to assess the "distance" between two documents in a meaningful # way even when they have no words in common. It uses `word2vec -# `_ [4] vector embeddings of +# `_ [4] vector embeddings of # words. It been shown to outperform many of the state-of-the-art methods in # k-nearest neighbors classification [3]. # diff --git a/docs/src/gallery/tutorials/run_word2vec.py b/docs/src/gallery/tutorials/run_word2vec.py index 80813775a1..c12f453ab6 100644 --- a/docs/src/gallery/tutorials/run_word2vec.py +++ b/docs/src/gallery/tutorials/run_word2vec.py @@ -322,7 +322,7 @@ def __iter__(self): # one core because of the `GIL # `_ (and ``word2vec`` # training will be `miserably slow -# `_\ ). +# `_\ ). # ############################################################################### From e50776e1b43ba4f425e7ee2bea0271f213a861fb Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 14:42:52 +0800 Subject: [PATCH 09/14] Link to the SciPy install page instead of download page The download page no longer exists. Also update the URL to https. 
--- README.md | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b8e73f92d1..3359aeca4e 100644 --- a/README.md +++ b/README.md @@ -171,7 +171,7 @@ BibTeX entry: [documentation and Jupyter Notebook tutorials]: https://github.com/RaRe-Technologies/gensim/#documentation [Vector Space Model]: http://en.wikipedia.org/wiki/Vector_space_model [unsupervised document analysis]: http://en.wikipedia.org/wiki/Latent_semantic_indexing - [NumPy and Scipy]: http://www.scipy.org/Download + [NumPy and Scipy]: https://scipy.org/install/ [ATLAS]: http://math-atlas.sourceforge.net/ [OpenBLAS]: http://xianyi.github.io/OpenBLAS/ [source tar.gz]: https://pypi.org/project/gensim/ diff --git a/setup.py b/setup.py index f70876776c..236eb06c5a 100644 --- a/setup.py +++ b/setup.py @@ -199,7 +199,7 @@ def run(self): Installation ------------ -This software depends on `NumPy and Scipy `_, two Python packages for scientific computing. +This software depends on `NumPy and Scipy `_, two Python packages for scientific computing. You must have them installed prior to installing `gensim`. It is also recommended you install a fast BLAS library before installing NumPy. This is optional, but using an optimized BLAS such as MKL, `ATLAS `_ or `OpenBLAS `_ is known to improve performance by as much as an order of magnitude. On OSX, NumPy picks up its vecLib BLAS automatically, so you don't need to do anything special. 
From 1c9fc756f0a2422173e37d4276576d25f375e901 Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 14:46:25 +0800 Subject: [PATCH 10/14] Update Wikipedia URLs to https --- README.md | 4 ++-- docs/notebooks/distributed.md | 6 +++--- docs/src/_index.rst.unused | 4 ++-- docs/src/dist_lsi.rst | 2 +- docs/src/distributed.rst | 6 +++--- docs/src/gallery/core/run_corpora_and_vector_spaces.py | 4 ++-- docs/src/gallery/core/run_similarity_queries.py | 4 ++-- docs/src/gallery/core/run_topics_and_transformations.py | 8 ++++---- docs/src/intro.rst | 4 ++-- docs/src/wiki.rst | 4 ++-- gensim/corpora/hashdictionary.py | 2 +- setup.py | 4 ++-- 12 files changed, 26 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 3359aeca4e..b59756f341 100644 --- a/README.md +++ b/README.md @@ -169,8 +169,8 @@ BibTeX entry: [citing gensim in academic papers and theses]: https://scholar.google.cz/citations?view_op=view_citation&hl=en&user=9vG_kV0AAAAJ&citation_for_view=9vG_kV0AAAAJ:u-x6o8ySG0sC [documentation and Jupyter Notebook tutorials]: https://github.com/RaRe-Technologies/gensim/#documentation - [Vector Space Model]: http://en.wikipedia.org/wiki/Vector_space_model - [unsupervised document analysis]: http://en.wikipedia.org/wiki/Latent_semantic_indexing + [Vector Space Model]: https://en.wikipedia.org/wiki/Vector_space_model + [unsupervised document analysis]: https://en.wikipedia.org/wiki/Latent_semantic_indexing [NumPy and Scipy]: https://scipy.org/install/ [ATLAS]: http://math-atlas.sourceforge.net/ [OpenBLAS]: http://xianyi.github.io/OpenBLAS/ diff --git a/docs/notebooks/distributed.md b/docs/notebooks/distributed.md index 316fb44a58..4b4b7b2851 100644 --- a/docs/notebooks/distributed.md +++ b/docs/notebooks/distributed.md @@ -50,11 +50,11 @@ Available distributed algorithms * [Distributed Latent Dirichlet Allocation][8] -[1]: http://en.wikipedia.org/wiki/Distributed_computing -[2]: http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms +[1]: 
https://en.wikipedia.org/wiki/Distributed_computing +[2]: https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms [3]: https://pypi.org/project/Pyro4/ [4]: https://radimrehurek.com/gensim/intro.html#design [5]: https://radimrehurek.com/gensim/distributed.html#term-worker -[6]: http://en.wikipedia.org/wiki/Broadcast_domain +[6]: https://en.wikipedia.org/wiki/Broadcast_domain [7]: https://radimrehurek.com/gensim/dist_lsi.html [8]: https://radimrehurek.com/gensim/dist_lda.html diff --git a/docs/src/_index.rst.unused b/docs/src/_index.rst.unused index bd1ae4225a..2ba6b2dac9 100644 --- a/docs/src/_index.rst.unused +++ b/docs/src/_index.rst.unused @@ -21,8 +21,8 @@ against other documents, words or phrases. .. note:: If the previous paragraphs left you confused, you can read more about the `Vector - Space Model `_ and `unsupervised - document analysis `_ on Wikipedia. + Space Model `_ and `unsupervised + document analysis `_ on Wikipedia. .. _design: diff --git a/docs/src/dist_lsi.rst b/docs/src/dist_lsi.rst index d7d6d40077..7500260f8a 100644 --- a/docs/src/dist_lsi.rst +++ b/docs/src/dist_lsi.rst @@ -57,7 +57,7 @@ ____________ So let's test our setup and run one computation of distributed LSA. Open a Python shell on one of the five machines (again, this can be done on any computer -in the same `broadcast domain `_, +in the same `broadcast domain `_, our choice is incidental) and try: .. sourcecode:: pycon diff --git a/docs/src/distributed.rst b/docs/src/distributed.rst index eb2fc79b57..4edc22b512 100644 --- a/docs/src/distributed.rst +++ b/docs/src/distributed.rst @@ -10,7 +10,7 @@ Why distributed computing? Need to build semantic representation of a corpus that is millions of documents large and it's taking forever? Have several idle machines at your disposal that you could use? 
-`Distributed computing `_ tries +`Distributed computing `_ tries to accelerate computations by splitting a given task into several smaller subtasks, passing them on to several computing nodes in parallel. @@ -23,7 +23,7 @@ much communication going on), so the network is allowed to be of relatively high The primary reason for using distributed computing is making things run faster. In `gensim`, most of the time consuming stuff is done inside low-level routines for linear algebra, inside NumPy, independent of any `gensim` code. - **Installing a fast** `BLAS (Basic Linear Algebra) `_ **library + **Installing a fast** `BLAS (Basic Linear Algebra) `_ **library for NumPy can improve performance up to 15 times!** So before you start buying those extra computers, consider installing a fast, threaded BLAS that is optimized for your particular machine (as opposed to a generic, binary-distributed library). @@ -71,7 +71,7 @@ inside `gensim` will try to look for and enslave all available worker nodes. Cluster Several nodes which communicate over TCP/IP. Currently, network broadcasting is used to discover and connect all communicating nodes, so the nodes must lie - within the same `broadcast domain `_. + within the same `broadcast domain `_. Worker A process which is created on each node. To remove a node from your cluster, diff --git a/docs/src/gallery/core/run_corpora_and_vector_spaces.py b/docs/src/gallery/core/run_corpora_and_vector_spaces.py index 0facecb5b8..74bd0341eb 100644 --- a/docs/src/gallery/core/run_corpora_and_vector_spaces.py +++ b/docs/src/gallery/core/run_corpora_and_vector_spaces.py @@ -72,10 +72,10 @@ # by the features extracted from it, not by its "surface" string form: how you get to # the features is up to you. Below I describe one common, general-purpose approach (called # :dfn:`bag-of-words`), but keep in mind that different application domains call for -# different features, and, as always, it's `garbage in, garbage out `_... 
+# different features, and, as always, it's `garbage in, garbage out `_... # # To convert documents to vectors, we'll use a document representation called -# `bag-of-words `_. In this representation, +# `bag-of-words `_. In this representation, # each document is represented by one vector where each vector element represents # a question-answer pair, in the style of: # diff --git a/docs/src/gallery/core/run_similarity_queries.py b/docs/src/gallery/core/run_similarity_queries.py index 1eb979cb20..d2d92082f9 100644 --- a/docs/src/gallery/core/run_similarity_queries.py +++ b/docs/src/gallery/core/run_similarity_queries.py @@ -96,10 +96,10 @@ print(vec_lsi) ############################################################################### -# In addition, we will be considering `cosine similarity `_ +# In addition, we will be considering `cosine similarity `_ # to determine the similarity of two vectors. Cosine similarity is a standard measure # in Vector Space Modeling, but wherever the vectors represent probability distributions, -# `different similarity measures `_ +# `different similarity measures `_ # may be more appropriate. # # Initializing query structures diff --git a/docs/src/gallery/core/run_topics_and_transformations.py b/docs/src/gallery/core/run_topics_and_transformations.py index 45888505e0..cb7c486775 100644 --- a/docs/src/gallery/core/run_topics_and_transformations.py +++ b/docs/src/gallery/core/run_topics_and_transformations.py @@ -130,7 +130,7 @@ corpus_lsi = lsi_model[corpus_tfidf] # create a double wrapper over the original corpus: bow->tfidf->fold-in-lsi ############################################################################### -# Here we transformed our Tf-Idf corpus via `Latent Semantic Indexing `_ +# Here we transformed our Tf-Idf corpus via `Latent Semantic Indexing `_ # into a latent 2-D space (2-D because we set ``num_topics=2``). Now you're probably wondering: what do these two latent # dimensions stand for? 
Let's inspect with :func:`models.LsiModel.print_topics`: @@ -175,7 +175,7 @@ # # Gensim implements several popular Vector Space Model algorithms: # -# * `Term Frequency * Inverse Document Frequency, Tf-Idf `_ +# * `Term Frequency * Inverse Document Frequency, Tf-Idf `_ # expects a bag-of-words (integer values) training corpus during initialization. # During transformation, it will take a vector and return another vector of the # same dimensionality, except that features which were rare in the training corpus @@ -202,7 +202,7 @@ # # model = models.OkapiBM25Model(corpus) # -# * `Latent Semantic Indexing, LSI (or sometimes LSA) `_ +# * `Latent Semantic Indexing, LSI (or sometimes LSA) `_ # transforms documents from either bag-of-words or (preferrably) TfIdf-weighted space into # a latent space of a lower dimensionality. For the toy corpus above we used only # 2 latent dimensions, but on real corpora, target dimensionality of 200--500 is recommended @@ -247,7 +247,7 @@ # # model = models.RpModel(tfidf_corpus, num_topics=500) # -# * `Latent Dirichlet Allocation, LDA `_ +# * `Latent Dirichlet Allocation, LDA `_ # is yet another transformation from bag-of-words counts into a topic space of lower # dimensionality. LDA is a probabilistic extension of LSA (also called multinomial PCA), # so LDA's topics can be interpreted as probability distributions over words. These distributions are, diff --git a/docs/src/intro.rst b/docs/src/intro.rst index 738f8b38a0..fa4a4e0e15 100644 --- a/docs/src/intro.rst +++ b/docs/src/intro.rst @@ -24,8 +24,8 @@ Once these statistical patterns are found, any plain text documents (sentence, p .. note:: If the previous paragraphs left you confused, you can read more about the `Vector - Space Model `_ and `unsupervised - document analysis `_ on Wikipedia. + Space Model `_ and `unsupervised + document analysis `_ on Wikipedia. .. 
_design: diff --git a/docs/src/wiki.rst b/docs/src/wiki.rst index 800e1b9c65..38ce51d099 100644 --- a/docs/src/wiki.rst +++ b/docs/src/wiki.rst @@ -221,7 +221,7 @@ into LDA topic distributions: in your list appear to be meta topics, concerning the administration and cleanup of Wikipedia. These show up because you didn't exclude templates such as these, some of which are included in most articles for quality - control: http://en.wikipedia.org/wiki/Wikipedia:Template_messages/Cleanup + control: https://en.wikipedia.org/wiki/Wikipedia:Template_messages/Cleanup The fourth and fifth topics clearly shows the influence of bots that import massive databases of cities, countries, etc. and their statistics such as @@ -232,7 +232,7 @@ into LDA topic distributions: So the top ten concepts are apparently dominated by Wikipedia robots and expanded templates; this is a good reminder that LSA is a powerful tool for data analysis, but no silver bullet. As always, it's `garbage in, garbage out - `_... + `_... By the way, improvements to the Wiki markup parsing code are welcome :-) .. [3] Hoffman, Blei, Bach. 2010. Online learning for Latent Dirichlet Allocation diff --git a/gensim/corpora/hashdictionary.py b/gensim/corpora/hashdictionary.py index 5241fd1725..ec605662b8 100644 --- a/gensim/corpora/hashdictionary.py +++ b/gensim/corpora/hashdictionary.py @@ -4,7 +4,7 @@ # Copyright (C) 2012 Homer Strong, Radim Rehurek # Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html -"""Implements the `"hashing trick" `_ -- a mapping between words +"""Implements the `"hashing trick" `_ -- a mapping between words and their integer ids using a fixed, static mapping (hash function). 
Notes diff --git a/setup.py b/setup.py index 236eb06c5a..b641800977 100644 --- a/setup.py +++ b/setup.py @@ -193,8 +193,8 @@ def run(self): If this feature list left you scratching your head, you can first read more about the `Vector -Space Model `_ and `unsupervised -document analysis `_ on Wikipedia. +Space Model `_ and `unsupervised +document analysis `_ on Wikipedia. Installation ------------ From 38ff69a98f12bca3489d928cb2c5390ece69c6b6 Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 14:48:14 +0800 Subject: [PATCH 11/14] Update SourceForge URLs to https --- README.md | 2 +- docs/src/dist_lda.rst | 2 +- docs/src/gallery/core/run_corpora_and_vector_spaces.py | 2 +- gensim/corpora/lowcorpus.py | 4 ++-- setup.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index b59756f341..739138ea68 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ BibTeX entry: [Vector Space Model]: https://en.wikipedia.org/wiki/Vector_space_model [unsupervised document analysis]: https://en.wikipedia.org/wiki/Latent_semantic_indexing [NumPy and Scipy]: https://scipy.org/install/ - [ATLAS]: http://math-atlas.sourceforge.net/ + [ATLAS]: https://math-atlas.sourceforge.net/ [OpenBLAS]: http://xianyi.github.io/OpenBLAS/ [source tar.gz]: https://pypi.org/project/gensim/ [documentation]: https://radimrehurek.com/gensim/#install diff --git a/docs/src/dist_lda.rst b/docs/src/dist_lda.rst index a8d0cb9816..1c9e13f527 100644 --- a/docs/src/dist_lda.rst +++ b/docs/src/dist_lda.rst @@ -33,7 +33,7 @@ parameter In serial mode (no distribution), creating this online LDA :doc:`model of Wikipedia ` takes 10h56m on my laptop (OS X, C2D 2.53GHz, 4GB RAM with `libVec`). In distributed mode with four workers (Linux, Xeons of 2Ghz, 4GB RAM -with `ATLAS `_), the wallclock time taken drops to 3h20m. +with `ATLAS `_), the wallclock time taken drops to 3h20m. 
To run standard batch LDA (no online updates of mini-batches) instead, you would similarly call diff --git a/docs/src/gallery/core/run_corpora_and_vector_spaces.py b/docs/src/gallery/core/run_corpora_and_vector_spaces.py index 74bd0341eb..8206d63e04 100644 --- a/docs/src/gallery/core/run_corpora_and_vector_spaces.py +++ b/docs/src/gallery/core/run_corpora_and_vector_spaces.py @@ -223,7 +223,7 @@ def __iter__(self): ############################################################################### # Other formats include `Joachim's SVMlight format `_, # `Blei's LDA-C format `_ and -# `GibbsLDA++ format `_. +# `GibbsLDA++ format `_. corpora.SvmLightCorpus.serialize('/tmp/corpus.svmlight', corpus) corpora.BleiCorpus.serialize('/tmp/corpus.lda-c', corpus) diff --git a/gensim/corpora/lowcorpus.py b/gensim/corpora/lowcorpus.py index 25e9515004..0e2b280578 100644 --- a/gensim/corpora/lowcorpus.py +++ b/gensim/corpora/lowcorpus.py @@ -4,7 +4,7 @@ # Copyright (C) 2010 Radim Rehurek # Licensed under the GNU LGPL v2.1 - https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html -"""Corpus in `GibbsLda++ format `_.""" +"""Corpus in `GibbsLda++ format `_.""" import logging from collections import Counter @@ -17,7 +17,7 @@ class LowCorpus(IndexedCorpus): - """Corpus handles input in `GibbsLda++ format `_. + """Corpus handles input in `GibbsLda++ format `_. **Format description** diff --git a/setup.py b/setup.py index b641800977..8f274f32f2 100644 --- a/setup.py +++ b/setup.py @@ -202,7 +202,7 @@ def run(self): This software depends on `NumPy and Scipy `_, two Python packages for scientific computing. You must have them installed prior to installing `gensim`. -It is also recommended you install a fast BLAS library before installing NumPy. This is optional, but using an optimized BLAS such as MKL, `ATLAS `_ or `OpenBLAS `_ is known to improve performance by as much as an order of magnitude. 
On OSX, NumPy picks up its vecLib BLAS automatically, so you don't need to do anything special. +It is also recommended you install a fast BLAS library before installing NumPy. This is optional, but using an optimized BLAS such as MKL, `ATLAS `_ or `OpenBLAS `_ is known to improve performance by as much as an order of magnitude. On OSX, NumPy picks up its vecLib BLAS automatically, so you don't need to do anything special. Install the latest version of gensim:: From 27f27b1d6697e955208b782de1e8c762bd8fd59c Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 14:51:57 +0800 Subject: [PATCH 12/14] Update GitHub links to https --- README.md | 2 +- gensim/nosy.py | 2 +- gensim/utils.py | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 739138ea68..db8b58960f 100644 --- a/README.md +++ b/README.md @@ -173,6 +173,6 @@ BibTeX entry: [unsupervised document analysis]: https://en.wikipedia.org/wiki/Latent_semantic_indexing [NumPy and Scipy]: https://scipy.org/install/ [ATLAS]: https://math-atlas.sourceforge.net/ - [OpenBLAS]: http://xianyi.github.io/OpenBLAS/ + [OpenBLAS]: https://xianyi.github.io/OpenBLAS/ [source tar.gz]: https://pypi.org/project/gensim/ [documentation]: https://radimrehurek.com/gensim/#install diff --git a/gensim/nosy.py b/gensim/nosy.py index 10e4f120d6..bfeffabff6 100644 --- a/gensim/nosy.py +++ b/gensim/nosy.py @@ -11,7 +11,7 @@ https://pypi.org/project/rudolf/ Originally by Jeff Winkler, http://jeffwinkler.net -Forked from wkral http://github.com/wkral/Nosy +Forked from wkral https://github.com/wkral/Nosy """ import os diff --git a/gensim/utils.py b/gensim/utils.py index 92b0913d7c..755b6c7f0c 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -1160,7 +1160,7 @@ def safe_unichr(intval): def decode_htmlentities(text): """Decode all HTML entities in text that are encoded as hex, decimal or named entities. Adapted from `python-twitter-ircbot/html_decode.py - `_. + `_. 
Parameters ---------- diff --git a/setup.py b/setup.py index 8f274f32f2..314ee87711 100644 --- a/setup.py +++ b/setup.py @@ -202,7 +202,7 @@ def run(self): This software depends on `NumPy and Scipy `_, two Python packages for scientific computing. You must have them installed prior to installing `gensim`. -It is also recommended you install a fast BLAS library before installing NumPy. This is optional, but using an optimized BLAS such as MKL, `ATLAS `_ or `OpenBLAS `_ is known to improve performance by as much as an order of magnitude. On OSX, NumPy picks up its vecLib BLAS automatically, so you don't need to do anything special. +It is also recommended you install a fast BLAS library before installing NumPy. This is optional, but using an optimized BLAS such as MKL, `ATLAS `_ or `OpenBLAS `_ is known to improve performance by as much as an order of magnitude. On OSX, NumPy picks up its vecLib BLAS automatically, so you don't need to do anything special. Install the latest version of gensim:: From c110730efe161b2a35bdd8bc6b95d22b8ade692a Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Mon, 13 Mar 2023 14:59:43 +0800 Subject: [PATCH 13/14] Update links to Google Groups Support users without JavaScript, avoid redirects, use https. --- CHANGELOG.md | 4 ++-- CONTRIBUTING.md | 2 +- HACKTOBERFEST.md | 2 +- ISSUE_TEMPLATE.md | 2 +- README.md | 4 ++-- docs/notebooks/Any2Vec_Filebased.ipynb | 2 +- docs/src/gallery/core/run_similarity_queries.py | 4 ++-- docs/src/gallery/howtos/run_doc2vec_imdb.py | 2 +- docs/src/sphinx_rtd_theme/layouthome.html | 4 ++-- docs/src/support.rst | 6 +++--- gensim/similarities/docsim.py | 2 +- 11 files changed, 17 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6f51d63ba..585d3694e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -486,7 +486,7 @@ This is the direction we'll keep going forward: less kitchen-sink of "latest aca ### Why pre-release? 
-This 4.0.0beta pre-release is for users who want the **cutting edge performance and bug fixes**. Plus users who want to help out, by **testing and providing feedback**: code, documentation, workflows… Please let us know on the [mailing list](https://groups.google.com/forum/#!forum/gensim)! +This 4.0.0beta pre-release is for users who want the **cutting edge performance and bug fixes**. Plus users who want to help out, by **testing and providing feedback**: code, documentation, workflows… Please let us know on the [mailing list](https://groups.google.com/g/gensim)! Install the pre-release with: @@ -2557,7 +2557,7 @@ Tutorial and doc improvements: * transactional similarity server: see docs/simserver.html * website moved from university hosting to radimrehurek.com * much improved speed of lsi[corpus] transformation: -* accuracy tests of incremental svd: test/svd_error.py and http://groups.google.com/group/gensim/browse_thread/thread/4b605b72f8062770 +* accuracy tests of incremental svd: test/svd_error.py and https://groups.google.com/g/gensim/c/S2BbcvgGJ3A * further improvements to memory-efficiency of LDA and LSA * improved wiki preprocessing (thx to Luca de Alfaro) * model.print_topics() debug fncs now support std output, in addition to logging (thx to Homer Strong) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 09f2f5a870..9ace1bdcda 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,7 @@ First, please see [contribution-guide.org](http://www.contribution-guide.org/) f Also, please check the [Gensim FAQ](https://github.com/RaRe-Technologies/gensim/wiki/Recipes-&-FAQ) page before posting. -**The proper place for open-ended questions is the [Gensim mailing list](https://groups.google.com/forum/#!forum/gensim).** Github is not the right place for research discussions or feature requests. 
+**The proper place for open-ended questions is the [Gensim mailing list](https://groups.google.com/g/gensim).** Github is not the right place for research discussions or feature requests. # How to add a new feature or create a pull request? diff --git a/HACKTOBERFEST.md b/HACKTOBERFEST.md index b60d1ffa41..4682f2f135 100644 --- a/HACKTOBERFEST.md +++ b/HACKTOBERFEST.md @@ -28,7 +28,7 @@ Check out the following: ## Questions -If you have a general question about Gensim, please ask on the [mailing list](https://groups.google.com/forum/#!forum/gensim). +If you have a general question about Gensim, please ask on the [mailing list](https://groups.google.com/g/gensim). If you have a question a about a specific issue or PR, just ask there directly, and we'll get back to you as soon as we can. Otherwise, ping @mpenkov on [Twitter](https://twitter.com/mpenkov) or [Telegram](https://t.me/mpenkov). diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md index 95121b30ae..91904dbb45 100644 --- a/ISSUE_TEMPLATE.md +++ b/ISSUE_TEMPLATE.md @@ -1,7 +1,7 @@