Merge remote-tracking branch 'upstream/develop' into Utopiah_patch-1

piskvorky · Aug 22, 2022 · a2c43ab
2 parents 09b18d0 + 2350e92
commit a2c43ab
Show file tree

Hide file tree

Showing 11 changed files with 191 additions and 195 deletions.
diff --git a/docs/src/auto_examples/index.rst b/docs/src/auto_examples/index.rst
diff --git a/docs/src/gallery/README.txt b/docs/src/gallery/README.txt
@@ -1,5 +1,7 @@
 Documentation
 =============
 
+.. _gallery_top:
+
 We welcome contributions to our documentation via GitHub pull requests, whether it's fixing a typo or authoring an entirely new tutorial or guide.
 If you're thinking about contributing documentation, please see :ref:`sphx_glr_auto_examples_howtos_run_doc.py`.
diff --git a/docs/src/support.rst b/docs/src/support.rst
@@ -11,7 +11,7 @@ Open source support
 
 The main communication channel is the free `Gensim mailing list <https://groups.google.com/group/gensim>`_.
 
-This is the preferred way to ask for help, report problems and share insights with the community. Newbie questions are perfectly fine, as long as you've read the :ref:`tutorials <sphx_glr_auto_examples>` and `FAQ <https://github.com/RaRe-Technologies/gensim/wiki/Recipes-&-FAQ>`_.
+This is the preferred way to ask for help, report problems and share insights with the community. Newbie questions are perfectly fine, as long as you've read the :ref:`tutorials <gallery_top>` and `FAQ <https://github.com/RaRe-Technologies/gensim/wiki/Recipes-&-FAQ>`_.
 
 FAQ and some useful snippets of code are maintained on GitHub: https://github.com/RARE-Technologies/gensim/wiki/Recipes-&-FAQ.
 

diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py
@@ -1137,7 +1137,8 @@ def __iter__(self):
 
 class TaggedLineDocument:
     def __init__(self, source):
-        """Iterate over a file that contains documents: one line = :class:`~gensim.models.doc2vec.TaggedDocument` object.
+        """Iterate over a file that contains documents:
+        one line = :class:`~gensim.models.doc2vec.TaggedDocument` object.
 
         Words are expected to be already preprocessed and separated by whitespace. Document tags are constructed
         automatically from the document line number (each document gets a unique integer tag).

diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py
@@ -517,7 +517,7 @@ def get_mean_vector(self, keys, weights=None, pre_normalize=True, post_normalize
             elif not ignore_missing:
                 raise KeyError(f"Key '{key}' not present in vocabulary")
 
-        if(total_weight > 0):
+        if total_weight > 0:
             mean = mean / total_weight
         if post_normalize:
             mean = matutils.unitvec(mean).astype(REAL)
@@ -1252,7 +1252,7 @@ def n_similarity(self, ws1, ws2):
             Similarities between `ws1` and `ws2`.
 
         """
-        if not(len(ws1) and len(ws2)):
+        if not (len(ws1) and len(ws2)):
             raise ZeroDivisionError('At least one of the passed list is empty.')
         mean1 = self.get_mean_vector(ws1, pre_normalize=False)
         mean2 = self.get_mean_vector(ws2, pre_normalize=False)

diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py
@@ -314,7 +314,8 @@ def load(cls, fname, *args, **kwargs):
 
 
 class LdaModel(interfaces.TransformationABC, basemodel.BaseTopicModel):
-    """Train and use Online Latent Dirichlet Allocation model as presented in `'Online Learning for LDA' by Hoffman et al.`_
+    """Train and use Online Latent Dirichlet Allocation model as presented in
+    `'Online Learning for LDA' by Hoffman et al.`_
 
     Examples
     -------

diff --git a/gensim/models/ldaseqmodel.py b/gensim/models/ldaseqmodel.py
@@ -4,7 +4,8 @@
 # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
 # Based on Copyright (C) 2016 Radim Rehurek <radimrehurek@seznam.cz>
 
-"""Lda Sequence model, inspired by `David M. Blei, John D. Lafferty: "Dynamic Topic Models"
+"""Lda Sequence model, inspired by
+`David M. Blei, John D. Lafferty: "Dynamic Topic Models"
 <https://mimno.infosci.cornell.edu/info6150/readings/dynamic_topic_models.pdf>`_.
 The original C/C++ implementation can be found on `blei-lab/dtm <https://github.com/blei-lab/dtm>`_.
 
@@ -744,7 +745,8 @@ def update_zeta(self):
         return self.zeta
 
     def compute_post_variance(self, word, chain_variance):
-        r"""Get the variance, based on the `Variational Kalman Filtering approach for Approximate Inference (section 3.1)
+        r"""Get the variance, based on the
+        `Variational Kalman Filtering approach for Approximate Inference (section 3.1)
         <https://mimno.infosci.cornell.edu/info6150/readings/dynamic_topic_models.pdf>`_.
 
         This function accepts the word to compute variance for, along with the associated sslm class object,

diff --git a/gensim/similarities/fastss.pyx b/gensim/similarities/fastss.pyx
@@ -43,9 +43,11 @@ cdef extern from *:
         void * s1_data = PyUnicode_DATA(s1);
         void * s2_data = PyUnicode_DATA(s2);
 
-        for (WIDTH tmpi = 0; tmpi <= len_s1; tmpi++) row2[tmpi] = tmpi;
+        WIDTH tmpi;
+        for (tmpi = 0; tmpi <= len_s1; tmpi++) row2[tmpi] = tmpi;
 
-        for (WIDTH i2 = 0; i2 < len_s2; i2++) {
+        WIDTH i2;
+        for (i2 = 0; i2 < len_s2; i2++) {
             int all_bad = i2 >= maximum;
             const Py_UCS4 ch = PyUnicode_READ(kind2, s2_data, i2);
             row_flip = 1 - row_flip;
@@ -56,7 +58,8 @@ cdef extern from *:
             }
             *pos_new = i2 + 1;
 
-            for (WIDTH i1 = 0; i1 < len_s1; i1++) {
+            WIDTH i1;
+            for (i1 = 0; i1 < len_s1; i1++) {
                 WIDTH val = *(pos_old++);
                 if (ch != PyUnicode_READ(kind1, s1_data, i1)) {
                     const WIDTH _val1 = *pos_old;

diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py
@@ -33,7 +33,7 @@
 def test_random_state():
     testcases = [np.random.seed(0), None, np.random.RandomState(0), 0]
     for testcase in testcases:
-        assert(isinstance(utils.get_random_state(testcase), np.random.RandomState))
+        assert isinstance(utils.get_random_state(testcase), np.random.RandomState)
 
 
 class TestLdaModel(unittest.TestCase, basetmtests.TestBaseTopicModel):

diff --git a/release/hijack_pr.py b/release/hijack_pr.py
@@ -18,54 +18,82 @@
 The above commands would check out the code for the PR, make changes to them, and push them back.
 Obviously, this requires the PR to be writable, but most gensim PRs are.
 If they aren't, then leave it up to the PR author to make the required changes.
+
+Sometimes, we'll make upstream changes that we want to merge into existing PRs.
+This is particularly useful when some nagging build problem is affecting multiple PRs.
+We can achieve this with:
+
+    $ release/hijack_pr.py merge-upstream-into 1234
+
+This hijacks the PR and merges upstream/develop into it.
 """
 import json
 import subprocess
 import sys
 
 import smart_open
 
+
 def check_output(command):
     return subprocess.check_output(command).strip().decode('utf-8')
 
 
-if sys.argv[1] == "push":
+def push():
     command = "git rev-parse --abbrev-ref HEAD@{upstream}".split()
     remote, remote_branch = check_output(command).split('/')
     current_branch = check_output(['git', 'branch', '--show-current'])
-    check_output(['git', 'push', remote, f'{current_branch}:{remote_branch}'])
+    subprocess.check_call(['git', 'push', remote, f'{current_branch}:{remote_branch}'])
 
     #
     # Cleanup to prevent remotes and branches from piling up
     #
-    check_output(['git', 'branch', '--delete', current_branch])
-    check_output(['git', 'remote', 'remove', remote])
-    sys.exit(0)
+    subprocess.check_call(['git', 'checkout', 'develop'])
+    subprocess.check_call(['git', 'branch', '--delete', current_branch])
+    subprocess.check_call(['git', 'remote', 'remove', remote])
+
+
+def hijack(prid):
+    url = f"https://api.github.com/repos/RaRe-Technologies/gensim/pulls/{prid}"
+    with smart_open.open(url) as fin:
+        prinfo = json.load(fin)
+
+    user = prinfo['head']['user']['login']
+    ssh_url = prinfo['head']['repo']['ssh_url']
 
-prid = int(sys.argv[1])
-url = f"https://api.github.com/repos/RaRe-Technologies/gensim/pulls/{prid}"
-with smart_open.open(url) as fin:
-    prinfo = json.load(fin)
+    remotes = check_output(['git', 'remote']).split('\n')
+    if user not in remotes:
+        subprocess.check_call(['git', 'remote', 'add', user, ssh_url])
 
-user = prinfo['head']['user']['login']
-ssh_url = prinfo['head']['repo']['ssh_url']
+    subprocess.check_call(['git', 'fetch', user])
 
-remotes = check_output(['git', 'remote']).split('\n')
-if user not in remotes:
-    subprocess.check_call(['git', 'remote', 'add', user, ssh_url])
+    ref = prinfo['head']['ref']
+    subprocess.check_call(['git', 'checkout', f'{user}/{ref}'])
+
+    #
+    # Prefix the local branch name with the user to avoid naming clashes with
+    # existing branches, e.g. develop
+    #
+    subprocess.check_call(['git', 'switch', '-c', f'{user}_{ref}'])
+
+    #
+    # Set the upstream so we can push back to it more easily
+    #
+    subprocess.check_call(['git', 'branch', '--set-upstream-to', f'{user}/{ref}'])
 
-subprocess.check_call(['git', 'fetch', user])
 
-ref = prinfo['head']['ref']
-subprocess.check_call(['git', 'checkout', f'{user}/{ref}'])
+def main():
+    if sys.argv[1] == "push":
+        push()
+    elif sys.argv[1] == 'merge-upstream-into':
+        prid = int(sys.argv[2])
+        hijack(prid)
+        subprocess.check_call(['git', 'fetch', 'upstream'])
+        subprocess.check_call(['git', 'merge', 'upstream/develop', '--no-edit'])
+        push()
+    else:
+        prid = int(sys.argv[1])
+        hijack(prid)
 
-#
-# Prefix the local branch name with the user to avoid naming clashes with
-# existing branches, e.g. develop
-#
-subprocess.check_call(['git', 'switch', '-c', f'{user}_{ref}'])
 
-#
-# Set the upstream so we can push back to it more easily
-#
-subprocess.check_call(['git', 'branch', '--set-upstream-to', f'{user}/{ref}'])
+if __name__ == '__main__':
+    main()
diff --git a/setup.py b/setup.py
@@ -297,11 +297,17 @@ def run(self):
 #   https://packaging.python.org/discussions/install-requires-vs-requirements/
 #
 
+#
+# We pin the Sphinx-related packages to specific versions here because we want
+# our documentation builds to be reproducible.  Different versions of Sphinx
+# can generate slightly different output, and because we keep some of the output
+# under version control, we want to keep these differences to a minimum.
+#
 docs_testenv = core_testenv + distributed_env + visdom_req + [
-    'sphinx',
-    'sphinx-gallery',
-    'sphinxcontrib.programoutput',
-    'sphinxcontrib-napoleon',
+    'sphinx==5.1.1',
+    'sphinx-gallery==0.11.1',
+    'sphinxcontrib.programoutput==0.17',
+    'sphinxcontrib-napoleon==0.7',
     'matplotlib',  # expected by sphinx-gallery
     'memory_profiler',
     'annoy',