Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/develop' into Utopiah_patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
mpenkov committed Aug 22, 2022
2 parents 09b18d0 + 2350e92 commit a2c43ab
Show file tree
Hide file tree
Showing 11 changed files with 191 additions and 195 deletions.
261 changes: 107 additions & 154 deletions docs/src/auto_examples/index.rst

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions docs/src/gallery/README.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
Documentation
=============

.. _gallery_top:

We welcome contributions to our documentation via GitHub pull requests, whether it's fixing a typo or authoring an entirely new tutorial or guide.
If you're thinking about contributing documentation, please see :ref:`sphx_glr_auto_examples_howtos_run_doc.py`.
2 changes: 1 addition & 1 deletion docs/src/support.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Open source support

The main communication channel is the free `Gensim mailing list <https://groups.google.com/group/gensim>`_.

This is the preferred way to ask for help, report problems and share insights with the community. Newbie questions are perfectly fine, as long as you've read the :ref:`tutorials <sphx_glr_auto_examples>` and `FAQ <https://github.com/RaRe-Technologies/gensim/wiki/Recipes-&-FAQ>`_.
This is the preferred way to ask for help, report problems and share insights with the community. Newbie questions are perfectly fine, as long as you've read the :ref:`tutorials <gallery_top>` and `FAQ <https://github.com/RaRe-Technologies/gensim/wiki/Recipes-&-FAQ>`_.

FAQ and some useful snippets of code are maintained on GitHub: https://github.com/RaRe-Technologies/gensim/wiki/Recipes-&-FAQ.

Expand Down
3 changes: 2 additions & 1 deletion gensim/models/doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,8 @@ def __iter__(self):

class TaggedLineDocument:
def __init__(self, source):
"""Iterate over a file that contains documents: one line = :class:`~gensim.models.doc2vec.TaggedDocument` object.
"""Iterate over a file that contains documents:
one line = :class:`~gensim.models.doc2vec.TaggedDocument` object.
Words are expected to be already preprocessed and separated by whitespace. Document tags are constructed
automatically from the document line number (each document gets a unique integer tag).
Expand Down
4 changes: 2 additions & 2 deletions gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ def get_mean_vector(self, keys, weights=None, pre_normalize=True, post_normalize
elif not ignore_missing:
raise KeyError(f"Key '{key}' not present in vocabulary")

if(total_weight > 0):
if total_weight > 0:
mean = mean / total_weight
if post_normalize:
mean = matutils.unitvec(mean).astype(REAL)
Expand Down Expand Up @@ -1252,7 +1252,7 @@ def n_similarity(self, ws1, ws2):
Similarities between `ws1` and `ws2`.
"""
if not(len(ws1) and len(ws2)):
if not (len(ws1) and len(ws2)):
raise ZeroDivisionError('At least one of the passed list is empty.')
mean1 = self.get_mean_vector(ws1, pre_normalize=False)
mean2 = self.get_mean_vector(ws2, pre_normalize=False)
Expand Down
3 changes: 2 additions & 1 deletion gensim/models/ldamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,8 @@ def load(cls, fname, *args, **kwargs):


class LdaModel(interfaces.TransformationABC, basemodel.BaseTopicModel):
"""Train and use Online Latent Dirichlet Allocation model as presented in `'Online Learning for LDA' by Hoffman et al.`_
"""Train and use Online Latent Dirichlet Allocation model as presented in
`'Online Learning for LDA' by Hoffman et al.`_
Examples
--------
Expand Down
6 changes: 4 additions & 2 deletions gensim/models/ldaseqmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
# Based on Copyright (C) 2016 Radim Rehurek <[email protected]>

"""Lda Sequence model, inspired by `David M. Blei, John D. Lafferty: "Dynamic Topic Models"
"""Lda Sequence model, inspired by
`David M. Blei, John D. Lafferty: "Dynamic Topic Models"
<https://mimno.infosci.cornell.edu/info6150/readings/dynamic_topic_models.pdf>`_.
The original C/C++ implementation can be found on `blei-lab/dtm <https://github.com/blei-lab/dtm>`_.
Expand Down Expand Up @@ -744,7 +745,8 @@ def update_zeta(self):
return self.zeta

def compute_post_variance(self, word, chain_variance):
r"""Get the variance, based on the `Variational Kalman Filtering approach for Approximate Inference (section 3.1)
r"""Get the variance, based on the
`Variational Kalman Filtering approach for Approximate Inference (section 3.1)
<https://mimno.infosci.cornell.edu/info6150/readings/dynamic_topic_models.pdf>`_.
This function accepts the word to compute variance for, along with the associated sslm class object,
Expand Down
9 changes: 6 additions & 3 deletions gensim/similarities/fastss.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,11 @@ cdef extern from *:
void * s1_data = PyUnicode_DATA(s1);
void * s2_data = PyUnicode_DATA(s2);
for (WIDTH tmpi = 0; tmpi <= len_s1; tmpi++) row2[tmpi] = tmpi;
WIDTH tmpi;
for (tmpi = 0; tmpi <= len_s1; tmpi++) row2[tmpi] = tmpi;
for (WIDTH i2 = 0; i2 < len_s2; i2++) {
WIDTH i2;
for (i2 = 0; i2 < len_s2; i2++) {
int all_bad = i2 >= maximum;
const Py_UCS4 ch = PyUnicode_READ(kind2, s2_data, i2);
row_flip = 1 - row_flip;
Expand All @@ -56,7 +58,8 @@ cdef extern from *:
}
*pos_new = i2 + 1;
for (WIDTH i1 = 0; i1 < len_s1; i1++) {
WIDTH i1;
for (i1 = 0; i1 < len_s1; i1++) {
WIDTH val = *(pos_old++);
if (ch != PyUnicode_READ(kind1, s1_data, i1)) {
const WIDTH _val1 = *pos_old;
Expand Down
2 changes: 1 addition & 1 deletion gensim/test/test_ldamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
def test_random_state():
    """Check that utils.get_random_state yields a RandomState for each seed form."""
    # NOTE(review): np.random.seed(0) evaluates to None, so the first entry is
    # effectively a second `None` case (and reseeds the global generator as a
    # side effect) -- confirm this is intended.
    seeds = [np.random.seed(0), None, np.random.RandomState(0), 0]
    for seed in seeds:
        state = utils.get_random_state(seed)
        assert isinstance(state, np.random.RandomState)


class TestLdaModel(unittest.TestCase, basetmtests.TestBaseTopicModel):
Expand Down
80 changes: 54 additions & 26 deletions release/hijack_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,54 +18,82 @@
The above commands would check out the code for the PR, make changes to it, and push it back.
Obviously, this requires the PR to be writable, but most gensim PRs are.
If they aren't, then leave it up to the PR author to make the required changes.
Sometimes, we'll make upstream changes that we want to merge into existing PRs.
This is particularly useful when some nagging build problem is affecting multiple PRs.
We can achieve this with:
$ release/hijack_pr.py merge-upstream-into 1234
This hijacks the PR and merges upstream/develop into it.
"""
import json
import subprocess
import sys

import smart_open


def check_output(command):
    """Run `command` and return its standard output as stripped UTF-8 text.

    `command` is passed straight to :func:`subprocess.check_output`, so a
    non-zero exit status raises :class:`subprocess.CalledProcessError`.
    """
    raw_stdout = subprocess.check_output(command)
    trimmed = raw_stdout.strip()
    return trimmed.decode('utf-8')


if sys.argv[1] == "push":
def push():
    """Push the current branch to its upstream branch, then clean up.

    The upstream of the current branch is looked up with ``git rev-parse``,
    the current branch is pushed to it, and afterwards the working copy is
    switched to ``develop`` and both the local branch and the remote are removed.

    Raises
    ------
    subprocess.CalledProcessError
        If any of the git commands exits with a non-zero status.

    """
    command = "git rev-parse --abbrev-ref HEAD@{upstream}".split()
    #
    # The upstream is reported as "<remote>/<branch>".  Split on the first
    # slash only: the branch part may itself contain slashes (e.g.
    # "feature/foo"), and a plain split('/') would then fail to unpack.
    #
    remote, remote_branch = check_output(command).split('/', 1)
    current_branch = check_output(['git', 'branch', '--show-current'])
    subprocess.check_call(['git', 'push', remote, f'{current_branch}:{remote_branch}'])

    #
    # Cleanup to prevent remotes and branches from piling up.
    # We have to leave the branch before git lets us delete it.
    #
    subprocess.check_call(['git', 'checkout', 'develop'])
    subprocess.check_call(['git', 'branch', '--delete', current_branch])
    subprocess.check_call(['git', 'remote', 'remove', remote])


def hijack(prid):
    """Check out the head branch of pull request `prid` into a new local branch.

    The PR metadata is read from the GitHub API.  The PR author's repository
    is added as a git remote (named after the author) if it is not already
    present, fetched, and the PR's head ref is checked out into a local branch
    named ``<user>_<ref>`` whose upstream is set to ``<user>/<ref>``.
    """
    api_url = f"https://api.github.com/repos/RaRe-Technologies/gensim/pulls/{prid}"
    with smart_open.open(api_url) as fin:
        prinfo = json.load(fin)

    head = prinfo['head']
    user = head['user']['login']
    ssh_url = head['repo']['ssh_url']

    known_remotes = check_output(['git', 'remote']).split('\n')
    if user not in known_remotes:
        subprocess.check_call(['git', 'remote', 'add', user, ssh_url])

    subprocess.check_call(['git', 'fetch', user])

    ref = head['ref']
    remote_branch = f'{user}/{ref}'
    subprocess.check_call(['git', 'checkout', remote_branch])

    #
    # Prefix the local branch name with the user to avoid naming clashes with
    # existing branches, e.g. develop
    #
    local_branch = f'{user}_{ref}'
    subprocess.check_call(['git', 'switch', '-c', local_branch])

    #
    # Set the upstream so we can push back to it more easily
    #
    subprocess.check_call(['git', 'branch', '--set-upstream-to', remote_branch])

subprocess.check_call(['git', 'fetch', user])

ref = prinfo['head']['ref']
subprocess.check_call(['git', 'checkout', f'{user}/{ref}'])
def main():
    """Dispatch on the command-line arguments.

    Supported invocations:

    - ``push``: push the current branch back to its upstream and clean up.
    - ``merge-upstream-into <prid>``: hijack the PR, merge ``upstream/develop``
      into it, and push the result back.
    - ``<prid>``: hijack the PR only.

    Exits with a usage message (non-zero status) when a required argument is
    missing, instead of failing with an IndexError.
    """
    usage = "usage: hijack_pr.py (push | merge-upstream-into <prid> | <prid>)"
    args = sys.argv[1:]
    if not args:
        sys.exit(usage)

    if args[0] == "push":
        push()
    elif args[0] == 'merge-upstream-into':
        if len(args) < 2:
            sys.exit(usage)
        prid = int(args[1])
        hijack(prid)
        subprocess.check_call(['git', 'fetch', 'upstream'])
        subprocess.check_call(['git', 'merge', 'upstream/develop', '--no-edit'])
        push()
    else:
        prid = int(args[0])
        hijack(prid)

#
# Prefix the local branch name with the user to avoid naming clashes with
# existing branches, e.g. develop
#
subprocess.check_call(['git', 'switch', '-c', f'{user}_{ref}'])

#
# Set the upstream so we can push back to it more easily
#
subprocess.check_call(['git', 'branch', '--set-upstream-to', f'{user}/{ref}'])
if __name__ == '__main__':
main()
14 changes: 10 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,11 +297,17 @@ def run(self):
# https://packaging.python.org/discussions/install-requires-vs-requirements/
#

#
# We pin the Sphinx-related packages to specific versions here because we want
# our documentation builds to be reproducible. Different versions of Sphinx
# can generate slightly different output, and because we keep some of the output
# under version control, we want to keep these differences to a minimum.
#
docs_testenv = core_testenv + distributed_env + visdom_req + [
'sphinx',
'sphinx-gallery',
'sphinxcontrib.programoutput',
'sphinxcontrib-napoleon',
'sphinx==5.1.1',
'sphinx-gallery==0.11.1',
'sphinxcontrib.programoutput==0.17',
'sphinxcontrib-napoleon==0.7',
'matplotlib', # expected by sphinx-gallery
'memory_profiler',
'annoy',
Expand Down

0 comments on commit a2c43ab

Please sign in to comment.