Fix flake8 warnings W605
horpto committed Nov 6, 2018
1 parent 7e4965e commit da0db8c
Showing 14 changed files with 55 additions and 55 deletions.
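For context: flake8's W605 flags invalid escape sequences such as \d or \s inside ordinary (non-raw) string literals, and recent Python versions also warn about them at compile time. The fix applied throughout this commit is to mark the affected regex patterns and docstrings as raw strings. A minimal sketch of the warning and the fix (the pattern below is illustrative, not taken from the commit):

    import re

    # Before: \d in a plain string literal is an invalid escape sequence,
    # so flake8 reports W605 (Python keeps the backslash, but warns).
    date_old = re.compile('\d{4}-\d{2}-\d{2}')

    # After: the r prefix makes the backslashes literal and silences W605
    # without changing the compiled pattern.
    date_new = re.compile(r'\d{4}-\d{2}-\d{2}')

    assert date_old.pattern == date_new.pattern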
6 changes: 3 additions & 3 deletions gensim/corpora/wikicorpus.py
@@ -81,7 +81,7 @@
"""Capture interlinks text and article linked"""
RE_P17 = re.compile(
r'(\n.{0,4}((bgcolor)|(\d{0,1}[ ]?colspan)|(rowspan)|(style=)|(class=)|(align=)|(scope=))(.*))|'
'(^.{0,2}((bgcolor)|(\d{0,1}[ ]?colspan)|(rowspan)|(style=)|(class=)|(align=))(.*))',
r'(^.{0,2}((bgcolor)|(\d{0,1}[ ]?colspan)|(rowspan)|(style=)|(class=)|(align=))(.*))',
re.UNICODE
)
"""Table markup"""
@@ -143,8 +143,8 @@ def filter_example(elem, text, *args, **kwargs):
# regex is in the function call so that we do not pollute the wikicorpus
# namespace do not do this in production as this function is called for
# every element in the wiki dump
_regex_de_excellent = re.compile('.*\{\{(Exzellent.*?)\}\}[\s]*', flags=re.DOTALL)
_regex_de_featured = re.compile('.*\{\{(Lesenswert.*?)\}\}[\s]*', flags=re.DOTALL)
_regex_de_excellent = re.compile(r'.*\{\{(Exzellent.*?)\}\}[\s]*', flags=re.DOTALL)
_regex_de_featured = re.compile(r'.*\{\{(Lesenswert.*?)\}\}[\s]*', flags=re.DOTALL)

if text is None:
return False
6 changes: 3 additions & 3 deletions gensim/models/atmodel.py
@@ -376,14 +376,14 @@ def extend_corpus(self, corpus):
self.corpus.extend(corpus)

def compute_phinorm(self, expElogthetad, expElogbetad):
"""Efficiently computes the normalizing factor in phi.
r"""Efficiently computes the normalizing factor in phi.
Parameters
----------
expElogthetad: numpy.ndarray
Value of variational distribution :math:`q(\theta|\gamma)`.
expElogbetad: numpy.ndarray
Value of variational distribution :math:`q(\\beta|\lambda)`.
Value of variational distribution :math:`q(\beta|\lambda)`.
Returns
-------
@@ -888,7 +888,7 @@ def rho():
del other

def bound(self, chunk, chunk_doc_idx=None, subsample_ratio=1.0, author2doc=None, doc2author=None):
"""Estimate the variational bound of documents from `corpus`.
r"""Estimate the variational bound of documents from `corpus`.
:math:`\mathbb{E_{q}}[\log p(corpus)] - \mathbb{E_{q}}[\log q(corpus)]`
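For the docstring hunks such as the two above, the r prefix matters beyond silencing the linter: sequences like \t and \b inside the :math: markup are valid escapes (tab and backspace) in a plain string, so they would silently corrupt the rendered docstring unless every backslash is doubled. Making the docstring raw lets the single-backslash LaTeX form be written directly. A short check of the equivalence (plain Python, no gensim needed):

    plain_broken = "q(\theta)"     # contains a literal tab character
    plain_escaped = "q(\\theta)"   # old style: backslash doubled by hand
    raw_style = r"q(\theta)"       # new style: raw string, single backslash

    assert plain_escaped == raw_style
    assert plain_broken != raw_style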
4 changes: 2 additions & 2 deletions gensim/models/base_any2vec.py
@@ -5,7 +5,7 @@
# Copyright (C) 2018 RaRe Technologies s.r.o.
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html

"""This module contains base classes required for implementing \*2vec algorithms.
r"""This module contains base classes required for implementing \*2vec algorithms.
The class hierarchy is designed to facilitate adding more concrete implementations for creating embeddings.
In the most general case, the purpose of this class is to transform an arbitrary representation to a numerical vector
@@ -56,7 +56,7 @@


class BaseAny2VecModel(utils.SaveLoad):
"""Base class for training, using and evaluating \*2vec model.
r"""Base class for training, using and evaluating \*2vec model.
Contains implementation for multi-threaded training. The purpose of this class is to provide a
reference interface for concrete embedding implementations, whether the input space is a corpus
34 changes: 17 additions & 17 deletions gensim/models/hdpmodel.py
@@ -72,7 +72,7 @@


def expect_log_sticks(sticks):
"""For stick-breaking hdp, get the :math:`\mathbb{E}[log(sticks)]`.
r"""For stick-breaking hdp, get the :math:`\mathbb{E}[log(sticks)]`.
Parameters
----------
@@ -97,7 +97,7 @@ def expect_log_sticks(sticks):


def lda_e_step(doc_word_ids, doc_word_counts, alpha, beta, max_iter=100):
"""Performs EM-iteration on a single document for calculation of likelihood for a maximum iteration of `max_iter`.
r"""Performs EM-iteration on a single document for calculation of likelihood for a maximum iteration of `max_iter`.
Parameters
----------
@@ -115,7 +115,7 @@ def lda_e_step(doc_word_ids, doc_word_counts, alpha, beta, max_iter=100):
Returns
-------
(numpy.ndarray, numpy.ndarray)
Computed (:math:`likelihood`, :math:`\\gamma`).
Computed (:math:`likelihood`, :math:`\gamma`).
"""
gamma = np.ones(len(alpha))
@@ -172,7 +172,7 @@ def set_zero(self):


class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel):
"""`Hierarchical Dirichlet Process model <http://jmlr.csail.mit.edu/proceedings/papers/v15/wang11a/wang11a.pdf>`_
r"""`Hierarchical Dirichlet Process model <http://jmlr.csail.mit.edu/proceedings/papers/v15/wang11a/wang11a.pdf>`_
Topic models promise to help summarize and organize large archives of texts that cannot be easily analyzed by hand.
Hierarchical Dirichlet process (HDP) is a powerful mixed-membership model for the unsupervised analysis of grouped
@@ -194,7 +194,7 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel):
For this assume that there is a restaurant franchise (`corpus`) which has a large number of restaurants
(`documents`, `j`) under it. They have a global menu of dishes (`topics`, :math:`\Phi_{k}`) which they serve.
Also, a single dish (`topic`, :math:`\Phi_{k}`) is only served at a single table `t` for all the customers
(`words`, :math:`\\theta_{j,i}`) who sit at that table.
(`words`, :math:`\theta_{j,i}`) who sit at that table.
So, when a customer enters the restaurant he/she has the choice to make where he/she wants to sit.
He/she can choose to sit at a table where some customers are already sitting , or he/she can choose to sit
at a new table. Here the probability of choosing each option is not same.
@@ -213,31 +213,31 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel):
share the same set of atoms, :math:`\Phi_{k}`, and only the atom weights :math:`\pi _{jt}` differs.
There will be multiple document-level atoms :math:`\psi_{jt}` which map to the same corpus-level atom
:math:`\Phi_{k}`. Here, the :math:`\\beta` signify the weights given to each of the topics globally. Also, each
factor :math:`\\theta_{j,i}` is distributed according to :math:`G_{j}`, i.e., it takes on the value of
:math:`\Phi_{k}`. Here, the :math:`\beta` signify the weights given to each of the topics globally. Also, each
factor :math:`\theta_{j,i}` is distributed according to :math:`G_{j}`, i.e., it takes on the value of
:math:`\Phi_{k}` with probability :math:`\pi _{jt}`. :math:`C_{j,t}` is an indicator variable whose value `k`
signifies the index of :math:`\Phi`. This helps to map :math:`\psi_{jt}` to :math:`\Phi_{k}`.
The top level (`corpus` level) stick proportions correspond the values of :math:`\\beta`,
The top level (`corpus` level) stick proportions correspond the values of :math:`\beta`,
bottom level (`document` level) stick proportions correspond to the values of :math:`\pi`.
The truncation level for the corpus (`K`) and document (`T`) corresponds to the number of :math:`\\beta`
The truncation level for the corpus (`K`) and document (`T`) corresponds to the number of :math:`\beta`
and :math:`\pi` which are in existence.
Now, whenever coordinate ascent updates are to be performed, they happen at two level. The document level as well
as corpus level.
At document level, we update the following:
#. The parameters to the document level sticks, i.e, a and b parameters of :math:`\\beta` distribution of the
#. The parameters to the document level sticks, i.e, a and b parameters of :math:`\beta` distribution of the
variable :math:`\pi _{jt}`.
#. The parameters to per word topic indicators, :math:`Z_{j,n}`. Here :math:`Z_{j,n}` selects topic parameter
:math:`\psi_{jt}`.
#. The parameters to per document topic indices :math:`\Phi_{jtk}`.
At corpus level, we update the following:
#. The parameters to the top level sticks, i.e., the parameters of the :math:`\\beta` distribution for the
corpus level :math:`\\beta`, which signify the topic distribution at corpus level.
#. The parameters to the top level sticks, i.e., the parameters of the :math:`\beta` distribution for the
corpus level :math:`\beta`, which signify the topic distribution at corpus level.
#. The parameters to the topics :math:`\Phi_{k}`.
Now coming on to the steps involved, procedure for online variational inference for the Hdp model is as follows:
@@ -261,14 +261,14 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel):
Attributes
----------
lda_alpha : numpy.ndarray
Same as :math:`\\alpha` from :class:`gensim.models.ldamodel.LdaModel`.
Same as :math:`\alpha` from :class:`gensim.models.ldamodel.LdaModel`.
lda_beta : numpy.ndarray
Same as :math:`\\beta` from from :class:`gensim.models.ldamodel.LdaModel`.
Same as :math:`\beta` from from :class:`gensim.models.ldamodel.LdaModel`.
m_D : int
Number of documents in the corpus.
m_Elogbeta : numpy.ndarray:
Stores value of dirichlet expectation, i.e., compute :math:`E[log \\theta]` for a vector
:math:`\\theta \sim Dir(\\alpha)`.
Stores value of dirichlet expectation, i.e., compute :math:`E[log \theta]` for a vector
:math:`\theta \sim Dir(\alpha)`.
m_lambda : {numpy.ndarray, float}
Drawn samples from the parameterized gamma distribution.
m_lambda_sum : {numpy.ndarray, float}
Expand All @@ -280,7 +280,7 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel):
m_rhot : float
Assigns weight to the information obtained from the mini-chunk and its value it between 0 and 1.
m_status_up_to_date : bool
Flag to indicate whether `lambda `and :math:`E[log \\theta]` have been updated if True, otherwise - not.
Flag to indicate whether `lambda `and :math:`E[log \theta]` have been updated if True, otherwise - not.
m_timestamp : numpy.ndarray
Helps to keep track and perform lazy updates on lambda.
m_updatect : int
2 changes: 1 addition & 1 deletion gensim/models/ldaseqmodel.py
@@ -741,7 +741,7 @@ def update_zeta(self):
return self.zeta

def compute_post_variance(self, word, chain_variance):
"""Get the variance, based on the `Variational Kalman Filtering approach for Approximate Inference (section 3.1)
r"""Get the variance, based on the `Variational Kalman Filtering approach for Approximate Inference (section 3.1)
<https://mimno.infosci.cornell.edu/info6150/readings/dynamic_topic_models.pdf>`_.
This function accepts the word to compute variance for, along with the associated sslm class object,
6 changes: 3 additions & 3 deletions gensim/models/logentropy_model.py
@@ -25,7 +25,7 @@


class LogEntropyModel(interfaces.TransformationABC):
"""Objects of this class realize the transformation between word-document co-occurrence matrix (int)
r"""Objects of this class realize the transformation between word-document co-occurrence matrix (int)
into a locally/globally weighted matrix (positive floats).
This is done by a log entropy normalization, optionally normalizing the resulting documents to unit length.
@@ -35,9 +35,9 @@ class LogEntropyModel(interfaces.TransformationABC):
local\_weight_{i,j} = log(frequency_{i,j} + 1)
P_{i,j} = \\frac{frequency_{i,j}}{\sum_j frequency_{i,j}}
P_{i,j} = \frac{frequency_{i,j}}{\sum_j frequency_{i,j}}
global\_weight_i = 1 + \\frac{\sum_j P_{i,j} * log(P_{i,j})}{log(number\_of\_documents + 1)}
global\_weight_i = 1 + \frac{\sum_j P_{i,j} * log(P_{i,j})}{log(number\_of\_documents + 1)}
final\_weight_{i,j} = local\_weight_{i,j} * global\_weight_i
6 changes: 3 additions & 3 deletions gensim/models/normmodel.py
@@ -15,15 +15,15 @@ class NormModel(interfaces.TransformationABC):
"""Objects of this class realize the explicit normalization of vectors (l1 and l2)."""

def __init__(self, corpus=None, norm='l2'):
"""Compute the l1 or l2 normalization by normalizing separately for each document in a corpus.
r"""Compute the l1 or l2 normalization by normalizing separately for each document in a corpus.
If :math:`v_{i,j}` is the 'i'th component of the vector representing document 'j', the l1 normalization is
.. math:: l1_{i, j} = \\frac{v_{i,j}}{\sum_k |v_{k,j}|}
.. math:: l1_{i, j} = \frac{v_{i,j}}{\sum_k |v_{k,j}|}
the l2 normalization is
.. math:: l2_{i, j} = \\frac{v_{i,j}}{\sqrt{\sum_k v_{k,j}^2}}
.. math:: l2_{i, j} = \frac{v_{i,j}}{\sqrt{\sum_k v_{k,j}^2}}
Parameters
10 changes: 5 additions & 5 deletions gensim/models/phrases.py
@@ -658,7 +658,7 @@ def __getitem__(self, sentence):


def original_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count):
"""Bigram scoring function, based on the original `Mikolov, et. al: "Distributed Representations
r"""Bigram scoring function, based on the original `Mikolov, et. al: "Distributed Representations
of Words and Phrases and their Compositionality" <https://arxiv.org/abs/1310.4546>`_.
Parameters
@@ -678,14 +678,14 @@ def original_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count):
Notes
-----
Formula: :math:`\\frac{(bigram\_count - min\_count) * len\_vocab }{ (worda\_count * wordb\_count)}`.
Formula: :math:`\frac{(bigram\_count - min\_count) * len\_vocab }{ (worda\_count * wordb\_count)}`.
"""
return (bigram_count - min_count) / worda_count / wordb_count * len_vocab


def npmi_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count):
"""Calculation NPMI score based on `"Normalized (Pointwise) Mutual Information in Colocation Extraction"
r"""Calculation NPMI score based on `"Normalized (Pointwise) Mutual Information in Colocation Extraction"
by Gerlof Bouma <https://svn.spraakdata.gu.se/repos/gerlof/pub/www/Docs/npmi-pfd.pdf>`_.
Parameters
@@ -705,8 +705,8 @@ def npmi_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count):
Notes
-----
Formula: :math:`\\frac{ln(prop(word_a, word_b) / (prop(word_a)*prop(word_b)))}{ -ln(prop(word_a, word_b)}`,
where :math:`prob(word) = \\frac{word\_count}{corpus\_word\_count}`
Formula: :math:`\frac{ln(prop(word_a, word_b) / (prop(word_a)*prop(word_b)))}{ -ln(prop(word_a, word_b)}`,
where :math:`prob(word) = \frac{word\_count}{corpus\_word\_count}`
"""
if bigram_count >= min_count:
8 changes: 4 additions & 4 deletions gensim/models/tfidfmodel.py
@@ -83,8 +83,8 @@ def resolve_weights(smartirs):


def df2idf(docfreq, totaldocs, log_base=2.0, add=0.0):
"""Compute inverse-document-frequency for a term with the given document frequency `docfreq`:
:math:`idf = add + log_{log\_base} \\frac{totaldocs}{docfreq}`
r"""Compute inverse-document-frequency for a term with the given document frequency `docfreq`:
:math:`idf = add + log_{log\_base} \frac{totaldocs}{docfreq}`
Parameters
----------
@@ -239,11 +239,11 @@ class TfidfModel(interfaces.TransformationABC):
"""
def __init__(self, corpus=None, id2word=None, dictionary=None, wlocal=utils.identity,
wglobal=df2idf, normalize=True, smartirs=None, pivot=None, slope=0.65):
"""Compute TF-IDF by multiplying a local component (term frequency) with a global component
r"""Compute TF-IDF by multiplying a local component (term frequency) with a global component
(inverse document frequency), and normalizing the resulting documents to unit length.
Formula for non-normalized weight of term :math:`i` in document :math:`j` in a corpus of :math:`D` documents
.. math:: weight_{i,j} = frequency_{i,j} * log_2 \\frac{D}{document\_freq_{i}}
.. math:: weight_{i,j} = frequency_{i,j} * log_2 \frac{D}{document\_freq_{i}}
or, more generally
4 changes: 2 additions & 2 deletions gensim/models/wrappers/ldamallet.py
@@ -5,7 +5,7 @@
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html


"""Python wrapper for `Latent Dirichlet Allocation (LDA) <https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation>`_
r"""Python wrapper for `Latent Dirichlet Allocation (LDA) <https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation>`_
from `MALLET, the Java topic modelling toolkit <http://mallet.cs.umass.edu/>`_
This module allows both LDA model estimation from a training corpus and inference of topic distribution on new,
@@ -250,7 +250,7 @@ def convert_input(self, corpus, infer=False, serialize_corpus=True):
cmd = \
self.mallet_path + \
" import-file --preserve-case --keep-sequence " \
"--remove-stopwords --token-regex \"\S+\" --input %s --output %s"
"--remove-stopwords --token-regex \"\\S+\" --input %s --output %s"
if infer:
cmd += ' --use-pipe-from ' + self.fcorpusmallet()
cmd = cmd % (self.fcorpustxt(), self.fcorpusmallet() + '.infer')
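The ldamallet hunk above uses the other common W605 fix, doubling the backslash ("\\S+") rather than adding a raw-string prefix, presumably because the literal already relies on \" escapes for the embedded quotes. Both spellings produce the same runtime string; a quick sketch (the variable names are illustrative only):

    escaped = " --token-regex \"\\S+\" "   # doubled backslash, escaped quotes
    raw = r' --token-regex "\S+" '          # raw single-quoted alternative

    assert escaped == raw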
2 changes: 1 addition & 1 deletion gensim/test/test_corpora.py
@@ -786,7 +786,7 @@ def test_two_level_directory(self):

def test_filename_filtering(self):
dirpath = self.write_one_level('test1.log', 'test1.txt', 'test2.log', 'other1.log')
corpus = textcorpus.TextDirectoryCorpus(dirpath, pattern="test.*\.log")
corpus = textcorpus.TextDirectoryCorpus(dirpath, pattern=r"test.*\.log")
filenames = list(corpus.iter_filepaths())
expected = [os.path.join(dirpath, name) for name in ('test1.log', 'test2.log')]
self.assertEqual(sorted(expected), sorted(filenames))
4 changes: 2 additions & 2 deletions gensim/test/test_matutils.py
@@ -60,7 +60,7 @@ def mean_absolute_difference(a, b):


def dirichlet_expectation(alpha):
"""For a vector :math:`\\theta \sim Dir(\\alpha)`, compute :math:`E[log \\theta]`.
r"""For a vector :math:`\theta \sim Dir(\alpha)`, compute :math:`E[log \theta]`.
Parameters
----------
@@ -70,7 +70,7 @@ def dirichlet_expectation(alpha):
Returns
-------
numpy.ndarray:
:math:`E[log \\theta]`
:math:`E[log \theta]`
"""
if len(alpha.shape) == 1:
10 changes: 5 additions & 5 deletions gensim/topic_coherence/direct_confirmation_measure.py
@@ -17,8 +17,8 @@


def log_conditional_probability(segmented_topics, accumulator, with_std=False, with_support=False):
"""Calculate the log-conditional-probability measure which is used by coherence measures such as `U_mass`.
This is defined as :math:`m_{lc}(S_i) = log \\frac{P(W', W^{*}) + \epsilon}{P(W^{*})}`.
r"""Calculate the log-conditional-probability measure which is used by coherence measures such as `U_mass`.
This is defined as :math:`m_{lc}(S_i) = log \frac{P(W', W^{*}) + \epsilon}{P(W^{*})}`.
Parameters
----------
@@ -124,7 +124,7 @@ def aggregate_segment_sims(segment_sims, with_std, with_support):


def log_ratio_measure(segmented_topics, accumulator, normalize=False, with_std=False, with_support=False):
"""Compute log ratio measure for `segment_topics`.
r"""Compute log ratio measure for `segment_topics`.
Parameters
----------
@@ -146,12 +146,12 @@ def log_ratio_measure(segmented_topics, accumulator, normalize=False, with_std=False, with_support=False):
-----
If `normalize=False`:
Calculate the log-ratio-measure, popularly known as **PMI** which is used by coherence measures such as `c_v`.
This is defined as :math:`m_{lr}(S_i) = log \\frac{P(W', W^{*}) + \epsilon}{P(W') * P(W^{*})}`
This is defined as :math:`m_{lr}(S_i) = log \frac{P(W', W^{*}) + \epsilon}{P(W') * P(W^{*})}`
If `normalize=True`:
Calculate the normalized-log-ratio-measure, popularly knowns as **NPMI**
which is used by coherence measures such as `c_v`.
This is defined as :math:`m_{nlr}(S_i) = \\frac{m_{lr}(S_i)}{-log(P(W', W^{*}) + \epsilon)}`
This is defined as :math:`m_{nlr}(S_i) = \frac{m_{lr}(S_i)}{-log(P(W', W^{*}) + \epsilon)}`
Returns
-------
8 changes: 4 additions & 4 deletions gensim/topic_coherence/segmentation.py
@@ -12,7 +12,7 @@


def s_one_pre(topics):
"""Performs segmentation on a list of topics.
r"""Performs segmentation on a list of topics.
Notes
-----
@@ -54,9 +54,9 @@ def s_one_pre(topics):


def s_one_one(topics):
"""Perform segmentation on a list of topics.
r"""Perform segmentation on a list of topics.
Segmentation is defined as
:math:`s_{one} = {(W', W^{*}) | W' = {w_i}; W^{*} = {w_j}; w_{i}, w_{j} \in W; i \\neq j}`.
:math:`s_{one} = {(W', W^{*}) | W' = {w_i}; W^{*} = {w_j}; w_{i}, w_{j} \in W; i \neq j}`.
Parameters
----------
@@ -96,7 +96,7 @@ def s_one_one(topics):


def s_one_set(topics):
"""Perform s_one_set segmentation on a list of topics.
r"""Perform s_one_set segmentation on a list of topics.
Segmentation is defined as
:math:`s_{set} = {(W', W^{*}) | W' = {w_i}; w_{i} \in W; W^{*} = W}`
