[WIP] Replaced logger.warn by logger.warning #1295

Merged 3 commits on May 2, 2017
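Background for the change below: Python's standard logging module documents Logger.warn as a deprecated alias of Logger.warning. The two calls behave identically, but warn emits a DeprecationWarning on Python 3.4 and later. A minimal, self-contained sketch of the two spellings (the logger name and message are illustrative, not taken from gensim):

import logging

logger = logging.getLogger("demo")  # illustrative name, not part of this PR

# Deprecated alias: same behaviour, but emits DeprecationWarning on Python 3.4+.
logger.warn("dataset dimension differs from init argument")

# Preferred spelling, used throughout this PR.
logger.warning("dataset dimension differs from init argument")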
31 changes: 19 additions & 12 deletions gensim/corpora/sharded_corpus.py
@@ -264,10 +264,11 @@ def init_shards(self, output_prefix, corpus, shardsize=4096, dtype=_default_dtyp
logger.info('Deriving dataset dimension from corpus: '
'{0}'.format(proposed_dim))
else:
-logger.warn('Dataset dimension derived from input corpus diffe'
-            'rs from initialization argument, using corpus.'
-            '(corpus {0}, init arg {1})'.format(proposed_dim,
-                                                self.dim))
+logger.warning(
+    'Dataset dimension derived from input corpus diffe'
+    'rs from initialization argument, using corpus.'
+    '(corpus {0}, init arg {1})'.format(proposed_dim, self.dim)
+)

self.dim = proposed_dim
self.offsets = [0]
@@ -311,9 +312,11 @@ def init_by_clone(self):
if self.dim is None:
logger.info('Loaded dataset dimension: {0}'.format(temp.dim))
else:
-logger.warn('Loaded dataset dimension differs from init arg '
-            'dimension, using loaded dim. '
-            '(loaded {0}, init {1})'.format(temp.dim, self.dim))
+logger.warning(
+    'Loaded dataset dimension differs from init arg '
+    'dimension, using loaded dim. '
+    '(loaded {0}, init {1})'.format(temp.dim, self.dim)
+)

self.dim = temp.dim # To be consistent with the loaded data!

@@ -531,14 +534,18 @@ def _guess_n_features(self, corpus):
'refusing to guess (dimension set to {0},'
'type of corpus: {1}).'.format(self.dim, type(corpus)))
else:
-logger.warn('Couldn\'t find number of features, trusting '
-            'supplied dimension ({0})'.format(self.dim))
+logger.warning(
+    'Couldn\'t find number of features, trusting '
+    'supplied dimension ({0})'.format(self.dim)
+)
n_features = self.dim

if self.dim and n_features != self.dim:
-logger.warn('Discovered inconsistent dataset dim ({0}) and '
-            'feature count from corpus ({1}). Coercing to dimension'
-            ' given by argument.'.format(self.dim, n_features))
+logger.warning(
+    'Discovered inconsistent dataset dim ({0}) and '
+    'feature count from corpus ({1}). Coercing to dimension'
+    ' given by argument.'.format(self.dim, n_features)
+)

return n_features

12 changes: 7 additions & 5 deletions gensim/models/doc2vec.py
@@ -617,12 +617,12 @@ def __init__(self, documents=None, dm_mean=None,
super(Doc2Vec, self).__init__(
sg=(1 + dm) % 2,
null_word=dm_concat, **kwargs)

self.load = call_on_class_only

if dm_mean is not None:
self.cbow_mean = dm_mean

self.dbow_words = dbow_words
self.dm_concat = dm_concat
self.dm_tag_count = dm_tag_count
@@ -672,8 +672,10 @@ def scan_vocab(self, documents, progress_per=10000, trim_rule=None, update=False
for document_no, document in enumerate(documents):
if not checked_string_types:
if isinstance(document.words, string_types):
logger.warn("Each 'words' should be a list of words (usually unicode strings)."
"First 'words' here is instead plain %s." % type(document.words))
logger.warning(
"Each 'words' should be a list of words (usually unicode strings)."
"First 'words' here is instead plain %s." % type(document.words)
)
checked_string_types += 1
if document_no % progress_per == 0:
interval_rate = (total_words - interval_count) / (default_timer() - interval_start)
@@ -845,7 +847,7 @@ def save_word2vec_format(self, fname, doctag_vec=False, word_vec=True, prefix='*
fout.write(utils.to_utf8(doctag) + b" " + row.tostring())
else:
fout.write(utils.to_utf8("%s %s\n" % (doctag, ' '.join("%f" % val for val in row))))


class TaggedBrownCorpus(object):
"""Iterate over documents from the Brown corpus (part of NLTK data), yielding
22 changes: 16 additions & 6 deletions gensim/models/word2vec.py
@@ -565,8 +565,10 @@ def scan_vocab(self, sentences, progress_per=10000, trim_rule=None):
for sentence_no, sentence in enumerate(sentences):
if not checked_string_types:
if isinstance(sentence, string_types):
logger.warn("Each 'sentences' item should be a list of words (usually unicode strings)."
"First item here is instead plain %s.", type(sentence))
logger.warning(
"Each 'sentences' item should be a list of words (usually unicode strings)."
"First item here is instead plain %s.", type(sentence)
)
checked_string_types += 1
if sentence_no % progress_per == 0:
logger.info("PROGRESS: at sentence #%i, processed %i words, keeping %i word types",
@@ -845,7 +847,9 @@ def job_producer():
pushed_words, pushed_examples = 0, 0
next_alpha = start_alpha
if next_alpha > self.min_alpha_yet_reached:
logger.warn("Effective 'alpha' higher than previous training cycles")
logger.warning(
"Effective 'alpha' higher than previous training cycles"
)
self.min_alpha_yet_reached = next_alpha
job_no = 0

@@ -953,13 +957,19 @@ def job_producer():
"training on %i raw words (%i effective words) took %.1fs, %.0f effective words/s",
raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed)
if job_tally < 10 * self.workers:
logger.warn("under 10 jobs per worker: consider setting a smaller `batch_words' for smoother alpha decay")
logger.warning(
"under 10 jobs per worker: consider setting a smaller `batch_words' for smoother alpha decay"
)

# check that the input corpus hasn't changed during iteration
if total_examples and total_examples != example_count:
logger.warn("supplied example count (%i) did not equal expected count (%i)", example_count, total_examples)
logger.warning(
"supplied example count (%i) did not equal expected count (%i)", example_count, total_examples
)
if total_words and total_words != raw_word_count:
logger.warn("supplied raw word count (%i) did not equal expected count (%i)", raw_word_count, total_words)
logger.warning(
"supplied raw word count (%i) did not equal expected count (%i)", raw_word_count, total_words
)

self.train_count += 1 # number of times train() has been called
self.total_train_time += elapsed
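A side note on the call sites above: the sharded_corpus.py calls build their messages eagerly with str.format(), while several word2vec.py calls pass %-style arguments through to the logger, which interpolates them only if the record is actually emitted. A small illustrative sketch of the difference (the logger name and values are made up, not from the diff):

import logging

logger = logging.getLogger("demo")  # illustrative name
example_count, total_examples = 10, 20

# Eager: the string is fully formatted before the logging call runs.
logger.warning('supplied example count ({0}) did not equal expected count ({1})'.format(example_count, total_examples))

# Lazy: logging interpolates the %-style arguments only when the record is emitted.
logger.warning("supplied example count (%i) did not equal expected count (%i)", example_count, total_examples)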
4 changes: 3 additions & 1 deletion gensim/models/wrappers/ldamallet.py
@@ -242,7 +242,9 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):

def show_topic(self, topicid, num_words=10):
if self.word_topics is None:
logger.warn("Run train or load_word_topics before showing topics.")
logger.warning(
"Run train or load_word_topics before showing topics."
)
topic = self.word_topics[topicid]
topic = topic / topic.sum() # normalize to probability dist
bestn = matutils.argsort(topic, num_words, reverse=True)