[WIP] Replaced logger.warn by logger.warning #1295

Merged 3 commits on May 2, 2017
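Background for the change below: Python's standard logging module documents Logger.warn as a deprecated alias of Logger.warning. The two calls behave identically, but warn emits a DeprecationWarning on Python 3.4 and later. A minimal, self-contained sketch of the two spellings (the logger name and message are illustrative, not taken from gensim):

import logging

logger = logging.getLogger("demo")  # illustrative name, not part of this PR

# Deprecated alias: same behaviour, but emits DeprecationWarning on Python 3.4+.
logger.warn("dataset dimension differs from init argument")

# Preferred spelling, used throughout this PR.
logger.warning("dataset dimension differs from init argument")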
31 changes: 19 additions & 12 deletions gensim/corpora/sharded_corpus.py
@@ -264,10 +264,11 @@ def init_shards(self, output_prefix, corpus, shardsize=4096, dtype=_default_dtyp
logger.info('Deriving dataset dimension from corpus: '
'{0}'.format(proposed_dim))
else:
-logger.warn('Dataset dimension derived from input corpus diffe'
-            'rs from initialization argument, using corpus.'
-            '(corpus {0}, init arg {1})'.format(proposed_dim,
-                                                self.dim))
+logger.warning(
+    'Dataset dimension derived from input corpus diffe'
+    'rs from initialization argument, using corpus.'
+    '(corpus {0}, init arg {1})'.format(proposed_dim, self.dim)
+)

self.dim = proposed_dim
self.offsets = [0]
@@ -311,9 +312,11 @@ def init_by_clone(self):
if self.dim is None:
logger.info('Loaded dataset dimension: {0}'.format(temp.dim))
else:
-logger.warn('Loaded dataset dimension differs from init arg '
-            'dimension, using loaded dim. '
-            '(loaded {0}, init {1})'.format(temp.dim, self.dim))
+logger.warning(
+    'Loaded dataset dimension differs from init arg '
+    'dimension, using loaded dim. '
+    '(loaded {0}, init {1})'.format(temp.dim, self.dim)
+)

self.dim = temp.dim # To be consistent with the loaded data!

@@ -531,14 +534,18 @@ def _guess_n_features(self, corpus):
'refusing to guess (dimension set to {0},'
'type of corpus: {1}).'.format(self.dim, type(corpus)))
else:
-logger.warn('Couldn\'t find number of features, trusting '
-            'supplied dimension ({0})'.format(self.dim))
+logger.warning(
+    'Couldn\'t find number of features, trusting '
+    'supplied dimension ({0})'.format(self.dim)
+)
n_features = self.dim

if self.dim and n_features != self.dim:
-logger.warn('Discovered inconsistent dataset dim ({0}) and '
-            'feature count from corpus ({1}). Coercing to dimension'
-            ' given by argument.'.format(self.dim, n_features))
+logger.warning(
+    'Discovered inconsistent dataset dim ({0}) and '
+    'feature count from corpus ({1}). Coercing to dimension'
+    ' given by argument.'.format(self.dim, n_features)
+)

return n_features

12 changes: 7 additions & 5 deletions gensim/models/doc2vec.py
@@ -617,12 +617,12 @@ def __init__(self, documents=None, dm_mean=None,
super(Doc2Vec, self).__init__(
sg=(1 + dm) % 2,
null_word=dm_concat, **kwargs)

self.load = call_on_class_only

if dm_mean is not None:
self.cbow_mean = dm_mean

self.dbow_words = dbow_words
self.dm_concat = dm_concat
self.dm_tag_count = dm_tag_count
@@ -672,8 +672,10 @@ def scan_vocab(self, documents, progress_per=10000, trim_rule=None, update=False
for document_no, document in enumerate(documents):
if not checked_string_types:
if isinstance(document.words, string_types):
logger.warn("Each 'words' should be a list of words (usually unicode strings)."
"First 'words' here is instead plain %s." % type(document.words))
logger.warning(
"Each 'words' should be a list of words (usually unicode strings)."
"First 'words' here is instead plain %s." % type(document.words)
)
checked_string_types += 1
if document_no % progress_per == 0:
interval_rate = (total_words - interval_count) / (default_timer() - interval_start)
@@ -845,7 +847,7 @@ def save_word2vec_format(self, fname, doctag_vec=False, word_vec=True, prefix='*
fout.write(utils.to_utf8(doctag) + b" " + row.tostring())
else:
fout.write(utils.to_utf8("%s %s\n" % (doctag, ' '.join("%f" % val for val in row))))


class TaggedBrownCorpus(object):
"""Iterate over documents from the Brown corpus (part of NLTK data), yielding
22 changes: 16 additions & 6 deletions gensim/models/word2vec.py
@@ -565,8 +565,10 @@ def scan_vocab(self, sentences, progress_per=10000, trim_rule=None):
for sentence_no, sentence in enumerate(sentences):
if not checked_string_types:
if isinstance(sentence, string_types):
logger.warn("Each 'sentences' item should be a list of words (usually unicode strings)."
"First item here is instead plain %s.", type(sentence))
logger.warning(
"Each 'sentences' item should be a list of words (usually unicode strings)."
"First item here is instead plain %s.", type(sentence)
)
checked_string_types += 1
if sentence_no % progress_per == 0:
logger.info("PROGRESS: at sentence #%i, processed %i words, keeping %i word types",
@@ -845,7 +847,9 @@ def job_producer():
pushed_words, pushed_examples = 0, 0
next_alpha = start_alpha
if next_alpha > self.min_alpha_yet_reached:
logger.warn("Effective 'alpha' higher than previous training cycles")
logger.warning(
"Effective 'alpha' higher than previous training cycles"
)
self.min_alpha_yet_reached = next_alpha
job_no = 0

@@ -953,13 +957,19 @@ def job_producer():
"training on %i raw words (%i effective words) took %.1fs, %.0f effective words/s",
raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed)
if job_tally < 10 * self.workers:
logger.warn("under 10 jobs per worker: consider setting a smaller `batch_words' for smoother alpha decay")
logger.warning(
"under 10 jobs per worker: consider setting a smaller `batch_words' for smoother alpha decay"
)

# check that the input corpus hasn't changed during iteration
if total_examples and total_examples != example_count:
logger.warn("supplied example count (%i) did not equal expected count (%i)", example_count, total_examples)
logger.warning(
"supplied example count (%i) did not equal expected count (%i)", example_count, total_examples
)
if total_words and total_words != raw_word_count:
logger.warn("supplied raw word count (%i) did not equal expected count (%i)", raw_word_count, total_words)
logger.warning(
"supplied raw word count (%i) did not equal expected count (%i)", raw_word_count, total_words
)

self.train_count += 1 # number of times train() has been called
self.total_train_time += elapsed
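A side note on the call sites above: the sharded_corpus.py calls build their messages eagerly with str.format(), while several word2vec.py calls pass %-style arguments through to the logger, which interpolates them only if the record is actually emitted. A small illustrative sketch of the difference (the logger name and values are made up, not from the diff):

import logging

logger = logging.getLogger("demo")  # illustrative name
example_count, total_examples = 10, 20

# Eager: the string is fully formatted before the logging call runs.
logger.warning('supplied example count ({0}) did not equal expected count ({1})'.format(example_count, total_examples))

# Lazy: logging interpolates the %-style arguments only when the record is emitted.
logger.warning("supplied example count (%i) did not equal expected count (%i)", example_count, total_examples)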
4 changes: 3 additions & 1 deletion gensim/models/wrappers/ldamallet.py
@@ -242,7 +242,9 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):

def show_topic(self, topicid, num_words=10):
if self.word_topics is None:
logger.warn("Run train or load_word_topics before showing topics.")
logger.warning(
"Run train or load_word_topics before showing topics."
)
topic = self.word_topics[topicid]
topic = topic / topic.sum() # normalize to probability dist
bestn = matutils.argsort(topic, num_words, reverse=True)