Skip to content

Commit

Permalink
minor docfix for word2vec trim_rule (#781)
Browse files Browse the repository at this point in the history
  • Loading branch information
piskvorky authored and tmylk committed Jul 11, 2016
1 parent e40f01f commit 4a6b52c
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 8 deletions.
14 changes: 7 additions & 7 deletions gensim/models/ldamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def __init__(self, corpus=None, num_topics=100, id2word=None,
Hoffman et al, respectively.
`minimum_probability` controls filtering the topics returned for a document (bow).
`random_state` can be a numpy.random.RandomState object or the seed for one
Example:
Expand Down Expand Up @@ -303,7 +303,7 @@ def __init__(self, corpus=None, num_topics=100, id2word=None,
assert self.alpha.shape == (self.num_topics,), "Invalid alpha shape. Got shape %s, but expected (%d, )" % (str(self.alpha.shape), self.num_topics)

self.eta, self.optimize_eta = self.init_dir_prior(eta, 'eta')

self.random_state = get_random_state(random_state)

assert (self.eta.shape == (self.num_topics, 1) or self.eta.shape == (self.num_topics, self.num_terms)), (
Expand Down Expand Up @@ -907,7 +907,7 @@ def get_document_topics(self, bow, minimum_probability=None, minimum_phi_value=N
Ignore topics with very low probability (below `minimum_probability`).
If per_word_topics is True, it also returns a list of topics, sorted in descending order of most likely topics for that word.
If per_word_topics is True, it also returns a list of topics, sorted in descending order of most likely topics for that word.
It also returns a list of word_ids and each word's corresponding topics' phi_values, multiplied by feature length (i.e., word count).
"""
Expand All @@ -929,7 +929,7 @@ def get_document_topics(self, bow, minimum_probability=None, minimum_phi_value=N

document_topics = [(topicid, topicvalue) for topicid, topicvalue in enumerate(topic_dist)
if topicvalue >= minimum_probability]

if not per_word_topics:
return document_topics
else:
Expand All @@ -941,10 +941,10 @@ def get_document_topics(self, bow, minimum_probability=None, minimum_phi_value=N
for topic_id in range(0, self.num_topics):
if phis[topic_id][word_type] >= minimum_phi_value:
# appends phi values for each topic for that word
# these phi values are scaled by feature length
# these phi values are scaled by feature length
phi_values.append((phis[topic_id][word_type], topic_id))
phi_topic.append((topic_id, phis[topic_id][word_type]))

# list of (word_id, [(topic_0, phi_value), (topic_1, phi_value), ...]) pairs.
word_phi.append((word_type, phi_topic))
# sorts the topics based on most likely topic
Expand Down Expand Up @@ -974,7 +974,7 @@ def get_term_topics(self, word_id, minimum_probability=None):

return values


def __getitem__(self, bow, eps=None):
"""
Return topic distribution for the given document `bow`, as a list of
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ def __init__(
`trim_rule` = vocabulary trimming rule, specifies whether certain words should remain
in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count).
Can be None (min_count will be used), or a callable that accepts parameters (word, count, min_count) and
returns either util.RULE_DISCARD, util.RULE_KEEP or util.RULE_DEFAULT.
returns either `utils.RULE_DISCARD`, `utils.RULE_KEEP` or `utils.RULE_DEFAULT`.
Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part
of the model.
Expand Down

0 comments on commit 4a6b52c

Please sign in to comment.