From 5a7b43e170ef075aac3d34cbedba0343ca877774 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=93lavur=20Mortensen?=
Date: Mon, 20 Feb 2017 13:44:22 +0000
Subject: [PATCH] Fixed bound computation, multiplying the expectation over author assignments by the number of words in the document. (#1156)

---
 gensim/models/atmodel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensim/models/atmodel.py b/gensim/models/atmodel.py
index adad0191a3..4f550b9ffe 100755
--- a/gensim/models/atmodel.py
+++ b/gensim/models/atmodel.py
@@ -838,7 +838,7 @@ def bound(self, chunk, chunk_doc_idx=None, subsample_ratio=1.0, author2doc=None,
             # Computing the bound requires summing over expElogtheta[a, k] * expElogbeta[k, v], which
             # is the same computation as in normalizing phi.
             phinorm = self.compute_phinorm(ids, authors_d, expElogtheta[authors_d, :], expElogbeta[:, ids])
-            word_score += np.log(1.0 / len(authors_d)) + cts.dot(np.log(phinorm))
+            word_score += np.log(1.0 / len(authors_d)) * sum(cts) + cts.dot(np.log(phinorm))
 
         # Compensate likelihood for when `chunk` above is only a sample of the whole corpus. This ensures
         # that the likelihood is always rougly on the same scale.