From 75a9146c37bf3a32c3c421efd252aa5099bfba86 Mon Sep 17 00:00:00 2001
From: L3viathan
Date: Fri, 6 Feb 2015 14:40:33 +0100
Subject: [PATCH] implement PMI

---
 src/composes/matrix/dense_matrix.py                | 15 +++++++++++++
 src/composes/matrix/sparse_matrix.py               | 14 ++++++++++++
 .../transformation/scaling/pmi_weighting.py        | 22 +++++++++++++++++++
 3 files changed, 51 insertions(+)
 create mode 100644 src/composes/transformation/scaling/pmi_weighting.py

diff --git a/src/composes/matrix/dense_matrix.py b/src/composes/matrix/dense_matrix.py
index ae55185..d642b6a 100644
--- a/src/composes/matrix/dense_matrix.py
+++ b/src/composes/matrix/dense_matrix.py
@@ -245,6 +245,21 @@ def plog(self):
         self.mat[self.mat < 1.0] = 1
         self.mat = np.log(self.mat)
 
+    def log(self):
+        """
+        Applies log to the matrix elements.
+
+        Elements smaller than or equal to 0 (for which log is undefined) are
+        replaced by 1 first, so they end up as 0; log is applied to the rest.
+
+        Modifies the current matrix.
+        """
+
+        #this line uses 3 x size(mat) to run in the worst case
+        #(if we select the entire matrix - depends on the size of the selection)
+        self.mat[self.mat <= 0] = 1
+        self.mat = np.log(self.mat)
+
 
     def assert_positive(self):
         """
diff --git a/src/composes/matrix/sparse_matrix.py b/src/composes/matrix/sparse_matrix.py
index 563188b..461dc22 100644
--- a/src/composes/matrix/sparse_matrix.py
+++ b/src/composes/matrix/sparse_matrix.py
@@ -291,6 +291,20 @@ def plog(self):
         self.mat.data = np.log(self.mat.data)
         self.mat.eliminate_zeros()
 
+    def log(self):
+        """
+        Applies log to the stored matrix elements.
+
+        Stored elements smaller than or equal to 0 (for which log is undefined)
+        are replaced by 1 first, so they end up as 0 and are then eliminated.
+
+        Modifies the current matrix.
+        """
+
+        self.mat.data[self.mat.data <= 0] = 1
+        self.mat.data = np.log(self.mat.data)
+        self.mat.eliminate_zeros()
+
     def get_non_negative(self):
         """
         Turns negative entries to 0.
diff --git a/src/composes/transformation/scaling/pmi_weighting.py b/src/composes/transformation/scaling/pmi_weighting.py
new file mode 100644
index 0000000..f1b1564
--- /dev/null
+++ b/src/composes/transformation/scaling/pmi_weighting.py
@@ -0,0 +1,22 @@
+from scaling import Scaling
+from epmi_weighting import EpmiWeighting
+
+class PmiWeighting(Scaling):
+    """
+    Point-wise Mutual Information.
+
+    :math:`pmi(r,c) = log\\frac{P(r,c)}{P(r)P(c)}`
+    """
+
+    _name = "pmi"
+    _uses_column_stats = True
+
+    def apply(self, matrix_, column_marginal=None):
+        """Returns the EpmiWeighting result for matrix_ after calling log() on it."""
+        matrix_ = EpmiWeighting().apply(matrix_, column_marginal)
+        matrix_.log()
+        return matrix_
+
+    def get_column_stats(self, matrix_):
+        """Returns matrix_.sum(0) as the column statistics of matrix_."""
+        return matrix_.sum(0)