more fixup
dohmatob committed Jun 2, 2017
1 parent 08d5c2d commit 0ccd042
Showing 5 changed files with 119 additions and 66 deletions.
94 changes: 63 additions & 31 deletions modl/decomposition/dict_fact.py
@@ -1,7 +1,8 @@
import atexit
from concurrent.futures import ThreadPoolExecutor
from math import log, ceil
from math import log, ceil, sqrt
from tempfile import TemporaryFile
from copy import deepcopy

import numpy as np
import scipy
@@ -14,7 +15,8 @@
from modl.utils.randomkit import Sampler
from .dict_fact_fast import _enet_regression_multi_gram, \
_enet_regression_single_gram, _update_G_average, _batch_weight
from ..utils.math.enet import enet_norm, enet_projection, enet_scale
from ..utils.math.enet import enet_norm, enet_scale
from .proximal import _atomic_prox

MAX_INT = np.iinfo(np.int64).max

@@ -133,7 +135,6 @@ def __init__(self,
dict_init=None,
code_alpha=1,
code_l1_ratio=1,
comp_l1_ratio=0,
tol=1e-2,
max_iter=100,
code_pos=False,
@@ -147,6 +148,9 @@ def __init__(self,
n_threads=1,
rand_size=True,
replacement=True,
dict_structure="enet",
dict_structure_params={},
bcd_n_iter=1
):
"""
Estimator to perform matrix factorization by streaming samples and
@@ -264,7 +268,6 @@ def __init__(self,
max_iter=max_iter,
n_threads=n_threads)

self.comp_l1_ratio = comp_l1_ratio
self.comp_pos = comp_pos

self.n_epochs = n_epochs
@@ -277,6 +280,10 @@ def __init__(self,
self.rand_size = rand_size
self.replacement = replacement

self.dict_structure = dict_structure
self.dict_structure_params = dict_structure_params
self.bcd_n_iter = bcd_n_iter

def fit(self, X):
"""
Compute the factorisation X ~ code_ x components_, solving for
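
The constructor grows three keywords that subsume the removed `comp_l1_ratio`: `dict_structure` names the penalty enforced on the atoms, `dict_structure_params` carries its parameters (`l1_ratio` among them), and `bcd_n_iter` sets the number of inner block-coordinate-descent passes per dictionary update. A minimal usage sketch, assuming `"enet"` stays the default structure; all values are illustrative:

```python
import numpy as np
from modl.decomposition.dict_fact import DictFact

X = np.random.RandomState(0).randn(200, 50)
dict_mf = DictFact(n_components=4,
                   code_alpha=1e-4,
                   n_epochs=2,
                   dict_structure="enet",                     # proximal structure on the atoms
                   dict_structure_params=dict(l1_ratio=0.5),  # forwarded to the prox / enet_scale
                   bcd_n_iter=2,                              # inner BCD passes per dict update
                   random_state=0)
dict_mf.fit(X)
code = dict_mf.transform(X)  # X is approximated by code.dot(dict_mf.components_)
```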
@@ -448,9 +455,8 @@ def prepare(self, n_samples=None, n_features=None,
self.components_[self.components_ <= 0] = \
- self.components_[self.components_ <= 0]
for i in range(self.n_components):
enet_scale(self.components_[i],
l1_ratio=self.comp_l1_ratio,
radius=1)
comp_l1_ratio = self.dict_structure_params.get("l1_ratio", 0.)
enet_scale(self.components_[i], l1_ratio=comp_l1_ratio, radius=1)

self.code_ = np.ones((n_samples, self.n_components), dtype=dtype)

@@ -489,7 +495,10 @@ def _single_batch_fit(self, X, sample_indices):
if X.flags['WRITEABLE'] is False:
X = X.copy()

subset = self.feature_sampler_.yield_subset(self.reduction)
if self.reduction > 1.:
subset = self.feature_sampler_.yield_subset(self.reduction)
else:
subset = np.arange(self.feature_sampler_.range)
batch_size = X.shape[0]

self.n_iter_ += batch_size
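
The new guard makes feature subsampling a no-op when `reduction <= 1` instead of going through the sampler. An illustrative numpy equivalent (the `Sampler`'s actual scheme, e.g. cycling or replacement, is not visible in this diff):

```python
import numpy as np

def yield_feature_subset(n_features, reduction, rng):
    # Illustrative stand-in for the guarded feature_sampler_ call above:
    # subsample roughly a 1/reduction fraction of the features, or take
    # them all when no reduction is requested.
    if reduction > 1.:
        size = int(np.ceil(n_features / reduction))
        return rng.choice(n_features, size=size, replace=False)
    return np.arange(n_features)

subset = yield_feature_subset(100, reduction=2., rng=np.random.RandomState(0))
```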
@@ -644,29 +653,52 @@ def _update_dict(self, subset):
self.G_ -= components_subset.dot(components_subset.T)

gradient_subset -= self.C_.dot(components_subset)

order = self.random_state.permutation(n_components)
for k in order:
subset_norm = enet_norm(components_subset[k],
self.comp_l1_ratio)
self.comp_norm_[k] += subset_norm
gradient_subset = ger(1.0, self.C_[k], components_subset[k],
a=gradient_subset, overwrite_a=True)
if self.C_[k, k] > 1e-20:
components_subset[k] = gradient_subset[k] / self.C_[k, k]
# Else do not update
if self.comp_pos:
components_subset[components_subset < 0] = 0
enet_projection(components_subset[k],
atom_temp,
self.comp_norm_[k], self.comp_l1_ratio)
components_subset[k] = atom_temp
subset_norm = enet_norm(components_subset[k],
self.comp_l1_ratio)
self.comp_norm_[k] -= subset_norm
gradient_subset = ger(-1.0, self.C_[k], components_subset[k],
a=gradient_subset, overwrite_a=True)

params = deepcopy(self.dict_structure_params)
if "l1_ratio" in params:
l1_ratio = params.pop("l1_ratio")
else:
l1_ratio = 0.
if "weight" in params:
weight = params.pop("weight")
else:
weight = 1.
delta_dict = np.inf
for bcd_iter in range(self.bcd_n_iter):
if self.verbose and self.bcd_n_iter > 1:
print("[BCD] iter %02i/%02i: rel. change in dict = %g" % (
bcd_iter + 1, self.bcd_n_iter, delta_dict))
old_dict = components_subset.copy()
order = self.random_state.permutation(n_components)
for idx, k in enumerate(order):
if self.verbose:
print(" (%s) updating component %02i/%02i" % (
self.dict_structure, idx + 1, n_components))
comp_norm = None
if self.dict_structure == "enet":
subset_norm = enet_norm(components_subset[k],
l1_ratio)
self.comp_norm_[k] += subset_norm
comp_norm = self.comp_norm_[k]
gradient_subset = ger(1.0, self.C_[k], components_subset[k],
a=gradient_subset, overwrite_a=True)
if self.C_[k, k] > 1e-20:
components_subset[k] = gradient_subset[k] / self.C_[k, k]
# Else do not update
components_subset[k] = _atomic_prox(
components_subset[k], which=self.dict_structure,
weight=weight / self.C_[k, k], norm=comp_norm,
output=atom_temp, **params)
if self.dict_structure == "enet":
subset_norm = enet_norm(components_subset[k],
l1_ratio)
self.comp_norm_[k] -= subset_norm
gradient_subset = ger(-1.0, self.C_[k], components_subset[k],
a=gradient_subset, overwrite_a=True)

if self.bcd_n_iter > 1:
delta_dict = np.sum((components_subset - old_dict) ** 2)
delta_dict /= np.sum(old_dict ** 2)
delta_dict = sqrt(delta_dict)
self.components_[:, subset] = components_subset

if self.G_agg == 'full':
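
The rewritten loop is block coordinate descent over atoms: the rank-one `ger` updates isolate atom `k` from the running gradient, the unconstrained minimizer `gradient_subset[k] / C_[k, k]` is taken, and `_atomic_prox` projects it onto the structure named by `dict_structure`; `bcd_n_iter` outer passes track the relative change of the dictionary. A schematic numpy version, with a generic `prox` standing in for `_atomic_prox` (whose signature is inferred from the call above):

```python
import numpy as np

def bcd_dict_update(D, C, B, prox, weight=1., n_iter=1, seed=0):
    """Schematic version of the loop above, not the library code.

    D : (n_components, n_features) dictionary block, updated in place
    C : (n_components, n_components) code Gram matrix (the diff's self.C_)
    B : (n_components, n_features) data statistic, so gradient = B - C @ D
    prox : callable(atom, w) -> atom, a stand-in for _atomic_prox
    """
    rng = np.random.RandomState(seed)
    gradient = B - C.dot(D)                    # maintained rank-one-wise below
    rel_change = np.inf
    for _ in range(n_iter):
        old_D = D.copy()
        for k in rng.permutation(D.shape[0]):
            gradient += np.outer(C[k], D[k])   # drop atom k (the +1.0 ger call)
            if C[k, k] > 1e-20:
                D[k] = gradient[k] / C[k, k]   # unconstrained coordinate minimizer
            w = weight / max(C[k, k], 1e-20)   # division guard added for this sketch
            D[k] = prox(D[k], w)               # structure-enforcing proximal step
            gradient -= np.outer(C[k], D[k])   # re-insert updated atom (the -1.0 ger)
        rel_change = np.sqrt(((D - old_D) ** 2).sum() / (old_D ** 2).sum())
    return rel_change
```

With `prox = lambda v, w: np.sign(v) * np.maximum(np.abs(v) - w, 0.)` (soft-thresholding), this reduces to an l1-penalized dictionary update.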
62 changes: 41 additions & 21 deletions modl/decomposition/fmri.py
@@ -10,14 +10,13 @@
import itertools
import time
import warnings
from math import log, sqrt
from math import sqrt

import numpy as np
from nibabel.filebasedimages import ImageFileError
from nilearn._utils import CacheMixin
from nilearn._utils import check_niimg
from nilearn.input_data import NiftiMasker
from nilearn._utils.compat import _basestring
from sklearn.base import TransformerMixin
from sklearn.externals.joblib import Memory
from sklearn.externals.joblib import Parallel
@@ -44,7 +43,7 @@ def __init__(self,
transform_batch_size=None,
mask=None, smoothing_fwhm=None,
standardize=True, detrend=True,
low_pass=None, high_pass=None, t_r=None,
low_pass=None, high_pass=None, t_r=None,
target_affine=None, target_shape=None,
mask_strategy='background', mask_args=None,
memory=Memory(cachedir=None),
@@ -278,7 +277,10 @@ def __init__(self,
mask_strategy='background', mask_args=None,
memory=Memory(cachedir=None), memory_level=0,
n_jobs=1, verbose=0,
callback=None):
callback=None,
dict_structure="enet",
dict_structure_params={},
bcd_n_iter=1):
fMRICoderMixin.__init__(self, n_components=n_components,
alpha=alpha,
dict_init=dict_init,
@@ -306,6 +308,20 @@ def __init__(self,
self.random_state = random_state
self.callback = callback

self.dict_structure = dict_structure
self.dict_structure_params = dict_structure_params
self.bcd_n_iter = bcd_n_iter

def _pre_fit(self, imgs=None, y=None, confounds=None):
# Base logic for pipelining estimators
if imgs is None:
raise ValueError('imgs is None, use fMRICoder instead')

# Fit mask + pipelining
fMRICoderMixin.fit(self, imgs, confounds=confounds)

return self

def fit(self, imgs=None, y=None, confounds=None):
"""Compute the mask and the dictionary maps across subjects
@@ -324,17 +340,10 @@ def fit(self, imgs=None, y=None, confounds=None):
-------
self
"""
# Base logic for pipelining estimators
if imgs is None:
raise ValueError('imgs is None, use fMRICoder instead')

# Fit mask + pipelining
fMRICoderMixin.fit(self, imgs, confounds=confounds)

self.components_ = self._cache(_compute_components,
func_memory_level=1,
ignore=['n_jobs',
'verbose'])(
self._pre_fit(imgs=imgs, y=None, confounds=confounds)
calc = self._cache(_compute_components, func_memory_level=1,
ignore=['n_jobs', 'verbose'])
self.components_ = calc(
self.masker_, imgs,
confounds=confounds,
dict_init=self.components_,
@@ -348,7 +357,14 @@ def fit(self, imgs=None, y=None, confounds=None):
verbose=self.verbose,
random_state=self.random_state,
callback=self.callback,
n_jobs=self.n_jobs)
n_jobs=self.n_jobs,
dict_structure=self.dict_structure,
dict_structure_params=self.dict_structure_params,
bcd_n_iter=self.bcd_n_iter)
self._post_fit()
return self

def _post_fit(self):
self.components_img_ = self.masker_.inverse_transform(self.components_)
self.coder_ = Coder(dictionary=self.components_,
code_alpha=self.alpha,
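
The refactor splits `fit` into three reusable stages: `_pre_fit` (mask fitting and pipelining checks), a cached `_compute_components` call, and `_post_fit` (components image and coder). A stubbed schematic of that control flow, with placeholder bodies invented for illustration:

```python
class FitPipeline:
    # Minimal stand-in for the template structure of the refactored fit;
    # the real methods live on the estimator and use the masker and cache.
    def _pre_fit(self, imgs):
        if imgs is None:
            raise ValueError('imgs is None, use fMRICoder instead')
        self.masker_ = 'fitted masker'          # placeholder for mask fitting

    def _compute(self, imgs):
        return [[0.0]]                          # placeholder for _compute_components

    def _post_fit(self):
        self.components_img_ = 'unmasked maps'  # placeholder for inverse_transform

    def fit(self, imgs):
        self._pre_fit(imgs)
        self.components_ = self._compute(imgs)  # the cached call in the diff
        self._post_fit()
        return self
```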
@@ -423,7 +439,10 @@ def _compute_components(masker,
verbose=0,
random_state=None,
callback=None,
n_jobs=1):
n_jobs=1,
dict_structure="enet",
dict_structure_params={},
bcd_n_iter=1):
methods = {'masked': {'G_agg': 'masked', 'Dx_agg': 'masked'},
'dictionary only': {'G_agg': 'full', 'Dx_agg': 'full'},
'gram': {'G_agg': 'masked', 'Dx_agg': 'masked'},
@@ -458,17 +477,18 @@ def _compute_components(masker,
print("Learning...")
dict_fact = DictFact(n_components=n_components,
code_alpha=alpha,
code_l1_ratio=0,
comp_l1_ratio=1,
comp_pos=True,
code_l1_ratio=0.,
reduction=reduction,
Dx_agg=Dx_agg,
G_agg=G_agg,
learning_rate=learning_rate,
batch_size=batch_size,
random_state=random_state,
n_threads=n_jobs,
verbose=0)
verbose=0,
dict_structure=dict_structure,
dict_structure_params=dict_structure_params,
bcd_n_iter=bcd_n_iter)
dict_fact.prepare(n_samples=n_samples, n_features=n_voxels,
X=dict_init, dtype=dtype)
if n_records > 0:
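
For completeness, how the new options reach the fMRI estimator from user code. The class name `fMRIDictFact` is an assumption (only the `fMRICoderMixin` base is visible in these hunks), and the input path is a placeholder:

```python
from modl.decomposition.fmri import fMRIDictFact  # class name assumed

func_imgs = ["subject_01_bold.nii.gz"]            # placeholder 4D functional images
dict_fact = fMRIDictFact(n_components=20,
                         alpha=1.,
                         n_epochs=2,
                         reduction=2,
                         smoothing_fwhm=4.,
                         dict_structure="enet",
                         dict_structure_params=dict(l1_ratio=1.),  # sparse maps
                         bcd_n_iter=2)
dict_fact.fit(func_imgs)
maps_img = dict_fact.components_img_              # built by _post_fit above
```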
22 changes: 11 additions & 11 deletions modl/decomposition/tests/test_dict_fact.py
@@ -58,16 +58,17 @@ def test_dict_mf_reconstruction(solver):
dict_mf = DictFact(n_components=4,
code_alpha=1e-4,
n_epochs=5,
comp_l1_ratio=0,
G_agg=solver_dict[solver]['G_agg'],
Dx_agg=solver_dict[solver]['Dx_agg'],
random_state=rng_global, reduction=1)
random_state=rng_global, reduction=1,
dict_structure_params=dict(l1_ratio=0.))
dict_mf.fit(X)
P = dict_mf.transform(X)
Y = P.dot(dict_mf.components_)
rel_error = np.sum((X - Y) ** 2) / np.sum(X ** 2)
assert (rel_error < 0.02)


@pytest.mark.parametrize("solver", solvers)
def test_dict_mf_reconstruction_reduction(solver):
X, Q = generate_synthetic(n_features=20,
@@ -76,10 +77,10 @@ def test_dict_mf_reconstruction_reduction(solver):
dict_mf = DictFact(n_components=4,
code_alpha=1e-4,
n_epochs=2,
comp_l1_ratio=0,
G_agg=solver_dict[solver]['G_agg'],
Dx_agg=solver_dict[solver]['Dx_agg'],
random_state=rng_global, reduction=2)
random_state=rng_global, reduction=2,
dict_structure_params=dict(l1_ratio=0.))
dict_mf.fit(X)
P = dict_mf.transform(X)
Y = P.dot(dict_mf.components_)
@@ -95,10 +96,10 @@ def test_dict_mf_reconstruction_reproductible(solver):
dict_mf = DictFact(n_components=4,
code_alpha=1e-4,
n_epochs=2,
comp_l1_ratio=0,
G_agg=solver_dict[solver]['G_agg'],
Dx_agg=solver_dict[solver]['Dx_agg'],
random_state=0, reduction=2)
random_state=0, reduction=2,
dict_structure_params=dict(l1_ratio=0))
dict_mf.fit(X)
D1 = dict_mf.components_.copy()
P1 = dict_mf.transform(X)
@@ -120,11 +121,11 @@ def test_dict_mf_reconstruction_reduction_batch(solver):
dict_mf = DictFact(n_components=4,
code_alpha=1e-4,
n_epochs=2,
comp_l1_ratio=0,
G_agg=solver_dict[solver]['G_agg'],
Dx_agg=solver_dict[solver]['Dx_agg'],
random_state=rng_global, reduction=2,
batch_size=10)
batch_size=10,
dict_structure_params=dict(l1_ratio=0))
dict_mf.fit(X)
P = dict_mf.transform(X)
Y = P.dot(dict_mf.components_)
@@ -138,12 +139,11 @@ def test_dict_mf_reconstruction_sparse_dict(solver):
rng = check_random_state(0)
dict_init = Q + rng.randn(*Q.shape) * 0.2
dict_mf = DictFact(n_components=4, code_alpha=1e-2, n_epochs=2,
code_l1_ratio=0,
comp_l1_ratio=1,
dict_init=dict_init,
G_agg=solver_dict[solver]['G_agg'],
Dx_agg=solver_dict[solver]['Dx_agg'],
random_state=rng_global)
random_state=rng_global,
dict_structure_params=dict(l1_ratio=1.))
dict_mf.fit(X)
Q_rec = dict_mf.components_
Q_rec /= np.sqrt(np.sum(Q_rec ** 2, axis=1))[:, np.newaxis]
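
Every test update follows one migration pattern: the removed `comp_l1_ratio=x` keyword becomes `dict_structure_params=dict(l1_ratio=x)`. In short:

```python
from modl.decomposition.dict_fact import DictFact

# Before this commit:
#   dict_mf = DictFact(n_components=4, code_alpha=1e-4, comp_l1_ratio=1, ...)
# After (assuming "enet" remains the default dict_structure):
dict_mf = DictFact(n_components=4, code_alpha=1e-4,
                   dict_structure_params=dict(l1_ratio=1.))
```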
5 changes: 3 additions & 2 deletions modl/decomposition/tests/test_fmri.py
@@ -92,7 +92,8 @@ def test_dict_fact(method, memory):
dict_init=init,
method=method,
reduction=2,
smoothing_fwhm=0., n_epochs=2, alpha=1)
smoothing_fwhm=0., n_epochs=2, alpha=1,
dict_structure_params=dict(l1_ratio=1.))
dict_fact.fit(data)
maps = np.rollaxis(dict_fact.components_img_.get_data(), 3, 0)
components = np.rollaxis(components.get_data(), 3, 0)
@@ -137,4 +138,4 @@ def test_score():
pass

def test_transform():
pass
pass
2 changes: 1 addition & 1 deletion requirements.txt
@@ -7,4 +7,4 @@ nibabel>=0.2.1
nilearn>=0.2.6
scikit_learn>=0.18
scikit-image>=0.12.3
pandas>=0.18
pandas>=0.18
