Commit c869c15: omitted files

dohmatob committed Jun 2, 2017
1 parent bb06074 commit c869c15

Showing 6 changed files with 653 additions and 54 deletions.

36 changes: 18 additions & 18 deletions modl/decomposition/dict_fact.py
@@ -135,10 +135,11 @@ def __init__(self,
                  dict_init=None,
                  code_alpha=1,
                  code_l1_ratio=1,
-                 comp_pos=False,
+                 comp_l1_ratio=1.,
                  tol=1e-2,
                  max_iter=100,
                  code_pos=False,
+                 comp_pos=False,
                  random_state=None,
                  n_epochs=1,
                  n_components=10,
@@ -257,6 +258,9 @@ def __init__(self,
         self.G_agg = G_agg
         self.reduction = reduction

+        self.comp_pos = comp_pos
+        self.comp_l1_ratio = comp_l1_ratio
+
         self.dict_init = dict_init

         self._set_coding_params(n_components,
@@ -268,8 +272,6 @@
                                 max_iter=max_iter,
                                 n_threads=n_threads)

-        self.comp_pos = comp_pos
-
         self.n_epochs = n_epochs

         self.verbose = verbose
@@ -451,12 +453,13 @@ def prepare(self, n_samples=None, n_features=None,
                 :self.n_components]
             self.components_ = check_array(X[random_idx], dtype=dtype.type,
                                            copy=True)
-        if self.comp_pos:
-            self.components_[self.components_ <= 0] = \
-                - self.components_[self.components_ <= 0]
-        for i in range(self.n_components):
-            comp_l1_ratio = self.dict_structure_params.get("l1_ratio", 0.)
-            enet_scale(self.components_[i], l1_ratio=comp_l1_ratio, radius=1)
+        if self.dict_structure == "enet":
+            if self.comp_pos:
+                neg = self.components_ <= 0
+                self.components_[neg] = -self.components_[neg]
+            for i in range(self.n_components):
+                enet_scale(self.components_[i], l1_ratio=self.comp_l1_ratio,
+                           radius=1)

         self.code_ = np.ones((n_samples, self.n_components), dtype=dtype)

@@ -654,12 +657,8 @@ def _update_dict(self, subset):

         gradient_subset -= self.C_.dot(components_subset)
         params = deepcopy(self.dict_structure_params)
-        if "l1_ratio" in params:
-            l1_ratio = params.pop("l1_ratio")
-        else:
-            l1_ratio = 0.
-        if "weight" in params:
-            weight = params.pop("weight")
+        if "alpha" in params:
+            weight = params.pop("alpha")
         else:
             weight = 1.
         delta_dict = np.inf
@@ -676,7 +675,7 @@
             comp_norm = None
             if self.dict_structure == "enet":
                 subset_norm = enet_norm(components_subset[k],
-                                        l1_ratio)
+                                        self.comp_l1_ratio)
                 self.comp_norm_[k] += subset_norm
                 comp_norm = self.comp_norm_[k]
             gradient_subset = ger(1.0, self.C_[k], components_subset[k],
@@ -687,10 +686,11 @@
             components_subset[k] = _atomic_prox(
                 components_subset[k], which=self.dict_structure,
                 weight=weight / self.C_[k, k], norm=comp_norm,
-                output=atom_temp, **params)
+                output=atom_temp, pos=self.comp_pos,
+                l1_ratio=self.comp_l1_ratio, **params)
             if self.dict_structure == "enet":
                 subset_norm = enet_norm(components_subset[k],
-                                        l1_ratio)
+                                        self.comp_l1_ratio)
                 self.comp_norm_[k] -= subset_norm
             gradient_subset = ger(-1.0, self.C_[k], components_subset[k],
                                   a=gradient_subset, overwrite_a=True)
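Note: the new `prepare` logic above reduces to the following standalone sketch (names and shapes are illustrative, and it assumes, as the diff does, that `modl.utils.math.enet.enet_scale` rescales an atom to the given elastic-net radius in place):

import numpy as np
from modl.utils.math.enet import enet_scale  # same helper the diff calls

def _init_enet_atoms(X, n_components, comp_pos=False, comp_l1_ratio=1.,
                     seed=0):
    # pick random rows of the data as initial atoms
    rng = np.random.RandomState(seed)
    components = X[rng.permutation(len(X))[:n_components]].copy()
    if comp_pos:
        # flip the sign of non-positive entries, as in `prepare`
        neg = components <= 0
        components[neg] = -components[neg]
    for i in range(n_components):
        # rescale each atom to unit elastic-net norm (assumed in-place)
        enet_scale(components[i], l1_ratio=comp_l1_ratio, radius=1)
    return components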
47 changes: 23 additions & 24 deletions modl/decomposition/fmri.py
@@ -347,30 +347,29 @@ def fit(self, imgs=None, y=None, confounds=None):
             self
         """
         self._pre_fit(imgs=imgs, y=None, confounds=confounds)
-        calc = self._cache(_compute_components, func_memory_level=1,
-                           ignore=['n_jobs', 'verbose'])
-        self.components_ = calc(
-            self.masker_, imgs,
-            confounds=confounds,
-            dict_init=self.components_,
-            alpha=self.alpha,
-            comp_pos=self.positive,
-            comp_l1_ratio=self.comp_l1_ratio,
-            code_l1_ratio=self.code_l1_ratio,
-            reduction=self.reduction,
-            learning_rate=self.learning_rate,
-            n_components=self.n_components,
-            batch_size=self.batch_size,
-            positive=self.positive,
-            n_epochs=self.n_epochs,
-            method=self.method,
-            verbose=self.verbose,
-            random_state=self.random_state,
-            callback=self.callback,
-            n_jobs=self.n_jobs,
-            dict_structure=self.dict_structure,
-            dict_structure_params=self.dict_structure_params,
-            bcd_n_iter=self.bcd_n_iter)
+        self.components_ = self._cache(
+            _compute_components, func_memory_level=1,
+            ignore=['n_jobs', 'verbose'])(
+                self.masker_, imgs,
+                confounds=confounds,
+                dict_init=self.components_,
+                alpha=self.alpha,
+                positive=self.positive,
+                comp_l1_ratio=self.comp_l1_ratio,
+                code_l1_ratio=self.code_l1_ratio,
+                reduction=self.reduction,
+                learning_rate=self.learning_rate,
+                n_components=self.n_components,
+                batch_size=self.batch_size,
+                n_epochs=self.n_epochs,
+                method=self.method,
+                verbose=self.verbose,
+                random_state=self.random_state,
+                callback=self.callback,
+                n_jobs=self.n_jobs,
+                dict_structure=self.dict_structure,
+                dict_structure_params=self.dict_structure_params,
+                bcd_n_iter=self.bcd_n_iter)
         self._post_fit()
         return self

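Note: the fmri.py change is a refactor plus a keyword cleanup: the memoized callable returned by nilearn's `CacheMixin._cache` is now invoked inline rather than bound to a temporary `calc`, and the duplicate `comp_pos=self.positive` keyword is dropped in favor of the single `positive=self.positive`. The underlying pattern is plain joblib-style memoization; a minimal sketch with hypothetical names:

from joblib import Memory

memory = Memory('/tmp/joblib_cache', verbose=0)

def _expensive(x, power=2):
    # stand-in for _compute_components
    return x ** power

# before: bind the memoized callable, then call it
calc = memory.cache(_expensive)
result = calc(3, power=2)

# after: build and call the memoized callable in one expression
result = memory.cache(_expensive)(3, power=2)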
192 changes: 192 additions & 0 deletions modl/decomposition/proximal.py
@@ -0,0 +1,192 @@
from math import sqrt
import numpy as np
from sklearn.utils import check_random_state
from sklearn.utils.testing import (assert_array_equal,
                                   assert_array_almost_equal)
from nilearn.decoding.objective_functions import _gradient, _div, _unmask
from nilearn.decoding.fista import mfista
from nilearn.decoding.proximal_operators import _prox_l1, _prox_tvl1
from ..utils.math.enet import enet_projection
from .social_sparsity import _prox_social_sparsity


def _gram_schmidt(V, offset=0, normalize=True):
    """Modified Gram-Schmidt (MGS) orthogonalization of a set of vectors,
    given as the rows of a 2D array V. V is modified in place.

    Parameters
    ----------
    offset : int, optional (default 0)
        Assumes that all vectors from index 0 to offset - 1 (inclusive)
        have already been processed.

    Returns
    -------
    V[offset:] : the newly processed rows.
    """
    scales = {}
    for i in range(offset, len(V)):
        # orthogonalize V[i] against all previous vectors
        for j in range(i):
            if j not in scales:
                scales[j] = V[j].dot(V[j])
            if scales[j] > 0.:
                weight = V[j].dot(V[i]) / scales[j]
                V[i] -= weight * V[j]
        if normalize:
            scales[i] = V[i].dot(V[i])
            if scales[i] > 0.:
                V[i] /= sqrt(scales[i])
    return V[offset:]


def test_gram_schmidt():
    V = np.array([[3., 1.], [2., 2.]])
    assert_array_almost_equal(_gram_schmidt(V, normalize=False),
                              np.array([[3., 1.], [-.4, 1.2]]))

    V = np.array([[1., 1., 1., 1.], [-1., 4., 4., 1.], [4., -2., 2., 0.]])
    assert_array_almost_equal(
        _gram_schmidt(V, normalize=False),
        np.array([[1., 1., 1., 1.], [-3., 2., 2., -1.],
                  [1., -10. / 6., 7. / 3., -10. / 6.]]))

    V = np.array([[1., 1., 1., 1.], [-1., 4., 4., 1.], [4., -2., 2., 0.]])
    V_ = V.copy()
    _gram_schmidt(V, offset=2, normalize=False)
    assert_array_equal(V[:2], V_[:2])

def _atomic_prox(atom, weight, mask=None, pos=True, atom_init=None, idx=None,
                 which="social", AB=None, l1_ratio=1., norm=None,
                 output=None, max_iter=100, check_lipschitz=False,
                 check_grad=False, tol=1e-3, verbose=2, **kwargs):
    r"""Solves the SSODL (Smooth Sparse Online Dictionary-Learning)
    dictionary-update problem for a single atom:

        \argmin_{V} \frac{1}{2n} tr(VV^T A) - \frac{1}{n} tr(VB^T)
                    + \alpha \sum_j \varphi(v^j),

    where \varphi is a regularizer that imposes structure -- sparsity and
    smoothness -- such as GraphNet, TV-L1, or social sparsity.

    References
    ==========
    [1] Dohmatob et al., "Learning brain regions via large-scale online
        structured sparse dictionary-learning", NIPS 2017.
    [2] Varoquaux et al., "Social-sparsity brain decoders: faster spatial
        sparsity", PRNI 2016.
    [3] Kowalski et al., "Social sparsity! Neighborhood systems enrich
        structured shrinkage operators", IEEE Transactions on Signal
        Processing, 2013.
    """
    if verbose and idx is not None:
        msg = "[SODL (%s)] updating component %02i" % (which, idx)
        msg += "+" * (80 - len(msg))
        print(msg)

    n_voxels = len(atom)
    if which == "enet":
        if norm is None:
            raise ValueError("`norm` must be provided when which == 'enet'")
        if pos:
            atom[atom < 0.] = 0.
        enet_projection(atom, output, norm, l1_ratio)
        atom = output
    elif which == "social":
        atom = _prox_social_sparsity(_unmask(atom, mask), weight,
                                     **kwargs)[mask]
    elif which == "gram-schmidt":
        raise NotImplementedError
        # dictionary[k] = _gram_schmidt(dictionary[:k + 1], offset=k)[-1]
    elif which == "enet variational":
        # variational form of the elastic-net prox: l2 shrinkage via
        # rescaling, l1 shrinkage via soft-thresholding
        l1_weight = weight * l1_ratio
        l2_weight = weight - l1_weight
        scale = sqrt(1. + l2_weight)
        atom /= scale
        if l1_weight > 0.:
            atom = _prox_l1(atom, l1_weight)
        atom /= scale
elif which in ["tv-l1", "graph-net"]:
# misc
flat_mask = mask.ravel()
l1_weight = weight * l1_ratio
if which == "laplacian":
spatial_weight = weight - l1_weight
else:
spatial_weight = 0.
lap_lips = 4. * mask.ndim * spatial_weight
loss_lips = 1.
loss_lips *= 1.05
lips = loss_lips + lap_lips

def smooth_energy(v):
"""Smooth part of energy / cost function.
"""
e = .5 * np.sum((v - atom) ** 2)
if which == "laplacian":
lap = np.sum(_gradient(_unmask(v, mask))[:, mask] ** 2)
lap *= spatial_weight
lap *= .5
e += lap
return e

def nonsmooth_energy(v):
"""Non-smooth part of energy / cost function.
"""
e = l1_weight * np.abs(v).sum()
if which == "tv":
gradient = _gradient(_unmask(v, mask))
tv_term = np.sum(np.sqrt(np.sum(gradient ** 2,
axis=0)))
tv_term *= spatial_weight
e += tv_term
return e

def total_energy(v):
"""Total energy / cost function.
"""
return smooth_energy(v) + nonsmooth_energy(v)

def grad(v):
"""Gradient of smooth part of energy / cost function.
"""
grad = v - atom
if which == "laplacian":
lap = -_div(_gradient(_unmask(v, mask)))[mask]
lap *= spatial_weight
grad += lap
return grad

def prox(v, stepsize, dgap_tol, init=None):
"""Proximal operator of non-smooth part of energy / cost function
"""
if which == "laplacian":
out = _prox_l1(v, stepsize * l1_weight, copy=False)
info = dict(converged=True)
elif which == "tv":
v = _unmask(v, mask)
if init is not None:
init = _unmask(init, mask)
out, info = _prox_tvl1(v, init=init, l1_ratio=l1_ratio,
weight=weight * stepsize,
dgap_tol=dgap_tol,
max_iter=1000, verbose=verbose)
out = out.ravel()[flat_mask]
else:
raise ValueError("Unknown value for which: %s" % (which))
return out, info

        # for debugging
        if check_grad:
            from sklearn.utils.testing import assert_less
            from scipy.optimize import check_grad
            rng = check_random_state(42)
            x0 = rng.randn(n_voxels)
            assert_less(check_grad(smooth_energy, grad, x0), 1e-3)

        # use FISTA to update the atom
        atom, _, _ = mfista(
            grad, prox, total_energy, lips, (n_voxels,), tol=tol,
            max_iter=max_iter, check_lipschitz=check_lipschitz,
            verbose=verbose, init=dict(w=atom_init))

    return atom
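As a quick usage note for `_gram_schmidt` beyond the unit test above: the rows of any full-rank matrix come out orthonormal (a small sketch; the import path matches the new file):

import numpy as np
from modl.decomposition.proximal import _gram_schmidt

rng = np.random.RandomState(0)
V = rng.randn(5, 20)
_gram_schmidt(V)  # normalize=True by default; V is modified in place
assert np.allclose(V.dot(V.T), np.eye(5))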
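Similarly, the mask-free "enet variational" branch of `_atomic_prox` can be exercised on a bare numpy vector (a sketch, not part of the commit; with weight=.4 and l1_ratio=.5 it rescales by 1/sqrt(1.2), soft-thresholds at .2, then rescales again):

import numpy as np
from modl.decomposition.proximal import _atomic_prox

atom = np.array([2., -1., .5, 0.])
out = _atomic_prox(atom.copy(), weight=.4, which="enet variational",
                   l1_ratio=.5, verbose=0)
# out == soft_threshold(atom / sqrt(1.2), .2) / sqrt(1.2)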