more fixup
dohmatob committed Jun 2, 2017
1 parent 08d5c2d commit 0ccd042
Showing 5 changed files with 119 additions and 66 deletions.
94 changes: 63 additions & 31 deletions modl/decomposition/dict_fact.py
@@ -1,7 +1,8 @@
import atexit
from concurrent.futures import ThreadPoolExecutor
from math import log, ceil
from math import log, ceil, sqrt
from tempfile import TemporaryFile
from copy import deepcopy

import numpy as np
import scipy
@@ -14,7 +15,8 @@
from modl.utils.randomkit import Sampler
from .dict_fact_fast import _enet_regression_multi_gram, \
_enet_regression_single_gram, _update_G_average, _batch_weight
from ..utils.math.enet import enet_norm, enet_projection, enet_scale
from ..utils.math.enet import enet_norm, enet_scale
from .proximal import _atomic_prox

MAX_INT = np.iinfo(np.int64).max

@@ -133,7 +135,6 @@ def __init__(self,
dict_init=None,
code_alpha=1,
code_l1_ratio=1,
comp_l1_ratio=0,
tol=1e-2,
max_iter=100,
code_pos=False,
@@ -147,6 +148,9 @@ def __init__(self,
n_threads=1,
rand_size=True,
replacement=True,
dict_structure="enet",
dict_structure_params={},
bcd_n_iter=1
):
"""
Estimator to perform matrix factorization by streaming samples and
@@ -264,7 +268,6 @@ def __init__(self,
max_iter=max_iter,
n_threads=n_threads)

self.comp_l1_ratio = comp_l1_ratio
self.comp_pos = comp_pos

self.n_epochs = n_epochs
@@ -277,6 +280,10 @@ def __init__(self,
self.rand_size = rand_size
self.replacement = replacement

self.dict_structure = dict_structure
self.dict_structure_params = dict_structure_params
self.bcd_n_iter = bcd_n_iter

def fit(self, X):
"""
Compute the factorisation X ~ code_ x components_, solving for
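
The constructor grows three keywords that subsume the removed `comp_l1_ratio`: `dict_structure` names the penalty enforced on the atoms, `dict_structure_params` carries its parameters (`l1_ratio` among them), and `bcd_n_iter` sets the number of inner block-coordinate-descent passes per dictionary update. A minimal usage sketch, assuming `"enet"` stays the default structure; all values are illustrative:

```python
import numpy as np
from modl.decomposition.dict_fact import DictFact

X = np.random.RandomState(0).randn(200, 50)
dict_mf = DictFact(n_components=4,
                   code_alpha=1e-4,
                   n_epochs=2,
                   dict_structure="enet",                     # proximal structure on the atoms
                   dict_structure_params=dict(l1_ratio=0.5),  # forwarded to the prox / enet_scale
                   bcd_n_iter=2,                              # inner BCD passes per dict update
                   random_state=0)
dict_mf.fit(X)
code = dict_mf.transform(X)  # X is approximated by code.dot(dict_mf.components_)
```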
@@ -448,9 +455,8 @@ def prepare(self, n_samples=None, n_features=None,
self.components_[self.components_ <= 0] = \
- self.components_[self.components_ <= 0]
for i in range(self.n_components):
enet_scale(self.components_[i],
l1_ratio=self.comp_l1_ratio,
radius=1)
comp_l1_ratio = self.dict_structure_params.get("l1_ratio", 0.)
enet_scale(self.components_[i], l1_ratio=comp_l1_ratio, radius=1)

self.code_ = np.ones((n_samples, self.n_components), dtype=dtype)

@@ -489,7 +495,10 @@ def _single_batch_fit(self, X, sample_indices):
if X.flags['WRITEABLE'] is False:
X = X.copy()

subset = self.feature_sampler_.yield_subset(self.reduction)
if self.reduction > 1.:
subset = self.feature_sampler_.yield_subset(self.reduction)
else:
subset = np.arange(self.feature_sampler_.range)
batch_size = X.shape[0]

self.n_iter_ += batch_size
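
The new guard makes feature subsampling a no-op when `reduction <= 1` instead of going through the sampler. An illustrative numpy equivalent (the `Sampler`'s actual scheme, e.g. cycling or replacement, is not visible in this diff):

```python
import numpy as np

def yield_feature_subset(n_features, reduction, rng):
    # Illustrative stand-in for the guarded feature_sampler_ call above:
    # subsample roughly a 1/reduction fraction of the features, or take
    # them all when no reduction is requested.
    if reduction > 1.:
        size = int(np.ceil(n_features / reduction))
        return rng.choice(n_features, size=size, replace=False)
    return np.arange(n_features)

subset = yield_feature_subset(100, reduction=2., rng=np.random.RandomState(0))
```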
@@ -644,29 +653,52 @@ def _update_dict(self, subset):
self.G_ -= components_subset.dot(components_subset.T)

gradient_subset -= self.C_.dot(components_subset)

order = self.random_state.permutation(n_components)
for k in order:
subset_norm = enet_norm(components_subset[k],
self.comp_l1_ratio)
self.comp_norm_[k] += subset_norm
gradient_subset = ger(1.0, self.C_[k], components_subset[k],
a=gradient_subset, overwrite_a=True)
if self.C_[k, k] > 1e-20:
components_subset[k] = gradient_subset[k] / self.C_[k, k]
# Else do not update
if self.comp_pos:
components_subset[components_subset < 0] = 0
enet_projection(components_subset[k],
atom_temp,
self.comp_norm_[k], self.comp_l1_ratio)
components_subset[k] = atom_temp
subset_norm = enet_norm(components_subset[k],
self.comp_l1_ratio)
self.comp_norm_[k] -= subset_norm
gradient_subset = ger(-1.0, self.C_[k], components_subset[k],
a=gradient_subset, overwrite_a=True)

params = deepcopy(self.dict_structure_params)
if "l1_ratio" in params:
l1_ratio = params.pop("l1_ratio")
else:
l1_ratio = 0.
if "weight" in params:
weight = params.pop("weight")
else:
weight = 1.
delta_dict = np.inf
for bcd_iter in range(self.bcd_n_iter):
if self.verbose and self.bcd_n_iter > 1:
print("[BCD] iter %02i/%02i: rel. change in dict = %g" % (
bcd_iter + 1, self.bcd_n_iter, delta_dict))
old_dict = components_subset.copy()
order = self.random_state.permutation(n_components)
for idx, k in enumerate(order):
if self.verbose:
print(" (%s) updating component %02i/%02i" % (
self.dict_structure, idx + 1, n_components))
comp_norm = None
if self.dict_structure == "enet":
subset_norm = enet_norm(components_subset[k],
l1_ratio)
self.comp_norm_[k] += subset_norm
comp_norm = self.comp_norm_[k]
gradient_subset = ger(1.0, self.C_[k], components_subset[k],
a=gradient_subset, overwrite_a=True)
if self.C_[k, k] > 1e-20:
components_subset[k] = gradient_subset[k] / self.C_[k, k]
# Else do not update
components_subset[k] = _atomic_prox(
components_subset[k], which=self.dict_structure,
weight=weight / self.C_[k, k], norm=comp_norm,
output=atom_temp, **params)
if self.dict_structure == "enet":
subset_norm = enet_norm(components_subset[k],
l1_ratio)
self.comp_norm_[k] -= subset_norm
gradient_subset = ger(-1.0, self.C_[k], components_subset[k],
a=gradient_subset, overwrite_a=True)

if self.bcd_n_iter > 1:
delta_dict = np.sum((components_subset - old_dict) ** 2)
delta_dict /= np.sum(old_dict ** 2)
delta_dict = sqrt(delta_dict)
self.components_[:, subset] = components_subset

if self.G_agg == 'full':
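
The rewritten loop is block coordinate descent over atoms: the rank-one `ger` updates isolate atom `k` from the running gradient, the unconstrained minimizer `gradient_subset[k] / C_[k, k]` is taken, and `_atomic_prox` projects it onto the structure named by `dict_structure`; `bcd_n_iter` outer passes track the relative change of the dictionary. A schematic numpy version, with a generic `prox` standing in for `_atomic_prox` (whose signature is inferred from the call above):

```python
import numpy as np

def bcd_dict_update(D, C, B, prox, weight=1., n_iter=1, seed=0):
    """Schematic version of the loop above, not the library code.

    D : (n_components, n_features) dictionary block, updated in place
    C : (n_components, n_components) code Gram matrix (the diff's self.C_)
    B : (n_components, n_features) data statistic, so gradient = B - C @ D
    prox : callable(atom, w) -> atom, a stand-in for _atomic_prox
    """
    rng = np.random.RandomState(seed)
    gradient = B - C.dot(D)                    # maintained rank-one-wise below
    rel_change = np.inf
    for _ in range(n_iter):
        old_D = D.copy()
        for k in rng.permutation(D.shape[0]):
            gradient += np.outer(C[k], D[k])   # drop atom k (the +1.0 ger call)
            if C[k, k] > 1e-20:
                D[k] = gradient[k] / C[k, k]   # unconstrained coordinate minimizer
            w = weight / max(C[k, k], 1e-20)   # division guard added for this sketch
            D[k] = prox(D[k], w)               # structure-enforcing proximal step
            gradient -= np.outer(C[k], D[k])   # re-insert updated atom (the -1.0 ger)
        rel_change = np.sqrt(((D - old_D) ** 2).sum() / (old_D ** 2).sum())
    return rel_change
```

With `prox = lambda v, w: np.sign(v) * np.maximum(np.abs(v) - w, 0.)` (soft-thresholding), this reduces to an l1-penalized dictionary update.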
62 changes: 41 additions & 21 deletions modl/decomposition/fmri.py
@@ -10,14 +10,13 @@
import itertools
import time
import warnings
from math import log, sqrt
from math import sqrt

import numpy as np
from nibabel.filebasedimages import ImageFileError
from nilearn._utils import CacheMixin
from nilearn._utils import check_niimg
from nilearn.input_data import NiftiMasker
from nilearn._utils.compat import _basestring
from sklearn.base import TransformerMixin
from sklearn.externals.joblib import Memory
from sklearn.externals.joblib import Parallel
@@ -44,7 +43,7 @@ def __init__(self,
transform_batch_size=None,
mask=None, smoothing_fwhm=None,
standardize=True, detrend=True,
low_pass=None, high_pass=None, t_r=None,
low_pass=None, high_pass=None, t_r=None,
target_affine=None, target_shape=None,
mask_strategy='background', mask_args=None,
memory=Memory(cachedir=None),
@@ -278,7 +277,10 @@ def __init__(self,
mask_strategy='background', mask_args=None,
memory=Memory(cachedir=None), memory_level=0,
n_jobs=1, verbose=0,
callback=None):
callback=None,
dict_structure="enet",
dict_structure_params={},
bcd_n_iter=1):
fMRICoderMixin.__init__(self, n_components=n_components,
alpha=alpha,
dict_init=dict_init,
@@ -306,6 +308,20 @@ def __init__(self,
self.random_state = random_state
self.callback = callback

self.dict_structure = dict_structure
self.dict_structure_params = dict_structure_params
self.bcd_n_iter = bcd_n_iter

def _pre_fit(self, imgs=None, y=None, confounds=None):
# Base logic for pipelining estimators
if imgs is None:
raise ValueError('imgs is None, use fMRICoder instead')

# Fit mask + pipelining
fMRICoderMixin.fit(self, imgs, confounds=confounds)

return self

def fit(self, imgs=None, y=None, confounds=None):
"""Compute the mask and the dictionary maps across subjects
@@ -324,17 +340,10 @@ def fit(self, imgs=None, y=None, confounds=None):
-------
self
"""
# Base logic for pipelining estimators
if imgs is None:
raise ValueError('imgs is None, use fMRICoder instead')

# Fit mask + pipelining
fMRICoderMixin.fit(self, imgs, confounds=confounds)

self.components_ = self._cache(_compute_components,
func_memory_level=1,
ignore=['n_jobs',
'verbose'])(
self._pre_fit(imgs=imgs, y=None, confounds=confounds)
calc = self._cache(_compute_components, func_memory_level=1,
ignore=['n_jobs', 'verbose'])
self.components_ = calc(
self.masker_, imgs,
confounds=confounds,
dict_init=self.components_,
@@ -348,7 +357,14 @@ def fit(self, imgs=None, y=None, confounds=None):
verbose=self.verbose,
random_state=self.random_state,
callback=self.callback,
n_jobs=self.n_jobs)
n_jobs=self.n_jobs,
dict_structure=self.dict_structure,
dict_structure_params=self.dict_structure_params,
bcd_n_iter=self.bcd_n_iter)
self._post_fit()
return self

def _post_fit(self):
self.components_img_ = self.masker_.inverse_transform(self.components_)
self.coder_ = Coder(dictionary=self.components_,
code_alpha=self.alpha,
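
The refactor splits `fit` into three reusable stages: `_pre_fit` (mask fitting and pipelining checks), a cached `_compute_components` call, and `_post_fit` (components image and coder). A stubbed schematic of that control flow, with placeholder bodies invented for illustration:

```python
class FitPipeline:
    # Minimal stand-in for the template structure of the refactored fit;
    # the real methods live on the estimator and use the masker and cache.
    def _pre_fit(self, imgs):
        if imgs is None:
            raise ValueError('imgs is None, use fMRICoder instead')
        self.masker_ = 'fitted masker'          # placeholder for mask fitting

    def _compute(self, imgs):
        return [[0.0]]                          # placeholder for _compute_components

    def _post_fit(self):
        self.components_img_ = 'unmasked maps'  # placeholder for inverse_transform

    def fit(self, imgs):
        self._pre_fit(imgs)
        self.components_ = self._compute(imgs)  # the cached call in the diff
        self._post_fit()
        return self
```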
@@ -423,7 +439,10 @@ def _compute_components(masker,
verbose=0,
random_state=None,
callback=None,
n_jobs=1):
n_jobs=1,
dict_structure="enet",
dict_structure_params={},
bcd_n_iter=1):
methods = {'masked': {'G_agg': 'masked', 'Dx_agg': 'masked'},
'dictionary only': {'G_agg': 'full', 'Dx_agg': 'full'},
'gram': {'G_agg': 'masked', 'Dx_agg': 'masked'},
@@ -458,17 +477,18 @@ def _compute_components(masker,
print("Learning...")
dict_fact = DictFact(n_components=n_components,
code_alpha=alpha,
code_l1_ratio=0,
comp_l1_ratio=1,
comp_pos=True,
code_l1_ratio=0.,
reduction=reduction,
Dx_agg=Dx_agg,
G_agg=G_agg,
learning_rate=learning_rate,
batch_size=batch_size,
random_state=random_state,
n_threads=n_jobs,
verbose=0)
verbose=0,
dict_structure=dict_structure,
dict_structure_params=dict_structure_params,
bcd_n_iter=bcd_n_iter)
dict_fact.prepare(n_samples=n_samples, n_features=n_voxels,
X=dict_init, dtype=dtype)
if n_records > 0:
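
For completeness, how the new options reach the fMRI estimator from user code. The class name `fMRIDictFact` is an assumption (only the `fMRICoderMixin` base is visible in these hunks), and the input path is a placeholder:

```python
from modl.decomposition.fmri import fMRIDictFact  # class name assumed

func_imgs = ["subject_01_bold.nii.gz"]            # placeholder 4D functional images
dict_fact = fMRIDictFact(n_components=20,
                         alpha=1.,
                         n_epochs=2,
                         reduction=2,
                         smoothing_fwhm=4.,
                         dict_structure="enet",
                         dict_structure_params=dict(l1_ratio=1.),  # sparse maps
                         bcd_n_iter=2)
dict_fact.fit(func_imgs)
maps_img = dict_fact.components_img_              # built by _post_fit above
```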
22 changes: 11 additions & 11 deletions modl/decomposition/tests/test_dict_fact.py
@@ -58,16 +58,17 @@ def test_dict_mf_reconstruction(solver):
dict_mf = DictFact(n_components=4,
code_alpha=1e-4,
n_epochs=5,
comp_l1_ratio=0,
G_agg=solver_dict[solver]['G_agg'],
Dx_agg=solver_dict[solver]['Dx_agg'],
random_state=rng_global, reduction=1)
random_state=rng_global, reduction=1,
dict_structure_params=dict(l1_ratio=0.))
dict_mf.fit(X)
P = dict_mf.transform(X)
Y = P.dot(dict_mf.components_)
rel_error = np.sum((X - Y) ** 2) / np.sum(X ** 2)
assert (rel_error < 0.02)


@pytest.mark.parametrize("solver", solvers)
def test_dict_mf_reconstruction_reduction(solver):
X, Q = generate_synthetic(n_features=20,
@@ -76,10 +77,10 @@ def test_dict_mf_reconstruction_reduction(solver):
dict_mf = DictFact(n_components=4,
code_alpha=1e-4,
n_epochs=2,
comp_l1_ratio=0,
G_agg=solver_dict[solver]['G_agg'],
Dx_agg=solver_dict[solver]['Dx_agg'],
random_state=rng_global, reduction=2)
random_state=rng_global, reduction=2,
dict_structure_params=dict(l1_ratio=0.))
dict_mf.fit(X)
P = dict_mf.transform(X)
Y = P.dot(dict_mf.components_)
@@ -95,10 +96,10 @@ def test_dict_mf_reconstruction_reproductible(solver):
dict_mf = DictFact(n_components=4,
code_alpha=1e-4,
n_epochs=2,
comp_l1_ratio=0,
G_agg=solver_dict[solver]['G_agg'],
Dx_agg=solver_dict[solver]['Dx_agg'],
random_state=0, reduction=2)
random_state=0, reduction=2,
dict_structure_params=dict(l1_ratio=0))
dict_mf.fit(X)
D1 = dict_mf.components_.copy()
P1 = dict_mf.transform(X)
@@ -120,11 +121,11 @@ def test_dict_mf_reconstruction_reduction_batch(solver):
dict_mf = DictFact(n_components=4,
code_alpha=1e-4,
n_epochs=2,
comp_l1_ratio=0,
G_agg=solver_dict[solver]['G_agg'],
Dx_agg=solver_dict[solver]['Dx_agg'],
random_state=rng_global, reduction=2,
batch_size=10)
batch_size=10,
dict_structure_params=dict(l1_ratio=0))
dict_mf.fit(X)
P = dict_mf.transform(X)
Y = P.dot(dict_mf.components_)
@@ -138,12 +139,11 @@ def test_dict_mf_reconstruction_sparse_dict(solver):
rng = check_random_state(0)
dict_init = Q + rng.randn(*Q.shape) * 0.2
dict_mf = DictFact(n_components=4, code_alpha=1e-2, n_epochs=2,
code_l1_ratio=0,
comp_l1_ratio=1,
dict_init=dict_init,
G_agg=solver_dict[solver]['G_agg'],
Dx_agg=solver_dict[solver]['Dx_agg'],
random_state=rng_global)
random_state=rng_global,
dict_structure_params=dict(l1_ratio=1.))
dict_mf.fit(X)
Q_rec = dict_mf.components_
Q_rec /= np.sqrt(np.sum(Q_rec ** 2, axis=1))[:, np.newaxis]
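
Every test update follows one migration pattern: the removed `comp_l1_ratio=x` keyword becomes `dict_structure_params=dict(l1_ratio=x)`. In short:

```python
from modl.decomposition.dict_fact import DictFact

# Before this commit:
#   dict_mf = DictFact(n_components=4, code_alpha=1e-4, comp_l1_ratio=1, ...)
# After (assuming "enet" remains the default dict_structure):
dict_mf = DictFact(n_components=4, code_alpha=1e-4,
                   dict_structure_params=dict(l1_ratio=1.))
```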
5 changes: 3 additions & 2 deletions modl/decomposition/tests/test_fmri.py
@@ -92,7 +92,8 @@ def test_dict_fact(method, memory):
dict_init=init,
method=method,
reduction=2,
smoothing_fwhm=0., n_epochs=2, alpha=1)
smoothing_fwhm=0., n_epochs=2, alpha=1,
dict_structure_params=dict(l1_ratio=1.))
dict_fact.fit(data)
maps = np.rollaxis(dict_fact.components_img_.get_data(), 3, 0)
components = np.rollaxis(components.get_data(), 3, 0)
@@ -137,4 +138,4 @@ def test_score():
pass

def test_transform():
pass
pass
2 changes: 1 addition & 1 deletion requirements.txt
@@ -7,4 +7,4 @@ nibabel>=0.2.1
nilearn>=0.2.6
scikit_learn>=0.18
scikit-image>=0.12.3
pandas>=0.18
pandas>=0.18
