ENH: implemented social sparsity shrinkage
dohmatob committed May 30, 2017
1 parent 944f705 commit c2cdd6d
Showing 6 changed files with 235 additions and 108 deletions.
14 changes: 11 additions & 3 deletions modl/datasets/fmri.py
@@ -1,5 +1,6 @@
 from collections import OrderedDict
 import json
-from os.path import join
+from os.path import join, sep
 
 from nilearn import datasets
 from nilearn.datasets import fetch_atlas_smith_2009
@@ -27,15 +28,22 @@ def load_rest_func(data_dir=None, dataset='adhd', n_subjects=40,
             except IOError:  # FileNotFoundError
                 raise ValueError(
                     'Please unmask the data using hcp_prepare.py first.')
-            data = sorted(list(mapping.values()))
+            data = mapping.values()
+            data_ = OrderedDict()
+            for masked in data:
+                subject_id = masked.split(sep)[-5]
+                if subject_id not in data_:
+                    data_[subject_id] = []
+                data_[subject_id].append(masked)
+            data = data_.values()
         else:
             hcp_dataset = fetch_hcp_rest(data_dir=data_dir,
                                          n_subjects=n_subjects)
             mask = hcp_dataset.mask
             # list of 4D nifti files for each subject
             data = hcp_dataset.func
             # Flatten it
-            data = [(record for record in subject) for subject in data]
+            data = [[record for record in subject] for subject in data]
     else:
         raise NotImplementedError
     train_data, test_data = train_test_split(data,
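The new grouping logic above keys each masked record on the subject id sitting five path components from the end of the file path. A standalone sketch with hypothetical paths (the real layout is produced by hcp_prepare.py):

```python
from collections import OrderedDict
from os.path import sep

# Hypothetical masked-record paths; the subject id ('100307') sits five
# path components from the end, which is what split(sep)[-5] extracts.
records = [
    sep.join(['', 'data', 'HCP', '100307', 'MNINonLinear', 'Results',
              'rfMRI_REST1_LR', 'masked.npy']),
    sep.join(['', 'data', 'HCP', '100307', 'MNINonLinear', 'Results',
              'rfMRI_REST2_LR', 'masked.npy']),
]

grouped = OrderedDict()
for masked in records:
    subject_id = masked.split(sep)[-5]
    grouped.setdefault(subject_id, []).append(masked)

print(list(grouped.keys()))  # ['100307']: one entry per subject, records grouped
```

Grouping records per subject (rather than flattening everything into one sorted list) is what lets the train/test split downstream operate on whole subjects instead of individual runs.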
4 changes: 2 additions & 2 deletions modl/datasets/hcp.py
@@ -285,7 +285,7 @@ def fetch_hcp_rest(data_dir=None, n_subjects=500):
     if head == "HCP":
         subject_pattern = "*/*"
     elif head == "HCP900":
-        subject_pattern == "*"
+        subject_pattern = "*"
     else:
         raise ValueError
     list_dir = sorted(glob.glob(join(source_dir, subject_pattern,
@@ -321,7 +321,7 @@ def fetch_hcp_rest(data_dir=None, n_subjects=500):
         func.append(subject_func)
 
     results = {'func': func, 'meta': meta,
-               'mask': mask,
+               'mask': mask_img,
                'description': "'Human connectome project"}
     return Bunch(**results)

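The one-character fix above (`==` to `=`) is worth spelling out: the old line was a bare comparison, so evaluating it tried to read a still-unbound local name and crashed for HCP900 layouts. A minimal reproduction of that bug class:

```python
def pick_pattern(head):
    if head == "HCP":
        subject_pattern = "*/*"
    elif head == "HCP900":
        # The old, buggy line: a comparison rather than an assignment.
        # Python must *read* subject_pattern to compare it, but the name
        # is unbound on this branch, so this raises UnboundLocalError.
        subject_pattern == "*"
    return subject_pattern

pick_pattern("HCP")     # returns '*/*'
pick_pattern("HCP900")  # raises UnboundLocalError before reaching return
```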
80 changes: 51 additions & 29 deletions modl/dict_fact.py
@@ -55,6 +55,7 @@ def __init__(self,
                  max_iter=100,
                  rand_size=True,
                  replacement=True,
+                 bcd_n_iter=1,
                  atomic_prox=None,
                  mask=None
                  ):
@@ -187,6 +188,7 @@ def __init__(self,
         self.rand_size = rand_size
         self.replacement = replacement
 
+        self.bcd_n_iter = bcd_n_iter
         self.atomic_prox = atomic_prox
         self.mask = mask
 
@@ -424,7 +426,7 @@ def transform(self, X):
                 self.code_l1_ratio, self.code_alpha, self.code_pos,
                 self.tol, self.max_iter)
             res = self.pool_.map(par_func, batches)
-            _ = list(res)
+            list(res)
         else:
             _enet_regression_single_gram(
                 G, Dx, X, code, sample_indices,
@@ -599,7 +601,7 @@ def _compute_code(self, X, sample_indices,
                 self.code_l1_ratio, self.code_alpha, self.code_pos,
                 self.tol, self.max_iter)
 
-    def _update_dict(self, subset):
+    def _update_dict(self, subset=None):
         """Dictionary update part
 
         Parameters
@@ -612,41 +614,61 @@ def _update_dict(self, subset):
         self.components_))
         len_subset = subset.shape[0]
         n_components, n_features = self.components_.shape
-        components_subset = self.components_[:, subset]
-        atom_temp = np.zeros_like(components_subset[0])
-        gradient_subset = self.gradient_[:, subset]
+        components_subset = self.components_
+        gradient_subset = self.gradient_
+        if subset is not None:
+            components_subset = components_subset[:, subset]
+            gradient_subset = gradient_subset[:, subset]
+        atom_temp = np.zeros_like(components_subset[0])
 
         if self.G_agg == 'full' and len_subset < n_features / 2.:
             self.G_ -= components_subset.dot(components_subset.T)
 
         gradient_subset -= self.C_.dot(components_subset)
 
-        for idx, k in enumerate(self.random_state.permutation(n_components)):
-            gradient_subset = ger(1., self.C_[k], components_subset[k],
-                                  a=gradient_subset, overwrite_a=True)
-            if self.C_[k, k] > 1e-20:
-                components_subset[k] = gradient_subset[k] / self.C_[k, k]
-                if self.atomic_prox is not None:
-                    components_subset[k] = _atomic_prox(
-                        components_subset[k], self.C_[k, k], mask=self.mask,
-                        which=self.atomic_prox, alpha=self.comp_alpha,
-                        l1_ratio=self.comp_l1_ratio, verbose=self.verbose,
-                        counter=self.n_iter_, idx=idx)
-                else:
-                    components_subset[k] = enet_projection(
-                        components_subset[k], atom_temp, 1.,
-                        self.comp_l1_ratio)
-                gradient_subset = ger(-1., self.C_[k], components_subset[k],
-                                      a=gradient_subset, overwrite_a=True)
-            else:
-                if self.verbose == 1:
-                    sys.stdout.write("+")
-                    sys.stdout.flush()
-                elif self.verbose:
-                    print("Adding new random atom")
-                components_subset[k] = self.random_state.randn(n_features)
-
-        self.components_[:, subset] = components_subset
+        delta_dict = np.inf
+        for bcd_iter in range(self.bcd_n_iter):
+            if self.verbose:
+                print("[BCD] iter %02i/%02i" % (bcd_iter + 1, self.bcd_n_iter))
+                print("\t ||D - D_old||_F / ||D_old||_F = %g" % delta_dict)
+            old_dict = components_subset.copy()
+            order = self.random_state.permutation(n_components)
+            for idx, k in enumerate(order):
+                gradient_subset = ger(1., self.C_[k], components_subset[k],
+                                      a=gradient_subset, overwrite_a=True)
+                if self.C_[k, k] > 1e-20:
+                    components_subset[k] = gradient_subset[k] / self.C_[k, k]
+                    if self.atomic_prox is not None:
+                        components_subset[k] = _atomic_prox(
+                            components_subset[k], self.C_[k, k],
+                            mask=self.mask, which=self.atomic_prox,
+                            alpha=self.comp_alpha, l1_ratio=self.comp_l1_ratio,
+                            counter=self.n_iter_, verbose=self.verbose,
+                            idx=idx)
+                    else:
+                        components_subset[k] = enet_projection(
+                            components_subset[k], atom_temp, 1.,
+                            self.comp_l1_ratio)
+                        components_subset[k] = atom_temp
+                    gradient_subset = ger(
+                        -1., self.C_[k], components_subset[k],
+                        a=gradient_subset, overwrite_a=True)
+                else:
+                    if self.verbose == 1:
+                        sys.stdout.write("+")
+                        sys.stdout.flush()
+                    elif self.verbose:
+                        print("Adding new random atom")
+                    components_subset[k] = self.random_state.randn(n_features)
 
+            delta_dict = np.sum((components_subset - old_dict) ** 2)
+            delta_dict /= np.sum(old_dict ** 2)
+            delta_dict = np.sqrt(delta_dict)
+
+        if subset is None:
+            self.components_ = components_subset
+        else:
+            self.components_[:, subset] = components_subset
 
         if self.G_agg == 'full':
             if len_subset < n_features / 2.:
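This hunk is what the commit title refers to: the dictionary update now makes `bcd_n_iter` block-coordinate-descent (BCD) passes per call, reports the relative change ||D - D_old||_F / ||D_old||_F between passes, and routes every updated atom through `_atomic_prox` when `atomic_prox` is set. The `_atomic_prox` helper itself lives in a file not rendered on this page, so the following is only a hedged sketch of a social sparsity shrinkage operator in the spirit of Kowalski et al.: a coefficient is shrunk according to the energy of its spatial neighborhood rather than its own magnitude alone. The function name, signature, and box-shaped neighborhood are illustrative assumptions, not the repository's actual code:

```python
import numpy as np
from scipy.ndimage import uniform_filter


def social_shrinkage(atom, alpha, mask, size=3):
    """Hedged sketch of social sparsity shrinkage for one flat atom.

    Each in-mask voxel is rescaled by max(0, 1 - alpha / e_k), where
    e_k is the root mean square of the atom over a size**3 box around
    voxel k: isolated noisy voxels are zeroed out, while spatially
    coherent blobs survive.
    """
    vol = np.zeros(mask.shape)
    vol[mask] = atom                                 # unflatten onto the grid
    energy = np.sqrt(uniform_filter(vol ** 2, size=size))
    with np.errstate(divide='ignore', invalid='ignore'):
        scale = np.maximum(1. - alpha / energy, 0.)
    scale[energy == 0.] = 0.                         # dead neighborhoods stay dead
    return (vol * scale)[mask]                       # shrink and re-flatten


# Toy usage on a dummy 10x10x10 "brain" mask
rng = np.random.RandomState(0)
mask = np.ones((10, 10, 10), dtype=bool)
atom = rng.randn(mask.sum())
shrunk = social_shrinkage(atom, alpha=0.5, mask=mask)
print("surviving voxels: %.0f%%" % (100. * (shrunk != 0).mean()))
```

The BCD loop with the delta_dict monitor exists because, with a non-trivial proximal step like this, one pass over the atoms is no longer guaranteed to approach the block minimizer; the relative Frobenius change makes the effect of extra passes observable.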
75 changes: 46 additions & 29 deletions modl/fmri.py
@@ -163,6 +163,7 @@ def __init__(self,
                  buffer_size=None,
                  n_jobs=1, verbose=0,
                  callback=None,
+                 bcd_n_iter=1,
                  atomic_prox=None):
         BaseDecomposition.__init__(self, n_components=n_components,
                                    random_state=random_state,
@@ -188,6 +189,7 @@ def __init__(self,
         self.n_epochs = n_epochs
         self.batch_size = batch_size
         self.reduction = reduction
+        self.bcd_n_iter = bcd_n_iter
 
         self.method = method
 
@@ -198,24 +200,7 @@ def __init__(self,
Expand All @@ -198,24 +200,7 @@ def __init__(self,

         self.atomic_prox = atomic_prox
 
-    def fit(self, imgs=None, y=None, confounds=None, raw=False):
-        """Compute the mask and the dictionary maps across subjects
-
-        Parameters
-        ----------
-        imgs: list of Niimg-like objects
-            See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
-            Data on which PCA must be calculated. If this is a list,
-            the affine is considered the same for all.
-
-        confounds: CSV file path or 2D matrix
-            This parameter is passed to nilearn.signal.clean. Please see the
-            related documentation for details
-
-        Returns
-        -------
-        self
-        """
+    def _prepare(self, imgs=None, y=None, confounds=None, raw=False):
         if imgs is None or self.n_epochs == 0:
             # Will raise an error if mask has not been provided
             if self.mask is None:
@@ -260,6 +245,7 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
                                        random_state=self.random_state,
                                        n_threads=self.n_jobs,
                                        verbose=0,
+                                       bcd_n_iter=self.bcd_n_iter,
                                        atomic_prox=self.atomic_prox, mask=mask)
             self.dict_fact_.prepare(n_samples=n_samples,
                                     n_features=n_voxels,
@@ -286,8 +272,6 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
             G_agg = method['G_agg']
             Dx_agg = method['Dx_agg']
 
-            n_records = len(imgs)
-
             if self.verbose:
                 print("Preloading data")
 
@@ -315,7 +299,12 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
             n_samples = indices_list[-1] + 1
 
             n_voxels = np.sum(check_niimg(self.masker_.mask_img_).get_data() != 0)
-            if self.dict_init is not None:
+            if hasattr(self, "components_") and self.components_ is not None:
+                dict_init = self.components_
+                if dict_init is None:
+                    n_components = self.n_components
+                n_components = len(self.components_)
+            elif self.dict_init is not None:
                 if self.verbose:
                     print("Preloading initial dictionary")
                 if isinstance(self.dict_init, _basestring) and \
@@ -342,6 +331,7 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
             elif self.mask_img is not None:
                 mask = self.mask_img_.get_data().astype(np.bool)
 
+            self.components_ = dict_init
             self.dict_fact_ = DictFact(n_components=n_components,
                                        code_alpha=self.alpha,
                                        code_l1_ratio=self.code_l1_ratio,
@@ -355,11 +345,34 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
                                        random_state=self.random_state,
                                        n_threads=self.n_jobs,
                                        verbose=self.verbose,
+                                       bcd_n_iter=self.bcd_n_iter,
                                        atomic_prox=self.atomic_prox,
                                        mask=mask)
             self.dict_fact_.prepare(n_samples=n_samples, n_features=n_voxels,
                                     X=dict_init, dtype=dtype)
+        return data_list, indices_list, shelving
+
+    def fit(self, imgs=None, y=None, confounds=None, raw=False):
+        """Compute the mask and the dictionary maps across subjects
+
+        Parameters
+        ----------
+        imgs: list of Niimg-like objects
+            See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
+            Data on which PCA must be calculated. If this is a list,
+            the affine is considered the same for all.
+
+        confounds: CSV file path or 2D matrix
+            This parameter is passed to nilearn.signal.clean. Please see the
+            related documentation for details
+
+        Returns
+        -------
+        self
+        """
+        data_list, indices_list, shelving = self._prepare(
+            imgs=imgs, y=y, confounds=confounds, raw=raw)
+        n_records = len(imgs)
+        current_n_records = 0
         for i in range(self.n_epochs):
             if self.verbose:
@@ -372,10 +385,7 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
                 data = data[::self.reduction]
                 sample_indices = np.arange(indices_list[record],
                                            indices_list[record + 1])
-                if raw:
-                    if isinstance(img, basestring):
-                        data = np.load(data, mmap_mode="r")
-                else:
+                if not raw:
                     if shelving:
                         data = data.get()
                     else:
@@ -443,7 +453,7 @@ def score(self, imgs, confounds=None):
         score /= len(data_list)
         return score
 
-    def transform(self, imgs, confounds=None):
+    def transform(self, imgs, confounds=None, raw=False):
         """Compute the mask and the ICA maps across subjects
 
         Parameters
@@ -464,17 +474,24 @@ def transform(self, imgs, confounds=None):
"""
if not isinstance(imgs, (list, tuple)):
imgs = [imgs]
raw = isinstance(imgs[0], np.ndarray)

shelving = self.masker_._shelving
raw = raw or isinstance(imgs[0], np.ndarray)

if hasattr(self, "masker_"):
shelving = self.masker_._shelving
else:
shelving = False
codes = []
if raw:
data_list = imgs
else:
if confounds is None:
confounds = itertools.repeat(None)
data_list = self.masker_.transform(imgs, confounds) # shelved

# XXX using components which are not flipped produces garbage
# predictions!
self.dict_fact_.components_ = self.components_

for idx, data in enumerate(data_list):
if raw:
if isinstance(data, _basestring):
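Taken together, the estimator in modl/fmri.py now exposes the two new knobs and can warm-start: `_prepare` seeds the dictionary from an existing `components_`, and `transform` accepts pre-masked arrays via `raw=True`. A hedged usage sketch — `fMRIDictFact` is assumed to be the estimator class defined in this file, and 'social' an assumed accepted value of `atomic_prox`:

```python
from modl.fmri import fMRIDictFact          # assumed class name
from modl.datasets.fmri import load_rest_func

train_imgs, test_imgs = load_rest_func(dataset='adhd', n_subjects=10)

model = fMRIDictFact(n_components=20,
                     alpha=1e-3,
                     n_epochs=2,
                     bcd_n_iter=5,          # inner BCD passes per dictionary update
                     atomic_prox='social',  # assumed spelling of the new shrinkage
                     verbose=1)
model.fit(train_imgs)

# Warm start: _prepare() now reuses self.components_ as dict_init, so a
# second fit() call refines the dictionary instead of restarting from scratch.
model.fit(train_imgs)

codes = model.transform(test_imgs)  # loadings; raw=True would accept arrays
```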