From c2cdd6d3ae32f805c156d41190ac38f486ffd4a6 Mon Sep 17 00:00:00 2001 From: DOHMATOB Elvis Date: Fri, 19 May 2017 17:52:47 +0200 Subject: [PATCH] ENH: implemented social sparsity shrinkage --- modl/datasets/fmri.py | 14 ++- modl/datasets/hcp.py | 4 +- modl/dict_fact.py | 80 ++++++++++------ modl/fmri.py | 75 +++++++++------ modl/social_sparsity.py | 168 ++++++++++++++++++++++++--------- modl/structured_dict_update.py | 2 + 6 files changed, 235 insertions(+), 108 deletions(-) diff --git a/modl/datasets/fmri.py b/modl/datasets/fmri.py index c82d2bd..00ced12 100644 --- a/modl/datasets/fmri.py +++ b/modl/datasets/fmri.py @@ -1,5 +1,6 @@ +from collections import OrderedDict import json -from os.path import join +from os.path import join, sep from nilearn import datasets from nilearn.datasets import fetch_atlas_smith_2009 @@ -27,7 +28,14 @@ def load_rest_func(data_dir=None, dataset='adhd', n_subjects=40, except IOError: # FileNotFoundError raise ValueError( 'Please unmask the data using hcp_prepare.py first.') - data = sorted(list(mapping.values())) + data = mapping.values() + data_ = OrderedDict() + for masked in data: + subject_id = masked.split(sep)[-5] + if subject_id not in data_: + data_[subject_id] = [] + data_[subject_id].append(masked) + data = data_.values() else: hcp_dataset = fetch_hcp_rest(data_dir=data_dir, n_subjects=n_subjects) @@ -35,7 +43,7 @@ def load_rest_func(data_dir=None, dataset='adhd', n_subjects=40, # list of 4D nifti files for each subject data = hcp_dataset.func # Flatten it - data = [(record for record in subject) for subject in data] + data = [[record for record in subject] for subject in data] else: raise NotImplementedError train_data, test_data = train_test_split(data, diff --git a/modl/datasets/hcp.py b/modl/datasets/hcp.py index f9737e5..2cf704d 100644 --- a/modl/datasets/hcp.py +++ b/modl/datasets/hcp.py @@ -285,7 +285,7 @@ def fetch_hcp_rest(data_dir=None, n_subjects=500): if head == "HCP": subject_pattern = "*/*" elif head == "HCP900": - subject_pattern == "*" + subject_pattern = "*" else: raise ValueError list_dir = sorted(glob.glob(join(source_dir, subject_pattern, @@ -321,7 +321,7 @@ def fetch_hcp_rest(data_dir=None, n_subjects=500): func.append(subject_func) results = {'func': func, 'meta': meta, - 'mask': mask, + 'mask': mask_img, 'description': "'Human connectome project"} return Bunch(**results) diff --git a/modl/dict_fact.py b/modl/dict_fact.py index acebae8..ade48de 100644 --- a/modl/dict_fact.py +++ b/modl/dict_fact.py @@ -55,6 +55,7 @@ def __init__(self, max_iter=100, rand_size=True, replacement=True, + bcd_n_iter=1, atomic_prox=None, mask=None ): @@ -187,6 +188,7 @@ def __init__(self, self.rand_size = rand_size self.replacement = replacement + self.bcd_n_iter = bcd_n_iter self.atomic_prox = atomic_prox self.mask = mask @@ -424,7 +426,7 @@ def transform(self, X): self.code_l1_ratio, self.code_alpha, self.code_pos, self.tol, self.max_iter) res = self.pool_.map(par_func, batches) - _ = list(res) + list(res) else: _enet_regression_single_gram( G, Dx, X, code, sample_indices, @@ -599,7 +601,7 @@ def _compute_code(self, X, sample_indices, self.code_l1_ratio, self.code_alpha, self.code_pos, self.tol, self.max_iter) - def _update_dict(self, subset): + def _update_dict(self, subset=None): """Dictionary update part Parameters @@ -612,41 +614,61 @@ def _update_dict(self, subset): self.components_)) len_subset = subset.shape[0] n_components, n_features = self.components_.shape - components_subset = self.components_[:, subset] - atom_temp = 
np.zeros_like(components_subset[0]) + components_subset = self.components_ gradient_subset = self.gradient_[:, subset] + if subset is not None: + components_subset = components_subset[:, subset] + gradient_subset = gradient_subset[:, subset] + atom_temp = np.zeros_like(components_subset[0]) if self.G_agg == 'full' and len_subset < n_features / 2.: self.G_ -= components_subset.dot(components_subset.T) gradient_subset -= self.C_.dot(components_subset) - for idx, k in enumerate(self.random_state.permutation(n_components)): - gradient_subset = ger(1., self.C_[k], components_subset[k], - a=gradient_subset, overwrite_a=True) - if self.C_[k, k] > 1e-20: - components_subset[k] = gradient_subset[k] / self.C_[k, k] - if self.atomic_prox is not None: - components_subset[k] = _atomic_prox( - components_subset[k], self.C_[k, k], mask=self.mask, - which=self.atomic_prox, alpha=self.comp_alpha, - l1_ratio=self.comp_l1_ratio, verbose=self.verbose, - counter=self.n_iter_, idx=idx) + delta_dict = np.inf + for bcd_iter in range(self.bcd_n_iter): + if self.verbose: + print("[BCD] iter %02i/%02i" % (bcd_iter + 1, self.bcd_n_iter)) + print("\t ||D - D_old||_F / ||D_old||_F = %g" % delta_dict) + old_dict = components_subset.copy() + order = self.random_state.permutation(n_components) + for idx, k in enumerate(order): + gradient_subset = ger(1., self.C_[k], components_subset[k], + a=gradient_subset, overwrite_a=True) + if self.C_[k, k] > 1e-20: + components_subset[k] = gradient_subset[k] / self.C_[k, k] + if self.atomic_prox is not None: + components_subset[k] = _atomic_prox( + components_subset[k], self.C_[k, k], + mask=self.mask, which=self.atomic_prox, + alpha=self.comp_alpha, l1_ratio=self.comp_l1_ratio, + counter=self.n_iter_, verbose=self.verbose, + idx=idx) + else: + components_subset[k] = enet_projection( + components_subset[k], atom_temp, 1., + self.comp_l1_ratio) + components_subset[k] = atom_temp + gradient_subset = ger( + -1., self.C_[k], components_subset[k], + a=gradient_subset, overwrite_a=True) else: - components_subset[k] = enet_projection( - components_subset[k], atom_temp, 1., - self.comp_l1_ratio) - gradient_subset = ger(-1., self.C_[k], components_subset[k], - a=gradient_subset, overwrite_a=True) - else: - if self.verbose == 1: - sys.stdout.write("+") - sys.stdout.flush() - elif self.verbose: - print("Adding new random atom") - components_subset[k] = self.random_state.randn(n_features) - - self.components_[:, subset] = components_subset + if self.verbose == 1: + sys.stdout.write("+") + sys.stdout.flush() + elif self.verbose: + print("Adding new random atom") + components_subset[k] = self.random_state.randn(n_features) + + delta_dict = np.sum((components_subset - old_dict) ** 2) + delta_dict /= np.sum(old_dict ** 2) + delta_dict = np.sqrt(delta_dict) + + if subset is None: + self.components_ = components_subset + else: + self.components_[:, subset] = components_subset if self.G_agg == 'full': if len_subset < n_features / 2.: diff --git a/modl/fmri.py b/modl/fmri.py index 838a2bc..cc6f731 100644 --- a/modl/fmri.py +++ b/modl/fmri.py @@ -163,6 +163,7 @@ def __init__(self, buffer_size=None, n_jobs=1, verbose=0, callback=None, + bcd_n_iter=1, atomic_prox=None): BaseDecomposition.__init__(self, n_components=n_components, random_state=random_state, @@ -188,6 +189,7 @@ def __init__(self, self.n_epochs = n_epochs self.batch_size = batch_size self.reduction = reduction + self.bcd_n_iter = bcd_n_iter self.method = method @@ -198,24 +200,7 @@ def __init__(self, self.atomic_prox = atomic_prox - def 
fit(self, imgs=None, y=None, confounds=None, raw=False):
-        """Compute the mask and the dictionary maps across subjects
-
-        Parameters
-        ----------
-        imgs: list of Niimg-like objects
-            See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
-            Data on which PCA must be calculated. If this is a list,
-            the affine is considered the same for all.
-
-        confounds: CSV file path or 2D matrix
-            This parameter is passed to nilearn.signal.clean. Please see the
-            related documentation for details
-
-        Returns
-        -------
-        self
-        """
+    def _prepare(self, imgs=None, y=None, confounds=None, raw=False):
         if imgs is None or self.n_epochs == 0:
             # Will raise error is mask has not been provided
             if self.mask is None:
@@ -260,6 +245,7 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
                 random_state=self.random_state,
                 n_threads=self.n_jobs,
                 verbose=0,
+                bcd_n_iter=self.bcd_n_iter,
                 atomic_prox=self.atomic_prox,
                 mask=mask)
             self.dict_fact_.prepare(n_samples=n_samples, n_features=n_voxels,
@@ -286,8 +272,6 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
         G_agg = method['G_agg']
         Dx_agg = method['Dx_agg']
 
-        n_records = len(imgs)
-
         if self.verbose:
             print("Preloading data")
@@ -315,7 +299,12 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
         n_samples = indices_list[-1] + 1
         n_voxels = np.sum(check_niimg(self.masker_.mask_img_).get_data() != 0)
 
-        if self.dict_init is not None:
+        if hasattr(self, "components_") and self.components_ is not None:
+            # Warm restart: reuse the dictionary learned by a previous
+            # call to fit as the initialization.
+            dict_init = self.components_
+            n_components = len(self.components_)
+        elif self.dict_init is not None:
             if self.verbose:
                 print("Preloading initial dictionary")
             if isinstance(self.dict_init, _basestring) and \
@@ -342,6 +331,7 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
         elif self.mask_img is not None:
             mask = self.mask_img_.get_data().astype(np.bool)
 
+        self.components_ = dict_init
         self.dict_fact_ = DictFact(n_components=n_components,
                                    code_alpha=self.alpha,
                                    code_l1_ratio=self.code_l1_ratio,
@@ -355,11 +345,34 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
                                    random_state=self.random_state,
                                    n_threads=self.n_jobs,
                                    verbose=self.verbose,
+                                   bcd_n_iter=self.bcd_n_iter,
+                                   atomic_prox=self.atomic_prox,
                                    mask=mask)
         self.dict_fact_.prepare(n_samples=n_samples, n_features=n_voxels,
                                 X=dict_init, dtype=dtype)
+        return data_list, indices_list, shelving
+
+    def fit(self, imgs=None, y=None, confounds=None, raw=False):
+        """Compute the mask and the dictionary maps across subjects
+
+        Parameters
+        ----------
+        imgs: list of Niimg-like objects
+            See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
+            Data on which PCA must be calculated. If this is a list,
+            the affine is considered the same for all.
+
+        confounds: CSV file path or 2D matrix
+            This parameter is passed to nilearn.signal.clean. 
Please see the + related documentation for details + + Returns + ------- + self + """ + data_list, indices_list, shelving = self._prepare( + imgs=imgs, y=y, confounds=confounds, raw=raw) + n_records = len(imgs) current_n_records = 0 for i in range(self.n_epochs): if self.verbose: @@ -372,10 +385,7 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False): data = data[::self.reduction] sample_indices = np.arange(indices_list[record], indices_list[record + 1]) - if raw: - if isinstance(img, basestring): - data = np.load(data, mmap_mode="r") - else: + if not raw: if shelving: data = data.get() else: @@ -443,7 +453,7 @@ def score(self, imgs, confounds=None): score /= len(data_list) return score - def transform(self, imgs, confounds=None): + def transform(self, imgs, confounds=None, raw=False): """Compute the mask and the ICA maps across subjects Parameters @@ -464,10 +474,12 @@ def transform(self, imgs, confounds=None): """ if not isinstance(imgs, (list, tuple)): imgs = [imgs] - raw = isinstance(imgs[0], np.ndarray) - - shelving = self.masker_._shelving + raw = raw or isinstance(imgs[0], np.ndarray) + if hasattr(self, "masker_"): + shelving = self.masker_._shelving + else: + shelving = False codes = [] if raw: data_list = imgs @@ -475,6 +487,11 @@ def transform(self, imgs, confounds=None): if confounds is None: confounds = itertools.repeat(None) data_list = self.masker_.transform(imgs, confounds) # shelved + + # XXX using components which are not flipped produces garbage + # predictions! + self.dict_fact_.components_ = self.components_ + for idx, data in enumerate(data_list): if raw: if isinstance(data, _basestring): diff --git a/modl/social_sparsity.py b/modl/social_sparsity.py index f93a225..3d97202 100644 --- a/modl/social_sparsity.py +++ b/modl/social_sparsity.py @@ -6,8 +6,10 @@ # License: simplified BSD from math import sqrt +from numbers import Number import numpy as np +from scipy import ndimage from nilearn.decoding.fista import _check_lipschitz_continuous from nilearn.decoding.objective_functions import (_unmask, _logistic_loss_grad, @@ -127,12 +129,11 @@ def fista(f1_grad, f2_prox, lipschitz_constant, w_size, z = w + ((t0 - 1.) 
/ t) * (w - w_old)
         w_old[:] = w
 
-    init = dict(w=w.copy(), z=z, t=t, stepsize=stepsize)
     return w, history, init
 
 
-def _neighboorhood_norm(img, side_weights=.7):
+def _neighboorhood_norms_squared(img, side_weights=.7):
     " Return the squared norm averaged on 3D 6-neighboorhood"
 
     # Our stride tricks only work on C-contiguous arrays
@@ -159,56 +160,135 @@ def _neighboorhood_norm(img, side_weights=.7):
     # Now the edges (only 3% of the time is spent below for large arrays)
     # For 5 neighboors (6 faces of a cube)
     factor = (1 + 5 * side_weights)
-    grp_norms[ 0, 1:-1, 1:-1] /= factor
-    grp_norms[ -1, 1:-1, 1:-1] /= factor
-    grp_norms[1:-1, 0, 1:-1] /= factor
-    grp_norms[1:-1, -1, 1:-1] /= factor
-    grp_norms[1:-1, 1:-1, 0] /= factor
-    grp_norms[1:-1, 1:-1, -1] /= factor
+    grp_norms[0, 1:-1, 1:-1] /= factor
+    grp_norms[-1, 1:-1, 1:-1] /= factor
+    grp_norms[1:-1, 0, 1:-1] /= factor
+    grp_norms[1:-1, -1, 1:-1] /= factor
+    grp_norms[1:-1, 1:-1, 0] /= factor
+    grp_norms[1:-1, 1:-1, -1] /= factor
+
     # For 4 neighboors (12 edges of a cube)
     factor = (1 + 4 * side_weights)
-    grp_norms[ 0, 0, 1:-1] /= factor
-    grp_norms[ 0, -1, 1:-1] /= factor
-    grp_norms[ -1, 0, 1:-1] /= factor
-    grp_norms[ -1, -1, 1:-1] /= factor
-    grp_norms[1:-1, 0, 0] /= factor
-    grp_norms[1:-1, 0, -1] /= factor
-    grp_norms[1:-1, -1, 0] /= factor
-    grp_norms[1:-1, -1, -1] /= factor
-    grp_norms[ 0, 1:-1, 0] /= factor
-    grp_norms[ 0, 1:-1, -1] /= factor
-    grp_norms[ -1, 1:-1, 0] /= factor
-    grp_norms[ -1, 1:-1, -1] /= factor
+    grp_norms[0, 0, 1:-1] /= factor
+    grp_norms[0, -1, 1:-1] /= factor
+    grp_norms[-1, 0, 1:-1] /= factor
+    grp_norms[-1, -1, 1:-1] /= factor
+    grp_norms[1:-1, 0, 0] /= factor
+    grp_norms[1:-1, 0, -1] /= factor
+    grp_norms[1:-1, -1, 0] /= factor
+    grp_norms[1:-1, -1, -1] /= factor
+    grp_norms[0, 1:-1, 0] /= factor
+    grp_norms[0, 1:-1, -1] /= factor
+    grp_norms[-1, 1:-1, 0] /= factor
+    grp_norms[-1, 1:-1, -1] /= factor
+
     # For 3 neighboors (8 vertices of a cube)
     factor = (1 + 3 * side_weights)
-    grp_norms[ 0, 0, 0] /= factor
-    grp_norms[ 0, -1, 0] /= factor
-    grp_norms[-1, 0, 0] /= factor
-    grp_norms[-1, -1, 0] /= factor
-    grp_norms[ 0, 0, -1] /= factor
-    grp_norms[ 0, -1, -1] /= factor
-    grp_norms[-1, 0, -1] /= factor
+    grp_norms[0, 0, 0] /= factor
+    grp_norms[0, -1, 0] /= factor
+    grp_norms[-1, 0, 0] /= factor
+    grp_norms[-1, -1, 0] /= factor
+    grp_norms[0, 0, -1] /= factor
+    grp_norms[0, -1, -1] /= factor
+    grp_norms[-1, 0, -1] /= factor
     grp_norms[-1, -1, -1] /= factor
 
     return grp_norms
 
 
-def _prox_social_sparsity(img, alpha, side_weights=.7):
-    """Social sparsity on 3x3x3 groups, as defined by eq 4 of Kowalski et
-    al, 'Social Sparsity...'"""
-    grp_norms = _neighboorhood_norm(img, side_weights=side_weights)
+def _prox_l21(img, alpha, grp_norms_squared=None):
     # To avoid side effects, assign the raw img values on the side
-    grp_norms = np.sqrt(grp_norms, out=grp_norms)
+    if grp_norms_squared is None:
+        grp_norms_squared = img ** 2
+    grp_norms = np.sqrt(grp_norms_squared, out=grp_norms_squared)
     shrink = np.zeros(img.shape)
     img_nz = (grp_norms > 1e-10)
     shrink[img_nz] = (1 - alpha / grp_norms[img_nz]).clip(0)
     return img * shrink
+
+
+def _prox_social_sparsity(img, alpha, side_weights=.7, use_scipy=False,
+                          radius=1):
+    """Social sparsity on cubic (2 * radius + 1) ** 3 neighborhoods, as
+    defined by eq 4 of Kowalski et al, 'Social Sparsity...'
+
+    Parameters
+    ----------
+    side_weights: nonnegative float, optional (default 0.7)
+        Weights of the sides of the neighborhood relative to its center. 
A value of 1
+        corresponds to the classical overlapping group-Lasso shrinkage
+        operator.
+
+    radius: int, optional (default 1)
+        Radius of the neighborhood. This is a field-of-view (FOV)
+        parameter that plays a role similar to the FWHM of a Gaussian
+        kernel. The larger the radius, the smoother the prox.
+
+    use_scipy: boolean, optional (default False)
+        If True, use scipy's ndimage.filters.convolve for the smoothing,
+        with an appropriate rectangular local kernel.
+
+    Notes
+    -----
+    For the moment, only the use_scipy=True option supports the radius
+    parameter.
+    """
+    if use_scipy:
+        if isinstance(radius, Number):
+            radius = (radius,) * img.ndim
+        radius = np.asanyarray(radius)
+        diameter = 2 * radius + 1
+        grp_norms_squared = img ** 2
+        if side_weights == 1.:
+            # use scipy's optimized rectangular uniform filter
+            ndimage.uniform_filter(grp_norms_squared, size=diameter,
+                                   output=grp_norms_squared,
+                                   mode="constant")
+        else:
+            social_filter = np.full(diameter, 1.)
+            social_filter *= side_weights
+
+            # adjust weight at center of filter
+            if img.ndim == 1:
+                social_filter[radius] = 1.
+            elif img.ndim == 2:
+                social_filter[radius, radius] = 1.
+            elif img.ndim == 3:
+                social_filter[radius, radius, radius] = 1.
+            else:
+                raise RuntimeError("Unsupported img.ndim: %i." % img.ndim)
+
+            # normalize filter weights to sum to 1
+            social_filter /= social_filter.sum()
+
+            # the actual convolution
+            ndimage.filters.convolve(grp_norms_squared, social_filter,
+                                     output=grp_norms_squared, mode="constant")
+    else:
+        # fall back to the stride-trick implementation above (due to @gael)
+        grp_norms_squared = _neighboorhood_norms_squared(
+            img, side_weights=side_weights)
+    return _prox_l21(img, alpha, grp_norms_squared=grp_norms_squared)
+
+
+def _social_sparsity_alpha_grid(XTy, mask, side_weights=.7, n_alphas=10,
+                                eps=1e-3):
+    imgs = np.zeros((len(XTy),) + mask.shape, dtype=XTy.dtype)
+    imgs[:, mask] = XTy
+    grp_norms_squared = [_neighboorhood_norms_squared(
+        img, side_weights=side_weights) for img in imgs]
+    alpha_max = np.sqrt(np.max(grp_norms_squared))
+
+    if n_alphas == 1:
+        return np.array([alpha_max])
+    alpha_min = alpha_max * eps
+    return np.logspace(np.log10(alpha_min), np.log10(alpha_max),
+                       num=n_alphas)[::-1]
+
+
 def social_solver(X, y, alpha, mask, loss=None, max_iter=100,
-                  lipschitz_constant=None, init=None,
-                  tol=1e-4, callback=None, verbose=1):
+                  lipschitz_constant=None, init=None, tol=1e-4,
+                  callback=None, verbose=1, **kwargs):
     """Solver for social-sparsity models.
Can handle least squares (mean squared error --a.k.a mse) or logistic
@@ -288,13 +368,13 @@ def f1_grad(w):
     if loss == "mse":
         def f2_prox(w, stepsize):
             out = _prox_social_sparsity(_unmask(w, mask),
-                                        alpha * stepsize)
+                                        alpha * stepsize, **kwargs)
             return out[mask]
     else:
         # Deal with intercept
         def f2_prox(w, stepsize):
             out = _prox_social_sparsity(_unmask(w[:-1], mask),
-                                        alpha * stepsize)
+                                        alpha * stepsize, **kwargs)
             return np.append(out[mask], w[-1])
 
     # invoke FISTA solver
@@ -306,12 +386,10 @@ def f2_prox(w, stepsize):
     return w, obj, init
 
 
-def social_solver_with_l1(X, y, alpha, l1_ratio, mask,
-                          loss=None, max_iter=100,
-                          lipschitz_constant=None, init=None,
-                          tol=1e-4, callback=None, verbose=1):
+def social_solver_with_l1(X, y, alpha, l1_ratio, mask, loss=None, max_iter=100,
+                          lipschitz_constant=None, init=None, tol=1e-4,
+                          callback=None, verbose=1, **kwargs):
     # Hack to plug social-sparsity in SpaceNet
-    return social_solver(X, y, alpha, mask,
-                         loss=loss, max_iter=max_iter,
-                         lipschitz_constant=lipschitz_constant, init=init,
-                         tol=tol, callback=callback, verbose=verbose)
+    return social_solver(X, y, alpha, mask, loss=loss, max_iter=max_iter,
+                         lipschitz_constant=lipschitz_constant, init=init,
+                         tol=tol, callback=callback, verbose=verbose, **kwargs)
diff --git a/modl/structured_dict_update.py b/modl/structured_dict_update.py
index 27c549f..7180275 100644
--- a/modl/structured_dict_update.py
+++ b/modl/structured_dict_update.py
@@ -94,6 +94,8 @@ def _unmask(v):
     weight = alpha
     weight /= a_kk
     weight *= counter
+    if counter < 1.:
+        raise ValueError("counter should be >= 1, got %s" % counter)
    if which == "classical":
         from .utils.math.enet import enet_projection
         atom_temp = np.zeros_like(atom)
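
The snippet below is a minimal, self-contained sketch of what the use_scipy=True branch of _prox_social_sparsity computes (eq. 4 of Kowalski et al.): average the squared coefficients over a small weighted cubic neighborhood, then soft-threshold each voxel by the square root of that average. Only numpy and scipy are assumed; the helper name social_shrinkage is illustrative and is not part of the modl API.

import numpy as np
from scipy import ndimage


def social_shrinkage(img, alpha, side_weights=0.7, radius=1):
    """Soft-threshold each voxel by the weighted RMS norm of its
    cubic neighborhood (eq. 4 of Kowalski et al.)."""
    # Cubic kernel of side 2 * radius + 1, sides down-weighted relative
    # to the center, normalized to sum to one.
    shape = (2 * radius + 1,) * img.ndim
    kernel = np.full(shape, side_weights)
    kernel[(radius,) * img.ndim] = 1.
    kernel /= kernel.sum()

    # Neighborhood-averaged squared coefficients, then their square root.
    grp_norms = np.sqrt(ndimage.convolve(img ** 2, kernel, mode="constant"))

    # Group soft-thresholding: scale each coefficient by
    # max(0, 1 - alpha / neighborhood_norm).
    shrink = np.zeros_like(img)
    nonzero = grp_norms > 1e-10
    shrink[nonzero] = (1. - alpha / grp_norms[nonzero]).clip(0.)
    return img * shrink


if __name__ == "__main__":
    rng = np.random.RandomState(0)
    img = rng.randn(16, 16, 16)
    shrunk = social_shrinkage(img, alpha=0.5)
    print("fraction of zeroed voxels:", np.mean(shrunk == 0.))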
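
_social_sparsity_alpha_grid follows the usual regularization-path recipe: alpha_max is the largest neighborhood norm of the unpenalized correlations XTy (any larger alpha shrinks every voxel to zero), and the grid is log-spaced down to eps * alpha_max, largest first. The standalone sketch below reproduces that recipe with the same kernel construction as the previous snippet; social_alpha_grid is an illustrative name, not part of the modl API.

import numpy as np
from scipy import ndimage


def social_alpha_grid(XTy, mask, side_weights=0.7, radius=1,
                      n_alphas=10, eps=1e-3):
    """Log-spaced alpha grid, largest first, for the social-sparsity prox."""
    # Unmask each map into the full 3D volume.
    imgs = np.zeros((len(XTy),) + mask.shape, dtype=XTy.dtype)
    imgs[:, mask] = XTy

    # Same weighted neighborhood-averaging kernel as in the prox sketch.
    shape = (2 * radius + 1,) * mask.ndim
    kernel = np.full(shape, side_weights)
    kernel[(radius,) * mask.ndim] = 1.
    kernel /= kernel.sum()

    # alpha_max is the largest neighborhood norm: at that value the
    # shrinkage factor 1 - alpha / norm is <= 0 everywhere, hence w = 0.
    norms_squared = [ndimage.convolve(img ** 2, kernel, mode="constant")
                     for img in imgs]
    alpha_max = np.sqrt(np.max(norms_squared))
    if n_alphas == 1:
        return np.array([alpha_max])
    return np.logspace(np.log10(alpha_max * eps), np.log10(alpha_max),
                       num=n_alphas)[::-1]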
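
With this branch installed, the solver itself can be exercised on a small synthetic problem roughly as follows. This is a sketch, not a test: it assumes social_solver estimates the Lipschitz constant itself when lipschitz_constant is None (as nilearn's other solvers do), and that the extra keyword arguments are forwarded to _prox_social_sparsity as in the hunk above.

import numpy as np
from modl.social_sparsity import social_solver

rng = np.random.RandomState(0)
mask = np.ones((8, 8, 8), dtype=bool)
n_samples, n_voxels = 50, int(mask.sum())

# Synthetic regression problem on masked 3D data.
X = rng.randn(n_samples, n_voxels)
w_true = np.zeros(n_voxels)
w_true[:20] = 1.
y = X.dot(w_true) + 0.1 * rng.randn(n_samples)

# side_weights / use_scipy / radius are forwarded to _prox_social_sparsity
# through **kwargs.
w, objective, init = social_solver(X, y, alpha=0.1, mask=mask, loss="mse",
                                   max_iter=50, verbose=0,
                                   side_weights=0.7, use_scipy=True, radius=1)
print("non-zero coefficients:", np.sum(w != 0))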