Commit 23abf39

more BFs

dohmatob committed May 26, 2017
1 parent 944f705 commit 23abf39
Showing 4 changed files with 85 additions and 57 deletions.
13 changes: 10 additions & 3 deletions modl/datasets/fmri.py
@@ -1,5 +1,5 @@
 import json
-from os.path import join
+from os.path import join, sep

 from nilearn import datasets
 from nilearn.datasets import fetch_atlas_smith_2009
@@ -27,15 +27,22 @@ def load_rest_func(data_dir=None, dataset='adhd', n_subjects=40,
             except IOError:  # FileNotFoundError
                 raise ValueError(
                     'Please unmask the data using hcp_prepare.py first.')
-            data = sorted(list(mapping.values()))
+            data = mapping.values()
+            data_ = {}
+            for masked in data:
+                subject_id = masked.split(sep)[10]
+                if subject_id not in data_:
+                    data_[subject_id] = []
+                data_[subject_id].append(masked)
+            data = data_.values()
         else:
             hcp_dataset = fetch_hcp_rest(data_dir=data_dir,
                                          n_subjects=n_subjects)
             mask = hcp_dataset.mask
             # list of 4D nifti files for each subject
             data = hcp_dataset.func
             # Flatten it
-            data = [(record for record in subject) for subject in data]
+            data = [[record for record in subject] for subject in data]
     else:
         raise NotImplementedError
     train_data, test_data = train_test_split(data,
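Two fixes land in load_rest_func. First, the unmasked HCP records are now grouped per subject before the train/test split, keying on a positional path component (split(sep)[10] assumes a fixed absolute-path depth for the unmasked layout). Second, data = [(record for record in subject) for subject in data] built a list of generator expressions, which are silently exhausted after a single pass; swapping the inner parentheses for brackets yields the intended list of lists. A self-contained sketch of the grouping step, with hypothetical paths and a shallower component index than the commit's 10:

# Standalone sketch of the per-subject grouping above; the paths and the
# component index (3 here, 10 in the commit's deeper layout) are hypothetical.
from os.path import sep

paths = [
    sep.join(["", "data", "HCP900", "100307", "rest1_masked.npy"]),
    sep.join(["", "data", "HCP900", "100307", "rest2_masked.npy"]),
    sep.join(["", "data", "HCP900", "100408", "rest1_masked.npy"]),
]

records = {}
for masked in paths:
    subject_id = masked.split(sep)[3]    # index into the path components
    records.setdefault(subject_id, []).append(masked)

data = list(records.values())            # one list of records per subject

Grouping on a hard-coded path index only works for one directory layout; deriving subject_id from a regex or from the fetcher's metadata would be sturdier.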
4 changes: 2 additions & 2 deletions modl/datasets/hcp.py
@@ -285,7 +285,7 @@ def fetch_hcp_rest(data_dir=None, n_subjects=500):
     if head == "HCP":
         subject_pattern = "*/*"
     elif head == "HCP900":
-        subject_pattern == "*"
+        subject_pattern = "*"
     else:
         raise ValueError
     list_dir = sorted(glob.glob(join(source_dir, subject_pattern,
@@ -321,7 +321,7 @@ def fetch_hcp_rest(data_dir=None, n_subjects=500):
         func.append(subject_func)

     results = {'func': func, 'meta': meta,
-               'mask': mask,
+               'mask': mask_img,
               'description': "'Human connectome project"}
     return Bunch(**results)

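The first hcp.py hunk fixes a classic ==-for-= slip: subject_pattern == "*" evaluates a comparison and discards the result, so the HCP900 branch never binds the name. Because subject_pattern is assigned elsewhere in the function, Python compiles it as a local, and evaluating it unbound fails immediately. A minimal reproduction, independent of modl:

# Minimal reproduction of the bug fixed above (toy function, not modl code).
def pattern_for(head):
    if head == "HCP":
        subject_pattern = "*/*"
    elif head == "HCP900":
        # '==' compares instead of assigning; loading the unbound local
        # raises UnboundLocalError right here on the HCP900 path.
        subject_pattern == "*"
    else:
        raise ValueError(head)
    return subject_pattern

pattern_for("HCP")      # fine: returns "*/*"
pattern_for("HCP900")   # UnboundLocalError

The second hunk returns mask_img instead of mask in the result Bunch, so callers receive the loaded mask image rather than whatever mask happened to be bound to at that point.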
55 changes: 32 additions & 23 deletions modl/dict_fact.py
@@ -55,6 +55,7 @@ def __init__(self,
                  max_iter=100,
                  rand_size=True,
                  replacement=True,
+                 bcd_n_iter=1,
                  atomic_prox=None,
                  mask=None
                  ):
@@ -187,6 +188,7 @@ def __init__(self,
         self.rand_size = rand_size
         self.replacement = replacement

+        self.bcd_n_iter = bcd_n_iter
         self.atomic_prox = atomic_prox
         self.mask = mask

@@ -621,30 +623,37 @@ def _update_dict(self, subset):

         gradient_subset -= self.C_.dot(components_subset)

-        for idx, k in enumerate(self.random_state.permutation(n_components)):
-            gradient_subset = ger(1., self.C_[k], components_subset[k],
-                                  a=gradient_subset, overwrite_a=True)
-            if self.C_[k, k] > 1e-20:
-                components_subset[k] = gradient_subset[k] / self.C_[k, k]
-                if self.atomic_prox is not None:
-                    components_subset[k] = _atomic_prox(
-                        components_subset[k], self.C_[k, k], mask=self.mask,
-                        which=self.atomic_prox, alpha=self.comp_alpha,
-                        l1_ratio=self.comp_l1_ratio, verbose=self.verbose,
-                        counter=self.n_iter_, idx=idx)
-                else:
-                    components_subset[k] = enet_projection(
-                        components_subset[k], atom_temp, 1.,
-                        self.comp_l1_ratio)
-                gradient_subset = ger(-1., self.C_[k], components_subset[k],
-                                      a=gradient_subset, overwrite_a=True)
-            else:
-                if self.verbose == 1:
-                    sys.stdout.write("+")
-                    sys.stdout.flush()
-                elif self.verbose:
-                    print("Adding new random atom")
-                components_subset[k] = self.random_state.randn(n_features)
+        for bcd_iter in range(self.bcd_n_iter):
+            if self.verbose:
+                print("[BCD] iter %02i/%02i" % (bcd_iter + 1, self.bcd_n_iter))
+            order = self.random_state.permutation(n_components)
+            for idx, k in enumerate(order):
+                gradient_subset = ger(1., self.C_[k], components_subset[k],
+                                      a=gradient_subset, overwrite_a=True)
+                if self.C_[k, k] > 1e-20:
+                    components_subset[k] = gradient_subset[k] / self.C_[k, k]
+                    if self.atomic_prox is not None:
+                        components_subset[k] = _atomic_prox(
+                            components_subset[k], self.C_[k, k],
+                            mask=self.mask, which=self.atomic_prox,
+                            alpha=self.comp_alpha, l1_ratio=self.comp_l1_ratio,
+                            counter=self.n_iter_, verbose=self.verbose,
+                            idx=idx)
+                    else:
+                        components_subset[k] = enet_projection(
+                            components_subset[k], atom_temp, 1.,
+                            self.comp_l1_ratio)
+                        components_subset[k] = atom_temp
+                    gradient_subset = ger(
+                        -1., self.C_[k], components_subset[k],
+                        a=gradient_subset, overwrite_a=True)
+                else:
+                    if self.verbose == 1:
+                        sys.stdout.write("+")
+                        sys.stdout.flush()
+                    elif self.verbose:
+                        print("Adding new random atom")
+                    components_subset[k] = self.random_state.randn(n_features)

         self.components_[:, subset] = components_subset
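The dict_fact.py change wraps the existing coordinate-descent dictionary update in an outer loop, so each call to _update_dict can now sweep all atoms bcd_n_iter times instead of once. The per-atom step is unchanged: rank-one-update the gradient with ger, solve exactly for atom k when C[k, k] is non-degenerate, apply the proximal operator or the elastic-net projection, then fold the atom back in. Below is a self-contained NumPy sketch of this multi-pass block coordinate descent, using plain outer products in place of BLAS ger and a unit l2-ball projection in place of enet_projection; the names D, B, C and the helper are illustrative, not the modl API:

# Self-contained sketch of the multi-pass BCD dictionary update (not modl code).
import numpy as np

def update_dict(D, B, C, bcd_n_iter=1, rng=None):
    """Minimize 0.5 * tr(D^T C D) - tr(D^T B) atom by atom,
    sweeping all atoms bcd_n_iter times (D: components x features)."""
    rng = np.random.RandomState(0) if rng is None else rng
    n_components, n_features = D.shape
    R = B - C.dot(D)                        # full residual gradient
    for _ in range(bcd_n_iter):             # the new outer BCD loop
        for k in rng.permutation(n_components):
            R += np.outer(C[k], D[k])       # remove atom k's contribution
            if C[k, k] > 1e-20:
                D[k] = R[k] / C[k, k]       # exact minimizer for atom k
                norm = np.linalg.norm(D[k])
                if norm > 1:                # crude stand-in for enet_projection
                    D[k] /= norm
            else:                           # degenerate atom: re-randomize
                D[k] = rng.randn(n_features)
            R -= np.outer(C[k], D[k])       # fold the updated atom back in
    return D

# Usage: surrogate statistics from a toy code/data pair.
rng = np.random.RandomState(42)
X, code = rng.randn(100, 30), rng.randn(100, 5)
C, B = code.T.dot(code), code.T.dot(X)
D = update_dict(rng.randn(5, 30), B, C, bcd_n_iter=3, rng=rng)

With bcd_n_iter=1 the loop reduces to the previous behavior, so the default preserves the old semantics; extra sweeps buy a better partial minimization of the same surrogate at roughly bcd_n_iter times the update cost.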
70 changes: 41 additions & 29 deletions modl/fmri.py
@@ -163,6 +163,7 @@ def __init__(self,
                  buffer_size=None,
                  n_jobs=1, verbose=0,
                  callback=None,
+                 bcd_n_iter=1,
                  atomic_prox=None):
         BaseDecomposition.__init__(self, n_components=n_components,
                                    random_state=random_state,
@@ -188,6 +189,7 @@ def __init__(self,
         self.n_epochs = n_epochs
         self.batch_size = batch_size
         self.reduction = reduction
+        self.bcd_n_iter = bcd_n_iter

         self.method = method

@@ -198,24 +200,7 @@ def __init__(self,

         self.atomic_prox = atomic_prox

-    def fit(self, imgs=None, y=None, confounds=None, raw=False):
-        """Compute the mask and the dictionary maps across subjects
-
-        Parameters
-        ----------
-        imgs: list of Niimg-like objects
-            See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
-            Data on which PCA must be calculated. If this is a list,
-            the affine is considered the same for all.
-
-        confounds: CSV file path or 2D matrix
-            This parameter is passed to nilearn.signal.clean. Please see the
-            related documentation for details
-
-        Returns
-        -------
-        self
-        """
+    def _prepare(self, imgs=None, y=None, confounds=None, raw=False):
         if imgs is None or self.n_epochs == 0:
             # Will raise error is mask has not been provided
             if self.mask is None:
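The monolithic fit is split here: everything up to and including the construction of the inner DictFact moves into a new _prepare helper, while fit (re-added further down) keeps only the epoch loop. A runnable miniature of the pattern, with a toy class standing in for the real estimator:

# Miniature of the fit/_prepare split above (toy class, not modl's API).
class Decomposer:
    def _prepare(self, imgs):
        # one place for setup: masking, preloading, building the inner solver
        data_list = list(imgs)
        indices = list(range(len(data_list) + 1))
        return data_list, indices

    def fit(self, imgs, n_epochs=2):
        data_list, indices = self._prepare(imgs)   # shared setup
        for epoch in range(n_epochs):              # fit keeps only the loop
            for data in data_list:
                pass                               # stream mini-batches here
        return self

Decomposer().fit(["img1", "img2"])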
@@ -260,6 +245,7 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
                                     random_state=self.random_state,
                                     n_threads=self.n_jobs,
                                     verbose=0,
+                                    bcd_n_iter=self.bcd_n_iter,
                                     atomic_prox=self.atomic_prox, mask=mask)
         self.dict_fact_.prepare(n_samples=n_samples,
                                 n_features=n_voxels,
@@ -286,8 +272,6 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
         G_agg = method['G_agg']
         Dx_agg = method['Dx_agg']

-        n_records = len(imgs)
-
         if self.verbose:
             print("Preloading data")

@@ -315,7 +299,12 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
         n_samples = indices_list[-1] + 1

         n_voxels = np.sum(check_niimg(self.masker_.mask_img_).get_data() != 0)
-        if self.dict_init is not None:
+        if hasattr(self, "components_") and self.components_ is not None:
+            dict_init = self.components_
+            if dict_init is None:
+                n_components = self.n_components
+            n_components = len(self.components_)
+        elif self.dict_init is not None:
             if self.verbose:
                 print("Preloading initial dictionary")
             if isinstance(self.dict_init, _basestring) and \
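This hunk gives repeated fit calls warm-start semantics: an existing self.components_ left by a previous fit now takes precedence over dict_init as the initial dictionary (the inner if dict_init is None guard looks vestigial, since components_ was just checked to be non-None). A runnable miniature of the idiom, with a toy learner in place of the real estimator:

# Runnable miniature of the warm-start logic above (toy class, not modl code).
import numpy as np

class ToyDictLearner:
    def __init__(self, n_components, dict_init=None):
        self.n_components, self.dict_init = n_components, dict_init

    def fit(self, X):
        if getattr(self, "components_", None) is not None:
            dict_init = self.components_            # warm start wins
        elif self.dict_init is not None:
            dict_init = self.dict_init
        else:
            dict_init = np.random.RandomState(0).randn(
                self.n_components, X.shape[1])
        # one crude update step so repeated fits visibly resume:
        self.components_ = 0.9 * dict_init + 0.1 * X[:self.n_components]
        return self

X = np.ones((10, 4))
est = ToyDictLearner(n_components=3).fit(X)
est.fit(X)   # second call starts from est.components_, not from scratch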
@@ -342,6 +331,7 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
         elif self.mask_img is not None:
             mask = self.mask_img_.get_data().astype(np.bool)

+        self.components_ = dict_init
         self.dict_fact_ = DictFact(n_components=n_components,
                                    code_alpha=self.alpha,
                                    code_l1_ratio=self.code_l1_ratio,
@@ -355,11 +345,34 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
                                    random_state=self.random_state,
                                    n_threads=self.n_jobs,
                                    verbose=self.verbose,
+                                   bcd_n_iter=self.bcd_n_iter,
                                    atomic_prox=self.atomic_prox,
                                    mask=mask)
         self.dict_fact_.prepare(n_samples=n_samples, n_features=n_voxels,
                                 X=dict_init, dtype=dtype)
+        return data_list, indices_list, shelving
+
+    def fit(self, imgs=None, y=None, confounds=None, raw=False):
+        """Compute the mask and the dictionary maps across subjects
+
+        Parameters
+        ----------
+        imgs: list of Niimg-like objects
+            See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
+            Data on which PCA must be calculated. If this is a list,
+            the affine is considered the same for all.
+
+        confounds: CSV file path or 2D matrix
+            This parameter is passed to nilearn.signal.clean. Please see the
+            related documentation for details
+
+        Returns
+        -------
+        self
+        """
+        data_list, indices_list, shelving = self._prepare(
+            imgs=imgs, y=y, confounds=confounds, raw=raw)
+        n_records = len(imgs)
+        current_n_records = 0
         for i in range(self.n_epochs):
             if self.verbose:
@@ -372,10 +385,7 @@ def fit(self, imgs=None, y=None, confounds=None, raw=False):
                     data = data[::self.reduction]
                 sample_indices = np.arange(indices_list[record],
                                            indices_list[record + 1])
-                if raw:
-                    if isinstance(img, basestring):
-                        data = np.load(data, mmap_mode="r")
-                else:
+                if not raw:
                     if shelving:
                         data = data.get()
                     else:
@@ -443,7 +453,7 @@ def score(self, imgs, confounds=None):
         score /= len(data_list)
         return score

-    def transform(self, imgs, confounds=None):
+    def transform(self, imgs, confounds=None, raw=False):
         """Compute the mask and the ICA maps across subjects

         Parameters
@@ -464,10 +474,12 @@ def transform(self, imgs, confounds=None):
         """
         if not isinstance(imgs, (list, tuple)):
             imgs = [imgs]
-        raw = isinstance(imgs[0], np.ndarray)
-
-        shelving = self.masker_._shelving
+        raw = raw or isinstance(imgs[0], np.ndarray)

+        if hasattr(self, "masker_"):
+            shelving = self.masker_._shelving
+        else:
+            shelving = False
         codes = []
         if raw:
             data_list = imgs
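Taken together, the fmri.py changes let transform run on already-masked arrays (via the new explicit raw flag, still auto-enabled when ndarrays are passed) and no longer assume a fitted masker_, falling back to shelving = False. A hedged end-to-end sketch of the new knobs; the class name fMRIDictFact, load_rest_func's test_size argument, and the exact constructor signature are assumptions drawn from modl's public API around this commit, not guarantees:

# Hedged usage sketch (assumed names: fMRIDictFact, test_size; bcd_n_iter and
# atomic_prox are the parameters this commit threads through).
from modl.datasets.fmri import load_rest_func
from modl.fmri import fMRIDictFact

train_data, test_data = load_rest_func(dataset='adhd', n_subjects=8,
                                       test_size=0.25)
est = fMRIDictFact(n_components=20,
                   n_epochs=2,
                   bcd_n_iter=3,       # extra BCD sweeps per dictionary update
                   atomic_prox=None,   # or the name of an atomic proximal op
                   verbose=1)
est.fit(train_data)
codes = est.transform(test_data)       # pass raw=True for pre-masked arrays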