[WIP] 0.7 release #247

Merged (64 commits, Oct 6, 2018)
Commits
23e70c3  rename entities (tyarkoni, Aug 24, 2018)
b87a84b  update to reflect entity changes (tyarkoni, Aug 25, 2018)
84c21f4  fix reports module to reflect entity changes (tyarkoni, Aug 25, 2018)
cd4c49e  drop grabbids and shorten layout module names (tyarkoni, Aug 25, 2018)
0723bea  validated files are strings not Files; fixes #222 (tyarkoni, Aug 25, 2018)
a7278f7  return None when merge=True and there's nothing in the list; fixes #202 (tyarkoni, Aug 25, 2018)
f2663cd  add new BIDSFile class; closes #242 (tyarkoni, Aug 25, 2018)
b949bbe  working metadata indexing implementation; closes #243 (tyarkoni, Aug 26, 2018)
14cae64  metadata indexing tests (tyarkoni, Aug 26, 2018)
40d9ef7  add ability to constrain metadata search to specified file list (tyarkoni, Aug 26, 2018)
0587658  avoid explicitly passing maxsplit arg--it fails on Python 2 (tyarkoni, Aug 26, 2018)
52110be  force explicit index initialization before metadata search works (tyarkoni, Aug 26, 2018)
d64f127  exclusions in bids config (tyarkoni, Aug 30, 2018)
f831297  set validation to True by default; closes #244 (tyarkoni, Sep 8, 2018)
7fe1b61  remove exclusions from config (tyarkoni, Sep 11, 2018)
a394e73  changes indexing behavior to exclude derivs, code, etc.; closes #184 (tyarkoni, Sep 11, 2018)
77ec845  update tests to reflect changes and give dataset fixtures more sensib… (tyarkoni, Sep 11, 2018)
8e158a1  temporarily store validation rules internally (tyarkoni, Sep 11, 2018)
4f6067f  disable breaking validation tests (tyarkoni, Sep 11, 2018)
ca5c697  set validate to True by default (tyarkoni, Sep 11, 2018)
2878326  turn auto_contrasts into a list (tyarkoni, Sep 11, 2018)
30662db  update tests (tyarkoni, Sep 11, 2018)
4dd29f9  allow incremental variable reading and drop _preproc suffix to sync w… (tyarkoni, Sep 11, 2018)
fe8044e  drop loop_preproc config option (tyarkoni, Sep 11, 2018)
856e6b0  update auto_contrast handling to reflect spec; closes #234 (tyarkoni, Sep 12, 2018)
750d356  correct analysis tests (tyarkoni, Sep 12, 2018)
7d61713  temporarily handle preproc properly until derivatives release candida… (tyarkoni, Sep 12, 2018)
ad7f142  make 'bids' config a dependency of 'derivatives'; closes #246 (tyarkoni, Sep 12, 2018)
571ed2f  remove constraint on passed selectors to _load_time_variables (tyarkoni, Sep 12, 2018)
8326f39  add explicit space constraint to derivatives dataset (tyarkoni, Sep 12, 2018)
ae0a583  pin grabbit version (tyarkoni, Sep 12, 2018)
1a0673e  added path patterns and sample path-building test (tyarkoni, Sep 12, 2018)
91bb60c  add missing docstrings to metadata indexing methods (tyarkoni, Sep 12, 2018)
f8557a3  drop unneeded auto_contrasts arg in Block setup (tyarkoni, Sep 12, 2018)
cf64ae6  drop acq in favor of only acquisitions (tyarkoni, Sep 12, 2018)
a7f2a72  fix tests; acq --> acquisition (tyarkoni, Sep 12, 2018)
fe4f6f2  add period before extensions or NPC will get very angry (tyarkoni, Sep 12, 2018)
6ad17e9  bundle HRF convolution functionality from nistats (bthirion, Sep 13, 2018)
59d6967  adds convolve_HRF transformation; closes #250; closes #251 (tyarkoni, Sep 13, 2018)
eeb4cc4  updated Changelog in anticipation of 0.7 (tyarkoni, Sep 14, 2018)
d04ea36  add test for metadata indexing at Layout init (tyarkoni, Sep 14, 2018)
ac3cd55  remove parse_entities in favor of grabbit's parse_file_entities (tyarkoni, Sep 14, 2018)
0dc5bdb  add test for BIDSLayout.__repr__ (tyarkoni, Sep 14, 2018)
d4a50c8  fix sum transformation and add test (tyarkoni, Sep 14, 2018)
592aa25  update changelog (tyarkoni, Sep 14, 2018)
bbfeef7  support F contrasts and improves ContrastInfo tests; closes #249 (tyarkoni, Sep 14, 2018)
2e7e83a  add forgotten path-building test (tyarkoni, Sep 14, 2018)
968d5b9  add F-contrast test (tyarkoni, Sep 14, 2018)
df4172c  fix broken test (tyarkoni, Sep 14, 2018)
1d0c34b  reduce platform-dependence in paths (tyarkoni, Sep 14, 2018)
f0f084e  add to_df alias for to_data_frame and fix docstring (tyarkoni, Sep 14, 2018)
52d1cbb  refactor metadata indexing (tyarkoni, Oct 5, 2018)
e8e0d2c  always use bold as suffix now that derivatives RC is out (tyarkoni, Oct 5, 2018)
2e003e9  fix metadata indexing (tyarkoni, Oct 5, 2018)
1dec748  fix metadata indexing tests (tyarkoni, Oct 5, 2018)
15c0ba4  update synthetic dataset derivatives to match RC1 (tyarkoni, Oct 5, 2018)
f3853fa  simplified initialization interface (tyarkoni, Oct 6, 2018)
9bc7b74  update tests (tyarkoni, Oct 6, 2018)
34b8b00  first attempt at fixing bug reported in #258 (tyarkoni, Oct 6, 2018)
0cd8f0e  add 2nd deriv directory to ds005 (tyarkoni, Oct 6, 2018)
29f42dc  variant --> desc (tyarkoni, Oct 6, 2018)
f3d40e1  add test for layout with multiple deriv dirs (tyarkoni, Oct 6, 2018)
861eb5a  move ds005 derivs outside of ds005 for better test (tyarkoni, Oct 6, 2018)
acda6fb  update derivatives entities to reflect RC1 (tyarkoni, Oct 6, 2018)
CHANGELOG.md (30 additions, 0 deletions)
@@ -1,5 +1,35 @@
# Changelog

## Version 0.7.0 (September, 2018)
This is a major, API-breaking release. It introduces a large number of new features, bug fixes, and improvements.

API-BREAKING CHANGES:
* A number of entities (or keywords) have been renamed to align more closely with the BIDS specification documents (see the sketch after this list):
* 'type' becomes 'suffix'
* 'modality' becomes 'datatype'
* 'acq' is removed (use 'acquisition')
* 'mod' becomes 'modality'
* The following directories are no longer indexed by default: derivatives/, code/, stimuli/, models/, sourcedata/. They must be explicitly included using the 'include' initialization argument.
* The grabbids module has been renamed to layout; BIDSLayout.py and BIDSvalidator.py are now layout.py and validation.py, respectively.
* The BIDS validator is now enabled by default at layout initialization (i.e., `validate=True`)
* The `exclude` initialization argument has been removed.
* `BIDSLayout.parse_entities` utility has been removed (use the more flexible `parse_file_entities`).
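
To make the entity renames concrete, here is a minimal sketch of how a 0.6.x query maps onto the 0.7 keywords. The dataset path and entity values are hypothetical, and the exact keyword set accepted by `get()` is not spelled out on this page; `parse_file_entities` is named in the bullet above, but whether it is exposed as a layout method or a module-level function is an assumption here.

```python
from bids.layout import BIDSLayout

# Hypothetical dataset root; note that validation now runs by default.
layout = BIDSLayout('/data/ds005')

# 0.6.x:  layout.get(type='bold', modality='func', acq='highres')
# 0.7.0:
bold_files = layout.get(suffix='bold', datatype='func', acquisition='highres')

# parse_file_entities replaces the removed BIDSLayout.parse_entities utility
# (shown here as a layout method; this is an assumption).
entities = layout.parse_file_entities('sub-01/func/sub-01_task-rest_bold.nii.gz')
```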

NEW FEATURES:
* File metadata is now searchable (use `BIDSLayout.search_metadata()`; see the sketch after this list)
* A new BIDSFile wrapper provides easy access to `.metadata` and `.image`
* HRF convolution is now supported via bundling of nistats' hemodynamic_models module; convolution is handled via the `convolve_HRF` transformation.
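
A sketch of the first two features above. Neither the `search_metadata()` signature nor the exact return type of `get()` is shown on this page, so the keyword-filter form and the assumption that `get()` hands back `BIDSFile` objects are both hypothetical.

```python
from bids.layout import BIDSLayout

layout = BIDSLayout('/data/ds005')  # hypothetical dataset root

# Metadata search, assuming key/value filters are passed as keyword arguments.
matches = layout.search_metadata(RepetitionTime=2.0)

# BIDSFile wrapper: .metadata exposes the sidecar JSON as a dict, and .image
# loads imaging files (e.g., as a nibabel image).
f = layout.get(suffix='bold', datatype='func')[0]  # assumed to be a BIDSFile
print(f.metadata.get('RepetitionTime'))
img = f.image
```

The third feature, `convolve_HRF`, is a transformation rather than a layout method; judging from the `transform.select(coll, variables)` call in the analysis.py diff below, it is applied to a variable collection through the same transformation interface.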

MINOR IMPROVEMENTS AND BUG FIXES:
* Specifying 'derivatives' in a paths specification now automatically includes 'bids' (#246)
* Variable loading via load_variables can now be done incrementally
* Expanded and improved path-building via `layout.build_path()` (example after this list)
* `get_collections` no longer breaks when `merge=True` and the list is empty (#202)
* Layout initialization no longer fails when `validate=True` (#222)
* The auto_contrasts field in the modeling tools now complies with the BIDS-Model spec (#234)
* Fixed the `sum` transformation
* Improved test coverage
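
Two of the items above lend themselves to a short sketch. The `build_path()` and `get_collections()` signatures are not shown on this page, so the argument names below (an entity dictionary, a `level` keyword, and entity selectors) are assumptions.

```python
from bids.layout import BIDSLayout

layout = BIDSLayout('/data/ds005')  # hypothetical dataset root

# Path building from an entity dictionary (assumed call form).
target = {'subject': '01', 'task': 'rest', 'suffix': 'bold'}
print(layout.build_path(target))  # e.g. 'sub-01/func/sub-01_task-rest_bold.nii.gz'

# With merge=True and no matching variables, get_collections() now returns
# None instead of raising (#202), so guard for it.
coll = layout.get_collections(level='run', merge=True, subject='99')
if coll is None:
    print('No run-level variables found for this selection.')
```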

## Version 0.6.5 (August 21, 2018)

* FIX: Do not drop rows of NaNs (#217) @adelavega
bids/_version.py (1 addition, 1 deletion)
@@ -495,7 +495,7 @@ def get_versions():
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
for i in cfg.versionfile_source.split('/'):
for i in cfg.versionfile_source.split(os.path.sep):
root = os.path.dirname(root)
except NameError:
return {"version": "0+unknown", "full-revisionid": None,
bids/analysis/analysis.py (71 additions, 88 deletions)
@@ -51,8 +51,7 @@ def _load_blocks(self, blocks):
block = Block(self.layout, index=i, **block_args)
self.blocks.append(block)

def setup(self, blocks=None, agg_func='mean', auto_contrasts=None,
**kwargs):
def setup(self, blocks=None, agg_func='mean', **kwargs):
''' Set up the sequence of blocks for analysis.

Args:
@@ -69,8 +68,6 @@ def setup(self, blocks=None, agg_func='mean', auto_contrasts=None,
name of a function recognized by apply() in pandas, or a
Callable that takes a DataFrame as input and returns a Series
or a DataFrame. NOTE: CURRENTLY UNIMPLEMENTED.
auto_contrasts (bool): If True, a contrast is automatically
created for each column in the design matrix.
'''

# In the beginning, there was nothing
@@ -86,7 +83,7 @@ def setup(self, blocks=None, agg_func='mean', auto_contrasts=None,
if blocks is not None and i not in blocks and b.name not in blocks:
continue

b.setup(input_nodes, auto_contrasts, **selectors)
b.setup(input_nodes, **selectors)
input_nodes = b.output_nodes


@@ -108,14 +105,16 @@ class Block(object):
generated when the model is fit.
input_nodes (list): Optional list of AnalysisNodes to use as input to
this Block (typically, the output from the preceding Block).
auto_contrasts (bool): If True, a contrast is automatically
created for each column in the design matrix. This parameter is
over-written by the setting in setup(). Default is True.
auto_contrasts (list, bool): Optional list of variable names to create
an indicator contrast for. Alternatively, if the boolean value True
is passed, a contrast is automatically generated for _all_
available variables. This parameter is over-written by the setting
in setup() if the latter is passed.
'''

def __init__(self, layout, level, index, name=None, transformations=None,
model=None, contrasts=None, input_nodes=None,
auto_contrasts=True):
auto_contrasts=False):

self.layout = layout
self.level = level
@@ -125,8 +124,8 @@ def __init__(self, layout, level, index, name=None, transformations=None,
self.model = model or None
self.contrasts = contrasts or []
self.input_nodes = input_nodes or []
self.auto_contrasts = auto_contrasts or []
self.output_nodes = []
self.auto_contrasts = auto_contrasts

def _filter_objects(self, objects, kwargs):
# Keeps only objects that match target entities, and also removes those
@@ -155,31 +154,26 @@ def _group_objects(self, objects):
def _concatenate_input_nodes(self, nodes):
data, entities = [], []
for n in nodes:
contrasts = n.contrasts.data.T
row = pd.Series(np.ones(len(contrasts)), index=contrasts.index)
contrasts = [c.name for c in n.contrasts]
row = pd.Series(np.ones(len(contrasts)), index=contrasts)
data.append(row)
entities.append(pd.Series(n.entities))
data = pd.concat(data, axis=1).T
entities = pd.concat(entities, axis=1).T
return BIDSVariableCollection.from_df(data, entities, self.level)

def setup(self, input_nodes=None, auto_contrasts=None, **kwargs):
def setup(self, input_nodes=None, **kwargs):
''' Set up the Block and construct the design matrix.

Args:
input_nodes (list): Optional list of Node objects produced by
the preceding Block in the analysis. If None, uses any inputs
passed in at Block initialization.
auto_contrasts (bool): If True, a contrast is automatically
created for each column in the design matrix.
kwargs: Optional keyword arguments to pass onto load_variables.
'''
self.output_nodes = []
input_nodes = input_nodes or self.input_nodes or []

if auto_contrasts is not None:
self.auto_contrasts = auto_contrasts

# TODO: remove the scan_length argument entirely once we switch tests
# to use the synthetic dataset with image headers.
if self.level != 'run':
@@ -192,6 +186,14 @@ def setup(self, input_nodes=None, auto_contrasts=None, **kwargs):
objects, kwargs = self._filter_objects(objects, kwargs)
groups = self._group_objects(objects)

# Set up and validate variable lists
model = self.model or {}
variables = model.get('variables', [])
hrf_variables = model.get('HRF_variables', [])
if not set(variables) >= set(hrf_variables):
raise ValueError("HRF_variables must be a subset ",
"of variables in BIDS model.")

for grp in groups:
# Split into separate lists of Collections and Nodes
input_nodes = [o for o in grp if isinstance(o, AnalysisNode)]
@@ -201,19 +203,11 @@ def setup(self, input_nodes=None, auto_contrasts=None, **kwargs):
node_coll = self._concatenate_input_nodes(input_nodes)
colls.append(node_coll)

model = self.model or {}

variables = set(model.get('variables', []))
hrf_variables = set(model.get('HRF_variables', []))
if not variables >= hrf_variables:
raise ValueError("HRF_variables must be a subset ",
"of variables in BIDS model.")

coll = merge_collections(colls) if len(colls) > 1 else colls[0]

coll = apply_transformations(coll, self.transformations)
if model.get('variables'):
transform.select(coll, model['variables'])
if variables:
transform.select(coll, variables)

node = AnalysisNode(self.level, coll, self.contrasts, input_nodes,
self.auto_contrasts)
@@ -257,32 +251,37 @@ def get_design_matrix(self, names=None, format='long', mode='both',
return [n.get_design_matrix(names, format, mode=mode, force=force,
**kwargs) for n in nodes]

def get_contrasts(self, names=None, **kwargs):
def get_contrasts(self, names=None, variables=None, **kwargs):
''' Return contrast information for the current block.

Args:
names (list): Optional list of names of contrasts to return. If
None (default), all contrasts are returned.
variables (list): Optional list of strings giving the names of
design matrix columns to use when generating the matrix of
weights.
kwargs: Optional keyword arguments used to constrain which of the
available nodes get returned (e.g., passing subject=['01',
'02'] will return contrast information only for subjects '01'
and '02').

Returns:
A list of ContrastMatrixInfo namedtuples--one per unit of the
current analysis level (e.g., if level='run', each element in the
list represents the design matrix for a single run).
A list with one element per unit of the current analysis level
(e.g., if level='run', each element in the list represents the
contrast information for a single run). Each element is a list of
ContrastInfo namedtuples (one per contrast).

'''
nodes, kwargs = self._filter_objects(self.output_nodes, kwargs)
return [n.get_contrasts(names) for n in nodes]
return [n.get_contrasts(names, variables) for n in nodes]


DesignMatrixInfo = namedtuple('DesignMatrixInfo',
('sparse', 'dense', 'entities'))


ContrastMatrixInfo = namedtuple('ContrastMatrixInfo', ('data', 'index',
'entities'))
ContrastInfo = namedtuple('ContrastInfo', ('name', 'weights', 'type',
'entities'))


class AnalysisNode(object):
@@ -294,17 +293,21 @@ class AnalysisNode(object):
collection (BIDSVariableCollection): The BIDSVariableCollection
containing variables at this Node.
contrasts (list): A list of contrasts defined in the originating Block.
auto_contrasts (bool): If True, a contrast is automatically
created for each column in the design matrix.
auto_contrasts (list): Optional list of variable names to create
an indicator contrast for. Alternatively, if the boolean value True
is passed, a contrast is automatically generated for _all_
available variables.
'''

def __init__(self, level, collection, contrasts, input_nodes=None,
auto_contrasts=True):
auto_contrasts=None):
self.level = level
self.collection = collection
self._block_contrasts = contrasts
self.input_nodes = input_nodes
self.auto_contrasts = auto_contrasts
if auto_contrasts == True:
auto_contrasts = collection.variables.keys()
self.auto_contrasts = auto_contrasts or []
self._contrasts = None

@property
@@ -371,17 +374,24 @@ def get_design_matrix(self, names=None, format='long', mode='both',

return DesignMatrixInfo(sparse_df, dense_df, self.entities)

def get_contrasts(self, names=None, entities=False):
def get_contrasts(self, names=None, variables=None):
''' Return contrast information for the current block.

Args:
names (list): Optional list of names of contrasts to return. If
None (default), all contrasts are returned.
entities (bool): If True, concatenates entity columns to the
returned contrast matrix.
variables (list): Optional list of strings giving the names of
design matrix columns to use when generating the matrix of
weights.

Notes:
The 'variables' argument takes precedence over the natural process
of column selection. I.e., if a variable shows up in a contrast but
isn't named in variables, it will *not* be included in the returned
matrix of weights.

Returns:
A ContrastMatrixInfo namedtuple.
A list of ContrastInfo namedtuples, one per contrast.
'''

# Verify that there are no invalid columns in the condition_lists
@@ -401,62 +411,35 @@ def get_contrasts(self, names=None, entities=False):
raise ValueError("One or more contrasts have the same name")
contrast_names = list(set(contrast_names))

### Add ability to set different types of identity contrasts
if self.auto_contrasts:
for col_name in self.collection.variables.keys():
if col_name not in contrast_names:
for col_name in self.auto_contrasts:
if (col_name in self.collection.variables.keys()
and col_name not in contrast_names):
contrasts.append({
'name': col_name,
'condition_list': [col_name],
'weights': [1],
'type': 'T'
})

# Filter on desired contrast names if passed
if names is not None:
contrasts = [c for c in contrasts if c['name'] in names]

# Build a "maximal" contrast matrix that has all possible rows and
# columns. Then we'll proceed by knocking out invalid/missing rows and
# columns separately for each input node.
contrast_defs = [pd.Series(c['weights'], index=c['condition_list'])
for c in contrasts]
con_mat = pd.DataFrame(contrast_defs).fillna(0).T
con_mat.columns = [c['name'] for c in contrasts]

# Identify all variable names in the current collection that don't show
# up in any of the input nodes. This will include any variables read in
# at the current level--e.g., if we're at session level, it might
# include run-by-run ratings of mood, etc.
inputs = []
node_cols = [set(n.contrasts[0].columns) for n in self.input_nodes]
all_cols = set(chain(*node_cols))
common_vars = set(self.collection.variables.keys()) - all_cols

# Also track entities for each row in each input node's contrast matrix
ent_index = []

# Loop over input nodes. For each one, get all available columns, and
# use that to trim down the cloned maximal contrast matrix.
if self.input_nodes:
for node in self.input_nodes:
cols = list(set(node.contrasts[0].columns) | common_vars)
node_mat = con_mat.copy()
valid = node_mat.index.isin(cols)
node_mat = node_mat.loc[valid, :]
inputs.append(node_mat)
ent_index.extend([node.entities] * len(node_mat))
else:
# If there are no input nodes, we're at run level, so just use
# the maximal contrast matrix.
inputs.append(con_mat)

contrasts = pd.concat(inputs, axis=0)

# Drop rows that are all zeros
contrasts = contrasts[(contrasts.T != 0.0).any()]

index = pd.DataFrame.from_records(ent_index)
self._contrasts = ContrastMatrixInfo(contrasts, index, self.entities)
def setup_contrast(c):
weights = np.atleast_2d(c['weights'])
weights = pd.DataFrame(weights, columns=c['condition_list'])
# If variables were explicitly passed, use them as the columns
if variables is not None:
var_df = pd.DataFrame(columns=variables)
weights = pd.concat([weights, var_df])[variables].fillna(0)

test_type = c.get('type', ('T' if len(weights) == 1 else 'F'))

return ContrastInfo(c['name'], weights, test_type, self.entities)

self._contrasts = [setup_contrast(c) for c in contrasts]

return self._contrasts

def matches_entities(self, entities, strict=False):
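
The main behavioral change in analysis.py above is that `AnalysisNode.get_contrasts()` now returns a list of `ContrastInfo` namedtuples instead of a single `ContrastMatrixInfo` matrix, with weights optionally reindexed onto an explicit `variables` list. The standalone sketch below mirrors the inner `setup_contrast()` helper from the diff so the padding behavior can be run in isolation; the contrast spec and variable names are made up for illustration.

```python
from collections import namedtuple

import numpy as np
import pandas as pd

# Mirrors the ContrastInfo namedtuple introduced in this diff.
ContrastInfo = namedtuple('ContrastInfo', ('name', 'weights', 'type', 'entities'))


def setup_contrast(c, variables=None, entities=None):
    """Standalone copy of the per-contrast logic in AnalysisNode.get_contrasts()."""
    weights = np.atleast_2d(c['weights'])
    weights = pd.DataFrame(weights, columns=c['condition_list'])
    # If an explicit variable list is passed, reindex the weight matrix onto it:
    # unnamed columns are filled with zeros, and columns absent from `variables`
    # are dropped.
    if variables is not None:
        var_df = pd.DataFrame(columns=variables)
        weights = pd.concat([weights, var_df])[variables].fillna(0)
    # One weight row defaults to a T contrast, multiple rows to an F contrast.
    test_type = c.get('type', 'T' if len(weights) == 1 else 'F')
    return ContrastInfo(c['name'], weights, test_type, entities or {})


spec = {'name': 'gain_gt_loss', 'condition_list': ['gain', 'loss'], 'weights': [1, -1]}
info = setup_contrast(spec, variables=['gain', 'loss', 'RT'])
print(info.type)     # T
print(info.weights)  # single row: gain=1.0, loss=-1.0, RT=0.0
```

Note how `RT`, which never appears in the contrast's condition_list, ends up as a zero-weight column because it is named in `variables`, while any condition missing from `variables` would be dropped from the weights.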