Skip to content

Commit

Permalink
adds ignore/force_index args for indexing; closes #378
Browse files Browse the repository at this point in the history
  • Loading branch information
tyarkoni committed Feb 12, 2019
1 parent 4da3e8d commit 0c9c7bf
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 42 deletions.
47 changes: 36 additions & 11 deletions bids/layout/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from copy import deepcopy

from .writing import build_path, write_contents_to_file
from bids.utils import listify
from bids.utils import listify, check_path_matches_patterns
from bids.config import get_option
from bids.external import six

Expand Down Expand Up @@ -324,13 +324,16 @@ class BIDSNode(object):
root (BIDSNode): The node at the root of the tree the current node is
part of.
parent (BIDSNode): The parent of the current node.
force_index (bool): Whether or not to forcibly index every file below
this node, even if it fails standard BIDS validation.
"""

_child_class = None
_child_entity = None
_entities = {}

def __init__(self, path, config, root=None, parent=None):
def __init__(self, path, config, root=None, parent=None,
force_index=False):
self.path = path
self.config = listify(config)
self.root = root
Expand All @@ -340,6 +343,7 @@ def __init__(self, path, config, root=None, parent=None):
self.children = []
self.files = []
self.variables = []
self.force_index = force_index

# Check for additional config file in directory
layout_file = self.layout.config_filename
Expand Down Expand Up @@ -439,13 +443,13 @@ def index(self):
layout_file = self.layout.config_filename
if layout_file in filenames:
filenames.remove(layout_file)

for f in filenames:

abs_fn = os.path.join(self.path, f)

# Skip files that fail validation
if not layout._validate_file(abs_fn):
# Skip files that fail validation, unless forcibly indexing
if not self.force_index and not layout._validate_file(abs_fn):
continue

bf = BIDSFile(abs_fn, self)
Expand Down Expand Up @@ -475,14 +479,34 @@ def index(self):

d = os.path.join(dirpath, d)

# Skip directories that fail validation
if not layout._validate_dir(d):
# Derivative directories must always be added separately and
# passed as their own root, so terminate if passed.
if d.startswith(os.path.join(self.layout.root, 'derivatives')):
continue

# Skip directories that fail validation, unless force_index
# is defined, in which case we have to keep scanning, in the
# event that a file somewhere below the current level matches.
# Unfortunately we probably can't do much better than this
# without a lot of additional work, because the elements of
# .force_index can be SRE_Patterns that match files below in
# unpredictable ways.
if check_path_matches_patterns(d, self.layout.force_index):
self.force_index = True
else:
valid_dir = layout._validate_dir(d)
# Note the difference between self.force_index and
# self.layout.force_index.
if not valid_dir and not self.layout.force_index:
continue

child_class = self._get_child_class(d)
# TODO: filter the config files based on include/exclude rules
child = child_class(d, config_list, root_node, self)
self.children.append(child)
child = child_class(d, config_list, root_node, self,
force_index=self.force_index)

if self.force_index or valid_dir:
self.children.append(child)

# prevent subdirectory traversal
break
Expand Down Expand Up @@ -516,9 +540,10 @@ class BIDSRootNode(BIDSNode):
_child_entity = 'subject'
_child_class = BIDSSubjectNode

def __init__(self, path, config, layout):
def __init__(self, path, config, layout, force_index=False):
self._layout = layout
super(BIDSRootNode, self).__init__(path, config)
super(BIDSRootNode, self).__init__(path, config,
force_index=force_index)

def _setup(self):
self.subjects = {c.label: c for c in self.children if
Expand Down
81 changes: 52 additions & 29 deletions bids/layout/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import copy
import warnings

from bids.utils import listify, natural_sort
from bids.utils import listify, natural_sort, check_path_matches_patterns
from bids.config import get_option
from bids.external import inflect, six
from .core import Config, BIDSFile, BIDSRootNode
Expand Down Expand Up @@ -104,10 +104,6 @@ class BIDSLayout(object):
etc. to be ignored.
index_associated (bool): Argument passed onto the BIDSValidator;
ignored if validate = False.
include (str, list): String or list of strings specifying which of the
directories that are by default excluded from indexing should be
included. The default exclusion list is ['code', 'stimuli',
'sourcedata', 'models'].
absolute_paths (bool): If True, queries always return absolute paths.
If False, queries return relative paths, unless the root argument
was left empty (in which case the root defaults to the file system
Expand All @@ -122,6 +118,19 @@ class BIDSLayout(object):
By default (None), uses 'bids'.
sources (BIDSLayout, list): Optional BIDSLayout(s) from which the
current BIDSLayout is derived.
ignore (str, SRE_Pattern, list): Path(s) to exclude from indexing. Each
path is either a string or a SRE_Pattern object (i.e., compiled
regular expression). If a string is passed, it must be either an
absolute path, or be relative to the BIDS project root. If an
SRE_Pattern is passed, the contained regular expression will be
matched against the full (absolute) path of all files and
directories.
force_index (str, SRE_Pattern, list): Path(s) to forcibly index in the
BIDSLayout, even if they would otherwise fail validation. See the
documentation for the ignore argument for input format details.
Note that paths in force_index take precedence over those in
ignore (i.e., if a file matches both ignore and force_index, it
*will* be indexed).
config_filename (str): Optional name of filename within directories
that contains configuration information.
regex_search (bool): Whether to require exact matching (True) or regex
Expand All @@ -130,13 +139,16 @@ class BIDSLayout(object):
can be overridden in individual .get() requests.
"""

_default_ignore = {"code", "stimuli", "sourcedata", "models",
"derivatives", re.compile(r'^\.')}

def __init__(self, root, validate=True, index_associated=True,
include=None, absolute_paths=True, derivatives=False,
config=None, sources=None,
absolute_paths=True, derivatives=False, config=None,
sources=None, ignore=None, force_index=None,
config_filename='layout_config.json', regex_search=False):

self.root = root
self.validator = BIDSValidator(index_associated=index_associated)
self._validator = BIDSValidator(index_associated=index_associated)
self.validate = validate
self.absolute_paths = absolute_paths
self.derivatives = {}
Expand All @@ -147,20 +159,18 @@ def __init__(self, root, validate=True, index_associated=True,
self.files = {}
self.nodes = []
self.entities = {}
self.ignore = [os.path.realpath(os.path.join(self.root, patt))
if isinstance(patt, six.string_types) else patt
for patt in listify(ignore or [])]
self.force_index = [os.path.realpath(os.path.join(self.root, patt))
if isinstance(patt, six.string_types) else patt
for patt in listify(force_index or [])]

# Do basic BIDS validation on root directory
self._validate_root()

# Determine which subdirectories to exclude from indexing
excludes = {"code", "stimuli", "sourcedata", "models", "derivatives"}
if include is not None:
include = listify(include)
if "derivatives" in include:
raise ValueError("Do not pass 'derivatives' in the include "
"list. To index derivatives, either set "
"derivatives=True, or use add_derivatives().")
excludes -= set([d.strip(os.path.sep) for d in include])
self._exclude_dirs = list(excludes)
# Initialize the BIDS validator and examine ignore/force_index args
self._setup_file_validator()

# Set up configs
if config is None:
Expand All @@ -180,9 +190,9 @@ def __init__(self, root, validate=True, index_associated=True,
derivatives = os.path.join(root, 'derivatives')
self.add_derivatives(
derivatives, validate=validate,
index_associated=index_associated, include=include,
index_associated=index_associated,
absolute_paths=absolute_paths, derivatives=None, config=None,
sources=self)
sources=self, ignore=ignore, force_index=force_index)

def _validate_root(self):
# Validate root argument and make sure it contains mandatory info
Expand All @@ -208,19 +218,32 @@ def _validate_root(self):
if k not in self.description:
raise ValueError("Mandatory '%s' field missing from "
"dataset_description.json." % k)


def _setup_file_validator(self):
    # Reject force_index entries that point at (or below) the project's
    # derivatives/ directory. Derivatives get special handling; they
    # shouldn't be indexed normally, and must instead be added via
    # derivatives=True or add_derivatives().
    #
    # Raises ValueError if any string entry in self.force_index resolves
    # to <root>/derivatives or to a path underneath it. Non-string entries
    # (e.g., compiled regular expressions) are not inspected.
    deriv_root = os.path.realpath(os.path.join(self.root, 'derivatives'))
    deriv_prefix = deriv_root + os.path.sep

    for entry in (self.force_index or []):
        if not isinstance(entry, six.string_types):
            continue

        # Entries may be relative to the project root or already
        # absolute; os.path.join() leaves absolute paths untouched, so
        # both forms resolve to the same canonical path here. Comparing
        # against the bare string 'derivatives' would never match an
        # absolute path, and would wrongly match siblings such as
        # 'derivatives_old'.
        resolved = os.path.realpath(os.path.join(self.root, entry))
        if resolved == deriv_root or resolved.startswith(deriv_prefix):
            msg = ("Do not pass 'derivatives' in the force_index "
                   "list. To index derivatives, either set "
                   "derivatives=True, or use add_derivatives().")
            raise ValueError(msg)

def _validate_dir(self, d):
# Validate a directory. Exclude special directories like derivatives/
# and code/ from indexing unless they were explicitly included at
# initialization.
no_root = os.path.relpath(d, self.root).split(os.path.sep)[0]
if no_root in self._exclude_dirs:
if check_path_matches_patterns(d, self.ignore):
return False
return True

def _validate_file(self, f):
# Validate a file. Files are excluded from indexing if validation
# is enabled and fails (i.e., file is not a valid BIDS file).
# Validate a file.

if check_path_matches_patterns(f, self.force_index):
return True

if check_path_matches_patterns(f, self.ignore):
return False

if not self.validate:
return True

Expand All @@ -234,7 +257,7 @@ def _validate_file(self, f):
to_check = os.path.relpath(f, self.root)
to_check = os.path.join(os.path.sep, to_check)

return self.validator.is_bids(to_check)
return self._validator.is_bids(to_check)

def _get_layouts_in_scope(self, scope):
# Determine which BIDSLayouts to search
Expand Down
4 changes: 2 additions & 2 deletions bids/layout/tests/test_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def layout_ds005_multi_derivs():
@pytest.fixture(scope='module')
def layout_ds005_models():
data_dir = join(get_test_data_path(), 'ds005')
return BIDSLayout(data_dir, validate=False, include=['models/'])
return BIDSLayout(data_dir, validate=True, force_index=['models'])

@pytest.fixture(scope='module')
def layout_synthetic():
Expand Down Expand Up @@ -191,7 +191,7 @@ def test_bids_json(layout_7t_trt):
assert set(res) == {'1', '2'}


def test_include(layout_ds005, layout_ds005_models):
def test_force_index(layout_ds005, layout_ds005_models):
target= join(layout_ds005_models.root, 'models',
'ds-005_type-test_model.json')
assert target not in layout_ds005.files
Expand Down
13 changes: 13 additions & 0 deletions bids/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import os
import six


def listify(obj):
Expand Down Expand Up @@ -85,3 +86,15 @@ def splitext(path):
# li.append(extensions) if you want extensions in another list inside the list that is returned.
li.extend(extensions)
return li


def check_path_matches_patterns(path, patterns):
    ''' Check if the path matches at least one of the provided patterns.

    Args:
        path (str): Path to test. It is resolved with os.path.realpath
            before any comparison is made.
        patterns (list): Patterns to test against. Each element is either
            a string, which must be exactly equal to the resolved path, or
            an object exposing a .search() method (e.g., a compiled
            regular expression), which is searched against the resolved
            path. None or an empty list never matches.

    Returns:
        bool: True if at least one pattern matches, False otherwise.
    '''
    if not patterns:
        return False

    path = os.path.realpath(path)
    for patt in patterns:
        # Duck-type on .search() so that any compiled-pattern-like object
        # is accepted and no string-type compatibility shim is needed.
        search = getattr(patt, 'search', None)
        if search is None:
            # Plain strings are compared verbatim against the real path.
            if path == patt:
                return True
        elif search(path):
            return True
    return False

0 comments on commit 0c9c7bf

Please sign in to comment.