Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] : simplify fpath #721

Draft
wants to merge 17 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ Enhancements
API and behavior changes
^^^^^^^^^^^^^^^^^^^^^^^^

- ...
- :func:`mne_bids.BIDSPath.fpath` does not infer a full filepath anymore if the ``suffix``, or ``extension`` is missing. It returns the file path defined by the existing entities, by `Adam Li`_ and `Alexander Gramfort`_ (:gh:`721`)

Requirements
^^^^^^^^^^^^
Expand Down
85 changes: 25 additions & 60 deletions mne_bids/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from mne_bids.config import (
ALLOWED_PATH_ENTITIES, ALLOWED_FILENAME_EXTENSIONS,
ALLOWED_FILENAME_SUFFIX, ALLOWED_PATH_ENTITIES_SHORT,
ALLOWED_DATATYPES, SUFFIX_TO_DATATYPE, ALLOWED_DATATYPE_EXTENSIONS,
ALLOWED_DATATYPES, SUFFIX_TO_DATATYPE,
ALLOWED_SPACES,
reader, ENTITY_VALUE_TYPE)
from mne_bids.utils import (_check_key_val, _check_empty_room_basename,
Expand All @@ -38,9 +38,18 @@ def _get_matched_empty_room(bids_path):
from mne_bids import read_raw_bids # avoid circular import.
bids_path = bids_path.copy()

datatype = 'meg' # We're only concerned about MEG data here
bids_fname = bids_path.update(suffix=datatype,
root=bids_root).fpath
# emptyroom is only for MEG
datatype = 'meg'
bids_path.update(suffix=datatype, datatype=datatype)
bids_fname = bids_path.fpath

if not bids_fname.exists():
raise RuntimeError(
f'BIDS path {bids_fname} does not exist, or '
f'is not a fully specified file path. To find '
f'emptyroom recordings, please fully specify '
f'(e.g. suffix and extension).')

_, ext = _parse_ext(bids_fname)
extra_params = None
if ext == '.fif':
Expand Down Expand Up @@ -95,9 +104,12 @@ def _get_matched_empty_room(bids_path):
params = get_entities_from_fname(er_fname)
er_meas_date = None
params.pop('subject') # er subject entity is different
er_bids_path = BIDSPath(subject='emptyroom', **params, datatype='meg',
root=bids_root, check=False)

# er file is assumed to have the same extension as the
# original BIDS path passed in
er_bids_path = BIDSPath(subject='emptyroom', **params, datatype='meg',
root=bids_root, extension=ext,
check=False)
# Try to extract date from filename.
if params['session'] is not None:
try:
Expand Down Expand Up @@ -199,9 +211,10 @@ class BIDSPath(object):
The "data type" of folder being created at the end of the folder
hierarchy. E.g., ``'anat'``, ``'func'``, ``'eeg'``, ``'meg'``,
``'ieeg'``, etc.
root : str | pathlib.Path | None
root : str | pathlib.Path
The root for the filename to be created. E.g., a path to the folder
in which you wish to create a file with this name.
Defaults to ``'.'``, i.e. the current working directory.
check : bool
If ``True``, enforces BIDS conformity. Defaults to ``True``.

Expand Down Expand Up @@ -279,11 +292,11 @@ class BIDSPath(object):

def __init__(self, subject=None, session=None,
task=None, acquisition=None, run=None, processing=None,
recording=None, space=None, split=None, root=None,
recording=None, space=None, split=None, root='.',
suffix=None, extension=None, datatype=None, check=True):
if all(ii is None for ii in [subject, session, task,
acquisition, run, processing,
recording, space, root, suffix,
recording, space, suffix, root,
extension]):
raise ValueError("At least one parameter must be given.")

Expand Down Expand Up @@ -427,6 +440,8 @@ def fpath(self):
# get the inner-most BIDS directory for this file path
data_path = self.directory

_validate_type(self.root, types=['str', 'path-like'])

# account for MEG data that are directory-based
# else, all other file paths attempt to match
if self.suffix == 'meg' and self.extension == '.ds':
Expand All @@ -435,57 +450,7 @@ def fpath(self):
bids_fpath = op.join(data_path,
op.splitext(self.basename)[0])
else:
# if suffix and/or extension is missing, and root is
# not None, then BIDSPath will infer the dataset
# else, return the relative path with the basename
if (self.suffix is None or self.extension is None) and \
self.root is not None:
# get matching BIDS paths inside the bids root
matching_paths = \
_get_matching_bidspaths_from_filesystem(self)

# FIXME This will break
# FIXME e.g. with FIFF data split across multiple files.
# if extension is not specified and no unique file path
# return filepath of the actual dataset for MEG/EEG/iEEG data
if self.suffix is None or self.suffix in ALLOWED_DATATYPES:
# now only use valid datatype extension
valid_exts = sum(ALLOWED_DATATYPE_EXTENSIONS.values(), [])
matching_paths = [p for p in matching_paths
if _parse_ext(p)[1] in valid_exts]

if (self.split is None and
(not matching_paths or
'_split-' in matching_paths[0])):
# try finding FIF split files (only first one)
this_self = self.copy().update(split='01')
matching_paths = \
_get_matching_bidspaths_from_filesystem(this_self)

# found no matching paths
if not matching_paths:
msg = (f'Could not locate a data file of a supported '
f'format. This is likely a problem with your '
f'BIDS dataset. Please run the BIDS validator '
f'on your data. (root={self.root}, '
f'basename={self.basename}). '
f'{matching_paths}')
warn(msg)

bids_fpath = op.join(data_path, self.basename)
# if paths still cannot be resolved, then there is an error
elif len(matching_paths) > 1:
msg = ('Found more than one matching data file for the '
'requested recording. Cannot proceed due to the '
'ambiguity. This is likely a problem with your '
'BIDS dataset. Please run the BIDS validator on '
'your data.')
raise RuntimeError(msg)
else:
bids_fpath = matching_paths[0]

else:
bids_fpath = op.join(data_path, self.basename)
bids_fpath = op.join(data_path, self.basename)

bids_fpath = Path(bids_fpath)
return bids_fpath
Expand Down
16 changes: 9 additions & 7 deletions mne_bids/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#
# License: BSD (3-clause)
import os.path as op
import glob
import json
import re
from datetime import datetime, timezone
Expand All @@ -30,6 +29,9 @@
def _read_raw(raw_fpath, electrode=None, hsp=None, hpi=None,
allow_maxshield=False, config=None, verbose=None, **kwargs):
"""Read a raw file into MNE, making inferences based on extension."""
if not raw_fpath.exists():
raise FileNotFoundError(f'File or folder not found: {raw_fpath}')

_, ext = _parse_ext(raw_fpath)

# KIT systems
Expand Down Expand Up @@ -485,11 +487,11 @@ def read_raw_bids(bids_path, extra_params=None, verbose=True):
bids_fname = bids_path.fpath.name

if op.splitext(bids_fname)[1] == '.pdf':
bids_raw_folder = op.join(data_dir, f'{bids_path.basename}')
bids_fpath = glob.glob(op.join(bids_raw_folder, 'c,rf*'))[0]
config = op.join(bids_raw_folder, 'config')
bids_raw_folder = data_dir / f'{bids_path.basename}'
bids_fpath = list(bids_raw_folder.glob('c,rf*'))[0]
config = bids_raw_folder / 'config'
else:
bids_fpath = op.join(data_dir, bids_fname)
bids_fpath = data_dir / bids_fname
config = None

if extra_params is None:
Expand Down Expand Up @@ -560,9 +562,9 @@ def read_raw_bids(bids_path, extra_params=None, verbose=True):
verbose=verbose)

# read in associated subject info from participants.tsv
participants_tsv_fpath = op.join(bids_root, 'participants.tsv')
participants_tsv_fpath = bids_root / 'participants.tsv'
subject = f"sub-{bids_path.subject}"
if op.exists(participants_tsv_fpath):
if participants_tsv_fpath.exists():
raw = _handle_participants_reading(participants_tsv_fpath, raw,
subject, verbose=verbose)
else:
Expand Down
73 changes: 27 additions & 46 deletions mne_bids/tests/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,59 +354,30 @@ def test_find_matching_sidecar(return_bids_test_dir):


def test_bids_path_inference(return_bids_test_dir):
"""Test usage of BIDSPath object and fpath."""
"""Test usage of BIDSPath object and fpath.

BIDSPath.fpath will not infer the file path in v0.7+
"""
bids_root = return_bids_test_dir

# without providing all the entities, ambiguous when trying
# to use fpath
bids_path = BIDSPath(
subject=subject_id, session=session_id, acquisition=acq,
subject=subject_id, session=session_id,
task=task, root=bids_root)
with pytest.raises(RuntimeError, match='Found more than one'):
bids_path.fpath

# can't locate a file, but the basename should work
bids_path = BIDSPath(
subject=subject_id, session=session_id, acquisition=acq,
task=task, run='10', root=bids_root)
with pytest.warns(RuntimeWarning, match='Could not locate'):
fpath = bids_path.fpath
assert str(fpath) == op.join(bids_root, f'sub-{subject_id}',
f'ses-{session_id}',
bids_path.basename)

# shouldn't error out when there is no uncertainty
channels_fname = BIDSPath(subject=subject_id, session=session_id,
run=run, acquisition=acq, task=task,
root=bids_root, suffix='channels')
channels_fname.fpath

# create an extra file under 'eeg'
extra_file = op.join(bids_root, f'sub-{subject_id}',
f'ses-{session_id}', 'eeg',
channels_fname.basename + '.tsv')
Path(extra_file).parent.mkdir(exist_ok=True, parents=True)
# Creates a new file and because of this new file, there is now
# ambiguity
with open(extra_file, 'w', encoding='utf-8'):
pass
with pytest.raises(RuntimeError, match='Found data of more than one'):
channels_fname.fpath

# if you set datatype, now there is no ambiguity
channels_fname.update(datatype='eeg')
assert str(channels_fname.fpath) == extra_file
# set state back to original
shutil.rmtree(Path(extra_file).parent)
expected_fpath = f'{bids_root}/sub-{subject_id}/ses-{session_id}/sub-{subject_id}_ses-{session_id}_task-{task}' # noqa
assert bids_path.fpath.as_posix() == expected_fpath


def test_bids_path(return_bids_test_dir):
"""Test usage of BIDSPath object."""
bids_root = return_bids_test_dir

bids_path = BIDSPath(
bids_path_kwargs = dict(
subject=subject_id, session=session_id, run=run, acquisition=acq,
task=task, root=bids_root, suffix='meg')
task=task, suffix='meg', extension='.fif'
)
bids_path = BIDSPath(root=bids_root, **bids_path_kwargs)

expected_parent_dir = op.join(bids_root, f'sub-{subject_id}',
f'ses-{session_id}', 'meg')
Expand All @@ -420,10 +391,10 @@ def test_bids_path(return_bids_test_dir):
assert op.dirname(bids_path.fpath).startswith(bids_root)

# when bids root is not passed in, passes relative path
bids_path2 = bids_path.copy().update(datatype='meg', root=None)
bids_path2 = BIDSPath(datatype='meg', **bids_path_kwargs)
expected_relpath = op.join(
f'sub-{subject_id}', f'ses-{session_id}', 'meg',
expected_basename + '_meg')
expected_basename + '_meg.fif')
assert str(bids_path2.fpath) == expected_relpath

# without bids_root and with suffix/extension
Expand Down Expand Up @@ -551,7 +522,7 @@ def test_bids_path(return_bids_test_dir):
task='03', suffix='ieeg',
extension='.edf')
assert repr(bids_path) == ('BIDSPath(\n'
'root: None\n'
'root: .\n'
'datatype: ieeg\n'
'basename: sub-01_ses-02_task-03_ieeg.edf)')

Expand Down Expand Up @@ -593,7 +564,7 @@ def test_make_filenames():
BIDSPath(subject='one-two', suffix='ieeg', extension='.edf')

with pytest.raises(ValueError, match='At least one'):
BIDSPath()
BIDSPath(root=None)

# emptyroom check: invalid task
with pytest.raises(ValueError, match='task must be'):
Expand Down Expand Up @@ -751,7 +722,7 @@ def test_find_empty_room(return_bids_test_dir, tmpdir):
bids_path = BIDSPath(subject='01', session='01',
task='audiovisual', run='01',
root=bids_root, suffix='meg')
write_raw_bids(raw, bids_path, overwrite=True)
bids_path = write_raw_bids(raw, bids_path, overwrite=True)

# No empty-room data present.
er_basename = bids_path.find_empty_room()
Expand All @@ -775,7 +746,8 @@ def test_find_empty_room(return_bids_test_dir, tmpdir):
er_bids_path = BIDSPath(subject='emptyroom', task='noise',
session=er_date, suffix='meg',
root=bids_root)
write_raw_bids(er_raw, er_bids_path, overwrite=True)
er_bids_path = write_raw_bids(er_raw, er_bids_path,
overwrite=True)

recovered_er_bids_path = bids_path.find_empty_room()
assert er_bids_path == recovered_er_bids_path
Expand Down Expand Up @@ -841,6 +813,13 @@ def test_find_emptyroom_ties(tmpdir):
er_raw.save(op.join(er_dir, f'{er_basename_1}_meg.fif'))
er_raw.save(op.join(er_dir, f'{er_basename_2}_meg.fif'))

with pytest.raises(RuntimeError, match='BIDS path .* does not '
'exist'):
bids_path.find_empty_room()

# now add fully specified BIDS entities to find matching empty
# room recordings, but show a warning if there are multiple matches
bids_path.update(suffix='meg', extension='.fif')
with pytest.warns(RuntimeWarning, match='Found more than one'):
bids_path.find_empty_room()

Expand Down Expand Up @@ -879,6 +858,8 @@ def test_find_emptyroom_no_meas_date(tmpdir):
write_raw_bids(raw, bids_path, overwrite=True)
os.remove(op.join(bids_root, 'participants.tsv'))

# update extension to the actual dataset
bids_path.update(extension='.fif')
with pytest.warns(RuntimeWarning, match='Could not retrieve .* date'):
bids_path.find_empty_room()

Expand Down