Commit

Merge branch 'master' of https://github.com/nipy/heudiconv into multiecho

mgxd committed Jan 7, 2019
2 parents 621a7c4 + b9b5e0d commit f47b637
Showing 30 changed files with 396 additions and 209 deletions.
3 changes: 1 addition & 2 deletions .coveragerc
@@ -1,4 +1,3 @@
[run]
include = tests/*
heudiconv/*
include = heudiconv/*
setup.py
Empty file modified .gitignore
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -37,7 +37,7 @@ install:
- git config --global user.name "Travis Almighty"

script:
- coverage run `which py.test` -s -v tests heudiconv/heuristics/*.py
- coverage run `which py.test` -s -v heudiconv

after_success:
- codecov
33 changes: 28 additions & 5 deletions CHANGELOG.md
@@ -4,22 +4,45 @@ All notable changes to this project will be documented (for humans) in this file
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [0.5.2] - Date
## [0.5.3] - Date

TODO Summary

### Added
### Changed

- Reproin heuristic: `__dup` indices are now assigned incrementally per
  sequence, so associated multi-file (e.g. `fmap`) sequences have a chance
  to be treated properly
### Changed

### Deprecated

### Fixed

### Removed

### Security

## [0.5.2] - 2019-01-04

A variety of bugfixes

### Changed
- Reproin heuristic: `__dup` indices are now assigned incrementally per
  sequence, so associated multi-file (e.g. `fmap`) sequences have a chance
  to be treated properly
- Reproin heuristic: StudyDescription is now also split on spaces, not only on `^`
- `tests/` moved under `heudiconv/tests` to ease maintenance and facilitate
testing of an installed heudiconv
- Protocol name will also be read from the private Siemens
  `csa.tProtocolName` header field if it is not present in the public one
- nipype>=0.12.0 is now required

### Fixed
- Multiple files produced by dcm2niix are first sorted to guarantee
  correct order, e.g. of magnitude files in fieldmaps, which could
  otherwise end up ordered incorrectly with respect to BIDS
- Aggregated top level .json files now contain only the fields whose
  values are identical across all scanned files. In prior versions,
  those files were not regenerated after an initial conversion
- Unicode handling in anonymization scripts

## [0.5.1] - 2018-07-05
Bugfix release
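To illustrate the reproin `__dup` change noted above, here is a minimal sketch, not reproin's actual code (the function name and suffix format are made up), of assigning duplicate indices from a counter kept per sequence name rather than from one global counter:

from collections import defaultdict

def assign_dup_suffixes(series_names):
    # Count occurrences per name, so duplicates of one sequence
    # do not advance the __dup index of another sequence
    seen = defaultdict(int)
    out = []
    for name in series_names:
        seen[name] += 1
        if seen[name] > 1:
            name = '%s__dup-%02d' % (name, seen[name] - 1)
        out.append(name)
    return out

print(assign_dup_suffixes(['anat', 'fmap', 'fmap', 'func', 'fmap']))
# -> ['anat', 'fmap', 'fmap__dup-01', 'func', 'fmap__dup-02']

Because the counter is per sequence, both members of a duplicated multi-file acquisition such as a `fmap` pair get consecutive indices and can still be associated with each other.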
22 changes: 13 additions & 9 deletions Dockerfile
@@ -1,5 +1,5 @@
# Generated by Neurodocker version 0.4.1-28-g83dbc15
# Timestamp: 2018-11-01 22:00:14 UTC
# Generated by Neurodocker version 0.4.2-3-gf7055a1
# Timestamp: 2018-11-13 22:04:04 UTC
#
# Thank you for using Neurodocker. If you discover any issues
# or ways to improve this software, please submit an issue or
@@ -70,6 +70,7 @@ RUN apt-get update -qq \
liblzma-dev \
libc-dev \
git-annex-standalone \
netbase \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

@@ -89,15 +90,16 @@ RUN export PATH="/opt/miniconda-latest/bin:$PATH" \
&& conda config --system --set show_channel_urls true \
&& sync && conda clean -tipsy && sync \
&& conda install -y -q --name base \
python=3.6 \
traits>=4.6.0 \
scipy \
numpy \
nomkl \
'python=3.6' \
'traits>=4.6.0' \
'scipy' \
'numpy' \
'pandas' \
'nomkl' \
&& sync && conda clean -tipsy && sync \
&& bash -c "source activate base \
&& pip install --no-cache-dir --editable \
/src/heudiconv[all]" \
'/src/heudiconv[all]'" \
&& rm -rf ~/.cache/pip/* \
&& sync

@@ -125,7 +127,8 @@ RUN echo '{ \
\n "pigz", \
\n "liblzma-dev", \
\n "libc-dev", \
\n "git-annex-standalone" \
\n "git-annex-standalone", \
\n "netbase" \
\n ] \
\n ], \
\n [ \
@@ -144,6 +147,7 @@ RUN echo '{ \
\n "traits>=4.6.0", \
\n "scipy", \
\n "numpy", \
\n "pandas", \
\n "nomkl" \
\n ], \
\n "pip_install": [ \
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,4 +1,4 @@
Copyright [2014-2018] [Heudiconv developers]
Copyright [2014-2019] [Heudiconv developers]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
62 changes: 52 additions & 10 deletions heudiconv/bids.py
@@ -74,13 +74,37 @@ def populate_bids_templates(path, defaults={}):
"TODO: Provide description for the dataset -- basic details about the "
"study, possibly pointing to pre-registration (if public or embargoed)")

populate_aggregated_jsons(path)


def populate_aggregated_jsons(path):
"""Aggregate across the entire BIDS dataset .json's into top level .json's
Top level .json files would contain only the fields which are
common to all subject[/session]/type/*_modality.json's.
ATM aggregating only for *_task*_bold.json files. Only the task- and
OPTIONAL _acq- field is retained within the aggregated filename. The other
BIDS _key-value pairs are "aggregated over".
Parameters
----------
path: str
Path to the top of the BIDS dataset
"""
# TODO: collect all task- .json files for func files to
tasks = {}
# way too many -- let's just collect all which are the same!
# FIELDS_TO_TRACK = {'RepetitionTime', 'FlipAngle', 'EchoTime',
# 'Manufacturer', 'SliceTiming', ''}
for fpath in find_files('.*_task-.*\_bold\.json', topdir=path,
exclude_vcs=True, exclude="/\.(datalad|heudiconv)/"):
exclude_vcs=True,
exclude="/\.(datalad|heudiconv)/"):
#
# According to BIDS spec I think both _task AND _acq (may be more?
# _rec, _dir, ...?) should be retained?
# TODO: if we are to fix it, then old ones (without _acq) should be
# removed first
task = re.sub('.*_(task-[^_\.]*(_acq-[^_\.]*)?)_.*', r'\1', fpath)
json_ = load_json(fpath)
if task not in tasks:
@@ -115,18 +139,36 @@ def populate_bids_templates(path, defaults={}):
if not op.lexists(events_file):
lgr.debug("Generating %s", events_file)
with open(events_file, 'w') as f:
f.write("onset\tduration\ttrial_type\tresponse_time\tstim_file\tTODO -- fill in rows and add more tab-separated columns if desired")
f.write(
"onset\tduration\ttrial_type\tresponse_time\tstim_file"
"\tTODO -- fill in rows and add more tab-separated "
"columns if desired")
# extract tasks files stubs
for task_acq, fields in tasks.items():
task_file = op.join(path, task_acq + '_bold.json')
# do not touch any existing thing, it may be precious
if not op.lexists(task_file):
lgr.debug("Generating %s", task_file)
fields["TaskName"] = ("TODO: full task name for %s" %
task_acq.split('_')[0].split('-')[1])
fields["CogAtlasID"] = "TODO"
with open(task_file, 'w') as f:
f.write(json_dumps_pretty(fields, indent=2, sort_keys=True))
# Since we are pulling all unique fields we have to possibly
# rewrite this file to guarantee consistency.
# See https://github.com/nipy/heudiconv/issues/277 for a use case/bug
# where we didn't touch the existing one.
# But the fields we enter (TaskName and CogAtlasID) might need
# to be populated from the file if it already exists
placeholders = {
"TaskName": ("TODO: full task name for %s" %
task_acq.split('_')[0].split('-')[1]),
"CogAtlasID": "TODO",
}
if op.lexists(task_file):
j = load_json(task_file)
# Retain possibly modified placeholder fields
for f in placeholders:
if f in j:
placeholders[f] = j[f]
act = "Regenerating"
else:
act = "Generating"
lgr.debug("%s %s", act, task_file)
fields.update(placeholders)
save_json(task_file, fields, indent=2, sort_keys=True, pretty=True)


def tuneup_bids_json_files(json_files):
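The aggregation introduced in populate_aggregated_jsons above keeps only the fields that carry identical values in every scanned .json file. A minimal standalone sketch of that idea, using plain json rather than heudiconv's load_json/save_json helpers:

import json

def common_fields(json_paths):
    # Start from the first file's fields and drop any field whose
    # value differs (or is absent) in a later file
    common = None
    for path in json_paths:
        with open(path) as f:
            fields = json.load(f)
        if common is None:
            common = dict(fields)
        else:
            for key, value in list(common.items()):
                if key not in fields or fields[key] != value:
                    del common[key]
    return common or {}

Per-run fields such as AcquisitionTime drop out of the aggregate, while fields constant across runs, e.g. RepetitionTime, survive — matching the changelog note that top-level files contain only fields with the same values in all scanned files.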
5 changes: 3 additions & 2 deletions heudiconv/convert.py
@@ -101,7 +101,7 @@ def prep_conversion(sid, dicoms, outdir, heuristic, converter, anon_sid,
anon_outdir = outdir

# Generate heudiconv info folder
idir = op.join(outdir, '.heudiconv', sid)
idir = op.join(outdir, '.heudiconv', anon_sid)
if bids and ses:
idir = op.join(idir, 'ses-%s' % str(ses))
if anon_outdir == outdir:
@@ -458,6 +458,7 @@ def save_converted_files(res, item_dicoms, bids, outtype, prefix, outname_bids,
safe_copyfile(res.outputs.bvals, outname_bvals, overwrite)

if isinstance(res_files, list):
res_files = sorted(res_files)
# we should provide specific handling for fmap,
# dwi etc which might spit out multiple files

@@ -473,7 +474,7 @@ def save_converted_files(res, item_dicoms, bids, outtype, prefix, outname_bids,

# Also copy BIDS files although they might need to
# be merged/postprocessed later
bids_files = (res.outputs.bids
bids_files = (sorted(res.outputs.bids)
if len(res.outputs.bids) == len(res_files)
else [None] * len(res_files))

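The sorted() calls added above make the pairing of dcm2niix outputs with BIDS names deterministic. A hypothetical illustration of the fieldmap failure mode being fixed (file names invented):

# dcm2niix may return echo files in arbitrary order
res_files = ['fmap_e2.nii.gz', 'fmap_e1.nii.gz']
suffixes = ['magnitude1', 'magnitude2']

# Unsorted, _e2 would get labeled magnitude1:
print(list(zip(res_files, suffixes)))

# Sorted first, _e1 -> magnitude1 and _e2 -> magnitude2 as BIDS expects:
print(list(zip(sorted(res_files), suffixes)))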
49 changes: 46 additions & 3 deletions heudiconv/dicoms.py
@@ -4,7 +4,7 @@
import logging
from collections import OrderedDict
import tarfile

from nibabel.nicom import csareader
from heudiconv.external.pydicom import dcm

from .utils import SeqInfo, load_json, set_readonly
@@ -73,6 +73,15 @@ def group_dicoms_into_seqinfos(files, file_filter, dcmfilter, grouping):
lgr.info("File {} is missing any StudyInstanceUID".format(filename))
file_studyUID = None

# Workaround for protocol name in private siemens csa header
try:
mw.dcm_data.ProtocolName
except AttributeError:
if not getattr(mw.dcm_data, 'ProtocolName', '').strip():
mw.dcm_data.ProtocolName = parse_private_csa_header(
mw.dcm_data, 'ProtocolName', 'tProtocolName'
) if mw.is_csa else ''

try:
series_id = (int(mw.dcm_data.SeriesNumber),
mw.dcm_data.ProtocolName)
@@ -208,7 +217,7 @@ def group_dicoms_into_seqinfos(files, file_filter, dcmfilter, grouping):
dcminfo.get('PatientID'),
dcminfo.get('StudyDescription'),
refphys,
dcminfo.get('SeriesDescription'),
series_desc, # We try to set this further up.
sequence_name,
image_type,
accession_number,
@@ -232,7 +241,7 @@ def group_dicoms_into_seqinfos(files, file_filter, dcmfilter, grouping):
lgr.debug("%30s %30s %27s %27s %5s nref=%-2d nsrc=%-2d %s" % (
key,
info.series_id,
dcminfo.SeriesDescription,
series_desc,
dcminfo.ProtocolName,
info.is_derived,
len(dcminfo.get('ReferencedImageSequence', '')),
@@ -483,3 +492,37 @@ def embed_metadata_from_dicoms(bids, item_dicoms, outname, outname_bids,
except Exception as exc:
lgr.error("Embedding failed: %s", str(exc))
os.chdir(cwd)

def parse_private_csa_header(dcm_data, public_attr, private_attr, default=None):
"""
Parses CSA header in cases where value is not defined publicly
Parameters
----------
dcm_data : pydicom Dataset object
DICOM metadata
public_attr : string
non-private DICOM attribute
private_attr : string
private DICOM attribute
default (optional)
default value if private_attr not found
Returns
-------
val (default: empty string)
private attribute value or default
"""
# TODO: provide mapping to private_attr from public_attr
from nibabel.nicom import csareader
import dcmstack.extract as dsextract
try:
# TODO: test with attr besides ProtocolName
csastr = csareader.get_csa_header(dcm_data, 'series')['tags']['MrPhoenixProtocol']['items'][0]
csastr = csastr.replace("### ASCCONV BEGIN", "### ASCCONV BEGIN ### ")
parsedhdr = dsextract.parse_phoenix_prot('MrPhoenixProtocol', csastr)
val = parsedhdr[private_attr].replace(' ', '')
except Exception as e:
lgr.debug("Failed to parse CSA header: %s", str(e))
val = default if default else ''
return val
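A hypothetical usage sketch of the new fallback, assuming pydicom >= 1.0 (heudiconv itself imports it via heudiconv.external.pydicom) and nibabel plus dcmstack installed as the function requires; the file path is made up:

import pydicom

dcm_data = pydicom.dcmread('/tmp/siemens_scan.dcm', stop_before_pixels=True)

if not getattr(dcm_data, 'ProtocolName', '').strip():
    # Public (0018,1030) ProtocolName is empty -- recover it from the
    # private Siemens CSA series header field tProtocolName
    dcm_data.ProtocolName = parse_private_csa_header(
        dcm_data, 'ProtocolName', 'tProtocolName', default='')

print(dcm_data.ProtocolName)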