Skip to content

Commit

Permalink
[WIP] Parse ADNI XML metadata (#587)
Browse files Browse the repository at this point in the history
* Write metadata to tsv/json files
* Add unit tests
* Doc
* non regression (needs xml data folder)

Co-authored-by: emaheux <[email protected]>
  • Loading branch information
NicolasGensollen and emaheux authored Mar 30, 2022
1 parent 0353cac commit 7d9232d
Show file tree
Hide file tree
Showing 11 changed files with 1,290 additions and 1 deletion.
542 changes: 542 additions & 0 deletions clinica/iotools/converters/adni_to_bids/adni_json.py

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions clinica/iotools/converters/adni_to_bids/adni_to_bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,14 @@ def convert_clinical_data(
out_path: str,
clinical_data_only: bool = False,
subjects_list_path: Optional[str] = None,
xml_path: Optional[str] = None,
):
"""Convert the clinical data of ADNI specified into the file clinical_specifications_adni.xlsx.
Args:
clinical_data_dir: path to the clinical data directory
out_path: path to the BIDS directory
xml_path: path to the XML metadata files
"""
import os
from os import path
Expand All @@ -72,6 +74,8 @@ def convert_clinical_data(
import clinica.iotools.converters.adni_to_bids.adni_utils as adni_utils
from clinica.utils.stream import cprint

from .adni_json import create_json_metadata

clinic_specs_path = path.join(
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
"data",
Expand Down Expand Up @@ -132,6 +136,18 @@ def convert_clinical_data(
cprint("Creating scans files...")
adni_utils.create_adni_scans_files(conversion_path, bids_subjs_paths)

if xml_path is not None:
if os.path.exists(xml_path):
create_json_metadata(bids_subjs_paths, bids_ids, xml_path)
else:
cprint(
msg=(
f"Clinica was unable to find {xml_path}, "
"skipping xml metadata extraction."
),
lvl="warning",
)

def convert_images(
self,
source_dir,
Expand Down
5 changes: 5 additions & 0 deletions clinica/iotools/converters/adni_to_bids/adni_to_bids_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,14 @@
default=ALL_MODALITIES,
help="Convert only the selected modality. By default, all available modalities are converted.",
)
@click.option(
"-xml", "--xml_path", help="Path to the root directory containing the xml metadata."
)
def cli(
dataset_directory: str,
clinical_data_directory: str,
bids_directory: str,
xml_path: Optional[str] = None,
subjects_list: Optional[str] = None,
clinical_data_only: bool = False,
force_new_extraction: bool = False,
Expand Down Expand Up @@ -72,6 +76,7 @@ def cli(
out_path=bids_directory,
clinical_data_only=clinical_data_only,
subjects_list_path=subjects_list,
xml_path=xml_path,
)


Expand Down
15 changes: 15 additions & 0 deletions clinica/iotools/converters/adni_to_bids/adni_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from typing import Union


def visits_to_timepoints(
subject,
mri_list_subj,
Expand Down Expand Up @@ -565,6 +568,18 @@ def remove_fields_duplicated(bids_fields):
return [x for x in bids_fields if not (x in seen or seen_add(x))]


def bids_id_to_loni(bids_id: str) -> Union[str, None]:
    """Convert a BIDS subject ID back to the original LONI format.

    For example, ``sub-ADNI000S0000`` is converted to ``000_S_0000``.

    Args:
        bids_id: Subject ID of the form ``sub-ADNI000S0000``.

    Returns:
        The two runs of digits found in ``bids_id`` joined by ``_S_``,
        or None when ``bids_id`` does not contain exactly two runs of digits.
    """
    import re

    # Raw string: "\d" in a plain literal is an invalid escape sequence that
    # newer Python versions warn about.
    ids = re.findall(r"\d+", bids_id)
    if len(ids) == 2:
        return ids[0] + "_S_" + ids[1]
    return None


def filter_subj_bids(df_files, location, bids_ids):
import clinica.iotools.bids_utils as bids

Expand Down
2 changes: 2 additions & 0 deletions docs/Converters/ADNI2BIDS.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ For example, we can provide a `subjects.txt` file with the following content:
006_S_4485
```

If the optional parameter `--xml_path` is given, `adni-to-bids` will try to extract metadata from the XML files found in the provided `xml_path`. This metadata is combined with the other sources of data and written to the corresponding files following the BIDS specification (usually TSV scans files and JSON sidecar files).

For more information about the optional parameters, you can type:

```Text
Expand Down
7 changes: 6 additions & 1 deletion test/nonregression/iotools/test_run_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def run_adni2bids(input_dir: PathLike, output_dir: PathLike, ref_dir: PathLike)

# Arrange
clinical_data_directory = input_dir / "clinical_data"
xml_directory = input_dir / "xml_metadata"
dataset_directory = input_dir / "unorganized_data"
subjects_list = input_dir / "subjects.txt"
modalities = ["T1", "PET_FDG", "PET_AMYLOID", "PET_TAU", "DWI", "FLAIR", "fMRI"]
Expand All @@ -110,7 +111,11 @@ def run_adni2bids(input_dir: PathLike, output_dir: PathLike, ref_dir: PathLike)
subjects_list,
modalities,
)
adni_to_bids.convert_clinical_data(clinical_data_directory, output_dir / "bids")
adni_to_bids.convert_clinical_data(
clinical_data_directory,
output_dir / "bids",
xml_path=xml_directory,
)
# Assert
compare_folders(output_dir / "bids", ref_dir / "bids", output_dir)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
<?xml version="1.0" encoding="UTF-8"?>
<idaxs xmlns="http://ida.loni.usc.edu"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="/xsds/idaxs_2_0.xsd">
<project xmlns="">
<projectIdentifier>$project</projectIdentifier>
<projectDescription>Alzheimer&#039;s Disease Neuroimaging Initiative</projectDescription>
<siteKey>site_key</siteKey>
<subject>
<subjectIdentifier>123_S_4567</subjectIdentifier>
<researchGroup>research_group</researchGroup>
<subjectSex>F</subjectSex>
<visit>
<visitIdentifier>ADNI Screening</visitIdentifier>
</visit>
<study>
<studyIdentifier>$study_id</studyIdentifier>
<subjectAge>75.05</subjectAge>
<ageQualifier>Y</ageQualifier>
<weightKg>63.5029</weightKg>
<postMortem>F</postMortem>
<series>
<seriesIdentifier>$series_id</seriesIdentifier>
<modality>$modality</modality>
<dateAcquired>$acq_time</dateAcquired>
</series>
<imagingProtocol>
<imageUID>$image_id</imageUID>
<description>Accelerated Sagittal MPRAGE</description>
<protocolTerm>
<protocol term="Acquisition Type">3D</protocol>
<protocol term="Weighting">T1</protocol>
<protocol term="Pulse Sequence">GR/IR</protocol>
<protocol term="Slice Thickness">1.0</protocol>
<protocol term="TE">2.98</protocol>
<protocol term="TR">2300.0</protocol>
<protocol term="TI">900.0</protocol>
<protocol term="Coil">PA</protocol>
<protocol term="Flip Angle">9.0</protocol>
<protocol term="Acquisition Plane">SAGITTAL</protocol>
<protocol term="Matrix X">240.0</protocol>
<protocol term="Matrix Y">256.0</protocol>
<protocol term="Matrix Z">208.0</protocol>
<protocol term="Pixel Spacing X">1.0</protocol>
<protocol term="Pixel Spacing Y">1.0</protocol>
<protocol term="Manufacturer">SIEMENS</protocol>
<protocol term="Mfg Model">Prisma_fit</protocol>
<protocol term="Field Strength">3.0</protocol>
</protocolTerm>
</imagingProtocol>
</study>
</subject>
</project>
</idaxs>
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
<?xml version="1.0" encoding="UTF-8"?>
<idaxs xmlns="http://ida.loni.usc.edu"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="/xsds/idaxs_2_0.xsd">
<project xmlns="">
<projectIdentifier>$project</projectIdentifier>
<projectDescription>Alzheimer&#039;s Disease Neuroimaging Initiative</projectDescription>
<siteKey>site_key</siteKey>
<subject>
<subjectIdentifier>234_S_5678</subjectIdentifier>
<researchGroup>research_group</researchGroup>
<subjectSex>F</subjectSex>
<subjectInfo item="APOE A1">3</subjectInfo>
<subjectInfo item="APOE A2">3</subjectInfo>
<visit>
<visitIdentifier>ADNI2 Year 2 Visit</visitIdentifier>
<assessment name="MMSE">
<component name="MMSE Total Score">
<assessmentScore attribute="MMSCORE">mmse_score</assessmentScore>
</component>
</assessment>
<assessment name="GDSCALE">
<component name="GDS Total Score">
<assessmentScore attribute="GDTOTAL">gdtotal</assessmentScore>
</component>
</assessment>
<assessment name="CDR">
<component name="CDR Total Score">
<assessmentScore attribute="CDGLOBAL">cd_global</assessmentScore>
</component>
</assessment>
<assessment name="Functional Assessment Questionnaire">
<component name="FAQ Total score">
<assessmentScore attribute="FAQTOTAL">faq_total</assessmentScore>
</component>
</assessment>
</visit>
<study>
<studyIdentifier>$study_id</studyIdentifier>
<subjectAge>74.4274</subjectAge>
<ageQualifier>Y</ageQualifier>
<weightKg>92.0793</weightKg>
<postMortem>F</postMortem>
<series>
<seriesIdentifier>$series_id</seriesIdentifier>
<modality>$modality</modality>
<dateAcquired>$acq_time</dateAcquired>
<seriesLevelMeta>
<annotation>
<text>Grinder Pipeline</text>
</annotation>
<derivedProduct>
<imageUID>$proc_id</imageUID>
<processedDataLabel>MT1; GradWarp; N3m</processedDataLabel>
<imageType>image volume</imageType>
<tissue>All</tissue>
<hemisphere>Both</hemisphere>
<anatomicStructure>Brain</anatomicStructure>
<registration>native</registration>
<relatedImage>
<imageUID>$image_id</imageUID>
<relation>derived from</relation>
</relatedImage>
<provenanceDetail>
<step>1</step>
<process>Gradient Unwarping</process>
<package>Grinder Pipeline</package>
<program>nifti_unwarp_resample</program>
<programVersion>1.0</programVersion>
<timestamp>2016-04-07T18:27:50</timestamp>
<platform>Linux</platform>
<operatingSystem>2.6.32-573.18.1.</operatingSystem>
</provenanceDetail>
<provenanceDetail>
<step>2</step>
<process>N3m</process>
<package>Grinder Pipeline</package>
<program>brain_extraction2</program>
<programVersion>2.04</programVersion>
<timestamp>2016-04-07T18:50:36</timestamp>
<platform>Linux</platform>
<operatingSystem>2.6.32-573.18.1.</operatingSystem>
</provenanceDetail>
<creationDate>0000-00-00</creationDate>
</derivedProduct>
<relatedImageDetail>
<originalRelatedImage>
<imageUID>$image_id</imageUID>
<description>MPRAGE GRAPPA2</description>
<protocolTerm>
<protocol term="Acquisition Type">3D</protocol>
<protocol term="Weighting">T1</protocol>
<protocol term="Pulse Sequence">GR/IR</protocol>
<protocol term="Slice Thickness">1.2</protocol>
<protocol term="TE">2.95</protocol>
<protocol term="TR">2300.0</protocol>
<protocol term="TI">900.0</protocol>
<protocol term="Coil">PA</protocol>
<protocol term="Flip Angle">9.0</protocol>
<protocol term="Acquisition Plane">SAGITTAL</protocol>
<protocol term="Matrix X">240.0</protocol>
<protocol term="Matrix Y">256.0</protocol>
<protocol term="Matrix Z">176.0</protocol>
<protocol term="Pixel Spacing X">1.05469</protocol>
<protocol term="Pixel Spacing Y">1.05469</protocol>
<protocol term="Manufacturer">SIEMENS</protocol>
<protocol term="Mfg Model">TrioTim</protocol>
<protocol term="Field Strength">3.0</protocol>
</protocolTerm>
</originalRelatedImage>
</relatedImageDetail>
</seriesLevelMeta>
</series>
</study>
</subject>
</project>
</idaxs>
Loading

0 comments on commit 7d9232d

Please sign in to comment.