# [ENH] start bidsifying output #80
Changes from 18 commits
@@ -1,7 +1,52 @@
# Outputs

The output of this app aims to follow the guideline
of the [BIDS extension proposal 17 - Generic BIDS connectivity data schema](https://bids.neuroimaging.io/bep017).

The content of the metadata files is described in this BIDS extension proposal.

## participant level

When performing `participant` level analysis, the output is one HDF5 file per participant passed to `--participant_label`, or for all subjects under `bids_dir` when no label is given.
The output file name is:

```
sub-<participant_id>/[ses-<ses_id>]/func/<source_filename>_atlas-<atlas_name>_meas-PearsonCorrelation_desc-<denoising_strategy>_relmat.h5
```

## group level

When performing `group` level analysis,
the file will contain the time series and connectomes of each subject,
as well as group-average connectomes.

The output is one HDF5 file per participant passed to `--participant_label`, or for all subjects under `bids_dir` when no label is given.

### Example

```
giga_connectome/data/test_data/output
├── dataset_description.json
├── meas-PearsonCorrelation_desc-simple_relmat.json
├── group
│   └── atlas-Schaefer20187Networks_meas-PearsonCorrelation_desc-simple_relmat.h5
├── sub-1
│   ├── ses-timepoint1
│   │   └── func
│   │       ├── sub-1_ses-timepoint1_task-probabilisticclassification_run-01_space-MNI152NLin2009cAsym_res-2_atlas-Schaefer20187Networks_meas-PearsonCorrelation_desc-simple_relmat.h5
│   │       └── sub-1_ses-timepoint1_task-probabilisticclassification_run-02_space-MNI152NLin2009cAsym_res-2_atlas-Schaefer20187Networks_meas-PearsonCorrelation_desc-simple_relmat.h5
│   └── ses-timepoint2
│       └── func
│           ├── sub-1_ses-timepoint2_task-probabilisticclassification_run-01_space-MNI152NLin2009cAsym_res-2_atlas-Schaefer20187Networks_meas-PearsonCorrelation_desc-simple_relmat.h5
│           └── sub-1_ses-timepoint2_task-probabilisticclassification_run-02_space-MNI152NLin2009cAsym_res-2_atlas-Schaefer20187Networks_meas-PearsonCorrelation_desc-simple_relmat.h5
└── sub-2
    ├── ses-timepoint1
    │   └── func
    │       ├── sub-2_ses-timepoint1_task-probabilisticclassification_run-01_space-MNI152NLin2009cAsym_res-2_atlas-Schaefer20187Networks_meas-PearsonCorrelation_desc-simple_relmat.h5
    │       └── sub-2_ses-timepoint1_task-probabilisticclassification_run-02_space-MNI152NLin2009cAsym_res-2_atlas-Schaefer20187Networks_meas-PearsonCorrelation_desc-simple_relmat.h5
    └── ses-timepoint2
        └── func
            ├── sub-2_ses-timepoint2_task-probabilisticclassification_run-01_space-MNI152NLin2009cAsym_res-2_atlas-Schaefer20187Networks_meas-PearsonCorrelation_desc-simple_relmat.h5
            └── sub-2_ses-timepoint2_task-probabilisticclassification_run-02_space-MNI152NLin2009cAsym_res-2_atlas-Schaefer20187Networks_meas-PearsonCorrelation_desc-simple_relmat.h5
```
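The HDF5 files above store named datasets inside subject/session groups. As a quick way to see what a `relmat.h5` contains, a minimal inspection sketch (the group and dataset names are assumptions based on the naming convention described above, not a guaranteed API):

```python
import h5py


def list_connectome_datasets(h5_path):
    """Return the full paths of all datasets stored in an HDF5 file."""
    names = []
    with h5py.File(h5_path, "r") as f:
        # visititems walks every group/dataset; keep only datasets
        f.visititems(
            lambda name, obj: names.append(name)
            if isinstance(obj, h5py.Dataset)
            else None
        )
    return names
```

Calling `list_connectome_datasets` on a participant-level output should list entries such as `sub-1/ses-timepoint1/<attribute_name>_timeseries`.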
@@ -1,12 +1,15 @@
from typing import Union, List
from pathlib import Path

import json

import h5py
from tqdm import tqdm
import numpy as np
from nilearn.connectome import ConnectivityMeasure
from nilearn.maskers import NiftiLabelsMasker, NiftiMapsMasker
from bids.layout import BIDSImageFile
import pandas as pd

from giga_connectome import utils
from giga_connectome.connectome import generate_timeseries_connectomes
@@ -18,6 +21,7 @@

def run_postprocessing_dataset(
    strategy: dict,
    atlas: dict,
    resampled_atlases: List[Union[str, Path]],
    images: List[BIDSImageFile],
    group_mask: Union[str, Path],
@@ -61,6 +65,9 @@ def run_postprocessing_dataset(
    strategy : dict
        Parameters for `load_confounds_strategy` or `load_confounds`.

    atlas : dict
        Atlas settings.

    resampled_atlases : list of str or pathlib.Path
        Atlas niftis resampled to the common space of the dataset.
@@ -78,16 +85,14 @@ def run_postprocessing_dataset(
        Smoothing kernel size, passed to nilearn masker.

    output_path :
        Full path to output directory.

    analysis_level : str
        Level of analysis, either "participant" or "group".

    calculate_average_correlation : bool
        Whether to calculate average correlation within each parcel.
    """
    atlas_maskers, connectomes = {}, {}
    for atlas_path in resampled_atlases:
        if isinstance(atlas_path, str):
@@ -104,7 +109,6 @@ def run_postprocessing_dataset(
    gc_log.info("Processing subject")

    for img in tqdm(images):

        gc_log.info(f"Processing image:\n{img.filename}")
@@ -114,8 +118,32 @@ def run_postprocessing_dataset(
        )
        # parse file name
        subject, session, specifier = utils.parse_bids_name(img.path)

        # folder for this subject's output
        connectome_path = output_path / subject
        if session:
            connectome_path = connectome_path / session
        connectome_path = connectome_path / "func"

        # All timeseries derivatives have the same metadata,
        # so one json file for them all.
        json_filename = connectome_path / utils.output_filename(
            source_file=Path(img.filename).stem,
            atlas=atlas["name"],
            extension="json",
        )
        utils.check_path(json_filename)
        with open(json_filename, "w") as f:
            json.dump(
                {"RepetitionTime": img.entities["RepetitionTime"]},
                f,
                indent=4,
            )

        for desc, masker in atlas_maskers.items():
            attribute_name = (
                f"{subject}_{specifier}_atlas-{atlas['name']}_desc-{desc}"
            )
            if not denoised_img:
                time_series_atlas, correlation_matrix = None, None
@@ -136,21 +164,37 @@ def run_postprocessing_dataset(
                )
                connectomes[desc].append(correlation_matrix)

            # one output file per input file per atlas

            # dump correlation_matrix to tsv
            tsv_filename = connectome_path / utils.output_filename(
                source_file=Path(img.filename).stem,
                atlas=atlas["name"],
                extension="tsv",
                strategy=strategy["name"],
                desc=desc,
            )
            utils.check_path(tsv_filename)
            df = pd.DataFrame(correlation_matrix)
            df.to_csv(tsv_filename, sep="\t", index=False)

            # dump timeseries to h5
            h5_filename = connectome_path / utils.output_filename(
                source_file=Path(img.filename).stem,
                atlas=atlas["name"],
                extension="h5",
                strategy=strategy["name"],
                desc=desc,
            )
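The TSV dump above can be round-tripped with pandas: a correlation matrix written with `to_csv(sep="\t", index=False)` is recoverable with `read_csv`. A self-contained sketch (the file path here is only for demonstration):

```python
import os
import tempfile

import numpy as np
import pandas as pd

# Build a small correlation matrix from synthetic time series:
# 50 timepoints for 4 parcels.
rng = np.random.default_rng(0)
ts = rng.standard_normal((50, 4))
correlation_matrix = np.corrcoef(ts.T)  # 4 x 4 symmetric matrix

# Write and read back, mirroring the dump in the code above.
tsv_path = os.path.join(tempfile.gettempdir(), "relmat_demo.tsv")
pd.DataFrame(correlation_matrix).to_csv(tsv_path, sep="\t", index=False)
recovered = pd.read_csv(tsv_path, sep="\t").to_numpy()
```

Since `index=False` drops the row index, the file contains only a header row of column labels and the matrix values.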
> Review discussion on this hunk:
> - If we want to be fully BIDS compliant, shall we save time series to h5 as well?
> - yeah am working on this, ideally maybe even in nifti, no?
> - no nifti, since we are not outputting seed based maps?
> - ok, will dump that in tsv then
            utils.check_path(h5_filename)
            flag = _set_file_flag(h5_filename)
            with h5py.File(h5_filename, flag) as f:
                group = _fetch_h5_group(f, subject, session)
                group.create_dataset(
                    f"{attribute_name}_timeseries", data=time_series_atlas
                )

        gc_log.info(f"Saved to:\n{connectome_path}")

    if analysis_level == "group":
> Review comment: A bit extra to this PR, but it felt like a minimum description of how to run a demo was necessary.