From f09b68bf4130a5e8234fde79fa7b74957f9c110b Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Fri, 23 Feb 2024 14:49:16 -0500 Subject: [PATCH] DOC: Automatic flowchart --- docs/source/.gitignore | 1 + docs/source/features/gen_steps.py | 145 +++++++++++++++++- docs/source/features/overview.md | 53 ------- docs/source/v1.6.md.inc | 1 + .../preprocessing/_05_regress_artifact.py | 5 +- .../steps/preprocessing/_06a_run_ica.py | 9 +- .../steps/preprocessing/_06b_run_ssp.py | 3 +- .../steps/preprocessing/_08a_apply_ica.py | 3 +- .../steps/preprocessing/_08b_apply_ssp.py | 3 +- .../steps/preprocessing/_09_ptp_reject.py | 4 +- 10 files changed, 152 insertions(+), 75 deletions(-) delete mode 100644 docs/source/features/overview.md diff --git a/docs/source/.gitignore b/docs/source/.gitignore index 77afb012b..ce1332a62 100644 --- a/docs/source/.gitignore +++ b/docs/source/.gitignore @@ -1 +1,2 @@ features/steps.md +features/overview.md diff --git a/docs/source/features/gen_steps.py b/docs/source/features/gen_steps.py index 86ea6283f..2b3cc3bd7 100755 --- a/docs/source/features/gen_steps.py +++ b/docs/source/features/gen_steps.py @@ -6,20 +6,92 @@ from mne_bids_pipeline._config_utils import _get_step_modules -pre = """\ -# Detailed lis of processing steps +autogen_header = f"""\ +[//]: # (AUTO-GENERATED, TO CHANGE EDIT {'/'.join(Path(__file__).parts[-4:])}) +""" + +steps_pre = f"""\ +{autogen_header} + +# Detailed list of processing steps The following table provides a concise summary of each processing step. The step names can be used to run individual steps or entire groups of steps by passing their name(s) to `mne_bids_pipeline` via the `steps=...` argument. +""" # noqa: E501 + +overview_pre = f"""\ +{autogen_header} + +MNE-BIDS-Pipeline processes your data in a sequential manner, i.e., one step +at a time. The next step is only run after the previous steps have been +successfully completed. 
There are, of course, exceptions; for example, if you +chose not to apply ICA, the respective steps will simply be omitted and we'll +directly move to the subsequent steps. The following flow chart aims to give +you a brief overview of which steps are included in the pipeline, in which +order they are run, and how we group them together. + +!!! info + All intermediate results are saved to disk for later + inspection, and an **extensive report** is generated. + +!!! info + Analyses are conducted on individual (per-subject) as well as group level. """ +icon_map = { + "Filesystem initialization and dataset inspection": ":open_file_folder:", + "Preprocessing": ":broom:", + "Sensor-space analysis": ":satellite:", + "Source-space analysis": ":brain:", + "FreeSurfer-related processing": ":person_surfing:", +} +out_dir = Path(__file__).parent + print("Generating steps …") step_modules = _get_step_modules() +char_start = ord("A") + +# In principle we could try to sort this out based on naming, but for now let's just +# set our hierarchy manually and update it when we move files around since that's easy +# (and rare) enough to do. 
+manual_order = { + "Preprocessing": ( + ("01", "02"), + ("02", "03"), + ("03", "04"), + ("04", "05"), + ("05", "06a"), + ("05", "06b"), + ("05", "07"), + # technically we could have the raw data flow here, but it doesn't really help + # ("05", "08a"), + # ("05", "08b"), + ("06a", "08a"), + ("07", "08a"), + # Force the artifact-fitting and epoching steps on the same level, in this order + """\ + subgraph Z[" "] + direction LR + B06a + B07 + B06b + end + style Z fill:#0000,stroke-width:0px +""", + ("06b", "08b"), + ("07", "08b"), + ("08a", "09"), + ("08b", "09"), + ), +} # Construct the lines of steps.md -lines = [pre] +lines = [steps_pre] +overview_lines = [overview_pre] +used_titles = set() for di, (dir_, modules) in enumerate(step_modules.items(), 1): + # Steps if dir_ == "all": continue # this is an alias dir_module = importlib.import_module(f"mne_bids_pipeline.steps.{dir_}") @@ -29,7 +101,9 @@ dir_body = dir_body[1].strip() else: dir_body = "" - lines.append(f"## {di}. {dir_header}\n") + icon = icon_map[dir_header] + module_header = f"{di}. {icon} {dir_header}" + lines.append(f"## {module_header}\n") if dir_body: lines.append(f"{dir_body}\n") lines.append("| Step name | Description |") @@ -42,5 +116,64 @@ step_title = module.__doc__.split("\n")[0] lines.append(f"`{step_name}` | {step_title} |") lines.append("") -with open(Path(__file__).parent / "steps.md", "w") as fid: - fid.write("\n".join(lines)) + + # Overview + overview_lines.append( + f"""\ +## {module_header} + +```mermaid +flowchart TD""" + ) + chr_pre = chr(char_start + di - 1) # A, B, C, ... + start = None + prev_idx = None + title_map = {} + for mi, module in enumerate(modules, 1): + step_title = module.__doc__.split("\n")[0].rstrip(".") + idx = module.__name__.split(".")[-1].split("_")[1] # 01, 05a, etc. 
+ # Need to quote the title to deal with parens, and sanitize quotes + step_title = step_title.replace('"', "'") + assert step_title not in used_titles, f"Redundant title: {step_title}" + used_titles.add(step_title) + this_block = f'{chr_pre}{idx}["{step_title}"]' + # special case: manual order + title_map[idx] = step_title + if dir_header in manual_order: + continue + if mi == 1: + start = this_block + assert prev_idx is None + continue + if start is not None: + assert mi == 2, mi + overview_lines.append(f" {start} --> {this_block}") + start = None + else: + overview_lines.append(f" {chr_pre}{prev_idx} --> {this_block}") + prev_idx = idx + if dir_header in manual_order: + mapped = set() + for a_b in manual_order[dir_header]: + if isinstance(a_b, str): # insert directly + overview_lines.append(a_b) + continue + assert isinstance(a_b, tuple), type(a_b) + a_b = list(a_b) # allow modification + for ii, idx in enumerate(a_b): + assert idx in title_map, (dir_header, sorted(title_map)) + if idx not in mapped: + mapped.add(idx) + a_b[ii] = f'{idx}["{title_map[idx]}"]' + overview_lines.append(f" {chr_pre}{a_b[0]} --> {chr_pre}{a_b[1]}") + all_steps = set( + sum( + [a_b for a_b in manual_order[dir_header] if not isinstance(a_b, str)], + (), + ) + ) + assert mapped == all_steps, all_steps.symmetric_difference(mapped) + overview_lines.append("```\n") + +(out_dir / "steps.md").write_text("\n".join(lines), encoding="utf8") +(out_dir / "overview.md").write_text("\n".join(overview_lines), encoding="utf8") diff --git a/docs/source/features/overview.md b/docs/source/features/overview.md deleted file mode 100644 index 9fe044038..000000000 --- a/docs/source/features/overview.md +++ /dev/null @@ -1,53 +0,0 @@ -MNE-BIDS-Pipeline processes your data in a sequential manner, i.e., one step -at a time. The next step is only run after the previous steps have been -successfully completed. 
There are, of course, exceptions; for example, if you -chose not to apply ICA, the respective steps will simply be omitted and we'll -directly move to the subsequent steps. The following flow chart aims to give -you a brief overview of which steps are included in the pipeline, in which -order they are run, and how we group them together. - -!!! info - All intermediate results are saved to disk for later - inspection, and an **extensive report** is generated. - -!!! info - Analyses are conducted on individual (per-subject) as well as group level. - - -## :open_file_folder: Filesystem initialization and dataset inspection -```mermaid -flowchart TD - A1[initialize the target directories] --> A2[locate empty-room recordings] -``` - -## :broom: Preprocessing -```mermaid - flowchart TD - B1[Noisy & flat channel detection] --> B2[Maxwell filter] - B2 --> B3[Frequency filter] - B3 --> B4[Epoch creation] - B4 --> B5[SSP or ICA fitting] - B5 --> B6[Artifact removal via SSP or ICA] - B6 --> B7[Amplitude-based epoch rejection] -``` - -## :satellite: Sensor-space processing -```mermaid - flowchart TD - C1[ERP / ERF calculation] --> C2[MVPA: full epochs] - C2 --> C3[MVPA: time-by-time decoding] - C3 --> C4[Time-frequency decomposition] - C4 --> C5[MVPA: CSP] - C5 --> C6[Noise covariance estimation] - C6 --> C7[Grand average] -``` - -## :brain: Source-space processing -```mermaid - flowchart TD - D1[BEM surface creation] --> D2[BEM solution] - D2 --> D3[Source space creation] - D3 --> D4[Forward model creation] - D4 --> D5[Inverse solution] - D5 --> D6[Grand average] -``` diff --git a/docs/source/v1.6.md.inc b/docs/source/v1.6.md.inc index 3abc9a081..5ef48dcb8 100644 --- a/docs/source/v1.6.md.inc +++ b/docs/source/v1.6.md.inc @@ -36,3 +36,4 @@ - The package build backend has been switched from `setuptools` to `hatchling`. 
(#825 by @hoechenberger) - Code formatting now uses `ruff format` instead of `black` (#834, #838 by @larsoner) - Code caching is now tested using GitHub Actions (#836 by @larsoner) +- Steps in the documentation are now automatically parsed into flowcharts (#859 by @larsoner) diff --git a/mne_bids_pipeline/steps/preprocessing/_05_regress_artifact.py b/mne_bids_pipeline/steps/preprocessing/_05_regress_artifact.py index cb31df04d..9fce737cc 100644 --- a/mne_bids_pipeline/steps/preprocessing/_05_regress_artifact.py +++ b/mne_bids_pipeline/steps/preprocessing/_05_regress_artifact.py @@ -1,7 +1,4 @@ -"""Run Signal Subspace Projections (SSP) for artifact correction. - -These are often also referred to as PCA vectors. -""" +"""Temporal regression for artifact removal.""" from types import SimpleNamespace from typing import Optional diff --git a/mne_bids_pipeline/steps/preprocessing/_06a_run_ica.py b/mne_bids_pipeline/steps/preprocessing/_06a_run_ica.py index 7bfef3c56..fb6f1b089 100644 --- a/mne_bids_pipeline/steps/preprocessing/_06a_run_ica.py +++ b/mne_bids_pipeline/steps/preprocessing/_06a_run_ica.py @@ -1,14 +1,13 @@ -"""Run Independent Component Analysis (ICA) for artifact correction. +"""Fit ICA. -This fits ICA on epoched data filtered with 1 Hz highpass, -for this purpose only using fastICA. Separate ICAs are fitted and stored for -MEG and EEG data. +This fits Independent Component Analysis (ICA) on raw data filtered with 1 Hz highpass, +temporarily creating task-related epochs. Before performing ICA, we reject epochs based on peak-to-peak amplitude above the 'ica_reject' to filter massive non-biological artifacts. To actually remove designated ICA components from your data, you will have to -run 05a-apply_ica.py. +run the apply_ica step. 
""" from collections.abc import Iterable diff --git a/mne_bids_pipeline/steps/preprocessing/_06b_run_ssp.py b/mne_bids_pipeline/steps/preprocessing/_06b_run_ssp.py index 7ec75ef91..1580836ca 100644 --- a/mne_bids_pipeline/steps/preprocessing/_06b_run_ssp.py +++ b/mne_bids_pipeline/steps/preprocessing/_06b_run_ssp.py @@ -1,5 +1,6 @@ -"""Run Signal Subspace Projections (SSP) for artifact correction. +"""Compute SSP. +Signal subspace projections (SSP) vectors are computed from EOG and ECG signals. These are often also referred to as PCA vectors. """ diff --git a/mne_bids_pipeline/steps/preprocessing/_08a_apply_ica.py b/mne_bids_pipeline/steps/preprocessing/_08a_apply_ica.py index 8fcc8141c..fba8099f2 100644 --- a/mne_bids_pipeline/steps/preprocessing/_08a_apply_ica.py +++ b/mne_bids_pipeline/steps/preprocessing/_08a_apply_ica.py @@ -1,4 +1,4 @@ -"""Apply ICA and obtain the cleaned epochs and raw data. +"""Apply ICA. Blinks and ECG artifacts are automatically detected and the corresponding ICA components are removed from the data. @@ -8,7 +8,6 @@ make sure you did not re-run the ICA in the meantime. Otherwise (especially if the random state was not set, or you used a different machine, the component order might differ). - """ from types import SimpleNamespace diff --git a/mne_bids_pipeline/steps/preprocessing/_08b_apply_ssp.py b/mne_bids_pipeline/steps/preprocessing/_08b_apply_ssp.py index e6fad4b8f..3f6d6576a 100644 --- a/mne_bids_pipeline/steps/preprocessing/_08b_apply_ssp.py +++ b/mne_bids_pipeline/steps/preprocessing/_08b_apply_ssp.py @@ -1,8 +1,7 @@ -"""Apply SSP projections and obtain the cleaned epochs and raw data. +"""Apply SSP. Blinks and ECG artifacts are automatically detected and the corresponding SSP projections components are removed from the data. 
- """ from types import SimpleNamespace diff --git a/mne_bids_pipeline/steps/preprocessing/_09_ptp_reject.py b/mne_bids_pipeline/steps/preprocessing/_09_ptp_reject.py index d08469b3c..434b235ec 100644 --- a/mne_bids_pipeline/steps/preprocessing/_09_ptp_reject.py +++ b/mne_bids_pipeline/steps/preprocessing/_09_ptp_reject.py @@ -1,6 +1,6 @@ -"""Remove epochs based on peak-to-peak (PTP) amplitudes. +"""Remove epochs based on PTP amplitudes. -Epochs containing peak-to-peak above the thresholds defined +Epochs containing peak-to-peak (PTP) above the thresholds defined in the 'reject' parameter are removed from the data. This step will drop epochs containing non-biological artifacts