Document decision tree steps in report and remove log_extra_report (#…

…1043) * Add comments to the minimal decision tree. * Improve comments. * Move from comments to log_extra_report. * Add comments and fix one description. * Update minimal.json * Retain artifacts. * Store correct artifact path. * Test minimal decision tree. * Add newlines. * Update tedana.py * Update tedana.py * Keep documenting the trees. * Update minimal.json * Fix decision tree description. * Document the remaining nodes in the kundu tree. * Fix manrej typo. * Cite Olafsson paper for decision tree nodes. * updated minimal.json and started references.bib * Merge 'upstream/main' into doc-tree-dh * aligning to main again * test-component_selector * edited comments. added DOIs. Removed log_extra_reports * fixed references for decision trees * solved citep issue and edited decision tree doi * Fixed citep for olafsson * removed figshare url that excluded version number * Apply suggestions from code review Co-authored-by: Taylor Salo <[email protected]> * add warning for log_extra_report --------- Co-authored-by: Taylor Salo <[email protected]> Co-authored-by: Taylor Salo <[email protected]>
ME-ICA · Feb 23, 2024 · 1f1845b · 1f1845b
1 parent d0298f2
commit 1f1845b
Show file tree

Hide file tree

Showing 15 changed files with 214 additions and 238 deletions.
diff --git a/docs/building_decision_trees.rst b/docs/building_decision_trees.rst
@@ -288,9 +288,8 @@ tree function:
 - ``custom_node_label``: A brief label for what happens in this node that can be used in
   a decision tree summary table or flow chart. If custom_node_label is not defined,
   then each function has default descriptive text.
-- ``log_extra_report``, ``log_extra_info``: Text for each function call is automatically placed
-  in the logger output. In addition to that text, the text in these these strings will
-  also be included in the logger with the report or info codes respectively. These
+- ``log_extra_info``: Text for each function call is automatically placed
+  in the logger output with the info label. These
   might be useful to give a narrative explanation of why a step was parameterized a
   certain way.
 - ``only_used_metrics``: If true, this function will only return the names of the component
@@ -351,8 +350,8 @@ that should be used instead.
 Calculation nodes should check if the value they are calculating was already calculated
 and output a warning if the function overwrites an existing value
 
-Code that adds the text ``log_extra_info`` and ``log_extra_report`` into the appropriate
-logs (if they are provided by the user)
+Code that adds the text ``log_extra_info`` into the output
+log (if it is provided by the user)
 
 After the above information is included,
 all functions will call :func:`~tedana.selection.selection_utils.selectcomps2use`,

diff --git a/tedana/docs.py b/tedana/docs.py
@@ -39,13 +39,6 @@
     components.
 """
 
-docdict[
-    "log_extra_report"
-] = """
-log_extra_report : :obj:`str`
-    Additional text to the report log. Default="".
-"""
-
 docdict[
     "log_extra_info"
 ] = """

diff --git a/tedana/resources/decision_trees/kundu.json b/tedana/resources/decision_trees/kundu.json
diff --git a/tedana/resources/decision_trees/minimal.json b/tedana/resources/decision_trees/minimal.json
@@ -1,7 +1,7 @@
 {
-    "tree_id": "minimal_decision_tree_test1",
-    "info": "Proposed minimal decision tree",
-    "report": "This is based on the minimal criteria of the original MEICA decision tree \\citep{kundu2013integrated} without the more aggressive noise removal steps \\citep{dupre2021te}.",
+    "tree_id": "minimal_decision_tree",
+    "info": "first version of minimal decision tree",
+    "report": "The minimal decision tree \\citep{tedana_decision_trees} is a simplified version of the MEICA decision tree \\citep{kundu2013integrated,dupre2021te} without many criteria that do not rely on kappa and rho thresholds. ",
     "necessary_metrics": [
         "kappa",
         "rho",
@@ -21,6 +21,7 @@
         "Unlikely BOLD",
         "Low variance"
     ],
+    "_comment": "More information on the minimal decision tree and how it differs from other options is at https://tedana.readthedocs.io/en/stable/included_decision_trees.html. Descriptions of the metrics used are in desc-tedana.metrics.json, which is outputted when this tree is run",
     "nodes": [
         {
             "functionname": "manual_classify",
@@ -29,10 +30,11 @@
                 "decide_comps": "all"
             },
             "kwargs": {
-                "log_extra_report": "",
                 "clear_classification_tags": true,
-                "dont_warn_reclassify": true
-            }
+                "dont_warn_reclassify": true,
+                "log_extra_info": ""
+            },
+            "_comment": "All components are initially labeled as 'unclassified'."
         },
         {
             "functionname": "dec_left_op_right",
@@ -45,9 +47,10 @@
                 "right": "kappa"
             },
             "kwargs": {
-                "log_extra_report": "",
-                "tag_if_true": "Unlikely BOLD"
-            }
+                "tag_if_true": "Unlikely BOLD",
+                "log_extra_info": ""
+            },
+            "_comment": "The first four steps are for rejecting components that are very unlikely to have substantial T2* signal. Any components with rho greater than kappa are rejected. Higher rho than kappa means that the component better fits the TE-independence (S0) model than the TE-dependence (T2*) model."
         },
         {
             "functionname": "dec_left_op_right",
@@ -63,17 +66,22 @@
                 "left2": "countsigFT2",
                 "op2": ">",
                 "right2": 0,
-                "log_extra_report": "",
-                "tag_if_true": "Unlikely BOLD"
-            }
+                "tag_if_true": "Unlikely BOLD",
+                "log_extra_info": ""
+            },
+            "_comment": "Any components with more voxels that are significant based on the S0 model's F-statistics than the T2* model's are rejected, as long as there is at least one significant voxel for the T2* model."
         },
         {
             "functionname": "calc_median",
             "parameters": {
                 "decide_comps": "all",
                 "metric_name": "variance explained",
                 "median_label": "varex"
-            }
+            },
+            "kwargs": {
+                "log_extra_info": ""
+            },
+            "_comment": "The median variance explained is calculated across all components, for use in later steps."
         },
         {
             "functionname": "dec_left_op_right",
@@ -89,9 +97,10 @@
                 "left2": "variance explained",
                 "op2": ">",
                 "right2": "median_varex",
-                "log_extra_report": "",
-                "tag_if_true": "Unlikely BOLD"
-            }
+                "tag_if_true": "Unlikely BOLD",
+                "log_extra_info": ""
+            },
+            "_comment": "Any components with higher S0 model beta map-F-statistic map Dice similarity index than T2* model beta map-F-statistic map Dice similarity index and greater than median variance explained are rejected. In slightly plainer English, this step rejects any high-variance components where significant voxels in the F-stat map overlap more with highly S0-associated voxels than T2*-associated voxels."
         },
         {
             "functionname": "dec_left_op_right",
@@ -107,20 +116,20 @@
                 "left2": "variance explained",
                 "op2": ">",
                 "right2": "median_varex",
-                "log_extra_report": "",
-                "tag_if_true": "Unlikely BOLD"
-            }
+                "tag_if_true": "Unlikely BOLD",
+                "log_extra_info": ""
+            },
+            "_comment": "Any components with a negative t-statistic comparing the distribution of T2* model F-statistics from voxels in clusters to those of voxels not in clusters and variance explained greater than median are rejected. That is, reject any high-variance components exhibiting more 'speckled' T2*-associated voxels than 'clustered' ones."
         },
         {
             "functionname": "calc_kappa_elbow",
             "parameters": {
                 "decide_comps": "all"
             },
             "kwargs": {
-                "log_extra_info": "",
-                "log_extra_report": ""
+                "log_extra_info": ""
             },
-            "_comment": ""
+            "_comment": "The kappa elbow is calculated from all components, for use in later steps."
         },
         {
             "functionname": "calc_rho_elbow",
@@ -130,10 +139,9 @@
             "kwargs": {
                 "subset_decide_comps": "unclassified",
                 "rho_elbow_type": "liberal",
-                "log_extra_info": "",
-                "log_extra_report": ""
+                "log_extra_info": ""
             },
-            "_comment": ""
+            "_comment": "This step determines the 'rho elbow' based on the rho values for all of the components, as well as just the unclassified components. It calculates the elbow for each set of components and then takes the maximum of the two."
         },
         {
             "functionname": "dec_left_op_right",
@@ -146,8 +154,9 @@
                 "right": "kappa_elbow_kundu"
             },
             "kwargs": {
-                "log_extra_report": ""
-            }
+                "log_extra_info": ""
+            },
+            "_comment": "Any unclassified components with kappa greater than or equal to the kappa elbow are provisionally accepted. Any remaining unclassified components are provisionally rejected. Nothing is left 'unclassified'"
         },
         {
             "functionname": "dec_left_op_right",
@@ -160,11 +169,11 @@
                 "right": "rho"
             },
             "kwargs": {
-                "log_extra_info": "If kappa>elbow and kappa>2*rho accept even if rho>elbow",
-                "log_extra_report": "",
                 "right_scale": 2,
-                "tag_if_true": "Likely BOLD"
-            }
+                "tag_if_true": "Likely BOLD",
+                "log_extra_info": ""
+            },
+            "_comment": "Any provisionally accepted components with kappa greater than two times rho are accepted. That is, even if a component has a high rho value, if kappa is above threshold and substantially higher than rho, assume it is something worth keeping and accept it."
         },
         {
             "functionname": "dec_left_op_right",
@@ -180,8 +189,9 @@
                 "right": "rho_elbow_liberal"
             },
             "kwargs": {
-                "log_extra_report": ""
-            }
+                "log_extra_info": ""
+            },
+            "_comment": "Any provisionally accepted or provisionally rejected components with rho values greater than the liberal rho elbow are provisionally rejected."
         },
         {
             "functionname": "dec_variance_lessthan_thresholds",
@@ -192,12 +202,12 @@
             },
             "kwargs": {
                 "var_metric": "variance explained",
-                "log_extra_info": "",
-                "log_extra_report": "",
                 "single_comp_threshold": 0.1,
                 "all_comp_threshold": 1.0,
-                "tag_if_true": "Low variance"
-            }
+                "tag_if_true": "Low variance",
+                "log_extra_info": ""
+            },
+            "_comment": "This step flags remaining low-variance components (less than 0.1%) and accepts up to 1% cumulative variance across these components. This is done because these components don't explain enough variance to be worth further reducing the degrees of freedom of the denoised data."
         },
         {
             "functionname": "manual_classify",
@@ -206,10 +216,10 @@
                 "decide_comps": "provisionalaccept"
             },
             "kwargs": {
-                "log_extra_info": "",
-                "log_extra_report": "",
-                "tag": "Likely BOLD"
-            }
+                "tag": "Likely BOLD",
+                "log_extra_info": ""
+            },
+            "_comment": "All remaining provisionally accepted components are accepted."
         },
         {
             "functionname": "manual_classify",
@@ -221,10 +231,10 @@
                 ]
             },
             "kwargs": {
-                "log_extra_info": "",
-                "log_extra_report": "",
-                "tag": "Unlikely BOLD"
-            }
+                "tag": "Unlikely BOLD",
+                "log_extra_info": ""
+            },
+            "_comment": "All remaining unclassified (nothing should be unclassified) or provisionally rejected components are rejected."
         }
     ]
 }
diff --git a/tedana/resources/references.bib b/tedana/resources/references.bib
@@ -313,3 +313,23 @@ @misc{sochat2015ttoz
   url       = {https://doi.org/10.5281/zenodo.32508},
   year      = 2015
 }
+
+@article{olafsson2015enhanced,
+  title     = {Enhanced identification of BOLD-like components with multi-echo simultaneous multi-slice (MESMS) fMRI and multi-echo ICA},
+  author    = {Olafsson, Valur and Kundu, Prantik and Wong, Eric C and Bandettini, Peter A and Liu, Thomas T},
+  journal   = {Neuroimage},
+  volume    = {112},
+  pages     = {43--51},
+  year      = {2015},
+  publisher = {Elsevier},
+  url       = {https://doi.org/10.1016/j.neuroimage.2015.02.052},
+  doi       = {10.1016/j.neuroimage.2015.02.052}
+}
+
+@article{tedana_decision_trees,
+  title   = {Component selection decision trees in tedana},
+  author  = {tedana community},
+  journal = {figshare},
+  year    = {2024},
+  doi     = {10.6084/m9.figshare.25251433.v1}
+}
diff --git a/tedana/selection/component_selector.py b/tedana/selection/component_selector.py
@@ -149,7 +149,15 @@ def validate_tree(tree):
 
         # Only if kwargs are inputted, make sure they are all valid
         if node.get("kwargs") is not None:
-            invalid_kwargs = set(node.get("kwargs").keys()) - kwargs
+            node_kwargs = set(node.get("kwargs").keys())
+            if "log_extra_report" in node_kwargs:
+                LGR.warning(
+                    f"Node {i} includes the 'log_extra_report' parameter. "
+                    "This was removed from the code and will not be used."
+                )
+                node_kwargs.remove("log_extra_report")
+                del tree["nodes"][i]["kwargs"]["log_extra_report"]
+            invalid_kwargs = node_kwargs - kwargs
             if len(invalid_kwargs) > 0:
                 err_msg += (
                     f"Node {i} has additional, undefined optional parameters (kwargs): "