From 74b922d63d733a78c4f022133553356104b1448c Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Wed, 27 Jan 2021 17:28:53 -0500
Subject: [PATCH] [ENH] Add threshold argument to make_adaptive_mask (#635)

* Add threshold argument to make_adaptive_mask.

* Binarize *thresholded* adaptive mask.

* Documentation touchup.

* Update test.

* Fix test.

* Fix PAID and synchronize combination and decay methods.

* Remove unused import.

* Add see alsos to impacted functions' docstrings.

* Fix style issues.

* Address @emdupre's review.

* Finish addressing review.
---
 tedana/combine.py           | 43 ++++++++++++++++++++++++++-----------
 tedana/decay.py             | 43 +++++++++++++++++++++++++++++++------
 tedana/decomposition/pca.py | 10 ++++++++-
 tedana/metrics/kundu_fit.py | 13 ++++++++---
 tedana/tests/test_utils.py  |  8 +++----
 tedana/utils.py             | 16 ++++++++------
 tedana/workflows/t2smap.py  |  2 +-
 tedana/workflows/tedana.py  |  3 ++-
 8 files changed, 103 insertions(+), 35 deletions(-)

diff --git a/tedana/combine.py b/tedana/combine.py
index 3eee8385f..2a567a998 100644
--- a/tedana/combine.py
+++ b/tedana/combine.py
@@ -3,7 +3,6 @@
 """
 import logging
 import numpy as np
-from tedana.utils import unmask
 from tedana.due import due, Doi
 
 LGR = logging.getLogger(__name__)
@@ -137,7 +136,10 @@ def make_optcom(data, tes, adaptive_mask, t2s=None, combmode='t2s', verbose=True
     tes : (E,) :obj:`numpy.ndarray`
         Array of TEs, in seconds.
     adaptive_mask : (S,) :obj:`numpy.ndarray`
-        Adaptive mask of the data indicating the number of echos with signal at each voxel
+        Array where each value indicates the number of echoes with good signal
+        for that voxel. This mask may be thresholded; for example, with values
+        less than 3 set to 0.
+        For more information on thresholding, see `make_adaptive_mask`.
     t2s : (S [x T]) :obj:`numpy.ndarray` or None, optional
         Estimated T2* values. Only required if combmode = 't2s'.
         Default is None.
@@ -181,6 +183,11 @@ def make_optcom(data, tes, adaptive_mask, t2s=None, combmode='t2s', verbose=True
            Magnetic Resonance in Medicine: An Official Journal of the
            International Society for Magnetic Resonance in Medicine,
            55(6), 1227-1235.
+
+    See Also
+    --------
+    :func:`tedana.utils.make_adaptive_mask` : The function used to create the ``adaptive_mask``
+                                              parameter.
     """
     if data.ndim != 3:
         raise ValueError('Input data must be 3D (S x E x T)')
@@ -217,23 +224,35 @@ def make_optcom(data, tes, adaptive_mask, t2s=None, combmode='t2s', verbose=True
                    'estimates')
         LGR.info(msg)
 
-    mask = adaptive_mask >= 3
-    data = data[mask, :, :]  # mask out unstable voxels/samples
+    echos_to_run = np.unique(adaptive_mask)
+    # When there is one good echo, use two
+    if 1 in echos_to_run:
+        echos_to_run = np.sort(np.unique(np.append(echos_to_run, 2)))
+    echos_to_run = echos_to_run[echos_to_run >= 2]
+
     tes = np.array(tes)[np.newaxis, ...]  # (1 x E) array_like
     combined = np.zeros((data.shape[0], data.shape[2]))
     report = True
-    for echo in np.unique(adaptive_mask[mask]):
-        echo_idx = adaptive_mask[mask] == echo
+    for i_echo, echo_num in enumerate(echos_to_run):
+        if echo_num == 2:
+            # Use the first two echoes for cases where there are
+            # either one or two good echoes
+            voxel_idx = np.where(np.logical_and(adaptive_mask > 0, adaptive_mask <= echo_num))[0]
+        else:
+            voxel_idx = np.where(adaptive_mask == echo_num)[0]
 
         if combmode == 'paid':
-            combined[echo_idx, :] = _combine_paid(data[echo_idx, :echo, :],
-                                                  tes[:echo], report=report)
+            combined[voxel_idx, :] = _combine_paid(data[voxel_idx, :echo_num, :],
+                                                   tes[:, :echo_num])
         else:
-            t2s_ = t2s[mask, ..., np.newaxis]  # mask out empty voxels/samples
+            t2s_ = t2s[..., np.newaxis]  # add singleton
 
-            combined[echo_idx, :] = _combine_t2s(
-                data[echo_idx, :echo, :], tes[:, :echo], t2s_[echo_idx, ...], report=report)
+            combined[voxel_idx, :] = _combine_t2s(
+                data[voxel_idx, :echo_num, :],
+                tes[:, :echo_num],
+                t2s_[voxel_idx, ...],
+                report=report
+            )
         report = False
 
-    combined = unmask(combined, mask)
     return combined
diff --git a/tedana/decay.py b/tedana/decay.py
index 4fcd7e19d..d8aa412dc 100644
--- a/tedana/decay.py
+++ b/tedana/decay.py
@@ -82,7 +82,9 @@ def fit_monoexponential(data_cat, echo_times, adaptive_mask, report=True):
         Echo times in milliseconds.
     adaptive_mask : (S,) :obj:`numpy.ndarray`
         Array where each value indicates the number of echoes with good signal
-        for that voxel.
+        for that voxel. This mask may be thresholded; for example, with values
+        less than 3 set to 0.
+        For more information on thresholding, see `make_adaptive_mask`.
     report : bool, optional
         Whether to log a description of this step or not. Default is True.
 
@@ -94,6 +96,11 @@ def fit_monoexponential(data_cat, echo_times, adaptive_mask, report=True):
     Notes
     -----
     This method is slower, but more accurate, than the log-linear approach.
+
+    See Also
+    --------
+    :func:`tedana.utils.make_adaptive_mask` : The function used to create the ``adaptive_mask``
+                                              parameter.
     """
     if report:
         RepLGR.info("A monoexponential model was fit to the data at each voxel "
@@ -113,6 +120,7 @@ def fit_monoexponential(data_cat, echo_times, adaptive_mask, report=True):
         data_cat, echo_times, adaptive_mask, report=False)
 
     echos_to_run = np.unique(adaptive_mask)
+    # When there is one good echo, use two
     if 1 in echos_to_run:
         echos_to_run = np.sort(np.unique(np.append(echos_to_run, 2)))
     echos_to_run = echos_to_run[echos_to_run >= 2]
@@ -123,6 +131,8 @@ def fit_monoexponential(data_cat, echo_times, adaptive_mask, report=True):
 
     for i_echo, echo_num in enumerate(echos_to_run):
         if echo_num == 2:
+            # Use the first two echoes for cases where there are
+            # either one or two good echoes
             voxel_idx = np.where(adaptive_mask <= echo_num)[0]
         else:
             voxel_idx = np.where(adaptive_mask == echo_num)[0]
@@ -190,7 +200,9 @@ def fit_loglinear(data_cat, echo_times, adaptive_mask, report=True):
         Echo times in milliseconds.
     adaptive_mask : (S,) :obj:`numpy.ndarray`
         Array where each value indicates the number of echoes with good signal
-        for that voxel.
+        for that voxel. This mask may be thresholded; for example, with values
+        less than 3 set to 0.
+        For more information on thresholding, see `make_adaptive_mask`.
     report : :obj:`bool`, optional
         Whether to log a description of this step or not. Default is True.
 
@@ -219,6 +231,7 @@ def fit_loglinear(data_cat, echo_times, adaptive_mask, report=True):
     n_samp, n_echos, n_vols = data_cat.shape
 
     echos_to_run = np.unique(adaptive_mask)
+    # When there is one good echo, use two
     if 1 in echos_to_run:
         echos_to_run = np.sort(np.unique(np.append(echos_to_run, 2)))
     echos_to_run = echos_to_run[echos_to_run >= 2]
@@ -229,7 +242,9 @@ def fit_loglinear(data_cat, echo_times, adaptive_mask, report=True):
 
     for i_echo, echo_num in enumerate(echos_to_run):
         if echo_num == 2:
-            voxel_idx = np.where(adaptive_mask <= echo_num)[0]
+            # Use the first two echoes for cases where there are
+            # either one or two good echoes
+            voxel_idx = np.where(np.logical_and(adaptive_mask > 0, adaptive_mask <= echo_num))[0]
         else:
             voxel_idx = np.where(adaptive_mask == echo_num)[0]
 
@@ -282,8 +297,10 @@ def fit_decay(data, tes, mask, adaptive_mask, fittype, report=True):
         Boolean array indicating samples that are consistently (i.e., across
         time AND echoes) non-zero
     adaptive_mask : (S,) array_like
-        Valued array indicating number of echos that have sufficient signal in
-        given sample
+        Array where each value indicates the number of echoes with good signal
+        for that voxel. This mask may be thresholded; for example, with values
+        less than 3 set to 0.
+        For more information on thresholding, see `make_adaptive_mask`.
     fittype : {loglin, curvefit}
         The type of model fit to use
     report : bool, optional
@@ -313,6 +330,11 @@ def fit_decay(data, tes, mask, adaptive_mask, fittype, report=True):
     Additionally, very small :math:`T_2^*` values above zero are replaced with a floor
     value to prevent zero-division errors later on in the workflow.
     It also replaces NaN values in the :math:`S_0` map with 0.
+
+    See Also
+    --------
+    :func:`tedana.utils.make_adaptive_mask` : The function used to create the ``adaptive_mask``
+                                              parameter.
     """
     if data.shape[1] != len(tes):
         raise ValueError('Second dimension of data ({0}) does not match number '
@@ -372,8 +394,10 @@ def fit_decay_ts(data, tes, mask, adaptive_mask, fittype):
         Boolean array indicating samples that are consistently (i.e., across
         time AND echoes) non-zero
     adaptive_mask : (S,) array_like
-        Valued array indicating number of echos that have sufficient signal in
-        given sample
+        Array where each value indicates the number of echoes with good signal
+        for that voxel. This mask may be thresholded; for example, with values
+        less than 3 set to 0.
+        For more information on thresholding, see `make_adaptive_mask`.
     fittype : :obj: `str`
         The type of model fit to use
 
@@ -393,6 +417,11 @@ def fit_decay_ts(data, tes, mask, adaptive_mask, fittype):
         Full S0 timeseries. For voxels affected by dropout, with good signal
         from only one echo, the full timeseries uses the single echo's value
         at that voxel/volume.
+
+    See Also
+    --------
+    :func:`tedana.utils.make_adaptive_mask` : The function used to create the ``adaptive_mask``
+                                              parameter.
     """
     n_samples, _, n_vols = data.shape
     tes = np.array(tes)
diff --git a/tedana/decomposition/pca.py b/tedana/decomposition/pca.py
index 07c2ef165..27ea23af3 100644
--- a/tedana/decomposition/pca.py
+++ b/tedana/decomposition/pca.py
@@ -67,7 +67,10 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
     mask : (S,) array_like
         Boolean mask array
     adaptive_mask : (S,) array_like
-        Adaptive mask of the data indicating the number of echos with signal at each voxel
+        Array where each value indicates the number of echoes with good signal
+        for that voxel. This mask may be thresholded; for example, with values
+        less than 3 set to 0.
+        For more information on thresholding, see `make_adaptive_mask`.
     t2sG : (S,) array_like
         Map of voxel-wise T2* estimates.
     ref_img : :obj:`str` or img_like
@@ -152,6 +155,11 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
     pca_mixing.tsv            PCA mixing matrix.
     pca_components.nii.gz     Component weight maps.
     ======================    =================================================
+
+    See Also
+    --------
+    :func:`tedana.utils.make_adaptive_mask` : The function used to create the ``adaptive_mask``
+                                              parameter.
     """
     if algorithm == 'kundu':
         alg_str = ("followed by the Kundu component selection decision "
diff --git a/tedana/metrics/kundu_fit.py b/tedana/metrics/kundu_fit.py
index c8414e1c8..62813b655 100644
--- a/tedana/metrics/kundu_fit.py
+++ b/tedana/metrics/kundu_fit.py
@@ -36,8 +36,10 @@ def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img,
         Mixing matrix for converting input data to component space, where `C`
         is components and `T` is the same as in `catd`
     adaptive_mask : (S) array_like
-        Adaptive mask, where each voxel's value is the number of echoes with
-        "good signal".
+        Array where each value indicates the number of echoes with good signal
+        for that voxel. This mask may be thresholded; for example, with values
+        less than 3 set to 0.
+        For more information on thresholding, see `make_adaptive_mask`.
     tes : list
         List of echo times associated with `catd`, in milliseconds
     ref_img : str or img_like
@@ -69,9 +71,14 @@ def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img,
     betas : :obj:`numpy.ndarray`
     mmix_corrected : :obj:`numpy.ndarray`
         Mixing matrix after sign correction and resorting (if reindex is True).
+
+    See Also
+    --------
+    :func:`tedana.utils.make_adaptive_mask` : The function used to create the ``adaptive_mask``
+                                              parameter.
     """
     # Use adaptive_mask as mask
-    mask = adaptive_mask >= 3
+    mask = adaptive_mask > 0
 
     if not (catd.shape[0] == adaptive_mask.shape[0] == tsoc.shape[0]):
         raise ValueError('First dimensions (number of samples) of catd ({0}), '
diff --git a/tedana/tests/test_utils.py b/tedana/tests/test_utils.py
index 692134813..eaa723fbe 100644
--- a/tedana/tests/test_utils.py
+++ b/tedana/tests/test_utils.py
@@ -80,15 +80,15 @@ def test_load_image():
 def test_make_adaptive_mask():
     # load data make masks
     data = io.load_data(fnames, n_echos=len(tes))[0]
-    mask, masksum = utils.make_adaptive_mask(data, getsum=True)
+    mask, masksum = utils.make_adaptive_mask(data, getsum=True, threshold=1)
 
     # getsum doesn't change mask values
     assert np.allclose(mask, utils.make_adaptive_mask(data))
     # shapes are all the same
     assert mask.shape == masksum.shape == (64350,)
-    assert np.allclose(mask, (masksum >= 3).astype(bool))
+    assert np.allclose(mask, (masksum >= 1).astype(bool))
     # mask has correct # of entries
-    assert mask.sum() == 41749
+    assert mask.sum() == 50786
     # masksum has correct values
     vals, counts = np.unique(masksum, return_counts=True)
     assert np.allclose(vals, np.array([0, 1, 2, 3]))
@@ -98,7 +98,7 @@ def test_make_adaptive_mask():
     # TODO: Add mask file with no bad voxels to test against
     mask, masksum = utils.make_adaptive_mask(data, mask=pjoin(datadir,
                                                               'mask.nii.gz'),
-                                             getsum=True)
+                                             getsum=True, threshold=3)
     assert np.allclose(mask, (masksum >= 3).astype(bool))
 
 
diff --git a/tedana/utils.py b/tedana/utils.py
index 850497dee..06588400b 100644
--- a/tedana/utils.py
+++ b/tedana/utils.py
@@ -41,7 +41,7 @@ def load_image(data):
     return fdata
 
 
-def make_adaptive_mask(data, mask=None, getsum=False):
+def make_adaptive_mask(data, mask=None, getsum=False, threshold=1):
     """
     Makes map of `data` specifying longest echo a voxel can be sampled with
 
@@ -55,6 +55,9 @@ def make_adaptive_mask(data, mask=None, getsum=False):
         to generate mask from data with good signal across echoes
     getsum : :obj:`bool`, optional
         Return `masksum` in addition to `mask`. Default: False
+    threshold : :obj:`int`, optional
+        Minimum echo count to retain in the mask. Default is 1, which is
+        equivalent not thresholding.
 
     Returns
     -------
@@ -90,19 +93,20 @@ def make_adaptive_mask(data, mask=None, getsum=False):
     masksum = (np.abs(echo_means) > lthrs).sum(axis=-1)
 
     if mask is None:
-        # make it a boolean mask to (where we have at least 1 echo with good signal)
-        mask = (masksum >= 3).astype(bool)
+        # make it a boolean mask to (where we have at least `threshold` echoes with good signal)
+        mask = (masksum >= threshold).astype(bool)
+        masksum[masksum < threshold] = 0
     else:
         # if the user has supplied a binary mask
         mask = load_image(mask).astype(bool)
         masksum = masksum * mask
-        mask = (masksum >= 3).astype(bool)
         # reduce mask based on masksum
         # TODO: Use visual report to make checking the reduced mask easier
-        if np.any(masksum[mask] < 3):
-            n_bad_voxels = np.sum(masksum[mask] < 3)
+        if np.any(masksum[mask] < threshold):
+            n_bad_voxels = np.sum(masksum[mask] < threshold)
             LGR.warning('{0} voxels in user-defined mask do not have good '
                         'signal. Removing voxels from mask.'.format(n_bad_voxels))
+            masksum[masksum < threshold] = 0
             mask = masksum.astype(bool)
 
     if getsum:
diff --git a/tedana/workflows/t2smap.py b/tedana/workflows/t2smap.py
index c66a36f36..94eacade3 100644
--- a/tedana/workflows/t2smap.py
+++ b/tedana/workflows/t2smap.py
@@ -204,7 +204,7 @@ def t2smap_workflow(data, tes, out_dir='.', mask=None,
         LGR.info('Computing adaptive mask')
     else:
         LGR.info('Using user-defined mask')
-    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True)
+    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True, threshold=1)
 
     LGR.info('Computing adaptive T2* map')
     if fitmode == 'all':
diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py
index cc6cc211b..26b525396 100644
--- a/tedana/workflows/tedana.py
+++ b/tedana/workflows/tedana.py
@@ -474,7 +474,8 @@ def tedana_workflow(data, tes, out_dir='.', mask=None,
         RepLGR.info("An initial mask was generated from the first echo using "
                     "nilearn's compute_epi_mask function.")
 
-    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True)
+    # Create an adaptive mask with at least 3 good echoes.
+    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True, threshold=3)
     LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
     io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)