From 8702d0e25f737070315f59a395b441662c7911e0 Mon Sep 17 00:00:00 2001
From: Evan Hathaway <42755980+ephathaway@users.noreply.github.com>
Date: Thu, 3 Jun 2021 07:14:58 -0700
Subject: [PATCH] MRG, ENH: Export evokeds MFF (#9406)

* ENH: Add export evokeds to MFF function

* ENH: Add export evokeds function

* Add tests for export evokeds

* DOC: Add export_evokeds to python reference

* DOC: Fix history argument type

* STY: Use logger for user message

* FIX: Nest pytz import

* Clarify MFF specific arguments

* DOC: Use shared warning message

* FIX: Test digitization data from categories.xml

* STY: Use regex match for pytest.raises

* STY: Use zipped lists

* Refactor assert statements

* ENH: Store MFF device type in info

* Determine device type from info

* Use kwargs for format-specific exports

* Use current time for record time

Because averaged files are absolute time agnostic,
we can simplify by using plugging in the current time
for the record time. This is also the behavior when
running an averaging tool in EGI Net Station software.

* Add MFF export funtion to python reference

* ENH: Reorganize

* FIX: Missed a file

* ENH: For newer mffpy

Co-authored-by: Eric Larson <larson.eric.d@gmail.com>
---
 doc/export.rst                  |   2 +
 mne/__init__.py                 |   3 +-
 mne/epochs.py                   |   2 +-
 mne/export/__init__.py          |   3 +-
 mne/export/_egimff.py           | 150 ++++++++++++++++++++++++++++++++
 mne/export/_export.py           |  60 ++++++++++++-
 mne/export/tests/test_export.py |  86 +++++++++++++++++-
 mne/io/base.py                  |   2 +-
 mne/io/egi/egimff.py            |  29 +++---
 mne/io/egi/tests/test_egi.py    |   3 +
 mne/utils/docs.py               |   2 +-
 11 files changed, 323 insertions(+), 19 deletions(-)
 create mode 100644 mne/export/_egimff.py

diff --git a/doc/export.rst b/doc/export.rst
index a8349143ba8..32f58f230bf 100644
--- a/doc/export.rst
+++ b/doc/export.rst
@@ -14,4 +14,6 @@ Exporting
    :toctree: generated/
 
    export_epochs
+   export_evokeds
+   export_evokeds_mff
    export_raw
diff --git a/mne/__init__.py b/mne/__init__.py
index 66893ab4b60..b7fc1b99612 100644
--- a/mne/__init__.py
+++ b/mne/__init__.py
@@ -77,7 +77,8 @@
                           events_from_annotations)
 from .epochs import (BaseEpochs, Epochs, EpochsArray, read_epochs,
                      concatenate_epochs, make_fixed_length_epochs)
-from .evoked import Evoked, EvokedArray, read_evokeds, write_evokeds, combine_evoked
+from .evoked import (Evoked, EvokedArray, read_evokeds, write_evokeds,
+                     combine_evoked)
 from .label import (read_label, label_sign_flip,
                     write_label, stc_to_label, grow_labels, Label, split_label,
                     BiHemiLabel, read_labels_from_annot, write_labels_to_annot,
diff --git a/mne/epochs.py b/mne/epochs.py
index e1be76459d9..3b4a6b95714 100644
--- a/mne/epochs.py
+++ b/mne/epochs.py
@@ -1823,7 +1823,7 @@ def export(self, fname, fmt='auto', verbose=None):
         """Export Epochs to external formats.
 
         Supported formats: EEGLAB (set, uses :mod:`eeglabio`)
-        %(export_warning)s
+        %(export_warning)s :meth:`save` instead.
 
         Parameters
         ----------
diff --git a/mne/export/__init__.py b/mne/export/__init__.py
index 8e4ab0dad9b..9d7abae0aff 100644
--- a/mne/export/__init__.py
+++ b/mne/export/__init__.py
@@ -1 +1,2 @@
-from ._export import export_raw, export_epochs
+from ._export import export_raw, export_epochs, export_evokeds
+from ._egimff import export_evokeds_mff
diff --git a/mne/export/_egimff.py b/mne/export/_egimff.py
new file mode 100644
index 00000000000..f7998243ebc
--- /dev/null
+++ b/mne/export/_egimff.py
@@ -0,0 +1,150 @@
+# -*- coding: utf-8 -*-
+# Authors: MNE Developers
+#
+# License: BSD (3-clause)
+
+import datetime
+
+import numpy as np
+
+from ..io.egi.egimff import _import_mffpy
+from ..io.pick import pick_types, pick_channels
+from ..utils import verbose
+
+
+@verbose
+def export_evokeds_mff(fname, evoked, history=None, *, verbose=None):
+    """Export evoked dataset to MFF.
+
+    Parameters
+    ----------
+    %(export_params_fname)s
+    evoked : list of Evoked instances
+        List of evoked datasets to export to one file. Note that the
+        measurement info from the first evoked instance is used, so be sure
+        that information matches.
+    history : None (default) | list of dict
+        Optional list of history entries (dictionaries) to be written to
+        history.xml. This must adhere to the format described in
+        mffpy.xml_files.History.content. If None, no history.xml will be
+        written.
+    %(verbose)s
+
+    Notes
+    -----
+    .. versionadded:: 0.24
+
+    Only EEG channels are written to the output file.
+    ``info['device_info']['type']`` must be a valid MFF recording device
+    (e.g. 'HydroCel GSN 256 1.0'). This field is automatically populated when
+    using MFF read functions.
+    """
+    mffpy = _import_mffpy('Export evokeds to MFF.')
+    import pytz
+    info = evoked[0].info
+    if np.round(info['sfreq']) != info['sfreq']:
+        raise ValueError('Sampling frequency must be a whole number. '
+                         f'sfreq: {info["sfreq"]}')
+    sampling_rate = int(info['sfreq'])
+
+    # Initialize writer
+    writer = mffpy.Writer(fname)
+    current_time = pytz.utc.localize(datetime.datetime.utcnow())
+    writer.addxml('fileInfo', recordTime=current_time)
+    try:
+        device = info['device_info']['type']
+    except (TypeError, KeyError):
+        raise ValueError('No device type. Cannot determine sensor layout.')
+    writer.add_coordinates_and_sensor_layout(device)
+
+    # Add EEG data
+    eeg_channels = pick_types(info, eeg=True, exclude=[])
+    eeg_bin = mffpy.bin_writer.BinWriter(sampling_rate)
+    for ave in evoked:
+        # Signals are converted to µV
+        block = (ave.data[eeg_channels] * 1e6).astype(np.float32)
+        eeg_bin.add_block(block, offset_us=0)
+    writer.addbin(eeg_bin)
+
+    # Add categories
+    categories_content = _categories_content_from_evokeds(evoked)
+    writer.addxml('categories', categories=categories_content)
+
+    # Add history
+    if history:
+        writer.addxml('historyEntries', entries=history)
+
+    writer.write()
+
+
+def _categories_content_from_evokeds(evoked):
+    """Return categories.xml content for evoked dataset."""
+    content = dict()
+    begin_time = 0
+    for ave in evoked:
+        # Times are converted to microseconds
+        sfreq = ave.info['sfreq']
+        duration = np.round(len(ave.times) / sfreq * 1e6).astype(int)
+        end_time = begin_time + duration
+        event_time = begin_time - np.round(ave.tmin * 1e6).astype(int)
+        eeg_bads = _get_bad_eeg_channels(ave.info)
+        content[ave.comment] = [
+            _build_segment_content(begin_time, end_time, event_time, eeg_bads,
+                                   name='Average', nsegs=ave.nave)
+        ]
+        begin_time += duration
+    return content
+
+
+def _get_bad_eeg_channels(info):
+    """Return a list of bad EEG channels formatted for categories.xml.
+
+    Given a list of only the EEG channels in file, return the indices of this
+    list (starting at 1) that correspond to bad channels.
+    """
+    if len(info['bads']) == 0:
+        return []
+    eeg_channels = pick_types(info, eeg=True, exclude=[])
+    bad_channels = pick_channels(info['ch_names'], info['bads'])
+    bads_elementwise = np.isin(eeg_channels, bad_channels)
+    return list(np.flatnonzero(bads_elementwise) + 1)
+
+
+def _build_segment_content(begin_time, end_time, event_time, eeg_bads,
+                           status='unedited', name=None, pns_bads=None,
+                           nsegs=None):
+    """Build content for a single segment in categories.xml.
+
+    Segments are sorted into categories in categories.xml. In a segmented MFF
+    each category can contain multiple segments, but in an averaged MFF each
+    category only contains one segment (the average).
+    """
+    channel_status = [{
+        'signalBin': 1,
+        'exclusion': 'badChannels',
+        'channels': eeg_bads
+    }]
+    if pns_bads:
+        channel_status.append({
+            'signalBin': 2,
+            'exclusion': 'badChannels',
+            'channels': pns_bads
+        })
+    content = {
+        'status': status,
+        'beginTime': begin_time,
+        'endTime': end_time,
+        'evtBegin': event_time,
+        'evtEnd': event_time,
+        'channelStatus': channel_status,
+    }
+    if name:
+        content['name'] = name
+    if nsegs:
+        content['keys'] = {
+            '#seg': {
+                'type': 'long',
+                'data': nsegs
+            }
+        }
+    return content
diff --git a/mne/export/_export.py b/mne/export/_export.py
index 52afb4f8d4f..13272752d74 100644
--- a/mne/export/_export.py
+++ b/mne/export/_export.py
@@ -5,7 +5,8 @@
 
 import os.path as op
 
-from ..utils import verbose, _validate_type
+from ._egimff import export_evokeds_mff
+from ..utils import verbose, logger, _validate_type
 
 
 @verbose
@@ -78,6 +79,63 @@ def export_epochs(fname, epochs, fmt='auto', verbose=None):
         raise NotImplementedError('Export to BrainVision not implemented.')
 
 
+@verbose
+def export_evokeds(fname, evoked, fmt='auto', verbose=None):
+    """Export evoked dataset to external formats.
+
+    This function is a wrapper for format-specific export functions. The export
+    function is selected based on the inferred file format. For additional
+    options, use the format-specific functions.
+
+    Supported formats
+        MFF (mff, uses :func:`mne.export.export_evokeds_mff`)
+    %(export_warning)s :func:`mne.write_evokeds` instead.
+
+    Parameters
+    ----------
+    %(export_params_fname)s
+    evoked : Evoked instance, or list of Evoked instances
+        The evoked dataset, or list of evoked datasets, to export to one file.
+        Note that the measurement info from the first evoked instance is used,
+        so be sure that information matches.
+    fmt : 'auto' | 'mff'
+        Format of the export. Defaults to ``'auto'``, which will infer the
+        format from the filename extension. See supported formats above for
+        more information.
+    %(verbose)s
+
+    See Also
+    --------
+    mne.write_evokeds
+    mne.export.export_evokeds_mff
+
+    Notes
+    -----
+    .. versionadded:: 0.24
+    """
+    supported_export_formats = {
+        'mff': ('mff',),
+        'eeglab': ('set',),
+        'edf': ('edf',),
+        'brainvision': ('eeg', 'vmrk', 'vhdr',)
+    }
+    fmt = _infer_check_export_fmt(fmt, fname, supported_export_formats)
+
+    if not isinstance(evoked, list):
+        evoked = [evoked]
+
+    logger.info(f'Exporting evoked dataset to {fname}...')
+
+    if fmt == 'mff':
+        export_evokeds_mff(fname, evoked)
+    elif fmt == 'eeglab':
+        raise NotImplementedError('Export to EEGLAB not implemented.')
+    elif fmt == 'edf':
+        raise NotImplementedError('Export to EDF not implemented.')
+    elif fmt == 'brainvision':
+        raise NotImplementedError('Export to BrainVision not implemented.')
+
+
 def _infer_check_export_fmt(fmt, fname, supported_formats):
     """Infer export format from filename extension if auto.
 
diff --git a/mne/export/tests/test_export.py b/mne/export/tests/test_export.py
index ef7ce429384..2765805ca9b 100644
--- a/mne/export/tests/test_export.py
+++ b/mne/export/tests/test_export.py
@@ -11,10 +11,18 @@
 import numpy as np
 from numpy.testing import assert_allclose, assert_array_equal
 
-from mne import read_epochs_eeglab, Epochs
-from mne.tests.test_epochs import _get_data
+from mne import read_epochs_eeglab, Epochs, read_evokeds, read_evokeds_mff
+from mne.datasets import testing
+from mne.export import export_evokeds, export_evokeds_mff
 from mne.io import read_raw_fif, read_raw_eeglab
-from mne.utils import _check_eeglabio_installed
+from mne.utils import _check_eeglabio_installed, requires_version, object_diff
+from mne.tests.test_epochs import _get_data
+
+base_dir = op.join(op.dirname(__file__), '..', '..', 'io', 'tests', 'data')
+fname_evoked = op.join(base_dir, 'test-ave.fif')
+
+data_path = testing.data_path(download=False)
+egi_evoked_fname = op.join(data_path, 'EGI', 'test_egi_evoked.mff')
 
 
 @pytest.mark.skipif(not _check_eeglabio_installed(strict=False),
@@ -62,3 +70,75 @@ def test_export_epochs_eeglab(tmpdir, preload):
     assert epochs.event_id.keys() == epochs_read.event_id.keys()  # just keys
     assert_allclose(epochs.times, epochs_read.times)
     assert_allclose(epochs.get_data(), epochs_read.get_data())
+
+
+@requires_version('mffpy', '0.5.7')
+@testing.requires_testing_data
+@pytest.mark.parametrize('fmt', ('auto', 'mff'))
+@pytest.mark.parametrize('do_history', (True, False))
+def test_export_evokeds_to_mff(tmpdir, fmt, do_history):
+    """Test exporting evoked dataset to MFF."""
+    evoked = read_evokeds_mff(egi_evoked_fname)
+    export_fname = op.join(str(tmpdir), 'evoked.mff')
+    history = [
+        {
+            'name': 'Test Segmentation',
+            'method': 'Segmentation',
+            'settings': ['Setting 1', 'Setting 2'],
+            'results': ['Result 1', 'Result 2']
+        },
+        {
+            'name': 'Test Averaging',
+            'method': 'Averaging',
+            'settings': ['Setting 1', 'Setting 2'],
+            'results': ['Result 1', 'Result 2']
+        }
+    ]
+    if do_history:
+        export_evokeds_mff(export_fname, evoked, history=history)
+    else:
+        export_evokeds(export_fname, evoked)
+    # Drop non-EEG channels
+    evoked = [ave.drop_channels(['ECG', 'EMG']) for ave in evoked]
+    evoked_exported = read_evokeds_mff(export_fname)
+    assert len(evoked) == len(evoked_exported)
+    for ave, ave_exported in zip(evoked, evoked_exported):
+        # Compare infos
+        assert object_diff(ave_exported.info, ave.info) == ''
+        # Compare data
+        assert_allclose(ave_exported.data, ave.data)
+        # Compare properties
+        assert ave_exported.nave == ave.nave
+        assert ave_exported.kind == ave.kind
+        assert ave_exported.comment == ave.comment
+        assert_allclose(ave_exported.times, ave.times)
+
+
+@requires_version('mffpy', '0.5.7')
+@testing.requires_testing_data
+def test_export_to_mff_no_device():
+    """Test no device type throws ValueError."""
+    evoked = read_evokeds_mff(egi_evoked_fname, condition='Category 1')
+    evoked.info['device_info'] = None
+    with pytest.raises(ValueError, match='No device type.'):
+        export_evokeds('output.mff', evoked)
+
+
+@requires_version('mffpy', '0.5.7')
+def test_export_to_mff_incompatible_sfreq():
+    """Test non-whole number sampling frequency throws ValueError."""
+    evoked = read_evokeds(fname_evoked)
+    with pytest.raises(ValueError, match=f'sfreq: {evoked[0].info["sfreq"]}'):
+        export_evokeds('output.mff', evoked)
+
+
+@pytest.mark.parametrize('fmt,ext', [
+    ('EEGLAB', 'set'),
+    ('EDF', 'edf'),
+    ('BrainVision', 'eeg')
+])
+def test_export_evokeds_unsupported_format(fmt, ext):
+    """Test exporting evoked dataset to non-supported formats."""
+    evoked = read_evokeds(fname_evoked)
+    with pytest.raises(NotImplementedError, match=f'Export to {fmt} not imp'):
+        export_evokeds(f'output.{ext}', evoked)
diff --git a/mne/io/base.py b/mne/io/base.py
index c312dd56a35..d6e5c6837aa 100644
--- a/mne/io/base.py
+++ b/mne/io/base.py
@@ -1455,7 +1455,7 @@ def export(self, fname, fmt='auto', verbose=None):
         """Export Raw to external formats.
 
         Supported formats: EEGLAB (set, uses :mod:`eeglabio`)
-        %(export_warning)s
+        %(export_warning)s :meth:`save` instead.
 
         Parameters
         ----------
diff --git a/mne/io/egi/egimff.py b/mne/io/egi/egimff.py
index 152ae28cf9c..3034212a61b 100644
--- a/mne/io/egi/egimff.py
+++ b/mne/io/egi/egimff.py
@@ -104,6 +104,8 @@ def _read_mff_header(filepath):
     # Add the sensor info.
     sensor_layout_file = op.join(filepath, 'sensorLayout.xml')
     sensor_layout_obj = parse(sensor_layout_file)
+    summaryinfo['device'] = (sensor_layout_obj.getElementsByTagName('name')
+                             [0].firstChild.data)
     sensors = sensor_layout_obj.getElementsByTagName('sensor')
     chan_type = list()
     chan_unit = list()
@@ -456,6 +458,7 @@ def __init__(self, input_fname, eog=None, misc=None,
             egi_info['hour'], egi_info['minute'], egi_info['second'])
         my_timestamp = time.mktime(my_time.timetuple())
         info['meas_date'] = _ensure_meas_date_none_or_dt((my_timestamp, 0))
+        info['device_info'] = dict(type=egi_info['device'])
 
         # First: EEG
         ch_names = [channel_naming % (i + 1) for i in
@@ -845,6 +848,7 @@ def _read_evoked_mff(fname, condition, channel_naming='E%d', verbose=None):
                 range(mff.num_channels['EEG'])]
     ch_names.extend(egi_info['pns_names'])
     info = create_info(ch_names, mff.sampling_rates['EEG'], ch_types)
+    info['device_info'] = dict(type=egi_info['device'])
     info['nchan'] = sum(mff.num_channels.values())
 
     # Add individual channel info
@@ -887,16 +891,21 @@ def _read_evoked_mff(fname, condition, channel_naming='E%d', verbose=None):
     # Add EEG reference to info
     # Initialize 'custom_ref_applied' to False
     info['custom_ref_applied'] = False
-    with mff.directory.filepointer('history') as fp:
-        history = mffpy.XML.from_file(fp)
-    for entry in history.entries:
-        if entry['method'] == 'Montage Operations Tool':
-            if 'Average Reference' in entry['settings']:
-                # Average reference has been applied
-                projector, info = setup_proj(info)
-            else:
-                # Custom reference has been applied that is not an average
-                info['custom_ref_applied'] = True
+    try:
+        fp = mff.directory.filepointer('history')
+    except (ValueError, FileNotFoundError):  # old (<=0.6.3) vs new mffpy
+        pass
+    else:
+        with fp:
+            history = mffpy.XML.from_file(fp)
+        for entry in history.entries:
+            if entry['method'] == 'Montage Operations Tool':
+                if 'Average Reference' in entry['settings']:
+                    # Average reference has been applied
+                    projector, info = setup_proj(info)
+                else:
+                    # Custom reference has been applied that is not an average
+                    info['custom_ref_applied'] = True
 
     # Get nave from categories.xml
     try:
diff --git a/mne/io/egi/tests/test_egi.py b/mne/io/egi/tests/test_egi.py
index 7f3e55e38ef..ca601da55b3 100644
--- a/mne/io/egi/tests/test_egi.py
+++ b/mne/io/egi/tests/test_egi.py
@@ -128,6 +128,7 @@ def test_io_egi_mff():
         loc = raw.info['chs'][i]['loc']
         assert loc[:3].any(), loc[:3]
         assert_array_equal(loc[3:6], ref_loc, err_msg=f'{i}')
+    assert raw.info['device_info']['type'] == 'HydroCel GSN 128 1.0'
 
     assert 'eeg' in raw
     eeg_chan = [c for c in raw.ch_names if 'EEG' in c]
@@ -369,6 +370,8 @@ def test_io_egi_evokeds_mff(idx, cond, tmax, signals, bads):
     assert evoked_cond.info['nchan'] == 259
     assert evoked_cond.info['sfreq'] == 250.0
     assert not evoked_cond.info['custom_ref_applied']
+    assert len(evoked_cond.info['dig']) == 261
+    assert evoked_cond.info['device_info']['type'] == 'HydroCel GSN 256 1.0'
 
 
 @requires_version('mffpy', '0.5.7')
diff --git a/mne/utils/docs.py b/mne/utils/docs.py
index 686376b13b5..e51dd434745 100644
--- a/mne/utils/docs.py
+++ b/mne/utils/docs.py
@@ -2333,7 +2333,7 @@
 .. warning::
     Since we are exporting to external formats, there's no guarantee that all
     the info will be preserved in the external format. To save in native MNE
-    format (``.fif``) without information loss, use ``save`` instead.
+    format (``.fif``) without information loss, use
 """
 docdict['export_params_fname'] = """
 fname : str