From f2c62826698185d24505acf4a07140677933d67c Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Mon, 9 Sep 2024 11:14:19 -0600
Subject: [PATCH 01/11] add mocking interface for sorter

---
 CHANGELOG.md                                  |  1 +
 src/neuroconv/tools/testing/__init__.py       |  8 ++-
 .../tools/testing/mock_interfaces.py          | 51 +++++++++++++++++++
 tests/test_ecephys/test_ecephys_interfaces.py | 33 +++++++++++-
 4 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cd1580540..e87e5f97e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@
 * Added `get_stream_names` to `OpenEphysRecordingInterface`: [PR #1039](https://github.com/catalystneuro/neuroconv/pull/1039)
 * Most data interfaces and converters now use Pydantic to validate their inputs, including existence of file and folder paths. [PR #1022](https://github.com/catalystneuro/neuroconv/pull/1022)
 * All remaining data interfaces and converters now use Pydantic to validate their inputs, including existence of file and folder paths. [PR #1055](https://github.com/catalystneuro/neuroconv/pull/1055)
+* Added a `MockSortingInterface` for testing purposes. [PR #1065](https://github.com/catalystneuro/neuroconv/pull/1065)
 
 ### Improvements
 * Using ruff to enforce existence of public classes' docstrings [PR #1034](https://github.com/catalystneuro/neuroconv/pull/1034)
diff --git a/src/neuroconv/tools/testing/__init__.py b/src/neuroconv/tools/testing/__init__.py
index 7179a7544..79b54d3f9 100644
--- a/src/neuroconv/tools/testing/__init__.py
+++ b/src/neuroconv/tools/testing/__init__.py
@@ -5,5 +5,11 @@
     mock_ZarrDatasetIOConfiguration,
 )
 from .mock_files import generate_path_expander_demo_ibl
-from .mock_interfaces import MockBehaviorEventInterface, MockSpikeGLXNIDQInterface
+from .mock_interfaces import (
+    MockBehaviorEventInterface,
+    MockSpikeGLXNIDQInterface,
+    MockRecordingInterface,
+    MockImagingInterface,
+    MockSortingInterface,
+)
 from .mock_ttl_signals import generate_mock_ttl_signal, regenerate_test_cases
diff --git a/src/neuroconv/tools/testing/mock_interfaces.py b/src/neuroconv/tools/testing/mock_interfaces.py
index f05228b34..d9251c858 100644
--- a/src/neuroconv/tools/testing/mock_interfaces.py
+++ b/src/neuroconv/tools/testing/mock_interfaces.py
@@ -11,6 +11,9 @@
 from ...datainterfaces.ecephys.baserecordingextractorinterface import (
     BaseRecordingExtractorInterface,
 )
+from ...datainterfaces.ecephys.basesortingextractorinterface import (
+    BaseSortingExtractorInterface,
+)
 from ...datainterfaces.ophys.baseimagingextractorinterface import (
     BaseImagingExtractorInterface,
 )
@@ -157,6 +160,54 @@ def get_metadata(self) -> dict:
         return metadata
 
 
+class MockSortingInterface(BaseSortingExtractorInterface):
+    """A mock sorting extractor interface for generating synthetic sorting data."""
+
+    # TODO: Implement this class with the lazy generator once is merged
+    # https://github.com/SpikeInterface/spikeinterface/pull/2227
+
+    ExtractorModuleName = "spikeinterface.core.generate"
+    ExtractorName = "generate_sorting"
+
+    def __init__(
+        self,
+        num_units: int = 4,
+        sampling_frequency: float = 30_000.0,
+        durations: tuple[float] = (1.0,),
+        seed: int = 0,
+        verbose: bool = True,
+    ):
+        """
+        Parameters
+        ----------
+        num_units : int, optional
+            Number of units to generate, by default 4.
+        sampling_frequency : float, optional
+            Sampling frequency of the generated data in Hz, by default 30,000.0 Hz.
+        durations : tuple of float, optional
+            Durations of the segments in seconds, by default (1.0,).
+        seed : int, optional
+            Seed for the random number generator, by default 0.
+        verbose : bool, optional
+            Control whether to display verbose messages during writing, by default True.
+
+        """
+
+        super().__init__(
+            num_units=num_units,
+            sampling_frequency=sampling_frequency,
+            durations=durations,
+            seed=seed,
+            verbose=verbose,
+        )
+
+    def get_metadata(self) -> dict:  # noqa D102
+        metadata = super().get_metadata()
+        session_start_time = datetime.now().astimezone()
+        metadata["NWBFile"]["session_start_time"] = session_start_time
+        return metadata
+
+
 class MockImagingInterface(BaseImagingExtractorInterface):
     """
     A mock imaging interface for testing purposes.
diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py
index 6372b3535..5591bb6fb 100644
--- a/tests/test_ecephys/test_ecephys_interfaces.py
+++ b/tests/test_ecephys/test_ecephys_interfaces.py
@@ -20,7 +20,10 @@
     BaseSortingExtractorInterface,
 )
 from neuroconv.tools.nwb_helpers import get_module
-from neuroconv.tools.testing.mock_interfaces import MockRecordingInterface
+from neuroconv.tools.testing.mock_interfaces import (
+    MockRecordingInterface,
+    MockSortingInterface,
+)
 
 python_version = Version(get_python_version())
 
@@ -67,7 +70,33 @@ def test_spike2_import_assertions_3_11(self):
             Spike2RecordingInterface.get_all_channels_info(file_path="does_not_matter.smrx")
 
 
-class TestSortingInterface(unittest.TestCase):
+class TestSortingInterface:
+
+    def test_run_conversion(self, tmp_path):
+
+        nwbfile_path = Path(tmp_path) / "test_sorting.nwb"
+        num_units = 4
+        interface = MockSortingInterface(num_units=num_units, durations=(1.0,))
+        interface.sorting_extractor = interface.sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d"])
+
+        interface.run_conversion(nwbfile_path=nwbfile_path)
+        with NWBHDF5IO(nwbfile_path, "r") as io:
+            nwbfile = io.read()
+
+            units = nwbfile.units
+            assert len(units) == num_units
+            units_df = units.to_dataframe()
+            # Get index in units table
+            for unit_id in interface.sorting_extractor.unit_ids:
+                # In pynwb we write unit name as unit_id
+                row = units_df.query(f"unit_name == '{unit_id}'")
+                spike_times = interface.sorting_extractor.get_unit_spike_train(unit_id=unit_id, return_times=True)
+                written_spike_times = row["spike_times"].iloc[0]
+
+                np.testing.assert_array_equal(spike_times, written_spike_times)
+
+
+class TestSortingInterfaceOld(unittest.TestCase):
     @classmethod
     def setUpClass(cls) -> None:
         cls.test_dir = Path(mkdtemp())

From 5e4dc6afa4ca19ba7afbd3728f73ea8313eb062c Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Thu, 12 Sep 2024 16:30:31 -0600
Subject: [PATCH 02/11] wip

---
 src/neuroconv/nwbconverter.py                 |  2 +-
 tests/test_ecephys/test_ecephys_interfaces.py | 22 ++++++++++++++++++-
 .../test_mock_recording_interface.py          |  9 --------
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/src/neuroconv/nwbconverter.py b/src/neuroconv/nwbconverter.py
index 1f3e7c9f8..7dc862da5 100644
--- a/src/neuroconv/nwbconverter.py
+++ b/src/neuroconv/nwbconverter.py
@@ -144,7 +144,7 @@ def validate_conversion_options(self, conversion_options: dict[str, dict]):
 
     def create_nwbfile(self, metadata: Optional[dict] = None, conversion_options: Optional[dict] = None) -> NWBFile:
         """
-        Create and return an in-memory pynwb.NWBFile object with this interface's data added to it.
+        Create and return an in-memory pynwb.NWBFile object with the conversion data added to it.
 
         Parameters
         ----------
diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py
index 5591bb6fb..8c61b89e3 100644
--- a/tests/test_ecephys/test_ecephys_interfaces.py
+++ b/tests/test_ecephys/test_ecephys_interfaces.py
@@ -27,8 +27,28 @@
 
 python_version = Version(get_python_version())
 
+from neuroconv.tools.testing.data_interface_mixins import (
+    RecordingExtractorInterfaceTestMixin,
+    SortingExtractorInterfaceTestMixin,
+)
+
+
+class TestSortingInterface(SortingExtractorInterfaceTestMixin):
+
+    data_interface_cls = MockSortingInterface
+    interface_kwargs = dict(num_units=4, durations=[0.100])
+
+    def test_map_electrode_indices(self):
+
+        self.data_interface.create_nwbfile()
+
+
+class TestRecordingInterface(RecordingExtractorInterfaceTestMixin):
+    data_interface_cls = MockRecordingInterface
+    interface_kwargs = dict(durations=[0.100])
+
 
-class TestRecordingInterface(TestCase):
+class TestRecordingInterfaceOld(TestCase):
     @classmethod
     def setUpClass(cls):
         cls.single_segment_recording_interface = MockRecordingInterface(durations=[0.100])
diff --git a/tests/test_ecephys/test_mock_recording_interface.py b/tests/test_ecephys/test_mock_recording_interface.py
index a33f3acd1..e69de29bb 100644
--- a/tests/test_ecephys/test_mock_recording_interface.py
+++ b/tests/test_ecephys/test_mock_recording_interface.py
@@ -1,9 +0,0 @@
-from neuroconv.tools.testing.data_interface_mixins import (
-    RecordingExtractorInterfaceTestMixin,
-)
-from neuroconv.tools.testing.mock_interfaces import MockRecordingInterface
-
-
-class TestMockRecordingInterface(RecordingExtractorInterfaceTestMixin):
-    data_interface_cls = MockRecordingInterface
-    interface_kwargs = dict(durations=[0.100])

From 4f0c935767a0250e5a359b32158b5a2faca0fbf4 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 5 Nov 2024 14:50:29 -0600
Subject: [PATCH 03/11] propagate unit_electrode_indices

---
 .../ecephys/basesortingextractorinterface.py  |  8 ++
 .../tools/spikeinterface/spikeinterface.py    | 12 +--
 tests/test_ecephys/test_ecephys_interfaces.py | 77 ++++++++++++-------
 3 files changed, 64 insertions(+), 33 deletions(-)

diff --git a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py
index cd8396154..dca2dea5f 100644
--- a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py
+++ b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py
@@ -288,6 +288,7 @@ def add_to_nwbfile(
         write_as: Literal["units", "processing"] = "units",
         units_name: str = "units",
         units_description: str = "Autogenerated by neuroconv.",
+        unit_electrode_indices: Optional[list[list[int]]] = None,
     ):
         """
         Primary function for converting the data in a SortingExtractor to NWB format.
@@ -312,9 +313,15 @@ def add_to_nwbfile(
         units_name : str, default: 'units'
             The name of the units table. If write_as=='units', then units_name must also be 'units'.
         units_description : str, default: 'Autogenerated by neuroconv.'
+        unit_electrode_indices : list of lists of int, optional
+            A list of lists of integers indicating the indices of the electrodes that each unit is associated with.
+            The length of the list must match the number of units in the sorting extractor.
         """
         from ...tools.spikeinterface import add_sorting_to_nwbfile
 
+        if metadata is None:
+            metadata = self.get_metadata()
+
         metadata_copy = deepcopy(metadata)
         if write_ecephys_metadata:
             self.add_channel_metadata_to_nwb(nwbfile=nwbfile, metadata=metadata_copy)
@@ -346,4 +353,5 @@ def add_to_nwbfile(
             write_as=write_as,
             units_name=units_name,
             units_description=units_description,
+            unit_electrode_indices=unit_electrode_indices,
         )
diff --git a/src/neuroconv/tools/spikeinterface/spikeinterface.py b/src/neuroconv/tools/spikeinterface/spikeinterface.py
index 1be86862a..8a5ebf8f9 100644
--- a/src/neuroconv/tools/spikeinterface/spikeinterface.py
+++ b/src/neuroconv/tools/spikeinterface/spikeinterface.py
@@ -1406,7 +1406,7 @@ def add_units_table_to_nwbfile(
     write_in_processing_module: bool = False,
     waveform_means: Optional[np.ndarray] = None,
     waveform_sds: Optional[np.ndarray] = None,
-    unit_electrode_indices=None,
+    unit_electrode_indices: Optional[list[list[int]]] = None,
     null_values_for_properties: Optional[dict] = None,
 ):
     """
@@ -1443,8 +1443,9 @@ def add_units_table_to_nwbfile(
         Waveform standard deviation for each unit. Shape: (num_units, num_samples, num_channels).
     unit_electrode_indices : list of lists of int, optional
         For each unit, a list of electrode indices corresponding to waveform data.
-    null_values_for_properties: dict, optional
-        A dictionary mapping properties to null values to use when the property is not present
+    unit_electrode_indices : list of lists of int, optional
+        A list of lists of integers indicating the indices of the electrodes that each unit is associated with.
+        The length of the list must match the number of units in the sorting extractor.
     """
     unit_table_description = unit_table_description or "Autogenerated by neuroconv."
 
@@ -1706,7 +1707,7 @@ def add_sorting_to_nwbfile(
     units_description: str = "Autogenerated by neuroconv.",
     waveform_means: Optional[np.ndarray] = None,
     waveform_sds: Optional[np.ndarray] = None,
-    unit_electrode_indices=None,
+    unit_electrode_indices: Optional[list[list[int]]] = None,
 ):
     """Add sorting data (units and their properties) to an NWBFile.
 
@@ -1741,7 +1742,8 @@ def add_sorting_to_nwbfile(
     waveform_sds : np.ndarray, optional
         Waveform standard deviation for each unit. Shape: (num_units, num_samples, num_channels).
     unit_electrode_indices : list of lists of int, optional
-        For each unit, a list of electrode indices corresponding to waveform data.
+        A list of lists of integers indicating the indices of the electrodes that each unit is associated with.
+        The length of the list must match the number of units in the sorting extractor.
     """
 
     if skip_features is not None:
diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py
index fb1739366..669e90131 100644
--- a/tests/test_ecephys/test_ecephys_interfaces.py
+++ b/tests/test_ecephys/test_ecephys_interfaces.py
@@ -38,9 +38,56 @@ class TestSortingInterface(SortingExtractorInterfaceTestMixin):
     data_interface_cls = MockSortingInterface
     interface_kwargs = dict(num_units=4, durations=[0.100])
 
-    def test_map_electrode_indices(self):
+    def test_use_electrode_indices(self, setup_interface):
 
-        self.data_interface.create_nwbfile()
+        recording_interface = MockRecordingInterface(num_channels=4, durations=[0.100])
+        recording_extractor = recording_interface.recording_extractor
+        recording_extractor = recording_extractor.rename_channels(new_channel_ids=["a", "b", "c", "d"])
+        recording_extractor.set_property(key="property", values=["A", "B", "C", "D"])
+        recording_interface.recording_extractor = recording_extractor
+
+        nwbfile = recording_interface.create_nwbfile()
+
+        unit_electrode_indices = [[3], [0, 1], [1], [2]]
+        expected_properties_matching = [["D"], ["A", "B"], ["B"], ["C"]]
+        self.interface.add_to_nwbfile(nwbfile=nwbfile, unit_electrode_indices=unit_electrode_indices)
+
+        unit_table = nwbfile.units
+
+        for unit_row, electrode_indices, property in zip(
+            unit_table.to_dataframe().itertuples(index=False),
+            unit_electrode_indices,
+            expected_properties_matching,
+        ):
+            electrode_table_region = unit_row.electrodes
+            electrode_table_region_indices = electrode_table_region.index.to_list()
+            assert electrode_table_region_indices == electrode_indices
+
+            electrode_table_region_properties = electrode_table_region["property"].to_list()
+            assert electrode_table_region_properties == property
+
+    def test_run_conversion(self, tmp_path):
+
+        nwbfile_path = Path(tmp_path) / "test_sorting.nwb"
+        num_units = 4
+        interface = MockSortingInterface(num_units=num_units, durations=(1.0,))
+        interface.sorting_extractor = interface.sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d"])
+
+        interface.run_conversion(nwbfile_path=nwbfile_path)
+        with NWBHDF5IO(nwbfile_path, "r") as io:
+            nwbfile = io.read()
+
+            units = nwbfile.units
+            assert len(units) == num_units
+            units_df = units.to_dataframe()
+            # Get index in units table
+            for unit_id in interface.sorting_extractor.unit_ids:
+                # In pynwb we write unit name as unit_id
+                row = units_df.query(f"unit_name == '{unit_id}'")
+                spike_times = interface.sorting_extractor.get_unit_spike_train(unit_id=unit_id, return_times=True)
+                written_spike_times = row["spike_times"].iloc[0]
+
+                np.testing.assert_array_equal(spike_times, written_spike_times)
 
 
 class TestRecordingInterface(RecordingExtractorInterfaceTestMixin):
@@ -104,32 +151,6 @@ def test_spike2_import_assertions_3_11(self):
             Spike2RecordingInterface.get_all_channels_info(file_path="does_not_matter.smrx")
 
 
-class TestSortingInterface:
-
-    def test_run_conversion(self, tmp_path):
-
-        nwbfile_path = Path(tmp_path) / "test_sorting.nwb"
-        num_units = 4
-        interface = MockSortingInterface(num_units=num_units, durations=(1.0,))
-        interface.sorting_extractor = interface.sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d"])
-
-        interface.run_conversion(nwbfile_path=nwbfile_path)
-        with NWBHDF5IO(nwbfile_path, "r") as io:
-            nwbfile = io.read()
-
-            units = nwbfile.units
-            assert len(units) == num_units
-            units_df = units.to_dataframe()
-            # Get index in units table
-            for unit_id in interface.sorting_extractor.unit_ids:
-                # In pynwb we write unit name as unit_id
-                row = units_df.query(f"unit_name == '{unit_id}'")
-                spike_times = interface.sorting_extractor.get_unit_spike_train(unit_id=unit_id, return_times=True)
-                written_spike_times = row["spike_times"].iloc[0]
-
-                np.testing.assert_array_equal(spike_times, written_spike_times)
-
-
 class TestSortingInterfaceOld(unittest.TestCase):
     @classmethod
     def setUpClass(cls) -> None:

From c12a840821e923ee502d4446331ff28453c3a954 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 5 Nov 2024 15:12:45 -0600
Subject: [PATCH 04/11] add tests for electrode propagation

---
 src/neuroconv/tools/spikeinterface/spikeinterface.py | 7 +++++++
 tests/test_ecephys/test_ecephys_interfaces.py        | 9 ++++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/neuroconv/tools/spikeinterface/spikeinterface.py b/src/neuroconv/tools/spikeinterface/spikeinterface.py
index 8a5ebf8f9..4439800c4 100644
--- a/src/neuroconv/tools/spikeinterface/spikeinterface.py
+++ b/src/neuroconv/tools/spikeinterface/spikeinterface.py
@@ -1453,6 +1453,13 @@ def add_units_table_to_nwbfile(
         nwbfile, pynwb.NWBFile
     ), f"'nwbfile' should be of type pynwb.NWBFile but is of type {type(nwbfile)}"
 
+    if unit_electrode_indices is not None:
+        electrodes_table = nwbfile.electrodes
+        if electrodes_table is None:
+            raise ValueError(
+                "Electrodes table is required to map units to electrodes. Add an electrode table to the NWBFile first."
+            )
+
     null_values_for_properties = dict() if null_values_for_properties is None else null_values_for_properties
 
     if not write_in_processing_module and units_table_name != "units":
diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py
index 669e90131..5a92e8134 100644
--- a/tests/test_ecephys/test_ecephys_interfaces.py
+++ b/tests/test_ecephys/test_ecephys_interfaces.py
@@ -38,7 +38,7 @@ class TestSortingInterface(SortingExtractorInterfaceTestMixin):
     data_interface_cls = MockSortingInterface
     interface_kwargs = dict(num_units=4, durations=[0.100])
 
-    def test_use_electrode_indices(self, setup_interface):
+    def test_electrode_indices(self, setup_interface):
 
         recording_interface = MockRecordingInterface(num_channels=4, durations=[0.100])
         recording_extractor = recording_interface.recording_extractor
@@ -66,6 +66,13 @@ def test_use_electrode_indices(self, setup_interface):
             electrode_table_region_properties = electrode_table_region["property"].to_list()
             assert electrode_table_region_properties == property
 
+    def test_electrode_indices_assertion_error_when_missing_table(self, setup_interface):
+        with pytest.raises(
+            ValueError,
+            match="Electrodes table is required to map units to electrodes. Add an electrode table to the NWBFile first.",
+        ):
+            self.interface.create_nwbfile(unit_electrode_indices=[[0], [1], [2], [3]])
+
     def test_run_conversion(self, tmp_path):
 
         nwbfile_path = Path(tmp_path) / "test_sorting.nwb"

From ff41d1c5eb11d7acb33918efc2d4c9b467ca14b6 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 5 Nov 2024 15:23:36 -0600
Subject: [PATCH 05/11] fix changelog and sorting interface

---
 CHANGELOG.md                                  |  2 +-
 tests/test_ecephys/test_ecephys_interfaces.py | 23 -------------------
 2 files changed, 1 insertion(+), 24 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2ece94ecb..807caec57 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
 ## Bug Fixes
 
 ## Features
+* Propagate the `unit_electrode_indices` argument from the spikeinterface tools to `BaseSortingExtractorInterface`. This allows users to map units to the electrode table when adding sorting data [PR #1124](https://github.com/catalystneuro/neuroconv/pull/1124)
 
 ## Improvements
 
@@ -74,7 +75,6 @@
 * Added `get_stream_names` to `OpenEphysRecordingInterface`: [PR #1039](https://github.com/catalystneuro/neuroconv/pull/1039)
 * Most data interfaces and converters now use Pydantic to validate their inputs, including existence of file and folder paths. [PR #1022](https://github.com/catalystneuro/neuroconv/pull/1022)
 * All remaining data interfaces and converters now use Pydantic to validate their inputs, including existence of file and folder paths. [PR #1055](https://github.com/catalystneuro/neuroconv/pull/1055)
-* Added automated EFS volume creation and mounting to the `submit_aws_job` helper function. [PR #1018](https://github.com/catalystneuro/neuroconv/pull/1018)
 
 
 ## Improvements
diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py
index 5a92e8134..bbfd2b586 100644
--- a/tests/test_ecephys/test_ecephys_interfaces.py
+++ b/tests/test_ecephys/test_ecephys_interfaces.py
@@ -73,29 +73,6 @@ def test_electrode_indices_assertion_error_when_missing_table(self, setup_interf
         ):
             self.interface.create_nwbfile(unit_electrode_indices=[[0], [1], [2], [3]])
 
-    def test_run_conversion(self, tmp_path):
-
-        nwbfile_path = Path(tmp_path) / "test_sorting.nwb"
-        num_units = 4
-        interface = MockSortingInterface(num_units=num_units, durations=(1.0,))
-        interface.sorting_extractor = interface.sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d"])
-
-        interface.run_conversion(nwbfile_path=nwbfile_path)
-        with NWBHDF5IO(nwbfile_path, "r") as io:
-            nwbfile = io.read()
-
-            units = nwbfile.units
-            assert len(units) == num_units
-            units_df = units.to_dataframe()
-            # Get index in units table
-            for unit_id in interface.sorting_extractor.unit_ids:
-                # In pynwb we write unit name as unit_id
-                row = units_df.query(f"unit_name == '{unit_id}'")
-                spike_times = interface.sorting_extractor.get_unit_spike_train(unit_id=unit_id, return_times=True)
-                written_spike_times = row["spike_times"].iloc[0]
-
-                np.testing.assert_array_equal(spike_times, written_spike_times)
-
 
 class TestRecordingInterface(RecordingExtractorInterfaceTestMixin):
     data_interface_cls = MockRecordingInterface

From 0a730bd30992063fe50c2c437b82ce14f04d1460 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Thu, 7 Nov 2024 17:15:18 -0600
Subject: [PATCH 06/11] add sorted recording

---
 src/neuroconv/converters/__init__.py          |   2 +
 .../ecephys/sortedrecordinginterface.py       | 112 ++++++++++++++++++
 tests/test_ecephys/test_sorted_recording.py   |  91 ++++++++++++++
 3 files changed, 205 insertions(+)
 create mode 100644 src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
 create mode 100644 tests/test_ecephys/test_sorted_recording.py

diff --git a/src/neuroconv/converters/__init__.py b/src/neuroconv/converters/__init__.py
index 7fa38bfd5..a741ff331 100644
--- a/src/neuroconv/converters/__init__.py
+++ b/src/neuroconv/converters/__init__.py
@@ -9,6 +9,7 @@
 from ..datainterfaces.behavior.lightningpose.lightningposeconverter import (
     LightningPoseConverter,
 )
+from ..datainterfaces.ecephys.sortedrecordinginterface import SortedRecordingConverter
 from ..datainterfaces.ecephys.spikeglx.spikeglxconverter import SpikeGLXConverterPipe
 from ..datainterfaces.ophys.brukertiff.brukertiffconverter import (
     BrukerTiffMultiPlaneConverter,
@@ -22,4 +23,5 @@
     BrukerTiffMultiPlaneConverter,
     BrukerTiffSinglePlaneConverter,
     MiniscopeConverter,
+    SortedRecordingConverter,
 ]
diff --git a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
new file mode 100644
index 000000000..f0d9856ca
--- /dev/null
+++ b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
@@ -0,0 +1,112 @@
+from typing import Optional
+
+from neuroconv import ConverterPipe
+from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import (
+    BaseRecordingExtractorInterface,
+)
+from neuroconv.datainterfaces.ecephys.basesortingextractorinterface import (
+    BaseSortingExtractorInterface,
+)
+from neuroconv.tools.spikeinterface.spikeinterface import (
+    _get_electrode_table_indices_for_recording,
+)
+
+
+class SortedRecordingConverter(ConverterPipe):
+    """
+    A converter for handling simultaneous recording and sorting data from multiple probes,
+    ensuring correct mapping between sorted units and their corresponding electrodes.
+
+    This converter addresses the challenge of maintaining proper linkage between sorted units
+    and their recording channels when dealing with multiple recording probes (e.g., multiple
+    Neuropixels probes). It ensures that units from each sorting interface are correctly
+    associated with electrodes from their corresponding recording interface.
+    """
+
+    def __init__(
+        self,
+        recording_interface: BaseRecordingExtractorInterface,
+        sorting_interface: BaseSortingExtractorInterface,
+        unit_ids_to_channel_ids: dict[str | int, list[str | int]],
+    ):
+        """
+        Parameters
+        ----------
+        recording_interface : BaseRecordingExtractorInterface
+            The interface handling the raw recording data. This typically represents data
+            from a single probe, like a SpikeGLXRecordingInterface.
+        sorting_interface : BaseSortingExtractorInterface
+            The interface handling the sorted units data. This typically represents the
+            output of a spike sorting algorithm, like a KiloSortSortingInterface.
+        unit_ids_to_channel_ids : dict[str | int, list[str | int]]
+            A mapping from unit IDs to their associated channel IDs. Each unit ID (key)
+            maps to a list of channel IDs (values) that were used to detect that unit.
+            This mapping ensures proper linkage between sorted units and recording channels.
+        """
+
+        self.recording_interface = recording_interface
+        self.sorting_interface = sorting_interface
+        self.unit_ids_to_channel_ids = unit_ids_to_channel_ids
+
+        self.channel_ids = self.recording_interface.recording_extractor.get_channel_ids()
+        self.unit_ids = self.sorting_interface.sorting_extractor.get_unit_ids()
+
+        # Convert channel_ids to set for comparison
+        available_channels = set(self.channel_ids)
+
+        # Check that all referenced channels exist in recording
+        for unit_id, channel_ids in unit_ids_to_channel_ids.items():
+            unknown_channels = set(channel_ids) - available_channels
+            if unknown_channels:
+                raise ValueError(
+                    f"Inexistent channel IDs {unknown_channels} referenced in mapping for unit {unit_id} "
+                    f"not found in recording. Available channels are {available_channels}"
+                )
+
+        # Check that all units have a channel mapping
+        available_units = set(self.unit_ids)
+        mapped_units = set(unit_ids_to_channel_ids.keys())
+        unmapped_units = available_units - mapped_units
+        if unmapped_units:
+            raise ValueError(f"Units {unmapped_units} from sorting interface have no channel mapping")
+
+        data_interfaces = [recording_interface, sorting_interface]
+        super().__init__(data_interfaces=data_interfaces)
+
+    def add_to_nwbfile(self, nwbfile, metadata, conversion_options: Optional[dict] = None):
+
+        conversion_options = conversion_options or dict()
+        conversion_options_recording = conversion_options.get("recording", dict())
+
+        self.recording_interface.add_to_nwbfile(
+            nwbfile=nwbfile,
+            metadata=metadata,
+            **conversion_options_recording,
+        )
+
+        # This returns the indices in the electrode table that corresponds to the channel ids of the recording
+        electrode_table_indices = _get_electrode_table_indices_for_recording(
+            recording=self.recording_interface.recording_extractor,
+            nwbfile=nwbfile,
+        )
+
+        # Map ids in the recording to the indices in the electrode table
+        channel_id_to_electrode_table_index = {
+            channel_id: electrode_table_indices[channel_index]
+            for channel_index, channel_id in enumerate(self.channel_ids)
+        }
+
+        # Create a list of lists with the indices of the electrodes in the electrode table for each unit
+        unit_electrode_indices = []
+        for unit_id in self.unit_ids:
+            unit_channel_ids = self.unit_ids_to_channel_ids[unit_id]
+            unit_indices = [channel_id_to_electrode_table_index[channel_id] for channel_id in unit_channel_ids]
+            unit_electrode_indices.append(unit_indices)
+
+        conversion_options_sorting = conversion_options.get("sorting", dict())
+        self.sorting_interface.add_to_nwbfile(
+            nwbfile=nwbfile,
+            metadata=metadata,
+            unit_electrode_indices=unit_electrode_indices,
+            **conversion_options_sorting,
+        )
diff --git a/tests/test_ecephys/test_sorted_recording.py b/tests/test_ecephys/test_sorted_recording.py
new file mode 100644
index 000000000..b822e428c
--- /dev/null
+++ b/tests/test_ecephys/test_sorted_recording.py
@@ -0,0 +1,91 @@
+import pytest
+
+from neuroconv.converters import SortedRecordingConverter
+from neuroconv.tools.testing.mock_interfaces import (
+    MockRecordingInterface,
+    MockSortingInterface,
+)
+
+
+class TestSortedRecordingConverter:
+
+    def basic_test(self):
+
+        recording_interface = MockRecordingInterface(num_channels=4, durations=[0.100])
+        recording_extractor = recording_interface.recording_extractor
+        recording_extractor = recording_extractor.rename_channels(new_channel_ids=["A", "B", "C"])
+        recording_interface.recording_extractor = recording_extractor
+
+        sorting_interface = MockSortingInterface(num_units=5, durations=[0.100])
+        sorting_extractor = sorting_interface.sorting_extractor
+        sorting_extractor = sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d", "e"])
+        sorting_interface.sorting_extractor = sorting_extractor
+
+        unit_ids_to_channel_ids = {
+            "a": ["A"],
+            "b": ["B"],
+            "c": ["C"],
+            "d": ["A", "B"],
+            "e": ["C", "A"],
+        }
+        sorted_recording_interface = SortedRecordingConverter(
+            recording_interface=recording_interface,
+            sorting_interface=sorting_interface,
+            unit_ids_to_channel_ids=unit_ids_to_channel_ids,
+        )
+
+        nwbfile = sorted_recording_interface.create_nwbfile()
+
+        # Test that the region in the units table points to the correct electrodes
+        expected_unit_electrode_indices = {
+            "a": [0],
+            "b": [1],
+            "c": [2],
+            "d": [0, 1],
+            "e": [2, 0],
+        }
+        unit_table = nwbfile.units
+        for unit_row in unit_table.to_dataframe().itertuples(index=False):
+
+            # Neuroconv write unit_ids as unit_names
+            unit_id = unit_row.unit_name
+
+            unit_electrode_table_region = unit_row.electrodes
+            expected_unit_electrode_indices = expected_unit_electrode_indices[unit_id]
+            assert unit_electrode_table_region == expected_unit_electrode_indices
+
+    def test_invalid_channel_mapping(self):
+        """Test that invalid channel mappings raise appropriate errors."""
+        recording_interface = MockRecordingInterface(num_channels=4, durations=[0.100])
+        recording_extractor = recording_interface.recording_extractor
+        recording_extractor = recording_extractor.rename_channels(new_channel_ids=["ch1", "ch2", "ch3", "ch4"])
+        recording_interface.recording_extractor = recording_extractor
+
+        sorting_interface = MockSortingInterface(num_units=3, durations=[0.100])
+        sorting_extractor = sorting_interface.sorting_extractor
+        sorting_extractor = sorting_extractor.rename_units(new_unit_ids=["unit1", "unit2", "unit3"])
+        sorting_interface.sorting_extractor = sorting_extractor
+
+        # Test mapping with non-existent channel
+        invalid_channel_mapping = {"unit1": ["ch1", "ch5"], "unit2": ["ch2"], "unit3": ["ch3"]}  # ch5 doesn't exist
+
+        with pytest.raises(ValueError, match="Inexistent channel IDs {'ch5'} referenced in mapping for unit unit1"):
+            SortedRecordingConverter(
+                recording_interface=recording_interface,
+                sorting_interface=sorting_interface,
+                unit_ids_to_channel_ids=invalid_channel_mapping,
+            )
+
+        # Test mapping missing some units
+        incomplete_mapping = {
+            "unit1": ["ch1"],
+            "unit2": ["ch2"],
+            # unit3 is missing
+        }
+
+        with pytest.raises(ValueError, match="Units {'unit3'} from sorting interface have no channel mapping"):
+            SortedRecordingConverter(
+                recording_interface=recording_interface,
+                sorting_interface=sorting_interface,
+                unit_ids_to_channel_ids=incomplete_mapping,
+            )

From ffcbf9204f0481a73af53920c15a30c3721a582d Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Thu, 7 Nov 2024 17:35:30 -0600
Subject: [PATCH 07/11] changelog and union typing

---
 CHANGELOG.md                                                  | 1 +
 .../datainterfaces/ecephys/sortedrecordinginterface.py        | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 807caec57..0019bef9e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@
 
 ## Features
 * Propagate the `unit_electrode_indices` argument from the spikeinterface tools to `BaseSortingExtractorInterface`. This allows users to map units to the electrode table when adding sorting data [PR #1124](https://github.com/catalystneuro/neuroconv/pull/1124)
+* Added `SortedRecordingConverter` to convert sorted recordings to NWB with correct metadata mapping between units and electrodes [PR #1132](https://github.com/catalystneuro/neuroconv/pull/1132)
 
 ## Improvements
 
diff --git a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
index f0d9856ca..2f8ef7dc8 100644
--- a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
+++ b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Union
 
 from neuroconv import ConverterPipe
 from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import (
@@ -27,7 +27,7 @@ def __init__(
         self,
         recording_interface: BaseRecordingExtractorInterface,
         sorting_interface: BaseSortingExtractorInterface,
-        unit_ids_to_channel_ids: dict[str | int, list[str | int]],
+        unit_ids_to_channel_ids: dict[Union[str, int], list[Union[str | int]]],
     ):
         """
         Parameters

From aed86e0b3a6bd99c76530e8c600b39c54232fa1d Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Thu, 7 Nov 2024 19:17:13 -0600
Subject: [PATCH 08/11] fix typing

---
 .../datainterfaces/ecephys/sortedrecordinginterface.py          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
index 2f8ef7dc8..dacc73f8a 100644
--- a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
+++ b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
@@ -27,7 +27,7 @@ def __init__(
         self,
         recording_interface: BaseRecordingExtractorInterface,
         sorting_interface: BaseSortingExtractorInterface,
-        unit_ids_to_channel_ids: dict[Union[str, int], list[Union[str | int]]],
+        unit_ids_to_channel_ids: dict[Union[str, int], list[Union[str, int]]],
     ):
         """
         Parameters

From 8f02c6a66e63823678308cdd7bae56f14ae09bc8 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Thu, 7 Nov 2024 19:26:36 -0600
Subject: [PATCH 09/11] add annoying guide requirements

---
 .../datainterfaces/ecephys/sortedrecordinginterface.py          | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
index dacc73f8a..2038a8916 100644
--- a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
+++ b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
@@ -23,6 +23,8 @@ class SortedRecordingConverter(ConverterPipe):
     associated with electrodes from their corresponding recording interface.
     """
 
+    display_name = "SortedRecordingConverter"
+
     def __init__(
         self,
         recording_interface: BaseRecordingExtractorInterface,

From a677924ac3d8a363d235def08247544d71c42473 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Thu, 7 Nov 2024 19:31:24 -0600
Subject: [PATCH 10/11] more annoying guide properties

---
 .../datainterfaces/ecephys/sortedrecordinginterface.py      | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
index 2038a8916..14705a25b 100644
--- a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
+++ b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
@@ -23,7 +23,13 @@ class SortedRecordingConverter(ConverterPipe):
     associated with electrodes from their corresponding recording interface.
     """
 
+    keywords = (
+        "electrophysiology",
+        "spike sorting",
+    )
     display_name = "SortedRecordingConverter"
+    associated_suffixes = ("None",)
+    info = "A converter for handling simultaneous recording and sorting data linking metadata properly."
 
     def __init__(
         self,

From edaaf40fb070139f4fa268af041de3809861fac8 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Wed, 11 Dec 2024 20:05:14 -0600
Subject: [PATCH 11/11] add documentation

---
 docs/user_guide/index.rst                     |  1 +
 docs/user_guide/linking_sorted_data.rst       | 74 +++++++++++++++++++
 .../baserecordingextractorinterface.py        |  5 ++
 .../ecephys/basesortingextractorinterface.py  |  5 ++
 .../ecephys/sortedrecordinginterface.py       |  4 +-
 5 files changed, 87 insertions(+), 2 deletions(-)
 create mode 100644 docs/user_guide/linking_sorted_data.rst

diff --git a/docs/user_guide/index.rst b/docs/user_guide/index.rst
index bf9aaf253..6abfa51ca 100644
--- a/docs/user_guide/index.rst
+++ b/docs/user_guide/index.rst
@@ -25,6 +25,7 @@ and synchronize data across multiple sources.
   csvs
   expand_path
   backend_configuration
+  linking_sorted_data
   yaml
   docker_demo
   aws_demo
diff --git a/docs/user_guide/linking_sorted_data.rst b/docs/user_guide/linking_sorted_data.rst
new file mode 100644
index 000000000..8b9d596b4
--- /dev/null
+++ b/docs/user_guide/linking_sorted_data.rst
@@ -0,0 +1,74 @@
+.. _linking_sorted_data:
+
+How to Link Sorted Data to Electrodes
+===================================
+
+The ``SortedRecordingConverter`` maintains proper linkage between sorted units and their corresponding recording channels in NWB files.
+It handles the critical relationship between ``Units`` and ``Electrodes`` tables by:
+
+* Creating electrode table regions for each unit
+* Maintaining electrode group and device relationships
+* Mapping channel IDs to electrode indices correctly
+
+This automated handling ensures proper provenance tracking in the NWB file, which is essential for interpreting and analyzing sorted electrophysiology data.
+
+Basic Usage
+----------
+
+Single Probe and Single Recording
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This example demonstrates linking data from a single Neuropixel probe recorded with SpikeGLX and sorted with Kilosort.
+
+The converter requires three components:
+
+1. A recording interface (:py:class:`~neuroconv.datainterfaces.ecephys.spikeglx.spikeglxrecordinginterface.SpikeGLXRecordingInterface`)
+2. A sorting interface (:py:class:`~neuroconv.datainterfaces.ecephys.kilosort.kilosortinterface.KiloSortSortingInterface`)
+3. A mapping between unit IDs and their associated channel IDs
+
+First, instantiate the interfaces::
+
+    from neuroconv import SortedRecordingConverter
+    from neuroconv.datainterfaces import SpikeGLXRecordingInterface, KiloSortSortingInterface
+
+    # Initialize interfaces
+    recording_interface = SpikeGLXRecordingInterface(
+        folder_path="path/to/spikeglx_data",
+        stream_id="imec0.ap"
+    )
+    sorting_interface = KiloSortSortingInterface(
+        folder_path="path/to/kilosort_data"
+    )
+
+Access channel and unit IDs through interface properties::
+
+    # Access channel IDs
+    print(recording_interface.channel_ids)
+    # Example output: ['imec0.ap#AP0', 'imec0.ap#AP1', 'imec0.ap#AP2', ...]
+
+    # Access unit IDs
+    print(sorting_interface.unit_ids)
+    # Example output: ['0', '1', '2', ...]
+
+Define the mapping between units and channels::
+
+    unit_ids_to_channel_ids = {
+        "0": ["imec0.ap#AP0", "imec0.ap#AP1"],  # Unit 0 detected on two channels
+        "1": ["imec0.ap#AP2"],                   # Unit 1 detected on one channel
+        "2": ["imec0.ap#AP3", "imec0.ap#AP4"],   # Unit 2 detected on two channels
+        ...  # Map all remaining units to their respective channels
+    }
+
+.. note::
+
+    Every unit from the sorting interface must have a corresponding channel mapping.
+
+Create the converter and run the conversion::
+
+    converter = SortedRecordingConverter(
+        recording_interface=recording_interface,
+        sorting_interface=sorting_interface,
+        unit_ids_to_channel_ids=unit_ids_to_channel_ids
+    )
+
+    nwbfile = converter.run_conversion(nwbfile_path="path/to/output.nwb")
diff --git a/src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py b/src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py
index 6d0df14c1..8b650897b 100644
--- a/src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py
+++ b/src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py
@@ -107,6 +107,11 @@ def get_metadata(self) -> DeepDict:
 
         return metadata
 
+    @property
+    def channel_ids(self):
+        "Gets the channel ids of the data."
+        return self.recording_extractor.get_channel_ids()
+
     def get_original_timestamps(self) -> Union[np.ndarray, list[np.ndarray]]:
         """
         Retrieve the original unaltered timestamps for the data in this interface.
diff --git a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py
index 8eeb59324..9130270b9 100644
--- a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py
+++ b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py
@@ -75,6 +75,11 @@ def get_metadata_schema(self) -> dict:
         )
         return metadata_schema
 
+    @property
+    def units_ids(self):
+        "Gets the units ids of the data."
+        return self.sorting_extractor.get_unit_ids()
+
     def register_recording(self, recording_interface: BaseRecordingExtractorInterface):
         self.sorting_extractor.register_recording(recording=recording_interface.recording_extractor)
 
diff --git a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
index 14705a25b..b934204de 100644
--- a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
+++ b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py
@@ -56,8 +56,8 @@ def __init__(
         self.sorting_interface = sorting_interface
         self.unit_ids_to_channel_ids = unit_ids_to_channel_ids
 
-        self.channel_ids = self.recording_interface.recording_extractor.get_channel_ids()
-        self.unit_ids = self.sorting_interface.sorting_extractor.get_unit_ids()
+        self.channel_ids = self.recording_interface.channel_ids
+        self.unit_ids = self.sorting_interface.units_ids
 
         # Convert channel_ids to set for comparison
         available_channels = set(self.channel_ids)