From f2c62826698185d24505acf4a07140677933d67c Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 9 Sep 2024 11:14:19 -0600 Subject: [PATCH 01/11] add mocking interface for sorter --- CHANGELOG.md | 1 + src/neuroconv/tools/testing/__init__.py | 8 ++- .../tools/testing/mock_interfaces.py | 51 +++++++++++++++++++ tests/test_ecephys/test_ecephys_interfaces.py | 33 +++++++++++- 4 files changed, 90 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd1580540..e87e5f97e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * Added `get_stream_names` to `OpenEphysRecordingInterface`: [PR #1039](https://github.com/catalystneuro/neuroconv/pull/1039) * Most data interfaces and converters now use Pydantic to validate their inputs, including existence of file and folder paths. [PR #1022](https://github.com/catalystneuro/neuroconv/pull/1022) * All remaining data interfaces and converters now use Pydantic to validate their inputs, including existence of file and folder paths. [PR #1055](https://github.com/catalystneuro/neuroconv/pull/1055) +* Added a `MockSortingInterface` for testing purposes. [PR #1065](https://github.com/catalystneuro/neuroconv/pull/1065) ### Improvements * Using ruff to enforce existence of public classes' docstrings [PR #1034](https://github.com/catalystneuro/neuroconv/pull/1034) diff --git a/src/neuroconv/tools/testing/__init__.py b/src/neuroconv/tools/testing/__init__.py index 7179a7544..79b54d3f9 100644 --- a/src/neuroconv/tools/testing/__init__.py +++ b/src/neuroconv/tools/testing/__init__.py @@ -5,5 +5,11 @@ mock_ZarrDatasetIOConfiguration, ) from .mock_files import generate_path_expander_demo_ibl -from .mock_interfaces import MockBehaviorEventInterface, MockSpikeGLXNIDQInterface +from .mock_interfaces import ( + MockBehaviorEventInterface, + MockSpikeGLXNIDQInterface, + MockRecordingInterface, + MockImagingInterface, + MockSortingInterface, +) from .mock_ttl_signals import generate_mock_ttl_signal, regenerate_test_cases diff --git a/src/neuroconv/tools/testing/mock_interfaces.py b/src/neuroconv/tools/testing/mock_interfaces.py index f05228b34..d9251c858 100644 --- a/src/neuroconv/tools/testing/mock_interfaces.py +++ b/src/neuroconv/tools/testing/mock_interfaces.py @@ -11,6 +11,9 @@ from ...datainterfaces.ecephys.baserecordingextractorinterface import ( BaseRecordingExtractorInterface, ) +from ...datainterfaces.ecephys.basesortingextractorinterface import ( + BaseSortingExtractorInterface, +) from ...datainterfaces.ophys.baseimagingextractorinterface import ( BaseImagingExtractorInterface, ) @@ -157,6 +160,54 @@ def get_metadata(self) -> dict: return metadata +class MockSortingInterface(BaseSortingExtractorInterface): + """A mock sorting extractor interface for generating synthetic sorting data.""" + + # TODO: Implement this class with the lazy generator once is merged + # https://github.com/SpikeInterface/spikeinterface/pull/2227 + + ExtractorModuleName = "spikeinterface.core.generate" + ExtractorName = "generate_sorting" + + def __init__( + self, + num_units: int = 4, + sampling_frequency: float = 30_000.0, + durations: tuple[float] = (1.0,), + seed: int = 0, + verbose: bool = True, + ): + """ + Parameters + ---------- + num_units : int, optional + Number of units to generate, by default 4. + sampling_frequency : float, optional + Sampling frequency of the generated data in Hz, by default 30,000.0 Hz. + durations : tuple of float, optional + Durations of the segments in seconds, by default (1.0,). + seed : int, optional + Seed for the random number generator, by default 0. + verbose : bool, optional + Control whether to display verbose messages during writing, by default True. + + """ + + super().__init__( + num_units=num_units, + sampling_frequency=sampling_frequency, + durations=durations, + seed=seed, + verbose=verbose, + ) + + def get_metadata(self) -> dict: # noqa D102 + metadata = super().get_metadata() + session_start_time = datetime.now().astimezone() + metadata["NWBFile"]["session_start_time"] = session_start_time + return metadata + + class MockImagingInterface(BaseImagingExtractorInterface): """ A mock imaging interface for testing purposes. diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py index 6372b3535..5591bb6fb 100644 --- a/tests/test_ecephys/test_ecephys_interfaces.py +++ b/tests/test_ecephys/test_ecephys_interfaces.py @@ -20,7 +20,10 @@ BaseSortingExtractorInterface, ) from neuroconv.tools.nwb_helpers import get_module -from neuroconv.tools.testing.mock_interfaces import MockRecordingInterface +from neuroconv.tools.testing.mock_interfaces import ( + MockRecordingInterface, + MockSortingInterface, +) python_version = Version(get_python_version()) @@ -67,7 +70,33 @@ def test_spike2_import_assertions_3_11(self): Spike2RecordingInterface.get_all_channels_info(file_path="does_not_matter.smrx") -class TestSortingInterface(unittest.TestCase): +class TestSortingInterface: + + def test_run_conversion(self, tmp_path): + + nwbfile_path = Path(tmp_path) / "test_sorting.nwb" + num_units = 4 + interface = MockSortingInterface(num_units=num_units, durations=(1.0,)) + interface.sorting_extractor = interface.sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d"]) + + interface.run_conversion(nwbfile_path=nwbfile_path) + with NWBHDF5IO(nwbfile_path, "r") as io: + nwbfile = io.read() + + units = nwbfile.units + assert len(units) == num_units + units_df = units.to_dataframe() + # Get index in units table + for unit_id in interface.sorting_extractor.unit_ids: + # In pynwb we write unit name as unit_id + row = units_df.query(f"unit_name == '{unit_id}'") + spike_times = interface.sorting_extractor.get_unit_spike_train(unit_id=unit_id, return_times=True) + written_spike_times = row["spike_times"].iloc[0] + + np.testing.assert_array_equal(spike_times, written_spike_times) + + +class TestSortingInterfaceOld(unittest.TestCase): @classmethod def setUpClass(cls) -> None: cls.test_dir = Path(mkdtemp()) From 5e4dc6afa4ca19ba7afbd3728f73ea8313eb062c Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 12 Sep 2024 16:30:31 -0600 Subject: [PATCH 02/11] wip --- src/neuroconv/nwbconverter.py | 2 +- tests/test_ecephys/test_ecephys_interfaces.py | 22 ++++++++++++++++++- .../test_mock_recording_interface.py | 9 -------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/neuroconv/nwbconverter.py b/src/neuroconv/nwbconverter.py index 1f3e7c9f8..7dc862da5 100644 --- a/src/neuroconv/nwbconverter.py +++ b/src/neuroconv/nwbconverter.py @@ -144,7 +144,7 @@ def validate_conversion_options(self, conversion_options: dict[str, dict]): def create_nwbfile(self, metadata: Optional[dict] = None, conversion_options: Optional[dict] = None) -> NWBFile: """ - Create and return an in-memory pynwb.NWBFile object with this interface's data added to it. + Create and return an in-memory pynwb.NWBFile object with the conversion data added to it. Parameters ---------- diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py index 5591bb6fb..8c61b89e3 100644 --- a/tests/test_ecephys/test_ecephys_interfaces.py +++ b/tests/test_ecephys/test_ecephys_interfaces.py @@ -27,8 +27,28 @@ python_version = Version(get_python_version()) +from neuroconv.tools.testing.data_interface_mixins import ( + RecordingExtractorInterfaceTestMixin, + SortingExtractorInterfaceTestMixin, +) + + +class TestSortingInterface(SortingExtractorInterfaceTestMixin): + + data_interface_cls = MockSortingInterface + interface_kwargs = dict(num_units=4, durations=[0.100]) + + def test_map_electrode_indices(self): + + self.data_interface.create_nwbfile() + + +class TestRecordingInterface(RecordingExtractorInterfaceTestMixin): + data_interface_cls = MockRecordingInterface + interface_kwargs = dict(durations=[0.100]) + -class TestRecordingInterface(TestCase): +class TestRecordingInterfaceOld(TestCase): @classmethod def setUpClass(cls): cls.single_segment_recording_interface = MockRecordingInterface(durations=[0.100]) diff --git a/tests/test_ecephys/test_mock_recording_interface.py b/tests/test_ecephys/test_mock_recording_interface.py index a33f3acd1..e69de29bb 100644 --- a/tests/test_ecephys/test_mock_recording_interface.py +++ b/tests/test_ecephys/test_mock_recording_interface.py @@ -1,9 +0,0 @@ -from neuroconv.tools.testing.data_interface_mixins import ( - RecordingExtractorInterfaceTestMixin, -) -from neuroconv.tools.testing.mock_interfaces import MockRecordingInterface - - -class TestMockRecordingInterface(RecordingExtractorInterfaceTestMixin): - data_interface_cls = MockRecordingInterface - interface_kwargs = dict(durations=[0.100]) From 4f0c935767a0250e5a359b32158b5a2faca0fbf4 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 5 Nov 2024 14:50:29 -0600 Subject: [PATCH 03/11] propagate unit_electrode_indices --- .../ecephys/basesortingextractorinterface.py | 8 ++ .../tools/spikeinterface/spikeinterface.py | 12 +-- tests/test_ecephys/test_ecephys_interfaces.py | 77 ++++++++++++------- 3 files changed, 64 insertions(+), 33 deletions(-) diff --git a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py index cd8396154..dca2dea5f 100644 --- a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py +++ b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py @@ -288,6 +288,7 @@ def add_to_nwbfile( write_as: Literal["units", "processing"] = "units", units_name: str = "units", units_description: str = "Autogenerated by neuroconv.", + unit_electrode_indices: Optional[list[list[int]]] = None, ): """ Primary function for converting the data in a SortingExtractor to NWB format. @@ -312,9 +313,15 @@ def add_to_nwbfile( units_name : str, default: 'units' The name of the units table. If write_as=='units', then units_name must also be 'units'. units_description : str, default: 'Autogenerated by neuroconv.' + unit_electrode_indices : list of lists of int, optional + A list of lists of integers indicating the indices of the electrodes that each unit is associated with. + The length of the list must match the number of units in the sorting extractor. """ from ...tools.spikeinterface import add_sorting_to_nwbfile + if metadata is None: + metadata = self.get_metadata() + metadata_copy = deepcopy(metadata) if write_ecephys_metadata: self.add_channel_metadata_to_nwb(nwbfile=nwbfile, metadata=metadata_copy) @@ -346,4 +353,5 @@ def add_to_nwbfile( write_as=write_as, units_name=units_name, units_description=units_description, + unit_electrode_indices=unit_electrode_indices, ) diff --git a/src/neuroconv/tools/spikeinterface/spikeinterface.py b/src/neuroconv/tools/spikeinterface/spikeinterface.py index 1be86862a..8a5ebf8f9 100644 --- a/src/neuroconv/tools/spikeinterface/spikeinterface.py +++ b/src/neuroconv/tools/spikeinterface/spikeinterface.py @@ -1406,7 +1406,7 @@ def add_units_table_to_nwbfile( write_in_processing_module: bool = False, waveform_means: Optional[np.ndarray] = None, waveform_sds: Optional[np.ndarray] = None, - unit_electrode_indices=None, + unit_electrode_indices: Optional[list[list[int]]] = None, null_values_for_properties: Optional[dict] = None, ): """ @@ -1443,8 +1443,9 @@ def add_units_table_to_nwbfile( Waveform standard deviation for each unit. Shape: (num_units, num_samples, num_channels). unit_electrode_indices : list of lists of int, optional For each unit, a list of electrode indices corresponding to waveform data. - null_values_for_properties: dict, optional - A dictionary mapping properties to null values to use when the property is not present + unit_electrode_indices : list of lists of int, optional + A list of lists of integers indicating the indices of the electrodes that each unit is associated with. + The length of the list must match the number of units in the sorting extractor. """ unit_table_description = unit_table_description or "Autogenerated by neuroconv." @@ -1706,7 +1707,7 @@ def add_sorting_to_nwbfile( units_description: str = "Autogenerated by neuroconv.", waveform_means: Optional[np.ndarray] = None, waveform_sds: Optional[np.ndarray] = None, - unit_electrode_indices=None, + unit_electrode_indices: Optional[list[list[int]]] = None, ): """Add sorting data (units and their properties) to an NWBFile. @@ -1741,7 +1742,8 @@ def add_sorting_to_nwbfile( waveform_sds : np.ndarray, optional Waveform standard deviation for each unit. Shape: (num_units, num_samples, num_channels). unit_electrode_indices : list of lists of int, optional - For each unit, a list of electrode indices corresponding to waveform data. + A list of lists of integers indicating the indices of the electrodes that each unit is associated with. + The length of the list must match the number of units in the sorting extractor. """ if skip_features is not None: diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py index fb1739366..669e90131 100644 --- a/tests/test_ecephys/test_ecephys_interfaces.py +++ b/tests/test_ecephys/test_ecephys_interfaces.py @@ -38,9 +38,56 @@ class TestSortingInterface(SortingExtractorInterfaceTestMixin): data_interface_cls = MockSortingInterface interface_kwargs = dict(num_units=4, durations=[0.100]) - def test_map_electrode_indices(self): + def test_use_electrode_indices(self, setup_interface): - self.data_interface.create_nwbfile() + recording_interface = MockRecordingInterface(num_channels=4, durations=[0.100]) + recording_extractor = recording_interface.recording_extractor + recording_extractor = recording_extractor.rename_channels(new_channel_ids=["a", "b", "c", "d"]) + recording_extractor.set_property(key="property", values=["A", "B", "C", "D"]) + recording_interface.recording_extractor = recording_extractor + + nwbfile = recording_interface.create_nwbfile() + + unit_electrode_indices = [[3], [0, 1], [1], [2]] + expected_properties_matching = [["D"], ["A", "B"], ["B"], ["C"]] + self.interface.add_to_nwbfile(nwbfile=nwbfile, unit_electrode_indices=unit_electrode_indices) + + unit_table = nwbfile.units + + for unit_row, electrode_indices, property in zip( + unit_table.to_dataframe().itertuples(index=False), + unit_electrode_indices, + expected_properties_matching, + ): + electrode_table_region = unit_row.electrodes + electrode_table_region_indices = electrode_table_region.index.to_list() + assert electrode_table_region_indices == electrode_indices + + electrode_table_region_properties = electrode_table_region["property"].to_list() + assert electrode_table_region_properties == property + + def test_run_conversion(self, tmp_path): + + nwbfile_path = Path(tmp_path) / "test_sorting.nwb" + num_units = 4 + interface = MockSortingInterface(num_units=num_units, durations=(1.0,)) + interface.sorting_extractor = interface.sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d"]) + + interface.run_conversion(nwbfile_path=nwbfile_path) + with NWBHDF5IO(nwbfile_path, "r") as io: + nwbfile = io.read() + + units = nwbfile.units + assert len(units) == num_units + units_df = units.to_dataframe() + # Get index in units table + for unit_id in interface.sorting_extractor.unit_ids: + # In pynwb we write unit name as unit_id + row = units_df.query(f"unit_name == '{unit_id}'") + spike_times = interface.sorting_extractor.get_unit_spike_train(unit_id=unit_id, return_times=True) + written_spike_times = row["spike_times"].iloc[0] + + np.testing.assert_array_equal(spike_times, written_spike_times) class TestRecordingInterface(RecordingExtractorInterfaceTestMixin): @@ -104,32 +151,6 @@ def test_spike2_import_assertions_3_11(self): Spike2RecordingInterface.get_all_channels_info(file_path="does_not_matter.smrx") -class TestSortingInterface: - - def test_run_conversion(self, tmp_path): - - nwbfile_path = Path(tmp_path) / "test_sorting.nwb" - num_units = 4 - interface = MockSortingInterface(num_units=num_units, durations=(1.0,)) - interface.sorting_extractor = interface.sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d"]) - - interface.run_conversion(nwbfile_path=nwbfile_path) - with NWBHDF5IO(nwbfile_path, "r") as io: - nwbfile = io.read() - - units = nwbfile.units - assert len(units) == num_units - units_df = units.to_dataframe() - # Get index in units table - for unit_id in interface.sorting_extractor.unit_ids: - # In pynwb we write unit name as unit_id - row = units_df.query(f"unit_name == '{unit_id}'") - spike_times = interface.sorting_extractor.get_unit_spike_train(unit_id=unit_id, return_times=True) - written_spike_times = row["spike_times"].iloc[0] - - np.testing.assert_array_equal(spike_times, written_spike_times) - - class TestSortingInterfaceOld(unittest.TestCase): @classmethod def setUpClass(cls) -> None: From c12a840821e923ee502d4446331ff28453c3a954 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 5 Nov 2024 15:12:45 -0600 Subject: [PATCH 04/11] add tests for electrode propagation --- src/neuroconv/tools/spikeinterface/spikeinterface.py | 7 +++++++ tests/test_ecephys/test_ecephys_interfaces.py | 9 ++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/neuroconv/tools/spikeinterface/spikeinterface.py b/src/neuroconv/tools/spikeinterface/spikeinterface.py index 8a5ebf8f9..4439800c4 100644 --- a/src/neuroconv/tools/spikeinterface/spikeinterface.py +++ b/src/neuroconv/tools/spikeinterface/spikeinterface.py @@ -1453,6 +1453,13 @@ def add_units_table_to_nwbfile( nwbfile, pynwb.NWBFile ), f"'nwbfile' should be of type pynwb.NWBFile but is of type {type(nwbfile)}" + if unit_electrode_indices is not None: + electrodes_table = nwbfile.electrodes + if electrodes_table is None: + raise ValueError( + "Electrodes table is required to map units to electrodes. Add an electrode table to the NWBFile first." + ) + null_values_for_properties = dict() if null_values_for_properties is None else null_values_for_properties if not write_in_processing_module and units_table_name != "units": diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py index 669e90131..5a92e8134 100644 --- a/tests/test_ecephys/test_ecephys_interfaces.py +++ b/tests/test_ecephys/test_ecephys_interfaces.py @@ -38,7 +38,7 @@ class TestSortingInterface(SortingExtractorInterfaceTestMixin): data_interface_cls = MockSortingInterface interface_kwargs = dict(num_units=4, durations=[0.100]) - def test_use_electrode_indices(self, setup_interface): + def test_electrode_indices(self, setup_interface): recording_interface = MockRecordingInterface(num_channels=4, durations=[0.100]) recording_extractor = recording_interface.recording_extractor @@ -66,6 +66,13 @@ def test_use_electrode_indices(self, setup_interface): electrode_table_region_properties = electrode_table_region["property"].to_list() assert electrode_table_region_properties == property + def test_electrode_indices_assertion_error_when_missing_table(self, setup_interface): + with pytest.raises( + ValueError, + match="Electrodes table is required to map units to electrodes. Add an electrode table to the NWBFile first.", + ): + self.interface.create_nwbfile(unit_electrode_indices=[[0], [1], [2], [3]]) + def test_run_conversion(self, tmp_path): nwbfile_path = Path(tmp_path) / "test_sorting.nwb" From ff41d1c5eb11d7acb33918efc2d4c9b467ca14b6 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 5 Nov 2024 15:23:36 -0600 Subject: [PATCH 05/11] fix changelog and sorting interface --- CHANGELOG.md | 2 +- tests/test_ecephys/test_ecephys_interfaces.py | 23 ------------------- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ece94ecb..807caec57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ## Bug Fixes ## Features +* Propagate the `unit_electrode_indices` argument from the spikeinterface tools to `BaseSortingExtractorInterface`. This allows users to map units to the electrode table when adding sorting data [PR #1124](https://github.com/catalystneuro/neuroconv/pull/1124) ## Improvements @@ -74,7 +75,6 @@ * Added `get_stream_names` to `OpenEphysRecordingInterface`: [PR #1039](https://github.com/catalystneuro/neuroconv/pull/1039) * Most data interfaces and converters now use Pydantic to validate their inputs, including existence of file and folder paths. [PR #1022](https://github.com/catalystneuro/neuroconv/pull/1022) * All remaining data interfaces and converters now use Pydantic to validate their inputs, including existence of file and folder paths. [PR #1055](https://github.com/catalystneuro/neuroconv/pull/1055) -* Added automated EFS volume creation and mounting to the `submit_aws_job` helper function. [PR #1018](https://github.com/catalystneuro/neuroconv/pull/1018) ## Improvements diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py index 5a92e8134..bbfd2b586 100644 --- a/tests/test_ecephys/test_ecephys_interfaces.py +++ b/tests/test_ecephys/test_ecephys_interfaces.py @@ -73,29 +73,6 @@ def test_electrode_indices_assertion_error_when_missing_table(self, setup_interf ): self.interface.create_nwbfile(unit_electrode_indices=[[0], [1], [2], [3]]) - def test_run_conversion(self, tmp_path): - - nwbfile_path = Path(tmp_path) / "test_sorting.nwb" - num_units = 4 - interface = MockSortingInterface(num_units=num_units, durations=(1.0,)) - interface.sorting_extractor = interface.sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d"]) - - interface.run_conversion(nwbfile_path=nwbfile_path) - with NWBHDF5IO(nwbfile_path, "r") as io: - nwbfile = io.read() - - units = nwbfile.units - assert len(units) == num_units - units_df = units.to_dataframe() - # Get index in units table - for unit_id in interface.sorting_extractor.unit_ids: - # In pynwb we write unit name as unit_id - row = units_df.query(f"unit_name == '{unit_id}'") - spike_times = interface.sorting_extractor.get_unit_spike_train(unit_id=unit_id, return_times=True) - written_spike_times = row["spike_times"].iloc[0] - - np.testing.assert_array_equal(spike_times, written_spike_times) - class TestRecordingInterface(RecordingExtractorInterfaceTestMixin): data_interface_cls = MockRecordingInterface From 0a730bd30992063fe50c2c437b82ce14f04d1460 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 7 Nov 2024 17:15:18 -0600 Subject: [PATCH 06/11] add sorted recording --- src/neuroconv/converters/__init__.py | 2 + .../ecephys/sortedrecordinginterface.py | 112 ++++++++++++++++++ tests/test_ecephys/test_sorted_recording.py | 91 ++++++++++++++ 3 files changed, 205 insertions(+) create mode 100644 src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py create mode 100644 tests/test_ecephys/test_sorted_recording.py diff --git a/src/neuroconv/converters/__init__.py b/src/neuroconv/converters/__init__.py index 7fa38bfd5..a741ff331 100644 --- a/src/neuroconv/converters/__init__.py +++ b/src/neuroconv/converters/__init__.py @@ -9,6 +9,7 @@ from ..datainterfaces.behavior.lightningpose.lightningposeconverter import ( LightningPoseConverter, ) +from ..datainterfaces.ecephys.sortedrecordinginterface import SortedRecordingConverter from ..datainterfaces.ecephys.spikeglx.spikeglxconverter import SpikeGLXConverterPipe from ..datainterfaces.ophys.brukertiff.brukertiffconverter import ( BrukerTiffMultiPlaneConverter, @@ -22,4 +23,5 @@ BrukerTiffMultiPlaneConverter, BrukerTiffSinglePlaneConverter, MiniscopeConverter, + SortedRecordingConverter, ] diff --git a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py new file mode 100644 index 000000000..f0d9856ca --- /dev/null +++ b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py @@ -0,0 +1,112 @@ +from typing import Optional + +from neuroconv import ConverterPipe +from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import ( + BaseRecordingExtractorInterface, +) +from neuroconv.datainterfaces.ecephys.basesortingextractorinterface import ( + BaseSortingExtractorInterface, +) +from neuroconv.tools.spikeinterface.spikeinterface import ( + _get_electrode_table_indices_for_recording, +) + + +class SortedRecordingConverter(ConverterPipe): + """ + A converter for handling simultaneous recording and sorting data from multiple probes, + ensuring correct mapping between sorted units and their corresponding electrodes. + + This converter addresses the challenge of maintaining proper linkage between sorted units + and their recording channels when dealing with multiple recording probes (e.g., multiple + Neuropixels probes). It ensures that units from each sorting interface are correctly + associated with electrodes from their corresponding recording interface. + """ + + def __init__( + self, + recording_interface: BaseRecordingExtractorInterface, + sorting_interface: BaseSortingExtractorInterface, + unit_ids_to_channel_ids: dict[str | int, list[str | int]], + ): + """ + Parameters + ---------- + recording_interface : BaseRecordingExtractorInterface + The interface handling the raw recording data. This typically represents data + from a single probe, like a SpikeGLXRecordingInterface. + sorting_interface : BaseSortingExtractorInterface + The interface handling the sorted units data. This typically represents the + output of a spike sorting algorithm, like a KiloSortSortingInterface. + unit_ids_to_channel_ids : dict[str | int, list[str | int]] + A mapping from unit IDs to their associated channel IDs. Each unit ID (key) + maps to a list of channel IDs (values) that were used to detect that unit. + This mapping ensures proper linkage between sorted units and recording channels. + """ + + self.recording_interface = recording_interface + self.sorting_interface = sorting_interface + self.unit_ids_to_channel_ids = unit_ids_to_channel_ids + + self.channel_ids = self.recording_interface.recording_extractor.get_channel_ids() + self.unit_ids = self.sorting_interface.sorting_extractor.get_unit_ids() + + # Convert channel_ids to set for comparison + available_channels = set(self.channel_ids) + + # Check that all referenced channels exist in recording + for unit_id, channel_ids in unit_ids_to_channel_ids.items(): + unknown_channels = set(channel_ids) - available_channels + if unknown_channels: + raise ValueError( + f"Inexistent channel IDs {unknown_channels} referenced in mapping for unit {unit_id} " + f"not found in recording. Available channels are {available_channels}" + ) + + # Check that all units have a channel mapping + available_units = set(self.unit_ids) + mapped_units = set(unit_ids_to_channel_ids.keys()) + unmapped_units = available_units - mapped_units + if unmapped_units: + raise ValueError(f"Units {unmapped_units} from sorting interface have no channel mapping") + + data_interfaces = [recording_interface, sorting_interface] + super().__init__(data_interfaces=data_interfaces) + + def add_to_nwbfile(self, nwbfile, metadata, conversion_options: Optional[dict] = None): + + conversion_options = conversion_options or dict() + conversion_options_recording = conversion_options.get("recording", dict()) + + self.recording_interface.add_to_nwbfile( + nwbfile=nwbfile, + metadata=metadata, + **conversion_options_recording, + ) + + # This returns the indices in the electrode table that corresponds to the channel ids of the recording + electrode_table_indices = _get_electrode_table_indices_for_recording( + recording=self.recording_interface.recording_extractor, + nwbfile=nwbfile, + ) + + # Map ids in the recording to the indices in the electrode table + channel_id_to_electrode_table_index = { + channel_id: electrode_table_indices[channel_index] + for channel_index, channel_id in enumerate(self.channel_ids) + } + + # Create a list of lists with the indices of the electrodes in the electrode table for each unit + unit_electrode_indices = [] + for unit_id in self.unit_ids: + unit_channel_ids = self.unit_ids_to_channel_ids[unit_id] + unit_indices = [channel_id_to_electrode_table_index[channel_id] for channel_id in unit_channel_ids] + unit_electrode_indices.append(unit_indices) + + conversion_options_sorting = conversion_options.get("sorting", dict()) + self.sorting_interface.add_to_nwbfile( + nwbfile=nwbfile, + metadata=metadata, + unit_electrode_indices=unit_electrode_indices, + **conversion_options_sorting, + ) diff --git a/tests/test_ecephys/test_sorted_recording.py b/tests/test_ecephys/test_sorted_recording.py new file mode 100644 index 000000000..b822e428c --- /dev/null +++ b/tests/test_ecephys/test_sorted_recording.py @@ -0,0 +1,91 @@ +import pytest + +from neuroconv.converters import SortedRecordingConverter +from neuroconv.tools.testing.mock_interfaces import ( + MockRecordingInterface, + MockSortingInterface, +) + + +class TestSortedRecordingConverter: + + def basic_test(self): + + recording_interface = MockRecordingInterface(num_channels=4, durations=[0.100]) + recording_extractor = recording_interface.recording_extractor + recording_extractor = recording_extractor.rename_channels(new_channel_ids=["A", "B", "C"]) + recording_interface.recording_extractor = recording_extractor + + sorting_interface = MockSortingInterface(num_units=5, durations=[0.100]) + sorting_extractor = sorting_interface.sorting_extractor + sorting_extractor = sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d", "e"]) + sorting_interface.sorting_extractor = sorting_extractor + + unit_ids_to_channel_ids = { + "a": ["A"], + "b": ["B"], + "c": ["C"], + "d": ["A", "B"], + "e": ["C", "A"], + } + sorted_recording_interface = SortedRecordingConverter( + recording_interface=recording_interface, + sorting_interface=sorting_interface, + unit_ids_to_channel_ids=unit_ids_to_channel_ids, + ) + + nwbfile = sorted_recording_interface.create_nwbfile() + + # Test that the region in the units table points to the correct electrodes + expected_unit_electrode_indices = { + "a": [0], + "b": [1], + "c": [2], + "d": [0, 1], + "e": [2, 0], + } + unit_table = nwbfile.units + for unit_row in unit_table.to_dataframe().itertuples(index=False): + + # Neuroconv write unit_ids as unit_names + unit_id = unit_row.unit_name + + unit_electrode_table_region = unit_row.electrodes + expected_unit_electrode_indices = expected_unit_electrode_indices[unit_id] + assert unit_electrode_table_region == expected_unit_electrode_indices + + def test_invalid_channel_mapping(self): + """Test that invalid channel mappings raise appropriate errors.""" + recording_interface = MockRecordingInterface(num_channels=4, durations=[0.100]) + recording_extractor = recording_interface.recording_extractor + recording_extractor = recording_extractor.rename_channels(new_channel_ids=["ch1", "ch2", "ch3", "ch4"]) + recording_interface.recording_extractor = recording_extractor + + sorting_interface = MockSortingInterface(num_units=3, durations=[0.100]) + sorting_extractor = sorting_interface.sorting_extractor + sorting_extractor = sorting_extractor.rename_units(new_unit_ids=["unit1", "unit2", "unit3"]) + sorting_interface.sorting_extractor = sorting_extractor + + # Test mapping with non-existent channel + invalid_channel_mapping = {"unit1": ["ch1", "ch5"], "unit2": ["ch2"], "unit3": ["ch3"]} # ch5 doesn't exist + + with pytest.raises(ValueError, match="Inexistent channel IDs {'ch5'} referenced in mapping for unit unit1"): + SortedRecordingConverter( + recording_interface=recording_interface, + sorting_interface=sorting_interface, + unit_ids_to_channel_ids=invalid_channel_mapping, + ) + + # Test mapping missing some units + incomplete_mapping = { + "unit1": ["ch1"], + "unit2": ["ch2"], + # unit3 is missing + } + + with pytest.raises(ValueError, match="Units {'unit3'} from sorting interface have no channel mapping"): + SortedRecordingConverter( + recording_interface=recording_interface, + sorting_interface=sorting_interface, + unit_ids_to_channel_ids=incomplete_mapping, + ) From ffcbf9204f0481a73af53920c15a30c3721a582d Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 7 Nov 2024 17:35:30 -0600 Subject: [PATCH 07/11] changelog and union typing --- CHANGELOG.md | 1 + .../datainterfaces/ecephys/sortedrecordinginterface.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 807caec57..0019bef9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ ## Features * Propagate the `unit_electrode_indices` argument from the spikeinterface tools to `BaseSortingExtractorInterface`. This allows users to map units to the electrode table when adding sorting data [PR #1124](https://github.com/catalystneuro/neuroconv/pull/1124) +* Added `SortedRecordingConverter` to convert sorted recordings to NWB with correct metadata mapping between units and electrodes [PR #1132](https://github.com/catalystneuro/neuroconv/pull/1132) ## Improvements diff --git a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py index f0d9856ca..2f8ef7dc8 100644 --- a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py +++ b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, Union from neuroconv import ConverterPipe from neuroconv.datainterfaces.ecephys.baserecordingextractorinterface import ( @@ -27,7 +27,7 @@ def __init__( self, recording_interface: BaseRecordingExtractorInterface, sorting_interface: BaseSortingExtractorInterface, - unit_ids_to_channel_ids: dict[str | int, list[str | int]], + unit_ids_to_channel_ids: dict[Union[str, int], list[Union[str | int]]], ): """ Parameters From aed86e0b3a6bd99c76530e8c600b39c54232fa1d Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 7 Nov 2024 19:17:13 -0600 Subject: [PATCH 08/11] fix typing --- .../datainterfaces/ecephys/sortedrecordinginterface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py index 2f8ef7dc8..dacc73f8a 100644 --- a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py +++ b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py @@ -27,7 +27,7 @@ def __init__( self, recording_interface: BaseRecordingExtractorInterface, sorting_interface: BaseSortingExtractorInterface, - unit_ids_to_channel_ids: dict[Union[str, int], list[Union[str | int]]], + unit_ids_to_channel_ids: dict[Union[str, int], list[Union[str, int]]], ): """ Parameters From 8f02c6a66e63823678308cdd7bae56f14ae09bc8 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 7 Nov 2024 19:26:36 -0600 Subject: [PATCH 09/11] add annoying guide requirements --- .../datainterfaces/ecephys/sortedrecordinginterface.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py index dacc73f8a..2038a8916 100644 --- a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py +++ b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py @@ -23,6 +23,8 @@ class SortedRecordingConverter(ConverterPipe): associated with electrodes from their corresponding recording interface. """ + display_name = "SortedRecordingConverter" + def __init__( self, recording_interface: BaseRecordingExtractorInterface, From a677924ac3d8a363d235def08247544d71c42473 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 7 Nov 2024 19:31:24 -0600 Subject: [PATCH 10/11] more annoying guide properties --- .../datainterfaces/ecephys/sortedrecordinginterface.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py index 2038a8916..14705a25b 100644 --- a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py +++ b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py @@ -23,7 +23,13 @@ class SortedRecordingConverter(ConverterPipe): associated with electrodes from their corresponding recording interface. """ + keywords = ( + "electrophysiology", + "spike sorting", + ) display_name = "SortedRecordingConverter" + associated_suffixes = ("None",) + info = "A converter for handling simultaneous recording and sorting data linking metadata properly." def __init__( self, From edaaf40fb070139f4fa268af041de3809861fac8 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Wed, 11 Dec 2024 20:05:14 -0600 Subject: [PATCH 11/11] add documentation --- docs/user_guide/index.rst | 1 + docs/user_guide/linking_sorted_data.rst | 74 +++++++++++++++++++ .../baserecordingextractorinterface.py | 5 ++ .../ecephys/basesortingextractorinterface.py | 5 ++ .../ecephys/sortedrecordinginterface.py | 4 +- 5 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 docs/user_guide/linking_sorted_data.rst diff --git a/docs/user_guide/index.rst b/docs/user_guide/index.rst index bf9aaf253..6abfa51ca 100644 --- a/docs/user_guide/index.rst +++ b/docs/user_guide/index.rst @@ -25,6 +25,7 @@ and synchronize data across multiple sources. csvs expand_path backend_configuration + linking_sorted_data yaml docker_demo aws_demo diff --git a/docs/user_guide/linking_sorted_data.rst b/docs/user_guide/linking_sorted_data.rst new file mode 100644 index 000000000..8b9d596b4 --- /dev/null +++ b/docs/user_guide/linking_sorted_data.rst @@ -0,0 +1,74 @@ +.. _linking_sorted_data: + +How to Link Sorted Data to Electrodes +=================================== + +The ``SortedRecordingConverter`` maintains proper linkage between sorted units and their corresponding recording channels in NWB files. +It handles the critical relationship between ``Units`` and ``Electrodes`` tables by: + +* Creating electrode table regions for each unit +* Maintaining electrode group and device relationships +* Mapping channel IDs to electrode indices correctly + +This automated handling ensures proper provenance tracking in the NWB file, which is essential for interpreting and analyzing sorted electrophysiology data. + +Basic Usage +---------- + +Single Probe and Single Recording +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This example demonstrates linking data from a single Neuropixel probe recorded with SpikeGLX and sorted with Kilosort. + +The converter requires three components: + +1. A recording interface (:py:class:`~neuroconv.datainterfaces.ecephys.spikeglx.spikeglxrecordinginterface.SpikeGLXRecordingInterface`) +2. A sorting interface (:py:class:`~neuroconv.datainterfaces.ecephys.kilosort.kilosortinterface.KiloSortSortingInterface`) +3. A mapping between unit IDs and their associated channel IDs + +First, instantiate the interfaces:: + + from neuroconv import SortedRecordingConverter + from neuroconv.datainterfaces import SpikeGLXRecordingInterface, KiloSortSortingInterface + + # Initialize interfaces + recording_interface = SpikeGLXRecordingInterface( + folder_path="path/to/spikeglx_data", + stream_id="imec0.ap" + ) + sorting_interface = KiloSortSortingInterface( + folder_path="path/to/kilosort_data" + ) + +Access channel and unit IDs through interface properties:: + + # Access channel IDs + print(recording_interface.channel_ids) + # Example output: ['imec0.ap#AP0', 'imec0.ap#AP1', 'imec0.ap#AP2', ...] + + # Access unit IDs + print(sorting_interface.unit_ids) + # Example output: ['0', '1', '2', ...] + +Define the mapping between units and channels:: + + unit_ids_to_channel_ids = { + "0": ["imec0.ap#AP0", "imec0.ap#AP1"], # Unit 0 detected on two channels + "1": ["imec0.ap#AP2"], # Unit 1 detected on one channel + "2": ["imec0.ap#AP3", "imec0.ap#AP4"], # Unit 2 detected on two channels + ... # Map all remaining units to their respective channels + } + +.. note:: + + Every unit from the sorting interface must have a corresponding channel mapping. + +Create the converter and run the conversion:: + + converter = SortedRecordingConverter( + recording_interface=recording_interface, + sorting_interface=sorting_interface, + unit_ids_to_channel_ids=unit_ids_to_channel_ids + ) + + nwbfile = converter.run_conversion(nwbfile_path="path/to/output.nwb") diff --git a/src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py b/src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py index 6d0df14c1..8b650897b 100644 --- a/src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py +++ b/src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py @@ -107,6 +107,11 @@ def get_metadata(self) -> DeepDict: return metadata + @property + def channel_ids(self): + "Gets the channel ids of the data." + return self.recording_extractor.get_channel_ids() + def get_original_timestamps(self) -> Union[np.ndarray, list[np.ndarray]]: """ Retrieve the original unaltered timestamps for the data in this interface. diff --git a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py index 8eeb59324..9130270b9 100644 --- a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py +++ b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py @@ -75,6 +75,11 @@ def get_metadata_schema(self) -> dict: ) return metadata_schema + @property + def units_ids(self): + "Gets the units ids of the data." + return self.sorting_extractor.get_unit_ids() + def register_recording(self, recording_interface: BaseRecordingExtractorInterface): self.sorting_extractor.register_recording(recording=recording_interface.recording_extractor) diff --git a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py index 14705a25b..b934204de 100644 --- a/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py +++ b/src/neuroconv/datainterfaces/ecephys/sortedrecordinginterface.py @@ -56,8 +56,8 @@ def __init__( self.sorting_interface = sorting_interface self.unit_ids_to_channel_ids = unit_ids_to_channel_ids - self.channel_ids = self.recording_interface.recording_extractor.get_channel_ids() - self.unit_ids = self.sorting_interface.sorting_extractor.get_unit_ids() + self.channel_ids = self.recording_interface.channel_ids + self.unit_ids = self.sorting_interface.units_ids # Convert channel_ids to set for comparison available_channels = set(self.channel_ids)