Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support arbitrary multi probe and multi trigger structures in spikeglx #1150

Merged
merged 13 commits into from
Dec 10, 2024
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
## Bug Fixes
* datetime objects now can be validated as conversion options [#1139](https://github.com/catalystneuro/neuroconv/pull/1126)
* Fix a bug where data in `DeepLabCutInterface` failed to write when `ndx-pose` was not imported. [#1144](https://github.com/catalystneuro/neuroconv/pull/1144)
* `SpikeGLXConverterPipe` converter now accepts multi-probe structures with multi-trigger and does not assume a specific folder structure [#1150](https://github.com/catalystneuro/neuroconv/pull/1150)

## Features
* Propagate the `unit_electrode_indices` argument from the spikeinterface tools to `BaseSortingExtractorInterface`. This allows users to map units to the electrode table when adding sorting data [PR #1124](https://github.com/catalystneuro/neuroconv/pull/1124)
* Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125)
* Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140)
* `SpikeGLXRecordingInterface` now also accepts `folder_path` making its behavior requivalent to SpikeInterface [#1150](https://github.com/catalystneuro/neuroconv/pull/1150)
bendichter marked this conversation as resolved.
Show resolved Hide resolved
* Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085)
* Added the `deploy_neuroconv_batch_job` helper function for deploying NeuroConv AWS Batch jobs. [PR #1086](https://github.com/catalystneuro/neuroconv/pull/1086)

Expand Down
31 changes: 11 additions & 20 deletions src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@ def get_source_schema(cls):

@classmethod
def get_streams(cls, folder_path: DirectoryPath) -> list[str]:
"Return the stream ids available in the folder."
from spikeinterface.extractors import SpikeGLXRecordingExtractor

# The first entry is the stream ids the second is the stream names
return SpikeGLXRecordingExtractor.get_streams(folder_path=folder_path)[0]

@validate_call
Expand Down Expand Up @@ -61,28 +63,17 @@ def __init__(
"""
folder_path = Path(folder_path)

streams = streams or self.get_streams(folder_path=folder_path)
streams_ids = streams or self.get_streams(folder_path=folder_path)

data_interfaces = dict()
for stream in streams:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the critical part of this. Previously, the Converter assumed a fix structure of the files. This PR gets rids of that assumption.

if "ap" in stream:
probe_name = stream[:5]
file_path = (
folder_path / f"{folder_path.stem}_{probe_name}" / f"{folder_path.stem}_t0.{probe_name}.ap.bin"
)
es_key = f"ElectricalSeriesAP{probe_name.capitalize()}"
interface = SpikeGLXRecordingInterface(file_path=file_path, es_key=es_key)
elif "lf" in stream:
probe_name = stream[:5]
file_path = (
folder_path / f"{folder_path.stem}_{probe_name}" / f"{folder_path.stem}_t0.{probe_name}.lf.bin"
)
es_key = f"ElectricalSeriesLF{probe_name.capitalize()}"
interface = SpikeGLXRecordingInterface(file_path=file_path, es_key=es_key)
elif "nidq" in stream:
file_path = folder_path / f"{folder_path.stem}_t0.nidq.bin"
interface = SpikeGLXNIDQInterface(file_path=file_path)
data_interfaces.update({str(stream): interface}) # Without str() casting, is a numpy string

nidq_streams = [stream_id for stream_id in streams_ids if stream_id == "nidq"]
electrical_streams = [stream_id for stream_id in streams_ids if stream_id not in nidq_streams]
for stream_id in electrical_streams:
data_interfaces[stream_id] = SpikeGLXRecordingInterface(folder_path=folder_path, stream_id=stream_id)

for stream_id in nidq_streams:
data_interfaces[stream_id] = SpikeGLXNIDQInterface(folder_path=folder_path)

super().__init__(data_interfaces=data_interfaces, verbose=verbose)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Optional

import numpy as np
from pydantic import FilePath, validate_call
from pydantic import DirectoryPath, FilePath, validate_call

from .spikeglx_utils import (
add_recording_extractor_properties,
Expand Down Expand Up @@ -45,7 +45,6 @@ def get_source_schema(cls) -> dict:
def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict:

extractor_kwargs = source_data.copy()
extractor_kwargs.pop("file_path")
extractor_kwargs["folder_path"] = self.folder_path
extractor_kwargs["all_annotations"] = True
extractor_kwargs["stream_id"] = self.stream_id
Expand All @@ -54,38 +53,58 @@ def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict:
@validate_call
def __init__(
self,
file_path: FilePath,
verbose: bool = True,
folder_path: Optional[DirectoryPath] = None,
stream_id: Optional[str] = None,
es_key: Optional[str] = None,
verbose: bool = True,
file_path: Optional[FilePath] = None,
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like to deprecate this but I don't want this discussion to get in the way of this PR. If people who review this agree we could start a soft deprecation as we usually do.

Reminder: Spiekinterface takes folder path as input (as in this PR) and not file_path as we currently do in main.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's preserve the order of the args so we do not cause a breaking change for ordered args

):
"""
Parameters
----------
folder_path: DirectoryPath
Folder path containing the binary files of the SpikeGLX recording.
stream_id: str, optional
Stream ID of the SpikeGLX recording.
file_path : FilePathType
Path to .bin file. Point to .ap.bin for SpikeGLXRecordingInterface and .lf.bin for SpikeGLXLFPInterface.
verbose : bool, default: True
Whether to output verbose text.
es_key : str, default: "ElectricalSeries"
es_key : str, the key to access the metadata of the ElectricalSeries.
"""

self.stream_id = fetch_stream_id_for_spikelgx_file(file_path)
if es_key is None:
if "lf" in self.stream_id:
es_key = "ElectricalSeriesLF"
elif "ap" in self.stream_id:
es_key = "ElectricalSeriesAP"
else:
raise ValueError("Cannot automatically determine es_key from path")
file_path = Path(file_path)
self.folder_path = file_path.parent
if file_path is not None and stream_id is None:
self.stream_id = fetch_stream_id_for_spikelgx_file(file_path)
self.folder_path = Path(file_path).parent

else:
self.stream_id = stream_id
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could we check here to ensure that stream_id is one of the valid streams and give an error message with a list of valid streams if it is not?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SpikeInterface already throws an error that is similar, example:

image

Does this work or would you like something more specific?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok great! Yeah that works

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anything else missing to merge thigs?

self.folder_path = Path(folder_path)

super().__init__(
file_path=file_path,
folder_path=folder_path,
verbose=verbose,
es_key=es_key,
)
self.source_data["file_path"] = str(file_path)
self.meta = self.recording_extractor.neo_reader.signals_info_dict[(0, self.stream_id)]["meta"]

signal_info_key = (0, self.stream_id) # Key format is (segment_index, stream_id)
self._signals_info_dict = self.recording_extractor.neo_reader.signals_info_dict[signal_info_key]
self.meta = self._signals_info_dict["meta"]

if es_key is None:
stream_kind = self._signals_info_dict["stream_kind"] # ap or lf
stream_kind_caps = stream_kind.upper()
device = self._signals_info_dict["device"].capitalize() # imec0, imec1, etc.

electrical_series_name = f"ElectricalSeries{stream_kind_caps}"

# Add imec{probe_index} to the electrical series name when there are multiple probes
# or undefined, `typeImEnabled` is present in the meta of all the production probes
self.probes_enabled_in_run = int(self.meta.get("typeImEnabled", 0))
if self.probes_enabled_in_run != 1:
electrical_series_name += f"{device}"

self.es_key = electrical_series_name

# Set electrodes properties
add_recording_extractor_properties(self.recording_extractor)
Expand All @@ -100,7 +119,7 @@ def get_metadata(self) -> dict:
device = get_device_metadata(self.meta)

# Should follow pattern 'Imec0', 'Imec1', etc.
probe_name = self.stream_id[:5].capitalize()
probe_name = self._signals_info_dict["device"].capitalize()
device["name"] = f"Neuropixel{probe_name}"

# Add groups metadata
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from pathlib import Path
from typing import Optional

import numpy as np
from pydantic import ConfigDict, FilePath, validate_call
from pydantic import ConfigDict, DirectoryPath, FilePath, validate_call

from .spikeglx_utils import get_session_start_time
from ..baserecordingextractorinterface import BaseRecordingExtractorInterface
Expand Down Expand Up @@ -29,15 +30,15 @@ def get_source_schema(cls) -> dict:
def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict:

extractor_kwargs = source_data.copy()
extractor_kwargs.pop("file_path")
extractor_kwargs["folder_path"] = self.folder_path
extractor_kwargs["stream_id"] = self.stream_id
return extractor_kwargs

@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def __init__(
self,
file_path: FilePath,
folder_path: Optional[DirectoryPath] = None,
file_path: Optional[FilePath] = None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here, let's keep file_path as the first arg for now

verbose: bool = True,
load_sync_channel: bool = False,
es_key: str = "ElectricalSeriesNIDQ",
Expand All @@ -49,6 +50,8 @@ def __init__(

Parameters
----------
folder_path : DirectoryPath
Path to the folder containing the .nidq.bin file.
file_path : FilePathType
Path to .nidq.bin file.
verbose : bool, default: True
Expand All @@ -59,10 +62,17 @@ def __init__(
es_key : str, default: "ElectricalSeriesNIDQ"
"""

self.file_path = Path(file_path)
self.folder_path = self.file_path.parent
if file_path is None and folder_path is None:
raise ValueError("Either 'file_path' or 'folder_path' must be provided.")

if file_path is not None:
file_path = Path(file_path)
self.folder_path = file_path.parent

if folder_path is not None:
self.folder_path = Path(folder_path)

super().__init__(
file_path=self.file_path,
verbose=verbose,
load_sync_channel=load_sync_channel,
es_key=es_key,
Expand All @@ -72,7 +82,10 @@ def __init__(
self.recording_extractor.set_property(
key="group_name", values=["NIDQChannelGroup"] * self.recording_extractor.get_num_channels()
)
self.meta = self.recording_extractor.neo_reader.signals_info_dict[(0, "nidq")]["meta"]

signal_info_key = (0, self.stream_id) # Key format is (segment_index, stream_id)
self._signals_info_dict = self.recording_extractor.neo_reader.signals_info_dict[signal_info_key]
self.meta = self._signals_info_dict["meta"]

def get_metadata(self) -> dict:
metadata = super().get_metadata()
Expand Down
12 changes: 6 additions & 6 deletions tests/test_on_data/ecephys/spikeglx_single_probe_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
"device": "NIDQBoard"
}
],
"ElectricalSeriesAPImec0": {
"name": "ElectricalSeriesAPImec0",
"description": "Acquisition traces for the ElectricalSeriesAPImec0."
"ElectricalSeriesAP": {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"name": "ElectricalSeriesAP",
"description": "Acquisition traces for the ElectricalSeriesAP."
},
"Electrodes": [
{
Expand All @@ -51,9 +51,9 @@
"name": "ElectricalSeriesNIDQ",
"description": "Raw acquisition traces from the NIDQ (.nidq.bin) channels."
},
"ElectricalSeriesLFImec0": {
"name": "ElectricalSeriesLFImec0",
"description": "Acquisition traces for the ElectricalSeriesLFImec0."
"ElectricalSeriesLF": {
"name": "ElectricalSeriesLF",
"description": "Acquisition traces for the ElectricalSeriesLF."
}
}
}
4 changes: 1 addition & 3 deletions tests/test_on_data/ecephys/test_lfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@ class TestEcephysLFPNwbConversions(unittest.TestCase):
param(
data_interface=SpikeGLXRecordingInterface,
interface_kwargs=dict(
file_path=(
DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0" / "Noise4Sam_g0_t0.imec0.lf.bin"
)
folder_path=DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0", stream_id="imec0.lf"
),
expected_write_module="raw",
),
Expand Down
4 changes: 1 addition & 3 deletions tests/test_on_data/ecephys/test_recording_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,9 +641,7 @@ def test_extracted_metadata(self, setup_interface):
class TestSpikeGLXRecordingInterface(RecordingExtractorInterfaceTestMixin):
data_interface_cls = SpikeGLXRecordingInterface
interface_kwargs = dict(
file_path=str(
ECEPHY_DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0" / "Noise4Sam_g0_t0.imec0.ap.bin"
)
folder_path=ECEPHY_DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0", stream_id="imec0.ap"
)
save_directory = OUTPUT_PATH

Expand Down
Loading
Loading