Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sensepark data importer #95

Merged
merged 5 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/examples/basic_01_load_Data.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ channels_dict = {
}

recording = NGMTRecording(
data={"imu": acc_data}, channels={"lb_imu": pd.DataFrame(channels_dict)}
data={"lb_imu": acc_data}, channels={"lb_imu": pd.DataFrame(channels_dict)}
)
```

Expand Down
211 changes: 116 additions & 95 deletions ngmt/utils/importers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import actipy
import h5py
import numpy as np
import pandas as pd
from ngmt.utils.ngmt_dataclass import NGMTRecording
from ngmt.utils.file_io import get_unit_from_type
import pandas as pd
from pathlib import Path
from typing import Union


# Importer for Axivity
def import_axivity(file_path: str, tracked_point: str):
"""
Imports Axivity data from the specified file path and
Expand Down Expand Up @@ -64,51 +65,50 @@ def import_axivity(file_path: str, tracked_point: str):
return data, channels


# Importher for APDM Mobility Lab system
def import_mobilityLab(
file_name: str | Path,
tracked_points: str | list[str],
# Importer for APDM Mobility Lab (handles the different file format versions)
def import_apdm_mobilitylab(
file_name: str | Path,
tracked_points: str | list[str]
) -> tuple[pd.DataFrame, pd.DataFrame]:
"""
Imports data from an APDM Mobility Lab system from the specified file path.
Imports data recorded with an APDM Mobility Lab system from the specified file path, handling the different file format versions.

Args:
file_name (str or Path): The absolute or relative path to the data file.
tracked_point (str or list of str]):
Defines for which tracked points data are to be returned.
tracked_points (str or list of str): Defines for which tracked points data are to be returned.

Returns:
dict, dict: The loaded data and channels as dictionaries.
tuple[pd.DataFrame, pd.DataFrame]: The loaded data and channels as dataframes.

Examples:
>>> file_path = "/path/to/sensor_data.h5"
>>> tracked_point = "Lumbar"
>>> recording = import_mobilityLab(file_path, tracked_point)
>>> data, channels = import_apdm_mobilitylab(file_path, tracked_point)
"""
# Convert file_name to a Path object if it is a string
if isinstance(file_name, str):
file_name = Path(file_name)

# Convert tracked_points into a list if the it is provided as a string
# Convert tracked_points into a list if it is provided as a string
if isinstance(tracked_points, str):
tracked_points = [tracked_points]

with h5py.File(file_name, "r") as hfile:
# Get monitor labels and case IDs
monitor_labels = hfile.attrs["MonitorLabelList"]
monitor_labels = [s.decode("UTF-8").strip() for s in monitor_labels]
case_ids = hfile.attrs["CaseIdList"]
case_ids = [s.decode("UTF-8")[:9] for s in case_ids]

# Track invalid tracked points
invalid_tracked_points = [
tp for tp in tracked_points if tp not in monitor_labels
]
# Mapping of sensor types to make them consistent with NGMT dataclass definition
sensor_type_mapping = {
'Accelerometer': 'ACCEL',
'Gyroscope': 'GYRO',
'Magnetometer': 'MAGN',
'Accelerometers': 'ACCEL',
'Gyroscopes': 'GYRO',
'Magnetometers': 'MAGN'
}

if invalid_tracked_points:
raise ValueError(
f"The following tracked points do not exist in monitor labels: {invalid_tracked_points}"
)
with h5py.File(file_name, 'r') as hfile:
# Check if there is an attribute or dataset that indicates the version
if 'FileFormatVersion' in hfile.attrs:
version = hfile.attrs['FileFormatVersion']
else:
raise ValueError("Version attribute not found in the h5 file.")

# Initialize dictionaries to store channels and data frames
channels_dict = {
Expand All @@ -119,77 +119,98 @@ def import_mobilityLab(
"units": [],
"sampling_frequency": [],
}

# Create dictionary to store data
data_dict = {}

# Iterate over each sensor
for idx_sensor, (monitor_label, case_id) in enumerate(
zip(monitor_labels, case_ids)
):
if monitor_label not in tracked_points:
continue # to next sensor name
sample_rate = hfile[case_id].attrs["SampleRate"]

# Get raw data
rawAcc = hfile[case_id]["Calibrated"]["Accelerometers"][:]
rawGyro = hfile[case_id]["Calibrated"]["Gyroscopes"][:]
rawMagn = hfile[case_id]["Calibrated"]["Magnetometers"][:]

# Populate data_dict
data_dict[f"{monitor_label}"] = pd.DataFrame(
{
f"{monitor_label}_ACCEL_x": rawAcc[:, 0],
f"{monitor_label}_ACCEL_y": rawAcc[:, 1],
f"{monitor_label}_ACCEL_z": rawAcc[:, 2],
f"{monitor_label}_GYRO_x": rawGyro[:, 0],
f"{monitor_label}_GYRO_y": rawGyro[:, 1],
f"{monitor_label}_GYRO_z": rawGyro[:, 2],
f"{monitor_label}_MAGN_x": rawMagn[:, 0],
f"{monitor_label}_MAGN_y": rawMagn[:, 1],
f"{monitor_label}_MAGN_z": rawMagn[:, 2],
}
)

# Extend lists in channels_dict
channels_dict["name"].extend(
[
f"{monitor_label}_ACCEL_x",
f"{monitor_label}_ACCEL_y",
f"{monitor_label}_ACCEL_z",
f"{monitor_label}_GYRO_x",
f"{monitor_label}_GYRO_y",
f"{monitor_label}_GYRO_z",
f"{monitor_label}_MAGN_x",
f"{monitor_label}_MAGN_y",
f"{monitor_label}_MAGN_z",
]
)

channels_dict["component"].extend(["x", "y", "z"] * 3)
channels_dict["type"].extend(
[
"ACCEL",
"ACCEL",
"ACCEL",
"GYRO",
"GYRO",
"GYRO",
"MAGN",
"MAGN",
"MAGN",
]
)
channels_dict["tracked_point"].extend([monitor_label] * 9)
channels_dict["units"].extend(
["m/s^2", "m/s^2", "m/s^2", "rad/s", "rad/s", "rad/s", "µT", "µT", "µT"]
)
channels_dict["sampling_frequency"].extend([sample_rate] * 9)

# Concatenate data frames from data_dict
data = pd.concat(list(data_dict.values()), axis=1)

# Check the version
if version == 5:
sensors_group = hfile['Sensors']

# Structure for version 5
monitor_labels = list(sensors_group.keys())
sensor_to_label = {
sensor_id: sensors_group[sensor_id]['Configuration'].attrs['Label 0'].decode('utf-8')
for sensor_id in monitor_labels
}

# Convert tracked_points to sensor IDs using sensor_to_label mapping
tracked_points = [sensor for sensor, label in sensor_to_label.items() if label in tracked_points]

# Track invalid tracked points
invalid_tracked_points = [tp for tp in tracked_points if tp not in monitor_labels]

if invalid_tracked_points:
raise ValueError(f"The following tracked points do not exist in sensor names: {invalid_tracked_points}")

# Iterate over each sensor
for sensor_name in monitor_labels:
if sensor_name not in tracked_points:
continue # to next sensor name

sensor_data = sensors_group[sensor_name]
sample_rate = sensor_data['Configuration'].attrs['Sample Rate']
label = sensor_to_label[sensor_name]

# Extract and append sensor data to the DataFrame
for axis_label in ['x', 'y', 'z']:
for sensor_type in ['Accelerometer', 'Gyroscope', 'Magnetometer']:
column_name = f"{label}_{sensor_type_mapping[sensor_type]}_{axis_label}"
if sensor_type in sensor_data:
raw_data = sensor_data[sensor_type][:]
data_dict[column_name] = raw_data[:, 'xyz'.index(axis_label)]

# Extend lists in channels_dict
channels_dict["name"].append(column_name)
channels_dict["component"].append(axis_label)
channels_dict["type"].append(sensor_type_mapping[sensor_type])
channels_dict["tracked_point"].append(label)
channels_dict["units"].append(sensor_data[sensor_type].attrs['Units'].decode())
channels_dict["sampling_frequency"].append(sample_rate)

else:
# Structure for version 3 and 4
monitor_labels = hfile.attrs['MonitorLabelList']
monitor_labels = [s.decode("UTF-8").strip() for s in monitor_labels]
case_ids = hfile.attrs['CaseIdList']
case_ids = [s.decode("UTF-8")[:9] for s in case_ids]

# Track invalid tracked points
invalid_tracked_points = [tp for tp in tracked_points if tp not in monitor_labels]

if invalid_tracked_points:
raise ValueError(f"The following tracked points do not exist in monitor labels: {invalid_tracked_points}")

# Iterate over each sensor
for idx_sensor, (monitor_label, case_id) in enumerate(zip(monitor_labels, case_ids)):
if monitor_label not in tracked_points:
continue # Skip to next sensor name

sample_rate = hfile[case_id].attrs['SampleRate']
sensor_data = hfile[case_id]['Calibrated']

# Extract data for Accelerometers, Gyroscopes, and Magnetometers
sensor_types = ['Accelerometers', 'Gyroscopes', 'Magnetometers']
for sensor_type in sensor_types:
if sensor_type in sensor_data:
raw_data = sensor_data[sensor_type][:]
units = sensor_data[sensor_type].attrs['Units'].decode()

for axis_label in ['x', 'y', 'z']:
column_name = f"{monitor_label}_{sensor_type_mapping[sensor_type]}_{axis_label}"
data_dict[column_name] = raw_data[:, 'xyz'.index(axis_label)]

# Extend lists in channels_dict
channels_dict["name"].append(column_name)
channels_dict["component"].append(axis_label)
channels_dict["type"].append(sensor_type_mapping[sensor_type])
channels_dict["tracked_point"].append(monitor_label)
channels_dict["units"].append(units)
channels_dict["sampling_frequency"].append(sample_rate)

# Create DataFrame from data_dict
data = pd.DataFrame(data_dict)

# Create DataFrame from channels_dict
channels = pd.DataFrame(channels_dict)

return data, channels

15 changes: 5 additions & 10 deletions paper/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,7 @@ @article{jeung:2023
title={Motion-BIDS: extending the Brain Imaging Data Structure specification to organize motion data for reproducible research},
author={Jeung, Sein and Cockx, Helena and Appelhoff, Stefan and Berg, Timotheus and Gramann, Klaus and Grothkopp, S{\"o}ren and Warmerdam, Elke and Hansen, Clint and Oostenveld, Robert and Welzel, Julius and others},
year={2023},
publisher={PsyArXiv},
doi={10.31234/osf.io/w6z79}
publisher={PsyArXiv}
}

@ARTICLE{kuederle:2024,
Expand Down Expand Up @@ -102,8 +101,7 @@ @article{mahlknecht:2013
number={7},
pages={e69627},
year={2013},
publisher={Public Library of Science San Francisco, USA},
doi={10.1371/journal.pone.0069627}
publisher={Public Library of Science San Francisco, USA}
}

@article{mazza:2021,
Expand All @@ -114,8 +112,7 @@ @article{mazza:2021
number={12},
pages={e050785},
year={2021},
publisher={British Medical Journal Publishing Group},
doi={10.1136/bmjopen-2021-050785}
publisher={British Medical Journal Publishing Group}
}

@article{micoamigo:2023,
Expand All @@ -137,8 +134,7 @@ @article{paraschiv:2019
number={1},
pages={1--11},
year={2019},
publisher={BioMed Central},
doi={10.1186/s12984-019-0494-z}
publisher={BioMed Central}
}

@inproceedings{paraschiv:2020,
Expand All @@ -147,8 +143,7 @@ @inproceedings{paraschiv:2020
booktitle={2020 42nd Annual International Conference of the IEEE Engineering in Medicine \& Biology Society (EMBC)},
pages={4596--4599},
year={2020},
organization={IEEE},
doi={10.1109/EMBC44109.2020.9176281}
organization={IEEE}
}

@article{pham:2017,
Expand Down
Loading