Skip to content

Commit

Permalink
Merge pull request #95 from neurogeriatricskiel/sensepark-data-importer
Browse files Browse the repository at this point in the history
[ADD] importer for Sensepark datasets
  • Loading branch information
JuliusWelzel authored Jul 17, 2024
2 parents c94143d + 183a234 commit 5ace5f4
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 106 deletions.
2 changes: 1 addition & 1 deletion docs/examples/basic_01_load_Data.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ channels_dict = {
}

recording = NGMTRecording(
data={"imu": acc_data}, channels={"lb_imu": pd.DataFrame(channels_dict)}
data={"lb_imu": acc_data}, channels={"lb_imu": pd.DataFrame(channels_dict)}
)
```

Expand Down
211 changes: 116 additions & 95 deletions ngmt/utils/importers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import actipy
import h5py
import numpy as np
import pandas as pd
from ngmt.utils.ngmt_dataclass import NGMTRecording
from ngmt.utils.file_io import get_unit_from_type
import pandas as pd
from pathlib import Path
from typing import Union


# Importer for Axivity
def import_axivity(file_path: str, tracked_point: str):
"""
Imports Axivity data from the specified file path and
Expand Down Expand Up @@ -64,51 +65,50 @@ def import_axivity(file_path: str, tracked_point: str):
return data, channels


# Importher for APDM Mobility Lab system
def import_mobilityLab(
file_name: str | Path,
tracked_points: str | list[str],
# Importer for APDM Mobility Lab for different versions
def import_apdm_mobilitylab(
file_name: str | Path,
tracked_points: str | list[str]
) -> tuple[pd.DataFrame, pd.DataFrame]:
"""
Imports data from an APDM Mobility Lab system from the specified file path.
Imports data from an APDM Mobility Lab system from the specified file path, supporting the different file format versions.
Args:
file_name (str or Path): The absolute or relative path to the data file.
tracked_point (str or list of str]):
Defines for which tracked points data are to be returned.
tracked_points (str or list of str): Defines for which tracked points data are to be returned.
Returns:
dict, dict: The loaded data and channels as dictionaries.
tuple[pd.DataFrame, pd.DataFrame]: The loaded data and channels as dataframes.
Examples:
>>> file_path = "/path/to/sensor_data.h5"
>>> tracked_point = "Lumbar"
>>> recording = import_mobilityLab(file_path, tracked_point)
>>> data, channels = import_apdm_mobilitylab(file_path, tracked_point)
"""
# Convert file_name to a Path object if it is a string
if isinstance(file_name, str):
file_name = Path(file_name)

# Convert tracked_points into a list if the it is provided as a string
# Convert tracked_points into a list if it is provided as a string
if isinstance(tracked_points, str):
tracked_points = [tracked_points]

with h5py.File(file_name, "r") as hfile:
# Get monitor labels and case IDs
monitor_labels = hfile.attrs["MonitorLabelList"]
monitor_labels = [s.decode("UTF-8").strip() for s in monitor_labels]
case_ids = hfile.attrs["CaseIdList"]
case_ids = [s.decode("UTF-8")[:9] for s in case_ids]

# Track invalid tracked points
invalid_tracked_points = [
tp for tp in tracked_points if tp not in monitor_labels
]
# Mapping of sensor types to make them consistent with NGMT dataclass definition
sensor_type_mapping = {
'Accelerometer': 'ACCEL',
'Gyroscope': 'GYRO',
'Magnetometer': 'MAGN',
'Accelerometers': 'ACCEL',
'Gyroscopes': 'GYRO',
'Magnetometers': 'MAGN'
}

if invalid_tracked_points:
raise ValueError(
f"The following tracked points do not exist in monitor labels: {invalid_tracked_points}"
)
with h5py.File(file_name, 'r') as hfile:
# Check if there is an attribute or dataset that indicates the version
if 'FileFormatVersion' in hfile.attrs:
version = hfile.attrs['FileFormatVersion']
else:
raise ValueError("Version attribute not found in the h5 file.")

# Initialize dictionaries to store channels and data frames
channels_dict = {
Expand All @@ -119,77 +119,98 @@ def import_mobilityLab(
"units": [],
"sampling_frequency": [],
}

# Create dictionary to store data
data_dict = {}

# Iterate over each sensor
for idx_sensor, (monitor_label, case_id) in enumerate(
zip(monitor_labels, case_ids)
):
if monitor_label not in tracked_points:
continue # to next sensor name
sample_rate = hfile[case_id].attrs["SampleRate"]

# Get raw data
rawAcc = hfile[case_id]["Calibrated"]["Accelerometers"][:]
rawGyro = hfile[case_id]["Calibrated"]["Gyroscopes"][:]
rawMagn = hfile[case_id]["Calibrated"]["Magnetometers"][:]

# Populate data_dict
data_dict[f"{monitor_label}"] = pd.DataFrame(
{
f"{monitor_label}_ACCEL_x": rawAcc[:, 0],
f"{monitor_label}_ACCEL_y": rawAcc[:, 1],
f"{monitor_label}_ACCEL_z": rawAcc[:, 2],
f"{monitor_label}_GYRO_x": rawGyro[:, 0],
f"{monitor_label}_GYRO_y": rawGyro[:, 1],
f"{monitor_label}_GYRO_z": rawGyro[:, 2],
f"{monitor_label}_MAGN_x": rawMagn[:, 0],
f"{monitor_label}_MAGN_y": rawMagn[:, 1],
f"{monitor_label}_MAGN_z": rawMagn[:, 2],
}
)

# Extend lists in channels_dict
channels_dict["name"].extend(
[
f"{monitor_label}_ACCEL_x",
f"{monitor_label}_ACCEL_y",
f"{monitor_label}_ACCEL_z",
f"{monitor_label}_GYRO_x",
f"{monitor_label}_GYRO_y",
f"{monitor_label}_GYRO_z",
f"{monitor_label}_MAGN_x",
f"{monitor_label}_MAGN_y",
f"{monitor_label}_MAGN_z",
]
)

channels_dict["component"].extend(["x", "y", "z"] * 3)
channels_dict["type"].extend(
[
"ACCEL",
"ACCEL",
"ACCEL",
"GYRO",
"GYRO",
"GYRO",
"MAGN",
"MAGN",
"MAGN",
]
)
channels_dict["tracked_point"].extend([monitor_label] * 9)
channels_dict["units"].extend(
["m/s^2", "m/s^2", "m/s^2", "rad/s", "rad/s", "rad/s", "µT", "µT", "µT"]
)
channels_dict["sampling_frequency"].extend([sample_rate] * 9)

# Concatenate data frames from data_dict
data = pd.concat(list(data_dict.values()), axis=1)

# Check the version
if version == 5:
sensors_group = hfile['Sensors']

# Structure for version 5
monitor_labels = list(sensors_group.keys())
sensor_to_label = {
sensor_id: sensors_group[sensor_id]['Configuration'].attrs['Label 0'].decode('utf-8')
for sensor_id in monitor_labels
}

# Convert tracked_points to sensor IDs using sensor_to_label mapping
tracked_points = [sensor for sensor, label in sensor_to_label.items() if label in tracked_points]

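# Track invalid tracked points (NOTE(review): tracked_points was just rebuilt from sensor_to_label keys, which are all in monitor_labels, so this check can never flag a requested label that does not exist - confirm and validate the requested labels before the conversion)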
invalid_tracked_points = [tp for tp in tracked_points if tp not in monitor_labels]

if invalid_tracked_points:
raise ValueError(f"The following tracked points do not exist in sensor names: {invalid_tracked_points}")

# Iterate over each sensor
for sensor_name in monitor_labels:
if sensor_name not in tracked_points:
continue # to next sensor name

sensor_data = sensors_group[sensor_name]
sample_rate = sensor_data['Configuration'].attrs['Sample Rate']
label = sensor_to_label[sensor_name]

# Extract and append sensor data to the DataFrame
for axis_label in ['x', 'y', 'z']:
for sensor_type in ['Accelerometer', 'Gyroscope', 'Magnetometer']:
column_name = f"{label}_{sensor_type_mapping[sensor_type]}_{axis_label}"
if sensor_type in sensor_data:
raw_data = sensor_data[sensor_type][:]
data_dict[column_name] = raw_data[:, 'xyz'.index(axis_label)]

# Extend lists in channels_dict
channels_dict["name"].append(column_name)
channels_dict["component"].append(axis_label)
channels_dict["type"].append(sensor_type_mapping[sensor_type])
channels_dict["tracked_point"].append(label)
channels_dict["units"].append(sensor_data[sensor_type].attrs['Units'].decode())
channels_dict["sampling_frequency"].append(sample_rate)

else:
# Structure for version 3 and 4
monitor_labels = hfile.attrs['MonitorLabelList']
monitor_labels = [s.decode("UTF-8").strip() for s in monitor_labels]
case_ids = hfile.attrs['CaseIdList']
case_ids = [s.decode("UTF-8")[:9] for s in case_ids]

# Track invalid tracked points
invalid_tracked_points = [tp for tp in tracked_points if tp not in monitor_labels]

if invalid_tracked_points:
raise ValueError(f"The following tracked points do not exist in monitor labels: {invalid_tracked_points}")

# Iterate over each sensor
for idx_sensor, (monitor_label, case_id) in enumerate(zip(monitor_labels, case_ids)):
if monitor_label not in tracked_points:
continue # Skip to next sensor name

sample_rate = hfile[case_id].attrs['SampleRate']
sensor_data = hfile[case_id]['Calibrated']

# Extract data for Accelerometers, Gyroscopes, and Magnetometers
sensor_types = ['Accelerometers', 'Gyroscopes', 'Magnetometers']
for sensor_type in sensor_types:
if sensor_type in sensor_data:
raw_data = sensor_data[sensor_type][:]
units = sensor_data[sensor_type].attrs['Units'].decode()

for axis_label in ['x', 'y', 'z']:
column_name = f"{monitor_label}_{sensor_type_mapping[sensor_type]}_{axis_label}"
data_dict[column_name] = raw_data[:, 'xyz'.index(axis_label)]

# Extend lists in channels_dict
channels_dict["name"].append(column_name)
channels_dict["component"].append(axis_label)
channels_dict["type"].append(sensor_type_mapping[sensor_type])
channels_dict["tracked_point"].append(monitor_label)
channels_dict["units"].append(units)
channels_dict["sampling_frequency"].append(sample_rate)

# Create DataFrame from data_dict
data = pd.DataFrame(data_dict)

# Create DataFrame from channels_dict
channels = pd.DataFrame(channels_dict)

return data, channels

15 changes: 5 additions & 10 deletions paper/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,7 @@ @article{jeung:2023
title={Motion-BIDS: extending the Brain Imaging Data Structure specification to organize motion data for reproducible research},
author={Jeung, Sein and Cockx, Helena and Appelhoff, Stefan and Berg, Timotheus and Gramann, Klaus and Grothkopp, S{\"o}ren and Warmerdam, Elke and Hansen, Clint and Oostenveld, Robert and Welzel, Julius and others},
year={2023},
publisher={PsyArXiv},
doi={10.31234/osf.io/w6z79}
publisher={PsyArXiv}
}

@ARTICLE{kuederle:2024,
Expand Down Expand Up @@ -102,8 +101,7 @@ @article{mahlknecht:2013
number={7},
pages={e69627},
year={2013},
publisher={Public Library of Science San Francisco, USA},
doi={10.1371/journal.pone.0069627}
publisher={Public Library of Science San Francisco, USA}
}

@article{mazza:2021,
Expand All @@ -114,8 +112,7 @@ @article{mazza:2021
number={12},
pages={e050785},
year={2021},
publisher={British Medical Journal Publishing Group},
doi={10.1136/bmjopen-2021-050785}
publisher={British Medical Journal Publishing Group}
}

@article{micoamigo:2023,
Expand All @@ -137,8 +134,7 @@ @article{paraschiv:2019
number={1},
pages={1--11},
year={2019},
publisher={BioMed Central},
doi={10.1186/s12984-019-0494-z}
publisher={BioMed Central}
}

@inproceedings{paraschiv:2020,
Expand All @@ -147,8 +143,7 @@ @inproceedings{paraschiv:2020
booktitle={2020 42nd Annual International Conference of the IEEE Engineering in Medicine \& Biology Society (EMBC)},
pages={4596--4599},
year={2020},
organization={IEEE},
doi={10.1109/EMBC44109.2020.9176281}
organization={IEEE}
}

@article{pham:2017,
Expand Down

0 comments on commit 5ace5f4

Please sign in to comment.