diff --git a/docs/examples/basic_01_load_Data.md b/docs/examples/basic_01_load_Data.md index 97624ccf..37e3c855 100644 --- a/docs/examples/basic_01_load_Data.md +++ b/docs/examples/basic_01_load_Data.md @@ -94,7 +94,7 @@ channels_dict = { } recording = NGMTRecording( - data={"imu": acc_data}, channels={"lb_imu": pd.DataFrame(channels_dict)} + data={"lb_imu": acc_data}, channels={"lb_imu": pd.DataFrame(channels_dict)} ) ``` diff --git a/ngmt/utils/importers.py b/ngmt/utils/importers.py index d906dc37..25c7eed0 100644 --- a/ngmt/utils/importers.py +++ b/ngmt/utils/importers.py @@ -1,12 +1,13 @@ import actipy import h5py import numpy as np +import pandas as pd from ngmt.utils.ngmt_dataclass import NGMTRecording from ngmt.utils.file_io import get_unit_from_type -import pandas as pd from pathlib import Path +from typing import Union - +# Importer for Axivity def import_axivity(file_path: str, tracked_point: str): """ Imports Axivity data from the specified file path and @@ -64,51 +65,50 @@ def import_axivity(file_path: str, tracked_point: str): return data, channels -# Importher for APDM Mobility Lab system -def import_mobilityLab( - file_name: str | Path, - tracked_points: str | list[str], +# Importer for APDM Mobility Lab for different versions +def import_apdm_mobilitylab( + file_name: str | Path, + tracked_points: str | list[str] ) -> tuple[pd.DataFrame, pd.DataFrame]: """ - Imports data from an APDM Mobility Lab system from the specified file path. + Imports data recorded with different versions of the APDM Mobility Lab system from the specified file path. Args: file_name (str or Path): The absolute or relative path to the data file. - tracked_point (str or list of str]): - Defines for which tracked points data are to be returned. + tracked_points (str or list of str): Defines for which tracked points data are to be returned. Returns: - dict, dict: The loaded data and channels as dictionaries. + tuple[pd.DataFrame, pd.DataFrame]: The loaded data and channels as dataframes.
Examples: >>> file_path = "/path/to/sensor_data.h5" >>> tracked_point = "Lumbar" - >>> recording = import_mobilityLab(file_path, tracked_point) + >>> data, channels = import_apdm_mobilitylab(file_path, tracked_point) """ # Convert file_name to a Path object if it is a string if isinstance(file_name, str): file_name = Path(file_name) - # Convert tracked_points into a list if the it is provided as a string + # Convert tracked_points into a list if it is provided as a string if isinstance(tracked_points, str): tracked_points = [tracked_points] - with h5py.File(file_name, "r") as hfile: - # Get monitor labels and case IDs - monitor_labels = hfile.attrs["MonitorLabelList"] - monitor_labels = [s.decode("UTF-8").strip() for s in monitor_labels] - case_ids = hfile.attrs["CaseIdList"] - case_ids = [s.decode("UTF-8")[:9] for s in case_ids] - - # Track invalid tracked points - invalid_tracked_points = [ - tp for tp in tracked_points if tp not in monitor_labels - ] + # Mapping of sensor types to make them consistent with NGMT dataclass definition + sensor_type_mapping = { + 'Accelerometer': 'ACCEL', + 'Gyroscope': 'GYRO', + 'Magnetometer': 'MAGN', + 'Accelerometers': 'ACCEL', + 'Gyroscopes': 'GYRO', + 'Magnetometers': 'MAGN' + } - if invalid_tracked_points: - raise ValueError( - f"The following tracked points do not exist in monitor labels: {invalid_tracked_points}" - ) + with h5py.File(file_name, 'r') as hfile: + # Check if there is an attribute or dataset that indicates the version + if 'FileFormatVersion' in hfile.attrs: + version = hfile.attrs['FileFormatVersion'] + else: + raise ValueError("Version attribute not found in the h5 file.") # Initialize dictionaries to store channels and data frames channels_dict = { @@ -119,77 +119,98 @@ def import_mobilityLab( "units": [], "sampling_frequency": [], } - - # Create dictionary to store data data_dict = {} - # Iterate over each sensor - for idx_sensor, (monitor_label, case_id) in enumerate( - zip(monitor_labels, case_ids) - ): - 
if monitor_label not in tracked_points: - continue # to next sensor name - sample_rate = hfile[case_id].attrs["SampleRate"] - - # Get raw data - rawAcc = hfile[case_id]["Calibrated"]["Accelerometers"][:] - rawGyro = hfile[case_id]["Calibrated"]["Gyroscopes"][:] - rawMagn = hfile[case_id]["Calibrated"]["Magnetometers"][:] - - # Populate data_dict - data_dict[f"{monitor_label}"] = pd.DataFrame( - { - f"{monitor_label}_ACCEL_x": rawAcc[:, 0], - f"{monitor_label}_ACCEL_y": rawAcc[:, 1], - f"{monitor_label}_ACCEL_z": rawAcc[:, 2], - f"{monitor_label}_GYRO_x": rawGyro[:, 0], - f"{monitor_label}_GYRO_y": rawGyro[:, 1], - f"{monitor_label}_GYRO_z": rawGyro[:, 2], - f"{monitor_label}_MAGN_x": rawMagn[:, 0], - f"{monitor_label}_MAGN_y": rawMagn[:, 1], - f"{monitor_label}_MAGN_z": rawMagn[:, 2], - } - ) - - # Extend lists in channels_dict - channels_dict["name"].extend( - [ - f"{monitor_label}_ACCEL_x", - f"{monitor_label}_ACCEL_y", - f"{monitor_label}_ACCEL_z", - f"{monitor_label}_GYRO_x", - f"{monitor_label}_GYRO_y", - f"{monitor_label}_GYRO_z", - f"{monitor_label}_MAGN_x", - f"{monitor_label}_MAGN_y", - f"{monitor_label}_MAGN_z", - ] - ) - - channels_dict["component"].extend(["x", "y", "z"] * 3) - channels_dict["type"].extend( - [ - "ACCEL", - "ACCEL", - "ACCEL", - "GYRO", - "GYRO", - "GYRO", - "MAGN", - "MAGN", - "MAGN", - ] - ) - channels_dict["tracked_point"].extend([monitor_label] * 9) - channels_dict["units"].extend( - ["m/s^2", "m/s^2", "m/s^2", "rad/s", "rad/s", "rad/s", "µT", "µT", "µT"] - ) - channels_dict["sampling_frequency"].extend([sample_rate] * 9) - - # Concatenate data frames from data_dict - data = pd.concat(list(data_dict.values()), axis=1) - + # Check the version + if version == 5: + sensors_group = hfile['Sensors'] + + # Structure for version 5 + monitor_labels = list(sensors_group.keys()) + sensor_to_label = { + sensor_id: sensors_group[sensor_id]['Configuration'].attrs['Label 0'].decode('utf-8') + for sensor_id in monitor_labels + } + + # Convert 
tracked_points to sensor IDs using sensor_to_label mapping + tracked_points = [sensor for sensor, label in sensor_to_label.items() if label in tracked_points] + + # Track invalid tracked points + invalid_tracked_points = [tp for tp in tracked_points if tp not in monitor_labels] + + if invalid_tracked_points: + raise ValueError(f"The following tracked points do not exist in sensor names: {invalid_tracked_points}") + + # Iterate over each sensor + for sensor_name in monitor_labels: + if sensor_name not in tracked_points: + continue # to next sensor name + + sensor_data = sensors_group[sensor_name] + sample_rate = sensor_data['Configuration'].attrs['Sample Rate'] + label = sensor_to_label[sensor_name] + + # Extract and append sensor data to the DataFrame + for axis_label in ['x', 'y', 'z']: + for sensor_type in ['Accelerometer', 'Gyroscope', 'Magnetometer']: + column_name = f"{label}_{sensor_type_mapping[sensor_type]}_{axis_label}" + if sensor_type in sensor_data: + raw_data = sensor_data[sensor_type][:] + data_dict[column_name] = raw_data[:, 'xyz'.index(axis_label)] + + # Extend lists in channels_dict + channels_dict["name"].append(column_name) + channels_dict["component"].append(axis_label) + channels_dict["type"].append(sensor_type_mapping[sensor_type]) + channels_dict["tracked_point"].append(label) + channels_dict["units"].append(sensor_data[sensor_type].attrs['Units'].decode()) + channels_dict["sampling_frequency"].append(sample_rate) + + else: + # Structure for version 3 and 4 + monitor_labels = hfile.attrs['MonitorLabelList'] + monitor_labels = [s.decode("UTF-8").strip() for s in monitor_labels] + case_ids = hfile.attrs['CaseIdList'] + case_ids = [s.decode("UTF-8")[:9] for s in case_ids] + + # Track invalid tracked points + invalid_tracked_points = [tp for tp in tracked_points if tp not in monitor_labels] + + if invalid_tracked_points: + raise ValueError(f"The following tracked points do not exist in monitor labels: {invalid_tracked_points}") + + # Iterate over 
each sensor + for idx_sensor, (monitor_label, case_id) in enumerate(zip(monitor_labels, case_ids)): + if monitor_label not in tracked_points: + continue # Skip to next sensor name + + sample_rate = hfile[case_id].attrs['SampleRate'] + sensor_data = hfile[case_id]['Calibrated'] + + # Extract data for Accelerometers, Gyroscopes, and Magnetometers + sensor_types = ['Accelerometers', 'Gyroscopes', 'Magnetometers'] + for sensor_type in sensor_types: + if sensor_type in sensor_data: + raw_data = sensor_data[sensor_type][:] + units = sensor_data[sensor_type].attrs['Units'].decode() + + for axis_label in ['x', 'y', 'z']: + column_name = f"{monitor_label}_{sensor_type_mapping[sensor_type]}_{axis_label}" + data_dict[column_name] = raw_data[:, 'xyz'.index(axis_label)] + + # Extend lists in channels_dict + channels_dict["name"].append(column_name) + channels_dict["component"].append(axis_label) + channels_dict["type"].append(sensor_type_mapping[sensor_type]) + channels_dict["tracked_point"].append(monitor_label) + channels_dict["units"].append(units) + channels_dict["sampling_frequency"].append(sample_rate) + + # Create DataFrame from data_dict + data = pd.DataFrame(data_dict) + # Create DataFrame from channels_dict channels = pd.DataFrame(channels_dict) return data, channels + diff --git a/paper/references.bib b/paper/references.bib index 96e8cadf..0a5825bf 100644 --- a/paper/references.bib +++ b/paper/references.bib @@ -68,8 +68,7 @@ @article{jeung:2023 title={Motion-BIDS: extending the Brain Imaging Data Structure specification to organize motion data for reproducible research}, author={Jeung, Sein and Cockx, Helena and Appelhoff, Stefan and Berg, Timotheus and Gramann, Klaus and Grothkopp, S{\"o}ren and Warmerdam, Elke and Hansen, Clint and Oostenveld, Robert and Welzel, Julius and others}, year={2023}, - publisher={PsyArXiv}, - doi={10.31234/osf.io/w6z79} + publisher={PsyArXiv} } @ARTICLE{kuederle:2024, @@ -102,8 +101,7 @@ @article{mahlknecht:2013 number={7}, 
pages={e69627}, year={2013}, - publisher={Public Library of Science San Francisco, USA}, - doi={10.1371/journal.pone.0069627} + publisher={Public Library of Science San Francisco, USA} } @article{mazza:2021, @@ -114,8 +112,7 @@ @article{mazza:2021 number={12}, pages={e050785}, year={2021}, - publisher={British Medical Journal Publishing Group}, - doi={10.1136/bmjopen-2021-050785} + publisher={British Medical Journal Publishing Group} } @article{micoamigo:2023, @@ -137,8 +134,7 @@ @article{paraschiv:2019 number={1}, pages={1--11}, year={2019}, - publisher={BioMed Central}, - doi={10.1186/s12984-019-0494-z} + publisher={BioMed Central} } @inproceedings{paraschiv:2020, @@ -147,8 +143,7 @@ @inproceedings{paraschiv:2020 booktitle={2020 42nd Annual International Conference of the IEEE Engineering in Medicine \& Biology Society (EMBC)}, pages={4596--4599}, year={2020}, - organization={IEEE}, - doi={10.1109/EMBC44109.2020.9176281} + organization={IEEE} } @article{pham:2017,