Consistent units

neurogeriatricskiel · Oct 1, 2024 · 5e827cf · 5e827cf
1 parent d00407d
commit 5e827cf
Show file tree

Hide file tree

Showing 9 changed files with 128 additions and 63 deletions.
diff --git a/docs/examples/modules_01_gsd.md b/docs/examples/modules_01_gsd.md
@@ -2,7 +2,7 @@
 
 **Author:** Masoud Abedinifar
 
-**Last update:** Mon 23 Sep 2024
+**Last update:** Tue 01 Oct 2024
 
 ## Learning objectives
 By the end of this tutorial:
@@ -39,15 +39,14 @@ import numpy as np
 import matplotlib.pyplot as plt
 import os
 from pathlib import Path
-
 from kielmat.datasets import mobilised
 from kielmat.modules.gsd import ParaschivIonescuGaitSequenceDetection
 from kielmat.config import cfg_colors
 ```
 
 ## Data Preparation
 
-To implement the Paraschiv-Ionescu gait sequence detection algorithm, we load example data from a congestive heart failure (CHF) cohort, which is publicly available on the Zenodo repository [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7547125.svg)](https://doi.org/10.5281/zenodo.7547125). 
+To implement the Paraschiv-Ionescu gait sequence detection algorithm, we load example data that is publicly available on the Zenodo repository [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7547125.svg)](https://doi.org/10.5281/zenodo.7547125). 
 
 The participant was assessed for 2.5 hours in the real-world while doing different daily life activities and also was asked to perform specific tasks such as outdoor walking, walking up and down a slope and stairs and moving from one room to another [`3`].
 
@@ -85,6 +84,33 @@ sampling_frequency = recording.channels[tracking_sys][
 ]["sampling_frequency"].values[0]
 ```
 
+#### Data Units and Conversion to SI Units
+
+All input data provided to the modules in this toolbox should adhere to SI units to maintain consistency and accuracy across analyses. This ensures compatibility with the underlying algorithms, which are designed to work with standard metric measurements.
+
+If any data is provided in non-SI units (e.g., acceleration in g instead of m/s²), it must be converted to the appropriate SI units before being used as input to the toolbox. Failure to convert non-SI units may lead to incorrect results or misinterpretation of the output.
+
+For instance:
+
+- **Acceleration:** Convert from g to m/s².
+
+```python
+# Get the corresponding unit of the acceleration data
+accel_unit = recording.channels[tracking_sys][
+    recording.channels[tracking_sys]["name"] == "LowerBack_ACCEL_x"
+]["units"].values[0]
+
+# Check unit of acceleration data
+if accel_unit in ["m/s^2"]:
+    pass  # No conversion needed
+elif accel_unit in ["g", "G"]:
+    # Convert acceleration data from "g" to "m/s^2"
+    accel_data *= 9.81
+    # Update unit of acceleration
+    accel_unit = ["m/s^2"]
+```
+
+
 ## Visualisation of the Data
 The raw acceleration data including components of x, y and z axis is represented.
 
@@ -106,17 +132,17 @@ for i in range(3):
         time_in_minute,
         acceleration_data[f"LowerBack_ACCEL_{chr(120 + i)}"],
         color=colors[i],
-        label=f"Acc {'xyz'[i]}",
+        label=f"ACCEL {'xyz'[i]}",
     )
 
 # Add labels and legends
-plt.xlabel("Time [minute]", fontsize=20)
-plt.ylabel("Acceleration [g]", fontsize=20)
+plt.xlabel("Time (minute)", fontsize=20)
+plt.ylabel(f"Acceleration (m/s$^{2}$)", fontsize=20)
 plt.legend(fontsize=18)
 
 # Add a title with a specified font size
 plt.title(
-    "Accelerometer data from lower-back IMU sensor for CHF cohort",
+    "Accelerometer data from lower-back IMU sensor",
     fontsize=30,
 )
 
@@ -154,17 +180,17 @@ for i in range(3):
         time_seconds,
         acceleration_data[f"LowerBack_ACCEL_{chr(120 + i)}"],
         color=colors[i],
-        label=f"Acc {'xyz'[i]}",
+        label=f"ACCEL {'xyz'[i]}",
     )
 
 # Add labels and legends
-plt.xlabel("Time [seconds]", fontsize=20)
-plt.ylabel("Acceleration [g]", fontsize=20)
+plt.xlabel("Time (s)", fontsize=20)
+plt.ylabel("Acceleration (m/s$^{2}$)", fontsize=20)
 plt.legend(fontsize=18)
 
 # Add a title
 plt.title(
-    "Accelerometer data from lower-back IMU sensor for CHF cohort",
+    "Accelerometer data from lower-back IMU sensor",
     fontsize=30,
 )
 
@@ -174,7 +200,6 @@ plt.yticks(fontsize=20)
 
 # Set x-axis and y-axis limits for a specific duration (in seconds) and acceleration range
 plt.xlim(0, 10)
-plt.ylim(-1, 1.5)
 
 # Display a grid for reference
 plt.grid(visible=None, which="both", axis="both")
@@ -194,7 +219,7 @@ Now, we are running Paraschiv-Ionescu gait sequence detection algorithm from gsd
 
 In order to apply gait sequence detection algorithm, an instance of the ParaschivIonescuGaitSequenceDetection class is created using the constructor, `ParaschivIonescuGaitSequenceDetection()`. The `gsd` variable holds the instance, allowing us to access its methods. The inputs of the algorithm are as follows:
 
-- **Input Data:** `data` consist of accelerometer data (N, 3) for the x, y, and z axes in pandas Dataframe format.
+- **Input Data:** `accel_data` consists of accelerometer data (N, 3) for the x, y, and z axes in pandas DataFrame format. The data should be in SI units (m/s²).
 - **Sampling Frequency:** `sampling_freq_Hz` is the sampling frequency of the data, defined in Hz, with a default value of 100 Hz.
 - **Plot Results:** `plot_results`, if set to True, generates a plot showing the detected gait sequences on the data. The default is False. The onset is represented with the vertical green line and the grey area represents the duration of gait sequence detected by the algorithm.
 
@@ -206,7 +231,7 @@ gsd = ParaschivIonescuGaitSequenceDetection()
 
 # Call the gait sequence detection using gsd.detect
 gsd = gsd.detect(
-    data=acceleration_data, sampling_freq_Hz=sampling_frequency, plot_results=True, dt_data=None
+    accel_data=acceleration_data, sampling_freq_Hz=sampling_frequency, plot_results=True, dt_data=None
 )
 
 # Gait sequences are stored in gait_sequences_ attribute of gsd
@@ -291,7 +316,7 @@ for i in range(3):
         time_seconds,
         acceleration_data[f"LowerBack_ACCEL_{chr(120 + i)}"],
         color=colors[i],
-        label=f"Acc {'xyz'[i]}",
+        label=f"ACCEL {'xyz'[i]}",
     )
 
 # Plot the first element of gait sequences
@@ -309,8 +334,8 @@ start_limit = first_gait_sequence["onset"] - 2
 end_limit = first_gait_sequence["onset"] + first_gait_sequence["duration"] + 2
 ax.set_xlim(start_limit, end_limit)
 ax.set_ylim(-1, 1.5)
-ax.set_xlabel("Time (seconds)", fontsize=20)
-ax.set_ylabel("Acceleration (g)", fontsize=20)
+ax.set_xlabel("Time (s)", fontsize=20)
+ax.set_ylabel("Acceleration (m/s$^{2}$)", fontsize=20)
 ax.legend(loc="upper right", fontsize=20)
 plt.xticks(fontsize=20)
 plt.yticks(fontsize=20)

diff --git a/docs/examples/modules_01_gsd_files/modules_01_gsd_11_1.png b/docs/examples/modules_01_gsd_files/modules_01_gsd_11_1.png
diff --git a/docs/examples/modules_01_gsd_files/modules_01_gsd_13_1.png b/docs/examples/modules_01_gsd_files/modules_01_gsd_13_1.png
diff --git a/docs/examples/modules_01_gsd_files/modules_01_gsd_7_0.png b/docs/examples/modules_01_gsd_files/modules_01_gsd_7_0.png
diff --git a/docs/examples/modules_01_gsd_files/modules_01_gsd_9_0.png b/docs/examples/modules_01_gsd_files/modules_01_gsd_9_0.png
diff --git a/examples/modules_01_gsd.ipynb b/examples/modules_01_gsd.ipynb
diff --git a/kielmat/modules/gsd/_paraschiv.py b/kielmat/modules/gsd/_paraschiv.py
@@ -62,7 +62,7 @@ def __init__(
 
     def detect(
         self,
-        data: pd.DataFrame,
+        accel_data: pd.DataFrame,
         sampling_freq_Hz: float,
         plot_results: bool = False,
         dt_data: Optional[pd.Series] = None,
@@ -72,7 +72,7 @@ def detect(
         Detects gait sequences based on the input accelerometer data.
 
         Args:
-            data (pd.DataFrame): Input accelerometer data (N, 3) for x, y, and z axes.
+            accel_data (pd.DataFrame): Input accelerometer data (N, 3) for x, y, and z axes.
             sampling_freq_Hz (float): Sampling frequency of the accelerometer data.
             plot_results (bool, optional): If True, generates a plot showing the pre-processed acceleration data
                 and the detected gait sequences. Default is False.
@@ -88,7 +88,7 @@ def detect(
                     - tracking_system: Tracking systems used the events are derived from.
         """
         # Error handling for invalid input data
-        if not isinstance(data, pd.DataFrame) or data.shape[1] != 3:
+        if not isinstance(accel_data, pd.DataFrame) or accel_data.shape[1] != 3:
             raise ValueError(
                 "Input accelerometer data must be a DataFrame with 3 columns for x, y, and z axes."
             )
@@ -111,11 +111,14 @@ def detect(
             raise ValueError("dt_data must be a pandas Series with datetime values")
 
         # check if dt_data is provided and if it is a series with the same length as data
-        if dt_data is not None and len(dt_data) != len(data):
+        if dt_data is not None and len(dt_data) != len(accel_data):
             raise ValueError("dt_data must be a series with the same length as data")
 
+        # Convert acceleration data from "m/s^2" to "g"
+        accel_data /= 9.81
+
         # Calculate the norm of acceleration
-        acceleration_norm = np.linalg.norm(data, axis=1)
+        acceleration_norm = np.linalg.norm(accel_data, axis=1)
 
         # Resample acceleration_norm to target sampling frequency
         initial_sampling_frequency = sampling_freq_Hz
@@ -396,7 +399,9 @@ def detect(
 
         # Plot results if set to true
         if plot_results:
-
+            # Convert detected_activity_signal from g back to m/s^2 for consistency
+            detected_activity_signal *=9.81
+
             viz_utils.plot_gait(
                 target_sampling_freq_Hz, detected_activity_signal, gait_sequences_
             )

diff --git a/kielmat/test/test_modules.py b/kielmat/test/test_modules.py
@@ -48,7 +48,7 @@ def test_gsd_detect():
     gsd = ParaschivIonescuGaitSequenceDetection()
 
     # Call the detect method
-    gsd.detect(data=acceleration_data, sampling_freq_Hz=sampling_frequency)
+    gsd.detect(accel_data=acceleration_data, sampling_freq_Hz=sampling_frequency)
     gait_sequences_ = gsd.gait_sequences_
 
 
@@ -59,15 +59,15 @@ def test_invalid_sampling_freq():
     # Test with invalid sampling frequency
     invalid_sampling_freq = "invalid"
     with pytest.raises(ValueError):
-        gsd.detect(data=acceleration_data, sampling_freq_Hz=invalid_sampling_freq)
+        gsd.detect(accel_data=acceleration_data, sampling_freq_Hz=invalid_sampling_freq)
 
 
 def test_gait_sequence_detection():
     # Initialize the class
     gsd = ParaschivIonescuGaitSequenceDetection()
 
     # Call the detect method
-    gsd.detect(data=acceleration_data, sampling_freq_Hz=sampling_frequency)
+    gsd.detect(accel_data=acceleration_data, sampling_freq_Hz=sampling_frequency)
 
 
 def test_invalid_input_data_type():
@@ -87,7 +87,7 @@ def test_invalid_sampling_freq_type():
     # Test with invalid sampling frequency type
     invalid_sampling_freq = "invalid"
     with pytest.raises(ValueError):
-        gsd.detect(data=acceleration_data, sampling_freq_Hz=invalid_sampling_freq)
+        gsd.detect(accel_data=acceleration_data, sampling_freq_Hz=invalid_sampling_freq)
 
 
 def test_plot_results_type():
@@ -98,7 +98,7 @@ def test_plot_results_type():
     invalid_plot_results = "invalid"
     with pytest.raises(ValueError):
         gsd.detect(
-            data=acceleration_data,
+            accel_data=acceleration_data,
             sampling_freq_Hz=sampling_frequency,
             plot_results=invalid_plot_results,
         )
@@ -115,7 +115,7 @@ def test_invalid_dt_data_type():
         }
     )
     with pytest.raises(ValueError):
-        gsd.detect(data=acceleration_data, sampling_freq_Hz=100, dt_data="not_a_series")
+        gsd.detect(accel_data=acceleration_data, sampling_freq_Hz=100, dt_data="not_a_series")
 
 
 # Test for ValueError: "dt_data must be a series with the same length as data"
@@ -132,7 +132,7 @@ def test_invalid_dt_data_length():
         pd.date_range(start="2022-01-01", periods=500)
     )  # Different length than data
     with pytest.raises(ValueError):
-        gsd.detect(data=acceleration_data, sampling_freq_Hz=100, dt_data=dt_data)
+        gsd.detect(accel_data=acceleration_data, sampling_freq_Hz=100, dt_data=dt_data)
 
 
 def test_threshold_selected_signal():
@@ -152,7 +152,7 @@ def test_threshold_selected_signal():
     # Call detect with empty data
     with pytest.raises(ValueError):
         gsd.detect(
-            data=acceleration_data,
+            accel_data=acceleration_data,
             sampling_freq_Hz=100,
             dt_data=dt_data,
             plot_results=True,
@@ -174,7 +174,7 @@ def test_no_gait_sequences_detected():
 
     # Call detect with empty data
     with pytest.raises(ValueError):
-        gsd.detect(data=acceleration_data, sampling_freq_Hz=100)
+        gsd.detect(accel_data=acceleration_data, sampling_freq_Hz=100)
 
 
 def test_invalid_indices_warning():
@@ -215,7 +215,7 @@ def test_no_plotting_datetime_values():
         ValueError, match="dt_data must be a pandas Series with datetime values"
     ):
         gsd.detect(
-            data=acceleration_data,
+            accel_data=acceleration_data,
             sampling_freq_Hz=100,
             dt_data=dt_data,
             plot_results=True,

diff --git a/kielmat/utils/viz_utils.py b/kielmat/utils/viz_utils.py
@@ -25,7 +25,7 @@ def plot_gait(target_sampling_freq_Hz, detected_activity_signal, gait_sequences_
     )
     plt.title("Detected gait sequences", fontsize=18)
     plt.xlabel("Time (minutes)", fontsize=14)
-    plt.ylabel("Acceleration (g)", fontsize=14)
+    plt.ylabel("Acceleration (m/s$^{2}$)", fontsize=14)
 
     # Fill the area between start and end times
     for index, sequence in gait_sequences_.iterrows():