feat(loaders): Add loader function for RPH and flexible file gathering (#147)

* feat(constants): Add consts for RPH
  Added Roll Pitch Heading column constants.
* feat(loaders): Add loader function for RPH
  Added 'load_roll_pitch_heading' function to load Roll Pitch Heading (RPH) data from a set of files into a single pandas DataFrame.
* feat(main): Allow flexible file gather
  Added 'proc' argument to the 'gather_files' function so that the user can choose to get file paths for the posfilter process.
* test: Modify test_main for new arg
  Modified test_main.py and the 'test_gather_files' test to include testing of the new 'proc' argument for flexible file gathering.
* fix: Change typo within a comment

---------

Ref: Issue #118
Co-authored-by: Aniket Fadia <[email protected]>
seafloor-geodesy · Aug 25, 2023 · ec129d4 · ec129d4
1 parent 6d65625
commit ec129d4
Show file tree

Hide file tree

Showing 4 changed files with 88 additions and 15 deletions.
diff --git a/src/gnatss/constants/__init__.py b/src/gnatss/constants/__init__.py
@@ -65,3 +65,9 @@
     GPS_COV_ZY,
     GPS_COV_ZZ,
 ]  # Covariance matrix columns
+
+# Roll Pitch Heading (RPH) column names; the time column reuses TIME_J2000
+RPH_TIME = TIME_J2000
+RPH_ROLL = "roll"
+RPH_PITCH = "pitch"
+RPH_HEADING = "heading"
diff --git a/src/gnatss/loaders.py b/src/gnatss/loaders.py
@@ -172,6 +172,40 @@ def load_travel_times(
     return all_travel_times
 
 
+def load_roll_pitch_heading(files: List[str]) -> pd.DataFrame:
+    """
+    Loads roll pitch heading data into a pandas dataframe from a list of files.
+
+    Each file is read as whitespace-delimited text without a header row,
+    and rows that repeat a time value already seen in the same file are
+    dropped (the first occurrence is kept). Duplicates are only removed
+    within a file, not across files.
+
+    Parameters
+    ----------
+    files : List[str]
+        The list of path strings of the files to load
+
+    Returns
+    -------
+    pd.DataFrame
+        Pandas DataFrame containing all of
+        the roll pitch heading data, concatenated
+        in the order of ``files`` with a fresh integer index.
+        Columns are the time column (``constants.RPH_TIME``)
+        followed by the 'roll', 'pitch', 'heading' values
+    """
+    columns = [
+        constants.RPH_TIME,
+        constants.RPH_ROLL,
+        constants.RPH_PITCH,
+        constants.RPH_HEADING,
+    ]
+    # Read all rph files.
+    # ``sep=r"\s+"`` replaces ``delim_whitespace=True``, which is
+    # deprecated as of pandas 2.2; both split on runs of whitespace.
+    rph_dfs = [
+        pd.read_csv(i, sep=r"\s+", header=None, names=columns).drop_duplicates(
+            constants.RPH_TIME
+        )
+        for i in files
+    ]
+    # ``ignore_index=True`` renumbers the combined rows, so the per-file
+    # indices do not need to be reset beforehand.
+    return pd.concat(rph_dfs, ignore_index=True)
+
+
 def load_gps_solutions(
     files: List[str], time_round: int = constants.DELAY_TIME_PRECISION
 ) -> pd.DataFrame:

diff --git a/src/gnatss/main.py b/src/gnatss/main.py
@@ -1,5 +1,5 @@
 import warnings
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any, Dict, List, Literal, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -24,7 +24,9 @@
 from .utilities.io import _get_filesystem
 
 
-def gather_files(config: Configuration) -> Dict[str, Any]:
+def gather_files(
+    config: Configuration, proc: Literal["solver", "posfilter"] = "solver"
+) -> Dict[str, Any]:
     """Gather file paths for the various dataset files
 
     Parameters
@@ -38,7 +40,12 @@ def gather_files(config: Configuration) -> Dict[str, Any]:
         A dictionary containing the various datasets file paths
     """
     all_files_dict = {}
-    for k, v in config.solver.input_files.dict().items():
+    # Check for process type first
+    if not hasattr(config, proc):
+        raise AttributeError(f"Unknown process type: {proc}")
+
+    proc_config = getattr(config, proc)
+    for k, v in proc_config.input_files.dict().items():
         path = v.get("path", "")
         typer.echo(f"Gathering {k} at {path}")
         storage_options = v.get("storage_options", {})

diff --git a/tests/test_main.py b/tests/test_main.py
@@ -1,7 +1,18 @@
+import pytest
+
 from gnatss.main import gather_files
 
 
-def test_gather_files(mocker):
+@pytest.mark.parametrize("proc", ["solver", "posfilter", "random"])
+def test_gather_files(mocker, proc):
+    tt = "travel_times"
+    rph = "roll_pitch_heading"
+    glob_vals = [tt, rph]
+    expected_procs = {
+        "solver": ["sound_speed", tt, "gps_solution", "deletions"],
+        "posfilter": [rph],
+    }
+
     # Setup get_filesystem mock
     glob_res = [
         "/some/path/to/1",
@@ -16,25 +27,40 @@ def glob(path):
     mocker.patch("gnatss.main._get_filesystem", return_value=Filesystem)
 
     # Setup mock configuration
-    item_keys = ["sound_speed", "travel_times", "gps_solution", "deletions"]
+    item_keys = []
+    if proc in expected_procs:
+        item_keys = expected_procs[proc]
+
     sample_dict = {
         k: {
             "path": f"/some/path/to/{k}"
-            if k != "travel_times"
+            if k not in glob_vals
             else "/some/glob/**/path",
             "storage_options": {},
         }
         for k in item_keys
     }
     config = mocker.patch("gnatss.configs.main.Configuration")
-    config.solver.input_files.dict.return_value = sample_dict
+    if proc in list(expected_procs.keys()):
+        # Test for actual proc that exists
+        getattr(config, proc).input_files.dict.return_value = sample_dict
+
+        all_files_dict = gather_files(config, proc=proc)
+        # Check all_files_dict
+        assert isinstance(all_files_dict, dict)
+        assert sorted(list(all_files_dict.keys())) == sorted(item_keys)
+
+        # Test glob
+        for val in glob_vals:
+            if val in all_files_dict:
+                assert isinstance(all_files_dict[val], list)
+                assert all_files_dict[val] == glob_res
+    else:
+        # Test for random
+        del config.random
 
-    # Perform test
-    all_files_dict = gather_files(config)
-    # Check all_files_dict
-    assert isinstance(all_files_dict, dict)
-    assert sorted(list(all_files_dict.keys())) == sorted(item_keys)
+        with pytest.raises(AttributeError) as exc_info:
+            all_files_dict = gather_files(config, proc=proc)
 
-    # Test glob
-    assert isinstance(all_files_dict["travel_times"], list)
-    assert all_files_dict["travel_times"] == glob_res
+        assert exc_info.type == AttributeError
+        assert exc_info.value.args[0] == f"Unknown process type: {proc}"