Add basic conversion test. (#7)

Fixes #4.
BlueBrain · May 14, 2024 · e176859 · e176859
1 parent eb029ed
commit e176859
Show file tree

Hide file tree

Showing 4 changed files with 121 additions and 26 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -27,12 +27,23 @@ jobs:
         sudo apt-get install -y cmake g++ libhdf5-openmpi-dev librange-v3-dev ninja-build nlohmann-json3-dev
         sudo apt-get install -y libarrow-dev libparquet-dev
 
-    - name: Configure
+    - name: Configure and build
       run: |
         cmake -B build -S . -GNinja \
+          -DCMAKE_INSTALL_PREFIX=$PWD/install \
           -DCMAKE_CXX_COMPILER=$(which mpicxx)
         cmake --build build
         ctest --test-dir build --output-on-failure
+        cmake --install build
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+
+    - name: Small integration test
+      run: |
+        export PATH=$PATH:$PWD/install/bin
+        python -mpip install -r tests/test_requirements.txt
+        python -mpytest tests
 
   build_docker_no_submodules:
     runs-on: ubuntu-latest

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
diff --git a/tests/test_integration.py b/tests/test_integration.py
@@ -0,0 +1,105 @@
+import libsonata
+import numpy as np
+import numpy.testing as npt
+import pandas as pd
+import subprocess
+import tempfile
+from pathlib import Path
+
+
+def generate_data(
+    location: Path,
+    source_nodes: int = 100,
+    target_nodes: int = 100,
+    avg_connections: int = 100,
+    nfiles: int = 3,
+) -> pd.DataFrame:
+    """Generate SONATA test data and store it in `nfiles` files in `location`.
+
+    Returns the generated data as a Pandas DataFrame.
+    """
+    if nfiles < 1:
+        raise RuntimeError("need 1+ files to generate")
+
+    sids = []
+    tids = []
+
+    rng = np.random.default_rng()
+    target_pool = np.arange(target_nodes)
+
+    for sid in range(source_nodes):
+        (num,) = rng.integers(avg_connections, size=1)
+        nums = np.sort(rng.choice(target_pool, num))
+        tids.append(nums)
+        sids.append(np.full_like(nums, sid))
+
+    sids = np.concatenate(sids)
+    tids = np.concatenate(tids)
+
+    df = pd.DataFrame(
+        {
+            "source_node_id": sids,
+            "target_node_id": tids,
+            "edge_type_id": np.zeros_like(sids),
+            "my_attribute": rng.standard_normal(len(sids)),
+            "my_other_attribute": rng.integers(low=0, high=666, size=len(sids)),
+        }
+    )
+
+    divisions = [0]
+    divisions.extend(
+        np.sort(
+            rng.choice(
+                np.arange(1, len(sids)), nfiles - 1, replace=False, shuffle=False
+            )
+        )
+    )
+    divisions.append(len(sids))
+
+    for i in range(nfiles):
+        slice = df.iloc[divisions[i] : divisions[i + 1]]
+        slice.to_parquet(location / f"data{i}.parquet")
+
+    return df
+
+
+def test_conversion():
+    with tempfile.TemporaryDirectory() as dirname:
+        tmpdir = Path(dirname)
+
+        parquet_name = tmpdir / "data.parquet"
+        parquet_name.mkdir(parents=True, exist_ok=True)
+        sonata_name = tmpdir / "data.h5"
+        population_name = "cells__cells__test"
+
+        df = generate_data(parquet_name)
+
+        subprocess.check_call(
+            ["parquet2hdf5", parquet_name, sonata_name, population_name]
+        )
+
+        store = libsonata.EdgeStorage(sonata_name)
+        assert store.population_names == {population_name}
+        pop = store.open_population(population_name)
+        assert len(pop) == len(df)
+
+        npt.assert_array_equal(
+            pop.source_nodes(pop.select_all()),
+            df["source_node_id"]
+        )
+        npt.assert_array_equal(
+            pop.target_nodes(pop.select_all()),
+            df["target_node_id"]
+        )
+        npt.assert_array_equal(
+            pop.get_attribute("my_attribute", pop.select_all()),
+            df["my_attribute"]
+        )
+        npt.assert_array_equal(
+            pop.get_attribute("my_other_attribute", pop.select_all()),
+            df["my_other_attribute"],
+        )
+
+
+# Allow running the integration test directly as a script, without pytest.
+if __name__ == "__main__":
+    test_conversion()
diff --git a/tests/test_requirements.txt b/tests/test_requirements.txt
@@ -0,0 +1,4 @@
+libsonata
+pandas
+pyarrow
+pytest