Add basic conversion test. (#7)
Fixes #4.
matz-e authored May 14, 2024
1 parent eb029ed commit e176859
Showing 4 changed files with 121 additions and 26 deletions.
13 changes: 12 additions & 1 deletion .github/workflows/test.yml
@@ -27,12 +27,23 @@ jobs:
           sudo apt-get install -y cmake g++ libhdf5-openmpi-dev librange-v3-dev ninja-build nlohmann-json3-dev
           sudo apt-get install -y libarrow-dev libparquet-dev
-      - name: Configure
+      - name: Configure and build
         run: |
           cmake -B build -S . -GNinja \
             -DCMAKE_INSTALL_PREFIX=$PWD/install \
             -DCMAKE_CXX_COMPILER=$(which mpicxx)
           cmake --build build
+          ctest --test-dir build --output-on-failure
+          cmake --install build
+      - name: Set up Python
+        uses: actions/setup-python@v5
+
+      - name: Small integration test
+        run: |
+          export PATH=$PATH:$PWD/install/bin
+          python -mpip install -r tests/test_requirements.txt
+          python -mpytest tests
   build_docker_no_submodules:
     runs-on: ubuntu-latest
25 changes: 0 additions & 25 deletions .gitlab-ci.yml

This file was deleted.

105 changes: 105 additions & 0 deletions tests/test_integration.py
@@ -0,0 +1,105 @@
import libsonata
import numpy as np
import numpy.testing as npt
import pandas as pd
import subprocess
import tempfile
from pathlib import Path


def generate_data(
    location: Path,
    source_nodes: int = 100,
    target_nodes: int = 100,
    avg_connections: int = 100,
    nfiles: int = 3,
) -> pd.DataFrame:
    """Generate SONATA test data and store it in `nfiles` files in `location`.
    Returns the generated data as a Pandas DataFrame.
    """
    if nfiles < 1:
        raise RuntimeError("need 1+ files to generate")

    sids = []
    tids = []

    rng = np.random.default_rng()
    target_pool = np.arange(target_nodes)

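    # For every source node, draw a random connection count and pick that many (sorted) target node ids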
    for sid in range(source_nodes):
        (num,) = rng.integers(avg_connections, size=1)
        nums = np.sort(rng.choice(target_pool, num))
        tids.append(nums)
        sids.append(np.full_like(nums, sid))

    sids = np.concatenate(sids)
    tids = np.concatenate(tids)

    df = pd.DataFrame(
        {
            "source_node_id": sids,
            "target_node_id": tids,
            "edge_type_id": np.zeros_like(sids),
            "my_attribute": rng.standard_normal(len(sids)),
            "my_other_attribute": rng.integers(low=0, high=666, size=len(sids)),
        }
    )

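    # Choose random cut points so the edge table is split across `nfiles` Parquet files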
    divisions = [0]
    divisions.extend(
        np.sort(
            rng.choice(
                np.arange(1, len(sids)), nfiles - 1, replace=False, shuffle=False
            )
        )
    )
    divisions.append(len(sids))

    for i in range(nfiles):
        slice = df.iloc[divisions[i] : divisions[i + 1]]
        slice.to_parquet(location / f"data{i}.parquet")

    return df


def test_conversion():
    with tempfile.TemporaryDirectory() as dirname:
        tmpdir = Path(dirname)

        parquet_name = tmpdir / "data.parquet"
        parquet_name.mkdir(parents=True, exist_ok=True)
        sonata_name = tmpdir / "data.h5"
        population_name = "cells__cells__test"

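        # Write randomly generated edges as several Parquet files into the data.parquet directory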
        df = generate_data(parquet_name)

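        # Convert the Parquet files into a single SONATA edge file holding one population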
        subprocess.check_call(
            ["parquet2hdf5", parquet_name, sonata_name, population_name]
        )

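        # Read the result back with libsonata and compare the stored edges and attributes to the source data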
        store = libsonata.EdgeStorage(sonata_name)
        assert store.population_names == {population_name}
        pop = store.open_population(population_name)
        assert len(pop) == len(df)

        npt.assert_array_equal(
            pop.source_nodes(pop.select_all()),
            df["source_node_id"]
        )
        npt.assert_array_equal(
            pop.target_nodes(pop.select_all()),
            df["target_node_id"]
        )
        npt.assert_array_equal(
            pop.get_attribute("my_attribute", pop.select_all()),
            df["my_attribute"]
        )
        npt.assert_array_equal(
            pop.get_attribute("my_other_attribute", pop.select_all()),
            df["my_other_attribute"],
        )


if __name__ == "__main__":
    test_conversion()
4 changes: 4 additions & 0 deletions tests/test_requirements.txt
@@ -0,0 +1,4 @@
libsonata
pandas
pyarrow
pytest
