Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor codebase to use AnnData (Fixes #56) #74

Merged
merged 49 commits into from
Mar 29, 2024
Merged
Changes from 1 commit
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
da576d5
Update Dockerfile Entrypoint/CMD
jvivian Mar 24, 2024
8476ab0
Update project dependencies
jvivian Mar 24, 2024
bbbdec3
Add parsed version of project data
jvivian Mar 24, 2024
e94c210
CLI updates
jvivian Mar 24, 2024
69be570
Add GROUPED_FACTOR for mapping
jvivian Mar 24, 2024
773f0b8
Work in Progress: convert DFM to class
jvivian Mar 24, 2024
3ea37cd
Add AnnData parsing and easy project data load
jvivian Mar 24, 2024
ddd41a4
Add Data Processing initial page
jvivian Mar 24, 2024
27f4489
move input data to processed subdir
jvivian Mar 24, 2024
9f848f5
Move all project related data functions to own module
jvivian Mar 25, 2024
4cac0a3
Move all IO-related functions to own module
jvivian Mar 25, 2024
7f77163
add DataLoader for io module
jvivian Mar 27, 2024
93529a6
Change writes to return self so they can be chained
jvivian Mar 27, 2024
afd7aa3
Update DataProcessor class
jvivian Mar 27, 2024
f3da2c7
Clean imports in covid19.py
jvivian Mar 27, 2024
6bc8d52
Refactor DFM ModelRunner
jvivian Mar 27, 2024
1bb06ae
Clean up how DataProcessor handles differencing
jvivian Mar 27, 2024
af4c79e
Add test module for IO
jvivian Mar 27, 2024
aa0c7ac
Rewrite CLI using updated classes
jvivian Mar 27, 2024
4e57a2b
Move factor processing outside class
jvivian Mar 27, 2024
1299bf1
Reorder dataframe to avoid AnnData init bug
jvivian Mar 27, 2024
23cfe7a
Clean up imports
jvivian Mar 27, 2024
16b0da2
Add test.h5ad (only two states)
jvivian Mar 27, 2024
3e2df7f
Expand/fix tests for CLI
jvivian Mar 27, 2024
a0e20ed
Add tests for covid19 project data module
jvivian Mar 27, 2024
e7340ca
Add tests for DFM ModelRunner
jvivian Mar 27, 2024
0c02a39
Add module for testing DataLoader
jvivian Mar 27, 2024
eef8f3d
Add tests for DataProcessor
jvivian Mar 27, 2024
e23eda5
Clean up test_results
jvivian Mar 27, 2024
af697dc
Add teardown
jvivian Mar 27, 2024
2c6015b
Update coverage report
jvivian Mar 27, 2024
13b7818
Update Dashboard to use ModelRunner
jvivian Mar 27, 2024
307ff7f
Update data explorer to work with new changes (stop gap)
jvivian Mar 27, 2024
80cd85e
Stop gap fix for factor analysis page
jvivian Mar 27, 2024
f877ce9
Ensure arviz is added
jvivian Mar 28, 2024
81cf77f
Merge branch 'main' into jvivian/issue56
jvivian Mar 28, 2024
5a57715
Add docs for DFM module
jvivian Mar 28, 2024
119980f
Add docs for IO module (I like how this shows up in mkdocs... try thi…
jvivian Mar 28, 2024
ef0561a
Add docs for DataProcessor
jvivian Mar 28, 2024
fefa3c1
Update docs with IO page
jvivian Mar 28, 2024
0639f93
Add docstring in results.py
jvivian Mar 28, 2024
e728e2e
linting in dashboard files
jvivian Mar 28, 2024
fe5f5a8
Merge branch 'jvivian/issue56' of https://github.com/jvivian/covid19-…
jvivian Mar 28, 2024
462fda7
Linter
jvivian Mar 28, 2024
bcc5aa2
Update coverage
jvivian Mar 28, 2024
be9c801
Fix bug in variable differencing
jvivian Mar 28, 2024
b9d68d7
Update coverage
jvivian Mar 28, 2024
7f7f39f
Streamlit fixes
jvivian Mar 29, 2024
8d7c3c5
Fix bug in data explorerer
jvivian Mar 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add test module for IO
jvivian committed Mar 27, 2024
commit af4c79ef09724eec64e1e3017916a0c567fc04b7
2 changes: 1 addition & 1 deletion covid19_drdfm/io.py
Original file line number Diff line number Diff line change
@@ -22,7 +22,7 @@ def load(self, data: Path, factors: Path, metadata: Optional[Path] = None) -> "D

def convert(self, ad: AnnData) -> "DataLoader":
# Populate this loader from an existing AnnData object: keep the object
# itself and expose its data matrix, `var` and `obs` as attributes.
# Returns `self`.
self.ad = ad
# NOTE(review): this is a rendered diff hunk without +/- markers; the file
# summary reports "1 addition & 1 deletion", so the next two lines are
# presumably the removed line (raw `ad.X`) followed by its replacement
# (`ad.to_df()`) -- confirm against the actual io.py.
self.data = ad.X
self.data = ad.to_df()
self.var = ad.var
self.obs = ad.obs
return self
81 changes: 81 additions & 0 deletions tests/test_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import shutil
import pandas as pd
import numpy as np
from pathlib import Path
from anndata import AnnData
from covid19_drdfm.io import DataLoader
from covid19_drdfm.covid19 import DATA_DIR
import pytest

@pytest.fixture()
def dfs():
    """Read the data, factors and metadata CSVs found under ``DATA_DIR``.

    Returns:
        A ``(data, factors, metadata)`` tuple of DataFrames. ``factors.csv``
        and ``metadata.csv`` are read with their first column as the index;
        ``data.csv`` is read with the default index.
    """
    data_df = pd.read_csv(DATA_DIR / 'data.csv')
    factors_df = pd.read_csv(DATA_DIR / 'factors.csv', index_col=0)
    metadata_df = pd.read_csv(DATA_DIR / 'metadata.csv', index_col=0)
    return data_df, factors_df, metadata_df


def test_load(tmpdir):
    """Write three small CSVs and check ``DataLoader.load`` reads them back.

    After loading, ``loader.ad`` must be an ``AnnData`` and ``loader.data``,
    ``loader.var`` and ``loader.obs`` must be DataFrames equal to the frames
    that were written to disk.
    """
    workdir = Path(tmpdir / 'test_load')
    workdir.mkdir(exist_ok=True)
    loader = DataLoader()

    data_csv = workdir / "./test_data.csv"
    factors_csv = workdir / "./test_factors.csv"
    metadata_csv = workdir / "./test_metadata.csv"

    expected_data = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    expected_factors = pd.DataFrame(
        {"factor": ["Zoo", "Zoo"]},
        index=["A", "B"],
    )
    expected_metadata = pd.DataFrame({"State": ["S1", "S2", "S3"]})

    # The data file is written without its index; the other two keep theirs.
    expected_data.to_csv(data_csv, index=False)
    expected_factors.to_csv(factors_csv, index=True)
    expected_metadata.to_csv(metadata_csv, index=True)

    loader.load(data_csv, factors_csv, metadata_csv)

    expected_by_attr = (
        ("data", expected_data),
        ("var", expected_factors),
        ("obs", expected_metadata),
    )

    # Type checks first, then content checks.
    assert isinstance(loader.ad, AnnData)
    for attr, _ in expected_by_attr:
        assert isinstance(getattr(loader, attr), pd.DataFrame)

    for attr, expected in expected_by_attr:
        assert getattr(loader, attr).equals(expected)

    shutil.rmtree(workdir)

def test_convert():
    """Check ``DataLoader.convert`` mirrors an in-memory ``AnnData``.

    The loader must hold the very same ``AnnData`` object, and its ``data``,
    ``var`` and ``obs`` must equal ``ad.to_df()``, ``ad.var`` and ``ad.obs``.
    """
    loader = DataLoader()

    obs_frame = pd.DataFrame({"Sample": ["S1", "S2"]})
    var_frame = pd.DataFrame({"Factor": ["X", "Y"]})
    adata = AnnData(
        X=np.array([[1, 2], [3, 4]]),
        obs=obs_frame,
        var=var_frame,
    )
    loader.convert(adata)

    assert loader.ad is adata
    assert loader.data.equals(adata.to_df())
    assert loader.var.equals(adata.var)
    assert loader.obs.equals(adata.obs)

def test_write_csvs(tmpdir):
    """Check ``DataLoader.write_csvs`` creates the three expected CSV files.

    Only the existence of ``data.csv``, ``factors.csv`` and ``metadata.csv``
    in the output directory is verified, not their contents.
    """
    loader = DataLoader()
    target_dir = Path(tmpdir / 'test_csvs')

    # Attributes are set directly on the loader; no load/convert call is made.
    loader.data = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    loader.var = pd.DataFrame({"Factor": ["X", "Y", "Z"]})
    loader.obs = pd.DataFrame({"Sample": ["S1", "S2", "S3"]})
    loader.write_csvs(target_dir)

    for filename in ("data.csv", "factors.csv", "metadata.csv"):
        assert (target_dir / filename).exists()

    shutil.rmtree(target_dir)

def test_write_h5ad(tmpdir):
    """Check ``DataLoader.write_h5ad`` creates ``data.h5ad`` in the output dir.

    Only the existence of the file is verified, not its contents.
    """
    loader = DataLoader()
    target_dir = Path(tmpdir / 'test_h5ad')

    obs_frame = pd.DataFrame({"Sample": ["S1", "S2"]})
    var_frame = pd.DataFrame({"Factor": ["X", "Y"]})
    loader.ad = AnnData(
        X=np.array([[1, 2], [3, 4]]),
        obs=obs_frame,
        var=var_frame,
    )
    loader.write_h5ad(target_dir)

    h5ad_file = target_dir / "data.h5ad"
    assert h5ad_file.exists()

    shutil.rmtree(target_dir)