Skip to content

Commit

Permalink
unit testing [skip ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Dec 6, 2024
1 parent 04885fa commit 483e339
Showing 1 changed file with 100 additions and 16 deletions.
116 changes: 100 additions & 16 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import anndata
import numpy as np
import pandas as pd
import pyarrow as pa
import pytest
import scipy
import somacore
Expand Down Expand Up @@ -1349,20 +1350,103 @@ def test_nan_append(conftest_pbmc_small, dtype, nans, new_obs_ids):
)


# TODO:
# * Make adata_under with nobs < tiledbsoma/io/conversions.STRING_DECAT_THRESHOLD
# * Make adata_over with nobs > tiledbsoma/io/conversions.STRING_DECAT_THRESHOLD
# * Ingest adata_under solo
# o Assert is-enum in the AnnData obs
# o Assert is-enum in the soma obs
# * Ingest adata_over solo
# o Assert is-enum in the AnnData obs
# o Assert not-enum in the soma obs
# * Ingest under then append over
# o Assert is-enum in the soma obs
# o Check levels and data
# * Ingest over then append under
# o Assert is-string in the soma obs
# o Check data
## TODO:
## * Make adata_under with nobs < tiledbsoma/io/conversions.STRING_DECAT_THRESHOLD
## * Make adata_over with nobs > tiledbsoma/io/conversions.STRING_DECAT_THRESHOLD
## * Ingest adata_under solo
## o Assert is-enum in the AnnData obs
## o Assert is-enum in the soma obs
## * Ingest adata_over solo
## o Assert is-enum in the AnnData obs
## o Assert not-enum in the soma obs
## * Ingest under then append over
## o Assert is-enum in the soma obs
## o Check levels and data
## * Ingest over then append under
## o Assert is-string in the soma obs
## o Check data
def test_decat_append(tmp_path):
    """Exercise categorical-column ingest on both sides of the decat threshold.

    Two AnnData objects are built whose ``obs`` has a categorical column
    ``myenum`` with one distinct value per row: one with two fewer rows than
    ``tiledbsoma.io.conversions.STRING_DECAT_THRESHOLD`` ("under") and one
    with two more ("over").  Each is ingested into its own experiment under
    ``tmp_path`` and the test asserts that:

    * the "under" experiment stores ``myenum`` as an Arrow dictionary type;
    * the "over" experiment stores ``myenum`` as a non-dictionary type;
    * in both cases the values read back equal the values written.

    Each experiment is then registered against the *other* AnnData.  The
    actual cross-appends and their assertions are not written yet (see the
    XXX markers at the end).
    """
    threshold = tiledbsoma.io.conversions.STRING_DECAT_THRESHOLD
    nobs_under = threshold - 2
    nobs_over = threshold + 2
    nvar = 100

    # A single var frame is shared by both AnnData objects.
    var = pd.DataFrame(
        data={
            "var_id": np.asarray([f"gene_{e:08}" for e in range(nvar)]),
            "mybool": np.asarray([True] * nvar),
        }
    )
    var.set_index("var_id", inplace=True)

    def make_adata(id_prefix, enum_prefix, nobs, seed):
        """Return ``(adata, enum_values)`` for ``nobs`` observations."""
        enum_values = [f"{enum_prefix}_{e:06}" for e in range(nobs)]
        obs = pd.DataFrame(
            data={
                "obs_id": np.asarray(
                    [f"{id_prefix}_{e:08}" for e in range(nobs)]
                ),
                "is_primary_data": np.asarray([True] * nobs),
                "myenum": pd.Series(np.asarray(enum_values), dtype="category"),
            }
        )
        obs.set_index("obs_id", inplace=True)

        # Seeded so that the generated matrix is identical on every run.
        X = scipy.sparse.random(
            nobs, nvar, density=0.1, dtype=np.float64, random_state=seed
        ).tocsr()
        return anndata.AnnData(X=X, obs=obs, var=var, dtype=X.dtype), enum_values

    adata_under, enum_values_under = make_adata("under", "enum_u", nobs_under, 0)
    adata_over, enum_values_over = make_adata("over", "enum_o", nobs_over, 1)

    path_under = (tmp_path / "under").as_posix()
    path_over = (tmp_path / "over").as_posix()

    tiledbsoma.io.from_anndata(path_under, adata_under, "RNA")
    tiledbsoma.io.from_anndata(path_over, adata_over, "RNA")

    # Under the threshold: the column is expected to stay dictionary-encoded.
    with tiledbsoma.Experiment.open(path_under) as exp_under:
        assert pa.types.is_dictionary(exp_under.obs.schema.field("myenum").type)
        obs_table = exp_under.obs.read().concat()
        assert obs_table.column("myenum").to_pylist() == enum_values_under

    # Over the threshold: the column is expected not to be dictionary-encoded.
    with tiledbsoma.Experiment.open(path_over) as exp_over:
        assert not pa.types.is_dictionary(exp_over.obs.schema.field("myenum").type)
        obs_table = exp_over.obs.read().concat()
        assert obs_table.column("myenum").to_pylist() == enum_values_over

    # The registration results below are not consumed yet; they are kept for
    # the append steps that are still to be written.
    rd_under_over = tiledbsoma.io.register_anndatas(
        experiment_uri=path_under,
        adatas=[adata_over],
        measurement_name="RNA",
        obs_field_name="obs_id",
        var_field_name="var_id",
    )

    rd_over_under = tiledbsoma.io.register_anndatas(
        experiment_uri=path_over,
        adatas=[adata_under],
        measurement_name="RNA",
        obs_field_name="obs_id",
        var_field_name="var_id",
    )

    # XXX OVER THEN UNDER
    # XXX PRE-ASSERTS AND POST-ASSERTS

    # XXX UNDER THEN OVER
    # XXX PRE-ASSERTS AND POST-ASSERTS

0 comments on commit 483e339

Please sign in to comment.