Skip to content

Commit

Permalink
used pre-prepared input for categorical-int-nan data
Browse files Browse the repository at this point in the history
  • Loading branch information
John Kerl committed Aug 18, 2023
1 parent e0846eb commit 22368cf
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 9 deletions.
Binary file added apis/python/testdata/categorical_int_nan.h5ad
Binary file not shown.
33 changes: 24 additions & 9 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import math
import pathlib
import tempfile
from pathlib import Path
Expand Down Expand Up @@ -39,6 +38,24 @@ def h5ad_file_uns_string_array(request):
return input_path


@pytest.fixture
def h5ad_file_categorical_int_nan(request):
    # Path to a pre-prepared H5AD file whose obs["categ_int_nan"] column is a
    # categorical int that uses math.nan as its "not-in-the-category"
    # indicator. Such H5AD files do arise in the wild.
    #
    # Recipe used to produce the file, kept here for reference:
    #   import anndata as ad
    #   import pandas as pd
    #   import math
    #   adata = ad.read_h5ad("whatever.h5ad")
    #   s = pd.Series(list(range(80)), dtype="category")
    #   s[0] = math.nan
    #   adata.obs["categ_int_nan"] = s
    #   adata.write_h5ad("categorical_int_nan.h5ad")
    return HERE.parent / "testdata/categorical_int_nan.h5ad"


@pytest.fixture
def adata(h5ad_file):
    # Read the H5AD file supplied by the h5ad_file fixture and hand the
    # resulting AnnData object to the requesting test.
    loaded = anndata.read_h5ad(h5ad_file)
    return loaded
Expand Down Expand Up @@ -482,13 +499,11 @@ def test_null_obs(adata, tmp_path: Path):
# There exist in the wild AnnData files with categorical-int columns where "not in the category"
# is indicated by a floating-point math.nan in the affected cells. Here we test that we can ingest
# such a file.
def test_obs_with_categorical_int_nan_enumeration(tmp_path, adata):
def test_obs_with_categorical_int_nan_enumeration(
tmp_path, h5ad_file_categorical_int_nan
):
output_path = tmp_path.as_uri()

# Currently getting float not int here, failing to repro the problem
s = pd.Series(list(range(len(adata.obs))))
s[0] = math.nan
adata.obs["categ_int_nan"] = s

output_path = tmp_path.as_posix()
tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")
tiledbsoma.io.from_h5ad(
output_path, h5ad_file_categorical_int_nan, measurement_name="RNA"
)

0 comments on commit 22368cf

Please sign in to comment.