diff --git a/apis/python/testdata/categorical_int_nan.h5ad b/apis/python/testdata/categorical_int_nan.h5ad new file mode 100644 index 0000000000..f8b5af115f Binary files /dev/null and b/apis/python/testdata/categorical_int_nan.h5ad differ diff --git a/apis/python/tests/test_basic_anndata_io.py b/apis/python/tests/test_basic_anndata_io.py index fc0eb32c83..93f64ecfdd 100644 --- a/apis/python/tests/test_basic_anndata_io.py +++ b/apis/python/tests/test_basic_anndata_io.py @@ -1,4 +1,3 @@ -import math import pathlib import tempfile from pathlib import Path @@ -39,6 +38,24 @@ def h5ad_file_uns_string_array(request): return input_path +@pytest.fixture +def h5ad_file_categorical_int_nan(request): + # This has obs["categ_int_nan"] as a categorical int but with math.nan as a + # "not-in-the-category" indicator. Such H5AD files do arise in the wild. + # + # Reference: + # import anndata as ad + # import pandas as pd + # import math + # adata = ad.read_h5ad("whatever.h5ad") + # s = pd.Series(list(range(80)), dtype="category") + # s[0] = math.nan + # adata.obs["categ_int_nan"] = s + # adata.write_h5ad("categorical_int_nan.h5ad") + input_path = HERE.parent / "testdata/categorical_int_nan.h5ad" + return input_path + + @pytest.fixture def adata(h5ad_file): return anndata.read_h5ad(h5ad_file) @@ -482,13 +499,11 @@ def test_null_obs(adata, tmp_path: Path): # There exist in the wild AnnData files with categorical-int columns where the "not in the category" # is indicated by the presence of floating-point math.NaN in cells. Here we test that we can ingest # this. 
-def test_obs_with_categorical_int_nan_enumeration(tmp_path, adata): +def test_obs_with_categorical_int_nan_enumeration( + tmp_path, h5ad_file_categorical_int_nan +): output_path = tmp_path.as_uri() - # Currently getting float not int here, failing to repro the problem - s = pd.Series(list(range(len(adata.obs)))) - s[0] = math.nan - adata.obs["categ_int_nan"] = s - - output_path = tmp_path.as_posix() - tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") + tiledbsoma.io.from_h5ad( + output_path, h5ad_file_categorical_int_nan, measurement_name="RNA" + )