From ae1474df946f4a84a7d6d836401a7d187dd7d788 Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Thu, 14 Sep 2023 18:39:57 -0500 Subject: [PATCH] pre-commit --- apis/python/src/tiledbsoma/_dataframe.py | 11 --- apis/python/tests/test_basic_anndata_io.py | 78 +--------------------- apis/python/tests/test_dataframe.py | 24 ------- 3 files changed, 2 insertions(+), 111 deletions(-) diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index c3e2cd9def..8800184cd1 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -283,17 +283,6 @@ def enumeration(self, name: str) -> Tuple[Any, ...]: def column_to_enumeration(self, name: str) -> str: return str(self._soma_reader().get_enum_label_on_attr(name)) - def enumeration(self, name: str) -> Tuple[Any, ...]: - """Doc place holder. - - Returns: - Tuple[Any, ...]: _description_ - """ - return tuple(self._soma_reader().get_enum(name)) - - def column_to_enumeration(self, name: str) -> str: - return str(self._soma_reader().get_enum_label_on_attr(name)) - def __len__(self) -> int: """Returns the number of rows in the dataframe. Same as ``df.count``.""" return self.count diff --git a/apis/python/tests/test_basic_anndata_io.py b/apis/python/tests/test_basic_anndata_io.py index 7318cbef54..14a400fae1 100644 --- a/apis/python/tests/test_basic_anndata_io.py +++ b/apis/python/tests/test_basic_anndata_io.py @@ -76,24 +76,6 @@ def h5ad_file_X_none(request): return input_path -@pytest.fixture -def h5ad_file_categorical_int_nan(request): - # This has obs["categ_int_nan"] as a categorical int but with math.nan as a - # "not-in-the-category" indicator. Such H5AD files do arise in the wild. - # - # Reference: - # import anndata as ad - # import pandas as pd - # import math - # adata = adata.read_h5ad("whatever.h5ad") - # s = pd.Series(list(range(80)), dtype="category") - # s[0] = math.nan - # adata.obs["categ_int_nan"] = s - # adata.write_h5ad("categorical_int_nan.h5ad") - input_path = HERE.parent / "testdata/categorical_int_nan.h5ad" - return input_path - - @pytest.fixture def adata(h5ad_file): return anndata.read_h5ad(h5ad_file) @@ -749,7 +731,8 @@ def test_X_none(h5ad_file_X_none): assert exp.obs.count == 2638 assert exp.ms["RNA"].var.count == 1838 assert list(exp.ms["RNA"].X.keys()) == [] - + + # There exist in the wild AnnData files with categorical-int columns where the "not in the category" # is indicated by the presence of floating-point math.NaN in cells. Here we test that we can ingest # this. @@ -761,60 +744,3 @@ def test_obs_with_categorical_int_nan_enumeration( tiledbsoma.io.from_h5ad( output_path, h5ad_file_categorical_int_nan, measurement_name="RNA" ) - - -def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path): - adata = anndata.read_h5ad(h5ad_file_with_obsm_holes.as_posix()) - assert 1 == 1 - - # This data file is prepared such that obsm["X_pca"] has shape (2638, 50) - # but its [0][0] element is a 0, so when it's stored as sparse, its nnz - # is not 2638*50=131900. - ado = adata.obsm["X_pca"] - assert ado.shape == (2638, 50) - - output_path = tmp_path.as_posix() - tiledbsoma.io.from_anndata(output_path, adata, "RNA") - - exp = tiledbsoma.Experiment.open(output_path) - - # Verify the bounding box on the SOMA SparseNDArray - with tiledb.open(exp.ms["RNA"].obsm["X_pca"].uri) as so: - assert so.meta["soma_dim_0_domain_lower"] == 0 - assert so.meta["soma_dim_0_domain_upper"] == 2637 - assert so.meta["soma_dim_1_domain_lower"] == 0 - assert so.meta["soma_dim_1_domain_upper"] == 49 - - # With the bounding box present, all is well for outgest to AnnData format. - try1 = tiledbsoma.io.to_anndata(exp, "RNA") - assert try1.obsm["X_pca"].shape == (2638, 50) - - # Now remove the bounding box to simulate reading older data that lacks a bounding box. - with tiledb.open(exp.ms["RNA"].obsm["X_pca"].uri, "w") as so: - del so.meta["soma_dim_0_domain_lower"] - del so.meta["soma_dim_0_domain_upper"] - del so.meta["soma_dim_1_domain_lower"] - del so.meta["soma_dim_1_domain_upper"] - - # Re-open to simulate opening afresh a bounding-box-free array. - exp = tiledbsoma.Experiment.open(output_path) - - with tiledb.open(exp.ms["RNA"].obsm["X_pca"].uri) as so: - with pytest.raises(KeyError): - so.meta["soma_dim_0_domain_lower"] - with pytest.raises(KeyError): - so.meta["soma_dim_0_domain_upper"] - with pytest.raises(KeyError): - so.meta["soma_dim_1_domain_lower"] - with pytest.raises(KeyError): - so.meta["soma_dim_1_domain_upper"] - assert so.meta["soma_object_type"] == "SOMASparseNDArray" - - # Now try the remaining options for outgest. - with pytest.raises(tiledbsoma.SOMAError): - tiledbsoma.io.to_anndata(exp, "RNA") - - try3 = tiledbsoma.io.to_anndata( - exp, "RNA", obsm_varm_width_hints={"obsm": {"X_pca": 50}} - ) - assert try3.obsm["X_pca"].shape == (2638, 50) diff --git a/apis/python/tests/test_dataframe.py b/apis/python/tests/test_dataframe.py index 258a97a08a..3d3ee429d7 100644 --- a/apis/python/tests/test_dataframe.py +++ b/apis/python/tests/test_dataframe.py @@ -146,30 +146,6 @@ def test_dataframe_with_enumeration(tmp_path): assert sdf.enumeration("bar") == enums["enmr2"] -def test_dataframe_with_enumeration(tmp_path): - schema = pa.schema( - [ - pa.field("foo", pa.dictionary(pa.int64(), pa.large_string())), - pa.field("bar", pa.dictionary(pa.int64(), pa.large_string())), - ] - ) - enums = {"enmr1": ("a", "bb", "ccc"), "enmr2": ("cat", "dog")} - - with soma.DataFrame.create( - tmp_path.as_posix(), - schema=schema, - enumerations=enums, - column_to_enumerations={"foo": "enmr1", "bar": "enmr2"}, - ) as sdf: - data = {} - data["soma_joinid"] = [0, 1, 2, 3, 4] - data["foo"] = [2, 1, 2, 1, 0] - data["bar"] = [0, 1, 1, 0, 1] - sdf.write(pa.Table.from_pydict(data)) - assert sdf.enumeration("foo") == enums["enmr1"] - assert sdf.enumeration("bar") == enums["enmr2"] - - @pytest.fixture def simple_data_frame(tmp_path): """