diff --git a/apis/python/src/tiledbsc/soma_slice.py b/apis/python/src/tiledbsc/soma_slice.py index b9165a6800..e33d17eaa2 100644 --- a/apis/python/src/tiledbsc/soma_slice.py +++ b/apis/python/src/tiledbsc/soma_slice.py @@ -105,17 +105,29 @@ def to_anndata(self) -> ad.AnnData: if isinstance(data, pd.DataFrame): # Make obs_id and var_id accessible as columns. data = data.reset_index() + data = util.X_and_ids_to_sparse_matrix( data, "obs_id", # row_dim_name "var_id", # col_dim_name "value", # attr_name - self.obs.index, - self.var.index, + obs.index, + var.index, ) + if isinstance(data, pa.Table): data = data.to_pandas() data.set_index(["obs_id", "var_id"], inplace=True) + + data = util.X_and_ids_to_sparse_matrix( + data, + "obs_id", # row_dim_name + "var_id", # col_dim_name + "value", # attr_name + obs.index, + var.index, + ) + # We use AnnData as our in-memory storage. For SOMAs, all X layers are arrays within the # soma.X group; for AnnData, the 'data' layer is ann.X and all the others are in # ann.layers. diff --git a/apis/python/tests/test_soco_slice_query.py b/apis/python/tests/test_soco_slice_query.py index 05e9782577..c494a3a2c4 100644 --- a/apis/python/tests/test_soco_slice_query.py +++ b/apis/python/tests/test_soco_slice_query.py @@ -37,33 +37,37 @@ def test_soco_slice_query(tmp_path): var_attrs = ["feature_name"] var_query_string = 'feature_name == "MT-CO3"' - soma_slices = [] - for soma in soco: - # E.g. querying for 'cell_type == "blood"' but this SOMA doesn't have a cell_type column in - # its obs at all. - if not soma.obs.has_attr_names(obs_attrs): - continue - # E.g. querying for 'feature_name == "MT-CO3"' but this SOMA doesn't have a feature_name - # column in its var at all. - if not soma.var.has_attr_names(var_attrs): - continue - - # The return_arrow=True case drives arrow-format all the way through SOMA, SOMASlice, - # obs, var, X, etc. - soma_slice = soma.query( - obs_query_string=obs_query_string, var_query_string=var_query_string - ) - if soma_slice is not None: - soma_slices.append(soma_slice) - - result_soma_slice = tiledbsc.SOMASlice.concat(soma_slices) - assert result_soma_slice is not None - - ann = result_soma_slice.to_anndata() - - assert ann.obs.shape == (400, 17) - assert ann.var.shape == (1, 3) - assert ann.X.shape == (400, 1) + # The return_arrow=True case drives Arrow format all the way through SOMA, SOMASlice, + # obs, var, X, etc. + for return_arrow in [False, True]: + + soma_slices = [] + for soma in soco: + # E.g. querying for 'cell_type == "blood"' but this SOMA doesn't have a cell_type column in + # its obs at all. + if not soma.obs.has_attr_names(obs_attrs): + continue + # E.g. querying for 'feature_name == "MT-CO3"' but this SOMA doesn't have a feature_name + # column in its var at all. + if not soma.var.has_attr_names(var_attrs): + continue + + soma_slice = soma.query( + obs_query_string=obs_query_string, + var_query_string=var_query_string, + return_arrow=return_arrow, + ) + if soma_slice is not None: + soma_slices.append(soma_slice) + + result_soma_slice = tiledbsc.SOMASlice.concat(soma_slices) + assert result_soma_slice is not None + + ann = result_soma_slice.to_anndata() + + assert ann.obs.shape == (400, 17) + assert ann.var.shape == (1, 3) + assert ann.X.shape == (400, 1) def test_soco_slice_query_nans(tmp_path):