Skip to content

Commit

Permalink
[python] Add to_spatial_data method to ExperimentAxisQuery (#3431)
Browse files Browse the repository at this point in the history
Adds a `to_spatial_data` method to `ExperimentAxisQuery` that exports the data from the query to a `SpatialData` object.
  • Loading branch information
jp-dark authored Dec 13, 2024
1 parent 382d691 commit 8d92cb5
Show file tree
Hide file tree
Showing 2 changed files with 438 additions and 5 deletions.
102 changes: 97 additions & 5 deletions apis/python/src/tiledbsoma/_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""Implementation of a SOMA Experiment.
"""
import enum
import warnings
from concurrent.futures import ThreadPoolExecutor
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -54,6 +55,7 @@

if TYPE_CHECKING:
from ._experiment import Experiment
from ._constants import SPATIAL_DISCLAIMER
from ._fastercsx import CompressedMatrix
from ._measurement import Measurement
from ._sparse_nd_array import SparseNDArray
Expand Down Expand Up @@ -407,9 +409,14 @@ def obs_scene_ids(self) -> pa.Array:
try:
obs_scene = self.experiment.obs_spatial_presence
except KeyError as ke:
raise KeyError("Missing obs_scene") from ke
raise KeyError(
"No obs_spatial_presence dataframe in this experiment."
) from ke
if not isinstance(obs_scene, DataFrame):
raise TypeError("obs_scene must be a dataframe.")
raise TypeError(
f"obs_spatial_presence must be a dataframe; got "
f"{type(obs_scene).__name__}."
)

full_table = obs_scene.read(
coords=((Axis.OBS.getattr_from(self._joinids), slice(None))),
Expand All @@ -428,12 +435,18 @@ def var_scene_ids(self) -> pa.Array:
try:
var_scene = self._ms.var_spatial_presence
except KeyError as ke:
raise KeyError("Missing var_scene") from ke
raise KeyError(
f"No var_spatial_presence dataframe in measurement "
f"'{self.measurement_name}'."
) from ke
if not isinstance(var_scene, DataFrame):
raise TypeError("var_scene must be a dataframe.")
raise TypeError(
f"var_spatial_presence must be a dataframe; got "
f"{type(var_scene).__name__}."
)

full_table = var_scene.read(
coords=((Axis.OBS.getattr_from(self._joinids), slice(None))),
coords=((Axis.VAR.getattr_from(self._joinids), slice(None))),
result_order=ResultOrder.COLUMN_MAJOR,
value_filter="data != 0",
).concat()
Expand Down Expand Up @@ -473,6 +486,85 @@ def to_anndata(

return ad

def to_spatial_data(  # type: ignore[no-untyped-def]
    self,
    X_name: str,
    *,
    column_names: Optional[AxisColumnNames] = None,
    X_layers: Sequence[str] = (),
    obsm_layers: Sequence[str] = (),
    obsp_layers: Sequence[str] = (),
    varm_layers: Sequence[str] = (),
    varp_layers: Sequence[str] = (),
    drop_levels: bool = False,
    scene_presence_mode: str = "obs",
):
    """Returns a SpatialData object containing the query results.

    The measurement data is read with :meth:`to_anndata` and stored as a
    table keyed by the measurement name. Each scene selected through
    ``scene_presence_mode`` is then looked up in the experiment's
    ``spatial`` collection and added to the same ``SpatialData`` object.

    Args:
        X_name: The X layer to read and return in the ``X`` slot.
        column_names: The columns in the ``var`` and ``obs`` dataframes
            to read.
        X_layers: Additional X layers to read and return in the ``layers`` slot.
        obsm_layers: Additional obsm layers to read and return in the obsm slot.
        obsp_layers: Additional obsp layers to read and return in the obsp slot.
        varm_layers: Additional varm layers to read and return in the varm slot.
        varp_layers: Additional varp layers to read and return in the varp slot.
        drop_levels: If ``True`` remove unused categories from the AnnData table
            with the measurement data.
        scene_presence_mode: Method for determining what scenes to return data
            from. Valid options are ``obs`` (use ``obs_spatial_presence``
            dataframe) and ``var`` (use ``var_spatial_presence`` dataframe).
            Defaults to ``obs``.

    Returns:
        A ``spatialdata.SpatialData`` object holding the AnnData table for
        this query's measurement plus the data added for each selected scene.

    Raises:
        ValueError: If ``scene_presence_mode`` is neither ``"obs"`` nor
            ``"var"``.
        ImportError: If the optional ``spatialdata`` package is not installed.
    """
    # Reject a bad mode before importing the optional dependency or emitting
    # the disclaimer, so argument errors surface immediately and on their own.
    if scene_presence_mode not in ("obs", "var"):
        raise ValueError(
            f"Invalid scene presence mode '{scene_presence_mode}'. Valid options "
            f"are 'obs' and 'var'."
        )

    # Imported here rather than at module level so the module can be
    # imported without spatialdata being available.
    from spatialdata import SpatialData

    from .io.spatial.outgest import _add_scene_to_spatial_data

    # stacklevel=2 attributes the warning to the caller's line instead of
    # this line inside the library.
    warnings.warn(SPATIAL_DISCLAIMER, stacklevel=2)

    # Get a list of scenes to add to SpatialData object.
    if scene_presence_mode == "obs":
        scene_ids = self.obs_scene_ids()
    else:
        scene_ids = self.var_scene_ids()

    # Get the anndata table.
    ad = self.to_anndata(
        X_name,
        column_names=column_names,
        X_layers=X_layers,
        obsm_layers=obsm_layers,
        obsp_layers=obsp_layers,
        varm_layers=varm_layers,
        varp_layers=varp_layers,
        drop_levels=drop_levels,
    )
    sdata = SpatialData(tables={self.measurement_name: ad})

    for scene_id in scene_ids:
        # Convert once; the same string is used both as the lookup key and
        # as the scene identifier passed to the outgest helper.
        scene_name = str(scene_id)
        _add_scene_to_spatial_data(
            sdata,
            scene_id=scene_name,
            scene=self.experiment.spatial[scene_name],
            obs_id_name="soma_joinid",
            var_id_name="soma_joinid",
            measurement_names=(self.measurement_name,),
        )

    return sdata

# Context management

def __enter__(self) -> Self:
Expand Down
Loading

0 comments on commit 8d92cb5

Please sign in to comment.