[Backport release-1.15] [python] Export full experiment to SpatialData #3430

Merged
merged 1 commit on Dec 12, 2024
3 changes: 2 additions & 1 deletion apis/python/src/tiledbsoma/io/spatial/__init__.py
@@ -5,5 +5,6 @@
"""

from .ingest import VisiumPaths, from_visium
from .outgest import to_spatial_data

__all__ = ["from_visium", "VisiumPaths"]
__all__ = ["to_spatial_data", "from_visium", "VisiumPaths"]
224 changes: 221 additions & 3 deletions apis/python/src/tiledbsoma/io/spatial/outgest.py
@@ -3,7 +3,7 @@
#
# Licensed under the MIT License.
import warnings
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Sequence, Tuple, Union

import pandas as pd
import somacore
@@ -26,8 +26,10 @@
warnings.warn("Experimental spatial outgestor requires the geopandas package.")
raise err

from ... import MultiscaleImage, PointCloudDataFrame
from ..._constants import SOMA_JOINID
from ... import Experiment, MultiscaleImage, PointCloudDataFrame, Scene
from ..._constants import SOMA_JOINID, SPATIAL_DISCLAIMER
from ..._spatial_util import transform_from_json
from .. import to_anndata
from ._xarray_backend import dense_nd_array_to_data_array, images_to_datatree

if TYPE_CHECKING:
@@ -406,3 +408,219 @@ def to_spatial_data_multiscale_image(
)

return images_to_datatree(image_data_arrays)


def _get_transform_from_collection(
key: str, metadata: Mapping[str, Any]
) -> Optional[somacore.CoordinateTransform]:
transform_key = f"soma_scene_registry_{key}"
if transform_key in metadata:
transform_json = metadata[transform_key]
return transform_from_json(transform_json)
return None


def _add_scene_to_spatial_data(
sdata: sd.SpatialData,
scene_id: str,
scene: Scene,
*,
obs_id_name: str,
var_id_name: str,
measurement_names: Optional[Sequence[str]] = None,
) -> None:
"""Adds items from a Scene to a SpatialData object.

Args:
sdata: SpatialData object to update.
scene_id: Name of the scene that will be added to the SpatialData object.
scene: The scene that is being added to the SpatialData object.
obs_id_name: Name to use for the ``soma_joinid`` in ``obsl``.
        var_id_name: Name to use for the ``soma_joinid`` in ``varl``.
measurement_names: The names of measurements to export. If ``None``, all
measurements are included. Defaults to ``None``.

"""
# Cannot have spatial data if no coordinate space.
if scene.coordinate_space is None:
return

# Get the map from Scene dimension names to SpatialData dimension names.
input_axis_names = scene.coordinate_space.axis_names
_, scene_dim_map = _convert_axis_names(input_axis_names, input_axis_names)

# Export obsl data to SpatialData.
if "obsl" in scene:
for key, df in scene.obsl.items():
output_key = f"{scene_id}_{key}"
transform = _get_transform_from_collection(key, scene.obsl.metadata)
if isinstance(df, PointCloudDataFrame):
if "soma_geometry" in df.metadata:
sdata.shapes[output_key] = to_spatial_data_shapes(
df,
key=output_key,
scene_id=scene_id,
scene_dim_map=scene_dim_map,
transform=transform,
soma_joinid_name=obs_id_name,
)
else:
sdata.points[output_key] = to_spatial_data_points(
df,
key=output_key,
scene_id=scene_id,
scene_dim_map=scene_dim_map,
transform=transform,
soma_joinid_name=obs_id_name,
)
else:
warnings.warn(
f"Skipping obsl[{key}] in Scene {scene_id}; unexpected datatype"
f" {type(df).__name__}."
)

# Export varl data to SpatialData.
if "varl" in scene:
for measurement_name, subcoll in scene.varl.items():
if (
measurement_names is not None
and measurement_name not in measurement_names
):
continue
for key, df in subcoll.items():
output_key = f"{scene_id}_{measurement_name}_{key}"
transform = _get_transform_from_collection(key, subcoll.metadata)
if isinstance(df, PointCloudDataFrame):
if "soma_geometry" in df.metadata:
sdata.shapes[output_key] = to_spatial_data_shapes(
df,
key=output_key,
scene_id=scene_id,
scene_dim_map=scene_dim_map,
transform=transform,
soma_joinid_name=var_id_name,
)
else:
sdata.points[output_key] = to_spatial_data_points(
df,
key=output_key,
scene_id=scene_id,
scene_dim_map=scene_dim_map,
transform=transform,
soma_joinid_name=var_id_name,
)
else:
warnings.warn(
f"Skipping varl[{measurement_name}][{key}] in Scene "
f"{scene_id}; unexpected datatype {type(df).__name__}."
)

# Export img data to SpatialData.
if "img" in scene:
for key, image in scene.img.items():
output_key = f"{scene_id}_{key}"
transform = _get_transform_from_collection(key, scene.img.metadata)
            if not isinstance(image, MultiscaleImage):
                warnings.warn(  # type: ignore[unreachable]
                    f"Skipping img[{key}] in Scene {scene_id}; unexpected "
                    f"datatype {type(image).__name__}."
                )
                continue
if image.level_count == 1:
sdata.images[output_key] = to_spatial_data_image(
image,
0,
key=output_key,
scene_id=scene_id,
scene_dim_map=scene_dim_map,
transform=transform,
)
else:
sdata.images[f"{scene_id}_{key}"] = to_spatial_data_multiscale_image(
image,
key=output_key,
scene_id=scene_id,
scene_dim_map=scene_dim_map,
transform=transform,
)


def to_spatial_data(
experiment: Experiment,
*,
measurement_names: Optional[Sequence[str]] = None,
scene_names: Optional[Sequence[str]] = None,
obs_id_name: str = "obs_id",
var_id_name: str = "var_id",
table_kwargs: Optional[Mapping[str, Dict[str, Any]]] = None,
) -> sd.SpatialData:
"""Converts the experiment group to SpatialData format.

Args:
        experiment: The experiment to export.
measurement_names: The names of measurements to export. If ``None``, all
measurements are included. Defaults to ``None``.
scene_names: The names of the scenes to export. If ``None``, all scenes are
included. Defaults to ``None``.
obs_id_name: Column name to use for ``obs`` dataframes. Defaults to
``"obs_id"``.
var_id_name: Column name to use for ``var`` dataframes. Defaults to
``"var_id|``.
        table_kwargs: Optional mapping from measurement name to keyword arguments to
            pass to table conversions. See :meth:`to_anndata` for possible keyword
arguments.
"""
warnings.warn(SPATIAL_DISCLAIMER)

# Read non-spatial data into Anndata tables.
if "ms" in experiment:
if measurement_names is None:
ms_keys = tuple(experiment.ms.keys())
else:
ms_keys = tuple(measurement_names)
if table_kwargs is None:
table_kwargs = {}
tables = {
measurement_name: (
to_anndata(
experiment,
measurement_name,
obs_id_name=obs_id_name,
var_id_name=var_id_name,
**table_kwargs[measurement_name],
)
if measurement_name in table_kwargs
else to_anndata(
experiment,
measurement_name,
obs_id_name=obs_id_name,
var_id_name=var_id_name,
)
)
for measurement_name in ms_keys
}
else:
tables = {}

sdata = sd.SpatialData(tables=tables)

# If no spatial data, return just the tables.
if "spatial" not in experiment:
return sdata

if scene_names is None:
scene_names = tuple(experiment.spatial.keys())
else:
scene_names = tuple(scene_names)

for scene_id in scene_names:
scene = experiment.spatial[scene_id]
_add_scene_to_spatial_data(
sdata=sdata,
scene_id=scene_id,
scene=scene,
obs_id_name=obs_id_name,
var_id_name=var_id_name,
measurement_names=measurement_names,
)

return sdata
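
For review context, a short usage sketch of the new export path. The experiment URI and the "RNA" measurement name are hypothetical placeholders, and the keyword arguments mirror the signature added above; the spatial outgestor is still experimental and emits the SPATIAL_DISCLAIMER warning when called.

import tiledbsoma
from tiledbsoma.io.spatial import to_spatial_data

# Hypothetical URI; point this at a real SOMA experiment that has a "spatial" collection.
EXPERIMENT_URI = "./example_spatial_experiment"

with tiledbsoma.Experiment.open(EXPERIMENT_URI) as exp:
    sdata = to_spatial_data(
        exp,
        measurement_names=["RNA"],  # export only the RNA measurement; None exports all
        scene_names=None,           # None exports every scene in experiment.spatial
        obs_id_name="obs_id",
        var_id_name="var_id",
        # Per-measurement kwargs are forwarded to to_anndata when building the tables.
        table_kwargs={"RNA": {"X_layer_name": "data"}},
    )

print(sdata)  # SpatialData object holding tables, points, shapes, and images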