Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(WIP) [spatial] Prototype ingestion from Visium #2386

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apis/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ def run(self):
],
extras_require={
"dev": open("requirements_dev.txt").read(),
"spatial": ["tifffile", "pillow"],
},
python_requires=">=3.8",
cmdclass={"build_ext": build_ext, "bdist_wheel": bdist_wheel},
Expand Down
6 changes: 6 additions & 0 deletions apis/python/src/tiledbsoma/experimental/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,9 @@

Do NOT merge this into main.
"""

from .ingest import from_visium

Check warning on line 8 in apis/python/src/tiledbsoma/experimental/__init__.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/experimental/__init__.py#L8

Added line #L8 was not covered by tests

__all__ = [

Check warning on line 10 in apis/python/src/tiledbsoma/experimental/__init__.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/experimental/__init__.py#L10

Added line #L10 was not covered by tests
"from_visium",
]
340 changes: 340 additions & 0 deletions apis/python/src/tiledbsoma/experimental/ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,340 @@
# Copyright (c) 2024 TileDB, Inc.
#
# Licensed under the MIT License.

"""Experimental ingestion methods.

Check warning on line 5 in apis/python/src/tiledbsoma/experimental/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/experimental/ingest.py#L5

Added line #L5 was not covered by tests

This module contains experimental methods to generate Spatial SOMA artifacts
starting from other formats.

Do NOT merge into main.
"""

import json
import pathlib
from typing import (

Check warning on line 15 in apis/python/src/tiledbsoma/experimental/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/experimental/ingest.py#L13-L15

Added lines #L13 - L15 were not covered by tests
TYPE_CHECKING,
Any,
Dict,
List,
Optional,
Sequence,
Tuple,
Type,
Union,
)

import numpy as np
import pandas as pd
import pyarrow as pa
from PIL import Image
import scanpy

Check warning on line 31 in apis/python/src/tiledbsoma/experimental/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/experimental/ingest.py#L27-L31

Added lines #L27 - L31 were not covered by tests

from .. import Collection, DataFrame, DenseNDArray, Experiment, SparseNDArray
from .._constants import SOMA_JOINID
from .._tiledb_object import AnyTileDBObject
from .._types import IngestMode
from ..io import from_anndata
from ..io.ingest import (

Check warning on line 38 in apis/python/src/tiledbsoma/experimental/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/experimental/ingest.py#L33-L38

Added lines #L33 - L38 were not covered by tests
IngestCtx,
IngestionParams,
_create_or_open_collection,
_maybe_set,
_write_dataframe_impl,
)

if TYPE_CHECKING:
    # These names are only used inside quoted (string) annotations in this
    # module, so they are imported for static type checkers only.
    from somacore.options import PlatformConfig

    from .._types import Path
    from ..io._registration import ExperimentAmbientLabelMapping

    # Fixed module name: this previously read ``..io.ingeset``, which does not
    # match the ``..io.ingest`` module the rest of this file imports from.
    from ..io.ingest import AdditionalMetadata
    from ..options import SOMATileDBContext

Check warning on line 52 in apis/python/src/tiledbsoma/experimental/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/experimental/ingest.py#L49-L52

Added lines #L49 - L52 were not covered by tests


def from_visium(
    experiment_uri: str,
    input_path: "Path",
    measurement_name: str,
    scene_name: str,
    *,
    context: Optional["SOMATileDBContext"] = None,
    platform_config: Optional["PlatformConfig"] = None,
    obs_id_name: str = "obs_id",
    var_id_name: str = "var_id",
    X_layer_name: str = "data",
    raw_X_layer_name: str = "data",
    ingest_mode: IngestMode = "write",
    use_relative_uri: Optional[bool] = None,
    X_kind: Union[Type[SparseNDArray], Type[DenseNDArray]] = SparseNDArray,
    registration_mapping: Optional["ExperimentAmbientLabelMapping"] = None,
    uns_keys: Optional[Sequence[str]] = None,
    additional_metadata: "AdditionalMetadata" = None,
    use_raw_counts: bool = True,
) -> str:
    """Reads a 10x Visium dataset and writes it to an :class:`Experiment`.

    This function is for ingesting Visium data for prototyping and testing the
    proposed spatial design.

    The gene-expression matrix is ingested with :func:`from_anndata`. A
    ``spatial`` collection is then added to the experiment, containing one scene
    (named ``scene_name``) with an ``obs_locations`` dataframe and an ``images``
    collection.

    WARNING: This was only tested for Space Ranger version 2 output.

    Args:
        experiment_uri: Experiment URI, passed to :func:`from_anndata`.
        input_path: Directory with the Space Ranger output. The following files
            are read relative to it: ``raw_feature_bc_matrix.h5`` or
            ``filtered_feature_bc_matrix.h5``, ``spatial/tissue_positions.csv``,
            ``spatial/scalefactors_json.json``,
            ``spatial/tissue_hires_image.png`` and
            ``spatial/tissue_lowres_image.png``.
        measurement_name: Measurement name, passed to :func:`from_anndata`.
        scene_name: Key (and URI component) of the scene collection created
            inside the ``spatial`` collection.
        context: Optional SOMA/TileDB context used for every open/create call.
        platform_config: Optional platform configuration. Forwarded to
            :func:`from_anndata` and to the spot-location and image writers.
        obs_id_name: Passed to :func:`from_anndata`; also the column on which
            the spot positions are joined to ``obs``.
        var_id_name: Passed to :func:`from_anndata`.
        X_layer_name: Passed to :func:`from_anndata`.
        raw_X_layer_name: Passed to :func:`from_anndata`.
        ingest_mode: Only ``"write"`` is supported.
        use_relative_uri: Passed to :func:`from_anndata` and used when adding
            the spatial members to their parent collections.
        X_kind: Passed to :func:`from_anndata`.
        registration_mapping: Passed to :func:`from_anndata` and used to build
            the ingestion parameters for the spatial data.
        uns_keys: Passed to :func:`from_anndata`.
        additional_metadata: Passed to :func:`from_anndata` and to the spatial
            writers.
        use_raw_counts: If ``True`` read ``raw_feature_bc_matrix.h5``,
            otherwise read ``filtered_feature_bc_matrix.h5``.

    Returns:
        The experiment URI returned by :func:`from_anndata`.

    Raises:
        NotImplementedError: If ``ingest_mode`` is not ``"write"``.

    Lifecycle:
        Experimental
    """

    if ingest_mode != "write":
        raise NotImplementedError(
            f'the only ingest_mode currently supported is "write"; got "{ingest_mode}"'
        )

    # Get input file locations.
    input_path = pathlib.Path(input_path)

    input_gene_expression = (
        input_path / "raw_feature_bc_matrix.h5"
        if use_raw_counts
        else input_path / "filtered_feature_bc_matrix.h5"
    )

    # TODO: Generalize - this is hard-coded for Space Ranger version 2
    input_tissue_positions = input_path / "spatial/tissue_positions.csv"
    input_scale_factors = input_path / "spatial/scalefactors_json.json"

    # TODO: Generalize - hard-coded for Space Ranger version 2
    input_hires = input_path / "spatial/tissue_hires_image.png"
    input_lowres = input_path / "spatial/tissue_lowres_image.png"
    input_fullres = None

    # Ingest the gene-expression matrix as a regular (non-spatial) experiment.
    anndata = scanpy.read_10x_h5(input_gene_expression)
    uri = from_anndata(
        experiment_uri,
        anndata,
        measurement_name,
        context=context,
        platform_config=platform_config,
        obs_id_name=obs_id_name,
        var_id_name=var_id_name,
        X_layer_name=X_layer_name,
        raw_X_layer_name=raw_X_layer_name,
        ingest_mode=ingest_mode,
        use_relative_uri=use_relative_uri,
        X_kind=X_kind,
        registration_mapping=registration_mapping,
        uns_keys=uns_keys,
        additional_metadata=additional_metadata,
    )

    # Keyword arguments shared by every spatial create/open call below.
    ingest_ctx: IngestCtx = {
        "context": context,
        "ingestion_params": IngestionParams(ingest_mode, registration_mapping),
        "additional_metadata": additional_metadata,
    }

    # Get JSON scale factors.
    with open(input_scale_factors, mode="r", encoding="utf-8") as scale_factors_json:
        scale_factors = json.load(scale_factors_json)

    # TODO: The `obs_df` should be in dataframe with only soma_joinid and obs_id. Not
    # currently bothering to check/enforce this.
    with Experiment.open(uri, mode="r", context=context) as experiment:
        obs_df = experiment.obs.read().concat().to_pandas()

    # Add spatial information to the experiment.
    with Experiment.open(uri, mode="w", context=context) as experiment:
        spatial_uri = f"{uri}/spatial"
        with _create_or_open_collection(
            Collection[Collection[AnyTileDBObject]], spatial_uri, **ingest_ctx
        ) as spatial:
            _maybe_set(
                experiment, "spatial", spatial, use_relative_uri=use_relative_uri
            )
            scene_uri = f"{spatial_uri}/{scene_name}"
            with _create_or_open_collection(
                Collection[AnyTileDBObject], scene_uri, **ingest_ctx
            ) as scene:
                _maybe_set(
                    spatial, scene_name, scene, use_relative_uri=use_relative_uri
                )

                obs_locations_uri = f"{scene_uri}/obs_locations"

                # Write spot data and add to the scene.
                # `platform_config` is not part of `ingest_ctx`, so it is passed
                # explicitly; otherwise the caller's setting would be ignored.
                with _write_visium_spot_dataframe(
                    obs_locations_uri,
                    input_tissue_positions,
                    scale_factors,
                    obs_df,
                    obs_id_name,
                    platform_config=platform_config,
                    **ingest_ctx,
                ) as obs_locations:
                    _maybe_set(
                        scene,
                        "obs_locations",
                        obs_locations,
                        use_relative_uri=use_relative_uri,
                    )

                # Write image data and add to the scene.
                images_uri = f"{scene_uri}/images"
                with _write_visium_images(
                    images_uri,
                    scale_factors,
                    input_hires=input_hires,
                    input_lowres=input_lowres,
                    input_fullres=input_fullres,
                    use_relative_uri=use_relative_uri,
                    platform_config=platform_config,
                    **ingest_ctx,
                ) as images:
                    _maybe_set(
                        scene, "images", images, use_relative_uri=use_relative_uri
                    )
    return uri

Check warning on line 195 in apis/python/src/tiledbsoma/experimental/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/experimental/ingest.py#L195

Added line #L195 was not covered by tests


def _write_visium_spot_dataframe(
    df_uri: str,
    input_tissue_positions: pathlib.Path,
    scale_factors: Dict[str, Any],
    obs_df: pd.DataFrame,
    id_column_name: str,
    *,
    ingestion_params: IngestionParams,
    additional_metadata: "AdditionalMetadata" = None,
    platform_config: Optional["PlatformConfig"] = None,
    context: Optional["SOMATileDBContext"] = None,
) -> DataFrame:
    """Writes the Visium spot locations to a SOMA dataframe.

    The tissue-positions CSV is read, joined to ``obs_df`` on
    ``id_column_name`` (inner join, so only barcodes present in both are kept),
    and written with ``("y", "x", SOMA_JOINID)`` as index columns.

    Args:
        df_uri: URI of the dataframe to write.
        input_tissue_positions: Path to the tissue-positions CSV. It must have
            a header row containing ``barcode``, ``pxl_row_in_fullres`` and
            ``pxl_col_in_fullres``.
        scale_factors: Parsed scale-factor JSON; ``spot_diameter_fullres`` is
            read from it.
        obs_df: The experiment's ``obs`` data. Must contain the
            ``id_column_name`` column (and presumably the SOMA join-id column,
            since it is used as an index column - TODO confirm).
        id_column_name: Name the ``barcode`` column is renamed to, and the join
            key with ``obs_df``.
        ingestion_params: Passed to ``_write_dataframe_impl``.
        additional_metadata: Passed to ``_write_dataframe_impl``.
        platform_config: Passed to ``_write_dataframe_impl``.
        context: Passed to ``_write_dataframe_impl``.

    Returns:
        The dataframe returned by ``_write_dataframe_impl``.
    """
    # Every spot gets the same geometry value: the spot radius in
    # full-resolution pixels.
    spot_radius = 0.5 * scale_factors["spot_diameter_fullres"]

    # Pixel rows run along the image's vertical axis and pixel columns along
    # its horizontal axis, so row -> "y" and column -> "x". This matches the
    # (c, y, x) axis order used for the images; the names were previously
    # swapped.
    spot_df = (
        pd.read_csv(input_tissue_positions)
        .rename(
            columns={
                "barcode": id_column_name,
                "pxl_col_in_fullres": "x",
                "pxl_row_in_fullres": "y",
            }
        )
        .assign(_soma_geometry=np.double(spot_radius))
    )

    # Attach the obs columns (including the join id) to the spot positions.
    spot_df = pd.merge(obs_df, spot_df, how="inner", on=id_column_name)
    return _write_dataframe_impl(
        spot_df,
        df_uri,
        id_column_name,
        ingestion_params=ingestion_params,
        additional_metadata=additional_metadata,
        index_column_names=("y", "x", SOMA_JOINID),
        platform_config=platform_config,
        context=context,
    )


def _write_visium_images(
    uri: str,
    scale_factors: Dict[str, Any],
    *,
    input_hires: Optional[pathlib.Path],
    input_lowres: Optional[pathlib.Path],
    input_fullres: Optional[pathlib.Path],
    ingestion_params: IngestionParams,
    additional_metadata: "AdditionalMetadata" = None,
    platform_config: Optional["PlatformConfig"] = None,
    context: Optional["SOMATileDBContext"] = None,
    use_relative_uri: Optional[bool] = None,
) -> Collection[DenseNDArray]:
    """Collects the available Visium images and writes them as a multiscale set.

    Each image that is not ``None`` is registered under a fixed name
    (``fullres``, ``hires``, ``lowres``) together with a per-axis scale for the
    ``(c, y, x)`` axes. The full-resolution image uses a scale of 1 on every
    axis; the others use the reciprocal of the matching ``tissue_*_scalef``
    entry of ``scale_factors`` on the two spatial axes.

    Returns:
        The collection returned by ``_write_multiscale_images``.
    """
    image_sources: Dict[str, Tuple[pathlib.Path, List[float]]] = {}

    if input_fullres is not None:
        image_sources["fullres"] = (input_fullres, [1.0, 1.0, 1.0])

    # The down-sampled levels share the same logic; only the scale-factor key
    # differs. The key is looked up only when the image is actually present.
    downsampled_levels = (
        ("hires", input_hires, "tissue_hires_scalef"),
        ("lowres", input_lowres, "tissue_lowres_scalef"),
    )
    for level_name, level_path, factor_key in downsampled_levels:
        if level_path is None:
            continue
        inverse_factor = 1.0 / scale_factors[factor_key]
        image_sources[level_name] = (
            level_path,
            [1.0, inverse_factor, inverse_factor],
        )

    channel_axis = {"name": "c", "type": "channel"}
    y_axis = {"name": "y", "type": "space", "unit": "micrometer"}
    x_axis = {"name": "x", "type": "space", "unit": "micrometer"}

    return _write_multiscale_images(
        uri,
        image_sources,
        axes_metadata=[channel_axis, y_axis, x_axis],
        ingestion_params=ingestion_params,
        additional_metadata=additional_metadata,
        platform_config=platform_config,
        context=context,
        use_relative_uri=use_relative_uri,
    )


def _write_multiscale_images(
    uri: str,
    input_images: Dict[str, Tuple[pathlib.Path, List[float]]],
    *,
    axes_metadata: List[Dict[str, str]],
    ingestion_params: IngestionParams,
    additional_metadata: "AdditionalMetadata" = None,
    platform_config: Optional["PlatformConfig"] = None,
    context: Optional["SOMATileDBContext"] = None,
    use_relative_uri: Optional[bool] = None,
) -> Collection[DenseNDArray]:
    """Writes a set of images to a collection of dense arrays.

    Each image is written as a :class:`DenseNDArray` at ``{uri}/{image_name}``
    and added to the collection under ``image_name``. A JSON description of the
    images (name and scale transform of each) is stored in the collection
    metadata under the ``"multiscales"`` key.

    TODO: Write full docs for this function

    TODO: Need to add in collection level metadata.

    Args:
        uri: URI of the collection to create or open.
        input_images: Maps image name to ``(path, scales)``, where ``scales`` is
            stored as the image's scale transform.
        axes_metadata: Axis descriptions. NOTE(review): currently not written
            to the metadata blob - confirm whether that is intended.
        ingestion_params: Passed to ``_create_or_open_collection``.
        additional_metadata: Passed to ``_create_or_open_collection``.
        platform_config: Used when creating and writing each array.
        context: Used for the collection and for each array.
        use_relative_uri: Used when adding each array to the collection.

    Returns:
        The (still open) collection holding the image arrays.
    """
    collection = _create_or_open_collection(
        Collection[DenseNDArray],
        uri,
        ingestion_params=ingestion_params,
        additional_metadata=additional_metadata,
        context=context,
    )
    datasets_metadata = []
    for image_name, (image_path, image_scales) in input_images.items():
        datasets_metadata.append(
            {
                "path": image_name,
                "coordinateTransforms": [{"type": "scale", "scale": image_scales}],
            }
        )
        image_uri = f"{uri}/{image_name}"

        # TODO: Need to create new imaging type with dimensions 'c', 'y', 'x'
        # Move the last axis first to get (c, y, x). This assumes the decoded
        # image is 3-D; a 2-D image (e.g. grayscale) makes np.transpose raise.
        # The context manager releases the image file once it has been decoded.
        with Image.open(image_path) as image_file:
            im = np.transpose(np.array(image_file), (2, 0, 1))

        # Close each array handle once written and registered, mirroring how
        # the other members are handled in `from_visium`.
        with DenseNDArray.create(
            image_uri,
            type=pa.from_numpy_dtype(im.dtype),
            shape=im.shape,
            platform_config=platform_config,
            context=context,
        ) as image_array:
            tensor = pa.Tensor.from_numpy(im)
            image_array.write(
                (slice(None), slice(None), slice(None)),
                tensor,
                platform_config=platform_config,
            )
            _maybe_set(
                collection, image_name, image_array, use_relative_uri=use_relative_uri
            )

    # Metadata values are stored as a single JSON-encoded string.
    metadata_blob = json.dumps(
        {
            "multiscales": [
                {
                    "version": "0.1.0-dev",
                    "name": "visium-example",
                    "datasets": datasets_metadata,
                }
            ]
        }
    )
    collection.metadata.update({"multiscales": metadata_blob})
    return collection

Check warning on line 340 in apis/python/src/tiledbsoma/experimental/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/experimental/ingest.py#L339-L340

Added lines #L339 - L340 were not covered by tests
2 changes: 2 additions & 0 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,7 @@ def _write_dataframe_impl(
*,
ingestion_params: IngestionParams,
additional_metadata: AdditionalMetadata = None,
index_column_names: Sequence[str] = (SOMA_JOINID,),
original_index_name: Optional[str] = None,
platform_config: Optional[PlatformConfig] = None,
context: Optional[SOMATileDBContext] = None,
Expand Down Expand Up @@ -1198,6 +1199,7 @@ def _write_dataframe_impl(
soma_df = DataFrame.create(
df_uri,
schema=arrow_table.schema,
index_column_names=index_column_names,
platform_config=platform_config,
context=context,
)
Expand Down
Loading