Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Aug 27, 2024
2 parents 8c5b2af + 9431cf3 commit b56d07a
Show file tree
Hide file tree
Showing 56 changed files with 1,849 additions and 515 deletions.
20 changes: 16 additions & 4 deletions apis/python/src/tiledbsoma/io/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,31 @@
# Licensed under the MIT License.

"""Common constants and types used during ingestion/outgestion."""

from typing import Any, Mapping, Union
from typing import Dict, List, Mapping, Optional, Union

import h5py
import numpy as np
import pandas as pd
import scipy.sparse as sp
from anndata._core.sparse_dataset import SparseDataset

from tiledbsoma._types import NPNDArray
from tiledbsoma._types import Metadatum, NPNDArray

SparseMatrix = Union[sp.csr_matrix, sp.csc_matrix, SparseDataset]
DenseMatrix = Union[NPNDArray, h5py.Dataset]
Matrix = Union[DenseMatrix, SparseMatrix]
UnsMapping = Mapping[str, Any]

# Scalar values that may appear in `uns`.
UnsScalar = Union[str, int, float, np.generic]
# TODO: support sparse matrices in `uns`
# A leaf (non-mapping) `uns` value: a scalar, a list of scalars, a DataFrame, or a NumPy array.
UnsLeaf = Union[UnsScalar, List[UnsScalar], pd.DataFrame, NPNDArray]
# A node is either a leaf or a (recursively) nested string-keyed mapping of nodes.
UnsNode = Union[UnsLeaf, Mapping[str, "UnsNode"]]
# The top-level `uns` type: string keys mapping to nodes.
UnsMapping = Mapping[str, UnsNode]
# Specialize `UnsNode` to `Dict` instead of `Mapping`.
# `Mapping` is read-only (it doesn't expose `__setitem__`), so the `Dict` variants are useful
# for building `uns` dictionaries.
UnsDictNode = Union[UnsLeaf, Dict[str, "UnsDictNode"]]
UnsDict = Dict[str, UnsDictNode]

AdditionalMetadata = Optional[Dict[str, Metadatum]]

# Arrays of strings from AnnData's uns are stored in SOMA as SOMADataFrame,
# since SOMA ND arrays are necessarily arrays *of numbers*. This is okay since
Expand Down
87 changes: 55 additions & 32 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@
_INGEST_MODES,
INGEST_MODES,
IngestMode,
Metadatum,
NPNDArray,
Path,
_IngestMode,
Expand All @@ -89,9 +88,11 @@
_UNS_OUTGEST_HINT_1D,
_UNS_OUTGEST_HINT_2D,
_UNS_OUTGEST_HINT_KEY,
AdditionalMetadata,
Matrix,
SparseMatrix,
UnsMapping,
UnsNode,
)
from ._registration import (
AxisIDMapping,
Expand All @@ -107,9 +108,6 @@
_TDBO = TypeVar("_TDBO", bound=SOMAObject[RawHandle])


AdditionalMetadata = Optional[Dict[str, Metadatum]]


def add_metadata(obj: SOMAObject[Any], additional_metadata: AdditionalMetadata) -> None:
if additional_metadata:
obj.verify_open_for_writing()
Expand Down Expand Up @@ -1184,6 +1182,10 @@ def _write_dataframe_impl(
platform_config: Optional[PlatformConfig] = None,
context: Optional[SOMATileDBContext] = None,
) -> DataFrame:
"""Save a Pandas DataFrame as a SOMA DataFrame.
Expects the required ``soma_joinid`` index to have already been added to the ``pd.DataFrame``.
"""
s = _util.get_start_stamp()
logging.log_io(None, f"START WRITING {df_uri}")

Expand Down Expand Up @@ -2485,7 +2487,7 @@ def _maybe_ingest_uns(
def _ingest_uns_dict(
parent: AnyTileDBCollection,
parent_key: str,
dct: Mapping[str, object],
dct: UnsMapping,
*,
platform_config: Optional[PlatformConfig],
context: Optional[SOMATileDBContext],
Expand Down Expand Up @@ -2524,9 +2526,9 @@ def _ingest_uns_dict(


def _ingest_uns_node(
coll: Any,
key: Any,
value: Any,
coll: AnyTileDBCollection,
key: str,
value: UnsNode,
*,
platform_config: Optional[PlatformConfig],
context: Optional[SOMATileDBContext],
Expand Down Expand Up @@ -2579,30 +2581,13 @@ def _ingest_uns_node(
if isinstance(value, list) or "numpy" in str(type(value)):
value = np.asarray(value)
if isinstance(value, np.ndarray):
if value.dtype.names is not None:
msg = f"Skipped {coll.uri}[{key!r}]" " (uns): unsupported structured array"
# This is a structured array, which we do not support.
logging.log_io(msg, msg)
return

if value.dtype.char in ("U", "O"):
# In the wild it's quite common to see arrays of strings in uns data.
# Frequent example: uns["louvain_colors"].
_ingest_uns_string_array(
coll,
key,
value,
use_relative_uri=use_relative_uri,
**ingest_platform_ctx,
)
else:
_ingest_uns_ndarray(
coll,
key,
value,
use_relative_uri=use_relative_uri,
**ingest_platform_ctx,
)
_ingest_uns_array(
coll,
key,
value,
use_relative_uri=use_relative_uri,
ingest_platform_ctx=ingest_platform_ctx,
)
return

msg = (
Expand All @@ -2611,6 +2596,44 @@ def _ingest_uns_node(
logging.log_io(msg, msg)


def _ingest_uns_array(
    coll: AnyTileDBCollection,
    key: str,
    value: NPNDArray,
    use_relative_uri: Optional[bool],
    ingest_platform_ctx: IngestPlatformCtx,
) -> None:
    """Ingest an uns Numpy array.

    Delegates to :func:`_ingest_uns_string_array` for string arrays, and to
    :func:`_ingest_uns_ndarray` for numeric arrays. Structured arrays (those
    whose dtype has named fields) are not supported: they are logged and
    skipped without writing anything.

    Args:
        coll: Parent collection the array is ingested under.
        key: Name of the ``uns`` entry within ``coll``.
        value: The Numpy array to ingest.
        use_relative_uri: Forwarded unchanged to the delegate.
        ingest_platform_ctx: Keyword arguments unpacked into the delegate call.
    """
    if value.dtype.names is not None:
        # This is a structured array, which we do not support.
        logging.log_io_same(
            f"Skipped {coll.uri}[{key!r}] (uns): unsupported structured array"
        )
        # Stop here: without this return the "skipped" array would fall
        # through to the ndarray ingest path below.
        return

    if value.dtype.char in ("U", "O"):
        # In the wild it's quite common to see arrays of strings in uns data.
        # Frequent example: uns["louvain_colors"].
        _ingest_uns_string_array(
            coll,
            key,
            value,
            use_relative_uri=use_relative_uri,
            **ingest_platform_ctx,
        )
    else:
        _ingest_uns_ndarray(
            coll,
            key,
            value,
            use_relative_uri=use_relative_uri,
            **ingest_platform_ctx,
        )


def _ingest_uns_string_array(
coll: AnyTileDBCollection,
key: str,
Expand Down
5 changes: 3 additions & 2 deletions apis/python/src/tiledbsoma/io/outgest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
_UNS_OUTGEST_HINT_2D,
_UNS_OUTGEST_HINT_KEY,
Matrix,
UnsDict,
UnsMapping,
)

Expand Down Expand Up @@ -432,12 +433,12 @@ def _extract_uns(
collection: Collection[Any],
uns_keys: Optional[Sequence[str]] = None,
level: int = 0,
) -> UnsMapping:
) -> UnsDict:
"""
This is a helper function for ``to_anndata`` of ``uns`` elements.
"""

extracted: Dict[str, Any] = {}
extracted: UnsDict = {}
for key, element in collection.items():
if level == 0 and uns_keys is not None and key not in uns_keys:
continue
Expand Down
Loading

0 comments on commit b56d07a

Please sign in to comment.