Skip to content

Commit

Permalink
Addition Of DataFrameWrapper
Browse files Browse the repository at this point in the history
* When opening a `DataFrame` in read-mode, use `DataFrameWrapper` which
  wraps around `clib.SOMADataFrame`. Otherwise, `DataFrame` should
  use the already existing write-path with `ArrayWrapper` which wraps
  around a TileDB-Py Array
* Necessary changes to `_dataframe.py` to support the read-path already
  exist on another branch. That branch will be merged into this PR
  shortly
  • Loading branch information
nguyenv committed Oct 16, 2023
1 parent e63989a commit 948fbc8
Show file tree
Hide file tree
Showing 13 changed files with 330 additions and 669 deletions.
1 change: 1 addition & 0 deletions apis/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[build-system]
requires = [
"pyarrow",
"pybind11[global]>=2.10.0",
"setuptools>=65.5.1",
"wheel>=0.37.1",
Expand Down
35 changes: 24 additions & 11 deletions apis/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import sys
from typing import Optional

import pyarrow as pa
import setuptools
import setuptools.command.build_ext
import wheel.bdist_wheel
Expand Down Expand Up @@ -192,6 +193,7 @@ def run(self):
), # since pytiledbsoma.cc does #include of query_condition.cc
str(libtiledbsoma_dir.parent / "build/externals/install/include"),
str(tiledb_dir / "include"),
pa.get_include(),
]

LIB_DIRS = [
Expand Down Expand Up @@ -255,7 +257,11 @@ def run(self):
ext_modules=[
Pybind11Extension(
"tiledbsoma.pytiledbsoma",
["src/tiledbsoma/pytiledbsoma.cc"],
[
"src/tiledbsoma/soma_array.cc",
"src/tiledbsoma/soma_dataframe.cc",
"src/tiledbsoma/pytiledbsoma.cc",
],
include_dirs=INC_DIRS,
library_dirs=LIB_DIRS,
libraries=["tiledbsoma"],
Expand All @@ -269,22 +275,29 @@ def run(self):
install_requires=[
# Needed for Python 3.7 which anndata 0.9 doesn't support but we do
"anndata < 0.9; python_version<'3.8'",
# Tracked in https://github.com/single-cell-data/TileDB-SOMA/issues/1785
"anndata != 0.10.0; python_version>='3.8'",
"anndata; python_version>='3.8'",
"attrs>=22.2",
"numba~=0.58.0; python_version>='3.8'",
# Older numba version needed for Python3.7.
# This older numba version was also incompatible with newer numpy
# versions, and the old pip solver (<=2020) needed us to explicate
# that constraint here (issue #1051).
"numba==0.56.4; python_version<'3.8'",
"numpy>=1.18,<1.24; python_version<'3.8'",
# Pinning numba & its particular numpy constraints:
# The old pip solver (<=2020) doesn't deal with the transitive
# requirements (scanpy -> numba -> numpy) properly resulting in broken
# installation of incompatible numpy>=1.24. Issue #1051
# These pins can be removed either when there's a new numba release
# with less-particular numpy version constraints, or if we decide we no
# longer need to support the old pip solver (default on ubuntu 20.04).
#
# Also: numba doesn't support Python 3.11 until 0.57.0rc1.
# It's not preferable to pin to an RC dependency, so we only do this
# when we must, which is for 3.11.
"numba==0.56.4; python_version<'3.11'",
"numba==0.57; python_version=='3.11'",
"numpy>=1.18,<1.24; python_version<'3.11'",
"numpy>=1.18,<1.25; python_version=='3.11'",
"pandas",
"pyarrow>=9.0.0",
"scanpy>=1.9.2",
"scipy",
"somacore==1.0.4",
"tiledb~=0.23.1",
"tiledb~=0.23.0",
"typing-extensions", # Note "-" even though `import typing_extensions`
],
extras_require={
Expand Down
12 changes: 8 additions & 4 deletions apis/python/src/tiledbsoma/_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
from .options import SOMATileDBContext
from .options._soma_tiledb_context import _validate_soma_tiledb_context

from . import pytiledbsoma as clib

_Obj = TypeVar("_Obj", bound="_tiledb_object.AnyTileDBObject")
_Wrapper = TypeVar("_Wrapper", bound=_tdb_handles.AnyWrapper)

Expand Down Expand Up @@ -154,10 +156,11 @@ def _reify_handle(hdl: _Wrapper) -> "_tiledb_object.TileDBObject[_Wrapper]":
typename = _read_soma_type(hdl)
cls = _type_name_to_cls(typename)
if cls._wrapper_type != type(hdl):
raise SOMAError(
f"cannot open {hdl.uri!r}: a {type(hdl._handle)}"
f" cannot be converted to a {typename}"
)
if typename == "SOMADataFrame" and cls != _dataframe.DataFrame:
raise SOMAError(
f"cannot open {hdl.uri!r}: a {type(hdl._handle)}"
f" cannot be converted to a {typename}"
)
return cast(
_tiledb_object.TileDBObject[_Wrapper],
cls(hdl, _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code"),
Expand Down Expand Up @@ -208,6 +211,7 @@ def _type_name_to_cls(type_name: str) -> Type["_tiledb_object.AnyTileDBObject"]:
_sparse_nd_array.SparseNDArray,
)
}
type_map["somadataframe"] = _dataframe.DataFrame
try:
return type_map[type_name.lower()]
except KeyError as ke:
Expand Down
34 changes: 33 additions & 1 deletion apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
from ._types import OpenTimestamp
from .options._soma_tiledb_context import SOMATileDBContext

from . import pytiledbsoma as clib

RawHandle = Union[tiledb.Array, tiledb.Group]
_RawHdl_co = TypeVar("_RawHdl_co", bound=RawHandle, covariant=True)
"""A raw TileDB object. Covariant because Handles are immutable enough."""
Expand All @@ -48,7 +50,10 @@ def open(
if not obj_type:
raise DoesNotExistError(f"{uri!r} does not exist")
if obj_type == "array":
return ArrayWrapper.open(uri, mode, context, timestamp)
if mode == "r" and clib.SOMADataFrame.exists(uri):
return DataFrameWrapper.open(uri, mode, context, timestamp)
else:
return ArrayWrapper.open(uri, mode, context, timestamp)
if obj_type == "group":
return GroupWrapper.open(uri, mode, context, timestamp)
raise SOMAError(f"{uri!r} has unknown storage type {obj_type!r}")
Expand Down Expand Up @@ -230,6 +235,33 @@ def _do_initial_reads(self, reader: tiledb.Group) -> None:
self.initial_contents = {
o.name: GroupEntry.from_object(o) for o in reader if o.name is not None
}

class DataFrameWrapper(Wrapper[clib.SOMADataFrame]):
    """Wrapper whose underlying handle is a ``clib.SOMADataFrame``.

    The handle is produced by ``clib.SOMADataFrame.open`` (see ``_opener``)
    and is exposed to the rest of the class as ``self._handle``.
    """

    @classmethod
    def _opener(
        cls,
        uri: str,
        mode: options.OpenMode,
        context: SOMATileDBContext,
        timestamp: int,
    ) -> clib.SOMADataFrame:
        """Open the dataframe stored at ``uri`` and return the clib handle.

        Args:
            uri: Location of the dataframe to open.
            mode: ``"r"`` selects ``clib.OpenMode.read``; any other value
                selects ``clib.OpenMode.write``.
            context: Supplies ``tiledb_config``, whose values are stringified
                before being handed to clib.
            timestamp: Upper bound of the ``(0, timestamp)`` pair passed to
                ``clib.SOMADataFrame.open``.

        Returns:
            Whatever ``clib.SOMADataFrame.open`` returns for these arguments.
        """
        open_mode = clib.OpenMode.read if mode == "r" else clib.OpenMode.write
        return clib.SOMADataFrame.open(
            uri,
            open_mode,
            # Config values are converted to ``str`` before crossing into clib.
            {k: str(v) for k, v in context.tiledb_config.items()},
            # NOTE(review): presumably the column-name selection, with an
            # empty list meaning "no restriction" -- confirm against the
            # pybind11 signature of SOMADataFrame.open.
            [],
            clib.ResultOrder.automatic,
            # NOTE(review): looks like a (start, end) timestamp range --
            # confirm against the binding.
            (0, timestamp),
        )

    # NOTE(review): the handle is a clib.SOMADataFrame, not a tiledb.Array, so
    # the ``tiledb.ArraySchema`` annotation below may not match what
    # ``self._handle.schema`` actually returns -- confirm and adjust.
    @property
    def schema(self) -> tiledb.ArraySchema:
        """Return the ``schema`` attribute of the underlying clib handle."""
        return self._handle.schema

    @property
    def meta(self):
        """Return the ``meta`` attribute of the underlying clib handle."""
        return self._handle.meta


class _DictMod(enum.Enum):
Expand Down
7 changes: 6 additions & 1 deletion apis/python/src/tiledbsoma/_tiledb_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from ._util import check_type, ms_to_datetime
from .options import SOMATileDBContext
from .options._soma_tiledb_context import _validate_soma_tiledb_context
from ._tdb_handles import DataFrameWrapper
from . import pytiledbsoma as clib

_WrapperType_co = TypeVar(
"_WrapperType_co", bound=_tdb_handles.AnyWrapper, covariant=True
Expand Down Expand Up @@ -81,7 +83,10 @@ def open(
"""
del platform_config # unused
context = _validate_soma_tiledb_context(context)
handle = cls._wrapper_type.open(uri, mode, context, tiledb_timestamp)
if mode == "r" and clib.SOMADataFrame.exists(uri):
handle = DataFrameWrapper.open(uri, mode, context, tiledb_timestamp)
else:
handle = cls._wrapper_type.open(uri, mode, context, tiledb_timestamp)
return cls(
handle,
_dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code",
Expand Down
Loading

0 comments on commit 948fbc8

Please sign in to comment.