Skip to content

Commit

Permalink
Update according to review
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenv committed Nov 9, 2023
1 parent 9e5794a commit 03c0d9f
Show file tree
Hide file tree
Showing 10 changed files with 74 additions and 117 deletions.
5 changes: 4 additions & 1 deletion apis/python/src/tiledbsoma/_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class CollectionBase( # type: ignore[misc] # __eq__ false positive

__slots__ = ("_contents", "_mutated_keys")
_wrapper_type = _tdb_handles.GroupWrapper
_reader_wrapper_type = _tdb_handles.GroupWrapper

# TODO: Implement additional creation of members on collection subclasses.
@classmethod
Expand Down Expand Up @@ -432,7 +433,9 @@ def __getitem__(self, key: str) -> CollectionElementType:
timestamp = self.tiledb_timestamp_ms

try:
wrapper = _tdb_handles._get_wrapper(uri, mode, context, timestamp)
wrapper = _tdb_handles._open_with_clib_wrapper(
uri, mode, context, timestamp
)
entry.soma = _factory._set_internal(wrapper)
except SOMAError:
entry.soma = _factory._open_internal(
Expand Down
84 changes: 15 additions & 69 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ class DataFrame(TileDBArray, somacore.DataFrame):
it must be ``None``.
"""

_reader_wrapper_type = DataFrameWrapper

@classmethod
def create(
cls,
Expand Down Expand Up @@ -265,17 +267,6 @@ def count(self) -> int:
# if is it in read open mode, then it is a DataFrameWrapper
return cast(DataFrameWrapper, self._handle).count

def enumeration(self, name: str) -> Tuple[Any, ...]:
"""Doc place holder.
Returns:
Tuple[Any, ...]: _description_
"""
return tuple(self._soma_reader().get_enum(name))

def column_to_enumeration(self, name: str) -> str:
return str(self._soma_reader().get_enum_label_on_attr(name))

def __len__(self) -> int:
"""Returns the number of rows in the dataframe. Same as ``df.count``."""
return self.count
Expand Down Expand Up @@ -575,30 +566,14 @@ def _set_reader_coord_by_py_seq_or_np_array(
f"only 1D numpy arrays may be used to index; got {coord.ndim}"
)

# See libtiledbsoma.cc for more context on why we need the
# explicit type-check here.

if pa.types.is_int64(dim.type):
sr.set_dim_points_int64(dim.name, coord)
elif pa.types.is_int32(dim.type):
sr.set_dim_points_int32(dim.name, coord)
elif pa.types.is_int16(dim.type):
sr.set_dim_points_int16(dim.name, coord)
elif pa.types.is_int8(dim.type):
sr.set_dim_points_int8(dim.name, coord)
elif pa.types.is_uint64(dim.type):
sr.set_dim_points_uint64(dim.name, coord)
elif pa.types.is_uint32(dim.type):
sr.set_dim_points_uint32(dim.name, coord)
elif pa.types.is_uint16(dim.type):
sr.set_dim_points_uint16(dim.name, coord)
elif pa.types.is_uint8(dim.type):
sr.set_dim_points_uint8(dim.name, coord)
elif pa.types.is_float64(dim.type):
sr.set_dim_points_float64(dim.name, coord)
elif pa.types.is_float32(dim.type):
sr.set_dim_points_float32(dim.name, coord)
elif _util.pa_types_is_string_or_bytes(dim.type):
try:
set_dim_points = getattr(sr, f"set_dim_points_{dim.type}")
set_dim_points(dim.name, coord)
return True
except AttributeError:
pass

if _util.pa_types_is_string_or_bytes(dim.type):
sr.set_dim_points_string_or_bytes(dim.name, coord)
elif pa.types.is_timestamp(dim.type):
if not isinstance(coord, (tuple, list, np.ndarray)):
Expand Down Expand Up @@ -631,41 +606,12 @@ def _set_reader_coord_by_numeric_slice(
if not lo_hi:
return True

elif pa.types.is_int64(dim.type):
sr.set_dim_ranges_int64(dim.name, [lo_hi])
return True
elif pa.types.is_int32(dim.type):
sr.set_dim_ranges_int32(dim.name, [lo_hi])
return True
elif pa.types.is_int16(dim.type):
sr.set_dim_ranges_int16(dim.name, [lo_hi])
return True
elif pa.types.is_int8(dim.type):
sr.set_dim_ranges_int8(dim.name, [lo_hi])
return True
elif pa.types.is_uint64(dim.type):
sr.set_dim_ranges_uint64(dim.name, [lo_hi])
return True
elif pa.types.is_uint32(dim.type):
sr.set_dim_ranges_uint32(dim.name, [lo_hi])
return True
elif pa.types.is_uint16(dim.type):
sr.set_dim_ranges_uint16(dim.name, [lo_hi])
return True
elif pa.types.is_uint8(dim.type):
sr.set_dim_ranges_uint8(dim.name, [lo_hi])
return True
elif pa.types.is_float64(dim.type):
sr.set_dim_ranges_float64(dim.name, [lo_hi])
return True
elif pa.types.is_float32(dim.type):
sr.set_dim_ranges_float32(dim.name, [lo_hi])
try:
set_dim_range = getattr(sr, f"set_dim_ranges_{dim.type}")
set_dim_range(dim.name, [lo_hi])
return True

# TODO:
# elif dim.dtype == np.bool_:

return False
except AttributeError:
return False


def _canonicalize_schema(
Expand Down
3 changes: 3 additions & 0 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from . import _util
from ._common_nd_array import NDArray
from ._exception import SOMAError
from ._tdb_handles import ArrayWrapper
from ._util import dense_indices_to_shape
from .options._tiledb_create_options import TileDBCreateOptions

Expand Down Expand Up @@ -71,6 +72,8 @@ class DenseNDArray(NDArray, somacore.DenseNDArray):

__slots__ = ()

_reader_wrapper_type = ArrayWrapper

def read(
self,
coords: options.DenseNDCoords = (),
Expand Down
4 changes: 1 addition & 3 deletions apis/python/src/tiledbsoma/_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,7 @@ def _reify_handle(hdl: _Wrapper) -> "_tiledb_object.TileDBObject[_Wrapper]":
"""Picks out the appropriate SOMA class for a handle and wraps it."""
typename = _read_soma_type(hdl)
cls = _type_name_to_cls(typename)
if cls._wrapper_type != type(hdl) and not (
typename == "SOMADataFrame" and isinstance(hdl, _tdb_handles.DataFrameWrapper)
):
if type(hdl) not in (cls._wrapper_type, cls._reader_wrapper_type):
raise SOMAError(
f"cannot open {hdl.uri!r}: a {type(hdl._handle)}"
f" cannot be converted to a {typename}"
Expand Down
5 changes: 4 additions & 1 deletion apis/python/src/tiledbsoma/_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
SparseCOOTensorReadIter,
TableReadIter,
)
from ._tdb_handles import ArrayWrapper
from ._types import NTuple
from .options._tiledb_create_options import TileDBCreateOptions

Expand Down Expand Up @@ -93,6 +94,8 @@ class SparseNDArray(NDArray, somacore.SparseNDArray):

__slots__ = ()

_reader_wrapper_type = ArrayWrapper

# Inherited from somacore
# * ndim accessor
# * is_sparse: Final = True
Expand Down Expand Up @@ -344,7 +347,7 @@ def used_shape(self) -> Tuple[Tuple[int, int], ...]:
# In the unlikely event that a previous data update succeeded but the
# subsequent metadata update did not, take the union of the core non-empty domain
# (which is done as part of the data update) and the metadata bounding box.
ned = self.non_empty_domain() or tuple()
ned = self.non_empty_domain() or ()
for i, nedslot in enumerate(ned):
ned_lower, ned_upper = nedslot
bbox_lower, bbox_upper = retval[i]
Expand Down
52 changes: 26 additions & 26 deletions apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
Dict,
Generic,
Iterator,
List,
Mapping,
MutableMapping,
Optional,
Expand All @@ -34,7 +35,6 @@
from somacore import options
from typing_extensions import Literal, Self

from . import _tdb_handles
from . import pytiledbsoma as clib
from ._exception import DoesNotExistError, SOMAError, is_does_not_exist_error
from ._types import OpenTimestamp
Expand All @@ -57,7 +57,7 @@ def open(
raise DoesNotExistError(f"{uri!r} does not exist")

try:
return _get_wrapper(uri, mode, context, timestamp)
return _open_with_clib_wrapper(uri, mode, context, timestamp)
except SOMAError:
if obj_type == "array":
return ArrayWrapper.open(uri, mode, context, timestamp)
Expand All @@ -66,22 +66,19 @@ def open(
raise SOMAError(f"{uri!r} has unknown storage type {obj_type!r}")


def _get_wrapper(
def _open_with_clib_wrapper(
uri: str,
mode: options.OpenMode,
context: SOMATileDBContext,
timestamp: Optional[OpenTimestamp] = None,
) -> "_tdb_handles.DataFrameWrapper":
) -> "DataFrameWrapper":
open_mode = clib.OpenMode.read if mode == "r" else clib.OpenMode.write
config = {k: str(v) for k, v in context.tiledb_config.items()}
timestamp_ms = context._open_timestamp_ms(timestamp)
try:
obj = clib.SOMAObject.open(uri, open_mode, config, (0, timestamp_ms))
if obj.type == "SOMADataFrame":
return _tdb_handles.DataFrameWrapper._from_soma_object(obj, context)
raise SOMAError(f"clib.SOMAObject {obj.type!r} not yet supported")
except SOMAError as err:
raise err
obj = clib.SOMAObject.open(uri, open_mode, config, (0, timestamp_ms))
if obj.type == "SOMADataFrame":
return DataFrameWrapper._from_soma_object(obj, context)
raise SOMAError(f"clib.SOMAObject {obj.type!r} not yet supported")


@attrs.define(eq=False, hash=False, slots=False)
Expand Down Expand Up @@ -243,20 +240,17 @@ def _opener(
def schema(self) -> tiledb.ArraySchema:
return self._handle.schema

def non_empty_domain(self) -> Optional[Tuple[Tuple[Any, Any], ...]]:
def non_empty_domain(self) -> Optional[Tuple[Tuple[object, object], ...]]:
try:
ned = self._handle.nonempty_domain()
if ned is None:
return None
return cast(Tuple[Tuple[Any, Any], ...], ned)
return cast(Tuple[Tuple[object, object], ...], ned)
except tiledb.TileDBError as e:
raise SOMAError(e)

def enum(self, label: str) -> tiledb.Enumeration:
return self._handle.enum(label)

@property
def domain(self) -> Tuple[Tuple[Any, Any], ...]:
def domain(self) -> Tuple[Tuple[object, object], ...]:
dom = self._handle.schema.domain
return tuple(dom.dim(i).domain for i in range(dom.ndim))

Expand All @@ -274,6 +268,9 @@ def dim_names(self) -> Tuple[str, ...]:
schema = self._handle.schema
return tuple(schema.domain.dim(i).name for i in range(schema.domain.ndim))

def enum(self, label: str) -> tiledb.Enumeration:
return self._handle.enum(label)


@attrs.define(frozen=True)
class GroupEntry:
Expand Down Expand Up @@ -325,12 +322,15 @@ def _opener(
timestamp: int,
) -> clib.SOMADataFrame:
open_mode = clib.OpenMode.read if mode == "r" else clib.OpenMode.write
config = {k: str(v) for k, v in context.tiledb_config.items()}
column_names: List[str] = []
result_order = clib.ResultOrder.automatic
return clib.SOMADataFrame.open(
uri,
open_mode,
{k: str(v) for k, v in context.tiledb_config.items()},
[],
clib.ResultOrder.automatic,
config,
column_names,
result_order,
(0, timestamp),
)

Expand All @@ -355,15 +355,15 @@ def meta(self) -> "MetadataWrapper":

@property
def ndim(self) -> int:
return int(len(self._handle.index_column_names))
return len(self._handle.index_column_names)

@property
def count(self) -> int:
return int(self._handle.count)

def _cast_domain(
self, domain: Callable[[str, DTypeLike], Tuple[Any, Any]]
) -> Tuple[Tuple[Any, Any], ...]:
self, domain: Callable[[str, DTypeLike], Tuple[object, object]]
) -> Tuple[Tuple[object, object], ...]:
result = []
for name in self._handle.index_column_names:
dtype = self._handle.schema.field(name).type
Expand All @@ -387,12 +387,12 @@ def _cast_domain(
return tuple(result)

@property
def domain(self) -> Tuple[Tuple[Any, Any], ...]:
def domain(self) -> Tuple[Tuple[object, object], ...]:
return self._cast_domain(self._handle.domain)

def non_empty_domain(self) -> Optional[Tuple[Tuple[Any, Any], ...]]:
def non_empty_domain(self) -> Optional[Tuple[Tuple[object, object], ...]]:
result = self._cast_domain(self._handle.non_empty_domain)
return None if len(result) == 0 else result
return result or None

@property
def attr_names(self) -> Tuple[str, ...]:
Expand Down
7 changes: 6 additions & 1 deletion apis/python/src/tiledbsoma/_tiledb_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@ def open(
del platform_config # unused
context = _validate_soma_tiledb_context(context)
try:
handle = _tdb_handles._get_wrapper(uri, mode, context, tiledb_timestamp)
handle = _tdb_handles._open_with_clib_wrapper(
uri, mode, context, tiledb_timestamp
)
except SOMAError:
handle = cls._wrapper_type.open(uri, mode, context, tiledb_timestamp)
return cls(
Expand Down Expand Up @@ -125,6 +127,9 @@ def __init__(
self._close_stack.enter_context(self._handle)

_wrapper_type: Type[_WrapperType_co]
_reader_wrapper_type: Union[
Type[_WrapperType_co], Type[_tdb_handles.DataFrameWrapper]
]
"""Class variable of the Wrapper class used to open this object type."""

@property
Expand Down
11 changes: 5 additions & 6 deletions apis/python/src/tiledbsoma/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,17 +265,16 @@ def ms_to_datetime(millis: int) -> datetime.datetime:


def to_clib_result_order(result_order: options.ResultOrderStr) -> clib.ResultOrder:
result_order = options.ResultOrder(result_order)
to_clib_result_order = {
options.ResultOrder.AUTO: clib.ResultOrder.automatic,
options.ResultOrder.ROW_MAJOR: clib.ResultOrder.rowmajor,
options.ResultOrder.COLUMN_MAJOR: clib.ResultOrder.colmajor,
"auto": clib.ResultOrder.automatic,
"row-major": clib.ResultOrder.rowmajor,
"column-major": clib.ResultOrder.colmajor,
}
if result_order not in to_clib_result_order:
raise ValueError(f"Invalid result_order: {result_order}")
return to_clib_result_order[result_order]
try:
return to_clib_result_order[result_order]
except KeyError as ke:
raise ValueError(f"Invalid result_order: {result_order}") from ke


def pa_types_is_string_or_bytes(dtype: pa.DataType) -> bool:
Expand Down
Loading

0 comments on commit 03c0d9f

Please sign in to comment.