Skip to content

Commit

Permalink
Iterating from SOMADataFrame
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 4, 2022
1 parent 85673fb commit 1f9bb87
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,6 @@ apis/python/src/tiledbsoma/libtiledb.*
apis/python/src/tiledbsoma/libtiledbsoma.*

/.quarto/
/tags

/NOTES/
52 changes: 52 additions & 0 deletions apis/python/src/tiledbsoma/soma_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
import pyarrow as pa
import tiledb

import tiledbsoma.libtiledbsoma as clib

from . import util, util_arrow, util_tiledb
from .logging import log_io
from .query_condition import QueryCondition
from .soma_collection import SOMACollectionBase
from .tiledb_array import TileDBArray
from .types import Ids, NTuple, SOMAResultOrder
Expand Down Expand Up @@ -157,6 +160,55 @@ def is_indexed(self) -> Literal[False]:
def get_index_column_names(self) -> Sequence[str]:
    """
    Return the names of the index columns.

    This implementation always returns a fresh empty list.
    """
    index_column_names: Sequence[str] = []
    return index_column_names

def read_using_lib_temp(
    self,
    *,
    # TODO: find the right syntax to get the typechecker to accept args like ``ids=slice(0,10)``
    # ids: Optional[Union[Sequence[int], Slice]] = None,
    ids: Optional[Any] = None,
    value_filter: Optional[str] = None,
    column_names: Optional[Sequence[str]] = None,
    result_order: Optional[str] = None,
    # TODO: batch_size
    # TODO: partition,
    # TODO: platform_config,
) -> Iterator[pa.Table]:
    """
    Temporary read path that iterates over this dataframe via ``clib.SOMAReader``.

    The array is opened in read mode, a ``clib.SOMAReader`` is constructed from this
    object's URI, name and the array schema, the query is submitted, and each table
    returned by ``read_next()`` is yielded in turn.

    :param ids: Accepted for signature compatibility; currently NOT applied to the query.
    :param value_filter: Optional filter expression. When given, it is wrapped in a
        ``QueryCondition`` and passed to the reader.
    :param column_names: Optional names of the columns to read. ``None`` is passed to
        the reader as an empty list.
    :param result_order: Accepted for signature compatibility; currently NOT applied
        to the query.
    :return: An iterator of Arrow tables, one per ``read_next()`` result.

    TODO: copy the full text from the ``read`` docstring once this replaces it.
    """

    with self._tiledb_open("r") as A:
        # NOTE(review): the split result is not consumed below. The call is retained
        # in case the helper validates ``column_names`` against the schema -- confirm,
        # then either use the result or drop the call.
        _dim_names, _attr_names = util_tiledb.split_column_names(
            A.schema, column_names
        )

        query_condition = None
        if value_filter is not None:
            query_condition = QueryCondition(value_filter)

        # As an arg to this method, `column_names` is optional-None. For the pybind11
        # code it's optional-[].
        lib_column_names = [] if column_names is None else column_names

        sr = clib.SOMAReader(
            self._uri,
            name=self.name,
            schema=A.schema,  # query_condition needs this
            column_names=lib_column_names,
            query_condition=query_condition,
        )

        # TODO: platform_config
        # TODO: batch_size
        # TODO: result_order

        sr.submit()

        # Compare against None explicitly instead of relying on truthiness: an Arrow
        # table with zero rows has length 0 and is therefore falsy, which would end
        # the iteration early and silently drop any later batches.
        # Assumes ``read_next()`` returns None once the query is exhausted -- TODO confirm.
        while (arrow_table := sr.read_next()) is not None:
            yield arrow_table  # XXX what other post-processing

def read(
self,
*,
Expand Down

0 comments on commit 1f9bb87

Please sign in to comment.