Skip to content

Commit

Permalink
Iterating from SOMADataFrame
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 3, 2022
1 parent c89409a commit f6bb262
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,6 @@ apis/python/src/tiledbsoma/libtiledb.*
apis/python/src/tiledbsoma/libtiledbsoma.*

/.quarto/
/tags

/NOTES/
50 changes: 50 additions & 0 deletions apis/python/src/tiledbsoma/soma_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd
import pyarrow as pa
import tiledb
import tiledbsoma.libtiledbsoma as clib

from . import util, util_arrow, util_tiledb
from .logging import log_io
Expand Down Expand Up @@ -157,6 +158,55 @@ def is_indexed(self) -> Literal[False]:
def get_index_column_names(self) -> Sequence[str]:
    """Return the names of the index columns; this implementation always returns an empty list."""
    no_index_columns: Sequence[str] = []
    return no_index_columns

def read_using_lib_temp(
    self,
    *,
    # TODO: find the right syntax to get the typechecker to accept args like ``ids=slice(0,10)``
    # ids: Optional[Union[Sequence[int], Slice]] = None,
    ids: Optional[Any] = None,
    value_filter: Optional[str] = None,
    column_names: Optional[Sequence[str]] = None,
    result_order: Optional[str] = None,
    # TODO: batch_size
    # TODO: partition,
    # TODO: platform_config,
) -> Iterator[pa.Table]:
    """
    Temporary read path that iterates over the dataframe using ``clib.SOMAReader``.

    This is a generator: the array is opened, the reader is constructed and the query is
    submitted only once iteration starts, and the array stays open until the generator is
    exhausted or closed.

    TODO: copy the text

    :param ids: Accepted, but not yet applied to the query (see TODOs below).
    :param value_filter: Optional filter expression. When given, it is wrapped in a
        ``QueryCondition`` and handed to the reader.
    :param column_names: Optional names of the columns to read. ``None`` is passed to the
        reader as an empty list.
    :param result_order: Accepted, but not yet applied to the query (see TODOs below).
    :returns: An iterator over the Arrow tables produced by successive ``read_next()`` calls.
    """

    with self._tiledb_open("r") as A:
        # NOTE(review): the split result is not used below; the call is kept so that any
        # checking it performs on ``column_names`` still happens -- confirm whether needed.
        dim_names, attr_names = util_tiledb.split_column_names(
            A.schema, column_names
        )

        query_condition = None
        if value_filter is not None:
            # query_condition = tiledb.QueryCondition(value_filter)
            query_condition = QueryCondition(value_filter)

        # As an arg to this method, `column_names` is optional-None. For the pybind11
        # code it's optional-[].
        lib_column_names = [] if column_names is None else column_names

        sr = clib.SOMAReader(
            self._uri,
            name=self.name,
            schema=A.schema,  # query_condition needs this
            column_names=lib_column_names,
            query_condition=query_condition,
        )

        # TODO: platform_config
        # TODO: batch_size
        # TODO: result_order

        sr.submit()

        # Test against None explicitly instead of relying on truthiness: a pyarrow Table
        # with zero rows has len() == 0 and is therefore falsy, so a bare
        # ``while arrow_table := ...`` would stop at the first empty batch and drop any
        # batches after it.
        # NOTE(review): assumes read_next() returns None once the query is complete --
        # confirm against the SOMAReader binding.
        while (arrow_table := sr.read_next()) is not None:
            # yield util_arrow.ascii_to_unicode_pyarrow_readback(batch)
            yield arrow_table  # XXX what other post-processing

def read(
self,
*,
Expand Down
2 changes: 1 addition & 1 deletion apis/python/tests/test_soma_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def _check_tbl(tbl, col_names, ids):
print(r)
print("INPUT SCHEMA")
print(i)
print("CMP", r==i)
print("CMP", r == i)

print("")

Expand Down

0 comments on commit f6bb262

Please sign in to comment.