diff --git a/.gitignore b/.gitignore
index 332070c63b..8629e6b682 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,5 +52,6 @@ apis/python/src/tiledbsoma/libtiledb.*
 apis/python/src/tiledbsoma/libtiledbsoma.*
 
 /.quarto/
+/tags
 /NOTES/
 
diff --git a/apis/python/src/tiledbsoma/soma_dataframe.py b/apis/python/src/tiledbsoma/soma_dataframe.py
index 8add606d03..00e73639ac 100644
--- a/apis/python/src/tiledbsoma/soma_dataframe.py
+++ b/apis/python/src/tiledbsoma/soma_dataframe.py
@@ -4,6 +4,7 @@
 import pandas as pd
 import pyarrow as pa
 import tiledb
+import tiledbsoma.libtiledbsoma as clib
 
 from . import util, util_arrow, util_tiledb
 from .logging import log_io
@@ -157,6 +158,68 @@ def is_indexed(self) -> Literal[False]:
     def get_index_column_names(self) -> Sequence[str]:
         return []
 
+    def read_using_lib_temp(
+        self,
+        *,
+        # TODO: find the right syntax to get the typechecker to accept args like ``ids=slice(0,10)``
+        # ids: Optional[Union[Sequence[int], Slice]] = None,
+        ids: Optional[Any] = None,
+        value_filter: Optional[str] = None,
+        column_names: Optional[Sequence[str]] = None,
+        result_order: Optional[str] = None,
+        # TODO: batch_size
+        # TODO: partition,
+        # TODO: platform_config,
+    ) -> Iterator[pa.Table]:
+        """
+        Temporary read path: stream Arrow tables via ``clib.SOMAReader``.
+
+        TODO: copy the text
+
+        :param ids: Accepted but not yet applied to the query.
+        :param value_filter: Optional filter expression; when not None it is wrapped
+            in a ``QueryCondition`` and passed to the reader.
+        :param column_names: Columns to request; None is sent to the reader as ``[]``.
+        :param result_order: Accepted but not yet applied to the query.
+        :return: Iterator over the tables returned by successive ``read_next()`` calls.
+        """
+
+        with self._tiledb_open("r") as A:
+            # The split result is unused here. The call is kept in case it validates
+            # ``column_names`` against the schema -- NOTE(review): confirm or remove.
+            util_tiledb.split_column_names(A.schema, column_names)
+
+            query_condition = None
+            if value_filter is not None:
+                # NOTE(review): assumes ``QueryCondition`` is already imported here.
+                query_condition = QueryCondition(value_filter)
+
+            # As an arg to this method, `column_names` is optional-None. For the pybind11
+            # code it's optional-[].
+            lib_column_names = [] if column_names is None else column_names
+
+            sr = clib.SOMAReader(
+                self._uri,
+                name=self.name,
+                schema=A.schema,  # query_condition needs this
+                column_names=lib_column_names,
+                query_condition=query_condition,
+            )
+
+            # TODO: platform_config
+            # TODO: batch_size
+            # TODO: result_order
+
+            sr.submit()
+
+            # Test against None instead of relying on truthiness: a ``pa.Table`` with
+            # zero rows has length 0 and is falsy, which would stop the iteration early
+            # and drop any later batches.
+            # NOTE(review): assumes ``read_next()`` returns None once exhausted.
+            while (arrow_table := sr.read_next()) is not None:
+                # TODO: is util_arrow.ascii_to_unicode_pyarrow_readback needed here?
+                yield arrow_table  # XXX what other post-processing
+
     def read(
         self,
         *,
diff --git a/apis/python/tests/test_soma_dataframe.py b/apis/python/tests/test_soma_dataframe.py
index 76f4f27564..5d67a1ff9d 100644
--- a/apis/python/tests/test_soma_dataframe.py
+++ b/apis/python/tests/test_soma_dataframe.py
@@ -192,7 +192,7 @@ def _check_tbl(tbl, col_names, ids):
         print(r)
         print("INPUT SCHEMA")
         print(i)
-        print("CMP", r==i)
+        print("CMP", r == i)
         print("")
 
 