Skip to content

Commit

Permalink
rebase prep
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 4, 2022
1 parent f4c51e7 commit 338587b
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 57 deletions.
54 changes: 1 addition & 53 deletions apis/python/src/tiledbsoma/soma_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,8 @@
import pyarrow as pa
import tiledb

import tiledbsoma.libtiledbsoma as clib

from . import util, util_arrow, util_tiledb
from .logging import log_io
from .query_condition import QueryCondition
from .soma_collection import SOMACollectionBase
from .tiledb_array import TileDBArray
from .types import Ids, NTuple, SOMAResultOrder
Expand Down Expand Up @@ -160,55 +157,6 @@ def is_indexed(self) -> Literal[False]:
def get_index_column_names(self) -> Sequence[str]:
    """
    Return the names of the index columns.

    This implementation always returns a new, empty list.
    """
    no_index_columns: Sequence[str] = list()
    return no_index_columns

def read_using_lib_temp(
    self,
    *,
    # TODO: find the right syntax to get the typechecker to accept args like ``ids=slice(0,10)``
    # ids: Optional[Union[Sequence[int], Slice]] = None,
    ids: Optional[Any] = None,
    value_filter: Optional[str] = None,
    column_names: Optional[Sequence[str]] = None,
    result_order: Optional[str] = None,
    # TODO: batch_size
    # TODO: partition,
    # TODO: platform_config,
) -> Iterator[pa.Table]:
    """
    Temporary read path that streams results through ``clib.SOMAReader``.

    The array is opened for reading, a ``SOMAReader`` is constructed from this object's URI, name and schema, the query is submitted, and every table produced by ``read_next`` is yielded in turn.

    :param ids: Accepted for signature parity with ``read``; currently ignored by this method.
    :param value_filter: Optional filter expression. When given, it is wrapped in a ``QueryCondition`` and handed to the reader.
    :param column_names: Optional names of the columns to read. ``None`` is passed to the reader as an empty list.
    :param result_order: Currently ignored (see the TODO below).
    :return: An iterator over the tables returned by the reader.
    """

    with self._tiledb_open("r") as A:
        # The split result is not used here; the call is kept in case it
        # validates ``column_names`` against the schema -- TODO confirm, and
        # drop the call if it has no such effect.
        util_tiledb.split_column_names(A.schema, column_names)

        query_condition = (
            QueryCondition(value_filter) if value_filter is not None else None
        )

        # As an arg to this method, `column_names` is optional-None. For the pybind11
        # code it's optional-[].
        lib_column_names = [] if column_names is None else column_names

        sr = clib.SOMAReader(
            self._uri,
            name=self.name,
            schema=A.schema,  # query_condition needs this
            column_names=lib_column_names,
            query_condition=query_condition,
        )

        # TODO: platform_config
        # TODO: batch_size
        # TODO: result_order

        sr.submit()

        # Test against None explicitly: a ``pa.Table`` with zero rows is falsy,
        # so a bare truthiness test would stop at the first empty table even if
        # more results were pending.
        # NOTE(review): assumes ``read_next`` returns None once the reader is
        # exhausted -- confirm against the SOMAReader binding.
        while (arrow_table := sr.read_next()) is not None:
            # TODO: decide what post-processing, if any, the table needs
            # (e.g. ASCII-to-unicode conversion on readback).
            yield arrow_table

def read(
self,
*,
Expand Down Expand Up @@ -290,7 +238,7 @@ def read_all(
# TODO: platform_config,
) -> pa.Table:
"""
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simply unit-test cases.
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simplify unit-test cases.
"""
return pa.concat_tables(
self.read(
Expand Down
8 changes: 4 additions & 4 deletions apis/python/src/tiledbsoma/soma_indexed_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,17 +275,17 @@ def read_all(
# TODO: platform_config,
) -> pa.Table:
"""
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simply unit-test cases.
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the record batches found. Its nominal use is to simplify unit-test cases.
"""
return pa.concat_tables(
self.read(ids=ids, value_filter=value_filter, column_names=column_names)
)

def write(self, values: pa.Table) -> None:
def write(self, values: pa.RecordBatch) -> None:
"""
Write an Arrow.Table to the persistent object. As duplicate index values are not allowed, index values already present in the object are overwritten and new index values are added.
Write an Arrow.RecordBatch to the persistent object. As duplicate index values are not allowed, index values already present in the object are overwritten and new index values are added.
:param values: An Arrow.Table containing all columns, including the index columns. The schema for the values must match the schema for the ``SOMAIndexedDataFrame``.
:param values: An Arrow.RecordBatch containing all columns, including the index columns. The schema for the values must match the schema for the ``SOMAIndexedDataFrame``.
"""
self._shape = None # cache-invalidate

Expand Down

0 comments on commit 338587b

Please sign in to comment.