From 2088997a3dd84a0cfa3abf08adbe6ba9a132399f Mon Sep 17 00:00:00 2001
From: John Kerl
Date: Tue, 4 Oct 2022 11:25:52 -0400
Subject: [PATCH] rebase prep

---
 .github/workflows/ci.yml                           |  2 -
 .github/workflows/cpp-ci.yml                       |  1 +
 apis/python/src/tiledbsoma/soma_dataframe.py       | 54 +------------------
 .../src/tiledbsoma/soma_indexed_dataframe.py       |  8 +--
 4 files changed, 6 insertions(+), 59 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c2a0bed359..3d9e23d64a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,8 +20,6 @@ jobs:
         - runs-on: ubuntu-22.04
           cc: gcc-11
           cxx: g++-11
-        - runs-on: macos-11
-          # Pending https://github.com/actions/runner-images/issues/6350
         - runs-on: macos-11
           cc: gcc-11
           cxx: g++-11
diff --git a/.github/workflows/cpp-ci.yml b/.github/workflows/cpp-ci.yml
index e22f826532..0b0345e0b1 100644
--- a/.github/workflows/cpp-ci.yml
+++ b/.github/workflows/cpp-ci.yml
@@ -18,6 +18,7 @@ jobs:
           cc: gcc-11
           cxx: g++-11
         # Pending https://github.com/actions/runner-images/issues/6350
+        # - runs-on: macos-12
         - runs-on: macos-11
           cc: gcc-11
           cxx: g++-11
diff --git a/apis/python/src/tiledbsoma/soma_dataframe.py b/apis/python/src/tiledbsoma/soma_dataframe.py
index 0be6a7660d..ce3f3be8fe 100644
--- a/apis/python/src/tiledbsoma/soma_dataframe.py
+++ b/apis/python/src/tiledbsoma/soma_dataframe.py
@@ -5,11 +5,8 @@
 import pyarrow as pa
 import tiledb

-import tiledbsoma.libtiledbsoma as clib
-
 from . import util, util_arrow, util_tiledb
 from .logging import log_io
-from .query_condition import QueryCondition
 from .soma_collection import SOMACollectionBase
 from .tiledb_array import TileDBArray
 from .types import Ids, NTuple, SOMAResultOrder
@@ -160,55 +157,6 @@ def is_indexed(self) -> Literal[False]:
     def get_index_column_names(self) -> Sequence[str]:
         return []

-    def read_using_lib_temp(
-        self,
-        *,
-        # TODO: find the right syntax to get the typechecker to accept args like ``ids=slice(0,10)``
-        # ids: Optional[Union[Sequence[int], Slice]] = None,
-        ids: Optional[Any] = None,
-        value_filter: Optional[str] = None,
-        column_names: Optional[Sequence[str]] = None,
-        result_order: Optional[str] = None,
-        # TODO: batch_size
-        # TODO: partition,
-        # TODO: platform_config,
-    ) -> Iterator[pa.Table]:
-        """
-        TODO: copy the text
-        """
-
-        with self._tiledb_open("r") as A:
-            dim_names, attr_names = util_tiledb.split_column_names(
-                A.schema, column_names
-            )
-
-            query_condition = None
-            if value_filter is not None:
-                # query_condition = tiledb.QueryCondition(value_filter)
-                query_condition = QueryCondition(value_filter)
-
-            # As an arg to this method, `column_names` is optional-None. For the pybind11
-            # code it's optional-[].
-            lib_column_names = [] if column_names is None else column_names
-
-            sr = clib.SOMAReader(
-                self._uri,
-                name=self.name,
-                schema=A.schema,  # query_condition needs this
-                column_names=lib_column_names,
-                query_condition=query_condition,
-            )
-
-            # TODO: platform_config
-            # TODO: batch_size
-            # TODO: result_order
-
-            sr.submit()
-
-            while arrow_table := sr.read_next():
-                # yield util_arrow.ascii_to_unicode_pyarrow_readback(batch)
-                yield arrow_table  # XXX what other post-processing
-
     def read(
         self,
         *,
@@ -290,7 +238,7 @@ def read_all(
         # TODO: platform_config,
     ) -> pa.Table:
         """
-        This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simply unit-test cases.
+        This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simplify unit-test cases.
         """
         return pa.concat_tables(
             self.read(
diff --git a/apis/python/src/tiledbsoma/soma_indexed_dataframe.py b/apis/python/src/tiledbsoma/soma_indexed_dataframe.py
index 539d4e5140..a6afc17123 100644
--- a/apis/python/src/tiledbsoma/soma_indexed_dataframe.py
+++ b/apis/python/src/tiledbsoma/soma_indexed_dataframe.py
@@ -275,17 +275,17 @@ def read_all(
         # TODO: platform_config,
     ) -> pa.Table:
         """
-        This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simply unit-test cases.
+        This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the record batches found. Its nominal use is to simplify unit-test cases.
        """
         return pa.concat_tables(
             self.read(ids=ids, value_filter=value_filter, column_names=column_names)
         )

-    def write(self, values: pa.Table) -> None:
+    def write(self, values: pa.RecordBatch) -> None:
         """
-        Write an Arrow.Table to the persistent object. As duplicate index values are not allowed, index values already present in the object are overwritten and new index values are added.
+        Write an Arrow.RecordBatch to the persistent object. As duplicate index values are not allowed, index values already present in the object are overwritten and new index values are added.

-        :param values: An Arrow.Table containing all columns, including the index columns. The schema for the values must match the schema for the ``SOMAIndexedDataFrame``.
+        :param values: An Arrow.RecordBatch containing all columns, including the index columns. The schema for the values must match the schema for the ``SOMAIndexedDataFrame``.
         """
         self._shape = None  # cache-invalidate