Port the essence of #360

single-cell-data · Oct 20, 2022 · 9aa952e · 9aa952e
1 parent 0ca798a
commit 9aa952e
Show file tree

Hide file tree

Showing 3 changed files with 81 additions and 66 deletions.
diff --git a/apis/python/src/tiledbsoma/soma_dataframe.py b/apis/python/src/tiledbsoma/soma_dataframe.py
@@ -5,8 +5,12 @@
 import pyarrow as pa
 import tiledb
 
-from . import util_arrow, util_tiledb
+# This package's pybind11 code
+import tiledbsoma.libtiledbsoma as clib
+
+from . import util, util_arrow
 from .constants import SOMA_JOINID, SOMA_ROWID
+from .query_condition import QueryCondition  # type: ignore
 from .soma_collection import SOMACollectionBase
 from .tiledb_array import TileDBArray
 from .types import Ids, SOMAResultOrder
@@ -150,40 +154,35 @@ def read(
 
         **Indexing**: the ``ids`` parameter will support, per dimension: a row offset (uint), a row-offset range (slice), or a list of both.
         """
-        tiledb_result_order = util_tiledb.tiledb_result_order_from_soma_result_order(
-            result_order, accept=["rowid-ordered", "unordered"]
-        )
-
         with self._tiledb_open("r") as A:
-            dim_names, attr_names = util_tiledb.split_column_names(
-                A.schema, column_names
+            query_condition = None
+            if value_filter is not None:
+                query_condition = QueryCondition(value_filter)
+
+            # As an arg to this method, `column_names` is optional-None. For the pybind11
+            # code it's optional-[].
+            lib_column_names = [] if column_names is None else column_names
+
+            sr = clib.SOMAReader(
+                self._uri,
+                name=self.__class__.__name__,
+                schema=A.schema,  # query_condition needs this
+                column_names=lib_column_names,
+                query_condition=query_condition,
             )
-            if value_filter is None:
-                query = A.query(
-                    return_arrow=True,
-                    return_incomplete=True,
-                    order=tiledb_result_order,
-                    dims=dim_names,
-                    attrs=attr_names,
-                )
-            else:
-                qc = tiledb.QueryCondition(value_filter)
-                query = A.query(
-                    return_arrow=True,
-                    return_incomplete=True,
-                    attr_cond=qc,
-                    order=tiledb_result_order,
-                    dims=dim_names,
-                    attrs=attr_names,
-                )
-
-            if ids is None:
-                iterator = query.df[:]
-            else:
-                iterator = query.df[ids]
 
-            for table in iterator:
-                yield table
+            if ids is not None:
+                # XXX TODO NEEDS TO ALWAYS BE A LIST NO MATTER WHAT
+                if isinstance(ids, slice):
+                    ids = util.slice_to_list(ids)
+                sr.set_dim_points(SOMA_ROWID, ids)
+            # TODO: platform_config
+            # TODO: batch_size
+            # TODO: result_order
+            sr.submit()
+
+            while arrow_table := sr.read_next():
+                yield arrow_table  # XXX what other post-processing
 
     def read_all(
         self,

diff --git a/apis/python/src/tiledbsoma/soma_indexed_dataframe.py b/apis/python/src/tiledbsoma/soma_indexed_dataframe.py
@@ -5,8 +5,12 @@
 import pyarrow as pa
 import tiledb
 
-from . import util_arrow, util_tiledb
+# This package's pybind11 code
+import tiledbsoma.libtiledbsoma as clib
+
+from . import util, util_arrow
 from .constants import SOMA_JOINID
+from .query_condition import QueryCondition  # type: ignore
 from .soma_collection import SOMACollectionBase
 from .tiledb_array import TileDBArray
 from .types import Ids, SOMAResultOrder
@@ -182,41 +186,37 @@ def read(
 
         **Indexing**: the ``ids`` parameter will support, per dimension: a list of values of the type of the indexed column.
         """
-        tiledb_result_order = util_tiledb.tiledb_result_order_from_soma_result_order(
-            result_order, accept=["row-major", "column-major", "unordered"]
-        )
-
-        # TODO: more about index_column_names
         with self._tiledb_open("r") as A:
-            dim_names, attr_names = util_tiledb.split_column_names(
-                A.schema, column_names
+            query_condition = None
+            if value_filter is not None:
+                query_condition = QueryCondition(value_filter)
+
+            # As an arg to this method, `column_names` is optional-None. For the pybind11
+            # code it's optional-[].
+            lib_column_names = [] if column_names is None else column_names
+
+            sr = clib.SOMAReader(
+                self._uri,
+                name=self.__class__.__name__,
+                schema=A.schema,  # query_condition needs this
+                column_names=lib_column_names,
+                query_condition=query_condition,
             )
-            if value_filter is None:
-                query = A.query(
-                    return_arrow=True,
-                    return_incomplete=True,
-                    order=tiledb_result_order,
-                    dims=dim_names,
-                    attrs=attr_names,
-                )
-            else:
-                qc = tiledb.QueryCondition(value_filter)
-                query = A.query(
-                    return_arrow=True,
-                    return_incomplete=True,
-                    attr_cond=qc,
-                    order=tiledb_result_order,
-                    dims=dim_names,
-                    attrs=attr_names,
-                )
-
-            if ids is None:
-                iterator = query.df[:]
-            else:
-                iterator = query.df[ids]
 
-            for table in iterator:
-                yield table
+            if ids is not None:
+                # XXX TODO NEEDS TO ALWAYS BE A LIST NO MATTER WHAT
+                if isinstance(ids, slice):
+                    ids = util.slice_to_list(ids)
+                sr.set_dim_points(A.schema.domain.dim(0).name, ids)
+
+            # TODO: platform_config
+            # TODO: batch_size
+            # TODO: result_order
+
+            sr.submit()
+
+            while arrow_table := sr.read_next():
+                yield arrow_table  # XXX what other post-processing
 
     def read_all(
         self,

diff --git a/apis/python/src/tiledbsoma/util.py b/apis/python/src/tiledbsoma/util.py
@@ -1,7 +1,7 @@
 import pathlib
 import time
 import urllib.parse
-from typing import TypeVar
+from typing import List, TypeVar
 
 import numpy as np
 import pandas as pd
@@ -91,3 +91,19 @@ def uri_joinpath(base: str, path: str) -> str:
                 parts[2] = parts[2] + "/" + path
 
     return urllib.parse.urlunparse(parts)
+
+
+def slice_to_list(sl: slice) -> List[int]:
+    """
+    TODO: COMMENT GOES HERE
+    """
+    # xxx note tiledb doesn't support stepped slices -- only 1 (and maybe -1)?
+    assert isinstance(sl, slice)
+    step = sl.step
+    if step is None:
+        if sl.start <= sl.stop:
+            step = 1
+        else:
+            step = -1
+    stop = sl.stop + step
+    return list(range(sl.start, stop, step))