diff --git a/.github/workflows/python-ci-single.yml b/.github/workflows/python-ci-single.yml
index 4803dfe260..a67076231b 100644
--- a/.github/workflows/python-ci-single.yml
+++ b/.github/workflows/python-ci-single.yml
@@ -122,13 +122,6 @@ jobs:
       - name: Run libtiledbsoma unit tests
         run: ctest --output-on-failure --test-dir build/libtiledbsoma -C Release --verbose

-      - name: Run pytests for C++
-        shell: bash
-        # Setting PYTHONPATH ensures the tests load the in-tree source code under apis/python/src
-        # instead of the copy we `pip install`ed to site-packages above. That's needed for the code
-        # coverage analysis to work.
-        run: PYTHONPATH=$(pwd)/apis/python/src python -m pytest --cov=apis/python/src --cov-report=xml libtiledbsoma/test -v --durations=20
-
       - name: Run pytests for Python
         shell: bash
         # Setting PYTHONPATH ensures the tests load the in-tree source code under apis/python/src
diff --git a/Makefile b/Makefile
index cdcebb3835..f033dce8d8 100644
--- a/Makefile
+++ b/Makefile
@@ -32,7 +32,7 @@ update:
 .PHONY: test
 test: data
 	ctest --test-dir build/libtiledbsoma -C Release --verbose --rerun-failed --output-on-failure
-	pytest apis/python/tests libtiledbsoma/test
+	pytest apis/python/tests

 .PHONY: data
 data:
diff --git a/apis/python/setup.py b/apis/python/setup.py
index 78f475870e..5640a9c820 100644
--- a/apis/python/setup.py
+++ b/apis/python/setup.py
@@ -186,9 +186,6 @@ def run(self):
         "dist_links/libtiledbsoma/external/include",
         "../../build/externals/install/include",
         str(libtiledbsoma_dir / "include"),
-        str(
-            "./src/tiledbsoma"
-        ),  # since pytiledbsoma.cc does #include of query_condition.cc
         str(libtiledbsoma_dir.parent / "build/externals/install/include"),
         str(tiledb_dir / "include"),
     ]
@@ -258,7 +255,14 @@ def run(self):
     ext_modules=[
         Pybind11Extension(
             "tiledbsoma.pytiledbsoma",
-            ["src/tiledbsoma/pytiledbsoma.cc"],
+            [
+                "src/tiledbsoma/common.cc",
+                "src/tiledbsoma/query_condition.cc",
+                "src/tiledbsoma/soma_array.cc",
+                "src/tiledbsoma/soma_object.cc",
+                "src/tiledbsoma/soma_dataframe.cc",
+                "src/tiledbsoma/pytiledbsoma.cc",
+            ],
             include_dirs=INC_DIRS,
             library_dirs=LIB_DIRS,
             libraries=["tiledbsoma"] + (["tiledb"] if os.name == "nt" else []),
diff --git a/apis/python/src/tiledbsoma/_collection.py b/apis/python/src/tiledbsoma/_collection.py
index 131435d973..09934d5811 100644
--- a/apis/python/src/tiledbsoma/_collection.py
+++ b/apis/python/src/tiledbsoma/_collection.py
@@ -75,6 +75,7 @@ class CollectionBase(  # type: ignore[misc]  # __eq__ false positive
     __slots__ = ("_contents", "_mutated_keys")
     _wrapper_type = _tdb_handles.GroupWrapper
+    _reader_wrapper_type = _tdb_handles.GroupWrapper

     # TODO: Implement additional creation of members on collection subclasses.
     @classmethod
@@ -426,13 +427,20 @@ def __getitem__(self, key: str) -> CollectionElementType:
         if entry.soma is None:
             from . import _factory  # Delayed binding to resolve circular import.

-            entry.soma = _factory._open_internal(
-                entry.entry.wrapper_type.open,
-                entry.entry.uri,
-                self.mode,
-                self.context,
-                self.tiledb_timestamp_ms,
-            )
+            uri = entry.entry.uri
+            mode = self.mode
+            context = self.context
+            timestamp = self.tiledb_timestamp_ms
+
+            try:
+                wrapper = _tdb_handles._open_with_clib_wrapper(
+                    uri, mode, context, timestamp
+                )
+                entry.soma = _factory.reify_handle(wrapper)
+            except SOMAError:
+                entry.soma = _factory._open_internal(
+                    entry.entry.wrapper_type.open, uri, mode, context, timestamp
+                )
             # Since we just opened this object, we own it and should close it.
             self._close_stack.enter_context(entry.soma)
         return cast(CollectionElementType, entry.soma)
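The `__getitem__` change above tries the pybind11-backed ("clib") wrapper first and falls back to the legacy tiledb-py path when the object type is not yet ported, mirroring `_tdb_handles.open` further down. A minimal sketch of that pattern, with hypothetical helper names (`open_with_clib`, `open_with_tiledb_py`):

    from tiledbsoma import SOMAError

    def open_preferring_clib(uri, mode, context, timestamp_ms):
        # Hypothetical helpers: the clib path raises SOMAError for object
        # types it does not support yet, which triggers the fallback.
        try:
            return open_with_clib(uri, mode, context, timestamp_ms)
        except SOMAError:
            return open_with_tiledb_py(uri, mode, context, timestamp_ms)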
diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py
index 90ce28cdb3..fd3700ecb5 100644
--- a/apis/python/src/tiledbsoma/_dataframe.py
+++ b/apis/python/src/tiledbsoma/_dataframe.py
@@ -21,6 +21,7 @@
 from ._constants import SOMA_JOINID
 from ._query_condition import QueryCondition
 from ._read_iters import TableReadIter
+from ._tdb_handles import DataFrameWrapper
 from ._tiledb_array import TileDBArray
 from ._types import NPFloating, NPInteger, OpenTimestamp, Slice, is_slice_of
 from .options import SOMATileDBContext
@@ -121,6 +122,8 @@ class DataFrame(TileDBArray, somacore.DataFrame):
         it must be ``None``.
     """

+    _reader_wrapper_type = DataFrameWrapper
+
     @classmethod
     def create(
         cls,
@@ -261,18 +264,8 @@ def count(self) -> int:
             Experimental.
         """
         self._check_open_read()
-        return cast(int, self._soma_reader().nnz())
-
-    def enumeration(self, name: str) -> Tuple[Any, ...]:
-        """Doc place holder.
-
-        Returns:
-            Tuple[Any, ...]: _description_
-        """
-        return tuple(self._soma_reader().get_enum(name))
-
-    def column_to_enumeration(self, name: str) -> str:
-        return str(self._soma_reader().get_enum_label_on_attr(name))
+        # if it is in read open mode, then it is a DataFrameWrapper
+        return cast(DataFrameWrapper, self._handle).count

     def __len__(self) -> int:
         """Returns the number of rows in the dataframe. Same as ``df.count``."""
@@ -341,25 +334,28 @@ def read(
         Lifecycle:
             Experimental.
         """
-        del batch_size, platform_config  # Currently unused.
+        del batch_size  # Currently unused.
         _util.check_unpartitioned(partitions)
         self._check_open_read()

-        schema = self._handle.schema
-        query_condition = None
-        if value_filter is not None:
-            query_condition = QueryCondition(value_filter)
-
-        sr = self._soma_reader(
-            schema=schema,  # query_condition needs this
-            column_names=column_names,
-            query_condition=query_condition,
-            result_order=result_order,
+        ts = None
+        if self._handle._handle.timestamp is not None:
+            ts = (0, self._handle._handle.timestamp)
+
+        sr = clib.SOMADataFrame.open(
+            uri=self._handle._handle.uri,
+            mode=clib.OpenMode.read,
+            platform_config=platform_config or {},
+            column_names=column_names or [],
+            result_order=_util.to_clib_result_order(result_order),
+            timestamp=ts,
         )

+        if value_filter is not None:
+            sr.set_condition(QueryCondition(value_filter), self._handle.schema)
+
         self._set_reader_coords(sr, coords)

-        # TODO: platform_config
         # TODO: batch_size

         return TableReadIter(sr)
@@ -415,7 +411,7 @@ def write(
                 if not pa.types.is_dictionary(col_info.type):
                     raise ValueError(
                         "Expected dictionary type for enumerated attribute "
-                        f"{name} but saw {col_info.type}"
+                        f"{name} but saw {col.type}"
                     )
                 enmr = self._handle.enum(attr.name)
@@ -521,20 +517,23 @@ def _set_reader_coord(
         if self._set_reader_coord_by_numeric_slice(sr, dim_idx, dim, coord):
             return True

+        domain = self.domain[dim_idx]
+
         # Note: slice(None, None) matches the is_slice_of part, unless we also check the dim-type
         # part.
-        if (is_slice_of(coord, str) or is_slice_of(coord, bytes)) and (
-            dim.dtype == "str" or dim.dtype == "bytes"
-        ):
+        if (
+            is_slice_of(coord, str) or is_slice_of(coord, bytes)
+        ) and _util.pa_types_is_string_or_bytes(dim.type):
             _util.validate_slice(coord)
             # Figure out which one.
-            dim_type: Union[Type[str], Type[bytes]] = type(dim.domain[0])
+            dim_type: Union[Type[str], Type[bytes]] = type(domain[0])
             # A ``None`` or empty start is always equivalent to empty str/bytes.
             start = coord.start or dim_type()
             if coord.stop is None:
                 # There's no way to specify "to infinity" for strings.
                 # We have to get the nonempty domain and use that as the end.
-                _, stop = self._handle.reader.nonempty_domain()[dim_idx]
+                ned = self._handle.non_empty_domain()
+                _, stop = ned[dim_idx]
             else:
                 stop = coord.stop
             sr.set_dim_ranges_string_or_bytes(dim.name, [(start, stop)])
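For string and bytes index columns, the slice handling above uses the nonempty domain as the implicit upper bound when `coord.stop` is None. A hedged usage sketch (the URI is a placeholder; assumes the dataframe's index column is string-typed):

    import tiledbsoma

    with tiledbsoma.DataFrame.open("file:///path/to/sdf") as df:
        # An open stop, e.g. slice("AAA", None), would fall back to the
        # nonempty-domain upper bound per the code above.
        table = df.read(coords=(slice("AAA", "MZZ"),)).concat()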
@@ -542,16 +541,14 @@ def _set_reader_coord(

         # Note: slice(None, None) matches the is_slice_of part, unless we also check the dim-type
         # part.
-        if is_slice_of(coord, np.datetime64) and dim.dtype.name.startswith(
-            "datetime64"
-        ):
+        if is_slice_of(coord, np.datetime64) and pa.types.is_timestamp(dim.type):
             _util.validate_slice(coord)
             # These timestamp types are stored in Arrow as well as TileDB as 64-bit integers (with
             # distinguishing metadata of course). For purposes of the query logic they're just
             # int64.
-            istart = coord.start or dim.domain[0]
+            istart = coord.start or domain[0]
             istart = int(istart.astype("int64"))
-            istop = coord.stop or dim.domain[1]
+            istop = coord.stop or domain[1]
             istop = int(istop.astype("int64"))
             sr.set_dim_ranges_int64(dim.name, [(istart, istop)])
             return True
@@ -574,41 +571,19 @@ def _set_reader_coord_by_py_seq_or_np_array(
                 f"only 1D numpy arrays may be used to index; got {coord.ndim}"
             )

-        # See libtiledbsoma.cc for more context on why we need the
-        # explicit type-check here.
-
-        if dim.dtype == np.int64:
-            sr.set_dim_points_int64(dim.name, coord)
-        elif dim.dtype == np.int32:
-            sr.set_dim_points_int32(dim.name, coord)
-        elif dim.dtype == np.int16:
-            sr.set_dim_points_int16(dim.name, coord)
-        elif dim.dtype == np.int8:
-            sr.set_dim_points_int8(dim.name, coord)
-
-        elif dim.dtype == np.uint64:
-            sr.set_dim_points_uint64(dim.name, coord)
-        elif dim.dtype == np.uint32:
-            sr.set_dim_points_uint32(dim.name, coord)
-        elif dim.dtype == np.uint16:
-            sr.set_dim_points_uint16(dim.name, coord)
-        elif dim.dtype == np.uint8:
-            sr.set_dim_points_uint8(dim.name, coord)
-
-        elif dim.dtype == np.float64:
-            sr.set_dim_points_float64(dim.name, coord)
-        elif dim.dtype == np.float32:
-            sr.set_dim_points_float32(dim.name, coord)
-
-        elif dim.dtype == "str" or dim.dtype == "bytes":
-            sr.set_dim_points_string_or_bytes(dim.name, coord)
+        try:
+            set_dim_points = getattr(sr, f"set_dim_points_{dim.type}")
+        except AttributeError:
+            # We have to handle this type specially below
+            pass
+        else:
+            set_dim_points(dim.name, coord)
+            return True

-        elif (
-            dim.dtype == "datetime64[s]"
-            or dim.dtype == "datetime64[ms]"
-            or dim.dtype == "datetime64[us]"
-            or dim.dtype == "datetime64[ns]"
-        ):
+        if _util.pa_types_is_string_or_bytes(dim.type):
+            sr.set_dim_points_string_or_bytes(dim.name, coord)
+            return True
+        elif pa.types.is_timestamp(dim.type):
             if not isinstance(coord, (tuple, list, np.ndarray)):
                 raise ValueError(
                     f"unhandled coord type {type(coord)} for index column named {dim.name}"
                 )
             icoord = [
                 int(e.astype("int64"))
                 for e in coord
             ]
             sr.set_dim_points_int64(dim.name, icoord)
+            return True

         # TODO: bool
-        else:
-            raise ValueError(
-                f"unhandled type {dim.dtype} for index column named {dim.name}"
-            )
-
-        return True
+        raise ValueError(
+            f"unhandled type {dim.type} for index column named {dim.name}"
+        )

     def _set_reader_coord_by_numeric_slice(
-        self, sr: clib.SOMAArray, dim_idx: int, dim: tiledb.Dim, coord: Slice[Any]
+        self, sr: clib.SOMAArray, dim_idx: int, dim: pa.Field, coord: Slice[Any]
     ) -> bool:
         try:
-            lo_hi = _util.slice_to_numeric_range(coord, dim.domain)
+            lo_hi = _util.slice_to_numeric_range(coord, self.domain[dim_idx])
         except _util.NonNumericDimensionError:
             return False  # We only handle numeric dimensions here.

         if not lo_hi:
             return True
-        elif dim.dtype == np.int64:
-            sr.set_dim_ranges_int64(dim.name, [lo_hi])
-            return True
-        elif dim.dtype == np.int32:
-            sr.set_dim_ranges_int32(dim.name, [lo_hi])
-            return True
-        elif dim.dtype == np.int16:
-            sr.set_dim_ranges_int16(dim.name, [lo_hi])
-            return True
-        elif dim.dtype == np.int8:
-            sr.set_dim_ranges_int8(dim.name, [lo_hi])
-            return True
-
-        elif dim.dtype == np.uint64:
-            sr.set_dim_ranges_uint64(dim.name, [lo_hi])
-            return True
-        elif dim.dtype == np.uint32:
-            sr.set_dim_ranges_uint32(dim.name, [lo_hi])
-            return True
-        elif dim.dtype == np.uint16:
-            sr.set_dim_ranges_uint16(dim.name, [lo_hi])
-            return True
-        elif dim.dtype == np.uint8:
-            sr.set_dim_ranges_uint8(dim.name, [lo_hi])
-            return True
-
-        elif dim.dtype == np.float64:
-            sr.set_dim_ranges_float64(dim.name, [lo_hi])
-            return True
-        elif dim.dtype == np.float32:
-            sr.set_dim_ranges_float32(dim.name, [lo_hi])
+        try:
+            set_dim_range = getattr(sr, f"set_dim_ranges_{dim.type}")
+            set_dim_range(dim.name, [lo_hi])
             return True
-
-        # TODO:
-        # elif dim.dtype == np.bool_:
-
-        return False
+        except AttributeError:
+            return False

     def _canonicalize_schema(
diff --git a/apis/python/src/tiledbsoma/_dense_nd_array.py b/apis/python/src/tiledbsoma/_dense_nd_array.py
index 82030254ea..4617bd93b0 100644
--- a/apis/python/src/tiledbsoma/_dense_nd_array.py
+++ b/apis/python/src/tiledbsoma/_dense_nd_array.py
@@ -17,6 +17,7 @@
 from . import _util
 from ._common_nd_array import NDArray
 from ._exception import SOMAError
+from ._tdb_handles import ArrayWrapper
 from ._util import dense_indices_to_shape
 from .options._tiledb_create_options import TileDBCreateOptions

@@ -71,6 +72,8 @@ class DenseNDArray(NDArray, somacore.DenseNDArray):

     __slots__ = ()

+    _reader_wrapper_type = ArrayWrapper
+
     def read(
         self,
         coords: options.DenseNDCoords = (),
diff --git a/apis/python/src/tiledbsoma/_factory.py b/apis/python/src/tiledbsoma/_factory.py
index b0eb385bbf..2bcf5b084f 100644
--- a/apis/python/src/tiledbsoma/_factory.py
+++ b/apis/python/src/tiledbsoma/_factory.py
@@ -142,18 +142,18 @@ def _open_internal(
     """Lower-level open function for internal use only."""
     handle = opener(uri, mode, context, timestamp)
     try:
-        return _reify_handle(handle)
+        return reify_handle(handle)
     except Exception:
         handle.close()
         raise


 @typeguard_ignore
-def _reify_handle(hdl: _Wrapper) -> "_tiledb_object.TileDBObject[_Wrapper]":
+def reify_handle(hdl: _Wrapper) -> "_tiledb_object.TileDBObject[_Wrapper]":
     """Picks out the appropriate SOMA class for a handle and wraps it."""
     typename = _read_soma_type(hdl)
     cls = _type_name_to_cls(typename)
-    if cls._wrapper_type != type(hdl):
+    if type(hdl) not in (cls._wrapper_type, cls._reader_wrapper_type):
         raise SOMAError(
             f"cannot open {hdl.uri!r}: a {type(hdl._handle)}"
             f" cannot be converted to a {typename}"
diff --git a/apis/python/src/tiledbsoma/_query_condition.py b/apis/python/src/tiledbsoma/_query_condition.py
index 1717aa810f..6d1d62984f 100644
--- a/apis/python/src/tiledbsoma/_query_condition.py
+++ b/apis/python/src/tiledbsoma/_query_condition.py
@@ -11,10 +11,11 @@

 import attrs
 import numpy as np
-import tiledb
+import pyarrow as pa

 from . import pytiledbsoma as clib
 from ._exception import SOMAError
+from ._util import pa_types_is_string_or_bytes

 # In Python 3.7, a boolean literal like `True` is of type `ast.NameConstant`.
 # Above that, it's of type `ast.Constant`.
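Since `QueryCondition` now compiles against the dataframe's Arrow schema instead of a tiledb-py `ArraySchema`, here is how it is typically driven from the public API (a sketch; URI and column name are placeholders):

    import tiledbsoma

    with tiledbsoma.DataFrame.open("file:///path/to/sdf") as df:
        # value_filter is parsed into a PyQueryCondition and validated
        # against df's Arrow schema by init_query_condition.
        table = df.read(value_filter="quality > 0.5").concat()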
@@ -130,15 +131,17 @@ def __attrs_post_init__(self):

     def init_query_condition(
         self,
-        schema: tiledb.ArraySchema,
-        enum_to_dtype: dict,
+        schema: pa.Schema,
         query_attrs: Optional[List[str]],
     ):
-        qctree = QueryConditionTree(schema, enum_to_dtype, query_attrs)
-        self.c_obj = qctree.visit(self.tree.body)
+        try:
+            qctree = QueryConditionTree(schema, query_attrs)
+            self.c_obj = qctree.visit(self.tree.body)
+        except Exception as pex:
+            raise SOMAError(pex)

         if not isinstance(self.c_obj, clib.PyQueryCondition):
-            raise tiledb.TileDBError(
+            raise SOMAError(
                 "Malformed query condition statement. A query condition must "
                 "be made up of one or more Boolean expressions."
             )
@@ -148,8 +151,7 @@ def init_query_condition(

 @attrs.define
 class QueryConditionTree(ast.NodeVisitor):
-    schema: tiledb.ArraySchema
-    enum_to_dtype: dict
+    schema: pa.Schema
     query_attrs: List[str]

     def visit_BitOr(self, node):
@@ -219,25 +221,25 @@ def visit_Compare(self, node: ast.Compare) -> clib.PyQueryCondition:
         elif isinstance(operator, (ast.In, ast.NotIn)):
             rhs = node.comparators[0]
             if not isinstance(rhs, ast.List):
-                raise tiledb.TileDBError(
+                raise SOMAError(
                     "`in` operator syntax must be written as `attr in ['l', 'i', 's', 't']`"
                 )

             variable = node.left.id
             values = [self.get_val_from_node(val) for val in self.visit(rhs)]
             if len(values) == 0:
-                raise tiledb.TileDBError(
+                raise SOMAError(
                     "At least one value must be provided to the set membership"
                 )

-            if self.schema.has_attr(variable):
-                enum_label = self.schema.attr(variable).enum_label
-                if enum_label is not None:
-                    dt = self.enum_to_dtype[enum_label]
-                else:
-                    dt = self.schema.attr(variable).dtype
+            dt = self.schema.field(variable).type
+            if pa.types.is_dictionary(dt):
+                dt = dt.value_type
+
+            if pa_types_is_string_or_bytes(dt):
+                dtype = "string"
             else:
-                dt = self.schema.attr_or_dim_dtype(variable)
+                dtype = np.dtype(dt.to_pandas_dtype()).name

             # sdf.read(column_names=["foo"], value_filter='bar == 999') should
             # result in bar being added to the column names. See also
@@ -246,7 +248,6 @@ def visit_Compare(self, node: ast.Compare) -> clib.PyQueryCondition:
             if att not in self.query_attrs:
                 self.query_attrs.append(att)

-            dtype = "string" if dt.kind in "SUa" else dt.name
             op = clib.TILEDB_IN if isinstance(operator, ast.In) else clib.TILEDB_NOT_IN
             result = self.create_pyqc(dtype)(node.left.id, values, op)

@@ -262,12 +263,15 @@ def aux_visit_Compare(

         att = self.get_att_from_node(att)
         val = self.get_val_from_node(val)
-        enum_label = self.schema.attr(att).enum_label
-        if enum_label is not None:
-            dt = self.enum_to_dtype[enum_label]
+
+        dt = self.schema.field(att).type
+        if pa.types.is_dictionary(dt):
+            dt = dt.value_type
+
+        if pa_types_is_string_or_bytes(dt):
+            dtype = "string"
         else:
-            dt = self.schema.attr(att).dtype
-        dtype = "string" if dt.kind in "SUa" else dt.name
+            dtype = np.dtype(dt.to_pandas_dtype()).name

         val = self.cast_val_to_dtype(val, dtype)

         pyqc = clib.PyQueryCondition()
@@ -278,7 +282,7 @@ def is_att_node(self, att: QueryConditionNodeElem) -> bool:
         if isinstance(att, ast.Call):
             if not isinstance(att.func, ast.Name):
-                raise tiledb.TileDBError(f"Unrecognized expression {att.func}.")
+                raise SOMAError(f"Unrecognized expression {att.func}.")

             if att.func.id != "attr":
                 return False
@@ -323,9 +327,7 @@ def get_att_from_node(self, node: QueryConditionNodeElem) -> Any:

             if isinstance(att_node, ast.Call):
                 if not isinstance(att_node.func, ast.Name):
-                    raise tiledb.TileDBError(
-                        f"Unrecognized expression {att_node.func}."
-                    )
+                    raise SOMAError(f"Unrecognized expression {att_node.func}.")

                 att_node = att_node.args[0]

             if isinstance(att_node, ast.Name):
@@ -338,21 +340,14 @@ def get_att_from_node(self, node: QueryConditionNodeElem) -> Any:
                 # deprecated in 3.8
                 att = str(att_node.s)
             else:
-                raise tiledb.TileDBError(
+                raise SOMAError(
                     f"Incorrect type for attribute name: {ast.dump(att_node)}"
                 )
         else:
-            raise tiledb.TileDBError(
-                f"Incorrect type for attribute name: {ast.dump(node)}"
-            )
+            raise SOMAError(f"Incorrect type for attribute name: {ast.dump(node)}")

-        if not self.schema.has_attr(att):
-            if self.schema.domain.has_dim(att):
-                raise tiledb.TileDBError(
-                    f"`{att}` is a dimension. QueryConditions currently only "
-                    "work on attributes."
-                )
-            raise tiledb.TileDBError(f"Attribute `{att}` not found in schema.")
+        if att not in self.schema.names:
+            raise SOMAError(f"`{att}` not found in schema.")

         # sdf.read(column_names=["foo"], value_filter='bar == 999') should
         # result in bar being added to the column names. See also
@@ -367,14 +362,12 @@ def get_val_from_node(self, node: QueryConditionNodeElem) -> Any:

         if isinstance(node, ast.Call):
             if not isinstance(node.func, ast.Name):
-                raise tiledb.TileDBError(f"Unrecognized expression {node.func}.")
+                raise SOMAError(f"Unrecognized expression {node.func}.")

             if node.func.id == "val":
                 val_node = node.args[0]
             else:
-                raise tiledb.TileDBError(
-                    f"Incorrect type for cast value: {node.func.id}"
-                )
+                raise SOMAError(f"Incorrect type for cast value: {node.func.id}")

         if isinstance(val_node, ast.Constant) or isinstance(val_node, ast.NameConstant):
             val = val_node.value
@@ -385,7 +378,7 @@ def get_val_from_node(self, node: QueryConditionNodeElem) -> Any:
             # deprecated in 3.8
             val = val_node.s
         else:
-            raise tiledb.TileDBError(
+            raise SOMAError(
                 f"Incorrect type for comparison value: {ast.dump(val_node)}"
             )

@@ -399,7 +392,7 @@ def cast_val_to_dtype(
         # this prevents numeric strings ("1", '123.32') from getting
         # casted to numeric types
         if isinstance(val, str):
-            raise tiledb.TileDBError(f"Cannot cast `{val}` to {dtype}.")
+            raise SOMAError(f"Cannot cast `{val}` to {dtype}.")
         if np.issubdtype(dtype, np.datetime64):
             cast = getattr(np, "int64")
         # silence DeprecationWarning: `np.bool`
@@ -409,7 +402,7 @@ def cast_val_to_dtype(
             cast = getattr(np, dtype)
             val = cast(val)
         except ValueError:
-            raise tiledb.TileDBError(f"Cannot cast `{val}` to {dtype}.")
+            raise SOMAError(f"Cannot cast `{val}` to {dtype}.")

         return val

@@ -420,7 +413,7 @@ def init_pyqc(self, pyqc: clib.PyQueryCondition, dtype: str) -> Callable:

         init_fn_name = f"init_{dtype}"

         if not hasattr(pyqc, init_fn_name):
-            raise tiledb.TileDBError(f"PyQueryCondition.{init_fn_name}() not found.")
+            raise SOMAError(f"PyQueryCondition.{init_fn_name}() not found.")

         return getattr(pyqc, init_fn_name)

@@ -436,14 +429,13 @@ def create_pyqc(self, dtype: str) -> Callable:
         try:
             return getattr(clib.PyQueryCondition, create_fn_name)
         except AttributeError as ae:
-            raise tiledb.TileDBError(
-                f"PyQueryCondition.{create_fn_name}() not found."
-            ) from ae
+            raise SOMAError(f"PyQueryCondition.{create_fn_name}() not found.") from ae

     def visit_BinOp(self, node: ast.BinOp) -> clib.PyQueryCondition:
-        op = self.visit(node.op)
-        if op is None:
-            raise tiledb.TileDBError(
+        try:
+            op = self.visit(node.op)
+        except KeyError:
+            raise SOMAError(
                 f"Unsupported binary operator: {ast.dump(node.op)}. Only & is currently supported."
             )

@@ -458,9 +450,7 @@ def visit_BoolOp(self, node: ast.BoolOp) -> clib.PyQueryCondition:
         try:
             op = self.visit(node.op)
         except KeyError:
-            raise tiledb.TileDBError(
-                f"Unsupported Boolean operator: {ast.dump(node.op)}."
-            )
+            raise SOMAError(f"Unsupported Boolean operator: {ast.dump(node.op)}.")

         result = self.visit(node.values[0])
         for value in node.values[1:]:
@@ -470,13 +460,13 @@ def visit_Call(self, node: ast.Call) -> ast.Call:
         if not isinstance(node.func, ast.Name):
-            raise tiledb.TileDBError(f"Unrecognized expression {node.func}.")
+            raise SOMAError(f"Unrecognized expression {node.func}.")

         if node.func.id not in ["attr", "val"]:
-            raise tiledb.TileDBError("Valid casts are attr() or val().")
+            raise SOMAError("Valid casts are attr() or val().")

         if len(node.args) != 1:
-            raise tiledb.TileDBError(
+            raise SOMAError(
                 f"Exactly one argument must be provided to {node.func.id}()."
             )
@@ -497,7 +487,7 @@ def visit_UnaryOp(self, node: ast.UnaryOp, sign: int = 1):
         elif isinstance(node.op, ast.USub):
             sign *= -1
         else:
-            raise tiledb.TileDBError(f"Unsupported UnaryOp type. Saw {ast.dump(node)}.")
+            raise SOMAError(f"Unsupported UnaryOp type. Saw {ast.dump(node)}.")

         if isinstance(node.operand, ast.UnaryOp):
             return self.visit_UnaryOp(node.operand, sign)
@@ -509,7 +499,7 @@ def visit_UnaryOp(self, node: ast.UnaryOp, sign: int = 1):
         elif isinstance(node.operand, ast.Num):
             node.operand.n *= sign
         else:
-            raise tiledb.TileDBError(
+            raise SOMAError(
                 f"Unexpected node type following UnaryOp. Saw {ast.dump(node)}."
             )

diff --git a/apis/python/src/tiledbsoma/_sparse_nd_array.py b/apis/python/src/tiledbsoma/_sparse_nd_array.py
index 880e5be720..f98a368b85 100644
--- a/apis/python/src/tiledbsoma/_sparse_nd_array.py
+++ b/apis/python/src/tiledbsoma/_sparse_nd_array.py
@@ -38,6 +38,7 @@
     SparseCOOTensorReadIter,
     TableReadIter,
 )
+from ._tdb_handles import ArrayWrapper
 from ._types import NTuple
 from .options._tiledb_create_options import TileDBCreateOptions

@@ -94,6 +95,8 @@ class SparseNDArray(NDArray, somacore.SparseNDArray):

     __slots__ = ()

+    _reader_wrapper_type = ArrayWrapper
+
     # Inherited from somacore
     # * ndim accessor
     # * is_sparse: Final = True
@@ -264,15 +267,15 @@ def write(
         )

     def _set_reader_coord(
-        self, sr: clib.SOMAArray, dim_idx: int, dim: tiledb.Dim, coord: object
+        self, sr: clib.SOMAArray, dim_idx: int, dim: pa.Field, coord: object
     ) -> bool:
         if super()._set_reader_coord(sr, dim_idx, dim, coord):
             return True
         if isinstance(coord, Sequence):
-            if dim.dtype == np.int64:
+            if pa.types.is_int64(dim.type):
                 sr.set_dim_points_int64(dim.name, coord)
                 return True
-            elif dim.dtype == "str" or dim.dtype == "bytes":
+            elif _util.pa_types_is_string_or_bytes(dim.type):
                 sr.set_dim_points_string_or_bytes(dim.name, coord)
                 return True
             else:
@@ -283,10 +286,10 @@ def _set_reader_coord(
             raise ValueError(
                 f"only 1D numpy arrays may be used to index; got {coord.ndim}"
             )
-            if dim.dtype == np.int64:
+            if pa.types.is_int64(dim.type):
                 sr.set_dim_points_int64(dim.name, coord)
                 return True
-            elif dim.dtype == "str" or dim.dtype == "bytes":
+            elif _util.pa_types_is_string_or_bytes(dim.type):
                 sr.set_dim_points_string_or_bytes(dim.name, coord)
                 return True

@@ -345,7 +348,7 @@ def used_shape(self) -> Tuple[Tuple[int, int], ...]:
         # In the unlikely event that a previous data update succeeded but the
         # subsequent metadata update did not, take the union of the core non-empty domain
         # (which is done as part of the data update) and the metadata bounding box.
-        ned = self.non_empty_domain()
+        ned = self.non_empty_domain() or ()
         for i, nedslot in enumerate(ned):
             ned_lower, ned_upper = nedslot
             bbox_lower, bbox_upper = retval[i]
diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py
index f05a333b40..c28743240b 100644
--- a/apis/python/src/tiledbsoma/_tdb_handles.py
+++ b/apis/python/src/tiledbsoma/_tdb_handles.py
@@ -12,9 +12,11 @@
 import enum
 from typing import (
     Any,
+    Callable,
     Dict,
     Generic,
     Iterator,
+    List,
     Mapping,
     MutableMapping,
     Optional,
@@ -25,15 +27,19 @@
 )

 import attrs
+import numpy as np
+import pyarrow as pa
 import tiledb
+from numpy.typing import DTypeLike
 from somacore import options
 from typing_extensions import Literal, Self

+from . import pytiledbsoma as clib
 from ._exception import DoesNotExistError, SOMAError, is_does_not_exist_error
 from ._types import OpenTimestamp
 from .options._soma_tiledb_context import SOMATileDBContext

-RawHandle = Union[tiledb.Array, tiledb.Group]
+RawHandle = Union[tiledb.Array, tiledb.Group, clib.SOMADataFrame]
 _RawHdl_co = TypeVar("_RawHdl_co", bound=RawHandle, covariant=True)
 """A raw TileDB object. Covariant because Handles are immutable enough."""

@@ -48,13 +54,35 @@ def open(
     obj_type = tiledb.object_type(uri, ctx=context.tiledb_ctx)
     if not obj_type:
         raise DoesNotExistError(f"{uri!r} does not exist")
-    if obj_type == "array":
-        return ArrayWrapper.open(uri, mode, context, timestamp)
-    if obj_type == "group":
-        return GroupWrapper.open(uri, mode, context, timestamp)
+
+    try:
+        return _open_with_clib_wrapper(uri, mode, context, timestamp)
+    except SOMAError:
+        # This object still uses tiledb-py and must be handled below
+        if obj_type == "array":
+            return ArrayWrapper.open(uri, mode, context, timestamp)
+        if obj_type == "group":
+            return GroupWrapper.open(uri, mode, context, timestamp)
+
+    # Invalid object
     raise SOMAError(f"{uri!r} has unknown storage type {obj_type!r}")


+def _open_with_clib_wrapper(
+    uri: str,
+    mode: options.OpenMode,
+    context: SOMATileDBContext,
+    timestamp: Optional[OpenTimestamp] = None,
+) -> "DataFrameWrapper":
+    open_mode = clib.OpenMode.read if mode == "r" else clib.OpenMode.write
+    config = {k: str(v) for k, v in context.tiledb_config.items()}
+    timestamp_ms = context._open_timestamp_ms(timestamp)
+    obj = clib.SOMAObject.open(uri, open_mode, config, (0, timestamp_ms))
+    if obj.type == "SOMADataFrame":
+        return DataFrameWrapper._from_soma_object(obj, context)
+    raise SOMAError(f"clib.SOMAObject {obj.type!r} not yet supported")
+
+
 @attrs.define(eq=False, hash=False, slots=False)
 class Wrapper(Generic[_RawHdl_co], metaclass=abc.ABCMeta):
     """Wrapper for TileDB handles to manage lifecycle and metadata.
@@ -95,6 +123,26 @@ def open(
             raise
         return handle

+    @classmethod
+    def _from_soma_object(
+        cls, soma_object: clib.SOMAObject, context: SOMATileDBContext
+    ) -> Self:
+        uri = soma_object.uri
+        mode = soma_object.mode
+        timestamp = soma_object.timestamp
+        try:
+            handle = cls(uri, mode, context, timestamp, soma_object)
+            if handle.mode == "w":
+                with cls._opener(uri, mode, context, timestamp) as auxiliary_reader:
+                    handle._do_initial_reads(auxiliary_reader)
+            else:
+                handle._do_initial_reads(soma_object)
+        except tiledb.TileDBError as tdbe:
+            if is_does_not_exist_error(tdbe):
+                raise DoesNotExistError(f"{handle.uri!r} does not exist") from tdbe
+            raise
+        return handle
+
     @classmethod
     @abc.abstractmethod
     def _opener(
@@ -194,16 +242,30 @@ def _opener(
     def schema(self) -> tiledb.ArraySchema:
         return self._handle.schema

-    def non_empty_domain(self) -> Tuple[Tuple[int, int], ...]:
-        """
-        Retrieves the non-empty domain for each dimension, namely the smallest
-        and largest indices in each dimension for which the array/dataframe has
-        data occupied. This is nominally the same as the domain used at
-        creation time, but if for example only a portion of the available domain
-        has actually had data written, this function will return a tighter
-        range.
-        """
-        return self._handle.nonempty_domain()  # type: ignore
+    def non_empty_domain(self) -> Tuple[Tuple[object, object], ...]:
+        try:
+            return self._handle.nonempty_domain() or ()
+        except tiledb.TileDBError as e:
+            raise SOMAError(e)
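As `_open_with_clib_wrapper` above shows, the clib open calls take a `(start, end)` timestamp pair in milliseconds, so `(0, timestamp_ms)` reads everything written up to the open timestamp. A minimal sketch (`uri` and `timestamp_ms` are placeholders; config entries must be strings):

    from tiledbsoma import pytiledbsoma as clib

    obj = clib.SOMAObject.open(uri, clib.OpenMode.read, {}, (0, timestamp_ms))
    if obj.type == "SOMADataFrame":
        ...  # wrap it via DataFrameWrapper._from_soma_object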
- """ - return self._handle.nonempty_domain() # type: ignore + def non_empty_domain(self) -> Tuple[Tuple[object, object], ...]: + try: + return self._handle.nonempty_domain() or () + except tiledb.TileDBError as e: + raise SOMAError(e) + + @property + def domain(self) -> Tuple[Tuple[object, object], ...]: + dom = self._handle.schema.domain + return tuple(dom.dim(i).domain for i in range(dom.ndim)) + + @property + def ndim(self) -> int: + return int(self._handle.schema.domain.ndim) + + @property + def attr_names(self) -> Tuple[str, ...]: + schema = self._handle.schema + return tuple(schema.attr(i).name for i in range(schema.nattr)) + + @property + def dim_names(self) -> Tuple[str, ...]: + schema = self._handle.schema + return tuple(schema.domain.dim(i).name for i in range(schema.domain.ndim)) def enum(self, label: str) -> tiledb.Enumeration: return self._handle.enum(label) @@ -247,6 +309,106 @@ def _do_initial_reads(self, reader: tiledb.Group) -> None: } +class DataFrameWrapper(Wrapper[clib.SOMADataFrame]): + """Wrapper around a Pybind11 SOMADataFrame handle.""" + + @classmethod + def _opener( + cls, + uri: str, + mode: options.OpenMode, + context: SOMATileDBContext, + timestamp: int, + ) -> clib.SOMADataFrame: + open_mode = clib.OpenMode.read if mode == "r" else clib.OpenMode.write + config = {k: str(v) for k, v in context.tiledb_config.items()} + column_names: List[str] = [] + result_order = clib.ResultOrder.automatic + return clib.SOMADataFrame.open( + uri, + open_mode, + config, + column_names, + result_order, + (0, timestamp), + ) + + # Covariant types should normally not be in parameters, but this is for + # internal use only so it's OK. + def _do_initial_reads(self, reader: _RawHdl_co) -> None: # type: ignore[misc] + """Final setup step before returning the Handle. + + This is passed a raw TileDB object opened in read mode, since writers + will need to retrieve data from the backing store on setup. 
+ """ + # non–attrs-managed field + self.metadata = MetadataWrapper(self, dict(reader.meta)) + + @property + def schema(self) -> pa.Schema: + return self._handle.schema + + @property + def meta(self) -> "MetadataWrapper": + return MetadataWrapper(self, dict(self._handle.meta)) + + @property + def ndim(self) -> int: + return len(self._handle.index_column_names) + + @property + def count(self) -> int: + return int(self._handle.count) + + def _cast_domain( + self, domain: Callable[[str, DTypeLike], Tuple[object, object]] + ) -> Tuple[Tuple[object, object], ...]: + result = [] + for name in self._handle.index_column_names: + dtype = self._handle.schema.field(name).type + if pa.types.is_timestamp(dtype): + np_dtype = np.dtype(dtype.to_pandas_dtype()) + dom = domain(name, np_dtype) + result.append( + ( + np_dtype.type(dom[0], dtype.unit), + np_dtype.type(dom[1], dtype.unit), + ) + ) + else: + if pa.types.is_large_string(dtype) or pa.types.is_string(dtype): + dtype = np.dtype("U") + elif pa.types.is_large_binary(dtype) or pa.types.is_binary(dtype): + dtype = np.dtype("S") + else: + dtype = np.dtype(dtype.to_pandas_dtype()) + result.append(domain(name, dtype)) + return tuple(result) + + @property + def domain(self) -> Tuple[Tuple[object, object], ...]: + return self._cast_domain(self._handle.domain) + + def non_empty_domain(self) -> Tuple[Tuple[object, object], ...]: + return self._cast_domain(self._handle.non_empty_domain) or () + + @property + def attr_names(self) -> Tuple[str, ...]: + return tuple( + f.name for f in self.schema if f.name not in self._handle.index_column_names + ) + + @property + def dim_names(self) -> Tuple[str, ...]: + return tuple(self._handle.index_column_names) + + def enum(self, label: str) -> tiledb.Enumeration: + # The DataFrame handle may either be ArrayWrapper or DataFrameWrapper. + # enum is only used in the DataFrame write path and is implemented by + # ArrayWrapper. If enum is called in the read path, it is an error. + raise NotImplementedError + + class _DictMod(enum.Enum): """State machine to keep track of modifications to a dictionary. diff --git a/apis/python/src/tiledbsoma/_tiledb_array.py b/apis/python/src/tiledbsoma/_tiledb_array.py index 15c589b05a..a937ab00a3 100644 --- a/apis/python/src/tiledbsoma/_tiledb_array.py +++ b/apis/python/src/tiledbsoma/_tiledb_array.py @@ -65,9 +65,13 @@ def schema(self) -> pa.Schema: Lifecycle: Experimental. """ - return tiledb_schema_to_arrow(self._tiledb_array_schema(), self.uri, self._ctx) + if isinstance(self._tiledb_array_schema(), tiledb.ArraySchema): + return tiledb_schema_to_arrow( + self._tiledb_array_schema(), self.uri, self._ctx + ) + return self._tiledb_array_schema() - def non_empty_domain(self) -> Tuple[Tuple[int, int], ...]: + def non_empty_domain(self) -> Tuple[Tuple[Any, Any], ...]: """ Retrieves the non-empty domain for each dimension, namely the smallest and largest indices in each dimension for which the array/dataframe has @@ -88,19 +92,16 @@ def _tiledb_array_keys(self) -> Tuple[str, ...]: def _tiledb_dim_names(self) -> Tuple[str, ...]: """Reads the dimension names from the schema: for example, ['obs_id', 'var_id'].""" - schema = self._handle.schema - return tuple(schema.domain.dim(i).name for i in range(schema.domain.ndim)) + return self._handle.dim_names def _tiledb_attr_names(self) -> Tuple[str, ...]: """Reads the attribute names from the schema: for example, the list of column names in a dataframe. 
""" - schema = self._handle.schema - return tuple(schema.attr(i).name for i in range(schema.nattr)) + return self._handle.attr_names def _tiledb_domain(self) -> Tuple[Tuple[Any, Any], ...]: - schema = self._handle.schema - return tuple(schema.domain.dim(i).domain for i in range(0, schema.domain.ndim)) + return self._handle.domain def _soma_reader( self, @@ -147,14 +148,14 @@ def _set_reader_coords(self, sr: clib.SOMAArray, coords: Sequence[object]) -> No f"coords type {type(coords)} must be a regular sequence," " not str or bytes" ) - schema = self._handle.schema - if len(coords) > schema.domain.ndim: + + if len(coords) > self._handle.ndim: raise ValueError( f"coords ({len(coords)} elements) must be shorter than ndim" - f" ({schema.domain.ndim})" + f" ({self._handle.ndim})" ) for i, coord in enumerate(coords): - dim = self._handle.schema.domain.dim(i) + dim = self.schema.field(i) if not self._set_reader_coord(sr, i, dim, coord): raise TypeError( f"coord type {type(coord)} for dimension {dim.name}" @@ -162,7 +163,7 @@ def _set_reader_coords(self, sr: clib.SOMAArray, coords: Sequence[object]) -> No ) def _set_reader_coord( - self, sr: clib.SOMAArray, dim_idx: int, dim: tiledb.Dim, coord: object + self, sr: clib.SOMAArray, dim_idx: int, dim: pa.Field, coord: object ) -> bool: """Parses a single coordinate entry. @@ -173,7 +174,6 @@ def _set_reader_coord( Returns: True if successful, False if unrecognized. """ - del dim_idx # Unused. if coord is None: return True # No constraint; select all in this dimension @@ -183,7 +183,8 @@ def _set_reader_coord( if isinstance(coord, slice): _util.validate_slice(coord) try: - lo_hi = _util.slice_to_numeric_range(coord, dim.domain) + dom = self._handle.domain[dim_idx] + lo_hi = _util.slice_to_numeric_range(coord, dom) except _util.NonNumericDimensionError: return False # We only handle numeric dimensions here. 
diff --git a/apis/python/src/tiledbsoma/_tiledb_object.py b/apis/python/src/tiledbsoma/_tiledb_object.py
index 06de6a8c53..ccc72453ae 100644
--- a/apis/python/src/tiledbsoma/_tiledb_object.py
+++ b/apis/python/src/tiledbsoma/_tiledb_object.py
@@ -5,7 +5,7 @@

 import datetime
 from contextlib import ExitStack
-from typing import Any, Generic, MutableMapping, Optional, Type, TypeVar
+from typing import Any, Generic, MutableMapping, Optional, Type, TypeVar, Union

 import somacore
 import tiledb
@@ -81,7 +81,12 @@ def open(
         """
         del platform_config  # unused
         context = _validate_soma_tiledb_context(context)
-        handle = cls._wrapper_type.open(uri, mode, context, tiledb_timestamp)
+        try:
+            handle = _tdb_handles._open_with_clib_wrapper(
+                uri, mode, context, tiledb_timestamp
+            )
+        except SOMAError:
+            handle = cls._wrapper_type.open(uri, mode, context, tiledb_timestamp)
         return cls(
             handle,
             _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code",
@@ -89,7 +94,8 @@ def open(

     def __init__(
         self,
-        handle: _WrapperType_co,
+        # TODO DataFrameWrapper should be _WrapperType_co
+        handle: Union[_WrapperType_co, _tdb_handles.DataFrameWrapper],
         *,
         _dont_call_this_use_create_or_open_instead: str = "unset",
     ):
@@ -121,6 +127,9 @@ def __init__(
         self._close_stack.enter_context(self._handle)

     _wrapper_type: Type[_WrapperType_co]
+    _reader_wrapper_type: Union[
+        Type[_WrapperType_co], Type[_tdb_handles.DataFrameWrapper]
+    ]
     """Class variable of the Wrapper class used to open this object type."""

     @property
diff --git a/apis/python/src/tiledbsoma/_util.py b/apis/python/src/tiledbsoma/_util.py
index b29e022d85..d2ea2db55e 100644
--- a/apis/python/src/tiledbsoma/_util.py
+++ b/apis/python/src/tiledbsoma/_util.py
@@ -10,9 +10,11 @@
 from itertools import zip_longest
 from typing import Any, Optional, Tuple, Type, TypeVar

+import pyarrow as pa
 import somacore
 from somacore import options

+from . import pytiledbsoma as clib
 from ._types import OpenTimestamp, Slice, is_slice_of

@@ -260,3 +262,25 @@ def ms_to_datetime(millis: int) -> datetime.datetime:
     secs, millis = divmod(millis, 1000)
     dt = datetime.datetime.fromtimestamp(secs, tz=datetime.timezone.utc)
     return dt.replace(microsecond=millis * 1000)
+
+
+def to_clib_result_order(result_order: options.ResultOrderStr) -> clib.ResultOrder:
+    result_order = options.ResultOrder(result_order)
+    to_clib_result_order = {
+        options.ResultOrder.AUTO: clib.ResultOrder.automatic,
+        options.ResultOrder.ROW_MAJOR: clib.ResultOrder.rowmajor,
+        options.ResultOrder.COLUMN_MAJOR: clib.ResultOrder.colmajor,
+    }
+    try:
+        return to_clib_result_order[result_order]
+    except KeyError as ke:
+        raise ValueError(f"Invalid result_order: {result_order}") from ke
+
+
+def pa_types_is_string_or_bytes(dtype: pa.DataType) -> bool:
+    return bool(
+        pa.types.is_large_string(dtype)
+        or pa.types.is_large_binary(dtype)
+        or pa.types.is_string(dtype)
+        or pa.types.is_binary(dtype)
+    )
diff --git a/apis/python/src/tiledbsoma/common.cc b/apis/python/src/tiledbsoma/common.cc
new file mode 100644
index 0000000000..e2d0b94e99
--- /dev/null
+++ b/apis/python/src/tiledbsoma/common.cc
@@ -0,0 +1,181 @@
+#include "common.h"
+
+namespace tiledbsoma {
+
+std::unordered_map<tiledb_datatype_t, std::string> _tdb_to_np_name_dtype = {
+    {TILEDB_INT32, "int32"},
+    {TILEDB_INT64, "int64"},
+    {TILEDB_FLOAT32, "float32"},
+    {TILEDB_FLOAT64, "float64"},
+    {TILEDB_INT8, "int8"},
+    {TILEDB_UINT8, "uint8"},
+    {TILEDB_INT16, "int16"},
+    {TILEDB_UINT16, "uint16"},
+    {TILEDB_UINT32, "uint32"},
+    {TILEDB_UINT64, "uint64"},
+    {TILEDB_STRING_ASCII, "S"},
+    {TILEDB_STRING_UTF8, "U1"},
+    {TILEDB_CHAR, "S1"},
+    {TILEDB_DATETIME_YEAR, "M8[Y]"},
+    {TILEDB_DATETIME_MONTH, "M8[M]"},
+    {TILEDB_DATETIME_WEEK, "M8[W]"},
+    {TILEDB_DATETIME_DAY, "M8[D]"},
+    {TILEDB_DATETIME_HR, "M8[h]"},
+    {TILEDB_DATETIME_MIN, "M8[m]"},
+    {TILEDB_DATETIME_SEC, "M8[s]"},
+    {TILEDB_DATETIME_MS, "M8[ms]"},
+    {TILEDB_DATETIME_US, "M8[us]"},
+    {TILEDB_DATETIME_NS, "M8[ns]"},
+    {TILEDB_DATETIME_PS, "M8[ps]"},
+    {TILEDB_DATETIME_FS, "M8[fs]"},
+    {TILEDB_DATETIME_AS, "M8[as]"},
+    {TILEDB_TIME_HR, "m8[h]"},
+    {TILEDB_TIME_MIN, "m8[m]"},
+    {TILEDB_TIME_SEC, "m8[s]"},
+    {TILEDB_TIME_MS, "m8[ms]"},
+    {TILEDB_TIME_US, "m8[us]"},
+    {TILEDB_TIME_NS, "m8[ns]"},
+    {TILEDB_TIME_PS, "m8[ps]"},
+    {TILEDB_TIME_FS, "m8[fs]"},
+    {TILEDB_TIME_AS, "m8[as]"},
+    {TILEDB_BLOB, "byte"},
+    {TILEDB_BOOL, "bool"},
+};
+
+std::unordered_map<std::string, tiledb_datatype_t> _np_name_to_tdb_dtype = {
+    {"int32", TILEDB_INT32},
+    {"int64", TILEDB_INT64},
+    {"float32", TILEDB_FLOAT32},
+    {"float64", TILEDB_FLOAT64},
+    {"int8", TILEDB_INT8},
+    {"uint8", TILEDB_UINT8},
+    {"int16", TILEDB_INT16},
+    {"uint16", TILEDB_UINT16},
+    {"uint32", TILEDB_UINT32},
+    {"uint64", TILEDB_UINT64},
+    {"datetime64[Y]", TILEDB_DATETIME_YEAR},
+    {"datetime64[M]", TILEDB_DATETIME_MONTH},
+    {"datetime64[W]", TILEDB_DATETIME_WEEK},
+    {"datetime64[D]", TILEDB_DATETIME_DAY},
+    {"datetime64[h]", TILEDB_DATETIME_HR},
+    {"datetime64[m]", TILEDB_DATETIME_MIN},
+    {"datetime64[s]", TILEDB_DATETIME_SEC},
+    {"datetime64[ms]", TILEDB_DATETIME_MS},
+    {"datetime64[us]", TILEDB_DATETIME_US},
+    {"datetime64[ns]", TILEDB_DATETIME_NS},
+    {"datetime64[ps]", TILEDB_DATETIME_PS},
+    {"datetime64[fs]", TILEDB_DATETIME_FS},
+    {"datetime64[as]", TILEDB_DATETIME_AS},
+    /* duration types map to timedelta */
+    {"timedelta64[h]", TILEDB_TIME_HR},
+    {"timedelta64[m]", TILEDB_TIME_MIN},
+    {"timedelta64[s]", TILEDB_TIME_SEC},
+    {"timedelta64[ms]", TILEDB_TIME_MS},
{"timedelta64[us]", TILEDB_TIME_US}, + {"timedelta64[ns]", TILEDB_TIME_NS}, + {"timedelta64[ps]", TILEDB_TIME_PS}, + {"timedelta64[fs]", TILEDB_TIME_FS}, + {"timedelta64[as]", TILEDB_TIME_AS}, + {"bool", TILEDB_BOOL}, +}; + +py::dtype tdb_to_np_dtype(tiledb_datatype_t type, uint32_t cell_val_num) { + if (type == TILEDB_CHAR || type == TILEDB_STRING_UTF8 || + type == TILEDB_STRING_ASCII) { + std::string base_str = (type == TILEDB_STRING_UTF8) ? "|U" : "|S"; + if (cell_val_num < TILEDB_VAR_NUM) + base_str += std::to_string(cell_val_num); + return py::dtype(base_str); + } + + if (cell_val_num == 1) { + if (type == TILEDB_STRING_UTF16 || type == TILEDB_STRING_UTF32) + TPY_ERROR_LOC("Unimplemented UTF16 or UTF32 string conversion!"); + if (type == TILEDB_STRING_UCS2 || type == TILEDB_STRING_UCS4) + TPY_ERROR_LOC("Unimplemented UCS2 or UCS4 string conversion!"); + + if (_tdb_to_np_name_dtype.count(type) == 1) + return py::dtype(_tdb_to_np_name_dtype[type]); + } + + if (cell_val_num == 2) { + if (type == TILEDB_FLOAT32) + return py::dtype("complex64"); + if (type == TILEDB_FLOAT64) + return py::dtype("complex128"); + } + + if (cell_val_num == TILEDB_VAR_NUM) + return tdb_to_np_dtype(type, 1); + + if (cell_val_num > 1) { + py::dtype base_dtype = tdb_to_np_dtype(type, 1); + py::tuple rec_elem = py::make_tuple("", base_dtype); + py::list rec_list; + for (size_t i = 0; i < cell_val_num; i++) + rec_list.append(rec_elem); + // note: we call the 'dtype' constructor b/c py::dtype does not accept + // list + auto np = py::module::import("numpy"); + auto np_dtype = np.attr("dtype"); + return np_dtype(rec_list); + } + + TPY_ERROR_LOC("tiledb datatype not understood ('" + + tiledb::impl::type_to_str(type) + + "', cell_val_num: " + std::to_string(cell_val_num) + ")"); +} + +tiledb_datatype_t np_to_tdb_dtype(py::dtype type) { + auto name = py::str(py::getattr(type, "name")); + if (_np_name_to_tdb_dtype.count(name) == 1) + return _np_name_to_tdb_dtype[name]; + + auto kind = py::str(py::getattr(type, "kind")); + if (kind == py::str("S")) + return TILEDB_STRING_ASCII; + if (kind == py::str("U")) + return TILEDB_STRING_UTF8; + + TPY_ERROR_LOC("could not handle numpy dtype"); +} + +/** + * @brief Convert ArrayBuffers to Arrow table. 
+ *
+ * @param cbs ArrayBuffers
+ * @return py::object
+ */
+py::object _buffer_to_table(std::shared_ptr<ArrayBuffers> buffers) {
+    auto pa = py::module::import("pyarrow");
+    auto pa_table_from_arrays = pa.attr("Table").attr("from_arrays");
+    auto pa_array_import = pa.attr("Array").attr("_import_from_c");
+    auto pa_schema_import = pa.attr("Schema").attr("_import_from_c");
+
+    py::list array_list;
+    py::list names;
+
+    for (auto& name : buffers->names()) {
+        auto column = buffers->at(name);
+        auto [pa_array, pa_schema] = ArrowAdapter::to_arrow(column);
+        auto array = pa_array_import(py::capsule(pa_array.get()),
+                                     py::capsule(pa_schema.get()));
+        array_list.append(array);
+        names.append(name);
+    }
+
+    return pa_table_from_arrays(array_list, names);
+}
+
+std::optional<py::object> to_table(
+    std::optional<std::shared_ptr<ArrayBuffers>> buffers){
+    // If more data was read, convert it to an arrow table and return
+    if (buffers.has_value()) {
+        return _buffer_to_table(*buffers);
+    }
+
+    // No data was read, the query is complete, return nullopt
+    return std::nullopt;
+}
+
+}
\ No newline at end of file
diff --git a/apis/python/src/tiledbsoma/common.h b/apis/python/src/tiledbsoma/common.h
new file mode 100644
index 0000000000..42173acba4
--- /dev/null
+++ b/apis/python/src/tiledbsoma/common.h
@@ -0,0 +1,139 @@
+#include <stdexcept>
+
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/pytypes.h>
+#include <pybind11/stl.h>
+#include <pybind11/stl_bind.h>
+
+#include <tiledb/tiledb>  // C++
+#include <tiledbsoma/tiledbsoma>
+
+using namespace std;
+using namespace tiledb;
+namespace py = pybind11;
+
+#define TPY_ERROR_LOC(m) throw TileDBSOMAPyError(m);
+
+class TileDBSOMAPyError : std::runtime_error {
+public:
+    explicit TileDBSOMAPyError(const char *m) : std::runtime_error(m) {}
+    explicit TileDBSOMAPyError(std::string m) : std::runtime_error(m.c_str()) {}
+
+public:
+    virtual const char *what() const noexcept override {
+        return std::runtime_error::what();
+    }
+};
+
+namespace tiledbsoma {
+
+py::dtype tdb_to_np_dtype(tiledb_datatype_t type, uint32_t cell_val_num);
+tiledb_datatype_t np_to_tdb_dtype(py::dtype type);
+std::optional<py::object> to_table(
+    std::optional<std::shared_ptr<ArrayBuffers>> buffers);
+
+class PyQueryCondition {
+
+private:
+    Context ctx_;
+    shared_ptr<QueryCondition> qc_;
+
+public:
+    PyQueryCondition(){
+        try {
+            // create one global context for all query conditions
+            static Context context = Context();
+            ctx_ = context;
+            qc_ = shared_ptr<QueryCondition>(new QueryCondition(ctx_));
+        } catch (TileDBError &e) {
+            TPY_ERROR_LOC(e.what());
+        }
+    }
+
+    PyQueryCondition(py::object ctx) {
+        (void)ctx;
+        try {
+            // create one global context for all query conditions
+            static Context context = Context();
+            ctx_ = context;
+            qc_ = shared_ptr<QueryCondition>(new QueryCondition(ctx_));
+        } catch (TileDBError &e) {
+            TPY_ERROR_LOC(e.what());
+        }
+    }
+
+    void init(const string &attribute_name, const string &condition_value,
+              tiledb_query_condition_op_t op) {
+        try {
+            qc_->init(attribute_name, condition_value, op);
+        } catch (TileDBError &e) {
+            TPY_ERROR_LOC(e.what());
+        }
+    }
+
+    template <typename T>
+    void init(const string &attribute_name, T condition_value,
+              tiledb_query_condition_op_t op) {
+        try {
+            qc_->init(attribute_name, &condition_value, sizeof(condition_value), op);
+        } catch (TileDBError &e) {
+            TPY_ERROR_LOC(e.what());
+        }
+    }
+
+    shared_ptr<QueryCondition> ptr() { return qc_; }
+
+    py::capsule __capsule__() { return py::capsule(&qc_, "qc"); }
+
+    template <typename T>
+    static PyQueryCondition
+    create(const std::string &field_name,
+           const std::vector<T> &values, tiledb_query_condition_op_t op) {
+        auto pyqc = PyQueryCondition();
+
+        const Context ctx = std::as_const(pyqc.ctx_);
+
+        auto set_membership_qc =
+            QueryConditionExperimental::create(ctx, field_name, values, op);
+
+        pyqc.qc_ = std::make_shared<QueryCondition>(std::move(set_membership_qc));
+
+        return pyqc;
+    }
+
+    PyQueryCondition
+    combine(PyQueryCondition qc,
+            tiledb_query_condition_combination_op_t combination_op) const {
+
+        auto pyqc = PyQueryCondition(nullptr, ctx_.ptr().get());
+
+        tiledb_query_condition_t *combined_qc = nullptr;
+        ctx_.handle_error(
+            tiledb_query_condition_alloc(ctx_.ptr().get(), &combined_qc));
+
+        ctx_.handle_error(tiledb_query_condition_combine(
+            ctx_.ptr().get(), qc_->ptr().get(), qc.qc_->ptr().get(),
+            combination_op, &combined_qc));
+
+        pyqc.qc_ = std::shared_ptr<QueryCondition>(
+            new QueryCondition(pyqc.ctx_, combined_qc));
+
+        return pyqc;
+    }
+
+private:
+    PyQueryCondition(shared_ptr<QueryCondition> qc, tiledb_ctx_t *c_ctx)
+        : qc_(qc) {
+        ctx_ = Context(c_ctx, false);
+    }
+
+    void set_ctx(py::object ctx) {
+        tiledb_ctx_t *c_ctx;
+        if ((c_ctx = (py::capsule)ctx.attr("__capsule__")()) == nullptr)
+            TPY_ERROR_LOC("Invalid context pointer!")
+
+        ctx_ = Context(c_ctx, false);
+    }
+};
+}
diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py
index b1abd5d298..67d5651fe3 100644
--- a/apis/python/src/tiledbsoma/io/ingest.py
+++ b/apis/python/src/tiledbsoma/io/ingest.py
@@ -1777,7 +1777,7 @@ def _write_matrix_to_denseNDArray(

 def _read_nonempty_domain(arr: TileDBArray) -> Any:
     try:
-        return arr._handle.reader.nonempty_domain()
+        return arr._handle.non_empty_domain()
     except SOMAError:
         # This means that we're open in write-only mode.
         # Reopen the array in read mode.
@@ -1785,7 +1785,7 @@ def _read_nonempty_domain(arr: TileDBArray) -> Any:
     cls = type(arr)

     with cls.open(arr.uri, "r", platform_config=None, context=arr.context) as readarr:
-        return readarr._handle.reader.nonempty_domain()
+        return readarr._handle.non_empty_domain()


 def _find_sparse_chunk_size(
@@ -2251,7 +2251,7 @@ def _coo_to_table(

 def _chunk_is_contained_in(
     chunk_bounds: Sequence[Tuple[int, int]],
-    storage_nonempty_domain: Optional[Sequence[Tuple[Optional[int], Optional[int]]]],
+    storage_nonempty_domain: Sequence[Tuple[Optional[int], Optional[int]]],
 ) -> bool:
     """
     Determines if a dim range is included within the array's non-empty domain. Ranges are inclusive
@@ -2269,7 +2269,7 @@ def _chunk_is_contained_in(
     user that they declare they are retrying the exact same input file -- and we do our
     best to fulfill their ask by checking the dimension being strided on.
     """
-    if storage_nonempty_domain is None:
+    if len(storage_nonempty_domain) == 0:
         return False

     if len(chunk_bounds) != len(storage_nonempty_domain):
@@ -2288,6 +2288,9 @@ def _chunk_is_contained_in_axis(
     stride_axis: int,
 ) -> bool:
     """Helper function for ``_chunk_is_contained_in``."""
+    if len(storage_nonempty_domain) == 0:
+        return False
+
     storage_lo, storage_hi = storage_nonempty_domain[stride_axis]
     if storage_lo is None or storage_hi is None:
         # E.g. an array has had its schema created but no data written yet
diff --git a/apis/python/src/tiledbsoma/pytiledbsoma.cc b/apis/python/src/tiledbsoma/pytiledbsoma.cc
index 9d25d018aa..eee25aad4e 100644
--- a/apis/python/src/tiledbsoma/pytiledbsoma.cc
+++ b/apis/python/src/tiledbsoma/pytiledbsoma.cc
@@ -1,231 +1,49 @@
-/**
- * @file pytiledbsoma.cc
- *
- * @section LICENSE
- *
- * The MIT License
- *
- * @copyright Copyright (c) 2022 TileDB, Inc.
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * @section DESCRIPTION - * - * This file defines the a pybind11 api into SOMA C++ library. - */ +#include +#include #include #include #include #include -#include -#include -#include +#include "common.h" -#include "query_condition.cc" - -#define DENUM(x) .value(#x, TILEDB_##x) - -using namespace tiledbsoma; +namespace libtiledbsomacpp { namespace py = pybind11; using namespace py::literals; +using namespace tiledbsoma; -namespace tiledbsoma { - -std::unordered_map _tdb_to_np_name_dtype = { - {TILEDB_INT32, "int32"}, - {TILEDB_INT64, "int64"}, - {TILEDB_FLOAT32, "float32"}, - {TILEDB_FLOAT64, "float64"}, - {TILEDB_INT8, "int8"}, - {TILEDB_UINT8, "uint8"}, - {TILEDB_INT16, "int16"}, - {TILEDB_UINT16, "uint16"}, - {TILEDB_UINT32, "uint32"}, - {TILEDB_UINT64, "uint64"}, - {TILEDB_STRING_ASCII, "S"}, - {TILEDB_STRING_UTF8, "U1"}, - {TILEDB_CHAR, "S1"}, - {TILEDB_DATETIME_YEAR, "M8[Y]"}, - {TILEDB_DATETIME_MONTH, "M8[M]"}, - {TILEDB_DATETIME_WEEK, "M8[W]"}, - {TILEDB_DATETIME_DAY, "M8[D]"}, - {TILEDB_DATETIME_HR, "M8[h]"}, - {TILEDB_DATETIME_MIN, "M8[m]"}, - {TILEDB_DATETIME_SEC, "M8[s]"}, - {TILEDB_DATETIME_MS, "M8[ms]"}, - {TILEDB_DATETIME_US, "M8[us]"}, - {TILEDB_DATETIME_NS, "M8[ns]"}, - {TILEDB_DATETIME_PS, "M8[ps]"}, - {TILEDB_DATETIME_FS, "M8[fs]"}, - {TILEDB_DATETIME_AS, "M8[as]"}, - {TILEDB_TIME_HR, "m8[h]"}, - {TILEDB_TIME_MIN, "m8[m]"}, - {TILEDB_TIME_SEC, "m8[s]"}, - {TILEDB_TIME_MS, "m8[ms]"}, - {TILEDB_TIME_US, "m8[us]"}, - {TILEDB_TIME_NS, "m8[ns]"}, - {TILEDB_TIME_PS, "m8[ps]"}, - {TILEDB_TIME_FS, "m8[fs]"}, - {TILEDB_TIME_AS, "m8[as]"}, - {TILEDB_BLOB, "byte"}, - {TILEDB_BOOL, "bool"}, -}; - -py::dtype tdb_to_np_dtype(tiledb_datatype_t type, uint32_t cell_val_num) { - if (type == TILEDB_CHAR || type == TILEDB_STRING_UTF8 || - type == TILEDB_STRING_ASCII) { - std::string base_str = (type == TILEDB_STRING_UTF8) ? 
"|U" : "|S"; - if (cell_val_num < TILEDB_VAR_NUM) - base_str += std::to_string(cell_val_num); - return py::dtype(base_str); - } - - if (cell_val_num == 1) { - if (type == TILEDB_STRING_UTF16 || type == TILEDB_STRING_UTF32) - TileDBSOMAError("Unimplemented UTF16 or UTF32 string conversion!"); - if (type == TILEDB_STRING_UCS2 || type == TILEDB_STRING_UCS4) - TileDBSOMAError("Unimplemented UCS2 or UCS4 string conversion!"); - - if (_tdb_to_np_name_dtype.count(type) == 1) - return py::dtype(_tdb_to_np_name_dtype[type]); - } - - if (cell_val_num == 2) { - if (type == TILEDB_FLOAT32) - return py::dtype("complex64"); - if (type == TILEDB_FLOAT64) - return py::dtype("complex128"); - } - - if (cell_val_num == TILEDB_VAR_NUM) - return tdb_to_np_dtype(type, 1); - - if (cell_val_num > 1) { - py::dtype base_dtype = tdb_to_np_dtype(type, 1); - py::tuple rec_elem = py::make_tuple("", base_dtype); - py::list rec_list; - for (size_t i = 0; i < cell_val_num; i++) - rec_list.append(rec_elem); - // note: we call the 'dtype' constructor b/c py::dtype does not accept - // list - auto np = py::module::import("numpy"); - auto np_dtype = np.attr("dtype"); - return np_dtype(rec_list); - } - - TileDBSOMAError("tiledb datatype not understood ('" + - tiledb::impl::type_to_str(type) + - "', cell_val_num: " + std::to_string(cell_val_num) + ")"); -} - -py::tuple get_enum(SOMAArray& sr, std::string attr_name){ - auto attr_to_enmrs = sr.get_attr_to_enum_mapping(); - if(attr_to_enmrs.count(attr_name) == 0) - throw TileDBSOMAError("Given attribute does not have enumeration"); - - Enumeration enmr(attr_to_enmrs.at(attr_name)); - - switch (enmr.type()) { - case TILEDB_UINT8: - return py::tuple(py::cast(enmr.as_vector())); - case TILEDB_INT8: - return py::tuple(py::cast(enmr.as_vector())); - case TILEDB_UINT16: - return py::tuple(py::cast(enmr.as_vector())); - case TILEDB_INT16: - return py::tuple(py::cast(enmr.as_vector())); - case TILEDB_UINT32: - return py::tuple(py::cast(enmr.as_vector())); - case TILEDB_INT32: - return py::tuple(py::cast(enmr.as_vector())); - case TILEDB_UINT64: - return py::tuple(py::cast(enmr.as_vector())); - case TILEDB_INT64: - return py::tuple(py::cast(enmr.as_vector())); - case TILEDB_FLOAT32: - return py::tuple(py::cast(enmr.as_vector())); - case TILEDB_FLOAT64: - return py::tuple(py::cast(enmr.as_vector())); - case TILEDB_STRING_ASCII: - case TILEDB_STRING_UTF8: - case TILEDB_CHAR: - return py::tuple(py::cast(enmr.as_vector())); - case TILEDB_BOOL: - return py::tuple(py::cast(enmr.as_vector())); - default: - throw TileDBSOMAError("Unsupported enumeration type."); - } -} - -bool get_enum_is_ordered(SOMAArray& sr, std::string attr_name){ - auto attr_to_enmrs = sr.get_attr_to_enum_mapping(); - if(attr_to_enmrs.count(attr_name) == 0) - throw TileDBSOMAError("Given attribute does not have enumeration"); - return attr_to_enmrs.at(attr_name).ordered(); -} - -/** - * @brief Convert ArrayBuffers to Arrow table. 
- * - * @param cbs ArrayBuffers - * @return py::object - */ -py::object _buffer_to_table(std::shared_ptr buffers) { - auto pa = py::module::import("pyarrow"); - auto pa_table_from_arrays = pa.attr("Table").attr("from_arrays"); - auto pa_array_import = pa.attr("Array").attr("_import_from_c"); - auto pa_schema_import = pa.attr("Schema").attr("_import_from_c"); - - py::list array_list; - py::list names; - - for (auto& name : buffers->names()) { - auto column = buffers->at(name); - auto [pa_array, pa_schema] = ArrowAdapter::to_arrow(column); - auto array = pa_array_import(py::capsule(pa_array.get()), - py::capsule(pa_schema.get())); - array_list.append(array); - names.append(name); - } - - return pa_table_from_arrays(array_list, names); -} - -std::optional to_table( - std::optional> buffers){ - // If more data was read, convert it to an arrow table and return - if (buffers.has_value()) { - return _buffer_to_table(*buffers); - } +template +using overload_cast_ = pybind11::detail::overload_cast_impl; - // No data was read, the query is complete, return nullopt - return std::nullopt; -} +void load_soma_array(py::module &); +void load_soma_object(py::module &); +void load_soma_dataframe(py::module &); +void load_query_condition(py::module &); -/** - * @brief pybind11 bindings - * - */ PYBIND11_MODULE(pytiledbsoma, m) { + py::register_exception(m, "SOMAError"); + + /* We need to make sure C++ TileDBSOMAError is translated to a correctly-typed + * Python error + */ + py::register_exception_translator([](std::exception_ptr p) { + auto tiledb_soma_error = + (py::object)py::module::import("tiledbsoma").attr("SOMAError"); + + try { + if (p) + std::rethrow_exception(p); + } catch (const TileDBSOMAError &e) { + PyErr_SetString(tiledb_soma_error.ptr(), e.what()); + } catch (const TileDBSOMAPyError &e) { + PyErr_SetString(tiledb_soma_error.ptr(), e.what()); + } catch (py::builtin_exception &e) { + throw; + }; + }); + py::enum_(m, "OpenMode") .value("read", OpenMode::read) .value("write", OpenMode::write); @@ -235,8 +53,6 @@ PYBIND11_MODULE(pytiledbsoma, m) { .value("rowmajor", ResultOrder::rowmajor) .value("colmajor", ResultOrder::colmajor); - tiledbpy::load_query_condition(m); - m.doc() = "SOMA acceleration library"; m.def("version", []() { return tiledbsoma::version::as_string(); }); @@ -273,427 +89,6 @@ PYBIND11_MODULE(pytiledbsoma, m) { }, "Print TileDB internal statistics. 
Lifecycle: experimental."); - py::class_(m, "SOMAArray") - .def( - py::init( - [](std::string_view uri, - std::string_view name, - std::optional> column_names_in, - std::string_view batch_size, - ResultOrder result_order, - std::map platform_config, - std::optional> timestamp) { - // Handle optional args - std::vector column_names; - if (column_names_in) { - column_names = *column_names_in; - } - - return SOMAArray::open( - OpenMode::read, - uri, - name, - platform_config, - column_names, - batch_size, - result_order, - timestamp); - }), - "uri"_a, - py::kw_only(), - "name"_a = "unnamed", - "column_names"_a = py::none(), - "batch_size"_a = "auto", - "result_order"_a = ResultOrder::automatic, - "platform_config"_a = py::dict(), - "timestamp"_a = py::none()) - - .def( - "set_condition", - [](SOMAArray& reader, - py::object py_query_condition, - py::object py_schema){ - auto attr_to_enum = reader.get_attr_to_enum_mapping(); - std::map enum_to_dtype; - for(auto const& [attr, enmr] : attr_to_enum){ - enum_to_dtype[attr] = tdb_to_np_dtype( - enmr.type(), enmr.cell_val_num()); - } - auto column_names = reader.column_names(); - // Handle query condition based on - // TileDB-Py::PyQuery::set_attr_cond() - QueryCondition* qc = nullptr; - if (!py_query_condition.is(py::none())) { - py::object init_pyqc = py_query_condition.attr( - "init_query_condition"); - try { - // Column names will be updated with columns present - // in the query condition - auto new_column_names = - init_pyqc(py_schema, enum_to_dtype, column_names) - .cast>(); - // Update the column_names list if it was not empty, - // otherwise continue selecting all columns with an - // empty column_names list - if (!column_names.empty()) { - column_names = new_column_names; - } - } catch (const std::exception& e) { - throw TileDBSOMAError(e.what()); - } - qc = py_query_condition.attr("c_obj") - .cast() - .ptr() - .get(); - } - reader.reset(column_names); - - // Release python GIL after we're done accessing python - // objects - py::gil_scoped_release release; - // Set query condition if present - if (qc) { - reader.set_condition(*qc); - } - }, - "py_query_condition"_a, - "py_schema"_a) - - .def( - "reset", - [](SOMAArray& reader, - std::optional> column_names_in, - std::string_view batch_size, - ResultOrder result_order) { - // Handle optional args - std::vector column_names; - if (column_names_in) { - column_names = *column_names_in; - } - - // Reset state of the existing SOMAArray object - reader.reset(column_names, batch_size, result_order); - }, - py::kw_only(), - "column_names"_a = py::none(), - "batch_size"_a = "auto", - "result_order"_a = ResultOrder::automatic) - - // After this are short functions expected to be invoked when the coords - // are Python list/tuple, or NumPy arrays. Arrow arrays are in this - // long if-else-if function. 
- .def( - "set_dim_points_arrow", - [](SOMAArray& reader, - const std::string& dim, - py::object py_arrow_array, - int partition_index, - int partition_count) { - // Create a list of array chunks - py::list array_chunks; - if (py::hasattr(py_arrow_array, "chunks")) { - array_chunks = py_arrow_array.attr("chunks") - .cast(); - } else { - array_chunks.append(py_arrow_array); - } - - for (const pybind11::handle array : array_chunks) { - ArrowSchema arrow_schema; - ArrowArray arrow_array; - uintptr_t arrow_schema_ptr = (uintptr_t)(&arrow_schema); - uintptr_t arrow_array_ptr = (uintptr_t)(&arrow_array); - - // Call array._export_to_c to get arrow array and schema - // - // If ever a NumPy array gets in here, there will be an - // exception like "AttributeError: 'numpy.ndarray' object - // has no attribute '_export_to_c'". - array.attr("_export_to_c")( - arrow_array_ptr, arrow_schema_ptr); - - auto coords = array.attr("tolist")(); - - if (!strcmp(arrow_schema.format, "l")) { - reader.set_dim_points( - dim, coords.cast>()); - } else if (!strcmp(arrow_schema.format, "i")) { - reader.set_dim_points( - dim, coords.cast>()); - } else if (!strcmp(arrow_schema.format, "s")) { - reader.set_dim_points( - dim, coords.cast>()); - } else if (!strcmp(arrow_schema.format, "c")) { - reader.set_dim_points( - dim, coords.cast>()); - } else if (!strcmp(arrow_schema.format, "L")) { - reader.set_dim_points( - dim, coords.cast>()); - } else if (!strcmp(arrow_schema.format, "I")) { - reader.set_dim_points( - dim, coords.cast>()); - } else if (!strcmp(arrow_schema.format, "S")) { - reader.set_dim_points( - dim, coords.cast>()); - } else if (!strcmp(arrow_schema.format, "C")) { - reader.set_dim_points( - dim, coords.cast>()); - } else if (!strcmp(arrow_schema.format, "f")) { - reader.set_dim_points( - dim, coords.cast>()); - } else if (!strcmp(arrow_schema.format, "g")) { - reader.set_dim_points( - dim, coords.cast>()); - } else if ( - !strcmp(arrow_schema.format, "u") || - !strcmp(arrow_schema.format, "z")) { - reader.set_dim_points( - dim, coords.cast>()); - } else if ( - !strcmp(arrow_schema.format, "tss:") || - !strcmp(arrow_schema.format, "tsm:") || - !strcmp(arrow_schema.format, "tsu:") || - !strcmp(arrow_schema.format, "tsn:")) { - // convert the Arrow Array to int64 - auto pa = py::module::import("pyarrow"); - coords = array.attr("cast")(pa.attr("int64")()).attr("tolist")(); - reader.set_dim_points( - dim, coords.cast>()); - } else if ( - !strcmp(arrow_schema.format, "U") || - !strcmp(arrow_schema.format, "Z")) { - reader.set_dim_points( - dim, coords.cast>()); - } else { - throw TileDBSOMAError( - "[pytiledbsoma] set_dim_points: type=" + std::string(arrow_schema.format) + " not " - "supported"); - } - - // Release arrow schema - arrow_schema.release(&arrow_schema); - } - }, - "dim"_a, - "py_arrow_array"_a, - "partition_index"_a = 0, - "partition_count"_a = 1) - - // The following short functions are expected to be invoked when the - // coords are Python list/tuple, or NumPy arrays. Arrow arrays are in - // the long if-else-if function above. - // - // Binding overloaded methods to templated member functions requires - // more effort, see: - // https://pybind11.readthedocs.io/en/stable/classes.html#overloaded-methods - - // In an initial version of this file we had `set_dim_ranges` relying - // solely on type-overloading. This worked since we supported only int - // and string indices. 
In a subsequent version we are now supporting - // various NumPy/PyArrow types including float32, float64, int8, uint16, - // etc. It is an unfortunate fact that pybind11 does _not_ successfully - // disambiguate between float32 and float64, or between int8 and int64, - // etc. given that we ask it to disambiguate using not just types but - // std::vector of types or std::vector of std::pair of types. - // Experiments have shown that when both float32 and float64 are - // implemented with overloaded names to be differentiated solely by - // type, pybind11 uses the _first found_. Therefore it is necessary for - // us to no longer use common overloaded names. - - .def( - "set_dim_points_string_or_bytes", - static_cast&)>( - &SOMAArray::set_dim_points)) - - .def( - "set_dim_points_float64", - static_cast&)>( - &SOMAArray::set_dim_points)) - - .def( - "set_dim_points_float32", - static_cast&)>( - &SOMAArray::set_dim_points)) - - .def( - "set_dim_points_int64", - static_cast&)>( - &SOMAArray::set_dim_points)) - - .def( - "set_dim_points_int32", - static_cast&)>( - &SOMAArray::set_dim_points)) - - .def( - "set_dim_points_int16", - static_cast&)>( - &SOMAArray::set_dim_points)) - - .def( - "set_dim_points_int8", - static_cast&)>( - &SOMAArray::set_dim_points)) - - .def( - "set_dim_points_uint64", - static_cast&)>( - &SOMAArray::set_dim_points)) - - .def( - "set_dim_points_uint32", - static_cast&)>( - &SOMAArray::set_dim_points)) - - .def( - "set_dim_points_uint16", - static_cast&)>( - &SOMAArray::set_dim_points)) - - .def( - "set_dim_points_uint8", - static_cast&)>( - &SOMAArray::set_dim_points)) - - // In an initial version of this file we had `set_dim_ranges` relying - // solely on type-overloading. This worked since we supported only int - // and string indices. In a subsequent version we are now supporting - // various NumPy/PyArrow types including float32, float64, int8, uint16, - // etc. It is an unfortunate fact that pybind11 does _not_ successfully - // disambiguate between float32 and float64, or between int8 and int64, - // etc. given that we ask it to disambiguate using not just types but - // std::vector of types or std::vector of std::pair of types. - // Experiments have shown that when both float32 and float64 are - // implemented with overloaded names to be differentiated solely by - // type, pybind11 uses the _first found_. Therefore it is necessary for - // us to no longer use common overloaded names. 
- - .def( - "set_dim_ranges_string_or_bytes", - static_cast>&)>( - &SOMAArray::set_dim_ranges)) - - .def( - "set_dim_ranges_int64", - static_cast>&)>( - &SOMAArray::set_dim_ranges)) - - .def( - "set_dim_ranges_int32", - static_cast>&)>( - &SOMAArray::set_dim_ranges)) - - .def( - "set_dim_ranges_int16", - static_cast>&)>( - &SOMAArray::set_dim_ranges)) - - .def( - "set_dim_ranges_int8", - static_cast>&)>( - &SOMAArray::set_dim_ranges)) - - .def( - "set_dim_ranges_uint64", - static_cast>&)>( - &SOMAArray::set_dim_ranges)) - - .def( - "set_dim_ranges_uint32", - static_cast>&)>( - &SOMAArray::set_dim_ranges)) - - .def( - "set_dim_ranges_uint16", - static_cast>&)>( - &SOMAArray::set_dim_ranges)) - - .def( - "set_dim_ranges_uint8", - static_cast>&)>( - &SOMAArray::set_dim_ranges)) - - .def( - "set_dim_ranges_float64", - static_cast>&)>( - &SOMAArray::set_dim_ranges)) - - .def( - "set_dim_ranges_float32", - static_cast>&)>( - &SOMAArray::set_dim_ranges)) - - .def("results_complete", &SOMAArray::results_complete) - - .def( - "read_next", - [](SOMAArray& reader) -> std::optional { - // Release python GIL before reading data - py::gil_scoped_release release; - - // Try to read more data - auto buffers = reader.read_next(); - - // If more data was read, convert it to an arrow table and - // return - if (buffers.has_value()) { - // Acquire python GIL before accessing python objects - py::gil_scoped_acquire acquire; - return to_table(*buffers); - } - - // No data was read, the query is complete, return nullopt - return std::nullopt; - }) - - .def("nnz", &SOMAArray::nnz, py::call_guard()) - - .def_property_readonly("shape", &SOMAArray::shape) - - .def_property_readonly("uri", &SOMAArray::uri) - - .def_property_readonly("column_names", &SOMAArray::column_names) - - .def_property_readonly("result_order", &SOMAArray::result_order) - - .def("get_enum", get_enum) - - .def("get_enum_is_ordered", get_enum_is_ordered) - - .def("get_enum_label_on_attr", &SOMAArray::get_enum_label_on_attr); // Efficient C++ re-indexing (aka hashing unique key values to an index // between 0 and number of keys - 1) based on khash py::class_(m, "IntIndexer") @@ -702,8 +97,8 @@ PYBIND11_MODULE(pytiledbsoma, m) { .def( "map_locations", [](IntIndexer& indexer, - py::array_t keys, - int num_threads) { + py::array_t keys, + int num_threads) { auto buffer = keys.request(); int64_t* data = static_cast(buffer.ptr); size_t length = buffer.shape[0]; @@ -712,8 +107,8 @@ PYBIND11_MODULE(pytiledbsoma, m) { .def( "map_locations", [](IntIndexer& indexer, - std::vector keys, - int num_threads) { + std::vector keys, + int num_threads) { indexer.map_locations(keys.data(), keys.size(), num_threads); }) // Perform lookup for a large input array of keys and return the looked @@ -740,8 +135,8 @@ PYBIND11_MODULE(pytiledbsoma, m) { .def( "get_indexer", [](IntIndexer& indexer, - py::array_t lookups, - py::array_t& results) { + py::array_t lookups, + py::array_t& results) { auto input_buffer = lookups.request(); int64_t* input_ptr = static_cast(input_buffer.ptr); size_t size = input_buffer.shape[0]; @@ -752,5 +147,11 @@ PYBIND11_MODULE(pytiledbsoma, m) { size_t results_size = input_buffer.shape[0]; indexer.lookup(input_ptr, input_ptr, size); }); + + load_soma_array(m); + load_soma_object(m); + load_soma_dataframe(m); + load_query_condition(m); } -} // namespace tiledbsoma + +}; diff --git a/apis/python/src/tiledbsoma/query_condition.cc b/apis/python/src/tiledbsoma/query_condition.cc index c8f1134c6a..2cf021c435 100644 --- 
a/apis/python/src/tiledbsoma/query_condition.cc +++ b/apis/python/src/tiledbsoma/query_condition.cc @@ -30,137 +30,20 @@ * This file implements the TileDB-Py query condition. */ -// clang-format off -#include -#include - -#include - -// #define TILEDB_DEPRECATED -// #define TILEDB_DEPRECATED_EXPORT - -// #include "util.h" -#include // C++ -#include +#include "common.h" #define TPY_ERROR_LOC(m) throw tiledbsoma::TileDBSOMAError(m); #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 2 - #if !defined(NDEBUG) -//#include "debug.cc" #endif - -namespace tiledbpy { +namespace libtiledbsomacpp { using namespace std; using namespace tiledb; namespace py = pybind11; -using namespace pybind11::literals; - -class PyQueryCondition { - -private: - Context ctx_; - shared_ptr qc_; - -public: - PyQueryCondition(){ - try { - // create one global context for all query conditions - static Context context = Context(); - ctx_ = context; - qc_ = shared_ptr(new QueryCondition(ctx_)); - } catch (TileDBError &e) { - TPY_ERROR_LOC(e.what()); - } - } - - PyQueryCondition(py::object ctx) { - (void)ctx; - try { - // create one global context for all query conditions - static Context context = Context(); - ctx_ = context; - qc_ = shared_ptr(new QueryCondition(ctx_)); - } catch (TileDBError &e) { - TPY_ERROR_LOC(e.what()); - } - } - - void init(const string &attribute_name, const string &condition_value, - tiledb_query_condition_op_t op) { - try { - qc_->init(attribute_name, condition_value, op); - } catch (TileDBError &e) { - TPY_ERROR_LOC(e.what()); - } - } - - template - void init(const string &attribute_name, T condition_value, - tiledb_query_condition_op_t op) { - try { - qc_->init(attribute_name, &condition_value, sizeof(condition_value), op); - } catch (TileDBError &e) { - TPY_ERROR_LOC(e.what()); - } - } - - shared_ptr ptr() { return qc_; } - - py::capsule __capsule__() { return py::capsule(&qc_, "qc"); } - - template - static PyQueryCondition - create(const std::string &field_name, - const std::vector &values, tiledb_query_condition_op_t op) { - auto pyqc = PyQueryCondition(); - - const Context ctx = std::as_const(pyqc.ctx_); - - auto set_membership_qc = - QueryConditionExperimental::create(ctx, field_name, values, op); - - pyqc.qc_ = std::make_shared(std::move(set_membership_qc)); - - return pyqc; - } - - PyQueryCondition - combine(PyQueryCondition qc, - tiledb_query_condition_combination_op_t combination_op) const { - - auto pyqc = PyQueryCondition(nullptr, ctx_.ptr().get()); - - tiledb_query_condition_t *combined_qc = nullptr; - ctx_.handle_error( - tiledb_query_condition_alloc(ctx_.ptr().get(), &combined_qc)); - - ctx_.handle_error(tiledb_query_condition_combine( - ctx_.ptr().get(), qc_->ptr().get(), qc.qc_->ptr().get(), - combination_op, &combined_qc)); - - pyqc.qc_ = std::shared_ptr( - new QueryCondition(pyqc.ctx_, combined_qc)); - - return pyqc; - } - -private: - PyQueryCondition(shared_ptr qc, tiledb_ctx_t *c_ctx) - : qc_(qc) { - ctx_ = Context(c_ctx, false); - } - - void set_ctx(py::object ctx) { - tiledb_ctx_t *c_ctx; - if ((c_ctx = (py::capsule)ctx.attr("__capsule__")()) == nullptr) - TPY_ERROR_LOC("Invalid context pointer!") - - ctx_ = Context(c_ctx, false); - } -}; // namespace tiledbpy +using namespace py::literals; +using namespace tiledbsoma; void load_query_condition(py::module &m) { py::class_(m, "PyQueryCondition", py::module_local()) @@ -299,7 +182,6 @@ void load_query_condition(py::module &m) { .value("TILEDB_AND", TILEDB_AND) .value("TILEDB_OR", TILEDB_OR) .export_values(); -} -}; // 
namespace tiledbpy
+}}
 
 #endif
diff --git a/apis/python/src/tiledbsoma/soma_array.cc b/apis/python/src/tiledbsoma/soma_array.cc
new file mode 100644
index 0000000000..2434c0b4ae
--- /dev/null
+++ b/apis/python/src/tiledbsoma/soma_array.cc
@@ -0,0 +1,506 @@
+/**
+ * @file   soma_array.cc
+ *
+ * @section LICENSE
+ *
+ * The MIT License
+ *
+ * @copyright Copyright (c) 2022 TileDB, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * @section DESCRIPTION
+ *
+ * This file defines the SOMAArray bindings.
+ */
+
+#include "common.h"
+
+#define DENUM(x) .value(#x, TILEDB_##x)
+namespace libtiledbsomacpp {
+
+namespace py = pybind11;
+using namespace py::literals;
+using namespace tiledbsoma;
+
+py::tuple get_enum(SOMAArray& sr, std::string attr_name){
+    auto attr_to_enmrs = sr.get_attr_to_enum_mapping();
+    if(attr_to_enmrs.count(attr_name) == 0)
+        TPY_ERROR_LOC("Given attribute does not have enumeration");
+
+    Enumeration enmr(attr_to_enmrs.at(attr_name));
+
+    switch (enmr.type()) {
+        case TILEDB_UINT8:
+            return py::tuple(py::cast(enmr.as_vector<uint8_t>()));
+        case TILEDB_INT8:
+            return py::tuple(py::cast(enmr.as_vector<int8_t>()));
+        case TILEDB_UINT16:
+            return py::tuple(py::cast(enmr.as_vector<uint16_t>()));
+        case TILEDB_INT16:
+            return py::tuple(py::cast(enmr.as_vector<int16_t>()));
+        case TILEDB_UINT32:
+            return py::tuple(py::cast(enmr.as_vector<uint32_t>()));
+        case TILEDB_INT32:
+            return py::tuple(py::cast(enmr.as_vector<int32_t>()));
+        case TILEDB_UINT64:
+            return py::tuple(py::cast(enmr.as_vector<uint64_t>()));
+        case TILEDB_INT64:
+            return py::tuple(py::cast(enmr.as_vector<int64_t>()));
+        case TILEDB_FLOAT32:
+            return py::tuple(py::cast(enmr.as_vector<float>()));
+        case TILEDB_FLOAT64:
+            return py::tuple(py::cast(enmr.as_vector<double>()));
+        case TILEDB_STRING_ASCII:
+        case TILEDB_STRING_UTF8:
+        case TILEDB_CHAR:
+            return py::tuple(py::cast(enmr.as_vector<std::string>()));
+        case TILEDB_BOOL:
+            return py::tuple(py::cast(enmr.as_vector<bool>()));
+        default:
+            TPY_ERROR_LOC("Unsupported enumeration type.");
+    }
+}
+
+bool get_enum_is_ordered(SOMAArray& sr, std::string attr_name){
+    auto attr_to_enmrs = sr.get_attr_to_enum_mapping();
+    if(attr_to_enmrs.count(attr_name) == 0)
+        TPY_ERROR_LOC("Given attribute does not have enumeration");
+    return attr_to_enmrs.at(attr_name).ordered();
+}
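
The enumeration helpers above surface on the Python side as plain methods of `SOMAArray` (bound further down in this file). A minimal sketch of how they might be exercised, assuming a hypothetical local array whose attribute `cell_type` carries an enumeration; URI and names are illustrative:

    import tiledbsoma.pytiledbsoma as clib

    sr = clib.SOMAArray("/tmp/soma_example/obs")      # hypothetical URI
    values = sr.get_enum("cell_type")                 # tuple of enum values, e.g. ("B cell", "T cell")
    ordered = sr.get_enum_is_ordered("cell_type")     # TileDB ordered-enumeration flag
    label = sr.get_enum_label_on_attr("cell_type")    # name of the enumeration on the attribute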
+void load_soma_array(py::module &m) {
+    py::class_<SOMAArray>(m, "SOMAArray")
+        .def(
+            py::init(
+                [](std::string_view uri,
+                   std::string_view name,
+                   std::optional<std::vector<std::string>> column_names_in,
+                   std::string_view batch_size,
+                   ResultOrder result_order,
+                   std::map<std::string, std::string> platform_config,
+                   std::optional<std::pair<uint64_t, uint64_t>> timestamp) {
+                    // Handle optional args
+                    std::vector<std::string> column_names;
+                    if (column_names_in) {
+                        column_names = *column_names_in;
+                    }
+
+                    return SOMAArray::open(
+                        OpenMode::read,
+                        uri,
+                        name,
+                        platform_config,
+                        column_names,
+                        batch_size,
+                        result_order,
+                        timestamp);
+                }),
+            "uri"_a,
+            py::kw_only(),
+            "name"_a = "unnamed",
+            "column_names"_a = py::none(),
+            "batch_size"_a = "auto",
+            "result_order"_a = ResultOrder::automatic,
+            "platform_config"_a = py::dict(),
+            "timestamp"_a = py::none())
+
+        .def(
+            "set_condition",
+            [](SOMAArray& reader,
+               py::object py_query_condition,
+               py::object py_schema){
+                auto column_names = reader.column_names();
+                // Handle query condition based on
+                // TileDB-Py::PyQuery::set_attr_cond()
+                QueryCondition* qc = nullptr;
+                if (!py_query_condition.is(py::none())) {
+                    py::object init_pyqc = py_query_condition.attr(
+                        "init_query_condition");
+                    try {
+                        // Column names will be updated with columns present
+                        // in the query condition
+                        auto new_column_names =
+                            init_pyqc(py_schema, column_names)
+                                .cast<std::vector<std::string>>();
+                        // Update the column_names list if it was not empty,
+                        // otherwise continue selecting all columns with an
+                        // empty column_names list
+                        if (!column_names.empty()) {
+                            column_names = new_column_names;
+                        }
+                    } catch (const std::exception& e) {
+                        TPY_ERROR_LOC(e.what());
+                    }
+                    qc = py_query_condition.attr("c_obj")
+                             .cast<PyQueryCondition>()
+                             .ptr()
+                             .get();
+                }
+                reader.reset(column_names);
+
+                // Release python GIL after we're done accessing python
+                // objects
+                py::gil_scoped_release release;
+                // Set query condition if present
+                if (qc) {
+                    reader.set_condition(*qc);
+                }
+            },
+            "py_query_condition"_a,
+            "py_schema"_a)
+
+        .def(
+            "reset",
+            [](SOMAArray& reader,
+               std::optional<std::vector<std::string>> column_names_in,
+               std::string_view batch_size,
+               ResultOrder result_order) {
+                // Handle optional args
+                std::vector<std::string> column_names;
+                if (column_names_in) {
+                    column_names = *column_names_in;
+                }
+
+                // Reset state of the existing SOMAArray object
+                reader.reset(column_names, batch_size, result_order);
+            },
+            py::kw_only(),
+            "column_names"_a = py::none(),
+            "batch_size"_a = "auto",
+            "result_order"_a = ResultOrder::automatic)
+
+        // After this are short functions expected to be invoked when the
+        // coords are Python list/tuple, or NumPy arrays. Arrow arrays are in
+        // this long if-else-if function.
+        .def(
+            "set_dim_points_arrow",
+            [](SOMAArray& reader,
+               const std::string& dim,
+               py::object py_arrow_array,
+               int partition_index,
+               int partition_count) {
+                // Create a list of array chunks
+                py::list array_chunks;
+                if (py::hasattr(py_arrow_array, "chunks")) {
+                    array_chunks = py_arrow_array.attr("chunks")
+                                       .cast<py::list>();
+                } else {
+                    array_chunks.append(py_arrow_array);
+                }
+
+                for (const pybind11::handle array : array_chunks) {
+                    ArrowSchema arrow_schema;
+                    ArrowArray arrow_array;
+                    uintptr_t arrow_schema_ptr = (uintptr_t)(&arrow_schema);
+                    uintptr_t arrow_array_ptr = (uintptr_t)(&arrow_array);
+
+                    // Call array._export_to_c to get arrow array and schema
+                    //
+                    // If ever a NumPy array gets in here, there will be an
+                    // exception like "AttributeError: 'numpy.ndarray' object
+                    // has no attribute '_export_to_c'".
+                    array.attr("_export_to_c")(
+                        arrow_array_ptr, arrow_schema_ptr);
+
+                    auto coords = array.attr("tolist")();
+
+                    if (!strcmp(arrow_schema.format, "l")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<int64_t>>());
+                    } else if (!strcmp(arrow_schema.format, "i")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<int32_t>>());
+                    } else if (!strcmp(arrow_schema.format, "s")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<int16_t>>());
+                    } else if (!strcmp(arrow_schema.format, "c")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<int8_t>>());
+                    } else if (!strcmp(arrow_schema.format, "L")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<uint64_t>>());
+                    } else if (!strcmp(arrow_schema.format, "I")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<uint32_t>>());
+                    } else if (!strcmp(arrow_schema.format, "S")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<uint16_t>>());
+                    } else if (!strcmp(arrow_schema.format, "C")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<uint8_t>>());
+                    } else if (!strcmp(arrow_schema.format, "f")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<float>>());
+                    } else if (!strcmp(arrow_schema.format, "g")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<double>>());
+                    } else if (
+                        !strcmp(arrow_schema.format, "u") ||
+                        !strcmp(arrow_schema.format, "z")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<std::string>>());
+                    } else if (
+                        !strcmp(arrow_schema.format, "tss:") ||
+                        !strcmp(arrow_schema.format, "tsm:") ||
+                        !strcmp(arrow_schema.format, "tsu:") ||
+                        !strcmp(arrow_schema.format, "tsn:")) {
+                        // convert the Arrow Array to int64
+                        auto pa = py::module::import("pyarrow");
+                        coords = array.attr("cast")(pa.attr("int64")()).attr("tolist")();
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<int64_t>>());
+                    } else if (
+                        !strcmp(arrow_schema.format, "U") ||
+                        !strcmp(arrow_schema.format, "Z")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<std::string>>());
+                    } else {
+                        TPY_ERROR_LOC(
+                            "[pytiledbsoma] set_dim_points: type=" +
+                            std::string(arrow_schema.format) +
+                            " not supported");
+                    }
+
+                    // Release arrow schema
+                    arrow_schema.release(&arrow_schema);
+                }
+            },
+            "dim"_a,
+            "py_arrow_array"_a,
+            "partition_index"_a = 0,
+            "partition_count"_a = 1)
+
+        // The following short functions are expected to be invoked when the
+        // coords are Python list/tuple, or NumPy arrays. Arrow arrays are in
+        // the long if-else-if function above.
+        //
+        // Binding overloaded methods to templated member functions requires
+        // more effort, see:
+        // https://pybind11.readthedocs.io/en/stable/classes.html#overloaded-methods
+
+        // In an initial version of this file we had `set_dim_points` relying
+        // solely on type-overloading. This worked since we supported only int
+        // and string indices. In a subsequent version we are now supporting
+        // various NumPy/PyArrow types including float32, float64, int8, uint16,
+        // etc. It is an unfortunate fact that pybind11 does _not_ successfully
+        // disambiguate between float32 and float64, or between int8 and int64,
+        // etc. given that we ask it to disambiguate using not just types but
+        // std::vector of types or std::vector of std::pair of types.
+        // Experiments have shown that when both float32 and float64 are
+        // implemented with overloaded names to be differentiated solely by
+        // type, pybind11 uses the _first found_. Therefore it is necessary for
+        // us to no longer use common overloaded names.
+
+        .def(
+            "set_dim_points_string_or_bytes",
+            static_cast<void (SOMAArray::*)(
+                const std::string&, const std::vector<std::string>&)>(
+                &SOMAArray::set_dim_points))
+
+        .def(
+            "set_dim_points_float64",
+            static_cast<void (SOMAArray::*)(
+                const std::string&, const std::vector<double>&)>(
+                &SOMAArray::set_dim_points))
+
+        .def(
+            "set_dim_points_float32",
+            static_cast<void (SOMAArray::*)(
+                const std::string&, const std::vector<float>&)>(
+                &SOMAArray::set_dim_points))
+
+        .def(
+            "set_dim_points_int64",
+            static_cast<void (SOMAArray::*)(
+                const std::string&, const std::vector<int64_t>&)>(
+                &SOMAArray::set_dim_points))
+
+        .def(
+            "set_dim_points_int32",
+            static_cast<void (SOMAArray::*)(
+                const std::string&, const std::vector<int32_t>&)>(
+                &SOMAArray::set_dim_points))
+
+        .def(
+            "set_dim_points_int16",
+            static_cast<void (SOMAArray::*)(
+                const std::string&, const std::vector<int16_t>&)>(
+                &SOMAArray::set_dim_points))
+
+        .def(
+            "set_dim_points_int8",
+            static_cast<void (SOMAArray::*)(
+                const std::string&, const std::vector<int8_t>&)>(
+                &SOMAArray::set_dim_points))
+
+        .def(
+            "set_dim_points_uint64",
+            static_cast<void (SOMAArray::*)(
+                const std::string&, const std::vector<uint64_t>&)>(
+                &SOMAArray::set_dim_points))
+
+        .def(
+            "set_dim_points_uint32",
+            static_cast<void (SOMAArray::*)(
+                const std::string&, const std::vector<uint32_t>&)>(
+                &SOMAArray::set_dim_points))
+
+        .def(
+            "set_dim_points_uint16",
+            static_cast<void (SOMAArray::*)(
+                const std::string&, const std::vector<uint16_t>&)>(
+                &SOMAArray::set_dim_points))
+
+        .def(
+            "set_dim_points_uint8",
+            static_cast<void (SOMAArray::*)(
+                const std::string&, const std::vector<uint8_t>&)>(
+                &SOMAArray::set_dim_points))
+
+        // In an initial version of this file we had `set_dim_ranges` relying
+        // solely on type-overloading. This worked since we supported only int
+        // and string indices. In a subsequent version we are now supporting
+        // various NumPy/PyArrow types including float32, float64, int8, uint16,
+        // etc. It is an unfortunate fact that pybind11 does _not_ successfully
+        // disambiguate between float32 and float64, or between int8 and int64,
+        // etc. given that we ask it to disambiguate using not just types but
+        // std::vector of types or std::vector of std::pair of types.
+        // Experiments have shown that when both float32 and float64 are
+        // implemented with overloaded names to be differentiated solely by
+        // type, pybind11 uses the _first found_. Therefore it is necessary for
+        // us to no longer use common overloaded names.
+
+        .def(
+            "set_dim_ranges_string_or_bytes",
+            static_cast<void (SOMAArray::*)(
+                const std::string&,
+                const std::vector<std::pair<std::string, std::string>>&)>(
+                &SOMAArray::set_dim_ranges))
+
+        .def(
+            "set_dim_ranges_int64",
+            static_cast<void (SOMAArray::*)(
+                const std::string&,
+                const std::vector<std::pair<int64_t, int64_t>>&)>(
+                &SOMAArray::set_dim_ranges))
+
+        .def(
+            "set_dim_ranges_int32",
+            static_cast<void (SOMAArray::*)(
+                const std::string&,
+                const std::vector<std::pair<int32_t, int32_t>>&)>(
+                &SOMAArray::set_dim_ranges))
+
+        .def(
+            "set_dim_ranges_int16",
+            static_cast<void (SOMAArray::*)(
+                const std::string&,
+                const std::vector<std::pair<int16_t, int16_t>>&)>(
+                &SOMAArray::set_dim_ranges))
+
+        .def(
+            "set_dim_ranges_int8",
+            static_cast<void (SOMAArray::*)(
+                const std::string&,
+                const std::vector<std::pair<int8_t, int8_t>>&)>(
+                &SOMAArray::set_dim_ranges))
+
+        .def(
+            "set_dim_ranges_uint64",
+            static_cast<void (SOMAArray::*)(
+                const std::string&,
+                const std::vector<std::pair<uint64_t, uint64_t>>&)>(
+                &SOMAArray::set_dim_ranges))
+
+        .def(
+            "set_dim_ranges_uint32",
+            static_cast<void (SOMAArray::*)(
+                const std::string&,
+                const std::vector<std::pair<uint32_t, uint32_t>>&)>(
+                &SOMAArray::set_dim_ranges))
+
+        .def(
+            "set_dim_ranges_uint16",
+            static_cast<void (SOMAArray::*)(
+                const std::string&,
+                const std::vector<std::pair<uint16_t, uint16_t>>&)>(
+                &SOMAArray::set_dim_ranges))
+
+        .def(
+            "set_dim_ranges_uint8",
+            static_cast<void (SOMAArray::*)(
+                const std::string&,
+                const std::vector<std::pair<uint8_t, uint8_t>>&)>(
+                &SOMAArray::set_dim_ranges))
+
+        .def(
+            "set_dim_ranges_float64",
+            static_cast<void (SOMAArray::*)(
+                const std::string&,
+                const std::vector<std::pair<double, double>>&)>(
+                &SOMAArray::set_dim_ranges))
+
+        .def(
+            "set_dim_ranges_float32",
+            static_cast<void (SOMAArray::*)(
+                const std::string&,
+                const std::vector<std::pair<float, float>>&)>(
+                &SOMAArray::set_dim_ranges))
+
+        .def("results_complete", &SOMAArray::results_complete)
+
+        .def(
+            "read_next",
+            [](SOMAArray& reader) -> std::optional<py::object> {
+                // Release python GIL before reading data
+                py::gil_scoped_release release;
+
+                // Try to read more data
+                auto buffers = reader.read_next();
+
+                // If more data was read, convert it to an arrow table and
+                // return
+                if (buffers.has_value()) {
+                    // Acquire python GIL before accessing python objects
+                    py::gil_scoped_acquire acquire;
+                    return to_table(*buffers);
+                }
+
+                // No data was read, the query is complete, return nullopt
+                return std::nullopt;
+            })
+
+        .def("nnz", &SOMAArray::nnz, py::call_guard<py::gil_scoped_release>())
+
+        .def_property_readonly("shape", &SOMAArray::shape)
+
+        .def_property_readonly("uri", &SOMAArray::uri)
+
+        .def_property_readonly("column_names", &SOMAArray::column_names)
+
+        .def_property_readonly("result_order", &SOMAArray::result_order)
+
+        .def("get_enum", get_enum)
+
+        .def("get_enum_is_ordered", get_enum_is_ordered)
+
+        .def("get_enum_label_on_attr", &SOMAArray::get_enum_label_on_attr);
+}
+} // namespace libtiledbsomacpp
\ No newline at end of file
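
Taken together, the bindings above support an open, filter, incremental-read workflow from Python. A sketch under assumed paths (the URI and column names are illustrative; `tiledb_schema_to_arrow` and `QueryCondition` are the same helpers exercised by the tests later in this change):

    import pyarrow as pa
    import tiledb
    import tiledbsoma
    import tiledbsoma.pytiledbsoma as clib
    from tiledbsoma._arrow_types import tiledb_schema_to_arrow
    from tiledbsoma._query_condition import QueryCondition

    uri = "/path/to/soco/pbmc3k_processed/obs"   # hypothetical test array
    sr = clib.SOMAArray(uri)

    # Optional value filter: set_condition resets the reader, then applies the condition
    schema = tiledb_schema_to_arrow(tiledb.open(uri).schema, uri, tiledb.default_ctx())
    sr.set_condition(QueryCondition("percent_mito > 0.02"), schema)

    # Optional coordinate filter via Arrow (format "l" routes to the int64 branch above)
    sr.set_dim_points_arrow("soma_joinid", pa.array([0, 2, 4], type=pa.int64()))

    # Incremental read loop; read_next yields pyarrow.Table chunks until None.
    # The C++ TileDBSOMAError surfaces as tiledbsoma.SOMAError via the module's
    # exception translator.
    try:
        while (tbl := sr.read_next()) is not None:
            print(tbl.num_rows)
    except tiledbsoma.SOMAError as e:
        print("query failed:", e)
    assert sr.results_complete()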
diff --git a/apis/python/src/tiledbsoma/soma_dataframe.cc b/apis/python/src/tiledbsoma/soma_dataframe.cc
new file mode 100644
index 0000000000..18717fc9ed
--- /dev/null
+++ b/apis/python/src/tiledbsoma/soma_dataframe.cc
@@ -0,0 +1,474 @@
+/**
+ * @file   soma_dataframe.cc
+ *
+ * @section LICENSE
+ *
+ * The MIT License
+ *
+ * @copyright Copyright (c) 2023 TileDB, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * @section DESCRIPTION
+ *
+ * This file defines the SOMADataFrame bindings.
+ */
+
+#include <memory>
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/pytypes.h>
+#include <pybind11/stl.h>
+
+#include <tiledbsoma/tiledbsoma>
+
+#include "common.h"
+
+namespace libtiledbsomacpp {
+
+namespace py = pybind11;
+using namespace py::literals;
+using namespace tiledbsoma;
+
+void load_soma_dataframe(py::module &m) {
+    py::class_<SOMADataFrame>(m, "SOMADataFrame")
+
+        .def_static(
+            "open",
+            py::overload_cast<
+                std::string_view,
+                OpenMode,
+                std::map<std::string, std::string>,
+                std::vector<std::string>,
+                ResultOrder,
+                std::optional<std::pair<uint64_t, uint64_t>>>(
+                &SOMADataFrame::open),
+            "uri"_a,
+            "mode"_a,
+            py::kw_only(),
+            "platform_config"_a = py::dict(),
+            "column_names"_a = py::none(),
+            "result_order"_a = ResultOrder::automatic,
+            "timestamp"_a = py::none())
+
+        .def_static("exists", &SOMADataFrame::exists)
+        .def("reopen",
+            py::overload_cast<
+                OpenMode,
+                std::optional<std::pair<uint64_t, uint64_t>>>(
+                &SOMADataFrame::open))
+        .def("close", &SOMADataFrame::close)
+        .def_property_readonly("closed", [](SOMADataFrame& soma_df) -> bool {
+            return not soma_df.is_open();
+        })
+        .def("reset", &SOMADataFrame::reset)
+        .def("set_condition",
+            [](SOMADataFrame& reader,
+               py::object py_query_condition,
+               py::object py_schema){
+                auto column_names = reader.column_names();
+                // Handle query condition based on
+                // TileDB-Py::PyQuery::set_attr_cond()
+                QueryCondition* qc = nullptr;
+                if (!py_query_condition.is(py::none())) {
+                    py::object init_pyqc = py_query_condition.attr(
+                        "init_query_condition");
+                    try {
+                        // Column names will be updated with columns present
+                        // in the query condition
+                        auto new_column_names =
+                            init_pyqc(py_schema, column_names)
+                                .cast<std::vector<std::string>>();
+                        // Update the column_names list if it was not empty,
+                        // otherwise continue selecting all columns with an
+                        // empty column_names list
+                        if (!column_names.empty()) {
+                            column_names = new_column_names;
+                        }
+                    } catch (const std::exception& e) {
+                        throw TileDBSOMAError(e.what());
+                    }
+                    qc = py_query_condition.attr("c_obj")
+                             .cast<PyQueryCondition>()
+                             .ptr()
+                             .get();
+                }
+                // Reset unconditionally (as in the SOMAArray binding), not
+                // only when a condition was given
+                reader.reset(column_names);
+
+                // Release python GIL after we're done accessing python
+                // objects
+                py::gil_scoped_release release;
+                // Set query condition if present
+                if (qc) {
+                    reader.set_condition(*qc);
+                }
+            },
+            "py_query_condition"_a,
+            "py_schema"_a)
+        .def_property_readonly("type", &SOMADataFrame::type)
+        .def_property_readonly("uri", &SOMADataFrame::uri)
+        .def_property_readonly("mode", [](SOMADataFrame& soma_df){
+            return soma_df.mode() == OpenMode::read ? "r" : "w";
+        })
"r" : "w"; + }) + .def_property_readonly("schema", [](SOMADataFrame& soma_df) -> py::object { + auto pa = py::module::import("pyarrow"); + auto pa_schema_import = pa.attr("Schema").attr("_import_from_c"); + return pa_schema_import(py::capsule(soma_df.schema().get())); + }) + .def_property_readonly("timestamp", [](SOMADataFrame& soma_df) -> py::object { + if(!soma_df.timestamp().has_value()) + return py::none(); + return py::cast(soma_df.timestamp()->second); + }) + .def_property_readonly("index_column_names", &SOMADataFrame::index_column_names) + .def("non_empty_domain", [](SOMADataFrame& soma_df, std::string name, py::dtype dtype){ + switch (np_to_tdb_dtype(dtype)) { + case TILEDB_UINT64: + return py::cast(soma_df.non_empty_domain(name)); + case TILEDB_DATETIME_YEAR: + case TILEDB_DATETIME_MONTH: + case TILEDB_DATETIME_WEEK: + case TILEDB_DATETIME_DAY: + case TILEDB_DATETIME_HR: + case TILEDB_DATETIME_MIN: + case TILEDB_DATETIME_SEC: + case TILEDB_DATETIME_MS: + case TILEDB_DATETIME_US: + case TILEDB_DATETIME_NS: + case TILEDB_DATETIME_PS: + case TILEDB_DATETIME_FS: + case TILEDB_DATETIME_AS: + case TILEDB_INT64: + return py::cast(soma_df.non_empty_domain(name)); + case TILEDB_UINT32: + return py::cast(soma_df.non_empty_domain(name)); + case TILEDB_INT32: + return py::cast(soma_df.non_empty_domain(name)); + case TILEDB_UINT16: + return py::cast(soma_df.non_empty_domain(name)); + case TILEDB_INT16: + return py::cast(soma_df.non_empty_domain(name)); + case TILEDB_UINT8: + return py::cast(soma_df.non_empty_domain(name)); + case TILEDB_INT8: + return py::cast(soma_df.non_empty_domain(name)); + case TILEDB_FLOAT64: + return py::cast(soma_df.non_empty_domain(name)); + case TILEDB_FLOAT32: + return py::cast(soma_df.non_empty_domain(name)); + case TILEDB_STRING_UTF8: + case TILEDB_STRING_ASCII: + return py::cast(soma_df.non_empty_domain_var(name)); + default: + throw TileDBSOMAError("Unsupported dtype for nonempty domain."); + } + }) + .def("domain", [](SOMADataFrame& soma_df, std::string name, py::dtype dtype) { + switch (np_to_tdb_dtype(dtype)) { + case TILEDB_UINT64: + return py::cast(soma_df.domain(name)); + case TILEDB_DATETIME_YEAR: + case TILEDB_DATETIME_MONTH: + case TILEDB_DATETIME_WEEK: + case TILEDB_DATETIME_DAY: + case TILEDB_DATETIME_HR: + case TILEDB_DATETIME_MIN: + case TILEDB_DATETIME_SEC: + case TILEDB_DATETIME_MS: + case TILEDB_DATETIME_US: + case TILEDB_DATETIME_NS: + case TILEDB_DATETIME_PS: + case TILEDB_DATETIME_FS: + case TILEDB_DATETIME_AS: + case TILEDB_INT64: + return py::cast(soma_df.domain(name)); + case TILEDB_UINT32: + return py::cast(soma_df.domain(name)); + case TILEDB_INT32: + return py::cast(soma_df.domain(name)); + case TILEDB_UINT16: + return py::cast(soma_df.domain(name)); + case TILEDB_INT16: + return py::cast(soma_df.domain(name)); + case TILEDB_UINT8: + return py::cast(soma_df.domain(name)); + case TILEDB_INT8: + return py::cast(soma_df.domain(name)); + case TILEDB_FLOAT64: + return py::cast(soma_df.domain(name)); + case TILEDB_FLOAT32: + return py::cast(soma_df.domain(name)); + case TILEDB_STRING_UTF8: + case TILEDB_STRING_ASCII: { + std::pair str_domain; + return py::cast(std::make_pair("", "")); + } + default: + throw TileDBSOMAError("Unsupported dtype for Dimension's domain"); + } + }) + .def_property_readonly("count", &SOMADataFrame::count) + .def("read_next", [](SOMADataFrame& dataframe){ + // Release GIL when reading data + py::gil_scoped_release release; + auto buffers = dataframe.read_next(); + py::gil_scoped_acquire acquire; + + return 
+        .def_property_readonly("count", &SOMADataFrame::count)
+        .def("read_next", [](SOMADataFrame& dataframe){
+            // Release GIL when reading data
+            py::gil_scoped_release release;
+            auto buffers = dataframe.read_next();
+            py::gil_scoped_acquire acquire;
+
+            return to_table(buffers);
+        })
+        .def("set_metadata", &SOMADataFrame::set_metadata)
+        .def("delete_metadata", &SOMADataFrame::delete_metadata)
+        .def("get_metadata",
+            py::overload_cast<const std::string&>(&SOMADataFrame::get_metadata))
+        .def_property_readonly("meta", [](SOMADataFrame& soma_dataframe) -> py::dict {
+            py::dict results;
+
+            for (auto const& [key, val] : soma_dataframe.get_metadata()){
+                tiledb_datatype_t tdb_type = std::get<0>(val);
+                uint32_t value_num = std::get<1>(val);
+                const void *value = std::get<2>(val);
+
+                if(tdb_type == TILEDB_STRING_UTF8){
+                    results[py::str(key)] = py::str(std::string((const char*)value, value_num));
+                }else if(tdb_type == TILEDB_STRING_ASCII){
+                    results[py::str(key)] = py::bytes(std::string((const char*)value, value_num));
+                }else{
+                    py::dtype value_type = tdb_to_np_dtype(tdb_type, 1);
+                    results[py::str(key)] = py::array(value_type, value_num, value);
+                }
+            }
+            return results;
+        })
+        .def("has_metadata", &SOMADataFrame::has_metadata)
+        .def("metadata_num", &SOMADataFrame::metadata_num)
+        .def(
+            "set_dim_points_arrow",
+            [](SOMADataFrame& reader,
+               const std::string& dim,
+               py::object py_arrow_array,
+               int partition_index,
+               int partition_count) {
+                // Create a list of array chunks
+                py::list array_chunks;
+                if (py::hasattr(py_arrow_array, "chunks")) {
+                    array_chunks = py_arrow_array.attr("chunks")
+                                       .cast<py::list>();
+                } else {
+                    array_chunks.append(py_arrow_array);
+                }
+
+                for (const pybind11::handle array : array_chunks) {
+                    ArrowSchema arrow_schema;
+                    ArrowArray arrow_array;
+                    uintptr_t arrow_schema_ptr = (uintptr_t)(&arrow_schema);
+                    uintptr_t arrow_array_ptr = (uintptr_t)(&arrow_array);
+
+                    // Call array._export_to_c to get arrow array and schema
+                    //
+                    // If ever a NumPy array gets in here, there will be an
+                    // exception like "AttributeError: 'numpy.ndarray' object
+                    // has no attribute '_export_to_c'".
+                    array.attr("_export_to_c")(
+                        arrow_array_ptr, arrow_schema_ptr);
+
+                    auto coords = array.attr("tolist")();
+
+                    if (!strcmp(arrow_schema.format, "l")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<int64_t>>());
+                    } else if (!strcmp(arrow_schema.format, "i")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<int32_t>>());
+                    } else if (!strcmp(arrow_schema.format, "s")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<int16_t>>());
+                    } else if (!strcmp(arrow_schema.format, "c")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<int8_t>>());
+                    } else if (!strcmp(arrow_schema.format, "L")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<uint64_t>>());
+                    } else if (!strcmp(arrow_schema.format, "I")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<uint32_t>>());
+                    } else if (!strcmp(arrow_schema.format, "S")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<uint16_t>>());
+                    } else if (!strcmp(arrow_schema.format, "C")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<uint8_t>>());
+                    } else if (!strcmp(arrow_schema.format, "f")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<float>>());
+                    } else if (!strcmp(arrow_schema.format, "g")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<double>>());
+                    } else if (
+                        !strcmp(arrow_schema.format, "u") ||
+                        !strcmp(arrow_schema.format, "z")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<std::string>>());
+                    } else if (
+                        !strcmp(arrow_schema.format, "tss:") ||
+                        !strcmp(arrow_schema.format, "tsm:") ||
+                        !strcmp(arrow_schema.format, "tsu:") ||
+                        !strcmp(arrow_schema.format, "tsn:")) {
+                        // convert the Arrow Array to int64
+                        auto pa = py::module::import("pyarrow");
+                        coords = array.attr("cast")(pa.attr("int64")()).attr("tolist")();
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<int64_t>>());
+                    } else if (
+                        !strcmp(arrow_schema.format, "U") ||
+                        !strcmp(arrow_schema.format, "Z")) {
+                        reader.set_dim_points(
+                            dim, coords.cast<std::vector<std::string>>());
+                    } else {
+                        throw TileDBSOMAError(
+                            "[pytiledbsoma] set_dim_points: type=" +
+                            std::string(arrow_schema.format) +
+                            " not supported");
+                    }
+
+                    // Release arrow schema
+                    arrow_schema.release(&arrow_schema);
+                }
+            },
+            "dim"_a,
+            "py_arrow_array"_a,
+            "partition_index"_a = 0,
+            "partition_count"_a = 1)
+        .def(
+            "set_dim_points_string_or_bytes",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&, const std::vector<std::string>&)>(
+                &SOMADataFrame::set_dim_points))
+        .def(
+            "set_dim_points_double",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&, const std::vector<double>&)>(
+                &SOMADataFrame::set_dim_points))
+        .def(
+            "set_dim_points_float",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&, const std::vector<float>&)>(
+                &SOMADataFrame::set_dim_points))
+        .def(
+            "set_dim_points_int64",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&, const std::vector<int64_t>&)>(
+                &SOMADataFrame::set_dim_points))
+        .def(
+            "set_dim_points_int32",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&, const std::vector<int32_t>&)>(
+                &SOMADataFrame::set_dim_points))
+        .def(
+            "set_dim_points_int16",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&, const std::vector<int16_t>&)>(
+                &SOMADataFrame::set_dim_points))
+        .def(
+            "set_dim_points_int8",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&, const std::vector<int8_t>&)>(
+                &SOMADataFrame::set_dim_points))
+        .def(
+            "set_dim_points_uint64",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&, const std::vector<uint64_t>&)>(
+                &SOMADataFrame::set_dim_points))
+        .def(
+            "set_dim_points_uint32",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&, const std::vector<uint32_t>&)>(
+                &SOMADataFrame::set_dim_points))
+        .def(
+            "set_dim_points_uint16",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&, const std::vector<uint16_t>&)>(
+                &SOMADataFrame::set_dim_points))
+        .def(
+            "set_dim_points_uint8",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&, const std::vector<uint8_t>&)>(
+                &SOMADataFrame::set_dim_points))
+        .def(
+            "set_dim_ranges_string_or_bytes",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&,
+                const std::vector<std::pair<std::string, std::string>>&)>(
+                &SOMADataFrame::set_dim_ranges))
+        .def(
+            "set_dim_ranges_int64",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&,
+                const std::vector<std::pair<int64_t, int64_t>>&)>(
+                &SOMADataFrame::set_dim_ranges))
+        .def(
+            "set_dim_ranges_int32",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&,
+                const std::vector<std::pair<int32_t, int32_t>>&)>(
+                &SOMADataFrame::set_dim_ranges))
+        .def(
+            "set_dim_ranges_int16",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&,
+                const std::vector<std::pair<int16_t, int16_t>>&)>(
+                &SOMADataFrame::set_dim_ranges))
+        .def(
+            "set_dim_ranges_int8",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&,
+                const std::vector<std::pair<int8_t, int8_t>>&)>(
+                &SOMADataFrame::set_dim_ranges))
+        .def(
+            "set_dim_ranges_uint64",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&,
+                const std::vector<std::pair<uint64_t, uint64_t>>&)>(
+                &SOMADataFrame::set_dim_ranges))
+        .def(
+            "set_dim_ranges_uint32",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&,
+                const std::vector<std::pair<uint32_t, uint32_t>>&)>(
+                &SOMADataFrame::set_dim_ranges))
+        .def(
+            "set_dim_ranges_uint16",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&,
+                const std::vector<std::pair<uint16_t, uint16_t>>&)>(
+                &SOMADataFrame::set_dim_ranges))
+        .def(
+            "set_dim_ranges_uint8",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&,
+                const std::vector<std::pair<uint8_t, uint8_t>>&)>(
+                &SOMADataFrame::set_dim_ranges))
+        .def(
+            "set_dim_ranges_double",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&,
+                const std::vector<std::pair<double, double>>&)>(
+                &SOMADataFrame::set_dim_ranges))
+        .def(
+            "set_dim_ranges_float",
+            static_cast<void (SOMADataFrame::*)(
+                const std::string&,
+                const std::vector<std::pair<float, float>>&)>(
+                &SOMADataFrame::set_dim_ranges));
+}
+}
\ No newline at end of file
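
The `SOMADataFrame` class bound above can be driven directly from Python. A sketch of the read-side surface with a hypothetical URI and column names; `timestamp=None` reads the latest data:

    import numpy as np
    import tiledbsoma.pytiledbsoma as clib

    sdf = clib.SOMADataFrame.open(
        "/path/to/experiment/obs",            # hypothetical URI
        clib.OpenMode.read,
        platform_config={},
        column_names=["soma_joinid", "n_genes"],
        result_order=clib.ResultOrder.automatic,
        timestamp=None,
    )
    print(sdf.mode)                           # "r"
    print(sdf.count)                          # number of rows
    print(sdf.non_empty_domain("soma_joinid", np.dtype(np.int64)))
    tbl = sdf.read_next()                     # one pyarrow.Table chunk, or None
    sdf.close()
    assert sdf.closed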
diff --git a/apis/python/src/tiledbsoma/soma_object.cc b/apis/python/src/tiledbsoma/soma_object.cc
new file mode 100644
index 0000000000..6192961817
--- /dev/null
+++ b/apis/python/src/tiledbsoma/soma_object.cc
@@ -0,0 +1,70 @@
+/**
+ * @file   soma_object.cc
+ *
+ * @section LICENSE
+ *
+ * The MIT License
+ *
+ * @copyright Copyright (c) 2023 TileDB, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * @section DESCRIPTION
+ *
+ * This file defines the SOMAObject bindings.
+ */
+
+#include <memory>
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/pytypes.h>
+#include <pybind11/stl.h>
+
+#include <tiledbsoma/tiledbsoma>
+
+#include "common.h"
+
+namespace libtiledbsomacpp {
+
+namespace py = pybind11;
+using namespace py::literals;
+using namespace tiledbsoma;
+
+void load_soma_object(py::module &m) {
+    py::class_<SOMAObject>(m, "SOMAObject")
+
+        .def_static("open", [](std::string uri,
+                               OpenMode mode,
+                               std::map<std::string, std::string> config,
+                               std::optional<std::pair<uint64_t, uint64_t>> timestamp) -> py::object {
+            if(mode == OpenMode::write)
+                TPY_ERROR_LOC("SOMAObjects for write mode not handled in Python API yet.");
+
+            try{
+                auto obj = SOMAObject::open(uri, mode, config, timestamp);
+                if (obj->type() == "SOMADataFrame")
+                    return py::cast(dynamic_cast<SOMADataFrame&>(*obj));
+            }
+            catch(...){
+                TPY_ERROR_LOC("SOMAObject not handled in Python API yet.");
+            }
+            // Unknown SOMA type: raise here as well so the lambda always
+            // returns or throws
+            TPY_ERROR_LOC("SOMAObject not handled in Python API yet.");
+        });
+}
+}
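
`SOMAObject.open` gives type-driven dispatch: it opens the object, inspects its SOMA type, and returns a typed handle; currently only `SOMADataFrame` round-trips, and anything else raises `SOMAError`. A sketch with a hypothetical URI (arguments are positional, since the binding declares no keyword names):

    import tiledbsoma.pytiledbsoma as clib

    obj = clib.SOMAObject.open(
        "/path/to/experiment/obs",   # hypothetical URI
        clib.OpenMode.read,
        {},                          # TileDB config dict
        None,                        # timestamp: optional (start, end) pair
    )
    print(type(obj))                 # e.g. tiledbsoma.pytiledbsoma.SOMADataFrame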
diff --git a/apis/python/tests/test_collection.py b/apis/python/tests/test_collection.py
index 8564a30dbd..39a6918ff8 100644
--- a/apis/python/tests/test_collection.py
+++ b/apis/python/tests/test_collection.py
@@ -78,10 +78,10 @@ def test_collection_basic(tmp_path):
     assert len(readback_collection) == 2
 
     with readback_collection["sdf"] as sdf:
-        assert len(sdf._handle.reader.df[:]) == 5
+        assert len(sdf.read().concat()) == 5
 
     with readback_collection["snda"] as snda:
-        assert len(snda._handle.reader.df[:]) == 3
+        assert len(snda.read().tables().concat()) == 3
 
 
 @pytest.fixture(
diff --git a/apis/python/tests/test_dataframe.py b/apis/python/tests/test_dataframe.py
index 7fc9592504..dbc8ef945b 100644
--- a/apis/python/tests/test_dataframe.py
+++ b/apis/python/tests/test_dataframe.py
@@ -131,10 +131,7 @@ def test_dataframe_with_enumeration(tmp_path):
         ]
     )
     enums = {"enmr1": ("a", "bb", "ccc"), "enmr2": ("cat", "dog")}
-    with soma.DataFrame.create(
-        tmp_path.as_posix(),
-        schema=schema,
-    ) as sdf:
+    with soma.DataFrame.create(tmp_path.as_posix(), schema=schema) as sdf:
         data = {}
         data["soma_joinid"] = [0, 1, 2, 3, 4]
         data["foo"] = ["a", "bb", "ccc", "bb", "a"]
@@ -1359,3 +1356,7 @@ def test_enum_extend_past_numerical_limit(tmp_path):
     with pytest.raises(ValueError):
        with soma.open(uri, mode="w") as A:
            A.write(tbl)
+
+
+def test_write_str_empty_ned(tmp_path):
+    tmp_path.as_posix()
diff --git a/libtiledbsoma/test/test_indexer.py b/apis/python/tests/test_indexer.py
similarity index 94%
rename from libtiledbsoma/test/test_indexer.py
rename to apis/python/tests/test_indexer.py
index 3bbd9ec520..c2acb53fd7 100644
--- a/libtiledbsoma/test/test_indexer.py
+++ b/apis/python/tests/test_indexer.py
@@ -1,14 +1,10 @@
 import numpy as np
 import pandas as pd
-import tiledb
 
 from tiledbsoma.options import SOMATileDBContext
 from tiledbsoma.options._soma_tiledb_context import _validate_soma_tiledb_context
-from tiledbsoma.pytiledbsoma import config_logging
 from tiledbsoma.utils import build_index
 
-config_logging("debug")
-
 
 def indexer_test(keys: np.array, lookups: np.array, fail: bool):
     if fail:
@@ -19,7 +15,7 @@ def indexer_test(keys: np.array, lookups: np.array, fail: bool):
 
 def indexer_test_fail(keys: np.array, lookups: np.array):
     try:
-        context = _validate_soma_tiledb_context(SOMATileDBContext(tiledb.default_ctx()))
+        context = _validate_soma_tiledb_context(SOMATileDBContext())
         index = build_index(keys, context)
         index.get_indexer(lookups)
         raise AssertionError("should have failed")
@@ -35,7 +31,7 @@ def indexer_test_fail(keys: np.array, lookups: np.array):
 
 
 def indexer_test_pass(keys: np.array, lookups: np.array):
-    context = _validate_soma_tiledb_context(SOMATileDBContext(tiledb.default_ctx()))
+    context = _validate_soma_tiledb_context(SOMATileDBContext())
     indexer = build_index(keys, context)
     results = indexer.get_indexer(lookups)
     panda_indexer = pd.Index(keys)
diff --git a/apis/python/tests/test_platform_config.py b/apis/python/tests/test_platform_config.py
index 282504380e..3849e33b53 100644
--- a/apis/python/tests/test_platform_config.py
+++ b/apis/python/tests/test_platform_config.py
@@ -70,9 +70,20 @@ def test_platform_config(adata):
             assert list(x_arr.dim("soma_dim_1").filters) == [
                 tiledb.ZstdFilter(level=-1)
             ]
-            var_df = exp.ms["RNA"].var
-            var_arr = var_df._handle.reader
-            assert var_arr.dim("soma_joinid").filters == [tiledb.ZstdFilter(level=1)]
+            # TODO as we remove usage of TileDB-Py in favor of ArrowSchema, we
+            # need a new method to get which filters have been applied to the
+            # column rather than grabbing it from the ArraySchema. One
+            # consideration would be to store TileDB information in JSON format
+            # as a field in the ArraySchema metadata, very similar to how
+            # Pandas stores information within pa.Schema.pandas_metadata. This
+            # could hold not only which filters have been applied to the
+            # column, but other info that cannot be "directly" stored in the
+            # ArrowSchema, such as whether the column is a TileDB attribute or
+            # dimension, whether this represents a dense or sparse array, etc.
+            # This may be as easy as simply copying the platform_config by
+            # calling pa.Schema.with_metadata(platform_config).
+            # var_df = exp.ms["RNA"].var
+            # var_arr = var_df._handle.reader
+            # assert var_arr.dim("soma_joinid").filters == [tiledb.ZstdFilter(level=1)]
 
 
 def test__from_platform_config__admits_ignored_config_structure():
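
The TODO above suggests carrying TileDB-specific details in Arrow schema metadata, analogous to `pandas_metadata`. A hypothetical sketch of that idea — not current library behavior; the `tiledbsoma` metadata key and the embedded structure are invented for illustration:

    import json
    import pyarrow as pa

    # Assumed platform_config shape, mirroring the ZstdFilter settings above
    platform_config = {
        "tiledb": {"create": {"dims": {"soma_joinid": {
            "filters": [{"_type": "ZstdFilter", "level": 1}],
        }}}}
    }
    schema = pa.schema([pa.field("soma_joinid", pa.int64())])
    # Stash the config as a JSON blob in the schema's metadata
    schema = schema.with_metadata({b"tiledbsoma": json.dumps(platform_config).encode()})
    assert b"tiledbsoma" in schema.metadata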
"percent_mito > 0.02" qc = QueryCondition(condition) - schema = tiledb.open(uri).schema + schema = tiledb_schema_to_arrow(tiledb.open(uri).schema, uri, tiledb.default_ctx()) sr = clib.SOMAArray(uri) sr.set_condition(qc, schema) @@ -218,22 +218,17 @@ def test_parsing_error_conditions(malformed_condition): def test_eval_error_conditions(malformed_condition): """Conditions which should not evaluate (but WILL parse)""" uri = os.path.join(SOMA_URI, "obs") + schema = tiledb_schema_to_arrow(tiledb.open(uri).schema, uri, tiledb.default_ctx()) + qc = QueryCondition(malformed_condition) - # TODO: these raise the wrong error - it should be SOMAError. Change the test - # when https://github.com/single-cell-data/TileDB-SOMA/issues/783 is fixed - - with pytest.raises(RuntimeError): - qc = QueryCondition(malformed_condition) - schema = tiledb.open(uri).schema + with pytest.raises(SOMAError): sr = clib.SOMAArray(uri) sr.set_condition(qc, schema) - sr.read_next() - with pytest.raises(tiledb.TileDBError): - qc = QueryCondition(malformed_condition) - schema = tiledb.open(uri).schema + with pytest.raises(SOMAError): # test function directly for codecov - qc.init_query_condition(schema, {}, []) + qc.init_query_condition(schema, []) + qc.init_query_condition(schema, ["bad_query_attr"]) if __name__ == "__main__": diff --git a/libtiledbsoma/test/test_simple.py b/apis/python/tests/test_simple.py similarity index 100% rename from libtiledbsoma/test/test_simple.py rename to apis/python/tests/test_simple.py diff --git a/libtiledbsoma/test/test_soma_array.py b/apis/python/tests/test_soma_array.py similarity index 99% rename from libtiledbsoma/test/test_soma_array.py rename to apis/python/tests/test_soma_array.py index f6b9ef4519..e090cada8e 100644 --- a/libtiledbsoma/test/test_soma_array.py +++ b/apis/python/tests/test_soma_array.py @@ -9,7 +9,7 @@ VERBOSE = False TEST_DIR = os.path.dirname(__file__) -SOMA_URI = f"{TEST_DIR}/../../test/soco/pbmc3k_processed" +SOMA_URI = f"{TEST_DIR}/../../../test/soco/pbmc3k_processed" if VERBOSE: clib.config_logging("debug") diff --git a/apis/python/tests/test_unicode.py b/apis/python/tests/test_unicode.py index 4b59fd4646..68d252525d 100644 --- a/apis/python/tests/test_unicode.py +++ b/apis/python/tests/test_unicode.py @@ -57,7 +57,9 @@ def test_dataframe_unicode_columns(sample_dataframe_path, sample_arrow_table): sdf.write(sample_arrow_table) with soma.DataFrame.open(sample_dataframe_path) as sdf: - assert sample_arrow_table.schema == sdf.schema + # TODO when coverting from Pandas to Arrow, the schema has information + # stored in the pandas_metadata + # assert sample_arrow_table.schema == sdf.schema assert sdf.read().concat().equals(sample_arrow_table) diff --git a/apis/r/src/rinterface.cpp b/apis/r/src/rinterface.cpp index c279719a20..4a312bb4c3 100644 --- a/apis/r/src/rinterface.cpp +++ b/apis/r/src/rinterface.cpp @@ -87,7 +87,7 @@ Rcpp::List soma_array_reader(const std::string& uri, tdb_result_order); std::unordered_map> name2dim; - std::shared_ptr schema = sr->schema(); + std::shared_ptr schema = sr->tiledb_schema(); tiledb::Domain domain = schema->domain(); std::vector dims = domain.dimensions(); for (auto& dim: dims) { diff --git a/apis/r/src/riterator.cpp b/apis/r/src/riterator.cpp index f7da66555f..1cd170aacc 100644 --- a/apis/r/src/riterator.cpp +++ b/apis/r/src/riterator.cpp @@ -118,7 +118,7 @@ Rcpp::List sr_setup(const std::string& uri, tdb_result_order, std::make_pair(ts_start, ts_end)); std::unordered_map> name2dim; - std::shared_ptr schema = ptr->schema(); + 
std::shared_ptr schema = ptr->tiledb_schema(); tiledb::Domain domain = schema->domain(); std::vector dims = domain.dimensions(); for (auto& dim: dims) { diff --git a/libtiledbsoma/src/CMakeLists.txt b/libtiledbsoma/src/CMakeLists.txt index ea62affc5f..24b74fd040 100644 --- a/libtiledbsoma/src/CMakeLists.txt +++ b/libtiledbsoma/src/CMakeLists.txt @@ -54,6 +54,7 @@ add_library(TILEDB_SOMA_OBJECTS OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/soma/managed_query.cc ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_array.cc ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_group.cc + ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_object.cc ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_collection.cc ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_experiment.cc ${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_measurement.cc diff --git a/libtiledbsoma/src/external/khash/khash.h b/libtiledbsoma/src/external/khash/khash.h index f75f3474c1..9142c5df5a 100644 --- a/libtiledbsoma/src/external/khash/khash.h +++ b/libtiledbsoma/src/external/khash/khash.h @@ -29,35 +29,35 @@ #include "khash.h" KHASH_MAP_INIT_INT(32, char) int main() { - int ret, is_missing; - khiter_t k; - khash_t(32) *h = kh_init(32); - k = kh_put(32, h, 5, &ret); - kh_value(h, k) = 10; - k = kh_get(32, h, 10); - is_missing = (k == kh_end(h)); - k = kh_get(32, h, 5); - kh_del(32, h, k); - for (k = kh_begin(h); k != kh_end(h); ++k) - if (kh_exist(h, k)) kh_value(h, k) = 1; - kh_destroy(32, h); - return 0; + int ret, is_missing; + khiter_t k; + khash_t(32) *h = kh_init(32); + k = kh_put(32, h, 5, &ret); + kh_value(h, k) = 10; + k = kh_get(32, h, 10); + is_missing = (k == kh_end(h)); + k = kh_get(32, h, 5); + kh_del(32, h, k); + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) kh_value(h, k) = 1; + kh_destroy(32, h); + return 0; } */ /* 2013-05-02 (0.2.8): - * Use quadratic probing. When the capacity is power of 2, stepping function - i*(i+1)/2 guarantees to traverse each bucket. It is better than double - hashing on cache performance and is more robust than linear probing. + * Use quadratic probing. When the capacity is power of 2, stepping + function i*(i+1)/2 guarantees to traverse each bucket. It is better than + double hashing on cache performance and is more robust than linear probing. - In theory, double hashing should be more robust than quadratic probing. - However, my implementation is probably not for large hash tables, because - the second hash function is closely tied to the first hash function, - which reduce the effectiveness of double hashing. + In theory, double hashing should be more robust than quadratic + probing. However, my implementation is probably not for large hash tables, + because the second hash function is closely tied to the first hash function, + which reduce the effectiveness of double hashing. - Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php + Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php 2011-12-29 (0.2.7): @@ -65,18 +65,18 @@ int main() { 2011-09-16 (0.2.6): - * The capacity is a power of 2. This seems to dramatically improve the - speed for simple keys. Thank Zilong Tan for the suggestion. Reference: + * The capacity is a power of 2. This seems to dramatically improve the + speed for simple keys. Thank Zilong Tan for the suggestion. Reference: - - http://code.google.com/p/ulib/ - - http://nothings.org/computer/judy/ + - http://code.google.com/p/ulib/ + - http://nothings.org/computer/judy/ - * Allow to optionally use linear probing which usually has better - performance for random input. 
Double hashing is still the default as it - is more robust to certain non-random input. + * Allow to optionally use linear probing which usually has better + performance for random input. Double hashing is still the default as + it is more robust to certain non-random input. - * Added Wang's integer hash function (not used by default). This hash - function is more robust to certain non-random input. + * Added Wang's integer hash function (not used by default). This hash + function is more robust to certain non-random input. 2011-02-14 (0.2.5): @@ -88,32 +88,31 @@ int main() { 2008-09-19 (0.2.3): - * Corrected the example - * Improved interfaces + * Corrected the example + * Improved interfaces 2008-09-11 (0.2.2): - * Improved speed a little in kh_put() + * Improved speed a little in kh_put() 2008-09-10 (0.2.1): - * Added kh_clear() - * Fixed a compiling error + * Added kh_clear() + * Fixed a compiling error 2008-09-02 (0.2.0): - * Changed to token concatenation which increases flexibility. + * Changed to token concatenation which increases flexibility. 2008-08-31 (0.1.2): - * Fixed a bug in kh_get(), which has not been tested previously. + * Fixed a bug in kh_get(), which has not been tested previously. 2008-08-31 (0.1.1): - * Added destructor + * Added destructor */ - #ifndef __AC_KHASH_H #define __AC_KHASH_H @@ -125,9 +124,9 @@ int main() { #define AC_VERSION_KHASH_H "0.2.8" +#include #include #include -#include /* compiler specific configuration */ @@ -152,8 +151,9 @@ typedef unsigned long long khint64_t; #endif /* kh_inline */ #ifndef klib_unused -#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) -#define klib_unused __attribute__ ((__unused__)) +#if (defined __clang__ && __clang_major__ >= 3) || \ + (defined __GNUC__ && __GNUC__ >= 3) +#define klib_unused __attribute__((__unused__)) #else #define klib_unused #endif @@ -162,28 +162,38 @@ typedef unsigned long long khint64_t; typedef khint32_t khint_t; typedef khint_t khiter_t; -#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) -#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) -#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) -#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) -#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) -#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) -#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) +#define __ac_isempty(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 2) +#define __ac_isdel(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 1) +#define __ac_iseither(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 3) +#define __ac_set_isdel_false(flag, i) \ + (flag[i >> 4] &= ~(1ul << ((i & 0xfU) << 1))) +#define __ac_set_isempty_false(flag, i) \ + (flag[i >> 4] &= ~(2ul << ((i & 0xfU) << 1))) +#define __ac_set_isboth_false(flag, i) \ + (flag[i >> 4] &= ~(3ul << ((i & 0xfU) << 1))) +#define __ac_set_isdel_true(flag, i) (flag[i >> 4] |= 1ul << ((i & 0xfU) << 1)) -#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4) +#define __ac_fsize(m) ((m) < 16 ? 
1 : (m) >> 4) #ifndef kroundup32 -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#define kroundup32(x) \ + (--(x), \ + (x) |= (x) >> 1, \ + (x) |= (x) >> 2, \ + (x) |= (x) >> 4, \ + (x) |= (x) >> 8, \ + (x) |= (x) >> 16, \ + ++(x)) #endif #ifndef kcalloc -#define kcalloc(N,Z) calloc(N,Z) +#define kcalloc(N, Z) calloc(N, Z) #endif #ifndef kmalloc #define kmalloc(Z) malloc(Z) #endif #ifndef krealloc -#define krealloc(P,Z) realloc(P,Z) +#define krealloc(P, Z) realloc(P, Z) #endif #ifndef kfree #define kfree(P) free(P) @@ -191,179 +201,240 @@ typedef khint_t khiter_t; static const double __ac_HASH_UPPER = 0.77; -#define __KHASH_TYPE(name, khkey_t, khval_t) \ - typedef struct kh_##name##_s { \ - khint_t n_buckets, size, n_occupied, upper_bound; \ - khint32_t *flags; \ - khkey_t *keys; \ - khval_t *vals; \ - } kh_##name##_t; - -#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ - extern kh_##name##_t *kh_init_##name(void); \ - extern void kh_destroy_##name(kh_##name##_t *h); \ - extern void kh_clear_##name(kh_##name##_t *h); \ - extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ - extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ - extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ - extern void kh_del_##name(kh_##name##_t *h, khint_t x); - -#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - SCOPE kh_##name##_t *kh_init_##name(void) { \ - return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ - } \ - SCOPE void kh_destroy_##name(kh_##name##_t *h) \ - { \ - if (h) { \ - kfree((void *)h->keys); kfree(h->flags); \ - kfree((void *)h->vals); \ - kfree(h); \ - } \ - } \ - SCOPE void kh_clear_##name(kh_##name##_t *h) \ - { \ - if (h && h->flags) { \ - memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ - h->size = h->n_occupied = 0; \ - } \ - } \ - SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ - { \ - if (h->n_buckets) { \ - khint_t k, i, last, mask, step = 0; \ - mask = h->n_buckets - 1; \ - k = __hash_func(key); i = k & mask; \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - i = (i + (++step)) & mask; \ - if (i == last) return h->n_buckets; \ - } \ - return __ac_iseither(h->flags, i)? h->n_buckets : i; \ - } else return 0; \ - } \ - SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ - { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. 
*/ \ - khint32_t *new_flags = 0; \ - khint_t j = 1; \ - { \ - kroundup32(new_n_buckets); \ - if (new_n_buckets < 4) new_n_buckets = 4; \ - if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ - else { /* hash table size to be changed (shrink or expand); rehash */ \ - new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (!new_flags) return -1; \ - memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (h->n_buckets < new_n_buckets) { /* expand */ \ - khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (!new_keys) { kfree(new_flags); return -1; } \ - h->keys = new_keys; \ - if (kh_is_map) { \ - khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ - if (!new_vals) { kfree(new_flags); return -1; } \ - h->vals = new_vals; \ - } \ - } /* otherwise shrink */ \ - } \ - } \ - if (j) { /* rehashing is needed */ \ - for (j = 0; j != h->n_buckets; ++j) { \ - if (__ac_iseither(h->flags, j) == 0) { \ - khkey_t key = h->keys[j]; \ - khval_t val; \ - khint_t new_mask; \ - new_mask = new_n_buckets - 1; \ - if (kh_is_map) val = h->vals[j]; \ - __ac_set_isdel_true(h->flags, j); \ - while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ - khint_t k, i, step = 0; \ - k = __hash_func(key); \ - i = k & new_mask; \ - while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ - __ac_set_isempty_false(new_flags, i); \ - if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ - { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ - if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ - __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ - } else { /* write the element and jump out of the loop */ \ - h->keys[i] = key; \ - if (kh_is_map) h->vals[i] = val; \ - break; \ - } \ - } \ - } \ - } \ - if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ - h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ - } \ - kfree(h->flags); /* free the working space */ \ - h->flags = new_flags; \ - h->n_buckets = new_n_buckets; \ - h->n_occupied = h->size; \ - h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ - } \ - return 0; \ - } \ - SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ - { \ - khint_t x; \ - if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ - if (h->n_buckets > (h->size<<1)) { \ - if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ - *ret = -1; return h->n_buckets; \ - } \ - } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \ - *ret = -1; return h->n_buckets; \ - } \ - } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ - { \ - khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ - x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ - if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ - else { \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - if (__ac_isdel(h->flags, i)) site = i; \ - i = (i + (++step)) & mask; \ - if (i == last) { x = site; break; } \ - } \ - if (x == h->n_buckets) { \ - if 
(__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ - else x = i; \ - } \ - } \ - } \ - if (__ac_isempty(h->flags, x)) { /* not present at all */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; ++h->n_occupied; \ - *ret = 1; \ - } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; \ - *ret = 2; \ - } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ - return x; \ - } \ - SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ - { \ - if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ - __ac_set_isdel_true(h->flags, x); \ - --h->size; \ - } \ - } - -#define KHASH_DECLARE(name, khkey_t, khval_t) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_PROTOTYPES(name, khkey_t, khval_t) - -#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) - -#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) +#define __KHASH_TYPE(name, khkey_t, khval_t) \ + typedef struct kh_##name##_s { \ + khint_t n_buckets, size, n_occupied, upper_bound; \ + khint32_t* flags; \ + khkey_t* keys; \ + khval_t* vals; \ + } kh_##name##_t; + +#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ + extern kh_##name##_t* kh_init_##name(void); \ + extern void kh_destroy_##name(kh_##name##_t* h); \ + extern void kh_clear_##name(kh_##name##_t* h); \ + extern khint_t kh_get_##name(const kh_##name##_t* h, khkey_t key); \ + extern int kh_resize_##name(kh_##name##_t* h, khint_t new_n_buckets); \ + extern khint_t kh_put_##name(kh_##name##_t* h, khkey_t key, int* ret); \ + extern void kh_del_##name(kh_##name##_t* h, khint_t x); + +#define __KHASH_IMPL( \ + name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + SCOPE kh_##name##_t* kh_init_##name(void) { \ + return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ + } \ + SCOPE void kh_destroy_##name(kh_##name##_t* h) { \ + if (h) { \ + kfree((void*)h->keys); \ + kfree(h->flags); \ + kfree((void*)h->vals); \ + kfree(h); \ + } \ + } \ + SCOPE void kh_clear_##name(kh_##name##_t* h) { \ + if (h && h->flags) { \ + memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ + h->size = h->n_occupied = 0; \ + } \ + } \ + SCOPE khint_t kh_get_##name(const kh_##name##_t* h, khkey_t key) { \ + if (h->n_buckets) { \ + khint_t k, i, last, mask, step = 0; \ + mask = h->n_buckets - 1; \ + k = __hash_func(key); \ + i = k & mask; \ + last = i; \ + while (!__ac_isempty(h->flags, i) && \ + (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + i = (i + (++step)) & mask; \ + if (i == last) \ + return h->n_buckets; \ + } \ + return __ac_iseither(h->flags, i) ? h->n_buckets : i; \ + } else \ + return 0; \ + } \ + SCOPE int kh_resize_##name( \ + kh_##name##_t* h, \ + khint_t new_n_buckets) { /* This function uses 0.25*n_buckets bytes of \ + working space instead of \ + [sizeof(key_t+val_t)+.25]*n_buckets. 
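+                                    (two flag bits per bucket, i.e. 0.25 bytes each) \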
*/ \ + khint32_t* new_flags = 0; \ + khint_t j = 1; \ + { \ + kroundup32(new_n_buckets); \ + if (new_n_buckets < 4) \ + new_n_buckets = 4; \ + if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) \ + j = 0; /* requested size is too small */ \ + else { /* hash table size to be changed (shrink or expand); rehash */ \ + new_flags = (khint32_t*)kmalloc( \ + __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_flags) \ + return -1; \ + memset( \ + new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (h->n_buckets < new_n_buckets) { /* expand */ \ + khkey_t* new_keys = (khkey_t*)krealloc( \ + (void*)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) { \ + kfree(new_flags); \ + return -1; \ + } \ + h->keys = new_keys; \ + if (kh_is_map) { \ + khval_t* new_vals = (khval_t*)krealloc( \ + (void*)h->vals, new_n_buckets * sizeof(khval_t)); \ + if (!new_vals) { \ + kfree(new_flags); \ + return -1; \ + } \ + h->vals = new_vals; \ + } \ + } /* otherwise shrink */ \ + } \ + } \ + if (j) { /* rehashing is needed */ \ + for (j = 0; j != h->n_buckets; ++j) { \ + if (__ac_iseither(h->flags, j) == 0) { \ + khkey_t key = h->keys[j]; \ + khval_t val; \ + khint_t new_mask; \ + new_mask = new_n_buckets - 1; \ + if (kh_is_map) \ + val = h->vals[j]; \ + __ac_set_isdel_true(h->flags, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t k, i, step = 0; \ + k = __hash_func(key); \ + i = k & new_mask; \ + while (!__ac_isempty(new_flags, i)) \ + i = (i + (++step)) & new_mask; \ + __ac_set_isempty_false(new_flags, i); \ + if (i < h->n_buckets && \ + __ac_iseither(h->flags, i) == \ + 0) { /* kick out the existing element */ \ + { \ + khkey_t tmp = h->keys[i]; \ + h->keys[i] = key; \ + key = tmp; \ + } \ + if (kh_is_map) { \ + khval_t tmp = h->vals[i]; \ + h->vals[i] = val; \ + val = tmp; \ + } \ + __ac_set_isdel_true( \ + h->flags, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + if (kh_is_map) \ + h->vals[i] = val; \ + break; \ + } \ + } \ + } \ + } \ + if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ + h->keys = (khkey_t*)krealloc( \ + (void*)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) \ + h->vals = (khval_t*)krealloc( \ + (void*)h->vals, new_n_buckets * sizeof(khval_t)); \ + } \ + kfree(h->flags); /* free the working space */ \ + h->flags = new_flags; \ + h->n_buckets = new_n_buckets; \ + h->n_occupied = h->size; \ + h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ + } \ + return 0; \ + } \ + SCOPE khint_t kh_put_##name(kh_##name##_t* h, khkey_t key, int* ret) { \ + khint_t x; \ + if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ + if (h->n_buckets > (h->size << 1)) { \ + if (kh_resize_##name(h, h->n_buckets - 1) < \ + 0) { /* clear "deleted" elements */ \ + *ret = -1; \ + return h->n_buckets; \ + } \ + } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the \ + hash table */ \ + *ret = -1; \ + return h->n_buckets; \ + } \ + } /* TODO: to implement automatically shrinking; resize() already support \ + shrinking */ \ + { \ + khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ + x = site = h->n_buckets; \ + k = __hash_func(key); \ + i = k & mask; \ + if (__ac_isempty(h->flags, i)) \ + x = i; /* for speed up */ \ + else { \ + last = i; \ + while (!__ac_isempty(h->flags, i) && \ + (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if 
(__ac_isdel(h->flags, i)) \ + site = i; \ + i = (i + (++step)) & mask; \ + if (i == last) { \ + x = site; \ + break; \ + } \ + } \ + if (x == h->n_buckets) { \ + if (__ac_isempty(h->flags, i) && site != h->n_buckets) \ + x = site; \ + else \ + x = i; \ + } \ + } \ + } \ + if (__ac_isempty(h->flags, x)) { /* not present at all */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + ++h->n_occupied; \ + *ret = 1; \ + } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + *ret = 2; \ + } else \ + *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ + return x; \ + } \ + SCOPE void kh_del_##name(kh_##name##_t* h, khint_t x) { \ + if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ + __ac_set_isdel_true(h->flags, x); \ + --h->size; \ + } \ + } + +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_PROTOTYPES(name, khkey_t, khval_t) + +#define KHASH_INIT2( \ + name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_IMPL( \ + name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +#define KHASH_INIT( \ + name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + KHASH_INIT2( \ + name, \ + static kh_inline klib_unused, \ + khkey_t, \ + khval_t, \ + kh_is_map, \ + __hash_func, \ + __hash_equal) /* --- BEGIN OF HASH FUNCTIONS --- */ @@ -382,7 +453,7 @@ static const double __ac_HASH_UPPER = 0.77; @param key The integer [khint64_t] @return The hash value [khint_t] */ -#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) +#define kh_int64_hash_func(key) (khint32_t)((key) >> 33 ^ (key) ^ (key) << 11) /*! @function @abstract 64-bit integer comparison function */ @@ -392,11 +463,12 @@ static const double __ac_HASH_UPPER = 0.77; @param s Pointer to a null terminated string @return The hash value */ -static kh_inline khint_t __ac_X31_hash_string(const char *s) -{ - khint_t h = (khint_t)*s; - if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; - return h; +static kh_inline khint_t __ac_X31_hash_string(const char* s) { + khint_t h = (khint_t)*s; + if (h) + for (++s; *s; ++s) + h = (h << 5) - h + (khint_t)*s; + return h; } /*! 
@function @abstract Another interface to const char* hash function @@ -409,15 +481,14 @@ static kh_inline khint_t __ac_X31_hash_string(const char *s) */ #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) -static kh_inline khint_t __ac_Wang_hash(khint_t key) -{ - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; +static kh_inline khint_t __ac_Wang_hash(khint_t key) { + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; } #define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)key) @@ -468,7 +539,7 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) @param r Extra return code: -1 if the operation failed; 0 if the key is present in the hash table; 1 if the bucket is empty (never used); 2 if the element in - the bucket has been deleted [int*] + the bucket has been deleted [int*] @return Iterator to the inserted element [khint_t] */ #define kh_put(name, h, k, r) kh_put_##name(h, k, r) @@ -478,7 +549,8 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] @param k Key [type of keys] - @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t] + @return Iterator to the found element, or kh_end(h) if the element is + absent [khint_t] */ #define kh_get(name, h, k) kh_get_##name(h, k) @@ -555,13 +627,17 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) @param vvar Variable to which value will be assigned @param code Block of code to execute */ -#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ - for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ - if (!kh_exist(h,__i)) continue; \ - (kvar) = kh_key(h,__i); \ - (vvar) = kh_val(h,__i); \ - code; \ - } } +#define kh_foreach(h, kvar, vvar, code) \ + { \ + khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h, __i)) \ + continue; \ + (kvar) = kh_key(h, __i); \ + (vvar) = kh_val(h, __i); \ + code; \ + } \ + } /*! @function @abstract Iterate over the values in the hash table @@ -569,12 +645,16 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) @param vvar Variable to which value will be assigned @param code Block of code to execute */ -#define kh_foreach_value(h, vvar, code) { khint_t __i; \ - for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ - if (!kh_exist(h,__i)) continue; \ - (vvar) = kh_val(h,__i); \ - code; \ - } } +#define kh_foreach_value(h, vvar, code) \ + { \ + khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h, __i)) \ + continue; \ + (vvar) = kh_val(h, __i); \ + code; \ + } \ + } /* More convenient interfaces */ @@ -582,46 +662,47 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) @abstract Instantiate a hash set containing integer keys @param name Name of the hash table [symbol] */ -#define KHASH_SET_INIT_INT(name) \ - KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) +#define KHASH_SET_INIT_INT(name) \ + KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) /*! 
@function @abstract Instantiate a hash map containing integer keys @param name Name of the hash table [symbol] @param khval_t Type of values [type] */ -#define KHASH_MAP_INIT_INT(name, khval_t) \ - KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) +#define KHASH_MAP_INIT_INT(name, khval_t) \ + KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) /*! @function @abstract Instantiate a hash set containing 64-bit integer keys @param name Name of the hash table [symbol] */ -#define KHASH_SET_INIT_INT64(name) \ - KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) +#define KHASH_SET_INIT_INT64(name) \ + KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) /*! @function @abstract Instantiate a hash map containing 64-bit integer keys @param name Name of the hash table [symbol] @param khval_t Type of values [type] */ -#define KHASH_MAP_INIT_INT64(name, khval_t) \ - KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) +#define KHASH_MAP_INIT_INT64(name, khval_t) \ + KHASH_INIT( \ + name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) -typedef const char *kh_cstr_t; +typedef const char* kh_cstr_t; /*! @function @abstract Instantiate a hash map containing const char* keys @param name Name of the hash table [symbol] */ -#define KHASH_SET_INIT_STR(name) \ - KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) +#define KHASH_SET_INIT_STR(name) \ + KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) /*! @function @abstract Instantiate a hash map containing const char* keys @param name Name of the hash table [symbol] @param khval_t Type of values [type] */ -#define KHASH_MAP_INIT_STR(name, khval_t) \ - KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) +#define KHASH_MAP_INIT_STR(name, khval_t) \ + KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) #endif /* __AC_KHASH_H */ diff --git a/libtiledbsoma/src/external/khash/khashl.h b/libtiledbsoma/src/external/khash/khashl.h index 93ce31354c..dcbffa704b 100644 --- a/libtiledbsoma/src/external/khash/khashl.h +++ b/libtiledbsoma/src/external/khash/khashl.h @@ -28,9 +28,9 @@ #define AC_VERSION_KHASHL_H "0.1" +#include #include #include -#include /************************************ * Compiler specific configurations * @@ -57,8 +57,9 @@ typedef int64_t khint64_t; #endif /* kh_inline */ #ifndef klib_unused -#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) -#define klib_unused __attribute__ ((__unused__)) +#if (defined __clang__ && __clang_major__ >= 3) || \ + (defined __GNUC__ && __GNUC__ >= 3) +#define klib_unused __attribute__((__unused__)) #else #define klib_unused #endif @@ -73,13 +74,13 @@ typedef khint32_t khint_t; ******************/ #ifndef kcalloc -#define kcalloc(N,Z) calloc(N,Z) +#define kcalloc(N, Z) calloc(N, Z) #endif #ifndef kmalloc #define kmalloc(Z) malloc(Z) #endif #ifndef krealloc -#define krealloc(P,Z) realloc(P,Z) +#define krealloc(P, Z) realloc(P, Z) #endif #ifndef kfree #define kfree(P) free(P) @@ -89,216 +90,364 @@ typedef khint32_t khint_t; * Simple private functions * ****************************/ -#define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U) -#define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU)) -#define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU))) +#define __kh_used(flag, i) (flag[i >> 5] >> (i & 0x1fU) & 1U) +#define 
__kh_set_used(flag, i) (flag[i >> 5] |= 1U << (i & 0x1fU)) +#define __kh_set_unused(flag, i) (flag[i >> 5] &= ~(1U << (i & 0x1fU))) -#define __kh_fsize(m) ((m) < 32? 1 : (m)>>5) +#define __kh_fsize(m) ((m) < 32 ? 1 : (m) >> 5) -static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); } +static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { + return hash * 2654435769U >> (32 - bits); +} /******************* * Hash table base * *******************/ #define __KHASHL_TYPE(HType, khkey_t) \ - typedef struct HType { \ - khint_t bits, count; \ - khint32_t *used; \ - khkey_t *keys; \ - } HType; - -#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \ - extern HType *prefix##_init(void); \ - extern void prefix##_destroy(HType *h); \ - extern void prefix##_clear(HType *h); \ - extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \ - extern int prefix##_resize(HType *h, khint_t new_n_buckets); \ - extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \ - extern void prefix##_del(HType *h, khint_t k); - -#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ - SCOPE HType *prefix##_init(void) { \ - return (HType*)kcalloc(1, sizeof(HType)); \ - } \ - SCOPE void prefix##_destroy(HType *h) { \ - if (!h) return; \ - kfree((void *)h->keys); kfree(h->used); \ - kfree(h); \ - } \ - SCOPE void prefix##_clear(HType *h) { \ - if (h && h->used) { \ - uint32_t n_buckets = 1U << h->bits; \ - memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \ - h->count = 0; \ - } \ - } + typedef struct HType { \ + khint_t bits, count; \ + khint32_t* used; \ + khkey_t* keys; \ + } HType; + +#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \ + extern HType* prefix##_init(void); \ + extern void prefix##_destroy(HType* h); \ + extern void prefix##_clear(HType* h); \ + extern khint_t prefix##_getp(const HType* h, const khkey_t* key); \ + extern int prefix##_resize(HType* h, khint_t new_n_buckets); \ + extern khint_t prefix##_putp(HType* h, const khkey_t* key, int* absent); \ + extern void prefix##_del(HType* h, khint_t k); + +#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + SCOPE HType* prefix##_init(void) { \ + return (HType*)kcalloc(1, sizeof(HType)); \ + } \ + SCOPE void prefix##_destroy(HType* h) { \ + if (!h) \ + return; \ + kfree((void*)h->keys); \ + kfree(h->used); \ + kfree(h); \ + } \ + SCOPE void prefix##_clear(HType* h) { \ + if (h && h->used) { \ + uint32_t n_buckets = 1U << h->bits; \ + memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \ + h->count = 0; \ + } \ + } #define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { \ - khint_t i, last, n_buckets, mask; \ - if (h->keys == 0) return 0; \ - n_buckets = 1U << h->bits; \ - mask = n_buckets - 1U; \ - i = last = __kh_h2b(__hash_fn(*key), h->bits); \ - while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ - i = (i + 1U) & mask; \ - if (i == last) return n_buckets; \ - } \ - return !__kh_used(h->used, i)? 
n_buckets : i; \ - } \ - SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp(h, &key); } - -#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - SCOPE int prefix##_resize(HType *h, khint_t new_n_buckets) { \ - khint32_t *new_used = 0; \ - khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \ - while ((x >>= 1) != 0) ++j; \ - if (new_n_buckets & (new_n_buckets - 1)) ++j; \ - new_bits = j > 2? j : 2; \ - new_n_buckets = 1U << new_bits; \ - if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return 0; /* requested size is too small */ \ - new_used = (khint32_t*)kmalloc(__kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ - memset(new_used, 0, __kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (!new_used) return -1; /* not enough memory */ \ - n_buckets = h->keys? 1U<bits : 0U; \ - if (n_buckets < new_n_buckets) { /* expand */ \ - khkey_t *new_keys = (khkey_t*)krealloc((void*)h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (!new_keys) { kfree(new_used); return -1; } \ - h->keys = new_keys; \ - } /* otherwise shrink */ \ - new_mask = new_n_buckets - 1; \ - for (j = 0; j != n_buckets; ++j) { \ - khkey_t key; \ - if (!__kh_used(h->used, j)) continue; \ - key = h->keys[j]; \ - __kh_set_unused(h->used, j); \ - while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ - khint_t i; \ - i = __kh_h2b(__hash_fn(key), new_bits); \ - while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \ - __kh_set_used(new_used, i); \ - if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \ - { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ - __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \ - } else { /* write the element and jump out of the loop */ \ - h->keys[i] = key; \ - break; \ - } \ - } \ - } \ - if (n_buckets > new_n_buckets) /* shrink the hash table */ \ - h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ - kfree(h->used); /* free the working space */ \ - h->used = new_used, h->bits = new_bits; \ - return 0; \ - } + SCOPE khint_t prefix##_getp(const HType* h, const khkey_t* key) { \ + khint_t i, last, n_buckets, mask; \ + if (h->keys == 0) \ + return 0; \ + n_buckets = 1U << h->bits; \ + mask = n_buckets - 1U; \ + i = last = __kh_h2b(__hash_fn(*key), h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) \ + return n_buckets; \ + } \ + return !__kh_used(h->used, i) ? n_buckets : i; \ + } \ + SCOPE khint_t prefix##_get(const HType* h, khkey_t key) { \ + return prefix##_getp(h, &key); \ + } + +#define __KHASHL_IMPL_RESIZE( \ + SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE int prefix##_resize(HType* h, khint_t new_n_buckets) { \ + khint32_t* new_used = 0; \ + khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \ + while ((x >>= 1) != 0) \ + ++j; \ + if (new_n_buckets & (new_n_buckets - 1)) \ + ++j; \ + new_bits = j > 2 ? j : 2; \ + new_n_buckets = 1U << new_bits; \ + if (h->count > (new_n_buckets >> 1) + (new_n_buckets >> 2)) \ + return 0; /* requested size is too small */ \ + new_used = \ + (khint32_t*)kmalloc(__kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ + memset(new_used, 0, __kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_used) \ + return -1; /* not enough memory */ \ + n_buckets = h->keys ? 
1U << h->bits : 0U; \ + if (n_buckets < new_n_buckets) { /* expand */ \ + khkey_t* new_keys = \ + (khkey_t*)krealloc((void*)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) { \ + kfree(new_used); \ + return -1; \ + } \ + h->keys = new_keys; \ + } /* otherwise shrink */ \ + new_mask = new_n_buckets - 1; \ + for (j = 0; j != n_buckets; ++j) { \ + khkey_t key; \ + if (!__kh_used(h->used, j)) \ + continue; \ + key = h->keys[j]; \ + __kh_set_unused(h->used, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t i; \ + i = __kh_h2b(__hash_fn(key), new_bits); \ + while (__kh_used(new_used, i)) \ + i = (i + 1) & new_mask; \ + __kh_set_used(new_used, i); \ + if (i < n_buckets && \ + __kh_used(h->used, i)) { /* kick out the existing element */ \ + { \ + khkey_t tmp = h->keys[i]; \ + h->keys[i] = key; \ + key = tmp; \ + } \ + __kh_set_unused( \ + h->used, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + break; \ + } \ + } \ + } \ + if (n_buckets > new_n_buckets) /* shrink the hash table */ \ + h->keys = \ + (khkey_t*)krealloc((void*)h->keys, new_n_buckets * sizeof(khkey_t)); \ + kfree(h->used); /* free the working space */ \ + h->used = new_used, h->bits = new_bits; \ + return 0; \ + } #define __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { \ - khint_t n_buckets, i, last, mask; \ - n_buckets = h->keys? 1U<bits : 0U; \ - *absent = -1; \ - if (h->count >= (n_buckets>>1) + (n_buckets>>2)) { /* rehashing */ \ - if (prefix##_resize(h, n_buckets + 1U) < 0) \ - return n_buckets; \ - n_buckets = 1U<bits; \ - } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ - mask = n_buckets - 1; \ - i = last = __kh_h2b(__hash_fn(*key), h->bits); \ - while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ - i = (i + 1U) & mask; \ - if (i == last) break; \ - } \ - if (!__kh_used(h->used, i)) { /* not present at all */ \ - h->keys[i] = *key; \ - __kh_set_used(h->used, i); \ - ++h->count; \ - *absent = 1; \ - } else *absent = 0; /* Don't touch h->keys[i] if present */ \ - return i; \ - } \ - SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp(h, &key, absent); } - -#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \ - SCOPE int prefix##_del(HType *h, khint_t i) { \ - khint_t j = i, k, mask, n_buckets; \ - if (h->keys == 0) return 0; \ - n_buckets = 1U<bits; \ - mask = n_buckets - 1U; \ - while (1) { \ - j = (j + 1U) & mask; \ - if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \ - k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \ - if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \ - h->keys[i] = h->keys[j], i = j; \ - } \ - __kh_set_unused(h->used, i); \ - --h->count; \ - return 1; \ - } + SCOPE khint_t prefix##_putp(HType* h, const khkey_t* key, int* absent) { \ + khint_t n_buckets, i, last, mask; \ + n_buckets = h->keys ? 
1U << h->bits : 0U; \ + *absent = -1; \ + if (h->count >= (n_buckets >> 1) + (n_buckets >> 2)) { /* rehashing */ \ + if (prefix##_resize(h, n_buckets + 1U) < 0) \ + return n_buckets; \ + n_buckets = 1U << h->bits; \ + } /* TODO: to implement automatically shrinking; resize() already support \ + shrinking */ \ + mask = n_buckets - 1; \ + i = last = __kh_h2b(__hash_fn(*key), h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) \ + break; \ + } \ + if (!__kh_used(h->used, i)) { /* not present at all */ \ + h->keys[i] = *key; \ + __kh_set_used(h->used, i); \ + ++h->count; \ + *absent = 1; \ + } else \ + *absent = 0; /* Don't touch h->keys[i] if present */ \ + return i; \ + } \ + SCOPE khint_t prefix##_put(HType* h, khkey_t key, int* absent) { \ + return prefix##_putp(h, &key, absent); \ + } + +#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \ + SCOPE int prefix##_del(HType* h, khint_t i) { \ + khint_t j = i, k, mask, n_buckets; \ + if (h->keys == 0) \ + return 0; \ + n_buckets = 1U << h->bits; \ + mask = n_buckets - 1U; \ + while (1) { \ + j = (j + 1U) & mask; \ + if (j == i || !__kh_used(h->used, j)) \ + break; /* j==i only when the table is completely full */ \ + k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \ + if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \ + h->keys[i] = h->keys[j], i = j; \ + } \ + __kh_set_unused(h->used, i); \ + --h->count; \ + return 1; \ + } #define KHASHL_DECLARE(HType, prefix, khkey_t) \ - __KHASHL_TYPE(HType, khkey_t) \ - __KHASHL_PROTOTYPES(HType, prefix, khkey_t) + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_PROTOTYPES(HType, prefix, khkey_t) -#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - __KHASHL_TYPE(HType, khkey_t) \ - __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ - __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) +#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) /***************************** * More convenient interface * *****************************/ -#define __kh_packed __attribute__ ((__packed__)) +#define __kh_packed __attribute__((__packed__)) #define __kh_cached_hash(x) ((x).hash) #define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - typedef struct { khkey_t key; } __kh_packed HType##_s_bucket_t; \ - static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \ - static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \ - KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, prefix##_s_hash, prefix##_s_eq) \ - SCOPE HType *prefix##_init(void) { return prefix##_s_init(); } \ - SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \ - SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_s_resize(h, new_n_buckets); } \ - SCOPE khint_t 
prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \ - SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \ - SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } - -#define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ - typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ - static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ - static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ - KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ - SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \ - SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ - SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ - SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \ - SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } + typedef struct { \ + khkey_t key; \ + } __kh_packed HType##_s_bucket_t; \ + static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { \ + return __hash_fn(x.key); \ + } \ + static kh_inline int prefix##_s_eq( \ + HType##_s_bucket_t x, HType##_s_bucket_t y) { \ + return __hash_eq(x.key, y.key); \ + } \ + KHASHL_INIT( \ + KH_LOCAL, \ + HType, \ + prefix##_s, \ + HType##_s_bucket_t, \ + prefix##_s_hash, \ + prefix##_s_eq) \ + SCOPE HType* prefix##_init(void) { \ + return prefix##_s_init(); \ + } \ + SCOPE void prefix##_destroy(HType* h) { \ + prefix##_s_destroy(h); \ + } \ + SCOPE void prefix##_resize(HType* h, khint_t new_n_buckets) { \ + prefix##_s_resize(h, new_n_buckets); \ + } \ + SCOPE khint_t prefix##_get(const HType* h, khkey_t key) { \ + HType##_s_bucket_t t; \ + t.key = key; \ + return prefix##_s_getp(h, &t); \ + } \ + SCOPE int prefix##_del(HType* h, khint_t k) { \ + return prefix##_s_del(h, k); \ + } \ + SCOPE khint_t prefix##_put(HType* h, khkey_t key, int* absent) { \ + HType##_s_bucket_t t; \ + t.key = key; \ + return prefix##_s_putp(h, &t, absent); \ + } + +#define KHASHL_MAP_INIT( \ + SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { \ + khkey_t key; \ + kh_val_t val; \ + } __kh_packed HType##_m_bucket_t; \ + static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { \ + return __hash_fn(x.key); \ + } \ + static kh_inline int prefix##_m_eq( \ + HType##_m_bucket_t x, HType##_m_bucket_t y) { \ + return __hash_eq(x.key, y.key); \ + } \ + KHASHL_INIT( \ + KH_LOCAL, \ + HType, \ + prefix##_m, \ + HType##_m_bucket_t, \ + prefix##_m_hash, \ + prefix##_m_eq) \ + SCOPE HType* prefix##_init(void) { \ + return prefix##_m_init(); \ + } \ + SCOPE void prefix##_destroy(HType* h) { \ + prefix##_m_destroy(h); \ + } \ + SCOPE khint_t prefix##_get(const HType* h, khkey_t key) { \ + HType##_m_bucket_t t; \ + t.key = key; \ + return prefix##_m_getp(h, &t); \ + } \ + SCOPE int prefix##_del(HType* h, khint_t k) { \ + return prefix##_m_del(h, k); \ + } \ + SCOPE khint_t prefix##_put(HType* h, khkey_t key, int* absent) { \ + HType##_m_bucket_t t; \ + t.key = key; \ + return prefix##_m_putp(h, &t, absent); \ + } #define KHASHL_CSET_INIT(SCOPE, HType, prefix, 
khkey_t, __hash_fn, __hash_eq) \ - typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \ - static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ - KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \ - SCOPE HType *prefix##_init(void) { return prefix##_cs_init(); } \ - SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \ - SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \ - SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \ - SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cs_putp(h, &t, absent); } - -#define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ - typedef struct { khkey_t key; kh_val_t val; khint_t hash; } __kh_packed HType##_cm_bucket_t; \ - static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ - KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \ - SCOPE HType *prefix##_init(void) { return prefix##_cm_init(); } \ - SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \ - SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \ - SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \ - SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cm_putp(h, &t, absent); } + typedef struct { \ + khkey_t key; \ + khint_t hash; \ + } __kh_packed HType##_cs_bucket_t; \ + static kh_inline int prefix##_cs_eq( \ + HType##_cs_bucket_t x, HType##_cs_bucket_t y) { \ + return x.hash == y.hash && __hash_eq(x.key, y.key); \ + } \ + KHASHL_INIT( \ + KH_LOCAL, \ + HType, \ + prefix##_cs, \ + HType##_cs_bucket_t, \ + __kh_cached_hash, \ + prefix##_cs_eq) \ + SCOPE HType* prefix##_init(void) { \ + return prefix##_cs_init(); \ + } \ + SCOPE void prefix##_destroy(HType* h) { \ + prefix##_cs_destroy(h); \ + } \ + SCOPE khint_t prefix##_get(const HType* h, khkey_t key) { \ + HType##_cs_bucket_t t; \ + t.key = key; \ + t.hash = __hash_fn(key); \ + return prefix##_cs_getp(h, &t); \ + } \ + SCOPE int prefix##_del(HType* h, khint_t k) { \ + return prefix##_cs_del(h, k); \ + } \ + SCOPE khint_t prefix##_put(HType* h, khkey_t key, int* absent) { \ + HType##_cs_bucket_t t; \ + t.key = key, t.hash = __hash_fn(key); \ + return prefix##_cs_putp(h, &t, absent); \ + } + +#define KHASHL_CMAP_INIT( \ + SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { \ + khkey_t key; \ + kh_val_t val; \ + khint_t hash; \ + } __kh_packed HType##_cm_bucket_t; \ + static kh_inline int prefix##_cm_eq( \ + HType##_cm_bucket_t x, HType##_cm_bucket_t y) { \ + return x.hash == y.hash && __hash_eq(x.key, y.key); \ + } \ + KHASHL_INIT( \ + KH_LOCAL, \ + HType, \ + prefix##_cm, \ + HType##_cm_bucket_t, \ + __kh_cached_hash, \ + prefix##_cm_eq) \ + SCOPE HType* prefix##_init(void) { \ + return prefix##_cm_init(); \ + } \ + SCOPE void prefix##_destroy(HType* h) { \ + prefix##_cm_destroy(h); \ + } \ + SCOPE khint_t prefix##_get(const 
HType* h, khkey_t key) { \ + HType##_cm_bucket_t t; \ + t.key = key; \ + t.hash = __hash_fn(key); \ + return prefix##_cm_getp(h, &t); \ + } \ + SCOPE int prefix##_del(HType* h, khint_t k) { \ + return prefix##_cm_del(h, k); \ + } \ + SCOPE khint_t prefix##_put(HType* h, khkey_t key, int* absent) { \ + HType##_cm_bucket_t t; \ + t.key = key, t.hash = __hash_fn(key); \ + return prefix##_cm_putp(h, &t, absent); \ + } /************************** * Public macro functions * @@ -306,7 +455,7 @@ static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 26 #define kh_bucket(h, x) ((h)->keys[x]) #define kh_size(h) ((h)->count) -#define kh_capacity(h) ((h)->keys? 1U<<(h)->bits : 0U) +#define kh_capacity(h) ((h)->keys ? 1U << (h)->bits : 0U) #define kh_end(h) kh_capacity(h) #define kh_key(h, x) ((h)->keys[x].key) @@ -322,30 +471,32 @@ static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 26 #define kh_hash_dummy(x) ((khint_t)(x)) static kh_inline khint_t kh_hash_uint32(khint_t key) { - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; } static kh_inline khint_t kh_hash_uint64(khint64_t key) { - key = ~key + (key << 21); - key = key ^ key >> 24; - key = (key + (key << 3)) + (key << 8); - key = key ^ key >> 14; - key = (key + (key << 2)) + (key << 4); - key = key ^ key >> 28; - key = key + (key << 31); - return (khint_t)key; + key = ~key + (key << 21); + key = key ^ key >> 24; + key = (key + (key << 3)) + (key << 8); + key = key ^ key >> 14; + key = (key + (key << 2)) + (key << 4); + key = key ^ key >> 28; + key = key + (key << 31); + return (khint_t)key; } -static kh_inline khint_t kh_hash_str(const char *s) { - khint_t h = (khint_t)*s; - if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; - return h; +static kh_inline khint_t kh_hash_str(const char* s) { + khint_t h = (khint_t)*s; + if (h) + for (++s; *s; ++s) + h = (h << 5) - h + (khint_t)*s; + return h; } #endif /* __AC_KHASHL_H */ diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index e3e8005b6a..c35315632f 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -468,12 +468,12 @@ std::vector SOMAArray::shape() { } uint64_t SOMAArray::ndim() const { - return this->schema().get()->domain().ndim(); + return tiledb_schema()->domain().ndim(); } std::vector SOMAArray::dimension_names() const { std::vector result; - auto dimensions = this->schema().get()->domain().dimensions(); + auto dimensions = tiledb_schema()->domain().dimensions(); for (const auto& dim : dimensions) { result.push_back(dim.name()); } diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index 0845cd694b..2814c2c0b8 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -39,6 +39,7 @@ #include #include +#include "../utils/arrow_adapter.h" #include "enums.h" #include "logger_public.h" #include "managed_query.h" @@ -185,6 +186,19 @@ class SOMAArray { */ std::shared_ptr ctx(); + std::optional soma_object_type() { + auto soma_object_type = this->get_metadata("soma_object_type"); + + if (!soma_object_type.has_value()) + return std::nullopt; + + const char* dtype = (const char*)std::get( + *soma_object_type); + uint32_t sz = std::get(*soma_object_type); + 
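+        // get_metadata() yields the raw value as a (datatype, length, pointer)
+        // triple; rebuild the std::string from the buffer and its length.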
+ return std::string(dtype, sz); + } + /** * Open the SOMAArray object. * @@ -209,6 +223,11 @@ class SOMAArray { return arr_->is_open(); } + OpenMode mode() const { + return mq_->query_type() == TILEDB_READ ? OpenMode::read : + OpenMode::write; + } + /** * @brief Reset the state of this SOMAArray object to prepare for a * new query, while holding the array open. @@ -472,14 +491,24 @@ class SOMAArray { uint64_t nnz(); /** - * @brief Get the schema of the array. + * @brief Get the TileDB ArraySchema. This should eventually + * be removed in lieu of arrow_schema below. * * @return std::shared_ptr Schema */ - std::shared_ptr schema() const { + std::shared_ptr tiledb_schema() const { return mq_->schema(); } + /** + * @brief Get the Arrow schema of the array. + * + * @return std::unique_ptr Schema + */ + std::unique_ptr arrow_schema() const { + return ArrowAdapter::arrow_schema_from_tiledb_array(ctx_, arr_); + } + /** * @brief Get the capacity of each dimension. * @@ -495,6 +524,36 @@ class SOMAArray { */ uint64_t ndim() const; + /** + * Retrieves the non-empty domain from the array. This is the union of the + * non-empty domains of the array fragments. + */ + template + std::pair non_empty_domain(const std::string& name) { + return arr_->non_empty_domain(name); + }; + + /** + * Retrieves the non-empty domain from the array on the given dimension. + * This is the union of the non-empty domains of the array fragments. + * Applicable only to var-sized dimensions. + */ + std::pair non_empty_domain_var( + const std::string& name) { + return arr_->non_empty_domain_var(name); + }; + + /** + * Returns the domain of the given dimension. + * + * @tparam T Domain datatype + * @return Pair of [lower, upper] inclusive bounds. + */ + template + std::pair domain(const std::string& name) const { + return arr_->schema().domain().dimension(name).domain(); + } + /** * @brief Get the name of each dimensions. 
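     * A rough usage sketch for the domain accessors added above, assuming
     * `arr` is a std::shared_ptr<SOMAArray> already opened for read and that
     * the array has an int64 "soma_joinid" dimension (both hypothetical here):
     *
     *   auto [lo, hi] = arr->non_empty_domain<int64_t>("soma_joinid");
     *   auto full = arr->domain<int64_t>("soma_joinid");
     *   // The non-empty domain (the union over fragments) always sits inside
     *   // the full domain: full.first <= lo and hi <= full.second.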
* diff --git a/libtiledbsoma/src/soma/soma_dataframe.cc b/libtiledbsoma/src/soma/soma_dataframe.cc index 00edd05b1a..20ef2bd4f1 100644 --- a/libtiledbsoma/src/soma/soma_dataframe.cc +++ b/libtiledbsoma/src/soma/soma_dataframe.cc @@ -105,19 +105,37 @@ SOMADataFrame::SOMADataFrame( "auto", // batch_size, result_order, timestamp); - array_->reset(); } void SOMADataFrame::open( OpenMode mode, std::optional> timestamp) { array_->open(mode, timestamp); - array_->reset(); } void SOMADataFrame::close() { array_->close(); } +bool SOMADataFrame::exists(std::string_view uri) { + try { + auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::read); + auto soma_object_type = soma_dataframe->get_metadata( + "soma_object_type"); + + if (!soma_object_type.has_value()) + return false; + + const char* dtype = (const char*)std::get( + *soma_object_type); + + uint32_t sz = std::get(*soma_object_type); + + return std::string(dtype, sz) == "SOMADataFrame"; + } catch (std::exception& e) { + return false; + } +} + bool SOMADataFrame::is_open() const { return array_->is_open(); } @@ -130,8 +148,12 @@ std::shared_ptr SOMADataFrame::ctx() { return array_->ctx(); } -std::shared_ptr SOMADataFrame::schema() const { - return array_->schema(); +std::unique_ptr SOMADataFrame::schema() const { + return array_->arrow_schema(); +} + +std::shared_ptr SOMADataFrame::tiledb_schema() const { + return array_->tiledb_schema(); } const std::vector SOMADataFrame::index_column_names() const { @@ -139,7 +161,7 @@ const std::vector SOMADataFrame::index_column_names() const { } int64_t SOMADataFrame::count() const { - return array_->ndim(); + return array_->nnz(); } std::optional> SOMADataFrame::read_next() { diff --git a/libtiledbsoma/src/soma/soma_dataframe.h b/libtiledbsoma/src/soma/soma_dataframe.h index c96a829495..9fb0871b14 100644 --- a/libtiledbsoma/src/soma/soma_dataframe.h +++ b/libtiledbsoma/src/soma/soma_dataframe.h @@ -33,7 +33,6 @@ #ifndef SOMA_DATAFRAME #define SOMA_DATAFRAME -#include #include "enums.h" #include "soma_array.h" #include "soma_object.h" @@ -146,8 +145,11 @@ class SOMADataFrame : public SOMAObject { ResultOrder result_order, std::optional> timestamp = std::nullopt); + SOMADataFrame(std::shared_ptr array) + : array_(array){}; + SOMADataFrame() = delete; - SOMADataFrame(const SOMADataFrame&) = delete; + SOMADataFrame(const SOMADataFrame&) = default; SOMADataFrame(SOMADataFrame&&) = default; ~SOMADataFrame() = default; @@ -166,6 +168,18 @@ class SOMADataFrame : public SOMAObject { */ void close(); + void reset( + std::vector column_names = {}, + std::string_view batch_size = "auto", + ResultOrder result_order = ResultOrder::automatic) { + array_->reset(column_names, batch_size, result_order); + } + + /** + * @brief Check if the SOMADataFrame exists at the URI. + */ + static bool exists(std::string_view uri); + /** * Check if the SOMADataFrame is open. * @@ -173,6 +187,10 @@ class SOMADataFrame : public SOMAObject { */ bool is_open() const; + OpenMode mode() const { + return array_->mode(); + } + /** * Return the constant "SOMADataFrame". * @@ -196,12 +214,26 @@ class SOMADataFrame : public SOMAObject { */ std::shared_ptr ctx(); + /** + * Return optional timestamp pair SOMADataFrame was opened with. + */ + std::optional> timestamp() { + return array_->timestamp(); + } + + /** + * Return the data schema, in the form of a ArrowSchema. + * + * @return std::unique_ptr + */ + std::unique_ptr schema() const; + /** * Return the data schema, in the form of a TileDB ArraySchema. 
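     * Unlike schema(), which now returns an Arrow schema, this exposes the
     * underlying TileDB schema directly.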
* * @return std::shared_ptr */ - std::shared_ptr schema() const; + std::shared_ptr tiledb_schema() const; /** * Return the index (dimension) column names. @@ -211,18 +243,126 @@ class SOMADataFrame : public SOMAObject { const std::vector index_column_names() const; /** - * Return the number of rows in the SOMADataFrame. + * Return the number of rows. * * @return int64_t */ int64_t count() const; + /** + * Retrieves the non-empty domain of the column index. + * + * @tparam T Domain datatype + * @return Pair of [lower, upper] inclusive bounds. + */ + template + std::pair non_empty_domain(const std::string& column_index_name) { + return array_->non_empty_domain(column_index_name); + }; + + /** + * Retrieves the non-empty domain of the column index. + * Applicable only to var-sized dimensions. + */ + std::pair non_empty_domain_var( + const std::string& column_index_name) { + return array_->non_empty_domain_var(column_index_name); + }; + + /** + * Returns the domain of the given column index. + * + * @tparam T Domain datatype + * @return Pair of [lower, upper] inclusive bounds. + */ + template + std::pair domain(const std::string& column_index_name) const { + return array_->domain(column_index_name); + } + /** * @brief Read the next chunk of results from the query. If all results have * already been read, std::nullopt is returned. */ std::optional> read_next(); + /** + * @brief Set the dimension slice using one point + * + * @note Partitioning is not supported + * + * @tparam T + * @param dim + * @param point + */ + template + void set_dim_point(const std::string& dim, const T& point) { + array_->set_dim_point(dim, point); + } + + /** + * @brief Set the dimension slice using multiple points, with support + * for partitioning. + * + * @tparam T + * @param dim + * @param points + */ + template + void set_dim_points( + const std::string& dim, + const tcb::span points, + int partition_index, + int partition_count) { + array_->set_dim_points(dim, points, partition_index, partition_count); + } + + /** + * @brief Set the dimension slice using multiple points + * + * @note Partitioning is not supported + * + * @tparam T + * @param dim + * @param points + */ + template + void set_dim_points(const std::string& dim, const std::vector& points) { + array_->set_dim_points(dim, points); + } + + /** + * @brief Set the dimension slice using multiple ranges + * + * @note Partitioning is not supported + * + * @tparam T + * @param dim + * @param ranges + */ + template + void set_dim_ranges( + const std::string& dim, const std::vector>& ranges) { + array_->set_dim_ranges(dim, ranges); + } + + /** + * @brief Set a query condition. + * + * @param qc Query condition + */ + void set_condition(QueryCondition& qc) { + array_->set_condition(qc); + } + + /** + * @brief Returns the column names set by the query. + * + * @return std::vector + */ + std::vector column_names() { + return array_->column_names(); + } + /** + * @brief Write data to the dataframe.
diff --git a/libtiledbsoma/src/soma/soma_dense_ndarray.cc b/libtiledbsoma/src/soma/soma_dense_ndarray.cc
index 5f66ba8abb..93d33f0095 100644
--- a/libtiledbsoma/src/soma/soma_dense_ndarray.cc
+++ b/libtiledbsoma/src/soma/soma_dense_ndarray.cc
@@ -131,8 +131,12 @@ std::shared_ptr<Context> SOMADenseNDArray::ctx() {
     return array_->ctx();
 }
 
-std::shared_ptr<ArraySchema> SOMADenseNDArray::schema() const {
-    return array_->schema();
+std::unique_ptr<ArrowSchema> SOMADenseNDArray::schema() const {
+    return array_->arrow_schema();
+}
+
+std::shared_ptr<ArraySchema> SOMADenseNDArray::tiledb_schema() const {
+    return array_->tiledb_schema();
 }
 
 std::vector<int64_t> SOMADenseNDArray::shape() const {
diff --git a/libtiledbsoma/src/soma/soma_dense_ndarray.h b/libtiledbsoma/src/soma/soma_dense_ndarray.h
index e4c771a064..8efd776966 100644
--- a/libtiledbsoma/src/soma/soma_dense_ndarray.h
+++ b/libtiledbsoma/src/soma/soma_dense_ndarray.h
@@ -33,7 +33,6 @@
 #ifndef SOMA_DENSE_NDARRAY
 #define SOMA_DENSE_NDARRAY
 
-#include <tiledb/tiledb>
 #include "enums.h"
 #include "soma_array.h"
 #include "soma_object.h"
@@ -200,11 +199,18 @@ class SOMADenseNDArray : public SOMAObject {
     const std::string uri() const;
 
     /**
-     * Return data schema, in the form of a TileDB ArraySchema.
+     * Return the data schema, in the form of an ArrowSchema.
+     *
+     * @return std::unique_ptr<ArrowSchema>
+     */
+    std::unique_ptr<ArrowSchema> schema() const;
+
+    /**
+     * Return the data schema, in the form of a TileDB ArraySchema.
      *
      * @return std::shared_ptr<ArraySchema>
      */
-    std::shared_ptr<ArraySchema> schema() const;
+    std::shared_ptr<ArraySchema> tiledb_schema() const;
 
     /**
      * @brief Get the capacity of each dimension.
diff --git a/libtiledbsoma/src/soma/soma_object.cc b/libtiledbsoma/src/soma/soma_object.cc
new file mode 100644
index 0000000000..655adbf30e
--- /dev/null
+++ b/libtiledbsoma/src/soma/soma_object.cc
@@ -0,0 +1,42 @@
+#include <map>
+#include <memory>
+#include <string>
+
+#include "soma_array.h"
+#include "soma_dataframe.h"
+
+namespace tiledbsoma {
+
+using namespace tiledb;
+
+std::unique_ptr<SOMAObject> SOMAObject::open(
+    std::string uri,
+    OpenMode mode,
+    std::map<std::string, std::string> platform_config,
+    std::optional<std::pair<uint64_t, uint64_t>> timestamp) {
+    auto ctx = std::make_shared<Context>(Config(platform_config));
+    return SOMAObject::open(uri, mode, ctx, timestamp);
+}
+
+std::unique_ptr<SOMAObject> SOMAObject::open(
+    std::string uri,
+    OpenMode mode,
+    std::shared_ptr<Context> ctx,
+    std::optional<std::pair<uint64_t, uint64_t>> timestamp) {
+    auto obj = tiledb::Object::object(*ctx, uri);
+
+    if (obj.type() == tiledb::Object::Type::Array) {
+        auto array_ = SOMAArray::open(
+            mode, ctx, uri, "", {}, "auto", ResultOrder::automatic, timestamp);
+
+        if (array_->soma_object_type() == "SOMADataFrame")
+            return std::make_unique<SOMADataFrame>(std::move(array_));
+        else
+            throw TileDBSOMAError(
+                "Invalid SOMAObject passed to SOMAObject::open");
+    }
+
+    throw TileDBSOMAError("Invalid TileDB object passed to SOMAObject::open");
+}
+
+}  // namespace tiledbsoma
diff --git a/libtiledbsoma/src/soma/soma_object.h b/libtiledbsoma/src/soma/soma_object.h
index 47beeb6047..43a209885e 100644
--- a/libtiledbsoma/src/soma/soma_object.h
+++ b/libtiledbsoma/src/soma/soma_object.h
@@ -41,6 +41,8 @@
 
 namespace tiledbsoma {
 
+class SOMADataFrame;
+
 using namespace tiledb;
 
 class SOMAObject {
   public:
@@ -49,6 +51,18 @@ class SOMAObject {
     //===================================================================
     virtual ~SOMAObject() = default;
 
+    static std::unique_ptr<SOMAObject> open(
+        std::string uri,
+        OpenMode mode,
+        std::map<std::string, std::string> platform_config = {},
+        std::optional<std::pair<uint64_t, uint64_t>> timestamp = std::nullopt);
+
+    static std::unique_ptr<SOMAObject> open(
+        std::string uri,
+        OpenMode mode,
+        std::shared_ptr<Context> ctx,
+        std::optional<std::pair<uint64_t, uint64_t>> timestamp =
+            std::nullopt);
+
     /**
      * @brief Return a constant string describing the type of the object.
     */
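A sketch of the intent behind the new polymorphic factory (again, not in the patch itself). As of this change only SOMADataFrame is dispatched; any other object makes SOMAObject::open throw TileDBSOMAError:

    #include "soma_dataframe.h"
    #include "soma_object.h"
    using namespace tiledbsoma;

    void open_generic() {
        auto obj = SOMAObject::open("file:///tmp/df", OpenMode::read);
        if (obj->type() == "SOMADataFrame") {
            // Safe downcast: SOMAObject has a virtual destructor.
            auto* df = dynamic_cast<SOMADataFrame*>(obj.get());
            // ... use df->count(), df->read_next(), ...
        }
    }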
diff --git a/libtiledbsoma/src/soma/soma_sparse_ndarray.cc b/libtiledbsoma/src/soma/soma_sparse_ndarray.cc
index 29f4b16afe..308fb0ddde 100644
--- a/libtiledbsoma/src/soma/soma_sparse_ndarray.cc
+++ b/libtiledbsoma/src/soma/soma_sparse_ndarray.cc
@@ -131,8 +131,12 @@ std::shared_ptr<Context> SOMASparseNDArray::ctx() {
     return array_->ctx();
 }
 
-std::shared_ptr<ArraySchema> SOMASparseNDArray::schema() const {
-    return array_->schema();
+std::unique_ptr<ArrowSchema> SOMASparseNDArray::schema() const {
+    return array_->arrow_schema();
+}
+
+std::shared_ptr<ArraySchema> SOMASparseNDArray::tiledb_schema() const {
+    return array_->tiledb_schema();
 }
 
 std::vector<int64_t> SOMASparseNDArray::shape() const {
diff --git a/libtiledbsoma/src/soma/soma_sparse_ndarray.h b/libtiledbsoma/src/soma/soma_sparse_ndarray.h
index 6aaf2eb10b..749418b597 100644
--- a/libtiledbsoma/src/soma/soma_sparse_ndarray.h
+++ b/libtiledbsoma/src/soma/soma_sparse_ndarray.h
@@ -33,7 +33,6 @@
 #ifndef SOMA_SPARSE_NDARRAY
 #define SOMA_SPARSE_NDARRAY
 
-#include <tiledb/tiledb>
 #include "enums.h"
 #include "soma_array.h"
 #include "soma_object.h"
@@ -200,11 +199,18 @@ class SOMASparseNDArray : public SOMAObject {
     const std::string uri() const;
 
     /**
-     * Return data schema, in the form of a TileDB ArraySchema.
+     * Return the data schema, in the form of an ArrowSchema.
+     *
+     * @return std::unique_ptr<ArrowSchema>
+     */
+    std::unique_ptr<ArrowSchema> schema() const;
+
+    /**
+     * Return the data schema, in the form of a TileDB ArraySchema.
     *
     * @return std::shared_ptr<ArraySchema>
     */
-    std::shared_ptr<ArraySchema> schema() const;
+    std::shared_ptr<ArraySchema> tiledb_schema() const;
 
    /**
     * @brief Get the capacity of each dimension.
diff --git a/libtiledbsoma/src/utils/arrow_adapter.cc b/libtiledbsoma/src/utils/arrow_adapter.cc
index 93058428b8..4739bd0ed2 100644
--- a/libtiledbsoma/src/utils/arrow_adapter.cc
+++ b/libtiledbsoma/src/utils/arrow_adapter.cc
@@ -101,6 +101,68 @@ void ArrowAdapter::release_array(struct ArrowArray* array) {
     array->release = nullptr;
 }
 
+std::unique_ptr<ArrowSchema> ArrowAdapter::arrow_schema_from_tiledb_array(
+    std::shared_ptr<Context> ctx, std::shared_ptr<Array> tiledb_array) {
+    auto tiledb_schema = tiledb_array->schema();
+    auto ndim = tiledb_schema.domain().ndim();
+    auto nattr = tiledb_schema.attribute_num();
+
+    std::unique_ptr<ArrowSchema> arrow_schema = std::make_unique<ArrowSchema>();
+    arrow_schema->format = "+s";
+    arrow_schema->n_children = ndim + nattr;
+    arrow_schema->release = &ArrowAdapter::release_schema;
+    arrow_schema->children = new ArrowSchema*[arrow_schema->n_children];
+
+    ArrowSchema* child = nullptr;
+
+    for (uint32_t i = 0; i < ndim; ++i) {
+        auto dim = tiledb_schema.domain().dimension(i);
+        child = arrow_schema->children[i] = new ArrowSchema;
+        child->format = ArrowAdapter::to_arrow_format(dim.type()).data();
+        child->name = strdup(dim.name().c_str());
+        child->metadata = nullptr;
+        child->flags = 0;
+        child->n_children = 0;
+        child->dictionary = nullptr;
+        child->children = nullptr;
+        child->release = &ArrowAdapter::release_schema;
+    }
+
+    for (uint32_t i = 0; i < nattr; ++i) {
+        auto attr = tiledb_schema.attribute(i);
+        child = arrow_schema->children[ndim + i] = new ArrowSchema;
+        child->format = ArrowAdapter::to_arrow_format(attr.type()).data();
+        child->name = strdup(attr.name().c_str());
+        child->metadata = nullptr;
+        child->flags = attr.nullable() ? ARROW_FLAG_NULLABLE : 0;
+        child->n_children = 0;
+        child->children = nullptr;
+        child->dictionary = nullptr;
+
+        auto enmr_name = AttributeExperimental::get_enumeration_name(
+            *ctx, attr);
+        if (enmr_name.has_value()) {
+            auto enmr = ArrayExperimental::get_enumeration(
+                *ctx, *tiledb_array, attr.name());
+            auto dict = new ArrowSchema;
+            dict->format = strdup(
+                ArrowAdapter::to_arrow_format(enmr.type(), false).data());
+            dict->name = strdup(enmr.name().c_str());
+            dict->metadata = nullptr;
+            dict->flags = 0;
+            dict->n_children = 0;
+            dict->children = nullptr;
+            dict->dictionary = nullptr;
+            dict->release = &ArrowAdapter::release_schema;
+            dict->private_data = nullptr;
+            child->dictionary = dict;
+        }
+        child->release = &ArrowAdapter::release_schema;
+    }
+
+    return arrow_schema;
+}
+
 std::pair<const void*, std::size_t> ArrowAdapter::_get_data_and_length(
     Enumeration& enmr, const void* dst) {
     switch (enmr.type()) {
@@ -117,7 +179,7 @@ std::pair<const void*, std::size_t> ArrowAdapter::_get_data_and_length(
 
             // Allocate a single byte to copy the bits into
             size_t sz = 1;
-            dst = (const void*)malloc(sz);
+            dst = new const void*[sz];
             std::memcpy((void*)dst, &src, sz);
 
             return std::pair(dst, data.size());
@@ -196,12 +258,15 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
     int n_buffers = column->is_var() ? 3 : 2;
 
     // Create an ArrowBuffer to manage the lifetime of `column`.
-    // - `arrow_buffer` holds a shared_ptr to `column`, which increments
+    // - `arrow_buffer` holds a shared_ptr to `column`, which
+    // increments
     //   the use count and keeps the ColumnBuffer data alive.
-    // - When the arrow array is released, `array->release()` is called with
-    //   `arrow_buffer` in `private_data`. `arrow_buffer` is deleted, which
-    //   decrements the the `column` use count. When the `column` use count
-    //   reaches 0, the ColumnBuffer data will be deleted.
+    // - When the arrow array is released, `array->release()` is
+    // called with
+    //   `arrow_buffer` in `private_data`. `arrow_buffer` is
+    //   deleted, which decrements the `column` use count. When
+    //   the `column` use count reaches 0, the ColumnBuffer data
+    //   will be deleted.
     auto arrow_buffer = new ArrowBuffer(column);
 
     array->length = column->size();
@@ -220,7 +285,7 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
             column->name(),
             column.use_count()));
 
-    array->buffers = (const void**)malloc(sizeof(void*) * n_buffers);
+    array->buffers = new const void*[n_buffers];
     assert(array->buffers != nullptr);
     array->buffers[0] = nullptr;  // validity
     array->buffers[n_buffers - 1] = column->data().data();  // data
@@ -244,18 +309,18 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
         schema->flags |= ARROW_FLAG_DICTIONARY_ORDERED;
     }
 
-    /* Workaround to cast TILEDB_BOOL from uint8 to 1-bit Arrow boolean. */
+    // Workaround to cast TILEDB_BOOL from uint8 to 1-bit Arrow boolean
     if (column->type() == TILEDB_BOOL) {
         column->data_to_bitmap();
     }
 
     if (column->has_enumeration()) {
-        ArrowSchema* dict_sch = new ArrowSchema;
-        ArrowArray* dict_arr = new ArrowArray;
+        auto dict_sch = new ArrowSchema;
+        auto dict_arr = new ArrowArray;
 
         auto enmr = column->get_enumeration_info();
         dict_sch->format = strdup(to_arrow_format(enmr->type(), false).data());
-        dict_sch->name = strdup(enmr->name().c_str());
+        dict_sch->name = nullptr;
         dict_sch->metadata = nullptr;
         dict_sch->flags = 0;
         dict_sch->n_children = 0;
@@ -275,18 +340,19 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
         dict_arr->release = &release_array;
         dict_arr->private_data = nullptr;
 
-        dict_arr->buffers = (const void**)malloc(sizeof(void*) * n_buf);
+        dict_arr->buffers = new const void*[n_buf];
         dict_arr->buffers[0] = nullptr;  // validity: none here
 
-        // TODO string types currently get the data and offset buffers from
-        // ColumnBuffer::enum_offsets and ColumnBuffer::enum_string which is
-        // retrieved via ColumnBuffer::convert_enumeration. This may be
-        // refactored to all use ColumnBuffer::get_enumeration_info. Note
-        // that ColumnBuffer::has_enumeration may also be removed in a
-        // future refactor as ColumnBuffer::get_enumeration_info returns
-        // std::optional where std::nullopt indicates the column does not
-        // contain enumerated values.
-        if (enmr->type() == TILEDB_STRING_ASCII ||
+        // TODO string types currently get the data and offset
+        // buffers from ColumnBuffer::enum_offsets and
+        // ColumnBuffer::enum_string which is retrieved via
+        // ColumnBuffer::convert_enumeration. This may be refactored
+        // to all use ColumnBuffer::get_enumeration_info. Note that
+        // ColumnBuffer::has_enumeration may also be removed in a
+        // future refactor as ColumnBuffer::get_enumeration_info
+        // returns std::optional where std::nullopt indicates the
+        // column does not contain enumerated values.
+        if (enmr->type() == TILEDB_STRING_ASCII or
             enmr->type() == TILEDB_STRING_UTF8) {
             auto dict_vec = enmr->as_vector<std::string>();
             column->convert_enumeration();
@@ -294,7 +360,7 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
             dict_arr->buffers[2] = column->enum_string().data();
             dict_arr->length = dict_vec.size();
         } else {
-            auto [dict_data, dict_length] = ArrowAdapter::_get_data_and_length(
+            auto [dict_data, dict_length] = _get_data_and_length(
                 *enmr, dict_arr->buffers[1]);
             dict_arr->buffers[1] = dict_data;
             dict_arr->length = dict_length;
@@ -312,12 +378,12 @@ std::string_view ArrowAdapter::to_arrow_format(
     switch (datatype) {
         case TILEDB_STRING_ASCII:
         case TILEDB_STRING_UTF8:
-            return use_large ? "U" :
-                               "u";  // large because TileDB uses 64bit offsets
+            return use_large ? "U" : "u";  // large because TileDB
+                                           // uses 64bit offsets
         case TILEDB_CHAR:
         case TILEDB_BLOB:
-            return use_large ? "Z" :
-                               "z";  // large because TileDB uses 64bit offsets
+            return use_large ? "Z" : "z";  // large because TileDB
+                                           // uses 64bit offsets
         case TILEDB_BOOL:
             return "b";
         case TILEDB_INT32:
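For reviewers of the malloc-to-new changes above, a sketch of the ownership contract (not in the patch): the Arrow C data interface frees everything through the release callbacks, which is what keeps the ColumnBuffer pinned until the consumer is done:

    #include "arrow_adapter.h"
    using namespace tiledbsoma;

    // `column` is assumed to be a populated ColumnBuffer from a read.
    void export_column(std::shared_ptr<ColumnBuffer> column) {
        auto [array, schema] = ArrowAdapter::to_arrow(column);
        // ... hand *array / *schema to any Arrow C-data-interface consumer ...
        array->release(array.get());    // deletes the ArrowBuffer, dropping
                                        // the shared_ptr that pins `column`
        schema->release(schema.get());
    }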
diff --git a/libtiledbsoma/src/utils/arrow_adapter.h b/libtiledbsoma/src/utils/arrow_adapter.h
index fd33bff882..a210aca77c 100644
--- a/libtiledbsoma/src/utils/arrow_adapter.h
+++ b/libtiledbsoma/src/utils/arrow_adapter.h
@@ -46,6 +46,9 @@ class ArrowAdapter {
     static std::pair<std::unique_ptr<ArrowArray>, std::unique_ptr<ArrowSchema>>
     to_arrow(std::shared_ptr<ColumnBuffer> column);
 
+    static std::unique_ptr<ArrowSchema> arrow_schema_from_tiledb_array(
+        std::shared_ptr<Context> ctx, std::shared_ptr<Array> tiledb_array);
+
     /**
      * @brief Get Arrow format string from TileDB datatype.
     *
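A sketch (not in the patch) of consuming the schema returned by the new adapter entry point; the context and opened tiledb::Array are assumed to come from the caller:

    #include "arrow_adapter.h"
    using namespace tiledbsoma;

    void inspect(
        std::shared_ptr<tiledb::Context> ctx,
        std::shared_ptr<tiledb::Array> arr) {
        auto sch = ArrowAdapter::arrow_schema_from_tiledb_array(ctx, arr);
        // Children are laid out dimensions-first, then attributes.
        for (int64_t i = 0; i < sch->n_children; ++i) {
            ArrowSchema* child = sch->children[i];
            if (child->dictionary != nullptr) {
                // Enumerated attribute: child->format is the index type,
                // child->dictionary->format the value type.
            }
        }
        sch->release(sch.get());  // recursively frees the children
    }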
diff --git a/libtiledbsoma/test/unit_soma_array.cc b/libtiledbsoma/test/unit_soma_array.cc
index 112669e4fc..1e6b2bb0c3 100644
--- a/libtiledbsoma/test/unit_soma_array.cc
+++ b/libtiledbsoma/test/unit_soma_array.cc
@@ -127,10 +127,6 @@ std::tuple<std::vector<int64_t>, std::vector<int32_t>> write_array(
         ResultOrder::automatic,
         std::pair<uint64_t, uint64_t>(timestamp + i, timestamp + i));
 
-    if (LOG_DEBUG_ENABLED()) {
-        soma_array->schema()->dump();
-    }
-
     std::vector<int64_t> d0(num_cells_per_fragment);
     for (int j = 0; j < num_cells_per_fragment; j++) {
         // Overlap odd fragments when generating overlaps
diff --git a/libtiledbsoma/test/unit_soma_collection.cc b/libtiledbsoma/test/unit_soma_collection.cc
index 1e79a41c7a..4066461403 100644
--- a/libtiledbsoma/test/unit_soma_collection.cc
+++ b/libtiledbsoma/test/unit_soma_collection.cc
@@ -107,8 +107,6 @@ TEST_CASE("SOMACollection: add SOMASparseNDArray") {
     REQUIRE(soma_sparse->ctx() == ctx);
     REQUIRE(soma_sparse->type() == "SOMASparseNDArray");
     REQUIRE(soma_sparse->is_sparse() == true);
-    REQUIRE(soma_sparse->schema()->has_attribute("a0"));
-    REQUIRE(soma_sparse->schema()->domain().has_dimension("d0"));
     REQUIRE(soma_sparse->ndim() == 1);
     REQUIRE(soma_sparse->nnz() == 0);
     soma_sparse->close();
@@ -137,8 +135,6 @@ TEST_CASE("SOMACollection: add SOMADenseNDArray") {
     REQUIRE(soma_dense->ctx() == ctx);
     REQUIRE(soma_dense->type() == "SOMADenseNDArray");
     REQUIRE(soma_dense->is_sparse() == false);
-    REQUIRE(soma_dense->schema()->has_attribute("a0"));
-    REQUIRE(soma_dense->schema()->domain().has_dimension("d0"));
     REQUIRE(soma_dense->ndim() == 1);
     REQUIRE(soma_dense->shape() == std::vector<int64_t>{1001});
     soma_collection->close();
@@ -154,7 +150,7 @@ TEST_CASE("SOMACollection: add SOMADataFrame") {
     std::string sub_uri = "mem://unit-test-add-dataframe/sub";
 
     SOMACollection::create(base_uri, ctx);
-    auto schema = create_schema(*ctx, false);
+    auto schema = create_schema(*ctx, true);
 
     std::map<std::string, std::string> expected_map{{"dataframe", sub_uri}};
 
@@ -165,16 +161,14 @@ TEST_CASE("SOMACollection: add SOMADataFrame") {
     REQUIRE(soma_dataframe->uri() == sub_uri);
     REQUIRE(soma_dataframe->ctx() == ctx);
     REQUIRE(soma_dataframe->type() == "SOMADataFrame");
-    REQUIRE(soma_dataframe->schema()->has_attribute("a0"));
-    REQUIRE(soma_dataframe->schema()->domain().has_dimension("d0"));
     std::vector<std::string> expected_index_column_names = {"d0"};
     REQUIRE(
         soma_dataframe->index_column_names() == expected_index_column_names);
-    REQUIRE(soma_dataframe->count() == 1);
     soma_collection->close();
 
     soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx);
     REQUIRE(soma_collection->member_to_uri_mapping() == expected_map);
+    REQUIRE(soma_dataframe->count() == 0);
     soma_collection->close();
 }
diff --git a/libtiledbsoma/test/unit_soma_dataframe.cc b/libtiledbsoma/test/unit_soma_dataframe.cc
index e54f34a805..174e2db124 100644
--- a/libtiledbsoma/test/unit_soma_dataframe.cc
+++ b/libtiledbsoma/test/unit_soma_dataframe.cc
@@ -59,7 +59,7 @@ const std::string src_path = TILEDBSOMA_SOURCE_ROOT;
 namespace {
 ArraySchema create_schema(Context& ctx, bool allow_duplicates = false) {
     // Create schema
-    ArraySchema schema(ctx, TILEDB_DENSE);
+    ArraySchema schema(ctx, TILEDB_SPARSE);
 
     auto dim = Dimension::create<int64_t>(ctx, "d0", {0, 1000});
@@ -86,16 +86,15 @@ TEST_CASE("SOMADataFrame: basic") {
     REQUIRE(soma_dataframe->uri() == uri);
     REQUIRE(soma_dataframe->ctx() == ctx);
     REQUIRE(soma_dataframe->type() == "SOMADataFrame");
-    auto schema = soma_dataframe->schema();
-    REQUIRE(schema->has_attribute("a0"));
-    REQUIRE(schema->domain().has_dimension("d0"));
     std::vector<std::string> expected_index_column_names = {"d0"};
     REQUIRE(
         soma_dataframe->index_column_names() == expected_index_column_names);
-    REQUIRE(soma_dataframe->count() == 1);
+    REQUIRE(soma_dataframe->count() == 0);
     soma_dataframe->close();
 
-    std::vector<int64_t> d0{1, 10};
+    std::vector<int64_t> d0(10);
+    for (int j = 0; j < 10; j++)
+        d0[j] = j;
     std::vector<int> a0(10, 1);
 
     auto array_buffer = std::make_shared<ArrayBuffers>();
@@ -103,21 +102,24 @@ TEST_CASE("SOMADataFrame: basic") {
     array_buffer->emplace("a0", ColumnBuffer::create(tdb_arr, "a0", a0));
     array_buffer->emplace("d0", ColumnBuffer::create(tdb_arr, "d0", d0));
 
-    soma_dataframe->open(OpenMode::write);
+    soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx);
     soma_dataframe->write(array_buffer);
     soma_dataframe->close();
 
-    soma_dataframe->open(OpenMode::read);
+    soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx);
     while (auto batch = soma_dataframe->read_next()) {
         auto arrbuf = batch.value();
         auto d0span = arrbuf->at("d0")->data<int64_t>();
         auto a0span = arrbuf->at("a0")->data<int>();
-        REQUIRE(
-            std::vector<int64_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10} ==
-            std::vector<int64_t>(d0span.begin(), d0span.end()));
+        REQUIRE(d0 == std::vector<int64_t>(d0span.begin(), d0span.end()));
         REQUIRE(a0 == std::vector<int>(a0span.begin(), a0span.end()));
     }
     soma_dataframe->close();
+
+    auto soma_object = SOMAObject::open(uri, OpenMode::read, ctx);
+    REQUIRE(soma_object->uri() == uri);
+    REQUIRE(soma_object->type() == "SOMADataFrame");
+    soma_object->close();
 }
 
 TEST_CASE("SOMADataFrame: metadata") {
diff --git a/libtiledbsoma/test/unit_soma_dense_ndarray.cc b/libtiledbsoma/test/unit_soma_dense_ndarray.cc
index 55f15f54f7..5770d91b54 100644
--- a/libtiledbsoma/test/unit_soma_dense_ndarray.cc
+++ b/libtiledbsoma/test/unit_soma_dense_ndarray.cc
@@ -87,7 +87,7 @@ TEST_CASE("SOMADenseNDArray: basic") {
     REQUIRE(soma_dense->ctx() == ctx);
     REQUIRE(soma_dense->type() == "SOMADenseNDArray");
     REQUIRE(soma_dense->is_sparse() == false);
-    auto schema = soma_dense->schema();
+    auto schema = soma_dense->tiledb_schema();
     REQUIRE(schema->has_attribute("a0"));
     REQUIRE(schema->domain().has_dimension("d0"));
     REQUIRE(soma_dense->ndim() == 1);
diff --git a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc
index d4a418ef77..6b848ade19 100644
--- a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc
+++ b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc
@@ -87,7 +87,7 @@ TEST_CASE("SOMASparseNDArray: basic") {
     REQUIRE(soma_sparse->ctx() == ctx);
     REQUIRE(soma_sparse->type() == "SOMASparseNDArray");
     REQUIRE(soma_sparse->is_sparse() == true);
-    auto schema = soma_sparse->schema();
+    auto schema = soma_sparse->tiledb_schema();
     REQUIRE(schema->has_attribute("a0"));
     REQUIRE(schema->domain().has_dimension("d0"));
     REQUIRE(soma_sparse->ndim() == 1);
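A test the patch does not include, sketched here as a possible follow-up. It assumes the create() helper signature used elsewhere in this file, and uses a filesystem URI because exists() opens with a fresh default Context, so a mem:// array created under another context would not be visible to it:

    TEST_CASE("SOMADataFrame: exists") {
        auto ctx = std::make_shared<Context>();
        // Hypothetical temp path; see note above on mem:// URIs.
        std::string uri = "/tmp/unit-test-dataframe-exists";
        SOMADataFrame::create(uri, create_schema(*ctx, true), ctx);
        // exists() opens the array and checks its soma_object_type metadata.
        REQUIRE(SOMADataFrame::exists(uri));
    }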