From bc075acb1cb6e9fca5b63314aa5520b3c6901843 Mon Sep 17 00:00:00 2001
From: John Kerl
Date: Fri, 16 Aug 2024 20:08:50 -0400
Subject: [PATCH] more unit-test findings [skip ci]

---
 apis/python/src/tiledbsoma/soma_array.cc      | 29 ++++++------
 apis/python/src/tiledbsoma/soma_dataframe.cc  | 13 +++++-
 .../src/tiledbsoma/soma_dense_ndarray.cc      | 14 +++++-
 .../src/tiledbsoma/soma_sparse_ndarray.cc     | 13 +++++-
 apis/python/tests/test_basic_anndata_io.py    | 13 +++++-
 .../tests/test_dataframe_index_columns.py     |  4 +-
 .../tests/test_registration_mappings.py       | 11 +++++
 apis/python/tests/test_shape.py               | 23 +++++-----
 libtiledbsoma/src/soma/soma_array.cc          | 44 +++++++++++--------
 9 files changed, 112 insertions(+), 52 deletions(-)

diff --git a/apis/python/src/tiledbsoma/soma_array.cc b/apis/python/src/tiledbsoma/soma_array.cc
index fc2bc36cea..03f9d6ad5f 100644
--- a/apis/python/src/tiledbsoma/soma_array.cc
+++ b/apis/python/src/tiledbsoma/soma_array.cc
@@ -500,14 +500,21 @@ void load_soma_array(py::module& m) {
                 py::gil_scoped_release release;
 
                 // Try to read more data
-                auto buffers = array.read_next();
-
-                // If more data was read, convert it to an arrow table and
-                // return
-                if (buffers.has_value()) {
-                    // Acquire python GIL before accessing python objects
-                    py::gil_scoped_acquire acquire;
-                    return to_table(*buffers);
+                try {
+                    auto buffers = array.read_next();
+
+                    // If more data was read, convert it to an arrow table and
+                    // return
+                    if (buffers.has_value()) {
+                        // Acquire python GIL before accessing python objects
+                        py::gil_scoped_acquire acquire;
+                        return to_table(*buffers);
+                    }
+
+                } catch (const std::exception& e) {
+                    // Re-raise as ValueError to preserve index-out-of-bounds
+                    // reporting semantics in the current-domain/new-shape era.
+                    throw py::value_error(e.what());
                 }
 
                 // No data was read, the query is complete, return nullopt
@@ -520,12 +527,6 @@ void load_soma_array(py::module& m) {
 
         .def("nnz", &SOMAArray::nnz, py::call_guard<py::gil_scoped_release>())
 
-        // xxx temp .def("resize", &SOMAArray::resize)
-
-        // xxx temp .def_property_readonly("shape", &SOMAArray::shape)
-
-        // xxx temp .def_property_readonly("maxshape", &SOMAArray::maxshape)
-
         .def_property_readonly("uri", &SOMAArray::uri)
 
         .def_property_readonly("column_names", &SOMAArray::column_names)
diff --git a/apis/python/src/tiledbsoma/soma_dataframe.cc b/apis/python/src/tiledbsoma/soma_dataframe.cc
index ce7340eef2..df6eb78a8c 100644
--- a/apis/python/src/tiledbsoma/soma_dataframe.cc
+++ b/apis/python/src/tiledbsoma/soma_dataframe.cc
@@ -145,7 +145,18 @@ void load_soma_dataframe(py::module& m) {
         .def_static("exists", &SOMADataFrame::exists)
 
         // XXX TEMP
-        .def("resize", &SOMADataFrame::resize1)
+        .def(
+            "resize",
+            [](SOMAArray& array, const std::vector<int64_t>& newshape) {
+                try {
+                    array.resize1(newshape);
+                } catch (const std::exception& e) {
+                    // Re-raise as ValueError to preserve index-out-of-bounds
+                    // reporting semantics in the current-domain/new-shape era.
+                    throw py::value_error(e.what());
+                }
+            },
+            "newshape"_a)
 
         // XXX TEMP
         .def_property_readonly("shape", &SOMADataFrame::shape1)
diff --git a/apis/python/src/tiledbsoma/soma_dense_ndarray.cc b/apis/python/src/tiledbsoma/soma_dense_ndarray.cc
index 818d437506..a6a79ce8a1 100644
--- a/apis/python/src/tiledbsoma/soma_dense_ndarray.cc
+++ b/apis/python/src/tiledbsoma/soma_dense_ndarray.cc
@@ -124,8 +124,18 @@ void load_soma_dense_ndarray(py::module& m) {
 
         .def_static("exists", &SOMADenseNDArray::exists)
 
-        // XXX TEMP
-        .def("resize", &SOMAArray::resize)
+        .def(
+            "resize",
+            [](SOMAArray& array, const std::vector<int64_t>& newshape) {
+                try {
+                    array.resize(newshape);
+                } catch (const std::exception& e) {
+                    // Re-raise as ValueError to preserve index-out-of-bounds
+                    // reporting semantics in the current-domain/new-shape era.
+                    throw py::value_error(e.what());
+                }
+            },
+            "newshape"_a)
 
         // XXX TEMP
         .def_property_readonly("shape", &SOMAArray::shape)
diff --git a/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc b/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc
index 9eb0d41e5f..de93c99c92 100644
--- a/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc
+++ b/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc
@@ -113,7 +113,18 @@ void load_soma_sparse_ndarray(py::module& m) {
         .def_static("exists", &SOMASparseNDArray::exists)
 
         // XXX TEMP
-        .def("resize", &SOMAArray::resize)
+        .def(
+            "resize",
+            [](SOMAArray& array, const std::vector<int64_t>& newshape) {
+                try {
+                    array.resize(newshape);
+                } catch (const std::exception& e) {
+                    // Re-raise as ValueError to preserve index-out-of-bounds
+                    // reporting semantics in the current-domain/new-shape era.
+                    throw py::value_error(e.what());
+                }
+            },
+            "newshape"_a)
 
         // XXX TEMP
         .def_property_readonly("shape", &SOMAArray::shape)
diff --git a/apis/python/tests/test_basic_anndata_io.py b/apis/python/tests/test_basic_anndata_io.py
index 92f2d530a2..faa5d35c5d 100644
--- a/apis/python/tests/test_basic_anndata_io.py
+++ b/apis/python/tests/test_basic_anndata_io.py
@@ -1333,7 +1333,18 @@ def test_nan_append(conftest_pbmc_small, dtype, nans, new_obs_ids):
         var_field_name="var_id",
     )
 
-    # XXX TO DO: NOW NEEDS A RESIZE AS OF 1.13
+    # XXX TO DO: NOW NEEDS A RESIZE AS OF 2.26
+    # XXX TEMP -- needs an all-in-one experiment-level mutator ...
+    with tiledbsoma.Experiment.open(SOMA_URI, "w") as exp:
+        nobs2 = len(rd.obs_axis.data)
+        new_obs_shape = (nobs2,)
+        exp.obs.resize(new_obs_shape)
+
+        new_X_shape = (nobs2, len(adata2.var))
+        exp.ms["RNA"].X["data"].resize(new_X_shape)
+
+        new_X_shape = (nobs2, len(adata2.raw.var))
+        exp.ms["raw"].X["data"].resize(new_X_shape)
 
     # Append the second anndata object
     tiledbsoma.io.from_anndata(
diff --git a/apis/python/tests/test_dataframe_index_columns.py b/apis/python/tests/test_dataframe_index_columns.py
index 7629840603..7cb721c467 100644
--- a/apis/python/tests/test_dataframe_index_columns.py
+++ b/apis/python/tests/test_dataframe_index_columns.py
@@ -3,7 +3,6 @@
 import pytest
 
 import tiledbsoma as soma
-import tiledb
 
 
 @pytest.fixture
@@ -1899,6 +1898,7 @@ def test_types_read_errors(
     with soma.DataFrame.open(uri, "w") as sdf:
         sdf.write(arrow_table)
 
-    with pytest.raises((RuntimeError, tiledb.cc.TileDBError)):
+    # XXX TO DO
+    with pytest.raises((soma.SOMAError)):
         with soma.DataFrame.open(uri, "r") as sdf:
             sdf.read(coords=coords).concat()
diff --git a/apis/python/tests/test_registration_mappings.py b/apis/python/tests/test_registration_mappings.py
index 45065b6a68..0708c3ee9d 100644
--- a/apis/python/tests/test_registration_mappings.py
+++ b/apis/python/tests/test_registration_mappings.py
@@ -430,6 +430,8 @@ def test_multiples_without_experiment(
         "ZZZ3": 9,
     }
 
+    # XXX TO DO: RESIZE
+
     # Now do the ingestion per se. Note that once registration is done sequentially, ingest order
     # mustn't matter, and in fact, can be done in parallel. This is why we test various permutations
     # of the ordering of the h5ad file names.
@@ -818,6 +820,8 @@ def test_append_with_disjoint_measurements(
         var_field_name=var_field_name,
     )
 
+    # XXX TO DO: RESIZE
+
     tiledbsoma.io.from_anndata(
         soma_uri,
         anndata2,
@@ -1166,6 +1170,9 @@ def test_enum_bit_width_append(tmp_path, all_at_once, nobs_a, nobs_b):
         tiledbsoma.io.from_anndata(
             soma_uri, adata, measurement_name=measurement_name, registration_mapping=rd
         )
+
+        # XXX TO DO: RESIZE
+
         tiledbsoma.io.from_anndata(
            soma_uri, bdata, measurement_name=measurement_name, registration_mapping=rd
         )
@@ -1181,6 +1188,8 @@ def test_enum_bit_width_append(tmp_path, all_at_once, nobs_a, nobs_b):
             var_field_name=var_field_name,
         )
 
+        # XXX TO DO: RESIZE
+
         tiledbsoma.io.from_anndata(
             soma_uri, bdata, measurement_name=measurement_name, registration_mapping=rd
         )
@@ -1256,6 +1265,8 @@ def test_multimodal_names(tmp_path, conftest_pbmc3k_adata):
         var_field_name=adata_protein.var.index.name,
     )
 
+    # XXX TO DO: RESIZE
+
     # Ingest the second anndata object into the protein measurement
     tiledbsoma.io.from_anndata(
         experiment_uri=uri,
diff --git a/apis/python/tests/test_shape.py b/apis/python/tests/test_shape.py
index efa6308a2a..47adc0e83e 100644
--- a/apis/python/tests/test_shape.py
+++ b/apis/python/tests/test_shape.py
@@ -4,7 +4,6 @@
 import pytest
 
 import tiledbsoma
-import tiledb
 
 from tests._util import maybe_raises
 
@@ -155,9 +154,7 @@ def test_sparse_nd_array_basics(
 
     # Test reads out of bounds
    with tiledbsoma.SparseNDArray.open(uri) as snda:
-        # AS NOTED ABOVE
-        # with pytest.raises(tiledbsoma.SOMAError):
-        with pytest.raises((tiledbsoma.SOMAError, tiledb.cc.TileDBError)):
+        with pytest.raises(ValueError):
             coords = tuple([arg_shape[i] + 10 for i in range(ndim)])
             snda.read(coords).tables().concat()
 
@@ -177,7 +174,7 @@ def test_sparse_nd_array_basics(
     new_shape = tuple([arg_shape[i] - 50 for i in range(ndim)])
     # TODO: why union with tiledb.cc.TileDBError -- needed in sandbox
     with tiledbsoma.SparseNDArray.open(uri, "w") as snda:
-        with pytest.raises((tiledbsoma.SOMAError, tiledb.cc.TileDBError)):
+        with pytest.raises(ValueError):
             snda.resize(new_shape)
 
     with tiledbsoma.SparseNDArray.open(uri) as snda:
@@ -185,7 +182,7 @@ def test_sparse_nd_array_basics(
 
     # Test resize too big
     new_shape = tuple([4_000_000_000 for i in range(ndim)])
-    with pytest.raises((tiledbsoma.SOMAError, tiledb.cc.TileDBError)):
+    with pytest.raises(ValueError):
         with tiledbsoma.SparseNDArray.open(uri, "w") as snda:
             snda.resize(new_shape)
     with tiledbsoma.SparseNDArray.open(uri) as snda:
@@ -217,7 +214,7 @@ def test_sparse_nd_array_basics(
     assert readback == table
 
 
-# Pending 2.26 timeframe.
+# Pending 2.26 timeframe for dense support
 # TODO: mark these with a linked GitHub tracking issue
 def test_dense_nd_array_basics(tmp_path):
     uri = tmp_path.as_posix()
@@ -227,12 +224,9 @@ def test_dense_nd_array_basics(tmp_path):
     with tiledbsoma.DenseNDArray.open(uri) as dnda:
         assert dnda.shape == (100, 200)
 
-        # Pending 2.26
-        # XXX TODO
-        # with pytest.raises(NotImplementedError):
-        # assert dnda.maxshape == (100, 200)
+        with pytest.raises(NotImplementedError):
+            assert dnda.maxshape == (100, 200)
 
-        # Pending 2.26
         with pytest.raises(NotImplementedError):
             dnda.resize((300, 400))
 
@@ -311,7 +305,10 @@ def test_dataframe_basics(tmp_path, domain0, index_column_names):
     if domain0 is not None:
         new_size = 10000
         with tiledbsoma.DataFrame.open(uri, "r") as sdf:
-            with pytest.raises(tiledbsoma.SOMAError):  # must be open for write
+            # Must be open for write.
+            # XXX TO DO fix this
+            # with pytest.raises(tiledbsoma.SOMAError):
+            with pytest.raises(ValueError):
                 sdf.resize([new_size])
         with tiledbsoma.DataFrame.open(uri, "w") as sdf:
             sdf.resize([new_size])
diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc
index 4877ec8c29..9eea922b64 100644
--- a/libtiledbsoma/src/soma/soma_array.cc
+++ b/libtiledbsoma/src/soma/soma_array.cc
@@ -280,30 +280,37 @@ void SOMAArray::reset(
 }
 
 std::optional<std::shared_ptr<ArrayBuffers>> SOMAArray::read_next() {
-    // If the query is complete, return `std::nullopt`
-    if (mq_->is_complete(true)) {
-        return std::nullopt;
-    }
-
-    // Configure query and allocate result buffers
-    mq_->setup_read();
+    try {
+        // Can throw with current-domain support
 
-    // Continue to submit the empty query on first read to return empty results
-    if (mq_->is_empty_query()) {
-        if (first_read_next_) {
-            first_read_next_ = false;
-            return mq_->results();
-        } else {
+        // If the query is complete, return `std::nullopt`
+        if (mq_->is_complete(true)) {
             return std::nullopt;
         }
-    }
 
-    first_read_next_ = false;
+        // Configure query and allocate result buffers
+        mq_->setup_read();
 
-    mq_->submit_read();
+        // Continue to submit the empty query on first read to return empty
+        // results
+        if (mq_->is_empty_query()) {
+            if (first_read_next_) {
+                first_read_next_ = false;
+                return mq_->results();
+            } else {
+                return std::nullopt;
+            }
+        }
+
+        first_read_next_ = false;
 
-    // Return the results, possibly incomplete
-    return mq_->results();
+        mq_->submit_read();
+
+        // Return the results, possibly incomplete
+        return mq_->results();
+    } catch (const std::exception& e) {
+        throw TileDBSOMAError(e.what());
+    }
 }
 
 bool SOMAArray::_extend_enumeration(
@@ -740,6 +747,7 @@ ArrowTable SOMAArray::_cast_table(
     // Go through all columns in the ArrowTable and cast the values to what is
     // in the ArraySchema on disk
     ArraySchemaEvolution se(*ctx_->tiledb_ctx());
+    // XXX METHODIZE THIS
     if (timestamp_.has_value()) {
         // ArraySchemaEvolution requires us to pair (t2, t2) even if our range
         // is (t1, t2).
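
Usage sketch of the Python-level behavior these hunks aim at. This is
illustrative only: the URI and the shapes below are invented, and the
ValueError expectations simply mirror the assertions in
apis/python/tests/test_shape.py above, not a confirmed public contract.

    import pyarrow as pa
    import pytest

    import tiledbsoma

    uri = "/tmp/example-sparse-ndarray"
    tiledbsoma.SparseNDArray.create(uri, type=pa.float64(), shape=(100, 200)).close()

    # Reads past the current shape are expected to surface as ValueError
    # (previously SOMAError / TileDBError), via the pybind lambdas above.
    with tiledbsoma.SparseNDArray.open(uri) as snda:
        with pytest.raises(ValueError):
            snda.read((110, 210)).tables().concat()

    # Growing is allowed, but only in write mode; shrinking is expected to
    # be rejected the same way.
    with tiledbsoma.SparseNDArray.open(uri, "w") as snda:
        snda.resize((300, 400))
        with pytest.raises(ValueError):
            snda.resize((50, 60))

    with tiledbsoma.SparseNDArray.open(uri) as snda:
        assert snda.shape == (300, 400)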