From bc075acb1cb6e9fca5b63314aa5520b3c6901843 Mon Sep 17 00:00:00 2001
From: John Kerl
Date: Fri, 16 Aug 2024 20:08:50 -0400
Subject: [PATCH] more unit-test findings [skip ci]

---
 apis/python/src/tiledbsoma/soma_array.cc      | 29 ++++++------
 apis/python/src/tiledbsoma/soma_dataframe.cc  | 13 +++++-
 .../src/tiledbsoma/soma_dense_ndarray.cc      | 14 +++++-
 .../src/tiledbsoma/soma_sparse_ndarray.cc     | 13 +++++-
 apis/python/tests/test_basic_anndata_io.py    | 13 +++++-
 .../tests/test_dataframe_index_columns.py     |  4 +-
 .../tests/test_registration_mappings.py       | 11 +++++
 apis/python/tests/test_shape.py               | 23 +++++-----
 libtiledbsoma/src/soma/soma_array.cc          | 44 +++++++++++--------
 9 files changed, 112 insertions(+), 52 deletions(-)

diff --git a/apis/python/src/tiledbsoma/soma_array.cc b/apis/python/src/tiledbsoma/soma_array.cc
index fc2bc36cea..03f9d6ad5f 100644
--- a/apis/python/src/tiledbsoma/soma_array.cc
+++ b/apis/python/src/tiledbsoma/soma_array.cc
@@ -500,14 +500,21 @@ void load_soma_array(py::module& m) {
                 py::gil_scoped_release release;
 
                 // Try to read more data
-                auto buffers = array.read_next();
-
-                // If more data was read, convert it to an arrow table and
-                // return
-                if (buffers.has_value()) {
-                    // Acquire python GIL before accessing python objects
-                    py::gil_scoped_acquire acquire;
-                    return to_table(*buffers);
+                try {
+                    auto buffers = array.read_next();
+
+                    // If more data was read, convert it to an arrow table and
+                    // return
+                    if (buffers.has_value()) {
+                        // Acquire python GIL before accessing python objects
+                        py::gil_scoped_acquire acquire;
+                        return to_table(*buffers);
+                    }
+
+                } catch (const std::exception& e) {
+                    // Re-raise as ValueError to preserve index-out-of-bounds
+                    // reporting semantics in the current-domain/new-shape era.
+                    throw py::value_error(e.what());
                 }
 
                 // No data was read, the query is complete, return nullopt
@@ -520,12 +527,6 @@ void load_soma_array(py::module& m) {
 
         .def("nnz", &SOMAArray::nnz, py::call_guard<py::gil_scoped_release>())
 
-        // xxx temp .def("resize", &SOMAArray::resize)
-
-        // xxx temp .def_property_readonly("shape", &SOMAArray::shape)
-
-        // xxx temp .def_property_readonly("maxshape", &SOMAArray::maxshape)
-
         .def_property_readonly("uri", &SOMAArray::uri)
 
         .def_property_readonly("column_names", &SOMAArray::column_names)
diff --git a/apis/python/src/tiledbsoma/soma_dataframe.cc b/apis/python/src/tiledbsoma/soma_dataframe.cc
index ce7340eef2..df6eb78a8c 100644
--- a/apis/python/src/tiledbsoma/soma_dataframe.cc
+++ b/apis/python/src/tiledbsoma/soma_dataframe.cc
@@ -145,7 +145,18 @@ void load_soma_dataframe(py::module& m) {
         .def_static("exists", &SOMADataFrame::exists)
 
         // XXX TEMP
-        .def("resize", &SOMADataFrame::resize1)
+        .def(
+            "resize",
+            [](SOMAArray& array, const std::vector<int64_t>& newshape) {
+                try {
+                    array.resize1(newshape);
+                } catch (const std::exception& e) {
+                    // Re-raise as ValueError to preserve index-out-of-bounds
+                    // reporting semantics in the current-domain/new-shape era.
+                    throw py::value_error(e.what());
+                }
+            },
+            "newshape"_a)
 
         // XXX TEMP
         .def_property_readonly("shape", &SOMADataFrame::shape1)
diff --git a/apis/python/src/tiledbsoma/soma_dense_ndarray.cc b/apis/python/src/tiledbsoma/soma_dense_ndarray.cc
index 818d437506..a6a79ce8a1 100644
--- a/apis/python/src/tiledbsoma/soma_dense_ndarray.cc
+++ b/apis/python/src/tiledbsoma/soma_dense_ndarray.cc
@@ -124,8 +124,18 @@ void load_soma_dense_ndarray(py::module& m) {
 
         .def_static("exists", &SOMADenseNDArray::exists)
 
-        // XXX TEMP
-        .def("resize", &SOMAArray::resize)
+        .def(
+            "resize",
+            [](SOMAArray& array, const std::vector<int64_t>& newshape) {
+                try {
+                    array.resize(newshape);
+                } catch (const std::exception& e) {
+                    // Re-raise as ValueError to preserve index-out-of-bounds
+                    // reporting semantics in the current-domain/new-shape era.
+                    throw py::value_error(e.what());
+                }
+            },
+            "newshape"_a)
 
         // XXX TEMP
         .def_property_readonly("shape", &SOMAArray::shape)
diff --git a/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc b/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc
index 9eb0d41e5f..de93c99c92 100644
--- a/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc
+++ b/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc
@@ -113,7 +113,18 @@ void load_soma_sparse_ndarray(py::module& m) {
         .def_static("exists", &SOMASparseNDArray::exists)
 
         // XXX TEMP
-        .def("resize", &SOMAArray::resize)
+        .def(
+            "resize",
+            [](SOMAArray& array, const std::vector<int64_t>& newshape) {
+                try {
+                    array.resize(newshape);
+                } catch (const std::exception& e) {
+                    // Re-raise as ValueError to preserve index-out-of-bounds
+                    // reporting semantics in the current-domain/new-shape era.
+                    throw py::value_error(e.what());
+                }
+            },
+            "newshape"_a)
 
         // XXX TEMP
         .def_property_readonly("shape", &SOMAArray::shape)
diff --git a/apis/python/tests/test_basic_anndata_io.py b/apis/python/tests/test_basic_anndata_io.py
index 92f2d530a2..faa5d35c5d 100644
--- a/apis/python/tests/test_basic_anndata_io.py
+++ b/apis/python/tests/test_basic_anndata_io.py
@@ -1333,7 +1333,18 @@ def test_nan_append(conftest_pbmc_small, dtype, nans, new_obs_ids):
         var_field_name="var_id",
     )
 
-    # XXX TO DO: NOW NEEDS A RESIZE AS OF 1.13
+    # XXX TO DO: NOW NEEDS A RESIZE AS OF 2.26
+    # XXX TEMP -- needs an all-in-one experiment-level mutator ...
+    with tiledbsoma.Experiment.open(SOMA_URI, "w") as exp:
+        nobs2 = len(rd.obs_axis.data)
+        new_obs_shape = (nobs2,)
+        exp.obs.resize(new_obs_shape)
+
+        new_X_shape = (nobs2, len(adata2.var))
+        exp.ms["RNA"].X["data"].resize(new_X_shape)
+
+        new_X_shape = (nobs2, len(adata2.raw.var))
+        exp.ms["raw"].X["data"].resize(new_X_shape)
 
     # Append the second anndata object
     tiledbsoma.io.from_anndata(
diff --git a/apis/python/tests/test_dataframe_index_columns.py b/apis/python/tests/test_dataframe_index_columns.py
index 7629840603..7cb721c467 100644
--- a/apis/python/tests/test_dataframe_index_columns.py
+++ b/apis/python/tests/test_dataframe_index_columns.py
@@ -3,7 +3,6 @@
 import pytest
 
 import tiledbsoma as soma
-import tiledb
 
 
 @pytest.fixture
@@ -1899,6 +1898,7 @@ def test_types_read_errors(
     with soma.DataFrame.open(uri, "w") as sdf:
         sdf.write(arrow_table)
 
-    with pytest.raises((RuntimeError, tiledb.cc.TileDBError)):
+    # XXX TO DO
+    with pytest.raises((soma.SOMAError)):
         with soma.DataFrame.open(uri, "r") as sdf:
             sdf.read(coords=coords).concat()
diff --git a/apis/python/tests/test_registration_mappings.py b/apis/python/tests/test_registration_mappings.py
index 45065b6a68..0708c3ee9d 100644
--- a/apis/python/tests/test_registration_mappings.py
+++ b/apis/python/tests/test_registration_mappings.py
@@ -430,6 +430,8 @@ def test_multiples_without_experiment(
         "ZZZ3": 9,
     }
 
+    # XXX TO DO: RESIZE
+
     # Now do the ingestion per se. Note that once registration is done sequentially, ingest order
     # mustn't matter, and in fact, can be done in parallel. This is why we test various permutations
     # of the ordering of the h5ad file names.
@@ -818,6 +820,8 @@ def test_append_with_disjoint_measurements(
         var_field_name=var_field_name,
     )
 
+    # XXX TO DO: RESIZE
+
     tiledbsoma.io.from_anndata(
         soma_uri,
         anndata2,
@@ -1166,6 +1170,9 @@ def test_enum_bit_width_append(tmp_path, all_at_once, nobs_a, nobs_b):
         tiledbsoma.io.from_anndata(
             soma_uri, adata, measurement_name=measurement_name, registration_mapping=rd
         )
+
+        # XXX TO DO: RESIZE
+
         tiledbsoma.io.from_anndata(
            soma_uri, bdata, measurement_name=measurement_name, registration_mapping=rd
         )
@@ -1181,6 +1188,8 @@ def test_enum_bit_width_append(tmp_path, all_at_once, nobs_a, nobs_b):
             var_field_name=var_field_name,
         )
 
+        # XXX TO DO: RESIZE
+
         tiledbsoma.io.from_anndata(
             soma_uri, bdata, measurement_name=measurement_name, registration_mapping=rd
         )
@@ -1256,6 +1265,8 @@ def test_multimodal_names(tmp_path, conftest_pbmc3k_adata):
         var_field_name=adata_protein.var.index.name,
     )
 
+    # XXX TO DO: RESIZE
+
     # Ingest the second anndata object into the protein measurement
     tiledbsoma.io.from_anndata(
         experiment_uri=uri,
diff --git a/apis/python/tests/test_shape.py b/apis/python/tests/test_shape.py
index efa6308a2a..47adc0e83e 100644
--- a/apis/python/tests/test_shape.py
+++ b/apis/python/tests/test_shape.py
@@ -4,7 +4,6 @@
 import pytest
 
 import tiledbsoma
-import tiledb
 
 from tests._util import maybe_raises
 
@@ -155,9 +154,7 @@ def test_sparse_nd_array_basics(
 
     # Test reads out of bounds
    with tiledbsoma.SparseNDArray.open(uri) as snda:
-        # AS NOTED ABOVE
-        # with pytest.raises(tiledbsoma.SOMAError):
-        with pytest.raises((tiledbsoma.SOMAError, tiledb.cc.TileDBError)):
+        with pytest.raises(ValueError):
             coords = tuple([arg_shape[i] + 10 for i in range(ndim)])
             snda.read(coords).tables().concat()
 
@@ -177,7 +174,7 @@ def test_sparse_nd_array_basics(
     new_shape = tuple([arg_shape[i] - 50 for i in range(ndim)])
     # TODO: why union with tiledb.cc.TileDBError -- needed in sandbox
     with tiledbsoma.SparseNDArray.open(uri, "w") as snda:
-        with pytest.raises((tiledbsoma.SOMAError, tiledb.cc.TileDBError)):
+        with pytest.raises(ValueError):
             snda.resize(new_shape)
 
     with tiledbsoma.SparseNDArray.open(uri) as snda:
@@ -185,7 +182,7 @@ def test_sparse_nd_array_basics(
 
     # Test resize too big
     new_shape = tuple([4_000_000_000 for i in range(ndim)])
-    with pytest.raises((tiledbsoma.SOMAError, tiledb.cc.TileDBError)):
+    with pytest.raises(ValueError):
         with tiledbsoma.SparseNDArray.open(uri, "w") as snda:
             snda.resize(new_shape)
     with tiledbsoma.SparseNDArray.open(uri) as snda:
@@ -217,7 +214,7 @@ def test_sparse_nd_array_basics(
     assert readback == table
 
 
-# Pending 2.26 timeframe.
+# Pending 2.26 timeframe for dense support
 # TODO: mark these with a linked GitHub tracking issue
 def test_dense_nd_array_basics(tmp_path):
     uri = tmp_path.as_posix()
@@ -227,12 +224,9 @@ def test_dense_nd_array_basics(tmp_path):
     with tiledbsoma.DenseNDArray.open(uri) as dnda:
         assert dnda.shape == (100, 200)
 
-        # Pending 2.26
-        # XXX TODO
-        # with pytest.raises(NotImplementedError):
-        # assert dnda.maxshape == (100, 200)
+        with pytest.raises(NotImplementedError):
+            assert dnda.maxshape == (100, 200)
 
-        # Pending 2.26
         with pytest.raises(NotImplementedError):
             dnda.resize((300, 400))
 
@@ -311,7 +305,10 @@ def test_dataframe_basics(tmp_path, domain0, index_column_names):
     if domain0 is not None:
         new_size = 10000
         with tiledbsoma.DataFrame.open(uri, "r") as sdf:
-            with pytest.raises(tiledbsoma.SOMAError):  # must be open for write
+            # Must be open for write.
+            # XXX TO DO fix this
+            # with pytest.raises(tiledbsoma.SOMAError):
+            with pytest.raises(ValueError):
                 sdf.resize([new_size])
         with tiledbsoma.DataFrame.open(uri, "w") as sdf:
             sdf.resize([new_size])
diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc
index 4877ec8c29..9eea922b64 100644
--- a/libtiledbsoma/src/soma/soma_array.cc
+++ b/libtiledbsoma/src/soma/soma_array.cc
@@ -280,30 +280,37 @@ void SOMAArray::reset(
 }
 
 std::optional<std::shared_ptr<ArrayBuffers>> SOMAArray::read_next() {
-    // If the query is complete, return `std::nullopt`
-    if (mq_->is_complete(true)) {
-        return std::nullopt;
-    }
-
-    // Configure query and allocate result buffers
-    mq_->setup_read();
+    try {
+        // Can throw with current-domain support
 
-    // Continue to submit the empty query on first read to return empty results
-    if (mq_->is_empty_query()) {
-        if (first_read_next_) {
-            first_read_next_ = false;
-            return mq_->results();
-        } else {
+        // If the query is complete, return `std::nullopt`
+        if (mq_->is_complete(true)) {
             return std::nullopt;
         }
-    }
 
-    first_read_next_ = false;
+        // Configure query and allocate result buffers
+        mq_->setup_read();
 
-    mq_->submit_read();
+        // Continue to submit the empty query on first read to return empty
+        // results
+        if (mq_->is_empty_query()) {
+            if (first_read_next_) {
+                first_read_next_ = false;
+                return mq_->results();
+            } else {
+                return std::nullopt;
+            }
+        }
+
+        first_read_next_ = false;
 
-    // Return the results, possibly incomplete
-    return mq_->results();
+        mq_->submit_read();
+
+        // Return the results, possibly incomplete
+        return mq_->results();
+    } catch (const std::exception& e) {
+        throw TileDBSOMAError(e.what());
+    }
 }
 
 bool SOMAArray::_extend_enumeration(
@@ -740,6 +747,7 @@ ArrowTable SOMAArray::_cast_table(
     // Go through all columns in the ArrowTable and cast the values to what is
     // in the ArraySchema on disk
     ArraySchemaEvolution se(*ctx_->tiledb_ctx());
+    // XXX METHODIZE THIS
     if (timestamp_.has_value()) {
         // ArraySchemaEvolution requires us to pair (t2, t2) even if our range
         // is (t1, t2).
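
Usage sketch of the Python-level behavior these hunks aim at. This is
illustrative only: the URI and the shapes below are invented, and the
ValueError expectations simply mirror the assertions in
apis/python/tests/test_shape.py above, not a confirmed public contract.

    import pyarrow as pa
    import pytest

    import tiledbsoma

    uri = "/tmp/example-sparse-ndarray"
    tiledbsoma.SparseNDArray.create(uri, type=pa.float64(), shape=(100, 200)).close()

    # Reads past the current shape are expected to surface as ValueError
    # (previously SOMAError / TileDBError), via the pybind lambdas above.
    with tiledbsoma.SparseNDArray.open(uri) as snda:
        with pytest.raises(ValueError):
            snda.read((110, 210)).tables().concat()

    # Growing is allowed, but only in write mode; shrinking is expected to
    # be rejected the same way.
    with tiledbsoma.SparseNDArray.open(uri, "w") as snda:
        snda.resize((300, 400))
        with pytest.raises(ValueError):
            snda.resize((50, 60))

    with tiledbsoma.SparseNDArray.open(uri) as snda:
        assert snda.shape == (300, 400)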