Skip to content

Commit

Permalink
more unit-test findings [skip ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Aug 17, 2024
1 parent 5522869 commit d3c6647
Show file tree
Hide file tree
Showing 9 changed files with 112 additions and 51 deletions.
29 changes: 15 additions & 14 deletions apis/python/src/tiledbsoma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -500,14 +500,21 @@ void load_soma_array(py::module& m) {
py::gil_scoped_release release;

// Try to read more data
auto buffers = array.read_next();

// If more data was read, convert it to an arrow table and
// return
if (buffers.has_value()) {
// Acquire python GIL before accessing python objects
py::gil_scoped_acquire acquire;
return to_table(*buffers);
try {
auto buffers = array.read_next();

// If more data was read, convert it to an arrow table and
// return
if (buffers.has_value()) {
// Acquire python GIL before accessing python objects
py::gil_scoped_acquire acquire;
return to_table(*buffers);
}

} catch (const std::exception& e) {
// Re-raise as ValueError to preserve index-out-of-bounds
// reporting semantics in the current-domain/new-shape era.
throw py::value_error(e.what());
}

// No data was read, the query is complete, return nullopt
Expand All @@ -520,12 +527,6 @@ void load_soma_array(py::module& m) {

.def("nnz", &SOMAArray::nnz, py::call_guard<py::gil_scoped_release>())

// xxx temp .def("resize", &SOMAArray::resize)

// xxx temp .def_property_readonly("shape", &SOMAArray::shape)

// xxx temp .def_property_readonly("maxshape", &SOMAArray::maxshape)

.def_property_readonly("uri", &SOMAArray::uri)

.def_property_readonly("column_names", &SOMAArray::column_names)
Expand Down
13 changes: 12 additions & 1 deletion apis/python/src/tiledbsoma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,18 @@ void load_soma_dataframe(py::module& m) {
.def_static("exists", &SOMADataFrame::exists)

// XXX TEMP
.def("resize", &SOMADataFrame::resize1)
.def(
"resize",
[](SOMAArray& array, const std::vector<int64_t>& newshape) {
try {
array.resize1(newshape);
} catch (const std::exception& e) {
// Re-raise as ValueError to preserve index-out-of-bounds
// reporting semantics in the current-domain/new-shape era.
throw py::value_error(e.what());
}
},
"newshape"_a)

// XXX TEMP
.def_property_readonly("shape", &SOMADataFrame::shape1)
Expand Down
14 changes: 12 additions & 2 deletions apis/python/src/tiledbsoma/soma_dense_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,18 @@ void load_soma_dense_ndarray(py::module& m) {

.def_static("exists", &SOMADenseNDArray::exists)

// XXX TEMP
.def("resize", &SOMAArray::resize)
.def(
"resize",
[](SOMAArray& array, const std::vector<int64_t>& newshape) {
try {
array.resize(newshape);
} catch (const std::exception& e) {
// Re-raise as ValueError to preserve index-out-of-bounds
// reporting semantics in the current-domain/new-shape era.
throw py::value_error(e.what());
}
},
"newshape"_a)

// XXX TEMP
.def_property_readonly("shape", &SOMAArray::shape)
Expand Down
13 changes: 12 additions & 1 deletion apis/python/src/tiledbsoma/soma_sparse_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,18 @@ void load_soma_sparse_ndarray(py::module& m) {
.def_static("exists", &SOMASparseNDArray::exists)

// XXX TEMP
.def("resize", &SOMAArray::resize)
.def(
"resize",
[](SOMAArray& array, const std::vector<int64_t>& newshape) {
try {
array.resize(newshape);
} catch (const std::exception& e) {
// Re-raise as ValueError to preserve index-out-of-bounds
// reporting semantics in the current-domain/new-shape era.
throw py::value_error(e.what());
}
},
"newshape"_a)

// XXX TEMP
.def_property_readonly("shape", &SOMAArray::shape)
Expand Down
13 changes: 12 additions & 1 deletion apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1333,7 +1333,18 @@ def test_nan_append(conftest_pbmc_small, dtype, nans, new_obs_ids):
var_field_name="var_id",
)

# XXX TO DO: NOW NEEDS A RESIZE AS OF 1.13
# XXX TO DO: NOW NEEDS A RESIZE AS OF 2.26
# XXX TEMP -- needs an all-in-one experiment-level mutator ...
with tiledbsoma.Experiment.open(SOMA_URI, "w") as exp:
nobs2 = len(rd.obs_axis.data)
new_obs_shape = (nobs2,)
exp.obs.resize(new_obs_shape)

new_X_shape = (nobs2, len(adata2.var))
exp.ms["RNA"].X["data"].resize(new_X_shape)

new_X_shape = (nobs2, len(adata2.raw.var))
exp.ms["raw"].X["data"].resize(new_X_shape)

# Append the second anndata object
tiledbsoma.io.from_anndata(
Expand Down
3 changes: 2 additions & 1 deletion apis/python/tests/test_dataframe_index_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -1899,6 +1899,7 @@ def test_types_read_errors(
with soma.DataFrame.open(uri, "w") as sdf:
sdf.write(arrow_table)

with pytest.raises((RuntimeError, tiledb.cc.TileDBError)):
# XXX TO DO
with pytest.raises((ValueError, RuntimeError, tiledb.cc.TileDBError)):
with soma.DataFrame.open(uri, "r") as sdf:
sdf.read(coords=coords).concat()
11 changes: 11 additions & 0 deletions apis/python/tests/test_registration_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,8 @@ def test_multiples_without_experiment(
"ZZZ3": 9,
}

# XXX TO DO: RESIZE

# Now do the ingestion per se. Note that once registration is done sequentially, ingest order
# mustn't matter, and in fact, can be done in parallel. This is why we test various permutations
# of the ordering of the h5ad file names.
Expand Down Expand Up @@ -818,6 +820,8 @@ def test_append_with_disjoint_measurements(
var_field_name=var_field_name,
)

# XXX TO DO: RESIZE

tiledbsoma.io.from_anndata(
soma_uri,
anndata2,
Expand Down Expand Up @@ -1166,6 +1170,9 @@ def test_enum_bit_width_append(tmp_path, all_at_once, nobs_a, nobs_b):
tiledbsoma.io.from_anndata(
soma_uri, adata, measurement_name=measurement_name, registration_mapping=rd
)

# XXX TO DO: RESIZE

tiledbsoma.io.from_anndata(
soma_uri, bdata, measurement_name=measurement_name, registration_mapping=rd
)
Expand All @@ -1181,6 +1188,8 @@ def test_enum_bit_width_append(tmp_path, all_at_once, nobs_a, nobs_b):
var_field_name=var_field_name,
)

# XXX TO DO: RESIZE

tiledbsoma.io.from_anndata(
soma_uri, bdata, measurement_name=measurement_name, registration_mapping=rd
)
Expand Down Expand Up @@ -1256,6 +1265,8 @@ def test_multimodal_names(tmp_path, conftest_pbmc3k_adata):
var_field_name=adata_protein.var.index.name,
)

# XXX TO DO: RESIZE

# Ingest the second anndata object into the protein measurement
tiledbsoma.io.from_anndata(
experiment_uri=uri,
Expand Down
23 changes: 10 additions & 13 deletions apis/python/tests/test_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import pytest

import tiledbsoma
import tiledb

from tests._util import maybe_raises

Expand Down Expand Up @@ -155,9 +154,7 @@ def test_sparse_nd_array_basics(

# Test reads out of bounds
with tiledbsoma.SparseNDArray.open(uri) as snda:
# AS NOTED ABOVE
# with pytest.raises(tiledbsoma.SOMAError):
with pytest.raises((tiledbsoma.SOMAError, tiledb.cc.TileDBError)):
with pytest.raises(ValueError):
coords = tuple([arg_shape[i] + 10 for i in range(ndim)])
snda.read(coords).tables().concat()

Expand All @@ -177,15 +174,15 @@ def test_sparse_nd_array_basics(
new_shape = tuple([arg_shape[i] - 50 for i in range(ndim)])
# TODO: why union with tiledb.cc.TileDBError -- needed in sandbox
with tiledbsoma.SparseNDArray.open(uri, "w") as snda:
with pytest.raises((tiledbsoma.SOMAError, tiledb.cc.TileDBError)):
with pytest.raises(ValueError):
snda.resize(new_shape)

with tiledbsoma.SparseNDArray.open(uri) as snda:
assert snda.shape == arg_shape

# Test resize too big
new_shape = tuple([4_000_000_000 for i in range(ndim)])
with pytest.raises((tiledbsoma.SOMAError, tiledb.cc.TileDBError)):
with pytest.raises(ValueError):
with tiledbsoma.SparseNDArray.open(uri, "w") as snda:
snda.resize(new_shape)
with tiledbsoma.SparseNDArray.open(uri) as snda:
Expand Down Expand Up @@ -217,7 +214,7 @@ def test_sparse_nd_array_basics(
assert readback == table


# Pending 2.26 timeframe.
# Pending 2.26 timeframe for dense support
# TODO: mark these with a linked GitHub tracking issue
def test_dense_nd_array_basics(tmp_path):
uri = tmp_path.as_posix()
Expand All @@ -227,12 +224,9 @@ def test_dense_nd_array_basics(tmp_path):
with tiledbsoma.DenseNDArray.open(uri) as dnda:
assert dnda.shape == (100, 200)

# Pending 2.26
# XXX TODO
# with pytest.raises(NotImplementedError):
# assert dnda.maxshape == (100, 200)
with pytest.raises(NotImplementedError):
assert dnda.maxshape == (100, 200)

# Pending 2.26
with pytest.raises(NotImplementedError):
dnda.resize((300, 400))

Expand Down Expand Up @@ -311,7 +305,10 @@ def test_dataframe_basics(tmp_path, domain0, index_column_names):
if domain0 is not None:
new_size = 10000
with tiledbsoma.DataFrame.open(uri, "r") as sdf:
with pytest.raises(tiledbsoma.SOMAError): # must be open for write
# Must be open for write.
# XXX TO DO fix this
# with pytest.raises(tiledbsoma.SOMAError):
with pytest.raises(ValueError):
sdf.resize([new_size])
with tiledbsoma.DataFrame.open(uri, "w") as sdf:
sdf.resize([new_size])
Expand Down
44 changes: 26 additions & 18 deletions libtiledbsoma/src/soma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -280,30 +280,37 @@ void SOMAArray::reset(
}

std::optional<std::shared_ptr<ArrayBuffers>> SOMAArray::read_next() {
// If the query is complete, return `std::nullopt`
if (mq_->is_complete(true)) {
return std::nullopt;
}

// Configure query and allocate result buffers
mq_->setup_read();
try {
// Can throw with current-domain support

// Continue to submit the empty query on first read to return empty results
if (mq_->is_empty_query()) {
if (first_read_next_) {
first_read_next_ = false;
return mq_->results();
} else {
// If the query is complete, return `std::nullopt`
if (mq_->is_complete(true)) {
return std::nullopt;
}
}

first_read_next_ = false;
// Configure query and allocate result buffers
mq_->setup_read();

mq_->submit_read();
// Continue to submit the empty query on first read to return empty
// results
if (mq_->is_empty_query()) {
if (first_read_next_) {
first_read_next_ = false;
return mq_->results();
} else {
return std::nullopt;
}
}

first_read_next_ = false;

// Return the results, possibly incomplete
return mq_->results();
mq_->submit_read();

// Return the results, possibly incomplete
return mq_->results();
} catch (const std::exception& e) {
throw TileDBSOMAError(e.what());
}
}

bool SOMAArray::_extend_enumeration(
Expand Down Expand Up @@ -740,6 +747,7 @@ ArrowTable SOMAArray::_cast_table(
// Go through all columns in the ArrowTable and cast the values to what is
// in the ArraySchema on disk
ArraySchemaEvolution se(*ctx_->tiledb_ctx());
// XXX METHODIZE THIS
if (timestamp_.has_value()) {
// ArraySchemaEvolution requires us to pair (t2, t2) even if our range
// is (t1, t2).
Expand Down

0 comments on commit d3c6647

Please sign in to comment.