Skip to content

Commit

Permalink
Move all Python tests to same directory, Require KWs for `SOMADataFra…
Browse files Browse the repository at this point in the history
…me.open`
  • Loading branch information
nguyenv committed Jan 19, 2024
1 parent 9bb751d commit 53cca7e
Show file tree
Hide file tree
Showing 16 changed files with 897 additions and 604 deletions.
7 changes: 0 additions & 7 deletions .github/workflows/python-ci-single.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,6 @@ jobs:
- name: Run libtiledbsoma unit tests
run: ctest --output-on-failure --test-dir build/libtiledbsoma -C Release --verbose

- name: Run pytests for C++
shell: bash
# Setting PYTHONPATH ensures the tests load the in-tree source code under apis/python/src
# instead of the copy we `pip install`ed to site-packages above. That's needed for the code
# coverage analysis to work.
run: PYTHONPATH=$(pwd)/apis/python/src python -m pytest --cov=apis/python/src --cov-report=xml libtiledbsoma/test -v --durations=20

- name: Run pytests for Python
shell: bash
# Setting PYTHONPATH ensures the tests load the in-tree source code under apis/python/src
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ update:
.PHONY: test
test: data
ctest --test-dir build/libtiledbsoma -C Release --verbose --rerun-failed --output-on-failure
pytest apis/python/tests libtiledbsoma/test
pytest apis/python/tests

.PHONY: data
data:
Expand Down
28 changes: 11 additions & 17 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,12 +343,12 @@ def read(
ts = (0, self._handle._handle.timestamp)

sr = clib.SOMADataFrame.open(
self._handle._handle.uri,
clib.OpenMode.read,
platform_config or {},
column_names or [],
_util.to_clib_result_order(result_order),
ts,
uri=self._handle._handle.uri,
mode=clib.OpenMode.read,
platform_config=platform_config or {},
column_names=column_names or [],
result_order=_util.to_clib_result_order(result_order),
timestamp=ts,
)

if value_filter is not None:
Expand Down Expand Up @@ -533,11 +533,6 @@ def _set_reader_coord(
# There's no way to specify "to infinity" for strings.
# We have to get the nonempty domain and use that as the end.
ned = self._handle.non_empty_domain()
if ned is None:
raise ValueError(
"Found empty nonempty domain when setting "
"string coordinates in _set_reader_coord"
)
_, stop = ned[dim_idx]
else:
stop = coord.stop
Expand Down Expand Up @@ -587,6 +582,7 @@ def _set_reader_coord_by_py_seq_or_np_array(

if _util.pa_types_is_string_or_bytes(dim.type):
sr.set_dim_points_string_or_bytes(dim.name, coord)
return True
elif pa.types.is_timestamp(dim.type):
if not isinstance(coord, (tuple, list, np.ndarray)):
raise ValueError(
Expand All @@ -597,15 +593,13 @@ def _set_reader_coord_by_py_seq_or_np_array(
for e in coord
]
sr.set_dim_points_int64(dim.name, icoord)
return True

# TODO: bool

else:
raise ValueError(
f"unhandled type {dim.dtype} for index column named {dim.name}"
)

return True
raise ValueError(

Check warning on line 600 in apis/python/src/tiledbsoma/_dataframe.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/_dataframe.py#L600

Added line #L600 was not covered by tests
f"unhandled type {dim.dtype} for index column named {dim.name}"
)

def _set_reader_coord_by_numeric_slice(
self, sr: clib.SOMAArray, dim_idx: int, dim: pa.Field, coord: Slice[Any]
Expand Down
13 changes: 4 additions & 9 deletions apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
Type,
TypeVar,
Union,
cast,
)

import attrs
Expand Down Expand Up @@ -243,12 +242,9 @@ def _opener(
def schema(self) -> tiledb.ArraySchema:
return self._handle.schema

def non_empty_domain(self) -> Optional[Tuple[Tuple[object, object], ...]]:
def non_empty_domain(self) -> Tuple[Tuple[object, object], ...]:
try:
ned = self._handle.nonempty_domain()
if ned is None:
return None
return cast(Tuple[Tuple[object, object], ...], ned)
return self._handle.nonempty_domain() or ()
except tiledb.TileDBError as e:
raise SOMAError(e)

Expand Down Expand Up @@ -393,9 +389,8 @@ def _cast_domain(
def domain(self) -> Tuple[Tuple[object, object], ...]:
return self._cast_domain(self._handle.domain)

def non_empty_domain(self) -> Optional[Tuple[Tuple[object, object], ...]]:
result = self._cast_domain(self._handle.non_empty_domain)
return result or None
def non_empty_domain(self) -> Tuple[Tuple[object, object], ...]:
return self._cast_domain(self._handle.non_empty_domain) or ()

@property
def attr_names(self) -> Tuple[str, ...]:
Expand Down
5 changes: 2 additions & 3 deletions apis/python/src/tiledbsoma/_tiledb_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,9 @@ def schema(self) -> pa.Schema:
return tiledb_schema_to_arrow(
self._tiledb_array_schema(), self.uri, self._ctx
)
else:
return self._tiledb_array_schema()
return self._tiledb_array_schema()

def non_empty_domain(self) -> Optional[Tuple[Tuple[Any, Any], ...]]:
def non_empty_domain(self) -> Tuple[Tuple[Any, Any], ...]:
"""
Retrieves the non-empty domain for each dimension, namely the smallest
and largest indices in each dimension for which the array/dataframe has
Expand Down
7 changes: 5 additions & 2 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2251,7 +2251,7 @@ def _coo_to_table(

def _chunk_is_contained_in(
chunk_bounds: Sequence[Tuple[int, int]],
storage_nonempty_domain: Optional[Sequence[Tuple[Optional[int], Optional[int]]]],
storage_nonempty_domain: Sequence[Tuple[Optional[int], Optional[int]]],
) -> bool:
"""
Determines if a dim range is included within the array's non-empty domain. Ranges are inclusive
Expand All @@ -2269,7 +2269,7 @@ def _chunk_is_contained_in(
user that they declare they are retrying the exact same input file -- and we do our best to
fulfill their ask by checking the dimension being strided on.
"""
if storage_nonempty_domain is None:
if len(storage_nonempty_domain) == 0:
return False

if len(chunk_bounds) != len(storage_nonempty_domain):
Expand All @@ -2288,6 +2288,9 @@ def _chunk_is_contained_in_axis(
stride_axis: int,
) -> bool:
"""Helper function for ``_chunk_is_contained_in``."""
if len(storage_nonempty_domain) == 0:
return False

storage_lo, storage_hi = storage_nonempty_domain[stride_axis]
if storage_lo is None or storage_hi is None:
# E.g. an array has had its schema created but no data written yet
Expand Down
180 changes: 120 additions & 60 deletions apis/python/src/tiledbsoma/pytiledbsoma.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <tiledbsoma/tiledbsoma>
#include <tiledbsoma/reindexer/reindexer.h>

#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
Expand All @@ -22,76 +23,135 @@ void load_soma_dataframe(py::module &);
void load_query_condition(py::module &);

PYBIND11_MODULE(pytiledbsoma, m) {
py::register_exception<TileDBSOMAError>(m, "SOMAError");
py::register_exception<TileDBSOMAError>(m, "SOMAError");

/* We need to make sure C++ TileDBSOMAError is translated to a correctly-typed
* Python error
*/
py::register_exception_translator([](std::exception_ptr p) {
/* We need to make sure C++ TileDBSOMAError is translated to a correctly-typed
* Python error
*/
py::register_exception_translator([](std::exception_ptr p) {
auto tiledb_soma_error =
(py::object)py::module::import("tiledbsoma").attr("SOMAError");

try {
if (p)
if (p)
std::rethrow_exception(p);
} catch (const TileDBSOMAError &e) {
PyErr_SetString(tiledb_soma_error.ptr(), e.what());
PyErr_SetString(tiledb_soma_error.ptr(), e.what());
} catch (const TileDBSOMAPyError &e) {
PyErr_SetString(tiledb_soma_error.ptr(), e.what());
PyErr_SetString(tiledb_soma_error.ptr(), e.what());
} catch (py::builtin_exception &e) {
throw;
throw;
};
});

py::enum_<OpenMode>(m, "OpenMode")
.value("read", OpenMode::read)
.value("write", OpenMode::write);

py::enum_<ResultOrder>(m, "ResultOrder")
.value("automatic", ResultOrder::automatic)
.value("rowmajor", ResultOrder::rowmajor)
.value("colmajor", ResultOrder::colmajor);

m.doc() = "SOMA acceleration library";

m.def("version", []() { return tiledbsoma::version::as_string(); });

m.def(
"config_logging",
[](const std::string& level, const std::string& logfile) {
LOG_CONFIG(level, logfile);
},
"level"_a,
"logfile"_a = "");

m.def("info", &LOG_INFO, "message"_a = "");
m.def("debug", &LOG_DEBUG, "message"_a = "");

m.def(
"tiledbsoma_stats_enable",
[]() { tiledbsoma::stats::enable(); },
"Enable TileDB internal statistics. Lifecycle: experimental.");
m.def(
"tiledbsoma_stats_disable",
[]() { tiledbsoma::stats::disable(); },
"Disable TileDB internal statistics. Lifecycle: experimental.");
m.def(
"tiledbsoma_stats_reset",
[]() { tiledbsoma::stats::reset(); },
"Reset all TileDB internal statistics to 0. Lifecycle: experimental.");
m.def(
"tiledbsoma_stats_dump",
[]() {
py::print(tiledbsoma::version::as_string());
std::string stats = tiledbsoma::stats::dump();
py::print(stats);
},
"Print TileDB internal statistics. Lifecycle: experimental.");

load_soma_array(m);
load_soma_object(m);
load_soma_dataframe(m);
load_query_condition(m);
});

py::enum_<OpenMode>(m, "OpenMode")
.value("read", OpenMode::read)
.value("write", OpenMode::write);

py::enum_<ResultOrder>(m, "ResultOrder")
.value("automatic", ResultOrder::automatic)
.value("rowmajor", ResultOrder::rowmajor)
.value("colmajor", ResultOrder::colmajor);

m.doc() = "SOMA acceleration library";

m.def("version", []() { return tiledbsoma::version::as_string(); });

m.def(
"config_logging",
[](const std::string& level, const std::string& logfile) {
LOG_CONFIG(level, logfile);
},
"level"_a,
"logfile"_a = "");

m.def("info", &LOG_INFO, "message"_a = "");
m.def("debug", &LOG_DEBUG, "message"_a = "");

m.def(
"tiledbsoma_stats_enable",
[]() { tiledbsoma::stats::enable(); },
"Enable TileDB internal statistics. Lifecycle: experimental.");
m.def(
"tiledbsoma_stats_disable",
[]() { tiledbsoma::stats::disable(); },
"Disable TileDB internal statistics. Lifecycle: experimental.");
m.def(
"tiledbsoma_stats_reset",
[]() { tiledbsoma::stats::reset(); },
"Reset all TileDB internal statistics to 0. Lifecycle: experimental.");
m.def(
"tiledbsoma_stats_dump",
[]() {
py::print(tiledbsoma::version::as_string());
std::string stats = tiledbsoma::stats::dump();
py::print(stats);
},
"Print TileDB internal statistics. Lifecycle: experimental.");

// Efficient C++ re-indexing (aka hashing unique key values to an index
// between 0 and number of keys - 1) based on khash
py::class_<IntIndexer>(m, "IntIndexer")
.def(py::init<>())
.def(py::init<std::vector<int64_t>&, int>())
.def(
"map_locations",
[](IntIndexer& indexer,
py::array_t<int64_t> keys,
int num_threads) {
auto buffer = keys.request();
int64_t* data = static_cast<int64_t*>(buffer.ptr);
size_t length = buffer.shape[0];
indexer.map_locations(keys.data(), keys.size(), num_threads);
})
.def(
"map_locations",
[](IntIndexer& indexer,
std::vector<int64_t> keys,
int num_threads) {
indexer.map_locations(keys.data(), keys.size(), num_threads);
})
// Perform lookup for a large input array of keys and return the looked
// up value array (passing ownership from C++ to python)
.def(
"get_indexer",
[](IntIndexer& indexer, py::array_t<int64_t> lookups) {
auto input_buffer = lookups.request();
int64_t* input_ptr = static_cast<int64_t*>(input_buffer.ptr);
size_t size = input_buffer.shape[0];
auto results = py::array_t<int64_t>(size);
auto results_buffer = results.request();
size_t results_size = results_buffer.shape[0];

int64_t* results_ptr = static_cast<int64_t*>(
results_buffer.ptr);

indexer.lookup(input_ptr, results_ptr, size);
return results;
})
// Perform lookup for a large input array of keys and writes the looked
// up values into previously allocated array (works for the cases in
// which python and R pre-allocate the array)
.def(
"get_indexer",
[](IntIndexer& indexer,
py::array_t<int64_t> lookups,
py::array_t<int64_t>& results) {
auto input_buffer = lookups.request();
int64_t* input_ptr = static_cast<int64_t*>(input_buffer.ptr);
size_t size = input_buffer.shape[0];

auto results_buffer = results.request();
int64_t* results_ptr = static_cast<int64_t*>(
results_buffer.ptr);
size_t results_size = input_buffer.shape[0];
indexer.lookup(input_ptr, input_ptr, size);
});

load_soma_array(m);
load_soma_object(m);
load_soma_dataframe(m);
load_query_condition(m);
}

};
18 changes: 17 additions & 1 deletion apis/python/src/tiledbsoma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,23 @@ using namespace tiledbsoma;
void load_soma_dataframe(py::module &m) {
py::class_<SOMADataFrame, SOMAObject>(m, "SOMADataFrame")

.def_static("open", py::overload_cast<std::string_view, OpenMode, std::map<std::string, std::string>, std::vector<std::string>, ResultOrder, std::optional<std::pair<uint64_t, uint64_t>>>(&SOMADataFrame::open))
.def_static(
"open",
py::overload_cast<
std::string_view,
OpenMode,
std::map<std::string, std::string>,
std::vector<std::string>,
ResultOrder,
std::optional<std::pair<uint64_t, uint64_t>>>(&SOMADataFrame::open),
"uri"_a,
"mode"_a,
py::kw_only(),
"platform_config"_a = py::dict(),
"column_names"_a = py::none(),
"result_order"_a = ResultOrder::automatic,
"timestamp"_a = py::none())

.def_static("exists", &SOMADataFrame::exists)
.def("reopen", py::overload_cast<OpenMode, std::optional<std::pair<uint64_t, uint64_t>>>(&SOMADataFrame::open))
.def("close", &SOMADataFrame::close)
Expand Down
4 changes: 4 additions & 0 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1356,3 +1356,7 @@ def test_enum_extend_past_numerical_limit(tmp_path):
with pytest.raises(ValueError):
with soma.open(uri, mode="w") as A:
A.write(tbl)


def test_write_str_empty_ned(tmp_path):
tmp_path.as_posix()
Loading

0 comments on commit 53cca7e

Please sign in to comment.