Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[c++] Pre-neatens for polymorphic domainish accessors #3011

Merged
merged 6 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ test: ctest
pytest apis/python/tests

.PHONY: ctest
ctest: data
ctest: data ctest_update

.PHONY: ctest_update
ctest_update:
ctest --test-dir build/libtiledbsoma -C Release --verbose --rerun-failed --output-on-failure

.PHONY: data
Expand Down
6 changes: 6 additions & 0 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,9 @@ def _fill_out_slot_soma_domain(
)
slot_domain = slot_domain[0], slot_domain[1]
elif isinstance(dtype, str):
# Core string dims have no extent and no (core) domain. We return "" here
# simply so we can pass libtiledbsoma "" for domain and extent, while it
# will (and must) ignore these when creating the TileDB schema.
slot_domain = "", ""
elif np.issubdtype(dtype, NPInteger):
iinfo = np.iinfo(cast(NPInteger, dtype))
Expand Down Expand Up @@ -887,6 +890,9 @@ def _find_extent_for_domain(
if isinstance(dtype, np.dtype) and dtype.itemsize == 1:
extent = 1

# Core string dims have no extent and no (core) domain. We return "" here
# simply so we can pass libtiledbsoma "" for domain and extent, while it
# will (and must) ignore these when creating the TileDB schema.
if isinstance(dtype, str):
return ""

Expand Down
125 changes: 68 additions & 57 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -726,9 +726,10 @@ class SOMAArray : public SOMAObject {
}

/**
* Exposed for testing purposes.
* Exposed for testing purposes within this library.
* Not for use by Python/R.
*/
CurrentDomain get_current_domain() const {
CurrentDomain get_current_domain_for_test() const {
return _get_current_domain();
}

Expand All @@ -755,9 +756,9 @@ class SOMAArray : public SOMAObject {
template <typename T>
std::pair<T, T> soma_domain_slot(const std::string& name) const {
if (has_current_domain()) {
return core_current_domain_slot<T>(name);
return _core_current_domain_slot<T>(name);
} else {
return core_domain_slot<T>(name);
return _core_domain_slot<T>(name);
}
}

Expand All @@ -771,58 +772,7 @@ class SOMAArray : public SOMAObject {
*/
template <typename T>
std::pair<T, T> soma_maxdomain_slot(const std::string& name) const {
return core_domain_slot<T>(name);
}

/**
* Returns the core current domain at the given dimension.
*
* o For arrays with core current-domain support:
* - soma domain is core current domain
* - soma maxdomain is core domain
* o For arrays without core current-domain support:
* - soma domain is core domain
* - soma maxdomain is core domain
* - core current domain is not accessed at the soma level
*
* @tparam T Domain datatype
* @return Pair of [lower, upper] inclusive bounds.
*/
template <typename T>
std::pair<T, T> core_current_domain_slot(const std::string& name) const {
CurrentDomain current_domain = _get_current_domain();
if (current_domain.is_empty()) {
throw TileDBSOMAError(
"core_current_domain_slot: internal coding error");
}
if (current_domain.type() != TILEDB_NDRECTANGLE) {
throw TileDBSOMAError(
"core_current_domain_slot: found non-rectangle type");
}
NDRectangle ndrect = current_domain.ndrectangle();

// Convert from two-element array (core API) to pair (tiledbsoma API)
std::array<T, 2> arr = ndrect.range<T>(name);
return std::pair<T, T>(arr[0], arr[1]);
}

/**
* Returns the core current domain at the given dimension.
*
* o For arrays with core current-domain support:
* - soma domain is core current domain
* - soma maxdomain is core domain
* o For arrays without core current-domain support:
* - soma domain is core domain
* - soma maxdomain is core domain
* - core current domain is not accessed at the soma level
*
* @tparam T Domain datatype
* @return Pair of [lower, upper] inclusive bounds.
*/
template <typename T>
std::pair<T, T> core_domain_slot(const std::string& name) const {
return arr_->schema().domain().dimension(name).domain<T>();
return _core_domain_slot<T>(name);
}

/**
Expand Down Expand Up @@ -945,6 +895,57 @@ class SOMAArray : public SOMAObject {
*ctx_->tiledb_ctx(), arr_->schema());
}

/**
* Returns the core current domain at the given dimension.
*
* o For arrays with core current-domain support:
* - soma domain is core current domain
* - soma maxdomain is core domain
* o For arrays without core current-domain support:
* - soma domain is core domain
* - soma maxdomain is core domain
* - core current domain is not accessed at the soma level
*
* @tparam T Domain datatype
* @return Pair of [lower, upper] inclusive bounds.
*/
template <typename T>
std::pair<T, T> _core_current_domain_slot(const std::string& name) const {
CurrentDomain current_domain = _get_current_domain();
if (current_domain.is_empty()) {
throw TileDBSOMAError(
"_core_current_domain_slot: internal coding error");
}
if (current_domain.type() != TILEDB_NDRECTANGLE) {
throw TileDBSOMAError(
"_core_current_domain_slot: found non-rectangle type");
}
NDRectangle ndrect = current_domain.ndrectangle();

// Convert from two-element array (core API) to pair (tiledbsoma API)
std::array<T, 2> arr = ndrect.range<T>(name);
return std::pair<T, T>(arr[0], arr[1]);
}

/**
* Returns the core current domain at the given dimension.
*
* o For arrays with core current-domain support:
* - soma domain is core current domain
* - soma maxdomain is core domain
* o For arrays without core current-domain support:
* - soma domain is core domain
* - soma maxdomain is core domain
* - core current domain is not accessed at the soma level
*
* @tparam T Domain datatype
* @return Pair of [lower, upper] inclusive bounds.
*/
template <typename T>
std::pair<T, T> _core_domain_slot(const std::string& name) const {
return arr_->schema().domain().dimension(name).domain<T>();
}

/**
* Helper method for resize and upgrade_shape.
*/
Expand All @@ -963,7 +964,7 @@ class SOMAArray : public SOMAObject {
void _check_dims_are_int64();

/**
* With old shape: core domain mapped to tiledbsoma shape; core current
* With old shape: core domain used to map to tiledbsoma shape; core current
* domain did not exist.
*
* With new shape: core domain maps to tiledbsoma maxshape;
Expand Down Expand Up @@ -1270,6 +1271,16 @@ class SOMAArray : public SOMAObject {
uint64_t _nnz_slow();
};

// These are all specializations to string/bool of various methods
// which require special handling for that type.
//
// Declaring them down here is a bit weird -- they're easy to miss
// on a read-through. However, we're in a bit of a bind regarding
// various compilers: if we do these specializations within the
// `class SOMAArray { ... }`, then one compiler errors if we do
// include `template <>`, while another errors if we don't.
// Doing it down here, no compiler complains.

template <>
void SOMAArray::_cast_dictionary_values<std::string>(
ArrowSchema* schema, ArrowArray* array);
Expand Down
12 changes: 8 additions & 4 deletions libtiledbsoma/test/unit_soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,8 @@ TEST_CASE_METHOD(
// Check current domain
auto soma_dataframe = open(OpenMode::read);

CurrentDomain current_domain = soma_dataframe->get_current_domain();
CurrentDomain current_domain = soma_dataframe
->get_current_domain_for_test();
if (!use_current_domain) {
REQUIRE(current_domain.is_empty());
} else {
Expand Down Expand Up @@ -602,7 +603,8 @@ TEST_CASE_METHOD(
// Check current domain
auto soma_dataframe = open(OpenMode::read);

CurrentDomain current_domain = soma_dataframe->get_current_domain();
CurrentDomain current_domain = soma_dataframe
->get_current_domain_for_test();
if (!use_current_domain) {
REQUIRE(current_domain.is_empty());
} else {
Expand Down Expand Up @@ -715,7 +717,8 @@ TEST_CASE_METHOD(
// Check current domain
auto soma_dataframe = open(OpenMode::read);

CurrentDomain current_domain = soma_dataframe->get_current_domain();
CurrentDomain current_domain = soma_dataframe
->get_current_domain_for_test();
if (!use_current_domain) {
REQUIRE(current_domain.is_empty());
} else {
Expand Down Expand Up @@ -831,7 +834,8 @@ TEST_CASE_METHOD(
// Check current domain
auto soma_dataframe = open(OpenMode::read);

CurrentDomain current_domain = soma_dataframe->get_current_domain();
CurrentDomain current_domain = soma_dataframe
->get_current_domain_for_test();
if (!use_current_domain) {
REQUIRE(current_domain.is_empty());
} else {
Expand Down
Loading