Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[c++] Support Enumeration in C++ codebase #1519

Merged
merged 13 commits into from
Sep 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/r-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ jobs:

- name: Install BioConductor package SingleCellExperiment
run: cd apis/r && tools/r-ci.sh install_bioc SingleCellExperiment

- name: Install rc version of tiledb-r (macOS)
if: ${{ matrix.os == 'macOS-latest' }}
run: cd apis/r && Rscript -e "install.packages('tiledb', repos = c('https://eddelbuettel.r-universe.dev', 'https://cloud.r-project.org'))"

- name: Install rc version of tiledb-r (linux)
if: ${{ matrix.os != 'macOS-latest' }}
run: cd apis/r && Rscript -e "options(bspm.version.check=TRUE); install.packages('tiledb', repos = c('https://eddelbuettel.r-universe.dev/bin/linux/jammy/4.3/', 'https://cloud.r-project.org'))"

- name: Dependencies
run: cd apis/r && tools/r-ci.sh install_all
Expand Down
4 changes: 2 additions & 2 deletions apis/r/tools/get_tarball.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/env Rscript

## version pinning info
tiledb_core_version <- "2.16.2"
tiledb_core_sha1 <- "07b65de"
tiledb_core_version <- "2.17.0"
tiledb_core_sha1 <- "93c173d"

if ( ! dir.exists("inst/") ) {
stop("No 'inst/' directory. Exiting.", call. = FALSE)
Expand Down
28 changes: 14 additions & 14 deletions libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ else()
# NB When updating the pinned URLs here, please also update in file apis/r/tools/get_tarball.R
if(DOWNLOAD_TILEDB_PREBUILT)
if (WIN32) # Windows
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.16.2/tiledb-windows-x86_64-2.16.2-07b65de.zip")
SET(DOWNLOAD_SHA1 "1cda23235ceeff70cb2b30e0c0e22fcd9fd83b51")
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-windows-x86_64-2.17.0-93c173d.zip")
SET(DOWNLOAD_SHA1 "d43589b22de95d45b40de9918d105a6174ec352e")
elseif(APPLE) # OSX

# Status quo as of 2023-05-18:
Expand All @@ -76,22 +76,22 @@ else()
# o CMAKE_SYSTEM_PROCESSOR is x86_64

if (CMAKE_OSX_ARCHITECTURES STREQUAL x86_64)
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.16.2/tiledb-macos-x86_64-2.16.2-07b65de.tar.gz")
SET(DOWNLOAD_SHA1 "355233cee1515857c91b2f12fe4f7bbc1ac02465")
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-macos-x86_64-2.17.0-93c173d.tar.gz")
SET(DOWNLOAD_SHA1 "9a232015cbf09c5bd37375537cef80a382e1ffa4")
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL arm64)
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.16.2/tiledb-macos-arm64-2.16.2-07b65de.tar.gz")
SET(DOWNLOAD_SHA1 "5aad92b76e6fe3f7129f514ed926ef1c8af4bfa3")
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-macos-arm64-2.17.0-93c173d.tar.gz")
SET(DOWNLOAD_SHA1 "b861b90b462963db44fe0217087fac3510fd6293")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.16.2/tiledb-macos-x86_64-2.16.2-07b65de.tar.gz")
SET(DOWNLOAD_SHA1 "355233cee1515857c91b2f12fe4f7bbc1ac02465")
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-macos-x86_64-2.17.0-93c173d.tar.gz")
SET(DOWNLOAD_SHA1 "9a232015cbf09c5bd37375537cef80a382e1ffa4")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.16.2/tiledb-macos-arm64-2.16.2-07b65de.tar.gz")
SET(DOWNLOAD_SHA1 "5aad92b76e6fe3f7129f514ed926ef1c8af4bfa3")
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-macos-arm64-2.17.0-93c173d.tar.gz")
SET(DOWNLOAD_SHA1 "b861b90b462963db44fe0217087fac3510fd6293")
endif()

else() # Linux
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.16.2/tiledb-linux-x86_64-2.16.2-07b65de.tar.gz")
SET(DOWNLOAD_SHA1 "b9fc44a104f31a9348a399e55ef9e32903b99590")
SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-linux-x86_64-2.17.0-93c173d.tar.gz")
SET(DOWNLOAD_SHA1 "5c04c07a73d3fe48a9ba8f3ad8af5e1912a39ce8")
endif()

ExternalProject_Add(ep_tiledb
Expand All @@ -113,8 +113,8 @@ else()
else() # Build from source
ExternalProject_Add(ep_tiledb
PREFIX "externals"
URL "https://github.com/TileDB-Inc/TileDB/archive/2.16.2.zip"
URL_HASH SHA1=d54ff7fc4c3a1c5afb1027bab1ba011ae47c3d79
URL "https://github.com/TileDB-Inc/TileDB/archive/2.17.0.zip"
URL_HASH SHA1=bbf5b34fec1c729f048f48bf1a0f03abb447d7de
DOWNLOAD_NAME "tiledb.zip"
CMAKE_ARGS
-DCMAKE_INSTALL_PREFIX=${EP_INSTALL_PREFIX}
Expand Down
33 changes: 21 additions & 12 deletions libtiledbsoma/src/soma/column_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,26 +42,31 @@ using namespace tiledb;

std::shared_ptr<ColumnBuffer> ColumnBuffer::create(
std::shared_ptr<Array> array, std::string_view name) {
return ColumnBuffer::create(array->schema(), name);
}

std::shared_ptr<ColumnBuffer> ColumnBuffer::create(
ArraySchema schema, std::string_view name) {
auto schema = array->schema();
auto name_str = std::string(name); // string for TileDB API

if (schema.has_attribute(name_str)) {
auto attr = schema.attribute(name_str);
auto type = attr.type();
bool is_var = attr.cell_val_num() == TILEDB_VAR_NUM;
bool is_nullable = attr.nullable();
auto enum_name = AttributeExperimental::get_enumeration_name(
schema.context(), attr);
std::optional<Enumeration> enumeration = std::nullopt;
if (enum_name.has_value()) {
enumeration = std::make_optional<Enumeration>(
ArrayExperimental::get_enumeration(
schema.context(), *array, *enum_name));
}

if (!is_var && attr.cell_val_num() != 1) {
throw TileDBSOMAError(
"[ColumnBuffer] Values per cell > 1 is not supported: " +
name_str);
}

return ColumnBuffer::alloc(schema, name_str, type, is_var, is_nullable);
return ColumnBuffer::alloc(
schema, name_str, type, is_var, is_nullable, enumeration);

} else if (schema.domain().has_dimension(name_str)) {
auto dim = schema.domain().dimension(name_str);
Expand All @@ -76,7 +81,8 @@ std::shared_ptr<ColumnBuffer> ColumnBuffer::create(
name_str);
}

return ColumnBuffer::alloc(schema, name_str, type, is_var, false);
return ColumnBuffer::alloc(
schema, name_str, type, is_var, false, std::nullopt);
}

throw TileDBSOMAError("[ColumnBuffer] Column name not found: " + name_str);
Expand Down Expand Up @@ -109,13 +115,15 @@ ColumnBuffer::ColumnBuffer(
size_t num_cells,
size_t num_bytes,
bool is_var,
bool is_nullable)
bool is_nullable,
std::optional<Enumeration> enumeration)
: name_(name)
, type_(type)
, type_size_(tiledb::impl::type_size(type))
, num_cells_(0)
, is_var_(is_var)
, is_nullable_(is_nullable) {
, is_nullable_(is_nullable)
, enumeration_(enumeration) {
LOG_DEBUG(fmt::format(
"[ColumnBuffer] '{}' {} bytes is_var={} is_nullable={}",
name,
Expand Down Expand Up @@ -192,7 +200,8 @@ std::shared_ptr<ColumnBuffer> ColumnBuffer::alloc(
std::string_view name,
tiledb_datatype_t type,
bool is_var,
bool is_nullable) {
bool is_nullable,
std::optional<Enumeration> enumeration) {
// Set number of bytes for the data buffer. Override with a value from
// the config if present.
auto num_bytes = DEFAULT_ALLOC_BYTES;
Expand Down Expand Up @@ -224,7 +233,7 @@ std::shared_ptr<ColumnBuffer> ColumnBuffer::alloc(
num_bytes / tiledb::impl::type_size(type);

return std::make_shared<ColumnBuffer>(
name, type, num_cells, num_bytes, is_var, is_nullable);
name, type, num_cells, num_bytes, is_var, is_nullable, enumeration);
}

} // namespace tiledbsoma
} // namespace tiledbsoma
36 changes: 20 additions & 16 deletions libtiledbsoma/src/soma/column_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <stdexcept> // for windows: error C2039: 'runtime_error': is not a member of 'std'

#include <tiledb/tiledb>
#include <tiledb/tiledb_experimental>

#include "../utils/common.h"
#include "../utils/logger.h"
Expand Down Expand Up @@ -69,28 +70,20 @@ class ColumnBuffer {
static std::shared_ptr<ColumnBuffer> create(
std::shared_ptr<Array> array, std::string_view name);

/**
* @brief Create a ColumnBuffer from a schema and column name.
*
* @param schema TileDB schema
* @param name TileDB dimension or attribute name
* @return ColumnBuffer
*/
static std::shared_ptr<ColumnBuffer> create(
ArraySchema schema, std::string_view name);

/**
* @brief Create a ColumnBuffer from a schema, column name, and data.
*
* @param schema TileDB schema
* @param array TileDB array
* @param name TileDB dimension or attribute name
* @param data Data to set in buffer
* @return ColumnBuffer
*/
template <typename T>
static std::shared_ptr<ColumnBuffer> create(
ArraySchema schema, std::string_view name, std::vector<T> data) {
auto column_buff = ColumnBuffer::create(schema, name);
std::shared_ptr<Array> array,
std::string_view name,
std::vector<T> data) {
auto column_buff = ColumnBuffer::create(array, name);
column_buff->num_cells_ = data.size();
column_buff->data_.resize(data.size());
column_buff->data_.assign(
Expand Down Expand Up @@ -118,14 +111,16 @@ class ColumnBuffer {
* @param num_bytes Number of bytes to allocate for data
* @param is_var Column type is variable length
* @param is_nullable Column can contain null values
* @param enumeration Optional Enumeration associated with column
*/
ColumnBuffer(
std::string_view name,
tiledb_datatype_t type,
size_t num_cells,
size_t num_bytes,
bool is_var = false,
bool is_nullable = false);
bool is_nullable = false,
std::optional<Enumeration> enumeration = std::nullopt);

ColumnBuffer() = delete;
ColumnBuffer(const ColumnBuffer&) = delete;
Expand Down Expand Up @@ -243,6 +238,10 @@ class ColumnBuffer {
return is_nullable_;
}

/**
 * @brief Return the Enumeration associated with this column, if any.
 *
 * @return std::optional<Enumeration> A copy of the stored optional; it is
 * empty when no Enumeration was supplied at construction.
 */
std::optional<Enumeration> get_enumeration() const {
return enumeration_;
}

/**
* @brief Convert the data bytemap to a bitmap in place.
*
Expand Down Expand Up @@ -272,14 +271,16 @@ class ColumnBuffer {
* @param type TileDB datatype
* @param is_var True if variable length data
* @param is_nullable True if nullable data
* @param enumeration Optional Enumeration associated with column
* @return ColumnBuffer
*/
static std::shared_ptr<ColumnBuffer> alloc(
ArraySchema schema,
std::string_view name,
tiledb_datatype_t type,
bool is_var,
bool is_nullable);
bool is_nullable,
std::optional<Enumeration> enumeration);

//===================================================================
//= private non-static
Expand All @@ -303,6 +304,9 @@ class ColumnBuffer {
// If true, the data is nullable
bool is_nullable_;

// If applicable, the Enumeration associated with the column
std::optional<Enumeration> enumeration_;

// Data buffer.
std::vector<std::byte> data_;

Expand All @@ -314,4 +318,4 @@ class ColumnBuffer {
};

} // namespace tiledbsoma
#endif
#endif
24 changes: 24 additions & 0 deletions libtiledbsoma/src/soma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,30 @@ std::vector<std::string> SOMAArray::dimension_names() const {
return result;
}

std::map<std::string, Enumeration> SOMAArray::get_attr_to_enum_mapping() {
std::map<std::string, Enumeration> result;
for (uint32_t i = 0; i < arr_->schema().attribute_num(); ++i) {
auto attr = arr_->schema().attribute(i);
if (attr_has_enum(attr.name())) {
auto enmr_label = *get_enum_label_on_attr(attr.name());
auto enmr = ArrayExperimental::get_enumeration(
*ctx_, *arr_, enmr_label);
result.insert({attr.name(), enmr});
}
}
return result;
}

/**
 * @brief Look up the name of the enumeration attached to an attribute.
 *
 * @param attr_name Name of the attribute to inspect
 * @return std::optional<std::string> Whatever
 * AttributeExperimental::get_enumeration_name returns for that attribute
 */
std::optional<std::string> SOMAArray::get_enum_label_on_attr(
    std::string attr_name) {
    auto schema = arr_->schema();
    // Keep the attribute in a named local so it is passed as an lvalue.
    auto attribute = schema.attribute(attr_name);
    return AttributeExperimental::get_enumeration_name(*ctx_, attribute);
}

nguyenv marked this conversation as resolved.
Show resolved Hide resolved
/**
 * @brief Report whether the named attribute has an enumeration attached.
 *
 * @param attr_name Name of the attribute to inspect
 * @return true if get_enum_label_on_attr yields a value, false otherwise
 */
bool SOMAArray::attr_has_enum(std::string attr_name) {
    const auto label = get_enum_label_on_attr(attr_name);
    return label.has_value();
}

void SOMAArray::set_metadata(
const std::string& key,
tiledb_datatype_t value_type,
Expand Down
Loading