From 1b8e7c73dd9ae7c677e5423064b5ecbecfac73ce Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Wed, 5 Jul 2023 16:32:56 -0500 Subject: [PATCH 01/11] [c++] Support `Enumeration` in C++ Codebase * Addition of `SOMAArray::get_enum` and `SOMAArray::get_enum_label_on_attr` * Attach an enumeration/dictionary to the `ColumnBuffer` if applicable; this is used when converting from `ArrayBuffers` to Arrow Tables in the Python and R APIs --- libtiledbsoma/src/soma/column_buffer.cc | 31 ++++++++++++++-------- libtiledbsoma/src/soma/column_buffer.h | 34 ++++++++++++++----------- libtiledbsoma/src/soma/soma_array.cc | 8 ++++++ libtiledbsoma/src/soma/soma_array.h | 19 ++++++++++++++ libtiledbsoma/test/unit_soma_array.cc | 6 ++--- 5 files changed, 69 insertions(+), 29 deletions(-) diff --git a/libtiledbsoma/src/soma/column_buffer.cc b/libtiledbsoma/src/soma/column_buffer.cc index 30d3d675c3..2c88638599 100644 --- a/libtiledbsoma/src/soma/column_buffer.cc +++ b/libtiledbsoma/src/soma/column_buffer.cc @@ -42,11 +42,7 @@ using namespace tiledb; std::shared_ptr ColumnBuffer::create( std::shared_ptr array, std::string_view name) { - return ColumnBuffer::create(array->schema(), name); -} - -std::shared_ptr ColumnBuffer::create( - ArraySchema schema, std::string_view name) { + auto schema = array->schema(); auto name_str = std::string(name); // string for TileDB API if (schema.has_attribute(name_str)) { @@ -54,6 +50,14 @@ std::shared_ptr ColumnBuffer::create( auto type = attr.type(); bool is_var = attr.cell_val_num() == TILEDB_VAR_NUM; bool is_nullable = attr.nullable(); + auto enum_name = AttributeExperimental::get_enumeration_name( + schema.context(), attr); + std::optional enumeration = std::nullopt; + if (enum_name.has_value()) { + enumeration = std::make_optional( + ArrayExperimental::get_enumeration( + schema.context(), *array, name_str)); + } if (!is_var && attr.cell_val_num() != 1) { throw TileDBSOMAError( @@ -61,7 +65,8 @@ std::shared_ptr ColumnBuffer::create( name_str); } - return ColumnBuffer::alloc(schema, name_str, type, is_var, is_nullable); + return ColumnBuffer::alloc( + schema, name_str, type, is_var, is_nullable, enumeration); } else if (schema.domain().has_dimension(name_str)) { auto dim = schema.domain().dimension(name_str); @@ -76,7 +81,8 @@ std::shared_ptr ColumnBuffer::create( name_str); } - return ColumnBuffer::alloc(schema, name_str, type, is_var, false); + return ColumnBuffer::alloc( + schema, name_str, type, is_var, false, std::nullopt); } throw TileDBSOMAError("[ColumnBuffer] Column name not found: " + name_str); @@ -109,13 +115,15 @@ ColumnBuffer::ColumnBuffer( size_t num_cells, size_t num_bytes, bool is_var, - bool is_nullable) + bool is_nullable, + std::optional enumeration) : name_(name) , type_(type) , type_size_(tiledb::impl::type_size(type)) , num_cells_(0) , is_var_(is_var) - , is_nullable_(is_nullable) { + , is_nullable_(is_nullable) + , enumeration_(enumeration) { LOG_DEBUG(fmt::format( "[ColumnBuffer] '{}' {} bytes is_var={} is_nullable={}", name, @@ -192,7 +200,8 @@ std::shared_ptr ColumnBuffer::alloc( std::string_view name, tiledb_datatype_t type, bool is_var, - bool is_nullable) { + bool is_nullable, + std::optional enumeration) { // Set number of bytes for the data buffer. Override with a value from // the config if present. auto num_bytes = DEFAULT_ALLOC_BYTES; @@ -224,7 +233,7 @@ std::shared_ptr ColumnBuffer::alloc( num_bytes / tiledb::impl::type_size(type); return std::make_shared( - name, type, num_cells, num_bytes, is_var, is_nullable); + name, type, num_cells, num_bytes, is_var, is_nullable, enumeration); } } // namespace tiledbsoma diff --git a/libtiledbsoma/src/soma/column_buffer.h b/libtiledbsoma/src/soma/column_buffer.h index af0b11b6e0..fd58ef2b62 100644 --- a/libtiledbsoma/src/soma/column_buffer.h +++ b/libtiledbsoma/src/soma/column_buffer.h @@ -36,6 +36,7 @@ #include // for windows: error C2039: 'runtime_error': is not a member of 'std' #include +#include #include "../utils/common.h" #include "../utils/logger.h" @@ -69,28 +70,20 @@ class ColumnBuffer { static std::shared_ptr create( std::shared_ptr array, std::string_view name); - /** - * @brief Create a ColumnBuffer from a schema and column name. - * - * @param schema TileDB schema - * @param name TileDB dimension or attribute name - * @return ColumnBuffer - */ - static std::shared_ptr create( - ArraySchema schema, std::string_view name); - /** * @brief Create a ColumnBuffer from a schema, column name, and data. * - * @param schema TileDB schema + * @param array TileDB array * @param name TileDB dimension or attribute name * @param data Data to set in buffer * @return ColumnBuffer */ template static std::shared_ptr create( - ArraySchema schema, std::string_view name, std::vector data) { - auto column_buff = ColumnBuffer::create(schema, name); + std::shared_ptr array, + std::string_view name, + std::vector data) { + auto column_buff = ColumnBuffer::create(array, name); column_buff->num_cells_ = data.size(); column_buff->data_.resize(data.size()); column_buff->data_.assign( @@ -118,6 +111,7 @@ class ColumnBuffer { * @param num_bytes Number of bytes to allocate for data * @param is_var Column type is variable length * @param is_nullable Column can contain null values + * @param enumeration Optional Enumeration associated with column */ ColumnBuffer( std::string_view name, @@ -125,7 +119,8 @@ class ColumnBuffer { size_t num_cells, size_t num_bytes, bool is_var = false, - bool is_nullable = false); + bool is_nullable = false, + std::optional enumeration = std::nullopt); ColumnBuffer() = delete; ColumnBuffer(const ColumnBuffer&) = delete; @@ -243,6 +238,10 @@ class ColumnBuffer { return is_nullable_; } + std::optional get_enumeration() const { + return enumeration_; + } + /** * @brief Convert the data bytemap to a bitmap in place. * @@ -272,6 +271,7 @@ class ColumnBuffer { * @param type TileDB datatype * @param is_var True if variable length data * @param is_nullable True if nullable data + * @param enumeration Optional Enumeration associated with column * @return ColumnBuffer */ static std::shared_ptr alloc( @@ -279,7 +279,8 @@ class ColumnBuffer { std::string_view name, tiledb_datatype_t type, bool is_var, - bool is_nullable); + bool is_nullable, + std::optional enumeration); //=================================================================== //= private non-static @@ -303,6 +304,9 @@ class ColumnBuffer { // If true, the data is nullable bool is_nullable_; + // If applicable, the Enumeration associated with the column + std::optional enumeration_; + // Data buffer. std::vector data_; diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index 25893e242f..02d58a3477 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -470,6 +470,14 @@ std::vector SOMAArray::dimension_names() const { result.push_back(dim.name()); } return result; + +Enumeration SOMAArray::get_enum(std::string name) { + return ArrayExperimental::get_enumeration(*ctx_, *arr_, name); +} + +std::optional SOMAArray::get_enum_label_on_attr(std::string name) { + auto attr = arr_->schema().attribute(name); + return AttributeExperimental::get_enumeration_name(*ctx_, attr); } void SOMAArray::set_metadata( diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index 5fa5b4774a..8d297061b2 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -39,7 +39,12 @@ #include +<<<<<<< HEAD #include "enums.h" +======= +#include + +>>>>>>> [c++] Support `Enumeration` in C++ Codebase #include "managed_query.h" namespace tiledbsoma { @@ -487,6 +492,20 @@ class SOMAArray { */ std::vector dimension_names() const; + * @brief Get the Enumeration associated with the given name from the + * ArraySchema. + * + * @return Enumeration The enumeration. + */ + Enumeration get_enum(std::string name); + + /** + * @brief Get the Enumeration name associated with the given Attr. + * + * @return std::optional The enumeration name if one exists. + */ + std::optional get_enum_label_on_attr(std::string name); + /** * Set metadata key-value items to an open array. The array must * opened in WRITE mode, otherwise the function will error out. diff --git a/libtiledbsoma/test/unit_soma_array.cc b/libtiledbsoma/test/unit_soma_array.cc index 8d84253872..89e4273991 100644 --- a/libtiledbsoma/test/unit_soma_array.cc +++ b/libtiledbsoma/test/unit_soma_array.cc @@ -150,10 +150,10 @@ std::tuple, std::vector> write_array( } std::vector a0(num_cells_per_fragment, frag_num); - auto schema = *soma_array->schema(); + auto tdb_array = std::make_shared(*ctx, uri, TILEDB_READ); auto array_buffer = std::make_shared(); - array_buffer->emplace("a0", ColumnBuffer::create(schema, "a0", a0)); - array_buffer->emplace("d0", ColumnBuffer::create(schema, "d0", d0)); + array_buffer->emplace("a0", ColumnBuffer::create(tdb_array, "a0", a0)); + array_buffer->emplace("d0", ColumnBuffer::create(tdb_array, "d0", d0)); // Write data to array soma_array->submit(); From a49b38589096de0962ef887d68179fb1d24b518a Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Wed, 5 Jul 2023 20:41:00 -0500 Subject: [PATCH 02/11] Add `get_attr_to_enum_mapping` Function --- libtiledbsoma/src/soma/soma_array.cc | 24 ++++++++++++++++++++++-- libtiledbsoma/src/soma/soma_array.h | 14 +++++++++++--- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index 02d58a3477..59fcb27a3a 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -459,6 +459,7 @@ std::vector SOMAArray::shape() { return result; } +<<<<<<< HEAD uint64_t SOMAArray::ndim() const { return this->schema().get()->domain().ndim(); } @@ -473,13 +474,32 @@ std::vector SOMAArray::dimension_names() const { Enumeration SOMAArray::get_enum(std::string name) { return ArrayExperimental::get_enumeration(*ctx_, *arr_, name); +======= +std::map SOMAArray::get_attr_to_enum_mapping() { + std::map result; + for (uint32_t i = 0; i < arr_->schema().attribute_num(); ++i) { + auto attr = arr_->schema().attribute(i); + if (attr_has_enum(attr.name())) { + auto enmr_label = *get_enum_label_on_attr(attr.name()); + auto enmr = ArrayExperimental::get_enumeration( + *ctx_, *arr_, enmr_label); + result.insert({attr.name(), enmr}); + } + } + return result; +>>>>>>> Add `get_attr_to_enum_mapping` Function } -std::optional SOMAArray::get_enum_label_on_attr(std::string name) { - auto attr = arr_->schema().attribute(name); +std::optional SOMAArray::get_enum_label_on_attr( + std::string attr_name) { + auto attr = arr_->schema().attribute(attr_name); return AttributeExperimental::get_enumeration_name(*ctx_, attr); } +bool SOMAArray::attr_has_enum(std::string attr_name) { + return get_enum_label_on_attr(attr_name).has_value(); +} + void SOMAArray::set_metadata( const std::string& key, tiledb_datatype_t value_type, diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index 8d297061b2..c4ae055e03 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -494,17 +494,25 @@ class SOMAArray { * @brief Get the Enumeration associated with the given name from the * ArraySchema. + * @brief Get the mapping of attributes to Enumerations. * - * @return Enumeration The enumeration. + * @return std::map */ - Enumeration get_enum(std::string name); + std::map get_attr_to_enum_mapping(); /** * @brief Get the Enumeration name associated with the given Attr. * * @return std::optional The enumeration name if one exists. */ - std::optional get_enum_label_on_attr(std::string name); + std::optional get_enum_label_on_attr(std::string attr_name); + + /** + * @brief Check if the given attribute has an associated enumeration. + * + * @return bool + */ + bool attr_has_enum(std::string attr_name); /** * Set metadata key-value items to an open array. The array must From 395409553813cfca9d0c2b0b6f0bbf9c3aff26ad Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Thu, 6 Jul 2023 21:04:07 -0600 Subject: [PATCH 03/11] WIP fix bug where attr name was passed instead of enum name --- libtiledbsoma/src/soma/column_buffer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libtiledbsoma/src/soma/column_buffer.cc b/libtiledbsoma/src/soma/column_buffer.cc index 2c88638599..b4e0d0ef66 100644 --- a/libtiledbsoma/src/soma/column_buffer.cc +++ b/libtiledbsoma/src/soma/column_buffer.cc @@ -56,7 +56,7 @@ std::shared_ptr ColumnBuffer::create( if (enum_name.has_value()) { enumeration = std::make_optional( ArrayExperimental::get_enumeration( - schema.context(), *array, name_str)); + schema.context(), *array, *enum_name)); } if (!is_var && attr.cell_val_num() != 1) { From 26992be7b68d041ac1873d423e1a9857518746d7 Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Thu, 13 Jul 2023 04:24:00 -0500 Subject: [PATCH 04/11] Add Unit Tests for Enumeration in C++ --- libtiledbsoma/test/unit_soma_array.cc | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/libtiledbsoma/test/unit_soma_array.cc b/libtiledbsoma/test/unit_soma_array.cc index 89e4273991..6b3e03487d 100644 --- a/libtiledbsoma/test/unit_soma_array.cc +++ b/libtiledbsoma/test/unit_soma_array.cc @@ -431,4 +431,34 @@ TEST_CASE("SOMAArray: Test buffer size") { ++loops; REQUIRE(loops == 10); soma_array->close(); +} + +TEST_CASE("SOMAArray: Enumeration") { + std::string uri = "mem://unit-test-array-enmr"; + auto ctx = std::make_shared(); + ArraySchema schema(*ctx, TILEDB_SPARSE); + + auto dim = Dimension::create( + *ctx, "d", {0, std::numeric_limits::max() - 1}); + + Domain dom(*ctx); + dom.add_dimension(dim); + schema.set_domain(dom); + + std::vector vals = {"red", "blue", "green"}; + auto enmr = Enumeration::create(*ctx, "rbg", vals); + ArraySchemaExperimental::add_enumeration(*ctx, schema, enmr); + + auto attr = Attribute::create(*ctx, "a"); + AttributeExperimental::set_enumeration_name(*ctx, attr, "rbg"); + schema.add_attribute(attr); + + Array::create(uri, schema); + + auto soma_array = SOMAArray::open(TILEDB_READ, ctx, uri); + auto attr_to_enum = soma_array->get_attr_to_enum_mapping(); + REQUIRE(attr_to_enum.size() == 1); + REQUIRE(attr_to_enum.at("a").name() == "rbg"); + REQUIRE(soma_array->get_enum_label_on_attr("a")); + REQUIRE(soma_array->attr_has_enum("a")); } \ No newline at end of file From 1310207b982f8e10412f7adbe6fdb8da353befe0 Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Wed, 16 Aug 2023 15:18:05 -0400 Subject: [PATCH 05/11] `to_varlen_buffers` Returns `std::string` * Prior to https://github.com/TileDB-Inc/TileDB/pull/4272, the SOMA unit tests were erroneously writing a byte vector for string dimensions which maps to `TILEDB_BLOB` rather than `TILEDB_STRING_ASCII` --- libtiledbsoma/src/soma/column_buffer.cc | 2 +- libtiledbsoma/src/soma/column_buffer.h | 2 +- libtiledbsoma/src/soma/soma_array.cc | 6 +- libtiledbsoma/src/soma/soma_array.h | 83 +++++++++++-------- libtiledbsoma/test/unit_soma_array.cc | 8 +- libtiledbsoma/test/unit_soma_dataframe.cc | 5 +- libtiledbsoma/test/unit_soma_dense_ndarray.cc | 5 +- .../test/unit_soma_sparse_ndarray.cc | 5 +- 8 files changed, 63 insertions(+), 53 deletions(-) diff --git a/libtiledbsoma/src/soma/column_buffer.cc b/libtiledbsoma/src/soma/column_buffer.cc index b4e0d0ef66..0a02cb2b4a 100644 --- a/libtiledbsoma/src/soma/column_buffer.cc +++ b/libtiledbsoma/src/soma/column_buffer.cc @@ -236,4 +236,4 @@ std::shared_ptr ColumnBuffer::alloc( name, type, num_cells, num_bytes, is_var, is_nullable, enumeration); } -} // namespace tiledbsoma +} // namespace tiledbsoma \ No newline at end of file diff --git a/libtiledbsoma/src/soma/column_buffer.h b/libtiledbsoma/src/soma/column_buffer.h index fd58ef2b62..2103ec317a 100644 --- a/libtiledbsoma/src/soma/column_buffer.h +++ b/libtiledbsoma/src/soma/column_buffer.h @@ -318,4 +318,4 @@ class ColumnBuffer { }; } // namespace tiledbsoma -#endif +#endif \ No newline at end of file diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index 59fcb27a3a..4e12c1a490 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -459,7 +459,6 @@ std::vector SOMAArray::shape() { return result; } -<<<<<<< HEAD uint64_t SOMAArray::ndim() const { return this->schema().get()->domain().ndim(); } @@ -471,10 +470,8 @@ std::vector SOMAArray::dimension_names() const { result.push_back(dim.name()); } return result; +} -Enumeration SOMAArray::get_enum(std::string name) { - return ArrayExperimental::get_enumeration(*ctx_, *arr_, name); -======= std::map SOMAArray::get_attr_to_enum_mapping() { std::map result; for (uint32_t i = 0; i < arr_->schema().attribute_num(); ++i) { @@ -487,7 +484,6 @@ std::map SOMAArray::get_attr_to_enum_mapping() { } } return result; ->>>>>>> Add `get_attr_to_enum_mapping` Function } std::optional SOMAArray::get_enum_label_on_attr( diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index c4ae055e03..08c8c1c514 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -38,13 +38,8 @@ #include #include - -<<<<<<< HEAD -#include "enums.h" -======= #include - ->>>>>>> [c++] Support `Enumeration` in C++ Codebase +#include "enums.h" #include "managed_query.h" namespace tiledbsoma { @@ -62,7 +57,8 @@ class SOMAArray { * @param ctx TileDB context * @param uri URI to create the SOMAArray * @param schema TileDB ArraySchema - * @param soma_type SOMADataFrame, SOMADenseNDArray, or SOMASparseNDArray + * @param soma_type SOMADataFrame, SOMADenseNDArray, or + * SOMASparseNDArray */ static void create( std::shared_ptr ctx, @@ -80,8 +76,8 @@ class SOMAArray { * @param platform_config Config parameter dictionary * @param column_names Columns to read * @param batch_size Read batch size - * @param result_order Read result order: automatic (default), rowmajor, or - * colmajor + * @param result_order Read result order: automatic (default), rowmajor, + * or colmajor * @param timestamp Optional pair indicating timestamp start and end * @return std::unique_ptr SOMAArray */ @@ -105,8 +101,8 @@ class SOMAArray { * @param name Name of the array * @param column_names Columns to read * @param batch_size Read batch size - * @param result_order Read result order: automatic (default), rowmajor, or - * colmajor + * @param result_order Read result order: automatic (default), rowmajor, + * or colmajor * @param timestamp Optional pair indicating timestamp start and end * @return std::unique_ptr SOMAArray */ @@ -155,8 +151,8 @@ class SOMAArray { * @param ctx TileDB context * @param column_names Columns to read * @param batch_size Batch size - * @param result_order Read result order: automatic (default), rowmajor, or - * colmajor + * @param result_order Read result order: automatic (default), rowmajor, + * or colmajor * @param timestamp Timestamp */ SOMAArray( @@ -240,8 +236,8 @@ class SOMAArray { } /** - * @brief Set the dimension slice using multiple points, with support for - * partitioning. + * @brief Set the dimension slice using multiple points, with support + * for partitioning. * * @tparam T * @param dim @@ -335,12 +331,14 @@ class SOMAArray { /** * @brief Select columns names to query (dim and attr). If the - * `if_not_empty` parameter is `true`, the column will be selected iff the - * list of selected columns is empty. This prevents a `select_columns` call - * from changing an empty list (all columns) to a subset of columns. + * `if_not_empty` parameter is `true`, the column will be selected iff + * the list of selected columns is empty. This prevents a + * `select_columns` call from changing an empty list (all columns) to a + * subset of columns. * * @param names Vector of column names - * @param if_not_empty Prevent changing an "empty" selection of all columns + * @param if_not_empty Prevent changing an "empty" selection of all + * columns */ void select_columns( const std::vector& names, bool if_not_empty = false) { @@ -354,8 +352,8 @@ class SOMAArray { void submit(); /** - * @brief Read the next chunk of results from the query. If all results have - * already been read, std::nullopt is returned. + * @brief Read the next chunk of results from the query. If all results + * have already been read, std::nullopt is returned. * * An example use model: * @@ -394,8 +392,9 @@ class SOMAArray { * * auto schema = *soma_array->schema(); * auto array_buffer = std::make_shared(); - * array_buffer->emplace("att", ColumnBuffer::create(schema, "att", att)); - * array_buffer->emplace("dim", ColumnBuffer::create(schema, "dim", dim)); + * array_buffer->emplace("att", ColumnBuffer::create(schema, "att", + * att)); array_buffer->emplace("dim", ColumnBuffer::create(schema, + * "dim", dim)); * * std::vector x(10, 1); * writer->submit(); @@ -413,8 +412,8 @@ class SOMAArray { * complete. * * If `query_status_only` is false, return true if the query status - * is complete or if the query is empty (no ranges have been added to the - * query). + * is complete or if the query is empty (no ranges have been added to + * the query). * * @param query_status_only Query complete mode. * @return true if the query is complete, as described above @@ -492,8 +491,7 @@ class SOMAArray { */ std::vector dimension_names() const; - * @brief Get the Enumeration associated with the given name from the - * ArraySchema. + /** * @brief Get the mapping of attributes to Enumerations. * * @return std::map @@ -549,10 +547,12 @@ class SOMAArray { /** * @brief Given a key, get the associated value datatype, number of - * values, and value in binary form. The array must be opened in READ mode, + * values, and value in binary form. The array must be opened in READ + mode, * otherwise the function will error out. * - * The value may consist of more than one items of the same datatype. Keys + * The value may consist of more than one items of the same datatype. + Keys * that do not exist in the metadata will be return NULL for the value. * * **Example:** @@ -568,20 +568,31 @@ class SOMAArray { int32_t*)std::get(meta_val)); * @endcode * - * @param key The key of the metadata item to be retrieved. UTF-8 encodings + * @param key The key of the metadata item to be retrieved. UTF-8 + encodings * are acceptable. * @return MetadataValue (std::tuple) */ - std::map get_metadata(); - std::optional get_metadata(const std::string& key); + MetadataValue get_metadata(const std::string& key) const; /** - * Check if the key exists in metadata from an open array. The array must - * be opened in READ mode, otherwise the function will error out. + * @brief Given an index, get the associated value datatype, number of + * values, and value in binary form. The array must be opened in READ + * mode, otherwise the function will error out. * - * @param key The key of the metadata item to be checked. UTF-8 encodings - * are acceptable. + * @param index The index used to get the metadata. + * @return MetadataValue (std::tuple) + */ + MetadataValue get_metadata(uint64_t index) const; + + /** + * Check if the key exists in metadata from an open array. The array + * must be opened in READ mode, otherwise the function will error out. + * + * @param key The key of the metadata item to be checked. UTF-8 + * encodings are acceptable. * @return true if the key exists, else false. */ bool has_metadata(const std::string& key); diff --git a/libtiledbsoma/test/unit_soma_array.cc b/libtiledbsoma/test/unit_soma_array.cc index 6b3e03487d..0b90cdbfba 100644 --- a/libtiledbsoma/test/unit_soma_array.cc +++ b/libtiledbsoma/test/unit_soma_array.cc @@ -150,10 +150,10 @@ std::tuple, std::vector> write_array( } std::vector a0(num_cells_per_fragment, frag_num); - auto tdb_array = std::make_shared(*ctx, uri, TILEDB_READ); auto array_buffer = std::make_shared(); - array_buffer->emplace("a0", ColumnBuffer::create(tdb_array, "a0", a0)); - array_buffer->emplace("d0", ColumnBuffer::create(tdb_array, "d0", d0)); + auto tdb_arr = std::make_shared(*ctx, uri, TILEDB_READ); + array_buffer->emplace("a0", ColumnBuffer::create(tdb_arr, "a0", a0)); + array_buffer->emplace("d0", ColumnBuffer::create(tdb_arr, "d0", d0)); // Write data to array soma_array->submit(); @@ -455,7 +455,7 @@ TEST_CASE("SOMAArray: Enumeration") { Array::create(uri, schema); - auto soma_array = SOMAArray::open(TILEDB_READ, ctx, uri); + auto soma_array = SOMAArray::open(OpenMode::read, ctx, uri); auto attr_to_enum = soma_array->get_attr_to_enum_mapping(); REQUIRE(attr_to_enum.size() == 1); REQUIRE(attr_to_enum.at("a").name() == "rbg"); diff --git a/libtiledbsoma/test/unit_soma_dataframe.cc b/libtiledbsoma/test/unit_soma_dataframe.cc index b67a4a7b41..e54f34a805 100644 --- a/libtiledbsoma/test/unit_soma_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_dataframe.cc @@ -99,8 +99,9 @@ TEST_CASE("SOMADataFrame: basic") { std::vector a0(10, 1); auto array_buffer = std::make_shared(); - array_buffer->emplace("a0", ColumnBuffer::create(*schema, "a0", a0)); - array_buffer->emplace("d0", ColumnBuffer::create(*schema, "d0", d0)); + auto tdb_arr = std::make_shared(*ctx, uri, TILEDB_READ); + array_buffer->emplace("a0", ColumnBuffer::create(tdb_arr, "a0", a0)); + array_buffer->emplace("d0", ColumnBuffer::create(tdb_arr, "d0", d0)); soma_dataframe->open(OpenMode::write); soma_dataframe->write(array_buffer); diff --git a/libtiledbsoma/test/unit_soma_dense_ndarray.cc b/libtiledbsoma/test/unit_soma_dense_ndarray.cc index 6249faa612..55f15f54f7 100644 --- a/libtiledbsoma/test/unit_soma_dense_ndarray.cc +++ b/libtiledbsoma/test/unit_soma_dense_ndarray.cc @@ -98,8 +98,9 @@ TEST_CASE("SOMADenseNDArray: basic") { std::vector a0(10, 1); auto array_buffer = std::make_shared(); - array_buffer->emplace("a0", ColumnBuffer::create(*schema, "a0", a0)); - array_buffer->emplace("d0", ColumnBuffer::create(*schema, "d0", d0)); + auto tdb_arr = std::make_shared(*ctx, uri, TILEDB_READ); + array_buffer->emplace("a0", ColumnBuffer::create(tdb_arr, "a0", a0)); + array_buffer->emplace("d0", ColumnBuffer::create(tdb_arr, "d0", d0)); soma_dense->open(OpenMode::write); soma_dense->write(array_buffer); diff --git a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc index 6f59afb11a..d4a418ef77 100644 --- a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc +++ b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc @@ -100,8 +100,9 @@ TEST_CASE("SOMASparseNDArray: basic") { std::vector a0(10, 1); auto array_buffer = std::make_shared(); - array_buffer->emplace("a0", ColumnBuffer::create(*schema, "a0", a0)); - array_buffer->emplace("d0", ColumnBuffer::create(*schema, "d0", d0)); + auto tdb_arr = std::make_shared(*ctx, uri, TILEDB_READ); + array_buffer->emplace("a0", ColumnBuffer::create(tdb_arr, "a0", a0)); + array_buffer->emplace("d0", ColumnBuffer::create(tdb_arr, "d0", d0)); soma_sparse->open(OpenMode::write); soma_sparse->write(array_buffer); From 3d01f8352b0868cb211ef2712e5269d8416b3551 Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Wed, 6 Sep 2023 10:58:23 -0500 Subject: [PATCH 06/11] Update SOMA Array get_metadata Signatures --- libtiledbsoma/src/soma/soma_array.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index 08c8c1c514..e065f26f1c 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -574,18 +574,15 @@ class SOMAArray { * @return MetadataValue (std::tuple) */ - MetadataValue get_metadata(const std::string& key) const; + std::optional get_metadata(const std::string& key); /** - * @brief Given an index, get the associated value datatype, number of - * values, and value in binary form. The array must be opened in READ - * mode, otherwise the function will error out. + * Get a mapping of all metadata keys with its associated value datatype, + * number of values, and value in binary form. * - * @param index The index used to get the metadata. - * @return MetadataValue (std::tuple) + * @return std::map */ - MetadataValue get_metadata(uint64_t index) const; + std::map get_metadata(); /** * Check if the key exists in metadata from an open array. The array From 10fa884c1acea91b456506c257605fd9c1505989 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 8 Sep 2023 14:37:42 -0400 Subject: [PATCH 07/11] Depend on 2.17.0-rc0 --- .../cmake/Modules/FindTileDB_EP.cmake | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake b/libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake index c6efe9aaaa..c973a109de 100644 --- a/libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake +++ b/libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake @@ -58,8 +58,8 @@ else() # NB When updating the pinned URLs here, please also update in file apis/r/tools/get_tarball.R if(DOWNLOAD_TILEDB_PREBUILT) if (WIN32) # Windows - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.16.2/tiledb-windows-x86_64-2.16.2-07b65de.zip") - SET(DOWNLOAD_SHA1 "1cda23235ceeff70cb2b30e0c0e22fcd9fd83b51") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0-rc0/tiledb-windows-x86_64-2.17.0-rc0-46b9ca5.zip") + SET(DOWNLOAD_SHA1 "740c7885b5191a467cb742b3420a51ae7b2a0a99") elseif(APPLE) # OSX # Status quo as of 2023-05-18: @@ -76,22 +76,22 @@ else() # o CMAKE_SYSTEM_PROCESSOR is x86_64 if (CMAKE_OSX_ARCHITECTURES STREQUAL x86_64) - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.16.2/tiledb-macos-x86_64-2.16.2-07b65de.tar.gz") - SET(DOWNLOAD_SHA1 "355233cee1515857c91b2f12fe4f7bbc1ac02465") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0-rc0/tiledb-macos-x86_64-2.17.0-rc0-46b9ca5.tar.gz") + SET(DOWNLOAD_SHA1 "a3280bfaad8697080f9804b2d0756887962bdaa5") elseif (CMAKE_OSX_ARCHITECTURES STREQUAL arm64) - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.16.2/tiledb-macos-arm64-2.16.2-07b65de.tar.gz") - SET(DOWNLOAD_SHA1 "5aad92b76e6fe3f7129f514ed926ef1c8af4bfa3") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0-rc0/tiledb-macos-arm64-2.17.0-rc0-46b9ca5.tar.gz") + SET(DOWNLOAD_SHA1 "1b15a9aad373d491224f586fcbd677d47c2b86e8") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)") - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.16.2/tiledb-macos-x86_64-2.16.2-07b65de.tar.gz") - SET(DOWNLOAD_SHA1 "355233cee1515857c91b2f12fe4f7bbc1ac02465") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0-rc0/tiledb-macos-x86_64-2.17.0-rc0-46b9ca5.tar.gz") + SET(DOWNLOAD_SHA1 "a3280bfaad8697080f9804b2d0756887962bdaa5") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.16.2/tiledb-macos-arm64-2.16.2-07b65de.tar.gz") - SET(DOWNLOAD_SHA1 "5aad92b76e6fe3f7129f514ed926ef1c8af4bfa3") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0-rc0/tiledb-macos-arm64-2.17.0-rc0-46b9ca5.tar.gz") + SET(DOWNLOAD_SHA1 "1b15a9aad373d491224f586fcbd677d47c2b86e8") endif() else() # Linux - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.16.2/tiledb-linux-x86_64-2.16.2-07b65de.tar.gz") - SET(DOWNLOAD_SHA1 "b9fc44a104f31a9348a399e55ef9e32903b99590") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0-rc0/tiledb-linux-x86_64-2.17.0-rc0-46b9ca5.tar.gz") + SET(DOWNLOAD_SHA1 "f7fe14f444ab3ea9799844ebed247f18f6e40a90") endif() ExternalProject_Add(ep_tiledb @@ -113,8 +113,8 @@ else() else() # Build from source ExternalProject_Add(ep_tiledb PREFIX "externals" - URL "https://github.com/TileDB-Inc/TileDB/archive/2.16.2.zip" - URL_HASH SHA1=d54ff7fc4c3a1c5afb1027bab1ba011ae47c3d79 + URL "https://github.com/TileDB-Inc/TileDB/archive/2.17.0-rc0.zip" + URL_HASH SHA1=54c229897ead5fb578d28ba3c134e7f17a95c3a3 DOWNLOAD_NAME "tiledb.zip" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EP_INSTALL_PREFIX} From 5fba376922e05c04c4bd019c765fd23995dcf077 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 9 Sep 2023 23:04:05 -0400 Subject: [PATCH 08/11] resolve an incomplete merge with #1559 --- apis/r/tools/get_tarball.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apis/r/tools/get_tarball.R b/apis/r/tools/get_tarball.R index 7d5eac3270..0aaa542dc1 100644 --- a/apis/r/tools/get_tarball.R +++ b/apis/r/tools/get_tarball.R @@ -1,8 +1,8 @@ #!/usr/bin/env Rscript ## version pinning info -tiledb_core_version <- "2.16.2" -tiledb_core_sha1 <- "07b65de" ++tiledb_core_version <- "2.17.0-rc0" ++tiledb_core_sha1 <- "46b9ca5" if ( ! dir.exists("inst/") ) { stop("No 'inst/' directory. Exiting.", call. = FALSE) From dcd1c2d517e98bb5b89234482cd0050d773d6acf Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Mon, 11 Sep 2023 07:45:08 -0500 Subject: [PATCH 09/11] [r] Update tiledb-r to RC (borrowed from #1663, #1665) --- .github/workflows/r-ci.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/r-ci.yml b/.github/workflows/r-ci.yml index 1c063ca8d8..c99e098f49 100644 --- a/.github/workflows/r-ci.yml +++ b/.github/workflows/r-ci.yml @@ -39,6 +39,14 @@ jobs: - name: Install BioConductor package SingleCellExperiment run: cd apis/r && tools/r-ci.sh install_bioc SingleCellExperiment + + - name: Install rc version of tiledb-r (macOS) + if: ${{ matrix.os == 'macOS-latest' }} + run: cd apis/r && Rscript -e "install.packages('tiledb', repos = c('https://eddelbuettel.r-universe.dev', 'https://cloud.r-project.org'))" + + - name: Install rc version of tiledb-r (linux) + if: ${{ matrix.os != 'macOS-latest' }} + run: cd apis/r && Rscript -e "options(bspm.version.check=TRUE); install.packages('tiledb', repos = c('https://eddelbuettel.r-universe.dev/bin/linux/jammy/4.3/', 'https://cloud.r-project.org'))" - name: Dependencies run: cd apis/r && tools/r-ci.sh install_all From ddf591bb4c7032d5eff025ab96f28d5ab67779e1 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Mon, 11 Sep 2023 08:22:08 -0500 Subject: [PATCH 10/11] [r] Undo brown-bag typo in helper script --- apis/r/tools/get_tarball.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apis/r/tools/get_tarball.R b/apis/r/tools/get_tarball.R index 0aaa542dc1..daaea5c3ab 100644 --- a/apis/r/tools/get_tarball.R +++ b/apis/r/tools/get_tarball.R @@ -1,8 +1,8 @@ #!/usr/bin/env Rscript ## version pinning info -+tiledb_core_version <- "2.17.0-rc0" -+tiledb_core_sha1 <- "46b9ca5" +tiledb_core_version <- "2.17.0-rc0" +tiledb_core_sha1 <- "46b9ca5" if ( ! dir.exists("inst/") ) { stop("No 'inst/' directory. Exiting.", call. = FALSE) From f945ecaf583e2e2fcc1e772edefa24e48064326e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 14 Sep 2023 16:05:51 -0400 Subject: [PATCH 11/11] 2.17.0 --- apis/r/tools/get_tarball.R | 4 +-- .../cmake/Modules/FindTileDB_EP.cmake | 28 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/apis/r/tools/get_tarball.R b/apis/r/tools/get_tarball.R index daaea5c3ab..97ad341e2e 100644 --- a/apis/r/tools/get_tarball.R +++ b/apis/r/tools/get_tarball.R @@ -1,8 +1,8 @@ #!/usr/bin/env Rscript ## version pinning info -tiledb_core_version <- "2.17.0-rc0" -tiledb_core_sha1 <- "46b9ca5" +tiledb_core_version <- "2.17.0" +tiledb_core_sha1 <- "93c173d" if ( ! dir.exists("inst/") ) { stop("No 'inst/' directory. Exiting.", call. = FALSE) diff --git a/libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake b/libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake index c973a109de..f689401ad6 100644 --- a/libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake +++ b/libtiledbsoma/cmake/Modules/FindTileDB_EP.cmake @@ -58,8 +58,8 @@ else() # NB When updating the pinned URLs here, please also update in file apis/r/tools/get_tarball.R if(DOWNLOAD_TILEDB_PREBUILT) if (WIN32) # Windows - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0-rc0/tiledb-windows-x86_64-2.17.0-rc0-46b9ca5.zip") - SET(DOWNLOAD_SHA1 "740c7885b5191a467cb742b3420a51ae7b2a0a99") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-windows-x86_64-2.17.0-93c173d.zip") + SET(DOWNLOAD_SHA1 "d43589b22de95d45b40de9918d105a6174ec352e") elseif(APPLE) # OSX # Status quo as of 2023-05-18: @@ -76,22 +76,22 @@ else() # o CMAKE_SYSTEM_PROCESSOR is x86_64 if (CMAKE_OSX_ARCHITECTURES STREQUAL x86_64) - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0-rc0/tiledb-macos-x86_64-2.17.0-rc0-46b9ca5.tar.gz") - SET(DOWNLOAD_SHA1 "a3280bfaad8697080f9804b2d0756887962bdaa5") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-macos-x86_64-2.17.0-93c173d.tar.gz") + SET(DOWNLOAD_SHA1 "9a232015cbf09c5bd37375537cef80a382e1ffa4") elseif (CMAKE_OSX_ARCHITECTURES STREQUAL arm64) - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0-rc0/tiledb-macos-arm64-2.17.0-rc0-46b9ca5.tar.gz") - SET(DOWNLOAD_SHA1 "1b15a9aad373d491224f586fcbd677d47c2b86e8") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-macos-arm64-2.17.0-93c173d.tar.gz") + SET(DOWNLOAD_SHA1 "b861b90b462963db44fe0217087fac3510fd6293") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)") - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0-rc0/tiledb-macos-x86_64-2.17.0-rc0-46b9ca5.tar.gz") - SET(DOWNLOAD_SHA1 "a3280bfaad8697080f9804b2d0756887962bdaa5") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-macos-x86_64-2.17.0-93c173d.tar.gz") + SET(DOWNLOAD_SHA1 "9a232015cbf09c5bd37375537cef80a382e1ffa4") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0-rc0/tiledb-macos-arm64-2.17.0-rc0-46b9ca5.tar.gz") - SET(DOWNLOAD_SHA1 "1b15a9aad373d491224f586fcbd677d47c2b86e8") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-macos-arm64-2.17.0-93c173d.tar.gz") + SET(DOWNLOAD_SHA1 "b861b90b462963db44fe0217087fac3510fd6293") endif() else() # Linux - SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0-rc0/tiledb-linux-x86_64-2.17.0-rc0-46b9ca5.tar.gz") - SET(DOWNLOAD_SHA1 "f7fe14f444ab3ea9799844ebed247f18f6e40a90") + SET(DOWNLOAD_URL "https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-linux-x86_64-2.17.0-93c173d.tar.gz") + SET(DOWNLOAD_SHA1 "5c04c07a73d3fe48a9ba8f3ad8af5e1912a39ce8") endif() ExternalProject_Add(ep_tiledb @@ -113,8 +113,8 @@ else() else() # Build from source ExternalProject_Add(ep_tiledb PREFIX "externals" - URL "https://github.com/TileDB-Inc/TileDB/archive/2.17.0-rc0.zip" - URL_HASH SHA1=54c229897ead5fb578d28ba3c134e7f17a95c3a3 + URL "https://github.com/TileDB-Inc/TileDB/archive/2.17.0.zip" + URL_HASH SHA1=bbf5b34fec1c729f048f48bf1a0f03abb447d7de DOWNLOAD_NAME "tiledb.zip" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EP_INSTALL_PREFIX}