Skip to content

Commit

Permalink
[c++] Unit-test variant-indexed dataframes (#2944)
Browse files Browse the repository at this point in the history
* [c++] Split out a test fixture for dataframes

* fix a failure case

* [c++] Unit-test variant-indexed dataframes [WIP]

* neaten

* code-review feedback [skip_ci]
  • Loading branch information
johnkerl authored Sep 3, 2024
1 parent 37b6e50 commit ded1e42
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 4 deletions.
7 changes: 7 additions & 0 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,13 @@ class SOMAArray : public SOMAObject {
*/
std::optional<TimestampRange> timestamp();

/**
* Returns the array's current domain by forwarding to
* `_get_current_domain()`, so that callers outside the class (the unit
* tests) can inspect it.
*
* Exposed for testing purposes.
*/
CurrentDomain get_current_domain() {
return _get_current_domain();
}

private:
//===================================================================
//= private non-static
Expand Down
5 changes: 2 additions & 3 deletions libtiledbsoma/src/utils/arrow_adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -868,9 +868,8 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema(
// nullptr)
//
// Fortunately, these are ASCII dims and we can range
// these accordingly. These are minimum and maximum
// values, avoiding the extremes 0x00 and 0xff.
ndrect.set_range(col_name, "\x01", "\xfe");
// these accordingly.
ndrect.set_range(col_name, "", "\xff");
} else {
const void* buff = index_column_array->children[i]
->buffers[1];
Expand Down
2 changes: 1 addition & 1 deletion libtiledbsoma/test/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ static std::unique_ptr<ArrowArray> _create_index_cols_info_array(
std::memcpy((void*)dim_array->buffers[1], vsrc, nbytes);
} else {
// domain small; current_domain feature not being used
int64_t dom[] = {0, info.dim_max, 1};
uint32_t dom[] = {0, (uint32_t)info.dim_max, 1};
void* vsrc = (void*)&dom[0];
std::memcpy((void*)dim_array->buffers[1], vsrc, nbytes);
}
Expand Down
194 changes: 194 additions & 0 deletions libtiledbsoma/test/unit_soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -461,3 +461,197 @@ TEST_CASE_METHOD(

soma_dataframe->close();
}

// Dataframe indexed by a single int64 dimension ("sjid" in the test title)
// with a string attribute and a uint32 attribute. The body runs twice: once
// with the current-domain feature disabled and once with it enabled.
TEST_CASE_METHOD(
    VariouslyIndexedDataFrameFixture,
    "SOMADataFrame: variant-indexed dataframe dim:sjid attr:str,u32") {
    const auto use_current_domain = GENERATE(false, true);
    std::ostringstream section_name;
    section_name << "- use_current_domain=" << use_current_domain;
    SECTION(section_name.str()) {
        set_up(
            std::make_shared<SOMAContext>(),
            "mem://unit-test-variant-indexed-dataframe-1");

        // Schema description: one dimension, two attributes.
        std::vector<helper::DimInfo> dim_infos{
            i64_dim_info(use_current_domain)};
        std::vector<helper::AttrInfo> attr_infos{
            str_attr_info(), u32_attr_info()};

        // Create
        create(dim_infos, attr_infos);

        // Re-open for reading and inspect the current domain.
        auto sdf = open(OpenMode::read);
        CurrentDomain domain = sdf->get_current_domain();

        if (use_current_domain) {
            // A current domain must exist and be an N-d rectangle.
            REQUIRE(!domain.is_empty());
            REQUIRE(domain.type() == TILEDB_NDRECTANGLE);
            NDRectangle rect = domain.ndrectangle();

            // The int64 dimension must span [0, dim_max].
            const helper::DimInfo& i64_dim = dim_infos[0];
            std::array<int64_t, 2> i64_bounds = rect.range<int64_t>(
                i64_dim.name);
            REQUIRE(i64_bounds[0] == (int64_t)0);
            REQUIRE(i64_bounds[1] == (int64_t)i64_dim.dim_max);
        } else {
            // Feature off: no current domain should have been set.
            REQUIRE(domain.is_empty());
        }

        sdf->close();

        // Write
        write_generic_data();
    }
}

// Dataframe indexed by an int64 dimension ("sjid" in the test title) and a
// uint32 dimension, with a single string attribute. The body runs twice:
// once with the current-domain feature disabled and once with it enabled.
TEST_CASE_METHOD(
    VariouslyIndexedDataFrameFixture,
    "SOMADataFrame: variant-indexed dataframe dim:sjid,u32 attr:str") {
    auto use_current_domain = GENERATE(false, true);
    std::ostringstream section;
    section << "- use_current_domain=" << use_current_domain;
    SECTION(section.str()) {
        set_up(
            std::make_shared<SOMAContext>(),
            "mem://unit-test-variant-indexed-dataframe-2");

        // dim_infos[0] is the int64 dimension, dim_infos[1] the uint32 one.
        std::vector<helper::DimInfo> dim_infos(
            {i64_dim_info(use_current_domain),
             u32_dim_info(use_current_domain)});
        std::vector<helper::AttrInfo> attr_infos({str_attr_info()});

        // Create
        create(dim_infos, attr_infos);

        // Check current domain
        auto soma_dataframe = open(OpenMode::read);

        CurrentDomain current_domain = soma_dataframe->get_current_domain();
        if (!use_current_domain) {
            REQUIRE(current_domain.is_empty());
        } else {
            REQUIRE(!current_domain.is_empty());
            REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE);
            NDRectangle ndrect = current_domain.ndrectangle();

            // The int64 dimension must span [0, dim_max].
            std::array<int64_t, 2> i64_range = ndrect.range<int64_t>(
                dim_infos[0].name);
            REQUIRE(i64_range[0] == (int64_t)0);
            REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max);

            // The uint32 dimension is the SECOND entry of dim_infos; query
            // it by its own name and compare against its own dim_max rather
            // than re-reading the int64 dimension through a uint32 view.
            std::array<uint32_t, 2> u32_range = ndrect.range<uint32_t>(
                dim_infos[1].name);
            REQUIRE(u32_range[0] == (uint32_t)0);
            REQUIRE(u32_range[1] == (uint32_t)dim_infos[1].dim_max);
        }

        soma_dataframe->close();

        // Write
        write_generic_data();
    }
}

// Dataframe indexed by an int64 dimension ("sjid" in the test title) and a
// string dimension, with a single uint32 attribute. The body runs twice:
// once with the current-domain feature disabled and once with it enabled.
TEST_CASE_METHOD(
    VariouslyIndexedDataFrameFixture,
    "SOMADataFrame: variant-indexed dataframe dim:sjid,str attr:u32") {
    const auto use_current_domain = GENERATE(false, true);
    std::ostringstream section_name;
    section_name << "- use_current_domain=" << use_current_domain;
    SECTION(section_name.str()) {
        set_up(
            std::make_shared<SOMAContext>(),
            "mem://unit-test-variant-indexed-dataframe-3");

        // dim_infos[0] is the int64 dimension, dim_infos[1] the string one.
        std::vector<helper::DimInfo> dim_infos{
            i64_dim_info(use_current_domain),
            str_dim_info(use_current_domain)};
        std::vector<helper::AttrInfo> attr_infos{u32_attr_info()};

        // Create
        create(dim_infos, attr_infos);

        // Re-open for reading and inspect the current domain.
        auto sdf = open(OpenMode::read);
        CurrentDomain domain = sdf->get_current_domain();

        if (use_current_domain) {
            // A current domain must exist and be an N-d rectangle.
            REQUIRE(!domain.is_empty());
            REQUIRE(domain.type() == TILEDB_NDRECTANGLE);
            NDRectangle rect = domain.ndrectangle();

            // The int64 dimension must span [0, dim_max].
            const helper::DimInfo& i64_dim = dim_infos[0];
            std::array<int64_t, 2> i64_bounds = rect.range<int64_t>(
                i64_dim.name);
            REQUIRE(i64_bounds[0] == (int64_t)0);
            REQUIRE(i64_bounds[1] == (int64_t)i64_dim.dim_max);

            const helper::DimInfo& str_dim = dim_infos[1];
            std::array<std::string, 2> str_bounds = rect.range<std::string>(
                str_dim.name);
            // Can we write empty strings in this range?
            REQUIRE(str_bounds[0] <= "");
            REQUIRE(str_bounds[1] >= "");
            // Can we write ASCII values in this range?
            REQUIRE(str_bounds[0] < " ");
            REQUIRE(str_bounds[1] > "~");
        } else {
            // Feature off: no current domain should have been set.
            REQUIRE(domain.is_empty());
        }

        sdf->close();

        // Write
        write_generic_data();
    }
}

// Dataframe indexed by a string dimension and a uint32 dimension, with a
// single int64 attribute ("sjid" in the test title). The body runs twice:
// once with the current-domain feature disabled and once with it enabled.
TEST_CASE_METHOD(
    VariouslyIndexedDataFrameFixture,
    "SOMADataFrame: variant-indexed dataframe dim:str,u32 attr:sjid") {
    const auto use_current_domain = GENERATE(false, true);
    std::ostringstream section_name;
    section_name << "- use_current_domain=" << use_current_domain;
    SECTION(section_name.str()) {
        set_up(
            std::make_shared<SOMAContext>(),
            "mem://unit-test-variant-indexed-dataframe-4");

        // dim_infos[0] is the string dimension, dim_infos[1] the uint32 one.
        std::vector<helper::DimInfo> dim_infos{
            str_dim_info(use_current_domain),
            u32_dim_info(use_current_domain)};
        std::vector<helper::AttrInfo> attr_infos{i64_attr_info()};

        // Create
        create(dim_infos, attr_infos);

        // Re-open for reading and inspect the current domain.
        auto sdf = open(OpenMode::read);
        CurrentDomain domain = sdf->get_current_domain();

        if (use_current_domain) {
            // A current domain must exist and be an N-d rectangle.
            REQUIRE(!domain.is_empty());
            REQUIRE(domain.type() == TILEDB_NDRECTANGLE);
            NDRectangle rect = domain.ndrectangle();

            const helper::DimInfo& str_dim = dim_infos[0];
            std::array<std::string, 2> str_bounds = rect.range<std::string>(
                str_dim.name);
            // Can we write empty strings in this range?
            REQUIRE(str_bounds[0] <= "");
            REQUIRE(str_bounds[1] >= "");
            // Can we write ASCII values in this range?
            REQUIRE(str_bounds[0] < " ");
            REQUIRE(str_bounds[1] > "~");

            // The uint32 dimension must span [0, dim_max].
            const helper::DimInfo& u32_dim = dim_infos[1];
            std::array<uint32_t, 2> u32_bounds = rect.range<uint32_t>(
                u32_dim.name);
            REQUIRE(u32_bounds[0] == (uint32_t)0);
            REQUIRE(u32_bounds[1] == (uint32_t)u32_dim.dim_max);
        } else {
            // Feature off: no current domain should have been set.
            REQUIRE(domain.is_empty());
        }

        sdf->close();

        // Write
        write_generic_data();
    }
}

0 comments on commit ded1e42

Please sign in to comment.