Skip to content

Commit

Permalink
[c++] Unit-test variant-indexed dataframes [skip_ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Aug 30, 2024
1 parent 426e6a2 commit fc2e789
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 4 deletions.
7 changes: 7 additions & 0 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,13 @@ class SOMAArray : public SOMAObject {
*/
std::optional<TimestampRange> timestamp();

/**
* Exposed for testing purposes.
*
* Forwards to _get_current_domain() and hands its result back unchanged.
*
* @return The CurrentDomain value produced by _get_current_domain().
*/
CurrentDomain get_current_domain() {
return _get_current_domain();
}

private:
//===================================================================
//= private non-static
Expand Down
6 changes: 3 additions & 3 deletions libtiledbsoma/src/utils/arrow_adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -868,9 +868,8 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema(
// nullptr)
//
// Fortunately, these are ASCII dims and we can range
// these accordingly. These are minimum and maximum
// values, avoiding the extremes 0x00 and 0xff.
ndrect.set_range(col_name, "\x01", "\xfe");
// these accordingly.
ndrect.set_range(col_name, "", "\xff");
} else {
const void* buff = index_column_array->children[i]
->buffers[1];
Expand All @@ -892,6 +891,7 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema(

LOG_DEBUG(fmt::format("[ArrowAdapter] check"));
schema.check();
// debug: schema.dump();

LOG_DEBUG(fmt::format("[ArrowAdapter] returning"));
return schema;
Expand Down
2 changes: 1 addition & 1 deletion libtiledbsoma/test/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ static std::unique_ptr<ArrowArray> _create_index_cols_info_array(
std::memcpy((void*)dim_array->buffers[1], vsrc, nbytes);
} else {
// domain small; current_domain feature not being used
int64_t dom[] = {0, info.dim_max, 1};
uint32_t dom[] = {0, (uint32_t)info.dim_max, 1};
void* vsrc = (void*)&dom[0];
std::memcpy((void*)dim_array->buffers[1], vsrc, nbytes);
}
Expand Down
199 changes: 199 additions & 0 deletions libtiledbsoma/test/unit_soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ TEST_CASE_METHOD(
soma_dataframe->close();

soma_dataframe = open(OpenMode::write);

soma_dataframe->resize(std::vector<int64_t>({new_max}));
soma_dataframe->close();

Expand All @@ -457,3 +458,201 @@ TEST_CASE_METHOD(

soma_dataframe->close();
}

TEST_CASE_METHOD(
    VariouslyIndexedDataFrameFixture,
    "SOMADataFrame: variant-indexed dataframe 1") {
    // Dataframe built from the fixture's i64_dim_info as its only index
    // column, with str_attr_info and u32_attr_info as attributes. The body
    // runs once per generated value, each inside its own named section.
    // LOG_SET_LEVEL("debug");
    const bool with_current_domain = GENERATE(false, true);
    std::ostringstream label;
    label << "- use_current_domain=" << with_current_domain;
    SECTION(label.str()) {
        set_up(
            std::make_shared<SOMAContext>(),
            "mem://unit-test-variant-indexed-dataframe-1");

        std::vector<helper::DimInfo> dims{i64_dim_info(with_current_domain)};
        std::vector<helper::AttrInfo> attrs{str_attr_info(), u32_attr_info()};

        // Create
        create(dims, attrs);

        // Check current domain
        auto df = open(OpenMode::read);
        CurrentDomain domain = df->get_current_domain();

        if (with_current_domain) {
            // A current domain was requested: it must be an N-D rectangle
            // whose int64 slot spans [0, dim_max].
            REQUIRE(!domain.is_empty());
            REQUIRE(domain.type() == TILEDB_NDRECTANGLE);
            NDRectangle rect = domain.ndrectangle();

            const auto i64_bounds = rect.range<int64_t>(dims[0].name);
            REQUIRE(i64_bounds[0] == static_cast<int64_t>(0));
            REQUIRE(i64_bounds[1] == static_cast<int64_t>(dims[0].dim_max));
        } else {
            // No current domain requested: none must be reported.
            REQUIRE(domain.is_empty());
        }

        df->close();

        // Write
        write_generic_data();
    }
}

TEST_CASE_METHOD(
    VariouslyIndexedDataFrameFixture,
    "SOMADataFrame: variant-indexed dataframe 2") {
    // Dataframe indexed by two columns, built from the fixture's
    // i64_dim_info (slot 0) and u32_dim_info (slot 1), with str_attr_info as
    // the only attribute. Runs once with and once without a current domain.
    // LOG_SET_LEVEL("debug");
    auto use_current_domain = GENERATE(false, true);
    std::ostringstream section;
    section << "- use_current_domain=" << use_current_domain;
    SECTION(section.str()) {
        set_up(
            std::make_shared<SOMAContext>(),
            "mem://unit-test-variant-indexed-dataframe-2");

        std::vector<helper::DimInfo> dim_infos(
            {i64_dim_info(use_current_domain),
             u32_dim_info(use_current_domain)});
        std::vector<helper::AttrInfo> attr_infos({str_attr_info()});

        // Create
        create(dim_infos, attr_infos);

        // Check current domain
        auto soma_dataframe = open(OpenMode::read);

        CurrentDomain current_domain = soma_dataframe->get_current_domain();
        if (!use_current_domain) {
            REQUIRE(current_domain.is_empty());
        } else {
            REQUIRE(!current_domain.is_empty());
            REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE);
            NDRectangle ndrect = current_domain.ndrectangle();

            // Slot 0: the int64 dimension.
            std::array<int64_t, 2> i64_range = ndrect.range<int64_t>(
                dim_infos[0].name);
            REQUIRE(i64_range[0] == (int64_t)0);
            REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max);

            // Slot 1: the uint32 dimension. This must be looked up by its
            // own name and compared against its own dim_max; indexing slot 0
            // here would re-read the int64 dimension and leave the uint32
            // one unverified.
            std::array<uint32_t, 2> u32_range = ndrect.range<uint32_t>(
                dim_infos[1].name);
            REQUIRE(u32_range[0] == (uint32_t)0);
            REQUIRE(u32_range[1] == (uint32_t)dim_infos[1].dim_max);
        }

        soma_dataframe->close();

        // Write
        write_generic_data();
    }
}

TEST_CASE_METHOD(
    VariouslyIndexedDataFrameFixture,
    "SOMADataFrame: variant-indexed dataframe 3") {
    // Dataframe indexed by two columns, built from the fixture's
    // i64_dim_info (slot 0) and str_dim_info (slot 1), with u32_attr_info as
    // the only attribute. Runs once with and once without a current domain.
    // LOG_SET_LEVEL("debug");
    const bool with_current_domain = GENERATE(false, true);
    std::ostringstream label;
    label << "- use_current_domain=" << with_current_domain;
    SECTION(label.str()) {
        set_up(
            std::make_shared<SOMAContext>(),
            "mem://unit-test-variant-indexed-dataframe-3");

        std::vector<helper::DimInfo> dims{
            i64_dim_info(with_current_domain),
            str_dim_info(with_current_domain)};
        std::vector<helper::AttrInfo> attrs{u32_attr_info()};

        // Create
        create(dims, attrs);

        // Check current domain
        auto df = open(OpenMode::read);
        CurrentDomain domain = df->get_current_domain();

        if (with_current_domain) {
            REQUIRE(!domain.is_empty());
            REQUIRE(domain.type() == TILEDB_NDRECTANGLE);
            NDRectangle rect = domain.ndrectangle();

            // Slot 0: the int64 dimension spans [0, dim_max].
            const auto i64_bounds = rect.range<int64_t>(dims[0].name);
            REQUIRE(i64_bounds[0] == static_cast<int64_t>(0));
            REQUIRE(i64_bounds[1] == static_cast<int64_t>(dims[0].dim_max));

            // Slot 1: the string dimension.
            const auto str_bounds = rect.range<std::string>(dims[1].name);
            // Can we write empty strings in this range?
            REQUIRE(str_bounds[0] <= "");
            REQUIRE(str_bounds[1] >= "");
            // Can we write ASCII values in this range?
            REQUIRE(str_bounds[0] < " ");
            REQUIRE(str_bounds[1] > "~");
        } else {
            // No current domain requested: none must be reported.
            REQUIRE(domain.is_empty());
        }

        df->close();

        // Write
        write_generic_data();
    }
}

TEST_CASE_METHOD(
    VariouslyIndexedDataFrameFixture,
    "SOMADataFrame: variant-indexed dataframe 4") {
    // Dataframe indexed by two columns, built from the fixture's
    // str_dim_info (slot 0) and u32_dim_info (slot 1), with i64_attr_info as
    // the only attribute. Runs once with and once without a current domain.
    // LOG_SET_LEVEL("debug");
    auto use_current_domain = GENERATE(false, true);
    std::ostringstream section;
    section << "- use_current_domain=" << use_current_domain;
    SECTION(section.str()) {
        set_up(
            std::make_shared<SOMAContext>(),
            "mem://unit-test-variant-indexed-dataframe-4");

        std::vector<helper::DimInfo> dim_infos(
            {str_dim_info(use_current_domain),
             u32_dim_info(use_current_domain)});
        std::vector<helper::AttrInfo> attr_infos({i64_attr_info()});

        // Create
        create(dim_infos, attr_infos);

        // Check current domain
        auto soma_dataframe = open(OpenMode::read);

        CurrentDomain current_domain = soma_dataframe->get_current_domain();
        if (!use_current_domain) {
            REQUIRE(current_domain.is_empty());
        } else {
            REQUIRE(!current_domain.is_empty());
            REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE);
            NDRectangle ndrect = current_domain.ndrectangle();

            // Slot 0: the string dimension.
            std::array<std::string, 2> str_range = ndrect.range<std::string>(
                dim_infos[0].name);
            // Can we write empty strings in this range?
            REQUIRE(str_range[0] <= "");
            REQUIRE(str_range[1] >= "");
            // Can we write ASCII values in this range?
            REQUIRE(str_range[0] < " ");
            REQUIRE(str_range[1] > "~");

            // Slot 1: the uint32 dimension. Its upper bound is compared
            // against its own dim_max (slot 1), not the string dimension's.
            std::array<uint32_t, 2> u32_range = ndrect.range<uint32_t>(
                dim_infos[1].name);
            REQUIRE(u32_range[0] == (uint32_t)0);
            REQUIRE(u32_range[1] == (uint32_t)dim_infos[1].dim_max);
        }

        soma_dataframe->close();

        // Write
        write_generic_data();
    }
}

0 comments on commit fc2e789

Please sign in to comment.