Skip to content

Commit

Permalink
[c++] Some use_current_domain unit-test/feature-flag teardown, part 3/n
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Nov 22, 2024
1 parent 6c26800 commit b6bcaac
Show file tree
Hide file tree
Showing 9 changed files with 522 additions and 1,003 deletions.
176 changes: 82 additions & 94 deletions libtiledbsoma/src/utils/arrow_adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -954,8 +954,6 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema(

std::map<std::string, Dimension> dims;

bool use_current_domain = true;

for (int64_t sch_idx = 0; sch_idx < arrow_schema->n_children; ++sch_idx) {
auto child = arrow_schema->children[sch_idx];
std::string_view type_metadata;
Expand Down Expand Up @@ -1059,105 +1057,95 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema(

// Note: this must be done after we've got the core domain, since the
// NDRectangle constructor requires access to the core domain.
if (use_current_domain) {
CurrentDomain current_domain(*ctx);
NDRectangle ndrect(*ctx, domain);

for (int64_t sch_idx = 0; sch_idx < arrow_schema->n_children;
++sch_idx) {
auto child = arrow_schema->children[sch_idx];
auto type = ArrowAdapter::to_tiledb_format(child->format);

for (int64_t i = 0; i < index_column_schema->n_children; ++i) {
auto col_name = index_column_schema->children[i]->name;
if (strcmp(child->name, col_name) != 0) {
continue;
}

if (strcmp(child->name, SOMA_GEOMETRY_COLUMN_NAME.c_str()) ==
0 &&
spatial_column_info.first.get() != nullptr) {
for (int64_t j = 0;
j < spatial_column_info.first->n_children;
++j) {
auto col_name = SOMA_GEOMETRY_DIMENSION_PREFIX +
std::string(spatial_column_info.second
->children[j]
->name);
const void* buff = spatial_column_info.first
->children[j]
->buffers[1];
auto type = ArrowAdapter::to_tiledb_format(
spatial_column_info.second->children[j]->format);

_set_current_domain_slot(
type, buff, ndrect, col_name + "__min");
_set_current_domain_slot(
type, buff, ndrect, col_name + "__max");
}
} else if (ArrowAdapter::arrow_is_var_length_type(
child->format)) {
// In the core API:
//
// * domain for strings must be set as (nullptr, nullptr)
// * current_domain for strings cannot be set as (nullptr,
// nullptr)
//
// Fortunately, these are ASCII dims and we can range
// these accordingly.

ArrowArray* child_array = index_column_array->children[i];
ArrowSchema* child_schema = index_column_schema
->children[i];

std::vector<std::string>
strings = ArrowAdapter::get_array_string_column(
child_array, child_schema);
if (strings.size() != 5) {
throw TileDBSOMAError(std::format(
"ArrowAdapter::tiledb_schema_from_arrow_schema: "
"internal error: "
"expected 5 strings, got {}",
strings.size()));
}

std::string lo = strings[3];
std::string hi = strings[4];
if (lo == "" && hi == "") {
// These mean "I the caller don't care, you
// libtiledbsoma make it as big as possible".
// See also comments in soma_array.h.
ndrect.set_range(col_name, "", "\x7f");
} else {
ndrect.set_range(col_name, lo, hi);
LOG_DEBUG(std::format(
"[ArrowAdapter] index_column_info nbuf {}",
index_column_array->children[i]->n_buffers));
}
CurrentDomain current_domain(*ctx);
NDRectangle ndrect(*ctx, domain);

LOG_DEBUG(std::format(
"[ArrowAdapter] current domain {} \"{}\"-\"{}\"",
child_schema->name,
lo,
hi));
} else {
const void* buff = index_column_array->children[i]
for (int64_t sch_idx = 0; sch_idx < arrow_schema->n_children; ++sch_idx) {
auto child = arrow_schema->children[sch_idx];
auto type = ArrowAdapter::to_tiledb_format(child->format);

for (int64_t i = 0; i < index_column_schema->n_children; ++i) {
auto col_name = index_column_schema->children[i]->name;
if (strcmp(child->name, col_name) != 0) {
continue;
}

if (strcmp(child->name, SOMA_GEOMETRY_COLUMN_NAME.c_str()) == 0 &&
spatial_column_info.first.get() != nullptr) {
for (int64_t j = 0; j < spatial_column_info.first->n_children;
++j) {
auto col_name = SOMA_GEOMETRY_DIMENSION_PREFIX +
std::string(
spatial_column_info.second->children[j]
->name);
const void* buff = spatial_column_info.first->children[j]
->buffers[1];
_set_current_domain_slot(type, buff, ndrect, col_name);
auto type = ArrowAdapter::to_tiledb_format(
spatial_column_info.second->children[j]->format);

_set_current_domain_slot(
type, buff, ndrect, col_name + "__min");
_set_current_domain_slot(
type, buff, ndrect, col_name + "__max");
}
break;
} else if (ArrowAdapter::arrow_is_var_length_type(child->format)) {
// In the core API:
//
// * domain for strings must be set as (nullptr, nullptr)
// * current_domain for strings cannot be set as (nullptr,
// nullptr)
//
// Fortunately, these are ASCII dims and we can range
// these accordingly.

ArrowArray* child_array = index_column_array->children[i];
ArrowSchema* child_schema = index_column_schema->children[i];

std::vector<std::string>
strings = ArrowAdapter::get_array_string_column(
child_array, child_schema);
if (strings.size() != 5) {
throw TileDBSOMAError(std::format(
"ArrowAdapter::tiledb_schema_from_arrow_schema: "
"internal error: "
"expected 5 strings, got {}",
strings.size()));
}

std::string lo = strings[3];
std::string hi = strings[4];
if (lo == "" && hi == "") {
// These mean "I the caller don't care, you
// libtiledbsoma make it as big as possible"
ndrect.set_range(col_name, "", "\x7f");
} else {
ndrect.set_range(col_name, lo, hi);
LOG_DEBUG(std::format(
"[ArrowAdapter] index_column_info nbuf {}",
index_column_array->children[i]->n_buffers));
}

LOG_DEBUG(std::format(
"[ArrowAdapter] current domain {} \"{}\"-\"{}\"",
child_schema->name,
lo,
hi));
} else {
const void* buff = index_column_array->children[i]->buffers[1];
_set_current_domain_slot(type, buff, ndrect, col_name);
}
break;
}
}

current_domain.set_ndrectangle(ndrect);
current_domain.set_ndrectangle(ndrect);

LOG_DEBUG(std::format(
"[ArrowAdapter] before setting current_domain from ndrect"));
ArraySchemaExperimental::set_current_domain(
*ctx, schema, current_domain);
LOG_DEBUG(std::format(
"[ArrowAdapter] after setting current_domain from ndrect"));
}
LOG_DEBUG(std::format(
"[ArrowAdapter] before setting current_domain from ndrect"));
ArraySchemaExperimental::set_current_domain(*ctx, schema, current_domain);
LOG_DEBUG(
std::format("[ArrowAdapter] after setting current_domain from ndrect"));

LOG_DEBUG(std::format("[ArrowAdapter] check"));
schema.check();
Expand Down Expand Up @@ -1399,7 +1387,7 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
array->release = &release_array;
if (array->private_data != nullptr) { // as we use nanoarrow's init
free(array->private_data); // free what was allocated before
} // assigning our ArrowBuffer pointer
} // assigning our ArrowBuffer pointer
array->private_data = (void*)arrow_buffer;

LOG_TRACE(std::format(
Expand Down
5 changes: 2 additions & 3 deletions libtiledbsoma/test/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,10 @@ create_arrow_schema_and_index_columns(
ArrowTable create_column_index_info(const std::vector<DimInfo>& dim_infos) {
for (auto info : dim_infos) {
LOG_DEBUG(std::format(
"create_column_index_info name={} type={} dim_max={} ucd={}",
"create_column_index_info name={} type={} dim_max={}",
info.name,
tiledb::impl::to_str(info.tiledb_datatype),
info.dim_max,
info.use_current_domain));
info.dim_max));
}

auto index_cols_info_schema = create_index_cols_info_schema(dim_infos);
Expand Down
1 change: 0 additions & 1 deletion libtiledbsoma/test/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ struct DimInfo {
int64_t dim_max;
std::string string_lo; // For custom/restricted DataFrame domains
std::string string_hi; // For custom/restricted DataFrame domains
bool use_current_domain;
};

// E.g. "a0" is of type TILEDB_FLOAT64
Expand Down
21 changes: 7 additions & 14 deletions libtiledbsoma/test/unit_soma_collection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ TEST_CASE("SOMACollection: add SOMASparseNDArray") {
.tiledb_datatype = tiledb_datatype,
.dim_max = DIM_MAX,
.string_lo = "N/A",
.string_hi = "N/A",
.use_current_domain = use_current_domain}});
.string_hi = "N/A"}});

auto index_columns = helper::create_column_index_info(dim_infos);

Expand Down Expand Up @@ -127,8 +126,7 @@ TEST_CASE("SOMACollection: add SOMADenseNDArray") {
.tiledb_datatype = tiledb_datatype,
.dim_max = DIM_MAX,
.string_lo = "N/A",
.string_hi = "N/A",
.use_current_domain = use_current_domain}});
.string_hi = "N/A"}});
auto index_columns = helper::create_column_index_info(dim_infos);

std::map<std::string, SOMAGroupEntry> expected_map{
Expand Down Expand Up @@ -188,8 +186,7 @@ TEST_CASE("SOMACollection: add SOMADataFrame") {
.tiledb_datatype = tiledb_datatype,
.dim_max = DIM_MAX,
.string_lo = "N/A",
.string_hi = "N/A",
.use_current_domain = use_current_domain}});
.string_hi = "N/A"}});
std::vector<helper::AttrInfo> attr_infos(
{{.name = attr_name, .tiledb_datatype = tiledb_datatype}});
auto [schema, index_columns] =
Expand Down Expand Up @@ -284,8 +281,7 @@ TEST_CASE("SOMACollection: add SOMAExperiment") {
.tiledb_datatype = tiledb_datatype,
.dim_max = DIM_MAX,
.string_lo = "N/A",
.string_hi = "N/A",
.use_current_domain = use_current_domain}});
.string_hi = "N/A"}});
std::vector<helper::AttrInfo> attr_infos(
{{.name = attr_name, .tiledb_datatype = tiledb_datatype}});
auto [schema, index_columns] =
Expand Down Expand Up @@ -340,8 +336,7 @@ TEST_CASE("SOMACollection: add SOMAMeasurement") {
.tiledb_datatype = tiledb_datatype,
.dim_max = DIM_MAX,
.string_lo = "N/A",
.string_hi = "N/A",
.use_current_domain = use_current_domain}});
.string_hi = "N/A"}});
std::vector<helper::AttrInfo> attr_infos(
{{.name = attr_name, .tiledb_datatype = tiledb_datatype}});
auto [schema, index_columns] =
Expand Down Expand Up @@ -455,8 +450,7 @@ TEST_CASE("SOMAExperiment: metadata") {
.tiledb_datatype = tiledb_datatype,
.dim_max = DIM_MAX,
.string_lo = "N/A",
.string_hi = "N/A",
.use_current_domain = use_current_domain}});
.string_hi = "N/A"}});
std::vector<helper::AttrInfo> attr_infos(
{{.name = attr_name, .tiledb_datatype = tiledb_datatype}});
auto [schema, index_columns] =
Expand Down Expand Up @@ -549,8 +543,7 @@ TEST_CASE("SOMAMeasurement: metadata") {
.tiledb_datatype = tiledb_datatype,
.dim_max = DIM_MAX,
.string_lo = "N/A",
.string_hi = "N/A",
.use_current_domain = use_current_domain}});
.string_hi = "N/A"}});
std::vector<helper::AttrInfo> attr_infos(
{{.name = attr_name, .tiledb_datatype = tiledb_datatype}});
auto [schema, index_columns] =
Expand Down
Loading

0 comments on commit b6bcaac

Please sign in to comment.