Skip to content

Commit

Permalink
Add support for LargeUtf8 arrow column type
Browse files Browse the repository at this point in the history
  • Loading branch information
texodus committed May 1, 2022
1 parent 65b01c5 commit 6a48539
Showing 1 changed file with 19 additions and 1 deletion.
20 changes: 19 additions & 1 deletion cpp/perspective/src/cpp/arrow_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ namespace apachearrow {

t_dtype
convert_type(const std::string& src) {
if (src == "dictionary" || src == "utf8" || src == "binary") {
if (src == "dictionary" || src == "utf8" || src == "binary"
|| src == "large_utf8") {
return DTYPE_STR;
} else if (src == "bool") {
return DTYPE_BOOL;
Expand Down Expand Up @@ -299,6 +300,23 @@ namespace apachearrow {
}
}
} break;
// LargeUtf8 string column: copy every string element of `src` into `dest`,
// writing element i to destination row `offset + i`.
// NOTE(review): no validity (null) check is visible in this case — confirm
// that null entries are handled elsewhere.
case arrow::LargeStringType::type_id: {
std::shared_ptr<arrow::LargeStringArray> scol
= std::static_pointer_cast<arrow::LargeStringArray>(src);
// Offsets table: offsets[i] and offsets[i + 1] delimit element i's bytes.
const arrow::LargeStringArray::offset_type* offsets
= scol->raw_value_offsets();
// Start of the values buffer that the offsets index into.
const uint8_t* values = scol->value_data()->data();

// Scratch string declared outside the loop and reassigned per element.
std::string elem;

// NOTE(review): the loop index is 32-bit; the type of `len` is not visible
// here — confirm it cannot exceed the uint32 range.
for (std::uint32_t i = 0; i < len; ++i) {
arrow::LargeStringArray::offset_type bidx = offsets[i];
// Element byte length = next offset minus this element's start offset.
std::size_t es = offsets[i + 1] - bidx;
elem.assign(
reinterpret_cast<const char*>(values) + bidx, es);
dest->set_nth(offset + i, elem);
}
} break;
case arrow::BinaryType::type_id:
case arrow::StringType::type_id: {
std::shared_ptr<arrow::StringArray> scol
Expand Down

0 comments on commit 6a48539

Please sign in to comment.