Skip to content

Commit

Permalink
Add support for LargeUtf8 arrow column type
Browse files Browse the repository at this point in the history
  • Loading branch information
texodus committed May 1, 2022
1 parent 65b01c5 commit fb827a8
Showing 1 changed file with 18 additions and 1 deletion.
19 changes: 18 additions & 1 deletion cpp/perspective/src/cpp/arrow_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ namespace apachearrow {

t_dtype
convert_type(const std::string& src) {
if (src == "dictionary" || src == "utf8" || src == "binary") {
if (src == "dictionary" || src == "utf8" || src == "binary"
|| src == "large_utf8") {
return DTYPE_STR;
} else if (src == "bool") {
return DTYPE_BOOL;
Expand Down Expand Up @@ -299,6 +300,22 @@ namespace apachearrow {
}
}
} break;
case arrow::LargeStringType::type_id: {
    // LargeUtf8 column: copy each string value out of the Arrow array into
    // `dest`, one element at a time, starting at row `offset`.
    // NOTE(review): the validity bitmap is not consulted here; presumably
    // nulls are handled by the caller - confirm.
    const auto scol
        = std::static_pointer_cast<arrow::LargeStringArray>(src);

    // Use Arrow's own offset type rather than `long long`: the offsets are
    // declared as `int64_t`, which is `long` on LP64 Linux, and a
    // `const long*` does not implicitly convert to `const long long*`.
    const arrow::LargeStringArray::offset_type* offsets
        = scol->raw_value_offsets();
    const char* values
        = reinterpret_cast<const char*>(scol->value_data()->data());

    // Reused across iterations so the buffer capacity is kept.
    std::string elem;

    for (std::uint32_t i = 0; i < len; ++i) {
        // Element i occupies bytes [offsets[i], offsets[i + 1]) of the
        // contiguous value buffer.
        const auto bidx = offsets[i];
        const auto es = static_cast<std::size_t>(offsets[i + 1] - bidx);
        elem.assign(values + bidx, es);
        dest->set_nth(offset + i, elem);
    }
} break;
case arrow::BinaryType::type_id:
case arrow::StringType::type_id: {
std::shared_ptr<arrow::StringArray> scol
Expand Down

0 comments on commit fb827a8

Please sign in to comment.