Skip to content

Commit

Permalink
GH-36099: [C++] Add Utf8View and BinaryView to the C data bridge
Browse files Browse the repository at this point in the history
  • Loading branch information
bkietz committed Oct 24, 2023
1 parent 6f2e851 commit b0bcb16
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 2 deletions.
22 changes: 20 additions & 2 deletions cpp/src/arrow/c/bridge.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <algorithm>
#include <cerrno>
#include <cstring>
#include <memory>
#include <string>
#include <string_view>
#include <utility>
Expand Down Expand Up @@ -260,7 +261,7 @@ struct SchemaExporter {
// Dictionary type: parent struct describes index type,
// child dictionary struct describes value type.
RETURN_NOT_OK(VisitTypeInline(*dict_type.index_type(), this));
dict_exporter_.reset(new SchemaExporter());
dict_exporter_ = std::make_unique<SchemaExporter>();
RETURN_NOT_OK(dict_exporter_->ExportType(*dict_type.value_type()));
} else {
RETURN_NOT_OK(VisitTypeInline(type, this));
Expand Down Expand Up @@ -357,10 +358,14 @@ struct SchemaExporter {

// LargeBinaryType is exported with the format string "Z".
Status Visit(const LargeBinaryType& type) { return SetFormat("Z"); }

// BinaryViewType is exported with the two-character format string "vz".
Status Visit(const BinaryViewType& type) { return SetFormat("vz"); }

// StringType is exported with the format string "u".
Status Visit(const StringType& type) { return SetFormat("u"); }

// LargeStringType is exported with the format string "U".
Status Visit(const LargeStringType& type) { return SetFormat("U"); }

// StringViewType is exported with the two-character format string "vu".
Status Visit(const StringViewType& type) { return SetFormat("vu"); }

// Date32Type is exported with the format string "tdD".
Status Visit(const Date32Type& type) { return SetFormat("tdD"); }

// Date64Type is exported with the format string "tdm".
Status Visit(const Date64Type& type) { return SetFormat("tdm"); }
Expand Down Expand Up @@ -574,7 +579,7 @@ struct ArrayExporter {

// Export dictionary
if (data->dictionary != nullptr) {
dict_exporter_.reset(new ArrayExporter());
dict_exporter_ = std::make_unique<ArrayExporter>();
RETURN_NOT_OK(dict_exporter_->Export(data->dictionary));
}

Expand Down Expand Up @@ -1064,6 +1069,8 @@ struct SchemaImporter {
return ProcessPrimitive(binary());
case 'Z':
return ProcessPrimitive(large_binary());
case 'v':
return ProcessBinaryView();
case 'w':
return ProcessFixedSizeBinary();
case 'd':
Expand All @@ -1076,6 +1083,17 @@ struct SchemaImporter {
return f_parser_.Invalid();
}

// Handles the remainder of a view-type format string. Reads the next
// character from the format parser and maps it to a type:
//   'z' -> binary_view()
//   'u' -> utf8_view()
// Any other character is reported through f_parser_.Invalid(); a missing
// character is reported by CheckHasNext().
Status ProcessBinaryView() {
  RETURN_NOT_OK(f_parser_.CheckHasNext());
  const auto view_kind = f_parser_.Next();
  if (view_kind == 'z') {
    return ProcessPrimitive(binary_view());
  }
  if (view_kind == 'u') {
    return ProcessPrimitive(utf8_view());
  }
  return f_parser_.Invalid();
}

Status ProcessTemporal() {
RETURN_NOT_OK(f_parser_.CheckHasNext());
switch (f_parser_.Next()) {
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/c/bridge_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,8 @@ TEST_F(TestSchemaExport, Primitive) {
TestPrimitive(large_binary(), "Z");
TestPrimitive(utf8(), "u");
TestPrimitive(large_utf8(), "U");
TestPrimitive(binary_view(), "vz");
TestPrimitive(utf8_view(), "vu");

TestPrimitive(decimal(16, 4), "d:16,4");
TestPrimitive(decimal256(16, 4), "d:16,4,256");
Expand Down Expand Up @@ -874,6 +876,8 @@ TEST_F(TestArrayExport, Primitive) {
TestPrimitive(large_binary(), R"(["foo", "bar", null])");
TestPrimitive(utf8(), R"(["foo", "bar", null])");
TestPrimitive(large_utf8(), R"(["foo", "bar", null])");
TestPrimitive(binary_view(), R"(["foo", "bar", null])");
TestPrimitive(utf8_view(), R"(["foo", "bar", null])");

TestPrimitive(decimal(16, 4), R"(["1234.5670", null])");
TestPrimitive(decimal256(16, 4), R"(["1234.5670", null])");
Expand Down Expand Up @@ -1891,6 +1895,10 @@ TEST_F(TestSchemaImport, String) {
CheckImport(large_utf8());
FillPrimitive("Z");
CheckImport(large_binary());
FillPrimitive("vu");
CheckImport(utf8_view());
FillPrimitive("vz");
CheckImport(binary_view());

FillPrimitive("w:3");
CheckImport(fixed_size_binary(3));
Expand Down
4 changes: 4 additions & 0 deletions docs/source/format/CDataInterface.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,14 @@ strings:
+-----------------+---------------------------------------------------+------------+
| ``Z`` | large binary | |
+-----------------+---------------------------------------------------+------------+
| ``vz`` | binary view | |
+-----------------+---------------------------------------------------+------------+
| ``u`` | utf-8 string | |
+-----------------+---------------------------------------------------+------------+
| ``U`` | large utf-8 string | |
+-----------------+---------------------------------------------------+------------+
| ``vu`` | utf-8 view | |
+-----------------+---------------------------------------------------+------------+
| ``d:19,10`` | decimal128 [precision 19, scale 10] | |
+-----------------+---------------------------------------------------+------------+
| ``d:19,10,NNN`` | decimal bitwidth = NNN [precision 19, scale 10] | |
Expand Down

0 comments on commit b0bcb16

Please sign in to comment.