Skip to content

Commit

Permalink
ARROW-13735: [C++][Python] Creating a Map array with non-default fiel…
Browse files Browse the repository at this point in the history
…d names segfaults

The segfault only happened in Debug build (which is why some weren't able to repro) because it was from a failing DCHECK on this line:

https://github.com/apache/arrow/blob/e734856676a00335f3dfe79899a995d87286f5a9/cpp/src/arrow/array/array_nested.cc#L193

The LHS of the DCHECK is missing field names, while the RHS has them.

The basic problem is that MapBuilder doesn't preserve field names when creating the array. This PR fixes that.

Closes #11855 from wjones127/ARROW-13735-map-custom-name-segfault

Lead-authored-by: Will Jones <[email protected]>
Co-authored-by: Will Jones <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
2 people authored and pitrou committed Jan 4, 2022
1 parent 7235698 commit 31a07be
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 1 deletion.
24 changes: 24 additions & 0 deletions cpp/src/arrow/array/array_list_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,30 @@ TEST_F(TestMapArray, BuildingStringToInt) {
ASSERT_ARRAYS_EQUAL(*actual, expected);
}

TEST_F(TestMapArray, BuildingWithFieldNames) {
// Builder should preserve field names in output Array
ASSERT_OK_AND_ASSIGN(auto map_type,
MapType::Make(field("some_entries",
struct_({field("some_key", int16(), false),
field("some_value", int16())}),
false)));

auto key_builder = std::make_shared<Int16Builder>();
auto item_builder = std::make_shared<Int16Builder>();
MapBuilder map_builder(default_memory_pool(), key_builder, item_builder, map_type);

std::shared_ptr<Array> actual;
ASSERT_OK(map_builder.Append());
ASSERT_OK(key_builder->AppendValues({0, 1, 2, 3, 4, 5}));
ASSERT_OK(item_builder->AppendValues({1, 1, 2, 3, 5, 8}));
ASSERT_OK(map_builder.AppendNull());
ASSERT_OK(map_builder.Finish(&actual));
ASSERT_OK(actual->ValidateFull());

ASSERT_EQ(actual->type()->ToString(), map_type->ToString());
ASSERT_EQ(map_builder.type()->ToString(), map_type->ToString());
}

TEST_F(TestMapArray, ValidateErrorNullStruct) {
ASSERT_OK_AND_ASSIGN(
auto values,
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/array/builder_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ MapBuilder::MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& ke
const std::shared_ptr<DataType>& type)
: ArrayBuilder(pool), key_builder_(key_builder), item_builder_(item_builder) {
auto map_type = internal::checked_cast<const MapType*>(type.get());
entries_name_ = map_type->field(0)->name();
key_name_ = map_type->key_field()->name();
item_name_ = map_type->item_field()->name();
item_nullable_ = map_type->item_field()->nullable();
keys_sorted_ = map_type->keys_sorted();

std::vector<std::shared_ptr<ArrayBuilder>> child_builders{key_builder, item_builder};
Expand All @@ -59,6 +63,10 @@ MapBuilder::MapBuilder(MemoryPool* pool,
const std::shared_ptr<DataType>& type)
: ArrayBuilder(pool) {
auto map_type = internal::checked_cast<const MapType*>(type.get());
entries_name_ = map_type->field(0)->name();
key_name_ = map_type->key_field()->name();
item_name_ = map_type->item_field()->name();
item_nullable_ = map_type->item_field()->nullable();
keys_sorted_ = map_type->keys_sorted();
key_builder_ = struct_builder->child_builder(0);
item_builder_ = struct_builder->child_builder(1);
Expand Down
13 changes: 12 additions & 1 deletion cpp/src/arrow/array/builder_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,14 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {
ArrayBuilder* value_builder() const { return list_builder_->value_builder(); }

std::shared_ptr<DataType> type() const override {
return map(key_builder_->type(), item_builder_->type(), keys_sorted_);
// Key and Item builder may update types, but they don't contain the field names,
// so we need to reconstruct the type. (See ARROW-13735.)
return std::make_shared<MapType>(
field(entries_name_,
struct_({field(key_name_, key_builder_->type(), false),
field(item_name_, item_builder_->type(), item_nullable_)}),
false),
keys_sorted_);
}

Status ValidateOverflow(int64_t new_elements) {
Expand All @@ -342,6 +349,10 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {

protected:
bool keys_sorted_ = false;
bool item_nullable_ = false;
std::string entries_name_;
std::string key_name_;
std::string item_name_;
std::shared_ptr<ListBuilder> list_builder_;
std::shared_ptr<ArrayBuilder> key_builder_;
std::shared_ptr<ArrayBuilder> item_builder_;
Expand Down
9 changes: 9 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,15 @@ def test_list_from_arrays(list_array_type, list_type_factory):
result.validate(full=True)


def test_map_labelled():
# ARROW-13735
t = pa.map_(pa.field("name", "string", nullable=False), "int64")
arr = pa.array([[('a', 1), ('b', 2)], [('c', 3)]], type=t)
assert arr.type.key_field == pa.field("name", pa.utf8(), nullable=False)
assert arr.type.item_field == pa.field("value", pa.int64())
assert len(arr) == 2


def test_map_from_arrays():
offsets_arr = np.array([0, 2, 5, 8], dtype='i4')
offsets = pa.array(offsets_arr, type='int32')
Expand Down

0 comments on commit 31a07be

Please sign in to comment.