Skip to content

Commit

Permalink
ARROW-6158: [C++/Python] Validate child array types with type fields of StructArray
Browse files Browse the repository at this point in the history

https://issues.apache.org/jira/browse/ARROW-6158

Closes #5488 from jorisvandenbossche/ARROW-6158-struct-array-validation and squashes the following commits:

7573781 <Joris Van den Bossche> ARROW-6158: Validate child array types with type fields of StructArray

Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
  • Loading branch information
jorisvandenbossche authored and wesm committed Sep 25, 2019
1 parent 232cde0 commit 199d3cf
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 8 deletions.
10 changes: 9 additions & 1 deletion cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,7 @@ struct ValidateVisitor {
}

Status Visit(const StructArray& array) {
const auto& struct_type = checked_cast<const StructType&>(*array.type());
if (array.num_fields() > 0) {
// Validate fields
int64_t array_length = array.field(0)->length();
Expand All @@ -1245,10 +1246,17 @@ struct ValidateVisitor {
it->type()->ToString(), " at position [", idx, "]");
}

auto it_type = struct_type.child(i)->type();
if (!it->type()->Equals(it_type)) {
return Status::Invalid("Child array at position [", idx,
"] does not match type field: ", it->type()->ToString(),
" vs ", it_type->ToString());
}

const Status child_valid = it->Validate();
if (!child_valid.ok()) {
return Status::Invalid("Child array invalid: ", child_valid.ToString(),
" at position [", idx, "}");
" at position [", idx, "]");
}
++idx;
}
Expand Down
20 changes: 15 additions & 5 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1288,7 +1288,9 @@ cdef class UnionArray(Array):
check_status(CUnionArray.MakeDense(
deref(types.ap), deref(value_offsets.ap), c, c_field_names,
c_type_codes, &out))
return pyarrow_wrap_array(out)
cdef Array result = pyarrow_wrap_array(out)
result.validate()
return result

@staticmethod
def from_sparse(Array types, list children, list field_names=None,
Expand Down Expand Up @@ -1326,7 +1328,9 @@ cdef class UnionArray(Array):
c_field_names,
c_type_codes,
&out))
return pyarrow_wrap_array(out)
cdef Array result = pyarrow_wrap_array(out)
result.validate()
return result


cdef class StringArray(Array):
Expand Down Expand Up @@ -1501,7 +1505,9 @@ cdef class DictionaryArray(Array):
c_result.reset(new CDictionaryArray(c_type, _indices.sp_array,
_dictionary.sp_array))

return pyarrow_wrap_array(c_result)
cdef Array result = pyarrow_wrap_array(c_result)
result.validate()
return result


cdef class StructArray(Array):
Expand Down Expand Up @@ -1626,7 +1632,9 @@ cdef class StructArray(Array):
else:
c_result = CStructArray.MakeFromFields(
c_arrays, c_fields, shared_ptr[CBuffer](), -1, 0)
return pyarrow_wrap_array(GetResultValue(c_result))
cdef Array result = pyarrow_wrap_array(GetResultValue(c_result))
result.validate()
return result


cdef class ExtensionArray(Array):
Expand Down Expand Up @@ -1665,7 +1673,9 @@ cdef class ExtensionArray(Array):
"for extension type {1}".format(storage.type, typ))

ext_array = make_shared[CExtensionArray](typ.sp_type, storage.sp_array)
return pyarrow_wrap_array(<shared_ptr[CArray]> ext_array)
cdef Array result = pyarrow_wrap_array(<shared_ptr[CArray]> ext_array)
result.validate()
return result


cdef dict _array_classes = {
Expand Down
9 changes: 7 additions & 2 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ def test_struct_from_buffers():


def test_struct_from_arrays():
a = pa.array([4, 5, 6])
a = pa.array([4, 5, 6], type=pa.int64())
b = pa.array(["bar", None, ""])
c = pa.array([[1, 2], None, [3, None]])
expected_list = [
Expand All @@ -447,7 +447,7 @@ def test_struct_from_arrays():
# From fields
fa = pa.field("a", a.type, nullable=False)
fb = pa.field("b", b.type)
fc = pa.field("c", b.type)
fc = pa.field("c", c.type)
arr = pa.StructArray.from_arrays([a, b, c], fields=[fa, fb, fc])
assert arr.type == pa.struct([fa, fb, fc])
assert not arr.type[0].nullable
Expand All @@ -460,6 +460,11 @@ def test_struct_from_arrays():
assert arr.type == pa.struct([])
assert arr.to_pylist() == []

# Inconsistent fields
fa2 = pa.field("a", pa.int32())
with pytest.raises(ValueError, match="int64 vs int32"):
pa.StructArray.from_arrays([a, b, c], fields=[fa2, fb, fc])


def test_dictionary_from_numpy():
indices = np.repeat([0, 1, 2], 2)
Expand Down

0 comments on commit 199d3cf

Please sign in to comment.