Skip to content

Commit

Permalink
ARROW-12431: [Python] Mask is inverted when creating FixedSizeBinaryArray
Browse files Browse the repository at this point in the history

Closes apache#10199 from amol-/ARROW-12431

Authored-by: Alessandro Molina <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
amol- authored and pitrou committed Jun 15, 2021
1 parent 655b281 commit 4b3f6c3
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 3 deletions.
15 changes: 13 additions & 2 deletions cpp/src/arrow/python/numpy_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -594,9 +594,20 @@ Status NumPyConverter::Visit(const FixedSizeBinaryType& type) {

if (mask_ != nullptr) {
Ndarray1DIndexer<uint8_t> mask_values(mask_);
RETURN_NOT_OK(builder.AppendValues(data, length_, mask_values.data()));
RETURN_NOT_OK(builder.Reserve(length_));
for (int64_t i = 0; i < length_; ++i) {
if (mask_values[i]) {
RETURN_NOT_OK(builder.AppendNull());
} else {
RETURN_NOT_OK(builder.Append(data));
}
data += stride_;
}
} else {
RETURN_NOT_OK(builder.AppendValues(data, length_));
for (int64_t i = 0; i < length_; ++i) {
RETURN_NOT_OK(builder.Append(data));
data += stride_;
}
}

std::shared_ptr<Array> result;
Expand Down
45 changes: 45 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2714,6 +2714,51 @@ def test_array_masked():
assert arr.type == pa.int64()


def test_binary_array_masked():
    """Regression test for ARROW-12431.

    Checks that, for binary arrays built with ``mask=...``, a ``False``
    mask entry keeps the value and a ``True`` mask entry yields ``None``.
    """
    # Python list input, fixed-width type, nothing masked.
    from_list = pa.array([b'\x05'], type=pa.binary(1),
                         mask=np.array([False]))
    assert from_list.to_pylist() == [b'\x05']

    # NumPy input: first the fixed-length type, then the variable-length
    # one; for each, an unmasked conversion followed by a masked one.
    for binary_type in (pa.binary(1), pa.binary()):
        for is_masked, expected in ((False, [b'\x05']), (True, [None])):
            converted = pa.array(np.array([b'\x05']), type=binary_type,
                                 mask=np.array([is_masked]))
            assert converted.to_pylist() == expected

    # Fixed-length binary: overwriting the NumPy source after conversion
    # must not be visible through the Arrow array.
    source = np.array([b'aaa', b'bbb', b'ccc'] * 10)
    nothing_masked = np.array([False, False, False] * 10)
    snapshot = pa.array(source, type=pa.binary(3), mask=nothing_masked)
    source[source == b"bbb"] = b"XXX"
    assert snapshot.to_pylist() == [b'aaa', b'bbb', b'ccc'] * 10


def test_binary_array_strided():
    """Convert an every-other-element NumPy slice to fixed-size binary.

    The ``[::2]`` slice of a three-element array is converted both with
    an all-False mask and with no mask; each result must contain exactly
    the first and third elements.
    """
    # With a mask that hides nothing.
    base = np.array([b"ab", b"cd", b"ef"])
    every_other = base[::2]
    with_mask = pa.array(every_other, pa.binary(2),
                         mask=np.array([False, False]))
    assert with_mask.to_pylist() == [b"ab", b"ef"]

    # Without any mask, starting again from a fresh source array.
    base = np.array([b"ab", b"cd", b"ef"])
    every_other = base[::2]
    without_mask = pa.array(every_other, pa.binary(2))
    assert without_mask.to_pylist() == [b"ab", b"ef"]


def test_array_invalid_mask_raises():
# ARROW-10742
cases = [
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1705,7 +1705,7 @@ def test_numpy_string_array_to_fixed_size_binary(self):
expected = pa.array(list(arr), type=pa.binary(3))
assert converted.equals(expected)

mask = np.array([True, False, True])
mask = np.array([False, True, False])
converted = pa.array(arr, type=pa.binary(3), mask=mask)
expected = pa.array([b'foo', None, b'baz'], type=pa.binary(3))
assert converted.equals(expected)
Expand Down

0 comments on commit 4b3f6c3

Please sign in to comment.