From 4b3f6c39d5818873974d1d996d876ed3e09e7870 Mon Sep 17 00:00:00 2001
From: Alessandro Molina
Date: Tue, 15 Jun 2021 11:56:52 +0200
Subject: [PATCH] ARROW-12431: [Python] Mask is inverted when creating FixedSizeBinaryArray

The NumPy-to-Arrow conversion for FixedSizeBinaryType passed the pyarrow
mask directly to FixedSizeBinaryBuilder::AppendValues(), which reads that
byte array as validity flags. pyarrow's mask= marks nulls with True, so
null and valid entries came out swapped.

Append the elements one at a time instead: a set mask entry appends a
null, any other entry appends the value, and the data pointer advances
by stride_ so that sliced (non-contiguous) NumPy input is read correctly
both with and without a mask.

The new tests cover masked fixed-size and variable-length binary arrays,
check that the converted array is unaffected by later writes to the NumPy
array, and exercise strided input. The mask in the test_pandas.py test is
flipped to match the corrected semantics.

Closes #10199 from amol-/ARROW-12431

Authored-by: Alessandro Molina
Signed-off-by: Antoine Pitrou
---
 cpp/src/arrow/python/numpy_to_arrow.cc | 15 +++++++--
 python/pyarrow/tests/test_array.py     | 45 ++++++++++++++++++++++++++
 python/pyarrow/tests/test_pandas.py    |  2 +-
 3 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index c17e70823d561..a382f76633336 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -594,9 +594,20 @@ Status NumPyConverter::Visit(const FixedSizeBinaryType& type) {
 
   if (mask_ != nullptr) {
     Ndarray1DIndexer<uint8_t> mask_values(mask_);
-    RETURN_NOT_OK(builder.AppendValues(data, length_, mask_values.data()));
+    RETURN_NOT_OK(builder.Reserve(length_));
+    for (int64_t i = 0; i < length_; ++i) {
+      if (mask_values[i]) {
+        RETURN_NOT_OK(builder.AppendNull());
+      } else {
+        RETURN_NOT_OK(builder.Append(data));
+      }
+      data += stride_;
+    }
   } else {
-    RETURN_NOT_OK(builder.AppendValues(data, length_));
+    for (int64_t i = 0; i < length_; ++i) {
+      RETURN_NOT_OK(builder.Append(data));
+      data += stride_;
+    }
   }
 
   std::shared_ptr<Array> result;
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 086ed4cb1606e..30500bc3c5bc4 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -2714,6 +2714,51 @@ def test_array_masked():
     assert arr.type == pa.int64()
 
 
+def test_binary_array_masked():
+    # ARROW-12431
+    masked_basic = pa.array([b'\x05'], type=pa.binary(1),
+                            mask=np.array([False]))
+    assert [b'\x05'] == masked_basic.to_pylist()
+
+    # Fixed Length Binary
+    masked = pa.array(np.array([b'\x05']), type=pa.binary(1),
+                      mask=np.array([False]))
+    assert [b'\x05'] == masked.to_pylist()
+
+    masked_nulls = pa.array(np.array([b'\x05']), type=pa.binary(1),
+                            mask=np.array([True]))
+    assert [None] == masked_nulls.to_pylist()
+
+    # Variable Length Binary
+    masked = pa.array(np.array([b'\x05']), type=pa.binary(),
+                      mask=np.array([False]))
+    assert [b'\x05'] == masked.to_pylist()
+
+    masked_nulls = pa.array(np.array([b'\x05']), type=pa.binary(),
+                            mask=np.array([True]))
+    assert [None] == masked_nulls.to_pylist()
+
+    # Fixed Length Binary, copy
+    npa = np.array([b'aaa', b'bbb', b'ccc']*10)
+    arrow_array = pa.array(npa, type=pa.binary(3),
+                           mask=np.array([False, False, False]*10))
+    npa[npa == b"bbb"] = b"XXX"
+    assert ([b'aaa', b'bbb', b'ccc']*10) == arrow_array.to_pylist()
+
+
+def test_binary_array_strided():
+    # Masked
+    nparray = np.array([b"ab", b"cd", b"ef"])
+    arrow_array = pa.array(nparray[::2], pa.binary(2),
+                           mask=np.array([False, False]))
+    assert [b"ab", b"ef"] == arrow_array.to_pylist()
+
+    # Unmasked
+    nparray = np.array([b"ab", b"cd", b"ef"])
+    arrow_array = pa.array(nparray[::2], pa.binary(2))
+    assert [b"ab", b"ef"] == arrow_array.to_pylist()
+
+
 def test_array_invalid_mask_raises():
     # ARROW-10742
     cases = [
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 77c18b839c669..7f904433fa2b7 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -1705,7 +1705,7 @@ def test_numpy_string_array_to_fixed_size_binary(self):
         expected = pa.array(list(arr), type=pa.binary(3))
         assert converted.equals(expected)
 
-        mask = np.array([True, False, True])
+        mask = np.array([False, True, False])
         converted = pa.array(arr, type=pa.binary(3), mask=mask)
         expected = pa.array([b'foo', None, b'baz'], type=pa.binary(3))
         assert converted.equals(expected)