apache · pitrou · Apr 15, 2024 · Apr 8, 2024 · Apr 9, 2024 · Apr 9, 2024
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
@@ -1543,7 +1543,20 @@ cdef class Array(_PandasConvertible):
     def _to_pandas(self, options, types_mapper=None, **kwargs):
         return _array_like_to_pandas(self, options, types_mapper=types_mapper)
 
-    def __array__(self, dtype=None):
+    def __array__(self, dtype=None, copy=None):
+        # TODO honor the copy=True case
+        if copy is False:
+            try:
+                values = self.to_numpy(zero_copy_only=True)
+            except ArrowInvalid as exc:
+                raise ArrowInvalid(
+                    "Unable to avoid a copy while creating a numpy array as requested.\n"
+                    "If using `np.array(obj, copy=False)` replace it with "
+                    "`np.asarray(obj)` to allow a copy when needed"
+                )
+            # values is already a numpy array at this point, but calling np.array(..)
+            # again to handle the `dtype` keyword with a no-copy guarantee
+            return np.array(values, dtype=dtype, copy=False)
         values = self.to_numpy(zero_copy_only=False)
         if dtype is None:
             return values

diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
@@ -525,7 +525,8 @@ cdef class ChunkedArray(_PandasConvertible):
 
         return values
 
-    def __array__(self, dtype=None):
+    def __array__(self, dtype=None, copy=None):
+        # copy keyword can be ignored because to_numpy() already returns a copy
         values = self.to_numpy()
         if dtype is None:
             return values
@@ -1533,7 +1534,8 @@ cdef class _Tabular(_PandasConvertible):
         raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly, use "
                         f"one of the `{self.__class__.__name__}.from_*` functions instead.")
 
-    def __array__(self, dtype=None):
+    def __array__(self, dtype=None, copy=None):
+        # copy keyword can be ignored as this always already returns a copy
         column_arrays = [
             np.asarray(self.column(i), dtype=dtype) for i in range(self.num_columns)
         ]

diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
@@ -31,6 +31,7 @@
 
 import pyarrow as pa
 import pyarrow.tests.strategies as past
+from pyarrow.vendored.version import Version
 
 
 def test_total_bytes_allocated():
@@ -3301,6 +3302,43 @@ def test_array_from_large_pyints():
         pa.array([int(2 ** 63)])
 
 
+def test_numpy_array_protocol():
+    """Exercise pyarrow.Array.__array__, including numpy's ``copy`` keyword."""
+    ints = pa.array([1, 2, 3])
+    ints_expected = np.array([1, 2, 3], dtype="int64")
+    np.testing.assert_array_equal(np.asarray(ints), ints_expected)
+
+    # zero-copy conversion is possible for this array, so copy=False has to
+    # work; this should not raise a deprecation warning with numpy 2.0+
+    np.testing.assert_array_equal(np.array(ints, copy=False), ints_expected)
+    np.testing.assert_array_equal(
+        np.array(ints, dtype="int64", copy=False), ints_expected
+    )
+
+    # the null is expected to come back as NaN in a float64 array: no zero-copy
+    with_null = pa.array([1, 2, None])
+    null_expected = np.array([1, 2, np.nan], dtype="float64")
+    np.testing.assert_array_equal(np.asarray(with_null), null_expected)
+
+    if Version(np.__version__) >= Version("2.0"):
+        # starting with numpy 2.0, the copy=False keyword is assumed to be strict
+        with pytest.raises(ValueError, match="Unable to avoid a copy"):
+            np.array(with_null, copy=False)
+
+        # null-free data, but a dtype change with copy=False must still fail
+        ints = pa.array([1, 2, 3])
+        with pytest.raises(ValueError):
+            np.array(ints, dtype="float64", copy=False)
+    else:
+        # copy keyword is not strict and not passed down to __array__
+        np.testing.assert_array_equal(
+            np.array(with_null, copy=False), null_expected
+        )
+        np.testing.assert_array_equal(
+            np.array(with_null, dtype="float64", copy=False), null_expected
+        )
+
+
 def test_array_protocol():
 
     class MyArray: