From abf4fbf924391149ba2717aa9b57090094271a5d Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 9 Feb 2024 09:45:45 -0400 Subject: [PATCH] GH-39942: [Python] Make capsule name check more lenient (#39977) ### Rationale for this change While #39969 fixed the immediate issue caused by the update of the capsule name used by reticulate whilst converting an R "external pointer", it will still result in an error if somebody is using an older version of the Arrow R package. ### What changes are included in this PR? The pyarrow Cython code was modified to accept capsules with the name NULL or "r_extptr". ### Are these changes tested? Not sure where the best place for this is, but: CRAN arrow + released pyarrow + new reticulate (errors): ``` r library(arrow, warn.conflicts = FALSE) reticulate::use_virtualenv("~/Desktop/rscratch/arrow/.venv") packageVersion("arrow") #> [1] '14.0.0.2' packageVersion("reticulate") #> [1] '1.35.0' pa <- reticulate::import("pyarrow") pa[["__version__"]] #> [1] "15.0.0" reticulate::r_to_py(arrow::int32()) #> PyCapsule_GetPointer called with incorrect name ``` CRAN arrow + pyarrow from this PR + old reticulate: ``` r library(arrow, warn.conflicts = FALSE) reticulate::use_virtualenv("~/Desktop/rscratch/arrow/.venv") packageVersion("arrow") #> [1] '14.0.0.2' packageVersion("reticulate") #> [1] '1.34.0' pa <- reticulate::import("pyarrow") pa[["__version__"]] #> [1] "16.0.0.dev92+geafcff7a5" reticulate::r_to_py(arrow::int32()) #> DataType(int32) ``` CRAN arrow + pyarrow from this PR + new reticulate: ``` r library(arrow, warn.conflicts = FALSE) reticulate::use_virtualenv("~/Desktop/rscratch/arrow/.venv") packageVersion("arrow") #> [1] '14.0.0.2' packageVersion("reticulate") #> [1] '1.35.0' pa <- reticulate::import("pyarrow") pa[["__version__"]] #> [1] "16.0.0.dev92+geafcff7a5" reticulate::r_to_py(arrow::int32()) #> DataType(int32) ``` ### Are there any user-facing changes? No * Closes: #39942 Lead-authored-by: Dewey Dunnington Co-authored-by: Dewey Dunnington Co-authored-by: Antoine Pitrou Co-authored-by: Joris Van den Bossche Signed-off-by: Dewey Dunnington --- python/pyarrow/types.pxi | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 50b10c5512dc1..e9bf56c6213f6 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -15,7 +15,13 @@ # specific language governing permissions and limitations # under the License. -from cpython.pycapsule cimport PyCapsule_CheckExact, PyCapsule_GetPointer, PyCapsule_New, PyCapsule_IsValid +from cpython.pycapsule cimport ( + PyCapsule_CheckExact, + PyCapsule_GetPointer, + PyCapsule_GetName, + PyCapsule_New, + PyCapsule_IsValid +) import atexit from collections.abc import Mapping @@ -105,6 +111,7 @@ cdef void* _as_c_pointer(v, allow_null=False) except *: (the latter for compatibility with raw pointers exported by reticulate) """ cdef void* c_ptr + cdef const char* capsule_name if isinstance(v, int): c_ptr = v elif isinstance(v, float): @@ -114,7 +121,20 @@ cdef void* _as_c_pointer(v, allow_null=False) except *: "Arrow library", UserWarning, stacklevel=2) c_ptr = v elif PyCapsule_CheckExact(v): - c_ptr = PyCapsule_GetPointer(v, NULL) + # An R external pointer was how the R bindings passed pointer values to + # Python from versions 7 to 15 (inclusive); however, the reticulate 1.35.0 + # update changed the name of the capsule from NULL to "r_extptr". + # Newer versions of the R package pass a Python integer; however, this + # workaround ensures that old versions of the R package continue to work + # with newer versions of pyarrow. + capsule_name = PyCapsule_GetName(v) + if capsule_name == NULL or capsule_name == b"r_extptr": + c_ptr = PyCapsule_GetPointer(v, capsule_name) + else: + capsule_name_str = capsule_name.decode() + raise ValueError( + f"Can't convert PyCapsule with name '{capsule_name_str}' to pointer address" + ) else: raise TypeError(f"Expected a pointer value, got {type(v)!r}") if not allow_null and c_ptr == NULL: