From 45e77c393d405a06d0a17253847a27888bf58391 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 10 Sep 2024 14:09:58 +0000 Subject: [PATCH] GH-44046: [Python] Fix threading issues with borrowed refs and pandas --- python/pyarrow/src/arrow/python/helpers.cc | 29 ++++++++++++++----- python/pyarrow/src/arrow/python/iterators.h | 4 +++ .../pyarrow/src/arrow/python/numpy_convert.cc | 12 ++++++++ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/python/pyarrow/src/arrow/python/helpers.cc b/python/pyarrow/src/arrow/python/helpers.cc index 18302e6fe0401..3ff5169a5e295 100644 --- a/python/pyarrow/src/arrow/python/helpers.cc +++ b/python/pyarrow/src/arrow/python/helpers.cc @@ -292,7 +292,11 @@ bool PyFloat_IsNaN(PyObject* obj) { namespace { +#ifdef Py_GIL_DISABLED +static std::once_flag pandas_static_initialized; +#else static bool pandas_static_initialized = false; +#endif // Once initialized, these variables hold borrowed references to Pandas static data. // We should not use OwnedRef here because Python destructors would be @@ -306,13 +310,7 @@ static PyObject* pandas_DateOffset = nullptr; } // namespace -void InitPandasStaticData() { - // NOTE: This is called with the GIL held. We needn't (and shouldn't, - // to avoid deadlocks) use an additional C++ lock (ARROW-10519). - if (pandas_static_initialized) { - return; - } - +void GetPandasStaticSymbols() { OwnedRef pandas; // Import pandas @@ -321,11 +319,14 @@ void InitPandasStaticData() { return; } +#ifndef Py_GIL_DISABLED // Since ImportModule can release the GIL, another thread could have // already initialized the static data. if (pandas_static_initialized) { return; } +#endif + OwnedRef ref; // set NaT sentinel and its type @@ -355,9 +356,23 @@ void InitPandasStaticData() { if (ImportFromModule(pandas.obj(), "DateOffset", &ref).ok()) { pandas_DateOffset = ref.obj(); } +} +#ifdef Py_GIL_DISABLED +void InitPandasStaticData() { + std::call_once(pandas_static_initialized, GetPandasStaticSymbols); +} +#else +void InitPandasStaticData() { + // NOTE: This is called with the GIL held. We needn't (and shouldn't, + // to avoid deadlocks) use an additional C++ lock (ARROW-10519). + if (pandas_static_initialized) { + return; + } + GetPandasStaticSymbols(); pandas_static_initialized = true; } +#endif bool PandasObjectIsNull(PyObject* obj) { if (!MayHaveNaN(obj)) { diff --git a/python/pyarrow/src/arrow/python/iterators.h b/python/pyarrow/src/arrow/python/iterators.h index 8512276848272..dd467f6ac4077 100644 --- a/python/pyarrow/src/arrow/python/iterators.h +++ b/python/pyarrow/src/arrow/python/iterators.h @@ -67,7 +67,11 @@ inline Status VisitSequenceGeneric(PyObject* obj, int64_t offset, VisitorFunc&& } if (PySequence_Check(obj)) { +#ifdef Py_GIL_DISABLED + if (PyTuple_Check(obj)) { +#else if (PyList_Check(obj) || PyTuple_Check(obj)) { +#endif // Use fast item access const Py_ssize_t size = PySequence_Fast_GET_SIZE(obj); for (Py_ssize_t i = offset; keep_going && i < size; ++i) { diff --git a/python/pyarrow/src/arrow/python/numpy_convert.cc b/python/pyarrow/src/arrow/python/numpy_convert.cc index 5fd2cb511ff8a..4113cc67d2fc6 100644 --- a/python/pyarrow/src/arrow/python/numpy_convert.cc +++ b/python/pyarrow/src/arrow/python/numpy_convert.cc @@ -488,7 +488,13 @@ Status NdarraysToSparseCSFTensor(MemoryPool* pool, PyObject* data_ao, PyObject* std::vector> indices(ndim); for (int i = 0; i < ndim - 1; ++i) { +#ifdef Py_GIL_DISABLED + PyObject* item = PySequence_ITEM(indptr_ao, i); + RETURN_IF_PYERROR(); + OwnedRef item_ref(item); +#else PyObject* item = PySequence_Fast_GET_ITEM(indptr_ao, i); +#endif if (!PyArray_Check(item)) { return Status::TypeError("Did not pass ndarray object for indptr"); } @@ -497,7 +503,13 @@ Status NdarraysToSparseCSFTensor(MemoryPool* pool, PyObject* data_ao, PyObject* } for (int i = 0; i < ndim; ++i) { +#ifdef Py_GIL_DISABLED + PyObject* item = PySequence_ITEM(indices_ao, i); + RETURN_IF_PYERROR(); + OwnedRef item_ref(item); +#else PyObject* item = PySequence_Fast_GET_ITEM(indices_ao, i); +#endif if (!PyArray_Check(item)) { return Status::TypeError("Did not pass ndarray object for indices"); }