Skip to content

Commit

Permalink
apacheGH-44046: [Python] Fix threading issues with borrowed refs and …
Browse files Browse the repository at this point in the history
…pandas
  • Loading branch information
lysnikolaou committed Sep 10, 2024
1 parent fed5fcb commit 45e77c3
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 7 deletions.
29 changes: 22 additions & 7 deletions python/pyarrow/src/arrow/python/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,11 @@ bool PyFloat_IsNaN(PyObject* obj) {

namespace {

#ifdef Py_GIL_DISABLED
static std::once_flag pandas_static_initialized;
#else
static bool pandas_static_initialized = false;
#endif

// Once initialized, these variables hold borrowed references to Pandas static data.
// We should not use OwnedRef here because Python destructors would be
Expand All @@ -306,13 +310,7 @@ static PyObject* pandas_DateOffset = nullptr;

} // namespace

void InitPandasStaticData() {
// NOTE: This is called with the GIL held. We needn't (and shouldn't,
// to avoid deadlocks) use an additional C++ lock (ARROW-10519).
if (pandas_static_initialized) {
return;
}

void GetPandasStaticSymbols() {
OwnedRef pandas;

// Import pandas
Expand All @@ -321,11 +319,14 @@ void InitPandasStaticData() {
return;
}

#ifndef Py_GIL_DISABLED
// Since ImportModule can release the GIL, another thread could have
// already initialized the static data.
if (pandas_static_initialized) {
return;
}
#endif

OwnedRef ref;

// set NaT sentinel and its type
Expand Down Expand Up @@ -355,9 +356,23 @@ void InitPandasStaticData() {
if (ImportFromModule(pandas.obj(), "DateOffset", &ref).ok()) {
pandas_DateOffset = ref.obj();
}
}

#ifdef Py_GIL_DISABLED
void InitPandasStaticData() {
std::call_once(pandas_static_initialized, GetPandasStaticSymbols);
}
#else
void InitPandasStaticData() {
// NOTE: This is called with the GIL held. We needn't (and shouldn't,
// to avoid deadlocks) use an additional C++ lock (ARROW-10519).
if (pandas_static_initialized) {
return;
}
GetPandasStaticSymbols();
pandas_static_initialized = true;
}
#endif

bool PandasObjectIsNull(PyObject* obj) {
if (!MayHaveNaN(obj)) {
Expand Down
4 changes: 4 additions & 0 deletions python/pyarrow/src/arrow/python/iterators.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,11 @@ inline Status VisitSequenceGeneric(PyObject* obj, int64_t offset, VisitorFunc&&
}

if (PySequence_Check(obj)) {
#ifdef Py_GIL_DISABLED
if (PyTuple_Check(obj)) {
#else
if (PyList_Check(obj) || PyTuple_Check(obj)) {
#endif
// Use fast item access
const Py_ssize_t size = PySequence_Fast_GET_SIZE(obj);
for (Py_ssize_t i = offset; keep_going && i < size; ++i) {
Expand Down
12 changes: 12 additions & 0 deletions python/pyarrow/src/arrow/python/numpy_convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,13 @@ Status NdarraysToSparseCSFTensor(MemoryPool* pool, PyObject* data_ao, PyObject*
std::vector<std::shared_ptr<Tensor>> indices(ndim);

for (int i = 0; i < ndim - 1; ++i) {
#ifdef Py_GIL_DISABLED
PyObject* item = PySequence_ITEM(indptr_ao, i);
RETURN_IF_PYERROR();
OwnedRef item_ref(item);
#else
PyObject* item = PySequence_Fast_GET_ITEM(indptr_ao, i);
#endif
if (!PyArray_Check(item)) {
return Status::TypeError("Did not pass ndarray object for indptr");
}
Expand All @@ -497,7 +503,13 @@ Status NdarraysToSparseCSFTensor(MemoryPool* pool, PyObject* data_ao, PyObject*
}

for (int i = 0; i < ndim; ++i) {
#ifdef Py_GIL_DISABLED
PyObject* item = PySequence_ITEM(indices_ao, i);
RETURN_IF_PYERROR();
OwnedRef item_ref(item);
#else
PyObject* item = PySequence_Fast_GET_ITEM(indices_ao, i);
#endif
if (!PyArray_Check(item)) {
return Status::TypeError("Did not pass ndarray object for indices");
}
Expand Down

0 comments on commit 45e77c3

Please sign in to comment.