Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-4036: [C++] Pluggable Status message, by exposing an abstract delegate class. #4484

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions c_glib/arrow-glib/error.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,22 +65,15 @@ garrow_error_code(const arrow::Status &status)
return GARROW_ERROR_NOT_IMPLEMENTED;
case arrow::StatusCode::SerializationError:
return GARROW_ERROR_SERIALIZATION;
case arrow::StatusCode::PythonError:
return GARROW_ERROR_PYTHON;
case arrow::StatusCode::PlasmaObjectExists:
return GARROW_ERROR_PLASMA_OBJECT_EXISTS;
case arrow::StatusCode::PlasmaObjectNonexistent:
return GARROW_ERROR_PLASMA_OBJECT_NONEXISTENT;
case arrow::StatusCode::PlasmaStoreFull:
return GARROW_ERROR_PLASMA_STORE_FULL;
case arrow::StatusCode::PlasmaObjectAlreadySealed:
return GARROW_ERROR_PLASMA_OBJECT_ALREADY_SEALED;
case arrow::StatusCode::CodeGenError:
return GARROW_ERROR_CODE_GENERATION;
case arrow::StatusCode::ExpressionValidationError:
return GARROW_ERROR_EXPRESSION_VALIDATION;
case arrow::StatusCode::ExecutionError:
return GARROW_ERROR_EXECUTION;
case arrow::StatusCode::AlreadyExists:
return GARROW_ERROR_ALREADY_EXISTS;

default:
return GARROW_ERROR_UNKNOWN;
}
Expand Down
12 changes: 2 additions & 10 deletions c_glib/arrow-glib/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,11 @@ G_BEGIN_DECLS
* @GARROW_ERROR_UNKNOWN: Unknown error.
* @GARROW_ERROR_NOT_IMPLEMENTED: The feature is not implemented.
* @GARROW_ERROR_SERIALIZATION: Serialization error.
* @GARROW_ERROR_PYTHON: Python error.
* @GARROW_ERROR_PLASMA_OBJECT_EXISTS: Object already exists on Plasma.
* @GARROW_ERROR_PLASMA_OBJECT_NONEXISTENT: Object doesn't exist on Plasma.
* @GARROW_ERROR_PLASMA_STORE_FULL: Store full error on Plasma.
* @GARROW_ERROR_PLASMA_OBJECT_ALREADY_SEALED: Object already sealed on Plasma.
* @GARROW_ERROR_CODE_GENERATION: Error generating code for expression evaluation
* in Gandiva.
* @GARROW_ERROR_EXPRESSION_VALIDATION: Validation errors in expression given for code generation.
* @GARROW_ERROR_EXECUTION: Execution error while evaluating the expression against a record batch.
* @GARROW_ALREADY_EXISTS: Item already exists error.
*
* The error codes are used by all arrow-glib functions.
*
Expand All @@ -60,14 +56,10 @@ typedef enum {
GARROW_ERROR_UNKNOWN = 9,
GARROW_ERROR_NOT_IMPLEMENTED,
GARROW_ERROR_SERIALIZATION,
GARROW_ERROR_PYTHON,
GARROW_ERROR_PLASMA_OBJECT_EXISTS = 20,
GARROW_ERROR_PLASMA_OBJECT_NONEXISTENT,
GARROW_ERROR_PLASMA_STORE_FULL,
GARROW_ERROR_PLASMA_OBJECT_ALREADY_SEALED,
GARROW_ERROR_CODE_GENERATION = 40,
GARROW_ERROR_EXPRESSION_VALIDATION = 41,
GARROW_ERROR_EXECUTION = 42,
GARROW_ERROR_ALREADY_EXISTS = 45,
} GArrowError;

#define GARROW_ERROR garrow_error_quark()
Expand Down
2 changes: 1 addition & 1 deletion c_glib/test/plasma/test-plasma-created-object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def teardown

test("#abort") do
@object.data.set_data(0, @data)
assert_raise(Arrow::Error::PlasmaObjectExists) do
assert_raise(Arrow::Error::AlreadyExists) do
@client.create(@id, @data.bytesize, @options)
end
@object.abort
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/kernels/cast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
if (ARROW_PREDICT_FALSE(!_s.ok())) { \
std::stringstream ss; \
ss << __FILE__ << ":" << __LINE__ << " code: " << #s << "\n" << _s.message(); \
ctx->SetStatus(Status(_s.code(), ss.str())); \
ctx->SetStatus(Status(_s.code(), ss.str(), s.detail())); \
emkornfield marked this conversation as resolved.
Show resolved Hide resolved
return; \
} \
} while (0)
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/csv/column-builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class TypedColumnBuilder : public ColumnBuilder {
} else {
std::stringstream ss;
ss << "In column #" << col_index_ << ": " << st.message();
return Status(st.code(), ss.str());
return Status(st.code(), ss.str(), st.detail());
}
}

Expand Down
180 changes: 128 additions & 52 deletions cpp/src/arrow/python/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,15 @@

#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/logging.h"

#include "arrow/python/helpers.h"

namespace arrow {

using internal::checked_cast;

namespace py {

static std::mutex memory_pool_mutex;
Expand All @@ -47,6 +51,129 @@ MemoryPool* get_memory_pool() {
}
}

// ----------------------------------------------------------------------
// PythonErrorDetail

namespace {

const char kErrorDetailTypeId[] = "arrow::py::PythonErrorDetail";

// Try to match the Python exception type with an appropriate Status code
StatusCode MapPyError(PyObject* exc_type) {
StatusCode code;

if (PyErr_GivenExceptionMatches(exc_type, PyExc_MemoryError)) {
code = StatusCode::OutOfMemory;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_IndexError)) {
code = StatusCode::IndexError;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_KeyError)) {
code = StatusCode::KeyError;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_TypeError)) {
code = StatusCode::TypeError;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_ValueError) ||
PyErr_GivenExceptionMatches(exc_type, PyExc_OverflowError)) {
code = StatusCode::Invalid;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_EnvironmentError)) {
code = StatusCode::IOError;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_NotImplementedError)) {
code = StatusCode::NotImplemented;
} else {
code = StatusCode::UnknownError;
}
return code;
}

// PythonErrorDetail indicates a Python exception was raised.
class PythonErrorDetail : public StatusDetail {
public:
const char* type_id() const override { return kErrorDetailTypeId; }

std::string ToString() const override {
// This is simple enough not to need the GIL
const auto ty = reinterpret_cast<const PyTypeObject*>(exc_type_.obj());
// XXX Should we also print traceback?
return std::string("Python exception: ") + ty->tp_name;
}

void RestorePyError() const {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for doing this, I started down this path and decided to keep it simpler because there were semantics of the GIL I was hazy on.

Py_INCREF(exc_type_.obj());
Py_INCREF(exc_value_.obj());
Py_INCREF(exc_traceback_.obj());
PyErr_Restore(exc_type_.obj(), exc_value_.obj(), exc_traceback_.obj());
}

PyObject* exc_type() const { return exc_type_.obj(); }

PyObject* exc_value() const { return exc_value_.obj(); }

static std::shared_ptr<PythonErrorDetail> FromPyError() {
PyObject* exc_type = nullptr;
PyObject* exc_value = nullptr;
PyObject* exc_traceback = nullptr;

PyErr_Fetch(&exc_type, &exc_value, &exc_traceback);
PyErr_NormalizeException(&exc_type, &exc_value, &exc_traceback);
ARROW_CHECK(exc_type)
<< "PythonErrorDetail::FromPyError called without a Python error set";
DCHECK(PyType_Check(exc_type));
DCHECK(exc_value); // Ensured by PyErr_NormalizeException, double-check
if (exc_traceback == nullptr) {
// Needed by PyErr_Restore()
Py_INCREF(Py_None);
exc_traceback = Py_None;
}

std::shared_ptr<PythonErrorDetail> detail(new PythonErrorDetail);
detail->exc_type_.reset(exc_type);
detail->exc_value_.reset(exc_value);
detail->exc_traceback_.reset(exc_traceback);
return detail;
}

protected:
PythonErrorDetail() = default;

OwnedRefNoGIL exc_type_, exc_value_, exc_traceback_;
};

} // namespace

// ----------------------------------------------------------------------
// Python exception <-> Status

Status ConvertPyError(StatusCode code) {
auto detail = PythonErrorDetail::FromPyError();
if (code == StatusCode::UnknownError) {
code = MapPyError(detail->exc_type());
}

std::string message;
RETURN_NOT_OK(internal::PyObject_StdStringStr(detail->exc_value(), &message));
return Status(code, message, detail);
}

Status PassPyError() {
if (PyErr_Occurred()) {
return ConvertPyError();
}
return Status::OK();
}

bool IsPyError(const Status& status) {
if (status.ok()) {
return false;
}
auto detail = status.detail();
bool result = detail != nullptr && detail->type_id() == kErrorDetailTypeId;
return result;
}

void RestorePyError(const Status& status) {
ARROW_CHECK(IsPyError(status));
const auto& detail = checked_cast<const PythonErrorDetail&>(*status.detail());
detail.RestorePyError();
}

// ----------------------------------------------------------------------
// PyBuffer

Expand All @@ -64,7 +191,7 @@ Status PyBuffer::Init(PyObject* obj) {
}
return Status::OK();
} else {
return Status(StatusCode::PythonError, "");
return ConvertPyError(StatusCode::Invalid);
}
}

Expand All @@ -83,56 +210,5 @@ PyBuffer::~PyBuffer() {
}
}

// ----------------------------------------------------------------------
// Python exception -> Status

Status ConvertPyError(StatusCode code) {
PyObject* exc_type = nullptr;
PyObject* exc_value = nullptr;
PyObject* traceback = nullptr;

PyErr_Fetch(&exc_type, &exc_value, &traceback);
PyErr_NormalizeException(&exc_type, &exc_value, &traceback);

DCHECK_NE(exc_type, nullptr) << "ConvertPyError called without an exception set";

OwnedRef exc_type_ref(exc_type);
OwnedRef exc_value_ref(exc_value);
OwnedRef traceback_ref(traceback);

std::string message;
RETURN_NOT_OK(internal::PyObject_StdStringStr(exc_value, &message));

if (code == StatusCode::UnknownError) {
// Try to match the Python exception type with an appropriate Status code
if (PyErr_GivenExceptionMatches(exc_type, PyExc_MemoryError)) {
code = StatusCode::OutOfMemory;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_IndexError)) {
code = StatusCode::IndexError;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_KeyError)) {
code = StatusCode::KeyError;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_TypeError)) {
code = StatusCode::TypeError;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_ValueError) ||
PyErr_GivenExceptionMatches(exc_type, PyExc_OverflowError)) {
code = StatusCode::Invalid;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_EnvironmentError)) {
code = StatusCode::IOError;
} else if (PyErr_GivenExceptionMatches(exc_type, PyExc_NotImplementedError)) {
code = StatusCode::NotImplemented;
}
}
return Status(code, message);
}

Status PassPyError() {
if (PyErr_Occurred()) {
// Do not call PyErr_Clear, the assumption is that someone further
// up the call stack will want to deal with the Python error.
return Status(StatusCode::PythonError, "");
}
return Status::OK();
}

} // namespace py
} // namespace arrow
25 changes: 21 additions & 4 deletions cpp/src/arrow/python/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,15 @@ class Result;

namespace py {

// Convert current Python error to a Status. The Python error state is cleared
// and can be restored with RestorePyError().
ARROW_PYTHON_EXPORT Status ConvertPyError(StatusCode code = StatusCode::UnknownError);
// Same as ConvertPyError(), but returns Status::OK() if no Python error is set.
ARROW_PYTHON_EXPORT Status PassPyError();
// Query whether the given Status is a Python error (as wrapped by ConvertPyError()).
ARROW_PYTHON_EXPORT bool IsPyError(const Status& status);
// Restore a Python error wrapped in a Status.
ARROW_PYTHON_EXPORT void RestorePyError(const Status& status);

// Catch a pending Python exception and return the corresponding Status.
// If no exception is pending, Status::OK() is returned.
Expand All @@ -48,9 +56,6 @@ inline Status CheckPyError(StatusCode code = StatusCode::UnknownError) {
}
}

ARROW_PYTHON_EXPORT Status PassPyError();

// TODO(wesm): We can just let errors pass through. To be explored later
#define RETURN_IF_PYERROR() ARROW_RETURN_NOT_OK(CheckPyError());

#define PY_RETURN_IF_ERROR(CODE) ARROW_RETURN_NOT_OK(CheckPyError(CODE));
Expand Down Expand Up @@ -97,6 +102,18 @@ class ARROW_PYTHON_EXPORT PyAcquireGIL {
ARROW_DISALLOW_COPY_AND_ASSIGN(PyAcquireGIL);
};

// A RAII-style helper that releases the GIL until the end of a lexical block
class ARROW_PYTHON_EXPORT PyReleaseGIL {
public:
PyReleaseGIL() { saved_state_ = PyEval_SaveThread(); }

~PyReleaseGIL() { PyEval_RestoreThread(saved_state_); }

private:
PyThreadState* saved_state_;
ARROW_DISALLOW_COPY_AND_ASSIGN(PyReleaseGIL);
};

// A helper to call safely into the Python interpreter from arbitrary C++ code.
// The GIL is acquired, and the current thread's error status is preserved.
template <typename Function>
Expand All @@ -109,7 +126,7 @@ Status SafeCallIntoPython(Function&& func) {
Status st = std::forward<Function>(func)();
// If the return Status is a "Python error", the current Python error status
// describes the error and shouldn't be clobbered.
if (!st.IsPythonError() && exc_type != NULLPTR) {
if (!IsPyError(st) && exc_type != NULLPTR) {
PyErr_Restore(exc_type, exc_value, exc_traceback);
}
return st;
Expand Down
Loading