Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EXP] ARROW-4036: [C++] Pluggable status codes #3199

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions cpp/src/arrow/status-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,16 @@
#include "arrow/status.h"

namespace arrow {
namespace test {

TEST(StatusTest, TestCodeAndMessage) {
Status ok = Status::OK();
ASSERT_EQ(StatusCode::OK, ok.code());
ASSERT_EQ(nullptr, ok.status_range());
Status file_error = Status::IOError("file error");
ASSERT_EQ(StatusCode::IOError, file_error.code());
ASSERT_EQ("file error", file_error.message());
ASSERT_EQ(nullptr, file_error.status_range());
}

TEST(StatusTest, TestToString) {
Expand Down Expand Up @@ -77,4 +80,39 @@ TEST(StatusTest, AndStatus) {
ASSERT_TRUE(res.IsInvalid());
}

static const char* test_messages1[] = {"First error", "Second error"};
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See example use here.

static const char* test_messages2[] = {"Third error", "Fourth error"};

enum class TestCode1 : int32_t {
FirstError = 1,
SecondError = 2
};

enum class TestCode2 : int32_t {
ThirdError = 1,
FourthError = 2
};

static StatusRange test_range1("xxtest", 0x787874657374ULL, 2, test_messages1);
static StatusRange test_range2("xxtest2", 2, test_messages2);

TEST(StatusTest, StatusRange) {
ASSERT_EQ(test_range1.range_id(), 0x787874657374ULL);
ASSERT_EQ(test_range2.range_id(), 0x78787465737432ULL);

auto st = Status(test_range1.range_id(), 1, "foo");
ASSERT_EQ(st.ToString(), "[xxtest] First error: foo");
ASSERT_EQ(st.status_range(), &test_range1);
ASSERT_EQ(st.code<TestCode1>(), TestCode1::FirstError);

st = Status(test_range2.range_id(), 2, "bar");
ASSERT_EQ(st.ToString(), "[xxtest2] Fourth error: bar");
ASSERT_EQ(st.status_range(), &test_range2);
ASSERT_EQ(st.code<TestCode2>(), TestCode2::FourthError);

st = Status(12345ULL, 42, "bar");
ASSERT_EQ(st.ToString(), "Error 42 in unregistered status range 12345: bar");
}

} // namespace test
} // namespace arrow
86 changes: 81 additions & 5 deletions cpp/src/arrow/status.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,62 @@

#include "arrow/status.h"

#include <assert.h>
#include <algorithm>
#include <cstring>
#include <sstream>

#include "arrow/util/logging.h"

namespace arrow {

uint64_t StatusRange::RangeIdFromName(const char* name) {
uint64_t n = std::min<uint64_t>(8U, strlen(name));
uint64_t range_id = 0;
for (uint64_t i = 0; i < n; ++i) {
range_id = (range_id << 8) | static_cast<uint8_t>(name[i]);
}
return range_id;
}

StatusRange::StatusRange(const char* name, uint64_t range_id, int32_t num_codes,
const char** messages)
: name_(name), range_id_(range_id), num_codes_(num_codes), messages_(messages)
{
Status::RegisterStatusRange(this);
// XXX should unregister in destructor? Could be safer if non-trivial code
// is executed at program end...
}

StatusRange::StatusRange(const char* name, int32_t num_codes, const char** messages)
: StatusRange(name, RangeIdFromName(name), num_codes, messages)
{
}

std::string StatusRange::CodeAsString(int32_t code) const {
DCHECK_GT(code, 0) << "error code should be > 0";
DCHECK_LE(code, num_codes_) << "error code should be <= num_codes";
return messages_[code - 1];
}

std::unordered_map<uint64_t, const StatusRange*> Status::ranges_;

void Status::RegisterStatusRange(const StatusRange* status_range) {
uint64_t range_id = status_range->range_id();
DCHECK_EQ(range_id >> 56, 0) << "Upper byte of status range should be == 0";
auto p = ranges_.insert(std::make_pair(range_id, status_range));
DCHECK_EQ(p.second, true)
<< "Duplicate range id " << range_id << " for range '"
<< status_range->name() << "'";
}

Status::Status(StatusCode code, const std::string& msg) {
assert(code != StatusCode::OK);
state_ = new State;
state_->code = code;
state_->msg = msg;
DCHECK_NE(code, StatusCode::OK);
state_ = new State{0U, static_cast<int32_t>(code), msg};
}

Status::Status(uint64_t range_id, int32_t code, const std::string& msg) {
DCHECK_GT(code, 0);
state_ = new State{range_id, code, msg};
}

void Status::CopyFrom(const Status& s) {
Expand All @@ -32,11 +79,40 @@ void Status::CopyFrom(const Status& s) {
}
}

const StatusRange* Status::status_range() const {
if (state_ == nullptr) {
return nullptr;
}
auto range_id = state_->range_id;
if (range_id != 0) {
auto it = ranges_.find(range_id);
if (it != ranges_.end()) {
return it->second;
}
}
return nullptr;
}

std::string Status::CodeAsString() const {
if (state_ == nullptr) {
return "OK";
}

if (state_->range_id != 0) {
// Delegate to supplementary status range
auto range = status_range();
if (range != nullptr) {
std::stringstream ss;
ss << "[" << range->name() << "] ";
ss << range->CodeAsString(state_->code);
return ss.str();
} else {
std::stringstream ss;
ss << "Error " << state_->code << " in unregistered status range " << state_->range_id;
return ss.str();
}
}

const char* type;
switch (code()) {
case StatusCode::OK:
Expand Down
45 changes: 41 additions & 4 deletions cpp/src/arrow/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cstring>
#include <iosfwd>
#include <string>
#include <unordered_map>
#include <utility>

#ifdef ARROW_EXTRA_ERROR_CONTEXT
Expand Down Expand Up @@ -80,7 +81,7 @@

namespace arrow {

enum class StatusCode : char {
enum class StatusCode : int32_t {
OK = 0,
OutOfMemory = 1,
KeyError = 2,
Expand Down Expand Up @@ -109,6 +110,31 @@ enum class StatusCode : char {
class ARROW_MUST_USE_RESULT ARROW_EXPORT Status;
#endif

class ARROW_EXPORT StatusRange {
public:
StatusRange(const char* name, uint64_t range_id, int32_t num_codes, const char** messages);
// range_id will be deduced from name
StatusRange(const char* name, int32_t num_codes, const char** messages);

const char* name() const { return name_; }

int32_t num_codes() const { return num_codes_; }

uint64_t range_id() const { return range_id_; }

std::string CodeAsString(int32_t code) const;

protected:
ARROW_DISALLOW_COPY_AND_ASSIGN(StatusRange);

static uint64_t RangeIdFromName(const char* name);

const char* name_;
const uint64_t range_id_;
const int32_t num_codes_;
const char** messages_;
};

/// \brief Status outcome object (success or error)
///
/// The Status object is an object holding the outcome of an operation.
Expand All @@ -130,6 +156,7 @@ class ARROW_EXPORT Status {
}

Status(StatusCode code, const std::string& msg);
Status(uint64_t range_id, int32_t code, const std::string& msg);

// Copy the specified status.
Status(const Status& s);
Expand Down Expand Up @@ -286,21 +313,31 @@ class ARROW_EXPORT Status {
/// text or POSIX code information.
std::string CodeAsString() const;

/// \brief Return the StatusCode value attached to this status.
StatusCode code() const { return ok() ? StatusCode::OK : state_->code; }
/// \brief Return the status code enum value attached to this status.
template <typename T = StatusCode>
T code(T* = nullptr) const {
return static_cast<T>(ok() ? 0 : state_->code);
}

const StatusRange* status_range() const;

/// \brief Return the specific error message attached to this status.
std::string message() const { return ok() ? "" : state_->msg; }

static void RegisterStatusRange(const StatusRange* status_range);

private:
struct State {
StatusCode code;
uint64_t range_id;
int32_t code;
std::string msg;
};
// OK status has a `NULL` state_. Otherwise, `state_` points to
// a `State` structure containing the error code and message(s)
State* state_;

static std::unordered_map<uint64_t, const StatusRange*> ranges_;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like we're needing more and more a global arrow::init().

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you afraid of the static initialization here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, seems likely that we'll need more and more controlled static initialization:

  • Utf8 utils make use of call_once
  • This patch (which other static initializers might depend in proper order, but controllable since we're referencing with int32/int64)
  • Gandiva make use of call_once, I think this one can be tricky due to possible failures and the time it takes to init, e.g. it's friendlier to the user if he has visibility (time take, failure, ...)


void DeleteState() {
delete state_;
state_ = NULL;
Expand Down