Skip to content

Commit

Permalink
Basic object model for sequence->arrow conversions
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Mar 7, 2016
1 parent bdb02e7 commit 94f122f
Show file tree
Hide file tree
Showing 10 changed files with 79 additions and 39 deletions.
1 change: 1 addition & 0 deletions cpp/src/arrow/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "arrow/table/table.h"

#include "arrow/types/boolean.h"
#include "arrow/types/construct.h"
#include "arrow/types/floating.h"
#include "arrow/types/integer.h"
#include "arrow/types/list.h"
Expand Down
6 changes: 5 additions & 1 deletion cpp/src/arrow/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,14 @@ class ArrayBuilder {
// ownership of the data
virtual std::shared_ptr<Array> Finish() = 0;

// Read-only access to the builder's data type. Hands back a reference to the
// stored type_ member, so no shared_ptr copy is made by the call itself.
const std::shared_ptr<DataType>& type() const { return type_; }

protected:
MemoryPool* pool_;

TypePtr type_;
std::shared_ptr<DataType> type_;

// When nulls are first appended to the builder, the null bitmap is allocated
std::shared_ptr<PoolBuffer> nulls_;
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
install(FILES
boolean.h
collection.h
construct.h
datetime.h
decimal.h
floating.h
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/types/boolean.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ namespace arrow {

typedef PrimitiveArrayImpl<BooleanType> BooleanArray;

// typedef PrimitiveBuilder<BooleanType, BooleanArray> BooleanBuilder;
// Placeholder builder class for booleans: at this point it adds nothing to
// what it inherits from ArrayBuilder.
class BooleanBuilder : public ArrayBuilder {};

} // namespace arrow

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/types/construct.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class ArrayBuilder;
out->reset(new BuilderType(pool, type)); \
return Status::OK();

Status make_builder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
std::shared_ptr<ArrayBuilder>* out) {
switch (type->type) {
BUILDER_CASE(UINT8, UInt8Builder);
Expand All @@ -62,7 +62,7 @@ Status make_builder(MemoryPool* pool, const std::shared_ptr<DataType>& type,

const std::shared_ptr<DataType>& value_type = static_cast<ListType*>(
type.get())->value_type;
RETURN_NOT_OK(make_builder(pool, value_type, &value_builder));
RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
out->reset(new ListBuilder(pool, type, value_builder));
return Status::OK();
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/types/construct.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class ArrayBuilder;
class MemoryPool;
class Status;

Status make_builder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
std::shared_ptr<ArrayBuilder>* out);

} // namespace arrow
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/types/list-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class TestListBuilder : public TestBuilder {
type_ = TypePtr(new ListType(value_type_));

std::shared_ptr<ArrayBuilder> tmp;
ASSERT_OK(make_builder(pool_, type_, &tmp));
ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
builder_ = std::dynamic_pointer_cast<ListBuilder>(tmp);
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/types/primitive-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ class TestPrimitiveBuilder : public TestBuilder {
type_ = Attrs::type();

std::shared_ptr<ArrayBuilder> tmp;
ASSERT_OK(make_builder(pool_, type_, &tmp));
ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
builder_ = std::dynamic_pointer_cast<BuilderType>(tmp);

ASSERT_OK(make_builder(pool_, type_, &tmp));
ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
builder_nn_ = std::dynamic_pointer_cast<BuilderType>(tmp);
}

Expand Down
88 changes: 57 additions & 31 deletions python/src/pyarrow/adapters/builtin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,20 @@
// under the License.

#include <Python.h>
#include <sstream>

#include "pyarrow/adapters/builtin.h"

#include <arrow/api.h>

#include "pyarrow/status.h"

namespace pyarrow {

using arrow::ArrayBuilder;
using arrow::DataType;
using arrow::LogicalType;

namespace pyarrow {

static inline bool IsPyInteger(PyObject* obj) {
#if PYARROW_IS_PY2
return PyLong_Check(obj) || PyInt_Check(obj);
Expand Down Expand Up @@ -132,80 +134,93 @@ static Status InferArrowType(PyObject* obj, int64_t* size,
// Marshal Python sequence (list, tuple, etc.) to Arrow array
class SeqConverter {
public:
SeqConverter();
virtual Status Init(const std::shared_ptr<ArrayBuilder>& builder) {
builder_ = builder;
return Status::OK();
}

virtual Status AppendData(PyObject* seq) = 0;

private:
// Borrowed reference for now
PyObject* obj_;
protected:
std::shared_ptr<ArrayBuilder> builder_;
};

class BooleanConverter : SeqConverter {
// Sequence converter tied to one concrete builder class.
//
// Init() stores the shared builder in builder_ and additionally caches a
// BuilderType* pointing at the same object, so derived converters can reach
// builder-specific members through typed_builder_.
//
// The downcast in Init() is a static_cast and therefore unchecked: the caller
// must pass a builder whose dynamic type really is BuilderType.
template <typename BuilderType>
class TypedConverter : public SeqConverter {
 public:
  // Binds this converter to `builder`. Always returns Status::OK().
  Status Init(const std::shared_ptr<ArrayBuilder>& builder) override {
    builder_ = builder;
    typed_builder_ = static_cast<BuilderType*>(builder.get());
    return Status::OK();
  }

 protected:
  // Non-owning view of builder_; null until Init() has been called, so a
  // premature use fails predictably instead of reading an indeterminate
  // pointer.
  BuilderType* typed_builder_ = nullptr;
};

// Converter paired with arrow::BooleanBuilder.
// AppendData() is a stub for now: it ignores `obj` and reports success.
class BoolConverter : public TypedConverter<arrow::BooleanBuilder> {
 public:
  Status AppendData(PyObject* obj) override { return Status::OK(); }
};

template <typename T>
class IntegerConverter : SeqConverter {
// Converter paired with arrow::Int64Builder.
// AppendData() is a stub for now: it ignores `obj` and reports success.
class Int64Converter : public TypedConverter<arrow::Int64Builder> {
 public:
  Status AppendData(PyObject* obj) override { return Status::OK(); }
};

template <typename T>
class FloatingConverter : SeqConverter {
// Converter paired with arrow::DoubleBuilder.
// AppendData() is a stub for now: it ignores `obj` and reports success.
class DoubleConverter : public TypedConverter<arrow::DoubleBuilder> {
 public:
  Status AppendData(PyObject* obj) override { return Status::OK(); }
};

class StringConverter : SeqConverter {
class StringConverter : public TypedConverter<arrow::StringBuilder> {
public:

Status AppendData(PyObject* obj) override {
return Status::OK();
}

private:
arrow::StringBuilder builder_;
};

class ListConverter : SeqConverter {
class ListConverter : public TypedConverter<arrow::ListBuilder> {
public:
Status Init(const std::shared_ptr<ArrayBuilder>& builder) override;

Status AppendData(PyObject* obj) override {
return Status::OK();
}

private:
arrow::ListBuilder builder_;
protected:
std::shared_ptr<SeqConverter> value_converter_;
};

Status GetConverter(const std::shared_ptr<DataType>& type,
std::shared_ptr<SeqConverter>* out) {
// Dynamic constructor for sequence converters
std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) {
switch (type->type) {
case LogicalType::BOOL:
break;
return std::make_shared<BoolConverter>();
case LogicalType::INT64:
break;
return std::make_shared<Int64Converter>();
case LogicalType::DOUBLE:
break;
return std::make_shared<DoubleConverter>();
case LogicalType::STRING:
break;
return std::make_shared<StringConverter>();
case LogicalType::LIST:
return std::make_shared<ListConverter>();
case LogicalType::STRUCT:
default:
return Status::NotImplemented("No type converter implemetned");
return nullptr;
break;
}
}

// Binds this converter to a list builder and selects the converter that will
// handle the list's elements.
//
// `builder` must be an arrow::ListBuilder whose type() is an arrow::ListType;
// both downcasts below are unchecked static_casts.
//
// Returns Status::NotImplemented when GetConverter has no converter for the
// element type, Status::OK() otherwise.
Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
  builder_ = builder;
  typed_builder_ = static_cast<arrow::ListBuilder*>(builder.get());

  // The element converter has to be looked up by the list's *value* type.
  // Passing builder->type() (the list type itself) would always hand back
  // another ListConverter, regardless of what the list actually contains.
  const std::shared_ptr<DataType>& value_type =
      static_cast<arrow::ListType*>(builder->type().get())->value_type;
  value_converter_ = GetConverter(value_type);
  if (value_converter_ == nullptr) {
    // GetConverter signals "unsupported type" with a null pointer.
    return Status::NotImplemented(
        "No type converter implemented for list value type");
  }

  // NOTE(review): value_converter_ is not given a builder via Init() here;
  // confirm where the element builder is supposed to be attached.
  return Status::OK();
}

Expand All @@ -214,8 +229,19 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out) {
int64_t size;
RETURN_NOT_OK(InferArrowType(obj, &size, &type));

std::shared_ptr<SeqConverter> converter;
RETURN_NOT_OK(GetConverter(type, &converter));
std::shared_ptr<SeqConverter> converter = GetConverter(type);
if (converter == nullptr) {
std::stringstream ss;
ss << "No type converter implemented for "
<< type->ToString();
return Status::NotImplemented(ss.str());
}

// Give the sequence converter an array builder
std::shared_ptr<ArrayBuilder> builder;
RETURN_ARROW_NOT_OK(arrow::MakeBuilder(GetMemoryPool(), type, &builder));
converter->Init(builder);

RETURN_NOT_OK(converter->AppendData(obj));

return Status::OK();
Expand Down
7 changes: 7 additions & 0 deletions python/src/pyarrow/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ namespace pyarrow {
return Status::UnknownError(message); \
}

// Evaluates the arrow::Status expression `s` exactly once. If the result is
// not OK, returns from the enclosing function with a Status::ArrowError that
// carries the original status message; otherwise falls through.
//
// The message is read from the local copy `_s`, never from the macro argument,
// so an argument with side effects (e.g. a function call) is not re-run on the
// error path.
//
// NOTE(review): the trailing ';' after `while (0)` is kept so that every
// existing call site keeps compiling; it does make the macro unsafe as the
// unbraced body of an if/else.
#define RETURN_ARROW_NOT_OK(s) do {                 \
    arrow::Status _s = (s);                         \
    if (!_s.ok()) {                                 \
      return Status::ArrowError(_s.ToString());     \
    }                                               \
  } while (0);

class OwnedRef {
public:
OwnedRef(PyObject* obj) :
Expand Down

0 comments on commit 94f122f

Please sign in to comment.