diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h index 7620450d96ff6..282b9ff2c9fcc 100644 --- a/cpp/src/arrow/api.h +++ b/cpp/src/arrow/api.h @@ -29,6 +29,7 @@ #include "arrow/table/table.h" #include "arrow/types/boolean.h" +#include "arrow/types/construct.h" #include "arrow/types/floating.h" #include "arrow/types/integer.h" #include "arrow/types/list.h" diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index b7c3935d2ac18..fafee91f92831 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -80,10 +80,14 @@ class ArrayBuilder { // ownership of the data virtual std::shared_ptr Finish() = 0; + const std::shared_ptr& type() const { + return type_; + } + protected: MemoryPool* pool_; - TypePtr type_; + std::shared_ptr type_; // When nulls are first appended to the builder, the null bitmap is allocated std::shared_ptr nulls_; diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt index bae4b6235ff1d..57cabdefd2525 100644 --- a/cpp/src/arrow/types/CMakeLists.txt +++ b/cpp/src/arrow/types/CMakeLists.txt @@ -23,6 +23,7 @@ install(FILES boolean.h collection.h + construct.h datetime.h decimal.h floating.h diff --git a/cpp/src/arrow/types/boolean.h b/cpp/src/arrow/types/boolean.h index 8fc9cfd19c0d4..a5023d7b368d2 100644 --- a/cpp/src/arrow/types/boolean.h +++ b/cpp/src/arrow/types/boolean.h @@ -24,7 +24,8 @@ namespace arrow { typedef PrimitiveArrayImpl BooleanArray; -// typedef PrimitiveBuilder BooleanBuilder; +class BooleanBuilder : public ArrayBuilder { +}; } // namespace arrow diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc index 5a46ef605f130..43f01a3051385 100644 --- a/cpp/src/arrow/types/construct.cc +++ b/cpp/src/arrow/types/construct.cc @@ -37,7 +37,7 @@ class ArrayBuilder; out->reset(new BuilderType(pool, type)); \ return Status::OK(); -Status make_builder(MemoryPool* pool, const std::shared_ptr& type, +Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, std::shared_ptr* out) { switch (type->type) { BUILDER_CASE(UINT8, UInt8Builder); @@ -62,7 +62,7 @@ Status make_builder(MemoryPool* pool, const std::shared_ptr& type, const std::shared_ptr& value_type = static_cast( type.get())->value_type; - RETURN_NOT_OK(make_builder(pool, value_type, &value_builder)); + RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder)); out->reset(new ListBuilder(pool, type, value_builder)); return Status::OK(); } diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h index 39d6342743a68..59ebe1acddc98 100644 --- a/cpp/src/arrow/types/construct.h +++ b/cpp/src/arrow/types/construct.h @@ -28,7 +28,7 @@ class ArrayBuilder; class MemoryPool; class Status; -Status make_builder(MemoryPool* pool, const std::shared_ptr& type, +Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, std::shared_ptr* out); } // namespace arrow diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc index 6de95a31abb1c..516008b7763c7 100644 --- a/cpp/src/arrow/types/list-test.cc +++ b/cpp/src/arrow/types/list-test.cc @@ -74,7 +74,7 @@ class TestListBuilder : public TestBuilder { type_ = TypePtr(new ListType(value_type_)); std::shared_ptr tmp; - ASSERT_OK(make_builder(pool_, type_, &tmp)); + ASSERT_OK(MakeBuilder(pool_, type_, &tmp)); builder_ = std::dynamic_pointer_cast(tmp); } diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc index e77aec10a424c..e25729dfb67dc 100644 --- a/cpp/src/arrow/types/primitive-test.cc +++ b/cpp/src/arrow/types/primitive-test.cc @@ -100,10 +100,10 @@ class TestPrimitiveBuilder : public TestBuilder { type_ = Attrs::type(); std::shared_ptr tmp; - ASSERT_OK(make_builder(pool_, type_, &tmp)); + ASSERT_OK(MakeBuilder(pool_, type_, &tmp)); builder_ = std::dynamic_pointer_cast(tmp); - ASSERT_OK(make_builder(pool_, type_, &tmp)); + ASSERT_OK(MakeBuilder(pool_, type_, &tmp)); builder_nn_ = std::dynamic_pointer_cast(tmp); } diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc index 8b85deceafd96..fb3b07ccffc23 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/python/src/pyarrow/adapters/builtin.cc @@ -16,6 +16,7 @@ // under the License. #include +#include #include "pyarrow/adapters/builtin.h" @@ -23,11 +24,12 @@ #include "pyarrow/status.h" -namespace pyarrow { - +using arrow::ArrayBuilder; using arrow::DataType; using arrow::LogicalType; +namespace pyarrow { + static inline bool IsPyInteger(PyObject* obj) { #if PYARROW_IS_PY2 return PyLong_Check(obj) || PyInt_Check(obj); @@ -132,80 +134,93 @@ static Status InferArrowType(PyObject* obj, int64_t* size, // Marshal Python sequence (list, tuple, etc.) to Arrow array class SeqConverter { public: - SeqConverter(); + virtual Status Init(const std::shared_ptr& builder) { + builder_ = builder; + return Status::OK(); + } virtual Status AppendData(PyObject* seq) = 0; - private: - // Borrowed reference for now - PyObject* obj_; + protected: + std::shared_ptr builder_; }; -class BooleanConverter : SeqConverter { +template +class TypedConverter : public SeqConverter { public: + Status Init(const std::shared_ptr& builder) override { + builder_ = builder; + typed_builder_ = static_cast(builder.get()); + return Status::OK(); + } + + protected: + BuilderType* typed_builder_; +}; +class BoolConverter : public TypedConverter { + public: Status AppendData(PyObject* obj) override { return Status::OK(); } }; -template -class IntegerConverter : SeqConverter { +class Int64Converter : public TypedConverter { public: - Status AppendData(PyObject* obj) override { return Status::OK(); } }; -template -class FloatingConverter : SeqConverter { +class DoubleConverter : public TypedConverter { public: - Status AppendData(PyObject* obj) override { return Status::OK(); } }; -class StringConverter : SeqConverter { +class StringConverter : public TypedConverter { public: - Status AppendData(PyObject* obj) override { return Status::OK(); } - - private: - arrow::StringBuilder builder_; }; -class ListConverter : SeqConverter { +class ListConverter : public TypedConverter { public: + Status Init(const std::shared_ptr& builder) override; Status AppendData(PyObject* obj) override { return Status::OK(); } - - private: - arrow::ListBuilder builder_; + protected: + std::shared_ptr value_converter_; }; -Status GetConverter(const std::shared_ptr& type, - std::shared_ptr* out) { +// Dynamic constructor for sequence converters +std::shared_ptr GetConverter(const std::shared_ptr& type) { switch (type->type) { case LogicalType::BOOL: - break; + return std::make_shared(); case LogicalType::INT64: - break; + return std::make_shared(); case LogicalType::DOUBLE: - break; + return std::make_shared(); case LogicalType::STRING: - break; + return std::make_shared(); case LogicalType::LIST: + return std::make_shared(); case LogicalType::STRUCT: default: - return Status::NotImplemented("No type converter implemetned"); + return nullptr; break; } +} + +Status ListConverter::Init(const std::shared_ptr& builder) { + builder_ = builder; + typed_builder_ = static_cast(builder.get()); + value_converter_ = GetConverter(builder->type()); return Status::OK(); } @@ -214,8 +229,19 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr* out) { int64_t size; RETURN_NOT_OK(InferArrowType(obj, &size, &type)); - std::shared_ptr converter; - RETURN_NOT_OK(GetConverter(type, &converter)); + std::shared_ptr converter = GetConverter(type); + if (converter == nullptr) { + std::stringstream ss; + ss << "No type converter implemented for " + << type->ToString(); + return Status::NotImplemented(ss.str()); + } + + // Give the sequence converter an array builder + std::shared_ptr builder; + RETURN_ARROW_NOT_OK(arrow::MakeBuilder(GetMemoryPool(), type, &builder)); + converter->Init(builder); + RETURN_NOT_OK(converter->AppendData(obj)); return Status::OK(); diff --git a/python/src/pyarrow/common.h b/python/src/pyarrow/common.h index a3daf938922f1..4f23b6a4445da 100644 --- a/python/src/pyarrow/common.h +++ b/python/src/pyarrow/common.h @@ -37,6 +37,13 @@ namespace pyarrow { return Status::UnknownError(message); \ } +#define RETURN_ARROW_NOT_OK(s) do { \ + arrow::Status _s = (s); \ + if (!_s.ok()) { \ + return Status::ArrowError(s.ToString()); \ + } \ + } while (0); + class OwnedRef { public: OwnedRef(PyObject* obj) :