Skip to content

Commit

Permalink
Build array successfully, without validating contents
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Mar 7, 2016
1 parent 731544a commit c28bf09
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 25 deletions.
5 changes: 5 additions & 0 deletions python/arrow/array.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,15 @@
from arrow.includes.common cimport shared_ptr
from arrow.includes.arrow cimport CArray, LogicalType

from arrow.schema cimport DataType

cdef class Array:
cdef:
shared_ptr[CArray] sp_array

cdef readonly:
DataType type

cdef init(self, const shared_ptr[CArray]& sp_array)


Expand Down
4 changes: 3 additions & 1 deletion python/arrow/array.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,16 @@ cdef class Array:

cdef init(self, const shared_ptr[CArray]& sp_array):
self.sp_array = sp_array
self.type = DataType()
self.type.init(self.sp_array.get().type())

property null_count:

def __get__(self):
return self.sp_array.get().null_count()

def __len__(self):
return self.array.length()
return self.sp_array.get().length()


cdef class NullArray(Array):
Expand Down
2 changes: 2 additions & 0 deletions python/arrow/includes/arrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
LogicalType type
c_bool nullable

c_bool Equals(const CDataType* other)

c_string ToString()

cdef cppclass CListType" arrow::ListType"(CDataType):
Expand Down
11 changes: 11 additions & 0 deletions python/arrow/schema.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ from arrow.compat import frombytes, tobytes
from arrow.includes.arrow cimport *
cimport arrow.includes.pyarrow as pyarrow

cimport cpython

cdef class DataType:

def __cinit__(self):
Expand All @@ -41,6 +43,15 @@ cdef class DataType:
def __repr__(self):
return 'DataType({0})'.format(str(self))

def __richcmp__(DataType self, DataType other, int op):
    # Only equality and inequality are defined for data types; ordering
    # comparisons are rejected.
    if op != cpython.Py_EQ and op != cpython.Py_NE:
        raise TypeError('Invalid comparison')

    # A typed extension-class argument still accepts None. Reading the
    # C-level `type` member from None would dereference garbage, so treat
    # None as "not equal" instead.
    if other is None:
        return op == cpython.Py_NE

    is_equal = self.type.Equals(other.type)
    if op == cpython.Py_EQ:
        return is_equal
    return not is_equal


cdef class Field:

def __cinit__(self, object name, DataType type):
Expand Down
12 changes: 10 additions & 2 deletions python/arrow/tests/test_convert_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,19 @@ def test_boolean(self):
def test_empty_list(self):
    """An empty list converts to a zero-length, null-typed array with no nulls."""
    converted = arrow.from_list([])

    # Length first, then null bookkeeping, then the inferred type.
    assert len(converted) == 0
    assert converted.null_count == 0
    assert converted.type == arrow.null()

def test_all_none(self):
    """A list holding only None converts to a null-typed array of all nulls."""
    converted = arrow.from_list([None, None])

    # Both slots exist and both are counted as null.
    assert len(converted) == 2
    assert converted.null_count == 2
    assert converted.type == arrow.null()

def test_integer(self):
arr = arrow.from_list([1, 2, 3])
assert len(arr) == 3
arr = arrow.from_list([1, None, 3, None])
assert len(arr) == 4
assert arr.null_count == 2
assert arr.type == arrow.int64()

def test_double(self):
Expand Down
5 changes: 4 additions & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,10 @@ def _run_cmake(self):
static_lib_option, source]

self.spawn(cmake_command)
self.spawn(['make'])
args = ['make']
if 'PYARROW_PARALLEL' in os.environ:
args.append('-j{0}'.format(os.environ['PYARROW_PARALLEL']))
self.spawn(args)
else:
import shlex
cmake_generator = 'Visual Studio 14 2015'
Expand Down
29 changes: 20 additions & 9 deletions python/src/pyarrow/adapters/builtin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ static Status InferArrowType(PyObject* obj, int64_t* size,
// TODO(wesm): inferring types for collections
return Status::NotImplemented("No type inference for collections");
} else {
inferer.Visit(obj);
inferer.Visit(item);
}
}

Expand All @@ -139,7 +139,7 @@ class SeqConverter {
return Status::OK();
}

virtual Status AppendData(PyObject* seq) = 0;
virtual Status AppendData(PyObject* seq, int64_t size) = 0;

protected:
std::shared_ptr<ArrayBuilder> builder_;
Expand All @@ -160,28 +160,39 @@ class TypedConverter : public SeqConverter {

class BoolConverter : public TypedConverter<arrow::BooleanBuilder> {
public:
Status AppendData(PyObject* obj) override {
Status AppendData(PyObject* seq, int64_t size) override {
return Status::OK();
}
};

class Int64Converter : public TypedConverter<arrow::Int64Builder> {
public:
Status AppendData(PyObject* obj) override {
// Appends the first `size` items of the Python sequence `obj` to the Int64
// builder. A None item is appended as a null slot; every other item is
// converted with PyLong_AsLongLong. Stops at the first item that cannot be
// fetched or converted, or at the first failing builder call.
Status AppendData(PyObject* obj, int64_t size) override {
  for (int64_t i = 0; i < size; ++i) {
    OwnedRef item(PySequence_GetItem(obj, i));
    if (item.obj() == nullptr) {
      // PySequence_GetItem returns NULL with a Python exception set on
      // failure. Report that error here rather than passing NULL on to
      // PyLong_AsLongLong, which would replace it with an unrelated one.
      RETURN_IF_PYERROR();
    }
    if (item.obj() == Py_None) {
      RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
    } else {
      const int64_t val = PyLong_AsLongLong(item.obj());
      // A failed conversion is signalled through the Python error state.
      RETURN_IF_PYERROR();
      RETURN_ARROW_NOT_OK(typed_builder_->Append(val));
    }
  }
  return Status::OK();
}
};

class DoubleConverter : public TypedConverter<arrow::DoubleBuilder> {
public:
Status AppendData(PyObject* obj) override {
Status AppendData(PyObject* seq, int64_t size) override {
return Status::OK();
}
};

class StringConverter : public TypedConverter<arrow::StringBuilder> {
public:
Status AppendData(PyObject* obj) override {
Status AppendData(PyObject* seq, int64_t size) override {
return Status::OK();
}
};
Expand All @@ -190,7 +201,7 @@ class ListConverter : public TypedConverter<arrow::ListBuilder> {
public:
Status Init(const std::shared_ptr<ArrayBuilder>& builder) override;

Status AppendData(PyObject* obj) override {
Status AppendData(PyObject* seq, int64_t size) override {
return Status::OK();
}
protected:
Expand Down Expand Up @@ -231,7 +242,7 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out) {

// Handle NA / NullType case
if (type->type == LogicalType::NA) {
out->reset(new arrow::Array(type, size));
out->reset(new arrow::Array(type, size, size));
return Status::OK();
}

Expand All @@ -248,7 +259,7 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out) {
RETURN_ARROW_NOT_OK(arrow::MakeBuilder(GetMemoryPool(), type, &builder));
converter->Init(builder);

RETURN_NOT_OK(converter->AppendData(obj));
RETURN_NOT_OK(converter->AppendData(obj, size));

*out = builder->Finish();

Expand Down
50 changes: 38 additions & 12 deletions python/src/pyarrow/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,6 @@ namespace pyarrow {

// True when building against Python 2 (or older). Parenthesized so the
// expansion stays a single operand inside larger expressions such as
// `!PYARROW_IS_PY2`.
#define PYARROW_IS_PY2 (PY_MAJOR_VERSION <= 2)

// TODO(wesm): We can just let errors pass through. To be explored later
#define RETURN_IF_PYERROR() \
if (PyErr_Occurred()) { \
PyObject *exc_type, *exc_value, *traceback; \
PyErr_Fetch(&exc_type, &exc_value, &traceback); \
std::string message(PyString_AsString(exc_value)); \
Py_DECREF(exc_type); \
Py_DECREF(exc_value); \
Py_DECREF(traceback); \
return Status::UnknownError(message); \
}

#define RETURN_ARROW_NOT_OK(s) do { \
arrow::Status _s = (s); \
if (!_s.ok()) { \
Expand All @@ -47,13 +35,22 @@ namespace pyarrow {

class OwnedRef {
public:
OwnedRef() : obj_(nullptr) {}

OwnedRef(PyObject* obj) :
obj_(obj) {}

~OwnedRef() {
Py_XDECREF(obj_);
}

// Releases the reference currently held, if any, and stores `obj` in its
// place. No new reference to `obj` is taken here; the destructor will
// release it.
void reset(PyObject* obj) {
  // Py_XDECREF ignores NULL, so an empty holder needs no special case.
  Py_XDECREF(obj_);
  obj_ = obj;
}

PyObject* obj() const{
return obj_;
}
Expand All @@ -62,6 +59,35 @@ class OwnedRef {
PyObject* obj_;
};

// Exposes a Python object as a NUL-terminated C string, e.g. for building
// error messages.
//
// Unicode objects are encoded as UTF-8, bytes objects are used as-is, and any
// other object (such as an exception instance) is first converted with
// PyObject_Str. `bytes` points into the object held by `tmp_obj`, so it is
// valid only while this struct is alive.
//
// `bytes` is never NULL: if `obj` is NULL or no string can be produced it is
// the empty string, and the Python error raised by the failed conversion is
// cleared.
struct PyObjectStringify {
  OwnedRef tmp_obj;
  const char* bytes;

  PyObjectStringify(PyObject* obj) : bytes("") {
    if (obj == nullptr) {
      return;
    }

    // Owns the intermediate str(obj) result when obj is not already textual.
    OwnedRef as_str;
    PyObject* text = obj;
    if (!PyUnicode_Check(obj) && !PyBytes_Check(obj)) {
      as_str.reset(PyObject_Str(obj));
      text = as_str.obj();
    }

    if (text != nullptr) {
      if (PyUnicode_Check(text)) {
        tmp_obj.reset(PyUnicode_AsUTF8String(text));
      } else {
        // Already a bytes object: hold our own reference so that `bytes`
        // cannot outlive the storage it points into.
        Py_INCREF(text);
        tmp_obj.reset(text);
      }
    }

    const char* data =
        (tmp_obj.obj() != nullptr) ? PyBytes_AsString(tmp_obj.obj()) : nullptr;
    if (data == nullptr) {
      // str(), the UTF-8 encode, or the bytes access failed; drop that
      // secondary error and keep the empty-string fallback.
      PyErr_Clear();
      return;
    }
    bytes = data;
  }
};

// TODO(wesm): We can just let errors pass through. To be explored later
//
// If a Python exception is pending, fetches (and thereby clears) it and
// returns Status::UnknownError carrying the stringified exception value from
// the enclosing function.
//
// PyErr_Fetch may hand back NULL for the value and traceback, so every
// reference is released with Py_XDECREF and the value is only stringified
// when present. The text is copied into `message` before the references are
// dropped. Any error raised while stringifying is cleared so that no Python
// exception is left pending once the Status has been returned.
#define RETURN_IF_PYERROR()                                 \
  if (PyErr_Occurred()) {                                   \
    PyObject *exc_type, *exc_value, *traceback;             \
    PyErr_Fetch(&exc_type, &exc_value, &traceback);         \
    std::string message;                                    \
    if (exc_value != nullptr) {                             \
      PyObjectStringify stringified(exc_value);             \
      if (stringified.bytes != nullptr) {                   \
        message = stringified.bytes;                        \
      }                                                     \
      PyErr_Clear();                                        \
    }                                                       \
    Py_XDECREF(exc_type);                                   \
    Py_XDECREF(exc_value);                                  \
    Py_XDECREF(traceback);                                  \
    return Status::UnknownError(message);                   \
  }

arrow::MemoryPool* GetMemoryPool();

} // namespace pyarrow
Expand Down

0 comments on commit c28bf09

Please sign in to comment.