diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index cfed238dc416e..265770822ce90 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -30,15 +30,6 @@ std::string Field::ToString() const { DataType::~DataType() {} -template -inline std::string PrimitiveType::ToString() const { - std::string result(static_cast(this)->name()); - if (!nullable) { - result.append(" not null"); - } - return result; -} - StringType::StringType(bool nullable) : DataType(LogicalType::STRING, nullable) {} diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index e078e2e656ba3..e78e49491193e 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -221,6 +221,15 @@ struct PrimitiveType : public DataType { std::string ToString() const override; }; +template +inline std::string PrimitiveType::ToString() const { + std::string result(static_cast(this)->name()); + if (!nullable) { + result.append(" not null"); + } + return result; +} + #define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \ typedef C_TYPE c_type; \ static constexpr LogicalType::type type_enum = LogicalType::ENUM; \ diff --git a/python/arrow/__init__.py b/python/arrow/__init__.py index 41bc8971d39c7..e59c6fda40b44 100644 --- a/python/arrow/__init__.py +++ b/python/arrow/__init__.py @@ -18,7 +18,8 @@ # flake8: noqa from arrow.array import Array, from_list -from arrow.schema import (bool_, int8, int16, int32, int64, +from arrow.schema import (null, bool_, + int8, int16, int32, int64, uint8, uint16, uint32, uint64, float_, double, string, list_, struct, field, diff --git a/python/arrow/array.pyx b/python/arrow/array.pyx index cf396da0ef962..2845c9bdec1b2 100644 --- a/python/arrow/array.pyx +++ b/python/arrow/array.pyx @@ -39,6 +39,10 @@ cdef class Array: return self.array.length() +cdef class NullArray(Array): + pass + + cdef class BooleanArray(Array): pass @@ -88,6 +92,7 @@ cdef class StringArray(Array): cdef dict _array_classes = { + LogicalType_NA: NullArray, LogicalType_BOOL: BooleanArray, LogicalType_INT64: Int64Array, LogicalType_LIST: ListArray, diff --git a/python/arrow/includes/arrow.pxd b/python/arrow/includes/arrow.pxd index 666bafc3dad5c..a1a8c25467a24 100644 --- a/python/arrow/includes/arrow.pxd +++ b/python/arrow/includes/arrow.pxd @@ -22,6 +22,8 @@ from arrow.includes.common cimport * cdef extern from "arrow/api.h" namespace "arrow" nogil: enum LogicalType" arrow::LogicalType::type": + LogicalType_NA" arrow::LogicalType::NA" + LogicalType_BOOL" arrow::LogicalType::BOOL" LogicalType_UINT8" arrow::LogicalType::UINT8" diff --git a/python/arrow/schema.pyx b/python/arrow/schema.pyx index ffb159ab3d4d5..d22c3937efc13 100644 --- a/python/arrow/schema.pyx +++ b/python/arrow/schema.pyx @@ -74,6 +74,9 @@ cdef DataType primitive_type(LogicalType type, bint nullable=True): def field(name, type): return Field(name, type) +def null(): + return primitive_type(LogicalType_NA) + def bool_(c_bool nullable=True): return primitive_type(LogicalType_BOOL, nullable) diff --git a/python/arrow/tests/test_convert_builtin.py b/python/arrow/tests/test_convert_builtin.py index f88e3cdad5ee3..68875d5b39ddb 100644 --- a/python/arrow/tests/test_convert_builtin.py +++ b/python/arrow/tests/test_convert_builtin.py @@ -24,6 +24,11 @@ class TestConvertList(unittest.TestCase): def test_boolean(self): pass + def test_empty_list(self): + arr = arrow.from_list([]) + assert len(arr) == 0 + assert arr.type == arrow.null() + def test_integer(self): arr = arrow.from_list([1, 2, 3]) assert len(arr) == 3 diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc index fb3b07ccffc23..e8429fd491219 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/python/src/pyarrow/adapters/builtin.cc @@ -229,6 +229,12 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr* out) { int64_t size; RETURN_NOT_OK(InferArrowType(obj, &size, &type)); + // Handle NA / NullType case + if (type->type == LogicalType::NA) { + out->reset(new arrow::Array(type, size)); + return Status::OK(); + } + std::shared_ptr converter = GetConverter(type); if (converter == nullptr) { std::stringstream ss; @@ -244,6 +250,8 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr* out) { RETURN_NOT_OK(converter->AppendData(obj)); + *out = builder->Finish(); + return Status::OK(); }